From 5a2491a847beb035b37ee2a270029499065b1919 Mon Sep 17 00:00:00 2001 From: Jack Stouffer Date: Fri, 10 Feb 2017 09:32:45 -0500 Subject: [PATCH] Fix Issue 17161 - Revert all changes to std.regex from 2.072.2 onwards --- posix.mak | 2 +- std/regex/internal/backtracking.d | 232 ++++----- std/regex/internal/bitnfa.d | 754 ------------------------------ std/regex/internal/generator.d | 6 +- std/regex/internal/ir.d | 219 +++------ std/regex/internal/kickstart.d | 579 +++++++++++++++++++++++ std/regex/internal/parser.d | 156 +++++-- std/regex/internal/tests.d | 584 ++++++++++++++++++++++- std/regex/internal/tests2.d | 270 ----------- std/regex/internal/tests3.d | 321 ------------- std/regex/internal/thompson.d | 234 +++++----- std/regex/package.d | 224 ++++----- std/uni.d | 14 + win32.mak | 29 +- win64.mak | 27 +- 15 files changed, 1686 insertions(+), 1965 deletions(-) delete mode 100644 std/regex/internal/bitnfa.d create mode 100644 std/regex/internal/kickstart.d delete mode 100644 std/regex/internal/tests2.d delete mode 100644 std/regex/internal/tests3.d diff --git a/posix.mak b/posix.mak index 410e4e4fa3c..137d6105560 100644 --- a/posix.mak +++ b/posix.mak @@ -193,7 +193,7 @@ PACKAGE_std_experimental_ndslice = package iteration selection slice PACKAGE_std_net = curl isemail PACKAGE_std_range = interfaces package primitives PACKAGE_std_regex = package $(addprefix internal/,generator ir parser \ - backtracking bitnfa tests tests2 tests3 thompson shiftor) + backtracking tests thompson kickstart) # Modules in std (including those in packages) STD_MODULES=$(call P2MODULES,$(STD_PACKAGES)) diff --git a/std/regex/internal/backtracking.d b/std/regex/internal/backtracking.d index 767662a3c2b..a7c360c5e88 100644 --- a/std/regex/internal/backtracking.d +++ b/std/regex/internal/backtracking.d @@ -30,13 +30,7 @@ template BacktrackingMatcher(bool CTregex) alias String = const(Char)[]; alias RegEx = Regex!Char; alias MatchFn = bool function (ref BacktrackingMatcher!(Char, Stream)); 
- const(Bytecode)[] ir; - uint ngroup; - uint flags; - const(Interval[])[] charsets; - const(CharMatcher)[] matchers; - const(BitTable)[] filters; - const Kickstart!Char kickstart; + RegEx re; //regex program static if (CTregex) MatchFn nativeFn; //native code for that program //Stream state @@ -85,17 +79,12 @@ template BacktrackingMatcher(bool CTregex) static size_t initialMemory(const ref RegEx re) { - return stackSize(re.ngroup)*size_t.sizeof + re.hotspotTableSize*Trace.sizeof; + return stackSize(re)*size_t.sizeof + re.hotspotTableSize*Trace.sizeof; } - size_t initialMemory() + static size_t stackSize(const ref RegEx re) { - return stackSize(ngroup)*size_t.sizeof + merge.length*Trace.sizeof; - } - - static size_t stackSize(uint ngroup) - { - return initialStack*(stateSize + ngroup*(Group!DataIndex).sizeof/size_t.sizeof)+1; + return initialStack*(stateSize + re.ngroup*(Group!DataIndex).sizeof/size_t.sizeof)+1; } @property bool atStart(){ return index == 0; } @@ -112,7 +101,7 @@ template BacktrackingMatcher(bool CTregex) { static if (kicked) { - if (!s.search(kickstart, front, index)) + if (!s.search(re.kickstart, front, index)) { index = s.lastIndex; } @@ -124,69 +113,46 @@ template BacktrackingMatcher(bool CTregex) // void newStack() { - auto chunk = mallocArray!(size_t)(stackSize(ngroup)); + auto chunk = mallocArray!(size_t)(stackSize(re)); chunk[0] = cast(size_t)(memory.ptr); memory = chunk[1..$]; } - void initExternalMemory(void[] memBlock, size_t hotspotTableSize) + void initExternalMemory(void[] memBlock) { - merge = arrayInChunk!(Trace)(hotspotTableSize, memBlock); + merge = arrayInChunk!(Trace)(re.hotspotTableSize, memBlock); merge[] = Trace.init; memory = cast(size_t[])memBlock; memory[0] = 0; //hidden pointer memory = memory[1..$]; } - void dupTo(void[] memory) - { - initExternalMemory(memory, merge.length); - } - - this(Matcher)(ref Matcher matcher, Stream stream, void[] memBlock, dchar ch, DataIndex idx) + void initialize(ref RegEx program, Stream 
stream, void[] memBlock) { - ir = matcher.ir; - charsets = matcher.charsets; - filters = matcher.filters; - matchers = matcher.matchers; - ngroup = matcher.ngroup; - flags = matcher.flags; + re = program; s = stream; exhausted = false; - initExternalMemory(memBlock, matcher.merge.length); + initExternalMemory(memBlock); backrefed = null; - front = ch; - index = idx; } - this(Matcher)(ref Matcher matcher, Stream stream, void[] memBlock) + auto dupTo(void[] memory) { - ir = matcher.ir; - charsets = matcher.charsets; - filters = matcher.filters; - matchers = matcher.matchers; - ngroup = matcher.ngroup; - flags = matcher.flags; - s = stream; - exhausted = false; - initExternalMemory(memBlock, matcher.merge.length); - backrefed = null; - next(); + typeof(this) tmp = this; + tmp.initExternalMemory(memory); + return tmp; } - this()(ref const RegEx program, Stream stream, void[] memBlock, uint regexFlags) + this(ref RegEx program, Stream stream, void[] memBlock, dchar ch, DataIndex idx) { - kickstart = program.kickstart; - ir = program.ir; - charsets = program.charsets; - filters = program.filters; - matchers = program.matchers; - ngroup = program.ngroup; - flags = regexFlags; - s = stream; - exhausted = false; - initExternalMemory(memBlock, program.hotspotTableSize); - backrefed = null; + initialize(program, stream, memBlock); + front = ch; + index = idx; + } + + this(ref RegEx program, Stream stream, void[] memBlock) + { + initialize(program, stream, memBlock); next(); } @@ -194,7 +160,7 @@ template BacktrackingMatcher(bool CTregex) { alias BackMatcherTempl = .BacktrackingMatcher!(CTregex); alias BackMatcher = BackMatcherTempl!(Char, Stream); - auto fwdMatcher = BackMatcher(matcher, s, memBlock, front, index); + auto fwdMatcher = BackMatcher(matcher.re, s, memBlock, front, index); return fwdMatcher; } @@ -203,7 +169,7 @@ template BacktrackingMatcher(bool CTregex) alias BackMatcherTempl = .BacktrackingMatcher!(CTregex); alias BackMatcher = BackMatcherTempl!(Char, 
typeof(s.loopBack(index))); auto fwdMatcher = - BackMatcher(matcher, s.loopBack(index), memBlock); + BackMatcher(matcher.re, s.loopBack(index), memBlock); return fwdMatcher; } @@ -216,7 +182,7 @@ template BacktrackingMatcher(bool CTregex) {//stream is updated here matches[0].begin = start; matches[0].end = index; - if (!(flags & RegexOption.global) || atEnd) + if (!(re.flags & RegexOption.global) || atEnd) exhausted = true; if (start == index)//empty match advances input next(); @@ -236,7 +202,7 @@ template BacktrackingMatcher(bool CTregex) if (exhausted) //all matches collected return false; this.matches = matches; - if (flags & RegexInfo.oneShot) + if (re.flags & RegexInfo.oneShot) { exhausted = true; const DataIndex start = index; @@ -250,7 +216,7 @@ template BacktrackingMatcher(bool CTregex) } static if (kicked) { - if (kickstart) + if (!re.kickstart.empty) { for (;;) { @@ -318,19 +284,19 @@ template BacktrackingMatcher(bool CTregex) { debug(std_regex_matcher) writefln("PC: %s\tCNT: %s\t%s \tfront: %s src: %s", - pc, counter, disassemble(ir, pc), + pc, counter, disassemble(re.ir, pc, re.dict), front, s._index); - switch (ir[pc].code) + switch (re.ir[pc].code) { case IR.OrChar://assumes IRL!(OrChar) == 1 if (atEnd) goto L_backtrack; - uint len = ir[pc].sequence; + uint len = re.ir[pc].sequence; uint end = pc + len; - if (ir[pc].data != front && ir[pc+1].data != front) + if (re.ir[pc].data != front && re.ir[pc+1].data != front) { for (pc = pc+2; pc < end; pc++) - if (ir[pc].data == front) + if (re.ir[pc].data == front) break; if (pc == end) goto L_backtrack; @@ -339,7 +305,7 @@ template BacktrackingMatcher(bool CTregex) next(); break; case IR.Char: - if (atEnd || front != ir[pc].data) + if (atEnd || front != re.ir[pc].data) goto L_backtrack; pc += IRL!(IR.Char); next(); @@ -351,13 +317,13 @@ template BacktrackingMatcher(bool CTregex) next(); break; case IR.CodepointSet: - if (atEnd || !charsets[ir[pc].data].scanFor(front)) + if (atEnd || 
!re.charsets[re.ir[pc].data].scanFor(front)) goto L_backtrack; next(); pc += IRL!(IR.CodepointSet); break; case IR.Trie: - if (atEnd || !matchers[ir[pc].data][front]) + if (atEnd || !re.matchers[re.ir[pc].data][front]) goto L_backtrack; next(); pc += IRL!(IR.Trie); @@ -445,10 +411,10 @@ template BacktrackingMatcher(bool CTregex) goto L_backtrack; break; case IR.InfiniteStart, IR.InfiniteQStart: - pc += ir[pc].data + IRL!(IR.InfiniteStart); + pc += re.ir[pc].data + IRL!(IR.InfiniteStart); //now pc is at end IR.Infinite(Q)End - uint len = ir[pc].data; - if (ir[pc].code == IR.InfiniteEnd) + uint len = re.ir[pc].data; + if (re.ir[pc].code == IR.InfiniteEnd) { pushState(pc+IRL!(IR.InfiniteEnd), counter); pc -= len; @@ -460,29 +426,29 @@ template BacktrackingMatcher(bool CTregex) } break; case IR.InfiniteBloomStart: - pc += ir[pc].data + IRL!(IR.InfiniteBloomStart); + pc += re.ir[pc].data + IRL!(IR.InfiniteBloomStart); //now pc is at end IR.InfiniteBloomEnd - immutable len = ir[pc].data; - immutable filterIdx = ir[pc+2].raw; - if (filters[filterIdx][front]) + immutable len = re.ir[pc].data; + immutable filterIdx = re.ir[pc+2].raw; + if (re.filters[filterIdx][front]) pushState(pc+IRL!(IR.InfiniteBloomEnd), counter); pc -= len; break; case IR.RepeatStart, IR.RepeatQStart: - pc += ir[pc].data + IRL!(IR.RepeatStart); + pc += re.ir[pc].data + IRL!(IR.RepeatStart); break; case IR.RepeatEnd: case IR.RepeatQEnd: - if (merge[ir[pc + 1].raw+counter].mark(index)) + if (merge[re.ir[pc + 1].raw+counter].mark(index)) { // merged! 
goto L_backtrack; } //len, step, min, max - immutable len = ir[pc].data; - immutable step = ir[pc+2].raw; - immutable min = ir[pc+3].raw; - immutable max = ir[pc+4].raw; + immutable len = re.ir[pc].data; + immutable step = re.ir[pc+2].raw; + immutable min = re.ir[pc+3].raw; + immutable max = re.ir[pc+4].raw; if (counter < min) { counter += step; @@ -490,7 +456,7 @@ template BacktrackingMatcher(bool CTregex) } else if (counter < max) { - if (ir[pc].code == IR.RepeatEnd) + if (re.ir[pc].code == IR.RepeatEnd) { pushState(pc + IRL!(IR.RepeatEnd), counter%step); counter += step; @@ -512,13 +478,13 @@ template BacktrackingMatcher(bool CTregex) case IR.InfiniteEnd: case IR.InfiniteQEnd: debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting); - if (merge[ir[pc + 1].raw+counter].mark(index)) + if (merge[re.ir[pc + 1].raw+counter].mark(index)) { // merged! goto L_backtrack; } - immutable len = ir[pc].data; - if (ir[pc].code == IR.InfiniteEnd) + immutable len = re.ir[pc].data; + if (re.ir[pc].code == IR.InfiniteEnd) { pushState(pc + IRL!(IR.InfiniteEnd), counter); pc -= len; @@ -531,14 +497,14 @@ template BacktrackingMatcher(bool CTregex) break; case IR.InfiniteBloomEnd: debug(std_regex_matcher) writeln("Infinited nesting:", infiniteNesting); - if (merge[ir[pc + 1].raw+counter].mark(index)) + if (merge[re.ir[pc + 1].raw+counter].mark(index)) { // merged! goto L_backtrack; } - immutable len = ir[pc].data; - immutable filterIdx = ir[pc+2].raw; - if (filters[filterIdx][front]) + immutable len = re.ir[pc].data; + immutable filterIdx = re.ir[pc+2].raw; + if (re.filters[filterIdx][front]) { infiniteNesting--; pushState(pc + IRL!(IR.InfiniteBloomEnd), counter); @@ -547,7 +513,7 @@ template BacktrackingMatcher(bool CTregex) pc -= len; break; case IR.OrEnd: - if (merge[ir[pc + 1].raw+counter].mark(index)) + if (merge[re.ir[pc + 1].raw+counter].mark(index)) { // merged! 
goto L_backtrack; @@ -558,34 +524,34 @@ template BacktrackingMatcher(bool CTregex) pc += IRL!(IR.OrStart); goto case; case IR.Option: - immutable len = ir[pc].data; - if (ir[pc+len].code == IR.GotoEndOr)//not a last one + immutable len = re.ir[pc].data; + if (re.ir[pc+len].code == IR.GotoEndOr)//not a last one { pushState(pc + len + IRL!(IR.Option), counter); //remember 2nd branch } pc += IRL!(IR.Option); break; case IR.GotoEndOr: - pc = pc + ir[pc].data + IRL!(IR.GotoEndOr); + pc = pc + re.ir[pc].data + IRL!(IR.GotoEndOr); break; case IR.GroupStart: - immutable n = ir[pc].data; + immutable n = re.ir[pc].data; matches[n].begin = index; debug(std_regex_matcher) writefln("IR group #%u starts at %u", n, index); pc += IRL!(IR.GroupStart); break; case IR.GroupEnd: - immutable n = ir[pc].data; + immutable n = re.ir[pc].data; matches[n].end = index; debug(std_regex_matcher) writefln("IR group #%u ends at %u", n, index); pc += IRL!(IR.GroupEnd); break; case IR.LookaheadStart: case IR.NeglookaheadStart: - immutable len = ir[pc].data; + immutable len = re.ir[pc].data; auto save = index; - immutable ms = ir[pc+1].raw, me = ir[pc+2].raw; - auto mem = malloc(initialMemory())[0..initialMemory()]; + immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw; + auto mem = malloc(initialMemory(re))[0..initialMemory(re)]; scope(exit) free(mem.ptr); static if (Stream.isLoopback) { @@ -597,10 +563,10 @@ template BacktrackingMatcher(bool CTregex) } matcher.matches = matches[ms .. me]; matcher.backrefed = backrefed.empty ? matches : backrefed; - matcher.ir = ir[ + matcher.re.ir = re.ir[ pc+IRL!(IR.LookaheadStart) .. 
pc+IRL!(IR.LookaheadStart)+len+IRL!(IR.LookaheadEnd) ]; - immutable match = (matcher.matchImpl() != 0) ^ (ir[pc].code == IR.NeglookaheadStart); + immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookaheadStart); s.reset(save); next(); if (!match) @@ -612,26 +578,26 @@ template BacktrackingMatcher(bool CTregex) break; case IR.LookbehindStart: case IR.NeglookbehindStart: - immutable len = ir[pc].data; - immutable ms = ir[pc+1].raw, me = ir[pc+2].raw; - auto mem = malloc(initialMemory())[0..initialMemory()]; + immutable len = re.ir[pc].data; + immutable ms = re.ir[pc+1].raw, me = re.ir[pc+2].raw; + auto mem = malloc(initialMemory(re))[0..initialMemory(re)]; scope(exit) free(mem.ptr); static if (Stream.isLoopback) { alias Matcher = BacktrackingMatcher!(Char, Stream); - auto matcher = Matcher(this, s, mem, front, index); + auto matcher = Matcher(re, s, mem, front, index); } else { alias Matcher = BacktrackingMatcher!(Char, typeof(s.loopBack(index))); - auto matcher = Matcher(this, s.loopBack(index), mem); + auto matcher = Matcher(re, s.loopBack(index), mem); } matcher.matches = matches[ms .. me]; - matcher.ir = ir[ + matcher.re.ir = re.ir[ pc + IRL!(IR.LookbehindStart) .. pc + IRL!(IR.LookbehindStart) + len + IRL!(IR.LookbehindEnd) ]; matcher.backrefed = backrefed.empty ? matches : backrefed; - immutable match = (matcher.matchImpl() != 0) ^ (ir[pc].code == IR.NeglookbehindStart); + immutable match = (matcher.matchImpl() != 0) ^ (re.ir[pc].code == IR.NeglookbehindStart); if (!match) goto L_backtrack; else @@ -640,8 +606,8 @@ template BacktrackingMatcher(bool CTregex) } break; case IR.Backref: - immutable n = ir[pc].data; - auto referenced = ir[pc].localRef + immutable n = re.ir[pc].data; + auto referenced = re.ir[pc].localRef ? s[matches[n].begin .. matches[n].end] : s[backrefed[n].begin .. 
backrefed[n].end]; while (!atEnd && !referenced.empty && front == referenced.front) @@ -662,9 +628,9 @@ template BacktrackingMatcher(bool CTregex) case IR.LookbehindEnd: case IR.NeglookbehindEnd: case IR.End: - return ir[pc].data; + return re.ir[pc].data; default: - debug printBytecode(ir[0..$]); + debug printBytecode(re.ir[0..$]); assert(0); L_backtrack: if (!popState()) @@ -693,7 +659,7 @@ template BacktrackingMatcher(bool CTregex) { import core.stdc.stdlib : free; free(memory.ptr);//last segment is freed in RegexMatch - immutable size = initialStack*(stateSize + 2*ngroup); + immutable size = initialStack*(stateSize + 2*re.ngroup); memory = prev[0..size]; lastState = size; return true; @@ -828,7 +794,7 @@ struct CtContext //to mark the portion of matches to save int match, total_matches; int reserved; - const Interval[][] charsets; + CodepointSet[] charsets; //state of codegenerator @@ -838,7 +804,7 @@ struct CtContext int addr; } - this(Char)(const Regex!Char re) + this(Char)(Regex!Char re) { match = 1; reserved = 1; //first match is skipped @@ -900,7 +866,7 @@ struct CtContext } // - CtState ctGenBlock(const(Bytecode)[] ir, int addr) + CtState ctGenBlock(Bytecode[] ir, int addr) { CtState result; result.addr = addr; @@ -914,7 +880,7 @@ struct CtContext } // - CtState ctGenGroup(ref const(Bytecode)[] ir, int addr) + CtState ctGenGroup(ref Bytecode[] ir, int addr) { import std.algorithm.comparison : max; auto bailOut = "goto L_backtrack;"; @@ -977,7 +943,7 @@ struct CtContext //(neg)lookaround piece ends } auto save = index; - auto mem = malloc(initialMemory())[0..initialMemory()]; + auto mem = malloc(initialMemory(re))[0..initialMemory(re)]; scope(exit) free(mem.ptr); static if (typeof(matcher.s).isLoopback) auto lookaround = $$; @@ -1016,7 +982,7 @@ struct CtContext } //generate source for bytecode contained in OrStart ... 
OrEnd - CtState ctGenAlternation(const(Bytecode)[] ir, int addr) + CtState ctGenAlternation(Bytecode[] ir, int addr) { CtState[] pieces; CtState r; @@ -1056,11 +1022,11 @@ struct CtContext // generate fixup code for instruction in ir, // fixup means it has an alternative way for control flow - string ctGenFixupCode(const(Bytecode)[] ir, int addr, int fixup) + string ctGenFixupCode(Bytecode[] ir, int addr, int fixup) { return ctGenFixupCode(ir, addr, fixup); // call ref Bytecode[] version } - string ctGenFixupCode(ref const(Bytecode)[] ir, int addr, int fixup) + string ctGenFixupCode(ref Bytecode[] ir, int addr, int fixup) { string r; string testCode; @@ -1214,7 +1180,7 @@ struct CtContext } - string ctQuickTest(const(Bytecode)[] ir, int id) + string ctQuickTest(Bytecode[] ir, int id) { uint pc = 0; while (pc < ir.length && ir[pc].isAtom) @@ -1241,7 +1207,7 @@ struct CtContext } //process & generate source for simple bytecodes at front of ir using address addr - CtState ctGenAtom(ref const(Bytecode)[] ir, int addr) + CtState ctGenAtom(ref Bytecode[] ir, int addr) { CtState result; result.code = ctAtomCode(ir, addr); @@ -1251,7 +1217,7 @@ struct CtContext } //D code for atom at ir using address addr, addr < 0 means quickTest - string ctAtomCode(const(Bytecode)[] ir, int addr) + string ctAtomCode(Bytecode[] ir, int addr) { string code; string bailOut, nextInstr; @@ -1296,7 +1262,7 @@ struct CtContext break; case IR.Any: code ~= ctSub( ` - if (atEnd || (!(flags & RegexOption.singleline) + if (atEnd || (!(re.flags & RegexOption.singleline) && (front == '\r' || front == '\n'))) $$ $$ @@ -1306,7 +1272,7 @@ struct CtContext if (charsets.length) { string name = `func_`~to!string(addr+1); - string funcCode = CodepointSet(charsets[ir[0].data]).toSourceCode(name); + string funcCode = charsets[ir[0].data].toSourceCode(name); code ~= ctSub( ` static $$ if (atEnd || !$$(front)) @@ -1316,16 +1282,16 @@ struct CtContext } else code ~= ctSub( ` - if (atEnd || 
!charsets[$$].scanFor(front)) + if (atEnd || !re.charsets[$$].scanFor(front)) $$ $$ $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr); break; case IR.Trie: - if (charsets.length && charsets[ir[0].data].length <= 8) + if (charsets.length && charsets[ir[0].data].byInterval.length <= 8) goto case IR.CodepointSet; code ~= ctSub( ` - if (atEnd || !matchers[$$][front]) + if (atEnd || !re.matchers[$$][front]) $$ $$ $$`, ir[0].data, bailOut, addr >= 0 ? "next();" :"", nextInstr); @@ -1463,7 +1429,7 @@ struct CtContext } //generate D code for the whole regex - public string ctGenRegEx(const(Bytecode)[] ir) + public string ctGenRegEx(Bytecode[] ir) { auto bdy = ctGenBlock(ir, 0); auto r = ` @@ -1509,7 +1475,7 @@ struct CtContext } -string ctGenRegExCode(Char)(const Regex!Char re) +string ctGenRegExCode(Char)(Regex!Char re) { auto context = CtContext(re); return context.ctGenRegEx(re.ir); diff --git a/std/regex/internal/bitnfa.d b/std/regex/internal/bitnfa.d deleted file mode 100644 index 3cbc2be0a7d..00000000000 --- a/std/regex/internal/bitnfa.d +++ /dev/null @@ -1,754 +0,0 @@ -//Written in the D programming language -/* - Implementation of a concept "NFA in a word" which is - bit-parallel impementation of regex where each bit represents - a state in an NFA. Execution is Thompson-style achieved via bit tricks. - - There is a great number of limitations inlcuding not tracking any state (captures) - and not supporting even basic assertions such as ^, $ or \b. 
-*/ -module std.regex.internal.bitnfa; - -package(std.regex): - -import std.regex.internal.ir; - -debug(std_regex_bitnfa) import std.stdio; -import std.algorithm; - - -struct HashTab -{ -pure: - @disable this(this); - - uint opIndex()(uint key) const - { - auto p = locateExisting(key, table); - assert(p.occupied); - return p.value; - } - - bool opBinaryRight(string op:"in")(uint key) const - { - auto p = locate(key, table); - return p.occupied; - } - - void opIndexAssign(uint value, uint key) - { - if (table.length == 0) grow(); - auto p = locate(key, table); - if (!p.occupied) - { - items++; - if (4 * items >= table.length * 3) - { - grow(); - p = locate(key, table); - } - p.key_ = key; - p.setOccupied(); - } - p.value = value; - } - - auto keys() const - { - import std.array : appender; - auto app = appender!(uint[])(); - foreach (i, v; table) - { - if (v.occupied) - app.put(v.key); - } - return app.data; - } - - auto values() const - { - import std.array : appender; - auto app = appender!(uint[])(); - foreach (i, v; table) - { - if (v.occupied) - app.put(v.value); - } - return app.data; - } - -private: - static uint hashOf()(uint val) - { - return (val >> 20) ^ (val>>8) ^ val; - } - - struct Node - { - pure: - uint key_; - uint value; - @property uint key()() const { return key_ & 0x7fff_ffff; } - @property bool occupied()() const { return (key_ & 0x8000_0000) != 0; } - void setOccupied(){ key_ |= 0x8000_0000; } - } - Node[] table; - size_t items; - - static N* locateExisting(N)(uint key, N[] table) - { - size_t slot = hashOf(key) & (table.length-1); - key |= 0x8000_0000; - while (table[slot].key_ != key) - { - slot += 1; - if (slot == table.length) - slot = 0; - } - return table.ptr + slot; - } - - static N* locate(N)(uint key, N[] table) - { - size_t slot = hashOf(key) & (table.length-1); - while (table[slot].occupied) - { - if (table[slot].key == key) - break; - slot += 1; - if (slot == table.length) - slot = 0; - } - return table.ptr + slot; - } - - void 
grow() - { - Node[] newTable = new Node[table.length ? table.length*2 : 4]; - foreach (i, v; table) - { - if (v.occupied) - { - auto p = locate(v.key, newTable); - *p = v; - } - } - table = newTable; - } -} - -unittest -{ - HashTab tab; - tab[3] = 1; - tab[7] = 2; - tab[11] = 3; - assert(tab[3] == 1); - assert(tab[7] == 2); - assert(tab[11] == 3); -} - - -// Specialized 2-level trie of uint masks for BitNfa. -// Uses the concept of CoW: a page gets modified in place -// if the block's ref-count is 1, else a newblock is allocated -// and ref count is decreased -struct UIntTrie2 -{ -pure: - ushort[] index; // pages --> blocks - ushort[] refCounts; // ref counts for each block - uint[] hashes; // hashes of blocks - uint[] blocks; // linear array with blocks - uint[] scratch; // temporary block - enum blockBits = 8; // size of block in bits - enum blockSize = 1<>blockBits]; - return blocks.ptr[blk*blockSize + (ch & (blockSize-1))]; - } - - void setPageRange(string op)(uint val, uint low, uint high) - { - immutable blk = index[low>>blockBits]; - if (refCounts[blk] == 1) // modify in-place - { - immutable lowIdx = blk*blockSize + (low & (blockSize-1)); - immutable highIdx = high - low + lowIdx; - mixin("blocks[lowIdx..highIdx] "~op~"= val;"); - } - else - { - // create a new page - refCounts[blk]--; - immutable lowIdx = low & (blockSize-1); - immutable highIdx = high - low + lowIdx; - scratch[] = blocks[blk*blockSize..(blk+1)*blockSize]; - mixin("scratch[lowIdx..highIdx] "~op~"= val;"); - uint h = hash(scratch); - bool found = false; - foreach (i,x; hashes) - { - if (x != h) continue; - if (scratch[] == blocks[i*blockSize .. 
(i+1)*blockSize]) - { - // re-route to existing page - index[low>>blockBits] = cast(ushort)i; - refCounts[i]++; // inc refs - found = true; - break; - } - } - if (!found) - { - index[low>>blockBits] = cast(ushort)hashes.length; - blocks ~= scratch[]; - refCounts ~= 1; - hashes ~= h; - } - } - } - - void opIndexOpAssign(string op)(uint val, dchar ch) - { - setPageRange!op(val, ch, ch+1); - } - - void opSliceOpAssign(string op)(uint val, uint start, uint end) - { - uint startBlk = start >> blockBits; - uint endBlk = end >> blockBits; - uint first = min(startBlk*blockSize+blockSize, end); - setPageRange!op(val, start, first); - foreach (blk; startBlk..endBlk) - setPageRange!op(val, blk*blockSize, (blk+1)*blockSize); - if (first != end) - { - setPageRange!op(val, endBlk*blockSize, end); - } - } -} - -unittest -{ - UIntTrie2 trie = UIntTrie2(); - trie['d'] &= 3; - assert(trie['d'] == 3); - trie['\u0280'] &= 1; - assert(trie['\u0280'] == 1); - import std.uni; - UIntTrie2 trie2 = UIntTrie2(); - auto letters = unicode("L"); - foreach (r; letters.byInterval) - trie2[r.a..r.b] &= 1; - foreach (ch; letters.byCodepoint) - assert(trie2[ch] == 1); - auto space = unicode("WhiteSpace"); - auto trie3 = UIntTrie2(); - foreach (r; space.byInterval) - trie3[r.a..r.b] &= 2; - foreach (ch; space.byCodepoint) - assert(trie3[ch] == 2); -} - -// Since there is no way to mark a starting position -// we need 2 instances of BitNfa: one to find the end, and the other -// to run backwards to find the start. 
-struct BitNfa -{ -pure: - uint[128] asciiTab; // state mask for ascii characters - UIntTrie2 uniTab; // state mask for unicode characters - HashTab controlFlow; // maps each bit pattern to resulting jumps pattern - uint controlFlowMask; // masks all control flow bits - uint finalMask; // marks final states terminating the NFA - uint length; // if this engine is empty - - @property bool empty() const { return length == 0; } - - void combineControlFlow() - { - uint[] keys = controlFlow.keys; - uint[] values = controlFlow.values; - auto selection = new bool[keys.length]; - bool nextChoice() - { - uint i; - for (i=0;i %d %s", j, ir[j].mnemonic); - paths.push(j+IRL!Option); - //writefln(">> %d", j+IRL!Option); - j = j + ir[j].data + IRL!Option; - } - break; - case GotoEndOr: - paths.push(j+IRL!GotoEndOr+ir[j].data); - break; - case OrEnd, Wordboundary, Notwordboundary, Bof, Bol, Eol, Eof, Nop, GroupStart, GroupEnd: - paths.push(j+ir[j].length); - break; - case LookaheadStart, NeglookaheadStart, LookbehindStart, - NeglookbehindStart: - paths.push(j + IRL!LookaheadStart + ir[j].data + IRL!LookaheadEnd); - break; - case InfiniteStart, InfiniteQStart: - paths.push(j+IRL!InfiniteStart); - paths.push(j+IRL!InfiniteStart+ir[j].data+IRL!InfiniteEnd); - break; - case InfiniteBloomStart: - paths.push(j+IRL!InfiniteStart); - paths.push(j+IRL!InfiniteBloomStart+ir[j].data+IRL!InfiniteBloomEnd); - break; - case InfiniteEnd, InfiniteQEnd: - paths.push(j-ir[j].data); - paths.push(j+IRL!InfiniteEnd); - break; - case InfiniteBloomEnd: - paths.push(j-ir[j].data); - paths.push(j+IRL!InfiniteBloomEnd); - break; - default: - result ~= j; - } - } - return result; - } - - this(Char)(auto ref Regex!Char re) - { - asciiTab[] = uint.max; // all ones - uniTab = UIntTrie2(); - controlFlow[0] = 0; - // pc -> bit number - uint[] bitMapping = new uint[re.ir.length]; - uint bitCount = 0, nesting=0, lastNonnested=0; - with(re) -outer: for (uint i=0; i user group number uint ngroup; // number of 
internal groups uint maxCounterDepth; // max depth of nested {n,m} repetitions uint hotspotTableSize; // number of entries in merge table uint threadCount; // upper bound on number of Thompson VM threads uint flags; // global regex flags - Interval[][] charsets; // intervals of characters - const(CharMatcher)[] matchers; // tables that represent character sets - const(BitTable)[] filters; // bloom filters for conditional loops + public const(CharMatcher)[] matchers; // tables that represent character sets + public const(BitTable)[] filters; // bloom filters for conditional loops uint[] backrefed; // bit array of backreferenced submatches Kickstart!Char kickstart; @@ -673,10 +558,10 @@ package(std.regex): {//@@@BUG@@@ write is system for (uint i = 0; i < ir.length; i += ir[i].length) { - debug(std_regex_parser) writefln("%d\t%s ", i, disassemble(ir, i, dict)); + writefln("%d\t%s ", i, disassemble(ir, i, dict)); } - debug(std_regex_parser) writeln("Total merge table size: ", hotspotTableSize); - debug(std_regex_parser) writeln("Max counter nesting depth: ", maxCounterDepth); + writeln("Total merge table size: ", hotspotTableSize); + writeln("Max counter nesting depth: ", maxCounterDepth); } } @@ -692,10 +577,11 @@ package(std.regex): public: Regex!Char _regex; alias _regex this; - this(immutable Regex!Char re, MatchFn fn) immutable + this(Regex!Char re, MatchFn fn) { _regex = re; nativeFn = fn; + } } @@ -736,10 +622,10 @@ struct Input(Char) @property bool atEnd(){ return _index == _origin.length; } - - bool search(const Kickstart!Char kick, ref dchar res, ref size_t pos) + bool search(Kickstart)(ref Kickstart kick, ref dchar res, ref size_t pos) { - kick.search(this); + size_t idx = kick.search(_origin, _index); + _index = idx; return nextChar(res, pos); } @@ -819,8 +705,8 @@ template BackLooper(E) } // -@safe uint lookupNamedGroup(String)(const(NamedGroup)[] dict, String name) -{ +@trusted uint lookupNamedGroup(String)(NamedGroup[] dict, String name) +{//equal is 
// Utility for ShiftOr: the minimum number of bytes that have to be tested
// for one code unit of type Char.
//   char  -> 1
//   wchar -> 2
//   dchar -> 3
// Any other type is rejected at compile time.
uint effectiveSize(Char)()
{
    static if (is(Char == dchar))
    {
        // the only case where the answer differs from Char.sizeof
        return 3;
    }
    else
    {
        static if (is(Char == char) || is(Char == wchar))
            return cast(uint) Char.sizeof;
        else
            static assert(0);
    }
}
/*
    Kickstart engine using ShiftOr algorithm,
    a bit parallel technique for inexact string searching.

    The constructor walks the regex bytecode and fills a 256-entry table of
    bit masks (one entry per byte value); search() then scans a haystack one
    byte at a time, shifting a state word and OR-ing in the table entry.
    A candidate position is reported when the bit selected by n_length
    becomes zero.
*/
struct ShiftOr(Char)
{
private:
    // per-byte bit masks; the constructor requires exactly 256 entries,
    // sets them all to uint.max and lets threads clear individual bits
    uint[] table;
    // low byte of the first code unit of a leading IR.Char,
    // or uint.max if the pattern does not start with one
    uint fChar;
    // number of table steps the filter checks; starts at 32 and is lowered
    // to the smallest idx at which any thread stopped
    uint n_length;
    // table steps consumed per code unit of Char
    enum charSize =  effectiveSize!Char();
    //maximum number of chars in CodepointSet to process
    enum uint charsetThreshold = 32_000;

    // One path through the bytecode being explored by the constructor.
    static struct ShiftThread
    {
        uint[] tab;   // table this thread writes to
        uint mask;    // bit for the current position (starts at 1)
        uint idx;     // number of table steps consumed so far
        uint pc, counter, hops; // bytecode position, repeat counter, passes through infinite loops
        this(uint newPc, uint newCounter, uint[] table)
        {
            pc = newPc;
            counter = newCounter;
            mask = 1;
            idx = 0;
            hops = 0;
            tab = table;
        }

        // set the given bits in entry idx
        void setMask(uint idx, uint mask)
        {
            tab[idx] |= mask;
        }

        // clear the given bits in entry idx
        void setInvMask(uint idx, uint mask)
        {
            tab[idx] &= ~mask;
        }

        // Apply setBits to the table entries selected by the bytes of ch,
        // one bit position per byte, starting at the current mask.
        void set(alias setBits = setInvMask)(dchar ch)
        {
            static if (charSize == 3)
            {
                // dchar: use the three low bytes of the code point, low byte first
                uint val = ch, tmask = mask;
                setBits(val&0xFF, tmask);
                tmask <<= 1;
                val >>= 8;
                setBits(val&0xFF, tmask);
                tmask <<= 1;
                val >>= 8;
                assert(val <= 0x10);
                setBits(val, tmask);
                tmask <<= 1;
            }
            else
            {
                // char/wchar: encode first, then walk the code units
                Char[dchar.sizeof/Char.sizeof] buf;
                uint tmask = mask;
                size_t total = encode(buf, ch);
                for (size_t i = 0; i < total; i++, tmask<<=1)
                {
                    static if (charSize == 1)
                        setBits(buf[i], tmask);
                    else static if (charSize == 2)
                    {
                        // low byte, then high byte, each on its own bit
                        setBits(buf[i]&0xFF, tmask);
                        tmask <<= 1;
                        setBits(buf[i]>>8, tmask);
                    }
                }
            }
        }
        // mark ch as acceptable at the current position (clears bits)
        void add(dchar ch){ return set!setInvMask(ch); }
        // move s table steps forward
        void advance(uint s)
        {
            mask <<= s;
            idx += s;
        }
        // true once mask has been shifted to zero
        @property bool full(){ return !mask; }
    }

    // Copy of t that continues at newPc with newCounter;
    // the copy refers to the same tab slice as t.
    static ShiftThread fork(ShiftThread t, uint newPc, uint newCounter)
    {
        ShiftThread nt = t;
        nt.pc = newPc;
        nt.counter = newCounter;
        return nt;
    }

    // Remove and return the last thread of worklist (must be non-empty).
    @trusted static ShiftThread fetch(ref ShiftThread[] worklist)
    {
        auto t = worklist[$-1];
        worklist.length -= 1;
        if (!__ctfe)
            cast(void)worklist.assumeSafeAppend();
        return t;
    }

    // Number of table steps the code point ch occupies when encoded as Char.
    static uint charLen(uint ch)
    {
        assert(ch <= 0x10FFFF);
        return codeLength!Char(cast(dchar)ch)*charSize;
    }

public:
    // Build the filter for re.
    // memory must hold exactly 256 uints; it becomes the lookup table and
    // is overwritten.
    @trusted this(ref Regex!Char re, uint[] memory)
    {
        static import std.algorithm.comparison;
        import std.algorithm.searching : countUntil;
        import std.conv : text;
        import std.range : assumeSorted;
        assert(memory.length == 256);
        fChar = uint.max;
        // FNV-1a flavored hash (uses 32bits at a time)
        ulong hash(uint[] tab)
        {
            ulong h = 0xcbf29ce484222325;
            foreach (v; tab)
            {
                h ^= v;
                h *= 0x100000001b3;
            }
            return h;
        }
        // Look for a literal character at the very start of the pattern,
        // stepping over group markers and zero-width assertions.
        // NOTE(review): i is only advanced inside the switch; the loop
        // relies on reaching IR.Char or an instruction handled by default.
    L_FindChar:
        for (size_t i = 0;;)
        {
            switch (re.ir[i].code)
            {
                case IR.Char:
                    fChar = re.ir[i].data;
                    static if (charSize != 3)
                    {
                        Char[dchar.sizeof/Char.sizeof] buf;
                        encode(buf, fChar);
                        fChar = buf[0];
                    }
                    // keep only the low byte, that is what search() hands to memchr
                    fChar = fChar & 0xFF;
                    break L_FindChar;
                case IR.GroupStart, IR.GroupEnd:
                    i += IRL!(IR.GroupStart);
                    break;
                case IR.Bof, IR.Bol, IR.Wordboundary, IR.Notwordboundary:
                    i += IRL!(IR.Bol);
                    break;
                default:
                    break L_FindChar;
            }
        }
        table = memory;
        table[] = uint.max;
        alias MergeTab = bool[ulong];
        // use reasonably complex hash to identify equivalent tables
        auto merge = new MergeTab[re.hotspotTableSize];
        ShiftThread[] trs;
        ShiftThread t = ShiftThread(0, 0, table);
        //locate first fixed char if any
        n_length = 32;
        // outer loop: one iteration per thread taken from the worklist
        for (;;)
        {
            // inner loop: run the current thread until it stops
        L_Eval_Thread:
            for (;;)
            {
                switch (re.ir[t.pc].code)
                {
                case IR.Char:
                    uint s = charLen(re.ir[t.pc].data);
                    if (t.idx+s > n_length)
                        goto L_StopThread;
                    t.add(re.ir[t.pc].data);
                    t.advance(s);
                    t.pc += IRL!(IR.Char);
                    break;
                case IR.OrChar://assumes IRL!(OrChar) == 1
                    uint len = re.ir[t.pc].sequence;
                    uint end = t.pc + len;
                    // distinct encoded lengths among the alternatives
                    uint[Bytecode.maxSequence] s;
                    uint numS;
                    for (uint i = 0; i < len; i++)
                    {
                        auto x = charLen(re.ir[t.pc+i].data);
                        if (countUntil(s[0..numS], x) < 0)
                           s[numS++] = x;
                    }
                    for (uint i = t.pc; i < end; i++)
                    {
                        t.add(re.ir[i].data);
                    }
                    // one continuation per distinct length that still fits
                    for (uint i = 0; i < numS; i++)
                    {
                        auto tx = fork(t, t.pc + len, t.counter);
                        if (tx.idx + s[i] <= n_length)
                        {
                            tx.advance(s[i]);
                            trs ~= tx;
                        }
                    }
                    if (!trs.empty)
                        t = fetch(trs);
                    else
                        goto L_StopThread;
                    break;
                case IR.CodepointSet:
                case IR.Trie:
                    auto set = re.charsets[re.ir[t.pc].data];
                    // encoded lengths (in table steps) that members of the set can have
                    uint[4] s;
                    uint numS;
                    static if (charSize == 3)
                    {
                        s[0] = charSize;
                        numS = 1;
                    }
                    else
                    {

                        // pairs of [first, last] code point for each encoded length
                        static if (charSize == 1)
                            static immutable codeBounds = [0x0, 0x7F, 0x80, 0x7FF, 0x800, 0xFFFF, 0x10000, 0x10FFFF];
                        else //== 2
                            static immutable codeBounds = [0x0, 0xFFFF, 0x10000, 0x10FFFF];
                        // flatten the set's intervals into a sorted array of bounds
                        uint[] arr = new uint[set.byInterval.length * 2];
                        size_t ofs = 0;
                        foreach (ival; set.byInterval)
                        {
                            arr[ofs++] = ival.a;
                            arr[ofs++] = ival.b;
                        }
                        auto srange = assumeSorted!"a <= b"(arr);
                        for (uint i = 0; i < codeBounds.length/2; i++)
                        {
                            auto start = srange.lowerBound(codeBounds[2*i]).length;
                            auto end = srange.lowerBound(codeBounds[2*i+1]).length;
                            if (end > start || (end == start && (end & 1)))
                               s[numS++] = (i+1)*charSize;
                        }
                    }
                    if (numS == 0 || t.idx + s[numS-1] > n_length)
                        goto L_StopThread;
                    auto  chars = set.length;
                    if (chars > charsetThreshold)
                        goto L_StopThread;
                    foreach (ch; set.byCodepoint)
                    {
                        //avoid surrogate pairs
                        if (0xD800 <= ch && ch <= 0xDFFF)
                            continue;
                        t.add(ch);
                    }
                    for (uint i = 0; i < numS; i++)
                    {
                        auto tx = fork(t, t.pc + IRL!(IR.CodepointSet), t.counter);
                        tx.advance(s[i]);
                        trs ~= tx;
                    }
                    if (!trs.empty)
                        t = fetch(trs);
                    else
                        goto L_StopThread;
                    break;
                case IR.Any:
                    goto L_StopThread;

                case IR.GotoEndOr:
                    t.pc += IRL!(IR.GotoEndOr)+re.ir[t.pc].data;
                    assert(re.ir[t.pc].code == IR.OrEnd);
                    goto case;
                case IR.OrEnd:
                    // stop if a thread with an identical table already passed this slot
                    auto slot = re.ir[t.pc+1].raw+t.counter;
                    auto val = hash(t.tab);
                    if (val in merge[slot])
                        goto L_StopThread; // merge equivalent
                    merge[slot][val] = true;
                    t.pc += IRL!(IR.OrEnd);
                    break;
                case IR.OrStart:
                    t.pc += IRL!(IR.OrStart);
                    goto case;
                case IR.Option:
                    uint next = t.pc + re.ir[t.pc].data + IRL!(IR.Option);
                    //queue next Option
                    if (re.ir[next].code == IR.Option)
                    {
                        trs ~= fork(t, next, t.counter);
                    }
                    t.pc += IRL!(IR.Option);
                    break;
                case IR.RepeatStart:case IR.RepeatQStart:
                    t.pc += IRL!(IR.RepeatStart)+re.ir[t.pc].data;
                    goto case IR.RepeatEnd;
                case IR.RepeatEnd:
                case IR.RepeatQEnd:
                    auto slot = re.ir[t.pc+1].raw+t.counter;
                    auto val = hash(t.tab);
                    if (val in merge[slot])
                        goto L_StopThread; // merge equivalent
                    merge[slot][val] = true;
                    uint len = re.ir[t.pc].data;
                    uint step = re.ir[t.pc+2].raw;
                    uint min = re.ir[t.pc+3].raw;
                    if (t.counter < min)
                    {
                        // below the minimum: must go round the body again
                        t.counter += step;
                        t.pc -= len;
                        break;
                    }
                    uint max = re.ir[t.pc+4].raw;
                    if (t.counter < max)
                    {
                        // between min and max: queue another pass and also continue past the loop
                        trs ~= fork(t, t.pc - len, t.counter + step);
                        t.counter = t.counter%step;
                        t.pc += IRL!(IR.RepeatEnd);
                    }
                    else
                    {
                        t.counter = t.counter%step;
                        t.pc += IRL!(IR.RepeatEnd);
                    }
                    break;
                case IR.InfiniteStart, IR.InfiniteQStart:
                    t.pc += re.ir[t.pc].data + IRL!(IR.InfiniteStart);
                    goto case IR.InfiniteEnd; //both Q and non-Q
                case IR.InfiniteEnd:
                case IR.InfiniteQEnd:
                    auto slot = re.ir[t.pc+1].raw+t.counter;
                    auto val = hash(t.tab);
                    if (val in merge[slot])
                        goto L_StopThread; // merge equivalent
                    merge[slot][val] = true;
                    uint len = re.ir[t.pc].data;
                    uint pc1, pc2; //branches to take in priority order
                    // give up on this thread after 32 passes through infinite loops
                    if (++t.hops == 32)
                        goto L_StopThread;
                    pc1 = t.pc + IRL!(IR.InfiniteEnd);
                    pc2 = t.pc - len;
                    trs ~= fork(t, pc2, t.counter);
                    t.pc = pc1;
                    break;
                case IR.GroupStart, IR.GroupEnd:
                    t.pc += IRL!(IR.GroupStart);
                    break;
                case IR.Bof, IR.Bol, IR.Wordboundary, IR.Notwordboundary:
                    t.pc += IRL!(IR.Bol);
                    break;
                case IR.LookaheadStart, IR.NeglookaheadStart, IR.LookbehindStart, IR.NeglookbehindStart:
                    // lookarounds are skipped as a whole
                    t.pc += IRL!(IR.LookaheadStart) + IRL!(IR.LookaheadEnd) + re.ir[t.pc].data;
                    break;
                default:
                L_StopThread:
                    // thread is done: the filter can only be as long as the
                    // shortest prefix any thread managed to cover
                    assert(re.ir[t.pc].code >= 0x80, text(re.ir[t.pc].code));
                    debug (fred_search) writeln("ShiftOr stumbled on ",re.ir[t.pc].mnemonic);
                    n_length = std.algorithm.comparison.min(t.idx, n_length);
                    break L_Eval_Thread;
                }
            }
            if (trs.empty)
                break;
            t = fetch(trs);
        }
        debug(std_regex_search)
        {
            writeln("Min length: ", n_length);
        }
    }

    // true when no prefix could be covered, i.e. the filter is unusable
    @property bool empty() const {  return n_length == 0; }

    // covered prefix length expressed in code units of Char
    @property uint length() const{ return n_length/charSize; }

    // lookup compatible bit pattern in haystack, return starting index
    // has a useful trait: if supplied with valid UTF indexes,
    // returns only valid UTF indexes
    // (that given the haystack in question is valid UTF string)
    //
    // Scanning starts at code unit idx; haystack.length is returned when
    // nothing is found. Must not be called on an empty filter.
    @trusted size_t search(const(Char)[] haystack, size_t idx)
    {//@BUG: apparently assumes little endian machines
        import std.conv : text;
        import core.stdc.string : memchr;
        assert(!empty);
        auto p = cast(const(ubyte)*)(haystack.ptr+idx);
        uint state = uint.max;
        // a zero in this bit of state means n_length steps matched in a row
        uint limit = 1u<<(n_length - 1u);
        debug(std_regex_search) writefln("Limit: %32b",limit);
        if (fChar != uint.max)
        {
            // pattern starts with a known byte: use memchr to skip ahead
            const(ubyte)* end = cast(ubyte*)(haystack.ptr + haystack.length);
            const orginalAlign = cast(size_t)p & (Char.sizeof-1);
            while (p != end)
            {
                if (!~state)
                {//speed up seeking first matching place
                    for (;;)
                    {
                        assert(p <= end, text(p," vs ", end));
                        p = cast(ubyte*)memchr(p, fChar, end - p);
                        if (!p)
                            return haystack.length;
                        // accept the hit only if it sits on a code unit boundary
                        if ((cast(size_t)p & (Char.sizeof-1)) == orginalAlign)
                            break;
                        if (++p == end)
                            return haystack.length;
                    }
                    state = ~1u;
                    assert((cast(size_t)p & (Char.sizeof-1)) == orginalAlign);
                    static if (charSize == 3)
                    {
                        state = (state<<1) | table[p[1]];
                        state = (state<<1) | table[p[2]];
                        p += 4;
                    }
                    else
                        p++;
                    //first char is tested, see if that's all
                    if (!(state & limit))
                        return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
                            -length;
                }
                else
                {//have some bits/states for possible matches,
                 //use the usual shift-or cycle
                    static if (charSize == 3)
                    {
                        state = (state<<1) | table[p[0]];
                        state = (state<<1) | table[p[1]];
                        state = (state<<1) | table[p[2]];
                        p += 4;
                    }
                    else
                    {
                        state = (state<<1) | table[p[0]];
                        p++;
                    }
                    if (!(state & limit))
                        return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
                            -length;
                }
                debug(std_regex_search) writefln("State: %32b", state);
            }
        }
        else
        {
            //normal path, partially unrolled for char/wchar
            static if (charSize == 3)
            {
                const(ubyte)* end = cast(ubyte*)(haystack.ptr + haystack.length);
                while (p != end)
                {
                    state = (state<<1) | table[p[0]];
                    state = (state<<1) | table[p[1]];
                    state = (state<<1) | table[p[2]];
                    p += 4;
                    if (!(state & limit))//division rounds down for dchar
                        return (p-cast(ubyte*)haystack.ptr)/Char.sizeof
                        -length;
                }
            }
            else
            {
                auto len = cast(ubyte*)(haystack.ptr + haystack.length) - p;
                size_t i = 0;
                // odd byte count: handle one byte up front so the loop can go in pairs
                if (len & 1)
                {
                    state = (state<<1) | table[p[i++]];
                    if (!(state & limit))
                        return idx+i/Char.sizeof-length;
                }
                while (i < len)
                {
                    state = (state<<1) | table[p[i++]];
                    if (!(state & limit))
                        return idx+i/Char.sizeof
                            -length;
                    state = (state<<1) | table[p[i++]];
                    if (!(state & limit))
                        return idx+i/Char.sizeof
                            -length;
                    debug(std_regex_search) writefln("State: %32b", state);
                }
            }
        }
        return haystack.length;
    }

    // Debug helper: print the table in binary, four entries per row.
    @system debug static void dump(uint[] table)
    {//@@@BUG@@@ writef(ln) is @system
        import std.stdio : writefln;
        for (size_t i = 0; i < table.length; i += 4)
        {
            writefln("%32b %32b %32b %32b",table[i], table[i+1], table[i+2], table[i+3]);
        }
    }
}

// Checks the computed prefix length and the positions reported by search()
// for char, wchar and dchar patterns.
unittest
{
    import std.conv, std.regex;
    // patterns with a fixed-length prefix
    @trusted void test_fixed(alias Kick)()
    {
        foreach (i, v; AliasSeq!(char, wchar, dchar))
        {
            alias Char = v;
            alias String = immutable(v)[];
            auto r = regex(to!String(`abc$`));
            auto kick = Kick!Char(r, new uint[256]);
            assert(kick.length == 3, text(Kick.stringof," ",v.stringof, " == ", kick.length));
            auto r2 = regex(to!String(`(abc){2}a+`));
            kick = Kick!Char(r2, new uint[256]);
            assert(kick.length == 7, text(Kick.stringof,v.stringof," == ", kick.length));
            auto r3 = regex(to!String(`\b(a{2}b{3}){2,4}`));
            kick = Kick!Char(r3, new uint[256]);
            assert(kick.length == 10, text(Kick.stringof,v.stringof," == ", kick.length));
            auto r4 = regex(to!String(`\ba{2}c\bxyz`));
            kick = Kick!Char(r4, new uint[256]);
            assert(kick.length == 6, text(Kick.stringof,v.stringof, " == ", kick.length));
            auto r5 = regex(to!String(`\ba{2}c\b`));
            kick = Kick!Char(r5, new uint[256]);
            size_t x = kick.search("aabaacaa", 0);
            assert(x == 3, text(Kick.stringof,v.stringof," == ", kick.length));
            x = kick.search("aabaacaa", x+1);
            assert(x == 8, text(Kick.stringof,v.stringof," == ", kick.length));
        }
    }
    // patterns with sets, alternations and loops
    @trusted void test_flex(alias Kick)()
    {
        foreach (i, v; AliasSeq!(char, wchar, dchar))
        {
            alias Char = v;
            alias String = immutable(v)[];
            auto r = regex(to!String(`abc[a-z]`));
            auto kick = Kick!Char(r, new uint[256]);
            auto x = kick.search(to!String("abbabca"), 0);
            assert(x == 3, text("real x is ", x, " ",v.stringof));

            auto r2 = regex(to!String(`(ax|bd|cdy)`));
            String s2 = to!String("abdcdyabax");
            kick = Kick!Char(r2, new uint[256]);
            x = kick.search(s2, 0);
            assert(x == 1, text("real x is ", x));
            x = kick.search(s2, x+1);
            assert(x == 3, text("real x is ", x));
            x = kick.search(s2, x+1);
            assert(x == 8, text("real x is ", x));
            auto rdot = regex(to!String(`...`));
            kick = Kick!Char(rdot, new uint[256]);
            assert(kick.length == 0);
            auto rN = regex(to!String(`a(b+|c+)x`));
            kick = Kick!Char(rN, new uint[256]);
            assert(kick.length == 3, to!string(kick.length));
            assert(kick.search("ababx",0) == 2);
            assert(kick.search("abaacba",0) == 3);//expected inexact

        }
    }
    test_fixed!(ShiftOr)();
    test_flex!(ShiftOr)();
}

// name under which the rest of std.regex refers to the prefilter
alias Kickstart = ShiftOr;
// Reverse the order of instructions in code, in place.
// Atoms are copied back to front; paired start/end instructions swap places;
// alternations are handled by reversing each Option body separately (the
// bodies are queued on a work stack and processed after the current range).
// Lookbehind blocks are copied over unchanged.
// A scratch array of the same length is filled and copied back at the end.
@trusted void reverseBytecode()(Bytecode[] code)
{
    Bytecode[] rev = new Bytecode[code.length];
    // write position in rev, moves from the end towards the front
    uint revPc = cast(uint)rev.length;
    // pending sub-ranges: (start in code, end in code, write position in rev)
    Stack!(Tuple!(uint, uint, uint)) stack;
    uint start = 0;
    uint end = cast(uint)code.length;
    for (;;)
    {
        // NOTE(review): pc only advances in the atom and start/end branches;
        // presumably every instruction reaching here is one of those or
        // GotoEndOr - confirm against the IR definition.
        for (uint pc = start; pc < end; )
        {
            immutable len = code[pc].length;
            if (code[pc].code == IR.GotoEndOr)
                break; //pick next alternation branch
            if (code[pc].isAtom)
            {
                rev[revPc - len .. revPc] = code[pc .. pc + len];
                revPc -= len;
                pc += len;
            }
            else if (code[pc].isStart || code[pc].isEnd)
            {
                //skip over other embedded lookbehinds they are reversed
                if (code[pc].code == IR.LookbehindStart
                    || code[pc].code == IR.NeglookbehindStart)
                {
                    // copy start instruction, body and paired end as one piece
                    immutable blockLen = len + code[pc].data
                         + code[pc].pairedLength;
                    rev[revPc - blockLen .. revPc] = code[pc .. pc + blockLen];
                    pc += blockLen;
                    revPc -= blockLen;
                    continue;
                }
                // put the partner instruction where this one would have gone
                immutable second = code[pc].indexOfPair(pc);
                immutable secLen = code[second].length;
                rev[revPc - secLen .. revPc] = code[second .. second + secLen];
                revPc -= secLen;
                if (code[pc].code == IR.OrStart)
                {
                    //we pass len bytes forward, but secLen in reverse
                    immutable revStart = revPc - (second + len - secLen - pc);
                    uint r = revStart;
                    uint i = pc + IRL!(IR.OrStart);
                    // Option headers (and the GotoEndOr preceding them) keep
                    // their positions; only the bodies get reversed later
                    while (code[i].code == IR.Option)
                    {
                        if (code[i - 1].code != IR.OrStart)
                        {
                            assert(code[i - 1].code == IR.GotoEndOr);
                            rev[r - 1] = code[i - 1];
                        }
                        rev[r] = code[i];
                        auto newStart = i + IRL!(IR.Option);
                        auto newEnd = newStart + code[i].data;
                        auto newRpc = r + code[i].data + IRL!(IR.Option);
                        // leave room for the GotoEndOr that closes this
                        // option unless OrEnd follows directly
                        if (code[newEnd].code != IR.OrEnd)
                        {
                            newRpc--;
                        }
                        stack.push(tuple(newStart, newEnd, newRpc));
                        r += code[i].data + IRL!(IR.Option);
                        i += code[i].data + IRL!(IR.Option);
                    }
                    pc = i;
                    revPc = revStart;
                    assert(code[pc].code == IR.OrEnd);
                }
                else
                    pc += len;
            }
        }
        if (stack.empty)
            break;
        // continue with the next queued Option body
        start = stack.top[0];
        end = stack.top[1];
        revPc = stack.top[2];
        stack.pop();
    }
    code[] = rev[];
}
struct Stack(T)
{
    // backing storage; the last element is the top of the stack
    T[] data;

    // true when nothing is stored
    @property bool empty()
    {
        return data.length == 0;
    }

    // number of stored elements
    @property size_t length()
    {
        return data.length;
    }

    // put val on top
    void push(T val)
    {
        data ~= val;
    }

    // remove the top element and return it; the stack must not be empty
    T pop()
    {
        assert(!empty);
        immutable lastIdx = data.length - 1;
        auto popped = data[lastIdx];
        data = data[0 .. lastIdx];
        // let later pushes reuse the freed slot (not available during CTFE)
        if (!__ctfe)
            cast(void)data.assumeSafeAppend();
        return popped;
    }

    // reference to the top element; the stack must not be empty
    @property ref T top()
    {
        assert(!empty);
        return data[data.length - 1];
    }
}
+/ -@trusted void postprocess(Char)(ref Regex!Char zis) pure +@trusted void postprocess(Char)(ref Regex!Char zis) {//@@@BUG@@@ write is @system with(zis) { @@ -1501,16 +1603,8 @@ pure: } } checkIfOneShot(); - if (!(flags & RegexInfo.oneShot) && !__ctfe) - { - kickstart = new ShiftOr!Char(zis); - if (kickstart.empty) - { - kickstart = new BitMatcher!Char(zis); - if (kickstart.empty) - kickstart = null; - } - } + if (!(flags & RegexInfo.oneShot)) + kickstart = Kickstart!Char(zis, new uint[](256)); debug(std_regex_allocation) writefln("IR processed, max threads: %d", threadCount); optimize(zis); } @@ -1560,7 +1654,7 @@ void fixupBytecode()(Bytecode[] ir) assert(fixups.empty); } -void optimize(Char)(ref Regex!Char zis) pure +void optimize(Char)(ref Regex!Char zis) { import std.array : insertInPlace; CodepointSet nextSet(uint idx) @@ -1577,7 +1671,7 @@ void optimize(Char)(ref Regex!Char zis) pure goto default; //TODO: OrChar case Trie, CodepointSet: - set = .CodepointSet(zis.charsets[ir[i].data]); + set = zis.charsets[ir[i].data]; goto default; case GroupStart,GroupEnd: break; diff --git a/std/regex/internal/tests.d b/std/regex/internal/tests.d index 4f52f819c5d..a098fcc431c 100644 --- a/std/regex/internal/tests.d +++ b/std/regex/internal/tests.d @@ -8,6 +8,8 @@ package(std.regex): import std.conv, std.exception, std.meta, std.range, std.typecons, std.regex; +import std.regex.internal.parser : Escapables; // characters that need escaping + alias Sequence(int B, int E) = staticIota!(B, E); unittest @@ -313,7 +315,6 @@ unittest TestVectors( `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com", "y", "$&-$1", "x-@"), TestVectors( `x()(abc)(?=(d)(e)(f)\2)`, "xabcdefabc", "y", "$&", "xabc"), TestVectors( `x()(abc)(?=(d)(e)(f)()\3\4\5)`, "xabcdefdef", "y", "$&", "xabc"), - //lookback TestVectors( `(?<=(ab))\d`, "12ba3ab4", "y", "$&-$1", "4-ab", "i"), TestVectors( `\w(?"); + assert(bmatch("texttext", greed).hit + == "text"); +} + +unittest +{ + import std.algorithm.comparison : 
equal; + auto cr8 = ctRegex!("^(a)(b)?(c*)"); + auto m8 = bmatch("abcc",cr8); + assert(m8); + assert(m8.captures[1] == "a"); + assert(m8.captures[2] == "b"); + assert(m8.captures[3] == "cc"); + auto cr9 = ctRegex!("q(a|b)*q"); + auto m9 = match("xxqababqyy",cr9); + assert(m9); + assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"])); +} + +unittest +{ + import std.algorithm.comparison : equal; + auto rtr = regex("a|b|c"); + enum ctr = regex("a|b|c"); + assert(equal(rtr.ir,ctr.ir)); + //CTFE parser BUG is triggered by group + //in the middle of alternation (at least not first and not last) + enum testCT = regex(`abc|(edf)|xyz`); + auto testRT = regex(`abc|(edf)|xyz`); + assert(equal(testCT.ir,testRT.ir)); +} + +unittest +{ + import std.algorithm.iteration : map; + import std.algorithm.comparison : equal; + enum cx = ctRegex!"(A|B|C)"; + auto mx = match("B",cx); + assert(mx); + assert(equal(mx.captures, [ "B", "B"])); + enum cx2 = ctRegex!"(A|B)*"; + assert(match("BAAA",cx2)); + + enum cx3 = ctRegex!("a{3,4}","i"); + auto mx3 = match("AaA",cx3); + assert(mx3); + assert(mx3.captures[0] == "AaA"); + enum cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i"); + auto mx4 = match("aaaabc", cx4); + assert(mx4); + assert(mx4.captures[0] == "aaaab"); + auto cr8 = ctRegex!("(a)(b)?(c*)"); + auto m8 = bmatch("abcc",cr8); + assert(m8); + assert(m8.captures[1] == "a"); + assert(m8.captures[2] == "b"); + assert(m8.captures[3] == "cc"); + auto cr9 = ctRegex!(".*$", "gm"); + auto m9 = match("First\rSecond", cr9); + assert(m9); + assert(equal(map!"a.hit"(m9), ["First", "", "Second"])); +} + +unittest +{ + import std.algorithm.iteration : map; + import std.algorithm.comparison : equal; +//global matching + void test_body(alias matchFn)() + { + string s = "a quick brown fox jumps over a lazy dog"; + auto r1 = regex("\\b[a-z]+\\b","g"); + string[] test; + foreach (m; matchFn(s, r1)) + test ~= m.hit; + assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", 
"dog"])); + auto free_reg = regex(` + + abc + \s+ + " + ( + [^"]+ + | \\ " + )+ + " + z + `, "x"); + auto m = match(`abc "quoted string with \" inside"z`,free_reg); + assert(m); + string mails = " hey@you.com no@spam.net "; + auto rm = regex(`@(?<=\S+@)\S+`,"g"); + assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"])); + auto m2 = matchFn("First line\nSecond line",regex(".*$","gm")); + assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"])); + auto m2a = matchFn("First line\nSecond line",regex(".+$","gm")); + assert(equal(map!"a[0]"(m2a), ["First line", "Second line"])); + auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm")); + assert(equal(map!"a[0]"(m2b), ["First line", "Second line"])); + debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!"); + } + test_body!bmatch(); + test_body!match(); +} + +//tests for accumulated std.regex issues and other regressions +unittest +{ + import std.algorithm.iteration : map; + import std.algorithm.comparison : equal; + void test_body(alias matchFn)() + { + //issue 5857 + //matching goes out of control if ... 
in (...){x} has .*/.+ + auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures; + assert(c[0] == "axxxzayyyyyzd"); + assert(c[1] == "ayyyyyz"); + auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures; + assert(c2[0] == "axxxayyyyyd"); + assert(c2[1] == "ayyyyy"); + //issue 2108 + //greedy vs non-greedy + auto nogreed = regex(""); + assert(matchFn("texttext", nogreed).hit + == "text"); + auto greed = regex(""); + assert(matchFn("texttext", greed).hit + == "texttext"); + //issue 4574 + //empty successful match still advances the input + string[] pres, posts, hits; + foreach (m; matchFn("abcabc", regex("","g"))) + { + pres ~= m.pre; + posts ~= m.post; + assert(m.hit.empty); + + } + auto heads = [ + "abcabc", + "abcab", + "abca", + "abc", + "ab", + "a", + "" + ]; + auto tails = [ + "abcabc", + "bcabc", + "cabc", + "abc", + "bc", + "c", + "" + ]; + assert(pres == array(retro(heads))); + assert(posts == tails); + //issue 6076 + //regression on .* + auto re = regex("c.*|d"); + auto m = matchFn("mm", re); + assert(!m); + debug(std_regex_test) writeln("!!! 
FReD REGRESSION test done "~matchFn.stringof~" !!!"); + auto rprealloc = regex(`((.){5}.{1,10}){5}`); + auto arr = array(repeat('0',100)); + auto m2 = matchFn(arr, rprealloc); + assert(m2); + assert(collectException( + regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$") + ) is null); + foreach (ch; [Escapables]) + { + assert(match(to!string(ch),regex(`[\`~ch~`]`))); + assert(!match(to!string(ch),regex(`[^\`~ch~`]`))); + assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`))); + } + //bugzilla 7718 + string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'"; + auto reStrCmd = regex (`(".*")|('.*')`, "g"); + assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)), + [`"/GIT/Ruby Apps/sec"`, `'notimer'`])); + } + test_body!bmatch(); + test_body!match(); +} + +// tests for replace +unittest +{ + void test(alias matchFn)() + { + import std.uni : toUpper; + + foreach (i, v; AliasSeq!(string, wstring, dstring)) + { + auto baz(Cap)(Cap m) + if (is(Cap == Captures!(Cap.String))) + { + return toUpper(m.hit); + } + alias String = v; + assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c")) + == to!String("ack rapacity")); + assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c")) + == to!String("ack capacity")); + assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]")) + == to!String("[n]oon")); + assert(std.regex.replace!(matchFn)( + to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'") + ) == to!String(": test2 test1 :")); + auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."), + regex(to!String("[ar]"), "g")); + assert(s == "StRAp A Rocket engine on A chicken."); + } + debug(std_regex_test) writeln("!!! 
Replace test done "~matchFn.stringof~" !!!"); + } + test!(bmatch)(); + test!(match)(); +} + +// tests for splitter +unittest +{ + import std.algorithm.comparison : equal; + auto s1 = ", abc, de, fg, hi, "; + auto sp1 = splitter(s1, regex(", *")); + auto w1 = ["", "abc", "de", "fg", "hi", ""]; + assert(equal(sp1, w1)); + + auto s2 = ", abc, de, fg, hi"; + auto sp2 = splitter(s2, regex(", *")); + auto w2 = ["", "abc", "de", "fg", "hi"]; + + uint cnt; + foreach (e; sp2) + { + assert(w2[cnt++] == e); + } + assert(equal(sp2, w2)); +} + +unittest +{ + char[] s1 = ", abc, de, fg, hi, ".dup; + auto sp2 = splitter(s1, regex(", *")); +} + +unittest +{ + import std.algorithm.comparison : equal; + auto s1 = ", abc, de, fg, hi, "; + auto w1 = ["", "abc", "de", "fg", "hi", ""]; + assert(equal(split(s1, regex(", *")), w1[])); +} + +unittest +{ // bugzilla 7141 + string pattern = `[a\--b]`; + assert(match("-", pattern)); + assert(match("b", pattern)); + string pattern2 = `[&-z]`; + assert(match("b", pattern2)); +} +unittest +{//bugzilla 7111 + assert(match("", regex("^"))); +} +unittest +{//bugzilla 7300 + assert(!match("a"d, "aa"d)); +} + +// bugzilla 7551 +unittest +{ + auto r = regex("[]abc]*"); + assert("]ab".matchFirst(r).hit == "]ab"); + assertThrown(regex("[]")); + auto r2 = regex("[]abc--ab]*"); + assert("]ac".matchFirst(r2).hit == "]"); +} + +unittest +{//bugzilla 7674 + assert("1234".replace(regex("^"), "$$") == "$1234"); + assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?"); + assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?"); +} +unittest +{// bugzilla 7679 + import std.algorithm.comparison : equal; + foreach (S; AliasSeq!(string, wstring, dstring)) + (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396 + enum re = ctRegex!(to!S(r"\.")); + auto str = to!S("a.b"); + assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")])); + assert(split(str, re) == [to!S("a"), to!S("b")]); + }(); +} +unittest +{//bugzilla 8203 + 
string data = " + NAME = XPAW01_STA:STATION + NAME = XPAW01_STA + "; + auto uniFileOld = data; + auto r = regex( + r"^NAME = (?P[a-zA-Z0-9_]+):*(?P[a-zA-Z0-9_]*)","gm"); + auto uniCapturesNew = match(uniFileOld, r); + for (int i = 0; i < 20; i++) + foreach (matchNew; uniCapturesNew) {} + //a second issue with same symptoms + auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`); + match("аллея Театральная", r2); +} +unittest +{// bugzilla 8637 purity of enforce + auto m = match("hello world", regex("world")); + enforce(m); +} + +// bugzilla 8725 +unittest +{ + static italic = regex( r"\* + (?!\s+) + (.*?) + (?!\s+) + \*", "gx" ); + string input = "this * is* interesting, *very* interesting"; + assert(replace(input, italic, "$1") == + "this * is* interesting, very interesting"); +} + +// bugzilla 8349 +unittest +{ + enum peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)"; + enum peakRegex = ctRegex!(peakRegexStr); + //note that the regex pattern itself is probably bogus + assert(match(r"\>wgEncode-blah-Tfbs.narrow", peakRegex)); +} + +// bugzilla 9211 +unittest +{ + import std.algorithm.comparison : equal; + auto rx_1 = regex(r"^(\w)*(\d)"); + auto m = match("1234", rx_1); + assert(equal(m.front, ["1234", "3", "4"])); + auto rx_2 = regex(r"^([0-9])*(\d)"); + auto m2 = match("1234", rx_2); + assert(equal(m2.front, ["1234", "3", "4"])); +} + +// bugzilla 9280 +unittest +{ + string tomatch = "a!b@c"; + static r = regex(r"^(?P.*?)!(?P.*?)@(?P.*?)$"); + auto nm = match(tomatch, r); + assert(nm); + auto c = nm.captures; + assert(c[1] == "a"); + assert(c["nick"] == "a"); +} + + +// bugzilla 9579 +unittest +{ + char[] input = ['a', 'b', 'c']; + string format = "($1)"; + // used to give a compile error: + auto re = regex(`(a)`, "g"); + auto r = replace(input, re, format); + assert(r == "(a)bc"); +} + +// bugzilla 9634 +unittest +{ + auto re = ctRegex!"(?:a+)"; + assert(match("aaaa", re).hit == "aaaa"); +} + +//bugzilla 10798 +unittest +{ + auto cr = 
ctRegex!("[abcd--c]*"); + auto m = "abc".match(cr); + assert(m); + assert(m.hit == "ab"); +} + +// bugzilla 10913 +unittest +{ + @system static string foo(const(char)[] s) + { + return s.dup; + } + @safe static string bar(const(char)[] s) + { + return s.dup; + } + () @system { + replace!((a) => foo(a.hit))("blah", regex(`a`)); + }(); + () @safe { + replace!((a) => bar(a.hit))("blah", regex(`a`)); + }(); +} + +// bugzilla 11262 +unittest +{ + enum reg = ctRegex!(r",", "g"); + auto str = "This,List"; + str = str.replace(reg, "-"); + assert(str == "This-List"); +} + +// bugzilla 11775 +unittest +{ + assert(collectException(regex("a{1,0}"))); +} + +// bugzilla 11839 +unittest +{ + import std.algorithm.comparison : equal; + assert(regex(`(?P\w+)`).namedCaptures.equal(["var1"])); + assert(collectException(regex(`(?P<1>\w+)`))); + assert(regex(`(?P\w+)`).namedCaptures.equal(["v1"])); + assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"])); + assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"])); +} + +// bugzilla 12076 +unittest +{ + auto RE = ctRegex!(r"(?abc)`); + assert(collectException("abc".matchFirst(r)["b"])); +} + +// bugzilla 12691 +unittest +{ + assert(bmatch("e@", "^([a-z]|)*$").empty); + assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty); +} + +//bugzilla 12713 +unittest +{ + assertThrown(regex("[[a-z]([a-z]|(([[a-z])))")); +} + +//bugzilla 12747 +unittest +{ + assertThrown(regex(`^x(\1)`)); + assertThrown(regex(`^(x(\1))`)); + assertThrown(regex(`^((x)(?=\1))`)); +} + +// bugzilla 14504 +unittest +{ + auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" 
~ + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); +} + +// bugzilla 14529 +unittest +{ + auto ctPat2 = regex(r"^[CDF]$", "i"); + foreach (v; ["C", "c", "D", "d", "F", "f"]) + assert(matchAll(v, ctPat2).front.hit == v); +} + +// bugzilla 14615 +unittest +{ + import std.stdio : writeln; + import std.regex : replaceFirst, replaceFirstInto, regex; + import std.array : appender; + + auto example = "Hello, world!"; + auto pattern = regex("^Hello, (bug)"); // won't find this one + auto result = replaceFirst(example, pattern, "$1 Sponge Bob"); + assert(result == "Hello, world!"); // Ok. + + auto sink = appender!string; + replaceFirstInto(sink, example, pattern, "$1 Sponge Bob"); + assert(sink.data == "Hello, world!"); + replaceAllInto(sink, example, pattern, "$1 Sponge Bob"); + assert(sink.data == "Hello, world!Hello, world!"); +} + +// bugzilla 15573 +unittest +{ + auto rx = regex("[c d]", "x"); + assert("a b".matchFirst(rx)); +} + +// bugzilla 15864 +unittest +{ + regex(`("); - assert(bmatch("texttext", greed).hit - == "text"); -} - -unittest -{ - auto cr8 = ctRegex!("^(a)(b)?(c*)"); - auto m8 = bmatch("abcc",cr8); - assert(m8); - assert(m8.captures[1] == "a"); - assert(m8.captures[2] == "b"); - assert(m8.captures[3] == "cc"); - auto cr9 = ctRegex!("q(a|b)*q"); - auto m9 = match("xxqababqyy",cr9); - assert(m9); - assert(equal(bmatch("xxqababqyy",cr9).captures, ["qababq", "b"])); -} - -unittest -{ - auto rtr = regex("a|b|c"); - const ctr = regex("a|b|c"); - assert(equal(rtr.ir,ctr.ir)); - //CTFE parser BUG is triggered by group - //in the middle of alternation (at least not first and not last) - const testCT = regex(`abc|(edf)|xyz`); - auto testRT = regex(`abc|(edf)|xyz`); - assert(equal(testCT.ir,testRT.ir)); -} - -unittest -{ - immutable cx = ctRegex!"(A|B|C)"; - auto mx = match("B",cx); - assert(mx); - assert(equal(mx.captures, [ "B", "B"])); - immutable cx2 = ctRegex!"(A|B)*"; - assert(match("BAAA",cx2)); - - immutable cx3 = ctRegex!("a{3,4}","i"); - auto mx3 = 
match("AaA",cx3); - assert(mx3); - assert(mx3.captures[0] == "AaA"); - immutable cx4 = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`,"i"); - auto mx4 = match("aaaabc", cx4); - assert(mx4); - assert(mx4.captures[0] == "aaaab"); - auto cr8 = ctRegex!("(a)(b)?(c*)"); - auto m8 = bmatch("abcc",cr8); - assert(m8); - assert(m8.captures[1] == "a"); - assert(m8.captures[2] == "b"); - assert(m8.captures[3] == "cc"); - auto cr9 = ctRegex!(".*$", "gm"); - auto m9 = match("First\rSecond", cr9); - assert(m9); - assert(equal(map!"a.hit"(m9), ["First", "", "Second"])); -} - -unittest -{ -//global matching - void test_body(alias matchFn)() - { - string s = "a quick brown fox jumps over a lazy dog"; - auto r1 = regex("\\b[a-z]+\\b","g"); - string[] test; - foreach (m; matchFn(s, r1)) - test ~= m.hit; - assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"])); - auto free_reg = regex(` - - abc - \s+ - " - ( - [^"]+ - | \\ " - )+ - " - z - `, "x"); - auto m = match(`abc "quoted string with \" inside"z`,free_reg); - assert(m); - string mails = " hey@you.com no@spam.net "; - auto rm = regex(`@(?<=\S+@)\S+`,"g"); - assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"])); - auto m2 = matchFn("First line\nSecond line",regex(".*$","gm")); - assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"])); - auto m2a = matchFn("First line\nSecond line",regex(".+$","gm")); - assert(equal(map!"a[0]"(m2a), ["First line", "Second line"])); - auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm")); - assert(equal(map!"a[0]"(m2b), ["First line", "Second line"])); - debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!"); - } - test_body!bmatch(); - test_body!match(); -} - -//tests for accumulated std.regex issues and other regressions -unittest -{ - void test_body(alias matchFn)() - { - //issue 5857 - //matching goes out of control if ... 
in (...){x} has .*/.+ - auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures; - assert(c[0] == "axxxzayyyyyzd"); - assert(c[1] == "ayyyyyz"); - auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures; - assert(c2[0] == "axxxayyyyyd"); - assert(c2[1] == "ayyyyy"); - //issue 2108 - //greedy vs non-greedy - auto nogreed = regex(""); - assert(matchFn("texttext", nogreed).hit - == "text"); - auto greed = regex(""); - assert(matchFn("texttext", greed).hit - == "texttext"); - //issue 4574 - //empty successful match still advances the input - string[] pres, posts, hits; - foreach (m; matchFn("abcabc", regex("","g"))) - { - pres ~= m.pre; - posts ~= m.post; - assert(m.hit.empty); - - } - auto heads = [ - "abcabc", - "abcab", - "abca", - "abc", - "ab", - "a", - "" - ]; - auto tails = [ - "abcabc", - "bcabc", - "cabc", - "abc", - "bc", - "c", - "" - ]; - assert(pres == array(retro(heads))); - assert(posts == tails); - //issue 6076 - //regression on .* - auto re = regex("c.*|d"); - auto m = matchFn("mm", re); - assert(!m); - debug(std_regex_test) writeln("!!! 
FReD REGRESSION test done "~matchFn.stringof~" !!!"); - auto rprealloc = regex(`((.){5}.{1,10}){5}`); - auto arr = array(repeat('0',100)); - auto m2 = matchFn(arr, rprealloc); - assert(m2); - assert(collectException( - regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$") - ) is null); - foreach (ch; [Escapables]) - { - assert(match(to!string(ch),regex(`[\`~ch~`]`))); - assert(!match(to!string(ch),regex(`[^\`~ch~`]`))); - assert(match(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`))); - } - //bugzilla 7718 - string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'"; - auto reStrCmd = regex (`(".*")|('.*')`, "g"); - assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)), - [`"/GIT/Ruby Apps/sec"`, `'notimer'`])); - } - test_body!bmatch(); - test_body!match(); -} - -// tests for replace -unittest -{ - void test(alias matchFn)() - { - import std.uni : toUpper; - - foreach (i, v; AliasSeq!(string, wstring, dstring)) - { - auto baz(Cap)(Cap m) - if (is(Cap == Captures!(Cap.String))) - { - return toUpper(m.hit); - } - alias String = v; - assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r")), to!String("c")) - == to!String("ack rapacity")); - assert(std.regex.replace!(matchFn)(to!String("ark rapacity"), regex(to!String("r"), "g"), to!String("c")) - == to!String("ack capacity")); - assert(std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]")) - == to!String("[n]oon")); - assert(std.regex.replace!(matchFn)(to!String("test1 test2"), - regex(to!String(`\w+`), "g"), to!String("$`:$'")) == to!String(": test2 test1 :")); - auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."), - regex(to!String("[ar]"), "g")); - assert(s == "StRAp A Rocket engine on A chicken.", text(s)); - } - debug(std_regex_test) writeln("!!! 
Replace test done "~matchFn.stringof~" !!!"); - } - test!(bmatch)(); - test!(match)(); -} - -// tests for splitter -unittest -{ - auto s1 = ", abc, de, fg, hi, "; - auto sp1 = splitter(s1, regex(", *")); - auto w1 = ["", "abc", "de", "fg", "hi", ""]; - assert(equal(sp1, w1)); - - auto s2 = ", abc, de, fg, hi"; - auto sp2 = splitter(s2, regex(", *")); - auto w2 = ["", "abc", "de", "fg", "hi"]; - - uint cnt; - foreach (e; sp2) - { - assert(w2[cnt++] == e); - } - assert(equal(sp2, w2)); -} - -unittest -{ - char[] s1 = ", abc, de, fg, hi, ".dup; - auto sp2 = splitter(s1, regex(", *")); -} - -unittest -{ - auto s1 = ", abc, de, fg, hi, "; - auto w1 = ["", "abc", "de", "fg", "hi", ""]; - assert(equal(split(s1, regex(", *")), w1[])); -} diff --git a/std/regex/internal/tests3.d b/std/regex/internal/tests3.d deleted file mode 100644 index 9b6bd9e115b..00000000000 --- a/std/regex/internal/tests3.d +++ /dev/null @@ -1,321 +0,0 @@ -/* - Regualar expressions package test suite part 3. -*/ -module std.regex.internal.tests3; - -package(std.regex): - -import std.algorithm, std.conv, std.exception, std.meta, std.range, - std.typecons, std.regex; - -unittest -{ // bugzilla 7141 - string pattern = `[a\--b]`; - assert(match("-", pattern)); - assert(match("b", pattern)); - string pattern2 = `[&-z]`; - assert(match("b", pattern2)); -} -unittest -{//bugzilla 7111 - assert(match("", regex("^"))); -} -unittest -{//bugzilla 7300 - assert(!match("a"d, "aa"d)); -} - -// bugzilla 7551 -unittest -{ - auto r = regex("[]abc]*"); - assert("]ab".matchFirst(r).hit == "]ab"); - assertThrown(regex("[]")); - auto r2 = regex("[]abc--ab]*"); - assert("]ac".matchFirst(r2).hit == "]"); -} - -unittest -{//bugzilla 7674 - assert("1234".replace(regex("^"), "$$") == "$1234"); - assert("hello?".replace(regex(r"\?", "g"), r"\?") == r"hello\?"); - assert("hello?".replace(regex(r"\?", "g"), r"\\?") != r"hello\?"); -} -unittest -{// bugzilla 7679 - foreach (S; AliasSeq!(string, wstring, dstring)) - (){ // avoid 
slow optimizations for large functions @@@BUG@@@ 2396 - const re = ctRegex!(to!S(r"\.")); - auto str = to!S("a.b"); - assert(equal(std.regex.splitter(str, re), [to!S("a"), to!S("b")])); - assert(split(str, re) == [to!S("a"), to!S("b")]); - }(); -} -unittest -{//bugzilla 8203 - string data = " - NAME = XPAW01_STA:STATION - NAME = XPAW01_STA - "; - auto uniFileOld = data; - auto r = regex( - r"^NAME = (?P[a-zA-Z0-9_]+):*(?P[a-zA-Z0-9_]*)","gm"); - auto uniCapturesNew = match(uniFileOld, r); - for (int i = 0; i < 20; i++) - foreach (matchNew; uniCapturesNew) {} - //a second issue with same symptoms - auto r2 = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`); - match("аллея Театральная", r2); -} -unittest -{// bugzilla 8637 purity of enforce - auto m = match("hello world", regex("world")); - enforce(m); -} - -// bugzilla 8725 -unittest -{ - static italic = regex( r"\* - (?!\s+) - (.*?) - (?!\s+) - \*", "gx" ); - string input = "this * is* interesting, *very* interesting"; - assert(replace(input, italic, "$1") == - "this * is* interesting, very interesting"); -} - -// bugzilla 8349 -unittest -{ - const peakRegexStr = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)"; - const peakRegex = ctRegex!(peakRegexStr); - //note that the regex pattern itself is probably bogus - assert(match(r"\>wgEncode-blah-Tfbs.narrow", peakRegex)); -} - -// bugzilla 9211 -unittest -{ - auto rx_1 = regex(r"^(\w)*(\d)"); - auto m = match("1234", rx_1); - assert(equal(m.front, ["1234", "3", "4"])); - auto rx_2 = regex(r"^([0-9])*(\d)"); - auto m2 = match("1234", rx_2); - assert(equal(m2.front, ["1234", "3", "4"])); -} - -// bugzilla 9280 -unittest -{ - string tomatch = "a!b@c"; - static r = regex(r"^(?P.*?)!(?P.*?)@(?P.*?)$"); - auto nm = match(tomatch, r); - assert(nm); - auto c = nm.captures; - assert(c[1] == "a"); - assert(c["nick"] == "a"); -} - - -// bugzilla 9579 -unittest -{ - char[] input = ['a', 'b', 'c']; - string format = "($1)"; - // used to give a compile error: - auto re = regex(`(a)`, 
"g"); - auto r = replace(input, re, format); - assert(r == "(a)bc"); -} - -// bugzilla 9634 -unittest -{ - auto re = ctRegex!"(?:a+)"; - assert(match("aaaa", re).hit == "aaaa"); -} - -//bugzilla 10798 -unittest -{ - auto cr = ctRegex!("[abcd--c]*"); - auto m = "abc".match(cr); - assert(m); - assert(m.hit == "ab"); -} - -// bugzilla 10913 -unittest -{ - @system static string foo(const(char)[] s) - { - return s.dup; - } - @safe static string bar(const(char)[] s) - { - return s.dup; - } - () @system { - replace!((a) => foo(a.hit))("blah", regex(`a`)); - }(); - () @safe { - replace!((a) => bar(a.hit))("blah", regex(`a`)); - }(); -} - -// bugzilla 11262 -unittest -{ - const reg = ctRegex!(r",", "g"); - auto str = "This,List"; - str = str.replace(reg, "-"); - assert(str == "This-List"); -} - -// bugzilla 11775 -unittest -{ - assert(collectException(regex("a{1,0}"))); -} - -// bugzilla 11839 -unittest -{ - assert(regex(`(?P\w+)`).namedCaptures.equal(["var1"])); - assert(collectException(regex(`(?P<1>\w+)`))); - assert(regex(`(?P\w+)`).namedCaptures.equal(["v1"])); - assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"])); - assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"])); -} - -// bugzilla 12076 -unittest -{ - auto RE = ctRegex!(r"(?abc)`); - assert(collectException("abc".matchFirst(r)["b"])); -} - -// bugzilla 12691 -unittest -{ - assert(bmatch("e@", "^([a-z]|)*$").empty); - assert(bmatch("e@", ctRegex!`^([a-z]|)*$`).empty); -} - -//bugzilla 12713 -unittest -{ - assertThrown(regex("[[a-z]([a-z]|(([[a-z])))")); -} - -//bugzilla 12747 -unittest -{ - assertThrown(regex(`^x(\1)`)); - assertThrown(regex(`^(x(\1))`)); - assertThrown(regex(`^((x)(?=\1))`)); -} - -// bugzilla 14504 -unittest -{ - auto p = ctRegex!("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?" 
~ - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); -} - -// bugzilla 14529 -unittest -{ - auto ctPat2 = regex(r"^[CDF]$", "i"); - foreach (v; ["C", "c", "D", "d", "F", "f"]) - assert(matchAll(v, ctPat2).front.hit == v); -} - -// bugzilla 14615 -unittest -{ - import std.stdio : writeln; - import std.regex : replaceFirst, replaceFirstInto, regex; - import std.array : appender; - - auto example = "Hello, world!"; - auto pattern = regex("^Hello, (bug)"); // won't find this one - auto result = replaceFirst(example, pattern, "$1 Sponge Bob"); - assert(result == "Hello, world!"); // Ok. - - auto sink = appender!string; - replaceFirstInto(sink, example, pattern, "$1 Sponge Bob"); - assert(sink.data == "Hello, world!"); - replaceAllInto(sink, example, pattern, "$1 Sponge Bob"); - assert(sink.data == "Hello, world!Hello, world!"); -} - -// bugzilla 15573 -unittest -{ - auto rx = regex("[c d]", "x"); - assert("a b".matchFirst(rx)); -} - -// bugzilla 15864 -unittest -{ - regex(`( counter + OpFunc[] opCacheTrue; // pointers to Op!(IR.xyz) for each bytecode + OpFunc[] opCacheFalse; // ditto + OpBackFunc[] opCacheBackTrue; // ditto + OpBackFunc[] opCacheBackFalse; // ditto + size_t threadSize; int matched; bool exhausted; - const Kickstart!Char kickstart; - Group!DataIndex[] backrefed; - size_t[size_t] subCounters; // a table of gen counter per sub-engine: PC -> counter static struct State { @@ -806,7 +799,7 @@ template ThompsonOps(E,S, bool withInput:false) bool search() { - if (!s.search(kickstart, front, index)) + if (!s.search(re.kickstart, front, index)) { index = s.lastIndex; return false; @@ -815,23 +808,24 @@ template ThompsonOps(E,S, bool withInput:false) } } - void initExternalMemory(void[] memory, size_t hotspotTableSize) + void initExternalMemory(void[] memory) { - prepareFreeList(threadCount, memory); - if (hotspotTableSize) + threadSize = getThreadSize(re); + prepareFreeList(re.threadCount, memory); + if (re.hotspotTableSize) { - merge = 
arrayInChunk!(DataIndex)(hotspotTableSize, memory); + merge = arrayInChunk!(DataIndex)(re.hotspotTableSize, memory); merge[] = 0; } - opCacheTrue = arrayInChunk!(OpFunc)(ir.length, memory).ptr; - opCacheFalse = arrayInChunk!(OpFunc)(ir.length, memory).ptr; - opCacheBackTrue = arrayInChunk!(OpBackFunc)(ir.length, memory).ptr; - opCacheBackFalse = arrayInChunk!(OpBackFunc)(ir.length, memory).ptr; + opCacheTrue = arrayInChunk!(OpFunc)(re.ir.length, memory); + opCacheFalse = arrayInChunk!(OpFunc)(re.ir.length, memory); + opCacheBackTrue = arrayInChunk!(OpBackFunc)(re.ir.length, memory); + opCacheBackFalse = arrayInChunk!(OpBackFunc)(re.ir.length, memory); - for (uint pc = 0; pc 1) { auto app = appender!S(); @@ -332,28 +344,10 @@ public alias StaticRegex(Char) = std.regex.internal.ir.StaticRegex!(Char); } else pat = patterns[0]; - return regexImpl!S(pat, flags); -} - -/++ - Compile regular expression pattern for the later execution. - Returns: $(D Regex) object that works on inputs having - the same character width as $(D pattern). - Params: - pattern(s) = Regular expression(s) to match - flags = The _attributes (g, i, m and x accepted) - - Throws: $(D RegexException) if there were any errors during compilation. 
-+/ -@trusted public auto regex(S)(S[] patterns, const(char)[] flags="") - if (isSomeString!(S)) -{ - import std.functional : memoize; - enum cacheSize = 8; if (__ctfe) - return regexPure(patterns, flags); - return memoize!(regexPure!S, cacheSize)(patterns, flags); + return regexImpl(pat, flags); + return memoize!(regexImpl!S, cacheSize)(pat, flags); } ///ditto @@ -377,7 +371,7 @@ unittest assert(m.front[1] == "12"); } -private auto regexImpl(S)(S pattern, const(char)[] flags="") pure +public auto regexImpl(S)(S pattern, const(char)[] flags="") if (isSomeString!(S)) { import std.regex.internal.parser : Parser, CodeGen; @@ -387,25 +381,19 @@ private auto regexImpl(S)(S pattern, const(char)[] flags="") pure } -private template IsolatedFunc(Char, alias source) +template ctRegexImpl(alias pattern, string flags=[]) { - import std.regex.internal.backtracking; + import std.regex.internal.parser, std.regex.internal.backtracking; + enum r = regex(pattern, flags); + alias Char = BasicElementOf!(typeof(pattern)); + enum source = ctGenRegExCode(r); alias Matcher = BacktrackingMatcher!(true); - @trusted bool IsolatedFunc(ref Matcher!Char matcher) + @trusted bool func(ref Matcher!Char matcher) { debug(std_regex_ctr) pragma(msg, source); mixin(source); } -} - -template ctRegexImpl(alias pattern, string flags=[]) -{ - import std.regex.internal.parser, std.regex.internal.backtracking; - static immutable r = cast(immutable)regexPure([pattern], flags); - alias Char = BasicElementOf!(typeof(pattern)); - enum source = ctGenRegExCode(r); - alias func = IsolatedFunc!(Char, source); - static immutable nr = immutable StaticRegex!Char(r, &func); + enum nr = StaticRegex!Char(r, &func); } /++ @@ -418,7 +406,7 @@ template ctRegexImpl(alias pattern, string flags=[]) pattern = Regular expression flags = The _attributes (g, i, m and x accepted) +/ -public static immutable ctRegex(alias pattern, alias flags=[]) = ctRegexImpl!(pattern, flags).nr; +public enum ctRegex(alias pattern, alias flags=[]) 
= ctRegexImpl!(pattern, flags).nr; enum isRegexFor(RegEx, R) = is(RegEx == Regex!(BasicElementOf!R)) || is(RegEx == StaticRegex!(BasicElementOf!R)); @@ -448,9 +436,9 @@ private: } uint _f, _b; uint _refcount; // ref count or SMALL MASK + num groups - const NamedGroup[] _names; + NamedGroup[] _names; - this()(R input, uint n, const(NamedGroup)[] named) + this()(R input, uint n, NamedGroup[] named) { _input = input; _names = named; @@ -459,6 +447,16 @@ private: _f = 0; } + this(alias Engine)(ref RegexMatch!(R,Engine) rmatch) + { + _input = rmatch._input; + _names = rmatch._engine.re.dict; + immutable n = rmatch._engine.re.ngroup; + newMatches(n); + _b = n; + _f = 0; + } + @property inout(Group!DataIndex[]) matches() inout { return (_refcount & SMALL_MASK) ? small_matches[0 .. _refcount & 0xFF] : big_matches; @@ -594,9 +592,10 @@ public: assert(matchFirst("abc", "[0-9]+", "[a-z]+").whichPattern == 2); } - /// Lookup named submatch. - unittest - { + /++ + Lookup named submatch. + + --- import std.regex; import std.range; @@ -607,8 +606,8 @@ public: //named groups are unaffected by range primitives assert(c["var"] =="a"); assert(c.front == "42"); - } - + ---- + +/ R opIndex(String)(String i) /*const*/ //@@@BUG@@@ if (isSomeString!String) { @@ -626,8 +625,6 @@ public: /// unittest { - import std.range : popFrontN; - auto c = matchFirst("@abc#", regex(`(\w)(\w)(\w)`)); assert(c.pre == "@"); // Part of input preceding match assert(c.post == "#"); // Immediately after match @@ -663,24 +660,21 @@ private: alias EngineType = Engine!Char; EngineType _engine; R _input; - uint _ngroup; Captures!(R,EngineType.DataIndex) _captures; void[] _memory;//is ref-counted - this(RegEx)(R input, RegEx prog, uint reFlags) + this(RegEx)(R input, RegEx prog) { import std.exception : enforce; _input = input; - _ngroup = prog.ngroup; immutable size = EngineType.initialMemory(prog)+size_t.sizeof; _memory = (enforce(malloc(size), "malloc failed")[0..size]); scope(failure) free(_memory.ptr); 
*cast(size_t*)_memory.ptr = 1; - _engine = EngineType(prog, Input!Char(input), - _memory[size_t.sizeof..$], reFlags); - static if (is(typeof(prog.nativeFn))) + _engine = EngineType(prog, Input!Char(input), _memory[size_t.sizeof..$]); + static if (is(RegEx == StaticRegex!(BasicElementOf!R))) _engine.nativeFn = prog.nativeFn; - _captures = Captures!(R,EngineType.DataIndex)(input, prog.ngroup, prog.dict); + _captures = Captures!(R,EngineType.DataIndex)(this); _captures._nMatch = _engine.match(_captures.matches); debug(std_regex_allocation) writefln("RefCount (ctor): %x %d", _memory.ptr, counter); } @@ -749,16 +743,16 @@ public: if (counter != 1) {//do cow magic first counter--;//we abandon this reference - immutable size = _memory.length; + immutable size = EngineType.initialMemory(_engine.re)+size_t.sizeof; _memory = (enforce(malloc(size), "malloc failed")[0..size]); - _engine.dupTo(_memory[size_t.sizeof..size]); + _engine = _engine.dupTo(_memory[size_t.sizeof..size]); counter = 1;//points to new chunk } if (!_captures.unique) { // has external references - allocate new space - _captures.newMatches(_ngroup); + _captures.newMatches(_engine.re.ngroup); } _captures._nMatch = _engine.match(_captures.matches); } @@ -777,7 +771,7 @@ public: } -private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, const RegEx re) +private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, RegEx re) { import core.stdc.stdlib : malloc, free; import std.exception : enforce; @@ -788,16 +782,17 @@ private @trusted auto matchOnce(alias Engine, RegEx, R)(R input, const RegEx re) void[] memory = enforce(malloc(size), "malloc failed")[0..size]; scope(exit) free(memory.ptr); auto captures = Captures!(R, EngineType.DataIndex)(input, re.ngroup, re.dict); - auto engine = EngineType(re, Input!Char(input), memory, re.flags); - static if (is(typeof(re.nativeFn))) + auto engine = EngineType(re, Input!Char(input), memory); + static if (is(RegEx == StaticRegex!(BasicElementOf!R))) 
engine.nativeFn = re.nativeFn; captures._nMatch = engine.match(captures.matches); return captures; } -private auto matchMany(alias Engine, RegEx, R)(R input, const RegEx re) +private auto matchMany(alias Engine, RegEx, R)(R input, RegEx re) { - return RegexMatch!(R, Engine)(input, re, re.flags | RegexOption.global); + re.flags |= RegexOption.global; + return RegexMatch!(R, Engine)(input, re); } unittest @@ -852,7 +847,7 @@ private void replaceMatchesInto(alias output, Sink, R, T) } // a general skeleton of replaceFirst -private R replaceFirstWith(alias output, R, RegEx)(R input, const RegEx re) +private R replaceFirstWith(alias output, R, RegEx)(R input, RegEx re) if (isSomeString!R && isRegexFor!(RegEx, R)) { import std.array : appender; @@ -867,7 +862,7 @@ private R replaceFirstWith(alias output, R, RegEx)(R input, const RegEx re) // ditto for replaceAll // the method parameter allows old API to ride on the back of the new one private R replaceAllWith(alias output, - alias method=matchAll, R, RegEx)(R input, const RegEx re) + alias method=matchAll, R, RegEx)(R input, RegEx re) if (isSomeString!R && isRegexFor!(RegEx, R)) { import std.array : appender; @@ -896,12 +891,11 @@ private R replaceAllWith(alias output, Returns: a $(D RegexMatch) object holding engine state after first match. 
+/ -public auto match(R, RegEx)(R input, const RegEx re) +public auto match(R, RegEx)(R input, RegEx re) if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R))) { import std.regex.internal.thompson : ThompsonMatcher; - return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher) - (input, re, re.flags); + return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, re); } ///ditto @@ -909,17 +903,14 @@ public auto match(R, String)(R input, String re) if (isSomeString!R && isSomeString!String) { import std.regex.internal.thompson : ThompsonMatcher; - auto r = regex(re); - return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher) - (input, r, r.flags); + return RegexMatch!(Unqual!(typeof(input)),ThompsonMatcher)(input, regex(re)); } -public auto match(R, RegEx)(R input, const RegEx re) +public auto match(R, RegEx)(R input, RegEx re) if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R))) { import std.regex.internal.backtracking : BacktrackingMatcher; - return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true) - (input, re, re.flags); + return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re); } /++ @@ -940,7 +931,7 @@ public auto match(R, RegEx)(R input, const RegEx re) $(LREF Captures) containing the extent of a match together with all submatches if there was a match, otherwise an empty $(LREF Captures) object. +/ -public auto matchFirst(R, RegEx)(R input, const RegEx re) +public auto matchFirst(R, RegEx)(R input, RegEx re) if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R))) { import std.regex.internal.thompson : ThompsonMatcher; @@ -963,7 +954,7 @@ public auto matchFirst(R, String)(R input, String[] re...) 
return matchOnce!ThompsonMatcher(input, regex(re)); } -public auto matchFirst(R, RegEx)(R input, const RegEx re) +public auto matchFirst(R, RegEx)(R input, RegEx re) if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R))) { import std.regex.internal.backtracking : BacktrackingMatcher; @@ -991,7 +982,7 @@ public auto matchFirst(R, RegEx)(R input, const RegEx re) $(LREF RegexMatch) object that represents matcher state after the first match was found or an empty one if not present. +/ -public auto matchAll(R, RegEx)(R input, const RegEx re) +public auto matchAll(R, RegEx)(R input, RegEx re) if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R))) { import std.regex.internal.thompson : ThompsonMatcher; @@ -1014,7 +1005,7 @@ public auto matchAll(R, String)(R input, String[] re...) return matchMany!ThompsonMatcher(input, regex(re)); } -public auto matchAll(R, RegEx)(R input, const RegEx re) +public auto matchAll(R, RegEx)(R input, RegEx re) if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R))) { import std.regex.internal.backtracking : BacktrackingMatcher; @@ -1031,7 +1022,7 @@ public auto matchAll(R, RegEx)(R input, const RegEx re) foreach (String; AliasSeq!(string, wstring, const(dchar)[])) { auto str1 = "blah-bleh".to!String(); - const pat1 = "bl[ae]h".to!String(); + auto pat1 = "bl[ae]h".to!String(); auto mf = matchFirst(str1, pat1); assert(mf.equal(["blah".to!String()])); auto mAll = matchAll(str1, pat1); @@ -1039,7 +1030,7 @@ public auto matchAll(R, RegEx)(R input, const RegEx re) ([["blah".to!String()], ["bleh".to!String()]])); auto str2 = "1/03/12 - 3/03/12".to!String(); - const pat2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]); + auto pat2 = regex([r"(\d+)/(\d+)/(\d+)".to!String(), "abc".to!String]); auto mf2 = matchFirst(str2, pat2); assert(mf2.equal(["1/03/12", "1", "03", "12"].map!(to!String)())); auto mAll2 = matchAll(str2, pat2); @@ -1049,7 +1040,7 @@ public auto matchAll(R, RegEx)(R input, const RegEx re) 
mf2.popFrontN(3); assert(mf2.equal(["12".to!String()])); - const ctPat = ctRegex!(`(?P\d+)/(?P\d+)`.to!String()); + auto ctPat = ctRegex!(`(?P\d+)/(?P\d+)`.to!String()); auto str = "2 + 34/56 - 6/1".to!String(); auto cmf = matchFirst(str, ctPat); assert(cmf.equal(["34/56", "34", "56"].map!(to!String)())); @@ -1080,12 +1071,11 @@ public auto matchAll(R, RegEx)(R input, const RegEx re) state after first match. +/ -public auto bmatch(R, RegEx)(R input, const RegEx re) +public auto bmatch(R, RegEx)(R input, RegEx re) if (isSomeString!R && is(RegEx == Regex!(BasicElementOf!R))) { import std.regex.internal.backtracking : BacktrackingMatcher; - return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false) - (input, re, re.flags); + return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, re); } ///ditto @@ -1093,17 +1083,14 @@ public auto bmatch(R, String)(R input, String re) if (isSomeString!R && isSomeString!String) { import std.regex.internal.backtracking : BacktrackingMatcher; - auto r = regex(re); - return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false) - (input, r, r.flags); + return RegexMatch!(Unqual!(typeof(input)), BacktrackingMatcher!false)(input, regex(re)); } -public auto bmatch(R, RegEx)(R input, const RegEx re) +public auto bmatch(R, RegEx)(R input, RegEx re) if (isSomeString!R && is(RegEx == StaticRegex!(BasicElementOf!R))) { import std.regex.internal.backtracking : BacktrackingMatcher; - return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true) - (input, re, re.flags); + return RegexMatch!(Unqual!(typeof(input)),BacktrackingMatcher!true)(input, re); } // produces replacement string from format using captures for substitution @@ -1196,7 +1183,7 @@ L_Replace_Loop: A string of the same type with the first match (if any) replaced. If no match is found returns the input string itself. 
+/ -public R replaceFirst(R, C, RegEx)(R input, const RegEx re, const(C)[] format) +public R replaceFirst(R, C, RegEx)(R input, RegEx re, const(C)[] format) if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R)) { return replaceFirstWith!((m, sink) => replaceFmt(format, m, sink))(input, re); @@ -1223,7 +1210,7 @@ unittest replaced by return values of $(D fun). If no matches found returns the $(D input) itself. +/ -public R replaceFirst(alias fun, R, RegEx)(R input, const RegEx re) +public R replaceFirst(alias fun, R, RegEx)(R input, RegEx re) if (isSomeString!R && isRegexFor!(RegEx, R)) { return replaceFirstWith!((m, sink) => sink.put(fun(m)))(input, re); @@ -1249,7 +1236,7 @@ unittest and the one with the user defined callback. +/ public @trusted void replaceFirstInto(Sink, R, C, RegEx) - (ref Sink sink, R input, const RegEx re, const(C)[] format) + (ref Sink sink, R input, RegEx re, const(C)[] format) if (isOutputRange!(Sink, dchar) && isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R)) { @@ -1259,7 +1246,7 @@ public @trusted void replaceFirstInto(Sink, R, C, RegEx) ///ditto public @trusted void replaceFirstInto(alias fun, Sink, R, RegEx) - (Sink sink, R input, const RegEx re) + (Sink sink, R input, RegEx re) if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R)) { replaceCapturesInto!fun(sink, input, matchFirst(input, re)); @@ -1278,6 +1265,24 @@ unittest assert(result.data == "first\nsecond\n"); } +//examples for replaceFirst +@system unittest +{ + import std.conv; + string list = "#21 out of 46"; + string newList = replaceFirst!(cap => to!string(to!int(cap.hit)+1)) + (list, regex(`[0-9]+`)); + assert(newList == "#22 out of 46"); + import std.array; + string m1 = "first message\n"; + string m2 = "second message\n"; + auto result = appender!string(); + replaceFirstInto(result, m1, regex(`([a-z]+) message`), "$1"); + //equivalent of the above with user-defined callback + replaceFirstInto!(cap=>cap[1])(result, m2, regex(`([a-z]+) 
message`)); + assert(result.data == "first\nsecond\n"); +} + /++ Construct a new string from $(D input) by replacing all of the fragments that match a pattern $(D re) with a string generated @@ -1296,7 +1301,7 @@ unittest of the matches (if any) replaced. If no match is found returns the input string itself. +/ -public @trusted R replaceAll(R, C, RegEx)(R input, const RegEx re, const(C)[] format) +public @trusted R replaceAll(R, C, RegEx)(R input, RegEx re, const(C)[] format) if (isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R)) { return replaceAllWith!((m, sink) => replaceFmt(format, m, sink))(input, re); @@ -1306,7 +1311,7 @@ public @trusted R replaceAll(R, C, RegEx)(R input, const RegEx re, const(C)[] fo unittest { // insert comma as thousands delimiter - const re = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g"); + auto re = regex(r"(?<=\d)(?=(\d\d\d)+\b)","g"); assert(replaceAll("12000 + 42100 = 54100", re, ",") == "12,000 + 42,100 = 54,100"); } @@ -1330,7 +1335,7 @@ unittest re = compiled regular expression fun = delegate to use +/ -public @trusted R replaceAll(alias fun, R, RegEx)(R input, const RegEx re) +public @trusted R replaceAll(alias fun, R, RegEx)(R input, RegEx re) if (isSomeString!R && isRegexFor!(RegEx, R)) { return replaceAllWith!((m, sink) => sink.put(fun(m)))(input, re); @@ -1359,7 +1364,7 @@ unittest the other one with a user defined functor. 
+/ public @trusted void replaceAllInto(Sink, R, C, RegEx) - (Sink sink, R input, const RegEx re, const(C)[] format) + (Sink sink, R input, RegEx re, const(C)[] format) if (isOutputRange!(Sink, dchar) && isSomeString!R && is(C : dchar) && isRegexFor!(RegEx, R)) { @@ -1369,7 +1374,7 @@ public @trusted void replaceAllInto(Sink, R, C, RegEx) ///ditto public @trusted void replaceAllInto(alias fun, Sink, R, RegEx) - (Sink sink, R input, const RegEx re) + (Sink sink, R input, RegEx re) if (isOutputRange!(Sink, dchar) && isSomeString!R && isRegexFor!(RegEx, R)) { replaceMatchesInto!fun(sink, input, matchAll(input, re)); @@ -1406,8 +1411,8 @@ public @trusted void replaceAllInto(alias fun, Sink, R, RegEx) S t2F = "hound dome".to!S(); S t1A = "court trial".to!S(); S t2A = "hound home".to!S(); - const re1 = regex("curt".to!S()); - const re2 = regex("[dr]o".to!S()); + auto re1 = regex("curt".to!S()); + auto re2 = regex("[dr]o".to!S()); assert(replaceFirst(s1, re1, "court") == t1F); assert(replaceFirst(s2, re2, "ho") == t2F); @@ -1441,14 +1446,14 @@ public @trusted void replaceAllInto(alias fun, Sink, R, RegEx) The use of this function is $(RED discouraged), please use $(LREF replaceAll) or $(LREF replaceFirst) explicitly. 
+/ -public R replace(alias scheme = match, R, C, RegEx)(R input, const RegEx re, const(C)[] format) +public R replace(alias scheme = match, R, C, RegEx)(R input, RegEx re, const(C)[] format) if (isSomeString!R && isRegexFor!(RegEx, R)) { return replaceAllWith!((m, sink) => replaceFmt(format, m, sink), match)(input, re); } ///ditto -public R replace(alias fun, R, RegEx)(R input, const RegEx re) +public R replace(alias fun, R, RegEx)(R input, RegEx re) if (isSomeString!R && isRegexFor!(RegEx, R)) { return replaceAllWith!(fun, match)(input, re); @@ -1470,14 +1475,15 @@ public struct Splitter(Flag!"keepSeparators" keepSeparators = No.keepSeparators, private: Range _input; size_t _offset; - alias Rx = typeof(matchAll(Range.init,RegEx.init)); + alias Rx = typeof(match(Range.init,RegEx.init)); Rx _match; static if (keepSeparators) bool onMatch = false; - @trusted this(Range input, const RegEx separator) + @trusted this(Range input, RegEx separator) {//@@@BUG@@@ generated opAssign of RegexMatch is not @trusted _input = input; + separator.flags |= RegexOption.global; if (_input.empty) { //there is nothing to match at all, make _offset > 0 @@ -1485,7 +1491,7 @@ private: } else { - _match = matchAll(_input, separator); + _match = Rx(_input, separator); static if (keepSeparators) if (_match.pre.empty) @@ -1573,9 +1579,8 @@ public: /// ditto public Splitter!(keepSeparators, Range, RegEx) splitter( - Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx) - (Range r, const RegEx pat) - if (is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range)) + Flag!"keepSeparators" keepSeparators = No.keepSeparators, Range, RegEx)(Range r, RegEx pat) if ( + is(BasicElementOf!Range : dchar) && isRegexFor!(RegEx, Range)) { return Splitter!(keepSeparators, Range, RegEx)(r, pat); } @@ -1593,9 +1598,8 @@ unittest unittest { import std.algorithm.comparison : equal; - import std.typecons : Yes; - const pattern = regex(`([\.,])`); + auto pattern = regex(`([\.,])`); 
assert("2003.04.05" .splitter!(Yes.keepSeparators)(pattern) @@ -1607,7 +1611,7 @@ unittest } ///An eager version of $(D splitter) that creates an array with splitted slices of $(D input). -public @trusted String[] split(String, RegEx)(String input, const RegEx rx) +public @trusted String[] split(String, RegEx)(String input, RegEx rx) if (isSomeString!String && isRegexFor!(RegEx, String)) { import std.array : appender; diff --git a/std/uni.d b/std/uni.d index 563ebe3d5fd..5c7754408b3 100644 --- a/std/uni.d +++ b/std/uni.d @@ -2116,6 +2116,20 @@ public: assert(!gothic['$']); } + + // Linear scan for $(D ch). Useful only for small sets. + // TODO: + // used internally in std.regex + // should be properly exposed in a public API ? + package auto scanFor()(dchar ch) const + { + immutable len = data.length; + for (size_t i = 0; i < len; i++) + if (ch < data[i]) + return i & 1; + return 0; + } + /// Number of $(CODEPOINTS) in this set @property size_t length() { diff --git a/win32.mak b/win32.mak index 3bf9f0a043c..6c4e92ebe05 100644 --- a/win32.mak +++ b/win32.mak @@ -220,19 +220,12 @@ SRC_STD_RANGE= \ SRC_STD_REGEX= \ std\regex\internal\ir.d \ std\regex\package.d \ - std\regex\internal\tests.d \ - std\regex\internal\generator.d - -SRC_STD_REGEX_2 = \ std\regex\internal\parser.d \ + std\regex\internal\tests.d \ std\regex\internal\backtracking.d \ std\regex\internal\thompson.d \ - std\regex\internal\tests2.d - -SRC_STD_REGEX_3 = \ - std\regex\internal\shiftor.d \ - std\regex\internal\bitnfa.d \ - std\regex\internal\tests3.d + std\regex\internal\kickstart.d \ + std\regex\internal\generator.d SRC_STD_C= \ std\c\process.d \ @@ -360,8 +353,6 @@ SRC_TO_COMPILE= \ $(SRC_STD_NET) \ $(SRC_STD_RANGE) \ $(SRC_STD_REGEX) \ - $(SRC_STD_REGEX_2) \ - $(SRC_STD_REGEX_3) \ $(SRC_STD_C) \ $(SRC_STD_WIN) \ $(SRC_STD_C_WIN) \ @@ -581,8 +572,6 @@ UNITTEST_OBJS= \ unittest8d.obj \ unittest8e.obj \ unittest8f.obj \ - unittest8g.obj \ - unittest8h.obj \ unittest9a.obj unittest : $(LIB) @@ 
-597,13 +586,11 @@ unittest : $(LIB) $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest6.obj $(SRC_STD_6) $(SRC_STD_CONTAINER) $(SRC_STD_EXP_ALLOC) $(SRC_STD_EXP_LOGGER) $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest7.obj $(SRC_STD_7) $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8a.obj $(SRC_STD_REGEX) - $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8b.obj $(SRC_STD_REGEX_2) - $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8c.obj $(SRC_STD_REGEX_3) - $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8d.obj $(SRC_STD_NET) - $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8e.obj $(SRC_STD_C) $(SRC_STD_WIN) $(SRC_STD_C_WIN) - $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8f.obj $(SRC_STD_INTERNAL) $(SRC_STD_INTERNAL_DIGEST) $(SRC_STD_INTERNAL_MATH) $(SRC_STD_INTERNAL_WINDOWS) - $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8g.obj $(SRC_ETC) $(SRC_ETC_C) - $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8h.obj $(SRC_STD_EXP) + $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8b.obj $(SRC_STD_NET) + $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8c.obj $(SRC_STD_C) $(SRC_STD_WIN) $(SRC_STD_C_WIN) + $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8d.obj $(SRC_STD_INTERNAL) $(SRC_STD_INTERNAL_DIGEST) $(SRC_STD_INTERNAL_MATH) $(SRC_STD_INTERNAL_WINDOWS) + $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8e.obj $(SRC_ETC) $(SRC_ETC_C) + $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest8f.obj $(SRC_STD_EXP) $(DMD) $(UDFLAGS) -L/co -c -unittest -ofunittest9a.obj $(SRC_STD_EXP_NDSLICE) $(DMD) $(UDFLAGS) -L/co -unittest unittest.d $(UNITTEST_OBJS) \ $(ZLIB) $(DRUNTIMELIB) diff --git a/win64.mak b/win64.mak index 6abfae732cd..df4e1abbebd 100644 --- a/win64.mak +++ b/win64.mak @@ -239,19 +239,12 @@ SRC_STD_RANGE= \ SRC_STD_REGEX= \ std\regex\internal\ir.d \ std\regex\package.d \ - std\regex\internal\tests.d \ - std\regex\internal\generator.d - -SRC_STD_REGEX_2 = \ std\regex\internal\parser.d \ + std\regex\internal\tests.d \ std\regex\internal\backtracking.d \ 
std\regex\internal\thompson.d \ - std\regex\internal\tests2.d - -SRC_STD_REGEX_3 = \ - std\regex\internal\shiftor.d \ - std\regex\internal\bitnfa.d \ - std\regex\internal\tests3.d + std\regex\internal\kickstart.d \ + std\regex\internal\generator.d SRC_STD_C= \ std\c\process.d \ @@ -379,8 +372,6 @@ SRC_TO_COMPILE= \ $(SRC_STD_NET) \ $(SRC_STD_RANGE) \ $(SRC_STD_REGEX) \ - $(SRC_STD_REGEX_2) \ - $(SRC_STD_REGEX_3) \ $(SRC_STD_C) \ $(SRC_STD_WIN) \ $(SRC_STD_C_WIN) \ @@ -631,13 +622,11 @@ unittest : $(LIB) $(DMD) $(UDFLAGS) -c -unittest -ofunittest6i.obj $(SRC_STD_6i) $(DMD) $(UDFLAGS) -c -unittest -ofunittest7.obj $(SRC_STD_7) $(SRC_STD_EXP_LOGGER) $(DMD) $(UDFLAGS) -c -unittest -ofunittest8a.obj $(SRC_STD_REGEX) - $(DMD) $(UDFLAGS) -c -unittest -ofunittest8b.obj $(SRC_STD_REGEX_2) - $(DMD) $(UDFLAGS) -c -unittest -ofunittest8c.obj $(SRC_STD_REGEX_3) - $(DMD) $(UDFLAGS) -c -unittest -ofunittest8d.obj $(SRC_STD_NET) - $(DMD) $(UDFLAGS) -c -unittest -ofunittest8e.obj $(SRC_STD_C) $(SRC_STD_WIN) $(SRC_STD_C_WIN) - $(DMD) $(UDFLAGS) -c -unittest -ofunittest8f.obj $(SRC_STD_INTERNAL) $(SRC_STD_INTERNAL_DIGEST) $(SRC_STD_INTERNAL_MATH) $(SRC_STD_INTERNAL_WINDOWS) - $(DMD) $(UDFLAGS) -c -unittest -ofunittest8g.obj $(SRC_ETC) $(SRC_ETC_C) - $(DMD) $(UDFLAGS) -c -unittest -ofunittest8h.obj $(SRC_STD_EXP) + $(DMD) $(UDFLAGS) -c -unittest -ofunittest8b.obj $(SRC_STD_NET) + $(DMD) $(UDFLAGS) -c -unittest -ofunittest8c.obj $(SRC_STD_C) $(SRC_STD_WIN) $(SRC_STD_C_WIN) + $(DMD) $(UDFLAGS) -c -unittest -ofunittest8d.obj $(SRC_STD_INTERNAL) $(SRC_STD_INTERNAL_DIGEST) $(SRC_STD_INTERNAL_MATH) $(SRC_STD_INTERNAL_WINDOWS) + $(DMD) $(UDFLAGS) -c -unittest -ofunittest8e.obj $(SRC_ETC) $(SRC_ETC_C) + $(DMD) $(UDFLAGS) -c -unittest -ofunittest8f.obj $(SRC_STD_EXP) $(DMD) $(UDFLAGS) -c -unittest -ofunittest9.obj $(SRC_STD_EXP_ALLOC) $(DMD) $(UDFLAGS) -c -unittest -ofunittest9a.obj $(SRC_STD_EXP_NDSLICE) $(DMD) $(UDFLAGS) -L/OPT:NOICF -unittest unittest.d $(UNITTEST_OBJS) \