From c3e9e4db432308ea8ea8fde5006113fd717245e1 Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 18:26:57 -0500 Subject: [PATCH 01/13] Add benchmarks. --- gcBench/doBenchmarks.d | 17 ++++++++++++ gcBench/largeRand.d | 21 ++++++++++++++ gcBench/singleHuge.d | 12 ++++++++ gcBench/smallRand.d | 21 ++++++++++++++ gcBench/tree1.d | 62 ++++++++++++++++++++++++++++++++++++++++++ gcBench/tree2.d | 20 ++++++++++++++ 6 files changed, 153 insertions(+) create mode 100644 gcBench/doBenchmarks.d create mode 100644 gcBench/largeRand.d create mode 100644 gcBench/singleHuge.d create mode 100644 gcBench/smallRand.d create mode 100644 gcBench/tree1.d create mode 100644 gcBench/tree2.d diff --git a/gcBench/doBenchmarks.d b/gcBench/doBenchmarks.d new file mode 100644 index 0000000000..f36d28aa3f --- /dev/null +++ b/gcBench/doBenchmarks.d @@ -0,0 +1,17 @@ +/**This is a driver script that runs the benchmarks.*/ + +import std.stdio, std.process; + +void main() { + system("dmd -O -inline -release singleHuge.d"); + system("dmd -O -inline -release largeRand.d"); + system("dmd -O -inline -release smallRand.d"); + system("dmd -O -inline -release tree1.d"); + system("dmd -O -inline -release tree2.d"); + + system("singleHuge"); + system("largeRand"); + system("smallRand"); + system("tree1"); + system("tree2"); +} diff --git a/gcBench/largeRand.d b/gcBench/largeRand.d new file mode 100644 index 0000000000..330f49283c --- /dev/null +++ b/gcBench/largeRand.d @@ -0,0 +1,21 @@ +/**Benchmark on uniformly distributed, random large allocations.*/ + +import std.random, core.memory, std.datetime, std.stdio; + +enum nIter = 1000; + +void main() { + auto ptrs = new void*[1024]; + + auto sw = StopWatch(autoStart); + + // Allocate 1024 large blocks with size uniformly distributed between 1 + // and 128 kilobytes. 
+ foreach(i; 0..nIter) { + foreach(ref ptr; ptrs) { + ptr = GC.malloc(uniform(1024, 128 * 1024 + 1), GC.BlkAttr.NO_SCAN); + } + } + + writefln("LargeRand: Done %s iter in %s milliseconds.", nIter, sw.peek.msecs); +} diff --git a/gcBench/singleHuge.d b/gcBench/singleHuge.d new file mode 100644 index 0000000000..a05ee8712a --- /dev/null +++ b/gcBench/singleHuge.d @@ -0,0 +1,12 @@ +import std.stdio, std.datetime, core.memory; + +void main(string[] args) { + enum mul = 1000; + auto ptr = GC.malloc(mul * 1_048_576, GC.BlkAttr.NO_SCAN); + + auto sw = StopWatch(autoStart); + GC.collect(); + immutable msec = sw.peek.msecs; + writefln("SingleHuge: Collected a %s megabyte heap in %s milliseconds.", + mul, msec); +} diff --git a/gcBench/smallRand.d b/gcBench/smallRand.d new file mode 100644 index 0000000000..db4679b676 --- /dev/null +++ b/gcBench/smallRand.d @@ -0,0 +1,21 @@ +/**Benchmark on uniformly distributed, random small allocations.*/ + +import std.random, core.memory, std.datetime, std.stdio; + +enum nIter = 1000; + +void main() { + auto ptrs = new void*[4096]; + + auto sw = StopWatch(autoStart); + + // Allocate 4096 small blocks with size uniformly distributed between 8 + // and 2047 bytes. + foreach(i; 0..nIter) { + foreach(ref ptr; ptrs) { + ptr = GC.malloc(uniform(8, 2048), GC.BlkAttr.NO_SCAN); + } + } + + writefln("SmallRand: Done %s iter in %s milliseconds.", nIter, sw.peek.msecs); +} diff --git a/gcBench/tree1.d b/gcBench/tree1.d new file mode 100644 index 0000000000..b98d15d7a4 --- /dev/null +++ b/gcBench/tree1.d @@ -0,0 +1,62 @@ +/**Benchmark the GC on tree building. 
Thanks to Bearophile.*/ + +import std.stdio, std.conv, std.datetime; + +class TreeNode { + private TreeNode left, right; + private int item; + + this(int item) { + this.item = item; + } + + this(TreeNode left, TreeNode right, int item){ + this.left = left; + this.right = right; + this.item = item; + } + + private static TreeNode bottomUpTree(int item, int depth) { + if (depth > 0) { + return new TreeNode(bottomUpTree(2 * item - 1, depth - 1), + bottomUpTree(2 * item, depth - 1), + item); + } else { + return new TreeNode(item); + } + } + + private int itemCheck() { + if (left is null) + return item; + else + return item + left.itemCheck() - right.itemCheck(); + } +} + + +void main(string[] args) { + auto sw = StopWatch(autoStart); + + enum int minDepth = 4; + enum n = 18; + + int maxDepth = (minDepth + 2 > n) ? minDepth + 2 : n; + int stretchDepth = maxDepth + 1; + + int check = (TreeNode.bottomUpTree(0,stretchDepth)).itemCheck(); + + TreeNode longLivedTree = TreeNode.bottomUpTree(0, maxDepth); + + for (int depth = minDepth; depth <= maxDepth; depth += 2) { + int iterations = 1 << (maxDepth - depth + minDepth); + check = 0; + + foreach (int i; 1 .. iterations+1) { + check += (TreeNode.bottomUpTree(i, depth)).itemCheck(); + check += (TreeNode.bottomUpTree(-i, depth)).itemCheck(); + } + } + + writeln("Tree1: ", sw.peek.seconds); +} diff --git a/gcBench/tree2.d b/gcBench/tree2.d new file mode 100644 index 0000000000..354881dd8c --- /dev/null +++ b/gcBench/tree2.d @@ -0,0 +1,20 @@ +/**Another tree building benchmark. 
Thanks again to Bearophile.*/ + +import std.stdio, std.container, std.range, std.datetime; + +void main() { + auto sw = StopWatch(autoStart); + enum int range = 100; + enum int n = 1_000_000; + + auto t = RedBlackTree!int(0); + + for (int i = 0; i < n; i++) { + if (i > range) + t.removeFront(); + t.insert(i); + } + + writeln("Tree2: ", sw.peek.seconds); +} + From edd55a0212fdf7862881515bbfb2a54991921f7d Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 18:29:11 -0500 Subject: [PATCH 02/13] Add units. --- gcBench/tree1.d | 2 +- gcBench/tree2.d | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcBench/tree1.d b/gcBench/tree1.d index b98d15d7a4..8dd452fb90 100644 --- a/gcBench/tree1.d +++ b/gcBench/tree1.d @@ -58,5 +58,5 @@ void main(string[] args) { } } - writeln("Tree1: ", sw.peek.seconds); + writeln("Tree1: ", sw.peek.seconds, " seconds"); } diff --git a/gcBench/tree2.d b/gcBench/tree2.d index 354881dd8c..8fc19dc3c1 100644 --- a/gcBench/tree2.d +++ b/gcBench/tree2.d @@ -15,6 +15,6 @@ void main() { t.insert(i); } - writeln("Tree2: ", sw.peek.seconds); + writeln("Tree2: ", sw.peek.seconds, " seconds"); } From 912425f0f00c32b9958526c4ed0a60536bb37386 Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 18:37:05 -0500 Subject: [PATCH 03/13] First-order optimizations. 
--- src/gc/gcx.d | 366 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 223 insertions(+), 143 deletions(-) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index d78e7bfc20..e92b8e41ee 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -48,6 +48,16 @@ debug (PRINTF) import core.stdc.stdio : printf; debug (COLLECT_PRINTF) import core.stdc.stdio : printf; debug private import core.stdc.stdio; +debug(PRINTF) void printFreeInfo(Pool* pool) +{ + uint nReallyFree; + foreach(i; 0..pool.npages) { + if(pool.pagetable[i] >= B_FREE) nReallyFree++; + } + + printf("Pool %p: %d really free, %d supposedly be free\n", pool, nReallyFree, pool.freepages); +} + private { enum USE_CACHE = true; @@ -306,7 +316,7 @@ class GC if (pool) { - auto biti = cast(size_t)(p - pool.baseAddr) / 16; + auto biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; oldb = gcx.getBits(pool, biti); } @@ -341,7 +351,7 @@ class GC if (pool) { - auto biti = cast(size_t)(p - pool.baseAddr) / 16; + auto biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; oldb = gcx.getBits(pool, biti); gcx.setBits(pool, biti, mask); @@ -377,7 +387,7 @@ class GC if (pool) { - auto biti = cast(size_t)(p - pool.baseAddr) / 16; + auto biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; oldb = gcx.getBits(pool, biti); gcx.clrBits(pool, biti, mask); @@ -467,14 +477,14 @@ class GC * of full pages freed. Perhaps this should instead be the amount of * memory freed. 
*/ - gcx.newPool(1); + gcx.newPool(1,false); state = 2; } else state = 1; continue; case 1: - gcx.newPool(1); + gcx.newPool(1, false); state = 2; continue; case 2: @@ -511,7 +521,7 @@ class GC Pool *pool = gcx.findPool(p); assert(pool); - gcx.setBits(pool, cast(size_t)(p - pool.baseAddr) / 16, bits); + gcx.setBits(pool, cast(size_t)(p - pool.baseAddr) / pool.divisor, bits); } return p; } @@ -602,7 +612,7 @@ class GC if (pool) { - auto biti = cast(size_t)(p - pool.baseAddr) / 16; + auto biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; if (bits) { @@ -642,6 +652,7 @@ class GC { debug (MEMSTOMP) memset(p + size, 0xF2, psize - size); pool.freePages(pagenum + newsz, psz - newsz); + pool.updateOffsets(pagenum); } if(alloc_size) *alloc_size = newsz * PAGESIZE; @@ -657,9 +668,13 @@ class GC if (i == pagenum + newsz) { debug (MEMSTOMP) memset(p + psize, 0xF0, size - psize); + debug(PRINTF) printFreeInfo(pool); memset(&pool.pagetable[pagenum + psz], B_PAGEPLUS, newsz - psz); + pool.updateOffsets(pagenum); if(alloc_size) *alloc_size = newsz * PAGESIZE; + pool.freepages -= (newsz - psz); + debug(PRINTF) printFreeInfo(pool); return p; } if (i == pool.ncommitted) @@ -686,7 +701,7 @@ class GC if (pool) { - auto biti = cast(size_t)(p - pool.baseAddr) / 16; + auto biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; if (bits) { @@ -795,6 +810,8 @@ class GC return 0; debug (MEMSTOMP) memset(p + psize, 0xF0, (psz + sz) * PAGESIZE - psize); memset(pool.pagetable + pagenum + psz, B_PAGEPLUS, sz); + pool.updateOffsets(pagenum); + pool.freepages -= sz; if (p == gcx.cached_size_key) gcx.cached_size_val = (psz + sz) * PAGESIZE; if (p == gcx.cached_info_key) @@ -862,6 +879,7 @@ class GC // private void freeNoSync(void *p) { + debug(PRINTF) printf("Freeing %p\n", cast(size_t) p); assert (p); Pool* pool; @@ -876,19 +894,19 @@ class GC sentinel_Invariant(p); p = sentinel_sub(p); pagenum = cast(size_t)(p - pool.baseAddr) / PAGESIZE; - biti = cast(size_t)(p - pool.baseAddr) / 16; + + 
debug(PRINTF) printf("pool base = %p, PAGENUM = %d of %d / %d, bin = %d\n", pool.baseAddr, pagenum, pool.ncommitted, pool.npages, pool.pagetable[pagenum]); + debug(PRINTF) if(pool.isLargeObject) printf("Block size = %d\n", pool.bPageOffsets[pagenum]); + biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; + gcx.clrBits(pool, biti, BlkAttr.ALL_BITS); bin = cast(Bins)pool.pagetable[pagenum]; if (bin == B_PAGE) // if large alloc { size_t npages; - size_t n; // Free pages - npages = 1; - n = pagenum; - while (++n < pool.ncommitted && pool.pagetable[n] == B_PAGEPLUS) - npages++; + npages = pool.bPageOffsets[pagenum]; debug (MEMSTOMP) memset(p, 0xF2, npages * PAGESIZE); pool.freePages(pagenum, npages); } @@ -1758,9 +1776,9 @@ struct Gcx } else if (bin == B_PAGEPLUS) { - do - { --pn, offset -= PAGESIZE; - } while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS); + auto pageOffset = pool.bPageOffsets[pn]; + offset -= pageOffset * PAGESIZE; + pn -= pageOffset; return pool.baseAddr + (offset & (offset.max ^ (PAGESIZE-1))); } @@ -1796,17 +1814,8 @@ struct Gcx bin = cast(Bins)pool.pagetable[pagenum]; size = binsize[bin]; if (bin == B_PAGE) - { size_t npages = pool.ncommitted; - ubyte* pt; - size_t i; - - pt = &pool.pagetable[0]; - for (i = pagenum + 1; i < npages; i++) - { - if (pt[i] != B_PAGEPLUS) - break; - } - size = (i - pagenum) * PAGESIZE; + { + size = pool.bPageOffsets[pagenum] * PAGESIZE; } cached_size_key = p; cached_size_val = size; @@ -1843,10 +1852,9 @@ struct Gcx } else if (bin == B_PAGEPLUS) { - do - { --pn, offset -= PAGESIZE; - } while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS); - + auto pageOffset = pool.bPageOffsets[pn]; + offset = pageOffset * PAGESIZE; + pn -= pageOffset; info.base = pool.baseAddr + (offset & (offset.max ^ (PAGESIZE-1))); // fix bin for use by size calc below @@ -1859,17 +1867,8 @@ struct Gcx info.size = binsize[bin]; if (bin == B_PAGE) - { size_t npages = pool.ncommitted; - ubyte* pt; - size_t i; - - pt = &pool.pagetable[0]; - for (i = pn 
+ 1; i < npages; i++) - { - if (pt[i] != B_PAGEPLUS) - break; - } - info.size = (i - pn) * PAGESIZE; + { + info.size = pool.bPageOffsets[pn] * PAGESIZE; } //////////////////////////////////////////////////////////////////// @@ -1879,7 +1878,7 @@ struct Gcx // reset the offset to the base pointer, otherwise the bits // are the bits for the pointer, which may be garbage offset = cast(size_t)(info.base - pool.baseAddr); - info.attr = getBits(pool, cast(size_t)(offset / 16)); + info.attr = getBits(pool, cast(size_t)(offset / pool.divisor)); cached_info_key = p; cached_info_val = info; @@ -1943,7 +1942,9 @@ struct Gcx size_t reserve(size_t size) { size_t npages = (size + PAGESIZE - 1) / PAGESIZE; - Pool* pool = newPool(npages); + + // Assume reserve() is for small objects. + Pool* pool = newPool(npages, false); if (!pool || pool.extendPages(npages) == OPFAIL) return 0; @@ -1956,22 +1957,18 @@ struct Gcx */ void minimize() { + debug(PRINTF) printf("Minimizing.\n"); size_t n; size_t pn; Pool* pool; size_t ncommitted; + Outer: for (n = 0; n < npools; n++) { pool = pooltable[n]; - ncommitted = pool.ncommitted; - for (pn = 0; pn < ncommitted; pn++) - { - if (cast(Bins)pool.pagetable[pn] != B_FREE) - break; - } - if (pn < ncommitted) - continue; + debug(PRINTF) printFreeInfo(pool); + if(pool.freepages < pool.npages) continue; pool.Dtor(); cstdlib.free(pool); memmove(pooltable + n, @@ -1981,6 +1978,7 @@ struct Gcx } minAddr = pooltable[0].baseAddr; maxAddr = pooltable[npools - 1].topAddr; + debug(PRINTF) printf("Done minimizing.\n"); } @@ -1990,6 +1988,8 @@ struct Gcx */ void *bigAlloc(size_t size, size_t *alloc_size = null) { + debug(PRINTF) printf("In bigAlloc. 
Size: %d\n", size); + Pool* pool; size_t npages; size_t n; @@ -2009,6 +2009,7 @@ struct Gcx for (n = 0; n < npools; n++) { pool = pooltable[n]; + if(!pool.isLargeObject || pool.freepages < npages) continue; pn = pool.allocPages(npages); if (pn != OPFAIL) goto L1; @@ -2028,7 +2029,7 @@ struct Gcx // Release empty pools to prevent bloat minimize(); // Allocate new pool - pool = newPool(npages); + pool = newPool(npages, true); if (!pool) { state = 2; continue; @@ -2040,7 +2041,7 @@ struct Gcx // Release empty pools to prevent bloat minimize(); // Allocate new pool - pool = newPool(npages); + pool = newPool(npages, true); if (!pool) { if (collected) @@ -2059,10 +2060,17 @@ struct Gcx } L1: + debug(PRINTF) printFreeInfo(pool); pool.pagetable[pn] = B_PAGE; if (npages > 1) memset(&pool.pagetable[pn + 1], B_PAGEPLUS, npages - 1); + pool.updateOffsets(pn); + pool.freepages -= npages; + + debug(PRINTF) printFreeInfo(pool); + p = pool.baseAddr + pn * PAGESIZE; + debug(PRINTF) printf("Got large alloc: %p, pt = %d, np = %d\n", p, pool.pagetable[pn], npages); memset(cast(char *)p + size, 0, npages * PAGESIZE - size); debug (MEMSTOMP) memset(p, 0xF1, size); if(alloc_size) @@ -2080,7 +2088,7 @@ struct Gcx * Sort it into pooltable[]. * Return null if failed. 
*/ - Pool *newPool(size_t npages) + Pool *newPool(size_t npages, bool isLargeObject) { Pool* pool; Pool** newpooltable; @@ -2121,7 +2129,7 @@ struct Gcx pool = cast(Pool *)cstdlib.calloc(1, Pool.sizeof); if (pool) { - pool.initialize(npages); + pool.initialize(npages, isLargeObject); if (!pool.baseAddr) goto Lerr; @@ -2171,6 +2179,7 @@ struct Gcx for (n = 0; n < npools; n++) { pool = pooltable[n]; + if(pool.isLargeObject) continue; pn = pool.allocPages(1); if (pn != OPFAIL) goto L1; @@ -2179,6 +2188,7 @@ struct Gcx L1: pool.pagetable[pn] = cast(ubyte)bin; + pool.freepages--; // Convert page to free list size_t size = binsize[bin]; @@ -2229,22 +2239,20 @@ struct Gcx // Adjust bit to be at start of allocated memory block if (bin < B_PAGE) { - biti = (offset & notbinsize[bin]) >> 4; + biti = (offset & notbinsize[bin]) / pool.divisor; //debug(PRINTF) printf("\t\tbiti = x%x\n", biti); } else if (bin == B_PAGE) { - biti = (offset & notbinsize[bin]) >> 4; + biti = (offset & notbinsize[bin]) / pool.divisor; //debug(PRINTF) printf("\t\tbiti = x%x\n", biti); pcache = cast(size_t)p & ~cast(size_t)(PAGESIZE-1); } else if (bin == B_PAGEPLUS) { - do - { --pn; - } while (cast(Bins)pool.pagetable[pn] == B_PAGEPLUS); - biti = pn * (PAGESIZE / 16); + pn -= pool.bPageOffsets[pn]; + biti = pn * (PAGESIZE / pool.divisor); pcache = cast(size_t)p & ~cast(size_t)(PAGESIZE-1); } @@ -2263,7 +2271,7 @@ struct Gcx pool.scan.set(biti); changes = 1; } - debug (LOGGING) log_parent(sentinel_add(pool.baseAddr + biti * 16), sentinel_add(pbot)); + debug (LOGGING) log_parent(sentinel_add(pool.baseAddr + biti * pool.divisor), sentinel_add(pbot)); } } } @@ -2392,6 +2400,8 @@ struct Gcx pool.freebits.zero(); } + debug(COLLECT_PRINTF) printf("Set bits\n"); + // Mark each free entry, so it doesn't get scanned for (n = 0; n < B_PAGE; n++) { @@ -2403,6 +2413,8 @@ struct Gcx } } + debug(COLLECT_PRINTF) printf("Marked free entries.\n"); + for (n = 0; n < npools; n++) { pool = pooltable[n]; @@ -2413,6 +2425,7 
@@ struct Gcx { if (!noStack) { + debug(COLLECT_PRINTF) printf("scanning multithreaded stack.\n"); // Scan stacks and registers for each paused thread thread_scanAll( &mark, stackTop ); } @@ -2465,18 +2478,18 @@ struct Gcx } *b = 0; - auto o = pool.baseAddr + (b - bbase) * (typeof(bitm).sizeof*8) * 16; + auto o = pool.baseAddr + (b - bbase) * (typeof(bitm).sizeof*8) * pool.divisor; if (!(bitm & 0xFFFF)) { bitm >>= 16; - o += 16 * 16; + o += 16 * pool.divisor; } if (!(bitm & 0xFF)) { bitm >>= 8; - o += 8 * 16; + o += 8 * pool.divisor; } - for (; bitm; o += 16, bitm >>= 1) + for (; bitm; o += pool.divisor, bitm >>= 1) { if (!(bitm & 1)) continue; @@ -2491,12 +2504,9 @@ struct Gcx { if (bin == B_PAGEPLUS) { - while (pool.pagetable[pn - 1] != B_PAGE) - pn--; + pn -= pool.bPageOffsets[pn]; } - auto u = 1; - while (pn + u < pool.ncommitted && pool.pagetable[pn + u] == B_PAGEPLUS) - u++; + auto u = pool.bPageOffsets[pn]; mark(o, o + u * PAGESIZE); } } @@ -2517,69 +2527,14 @@ struct Gcx pool = pooltable[n]; auto bbase = pool.mark.base(); auto ncommitted = pool.ncommitted; - for (pn = 0; pn < ncommitted; pn++, bbase += PAGESIZE / (32 * 16)) - { - Bins bin = cast(Bins)pool.pagetable[pn]; - - if (bin < B_PAGE) - { byte* p; - byte* ptop; - size_t biti; - size_t bitstride; - auto size = binsize[bin]; - - p = pool.baseAddr + pn * PAGESIZE; - ptop = p + PAGESIZE; - biti = pn * (PAGESIZE/16); - bitstride = size / 16; - - version(none) // BUG: doesn't work because freebits() must also be cleared - { - // If free'd entire page - if (bbase[0] == 0 && bbase[1] == 0 && bbase[2] == 0 && bbase[3] == 0 && - bbase[4] == 0 && bbase[5] == 0 && bbase[6] == 0 && bbase[7] == 0) - { - for (; p < ptop; p += size, biti += bitstride) - { - if (pool.finals.nbits && pool.finals.testClear(biti)) - rt_finalize(cast(List *)sentinel_add(p), false/*noStack > 0*/); - gcx.clrBits(pool, biti, BlkAttr.ALL_BITS); - - List *list = cast(List *)p; - //debug(PRINTF) printf("\tcollecting %p\n", list); - 
log_free(sentinel_add(list)); - - debug (MEMSTOMP) memset(p, 0xF3, size); - } - pool.pagetable[pn] = B_FREE; - freed += PAGESIZE; - //debug(PRINTF) printf("freeing entire page %d\n", pn); - continue; - } - } - for (; p < ptop; p += size, biti += bitstride) - { - if (!pool.mark.test(biti)) - { - sentinel_Invariant(sentinel_add(p)); - pool.freebits.set(biti); - if (pool.finals.nbits && pool.finals.testClear(biti)) - rt_finalize(cast(List *)sentinel_add(p), false/*noStack > 0*/); - clrBits(pool, biti, BlkAttr.ALL_BITS); - - List *list = cast(List *)p; - debug(PRINTF) printf("\tcollecting %p\n", list); - log_free(sentinel_add(list)); - - debug (MEMSTOMP) memset(p, 0xF3, size); - - freed += size; - } - } - } - else if (bin == B_PAGE) - { size_t biti = pn * (PAGESIZE / 16); + if(pool.isLargeObject) + { + for(pn = 0; pn < ncommitted; pn++) + { + Bins bin = cast(Bins)pool.pagetable[pn]; + if(bin > B_PAGE) continue; + size_t biti = pn; if (!pool.mark.test(biti)) { byte *p = pool.baseAddr + pn * PAGESIZE; @@ -2593,11 +2548,14 @@ struct Gcx log_free(sentinel_add(p)); pool.pagetable[pn] = B_FREE; freedpages++; + pool.freepages++; + debug (MEMSTOMP) memset(p, 0xF3, PAGESIZE); while (pn + 1 < ncommitted && pool.pagetable[pn + 1] == B_PAGEPLUS) { pn++; pool.pagetable[pn] = B_FREE; + pool.freepages++; freedpages++; debug (MEMSTOMP) @@ -2607,6 +2565,75 @@ struct Gcx } } } + + continue; + } + else + { + + for (pn = 0; pn < ncommitted; pn++, bbase += PAGESIZE / (32 * 16)) + { + Bins bin = cast(Bins)pool.pagetable[pn]; + + if (bin < B_PAGE) + { byte* p; + byte* ptop; + size_t biti; + size_t bitstride; + auto size = binsize[bin]; + + p = pool.baseAddr + pn * PAGESIZE; + ptop = p + PAGESIZE; + biti = pn * (PAGESIZE/16); + bitstride = size / 16; + + version(none) // BUG: doesn't work because freebits() must also be cleared + { + // If free'd entire page + if (bbase[0] == 0 && bbase[1] == 0 && bbase[2] == 0 && bbase[3] == 0 && + bbase[4] == 0 && bbase[5] == 0 && bbase[6] == 0 && bbase[7] 
== 0) + { + for (; p < ptop; p += size, biti += bitstride) + { + if (pool.finals.nbits && pool.finals.testClear(biti)) + rt_finalize(cast(List *)sentinel_add(p), false/*noStack > 0*/); + gcx.clrBits(pool, biti, BlkAttr.ALL_BITS); + + List *list = cast(List *)p; + //debug(PRINTF) printf("\tcollecting %p\n", list); + log_free(sentinel_add(list)); + + debug (MEMSTOMP) memset(p, 0xF3, size); + } + pool.pagetable[pn] = B_FREE; + freed += PAGESIZE; + pool.freepages++; + //debug(PRINTF) printf("freeing entire page %d\n", pn); + continue; + } + } + for (; p < ptop; p += size, biti += bitstride) + { + if (!pool.mark.test(biti)) + { + sentinel_Invariant(sentinel_add(p)); + + pool.freebits.set(biti); + if (pool.finals.nbits && pool.finals.testClear(biti)) + rt_finalize(cast(List *)sentinel_add(p), false/*noStack > 0*/); + clrBits(pool, biti, BlkAttr.ALL_BITS); + + List *list = cast(List *)p; + debug(PRINTF) printf("\tcollecting %p\n", list); + log_free(sentinel_add(list)); + + debug (MEMSTOMP) memset(p, 0xF3, size); + + freed += size; + } + } + } + } } } @@ -2621,6 +2648,7 @@ struct Gcx size_t ncommitted; pool = pooltable[n]; + if(pool.isLargeObject) continue; ncommitted = pool.ncommitted; for (pn = 0; pn < ncommitted; pn++) { @@ -2642,6 +2670,7 @@ struct Gcx goto Lnotfree; } pool.pagetable[pn] = B_FREE; + pool.freepages++; recoveredpages++; continue; @@ -2689,7 +2718,7 @@ struct Gcx { assert(p == cast(void*)((cast(size_t)p) & notbinsize[bins])); // return true if the block is not marked. - return !(pool.mark.test(offset / 16)); + return !(pool.mark.test(offset / pool.divisor)); } } return false; // not collecting or pointer is a valid argument. @@ -2916,12 +2945,21 @@ struct Pool GCBits appendable; // entries that are appendable size_t npages; + size_t freepages; // The number of pages not in use. 
size_t ncommitted; // ncommitted <= npages ubyte* pagetable; + bool isLargeObject; - void initialize(size_t npages) + // This tracks how far back we have to go to find the nearest B_PAGE at + // a smaller address than a B_PAGEPLUS. To save space, we use a uint. + // This limits individual allocations to 16 terabytes, assuming a 4k + // pagesize. + uint* bPageOffsets; + + void initialize(size_t npages, bool isLargeObject) { + this.isLargeObject = isLargeObject; size_t poolsize; //debug(PRINTF) printf("Pool::Pool(%u)\n", npages); @@ -2942,19 +2980,30 @@ struct Pool } //assert(baseAddr); topAddr = baseAddr + poolsize; + auto div = this.divisor; + auto nbits = cast(size_t)poolsize / div; - mark.alloc(cast(size_t)poolsize / 16); - scan.alloc(cast(size_t)poolsize / 16); - freebits.alloc(cast(size_t)poolsize / 16); - noscan.alloc(cast(size_t)poolsize / 16); - appendable.alloc(cast(size_t)poolsize / 16); + mark.alloc(nbits); + scan.alloc(nbits); + freebits.alloc(nbits); + noscan.alloc(nbits); + appendable.alloc(nbits); pagetable = cast(ubyte*)cstdlib.malloc(npages); if (!pagetable) onOutOfMemoryError(); + + if(isLargeObject) + { + bPageOffsets = cast(uint*)cstdlib.malloc(npages * uint.sizeof); + if (!bPageOffsets) + onOutOfMemoryError(); + } + memset(pagetable, B_UNCOMMITTED, npages); this.npages = npages; + this.freepages = npages; ncommitted = 0; } @@ -2985,6 +3034,9 @@ struct Pool if (pagetable) cstdlib.free(pagetable); + if(bPageOffsets) + cstdlib.free(bPageOffsets); + mark.Dtor(); scan.Dtor(); freebits.Dtor(); @@ -3021,6 +3073,24 @@ struct Pool } } + // The divisor used for determining bit indices. + size_t divisor() { + return isLargeObject ? 
PAGESIZE : 16; + } + + void updateOffsets(size_t fromWhere) + { + assert(pagetable[fromWhere] == B_PAGE); + size_t pn = fromWhere + 1; + for(uint offset = 1; pn < ncommitted; pn++, offset++) + { + if(pagetable[pn] != B_PAGEPLUS) break; + bPageOffsets[pn] = offset; + } + + // Store the size of the block in bPageOffsets[fromWhere]. + bPageOffsets[fromWhere] = cast(uint) (pn - fromWhere); + } /** * Allocate n pages from Pool. @@ -3028,6 +3098,7 @@ struct Pool */ size_t allocPages(size_t n) { + if(freepages < n) return OPFAIL; size_t i; size_t n2; @@ -3117,7 +3188,16 @@ struct Pool */ void freePages(size_t pagenum, size_t npages) { - memset(&pagetable[pagenum], B_FREE, npages); + //memset(&pagetable[pagenum], B_FREE, npages); + for(size_t i = pagenum; i < npages + pagenum; i++) + { + if(pagetable[i] < B_FREE) + { + freepages++; + } + + pagetable[i] = B_FREE; + } } From 622d112d280aaef18af1a408bc82106c8bc8f200 Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 18:41:20 -0500 Subject: [PATCH 04/13] Improve search for large free blocks. --- src/gc/gcx.d | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index e92b8e41ee..eaa7619957 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -3114,7 +3114,15 @@ struct Pool } } else + { n2 = n; + if(pagetable[i] == B_PAGE) + { + // Then we have the offset information. We can skip a + // whole bunch of stuff. + i += bPageOffsets[i] - 1; + } + } } return extendPages(n); } From a441a797ac0fb3fe1d8e9d48ce635512a17c006b Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 18:59:16 -0500 Subject: [PATCH 05/13] Added searchStart. 
--- src/gc/gcx.d | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index eaa7619957..e1ed21ab19 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -2547,6 +2547,7 @@ struct Gcx debug(COLLECT_PRINTF) printf("\tcollecting big %p\n", p); log_free(sentinel_add(p)); pool.pagetable[pn] = B_FREE; + if(pn < pool.searchStart) pool.searchStart = pn; freedpages++; pool.freepages++; @@ -2555,6 +2556,11 @@ struct Gcx { pn++; pool.pagetable[pn] = B_FREE; + + // Don't need to update searchStart here because + // pn is guaranteed to be greater than last time + // we updated it. + pool.freepages++; freedpages++; @@ -2606,6 +2612,7 @@ struct Gcx debug (MEMSTOMP) memset(p, 0xF3, size); } pool.pagetable[pn] = B_FREE; + if(pn < pool.searchStart) pool.searchStart = pn; freed += PAGESIZE; pool.freepages++; //debug(PRINTF) printf("freeing entire page %d\n", pn); @@ -2670,6 +2677,7 @@ struct Gcx goto Lnotfree; } pool.pagetable[pn] = B_FREE; + if(pn < pool.searchStart) pool.searchStart = pn; pool.freepages++; recoveredpages++; continue; @@ -2957,6 +2965,11 @@ struct Pool // pagesize. uint* bPageOffsets; + // This variable tracks a conservative estimate of where the first free + // page in this pool is, so that if a lot of pages towards the beginning + // are occupied, we can bypass them in O(1). 
+ size_t searchStart; + void initialize(size_t npages, bool isLargeObject) { this.isLargeObject = isLargeObject; @@ -3104,10 +3117,15 @@ struct Pool //debug(PRINTF) printf("Pool::allocPages(n = %d)\n", n); n2 = n; - for (i = 0; i < ncommitted; i++) + for (i = searchStart; i < ncommitted; i++) { if (pagetable[i] == B_FREE) { + if(pagetable[searchStart] < B_FREE) + { + searchStart = i + (!isLargeObject); + } + if (--n2 == 0) { //debug(PRINTF) printf("\texisting pn = %d\n", i - n + 1); return i - n + 1; @@ -3124,6 +3142,12 @@ struct Pool } } } + + if(pagetable[searchStart] < B_FREE) + { + searchStart = ncommitted; + } + return extendPages(n); } @@ -3197,6 +3221,8 @@ struct Pool void freePages(size_t pagenum, size_t npages) { //memset(&pagetable[pagenum], B_FREE, npages); + if(pagenum < searchStart) searchStart = pagenum; + for(size_t i = pagenum; i < npages + pagenum; i++) { if(pagetable[i] < B_FREE) From c5d9ad3280eaf093414229549b08f06d8a291458 Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 19:23:17 -0500 Subject: [PATCH 06/13] Get rid of freebits for large objects. --- src/gc/gcx.d | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index e1ed21ab19..9f0948821b 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -2397,7 +2397,7 @@ struct Gcx pool = pooltable[n]; pool.mark.zero(); pool.scan.zero(); - pool.freebits.zero(); + if(!pool.isLargeObject) pool.freebits.zero(); } debug(COLLECT_PRINTF) printf("Set bits\n"); @@ -2418,7 +2418,10 @@ struct Gcx for (n = 0; n < npools; n++) { pool = pooltable[n]; - pool.mark.copy(&pool.freebits); + if(!pool.isLargeObject) + { + pool.mark.copy(&pool.freebits); + } } version (MULTI_THREADED) @@ -2998,7 +3001,11 @@ struct Pool mark.alloc(nbits); scan.alloc(nbits); - freebits.alloc(nbits); + + // pagetable already keeps track of what's free for the large object + // pool. 
+ if(!isLargeObject) freebits.alloc(nbits); + noscan.alloc(nbits); appendable.alloc(nbits); @@ -3052,7 +3059,7 @@ struct Pool mark.Dtor(); scan.Dtor(); - freebits.Dtor(); + if(!isLargeObject) freebits.Dtor(); finals.Dtor(); noscan.Dtor(); appendable.Dtor(); From b94ca289e63714506dfd163e60cafe8bccd722c5 Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 19:53:06 -0500 Subject: [PATCH 07/13] Use bsf for marking. --- src/gc/gcx.d | 63 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index 9f0948821b..a9d3edccbf 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -41,6 +41,7 @@ private import gc.gcalloc; private import cstdlib = core.stdc.stdlib : calloc, free, malloc, realloc; private import core.stdc.string; +private import core.bitop; version (GNU) import gcc.builtins; @@ -443,19 +444,7 @@ class GC //debug(PRINTF) printf("gcx.self = %x, pthread_self() = %x\n", gcx.self, pthread_self()); size += SENTINEL_EXTRA; - - // Compute size bin - // Cache previous binsize lookup - Dave Fladebo. - __gshared size_t lastsize = -1; - __gshared Bins lastbin; - if (size == lastsize) - bin = lastbin; - else - { - bin = gcx.findBin(size); - lastsize = size; - lastbin = bin; - } + bin = gcx.findBin(size); if (bin < B_PAGE) { @@ -1891,6 +1880,15 @@ struct Gcx * Compute bin for size. */ static Bins findBin(size_t size) + { + static const byte[2049] binTable = ctfeBins(); + + return (size <= 2048) ? + (cast(Bins) binTable[size]) : + B_PAGE; + } + + static Bins findBinImpl(size_t size) { Bins bin; if (size <= 256) @@ -1932,6 +1930,20 @@ struct Gcx return bin; } + /** + * Computes the bin table using CTFE. + */ + static byte[2049] ctfeBins() + { + byte[2049] ret; + for(size_t i = 0; i < 2049; i++) + { + ret[i] = cast(byte) findBinImpl(i); + } + + return ret; + } + /** * Allocate a new pool of at least size bytes. 
@@ -2482,21 +2494,13 @@ struct Gcx *b = 0; auto o = pool.baseAddr + (b - bbase) * (typeof(bitm).sizeof*8) * pool.divisor; - if (!(bitm & 0xFFFF)) - { - bitm >>= 16; - o += 16 * pool.divisor; - } - if (!(bitm & 0xFF)) - { - bitm >>= 8; - o += 8 * pool.divisor; - } - for (; bitm; o += pool.divisor, bitm >>= 1) - { - if (!(bitm & 1)) - continue; + auto firstset = bsf(bitm); + bitm >>= firstset; + o += firstset * pool.divisor; + + while(bitm) + { auto pn = cast(size_t)(o - pool.baseAddr) / PAGESIZE; auto bin = cast(Bins)pool.pagetable[pn]; if (bin < B_PAGE) @@ -2512,6 +2516,11 @@ struct Gcx auto u = pool.bPageOffsets[pn]; mark(o, o + u * PAGESIZE); } + + bitm >>= 1; + auto nbits = bsf(bitm); + bitm >>= nbits; + o += (nbits + 1) * pool.divisor; } } } From ddfa3b020ae8e86a3bfd59af4cac27ba602ecb14 Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 20:08:34 -0500 Subject: [PATCH 08/13] Git rid of unnecessary findPool() call in mallocNoSync. --- src/gc/gcx.d | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index a9d3edccbf..67bbce20dc 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -445,6 +445,7 @@ class GC size += SENTINEL_EXTRA; bin = gcx.findBin(size); + Pool *pool; if (bin < B_PAGE) { @@ -489,6 +490,7 @@ class GC // Return next item from free list gcx.bucket[bin] = (cast(List*)p).next; + pool = (cast(List*)p).pool; if( !(bits & BlkAttr.NO_SCAN) ) memset(p + size, 0, binsize[bin] - size); //debug(PRINTF) printf("\tmalloc => %p\n", p); @@ -496,7 +498,7 @@ class GC } else { - p = gcx.bigAlloc(size, alloc_size); + p = gcx.bigAlloc(size, &pool, alloc_size); if (!p) onOutOfMemoryError(); } @@ -507,9 +509,6 @@ class GC if (bits) { - Pool *pool = gcx.findPool(p); - assert(pool); - gcx.setBits(pool, cast(size_t)(p - pool.baseAddr) / pool.divisor, bits); } return p; @@ -906,6 +905,7 @@ class GC debug (MEMSTOMP) memset(p, 0xF2, binsize[bin]); list.next = gcx.bucket[bin]; + list.pool = pool; gcx.bucket[bin] 
= list; } gcx.log_free(sentinel_add(p)); @@ -1422,6 +1422,7 @@ alias ubyte Bins; struct List { List *next; + Pool *pool; } @@ -1998,7 +1999,7 @@ struct Gcx * Allocate a chunk of memory that is larger than a page. * Return null if out of memory. */ - void *bigAlloc(size_t size, size_t *alloc_size = null) + void *bigAlloc(size_t size, Pool **poolPtr, size_t *alloc_size = null) { debug(PRINTF) printf("In bigAlloc. Size: %d\n", size); @@ -2088,6 +2089,8 @@ struct Gcx if(alloc_size) *alloc_size = npages * PAGESIZE; //debug(PRINTF) printf("\tp = %p\n", p); + + *poolPtr = pool; return p; Lnomemory: @@ -2211,6 +2214,7 @@ struct Gcx for (; p < ptop; p += size) { (cast(List *)p).next = *b; + (cast(List *)p).pool = pool; *b = cast(List *)p; } return 1; @@ -2419,7 +2423,7 @@ struct Gcx { for (List *list = bucket[n]; list; list = list.next) { - pool = findPool(list); + pool = list.pool; assert(pool); pool.freebits.set(cast(size_t)(cast(byte*)list - pool.baseAddr) / 16); } @@ -2704,6 +2708,7 @@ struct Gcx list = cast(List *)(p + u); if (list.next != bucket[bin]) // avoid unnecessary writes list.next = bucket[bin]; + list.pool = pool; bucket[bin] = list; } } From 779b26e5ed7d1366d77a423b52b341d7430c0fb9 Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 20:14:24 -0500 Subject: [PATCH 09/13] Get rid of some dead code. --- src/gc/gcx.d | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index 67bbce20dc..db40166f12 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -2511,12 +2511,8 @@ struct Gcx { mark(o, o + binsize[bin]); } - else if (bin == B_PAGE || bin == B_PAGEPLUS) + else if (bin == B_PAGE) { - if (bin == B_PAGEPLUS) - { - pn -= pool.bPageOffsets[pn]; - } auto u = pool.bPageOffsets[pn]; mark(o, o + u * PAGESIZE); } From cafe97273d9ba390ff58888769a849ef776c8487 Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 21:25:51 -0500 Subject: [PATCH 10/13] Store pool-specific changes info. 
--- src/gc/gcx.d | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index db40166f12..c3a233e096 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -2286,6 +2286,7 @@ struct Gcx { pool.scan.set(biti); changes = 1; + pool.newChanges = true; } debug (LOGGING) log_parent(sentinel_add(pool.baseAddr + biti * pool.divisor), sentinel_add(pbot)); } @@ -2434,6 +2435,7 @@ struct Gcx for (n = 0; n < npools; n++) { pool = pooltable[n]; + pool.newChanges = false; // Some of these get set to true on stack scan. if(!pool.isLargeObject) { pool.mark.copy(&pool.freebits); @@ -2479,11 +2481,19 @@ struct Gcx debug(COLLECT_PRINTF) printf("\tscan heap\n"); while (anychanges) { + for (n = 0; n < npools; n++) + { + pool = pooltable[n]; + pool.oldChanges = pool.newChanges; + pool.newChanges = false; + } + debug(COLLECT_PRINTF) printf("\t\tpass\n"); anychanges = 0; for (n = 0; n < npools; n++) { pool = pooltable[n]; + if(!pool.oldChanges) continue; auto bbase = pool.scan.base(); auto btop = bbase + pool.scan.nwords; @@ -2971,6 +2981,8 @@ struct Pool ubyte* pagetable; bool isLargeObject; + bool oldChanges; // Whether there were changes on the last mark. + bool newChanges; // Whether there were changes on the current mark. // This tracks how far back we have to go to find the nearest B_PAGE at // a smaller address than a B_PAGEPLUS. To save space, we use a uint. From a486825413569c23faca43dbd7c760fc6c3602c4 Mon Sep 17 00:00:00 2001 From: dsimcha Date: Thu, 24 Feb 2011 22:16:14 -0500 Subject: [PATCH 11/13] Use bit shifting for multiplication, too. 
--- src/gc/gcx.d | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index c3a233e096..3e73e2c12c 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -317,7 +317,7 @@ class GC if (pool) { - auto biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; + auto biti = cast(size_t)(p - pool.baseAddr) >> pool.shiftBy; oldb = gcx.getBits(pool, biti); } @@ -352,7 +352,7 @@ class GC if (pool) { - auto biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; + auto biti = cast(size_t)(p - pool.baseAddr) >> pool.shiftBy; oldb = gcx.getBits(pool, biti); gcx.setBits(pool, biti, mask); @@ -388,7 +388,7 @@ class GC if (pool) { - auto biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; + auto biti = cast(size_t)(p - pool.baseAddr) >> pool.shiftBy; oldb = gcx.getBits(pool, biti); gcx.clrBits(pool, biti, mask); @@ -509,7 +509,7 @@ class GC if (bits) { - gcx.setBits(pool, cast(size_t)(p - pool.baseAddr) / pool.divisor, bits); + gcx.setBits(pool, cast(size_t)(p - pool.baseAddr) >> pool.shiftBy, bits); } return p; } @@ -600,7 +600,7 @@ class GC if (pool) { - auto biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; + auto biti = cast(size_t)(p - pool.baseAddr) >> pool.shiftBy; if (bits) { @@ -689,7 +689,7 @@ class GC if (pool) { - auto biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; + auto biti = cast(size_t)(p - pool.baseAddr) >> pool.shiftBy; if (bits) { @@ -885,7 +885,7 @@ class GC debug(PRINTF) printf("pool base = %p, PAGENUM = %d of %d / %d, bin = %d\n", pool.baseAddr, pagenum, pool.ncommitted, pool.npages, pool.pagetable[pagenum]); debug(PRINTF) if(pool.isLargeObject) printf("Block size = %d\n", pool.bPageOffsets[pagenum]); - biti = cast(size_t)(p - pool.baseAddr) / pool.divisor; + biti = cast(size_t)(p - pool.baseAddr) >> pool.shiftBy; gcx.clrBits(pool, biti, BlkAttr.ALL_BITS); @@ -1868,7 +1868,7 @@ struct Gcx // reset the offset to the base pointer, otherwise the bits // are the bits for 
the pointer, which may be garbage offset = cast(size_t)(info.base - pool.baseAddr); - info.attr = getBits(pool, cast(size_t)(offset / pool.divisor)); + info.attr = getBits(pool, cast(size_t)(offset >> pool.shiftBy)); cached_info_key = p; cached_info_val = info; @@ -2255,12 +2255,12 @@ struct Gcx // Adjust bit to be at start of allocated memory block if (bin < B_PAGE) { - biti = (offset & notbinsize[bin]) / pool.divisor; + biti = (offset & notbinsize[bin]) >> pool.shiftBy; //debug(PRINTF) printf("\t\tbiti = x%x\n", biti); } else if (bin == B_PAGE) { - biti = (offset & notbinsize[bin]) / pool.divisor; + biti = (offset & notbinsize[bin]) >> pool.shiftBy; //debug(PRINTF) printf("\t\tbiti = x%x\n", biti); pcache = cast(size_t)p & ~cast(size_t)(PAGESIZE-1); @@ -2268,7 +2268,7 @@ struct Gcx else if (bin == B_PAGEPLUS) { pn -= pool.bPageOffsets[pn]; - biti = pn * (PAGESIZE / pool.divisor); + biti = pn * (PAGESIZE >> pool.shiftBy); pcache = cast(size_t)p & ~cast(size_t)(PAGESIZE-1); } @@ -2288,7 +2288,7 @@ struct Gcx changes = 1; pool.newChanges = true; } - debug (LOGGING) log_parent(sentinel_add(pool.baseAddr + biti * pool.divisor), sentinel_add(pbot)); + debug (LOGGING) log_parent(sentinel_add(pool.baseAddr + (biti << pool.shiftBy)), sentinel_add(pbot)); } } } @@ -2507,11 +2507,11 @@ struct Gcx } *b = 0; - auto o = pool.baseAddr + (b - bbase) * (typeof(bitm).sizeof*8) * pool.divisor; + auto o = pool.baseAddr + (b - bbase) * ((typeof(bitm).sizeof*8) << pool.shiftBy); auto firstset = bsf(bitm); bitm >>= firstset; - o += firstset * pool.divisor; + o += firstset << pool.shiftBy; while(bitm) { @@ -2530,7 +2530,7 @@ struct Gcx bitm >>= 1; auto nbits = bsf(bitm); bitm >>= nbits; - o += (nbits + 1) * pool.divisor; + o += (nbits + 1) << pool.shiftBy; } } } @@ -2749,7 +2749,7 @@ struct Gcx { assert(p == cast(void*)((cast(size_t)p) & notbinsize[bins])); // return true if the block is not marked. 
- return !(pool.mark.test(offset / pool.divisor)); + return !(pool.mark.test(offset >> pool.shiftBy)); } } return false; // not collecting or pointer is a valid argument. @@ -3116,10 +3116,17 @@ struct Pool } // The divisor used for determining bit indices. - size_t divisor() { + size_t divisor() + { return isLargeObject ? PAGESIZE : 16; } + // Bit shift for fast division by divisor. + uint shiftBy() + { + return isLargeObject ? 12 : 4; + } + void updateOffsets(size_t fromWhere) { assert(pagetable[fromWhere] == B_PAGE); From 729771b264f9af2b8604cc8fcb0960405c086fbb Mon Sep 17 00:00:00 2001 From: dsimcha Date: Fri, 25 Feb 2011 22:42:40 -0500 Subject: [PATCH 12/13] Add profiling instrumentation, a little refactoring that I couldn't resist. --- src/gc/gcx.d | 77 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 12 deletions(-) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index 3e73e2c12c..c930e52d76 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -59,6 +59,17 @@ debug(PRINTF) void printFreeInfo(Pool* pool) printf("Pool %p: %d really free, %d supposedly be free\n", pool, nReallyFree, pool.freepages); } +debug(PROFILING) +{ + // Track total time spent preparing for GC, + // marking, sweeping and recovering pages. 
+ import core.stdc.stdio, core.stdc.time; + __gshared long prepTime; + __gshared long markTime; + __gshared long sweepTime; + __gshared long recoverTime; +} + private { enum USE_CACHE = true; @@ -1439,7 +1450,6 @@ immutable size_t notbinsize[B_MAX] = [ ~(16-1),~(32-1),~(64-1),~(128-1),~(256-1) /* ============================ Gcx =============================== */ - struct Gcx { debug (THREADINVARIANT) @@ -1502,6 +1512,21 @@ struct Gcx void Dtor() { + debug(PROFILING) + { + printf("\tTotal GC prep time: %d milliseconds\n", + prepTime * 1000 / CLOCKS_PER_SEC); + printf("\tTotal mark time: %d milliseconds\n", + markTime * 1000 / CLOCKS_PER_SEC); + printf("\tTotal sweep time: %d milliseconds\n", + sweepTime * 1000 / CLOCKS_PER_SEC); + printf("\tTotal page recovery time: %d milliseconds\n", + recoverTime * 1000 / CLOCKS_PER_SEC); + printf("\tGrand total GC time: %d milliseconds\n", + 1000 * (recoverTime + sweepTime + markTime + prepTime) + / CLOCKS_PER_SEC); + } + inited = 0; for (size_t i = 0; i < npools; i++) @@ -2398,6 +2423,12 @@ struct Gcx size_t n; Pool* pool; + debug(PROFILING) + { + clock_t start, stop; + start = clock(); + } + debug(COLLECT_PRINTF) printf("Gcx.fullcollect()\n"); //printf("\tpool address range = %p .. 
%p\n", minAddr, maxAddr); @@ -2464,6 +2495,13 @@ struct Gcx } } + debug(PROFILING) + { + stop = clock(); + prepTime += (stop - start); + start = stop; + } + // Scan roots[] debug(COLLECT_PRINTF) printf("\tscan roots[]\n"); mark(roots, roots + nroots); @@ -2539,6 +2577,13 @@ struct Gcx thread_processGCMarks(); thread_resumeAll(); + debug(PROFILING) + { + stop = clock(); + markTime += (stop - start); + start = stop; + } + // Free up everything not marked debug(COLLECT_PRINTF) printf("\tfree'ing\n"); size_t freedpages = 0; @@ -2547,7 +2592,6 @@ struct Gcx { size_t pn; pool = pooltable[n]; - auto bbase = pool.mark.base(); auto ncommitted = pool.ncommitted; if(pool.isLargeObject) @@ -2599,21 +2643,17 @@ struct Gcx else { - for (pn = 0; pn < ncommitted; pn++, bbase += PAGESIZE / (32 * 16)) + for (pn = 0; pn < ncommitted; pn++) { Bins bin = cast(Bins)pool.pagetable[pn]; if (bin < B_PAGE) - { byte* p; - byte* ptop; - size_t biti; - size_t bitstride; + { auto size = binsize[bin]; - - p = pool.baseAddr + pn * PAGESIZE; - ptop = p + PAGESIZE; - biti = pn * (PAGESIZE/16); - bitstride = size / 16; + byte *p = pool.baseAddr + pn * PAGESIZE; + byte *ptop = p + PAGESIZE; + size_t biti = pn * (PAGESIZE/16); + size_t bitstride = size / 16; version(none) // BUG: doesn't work because freebits() must also be cleared { @@ -2666,6 +2706,13 @@ struct Gcx } } + debug(PROFILING) + { + stop = clock(); + sweepTime += (stop - start); + start = stop; + } + // Zero buckets bucket[] = null; @@ -2722,6 +2769,12 @@ struct Gcx } } + debug(PROFILING) + { + stop = clock(); + recoverTime += (stop - start); + } + debug(COLLECT_PRINTF) printf("\trecovered pages = %d\n", recoveredpages); debug(COLLECT_PRINTF) printf("\tfree'd %u bytes, %u pages from %u pools\n", freed, freedpages, npools); From ed01de1da7ee46609df5622236217d816024f3f7 Mon Sep 17 00:00:00 2001 From: dsimcha Date: Sat, 26 Feb 2011 20:47:48 -0500 Subject: [PATCH 13/13] Microoptimization: store shiftBy. 
--- src/gc/gcx.d | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gc/gcx.d b/src/gc/gcx.d index c930e52d76..35803ccad1 100644 --- a/src/gc/gcx.d +++ b/src/gc/gcx.d @@ -2533,6 +2533,7 @@ struct Gcx pool = pooltable[n]; if(!pool.oldChanges) continue; + auto shiftBy = pool.shiftBy; auto bbase = pool.scan.base(); auto btop = bbase + pool.scan.nwords; //printf("\t\tn = %d, bbase = %p, btop = %p\n", n, bbase, btop); @@ -2545,11 +2546,11 @@ struct Gcx } *b = 0; - auto o = pool.baseAddr + (b - bbase) * ((typeof(bitm).sizeof*8) << pool.shiftBy); + auto o = pool.baseAddr + (b - bbase) * ((typeof(bitm).sizeof*8) << shiftBy); auto firstset = bsf(bitm); bitm >>= firstset; - o += firstset << pool.shiftBy; + o += firstset << shiftBy; while(bitm) { @@ -2568,7 +2569,7 @@ struct Gcx bitm >>= 1; auto nbits = bsf(bitm); bitm >>= nbits; - o += (nbits + 1) << pool.shiftBy; + o += (nbits + 1) << shiftBy; } } }