diff --git a/.circleci/config.yml b/.circleci/config.yml index 38356ef470978..1a52e0a9eeda9 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -50,7 +50,7 @@ commands: - run: name: embuilder (LTO) command: | - python3 ./embuilder.py build libcompiler_rt libc libc++abi libc++abi-noexcept libc++ libc++-noexcept libal libdlmalloc libpthread_stub libc_rt_wasm struct_info libc-wasm --lto + python3 ./embuilder.py build libcompiler_rt libc libc++abi libc++abi-noexcept libc++ libc++-noexcept libal libdlmalloc libdlmalloc-debug libemmalloc libemmalloc-64bit libpthread_stub libc_rt_wasm struct_info libc-wasm --lto python3 tests/runner.py test_hello_world - run: name: embuilder (PIC) @@ -60,7 +60,7 @@ commands: - run: name: embuilder (PIC+LTO) command: | - python3 ./embuilder.py build libcompiler_rt libc libc++abi libc++abi-noexcept libc++ libc++-noexcept libal libdlmalloc libpthread_stub libc_rt_wasm struct_info libc-wasm --pic --lto + python3 ./embuilder.py build libcompiler_rt libc libc++abi libc++abi-noexcept libc++ libc++-noexcept libal libdlmalloc libdlmalloc-debug libemmalloc libemmalloc-64bit libpthread_stub libc_rt_wasm struct_info libc-wasm --pic --lto python3 tests/runner.py test_hello_world - run: name: freeze cache diff --git a/emcc.py b/emcc.py index bf6a6bcad9889..157b4aa7378f7 100755 --- a/emcc.py +++ b/emcc.py @@ -1291,6 +1291,13 @@ def is_supported_link_flag(f): shared.Settings.FETCH = 1 shared.Settings.SYSTEM_JS_LIBRARIES.append(shared.path_from_root('src', 'library_asmfs.js')) + # Explicitly drop linking in a malloc implementation if program is not using any dynamic allocation calls. 
+ if not shared.Settings.USES_DYNAMIC_ALLOC: + shared.Settings.MALLOC = 'none' + + if shared.Settings.MALLOC == 'emmalloc': + shared.Settings.SYSTEM_JS_LIBRARIES.append(shared.path_from_root('src', 'library_emmalloc.js')) + if shared.Settings.FETCH and final_suffix in JS_CONTAINING_ENDINGS: forced_stdlibs.append('libfetch') next_arg_index += 1 diff --git a/src/deps_info.json b/src/deps_info.json index 2e801a92713ed..8914d7a5dbd4f 100644 --- a/src/deps_info.json +++ b/src/deps_info.json @@ -94,6 +94,5 @@ "_embind_register_std_string": ["malloc", "free"], "_embind_register_std_wstring": ["malloc", "free"], "__syscall192": ["emscripten_builtin_memalign"], - "pthread_create": ["malloc", "free"] + "pthread_create": ["malloc", "free", "emscripten_main_thread_process_queued_calls"] } - diff --git a/src/library_emmalloc.js b/src/library_emmalloc.js new file mode 100644 index 0000000000000..27fa5962368c9 --- /dev/null +++ b/src/library_emmalloc.js @@ -0,0 +1,24 @@ +mergeInto(LibraryManager.library, { + emmalloc_unclaimed_heap_memory__deps: ['emscripten_get_sbrk_ptr'], + emmalloc_unclaimed_heap_memory: function() { + var dynamicTop = HEAPU32[_emscripten_get_sbrk_ptr()>>2]; +#if ALLOW_MEMORY_GROWTH +#if WASM +#if WASM_MEM_MAX != -1 + // Using WASM_MEM_MAX to constrain max heap size. + return {{{ WASM_MEM_MAX }}} - dynamicTop; +#else + // Not using a Wasm memory bound. + return 2*1024*1024*1024 - 65536 - dynamicTop; +#endif +#else + // asm.js: + return 2*1024*1024*1024 - 16777216 - dynamicTop; +#endif +#else + // ALLOW_MEMORY_GROWTH is disabled, the current heap size + // is all we got. 
+ return HEAPU8.length - dynamicTop; +#endif + } +}); diff --git a/src/library_pthread.js b/src/library_pthread.js index 474b3ebf27ddd..fd73fbd8495ab 100644 --- a/src/library_pthread.js +++ b/src/library_pthread.js @@ -6,7 +6,6 @@ var LibraryPThread = { $PThread__postset: 'if (!ENVIRONMENT_IS_PTHREAD) PThread.initMainThreadBlock(); else PThread.initWorker();', $PThread__deps: ['$PROCINFO', '_register_pthread_ptr', - 'emscripten_main_thread_process_queued_calls', '$ERRNO_CODES', 'emscripten_futex_wake', '_kill_thread', '_cancel_thread', '_cleanup_thread'], $PThread: { diff --git a/src/settings.js b/src/settings.js index 0eb7650f6a539..9d3c9970fbfb3 100644 --- a/src/settings.js +++ b/src/settings.js @@ -114,8 +114,8 @@ var TOTAL_MEMORY = 16777216; // * emmalloc - a simple and compact malloc designed for emscripten // * none - no malloc() implementation is provided, but you must implement // malloc() and free() yourself. -// dlmalloc is necessary for multithreading, split memory, and other special -// modes, and will be used automatically in those cases. +// dlmalloc is necessary for split memory and other special modes, and will be +// used automatically in those cases. // In general, if you don't need one of those special modes, and if you don't // allocate very many small objects, you should use emmalloc since it's // smaller. Otherwise, if you do allocate many small objects, dlmalloc diff --git a/system/include/emscripten/emmalloc.h b/system/include/emscripten/emmalloc.h new file mode 100644 index 0000000000000..07c5fcb069682 --- /dev/null +++ b/system/include/emscripten/emmalloc.h @@ -0,0 +1,122 @@ +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// emmalloc: A lightweight web-friendly memory allocator suitable for very small applications. +// Enable the usage of emmalloc by passing the linker flag -s MALLOC=emmalloc to the application. + +// A debug function that dumps the whole structure of malloc internal memory blocks to console. 
+// *extremely slow*, use for debugging allocation test cases. +void emmalloc_dump_memory_regions(void); + +// Allocates size bytes with the given pow-2 alignment. +void *memalign(size_t alignment, size_t size); +void *emmalloc_memalign(size_t alignment, size_t size); +void *emscripten_builtin_memalign(size_t alignment, size_t size); +void *aligned_alloc(size_t alignment, size_t size); + +// Allocates size bytes with default alignment (8 bytes) +void *malloc(size_t size); +void *emmalloc_malloc(size_t size); +void *emscripten_builtin_malloc(size_t size); + +// Returns the number of bytes that are actually allocated to the given pointer ptr. +// E.g. due to alignment or size requirements, the actual size of the allocation can be +// larger than what was requested. +size_t malloc_usable_size(void *ptr); +size_t emmalloc_usable_size(void *ptr); + +// Frees a memory pointer allocated with any of +// emmalloc_memalign, emmalloc_malloc, +void free(void *ptr); +void emmalloc_free(void *ptr); +void emscripten_builtin_free(void *ptr); + +// Performs a reallocation of the given memory pointer to a new size. If the memory region +// pointed by ptr cannot be resized in place, a new memory region will be allocated, old +// memory copied over, and the old memory area freed. The pointer ptr must have been +// allocated with one of the emmalloc memory allocation functions (malloc, memalign, ...). +// If called with size == 0, the pointer ptr is freed, and a null pointer is returned. If +// called with null ptr, a new pointer is allocated. +void *realloc(void *ptr, size_t size); +void *emmalloc_realloc(void *ptr, size_t size); + +// emmalloc_realloc_try() is like realloc(), but only attempts to try to resize the existing +// memory area. If resizing the existing memory area fails, then realloc_try() will return 0 +// (the original memory block is not freed or modified). If resizing succeeds, previous +// memory contents will be valid up to min(old length, new length) bytes. 
+// If a null pointer is passed, no allocation is attempted but the function will return 0. +// If zero size is passed, the function will behave like free(). +void *emmalloc_realloc_try(void *ptr, size_t size); + +// emmalloc_realloc_uninitialized() is like realloc(), but old memory contents +// will be undefined after reallocation. (old memory is not preserved in any case) +void *emmalloc_realloc_uninitialized(void *ptr, size_t size); + +// Like realloc(), but allows specifying the alignment to allocate to. This function cannot +// be used to change the alignment of an existing allocation, but the original pointer should +// be aligned to the given alignment already. +void *aligned_realloc(void *ptr, size_t alignment, size_t size); +void *emmalloc_aligned_realloc(void *ptr, size_t alignment, size_t size); + +// emmalloc_aligned_realloc_uninitialized() is like aligned_realloc(), but old memory contents +// will be undefined after reallocation. (old memory is not preserved in any case) +void *emmalloc_aligned_realloc_uninitialized(void *ptr, size_t alignment, size_t size); + +// posix_memalign allocates memory with a given alignment, like memalign, but with a slightly +// different usage signature. +int posix_memalign(void **memptr, size_t alignment, size_t size); +int emmalloc_posix_memalign(void **memptr, size_t alignment, size_t size); + +// calloc allocates memory that is initialized to zero. +void *calloc(size_t num, size_t size); +void *emmalloc_calloc(size_t num, size_t size); + +// mallinfo() returns information about current emmalloc allocation state. This function +// is very slow, only good for debugging. Avoid calling it for "routine" diagnostics. +struct mallinfo mallinfo(); +struct mallinfo emmalloc_mallinfo(); + +// malloc_trim() returns unused dynamic memory back to the WebAssembly heap. Returns 1 if it +// actually freed any memory, and 0 if not. 
Note: this function does not release memory back to +// the system, but it only marks memory held by emmalloc back to unused state for other users +// of sbrk() to claim. +int malloc_trim(size_t pad); +int emmalloc_trim(size_t pad); + +// Validates the consistency of the malloc heap. Returns non-zero and prints an error to console +// if memory map is corrupt. Returns 0 (and does not print anything) if memory is intact. +int emmalloc_validate_memory_regions(void); + +// Computes the size of the dynamic memory region governed by emmalloc. This represents the +// amount of memory that emmalloc has sbrk()ed in for itself to manage. Use this function +// for memory statistics tracking purposes. Calling this function is quite fast, practically +// O(1) time. +size_t emmalloc_dynamic_heap_size(void); + +// Computes the amount of memory currently reserved under emmalloc's governance that is free +// for the application to allocate. Use this function for memory statistics tracking purposes. +// Note that calling this function is very slow, as it walks through each free memory block in +// linear time. +size_t emmalloc_free_dynamic_memory(void); + +// Estimates the amount of untapped memory that emmalloc could expand its dynamic memory area +// via sbrk()ing. Theoretically the maximum amount of memory that can still be malloc()ed can +// be calculated via emmalloc_free_dynamic_memory() + emmalloc_unclaimed_heap_memory(). +// Calling this function is very fast constant time lookup. +size_t emmalloc_unclaimed_heap_memory(void); + +// Computes a detailed fragmentation map of available free memory. Pass in a pointer to a +// 32 element long array. This function populates into each array index i the number of free +// memory regions that have a size 2^i <= size < 2^(i+1), and returns the total number of +// free memory regions (the sum of the array entries). This function runs very slowly, as it +// iterates through all free memory blocks. 
+size_t emmalloc_compute_free_dynamic_memory_fragmentation_map(size_t freeMemorySizeMap[32]); + +#ifdef __cplusplus +} +#endif diff --git a/system/include/emscripten/heap.h b/system/include/emscripten/heap.h index 9ceff48864c1c..ccccfb639a634 100644 --- a/system/include/emscripten/heap.h +++ b/system/include/emscripten/heap.h @@ -13,8 +13,18 @@ extern "C" { #endif +// Returns a pointer to a memory location that contains the heap DYNAMICTOP +// variable (the end of the dynamic memory region) intptr_t *emscripten_get_sbrk_ptr(void); -int emscripten_resize_heap(size_t requested_size); + +// Attempts to geometrically or linearly increase the heap so that it +// grows by at least requested_growth_bytes new bytes. The heap size may +// be overallocated, see src/settings.js variables MEMORY_GROWTH_GEOMETRIC_STEP, +// MEMORY_GROWTH_GEOMETRIC_CAP and MEMORY_GROWTH_LINEAR_STEP. This function +// cannot be used to shrink the size of the heap. +int emscripten_resize_heap(size_t requested_growth_bytes); + +// Returns the current size of the WebAssembly heap. size_t emscripten_get_heap_size(void); #ifdef __cplusplus diff --git a/system/lib/emmalloc.cpp b/system/lib/emmalloc.cpp index e7c90a0ef0feb..417ae3f335adc 100644 --- a/system/lib/emmalloc.cpp +++ b/system/lib/emmalloc.cpp @@ -4,29 +4,30 @@ * University of Illinois/NCSA Open Source License. Both these licenses can be * found in the LICENSE file. * - * Simple minimalistic but efficient malloc/free. + * Simple minimalistic but efficient sbrk()-based malloc/free that works in + * singlethreaded and multithreaded builds. * * Assumptions: * * - Pointers are 32-bit. - * - Single-threaded. - * - sbrk() is used, and nothing else. - * - sbrk() will not be accessed by anyone else. + * - Maximum individual allocation size is 2GB-1 bytes (2147483647 bytes) + * - sbrk() is used to claim new memory (sbrk handles geometric/linear + * - overallocation growth) + * - sbrk() can be used by other code outside emmalloc. 
* - sbrk() is very fast in most cases (internal wasm call). * * Invariants: * - * - Metadata is 8 bytes, allocation payload is a - * multiple of 8 bytes. - * - All regions of memory are adjacent. - * - Due to the above, after initial alignment fixing, all - * regions are aligned. - * - A region is either in use (used payload > 0) or not. + * - Per-allocation header overhead is 8 bytes, smallest allocated payload + * amount is 8 bytes, and a multiple of 4 bytes. + * - Acquired memory blocks are subdivided into disjoint regions that lie + * next to each other. + * - A region is either in used or free. * Used regions may be adjacent, and a used and unused region * may be adjacent, but not two unused ones - they would be * merged. - * - A used region always has minimal space at the end - we - * split off extra space when possible immediately. + * - Memory allocation takes constant time, unless the alloc needs to sbrk() + * or memory is very close to being exhausted. * * Debugging: * @@ -34,1170 +35,1412 @@ * - If EMMALLOC_DEBUG is defined, a large amount of extra checks are done. * - If EMMALLOC_DEBUG_LOG is defined, a lot of operations are logged * out, in addition to EMMALLOC_DEBUG. - * - Debugging and logging uses EM_ASM, not printf etc., to minimize any - * risk of debugging or logging depending on malloc. - * - * TODO - * - * - Optimizations for small allocations that are not multiples of 8, like - * 12 and 20 (which take 24 and 32 bytes respectively) - * + * - Debugging and logging directly uses console.log via uses EM_ASM, not + * printf etc., to minimize any risk of debugging or logging depending on + * malloc. 
*/ +#include +#include +#include #include +#include #include -#include // CHAR_BIT -#include // mallinfo -#include // for memcpy, memset -#include // for sbrk() - -#define EMMALLOC_EXPORT __attribute__((__weak__, __visibility__("default"))) - -// Assumptions - -static_assert(sizeof(void*) == 4, "32-bit system"); -static_assert(sizeof(size_t) == 4, "32-bit system"); -static_assert(sizeof(int) == 4, "32-bit system"); - -#define SIZE_T_BIT (sizeof(size_t) * CHAR_BIT) - -static_assert(CHAR_BIT == 8, "standard char bit size"); -static_assert(SIZE_T_BIT == 32, "standard size_t bit size"); - -// Debugging - -#ifdef EMMALLOC_DEBUG_LOG -#ifndef EMMALLOC_DEBUG -#define EMMALLOC_DEBUG -#endif -#endif +#include +#include -#ifdef EMMALLOC_DEBUG -// Forward declaration for convenience. -static void emmalloc_validate_all(); -#endif -#ifdef EMMALLOC_DEBUG -// Forward declaration for convenience. -static void emmalloc_dump_all(); +#ifdef __EMSCRIPTEN_TRACING__ +#include #endif -// Math utilities - -static bool isPowerOf2(size_t x) { return __builtin_popcount(x) == 1; } - -static size_t lowerBoundPowerOf2(size_t x) { - if (x == 0) - return 1; - // e.g. 5 is 0..0101, so clz is 29, and we want - // 4 which is 1 << 2, so the result should be 2 - return SIZE_T_BIT - 1 - __builtin_clz(x); -} - -// Constants - -// All allocations are aligned to this value. -static const size_t ALIGNMENT = 8; +// Behavior of right shifting a signed integer is compiler implementation defined. +static_assert((((int32_t)0x80000000U) >> 31) == -1, "This malloc implementation requires that right-shifting a signed integer produces a sign-extending (arithmetic) shift!"); -// Even allocating 1 byte incurs this much actual payload -// allocation. This is our minimum bin size. -static const size_t ALLOC_UNIT = ALIGNMENT; +extern "C" +{ -// How big the metadata is in each region. It is convenient -// that this is identical to the above values. 
-static const size_t METADATA_SIZE = ALLOC_UNIT; +// Configuration: specifies the minimum alignment that malloc()ed memory outputs. Allocation requests with smaller alignment +// than this will yield an allocation with this much alignment. +#define MALLOC_ALIGNMENT 8 -// How big a minimal region is. -static const size_t MIN_REGION_SIZE = METADATA_SIZE + ALLOC_UNIT; +// Configuration: If EMMALLOC_USE_64BIT_OPS is specified, emmalloc uses 64 buckets for free memory regions instead of just 32. +// When building to target asm.js/wasm2js, 64-bit ops are disabled, but in Wasm builds, 64-bit ops are enabled. (this is +// configured from command line in system_libs.py build) +// #define EMMALLOC_USE_64BIT_OPS -static_assert(ALLOC_UNIT == ALIGNMENT, "expected size of allocation unit"); -static_assert(METADATA_SIZE == ALIGNMENT, "expected size of metadata"); +#define EMMALLOC_EXPORT __attribute__((weak, __visibility__("default"))) -// Constant utilities +#define MIN(x, y) ((x) < (y) ? (x) : (y)) +#define MAX(x, y) ((x) > (y) ? (x) : (y)) -// Align a pointer, increasing it upwards as necessary -static size_t alignUp(size_t ptr) { return (ptr + ALIGNMENT - 1) & -ALIGNMENT; } +#ifdef EMMALLOC_USE_64BIT_OPS +#define NUM_FREE_BUCKETS 64 +#define BUCKET_BITMASK_T uint64_t +#define CountLeadingZeroesInBitmask __builtin_clzll +#define CountTrailingZeroesInBitmask __builtin_ctzll +#else +#define NUM_FREE_BUCKETS 32 +#define BUCKET_BITMASK_T uint32_t +#define CountLeadingZeroesInBitmask __builtin_clz +#define CountTrailingZeroesInBitmask __builtin_ctz +#endif -static void* alignUpPointer(void* ptr) { return (void*)alignUp(size_t(ptr)); } +// Dynamic memory is subdivided into regions, in the format -// -// Data structures -// +// ..... | ..... | ..... | ..... -struct Region; +// That is, at the bottom and top end of each memory region, the size of that region is stored. That allows traversing the +// memory regions backwards and forwards. 
Free regions are distinguished by used regions by having all bits inverted in the size +// field at the end of the region. Hence if the size values at the beginning and at the end of the region are the same, then the +// region is in use, otherwise it is a free region. -// Information memory that is a free list, i.e., may -// be reused. -// Note how this can fit instead of the payload (as -// the payload is a multiple of MIN_ALLOC). -struct FreeInfo { - // free lists are doubly-linked lists - FreeInfo* _prev; - FreeInfo* _next; +// A free region has the following structure: +// ... - FreeInfo*& prev() { return _prev; } - FreeInfo*& next() { return _next; } +struct Region +{ + uint32_t size; + // Use a circular doubly linked list to represent free region data. + Region *prev, *next; + // ... N bytes of free data + uint32_t _at_the_end_of_this_struct_size; // do not dereference, this is present for convenient struct sizeof() computation only }; -static_assert(sizeof(FreeInfo) == ALLOC_UNIT, "expected size of free info"); - -// The first region of memory. -static Region* firstRegion = nullptr; - -// The last region of memory. It's important to know the end -// since we may append to it. -static Region* lastRegion = nullptr; - -// A contiguous region of memory. Metadata at the beginning describes it, -// after which is the "payload", the sections that user code calling -// malloc can use. -struct Region { - // Whether this region is in use or not. - size_t _used : 1; - - // The total size of the section of memory this is associated - // with and contained in. - // That includes the metadata itself and the payload memory after, - // which includes the used and unused portions of it. - // FIXME: Shift by 1, as our size is even anyhow? - // Or, disallow allocation of half the total space or above. - // Browsers barely allow allocating 2^31 anyhow, so inside that - // space we can just allocate something smaller than it. 
- size_t _totalSize : 31; - - // Each memory area knows its previous neighbor, as we hope to merge them. - // To compute the next neighbor we can use the total size, and to know - // if a neighbor exists we can compare the region to lastRegion - Region* _prev; - - // Up to here was the fixed metadata, of size 16. The rest is either - // the payload, or freelist info. - union { - FreeInfo _freeInfo; - char _payload[]; - }; - - size_t getTotalSize() { return _totalSize; } - void setTotalSize(size_t x) { _totalSize = x; } - void incTotalSize(size_t x) { _totalSize += x; } - void decTotalSize(size_t x) { _totalSize -= x; } - - size_t getUsed() { return _used; } - void setUsed(size_t x) { _used = x; } - - Region*& prev() { return _prev; } - // The next region is not, as we compute it on the fly - Region* next() { - if (this != lastRegion) { - return (Region*)((char*)this + getTotalSize()); - } else { - return nullptr; - } - } - FreeInfo& freeInfo() { return _freeInfo; } - // The payload is special, we just return its address, as we - // never want to modify it ourselves. - char* payload() { return &_payload[0]; } -}; +#if defined(__EMSCRIPTEN_PTHREADS__) +// In multithreaded builds, use a simple global spinlock strategy to acquire/release access to the memory allocator. +static volatile uint8_t multithreadingLock = 0; +#define MALLOC_ACQUIRE() while(__sync_lock_test_and_set(&multithreadingLock, 1)) { while(multithreadingLock) { /*nop*/ } } +#define MALLOC_RELEASE() __sync_lock_release(&multithreadingLock) +// Test code to ensure we have tight malloc acquire/release guards in place. +#define ASSERT_MALLOC_IS_ACQUIRED() assert(multithreadingLock == 1) +#else +// In singlethreaded builds, no need for locking. 
+#define MALLOC_ACQUIRE() ((void)0) +#define MALLOC_RELEASE() ((void)0) +#define ASSERT_MALLOC_IS_ACQUIRED() ((void)0) +#endif + +#define IS_POWER_OF_2(val) (((val) & ((val)-1)) == 0) +#define ALIGN_UP(ptr, alignment) ((uint8_t*)((((uintptr_t)(ptr)) + ((alignment)-1)) & ~((alignment)-1))) +#define HAS_ALIGNMENT(ptr, alignment) ((((uintptr_t)(ptr)) & ((alignment)-1)) == 0) + +static_assert(IS_POWER_OF_2(MALLOC_ALIGNMENT), "MALLOC_ALIGNMENT must be a power of two value!"); +static_assert(MALLOC_ALIGNMENT >= 4, "Smallest possible MALLOC_ALIGNMENT if 4!"); + +// A region that contains as payload a single forward linked list of pointers to head regions of each disjoint region blocks. +static Region *listOfAllRegions = 0; + +// For each of the buckets, maintain a linked list head node. The head node for each +// free region is a sentinel node that does not actually represent any free space, but +// the sentinel is used to avoid awkward testing against (if node == freeRegionHeadNode) +// when adding and removing elements from the linked list, i.e. we are guaranteed that +// the sentinel node is always fixed and there, and the actual free region list elements +// start at freeRegionBuckets[i].next each. +static Region freeRegionBuckets[NUM_FREE_BUCKETS]; + +// A bitmask that tracks the population status for each of the 32 distinct memory regions: +// a zero at bit position i means that the free list bucket i is empty. This bitmask is +// used to avoid redundant scanning of the 32 different free region buckets: instead by +// looking at the bitmask we can find in constant time an index to a free region bucket +// that contains free memory of desired size. 
+static BUCKET_BITMASK_T freeRegionBucketsUsed = 0; + +// Amount of bytes taken up by allocation header data +#define REGION_HEADER_SIZE (2*sizeof(uint32_t)) + +// Smallest allocation size that is possible is 2*pointer size, since payload of each region must at least contain space +// to store the free region linked list prev and next pointers. An allocation size smaller than this will be rounded up +// to this size. +#define SMALLEST_ALLOCATION_SIZE (2*sizeof(void*)) + +/* Subdivide regions of free space into distinct circular doubly linked lists, where each linked list +represents a range of free space blocks. The following function compute_free_list_bucket() converts +an allocation size to the bucket index that should be looked at. + +When using 32 buckets, this function produces a subdivision/grouping as follows: + Bucket 0: [8-15], range size=8 + Bucket 1: [16-23], range size=8 + Bucket 2: [24-31], range size=8 + Bucket 3: [32-39], range size=8 + Bucket 4: [40-47], range size=8 + Bucket 5: [48-63], range size=16 + Bucket 6: [64-127], range size=64 + Bucket 7: [128-255], range size=128 + Bucket 8: [256-511], range size=256 + Bucket 9: [512-1023], range size=512 + Bucket 10: [1024-2047], range size=1024 + Bucket 11: [2048-3071], range size=1024 + Bucket 12: [3072-4095], range size=1024 + Bucket 13: [4096-6143], range size=2048 + Bucket 14: [6144-8191], range size=2048 + Bucket 15: [8192-12287], range size=4096 + Bucket 16: [12288-16383], range size=4096 + Bucket 17: [16384-24575], range size=8192 + Bucket 18: [24576-32767], range size=8192 + Bucket 19: [32768-49151], range size=16384 + Bucket 20: [49152-65535], range size=16384 + Bucket 21: [65536-98303], range size=32768 + Bucket 22: [98304-131071], range size=32768 + Bucket 23: [131072-196607], range size=65536 + Bucket 24: [196608-262143], range size=65536 + Bucket 25: [262144-393215], range size=131072 + Bucket 26: [393216-524287], range size=131072 + Bucket 27: [524288-786431], range size=262144 + Bucket 
28: [786432-1048575], range size=262144 + Bucket 29: [1048576-1572863], range size=524288 + Bucket 30: [1572864-2097151], range size=524288 + Bucket 31: 2097152 bytes and larger. + +When using 64 buckets, this function produces a grouping as follows: + Bucket 0: [8, 15], range size=8 + Bucket 1: [16, 23], range size=8 + Bucket 2: [24, 31], range size=8 + Bucket 3: [32, 39], range size=8 + Bucket 4: [40, 47], range size=8 + Bucket 5: [48, 55], range size=8 + Bucket 6: [56, 63], range size=8 + Bucket 7: [64, 71], range size=8 + Bucket 8: [72, 79], range size=8 + Bucket 9: [80, 87], range size=8 + Bucket 10: [88, 95], range size=8 + Bucket 11: [96, 103], range size=8 + Bucket 12: [104, 111], range size=8 + Bucket 13: [112, 119], range size=8 + Bucket 14: [120, 159], range size=40 + Bucket 15: [160, 191], range size=32 + Bucket 16: [192, 223], range size=32 + Bucket 17: [224, 255], range size=32 + Bucket 18: [256, 319], range size=64 + Bucket 19: [320, 383], range size=64 + Bucket 20: [384, 447], range size=64 + Bucket 21: [448, 511], range size=64 + Bucket 22: [512, 639], range size=128 + Bucket 23: [640, 767], range size=128 + Bucket 24: [768, 895], range size=128 + Bucket 25: [896, 1023], range size=128 + Bucket 26: [1024, 1279], range size=256 + Bucket 27: [1280, 1535], range size=256 + Bucket 28: [1536, 1791], range size=256 + Bucket 29: [1792, 2047], range size=256 + Bucket 30: [2048, 2559], range size=512 + Bucket 31: [2560, 3071], range size=512 + Bucket 32: [3072, 3583], range size=512 + Bucket 33: [3584, 6143], range size=2560 + Bucket 34: [6144, 8191], range size=2048 + Bucket 35: [8192, 12287], range size=4096 + Bucket 36: [12288, 16383], range size=4096 + Bucket 37: [16384, 24575], range size=8192 + Bucket 38: [24576, 32767], range size=8192 + Bucket 39: [32768, 49151], range size=16384 + Bucket 40: [49152, 65535], range size=16384 + Bucket 41: [65536, 98303], range size=32768 + Bucket 42: [98304, 131071], range size=32768 + Bucket 43: [131072, 196607], 
range size=65536 + Bucket 44: [196608, 262143], range size=65536 + Bucket 45: [262144, 393215], range size=131072 + Bucket 46: [393216, 524287], range size=131072 + Bucket 47: [524288, 786431], range size=262144 + Bucket 48: [786432, 1048575], range size=262144 + Bucket 49: [1048576, 1572863], range size=524288 + Bucket 50: [1572864, 2097151], range size=524288 + Bucket 51: [2097152, 3145727], range size=1048576 + Bucket 52: [3145728, 4194303], range size=1048576 + Bucket 53: [4194304, 6291455], range size=2097152 + Bucket 54: [6291456, 8388607], range size=2097152 + Bucket 55: [8388608, 12582911], range size=4194304 + Bucket 56: [12582912, 16777215], range size=4194304 + Bucket 57: [16777216, 25165823], range size=8388608 + Bucket 58: [25165824, 33554431], range size=8388608 + Bucket 59: [33554432, 50331647], range size=16777216 + Bucket 60: [50331648, 67108863], range size=16777216 + Bucket 61: [67108864, 100663295], range size=33554432 + Bucket 62: [100663296, 134217727], range size=33554432 + Bucket 63: 134217728 bytes and larger. */ +static int compute_free_list_bucket(uint32_t allocSize) +{ +#if NUM_FREE_BUCKETS == 32 + if (allocSize < 48) return (allocSize >> 3) - 1; + int clz = __builtin_clz(allocSize); + int bucketIndex = (clz > 20) ? 31 - clz : MIN(51 - (clz<<1) + ((allocSize >> (30-clz)) ^ 2), NUM_FREE_BUCKETS-1); +#elif NUM_FREE_BUCKETS == 64 + if (allocSize < 128) return (allocSize >> 3) - 1; + int clz = __builtin_clz(allocSize); + int bucketIndex = (clz > 19) ? 
110 - (clz<<2) + ((allocSize >> (29-clz)) ^ 4) : MIN(71 - (clz<<1) + ((allocSize >> (30-clz)) ^ 2), NUM_FREE_BUCKETS-1); +#else +#error Invalid size chosen for NUM_FREE_BUCKETS +#endif + assert(bucketIndex >= 0); + assert(bucketIndex < NUM_FREE_BUCKETS); + return bucketIndex; +} -// Region utilities +#define DECODE_CEILING_SIZE(size) (uint32_t)(((size) ^ (((int32_t)(size)) >> 31))) -static void* getPayload(Region* region) { - assert(((char*)®ion->freeInfo()) - ((char*)region) == METADATA_SIZE); - assert(region->getUsed()); - return region->payload(); +static Region *prev_region(Region *region) +{ + uint32_t prevRegionSize = ((uint32_t*)region)[-1]; + prevRegionSize = DECODE_CEILING_SIZE(prevRegionSize); + return (Region*)((uint8_t*)region - prevRegionSize); } -static Region* fromPayload(void* payload) { return (Region*)((char*)payload - METADATA_SIZE); } +static Region *next_region(Region *region) +{ + return (Region*)((uint8_t*)region + region->size); +} -static Region* fromFreeInfo(FreeInfo* freeInfo) { - return (Region*)((char*)freeInfo - METADATA_SIZE); +static uint32_t region_ceiling_size(Region *region) +{ + return ((uint32_t*)((uint8_t*)region + region->size))[-1]; } -static size_t getMaxPayload(Region* region) { return region->getTotalSize() - METADATA_SIZE; } +static bool region_is_free(Region *r) +{ + return region_ceiling_size(r) >> 31; +} -// TODO: move into class, make more similar to next() -static void* getAfter(Region* region) { return ((char*)region) + region->getTotalSize(); } +static bool region_is_in_use(Region *r) +{ + return r->size == region_ceiling_size(r); +} -// Globals +static uint32_t size_of_region_from_ceiling(Region *r) +{ + uint32_t size = region_ceiling_size(r); + return DECODE_CEILING_SIZE(size); +} -// TODO: For now we have a single global space for all allocations, -// but for multithreading etc. we may want to generalize that. 
+static bool debug_region_is_consistent(Region *r) +{ + assert(r); + uint32_t sizeAtBottom = r->size; + uint32_t sizeAtCeiling = size_of_region_from_ceiling(r); + return sizeAtBottom == sizeAtCeiling; +} -// A freelist (a list of Regions ready for re-use) for all -// power of 2 payload sizes (only the ones from ALIGNMENT -// size and above are relevant, though). The freelist at index -// K contains regions of memory big enough to contain at least -// 2^K bytes. -// -// Note that there is no freelist for 2^32, as that amount can -// never be allocated. +static uint8_t *region_payload_start_ptr(Region *region) +{ + return (uint8_t*)region + sizeof(uint32_t); +} -static const size_t MIN_FREELIST_INDEX = 3; // 8 == ALLOC_UNIT -static const size_t MAX_FREELIST_INDEX = 32; // uint32_t +static uint8_t *region_payload_end_ptr(Region *region) +{ + return (uint8_t*)region + region->size - sizeof(uint32_t); +} -static FreeInfo* freeLists[MAX_FREELIST_INDEX] = {nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, - nullptr, nullptr, nullptr, nullptr, nullptr}; +static void create_used_region(void *ptr, uint32_t size) +{ + assert(ptr); + assert(HAS_ALIGNMENT(ptr, sizeof(uint32_t))); + assert(HAS_ALIGNMENT(size, sizeof(uint32_t))); + assert(size >= sizeof(Region)); + *(uint32_t*)ptr = size; + ((uint32_t*)ptr)[(size>>2)-1] = size; +} -// Global utilities +static void create_free_region(void *ptr, uint32_t size) +{ + assert(ptr); + assert(HAS_ALIGNMENT(ptr, sizeof(uint32_t))); + assert(HAS_ALIGNMENT(size, sizeof(uint32_t))); + assert(size >= sizeof(Region)); + Region *freeRegion = (Region*)ptr; + freeRegion->size = size; + ((uint32_t*)ptr)[(size>>2)-1] = ~size; +} -// The freelist index is where we would appear in a freelist if -// we were one. 
It is a list of items of size at least the power -// of 2 that lower bounds us. -static size_t getFreeListIndex(size_t size) { - assert(1 << MIN_FREELIST_INDEX == ALLOC_UNIT); - assert(size > 0); - if (size < ALLOC_UNIT) - size = ALLOC_UNIT; - // We need a lower bound here, as the list contains things - // that can contain at least a power of 2. - size_t index = lowerBoundPowerOf2(size); - assert(MIN_FREELIST_INDEX <= index && index < MAX_FREELIST_INDEX); -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.getFreeListIndex " + [ $0, $1 ])}, size, index); -#endif - return index; +static void resize_region(void *ptr, uint32_t size) +{ + assert(ptr); + assert(HAS_ALIGNMENT(ptr, sizeof(uint32_t))); + assert(HAS_ALIGNMENT(size, sizeof(uint32_t))); + assert(size >= sizeof(Region)); + *(uint32_t*)ptr = size; + uint32_t *sizeAtEnd = (uint32_t*)ptr + (size>>2) - 1; + uint32_t usedMask = (*(int32_t*)sizeAtEnd) >> 31; + *sizeAtEnd = size ^ usedMask; } -// The big-enough freelist index is the index of the freelist of -// items that are all big enough for us. This is computed using -// an upper bound power of 2. -static size_t getBigEnoughFreeListIndex(size_t size) { - assert(size > 0); - size_t index = getFreeListIndex(size); - // If we're a power of 2, the lower and upper bounds are the - // same. Otherwise, add one. - if (!isPowerOf2(size)) - index++; -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.getBigEnoughFreeListIndex " + [ $0, $1 ])}, size, index); -#endif - return index; +static void prepend_to_free_list(Region *region, Region *prependTo) +{ + assert(region); + assert(prependTo); + // N.b. the region we are prepending to is always the sentinel node, + // which represents a dummy node that is technically not a free node, so + // region_is_free(prependTo) does not hold. 
+ assert(region_is_free((Region*)region)); + region->next = prependTo; + region->prev = prependTo->prev; + assert(region->prev); + prependTo->prev = region; + region->prev->next = region; } -// Items in the freelist at this index must be at least this large. -static size_t getMinSizeForFreeListIndex(size_t index) { return 1 << index; } +static void unlink_from_free_list(Region *region) +{ + assert(region); + assert(region_is_free((Region*)region)); + assert(region->prev); + assert(region->next); + region->prev->next = region->next; + region->next->prev = region->prev; +} -// Items in the freelist at this index must be smaller than this. -static size_t getMaxSizeForFreeListIndex(size_t index) { return 1 << (index + 1); } +static void link_to_free_list(Region *freeRegion) +{ + assert(freeRegion); + assert(freeRegion->size >= sizeof(Region)); + int bucketIndex = compute_free_list_bucket(freeRegion->size-REGION_HEADER_SIZE); + Region *freeListHead = freeRegionBuckets + bucketIndex; + freeRegion->prev = freeListHead; + freeRegion->next = freeListHead->next; + assert(freeRegion->next); + freeListHead->next = freeRegion; + freeRegion->next->prev = freeRegion; + freeRegionBucketsUsed |= ((BUCKET_BITMASK_T)1) << bucketIndex; +} -static void removeFromFreeList(Region* region) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.removeFromFreeList " + $0)}, region); -#endif - size_t index = getFreeListIndex(getMaxPayload(region)); - FreeInfo* freeInfo = ®ion->freeInfo(); - if (freeLists[index] == freeInfo) { - freeLists[index] = freeInfo->next(); - } - if (freeInfo->prev()) { - freeInfo->prev()->next() = freeInfo->next(); +static void dump_memory_regions() +{ + ASSERT_MALLOC_IS_ACQUIRED(); + Region *root = listOfAllRegions; + MAIN_THREAD_ASYNC_EM_ASM(console.log('All memory regions:')); + while(root) + { + Region *r = root; + assert(debug_region_is_consistent(r)); + uint8_t *lastRegionEnd = (uint8_t*)(((uint32_t*)root)[2]); + MAIN_THREAD_ASYNC_EM_ASM(console.log('Region 
block '+$0.toString(16)+'-'+$1.toString(16)+ ' ('+$2+' bytes):'), + r, lastRegionEnd, lastRegionEnd-(uint8_t*)r); + while((uint8_t*)r < lastRegionEnd) + { + MAIN_THREAD_ASYNC_EM_ASM(console.log('Region '+$0.toString(16)+', size: '+$1+' ('+($2?"used":"--FREE--")+')'), + r, r->size, region_ceiling_size(r) == r->size); + + assert(debug_region_is_consistent(r)); + uint32_t sizeFromCeiling = size_of_region_from_ceiling(r); + if (sizeFromCeiling != r->size) + MAIN_THREAD_ASYNC_EM_ASM(console.log('Corrupt region! Size marker at the end of the region does not match: '+$0), sizeFromCeiling); + if (r->size == 0) + break; + r = next_region(r); + } + root = ((Region*)((uint32_t*)root)[1]); + MAIN_THREAD_ASYNC_EM_ASM(console.log("")); } - if (freeInfo->next()) { - freeInfo->next()->prev() = freeInfo->prev(); + MAIN_THREAD_ASYNC_EM_ASM(console.log('Free regions:')); + for(int i = 0; i < NUM_FREE_BUCKETS; ++i) + { + Region *prev = &freeRegionBuckets[i]; + Region *fr = freeRegionBuckets[i].next; + while(fr != &freeRegionBuckets[i]) + { + MAIN_THREAD_ASYNC_EM_ASM(console.log('In bucket '+$0+', free region '+$1.toString(16)+', size: ' + $2 + ' (size at ceiling: '+$3+'), prev: ' + $4.toString(16) + ', next: ' + $5.toString(16)), + i, fr, fr->size, size_of_region_from_ceiling(fr), fr->prev, fr->next); + assert(debug_region_is_consistent(fr)); + assert(region_is_free(fr)); + assert(fr->prev == prev); + prev = fr; + assert(fr->next != fr); + assert(fr->prev != fr); + fr = fr->next; + } } +#if NUM_FREE_BUCKETS == 64 + MAIN_THREAD_ASYNC_EM_ASM(console.log('Free bucket index map: ' + ($0>>>0).toString(2) + ' ' + ($1>>>0).toString(2)), (uint32_t)(freeRegionBucketsUsed >> 32), (uint32_t)freeRegionBucketsUsed); +#else + MAIN_THREAD_ASYNC_EM_ASM(console.log('Free bucket index map: ' + ($0>>>0).toString(2)), freeRegionBucketsUsed); +#endif + MAIN_THREAD_ASYNC_EM_ASM(console.log("")); } -static void addToFreeList(Region* region) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.addToFreeList 
" + $0)}, region); -#endif - assert(getAfter(region) <= sbrk(0)); - size_t index = getFreeListIndex(getMaxPayload(region)); - FreeInfo* freeInfo = ®ion->freeInfo(); - FreeInfo* last = freeLists[index]; - freeLists[index] = freeInfo; - freeInfo->prev() = nullptr; - freeInfo->next() = last; - if (last) { - last->prev() = freeInfo; - } +void emmalloc_dump_memory_regions() +{ + MALLOC_ACQUIRE(); + dump_memory_regions(); + MALLOC_RELEASE(); } -// Receives a region that has just become free (and is not yet in a freelist). -// Tries to merge it into a region before or after it to which it is adjacent. -static int mergeIntoExistingFreeRegion(Region* region) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.mergeIntoExistingFreeRegion " + $0)}, region); -#endif - assert(getAfter(region) <= sbrk(0)); - int merged = 0; - Region* prev = region->prev(); - Region* next = region->next(); - if (prev && !prev->getUsed()) { - // Merge them. -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.mergeIntoExistingFreeRegion merge into prev " + $0)}, prev); -#endif - removeFromFreeList(prev); - prev->incTotalSize(region->getTotalSize()); - if (next) { - next->prev() = prev; // was: region - } else { - assert(region == lastRegion); - lastRegion = prev; +static int validate_memory_regions() +{ + ASSERT_MALLOC_IS_ACQUIRED(); + Region *root = listOfAllRegions; + while(root) + { + Region *r = root; + if (!debug_region_is_consistent(r)) + { + MAIN_THREAD_ASYNC_EM_ASM(console.error('Used region '+$0.toString(16)+', size: '+$1+' ('+($2?"used":"--FREE--")+') is corrupt (size markers in the beginning and at the end of the region do not match!)'), + r, r->size, region_ceiling_size(r) == r->size); + return 1; } - if (next) { - // We may also be able to merge with the next, keep trying. 
- if (!next->getUsed()) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.mergeIntoExistingFreeRegion also merge into next " + $0)}, next); -#endif - removeFromFreeList(next); - prev->incTotalSize(next->getTotalSize()); - if (next != lastRegion) { - next->next()->prev() = prev; - } else { - lastRegion = prev; - } + uint8_t *lastRegionEnd = (uint8_t*)(((uint32_t*)root)[2]); + while((uint8_t*)r < lastRegionEnd) + { + if (!debug_region_is_consistent(r)) + { + MAIN_THREAD_ASYNC_EM_ASM(console.error('Used region '+$0.toString(16)+', size: '+$1+' ('+($2?"used":"--FREE--")+') is corrupt (size markers in the beginning and at the end of the region do not match!)'), + r, r->size, region_ceiling_size(r) == r->size); + return 1; } + if (r->size == 0) + break; + r = next_region(r); } - addToFreeList(prev); - return 1; + root = ((Region*)((uint32_t*)root)[1]); } - if (next && !next->getUsed()) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.mergeIntoExistingFreeRegion merge into next " + $0)}, next); -#endif - // Merge them. 
- removeFromFreeList(next); - region->incTotalSize(next->getTotalSize()); - if (next != lastRegion) { - next->next()->prev() = region; - } else { - lastRegion = region; + for(int i = 0; i < NUM_FREE_BUCKETS; ++i) + { + Region *prev = &freeRegionBuckets[i]; + Region *fr = freeRegionBuckets[i].next; + while(fr != &freeRegionBuckets[i]) + { + if (!debug_region_is_consistent(fr) || !region_is_free(fr) || fr->prev != prev || fr->next == fr || fr->prev == fr) + { + MAIN_THREAD_ASYNC_EM_ASM(console.log('In bucket '+$0+', free region '+$1.toString(16)+', size: ' + $2 + ' (size at ceiling: '+$3+'), prev: ' + $4.toString(16) + ', next: ' + $5.toString(16) + ' is corrupt!'), + i, fr, fr->size, size_of_region_from_ceiling((Region*)fr), fr->prev, fr->next); + return 1; + } + prev = fr; + fr = fr->next; } - addToFreeList(region); - return 1; } return 0; } -static void stopUsing(Region* region) { - region->setUsed(0); - if (!mergeIntoExistingFreeRegion(region)) { - addToFreeList(region); - } +int emmalloc_validate_memory_regions() +{ + MALLOC_ACQUIRE(); + int memoryError = validate_memory_regions(); + MALLOC_RELEASE(); + return memoryError; } -// Grow a region. If not in use, we may need to be in another -// freelist. -// TODO: We can calculate that, to save some work. -static void growRegion(Region* region, size_t sizeDelta) { +static bool claim_more_memory(size_t numBytes) +{ #ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.growRegion " + [ $0, $1 ])}, region, sizeDelta); + MAIN_THREAD_ASYNC_EM_ASM(console.log('claim_more_memory(numBytes='+$0+ ')'), numBytes); #endif - if (!region->getUsed()) { - removeFromFreeList(region); - } - region->incTotalSize(sizeDelta); - if (!region->getUsed()) { - addToFreeList(region); - } -} -// Extends the last region to a certain payload size. Returns 1 if successful, -// 0 if an error occurred in sbrk(). 
-static int extendLastRegion(size_t size) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.extendLastRegionToSize " + $0)}, size); -#endif - size_t reusable = getMaxPayload(lastRegion); - size_t sbrkSize = alignUp(size) - reusable; - void* ptr = sbrk(sbrkSize); - if (ptr == (void*)-1) { - // sbrk() failed, we failed. -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.extendLastRegion sbrk failure")}); +#ifdef EMMALLOC_DEBUG + validate_memory_regions(); #endif - return 0; - } - // sbrk() should give us new space right after the last region. - assert(ptr == getAfter(lastRegion)); - // Increment the region's size. - growRegion(lastRegion, sbrkSize); - return 1; -} -static void possiblySplitRemainder(Region* region, size_t size) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.possiblySplitRemainder " + [ $0, $1 ])}, region, size); -#endif - size_t payloadSize = getMaxPayload(region); - assert(payloadSize >= size); - size_t extra = payloadSize - size; - // Room for a minimal region is definitely worth splitting. Otherwise, - // if we don't have room for a full region, but we do have an allocation - // unit's worth, and we are the last region, it's worth allocating some - // more memory to create a region here. The next allocation can reuse it, - // which is better than leaving it as unused and unreusable space at the - // end of this region. - if (region == lastRegion && extra >= ALLOC_UNIT && extra < MIN_REGION_SIZE) { - // Yes, this is a small-but-useful amount of memory in the final region, - // extend it. + // Claim memory via sbrk + uint8_t *startPtr = (uint8_t*)sbrk(numBytes); + if ((intptr_t)startPtr <= 0) + { #ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.possiblySplitRemainder pre-extending")}); + MAIN_THREAD_ASYNC_EM_ASM(console.error('claim_more_memory - sbrk failed!')); #endif - if (extendLastRegion(payloadSize + ALLOC_UNIT)) { - // Success. 
- extra += ALLOC_UNIT; - assert(extra >= MIN_REGION_SIZE); - } else { - return; - } + return false; } - if (extra >= MIN_REGION_SIZE) { #ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.possiblySplitRemainder is splitting")}); -#endif - // Worth it, split the region - // TODO: Consider not doing it, may affect long-term fragmentation. - void* after = getAfter(region); - Region* split = (Region*)alignUpPointer((char*)getPayload(region) + size); - region->setTotalSize((char*)split - (char*)region); - size_t totalSplitSize = (char*)after - (char*)split; - assert(totalSplitSize >= MIN_REGION_SIZE); - split->setTotalSize(totalSplitSize); - split->prev() = region; - if (region != lastRegion) { - split->next()->prev() = split; - } else { - lastRegion = split; + MAIN_THREAD_ASYNC_EM_ASM(console.log('claim_more_memory - claimed ' + $0.toString(16) + '-' + $1.toString(16) + ' (' + $2 + ' bytes) via sbrk()'), startPtr, startPtr + numBytes, numBytes); +#endif + assert(HAS_ALIGNMENT(startPtr, 4)); + uint8_t *endPtr = startPtr + numBytes; + + // Create a sentinel region at the end of the new heap block + Region *endSentinelRegion = (Region*)(endPtr - sizeof(Region)); + create_used_region(endSentinelRegion, sizeof(Region)); + + // If we are the sole user of sbrk(), it will feed us continuous/consecutive memory addresses - take advantage + // of that if so: instead of creating two disjoint memory regions blocks, expand the previous one to a larger size. + uint8_t *previousSbrkEndAddress = listOfAllRegions ? 
(uint8_t*)((uint32_t*)listOfAllRegions)[2] : 0; + if (startPtr == previousSbrkEndAddress) + { + Region *prevEndSentinel = prev_region((Region*)startPtr); + assert(debug_region_is_consistent(prevEndSentinel)); + assert(region_is_in_use(prevEndSentinel)); + Region *prevRegion = prev_region(prevEndSentinel); + assert(debug_region_is_consistent(prevRegion)); + + ((uint32_t*)listOfAllRegions)[2] = (uint32_t)endPtr; + + // Two scenarios, either the last region of the previous block was in use, in which case we need to create + // a new free region in the newly allocated space; or it was free, in which case we can extend that region + // to cover a larger size. + if (region_is_free(prevRegion)) + { + size_t newFreeRegionSize = (uint8_t*)endSentinelRegion - (uint8_t*)prevRegion; + unlink_from_free_list(prevRegion); + create_free_region(prevRegion, newFreeRegionSize); + link_to_free_list(prevRegion); + return true; } - stopUsing(split); + // else: last region of the previous block was in use. Since we are joining two consecutive sbrk() blocks, + // we can swallow the end sentinel of the previous block away. + startPtr -= sizeof(Region); + } + else + { + // Create a sentinel region at the start of the heap block + create_used_region(startPtr, sizeof(Region)); + + // Dynamic heap start region: + Region *newRegionBlock = (Region*)startPtr; + ((uint32_t*)newRegionBlock)[1] = (uint32_t)listOfAllRegions; // Pointer to next region block head + ((uint32_t*)newRegionBlock)[2] = (uint32_t)endPtr; // Pointer to the end address of this region block + listOfAllRegions = newRegionBlock; + startPtr += sizeof(Region); } + + // Create a new memory region for the new claimed free space. + create_free_region(startPtr, (uint8_t*)endSentinelRegion - startPtr); + link_to_free_list((Region*)startPtr); + return true; } -// Sets the used payload of a region, and does other necessary work when -// starting to use a region, such as splitting off a remainder if there is -// any. 
-static void useRegion(Region* region, size_t size) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.useRegion " + [ $0, $1 ])}, region, size); +// Initialize malloc during static initialization with highest constructor priority, +// so that it initializes before any other static initializers in compilation units. +static void EMSCRIPTEN_KEEPALIVE __attribute__((constructor(0))) initialize_malloc_heap() +{ +#if __EMSCRIPTEN_PTHREADS__ + // This function should be called on the main thread before any pthreads have been + // established to initialize the malloc subsystem. (so no lock acquire needed) + assert(emscripten_is_main_runtime_thread()); #endif - assert(size > 0); - region->setUsed(1); - // We may not be using all of it, split out a smaller - // region into a free list if it's large enough. - possiblySplitRemainder(region, size); -} -static Region* useFreeInfo(FreeInfo* freeInfo, size_t size) { - Region* region = fromFreeInfo(freeInfo); + // Initialize circular doubly linked lists representing free space +#pragma clang loop unroll(disable) // Never useful to unroll this for loop, just takes up code size. + for(int i = 0; i < NUM_FREE_BUCKETS; ++i) + freeRegionBuckets[i].prev = freeRegionBuckets[i].next = &freeRegionBuckets[i]; + #ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.useFreeInfo " + [ $0, $1 ])}, region, size); + MAIN_THREAD_ASYNC_EM_ASM(console.log('initialize_malloc_heap()')); #endif - // This region is no longer free - removeFromFreeList(region); - // This region is now in use - useRegion(region, size); - return region; -} -// Debugging - -// Mostly for testing purposes, wipes everything. -EMMALLOC_EXPORT -void emmalloc_blank_slate_from_orbit() { - for (int i = 0; i < MAX_FREELIST_INDEX; i++) { - freeLists[i] = nullptr; - } - firstRegion = nullptr; - lastRegion = nullptr; + // Start with a tiny dynamic region. + claim_more_memory(3*sizeof(Region)); } -#ifdef EMMALLOC_DEBUG -// For testing purposes, validate a region. 
-static void emmalloc_validate_region(Region* region) { - assert(getAfter(region) <= sbrk(0)); - assert(getMaxPayload(region) < region->getTotalSize()); - if (region->prev()) { - assert(getAfter(region->prev()) == region); - assert(region->prev()->next() == region); - } - if (region->next()) { - assert(getAfter(region) == region->next()); - assert(region->next()->prev() == region); - } +void emmalloc_blank_slate_from_orbit() +{ + listOfAllRegions = 0; + freeRegionBucketsUsed = 0; + initialize_malloc_heap(); } -// For testing purposes, check that everything is valid. -static void emmalloc_validate_all() { - void* end = sbrk(0); - // Validate regions. - Region* curr = firstRegion; - Region* prev = nullptr; - EM_ASM({ Module.emmallocDebug = {regions : {}}; }); - while (curr) { - // Note all region, so we can see freelist items are in the main list. - EM_ASM( - { - var region = $0; - assert(!Module.emmallocDebug.regions[region], "dupe region"); - Module.emmallocDebug.regions[region] = 1; - }, - curr); - assert(curr->prev() == prev); - if (prev) { - assert(getAfter(prev) == curr); - // Adjacent free regions must be merged. - assert(!(!prev->getUsed() && !curr->getUsed())); - } - assert(getAfter(curr) <= end); - prev = curr; - curr = curr->next(); - } - if (prev) { - assert(prev == lastRegion); - } else { - assert(!lastRegion); - } - if (lastRegion) { - assert(getAfter(lastRegion) == end); +static void *attempt_allocate(Region *freeRegion, size_t alignment, size_t size) +{ + ASSERT_MALLOC_IS_ACQUIRED(); + assert(freeRegion); + // Look at the next potential free region to allocate into. + // First, we should check if the free region has enough of payload bytes contained + // in it to accommodate the new allocation. This check needs to take account the + // requested allocation alignment, so the payload memory area needs to be rounded + // upwards to the desired alignment. 
+ uint8_t *payloadStartPtr = region_payload_start_ptr(freeRegion); + uint8_t *payloadStartPtrAligned = ALIGN_UP(payloadStartPtr, alignment); + uint8_t *payloadEndPtr = region_payload_end_ptr(freeRegion); + + // Do we have enough free space, taking into account alignment? + if (payloadStartPtrAligned + size > payloadEndPtr) + return 0; + + // We have enough free space, so the memory allocation will be made into this region. Remove this free region + // from the list of free regions: whatever slop remains will be later added back to the free region pool. + unlink_from_free_list(freeRegion); + + // Before we proceed further, fix up the boundary of this region and the region that precedes this one, + // so that the boundary between the two regions happens at a right spot for the payload to be aligned. + if (payloadStartPtr != payloadStartPtrAligned) + { + Region *prevRegion = prev_region((Region*)freeRegion); + size_t regionBoundaryBumpAmount = payloadStartPtrAligned - payloadStartPtr; + size_t newThisRegionSize = freeRegion->size - regionBoundaryBumpAmount; + resize_region(prevRegion, prevRegion->size + regionBoundaryBumpAmount); + freeRegion = (Region *)((uint8_t*)freeRegion + regionBoundaryBumpAmount); + freeRegion->size = newThisRegionSize; } - // Validate freelists. - for (int i = 0; i < MAX_FREELIST_INDEX; i++) { - FreeInfo* curr = freeLists[i]; - if (!curr) - continue; - FreeInfo* prev = nullptr; - while (curr) { - assert(curr->prev() == prev); - Region* region = fromFreeInfo(curr); - // Regions must be in the main list. 
- EM_ASM( - { - var region = $0; - assert(Module.emmallocDebug.regions[region], "free region not in list"); - }, - region); - assert(getAfter(region) <= end); - assert(!region->getUsed()); - assert(getMaxPayload(region) >= getMinSizeForFreeListIndex(i)); - assert(getMaxPayload(region) < getMaxSizeForFreeListIndex(i)); - prev = curr; - curr = curr->next(); - } + // Next, we need to decide whether this region is so large that it should be split into two regions, + // one representing the newly used memory area, and at the high end a remaining leftover free area. + // This splitting to two is done always if there is enough space for the high end to fit a region. + // Carve 'size' bytes of payload off this region. So, + // [sz prev next sz] + // becomes + // [sz payload sz] [sz prev next sz] + if (sizeof(Region) + REGION_HEADER_SIZE + size <= freeRegion->size) + { + // There is enough space to keep a free region at the end of the carved out block + // -> construct the new block + Region *newFreeRegion = (Region *)((uint8_t*)freeRegion + REGION_HEADER_SIZE + size); + create_free_region(newFreeRegion, freeRegion->size - size - REGION_HEADER_SIZE); + link_to_free_list(newFreeRegion); + + // Recreate the resized Region under its new size. + create_used_region(freeRegion, size + REGION_HEADER_SIZE); } - // Validate lastRegion. - if (lastRegion) { - assert(lastRegion->next() == nullptr); - assert(getAfter(lastRegion) <= end); - assert(firstRegion); - } else { - assert(!firstRegion); + else + { + // There is not enough space to split the free memory region into used+free parts, so consume the whole + // region as used memory, not leaving a free memory region behind. + // Initialize the free region as used by resetting the ceiling size to the same value as the size at bottom. 
+ ((uint32_t*)((uint8_t*)freeRegion + freeRegion->size))[-1] = freeRegion->size; } -} + +#ifdef __EMSCRIPTEN_TRACING__ + emscripten_trace_record_allocation(freeRegion, freeRegion->size); +#endif #ifdef EMMALLOC_DEBUG_LOG -// For testing purposes, dump out a region. -static void emmalloc_dump_region(Region* region) { - EM_ASM({out(" [" + $0 + " - " + $1 + " (" + $2 + " bytes" + ($3 ? ", used" : "") + ")]")}, - region, getAfter(region), getMaxPayload(region), region->getUsed()); + MAIN_THREAD_ASYNC_EM_ASM(console.log('attempt_allocate - succeeded allocating memory, region ptr=' + $0.toString(16) + ', align=' + $1 + ', payload size=' + $2 + ' bytes)'), freeRegion, alignment, size); +#endif + + return (uint8_t*)freeRegion + sizeof(uint32_t); } -// For testing purposes, dumps out the entire global state. -static void emmalloc_dump_all() { - EM_ASM({out(" emmalloc_dump_all:\n sbrk(0) = " + $0)}, sbrk(0)); - Region* curr = firstRegion; - EM_ASM({out(" all regions:")}); - while (curr) { - emmalloc_dump_region(curr); - curr = curr->next(); - } - for (int i = 0; i < MAX_FREELIST_INDEX; i++) { - FreeInfo* curr = freeLists[i]; - if (!curr) - continue; - EM_ASM({out(" freeList[" + $0 + "] sizes: [" + $1 + ", " + $2 + ")")}, i, - getMinSizeForFreeListIndex(i), getMaxSizeForFreeListIndex(i)); - FreeInfo* prev = nullptr; - while (curr) { - Region* region = fromFreeInfo(curr); - emmalloc_dump_region(region); - prev = curr; - curr = curr->next(); - } - } +static size_t validate_alloc_alignment(size_t alignment) +{ + // Cannot perform allocations that are less than 4 byte aligned, because the Region + // control structures need to be aligned. Also round up to minimum outputted alignment. + alignment = MAX(alignment, MALLOC_ALIGNMENT); + // Arbitrary upper limit on alignment - very likely a programming bug if alignment is higher than this. 
+ assert(alignment <= 1024*1024); + return alignment; } -#endif // EMMALLOC_DEBUG_LOG -#endif // EMMALLOC_DEBUG - -// When we free something of size 100, we put it in the -// freelist for items of size 64 and above. Then when something -// needs 64 bytes, we know the things in that list are all -// suitable. However, note that this means that if we then -// try to allocate something of size 100 once more, we will -// look in the freelist for items of size 128 or more (again, -// so we know all items in the list are big enough), which means -// we may not reuse the perfect region we just freed. It's hard -// to do a perfect job on that without a lot more work (memory -// and/or time), so instead, we use a simple heuristic to look -// at the one-lower freelist, which *may* contain something -// big enough for us. We look at just a few elements, but that is -// enough if we are alloating/freeing a lot of such elements -// (since the recent items are there). -// TODO: Consider more optimizations, e.g. slow bubbling of larger -// items in each freelist towards the root, or even actually -// keep it sorted by size. -// Consider also what happens to the very largest allocations, -// 2^32 - a little. That goes in the freelist of items of size -// 2^31 or less. >2 tries is enough to go through that entire -// freelist because even 2 can't exist, they'd exhaust memory -// (together with metadata overhead). So we should be able to -// free and allocate such largest allocations (barring fragmentation -// happening in between). -static const size_t SPECULATIVE_FREELIST_TRIES = 32; - -static Region* tryFromFreeList(size_t size) { + +static size_t validate_alloc_size(size_t size) +{ + // Allocation sizes must be a multiple of pointer sizes, and at least 2*sizeof(pointer). + return size > 2*sizeof(Region*) ? 
(size_t)ALIGN_UP(size, sizeof(Region*)) : 2*sizeof(Region*); +} + +static void *allocate_memory(size_t alignment, size_t size) +{ + ASSERT_MALLOC_IS_ACQUIRED(); + #ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.tryFromFreeList " + $0)}, size); + MAIN_THREAD_ASYNC_EM_ASM(console.log('allocate_memory(align=' + $0 + ', size=' + $1 + ' bytes)'), alignment, size); #endif - // Look in the freelist of items big enough for us. - size_t index = getBigEnoughFreeListIndex(size); - // If we *may* find an item in the index one - // below us, try that briefly in constant time; - // see comment on algorithm on the declaration of - // SPECULATIVE_FREELIST_TRIES. - if (index > MIN_FREELIST_INDEX && size < getMinSizeForFreeListIndex(index)) { - FreeInfo* freeInfo = freeLists[index - 1]; - size_t tries = 0; - while (freeInfo && tries < SPECULATIVE_FREELIST_TRIES) { - Region* region = fromFreeInfo(freeInfo); - if (getMaxPayload(region) >= size) { - // Success, use it -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.tryFromFreeList try succeeded")}); + +#ifdef EMMALLOC_DEBUG + validate_memory_regions(); #endif - return useFreeInfo(freeInfo, size); - } - freeInfo = freeInfo->next(); - tries++; + + if (!IS_POWER_OF_2(alignment)) + return 0; + + alignment = validate_alloc_alignment(alignment); + size = validate_alloc_size(size); + + // Attempt to allocate memory starting from smallest bucket that can contain the required amount of memory. + // Under normal alignment conditions this should always be the first or second bucket we look at, but if + // performing an allocation with complex alignment, we may need to look at multiple buckets. + int bucketIndex = compute_free_list_bucket(size); + BUCKET_BITMASK_T bucketMask = freeRegionBucketsUsed >> bucketIndex; + + // Loop through each bucket that has free regions in it, based on bits set in freeRegionBucketsUsed bitmap. 
+ while(bucketMask) + { + BUCKET_BITMASK_T indexAdd = CountTrailingZeroesInBitmask(bucketMask); + bucketIndex += indexAdd; + bucketMask >>= indexAdd; + assert(bucketIndex >= 0); + assert(bucketIndex <= NUM_FREE_BUCKETS-1); + assert(freeRegionBucketsUsed & (((BUCKET_BITMASK_T)1) << bucketIndex)); + + Region *freeRegion = freeRegionBuckets[bucketIndex].next; + assert(freeRegion); + if (freeRegion != &freeRegionBuckets[bucketIndex]) + { + void *ptr = attempt_allocate(freeRegion, alignment, size); + if (ptr) + return ptr; + + // We were not able to allocate from the first region found in this bucket, so penalize + // the region by cycling it to the end of the doubly circular linked list. (constant time) + // This provides a randomized guarantee that when performing allocations of size k to a + // bucket of [k-something, k+something] range, we will not always attempt to satisfy the + // allocation from the same available region at the front of the list, but we try each + // region in turn. + unlink_from_free_list(freeRegion); + prepend_to_free_list(freeRegion, &freeRegionBuckets[bucketIndex]); + // But do not stick around to attempt to look at other regions in this bucket - move + // to search the next populated bucket index if this did not fit. This gives a practical + // "allocation in constant time" guarantee, since the next higher bucket will only have + // regions that are all of strictly larger size than the requested allocation. Only if + // there is a difficult alignment requirement we may fail to perform the allocation from + // a region in the next bucket, and if so, we keep trying higher buckets until one of them + // works. + ++bucketIndex; + bucketMask >>= 1; } - } - // Note that index may start out at MAX_FREELIST_INDEX, - // if it is almost the largest allocation possible, - // 2^32 minus a little. 
In that case, looking in the lower - // freelist is our only hope, and it can contain at most 1 - // element (see discussion above), so we will find it if - // it's there). If not, and we got here, we'll never enter - // the loop at all. - while (index < MAX_FREELIST_INDEX) { - FreeInfo* freeInfo = freeLists[index]; - if (freeInfo) { - // We found one, use it. -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.tryFromFreeList had item to use")}); -#endif - return useFreeInfo(freeInfo, size); + else + { + // This bucket was not populated after all with any regions, + // but we just had a stale bit set to mark a populated bucket. + // Reset the bit to update latest status so that we do not + // redundantly look at this bucket again. + freeRegionBucketsUsed &= ~(((BUCKET_BITMASK_T)1) << bucketIndex); + bucketMask ^= 1; } - // Look in a freelist of larger elements. - // TODO This does increase the risk of fragmentation, though, - // and maybe the iteration adds runtime overhead. - index++; - } - // No luck, no free list. -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.tryFromFreeList no luck")}); -#endif - return nullptr; -} + // Instead of recomputing bucketMask from scratch at the end of each loop, it is updated as we go, + // to avoid undefined behavior with (x >> 32)/(x >> 64) when bucketIndex reaches 32/64, (the shift would comes out as a no-op instead of 0). -// Allocate a completely new region. -static Region* allocateRegion(size_t size) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.allocateRegion")}); -#endif - size_t sbrkSize = METADATA_SIZE + alignUp(size); - void* ptr = sbrk(sbrkSize); - if (ptr == (void*)-1) { - // sbrk() failed, we failed. 
-#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.allocateRegion sbrk failure")}); -#endif - return nullptr; + // Work around bug https://github.com/emscripten-core/emscripten/issues/10173 +// assert((bucketIndex == NUM_FREE_BUCKETS && bucketMask == 0) || (bucketMask == freeRegionBucketsUsed >> bucketIndex)); + assert((bucketIndex == NUM_FREE_BUCKETS && bucketMask == 0) || (bucketMask + EM_ASM_INT(return 0) == (freeRegionBucketsUsed >> bucketIndex) + EM_ASM_INT(return 0))); } - // sbrk() results might not be aligned. We assume single-threaded sbrk() - // access here in order to fix that up - void* fixedPtr = alignUpPointer(ptr); - if (ptr != fixedPtr) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.allocateRegion fixing alignment")}); -#endif - size_t extra = (char*)fixedPtr - (char*)ptr; - void* extraPtr = sbrk(extra); - if (extraPtr == (void*)-1) { - // sbrk() failed, we failed. -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.newAllocation sbrk failure")}); - ; -#endif - return nullptr; + + // None of the buckets were able to accommodate an allocation. If this happens we are almost out of memory. + // The largest bucket might contain some suitable regions, but we only looked at one region in that bucket, so + // as a last resort, loop through more free regions in the bucket that represents the largest allocations available. + // But only if the bucket representing largest allocations available is not any of the first ten buckets (thirty buckets + // in 64-bit buckets build), these represent allocatable areas less than <1024 bytes - which could be a lot of scrap. + // In such case, prefer to sbrk() in more memory right away. 
+ int largestBucketIndex = NUM_FREE_BUCKETS - 1 - CountLeadingZeroesInBitmask(freeRegionBucketsUsed); + Region *freeRegion = freeRegionBuckets[largestBucketIndex].next; +#ifdef EMMALLOC_USE_64BIT_OPS + if (freeRegionBucketsUsed >> 30) +#else + if (freeRegionBucketsUsed >> 10) +#endif + { + // Look only at a constant number of regions in this bucket max, to avoid bad worst case behavior. + // If this many regions cannot find free space, we give up and prefer to sbrk() more instead. + const int maxRegionsToTryBeforeGivingUp = 99; + int numTriesLeft = maxRegionsToTryBeforeGivingUp; + while(freeRegion != &freeRegionBuckets[largestBucketIndex] && numTriesLeft-- > 0) + { + void *ptr = attempt_allocate(freeRegion, alignment, size); + if (ptr) + return ptr; + freeRegion = freeRegion->next; } - // Verify the sbrk() assumption, no one else should call it. - // If this fails, it means we also leak the previous allocation, - // so we don't even try to handle it. - assert((char*)extraPtr == (char*)ptr + sbrkSize); - // After the first allocation, everything must remain aligned forever. - assert(!lastRegion); - // We now have a contiguous block of memory from ptr to - // ptr + sbrkSize + fixedPtr - ptr = fixedPtr + sbrkSize. - // fixedPtr is aligned and starts a region of the right - // amount of memory. } - Region* region = (Region*)fixedPtr; - // Apply globally - if (!lastRegion) { - assert(!firstRegion); - firstRegion = region; - lastRegion = region; - } else { - assert(firstRegion); - region->prev() = lastRegion; - lastRegion = region; + + // We were unable to find a free memory region. Must sbrk() in more memory! 
+ size_t numBytesToClaim = size+sizeof(Region)*3; + bool success = claim_more_memory(numBytesToClaim); + if (success) + return allocate_memory(alignment, size); // Recurse back to itself to try again + + // also sbrk() failed, we are really really constrained :( As a last resort, go back to looking at the + // bucket we already looked at above, continuing where the above search left off - perhaps there are + // regions we overlooked the first time that might be able to satisfy the allocation. + while(freeRegion != &freeRegionBuckets[largestBucketIndex]) + { + void *ptr = attempt_allocate(freeRegion, alignment, size); + if (ptr) + return ptr; + freeRegion = freeRegion->next; } - // Success, we have new memory -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.newAllocation success")}); - ; -#endif - region->setTotalSize(sbrkSize); - region->setUsed(1); - return region; + + return 0; } -// Allocate new memory. This may reuse part of the last region, only -// allocating what we need. -static Region* newAllocation(size_t size) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.newAllocation " + $0)}, size); -#endif - assert(size > 0); - if (lastRegion) { - // If the last region is free, we can extend it rather than leave it - // as fragmented free spce between allocated regions. This is also - // more efficient and simple as well. - if (!lastRegion->getUsed()) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.newAllocation extending lastRegion at " + $0)}, lastRegion); -#endif - // Remove it first, before we adjust the size (which affects which list - // it should be in). Also mark it as used so extending it doesn't do - // freelist computations; we'll undo that if we fail. - lastRegion->setUsed(1); - removeFromFreeList(lastRegion); - if (extendLastRegion(size)) { - return lastRegion; - } else { - lastRegion->setUsed(0); - return nullptr; - } - } - } - // Otherwise, get a new region. 
- return allocateRegion(size); +void *emmalloc_memalign(size_t alignment, size_t size) +{ + MALLOC_ACQUIRE(); + void *ptr = allocate_memory(alignment, size); + MALLOC_RELEASE(); + return ptr; } +extern __typeof(emmalloc_memalign) emscripten_builtin_memalign __attribute__((alias("emmalloc_memalign"))); -// Internal mirror of public API. +void * EMMALLOC_EXPORT memalign(size_t alignment, size_t size) +{ + return emmalloc_memalign(alignment, size); +} -static void* emmalloc_malloc(size_t size) { - // for consistency with dlmalloc, for malloc(0), allocate a block of memory, - // though returning nullptr is permitted by the standard. - if (size == 0) - size = 1; - // Look in the freelist first. - Region* region = tryFromFreeList(size); - if (!region) { - // Allocate some new memory otherwise. - region = newAllocation(size); - if (!region) { - // We failed to allocate, sadly. - return nullptr; - } - } - assert(getAfter(region) <= sbrk(0)); - return getPayload(region); +void * EMMALLOC_EXPORT aligned_alloc(size_t alignment, size_t size) +{ + return emmalloc_memalign(alignment, size); } -static void emmalloc_free(void* ptr) { - if (ptr == nullptr) - return; - stopUsing(fromPayload(ptr)); +void *emmalloc_malloc(size_t size) +{ + return emmalloc_memalign(MALLOC_ALIGNMENT, size); } +extern __typeof(emmalloc_malloc) emscripten_builtin_malloc __attribute__((alias("emmalloc_malloc"))); -static void* emmalloc_calloc(size_t nmemb, size_t size) { - // TODO If we know no one else is using sbrk(), we can assume that new - // memory allocations are zero'd out. 
- void* ptr = emmalloc_malloc(nmemb * size); +void * EMMALLOC_EXPORT malloc(size_t size) +{ + return emmalloc_malloc(size); +} + +size_t emmalloc_usable_size(void *ptr) +{ if (!ptr) - return nullptr; - memset(ptr, 0, nmemb * size); - return ptr; + return 0; + + uint8_t *regionStartPtr = (uint8_t*)ptr - sizeof(uint32_t); + Region *region = (Region*)(regionStartPtr); + assert(HAS_ALIGNMENT(region, sizeof(uint32_t))); + + MALLOC_ACQUIRE(); + + uint32_t size = region->size; + assert(size >= sizeof(Region)); + assert(region_is_in_use(region)); + + MALLOC_RELEASE(); + + return size - REGION_HEADER_SIZE; } -static void* emmalloc_realloc(void* ptr, size_t size) { +size_t EMMALLOC_EXPORT malloc_usable_size(void *ptr) +{ + return emmalloc_usable_size(ptr); +} + +void emmalloc_free(void *ptr) +{ +#ifdef EMMALLOC_DEBUG + emmalloc_validate_memory_regions(); +#endif + if (!ptr) - return emmalloc_malloc(size); - if (!size) { - emmalloc_free(ptr); - return nullptr; - } - Region* region = fromPayload(ptr); - assert(region->getUsed()); - // Grow it. First, maybe we can do simple growth in the current region. - if (size <= getMaxPayload(region)) { + return; + #ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.emmalloc_realloc use existing payload space")}); + MAIN_THREAD_ASYNC_EM_ASM(console.log('free(ptr='+$0.toString(16)+')'), ptr); #endif - region->setUsed(1); - // There might be enough left over to split out now. - possiblySplitRemainder(region, size); - return ptr; - } - // Perhaps right after us is free space we can merge to us. 
- Region* next = region->next(); - if (next && !next->getUsed()) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.emmalloc_realloc merge in next")}); + + uint8_t *regionStartPtr = (uint8_t*)ptr - sizeof(uint32_t); + Region *region = (Region*)(regionStartPtr); + assert(HAS_ALIGNMENT(region, sizeof(uint32_t))); + + MALLOC_ACQUIRE(); + + uint32_t size = region->size; + assert(size >= sizeof(Region)); + assert(region_is_in_use(region)); + +#ifdef __EMSCRIPTEN_TRACING__ + emscripten_trace_record_free(region); #endif - removeFromFreeList(next); - region->incTotalSize(next->getTotalSize()); - if (next != lastRegion) { - next->next()->prev() = region; - } else { - lastRegion = region; - } + + // Check merging with left side + uint32_t prevRegionSize = ((uint32_t*)region)[-1]; + uint32_t prevRegionSizeMask = (uint32_t)((int32_t)prevRegionSize >> 31); + if (prevRegionSizeMask) + { + prevRegionSize ^= prevRegionSizeMask; + Region *prevRegion = (Region*)((uint8_t*)region - prevRegionSize); + assert(debug_region_is_consistent(prevRegion)); + unlink_from_free_list(prevRegion); + regionStartPtr = (uint8_t*)prevRegion; + size += prevRegionSize; } - // We may now be big enough. - if (size <= getMaxPayload(region)) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.emmalloc_realloc use existing payload space after merge")}); -#endif - region->setUsed(1); - // There might be enough left over to split out now. - possiblySplitRemainder(region, size); - return ptr; + + // Check merging with right side + Region *nextRegion = next_region(region); + assert(debug_region_is_consistent(nextRegion)); + uint32_t sizeAtEnd = *(uint32_t*)region_payload_end_ptr(nextRegion); + if (nextRegion->size != sizeAtEnd) + { + unlink_from_free_list(nextRegion); + size += nextRegion->size; } - // We still aren't big enough. 
If we are the last, we can extend ourselves - however, that - // definitely means increasing the total sbrk(), and there may be free space lower down, so - // this is a tradeoff between speed (avoid the memcpy) and space. It's not clear what's - // better here; for now, check for free space first. - Region* newRegion = tryFromFreeList(size); - if (!newRegion && region == lastRegion) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.emmalloc_realloc extend last region")}); + + create_free_region(regionStartPtr, size); + link_to_free_list((Region*)regionStartPtr); + + MALLOC_RELEASE(); + +#ifdef EMMALLOC_DEBUG + emmalloc_validate_memory_regions(); #endif - if (extendLastRegion(size)) { - // It worked. We don't need the formerly free region. - if (newRegion) { - stopUsing(newRegion); - } - return ptr; - } else { - // If this failed, we can also try the normal - // malloc path, which may find space in a freelist; - // fall through. +} +extern __typeof(emmalloc_free) emscripten_builtin_free __attribute__((alias("emmalloc_free"))); + +void EMMALLOC_EXPORT free(void *ptr) +{ + return emmalloc_free(ptr); +} + +// Can be called to attempt to increase or decrease the size of the given region +// to a new size (in-place). Returns 1 if resize succeeds, and 0 on failure. +static int attempt_region_resize(Region *region, size_t size) +{ + ASSERT_MALLOC_IS_ACQUIRED(); + assert(size > 0); + assert(HAS_ALIGNMENT(size, sizeof(uint32_t))); + +#ifdef EMMALLOC_DEBUG_LOG + MAIN_THREAD_ASYNC_EM_ASM(console.log('attempt_region_resize(region=' + $0.toString(16) + ', size=' + $1 + ' bytes)'), region, size); +#endif + + // First attempt to resize this region, if the next region that follows this one + // is a free region. + Region *nextRegion = next_region(region); + uint8_t *nextRegionEndPtr = (uint8_t*)nextRegion + nextRegion->size; + size_t sizeAtCeiling = ((uint32_t*)nextRegionEndPtr)[-1]; + if (nextRegion->size != sizeAtCeiling) // Next region is free? 
+ { + assert(region_is_free(nextRegion)); + uint8_t *newNextRegionStartPtr = (uint8_t*)region + size; + assert(HAS_ALIGNMENT(newNextRegionStartPtr, sizeof(uint32_t))); + // Next region does not shrink to too small size? + if (newNextRegionStartPtr + sizeof(Region) <= nextRegionEndPtr) + { + unlink_from_free_list(nextRegion); + create_free_region(newNextRegionStartPtr, nextRegionEndPtr - newNextRegionStartPtr); + link_to_free_list((Region*)newNextRegionStartPtr); + create_used_region(region, newNextRegionStartPtr - (uint8_t*)region); + return 1; + } + // If we remove the next region altogether, allocation is satisfied? + if (newNextRegionStartPtr <= nextRegionEndPtr) + { + unlink_from_free_list(nextRegion); + create_used_region(region, region->size + nextRegion->size); + return 1; } } - // We need new space, and a copy - if (!newRegion) { - newRegion = newAllocation(size); - if (!newRegion) - return nullptr; + else + { + // Next region is an used region - we cannot change its starting address. However if we are shrinking the + // size of this region, we can create a new free region between this and the next used region. + if (size + sizeof(Region) <= region->size) + { + size_t freeRegionSize = region->size - size; + create_used_region(region, size); + Region *freeRegion = (Region *)((uint8_t*)region + size); + create_free_region(freeRegion, freeRegionSize); + link_to_free_list(freeRegion); + return 1; + } + else if (size <= region->size) + { + // Caller was asking to shrink the size, but due to not being able to fit a full Region in the shrunk + // area, we cannot actually do anything. This occurs if the shrink amount is really small. In such case, + // just call it success without doing any work. + return 1; + } } - memcpy(getPayload(newRegion), getPayload(region), - size < getMaxPayload(region) ? 
size : getMaxPayload(region)); - stopUsing(region); - return getPayload(newRegion); +#ifdef EMMALLOC_DEBUG_LOG + MAIN_THREAD_ASYNC_EM_ASM(console.log('attempt_region_resize failed.')); +#endif + return 0; } -static struct mallinfo emmalloc_mallinfo() { - struct mallinfo info; - info.arena = 0; - info.ordblks = 0; - info.smblks = 0; - info.hblks = 0; - info.hblkhd = 0; - info.usmblks = 0; - info.fsmblks = 0; - info.uordblks = 0; - info.ordblks = 0; - info.keepcost = 0; - if (firstRegion) { - info.arena = (char*)sbrk(0) - (char*)firstRegion; - Region* region = firstRegion; - while (region) { - if (region->getUsed()) { - info.uordblks += getMaxPayload(region); - } else { - info.fordblks += getMaxPayload(region); - info.ordblks++; - } - region = region->next(); - } - } - return info; +static int acquire_and_attempt_region_resize(Region *region, size_t size) +{ + MALLOC_ACQUIRE(); + int success = attempt_region_resize(region, size); + MALLOC_RELEASE(); + return success; } -// An aligned allocation. This is a rarer allocation path, and is -// much less optimized - the assumption is that it is used for few -// large allocations. -static void* alignedAllocation(size_t size, size_t alignment) { -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.alignedAllocation")}); -#endif - assert(alignment > ALIGNMENT); - assert(alignment % ALIGNMENT == 0); - // Try from the freelist first. We may be lucky and get something - // properly aligned. - // TODO: Perhaps look more carefully, checking alignment as we go, - // using multiple tries? - Region* fromFreeList = tryFromFreeList(size + alignment); - if (fromFreeList && size_t(getPayload(fromFreeList)) % alignment == 0) { - // Luck has favored us. 
- return getPayload(fromFreeList); - } else if (fromFreeList) { - stopUsing(fromFreeList); +void *emmalloc_aligned_realloc(void *ptr, size_t alignment, size_t size) +{ + if (!ptr) + return emmalloc_memalign(alignment, size); + + if (size == 0) + { + free(ptr); + return 0; } - // No luck from free list, so do a new allocation which we can - // force to be aligned. -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out(" emmalloc.alignedAllocation new allocation")}); + + assert(IS_POWER_OF_2(alignment)); + + // aligned_realloc() cannot be used to ask to change the alignment of a pointer. + assert(HAS_ALIGNMENT(ptr, alignment)); + + size = validate_alloc_size(size); + + // Calculate the region start address of the original allocation + Region *region = (Region*)((uint8_t*)ptr - sizeof(uint32_t)); + + // First attempt to resize the given region to avoid having to copy memory around + if (acquire_and_attempt_region_resize(region, size + REGION_HEADER_SIZE)) + { +#ifdef __EMSCRIPTEN_TRACING__ + emscripten_trace_record_reallocation(ptr, ptr, size); #endif - // Ensure a region before us, which we may enlarge as necessary. - if (!lastRegion) { - // This allocation is not freeable, but there is one at most. - void* prev = emmalloc_malloc(MIN_REGION_SIZE); - if (!prev) - return nullptr; + return ptr; } - // See if we need to enlarge the previous region in order to get - // us properly aligned. Take into account that our region will - // start with METADATA_SIZE of space. - size_t address = size_t(getAfter(lastRegion)) + METADATA_SIZE; - size_t error = address % alignment; - if (error != 0) { - // E.g. if we want alignment 24, and have address 16, then we - // need to add 8. 
- size_t extra = alignment - error; - assert(extra % ALIGNMENT == 0); - if (!extendLastRegion(getMaxPayload(lastRegion) + extra)) { - return nullptr; - } - address = size_t(getAfter(lastRegion)) + METADATA_SIZE; - error = address % alignment; - assert(error == 0); + + // If resize failed, we must allocate a new region, copy the data over, and then + // free the old region. + void *newptr = emmalloc_memalign(alignment, size); + if (newptr) + { + memcpy(newptr, ptr, MIN(size, region->size - REGION_HEADER_SIZE)); + free(ptr); } - Region* region = allocateRegion(size); - if (!region) - return nullptr; - void* ptr = getPayload(region); - assert(size_t(ptr) == address); - assert(size_t(ptr) % alignment == 0); - return ptr; + return newptr; } -static int isMultipleOfSizeT(size_t size) { return (size & 3) == 0; } +void * EMMALLOC_EXPORT aligned_realloc(void *ptr, size_t alignment, size_t size) +{ + return emmalloc_aligned_realloc(ptr, alignment, size); +} -static int emmalloc_posix_memalign(void** memptr, size_t alignment, size_t size) { - *memptr = nullptr; - if (!isPowerOf2(alignment) || !isMultipleOfSizeT(alignment)) { - return 22; // EINVAL - } - if (size == 0) { +// realloc_try() is like realloc(), but only attempts to try to resize the existing memory +// area. If resizing the existing memory area fails, then realloc_try() will return 0 +// (the original memory block is not freed or modified). If resizing succeeds, previous +// memory contents will be valid up to min(old length, new length) bytes. +void *emmalloc_realloc_try(void *ptr, size_t size) +{ + if (!ptr) + return 0; + + if (size == 0) + { + free(ptr); return 0; } - if (alignment <= ALIGNMENT) { - // Use normal allocation path, which will provide that alignment. - *memptr = emmalloc_malloc(size); - } else { - // Use more sophisticaed alignment-specific allocation path. 
- *memptr = alignedAllocation(size, alignment); + size = validate_alloc_size(size); + + // Calculate the region start address of the original allocation + Region *region = (Region*)((uint8_t*)ptr - sizeof(uint32_t)); + + // Attempt to resize the given region to avoid having to copy memory around + int success = acquire_and_attempt_region_resize(region, size + REGION_HEADER_SIZE); +#ifdef __EMSCRIPTEN_TRACING__ + if (success) + emscripten_trace_record_reallocation(ptr, ptr, size); +#endif + return success ? ptr : 0; +} + +// emmalloc_aligned_realloc_uninitialized() is like aligned_realloc(), but old memory contents +// will be undefined after reallocation. (old memory is not preserved in any case) +void *emmalloc_aligned_realloc_uninitialized(void *ptr, size_t alignment, size_t size) +{ + if (!ptr) + return emmalloc_memalign(alignment, size); + + if (size == 0) + { + free(ptr); + return 0; } - if (!*memptr) { - return 12; // ENOMEM + + size = validate_alloc_size(size); + + // Calculate the region start address of the original allocation + Region *region = (Region*)((uint8_t*)ptr - sizeof(uint32_t)); + + // First attempt to resize the given region to avoid having to copy memory around + if (acquire_and_attempt_region_resize(region, size + REGION_HEADER_SIZE)) + { +#ifdef __EMSCRIPTEN_TRACING__ + emscripten_trace_record_reallocation(ptr, ptr, size); +#endif + return ptr; } - return 0; + + // If resize failed, drop the old region and allocate a new region. Memory is not + // copied over + free(ptr); + return emmalloc_memalign(alignment, size); } -static void* emmalloc_memalign(size_t alignment, size_t size) { - void* ptr; - if (emmalloc_posix_memalign(&ptr, alignment, size) != 0) { - return nullptr; - } - return ptr; +void *emmalloc_realloc(void *ptr, size_t size) +{ + return emmalloc_aligned_realloc(ptr, MALLOC_ALIGNMENT, size); } -// Public API. This is a thin wrapper around our mirror of it, adding -// logging and validation when debugging. 
Otherwise it should inline -// out. +void * EMMALLOC_EXPORT realloc(void *ptr, size_t size) +{ + return emmalloc_realloc(ptr, size); +} -extern "C" { +// realloc_uninitialized() is like realloc(), but old memory contents +// will be undefined after reallocation. (old memory is not preserved in any case) +void *emmalloc_realloc_uninitialized(void *ptr, size_t size) +{ + return emmalloc_aligned_realloc_uninitialized(ptr, MALLOC_ALIGNMENT, size); +} -EMMALLOC_EXPORT -void* malloc(size_t size) { -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.malloc " + $0)}, size); -#endif - emmalloc_validate_all(); -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif -#endif - void* ptr = emmalloc_malloc(size); -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.malloc ==> " + $0)}, ptr); -#endif -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif - emmalloc_validate_all(); -#endif - return ptr; +int emmalloc_posix_memalign(void **memptr, size_t alignment, size_t size) +{ + assert(memptr); + *memptr = emmalloc_memalign(alignment, size); + return *memptr ? 
0 : 12/*ENOMEM*/; } -EMMALLOC_EXPORT -void free(void* ptr) { -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.free " + $0)}, ptr); -#endif - emmalloc_validate_all(); -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif -#endif - emmalloc_free(ptr); -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif - emmalloc_validate_all(); -#endif +int EMMALLOC_EXPORT posix_memalign(void **memptr, size_t alignment, size_t size) +{ + return emmalloc_posix_memalign(memptr, alignment, size); } -EMMALLOC_EXPORT -void* calloc(size_t nmemb, size_t size) { -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.calloc " + $0)}, size); -#endif - emmalloc_validate_all(); -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif -#endif - void* ptr = emmalloc_calloc(nmemb, size); -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.calloc ==> " + $0)}, ptr); -#endif -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif - emmalloc_validate_all(); -#endif +void *emmalloc_calloc(size_t num, size_t size) +{ + size_t bytes = num*size; + void *ptr = emmalloc_memalign(MALLOC_ALIGNMENT, bytes); + if (ptr) + memset(ptr, 0, bytes); return ptr; } -EMMALLOC_EXPORT -void* realloc(void* ptr, size_t size) { -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.realloc " + [ $0, $1 ])}, ptr, size); -#endif - emmalloc_validate_all(); -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif -#endif - void* newPtr = emmalloc_realloc(ptr, size); -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.realloc ==> " + $0)}, newPtr); -#endif -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif - emmalloc_validate_all(); -#endif - return newPtr; +void * EMMALLOC_EXPORT calloc(size_t num, size_t size) +{ + return emmalloc_calloc(num, size); } -EMMALLOC_EXPORT -int posix_memalign(void** memptr, size_t alignment, size_t size) { -#ifdef EMMALLOC_DEBUG -#ifdef 
EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.posix_memalign " + [ $0, $1, $2 ])}, memptr, alignment, size); -#endif - emmalloc_validate_all(); -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif -#endif - int result = emmalloc_posix_memalign(memptr, alignment, size); -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.posix_memalign ==> " + $0)}, result); -#endif -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif - emmalloc_validate_all(); -#endif - return result; +static int count_linked_list_size(Region *list) +{ + int size = 1; + for(Region *i = list->next; i != list; list = list->next) + ++size; + return size; } -EMMALLOC_EXPORT -void* memalign(size_t alignment, size_t size) { -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.memalign " + [ $0, $1 ])}, alignment, size); -#endif - emmalloc_validate_all(); -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif -#endif - void* ptr = emmalloc_memalign(alignment, size); -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.memalign ==> " + $0)}, ptr); -#endif -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif - emmalloc_validate_all(); -#endif - return ptr; +static size_t count_linked_list_space(Region *list) +{ + size_t space = 0; + for(Region *i = list->next; i != list; list = list->next) + space += region_payload_end_ptr(i) - region_payload_start_ptr(i); + return space; } -EMMALLOC_EXPORT -struct mallinfo mallinfo() { -#ifdef EMMALLOC_DEBUG -#ifdef EMMALLOC_DEBUG_LOG - EM_ASM({out("emmalloc.mallinfo")}); -#endif - emmalloc_validate_all(); -#ifdef EMMALLOC_DEBUG_LOG - emmalloc_dump_all(); -#endif -#endif +struct mallinfo emmalloc_mallinfo() +{ + MALLOC_ACQUIRE(); + + struct mallinfo info; + // Non-mmapped space allocated (bytes): For emmalloc, + // let's define this as the difference between heap size and dynamic top end. 
+ info.arena = emscripten_get_heap_size() - (size_t)*emscripten_get_sbrk_ptr(); + // Number of "ordinary" blocks. Let's define this as the number of highest + // size blocks. (subtract one from each, since there is a sentinel node in each list) + info.ordblks = count_linked_list_size(&freeRegionBuckets[NUM_FREE_BUCKETS-1])-1; + // Number of free "fastbin" blocks. For emmalloc, define this as the number + // of blocks that are not in the largest pristine block. + info.smblks = 0; + // The total number of bytes in free "fastbin" blocks. + info.fsmblks = 0; + for(int i = 0; i < NUM_FREE_BUCKETS-1; ++i) + { + info.smblks += count_linked_list_size(&freeRegionBuckets[i])-1; + info.fsmblks += count_linked_list_space(&freeRegionBuckets[i]); + } + + info.hblks = 0; // Number of mmapped regions: always 0. (no mmap support) + info.hblkhd = 0; // Amount of bytes in mmapped regions: always 0. (no mmap support) + + // Walk through all the heap blocks to report the following data: + // The "highwater mark" for allocated space—that is, the maximum amount of + // space that was ever allocated. Emmalloc does not want to pay code to + // track this, so this is only reported from current allocation data, and + // may not be accurate. + info.usmblks = 0; + info.uordblks = 0; // The total number of bytes used by in-use allocations. + info.fordblks = 0; // The total number of bytes in free blocks. + // The total amount of releasable free space at the top of the heap. + // This is the maximum number of bytes that could ideally be released by malloc_trim(3). + Region *lastActualRegion = prev_region((Region*)((uint8_t*)((uint32_t*)listOfAllRegions)[2] - sizeof(Region))); + info.keepcost = region_is_free(lastActualRegion) ? 
lastActualRegion->size : 0; + + Region *root = listOfAllRegions; + while(root) + { + Region *r = root; + assert(debug_region_is_consistent(r)); + uint8_t *lastRegionEnd = (uint8_t*)(((uint32_t*)root)[2]); + while((uint8_t*)r < lastRegionEnd) + { + assert(debug_region_is_consistent(r)); + + if (region_is_free(r)) + { + // Count only the payload of the free block towards free memory. + info.fordblks += region_payload_end_ptr(r) - region_payload_start_ptr(r); + // But the header data of the free block goes towards used memory. + info.uordblks += REGION_HEADER_SIZE; + } + else + { + info.uordblks += r->size; + } + // Update approximate watermark data + info.usmblks = MAX(info.usmblks, (int)(r + r->size)); + + if (r->size == 0) + break; + r = next_region(r); + } + root = ((Region*)((uint32_t*)root)[1]); + } + + MALLOC_RELEASE(); + return info; +} + +struct mallinfo EMMALLOC_EXPORT mallinfo() +{ + return emmalloc_mallinfo(); } -// Export malloc and free as duplicate names emscripten_builtin_malloc and -// emscripten_builtin_free so that applications can replace malloc and free -// in their code, and make those replacements refer to the original malloc -// and free from this file. -// This allows an easy mechanism for hooking into memory allocation. -#if defined(__EMSCRIPTEN__) -extern __typeof(malloc) emscripten_builtin_malloc __attribute__((alias("malloc"))); -extern __typeof(free) emscripten_builtin_free __attribute__((alias("free"))); -extern __typeof(memalign) emscripten_builtin_memalign __attribute__((alias("memalign"))); +// Note! This function is not fully multithreading safe: while this function is running, other threads should not be +// allowed to call sbrk()! +static int trim_dynamic_heap_reservation(size_t pad) +{ + ASSERT_MALLOC_IS_ACQUIRED(); + + if (!listOfAllRegions) + return 0; // emmalloc is not controlling any dynamic memory at all - cannot release memory. 
+ uint32_t *previousSbrkEndAddress = (uint32_t*)((uint32_t*)listOfAllRegions)[2]; + assert(sbrk(0) == previousSbrkEndAddress); + uint32_t lastMemoryRegionSize = previousSbrkEndAddress[-1]; + assert(lastMemoryRegionSize == 16); // The last memory region should be a sentinel node of exactly 16 bytes in size. + Region *endSentinelRegion = (Region*)((uint8_t*)previousSbrkEndAddress - sizeof(Region)); + Region *lastActualRegion = prev_region(endSentinelRegion); + + // Round padding up to multiple of 4 bytes to keep sbrk() and memory region alignment intact. + // Also have at least 8 bytes of payload so that we can form a full free region. + size_t newRegionSize = (size_t)ALIGN_UP(pad, 4); + if (pad > 0) + newRegionSize += sizeof(Region) - (newRegionSize - pad); + + if (!region_is_free(lastActualRegion) || lastActualRegion->size <= newRegionSize) + return 0; // Last actual region is in use, or caller desired to leave more free memory intact than there is. + + // This many bytes will be shrunk away. + size_t shrinkAmount = lastActualRegion->size - newRegionSize; + assert(HAS_ALIGNMENT(shrinkAmount, 4)); + + unlink_from_free_list(lastActualRegion); + // If pad == 0, we should delete the last free region altogether. If pad > 0, + // shrink the last free region to the desired size. + if (newRegionSize > 0) + { + create_free_region(lastActualRegion, newRegionSize); + link_to_free_list(lastActualRegion); + } + + // Recreate the sentinel region at the end of the last free region + endSentinelRegion = (Region*)((uint8_t*)lastActualRegion + newRegionSize); + create_used_region(endSentinelRegion, sizeof(Region)); + + // And update the size field of the whole region block. + ((uint32_t*)listOfAllRegions)[2] = (uint32_t)endSentinelRegion + sizeof(Region); + + // Finally call sbrk() to shrink the memory area. + void *oldSbrk = sbrk(-(intptr_t)shrinkAmount); + assert((intptr_t)oldSbrk != -1); // Shrinking with sbrk() should never fail. 
+ assert(oldSbrk == previousSbrkEndAddress); // Another thread should not have raced to increase sbrk() on us! + + // All successful, and we actually trimmed memory! + return 1; +} + +int emmalloc_trim(size_t pad) +{ + MALLOC_ACQUIRE(); + int success = trim_dynamic_heap_reservation(pad); + MALLOC_RELEASE(); + return success; +} + +int EMMALLOC_EXPORT malloc_trim(size_t pad) +{ + return emmalloc_trim(pad); +} + +#if 0 +// TODO: In wasm2js/asm.js builds, we could use the following API to actually shrink the heap size, but in +// WebAssembly builds this won't work. Keeping this code here for future use. + +emmalloc.h: +// Shrinks the asm.js/wasm2js heap to the minimum size, releasing memory back to the system. +// Returns 1 if memory was actually freed, and 0 if not. In WebAssembly builds, this function +// does nothing, because it is not possible to shrink the Wasm heap size once it has grown. +// Call emmalloc_trim() first before calling this function to maximize the amount of +// free memory that is released. 
+int emmalloc_shrink_heap(void); + +emmalloc.c: +int emmalloc_shrink_heap() +{ + MALLOC_ACQUIRE(); + size_t sbrkTop = (size_t)*emscripten_get_sbrk_ptr(); + size_t heapSize = emscripten_get_heap_size(); + assert(heapSize >= sbrkTop); + int success = 0; + if (sbrkTop < heapSize) + { + success = emscripten_realloc_buffer(sbrkTop); + assert(!success || emscripten_get_heap_size() == sbrkTop); + } + MALLOC_RELEASE(); + return success; +} #endif +size_t emmalloc_dynamic_heap_size() +{ + size_t dynamicHeapSize = 0; + + MALLOC_ACQUIRE(); + Region *root = listOfAllRegions; + while(root) + { + Region *r = root; + uintptr_t blockEndPtr = ((uint32_t*)r)[2]; + dynamicHeapSize += blockEndPtr - (uintptr_t)r; + root = ((Region*)((uint32_t*)root)[1]); + } + MALLOC_RELEASE(); + return dynamicHeapSize; +} + +size_t emmalloc_free_dynamic_memory() +{ + size_t freeDynamicMemory = 0; + + int bucketIndex = 0; + + MALLOC_ACQUIRE(); + BUCKET_BITMASK_T bucketMask = freeRegionBucketsUsed; + + // Loop through each bucket that has free regions in it, based on bits set in freeRegionBucketsUsed bitmap. + while(bucketMask) + { + BUCKET_BITMASK_T indexAdd = CountTrailingZeroesInBitmask(bucketMask); + bucketIndex += indexAdd; + bucketMask >>= indexAdd; + for(Region *freeRegion = freeRegionBuckets[bucketIndex].next; + freeRegion != &freeRegionBuckets[bucketIndex]; + freeRegion = freeRegion->next) + { + freeDynamicMemory += freeRegion->size - REGION_HEADER_SIZE; + } + ++bucketIndex; + bucketMask >>= 1; + } + MALLOC_RELEASE(); + return freeDynamicMemory; +} + +size_t emmalloc_compute_free_dynamic_memory_fragmentation_map(size_t freeMemorySizeMap[32]) +{ + memset((void*)freeMemorySizeMap, 0, sizeof(freeMemorySizeMap[0])*32); + + size_t numFreeMemoryRegions = 0; + int bucketIndex = 0; + MALLOC_ACQUIRE(); + BUCKET_BITMASK_T bucketMask = freeRegionBucketsUsed; + + // Loop through each bucket that has free regions in it, based on bits set in freeRegionBucketsUsed bitmap. 
+ while(bucketMask) + { + BUCKET_BITMASK_T indexAdd = CountTrailingZeroesInBitmask(bucketMask); + bucketIndex += indexAdd; + bucketMask >>= indexAdd; + for(Region *freeRegion = freeRegionBuckets[bucketIndex].next; + freeRegion != &freeRegionBuckets[bucketIndex]; + freeRegion = freeRegion->next) + { + ++numFreeMemoryRegions; + size_t freeDynamicMemory = freeRegion->size - REGION_HEADER_SIZE; + if (freeDynamicMemory > 0) + ++freeMemorySizeMap[31-__builtin_clz(freeDynamicMemory)]; + else + ++freeMemorySizeMap[0]; + } + ++bucketIndex; + bucketMask >>= 1; + } + MALLOC_RELEASE(); + return numFreeMemoryRegions; +} + } // extern "C" diff --git a/system/lib/sbrk.c b/system/lib/sbrk.c index 637344b3f8a81..95af6cde83abc 100644 --- a/system/lib/sbrk.c +++ b/system/lib/sbrk.c @@ -35,6 +35,8 @@ } void *sbrk(intptr_t increment) { + // Enforce preserving a minimal 4-byte alignment for sbrk. + increment = (increment + 3) & ~3; #if __EMSCRIPTEN_PTHREADS__ // Our default dlmalloc uses locks around each malloc/free, so no additional // work is necessary to keep things threadsafe, but we also make sure sbrk diff --git a/tests/core/test_emmalloc.cpp b/tests/core/test_emmalloc.cpp index 416223d5c54e0..4f86f2f25bc5f 100644 --- a/tests/core/test_emmalloc.cpp +++ b/tests/core/test_emmalloc.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -13,7 +14,7 @@ #define RANDOM_ITERS 12345 #endif -extern void emmalloc_blank_slate_from_orbit(); +extern "C" void emmalloc_blank_slate_from_orbit(); // Test emmalloc internals, but through the external interface. 
We expect // very specific outputs here based on the internals, this test would not @@ -103,7 +104,6 @@ void previous_sbrk() { void* other = malloc(10); free(other); assert(other != old); - assert((char*)other == (char*)old + 2 * ALLOCATION_UNIT); } void min_alloc() { @@ -146,7 +146,7 @@ void realloc() { stage("realloc0"); emmalloc_blank_slate_from_orbit(); for (int i = 0; i < 2; i++) { - char* ptr = (char*)malloc(10); + char* ptr = (char*)malloc(100); stage("realloc0.1"); char* raptr = (char*)realloc(ptr, 1); assert(raptr == ptr); @@ -205,11 +205,11 @@ void realloc() { } void check_aligned(size_t align, size_t ptr) { - if (align < 4 || ((align & (align - 1)) != 0)) { + if (align != 0 && ((align & (align - 1)) != 0)) { assert(ptr == 0); } else { assert(ptr); - assert(ptr % align == 0); + assert(align == 0 || ptr % align == 0); } } diff --git a/tests/core/test_emmalloc_memory_statistics.cpp b/tests/core/test_emmalloc_memory_statistics.cpp new file mode 100644 index 0000000000000..a8ca147fda9dd --- /dev/null +++ b/tests/core/test_emmalloc_memory_statistics.cpp @@ -0,0 +1,25 @@ +#include +#include + +int main() +{ + void *ptr = malloc(32*1024*1024); + void *ptr2 = malloc(4*1024*1024); + void *ptr3 = malloc(64*1024*1024); + void *ptr4 = malloc(16*1024); + void *ptr5 = malloc(2*1024*1024); + printf("%d\n", (int)(ptr && ptr2 && ptr3 && ptr4 && ptr5)); + free(ptr2); + free(ptr4); + printf("%d\n", emmalloc_validate_memory_regions()); + printf("%zu\n", emmalloc_dynamic_heap_size()); + printf("%zu\n", emmalloc_free_dynamic_memory()); + size_t numFreeMemoryRegions = 0; + size_t freeMemorySizeMap[32]; + numFreeMemoryRegions = emmalloc_compute_free_dynamic_memory_fragmentation_map(freeMemorySizeMap); + printf("%zu\n", numFreeMemoryRegions); + for(int i = 0; i < 32; ++i) + printf("%zu ", freeMemorySizeMap[i]); + printf("\n"); + printf("%zu\n", emmalloc_unclaimed_heap_memory()); +} diff --git a/tests/core/test_emmalloc_memory_statistics.txt 
b/tests/core/test_emmalloc_memory_statistics.txt new file mode 100644 index 0000000000000..557ff305ba36c --- /dev/null +++ b/tests/core/test_emmalloc_memory_statistics.txt @@ -0,0 +1,7 @@ +1 +0 +106971424 +4210892 +3 +0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 +21997632 diff --git a/tests/core/test_emmalloc_trim.cpp b/tests/core/test_emmalloc_trim.cpp new file mode 100644 index 0000000000000..dc426bb780bcb --- /dev/null +++ b/tests/core/test_emmalloc_trim.cpp @@ -0,0 +1,53 @@ +#include +#include +#include +#include + +int main() +{ + printf("heap size: %zu\n", emscripten_get_heap_size()); + + void *ptr = malloc(32*1024*1024); + void *ptr2 = malloc(4*1024*1024); + printf("%d\n", (int)(ptr && ptr2)); + printf("dynamic heap 1: %zu\n", emmalloc_dynamic_heap_size()); + printf("free dynamic memory 1: %zu\n", emmalloc_free_dynamic_memory()); + printf("unclaimed heap memory 1: %zu\n", emmalloc_unclaimed_heap_memory()); + printf("sbrk 1: %p\n", sbrk(0)); + + int success = emmalloc_trim(0); + printf("1st trim: %d\n", success); + printf("dynamic heap 1: %zu\n", emmalloc_dynamic_heap_size()); + printf("free dynamic memory 1: %zu\n", emmalloc_free_dynamic_memory()); + printf("unclaimed heap memory 1: %zu\n", emmalloc_unclaimed_heap_memory()); + printf("sbrk 1: %p\n", sbrk(0)); + + success = emmalloc_trim(0); + printf("2nd trim: %d\n", success); + printf("dynamic heap 2: %zu\n", emmalloc_dynamic_heap_size()); + printf("free dynamic memory 2: %zu\n", emmalloc_free_dynamic_memory()); + printf("unclaimed heap memory 2: %zu\n", emmalloc_unclaimed_heap_memory()); + printf("sbrk 2: %p\n", sbrk(0)); + free(ptr2); + + success = emmalloc_trim(100000); + printf("3rd trim: %d\n", success); + printf("dynamic heap 3: %zu\n", emmalloc_dynamic_heap_size()); + printf("free dynamic memory 3: %zu\n", emmalloc_free_dynamic_memory()); + printf("unclaimed heap memory 3: %zu\n", emmalloc_unclaimed_heap_memory()); + printf("sbrk 3: %p\n", sbrk(0)); + + success = 
emmalloc_trim(100000); + printf("4th trim: %d\n", success); + printf("dynamic heap 4: %zu\n", emmalloc_dynamic_heap_size()); + printf("free dynamic memory 4: %zu\n", emmalloc_free_dynamic_memory()); + printf("unclaimed heap memory 4: %zu\n", emmalloc_unclaimed_heap_memory()); + printf("sbrk 4: %p\n", sbrk(0)); + + success = emmalloc_trim(0); + printf("5th trim: %d\n", success); + printf("dynamic heap 5: %zu\n", emmalloc_dynamic_heap_size()); + printf("free dynamic memory 5: %zu\n", emmalloc_free_dynamic_memory()); + printf("unclaimed heap memory 5: %zu\n", emmalloc_unclaimed_heap_memory()); + printf("sbrk 5: %p\n", sbrk(0)); +} diff --git a/tests/core/test_emmalloc_trim.txt b/tests/core/test_emmalloc_trim.txt new file mode 100644 index 0000000000000..099fdd0f47375 --- /dev/null +++ b/tests/core/test_emmalloc_trim.txt @@ -0,0 +1,31 @@ +heap size: 134217728 +1 +dynamic heap 1: 37748880 +free dynamic memory 1: 84 +unclaimed heap memory 1: 2104421504 +sbrk 1: 0x2901380 +1st trim: 1 +dynamic heap 1: 37748788 +free dynamic memory 1: 0 +unclaimed heap memory 1: 2104421596 +sbrk 1: 0x2901324 +2nd trim: 0 +dynamic heap 2: 37748788 +free dynamic memory 2: 0 +unclaimed heap memory 2: 2104421596 +sbrk 2: 0x2901324 +3rd trim: 1 +dynamic heap 3: 33654492 +free dynamic memory 3: 100008 +unclaimed heap memory 3: 2108515892 +sbrk 3: 0x25199cc +4th trim: 0 +dynamic heap 4: 33654492 +free dynamic memory 4: 100008 +unclaimed heap memory 4: 2108515892 +sbrk 4: 0x25199cc +5th trim: 1 +dynamic heap 5: 33554476 +free dynamic memory 5: 0 +unclaimed heap memory 5: 2108615908 +sbrk 5: 0x250131c \ No newline at end of file diff --git a/tests/core/test_malloc_usable_size.c b/tests/core/test_malloc_usable_size.c new file mode 100644 index 0000000000000..77bb378e4182d --- /dev/null +++ b/tests/core/test_malloc_usable_size.c @@ -0,0 +1,8 @@ +#include +#include + +int main() +{ + void *ptr = malloc(1); + printf("%zu\n", malloc_usable_size(ptr)); +} diff --git 
a/tests/core/test_malloc_usable_size.txt b/tests/core/test_malloc_usable_size.txt new file mode 100644 index 0000000000000..45a4fb75db864 --- /dev/null +++ b/tests/core/test_malloc_usable_size.txt @@ -0,0 +1 @@ +8 diff --git a/tests/malloc_bench.cpp b/tests/malloc_bench.cpp index 3919c3f693aec..abc9d0643ce86 100644 --- a/tests/malloc_bench.cpp +++ b/tests/malloc_bench.cpp @@ -137,6 +137,7 @@ void randoms() { printf("sbrk chng: %u\n", sbrk_change); printf("sbrk chng/allocs: %.2f\n", sbrk_change / double(allocs_at_max)); printf("overhead: %.2f\n", -((double(sizes) / allocations) - (sbrk_change / double(allocs_at_max)))); + printf("sbrk top now: %p\n", (void*)sbrk(0)); if (POLL_SBRK) { printf("sbrk mean change: %.2f\n", (sum_sbrk / double(ITERS)) - before); printf("sbrk max change: %u\n", max_sbrk - before); diff --git a/tests/test_core.py b/tests/test_core.py index 17e12c2154eb0..476e37fb104f9 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -914,6 +914,31 @@ def test_emmalloc(self, *args): open(path_from_root('tests', 'core', 'test_emmalloc.cpp')).read(), open(path_from_root('tests', 'core', 'test_emmalloc.txt')).read()) + def test_emmalloc_usable_size(self, *args): + self.set_setting('MALLOC', 'emmalloc') + self.emcc_args += list(args) + + self.do_run_in_out_file_test('tests', 'core', 'test_malloc_usable_size') + + @no_fastcomp('this feature works in fastcomp, but test outputs are sensitive to wasm backend') + @no_optimize('output is sensitive to optimization flags, so only test unoptimized builds') + @no_wasm2js('output is specific to wasm debug builds only') + def test_emmalloc_memory_statistics(self, *args): + + self.set_setting('MALLOC', 'emmalloc') + self.emcc_args += ['-s', 'TOTAL_MEMORY=128MB', '-g'] + list(args) + + self.do_run_in_out_file_test('tests', 'core', 'test_emmalloc_memory_statistics') + + @no_fastcomp('this feature works in fastcomp, but test outputs are sensitive to wasm backend') + @no_optimize('output is sensitive to optimization 
flags, so only test unoptimized builds') + @no_wasm2js('output is specific to wasm debug builds only') + def test_emmalloc_trim(self, *args): + self.set_setting('MALLOC', 'emmalloc') + self.emcc_args += ['-s', 'TOTAL_MEMORY=128MB', '-s', 'ALLOW_MEMORY_GROWTH=1'] + list(args) + + self.do_run_in_out_file_test('tests', 'core', 'test_emmalloc_trim') + def test_newstruct(self): self.do_run(self.gen_struct_src.replace('{{gen_struct}}', 'new S').replace('{{del_struct}}', 'delete'), '*51,62*') @@ -1112,7 +1137,6 @@ def test_setjmp_noleak(self): printf("ok.\n"); } ''' - self.do_run(src, r'''ok.''') def test_exceptions(self): @@ -3050,7 +3074,7 @@ def post(filename): self.assertGreater(len(exports), 20) # wasm backend includes alias in NAMED_GLOBALS if self.is_wasm_backend(): - self.assertLess(len(exports), 55) + self.assertLess(len(exports), 56) else: self.assertLess(len(exports), 30) diff --git a/tests/test_other.py b/tests/test_other.py index 383ddaafb90be..5392b4e8a64f5 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -9574,23 +9574,23 @@ def test_minimal_runtime_code_size(self): path_from_root('tests', 'minimal_webgl', 'webgl.c'), '--js-library', path_from_root('tests', 'minimal_webgl', 'library_js.js'), '-s', 'RUNTIME_FUNCS_TO_IMPORT=[]', - '-s', 'USES_DYNAMIC_ALLOC=2', '-lwebgl.js', + '-s', 'USES_DYNAMIC_ALLOC=1', '-lwebgl.js', '-s', 'MODULARIZE=1'] hello_webgl2_sources = hello_webgl_sources + ['-s', 'MAX_WEBGL_VERSION=2'] if self.is_wasm_backend(): test_cases = [ (opts, hello_world_sources, {'a.html': 1445, 'a.js': 455, 'a.wasm': 176}), - (opts, hello_webgl_sources, {'a.html': 1565, 'a.js': 4636, 'a.wasm': 11918}), - (opts, hello_webgl2_sources, {'a.html': 1565, 'a.js': 5143, 'a.wasm': 11918}) # Compare how WebGL2 sizes stack up with WebGL 1 + (opts, hello_webgl_sources, {'a.html': 1565, 'a.js': 4636, 'a.wasm': 11809}), + (opts, hello_webgl2_sources, {'a.html': 1565, 'a.js': 5143, 'a.wasm': 11809}) # Compare how WebGL2 sizes stack up with WebGL 1 ] else: 
test_cases = [ - (asmjs + opts, hello_world_sources, {'a.html': 1483, 'a.js': 289, 'a.asm.js': 113, 'a.mem': 6}), - (opts, hello_world_sources, {'a.html': 1440, 'a.js': 604, 'a.wasm': 86}), - (asmjs + opts, hello_webgl_sources, {'a.html': 1606, 'a.js': 4880, 'a.asm.js': 11139, 'a.mem': 321}), - (opts, hello_webgl_sources, {'a.html': 1557, 'a.js': 4837, 'a.wasm': 8841}), - (opts, hello_webgl2_sources, {'a.html': 1557, 'a.js': 5324, 'a.wasm': 8841}) # Compare how WebGL2 sizes stack up with WebGL 1 + (asmjs + opts, hello_world_sources, {'a.html': 1481, 'a.js': 289, 'a.asm.js': 113, 'a.mem': 6}), + (opts, hello_world_sources, {'a.html': 1445, 'a.js': 604, 'a.wasm': 86}), + (asmjs + opts, hello_webgl_sources, {'a.html': 1605, 'a.js': 4921, 'a.asm.js': 11129, 'a.mem': 321}), + (opts, hello_webgl_sources, {'a.html': 1565, 'a.js': 4844, 'a.wasm': 8932}), + (opts, hello_webgl2_sources, {'a.html': 1565, 'a.js': 5331, 'a.wasm': 8932}) # Compare how WebGL2 sizes stack up with WebGL 1 ] success = True diff --git a/tools/shared.py b/tools/shared.py index 8f02005cdf1aa..f3aa8bd49beb7 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -1309,7 +1309,7 @@ def verify_settings(): exit_with_error('emcc: ASM_JS can only be set to either 1 or 2') if Settings.SAFE_HEAP not in [0, 1]: - exit_with_error('emcc: SAVE_HEAP must be 0 or 1 in fastcomp') + exit_with_error('emcc: SAFE_HEAP must be 0 or 1 in fastcomp') if Settings.WASM and Settings.EXPORT_FUNCTION_TABLES: exit_with_error('emcc: EXPORT_FUNCTION_TABLES incompatible with WASM') diff --git a/tools/system_libs.py b/tools/system_libs.py index 68148aabfc73e..9912b6ff0e7cf 100755 --- a/tools/system_libs.py +++ b/tools/system_libs.py @@ -919,13 +919,10 @@ def __init__(self, **kwargs): self.is_debug = kwargs.pop('is_debug') self.use_errno = kwargs.pop('use_errno') self.is_tracing = kwargs.pop('is_tracing') + self.use_64bit_ops = kwargs.pop('use_64bit_ops') super(libmalloc, self).__init__(**kwargs) - if self.malloc != 'dlmalloc': - assert 
not self.is_mt - assert not self.is_tracing - def get_files(self): malloc = shared.path_from_root('system', 'lib', { 'dlmalloc': 'dlmalloc.c', 'emmalloc': 'emmalloc.cpp' @@ -944,6 +941,8 @@ def get_cflags(self): cflags += ['-DMALLOC_FAILURE_ACTION=', '-DEMSCRIPTEN_NO_ERRNO'] if self.is_tracing: cflags += ['--tracing'] + if self.use_64bit_ops: + cflags += ['-DEMMALLOC_USE_64BIT_OPS=1'] return cflags def get_base_name_prefix(self): @@ -958,6 +957,8 @@ def get_base_name(self): name += '-noerrno' if self.is_tracing: name += '-tracing' + if self.use_64bit_ops: + name += '-64bit' return name def can_use(self): @@ -965,7 +966,7 @@ def can_use(self): @classmethod def vary_on(cls): - return super(libmalloc, cls).vary_on() + ['is_debug', 'use_errno', 'is_tracing'] + return super(libmalloc, cls).vary_on() + ['is_debug', 'use_errno', 'is_tracing', 'use_64bit_ops'] @classmethod def get_default_variation(cls, **kwargs): @@ -974,15 +975,15 @@ def get_default_variation(cls, **kwargs): is_debug=shared.Settings.DEBUG_LEVEL >= 3, use_errno=shared.Settings.SUPPORT_ERRNO, is_tracing=shared.Settings.EMSCRIPTEN_TRACING, + use_64bit_ops=shared.Settings.MALLOC == 'emmalloc' and (shared.Settings.WASM == 1 or (shared.Settings.WASM_BACKEND and shared.Settings.WASM2JS == 0)), **kwargs ) @classmethod def variations(cls): combos = super(libmalloc, cls).variations() - return ([dict(malloc='dlmalloc', **combo) for combo in combos] + - [dict(malloc='emmalloc', **combo) for combo in combos - if not combo['is_mt'] and not combo['is_tracing']]) + return ([dict(malloc='dlmalloc', **combo) for combo in combos if not combo['use_64bit_ops']] + + [dict(malloc='emmalloc', **combo) for combo in combos]) class libal(Library):