I was reading @kripken's slides and blog post about Asyncify and took a shot at implementing a minimal coroutine system in pure C for WASM with it, intended to work in standalone WASM. However, while it works fine when compiled with -O0 or -O1, it fails when optimizations are enabled with -O2 or -Oz.
Here is the smallest commented test case I could come up with; it is really just a minimal coroutine system based on Asyncify:
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
/* Import asyncify */
/*
 * Record passed by address to the asyncify start_unwind/start_rewind imports.
 * It holds two pointers, in this order: the beginning of the save area and
 * its end (mco_init points them at a coroutine's `stack` buffer).
 */
typedef struct _asyncify_stack_region _asyncify_stack_region;
struct _asyncify_stack_region {
  void *start;
  void *limit;
};
/*
 * Asyncify runtime entry points. Each one is declared as an import from the
 * "asyncify" module under the name given in its import_name attribute.
 * start_unwind/start_rewind receive the address of the region describing the
 * save area; stop_unwind/stop_rewind take no arguments and are declared with
 * an explicit (void) list so they have a real prototype instead of an
 * old-style empty parameter list.
 */
__attribute__((import_module("asyncify"), import_name("start_unwind"))) void _asyncify_start_unwind(_asyncify_stack_region*);
__attribute__((import_module("asyncify"), import_name("stop_unwind"))) void _asyncify_stop_unwind(void);
__attribute__((import_module("asyncify"), import_name("start_rewind"))) void _asyncify_start_rewind(_asyncify_stack_region*);
__attribute__((import_module("asyncify"), import_name("stop_rewind"))) void _asyncify_stop_rewind(void);
/* Lifecycle states of a coroutine. */
typedef enum mco_state {
  MCO_DEAD      = 0, /* Entry function has returned; set by _mco_run_entry. */
  MCO_RUNNING   = 1, /* Set by mco_resume just before jumping in. */
  MCO_SUSPENDED = 2  /* Set by mco_init and by mco_yield. */
} mco_state;
/* All per-coroutine data: status, asyncify save area and entry point. */
typedef struct mco_coro mco_coro;
struct mco_coro {
  mco_state state;                     /* Current lifecycle state. */
  _asyncify_stack_region stack_region; /* Bounds of `stack`, handed to asyncify. */
  int rewind_id;                       /* Yield point to rewind to on the next resume. */
  void (*entry)(mco_coro *co);         /* Function executed as the coroutine body. */
  uint8_t stack[8192];                 /* Storage for saved locals and call stack. */
};
/*
 * Second half of a jump-out: decides what reaching a yield point means.
 *
 * co        - coroutine being left or re-entered.
 * rewind_id - id captured by the caller (_mco_jumpout) as co->rewind_id + 1
 *             at the time the yield point was first reached.
 *
 * Three cases, compared against the coroutine's current counter:
 *   - rewind_id == co->rewind_id + 1: this yield point has not been taken
 *     yet, so the counter is advanced and asyncify is asked to start
 *     unwinding into co->stack_region.
 *   - rewind_id == co->rewind_id: this is the yield point recorded by the
 *     last unwind, so the rewind is stopped here.
 *   - anything else: nothing is done.
 *
 * NOTE(review): the function is noinline and takes the id as a volatile
 * parameter; presumably both are there to keep the captured id in this call
 * frame so asyncify can restore it - confirm.
 */
static __attribute__((noinline)) void _mco_finish_jumpout(mco_coro* co, volatile int rewind_id) {
int next_rewind_id = co->rewind_id + 1;
if(rewind_id == next_rewind_id) { /* Begins unwinding the stack (save locals and call stack to rewind later) */
co->rewind_id = next_rewind_id;
_asyncify_start_unwind(&co->stack_region);
} else if(rewind_id == co->rewind_id) { /* Continue from yield point. */
_asyncify_stop_rewind();
} else {
/* Neither the next id nor the current one: let the rewind continue. */
}
}
/*
 * Called when jumping out of a coroutine (from mco_yield, and from
 * _mco_run_entry once the entry function has returned).
 *
 * co - coroutine to leave.
 *
 * Captures the id of this yield point in a local and passes it on to
 * _mco_finish_jumpout, which decides whether to unwind, stop a rewind,
 * or do nothing.
 */
static __attribute__((noinline)) void _mco_jumpout(mco_coro* co) {
/*
Save the rewind point into a local that is meant to be restored when
rewinding. As a result "rewind_id != co->rewind_id + 1" may hold while
rewinding, because co->rewind_id has been advanced since the capture.
volatile is used to keep the compiler from optimizing the local away.
*/
volatile int rewind_id = co->rewind_id + 1;
_mco_finish_jumpout(co, rewind_id);
}
/*
 * Run the coroutine's entry function and mark the coroutine dead once it
 * returns.
 *
 * co - coroutine whose `entry` is invoked with `co` as its argument.
 */
static __attribute__((noinline)) void _mco_run_entry(mco_coro* co) {
co->entry(co);
co->state = MCO_DEAD; /* Coroutine finished, it should now be dead. */
_mco_jumpout(co); /* Jump out anyway, because `_mco_jumpin` will always call `_asyncify_stop_unwind`. */
}
/*
 * Called when jumping into a coroutine (from mco_resume).
 *
 * co - coroutine to enter.
 *
 * If the coroutine has yielded before (rewind_id > 0) a rewind is started
 * from co->stack_region first; the entry is then run, and the unwind is
 * stopped once the call comes back.
 *
 * NOTE(review): noinline is a compiler attribute; whether later wasm-level
 * optimization passes also keep this function separate from its caller and
 * callee is not visible here - confirm, since the unwind/rewind scheme
 * presumably depends on these functions remaining distinct.
 */
static __attribute__((noinline)) void _mco_jumpin(mco_coro* co) {
if(co->rewind_id > 0) { /* Begin rewinding until last yield point. */
_asyncify_start_rewind(&co->stack_region);
}
_mco_run_entry(co); /* Execute the coroutine entry. */
_asyncify_stop_unwind(); /* Stop saving coroutine stack. */
}
/*
 * Resume (or start) a coroutine.
 *
 * co - coroutine to switch into.
 *
 * The state is set to MCO_RUNNING before jumping in; when this call comes
 * back, co->state holds whatever the coroutine side stored last
 * (MCO_SUSPENDED from mco_yield or MCO_DEAD from _mco_run_entry).
 */
static void mco_resume(mco_coro* co) {
co->state = MCO_RUNNING;
_mco_jumpin(co);
}
/*
 * Suspend the coroutine from inside its entry function.
 *
 * co - the coroutine currently running.
 *
 * The state is stored before jumping out so that the code that called
 * mco_resume observes MCO_SUSPENDED once control is back there.
 */
static void mco_yield(mco_coro* co) {
co->state = MCO_SUSPENDED;
_mco_jumpout(co);
}
/*
 * Initialize a coroutine so that the first mco_resume() runs `entry`.
 *
 * co    - coroutine object to set up; its previous contents are overwritten
 *         (the `stack` buffer itself is left untouched).
 * entry - function to execute as the coroutine body.
 *
 * The coroutine starts out MCO_SUSPENDED with rewind_id 0, so _mco_jumpin
 * will not attempt a rewind on the first resume.
 */
static void mco_init(mco_coro* co, void (*entry)(mco_coro* co)) {
  co->state = MCO_SUSPENDED;
  co->rewind_id = 0;
  co->entry = entry;
  /* The save area is the coroutine's own buffer. Its end is derived from the
     array itself so the bound always matches the declared size, and plain
     pointer arithmetic avoids a pointer/integer round-trip. */
  co->stack_region.start = co->stack;
  co->stack_region.limit = co->stack + sizeof co->stack;
}
// Body of the test coroutine: announces its start, then yields ten times,
// printing the iteration number each time it is resumed, and finally
// announces that it is done.
void coro_entry(mco_coro* co) {
  int step = 0;

  printf("coroutine started\n");
  while (step < 10) {
    mco_yield(co); // Hand control back to whoever resumed us.
    printf("coroutine %d\n", step);
    ++step;
  }
  printf("coroutine finished\n");
}
// The single coroutine driven by main().
mco_coro co;

// Test driver: starts the coroutine, resumes it once per yield, and checks
// the coroutine state after each phase. Returns 0 when all checks pass.
int main(void) {
  // Set up the coroutine to run `coro_entry`.
  mco_init(&co, coro_entry);
  // The coroutine should be now in suspended state.
  assert(co.state == MCO_SUSPENDED);
  // Call `mco_resume` to start for the first time, switching to its context.
  mco_resume(&co); // Should print "coroutine started".
  // We get back from coroutine context in suspended state (because it's unfinished).
  assert(co.state == MCO_SUSPENDED);
  // Call `mco_resume` 10 more times, once for each yield in `coro_entry`.
  for (int i = 0; i < 10; ++i) {
    mco_resume(&co); // Should print "coroutine X".
  }
  // The coroutine finished and should be now dead.
  assert(co.state == MCO_DEAD);
  return 0;
}
Sorry for sharing such a large test case; this is as far as I could reduce the issue, but it is commented and should not be hard to follow.
When I compile with
emcc t.c -o t.wasm -s ERROR_ON_UNDEFINED_SYMBOLS=0 -O0
wasm-opt --asyncify t.wasm -o t.wasm
wasmtime t.wasm
Things work fine and I get the expected output:
coroutine started
coroutine 0
coroutine 1
coroutine 2
coroutine 3
coroutine 4
coroutine 5
coroutine 6
coroutine 7
coroutine 8
coroutine 9
coroutine finished
However when I enable optimizations with -Oz it breaks:
emcc t.c -o t.wasm -s ERROR_ON_UNDEFINED_SYMBOLS=0 -Oz
wasm-opt --asyncify t.wasm -o t.wasm
wasmtime t.wasm
coroutine started
Assertion failed: co.state == MCO_SUSPENDED (t.c: main: 112)
It seems that with -Oz the unwinding does not work as expected at the first yield point, leaving the coroutine in an invalid state.
What am I doing wrong here, or is this a Binaryen bug? I tried marking the Asyncify runtime functions as noinline; this made things work with -O0, but it still fails with -Oz.
Environment:
$ emcc -v
emcc (Emscripten gcc/clang-like replacement + linker emulating GNU ld) 3.1.1-git (1934a98e709b57d3592b8272d3f1264a72c089e4)
clang version 14.0.0 (/srcdest/llvm-project 50fb44eebb0397f9b5f45a44239d6b53faf07c3b)
Target: wasm32-unknown-emscripten
Thread model: posix
InstalledDir: /opt/emscripten-llvm/bin
$ wasm-opt --version
wasm-opt version 104 (version_104-23-g1ef8f1f2c)
Side notes:
The shared test case is a minimal version of the code used in the https://github.com/edubart/minicoro project, a cross-platform coroutine library for C. I hope to improve its support for standalone WASM with Asyncify by sorting out this issue.
I was reading @kripken's slides and blog post about Asyncify and took a shot at implementing a minimal coroutine system in pure C for WASM with it, intended to work in standalone WASM. However, while it works fine when compiled with -O0 or -O1, it fails when optimizations are enabled with -O2 or -Oz.
Here is the smallest commented test case I could come up with; it is really just a minimal coroutine system based on Asyncify:
Sorry for sharing such a large test case; this is as far as I could reduce the issue, but it is commented and should not be hard to follow.
When I compile with
Things work fine and I get the expected output:
However, when I enable optimizations with -Oz it breaks:
It seems that with -Oz the unwinding does not work as expected at the first yield point, leaving the coroutine in an invalid state.
What am I doing wrong here, or is this a Binaryen bug? I tried marking the Asyncify runtime functions as noinline; this made things work with -O0, but it still fails with -Oz.
Environment:
Side notes:
The shared test case is a minimal version of the code used in the https://github.com/edubart/minicoro project, a cross-platform coroutine library for C. I hope to improve its support for standalone WASM with Asyncify by sorting out this issue.