Skip to content

Asyncify runtime fails when enabling -Oz optimization #4484

@edubart

Description

@edubart

I was reading @kripken's slides and blog post about Asyncify, and I took a shot at implementing a minimal coroutine system on top of it in pure C, targeting standalone WASM. It works fine when I compile with -O0 or -O1, but it fails when I enable optimizations with -O2 or -Oz.

Here is the most minimal test case, with comments, that I could come up with; it's really just a minimal coroutine system based on Asyncify:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* Import asyncify */
/*
Memory region descriptor passed by pointer to the asyncify `start_unwind` and
`start_rewind` imports. In this file it is filled in by `mco_init`, which points
it at the coroutine's own `stack` buffer.
NOTE(review): presumably the asyncify runtime updates `start` as it saves and
restores data - confirm against Binaryen's Asyncify documentation.
*/
typedef struct _asyncify_stack_region {
  void* start; /* Set by `mco_init` to the first byte of the save buffer. */
  void* limit; /* Set by `mco_init` to one past the last byte of the save buffer. */
} _asyncify_stack_region;
/*
Asyncify control functions, declared as WebAssembly imports from the "asyncify"
module (the test case is post-processed with `wasm-opt --asyncify`).
- start_unwind(region): begin unwinding, saving locals and call stack into `region`.
- stop_unwind():        stop saving the coroutine stack.
- start_rewind(region): begin rewinding from the data saved in `region`.
- stop_rewind():        stop rewinding and continue from the yield point.
The argument-less functions are declared with `(void)`: an empty `()` parameter
list is not a prototype in C and leaves the signature unspecified.
*/
__attribute__((import_module("asyncify"), import_name("start_unwind"))) void _asyncify_start_unwind(_asyncify_stack_region*);
__attribute__((import_module("asyncify"), import_name("stop_unwind"))) void _asyncify_stop_unwind(void);
__attribute__((import_module("asyncify"), import_name("start_rewind"))) void _asyncify_start_rewind(_asyncify_stack_region*);
__attribute__((import_module("asyncify"), import_name("stop_rewind"))) void _asyncify_stop_rewind(void);

/* Coroutine lifecycle state, stored in `mco_coro.state`. */
typedef enum mco_state {
  MCO_DEAD = 0,   /* Set by `_mco_run_entry` after the entry function has returned. */
  MCO_RUNNING,    /* Set by `mco_resume` just before jumping into the coroutine. */
  MCO_SUSPENDED   /* Set by `mco_init` and by `mco_yield` just before jumping out. */
} mco_state;

/*
Coroutine object: lifecycle state, the asyncify save region, the id of the
current rewind point, the entry function, and the buffer that backs the region.
*/
typedef struct mco_coro {
  mco_state state; /* Current lifecycle state (see `mco_state`). */
  _asyncify_stack_region stack_region; /* Region passed to asyncify start_unwind/start_rewind; set up by `mco_init` to cover `stack`. */
  int rewind_id; /* Current point to rewind to when resuming; 0 until the first jump-out. */
  void (*entry)(struct mco_coro* co); /* Entry point of the coroutine. */
  uint8_t stack[8192]; /* Buffer for saving coroutine stack locals and call stack. */
} mco_coro;

/*
Finish a jump-out by comparing the caller's saved `rewind_id` with the
coroutine's current `co->rewind_id`:
- `rewind_id == co->rewind_id + 1`: a new yield point; record it and start unwinding.
- `rewind_id == co->rewind_id`: the yield point being resumed; stop rewinding.
- otherwise: do nothing, so a rewind that is in progress keeps going.
NOTE(review): `noinline` is presumably needed so this stays a separate call for
the Asyncify instrumentation - confirm.
*/
static __attribute__((noinline)) void _mco_finish_jumpout(mco_coro* co, volatile int rewind_id) {
  int next_rewind_id = co->rewind_id + 1;
  if(rewind_id == next_rewind_id) { /* Begins unwinding the stack (save locals and call stack to rewind later) */
    co->rewind_id = next_rewind_id; /* Remember this yield point as the one to rewind to. */
    _asyncify_start_unwind(&co->stack_region);
  } else if(rewind_id == co->rewind_id) { /* Continue from yield point. */
    _asyncify_stop_rewind();
  } else {
    /* Let it continue rewinding... */
  }
}

/*
Called when jumping out of a coroutine (from `mco_yield`, and from
`_mco_run_entry` once the entry function has returned). Captures the id of this
yield point in a local and hands it to `_mco_finish_jumpout`, which decides
whether to start unwinding, stop rewinding, or do nothing.
*/
static __attribute__((noinline)) void _mco_jumpout(mco_coro* co) {
  /*
  Save the rewind point into a local, which is expected to be restored when
  rewinding. As a result, "rewind_id != co->rewind_id + 1" may be true while
  rewinding, because `co->rewind_id` was advanced when the unwind started.
  Use volatile here just to be safe from the compiler optimizing this out.
  */
  volatile int rewind_id = co->rewind_id + 1;
  _mco_finish_jumpout(co, rewind_id);
}

/*
Run the coroutine entry function, then mark the coroutine dead and jump out.
The statements after `co->entry(co)` are meant to take effect only once the
entry function has actually finished.
*/
static __attribute__((noinline)) void _mco_run_entry(mco_coro* co) {
  co->entry(co);
  co->state = MCO_DEAD; /* Coroutine finished, it should now be dead. */
  _mco_jumpout(co); /* Jump out anyway, because `_mco_jumpin` will always call `_asyncify_stop_unwind`. */
}

/*
Called when jumping into a coroutine. If the coroutine has jumped out before
(`rewind_id > 0`), a rewind is started first; the entry is then run, and the
unwind is stopped once control comes back.
*/
static __attribute__((noinline)) void _mco_jumpin(mco_coro* co) {
  if(co->rewind_id > 0) {  /* Begin rewinding until last yield point. */
    _asyncify_start_rewind(&co->stack_region);
  }
  _mco_run_entry(co); /* Execute the coroutine entry. */
  _asyncify_stop_unwind(); /* Stop saving coroutine stack. */
}

/*
Resume (or start) a coroutine: mark it running, then jump into it.
Control comes back here after `_mco_jumpin` returns; the state is then whatever
the coroutine side last stored (`main` asserts MCO_SUSPENDED or MCO_DEAD).
*/
static void mco_resume(mco_coro* co) {
  co->state = MCO_RUNNING;
  _mco_jumpin(co);
}

/*
Suspend the coroutine: mark it suspended, then jump out of it.
The state is written before the jump-out so it is already in place when
control gets back to the code that resumed the coroutine.
*/
static void mco_yield(mco_coro* co) {
  co->state = MCO_SUSPENDED;
  _mco_jumpout(co);
}

/*
Initialize a coroutine in place.
  co    - coroutine object to set up; must not be NULL.
  entry - function run when the coroutine is first resumed.
The coroutine starts out suspended, with no rewind point (`rewind_id == 0`) and
with its asyncify region covering the whole `co->stack` buffer.
*/
static void mco_init(mco_coro* co, void (*entry)(mco_coro* co)) {
  co->state = MCO_SUSPENDED;
  co->rewind_id = 0;
  co->entry = entry;
  /*
  Derive both bounds from the buffer itself, so the region can never drift from
  the array size declared in `mco_coro`.
  */
  co->stack_region.start = co->stack;
  co->stack_region.limit = co->stack + sizeof(co->stack);
}

// Test coroutine body: announces its start, then yields ten times, printing the
// iteration number each time it is resumed, and finally announces that it finished.
void coro_entry(mco_coro* co) {
  printf("coroutine started\n");
  int iteration = 0;
  while(iteration < 10) {
    mco_yield(co); // Hand control back to whoever resumed us.
    printf("coroutine %d\n", iteration);
    iteration += 1;
  }
  printf("coroutine finished\n");
}

/* The single coroutine instance used by `main`; defined at file scope. */
mco_coro co;

/*
Test driver: initializes the global coroutine, resumes it once to start it and
ten more times to drive it through its ten yield points, asserting the expected
state along the way. Returns 0 when every assertion holds.
*/
int main(void) {
  // Call `mco_init` with the coroutine object and its entry function.
  mco_init(&co, coro_entry);
  // The coroutine should be now in suspended state.
  assert(co.state == MCO_SUSPENDED);
  // Call `mco_resume` to start for the first time, switching to its context.
  mco_resume(&co); // Should print "coroutine started".
  // We get back from coroutine context in suspended state (because it's unfinished).
  assert(co.state == MCO_SUSPENDED);
  // Call `mco_resume` 10 more times.
  for(int i=0;i<10;++i) {
    mco_resume(&co); // Should print "coroutine X".
  }
  // The coroutine finished and should be now dead.
  assert(co.state == MCO_DEAD);
  return 0;
}

Sorry for sharing a test case this big; this is as far as I could minimize the issue, but it is commented and it should not be hard to understand what is going on.

When I compile with

emcc t.c -o t.wasm -s ERROR_ON_UNDEFINED_SYMBOLS=0 -O0
wasm-opt --asyncify t.wasm -o t.wasm
wasmtime t.wasm

Things work fine and I get the expected output:

coroutine started
coroutine 0
coroutine 1
coroutine 2
coroutine 3
coroutine 4
coroutine 5
coroutine 6
coroutine 7
coroutine 8
coroutine 9
coroutine finished

However when I enable optimizations with -Oz it breaks:

emcc t.c -o t.wasm -s ERROR_ON_UNDEFINED_SYMBOLS=0 -Oz
wasm-opt --asyncify t.wasm -o t.wasm
wasmtime t.wasm
coroutine started
Assertion failed: co.state == MCO_SUSPENDED (t.c: main: 112)

It seems that with -Oz enabled, the unwinding does not work as expected at the first yield point, leaving the coroutine in an invalid state.
What am I doing wrong here, or is this a Binaryen bug? I've tried marking the functions of the Asyncify runtime code as noinline; this made things work with -O0, but it still fails with -Oz.

Environment:

$ emcc -v
emcc (Emscripten gcc/clang-like replacement + linker emulating GNU ld) 3.1.1-git (1934a98e709b57d3592b8272d3f1264a72c089e4)
clang version 14.0.0 (/srcdest/llvm-project 50fb44eebb0397f9b5f45a44239d6b53faf07c3b)
Target: wasm32-unknown-emscripten
Thread model: posix
InstalledDir: /opt/emscripten-llvm/bin

$ wasm-opt --version
wasm-opt version 104 (version_104-23-g1ef8f1f2c)

Side notes:
The shared test case is a minimal version of what is used in the https://github.com/edubart/minicoro project, a cross-platform coroutine library for C. I hope to improve its support for standalone WASM with Asyncify by sorting out this issue.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions