Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 85 additions & 3 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ static LONG atomic_fetch_add(atomic_int* ptr, LONG inc) {
// Subtraction counterpart of atomic_fetch_add: forwards to it with the negated
// amount and hands back whatever atomic_fetch_add returns.
static LONG atomic_fetch_sub(atomic_int* ptr, LONG dec) {
    const LONG negated = -dec;
    return atomic_fetch_add(ptr, negated);
}
// Win32 stand-in for C11 atomic_compare_exchange_strong.
//   ptr      - location to update
//   expected - in: value *ptr must hold for the exchange to happen
//              out (on failure only): the value that was actually found in *ptr
//   desired  - value stored into *ptr when the comparison succeeds
// Returns 1 when the exchange took place, 0 otherwise.
static LONG atomic_compare_exchange_strong(atomic_int* ptr, atomic_int* expected, LONG desired) {
    // InterlockedCompareExchange(Destination, Exchange, Comparand) returns the value
    // that Destination held before the call, so success means "previous == comparand".
    const LONG comparand = *expected;
    const LONG previous  = InterlockedCompareExchange(ptr, desired, comparand);
    if (previous == comparand) {
        return 1;
    }
    *expected = previous;
    return 0;
}

typedef HANDLE pthread_t;

Expand All @@ -72,6 +75,14 @@ static int sched_yield (void) {
Sleep (0);
return 0;
}

// Minimal Windows replacement for POSIX nanosleep(), built on Sleep().
//   req - requested duration; converted to whole milliseconds plus one extra
//         millisecond so that sub-millisecond requests still sleep
//   rem - ignored, nothing is ever written to it
// Always returns 0.
// hack, there is a better impl here https://github.com/jart/cosmopolitan/blob/3f0bcdc3ef00c2c79b1be85b1fa207d508509ac7/libc/calls/nanosleep.c#L39
int nanosleep(const struct timespec *req, struct timespec *rem) {
    (void) rem;

    // Sleep() takes a 32-bit unsigned millisecond count in which 0xFFFFFFFF means
    // "wait forever", so the largest finite request is one below that.
    const int64_t max_ms = 0xFFFFFFFELL;

    // clamp the seconds first so the multiplication below cannot overflow
    int64_t sec = (int64_t) req->tv_sec;
    if (sec < 0) {
        sec = 0;
    } else if (sec > max_ms / 1000) {
        sec = max_ms / 1000;
    }

    int64_t ms = (sec * 1000) + (req->tv_nsec / 1000000) + 1;
    if (ms < 0) {
        ms = 0;
    } else if (ms > max_ms) {
        ms = max_ms;
    }

    // ms now fits the 32-bit argument, so the narrowing is lossless
    Sleep((unsigned long) ms);
    return 0;
}

#else
#include <pthread.h>
#include <stdatomic.h>
Expand Down Expand Up @@ -9085,6 +9096,77 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
// I tried using spin locks, but not sure how to use them correctly - the things I tried were slower than busy loops
//

// Sleeps for roughly `ms` milliseconds via nanosleep(); the call is not retried,
// and its return value is ignored.
//   ms - duration in milliseconds; values <= 0 request a 1 ns sleep instead
// https://github.com/AdoptOpenJDK/openjdk-jdk11/blob/master/src/hotspot/os/bsd/os_bsd.cpp#L2322
// makes sense to platformize
void ggml_naked_short_sleep(long ms) {
    struct timespec req;

    if (ms > 0) {
        // carry whole seconds into tv_sec so that ms >= 1000 is not silently
        // truncated to its sub-second remainder
        req.tv_sec  = ms / 1000;
        req.tv_nsec = (ms % 1000) * 1000000L;
    } else {
        req.tv_sec  = 0;
        req.tv_nsec = 1;
    }

    nanosleep(&req, NULL);
}

// Thin wrapper over sched_yield(); the result of the call is discarded.
// Modeled on https://github.com/AdoptOpenJDK/openjdk-jdk11/blob/master/src/hotspot/os/bsd/os_bsd.cpp#L2350
// (a candidate for per-platform implementations)
void ggml_naked_yield(void) {
    (void) sched_yield();
}

// Issues a CPU spin-wait hint where the compiler/architecture pair is known,
// and is a no-op elsewhere. Intended to be called from the body of a busy-wait loop.
// Always returns 0.
int ggml_spin_pause(void) {
#if (defined(__GNUC__) || defined(__clang__)) && (defined(__x86_64__) || defined(__i386__))
    __asm__ __volatile__("pause");
#elif (defined(__GNUC__) || defined(__clang__)) && defined(__aarch64__)
    __asm__ __volatile__("yield");
#endif
    return 0;
}

// Acquires the spin lock stored at `addr` (0 = free, 1 = held), blocking the
// caller until the 0 -> 1 compare-and-exchange succeeds.
//
// Strategy (adapted from HotSpot's Thread::SpinAcquire):
// https://github.com/AdoptOpenJDK/openjdk-jdk11/blob/19fb8f93c59dfd791f62d41f332db9e306bc1422/src/hotspot/share/runtime/thread.cpp#L4711
//   1. fast path: a single compare-and-exchange
//   2. under contention, poll *addr until it reads 0; on every 4096th poll
//      yield the CPU (the first six times) or sleep 1 ms (afterwards), and call
//      ggml_spin_pause() on all other polls
//   3. once the lock looks free, retry the compare-and-exchange; on failure go back to 2
void ggml_spin_acquire(atomic_int * addr) {
    int expected = 0;
    if (atomic_compare_exchange_strong(addr, &expected, 1)) {
        return; // normal fast-path return
    }

    // Slow-path : We've encountered contention -- Spin/Yield/Block strategy.
    // The poll counter is unsigned so that it wraps instead of overflowing a
    // signed int when the lock stays contended for a very long time.
    unsigned int spins = 0;
    int yields = 0;
    for (;;) {
        while (*addr != 0) {
            ++spins;
            if ((spins & 0xFFF) != 0) {
                ggml_spin_pause();
            } else if (yields > 5) {
                ggml_naked_short_sleep(1);
            } else {
                ggml_naked_yield();
                ++yields;
            }
        }

        // a failed exchange overwrites `expected`, so reset it before every attempt
        expected = 0;
        if (atomic_compare_exchange_strong(addr, &expected, 1)) {
            return;
        }
    }
}

// Releases the spin lock at `addr`: issues a sequentially-consistent fence and
// then stores 0 (the "free" value) into it.
void ggml_spin_release(atomic_int * addr) {
    enum { GGML_SPIN_UNLOCKED = 0 };

    atomic_thread_fence(memory_order_seq_cst);
    atomic_store(addr, GGML_SPIN_UNLOCKED);
}

#ifdef __APPLE__

//#include <os/lock.h>
Expand All @@ -9098,12 +9180,12 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
//
//#define GGML_LOCK_INITIALIZER OS_UNFAIR_LOCK_INIT

// Lock type and operations used on this platform branch.
// NOTE(review): this region comes from a rendered diff, so removed and added lines
// appear side by side. Going by the "3 deletions" in the diff summary, the `int`
// typedef and the two UNUSED(x) lock/unlock macros look like the removed lines and
// the atomic_int / ggml_spin_* variants the added ones — confirm against the real file.
typedef int ggml_lock_t;
typedef atomic_int ggml_lock_t;

// init/destroy only mark their argument as used
#define ggml_lock_init(x) UNUSED(x)
#define ggml_lock_destroy(x) UNUSED(x)
#define ggml_lock_lock(x) UNUSED(x)
#define ggml_lock_unlock(x) UNUSED(x)
// lock/unlock forward to the spin lock helpers; x is passed through unchanged,
// so it must be usable as the atomic_int * those helpers take
#define ggml_lock_lock(x) ggml_spin_acquire(x)
#define ggml_lock_unlock(x) ggml_spin_release(x)

// initial value of a lock; 0 is the value ggml_spin_release stores
#define GGML_LOCK_INITIALIZER 0

Expand Down