Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
305 changes: 151 additions & 154 deletions std/assembly/util/memory.ts
Original file line number Diff line number Diff line change
@@ -1,150 +1,143 @@
export function memcpy(dest: usize, src: usize, n: usize): void { // see: musl/src/string/memcpy.c
if (ASC_SHRINK_LEVEL > 1) {
while (n) {
store<u8>(dest++, load<u8>(src++));
--n;
}
} else {
let w: u32, x: u32;
var w: u32, x: u32;

// copy 1 byte each until src is aligned to 4 bytes
while (n && (src & 3)) {
// copy 1 byte each until src is aligned to 4 bytes
while (n && (src & 3)) {
store<u8>(dest++, load<u8>(src++));
n--;
}

// if dst is aligned to 4 bytes as well, copy 4 bytes each
if ((dest & 3) == 0) {
while (n >= 16) {
store<u32>(dest , load<u32>(src ));
store<u32>(dest + 4, load<u32>(src + 4));
store<u32>(dest + 8, load<u32>(src + 8));
store<u32>(dest + 12, load<u32>(src + 12));
src += 16; dest += 16; n -= 16;
}
if (n & 8) {
store<u32>(dest , load<u32>(src ));
store<u32>(dest + 4, load<u32>(src + 4));
dest += 8; src += 8;
}
if (n & 4) {
store<u32>(dest, load<u32>(src));
dest += 4; src += 4;
}
if (n & 2) { // drop to 2 bytes each
store<u16>(dest, load<u16>(src));
dest += 2; src += 2;
}
if (n & 1) { // drop to 1 byte
store<u8>(dest++, load<u8>(src++));
n--;
}
return;
}

// if dst is aligned to 4 bytes as well, copy 4 bytes each
if ((dest & 3) == 0) {
while (n >= 16) {
store<u32>(dest , load<u32>(src ));
store<u32>(dest + 4, load<u32>(src + 4));
store<u32>(dest + 8, load<u32>(src + 8));
store<u32>(dest + 12, load<u32>(src + 12));
src += 16; dest += 16; n -= 16;
}
if (n & 8) {
store<u32>(dest , load<u32>(src ));
store<u32>(dest + 4, load<u32>(src + 4));
dest += 8; src += 8;
}
if (n & 4) {
store<u32>(dest, load<u32>(src));
dest += 4; src += 4;
}
if (n & 2) { // drop to 2 bytes each
store<u16>(dest, load<u16>(src));
dest += 2; src += 2;
}
if (n & 1) { // drop to 1 byte
// if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
// doing shifts is faster when copying enough bytes (here: 32 or more)
if (n >= 32) {
switch (dest & 3) {
// known to be != 0
case 1: {
w = load<u32>(src);
store<u8>(dest++, load<u8>(src++));
}
return;
}

// if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
// doing shifts is faster when copying enough bytes (here: 32 or more)
if (n >= 32) {
switch (dest & 3) {
// known to be != 0
case 1: {
w = load<u32>(src);
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
n -= 3;
while (n >= 17) {
x = load<u32>(src + 1);
store<u32>(dest, w >> 24 | x << 8);
w = load<u32>(src + 5);
store<u32>(dest + 4, x >> 24 | w << 8);
x = load<u32>(src + 9);
store<u32>(dest + 8, w >> 24 | x << 8);
w = load<u32>(src + 13);
store<u32>(dest + 12, x >> 24 | w << 8);
src += 16; dest += 16; n -= 16;
}
break;
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
n -= 3;
while (n >= 17) {
x = load<u32>(src + 1);
store<u32>(dest, w >> 24 | x << 8);
w = load<u32>(src + 5);
store<u32>(dest + 4, x >> 24 | w << 8);
x = load<u32>(src + 9);
store<u32>(dest + 8, w >> 24 | x << 8);
w = load<u32>(src + 13);
store<u32>(dest + 12, x >> 24 | w << 8);
src += 16; dest += 16; n -= 16;
}
case 2: {
w = load<u32>(src);
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
n -= 2;
while (n >= 18) {
x = load<u32>(src + 2);
store<u32>(dest, w >> 16 | x << 16);
w = load<u32>(src + 6);
store<u32>(dest + 4, x >> 16 | w << 16);
x = load<u32>(src + 10);
store<u32>(dest + 8, w >> 16 | x << 16);
w = load<u32>(src + 14);
store<u32>(dest + 12, x >> 16 | w << 16);
src += 16; dest += 16; n -= 16;
}
break;
break;
}
case 2: {
w = load<u32>(src);
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
n -= 2;
while (n >= 18) {
x = load<u32>(src + 2);
store<u32>(dest, w >> 16 | x << 16);
w = load<u32>(src + 6);
store<u32>(dest + 4, x >> 16 | w << 16);
x = load<u32>(src + 10);
store<u32>(dest + 8, w >> 16 | x << 16);
w = load<u32>(src + 14);
store<u32>(dest + 12, x >> 16 | w << 16);
src += 16; dest += 16; n -= 16;
}
case 3: {
w = load<u32>(src);
store<u8>(dest++, load<u8>(src++));
n -= 1;
while (n >= 19) {
x = load<u32>(src + 3);
store<u32>(dest, w >> 8 | x << 24);
w = load<u32>(src + 7);
store<u32>(dest + 4, x >> 8 | w << 24);
x = load<u32>(src + 11);
store<u32>(dest + 8, w >> 8 | x << 24);
w = load<u32>(src + 15);
store<u32>(dest + 12, x >> 8 | w << 24);
src += 16; dest += 16; n -= 16;
}
break;
break;
}
case 3: {
w = load<u32>(src);
store<u8>(dest++, load<u8>(src++));
n -= 1;
while (n >= 19) {
x = load<u32>(src + 3);
store<u32>(dest, w >> 8 | x << 24);
w = load<u32>(src + 7);
store<u32>(dest + 4, x >> 8 | w << 24);
x = load<u32>(src + 11);
store<u32>(dest + 8, w >> 8 | x << 24);
w = load<u32>(src + 15);
store<u32>(dest + 12, x >> 8 | w << 24);
src += 16; dest += 16; n -= 16;
}
break;
}
}
}

// copy remaining bytes one by one
if (n & 16) {
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
}
if (n & 8) {
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
}
if (n & 4) {
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
}
if (n & 2) {
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
}
if (n & 1) {
store<u8>(dest++, load<u8>(src++));
}
// copy remaining bytes one by one
if (n & 16) {
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
}
if (n & 8) {
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
}
if (n & 4) {
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
}
if (n & 2) {
store<u8>(dest++, load<u8>(src++));
store<u8>(dest++, load<u8>(src++));
}
if (n & 1) {
store<u8>(dest++, load<u8>(src++));
}
}

Expand All @@ -159,32 +152,36 @@ export function memmove(dest: usize, src: usize, n: usize): void { // see: musl/
}
}
if (dest < src) {
if ((src & 7) == (dest & 7)) {
while (dest & 7) {
if (!n) return;
--n;
store<u8>(dest++, load<u8>(src++));
}
while (n >= 8) {
store<u64>(dest, load<u64>(src));
n -= 8;
dest += 8;
src += 8;
if (ASC_SHRINK_LEVEL < 2) {
if ((src & 7) == (dest & 7)) {
while (dest & 7) {
if (!n) return;
--n;
store<u8>(dest++, load<u8>(src++));
}
while (n >= 8) {
store<u64>(dest, load<u64>(src));
n -= 8;
dest += 8;
src += 8;
}
}
}
while (n) {
store<u8>(dest++, load<u8>(src++));
--n;
}
} else {
if ((src & 7) == (dest & 7)) {
while ((dest + n) & 7) {
if (!n) return;
store<u8>(dest + --n, load<u8>(src + n));
}
while (n >= 8) {
n -= 8;
store<u64>(dest + n, load<u64>(src + n));
if (ASC_SHRINK_LEVEL < 2) {
if ((src & 7) == (dest & 7)) {
while ((dest + n) & 7) {
if (!n) return;
store<u8>(dest + --n, load<u8>(src + n));
}
while (n >= 8) {
n -= 8;
store<u64>(dest + n, load<u64>(src + n));
}
}
}
while (n) {
Expand Down