From a5b164946ccc9dec037d4e0a1cd2f2202b1c918a Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Mon, 5 Oct 2020 22:13:25 +0300 Subject: [PATCH 1/2] add fninit to reset fpu registers before assembler routines --- kernel/x86_64/amax.S | 2 ++ kernel/x86_64/asum.S | 3 ++- kernel/x86_64/dot.S | 1 + kernel/x86_64/iamax.S | 1 + kernel/x86_64/izamax.S | 1 + kernel/x86_64/nrm2.S | 1 + kernel/x86_64/qconjg.S | 1 + kernel/x86_64/qdot.S | 2 ++ kernel/x86_64/qgemm_kernel_2x2.S | 2 ++ kernel/x86_64/qgemv_n.S | 2 ++ kernel/x86_64/qgemv_t.S | 1 + kernel/x86_64/qtrsm_kernel_LN_2x2.S | 2 ++ kernel/x86_64/qtrsm_kernel_LT_2x2.S | 2 ++ kernel/x86_64/qtrsm_kernel_RT_2x2.S | 3 +++ kernel/x86_64/sum.S | 2 ++ kernel/x86_64/xdot.S | 3 +++ kernel/x86_64/xgemm3m_kernel_2x2.S | 2 ++ kernel/x86_64/xgemm_kernel_1x1.S | 2 ++ kernel/x86_64/xgemv_n.S | 2 ++ kernel/x86_64/xgemv_t.S | 2 ++ kernel/x86_64/xtrsm_kernel_LT_1x1.S | 2 ++ kernel/x86_64/zamax.S | 2 ++ kernel/x86_64/zasum.S | 2 ++ kernel/x86_64/zdot.S | 2 ++ kernel/x86_64/znrm2.S | 2 ++ kernel/x86_64/zscal.S | 2 ++ kernel/x86_64/zsum.S | 2 ++ 27 files changed, 50 insertions(+), 1 deletion(-) diff --git a/kernel/x86_64/amax.S b/kernel/x86_64/amax.S index 0e9bf4db44..257147dfb8 100644 --- a/kernel/x86_64/amax.S +++ b/kernel/x86_64/amax.S @@ -54,6 +54,8 @@ PROLOGUE PROFCODE + + fninit salq $BASE_SHIFT, INCX diff --git a/kernel/x86_64/asum.S b/kernel/x86_64/asum.S index 31f973894b..24f57dd111 100644 --- a/kernel/x86_64/asum.S +++ b/kernel/x86_64/asum.S @@ -49,7 +49,8 @@ PROLOGUE PROFCODE - + + fninit fldz testq M, M jle .L999 diff --git a/kernel/x86_64/dot.S b/kernel/x86_64/dot.S index e63d9cd893..2319885f19 100644 --- a/kernel/x86_64/dot.S +++ b/kernel/x86_64/dot.S @@ -49,6 +49,7 @@ PROLOGUE PROFCODE + fninit salq $BASE_SHIFT, INCX salq $BASE_SHIFT, INCY diff --git a/kernel/x86_64/iamax.S b/kernel/x86_64/iamax.S index 79e1bae1d0..0c666d623b 100644 --- a/kernel/x86_64/iamax.S +++ b/kernel/x86_64/iamax.S @@ -59,6 +59,7 @@ PROLOGUE PROFCODE + fninit salq $BASE_SHIFT, INCX diff --git a/kernel/x86_64/izamax.S b/kernel/x86_64/izamax.S index c066acd624..e450c2cd23 100644 --- a/kernel/x86_64/izamax.S +++ b/kernel/x86_64/izamax.S @@ -59,6 +59,7 @@ PROLOGUE PROFCODE + fninit salq $ZBASE_SHIFT, INCX diff --git a/kernel/x86_64/nrm2.S b/kernel/x86_64/nrm2.S index e9be1262ac..548e3b7447 100644 --- a/kernel/x86_64/nrm2.S +++ b/kernel/x86_64/nrm2.S @@ -50,6 +50,7 @@ PROLOGUE PROFCODE + fninit fldz testq M, M jle .L999 diff --git a/kernel/x86_64/qconjg.S b/kernel/x86_64/qconjg.S index 49ca766491..bab5418311 100644 --- a/kernel/x86_64/qconjg.S +++ b/kernel/x86_64/qconjg.S @@ -41,6 +41,7 @@ PROLOGUE PROFCODE + fninit fldz FLD 1 * SIZE(ARG1) diff --git a/kernel/x86_64/qdot.S b/kernel/x86_64/qdot.S index a48a04fdd0..e7d31360b0 100644 --- a/kernel/x86_64/qdot.S +++ b/kernel/x86_64/qdot.S @@ -58,6 +58,8 @@ PROLOGUE + fninit + pushl %edi pushl %esi pushl %ebx diff --git a/kernel/x86_64/qgemm_kernel_2x2.S b/kernel/x86_64/qgemm_kernel_2x2.S index 99db3961fa..7b5e7707d5 100644 --- a/kernel/x86_64/qgemm_kernel_2x2.S +++ b/kernel/x86_64/qgemm_kernel_2x2.S @@ -74,6 +74,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qgemv_n.S b/kernel/x86_64/qgemv_n.S index 630d03ffb1..1b65b03f0e 100644 --- a/kernel/x86_64/qgemv_n.S +++ b/kernel/x86_64/qgemv_n.S @@ -76,6 +76,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qgemv_t.S b/kernel/x86_64/qgemv_t.S index d7c9cd2a59..00188c2578 100644 --- a/kernel/x86_64/qgemv_t.S +++ b/kernel/x86_64/qgemv_t.S @@ -75,6 +75,7 @@ PROLOGUE PROFCODE + fninit subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_LN_2x2.S b/kernel/x86_64/qtrsm_kernel_LN_2x2.S index 536042e65e..030eff8934 100644 --- a/kernel/x86_64/qtrsm_kernel_LN_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_LN_2x2.S @@ -74,6 +74,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_LT_2x2.S b/kernel/x86_64/qtrsm_kernel_LT_2x2.S index 6e94976c5b..d86972c72c 100644 --- a/kernel/x86_64/qtrsm_kernel_LT_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_LT_2x2.S @@ -74,6 +74,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_RT_2x2.S b/kernel/x86_64/qtrsm_kernel_RT_2x2.S index caa7de14a6..2826a62c93 100644 --- a/kernel/x86_64/qtrsm_kernel_RT_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_RT_2x2.S @@ -74,6 +74,9 @@ PROLOGUE PROFCODE + fninit + + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/sum.S b/kernel/x86_64/sum.S index d075eaa042..3d5fa7cc29 100644 --- a/kernel/x86_64/sum.S +++ b/kernel/x86_64/sum.S @@ -50,6 +50,8 @@ PROLOGUE PROFCODE + fninit + fldz testq M, M jle .L999 diff --git a/kernel/x86_64/xdot.S b/kernel/x86_64/xdot.S index ea97164b24..ec89b799c2 100644 --- a/kernel/x86_64/xdot.S +++ b/kernel/x86_64/xdot.S @@ -59,6 +59,9 @@ PROFCODE + fninit + + #define N %ebx #define X %esi #define INCX %ecx diff --git a/kernel/x86_64/xgemm3m_kernel_2x2.S b/kernel/x86_64/xgemm3m_kernel_2x2.S index 843fc243aa..e8da78d82a 100644 --- a/kernel/x86_64/xgemm3m_kernel_2x2.S +++ b/kernel/x86_64/xgemm3m_kernel_2x2.S @@ -78,6 +78,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/xgemm_kernel_1x1.S b/kernel/x86_64/xgemm_kernel_1x1.S index e0cd1f1dfa..f04ab07f59 100644 --- a/kernel/x86_64/xgemm_kernel_1x1.S +++ b/kernel/x86_64/xgemm_kernel_1x1.S @@ -97,6 +97,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/xgemv_n.S b/kernel/x86_64/xgemv_n.S index cbde6402dc..7d28c118ac 100644 --- a/kernel/x86_64/xgemv_n.S +++ b/kernel/x86_64/xgemv_n.S @@ -76,6 +76,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/xgemv_t.S b/kernel/x86_64/xgemv_t.S index 31320f6514..e796760883 100644 --- a/kernel/x86_64/xgemv_t.S +++ b/kernel/x86_64/xgemv_t.S @@ -75,6 +75,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/xtrsm_kernel_LT_1x1.S b/kernel/x86_64/xtrsm_kernel_LT_1x1.S index a61a240fdb..54d41932f8 100644 --- a/kernel/x86_64/xtrsm_kernel_LT_1x1.S +++ b/kernel/x86_64/xtrsm_kernel_LT_1x1.S @@ -90,6 +90,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/zamax.S b/kernel/x86_64/zamax.S index 74e127e6c0..bfd836193d 100644 --- a/kernel/x86_64/zamax.S +++ b/kernel/x86_64/zamax.S @@ -55,6 +55,8 @@ PROLOGUE PROFCODE + fninit + salq $ZBASE_SHIFT, INCX fldz diff --git a/kernel/x86_64/zasum.S b/kernel/x86_64/zasum.S index c372fc5dd1..9ea2aadc05 100644 --- a/kernel/x86_64/zasum.S +++ b/kernel/x86_64/zasum.S @@ -50,6 +50,8 @@ PROLOGUE PROFCODE + fninit + fldz testq M, M jle .L999 diff --git a/kernel/x86_64/zdot.S b/kernel/x86_64/zdot.S index 94d1008ff1..f7df919b7c 100644 --- a/kernel/x86_64/zdot.S +++ b/kernel/x86_64/zdot.S @@ -54,6 +54,8 @@ PROLOGUE PROFCODE + fninit + #ifdef WINDOWS_ABI movq 40(%rsp), INCY #endif diff --git a/kernel/x86_64/znrm2.S b/kernel/x86_64/znrm2.S index 4115eab1db..cb02a5a9fe 100644 --- a/kernel/x86_64/znrm2.S +++ b/kernel/x86_64/znrm2.S @@ -50,6 +50,8 @@ PROLOGUE PROFCODE + fninit + fldz testq M, M jle .L999 diff --git a/kernel/x86_64/zscal.S b/kernel/x86_64/zscal.S index 5282e0f725..08c0831a44 100644 --- a/kernel/x86_64/zscal.S +++ b/kernel/x86_64/zscal.S @@ -50,6 +50,8 @@ PROLOGUE PROFCODE + fninit + salq $ZBASE_SHIFT, INCX FLD 8(%rsp) diff --git a/kernel/x86_64/zsum.S b/kernel/x86_64/zsum.S index 45e0ddff55..1c39048396 100644 --- a/kernel/x86_64/zsum.S +++ b/kernel/x86_64/zsum.S @@ -50,6 +50,8 @@ PROLOGUE PROFCODE + fninit + fldz testq M, M jle .L999 From 403eb513a0616020e7238b531bad739f6baef43a Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Mon, 12 Oct 2020 18:15:01 +0300 Subject: [PATCH 2/2] use emms instead, add WIN guards --- kernel/x86_64/amax.S | 4 +++- kernel/x86_64/asum.S | 5 ++++- kernel/x86_64/dot.S | 5 ++++- kernel/x86_64/iamax.S | 5 ++++- kernel/x86_64/izamax.S | 5 ++++- kernel/x86_64/nrm2.S | 5 ++++- kernel/x86_64/qconjg.S | 5 ++++- kernel/x86_64/qdot.S | 4 +++- kernel/x86_64/qgemm_kernel_2x2.S | 4 +++- kernel/x86_64/qgemv_n.S | 4 +++- kernel/x86_64/qgemv_t.S | 5 ++++- kernel/x86_64/qtrsm_kernel_LN_2x2.S | 4 +++- kernel/x86_64/qtrsm_kernel_LT_2x2.S | 4 +++- kernel/x86_64/qtrsm_kernel_RT_2x2.S | 5 +++-- kernel/x86_64/sum.S | 4 +++- kernel/x86_64/xdot.S | 4 +++- kernel/x86_64/xgemm3m_kernel_2x2.S | 4 +++- kernel/x86_64/xgemm_kernel_1x1.S | 4 +++- kernel/x86_64/xgemv_n.S | 4 +++- kernel/x86_64/xgemv_t.S | 4 +++- kernel/x86_64/xtrsm_kernel_LT_1x1.S | 4 +++- kernel/x86_64/zamax.S | 4 +++- kernel/x86_64/zasum.S | 4 +++- kernel/x86_64/zdot.S | 4 ++-- kernel/x86_64/znrm2.S | 4 +++- kernel/x86_64/zscal.S | 4 +++- kernel/x86_64/zsum.S | 4 +++- 27 files changed, 87 insertions(+), 29 deletions(-) diff --git a/kernel/x86_64/amax.S b/kernel/x86_64/amax.S index 257147dfb8..1498bb226c 100644 --- a/kernel/x86_64/amax.S +++ b/kernel/x86_64/amax.S @@ -55,7 +55,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif salq $BASE_SHIFT, INCX diff --git a/kernel/x86_64/asum.S b/kernel/x86_64/asum.S index 24f57dd111..a2cbfd4804 100644 --- a/kernel/x86_64/asum.S +++ b/kernel/x86_64/asum.S @@ -50,7 +50,10 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif + fldz testq M, M jle .L999 diff --git a/kernel/x86_64/dot.S b/kernel/x86_64/dot.S index 2319885f19..a11d25e5d1 100644 --- a/kernel/x86_64/dot.S +++ b/kernel/x86_64/dot.S @@ -49,7 +49,10 @@ PROLOGUE PROFCODE - fninit + +#ifdef WINDOWS_ABI + emms +#endif salq $BASE_SHIFT, INCX salq $BASE_SHIFT, INCY diff --git a/kernel/x86_64/iamax.S b/kernel/x86_64/iamax.S index 0c666d623b..00999e25f2 100644 --- a/kernel/x86_64/iamax.S +++ b/kernel/x86_64/iamax.S @@ -59,7 +59,10 @@ PROLOGUE PROFCODE - fninit + +#ifdef WINDOWS_ABI + emms +#endif salq $BASE_SHIFT, INCX diff --git a/kernel/x86_64/izamax.S b/kernel/x86_64/izamax.S index e450c2cd23..b24b2e6925 100644 --- a/kernel/x86_64/izamax.S +++ b/kernel/x86_64/izamax.S @@ -59,7 +59,10 @@ PROLOGUE PROFCODE - fninit + +#ifdef WINDOWS_ABI + emms +#endif salq $ZBASE_SHIFT, INCX diff --git a/kernel/x86_64/nrm2.S b/kernel/x86_64/nrm2.S index 548e3b7447..b79ac2adb0 100644 --- a/kernel/x86_64/nrm2.S +++ b/kernel/x86_64/nrm2.S @@ -50,7 +50,10 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif + fldz testq M, M jle .L999 diff --git a/kernel/x86_64/qconjg.S b/kernel/x86_64/qconjg.S index bab5418311..823a15a845 100644 --- a/kernel/x86_64/qconjg.S +++ b/kernel/x86_64/qconjg.S @@ -41,7 +41,10 @@ PROLOGUE PROFCODE - fninit + +#ifdef WINDOWS_ABI + emms +#endif fldz FLD 1 * SIZE(ARG1) diff --git a/kernel/x86_64/qdot.S b/kernel/x86_64/qdot.S index e7d31360b0..2243b6b6d8 100644 --- a/kernel/x86_64/qdot.S +++ b/kernel/x86_64/qdot.S @@ -58,7 +58,9 @@ PROLOGUE - fninit +#ifdef WINDOWS_ABI + emms +#endif pushl %edi pushl %esi diff --git a/kernel/x86_64/qgemm_kernel_2x2.S b/kernel/x86_64/qgemm_kernel_2x2.S index 7b5e7707d5..c11f3a91d6 100644 --- a/kernel/x86_64/qgemm_kernel_2x2.S +++ b/kernel/x86_64/qgemm_kernel_2x2.S @@ -74,7 +74,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/qgemv_n.S b/kernel/x86_64/qgemv_n.S index 1b65b03f0e..c9d345cb18 100644 --- a/kernel/x86_64/qgemv_n.S +++ b/kernel/x86_64/qgemv_n.S @@ -76,7 +76,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/qgemv_t.S b/kernel/x86_64/qgemv_t.S index 00188c2578..32372ff15d 100644 --- a/kernel/x86_64/qgemv_t.S +++ b/kernel/x86_64/qgemv_t.S @@ -75,7 +75,10 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_LN_2x2.S b/kernel/x86_64/qtrsm_kernel_LN_2x2.S index 030eff8934..0a545faf87 100644 --- a/kernel/x86_64/qtrsm_kernel_LN_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_LN_2x2.S @@ -74,7 +74,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_LT_2x2.S b/kernel/x86_64/qtrsm_kernel_LT_2x2.S index d86972c72c..16063fbcdd 100644 --- a/kernel/x86_64/qtrsm_kernel_LT_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_LT_2x2.S @@ -74,7 +74,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_RT_2x2.S b/kernel/x86_64/qtrsm_kernel_RT_2x2.S index 2826a62c93..4c94ac02cf 100644 --- a/kernel/x86_64/qtrsm_kernel_RT_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_RT_2x2.S @@ -74,8 +74,9 @@ PROLOGUE PROFCODE - fninit - +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/sum.S b/kernel/x86_64/sum.S index 3d5fa7cc29..9f2cdc1ecd 100644 --- a/kernel/x86_64/sum.S +++ b/kernel/x86_64/sum.S @@ -50,7 +50,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif fldz testq M, M diff --git a/kernel/x86_64/xdot.S b/kernel/x86_64/xdot.S index ec89b799c2..c4b4734947 100644 --- a/kernel/x86_64/xdot.S +++ b/kernel/x86_64/xdot.S @@ -59,7 +59,9 @@ PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif #define N %ebx diff --git a/kernel/x86_64/xgemm3m_kernel_2x2.S b/kernel/x86_64/xgemm3m_kernel_2x2.S index e8da78d82a..1d0b23c402 100644 --- a/kernel/x86_64/xgemm3m_kernel_2x2.S +++ b/kernel/x86_64/xgemm3m_kernel_2x2.S @@ -78,7 +78,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/xgemm_kernel_1x1.S b/kernel/x86_64/xgemm_kernel_1x1.S index f04ab07f59..ee67d8d430 100644 --- a/kernel/x86_64/xgemm_kernel_1x1.S +++ b/kernel/x86_64/xgemm_kernel_1x1.S @@ -97,7 +97,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/xgemv_n.S b/kernel/x86_64/xgemv_n.S index 7d28c118ac..b66f28d586 100644 --- a/kernel/x86_64/xgemv_n.S +++ b/kernel/x86_64/xgemv_n.S @@ -76,7 +76,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/xgemv_t.S b/kernel/x86_64/xgemv_t.S index e796760883..d6d37010d1 100644 --- a/kernel/x86_64/xgemv_t.S +++ b/kernel/x86_64/xgemv_t.S @@ -75,7 +75,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/xtrsm_kernel_LT_1x1.S b/kernel/x86_64/xtrsm_kernel_LT_1x1.S index 54d41932f8..875206363f 100644 --- a/kernel/x86_64/xtrsm_kernel_LT_1x1.S +++ b/kernel/x86_64/xtrsm_kernel_LT_1x1.S @@ -90,7 +90,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/zamax.S b/kernel/x86_64/zamax.S index bfd836193d..5cb2f60198 100644 --- a/kernel/x86_64/zamax.S +++ b/kernel/x86_64/zamax.S @@ -55,7 +55,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif salq $ZBASE_SHIFT, INCX diff --git a/kernel/x86_64/zasum.S b/kernel/x86_64/zasum.S index 9ea2aadc05..3460fcea30 100644 --- a/kernel/x86_64/zasum.S +++ b/kernel/x86_64/zasum.S @@ -50,7 +50,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif fldz testq M, M diff --git a/kernel/x86_64/zdot.S b/kernel/x86_64/zdot.S index f7df919b7c..87c08d7c80 100644 --- a/kernel/x86_64/zdot.S +++ b/kernel/x86_64/zdot.S @@ -54,9 +54,9 @@ PROLOGUE PROFCODE - fninit - #ifdef WINDOWS_ABI + emms + movq 40(%rsp), INCY #endif diff --git a/kernel/x86_64/znrm2.S b/kernel/x86_64/znrm2.S index cb02a5a9fe..0d2aa3480b 100644 --- a/kernel/x86_64/znrm2.S +++ b/kernel/x86_64/znrm2.S @@ -50,7 +50,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif fldz testq M, M diff --git a/kernel/x86_64/zscal.S b/kernel/x86_64/zscal.S index 08c0831a44..5ed4c4576b 100644 --- a/kernel/x86_64/zscal.S +++ b/kernel/x86_64/zscal.S @@ -50,7 +50,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif salq $ZBASE_SHIFT, INCX diff --git a/kernel/x86_64/zsum.S b/kernel/x86_64/zsum.S index 1c39048396..aa02637e47 100644 --- a/kernel/x86_64/zsum.S +++ b/kernel/x86_64/zsum.S @@ -50,7 +50,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif fldz testq M, M