bytecodealliance · iximeow · Apr 13, 2020 · Apr 11, 2020 · iximeow · Apr 13, 2020
diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs
@@ -620,12 +620,8 @@ fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> C
         // The calling convention described in
         // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires
         // preserving the low 128 bits of XMM6-XMM15.
-        //
-        // TODO: For now, add just an `F64` rather than `F64X2` because `F64X2` would require
-        // encoding a fstDisp8 with REX bits set, and we currently can't encode that. F64 causes a
-        // whole XMM register to be preserved anyway.
         let csr_arg =
-            ir::AbiParam::special_reg(types::F64, ir::ArgumentPurpose::CalleeSaved, fp_csr);
+            ir::AbiParam::special_reg(types::F64X2, ir::ArgumentPurpose::CalleeSaved, fp_csr);
         func.signature.params.push(csr_arg);
         func.signature.returns.push(csr_arg);
     }
@@ -910,7 +906,7 @@ fn insert_common_prologue(
 
         for reg in csrs.iter(FPR) {
             // Append param to entry Block
-            let csr_arg = pos.func.dfg.append_block_param(block, types::F64);
+            let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
 
             // Since regalloc has already run, we must assign a location.
             pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
@@ -1035,9 +1031,12 @@ fn insert_common_epilogue(
         let mut fpr_offset = 0;
 
         for reg in csrs.iter(FPR) {
-            let value = pos
-                .ins()
-                .load(types::F64, ir::MemFlags::trusted(), stack_addr, fpr_offset);
+            let value = pos.ins().load(
+                types::F64X2,
+                ir::MemFlags::trusted(),
+                stack_addr,
+                fpr_offset,
+            );
             fpr_offset += types::F64X2.bytes() as i32;
 
             if let Some(ref mut cfa_state) = cfa_state.as_mut() {

diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif
@@ -40,19 +40,19 @@ block0(v0: f64, v1: f64, v2: f64, v3: f64):
 [-, %xmm7]  v5 = fadd v0, v1
     return
 }
-; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7]) -> i64 fp [%rbp], f64 csr [%xmm6], f64 csr [%xmm7] windows_fastcall {
+; check: function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall {
 ; nextln:                         ss0 = explicit_slot 32, offset -80
 ; nextln:                         ss1 = incoming_arg 16, offset -48
-; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64 [%xmm6], v9: f64 [%xmm7]):
+; check: block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]):
 ; nextln:                         x86_push v6
 ; nextln:                         copy_special %rsp -> %rbp
 ; nextln:                         adjust_sp_down_imm 64
 ; nextln:                         v7 = stack_addr.i64 ss0
 ; nextln:                         store notrap aligned v8, v7
 ; nextln:                         store notrap aligned v9, v7+16
 ; check:                          v10 = stack_addr.i64 ss0
-; nextln:                         v11 = load.f64 notrap aligned v10
-; nextln:                         v12 = load.f64 notrap aligned v10+16
+; nextln:                         v11 = load.f64x2 notrap aligned v10
+; nextln:                         v12 = load.f64x2 notrap aligned v10+16
 ; nextln:                         adjust_sp_up_imm 64
 ; nextln:                         v13 = x86_pop.i64
 ; nextln:                         v13, v11, v12

diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif
@@ -153,13 +153,13 @@ block0(v0: i64, v1: i64):
 ; sameln: UnwindInfo {
 ; nextln:     version: 1,
 ; nextln:     flags: 0,
-; nextln:     prologue_size: 26,
+; nextln:     prologue_size: 25,
 ; nextln:     unwind_code_count_raw: 7,
 ; nextln:     frame_register: 5,
 ; nextln:     frame_register_offset: 12,
 ; nextln:     unwind_codes: [
 ; nextln:       UnwindCode {
-; nextln:         offset: 26,
+; nextln:         offset: 25,
 ; nextln:         op: SaveXmm128,
 ; nextln:         info: 15,
 ; nextln:         value: U16(
@@ -217,29 +217,29 @@ block0(v0: i64, v1: i64):
 ; sameln: UnwindInfo {
 ; nextln:     version: 1,
 ; nextln:     flags: 0,
-; nextln:     prologue_size: 44,
+; nextln:     prologue_size: 41,
 ; nextln:     unwind_code_count_raw: 16,
 ; nextln:     frame_register: 5,
 ; nextln:     frame_register_offset: 10,
 ; nextln:     unwind_codes: [
 ; nextln:         UnwindCode {
-; nextln:             offset: 44,
+; nextln:             offset: 41,
 ; nextln:             op: SaveXmm128,
 ; nextln:             info: 8,
 ; nextln:             value: U16(
 ; nextln:                 2,
 ; nextln:             ),
 ; nextln:           },
 ; nextln:         UnwindCode {
-; nextln:             offset: 38,
+; nextln:             offset: 36,
 ; nextln:             op: SaveXmm128,
 ; nextln:             info: 7,
 ; nextln:             value: U16(
 ; nextln:                 1,
 ; nextln:             ),
 ; nextln:         },
 ; nextln:         UnwindCode {
-; nextln:             offset: 32,
+; nextln:             offset: 31,
 ; nextln:             op: SaveXmm128,
 ; nextln:             info: 6,
 ; nextln:             value: U16(