From 8dcc633d8082f92cc8fb7ea04fbe5da804467a80 Mon Sep 17 00:00:00 2001 From: saitama951 Date: Wed, 18 Jun 2025 06:03:17 +0000 Subject: [PATCH 1/4] [mono][2/2] Add SIMD Support for s390x This is a followup patch to #116669 to add vector support to s390x --- src/mono/mono/arch/s390x/s390x-codegen.h | 285 +++- src/mono/mono/mini/cpu-s390x.mdesc | 168 +++ src/mono/mono/mini/mini-ops.h | 122 ++ src/mono/mono/mini/mini-runtime.c | 2 +- src/mono/mono/mini/mini-s390x.c | 1547 +++++++++++++--------- src/mono/mono/mini/mini-s390x.h | 15 + src/mono/mono/mini/simd-intrinsics.c | 204 ++- 7 files changed, 1646 insertions(+), 697 deletions(-) diff --git a/src/mono/mono/arch/s390x/s390x-codegen.h b/src/mono/mono/arch/s390x/s390x-codegen.h index e5679d9c18087e..e0c944e3173243 100644 --- a/src/mono/mono/arch/s390x/s390x-codegen.h +++ b/src/mono/mono/arch/s390x/s390x-codegen.h @@ -139,38 +139,38 @@ typedef enum { } S390SpecialRegister; typedef enum { - s390_VR0 = 0, - s390_VR1 = 1, - s390_VR2 = 2, - s390_VR3 = 3, - s390_VR4 = 4, - s390_VR5 = 5, - s390_VR6 = 6, - s390_VR7 = 7, - s390_VR8 = 8, - s390_VR9 = 9, - s390_VR10 = 10, - s390_VR11 = 11, - s390_VR12 = 12, - s390_VR13 = 13, - s390_VR14 = 14, - s390_VR15 = 15, - s390_VR16 = 16, - s390_VR17 = 17, - s390_VR18 = 18, - s390_VR19 = 19, - s390_VR20 = 20, - s390_VR21 = 21, - s390_VR22 = 22, - s390_VR23 = 23, - s390_VR24 = 24, - s390_VR25 = 25, - s390_VR26 = 26, - s390_VR27 = 27, - s390_VR28 = 28, - s390_VR29 = 29, - s390_VR30 = 30, - s390_VR31 = 31, + s390_vr0 = 0, + s390_vr1 = 1, + s390_vr2 = 2, + s390_vr3 = 3, + s390_vr4 = 4, + s390_vr5 = 5, + s390_vr6 = 6, + s390_vr7 = 7, + s390_vr8 = 8, + s390_vr9 = 9, + s390_vr10 = 10, + s390_vr11 = 11, + s390_vr12 = 12, + s390_vr13 = 13, + s390_vr14 = 14, + s390_vr15 = 15, + s390_vr16 = 16, + s390_vr17 = 17, + s390_vr18 = 18, + s390_vr19 = 19, + s390_vr20 = 20, + s390_vr21 = 21, + s390_vr22 = 22, + s390_vr23 = 23, + s390_vr24 = 24, + s390_vr25 = 25, + s390_vr26 = 26, + s390_vr27 = 27, + s390_vr28 = 
28, + s390_vr29 = 29, + s390_vr30 = 30, + s390_vr31 = 31, s390_VR_NREG = 32, } s390_VR_Reg_No; @@ -946,7 +946,7 @@ typedef struct { #define S390_SIY(c,opc,s1,p1,m2) do \ { \ s390_emit16(c, ((opc & 0xff00) | m2)); \ - s390_emit32(c, ((s1) << 24 | (((p2) & 0xfffff) << 8) | \ + s390_emit32(c, ((s1) << 28 | (((p1) & 0xfffff) << 8) | \ (opc & 0xff))); \ } while (0) @@ -1005,7 +1005,7 @@ typedef struct { #define S390_VRIa(c,opc,v1,i2,m3) do \ { \ - char rxb = (((v1) > 15) << 7); \ + char rxb = (((v1) > 15) << 3); \ int vr1 = ((v1) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4)); \ s390_emit16(c, (i2)); \ @@ -1014,7 +1014,7 @@ typedef struct { #define S390_VRIb(c,opc,v1,i2,i3,m4) do \ { \ - char rxb = (((v1) > 15) << 7); \ + char rxb = (((v1) > 15) << 3); \ int vr1 = ((v1) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4)); \ s390_emit16(c, (((i2) << 8) | (i3))); \ @@ -1023,18 +1023,17 @@ typedef struct { #define S390_VRIc(c,opc,v1,v3,i2,m4) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5); \ + char rxb = (((v1) > 15) << 3) | (((v3) > 15) << 2); \ int vr1 = ((v1) % 16), vr3 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr3)); \ - s390_emit16(c, (v4)); \ + s390_emit16(c, (i2)); \ s390_emit16(c, (((m4) << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ } while (0) #define S390_VRId(c,opc,v1,v2,v3,i4,m5) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16), vr3 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ s390_emit16(c, (vr3 << 12) | (i2)); \ @@ -1043,7 +1042,7 @@ typedef struct { #define S390_VRIe(c,opc,v1,v2,i3,m4,m5) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | 
(vr2)); \ s390_emit16(c, ((i2) << 8) | (m5)); \ @@ -1052,7 +1051,7 @@ typedef struct { #define S390_VRRa(c,opc,v1,v2,m3,m4,m5) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ s390_emit16(c, ((m5) << 4) | (m4)); \ @@ -1061,27 +1060,27 @@ typedef struct { #define S390_VRRb(c,opc,v1,v2,v3,m4,m5) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16), vr3 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ - s390_emit16(c, (vr3 << 12) | ((m5) << 4) | (m4)); \ - s390_emit16(c, (((m3) << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ + s390_emit16(c, (vr3 << 12) | ((m5) << 4)); \ + s390_emit16(c, (((m4) << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ } while (0) -#define S390_VRRc(c,opc,v1,v2,m3,m4,m5) do \ +#define S390_VRRc(c,opc,v1,v2,v3,m4,m5,m6) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16), vr3 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ - s390_emit16(c, ((vr3 << 12)| (m5) << 4)); \ + s390_emit16(c, ((vr3 << 12)| ((m6) << 4) | (m5))); \ s390_emit16(c, (((m4) << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ } while (0) #define S390_VRRd(c,opc,v1,v2,v3,v4,m5,m6) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5) | (((v4) > 15) << 4); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1) | ((v4) > 15); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16), \ - vr3 = ((v3) % 16); vr4 = ((v4) % 16); \ + vr3 = ((v3) % 16), vr4 = ((v4) % 16); \ @@ -1090,19 +1089,20 @@ typedef struct { s390_emit16(c, ((vr4 << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ } while (0) -#define 
S390_VRRe(c,opc,v1,v2,v3,m4,m5,m6) do \ +#define S390_VRRe(c,opc,v1,v2,v3,v4,m5,m6) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5); \ - int vr1 = ((v1) % 16), vr2 = ((v2) % 16), vr3 = ((v3) % 16); \ - s390_emit16(c, ((opc) & 0xff00) | ((v1) << 4) | ((v2))); \ - s390_emit16(c, (((v3) << 12)| ((m6) << 8)) | (m5)); \ - s390_emit16(c, (((m4) << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1) | ((v4) > 15); \ + int vr1 = ((v1) % 16), vr2 = ((v2) % 16), vr3 = ((v3) % 16), \ + vr4 = ((v4) % 16); \ + s390_emit16(c, ((opc) & 0xff00) | ((vr1) << 4) | ((vr2))); \ + s390_emit16(c, (((vr3) << 12)| ((m6) << 8)) | (m5)); \ + s390_emit16(c, (((vr4) << 12) | ((rxb) << 8) | ((opc) & 0xff)));\ } while (0) #define S390_VRRf(c,opc,v1,r2) do \ { \ - char rxb = (((v1) > 15) << 7); \ + char rxb = (((v1) > 15) << 3); \ s390_emit16(c, ((opc) & 0xff00) | ((v1) << 4) | ((v2))); \ s390_emit16(c, ((r2) << 12)| ((r3) << r8) | (m5)); \ s390_emit16(c, (((rxb) << 8) | ((opc) & 0xff))); \ @@ -1110,7 +1110,7 @@ typedef struct { #define S390_VRSa(c,opc,v1,v3,b2,d2,m4) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v3) > 15) << 6); \ + char rxb = (((v1) > 15) << 3) | (((v3) > 15) << 2); \ int vr1 = ((v1) % 16), vr3 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr3)); \ s390_emit16(c, ((b2) << 12)| (d2)); \ @@ -1119,7 +1119,7 @@ typedef struct { #define S390_VRSb(c,opc,v1,r3,b2,d2,m4) do \ { \ - char rxb = (((v1) > 15) << 7); \ + char rxb = (((v1) > 15) << 3); \ int vr1 = (v1) % 16; \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | ((r3))); \ s390_emit16(c, ((b2) << 12)| (d2)); \ @@ -1128,7 +1128,7 @@ typedef struct { #define S390_VRSc(c,opc,r1,v3,b2,d2,m4) do \ { \ - char rxb = (((v1) > 15) << 7); \ + char rxb = (((v3) > 15) << 2); \ int vr3 = (v3) % 16; \ s390_emit16(c, ((opc) & 0xff00) | ((r1) << 4) | (vr3)); \ s390_emit16(c, ((b2) << 12)| (d2)); \ @@ -1137,7 +1137,7 @@ 
typedef struct { #define S390_VRV(c,opc,v1,v2,b2,d2,m3) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2); \ int vr1 = ((v1) % 16), vr2 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ s390_emit16(c, ((b2) << 12)| (d2)); \ @@ -1146,7 +1146,7 @@ typedef struct { #define S390_VRX(c,opc,v1,x2,b2,d2,m3) do \ { \ - char rxb = ((v1) > 15) << 7; \ + char rxb = ((v1) > 15) << 3; \ int vr1 = (v1) % 16; \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | ((x2))); \ s390_emit16(c, ((b2) << 12)| (d2)); \ @@ -1167,7 +1167,7 @@ typedef struct { #define s390_aghik(c, r1, r3, v) S390_RIE_1(c, 0xecd9, r1, r3, v) #define s390_agr(c, r1, r2) S390_RRE(c, 0xb908, r1, r2) #define s390_agrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e8, r1, r2, r3) -#define s390_agsi(c, r, v) S390_SIY(c, 0xeb7a, r v) +#define s390_agsi(c, r, v, i) S390_SIY(c, 0xeb7a, r, v, i) #define s390_ahhhr(c, r1, r2, r3) S390_RRF_1(c, 0xb9c8, r1, r2, r3) #define s390_ahhlr(c, r1, r2, r3) S390_RRF_1(c, 0xb9d8, r1, r2, r3) #define s390_ahi(c, r, v) S390_RI(c, 0xa7a, r, v) @@ -1411,6 +1411,7 @@ typedef struct { #define s390_lochi(c, r1, i, m) S390_RIE_1(c, 0xec42, r1, m, i) #define s390_locghi(c, r1, i, m) S390_RIE_1(c, 0xec46, r1, m, i) #define s390_locghile(c, r1, i) s390_locghi(c, r1, i, S390_CC_LT|S390_CC_EQ) +#define s390_locghinle(c, r1, i) s390_locghi(c, r1, i, 3) #define s390_locghihe(c, r1, i) s390_locghi(c, r1, i, S390_CC_GT|S390_CC_EQ) #define s390_locghine(c, r1, i) s390_locghi(c, r1, i, S390_CC_NZ) #define s390_locghiho(c, r1, i) s390_locghi(c, r1, i, S390_CC_GT|S390_CC_OV) @@ -1421,6 +1422,7 @@ typedef struct { #define s390_locr(c, r1, m, r2) S390_RRF_2(c, 0xb9f2, r1, m, r2) #define s390_locgr(c, r1, m, r2) S390_RRF_2(c, 0xb9e2, r1, m, r2) #define s390_locgrle(c, r1, r2) s390_locgr(c, r1, S390_CC_LT|S390_CC_EQ, r2) +#define s390_locgrnle(c, r1, r2) s390_locgr(c, r1, 3, r2) #define s390_locgrhe(c, r1, r2) s390_locgr(c, r1, 
S390_CC_GT|S390_CC_EQ, r2) #define s390_locgrne(c, r1, r2) s390_locgr(c, r1, S390_CC_NZ, r2) #define s390_locgrho(c, r1, r2) s390_locgr(c, r1, S390_CC_GT|S390_CC_OV, r2) @@ -1569,14 +1571,151 @@ typedef struct { #define s390_tmlh(c, r, m) S390_RI(c, 0xa70, r, m) #define s390_tmll(c, r, m) S390_RI(c, 0xa71, r, m) #define s390_tm(c, b, d, v) S390_SI(c, 0x91, b, d, v) +#define s390_trap2(code) S390_E(code, 0x01ff) +#define s390_vab(c , v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 0, 0, 0) +#define s390_vah(c , v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 1, 0, 0) +#define s390_vaf(c , v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 2, 0, 0) +#define s390_vag(c , v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 3, 0, 0) +#define s390_vaq(c , v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 4, 0, 0) +#define s390_vceqbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 0, 1 ) +#define s390_vceqhs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 1, 1 ) +#define s390_vceqfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 2, 1 ) +#define s390_vceqgs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 3, 1 ) +#define s390_vchbs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 0, 1) +#define s390_vchhs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 1, 1) +#define s390_vchfs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 2, 1) +#define s390_vchgs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 3, 1) +#define s390_vchlbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 0, 1) +#define s390_vchlhs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 1, 1) +#define s390_vchlfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 2, 1) +#define s390_vchlgs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 3, 1) +#define s390_vecb(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 0, 0, 0) +#define s390_vech(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 1, 0, 0) +#define s390_vecf(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 2, 0, 0) +#define s390_vecg(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 3, 0, 0) +#define s390_veclb(c, v1, v2) 
S390_VRRa(c, 0xe7d9, v1, v2, 0, 0, 0) +#define s390_veclh(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 1, 0, 0) +#define s390_veclf(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 2, 0, 0) +#define s390_veclg(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 3, 0, 0) +#define s390_vfasb(c, v1, v2, v3) S390_VRRc(c, 0xe7e3, v1, v2, v3, 2, 0, 0) +#define s390_vfadb(c, v1, v2, v3) S390_VRRc(c, 0xe7e3, v1, v2, v3, 3, 0, 0) +#define s390_vfcesbs(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 2, 0, 1) +#define s390_vfcedbs(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 3, 0, 1) +#define s390_vfchsbs(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 2, 0, 1) +#define s390_vfchdbs(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 3, 0, 1) +#define s390_vfdsb(c, v1, v2, v3) S390_VRRc(c, 0xe7e5, v1, v2, v3, 2, 0, 0) +#define s390_vfddb(c, v1, v2, v3) S390_VRRc(c, 0xe7e5, v1, v2, v3, 3, 0, 0) +#define s390_vfisb(c, v1, v2, m4, m5) S390_VRRa(c, 0xe7c7, v1, v2, 2, m4, m5) +#define s390_vfidb(c, v1, v2, m4, m5) S390_VRRa(c, 0xe7c7, v1, v2, 3, m4, m5) +#define s390_vfmsb(c, v1, v2, v3) S390_VRRc(c, 0xe7e7, v1, v2, v3, 2, 0, 0) +#define s390_vfmdb(c, v1, v2, v3) S390_VRRc(c, 0xe7e7, v1, v2, v3, 3, 0, 0) +#define s390_vfmaxsb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ef, v1, v2, v3, 2, 0, m6) +#define s390_vfmaxdb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ef, v1, v2, v3, 3, 0, m6) +#define s390_vfminsb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ee, v1, v2, v3, 2, 0, m6) +#define s390_vfmindb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ee, v1, v2, v3, 3, 0, m6) +#define s390_vflcdb(c, v1, v2) S390_VRRa(c, 0xe7cc, v1, v2, 3, 0, 0) +#define s390_vflcsb(c, v1, v2) S390_VRRa(c, 0xe7cc, v1, v2, 2, 0, 0) +#define s390_vfpsosb(c, v1, v2, m5) S390_VRRa(c, 0xe7cc, v1, v2, 2, 0, m5) +#define s390_vfpsodb(c, v1, v2, m5) S390_VRRa(c, 0xe7cc, v1, v2, 3, 0, m5) +#define s390_vfssb(c, v1, v2, v3) S390_VRRc(c, 0xe7e2, v1, v2, v3, 2, 0, 0) +#define s390_vfsdb(c, v1, v2, v3) S390_VRRc(c, 0xe7e2, v1, v2, v3, 3, 0, 0) +#define s390_vfsqsb(c, v1, v2) 
S390_VRRa(c, 0xe7ce, v1, v2, 2, 0, 0) +#define s390_vfsqdb(c, v1, v2) S390_VRRa(c, 0xe7ce, v1, v2, 3, 0, 0) +#define s390_vgbm(c, v, i) S390_VRIa(c, 0xe744, v, i, 0) +#define s390_vgmb(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 0) +#define s390_vgmh(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 1) +#define s390_vgmf(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 2) +#define s390_vgmg(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 3) +#define s390_vmlb(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 0, 0, 0) +#define s390_vmlhw(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 1, 0, 0) +#define s390_vmlf(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 2, 0, 0) +#define s390_vmnb(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 0, 0, 0) +#define s390_vmnh(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 1, 0, 0) +#define s390_vmnf(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 2, 0, 0) +#define s390_vmng(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 3, 0, 0) +#define s390_vmnlb(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 0, 0, 0) +#define s390_vmnlh(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 1, 0, 0) +#define s390_vmnlf(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 2, 0, 0) +#define s390_vmnlg(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 3, 0, 0) +#define s390_vmxb(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 0, 0, 0) +#define s390_vmxh(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 1, 0, 0) +#define s390_vmxf(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 2, 0, 0) +#define s390_vmxg(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 3, 0, 0) +#define s390_vmxlb(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 0, 0, 0) +#define s390_vmxlh(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 1, 0, 0) +#define s390_vmxlf(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 2, 0, 0) +#define s390_vmxlg(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 3, 0, 0) +#define s390_vn(c, v1, v2, v3) S390_VRRc(c, 0xe768, v1, v2, v3, 0, 0, 0) +#define s390_vnn(c, v1, 
v2, v3) S390_VRRc(c, 0xe76e, v1, v2, v3, 0, 0, 0) +#define s390_vno(c, v1, v2, v3) S390_VRRc(c, 0xe76b, v1, v2, v3, 0, 0, 0) +#define s390_vo(c, v1, v2, v3) S390_VRRc(c, 0xe76a, v1, v2, v3, 0, 0, 0) +#define s390_vl(c, v, d, x, b) S390_VRX(c, 0xe706, v, x, b, d, 0) +#define s390_vlcb(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 0, 0, 0) +#define s390_vlch(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 1, 0, 0) +#define s390_vlcf(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 2, 0, 0) +#define s390_vlcg(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 3, 0, 0) +#define s390_vleg(c, v, d, x, b, m) S390_VRX(c, 0xe702, v, x, b, d, m) +#define s390_vleib(c, v1, i2, m3) S390_VRIa(c, 0xe740, v1, i2, m3) +#define s390_vleih(c, v1, i2, m3) S390_VRIa(c, 0xe741, v1, i2, m3) +#define s390_vleif(c, v1, i2, m3) S390_VRIa(c, 0xe743, v1, i2, m3) +#define s390_vleig(c, v1, i2, m3) S390_VRIa(c, 0xe742, v1, i2, m3) +#define s390_vlgvb(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 0) +#define s390_vlgvh(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 1) +#define s390_vlgvf(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 2) +#define s390_vlgvg(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 3) #define s390_vlm(c, v1, v2, b, d, m) S390_VRSa(c, 0xe736, v1, v2, b, d, m) +#define s390_vlpb(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 0, 0, 0) +#define s390_vlph(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 1, 0, 0) +#define s390_vlpf(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 2, 0, 0) +#define s390_vlpg(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 3, 0, 0) +#define s390_vlr(c, v1, v2) S390_VRRa(c,0xe756, v1, v2, 0, 0, 0) +#define s390_vlvgb(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 0) +#define s390_vlvgh(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 1) +#define s390_vlvgf(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 2) +#define s390_vlvgg(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 3) +#define s390_vperm(c, v1, v2, v3, v4) S390_VRRe(c, 0xe78c, v1, v2, v3, 
v4, 0, 0) +#define s390_vpkh(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 1, 0, 0) +#define s390_vpkf(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 2, 0, 0) +#define s390_vpkg(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 3, 0, 0) +#define s390_vrepb(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 0) +#define s390_vreph(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 1) +#define s390_vrepf(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 2) +#define s390_vrepg(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 3) +#define s390_vrepib(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 0) +#define s390_vrepih(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 1) +#define s390_vrepif(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 2) +#define s390_vrepig(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 3) +#define s390_vsb(c , v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 0, 0, 0) +#define s390_vsh(c , v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 1, 0, 0) +#define s390_vsf(c , v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 2, 0, 0) +#define s390_vsg(c , v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 3, 0, 0) +#define s390_vsq(c , v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 4, 0, 0) +#define s390_vst(c, v, d, x, b) S390_VRX(c,0xe70e, v, x, b, d, 0) +#define s390_vsteg(c, v, d, x, b, m) S390_VRX(c, 0xe70a, v, x, b, d, m) #define s390_vstm(c, v1, v2, b, d, m) S390_VRSa(c, 0xe73e, v1, v2, b, d, m) -#define s390_x(c, r, x, b, d) S390_RX(c, 0x57, r, x, b, d) -#define s390_xihf(c, r, v) S390_RIL_1(c, 0xc06, r, v) -#define s390_xilf(c, r, v) S390_RIL_1(c, 0xc07, r, v) -#define s390_xg(c, r, x, b, d) S390_RXY(c, 0xe382, r, x, b, d) -#define s390_xgr(c, r1, r2) S390_RRE(c, 0xb982, r1, r2) -#define s390_xgrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e7, r1, r2, r3) -#define s390_xr(c, r1, r2) S390_RR(c, 0x17, r1, r2) -#define s390_xy(c, r, x, b, d) S390_RXY(c, 0xe357, r, x, b, d) +#define s390_vsumb(c, v1, v2, v3) S390_VRRc(c, 0xe764, v1, v2, v3, 0, 0, 0) +#define s390_vsumh(c, v1, v2, v3) S390_VRRc(c, 0xe764, v1, v2, v3, 1, 
0, 0) +#define s390_vsumqf(c, v1, v2, v3) S390_VRRc(c, 0xe767, v1, v2, v3, 2, 0, 0) +#define s390_vsumqg(c, v1, v2, v3) S390_VRRc(c, 0xe767, v1, v2, v3, 3, 0, 0) +#define s390_vuplb(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 0, 0, 0) +#define s390_vuplh(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 1, 0, 0) +#define s390_vuplf(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 2, 0, 0) +#define s390_vupllb(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 0, 0, 0) +#define s390_vupllh(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 1, 0, 0) +#define s390_vupllf(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 2, 0, 0) +#define s390_vuplhb(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 0, 0, 0) +#define s390_vuplhh(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 1, 0, 0) +#define s390_vuplhf(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 2, 0, 0) +#define s390_vuphb(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 0, 0, 0) +#define s390_vuphh(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 1, 0, 0) +#define s390_vuphf(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 2, 0, 0) +#define s390_vx(c, v1, v2, v3) S390_VRRc(c, 0xe76d, v1, v2, v3, 0, 0, 0) +#define s390_x(c, r, x, b, d) S390_RX(c, 0x57, r, x, b, d) +#define s390_xihf(c, r, v) S390_RIL_1(c, 0xc06, r, v) +#define s390_xilf(c, r, v) S390_RIL_1(c, 0xc07, r, v) +#define s390_xg(c, r, x, b, d) S390_RXY(c, 0xe382, r, x, b, d) +#define s390_xgr(c, r1, r2) S390_RRE(c, 0xb982, r1, r2) +#define s390_xgrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e7, r1, r2, r3) +#define s390_xr(c, r1, r2) S390_RR(c, 0x17, r1, r2) +#define s390_xy(c, r, x, b, d) S390_RXY(c, 0xe357, r, x, b, d) #endif diff --git a/src/mono/mono/mini/cpu-s390x.mdesc b/src/mono/mono/mini/cpu-s390x.mdesc index 8e699367ecc155..722ff959785f21 100644 --- a/src/mono/mono/mini/cpu-s390x.mdesc +++ b/src/mono/mono/mini/cpu-s390x.mdesc @@ -298,6 +298,7 @@ int_clt_un: dest:i len:12 int_cneq: dest:i len:12 int_cge: dest:i len:12 int_cle: dest:i len:12 +int_cnle: dest:i len:12 int_cge_un: dest:i len:12 int_cle_un: dest:i len:12 @@ -472,3 +473,170 @@ s390_cij: len:24 
s390_cij_un: src1:i len:24 s390_cgij: len:24 s390_cgij_un: len:24 + +insert_i1: dest:x src1:x src2:i len:12 +insert_i2: dest:x src1:x src2:i len:12 +insert_i4: dest:x src1:x src2:i len:12 +insert_i8: dest:x src1:x src2:i len:12 +insert_r4: dest:x src1:x src2:f clob:1 len:12 +insert_r8: dest:x src1:x src2:f clob:1 len:12 + +xinsert_i1: dest:x src1:x src2:i src3:i len:12 +xinsert_i2: dest:x src1:x src2:i src3:i len:12 +xinsert_i4: dest:x src1:x src2:i src3:i len:12 +xinsert_i8: dest:x src1:x src2:i src3:i len:12 +xinsert_r4: dest:x src1:x src2:f src3:i clob:1 len:12 +xinsert_r8: dest:x src1:x src2:f src3:i clob:1 len:12 + +extract_i1: dest:i src1:x len:6 +extract_i2: dest:i src1:x len:6 +extract_i4: dest:i src1:x len:6 +extract_i8: dest:i src1:x len:6 +extract_r4: dest:f src1:x len:12 +extract_r8: dest:f src1:x len:12 + +xextract_i1: dest:i src1:x src2:i len:6 +xextract_i2: dest:i src1:x src2:i len:6 +xextract_i4: dest:i src1:x src2:i len:6 +xextract_i8: dest:i src1:x src2:i len:6 +xextract_r4: dest:f src1:x src2:i len:10 +xextract_r8: dest:f src1:x src2:i len:10 + +expand_i1: dest:x src1:i len:12 +expand_i2: dest:x src1:i len:14 +expand_i4: dest:x src1:i len:12 +expand_i8: dest:x src1:i len:12 +expand_r8: dest:x src1:f len:20 +expand_r4: dest:x src1:f len:20 + + +xextract: dest:i src1:x len:12 +xones: dest:x len:6 +xmove: dest:x src1:x len:6 +#xcast: dest:x src1:x len:6 +xzero: dest:x len:6 +xcompare: dest:x src1:x src2:x len:6 +xcompare_fp: dest:x src1:x src2:x len:6 +loadx_membase: dest:x src1:b len:16 +storex_membase: dest:b src1:x len:16 +vaddb: dest:x src1:x src2:x len:6 +vaddh: dest:x src1:x src2:x len:6 +vaddf: dest:x src1:x src2:x len:6 +vaddg: dest:x src1:x src2:x len:6 +vaddq: dest:x src1:x src2:x len:6 +vfadds: dest:x src1:x src2:x len:6 +vfaddd: dest:x src1:x src2:x len:6 +vsubb: dest:x src1:x src2:x len:6 +vsubh: dest:x src1:x src2:x len:6 +vsubf: dest:x src1:x src2:x len:6 +vsubg: dest:x src1:x src2:x len:6 +vsubq: dest:x src1:x src2:x len:6 +vfsubs: 
dest:x src1:x src2:x len:6 +vfsubd: dest:x src1:x src2:x len:6 +vxor: dest:x src1:x src2:x len:6 +vor: dest:x src1:x src2:x len:6 +vnor: dest:x src1:x src2:x len:24 +vand: dest:x src1:x src2:x len:6 +vandnot: dest:x src1:x src2:x len:12 +vnand: dest:x src1:x src2:x len:6 +vmulb: dest:x src1:x src2:x len:6 +vmulhw: dest:x src1:x src2:x len:6 +vmulf: dest:x src1:x src2:x len:6 +vfmuls: dest:x src1:x src2:x len:6 +vfmuld: dest:x src1:x src2:x len:6 +vfdivs: dest:x src1:x src2:x len:6 +vfdivd: dest:x src1:x src2:x len:6 +vmxb: dest:x src1:x src2:x len:6 +vmxh: dest:x src1:x src2:x len:6 +vmxf: dest:x src1:x src2:x len:6 +vmxg: dest:x src1:x src2:x len:6 +vmnb: dest:x src1:x src2:x len:6 +vmnh: dest:x src1:x src2:x len:6 +vmnf: dest:x src1:x src2:x len:6 +vmng: dest:x src1:x src2:x len:6 +vmxlb: dest:x src1:x src2:x len:6 +vmxlh: dest:x src1:x src2:x len:6 +vmxlf: dest:x src1:x src2:x len:6 +vmxlg: dest:x src1:x src2:x len:6 +vfmaxs: dest:x src1:x src2:x len:6 +vfmaxd: dest:x src1:x src2:x len:6 +vmnlb: dest:x src1:x src2:x len:6 +vmnlh: dest:x src1:x src2:x len:6 +vmnlf: dest:x src1:x src2:x len:6 +vmnlg: dest:x src1:x src2:x len:6 +vfmins: dest:x src1:x src2:x len:6 +vfmind: dest:x src1:x src2:x len:6 +vsumb: dest:x src1:x src2:x len:6 +vsumh: dest:x src1:x src2:x len:6 +vsumqf: dest:x src1:x src2:x len:6 +vsumqg: dest:x src1:x src2:x len:6 +vperm: dest:x src1:x src2:x len:6 +vrepib: dest:x len:6 +vrepih: dest:x len:6 +vrepif: dest:x len:6 +vrepig: dest:x len:6 +vceqbs: dest:x src1:x src2:x len:6 +vceqhs: dest:x src1:x src2:x len:6 +vceqfs: dest:x src1:x src2:x len:6 +vceqgs: dest:x src1:x src2:x len:6 +vfcesbs: dest:x src1:x src2:x len:6 +vfcedbs: dest:x src1:x src2:x len:6 +vfchsbs: dest:x src1:x src2:x len:6 +vfchdbs: dest:x src1:x src2:x len:6 +vgmb: dest:x src1:i src2:i len:6 +vgmh: dest:x src1:i src2:i len:6 +vgmf: dest:x src1:i src2:i len:6 +vgmg: dest:x src1:i src2:i len:6 +vecb: dest:x src1:x len:6 +vech: dest:x src1:x len:6 +vecf: dest:x src1:x len:6 +vecg: 
dest:x src1:x len:6 +veclb: dest:x src1:x len:6 +veclh: dest:x src1:x len:6 +veclf: dest:x src1:x len:6 +veclg: dest:x src1:x len:6 +vchbs: dest:x src1:x src2:x len:6 +vchhs: dest:x src1:x src2:x len:6 +vchfs: dest:x src1:x src2:x len:6 +vchgs: dest:x src1:x src2:x len:6 +#vfsqsb: src1:x src2:x len:6 clob:1 +vfsqsb: dest:x src1:x len:6 +#vfsqdb: src1:x src2:x len:6 clob:1 +vfsqdb: dest:x src1:x len:6 +vchlbs: dest:x src1:x src2:x len:6 +vchlhs: dest:x src1:x src2:x len:6 +vchlfs: dest:x src1:x src2:x len:6 +vchlgs: dest:x src1:x src2:x len:6 +vlpb: dest:x src1:x len:6 +vlph: dest:x src1:x len:6 +vlpf: dest:x src1:x len:6 +vlpg: dest:x src1:x len:6 +vflpdb: dest:x src1:x len:6 +vflpsb: dest:x src1:x len:6 +vflcdb: dest:x src1:x len:6 +vflcsb: dest:x src1:x len:6 +vpkh: dest:x src1:x src2:x len:6 +vpkf: dest:x src1:x src2:x len:6 +vpkg: dest:x src1:x src2:x len:6 +vlcb: dest:x src1:x len:6 +vlch: dest:x src1:x len:6 +vlcf: dest:x src1:x len:6 +vlcg: dest:x src1:x len:6 +vuplb: dest:x src1:x len:6 +vuplh: dest:x src1:x len:6 +vuplf: dest:x src1:x len:6 +vupllb: dest:x src1:x len:6 +vupllh: dest:x src1:x len:6 +vupllf: dest:x src1:x len:6 +vuphb: dest:x src1:x len:6 +vuphh: dest:x src1:x len:6 +vuphf: dest:x src1:x len:6 +vuplhb: dest:x src1:x len:6 +vuplhh: dest:x src1:x len:6 +vuplhf: dest:x src1:x len:6 +vfisb: dest:x src1:x len:6 +vfidb: dest:x src1:x len:6 +ceil_floor: dest:x src1:x len:6 +ones_complement: dest:x src1:x len:6 +negate: dest:x src1:x len:12 +xconst: dest:x len:18 diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index fe0f4bcdffb763..51e6aa4a79f5a1 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1498,6 +1498,128 @@ MINI_OP(OP_S390_CIJ, "s390_cij", IREG, NONE, NONE) MINI_OP(OP_S390_CLIJ, "s390_cij_un", IREG, IREG, NONE) MINI_OP(OP_S390_CGIJ, "s390_cgij", LREG, NONE, NONE) MINI_OP(OP_S390_CLGIJ, "s390_cgij_un", LREG, NONE, NONE) +MINI_OP(OP_VADDB, "vaddb", XREG, XREG, XREG) +MINI_OP(OP_VADDH, 
"vaddh", XREG, XREG, XREG) +MINI_OP(OP_VADDF, "vaddf", XREG, XREG, XREG) +MINI_OP(OP_VADDG, "vaddg", XREG, XREG, XREG) +MINI_OP(OP_VADDQ, "vaddq", XREG, XREG, XREG) +MINI_OP(OP_VFADDS, "vfadds", XREG, XREG, XREG) +MINI_OP(OP_VFADDD, "vfaddd", XREG, XREG, XREG) +MINI_OP(OP_VSUBB, "vsubb", XREG, XREG, XREG) +MINI_OP(OP_VSUBH, "vsubh", XREG, XREG, XREG) +MINI_OP(OP_VSUBF, "vsubf", XREG, XREG, XREG) +MINI_OP(OP_VSUBG, "vsubg", XREG, XREG, XREG) +MINI_OP(OP_VSUBQ, "vsubq", XREG, XREG, XREG) +MINI_OP(OP_VFSUBS, "vfsubs", XREG, XREG, XREG) +MINI_OP(OP_VFSUBD, "vfsubd", XREG, XREG, XREG) +MINI_OP(OP_VMULB, "vmulb", XREG, XREG, XREG) +MINI_OP(OP_VMULHW, "vmulhw", XREG, XREG, XREG) +MINI_OP(OP_VMULF, "vmulf", XREG, XREG, XREG) +MINI_OP(OP_VFMULS, "vfmuls", XREG, XREG, XREG) +MINI_OP(OP_VFMULD, "vfmuld", XREG, XREG, XREG) +MINI_OP(OP_VFDIVS, "vfdivs", XREG, XREG, XREG) +MINI_OP(OP_VFDIVD, "vfdivd", XREG, XREG, XREG) +MINI_OP(OP_VMXB, "vmxb", XREG, XREG, XREG) +MINI_OP(OP_VMXH, "vmxh", XREG, XREG, XREG) +MINI_OP(OP_VMXF, "vmxf", XREG, XREG, XREG) +MINI_OP(OP_VMXG, "vmxg", XREG, XREG, XREG) +MINI_OP(OP_VFMAXS, "vfmaxs", XREG, XREG, XREG) +MINI_OP(OP_VFMAXD, "vfmaxd", XREG, XREG, XREG) +MINI_OP(OP_VMNB, "vmnb", XREG, XREG, XREG) +MINI_OP(OP_VMNH, "vmnh", XREG, XREG, XREG) +MINI_OP(OP_VMNF, "vmnf", XREG, XREG, XREG) +MINI_OP(OP_VMNG, "vmng", XREG, XREG, XREG) +MINI_OP(OP_VFMINS, "vfmins", XREG, XREG, XREG) +MINI_OP(OP_VFMIND, "vfmind", XREG, XREG, XREG) +MINI_OP(OP_VMXLB, "vmxlb", XREG, XREG, XREG) +MINI_OP(OP_VMXLH, "vmxlh", XREG, XREG, XREG) +MINI_OP(OP_VMXLF, "vmxlf", XREG, XREG, XREG) +MINI_OP(OP_VMXLG, "vmxlg", XREG, XREG, XREG) +MINI_OP(OP_VMNLB, "vmnlb", XREG, XREG, XREG) +MINI_OP(OP_VMNLH, "vmnlh", XREG, XREG, XREG) +MINI_OP(OP_VMNLF, "vmnlf", XREG, XREG, XREG) +MINI_OP(OP_VMNLG, "vmnlg", XREG, XREG, XREG) +MINI_OP(OP_VOR, "vor", XREG, XREG, XREG) +MINI_OP(OP_VNOR, "vnor", XREG, XREG, XREG) +MINI_OP(OP_VXOR, "vxor", XREG, XREG, XREG) +MINI_OP(OP_VAND, "vand", XREG, XREG, 
XREG) +MINI_OP(OP_VNAND, "vnand", XREG, XREG, XREG) +MINI_OP(OP_VECTOR_ANDN, "vandnot", XREG, XREG, XREG) +MINI_OP(OP_VSUMB, "vsumb", XREG, XREG, XREG) +MINI_OP(OP_VSUMH, "vsumh", XREG, XREG, XREG) +MINI_OP(OP_VSUMQF, "vsumqf", XREG, XREG, XREG) +MINI_OP(OP_VSUMQG, "vsumqg", XREG, XREG, XREG) +MINI_OP(OP_VPERM, "vperm", XREG, XREG, XREG) +MINI_OP(OP_VREPIB, "vrepib", XREG, NONE, NONE) +MINI_OP(OP_VREPIH, "vrepih", XREG, NONE, NONE) +MINI_OP(OP_VREPIF, "vrepif", XREG, NONE, NONE) +MINI_OP(OP_VREPIG, "vrepig", XREG, NONE, NONE) +MINI_OP(OP_VFSQSB, "vfsqsb", XREG, XREG, NONE) +MINI_OP(OP_VFSQDB, "vfsqdb", XREG, XREG, NONE) +MINI_OP(OP_VFCESBS, "vfcesbs", XREG, XREG, XREG) +MINI_OP(OP_VFCEDBS, "vfcedbs", XREG, XREG, XREG) +MINI_OP(OP_VFCHSBS, "vfchsbs", XREG, XREG, XREG) +MINI_OP(OP_VFCHDBS, "vfchdbs", XREG, XREG, XREG) +MINI_OP(OP_VCEQBS, "vceqbs", XREG, XREG, XREG) +MINI_OP(OP_VCEQHS, "vceqhs", XREG, XREG, XREG) +MINI_OP(OP_VCEQFS, "vceqfs", XREG, XREG, XREG) +MINI_OP(OP_VCEQGS, "vceqgs", XREG, XREG, XREG) +MINI_OP(OP_VGMB, "vgmb", XREG, IREG, IREG) +MINI_OP(OP_VGMH, "vgmh", XREG, IREG, IREG) +MINI_OP(OP_VGMF, "vgmf", XREG, IREG, IREG) +MINI_OP(OP_VGMG, "vgmg", XREG, IREG, IREG) +MINI_OP(OP_VECB, "vecb", XREG, XREG, NONE) +MINI_OP(OP_VECF, "vecf", XREG, XREG, NONE) +MINI_OP(OP_VECH, "vech", XREG, XREG, NONE) +MINI_OP(OP_VECG, "vecg", XREG, XREG, NONE) +MINI_OP(OP_VECLB, "veclb", XREG, XREG, NONE) +MINI_OP(OP_VECLF, "veclf", XREG, XREG, NONE) +MINI_OP(OP_VECLH, "veclh", XREG, XREG, NONE) +MINI_OP(OP_VECLG, "veclg", XREG, XREG, NONE) +MINI_OP(OP_VCHBS, "vchbs", XREG, XREG, XREG) +MINI_OP(OP_VCHHS, "vchhs", XREG, XREG, XREG) +MINI_OP(OP_VCHFS, "vchfs", XREG, XREG, XREG) +MINI_OP(OP_VCHGS, "vchgs", XREG, XREG, XREG) +MINI_OP(OP_VCHLBS, "vchlbs", XREG, XREG, XREG) +MINI_OP(OP_VCHLHS, "vchlhs", XREG, XREG, XREG) +MINI_OP(OP_VCHLFS, "vchlfs", XREG, XREG, XREG) +MINI_OP(OP_VCHLGS, "vchlgs", XREG, XREG, XREG) +MINI_OP(OP_VEC_ABS, "vecabs", XREG, XREG, NONE) 
+MINI_OP(OP_VEC_ONE, "vecone", XREG, NONE, NONE) +MINI_OP(OP_VLPB, "vlpb", XREG, XREG, NONE) +MINI_OP(OP_VLPH, "vlph", XREG, XREG, NONE) +MINI_OP(OP_VLPF, "vlpf", XREG, XREG, NONE) +MINI_OP(OP_VLPG, "vlpg", XREG, XREG, NONE) +MINI_OP(OP_VFLPDB, "vflpdb", XREG, XREG, NONE) +MINI_OP(OP_VFLPSB, "vflpsb", XREG, XREG, NONE) +MINI_OP(OP_VFLCDB, "vflcdb", XREG, XREG, NONE) +MINI_OP(OP_VFLCSB, "vflcsb", XREG, XREG, NONE) +MINI_OP3(OP_BSL, "bitwise_select", XREG, XREG, XREG, XREG) +MINI_OP(OP_VPKH, "vpkh", XREG, XREG, XREG) +MINI_OP(OP_VPKF, "vpkf", XREG, XREG, XREG) +MINI_OP(OP_VPKG, "vpkg", XREG, XREG, XREG) +MINI_OP(OP_VLCB, "vlcb", XREG, XREG, NONE) +MINI_OP(OP_VLCH, "vlch", XREG, XREG, NONE) +MINI_OP(OP_VLCF, "vlcf", XREG, XREG, NONE) +MINI_OP(OP_VLCG, "vlcg", XREG, XREG, NONE) +MINI_OP(OP_VUPHB, "vuphb", XREG, XREG, NONE) +MINI_OP(OP_VUPHH, "vuphh", XREG, XREG, NONE) +MINI_OP(OP_VUPHF, "vuphf", XREG, XREG, NONE) +MINI_OP(OP_VUPLB, "vuplb", XREG, XREG, NONE) +MINI_OP(OP_VUPLH, "vuplh", XREG, XREG, NONE) +MINI_OP(OP_VUPLF, "vuplf", XREG, XREG, NONE) +MINI_OP(OP_VUPLHB, "vuplhb", XREG, XREG, NONE) +MINI_OP(OP_VUPLHH, "vuplhh", XREG, XREG, NONE) +MINI_OP(OP_VUPLHF, "vuplhf", XREG, XREG, NONE) +MINI_OP(OP_VUPLLB, "vupllb", XREG, XREG, NONE) +MINI_OP(OP_VUPLLH, "vupllh", XREG, XREG, NONE) +MINI_OP(OP_VUPLLF, "vupllf", XREG, XREG, NONE) +MINI_OP(OP_VFISB, "vfisb", XREG, XREG, NONE) +MINI_OP(OP_VFIDB, "vfidb", XREG, XREG, NONE) +MINI_OP(OP_CEIL_FLOOR, "ceil_floor", XREG, XREG, NONE) +MINI_OP(OP_ICNLE, "int_cnle", IREG, NONE, NONE) +MINI_OP(OP_ONES_COMPLEMENT, "ones_complement", XREG, XREG, NONE) +MINI_OP(OP_NEGATION, "negate", XREG, XREG, NONE) #endif #if defined(TARGET_ARM64) diff --git a/src/mono/mono/mini/mini-runtime.c b/src/mono/mono/mini/mini-runtime.c index e82d55c2645163..6cbcfd3d512d02 100644 --- a/src/mono/mono/mini/mini-runtime.c +++ b/src/mono/mono/mini/mini-runtime.c @@ -4496,7 +4496,7 @@ init_class (MonoClass *klass) } #endif -#ifdef TARGET_ARM64 +#if 
defined(TARGET_ARM64) || defined(TARGET_S390X) if (!strcmp (m_class_get_name_space (klass), "System.Numerics")) { if (!strcmp (name, "Vector2") || !strcmp (name, "Vector3") ||!strcmp (name, "Vector4") || !strcmp (name, "Quaternion") || !strcmp (name, "Plane")) mono_class_set_is_simd_type (klass, TRUE); diff --git a/src/mono/mono/mini/mini-s390x.c b/src/mono/mono/mini/mini-s390x.c index 3419a29768c707..2f696d88221bee 100644 --- a/src/mono/mono/mini/mini-s390x.c +++ b/src/mono/mono/mini/mini-s390x.c @@ -19,6 +19,23 @@ #define MAX_ARCH_DELEGATE_PARAMS 10 +#define NEW_INS(cfg,ins,dest,op) do { \ + MONO_INST_NEW ((cfg), (dest), (op)); \ + (dest)->cil_code = (ins)->cil_code; \ + mono_bblock_insert_before_ins (bb, ins, (dest)); \ + } while (0) + +#define NEW_SIMD_INS(cfg,ins,dest,op,d,s1,s2) do { \ + MONO_INST_NEW ((cfg), (dest), (op)); \ + (dest)->cil_code = (ins)->cil_code; \ + (dest)->dreg = d; \ + (dest)->sreg1 = s1; \ + (dest)->sreg2 = s2; \ + (dest)->type = STACK_VTYPE; \ + (dest)->klass = ins->klass; \ + mono_bblock_insert_before_ins (bb, ins, (dest)); \ + } while (0) + #define EMIT_COND_BRANCH(ins,cond) \ { \ if (ins->inst_true_bb->native_offset) { \ @@ -2177,6 +2194,332 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) /*========================= End of Function ========================*/ +static int +simd_type_to_sub_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VSUBB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VSUBH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VSUBF; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + case MONO_TYPE_I: + case MONO_TYPE_U: + return OP_VSUBG; + case MONO_TYPE_R4: + return OP_VFSUBS; + case MONO_TYPE_R8: + return OP_VFSUBD; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_add_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VADDB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VADDH; + 
case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VADDF; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + case MONO_TYPE_I: + case MONO_TYPE_U: + return OP_VADDG; + case MONO_TYPE_R4: + return OP_VFADDS; + case MONO_TYPE_R8: + return OP_VFADDD; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_mul_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VMULB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VMULHW; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VMULF; + case MONO_TYPE_R4: + return OP_VFMULS; + case MONO_TYPE_R8: + return OP_VFMULD; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_max_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + return OP_VMXB; + case MONO_TYPE_U1: + return OP_VMXLB; + case MONO_TYPE_I2: + return OP_VMXH; + case MONO_TYPE_U2: + return OP_VMXLH; + case MONO_TYPE_I4: + return OP_VMXF; + case MONO_TYPE_U4: + return OP_VMXLF; + case MONO_TYPE_I8: + case MONO_TYPE_I: + return OP_VMXG; + case MONO_TYPE_U8: + case MONO_TYPE_U: + return OP_VMXLG; + case MONO_TYPE_R4: + return OP_VFMAXS; + case MONO_TYPE_R8: + return OP_VFMAXD; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_min_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + return OP_VMNB; + case MONO_TYPE_U1: + return OP_VMNLB; + case MONO_TYPE_I2: + return OP_VMNH; + case MONO_TYPE_U2: + return OP_VMNLH; + case MONO_TYPE_I4: + return OP_VMNF; + case MONO_TYPE_U4: + return OP_VMNLF; + case MONO_TYPE_I8: + case MONO_TYPE_I: + return OP_VMNG; + case MONO_TYPE_U8: + case MONO_TYPE_U: + return OP_VMNLG; + case MONO_TYPE_R4: + return OP_VFMINS; + case MONO_TYPE_R8: + return OP_VFMIND; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_const_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VREPIB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VREPIH; + case 
MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VREPIF; + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return OP_VREPIG; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_comp_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VCEQBS; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VCEQHS; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VCEQFS; + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return OP_VCEQGS; + case MONO_TYPE_R4: + return OP_VFCESBS; + case MONO_TYPE_R8: + return OP_VFCEDBS; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_gt_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + return OP_VCHBS; + case MONO_TYPE_U1: + return OP_VCHLBS; + case MONO_TYPE_I2: + return OP_VCHHS; + case MONO_TYPE_U2: + return OP_VCHLHS; + case MONO_TYPE_I4: + return OP_VCHFS; + case MONO_TYPE_U4: + return OP_VCHLFS; + case MONO_TYPE_I: + case MONO_TYPE_I8: + return OP_VCHGS; + case MONO_TYPE_U: + case MONO_TYPE_U8: + return OP_VCHLGS; + case MONO_TYPE_R4: + return OP_VFCHSBS; + case MONO_TYPE_R8: + return OP_VFCHDBS; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_extract_op (int t, int q) +{ + switch (t){ + case SIMD_EXTR_ARE_ALL_SET:{ + switch (q){ + case CMP_LT: + case CMP_GT: + case CMP_GT_UN: + case CMP_LT_UN: + case CMP_EQ: + return OP_CEQ; + case CMP_GE: + case CMP_LE: + case CMP_GE_UN: + case CMP_LE_UN: + return OP_ICNLE; //ICNGE + default: + g_assert_not_reached(); + return -1; + } + } + case SIMD_EXTR_IS_ANY_SET:{ + switch (q){ + case CMP_GT: + case CMP_LT: + case CMP_GT_UN: + case CMP_LT_UN: + case CMP_EQ: + return OP_ICLE; + case CMP_GE: + case CMP_LE: + case CMP_GE_UN: + case CMP_LE_UN: + return OP_ICNEQ; + default: + g_assert_not_reached(); + return -1; + } + } + default: + g_assert_not_reached (); + return -1; + } +} + +static int 
+simd_type_to_abs_op (int t) +{ + /* Maps a SIMD element type to the per-lane "load positive" (absolute value) opcode. */ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VLPB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VLPH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VLPF; + case MONO_TYPE_R4: + return OP_VFLPSB; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + /* native-sized ints use the doubleword op, as in the other simd_type_to_* helpers */ + case MONO_TYPE_I: + case MONO_TYPE_U: + return OP_VLPG; + case MONO_TYPE_R8: + return OP_VFLPDB; + + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_negate_op (int t) +{ + /* Maps a SIMD element type to the per-lane "load complement" (negate) opcode. */ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VLCB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VLCH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VLCF; + case MONO_TYPE_R4: + return OP_VFLCSB; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + /* native-sized ints use the doubleword op, as in the other simd_type_to_* helpers */ + case MONO_TYPE_I: + case MONO_TYPE_U: + return OP_VLCG; + case MONO_TYPE_R8: + return OP_VFLCDB; + default: + g_assert_not_reached (); + return -1; + } +} + /** * * @brief Architecture-specific lowering pass processing @@ -2190,7 +2533,8 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) void mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) { - MonoInst *ins, *next; + MonoInst *ins, *next, *temp_ins; + int temp; MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) { switch (ins->opcode) { @@ -2212,6 +2556,136 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) /* This is created by the memcpy code which ignores is_inst_imm */ mono_decompose_op_imm (cfg, bb, ins); break; + case OP_XBINOP:{ + switch(ins->inst_c0){ + case OP_IADD: + ins->opcode = GINT_TO_OPCODE (simd_type_to_add_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_ISUB: + ins->opcode = GINT_TO_OPCODE (simd_type_to_sub_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_IMUL: + ins->opcode = GINT_TO_OPCODE (simd_type_to_mul_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_IMAX_UN: + case OP_IMAX: + ins->opcode = GINT_TO_OPCODE (simd_type_to_max_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_IMIN_UN: + case OP_IMIN: + ins->opcode = GINT_TO_OPCODE 
(simd_type_to_min_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FADD: + ins->opcode = GINT_TO_OPCODE (simd_type_to_add_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FSUB: + ins->opcode = GINT_TO_OPCODE (simd_type_to_sub_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FMUL: + ins->opcode = GINT_TO_OPCODE (simd_type_to_mul_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FDIV: + ins->opcode = ins->inst_c1 == MONO_TYPE_R4 ? OP_VFDIVS : OP_VFDIVD; + break; + case OP_FMIN: + ins->opcode = GINT_TO_OPCODE (simd_type_to_min_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FMAX: + ins->opcode = GINT_TO_OPCODE (simd_type_to_max_op (GTMREG_TO_INT (ins->inst_c1))); + break; + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_XBINOP_FORCEINT:{ + switch (ins->inst_c0) { + case XBINOP_FORCEINT_AND: + ins->opcode = OP_VAND; + break; + case XBINOP_FORCEINT_OR: + ins->opcode = OP_VOR; + break; + case XBINOP_FORCEINT_XOR: + ins->opcode = OP_VXOR; + break; + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_XCAST:{ + ins->opcode = OP_XMOVE; + break; + } + case OP_XCOMPARE_FP: + case OP_XCOMPARE:{ + switch (ins->inst_c0){ + case CMP_EQ: + ins->opcode = GINT_TO_OPCODE (simd_type_to_comp_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_LT: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GT: + ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_LT_UN: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GT_UN: + ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_GE: + case CMP_GE_UN: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_LE: + case CMP_LE_UN:{ + /* FIX ME : setting condition code for cases other than ANY and ALL may degrade the performance*/ + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE 
(simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))), ins->dreg, ins->sreg1, ins->sreg2); + NEW_SIMD_INS (cfg, ins, temp_ins, OP_VNOR, ins->dreg, ins->dreg, ins->dreg); + NULLIFY_INS (ins); + break; + } + default: + g_assert_not_reached (); + break; + } + } + break; + case OP_XEXTRACT: + ins->opcode = GINT_TO_OPCODE (simd_type_to_extract_op (GTMREG_TO_INT (ins->inst_c0), GTMREG_TO_INT (ins->inst_c1))); + ins->sreg1 = -1; //we assign this -1 or else this messes up the code cache + break; + case OP_VEC_ABS: + ins->opcode = GINT_TO_OPCODE (simd_type_to_abs_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_NEGATION: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_negate_op (GTMREG_TO_INT (ins->inst_c1))), ins->dreg, ins->sreg1, -1); + NULLIFY_INS (ins); + break; + case OP_ONES_COMPLEMENT: + ins->opcode = OP_VNOR; + ins->sreg2 = ins->sreg1; + break; + case OP_CEIL_FLOOR:{ + if (ins->inst_c0 == MONO_TYPE_R8) + NEW_SIMD_INS (cfg, ins, temp_ins, OP_VFIDB, ins->dreg, ins->sreg1, -1); + else + NEW_SIMD_INS (cfg, ins, temp_ins, OP_VFISB, ins->dreg, ins->sreg1, -1); + temp_ins->inst_c0 = ins->inst_c0; + NULLIFY_INS(ins); + } + break; + case OP_VEC_ONE: + ins->opcode = GINT_TO_OPCODE (simd_type_to_const_op (GTMREG_TO_INT (ins->inst_c0))); + break; default: break; } @@ -2411,7 +2885,6 @@ is_unsigned (MonoInst *next) * Process instructions within basic block emitting s390x instructions * based on the VM operation codes */ - void mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) { @@ -4048,6 +4521,21 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } } break; + case OP_ICNLE: { + /* dreg = 1 when the condition code is "not low-or-equal", else 0 */ + if (mono_hwcap_s390x_has_lsoc2) { + s390_lghi (code, ins->dreg, 0); + s390_locghinle(code, ins->dreg, 1); + } else if (mono_hwcap_s390x_has_mlt) { + s390_lghi (code, ins->dreg, 0); + s390_lghi (code, s390_r13, 1); + s390_locgrnle(code, ins->dreg, s390_r13); + } else { + /* branch fallback: start from 0 and skip the "set to 1" when the condition is "le", matching the two paths above */ + s390_lghi(code, ins->dreg, 0); + s390_jle (code, 4); + s390_lghi(code, 
ins->dreg, 1); + } + } + break; case OP_ICGE: case OP_ICGE_UN: { if (mono_hwcap_s390x_has_lsoc2) { @@ -4834,606 +5322,461 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) bb->spill_slot_defs = g_slist_prepend_mempool (cfg->mempool, bb->spill_slot_defs, ins); break; #ifdef MONO_ARCH_SIMD_INTRINSICS - case OP_ADDPS: - s390x_addps (code, ins->sreg1, ins->sreg2); - break; - case OP_DIVPS: - s390x_divps (code, ins->sreg1, ins->sreg2); - break; - case OP_MULPS: - s390x_mulps (code, ins->sreg1, ins->sreg2); - break; - case OP_SUBPS: - s390x_subps (code, ins->sreg1, ins->sreg2); - break; - case OP_MAXPS: - s390x_maxps (code, ins->sreg1, ins->sreg2); - break; - case OP_MINPS: - s390x_minps (code, ins->sreg1, ins->sreg2); - break; - case OP_COMPPS: - g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7); - s390x_cmpps_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); - break; - case OP_ANDPS: - s390x_andps (code, ins->sreg1, ins->sreg2); - break; - case OP_ANDNPS: - s390x_andnps (code, ins->sreg1, ins->sreg2); - break; - case OP_ORPS: - s390x_orps (code, ins->sreg1, ins->sreg2); - break; - case OP_XORPS: - s390x_xorps (code, ins->sreg1, ins->sreg2); - break; - case OP_SQRTPS: - s390x_sqrtps (code, ins->dreg, ins->sreg1); - break; - case OP_RSQRTPS: - s390x_rsqrtps (code, ins->dreg, ins->sreg1); - break; - case OP_RCPPS: - s390x_rcpps (code, ins->dreg, ins->sreg1); - break; - case OP_ADDSUBPS: - s390x_addsubps (code, ins->sreg1, ins->sreg2); - break; - case OP_HADDPS: - s390x_haddps (code, ins->sreg1, ins->sreg2); - break; - case OP_HSUBPS: - s390x_hsubps (code, ins->sreg1, ins->sreg2); - break; - case OP_DUPPS_HIGH: - s390x_movshdup (code, ins->dreg, ins->sreg1); - break; - case OP_DUPPS_LOW: - s390x_movsldup (code, ins->dreg, ins->sreg1); - break; - - case OP_PSHUFLEW_HIGH: - g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF); - s390x_pshufhw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); - break; - case OP_PSHUFLEW_LOW: - g_assert (ins->inst_c0 >= 0 
&& ins->inst_c0 <= 0xFF); - s390x_pshuflw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); - break; - case OP_PSHUFLED: - g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF); - s390x_pshufd_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); - break; - case OP_SHUFPS: - g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF); - s390x_shufps_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); - break; - case OP_SHUFPD: - g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0x3); - s390x_shufpd_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); - break; - - case OP_ADDPD: - s390x_addpd (code, ins->sreg1, ins->sreg2); - break; - case OP_DIVPD: - s390x_divpd (code, ins->sreg1, ins->sreg2); - break; - case OP_MULPD: - s390x_mulpd (code, ins->sreg1, ins->sreg2); - break; - case OP_SUBPD: - s390x_subpd (code, ins->sreg1, ins->sreg2); - break; - case OP_MAXPD: - s390x_maxpd (code, ins->sreg1, ins->sreg2); - break; - case OP_MINPD: - s390x_minpd (code, ins->sreg1, ins->sreg2); - break; - case OP_COMPPD: - g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 7); - s390x_cmppd_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); - break; - case OP_ANDPD: - s390x_andpd (code, ins->sreg1, ins->sreg2); - break; - case OP_ANDNPD: - s390x_andnpd (code, ins->sreg1, ins->sreg2); - break; - case OP_ORPD: - s390x_orpd (code, ins->sreg1, ins->sreg2); - break; - case OP_XORPD: - s390x_xorpd (code, ins->sreg1, ins->sreg2); - break; - case OP_SQRTPD: - s390x_sqrtpd (code, ins->dreg, ins->sreg1); - break; - case OP_ADDSUBPD: - s390x_addsubpd (code, ins->sreg1, ins->sreg2); - break; - case OP_HADDPD: - s390x_haddpd (code, ins->sreg1, ins->sreg2); - break; - case OP_HSUBPD: - s390x_hsubpd (code, ins->sreg1, ins->sreg2); - break; - case OP_DUPPD: - s390x_movddup (code, ins->dreg, ins->sreg1); - break; - - case OP_EXTRACT_MASK: - s390x_pmovmskb (code, ins->dreg, ins->sreg1); - break; - - case OP_PAND: - s390x_pand (code, ins->sreg1, ins->sreg2); - break; - case OP_POR: - s390x_por (code, ins->sreg1, ins->sreg2); - 
break; - case OP_PXOR: - s390x_pxor (code, ins->sreg1, ins->sreg2); - break; - - case OP_PADDB: - s390x_paddb (code, ins->sreg1, ins->sreg2); - break; - case OP_PADDW: - s390x_paddw (code, ins->sreg1, ins->sreg2); - break; - case OP_PADDD: - s390x_paddd (code, ins->sreg1, ins->sreg2); - break; - case OP_PADDQ: - s390x_paddq (code, ins->sreg1, ins->sreg2); - break; - - case OP_PSUBB: - s390x_psubb (code, ins->sreg1, ins->sreg2); - break; - case OP_PSUBW: - s390x_psubw (code, ins->sreg1, ins->sreg2); - break; - case OP_PSUBD: - s390x_psubd (code, ins->sreg1, ins->sreg2); - break; - case OP_PSUBQ: - s390x_psubq (code, ins->sreg1, ins->sreg2); - break; - - case OP_PMAXB_UN: - s390x_pmaxub (code, ins->sreg1, ins->sreg2); - break; - case OP_PMAXW_UN: - s390x_pmaxuw (code, ins->sreg1, ins->sreg2); - break; - case OP_PMAXD_UN: - s390x_pmaxud (code, ins->sreg1, ins->sreg2); - break; - - case OP_PMAXB: - s390x_pmaxsb (code, ins->sreg1, ins->sreg2); - break; - case OP_PMAXW: - s390x_pmaxsw (code, ins->sreg1, ins->sreg2); - break; - case OP_PMAXD: - s390x_pmaxsd (code, ins->sreg1, ins->sreg2); - break; - - case OP_PAVGB_UN: - s390x_pavgb (code, ins->sreg1, ins->sreg2); - break; - case OP_PAVGW_UN: - s390x_pavgw (code, ins->sreg1, ins->sreg2); - break; - - case OP_PMINB_UN: - s390x_pminub (code, ins->sreg1, ins->sreg2); - break; - case OP_PMINW_UN: - s390x_pminuw (code, ins->sreg1, ins->sreg2); - break; - case OP_PMIND_UN: - s390x_pminud (code, ins->sreg1, ins->sreg2); - break; - - case OP_PMINB: - s390x_pminsb (code, ins->sreg1, ins->sreg2); - break; - case OP_PMINW: - s390x_pminsw (code, ins->sreg1, ins->sreg2); - break; - case OP_PMIND: - s390x_pminsd (code, ins->sreg1, ins->sreg2); - break; - - case OP_PCMPEQB: - s390x_pcmpeqb (code, ins->sreg1, ins->sreg2); - break; - case OP_PCMPEQW: - s390x_pcmpeqw (code, ins->sreg1, ins->sreg2); - break; - case OP_PCMPEQD: - s390x_pcmpeqd (code, ins->sreg1, ins->sreg2); - break; - case OP_PCMPEQQ: - s390x_pcmpeqq (code, ins->sreg1, 
ins->sreg2); - break; - - case OP_PCMPGTB: - s390x_pcmpgtb (code, ins->sreg1, ins->sreg2); - break; - case OP_PCMPGTW: - s390x_pcmpgtw (code, ins->sreg1, ins->sreg2); - break; - case OP_PCMPGTD: - s390x_pcmpgtd (code, ins->sreg1, ins->sreg2); - break; - case OP_PCMPGTQ: - s390x_pcmpgtq (code, ins->sreg1, ins->sreg2); - break; - - case OP_PSUM_ABS_DIFF: - s390x_psadbw (code, ins->sreg1, ins->sreg2); - break; - - case OP_UNPACK_LOWB: - s390x_punpcklbw (code, ins->sreg1, ins->sreg2); - break; - case OP_UNPACK_LOWW: - s390x_punpcklwd (code, ins->sreg1, ins->sreg2); - break; - case OP_UNPACK_LOWD: - s390x_punpckldq (code, ins->sreg1, ins->sreg2); - break; - case OP_UNPACK_LOWQ: - s390x_punpcklqdq (code, ins->sreg1, ins->sreg2); - break; - case OP_UNPACK_LOWPS: - s390x_unpcklps (code, ins->sreg1, ins->sreg2); - break; - case OP_UNPACK_LOWPD: - s390x_unpcklpd (code, ins->sreg1, ins->sreg2); - break; - - case OP_UNPACK_HIGHB: - s390x_punpckhbw (code, ins->sreg1, ins->sreg2); - break; - case OP_UNPACK_HIGHW: - s390x_punpckhwd (code, ins->sreg1, ins->sreg2); - break; - case OP_UNPACK_HIGHD: - s390x_punpckhdq (code, ins->sreg1, ins->sreg2); - break; - case OP_UNPACK_HIGHQ: - s390x_punpckhqdq (code, ins->sreg1, ins->sreg2); - break; - case OP_UNPACK_HIGHPS: - s390x_unpckhps (code, ins->sreg1, ins->sreg2); - break; - case OP_UNPACK_HIGHPD: - s390x_unpckhpd (code, ins->sreg1, ins->sreg2); - break; - - case OP_PACKW: - s390x_packsswb (code, ins->sreg1, ins->sreg2); - break; - case OP_PACKD: - s390x_packssdw (code, ins->sreg1, ins->sreg2); - break; - case OP_PACKW_UN: - s390x_packuswb (code, ins->sreg1, ins->sreg2); - break; - case OP_PACKD_UN: - s390x_packusdw (code, ins->sreg1, ins->sreg2); - break; - - case OP_PADDB_SAT_UN: - s390x_paddusb (code, ins->sreg1, ins->sreg2); - break; - case OP_PSUBB_SAT_UN: - s390x_psubusb (code, ins->sreg1, ins->sreg2); - break; - case OP_PADDW_SAT_UN: - s390x_paddusw (code, ins->sreg1, ins->sreg2); - break; - case OP_PSUBW_SAT_UN: - s390x_psubusw 
(code, ins->sreg1, ins->sreg2); - break; - - case OP_PADDB_SAT: - s390x_paddsb (code, ins->sreg1, ins->sreg2); - break; - case OP_PSUBB_SAT: - s390x_psubsb (code, ins->sreg1, ins->sreg2); - break; - case OP_PADDW_SAT: - s390x_paddsw (code, ins->sreg1, ins->sreg2); - break; - case OP_PSUBW_SAT: - s390x_psubsw (code, ins->sreg1, ins->sreg2); - break; - - case OP_PMULW: - s390x_pmullw (code, ins->sreg1, ins->sreg2); - break; - case OP_PMULD: - s390x_pmulld (code, ins->sreg1, ins->sreg2); - break; - case OP_PMULQ: - s390x_pmuludq (code, ins->sreg1, ins->sreg2); - break; - case OP_PMULW_HIGH_UN: - s390x_pmulhuw (code, ins->sreg1, ins->sreg2); - break; - case OP_PMULW_HIGH: - s390x_pmulhw (code, ins->sreg1, ins->sreg2); - break; - - case OP_PSHRW: - s390x_psrlw_reg_imm (code, ins->dreg, ins->inst_imm); - break; - case OP_PSHRW_REG: - s390x_psrlw (code, ins->dreg, ins->sreg2); - break; - - case OP_PSARW: - s390x_psraw_reg_imm (code, ins->dreg, ins->inst_imm); - break; - case OP_PSARW_REG: - s390x_psraw (code, ins->dreg, ins->sreg2); - break; - - case OP_PSHLW: - s390x_psllw_reg_imm (code, ins->dreg, ins->inst_imm); - break; - case OP_PSHLW_REG: - s390x_psllw (code, ins->dreg, ins->sreg2); - break; - - case OP_PSHRD: - s390x_psrld_reg_imm (code, ins->dreg, ins->inst_imm); - break; - case OP_PSHRD_REG: - s390x_psrld (code, ins->dreg, ins->sreg2); - break; - - case OP_PSARD: - s390x_psrad_reg_imm (code, ins->dreg, ins->inst_imm); - break; - case OP_PSARD_REG: - s390x_psrad (code, ins->dreg, ins->sreg2); - break; - - case OP_PSHLD: - s390x_pslld_reg_imm (code, ins->dreg, ins->inst_imm); - break; - case OP_PSHLD_REG: - s390x_pslld (code, ins->dreg, ins->sreg2); - break; - - case OP_PSHRQ: - s390x_psrlq_reg_imm (code, ins->dreg, ins->inst_imm); - break; - case OP_PSHRQ_REG: - s390x_psrlq (code, ins->dreg, ins->sreg2); - break; - - /*TODO: This is appart of the sse spec but not added - case OP_PSARQ: - s390x_psraq_reg_imm (code, ins->dreg, ins->inst_imm); - break; - case 
OP_PSARQ_REG: - s390x_psraq (code, ins->dreg, ins->sreg2); - break; - */ - - case OP_PSHLQ: - s390x_psllq_reg_imm (code, ins->dreg, ins->inst_imm); - break; - case OP_PSHLQ_REG: - s390x_psllq (code, ins->dreg, ins->sreg2); - break; - case OP_CVTDQ2PD: - s390x_cvtdq2pd (code, ins->dreg, ins->sreg1); - break; - case OP_CVTDQ2PS: - s390x_cvtdq2ps (code, ins->dreg, ins->sreg1); - break; - case OP_CVTPD2DQ: - s390x_cvtpd2dq (code, ins->dreg, ins->sreg1); - break; - case OP_CVTPD2PS: - s390x_cvtpd2ps (code, ins->dreg, ins->sreg1); - break; - case OP_CVTPS2DQ: - s390x_cvtps2dq (code, ins->dreg, ins->sreg1); - break; - case OP_CVTPS2PD: - s390x_cvtps2pd (code, ins->dreg, ins->sreg1); - break; - case OP_CVTTPD2DQ: - s390x_cvttpd2dq (code, ins->dreg, ins->sreg1); - break; - case OP_CVTTPS2DQ: - s390x_cvttps2dq (code, ins->dreg, ins->sreg1); - break; - - case OP_ICONV_TO_X: - amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4); - break; - case OP_EXTRACT_I4: - amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); - break; - case OP_EXTRACT_I8: - if (ins->inst_c0) { - amd64_movhlps (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1); - amd64_movd_reg_xreg_size (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG, 8); - } else { - amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8); - } - break; - case OP_EXTRACT_I1: - case OP_EXTRACT_U1: - amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); - if (ins->inst_c0) - amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8); - amd64_widen_reg (code, ins->dreg, ins->dreg, ins->inst_c1 == OP_EXTRACT_I1, FALSE); - break; - case OP_EXTRACT_I2: - case OP_EXTRACT_U2: - /*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); - if (ins->inst_c0) - amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 16, 4);*/ - s390x_pextrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); - amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->inst_c1 == OP_EXTRACT_I2, TRUE, 4); - break; - case OP_EXTRACT_R8: - if (ins->inst_c0) - 
amd64_movhlps (code, ins->dreg, ins->sreg1); - else - s390x_movsd (code, ins->dreg, ins->sreg1); - break; - case OP_INSERT_I2: - s390x_pinsrw_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); - break; - case OP_EXTRACTX_U2: - s390x_pextrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); - break; - case OP_INSERTX_U1_SLOW: - /*sreg1 is the extracted ireg (scratch) - /sreg2 is the to be inserted ireg (scratch) - /dreg is the xreg to receive the value*/ - - /*clear the bits from the extracted word*/ - amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 0x00FF : 0xFF00); - /*shift the value to insert if needed*/ - if (ins->inst_c0 & 1) - amd64_shift_reg_imm_size (code, X86_SHL, ins->sreg2, 8, 4); - /*join them together*/ - amd64_alu (code, X86_OR, ins->sreg1, ins->sreg2); - s390x_pinsrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0 / 2); - break; - case OP_INSERTX_I4_SLOW: - s390x_pinsrw_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2); - amd64_shift_reg_imm (code, X86_SHR, ins->sreg2, 16); - s390x_pinsrw_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1); - break; - case OP_INSERTX_I8_SLOW: - amd64_movd_xreg_reg_size(code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg2, 8); - if (ins->inst_c0) - amd64_movlhps (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG); - else - s390x_movsd (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG); - break; - - case OP_INSERTX_R4_SLOW: - switch (ins->inst_c0) { - case 0: - s390x_movss (code, ins->dreg, ins->sreg2); - break; - case 1: - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3)); - s390x_movss (code, ins->dreg, ins->sreg2); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3)); - break; - case 2: - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3)); - s390x_movss (code, ins->dreg, ins->sreg2); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3)); - break; - case 3: - s390x_pshufd_imm (code, ins->dreg, 
ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0)); - s390x_movss (code, ins->dreg, ins->sreg2); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0)); - break; - } - break; - case OP_INSERTX_R8_SLOW: - if (ins->inst_c0) - amd64_movlhps (code, ins->dreg, ins->sreg2); - else - s390x_movsd (code, ins->dreg, ins->sreg2); - break; - case OP_STOREX_MEMBASE_REG: - case OP_STOREX_MEMBASE: - s390x_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1); - break; - case OP_LOADX_MEMBASE: - s390x_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset); - break; - case OP_LOADX_ALIGNED_MEMBASE: - s390x_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset); - break; - case OP_STOREX_ALIGNED_MEMBASE_REG: - s390x_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1); - break; - case OP_STOREX_NTA_MEMBASE_REG: - s390x_movntps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset); - break; - case OP_PREFETCH_MEMBASE: - s390x_prefetch_reg_membase (code, ins->backend.arg_info, ins->sreg1, ins->inst_offset); - break; - - case OP_XMOVE: - /*FIXME the peephole pass should have killed this*/ - if (ins->dreg != ins->sreg1) - s390x_movaps (code, ins->dreg, ins->sreg1); - break; - case OP_XZERO: - s390x_pxor (code, ins->dreg, ins->dreg); - break; - case OP_ICONV_TO_R4_RAW: - amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4); - break; - - case OP_FCONV_TO_R8_X: - s390x_movsd (code, ins->dreg, ins->sreg1); - break; - - case OP_XCONV_R8_TO_I4: - s390x_cvttsd2si_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); - switch (ins->backend.source_opcode) { - case OP_FCONV_TO_I1: - amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE); - break; - case OP_FCONV_TO_U1: - amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); - break; - case OP_FCONV_TO_I2: - amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE); - break; - case OP_FCONV_TO_U2: - amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE); - 
break; - } - break; - - case OP_EXPAND_I2: - s390x_pinsrw_imm (code, ins->dreg, ins->sreg1, 0); - s390x_pinsrw_imm (code, ins->dreg, ins->sreg1, 1); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0); - break; - case OP_EXPAND_I4: - amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0); - break; - case OP_EXPAND_I8: - amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0x44); - break; - case OP_EXPAND_R4: - s390x_movsd (code, ins->dreg, ins->sreg1); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0); - break; - case OP_EXPAND_R8: - s390x_movsd (code, ins->dreg, ins->sreg1); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0x44); - break; + case OP_XCONST: + S390_SET (code, s390_r13, ins->inst_p0); + s390_vl(code, ins->dreg, 0, 0, s390_r13); + break; + case OP_LOADX_MEMBASE: + S390_LONG_VEC(code, vl, vl, ins->dreg, ins->inst_offset, 0, ins->inst_basereg); + break; + case OP_STOREX_MEMBASE: + S390_LONG_VEC(code, vst, vst, ins->sreg1, ins->inst_offset,0, ins->inst_destbasereg); + break; + case OP_VAND: + s390_vn (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VNAND: + s390_vnn (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VECTOR_ANDN: + s390_vno (code, s390_vr16, ins->sreg1, ins->sreg1); + s390_vn (code, ins->dreg, s390_vr16, ins->sreg2); + break; + case OP_VOR: + s390_vo (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VNOR:{ + s390_vno (code, ins->dreg, ins->sreg1, ins->sreg2); + } + break; + case OP_VXOR: + s390_vx (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VADDB: + s390_vab (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VADDH: + s390_vah (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VADDF: + s390_vaf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VADDG: + s390_vag (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VADDQ: + s390_vaq (code, 
ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFADDS: + s390_vfasb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFADDD: + s390_vfadb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUBB: + s390_vsb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUBH: + s390_vsh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUBF: + s390_vsf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUBG: + s390_vsg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUBQ: + s390_vsq (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFSUBS: + s390_vfssb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFSUBD: + s390_vfsdb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMULB: + s390_vmlb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMULHW: + s390_vmlhw (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMULF: + s390_vmlf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFMULS: + s390_vfmsb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFMULD: + s390_vfmdb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFDIVS: + s390_vfdsb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFDIVD: + s390_vfddb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUMB: + s390_vsumb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUMH: + s390_vsumh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUMQF: + s390_vsumqf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUMQG: + s390_vsumqg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXB: + s390_vmxb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXH: + s390_vmxh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXF: + s390_vmxf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXG: + s390_vmxg (code, ins->dreg, ins->sreg1, ins->sreg2); + 
break; + case OP_VFMAXS: + s390_vfmaxsb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); + break; + case OP_VFMAXD: + s390_vfmaxdb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); + break; + case OP_VMXLB: + s390_vmxlb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXLH: + s390_vmxlh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXLF: + s390_vmxlf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXLG: + s390_vmxlg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNB: + s390_vmnb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNH: + s390_vmnh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNF: + s390_vmnf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNG: + s390_vmng (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFMINS: + s390_vfminsb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); + break; + case OP_VFMIND: + s390_vfmindb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); + break; + case OP_VMNLB: + s390_vmnlb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNLH: + s390_vmnlh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNLF: + s390_vmnlf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNLG: + s390_vmnlg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VREPIB: + s390_vrepib (code, ins->dreg, ins->inst_c1); + break; + case OP_VREPIH: + s390_vrepih (code, ins->dreg, ins->inst_c1); + break; + case OP_VREPIF: + s390_vrepif (code, ins->dreg, ins->inst_c1); + break; + case OP_VREPIG: + s390_vrepig (code, ins->dreg, ins->inst_c1); + break; + case OP_VCEQBS: + s390_vceqbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCEQHS: + s390_vceqhs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCEQFS: + s390_vceqfs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCEQGS: + s390_vceqgs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFCESBS: + 
s390_vfcesbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFCEDBS: + s390_vfcedbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFCHSBS: + s390_vfchsbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFCHDBS: + s390_vfchdbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VGMB: + s390_vgmb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VGMH: + s390_vgmh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VGMF: + s390_vgmf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VGMG: + s390_vgmg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VECB: + s390_vecb (code, ins->dreg, ins->sreg1); + break; + case OP_VECH: + s390_vech (code, ins->dreg, ins->sreg1); + break; + case OP_VECF: + s390_vecf (code, ins->dreg, ins->sreg1); + break; + case OP_VECG: + s390_vecg (code, ins->dreg, ins->sreg1); + break; + case OP_VECLB: + s390_veclb (code, ins->dreg, ins->sreg1); + break; + case OP_VECLH: + s390_veclh (code, ins->dreg, ins->sreg1); + break; + case OP_VECLF: + s390_veclf (code, ins->dreg, ins->sreg1); + break; + case OP_VECLG: + s390_veclg (code, ins->dreg, ins->sreg1); + break; + case OP_VCHBS: + s390_vchbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHHS: + s390_vchhs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHFS: + s390_vchfs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHGS: + s390_vchgs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHLBS: + s390_vchlbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHLHS: + s390_vchlhs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHLFS: + s390_vchlfs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHLGS: + s390_vchlgs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VLPB: + s390_vlpb (code, ins->dreg, ins->sreg1); + break; + case OP_VLPH: + s390_vlph (code, ins->dreg, ins->sreg1); + break; + 
case OP_VLPF: + s390_vlpf (code, ins->dreg, ins->sreg1); + break; + case OP_VLPG: + s390_vlpg (code, ins->dreg, ins->sreg1); + break; + case OP_VFLPDB: + s390_vfpsodb (code, ins->dreg, ins->sreg1, 2); + break; + case OP_VFLPSB: + s390_vfpsosb (code, ins->dreg, ins->sreg1, 2); + break; + case OP_VFLCDB: + s390_vflcdb (code, ins->dreg, ins->sreg1); + break; + case OP_VFLCSB: + s390_vflcsb (code, ins->dreg, ins->sreg1); + break; + case OP_INSERT_I1: + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgb (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_I2: + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgh (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_I4: + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgf (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_I8: + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgg (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_R4: + s390_vlgvf (code, s390_r13, ins->sreg2, 0, 0); + s390_vlvgf (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_R8: + s390_vlgvg (code, s390_r13, ins->sreg2, 0, 0); + s390_vlvgg (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_EXTRACT_I1: + s390_vlgvb (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_EXTRACT_I2: + s390_vlgvh (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_EXTRACT_I4: + s390_vlgvf (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_EXTRACT_I8: + s390_vlgvg (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_EXTRACT_R4: + s390_vlgvf (code, s390_r13, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vlvgf (code, ins->dreg, s390_r13, 0, 0); + break; + case OP_EXTRACT_R8: + s390_vlgvg (code, 
s390_r13, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vlvgg (code, ins->dreg, s390_r13, 0, 0); + break; + case OP_XEXTRACT_I1: + s390_vlgvb (code, ins->dreg, ins->sreg1, ins->sreg2, 0); + break; + case OP_XEXTRACT_I2: + s390_vlgvh (code, ins->dreg, ins->sreg1, ins->sreg2, 0); + break; + case OP_XEXTRACT_I4: + s390_vlgvf (code, ins->dreg, ins->sreg1, ins->sreg2, 0); + break; + case OP_XEXTRACT_I8: + s390_vlgvg (code, ins->dreg, ins->sreg1, ins->sreg2, 0); + break; + case OP_XEXTRACT_R4: + s390_vlgvf (code, s390_r13, ins->sreg1, ins->sreg2, 0); + s390_ldgr (code, ins->dreg, s390_r13); + break; + case OP_XEXTRACT_R8: + s390_vlgvg (code, s390_r13, ins->sreg1, ins->sreg2, 0); + s390_ldgr (code, ins->dreg, s390_r13); + break; + case OP_EXPAND_I1: + s390_vlvgb (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepb (code, ins->dreg, s390_vr16, 0); + break; + case OP_EXPAND_I2: + s390_vlvgh (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vreph (code, ins->dreg, s390_vr16, 0); + break; + case OP_EXPAND_I4: + s390_vlvgf (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepf (code, ins->dreg, s390_vr16, 0); + break; + case OP_EXPAND_I8: + s390_vlvgg (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepg (code, ins->dreg, s390_vr16, 0); + break; + case OP_EXPAND_R4: + s390_vlgvf (code, s390_r13, ins->sreg1, 0, 0); + s390_vlvgf (code, s390_vr16, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepf (code, ins->dreg, s390_vr16, 0); + break; + case OP_EXPAND_R8: + s390_lgdr (code, s390_r13, ins->sreg1); + s390_vlvgg (code, s390_vr16, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepg (code, ins->dreg, s390_vr16, 0); + break; + case OP_VPKH: + s390_vpkh ( code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VPKF: + s390_vpkf ( code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VPKG: + s390_vpkg ( code, ins->dreg, ins->sreg1, ins->sreg2); 
+ break; + case OP_VLCB: + s390_vlcb (code, ins->dreg, ins->sreg1); + break; + case OP_VLCH: + s390_vlch (code, ins->dreg, ins->sreg1); + break; + case OP_VLCF: + s390_vlcf (code, ins->dreg, ins->sreg1); + break; + case OP_VLCG: + s390_vlcg (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLB: + s390_vuplb (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLH: + s390_vuplh (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLF: + s390_vuplf (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLLB: + s390_vupllb (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLLH: + s390_vupllh (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLLF: + s390_vupllf (code, ins->dreg, ins->sreg1); + break; + case OP_VUPHB: + s390_vuphb (code, ins->dreg, ins->sreg1); + break; + case OP_VUPHH: + s390_vuphh (code, ins->dreg, ins->sreg1); + break; + case OP_VUPHF: + s390_vuphf (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLHB: + s390_vuplhb (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLHH: + s390_vuplhh (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLHF: + s390_vuplhf (code, ins->dreg, ins->sreg1); + break; + case OP_VFISB: + s390_vfisb (code, ins->dreg, ins->sreg1, 0, ins->inst_c0); + break; + case OP_VFIDB: + s390_vfidb (code, ins->dreg, ins->sreg1, 0, ins->inst_c0); + break; + case OP_VFSQSB: + s390_vfsqsb (code, ins->dreg, ins->sreg1); + break; + case OP_VFSQDB: + s390_vfsqdb (code, ins->dreg, ins->sreg1); + break; + case OP_XONES: + s390_vgbm (code, ins->dreg, 0xffff); + break; + case OP_XMOVE:{ + if (ins->dreg != ins->sreg1) + s390_vlr(code, ins->dreg, ins->sreg1); + break; + } + case OP_XZERO: + s390_vgbm (code, ins->dreg, 0); + break; #endif default: g_warning ("unknown opcode " M_PRI_INST " in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__); diff --git a/src/mono/mono/mini/mini-s390x.h b/src/mono/mono/mini/mini-s390x.h index 5f622938821b94..8fab52e1986f81 100644 --- a/src/mono/mono/mini/mini-s390x.h +++ b/src/mono/mono/mini/mini-s390x.h @@ 
-83,6 +83,7 @@ struct SeqPointInfo { #define MONO_ARCH_HAVE_SETUP_RESUME_FROM_SIGNAL_HANDLER_CTX 1 #define MONO_ARCH_HAVE_UNWIND_BACKTRACE 1 #define MONO_ARCH_FLOAT32_SUPPORTED 1 +#define MONO_ARCH_SIMD_INTRINSICS mono_hwcap_s390x_has_vec #define S390_STACK_ALIGNMENT 8 #define S390_FIRST_ARG_REG s390_r2 @@ -258,6 +259,20 @@ s390_patch_addr (guchar *code, guint64 target) } while (0) /*========================= End of Function ========================*/ +#define S390_LONG_VEC(loc, opy, op, r, off, ix, br) \ + if (s390_is_imm12(off)) { \ + s390_##opy (loc, r, off, ix, br); \ + } else { \ + if (ix == 0) { \ + S390_SET(loc, s390_r13, off); \ + s390_la (loc, s390_r13, s390_r13, br, 0); \ + } else { \ + s390_la (loc, s390_r13, ix, br, 0); \ + S390_SET (loc, s390_r0, off); \ + s390_agr (loc, s390_r13, s390_r0); \ + } \ + s390_##op (loc, r, 0, 0, s390_r13); \ + } #define S390_SET(loc, dr, v) \ do { \ diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index da2f83be0d5571..740e7433385c92 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -406,7 +406,7 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna if (type_enum_is_float (arg_type)) { instc0 = OP_FMUL; } else { -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg) && (arg_type == MONO_TYPE_I8 || arg_type == MONO_TYPE_U8 || arg_type == MONO_TYPE_I || arg_type == MONO_TYPE_U)) return NULL; #endif @@ -448,7 +448,7 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna static MonoInst* emit_simd_ins_for_unary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSignature *fsig, MonoInst **args, MonoTypeEnum arg_type, int id) { -#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) int op = -1; switch (id){ case SN_Negate: @@ 
-557,6 +557,12 @@ emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum element_type, Mono } else { return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); } +#elif defined(TARGET_S390X) + MonoInst* cmp = emit_xcompare (cfg, klass, element_type, arg1, arg2); + MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); + ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET; + ret->inst_c1 = cmp->inst_c0; + return ret; #else MonoInst *ins = emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); if (!COMPILE_LLVM (cfg)) @@ -686,6 +692,64 @@ get_xconst_int_elem (MonoCompile *cfg, MonoInst *ins, MonoTypeEnum etype, int in } } +#ifdef TARGET_S390X +static int type_to_extract_op (MonoTypeEnum type); +static MonoInst* +emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_type, MonoInst *arg) +{ +MonoClass *vector_class = mono_class_from_mono_type_internal (vector_type); + + int op = -1; + MonoInst *tmp = emit_xzero (cfg, vector_class); + MonoInst *ins = arg; + int index = -1; + switch (element_type) { + case MONO_TYPE_R4: + op = -1; + return NULL; + break; + case MONO_TYPE_R8: + op = -1; + return NULL; + break; + case MONO_TYPE_I1: + case MONO_TYPE_U1: + op = OP_VSUMB; + ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_VSUMQF, ins->dreg,tmp->dreg); + index = 16; + break; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + op = OP_VSUMH; + ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_VSUMQF, ins->dreg,tmp->dreg); + index = 8; + break; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + op = OP_VSUMQF; + ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); + index = 4; + break; + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case MONO_TYPE_U8: { + op = OP_VSUMQG; + ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); + index = 2; + break; + } + 
default: + return NULL; + } + op = type_to_extract_op(element_type); + ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,-1); + ins->inst_c0 = index - 1; + return ins; +} +#endif #ifdef TARGET_ARM64 static int type_to_extract_op (MonoTypeEnum type); static MonoType* get_vector_t_elem_type (MonoType *vector_type); @@ -1473,7 +1537,7 @@ emit_vector_create_scalar ( if (COMPILE_LLVM (cfg)) { opcode = is_unsafe ? OP_CREATE_SCALAR_UNSAFE : OP_CREATE_SCALAR; } else { -#ifdef TARGET_AMD64 +#if defined(TARGET_AMD64) || defined(TARGET_S390X) MonoInst *ins; ins = emit_xzero (cfg, vklass); @@ -1819,12 +1883,12 @@ emit_dot (MonoCompile *cfg, MonoClass *klass, MonoType *vector_type, MonoTypeEnu #if defined(TARGET_WASM) if (!COMPILE_LLVM (cfg) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8)) return NULL; -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8 || arg0_type == MONO_TYPE_I || arg0_type == MONO_TYPE_U)) return NULL; #endif -#if defined(TARGET_ARM64) || defined(TARGET_WASM) +#if defined(TARGET_ARM64) || defined(TARGET_WASM) || defined(TARGET_S390X) MonoInst *pairwise_multiply = emit_simd_ins (cfg, klass, OP_XBINOP, sreg1, sreg2); pairwise_multiply->inst_c0 = type_enum_is_float (arg0_type) ? 
OP_FMUL : OP_IMUL; pairwise_multiply->inst_c1 = arg0_type; @@ -1939,7 +2003,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg)) { if (vector_size != 128) return NULL; @@ -2022,6 +2086,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } else { return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); } +#elif defined(TARGET_S390X) + return emit_simd_ins_for_sig (cfg, klass, OP_VEC_ABS, -1, arg0_type, fsig, args); #else return NULL; #endif @@ -2041,7 +2107,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!is_element_type_primitive (fsig->params [0]) || !is_element_type_primitive (fsig->params [1])) return NULL; -#ifndef TARGET_ARM64 +#if !defined(TARGET_ARM64) && !defined(TARGET_S390X) if (((id == SN_Max) || (id == SN_Min)) && type_enum_is_float(arg0_type)) return NULL; #endif @@ -2084,7 +2150,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; #ifdef TARGET_ARM64 return emit_simd_ins_for_sig (cfg, klass, OP_ARM64_BIC, -1, arg0_type, fsig, args); -#elif defined(TARGET_AMD64) || defined(TARGET_WASM) +#elif defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) /* Swap lhs and rhs because Vector128 needs lhs & !rhs whereas SSE2 does !lhs & rhs */ MonoInst *tmp = args[0]; @@ -2179,6 +2245,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi int ceil_or_floor = id == SN_Ceiling ? 10 : 9; return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_ROUNDP, ceil_or_floor, arg0_type, fsig, args); +#elif defined(TARGET_S390X) + int ceil_or_floor = id == SN_Ceiling ? 
6 : 7; + return emit_simd_ins_for_sig (cfg, klass, OP_CEIL_FLOOR, ceil_or_floor, arg0_type, fsig, args); #else return NULL; #endif @@ -2187,9 +2256,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!is_element_type_primitive (fsig->params [0])) return NULL; -#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) -#if defined(TARGET_AMD64) +#if defined(TARGET_AMD64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg)) { MonoInst *val1 = emit_simd_ins (cfg, klass, OP_XBINOP_FORCEINT, args [0]->dreg, args [1]->dreg); val1->inst_c0 = XBINOP_FORCEINT_AND; @@ -2325,7 +2394,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MonoClass *vklass = mono_class_from_mono_type_internal(fsig->ret); return emit_vector_create_broadcast (cfg, vklass, etype, args [0]); } else if (is_create_from_half_vectors_overload (fsig)) { -#if defined(TARGET_AMD64) +#if defined(TARGET_AMD64) || defined(TARGET_S390X) // Require Vector64 SIMD support if (!COMPILE_LLVM (cfg)) return NULL; @@ -2385,7 +2454,11 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MonoInst* cmp = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]); MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); ret->inst_c0 = (id == SN_EqualsAll) ? 
SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; - ret->inst_c1 = mono_class_value_size (klass, NULL); +#ifdef TARGET_S390X + ret->inst_c1 = cmp->inst_c0; +#else + ret->inst_c1 = mono_class_value_size (klass, NULL); +#endif return ret; } g_assert_not_reached (); @@ -2517,6 +2590,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_simd_ins_for_sig (cfg, klass, OP_SSE_MOVMSK, -1, type, fsig, args); #endif + return NULL; } case SN_GetElement: { int elems; @@ -2606,7 +2680,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; int op = id == SN_GetLower ? OP_XLOWER : OP_XUPPER; -#ifdef TARGET_AMD64 +#if defined(TARGET_AMD64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg)) /* These return a Vector64 */ return NULL; @@ -2669,7 +2743,11 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MonoInst* cmp = emit_xcompare_for_intrinsic (cfg, arg_class, id, arg0_type, args [0], args [1]); MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); ret->inst_c0 = is_all ? 
SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; +#if defined(TARGET_S390X) + ret->inst_c1 = cmp->inst_c0; +#else ret->inst_c1 = mono_class_value_size (klass, NULL); +#endif return ret; } } @@ -2734,7 +2812,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!type_enum_is_float(arg0_type)) return emit_xzero (cfg, klass); int op = -1; -#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) op = OP_ONES_COMPLEMENT; #endif if (op == -1) @@ -2984,6 +3062,20 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } return NULL; +#elif defined(TARGET_S390X) + switch (arg0_type) { + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return emit_simd_ins_for_sig (cfg, klass, OP_VPKH, -1, -1, fsig, args); + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return emit_simd_ins_for_sig (cfg, klass, OP_VPKF, -1, -1, fsig, args); + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return emit_simd_ins_for_sig (cfg, klass, OP_VPKG, -1, -1, fsig, args); + } + + return NULL; #else return NULL; #endif @@ -3140,7 +3232,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi case SN_Sum: { if (!is_element_type_primitive (fsig->params [0])) return NULL; -#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) return emit_sum_vector (cfg, fsig->params [0], arg0_type, args [0]); #else return NULL; @@ -3157,6 +3249,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi int instc0 = arg0_type == MONO_TYPE_R4 ? INTRINS_SIMD_SQRT_R4 : INTRINS_SIMD_SQRT_R8; return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X, instc0, arg0_type, fsig, args); +#elif defined(TARGET_S390X) + int instc0 = arg0_type == MONO_TYPE_R4 ? 
OP_VFSQSB : OP_VFSQDB; + return emit_simd_ins_for_sig (cfg, klass, instc0, 0, arg0_type, fsig, args); #else return NULL; #endif @@ -3240,7 +3335,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException"); gboolean use_xextract; -#ifdef TARGET_AMD64 +#if defined(TARGET_AMD64) || defined(TARGET_S390X) use_xextract = FALSE; #else use_xextract = type_to_width_log2 (arg0_type) == 3; @@ -3314,13 +3409,51 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi #elif defined(TARGET_AMD64) // FIXME: return NULL; +#elif defined(TARGET_S390X) + if (id == SN_WidenLower) + { + switch (arg0_type){ + case MONO_TYPE_I1: + return emit_simd_ins (cfg, klass, OP_VUPHB, args[0]->dreg, -1); + case MONO_TYPE_U1: + return emit_simd_ins (cfg, klass, OP_VUPLHB, args[0]->dreg, -1); + case MONO_TYPE_I2: + return emit_simd_ins (cfg, klass, OP_VUPHH, args[0]->dreg, -1); + case MONO_TYPE_U2: + return emit_simd_ins (cfg, klass, OP_VUPLHH, args[0]->dreg, -1); + case MONO_TYPE_I4: + return emit_simd_ins (cfg, klass, OP_VUPHF, args[0]->dreg, -1); + case MONO_TYPE_U4: + return emit_simd_ins (cfg, klass, OP_VUPLHF, args[0]->dreg, -1); + default: + return NULL; + } + } + else{ + switch (arg0_type){ + case MONO_TYPE_I1: + return emit_simd_ins (cfg, klass, OP_VUPLB, args[0]->dreg, -1); + case MONO_TYPE_U1: + return emit_simd_ins (cfg, klass, OP_VUPLLB, args[0]->dreg, -1); + case MONO_TYPE_I2: + return emit_simd_ins (cfg, klass, OP_VUPLH, args[0]->dreg, -1); + case MONO_TYPE_U2: + return emit_simd_ins (cfg, klass, OP_VUPLLH, args[0]->dreg, -1); + case MONO_TYPE_I4: + return emit_simd_ins (cfg, klass, OP_VUPLF, args[0]->dreg, -1); + case MONO_TYPE_U4: + return emit_simd_ins (cfg, klass, OP_VUPLLF, args[0]->dreg, -1); + default: + return NULL; + } + } #else return NULL; #endif } case SN_WithLower: case SN_WithUpper: { -#ifdef TARGET_AMD64 +#if defined(TARGET_AMD64) || 
defined(TARGET_S390X) if (!COMPILE_LLVM (cfg)) /* These return a Vector64 */ return NULL; @@ -3468,7 +3601,7 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f return NULL; #endif -#ifdef TARGET_AMD64 +#if defined(TARGET_AMD64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg) && (size != 16)) return NULL; if (size != 16) @@ -3489,6 +3622,33 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f g_assert (sizeof (buf) >= size); memset (buf, 0, sizeof (buf)); +#ifdef TARGET_S390X + /* we directly emit vrepi*/ + if (etype->type != MONO_TYPE_R4 && etype->type != MONO_TYPE_R8) + return emit_simd_ins_for_sig (cfg, klass, OP_VEC_ONE, etype->type, 1, fsig, args); + switch (etype->type){ + case MONO_TYPE_R4:{ + float *value = (float*)buf; + + for (int i = 0; i < len; ++i) { + value [i] = 1.0f; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + case MONO_TYPE_R8:{ + double *value = (double*)buf; + + for (int i = 0; i < len; ++i) { + value [i] = 1.0; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + + } + } +#else + switch (etype->type) { case MONO_TYPE_I1: case MONO_TYPE_U1: { @@ -3559,6 +3719,7 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f default: g_assert_not_reached (); } +#endif } case SN_op_Addition: case SN_op_BitwiseAnd: @@ -6688,7 +6849,6 @@ emit_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi // If cmethod->klass is nested, the namespace is on the enclosing class. if (m_class_get_nested_in (cmethod->klass)) class_ns = m_class_get_name_space (m_class_get_nested_in (cmethod->klass)); - MonoInst *simd_inst = ecb (class_ns, class_name, cfg, cmethod, fsig, args); if (simd_inst) cfg->uses_simd_intrinsics = TRUE; @@ -6713,7 +6873,7 @@ mono_emit_common_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSi * for function arguments. 
When using SIMD intrinsics arguments optimized into OP_ARG needs to be decomposed * into correspondig SIMD LOADX/STOREX instructions. */ -#if defined(TARGET_WIN32) && defined(TARGET_AMD64) +#if (defined(TARGET_WIN32) && defined(TARGET_AMD64)) || defined(TARGET_S390X) static gboolean decompose_vtype_opt_uses_simd_intrinsics (MonoCompile *cfg, MonoInst *ins) { @@ -6725,9 +6885,11 @@ decompose_vtype_opt_uses_simd_intrinsics (MonoCompile *cfg, MonoInst *ins) case OP_XZERO: case OP_XPHI: case OP_LOADX_MEMBASE: +#ifndef TARGET_S390X case OP_LOADX_ALIGNED_MEMBASE: - case OP_STOREX_MEMBASE: case OP_STOREX_ALIGNED_MEMBASE_REG: +#endif + case OP_STOREX_MEMBASE: return TRUE; default: return FALSE; From bc1d70105ad01d003319b00eb44214b5bcd8c481 Mon Sep 17 00:00:00 2001 From: saitama951 Date: Thu, 3 Jul 2025 14:16:10 +0000 Subject: [PATCH 2/4] Address Review Comments * remove SIY_1 (duplicate) * remove locgrnle and locghinle; use locghiho instead * reformat whole patch * remove vflc; use vfpso instead * move common ops to a common ifdef in mini-ops * remove NEW_INS, it's used nowhere * rewrite the whole logic for Vector conditional ops for floats * update ANDN with vnc instruction * add a couple of comments * remove some pseudo ops in simd-intrinsics * add aligned loads and stores --- src/mono/mono/arch/s390x/s390x-codegen.h | 308 ++-- src/mono/mono/mini/cpu-s390x.mdesc | 12 +- src/mono/mono/mini/mini-ops.h | 19 +- src/mono/mono/mini/mini-s390x.c | 1805 +++++++++++----------- src/mono/mono/mini/mini-s390x.h | 31 +- src/mono/mono/mini/simd-intrinsics.c | 312 ++-- 6 files changed, 1274 insertions(+), 1213 deletions(-) diff --git a/src/mono/mono/arch/s390x/s390x-codegen.h b/src/mono/mono/arch/s390x/s390x-codegen.h index e0c944e3173243..c91335ab5e6f09 100644 --- a/src/mono/mono/arch/s390x/s390x-codegen.h +++ b/src/mono/mono/arch/s390x/s390x-codegen.h @@ -943,14 +943,7 @@ typedef struct { s390_emit16(c, ((i) & 0xffff)); \ } while (0) -#define S390_SIY(c,opc,s1,p1,m2) do \ -{ \ -
s390_emit16(c, ((opc & 0xff00) | m2)); \ - s390_emit32(c, ((s1) << 28 | (((p1) & 0xfffff) << 8) | \ - (opc & 0xff))); \ -} while (0) - -#define S390_SIY_1(c,opc,d1,b1,i2) do \ +#define S390_SIY(c,opc,d1,b1,i2) do \ { \ s390_emit16(c, ((opc & 0xff00) | i2)); \ s390_emit32(c, ((b1) << 28 | (((d1) & 0xfff) << 16) | \ @@ -1083,7 +1076,7 @@ typedef struct { char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ (((v3) > 15) << 5) | (((v4) > 15) << 4); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16), \ - vr3 = ((v3) % 16); vr4 = ((v4) % 16); \ + vr3 = ((v3) % 16), vr4 = ((v4) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ s390_emit16(c, ((vr3 << 12)| ((m6) << 8)) | ((m5) << 4)); \ s390_emit16(c, ((vr4 << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ @@ -1187,13 +1180,13 @@ typedef struct { #define s390_alghsik(c, r1, r3, v) S390_RIE_1(c, 0xecdb, r1, r3, v) #define s390_algr(c, r1, r2) S390_RRE(c, 0xb90a, r1, r2) #define s390_algrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9ea, r1, r2, r3) -#define s390_algsi(c, d1, b1, i2) S390_SIY_1(c, 0xeb7e, d1, b1, i2) +#define s390_algsi(c, d1, b1, i2) S390_SIY(c, 0xeb7e, d1, b1, i2) #define s390_alhhhr(c, r1, r2, r3) S390_RRF_1(c, 0xb9ca, r1, r2, r3) #define s390_alhhlr(c, r1, r2, r3) S390_RRF_1(c, 0xb9da, r1, r2, r3) #define s390_alhsik(c, r1, r3, v) S390_RIE_1(c, 0xecda, r1, r3, v) #define s390_alr(c, r1, r2) S390_RR(c, 0x1e, r1, r2) #define s390_alrk(c, r1, r2) S390_RRF(c, 0xb9fa, r1, r2) -#define s390_alsi(c, d1, b1, i2) S390_SIY_1(c, 0xeb6e, d1, b1, i2) +#define s390_alsi(c, d1, b1, i2) S390_SIY(c, 0xeb6e, d1, b1, i2) #define s390_alsih(c, r, v) S390_RIL_1(c, 0xcca, r, v) #define s390_alsihn(c, r, v) S390_RIL_1(c, 0xccb, r, v) #define s390_aly(c, r, x, b, d) S390_RXY(c, 0xe35e, r, x, b, d) @@ -1411,7 +1404,6 @@ typedef struct { #define s390_lochi(c, r1, i, m) S390_RIE_1(c, 0xec42, r1, m, i) #define s390_locghi(c, r1, i, m) S390_RIE_1(c, 0xec46, r1, m, i) #define s390_locghile(c, r1, i) s390_locghi(c, r1, i, 
S390_CC_LT|S390_CC_EQ) -#define s390_locghinle(c, r1, i) s390_locghi(c, r1, i, 3) #define s390_locghihe(c, r1, i) s390_locghi(c, r1, i, S390_CC_GT|S390_CC_EQ) #define s390_locghine(c, r1, i) s390_locghi(c, r1, i, S390_CC_NZ) #define s390_locghiho(c, r1, i) s390_locghi(c, r1, i, S390_CC_GT|S390_CC_OV) @@ -1422,7 +1414,6 @@ typedef struct { #define s390_locr(c, r1, m, r2) S390_RRF_2(c, 0xb9f2, r1, m, r2) #define s390_locgr(c, r1, m, r2) S390_RRF_2(c, 0xb9e2, r1, m, r2) #define s390_locgrle(c, r1, r2) s390_locgr(c, r1, S390_CC_LT|S390_CC_EQ, r2) -#define s390_locgrnle(c, r1, r2) s390_locgr(c, r1, 3, r2) #define s390_locgrhe(c, r1, r2) s390_locgr(c, r1, S390_CC_GT|S390_CC_EQ, r2) #define s390_locgrne(c, r1, r2) s390_locgr(c, r1, S390_CC_NZ, r2) #define s390_locgrho(c, r1, r2) s390_locgr(c, r1, S390_CC_GT|S390_CC_OV, r2) @@ -1571,151 +1562,152 @@ typedef struct { #define s390_tmlh(c, r, m) S390_RI(c, 0xa70, r, m) #define s390_tmll(c, r, m) S390_RI(c, 0xa71, r, m) #define s390_tm(c, b, d, v) S390_SI(c, 0x91, b, d, v) -#define s390_trap2(code) S390_E(code, 0x01ff) -#define s390_vab(c , v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 0, 0, 0) -#define s390_vah(c , v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 1, 0, 0) -#define s390_vaf(c , v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 2, 0, 0) -#define s390_vag(c , v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 3, 0, 0) -#define s390_vaq(c , v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 4, 0, 0) -#define s390_vceqbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 0, 1 ) -#define s390_vceqhs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 1, 1 ) -#define s390_vceqfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 2, 1 ) -#define s390_vceqgs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 3, 1 ) -#define s390_vchbs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 0, 1) -#define s390_vchhs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 1, 1) -#define s390_vchfs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 2, 1) -#define 
s390_vchgs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 3, 1) -#define s390_vchlbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 0, 1) -#define s390_vchlhs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 1, 1) -#define s390_vchlfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 2, 1) -#define s390_vchlgs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 3, 1) -#define s390_vecb(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 0, 0, 0) -#define s390_vech(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 1, 0, 0) -#define s390_vecf(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 2, 0, 0) -#define s390_vecg(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 3, 0, 0) -#define s390_veclb(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 0, 0, 0) -#define s390_veclh(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 1, 0, 0) -#define s390_veclf(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 2, 0, 0) -#define s390_veclg(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 3, 0, 0) -#define s390_vfasb(c, v1, v2, v3) S390_VRRc(c, 0xe7e3, v1, v2, v3, 2, 0, 0) -#define s390_vfadb(c, v1, v2, v3) S390_VRRc(c, 0xe7e3, v1, v2, v3, 3, 0, 0) -#define s390_vfcesbs(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 2, 0, 1) -#define s390_vfcedbs(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 3, 0, 1) -#define s390_vfchsbs(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 2, 0, 1) -#define s390_vfchdbs(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 3, 0, 1) -#define s390_vfdsb(c, v1, v2, v3) S390_VRRc(c, 0xe7e5, v1, v2, v3, 2, 0, 0) -#define s390_vfddb(c, v1, v2, v3) S390_VRRc(c, 0xe7e5, v1, v2, v3, 3, 0, 0) -#define s390_vfisb(c, v1, v2, m4, m5) S390_VRRa(c, 0xe7c7, v1, v2, 2, m4, m5) -#define s390_vfidb(c, v1, v2, m4, m5) S390_VRRa(c, 0xe7c7, v1, v2, 3, m4, m5) -#define s390_vfmsb(c, v1, v2, v3) S390_VRRc(c, 0xe7e7, v1, v2, v3, 2, 0, 0) -#define s390_vfmdb(c, v1, v2, v3) S390_VRRc(c, 0xe7e7, v1, v2, v3, 3, 0, 0) -#define s390_vfmaxsb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ef, v1, v2, v3, 2, 0, m6) -#define s390_vfmaxdb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ef, v1, 
v2, v3, 3, 0, m6) -#define s390_vfminsb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ee, v1, v2, v3, 2, 0, m6) -#define s390_vfmindb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ee, v1, v2, v3, 3, 0, m6) -#define s390_vflcdb(c, v1, v2) S390_VRRa(c, 0xe7cc, v1, v2, 3, 0, 0) -#define s390_vflcsb(c, v1, v2) S390_VRRa(c, 0xe7cc, v1, v2, 2, 0, 0) -#define s390_vfpsosb(c, v1, v2, m5) S390_VRRa(c, 0xe7cc, v1, v2, 2, 0, m5) -#define s390_vfpsodb(c, v1, v2, m5) S390_VRRa(c, 0xe7cc, v1, v2, 3, 0, m5) -#define s390_vfssb(c, v1, v2, v3) S390_VRRc(c, 0xe7e2, v1, v2, v3, 2, 0, 0) -#define s390_vfsdb(c, v1, v2, v3) S390_VRRc(c, 0xe7e2, v1, v2, v3, 3, 0, 0) -#define s390_vfsqsb(c, v1, v2) S390_VRRa(c, 0xe7ce, v1, v2, 2, 0, 0) -#define s390_vfsqdb(c, v1, v2) S390_VRRa(c, 0xe7ce, v1, v2, 3, 0, 0) -#define s390_vgbm(c, v, i) S390_VRIa(c, 0xe744, v, i, 0) -#define s390_vgmb(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 0) -#define s390_vgmh(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 1) -#define s390_vgmf(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 2) -#define s390_vgmg(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 3) -#define s390_vmlb(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 0, 0, 0) -#define s390_vmlhw(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 1, 0, 0) -#define s390_vmlf(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 2, 0, 0) -#define s390_vmnb(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 0, 0, 0) -#define s390_vmnh(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 1, 0, 0) -#define s390_vmnf(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 2, 0, 0) -#define s390_vmng(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 3, 0, 0) -#define s390_vmnlb(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 0, 0, 0) -#define s390_vmnlh(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 1, 0, 0) -#define s390_vmnlf(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 2, 0, 0) -#define s390_vmnlg(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 3, 0, 0) -#define s390_vmxb(c, v1, v2, v3) S390_VRRc(c, 
0xe7ff, v1, v2, v3, 0, 0, 0) -#define s390_vmxh(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 1, 0, 0) -#define s390_vmxf(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 2, 0, 0) -#define s390_vmxg(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 3, 0, 0) -#define s390_vmxlb(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 0, 0, 0) -#define s390_vmxlh(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 1, 0, 0) -#define s390_vmxlf(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 2, 0, 0) -#define s390_vmxlg(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 3, 0, 0) -#define s390_vn(c, v1, v2, v3) S390_VRRc(c, 0xe768, v1, v2, v3, 0, 0, 0) -#define s390_vnn(c, v1, v2, v3) S390_VRRc(c, 0xe76e, v1, v2, v3, 0, 0, 0) -#define s390_vno(c, v1, v2, v3) S390_VRRc(c, 0xe76b, v1, v2, v3, 0, 0, 0) -#define s390_vo(c, v1, v2, v3) S390_VRRc(c, 0xe76a, v1, v2, v3, 0, 0, 0) -#define s390_vl(c, v, d, x, b) S390_VRX(c, 0xe706, v, x, b, d, 0) -#define s390_vlcb(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 0, 0, 0) -#define s390_vlch(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 1, 0, 0) -#define s390_vlcf(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 2, 0, 0) -#define s390_vlcg(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 3, 0, 0) -#define s390_vleg(c, v, d, x, b, m) S390_VRX(c, 0xe702, v, x, b, d, m) -#define s390_vleib(c, v1, i2, m3) S390_VRIa(c, 0xe740, v1, i2, m3) -#define s390_vleih(c, v1, i2, m3) S390_VRIa(c, 0xe741, v1, i2, m3) -#define s390_vleif(c, v1, i2, m3) S390_VRIa(c, 0xe743, v1, i2, m3) -#define s390_vleig(c, v1, i2, m3) S390_VRIa(c, 0xe742, v1, i2, m3) -#define s390_vlgvb(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 0) -#define s390_vlgvh(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 1) -#define s390_vlgvf(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 2) -#define s390_vlgvg(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 3) +#define s390_trap2(code) S390_E(code, 0x01ff) +#define s390_vab(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 0, 0, 0) +#define 
s390_vah(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 1, 0, 0) +#define s390_vaf(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 2, 0, 0) +#define s390_vag(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 3, 0, 0) +#define s390_vaq(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 4, 0, 0) +#define s390_vceqbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 0, 1 ) +#define s390_vceqhs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 1, 1 ) +#define s390_vceqfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 2, 1 ) +#define s390_vceqgs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 3, 1 ) +#define s390_vchbs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 0, 1) +#define s390_vchhs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 1, 1) +#define s390_vchfs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 2, 1) +#define s390_vchgs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 3, 1) +#define s390_vchlbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 0, 1) +#define s390_vchlhs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 1, 1) +#define s390_vchlfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 2, 1) +#define s390_vchlgs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 3, 1) +#define s390_vecb(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 0, 0, 0) +#define s390_vech(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 1, 0, 0) +#define s390_vecf(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 2, 0, 0) +#define s390_vecg(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 3, 0, 0) +#define s390_veclb(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 0, 0, 0) +#define s390_veclh(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 1, 0, 0) +#define s390_veclf(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 2, 0, 0) +#define s390_veclg(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 3, 0, 0) +#define s390_vfasb(c, v1, v2, v3) S390_VRRc(c, 0xe7e3, v1, v2, v3, 2, 0, 0) +#define s390_vfadb(c, v1, v2, v3) S390_VRRc(c, 0xe7e3, v1, v2, v3, 3, 0, 0) +#define s390_vfcesbs(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 2, 0, 1) +#define s390_vfcedbs(c, 
v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 3, 0, 1) +#define s390_vfchsbs(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 2, 0, 1) +#define s390_vfchdbs(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 3, 0, 1) +#define s390_vfchesbs(c, v1, v2, v3) S390_VRRc(c, 0xe7ea, v1, v2, v3, 2, 0, 1) +#define s390_vfchedbs(c, v1, v2, v3) S390_VRRc(c, 0xe7ea, v1, v2, v3, 3, 0, 1) +#define s390_vfdsb(c, v1, v2, v3) S390_VRRc(c, 0xe7e5, v1, v2, v3, 2, 0, 0) +#define s390_vfddb(c, v1, v2, v3) S390_VRRc(c, 0xe7e5, v1, v2, v3, 3, 0, 0) +#define s390_vfisb(c, v1, v2, m4, m5) S390_VRRa(c, 0xe7c7, v1, v2, 2, m4, m5) +#define s390_vfidb(c, v1, v2, m4, m5) S390_VRRa(c, 0xe7c7, v1, v2, 3, m4, m5) +#define s390_vfmsb(c, v1, v2, v3) S390_VRRc(c, 0xe7e7, v1, v2, v3, 2, 0, 0) +#define s390_vfmdb(c, v1, v2, v3) S390_VRRc(c, 0xe7e7, v1, v2, v3, 3, 0, 0) +#define s390_vfmaxsb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ef, v1, v2, v3, 2, 0, m6) +#define s390_vfmaxdb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ef, v1, v2, v3, 3, 0, m6) +#define s390_vfminsb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ee, v1, v2, v3, 2, 0, m6) +#define s390_vfmindb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ee, v1, v2, v3, 3, 0, m6) +#define s390_vfpsosb(c, v1, v2, m5) S390_VRRa(c, 0xe7cc, v1, v2, 2, 0, m5) +#define s390_vfpsodb(c, v1, v2, m5) S390_VRRa(c, 0xe7cc, v1, v2, 3, 0, m5) +#define s390_vfssb(c, v1, v2, v3) S390_VRRc(c, 0xe7e2, v1, v2, v3, 2, 0, 0) +#define s390_vfsdb(c, v1, v2, v3) S390_VRRc(c, 0xe7e2, v1, v2, v3, 3, 0, 0) +#define s390_vfsqsb(c, v1, v2) S390_VRRa(c, 0xe7ce, v1, v2, 2, 0, 0) +#define s390_vfsqdb(c, v1, v2) S390_VRRa(c, 0xe7ce, v1, v2, 3, 0, 0) +#define s390_vgbm(c, v1, i2) S390_VRIa(c, 0xe744, v1, i2, 0) +#define s390_vgmb(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 0) +#define s390_vgmh(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 1) +#define s390_vgmf(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 2) +#define s390_vgmg(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 3) +#define s390_vmlb(c, v1, v2, v3) 
S390_VRRc(c, 0xe7a2, v1, v2, v3, 0, 0, 0) +#define s390_vmlhw(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 1, 0, 0) +#define s390_vmlf(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 2, 0, 0) +#define s390_vmnb(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 0, 0, 0) +#define s390_vmnh(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 1, 0, 0) +#define s390_vmnf(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 2, 0, 0) +#define s390_vmng(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 3, 0, 0) +#define s390_vmnlb(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 0, 0, 0) +#define s390_vmnlh(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 1, 0, 0) +#define s390_vmnlf(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 2, 0, 0) +#define s390_vmnlg(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 3, 0, 0) +#define s390_vmxb(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 0, 0, 0) +#define s390_vmxh(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 1, 0, 0) +#define s390_vmxf(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 2, 0, 0) +#define s390_vmxg(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 3, 0, 0) +#define s390_vmxlb(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 0, 0, 0) +#define s390_vmxlh(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 1, 0, 0) +#define s390_vmxlf(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 2, 0, 0) +#define s390_vmxlg(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 3, 0, 0) +#define s390_vn(c, v1, v2, v3) S390_VRRc(c, 0xe768, v1, v2, v3, 0, 0, 0) +#define s390_vnc(c, v1, v2, v3) S390_VRRc(c, 0xe769, v1, v2, v3, 0, 0, 0) +#define s390_vnn(c, v1, v2, v3) S390_VRRc(c, 0xe76e, v1, v2, v3, 0, 0, 0) +#define s390_vno(c, v1, v2, v3) S390_VRRc(c, 0xe76b, v1, v2, v3, 0, 0, 0) +#define s390_vo(c, v1, v2, v3) S390_VRRc(c, 0xe76a, v1, v2, v3, 0, 0, 0) +#define s390_vl(c, v, x, b, d) S390_VRX(c, 0xe706, v, x, b, d, 0) +#define s390_vlcb(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 0, 0, 0) +#define s390_vlch(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 1, 0, 0) 
+#define s390_vlcf(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 2, 0, 0) +#define s390_vlcg(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 3, 0, 0) +#define s390_vleg(c, v, d, x, b, m) S390_VRX(c, 0xe702, v, x, b, d, m) +#define s390_vleib(c, v1, i2, m3) S390_VRIa(c, 0xe740, v1, i2, m3) +#define s390_vleih(c, v1, i2, m3) S390_VRIa(c, 0xe741, v1, i2, m3) +#define s390_vleif(c, v1, i2, m3) S390_VRIa(c, 0xe743, v1, i2, m3) +#define s390_vleig(c, v1, i2, m3) S390_VRIa(c, 0xe742, v1, i2, m3) +#define s390_vlgvb(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 0) +#define s390_vlgvh(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 1) +#define s390_vlgvf(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 2) +#define s390_vlgvg(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 3) #define s390_vlm(c, v1, v2, b, d, m) S390_VRSa(c, 0xe736, v1, v2, b, d, m) -#define s390_vlpb(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 0, 0, 0) -#define s390_vlph(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 1, 0, 0) -#define s390_vlpf(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 2, 0, 0) -#define s390_vlpg(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 3, 0, 0) -#define s390_vlr(c, v1, v2) S390_VRRa(c,0xe756, v1, v2, 0, 0, 0) -#define s390_vlvgb(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 0) -#define s390_vlvgh(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 1) -#define s390_vlvgf(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 2) -#define s390_vlvgg(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 3) -#define s390_vperm(c, v1, v2, v3, v4) S390_VRRe(c, 0xe78c, v1, v2, v3, v4, 0, 0); -#define s390_vpkh(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 1, 0, 0) -#define s390_vpkf(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 2, 0, 0) -#define s390_vpkg(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 3, 0, 0) -#define s390_vrepb(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 0) -#define s390_vreph(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 1) -#define 
s390_vrepf(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 2) -#define s390_vrepg(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 3) -#define s390_vrepib(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 0) -#define s390_vrepih(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 1) -#define s390_vrepif(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 2) -#define s390_vrepig(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 3) -#define s390_vsb(c , v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 0, 0, 0) -#define s390_vsh(c , v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 1, 0, 0) -#define s390_vsf(c , v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 2, 0, 0) -#define s390_vsg(c , v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 3, 0, 0) -#define s390_vsq(c , v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 4, 0, 0) -#define s390_vst(c, v, d, x, b) S390_VRX(c,0xe70e, v, x, b, d, 0) -#define s390_vsteg(c, v, d, x, b, m) S390_VRX(c, 0xe70a, v, x, b, d, m) +#define s390_vlpb(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 0, 0, 0) +#define s390_vlph(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 1, 0, 0) +#define s390_vlpf(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 2, 0, 0) +#define s390_vlpg(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 3, 0, 0) +#define s390_vlr(c, v1, v2) S390_VRRa(c,0xe756, v1, v2, 0, 0, 0) +#define s390_vlvgb(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 0) +#define s390_vlvgh(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 1) +#define s390_vlvgf(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 2) +#define s390_vlvgg(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 3) +#define s390_vperm(c, v1, v2, v3, v4) S390_VRRe(c, 0xe78c, v1, v2, v3, v4, 0, 0); +#define s390_vpkh(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 1, 0, 0) +#define s390_vpkf(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 2, 0, 0) +#define s390_vpkg(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 3, 0, 0) +#define s390_vrepb(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 0) +#define s390_vreph(c, v1, v3, i2) S390_VRIc(c, 
0xe74d, v1 ,v3, i2, 1) +#define s390_vrepf(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 2) +#define s390_vrepg(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 3) +#define s390_vrepib(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 0) +#define s390_vrepih(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 1) +#define s390_vrepif(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 2) +#define s390_vrepig(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 3) +#define s390_vsb(c, v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 0, 0, 0) +#define s390_vsh(c, v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 1, 0, 0) +#define s390_vsf(c, v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 2, 0, 0) +#define s390_vsg(c, v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 3, 0, 0) +#define s390_vsq(c, v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 4, 0, 0) +#define s390_vst(c, v, x, b, d) S390_VRX(c,0xe70e, v, x, b, d, 0) +#define s390_vsteg(c, v, d, x, b, m) S390_VRX(c, 0xe70a, v, x, b, d, m) #define s390_vstm(c, v1, v2, b, d, m) S390_VRSa(c, 0xe73e, v1, v2, b, d, m) -#define s390_vsumb(c, v1, v2, v3) S390_VRRc(c, 0xe764, v1, v2, v3, 0, 0, 0) -#define s390_vsumh(c, v1, v2, v3) S390_VRRc(c, 0xe764, v1, v2, v3, 1, 0, 0) -#define s390_vsumqf(c, v1, v2, v3) S390_VRRc(c, 0xe767, v1, v2, v3, 2, 0, 0) -#define s390_vsumqg(c, v1, v2, v3) S390_VRRc(c, 0xe767, v1, v2, v3, 3, 0, 0) -#define s390_vuplb(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 0, 0, 0) -#define s390_vuplh(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 1, 0, 0) -#define s390_vuplf(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 2, 0, 0) -#define s390_vupllb(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 0, 0, 0) -#define s390_vupllh(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 1, 0, 0) -#define s390_vupllf(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 2, 0, 0) -#define s390_vuplhb(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 0, 0, 0) -#define s390_vuplhh(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 1, 0, 0) -#define s390_vuplhf(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 2, 0, 0) -#define s390_vuphb(c, v1, v2) S390_VRRa(c, 
0xe7d7, v1, v2, 0, 0, 0) -#define s390_vuphh(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 1, 0, 0) -#define s390_vuphf(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 2, 0, 0) -#define s390_vx(c, v1, v2, v3) S390_VRRc(c, 0xe76d, v1, v2, v3, 0, 0, 0) -#define s390_x(c, r, x, b, d) S390_RX(c, 0x57, r, x, b, d) -#define s390_xihf(c, r, v) S390_RIL_1(c, 0xc06, r, v) -#define s390_xilf(c, r, v) S390_RIL_1(c, 0xc07, r, v) -#define s390_xg(c, r, x, b, d) S390_RXY(c, 0xe382, r, x, b, d) -#define s390_xgr(c, r1, r2) S390_RRE(c, 0xb982, r1, r2) -#define s390_xgrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e7, r1, r2, r3) -#define s390_xr(c, r1, r2) S390_RR(c, 0x17, r1, r2) -#define s390_xy(c, r, x, b, d) S390_RXY(c, 0xe357, r, x, b, d) +#define s390_vsumb(c, v1, v2, v3) S390_VRRc(c, 0xe764, v1, v2, v3, 0, 0, 0) +#define s390_vsumh(c, v1, v2, v3) S390_VRRc(c, 0xe764, v1, v2, v3, 1, 0, 0) +#define s390_vsumqf(c, v1, v2, v3) S390_VRRc(c, 0xe767, v1, v2, v3, 2, 0, 0) +#define s390_vsumqg(c, v1, v2, v3) S390_VRRc(c, 0xe767, v1, v2, v3, 3, 0, 0) +#define s390_vuplb(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 0, 0, 0) +#define s390_vuplhw(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 1, 0, 0) +#define s390_vuplf(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 2, 0, 0) +#define s390_vupllb(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 0, 0, 0) +#define s390_vupllh(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 1, 0, 0) +#define s390_vupllf(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 2, 0, 0) +#define s390_vuplhb(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 0, 0, 0) +#define s390_vuplhh(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 1, 0, 0) +#define s390_vuplhf(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 2, 0, 0) +#define s390_vuphb(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 0, 0, 0) +#define s390_vuphh(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 1, 0, 0) +#define s390_vuphf(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 2, 0, 0) +#define s390_vx(c, v1, v2, v3) S390_VRRc(c, 0xe76d, v1, v2, v3, 0, 0, 0) +#define s390_x(c, r, x, b, d) S390_RX(c, 0x57, r, x, b, d) 
+#define s390_xihf(c, r, v) S390_RIL_1(c, 0xc06, r, v) +#define s390_xilf(c, r, v) S390_RIL_1(c, 0xc07, r, v) +#define s390_xg(c, r, x, b, d) S390_RXY(c, 0xe382, r, x, b, d) +#define s390_xgr(c, r1, r2) S390_RRE(c, 0xb982, r1, r2) +#define s390_xgrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e7, r1, r2, r3) +#define s390_xr(c, r1, r2) S390_RR(c, 0x17, r1, r2) +#define s390_xy(c, r, x, b, d) S390_RXY(c, 0xe357, r, x, b, d) #endif diff --git a/src/mono/mono/mini/cpu-s390x.mdesc b/src/mono/mono/mini/cpu-s390x.mdesc index 722ff959785f21..2910feb1a1fa56 100644 --- a/src/mono/mono/mini/cpu-s390x.mdesc +++ b/src/mono/mono/mini/cpu-s390x.mdesc @@ -298,7 +298,6 @@ int_clt_un: dest:i len:12 int_cneq: dest:i len:12 int_cge: dest:i len:12 int_cle: dest:i len:12 -int_cnle: dest:i len:12 int_cge_un: dest:i len:12 int_cle_un: dest:i len:12 @@ -510,10 +509,9 @@ expand_r8: dest:x src1:f len:20 expand_r4: dest:x src1:f len:20 -xextract: dest:i src1:x len:12 +s390_xextract: dest:i src1:x len:12 xones: dest:x len:6 xmove: dest:x src1:x len:6 -#xcast: dest:x src1:x len:6 xzero: dest:x len:6 xcompare: dest:x src1:x src2:x len:6 xcompare_fp: dest:x src1:x src2:x len:6 @@ -535,9 +533,9 @@ vfsubs: dest:x src1:x src2:x len:6 vfsubd: dest:x src1:x src2:x len:6 vxor: dest:x src1:x src2:x len:6 vor: dest:x src1:x src2:x len:6 -vnor: dest:x src1:x src2:x len:24 +vnor: dest:x src1:x src2:x len:6 vand: dest:x src1:x src2:x len:6 -vandnot: dest:x src1:x src2:x len:12 +vandnot: dest:x src1:x src2:x len:6 vnand: dest:x src1:x src2:x len:6 vmulb: dest:x src1:x src2:x len:6 vmulhw: dest:x src1:x src2:x len:6 @@ -599,14 +597,14 @@ vchbs: dest:x src1:x src2:x len:6 vchhs: dest:x src1:x src2:x len:6 vchfs: dest:x src1:x src2:x len:6 vchgs: dest:x src1:x src2:x len:6 -#vfsqsb: src1:x src2:x len:6 clob:1 vfsqsb: dest:x src1:x len:6 -#vfsqdb: src1:x src2:x len:6 clob:1 vfsqdb: dest:x src1:x len:6 vchlbs: dest:x src1:x src2:x len:6 vchlhs: dest:x src1:x src2:x len:6 vchlfs: dest:x src1:x src2:x len:6 vchlgs: dest:x 
src1:x src2:x len:6 +vfchesbs: dest:x src1:x src2:x len:6 +vfchedbs: dest:x src1:x src2:x len:6 vlpb: dest:x src1:x len:6 vlph: dest:x src1:x len:6 vlpf: dest:x src1:x len:6 diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index 51e6aa4a79f5a1..dc662a58b6febc 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -149,7 +149,7 @@ MINI_OP(OP_LOADR8_MEMBASE,"loadr8_membase", FREG, IREG, NONE) /* klass must be set to a simd class */ MINI_OP(OP_LOADX_MEMBASE, "loadx_membase", XREG, IREG, NONE) -#if defined(TARGET_X86) || defined(TARGET_AMD64) +#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_S390X) MINI_OP(OP_LOADX_ALIGNED_MEMBASE, "loadx_aligned_membase", XREG, IREG, NONE) #endif @@ -1498,6 +1498,7 @@ MINI_OP(OP_S390_CIJ, "s390_cij", IREG, NONE, NONE) MINI_OP(OP_S390_CLIJ, "s390_cij_un", IREG, IREG, NONE) MINI_OP(OP_S390_CGIJ, "s390_cgij", LREG, NONE, NONE) MINI_OP(OP_S390_CLGIJ, "s390_cgij_un", LREG, NONE, NONE) +MINI_OP(OP_STOREX_ALIGNED_MEMBASE_REG, "storex_aligned_membase_reg", IREG, XREG, NONE) MINI_OP(OP_VADDB, "vaddb", XREG, XREG, XREG) MINI_OP(OP_VADDH, "vaddh", XREG, XREG, XREG) MINI_OP(OP_VADDF, "vaddf", XREG, XREG, XREG) @@ -1584,6 +1585,8 @@ MINI_OP(OP_VCHLBS, "vchlbs", XREG, XREG, XREG) MINI_OP(OP_VCHLHS, "vchlhs", XREG, XREG, XREG) MINI_OP(OP_VCHLFS, "vchlfs", XREG, XREG, XREG) MINI_OP(OP_VCHLGS, "vchlgs", XREG, XREG, XREG) +MINI_OP(OP_VFCHESBS, "vfchesbs", XREG, XREG, XREG) +MINI_OP(OP_VFCHEDBS, "vfchedbs", XREG, XREG, XREG) MINI_OP(OP_VEC_ABS, "vecabs", XREG, XREG, NONE) MINI_OP(OP_VEC_ONE, "vecone", XREG, NONE, NONE) MINI_OP(OP_VLPB, "vlpb", XREG, XREG, NONE) @@ -1594,7 +1597,6 @@ MINI_OP(OP_VFLPDB, "vflpdb", XREG, XREG, NONE) MINI_OP(OP_VFLPSB, "vflpsb", XREG, XREG, NONE) MINI_OP(OP_VFLCDB, "vflcdb", XREG, XREG, NONE) MINI_OP(OP_VFLCSB, "vflcsb", XREG, XREG, NONE) -MINI_OP3(OP_BSL, "bitwise_select", XREG, XREG, XREG, XREG) MINI_OP(OP_VPKH, "vpkh", XREG, XREG, XREG) 
MINI_OP(OP_VPKF, "vpkf", XREG, XREG, XREG) MINI_OP(OP_VPKG, "vpkg", XREG, XREG, XREG) @@ -1617,9 +1619,7 @@ MINI_OP(OP_VUPLLF, "vupllf", XREG, XREG, NONE) MINI_OP(OP_VFISB, "vfidb", XREG, XREG, NONE) MINI_OP(OP_VFIDB, "vfisb", XREG, XREG, NONE) MINI_OP(OP_CEIL_FLOOR, "ceil_floor", XREG, XREG, NONE) -MINI_OP(OP_ICNLE, "int_cnle", IREG, NONE, NONE) -MINI_OP(OP_ONES_COMPLEMENT, "ones_complement", XREG, XREG, NONE) -MINI_OP(OP_NEGATION, "negate", XREG, XREG, NONE) +MINI_OP(OP_S390_XEXTRACT, "s390_xextract", IREG, XREG, NONE) #endif #if defined(TARGET_ARM64) @@ -1984,7 +1984,6 @@ MINI_OP(OP_SIMD_LOAD_SCALAR_R8, "simd_load_scalar_r8", XREG, IREG, NONE) MINI_OP(OP_SIMD_STORE, "simd_store", NONE, XREG, XREG) #if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) -MINI_OP(OP_ONES_COMPLEMENT, "ones_complement", XREG, XREG, NONE) MINI_OP(OP_CVT_FP_UI, "convert_fp_to_ui", XREG, XREG, NONE) MINI_OP(OP_CVT_FP_SI, "convert_fp_to_si", XREG, XREG, NONE) MINI_OP(OP_CVT_FP_UI_SCALAR, "convert_fp_to_ui_scalar", XREG, XREG, NONE) @@ -1994,11 +1993,15 @@ MINI_OP(OP_CVT_SI_FP, "convert_si_to_fp", XREG, XREG, NONE) MINI_OP(OP_CVT_UI_FP_SCALAR, "convert_ui_to_fp_scalar", XREG, XREG, NONE) MINI_OP(OP_CVT_SI_FP_SCALAR, "convert_si_to_fp_scalar", XREG, XREG, NONE) /* inst_c1 is one of the MONO_TYPE_ constants */ -MINI_OP(OP_NEGATION, "negate", XREG, XREG, NONE) MINI_OP(OP_NEGATION_SCALAR, "negate_scalar", XREG, XREG, NONE) +#endif // TARGET_ARM64 || TARGET_AMD64 || TARGET_WASM + +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) +MINI_OP(OP_NEGATION, "negate", XREG, XREG, NONE) +MINI_OP(OP_ONES_COMPLEMENT, "ones_complement", XREG, XREG, NONE) /* Select bits from src2/src3 using src1 */ MINI_OP3(OP_BSL, "bitwise_select", XREG, XREG, XREG, XREG) -#endif // TARGET_ARM64 || TARGET_AMD64 || TARGET_WASM +#endif #if defined(TARGET_RISCV64) || defined(TARGET_RISCV32) MINI_OP(OP_RISCV_EXC_BEQ, "riscv_exc_beq", NONE, IREG, IREG) diff 
--git a/src/mono/mono/mini/mini-s390x.c b/src/mono/mono/mini/mini-s390x.c index 2f696d88221bee..a8f680796043f6 100644 --- a/src/mono/mono/mini/mini-s390x.c +++ b/src/mono/mono/mini/mini-s390x.c @@ -19,12 +19,6 @@ #define MAX_ARCH_DELEGATE_PARAMS 10 -#define NEW_INS(cfg,ins,dest,op) do { \ - MONO_INST_NEW ((cfg), (dest), (op)); \ - (dest)->cil_code = (ins)->cil_code; \ - mono_bblock_insert_before_ins (bb, ins, (dest)); \ - } while (0) - #define NEW_SIMD_INS(cfg,ins,dest,op,d,s1,s2) do { \ MONO_INST_NEW ((cfg), (dest), (op)); \ (dest)->cil_code = (ins)->cil_code; \ @@ -2197,327 +2191,360 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) static int simd_type_to_sub_op (int t) { - switch (t) { - case MONO_TYPE_I1: - case MONO_TYPE_U1: - return OP_VSUBB; - case MONO_TYPE_I2: - case MONO_TYPE_U2: - return OP_VSUBH; - case MONO_TYPE_I4: - case MONO_TYPE_U4: - return OP_VSUBF; - case MONO_TYPE_I8: - case MONO_TYPE_U8: - case MONO_TYPE_I: - case MONO_TYPE_U: - return OP_VSUBG; - case MONO_TYPE_R4: - return OP_VFSUBS; - case MONO_TYPE_R8: - return OP_VFSUBD; - default: - g_assert_not_reached (); - return -1; + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VSUBB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VSUBH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VSUBF; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + case MONO_TYPE_I: + case MONO_TYPE_U: + return OP_VSUBG; + case MONO_TYPE_R4: + return OP_VFSUBS; + case MONO_TYPE_R8: + return OP_VFSUBD; + default: + g_assert_not_reached (); + return -1; } } static int simd_type_to_add_op (int t) { - switch (t) { - case MONO_TYPE_I1: - case MONO_TYPE_U1: - return OP_VADDB; - case MONO_TYPE_I2: - case MONO_TYPE_U2: - return OP_VADDH; - case MONO_TYPE_I4: - case MONO_TYPE_U4: - return OP_VADDF; - case MONO_TYPE_I8: - case MONO_TYPE_U8: - case MONO_TYPE_I: - case MONO_TYPE_U: - return OP_VADDG; - case MONO_TYPE_R4: - return OP_VFADDS; - case MONO_TYPE_R8: - return OP_VFADDD; - 
default: - g_assert_not_reached (); - return -1; - } + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VADDB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VADDH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VADDF; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + case MONO_TYPE_I: + case MONO_TYPE_U: + return OP_VADDG; + case MONO_TYPE_R4: + return OP_VFADDS; + case MONO_TYPE_R8: + return OP_VFADDD; + default: + g_assert_not_reached (); + return -1; + } } static int simd_type_to_mul_op (int t) { - switch (t) { - case MONO_TYPE_I1: - case MONO_TYPE_U1: - return OP_VMULB; - case MONO_TYPE_I2: - case MONO_TYPE_U2: - return OP_VMULHW; - case MONO_TYPE_I4: - case MONO_TYPE_U4: - return OP_VMULF; - case MONO_TYPE_R4: - return OP_VFMULS; - case MONO_TYPE_R8: - return OP_VFMULD; - default: - g_assert_not_reached (); - return -1; - } + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VMULB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VMULHW; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VMULF; + case MONO_TYPE_R4: + return OP_VFMULS; + case MONO_TYPE_R8: + return OP_VFMULD; + default: + g_assert_not_reached (); + return -1; + } } static int simd_type_to_max_op (int t) { - switch (t) { - case MONO_TYPE_I1: - return OP_VMXB; - case MONO_TYPE_U1: - return OP_VMXLB; - case MONO_TYPE_I2: - return OP_VMXH; - case MONO_TYPE_U2: - return OP_VMXLH; - case MONO_TYPE_I4: - return OP_VMXF; - case MONO_TYPE_U4: - return OP_VMXLF; - case MONO_TYPE_I8: - case MONO_TYPE_I: - return OP_VMXG; - case MONO_TYPE_U8: - case MONO_TYPE_U: - return OP_VMXLG; - case MONO_TYPE_R4: - return OP_VFMAXS; - case MONO_TYPE_R8: - return OP_VFMAXD; - default: - g_assert_not_reached (); - return -1; - } + switch (t) { + case MONO_TYPE_I1: + return OP_VMXB; + case MONO_TYPE_U1: + return OP_VMXLB; + case MONO_TYPE_I2: + return OP_VMXH; + case MONO_TYPE_U2: + return OP_VMXLH; + case MONO_TYPE_I4: + return OP_VMXF; + case MONO_TYPE_U4: + return 
OP_VMXLF; + case MONO_TYPE_I8: + case MONO_TYPE_I: + return OP_VMXG; + case MONO_TYPE_U8: + case MONO_TYPE_U: + return OP_VMXLG; + case MONO_TYPE_R4: + return OP_VFMAXS; + case MONO_TYPE_R8: + return OP_VFMAXD; + default: + g_assert_not_reached (); + return -1; + } } static int simd_type_to_min_op (int t) { - switch (t) { - case MONO_TYPE_I1: - return OP_VMNB; - case MONO_TYPE_U1: - return OP_VMNLB; - case MONO_TYPE_I2: - return OP_VMNH; - case MONO_TYPE_U2: - return OP_VMNLH; - case MONO_TYPE_I4: - return OP_VMNF; - case MONO_TYPE_U4: - return OP_VMNLF; - case MONO_TYPE_I8: - case MONO_TYPE_I: - return OP_VMNG; - case MONO_TYPE_U8: - case MONO_TYPE_U: - return OP_VMNLG; - case MONO_TYPE_R4: - return OP_VFMINS; - case MONO_TYPE_R8: - return OP_VFMIND; - default: - g_assert_not_reached (); - return -1; - } + switch (t) { + case MONO_TYPE_I1: + return OP_VMNB; + case MONO_TYPE_U1: + return OP_VMNLB; + case MONO_TYPE_I2: + return OP_VMNH; + case MONO_TYPE_U2: + return OP_VMNLH; + case MONO_TYPE_I4: + return OP_VMNF; + case MONO_TYPE_U4: + return OP_VMNLF; + case MONO_TYPE_I8: + case MONO_TYPE_I: + return OP_VMNG; + case MONO_TYPE_U8: + case MONO_TYPE_U: + return OP_VMNLG; + case MONO_TYPE_R4: + return OP_VFMINS; + case MONO_TYPE_R8: + return OP_VFMIND; + default: + g_assert_not_reached (); + return -1; + } } static int -simd_type_to_const_op (int t) +simd_type_to_comp_op (int t) { - switch (t) { - case MONO_TYPE_I1: - case MONO_TYPE_U1: - return OP_VREPIB; - case MONO_TYPE_I2: - case MONO_TYPE_U2: - return OP_VREPIH; - case MONO_TYPE_I4: - case MONO_TYPE_U4: - return OP_VREPIF; - case MONO_TYPE_I: - case MONO_TYPE_U: - case MONO_TYPE_I8: - case MONO_TYPE_U8: - return OP_VREPIG; - default: - g_assert_not_reached (); - return -1; - } + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VCEQBS; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VCEQHS; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VCEQFS; + case MONO_TYPE_I: + case MONO_TYPE_U: 
+ case MONO_TYPE_I8: + case MONO_TYPE_U8: + return OP_VCEQGS; + case MONO_TYPE_R4: + return OP_VFCESBS; + case MONO_TYPE_R8: + return OP_VFCEDBS; + default: + g_assert_not_reached (); + return -1; + } } static int -simd_type_to_comp_op (int t) +simd_type_to_gt_op (int t) { - switch (t) { - case MONO_TYPE_I1: - case MONO_TYPE_U1: - return OP_VCEQBS; - case MONO_TYPE_I2: - case MONO_TYPE_U2: - return OP_VCEQHS; - case MONO_TYPE_I4: - case MONO_TYPE_U4: - return OP_VCEQFS; - case MONO_TYPE_I: - case MONO_TYPE_U: - case MONO_TYPE_I8: - case MONO_TYPE_U8: - return OP_VCEQGS; - case MONO_TYPE_R4: - return OP_VFCESBS; - case MONO_TYPE_R8: - return OP_VFCEDBS; - default: - g_assert_not_reached (); - return -1; - } + switch (t) { + case MONO_TYPE_I1: + return OP_VCHBS; + case MONO_TYPE_U1: + return OP_VCHLBS; + case MONO_TYPE_I2: + return OP_VCHHS; + case MONO_TYPE_U2: + return OP_VCHLHS; + case MONO_TYPE_I4: + return OP_VCHFS; + case MONO_TYPE_U4: + return OP_VCHLFS; + case MONO_TYPE_I: + case MONO_TYPE_I8: + return OP_VCHGS; + case MONO_TYPE_U: + case MONO_TYPE_U8: + return OP_VCHLGS; + case MONO_TYPE_R4: + return OP_VFCHSBS; + case MONO_TYPE_R8: + return OP_VFCHDBS; + default: + g_assert_not_reached (); + return -1; + } } static int -simd_type_to_gt_op (int t) +simd_type_to_ge_fp_op (int t) { - switch (t) { - case MONO_TYPE_I1: - return OP_VCHBS; - case MONO_TYPE_U1: - return OP_VCHLBS; - case MONO_TYPE_I2: - return OP_VCHHS; - case MONO_TYPE_U2: - return OP_VCHLHS; - case MONO_TYPE_I4: - return OP_VCHFS; - case MONO_TYPE_U4: - return OP_VCHLFS; - case MONO_TYPE_I: - case MONO_TYPE_I8: - return OP_VCHGS; - case MONO_TYPE_U: - case MONO_TYPE_U8: - return OP_VCHLGS; - case MONO_TYPE_R4: - return OP_VFCHSBS; - case MONO_TYPE_R8: - return OP_VFCHDBS; - default: - g_assert_not_reached (); - return -1; - } + switch(t) { + case MONO_TYPE_R4: + return OP_VFCHESBS; + case MONO_TYPE_R8: + return OP_VFCHEDBS; + default: + g_assert_not_reached (); + return -1; + } } static int 
-simd_type_to_extract_op (int t, int q) +simd_type_to_extract_int_op (int t, int q) { - switch (t){ - case SIMD_EXTR_ARE_ALL_SET:{ - switch (q){ - case CMP_LT: - case CMP_GT: - case CMP_GT_UN: - case CMP_LT_UN: - case CMP_EQ: - return OP_CEQ; - case CMP_GE: - case CMP_LE: - case CMP_GE_UN: - case CMP_LE_UN: - return OP_ICNLE; //ICNGE - default: - g_assert_not_reached(); - return -1; - } - } - case SIMD_EXTR_IS_ANY_SET:{ - switch (q){ - case CMP_GT: - case CMP_LT: - case CMP_GT_UN: - case CMP_LT_UN: - case CMP_EQ: - return OP_ICLE; - case CMP_GE: - case CMP_LE: - case CMP_GE_UN: - case CMP_LE_UN: - return OP_ICNEQ; - default: - g_assert_not_reached(); - return -1; - } - } - default: - g_assert_not_reached (); - return -1; - } + switch (t){ + case SIMD_EXTR_ARE_ALL_SET:{ + switch (q){ + case CMP_LT: + case CMP_GT: + case CMP_GT_UN: + case CMP_LT_UN: + case CMP_EQ: + return OP_CEQ; + case CMP_GE: + case CMP_LE: + case CMP_GE_UN: + case CMP_LE_UN: + return OP_ICGT_UN; + default: + g_assert_not_reached(); + return -1; + } + } + case SIMD_EXTR_IS_ANY_SET:{ + switch (q){ + case CMP_GT: + case CMP_LT: + case CMP_GT_UN: + case CMP_LT_UN: + case CMP_EQ: + return OP_ICLE; + case CMP_GE: + case CMP_LE: + case CMP_GE_UN: + case CMP_LE_UN: + return OP_ICNEQ; + default: + g_assert_not_reached(); + return -1; + } + } + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_extract_fp_op (int t, int q) +{ + switch (t){ + case SIMD_EXTR_ARE_ALL_SET:{ + switch (q){ + case CMP_LT: + case CMP_GT: + case CMP_GT_UN: + case CMP_LT_UN: + case CMP_EQ: + case CMP_GE: + case CMP_LE: + case CMP_GE_UN: + case CMP_LE_UN: + return OP_ICEQ; + default: + g_assert_not_reached(); + return -1; + } + } + case SIMD_EXTR_IS_ANY_SET:{ + switch (q){ + case CMP_GT: + case CMP_LT: + case CMP_GT_UN: + case CMP_LT_UN: + case CMP_EQ: + case CMP_GE: + case CMP_LE: + case CMP_GE_UN: + case CMP_LE_UN: + return OP_ICLE; + default: + g_assert_not_reached(); + return -1; + } + } + 
default: + g_assert_not_reached (); + return -1; + } } static int simd_type_to_abs_op (int t) { - switch (t) { - case MONO_TYPE_I1: - case MONO_TYPE_U1: - return OP_VLPB; - case MONO_TYPE_I2: - case MONO_TYPE_U2: - return OP_VLPH; - case MONO_TYPE_I4: - case MONO_TYPE_U4: - return OP_VLPF; - case MONO_TYPE_R4: - return OP_VFLPSB; - case MONO_TYPE_I8: - case MONO_TYPE_U8: - return OP_VLPG; - case MONO_TYPE_R8: - return OP_VFLPDB; - - default: - g_assert_not_reached (); - return -1; - } + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VLPB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VLPH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VLPF; + case MONO_TYPE_R4: + return OP_VFLPSB; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return OP_VLPG; + case MONO_TYPE_R8: + return OP_VFLPDB; + default: + g_assert_not_reached (); + return -1; + } } static int simd_type_to_negate_op (int t) { - switch (t) { - case MONO_TYPE_I1: - case MONO_TYPE_U1: - return OP_VLCB; - case MONO_TYPE_I2: - case MONO_TYPE_U2: - return OP_VLCH; - case MONO_TYPE_I4: - case MONO_TYPE_U4: - return OP_VLCF; - case MONO_TYPE_R4: - return OP_VFLCSB; - case MONO_TYPE_I8: - case MONO_TYPE_U8: - return OP_VLCG; - case MONO_TYPE_R8: - return OP_VFLCDB; - default: - g_assert_not_reached (); - return -1; - } + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_VLCB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_VLCH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_VLCF; + case MONO_TYPE_R4: + return OP_VFLCSB; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return OP_VLCG; + case MONO_TYPE_R8: + return OP_VFLCDB; + default: + g_assert_not_reached (); + return -1; + } } /** @@ -2534,7 +2561,7 @@ void mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) { MonoInst *ins, *next, *temp_ins; - int temp; + int temp; MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) { switch (ins->opcode) { @@ -2556,136 +2583,161 @@ mono_arch_lowering_pass (MonoCompile 
*cfg, MonoBasicBlock *bb) /* This is created by the memcpy code which ignores is_inst_imm */ mono_decompose_op_imm (cfg, bb, ins); break; - case OP_XBINOP:{ - switch(ins->inst_c0){ - case OP_IADD: - ins->opcode = GINT_TO_OPCODE (simd_type_to_add_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case OP_ISUB: - ins->opcode = GINT_TO_OPCODE (simd_type_to_sub_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case OP_IMUL: - ins->opcode = GINT_TO_OPCODE (simd_type_to_mul_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case OP_IMAX_UN: - case OP_IMAX: - ins->opcode = GINT_TO_OPCODE (simd_type_to_max_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case OP_IMIN_UN: - case OP_IMIN: - ins->opcode = GINT_TO_OPCODE (simd_type_to_min_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case OP_FADD: - ins->opcode = GINT_TO_OPCODE (simd_type_to_add_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case OP_FSUB: - ins->opcode = GINT_TO_OPCODE (simd_type_to_sub_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case OP_FMUL: - ins->opcode = GINT_TO_OPCODE (simd_type_to_mul_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case OP_FDIV: - ins->opcode = ins->inst_c1 == MONO_TYPE_R4 ? 
OP_VFDIVS : OP_VFDIVD; - break; - case OP_FMIN: - ins->opcode = GINT_TO_OPCODE (simd_type_to_min_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case OP_FMAX: - ins->opcode = GINT_TO_OPCODE (simd_type_to_max_op (GTMREG_TO_INT (ins->inst_c1))); - break; - default: - g_assert_not_reached (); - break; - } - break; - } - case OP_XBINOP_FORCEINT:{ - switch (ins->inst_c0) { - case XBINOP_FORCEINT_AND: - ins->opcode = OP_VAND; - break; - case XBINOP_FORCEINT_OR: - ins->opcode = OP_VOR; - break; - case XBINOP_FORCEINT_XOR: - ins->opcode = OP_VXOR; - break; - default: - g_assert_not_reached (); - break; - } - break; - } - case OP_XCAST:{ - ins->opcode = OP_XMOVE; - break; - } - case OP_XCOMPARE_FP: - case OP_XCOMPARE:{ - switch (ins->inst_c0){ - case CMP_EQ: - ins->opcode = GINT_TO_OPCODE (simd_type_to_comp_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case CMP_LT: - temp = ins->sreg1; - ins->sreg1 = ins->sreg2; - ins->sreg2 = temp; - case CMP_GT: - ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case CMP_LT_UN: - temp = ins->sreg1; - ins->sreg1 = ins->sreg2; - ins->sreg2 = temp; - case CMP_GT_UN: - ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case CMP_GE: - case CMP_GE_UN: - temp = ins->sreg1; - ins->sreg1 = ins->sreg2; - ins->sreg2 = temp; - case CMP_LE: - case CMP_LE_UN:{ - /* FIX ME : setting condition code for cases other than ANY and ALL may degrade the performance*/ - NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))), ins->dreg, ins->sreg1, ins->sreg2); - NEW_SIMD_INS (cfg, ins, temp_ins, OP_VNOR, ins->dreg, ins->dreg, ins->dreg); - NULLIFY_INS (ins); - break; - } - default: - g_assert_not_reached (); - break; - } - } - break; - case OP_XEXTRACT: - ins->opcode = GINT_TO_OPCODE (simd_type_to_extract_op (GTMREG_TO_INT (ins->inst_c0), GTMREG_TO_INT (ins->inst_c1))); - ins->sreg1 = -1; //we assign this -1 or else this messes up 
the code cache - break; - case OP_VEC_ABS: - ins->opcode = GINT_TO_OPCODE (simd_type_to_abs_op (GTMREG_TO_INT (ins->inst_c1))); - break; - case OP_NEGATION: - NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_negate_op (GTMREG_TO_INT (ins->inst_c1))), ins->dreg, ins->sreg1, -1); - NULLIFY_INS (ins); - break; - case OP_ONES_COMPLEMENT: - ins->opcode = OP_VNOR; - ins->sreg2 = ins->sreg1; - break; - case OP_CEIL_FLOOR:{ - if (ins->inst_c0 == MONO_TYPE_R8) - NEW_SIMD_INS (cfg, ins, temp_ins, OP_VFIDB, ins->dreg, ins->sreg1, -1); - else - NEW_SIMD_INS (cfg, ins, temp_ins, OP_VFISB, ins->dreg, ins->sreg1, -1); - temp_ins->inst_c0 = ins->inst_c0; - NULLIFY_INS(ins); - } - break; - case OP_VEC_ONE: - ins->opcode = GINT_TO_OPCODE (simd_type_to_const_op (GTMREG_TO_INT (ins->inst_c0))); - break; + case OP_XBINOP:{ + switch(ins->inst_c0){ + case OP_IADD: + ins->opcode = GINT_TO_OPCODE (simd_type_to_add_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_ISUB: + ins->opcode = GINT_TO_OPCODE (simd_type_to_sub_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_IMUL: + ins->opcode = GINT_TO_OPCODE (simd_type_to_mul_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_IMAX_UN: + case OP_IMAX: + ins->opcode = GINT_TO_OPCODE (simd_type_to_max_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_IMIN_UN: + case OP_IMIN: + ins->opcode = GINT_TO_OPCODE (simd_type_to_min_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FADD: + ins->opcode = GINT_TO_OPCODE (simd_type_to_add_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FSUB: + ins->opcode = GINT_TO_OPCODE (simd_type_to_sub_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FMUL: + ins->opcode = GINT_TO_OPCODE (simd_type_to_mul_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FDIV: + ins->opcode = ins->inst_c1 == MONO_TYPE_R4 ? 
OP_VFDIVS : OP_VFDIVD; + break; + case OP_FMIN: + ins->opcode = GINT_TO_OPCODE (simd_type_to_min_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FMAX: + ins->opcode = GINT_TO_OPCODE (simd_type_to_max_op (GTMREG_TO_INT (ins->inst_c1))); + break; + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_XBINOP_FORCEINT:{ + switch (ins->inst_c0) { + case XBINOP_FORCEINT_AND: + ins->opcode = OP_VAND; + break; + case XBINOP_FORCEINT_OR: + ins->opcode = OP_VOR; + break; + case XBINOP_FORCEINT_XOR: + ins->opcode = OP_VXOR; + break; + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_XCAST:{ + ins->opcode = OP_XMOVE; + break; + } + case OP_XCOMPARE_FP:{ + switch (ins->inst_c0){ + case CMP_EQ: + ins->opcode = GINT_TO_OPCODE (simd_type_to_comp_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_LT_UN: + case CMP_LT: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GT_UN: + case CMP_GT: + ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_LE_UN: + case CMP_LE: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GE_UN: + case CMP_GE: + ins->opcode = GINT_TO_OPCODE (simd_type_to_ge_fp_op (GTMREG_TO_INT (ins->inst_c1))); + break; + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_XCOMPARE:{ + switch (ins->inst_c0){ + case CMP_EQ: + ins->opcode = GINT_TO_OPCODE (simd_type_to_comp_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_LT: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GT: + ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_LT_UN: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GT_UN: + ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_GE: + case CMP_GE_UN: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = 
temp; + case CMP_LE: + case CMP_LE_UN:{ + /* FIX ME : setting condition code for cases other than ANY and ALL may degrade the performance*/ + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))), ins->dreg, ins->sreg1, ins->sreg2); + NEW_SIMD_INS (cfg, ins, temp_ins, OP_VNOR, ins->dreg, ins->dreg, ins->dreg); + NULLIFY_INS (ins); + break; + } + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_S390_XEXTRACT: + switch (ins->inst_c1){ + case OP_XCOMPARE: + ins->opcode = GINT_TO_OPCODE (simd_type_to_extract_int_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f ), GTMREG_TO_INT (ins->inst_c0 >> 4))); + break; + case OP_XCOMPARE_FP: + ins->opcode = GINT_TO_OPCODE (simd_type_to_extract_fp_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f ), GTMREG_TO_INT (ins->inst_c0 >> 4))); + break; + default: + g_assert_not_reached (); + } + /* we don't use a register rather the CC set by the vector compare instructions */ + ins->sreg1 = -1; + break; + case OP_VEC_ABS: + ins->opcode = GINT_TO_OPCODE (simd_type_to_abs_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_NEGATION: + ins->opcode = GINT_TO_OPCODE (simd_type_to_negate_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_ONES_COMPLEMENT: + ins->opcode = OP_VNOR; + ins->sreg2 = ins->sreg1; + break; default: break; } @@ -2885,6 +2937,7 @@ is_unsigned (MonoInst *next) * Process instructions within basic block emitting s390x instructions * based on the VM operation codes */ + void mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) { @@ -4521,21 +4574,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } } break; - case OP_ICNLE: { - if (mono_hwcap_s390x_has_lsoc2) { - s390_lghi (code, ins->dreg, 0); - s390_locghinle(code, ins->dreg, 1); - } else if (mono_hwcap_s390x_has_mlt) { - s390_lghi (code, ins->dreg, 0); - s390_lghi (code, s390_r13, 1); - s390_locgrnle(code, ins->dreg, s390_r13); - } else { - s390_lghi(code, ins->dreg, 1); - 
s390_jle (code, 4); - s390_lghi(code, ins->dreg, 0); - } - } - break; case OP_ICGE: case OP_ICGE_UN: { if (mono_hwcap_s390x_has_lsoc2) { @@ -5322,461 +5360,466 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) bb->spill_slot_defs = g_slist_prepend_mempool (cfg->mempool, bb->spill_slot_defs, ins); break; #ifdef MONO_ARCH_SIMD_INTRINSICS - case OP_XCONST: - S390_SET (code, s390_r13, ins->inst_p0); - s390_vl(code, ins->dreg, 0, 0, s390_r13); - break; - case OP_LOADX_MEMBASE: - S390_LONG_VEC(code, vl, vl, ins->dreg, ins->inst_offset, 0, ins->inst_basereg); - break; - case OP_STOREX_MEMBASE: - S390_LONG_VEC(code, vst, vst, ins->sreg1, ins->inst_offset,0, ins->inst_destbasereg); - break; - case OP_VAND: - s390_vn (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VNAND: - s390_vnn (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VECTOR_ANDN: - s390_vno (code, s390_vr16, ins->sreg1, ins->sreg1); - s390_vn (code, ins->dreg, s390_vr16, ins->sreg2); - break; - case OP_VOR: - s390_vo (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VNOR:{ - s390_vno (code, ins->dreg, ins->sreg1, ins->sreg2); - } - break; - case OP_VXOR: - s390_vx (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VADDB: - s390_vab (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VADDH: - s390_vah (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VADDF: - s390_vaf (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VADDG: - s390_vag (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VADDQ: - s390_vaq (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFADDS: - s390_vfasb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFADDD: - s390_vfadb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VSUBB: - s390_vsb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VSUBH: - s390_vsh (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VSUBF: - 
s390_vsf (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VSUBG: - s390_vsg (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VSUBQ: - s390_vsq (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFSUBS: - s390_vfssb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFSUBD: - s390_vfsdb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMULB: - s390_vmlb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMULHW: - s390_vmlhw (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMULF: - s390_vmlf (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFMULS: - s390_vfmsb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFMULD: - s390_vfmdb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFDIVS: - s390_vfdsb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFDIVD: - s390_vfddb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VSUMB: - s390_vsumb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VSUMH: - s390_vsumh (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VSUMQF: - s390_vsumqf (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VSUMQG: - s390_vsumqg (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMXB: - s390_vmxb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMXH: - s390_vmxh (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMXF: - s390_vmxf (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMXG: - s390_vmxg (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFMAXS: - s390_vfmaxsb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); - break; - case OP_VFMAXD: - s390_vfmaxdb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); - break; - case OP_VMXLB: - s390_vmxlb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMXLH: - s390_vmxlh (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMXLF: - s390_vmxlf (code, 
ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMXLG: - s390_vmxlg (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMNB: - s390_vmnb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMNH: - s390_vmnh (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMNF: - s390_vmnf (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMNG: - s390_vmng (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFMINS: - s390_vfminsb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); - break; - case OP_VFMIND: - s390_vfmindb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); - break; - case OP_VMNLB: - s390_vmnlb (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMNLH: - s390_vmnlh (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMNLF: - s390_vmnlf (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VMNLG: - s390_vmnlg (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VREPIB: - s390_vrepib (code, ins->dreg, ins->inst_c1); - break; - case OP_VREPIH: - s390_vrepih (code, ins->dreg, ins->inst_c1); - break; - case OP_VREPIF: - s390_vrepif (code, ins->dreg, ins->inst_c1); - break; - case OP_VREPIG: - s390_vrepig (code, ins->dreg, ins->inst_c1); - break; - case OP_VCEQBS: - s390_vceqbs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VCEQHS: - s390_vceqhs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VCEQFS: - s390_vceqfs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VCEQGS: - s390_vceqgs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFCESBS: - s390_vfcesbs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFCEDBS: - s390_vfcedbs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFCHSBS: - s390_vfchsbs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFCHDBS: - s390_vfchdbs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VGMB: - s390_vgmb (code, ins->dreg, ins->sreg1, ins->sreg2); - 
break; - case OP_VGMH: - s390_vgmh (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VGMF: - s390_vgmf (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VGMG: - s390_vgmg (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VECB: - s390_vecb (code, ins->dreg, ins->sreg1); - break; - case OP_VECH: - s390_vech (code, ins->dreg, ins->sreg1); - break; - case OP_VECF: - s390_vecf (code, ins->dreg, ins->sreg1); - break; - case OP_VECG: - s390_vecg (code, ins->dreg, ins->sreg1); - break; - case OP_VECLB: - s390_veclb (code, ins->dreg, ins->sreg1); - break; - case OP_VECLH: - s390_veclh (code, ins->dreg, ins->sreg1); - break; - case OP_VECLF: - s390_veclf (code, ins->dreg, ins->sreg1); - break; - case OP_VECLG: - s390_veclg (code, ins->dreg, ins->sreg1); - break; - case OP_VCHBS: - s390_vchbs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VCHHS: - s390_vchhs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VCHFS: - s390_vchfs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VCHGS: - s390_vchgs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VCHLBS: - s390_vchlbs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VCHLHS: - s390_vchlhs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VCHLFS: - s390_vchlfs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VCHLGS: - s390_vchlgs (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VLPB: - s390_vlpb (code, ins->dreg, ins->sreg1); - break; - case OP_VLPH: - s390_vlph (code, ins->dreg, ins->sreg1); - break; - case OP_VLPF: - s390_vlpf (code, ins->dreg, ins->sreg1); - break; - case OP_VLPG: - s390_vlpg (code, ins->dreg, ins->sreg1); - break; - case OP_VFLPDB: - s390_vfpsodb (code, ins->dreg, ins->sreg1, 2); - break; - case OP_VFLPSB: - s390_vfpsosb (code, ins->dreg, ins->sreg1, 2); - break; - case OP_VFLCDB: - s390_vflcdb (code, ins->dreg, ins->sreg1); - break; - case OP_VFLCSB: - s390_vflcsb (code, 
ins->dreg, ins->sreg1); - break; - case OP_INSERT_I1: - s390_vlr (code, ins->dreg, ins->sreg1); - s390_vlvgb (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - break; - case OP_INSERT_I2: - s390_vlr (code, ins->dreg, ins->sreg1); - s390_vlvgh (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - break; - case OP_INSERT_I4: - s390_vlr (code, ins->dreg, ins->sreg1); - s390_vlvgf (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - break; - case OP_INSERT_I8: - s390_vlr (code, ins->dreg, ins->sreg1); - s390_vlvgg (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - break; - case OP_INSERT_R4: - s390_vlgvf (code, s390_r13, ins->sreg2, 0, 0); - s390_vlvgf (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - break; - case OP_INSERT_R8: - s390_vlgvg (code, s390_r13, ins->sreg2, 0, 0); - s390_vlvgg (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - break; - case OP_EXTRACT_I1: - s390_vlgvb (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - break; - case OP_EXTRACT_I2: - s390_vlgvh (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - break; - case OP_EXTRACT_I4: - s390_vlgvf (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - break; - case OP_EXTRACT_I8: - s390_vlgvg (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - break; - case OP_EXTRACT_R4: - s390_vlgvf (code, s390_r13, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vlvgf (code, ins->dreg, s390_r13, 0, 0); - break; - case OP_EXTRACT_R8: - s390_vlgvg (code, s390_r13, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vlvgg (code, ins->dreg, s390_r13, 0, 0); - break; - case OP_XEXTRACT_I1: - s390_vlgvb (code, ins->dreg, ins->sreg1, ins->sreg2, 0); - break; - case OP_XEXTRACT_I2: - s390_vlgvh (code, ins->dreg, ins->sreg1, ins->sreg2, 0); - break; - case OP_XEXTRACT_I4: - s390_vlgvf (code, ins->dreg, ins->sreg1, ins->sreg2, 0); - break; - case 
OP_XEXTRACT_I8: - s390_vlgvg (code, ins->dreg, ins->sreg1, ins->sreg2, 0); - break; - case OP_XEXTRACT_R4: - s390_vlgvf (code, s390_r13, ins->sreg1, ins->sreg2, 0); - s390_ldgr (code, ins->dreg, s390_r13); - break; - case OP_XEXTRACT_R8: - s390_vlgvg (code, s390_r13, ins->sreg1, ins->sreg2, 0); - s390_ldgr (code, ins->dreg, s390_r13); - break; - case OP_EXPAND_I1: - s390_vlvgb (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vrepb (code, ins->dreg, s390_vr16, 0); - break; - case OP_EXPAND_I2: - s390_vlvgh (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vreph (code, ins->dreg, s390_vr16, 0); - break; - case OP_EXPAND_I4: - s390_vlvgf (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vrepf (code, ins->dreg, s390_vr16, 0); - break; - case OP_EXPAND_I8: - s390_vlvgg (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vrepg (code, ins->dreg, s390_vr16, 0); - break; - case OP_EXPAND_R4: - s390_vlgvf (code, s390_r13, ins->sreg1, 0, 0); - s390_vlvgf (code, s390_vr16, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vrepf (code, ins->dreg, s390_vr16, 0); - break; - case OP_EXPAND_R8: - s390_lgdr (code, s390_r13, ins->sreg1); - s390_vlvgg (code, s390_vr16, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vrepg (code, ins->dreg, s390_vr16, 0); - break; - case OP_VPKH: - s390_vpkh ( code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VPKF: - s390_vpkf ( code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VPKG: - s390_vpkg ( code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VLCB: - s390_vlcb (code, ins->dreg, ins->sreg1); - break; - case OP_VLCH: - s390_vlch (code, ins->dreg, ins->sreg1); - break; - case OP_VLCF: - s390_vlcf (code, ins->dreg, ins->sreg1); - break; - case OP_VLCG: - s390_vlcg (code, ins->dreg, ins->sreg1); - break; - case OP_VUPLB: - s390_vuplb (code, ins->dreg, ins->sreg1); - break; - case OP_VUPLH: - s390_vuplh (code, 
ins->dreg, ins->sreg1); - break; - case OP_VUPLF: - s390_vuplf (code, ins->dreg, ins->sreg1); - break; - case OP_VUPLLB: - s390_vupllb (code, ins->dreg, ins->sreg1); - break; - case OP_VUPLLH: - s390_vupllh (code, ins->dreg, ins->sreg1); - break; - case OP_VUPLLF: - s390_vupllf (code, ins->dreg, ins->sreg1); - break; - case OP_VUPHB: - s390_vuphb (code, ins->dreg, ins->sreg1); - break; - case OP_VUPHH: - s390_vuphh (code, ins->dreg, ins->sreg1); - break; - case OP_VUPHF: - s390_vuphf (code, ins->dreg, ins->sreg1); - break; - case OP_VUPLHB: - s390_vuplhb (code, ins->dreg, ins->sreg1); - break; - case OP_VUPLHH: - s390_vuplhh (code, ins->dreg, ins->sreg1); - break; - case OP_VUPLHF: - s390_vuplhf (code, ins->dreg, ins->sreg1); - break; - case OP_VFISB: - s390_vfisb (code, ins->dreg, ins->sreg1, 0, ins->inst_c0); - break; - case OP_VFIDB: - s390_vfidb (code, ins->dreg, ins->sreg1, 0, ins->inst_c0); - break; - case OP_VFSQSB: - s390_vfsqsb (code, ins->dreg, ins->sreg1); - break; - case OP_VFSQDB: - s390_vfsqdb (code, ins->dreg, ins->sreg1); - break; - case OP_XONES: - s390_vgbm (code, ins->dreg, 0xffff); - break; - case OP_XMOVE:{ - if (ins->dreg != ins->sreg1) - s390_vlr(code, ins->dreg, ins->sreg1); - break; - } - case OP_XZERO: - s390_vgbm (code, ins->dreg, 0); - break; + case OP_XCONST: + S390_SET (code, s390_r13, ins->inst_p0); + s390_vl(code, ins->dreg, 0, s390_r13, 0); + break; + /* TO-DO: provide an alignment hint for the vector loads and stores*/ + case OP_LOADX_ALIGNED_MEMBASE: + case OP_LOADX_MEMBASE: + S390_LONG_VEC(code, vl, vl, ins->dreg, ins->inst_offset, 0, ins->inst_basereg); + break; + case OP_STOREX_ALIGNED_MEMBASE_REG: + case OP_STOREX_MEMBASE: + S390_LONG_VEC(code, vst, vst, ins->sreg1, ins->inst_offset,0, ins->inst_destbasereg); + break; + case OP_VAND: + s390_vn (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VNAND: + s390_vnn (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VECTOR_ANDN: + s390_vnc (code, ins->dreg, 
ins->sreg2, ins->sreg1); + break; + case OP_VOR: + s390_vo (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VNOR: + s390_vno (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VXOR: + s390_vx (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VADDB: + s390_vab (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VADDH: + s390_vah (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VADDF: + s390_vaf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VADDG: + s390_vag (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VADDQ: + s390_vaq (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFADDS: + s390_vfasb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFADDD: + s390_vfadb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUBB: + s390_vsb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUBH: + s390_vsh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUBF: + s390_vsf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUBG: + s390_vsg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUBQ: + s390_vsq (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFSUBS: + s390_vfssb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFSUBD: + s390_vfsdb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMULB: + s390_vmlb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMULHW: + s390_vmlhw (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMULF: + s390_vmlf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFMULS: + s390_vfmsb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFMULD: + s390_vfmdb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFDIVS: + s390_vfdsb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFDIVD: + s390_vfddb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUMB: + 
s390_vsumb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUMH: + s390_vsumh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUMQF: + s390_vsumqf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VSUMQG: + s390_vsumqg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXB: + s390_vmxb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXH: + s390_vmxh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXF: + s390_vmxf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXG: + s390_vmxg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFMAXS: + /* The max function used here is Java Math.Max() */ + s390_vfmaxsb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); + break; + case OP_VFMAXD: + s390_vfmaxdb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); + break; + case OP_VMXLB: + s390_vmxlb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXLH: + s390_vmxlh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXLF: + s390_vmxlf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMXLG: + s390_vmxlg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNB: + s390_vmnb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNH: + s390_vmnh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNF: + s390_vmnf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNG: + s390_vmng (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFMINS: + /* The min function used here is Java Math.Min() */ + s390_vfminsb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); + break; + case OP_VFMIND: + s390_vfmindb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); + break; + case OP_VMNLB: + s390_vmnlb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNLH: + s390_vmnlh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VMNLF: + s390_vmnlf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case 
OP_VMNLG: + s390_vmnlg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VREPIB: + s390_vrepib (code, ins->dreg, ins->inst_c1); + break; + case OP_VREPIH: + s390_vrepih (code, ins->dreg, ins->inst_c1); + break; + case OP_VREPIF: + s390_vrepif (code, ins->dreg, ins->inst_c1); + break; + case OP_VREPIG: + s390_vrepig (code, ins->dreg, ins->inst_c1); + break; + case OP_VCEQBS: + s390_vceqbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCEQHS: + s390_vceqhs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCEQFS: + s390_vceqfs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCEQGS: + s390_vceqgs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFCESBS: + s390_vfcesbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFCEDBS: + s390_vfcedbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFCHSBS: + s390_vfchsbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFCHDBS: + s390_vfchdbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFCHESBS: + s390_vfchesbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VFCHEDBS: + s390_vfchedbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VGMB: + s390_vgmb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VGMH: + s390_vgmh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VGMF: + s390_vgmf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VGMG: + s390_vgmg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VECB: + s390_vecb (code, ins->dreg, ins->sreg1); + break; + case OP_VECH: + s390_vech (code, ins->dreg, ins->sreg1); + break; + case OP_VECF: + s390_vecf (code, ins->dreg, ins->sreg1); + break; + case OP_VECG: + s390_vecg (code, ins->dreg, ins->sreg1); + break; + case OP_VECLB: + s390_veclb (code, ins->dreg, ins->sreg1); + break; + case OP_VECLH: + s390_veclh (code, ins->dreg, ins->sreg1); + break; + case OP_VECLF: + s390_veclf (code, 
ins->dreg, ins->sreg1); + break; + case OP_VECLG: + s390_veclg (code, ins->dreg, ins->sreg1); + break; + case OP_VCHBS: + s390_vchbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHHS: + s390_vchhs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHFS: + s390_vchfs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHGS: + s390_vchgs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHLBS: + s390_vchlbs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHLHS: + s390_vchlhs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHLFS: + s390_vchlfs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VCHLGS: + s390_vchlgs (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VLPB: + s390_vlpb (code, ins->dreg, ins->sreg1); + break; + case OP_VLPH: + s390_vlph (code, ins->dreg, ins->sreg1); + break; + case OP_VLPF: + s390_vlpf (code, ins->dreg, ins->sreg1); + break; + case OP_VLPG: + s390_vlpg (code, ins->dreg, ins->sreg1); + break; + case OP_VFLPDB: + s390_vfpsodb (code, ins->dreg, ins->sreg1, 2); + break; + case OP_VFLPSB: + s390_vfpsosb (code, ins->dreg, ins->sreg1, 2); + break; + case OP_VFLCDB: + s390_vfpsodb (code, ins->dreg, ins->sreg1, 0); + break; + case OP_VFLCSB: + s390_vfpsosb (code, ins->dreg, ins->sreg1, 0); + break; + case OP_INSERT_I1: + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgb (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_I2: + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgh (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_I4: + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgf (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_I8: + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgg (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_R4: + s390_vlgvf (code, 
s390_r13, ins->sreg2, 0, 0); + s390_vlvgf (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_R8: + s390_vlgvg (code, s390_r13, ins->sreg2, 0, 0); + s390_vlvgg (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_EXTRACT_I1: + s390_vlgvb (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_EXTRACT_I2: + s390_vlgvh (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_EXTRACT_I4: + s390_vlgvf (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_EXTRACT_I8: + s390_vlgvg (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_EXTRACT_R4: + s390_vlgvf (code, s390_r13, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vlvgf (code, ins->dreg, s390_r13, 0, 0); + break; + case OP_EXTRACT_R8: + s390_vlgvg (code, s390_r13, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vlvgg (code, ins->dreg, s390_r13, 0, 0); + break; + case OP_XEXTRACT_I1: + s390_vlgvb (code, ins->dreg, ins->sreg1, ins->sreg2, 0); + break; + case OP_XEXTRACT_I2: + s390_vlgvh (code, ins->dreg, ins->sreg1, ins->sreg2, 0); + break; + case OP_XEXTRACT_I4: + s390_vlgvf (code, ins->dreg, ins->sreg1, ins->sreg2, 0); + break; + case OP_XEXTRACT_I8: + s390_vlgvg (code, ins->dreg, ins->sreg1, ins->sreg2, 0); + break; + case OP_XEXTRACT_R4: + case OP_XEXTRACT_R8: + s390_vlgvg (code, s390_r13, ins->sreg1, ins->sreg2, 0); + s390_ldgr (code, ins->dreg, s390_r13); + break; + case OP_EXPAND_I1: + s390_vlvgb (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepb (code, ins->dreg, s390_vr16, 0); + break; + case OP_EXPAND_I2: + s390_vlvgh (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vreph (code, ins->dreg, s390_vr16, 0); + break; + case OP_EXPAND_I4: + s390_vlvgf (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepf (code, ins->dreg, 
s390_vr16, 0); + break; + case OP_EXPAND_I8: + s390_vlvgg (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepg (code, ins->dreg, s390_vr16, 0); + break; + case OP_EXPAND_R4: + s390_vlgvf (code, s390_r13, ins->sreg1, 0, 0); + s390_vlvgf (code, s390_vr16, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepf (code, ins->dreg, s390_vr16, 0); + break; + case OP_EXPAND_R8: + s390_lgdr (code, s390_r13, ins->sreg1); + s390_vlvgg (code, s390_vr16, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepg (code, ins->dreg, s390_vr16, 0); + break; + case OP_VPKH: + s390_vpkh ( code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VPKF: + s390_vpkf ( code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VPKG: + s390_vpkg ( code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_VLCB: + s390_vlcb (code, ins->dreg, ins->sreg1); + break; + case OP_VLCH: + s390_vlch (code, ins->dreg, ins->sreg1); + break; + case OP_VLCF: + s390_vlcf (code, ins->dreg, ins->sreg1); + break; + case OP_VLCG: + s390_vlcg (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLB: + s390_vuplb (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLH: + s390_vuplhw (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLF: + s390_vuplf (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLLB: + s390_vupllb (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLLH: + s390_vupllh (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLLF: + s390_vupllf (code, ins->dreg, ins->sreg1); + break; + case OP_VUPHB: + s390_vuphb (code, ins->dreg, ins->sreg1); + break; + case OP_VUPHH: + s390_vuphh (code, ins->dreg, ins->sreg1); + break; + case OP_VUPHF: + s390_vuphf (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLHB: + s390_vuplhb (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLHH: + s390_vuplhh (code, ins->dreg, ins->sreg1); + break; + case OP_VUPLHF: + s390_vuplhf (code, ins->dreg, ins->sreg1); + break; + case OP_VFISB: + s390_vfisb (code, ins->dreg, 
ins->sreg1, 0, ins->inst_c0); + break; + case OP_VFIDB: + s390_vfidb (code, ins->dreg, ins->sreg1, 0, ins->inst_c0); + break; + case OP_VFSQSB: + s390_vfsqsb (code, ins->dreg, ins->sreg1); + break; + case OP_VFSQDB: + s390_vfsqdb (code, ins->dreg, ins->sreg1); + break; + case OP_XONES: + s390_vgbm (code, ins->dreg, 0xffff); + break; + case OP_XMOVE: + if (ins->dreg != ins->sreg1) + s390_vlr(code, ins->dreg, ins->sreg1); + break; + case OP_XZERO: + s390_vgbm (code, ins->dreg, 0); + break; #endif default: g_warning ("unknown opcode " M_PRI_INST " in %s()\n", mono_inst_name (ins->opcode), __FUNCTION__); diff --git a/src/mono/mono/mini/mini-s390x.h b/src/mono/mono/mini/mini-s390x.h index 8fab52e1986f81..c6cfc367e97dbe 100644 --- a/src/mono/mono/mini/mini-s390x.h +++ b/src/mono/mono/mini/mini-s390x.h @@ -259,21 +259,6 @@ s390_patch_addr (guchar *code, guint64 target) } while (0) /*========================= End of Function ========================*/ -#define S390_LONG_VEC(loc, opy, op, r, off, ix, br) \ - if (s390_is_imm12(off)) { \ - s390_##opy (loc, r, off, ix, br); \ - } else { \ - if (ix == 0) { \ - S390_SET(loc, s390_r13, off); \ - s390_la (loc, s390_r13, s390_r13, br, 0); \ - } else { \ - s390_la (loc, s390_r13, ix, br, 0); \ - S390_SET (loc, s390_r0, off); \ - s390_agr (loc, s390_r13, s390_r0); \ - } \ - s390_##op (loc, r, 0, 0, s390_r13); \ - } - #define S390_SET(loc, dr, v) \ do { \ guint64 val = (guint64) v; \ @@ -308,6 +293,22 @@ s390_patch_addr (guchar *code, guint64 target) s390_##op (loc, r, 0, s390_r13, 0); \ } +#define S390_LONG_VEC(loc, opy, op, r, off, ix, br) \ + if (s390_is_imm12(off)) { \ + s390_##opy (loc, r, ix, br, off); \ + } else { \ + if (ix == 0) { \ + S390_SET(loc, s390_r13, off); \ + s390_la (loc, s390_r13, s390_r13, br, 0); \ + } else { \ + s390_la (loc, s390_r13, ix, br, 0); \ + S390_SET (loc, s390_r0, off); \ + s390_agr (loc, s390_r13, s390_r0); \ + } \ + s390_##op (loc, r, 0, s390_r13, 0); \ + } + + #define S390_SET_MASK(loc, dr, v) \ do 
{ \ if (s390_is_imm16 (v)) { \ diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 740e7433385c92..187adc8c623365 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -558,11 +558,12 @@ emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum element_type, Mono return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); } #elif defined(TARGET_S390X) - MonoInst* cmp = emit_xcompare (cfg, klass, element_type, arg1, arg2); - MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); - ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET; - ret->inst_c1 = cmp->inst_c0; - return ret; + MonoInst* cmp = emit_xcompare (cfg, klass, element_type, arg1, arg2); + MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XEXTRACT, cmp->dreg, -1); + ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET; + ret->inst_c0 |= ((cmp->inst_c0 << 4) & 0xf0); + ret->inst_c1 = cmp->opcode; + return ret; #else MonoInst *ins = emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); if (!COMPILE_LLVM (cfg)) @@ -697,57 +698,51 @@ static int type_to_extract_op (MonoTypeEnum type); static MonoInst* emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_type, MonoInst *arg) { -MonoClass *vector_class = mono_class_from_mono_type_internal (vector_type); - - int op = -1; - MonoInst *tmp = emit_xzero (cfg, vector_class); - MonoInst *ins = arg; - int index = -1; - switch (element_type) { - case MONO_TYPE_R4: - op = -1; - return NULL; - break; - case MONO_TYPE_R8: - op = -1; - return NULL; - break; - case MONO_TYPE_I1: - case MONO_TYPE_U1: - op = OP_VSUMB; - ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); - ins = emit_simd_ins (cfg, vector_class, OP_VSUMQF, ins->dreg,tmp->dreg); - index = 16; - break; - case MONO_TYPE_I2: - case MONO_TYPE_U2: - op = OP_VSUMH; - ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); - ins = 
emit_simd_ins (cfg, vector_class, OP_VSUMQF, ins->dreg,tmp->dreg); - index = 8; - break; - case MONO_TYPE_I4: - case MONO_TYPE_U4: - op = OP_VSUMQF; - ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); - index = 4; - break; - case MONO_TYPE_I: - case MONO_TYPE_U: - case MONO_TYPE_I8: - case MONO_TYPE_U8: { - op = OP_VSUMQG; - ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); - index = 2; - break; - } - default: - return NULL; - } - op = type_to_extract_op(element_type); - ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,-1); - ins->inst_c0 = index - 1; - return ins; + MonoClass *vector_class = mono_class_from_mono_type_internal (vector_type); + int op = -1; + MonoInst *tmp = emit_xzero (cfg, vector_class); + MonoInst *ins = arg; + int index = -1; + switch (element_type) { + case MONO_TYPE_R4: + return NULL; + break; + case MONO_TYPE_R8: + return NULL; + break; + case MONO_TYPE_I1: + case MONO_TYPE_U1: + ins = emit_simd_ins (cfg, vector_class, OP_VSUMB, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_VSUMQF, ins->dreg,tmp->dreg); + index = 16; + break; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + ins = emit_simd_ins (cfg, vector_class, OP_VSUMH, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_VSUMQF, ins->dreg,tmp->dreg); + index = 8; + break; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + op = OP_VSUMQF; + ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); + index = 4; + break; + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case MONO_TYPE_U8: + op = OP_VSUMQG; + ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); + index = 2; + break; + default: + return NULL; + } + op = type_to_extract_op(element_type); + ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,-1); + ins->inst_c0 = index - 1; + return ins; } #endif #ifdef TARGET_ARM64 @@ -2087,7 +2082,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return 
emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); } #elif defined(TARGET_S390X) - return emit_simd_ins_for_sig (cfg, klass, OP_VEC_ABS, -1, arg0_type, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_VEC_ABS, -1, arg0_type, fsig, args); #else return NULL; #endif @@ -2246,8 +2241,15 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi int ceil_or_floor = id == SN_Ceiling ? 10 : 9; return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_ROUNDP, ceil_or_floor, arg0_type, fsig, args); #elif defined(TARGET_S390X) - int ceil_or_floor = id == SN_Ceiling ? 6 : 7; - return emit_simd_ins_for_sig (cfg, klass, OP_CEIL_FLOOR, ceil_or_floor, arg0_type, fsig, args); + int ceil_or_floor = id == SN_Ceiling ? 6 : 7; + switch (arg0_type){ + case MONO_TYPE_R4: + return emit_simd_ins_for_sig (cfg, klass, OP_VFISB, ceil_or_floor, arg0_type, fsig, args); + case MONO_TYPE_R8: + return emit_simd_ins_for_sig (cfg, klass, OP_VFIDB, ceil_or_floor, arg0_type, fsig, args); /* R8 lanes need the double-precision opcode; OP_VFISB is the R4 form */ + default: + g_assert_not_reached (); + } #else return NULL; #endif @@ -2452,12 +2454,17 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } } else { MonoInst* cmp = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]); +#ifndef TARGET_S390X MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); ret->inst_c0 = (id == SN_EqualsAll) ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; -#ifdef TARGET_S390X - ret->inst_c1 = cmp->inst_c0; + ret->inst_c1 = mono_class_value_size (klass, NULL); #else - ret->inst_c1 = mono_class_value_size (klass, NULL); + /* we need XCOMPARE(_FP), SIMD_EXTR_ARE_ALL/ANY_SET and CMP_* ops in the same ins to emit correct*/ + /* load on condition instructions */ + MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XEXTRACT, cmp->dreg, -1); + ret->inst_c0 = (id == SN_EqualsAll) ? 
SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; + ret->inst_c0 |= ((cmp->inst_c0 << 4) & 0xf0); + ret->inst_c1 = cmp->opcode; #endif return ret; } @@ -2590,7 +2597,6 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_simd_ins_for_sig (cfg, klass, OP_SSE_MOVMSK, -1, type, fsig, args); #endif - return NULL; } case SN_GetElement: { int elems; @@ -2741,12 +2747,17 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } } else { MonoInst* cmp = emit_xcompare_for_intrinsic (cfg, arg_class, id, arg0_type, args [0], args [1]); +#ifndef TARGET_S390X MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); ret->inst_c0 = is_all ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; -#if defined(TARGET_S390X) - ret->inst_c1 = cmp->inst_c0; -#else ret->inst_c1 = mono_class_value_size (klass, NULL); +#else + /* we need XCOMPARE(_FP), SIMD_EXTR_ARE_ALL/ANY_SET and CMP_* ops in the same ins to emit correct*/ + /* load on condition instructions */ + MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XEXTRACT, cmp->dreg, -1); + ret->inst_c0 = is_all ? 
SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; + ret->inst_c0 |= ((cmp->inst_c0 << 4) & 0xf0); + ret->inst_c1 = cmp->opcode; #endif return ret; } @@ -3063,19 +3074,18 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; #elif defined(TARGET_S390X) - switch (arg0_type) { - case MONO_TYPE_I2: - case MONO_TYPE_U2: - return emit_simd_ins_for_sig (cfg, klass, OP_VPKH, -1, -1, fsig, args); - case MONO_TYPE_I4: - case MONO_TYPE_U4: - return emit_simd_ins_for_sig (cfg, klass, OP_VPKF, -1, -1, fsig, args); - case MONO_TYPE_I8: - case MONO_TYPE_U8: - return emit_simd_ins_for_sig (cfg, klass, OP_VPKG, -1, -1, fsig, args); - } - - return NULL; + switch (arg0_type) { + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return emit_simd_ins_for_sig (cfg, klass, OP_VPKH, -1, -1, fsig, args); + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return emit_simd_ins_for_sig (cfg, klass, OP_VPKF, -1, -1, fsig, args); + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return emit_simd_ins_for_sig (cfg, klass, OP_VPKG, -1, -1, fsig, args); + } + return NULL; #else return NULL; #endif @@ -3250,8 +3260,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X, instc0, arg0_type, fsig, args); #elif defined(TARGET_S390X) - int instc0 = arg0_type == MONO_TYPE_R4 ? OP_VFSQSB : OP_VFSQDB; - return emit_simd_ins_for_sig (cfg, klass, instc0, 0, arg0_type, fsig, args); + int instc0 = arg0_type == MONO_TYPE_R4 ? 
OP_VFSQSB : OP_VFSQDB; + return emit_simd_ins_for_sig (cfg, klass, instc0, 0, arg0_type, fsig, args); #else return NULL; #endif @@ -3410,43 +3420,42 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi // FIXME: return NULL; #elif defined(TARGET_S390X) - if (id == SN_WidenLower) - { - switch (arg0_type){ - case MONO_TYPE_I1: - return emit_simd_ins (cfg, klass, OP_VUPHB, args[0]->dreg, -1); - case MONO_TYPE_U1: - return emit_simd_ins (cfg, klass, OP_VUPLHB, args[0]->dreg, -1); - case MONO_TYPE_I2: - return emit_simd_ins (cfg, klass, OP_VUPHH, args[0]->dreg, -1); - case MONO_TYPE_U2: - return emit_simd_ins (cfg, klass, OP_VUPLHH, args[0]->dreg, -1); - case MONO_TYPE_I4: - return emit_simd_ins (cfg, klass, OP_VUPHF, args[0]->dreg, -1); - case MONO_TYPE_U4: - return emit_simd_ins (cfg, klass, OP_VUPLHF, args[0]->dreg, -1); - default: - return NULL; - } - } - else{ - switch (arg0_type){ - case MONO_TYPE_I1: - return emit_simd_ins (cfg, klass, OP_VUPLB, args[0]->dreg, -1); - case MONO_TYPE_U1: - return emit_simd_ins (cfg, klass, OP_VUPLLB, args[0]->dreg, -1); - case MONO_TYPE_I2: - return emit_simd_ins (cfg, klass, OP_VUPLH, args[0]->dreg, -1); - case MONO_TYPE_U2: - return emit_simd_ins (cfg, klass, OP_VUPLLH, args[0]->dreg, -1); - case MONO_TYPE_I4: - return emit_simd_ins (cfg, klass, OP_VUPLF, args[0]->dreg, -1); - case MONO_TYPE_U4: - return emit_simd_ins (cfg, klass, OP_VUPLLF, args[0]->dreg, -1); - default: - return NULL; - } - } + if (id == SN_WidenLower) { + switch (arg0_type){ + case MONO_TYPE_I1: + return emit_simd_ins (cfg, klass, OP_VUPHB, args[0]->dreg, -1); + case MONO_TYPE_U1: + return emit_simd_ins (cfg, klass, OP_VUPLHB, args[0]->dreg, -1); + case MONO_TYPE_I2: + return emit_simd_ins (cfg, klass, OP_VUPHH, args[0]->dreg, -1); + case MONO_TYPE_U2: + return emit_simd_ins (cfg, klass, OP_VUPLHH, args[0]->dreg, -1); + case MONO_TYPE_I4: + return emit_simd_ins (cfg, klass, OP_VUPHF, args[0]->dreg, -1); + case MONO_TYPE_U4: + 
return emit_simd_ins (cfg, klass, OP_VUPLHF, args[0]->dreg, -1); + default: + return NULL; + } + } + else { + switch (arg0_type){ + case MONO_TYPE_I1: + return emit_simd_ins (cfg, klass, OP_VUPLB, args[0]->dreg, -1); + case MONO_TYPE_U1: + return emit_simd_ins (cfg, klass, OP_VUPLLB, args[0]->dreg, -1); + case MONO_TYPE_I2: + return emit_simd_ins (cfg, klass, OP_VUPLH, args[0]->dreg, -1); + case MONO_TYPE_U2: + return emit_simd_ins (cfg, klass, OP_VUPLLH, args[0]->dreg, -1); + case MONO_TYPE_I4: + return emit_simd_ins (cfg, klass, OP_VUPLF, args[0]->dreg, -1); + case MONO_TYPE_U4: + return emit_simd_ins (cfg, klass, OP_VUPLLF, args[0]->dreg, -1); + default: + return NULL; + } + } #else return NULL; #endif @@ -3623,30 +3632,47 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f memset (buf, 0, sizeof (buf)); #ifdef TARGET_S390X - /* we directly emit vrepi*/ - if (etype->type != MONO_TYPE_R4 && etype->type != MONO_TYPE_R8) - return emit_simd_ins_for_sig (cfg, klass, OP_VEC_ONE, etype->type, 1, fsig, args); - switch (etype->type){ - case MONO_TYPE_R4:{ - float *value = (float*)buf; - - for (int i = 0; i < len; ++i) { - value [i] = 1.0f; - } - - return emit_xconst_v128 (cfg, klass, (guint8*)value); - } - case MONO_TYPE_R8:{ - double *value = (double*)buf; - - for (int i = 0; i < len; ++i) { - value [i] = 1.0; - } - - return emit_xconst_v128 (cfg, klass, (guint8*)value); - - } - } + /* we directly emit vrepi*/ + if (etype->type != MONO_TYPE_R4 && etype->type != MONO_TYPE_R8) { + switch (etype->type) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return emit_simd_ins_for_sig (cfg, klass, OP_VREPIB, etype->type, 1, fsig, args); + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return emit_simd_ins_for_sig (cfg, klass, OP_VREPIH, etype->type, 1, fsig, args); + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return emit_simd_ins_for_sig (cfg, klass, OP_VREPIF, etype->type, 1, fsig, args); + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case 
MONO_TYPE_U8: + return emit_simd_ins_for_sig (cfg, klass, OP_VREPIG, etype->type, 1, fsig, args); + default: + g_assert_not_reached (); + } + } + switch (etype->type){ + case MONO_TYPE_R4:{ + float *value = (float*)buf; + + for (int i = 0; i < len; ++i) { + value [i] = 1.0f; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + case MONO_TYPE_R8:{ + double *value = (double*)buf; + + for (int i = 0; i < len; ++i) { + value [i] = 1.0; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + } #else switch (etype->type) { @@ -6885,10 +6911,8 @@ decompose_vtype_opt_uses_simd_intrinsics (MonoCompile *cfg, MonoInst *ins) case OP_XZERO: case OP_XPHI: case OP_LOADX_MEMBASE: -#ifndef TARGET_S390X case OP_LOADX_ALIGNED_MEMBASE: case OP_STOREX_ALIGNED_MEMBASE_REG: -#endif case OP_STOREX_MEMBASE: return TRUE; default: From ca450d25b2e6094aab8a9954b5e35816e75eea45 Mon Sep 17 00:00:00 2001 From: saitama951 Date: Fri, 18 Jul 2025 13:51:12 +0000 Subject: [PATCH 3/4] Address Review Comments - 2 * reformat patch - rename opcode from OP_ to OP_S390 * rewrite OP_XCOMPARE and OP_XEXTRACT into one opcode OP_S390_OPXCOMPARE_XEXTRACT * introduce compare mnemonics without setting the condition code * rewrite OP_VEC_ABS to handle float and integer separately --- src/mono/mono/arch/s390x/s390x-codegen.h | 26 +- src/mono/mono/mini/cpu-s390x.mdesc | 272 +++++----- src/mono/mono/mini/mini-ops.h | 259 +++++----- src/mono/mono/mini/mini-s390x.c | 607 +++++++++++++++-------- src/mono/mono/mini/mini-s390x.h | 7 +- src/mono/mono/mini/simd-intrinsics.c | 124 +++-- 6 files changed, 779 insertions(+), 516 deletions(-) diff --git a/src/mono/mono/arch/s390x/s390x-codegen.h b/src/mono/mono/arch/s390x/s390x-codegen.h index c91335ab5e6f09..d28b9a6e2fe60f 100644 --- a/src/mono/mono/arch/s390x/s390x-codegen.h +++ b/src/mono/mono/arch/s390x/s390x-codegen.h @@ -1568,14 +1568,26 @@ typedef struct { #define s390_vaf(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 2, 0, 0) #define 
s390_vag(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 3, 0, 0) #define s390_vaq(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 4, 0, 0) -#define s390_vceqbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 0, 1 ) -#define s390_vceqhs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 1, 1 ) -#define s390_vceqfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 2, 1 ) -#define s390_vceqgs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 3, 1 ) +#define s390_vceqb(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 0, 0) +#define s390_vceqh(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 1, 0) +#define s390_vceqf(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 2, 0) +#define s390_vceqg(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 3, 0) +#define s390_vceqbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 0, 1) +#define s390_vceqhs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 1, 1) +#define s390_vceqfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 2, 1) +#define s390_vceqgs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 3, 1) +#define s390_vchb(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 0, 0) +#define s390_vchh(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 1, 0) +#define s390_vchf(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 2, 0) +#define s390_vchg(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 3, 0) #define s390_vchbs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 0, 1) #define s390_vchhs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 1, 1) #define s390_vchfs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 2, 1) #define s390_vchgs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 3, 1) +#define s390_vchlb(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 0, 0) +#define s390_vchlh(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 1, 0) +#define s390_vchlf(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 2, 0) +#define s390_vchlg(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 3, 0) #define s390_vchlbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 0, 1) 
#define s390_vchlhs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 1, 1) #define s390_vchlfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 2, 1) @@ -1590,10 +1602,16 @@ typedef struct { #define s390_veclg(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 3, 0, 0) #define s390_vfasb(c, v1, v2, v3) S390_VRRc(c, 0xe7e3, v1, v2, v3, 2, 0, 0) #define s390_vfadb(c, v1, v2, v3) S390_VRRc(c, 0xe7e3, v1, v2, v3, 3, 0, 0) +#define s390_vfcesb(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 2, 0, 0) +#define s390_vfcedb(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 3, 0, 0) #define s390_vfcesbs(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 2, 0, 1) #define s390_vfcedbs(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 3, 0, 1) +#define s390_vfchsb(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 2, 0, 0) +#define s390_vfchdb(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 3, 0, 0) #define s390_vfchsbs(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 2, 0, 1) #define s390_vfchdbs(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 3, 0, 1) +#define s390_vfchesb(c, v1, v2, v3) S390_VRRc(c, 0xe7ea, v1, v2, v3, 2, 0, 0) +#define s390_vfchedb(c, v1, v2, v3) S390_VRRc(c, 0xe7ea, v1, v2, v3, 3, 0, 0) #define s390_vfchesbs(c, v1, v2, v3) S390_VRRc(c, 0xe7ea, v1, v2, v3, 2, 0, 1) #define s390_vfchedbs(c, v1, v2, v3) S390_VRRc(c, 0xe7ea, v1, v2, v3, 3, 0, 1) #define s390_vfdsb(c, v1, v2, v3) S390_VRRc(c, 0xe7e5, v1, v2, v3, 2, 0, 0) diff --git a/src/mono/mono/mini/cpu-s390x.mdesc b/src/mono/mono/mini/cpu-s390x.mdesc index 2910feb1a1fa56..19847ab24cf518 100644 --- a/src/mono/mono/mini/cpu-s390x.mdesc +++ b/src/mono/mono/mini/cpu-s390x.mdesc @@ -480,13 +480,6 @@ insert_i8: dest:x src1:x src2:i len:12 insert_r4: dest:x src1:x src2:f clob:1 len:12 insert_r8: dest:x src1:x src2:f clob:1 len:12 -xinsert_i1: dest:x src1:x src2:i src3:i len:12 -xinsert_i2: dest:x src1:x src2:i src3:i len:12 -xinsert_i4: dest:x src1:x src2:i src3:i len:12 -xinsert_i8: dest:x src1:x src2:i src3:i len:12 -xinsert_r4: dest:x 
src1:x src2:f src3:i clob:1 len:12 -xinsert_r8: dest:x src1:x src2:f src3:i clob:1 len:12 - extract_i1: dest:i src1:x len:6 extract_i2: dest:i src1:x len:6 extract_i4: dest:i src1:x len:6 @@ -502,139 +495,148 @@ xextract_r4: dest:f src1:x src2:i len:10 xextract_r8: dest:f src1:x src2:i len:10 expand_i1: dest:x src1:i len:12 -expand_i2: dest:x src1:i len:14 +expand_i2: dest:x src1:i len:12 expand_i4: dest:x src1:i len:12 expand_i8: dest:x src1:i len:12 -expand_r8: dest:x src1:f len:20 -expand_r4: dest:x src1:f len:20 - +expand_r4: dest:x src1:f len:18 +expand_r8: dest:x src1:f len:16 -s390_xextract: dest:i src1:x len:12 xones: dest:x len:6 xmove: dest:x src1:x len:6 xzero: dest:x len:6 -xcompare: dest:x src1:x src2:x len:6 -xcompare_fp: dest:x src1:x src2:x len:6 -loadx_membase: dest:x src1:b len:16 -storex_membase: dest:b src1:x len:16 -vaddb: dest:x src1:x src2:x len:6 -vaddh: dest:x src1:x src2:x len:6 -vaddf: dest:x src1:x src2:x len:6 -vaddg: dest:x src1:x src2:x len:6 -vaddq: dest:x src1:x src2:x len:6 -vfadds: dest:x src1:x src2:x len:6 -vfaddd: dest:x src1:x src2:x len:6 -vsubb: dest:x src1:x src2:x len:6 -vsubh: dest:x src1:x src2:x len:6 -vsubf: dest:x src1:x src2:x len:6 -vsubg: dest:x src1:x src2:x len:6 -vsubq: dest:x src1:x src2:x len:6 -vfsubs: dest:x src1:x src2:x len:6 -vfsubd: dest:x src1:x src2:x len:6 -vxor: dest:x src1:x src2:x len:6 -vor: dest:x src1:x src2:x len:6 -vnor: dest:x src1:x src2:x len:6 -vand: dest:x src1:x src2:x len:6 +loadx_membase: dest:x src1:b len:26 +storex_membase: dest:b src1:x len:26 +s390_vab: dest:x src1:x src2:x len:6 +s390_vah: dest:x src1:x src2:x len:6 +s390_vaf: dest:x src1:x src2:x len:6 +s390_vag: dest:x src1:x src2:x len:6 +s390_vfasb: dest:x src1:x src2:x len:6 +s390_vfadb: dest:x src1:x src2:x len:6 +s390_vsb: dest:x src1:x src2:x len:6 +s390_vsh: dest:x src1:x src2:x len:6 +s390_vsf: dest:x src1:x src2:x len:6 +s390_vsg: dest:x src1:x src2:x len:6 +s390_vfssb: dest:x src1:x src2:x len:6 +s390_vfsdb: dest:x 
src1:x src2:x len:6 +s390_vx: dest:x src1:x src2:x len:6 +s390_vo: dest:x src1:x src2:x len:6 +s390_vno: dest:x src1:x src2:x len:6 +s390_vn: dest:x src1:x src2:x len:6 vandnot: dest:x src1:x src2:x len:6 -vnand: dest:x src1:x src2:x len:6 -vmulb: dest:x src1:x src2:x len:6 -vmulhw: dest:x src1:x src2:x len:6 -vmulf: dest:x src1:x src2:x len:6 -vfmuls: dest:x src1:x src2:x len:6 -vfmuld: dest:x src1:x src2:x len:6 -vfdivs: dest:x src1:x src2:x len:6 -vfdivd: dest:x src1:x src2:x len:6 -vmxb: dest:x src1:x src2:x len:6 -vmxh: dest:x src1:x src2:x len:6 -vmxf: dest:x src1:x src2:x len:6 -vmxg: dest:x src1:x src2:x len:6 -vmnb: dest:x src1:x src2:x len:6 -vmnh: dest:x src1:x src2:x len:6 -vmnf: dest:x src1:x src2:x len:6 -vmng: dest:x src1:x src2:x len:6 -vmxlb: dest:x src1:x src2:x len:6 -vmxlh: dest:x src1:x src2:x len:6 -vmxlf: dest:x src1:x src2:x len:6 -vmxlg: dest:x src1:x src2:x len:6 -vfmaxs: dest:x src1:x src2:x len:6 -vfmaxd: dest:x src1:x src2:x len:6 -vmnlb: dest:x src1:x src2:x len:6 -vmnlh: dest:x src1:x src2:x len:6 -vmnlf: dest:x src1:x src2:x len:6 -vmnlg: dest:x src1:x src2:x len:6 -vfmins: dest:x src1:x src2:x len:6 -vfmind: dest:x src1:x src2:x len:6 -vsumb: dest:x src1:x src2:x len:6 -vsumh: dest:x src1:x src2:x len:6 -vsumqf: dest:x src1:x src2:x len:6 -vsumqg: dest:x src1:x src2:x len:6 -vperm: dest:x src1:x src2:x len:6 -vrepib: dest:x len:6 -vrepih: dest:x len:6 -vrepif: dest:x len:6 -vrepig: dest:x len:6 -vceqbs: dest:x src1:x src2:x len:6 -vceqhs: dest:x src1:x src2:x len:6 -vceqfs: dest:x src1:x src2:x len:6 -vceqgs: dest:x src1:x src2:x len:6 -vfcesbs: dest:x src1:x src2:x len:6 -vfcedbs: dest:x src1:x src2:x len:6 -vfchsbs: dest:x src1:x src2:x len:6 -vfchdbs: dest:x src1:x src2:x len:6 -vgmb: dest:x src1:i src2:i len:6 -vgmh: dest:x src1:i src2:i len:6 -vgmf: dest:x src1:i src2:i len:6 -vgmg: dest:x src1:i src2:i len:6 -vecb: dest:x src1:x len:6 -vech: dest:x src1:x len:6 -vecf: dest:x src1:x len:6 -vecg: dest:x src1:x len:6 -veclb: 
dest:x src1:x len:6 -veclh: dest:x src1:x len:6 -veclf: dest:x src1:x len:6 -veclg: dest:x src1:x len:6 -vchbs: dest:x src1:x src2:x len:6 -vchhs: dest:x src1:x src2:x len:6 -vchfs: dest:x src1:x src2:x len:6 -vchgs: dest:x src1:x src2:x len:6 -vfsqsb: dest:x src1:x len:6 -vfsqdb: dest:x src1:x len:6 -vchlbs: dest:x src1:x src2:x len:6 -vchlhs: dest:x src1:x src2:x len:6 -vchlfs: dest:x src1:x src2:x len:6 -vchlgs: dest:x src1:x src2:x len:6 -vfchesbs: dest:x src1:x src2:x len:6 -vfchedbs: dest:x src1:x src2:x len:6 -vlpb: dest:x src1:x len:6 -vlph: dest:x src1:x len:6 -vlpf: dest:x src1:x len:6 -vlpg: dest:x src1:x len:6 -vflpdb: dest:x src1:x len:6 -vflpsb: dest:x src1:x len:6 -vflcdb: dest:x src1:x len:6 -vflcsb: dest:x src1:x len:6 -vpkh: dest:x src1:x src2:x len:6 -vpkf: dest:x src1:x src2:x len:6 -vpkg: dest:x src1:x src2:x len:6 -vlcb: dest:x src1:x len:6 -vlch: dest:x src1:x len:6 -vlcf: dest:x src1:x len:6 -vlcg: dest:x src1:x len:6 -vuplb: dest:x src1:x len:6 -vuplh: dest:x src1:x len:6 -vuplf: dest:x src1:x len:6 -vupllb: dest:x src1:x len:6 -vupllh: dest:x src1:x len:6 -vupllf: dest:x src1:x len:6 -vuphb: dest:x src1:x len:6 -vuphh: dest:x src1:x len:6 -vuphf: dest:x src1:x len:6 -vuplhb: dest:x src1:x len:6 -vuplhh: dest:x src1:x len:6 -vuplhf: dest:x src1:x len:6 -vfisb: dest:x src1:x len:6 -vfidb: dest:x src1:x len:6 -ceil_floor: dest:x src1:x len:6 -ones_complement: dest:x src1:x len:6 -negate: dest:x src1:x len:12 +s390_vnn: dest:x src1:x src2:x len:6 +s390_vmlb: dest:x src1:x src2:x len:6 +s390_vmlhw: dest:x src1:x src2:x len:6 +s390_vmlf: dest:x src1:x src2:x len:6 +s390_vfmsb: dest:x src1:x src2:x len:6 +s390_vfmdb: dest:x src1:x src2:x len:6 +s390_vfdsb: dest:x src1:x src2:x len:6 +s390_vfddb: dest:x src1:x src2:x len:6 +s390_vmxb: dest:x src1:x src2:x len:6 +s390_vmxh: dest:x src1:x src2:x len:6 +s390_vmxf: dest:x src1:x src2:x len:6 +s390_vmxg: dest:x src1:x src2:x len:6 +s390_vmnb: dest:x src1:x src2:x len:6 +s390_vmnh: dest:x src1:x src2:x 
len:6 +s390_vmnf: dest:x src1:x src2:x len:6 +s390_vmng: dest:x src1:x src2:x len:6 +s390_vmxlb: dest:x src1:x src2:x len:6 +s390_vmxlh: dest:x src1:x src2:x len:6 +s390_vmxlf: dest:x src1:x src2:x len:6 +s390_vmxlg: dest:x src1:x src2:x len:6 +s390_vfmaxsb: dest:x src1:x src2:x len:6 +s390_vfmaxdb: dest:x src1:x src2:x len:6 +s390_vmnlb: dest:x src1:x src2:x len:6 +s390_vmnlh: dest:x src1:x src2:x len:6 +s390_vmnlf: dest:x src1:x src2:x len:6 +s390_vmnlg: dest:x src1:x src2:x len:6 +s390_vfminsb: dest:x src1:x src2:x len:6 +s390_vfmindb: dest:x src1:x src2:x len:6 +s390_vsumb: dest:x src1:x src2:x len:6 +s390_vsumh: dest:x src1:x src2:x len:6 +s390_vsumqf: dest:x src1:x src2:x len:6 +s390_vsumqg: dest:x src1:x src2:x len:6 +s390_vperm: dest:x src1:x src2:x len:6 +s390_vrepib: dest:x len:6 +s390_vrepih: dest:x len:6 +s390_vrepif: dest:x len:6 +s390_vrepig: dest:x len:6 +s390_vceqbs: dest:x src1:x src2:x len:6 +s390_vceqhs: dest:x src1:x src2:x len:6 +s390_vceqfs: dest:x src1:x src2:x len:6 +s390_vceqgs: dest:x src1:x src2:x len:6 +s390_vceqb: dest:x src1:x src2:x len:6 +s390_vceqh: dest:x src1:x src2:x len:6 +s390_vceqf: dest:x src1:x src2:x len:6 +s390_vceqg: dest:x src1:x src2:x len:6 +s390_vfcesbs: dest:x src1:x src2:x len:6 +s390_vfcedbs: dest:x src1:x src2:x len:6 +s390_vfchsbs: dest:x src1:x src2:x len:6 +s390_vfchdbs: dest:x src1:x src2:x len:6 +s390_vfcesb: dest:x src1:x src2:x len:6 +s390_vfcedb: dest:x src1:x src2:x len:6 +s390_vfchsb: dest:x src1:x src2:x len:6 +s390_vfchdb: dest:x src1:x src2:x len:6 +s390_vgmb: dest:x src1:i src2:i len:6 +s390_vgmh: dest:x src1:i src2:i len:6 +s390_vgmf: dest:x src1:i src2:i len:6 +s390_vgmg: dest:x src1:i src2:i len:6 +s390_vecb: dest:x src1:x len:6 +s390_vech: dest:x src1:x len:6 +s390_vecf: dest:x src1:x len:6 +s390_vecg: dest:x src1:x len:6 +s390_veclb: dest:x src1:x len:6 +s390_veclh: dest:x src1:x len:6 +s390_veclf: dest:x src1:x len:6 +s390_veclg: dest:x src1:x len:6 +s390_vchbs: dest:x src1:x src2:x len:6 
+s390_vchhs: dest:x src1:x src2:x len:6 +s390_vchfs: dest:x src1:x src2:x len:6 +s390_vchgs: dest:x src1:x src2:x len:6 +s390_vchb: dest:x src1:x src2:x len:6 +s390_vchh: dest:x src1:x src2:x len:6 +s390_vchf: dest:x src1:x src2:x len:6 +s390_vchg: dest:x src1:x src2:x len:6 +s390_vfsqsb: dest:x src1:x len:6 +s390_vfsqdb: dest:x src1:x len:6 +s390_vchlbs: dest:x src1:x src2:x len:6 +s390_vchlhs: dest:x src1:x src2:x len:6 +s390_vchlfs: dest:x src1:x src2:x len:6 +s390_vchlgs: dest:x src1:x src2:x len:6 +s390_vchlb: dest:x src1:x src2:x len:6 +s390_vchlh: dest:x src1:x src2:x len:6 +s390_vchlf: dest:x src1:x src2:x len:6 +s390_vchlg: dest:x src1:x src2:x len:6 +s390_vfchesbs: dest:x src1:x src2:x len:6 +s390_vfchedbs: dest:x src1:x src2:x len:6 +s390_vfchesb: dest:x src1:x src2:x len:6 +s390_vfchedb: dest:x src1:x src2:x len:6 +s390_vlpb: dest:x src1:x len:6 +s390_vlph: dest:x src1:x len:6 +s390_vlpf: dest:x src1:x len:6 +s390_vlpg: dest:x src1:x len:6 +s390_vflpdb: dest:x src1:x len:6 +s390_vflpsb: dest:x src1:x len:6 +s390_vflcdb: dest:x src1:x len:6 +s390_vflcsb: dest:x src1:x len:6 +s390_vpkh: dest:x src1:x src2:x len:6 +s390_vpkf: dest:x src1:x src2:x len:6 +s390_vpkg: dest:x src1:x src2:x len:6 +s390_vlcb: dest:x src1:x len:6 +s390_vlch: dest:x src1:x len:6 +s390_vlcf: dest:x src1:x len:6 +s390_vlcg: dest:x src1:x len:6 +s390_vuplb: dest:x src1:x len:6 +s390_vuplhw: dest:x src1:x len:6 +s390_vuplf: dest:x src1:x len:6 +s390_vupllb: dest:x src1:x len:6 +s390_vupllh: dest:x src1:x len:6 +s390_vupllf: dest:x src1:x len:6 +s390_vuphb: dest:x src1:x len:6 +s390_vuphh: dest:x src1:x len:6 +s390_vuphf: dest:x src1:x len:6 +s390_vuplhb: dest:x src1:x len:6 +s390_vuplhh: dest:x src1:x len:6 +s390_vuplhf: dest:x src1:x len:6 +s390_vfisb: dest:x src1:x len:6 +s390_vfidb: dest:x src1:x len:6 xconst: dest:x len:18 diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index dc662a58b6febc..03b622c5db0709 100644 --- a/src/mono/mono/mini/mini-ops.h +++ 
b/src/mono/mono/mini/mini-ops.h @@ -151,6 +151,7 @@ MINI_OP(OP_LOADX_MEMBASE, "loadx_membase", XREG, IREG, NONE) #if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_S390X) MINI_OP(OP_LOADX_ALIGNED_MEMBASE, "loadx_aligned_membase", XREG, IREG, NONE) +MINI_OP(OP_STOREX_ALIGNED_MEMBASE_REG, "storex_aligned_membase_reg", IREG, XREG, NONE) #endif MINI_OP(OP_LOADV_MEMBASE, "loadv_membase", VREG, IREG, NONE) @@ -1049,8 +1050,6 @@ MINI_OP(OP_CVTPS2PD, "cvtps2pd", XREG, XREG, NONE) MINI_OP(OP_CVTTPD2DQ, "cvttpd2dq", XREG, XREG, NONE) MINI_OP(OP_CVTTPS2DQ, "cvttps2dq", XREG, XREG, NONE) -MINI_OP(OP_VECTOR_IABS, "vector_integer_abs", XREG, XREG, NONE) -MINI_OP(OP_VECTOR_ANDN, "vector_andnot", XREG, XREG, XREG) /* sse 1 */ /* inst_c1 is target type */ @@ -1498,128 +1497,140 @@ MINI_OP(OP_S390_CIJ, "s390_cij", IREG, NONE, NONE) MINI_OP(OP_S390_CLIJ, "s390_cij_un", IREG, IREG, NONE) MINI_OP(OP_S390_CGIJ, "s390_cgij", LREG, NONE, NONE) MINI_OP(OP_S390_CLGIJ, "s390_cgij_un", LREG, NONE, NONE) -MINI_OP(OP_STOREX_ALIGNED_MEMBASE_REG, "storex_aligned_membase_reg", IREG, XREG, NONE) -MINI_OP(OP_VADDB, "vaddb", XREG, XREG, XREG) -MINI_OP(OP_VADDH, "vaddh", XREG, XREG, XREG) -MINI_OP(OP_VADDF, "vaddf", XREG, XREG, XREG) -MINI_OP(OP_VADDG, "vaddg", XREG, XREG, XREG) -MINI_OP(OP_VADDQ, "vaddq", XREG, XREG, XREG) -MINI_OP(OP_VFADDS, "vfadds", XREG, XREG, XREG) -MINI_OP(OP_VFADDD, "vfaddd", XREG, XREG, XREG) -MINI_OP(OP_VSUBB, "vsubb", XREG, XREG, XREG) -MINI_OP(OP_VSUBH, "vsubh", XREG, XREG, XREG) -MINI_OP(OP_VSUBF, "vsubf", XREG, XREG, XREG) -MINI_OP(OP_VSUBG, "vsubg", XREG, XREG, XREG) -MINI_OP(OP_VSUBQ, "vsubq", XREG, XREG, XREG) -MINI_OP(OP_VFSUBS, "vfsubs", XREG, XREG, XREG) -MINI_OP(OP_VFSUBD, "vfsubd", XREG, XREG, XREG) -MINI_OP(OP_VMULB, "vmulb", XREG, XREG, XREG) -MINI_OP(OP_VMULHW, "vmulhw", XREG, XREG, XREG) -MINI_OP(OP_VMULF, "vmulf", XREG, XREG, XREG) -MINI_OP(OP_VFMULS, "vfmuls", XREG, XREG, XREG) -MINI_OP(OP_VFMULD, "vfmuld", XREG, XREG, XREG) 
-MINI_OP(OP_VFDIVS, "vfdivs", XREG, XREG, XREG) -MINI_OP(OP_VFDIVD, "vfdivd", XREG, XREG, XREG) -MINI_OP(OP_VMXB, "vmxb", XREG, XREG, XREG) -MINI_OP(OP_VMXH, "vmxh", XREG, XREG, XREG) -MINI_OP(OP_VMXF, "vmxf", XREG, XREG, XREG) -MINI_OP(OP_VMXG, "vmxg", XREG, XREG, XREG) -MINI_OP(OP_VFMAXS, "vfmaxs", XREG, XREG, XREG) -MINI_OP(OP_VFMAXD, "vfmaxd", XREG, XREG, XREG) -MINI_OP(OP_VMNB, "vmnb", XREG, XREG, XREG) -MINI_OP(OP_VMNH, "vmnh", XREG, XREG, XREG) -MINI_OP(OP_VMNF, "vmnf", XREG, XREG, XREG) -MINI_OP(OP_VMNG, "vmng", XREG, XREG, XREG) -MINI_OP(OP_VFMINS, "vfmins", XREG, XREG, XREG) -MINI_OP(OP_VFMIND, "vfmind", XREG, XREG, XREG) -MINI_OP(OP_VMXLB, "vmxlb", XREG, XREG, XREG) -MINI_OP(OP_VMXLH, "vmxlh", XREG, XREG, XREG) -MINI_OP(OP_VMXLF, "vmxlf", XREG, XREG, XREG) -MINI_OP(OP_VMXLG, "vmxlg", XREG, XREG, XREG) -MINI_OP(OP_VMNLB, "vmnlb", XREG, XREG, XREG) -MINI_OP(OP_VMNLH, "vmnlh", XREG, XREG, XREG) -MINI_OP(OP_VMNLF, "vmnlf", XREG, XREG, XREG) -MINI_OP(OP_VMNLG, "vmnlg", XREG, XREG, XREG) -MINI_OP(OP_VOR, "vor", XREG, XREG, XREG) -MINI_OP(OP_VNOR, "vnor", XREG, XREG, XREG) -MINI_OP(OP_VXOR, "vxor", XREG, XREG, XREG) -MINI_OP(OP_VAND, "vand", XREG, XREG, XREG) -MINI_OP(OP_VNAND, "vnand", XREG, XREG, XREG) -MINI_OP(OP_VECTOR_ANDN, "vandnot", XREG, XREG, XREG) -MINI_OP(OP_VSUMB, "vsumb", XREG, XREG, XREG) -MINI_OP(OP_VSUMH, "vsumh", XREG, XREG, XREG) -MINI_OP(OP_VSUMQF, "vsumqf", XREG, XREG, XREG) -MINI_OP(OP_VSUMQG, "vsumqg", XREG, XREG, XREG) -MINI_OP(OP_VPERM, "vperm", XREG, XREG, XREG) -MINI_OP(OP_VREPIB, "vrepib", XREG, NONE, NONE) -MINI_OP(OP_VREPIH, "vrepih", XREG, NONE, NONE) -MINI_OP(OP_VREPIF, "vrepif", XREG, NONE, NONE) -MINI_OP(OP_VREPIG, "vrepig", XREG, NONE, NONE) -MINI_OP(OP_VFSQSB, "vfsqsb", XREG, XREG, NONE) -MINI_OP(OP_VFSQDB, "vfsqdb", XREG, XREG, NONE) -MINI_OP(OP_VFCESBS, "vfcesbs", XREG, XREG, XREG) -MINI_OP(OP_VFCEDBS, "vfcedbs", XREG, XREG, XREG) -MINI_OP(OP_VFCHSBS, "vfchsbs", XREG, XREG, XREG) -MINI_OP(OP_VFCHDBS, "vfchdbs", XREG, XREG, 
XREG) -MINI_OP(OP_VCEQBS, "vceqbs", XREG, XREG, XREG) -MINI_OP(OP_VCEQHS, "vceqhs", XREG, XREG, XREG) -MINI_OP(OP_VCEQFS, "vceqfs", XREG, XREG, XREG) -MINI_OP(OP_VCEQGS, "vceqgs", XREG, XREG, XREG) -MINI_OP(OP_VGMB, "vgmb", XREG, IREG, IREG) -MINI_OP(OP_VGMH, "vgmh", XREG, IREG, IREG) -MINI_OP(OP_VGMF, "vgmf", XREG, IREG, IREG) -MINI_OP(OP_VGMG, "vgmg", XREG, IREG, IREG) -MINI_OP(OP_VECB, "vecb", XREG, XREG, NONE) -MINI_OP(OP_VECF, "vecf", XREG, XREG, NONE) -MINI_OP(OP_VECH, "vech", XREG, XREG, NONE) -MINI_OP(OP_VECG, "vecg", XREG, XREG, NONE) -MINI_OP(OP_VECLB, "veclb", XREG, XREG, NONE) -MINI_OP(OP_VECLF, "veclf", XREG, XREG, NONE) -MINI_OP(OP_VECLH, "veclh", XREG, XREG, NONE) -MINI_OP(OP_VECLG, "veclg", XREG, XREG, NONE) -MINI_OP(OP_VCHBS, "vchbs", XREG, XREG, XREG) -MINI_OP(OP_VCHHS, "vchhs", XREG, XREG, XREG) -MINI_OP(OP_VCHFS, "vchfs", XREG, XREG, XREG) -MINI_OP(OP_VCHGS, "vchgs", XREG, XREG, XREG) -MINI_OP(OP_VCHLBS, "vchlbs", XREG, XREG, XREG) -MINI_OP(OP_VCHLHS, "vchlhs", XREG, XREG, XREG) -MINI_OP(OP_VCHLFS, "vchlfs", XREG, XREG, XREG) -MINI_OP(OP_VCHLGS, "vchlgs", XREG, XREG, XREG) -MINI_OP(OP_VFCHESBS, "vfchesbs", XREG, XREG, XREG) -MINI_OP(OP_VFCHEDBS, "vfchedbs", XREG, XREG, XREG) -MINI_OP(OP_VEC_ABS, "vecabs", XREG, XREG, NONE) -MINI_OP(OP_VEC_ONE, "vecone", XREG, NONE, NONE) -MINI_OP(OP_VLPB, "vlpb", XREG, XREG, NONE) -MINI_OP(OP_VLPH, "vlph", XREG, XREG, NONE) -MINI_OP(OP_VLPF, "vlpf", XREG, XREG, NONE) -MINI_OP(OP_VLPG, "vlpg", XREG, XREG, NONE) -MINI_OP(OP_VFLPDB, "vflpdb", XREG, XREG, NONE) -MINI_OP(OP_VFLPSB, "vflpsb", XREG, XREG, NONE) -MINI_OP(OP_VFLCDB, "vflcdb", XREG, XREG, NONE) -MINI_OP(OP_VFLCSB, "vflcsb", XREG, XREG, NONE) -MINI_OP(OP_VPKH, "vpkh", XREG, XREG, XREG) -MINI_OP(OP_VPKF, "vpkf", XREG, XREG, XREG) -MINI_OP(OP_VPKG, "vpkg", XREG, XREG, XREG) -MINI_OP(OP_VLCB, "vlcb", XREG, XREG, NONE) -MINI_OP(OP_VLCH, "vlch", XREG, XREG, NONE) -MINI_OP(OP_VLCF, "vlcf", XREG, XREG, NONE) -MINI_OP(OP_VLCG, "vlcg", XREG, XREG, NONE) 
-MINI_OP(OP_VUPHB, "vuphb", XREG, XREG, NONE) -MINI_OP(OP_VUPHH, "vuphh", XREG, XREG, NONE) -MINI_OP(OP_VUPHF, "vuphf", XREG, XREG, NONE) -MINI_OP(OP_VUPLB, "vuplb", XREG, XREG, NONE) -MINI_OP(OP_VUPLH, "vuplh", XREG, XREG, NONE) -MINI_OP(OP_VUPLF, "vuplf", XREG, XREG, NONE) -MINI_OP(OP_VUPLHB, "vuplhb", XREG, XREG, NONE) -MINI_OP(OP_VUPLHH, "vuplhh", XREG, XREG, NONE) -MINI_OP(OP_VUPLHF, "vuplhf", XREG, XREG, NONE) -MINI_OP(OP_VUPLLB, "vupllb", XREG, XREG, NONE) -MINI_OP(OP_VUPLLH, "vupllh", XREG, XREG, NONE) -MINI_OP(OP_VUPLLF, "vupllf", XREG, XREG, NONE) -MINI_OP(OP_VFISB, "vfidb", XREG, XREG, NONE) -MINI_OP(OP_VFIDB, "vfisb", XREG, XREG, NONE) -MINI_OP(OP_CEIL_FLOOR, "ceil_floor", XREG, XREG, NONE) +MINI_OP(OP_S390_VAB, "s390_vab", XREG, XREG, XREG) +MINI_OP(OP_S390_VAH, "s390_vah", XREG, XREG, XREG) +MINI_OP(OP_S390_VAF, "s390_vaf", XREG, XREG, XREG) +MINI_OP(OP_S390_VAG, "s390_vag", XREG, XREG, XREG) +MINI_OP(OP_S390_VFASB, "s390_vfasb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFADB, "s390_vfadb", XREG, XREG, XREG) +MINI_OP(OP_S390_VSB, "s390_vsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VSH, "s390_vsh", XREG, XREG, XREG) +MINI_OP(OP_S390_VSF, "s390_vsf", XREG, XREG, XREG) +MINI_OP(OP_S390_VSG, "s390_vsg", XREG, XREG, XREG) +MINI_OP(OP_S390_VFSSB, "s390_vfssb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFSDB, "s390_vfsdb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMLB, "s390_vmlb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMLHW, "s390_vmlhw", XREG, XREG, XREG) +MINI_OP(OP_S390_VMLF, "s390_vmlf", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMSB, "s390_vfmsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMDB, "s390_vfmdb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFDSB, "s390_vfdsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFDDB, "s390_vfddb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXB, "s390_vmxb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXH, "s390_vmxh", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXF, "s390_vmxf", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXG, "s390_vmxg", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMAXSB, "s390_vfmaxsb", 
XREG, XREG, XREG) +MINI_OP(OP_S390_VFMAXDB, "s390_vfmaxdb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNB, "s390_vmnb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNH, "s390_vmnh", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNF, "s390_vmnf", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNG, "s390_vmng", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMINSB, "s390_vfminsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMINDB, "s390_vfmindb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXLB, "s390_vmxlb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXLH, "s390_vmxlh", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXLF, "s390_vmxlf", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXLG, "s390_vmxlg", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNLB, "s390_vmnlb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNLH, "s390_vmnlh", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNLF, "s390_vmnlf", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNLG, "s390_vmnlg", XREG, XREG, XREG) +MINI_OP(OP_S390_VO, "s390_vo", XREG, XREG, XREG) +MINI_OP(OP_S390_VNO, "s390_vno", XREG, XREG, XREG) +MINI_OP(OP_S390_VX, "s390_vx", XREG, XREG, XREG) +MINI_OP(OP_S390_VN, "s390_vn", XREG, XREG, XREG) +MINI_OP(OP_S390_VNN, "s390_vnn", XREG, XREG, XREG) +MINI_OP(OP_S390_VSUMB, "s390_vsumb", XREG, XREG, XREG) +MINI_OP(OP_S390_VSUMH, "s390_vsumh", XREG, XREG, XREG) +MINI_OP(OP_S390_VSUMQF, "s390_vsumqf", XREG, XREG, XREG) +MINI_OP(OP_S390_VSUMQG, "s390_vsumqg", XREG, XREG, XREG) +MINI_OP(OP_S390_VPERM, "s390_vperm", XREG, XREG, XREG) +MINI_OP(OP_S390_VREPIB, "s390_vrepib", XREG, NONE, NONE) +MINI_OP(OP_S390_VREPIH, "s390_vrepih", XREG, NONE, NONE) +MINI_OP(OP_S390_VREPIF, "s390_vrepif", XREG, NONE, NONE) +MINI_OP(OP_S390_VREPIG, "s390_vrepig", XREG, NONE, NONE) +MINI_OP(OP_S390_VFSQSB, "s390_vfsqsb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFSQDB, "s390_vfsqdb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFCESBS, "s390_vfcesbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCEDBS, "s390_vfcedbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHSBS, "s390_vfchsbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHDBS, "s390_vfchdbs", XREG, XREG, XREG) 
+MINI_OP(OP_S390_VFCESB, "s390_vfcesb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCEDB, "s390_vfcedb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHSB, "s390_vfchsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHDB, "s390_vfchdb", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQBS, "s390_vceqbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQHS, "s390_vceqhs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQFS, "s390_vceqfs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQGS, "s390_vceqgs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQB, "s390_vceqb", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQH, "s390_vceqh", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQF, "s390_vceqf", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQG, "s390_vceqg", XREG, XREG, XREG) +MINI_OP(OP_S390_VGMB, "s390_vgmb", XREG, IREG, IREG) +MINI_OP(OP_S390_VGMH, "s390_vgmh", XREG, IREG, IREG) +MINI_OP(OP_S390_VGMF, "s390_vgmf", XREG, IREG, IREG) +MINI_OP(OP_S390_VGMG, "s390_vgmg", XREG, IREG, IREG) +MINI_OP(OP_S390_VECB, "s390_vecb", XREG, XREG, NONE) +MINI_OP(OP_S390_VECF, "s390_vecf", XREG, XREG, NONE) +MINI_OP(OP_S390_VECH, "s390_vech", XREG, XREG, NONE) +MINI_OP(OP_S390_VECG, "s390_vecg", XREG, XREG, NONE) +MINI_OP(OP_S390_VECLB, "s390_veclb", XREG, XREG, NONE) +MINI_OP(OP_S390_VECLF, "s390_veclf", XREG, XREG, NONE) +MINI_OP(OP_S390_VECLH, "s390_veclh", XREG, XREG, NONE) +MINI_OP(OP_S390_VECLG, "s390_veclg", XREG, XREG, NONE) +MINI_OP(OP_S390_VCHBS, "s390_vchbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHHS, "s390_vchhs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHFS, "s390_vchfs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHGS, "s390_vchgs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHB, "s390_vchb", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHH, "s390_vchh", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHF, "s390_vchf", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHG, "s390_vchg", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLBS, "s390_vchlbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLHS, "s390_vchlhs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLFS, "s390_vchlfs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLGS, "s390_vchlgs", 
XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLB, "s390_vchlb", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLH, "s390_vchlh", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLF, "s390_vchlf", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLG, "s390_vchlg", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHESBS, "s390_vfchesbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHEDBS, "s390_vfchedbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHESB, "s390_vfchesb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHEDB, "s390_vfchedb", XREG, XREG, XREG) +MINI_OP(OP_S390_VLPB, "s390_vlpb", XREG, XREG, NONE) +MINI_OP(OP_S390_VLPH, "s390_vlph", XREG, XREG, NONE) +MINI_OP(OP_S390_VLPF, "s390_vlpf", XREG, XREG, NONE) +MINI_OP(OP_S390_VLPG, "s390_vlpg", XREG, XREG, NONE) +MINI_OP(OP_S390_VFLPDB, "s390_vflpdb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFLPSB, "s390_vflpsb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFLCDB, "s390_vflcdb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFLCSB, "s390_vflcsb", XREG, XREG, NONE) +MINI_OP(OP_S390_VPKH, "s390_vpkh", XREG, XREG, XREG) +MINI_OP(OP_S390_VPKF, "s390_vpkf", XREG, XREG, XREG) +MINI_OP(OP_S390_VPKG, "s390_vpkg", XREG, XREG, XREG) +MINI_OP(OP_S390_VLCB, "s390_vlcb", XREG, XREG, NONE) +MINI_OP(OP_S390_VLCH, "s390_vlch", XREG, XREG, NONE) +MINI_OP(OP_S390_VLCF, "s390_vlcf", XREG, XREG, NONE) +MINI_OP(OP_S390_VLCG, "s390_vlcg", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPHB, "s390_vuphb", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPHH, "s390_vuphh", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPHF, "s390_vuphf", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLB, "s390_vuplb", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLHW, "s390_vuplhw", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLF, "s390_vuplf", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLHB, "s390_vuplhb", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLHH, "s390_vuplhh", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLHF, "s390_vuplhf", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLLB, "s390_vupllb", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLLH, "s390_vupllh", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLLF, "s390_vupllf", XREG, XREG, NONE) 
+MINI_OP(OP_S390_VFISB, "s390_vfisb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFIDB, "s390_vfidb", XREG, XREG, NONE) MINI_OP(OP_S390_XEXTRACT, "s390_xextract", IREG, XREG, NONE) +MINI_OP(OP_S390_XCOMPARE_XEXTRACT, "s390_xcompare_xextract", IREG, XREG, XREG) #endif #if defined(TARGET_ARM64) @@ -2001,6 +2012,8 @@ MINI_OP(OP_NEGATION, "negate", XREG, XREG, NONE) MINI_OP(OP_ONES_COMPLEMENT, "ones_complement", XREG, XREG, NONE) /* Select bits from src2/src3 using src1 */ MINI_OP3(OP_BSL, "bitwise_select", XREG, XREG, XREG, XREG) +MINI_OP(OP_VECTOR_ANDN, "vandnot", XREG, XREG, XREG) +MINI_OP(OP_VECTOR_IABS, "vector_integer_abs", XREG, XREG, NONE) #endif #if defined(TARGET_RISCV64) || defined(TARGET_RISCV32) diff --git a/src/mono/mono/mini/mini-s390x.c b/src/mono/mono/mini/mini-s390x.c index a8f680796043f6..90fedfb7cf552a 100644 --- a/src/mono/mono/mini/mini-s390x.c +++ b/src/mono/mono/mini/mini-s390x.c @@ -2194,22 +2194,22 @@ simd_type_to_sub_op (int t) switch (t) { case MONO_TYPE_I1: case MONO_TYPE_U1: - return OP_VSUBB; + return OP_S390_VSB; case MONO_TYPE_I2: case MONO_TYPE_U2: - return OP_VSUBH; + return OP_S390_VSH; case MONO_TYPE_I4: case MONO_TYPE_U4: - return OP_VSUBF; + return OP_S390_VSF; case MONO_TYPE_I8: case MONO_TYPE_U8: case MONO_TYPE_I: case MONO_TYPE_U: - return OP_VSUBG; + return OP_S390_VSG; case MONO_TYPE_R4: - return OP_VFSUBS; + return OP_S390_VFSSB; case MONO_TYPE_R8: - return OP_VFSUBD; + return OP_S390_VFSDB; default: g_assert_not_reached (); return -1; @@ -2222,22 +2222,22 @@ simd_type_to_add_op (int t) switch (t) { case MONO_TYPE_I1: case MONO_TYPE_U1: - return OP_VADDB; + return OP_S390_VAB; case MONO_TYPE_I2: case MONO_TYPE_U2: - return OP_VADDH; + return OP_S390_VAH; case MONO_TYPE_I4: case MONO_TYPE_U4: - return OP_VADDF; + return OP_S390_VAF; case MONO_TYPE_I8: case MONO_TYPE_U8: case MONO_TYPE_I: case MONO_TYPE_U: - return OP_VADDG; + return OP_S390_VAG; case MONO_TYPE_R4: - return OP_VFADDS; + return OP_S390_VFASB; case MONO_TYPE_R8: -
return OP_VFADDD; + return OP_S390_VFADB; default: g_assert_not_reached (); return -1; @@ -2250,17 +2250,17 @@ simd_type_to_mul_op (int t) switch (t) { case MONO_TYPE_I1: case MONO_TYPE_U1: - return OP_VMULB; + return OP_S390_VMLB; case MONO_TYPE_I2: case MONO_TYPE_U2: - return OP_VMULHW; + return OP_S390_VMLHW; case MONO_TYPE_I4: case MONO_TYPE_U4: - return OP_VMULF; + return OP_S390_VMLF; case MONO_TYPE_R4: - return OP_VFMULS; + return OP_S390_VFMSB; case MONO_TYPE_R8: - return OP_VFMULD; + return OP_S390_VFMDB; default: g_assert_not_reached (); return -1; @@ -2272,27 +2272,27 @@ simd_type_to_max_op (int t) { switch (t) { case MONO_TYPE_I1: - return OP_VMXB; + return OP_S390_VMXB; case MONO_TYPE_U1: - return OP_VMXLB; + return OP_S390_VMXLB; case MONO_TYPE_I2: - return OP_VMXH; + return OP_S390_VMXH; case MONO_TYPE_U2: - return OP_VMXLH; + return OP_S390_VMXLH; case MONO_TYPE_I4: - return OP_VMXF; + return OP_S390_VMXF; case MONO_TYPE_U4: - return OP_VMXLF; + return OP_S390_VMXLF; case MONO_TYPE_I8: case MONO_TYPE_I: - return OP_VMXG; + return OP_S390_VMXG; case MONO_TYPE_U8: case MONO_TYPE_U: - return OP_VMXLG; + return OP_S390_VMXLG; case MONO_TYPE_R4: - return OP_VFMAXS; + return OP_S390_VFMAXSB; case MONO_TYPE_R8: - return OP_VFMAXD; + return OP_S390_VFMAXDB; default: g_assert_not_reached (); return -1; @@ -2304,27 +2304,55 @@ simd_type_to_min_op (int t) { switch (t) { case MONO_TYPE_I1: - return OP_VMNB; + return OP_S390_VMNB; case MONO_TYPE_U1: - return OP_VMNLB; + return OP_S390_VMNLB; case MONO_TYPE_I2: - return OP_VMNH; + return OP_S390_VMNH; case MONO_TYPE_U2: - return OP_VMNLH; + return OP_S390_VMNLH; case MONO_TYPE_I4: - return OP_VMNF; + return OP_S390_VMNF; case MONO_TYPE_U4: - return OP_VMNLF; + return OP_S390_VMNLF; case MONO_TYPE_I8: case MONO_TYPE_I: - return OP_VMNG; + return OP_S390_VMNG; case MONO_TYPE_U8: case MONO_TYPE_U: - return OP_VMNLG; + return OP_S390_VMNLG; case MONO_TYPE_R4: - return OP_VFMINS; + return OP_S390_VFMINSB; case 
MONO_TYPE_R8: - return OP_VFMIND; + return OP_S390_VFMINDB; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_comp_any_all_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_S390_VCEQBS; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_S390_VCEQHS; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_S390_VCEQFS; + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return OP_S390_VCEQGS; + case MONO_TYPE_R4: + return OP_S390_VFCESBS; + case MONO_TYPE_R8: + return OP_S390_VFCEDBS; default: g_assert_not_reached (); return -1; @@ -2337,22 +2365,54 @@ simd_type_to_comp_op (int t) switch (t) { case MONO_TYPE_I1: case MONO_TYPE_U1: - return OP_VCEQBS; + return OP_S390_VCEQB; case MONO_TYPE_I2: case MONO_TYPE_U2: - return OP_VCEQHS; + return OP_S390_VCEQH; case MONO_TYPE_I4: case MONO_TYPE_U4: - return OP_VCEQFS; + return OP_S390_VCEQF; case MONO_TYPE_I: case MONO_TYPE_U: case MONO_TYPE_I8: case MONO_TYPE_U8: - return OP_VCEQGS; + return OP_S390_VCEQG; + case MONO_TYPE_R4: + return OP_S390_VFCESB; + case MONO_TYPE_R8: + return OP_S390_VFCEDB; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_gt_any_all_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + return OP_S390_VCHBS; + case MONO_TYPE_U1: + return OP_S390_VCHLBS; + case MONO_TYPE_I2: + return OP_S390_VCHHS; + case MONO_TYPE_U2: + return OP_S390_VCHLHS; + case MONO_TYPE_I4: + return OP_S390_VCHFS; + case MONO_TYPE_U4: + return OP_S390_VCHLFS; + case MONO_TYPE_I: + case MONO_TYPE_I8: + return OP_S390_VCHGS; + case MONO_TYPE_U: + case MONO_TYPE_U8: + return OP_S390_VCHLGS; case MONO_TYPE_R4: - return OP_VFCESBS; + return OP_S390_VFCHSBS; case MONO_TYPE_R8: - return OP_VFCEDBS; + return OP_S390_VFCHDBS; default: g_assert_not_reached (); return -1; @@ -2364,27 +2424,41 @@ simd_type_to_gt_op (int t) { switch (t) { case MONO_TYPE_I1: - return OP_VCHBS; + return OP_S390_VCHB; case 
MONO_TYPE_U1: - return OP_VCHLBS; + return OP_S390_VCHLB; case MONO_TYPE_I2: - return OP_VCHHS; + return OP_S390_VCHH; case MONO_TYPE_U2: - return OP_VCHLHS; + return OP_S390_VCHLH; case MONO_TYPE_I4: - return OP_VCHFS; + return OP_S390_VCHF; case MONO_TYPE_U4: - return OP_VCHLFS; + return OP_S390_VCHLF; case MONO_TYPE_I: case MONO_TYPE_I8: - return OP_VCHGS; + return OP_S390_VCHG; case MONO_TYPE_U: case MONO_TYPE_U8: - return OP_VCHLGS; + return OP_S390_VCHLG; case MONO_TYPE_R4: - return OP_VFCHSBS; + return OP_S390_VFCHSB; case MONO_TYPE_R8: - return OP_VFCHDBS; + return OP_S390_VFCHDB; + default: + g_assert_not_reached (); + return -1; + } +} + +static int +simd_type_to_ge_fp_any_all_op (int t) +{ + switch(t) { + case MONO_TYPE_R4: + return OP_S390_VFCHESBS; + case MONO_TYPE_R8: + return OP_S390_VFCHEDBS; default: g_assert_not_reached (); return -1; @@ -2396,15 +2470,16 @@ simd_type_to_ge_fp_op (int t) { switch(t) { case MONO_TYPE_R4: - return OP_VFCHESBS; + return OP_S390_VFCHESB; case MONO_TYPE_R8: - return OP_VFCHEDBS; + return OP_S390_VFCHEDB; default: g_assert_not_reached (); return -1; } } + static int simd_type_to_extract_int_op (int t, int q) { @@ -2501,20 +2576,16 @@ simd_type_to_abs_op (int t) switch (t) { case MONO_TYPE_I1: case MONO_TYPE_U1: - return OP_VLPB; + return OP_S390_VLPB; case MONO_TYPE_I2: case MONO_TYPE_U2: - return OP_VLPH; + return OP_S390_VLPH; case MONO_TYPE_I4: case MONO_TYPE_U4: - return OP_VLPF; - case MONO_TYPE_R4: - return OP_VFLPSB; + return OP_S390_VLPF; case MONO_TYPE_I8: case MONO_TYPE_U8: - return OP_VLPG; - case MONO_TYPE_R8: - return OP_VFLPDB; + return OP_S390_VLPG; default: g_assert_not_reached (); return -1; @@ -2527,26 +2598,31 @@ simd_type_to_negate_op (int t) switch (t) { case MONO_TYPE_I1: case MONO_TYPE_U1: - return OP_VLCB; + return OP_S390_VLCB; case MONO_TYPE_I2: case MONO_TYPE_U2: - return OP_VLCH; + return OP_S390_VLCH; case MONO_TYPE_I4: case MONO_TYPE_U4: - return OP_VLCF; + return OP_S390_VLCF; case 
MONO_TYPE_R4: - return OP_VFLCSB; + return OP_S390_VFLCSB; case MONO_TYPE_I8: case MONO_TYPE_U8: - return OP_VLCG; + return OP_S390_VLCG; case MONO_TYPE_R8: - return OP_VFLCDB; + return OP_S390_VFLCDB; default: g_assert_not_reached (); return -1; } } +static int +type_is_float (int t){ + return (t == MONO_TYPE_R4 || t == MONO_TYPE_R8) ? OP_XCOMPARE_FP : OP_XCOMPARE; +} + /** * * @brief Architecture-specific lowering pass processing @@ -2612,7 +2688,7 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) ins->opcode = GINT_TO_OPCODE (simd_type_to_mul_op (GTMREG_TO_INT (ins->inst_c1))); break; case OP_FDIV: - ins->opcode = ins->inst_c1 == MONO_TYPE_R4 ? OP_VFDIVS : OP_VFDIVD; + ins->opcode = ins->inst_c1 == MONO_TYPE_R4 ? OP_S390_VFDSB : OP_S390_VFDDB; break; case OP_FMIN: ins->opcode = GINT_TO_OPCODE (simd_type_to_min_op (GTMREG_TO_INT (ins->inst_c1))); @@ -2629,13 +2705,13 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) case OP_XBINOP_FORCEINT:{ switch (ins->inst_c0) { case XBINOP_FORCEINT_AND: - ins->opcode = OP_VAND; + ins->opcode = OP_S390_VN; break; case XBINOP_FORCEINT_OR: - ins->opcode = OP_VOR; + ins->opcode = OP_S390_VO; break; case XBINOP_FORCEINT_XOR: - ins->opcode = OP_VXOR; + ins->opcode = OP_S390_VX; break; default: g_assert_not_reached (); @@ -2702,9 +2778,8 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) ins->sreg2 = temp; case CMP_LE: case CMP_LE_UN:{ - /* FIX ME : setting condition code for cases other than ANY and ALL may degrade the performance*/ NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))), ins->dreg, ins->sreg1, ins->sreg2); - NEW_SIMD_INS (cfg, ins, temp_ins, OP_VNOR, ins->dreg, ins->dreg, ins->dreg); + NEW_SIMD_INS (cfg, ins, temp_ins, OP_S390_VNO, ins->dreg, ins->dreg, ins->dreg); NULLIFY_INS (ins); break; } @@ -2714,28 +2789,95 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) } break; } - case OP_S390_XEXTRACT: - switch 
(ins->inst_c1){ - case OP_XCOMPARE: - ins->opcode = GINT_TO_OPCODE (simd_type_to_extract_int_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f ), GTMREG_TO_INT (ins->inst_c0 >> 4))); + case OP_S390_XCOMPARE_XEXTRACT:{ + switch (type_is_float(GTMREG_TO_INT(ins->inst_c1))){ + case OP_XCOMPARE:{ + switch (ins->inst_c0 >> 4){ + case CMP_EQ: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_comp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); + break; + case CMP_LT: + case CMP_LT_UN: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GT: + case CMP_GT_UN: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); + break; + case CMP_GE: + case CMP_GE_UN: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_LE: + case CMP_LE_UN:{ + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); + NEW_SIMD_INS (cfg, ins, temp_ins, OP_S390_VNO, s390_vr16, s390_vr16, s390_vr16); + break; + } + default: + g_assert_not_reached (); + break; + } break; - case OP_XCOMPARE_FP: - ins->opcode = GINT_TO_OPCODE (simd_type_to_extract_fp_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f ), GTMREG_TO_INT (ins->inst_c0 >> 4))); + } + case OP_XCOMPARE_FP:{ + switch (ins->inst_c0 >> 4){ + case CMP_EQ: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_comp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); + break; + case CMP_LT_UN: + case CMP_LT: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GT_UN: + case CMP_GT: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); + break; + case CMP_LE_UN: + case CMP_LE: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = 
temp; + case CMP_GE_UN: + case CMP_GE: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_ge_fp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); + break; + default: + g_assert_not_reached (); + break; + } + } break; default: - g_assert_not_reached (); - } - /* we don't use a register rather the CC set by the vector compare instructions */ - ins->sreg1 = -1; + g_assert_not_reached(); + break; + } + switch (type_is_float(GTMREG_TO_INT(ins->inst_c1))){ + case OP_XCOMPARE: + ins->opcode = GINT_TO_OPCODE (simd_type_to_extract_int_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f), GTMREG_TO_INT (ins->inst_c0 >> 4))); + break; + case OP_XCOMPARE_FP: + ins->opcode = GINT_TO_OPCODE (simd_type_to_extract_fp_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f), GTMREG_TO_INT (ins->inst_c0 >> 4))); break; - case OP_VEC_ABS: + default: + g_assert_not_reached (); + break; + } + /* we don't use a register rather the CC set by the vector compare instructions */ + ins->sreg1 = -1; + } + break; + case OP_VECTOR_IABS: ins->opcode = GINT_TO_OPCODE (simd_type_to_abs_op (GTMREG_TO_INT (ins->inst_c1))); break; case OP_NEGATION: ins->opcode = GINT_TO_OPCODE (simd_type_to_negate_op (GTMREG_TO_INT (ins->inst_c1))); break; case OP_ONES_COMPLEMENT: - ins->opcode = OP_VNOR; + ins->opcode = OP_S390_VNO; ins->sreg2 = ins->sreg1; break; default: @@ -5373,285 +5515,333 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_STOREX_MEMBASE: S390_LONG_VEC(code, vst, vst, ins->sreg1, ins->inst_offset,0, ins->inst_destbasereg); break; - case OP_VAND: + case OP_S390_VN: s390_vn (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VNAND: + case OP_S390_VNN: s390_vnn (code, ins->dreg, ins->sreg1, ins->sreg2); break; case OP_VECTOR_ANDN: s390_vnc (code, ins->dreg, ins->sreg2, ins->sreg1); break; - case OP_VOR: + case OP_S390_VO: s390_vo (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VNOR: + case OP_S390_VNO: s390_vno (code, ins->dreg, 
ins->sreg1, ins->sreg2); break; - case OP_VXOR: + case OP_S390_VX: s390_vx (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VADDB: + case OP_S390_VAB: s390_vab (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VADDH: + case OP_S390_VAH: s390_vah (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VADDF: + case OP_S390_VAF: s390_vaf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VADDG: + case OP_S390_VAG: s390_vag (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VADDQ: - s390_vaq (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFADDS: + case OP_S390_VFASB: s390_vfasb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFADDD: + case OP_S390_VFADB: s390_vfadb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VSUBB: + case OP_S390_VSB: s390_vsb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VSUBH: + case OP_S390_VSH: s390_vsh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VSUBF: + case OP_S390_VSF: s390_vsf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VSUBG: + case OP_S390_VSG: s390_vsg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VSUBQ: - s390_vsq (code, ins->dreg, ins->sreg1, ins->sreg2); - break; - case OP_VFSUBS: + case OP_S390_VFSSB: s390_vfssb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFSUBD: + case OP_S390_VFSDB: s390_vfsdb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMULB: + case OP_S390_VMLB: s390_vmlb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMULHW: + case OP_S390_VMLHW: s390_vmlhw (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMULF: + case OP_S390_VMLF: s390_vmlf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFMULS: + case OP_S390_VFMSB: s390_vfmsb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFMULD: + case OP_S390_VFMDB: s390_vfmdb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFDIVS: + case OP_S390_VFDSB: 
s390_vfdsb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFDIVD: + case OP_S390_VFDDB: s390_vfddb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VSUMB: + case OP_S390_VSUMB: s390_vsumb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VSUMH: + case OP_S390_VSUMH: s390_vsumh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VSUMQF: + case OP_S390_VSUMQF: s390_vsumqf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VSUMQG: + case OP_S390_VSUMQG: s390_vsumqg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMXB: + case OP_S390_VMXB: s390_vmxb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMXH: + case OP_S390_VMXH: s390_vmxh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMXF: + case OP_S390_VMXF: s390_vmxf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMXG: + case OP_S390_VMXG: s390_vmxg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFMAXS: + case OP_S390_VFMAXSB: /* The max function used here is Java Math.Max() */ s390_vfmaxsb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); break; - case OP_VFMAXD: + case OP_S390_VFMAXDB: s390_vfmaxdb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); break; - case OP_VMXLB: + case OP_S390_VMXLB: s390_vmxlb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMXLH: + case OP_S390_VMXLH: s390_vmxlh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMXLF: + case OP_S390_VMXLF: s390_vmxlf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMXLG: + case OP_S390_VMXLG: s390_vmxlg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMNB: + case OP_S390_VMNB: s390_vmnb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMNH: + case OP_S390_VMNH: s390_vmnh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMNF: + case OP_S390_VMNF: s390_vmnf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMNG: + case OP_S390_VMNG: s390_vmng (code, ins->dreg, ins->sreg1, 
ins->sreg2); break; - case OP_VFMINS: + case OP_S390_VFMINSB: /* The min function used here is Java Math.Min() */ s390_vfminsb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); break; - case OP_VFMIND: + case OP_S390_VFMINDB: s390_vfmindb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); break; - case OP_VMNLB: + case OP_S390_VMNLB: s390_vmnlb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMNLH: + case OP_S390_VMNLH: s390_vmnlh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMNLF: + case OP_S390_VMNLF: s390_vmnlf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VMNLG: + case OP_S390_VMNLG: s390_vmnlg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VREPIB: + case OP_S390_VREPIB: s390_vrepib (code, ins->dreg, ins->inst_c1); break; - case OP_VREPIH: + case OP_S390_VREPIH: s390_vrepih (code, ins->dreg, ins->inst_c1); break; - case OP_VREPIF: + case OP_S390_VREPIF: s390_vrepif (code, ins->dreg, ins->inst_c1); break; - case OP_VREPIG: + case OP_S390_VREPIG: s390_vrepig (code, ins->dreg, ins->inst_c1); break; - case OP_VCEQBS: + case OP_S390_VCEQBS: s390_vceqbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VCEQHS: + case OP_S390_VCEQB: + s390_vceqb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VCEQHS: s390_vceqhs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VCEQFS: + case OP_S390_VCEQH: + s390_vceqh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VCEQFS: s390_vceqfs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VCEQGS: + case OP_S390_VCEQF: + s390_vceqf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VCEQGS: s390_vceqgs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFCESBS: + case OP_S390_VCEQG: + s390_vceqg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VFCESBS: s390_vfcesbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFCEDBS: + case OP_S390_VFCESB: + s390_vfcesb (code, ins->dreg, 
ins->sreg1, ins->sreg2); + break; + case OP_S390_VFCEDBS: s390_vfcedbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFCHSBS: + case OP_S390_VFCEDB: + s390_vfcedb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VFCHSBS: s390_vfchsbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFCHDBS: + case OP_S390_VFCHSB: + s390_vfchsb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VFCHDBS: s390_vfchdbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFCHESBS: + case OP_S390_VFCHDB: + s390_vfchdb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VFCHESBS: s390_vfchesbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VFCHEDBS: + case OP_S390_VFCHESB: + s390_vfchesb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VFCHEDBS: s390_vfchedbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VGMB: + case OP_S390_VFCHEDB: + s390_vfchedb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VGMB: s390_vgmb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VGMH: + case OP_S390_VGMH: s390_vgmh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VGMF: + case OP_S390_VGMF: s390_vgmf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VGMG: + case OP_S390_VGMG: s390_vgmg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VECB: + case OP_S390_VECB: s390_vecb (code, ins->dreg, ins->sreg1); break; - case OP_VECH: + case OP_S390_VECH: s390_vech (code, ins->dreg, ins->sreg1); break; - case OP_VECF: + case OP_S390_VECF: s390_vecf (code, ins->dreg, ins->sreg1); break; - case OP_VECG: + case OP_S390_VECG: s390_vecg (code, ins->dreg, ins->sreg1); break; - case OP_VECLB: + case OP_S390_VECLB: s390_veclb (code, ins->dreg, ins->sreg1); break; - case OP_VECLH: + case OP_S390_VECLH: s390_veclh (code, ins->dreg, ins->sreg1); break; - case OP_VECLF: + case OP_S390_VECLF: s390_veclf (code, ins->dreg, ins->sreg1); break; - case 
OP_VECLG: + case OP_S390_VECLG: s390_veclg (code, ins->dreg, ins->sreg1); break; - case OP_VCHBS: + case OP_S390_VCHBS: s390_vchbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VCHHS: + case OP_S390_VCHHS: s390_vchhs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VCHFS: + case OP_S390_VCHFS: s390_vchfs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VCHGS: + case OP_S390_VCHGS: s390_vchgs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VCHLBS: + case OP_S390_VCHB: + s390_vchb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VCHH: + s390_vchh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VCHF: + s390_vchf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VCHG: + s390_vchg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VCHLBS: s390_vchlbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VCHLHS: + case OP_S390_VCHLHS: s390_vchlhs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VCHLFS: + case OP_S390_VCHLFS: s390_vchlfs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VCHLGS: + case OP_S390_VCHLGS: s390_vchlgs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VLPB: + case OP_S390_VCHLB: + s390_vchlb (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VCHLH: + s390_vchlh (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VCHLF: + s390_vchlf (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VCHLG: + s390_vchlg (code, ins->dreg, ins->sreg1, ins->sreg2); + break; + case OP_S390_VLPB: s390_vlpb (code, ins->dreg, ins->sreg1); break; - case OP_VLPH: + case OP_S390_VLPH: s390_vlph (code, ins->dreg, ins->sreg1); break; - case OP_VLPF: + case OP_S390_VLPF: s390_vlpf (code, ins->dreg, ins->sreg1); break; - case OP_VLPG: + case OP_S390_VLPG: s390_vlpg (code, ins->dreg, ins->sreg1); break; - case OP_VFLPDB: + case OP_S390_VFLPDB: s390_vfpsodb (code, ins->dreg, 
ins->sreg1, 2); break; - case OP_VFLPSB: + case OP_S390_VFLPSB: s390_vfpsosb (code, ins->dreg, ins->sreg1, 2); break; - case OP_VFLCDB: + case OP_S390_VFLCDB: s390_vfpsodb (code, ins->dreg, ins->sreg1, 0); break; - case OP_VFLCSB: + case OP_S390_VFLCSB: s390_vfpsosb (code, ins->dreg, ins->sreg1, 0); break; case OP_INSERT_I1: @@ -5696,7 +5886,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_EXTRACT_R8: s390_vlgvg (code, s390_r13, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vlvgg (code, ins->dreg, s390_r13, 0, 0); + s390_ldgr (code, ins->dreg, s390_r13); break; case OP_XEXTRACT_I1: s390_vlgvb (code, ins->dreg, ins->sreg1, ins->sreg2, 0); @@ -5711,6 +5901,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) s390_vlgvg (code, ins->dreg, ins->sreg1, ins->sreg2, 0); break; case OP_XEXTRACT_R4: + s390_vlgvf (code, s390_r13, ins->sreg1, ins->sreg2, 0); + s390_ldgr (code, ins->dreg, s390_r13); + break; case OP_XEXTRACT_R8: s390_vlgvg (code, s390_r13, ins->sreg1, ins->sreg2, 0); s390_ldgr (code, ins->dreg, s390_r13); @@ -5741,73 +5934,73 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) s390_vlvgg (code, s390_vr16, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); s390_vrepg (code, ins->dreg, s390_vr16, 0); break; - case OP_VPKH: + case OP_S390_VPKH: s390_vpkh ( code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VPKF: + case OP_S390_VPKF: s390_vpkf ( code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VPKG: + case OP_S390_VPKG: s390_vpkg ( code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_VLCB: + case OP_S390_VLCB: s390_vlcb (code, ins->dreg, ins->sreg1); break; - case OP_VLCH: + case OP_S390_VLCH: s390_vlch (code, ins->dreg, ins->sreg1); break; - case OP_VLCF: + case OP_S390_VLCF: s390_vlcf (code, ins->dreg, ins->sreg1); break; - case OP_VLCG: + case OP_S390_VLCG: s390_vlcg (code, ins->dreg, ins->sreg1); break; - case OP_VUPLB: + case OP_S390_VUPLB: s390_vuplb 
(code, ins->dreg, ins->sreg1); break; - case OP_VUPLH: + case OP_S390_VUPLHW: s390_vuplhw (code, ins->dreg, ins->sreg1); break; - case OP_VUPLF: + case OP_S390_VUPLF: s390_vuplf (code, ins->dreg, ins->sreg1); break; - case OP_VUPLLB: + case OP_S390_VUPLLB: s390_vupllb (code, ins->dreg, ins->sreg1); break; - case OP_VUPLLH: + case OP_S390_VUPLLH: s390_vupllh (code, ins->dreg, ins->sreg1); break; - case OP_VUPLLF: + case OP_S390_VUPLLF: s390_vupllf (code, ins->dreg, ins->sreg1); break; - case OP_VUPHB: + case OP_S390_VUPHB: s390_vuphb (code, ins->dreg, ins->sreg1); break; - case OP_VUPHH: + case OP_S390_VUPHH: s390_vuphh (code, ins->dreg, ins->sreg1); break; - case OP_VUPHF: + case OP_S390_VUPHF: s390_vuphf (code, ins->dreg, ins->sreg1); break; - case OP_VUPLHB: + case OP_S390_VUPLHB: s390_vuplhb (code, ins->dreg, ins->sreg1); break; - case OP_VUPLHH: + case OP_S390_VUPLHH: s390_vuplhh (code, ins->dreg, ins->sreg1); break; - case OP_VUPLHF: + case OP_S390_VUPLHF: s390_vuplhf (code, ins->dreg, ins->sreg1); break; - case OP_VFISB: + case OP_S390_VFISB: s390_vfisb (code, ins->dreg, ins->sreg1, 0, ins->inst_c0); break; - case OP_VFIDB: + case OP_S390_VFIDB: s390_vfidb (code, ins->dreg, ins->sreg1, 0, ins->inst_c0); break; - case OP_VFSQSB: + case OP_S390_VFSQSB: s390_vfsqsb (code, ins->dreg, ins->sreg1); break; - case OP_VFSQDB: + case OP_S390_VFSQDB: s390_vfsqdb (code, ins->dreg, ins->sreg1); break; case OP_XONES: diff --git a/src/mono/mono/mini/mini-s390x.h b/src/mono/mono/mini/mini-s390x.h index c6cfc367e97dbe..9611f4e70077ab 100644 --- a/src/mono/mono/mini/mini-s390x.h +++ b/src/mono/mono/mini/mini-s390x.h @@ -84,7 +84,8 @@ struct SeqPointInfo { #define MONO_ARCH_HAVE_UNWIND_BACKTRACE 1 #define MONO_ARCH_FLOAT32_SUPPORTED 1 #define MONO_ARCH_SIMD_INTRINSICS mono_hwcap_s390x_has_vec - +#define MONO_ARCH_NEED_SIMD_BANK 1 +#define MONO_ARCH_USE_SHARED_FP_SIMD_BANK 1 #define S390_STACK_ALIGNMENT 8 #define S390_FIRST_ARG_REG s390_r2 #define S390_LAST_ARG_REG s390_r6 @@ 
-150,7 +151,7 @@ struct SeqPointInfo { /*-----------------------------------------------*/ #define MONO_MAX_XREGS 31 -#define MONO_ARCH_CALLEE_XREGS 0x0 +#define MONO_ARCH_CALLEE_XREGS 0xFFFEFFFE #define MONO_ARCH_CALLEE_SAVED_XREGS 0x0 // Does the ABI have a volatile non-parameter register, so tailcall @@ -259,6 +260,7 @@ s390_patch_addr (guchar *code, guint64 target) } while (0) /*========================= End of Function ========================*/ + #define S390_SET(loc, dr, v) \ do { \ guint64 val = (guint64) v; \ @@ -308,7 +310,6 @@ s390_patch_addr (guchar *code, guint64 target) s390_##op (loc, r, 0, s390_r13, 0); \ } - #define S390_SET_MASK(loc, dr, v) \ do { \ if (s390_is_imm16 (v)) { \ diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 187adc8c623365..a3e37d6851e89c 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -558,11 +558,10 @@ emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum element_type, Mono return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); } #elif defined(TARGET_S390X) - MonoInst* cmp = emit_xcompare (cfg, klass, element_type, arg1, arg2); - MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XEXTRACT, cmp->dreg, -1); + MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XCOMPARE_XEXTRACT, arg1->dreg, arg2->dreg); ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET; - ret->inst_c0 |= ((cmp->inst_c0 << 4) & 0xf0); - ret->inst_c1 = cmp->opcode; + ret->inst_c0 |= ((((gint64)CMP_EQ) << 4) & 0xf0); + ret->inst_c1 = element_type; return ret; #else MonoInst *ins = emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); @@ -695,6 +694,38 @@ get_xconst_int_elem (MonoCompile *cfg, MonoInst *ins, MonoTypeEnum etype, int in #ifdef TARGET_S390X static int type_to_extract_op (MonoTypeEnum type); + +static int +lower_xcompare_op (int intrinsic_id, MonoTypeEnum etype) +{ + gboolean is_unsigned = 
type_enum_is_unsigned (etype); + + switch (intrinsic_id) { + case SN_GreaterThan: + case SN_GreaterThanAll: + case SN_GreaterThanAny: + return is_unsigned ? CMP_GT_UN : CMP_GT; + break; + case SN_GreaterThanOrEqual: + case SN_GreaterThanOrEqualAll: + case SN_GreaterThanOrEqualAny: + return is_unsigned ? CMP_GE_UN : CMP_GE; + break; + case SN_LessThan: + case SN_LessThanAll: + case SN_LessThanAny: + return is_unsigned ? CMP_LT_UN : CMP_LT; + break; + case SN_LessThanOrEqual: + case SN_LessThanOrEqualAll: + case SN_LessThanOrEqualAny: + return is_unsigned ? CMP_LE_UN : CMP_LE; + break; + default: + g_assert_not_reached (); + } +} + static MonoInst* emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_type, MonoInst *arg) { @@ -712,28 +743,26 @@ emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_t break; case MONO_TYPE_I1: case MONO_TYPE_U1: - ins = emit_simd_ins (cfg, vector_class, OP_VSUMB, ins->dreg,tmp->dreg); - ins = emit_simd_ins (cfg, vector_class, OP_VSUMQF, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMB, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMQF, ins->dreg,tmp->dreg); index = 16; break; case MONO_TYPE_I2: case MONO_TYPE_U2: - ins = emit_simd_ins (cfg, vector_class, OP_VSUMH, ins->dreg,tmp->dreg); - ins = emit_simd_ins (cfg, vector_class, OP_VSUMQF, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMH, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMQF, ins->dreg,tmp->dreg); index = 8; break; case MONO_TYPE_I4: case MONO_TYPE_U4: - op = OP_VSUMQF; - ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMQF, ins->dreg,tmp->dreg); index = 4; break; case MONO_TYPE_I: case MONO_TYPE_U: case MONO_TYPE_I8: case MONO_TYPE_U8: - op = OP_VSUMQG; - ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,tmp->dreg); + ins = emit_simd_ins 
(cfg, vector_class, OP_S390_VSUMQG, ins->dreg,tmp->dreg); index = 2; break; default: @@ -2082,7 +2111,11 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); } #elif defined(TARGET_S390X) - return emit_simd_ins_for_sig (cfg, klass, OP_VEC_ABS, -1, arg0_type, fsig, args); + if (type_enum_is_float(arg0_type)) { + return emit_simd_ins_for_sig (cfg, klass, arg0_type == MONO_TYPE_R8 ? OP_S390_VFLPDB : OP_S390_VFLPSB, -1, arg0_type, fsig, args); + } else { + return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); + } #else return NULL; #endif @@ -2244,9 +2277,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi int ceil_or_floor = id == SN_Ceiling ? 6 : 7; switch (arg0_type){ case MONO_TYPE_R4: - return emit_simd_ins_for_sig (cfg, klass, OP_VFISB, ceil_or_floor, arg0_type, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VFISB, ceil_or_floor, arg0_type, fsig, args); case MONO_TYPE_R8: - return emit_simd_ins_for_sig (cfg, klass, OP_VFISB, ceil_or_floor, arg0_type, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VFIDB, ceil_or_floor, arg0_type, fsig, args); default: g_assert_not_reached (); } @@ -2453,18 +2486,18 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } } } else { - MonoInst* cmp = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]); #ifndef TARGET_S390X + MonoInst* cmp = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]); MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); ret->inst_c0 = (id == SN_EqualsAll) ? 
SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; ret->inst_c1 = mono_class_value_size (klass, NULL); #else /* we need XCOMPARE(_FP), SIMD_EXTR_ARE_ALL/ANY_SET and CMP_* ops in the same ins to emit correct*/ /* load on condition instructions */ - MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XEXTRACT, cmp->dreg, -1); + MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XCOMPARE_XEXTRACT, args [0]->dreg, args [1]->dreg); ret->inst_c0 = (id == SN_EqualsAll) ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; - ret->inst_c0 |= ((cmp->inst_c0 << 4) & 0xf0); - ret->inst_c1 = cmp->opcode; + ret->inst_c0 |= ((((gint64)CMP_EQ) << 4) & 0xf0); + ret->inst_c1 = arg0_type; #endif return ret; } @@ -2597,6 +2630,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_simd_ins_for_sig (cfg, klass, OP_SSE_MOVMSK, -1, type, fsig, args); #endif + return NULL; } case SN_GetElement: { int elems; @@ -2746,18 +2780,19 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_not_xequal (cfg, arg_class, arg0_type, cmp, zero); } } else { - MonoInst* cmp = emit_xcompare_for_intrinsic (cfg, arg_class, id, arg0_type, args [0], args [1]); #ifndef TARGET_S390X + MonoInst* cmp = emit_xcompare_for_intrinsic (cfg, arg_class, id, arg0_type, args [0], args [1]); MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); ret->inst_c0 = is_all ? 
SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; ret->inst_c1 = mono_class_value_size (klass, NULL); #else /* we need XCOMPARE(_FP), SIMD_EXTR_ARE_ALL/ANY_SET and CMP_* ops in the same ins to emit correct*/ /* load on condition instructions */ - MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XEXTRACT, cmp->dreg, -1); + MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XCOMPARE_XEXTRACT, args[0]->dreg, args[1]->dreg); + int temp = lower_xcompare_op(id, arg0_type); ret->inst_c0 = is_all ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; - ret->inst_c0 |= ((cmp->inst_c0 << 4) & 0xf0); - ret->inst_c1 = cmp->opcode; + ret->inst_c0 |= ((temp << 4) & 0xf0); + ret->inst_c1 = arg0_type; #endif return ret; } @@ -3077,13 +3112,13 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi switch (arg0_type) { case MONO_TYPE_I2: case MONO_TYPE_U2: - return emit_simd_ins_for_sig (cfg, klass, OP_VPKH, -1, -1, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VPKH, -1, -1, fsig, args); case MONO_TYPE_I4: case MONO_TYPE_U4: - return emit_simd_ins_for_sig (cfg, klass, OP_VPKF, -1, -1, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VPKF, -1, -1, fsig, args); case MONO_TYPE_I8: case MONO_TYPE_U8: - return emit_simd_ins_for_sig (cfg, klass, OP_VPKG, -1, -1, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VPKG, -1, -1, fsig, args); } return NULL; #else @@ -3260,7 +3295,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X, instc0, arg0_type, fsig, args); #elif defined(TARGET_S390X) - int instc0 = arg0_type == MONO_TYPE_R4 ? OP_VFSQSB : OP_VFSQDB; + int instc0 = arg0_type == MONO_TYPE_R4 ? 
OP_S390_VFSQSB : OP_S390_VFSQDB; return emit_simd_ins_for_sig (cfg, klass, instc0, 0, arg0_type, fsig, args); #else return NULL; @@ -3423,17 +3458,17 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (id == SN_WidenLower) { switch (arg0_type){ case MONO_TYPE_I1: - return emit_simd_ins (cfg, klass, OP_VUPHB, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPHB, args[0]->dreg, -1); case MONO_TYPE_U1: - return emit_simd_ins (cfg, klass, OP_VUPLHB, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPLHB, args[0]->dreg, -1); case MONO_TYPE_I2: - return emit_simd_ins (cfg, klass, OP_VUPHH, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPHH, args[0]->dreg, -1); case MONO_TYPE_U2: - return emit_simd_ins (cfg, klass, OP_VUPLHH, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPLHH, args[0]->dreg, -1); case MONO_TYPE_I4: - return emit_simd_ins (cfg, klass, OP_VUPHF, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPHF, args[0]->dreg, -1); case MONO_TYPE_U4: - return emit_simd_ins (cfg, klass, OP_VUPLHF, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPLHF, args[0]->dreg, -1); default: return NULL; } @@ -3441,17 +3476,17 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi else { switch (arg0_type){ case MONO_TYPE_I1: - return emit_simd_ins (cfg, klass, OP_VUPLB, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPLB, args[0]->dreg, -1); case MONO_TYPE_U1: - return emit_simd_ins (cfg, klass, OP_VUPLLB, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPLLB, args[0]->dreg, -1); case MONO_TYPE_I2: - return emit_simd_ins (cfg, klass, OP_VUPLH, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPLHW, args[0]->dreg, -1); case MONO_TYPE_U2: - return emit_simd_ins (cfg, klass, OP_VUPLLH, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPLLH, 
args[0]->dreg, -1); case MONO_TYPE_I4: - return emit_simd_ins (cfg, klass, OP_VUPLF, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPLF, args[0]->dreg, -1); case MONO_TYPE_U4: - return emit_simd_ins (cfg, klass, OP_VUPLLF, args[0]->dreg, -1); + return emit_simd_ins (cfg, klass, OP_S390_VUPLLF, args[0]->dreg, -1); default: return NULL; } @@ -3637,18 +3672,18 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f switch (etype->type) { case MONO_TYPE_I1: case MONO_TYPE_U1: - return emit_simd_ins_for_sig (cfg, klass, OP_VREPIB, etype->type, 1, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VREPIB, etype->type, 1, fsig, args); case MONO_TYPE_I2: case MONO_TYPE_U2: - return emit_simd_ins_for_sig (cfg, klass, OP_VREPIH, etype->type, 1, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VREPIH, etype->type, 1, fsig, args); case MONO_TYPE_I4: case MONO_TYPE_U4: - return emit_simd_ins_for_sig (cfg, klass, OP_VREPIF, etype->type, 1, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VREPIF, etype->type, 1, fsig, args); case MONO_TYPE_I: case MONO_TYPE_U: case MONO_TYPE_I8: case MONO_TYPE_U8: - return emit_simd_ins_for_sig (cfg, klass, OP_VREPIG, etype->type, 1, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VREPIG, etype->type, 1, fsig, args); default: g_assert_not_reached (); } @@ -6875,6 +6910,7 @@ emit_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi // If cmethod->klass is nested, the namespace is on the enclosing class. 
if (m_class_get_nested_in (cmethod->klass)) class_ns = m_class_get_name_space (m_class_get_nested_in (cmethod->klass)); + MonoInst *simd_inst = ecb (class_ns, class_name, cfg, cmethod, fsig, args); if (simd_inst) cfg->uses_simd_intrinsics = TRUE; @@ -6912,8 +6948,8 @@ decompose_vtype_opt_uses_simd_intrinsics (MonoCompile *cfg, MonoInst *ins) case OP_XPHI: case OP_LOADX_MEMBASE: case OP_LOADX_ALIGNED_MEMBASE: - case OP_STOREX_ALIGNED_MEMBASE_REG: case OP_STOREX_MEMBASE: + case OP_STOREX_ALIGNED_MEMBASE_REG: return TRUE; default: return FALSE; From b802d1bac49002e2fe573c127dfc6261b508a8f8 Mon Sep 17 00:00:00 2001 From: saitama951 Date: Fri, 18 Jul 2025 14:33:40 +0000 Subject: [PATCH 4/4] Address Review Comments - 3 * remove the hard-coded s390_vr16 and allocate a temp_reg instead * omit the move in case ins->sreg1 and ins->dreg are the same register --- src/mono/mono/mini/cpu-s390x.mdesc | 2 +- src/mono/mono/mini/mini-ops.h | 4 +- src/mono/mono/mini/mini-s390x.c | 177 +++++++++++++---------------- src/mono/mono/mini/mini-s390x.h | 2 +- 4 files changed, 82 insertions(+), 103 deletions(-) diff --git a/src/mono/mono/mini/cpu-s390x.mdesc b/src/mono/mono/mini/cpu-s390x.mdesc index 19847ab24cf518..74959cb5a1dcb3 100644 --- a/src/mono/mono/mini/cpu-s390x.mdesc +++ b/src/mono/mono/mini/cpu-s390x.mdesc @@ -522,7 +522,7 @@ s390_vx: dest:x src1:x src2:x len:6 s390_vo: dest:x src1:x src2:x len:6 s390_vno: dest:x src1:x src2:x len:6 s390_vn: dest:x src1:x src2:x len:6 -vandnot: dest:x src1:x src2:x len:6 +vector_andnot: dest:x src1:x src2:x len:6 s390_vnn: dest:x src1:x src2:x len:6 s390_vmlb: dest:x src1:x src2:x len:6 s390_vmlhw: dest:x src1:x src2:x len:6 diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index 03b622c5db0709..ba9431fed958fa 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -121,7 +121,6 @@ MINI_OP(OP_STORER8_MEMBASE_REG, "storer8_membase_reg", IREG, FREG, NONE) #if defined(TARGET_X86) || defined(TARGET_AMD64)
MINI_OP(OP_STOREX_MEMBASE_REG, "storex_membase_reg", IREG, XREG, NONE) -MINI_OP(OP_STOREX_ALIGNED_MEMBASE_REG, "storex_aligned_membase_reg", IREG, XREG, NONE) MINI_OP(OP_STOREX_NTA_MEMBASE_REG, "storex_nta_membase_reg", IREG, XREG, NONE) #endif @@ -1629,7 +1628,6 @@ MINI_OP(OP_S390_VUPLLH, "s390_vupllh", XREG, XREG, NONE) MINI_OP(OP_S390_VUPLLF, "s390_vupllf", XREG, XREG, NONE) MINI_OP(OP_S390_VFISB, "s390_vfidb", XREG, XREG, NONE) MINI_OP(OP_S390_VFIDB, "s390_vfisb", XREG, XREG, NONE) -MINI_OP(OP_S390_XEXTRACT, "s390_xextract", IREG, XREG, NONE) MINI_OP(OP_S390_XCOMPARE_XEXTRACT, "s390_xcompare_xextract", IREG, XREG, XREG) #endif @@ -2012,7 +2010,7 @@ MINI_OP(OP_NEGATION, "negate", XREG, XREG, NONE) MINI_OP(OP_ONES_COMPLEMENT, "ones_complement", XREG, XREG, NONE) /* Select bits from src2/src3 using src1 */ MINI_OP3(OP_BSL, "bitwise_select", XREG, XREG, XREG, XREG) -MINI_OP(OP_VECTOR_ANDN, "vandnot", XREG, XREG, XREG) +MINI_OP(OP_VECTOR_ANDN, "vector_andnot", XREG, XREG, XREG) MINI_OP(OP_VECTOR_IABS, "vector_integer_abs", XREG, XREG, NONE) #endif diff --git a/src/mono/mono/mini/mini-s390x.c b/src/mono/mono/mini/mini-s390x.c index 90fedfb7cf552a..04c87f37e7eaf1 100644 --- a/src/mono/mono/mini/mini-s390x.c +++ b/src/mono/mono/mini/mini-s390x.c @@ -2618,9 +2618,9 @@ simd_type_to_negate_op (int t) } } -static int +static bool type_is_float (int t){ - return (t == MONO_TYPE_R4 || t == MONO_TYPE_R8) ? 
OP_XCOMPARE_FP : OP_XCOMPARE; + return (t == MONO_TYPE_R4 || t == MONO_TYPE_R8); } /** @@ -2758,16 +2758,11 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) ins->opcode = GINT_TO_OPCODE (simd_type_to_comp_op (GTMREG_TO_INT (ins->inst_c1))); break; case CMP_LT: - temp = ins->sreg1; - ins->sreg1 = ins->sreg2; - ins->sreg2 = temp; - case CMP_GT: - ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); - break; case CMP_LT_UN: temp = ins->sreg1; ins->sreg1 = ins->sreg2; ins->sreg2 = temp; + case CMP_GT: case CMP_GT_UN: ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); break; @@ -2790,86 +2785,68 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) break; } case OP_S390_XCOMPARE_XEXTRACT:{ - switch (type_is_float(GTMREG_TO_INT(ins->inst_c1))){ - case OP_XCOMPARE:{ + guint32 temp_reg = alloc_ireg(cfg); + if (!type_is_float(GTMREG_TO_INT(ins->inst_c1))){ switch (ins->inst_c0 >> 4){ - case CMP_EQ: - NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_comp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); - break; - case CMP_LT: - case CMP_LT_UN: - temp = ins->sreg1; - ins->sreg1 = ins->sreg2; - ins->sreg2 = temp; - case CMP_GT: + case CMP_EQ: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_comp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), temp_reg, ins->sreg1, ins->sreg2); + break; + case CMP_LT: + case CMP_LT_UN: + case CMP_GE: + case CMP_GE_UN: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GT: case CMP_GT_UN: - NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); - break; - case CMP_GE: - case CMP_GE_UN: - temp = ins->sreg1; - ins->sreg1 = ins->sreg2; - ins->sreg2 = temp; - case CMP_LE: - case CMP_LE_UN:{ - NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_any_all_op (GTMREG_TO_INT 
(ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); - NEW_SIMD_INS (cfg, ins, temp_ins, OP_S390_VNO, s390_vr16, s390_vr16, s390_vr16); - break; - } - default: - g_assert_not_reached (); - break; - } - break; + case CMP_LE: + case CMP_LE_UN: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_any_all_op (GTMREG_TO_INT (ins->inst_c1))), temp_reg, ins->sreg1, ins->sreg2); + break; + default: + g_assert_not_reached (); + break; + } + } + else { + switch (ins->inst_c0 >> 4){ + case CMP_EQ: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_comp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), temp_reg, ins->sreg1, ins->sreg2); + break; + case CMP_LT_UN: + case CMP_LT: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GT_UN: + case CMP_GT: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_any_all_op (GTMREG_TO_INT (ins->inst_c1))), temp_reg, ins->sreg1, ins->sreg2); + break; + case CMP_LE_UN: + case CMP_LE: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; + case CMP_GE_UN: + case CMP_GE: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_ge_fp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), temp_reg, ins->sreg1, ins->sreg2); + break; + default: + g_assert_not_reached (); + break; + } } - case OP_XCOMPARE_FP:{ - switch (ins->inst_c0 >> 4){ - case CMP_EQ: - NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_comp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); - break; - case CMP_LT_UN: - case CMP_LT: - temp = ins->sreg1; - ins->sreg1 = ins->sreg2; - ins->sreg2 = temp; - case CMP_GT_UN: - case CMP_GT: - NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); - break; - case CMP_LE_UN: - case CMP_LE: - temp = ins->sreg1; - ins->sreg1 = ins->sreg2; - ins->sreg2 = temp; - case CMP_GE_UN: - case CMP_GE: - NEW_SIMD_INS (cfg, ins, temp_ins, 
GINT_TO_OPCODE (simd_type_to_ge_fp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), s390_vr16, ins->sreg1, ins->sreg2); - break; - default: - g_assert_not_reached (); - break; - } + if(!type_is_float(GTMREG_TO_INT(ins->inst_c1))){ + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_extract_int_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f), GTMREG_TO_INT (ins->inst_c0 >> 4))), ins->dreg, -1, -1); + NULLIFY_INS(ins); + } + else { + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_extract_fp_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f), GTMREG_TO_INT (ins->inst_c0 >> 4))), ins->dreg, -1, -1); + NULLIFY_INS(ins); } - break; - default: - g_assert_not_reached(); - break; } - switch (type_is_float(GTMREG_TO_INT(ins->inst_c1))){ - case OP_XCOMPARE: - ins->opcode = GINT_TO_OPCODE (simd_type_to_extract_int_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f), GTMREG_TO_INT (ins->inst_c0 >> 4))); - break; - case OP_XCOMPARE_FP: - ins->opcode = GINT_TO_OPCODE (simd_type_to_extract_fp_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f), GTMREG_TO_INT (ins->inst_c0 >> 4))); break; - default: - g_assert_not_reached (); - break; - } - /* we don't use a register rather the CC set by the vector compare instructions */ - ins->sreg1 = -1; - } - break; case OP_VECTOR_IABS: ins->opcode = GINT_TO_OPCODE (simd_type_to_abs_op (GTMREG_TO_INT (ins->inst_c1))); break; @@ -5845,19 +5822,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) s390_vfpsosb (code, ins->dreg, ins->sreg1, 0); break; case OP_INSERT_I1: - s390_vlr (code, ins->dreg, ins->sreg1); + if (ins->dreg != ins->sreg1) + s390_vlr (code, ins->dreg, ins->sreg1); s390_vlvgb (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; case OP_INSERT_I2: - s390_vlr (code, ins->dreg, ins->sreg1); + if (ins->dreg != ins->sreg1) + s390_vlr (code, ins->dreg, ins->sreg1); s390_vlvgh (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; case OP_INSERT_I4: - s390_vlr (code, ins->dreg, ins->sreg1); + if 
(ins->dreg != ins->sreg1) + s390_vlr (code, ins->dreg, ins->sreg1); s390_vlvgf (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; case OP_INSERT_I8: - s390_vlr (code, ins->dreg, ins->sreg1); + if (ins->dreg != ins->sreg1) + s390_vlr (code, ins->dreg, ins->sreg1); s390_vlvgg (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; case OP_INSERT_R4: @@ -5909,30 +5890,30 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) s390_ldgr (code, ins->dreg, s390_r13); break; case OP_EXPAND_I1: - s390_vlvgb (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vrepb (code, ins->dreg, s390_vr16, 0); + s390_vlvgb (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepb (code, ins->dreg, ins->dreg, 0); break; case OP_EXPAND_I2: - s390_vlvgh (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vreph (code, ins->dreg, s390_vr16, 0); + s390_vlvgh (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vreph (code, ins->dreg, ins->dreg, 0); break; case OP_EXPAND_I4: - s390_vlvgf (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vrepf (code, ins->dreg, s390_vr16, 0); + s390_vlvgf (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepf (code, ins->dreg, ins->dreg, 0); break; case OP_EXPAND_I8: - s390_vlvgg (code, s390_vr16, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vrepg (code, ins->dreg, s390_vr16, 0); + s390_vlvgg (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepg (code, ins->dreg, ins->dreg, 0); break; case OP_EXPAND_R4: s390_vlgvf (code, s390_r13, ins->sreg1, 0, 0); - s390_vlvgf (code, s390_vr16, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vrepf (code, ins->dreg, s390_vr16, 0); + s390_vlvgf (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepf (code, ins->dreg, ins->dreg, 0); break; case OP_EXPAND_R8: s390_lgdr (code, 
s390_r13, ins->sreg1); - s390_vlvgg (code, s390_vr16, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); - s390_vrepg (code, ins->dreg, s390_vr16, 0); + s390_vlvgg (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepg (code, ins->dreg, ins->dreg, 0); break; case OP_S390_VPKH: s390_vpkh ( code, ins->dreg, ins->sreg1, ins->sreg2); diff --git a/src/mono/mono/mini/mini-s390x.h b/src/mono/mono/mini/mini-s390x.h index 9611f4e70077ab..b9231fd096d1ef 100644 --- a/src/mono/mono/mini/mini-s390x.h +++ b/src/mono/mono/mini/mini-s390x.h @@ -151,7 +151,7 @@ struct SeqPointInfo { /*-----------------------------------------------*/ #define MONO_MAX_XREGS 31 -#define MONO_ARCH_CALLEE_XREGS 0xFFFEFFFE +#define MONO_ARCH_CALLEE_XREGS 0xFFFFFFFE #define MONO_ARCH_CALLEE_SAVED_XREGS 0x0 // Does the ABI have a volatile non-parameter register, so tailcall