diff --git a/src/mono/mono/arch/s390x/s390x-codegen.h b/src/mono/mono/arch/s390x/s390x-codegen.h index e5679d9c18087e..d28b9a6e2fe60f 100644 --- a/src/mono/mono/arch/s390x/s390x-codegen.h +++ b/src/mono/mono/arch/s390x/s390x-codegen.h @@ -139,38 +139,38 @@ typedef enum { } S390SpecialRegister; typedef enum { - s390_VR0 = 0, - s390_VR1 = 1, - s390_VR2 = 2, - s390_VR3 = 3, - s390_VR4 = 4, - s390_VR5 = 5, - s390_VR6 = 6, - s390_VR7 = 7, - s390_VR8 = 8, - s390_VR9 = 9, - s390_VR10 = 10, - s390_VR11 = 11, - s390_VR12 = 12, - s390_VR13 = 13, - s390_VR14 = 14, - s390_VR15 = 15, - s390_VR16 = 16, - s390_VR17 = 17, - s390_VR18 = 18, - s390_VR19 = 19, - s390_VR20 = 20, - s390_VR21 = 21, - s390_VR22 = 22, - s390_VR23 = 23, - s390_VR24 = 24, - s390_VR25 = 25, - s390_VR26 = 26, - s390_VR27 = 27, - s390_VR28 = 28, - s390_VR29 = 29, - s390_VR30 = 30, - s390_VR31 = 31, + s390_vr0 = 0, + s390_vr1 = 1, + s390_vr2 = 2, + s390_vr3 = 3, + s390_vr4 = 4, + s390_vr5 = 5, + s390_vr6 = 6, + s390_vr7 = 7, + s390_vr8 = 8, + s390_vr9 = 9, + s390_vr10 = 10, + s390_vr11 = 11, + s390_vr12 = 12, + s390_vr13 = 13, + s390_vr14 = 14, + s390_vr15 = 15, + s390_vr16 = 16, + s390_vr17 = 17, + s390_vr18 = 18, + s390_vr19 = 19, + s390_vr20 = 20, + s390_vr21 = 21, + s390_vr22 = 22, + s390_vr23 = 23, + s390_vr24 = 24, + s390_vr25 = 25, + s390_vr26 = 26, + s390_vr27 = 27, + s390_vr28 = 28, + s390_vr29 = 29, + s390_vr30 = 30, + s390_vr31 = 31, s390_VR_NREG = 32, } s390_VR_Reg_No; @@ -943,14 +943,7 @@ typedef struct { s390_emit16(c, ((i) & 0xffff)); \ } while (0) -#define S390_SIY(c,opc,s1,p1,m2) do \ -{ \ - s390_emit16(c, ((opc & 0xff00) | m2)); \ - s390_emit32(c, ((s1) << 24 | (((p2) & 0xfffff) << 8) | \ - (opc & 0xff))); \ -} while (0) - -#define S390_SIY_1(c,opc,d1,b1,i2) do \ +#define S390_SIY(c,opc,d1,b1,i2) do \ { \ s390_emit16(c, ((opc & 0xff00) | i2)); \ s390_emit32(c, ((b1) << 28 | (((d1) & 0xfff) << 16) | \ @@ -1005,7 +998,7 @@ typedef struct { #define S390_VRIa(c,opc,v1,i2,m3) do \ { \ - char rxb = 
(((v1) > 15) << 7); \ + char rxb = (((v1) > 15) << 3); \ int vr1 = ((v1) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4)); \ s390_emit16(c, (i2)); \ @@ -1014,7 +1007,7 @@ typedef struct { #define S390_VRIb(c,opc,v1,i2,i3,m4) do \ { \ - char rxb = (((v1) > 15) << 7); \ + char rxb = (((v1) > 15) << 3); \ int vr1 = ((v1) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4)); \ s390_emit16(c, (((i2) << 8) | (i3))); \ @@ -1023,18 +1016,17 @@ typedef struct { #define S390_VRIc(c,opc,v1,v3,i2,m4) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5); \ + char rxb = (((v1) > 15) << 3) | (((v3) > 15) << 2); \ int vr1 = ((v1) % 16), vr3 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr3)); \ - s390_emit16(c, (v4)); \ + s390_emit16(c, (i2)); \ s390_emit16(c, (((m4) << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ } while (0) #define S390_VRId(c,opc,v1,v2,v3,i4,m5) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16), vr3 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ s390_emit16(c, (vr3 << 12) | (i2)); \ @@ -1043,7 +1035,7 @@ typedef struct { #define S390_VRIe(c,opc,v1,v2,i3,m4,m5) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ s390_emit16(c, ((i2) << 8) | (m5)); \ @@ -1052,7 +1044,7 @@ typedef struct { #define S390_VRRa(c,opc,v1,v2,m3,m4,m5) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ s390_emit16(c, ((m5) << 4) | (m4)); \ @@ -1061,48 +1053,49 @@ typedef struct { #define 
S390_VRRb(c,opc,v1,v2,v3,m4,m5) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16), vr3 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ - s390_emit16(c, (vr3 << 12) | ((m5) << 4) | (m4)); \ - s390_emit16(c, (((m3) << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ + s390_emit16(c, (vr3 << 12) | ((m5) << 4)); \ + s390_emit16(c, (((m4) << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ } while (0) -#define S390_VRRc(c,opc,v1,v2,m3,m4,m5) do \ +#define S390_VRRc(c,opc,v1,v2,v3,m4,m5,m6) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16), vr3 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ - s390_emit16(c, ((vr3 << 12)| (m5) << 4)); \ + s390_emit16(c, ((vr3 << 12)| ((m6) << 4) | (m5))); \ s390_emit16(c, (((m4) << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ } while (0) #define S390_VRRd(c,opc,v1,v2,v3,v4,m5,m6) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5) | (((v4) > 15) << 4); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1) | ((v4) > 15); \ int vr1 = ((v1) % 16), vr2 = ((v2) % 16), \ - vr3 = ((v3) % 16); vr4 = ((v4) % 16); \ + vr3 = ((v3) % 16), vr4 = ((v4) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ s390_emit16(c, ((vr3 << 12)| ((m6) << 8)) | ((m5) << 4)); \ s390_emit16(c, ((vr4 << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ } while (0) -#define S390_VRRe(c,opc,v1,v2,v3,m4,m5,m6) do \ +#define S390_VRRe(c,opc,v1,v2,v3,v4,m5,m6) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6) | \ - (((v3) > 15) << 5); \ - int vr1 = ((v1) % 16), vr2 = ((v2) % 16), vr3 = ((v3) % 16); \ - s390_emit16(c, ((opc) & 0xff00) | ((v1) << 4) | ((v2))); \ - s390_emit16(c, (((v3) << 
12)| ((m6) << 8)) | (m5)); \ - s390_emit16(c, (((m4) << 12) | ((rxb) << 8) | ((opc) & 0xff))); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2) | \ + (((v3) > 15) << 1) | ((v4) > 15); \ + int vr1 = ((v1) % 16), vr2 = ((v2) % 16), vr3 = ((v3) % 16), \ + vr4 = ((v4) % 16); \ + s390_emit16(c, ((opc) & 0xff00) | ((vr1) << 4) | ((vr2))); \ + s390_emit16(c, (((vr3) << 12)| ((m6) << 8)) | (m5)); \ + s390_emit16(c, (((vr4) << 12) | ((rxb) << 8) | ((opc) & 0xff)));\ } while (0) #define S390_VRRf(c,opc,v1,r2) do \ { \ - char rxb = (((v1) > 15) << 7); \ + char rxb = (((v1) > 15) << 3); \ s390_emit16(c, ((opc) & 0xff00) | ((v1) << 4) | ((v2))); \ s390_emit16(c, ((r2) << 12)| ((r3) << r8) | (m5)); \ s390_emit16(c, (((rxb) << 8) | ((opc) & 0xff))); \ @@ -1110,7 +1103,7 @@ typedef struct { #define S390_VRSa(c,opc,v1,v3,b2,d2,m4) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v3) > 15) << 6); \ + char rxb = (((v1) > 15) << 3) | (((v3) > 15) << 2); \ int vr1 = ((v1) % 16), vr3 = ((v3) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr3)); \ s390_emit16(c, ((b2) << 12)| (d2)); \ @@ -1119,7 +1112,7 @@ typedef struct { #define S390_VRSb(c,opc,v1,r3,b2,d2,m4) do \ { \ - char rxb = (((v1) > 15) << 7); \ + char rxb = (((v1) > 15) << 3); \ int vr1 = (v1) % 16; \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | ((r3))); \ s390_emit16(c, ((b2) << 12)| (d2)); \ @@ -1128,7 +1121,7 @@ typedef struct { #define S390_VRSc(c,opc,r1,v3,b2,d2,m4) do \ { \ - char rxb = (((v1) > 15) << 7); \ + char rxb = (((v3) > 15) << 2); \ int vr3 = (v3) % 16; \ s390_emit16(c, ((opc) & 0xff00) | ((r1) << 4) | (vr3)); \ s390_emit16(c, ((b2) << 12)| (d2)); \ @@ -1137,7 +1130,7 @@ typedef struct { #define S390_VRV(c,opc,v1,v2,b2,d2,m3) do \ { \ - char rxb = (((v1) > 15) << 7) | (((v2) > 15) << 6); \ + char rxb = (((v1) > 15) << 3) | (((v2) > 15) << 2); \ - int vr1 = ((v1) % 16), vr2 = ((v3) % 16); \ + int vr1 = ((v1) % 16), vr2 = ((v2) % 16); \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | (vr2)); \ s390_emit16(c, ((b2) << 12)| (d2)); \ @@ -1146,7 
+1139,7 @@ typedef struct { #define S390_VRX(c,opc,v1,x2,b2,d2,m3) do \ { \ - char rxb = ((v1) > 15) << 7; \ + char rxb = ((v1) > 15) << 3; \ int vr1 = (v1) % 16; \ s390_emit16(c, ((opc) & 0xff00) | (vr1 << 4) | ((x2))); \ s390_emit16(c, ((b2) << 12)| (d2)); \ @@ -1167,7 +1160,7 @@ typedef struct { #define s390_aghik(c, r1, r3, v) S390_RIE_1(c, 0xecd9, r1, r3, v) #define s390_agr(c, r1, r2) S390_RRE(c, 0xb908, r1, r2) #define s390_agrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e8, r1, r2, r3) -#define s390_agsi(c, r, v) S390_SIY(c, 0xeb7a, r v) +#define s390_agsi(c, r, v, i) S390_SIY(c, 0xeb7a, r, v, i) #define s390_ahhhr(c, r1, r2, r3) S390_RRF_1(c, 0xb9c8, r1, r2, r3) #define s390_ahhlr(c, r1, r2, r3) S390_RRF_1(c, 0xb9d8, r1, r2, r3) #define s390_ahi(c, r, v) S390_RI(c, 0xa7a, r, v) @@ -1187,13 +1180,13 @@ typedef struct { #define s390_alghsik(c, r1, r3, v) S390_RIE_1(c, 0xecdb, r1, r3, v) #define s390_algr(c, r1, r2) S390_RRE(c, 0xb90a, r1, r2) #define s390_algrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9ea, r1, r2, r3) -#define s390_algsi(c, d1, b1, i2) S390_SIY_1(c, 0xeb7e, d1, b1, i2) +#define s390_algsi(c, d1, b1, i2) S390_SIY(c, 0xeb7e, d1, b1, i2) #define s390_alhhhr(c, r1, r2, r3) S390_RRF_1(c, 0xb9ca, r1, r2, r3) #define s390_alhhlr(c, r1, r2, r3) S390_RRF_1(c, 0xb9da, r1, r2, r3) #define s390_alhsik(c, r1, r3, v) S390_RIE_1(c, 0xecda, r1, r3, v) #define s390_alr(c, r1, r2) S390_RR(c, 0x1e, r1, r2) #define s390_alrk(c, r1, r2) S390_RRF(c, 0xb9fa, r1, r2) -#define s390_alsi(c, d1, b1, i2) S390_SIY_1(c, 0xeb6e, d1, b1, i2) +#define s390_alsi(c, d1, b1, i2) S390_SIY(c, 0xeb6e, d1, b1, i2) #define s390_alsih(c, r, v) S390_RIL_1(c, 0xcca, r, v) #define s390_alsihn(c, r, v) S390_RIL_1(c, 0xccb, r, v) #define s390_aly(c, r, x, b, d) S390_RXY(c, 0xe35e, r, x, b, d) @@ -1569,8 +1562,164 @@ typedef struct { #define s390_tmlh(c, r, m) S390_RI(c, 0xa70, r, m) #define s390_tmll(c, r, m) S390_RI(c, 0xa71, r, m) #define s390_tm(c, b, d, v) S390_SI(c, 0x91, b, d, v) +#define 
s390_trap2(code) S390_E(code, 0x01ff) +#define s390_vab(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 0, 0, 0) +#define s390_vah(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 1, 0, 0) +#define s390_vaf(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 2, 0, 0) +#define s390_vag(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 3, 0, 0) +#define s390_vaq(c, v1, v2, v3) S390_VRRc(c, 0xe7f3, v1, v2, v3, 4, 0, 0) +#define s390_vceqb(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 0, 0) +#define s390_vceqh(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 1, 0) +#define s390_vceqf(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 2, 0) +#define s390_vceqg(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 3, 0) +#define s390_vceqbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 0, 1) +#define s390_vceqhs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 1, 1) +#define s390_vceqfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 2, 1) +#define s390_vceqgs(c, v1, v2, v3) S390_VRRb(c, 0xe7f8, v1, v2, v3, 3, 1) +#define s390_vchb(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 0, 0) +#define s390_vchh(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 1, 0) +#define s390_vchf(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 2, 0) +#define s390_vchg(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 3, 0) +#define s390_vchbs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 0, 1) +#define s390_vchhs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 1, 1) +#define s390_vchfs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 2, 1) +#define s390_vchgs(c, v1, v2, v3) S390_VRRb(c, 0xe7fb, v1, v2, v3, 3, 1) +#define s390_vchlb(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 0, 0) +#define s390_vchlh(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 1, 0) +#define s390_vchlf(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 2, 0) +#define s390_vchlg(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 3, 0) +#define s390_vchlbs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 0, 1) +#define s390_vchlhs(c, v1, v2, 
v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 1, 1) +#define s390_vchlfs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 2, 1) +#define s390_vchlgs(c, v1, v2, v3) S390_VRRb(c, 0xe7f9, v1, v2, v3, 3, 1) +#define s390_vecb(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 0, 0, 0) +#define s390_vech(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 1, 0, 0) +#define s390_vecf(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 2, 0, 0) +#define s390_vecg(c, v1, v2) S390_VRRa(c, 0xe7db, v1, v2, 3, 0, 0) +#define s390_veclb(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 0, 0, 0) +#define s390_veclh(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 1, 0, 0) +#define s390_veclf(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 2, 0, 0) +#define s390_veclg(c, v1, v2) S390_VRRa(c, 0xe7d9, v1, v2, 3, 0, 0) +#define s390_vfasb(c, v1, v2, v3) S390_VRRc(c, 0xe7e3, v1, v2, v3, 2, 0, 0) +#define s390_vfadb(c, v1, v2, v3) S390_VRRc(c, 0xe7e3, v1, v2, v3, 3, 0, 0) +#define s390_vfcesb(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 2, 0, 0) +#define s390_vfcedb(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 3, 0, 0) +#define s390_vfcesbs(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 2, 0, 1) +#define s390_vfcedbs(c, v1, v2, v3) S390_VRRc(c, 0xe7e8, v1, v2, v3, 3, 0, 1) +#define s390_vfchsb(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 2, 0, 0) +#define s390_vfchdb(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 3, 0, 0) +#define s390_vfchsbs(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 2, 0, 1) +#define s390_vfchdbs(c, v1, v2, v3) S390_VRRc(c, 0xe7eb, v1, v2, v3, 3, 0, 1) +#define s390_vfchesb(c, v1, v2, v3) S390_VRRc(c, 0xe7ea, v1, v2, v3, 2, 0, 0) +#define s390_vfchedb(c, v1, v2, v3) S390_VRRc(c, 0xe7ea, v1, v2, v3, 3, 0, 0) +#define s390_vfchesbs(c, v1, v2, v3) S390_VRRc(c, 0xe7ea, v1, v2, v3, 2, 0, 1) +#define s390_vfchedbs(c, v1, v2, v3) S390_VRRc(c, 0xe7ea, v1, v2, v3, 3, 0, 1) +#define s390_vfdsb(c, v1, v2, v3) S390_VRRc(c, 0xe7e5, v1, v2, v3, 2, 0, 0) +#define s390_vfddb(c, v1, v2, v3) S390_VRRc(c, 0xe7e5, v1, v2, v3, 3, 0, 0) 
+#define s390_vfisb(c, v1, v2, m4, m5) S390_VRRa(c, 0xe7c7, v1, v2, 2, m4, m5) +#define s390_vfidb(c, v1, v2, m4, m5) S390_VRRa(c, 0xe7c7, v1, v2, 3, m4, m5) +#define s390_vfmsb(c, v1, v2, v3) S390_VRRc(c, 0xe7e7, v1, v2, v3, 2, 0, 0) +#define s390_vfmdb(c, v1, v2, v3) S390_VRRc(c, 0xe7e7, v1, v2, v3, 3, 0, 0) +#define s390_vfmaxsb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ef, v1, v2, v3, 2, 0, m6) +#define s390_vfmaxdb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ef, v1, v2, v3, 3, 0, m6) +#define s390_vfminsb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ee, v1, v2, v3, 2, 0, m6) +#define s390_vfmindb(c, v1, v2, v3, m6) S390_VRRc(c, 0xe7ee, v1, v2, v3, 3, 0, m6) +#define s390_vfpsosb(c, v1, v2, m5) S390_VRRa(c, 0xe7cc, v1, v2, 2, 0, m5) +#define s390_vfpsodb(c, v1, v2, m5) S390_VRRa(c, 0xe7cc, v1, v2, 3, 0, m5) +#define s390_vfssb(c, v1, v2, v3) S390_VRRc(c, 0xe7e2, v1, v2, v3, 2, 0, 0) +#define s390_vfsdb(c, v1, v2, v3) S390_VRRc(c, 0xe7e2, v1, v2, v3, 3, 0, 0) +#define s390_vfsqsb(c, v1, v2) S390_VRRa(c, 0xe7ce, v1, v2, 2, 0, 0) +#define s390_vfsqdb(c, v1, v2) S390_VRRa(c, 0xe7ce, v1, v2, 3, 0, 0) +#define s390_vgbm(c, v1, i2) S390_VRIa(c, 0xe744, v1, i2, 0) +#define s390_vgmb(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 0) +#define s390_vgmh(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 1) +#define s390_vgmf(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 2) +#define s390_vgmg(c, v1, i2, i3) S390_VRIb(c, 0xe746, v1, i2, i3, 3) +#define s390_vmlb(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 0, 0, 0) +#define s390_vmlhw(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 1, 0, 0) +#define s390_vmlf(c, v1, v2, v3) S390_VRRc(c, 0xe7a2, v1, v2, v3, 2, 0, 0) +#define s390_vmnb(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 0, 0, 0) +#define s390_vmnh(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 1, 0, 0) +#define s390_vmnf(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 2, 0, 0) +#define s390_vmng(c, v1, v2, v3) S390_VRRc(c, 0xe7fe, v1, v2, v3, 3, 0, 0) +#define s390_vmnlb(c, v1, v2, 
v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 0, 0, 0) +#define s390_vmnlh(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 1, 0, 0) +#define s390_vmnlf(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 2, 0, 0) +#define s390_vmnlg(c, v1, v2, v3) S390_VRRc(c, 0xe7fc, v1, v2, v3, 3, 0, 0) +#define s390_vmxb(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 0, 0, 0) +#define s390_vmxh(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 1, 0, 0) +#define s390_vmxf(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 2, 0, 0) +#define s390_vmxg(c, v1, v2, v3) S390_VRRc(c, 0xe7ff, v1, v2, v3, 3, 0, 0) +#define s390_vmxlb(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 0, 0, 0) +#define s390_vmxlh(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 1, 0, 0) +#define s390_vmxlf(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 2, 0, 0) +#define s390_vmxlg(c, v1, v2, v3) S390_VRRc(c, 0xe7fd, v1, v2, v3, 3, 0, 0) +#define s390_vn(c, v1, v2, v3) S390_VRRc(c, 0xe768, v1, v2, v3, 0, 0, 0) +#define s390_vnc(c, v1, v2, v3) S390_VRRc(c, 0xe769, v1, v2, v3, 0, 0, 0) +#define s390_vnn(c, v1, v2, v3) S390_VRRc(c, 0xe76e, v1, v2, v3, 0, 0, 0) +#define s390_vno(c, v1, v2, v3) S390_VRRc(c, 0xe76b, v1, v2, v3, 0, 0, 0) +#define s390_vo(c, v1, v2, v3) S390_VRRc(c, 0xe76a, v1, v2, v3, 0, 0, 0) +#define s390_vl(c, v, x, b, d) S390_VRX(c, 0xe706, v, x, b, d, 0) +#define s390_vlcb(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 0, 0, 0) +#define s390_vlch(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 1, 0, 0) +#define s390_vlcf(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 2, 0, 0) +#define s390_vlcg(c, v1, v2) S390_VRRa(c, 0xe7de, v1, v2, 3, 0, 0) +#define s390_vleg(c, v, d, x, b, m) S390_VRX(c, 0xe702, v, x, b, d, m) +#define s390_vleib(c, v1, i2, m3) S390_VRIa(c, 0xe740, v1, i2, m3) +#define s390_vleih(c, v1, i2, m3) S390_VRIa(c, 0xe741, v1, i2, m3) +#define s390_vleif(c, v1, i2, m3) S390_VRIa(c, 0xe743, v1, i2, m3) +#define s390_vleig(c, v1, i2, m3) S390_VRIa(c, 0xe742, v1, i2, m3) +#define s390_vlgvb(c, r1, v2, d3, b4) S390_VRSc(c, 
0xe721, r1, v2, d3, b4, 0) +#define s390_vlgvh(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 1) +#define s390_vlgvf(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 2) +#define s390_vlgvg(c, r1, v2, d3, b4) S390_VRSc(c, 0xe721, r1, v2, d3, b4, 3) #define s390_vlm(c, v1, v2, b, d, m) S390_VRSa(c, 0xe736, v1, v2, b, d, m) +#define s390_vlpb(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 0, 0, 0) +#define s390_vlph(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 1, 0, 0) +#define s390_vlpf(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 2, 0, 0) +#define s390_vlpg(c, v1, v2) S390_VRRa(c, 0xe7df, v1, v2, 3, 0, 0) +#define s390_vlr(c, v1, v2) S390_VRRa(c,0xe756, v1, v2, 0, 0, 0) +#define s390_vlvgb(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 0) +#define s390_vlvgh(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 1) +#define s390_vlvgf(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 2) +#define s390_vlvgg(c, v1, r2, d3, b4) S390_VRSb(c, 0xe722, v1, r2, d3, b4, 3) +#define s390_vperm(c, v1, v2, v3, v4) S390_VRRe(c, 0xe78c, v1, v2, v3, v4, 0, 0) +#define s390_vpkh(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 1, 0, 0) +#define s390_vpkf(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 2, 0, 0) +#define s390_vpkg(c, v1, v2, v3) S390_VRRc(c, 0xe794, v1, v2, v3, 3, 0, 0) +#define s390_vrepb(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 0) +#define s390_vreph(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 1) +#define s390_vrepf(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 2) +#define s390_vrepg(c, v1, v3, i2) S390_VRIc(c, 0xe74d, v1 ,v3, i2, 3) +#define s390_vrepib(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 0) +#define s390_vrepih(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 1) +#define s390_vrepif(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 2) +#define s390_vrepig(c, v1, i2) S390_VRIa(c, 0xe745, v1, i2, 3) +#define s390_vsb(c, v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 0, 0, 0) +#define s390_vsh(c, v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 1, 0, 0) 
+#define s390_vsf(c, v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 2, 0, 0) +#define s390_vsg(c, v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 3, 0, 0) +#define s390_vsq(c, v1, v2, v3) S390_VRRc(c, 0xe7f7, v1, v2, v3, 4, 0, 0) +#define s390_vst(c, v, x, b, d) S390_VRX(c,0xe70e, v, x, b, d, 0) +#define s390_vsteg(c, v, d, x, b, m) S390_VRX(c, 0xe70a, v, x, b, d, m) #define s390_vstm(c, v1, v2, b, d, m) S390_VRSa(c, 0xe73e, v1, v2, b, d, m) +#define s390_vsumb(c, v1, v2, v3) S390_VRRc(c, 0xe764, v1, v2, v3, 0, 0, 0) +#define s390_vsumh(c, v1, v2, v3) S390_VRRc(c, 0xe764, v1, v2, v3, 1, 0, 0) +#define s390_vsumqf(c, v1, v2, v3) S390_VRRc(c, 0xe767, v1, v2, v3, 2, 0, 0) +#define s390_vsumqg(c, v1, v2, v3) S390_VRRc(c, 0xe767, v1, v2, v3, 3, 0, 0) +#define s390_vuplb(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 0, 0, 0) +#define s390_vuplhw(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 1, 0, 0) +#define s390_vuplf(c, v1, v2) S390_VRRa(c, 0xe7d6, v1, v2, 2, 0, 0) +#define s390_vupllb(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 0, 0, 0) +#define s390_vupllh(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 1, 0, 0) +#define s390_vupllf(c, v1, v2) S390_VRRa(c, 0xe7d4, v1, v2, 2, 0, 0) +#define s390_vuplhb(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 0, 0, 0) +#define s390_vuplhh(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 1, 0, 0) +#define s390_vuplhf(c, v1, v2) S390_VRRa(c, 0xe7d5, v1, v2, 2, 0, 0) +#define s390_vuphb(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 0, 0, 0) +#define s390_vuphh(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 1, 0, 0) +#define s390_vuphf(c, v1, v2) S390_VRRa(c, 0xe7d7, v1, v2, 2, 0, 0) +#define s390_vx(c, v1, v2, v3) S390_VRRc(c, 0xe76d, v1, v2, v3, 0, 0, 0) #define s390_x(c, r, x, b, d) S390_RX(c, 0x57, r, x, b, d) #define s390_xihf(c, r, v) S390_RIL_1(c, 0xc06, r, v) #define s390_xilf(c, r, v) S390_RIL_1(c, 0xc07, r, v) diff --git a/src/mono/mono/mini/cpu-s390x.mdesc b/src/mono/mono/mini/cpu-s390x.mdesc index 8e699367ecc155..74959cb5a1dcb3 100644 --- a/src/mono/mono/mini/cpu-s390x.mdesc 
+++ b/src/mono/mono/mini/cpu-s390x.mdesc @@ -472,3 +472,171 @@ s390_cij: len:24 s390_cij_un: src1:i len:24 s390_cgij: len:24 s390_cgij_un: len:24 + +insert_i1: dest:x src1:x src2:i len:12 +insert_i2: dest:x src1:x src2:i len:12 +insert_i4: dest:x src1:x src2:i len:12 +insert_i8: dest:x src1:x src2:i len:12 +insert_r4: dest:x src1:x src2:f clob:1 len:12 +insert_r8: dest:x src1:x src2:f clob:1 len:12 + +extract_i1: dest:i src1:x len:6 +extract_i2: dest:i src1:x len:6 +extract_i4: dest:i src1:x len:6 +extract_i8: dest:i src1:x len:6 +extract_r4: dest:f src1:x len:12 +extract_r8: dest:f src1:x len:12 + +xextract_i1: dest:i src1:x src2:i len:6 +xextract_i2: dest:i src1:x src2:i len:6 +xextract_i4: dest:i src1:x src2:i len:6 +xextract_i8: dest:i src1:x src2:i len:6 +xextract_r4: dest:f src1:x src2:i len:10 +xextract_r8: dest:f src1:x src2:i len:10 + +expand_i1: dest:x src1:i len:12 +expand_i2: dest:x src1:i len:12 +expand_i4: dest:x src1:i len:12 +expand_i8: dest:x src1:i len:12 +expand_r4: dest:x src1:f len:18 +expand_r8: dest:x src1:f len:16 + +xones: dest:x len:6 +xmove: dest:x src1:x len:6 +xzero: dest:x len:6 +loadx_membase: dest:x src1:b len:26 +storex_membase: dest:b src1:x len:26 +s390_vab: dest:x src1:x src2:x len:6 +s390_vah: dest:x src1:x src2:x len:6 +s390_vaf: dest:x src1:x src2:x len:6 +s390_vag: dest:x src1:x src2:x len:6 +s390_vfasb: dest:x src1:x src2:x len:6 +s390_vfadb: dest:x src1:x src2:x len:6 +s390_vsb: dest:x src1:x src2:x len:6 +s390_vsh: dest:x src1:x src2:x len:6 +s390_vsf: dest:x src1:x src2:x len:6 +s390_vsg: dest:x src1:x src2:x len:6 +s390_vfssb: dest:x src1:x src2:x len:6 +s390_vfsdb: dest:x src1:x src2:x len:6 +s390_vx: dest:x src1:x src2:x len:6 +s390_vo: dest:x src1:x src2:x len:6 +s390_vno: dest:x src1:x src2:x len:6 +s390_vn: dest:x src1:x src2:x len:6 +vector_andnot: dest:x src1:x src2:x len:6 +s390_vnn: dest:x src1:x src2:x len:6 +s390_vmlb: dest:x src1:x src2:x len:6 +s390_vmlhw: dest:x src1:x src2:x len:6 +s390_vmlf: dest:x src1:x 
src2:x len:6 +s390_vfmsb: dest:x src1:x src2:x len:6 +s390_vfmdb: dest:x src1:x src2:x len:6 +s390_vfdsb: dest:x src1:x src2:x len:6 +s390_vfddb: dest:x src1:x src2:x len:6 +s390_vmxb: dest:x src1:x src2:x len:6 +s390_vmxh: dest:x src1:x src2:x len:6 +s390_vmxf: dest:x src1:x src2:x len:6 +s390_vmxg: dest:x src1:x src2:x len:6 +s390_vmnb: dest:x src1:x src2:x len:6 +s390_vmnh: dest:x src1:x src2:x len:6 +s390_vmnf: dest:x src1:x src2:x len:6 +s390_vmng: dest:x src1:x src2:x len:6 +s390_vmxlb: dest:x src1:x src2:x len:6 +s390_vmxlh: dest:x src1:x src2:x len:6 +s390_vmxlf: dest:x src1:x src2:x len:6 +s390_vmxlg: dest:x src1:x src2:x len:6 +s390_vfmaxsb: dest:x src1:x src2:x len:6 +s390_vfmaxdb: dest:x src1:x src2:x len:6 +s390_vmnlb: dest:x src1:x src2:x len:6 +s390_vmnlh: dest:x src1:x src2:x len:6 +s390_vmnlf: dest:x src1:x src2:x len:6 +s390_vmnlg: dest:x src1:x src2:x len:6 +s390_vfminsb: dest:x src1:x src2:x len:6 +s390_vfmindb: dest:x src1:x src2:x len:6 +s390_vsumb: dest:x src1:x src2:x len:6 +s390_vsumh: dest:x src1:x src2:x len:6 +s390_vsumqf: dest:x src1:x src2:x len:6 +s390_vsumqg: dest:x src1:x src2:x len:6 +s390_vperm: dest:x src1:x src2:x len:6 +s390_vrepib: dest:x len:6 +s390_vrepih: dest:x len:6 +s390_vrepif: dest:x len:6 +s390_vrepig: dest:x len:6 +s390_vceqbs: dest:x src1:x src2:x len:6 +s390_vceqhs: dest:x src1:x src2:x len:6 +s390_vceqfs: dest:x src1:x src2:x len:6 +s390_vceqgs: dest:x src1:x src2:x len:6 +s390_vceqb: dest:x src1:x src2:x len:6 +s390_vceqh: dest:x src1:x src2:x len:6 +s390_vceqf: dest:x src1:x src2:x len:6 +s390_vceqg: dest:x src1:x src2:x len:6 +s390_vfcesbs: dest:x src1:x src2:x len:6 +s390_vfcedbs: dest:x src1:x src2:x len:6 +s390_vfchsbs: dest:x src1:x src2:x len:6 +s390_vfchdbs: dest:x src1:x src2:x len:6 +s390_vfcesb: dest:x src1:x src2:x len:6 +s390_vfcedb: dest:x src1:x src2:x len:6 +s390_vfchsb: dest:x src1:x src2:x len:6 +s390_vfchdb: dest:x src1:x src2:x len:6 +s390_vgmb: dest:x src1:i src2:i len:6 +s390_vgmh: dest:x 
src1:i src2:i len:6 +s390_vgmf: dest:x src1:i src2:i len:6 +s390_vgmg: dest:x src1:i src2:i len:6 +s390_vecb: dest:x src1:x len:6 +s390_vech: dest:x src1:x len:6 +s390_vecf: dest:x src1:x len:6 +s390_vecg: dest:x src1:x len:6 +s390_veclb: dest:x src1:x len:6 +s390_veclh: dest:x src1:x len:6 +s390_veclf: dest:x src1:x len:6 +s390_veclg: dest:x src1:x len:6 +s390_vchbs: dest:x src1:x src2:x len:6 +s390_vchhs: dest:x src1:x src2:x len:6 +s390_vchfs: dest:x src1:x src2:x len:6 +s390_vchgs: dest:x src1:x src2:x len:6 +s390_vchb: dest:x src1:x src2:x len:6 +s390_vchh: dest:x src1:x src2:x len:6 +s390_vchf: dest:x src1:x src2:x len:6 +s390_vchg: dest:x src1:x src2:x len:6 +s390_vfsqsb: dest:x src1:x len:6 +s390_vfsqdb: dest:x src1:x len:6 +s390_vchlbs: dest:x src1:x src2:x len:6 +s390_vchlhs: dest:x src1:x src2:x len:6 +s390_vchlfs: dest:x src1:x src2:x len:6 +s390_vchlgs: dest:x src1:x src2:x len:6 +s390_vchlb: dest:x src1:x src2:x len:6 +s390_vchlh: dest:x src1:x src2:x len:6 +s390_vchlf: dest:x src1:x src2:x len:6 +s390_vchlg: dest:x src1:x src2:x len:6 +s390_vfchesbs: dest:x src1:x src2:x len:6 +s390_vfchedbs: dest:x src1:x src2:x len:6 +s390_vfchesb: dest:x src1:x src2:x len:6 +s390_vfchedb: dest:x src1:x src2:x len:6 +s390_vlpb: dest:x src1:x len:6 +s390_vlph: dest:x src1:x len:6 +s390_vlpf: dest:x src1:x len:6 +s390_vlpg: dest:x src1:x len:6 +s390_vflpdb: dest:x src1:x len:6 +s390_vflpsb: dest:x src1:x len:6 +s390_vflcdb: dest:x src1:x len:6 +s390_vflcsb: dest:x src1:x len:6 +s390_vpkh: dest:x src1:x src2:x len:6 +s390_vpkf: dest:x src1:x src2:x len:6 +s390_vpkg: dest:x src1:x src2:x len:6 +s390_vlcb: dest:x src1:x len:6 +s390_vlch: dest:x src1:x len:6 +s390_vlcf: dest:x src1:x len:6 +s390_vlcg: dest:x src1:x len:6 +s390_vuplb: dest:x src1:x len:6 +s390_vuplhw: dest:x src1:x len:6 +s390_vuplf: dest:x src1:x len:6 +s390_vupllb: dest:x src1:x len:6 +s390_vupllh: dest:x src1:x len:6 +s390_vupllf: dest:x src1:x len:6 +s390_vuphb: dest:x src1:x len:6 +s390_vuphh: dest:x 
src1:x len:6 +s390_vuphf: dest:x src1:x len:6 +s390_vuplhb: dest:x src1:x len:6 +s390_vuplhh: dest:x src1:x len:6 +s390_vuplhf: dest:x src1:x len:6 +s390_vfisb: dest:x src1:x len:6 +s390_vfidb: dest:x src1:x len:6 +xconst: dest:x len:18 diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index fe0f4bcdffb763..ba9431fed958fa 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -121,7 +121,6 @@ MINI_OP(OP_STORER8_MEMBASE_REG, "storer8_membase_reg", IREG, FREG, NONE) #if defined(TARGET_X86) || defined(TARGET_AMD64) MINI_OP(OP_STOREX_MEMBASE_REG, "storex_membase_reg", IREG, XREG, NONE) -MINI_OP(OP_STOREX_ALIGNED_MEMBASE_REG, "storex_aligned_membase_reg", IREG, XREG, NONE) MINI_OP(OP_STOREX_NTA_MEMBASE_REG, "storex_nta_membase_reg", IREG, XREG, NONE) #endif @@ -149,8 +148,9 @@ MINI_OP(OP_LOADR8_MEMBASE,"loadr8_membase", FREG, IREG, NONE) /* klass must be set to a simd class */ MINI_OP(OP_LOADX_MEMBASE, "loadx_membase", XREG, IREG, NONE) -#if defined(TARGET_X86) || defined(TARGET_AMD64) +#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_S390X) MINI_OP(OP_LOADX_ALIGNED_MEMBASE, "loadx_aligned_membase", XREG, IREG, NONE) +MINI_OP(OP_STOREX_ALIGNED_MEMBASE_REG, "storex_aligned_membase_reg", IREG, XREG, NONE) #endif MINI_OP(OP_LOADV_MEMBASE, "loadv_membase", VREG, IREG, NONE) @@ -1049,8 +1049,6 @@ MINI_OP(OP_CVTPS2PD, "cvtps2pd", XREG, XREG, NONE) MINI_OP(OP_CVTTPD2DQ, "cvttpd2dq", XREG, XREG, NONE) MINI_OP(OP_CVTTPS2DQ, "cvttps2dq", XREG, XREG, NONE) -MINI_OP(OP_VECTOR_IABS, "vector_integer_abs", XREG, XREG, NONE) -MINI_OP(OP_VECTOR_ANDN, "vector_andnot", XREG, XREG, XREG) /* sse 1 */ /* inst_c1 is target type */ @@ -1498,6 +1496,139 @@ MINI_OP(OP_S390_CIJ, "s390_cij", IREG, NONE, NONE) MINI_OP(OP_S390_CLIJ, "s390_cij_un", IREG, IREG, NONE) MINI_OP(OP_S390_CGIJ, "s390_cgij", LREG, NONE, NONE) MINI_OP(OP_S390_CLGIJ, "s390_cgij_un", LREG, NONE, NONE) +MINI_OP(OP_S390_VAB, "s390_vab", XREG, XREG, XREG) 
+MINI_OP(OP_S390_VAH, "s390_vah", XREG, XREG, XREG) +MINI_OP(OP_S390_VAF, "s390_vaf", XREG, XREG, XREG) +MINI_OP(OP_S390_VAG, "s390_vag", XREG, XREG, XREG) +MINI_OP(OP_S390_VFASB, "s390_vfasb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFADB, "s390_vfadb", XREG, XREG, XREG) +MINI_OP(OP_S390_VSB, "s390_vsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VSH, "s390_vsh", XREG, XREG, XREG) +MINI_OP(OP_S390_VSF, "s390_vsf", XREG, XREG, XREG) +MINI_OP(OP_S390_VSG, "s390_vsg", XREG, XREG, XREG) +MINI_OP(OP_S390_VFSSB, "s390_vfssb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFSDB, "s390_vfsdb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMLB, "s390_vmlb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMLHW, "s390_vmlhw", XREG, XREG, XREG) +MINI_OP(OP_S390_VMLF, "s390_vmlf", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMSB, "s390_vfmsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMDB, "s390_vfmdb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFDSB, "s390_vfdsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFDDB, "s390_vfddb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXB, "s390_vmxb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXH, "s390_vmxh", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXF, "s390_vmxf", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXG, "s390_vmxg", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMAXSB, "s390_vfmaxsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMAXDB, "s390_vfmaxdb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNB, "s390_vmnb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNH, "s390_vmnh", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNF, "s390_vmnf", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNG, "s390_vmng", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMINSB, "s390_vfminsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFMINDB, "s390_vfmindb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXLB, "s390_vmxlb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXLH, "s390_vmxlh", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXLF, "s390_vmxlf", XREG, XREG, XREG) +MINI_OP(OP_S390_VMXLG, "s390_vmxlg", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNLB, "s390_vmnlb", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNLH, "s390_vmnlh", XREG, XREG, XREG) 
+MINI_OP(OP_S390_VMNLF, "s390_vmnlf", XREG, XREG, XREG) +MINI_OP(OP_S390_VMNLG, "s390_vmnlg", XREG, XREG, XREG) +MINI_OP(OP_S390_VO, "s390_vo", XREG, XREG, XREG) +MINI_OP(OP_S390_VNO, "s390_vno", XREG, XREG, XREG) +MINI_OP(OP_S390_VX, "s390_vx", XREG, XREG, XREG) +MINI_OP(OP_S390_VN, "s390_vn", XREG, XREG, XREG) +MINI_OP(OP_S390_VNN, "s390_vnn", XREG, XREG, XREG) +MINI_OP(OP_S390_VSUMB, "s390_vsumb", XREG, XREG, XREG) +MINI_OP(OP_S390_VSUMH, "s390_vsumh", XREG, XREG, XREG) +MINI_OP(OP_S390_VSUMQF, "s390_vsumqf", XREG, XREG, XREG) +MINI_OP(OP_S390_VSUMQG, "s390_vsumqg", XREG, XREG, XREG) +MINI_OP(OP_S390_VPERM, "s390_vperm", XREG, XREG, XREG) +MINI_OP(OP_S390_VREPIB, "s390_vrepib", XREG, NONE, NONE) +MINI_OP(OP_S390_VREPIH, "s390_vrepih", XREG, NONE, NONE) +MINI_OP(OP_S390_VREPIF, "s390_vrepif", XREG, NONE, NONE) +MINI_OP(OP_S390_VREPIG, "s390_vrepig", XREG, NONE, NONE) /* the VREPI* immediate is read from inst_c1 by the code emitter */ +MINI_OP(OP_S390_VFSQSB, "s390_vfsqsb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFSQDB, "s390_vfsqdb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFCESBS, "s390_vfcesbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCEDBS, "s390_vfcedbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHSBS, "s390_vfchsbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHDBS, "s390_vfchdbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCESB, "s390_vfcesb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCEDB, "s390_vfcedb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHSB, "s390_vfchsb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHDB, "s390_vfchdb", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQBS, "s390_vceqbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQHS, "s390_vceqhs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQFS, "s390_vceqfs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQGS, "s390_vceqgs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQB, "s390_vceqb", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQH, "s390_vceqh", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQF, "s390_vceqf", XREG, XREG, XREG) +MINI_OP(OP_S390_VCEQG, "s390_vceqg", XREG, XREG, XREG) +MINI_OP(OP_S390_VGMB, "s390_vgmb", XREG, IREG, IREG) 
+MINI_OP(OP_S390_VGMH, "s390_vgmh", XREG, IREG, IREG) +MINI_OP(OP_S390_VGMF, "s390_vgmf", XREG, IREG, IREG) +MINI_OP(OP_S390_VGMG, "s390_vgmg", XREG, IREG, IREG) +MINI_OP(OP_S390_VECB, "s390_vecb", XREG, XREG, NONE) +MINI_OP(OP_S390_VECF, "s390_vecf", XREG, XREG, NONE) +MINI_OP(OP_S390_VECH, "s390_vech", XREG, XREG, NONE) +MINI_OP(OP_S390_VECG, "s390_vecg", XREG, XREG, NONE) +MINI_OP(OP_S390_VECLB, "s390_veclb", XREG, XREG, NONE) +MINI_OP(OP_S390_VECLF, "s390_veclf", XREG, XREG, NONE) +MINI_OP(OP_S390_VECLH, "s390_veclh", XREG, XREG, NONE) +MINI_OP(OP_S390_VECLG, "s390_veclg", XREG, XREG, NONE) +MINI_OP(OP_S390_VCHBS, "s390_vchbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHHS, "s390_vchhs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHFS, "s390_vchfs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHGS, "s390_vchgs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHB, "s390_vchb", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHH, "s390_vchh", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHF, "s390_vchf", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHG, "s390_vchg", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLBS, "s390_vchlbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLHS, "s390_vchlhs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLFS, "s390_vchlfs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLGS, "s390_vchlgs", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLB, "s390_vchlb", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLH, "s390_vchlh", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLF, "s390_vchlf", XREG, XREG, XREG) +MINI_OP(OP_S390_VCHLG, "s390_vchlg", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHESBS, "s390_vfchesbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHEDBS, "s390_vfchedbs", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHESB, "s390_vfchesb", XREG, XREG, XREG) +MINI_OP(OP_S390_VFCHEDB, "s390_vfchedb", XREG, XREG, XREG) +MINI_OP(OP_S390_VLPB, "s390_vlpb", XREG, XREG, NONE) +MINI_OP(OP_S390_VLPH, "s390_vlph", XREG, XREG, NONE) +MINI_OP(OP_S390_VLPF, "s390_vlpf", XREG, XREG, NONE) +MINI_OP(OP_S390_VLPG, "s390_vlpg", XREG, XREG, NONE) +MINI_OP(OP_S390_VFLPDB, "s390_vflpdb", 
XREG, XREG, NONE) +MINI_OP(OP_S390_VFLPSB, "s390_vflpsb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFLCDB, "s390_vflcdb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFLCSB, "s390_vflcsb", XREG, XREG, NONE) +MINI_OP(OP_S390_VPKH, "s390_vpkh", XREG, XREG, XREG) +MINI_OP(OP_S390_VPKF, "s390_vpkf", XREG, XREG, XREG) +MINI_OP(OP_S390_VPKG, "s390_vpkg", XREG, XREG, XREG) +MINI_OP(OP_S390_VLCB, "s390_vlcb", XREG, XREG, NONE) +MINI_OP(OP_S390_VLCH, "s390_vlch", XREG, XREG, NONE) +MINI_OP(OP_S390_VLCF, "s390_vlcf", XREG, XREG, NONE) +MINI_OP(OP_S390_VLCG, "s390_vlcg", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPHB, "s390_vuphb", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPHH, "s390_vuphh", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPHF, "s390_vuphf", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLB, "s390_vuplb", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLHW, "s390_vuplhw", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLF, "s390_vuplf", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLHB, "s390_vuplhb", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLHH, "s390_vuplhh", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLHF, "s390_vuplhf", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLLB, "s390_vupllb", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLLH, "s390_vupllh", XREG, XREG, NONE) +MINI_OP(OP_S390_VUPLLF, "s390_vupllf", XREG, XREG, NONE) +MINI_OP(OP_S390_VFISB, "s390_vfisb", XREG, XREG, NONE) +MINI_OP(OP_S390_VFIDB, "s390_vfidb", XREG, XREG, NONE) +MINI_OP(OP_S390_XCOMPARE_XEXTRACT, "s390_xcompare_xextract", IREG, XREG, XREG) /* inst_c0 = (CMP_* relation << 4) | SIMD_EXTR_* kind, inst_c1 = element type; expanded in mono_arch_lowering_pass */ #endif #if defined(TARGET_ARM64) @@ -1862,7 +1993,6 @@ MINI_OP(OP_SIMD_LOAD_SCALAR_R8, "simd_load_scalar_r8", XREG, IREG, NONE) MINI_OP(OP_SIMD_STORE, "simd_store", NONE, XREG, XREG) #if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) -MINI_OP(OP_ONES_COMPLEMENT, "ones_complement", XREG, XREG, NONE) MINI_OP(OP_CVT_FP_UI, "convert_fp_to_ui", XREG, XREG, NONE) MINI_OP(OP_CVT_FP_SI, "convert_fp_to_si", XREG, XREG, NONE) MINI_OP(OP_CVT_FP_UI_SCALAR, "convert_fp_to_ui_scalar", XREG, XREG, NONE) @@ -1872,11 +2002,17 @@ 
MINI_OP(OP_CVT_SI_FP, "convert_si_to_fp", XREG, XREG, NONE) MINI_OP(OP_CVT_UI_FP_SCALAR, "convert_ui_to_fp_scalar", XREG, XREG, NONE) MINI_OP(OP_CVT_SI_FP_SCALAR, "convert_si_to_fp_scalar", XREG, XREG, NONE) /* inst_c1 is one of the MONO_TYPE_ constants */ -MINI_OP(OP_NEGATION, "negate", XREG, XREG, NONE) MINI_OP(OP_NEGATION_SCALAR, "negate_scalar", XREG, XREG, NONE) +#endif // TARGET_ARM64 || TARGET_AMD64 || TARGET_WASM +/* Vector opcodes that are also available when targeting s390x */ +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) +MINI_OP(OP_NEGATION, "negate", XREG, XREG, NONE) +MINI_OP(OP_ONES_COMPLEMENT, "ones_complement", XREG, XREG, NONE) /* Select bits from src2/src3 using src1 */ MINI_OP3(OP_BSL, "bitwise_select", XREG, XREG, XREG, XREG) -#endif // TARGET_ARM64 || TARGET_AMD64 || TARGET_WASM +MINI_OP(OP_VECTOR_ANDN, "vector_andnot", XREG, XREG, XREG) +MINI_OP(OP_VECTOR_IABS, "vector_integer_abs", XREG, XREG, NONE) +#endif #if defined(TARGET_RISCV64) || defined(TARGET_RISCV32) MINI_OP(OP_RISCV_EXC_BEQ, "riscv_exc_beq", NONE, IREG, IREG) diff --git a/src/mono/mono/mini/mini-runtime.c b/src/mono/mono/mini/mini-runtime.c index 55645dd51efb46..ddb1bfb6201bc2 100644 --- a/src/mono/mono/mini/mini-runtime.c +++ b/src/mono/mono/mini/mini-runtime.c @@ -4496,7 +4496,7 @@ init_class (MonoClass *klass) } #endif -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_S390X) if (!strcmp (m_class_get_name_space (klass), "System.Numerics")) { if (!strcmp (name, "Vector2") || !strcmp (name, "Vector3") ||!strcmp (name, "Vector4") || !strcmp (name, "Quaternion") || !strcmp (name, "Plane")) mono_class_set_is_simd_type (klass, TRUE); diff --git a/src/mono/mono/mini/mini-s390x.c b/src/mono/mono/mini/mini-s390x.c index 3419a29768c707..04c87f37e7eaf1 100644 --- a/src/mono/mono/mini/mini-s390x.c +++ b/src/mono/mono/mini/mini-s390x.c @@ -19,6 +19,17 @@ #define MAX_ARCH_DELEGATE_PARAMS 10 +#define NEW_SIMD_INS(cfg,ins,dest,op,d,s1,s2) do { /* build instruction `dest` with opcode `op` and registers d/s1/s2, copy cil_code and klass from `ins`, and insert it before `ins` */ \ + MONO_INST_NEW ((cfg), (dest), (op)); \ + 
(dest)->cil_code = (ins)->cil_code; \ + (dest)->dreg = d; \ + (dest)->sreg1 = s1; \ + (dest)->sreg2 = s2; \ + (dest)->type = STACK_VTYPE; \ + (dest)->klass = ins->klass; \ + mono_bblock_insert_before_ins (bb, ins, (dest)); /* `bb` is not a macro parameter: it must be in scope where the macro is expanded */ \ + } while (0) + #define EMIT_COND_BRANCH(ins,cond) \ { \ if (ins->inst_true_bb->native_offset) { \ @@ -2177,6 +2188,441 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) /*========================= End of Function ========================*/ +static int +simd_type_to_sub_op (int t) +{ /* Map element type t (a MONO_TYPE_* constant) to the S390 vector subtract opcode; signed and unsigned integers of equal width share an opcode. Any other type asserts. */ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_S390_VSB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_S390_VSH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_S390_VSF; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + case MONO_TYPE_I: + case MONO_TYPE_U: + return OP_S390_VSG; + case MONO_TYPE_R4: + return OP_S390_VFSSB; + case MONO_TYPE_R8: + return OP_S390_VFSDB; + default: + g_assert_not_reached (); + return -1; + } +} +/* Map element type t (a MONO_TYPE_* constant) to the S390 vector add opcode. Any other type asserts. */ +static int +simd_type_to_add_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_S390_VAB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_S390_VAH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_S390_VAF; + case MONO_TYPE_I8: + case MONO_TYPE_U8: + case MONO_TYPE_I: + case MONO_TYPE_U: + return OP_S390_VAG; + case MONO_TYPE_R4: + return OP_S390_VFASB; + case MONO_TYPE_R8: + return OP_S390_VFADB; + default: + g_assert_not_reached (); + return -1; + } +} +/* Map element type t to the S390 vector multiply opcode. Only 8/16/32-bit integers and R4/R8 are handled; 64-bit and native-sized integers reach the assert. */ +static int +simd_type_to_mul_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_S390_VMLB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_S390_VMLHW; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_S390_VMLF; + case MONO_TYPE_R4: + return OP_S390_VFMSB; + case MONO_TYPE_R8: + return OP_S390_VFMDB; + default: + g_assert_not_reached (); + return -1; + } +} +/* Map element type t to the S390 vector maximum opcode. Unlike add/sub, signed and unsigned integers select different opcodes. */ +static int +simd_type_to_max_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + return 
OP_S390_VMXB; + case MONO_TYPE_U1: + return OP_S390_VMXLB; + case MONO_TYPE_I2: + return OP_S390_VMXH; + case MONO_TYPE_U2: + return OP_S390_VMXLH; + case MONO_TYPE_I4: + return OP_S390_VMXF; + case MONO_TYPE_U4: + return OP_S390_VMXLF; + case MONO_TYPE_I8: + case MONO_TYPE_I: + return OP_S390_VMXG; + case MONO_TYPE_U8: + case MONO_TYPE_U: + return OP_S390_VMXLG; + case MONO_TYPE_R4: + return OP_S390_VFMAXSB; + case MONO_TYPE_R8: + return OP_S390_VFMAXDB; + default: + g_assert_not_reached (); + return -1; + } +} +/* Map element type t to the S390 vector minimum opcode. Signed and unsigned integers select different opcodes. */ +static int +simd_type_to_min_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + return OP_S390_VMNB; + case MONO_TYPE_U1: + return OP_S390_VMNLB; + case MONO_TYPE_I2: + return OP_S390_VMNH; + case MONO_TYPE_U2: + return OP_S390_VMNLH; + case MONO_TYPE_I4: + return OP_S390_VMNF; + case MONO_TYPE_U4: + return OP_S390_VMNLF; + case MONO_TYPE_I8: + case MONO_TYPE_I: + return OP_S390_VMNG; + case MONO_TYPE_U8: + case MONO_TYPE_U: + return OP_S390_VMNLG; + case MONO_TYPE_R4: + return OP_S390_VFMINSB; + case MONO_TYPE_R8: + return OP_S390_VFMINDB; + default: + g_assert_not_reached (); + return -1; + } +} +/* Map element type t to the equality-compare opcode used when lowering OP_S390_XCOMPARE_XEXTRACT (the opcode names carry a trailing S). */ +static int +simd_type_to_comp_any_all_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_S390_VCEQBS; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_S390_VCEQHS; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_S390_VCEQFS; + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return OP_S390_VCEQGS; + case MONO_TYPE_R4: + return OP_S390_VFCESBS; + case MONO_TYPE_R8: + return OP_S390_VFCEDBS; + default: + g_assert_not_reached (); + return -1; + } +} +/* Map element type t to the element-wise equality-compare opcode used when lowering OP_XCOMPARE and OP_XCOMPARE_FP. */ +static int +simd_type_to_comp_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_S390_VCEQB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_S390_VCEQH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_S390_VCEQF; + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case MONO_TYPE_U8: 
+ return OP_S390_VCEQG; + case MONO_TYPE_R4: + return OP_S390_VFCESB; + case MONO_TYPE_R8: + return OP_S390_VFCEDB; + default: + g_assert_not_reached (); + return -1; + } +} +/* Map element type t to the greater-than compare opcode used when lowering OP_S390_XCOMPARE_XEXTRACT; signed and unsigned integers differ. */ +static int +simd_type_to_gt_any_all_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + return OP_S390_VCHBS; + case MONO_TYPE_U1: + return OP_S390_VCHLBS; + case MONO_TYPE_I2: + return OP_S390_VCHHS; + case MONO_TYPE_U2: + return OP_S390_VCHLHS; + case MONO_TYPE_I4: + return OP_S390_VCHFS; + case MONO_TYPE_U4: + return OP_S390_VCHLFS; + case MONO_TYPE_I: + case MONO_TYPE_I8: + return OP_S390_VCHGS; + case MONO_TYPE_U: + case MONO_TYPE_U8: + return OP_S390_VCHLGS; + case MONO_TYPE_R4: + return OP_S390_VFCHSBS; + case MONO_TYPE_R8: + return OP_S390_VFCHDBS; + default: + g_assert_not_reached (); + return -1; + } +} +/* Map element type t to the element-wise greater-than compare opcode used when lowering OP_XCOMPARE and OP_XCOMPARE_FP; signed and unsigned integers differ. */ +static int +simd_type_to_gt_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + return OP_S390_VCHB; + case MONO_TYPE_U1: + return OP_S390_VCHLB; + case MONO_TYPE_I2: + return OP_S390_VCHH; + case MONO_TYPE_U2: + return OP_S390_VCHLH; + case MONO_TYPE_I4: + return OP_S390_VCHF; + case MONO_TYPE_U4: + return OP_S390_VCHLF; + case MONO_TYPE_I: + case MONO_TYPE_I8: + return OP_S390_VCHG; + case MONO_TYPE_U: + case MONO_TYPE_U8: + return OP_S390_VCHLG; + case MONO_TYPE_R4: + return OP_S390_VFCHSB; + case MONO_TYPE_R8: + return OP_S390_VFCHDB; + default: + g_assert_not_reached (); + return -1; + } +} +/* Greater-or-equal compare opcode for R4/R8, used when lowering OP_S390_XCOMPARE_XEXTRACT; every other type asserts. */ +static int +simd_type_to_ge_fp_any_all_op (int t) +{ + switch(t) { + case MONO_TYPE_R4: + return OP_S390_VFCHESBS; + case MONO_TYPE_R8: + return OP_S390_VFCHEDBS; + default: + g_assert_not_reached (); + return -1; + } +} +/* Greater-or-equal compare opcode for R4/R8, used when lowering OP_XCOMPARE_FP; every other type asserts. */ +static int +simd_type_to_ge_fp_op (int t) +{ + switch(t) { + case MONO_TYPE_R4: + return OP_S390_VFCHESB; + case MONO_TYPE_R8: + return OP_S390_VFCHEDB; + default: + g_assert_not_reached (); + return -1; + } +} + +/* Choose the scalar opcode emitted after an integer vector compare in the OP_S390_XCOMPARE_XEXTRACT lowering. t is SIMD_EXTR_ARE_ALL_SET or SIMD_EXTR_IS_ANY_SET, q is a CMP_* relation. */ +static int +simd_type_to_extract_int_op (int t, int q) +{ + switch (t){ + case SIMD_EXTR_ARE_ALL_SET:{ + switch (q){ + case CMP_LT: + case CMP_GT: + 
case CMP_GT_UN: + case CMP_LT_UN: + case CMP_EQ: + return OP_CEQ; /* NOTE(review): simd_type_to_extract_fp_op returns OP_ICEQ for the same case — confirm OP_CEQ is intended here */ + case CMP_GE: + case CMP_LE: + case CMP_GE_UN: + case CMP_LE_UN: + return OP_ICGT_UN; + default: + g_assert_not_reached(); + return -1; + } + } + case SIMD_EXTR_IS_ANY_SET:{ + switch (q){ + case CMP_GT: + case CMP_LT: + case CMP_GT_UN: + case CMP_LT_UN: + case CMP_EQ: + return OP_ICLE; + case CMP_GE: + case CMP_LE: + case CMP_GE_UN: + case CMP_LE_UN: + return OP_ICNEQ; + default: + g_assert_not_reached(); + return -1; + } + } + default: + g_assert_not_reached (); + return -1; + } +} +/* Choose the scalar opcode emitted after a floating-point vector compare in the OP_S390_XCOMPARE_XEXTRACT lowering. t is SIMD_EXTR_ARE_ALL_SET or SIMD_EXTR_IS_ANY_SET; every supported CMP_* relation q yields the same opcode for a given t. */ +static int +simd_type_to_extract_fp_op (int t, int q) +{ + switch (t){ + case SIMD_EXTR_ARE_ALL_SET:{ + switch (q){ + case CMP_LT: + case CMP_GT: + case CMP_GT_UN: + case CMP_LT_UN: + case CMP_EQ: + case CMP_GE: + case CMP_LE: + case CMP_GE_UN: + case CMP_LE_UN: + return OP_ICEQ; + default: + g_assert_not_reached(); + return -1; + } + } + case SIMD_EXTR_IS_ANY_SET:{ + switch (q){ + case CMP_GT: + case CMP_LT: + case CMP_GT_UN: + case CMP_LT_UN: + case CMP_EQ: + case CMP_GE: + case CMP_LE: + case CMP_GE_UN: + case CMP_LE_UN: + return OP_ICLE; + default: + g_assert_not_reached(); + return -1; + } + } + default: + g_assert_not_reached (); + return -1; + } +} +/* Map integer element type t (a MONO_TYPE_* constant) to the opcode OP_VECTOR_IABS is lowered to. Floating-point and unknown types assert. */ +static int +simd_type_to_abs_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_S390_VLPB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_S390_VLPH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_S390_VLPF; + case MONO_TYPE_I8: + case MONO_TYPE_U8: case MONO_TYPE_I: case MONO_TYPE_U: /* native-sized ints use the 64-bit opcode, as in simd_type_to_add_op */ + return OP_S390_VLPG; + default: + g_assert_not_reached (); + return -1; + } +} +/* Map element type t (a MONO_TYPE_* constant) to the opcode OP_NEGATION is lowered to; R4/R8 use separate floating-point opcodes. Unknown types assert. */ +static int +simd_type_to_negate_op (int t) +{ + switch (t) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return OP_S390_VLCB; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return OP_S390_VLCH; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return OP_S390_VLCF; + case MONO_TYPE_R4: + return OP_S390_VFLCSB; + case MONO_TYPE_I8: + case MONO_TYPE_U8: case MONO_TYPE_I: case MONO_TYPE_U: /* native-sized ints use the 64-bit opcode, as in simd_type_to_add_op */ + return OP_S390_VLCG; + case MONO_TYPE_R8: + 
return OP_S390_VFLCDB; + default: + g_assert_not_reached (); + return -1; + } +} +/* True when t is MONO_TYPE_R4 or MONO_TYPE_R8. */ +static bool +type_is_float (int t){ + return (t == MONO_TYPE_R4 || t == MONO_TYPE_R8); +} + /** * * @brief Architecture-specific lowering pass processing @@ -2190,7 +2636,8 @@ mono_arch_peephole_pass_2 (MonoCompile *cfg, MonoBasicBlock *bb) void mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) { - MonoInst *ins, *next; + MonoInst *ins, *next, *temp_ins; + int temp; MONO_BB_FOR_EACH_INS_SAFE (bb, next, ins) { switch (ins->opcode) { @@ -2212,6 +2659,204 @@ mono_arch_lowering_pass (MonoCompile *cfg, MonoBasicBlock *bb) /* This is created by the memcpy code which ignores is_inst_imm */ mono_decompose_op_imm (cfg, bb, ins); break; + case OP_XBINOP:{ /* inst_c0 is the scalar operation, inst_c1 the element type; the generic op is rewritten in place to a typed S390 opcode */ + switch(ins->inst_c0){ + case OP_IADD: + ins->opcode = GINT_TO_OPCODE (simd_type_to_add_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_ISUB: + ins->opcode = GINT_TO_OPCODE (simd_type_to_sub_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_IMUL: + ins->opcode = GINT_TO_OPCODE (simd_type_to_mul_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_IMAX_UN: + case OP_IMAX: + ins->opcode = GINT_TO_OPCODE (simd_type_to_max_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_IMIN_UN: + case OP_IMIN: + ins->opcode = GINT_TO_OPCODE (simd_type_to_min_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FADD: + ins->opcode = GINT_TO_OPCODE (simd_type_to_add_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FSUB: + ins->opcode = GINT_TO_OPCODE (simd_type_to_sub_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FMUL: + ins->opcode = GINT_TO_OPCODE (simd_type_to_mul_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FDIV: + ins->opcode = ins->inst_c1 == MONO_TYPE_R4 ? 
OP_S390_VFDSB : OP_S390_VFDDB; + break; + case OP_FMIN: + ins->opcode = GINT_TO_OPCODE (simd_type_to_min_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_FMAX: + ins->opcode = GINT_TO_OPCODE (simd_type_to_max_op (GTMREG_TO_INT (ins->inst_c1))); + break; + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_XBINOP_FORCEINT:{ /* bitwise ops: the opcode does not depend on the element type */ + switch (ins->inst_c0) { + case XBINOP_FORCEINT_AND: + ins->opcode = OP_S390_VN; + break; + case XBINOP_FORCEINT_OR: + ins->opcode = OP_S390_VO; + break; + case XBINOP_FORCEINT_XOR: + ins->opcode = OP_S390_VX; + break; + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_XCAST:{ /* a vector cast is lowered to a plain vector move */ + ins->opcode = OP_XMOVE; + break; + } + case OP_XCOMPARE_FP:{ /* inst_c0 is the CMP_* relation, inst_c1 the element type */ + switch (ins->inst_c0){ + case CMP_EQ: + ins->opcode = GINT_TO_OPCODE (simd_type_to_comp_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_LT_UN: + case CMP_LT: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; /* fallthrough: a < b is emitted as b > a */ + case CMP_GT_UN: + case CMP_GT: + ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_LE_UN: + case CMP_LE: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; /* fallthrough: a <= b is emitted as b >= a */ + case CMP_GE_UN: + case CMP_GE: + ins->opcode = GINT_TO_OPCODE (simd_type_to_ge_fp_op (GTMREG_TO_INT (ins->inst_c1))); + break; + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_XCOMPARE:{ /* integer compare: only equality and greater-than opcodes are used, the other relations are derived from greater-than */ + switch (ins->inst_c0){ + case CMP_EQ: + ins->opcode = GINT_TO_OPCODE (simd_type_to_comp_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_LT: + case CMP_LT_UN: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; /* fallthrough: a < b is emitted as b > a */ + case CMP_GT: + case CMP_GT_UN: + ins->opcode = GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case CMP_GE: + case CMP_GE_UN: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; /* fallthrough: a >= b is emitted as the complement of b > a */ + case CMP_LE: + case CMP_LE_UN:{ + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_op (GTMREG_TO_INT (ins->inst_c1))), 
ins->dreg, ins->sreg1, ins->sreg2); + NEW_SIMD_INS (cfg, ins, temp_ins, OP_S390_VNO, ins->dreg, ins->dreg, ins->dreg); /* VNO of dreg with itself inverts the greater-than result (same trick as the OP_ONES_COMPLEMENT lowering) */ + NULLIFY_INS (ins); + break; + } + default: + g_assert_not_reached (); + break; + } + break; + } + case OP_S390_XCOMPARE_XEXTRACT:{ /* inst_c0 packs (CMP_* relation << 4) | SIMD_EXTR_* kind, inst_c1 is the element type; replaced by a vector compare followed by a scalar op that writes ins->dreg */ + guint32 temp_reg = alloc_ireg(cfg); /* NOTE(review): temp_reg is the destination of a vector compare but is allocated with alloc_ireg — confirm the register class */ + if (!type_is_float(GTMREG_TO_INT(ins->inst_c1))){ + switch (ins->inst_c0 >> 4){ + case CMP_EQ: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_comp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), temp_reg, ins->sreg1, ins->sreg2); + break; + case CMP_LT: + case CMP_LT_UN: + case CMP_GE: + case CMP_GE_UN: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; /* fallthrough with the operands swapped */ + case CMP_GT: + case CMP_GT_UN: + case CMP_LE: + case CMP_LE_UN: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_any_all_op (GTMREG_TO_INT (ins->inst_c1))), temp_reg, ins->sreg1, ins->sreg2); + break; + default: + g_assert_not_reached (); + break; + } + } + else { + switch (ins->inst_c0 >> 4){ + case CMP_EQ: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_comp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), temp_reg, ins->sreg1, ins->sreg2); + break; + case CMP_LT_UN: + case CMP_LT: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; /* fallthrough: a < b is emitted as b > a */ + case CMP_GT_UN: + case CMP_GT: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_gt_any_all_op (GTMREG_TO_INT (ins->inst_c1))), temp_reg, ins->sreg1, ins->sreg2); + break; + case CMP_LE_UN: + case CMP_LE: + temp = ins->sreg1; + ins->sreg1 = ins->sreg2; + ins->sreg2 = temp; /* fallthrough: a <= b is emitted as b >= a */ + case CMP_GE_UN: + case CMP_GE: + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_ge_fp_any_all_op (GTMREG_TO_INT (ins->inst_c1))), temp_reg, ins->sreg1, ins->sreg2); + break; + default: + g_assert_not_reached (); + break; + } + } + if(!type_is_float(GTMREG_TO_INT(ins->inst_c1))){ /* the scalar op has no source registers; presumably it consumes the condition code left by the compare above — TODO confirm */ + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_extract_int_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f), GTMREG_TO_INT 
(ins->inst_c0 >> 4))), ins->dreg, -1, -1); + NULLIFY_INS(ins); + } + else { + NEW_SIMD_INS (cfg, ins, temp_ins, GINT_TO_OPCODE (simd_type_to_extract_fp_op (GTMREG_TO_INT (ins->inst_c0 & 0x0f), GTMREG_TO_INT (ins->inst_c0 >> 4))), ins->dreg, -1, -1); + NULLIFY_INS(ins); + } + } + break; + case OP_VECTOR_IABS: + ins->opcode = GINT_TO_OPCODE (simd_type_to_abs_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_NEGATION: + ins->opcode = GINT_TO_OPCODE (simd_type_to_negate_op (GTMREG_TO_INT (ins->inst_c1))); + break; + case OP_ONES_COMPLEMENT: + ins->opcode = OP_S390_VNO; + ins->sreg2 = ins->sreg1; /* VNO with both sources equal to the operand */ + break; default: break; } @@ -4834,605 +5479,520 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) bb->spill_slot_defs = g_slist_prepend_mempool (cfg->mempool, bb->spill_slot_defs, ins); break; #ifdef MONO_ARCH_SIMD_INTRINSICS - case OP_ADDPS: - s390x_addps (code, ins->sreg1, ins->sreg2); + case OP_XCONST: + S390_SET (code, s390_r13, ins->inst_p0); + s390_vl(code, ins->dreg, 0, s390_r13, 0); break; - case OP_DIVPS: - s390x_divps (code, ins->sreg1, ins->sreg2); + /* TO-DO: provide an alignment hint for the vector loads and stores*/ + case OP_LOADX_ALIGNED_MEMBASE: + case OP_LOADX_MEMBASE: + S390_LONG_VEC(code, vl, vl, ins->dreg, ins->inst_offset, 0, ins->inst_basereg); break; - case OP_MULPS: - s390x_mulps (code, ins->sreg1, ins->sreg2); + case OP_STOREX_ALIGNED_MEMBASE_REG: + case OP_STOREX_MEMBASE: + S390_LONG_VEC(code, vst, vst, ins->sreg1, ins->inst_offset,0, ins->inst_destbasereg); break; - case OP_SUBPS: - s390x_subps (code, ins->sreg1, ins->sreg2); + case OP_S390_VN: + s390_vn (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_MAXPS: - s390x_maxps (code, ins->sreg1, ins->sreg2); + case OP_S390_VNN: + s390_vnn (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_MINPS: - s390x_minps (code, ins->sreg1, ins->sreg2); + case OP_VECTOR_ANDN: + s390_vnc (code, ins->dreg, ins->sreg2, ins->sreg1); break; - case OP_COMPPS: - g_assert 
(ins->inst_c0 >= 0 && ins->inst_c0 <= 7); - s390x_cmpps_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); + case OP_S390_VO: + s390_vo (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_ANDPS: - s390x_andps (code, ins->sreg1, ins->sreg2); + case OP_S390_VNO: + s390_vno (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_ANDNPS: - s390x_andnps (code, ins->sreg1, ins->sreg2); + case OP_S390_VX: + s390_vx (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_ORPS: - s390x_orps (code, ins->sreg1, ins->sreg2); + case OP_S390_VAB: + s390_vab (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_XORPS: - s390x_xorps (code, ins->sreg1, ins->sreg2); + case OP_S390_VAH: + s390_vah (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_SQRTPS: - s390x_sqrtps (code, ins->dreg, ins->sreg1); + case OP_S390_VAF: + s390_vaf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_RSQRTPS: - s390x_rsqrtps (code, ins->dreg, ins->sreg1); + case OP_S390_VAG: + s390_vag (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_RCPPS: - s390x_rcpps (code, ins->dreg, ins->sreg1); + case OP_S390_VFASB: + s390_vfasb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_ADDSUBPS: - s390x_addsubps (code, ins->sreg1, ins->sreg2); + case OP_S390_VFADB: + s390_vfadb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_HADDPS: - s390x_haddps (code, ins->sreg1, ins->sreg2); + case OP_S390_VSB: + s390_vsb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_HSUBPS: - s390x_hsubps (code, ins->sreg1, ins->sreg2); + case OP_S390_VSH: + s390_vsh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_DUPPS_HIGH: - s390x_movshdup (code, ins->dreg, ins->sreg1); + case OP_S390_VSF: + s390_vsf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_DUPPS_LOW: - s390x_movsldup (code, ins->dreg, ins->sreg1); + case OP_S390_VSG: + s390_vsg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PSHUFLEW_HIGH: - g_assert (ins->inst_c0 >= 
0 && ins->inst_c0 <= 0xFF); - s390x_pshufhw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); + case OP_S390_VFSSB: + s390_vfssb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PSHUFLEW_LOW: - g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF); - s390x_pshuflw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); + case OP_S390_VFSDB: + s390_vfsdb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PSHUFLED: - g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF); - s390x_pshufd_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); + case OP_S390_VMLB: + s390_vmlb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_SHUFPS: - g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0xFF); - s390x_shufps_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); + case OP_S390_VMLHW: + s390_vmlhw (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_SHUFPD: - g_assert (ins->inst_c0 >= 0 && ins->inst_c0 <= 0x3); - s390x_shufpd_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); + case OP_S390_VMLF: + s390_vmlf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_ADDPD: - s390x_addpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VFMSB: + s390_vfmsb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_DIVPD: - s390x_divpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VFMDB: + s390_vfmdb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_MULPD: - s390x_mulpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VFDSB: + s390_vfdsb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_SUBPD: - s390x_subpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VFDDB: + s390_vfddb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_MAXPD: - s390x_maxpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VSUMB: + s390_vsumb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_MINPD: - s390x_minpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VSUMH: + s390_vsumh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_COMPPD: - g_assert 
(ins->inst_c0 >= 0 && ins->inst_c0 <= 7); - s390x_cmppd_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); + case OP_S390_VSUMQF: + s390_vsumqf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_ANDPD: - s390x_andpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VSUMQG: + s390_vsumqg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_ANDNPD: - s390x_andnpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VMXB: + s390_vmxb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_ORPD: - s390x_orpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VMXH: + s390_vmxh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_XORPD: - s390x_xorpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VMXF: + s390_vmxf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_SQRTPD: - s390x_sqrtpd (code, ins->dreg, ins->sreg1); + case OP_S390_VMXG: + s390_vmxg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_ADDSUBPD: - s390x_addsubpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VFMAXSB: + /* The max function used here is Java Math.Max() */ + s390_vfmaxsb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); break; - case OP_HADDPD: - s390x_haddpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VFMAXDB: + s390_vfmaxdb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); break; - case OP_HSUBPD: - s390x_hsubpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VMXLB: + s390_vmxlb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_DUPPD: - s390x_movddup (code, ins->dreg, ins->sreg1); + case OP_S390_VMXLH: + s390_vmxlh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_EXTRACT_MASK: - s390x_pmovmskb (code, ins->dreg, ins->sreg1); + case OP_S390_VMXLF: + s390_vmxlf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PAND: - s390x_pand (code, ins->sreg1, ins->sreg2); + case OP_S390_VMXLG: + s390_vmxlg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_POR: - s390x_por (code, ins->sreg1, ins->sreg2); + case OP_S390_VMNB: + s390_vmnb 
(code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PXOR: - s390x_pxor (code, ins->sreg1, ins->sreg2); + case OP_S390_VMNH: + s390_vmnh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PADDB: - s390x_paddb (code, ins->sreg1, ins->sreg2); + case OP_S390_VMNF: + s390_vmnf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PADDW: - s390x_paddw (code, ins->sreg1, ins->sreg2); + case OP_S390_VMNG: + s390_vmng (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PADDD: - s390x_paddd (code, ins->sreg1, ins->sreg2); + case OP_S390_VFMINSB: + /* The min function used here is Java Math.Min() */ + s390_vfminsb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); break; - case OP_PADDQ: - s390x_paddq (code, ins->sreg1, ins->sreg2); + case OP_S390_VFMINDB: + s390_vfmindb (code, ins->dreg, ins->sreg1, ins->sreg2, 1); break; - - case OP_PSUBB: - s390x_psubb (code, ins->sreg1, ins->sreg2); + case OP_S390_VMNLB: + s390_vmnlb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PSUBW: - s390x_psubw (code, ins->sreg1, ins->sreg2); + case OP_S390_VMNLH: + s390_vmnlh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PSUBD: - s390x_psubd (code, ins->sreg1, ins->sreg2); + case OP_S390_VMNLF: + s390_vmnlf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PSUBQ: - s390x_psubq (code, ins->sreg1, ins->sreg2); + case OP_S390_VMNLG: + s390_vmnlg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PMAXB_UN: - s390x_pmaxub (code, ins->sreg1, ins->sreg2); + case OP_S390_VREPIB: + s390_vrepib (code, ins->dreg, ins->inst_c1); break; - case OP_PMAXW_UN: - s390x_pmaxuw (code, ins->sreg1, ins->sreg2); + case OP_S390_VREPIH: + s390_vrepih (code, ins->dreg, ins->inst_c1); break; - case OP_PMAXD_UN: - s390x_pmaxud (code, ins->sreg1, ins->sreg2); + case OP_S390_VREPIF: + s390_vrepif (code, ins->dreg, ins->inst_c1); break; - - case OP_PMAXB: - s390x_pmaxsb (code, ins->sreg1, ins->sreg2); + case OP_S390_VREPIG: + s390_vrepig (code, 
ins->dreg, ins->inst_c1); break; - case OP_PMAXW: - s390x_pmaxsw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCEQBS: + s390_vceqbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PMAXD: - s390x_pmaxsd (code, ins->sreg1, ins->sreg2); + case OP_S390_VCEQB: + s390_vceqb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PAVGB_UN: - s390x_pavgb (code, ins->sreg1, ins->sreg2); + case OP_S390_VCEQHS: + s390_vceqhs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PAVGW_UN: - s390x_pavgw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCEQH: + s390_vceqh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PMINB_UN: - s390x_pminub (code, ins->sreg1, ins->sreg2); + case OP_S390_VCEQFS: + s390_vceqfs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PMINW_UN: - s390x_pminuw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCEQF: + s390_vceqf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PMIND_UN: - s390x_pminud (code, ins->sreg1, ins->sreg2); + case OP_S390_VCEQGS: + s390_vceqgs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PMINB: - s390x_pminsb (code, ins->sreg1, ins->sreg2); + case OP_S390_VCEQG: + s390_vceqg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PMINW: - s390x_pminsw (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCESBS: + s390_vfcesbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PMIND: - s390x_pminsd (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCESB: + s390_vfcesb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PCMPEQB: - s390x_pcmpeqb (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCEDBS: + s390_vfcedbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PCMPEQW: - s390x_pcmpeqw (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCEDB: + s390_vfcedb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PCMPEQD: - s390x_pcmpeqd (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCHSBS: + s390_vfchsbs (code, ins->dreg, 
ins->sreg1, ins->sreg2); break; - case OP_PCMPEQQ: - s390x_pcmpeqq (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCHSB: + s390_vfchsb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PCMPGTB: - s390x_pcmpgtb (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCHDBS: + s390_vfchdbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PCMPGTW: - s390x_pcmpgtw (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCHDB: + s390_vfchdb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PCMPGTD: - s390x_pcmpgtd (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCHESBS: + s390_vfchesbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PCMPGTQ: - s390x_pcmpgtq (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCHESB: + s390_vfchesb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PSUM_ABS_DIFF: - s390x_psadbw (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCHEDBS: + s390_vfchedbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_UNPACK_LOWB: - s390x_punpcklbw (code, ins->sreg1, ins->sreg2); + case OP_S390_VFCHEDB: + s390_vfchedb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_UNPACK_LOWW: - s390x_punpcklwd (code, ins->sreg1, ins->sreg2); + case OP_S390_VGMB: + s390_vgmb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_UNPACK_LOWD: - s390x_punpckldq (code, ins->sreg1, ins->sreg2); + case OP_S390_VGMH: + s390_vgmh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_UNPACK_LOWQ: - s390x_punpcklqdq (code, ins->sreg1, ins->sreg2); + case OP_S390_VGMF: + s390_vgmf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_UNPACK_LOWPS: - s390x_unpcklps (code, ins->sreg1, ins->sreg2); + case OP_S390_VGMG: + s390_vgmg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_UNPACK_LOWPD: - s390x_unpcklpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VECB: + s390_vecb (code, ins->dreg, ins->sreg1); break; - - case OP_UNPACK_HIGHB: - s390x_punpckhbw (code, ins->sreg1, ins->sreg2); + case 
OP_S390_VECH: + s390_vech (code, ins->dreg, ins->sreg1); break; - case OP_UNPACK_HIGHW: - s390x_punpckhwd (code, ins->sreg1, ins->sreg2); + case OP_S390_VECF: + s390_vecf (code, ins->dreg, ins->sreg1); break; - case OP_UNPACK_HIGHD: - s390x_punpckhdq (code, ins->sreg1, ins->sreg2); + case OP_S390_VECG: + s390_vecg (code, ins->dreg, ins->sreg1); break; - case OP_UNPACK_HIGHQ: - s390x_punpckhqdq (code, ins->sreg1, ins->sreg2); + case OP_S390_VECLB: + s390_veclb (code, ins->dreg, ins->sreg1); break; - case OP_UNPACK_HIGHPS: - s390x_unpckhps (code, ins->sreg1, ins->sreg2); + case OP_S390_VECLH: + s390_veclh (code, ins->dreg, ins->sreg1); break; - case OP_UNPACK_HIGHPD: - s390x_unpckhpd (code, ins->sreg1, ins->sreg2); + case OP_S390_VECLF: + s390_veclf (code, ins->dreg, ins->sreg1); break; - - case OP_PACKW: - s390x_packsswb (code, ins->sreg1, ins->sreg2); + case OP_S390_VECLG: + s390_veclg (code, ins->dreg, ins->sreg1); break; - case OP_PACKD: - s390x_packssdw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHBS: + s390_vchbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PACKW_UN: - s390x_packuswb (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHHS: + s390_vchhs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PACKD_UN: - s390x_packusdw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHFS: + s390_vchfs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PADDB_SAT_UN: - s390x_paddusb (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHGS: + s390_vchgs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PSUBB_SAT_UN: - s390x_psubusb (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHB: + s390_vchb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PADDW_SAT_UN: - s390x_paddusw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHH: + s390_vchh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PSUBW_SAT_UN: - s390x_psubusw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHF: + s390_vchf (code, ins->dreg, ins->sreg1, 
ins->sreg2); break; - - case OP_PADDB_SAT: - s390x_paddsb (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHG: + s390_vchg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PSUBB_SAT: - s390x_psubsb (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHLBS: + s390_vchlbs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PADDW_SAT: - s390x_paddsw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHLHS: + s390_vchlhs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PSUBW_SAT: - s390x_psubsw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHLFS: + s390_vchlfs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PMULW: - s390x_pmullw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHLGS: + s390_vchlgs (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PMULD: - s390x_pmulld (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHLB: + s390_vchlb (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PMULQ: - s390x_pmuludq (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHLH: + s390_vchlh (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PMULW_HIGH_UN: - s390x_pmulhuw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHLF: + s390_vchlf (code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_PMULW_HIGH: - s390x_pmulhw (code, ins->sreg1, ins->sreg2); + case OP_S390_VCHLG: + s390_vchlg (code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_PSHRW: - s390x_psrlw_reg_imm (code, ins->dreg, ins->inst_imm); + case OP_S390_VLPB: + s390_vlpb (code, ins->dreg, ins->sreg1); break; - case OP_PSHRW_REG: - s390x_psrlw (code, ins->dreg, ins->sreg2); + case OP_S390_VLPH: + s390_vlph (code, ins->dreg, ins->sreg1); break; - - case OP_PSARW: - s390x_psraw_reg_imm (code, ins->dreg, ins->inst_imm); + case OP_S390_VLPF: + s390_vlpf (code, ins->dreg, ins->sreg1); break; - case OP_PSARW_REG: - s390x_psraw (code, ins->dreg, ins->sreg2); + case OP_S390_VLPG: + s390_vlpg (code, ins->dreg, ins->sreg1); break; - - case OP_PSHLW: - 
s390x_psllw_reg_imm (code, ins->dreg, ins->inst_imm); + case OP_S390_VFLPDB: + s390_vfpsodb (code, ins->dreg, ins->sreg1, 2); break; - case OP_PSHLW_REG: - s390x_psllw (code, ins->dreg, ins->sreg2); + case OP_S390_VFLPSB: + s390_vfpsosb (code, ins->dreg, ins->sreg1, 2); break; - - case OP_PSHRD: - s390x_psrld_reg_imm (code, ins->dreg, ins->inst_imm); + case OP_S390_VFLCDB: + s390_vfpsodb (code, ins->dreg, ins->sreg1, 0); break; - case OP_PSHRD_REG: - s390x_psrld (code, ins->dreg, ins->sreg2); + case OP_S390_VFLCSB: + s390_vfpsosb (code, ins->dreg, ins->sreg1, 0); break; - - case OP_PSARD: - s390x_psrad_reg_imm (code, ins->dreg, ins->inst_imm); + case OP_INSERT_I1: + if (ins->dreg != ins->sreg1) + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgb (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + break; + case OP_INSERT_I2: + if (ins->dreg != ins->sreg1) + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgh (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; - case OP_PSARD_REG: - s390x_psrad (code, ins->dreg, ins->sreg2); + case OP_INSERT_I4: + if (ins->dreg != ins->sreg1) + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgf (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; - - case OP_PSHLD: - s390x_pslld_reg_imm (code, ins->dreg, ins->inst_imm); + case OP_INSERT_I8: + if (ins->dreg != ins->sreg1) + s390_vlr (code, ins->dreg, ins->sreg1); + s390_vlvgg (code, ins->dreg, ins->sreg2, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; - case OP_PSHLD_REG: - s390x_pslld (code, ins->dreg, ins->sreg2); + case OP_INSERT_R4: + s390_vlgvf (code, s390_r13, ins->sreg2, 0, 0); + s390_vlvgf (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; - - case OP_PSHRQ: - s390x_psrlq_reg_imm (code, ins->dreg, ins->inst_imm); + case OP_INSERT_R8: + s390_vlgvg (code, s390_r13, ins->sreg2, 0, 0); + s390_vlvgg (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; - case OP_PSHRQ_REG: - 
s390x_psrlq (code, ins->dreg, ins->sreg2); + case OP_EXTRACT_I1: + s390_vlgvb (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; - - /*TODO: This is appart of the sse spec but not added - case OP_PSARQ: - s390x_psraq_reg_imm (code, ins->dreg, ins->inst_imm); + case OP_EXTRACT_I2: + s390_vlgvh (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; - case OP_PSARQ_REG: - s390x_psraq (code, ins->dreg, ins->sreg2); + case OP_EXTRACT_I4: + s390_vlgvf (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; - */ - - case OP_PSHLQ: - s390x_psllq_reg_imm (code, ins->dreg, ins->inst_imm); + case OP_EXTRACT_I8: + s390_vlgvg (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); break; - case OP_PSHLQ_REG: - s390x_psllq (code, ins->dreg, ins->sreg2); + case OP_EXTRACT_R4: + s390_vlgvf (code, s390_r13, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vlvgf (code, ins->dreg, s390_r13, 0, 0); break; - case OP_CVTDQ2PD: - s390x_cvtdq2pd (code, ins->dreg, ins->sreg1); + case OP_EXTRACT_R8: + s390_vlgvg (code, s390_r13, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_ldgr (code, ins->dreg, s390_r13); break; - case OP_CVTDQ2PS: - s390x_cvtdq2ps (code, ins->dreg, ins->sreg1); + case OP_XEXTRACT_I1: + s390_vlgvb (code, ins->dreg, ins->sreg1, ins->sreg2, 0); break; - case OP_CVTPD2DQ: - s390x_cvtpd2dq (code, ins->dreg, ins->sreg1); + case OP_XEXTRACT_I2: + s390_vlgvh (code, ins->dreg, ins->sreg1, ins->sreg2, 0); break; - case OP_CVTPD2PS: - s390x_cvtpd2ps (code, ins->dreg, ins->sreg1); + case OP_XEXTRACT_I4: + s390_vlgvf (code, ins->dreg, ins->sreg1, ins->sreg2, 0); break; - case OP_CVTPS2DQ: - s390x_cvtps2dq (code, ins->dreg, ins->sreg1); + case OP_XEXTRACT_I8: + s390_vlgvg (code, ins->dreg, ins->sreg1, ins->sreg2, 0); break; - case OP_CVTPS2PD: - s390x_cvtps2pd (code, ins->dreg, ins->sreg1); + case OP_XEXTRACT_R4: + s390_vlgvf (code, s390_r13, ins->sreg1, ins->sreg2, 0); + s390_ldgr (code, 
ins->dreg, s390_r13); break; - case OP_CVTTPD2DQ: - s390x_cvttpd2dq (code, ins->dreg, ins->sreg1); + case OP_XEXTRACT_R8: + s390_vlgvg (code, s390_r13, ins->sreg1, ins->sreg2, 0); + s390_ldgr (code, ins->dreg, s390_r13); break; - case OP_CVTTPS2DQ: - s390x_cvttps2dq (code, ins->dreg, ins->sreg1); + case OP_EXPAND_I1: + s390_vlvgb (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepb (code, ins->dreg, ins->dreg, 0); break; - - case OP_ICONV_TO_X: - amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4); + case OP_EXPAND_I2: + s390_vlvgh (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vreph (code, ins->dreg, ins->dreg, 0); break; - case OP_EXTRACT_I4: - amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); + case OP_EXPAND_I4: + s390_vlvgf (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepf (code, ins->dreg, ins->dreg, 0); break; - case OP_EXTRACT_I8: - if (ins->inst_c0) { - amd64_movhlps (code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg1); - amd64_movd_reg_xreg_size (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG, 8); - } else { - amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 8); - } + case OP_EXPAND_I8: + s390_vlvgg (code, ins->dreg, ins->sreg1, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepg (code, ins->dreg, ins->dreg, 0); break; - case OP_EXTRACT_I1: - case OP_EXTRACT_U1: - amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); - if (ins->inst_c0) - amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8); - amd64_widen_reg (code, ins->dreg, ins->dreg, ins->inst_c1 == OP_EXTRACT_I1, FALSE); + case OP_EXPAND_R4: + s390_vlgvf (code, s390_r13, ins->sreg1, 0, 0); + s390_vlvgf (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepf (code, ins->dreg, ins->dreg, 0); break; - case OP_EXTRACT_I2: - case OP_EXTRACT_U2: - /*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); - if (ins->inst_c0) - amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 
16, 4);*/ - s390x_pextrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); - amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->inst_c1 == OP_EXTRACT_I2, TRUE, 4); + case OP_EXPAND_R8: + s390_lgdr (code, s390_r13, ins->sreg1); + s390_vlvgg (code, ins->dreg, s390_r13, 0, GTMREG_TO_UINT32 (ins->inst_c0)); + s390_vrepg (code, ins->dreg, ins->dreg, 0); break; - case OP_EXTRACT_R8: - if (ins->inst_c0) - amd64_movhlps (code, ins->dreg, ins->sreg1); - else - s390x_movsd (code, ins->dreg, ins->sreg1); + case OP_S390_VPKH: + s390_vpkh ( code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_INSERT_I2: - s390x_pinsrw_imm (code, ins->sreg1, ins->sreg2, ins->inst_c0); - break; - case OP_EXTRACTX_U2: - s390x_pextrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); - break; - case OP_INSERTX_U1_SLOW: - /*sreg1 is the extracted ireg (scratch) - /sreg2 is the to be inserted ireg (scratch) - /dreg is the xreg to receive the value*/ - - /*clear the bits from the extracted word*/ - amd64_alu_reg_imm (code, X86_AND, ins->sreg1, ins->inst_c0 & 1 ? 
0x00FF : 0xFF00); - /*shift the value to insert if needed*/ - if (ins->inst_c0 & 1) - amd64_shift_reg_imm_size (code, X86_SHL, ins->sreg2, 8, 4); - /*join them together*/ - amd64_alu (code, X86_OR, ins->sreg1, ins->sreg2); - s390x_pinsrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0 / 2); - break; - case OP_INSERTX_I4_SLOW: - s390x_pinsrw_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2); - amd64_shift_reg_imm (code, X86_SHR, ins->sreg2, 16); - s390x_pinsrw_imm (code, ins->dreg, ins->sreg2, ins->inst_c0 * 2 + 1); - break; - case OP_INSERTX_I8_SLOW: - amd64_movd_xreg_reg_size(code, MONO_ARCH_FP_SCRATCH_REG, ins->sreg2, 8); - if (ins->inst_c0) - amd64_movlhps (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG); - else - s390x_movsd (code, ins->dreg, MONO_ARCH_FP_SCRATCH_REG); + case OP_S390_VPKF: + s390_vpkf ( code, ins->dreg, ins->sreg1, ins->sreg2); break; - - case OP_INSERTX_R4_SLOW: - switch (ins->inst_c0) { - case 0: - s390x_movss (code, ins->dreg, ins->sreg2); - break; - case 1: - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3)); - s390x_movss (code, ins->dreg, ins->sreg2); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(1, 0, 2, 3)); - break; - case 2: - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3)); - s390x_movss (code, ins->dreg, ins->sreg2); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(2, 1, 0, 3)); - break; - case 3: - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0)); - s390x_movss (code, ins->dreg, ins->sreg2); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, mono_simd_shuffle_mask(3, 1, 2, 0)); - break; - } + case OP_S390_VPKG: + s390_vpkg ( code, ins->dreg, ins->sreg1, ins->sreg2); break; - case OP_INSERTX_R8_SLOW: - if (ins->inst_c0) - amd64_movlhps (code, ins->dreg, ins->sreg2); - else - s390x_movsd (code, ins->dreg, ins->sreg2); + case OP_S390_VLCB: + s390_vlcb (code, ins->dreg, ins->sreg1); break; - case 
OP_STOREX_MEMBASE_REG: - case OP_STOREX_MEMBASE: - s390x_movups_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1); + case OP_S390_VLCH: + s390_vlch (code, ins->dreg, ins->sreg1); break; - case OP_LOADX_MEMBASE: - s390x_movups_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset); + case OP_S390_VLCF: + s390_vlcf (code, ins->dreg, ins->sreg1); break; - case OP_LOADX_ALIGNED_MEMBASE: - s390x_movaps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset); + case OP_S390_VLCG: + s390_vlcg (code, ins->dreg, ins->sreg1); break; - case OP_STOREX_ALIGNED_MEMBASE_REG: - s390x_movaps_membase_reg (code, ins->dreg, ins->inst_offset, ins->sreg1); + case OP_S390_VUPLB: + s390_vuplb (code, ins->dreg, ins->sreg1); break; - case OP_STOREX_NTA_MEMBASE_REG: - s390x_movntps_reg_membase (code, ins->dreg, ins->sreg1, ins->inst_offset); + case OP_S390_VUPLHW: + s390_vuplhw (code, ins->dreg, ins->sreg1); break; - case OP_PREFETCH_MEMBASE: - s390x_prefetch_reg_membase (code, ins->backend.arg_info, ins->sreg1, ins->inst_offset); + case OP_S390_VUPLF: + s390_vuplf (code, ins->dreg, ins->sreg1); break; - - case OP_XMOVE: - /*FIXME the peephole pass should have killed this*/ - if (ins->dreg != ins->sreg1) - s390x_movaps (code, ins->dreg, ins->sreg1); + case OP_S390_VUPLLB: + s390_vupllb (code, ins->dreg, ins->sreg1); break; - case OP_XZERO: - s390x_pxor (code, ins->dreg, ins->dreg); + case OP_S390_VUPLLH: + s390_vupllh (code, ins->dreg, ins->sreg1); break; - case OP_ICONV_TO_R4_RAW: - amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4); + case OP_S390_VUPLLF: + s390_vupllf (code, ins->dreg, ins->sreg1); break; - - case OP_FCONV_TO_R8_X: - s390x_movsd (code, ins->dreg, ins->sreg1); + case OP_S390_VUPHB: + s390_vuphb (code, ins->dreg, ins->sreg1); break; - - case OP_XCONV_R8_TO_I4: - s390x_cvttsd2si_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); - switch (ins->backend.source_opcode) { - case OP_FCONV_TO_I1: - amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, FALSE); - 
break; - case OP_FCONV_TO_U1: - amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, FALSE); - break; - case OP_FCONV_TO_I2: - amd64_widen_reg (code, ins->dreg, ins->dreg, TRUE, TRUE); - break; - case OP_FCONV_TO_U2: - amd64_widen_reg (code, ins->dreg, ins->dreg, FALSE, TRUE); - break; - } + case OP_S390_VUPHH: + s390_vuphh (code, ins->dreg, ins->sreg1); break; - - case OP_EXPAND_I2: - s390x_pinsrw_imm (code, ins->dreg, ins->sreg1, 0); - s390x_pinsrw_imm (code, ins->dreg, ins->sreg1, 1); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0); + case OP_S390_VUPHF: + s390_vuphf (code, ins->dreg, ins->sreg1); break; - case OP_EXPAND_I4: - amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0); + case OP_S390_VUPLHB: + s390_vuplhb (code, ins->dreg, ins->sreg1); break; - case OP_EXPAND_I8: - amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 8); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0x44); + case OP_S390_VUPLHH: + s390_vuplhh (code, ins->dreg, ins->sreg1); break; - case OP_EXPAND_R4: - s390x_movsd (code, ins->dreg, ins->sreg1); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0); + case OP_S390_VUPLHF: + s390_vuplhf (code, ins->dreg, ins->sreg1); break; - case OP_EXPAND_R8: - s390x_movsd (code, ins->dreg, ins->sreg1); - s390x_pshufd_imm (code, ins->dreg, ins->dreg, 0x44); + case OP_S390_VFISB: + s390_vfisb (code, ins->dreg, ins->sreg1, 0, ins->inst_c0); + break; + case OP_S390_VFIDB: + s390_vfidb (code, ins->dreg, ins->sreg1, 0, ins->inst_c0); + break; + case OP_S390_VFSQSB: + s390_vfsqsb (code, ins->dreg, ins->sreg1); + break; + case OP_S390_VFSQDB: + s390_vfsqdb (code, ins->dreg, ins->sreg1); + break; + case OP_XONES: + s390_vgbm (code, ins->dreg, 0xffff); + break; + case OP_XMOVE: + if (ins->dreg != ins->sreg1) + s390_vlr(code, ins->dreg, ins->sreg1); + break; + case OP_XZERO: + s390_vgbm (code, ins->dreg, 0); break; #endif default: diff --git a/src/mono/mono/mini/mini-s390x.h 
b/src/mono/mono/mini/mini-s390x.h index 19fc8c7546a0b5..6e680ba9c6374b 100644 --- a/src/mono/mono/mini/mini-s390x.h +++ b/src/mono/mono/mini/mini-s390x.h @@ -83,6 +83,7 @@ struct SeqPointInfo { #define MONO_ARCH_HAVE_SETUP_RESUME_FROM_SIGNAL_HANDLER_CTX 1 #define MONO_ARCH_HAVE_UNWIND_BACKTRACE 1 #define MONO_ARCH_FLOAT32_SUPPORTED 1 +#define MONO_ARCH_SIMD_INTRINSICS mono_hwcap_s390x_has_vec #define MONO_ARCH_NEED_SIMD_BANK 1 #define MONO_ARCH_USE_SHARED_FP_SIMD_BANK 1 #define S390_STACK_ALIGNMENT 8 @@ -150,7 +151,7 @@ struct SeqPointInfo { /*-----------------------------------------------*/ /* f0 overlaps with v0 and vr16 is used internally */ #define MONO_MAX_XREGS 31 -#define MONO_ARCH_CALLEE_XREGS 0xFFFEFFFE +#define MONO_ARCH_CALLEE_XREGS 0xFFFFFFFE #define MONO_ARCH_CALLEE_SAVED_XREGS 0x0 // Does the ABI have a volatile non-parameter register, so tailcall @@ -294,6 +295,21 @@ s390_patch_addr (guchar *code, guint64 target) s390_##op (loc, r, 0, s390_r13, 0); \ } +#define S390_LONG_VEC(loc, opy, op, r, off, ix, br) \ + if (s390_is_imm12(off)) { \ + s390_##opy (loc, r, ix, br, off); \ + } else { \ + if (ix == 0) { \ + S390_SET(loc, s390_r13, off); \ + s390_la (loc, s390_r13, s390_r13, br, 0); \ + } else { \ + s390_la (loc, s390_r13, ix, br, 0); \ + S390_SET (loc, s390_r0, off); \ + s390_agr (loc, s390_r13, s390_r0); \ + } \ + s390_##op (loc, r, 0, s390_r13, 0); \ + } + #define S390_SET_MASK(loc, dr, v) \ do { \ if (s390_is_imm16 (v)) { \ diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index e839ce05ca29c3..88143637310ef9 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -406,7 +406,7 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna if (type_enum_is_float (arg_type)) { instc0 = OP_FMUL; } else { -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg) && (arg_type == MONO_TYPE_I8 || arg_type == 
MONO_TYPE_U8 || arg_type == MONO_TYPE_I || arg_type == MONO_TYPE_U)) return NULL; #endif @@ -448,7 +448,7 @@ emit_simd_ins_for_binary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSigna static MonoInst* emit_simd_ins_for_unary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSignature *fsig, MonoInst **args, MonoTypeEnum arg_type, int id) { -#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) int op = -1; switch (id){ case SN_Negate: @@ -557,6 +557,12 @@ emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoTypeEnum element_type, Mono } else { return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); } +#elif defined(TARGET_S390X) + MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XCOMPARE_XEXTRACT, arg1->dreg, arg2->dreg); + ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET; + ret->inst_c0 |= ((((gint64)CMP_EQ) << 4) & 0xf0); + ret->inst_c1 = element_type; + return ret; #else MonoInst *ins = emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg); if (!COMPILE_LLVM (cfg)) @@ -686,6 +692,88 @@ get_xconst_int_elem (MonoCompile *cfg, MonoInst *ins, MonoTypeEnum etype, int in } } +#ifdef TARGET_S390X +static int type_to_extract_op (MonoTypeEnum type); + +static int +lower_xcompare_op (int intrinsic_id, MonoTypeEnum etype) +{ + gboolean is_unsigned = type_enum_is_unsigned (etype); + + switch (intrinsic_id) { + case SN_GreaterThan: + case SN_GreaterThanAll: + case SN_GreaterThanAny: + return is_unsigned ? CMP_GT_UN : CMP_GT; + break; + case SN_GreaterThanOrEqual: + case SN_GreaterThanOrEqualAll: + case SN_GreaterThanOrEqualAny: + return is_unsigned ? CMP_GE_UN : CMP_GE; + break; + case SN_LessThan: + case SN_LessThanAll: + case SN_LessThanAny: + return is_unsigned ? CMP_LT_UN : CMP_LT; + break; + case SN_LessThanOrEqual: + case SN_LessThanOrEqualAll: + case SN_LessThanOrEqualAny: + return is_unsigned ? 
CMP_LE_UN : CMP_LE; + break; + default: + g_assert_not_reached (); + } +} + +static MonoInst* +emit_sum_vector (MonoCompile *cfg, MonoType *vector_type, MonoTypeEnum element_type, MonoInst *arg) +{ + MonoClass *vector_class = mono_class_from_mono_type_internal (vector_type); + int op = -1; + MonoInst *tmp = emit_xzero (cfg, vector_class); + MonoInst *ins = arg; + int index = -1; + switch (element_type) { + case MONO_TYPE_R4: + return NULL; + break; + case MONO_TYPE_R8: + return NULL; + break; + case MONO_TYPE_I1: + case MONO_TYPE_U1: + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMB, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMQF, ins->dreg,tmp->dreg); + index = 16; + break; + case MONO_TYPE_I2: + case MONO_TYPE_U2: + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMH, ins->dreg,tmp->dreg); + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMQF, ins->dreg,tmp->dreg); + index = 8; + break; + case MONO_TYPE_I4: + case MONO_TYPE_U4: + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMQF, ins->dreg,tmp->dreg); + index = 4; + break; + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case MONO_TYPE_U8: + ins = emit_simd_ins (cfg, vector_class, OP_S390_VSUMQG, ins->dreg,tmp->dreg); + index = 2; + break; + default: + return NULL; + } + op = type_to_extract_op(element_type); + ins = emit_simd_ins (cfg, vector_class, op, ins->dreg,-1); + ins->inst_c0 = index - 1; + return ins; +} +#endif #ifdef TARGET_ARM64 static int type_to_extract_op (MonoTypeEnum type); static MonoType* get_vector_t_elem_type (MonoType *vector_type); @@ -1473,7 +1561,7 @@ emit_vector_create_scalar ( if (COMPILE_LLVM (cfg)) { opcode = is_unsafe ? 
OP_CREATE_SCALAR_UNSAFE : OP_CREATE_SCALAR; } else { -#ifdef TARGET_AMD64 +#if defined(TARGET_AMD64) || defined(TARGET_S390X) MonoInst *ins; ins = emit_xzero (cfg, vklass); @@ -1819,12 +1907,12 @@ emit_dot (MonoCompile *cfg, MonoClass *klass, MonoType *vector_type, MonoTypeEnu #if defined(TARGET_WASM) if (!COMPILE_LLVM (cfg) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8)) return NULL; -#elif defined(TARGET_ARM64) +#elif defined(TARGET_ARM64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg) && (arg0_type == MONO_TYPE_I8 || arg0_type == MONO_TYPE_U8 || arg0_type == MONO_TYPE_I || arg0_type == MONO_TYPE_U)) return NULL; #endif -#if defined(TARGET_ARM64) || defined(TARGET_WASM) +#if defined(TARGET_ARM64) || defined(TARGET_WASM) || defined(TARGET_S390X) MonoInst *pairwise_multiply = emit_simd_ins (cfg, klass, OP_XBINOP, sreg1, sreg2); pairwise_multiply->inst_c0 = type_enum_is_float (arg0_type) ? OP_FMUL : OP_IMUL; pairwise_multiply->inst_c1 = arg0_type; @@ -1939,7 +2027,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64 -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg)) { if (vector_size != 128) return NULL; @@ -2022,6 +2110,12 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } else { return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); } +#elif defined(TARGET_S390X) + if (type_enum_is_float(arg0_type)) { + return emit_simd_ins_for_sig (cfg, klass, arg0_type == MONO_TYPE_R8 ? 
OP_S390_VFLPDB : OP_S390_VFLPSB, -1, arg0_type, fsig, args); + } else { + return emit_simd_ins_for_sig (cfg, klass, OP_VECTOR_IABS, -1, arg0_type, fsig, args); + } #else return NULL; #endif @@ -2041,7 +2135,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!is_element_type_primitive (fsig->params [0]) || !is_element_type_primitive (fsig->params [1])) return NULL; -#ifndef TARGET_ARM64 +#if !defined(TARGET_ARM64) && !defined(TARGET_S390X) if (((id == SN_Max) || (id == SN_Min)) && type_enum_is_float(arg0_type)) return NULL; #endif @@ -2084,7 +2178,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; #ifdef TARGET_ARM64 return emit_simd_ins_for_sig (cfg, klass, OP_ARM64_BIC, -1, arg0_type, fsig, args); -#elif defined(TARGET_AMD64) || defined(TARGET_WASM) +#elif defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) /* Swap lhs and rhs because Vector128 needs lhs & !rhs whereas SSE2 does !lhs & rhs */ MonoInst *tmp = args[0]; @@ -2179,6 +2273,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi int ceil_or_floor = id == SN_Ceiling ? 10 : 9; return emit_simd_ins_for_sig (cfg, klass, OP_SSE41_ROUNDP, ceil_or_floor, arg0_type, fsig, args); +#elif defined(TARGET_S390X) + int ceil_or_floor = id == SN_Ceiling ? 
6 : 7; + switch (arg0_type){ + case MONO_TYPE_R4: + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VFISB, ceil_or_floor, arg0_type, fsig, args); + case MONO_TYPE_R8: + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VFIDB, ceil_or_floor, arg0_type, fsig, args); + default: + g_assert_not_reached (); + } #else return NULL; #endif @@ -2187,9 +2291,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!is_element_type_primitive (fsig->params [0])) return NULL; -#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) -#if defined(TARGET_AMD64) +#if defined(TARGET_AMD64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg)) { MonoInst *val1 = emit_simd_ins (cfg, klass, OP_XBINOP_FORCEINT, args [0]->dreg, args [1]->dreg); val1->inst_c0 = XBINOP_FORCEINT_AND; @@ -2325,7 +2429,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MonoClass *vklass = mono_class_from_mono_type_internal(fsig->ret); return emit_vector_create_broadcast (cfg, vklass, etype, args [0]); } else if (is_create_from_half_vectors_overload (fsig)) { -#if defined(TARGET_AMD64) +#if defined(TARGET_AMD64) || defined(TARGET_S390X) // Require Vector64 SIMD support if (!COMPILE_LLVM (cfg)) return NULL; @@ -2382,10 +2486,19 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi } } } else { +#ifndef TARGET_S390X MonoInst* cmp = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]); MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); ret->inst_c0 = (id == SN_EqualsAll) ? 
SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; ret->inst_c1 = mono_class_value_size (klass, NULL); +#else + /* we need XCOMPARE(_FP), SIMD_EXTR_ARE_ALL/ANY_SET and CMP_* ops in the same ins to emit correct*/ + /* load on condition instructions */ + MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XCOMPARE_XEXTRACT, args [0]->dreg, args [1]->dreg); + ret->inst_c0 = (id == SN_EqualsAll) ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; + ret->inst_c0 |= ((((gint64)CMP_EQ) << 4) & 0xf0); + ret->inst_c1 = arg0_type; +#endif return ret; } g_assert_not_reached (); @@ -2517,6 +2630,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_simd_ins_for_sig (cfg, klass, OP_SSE_MOVMSK, -1, type, fsig, args); #endif + return NULL; } case SN_GetElement: { int elems; @@ -2606,7 +2720,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return NULL; int op = id == SN_GetLower ? OP_XLOWER : OP_XUPPER; -#ifdef TARGET_AMD64 +#if defined(TARGET_AMD64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg)) /* These return a Vector64 */ return NULL; @@ -2666,10 +2780,20 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_not_xequal (cfg, arg_class, arg0_type, cmp, zero); } } else { +#ifndef TARGET_S390X MonoInst* cmp = emit_xcompare_for_intrinsic (cfg, arg_class, id, arg0_type, args [0], args [1]); MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1); ret->inst_c0 = is_all ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; ret->inst_c1 = mono_class_value_size (klass, NULL); +#else + /* we need XCOMPARE(_FP), SIMD_EXTR_ARE_ALL/ANY_SET and CMP_* ops in the same ins to emit correct*/ + /* load on condition instructions */ + MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_S390_XCOMPARE_XEXTRACT, args[0]->dreg, args[1]->dreg); + int temp = lower_xcompare_op(id, arg0_type); + ret->inst_c0 = is_all ? 
SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET; + ret->inst_c0 |= ((temp << 4) & 0xf0); + ret->inst_c1 = arg0_type; +#endif return ret; } } @@ -2734,7 +2858,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi if (!type_enum_is_float(arg0_type)) return emit_xzero (cfg, klass); int op = -1; -#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) op = OP_ONES_COMPLEMENT; #endif if (op == -1) @@ -2983,6 +3107,19 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi return emit_simd_ins_for_sig (cfg, klass, OP_WASM_EXTRACT_NARROW, -1, -1, fsig, args); } + return NULL; +#elif defined(TARGET_S390X) + switch (arg0_type) { + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VPKH, -1, -1, fsig, args); + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VPKF, -1, -1, fsig, args); + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VPKG, -1, -1, fsig, args); + } return NULL; #else return NULL; @@ -3140,7 +3277,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi case SN_Sum: { if (!is_element_type_primitive (fsig->params [0])) return NULL; -#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) || defined(TARGET_WASM) || defined(TARGET_S390X) return emit_sum_vector (cfg, fsig->params [0], arg0_type, args [0]); #else return NULL; @@ -3157,6 +3294,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi int instc0 = arg0_type == MONO_TYPE_R4 ? INTRINS_SIMD_SQRT_R4 : INTRINS_SIMD_SQRT_R8; return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X, instc0, arg0_type, fsig, args); +#elif defined(TARGET_S390X) + int instc0 = arg0_type == MONO_TYPE_R4 ? 
OP_S390_VFSQSB : OP_S390_VFSQDB; + return emit_simd_ins_for_sig (cfg, klass, instc0, 0, arg0_type, fsig, args); #else return NULL; #endif @@ -3240,7 +3380,7 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi MONO_EMIT_NEW_COND_EXC (cfg, GE_UN, "ArgumentOutOfRangeException"); gboolean use_xextract; -#ifdef TARGET_AMD64 +#if defined(TARGET_AMD64) || defined(TARGET_S390X) use_xextract = FALSE; #else use_xextract = type_to_width_log2 (arg0_type) == 3; @@ -3314,13 +3454,50 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi #elif defined(TARGET_AMD64) // FIXME: return NULL; +#elif defined(TARGET_S390X) + if (id == SN_WidenLower) { + switch (arg0_type){ + case MONO_TYPE_I1: + return emit_simd_ins (cfg, klass, OP_S390_VUPHB, args[0]->dreg, -1); + case MONO_TYPE_U1: + return emit_simd_ins (cfg, klass, OP_S390_VUPLHB, args[0]->dreg, -1); + case MONO_TYPE_I2: + return emit_simd_ins (cfg, klass, OP_S390_VUPHH, args[0]->dreg, -1); + case MONO_TYPE_U2: + return emit_simd_ins (cfg, klass, OP_S390_VUPLHH, args[0]->dreg, -1); + case MONO_TYPE_I4: + return emit_simd_ins (cfg, klass, OP_S390_VUPHF, args[0]->dreg, -1); + case MONO_TYPE_U4: + return emit_simd_ins (cfg, klass, OP_S390_VUPLHF, args[0]->dreg, -1); + default: + return NULL; + } + } + else { + switch (arg0_type){ + case MONO_TYPE_I1: + return emit_simd_ins (cfg, klass, OP_S390_VUPLB, args[0]->dreg, -1); + case MONO_TYPE_U1: + return emit_simd_ins (cfg, klass, OP_S390_VUPLLB, args[0]->dreg, -1); + case MONO_TYPE_I2: + return emit_simd_ins (cfg, klass, OP_S390_VUPLHW, args[0]->dreg, -1); + case MONO_TYPE_U2: + return emit_simd_ins (cfg, klass, OP_S390_VUPLLH, args[0]->dreg, -1); + case MONO_TYPE_I4: + return emit_simd_ins (cfg, klass, OP_S390_VUPLF, args[0]->dreg, -1); + case MONO_TYPE_U4: + return emit_simd_ins (cfg, klass, OP_S390_VUPLLF, args[0]->dreg, -1); + default: + return NULL; + } + } #else return NULL; #endif } case SN_WithLower: case SN_WithUpper: { 
-#ifdef TARGET_AMD64 +#if defined(TARGET_AMD64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg)) /* These return a Vector64 */ return NULL; @@ -3468,7 +3645,7 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f return NULL; #endif -#ifdef TARGET_AMD64 +#if defined(TARGET_AMD64) || defined(TARGET_S390X) if (!COMPILE_LLVM (cfg) && (size != 16)) return NULL; if (size != 16) @@ -3489,6 +3666,50 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f g_assert (sizeof (buf) >= size); memset (buf, 0, sizeof (buf)); +#ifdef TARGET_S390X + /* we directly emit vrepi*/ + if (etype->type != MONO_TYPE_R4 && etype->type != MONO_TYPE_R8) { + switch (etype->type) { + case MONO_TYPE_I1: + case MONO_TYPE_U1: + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VREPIB, etype->type, 1, fsig, args); + case MONO_TYPE_I2: + case MONO_TYPE_U2: + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VREPIH, etype->type, 1, fsig, args); + case MONO_TYPE_I4: + case MONO_TYPE_U4: + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VREPIF, etype->type, 1, fsig, args); + case MONO_TYPE_I: + case MONO_TYPE_U: + case MONO_TYPE_I8: + case MONO_TYPE_U8: + return emit_simd_ins_for_sig (cfg, klass, OP_S390_VREPIG, etype->type, 1, fsig, args); + default: + g_assert_not_reached (); + } + } + switch (etype->type){ + case MONO_TYPE_R4:{ + float *value = (float*)buf; + + for (int i = 0; i < len; ++i) { + value [i] = 1.0f; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + case MONO_TYPE_R8:{ + double *value = (double*)buf; + + for (int i = 0; i < len; ++i) { + value [i] = 1.0; + } + + return emit_xconst_v128 (cfg, klass, (guint8*)value); + } + } +#else + switch (etype->type) { case MONO_TYPE_I1: case MONO_TYPE_U1: { @@ -3559,6 +3780,7 @@ emit_sri_vector_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *f default: g_assert_not_reached (); } +#endif } case SN_op_Addition: case SN_op_BitwiseAnd: @@ -6713,7 +6935,7 @@ 
mono_emit_common_intrinsics (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSi * for function arguments. When using SIMD intrinsics arguments optimized into OP_ARG needs to be decomposed * into corresponding SIMD LOADX/STOREX instructions. */ -#if defined(TARGET_WIN32) && defined(TARGET_AMD64) +#if (defined(TARGET_WIN32) && defined(TARGET_AMD64)) || defined(TARGET_S390X) static gboolean decompose_vtype_opt_uses_simd_intrinsics (MonoCompile *cfg, MonoInst *ins) {