diff --git a/src/mono/mono/arch/s390x/s390x-codegen.h b/src/mono/mono/arch/s390x/s390x-codegen.h index e779f862237f4e..ec9562a0e04744 100644 --- a/src/mono/mono/arch/s390x/s390x-codegen.h +++ b/src/mono/mono/arch/s390x/s390x-codegen.h @@ -896,11 +896,18 @@ typedef struct { #define S390_RIE_4(c,opc,g1,i2,m3) do \ { \ - s390_emit16(c, ((opc & 0xff00) | (g1) << 4); \ + s390_emit16(c, ((opc & 0xff00) | (g1) << 4)); \ s390_emit16(c, (i2)); \ s390_emit16(c, ((m3) << 12 | (opc & 0xff))); \ } while (0) +#define S390_RIE_6(c,opc,g1,g2,i3,i4,i5) do \ +{ \ + s390_emit16(c, ((opc & 0xff00) | ((g1) << 4) | g2)); \ + s390_emit16(c, ((i3) << 8) | i4); \ + s390_emit16(c, ((i5) << 8 | (opc & 0xff))); \ +} while (0) + #define S390_RIL_1(c,opc,g1,m2) do \ { \ s390_emit16(c, ((opc >> 4) << 8 | (g1) << 4 | (opc & 0xf))); \ @@ -918,17 +925,24 @@ typedef struct { s390_emit16(c, ((opc, & 0xff00) | (r1) << 4) | (r2)); \ s390_emit16(c, ((b) << 12) | (d)); \ s390_emit16(c, ((i) << 4) | ((opc) & 0xff)); \ -} +} while (0) #define S390_RRS(c,opc,r1,r2,m3,b,d) do \ { \ s390_emit16(c, ((opc, & 0xff00) | (r1) << 4) | (r2)); \ s390_emit16(c, ((b) << 12) | (d)); \ s390_emit16(c, ((m3) << 12) | ((opc) & 0xff)); \ -} +} while (0) #define S390_SI(c,opc,s1,p1,m2) s390_emit32(c, (opc << 24 | (m2) << 16 | (s1) << 12 | ((p1) & 0xfff))); +#define S390_SIL(c, opc, b, d, i) do \ +{ \ + s390_emit16(c, opc); \ + s390_emit16(c, ((b) << 12) | ((d) & 0x0fff)); \ + s390_emit16(c, ((i) & 0xffff)); \ +} while (0) + #define S390_SIY(c,opc,s1,p1,m2) do \ { \ s390_emit16(c, ((opc & 0xff00) | m2)); \ @@ -1242,7 +1256,7 @@ typedef struct { #define s390_clgij(c, r, i, b) S390_RIE_3(c, 0xec7d, r, i, m, d) #define s390_clgr(c, r1, r2) S390_RRE(c, 0xb921, r1, r2) #define s390_clgdbr(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb3ad, r1, m3, r2, m4) -#define s390_clgebr(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb39c, r1, m3, r2, m4) +#define s390_clgebr(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb3ac, r1, m3, r2, m4) #define s390_clgrj(c, r1, r2, m, 
v) S390_RIE_2(c, 0xec65, r1, r2, m, v) #define s390_clgrb(c, r1, r2, m3, b, d) S390_RRS(c, 0xece5, r1, r2, m3, b, d) #define s390_cli(c, b, d, v) S390_SI(c, 0x95, b, d, v) @@ -1257,6 +1271,7 @@ typedef struct { #define s390_crl(c, r, v) S390_RIL_1(c, 0xc6d, r, v) #define s390_crt(c, r1, r2, m3) S390_RRF_2(c, 0xb972, r1, r2, m3); #define s390_cgrt(c, r1, r2, m3) S390_RRF_2(c, 0xb960, r1, r2, m3); +#define s390_cpsdr(c, r1, r2, r3) S390_RRF_2(c, 0xb372, r1, r2, r3); #define s390_cs(c, r1, r2, b, d) S390_RX(c, 0xba, r1, r2, b, d) #define s390_csg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb30, r1, r2, b, d) #define s390_csst(c, d1, b1, d2, b2, r) S390_SSF(c, 0xc82, b1, d1, b2, d2, r) @@ -1272,6 +1287,9 @@ typedef struct { #define s390_dsgfr(c, r1, r2) S390_RRE(c, 0xb91d, r1, r2) #define s390_dsgr(c, r1, r2) S390_RRE(c, 0xb90d, r1, r2) #define s390_ear(c, r1, r2) S390_RRE(c, 0xb24f, r1, r2) +#define s390_fidbra(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb35f, r1, m3, r2, m4) +#define s390_fiebra(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb357, r1, m3, r2, m4) +#define s390_flogr(c, r1, r2) S390_RRE(c, 0xb983, r1, r2) #define s390_ic(c, r, x, b, d) S390_RX(c, 0x43, r, x, b, d) #define s390_icm(c, r, m, b, d) S390_RX(c, 0xbf, r, m, b, d) #define s390_icmy(c, r, x, b, d) S390_RXY(c, 0xeb81, r, x, b, d) @@ -1388,6 +1406,19 @@ typedef struct { #define s390_lnebr(c, r1, r2) S390_RRE(c, 0xb301, r1, r2) #define s390_lngr(c, r1, r2) S390_RRE(c, 0xb901, r1, r2) #define s390_lnr(c, r1, r2) S390_RR(c, 0x11, r1, r2) +#define s390_loc(c, r, m, b, d) S390_RSY_2(c, 0xebf2, r, m, b, d) +#define s390_locg(c, r, m, b, d) S390_RSY_2(c, 0xebe2, r, m, b, d) +#define s390_locr(c, r1, m, r2) S390_RRF_2(c, 0xb9f2, r1, m, r2) +#define s390_locgr(c, r1, m, r2) S390_RRF_2(c, 0xb9e2, r1, m, r2) +#define s390_locfh(c, r, m, b, d) S390_RSY_2(c, 0xebe0, r, m, b, d) +#define s390_locfhr(c, r1, m, r2) S390_RRF_2(c, 0xb9e0, r1, m, r2) +#define s390_lpdbr(c, r1, r2) S390_RRE(c, 0xb310, r1, r2) +#define s390_lpebr(c, r1, r2) 
S390_RRE(c, 0xb300, r1, r2) +#define s390_lpgr(c, r1, r2) S390_RRE(c, 0xb900, r1, r2) +#define s390_lpr(c, r1, r2) S390_RR(c, 0x10, r1, r2) +#define s390_lr(c, r1, r2) S390_RR(c, 0x18, r1, r2) +#define s390_lrl(c, r1, d) S390_RIL_1(c, 0xc4d, r1, d) +#define s390_lt(c, r, x, b, d) S390_RXY(c, 0xe312, r, x, b, d) #define s390_lpdbr(c, r1, r2) S390_RRE(c, 0xb310, r1, r2) #define s390_lpgr(c, r1, r2) S390_RRE(c, 0xb900, r1, r2) #define s390_lpr(c, r1, r2) S390_RR(c, 0x10, r1, r2) @@ -1419,6 +1450,7 @@ typedef struct { #define s390_msr(c, r1, r2) S390_RRE(c, 0xb252, r1, r2) #define s390_msrkc(c, r1, r2, r3) S390_RRF_1(c, 0xb9fd, r1, r2, r3) #define s390_mvc(c, l, b1, d1, b2, d2) S390_SS_1(c, 0xd2, l, b1, d1, b2, d2) +#define s390_mvghi(c, b1, d1, i2) S390_SIL(c, 0xe548, b1, d1, i2) #define s390_mvcl(c, r1, r2) S390_RR(c, 0x0e, r1, r2) #define s390_mvcle(c, r1, r3, d2, b2) S390_RS_1(c, 0xa8, r1, r3, d2, b2) #define s390_mvi(c, b, d, v) S390_SI(c, 0x92, b, d, v) @@ -1439,7 +1471,7 @@ typedef struct { #define s390_mem(c) S390_RR(c, 0x07, 0xe, 0) #define s390_nr(c, r1, r2) S390_RR(c, 0x14, r1, r2) #define s390_nrk(c, r1, r2) S390_RRF_1(c, 0xb9f4, r1, r2) -#define s390_ny(c, r, x, b, d) S390_RRY(c, 0xe354, r1, r2) +#define s390_ny(c, r, x, b, d) S390_RXY(c, 0xe354, r, x, b, d) #define s390_o(c, r, x, b, d) S390_RX(c, 0x56, r, x, b, d) #define s390_oihf(c, r, v) S390_RIL_1(c, 0xc0c, r, v) #define s390_oihh(c, r, v) S390_RI(c, 0xa58, r, v) @@ -1452,6 +1484,9 @@ typedef struct { #define s390_ogr(c, r1, r2) S390_RRE(c, 0xb981, r1, r2) #define s390_ogrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e6, r1, r2, r3) #define s390_or(c, r1, r2) S390_RR(c, 0x16, r1, r2) +#define s390_oy(c, r, x, b, d) S390_RXY(c, 0xe356, r, x, b, d) +#define s390_popcnt(c, r1, m, r2) S390_RRF_2(c, 0xb9e1, r1, m, r2) +#define s390_risbg(c,r1,r2,i3,i4,i5) S390_RIE_6(c, 0xec55, r1, r2, i3, i4, i5) #define s390_s(c, r, x, b, d) S390_RX(c, 0x5b, r, x, b, d) #define s390_sdb(c, r, x, b, d) S390_RXE(c, 0xed1b, r, x, b, 
d) #define s390_sdbr(c, r1, r2) S390_RRE(c, 0xb31b, r1, r2) diff --git a/src/mono/mono/mini/cpu-s390x.md b/src/mono/mono/mini/cpu-s390x.md index 2af9ffc1a0ed9c..2c896b90bdea96 100644 --- a/src/mono/mono/mini/cpu-s390x.md +++ b/src/mono/mono/mini/cpu-s390x.md @@ -147,11 +147,13 @@ r4_conv_to_i4: dest:i src1:f len:16 r4_conv_to_u4: dest:i src1:f len:32 r4_conv_to_i8: dest:i src1:f len:32 r4_conv_to_r8: dest:f src1:f len:17 +r4_conv_to_u8: dest:i src1:f len:17 r4_conv_to_r4: dest:f src1:f len:17 r4_add: dest:f src1:f src2:f clob:1 len:5 r4_sub: dest:f src1:f src2:f clob:1 len:5 r4_mul: dest:f src1:f src2:f clob:1 len:5 r4_div: dest:f src1:f src2:f clob:1 len:5 +r4_rem: dest:f src1:f src2:f clob:1 len:12 r4_neg: dest:f src1:f clob:1 len:23 r4_ceq: dest:i src1:f src2:f len:35 r4_cgt: dest:i src1:f src2:f len:35 @@ -187,11 +189,13 @@ loadu1_membase: dest:i src1:b len:30 loadu2_membase: dest:i src1:b len:30 loadu4_mem: dest:i len:8 loadu4_membase: dest:i src1:b len:30 -localloc: dest:i src1:i len:110 +localloc: dest:i src1:i len:180 memory_barrier: len:10 move: dest:i src1:i len:4 mul_imm: dest:i src1:i len:24 nop: len:4 +popcnt32: dest:i src1:i len:38 +popcnt64: dest:i src1:i len:34 relaxed_nop: len:4 arglist: src1:i len:28 bigmul: len:2 dest:i src1:a src2:i @@ -218,7 +222,18 @@ zext_i4: dest:i src1:i len:4 shl_imm: dest:i src1:i len:10 shr_imm: dest:i src1:i len:10 shr_un_imm: dest:i src1:i len:10 +abs: dest:f src1:f len:4 +absf: dest:f src1:f len:4 +ceil: dest:f src1:f len:4 +ceilf: dest:f src1:f len:4 +floor: dest:f src1:f len:4 +floorf: dest:f src1:f len:4 +round: dest:f src1:f len:4 sqrt: dest:f src1:f len:4 +sqrtf: dest:f src1:f len:4 +trunc: dest:f src1:f len:4 +truncf: dest:f src1:f len:4 +fcopysign: dest:f src1:f src2:f len:4 start_handler: len:26 store_membase_imm: dest:b len:46 store_membase_reg: dest:b src1:i len:26 diff --git a/src/mono/mono/mini/mini-s390x.c b/src/mono/mono/mini/mini-s390x.c index bc4e9b24d114c6..4f0a400673a10a 100644 --- 
a/src/mono/mono/mini/mini-s390x.c +++ b/src/mono/mono/mini/mini-s390x.c @@ -406,6 +406,9 @@ static const char *typeParm[] = { "General", "Base", "FPR8", "FPR4", "StructByVa /*====================== End of Global Variables ===================*/ +static GENERATE_TRY_GET_CLASS_WITH_CACHE (math, "System", "Math") +static GENERATE_TRY_GET_CLASS_WITH_CACHE (mathf, "System", "MathF") + /** * * @brief Return general register name @@ -834,7 +837,7 @@ mono_arch_cpu_optimizations (guint32 *exclude_mask) /* * No s390-specific optimizations yet */ - *exclude_mask = MONO_OPT_LINEARS; + *exclude_mask = 0; return opts; } @@ -1141,8 +1144,6 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig) cinfo->struct_ret = 1; cinfo->ret.size = size; cinfo->ret.vtsize = size; - // cinfo->ret.reg = s390_r2; - // sz->code_size += 4; } break; case MONO_TYPE_VOID: @@ -1361,23 +1362,14 @@ get_call_info (MonoMemPool *mp, MonoMethodSignature *sig) add_general (&gr, sz, &cinfo->sigCookie); } - /*----------------------------------------------------------*/ - /* If we are passing a structure back then if it won't be */ - /* in a register(s) then we make room at the end of the */ - /* parameters that may have been placed on the stack */ - /*----------------------------------------------------------*/ + /* + * If we are passing a structure back then we make room at + * the end of the parameters that may have been placed on + * the stack + */ if (cinfo->struct_ret) { cinfo->ret.offset = sz->stack_size; - switch (cinfo->ret.size) { - case 0: - case 1: - case 2: - case 4: - case 8: - break; - default: - sz->stack_size += S390_ALIGN(cinfo->ret.size, align); - } + sz->stack_size += S390_ALIGN(cinfo->ret.size, align); } cinfo->lastgr = gr; @@ -1436,7 +1428,9 @@ mono_arch_allocate_vars (MonoCompile *cfg) sig = mono_method_signature_internal (cfg->method); - cinfo = get_call_info (cfg->mempool, sig); + if (!cfg->arch.cinfo) + cfg->arch.cinfo = get_call_info (cfg->mempool, sig); + cinfo = cfg->arch.cinfo; 
/*--------------------------------------------------------------*/ /* local vars are at a positive offset from the stack pointer */ @@ -1663,8 +1657,13 @@ void mono_arch_create_vars (MonoCompile *cfg) { MonoMethodSignature *sig = mono_method_signature_internal (cfg->method); + CallInfo *cinfo; - if (MONO_TYPE_ISSTRUCT (sig->ret)) { + if (!cfg->arch.cinfo) + cfg->arch.cinfo = get_call_info (cfg->mempool, sig); + cinfo = cfg->arch.cinfo; + + if (cinfo->struct_ret) { cfg->vret_addr = mono_compile_create_var (cfg, mono_get_int_type (), OP_ARG); if (G_UNLIKELY (cfg->verbose_level > 1)) { printf ("vret_addr = "); @@ -3446,6 +3445,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) EMIT_COND_SYSTEM_EXCEPTION (S390_CC_LT, "OverflowException"); s390_lgfr (code, ins->dreg, ins->sreg1); break; + case OP_RCONV_TO_R4: + if (ins->dreg != ins->sreg1) + s390_ler (code, ins->dreg, ins->sreg1); + break; + case OP_RCONV_TO_R8: + s390_ldebr (code, ins->dreg, ins->sreg1); + break; case OP_FMOVE: if (ins->dreg != ins->sreg1) { s390_ldr (code, ins->dreg, ins->sreg1); @@ -3476,6 +3482,8 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_S390_SETF4RET: if (!cfg->r4fp) s390_ledbr (code, ins->dreg, ins->sreg1); + else + s390_ldr (code, ins->dreg, ins->sreg1); break; case OP_TLS_GET: { if (s390_is_imm16 (ins->inst_offset)) { @@ -3513,7 +3521,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_TAILCALL : case OP_TAILCALL_REG : case OP_TAILCALL_MEMBASE : { - MonoCallInst *call = (MonoCallInst *) ins; + call = (MonoCallInst *) ins; /* * Restore SP to caller's SP @@ -3592,55 +3600,72 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; case OP_FCALL: { - MonoCallInst *call = (MonoCallInst *) ins; + call = (MonoCallInst *) ins; const MonoJumpInfoTarget patch = mono_call_to_patch (call); code = emit_call (cfg, code, patch.type, patch.target); if (!cfg->r4fp && call->signature->ret->type == 
MONO_TYPE_R4) s390_ldebr (code, s390_f0, s390_f0); } break; + case OP_RCALL: { + call = (MonoCallInst *) ins; + const MonoJumpInfoTarget patch = mono_call_to_patch (call); + code = emit_call (cfg, code, patch.type, patch.target); + if (ins->dreg != s390_f0) + s390_ldr (code, ins->dreg, s390_f0); + break; + } case OP_LCALL: case OP_VCALL: case OP_VCALL2: case OP_VOIDCALL: - case OP_RCALL: case OP_CALL: { - MonoCallInst *call = (MonoCallInst *) ins; + call = (MonoCallInst *) ins; const MonoJumpInfoTarget patch = mono_call_to_patch (call); code = emit_call (cfg, code, patch.type, patch.target); } break; - case OP_FCALL_REG: { + case OP_FCALL_REG: call = (MonoCallInst*)ins; s390_lgr (code, s390_r1, ins->sreg1); s390_basr (code, s390_r14, s390_r1); if (!cfg->r4fp && call->signature->ret->type == MONO_TYPE_R4) s390_ldebr (code, s390_f0, s390_f0); - } break; + case OP_RCALL_REG: + call = (MonoCallInst*)ins; + s390_lgr (code, s390_r1, ins->sreg1); + s390_basr (code, s390_r14, s390_r1); + if (ins->dreg != s390_f0) + s390_ldr (code, ins->dreg, s390_f0); + break; case OP_LCALL_REG: case OP_VCALL_REG: case OP_VCALL2_REG: case OP_VOIDCALL_REG: - case OP_RCALL_REG: case OP_CALL_REG: { s390_lgr (code, s390_r1, ins->sreg1); s390_basr (code, s390_r14, s390_r1); } break; - case OP_FCALL_MEMBASE: { + case OP_FCALL_MEMBASE: call = (MonoCallInst*)ins; s390_lg (code, s390_r1, 0, ins->sreg1, ins->inst_offset); s390_basr (code, s390_r14, s390_r1); if (!cfg->r4fp && call->signature->ret->type == MONO_TYPE_R4) s390_ldebr (code, s390_f0, s390_f0); - } + break; + case OP_RCALL_MEMBASE: + call = (MonoCallInst*)ins; + s390_lg (code, s390_r1, 0, ins->sreg1, ins->inst_offset); + s390_basr (code, s390_r14, s390_r1); + if (ins->dreg != s390_f0) + s390_ldr (code, ins->dreg, s390_f0); break; case OP_LCALL_MEMBASE: case OP_VCALL_MEMBASE: case OP_VCALL2_MEMBASE: case OP_VOIDCALL_MEMBASE: - case OP_RCALL_MEMBASE: case OP_CALL_MEMBASE: { s390_lg (code, s390_r1, 0, ins->sreg1, ins->inst_offset); s390_basr 
(code, s390_r14, s390_r1); @@ -3656,53 +3681,99 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) area_offset = S390_ALIGN(area_offset, S390_STACK_ALIGNMENT); - /* - * Get alloc size and round to doubleword - */ - s390_lgr (code, s390_r1, ins->sreg1); - s390_aghi (code, s390_r1, 14); + /* Get current backchain pointer */ + s390_lg (code, s390_r13, 0, STK_BASE, 0); + + /* + * Round object size to doubleword + */ + s390_lgr (code, s390_r1, ins->sreg1); + s390_aghi (code, s390_r1, 7); s390_srlg (code, s390_r1, s390_r1, 0, 3); s390_sllg (code, s390_r1, s390_r1, 0, 3); - /* - * If we need to initialize then hold on to the length - */ - if (ins->flags & MONO_INST_INIT) - s390_lgr (code, s390_r0, s390_r1); + if (mono_hwcap_s390x_has_gie) { + if (ins->flags & MONO_INST_INIT) + s390_lgr (code, s390_r0, s390_r1); + + s390_lgr (code, s390_r0, s390_r1); + s390_risbg (code, s390_r2, s390_r1, 0, 0xb3, 0); + s390_sgrk (code, s390_r2, STK_BASE, s390_r2); + + s390_cgr (code, STK_BASE, s390_r2); /* L0: */ + s390_je (code, 9); /* je L1 */ + s390_aghi (code, STK_BASE, -4096); + s390_mvghi (code, s390_r15, 0, 0); + s390_j (code, -9); /* j L0 */ + + s390_risbg (code, s390_r2, s390_r1, 0x34, 0xbf, 0); /* L1: */ + s390_ltgr (code, s390_r2, s390_r2); + s390_jz (code, 13); /* jz L2: */ + + s390_sgr (code, STK_BASE, s390_r2); + s390_risbg (code, s390_r1, s390_r1, 0x34, 0xbf, 0); + s390_lay (code, s390_r1, s390_r1, STK_BASE, -8); + s390_mvghi (code, s390_r1, 0, 0); + /* L2: */ + } else { + s390_lgr (code, s390_r2, s390_r1); + s390_nill (code, s390_r2, 0xf000); + s390_lgr (code, s390_r0, STK_BASE); + s390_sgr (code, s390_r0, s390_r2); + s390_lgr (code, s390_r2, s390_r0); + + s390_cgr (code, STK_BASE, s390_r2); /* L0: */ + s390_je (code, 11); /* je L1 */ + s390_aghi (code, STK_BASE, -4096); + s390_lghi (code, s390_r0, 0); + s390_stg (code, s390_r0, 0, STK_BASE, 4088); + s390_j (code, -11); /* j L0 */ + + s390_lghi (code, s390_r2, 4095); /* L1: */ + s390_ngr (code, s390_r2, 
s390_r1); + s390_ltgr (code, s390_r2, s390_r2); + s390_jz (code, 7); /* jz L2 */ + + s390_sgr (code, STK_BASE, s390_r2); + s390_stg (code, s390_r2, s390_r1, STK_BASE, -8); + /* L2: */ + if (ins->flags & MONO_INST_INIT) + s390_lgr (code, s390_r0, s390_r1); + } - /* - * Adjust the stack pointer and save the backchain - */ - s390_lg (code, s390_r13, 0, STK_BASE, 0); - s390_sgr (code, STK_BASE, s390_r1); - s390_stg (code, s390_r13, 0, STK_BASE, 0); + /* + * Compute address of localloc'd object + */ + s390_lgr (code, s390_r1, STK_BASE); + if (s390_is_imm16(area_offset)) + s390_aghi (code, s390_r1, area_offset); + else + s390_agfi (code, s390_r1, area_offset); + s390_aghi (code, s390_r1, 7); + s390_srlg (code, s390_r1, s390_r1, 0, 3); + s390_sllg (code, s390_r1, s390_r1, 0, 3); + s390_lgr (code, ins->dreg, s390_r1); - /* - * Skip the stack save requirements and point to localloc area - * and ensure it's correctly aligned - */ - s390_la (code, ins->dreg, 0, STK_BASE, area_offset); - s390_aghi (code, ins->dreg, 7); - s390_srlg (code, ins->dreg, ins->dreg, 0, 3); - s390_sllg (code, ins->dreg, ins->dreg, 0, 3); + /* Save backchain pointer */ + s390_stg (code, s390_r13, 0, STK_BASE, 0); /* * If we need to zero the area then clear from localloc start * using the length we saved earlier - */ - if (ins->flags & MONO_INST_INIT) { - s390_lgr (code, s390_r1, s390_r0); - s390_lgr (code, s390_r0, ins->dreg); - s390_lgr (code, s390_r14, s390_r12); - s390_lghi (code, s390_r13, 0); - s390_mvcle(code, s390_r0, s390_r12, 0, 0); - s390_jo (code, -2); - s390_lgr (code, s390_r12, s390_r14); - } - - /* - * If we have an LMF then we have to adjust its BP */ + if (ins->flags & MONO_INST_INIT) { + s390_lgr (code, s390_r1, s390_r0); + s390_lgr (code, s390_r0, ins->dreg); + s390_lgr (code, s390_r14, s390_r12); + s390_lghi (code, s390_r13, 0); + s390_mvcle(code, s390_r0, s390_r12, 0, 0); + s390_jo (code, -2); + s390_lgr (code, s390_r12, s390_r14); + } + + /* + * If we have an LMF then we have to 
adjust its BP + */ if (cfg->method->save_lmf) { int lmfOffset = cfg->stack_usage - sizeof(MonoLMF); @@ -3714,7 +3785,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) S390_SET (code, s390_r13, lmfOffset); } s390_stg (code, s390_r15, s390_r13, cfg->frame_reg, - MONO_STRUCT_OFFSET(MonoLMF, ebp)); + MONO_STRUCT_OFFSET(MonoLMF, ebp)); } } break; @@ -4085,9 +4156,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } else { S390_SET (code, s390_r13, ins->inst_p0); s390_le (code, ins->dreg, 0, s390_r13, 0); - if (!cfg->r4fp) { + if (!cfg->r4fp) s390_ldebr (code, ins->dreg, ins->dreg); - } + else + s390_le (code, ins->dreg, 0, s390_r13, 0); } } break; @@ -4135,7 +4207,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_LCONV_TO_R_UN: { if (mono_hwcap_s390x_has_fpe) { - s390_cdlgbr (code, ins->dreg, 5, ins->sreg1, 0); + s390_cdlgbr (code, ins->dreg, 6, ins->sreg1, 0); } else { short int *jump; s390_lgdr (code, s390_r0, s390_r15); @@ -4214,7 +4286,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_FCONV_TO_U4: case OP_FCONV_TO_U: if (mono_hwcap_s390x_has_fpe) { - s390_clfdbr (code, ins->dreg, 5, ins->sreg1, 0); + s390_clgdbr (code, ins->dreg, 5, ins->sreg1, 0); } else { code = emit_double_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE); } @@ -4265,11 +4337,11 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) break; case OP_RCONV_TO_I4: case OP_RCONV_TO_I: - s390_cfebr (code, ins->dreg, 5, ins->sreg1); + s390_cgebr (code, ins->dreg, 5, ins->sreg1); break; case OP_RCONV_TO_U4: if (mono_hwcap_s390x_has_fpe) { - s390_clfebr (code, ins->dreg, 5, ins->sreg1, 0); + s390_clgebr (code, ins->dreg, 5, ins->sreg1, 0); } else { code = emit_float_to_int (cfg, code, ins->dreg, ins->sreg1, 4, FALSE); } @@ -4308,14 +4380,42 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) PTRSLOT(code, o[4]); } break; - case OP_ABS: { + case OP_ABS: 
s390_lpdbr (code, ins->dreg, ins->sreg1); - } break; - case OP_SQRT: { + case OP_ABSF: + s390_lpebr (code, ins->dreg, ins->sreg1); + break; + case OP_CEIL: + s390_fidbra (code, ins->dreg, 6, ins->sreg1, 4); + break; + case OP_CEILF: + s390_fiebra (code, ins->dreg, 6, ins->sreg1, 4); + break; + case OP_FLOOR: + s390_fidbra (code, ins->dreg, 7, ins->sreg1, 4); + break; + case OP_FLOORF: + s390_fiebra (code, ins->dreg, 7, ins->sreg1, 4); + break; + case OP_FCOPYSIGN: + s390_cpsdr (code, ins->dreg, ins->sreg2, ins->sreg1); + break; + case OP_ROUND: + s390_fidbra (code, ins->dreg, 4, ins->sreg1, 4); + break; + case OP_SQRT: s390_sqdbr (code, ins->dreg, ins->sreg1); - } break; + case OP_SQRTF: + s390_sqebr (code, ins->dreg, ins->sreg1); + break; + case OP_TRUNC: + s390_fidbra (code, ins->dreg, 5, ins->sreg1, 4); + break; + case OP_TRUNCF: + s390_fiebra (code, ins->dreg, 5, ins->sreg1, 4); + break; case OP_FADD: { CHECK_SRCDST_COM_F; s390_adbr (code, ins->dreg, src2); @@ -4433,56 +4533,80 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; case OP_RCEQ: { - s390_cdbr (code, ins->sreg1, ins->sreg2); + if (cfg->r4fp) + s390_cebr (code, ins->sreg1, ins->sreg2); + else + s390_cdbr (code, ins->sreg1, ins->sreg2); s390_lghi (code, ins->dreg, 1); s390_je (code, 4); s390_lghi (code, ins->dreg, 0); } break; case OP_RCLT: { - s390_cdbr (code, ins->sreg1, ins->sreg2); + if (cfg->r4fp) + s390_cebr (code, ins->sreg1, ins->sreg2); + else + s390_cdbr (code, ins->sreg1, ins->sreg2); s390_lghi (code, ins->dreg, 1); s390_jl (code, 4); s390_lghi (code, ins->dreg, 0); } break; case OP_RCLT_UN: { - s390_cdbr (code, ins->sreg1, ins->sreg2); + if (cfg->r4fp) + s390_cebr (code, ins->sreg1, ins->sreg2); + else + s390_cdbr (code, ins->sreg1, ins->sreg2); s390_lghi (code, ins->dreg, 1); s390_jlo (code, 4); s390_lghi (code, ins->dreg, 0); } break; case OP_RCGT: { - s390_cdbr (code, ins->sreg1, ins->sreg2); + if (cfg->r4fp) + s390_cebr (code, ins->sreg1, ins->sreg2); + 
else + s390_cdbr (code, ins->sreg1, ins->sreg2); s390_lghi (code, ins->dreg, 1); s390_jh (code, 4); s390_lghi (code, ins->dreg, 0); } break; case OP_RCGT_UN: { - s390_cdbr (code, ins->sreg1, ins->sreg2); + if (cfg->r4fp) + s390_cebr (code, ins->sreg1, ins->sreg2); + else + s390_cdbr (code, ins->sreg1, ins->sreg2); s390_lghi (code, ins->dreg, 1); s390_jho (code, 4); s390_lghi (code, ins->dreg, 0); } break; case OP_RCNEQ: { - s390_cdbr (code, ins->sreg1, ins->sreg2); + if (cfg->r4fp) + s390_cebr (code, ins->sreg1, ins->sreg2); + else + s390_cdbr (code, ins->sreg1, ins->sreg2); s390_lghi (code, ins->dreg, 1); s390_jne (code, 4); s390_lghi (code, ins->dreg, 0); } break; case OP_RCGE: { - s390_cdbr (code, ins->sreg1, ins->sreg2); + if (cfg->r4fp) + s390_cebr (code, ins->sreg1, ins->sreg2); + else + s390_cdbr (code, ins->sreg1, ins->sreg2); s390_lghi (code, ins->dreg, 1); s390_jhe (code, 4); s390_lghi (code, ins->dreg, 0); } break; case OP_RCLE: { - s390_cebr (code, ins->sreg1, ins->sreg2); + if (cfg->r4fp) + s390_cebr (code, ins->sreg1, ins->sreg2); + else + s390_cdbr (code, ins->sreg1, ins->sreg2); s390_lghi (code, ins->dreg, 1); s390_jle (code, 4); s390_lghi (code, ins->dreg, 0); @@ -4655,6 +4779,33 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) case OP_MEMORY_BARRIER: s390_mem (code); break; + case OP_POPCNT32: + s390_llgfr (code, s390_r1, ins->sreg1); + if (mono_hwcap_s390x_has_mie3) { + s390_popcnt (code, ins->dreg, 0x80, s390_r1); + } else { + s390_popcnt (code, s390_r0, 0, s390_r1); + s390_ahhlr (code, s390_r0, s390_r0, s390_r0); + s390_sllg (code, s390_r1, s390_r0, 0, 16); + s390_algr (code, s390_r0, s390_r1); + s390_sllg (code, s390_r1, s390_r0, 0, 8); + s390_algr (code, s390_r0, s390_r1); + s390_srlg (code, ins->dreg, s390_r0, 0, 56); + } + break; + case OP_POPCNT64: + if (mono_hwcap_s390x_has_mie3) { + s390_popcnt (code, ins->dreg, 0x80, ins->sreg1); + } else { + s390_popcnt (code, s390_r0, 0, ins->sreg1); /* load the counts into r0 before folding, as OP_POPCNT32 does */ + s390_ahhlr (code, s390_r0, s390_r0, s390_r0); + s390_sllg (code, s390_r1, 
s390_r0, 0, 16); + s390_algr (code, s390_r0, s390_r1); + s390_sllg (code, s390_r1, s390_r0, 0, 8); + s390_algr (code, s390_r0, s390_r1); + s390_srlg (code, ins->dreg, s390_r0, 0, 56); + } + break; case OP_LIVERANGE_START: { if (cfg->verbose_level > 1) printf ("R%d START=0x%x\n", MONO_VARINFO (cfg, ins->inst_c0)->vreg, (int)(code - cfg->native_code)); @@ -5131,17 +5281,19 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb) } break; case OP_EXTRACT_I1: + case OP_EXTRACT_U1: amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); if (ins->inst_c0) amd64_shift_reg_imm (code, X86_SHR, ins->dreg, ins->inst_c0 * 8); - amd64_widen_reg (code, ins->dreg, ins->dreg, ins->inst_c1 == MONO_TYPE_I1, FALSE); + amd64_widen_reg (code, ins->dreg, ins->dreg, ins->inst_c1 == OP_EXTRACT_I1, FALSE); break; case OP_EXTRACT_I2: + case OP_EXTRACT_U2: /*amd64_movd_reg_xreg_size (code, ins->dreg, ins->sreg1, 4); if (ins->inst_c0) amd64_shift_reg_imm_size (code, X86_SHR, ins->dreg, 16, 4);*/ s390x_pextrw_imm (code, ins->dreg, ins->sreg1, ins->inst_c0); - amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->inst_c1 == MONO_TYPE_I2, TRUE, 4); + amd64_widen_reg_size (code, ins->dreg, ins->dreg, ins->inst_c1 == OP_EXTRACT_I2, TRUE, 4); break; case OP_EXTRACT_R8: if (ins->inst_c0) @@ -5554,15 +5706,19 @@ mono_arch_emit_prolog (MonoCompile *cfg) cfg->rgctx_var->inst_offset); } -#if 0 +#if 1 +char *methodName = getenv("MONO_TRACE_METHOD"); +if (methodName != NULL) { printf("ns: %s k: %s m: %s\n",method->klass->name_space,method->klass->name,method->name);fflush(stdout); // Tests:set_ip -if ((strcmp(method->klass->name_space,"") == 0) && - (strcmp(method->klass->name,"Tests") == 0) && - (strcmp(method->name, "set_ip") == 0)) { - // (strcmp("CancellationToken,TaskCreationOptions,TaskContinuationOptions,TaskScheduler",mono_signature_get_desc(method->signature, FALSE)) != 0)) { - printf("SIGNATURE: %s\n",mono_signature_get_desc(method->signature, FALSE)); fflush(stdout); - s390_j 
(code, 0); +//if ((strcmp(method->klass->name_space,"") == 0) && +// (strcmp(method->klass->name,"Tests") == 0) && +// (strcmp(method->name, "set_ip") == 0)) { +// (strcmp("CancellationToken,TaskCreationOptions,TaskContinuationOptions,TaskScheduler",mono_signature_get_desc(method->signature, FALSE)) != 0)) { + if ((strcmp(method->name, methodName) == 0)) { + printf("SIGNATURE: %s\n",mono_signature_get_desc(method->signature, FALSE)); fflush(stdout); + s390_j (code, 0); + } } #endif @@ -5583,13 +5739,16 @@ if ((strcmp(method->klass->name_space,"") == 0) && sig = mono_method_signature_internal (method); pos = 0; - cinfo = get_call_info (cfg->mempool, sig); + cinfo = cfg->arch.cinfo; if (cinfo->struct_ret) { ArgInfo *ainfo = &cinfo->ret; inst = cfg->vret_addr; inst->backend.size = ainfo->vtsize; - s390_stg (code, ainfo->reg, 0, inst->inst_basereg, inst->inst_offset); + if (inst->opcode == OP_REGVAR) + s390_lgr (code, inst->dreg, ainfo->reg); + else + s390_stg (code, ainfo->reg, 0, inst->inst_basereg, inst->inst_offset); } /** @@ -6067,6 +6226,97 @@ mono_arch_emit_inst_for_method (MonoCompile *cfg, MonoMethod *cmethod, MonoMetho { MonoInst *ins = NULL; + int opcode = 0; + MonoStackType stack_type = STACK_R8; + + if (cmethod->klass == mono_class_try_get_math_class ()) { + // unary double + if (fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R8) { + if (strcmp (cmethod->name, "Abs") == 0) { + opcode = OP_ABS; + } else if (strcmp (cmethod->name, "Ceiling") == 0) { + opcode = OP_CEIL; + } else if (strcmp (cmethod->name, "Floor") == 0) { + opcode = OP_FLOOR; + } else if (strcmp (cmethod->name, "Round") == 0) { + opcode = OP_ROUND; + } else if (strcmp (cmethod->name, "Sqrt") == 0) { + opcode = OP_SQRT; + } else if (strcmp (cmethod->name, "Truncate") == 0) { + opcode = OP_TRUNC; + } + } + // unary float (overloaded) + else if (fsig->param_count == 1 && fsig->params [0]->type == MONO_TYPE_R4) { + if (strcmp (cmethod->name, "Abs") == 0) { + if (cfg->r4fp) { + 
opcode = OP_ABSF; + stack_type = STACK_R4; + } else { + opcode = OP_ABS; + } + } + } + // binary double + else if (fsig->param_count == 2 && fsig->params [0]->type == MONO_TYPE_R8 && fsig->params [1]->type == MONO_TYPE_R8) { + if (strcmp (cmethod->name, "CopySign") == 0) { + opcode = OP_FCOPYSIGN; + } + } + } else if (cmethod->klass == mono_class_try_get_mathf_class ()) { + if (fsig->param_count == 1) { + stack_type = STACK_R4; + if (strcmp (cmethod->name, "Abs") == 0) { + if (cfg->r4fp) { + opcode = OP_ABSF; + stack_type = STACK_R4; + } else { + opcode = OP_ABS; + } + } else if (strcmp (cmethod->name, "Ceiling") == 0) { + if (cfg->r4fp) { + opcode = OP_CEILF; + stack_type = STACK_R4; + } else { + opcode = OP_CEIL; + } + } else if (strcmp (cmethod->name, "Floor") == 0) { + if (cfg->r4fp) { + opcode = OP_FLOORF; + stack_type = STACK_R4; + } else { + opcode = OP_FLOOR; + } + } else if (strcmp (cmethod->name, "Sqrt") == 0) { + if (cfg->r4fp) { + opcode = OP_SQRTF; + stack_type = STACK_R4; + } else { + opcode = OP_SQRT; + } + } else if (strcmp (cmethod->name, "Truncate") == 0) { + if (cfg->r4fp) { + opcode = OP_TRUNCF; + stack_type = STACK_R4; + } else { + opcode = OP_TRUNC; + } + } + } + } + + if (opcode) { + MONO_INST_NEW (cfg, ins, opcode); + ins->type = stack_type; + ins->dreg = mono_alloc_freg (cfg); + ins->sreg1 = args [0]->dreg; + if (fsig->param_count > 1) { + ins->sreg2 = args [1]->dreg; + } + g_assert (fsig->param_count <= 2); + MONO_ADD_INS (cfg->cbb, ins); + } + return ins; } diff --git a/src/mono/mono/mini/mini-s390x.h b/src/mono/mono/mini/mini-s390x.h index d7d3566a1889b6..9a1aee5721aca1 100644 --- a/src/mono/mono/mini/mini-s390x.h +++ b/src/mono/mono/mini/mini-s390x.h @@ -38,6 +38,7 @@ typedef struct MonoCompileArch { int fpSize; /** Size of floating point save area */ MonoInst *ss_tramp_var; /** Single-step variable */ MonoInst *bp_tramp_var; /** Breakpoint variable */ + CallInfo *cinfo; /** Caller information */ guint8 *thunks; 
/** Thunking area */ int thunks_size; /** Size of thunking area */ } MonoCompileArch; @@ -57,7 +58,6 @@ struct SeqPointInfo { }; #define MONO_ARCH_SIGSEGV_ON_ALTSTACK 1 -#define MONO_ARCH_EMULATE_LCONV_TO_R8_UN 1 #define MONO_ARCH_NO_EMULATE_LONG_MUL_OPTS 1 #define MONO_ARCH_NO_EMULATE_LONG_SHIFT_OPS 1 #define MONO_ARCH_HAVE_IS_INT_OVERFLOW 1 @@ -83,6 +83,7 @@ struct SeqPointInfo { #define MONO_ARCH_HAVE_SDB_TRAMPOLINES 1 #define MONO_ARCH_HAVE_SETUP_RESUME_FROM_SIGNAL_HANDLER_CTX 1 #define MONO_ARCH_HAVE_UNWIND_BACKTRACE 1 +#define MONO_ARCH_FLOAT32_SUPPORTED 1 #define S390_STACK_ALIGNMENT 8 #define S390_FIRST_ARG_REG s390_r2