diff --git a/hphp/runtime/base/runtime_option.h b/hphp/runtime/base/runtime_option.h index 39db391e3..f56ff6741 100644 --- a/hphp/runtime/base/runtime_option.h +++ b/hphp/runtime/base/runtime_option.h @@ -432,6 +432,7 @@ public: F(bool, HHIREnableCoalescing, true) \ F(bool, HHIREnableRefCountOpt, true) \ F(bool, HHIREnableSinking, true) \ + F(bool, HHIRAllocXMMRegs, true) \ F(bool, HHIRGenerateAsserts, debug) \ F(bool, HHIRDirectExit, true) \ F(bool, HHIRDisableTx64, true) \ diff --git a/hphp/runtime/vm/translator/abi-x64.h b/hphp/runtime/vm/translator/abi-x64.h index 1da8f5f09..40556ab6f 100644 --- a/hphp/runtime/vm/translator/abi-x64.h +++ b/hphp/runtime/vm/translator/abi-x64.h @@ -77,16 +77,56 @@ const RegSet kCallerSaved = RegSet() // r10 is reserved by the assembler, and for // various extremely-specific scratch uses. | RegSet(reg::r11) + // XMM regs + // | RegSet(reg::xmm0) Reserved for rMMXScratch0 + // | RegSet(reg::xmm1) Reserved for rMMXScratch1 + | RegSet(reg::xmm2) + | RegSet(reg::xmm3) + | RegSet(reg::xmm4) + | RegSet(reg::xmm5) + | RegSet(reg::xmm6) + | RegSet(reg::xmm7) + | RegSet(reg::xmm8) + | RegSet(reg::xmm9) + | RegSet(reg::xmm10) + | RegSet(reg::xmm11) + | RegSet(reg::xmm12) + | RegSet(reg::xmm13) + | RegSet(reg::xmm14) + | RegSet(reg::xmm15) ; const RegSet kCalleeSaved = RegSet() // r12 is reserved for rVmTl | RegSet(reg::r13) | RegSet(reg::r14) - | RegSet(reg::r15); + | RegSet(reg::r15) + ; const RegSet kAllRegs = kCallerSaved | kCalleeSaved; +const RegSet kMMXRegs = RegSet() + | RegSet(reg::xmm0) + | RegSet(reg::xmm1) + | RegSet(reg::xmm2) + | RegSet(reg::xmm3) + | RegSet(reg::xmm4) + | RegSet(reg::xmm5) + | RegSet(reg::xmm6) + | RegSet(reg::xmm7) + | RegSet(reg::xmm8) + | RegSet(reg::xmm9) + | RegSet(reg::xmm10) + | RegSet(reg::xmm11) + | RegSet(reg::xmm12) + | RegSet(reg::xmm13) + | RegSet(reg::xmm14) + | RegSet(reg::xmm15) + ; + +const RegSet kGPCallerSaved = kCallerSaved - kMMXRegs; +const RegSet kGPCalleeSaved = kCalleeSaved - kMMXRegs; 
+ ////////////////////////////////////////////////////////////////////// /* * Registers reserved for cross-tracelet ABI purposes. @@ -276,7 +316,6 @@ inline SRFlags operator|(SRFlags a, SRFlags b) { // Set of all the x64 registers. const RegSet kAllX64Regs = RegSet(kAllRegs).add(reg::r10) | kSpecialCrossTraceRegs; -const int kNumX64Regs = 16; /* * Some data structures are accessed often enough from translated code diff --git a/hphp/runtime/vm/translator/hopt/check.cpp b/hphp/runtime/vm/translator/hopt/check.cpp index 1a9e6f95f..9ad6d3ede 100644 --- a/hphp/runtime/vm/translator/hopt/check.cpp +++ b/hphp/runtime/vm/translator/hopt/check.cpp @@ -94,7 +94,7 @@ bool checkCfg(Trace* trace, const IRFactory& factory) { } enum Limits : unsigned { - kNumRegisters = Transl::kNumX64Regs, + kNumRegisters = Transl::kNumRegs, kNumSlots = NumPreAllocatedSpillLocs }; diff --git a/hphp/runtime/vm/translator/hopt/codegen.cpp b/hphp/runtime/vm/translator/hopt/codegen.cpp index d02628b8a..997a41ca3 100644 --- a/hphp/runtime/vm/translator/hopt/codegen.cpp +++ b/hphp/runtime/vm/translator/hopt/codegen.cpp @@ -109,6 +109,18 @@ struct MoveInfo { PhysReg m_reg1, m_reg2; }; +template +static bool cycleHasMMXReg(const CycleInfo& cycle, + const int (&moves)[N]) { + int first = cycle.node; + int node = first; + do { + if (PhysReg(node).isXMM()) return true; + node = moves[node]; + } while (node != first); + return false; +} + template void doRegMoves(int (&moves)[N], int rTmp, std::vector& howTo) { @@ -184,11 +196,13 @@ pathloop: } // Deal with any cycles we encountered for (int i = 0; i < numCycles; ++i) { - if (cycles[i].length == 2) { + // can't use xchg if one of the registers is MMX + bool hasMMXReg = cycleHasMMXReg(cycles[i], moves); + if (cycles[i].length == 2 && !hasMMXReg) { int v = cycles[i].node; int w = moves[v]; howTo.push_back(MoveInfo(MoveInfo::Xchg, w, v)); - } else if (cycles[i].length == 3) { + } else if (cycles[i].length == 3 && !hasMMXReg) { int v = cycles[i].node; int w = 
moves[v]; howTo.push_back(MoveInfo(MoveInfo::Xchg, w, v)); @@ -481,13 +495,46 @@ Address CodeGenerator::emitSmashableFwdJcc(ConditionCode cc, Block* target, return start; } -void emitLoadImm(CodeGenerator::Asm& as, int64_t val, PhysReg dstReg) { - as.emitImmReg(val, dstReg); -} - static void emitMovRegReg(CodeGenerator::Asm& as, PhysReg srcReg, PhysReg dstReg) { - if (srcReg != dstReg) as.movq(srcReg, dstReg); + assert(srcReg != InvalidReg); + assert(dstReg != InvalidReg); + + if (srcReg == dstReg) return; + + if (srcReg.isGP()) { + if (dstReg.isGP()) { // GP => GP + as.movq(srcReg, dstReg); + } else { // GP => MMX + // This generates a movq x86 instruction, which zero extends + // the 64-bit value in srcReg into a 128-bit XMM register + as.mov_reg64_xmm(srcReg, dstReg); + } + } else { + if (dstReg.isGP()) { // MMX => GP + as.mov_xmm_reg64(srcReg, dstReg); + } else { // MMX => MMX + // This copies all 128 bits in XMM, + // thus avoiding partial register stalls + as.movdqa(srcReg, dstReg); + } + } +} + +void emitLoadImm(CodeGenerator::Asm& as, int64_t val, PhysReg dstReg) { + assert(dstReg != InvalidReg); + if (dstReg.isGP()) { + as.emitImmReg(val, dstReg); + } else { + assert(dstReg.isXMM()); + if (val == 0) { + as.pxor_xmm_xmm(dstReg, dstReg); + } else { + // Can't move immediate directly into XMM register, so use rScratch + as.emitImmReg(val, rScratch); + emitMovRegReg(as, rScratch, dstReg); + } + } } static void emitLea(CodeGenerator::Asm& as, MemoryRef mr, PhysReg dst) { @@ -499,6 +546,26 @@ static void emitLea(CodeGenerator::Asm& as, MemoryRef mr, PhysReg dst) { } } +template +static void emitLoadReg(CodeGenerator::Asm& as, Mem mem, PhysReg reg) { + assert(reg != InvalidReg); + if (reg.isGP()) { + as.loadq(mem, reg); + } else { + as.movsd(mem, reg); + } +} + +template +static void emitStoreReg(CodeGenerator::Asm& as, PhysReg reg, Mem mem) { + assert(reg != InvalidReg); + if (reg.isGP()) { + as.storeq(reg, mem); + } else { + as.movsd(reg, mem); + } +} + void 
shuffle2(CodeGenerator::Asm& a, PhysReg s0, PhysReg s1, PhysReg d0, PhysReg d1) { assert(s0 != s1); @@ -532,37 +599,65 @@ static void zeroExtendIfBool(X64Assembler& as, const SSATmp* src, } } -static void prepUnaryXmmOp(X64Assembler& a, const SSATmp* ssa, RegXMM xmm, - const RegisterInfo& info) { - auto reg = info.getReg(); - RegNumber src(reg); - if (reg == InvalidReg) { - src = rScratch; - assert(ssa->isConst()); - a.mov_imm64_reg(ssa->getValBits(), rScratch); - } - if (ssa->isA(Type::Int | Type::Bool)) { - // Expand non-const bools to 64-bit. - // Consts are already moved into src as 64-bit values above. - if (!ssa->isConst()) zeroExtendIfBool(a, ssa, info); - // cvtsi2sd doesn't modify the high bits of its target, which can - // cause false dependencies to prevent register renaming from kicking - // in. Break the dependency chain by zeroing out the destination reg. - a. pxor_xmm_xmm(xmm, xmm); - a. cvtsi2sd_reg64_xmm(src, xmm); - } else { - a. mov_reg64_xmm(src, xmm); - } +static int64_t convIntToDouble(int64_t i) { + union { + double d; + int64_t i; + } u; + u.d = double(i); + return u.i; } -static void prepBinaryXmmOp(X64Assembler& a, const SSATmp* left, - const SSATmp* right, const RegAllocInfo& regs) { - prepUnaryXmmOp(a, left, xmm0, regs[left]); - prepUnaryXmmOp(a, right, xmm1, regs[right]); +/* + * Returns a XMM register containing the value of SSATmp tmp, + * which can be either a bool, an int, or a double. + * If the value is already in a XMM register, simply returns it. + * Otherwise, the value is moved into rXMMScratch, which is returned. + * If instructions to convert to a double at runtime are needed, + * they're emitted in 'as'. 
+ */ +static PhysReg prepXMMReg(const SSATmp* tmp, + X64Assembler& as, + const RegAllocInfo& allocInfo, + RegXMM rXMMScratch) { + assert(tmp->isA(Type::Bool) || tmp->isA(Type::Int) || tmp->isA(Type::Dbl)); + + PhysReg reg = allocInfo[tmp].getReg(); + + // Case 1: tmp is already in a XMM register + if (reg.isXMM()) return reg; + + // Case 2: tmp is in a GP register + if (reg != InvalidReg) { + // Case 2.a: Dbl stored in GP reg + if (tmp->isA(Type::Dbl)) { + emitMovRegReg(as, reg, rXMMScratch); + return rXMMScratch; + } + // Case 2.b: Bool or Int stored in GP reg + assert(tmp->isA(Type::Bool) || tmp->isA(Type::Int)); + zeroExtendIfBool(as, tmp, allocInfo[tmp]); + as.pxor_xmm_xmm(rXMMScratch, rXMMScratch); + as.cvtsi2sd_reg64_xmm(reg, rXMMScratch); + return rXMMScratch; + } + + // Case 3: tmp is a constant + assert(tmp->isConst()); + + int64_t val = tmp->getValRawInt(); + if (!tmp->isA(Type::Dbl)) { + assert(tmp->isA(Type::Bool | Type::Int)); + if (tmp->isA(Type::Bool)) val = val != 0; // see task #2401790 + val = convIntToDouble(val); + } + emitLoadImm(as, val, rScratch); + emitMovRegReg(as, rScratch, rXMMScratch); + return rXMMScratch; } -static void doubleCmp(X64Assembler& a, RegXMM xmm0, RegXMM xmm1) { - a. ucomisd_xmm_xmm(xmm0, xmm1); +static void doubleCmp(X64Assembler& a, RegXMM xmmReg0, RegXMM xmmReg1) { + a. ucomisd_xmm_xmm(xmmReg0, xmmReg1); Label notPF; a. jnp8(notPF); // PF means the doubles were unordered. 
We treat this as !equal, so @@ -590,8 +685,10 @@ void CodeGenerator::cgJcc(IRInstruction* inst) { CG_PUNT(cgJcc); } if (src1Type == Type::Dbl || src2Type == Type::Dbl) { - prepBinaryXmmOp(m_as, src1, src2, m_regs); - doubleCmp(m_as, xmm0, xmm1); + PhysReg srcReg1 = prepXMMReg(src1, m_as, m_regs, rXMMScratch0); + PhysReg srcReg2 = prepXMMReg(src2, m_as, m_regs, rXMMScratch1); + assert(srcReg1 != rXMMScratch1 && srcReg2 != rXMMScratch0); + doubleCmp(m_as, srcReg1, srcReg2); } else { if (src1Type == Type::Cls && src2Type == Type::Cls) { assert(opc == JmpSame || opc == JmpNSame); @@ -646,8 +743,8 @@ void CodeGenerator::cgJmpNSame(IRInstruction* inst) { cgJcc(inst); } typedef Transl::X64Assembler Asm; static int64_t shuffleArgs(Asm& a, ArgGroup& args) { // Compute the move/shuffle plan. - int moves[kNumX64Regs]; - ArgDesc* argDescs[kNumX64Regs]; + int moves[kNumRegs]; + ArgDesc* argDescs[kNumRegs]; memset(moves, -1, sizeof moves); memset(argDescs, 0, sizeof argDescs); for (size_t i = 0; i < args.numRegArgs(); ++i) { @@ -671,18 +768,22 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) { for (size_t i = 0; i < howTo.size(); ++i) { if (howTo[i].m_kind == MoveInfo::Move) { if (howTo[i].m_reg2 == reg::rScratch) { - a. movq (howTo[i].m_reg1, howTo[i].m_reg2); + emitMovRegReg(a, howTo[i].m_reg1, howTo[i].m_reg2); } else { ArgDesc* argDesc = argDescs[int(howTo[i].m_reg2)]; ArgDesc::Kind kind = argDesc->getKind(); if (kind == ArgDesc::Reg || kind == ArgDesc::TypeReg) { if (argDesc->isZeroExtend()) { + assert(howTo[i].m_reg1.isGP()); + assert(howTo[i].m_reg2.isGP()); a. movzbl (rbyte(howTo[i].m_reg1), r32(howTo[i].m_reg2)); } else { - a. movq (howTo[i].m_reg1, howTo[i].m_reg2); + emitMovRegReg(a, howTo[i].m_reg1, howTo[i].m_reg2); } } else { assert(kind == ArgDesc::Addr); + assert(howTo[i].m_reg1.isGP()); + assert(howTo[i].m_reg2.isGP()); a. 
lea (howTo[i].m_reg1[argDesc->getImm().q()], howTo[i].m_reg2); } @@ -691,6 +792,8 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) { } } } else { + assert(howTo[i].m_reg1.isGP()); + assert(howTo[i].m_reg2.isGP()); a. xchgq (howTo[i].m_reg1, howTo[i].m_reg2); } } @@ -702,6 +805,7 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) { if (!args[i].done()) { ArgDesc::Kind kind = args[i].getKind(); PhysReg dst = args[i].getDstReg(); + assert(dst.isGP()); if (kind == ArgDesc::Imm) { emitLoadImm(a, args[i].getImm().q(), dst); } else if (kind == ArgDesc::TypeReg) { @@ -728,13 +832,19 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) { a. movzbl(rbyte(srcReg), r32(rScratch)); a. push(rScratch); } else { - a. push(srcReg); + if (srcReg.isXMM()) { + emitMovRegReg(a, srcReg, rScratch); + a.push(rScratch); + } else { + a.push(srcReg); + } } break; case ArgDesc::TypeReg: static_assert(kTypeWordOffset == 4 || kTypeWordOffset == 1, "kTypeWordOffset value not supported"); + assert(srcReg.isGP()); // x86 stacks grow down, so push higher offset items first if (kTypeWordOffset == 4) { a. pushl(r32(srcReg)); @@ -1071,9 +1181,20 @@ void CodeGenerator::cgBinaryOp(IRInstruction* inst, CG_PUNT(cgBinaryOp); } if (src1->isA(Type::Dbl) || src2->isA(Type::Dbl)) { - prepBinaryXmmOp(m_as, src1, src2, m_regs); - (m_as.*fpInstr)(xmm1, xmm0); - m_as. mov_xmm_reg64(xmm0, m_regs[dst].getReg()); + PhysReg dstReg = m_regs[dst].getReg(); + PhysReg resReg = dstReg.isXMM() && dstReg != m_regs[src2].getReg() ? 
+ dstReg : PhysReg(rXMMScratch0); + assert(resReg.isXMM()); + + PhysReg srcReg1 = prepXMMReg(src1, m_as, m_regs, resReg); + PhysReg srcReg2 = prepXMMReg(src2, m_as, m_regs, rXMMScratch1); + assert(srcReg1 != rXMMScratch1 && srcReg2 != rXMMScratch0); + + emitMovRegReg(m_as, srcReg1, resReg); + + (m_as.*fpInstr)(srcReg2, resReg); + + emitMovRegReg(m_as, resReg, dstReg); return; } cgBinaryIntOp(inst, instrIR, instrRR, movInstr, @@ -1353,8 +1474,10 @@ void CodeGenerator::cgOpCmpHelper( else if (type1 == Type::Dbl || type2 == Type::Dbl) { if ((type1 == Type::Dbl || type1 == Type::Int) && (type2 == Type::Dbl || type2 == Type::Int)) { - prepBinaryXmmOp(m_as, src1, src2, m_regs); - doubleCmp(m_as, xmm0, xmm1); + PhysReg srcReg1 = prepXMMReg(src1, m_as, m_regs, rXMMScratch0); + PhysReg srcReg2 = prepXMMReg(src2, m_as, m_regs, rXMMScratch1); + assert(srcReg1 != rXMMScratch1 && srcReg2 != rXMMScratch0); + doubleCmp(m_as, srcReg1, srcReg2); setFromFlags(); } else { CG_PUNT(cgOpCmpHelper_Dbl); @@ -1707,7 +1830,7 @@ void CodeGenerator::cgConvDblToBool(IRInstruction* inst) { m_as.mov_imm64_reg(1, dstReg); } } else { - m_as.movq(srcReg, dstReg); + emitMovRegReg(m_as, srcReg, dstReg); m_as.shlq(1, dstReg); // 0.0 stays zero and -0.0 is now 0.0 m_as.setne(rbyte(dstReg)); // lower byte becomes 1 if dstReg != 0 m_as.movzbl(rbyte(dstReg), r32(dstReg)); @@ -1736,54 +1859,36 @@ void CodeGenerator::cgConvIntToBool(IRInstruction* inst) { } } -void CodeGenerator::cgConvBoolToDbl(IRInstruction* inst) { - // cvtsi2sd doesn't modify the high bits of its target, which can - // cause false dependencies to prevent register renaming from kicking - // in. Break the dependency chain by zeroing out xmm0. 
- m_as.pxor_xmm_xmm(xmm0, xmm0); - SSATmp* dst = inst->getDst(); - auto dstReg = m_regs[dst].getReg(); - assert(dstReg != InvalidReg); +void CodeGenerator::emitConvBoolOrIntToDbl(IRInstruction* inst) { SSATmp* src = inst->getSrc(0); - auto srcReg = m_regs[src].getReg(); - if (srcReg == InvalidReg) { - assert(src->isConst()); + SSATmp* dst = inst->getDst(); + PhysReg dstReg = m_regs[dst].getReg(); + assert(src->isA(Type::Bool) || src->isA(Type::Int)); + assert(dstReg != InvalidReg); + if (src->isConst()) { int64_t constVal = src->getValRawInt(); - if (constVal == 0) { - m_as.xor_reg64_reg64(dstReg, dstReg); - } else { - m_as.mov_imm64_reg(1, dstReg); - } + if (src->isA(Type::Bool)) constVal = constVal != 0; // see task #2401790 + constVal = convIntToDouble(constVal); + emitLoadImm(m_as, constVal, dstReg); } else { - m_as.movzbl(rbyte(srcReg), r32(dstReg)); + // Zero-extend a bool source first: cvtsi2sd reads all 64 bits of srcReg. + // cvtsi2sd also leaves the high bits of its target unmodified, a false + // dependency that blocks register renaming; pxor the XMM reg to break it. + PhysReg srcReg = m_regs[src].getReg(); + PhysReg xmmReg = dstReg.isXMM() ? dstReg : PhysReg(rXMMScratch0); + zeroExtendIfBool(m_as, src, m_regs[src]); + m_as.pxor_xmm_xmm(xmmReg, xmmReg); + m_as.cvtsi2sd_reg64_xmm(srcReg, xmmReg); + emitMovRegReg(m_as, xmmReg, dstReg); } - m_as.cvtsi2sd_reg64_xmm(dstReg, xmm0); - m_as.mov_xmm_reg64(xmm0, dstReg); +} + +void CodeGenerator::cgConvBoolToDbl(IRInstruction* inst) { + emitConvBoolOrIntToDbl(inst); } void CodeGenerator::cgConvIntToDbl(IRInstruction* inst) { - // cvtsi2sd doesn't modify the high bits of its target, which can - // cause false dependencies to prevent register renaming from kicking - // in. Break the dependency chain by zeroing out xmm0. 
- m_as.pxor_xmm_xmm(xmm0, xmm0); - SSATmp* dst = inst->getDst(); - auto dstReg = m_regs[dst].getReg(); - assert(dstReg != InvalidReg); - SSATmp* src = inst->getSrc(0); - auto srcReg = m_regs[src].getReg(); - if (srcReg == InvalidReg) { - assert(src->isConst()); - int64_t constVal = src->getValRawInt(); - if (constVal == 0) { - m_as.xor_reg64_reg64(dstReg, dstReg); - } else { - m_as.mov_imm64_reg(constVal, dstReg); - } - m_as.cvtsi2sd_reg64_xmm(dstReg, xmm0); - } else { - m_as.cvtsi2sd_reg64_xmm(srcReg, xmm0); - } - m_as.mov_xmm_reg64(xmm0, dstReg); + emitConvBoolOrIntToDbl(inst); } void CodeGenerator::cgConvBoolToInt(IRInstruction* inst) { @@ -1858,7 +1963,7 @@ void CodeGenerator::cgUnbox(IRInstruction* inst) { // srcTypeReg == KindOfRef; srcValReg is RefData* const size_t ref_tv_off = RefData::tvOffset(); if (dstValReg != srcValReg) { - m_as.loadq(srcValReg[ref_tv_off + TVOFF(m_data)], dstValReg); + emitLoadReg(m_as, srcValReg[ref_tv_off + TVOFF(m_data)], dstValReg); emitLoadTVType(m_as, srcValReg[ref_tv_off + TVOFF(m_type)], r32(dstTypeReg)); } else { @@ -1984,8 +2089,8 @@ void CodeGenerator::cgRetVal(IRInstruction* inst) { a. storeq (val->getValRawInt(), rFp[AROFF(m_r) + TVOFF(m_data)]); } else { - zeroExtendIfBool(m_as, val, m_regs[val]); - a. storeq (m_regs[val].getReg(), rFp[AROFF(m_r) + TVOFF(m_data)]); + zeroExtendIfBool(a, val, m_regs[val]); + emitStoreReg(a, m_regs[val].getReg(), rFp[AROFF(m_r) + TVOFF(m_data)]); } } @@ -2250,7 +2355,7 @@ void CodeGenerator::cgSpill(IRInstruction* inst) { // We do not need to mask booleans, since the IR will reload the spill auto srcReg = m_regs[src].getReg(locIndex); auto sinfo = m_regs[dst].getSpillInfo(locIndex); - m_as. 
storeq(srcReg, reg::rsp[sinfo.offset()]); + emitStoreReg(m_as, srcReg, reg::rsp[sinfo.offset()]); } } @@ -2262,7 +2367,7 @@ void CodeGenerator::cgReload(IRInstruction* inst) { for (int locIndex = 0; locIndex < src->numNeededRegs(); ++locIndex) { auto dstReg = m_regs[dst].getReg(locIndex); auto sinfo = m_regs[src].getSpillInfo(locIndex); - m_as. loadq(reg::rsp[sinfo.offset()], dstReg); + emitLoadReg(m_as, reg::rsp[sinfo.offset()], dstReg); } } @@ -3752,7 +3857,7 @@ void CodeGenerator::cgStore(PhysReg base, m_as.storeq(val, base[off + TVOFF(m_data)]); } else { zeroExtendIfBool(m_as, src, m_regs[src]); - m_as.storeq(m_regs[src].getReg(), base[off + TVOFF(m_data)]); + emitStoreReg(m_as, m_regs[src].getReg(), base[off + TVOFF(m_data)]); } } @@ -3792,7 +3897,7 @@ void CodeGenerator::cgLoad(PhysReg base, if (type == Type::Bool) { m_as.load_reg64_disp_reg32(base, off + TVOFF(m_data), dstReg); } else { - m_as.load_reg64_disp_reg64(base, off + TVOFF(m_data), dstReg); + emitLoadReg(m_as, base[off + TVOFF(m_data)], dstReg); } } diff --git a/hphp/runtime/vm/translator/hopt/codegen.h b/hphp/runtime/vm/translator/hopt/codegen.h index 78e3d34ad..ada0f1884 100644 --- a/hphp/runtime/vm/translator/hopt/codegen.h +++ b/hphp/runtime/vm/translator/hopt/codegen.h @@ -270,7 +270,6 @@ private: Address cgCheckRefCountedType(PhysReg typeReg); Address cgCheckRefCountedType(PhysReg baseReg, int64_t offset); - void cgConvPrimitiveToDbl(IRInstruction* inst); void cgDecRefStaticType(Type type, PhysReg dataReg, Block* exit, @@ -311,6 +310,7 @@ private: void emitReqBindAddr(const Func* func, TCA& dest, Offset offset); void emitAdjustSp(PhysReg spReg, PhysReg dstReg, int64_t adjustment); + void emitConvBoolOrIntToDbl(IRInstruction* inst); /* * Generate an if-block that branches around some unlikely code, handling diff --git a/hphp/runtime/vm/translator/hopt/irfactory.h b/hphp/runtime/vm/translator/hopt/irfactory.h index 9f4c09eb1..2255c9d2f 100644 --- a/hphp/runtime/vm/translator/hopt/irfactory.h 
+++ b/hphp/runtime/vm/translator/hopt/irfactory.h @@ -223,8 +223,11 @@ public: IRInstruction* defLabel(); IRInstruction* defLabel(unsigned numDst); template SSATmp* cns(T val) { + Type type = typeForConst(val); + // Normalize bool values to 0 or 1 + if (type.equals(Type::Bool)) val = (T)(val != 0); ConstData cdata(val); - return findConst(cdata, typeForConst(val)); + return findConst(cdata, type); } Block* defBlock(const Func* f, IRInstruction*); Block* defBlock(const Func* f) { diff --git a/hphp/runtime/vm/translator/hopt/linearscan.cpp b/hphp/runtime/vm/translator/hopt/linearscan.cpp index 928342ab0..22eda2de5 100644 --- a/hphp/runtime/vm/translator/hopt/linearscan.cpp +++ b/hphp/runtime/vm/translator/hopt/linearscan.cpp @@ -54,8 +54,15 @@ RegSet RegisterInfo::getRegs() const { return regs; } +static PhysReg::Type getRegType(const SSATmp* tmp) { + if (RuntimeOption::EvalHHIRAllocXMMRegs && tmp->isA(Type::Dbl)) { + return PhysReg::XMM; + } + return PhysReg::GP; +} + struct LinearScan : private boost::noncopyable { - static const int NumRegs = 16; + static const int NumRegs = kNumRegs; explicit LinearScan(IRFactory*); RegAllocInfo allocRegs(Trace*, LifetimeInfo*); @@ -67,7 +74,7 @@ private: public: bool isReserved() const { return m_reserved; } bool isCallerSaved() const { - return kCallerSaved.contains(PhysReg(m_regNo)); + return kCallerSaved.contains(m_reg); } bool isCalleeSaved() const { return !isCallerSaved(); } bool isAllocated() const { return m_ssaTmp != nullptr; } @@ -77,6 +84,7 @@ private: Type type = m_ssaTmp->type(); return type == Type::RetAddr; } + PhysReg::Type type() const { return m_reg.type(); } private: SSATmp* m_ssaTmp; // non-null when allocated @@ -86,7 +94,7 @@ private: // LinearScan::m_freeCalleeSaved, or LinearScan::m_allocatedRegs. // of a reserved reg is undefined. 
smart::list::iterator m_pos; - uint16_t m_regNo; + PhysReg m_reg; bool m_pinned; // do not free this register if pinned // We stress test register allocation by reducing the number of // free registers. @@ -112,7 +120,7 @@ private: void clear(); void add(SSATmp* tmp, uint32_t index, int argNum); private: - // indexed by arg number + // indexed by register number std::pair m_preColoredTmps[LinearScan::NumRegs]; }; @@ -158,7 +166,7 @@ private: void pushFreeReg(RegState* reg); RegState* popFreeReg(smart::list& freeList); void freeReg(RegState* reg); - RegState* getFreeReg(bool preferCallerSaved); + RegState* getFreeReg(PhysReg::Type type, bool preferCallerSaved); RegState* getReg(RegState* reg); template @@ -175,8 +183,8 @@ private: IRFactory* const m_irFactory; RegState m_regs[NumRegs]; // Lists of free caller and callee-saved registers, respectively. - smart::list m_freeCallerSaved; - smart::list m_freeCalleeSaved; + smart::list m_freeCallerSaved[PhysReg::kNumTypes]; + smart::list m_freeCalleeSaved[PhysReg::kNumTypes]; // List of assigned registers, sorted high to low by lastUseId. 
smart::list m_allocatedRegs; @@ -237,8 +245,10 @@ void LinearScan::StateSave::save(LinearScan* ls) { void LinearScan::StateSave::restore(LinearScan* ls) { ls->m_allocatedRegs.clear(); - ls->m_freeCalleeSaved.clear(); - ls->m_freeCallerSaved.clear(); + for (int i = 0; i < PhysReg::kNumTypes; i++) { + ls->m_freeCalleeSaved[i].clear(); + ls->m_freeCallerSaved[i].clear(); + } for (size_t i = 0; i < NumRegs; i++) { ls->m_regs[i] = m_regs[i]; @@ -247,7 +257,7 @@ void LinearScan::StateSave::restore(LinearScan* ls) { if (reg->isAllocated()) { SSATmp* tmp = reg->m_ssaTmp; for (int r = 0; r < ls->m_allocInfo[tmp].numAllocatedRegs(); r++) { - if ((int)ls->m_allocInfo[tmp].getReg(r) == i) { + if (ls->m_allocInfo[tmp].getReg(r) == PhysReg(i)) { ls->allocRegToTmp(reg, tmp, r); } } @@ -266,23 +276,25 @@ LinearScan::LinearScan(IRFactory* irFactory) , m_jmps(irFactory, JmpList()) , m_allocInfo(irFactory) { - for (int i = 0; i < kNumX64Regs; i++) { + for (int i = 0; i < kNumRegs; i++) { m_regs[i].m_ssaTmp = nullptr; - m_regs[i].m_regNo = i; + m_regs[i].m_reg = PhysReg(i); m_regs[i].m_pinned = false; m_regs[i].m_reserved = false; } // Mark reserved regs. - m_regs[int(rVmSp)] .m_reserved = true; - m_regs[int(rsp)] .m_reserved = true; - m_regs[int(rVmFp)] .m_reserved = true; - m_regs[int(rScratch)].m_reserved = true; - m_regs[int(rVmTl)] .m_reserved = true; + m_regs[int(PhysReg(rVmSp))] .m_reserved = true; + m_regs[int(PhysReg(rsp))] .m_reserved = true; + m_regs[int(PhysReg(rVmFp))] .m_reserved = true; + m_regs[int(PhysReg(rScratch))] .m_reserved = true; + m_regs[int(PhysReg(rVmTl))] .m_reserved = true; + m_regs[int(PhysReg(rXMMScratch0))].m_reserved = true; + m_regs[int(PhysReg(rXMMScratch1))].m_reserved = true; // Reserve extra regs for testing purpose. 
uint32_t numFreeRegs = RuntimeOption::EvalHHIRNumFreeRegs; - for (int i = kNumX64Regs - 1; i >= 0; i--) { + for (int i = kNumRegs - 1; i >= 0; i--) { if (!m_regs[i].m_reserved) { if (numFreeRegs == 0) { m_regs[i].m_reserved = true; @@ -299,7 +311,7 @@ void LinearScan::allocRegToInstruction(InstructionList::iterator it) { // Reload all source operands if necessary. // Mark registers as unpinned. - for (int regNo = 0; regNo < kNumX64Regs; ++regNo) { + for (int regNo = 0; regNo < kNumRegs; ++regNo) { m_regs[regNo].m_pinned = false; } smart::vector needsReloading(inst->getNumSrcs(), true); @@ -425,6 +437,8 @@ void LinearScan::allocRegToInstruction(InstructionList::iterator it) { void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) { bool preferCallerSaved = true; + PhysReg::Type regType = getRegType(ssaTmp); + if (RuntimeOption::EvalHHIREnableCalleeSavedOpt) { // Prefer caller-saved registers iff doesn't span native. preferCallerSaved = (m_uses[ssaTmp].lastUse <= getNextNativeId()); @@ -432,7 +446,7 @@ void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) { RegState* reg = nullptr; if (!preferCallerSaved) { - reg = getFreeReg(false); + reg = getFreeReg(regType, false); if (reg->isCallerSaved()) { // If we are out of callee-saved registers, fall into the logic of // assigning a caller-saved register. @@ -471,7 +485,7 @@ void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) { if (reg == nullptr) { // No pre-coloring for this tmp. // Pick a regular caller-saved reg. 
- reg = getFreeReg(true); + reg = getFreeReg(regType, true); } assert(reg); @@ -495,7 +509,7 @@ void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) { void LinearScan::allocRegToTmp(RegState* reg, SSATmp* ssaTmp, uint32_t index) { reg->m_ssaTmp = ssaTmp; // mark inst as using this register - m_allocInfo[ssaTmp].setReg(PhysReg(reg->m_regNo), index); + m_allocInfo[ssaTmp].setReg(reg->m_reg, index); uint32_t lastUseId = m_uses[ssaTmp].lastUse; if (reg->isReserved()) { return; @@ -802,7 +816,7 @@ RegNumber LinearScan::getJmpPreColor(SSATmp* tmp, uint32_t regIndex, // caller-saved regs depends on pre-coloring hints. void LinearScan::initFreeList() { // reserve extra regs for testing purpose. - for (int i = kNumX64Regs - 1; i >= 0; i--) { + for (int i = kNumRegs - 1; i >= 0; i--) { if (!m_regs[i].m_reserved) { pushFreeReg(&m_regs[i]); } @@ -1243,16 +1257,18 @@ LinearScan::RegState* LinearScan::getReg(RegState* reg) { if (reg->isReserved() || reg->isAllocated()) { return nullptr; } + auto type = reg->type(); auto& freeList = (reg->isCallerSaved() ? - m_freeCallerSaved : m_freeCalleeSaved); + m_freeCallerSaved[type] : m_freeCalleeSaved[type]); freeList.erase(reg->m_pos); // Pin it so that other operands in the same instruction will not reuse it. reg->m_pinned = true; return reg; } -LinearScan::RegState* LinearScan::getFreeReg(bool preferCallerSaved) { - if (m_freeCallerSaved.empty() && m_freeCalleeSaved.empty()) { +LinearScan::RegState* LinearScan::getFreeReg(PhysReg::Type type, + bool preferCallerSaved) { + if (m_freeCallerSaved[type].empty() && m_freeCalleeSaved[type].empty()) { assert(!m_allocatedRegs.empty()); // no free registers --> free a register from the allocatedRegs @@ -1260,7 +1276,7 @@ LinearScan::RegState* LinearScan::getFreeReg(bool preferCallerSaved) { // 1. not used for any source operand in the current instruction, and // 2. not used for the return address of a function. 
auto canSpill = [&] (RegState* reg) { - return !reg->isPinned() && !reg->isRetAddr(); + return !reg->isPinned() && !reg->isRetAddr() && reg->type() == type; }; auto pos = std::find_if(m_allocatedRegs.begin(), m_allocatedRegs.end(), canSpill); @@ -1273,11 +1289,11 @@ LinearScan::RegState* LinearScan::getFreeReg(bool preferCallerSaved) { smart::list<RegState*>* preferred = nullptr; smart::list<RegState*>* other = nullptr; if (preferCallerSaved) { - preferred = &m_freeCallerSaved; - other = &m_freeCalleeSaved; + preferred = &m_freeCallerSaved[type]; + other = &m_freeCalleeSaved[type]; } else { - preferred = &m_freeCalleeSaved; - other = &m_freeCallerSaved; + preferred = &m_freeCalleeSaved[type]; + other = &m_freeCallerSaved[type]; } RegState* theFreeReg = nullptr; @@ -1304,12 +1320,14 @@ void LinearScan::freeReg(RegState* reg) { } void LinearScan::pushFreeReg(RegState* reg) { + PhysReg::Type type = reg->type(); auto& freeList = (reg->isCallerSaved() ? - m_freeCallerSaved : m_freeCalleeSaved); + m_freeCallerSaved[type] : m_freeCalleeSaved[type]); // If next native is going to use <reg>, put <reg> to the back of the // queue so that it's unlikely to be misused by irrelevant tmps. if (RuntimeOption::EvalHHIREnablePreColoring && - (reg->m_regNo == int(rax) || m_preColoringHint.preColorsTmp(reg))) { + type == PhysReg::GP && + (reg->m_reg == PhysReg(rax) || m_preColoringHint.preColorsTmp(reg))) { freeList.push_back(reg); reg->m_pos = (--freeList.end()); } else { @@ -1396,7 +1414,8 @@ SSATmp* LinearScan::getOrigTmp(SSATmp* tmp) { } bool LinearScan::PreColoringHint::preColorsTmp(RegState* reg) const { - return m_preColoredTmps[reg->m_regNo].first != nullptr; + assert(reg->m_reg.isGP()); + return m_preColoredTmps[int(reg->m_reg)].first != nullptr; } // Get the pre-coloring register of (<tmp>, <index>). @@ -1404,9 +1423,10 @@ bool LinearScan::PreColoringHint::preColorsTmp(RegState* reg) const { // not a big problem. 
RegNumber LinearScan::PreColoringHint::getPreColoringReg( SSATmp* tmp, uint32_t index) const { - for (int regNo = 0; regNo < kNumX64Regs; ++regNo) { + for (int regNo = 0; regNo < kNumRegs; ++regNo) { if (m_preColoredTmps[regNo].first == tmp && m_preColoredTmps[regNo].second == index) { + assert(regNo < kNumGPRegs); return (RegNumber)regNo; } } @@ -1414,7 +1434,7 @@ RegNumber LinearScan::PreColoringHint::getPreColoringReg( } void LinearScan::PreColoringHint::clear() { - for (int i = 0; i < kNumX64Regs; ++i) { + for (int i = 0; i < kNumRegs; ++i) { m_preColoredTmps[i].first = nullptr; m_preColoredTmps[i].second = 0; } @@ -1424,8 +1444,8 @@ void LinearScan::PreColoringHint::clear() { // in next native. void LinearScan::PreColoringHint::add(SSATmp* tmp, uint32_t index, int argNum) { int reg = int(argNumToRegName[argNum]); - assert(reg >= 0 && reg < kNumX64Regs); - m_preColoredTmps[reg].first = tmp; + assert(reg >= 0 && reg < kNumGPRegs); + m_preColoredTmps[reg].first = tmp; m_preColoredTmps[reg].second = index; } diff --git a/hphp/runtime/vm/translator/hopt/print.cpp b/hphp/runtime/vm/translator/hopt/print.cpp index de3d47e95..502a18bc2 100644 --- a/hphp/runtime/vm/translator/hopt/print.cpp +++ b/hphp/runtime/vm/translator/hopt/print.cpp @@ -249,7 +249,12 @@ void print(std::ostream& os, const SSATmp* tmp, const RegAllocInfo* regs, if (!info.spilled()) { for (int i = 0, sz = info.numAllocatedRegs(); i < sz; ++i) { if (i != 0) os << ","; - os << reg::regname(Reg64(info.getReg(i))); + PhysReg reg = info.getReg(i); + if (reg.type() == PhysReg::GP) { + os << reg::regname(Reg64(reg)); + } else { + os << reg::regname(RegXMM(reg)); + } } } else { for (int i = 0, sz = tmp->numNeededRegs(); i < sz; ++i) { diff --git a/hphp/runtime/vm/translator/hopt/tracebuilder.h b/hphp/runtime/vm/translator/hopt/tracebuilder.h index 403621505..38bc130de 100644 --- a/hphp/runtime/vm/translator/hopt/tracebuilder.h +++ b/hphp/runtime/vm/translator/hopt/tracebuilder.h @@ -174,8 +174,8 @@ struct 
TraceBuilder { return gen(DefConst, type, ConstData(val)); } - SSATmp* cns(Type t) { - return gen(DefConst, t, ConstData(0)); + SSATmp* cns(Type type) { + return gen(DefConst, type, ConstData(0)); } template diff --git a/hphp/runtime/vm/translator/physreg.h b/hphp/runtime/vm/translator/physreg.h index 0a870e666..2c5f58965 100644 --- a/hphp/runtime/vm/translator/physreg.h +++ b/hphp/runtime/vm/translator/physreg.h @@ -36,15 +36,36 @@ namespace HPHP { namespace Transl { * (e.g. store_reg##_disp_reg##). */ struct PhysReg { + enum Type { + GP, + XMM, + kNumTypes, // keep last + }; explicit constexpr PhysReg(int n = -1) : n(n) {} constexpr /* implicit */ PhysReg(Reg64 r) : n(int(r)) {} + constexpr /* implicit */ PhysReg(RegXMM r) : n(int(r) + kNumGPRegs) {} explicit constexpr PhysReg(Reg32 r) : n(int(RegNumber(r))) {} explicit constexpr PhysReg(RegNumber r) : n(int(r)) {} - constexpr /* implicit */ operator Reg64() const { return Reg64(n); } - constexpr /* implicit */ operator RegNumber() const { return RegNumber(n); } + /* implicit */ operator Reg64() const { + assert(isGP() || n == -1); + return Reg64(n); + } + constexpr /* implicit */ operator RegNumber() const { + return n < kNumGPRegs ? RegNumber(n) : RegNumber(n - kNumGPRegs); + } + /* implicit */ operator RegXMM() const { + assert(isXMM() || n == -1); + return RegXMM(n - kNumGPRegs); + } + Type type() const { + assert(n >= 0 && n < kNumRegs); + return n < kNumGPRegs ? 
GP : XMM; + } + bool isGP () const { return n >= 0 && n < kNumGPRegs; } + bool isXMM() const { return n >= kNumGPRegs && n < kNumRegs; } explicit constexpr operator int() const { return n; } constexpr bool operator==(PhysReg r) const { return n == r.n; } constexpr bool operator!=(PhysReg r) const { return n != r.n; } @@ -53,13 +74,24 @@ struct PhysReg { constexpr bool operator==(Reg32 r) const { return Reg32(n) == r; } constexpr bool operator!=(Reg32 r) const { return Reg32(n) != r; } - MemoryRef operator[](intptr_t p) const { return *(*this + p); } - IndexedMemoryRef operator[](Reg64 i) const { return *(*this + i); } - IndexedMemoryRef operator[](ScaledIndex s) const { return *(*this + s); } + MemoryRef operator[](intptr_t p) const { + assert(type() == GP); + return *(*this + p); + } + IndexedMemoryRef operator[](Reg64 i) const { + assert(type() == GP); + return *(*this + i); + } + IndexedMemoryRef operator[](ScaledIndex s) const { + assert(type() == GP); + return *(*this + s); + } IndexedMemoryRef operator[](ScaledIndexDisp s) const { + assert(type() == GP); return *(*this + s.si + s.disp); } IndexedMemoryRef operator[](DispReg dr) const { + assert(type() == GP); return *(*this + ScaledIndex(dr.base, 0x1) + dr.disp); } diff --git a/hphp/runtime/vm/translator/translator-x64-internal.h b/hphp/runtime/vm/translator/translator-x64-internal.h index 9acbf56ff..d9cb36e68 100644 --- a/hphp/runtime/vm/translator/translator-x64-internal.h +++ b/hphp/runtime/vm/translator/translator-x64-internal.h @@ -905,8 +905,8 @@ inline void emitCopyToAligned(X64Assembler& a, int destOff) { static_assert(sizeof(TypedValue) == 16, "emitCopyToAligned assumes sizeof(TypedValue) is 128 bits"); - a. movdqa (src[srcOff], xmm0); - a. movdqa (xmm0, dest[destOff]); + a. movdqa (src[srcOff], rXMMScratch0); + a. movdqa (rXMMScratch0, dest[destOff]); } // ArgManager -- support for passing VM-level data to helper functions. 
diff --git a/hphp/runtime/vm/translator/translator-x64.cpp b/hphp/runtime/vm/translator/translator-x64.cpp index 92b811599..2f2537541 100644 --- a/hphp/runtime/vm/translator/translator-x64.cpp +++ b/hphp/runtime/vm/translator/translator-x64.cpp @@ -465,7 +465,7 @@ TranslatorX64::emitPushAR(const NormalizedInstruction& i, const Func* func, void TranslatorX64::emitCallSaveRegs() { assert(!m_regMap.frozen()); - m_regMap.cleanRegs(kCallerSaved); + m_regMap.cleanRegs(kGPCallerSaved); } static void UNUSED tc_debug_print(const char* message, @@ -728,7 +728,7 @@ TranslatorX64::emitCall(X64Assembler& a, TCA dest, bool killRegs) { } if (killRegs) { // All caller-saved regs are now suspect. - m_regMap.smashRegs(kCallerSaved); + m_regMap.smashRegs(kGPCallerSaved); } } @@ -743,7 +743,7 @@ TranslatorX64::emitCall(X64Assembler& a, Call call, bool killRegs) { a.loadq(*rdi, rax); a.call(rax[call.getOffset()]); if (killRegs) { - m_regMap.smashRegs(kCallerSaved); + m_regMap.smashRegs(kGPCallerSaved); } } @@ -868,7 +868,7 @@ void TranslatorX64::prepareCallSaveRegs() { emitCallSaveRegs(); // Clean caller-saved regs. 
m_pendingUnwindRegInfo.clear(); - RegSet rset = kCalleeSaved; + RegSet rset = kGPCalleeSaved; PhysReg reg; while (rset.findFirst(reg)) { rset.remove(reg); @@ -1030,7 +1030,7 @@ void TranslatorX64::emitDecRef(Asm& a, auto getPushSet = [&] { RegSet ret; - auto regs = kCallerSaved; + auto regs = kGPCallerSaved; PhysReg reg; while (regs.findFirst(reg)) { regs.remove(reg); @@ -1233,7 +1233,7 @@ void TranslatorX64::emitGenericDecRefHelpers() { asm_label(a, release); { - PhysRegSaver prs(a, kCallerSaved - RegSet(rdi)); + PhysRegSaver prs(a, kGPCallerSaved - RegSet(rdi)); callDestructor(a, rScratch, rax); recordIndirectFixup(a.code.frontier, prs.rspAdjustment()); } @@ -3647,17 +3647,17 @@ TranslatorX64::binaryMixedArith(const NormalizedInstruction& i, Opcode op, PhysReg srcReg, PhysReg srcDestReg) { - getInputsIntoXMMRegs(i, srcReg, srcDestReg, xmm1, xmm0); + getInputsIntoXMMRegs(i, srcReg, srcDestReg, rXMMScratch1, rXMMScratch0); switch(op) { #define CASEIMM(OpBc, x64op) \ - case OpBc: a. x64op ##sd_xmm_xmm(xmm1, xmm0); break + case OpBc: a. x64op ##sd_xmm_xmm(rXMMScratch1, rXMMScratch0); break CASEIMM(OpAdd, add); CASEIMM(OpSub, sub); CASEIMM(OpMul, mul); #undef CASEIMM default: not_reached(); } - a. mov_xmm_reg64(xmm0, srcDestReg); + a. mov_xmm_reg64(rXMMScratch0, srcDestReg); } void @@ -4100,9 +4100,9 @@ TranslatorX64::analyzeEqOp(Tracelet& t, NormalizedInstruction& i) { void TranslatorX64::fpEq(const NormalizedInstruction& ni, PhysReg lr, PhysReg rr) { - getInputsIntoXMMRegs(ni, lr, rr, xmm0, xmm1); + getInputsIntoXMMRegs(ni, lr, rr, rXMMScratch0, rXMMScratch1); m_regMap.allocOutputRegs(ni); - a. ucomisd_xmm_xmm(xmm0, xmm1); + a. ucomisd_xmm_xmm(rXMMScratch0, rXMMScratch1); semiLikelyIfBlock(CC_P, a, [&] { // PF means unordered; treat it as !eq. Or 1 into anything at all // to clear ZF. 
@@ -11459,7 +11459,7 @@ TranslatorX64::TranslatorX64() m_irAUsage(0), m_irAstubsUsage(0), m_numHHIRTrans(0), - m_regMap(kCallerSaved, kCalleeSaved, this), + m_regMap(kGPCallerSaved, kGPCalleeSaved, this), m_unwindRegMap(128), m_curTrace(0), m_curNI(0), @@ -11739,7 +11739,7 @@ TCA TranslatorX64::emitNAryStub(X64Assembler& a, Call c) { a. push (rbp); // { a. movq (rsp, rbp); { - RegSet s = kCallerSaved - alreadySaved; + RegSet s = kGPCallerSaved - alreadySaved; PhysRegSaverParity rs(Parity, a, s); emitCall(a, c); } diff --git a/hphp/test/quick/xmm-spill1.php b/hphp/test/quick/xmm-spill1.php new file mode 100644 index 000000000..83c95b9c0 --- /dev/null +++ b/hphp/test/quick/xmm-spill1.php @@ -0,0 +1,32 @@ +