From b082ed04329a5d1e75cd648d5c6a63ea8907c123 Mon Sep 17 00:00:00 2001 From: kma Date: Thu, 4 Apr 2013 09:49:48 -0700 Subject: [PATCH] All inc and dec, all the time. On older processors, inc and dec have some specific hazards associated with their preservation of the CF flag. Sandy Bridge doesn't suffer from this, so let's take the code size wins here. Thanks to Guilherme for updating my dated priors on this. Also fix a pair of assembler bugs: no such thing as decl r32 on x64, and some decs were really incs. --- hphp/runtime/vm/translator/hopt/codegen.cpp | 24 +++++++++---------- hphp/runtime/vm/translator/translator-x64.cpp | 22 ++++++++--------- hphp/util/asm-x64.h | 4 ++-- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/hphp/runtime/vm/translator/hopt/codegen.cpp b/hphp/runtime/vm/translator/hopt/codegen.cpp index 79e449b6a..be8df70f4 100644 --- a/hphp/runtime/vm/translator/hopt/codegen.cpp +++ b/hphp/runtime/vm/translator/hopt/codegen.cpp @@ -912,13 +912,12 @@ void CodeGenerator::cgBinaryIntOp(IRInstruction* inst, if (src1Reg == InvalidReg && src2Reg == InvalidReg) { assert(src1->isConst() && src2->isConst()); int64_t value = oper(src1->getValRawInt(), - src2->getValRawInt()); + src2->getValRawInt()); emitLoadImm(a, value, dstReg); return; } // One register, and one immediate. - if (commutative) { int64_t immed = (src2Reg == InvalidReg ? src2 : src1)->getValRawInt(); @@ -933,7 +932,6 @@ void CodeGenerator::cgBinaryIntOp(IRInstruction* inst, } // NonCommutative: - if (src1Reg == InvalidReg) { if (dstReg == src2Reg) { emitLoadImm(a, src1->getValRawInt(), rScratch); @@ -964,9 +962,9 @@ void CodeGenerator::cgBinaryOp(IRInstruction* inst, if (!(src1->isA(Type::Bool) || src1->isA(Type::Int) || src1->isA(Type::Dbl)) || !(src2->isA(Type::Bool) || src2->isA(Type::Int) || src2->isA(Type::Dbl)) ) - { - CG_PUNT(cgBinaryOp); - } + { + CG_PUNT(cgBinaryOp); + } if (src1->isA(Type::Dbl) || src2->isA(Type::Dbl)) { prepBinaryXmmOp(m_as, src1, src2); (m_as.*fpInstr)(xmm1, xmm0); @@ -2447,12 +2445,12 @@ void CodeGenerator::cgExitGuardFailure(IRInstruction* inst) { } static void emitAssertFlagsNonNegative(CodeGenerator::Asm& as) { - ifThen(as, CC_NGE, [&] { as.int3(); }); + ifThen(as, CC_NGE, [&] { as.ud2(); }); } static void emitAssertRefCount(CodeGenerator::Asm& as, PhysReg base) { as.cmpl(HPHP::RefCountStaticValue, base[FAST_REFCOUNT_OFFSET]); - ifThen(as, CC_NBE, [&] { as.int3(); }); + ifThen(as, CC_NBE, [&] { as.ud2(); }); } static void emitIncRef(CodeGenerator::Asm& as, PhysReg base) { @@ -2460,7 +2458,7 @@ static void emitIncRef(CodeGenerator::Asm& as, PhysReg base) { emitAssertRefCount(as, base); } // emit incref - as.addl(1, base[FAST_REFCOUNT_OFFSET]); + as.incl(base[FAST_REFCOUNT_OFFSET]); if (RuntimeOption::EvalHHIRGenerateAsserts) { // Assert that the ref count is greater than zero emitAssertFlagsNonNegative(as); @@ -2724,7 +2722,7 @@ Address CodeGenerator::cgCheckStaticBitAndDecRef(Type type, emitAssertRefCount(m_as, dataReg); } // Load _count in scratchReg - m_as.load_reg64_disp_reg32(dataReg, FAST_REFCOUNT_OFFSET, scratchReg); + m_as.loadl(dataReg[FAST_REFCOUNT_OFFSET], r32(scratchReg)); // Check for RefCountStaticValue patchStaticCheck = cgCheckStaticBit(type, scratchReg, @@ -2732,7 +2730,7 @@ Address CodeGenerator::cgCheckStaticBitAndDecRef(Type type, // Decrement count and store it back in memory. // If there's an exit, emit jump to it when _count would get down to 0 - m_as.sub_imm32_reg32(1, scratchReg); + m_as.decq(scratchReg); if (exit) { emitFwdJcc(CC_E, exit); } @@ -2772,7 +2770,7 @@ Address CodeGenerator::cgCheckStaticBitAndDecRef(Type type, } // Decrement _count - m_as.sub_imm32_disp_reg32(1, FAST_REFCOUNT_OFFSET, dataReg); + m_as.decl(dataReg[FAST_REFCOUNT_OFFSET]); if (RuntimeOption::EvalHHIRGenerateAsserts) { // Assert that the ref count is not less than zero @@ -3426,7 +3424,7 @@ static void emitLdClsCctx(CodeGenerator::Asm& a, PhysReg srcReg, PhysReg dstReg) { emitMovRegReg(a, srcReg, dstReg); - a. subq(1, dstReg); + a. decq(dstReg); } void CodeGenerator::cgLdClsCtx(IRInstruction* inst) { diff --git a/hphp/runtime/vm/translator/translator-x64.cpp b/hphp/runtime/vm/translator/translator-x64.cpp index dd8444b86..c1c15b874 100644 --- a/hphp/runtime/vm/translator/translator-x64.cpp +++ b/hphp/runtime/vm/translator/translator-x64.cpp @@ -873,7 +873,7 @@ TranslatorX64::emitIncRef(X64Assembler &a, PhysReg base, DataType dtype) { * compact, it only writes the low-order 8 bits of eflags, causing a * partial dependency for any downstream flags-dependent code. */ - a. add_imm32_disp_reg32(1, FAST_REFCOUNT_OFFSET, base); + a. incl(base[FAST_REFCOUNT_OFFSET]); } // endif } @@ -888,7 +888,7 @@ TranslatorX64::emitIncRefGenericRegSafe(PhysReg base, tmpReg); { // if !static IfCountNotStatic ins(a, tmpReg); - a. add_imm32_disp_reg32(1, FAST_REFCOUNT_OFFSET, tmpReg); + a. incl(tmpReg[FAST_REFCOUNT_OFFSET]); } // endif } // endif } @@ -992,7 +992,7 @@ void TranslatorX64::emitDecRef(Asm& a, SpaceRecorder sr("_DecRef", a); { // if !static IfCountNotStatic ins(a, rDatum, type); - a. sub_imm32_disp_reg32(1, FAST_REFCOUNT_OFFSET, rDatum); + a. decl(rDatum[FAST_REFCOUNT_OFFSET]); assert(type >= 0 && type < MaxNumDataTypes); if (&a == &this->astubs) { @@ -1201,7 +1201,7 @@ void TranslatorX64::emitGenericDecRefHelpers() { m_dtorGenericStubRegs = a.code.frontier; a. cmpl (RefCountStaticValue, rdi[FAST_REFCOUNT_OFFSET]); jccBlock(a, [&] { - a. subl (1, rdi[FAST_REFCOUNT_OFFSET]); + a. decl (rdi[FAST_REFCOUNT_OFFSET]); release.jcc8(a, CC_Z); }); a. ret (); @@ -2288,7 +2288,7 @@ TranslatorX64::emitPrologue(Func* func, int nPassed) { // rather than execution speed; i.e., don't unroll the loop. TCA loopTop = a.code.frontier; a. sub_imm32_reg64(sizeof(Cell), rVmSp); - a. add_imm32_reg32(1, rax); + a. incl(eax); emitStoreUninitNull(a, 0, rVmSp); a. cmp_imm32_reg32(numParams, rax); a. jcc8(CC_L, loopTop); @@ -7429,7 +7429,7 @@ void TranslatorX64::translateCreateCont(const Tracelet& t, const int thisOff = cellsToBytes(genLocals - thisId - 1); // We don't have to check for a static refcount since we // know it's an Object - a. addl(1, r(rScratch)[FAST_REFCOUNT_OFFSET]); + a. incl(r(rScratch)[FAST_REFCOUNT_OFFSET]); a. storeq(r(rScratch), r(rDest)[thisOff + TVOFF(m_data)]); emitStoreTVType(a, KindOfObject, r(rDest)[thisOff + TVOFF(m_type)]); } @@ -7624,7 +7624,7 @@ void TranslatorX64::emitContPreNext(const NormalizedInstruction& i, } // ++m_index - a. add_imm64_disp_reg64(0x1, CONTOFF(m_index), r(rCont)); + a. incq(r(rCont)[CONTOFF(m_index)]); // m_running = true a. store_imm8_disp_reg(0x1, CONTOFF(m_running), r(rCont)); } @@ -8561,9 +8561,9 @@ TranslatorX64::translateIncDecL(const Tracelet& t, emitMovRegReg(a, localVal, output); } if (inc) { - a. add_imm32_reg64(1, localVal); + a. incq(localVal); } else { - a. sub_imm32_reg64(1, localVal); + a. decq(localVal); } if (i.outStack && pre) { // --$a, ++$a PhysReg output = getReg(i.outStack->location); @@ -9396,7 +9396,7 @@ TranslatorX64::emitFPushCtorDFast(const NormalizedInstruction& i, if (i.noCtor) { // If we're not running the constructor, just incref the object once and // don't set up the ActRec. - a.add_imm32_disp_reg32(1, FAST_REFCOUNT_OFFSET, rax); + a.incl(rax[FAST_REFCOUNT_OFFSET]); return; } else { // Incref the object twice: once for the stack and once for $this in the @@ -11627,7 +11627,7 @@ asm_label(a, release); a. loadq (rIter[TVOFF(m_data)], rData); a. cmpl (RefCountStaticValue, rData[FAST_REFCOUNT_OFFSET]); jccBlock(a, [&] { - a. subl (1, rData[FAST_REFCOUNT_OFFSET]); + a. decl (rData[FAST_REFCOUNT_OFFSET]); a. jz8 (doRelease); }); a. ret (); diff --git a/hphp/util/asm-x64.h b/hphp/util/asm-x64.h index 260fe57ab..110b00f78 100644 --- a/hphp/util/asm-x64.h +++ b/hphp/util/asm-x64.h @@ -1053,7 +1053,7 @@ struct X64Assembler { void incq(Reg64 r) { instrR(instr_inc, r); } void incl(Reg32 r) { instrR(instr_inc, r); } void decq(Reg64 r) { instrR(instr_dec, r); } - void decl(Reg32 r) { instrR(instr_dec, r); } + // decl(Reg32) cannot be encoded; it's REX. void notb(Reg8 r) { instrR(instr_notb, r); } void not(Reg64 r) { instrR(instr_not, r); } void neg(Reg64 r) { instrR(instr_neg, r); } @@ -1072,7 +1072,7 @@ struct X64Assembler { void incq(MemoryRef m) { instrM(instr_inc, m); } void incl(MemoryRef m) { instrM32(instr_inc, m); } void decq(MemoryRef m) { instrM(instr_dec, m); } - void decl(MemoryRef m) { instrM32(instr_inc, m); } + void decl(MemoryRef m) { instrM32(instr_dec, m); } void movdqu(RegXMM x, MemoryRef m) { instrRM(instr_movdqu, x, m); } void movdqu(RegXMM x, IndexedMemoryRef m) { instrRM(instr_movdqu, x, m); }