All inc and dec, all the time.

On older processors, inc and dec have some specific hazards associated with their preservation of the CF flag. Sandy Bridge doesn't suffer from this, so let's take the code size wins here. Thanks to Guilherme for updating my dated priors on this.

Also fix a pair of assembler bugs: no such thing as decl r32 on x64, and some decs were really incs.
Esse commit está contido em:
kma
2013-04-04 09:49:48 -07:00
commit de Sara Golemon
commit b082ed0432
3 arquivos alterados com 24 adições e 26 exclusões
+11 -13
Ver Arquivo
@@ -912,13 +912,12 @@ void CodeGenerator::cgBinaryIntOp(IRInstruction* inst,
if (src1Reg == InvalidReg && src2Reg == InvalidReg) {
assert(src1->isConst() && src2->isConst());
int64_t value = oper(src1->getValRawInt(),
src2->getValRawInt());
src2->getValRawInt());
emitLoadImm(a, value, dstReg);
return;
}
// One register, and one immediate.
if (commutative) {
int64_t immed = (src2Reg == InvalidReg
? src2 : src1)->getValRawInt();
@@ -933,7 +932,6 @@ void CodeGenerator::cgBinaryIntOp(IRInstruction* inst,
}
// NonCommutative:
if (src1Reg == InvalidReg) {
if (dstReg == src2Reg) {
emitLoadImm(a, src1->getValRawInt(), rScratch);
@@ -964,9 +962,9 @@ void CodeGenerator::cgBinaryOp(IRInstruction* inst,
if (!(src1->isA(Type::Bool) || src1->isA(Type::Int) || src1->isA(Type::Dbl))
||
!(src2->isA(Type::Bool) || src2->isA(Type::Int) || src2->isA(Type::Dbl)) )
{
CG_PUNT(cgBinaryOp);
}
{
CG_PUNT(cgBinaryOp);
}
if (src1->isA(Type::Dbl) || src2->isA(Type::Dbl)) {
prepBinaryXmmOp(m_as, src1, src2);
(m_as.*fpInstr)(xmm1, xmm0);
@@ -2447,12 +2445,12 @@ void CodeGenerator::cgExitGuardFailure(IRInstruction* inst) {
}
static void emitAssertFlagsNonNegative(CodeGenerator::Asm& as) {
ifThen(as, CC_NGE, [&] { as.int3(); });
ifThen(as, CC_NGE, [&] { as.ud2(); });
}
static void emitAssertRefCount(CodeGenerator::Asm& as, PhysReg base) {
as.cmpl(HPHP::RefCountStaticValue, base[FAST_REFCOUNT_OFFSET]);
ifThen(as, CC_NBE, [&] { as.int3(); });
ifThen(as, CC_NBE, [&] { as.ud2(); });
}
static void emitIncRef(CodeGenerator::Asm& as, PhysReg base) {
@@ -2460,7 +2458,7 @@ static void emitIncRef(CodeGenerator::Asm& as, PhysReg base) {
emitAssertRefCount(as, base);
}
// emit incref
as.addl(1, base[FAST_REFCOUNT_OFFSET]);
as.incl(base[FAST_REFCOUNT_OFFSET]);
if (RuntimeOption::EvalHHIRGenerateAsserts) {
// Assert that the ref count is greater than zero
emitAssertFlagsNonNegative(as);
@@ -2724,7 +2722,7 @@ Address CodeGenerator::cgCheckStaticBitAndDecRef(Type type,
emitAssertRefCount(m_as, dataReg);
}
// Load _count in scratchReg
m_as.load_reg64_disp_reg32(dataReg, FAST_REFCOUNT_OFFSET, scratchReg);
m_as.loadl(dataReg[FAST_REFCOUNT_OFFSET], r32(scratchReg));
// Check for RefCountStaticValue
patchStaticCheck = cgCheckStaticBit(type, scratchReg,
@@ -2732,7 +2730,7 @@ Address CodeGenerator::cgCheckStaticBitAndDecRef(Type type,
// Decrement count and store it back in memory.
// If there's an exit, emit jump to it when _count would get down to 0
m_as.sub_imm32_reg32(1, scratchReg);
m_as.decq(scratchReg);
if (exit) {
emitFwdJcc(CC_E, exit);
}
@@ -2772,7 +2770,7 @@ Address CodeGenerator::cgCheckStaticBitAndDecRef(Type type,
}
// Decrement _count
m_as.sub_imm32_disp_reg32(1, FAST_REFCOUNT_OFFSET, dataReg);
m_as.decl(dataReg[FAST_REFCOUNT_OFFSET]);
if (RuntimeOption::EvalHHIRGenerateAsserts) {
// Assert that the ref count is not less than zero
@@ -3426,7 +3424,7 @@ static void emitLdClsCctx(CodeGenerator::Asm& a,
PhysReg srcReg,
PhysReg dstReg) {
emitMovRegReg(a, srcReg, dstReg);
a. subq(1, dstReg);
a. decq(dstReg);
}
void CodeGenerator::cgLdClsCtx(IRInstruction* inst) {
+11 -11
Ver Arquivo
@@ -873,7 +873,7 @@ TranslatorX64::emitIncRef(X64Assembler &a, PhysReg base, DataType dtype) {
* compact, it only writes the low-order 8 bits of eflags, causing a
* partial dependency for any downstream flags-dependent code.
*/
a. add_imm32_disp_reg32(1, FAST_REFCOUNT_OFFSET, base);
a. incl(base[FAST_REFCOUNT_OFFSET]);
} // endif
}
@@ -888,7 +888,7 @@ TranslatorX64::emitIncRefGenericRegSafe(PhysReg base,
tmpReg);
{ // if !static
IfCountNotStatic ins(a, tmpReg);
a. add_imm32_disp_reg32(1, FAST_REFCOUNT_OFFSET, tmpReg);
a. incl(tmpReg[FAST_REFCOUNT_OFFSET]);
} // endif
} // endif
}
@@ -992,7 +992,7 @@ void TranslatorX64::emitDecRef(Asm& a,
SpaceRecorder sr("_DecRef", a);
{ // if !static
IfCountNotStatic ins(a, rDatum, type);
a. sub_imm32_disp_reg32(1, FAST_REFCOUNT_OFFSET, rDatum);
a. decl(rDatum[FAST_REFCOUNT_OFFSET]);
assert(type >= 0 && type < MaxNumDataTypes);
if (&a == &this->astubs) {
@@ -1201,7 +1201,7 @@ void TranslatorX64::emitGenericDecRefHelpers() {
m_dtorGenericStubRegs = a.code.frontier;
a. cmpl (RefCountStaticValue, rdi[FAST_REFCOUNT_OFFSET]);
jccBlock<CC_Z>(a, [&] {
a. subl (1, rdi[FAST_REFCOUNT_OFFSET]);
a. decl (rdi[FAST_REFCOUNT_OFFSET]);
release.jcc8(a, CC_Z);
});
a. ret ();
@@ -2288,7 +2288,7 @@ TranslatorX64::emitPrologue(Func* func, int nPassed) {
// rather than execution speed; i.e., don't unroll the loop.
TCA loopTop = a.code.frontier;
a. sub_imm32_reg64(sizeof(Cell), rVmSp);
a. add_imm32_reg32(1, rax);
a. incl(eax);
emitStoreUninitNull(a, 0, rVmSp);
a. cmp_imm32_reg32(numParams, rax);
a. jcc8(CC_L, loopTop);
@@ -7429,7 +7429,7 @@ void TranslatorX64::translateCreateCont(const Tracelet& t,
const int thisOff = cellsToBytes(genLocals - thisId - 1);
// We don't have to check for a static refcount since we
// know it's an Object
a. addl(1, r(rScratch)[FAST_REFCOUNT_OFFSET]);
a. incl(r(rScratch)[FAST_REFCOUNT_OFFSET]);
a. storeq(r(rScratch), r(rDest)[thisOff + TVOFF(m_data)]);
emitStoreTVType(a, KindOfObject, r(rDest)[thisOff + TVOFF(m_type)]);
}
@@ -7624,7 +7624,7 @@ void TranslatorX64::emitContPreNext(const NormalizedInstruction& i,
}
// ++m_index
a. add_imm64_disp_reg64(0x1, CONTOFF(m_index), r(rCont));
a. incq(r(rCont)[CONTOFF(m_index)]);
// m_running = true
a. store_imm8_disp_reg(0x1, CONTOFF(m_running), r(rCont));
}
@@ -8561,9 +8561,9 @@ TranslatorX64::translateIncDecL(const Tracelet& t,
emitMovRegReg(a, localVal, output);
}
if (inc) {
a. add_imm32_reg64(1, localVal);
a. incq(localVal);
} else {
a. sub_imm32_reg64(1, localVal);
a. decq(localVal);
}
if (i.outStack && pre) { // --$a, ++$a
PhysReg output = getReg(i.outStack->location);
@@ -9396,7 +9396,7 @@ TranslatorX64::emitFPushCtorDFast(const NormalizedInstruction& i,
if (i.noCtor) {
// If we're not running the constructor, just incref the object once and
// don't set up the ActRec.
a.add_imm32_disp_reg32(1, FAST_REFCOUNT_OFFSET, rax);
a.incl(rax[FAST_REFCOUNT_OFFSET]);
return;
} else {
// Incref the object twice: once for the stack and once for $this in the
@@ -11627,7 +11627,7 @@ asm_label(a, release);
a. loadq (rIter[TVOFF(m_data)], rData);
a. cmpl (RefCountStaticValue, rData[FAST_REFCOUNT_OFFSET]);
jccBlock<CC_Z>(a, [&] {
a. subl (1, rData[FAST_REFCOUNT_OFFSET]);
a. decl (rData[FAST_REFCOUNT_OFFSET]);
a. jz8 (doRelease);
});
a. ret ();
+2 -2
Ver Arquivo
@@ -1053,7 +1053,7 @@ struct X64Assembler {
void incq(Reg64 r) { instrR(instr_inc, r); }
void incl(Reg32 r) { instrR(instr_inc, r); }
void decq(Reg64 r) { instrR(instr_dec, r); }
void decl(Reg32 r) { instrR(instr_dec, r); }
// decl(Reg32) cannot be encoded; it's REX.
void notb(Reg8 r) { instrR(instr_notb, r); }
void not(Reg64 r) { instrR(instr_not, r); }
void neg(Reg64 r) { instrR(instr_neg, r); }
@@ -1072,7 +1072,7 @@ struct X64Assembler {
void incq(MemoryRef m) { instrM(instr_inc, m); }
void incl(MemoryRef m) { instrM32(instr_inc, m); }
void decq(MemoryRef m) { instrM(instr_dec, m); }
void decl(MemoryRef m) { instrM32(instr_inc, m); }
void decl(MemoryRef m) { instrM32(instr_dec, m); }
void movdqu(RegXMM x, MemoryRef m) { instrRM(instr_movdqu, x, m); }
void movdqu(RegXMM x, IndexedMemoryRef m) { instrRM(instr_movdqu, x, m); }