Allocate XMM registers for doubles

This diff adds support for allocating SSATmps of type Dbl directly to
XMM registers. The register allocator now keeps per-reg-type lists of
caller/callee saved registers.  xmm0 and xmm1 are reserved for scratch
as rXMMScratch[01].

Added a runtime option HHIRAllocXMMRegs to enable/disable XMM
allocation -- if disabled, it forces all SSATmps to be allocated to GP
regs, as before.

While here, changed the conversion of int/bool consts to double from
runtime conversions to JIT-time.
Esse commit está contido em:
Guilherme Ottoni
2013-05-12 01:58:35 -07:00
commit de Sara Golemon
commit d7708fde5b
15 arquivos alterados com 489 adições e 237 exclusões
+1
Ver Arquivo
@@ -432,6 +432,7 @@ public:
F(bool, HHIREnableCoalescing, true) \
F(bool, HHIREnableRefCountOpt, true) \
F(bool, HHIREnableSinking, true) \
F(bool, HHIRAllocXMMRegs, true) \
F(bool, HHIRGenerateAsserts, debug) \
F(bool, HHIRDirectExit, true) \
F(bool, HHIRDisableTx64, true) \
+41 -2
Ver Arquivo
@@ -77,16 +77,56 @@ const RegSet kCallerSaved = RegSet()
// r10 is reserved by the assembler, and for
// various extremely-specific scratch uses.
| RegSet(reg::r11)
// XMM regs
// | RegSet(reg::xmm0) Reserved for rXMMScratch0
// | RegSet(reg::xmm1) Reserved for rXMMScratch1
| RegSet(reg::xmm2)
| RegSet(reg::xmm3)
| RegSet(reg::xmm4)
| RegSet(reg::xmm5)
| RegSet(reg::xmm6)
| RegSet(reg::xmm7)
| RegSet(reg::xmm8)
| RegSet(reg::xmm9)
| RegSet(reg::xmm10)
| RegSet(reg::xmm11)
| RegSet(reg::xmm12)
| RegSet(reg::xmm13)
| RegSet(reg::xmm14)
| RegSet(reg::xmm15)
;
// Callee-saved registers: r13, r14 and r15. No XMM register is listed here;
// the XMM registers made available above are all in kCallerSaved.
const RegSet kCalleeSaved = RegSet()
  // r12 is reserved for rVmTl
  | RegSet(reg::r13)
  | RegSet(reg::r14)
  | RegSet(reg::r15)
  ;
const RegSet kAllRegs = kCallerSaved | kCalleeSaved;
// Every XMM register, xmm0 through xmm15. Despite the "MMX" in the name, the
// members are the XMM registers. Unlike kCallerSaved, this set also contains
// xmm0 and xmm1, which are reserved as scratch registers.
const RegSet kMMXRegs = RegSet()
| RegSet(reg::xmm0)
| RegSet(reg::xmm1)
| RegSet(reg::xmm2)
| RegSet(reg::xmm3)
| RegSet(reg::xmm4)
| RegSet(reg::xmm5)
| RegSet(reg::xmm6)
| RegSet(reg::xmm7)
| RegSet(reg::xmm8)
| RegSet(reg::xmm9)
| RegSet(reg::xmm10)
| RegSet(reg::xmm11)
| RegSet(reg::xmm12)
| RegSet(reg::xmm13)
| RegSet(reg::xmm14)
| RegSet(reg::xmm15)
;
// General-purpose-only views of the saved-register sets: the same sets with
// every register in kMMXRegs (the XMM registers) removed.
const RegSet kGPCallerSaved = kCallerSaved - kMMXRegs;
const RegSet kGPCalleeSaved = kCalleeSaved - kMMXRegs;
//////////////////////////////////////////////////////////////////////
/*
* Registers reserved for cross-tracelet ABI purposes.
@@ -276,7 +316,6 @@ inline SRFlags operator|(SRFlags a, SRFlags b) {
// Set of all the x64 registers.
const RegSet kAllX64Regs = RegSet(kAllRegs).add(reg::r10)
| kSpecialCrossTraceRegs;
const int kNumX64Regs = 16;
/*
* Some data structures are accessed often enough from translated code
+1 -1
Ver Arquivo
@@ -94,7 +94,7 @@ bool checkCfg(Trace* trace, const IRFactory& factory) {
}
enum Limits : unsigned {
kNumRegisters = Transl::kNumX64Regs,
kNumRegisters = Transl::kNumRegs,
kNumSlots = NumPreAllocatedSpillLocs
};
+200 -95
Ver Arquivo
@@ -109,6 +109,18 @@ struct MoveInfo {
PhysReg m_reg1, m_reg2;
};
// Reports whether any register on a move cycle is an XMM register.
//
// The walk starts at cycle.node and follows moves[] from node to node until
// it arrives back at the starting node. The starting node is always tested,
// so a cycle of any length is covered.
template <int N>
static bool cycleHasMMXReg(const CycleInfo& cycle,
                           const int (&moves)[N]) {
  const int start = cycle.node;
  int cur = start;
  for (;;) {
    if (PhysReg(cur).isXMM()) {
      return true;
    }
    cur = moves[cur];
    if (cur == start) {
      return false;
    }
  }
}
template <int N>
void doRegMoves(int (&moves)[N], int rTmp,
std::vector<MoveInfo>& howTo) {
@@ -184,11 +196,13 @@ pathloop:
}
// Deal with any cycles we encountered
for (int i = 0; i < numCycles; ++i) {
if (cycles[i].length == 2) {
// can't use xchg if one of the registers is MMX
bool hasMMXReg = cycleHasMMXReg(cycles[i], moves);
if (cycles[i].length == 2 && !hasMMXReg) {
int v = cycles[i].node;
int w = moves[v];
howTo.push_back(MoveInfo(MoveInfo::Xchg, w, v));
} else if (cycles[i].length == 3) {
} else if (cycles[i].length == 3 && !hasMMXReg) {
int v = cycles[i].node;
int w = moves[v];
howTo.push_back(MoveInfo(MoveInfo::Xchg, w, v));
@@ -481,13 +495,46 @@ Address CodeGenerator::emitSmashableFwdJcc(ConditionCode cc, Block* target,
return start;
}
void emitLoadImm(CodeGenerator::Asm& as, int64_t val, PhysReg dstReg) {
as.emitImmReg(val, dstReg);
}
// Emits a register-to-register move of a 64-bit value, choosing the
// instruction from the register classes of the two operands. Nothing is
// emitted when source and destination are the same register. Both registers
// must be valid.
static void
emitMovRegReg(CodeGenerator::Asm& as, PhysReg srcReg, PhysReg dstReg) {
  assert(srcReg != InvalidReg);
  assert(dstReg != InvalidReg);
  if (srcReg == dstReg) return;
  if (srcReg.isGP()) {
    if (dstReg.isGP()) {                 // GP => GP
      as.movq(srcReg, dstReg);
    } else {                             // GP => XMM
      // This generates a movq x86 instruction, which zero extends
      // the 64-bit value in srcReg into a 128-bit XMM register
      as.mov_reg64_xmm(srcReg, dstReg);
    }
  } else {
    if (dstReg.isGP()) {                 // XMM => GP
      as.mov_xmm_reg64(srcReg, dstReg);
    } else {                             // XMM => XMM
      // This copies all 128 bits in XMM,
      // thus avoiding partial register stalls
      as.movdqa(srcReg, dstReg);
    }
  }
}
// Materializes the 64-bit immediate `val` in `dstReg`, which may be either a
// general-purpose or an XMM register.
//
// A GP destination is loaded directly. For an XMM destination, zero is
// produced by xor-ing the register with itself; any other value is first
// loaded into rScratch and then moved across, because an immediate cannot be
// moved straight into an XMM register.
void emitLoadImm(CodeGenerator::Asm& as, int64_t val, PhysReg dstReg) {
  assert(dstReg != InvalidReg);
  if (!dstReg.isGP()) {
    assert(dstReg.isXMM());
    if (val != 0) {
      // Can't move immediate directly into XMM register, so use rScratch
      as.emitImmReg(val, rScratch);
      emitMovRegReg(as, rScratch, dstReg);
      return;
    }
    as.pxor_xmm_xmm(dstReg, dstReg);
    return;
  }
  as.emitImmReg(val, dstReg);
}
static void emitLea(CodeGenerator::Asm& as, MemoryRef mr, PhysReg dst) {
@@ -499,6 +546,26 @@ static void emitLea(CodeGenerator::Asm& as, MemoryRef mr, PhysReg dst) {
}
}
// Loads a 64-bit value from `mem` into `reg`: loadq for a general-purpose
// register, movsd for anything else (expected to be an XMM register).
template<class Mem>
static void emitLoadReg(CodeGenerator::Asm& as, Mem mem, PhysReg reg) {
  assert(reg != InvalidReg);
  if (!reg.isGP()) {
    as.movsd(mem, reg);
    return;
  }
  as.loadq(mem, reg);
}
// Stores the 64-bit value in `reg` to `mem`: storeq for a general-purpose
// register, movsd for anything else (expected to be an XMM register).
template<class Mem>
static void emitStoreReg(CodeGenerator::Asm& as, PhysReg reg, Mem mem) {
  assert(reg != InvalidReg);
  if (!reg.isGP()) {
    as.movsd(reg, mem);
    return;
  }
  as.storeq(reg, mem);
}
void shuffle2(CodeGenerator::Asm& a,
PhysReg s0, PhysReg s1, PhysReg d0, PhysReg d1) {
assert(s0 != s1);
@@ -532,37 +599,65 @@ static void zeroExtendIfBool(X64Assembler& as, const SSATmp* src,
}
}
static void prepUnaryXmmOp(X64Assembler& a, const SSATmp* ssa, RegXMM xmm,
const RegisterInfo& info) {
auto reg = info.getReg();
RegNumber src(reg);
if (reg == InvalidReg) {
src = rScratch;
assert(ssa->isConst());
a.mov_imm64_reg(ssa->getValBits(), rScratch);
}
if (ssa->isA(Type::Int | Type::Bool)) {
// Expand non-const bools to 64-bit.
// Consts are already moved into src as 64-bit values above.
if (!ssa->isConst()) zeroExtendIfBool(a, ssa, info);
// cvtsi2sd doesn't modify the high bits of its target, which can
// cause false dependencies to prevent register renaming from kicking
// in. Break the dependency chain by zeroing out the destination reg.
a. pxor_xmm_xmm(xmm, xmm);
a. cvtsi2sd_reg64_xmm(src, xmm);
} else {
a. mov_reg64_xmm(src, xmm);
}
// Converts the integer `i` to a double and returns the raw 64-bit pattern of
// that double, packaged as an int64_t. The result is NOT the numeric value:
// e.g. an input of 1 yields 0x3FF0000000000000, the bit pattern of 1.0 on a
// platform with IEEE-754 binary64 doubles. Used to turn int/bool constants
// into double constants at JIT time.
static int64_t convIntToDouble(int64_t i) {
  // The pun below is only meaningful when both views have the same width.
  static_assert(sizeof(double) == sizeof(int64_t),
                "convIntToDouble requires a 64-bit double");
  union {
    double  asDouble;
    int64_t asBits;
  } pun;
  pun.asDouble = static_cast<double>(i);
  return pun.asBits;
}
static void prepBinaryXmmOp(X64Assembler& a, const SSATmp* left,
const SSATmp* right, const RegAllocInfo& regs) {
prepUnaryXmmOp(a, left, xmm0, regs[left]);
prepUnaryXmmOp(a, right, xmm1, regs[right]);
/*
 * Makes the value of `tmp` (a Bool, an Int, or a Dbl) available as a double
 * in an XMM register and returns that register.
 *
 *  - If `tmp` already lives in an XMM register, that register is returned and
 *    no code is emitted.
 *  - If `tmp` lives in a GP register, its value is brought into rXMMScratch:
 *    a Dbl is moved over as-is, while a Bool/Int is converted at runtime with
 *    cvtsi2sd (a Bool is zero-extended first).
 *  - If `tmp` has no register it must be a constant; a Bool/Int constant is
 *    converted to its double bit pattern here, at JIT time, and the bits are
 *    loaded through rScratch into rXMMScratch.
 *
 * In every case other than the first, rXMMScratch is the returned register.
 * Any instructions needed are emitted into `as`.
 */
static PhysReg prepXMMReg(const SSATmp* tmp,
                          X64Assembler& as,
                          const RegAllocInfo& allocInfo,
                          RegXMM rXMMScratch) {
  assert(tmp->isA(Type::Bool) || tmp->isA(Type::Int) || tmp->isA(Type::Dbl));

  PhysReg allocated = allocInfo[tmp].getReg();

  // Already in an XMM register: nothing to do.
  if (allocated.isXMM()) {
    return allocated;
  }

  // No register at all: tmp is a constant.
  if (allocated == InvalidReg) {
    assert(tmp->isConst());
    int64_t bits = tmp->getValRawInt();
    if (!tmp->isA(Type::Dbl)) {
      assert(tmp->isA(Type::Bool | Type::Int));
      if (tmp->isA(Type::Bool)) bits = bits != 0; // see task #2401790
      bits = convIntToDouble(bits);
    }
    emitLoadImm(as, bits, rScratch);
    emitMovRegReg(as, rScratch, rXMMScratch);
    return rXMMScratch;
  }

  // In a GP register holding a double: a plain move is enough.
  if (tmp->isA(Type::Dbl)) {
    emitMovRegReg(as, allocated, rXMMScratch);
    return rXMMScratch;
  }

  // In a GP register holding a Bool or Int: convert at runtime.
  assert(tmp->isA(Type::Bool) || tmp->isA(Type::Int));
  zeroExtendIfBool(as, tmp, allocInfo[tmp]);
  as.pxor_xmm_xmm(rXMMScratch, rXMMScratch);
  as.cvtsi2sd_reg64_xmm(allocated, rXMMScratch);
  return rXMMScratch;
}
static void doubleCmp(X64Assembler& a, RegXMM xmm0, RegXMM xmm1) {
a. ucomisd_xmm_xmm(xmm0, xmm1);
static void doubleCmp(X64Assembler& a, RegXMM xmmReg0, RegXMM xmmReg1) {
a. ucomisd_xmm_xmm(xmmReg0, xmmReg1);
Label notPF;
a. jnp8(notPF);
// PF means the doubles were unordered. We treat this as !equal, so
@@ -590,8 +685,10 @@ void CodeGenerator::cgJcc(IRInstruction* inst) {
CG_PUNT(cgJcc);
}
if (src1Type == Type::Dbl || src2Type == Type::Dbl) {
prepBinaryXmmOp(m_as, src1, src2, m_regs);
doubleCmp(m_as, xmm0, xmm1);
PhysReg srcReg1 = prepXMMReg(src1, m_as, m_regs, rXMMScratch0);
PhysReg srcReg2 = prepXMMReg(src2, m_as, m_regs, rXMMScratch1);
assert(srcReg1 != rXMMScratch1 && srcReg2 != rXMMScratch0);
doubleCmp(m_as, srcReg1, srcReg2);
} else {
if (src1Type == Type::Cls && src2Type == Type::Cls) {
assert(opc == JmpSame || opc == JmpNSame);
@@ -646,8 +743,8 @@ void CodeGenerator::cgJmpNSame(IRInstruction* inst) { cgJcc(inst); }
typedef Transl::X64Assembler Asm;
static int64_t shuffleArgs(Asm& a, ArgGroup& args) {
// Compute the move/shuffle plan.
int moves[kNumX64Regs];
ArgDesc* argDescs[kNumX64Regs];
int moves[kNumRegs];
ArgDesc* argDescs[kNumRegs];
memset(moves, -1, sizeof moves);
memset(argDescs, 0, sizeof argDescs);
for (size_t i = 0; i < args.numRegArgs(); ++i) {
@@ -671,18 +768,22 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) {
for (size_t i = 0; i < howTo.size(); ++i) {
if (howTo[i].m_kind == MoveInfo::Move) {
if (howTo[i].m_reg2 == reg::rScratch) {
a. movq (howTo[i].m_reg1, howTo[i].m_reg2);
emitMovRegReg(a, howTo[i].m_reg1, howTo[i].m_reg2);
} else {
ArgDesc* argDesc = argDescs[int(howTo[i].m_reg2)];
ArgDesc::Kind kind = argDesc->getKind();
if (kind == ArgDesc::Reg || kind == ArgDesc::TypeReg) {
if (argDesc->isZeroExtend()) {
assert(howTo[i].m_reg1.isGP());
assert(howTo[i].m_reg2.isGP());
a. movzbl (rbyte(howTo[i].m_reg1), r32(howTo[i].m_reg2));
} else {
a. movq (howTo[i].m_reg1, howTo[i].m_reg2);
emitMovRegReg(a, howTo[i].m_reg1, howTo[i].m_reg2);
}
} else {
assert(kind == ArgDesc::Addr);
assert(howTo[i].m_reg1.isGP());
assert(howTo[i].m_reg2.isGP());
a. lea (howTo[i].m_reg1[argDesc->getImm().q()],
howTo[i].m_reg2);
}
@@ -691,6 +792,8 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) {
}
}
} else {
assert(howTo[i].m_reg1.isGP());
assert(howTo[i].m_reg2.isGP());
a. xchgq (howTo[i].m_reg1, howTo[i].m_reg2);
}
}
@@ -702,6 +805,7 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) {
if (!args[i].done()) {
ArgDesc::Kind kind = args[i].getKind();
PhysReg dst = args[i].getDstReg();
assert(dst.isGP());
if (kind == ArgDesc::Imm) {
emitLoadImm(a, args[i].getImm().q(), dst);
} else if (kind == ArgDesc::TypeReg) {
@@ -728,13 +832,19 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) {
a. movzbl(rbyte(srcReg), r32(rScratch));
a. push(rScratch);
} else {
a. push(srcReg);
if (srcReg.isXMM()) {
emitMovRegReg(a, srcReg, rScratch);
a.push(rScratch);
} else {
a.push(srcReg);
}
}
break;
case ArgDesc::TypeReg:
static_assert(kTypeWordOffset == 4 || kTypeWordOffset == 1,
"kTypeWordOffset value not supported");
assert(srcReg.isGP());
// x86 stacks grow down, so push higher offset items first
if (kTypeWordOffset == 4) {
a. pushl(r32(srcReg));
@@ -1071,9 +1181,20 @@ void CodeGenerator::cgBinaryOp(IRInstruction* inst,
CG_PUNT(cgBinaryOp);
}
if (src1->isA(Type::Dbl) || src2->isA(Type::Dbl)) {
prepBinaryXmmOp(m_as, src1, src2, m_regs);
(m_as.*fpInstr)(xmm1, xmm0);
m_as. mov_xmm_reg64(xmm0, m_regs[dst].getReg());
PhysReg dstReg = m_regs[dst].getReg();
PhysReg resReg = dstReg.isXMM() && dstReg != m_regs[src2].getReg() ?
dstReg : PhysReg(rXMMScratch0);
assert(resReg.isXMM());
PhysReg srcReg1 = prepXMMReg(src1, m_as, m_regs, resReg);
PhysReg srcReg2 = prepXMMReg(src2, m_as, m_regs, rXMMScratch1);
assert(srcReg1 != rXMMScratch1 && srcReg2 != rXMMScratch0);
emitMovRegReg(m_as, srcReg1, resReg);
(m_as.*fpInstr)(srcReg2, resReg);
emitMovRegReg(m_as, resReg, dstReg);
return;
}
cgBinaryIntOp(inst, instrIR, instrRR, movInstr,
@@ -1353,8 +1474,10 @@ void CodeGenerator::cgOpCmpHelper(
else if (type1 == Type::Dbl || type2 == Type::Dbl) {
if ((type1 == Type::Dbl || type1 == Type::Int) &&
(type2 == Type::Dbl || type2 == Type::Int)) {
prepBinaryXmmOp(m_as, src1, src2, m_regs);
doubleCmp(m_as, xmm0, xmm1);
PhysReg srcReg1 = prepXMMReg(src1, m_as, m_regs, rXMMScratch0);
PhysReg srcReg2 = prepXMMReg(src2, m_as, m_regs, rXMMScratch1);
assert(srcReg1 != rXMMScratch1 && srcReg2 != rXMMScratch0);
doubleCmp(m_as, srcReg1, srcReg2);
setFromFlags();
} else {
CG_PUNT(cgOpCmpHelper_Dbl);
@@ -1707,7 +1830,7 @@ void CodeGenerator::cgConvDblToBool(IRInstruction* inst) {
m_as.mov_imm64_reg(1, dstReg);
}
} else {
m_as.movq(srcReg, dstReg);
emitMovRegReg(m_as, srcReg, dstReg);
m_as.shlq(1, dstReg); // 0.0 stays zero and -0.0 is now 0.0
m_as.setne(rbyte(dstReg)); // lower byte becomes 1 if dstReg != 0
m_as.movzbl(rbyte(dstReg), r32(dstReg));
@@ -1736,54 +1859,36 @@ void CodeGenerator::cgConvIntToBool(IRInstruction* inst) {
}
}
void CodeGenerator::cgConvBoolToDbl(IRInstruction* inst) {
// cvtsi2sd doesn't modify the high bits of its target, which can
// cause false dependencies to prevent register renaming from kicking
// in. Break the dependency chain by zeroing out xmm0.
m_as.pxor_xmm_xmm(xmm0, xmm0);
SSATmp* dst = inst->getDst();
auto dstReg = m_regs[dst].getReg();
assert(dstReg != InvalidReg);
void CodeGenerator::emitConvBoolOrIntToDbl(IRInstruction* inst) {
SSATmp* src = inst->getSrc(0);
auto srcReg = m_regs[src].getReg();
if (srcReg == InvalidReg) {
assert(src->isConst());
SSATmp* dst = inst->getDst();
PhysReg dstReg = m_regs[dst].getReg();
assert(src->isA(Type::Bool) || src->isA(Type::Int));
assert(dstReg != InvalidReg);
if (src->isConst()) {
int64_t constVal = src->getValRawInt();
if (constVal == 0) {
m_as.xor_reg64_reg64(dstReg, dstReg);
} else {
m_as.mov_imm64_reg(1, dstReg);
}
if (src->isA(Type::Bool)) constVal = constVal != 0; // see task #2401790
constVal = convIntToDouble(constVal);
emitLoadImm(m_as, constVal, dstReg);
} else {
m_as.movzbl(rbyte(srcReg), r32(dstReg));
// cvtsi2sd doesn't modify the high bits of its target, which can
// cause false dependencies to prevent register renaming from kicking
// in. Break the dependency chain by zeroing out the XMM reg.
PhysReg srcReg = m_regs[src].getReg();
PhysReg xmmReg = dstReg.isXMM() ? dstReg : PhysReg(rXMMScratch0);
m_as.pxor_xmm_xmm(xmmReg, xmmReg);
m_as.cvtsi2sd_reg64_xmm(srcReg, xmmReg);
zeroExtendIfBool(m_as, src, m_regs[src]);
emitMovRegReg(m_as, xmmReg, dstReg);
}
m_as.cvtsi2sd_reg64_xmm(dstReg, xmm0);
m_as.mov_xmm_reg64(xmm0, dstReg);
}
// Generates code for a Bool->Dbl conversion by delegating to the helper
// shared with the Int->Dbl conversion.
void CodeGenerator::cgConvBoolToDbl(IRInstruction* inst) {
emitConvBoolOrIntToDbl(inst);
}
// Generates code for an Int->Dbl conversion by delegating to the helper
// shared with the Bool->Dbl conversion. The helper handles constant sources
// and both GP and XMM destination registers.
void CodeGenerator::cgConvIntToDbl(IRInstruction* inst) {
  emitConvBoolOrIntToDbl(inst);
}
void CodeGenerator::cgConvBoolToInt(IRInstruction* inst) {
@@ -1858,7 +1963,7 @@ void CodeGenerator::cgUnbox(IRInstruction* inst) {
// srcTypeReg == KindOfRef; srcValReg is RefData*
const size_t ref_tv_off = RefData::tvOffset();
if (dstValReg != srcValReg) {
m_as.loadq(srcValReg[ref_tv_off + TVOFF(m_data)], dstValReg);
emitLoadReg(m_as, srcValReg[ref_tv_off + TVOFF(m_data)], dstValReg);
emitLoadTVType(m_as, srcValReg[ref_tv_off + TVOFF(m_type)],
r32(dstTypeReg));
} else {
@@ -1984,8 +2089,8 @@ void CodeGenerator::cgRetVal(IRInstruction* inst) {
a. storeq (val->getValRawInt(),
rFp[AROFF(m_r) + TVOFF(m_data)]);
} else {
zeroExtendIfBool(m_as, val, m_regs[val]);
a. storeq (m_regs[val].getReg(), rFp[AROFF(m_r) + TVOFF(m_data)]);
zeroExtendIfBool(a, val, m_regs[val]);
emitStoreReg(a, m_regs[val].getReg(), rFp[AROFF(m_r) + TVOFF(m_data)]);
}
}
@@ -2250,7 +2355,7 @@ void CodeGenerator::cgSpill(IRInstruction* inst) {
// We do not need to mask booleans, since the IR will reload the spill
auto srcReg = m_regs[src].getReg(locIndex);
auto sinfo = m_regs[dst].getSpillInfo(locIndex);
m_as. storeq(srcReg, reg::rsp[sinfo.offset()]);
emitStoreReg(m_as, srcReg, reg::rsp[sinfo.offset()]);
}
}
@@ -2262,7 +2367,7 @@ void CodeGenerator::cgReload(IRInstruction* inst) {
for (int locIndex = 0; locIndex < src->numNeededRegs(); ++locIndex) {
auto dstReg = m_regs[dst].getReg(locIndex);
auto sinfo = m_regs[src].getSpillInfo(locIndex);
m_as. loadq(reg::rsp[sinfo.offset()], dstReg);
emitLoadReg(m_as, reg::rsp[sinfo.offset()], dstReg);
}
}
@@ -3752,7 +3857,7 @@ void CodeGenerator::cgStore(PhysReg base,
m_as.storeq(val, base[off + TVOFF(m_data)]);
} else {
zeroExtendIfBool(m_as, src, m_regs[src]);
m_as.storeq(m_regs[src].getReg(), base[off + TVOFF(m_data)]);
emitStoreReg(m_as, m_regs[src].getReg(), base[off + TVOFF(m_data)]);
}
}
@@ -3792,7 +3897,7 @@ void CodeGenerator::cgLoad(PhysReg base,
if (type == Type::Bool) {
m_as.load_reg64_disp_reg32(base, off + TVOFF(m_data), dstReg);
} else {
m_as.load_reg64_disp_reg64(base, off + TVOFF(m_data), dstReg);
emitLoadReg(m_as, base[off + TVOFF(m_data)], dstReg);
}
}
+1 -1
Ver Arquivo
@@ -270,7 +270,6 @@ private:
Address cgCheckRefCountedType(PhysReg typeReg);
Address cgCheckRefCountedType(PhysReg baseReg,
int64_t offset);
void cgConvPrimitiveToDbl(IRInstruction* inst);
void cgDecRefStaticType(Type type,
PhysReg dataReg,
Block* exit,
@@ -311,6 +310,7 @@ private:
void emitReqBindAddr(const Func* func, TCA& dest, Offset offset);
void emitAdjustSp(PhysReg spReg, PhysReg dstReg, int64_t adjustment);
void emitConvBoolOrIntToDbl(IRInstruction* inst);
/*
* Generate an if-block that branches around some unlikely code, handling
+4 -1
Ver Arquivo
@@ -223,8 +223,11 @@ public:
IRInstruction* defLabel();
IRInstruction* defLabel(unsigned numDst);
// Returns the constant SSATmp for `val`, looked up via findConst under the
// type reported by typeForConst. Bool-typed values are first normalized to
// 0 or 1.
template<typename T> SSATmp* cns(T val) {
  Type type = typeForConst(val);
  // Normalize bool values to 0 or 1
  if (type.equals(Type::Bool)) val = (T)(val != 0);
  ConstData cdata(val);
  return findConst(cdata, type);
}
Block* defBlock(const Func* f, IRInstruction*);
Block* defBlock(const Func* f) {
+58 -38
Ver Arquivo
@@ -54,8 +54,15 @@ RegSet RegisterInfo::getRegs() const {
return regs;
}
// Picks the register class to allocate for `tmp`: XMM when XMM allocation is
// enabled (EvalHHIRAllocXMMRegs) and the tmp is a Dbl, GP in every other case.
static PhysReg::Type getRegType(const SSATmp* tmp) {
  const bool wantsXMM =
    RuntimeOption::EvalHHIRAllocXMMRegs && tmp->isA(Type::Dbl);
  return wantsXMM ? PhysReg::XMM : PhysReg::GP;
}
struct LinearScan : private boost::noncopyable {
static const int NumRegs = 16;
static const int NumRegs = kNumRegs;
explicit LinearScan(IRFactory*);
RegAllocInfo allocRegs(Trace*, LifetimeInfo*);
@@ -67,7 +74,7 @@ private:
public:
bool isReserved() const { return m_reserved; }
bool isCallerSaved() const {
return kCallerSaved.contains(PhysReg(m_regNo));
return kCallerSaved.contains(m_reg);
}
bool isCalleeSaved() const { return !isCallerSaved(); }
bool isAllocated() const { return m_ssaTmp != nullptr; }
@@ -77,6 +84,7 @@ private:
Type type = m_ssaTmp->type();
return type == Type::RetAddr;
}
PhysReg::Type type() const { return m_reg.type(); }
private:
SSATmp* m_ssaTmp; // non-null when allocated
@@ -86,7 +94,7 @@ private:
// LinearScan::m_freeCalleeSaved, or LinearScan::m_allocatedRegs.
// <m_pos> of a reserved reg is undefined.
smart::list<RegState*>::iterator m_pos;
uint16_t m_regNo;
PhysReg m_reg;
bool m_pinned; // do not free this register if pinned
// We stress test register allocation by reducing the number of
// free registers.
@@ -112,7 +120,7 @@ private:
void clear();
void add(SSATmp* tmp, uint32_t index, int argNum);
private:
// indexed by arg number
// indexed by register number
std::pair<SSATmp*, uint32_t> m_preColoredTmps[LinearScan::NumRegs];
};
@@ -158,7 +166,7 @@ private:
void pushFreeReg(RegState* reg);
RegState* popFreeReg(smart::list<RegState*>& freeList);
void freeReg(RegState* reg);
RegState* getFreeReg(bool preferCallerSaved);
RegState* getFreeReg(PhysReg::Type type, bool preferCallerSaved);
RegState* getReg(RegState* reg);
template<typename Inner, int DumpVal=4>
@@ -175,8 +183,8 @@ private:
IRFactory* const m_irFactory;
RegState m_regs[NumRegs];
// Lists of free caller and callee-saved registers, respectively.
smart::list<RegState*> m_freeCallerSaved;
smart::list<RegState*> m_freeCalleeSaved;
smart::list<RegState*> m_freeCallerSaved[PhysReg::kNumTypes];
smart::list<RegState*> m_freeCalleeSaved[PhysReg::kNumTypes];
// List of assigned registers, sorted high to low by lastUseId.
smart::list<RegState*> m_allocatedRegs;
@@ -237,8 +245,10 @@ void LinearScan::StateSave::save(LinearScan* ls) {
void LinearScan::StateSave::restore(LinearScan* ls) {
ls->m_allocatedRegs.clear();
ls->m_freeCalleeSaved.clear();
ls->m_freeCallerSaved.clear();
for (int i = 0; i < PhysReg::kNumTypes; i++) {
ls->m_freeCalleeSaved[i].clear();
ls->m_freeCallerSaved[i].clear();
}
for (size_t i = 0; i < NumRegs; i++) {
ls->m_regs[i] = m_regs[i];
@@ -247,7 +257,7 @@ void LinearScan::StateSave::restore(LinearScan* ls) {
if (reg->isAllocated()) {
SSATmp* tmp = reg->m_ssaTmp;
for (int r = 0; r < ls->m_allocInfo[tmp].numAllocatedRegs(); r++) {
if ((int)ls->m_allocInfo[tmp].getReg(r) == i) {
if (ls->m_allocInfo[tmp].getReg(r) == PhysReg(i)) {
ls->allocRegToTmp(reg, tmp, r);
}
}
@@ -266,23 +276,25 @@ LinearScan::LinearScan(IRFactory* irFactory)
, m_jmps(irFactory, JmpList())
, m_allocInfo(irFactory)
{
for (int i = 0; i < kNumX64Regs; i++) {
for (int i = 0; i < kNumRegs; i++) {
m_regs[i].m_ssaTmp = nullptr;
m_regs[i].m_regNo = i;
m_regs[i].m_reg = PhysReg(i);
m_regs[i].m_pinned = false;
m_regs[i].m_reserved = false;
}
// Mark reserved regs.
m_regs[int(rVmSp)] .m_reserved = true;
m_regs[int(rsp)] .m_reserved = true;
m_regs[int(rVmFp)] .m_reserved = true;
m_regs[int(rScratch)].m_reserved = true;
m_regs[int(rVmTl)] .m_reserved = true;
m_regs[int(PhysReg(rVmSp))] .m_reserved = true;
m_regs[int(PhysReg(rsp))] .m_reserved = true;
m_regs[int(PhysReg(rVmFp))] .m_reserved = true;
m_regs[int(PhysReg(rScratch))] .m_reserved = true;
m_regs[int(PhysReg(rVmTl))] .m_reserved = true;
m_regs[int(PhysReg(rXMMScratch0))].m_reserved = true;
m_regs[int(PhysReg(rXMMScratch1))].m_reserved = true;
// Reserve extra regs for testing purpose.
uint32_t numFreeRegs = RuntimeOption::EvalHHIRNumFreeRegs;
for (int i = kNumX64Regs - 1; i >= 0; i--) {
for (int i = kNumRegs - 1; i >= 0; i--) {
if (!m_regs[i].m_reserved) {
if (numFreeRegs == 0) {
m_regs[i].m_reserved = true;
@@ -299,7 +311,7 @@ void LinearScan::allocRegToInstruction(InstructionList::iterator it) {
// Reload all source operands if necessary.
// Mark registers as unpinned.
for (int regNo = 0; regNo < kNumX64Regs; ++regNo) {
for (int regNo = 0; regNo < kNumRegs; ++regNo) {
m_regs[regNo].m_pinned = false;
}
smart::vector<bool> needsReloading(inst->getNumSrcs(), true);
@@ -425,6 +437,8 @@ void LinearScan::allocRegToInstruction(InstructionList::iterator it) {
void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) {
bool preferCallerSaved = true;
PhysReg::Type regType = getRegType(ssaTmp);
if (RuntimeOption::EvalHHIREnableCalleeSavedOpt) {
// Prefer caller-saved registers iff <ssaTmp> doesn't span native.
preferCallerSaved = (m_uses[ssaTmp].lastUse <= getNextNativeId());
@@ -432,7 +446,7 @@ void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) {
RegState* reg = nullptr;
if (!preferCallerSaved) {
reg = getFreeReg(false);
reg = getFreeReg(regType, false);
if (reg->isCallerSaved()) {
// If we are out of callee-saved registers, fall into the logic of
// assigning a caller-saved register.
@@ -471,7 +485,7 @@ void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) {
if (reg == nullptr) {
// No pre-coloring for this tmp.
// Pick a regular caller-saved reg.
reg = getFreeReg(true);
reg = getFreeReg(regType, true);
}
assert(reg);
@@ -495,7 +509,7 @@ void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) {
void LinearScan::allocRegToTmp(RegState* reg, SSATmp* ssaTmp, uint32_t index) {
reg->m_ssaTmp = ssaTmp;
// mark inst as using this register
m_allocInfo[ssaTmp].setReg(PhysReg(reg->m_regNo), index);
m_allocInfo[ssaTmp].setReg(reg->m_reg, index);
uint32_t lastUseId = m_uses[ssaTmp].lastUse;
if (reg->isReserved()) {
return;
@@ -802,7 +816,7 @@ RegNumber LinearScan::getJmpPreColor(SSATmp* tmp, uint32_t regIndex,
// caller-saved regs depends on pre-coloring hints.
void LinearScan::initFreeList() {
// reserve extra regs for testing purpose.
for (int i = kNumX64Regs - 1; i >= 0; i--) {
for (int i = kNumRegs - 1; i >= 0; i--) {
if (!m_regs[i].m_reserved) {
pushFreeReg(&m_regs[i]);
}
@@ -1243,16 +1257,18 @@ LinearScan::RegState* LinearScan::getReg(RegState* reg) {
if (reg->isReserved() || reg->isAllocated()) {
return nullptr;
}
auto type = reg->type();
auto& freeList = (reg->isCallerSaved() ?
m_freeCallerSaved : m_freeCalleeSaved);
m_freeCallerSaved[type] : m_freeCalleeSaved[type]);
freeList.erase(reg->m_pos);
// Pin it so that other operands in the same instruction will not reuse it.
reg->m_pinned = true;
return reg;
}
LinearScan::RegState* LinearScan::getFreeReg(bool preferCallerSaved) {
if (m_freeCallerSaved.empty() && m_freeCalleeSaved.empty()) {
LinearScan::RegState* LinearScan::getFreeReg(PhysReg::Type type,
bool preferCallerSaved) {
if (m_freeCallerSaved[type].empty() && m_freeCalleeSaved[type].empty()) {
assert(!m_allocatedRegs.empty());
// no free registers --> free a register from the allocatedRegs
@@ -1260,7 +1276,7 @@ LinearScan::RegState* LinearScan::getFreeReg(bool preferCallerSaved) {
// 1. not used for any source operand in the current instruction, and
// 2. not used for the return address of a function.
auto canSpill = [&] (RegState* reg) {
return !reg->isPinned() && !reg->isRetAddr();
return !reg->isPinned() && !reg->isRetAddr() && reg->type() == type;
};
auto pos = std::find_if(m_allocatedRegs.begin(), m_allocatedRegs.end(),
canSpill);
@@ -1273,11 +1289,11 @@ LinearScan::RegState* LinearScan::getFreeReg(bool preferCallerSaved) {
smart::list<RegState*>* preferred = nullptr;
smart::list<RegState*>* other = nullptr;
if (preferCallerSaved) {
preferred = &m_freeCallerSaved;
other = &m_freeCalleeSaved;
preferred = &m_freeCallerSaved[type];
other = &m_freeCalleeSaved[type];
} else {
preferred = &m_freeCalleeSaved;
other = &m_freeCallerSaved;
preferred = &m_freeCalleeSaved[type];
other = &m_freeCallerSaved[type];
}
RegState* theFreeReg = nullptr;
@@ -1304,12 +1320,14 @@ void LinearScan::freeReg(RegState* reg) {
}
void LinearScan::pushFreeReg(RegState* reg) {
PhysReg::Type type = reg->type();
auto& freeList = (reg->isCallerSaved() ?
m_freeCallerSaved : m_freeCalleeSaved);
m_freeCallerSaved[type] : m_freeCalleeSaved[type]);
// If next native is going to use <reg>, put <reg> to the back of the
// queue so that it's unlikely to be misused by irrelevant tmps.
if (RuntimeOption::EvalHHIREnablePreColoring &&
(reg->m_regNo == int(rax) || m_preColoringHint.preColorsTmp(reg))) {
type == PhysReg::GP &&
(reg->m_reg == PhysReg(rax) || m_preColoringHint.preColorsTmp(reg))) {
freeList.push_back(reg);
reg->m_pos = (--freeList.end());
} else {
@@ -1396,7 +1414,8 @@ SSATmp* LinearScan::getOrigTmp(SSATmp* tmp) {
}
// Returns true when some tmp has been pre-colored to `reg`. The hint table
// is looked up by register number; `reg` must be a general-purpose register.
bool LinearScan::PreColoringHint::preColorsTmp(RegState* reg) const {
  assert(reg->m_reg.isGP());
  return m_preColoredTmps[int(reg->m_reg)].first != nullptr;
}
// Get the pre-coloring register of (<tmp>, <index>).
@@ -1404,9 +1423,10 @@ bool LinearScan::PreColoringHint::preColorsTmp(RegState* reg) const {
// not a big problem.
RegNumber LinearScan::PreColoringHint::getPreColoringReg(
SSATmp* tmp, uint32_t index) const {
for (int regNo = 0; regNo < kNumX64Regs; ++regNo) {
for (int regNo = 0; regNo < kNumRegs; ++regNo) {
if (m_preColoredTmps[regNo].first == tmp &&
m_preColoredTmps[regNo].second == index) {
assert(regNo < kNumGPRegs);
return (RegNumber)regNo;
}
}
@@ -1414,7 +1434,7 @@ RegNumber LinearScan::PreColoringHint::getPreColoringReg(
}
void LinearScan::PreColoringHint::clear() {
for (int i = 0; i < kNumX64Regs; ++i) {
for (int i = 0; i < kNumRegs; ++i) {
m_preColoredTmps[i].first = nullptr;
m_preColoredTmps[i].second = 0;
}
@@ -1424,8 +1444,8 @@ void LinearScan::PreColoringHint::clear() {
// in next native.
// Records that (<tmp>, <index>) is pre-colored to the register that carries
// argument number <argNum>. That register is taken from argNumToRegName and
// must be a general-purpose register.
void LinearScan::PreColoringHint::add(SSATmp* tmp, uint32_t index, int argNum) {
  int reg = int(argNumToRegName[argNum]);
  assert(reg >= 0 && reg < kNumGPRegs);
  m_preColoredTmps[reg].first = tmp;
  m_preColoredTmps[reg].second = index;
}
+6 -1
Ver Arquivo
@@ -249,7 +249,12 @@ void print(std::ostream& os, const SSATmp* tmp, const RegAllocInfo* regs,
if (!info.spilled()) {
for (int i = 0, sz = info.numAllocatedRegs(); i < sz; ++i) {
if (i != 0) os << ",";
os << reg::regname(Reg64(info.getReg(i)));
PhysReg reg = info.getReg(i);
if (reg.type() == PhysReg::GP) {
os << reg::regname(Reg64(reg));
} else {
os << reg::regname(RegXMM(reg));
}
}
} else {
for (int i = 0, sz = tmp->numNeededRegs(); i < sz; ++i) {
+2 -2
Ver Arquivo
@@ -174,8 +174,8 @@ struct TraceBuilder {
return gen(DefConst, type, ConstData(val));
}
SSATmp* cns(Type t) {
return gen(DefConst, t, ConstData(0));
SSATmp* cns(Type type) {
return gen(DefConst, type, ConstData(0));
}
template<typename T>
+37 -5
Ver Arquivo
@@ -36,15 +36,36 @@ namespace HPHP { namespace Transl {
* (e.g. store_reg##_disp_reg##).
*/
struct PhysReg {
enum Type {
GP,
XMM,
kNumTypes, // keep last
};
explicit constexpr PhysReg(int n = -1) : n(n) {}
constexpr /* implicit */ PhysReg(Reg64 r) : n(int(r)) {}
constexpr /* implicit */ PhysReg(RegXMM r) : n(int(r) + kNumGPRegs) {}
explicit constexpr PhysReg(Reg32 r) : n(int(RegNumber(r))) {}
explicit constexpr PhysReg(RegNumber r) : n(int(r)) {}
constexpr /* implicit */ operator Reg64() const { return Reg64(n); }
constexpr /* implicit */ operator RegNumber() const { return RegNumber(n); }
/* implicit */ operator Reg64() const {
assert(isGP() || n == -1);
return Reg64(n);
}
constexpr /* implicit */ operator RegNumber() const {
return n < kNumGPRegs ? RegNumber(n) : RegNumber(n - kNumGPRegs);
}
/* implicit */ operator RegXMM() const {
assert(isXMM() || n == -1);
return RegXMM(n - kNumGPRegs);
}
Type type() const {
assert(n >= 0 && n < kNumRegs);
return n < kNumGPRegs ? GP : XMM;
}
bool isGP () const { return n >= 0 && n < kNumGPRegs; }
bool isXMM() const { return n >= kNumGPRegs && n < kNumRegs; }
explicit constexpr operator int() const { return n; }
constexpr bool operator==(PhysReg r) const { return n == r.n; }
constexpr bool operator!=(PhysReg r) const { return n != r.n; }
@@ -53,13 +74,24 @@ struct PhysReg {
constexpr bool operator==(Reg32 r) const { return Reg32(n) == r; }
constexpr bool operator!=(Reg32 r) const { return Reg32(n) != r; }
MemoryRef operator[](intptr_t p) const { return *(*this + p); }
IndexedMemoryRef operator[](Reg64 i) const { return *(*this + i); }
IndexedMemoryRef operator[](ScaledIndex s) const { return *(*this + s); }
MemoryRef operator[](intptr_t p) const {
assert(type() == GP);
return *(*this + p);
}
IndexedMemoryRef operator[](Reg64 i) const {
assert(type() == GP);
return *(*this + i);
}
IndexedMemoryRef operator[](ScaledIndex s) const {
assert(type() == GP);
return *(*this + s);
}
IndexedMemoryRef operator[](ScaledIndexDisp s) const {
assert(type() == GP);
return *(*this + s.si + s.disp);
}
IndexedMemoryRef operator[](DispReg dr) const {
assert(type() == GP);
return *(*this + ScaledIndex(dr.base, 0x1) + dr.disp);
}
@@ -905,8 +905,8 @@ inline void emitCopyToAligned(X64Assembler& a,
int destOff) {
static_assert(sizeof(TypedValue) == 16,
"emitCopyToAligned assumes sizeof(TypedValue) is 128 bits");
a. movdqa (src[srcOff], xmm0);
a. movdqa (xmm0, dest[destOff]);
a. movdqa (src[srcOff], rXMMScratch0);
a. movdqa (rXMMScratch0, dest[destOff]);
}
// ArgManager -- support for passing VM-level data to helper functions.
+13 -13
Ver Arquivo
@@ -465,7 +465,7 @@ TranslatorX64::emitPushAR(const NormalizedInstruction& i, const Func* func,
void
TranslatorX64::emitCallSaveRegs() {
assert(!m_regMap.frozen());
m_regMap.cleanRegs(kCallerSaved);
m_regMap.cleanRegs(kGPCallerSaved);
}
static void UNUSED tc_debug_print(const char* message,
@@ -728,7 +728,7 @@ TranslatorX64::emitCall(X64Assembler& a, TCA dest, bool killRegs) {
}
if (killRegs) {
// All caller-saved regs are now suspect.
m_regMap.smashRegs(kCallerSaved);
m_regMap.smashRegs(kGPCallerSaved);
}
}
@@ -743,7 +743,7 @@ TranslatorX64::emitCall(X64Assembler& a, Call call, bool killRegs) {
a.loadq(*rdi, rax);
a.call(rax[call.getOffset()]);
if (killRegs) {
m_regMap.smashRegs(kCallerSaved);
m_regMap.smashRegs(kGPCallerSaved);
}
}
@@ -868,7 +868,7 @@ void TranslatorX64::prepareCallSaveRegs() {
emitCallSaveRegs(); // Clean caller-saved regs.
m_pendingUnwindRegInfo.clear();
RegSet rset = kCalleeSaved;
RegSet rset = kGPCalleeSaved;
PhysReg reg;
while (rset.findFirst(reg)) {
rset.remove(reg);
@@ -1030,7 +1030,7 @@ void TranslatorX64::emitDecRef(Asm& a,
auto getPushSet = [&] {
RegSet ret;
auto regs = kCallerSaved;
auto regs = kGPCallerSaved;
PhysReg reg;
while (regs.findFirst(reg)) {
regs.remove(reg);
@@ -1233,7 +1233,7 @@ void TranslatorX64::emitGenericDecRefHelpers() {
asm_label(a, release);
{
PhysRegSaver prs(a, kCallerSaved - RegSet(rdi));
PhysRegSaver prs(a, kGPCallerSaved - RegSet(rdi));
callDestructor(a, rScratch, rax);
recordIndirectFixup(a.code.frontier, prs.rspAdjustment());
}
@@ -3647,17 +3647,17 @@ TranslatorX64::binaryMixedArith(const NormalizedInstruction& i,
Opcode op,
PhysReg srcReg,
PhysReg srcDestReg) {
getInputsIntoXMMRegs(i, srcReg, srcDestReg, xmm1, xmm0);
getInputsIntoXMMRegs(i, srcReg, srcDestReg, rXMMScratch1, rXMMScratch0);
switch(op) {
#define CASEIMM(OpBc, x64op) \
case OpBc: a. x64op ##sd_xmm_xmm(xmm1, xmm0); break
case OpBc: a. x64op ##sd_xmm_xmm(rXMMScratch1, rXMMScratch0); break
CASEIMM(OpAdd, add);
CASEIMM(OpSub, sub);
CASEIMM(OpMul, mul);
#undef CASEIMM
default: not_reached();
}
a. mov_xmm_reg64(xmm0, srcDestReg);
a. mov_xmm_reg64(rXMMScratch0, srcDestReg);
}
void
@@ -4100,9 +4100,9 @@ TranslatorX64::analyzeEqOp(Tracelet& t, NormalizedInstruction& i) {
void
TranslatorX64::fpEq(const NormalizedInstruction& ni,
PhysReg lr, PhysReg rr) {
getInputsIntoXMMRegs(ni, lr, rr, xmm0, xmm1);
getInputsIntoXMMRegs(ni, lr, rr, rXMMScratch0, rXMMScratch1);
m_regMap.allocOutputRegs(ni);
a. ucomisd_xmm_xmm(xmm0, xmm1);
a. ucomisd_xmm_xmm(rXMMScratch0, rXMMScratch1);
semiLikelyIfBlock(CC_P, a, [&] {
// PF means unordered; treat it as !eq. Or 1 into anything at all
// to clear ZF.
@@ -11459,7 +11459,7 @@ TranslatorX64::TranslatorX64()
m_irAUsage(0),
m_irAstubsUsage(0),
m_numHHIRTrans(0),
m_regMap(kCallerSaved, kCalleeSaved, this),
m_regMap(kGPCallerSaved, kGPCalleeSaved, this),
m_unwindRegMap(128),
m_curTrace(0),
m_curNI(0),
@@ -11739,7 +11739,7 @@ TCA TranslatorX64::emitNAryStub(X64Assembler& a, Call c) {
a. push (rbp); // {
a. movq (rsp, rbp);
{
RegSet s = kCallerSaved - alreadySaved;
RegSet s = kGPCallerSaved - alreadySaved;
PhysRegSaverParity rs(Parity, a, s);
emitCall(a, c);
}
+32
Ver Arquivo
@@ -0,0 +1,32 @@
<?php
// Copyright 2004-present Facebook. All Rights Reserved.
// Shifts its arguments down by one parameter slot, then returns the sum of
// $a..$q plus ($a * ... * $p + $q).
// NOTE(review): presumably written to keep many double values in use at once
// so the JIT's XMM register allocation is exercised -- confirm against the
// change this test accompanies.
function foo($val, $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o,
$p, $q) {
// Each parameter takes the value of the next one.
$a = $b;
$b = $c;
$c = $d;
$d = $e;
$e = $f;
$f = $g;
$g = $h;
$h = $i;
$i = $j;
$j = $k;
$k = $l;
$l = $m;
$m = $n;
$n = $o;
$o = $p;
// NOTE(review): self-assignment; the surrounding pattern suggests `$p = $q`
// may have been intended.  Changing it alters the returned value, so the
// test's expected output would have to change with it.
$p = $p;
$q = $val;
// Sum of all seventeen shifted values.
$sum = $a + $b + $c + $d + $e + $f + $g + $h + $i + $j + $k + $l + $m + $n +
$o + $p + $q;
// Product of $a through $p; $q is added to the product, not multiplied in.
$prod = $a * $b * $c * $d * $e * $f * $g * $h * $i * $j * $k * $l * $m * $n *
$o * $p + $q;
$res = $prod + $sum;
return $res;
}
var_dump(foo(500.5, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.1, 11.1,
12.2, 13.3, 14.4, 15.5, 16.6, 17.7));
+1
Ver Arquivo
@@ -0,0 +1 @@
float(8.703034491432E+14)
+90 -76
Ver Arquivo
@@ -61,6 +61,10 @@ struct ScaledIndex;
struct ScaledIndexDisp;
struct DispReg;
// Number of general-purpose registers.
const int kNumGPRegs = 16;
// Number of XMM registers.
const int kNumXMMRegs = 16;
// Total number of registers across both classes.
const int kNumRegs = kNumGPRegs + kNumXMMRegs;
/*
* Type for register numbers, independent of the size we're going to
* be using it as. Also, the same register number may mean different
@@ -347,9 +351,6 @@ namespace reg {
constexpr Reg64 r14(14);
constexpr Reg64 r15(15);
// rScratch is a symbolic name for a register that is always free.
constexpr Reg64 rScratch(r10);
constexpr RegRIP rip;
constexpr Reg32 eax (0);
@@ -411,6 +412,11 @@ namespace reg {
constexpr RegXMM xmm14(14);
constexpr RegXMM xmm15(15);
// rScratch, rXMMScratch[01] are symbolic names for regs that are always free
constexpr Reg64 rScratch(r10);
constexpr RegXMM rXMMScratch0(xmm0);
constexpr RegXMM rXMMScratch1(xmm1);
#define X(x) if (r == x) return "%"#x
inline const char* regname(Reg64 r) {
X(rax); X(rbx); X(rcx); X(rdx); X(rsp); X(rbp); X(rsi); X(rdi);
@@ -669,75 +675,76 @@ struct X64Instr {
};
// 0 1 2 3 4 5 flags
const X64Instr instr_movdqa = { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x4103 };
const X64Instr instr_movdqu = { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x8103 };
const X64Instr instr_gpr2xmm = { { 0x6e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
const X64Instr instr_xmm2gpr = { { 0x7e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
const X64Instr instr_movdqa = { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x4103 };
const X64Instr instr_movdqu = { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x8103 };
const X64Instr instr_movsd = { { 0x11,0x10,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_gpr2xmm = { { 0x6e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
const X64Instr instr_xmm2gpr = { { 0x7e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
const X64Instr instr_xmmsub = { { 0x5c,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmadd = { { 0x58,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmmul = { { 0x59,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_ucomisd = { { 0x2e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
const X64Instr instr_pxor= { { 0xef,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
const X64Instr instr_ucomisd = { { 0x2e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
const X64Instr instr_pxor= { { 0xef,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
const X64Instr instr_cvtsi2sd= { { 0x2a,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10002 };
const X64Instr instr_lddqu = { { 0xF0,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10103 };
const X64Instr instr_jmp = { { 0xFF,0xF1,0xE9,0x04,0xE9,0xF1 }, 0x0910 };
const X64Instr instr_call = { { 0xFF,0xF1,0xE8,0x02,0xE8,0xF1 }, 0x0900 };
const X64Instr instr_push = { { 0xFF,0xF1,0x68,0x06,0xF1,0x50 }, 0x0510 };
const X64Instr instr_pop = { { 0x8F,0xF1,0xF1,0x00,0xF1,0x58 }, 0x0500 };
const X64Instr instr_inc = { { 0xFF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_dec = { { 0xFF,0xF1,0xF1,0x01,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_not = { { 0xF7,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_notb = { { 0xF6,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_neg = { { 0xF7,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_negb = { { 0xF6,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_add = { { 0x01,0x03,0x81,0x00,0x05,0xF1 }, 0x0810 };
const X64Instr instr_addb = { { 0x00,0x02,0x80,0x00,0x04,0xF1 }, 0x0810 };
const X64Instr instr_sub = { { 0x29,0x2B,0x81,0x05,0x2D,0xF1 }, 0x0810 };
const X64Instr instr_subb = { { 0x28,0x2A,0x80,0x05,0x2C,0xF1 }, 0x0810 };
const X64Instr instr_and = { { 0x21,0x23,0x81,0x04,0x25,0xF1 }, 0x0810 };
const X64Instr instr_andb = { { 0x20,0x22,0x80,0x04,0x24,0xF1 }, 0x0810 };
const X64Instr instr_or = { { 0x09,0x0B,0x81,0x01,0x0D,0xF1 }, 0x0810 };
const X64Instr instr_orb = { { 0x08,0x0A,0x80,0x01,0x0C,0xF1 }, 0x0810 };
const X64Instr instr_xor = { { 0x31,0x33,0x81,0x06,0x35,0xF1 }, 0x0810 };
const X64Instr instr_xorb = { { 0x30,0x32,0x80,0x06,0x34,0xF1 }, 0x0810 };
const X64Instr instr_mov = { { 0x89,0x8B,0xC7,0x00,0xF1,0xB8 }, 0x0600 };
const X64Instr instr_movb = { { 0x88,0x8A,0xC6,0x00,0xF1,0xB0 }, 0x0610 };
const X64Instr instr_test = { { 0x85,0x85,0xF7,0x00,0xA9,0xF1 }, 0x0800 };
const X64Instr instr_testb = { { 0x84,0x84,0xF6,0x00,0xA8,0xF1 }, 0x0810 };
const X64Instr instr_cmp = { { 0x39,0x3B,0x81,0x07,0x3D,0xF1 }, 0x0810 };
const X64Instr instr_cmpb = { { 0x38,0x3A,0x80,0x07,0x3C,0xF1 }, 0x0810 };
const X64Instr instr_sbb = { { 0x19,0x1B,0x81,0x03,0x1D,0xF1 }, 0x0810 };
const X64Instr instr_adc = { { 0x11,0x13,0x81,0x02,0x15,0xF1 }, 0x0810 };
const X64Instr instr_lea = { { 0xF1,0x8D,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_xchgb = { { 0x86,0x86,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_xchg = { { 0x87,0x87,0xF1,0x00,0xF1,0x90 }, 0x1000 };
const X64Instr instr_imul = { { 0xAF,0xF7,0x69,0x05,0xF1,0xF1 }, 0x0019 };
const X64Instr instr_mul = { { 0xF7,0xF1,0xF1,0x04,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_div = { { 0xF7,0xF1,0xF1,0x06,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_idiv = { { 0xF7,0xF1,0xF1,0x07,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_cdq = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0400 };
const X64Instr instr_ret = { { 0xF1,0xF1,0xC2,0x00,0xF1,0xC3 }, 0x0540 };
const X64Instr instr_jcc = { { 0xF1,0xF1,0x80,0x00,0xF1,0xF1 }, 0x0114 };
const X64Instr instr_cmovcc = { { 0x40,0x40,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
const X64Instr instr_setcc = { { 0x90,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0102 };
const X64Instr instr_movswx = { { 0xBF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
const X64Instr instr_movsbx = { { 0xBE,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003 };
const X64Instr instr_movzwx = { { 0xB7,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
const X64Instr instr_movzbx = { { 0xB6,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003 };
const X64Instr instr_cwde = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x98 }, 0x0400 };
const X64Instr instr_rol = { { 0xD3,0xF1,0xC1,0x00,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_ror = { { 0xD3,0xF1,0xC1,0x01,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_rcl = { { 0xD3,0xF1,0xC1,0x02,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_rcr = { { 0xD3,0xF1,0xC1,0x03,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_shl = { { 0xD3,0xF1,0xC1,0x04,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_shr = { { 0xD3,0xF1,0xC1,0x05,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_sar = { { 0xD3,0xF1,0xC1,0x07,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_xadd = { { 0xC1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002 };
const X64Instr instr_cmpxchg = { { 0xB1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002 };
const X64Instr instr_nop = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x90 }, 0x0500 };
const X64Instr instr_shld = { { 0xA5,0xF1,0xA4,0x00,0xF1,0xF1 }, 0x0082 };
const X64Instr instr_shrd = { { 0xAD,0xF1,0xAC,0x00,0xF1,0xF1 }, 0x0082 };
const X64Instr instr_int3 = { { 0xF1,0xF1,0xF1,0x00,0xF1,0xCC }, 0x0500 };
const X64Instr instr_jmp = { { 0xFF,0xF1,0xE9,0x04,0xE9,0xF1 }, 0x0910 };
const X64Instr instr_call = { { 0xFF,0xF1,0xE8,0x02,0xE8,0xF1 }, 0x0900 };
const X64Instr instr_push = { { 0xFF,0xF1,0x68,0x06,0xF1,0x50 }, 0x0510 };
const X64Instr instr_pop = { { 0x8F,0xF1,0xF1,0x00,0xF1,0x58 }, 0x0500 };
const X64Instr instr_inc = { { 0xFF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_dec = { { 0xFF,0xF1,0xF1,0x01,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_not = { { 0xF7,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_notb = { { 0xF6,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_neg = { { 0xF7,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_negb = { { 0xF6,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_add = { { 0x01,0x03,0x81,0x00,0x05,0xF1 }, 0x0810 };
const X64Instr instr_addb = { { 0x00,0x02,0x80,0x00,0x04,0xF1 }, 0x0810 };
const X64Instr instr_sub = { { 0x29,0x2B,0x81,0x05,0x2D,0xF1 }, 0x0810 };
const X64Instr instr_subb = { { 0x28,0x2A,0x80,0x05,0x2C,0xF1 }, 0x0810 };
const X64Instr instr_and = { { 0x21,0x23,0x81,0x04,0x25,0xF1 }, 0x0810 };
const X64Instr instr_andb = { { 0x20,0x22,0x80,0x04,0x24,0xF1 }, 0x0810 };
const X64Instr instr_or = { { 0x09,0x0B,0x81,0x01,0x0D,0xF1 }, 0x0810 };
const X64Instr instr_orb = { { 0x08,0x0A,0x80,0x01,0x0C,0xF1 }, 0x0810 };
const X64Instr instr_xor = { { 0x31,0x33,0x81,0x06,0x35,0xF1 }, 0x0810 };
const X64Instr instr_xorb = { { 0x30,0x32,0x80,0x06,0x34,0xF1 }, 0x0810 };
const X64Instr instr_mov = { { 0x89,0x8B,0xC7,0x00,0xF1,0xB8 }, 0x0600 };
const X64Instr instr_movb = { { 0x88,0x8A,0xC6,0x00,0xF1,0xB0 }, 0x0610 };
const X64Instr instr_test = { { 0x85,0x85,0xF7,0x00,0xA9,0xF1 }, 0x0800 };
const X64Instr instr_testb = { { 0x84,0x84,0xF6,0x00,0xA8,0xF1 }, 0x0810 };
const X64Instr instr_cmp = { { 0x39,0x3B,0x81,0x07,0x3D,0xF1 }, 0x0810 };
const X64Instr instr_cmpb = { { 0x38,0x3A,0x80,0x07,0x3C,0xF1 }, 0x0810 };
const X64Instr instr_sbb = { { 0x19,0x1B,0x81,0x03,0x1D,0xF1 }, 0x0810 };
const X64Instr instr_adc = { { 0x11,0x13,0x81,0x02,0x15,0xF1 }, 0x0810 };
const X64Instr instr_lea = { { 0xF1,0x8D,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_xchgb = { { 0x86,0x86,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_xchg = { { 0x87,0x87,0xF1,0x00,0xF1,0x90 }, 0x1000 };
const X64Instr instr_imul = { { 0xAF,0xF7,0x69,0x05,0xF1,0xF1 }, 0x0019 };
const X64Instr instr_mul = { { 0xF7,0xF1,0xF1,0x04,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_div = { { 0xF7,0xF1,0xF1,0x06,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_idiv = { { 0xF7,0xF1,0xF1,0x07,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_cdq = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0400 };
const X64Instr instr_ret = { { 0xF1,0xF1,0xC2,0x00,0xF1,0xC3 }, 0x0540 };
const X64Instr instr_jcc = { { 0xF1,0xF1,0x80,0x00,0xF1,0xF1 }, 0x0114 };
const X64Instr instr_cmovcc = { { 0x40,0x40,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
const X64Instr instr_setcc = { { 0x90,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0102 };
const X64Instr instr_movswx = { { 0xBF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
const X64Instr instr_movsbx = { { 0xBE,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003 };
const X64Instr instr_movzwx = { { 0xB7,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
const X64Instr instr_movzbx = { { 0xB6,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003 };
const X64Instr instr_cwde = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x98 }, 0x0400 };
const X64Instr instr_rol = { { 0xD3,0xF1,0xC1,0x00,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_ror = { { 0xD3,0xF1,0xC1,0x01,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_rcl = { { 0xD3,0xF1,0xC1,0x02,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_rcr = { { 0xD3,0xF1,0xC1,0x03,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_shl = { { 0xD3,0xF1,0xC1,0x04,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_shr = { { 0xD3,0xF1,0xC1,0x05,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_sar = { { 0xD3,0xF1,0xC1,0x07,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_xadd = { { 0xC1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002 };
const X64Instr instr_cmpxchg = { { 0xB1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002 };
const X64Instr instr_nop = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x90 }, 0x0500 };
const X64Instr instr_shld = { { 0xA5,0xF1,0xA4,0x00,0xF1,0xF1 }, 0x0082 };
const X64Instr instr_shrd = { { 0xAD,0xF1,0xAC,0x00,0xF1,0xF1 }, 0x0082 };
const X64Instr instr_int3 = { { 0xF1,0xF1,0xF1,0x00,0xF1,0xCC }, 0x0500 };
enum ConditionCode {
CC_None = -1,
@@ -1073,10 +1080,16 @@ struct X64Assembler {
void movdqu(RegXMM x, IndexedMemoryRef m) { instrRM(instr_movdqu, x, m); }
void movdqu(MemoryRef m, RegXMM x) { instrMR(instr_movdqu, m, x); }
void movdqu(IndexedMemoryRef m, RegXMM x) { instrMR(instr_movdqu, m, x); }
// movdqa overloads: register-to-register, register-with-memory via instrRM,
// and memory-with-register via instrMR, for both plain and indexed memory
// references.
// NOTE(review): operands look like (source, destination) -- confirm against
// the instrRM/instrMR definitions.
void movdqa(RegXMM x, RegXMM y) { instrRR(instr_movdqa, x, y); }
void movdqa(RegXMM x, MemoryRef m) { instrRM(instr_movdqa, x, m); }
void movdqa(RegXMM x, IndexedMemoryRef m) { instrRM(instr_movdqa, x, m); }
void movdqa(MemoryRef m, RegXMM x) { instrMR(instr_movdqa, m, x); }
void movdqa(IndexedMemoryRef m, RegXMM x) { instrMR(instr_movdqa, m, x); }
// movsd overloads, mirroring the movdqa set: register-to-register,
// register-with-memory via instrRM, and memory-with-register via instrMR,
// each for plain and indexed memory references.  All use the instr_movsd
// encoding table.
// NOTE(review): operands look like (source, destination), as for movdqa --
// confirm against the instrRM/instrMR definitions.
void movsd (RegXMM x, RegXMM y) { instrRR(instr_movsd, x, y); }
void movsd (RegXMM x, MemoryRef m) { instrRM(instr_movsd, x, m); }
void movsd (RegXMM x, IndexedMemoryRef m) { instrRM(instr_movsd, x, m); }
void movsd (MemoryRef m, RegXMM x) { instrMR(instr_movsd, m, x); }
void movsd (IndexedMemoryRef m, RegXMM x) { instrMR(instr_movsd, m, x); }
void lddqu (MemoryRef m, RegXMM x) { instrMR(instr_lddqu, m, x); }
void lddqu (IndexedMemoryRef m, RegXMM x) { instrMR(instr_lddqu, m, x); }
@@ -2257,15 +2270,16 @@ private:
#define UIMR(m) rn(m.r.base), rn(m.r.index), m.r.scale, m.r.disp
#define URIP(m) reg::noreg, reg::noreg, sz::byte, m.r.disp
void instrR(X64Instr op, Reg64 r) { emitR(op, rn(r)); }
void instrR(X64Instr op, Reg32 r) { emitR32(op, rn(r)); }
void instrR(X64Instr op, Reg8 r) { emitR(op, rn(r), sz::byte); }
void instrRR(X64Instr op, Reg64 x, Reg64 y) { emitRR(op, rn(x), rn(y)); }
void instrRR(X64Instr op, Reg32 x, Reg32 y) { emitRR32(op, rn(x), rn(y)); }
void instrRR(X64Instr op, Reg8 x, Reg8 y) { emitRR8(op, rn(x), rn(y)); }
void instrM(X64Instr op, MemoryRef m) { emitM(op, UMR(m)); }
void instrM(X64Instr op, IndexedMemoryRef m){ emitM(op, UIMR(m)); }
void instrM32(X64Instr op, MemoryRef m) { emitM32(op, UMR(m)); }
void instrR(X64Instr op, Reg64 r) { emitR(op, rn(r)); }
void instrR(X64Instr op, Reg32 r) { emitR32(op, rn(r)); }
void instrR(X64Instr op, Reg8 r) { emitR(op, rn(r), sz::byte); }
void instrRR(X64Instr op, Reg64 x, Reg64 y) { emitRR(op, rn(x), rn(y)); }
void instrRR(X64Instr op, Reg32 x, Reg32 y) { emitRR32(op, rn(x), rn(y)); }
void instrRR(X64Instr op, Reg8 x, Reg8 y) { emitRR8(op, rn(x), rn(y)); }
void instrRR(X64Instr op, RegXMM x, RegXMM y) { emitRR(op, rn(x), rn(y)); }
void instrM(X64Instr op, MemoryRef m) { emitM(op, UMR(m)); }
void instrM(X64Instr op, IndexedMemoryRef m) { emitM(op, UIMR(m)); }
void instrM32(X64Instr op, MemoryRef m) { emitM32(op, UMR(m)); }
void instrRM(X64Instr op,
Reg64 r,