Allocate XMM registers for doubles
This diff adds support for allocating SSATmps of type Dbl directly to XMM registers. The register allocator now keeps per-register-type lists of caller-saved and callee-saved registers. xmm0 and xmm1 are reserved as scratch registers, rXMMScratch0 and rXMMScratch1. Added a runtime option, HHIRAllocXMMRegs, to enable or disable XMM allocation; when it is disabled, all SSATmps are allocated to GP registers, as before. While here, changed the conversion of int/bool constants to double so that it happens at JIT time instead of at runtime.
Esse commit está contido em:
@@ -432,6 +432,7 @@ public:
|
||||
F(bool, HHIREnableCoalescing, true) \
|
||||
F(bool, HHIREnableRefCountOpt, true) \
|
||||
F(bool, HHIREnableSinking, true) \
|
||||
F(bool, HHIRAllocXMMRegs, true) \
|
||||
F(bool, HHIRGenerateAsserts, debug) \
|
||||
F(bool, HHIRDirectExit, true) \
|
||||
F(bool, HHIRDisableTx64, true) \
|
||||
|
||||
@@ -77,16 +77,56 @@ const RegSet kCallerSaved = RegSet()
|
||||
// r10 is reserved by the assembler, and for
|
||||
// various extremely-specific scratch uses.
|
||||
| RegSet(reg::r11)
|
||||
// XMM regs
|
||||
// | RegSet(reg::xmm0) Reserved for rXMMScratch0
|
||||
// | RegSet(reg::xmm1) Reserved for rXMMScratch1
|
||||
| RegSet(reg::xmm2)
|
||||
| RegSet(reg::xmm3)
|
||||
| RegSet(reg::xmm4)
|
||||
| RegSet(reg::xmm5)
|
||||
| RegSet(reg::xmm6)
|
||||
| RegSet(reg::xmm7)
|
||||
| RegSet(reg::xmm8)
|
||||
| RegSet(reg::xmm9)
|
||||
| RegSet(reg::xmm10)
|
||||
| RegSet(reg::xmm11)
|
||||
| RegSet(reg::xmm12)
|
||||
| RegSet(reg::xmm13)
|
||||
| RegSet(reg::xmm14)
|
||||
| RegSet(reg::xmm15)
|
||||
;
|
||||
|
||||
const RegSet kCalleeSaved = RegSet()
|
||||
// r12 is reserved for rVmTl
|
||||
| RegSet(reg::r13)
|
||||
| RegSet(reg::r14)
|
||||
| RegSet(reg::r15);
|
||||
| RegSet(reg::r15)
|
||||
;
|
||||
|
||||
const RegSet kAllRegs = kCallerSaved | kCalleeSaved;
|
||||
|
||||
const RegSet kMMXRegs = RegSet()
|
||||
| RegSet(reg::xmm0)
|
||||
| RegSet(reg::xmm1)
|
||||
| RegSet(reg::xmm2)
|
||||
| RegSet(reg::xmm3)
|
||||
| RegSet(reg::xmm4)
|
||||
| RegSet(reg::xmm5)
|
||||
| RegSet(reg::xmm6)
|
||||
| RegSet(reg::xmm7)
|
||||
| RegSet(reg::xmm8)
|
||||
| RegSet(reg::xmm9)
|
||||
| RegSet(reg::xmm10)
|
||||
| RegSet(reg::xmm11)
|
||||
| RegSet(reg::xmm12)
|
||||
| RegSet(reg::xmm13)
|
||||
| RegSet(reg::xmm14)
|
||||
| RegSet(reg::xmm15)
|
||||
;
|
||||
|
||||
const RegSet kGPCallerSaved = kCallerSaved - kMMXRegs;
|
||||
const RegSet kGPCalleeSaved = kCalleeSaved - kMMXRegs;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
* Registers reserved for cross-tracelet ABI purposes.
|
||||
@@ -276,7 +316,6 @@ inline SRFlags operator|(SRFlags a, SRFlags b) {
|
||||
// Set of all the x64 registers.
|
||||
const RegSet kAllX64Regs = RegSet(kAllRegs).add(reg::r10)
|
||||
| kSpecialCrossTraceRegs;
|
||||
const int kNumX64Regs = 16;
|
||||
|
||||
/*
|
||||
* Some data structures are accessed often enough from translated code
|
||||
|
||||
@@ -94,7 +94,7 @@ bool checkCfg(Trace* trace, const IRFactory& factory) {
|
||||
}
|
||||
|
||||
enum Limits : unsigned {
|
||||
kNumRegisters = Transl::kNumX64Regs,
|
||||
kNumRegisters = Transl::kNumRegs,
|
||||
kNumSlots = NumPreAllocatedSpillLocs
|
||||
};
|
||||
|
||||
|
||||
@@ -109,6 +109,18 @@ struct MoveInfo {
|
||||
PhysReg m_reg1, m_reg2;
|
||||
};
|
||||
|
||||
// Returns true if any register on the given move cycle is an XMM register.
//
// The walk starts at cycle.node and follows moves[node] from node to node
// until it arrives back at the starting node; each visited node is
// interpreted as a PhysReg and tested with isXMM().
// The cycle-handling code uses the result to skip the xchg-based strategies,
// which are only emitted when no register on the cycle is XMM.
template <int N>
|
||||
static bool cycleHasMMXReg(const CycleInfo& cycle,
|
||||
const int (&moves)[N]) {
|
||||
int first = cycle.node;
|
||||
int node = first;
|
||||
do {
|
||||
// Any single XMM register on the cycle is enough to answer true.
if (PhysReg(node).isXMM()) return true;
|
||||
node = moves[node];
|
||||
} while (node != first);
|
||||
return false;
|
||||
}
|
||||
|
||||
template <int N>
|
||||
void doRegMoves(int (&moves)[N], int rTmp,
|
||||
std::vector<MoveInfo>& howTo) {
|
||||
@@ -184,11 +196,13 @@ pathloop:
|
||||
}
|
||||
// Deal with any cycles we encountered
|
||||
for (int i = 0; i < numCycles; ++i) {
|
||||
if (cycles[i].length == 2) {
|
||||
// can't use xchg if one of the registers is XMM
|
||||
bool hasMMXReg = cycleHasMMXReg(cycles[i], moves);
|
||||
if (cycles[i].length == 2 && !hasMMXReg) {
|
||||
int v = cycles[i].node;
|
||||
int w = moves[v];
|
||||
howTo.push_back(MoveInfo(MoveInfo::Xchg, w, v));
|
||||
} else if (cycles[i].length == 3) {
|
||||
} else if (cycles[i].length == 3 && !hasMMXReg) {
|
||||
int v = cycles[i].node;
|
||||
int w = moves[v];
|
||||
howTo.push_back(MoveInfo(MoveInfo::Xchg, w, v));
|
||||
@@ -481,13 +495,46 @@ Address CodeGenerator::emitSmashableFwdJcc(ConditionCode cc, Block* target,
|
||||
return start;
|
||||
}
|
||||
|
||||
void emitLoadImm(CodeGenerator::Asm& as, int64_t val, PhysReg dstReg) {
|
||||
as.emitImmReg(val, dstReg);
|
||||
}
|
||||
|
||||
static void
|
||||
emitMovRegReg(CodeGenerator::Asm& as, PhysReg srcReg, PhysReg dstReg) {
|
||||
if (srcReg != dstReg) as.movq(srcReg, dstReg);
|
||||
assert(srcReg != InvalidReg);
|
||||
assert(dstReg != InvalidReg);
|
||||
|
||||
if (srcReg == dstReg) return;
|
||||
|
||||
if (srcReg.isGP()) {
|
||||
if (dstReg.isGP()) { // GP => GP
|
||||
as.movq(srcReg, dstReg);
|
||||
} else { // GP => MMX
|
||||
// This generates a movq x86 instruction, which zero extends
|
||||
// the 64-bit value in srcReg into a 128-bit XMM register
|
||||
as.mov_reg64_xmm(srcReg, dstReg);
|
||||
}
|
||||
} else {
|
||||
if (dstReg.isGP()) { // MMX => GP
|
||||
as.mov_xmm_reg64(srcReg, dstReg);
|
||||
} else { // MMX => MMX
|
||||
// This copies all 128 bits in XMM,
|
||||
// thus avoiding partial register stalls
|
||||
as.movdqa(srcReg, dstReg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code that loads the 64-bit immediate val into dstReg.
//
// dstReg must be a valid register and may be either GP or XMM:
//  - GP:  the immediate is emitted directly with emitImmReg.
//  - XMM: zero is produced by pxor of the register with itself; any other
//         value is first materialized in rScratch and then moved across
//         with emitMovRegReg, so rScratch is overwritten on that path.
void emitLoadImm(CodeGenerator::Asm& as, int64_t val, PhysReg dstReg) {
|
||||
assert(dstReg != InvalidReg);
|
||||
if (dstReg.isGP()) {
|
||||
as.emitImmReg(val, dstReg);
|
||||
} else {
|
||||
assert(dstReg.isXMM());
|
||||
if (val == 0) {
|
||||
// All-zero bits: xor the register with itself, no GP scratch needed.
as.pxor_xmm_xmm(dstReg, dstReg);
|
||||
} else {
|
||||
// Can't move immediate directly into XMM register, so use rScratch
|
||||
as.emitImmReg(val, rScratch);
|
||||
emitMovRegReg(as, rScratch, dstReg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void emitLea(CodeGenerator::Asm& as, MemoryRef mr, PhysReg dst) {
|
||||
@@ -499,6 +546,26 @@ static void emitLea(CodeGenerator::Asm& as, MemoryRef mr, PhysReg dst) {
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a load from the memory operand mem into reg, choosing the
// instruction by register class: loadq when reg is a GP register, movsd
// otherwise. reg must not be InvalidReg.
// NOTE(review): the non-GP branch presumably means an XMM register; this is
// not asserted here.
template<class Mem>
|
||||
static void emitLoadReg(CodeGenerator::Asm& as, Mem mem, PhysReg reg) {
|
||||
assert(reg != InvalidReg);
|
||||
if (reg.isGP()) {
|
||||
as.loadq(mem, reg);
|
||||
} else {
|
||||
as.movsd(mem, reg);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits a store of reg to the memory operand mem, choosing the instruction
// by register class: storeq when reg is a GP register, movsd otherwise.
// reg must not be InvalidReg.
// NOTE(review): the non-GP branch presumably means an XMM register; this is
// not asserted here.
template<class Mem>
|
||||
static void emitStoreReg(CodeGenerator::Asm& as, PhysReg reg, Mem mem) {
|
||||
assert(reg != InvalidReg);
|
||||
if (reg.isGP()) {
|
||||
as.storeq(reg, mem);
|
||||
} else {
|
||||
as.movsd(reg, mem);
|
||||
}
|
||||
}
|
||||
|
||||
void shuffle2(CodeGenerator::Asm& a,
|
||||
PhysReg s0, PhysReg s1, PhysReg d0, PhysReg d1) {
|
||||
assert(s0 != s1);
|
||||
@@ -532,37 +599,65 @@ static void zeroExtendIfBool(X64Assembler& as, const SSATmp* src,
|
||||
}
|
||||
}
|
||||
|
||||
static void prepUnaryXmmOp(X64Assembler& a, const SSATmp* ssa, RegXMM xmm,
|
||||
const RegisterInfo& info) {
|
||||
auto reg = info.getReg();
|
||||
RegNumber src(reg);
|
||||
if (reg == InvalidReg) {
|
||||
src = rScratch;
|
||||
assert(ssa->isConst());
|
||||
a.mov_imm64_reg(ssa->getValBits(), rScratch);
|
||||
}
|
||||
if (ssa->isA(Type::Int | Type::Bool)) {
|
||||
// Expand non-const bools to 64-bit.
|
||||
// Consts are already moved into src as 64-bit values above.
|
||||
if (!ssa->isConst()) zeroExtendIfBool(a, ssa, info);
|
||||
// cvtsi2sd doesn't modify the high bits of its target, which can
|
||||
// cause false dependencies to prevent register renaming from kicking
|
||||
// in. Break the dependency chain by zeroing out the destination reg.
|
||||
a. pxor_xmm_xmm(xmm, xmm);
|
||||
a. cvtsi2sd_reg64_xmm(src, xmm);
|
||||
} else {
|
||||
a. mov_reg64_xmm(src, xmm);
|
||||
}
|
||||
// Converts the integer i to a double and returns the raw bits of that
// double packed into an int64_t (not the numeric value).
// The callers in this file use it to turn Int/Bool constants into a double
// bit pattern while generating code, before loading it as an immediate.
static int64_t convIntToDouble(int64_t i) {
|
||||
// The union lets the double be written and its bytes read back as int64_t.
union {
|
||||
double d;
|
||||
int64_t i;
|
||||
} u;
|
||||
// double(i) refers to the function parameter; u.i is the union member.
u.d = double(i);
|
||||
return u.i;
|
||||
}
|
||||
|
||||
static void prepBinaryXmmOp(X64Assembler& a, const SSATmp* left,
|
||||
const SSATmp* right, const RegAllocInfo& regs) {
|
||||
prepUnaryXmmOp(a, left, xmm0, regs[left]);
|
||||
prepUnaryXmmOp(a, right, xmm1, regs[right]);
|
||||
/*
|
||||
* Returns an XMM register containing the value of SSATmp tmp,
|
||||
* which can be either a bool, an int, or a double.
|
||||
* If the value is already in an XMM register, simply returns it.
|
||||
* Otherwise, the value is moved into rXMMScratch, which is returned.
|
||||
* If instructions to convert to a double at runtime are needed,
|
||||
* they're emitted in 'as'.
|
||||
* A constant without a register is materialized through rScratch, so
* rScratch is overwritten on that path. Bool and Int constants are turned
* into their double bit pattern here, via convIntToDouble, so no
* conversion instruction is emitted for them.
*/
|
||||
static PhysReg prepXMMReg(const SSATmp* tmp,
|
||||
X64Assembler& as,
|
||||
const RegAllocInfo& allocInfo,
|
||||
RegXMM rXMMScratch) {
|
||||
assert(tmp->isA(Type::Bool) || tmp->isA(Type::Int) || tmp->isA(Type::Dbl));
|
||||

||||
PhysReg reg = allocInfo[tmp].getReg();
|
||||

||||
// Case 1: tmp is already in a XMM register
|
||||
if (reg.isXMM()) return reg;
|
||||

||||
// Case 2: tmp is in a GP register
|
||||
if (reg != InvalidReg) {
|
||||
// Case 2.a: Dbl stored in GP reg
|
||||
if (tmp->isA(Type::Dbl)) {
|
||||
// The bits are already a double; only a register-class move is needed.
emitMovRegReg(as, reg, rXMMScratch);
|
||||
return rXMMScratch;
|
||||
}
|
||||
// Case 2.b: Bool or Int stored in GP reg
|
||||
assert(tmp->isA(Type::Bool) || tmp->isA(Type::Int));
|
||||
zeroExtendIfBool(as, tmp, allocInfo[tmp]);
|
||||
// Clear the scratch register before cvtsi2sd, which does not modify the
// high bits of its target; this breaks the dependency on its old contents.
as.pxor_xmm_xmm(rXMMScratch, rXMMScratch);
|
||||
as.cvtsi2sd_reg64_xmm(reg, rXMMScratch);
|
||||
return rXMMScratch;
|
||||
}
|
||||

||||
// Case 3: tmp is a constant
|
||||
assert(tmp->isConst());
|
||||

||||
int64_t val = tmp->getValRawInt();
|
||||
if (!tmp->isA(Type::Dbl)) {
|
||||
assert(tmp->isA(Type::Bool | Type::Int));
|
||||
if (tmp->isA(Type::Bool)) val = val != 0; // see task #2401790
|
||||
val = convIntToDouble(val);
|
||||
}
|
||||
// Build the 64-bit pattern in rScratch, then copy it into the XMM scratch.
emitLoadImm(as, val, rScratch);
|
||||
emitMovRegReg(as, rScratch, rXMMScratch);
|
||||
return rXMMScratch;
|
||||
}
|
||||
|
||||
static void doubleCmp(X64Assembler& a, RegXMM xmm0, RegXMM xmm1) {
|
||||
a. ucomisd_xmm_xmm(xmm0, xmm1);
|
||||
static void doubleCmp(X64Assembler& a, RegXMM xmmReg0, RegXMM xmmReg1) {
|
||||
a. ucomisd_xmm_xmm(xmmReg0, xmmReg1);
|
||||
Label notPF;
|
||||
a. jnp8(notPF);
|
||||
// PF means the doubles were unordered. We treat this as !equal, so
|
||||
@@ -590,8 +685,10 @@ void CodeGenerator::cgJcc(IRInstruction* inst) {
|
||||
CG_PUNT(cgJcc);
|
||||
}
|
||||
if (src1Type == Type::Dbl || src2Type == Type::Dbl) {
|
||||
prepBinaryXmmOp(m_as, src1, src2, m_regs);
|
||||
doubleCmp(m_as, xmm0, xmm1);
|
||||
PhysReg srcReg1 = prepXMMReg(src1, m_as, m_regs, rXMMScratch0);
|
||||
PhysReg srcReg2 = prepXMMReg(src2, m_as, m_regs, rXMMScratch1);
|
||||
assert(srcReg1 != rXMMScratch1 && srcReg2 != rXMMScratch0);
|
||||
doubleCmp(m_as, srcReg1, srcReg2);
|
||||
} else {
|
||||
if (src1Type == Type::Cls && src2Type == Type::Cls) {
|
||||
assert(opc == JmpSame || opc == JmpNSame);
|
||||
@@ -646,8 +743,8 @@ void CodeGenerator::cgJmpNSame(IRInstruction* inst) { cgJcc(inst); }
|
||||
typedef Transl::X64Assembler Asm;
|
||||
static int64_t shuffleArgs(Asm& a, ArgGroup& args) {
|
||||
// Compute the move/shuffle plan.
|
||||
int moves[kNumX64Regs];
|
||||
ArgDesc* argDescs[kNumX64Regs];
|
||||
int moves[kNumRegs];
|
||||
ArgDesc* argDescs[kNumRegs];
|
||||
memset(moves, -1, sizeof moves);
|
||||
memset(argDescs, 0, sizeof argDescs);
|
||||
for (size_t i = 0; i < args.numRegArgs(); ++i) {
|
||||
@@ -671,18 +768,22 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) {
|
||||
for (size_t i = 0; i < howTo.size(); ++i) {
|
||||
if (howTo[i].m_kind == MoveInfo::Move) {
|
||||
if (howTo[i].m_reg2 == reg::rScratch) {
|
||||
a. movq (howTo[i].m_reg1, howTo[i].m_reg2);
|
||||
emitMovRegReg(a, howTo[i].m_reg1, howTo[i].m_reg2);
|
||||
} else {
|
||||
ArgDesc* argDesc = argDescs[int(howTo[i].m_reg2)];
|
||||
ArgDesc::Kind kind = argDesc->getKind();
|
||||
if (kind == ArgDesc::Reg || kind == ArgDesc::TypeReg) {
|
||||
if (argDesc->isZeroExtend()) {
|
||||
assert(howTo[i].m_reg1.isGP());
|
||||
assert(howTo[i].m_reg2.isGP());
|
||||
a. movzbl (rbyte(howTo[i].m_reg1), r32(howTo[i].m_reg2));
|
||||
} else {
|
||||
a. movq (howTo[i].m_reg1, howTo[i].m_reg2);
|
||||
emitMovRegReg(a, howTo[i].m_reg1, howTo[i].m_reg2);
|
||||
}
|
||||
} else {
|
||||
assert(kind == ArgDesc::Addr);
|
||||
assert(howTo[i].m_reg1.isGP());
|
||||
assert(howTo[i].m_reg2.isGP());
|
||||
a. lea (howTo[i].m_reg1[argDesc->getImm().q()],
|
||||
howTo[i].m_reg2);
|
||||
}
|
||||
@@ -691,6 +792,8 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert(howTo[i].m_reg1.isGP());
|
||||
assert(howTo[i].m_reg2.isGP());
|
||||
a. xchgq (howTo[i].m_reg1, howTo[i].m_reg2);
|
||||
}
|
||||
}
|
||||
@@ -702,6 +805,7 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) {
|
||||
if (!args[i].done()) {
|
||||
ArgDesc::Kind kind = args[i].getKind();
|
||||
PhysReg dst = args[i].getDstReg();
|
||||
assert(dst.isGP());
|
||||
if (kind == ArgDesc::Imm) {
|
||||
emitLoadImm(a, args[i].getImm().q(), dst);
|
||||
} else if (kind == ArgDesc::TypeReg) {
|
||||
@@ -728,13 +832,19 @@ static int64_t shuffleArgs(Asm& a, ArgGroup& args) {
|
||||
a. movzbl(rbyte(srcReg), r32(rScratch));
|
||||
a. push(rScratch);
|
||||
} else {
|
||||
a. push(srcReg);
|
||||
if (srcReg.isXMM()) {
|
||||
emitMovRegReg(a, srcReg, rScratch);
|
||||
a.push(rScratch);
|
||||
} else {
|
||||
a.push(srcReg);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case ArgDesc::TypeReg:
|
||||
static_assert(kTypeWordOffset == 4 || kTypeWordOffset == 1,
|
||||
"kTypeWordOffset value not supported");
|
||||
assert(srcReg.isGP());
|
||||
// x86 stacks grow down, so push higher offset items first
|
||||
if (kTypeWordOffset == 4) {
|
||||
a. pushl(r32(srcReg));
|
||||
@@ -1071,9 +1181,20 @@ void CodeGenerator::cgBinaryOp(IRInstruction* inst,
|
||||
CG_PUNT(cgBinaryOp);
|
||||
}
|
||||
if (src1->isA(Type::Dbl) || src2->isA(Type::Dbl)) {
|
||||
prepBinaryXmmOp(m_as, src1, src2, m_regs);
|
||||
(m_as.*fpInstr)(xmm1, xmm0);
|
||||
m_as. mov_xmm_reg64(xmm0, m_regs[dst].getReg());
|
||||
PhysReg dstReg = m_regs[dst].getReg();
|
||||
PhysReg resReg = dstReg.isXMM() && dstReg != m_regs[src2].getReg() ?
|
||||
dstReg : PhysReg(rXMMScratch0);
|
||||
assert(resReg.isXMM());
|
||||
|
||||
PhysReg srcReg1 = prepXMMReg(src1, m_as, m_regs, resReg);
|
||||
PhysReg srcReg2 = prepXMMReg(src2, m_as, m_regs, rXMMScratch1);
|
||||
assert(srcReg1 != rXMMScratch1 && srcReg2 != rXMMScratch0);
|
||||
|
||||
emitMovRegReg(m_as, srcReg1, resReg);
|
||||
|
||||
(m_as.*fpInstr)(srcReg2, resReg);
|
||||
|
||||
emitMovRegReg(m_as, resReg, dstReg);
|
||||
return;
|
||||
}
|
||||
cgBinaryIntOp(inst, instrIR, instrRR, movInstr,
|
||||
@@ -1353,8 +1474,10 @@ void CodeGenerator::cgOpCmpHelper(
|
||||
else if (type1 == Type::Dbl || type2 == Type::Dbl) {
|
||||
if ((type1 == Type::Dbl || type1 == Type::Int) &&
|
||||
(type2 == Type::Dbl || type2 == Type::Int)) {
|
||||
prepBinaryXmmOp(m_as, src1, src2, m_regs);
|
||||
doubleCmp(m_as, xmm0, xmm1);
|
||||
PhysReg srcReg1 = prepXMMReg(src1, m_as, m_regs, rXMMScratch0);
|
||||
PhysReg srcReg2 = prepXMMReg(src2, m_as, m_regs, rXMMScratch1);
|
||||
assert(srcReg1 != rXMMScratch1 && srcReg2 != rXMMScratch0);
|
||||
doubleCmp(m_as, srcReg1, srcReg2);
|
||||
setFromFlags();
|
||||
} else {
|
||||
CG_PUNT(cgOpCmpHelper_Dbl);
|
||||
@@ -1707,7 +1830,7 @@ void CodeGenerator::cgConvDblToBool(IRInstruction* inst) {
|
||||
m_as.mov_imm64_reg(1, dstReg);
|
||||
}
|
||||
} else {
|
||||
m_as.movq(srcReg, dstReg);
|
||||
emitMovRegReg(m_as, srcReg, dstReg);
|
||||
m_as.shlq(1, dstReg); // 0.0 stays zero and -0.0 is now 0.0
|
||||
m_as.setne(rbyte(dstReg)); // lower byte becomes 1 if dstReg != 0
|
||||
m_as.movzbl(rbyte(dstReg), r32(dstReg));
|
||||
@@ -1736,54 +1859,36 @@ void CodeGenerator::cgConvIntToBool(IRInstruction* inst) {
|
||||
}
|
||||
}
|
||||
|
||||
void CodeGenerator::cgConvBoolToDbl(IRInstruction* inst) {
|
||||
// cvtsi2sd doesn't modify the high bits of its target, which can
|
||||
// cause false dependencies to prevent register renaming from kicking
|
||||
// in. Break the dependency chain by zeroing out xmm0.
|
||||
m_as.pxor_xmm_xmm(xmm0, xmm0);
|
||||
SSATmp* dst = inst->getDst();
|
||||
auto dstReg = m_regs[dst].getReg();
|
||||
assert(dstReg != InvalidReg);
|
||||
void CodeGenerator::emitConvBoolOrIntToDbl(IRInstruction* inst) {
|
||||
SSATmp* src = inst->getSrc(0);
|
||||
auto srcReg = m_regs[src].getReg();
|
||||
if (srcReg == InvalidReg) {
|
||||
assert(src->isConst());
|
||||
SSATmp* dst = inst->getDst();
|
||||
PhysReg dstReg = m_regs[dst].getReg();
|
||||
assert(src->isA(Type::Bool) || src->isA(Type::Int));
|
||||
assert(dstReg != InvalidReg);
|
||||
if (src->isConst()) {
|
||||
int64_t constVal = src->getValRawInt();
|
||||
if (constVal == 0) {
|
||||
m_as.xor_reg64_reg64(dstReg, dstReg);
|
||||
} else {
|
||||
m_as.mov_imm64_reg(1, dstReg);
|
||||
}
|
||||
if (src->isA(Type::Bool)) constVal = constVal != 0; // see task #2401790
|
||||
constVal = convIntToDouble(constVal);
|
||||
emitLoadImm(m_as, constVal, dstReg);
|
||||
} else {
|
||||
m_as.movzbl(rbyte(srcReg), r32(dstReg));
|
||||
// cvtsi2sd doesn't modify the high bits of its target, which can
|
||||
// cause false dependencies to prevent register renaming from kicking
|
||||
// in. Break the dependency chain by zeroing out the XMM reg.
|
||||
PhysReg srcReg = m_regs[src].getReg();
|
||||
PhysReg xmmReg = dstReg.isXMM() ? dstReg : PhysReg(rXMMScratch0);
|
||||
m_as.pxor_xmm_xmm(xmmReg, xmmReg);
|
||||
m_as.cvtsi2sd_reg64_xmm(srcReg, xmmReg);
|
||||
zeroExtendIfBool(m_as, src, m_regs[src]);
|
||||
emitMovRegReg(m_as, xmmReg, dstReg);
|
||||
}
|
||||
m_as.cvtsi2sd_reg64_xmm(dstReg, xmm0);
|
||||
m_as.mov_xmm_reg64(xmm0, dstReg);
|
||||
}
|
||||
|
||||
// Code generation for the Bool-to-Dbl conversion instruction.
// Forwards to emitConvBoolOrIntToDbl, the helper shared with the
// Int-to-Dbl conversion.
void CodeGenerator::cgConvBoolToDbl(IRInstruction* inst) {
|
||||
emitConvBoolOrIntToDbl(inst);
|
||||
}
|
||||
|
||||
void CodeGenerator::cgConvIntToDbl(IRInstruction* inst) {
|
||||
// cvtsi2sd doesn't modify the high bits of its target, which can
|
||||
// cause false dependencies to prevent register renaming from kicking
|
||||
// in. Break the dependency chain by zeroing out xmm0.
|
||||
m_as.pxor_xmm_xmm(xmm0, xmm0);
|
||||
SSATmp* dst = inst->getDst();
|
||||
auto dstReg = m_regs[dst].getReg();
|
||||
assert(dstReg != InvalidReg);
|
||||
SSATmp* src = inst->getSrc(0);
|
||||
auto srcReg = m_regs[src].getReg();
|
||||
if (srcReg == InvalidReg) {
|
||||
assert(src->isConst());
|
||||
int64_t constVal = src->getValRawInt();
|
||||
if (constVal == 0) {
|
||||
m_as.xor_reg64_reg64(dstReg, dstReg);
|
||||
} else {
|
||||
m_as.mov_imm64_reg(constVal, dstReg);
|
||||
}
|
||||
m_as.cvtsi2sd_reg64_xmm(dstReg, xmm0);
|
||||
} else {
|
||||
m_as.cvtsi2sd_reg64_xmm(srcReg, xmm0);
|
||||
}
|
||||
m_as.mov_xmm_reg64(xmm0, dstReg);
|
||||
emitConvBoolOrIntToDbl(inst);
|
||||
}
|
||||
|
||||
void CodeGenerator::cgConvBoolToInt(IRInstruction* inst) {
|
||||
@@ -1858,7 +1963,7 @@ void CodeGenerator::cgUnbox(IRInstruction* inst) {
|
||||
// srcTypeReg == KindOfRef; srcValReg is RefData*
|
||||
const size_t ref_tv_off = RefData::tvOffset();
|
||||
if (dstValReg != srcValReg) {
|
||||
m_as.loadq(srcValReg[ref_tv_off + TVOFF(m_data)], dstValReg);
|
||||
emitLoadReg(m_as, srcValReg[ref_tv_off + TVOFF(m_data)], dstValReg);
|
||||
emitLoadTVType(m_as, srcValReg[ref_tv_off + TVOFF(m_type)],
|
||||
r32(dstTypeReg));
|
||||
} else {
|
||||
@@ -1984,8 +2089,8 @@ void CodeGenerator::cgRetVal(IRInstruction* inst) {
|
||||
a. storeq (val->getValRawInt(),
|
||||
rFp[AROFF(m_r) + TVOFF(m_data)]);
|
||||
} else {
|
||||
zeroExtendIfBool(m_as, val, m_regs[val]);
|
||||
a. storeq (m_regs[val].getReg(), rFp[AROFF(m_r) + TVOFF(m_data)]);
|
||||
zeroExtendIfBool(a, val, m_regs[val]);
|
||||
emitStoreReg(a, m_regs[val].getReg(), rFp[AROFF(m_r) + TVOFF(m_data)]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2250,7 +2355,7 @@ void CodeGenerator::cgSpill(IRInstruction* inst) {
|
||||
// We do not need to mask booleans, since the IR will reload the spill
|
||||
auto srcReg = m_regs[src].getReg(locIndex);
|
||||
auto sinfo = m_regs[dst].getSpillInfo(locIndex);
|
||||
m_as. storeq(srcReg, reg::rsp[sinfo.offset()]);
|
||||
emitStoreReg(m_as, srcReg, reg::rsp[sinfo.offset()]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2262,7 +2367,7 @@ void CodeGenerator::cgReload(IRInstruction* inst) {
|
||||
for (int locIndex = 0; locIndex < src->numNeededRegs(); ++locIndex) {
|
||||
auto dstReg = m_regs[dst].getReg(locIndex);
|
||||
auto sinfo = m_regs[src].getSpillInfo(locIndex);
|
||||
m_as. loadq(reg::rsp[sinfo.offset()], dstReg);
|
||||
emitLoadReg(m_as, reg::rsp[sinfo.offset()], dstReg);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3752,7 +3857,7 @@ void CodeGenerator::cgStore(PhysReg base,
|
||||
m_as.storeq(val, base[off + TVOFF(m_data)]);
|
||||
} else {
|
||||
zeroExtendIfBool(m_as, src, m_regs[src]);
|
||||
m_as.storeq(m_regs[src].getReg(), base[off + TVOFF(m_data)]);
|
||||
emitStoreReg(m_as, m_regs[src].getReg(), base[off + TVOFF(m_data)]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3792,7 +3897,7 @@ void CodeGenerator::cgLoad(PhysReg base,
|
||||
if (type == Type::Bool) {
|
||||
m_as.load_reg64_disp_reg32(base, off + TVOFF(m_data), dstReg);
|
||||
} else {
|
||||
m_as.load_reg64_disp_reg64(base, off + TVOFF(m_data), dstReg);
|
||||
emitLoadReg(m_as, base[off + TVOFF(m_data)], dstReg);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -270,7 +270,6 @@ private:
|
||||
Address cgCheckRefCountedType(PhysReg typeReg);
|
||||
Address cgCheckRefCountedType(PhysReg baseReg,
|
||||
int64_t offset);
|
||||
void cgConvPrimitiveToDbl(IRInstruction* inst);
|
||||
void cgDecRefStaticType(Type type,
|
||||
PhysReg dataReg,
|
||||
Block* exit,
|
||||
@@ -311,6 +310,7 @@ private:
|
||||
void emitReqBindAddr(const Func* func, TCA& dest, Offset offset);
|
||||
|
||||
void emitAdjustSp(PhysReg spReg, PhysReg dstReg, int64_t adjustment);
|
||||
void emitConvBoolOrIntToDbl(IRInstruction* inst);
|
||||
|
||||
/*
|
||||
* Generate an if-block that branches around some unlikely code, handling
|
||||
|
||||
@@ -223,8 +223,11 @@ public:
|
||||
IRInstruction* defLabel();
|
||||
IRInstruction* defLabel(unsigned numDst);
|
||||
template<typename T> SSATmp* cns(T val) {
|
||||
Type type = typeForConst(val);
|
||||
// Normalize bool values to 0 or 1
|
||||
if (type.equals(Type::Bool)) val = (T)(val != 0);
|
||||
ConstData cdata(val);
|
||||
return findConst(cdata, typeForConst(val));
|
||||
return findConst(cdata, type);
|
||||
}
|
||||
Block* defBlock(const Func* f, IRInstruction*);
|
||||
Block* defBlock(const Func* f) {
|
||||
|
||||
@@ -54,8 +54,15 @@ RegSet RegisterInfo::getRegs() const {
|
||||
return regs;
|
||||
}
|
||||
|
||||
// Picks the register class that the allocator should use for tmp.
// Returns PhysReg::XMM only when the HHIRAllocXMMRegs runtime option is
// enabled and tmp is of type Dbl; in every other case returns PhysReg::GP.
static PhysReg::Type getRegType(const SSATmp* tmp) {
|
||||
if (RuntimeOption::EvalHHIRAllocXMMRegs && tmp->isA(Type::Dbl)) {
|
||||
return PhysReg::XMM;
|
||||
}
|
||||
return PhysReg::GP;
|
||||
}
|
||||
|
||||
struct LinearScan : private boost::noncopyable {
|
||||
static const int NumRegs = 16;
|
||||
static const int NumRegs = kNumRegs;
|
||||
|
||||
explicit LinearScan(IRFactory*);
|
||||
RegAllocInfo allocRegs(Trace*, LifetimeInfo*);
|
||||
@@ -67,7 +74,7 @@ private:
|
||||
public:
|
||||
bool isReserved() const { return m_reserved; }
|
||||
bool isCallerSaved() const {
|
||||
return kCallerSaved.contains(PhysReg(m_regNo));
|
||||
return kCallerSaved.contains(m_reg);
|
||||
}
|
||||
bool isCalleeSaved() const { return !isCallerSaved(); }
|
||||
bool isAllocated() const { return m_ssaTmp != nullptr; }
|
||||
@@ -77,6 +84,7 @@ private:
|
||||
Type type = m_ssaTmp->type();
|
||||
return type == Type::RetAddr;
|
||||
}
|
||||
PhysReg::Type type() const { return m_reg.type(); }
|
||||
|
||||
private:
|
||||
SSATmp* m_ssaTmp; // non-null when allocated
|
||||
@@ -86,7 +94,7 @@ private:
|
||||
// LinearScan::m_freeCalleeSaved, or LinearScan::m_allocatedRegs.
|
||||
// <m_pos> of a reserved reg is undefined.
|
||||
smart::list<RegState*>::iterator m_pos;
|
||||
uint16_t m_regNo;
|
||||
PhysReg m_reg;
|
||||
bool m_pinned; // do not free this register if pinned
|
||||
// We stress test register allocation by reducing the number of
|
||||
// free registers.
|
||||
@@ -112,7 +120,7 @@ private:
|
||||
void clear();
|
||||
void add(SSATmp* tmp, uint32_t index, int argNum);
|
||||
private:
|
||||
// indexed by arg number
|
||||
// indexed by register number
|
||||
std::pair<SSATmp*, uint32_t> m_preColoredTmps[LinearScan::NumRegs];
|
||||
};
|
||||
|
||||
@@ -158,7 +166,7 @@ private:
|
||||
void pushFreeReg(RegState* reg);
|
||||
RegState* popFreeReg(smart::list<RegState*>& freeList);
|
||||
void freeReg(RegState* reg);
|
||||
RegState* getFreeReg(bool preferCallerSaved);
|
||||
RegState* getFreeReg(PhysReg::Type type, bool preferCallerSaved);
|
||||
RegState* getReg(RegState* reg);
|
||||
|
||||
template<typename Inner, int DumpVal=4>
|
||||
@@ -175,8 +183,8 @@ private:
|
||||
IRFactory* const m_irFactory;
|
||||
RegState m_regs[NumRegs];
|
||||
// Lists of free caller and callee-saved registers, respectively.
|
||||
smart::list<RegState*> m_freeCallerSaved;
|
||||
smart::list<RegState*> m_freeCalleeSaved;
|
||||
smart::list<RegState*> m_freeCallerSaved[PhysReg::kNumTypes];
|
||||
smart::list<RegState*> m_freeCalleeSaved[PhysReg::kNumTypes];
|
||||
// List of assigned registers, sorted high to low by lastUseId.
|
||||
smart::list<RegState*> m_allocatedRegs;
|
||||
|
||||
@@ -237,8 +245,10 @@ void LinearScan::StateSave::save(LinearScan* ls) {
|
||||
|
||||
void LinearScan::StateSave::restore(LinearScan* ls) {
|
||||
ls->m_allocatedRegs.clear();
|
||||
ls->m_freeCalleeSaved.clear();
|
||||
ls->m_freeCallerSaved.clear();
|
||||
for (int i = 0; i < PhysReg::kNumTypes; i++) {
|
||||
ls->m_freeCalleeSaved[i].clear();
|
||||
ls->m_freeCallerSaved[i].clear();
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < NumRegs; i++) {
|
||||
ls->m_regs[i] = m_regs[i];
|
||||
@@ -247,7 +257,7 @@ void LinearScan::StateSave::restore(LinearScan* ls) {
|
||||
if (reg->isAllocated()) {
|
||||
SSATmp* tmp = reg->m_ssaTmp;
|
||||
for (int r = 0; r < ls->m_allocInfo[tmp].numAllocatedRegs(); r++) {
|
||||
if ((int)ls->m_allocInfo[tmp].getReg(r) == i) {
|
||||
if (ls->m_allocInfo[tmp].getReg(r) == PhysReg(i)) {
|
||||
ls->allocRegToTmp(reg, tmp, r);
|
||||
}
|
||||
}
|
||||
@@ -266,23 +276,25 @@ LinearScan::LinearScan(IRFactory* irFactory)
|
||||
, m_jmps(irFactory, JmpList())
|
||||
, m_allocInfo(irFactory)
|
||||
{
|
||||
for (int i = 0; i < kNumX64Regs; i++) {
|
||||
for (int i = 0; i < kNumRegs; i++) {
|
||||
m_regs[i].m_ssaTmp = nullptr;
|
||||
m_regs[i].m_regNo = i;
|
||||
m_regs[i].m_reg = PhysReg(i);
|
||||
m_regs[i].m_pinned = false;
|
||||
m_regs[i].m_reserved = false;
|
||||
}
|
||||
|
||||
// Mark reserved regs.
|
||||
m_regs[int(rVmSp)] .m_reserved = true;
|
||||
m_regs[int(rsp)] .m_reserved = true;
|
||||
m_regs[int(rVmFp)] .m_reserved = true;
|
||||
m_regs[int(rScratch)].m_reserved = true;
|
||||
m_regs[int(rVmTl)] .m_reserved = true;
|
||||
m_regs[int(PhysReg(rVmSp))] .m_reserved = true;
|
||||
m_regs[int(PhysReg(rsp))] .m_reserved = true;
|
||||
m_regs[int(PhysReg(rVmFp))] .m_reserved = true;
|
||||
m_regs[int(PhysReg(rScratch))] .m_reserved = true;
|
||||
m_regs[int(PhysReg(rVmTl))] .m_reserved = true;
|
||||
m_regs[int(PhysReg(rXMMScratch0))].m_reserved = true;
|
||||
m_regs[int(PhysReg(rXMMScratch1))].m_reserved = true;
|
||||
|
||||
// Reserve extra regs for testing purpose.
|
||||
uint32_t numFreeRegs = RuntimeOption::EvalHHIRNumFreeRegs;
|
||||
for (int i = kNumX64Regs - 1; i >= 0; i--) {
|
||||
for (int i = kNumRegs - 1; i >= 0; i--) {
|
||||
if (!m_regs[i].m_reserved) {
|
||||
if (numFreeRegs == 0) {
|
||||
m_regs[i].m_reserved = true;
|
||||
@@ -299,7 +311,7 @@ void LinearScan::allocRegToInstruction(InstructionList::iterator it) {
|
||||
|
||||
// Reload all source operands if necessary.
|
||||
// Mark registers as unpinned.
|
||||
for (int regNo = 0; regNo < kNumX64Regs; ++regNo) {
|
||||
for (int regNo = 0; regNo < kNumRegs; ++regNo) {
|
||||
m_regs[regNo].m_pinned = false;
|
||||
}
|
||||
smart::vector<bool> needsReloading(inst->getNumSrcs(), true);
|
||||
@@ -425,6 +437,8 @@ void LinearScan::allocRegToInstruction(InstructionList::iterator it) {
|
||||
|
||||
void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) {
|
||||
bool preferCallerSaved = true;
|
||||
PhysReg::Type regType = getRegType(ssaTmp);
|
||||
|
||||
if (RuntimeOption::EvalHHIREnableCalleeSavedOpt) {
|
||||
// Prefer caller-saved registers iff <ssaTmp> doesn't span native.
|
||||
preferCallerSaved = (m_uses[ssaTmp].lastUse <= getNextNativeId());
|
||||
@@ -432,7 +446,7 @@ void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) {
|
||||
|
||||
RegState* reg = nullptr;
|
||||
if (!preferCallerSaved) {
|
||||
reg = getFreeReg(false);
|
||||
reg = getFreeReg(regType, false);
|
||||
if (reg->isCallerSaved()) {
|
||||
// If we are out of callee-saved registers, fall into the logic of
|
||||
// assigning a caller-saved register.
|
||||
@@ -471,7 +485,7 @@ void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) {
|
||||
if (reg == nullptr) {
|
||||
// No pre-coloring for this tmp.
|
||||
// Pick a regular caller-saved reg.
|
||||
reg = getFreeReg(true);
|
||||
reg = getFreeReg(regType, true);
|
||||
}
|
||||
|
||||
assert(reg);
|
||||
@@ -495,7 +509,7 @@ void LinearScan::allocRegToTmp(SSATmp* ssaTmp, uint32_t index) {
|
||||
void LinearScan::allocRegToTmp(RegState* reg, SSATmp* ssaTmp, uint32_t index) {
|
||||
reg->m_ssaTmp = ssaTmp;
|
||||
// mark inst as using this register
|
||||
m_allocInfo[ssaTmp].setReg(PhysReg(reg->m_regNo), index);
|
||||
m_allocInfo[ssaTmp].setReg(reg->m_reg, index);
|
||||
uint32_t lastUseId = m_uses[ssaTmp].lastUse;
|
||||
if (reg->isReserved()) {
|
||||
return;
|
||||
@@ -802,7 +816,7 @@ RegNumber LinearScan::getJmpPreColor(SSATmp* tmp, uint32_t regIndex,
|
||||
// caller-saved regs depends on pre-coloring hints.
|
||||
void LinearScan::initFreeList() {
|
||||
// reserve extra regs for testing purpose.
|
||||
for (int i = kNumX64Regs - 1; i >= 0; i--) {
|
||||
for (int i = kNumRegs - 1; i >= 0; i--) {
|
||||
if (!m_regs[i].m_reserved) {
|
||||
pushFreeReg(&m_regs[i]);
|
||||
}
|
||||
@@ -1243,16 +1257,18 @@ LinearScan::RegState* LinearScan::getReg(RegState* reg) {
|
||||
if (reg->isReserved() || reg->isAllocated()) {
|
||||
return nullptr;
|
||||
}
|
||||
auto type = reg->type();
|
||||
auto& freeList = (reg->isCallerSaved() ?
|
||||
m_freeCallerSaved : m_freeCalleeSaved);
|
||||
m_freeCallerSaved[type] : m_freeCalleeSaved[type]);
|
||||
freeList.erase(reg->m_pos);
|
||||
// Pin it so that other operands in the same instruction will not reuse it.
|
||||
reg->m_pinned = true;
|
||||
return reg;
|
||||
}
|
||||
|
||||
LinearScan::RegState* LinearScan::getFreeReg(bool preferCallerSaved) {
|
||||
if (m_freeCallerSaved.empty() && m_freeCalleeSaved.empty()) {
|
||||
LinearScan::RegState* LinearScan::getFreeReg(PhysReg::Type type,
|
||||
bool preferCallerSaved) {
|
||||
if (m_freeCallerSaved[type].empty() && m_freeCalleeSaved[type].empty()) {
|
||||
assert(!m_allocatedRegs.empty());
|
||||
|
||||
// no free registers --> free a register from the allocatedRegs
|
||||
@@ -1260,7 +1276,7 @@ LinearScan::RegState* LinearScan::getFreeReg(bool preferCallerSaved) {
|
||||
// 1. not used for any source operand in the current instruction, and
|
||||
// 2. not used for the return address of a function.
|
||||
auto canSpill = [&] (RegState* reg) {
|
||||
return !reg->isPinned() && !reg->isRetAddr();
|
||||
return !reg->isPinned() && !reg->isRetAddr() && reg->type() == type;
|
||||
};
|
||||
auto pos = std::find_if(m_allocatedRegs.begin(), m_allocatedRegs.end(),
|
||||
canSpill);
|
||||
@@ -1273,11 +1289,11 @@ LinearScan::RegState* LinearScan::getFreeReg(bool preferCallerSaved) {
|
||||
smart::list<RegState*>* preferred = nullptr;
|
||||
smart::list<RegState*>* other = nullptr;
|
||||
if (preferCallerSaved) {
|
||||
preferred = &m_freeCallerSaved;
|
||||
other = &m_freeCalleeSaved;
|
||||
preferred = &m_freeCallerSaved[type];
|
||||
other = &m_freeCalleeSaved[type];
|
||||
} else {
|
||||
preferred = &m_freeCalleeSaved;
|
||||
other = &m_freeCallerSaved;
|
||||
preferred = &m_freeCalleeSaved[type];
|
||||
other = &m_freeCallerSaved[type];
|
||||
}
|
||||
|
||||
RegState* theFreeReg = nullptr;
|
||||
@@ -1304,12 +1320,14 @@ void LinearScan::freeReg(RegState* reg) {
|
||||
}
|
||||
|
||||
void LinearScan::pushFreeReg(RegState* reg) {
|
||||
PhysReg::Type type = reg->type();
|
||||
auto& freeList = (reg->isCallerSaved() ?
|
||||
m_freeCallerSaved : m_freeCalleeSaved);
|
||||
m_freeCallerSaved[type] : m_freeCalleeSaved[type]);
|
||||
// If next native is going to use <reg>, put <reg> to the back of the
|
||||
// queue so that it's unlikely to be misused by irrelevant tmps.
|
||||
if (RuntimeOption::EvalHHIREnablePreColoring &&
|
||||
(reg->m_regNo == int(rax) || m_preColoringHint.preColorsTmp(reg))) {
|
||||
type == PhysReg::GP &&
|
||||
(reg->m_reg == PhysReg(rax) || m_preColoringHint.preColorsTmp(reg))) {
|
||||
freeList.push_back(reg);
|
||||
reg->m_pos = (--freeList.end());
|
||||
} else {
|
||||
@@ -1396,7 +1414,8 @@ SSATmp* LinearScan::getOrigTmp(SSATmp* tmp) {
|
||||
}
|
||||
|
||||
bool LinearScan::PreColoringHint::preColorsTmp(RegState* reg) const {
|
||||
return m_preColoredTmps[reg->m_regNo].first != nullptr;
|
||||
assert(reg->m_reg.isGP());
|
||||
return m_preColoredTmps[int(reg->m_reg)].first != nullptr;
|
||||
}
|
||||
|
||||
// Get the pre-coloring register of (<tmp>, <index>).
|
||||
@@ -1404,9 +1423,10 @@ bool LinearScan::PreColoringHint::preColorsTmp(RegState* reg) const {
|
||||
// not a big problem.
|
||||
RegNumber LinearScan::PreColoringHint::getPreColoringReg(
|
||||
SSATmp* tmp, uint32_t index) const {
|
||||
for (int regNo = 0; regNo < kNumX64Regs; ++regNo) {
|
||||
for (int regNo = 0; regNo < kNumRegs; ++regNo) {
|
||||
if (m_preColoredTmps[regNo].first == tmp &&
|
||||
m_preColoredTmps[regNo].second == index) {
|
||||
assert(regNo < kNumGPRegs);
|
||||
return (RegNumber)regNo;
|
||||
}
|
||||
}
|
||||
@@ -1414,7 +1434,7 @@ RegNumber LinearScan::PreColoringHint::getPreColoringReg(
|
||||
}
|
||||
|
||||
void LinearScan::PreColoringHint::clear() {
|
||||
for (int i = 0; i < kNumX64Regs; ++i) {
|
||||
for (int i = 0; i < kNumRegs; ++i) {
|
||||
m_preColoredTmps[i].first = nullptr;
|
||||
m_preColoredTmps[i].second = 0;
|
||||
}
|
||||
@@ -1424,8 +1444,8 @@ void LinearScan::PreColoringHint::clear() {
|
||||
// in next native.
|
||||
void LinearScan::PreColoringHint::add(SSATmp* tmp, uint32_t index, int argNum) {
|
||||
int reg = int(argNumToRegName[argNum]);
|
||||
assert(reg >= 0 && reg < kNumX64Regs);
|
||||
m_preColoredTmps[reg].first = tmp;
|
||||
assert(reg >= 0 && reg < kNumGPRegs);
|
||||
m_preColoredTmps[reg].first = tmp;
|
||||
m_preColoredTmps[reg].second = index;
|
||||
}
|
||||
|
||||
|
||||
@@ -249,7 +249,12 @@ void print(std::ostream& os, const SSATmp* tmp, const RegAllocInfo* regs,
|
||||
if (!info.spilled()) {
|
||||
for (int i = 0, sz = info.numAllocatedRegs(); i < sz; ++i) {
|
||||
if (i != 0) os << ",";
|
||||
os << reg::regname(Reg64(info.getReg(i)));
|
||||
PhysReg reg = info.getReg(i);
|
||||
if (reg.type() == PhysReg::GP) {
|
||||
os << reg::regname(Reg64(reg));
|
||||
} else {
|
||||
os << reg::regname(RegXMM(reg));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = 0, sz = tmp->numNeededRegs(); i < sz; ++i) {
|
||||
|
||||
@@ -174,8 +174,8 @@ struct TraceBuilder {
|
||||
return gen(DefConst, type, ConstData(val));
|
||||
}
|
||||
|
||||
SSATmp* cns(Type t) {
|
||||
return gen(DefConst, t, ConstData(0));
|
||||
SSATmp* cns(Type type) {
|
||||
return gen(DefConst, type, ConstData(0));
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
|
||||
@@ -36,15 +36,36 @@ namespace HPHP { namespace Transl {
|
||||
* (e.g. store_reg##_disp_reg##).
|
||||
*/
|
||||
struct PhysReg {
|
||||
enum Type {
|
||||
GP,
|
||||
XMM,
|
||||
kNumTypes, // keep last
|
||||
};
|
||||
explicit constexpr PhysReg(int n = -1) : n(n) {}
|
||||
constexpr /* implicit */ PhysReg(Reg64 r) : n(int(r)) {}
|
||||
constexpr /* implicit */ PhysReg(RegXMM r) : n(int(r) + kNumGPRegs) {}
|
||||
explicit constexpr PhysReg(Reg32 r) : n(int(RegNumber(r))) {}
|
||||
|
||||
explicit constexpr PhysReg(RegNumber r) : n(int(r)) {}
|
||||
|
||||
constexpr /* implicit */ operator Reg64() const { return Reg64(n); }
|
||||
constexpr /* implicit */ operator RegNumber() const { return RegNumber(n); }
|
||||
/* implicit */ operator Reg64() const {
|
||||
assert(isGP() || n == -1);
|
||||
return Reg64(n);
|
||||
}
|
||||
constexpr /* implicit */ operator RegNumber() const {
|
||||
return n < kNumGPRegs ? RegNumber(n) : RegNumber(n - kNumGPRegs);
|
||||
}
|
||||
/* implicit */ operator RegXMM() const {
|
||||
assert(isXMM() || n == -1);
|
||||
return RegXMM(n - kNumGPRegs);
|
||||
}
|
||||
|
||||
Type type() const {
|
||||
assert(n >= 0 && n < kNumRegs);
|
||||
return n < kNumGPRegs ? GP : XMM;
|
||||
}
|
||||
bool isGP () const { return n >= 0 && n < kNumGPRegs; }
|
||||
bool isXMM() const { return n >= kNumGPRegs && n < kNumRegs; }
|
||||
explicit constexpr operator int() const { return n; }
|
||||
constexpr bool operator==(PhysReg r) const { return n == r.n; }
|
||||
constexpr bool operator!=(PhysReg r) const { return n != r.n; }
|
||||
@@ -53,13 +74,24 @@ struct PhysReg {
|
||||
constexpr bool operator==(Reg32 r) const { return Reg32(n) == r; }
|
||||
constexpr bool operator!=(Reg32 r) const { return Reg32(n) != r; }
|
||||
|
||||
MemoryRef operator[](intptr_t p) const { return *(*this + p); }
|
||||
IndexedMemoryRef operator[](Reg64 i) const { return *(*this + i); }
|
||||
IndexedMemoryRef operator[](ScaledIndex s) const { return *(*this + s); }
|
||||
MemoryRef operator[](intptr_t p) const {
|
||||
assert(type() == GP);
|
||||
return *(*this + p);
|
||||
}
|
||||
IndexedMemoryRef operator[](Reg64 i) const {
|
||||
assert(type() == GP);
|
||||
return *(*this + i);
|
||||
}
|
||||
IndexedMemoryRef operator[](ScaledIndex s) const {
|
||||
assert(type() == GP);
|
||||
return *(*this + s);
|
||||
}
|
||||
IndexedMemoryRef operator[](ScaledIndexDisp s) const {
|
||||
assert(type() == GP);
|
||||
return *(*this + s.si + s.disp);
|
||||
}
|
||||
IndexedMemoryRef operator[](DispReg dr) const {
|
||||
assert(type() == GP);
|
||||
return *(*this + ScaledIndex(dr.base, 0x1) + dr.disp);
|
||||
}
|
||||
|
||||
|
||||
@@ -905,8 +905,8 @@ inline void emitCopyToAligned(X64Assembler& a,
|
||||
int destOff) {
|
||||
static_assert(sizeof(TypedValue) == 16,
|
||||
"emitCopyToAligned assumes sizeof(TypedValue) is 128 bits");
|
||||
a. movdqa (src[srcOff], xmm0);
|
||||
a. movdqa (xmm0, dest[destOff]);
|
||||
a. movdqa (src[srcOff], rXMMScratch0);
|
||||
a. movdqa (rXMMScratch0, dest[destOff]);
|
||||
}
|
||||
|
||||
// ArgManager -- support for passing VM-level data to helper functions.
|
||||
|
||||
@@ -465,7 +465,7 @@ TranslatorX64::emitPushAR(const NormalizedInstruction& i, const Func* func,
|
||||
void
|
||||
TranslatorX64::emitCallSaveRegs() {
|
||||
assert(!m_regMap.frozen());
|
||||
m_regMap.cleanRegs(kCallerSaved);
|
||||
m_regMap.cleanRegs(kGPCallerSaved);
|
||||
}
|
||||
|
||||
static void UNUSED tc_debug_print(const char* message,
|
||||
@@ -728,7 +728,7 @@ TranslatorX64::emitCall(X64Assembler& a, TCA dest, bool killRegs) {
|
||||
}
|
||||
if (killRegs) {
|
||||
// All caller-saved regs are now suspect.
|
||||
m_regMap.smashRegs(kCallerSaved);
|
||||
m_regMap.smashRegs(kGPCallerSaved);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -743,7 +743,7 @@ TranslatorX64::emitCall(X64Assembler& a, Call call, bool killRegs) {
|
||||
a.loadq(*rdi, rax);
|
||||
a.call(rax[call.getOffset()]);
|
||||
if (killRegs) {
|
||||
m_regMap.smashRegs(kCallerSaved);
|
||||
m_regMap.smashRegs(kGPCallerSaved);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -868,7 +868,7 @@ void TranslatorX64::prepareCallSaveRegs() {
|
||||
emitCallSaveRegs(); // Clean caller-saved regs.
|
||||
m_pendingUnwindRegInfo.clear();
|
||||
|
||||
RegSet rset = kCalleeSaved;
|
||||
RegSet rset = kGPCalleeSaved;
|
||||
PhysReg reg;
|
||||
while (rset.findFirst(reg)) {
|
||||
rset.remove(reg);
|
||||
@@ -1030,7 +1030,7 @@ void TranslatorX64::emitDecRef(Asm& a,
|
||||
|
||||
auto getPushSet = [&] {
|
||||
RegSet ret;
|
||||
auto regs = kCallerSaved;
|
||||
auto regs = kGPCallerSaved;
|
||||
PhysReg reg;
|
||||
while (regs.findFirst(reg)) {
|
||||
regs.remove(reg);
|
||||
@@ -1233,7 +1233,7 @@ void TranslatorX64::emitGenericDecRefHelpers() {
|
||||
|
||||
asm_label(a, release);
|
||||
{
|
||||
PhysRegSaver prs(a, kCallerSaved - RegSet(rdi));
|
||||
PhysRegSaver prs(a, kGPCallerSaved - RegSet(rdi));
|
||||
callDestructor(a, rScratch, rax);
|
||||
recordIndirectFixup(a.code.frontier, prs.rspAdjustment());
|
||||
}
|
||||
@@ -3647,17 +3647,17 @@ TranslatorX64::binaryMixedArith(const NormalizedInstruction& i,
|
||||
Opcode op,
|
||||
PhysReg srcReg,
|
||||
PhysReg srcDestReg) {
|
||||
getInputsIntoXMMRegs(i, srcReg, srcDestReg, xmm1, xmm0);
|
||||
getInputsIntoXMMRegs(i, srcReg, srcDestReg, rXMMScratch1, rXMMScratch0);
|
||||
switch(op) {
|
||||
#define CASEIMM(OpBc, x64op) \
|
||||
case OpBc: a. x64op ##sd_xmm_xmm(xmm1, xmm0); break
|
||||
case OpBc: a. x64op ##sd_xmm_xmm(rXMMScratch1, rXMMScratch0); break
|
||||
CASEIMM(OpAdd, add);
|
||||
CASEIMM(OpSub, sub);
|
||||
CASEIMM(OpMul, mul);
|
||||
#undef CASEIMM
|
||||
default: not_reached();
|
||||
}
|
||||
a. mov_xmm_reg64(xmm0, srcDestReg);
|
||||
a. mov_xmm_reg64(rXMMScratch0, srcDestReg);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -4100,9 +4100,9 @@ TranslatorX64::analyzeEqOp(Tracelet& t, NormalizedInstruction& i) {
|
||||
void
|
||||
TranslatorX64::fpEq(const NormalizedInstruction& ni,
|
||||
PhysReg lr, PhysReg rr) {
|
||||
getInputsIntoXMMRegs(ni, lr, rr, xmm0, xmm1);
|
||||
getInputsIntoXMMRegs(ni, lr, rr, rXMMScratch0, rXMMScratch1);
|
||||
m_regMap.allocOutputRegs(ni);
|
||||
a. ucomisd_xmm_xmm(xmm0, xmm1);
|
||||
a. ucomisd_xmm_xmm(rXMMScratch0, rXMMScratch1);
|
||||
semiLikelyIfBlock(CC_P, a, [&] {
|
||||
// PF means unordered; treat it as !eq. Or 1 into anything at all
|
||||
// to clear ZF.
|
||||
@@ -11459,7 +11459,7 @@ TranslatorX64::TranslatorX64()
|
||||
m_irAUsage(0),
|
||||
m_irAstubsUsage(0),
|
||||
m_numHHIRTrans(0),
|
||||
m_regMap(kCallerSaved, kCalleeSaved, this),
|
||||
m_regMap(kGPCallerSaved, kGPCalleeSaved, this),
|
||||
m_unwindRegMap(128),
|
||||
m_curTrace(0),
|
||||
m_curNI(0),
|
||||
@@ -11739,7 +11739,7 @@ TCA TranslatorX64::emitNAryStub(X64Assembler& a, Call c) {
|
||||
a. push (rbp); // {
|
||||
a. movq (rsp, rbp);
|
||||
{
|
||||
RegSet s = kCallerSaved - alreadySaved;
|
||||
RegSet s = kGPCallerSaved - alreadySaved;
|
||||
PhysRegSaverParity rs(Parity, a, s);
|
||||
emitCall(a, c);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
<?php
|
||||
// Copyright 2004-present Facebook. All Rights Reserved.
|
||||
|
||||
function foo($val, $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o,
|
||||
$p, $q) {
|
||||
$a = $b;
|
||||
$b = $c;
|
||||
$c = $d;
|
||||
$d = $e;
|
||||
$e = $f;
|
||||
$f = $g;
|
||||
$g = $h;
|
||||
$h = $i;
|
||||
$i = $j;
|
||||
$j = $k;
|
||||
$k = $l;
|
||||
$l = $m;
|
||||
$m = $n;
|
||||
$n = $o;
|
||||
$o = $p;
|
||||
$p = $p;
|
||||
$q = $val;
|
||||
$sum = $a + $b + $c + $d + $e + $f + $g + $h + $i + $j + $k + $l + $m + $n +
|
||||
$o + $p + $q;
|
||||
$prod = $a * $b * $c * $d * $e * $f * $g * $h * $i * $j * $k * $l * $m * $n *
|
||||
$o * $p + $q;
|
||||
$res = $prod + $sum;
|
||||
return $res;
|
||||
}
|
||||
|
||||
var_dump(foo(500.5, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.1, 11.1,
|
||||
12.2, 13.3, 14.4, 15.5, 16.6, 17.7));
|
||||
@@ -0,0 +1 @@
|
||||
float(8.703034491432E+14)
|
||||
+90
-76
@@ -61,6 +61,10 @@ struct ScaledIndex;
|
||||
struct ScaledIndexDisp;
|
||||
struct DispReg;
|
||||
|
||||
const int kNumGPRegs = 16;
|
||||
const int kNumXMMRegs = 16;
|
||||
const int kNumRegs = kNumGPRegs + kNumXMMRegs;
|
||||
|
||||
/*
|
||||
* Type for register numbers, independent of the size we're going to
|
||||
* be using it as. Also, the same register number may mean different
|
||||
@@ -347,9 +351,6 @@ namespace reg {
|
||||
constexpr Reg64 r14(14);
|
||||
constexpr Reg64 r15(15);
|
||||
|
||||
// rScratch is a symbolic name for a register that is always free.
|
||||
constexpr Reg64 rScratch(r10);
|
||||
|
||||
constexpr RegRIP rip;
|
||||
|
||||
constexpr Reg32 eax (0);
|
||||
@@ -411,6 +412,11 @@ namespace reg {
|
||||
constexpr RegXMM xmm14(14);
|
||||
constexpr RegXMM xmm15(15);
|
||||
|
||||
// rScratch, rXMMScratch[01] are symbolic names for regs that are always free
|
||||
constexpr Reg64 rScratch(r10);
|
||||
constexpr RegXMM rXMMScratch0(xmm0);
|
||||
constexpr RegXMM rXMMScratch1(xmm1);
|
||||
|
||||
#define X(x) if (r == x) return "%"#x
|
||||
inline const char* regname(Reg64 r) {
|
||||
X(rax); X(rbx); X(rcx); X(rdx); X(rsp); X(rbp); X(rsi); X(rdi);
|
||||
@@ -669,75 +675,76 @@ struct X64Instr {
|
||||
};
|
||||
|
||||
// 0 1 2 3 4 5 flags
|
||||
const X64Instr instr_movdqa = { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x4103 };
|
||||
const X64Instr instr_movdqu = { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x8103 };
|
||||
const X64Instr instr_gpr2xmm = { { 0x6e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
|
||||
const X64Instr instr_xmm2gpr = { { 0x7e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
|
||||
const X64Instr instr_movdqa = { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x4103 };
|
||||
const X64Instr instr_movdqu = { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x8103 };
|
||||
const X64Instr instr_movsd = { { 0x11,0x10,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
|
||||
const X64Instr instr_gpr2xmm = { { 0x6e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
|
||||
const X64Instr instr_xmm2gpr = { { 0x7e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
|
||||
const X64Instr instr_xmmsub = { { 0x5c,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
|
||||
const X64Instr instr_xmmadd = { { 0x58,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
|
||||
const X64Instr instr_xmmmul = { { 0x59,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
|
||||
const X64Instr instr_ucomisd = { { 0x2e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
|
||||
const X64Instr instr_pxor= { { 0xef,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
|
||||
const X64Instr instr_ucomisd = { { 0x2e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
|
||||
const X64Instr instr_pxor= { { 0xef,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
|
||||
const X64Instr instr_cvtsi2sd= { { 0x2a,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10002 };
|
||||
const X64Instr instr_lddqu = { { 0xF0,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10103 };
|
||||
const X64Instr instr_jmp = { { 0xFF,0xF1,0xE9,0x04,0xE9,0xF1 }, 0x0910 };
|
||||
const X64Instr instr_call = { { 0xFF,0xF1,0xE8,0x02,0xE8,0xF1 }, 0x0900 };
|
||||
const X64Instr instr_push = { { 0xFF,0xF1,0x68,0x06,0xF1,0x50 }, 0x0510 };
|
||||
const X64Instr instr_pop = { { 0x8F,0xF1,0xF1,0x00,0xF1,0x58 }, 0x0500 };
|
||||
const X64Instr instr_inc = { { 0xFF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_dec = { { 0xFF,0xF1,0xF1,0x01,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_not = { { 0xF7,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_notb = { { 0xF6,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_neg = { { 0xF7,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_negb = { { 0xF6,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_add = { { 0x01,0x03,0x81,0x00,0x05,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_addb = { { 0x00,0x02,0x80,0x00,0x04,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_sub = { { 0x29,0x2B,0x81,0x05,0x2D,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_subb = { { 0x28,0x2A,0x80,0x05,0x2C,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_and = { { 0x21,0x23,0x81,0x04,0x25,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_andb = { { 0x20,0x22,0x80,0x04,0x24,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_or = { { 0x09,0x0B,0x81,0x01,0x0D,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_orb = { { 0x08,0x0A,0x80,0x01,0x0C,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_xor = { { 0x31,0x33,0x81,0x06,0x35,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_xorb = { { 0x30,0x32,0x80,0x06,0x34,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_mov = { { 0x89,0x8B,0xC7,0x00,0xF1,0xB8 }, 0x0600 };
|
||||
const X64Instr instr_movb = { { 0x88,0x8A,0xC6,0x00,0xF1,0xB0 }, 0x0610 };
|
||||
const X64Instr instr_test = { { 0x85,0x85,0xF7,0x00,0xA9,0xF1 }, 0x0800 };
|
||||
const X64Instr instr_testb = { { 0x84,0x84,0xF6,0x00,0xA8,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_cmp = { { 0x39,0x3B,0x81,0x07,0x3D,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_cmpb = { { 0x38,0x3A,0x80,0x07,0x3C,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_sbb = { { 0x19,0x1B,0x81,0x03,0x1D,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_adc = { { 0x11,0x13,0x81,0x02,0x15,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_lea = { { 0xF1,0x8D,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_xchgb = { { 0x86,0x86,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_xchg = { { 0x87,0x87,0xF1,0x00,0xF1,0x90 }, 0x1000 };
|
||||
const X64Instr instr_imul = { { 0xAF,0xF7,0x69,0x05,0xF1,0xF1 }, 0x0019 };
|
||||
const X64Instr instr_mul = { { 0xF7,0xF1,0xF1,0x04,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_div = { { 0xF7,0xF1,0xF1,0x06,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_idiv = { { 0xF7,0xF1,0xF1,0x07,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_cdq = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0400 };
|
||||
const X64Instr instr_ret = { { 0xF1,0xF1,0xC2,0x00,0xF1,0xC3 }, 0x0540 };
|
||||
const X64Instr instr_jcc = { { 0xF1,0xF1,0x80,0x00,0xF1,0xF1 }, 0x0114 };
|
||||
const X64Instr instr_cmovcc = { { 0x40,0x40,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
|
||||
const X64Instr instr_setcc = { { 0x90,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0102 };
|
||||
const X64Instr instr_movswx = { { 0xBF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
|
||||
const X64Instr instr_movsbx = { { 0xBE,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003 };
|
||||
const X64Instr instr_movzwx = { { 0xB7,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
|
||||
const X64Instr instr_movzbx = { { 0xB6,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003 };
|
||||
const X64Instr instr_cwde = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x98 }, 0x0400 };
|
||||
const X64Instr instr_rol = { { 0xD3,0xF1,0xC1,0x00,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_ror = { { 0xD3,0xF1,0xC1,0x01,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_rcl = { { 0xD3,0xF1,0xC1,0x02,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_rcr = { { 0xD3,0xF1,0xC1,0x03,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_shl = { { 0xD3,0xF1,0xC1,0x04,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_shr = { { 0xD3,0xF1,0xC1,0x05,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_sar = { { 0xD3,0xF1,0xC1,0x07,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_xadd = { { 0xC1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002 };
|
||||
const X64Instr instr_cmpxchg = { { 0xB1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002 };
|
||||
const X64Instr instr_nop = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x90 }, 0x0500 };
|
||||
const X64Instr instr_shld = { { 0xA5,0xF1,0xA4,0x00,0xF1,0xF1 }, 0x0082 };
|
||||
const X64Instr instr_shrd = { { 0xAD,0xF1,0xAC,0x00,0xF1,0xF1 }, 0x0082 };
|
||||
const X64Instr instr_int3 = { { 0xF1,0xF1,0xF1,0x00,0xF1,0xCC }, 0x0500 };
|
||||
const X64Instr instr_jmp = { { 0xFF,0xF1,0xE9,0x04,0xE9,0xF1 }, 0x0910 };
|
||||
const X64Instr instr_call = { { 0xFF,0xF1,0xE8,0x02,0xE8,0xF1 }, 0x0900 };
|
||||
const X64Instr instr_push = { { 0xFF,0xF1,0x68,0x06,0xF1,0x50 }, 0x0510 };
|
||||
const X64Instr instr_pop = { { 0x8F,0xF1,0xF1,0x00,0xF1,0x58 }, 0x0500 };
|
||||
const X64Instr instr_inc = { { 0xFF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_dec = { { 0xFF,0xF1,0xF1,0x01,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_not = { { 0xF7,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_notb = { { 0xF6,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_neg = { { 0xF7,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_negb = { { 0xF6,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_add = { { 0x01,0x03,0x81,0x00,0x05,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_addb = { { 0x00,0x02,0x80,0x00,0x04,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_sub = { { 0x29,0x2B,0x81,0x05,0x2D,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_subb = { { 0x28,0x2A,0x80,0x05,0x2C,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_and = { { 0x21,0x23,0x81,0x04,0x25,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_andb = { { 0x20,0x22,0x80,0x04,0x24,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_or = { { 0x09,0x0B,0x81,0x01,0x0D,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_orb = { { 0x08,0x0A,0x80,0x01,0x0C,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_xor = { { 0x31,0x33,0x81,0x06,0x35,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_xorb = { { 0x30,0x32,0x80,0x06,0x34,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_mov = { { 0x89,0x8B,0xC7,0x00,0xF1,0xB8 }, 0x0600 };
|
||||
const X64Instr instr_movb = { { 0x88,0x8A,0xC6,0x00,0xF1,0xB0 }, 0x0610 };
|
||||
const X64Instr instr_test = { { 0x85,0x85,0xF7,0x00,0xA9,0xF1 }, 0x0800 };
|
||||
const X64Instr instr_testb = { { 0x84,0x84,0xF6,0x00,0xA8,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_cmp = { { 0x39,0x3B,0x81,0x07,0x3D,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_cmpb = { { 0x38,0x3A,0x80,0x07,0x3C,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_sbb = { { 0x19,0x1B,0x81,0x03,0x1D,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_adc = { { 0x11,0x13,0x81,0x02,0x15,0xF1 }, 0x0810 };
|
||||
const X64Instr instr_lea = { { 0xF1,0x8D,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_xchgb = { { 0x86,0x86,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_xchg = { { 0x87,0x87,0xF1,0x00,0xF1,0x90 }, 0x1000 };
|
||||
const X64Instr instr_imul = { { 0xAF,0xF7,0x69,0x05,0xF1,0xF1 }, 0x0019 };
|
||||
const X64Instr instr_mul = { { 0xF7,0xF1,0xF1,0x04,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_div = { { 0xF7,0xF1,0xF1,0x06,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_idiv = { { 0xF7,0xF1,0xF1,0x07,0xF1,0xF1 }, 0x0000 };
|
||||
const X64Instr instr_cdq = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0400 };
|
||||
const X64Instr instr_ret = { { 0xF1,0xF1,0xC2,0x00,0xF1,0xC3 }, 0x0540 };
|
||||
const X64Instr instr_jcc = { { 0xF1,0xF1,0x80,0x00,0xF1,0xF1 }, 0x0114 };
|
||||
const X64Instr instr_cmovcc = { { 0x40,0x40,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
|
||||
const X64Instr instr_setcc = { { 0x90,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0102 };
|
||||
const X64Instr instr_movswx = { { 0xBF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
|
||||
const X64Instr instr_movsbx = { { 0xBE,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003 };
|
||||
const X64Instr instr_movzwx = { { 0xB7,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
|
||||
const X64Instr instr_movzbx = { { 0xB6,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003 };
|
||||
const X64Instr instr_cwde = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x98 }, 0x0400 };
|
||||
const X64Instr instr_rol = { { 0xD3,0xF1,0xC1,0x00,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_ror = { { 0xD3,0xF1,0xC1,0x01,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_rcl = { { 0xD3,0xF1,0xC1,0x02,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_rcr = { { 0xD3,0xF1,0xC1,0x03,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_shl = { { 0xD3,0xF1,0xC1,0x04,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_shr = { { 0xD3,0xF1,0xC1,0x05,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_sar = { { 0xD3,0xF1,0xC1,0x07,0xF1,0xF1 }, 0x0020 };
|
||||
const X64Instr instr_xadd = { { 0xC1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002 };
|
||||
const X64Instr instr_cmpxchg = { { 0xB1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002 };
|
||||
const X64Instr instr_nop = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x90 }, 0x0500 };
|
||||
const X64Instr instr_shld = { { 0xA5,0xF1,0xA4,0x00,0xF1,0xF1 }, 0x0082 };
|
||||
const X64Instr instr_shrd = { { 0xAD,0xF1,0xAC,0x00,0xF1,0xF1 }, 0x0082 };
|
||||
const X64Instr instr_int3 = { { 0xF1,0xF1,0xF1,0x00,0xF1,0xCC }, 0x0500 };
|
||||
|
||||
enum ConditionCode {
|
||||
CC_None = -1,
|
||||
@@ -1073,10 +1080,16 @@ struct X64Assembler {
|
||||
void movdqu(RegXMM x, IndexedMemoryRef m) { instrRM(instr_movdqu, x, m); }
|
||||
void movdqu(MemoryRef m, RegXMM x) { instrMR(instr_movdqu, m, x); }
|
||||
void movdqu(IndexedMemoryRef m, RegXMM x) { instrMR(instr_movdqu, m, x); }
|
||||
void movdqa(RegXMM x, RegXMM y) { instrRR(instr_movdqa, x, y); }
|
||||
void movdqa(RegXMM x, MemoryRef m) { instrRM(instr_movdqa, x, m); }
|
||||
void movdqa(RegXMM x, IndexedMemoryRef m) { instrRM(instr_movdqa, x, m); }
|
||||
void movdqa(MemoryRef m, RegXMM x) { instrMR(instr_movdqa, m, x); }
|
||||
void movdqa(IndexedMemoryRef m, RegXMM x) { instrMR(instr_movdqa, m, x); }
|
||||
void movsd (RegXMM x, RegXMM y) { instrRR(instr_movsd, x, y); }
|
||||
void movsd (RegXMM x, MemoryRef m) { instrRM(instr_movsd, x, m); }
|
||||
void movsd (RegXMM x, IndexedMemoryRef m) { instrRM(instr_movsd, x, m); }
|
||||
void movsd (MemoryRef m, RegXMM x) { instrMR(instr_movsd, m, x); }
|
||||
void movsd (IndexedMemoryRef m, RegXMM x) { instrMR(instr_movsd, m, x); }
|
||||
void lddqu (MemoryRef m, RegXMM x) { instrMR(instr_lddqu, m, x); }
|
||||
void lddqu (IndexedMemoryRef m, RegXMM x) { instrMR(instr_lddqu, m, x); }
|
||||
|
||||
@@ -2257,15 +2270,16 @@ private:
|
||||
#define UIMR(m) rn(m.r.base), rn(m.r.index), m.r.scale, m.r.disp
|
||||
#define URIP(m) reg::noreg, reg::noreg, sz::byte, m.r.disp
|
||||
|
||||
void instrR(X64Instr op, Reg64 r) { emitR(op, rn(r)); }
|
||||
void instrR(X64Instr op, Reg32 r) { emitR32(op, rn(r)); }
|
||||
void instrR(X64Instr op, Reg8 r) { emitR(op, rn(r), sz::byte); }
|
||||
void instrRR(X64Instr op, Reg64 x, Reg64 y) { emitRR(op, rn(x), rn(y)); }
|
||||
void instrRR(X64Instr op, Reg32 x, Reg32 y) { emitRR32(op, rn(x), rn(y)); }
|
||||
void instrRR(X64Instr op, Reg8 x, Reg8 y) { emitRR8(op, rn(x), rn(y)); }
|
||||
void instrM(X64Instr op, MemoryRef m) { emitM(op, UMR(m)); }
|
||||
void instrM(X64Instr op, IndexedMemoryRef m){ emitM(op, UIMR(m)); }
|
||||
void instrM32(X64Instr op, MemoryRef m) { emitM32(op, UMR(m)); }
|
||||
void instrR(X64Instr op, Reg64 r) { emitR(op, rn(r)); }
|
||||
void instrR(X64Instr op, Reg32 r) { emitR32(op, rn(r)); }
|
||||
void instrR(X64Instr op, Reg8 r) { emitR(op, rn(r), sz::byte); }
|
||||
void instrRR(X64Instr op, Reg64 x, Reg64 y) { emitRR(op, rn(x), rn(y)); }
|
||||
void instrRR(X64Instr op, Reg32 x, Reg32 y) { emitRR32(op, rn(x), rn(y)); }
|
||||
void instrRR(X64Instr op, Reg8 x, Reg8 y) { emitRR8(op, rn(x), rn(y)); }
|
||||
void instrRR(X64Instr op, RegXMM x, RegXMM y) { emitRR(op, rn(x), rn(y)); }
|
||||
void instrM(X64Instr op, MemoryRef m) { emitM(op, UMR(m)); }
|
||||
void instrM(X64Instr op, IndexedMemoryRef m) { emitM(op, UIMR(m)); }
|
||||
void instrM32(X64Instr op, MemoryRef m) { emitM32(op, UMR(m)); }
|
||||
|
||||
void instrRM(X64Instr op,
|
||||
Reg64 r,
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário