Re-Tighten live register calculations

Compute the registers that are live across each IR instruction, rather
than the live-out set.  I noticed this while working on a fix for the
CallBuiltin instruction.

We currently compute which registers are live-out from each instruction,
then use the sets to push/pop callee-saved registers around native calls.
cgCallHelper() has a hack to remove destination registers from this set,
but it's not always precise, because we sometimes call cgCallHelper with
different dest registers than were assigned to the instruction, which
(rarely) can cause unnecessary spilling.

In one case, cgLdClsMethodFCache(), one of the src tmps is copied to a
destination before the internal call; in that case the dst is not live
across the whole instruction, but the value must still be preserved
across the call.  Easy fix: mark it live before cgCallHelper().
Esse commit está contido em:
smith
2013-03-18 17:34:39 -07:00
commit de Sara Golemon
commit c3a15af2e5
4 arquivos alterados com 58 adições e 68 exclusões
+15 -20
Ver Arquivo
@@ -758,9 +758,9 @@ void CodeGenerator::cgCallHelper(Asm& a,
// Save the registers that are live at the point after this IR instruction.
// However, don't save the destination registers that will be overwritten
// by this call.
RegSet regsToSave = (m_curInst->getLiveOutRegs() & kCallerSaved).
remove(dstReg0).remove(dstReg1);
PhysRegSaverParity<1> regSaver(a, regsToSave);
RegSet toSave = m_curInst->getLiveRegs() & kCallerSaved;
assert((toSave & RegSet().add(dstReg0).add(dstReg1)).empty());
PhysRegSaverParity<1> regSaver(a, toSave);
// Assign registers to the arguments
for (size_t i = 0; i < args.size(); i++) {
@@ -2484,10 +2484,7 @@ void CodeGenerator::cgGenericRetDecRefs(IRInstruction* inst) {
auto const rDest = inst->getDst()->getReg();
auto& a = m_as;
RegSet retvalRegs;
for (int i = 0; i < retVal->numAllocatedRegs(); ++i) {
retvalRegs.add(retVal->getReg(i));
}
RegSet retvalRegs = retVal->getRegs();
assert(retvalRegs.size() <= 2);
/*
@@ -2509,7 +2506,7 @@ void CodeGenerator::cgGenericRetDecRefs(IRInstruction* inst) {
* are still allocated to registers at this time.
*/
const auto UNUSED expectedLiveRegs = RegSet(rFp).add(rDest) | retvalRegs;
assert((m_curInst->getLiveOutRegs() - expectedLiveRegs).empty());
assert((m_curInst->getLiveRegs() - expectedLiveRegs).empty());
assert(rFp == rVmFp &&
"free locals helper assumes the frame pointer is rVmFp");
assert(rDest == rVmSp &&
@@ -2825,7 +2822,7 @@ void CodeGenerator::cgDecRefDynamicTypeMem(PhysReg baseReg,
if (exit == nullptr && RuntimeOption::EvalHHIRGenericDtorHelper) {
{
// This PhysRegSaverParity saves rdi redundantly if
// !m_curInst->getLiveOutRegs().contains(rdi), but it's
// !m_curInst->getLiveRegs().contains(rdi), but it's
// necessary to maintain stack alignment. We can do better
// by making the helpers adjust the stack for us in the cold
// path, which calls the destructor.
@@ -3802,15 +3799,14 @@ void CodeGenerator::cgLdClsMethodCache(IRInstruction* inst) {
getContextName(getCurClass()));
auto funcDestReg = dst->getReg(0);
auto classDestReg = dst->getReg(1);
auto offsetof_func = offsetof(TargetCache::StaticMethodCache, m_func);
auto offsetof_cls = offsetof(TargetCache::StaticMethodCache, m_cls);
assert(funcDestReg != InvalidReg && classDestReg != InvalidReg);
// Attempt to retrieve the func* and class* from cache
m_as.load_reg64_disp_reg64(rVmTl, ch, funcDestReg);
m_as.load_reg64_disp_reg64(rVmTl,
ch + offsetof(TargetCache::StaticMethodCache,
m_cls),
classDestReg);
m_as.test_reg64_reg64(funcDestReg, funcDestReg);
m_as.loadq(rVmTl[ch + offsetof_func], funcDestReg);
m_as.loadq(rVmTl[ch + offsetof_cls], classDestReg);
m_as.testq(funcDestReg, funcDestReg);
// May have retrieved a NULL from the cache
// handle case where method is not entered in the cache
unlikelyIfBlock(CC_E, [&] {
@@ -3828,11 +3824,8 @@ void CodeGenerator::cgLdClsMethodCache(IRInstruction* inst) {
.immPtr(method) // methodName
);
// recordInstrCall is done in cgCallHelper
m_astubs.test_reg64_reg64(funcDestReg, funcDestReg);
m_astubs.load_reg64_disp_reg64(rVmTl,
ch + offsetof(TargetCache::StaticMethodCache,
m_cls),
classDestReg);
m_astubs.testq(funcDestReg, funcDestReg);
m_astubs.loadq(rVmTl[ch + offsetof_cls], classDestReg);
// if StaticMethodCache::lookupIR() returned NULL, jmp to label
emitFwdJcc(m_astubs, CC_Z, label);
});
@@ -3931,6 +3924,8 @@ void CodeGenerator::cgLdClsMethodFCache(IRInstruction* inst) {
if (false) { // typecheck
const UNUSED Func* f = StaticMethodFCache::lookupIR(ch, cls, methName);
}
// preserve destCtxReg across the call since it wouldn't be otherwise
inst->setLiveRegs(inst->getLiveRegs().add(destCtxReg));
cgCallHelper(m_astubs,
(TCA)StaticMethodFCache::lookupIR,
funcDestReg,
+10 -2
Ver Arquivo
@@ -545,7 +545,7 @@ void IRInstruction::convertToNop() {
m_numSrcs = nop.m_numSrcs;
m_id = nop.m_id;
m_srcs = nop.m_srcs;
m_liveOutRegs = nop.m_liveOutRegs;
m_liveRegs = nop.m_liveRegs;
m_numDsts = nop.m_numDsts;
m_dst = nop.m_dst;
m_taken = nullptr;
@@ -716,7 +716,8 @@ void IRInstruction::print(std::ostream& ostream) const {
}
if (!isTransient()) {
ostream << folly::format("({:02d}) ", getIId());
if (!m_id) ostream << folly::format("({:02d}) ", getIId());
else ostream << folly::format("({:02d}@{:02d}) ", getIId(), m_id);
}
printDst(ostream);
@@ -881,6 +882,13 @@ int SSATmp::numAllocatedRegs() const {
return i;
}
// Gathers into one RegSet the register of every slot, out of the first
// numAllocatedRegs() slots, for which hasReg() reports a register.
RegSet SSATmp::getRegs() const {
  RegSet assigned;
  const int slotCount = numAllocatedRegs();
  int slot = 0;
  while (slot < slotCount) {
    if (hasReg(slot)) {
      assigned.add(getReg(slot));
    }
    ++slot;
  }
  return assigned;
}
bool SSATmp::getValBool() const {
assert(isConst());
+10 -6
Ver Arquivo
@@ -1504,8 +1504,11 @@ struct IRInstruction {
*/
bool isTransient() const { return m_iid == kTransient; }
RegSet getLiveOutRegs() const { return m_liveOutRegs; }
void setLiveOutRegs(RegSet s) { m_liveOutRegs = s; }
// LiveRegs is the set of registers that are live across this instruction.
// Doesn't include dest registers, or src registers whose lifetime ends here.
RegSet getLiveRegs() const { return m_liveRegs; }
void setLiveRegs(RegSet s) { m_liveRegs = s; }
Block* getBlock() const { return m_block; }
void setBlock(Block* b) { m_block = b; }
Trace* getTrace() const;
@@ -1563,7 +1566,7 @@ private:
const IId m_iid;
uint32_t m_id;
SSATmp** m_srcs;
RegSet m_liveOutRegs;
RegSet m_liveRegs;
SSATmp* m_dst; // if HasDest or NaryDest
Block* m_taken; // for branches, guards, and jmp
Block* m_block; // block that owns this instruction
@@ -1713,9 +1716,10 @@ public:
*
* Returns InvalidReg for slots that aren't allocated.
*/
PhysReg getReg() const { assert(!m_isSpilled); return m_regs[0]; }
PhysReg getReg(uint32_t i) const { assert(!m_isSpilled); return m_regs[i]; }
void setReg(PhysReg reg, uint32_t i) { m_regs[i] = reg; }
PhysReg getReg() const { assert(!m_isSpilled); return m_regs[0]; }
PhysReg getReg(uint32_t i) const { assert(!m_isSpilled); return m_regs[i]; }
void setReg(PhysReg reg, uint32_t i) { m_regs[i] = reg; }
RegSet getRegs() const;
/*
* Returns information about how to spill/fill a SSATmp.
+23 -40
Ver Arquivo
@@ -97,8 +97,8 @@ private:
void allocRegToTmp(SSATmp* ssaTmp, uint32_t index);
void freeRegsAtId(uint32_t id);
void spill(SSATmp* tmp);
void computeLiveOutRegs();
static RegSet computeLiveOutRegs(IRInstruction* inst, RegSet liveRegs);
void computeLiveRegs();
static RegSet computeLiveRegs(IRInstruction* inst, RegSet liveRegs);
void initFreeList();
void coalesce(Trace* trace);
@@ -202,52 +202,38 @@ LinearScan::LinearScan(IRFactory* irFactory)
}
}
RegSet LinearScan::computeLiveOutRegs(IRInstruction* inst, RegSet liveRegs) {
/*
* Compute and save registers that are live *across* inst, not including
* registers whose lifetimes end at inst, nor registers defined by inst.
* Return the updated live set, including registers defined by inst.
*/
RegSet LinearScan::computeLiveRegs(IRInstruction* inst, RegSet live) {
uint32_t instId = inst->getId();
for (SSATmp* src : inst->getSrcs()) {
if (src->getLastUseId() <= instId) {
for (int locIndex = 0;
locIndex < src->numAllocatedRegs();
++locIndex) {
if (src->hasReg(locIndex)) {
// inst is the last use of the register assigned to this SSATmp
// remove src reg from live regs set
liveRegs.remove(src->getReg(locIndex));
}
}
}
if (src->getLastUseId() <= instId) live -= src->getRegs();
}
// add the destination registers to the live regs set
RegSet def, defOut;
for (const SSATmp& dst : inst->getDsts()) {
if (dst.getLastUseId() > instId) {
for (int locIndex = 0;
locIndex < dst.numAllocatedRegs();
locIndex++) {
if (dst.hasReg(locIndex)) {
liveRegs.add(dst.getReg(locIndex));
}
}
}
RegSet d = dst.getRegs();
if (dst.getLastUseId() > instId) defOut |= d;
live -= d;
}
inst->setLiveOutRegs(liveRegs);
return liveRegs;
inst->setLiveRegs(live);
return live | defOut;
}
/*
* Computes the live out regs at each instruction in a trace.
* This function takes as the second argument an initial live
* register set at the start of the trace and returns the live
* registers at the end of the trace.
* Computes the live regs at each instruction in a trace.
* The function uses the same last use information and instruction
* ordering used by the linear scan register allocator, so it's
* important that this function iterates over the instructions in
* the same order that linear scan orders the instructions.
*/
void LinearScan::computeLiveOutRegs() {
RegSet liveOutRegs;
void LinearScan::computeLiveRegs() {
RegSet liveRegs;
for (Block* block : m_blocks) {
for (IRInstruction& inst : *block) {
liveOutRegs = LinearScan::computeLiveOutRegs(&inst, liveOutRegs);
liveRegs = LinearScan::computeLiveRegs(&inst, liveRegs);
}
}
}
@@ -276,11 +262,8 @@ void LinearScan::allocRegToInstruction(InstructionList::iterator it) {
inst->setSrc(i, tmp);
}
if (!needsReloading[i]) {
for (int locIndex = 0;
locIndex < tmp->numAllocatedRegs();
++locIndex) {
auto srcReg = tmp->getReg(locIndex);
m_regs[int(srcReg)].m_pinned = true;
for (int i = 0, n = tmp->numAllocatedRegs(); i < n; ++i) {
m_regs[int(tmp->getReg(i))].m_pinned = true;
}
}
}
@@ -843,8 +826,8 @@ void LinearScan::allocRegs(Trace* trace) {
}
numberInstructions(m_blocks);
// record the live out register set at each instruction
computeLiveOutRegs();
// record the live register set at each instruction
computeLiveRegs();
}
void LinearScan::allocRegsToTrace() {