Relax more guards

This diff significantly generalizes guard relaxation and adds support
for more bytecode instructions.  Before, only CGetL, SetL, RetC, and
RetV were able to relax their inputs, and only in limited ways,
e.g. if their outputs were not used or only used in specific ways.
The new approach follows how the input types flow into the output
types, thus allowing dependencies to be relaxed even if they feed a
chain of values through the tracelet.

This diff also add support for relaxing inputs for the following
bytecode instructions:
  - Pop*
  - FCall
  - FCallArray
  - AddElemC
  - ArrayIdx
  - SetS
  - SetG
  - ContPack
  - ContRetC
  - ContHandle

The spill area was also increased (from 16 to 32 cells) since relaxed
dependencies increase register pressure -- relaxed types are not
immediates that can be burned in and need to reside in registers
instead.
Esse commit está contido em:
Guilherme Ottoni
2013-05-23 18:08:30 -07:00
commit de sgolemon
commit 97e2eac6a7
10 arquivos alterados com 264 adições e 100 exclusões
+2 -2
Ver Arquivo
@@ -341,8 +341,8 @@ const RegSet kAllX64Regs = RegSet(kAllRegs).add(reg::r10)
* address). It is used as spill locations by HHIR (see LinearScan),
* and for MInstrState in both HHIR and translator-x64-vector.cpp.
*/
const size_t kReservedRSPScratchSpace = 0x180;
const size_t kReservedRSPSpillSpace = 0x100;
const size_t kReservedRSPScratchSpace = 0x280;
const size_t kReservedRSPSpillSpace = 0x200;
//////////////////////////////////////////////////////////////////////
@@ -2608,6 +2608,7 @@ void CodeGenerator::cgStRefWork(IRInstruction* inst, bool genStoreType) {
auto destReg = m_regs[inst->dst()].getReg();
auto addrReg = m_regs[inst->src(0)].getReg();
SSATmp* src = inst->src(1);
always_assert(!m_regs[src].isFullXMM());
cgStore(addrReg, RefData::tvOffset(), src, genStoreType);
if (destReg != InvalidReg) emitMovRegReg(m_as, addrReg, destReg);
}
+1 -22
Ver Arquivo
@@ -484,20 +484,11 @@ bool IRInstruction::isLoad() const {
}
}
/*
* Returns true if the instruction stores its source operand srcIdx to memory.
*/
bool IRInstruction::stores(uint32_t srcIdx) const {
bool IRInstruction::storesCell(uint32_t srcIdx) const {
switch (m_op) {
case StRetVal:
case StLoc:
case StLocNT:
case StRef:
case StRefNT:
case SetNewElem:
case SetNewElemStk:
case BindNewElem:
case BindNewElemStk:
return srcIdx == 1;
case StMem:
@@ -506,18 +497,6 @@ bool IRInstruction::stores(uint32_t srcIdx) const {
case StPropNT:
return srcIdx == 2;
case SetElem:
case SetElemStk:
case BindElem:
case BindElemStk:
return srcIdx == 3;
case SetProp:
case SetPropStk:
case BindProp:
case BindPropStk:
return srcIdx == 4;
case SpillStack:
return srcIdx >= 2 && srcIdx < numSrcs();
+5 -1
Ver Arquivo
@@ -246,7 +246,11 @@ struct IRInstruction {
bool isControlFlow() const { return bool(m_taken.to()); }
bool isBlockEnd() const { return m_taken.to() || isTerminal(); }
bool isLoad() const;
bool stores(uint32_t srcIdx) const;
/*
* Returns true if the instruction stores its source operand srcIdx to
* memory as a cell.
*/
bool storesCell(uint32_t srcIdx) const;
/*
* Comparison and hashing for the purposes of CSE-equality.
+1 -6
Ver Arquivo
@@ -338,11 +338,6 @@ PhysReg::Type LinearScan::getRegType(const SSATmp* tmp, int locIdx) const {
tmpId = spill->src(0)->id();
}
if (tmpType.equals(Type::Uncounted) || tmpType.equals(Type::UncountedInit)) {
// These relaxed types should always be candidates for full XMM allocation
assert(m_fullXMMCandidates[tmpId]);
}
if (m_fullXMMCandidates[tmpId]) {
FTRACE(6,
"getRegType(SSATmp {} : {}): it's a candidate for full XMM register\n",
@@ -1073,7 +1068,7 @@ void LinearScan::findFullXMMCandidates() {
}
int idx = 0;
for (SSATmp* tmp : inst.srcs()) {
if (tmp->numNeededRegs() == 2 && !inst.stores(idx)) {
if (tmp->numNeededRegs() == 2 && !inst.storesCell(idx)) {
notCandidates[tmp->id()] = true;
}
idx++;
+1 -1
Ver Arquivo
@@ -29,7 +29,7 @@ class IRFactory;
// This value must be consistent with the number of pre-allocated
// bytes for spill locations in __enterTCHelper in translator-x64.cpp.
// Be careful when changing this value.
const int NumPreAllocatedSpillLocs = 32;
const int NumPreAllocatedSpillLocs = 64;
struct UseInfo {
UseInfo() : lastUse(0), count(0) {}
@@ -45,7 +45,7 @@ enterTCHelper:
* 16-byte aligned.
*/
sub $0x180, %rsp // kReservedRSPScratchSpace
sub $0x280, %rsp // kReservedRSPScratchSpace
/*
* If returning from a BIND_CALL request, push the return IP saved
@@ -63,7 +63,7 @@ enterTCHelper:
call *%rdx
.LenterTCHelper$serviceReqLabel:
add $0x180, %rsp // kReservedRSPScratchSpace
add $0x280, %rsp // kReservedRSPScratchSpace
// Restore infoPtr into %rbx
pop %rbx
.cfi_adjust_cfa_offset -8
+1 -1
Ver Arquivo
@@ -2749,7 +2749,7 @@ static_assert(rVmSp == rbx &&
rVmTl == r12 &&
rStashedAR == r15,
"__enterTCHelper needs to be modified to use the correct ABI");
static_assert(kReservedRSPScratchSpace == 0x180,
static_assert(kReservedRSPScratchSpace == 0x280,
"enterTCHelper needs to be updated for changes to "
"kReservedRSPScratchSpace");
static_assert(REQ_BIND_CALL == 0x1,
+239 -64
Ver Arquivo
@@ -2590,17 +2590,18 @@ void Translator::postAnalyze(NormalizedInstruction* ni, SrcKey& sk,
}
}
static bool isPop(const NormalizedInstruction* instr) {
Opcode opc = instr->op();
return (opc == OpPopC ||
opc == OpPopV ||
opc == OpPopR);
}
NormalizedInstruction::OutputUse
NormalizedInstruction::getOutputUsage(DynLocation* output,
bool ignorePops /* =false */) const {
NormalizedInstruction::getOutputUsage(const DynLocation* output) const {
for (NormalizedInstruction* succ = next;
succ; succ = succ->next) {
if (ignorePops &&
(succ->op() == OpPopC ||
succ->op() == OpPopV ||
succ->op() == OpPopR)) {
continue;
}
if (succ->noOp) continue;
for (size_t i = 0; i < succ->inputs.size(); ++i) {
if (succ->inputs[i] == output) {
if (succ->inputWasInferred(i)) {
@@ -2626,6 +2627,17 @@ NormalizedInstruction::getOutputUsage(DynLocation* output,
return OutputUnused;
}
bool NormalizedInstruction::isOutputUsed(const DynLocation* output) const {
return (output && !output->rtt.isVagueValue() &&
getOutputUsage(output) == OutputUsed);
}
bool NormalizedInstruction::isAnyOutputUsed() const
{
return (isOutputUsed(outStack) ||
isOutputUsed(outLocal));
}
GuardType::GuardType(DataType outer, DataType inner)
: outerType(outer), innerType(inner) {
}
@@ -2733,44 +2745,95 @@ GuardType GuardType::getCountnessInit() const {
}
/**
* Returns true iff loc is consumed by a Pop* instruction in the sequence
* starting at instr.
*/
bool isPopped(DynLocation* loc, NormalizedInstruction* instr) {
for (; instr ; instr = instr->next) {
for (size_t i = 0; i < instr->inputs.size(); i++) {
if (instr->inputs[i] == loc) {
return isPop(instr);
}
}
}
return false;
}
DataTypeCategory
Translator::getOperandConstraintCategory(NormalizedInstruction* instr,
size_t opndIdx) {
static const NormalizedInstruction::OutputUse kOutputUsed =
NormalizedInstruction::OutputUsed;
switch (instr->op()) {
case OpSetL : {
assert(opndIdx < 2);
Opcode opc = instr->op();
switch (opc) {
case OpSetS:
case OpSetG:
case OpSetL: {
if (opndIdx == 0) { // stack value
auto stackValUsage = instr->getOutputUsage(instr->outStack, true);
if ((instr->outStack && stackValUsage == kOutputUsed) ||
(instr->getOutputUsage(instr->outLocal) == kOutputUsed)) {
return DataTypeSpecific;
// If the output on the stack is simply popped, then we don't
// even care whether the type is ref-counted or not because
// the ref-count is transfered to the target location.
if (!instr->outStack || isPopped(instr->outStack, instr->next)) {
return DataTypeGeneric;
}
return DataTypeGeneric;
} else { // old local value
return DataTypeCountness;
}
}
case OpCGetL : {
if (!instr->outStack || (instr->getOutputUsage(instr->outStack, true) ==
NormalizedInstruction::OutputUsed)) {
return DataTypeSpecific;
if (opc == OpSetL) {
// old local value is dec-refed
assert(opndIdx == 1);
return DataTypeCountness;
}
return DataTypeSpecific;
}
case OpCGetL:
return DataTypeCountnessInit;
}
case OpRetC :
case OpRetV : {
case OpRetC:
case OpRetV:
return DataTypeCountness;
}
default: return DataTypeSpecific;
case OpFCall:
// Note: instead of pessimizing calls that may be inlined with
// DataTypeSpecific, we could apply the operand constraints of
// the callee in constrainDep.
return instr->calleeTrace ? DataTypeSpecific : DataTypeGeneric;
case OpFCallArray:
return DataTypeGeneric;
case OpPopC:
case OpPopV:
case OpPopR:
return DataTypeCountness;
case OpPackCont:
case OpContRetC:
// The stack input is teleported to the continuation's m_value field
return opndIdx == 0 ? DataTypeGeneric : DataTypeSpecific;
case OpContHandle:
// This always calls the interpreter
return DataTypeGeneric;
case OpAddElemC:
// The stack input is teleported to the array
return opndIdx == 0 ? DataTypeGeneric : DataTypeSpecific;
case OpArrayIdx:
// The default value (w/ opndIdx 0) is simply passed to a helper,
// which takes care of dec-refing it if needed
return opndIdx == 0 ? DataTypeGeneric : DataTypeSpecific;
default:
return DataTypeSpecific;
}
}
GuardType Translator::getOperandConstraintType(NormalizedInstruction* instr,
size_t opndIdx,
const GuardType& specType) {
GuardType
Translator::getOperandConstraintType(NormalizedInstruction* instr,
size_t opndIdx,
const GuardType& specType) {
DataTypeCategory dtCategory = getOperandConstraintCategory(instr, opndIdx);
switch (dtCategory) {
case DataTypeGeneric: return GuardType(KindOfAny);
@@ -2787,21 +2850,144 @@ void Translator::constrainOperandType(GuardType& relxType,
const GuardType& specType) {
if (relxType.isSpecific()) return; // Can't constrain any further
switch (instr->op()) {
case OpRetC :
case OpRetV :
case OpCGetL :
case OpSetL :
{
GuardType consType = getOperandConstraintType(instr, opndIdx, specType);
if (consType.isMoreRefinedThan(relxType)) {
relxType = consType;
GuardType consType = getOperandConstraintType(instr, opndIdx, specType);
if (consType.isMoreRefinedThan(relxType)) {
relxType = consType;
}
}
/*
* This method looks at every use of loc in the stream of instructions
* starting at firstInstr and constrains the relxType towards specType
* according to each use. Note that this method not only looks at
* direct uses of loc, but it also recursively looks at any other
* DynLocs whose type depends on loc's type.
*/
void Translator::constrainDep(const DynLocation* loc,
NormalizedInstruction* firstInstr,
GuardType specType,
GuardType& relxType) {
if (relxType.isSpecific()) return; // can't contrain it any further
for (NormalizedInstruction* instr = firstInstr; instr; instr = instr->next) {
if (instr->noOp) continue;
Opcode opc = instr->op();
size_t nInputs = instr->inputs.size();
for (size_t i = 0; i < nInputs; i++) {
DynLocation* usedLoc = instr->inputs[i];
if (usedLoc == loc) {
constrainOperandType(relxType, instr, i, specType);
// If the instruction's input doesn't propagate to its output,
// then we're done. Otherwise, we need to constrain relxType
// based on the uses of the output.
if (!outputDependsOnInput(opc)) continue;
bool outputIsStackInput = false;
const DynLocation* outStack = instr->outStack;
const DynLocation* outLocal = instr->outLocal;
switch (instrInfo[opc].type) {
case OutSameAsInput:
outputIsStackInput = true;
break;
case OutCInput:
outputIsStackInput = true;
// fall-through
case OutCInputL:
if (specType.getOuterType() == KindOfRef &&
instr->isAnyOutputUsed()) {
// Value gets unboxed along the way. Pessimize it for now.
relxType = specType;
return;
}
break;
default:
relxType = specType;
return;
}
// The instruction input's type propagates to the outputs.
// So constrain the dependence further based on uses of outputs.
if ((i == 0 && outputIsStackInput) || // stack input @ [0]
(i == nInputs - 1 && !outputIsStackInput)) { // local input is last
if (outStack && !outStack->rtt.isVagueValue()) {
// For SetL, getOperandConstraintCategory() generates
// DataTypeGeneric if the stack output is popped. In this
// case, don't further constrain the stack output,
// otherwise the Pop* would make it a DataTypeCountness.
if (opc != OpSetL || !relxType.isGeneric()) {
constrainDep(outStack, instr->next, specType, relxType);
}
}
if (outLocal && !outLocal->rtt.isVagueValue()) {
constrainDep(outLocal, instr->next, specType, relxType);
}
}
}
return;
}
default: {
relxType = specType;
return;
}
}
/**
* Propagates the relaxed type relxType for loc to all its consumers,
* starting at firstInstr.
*/
void Translator::propagateRelaxedType(Tracelet& tclet,
NormalizedInstruction* firstInstr,
DynLocation* loc,
const GuardType& relxType) {
assert(relxType.isRelaxed());
for (NormalizedInstruction* instr = firstInstr; instr; instr = instr->next) {
if (instr->noOp) continue;
Opcode opc = instr->op();
size_t nInputs = instr->inputs.size();
for (size_t i = 0; i < nInputs; i++) {
DynLocation* usedLoc = instr->inputs[i];
if (usedLoc == loc) {
auto outKind = instrInfo[opc].type;
// stack input propagates to outputs
bool outputIsStackInput = (i == 0 && // stack input is inputs[0]
(outKind == OutSameAsInput || outKind == OutCInput));
bool outputIsLocalInput = (i == nInputs - 1 && // local input is last
outKind == OutCInputL);
bool outputIsInput = outputIsStackInput || outputIsLocalInput;
if (outputIsInput) {
// if instr has outStack, update its type and propagate to consumers
if (instr->outStack) {
TRACE(6,
"propagateRelaxedType: Loc: %s oldType: %s => newType: %s\n",
instr->outStack->location.pretty().c_str(),
instr->outStack->rtt.pretty().c_str(),
RuntimeType(relxType.getOuterType(),
relxType.getInnerType()).pretty().c_str());
instr->outStack->rtt = RuntimeType(relxType.getOuterType(),
relxType.getInnerType());
propagateRelaxedType(tclet, instr->next, instr->outStack, relxType);
}
// if instr has outLocal, update its type and propagate to consumers
if (instr->outLocal) {
TRACE(6,
"propagateRelaxedType: Loc: %s oldType: %s => newType: %s\n",
instr->outLocal->location.pretty().c_str(),
instr->outLocal->rtt.pretty().c_str(),
RuntimeType(relxType.getOuterType(),
relxType.getInnerType()).pretty().c_str());
instr->outLocal->rtt = RuntimeType(relxType.getOuterType(),
relxType.getInnerType());
propagateRelaxedType(tclet, instr->next, instr->outLocal, relxType);
}
}
}
}
}
}
@@ -2812,7 +2998,6 @@ void Translator::constrainOperandType(GuardType& relxType,
*/
void Translator::relaxDeps(Tracelet& tclet, TraceletContext& tctxt) {
DynLocTypeMap locRelxTypeMap;
DynLocTypeMap locSpecTypeMap;
// Initialize type maps. Relaxed types start off very relaxed, and then
// they may get more specific depending on how the instructions use them.
@@ -2820,23 +3005,12 @@ void Translator::relaxDeps(Tracelet& tclet, TraceletContext& tctxt) {
for (auto depIt = deps.begin(); depIt != deps.end(); depIt++) {
DynLocation* loc = depIt->second;
const RuntimeType& rtt = depIt->second->rtt;
if (rtt.isValue() && !rtt.isVagueValue() && !loc->location.isThis()) {
locSpecTypeMap[loc] = GuardType(rtt);
locRelxTypeMap[loc] = GuardType(KindOfAny);
}
}
// Process the instruction stream, constraining the relaxed types
// along the way
for (NormalizedInstruction* instr = tclet.m_instrStream.first; instr;
instr = instr->next) {
for (size_t i = 0; i < instr->inputs.size(); i++) {
DynLocation* loc = instr->inputs[i];
auto it = locRelxTypeMap.find(loc);
if (it != locRelxTypeMap.end()) {
GuardType& relxType = it->second;
constrainOperandType(relxType, instr, i, locSpecTypeMap[loc]);
}
if (rtt.isValue() && !rtt.isVagueValue() && !rtt.isClass() &&
!loc->location.isThis()) {
GuardType relxType = GuardType(KindOfAny);
GuardType specType = GuardType(rtt);
constrainDep(loc, tclet.m_instrStream.first, specType, relxType);
locRelxTypeMap[loc] = relxType;
}
}
@@ -2855,6 +3029,7 @@ void Translator::relaxDeps(Tracelet& tclet, TraceletContext& tctxt) {
assert(relxType.getOuterType() != KindOfInvalid);
deps[loc->location]->rtt = RuntimeType(relxType.getOuterType(),
relxType.getInnerType());
propagateRelaxedType(tclet, tclet.m_instrStream.first, loc, relxType);
}
}
}
+11 -1
Ver Arquivo
@@ -406,7 +406,9 @@ class NormalizedInstruction {
OutputInferred,
OutputDoesntCare
};
OutputUse getOutputUsage(DynLocation* output, bool ignorePops = false) const;
OutputUse getOutputUsage(const DynLocation* output) const;
bool isOutputUsed(const DynLocation* output) const;
bool isAnyOutputUsed() const;
std::string toString() const;
};
@@ -784,6 +786,14 @@ private:
int& currentStackOffset,
bool& varEnvTaint);
void relaxDeps(Tracelet& tclet, TraceletContext& tctxt);
void propagateRelaxedType(Tracelet& tclet,
NormalizedInstruction* firstInstr,
DynLocation* loc,
const GuardType& relxType);
void constrainDep(const DynLocation* loc,
NormalizedInstruction* firstInstr,
GuardType specType,
GuardType& relxType);
DataTypeCategory getOperandConstraintCategory(NormalizedInstruction* instr,
size_t opndIdx);
GuardType getOperandConstraintType(NormalizedInstruction* instr,