From 97e2eac6a7a894797fab24c465c071089975daf5 Mon Sep 17 00:00:00 2001 From: Guilherme Ottoni Date: Thu, 23 May 2013 18:08:30 -0700 Subject: [PATCH] Relax more guards This diff significantly generalizes guard relaxation and adds support for more bytecode instructions. Before, only CGetL, SetL, RetC, and RetV were able to relax their inputs, and only in limited ways, e.g. if their outputs were not used or only used in specific ways. The new approach follows how the input types flow into the output types, thus allowing dependencies to be relaxed even if they feed a chain of values through the tracelet. This diff also add support for relaxing inputs for the following bytecode instructions: - Pop* - FCall - FCallArray - AddElemC - ArrayIdx - SetS - SetG - ContPack - ContRetC - ContHandle The spill area was also increased (from 16 to 32 cells) since relaxed dependencies increase register pressure -- relaxed types are not immediates that can be burned in and need to reside in registers instead. --- hphp/runtime/vm/translator/abi-x64.h | 4 +- hphp/runtime/vm/translator/hopt/codegen.cpp | 1 + hphp/runtime/vm/translator/hopt/ir.cpp | 23 +- .../vm/translator/hopt/irinstruction.h | 6 +- .../runtime/vm/translator/hopt/linearscan.cpp | 7 +- hphp/runtime/vm/translator/hopt/linearscan.h | 2 +- .../vm/translator/translator-asm-helpers.S | 4 +- hphp/runtime/vm/translator/translator-x64.cpp | 2 +- hphp/runtime/vm/translator/translator.cpp | 303 ++++++++++++++---- hphp/runtime/vm/translator/translator.h | 12 +- 10 files changed, 264 insertions(+), 100 deletions(-) diff --git a/hphp/runtime/vm/translator/abi-x64.h b/hphp/runtime/vm/translator/abi-x64.h index 8eeba9ca0..b26c5633e 100644 --- a/hphp/runtime/vm/translator/abi-x64.h +++ b/hphp/runtime/vm/translator/abi-x64.h @@ -341,8 +341,8 @@ const RegSet kAllX64Regs = RegSet(kAllRegs).add(reg::r10) * address). It is used as spill locations by HHIR (see LinearScan), * and for MInstrState in both HHIR and translator-x64-vector.cpp. */ -const size_t kReservedRSPScratchSpace = 0x180; -const size_t kReservedRSPSpillSpace = 0x100; +const size_t kReservedRSPScratchSpace = 0x280; +const size_t kReservedRSPSpillSpace = 0x200; ////////////////////////////////////////////////////////////////////// diff --git a/hphp/runtime/vm/translator/hopt/codegen.cpp b/hphp/runtime/vm/translator/hopt/codegen.cpp index d34d516a7..1f1d1ae95 100644 --- a/hphp/runtime/vm/translator/hopt/codegen.cpp +++ b/hphp/runtime/vm/translator/hopt/codegen.cpp @@ -2608,6 +2608,7 @@ void CodeGenerator::cgStRefWork(IRInstruction* inst, bool genStoreType) { auto destReg = m_regs[inst->dst()].getReg(); auto addrReg = m_regs[inst->src(0)].getReg(); SSATmp* src = inst->src(1); + always_assert(!m_regs[src].isFullXMM()); cgStore(addrReg, RefData::tvOffset(), src, genStoreType); if (destReg != InvalidReg) emitMovRegReg(m_as, addrReg, destReg); } diff --git a/hphp/runtime/vm/translator/hopt/ir.cpp b/hphp/runtime/vm/translator/hopt/ir.cpp index d5b0ef237..9e1fc4387 100644 --- a/hphp/runtime/vm/translator/hopt/ir.cpp +++ b/hphp/runtime/vm/translator/hopt/ir.cpp @@ -484,20 +484,11 @@ bool IRInstruction::isLoad() const { } } -/* - * Returns true if the instruction stores its source operand srcIdx to memory. - */ -bool IRInstruction::stores(uint32_t srcIdx) const { +bool IRInstruction::storesCell(uint32_t srcIdx) const { switch (m_op) { case StRetVal: case StLoc: case StLocNT: - case StRef: - case StRefNT: - case SetNewElem: - case SetNewElemStk: - case BindNewElem: - case BindNewElemStk: return srcIdx == 1; case StMem: @@ -506,18 +497,6 @@ bool IRInstruction::stores(uint32_t srcIdx) const { case StPropNT: return srcIdx == 2; - case SetElem: - case SetElemStk: - case BindElem: - case BindElemStk: - return srcIdx == 3; - - case SetProp: - case SetPropStk: - case BindProp: - case BindPropStk: - return srcIdx == 4; - case SpillStack: return srcIdx >= 2 && srcIdx < numSrcs(); diff --git a/hphp/runtime/vm/translator/hopt/irinstruction.h b/hphp/runtime/vm/translator/hopt/irinstruction.h index d244a4018..bcd44f361 100644 --- a/hphp/runtime/vm/translator/hopt/irinstruction.h +++ b/hphp/runtime/vm/translator/hopt/irinstruction.h @@ -246,7 +246,11 @@ struct IRInstruction { bool isControlFlow() const { return bool(m_taken.to()); } bool isBlockEnd() const { return m_taken.to() || isTerminal(); } bool isLoad() const; - bool stores(uint32_t srcIdx) const; + /* + * Returns true if the instruction stores its source operand srcIdx to + * memory as a cell. + */ + bool storesCell(uint32_t srcIdx) const; /* * Comparison and hashing for the purposes of CSE-equality. diff --git a/hphp/runtime/vm/translator/hopt/linearscan.cpp b/hphp/runtime/vm/translator/hopt/linearscan.cpp index b0f974085..126979248 100644 --- a/hphp/runtime/vm/translator/hopt/linearscan.cpp +++ b/hphp/runtime/vm/translator/hopt/linearscan.cpp @@ -338,11 +338,6 @@ PhysReg::Type LinearScan::getRegType(const SSATmp* tmp, int locIdx) const { tmpId = spill->src(0)->id(); } - if (tmpType.equals(Type::Uncounted) || tmpType.equals(Type::UncountedInit)) { - // These relaxed types should always be candidates for full XMM allocation - assert(m_fullXMMCandidates[tmpId]); - } - if (m_fullXMMCandidates[tmpId]) { FTRACE(6, "getRegType(SSATmp {} : {}): it's a candidate for full XMM register\n", @@ -1073,7 +1068,7 @@ void LinearScan::findFullXMMCandidates() { } int idx = 0; for (SSATmp* tmp : inst.srcs()) { - if (tmp->numNeededRegs() == 2 && !inst.stores(idx)) { + if (tmp->numNeededRegs() == 2 && !inst.storesCell(idx)) { notCandidates[tmp->id()] = true; } idx++; diff --git a/hphp/runtime/vm/translator/hopt/linearscan.h b/hphp/runtime/vm/translator/hopt/linearscan.h index 6d72742bd..dfa918b4d 100644 --- a/hphp/runtime/vm/translator/hopt/linearscan.h +++ b/hphp/runtime/vm/translator/hopt/linearscan.h @@ -29,7 +29,7 @@ class IRFactory; // This value must be consistent with the number of pre-allocated // bytes for spill locations in __enterTCHelper in translator-x64.cpp. // Be careful when changing this value. -const int NumPreAllocatedSpillLocs = 32; +const int NumPreAllocatedSpillLocs = 64; struct UseInfo { UseInfo() : lastUse(0), count(0) {} diff --git a/hphp/runtime/vm/translator/translator-asm-helpers.S b/hphp/runtime/vm/translator/translator-asm-helpers.S index 4cb0b64df..fad169080 100644 --- a/hphp/runtime/vm/translator/translator-asm-helpers.S +++ b/hphp/runtime/vm/translator/translator-asm-helpers.S @@ -45,7 +45,7 @@ enterTCHelper: * 16-byte aligned. */ - sub $0x180, %rsp // kReservedRSPScratchSpace + sub $0x280, %rsp // kReservedRSPScratchSpace /* * If returning from a BIND_CALL request, push the return IP saved @@ -63,7 +63,7 @@ enterTCHelper: call *%rdx .LenterTCHelper$serviceReqLabel: - add $0x180, %rsp // kReservedRSPScratchSpace + add $0x280, %rsp // kReservedRSPScratchSpace // Restore infoPtr into %rbx pop %rbx .cfi_adjust_cfa_offset -8 diff --git a/hphp/runtime/vm/translator/translator-x64.cpp b/hphp/runtime/vm/translator/translator-x64.cpp index 237fb61eb..2aaa3ec4d 100644 --- a/hphp/runtime/vm/translator/translator-x64.cpp +++ b/hphp/runtime/vm/translator/translator-x64.cpp @@ -2749,7 +2749,7 @@ static_assert(rVmSp == rbx && rVmTl == r12 && rStashedAR == r15, "__enterTCHelper needs to be modified to use the correct ABI"); -static_assert(kReservedRSPScratchSpace == 0x180, +static_assert(kReservedRSPScratchSpace == 0x280, "enterTCHelper needs to be updated for changes to " "kReservedRSPScratchSpace"); static_assert(REQ_BIND_CALL == 0x1, diff --git a/hphp/runtime/vm/translator/translator.cpp b/hphp/runtime/vm/translator/translator.cpp index 23cf9baf3..6c4c636b8 100644 --- a/hphp/runtime/vm/translator/translator.cpp +++ b/hphp/runtime/vm/translator/translator.cpp @@ -2590,17 +2590,18 @@ void Translator::postAnalyze(NormalizedInstruction* ni, SrcKey& sk, } } +static bool isPop(const NormalizedInstruction* instr) { + Opcode opc = instr->op(); + return (opc == OpPopC || + opc == OpPopV || + opc == OpPopR); +} + NormalizedInstruction::OutputUse -NormalizedInstruction::getOutputUsage(DynLocation* output, - bool ignorePops /* =false */) const { +NormalizedInstruction::getOutputUsage(const DynLocation* output) const { for (NormalizedInstruction* succ = next; succ; succ = succ->next) { - if (ignorePops && - (succ->op() == OpPopC || - succ->op() == OpPopV || - succ->op() == OpPopR)) { - continue; - } + if (succ->noOp) continue; for (size_t i = 0; i < succ->inputs.size(); ++i) { if (succ->inputs[i] == output) { if (succ->inputWasInferred(i)) { @@ -2626,6 +2627,17 @@ NormalizedInstruction::getOutputUsage(DynLocation* output, return OutputUnused; } +bool NormalizedInstruction::isOutputUsed(const DynLocation* output) const { + return (output && !output->rtt.isVagueValue() && + getOutputUsage(output) == OutputUsed); +} + +bool NormalizedInstruction::isAnyOutputUsed() const +{ + return (isOutputUsed(outStack) || + isOutputUsed(outLocal)); +} + GuardType::GuardType(DataType outer, DataType inner) : outerType(outer), innerType(inner) { } @@ -2733,44 +2745,95 @@ GuardType GuardType::getCountnessInit() const { } +/** + * Returns true iff loc is consumed by a Pop* instruction in the sequence + * starting at instr. + */ +bool isPopped(DynLocation* loc, NormalizedInstruction* instr) { + for (; instr ; instr = instr->next) { + for (size_t i = 0; i < instr->inputs.size(); i++) { + if (instr->inputs[i] == loc) { + return isPop(instr); + } + } + } + return false; +} + DataTypeCategory Translator::getOperandConstraintCategory(NormalizedInstruction* instr, size_t opndIdx) { - static const NormalizedInstruction::OutputUse kOutputUsed = - NormalizedInstruction::OutputUsed; - switch (instr->op()) { - case OpSetL : { - assert(opndIdx < 2); + Opcode opc = instr->op(); + + switch (opc) { + case OpSetS: + case OpSetG: + case OpSetL: { if (opndIdx == 0) { // stack value - auto stackValUsage = instr->getOutputUsage(instr->outStack, true); - if ((instr->outStack && stackValUsage == kOutputUsed) || - (instr->getOutputUsage(instr->outLocal) == kOutputUsed)) { - return DataTypeSpecific; + // If the output on the stack is simply popped, then we don't + // even care whether the type is ref-counted or not because + // the ref-count is transfered to the target location. + if (!instr->outStack || isPopped(instr->outStack, instr->next)) { + return DataTypeGeneric; } - return DataTypeGeneric; - } else { // old local value return DataTypeCountness; } - } - case OpCGetL : { - if (!instr->outStack || (instr->getOutputUsage(instr->outStack, true) == - NormalizedInstruction::OutputUsed)) { - return DataTypeSpecific; + if (opc == OpSetL) { + // old local value is dec-refed + assert(opndIdx == 1); + return DataTypeCountness; } + return DataTypeSpecific; + } + + case OpCGetL: return DataTypeCountnessInit; - } - case OpRetC : - case OpRetV : { + + case OpRetC: + case OpRetV: return DataTypeCountness; - } - default: return DataTypeSpecific; + + case OpFCall: + // Note: instead of pessimizing calls that may be inlined with + // DataTypeSpecific, we could apply the operand constraints of + // the callee in constrainDep. + return instr->calleeTrace ? DataTypeSpecific : DataTypeGeneric; + + case OpFCallArray: + return DataTypeGeneric; + + case OpPopC: + case OpPopV: + case OpPopR: + return DataTypeCountness; + + case OpPackCont: + case OpContRetC: + // The stack input is teleported to the continuation's m_value field + return opndIdx == 0 ? DataTypeGeneric : DataTypeSpecific; + + case OpContHandle: + // This always calls the interpreter + return DataTypeGeneric; + + case OpAddElemC: + // The stack input is teleported to the array + return opndIdx == 0 ? DataTypeGeneric : DataTypeSpecific; + + case OpArrayIdx: + // The default value (w/ opndIdx 0) is simply passed to a helper, + // which takes care of dec-refing it if needed + return opndIdx == 0 ? DataTypeGeneric : DataTypeSpecific; + + default: + return DataTypeSpecific; } } - -GuardType Translator::getOperandConstraintType(NormalizedInstruction* instr, - size_t opndIdx, - const GuardType& specType) { +GuardType +Translator::getOperandConstraintType(NormalizedInstruction* instr, + size_t opndIdx, + const GuardType& specType) { DataTypeCategory dtCategory = getOperandConstraintCategory(instr, opndIdx); switch (dtCategory) { case DataTypeGeneric: return GuardType(KindOfAny); @@ -2787,21 +2850,144 @@ void Translator::constrainOperandType(GuardType& relxType, const GuardType& specType) { if (relxType.isSpecific()) return; // Can't constrain any further - switch (instr->op()) { - case OpRetC : - case OpRetV : - case OpCGetL : - case OpSetL : - { - GuardType consType = getOperandConstraintType(instr, opndIdx, specType); - if (consType.isMoreRefinedThan(relxType)) { - relxType = consType; + GuardType consType = getOperandConstraintType(instr, opndIdx, specType); + if (consType.isMoreRefinedThan(relxType)) { + relxType = consType; + } +} + +/* + * This method looks at every use of loc in the stream of instructions + * starting at firstInstr and constrains the relxType towards specType + * according to each use. Note that this method not only looks at + * direct uses of loc, but it also recursively looks at any other + * DynLocs whose type depends on loc's type. + */ +void Translator::constrainDep(const DynLocation* loc, + NormalizedInstruction* firstInstr, + GuardType specType, + GuardType& relxType) { + if (relxType.isSpecific()) return; // can't contrain it any further + + for (NormalizedInstruction* instr = firstInstr; instr; instr = instr->next) { + if (instr->noOp) continue; + Opcode opc = instr->op(); + size_t nInputs = instr->inputs.size(); + for (size_t i = 0; i < nInputs; i++) { + DynLocation* usedLoc = instr->inputs[i]; + if (usedLoc == loc) { + constrainOperandType(relxType, instr, i, specType); + + // If the instruction's input doesn't propagate to its output, + // then we're done. Otherwise, we need to constrain relxType + // based on the uses of the output. + if (!outputDependsOnInput(opc)) continue; + + bool outputIsStackInput = false; + const DynLocation* outStack = instr->outStack; + const DynLocation* outLocal = instr->outLocal; + + switch (instrInfo[opc].type) { + case OutSameAsInput: + outputIsStackInput = true; + break; + + case OutCInput: + outputIsStackInput = true; + // fall-through + case OutCInputL: + if (specType.getOuterType() == KindOfRef && + instr->isAnyOutputUsed()) { + // Value gets unboxed along the way. Pessimize it for now. + relxType = specType; + return; + } + break; + + default: + relxType = specType; + return; + } + + // The instruction input's type propagates to the outputs. + // So constrain the dependence further based on uses of outputs. + if ((i == 0 && outputIsStackInput) || // stack input @ [0] + (i == nInputs - 1 && !outputIsStackInput)) { // local input is last + if (outStack && !outStack->rtt.isVagueValue()) { + // For SetL, getOperandConstraintCategory() generates + // DataTypeGeneric if the stack output is popped. In this + // case, don't further constrain the stack output, + // otherwise the Pop* would make it a DataTypeCountness. + if (opc != OpSetL || !relxType.isGeneric()) { + constrainDep(outStack, instr->next, specType, relxType); + } + } + if (outLocal && !outLocal->rtt.isVagueValue()) { + constrainDep(outLocal, instr->next, specType, relxType); + } + } } - return; } - default: { - relxType = specType; - return; + } +} + +/** + * Propagates the relaxed type relxType for loc to all its consumers, + * starting at firstInstr. + */ +void Translator::propagateRelaxedType(Tracelet& tclet, + NormalizedInstruction* firstInstr, + DynLocation* loc, + const GuardType& relxType) { + assert(relxType.isRelaxed()); + + for (NormalizedInstruction* instr = firstInstr; instr; instr = instr->next) { + if (instr->noOp) continue; + Opcode opc = instr->op(); + size_t nInputs = instr->inputs.size(); + for (size_t i = 0; i < nInputs; i++) { + DynLocation* usedLoc = instr->inputs[i]; + if (usedLoc == loc) { + auto outKind = instrInfo[opc].type; + + // stack input propagates to outputs + bool outputIsStackInput = (i == 0 && // stack input is inputs[0] + (outKind == OutSameAsInput || outKind == OutCInput)); + bool outputIsLocalInput = (i == nInputs - 1 && // local input is last + outKind == OutCInputL); + bool outputIsInput = outputIsStackInput || outputIsLocalInput; + + if (outputIsInput) { + // if instr has outStack, update its type and propagate to consumers + if (instr->outStack) { + + TRACE(6, + "propagateRelaxedType: Loc: %s oldType: %s => newType: %s\n", + instr->outStack->location.pretty().c_str(), + instr->outStack->rtt.pretty().c_str(), + RuntimeType(relxType.getOuterType(), + relxType.getInnerType()).pretty().c_str()); + + instr->outStack->rtt = RuntimeType(relxType.getOuterType(), + relxType.getInnerType()); + propagateRelaxedType(tclet, instr->next, instr->outStack, relxType); + } + // if instr has outLocal, update its type and propagate to consumers + if (instr->outLocal) { + + TRACE(6, + "propagateRelaxedType: Loc: %s oldType: %s => newType: %s\n", + instr->outLocal->location.pretty().c_str(), + instr->outLocal->rtt.pretty().c_str(), + RuntimeType(relxType.getOuterType(), + relxType.getInnerType()).pretty().c_str()); + + instr->outLocal->rtt = RuntimeType(relxType.getOuterType(), + relxType.getInnerType()); + propagateRelaxedType(tclet, instr->next, instr->outLocal, relxType); + } + } + } } } } @@ -2812,7 +2998,6 @@ void Translator::constrainOperandType(GuardType& relxType, */ void Translator::relaxDeps(Tracelet& tclet, TraceletContext& tctxt) { DynLocTypeMap locRelxTypeMap; - DynLocTypeMap locSpecTypeMap; // Initialize type maps. Relaxed types start off very relaxed, and then // they may get more specific depending on how the instructions use them. @@ -2820,23 +3005,12 @@ void Translator::relaxDeps(Tracelet& tclet, TraceletContext& tctxt) { for (auto depIt = deps.begin(); depIt != deps.end(); depIt++) { DynLocation* loc = depIt->second; const RuntimeType& rtt = depIt->second->rtt; - if (rtt.isValue() && !rtt.isVagueValue() && !loc->location.isThis()) { - locSpecTypeMap[loc] = GuardType(rtt); - locRelxTypeMap[loc] = GuardType(KindOfAny); - } - } - - // Process the instruction stream, constraining the relaxed types - // along the way - for (NormalizedInstruction* instr = tclet.m_instrStream.first; instr; - instr = instr->next) { - for (size_t i = 0; i < instr->inputs.size(); i++) { - DynLocation* loc = instr->inputs[i]; - auto it = locRelxTypeMap.find(loc); - if (it != locRelxTypeMap.end()) { - GuardType& relxType = it->second; - constrainOperandType(relxType, instr, i, locSpecTypeMap[loc]); - } + if (rtt.isValue() && !rtt.isVagueValue() && !rtt.isClass() && + !loc->location.isThis()) { + GuardType relxType = GuardType(KindOfAny); + GuardType specType = GuardType(rtt); + constrainDep(loc, tclet.m_instrStream.first, specType, relxType); + locRelxTypeMap[loc] = relxType; } } @@ -2855,6 +3029,7 @@ void Translator::relaxDeps(Tracelet& tclet, TraceletContext& tctxt) { assert(relxType.getOuterType() != KindOfInvalid); deps[loc->location]->rtt = RuntimeType(relxType.getOuterType(), relxType.getInnerType()); + propagateRelaxedType(tclet, tclet.m_instrStream.first, loc, relxType); } } } diff --git a/hphp/runtime/vm/translator/translator.h b/hphp/runtime/vm/translator/translator.h index a5329830b..bc5520de9 100644 --- a/hphp/runtime/vm/translator/translator.h +++ b/hphp/runtime/vm/translator/translator.h @@ -406,7 +406,9 @@ class NormalizedInstruction { OutputInferred, OutputDoesntCare }; - OutputUse getOutputUsage(DynLocation* output, bool ignorePops = false) const; + OutputUse getOutputUsage(const DynLocation* output) const; + bool isOutputUsed(const DynLocation* output) const; + bool isAnyOutputUsed() const; std::string toString() const; }; @@ -784,6 +786,14 @@ private: int& currentStackOffset, bool& varEnvTaint); void relaxDeps(Tracelet& tclet, TraceletContext& tctxt); + void propagateRelaxedType(Tracelet& tclet, + NormalizedInstruction* firstInstr, + DynLocation* loc, + const GuardType& relxType); + void constrainDep(const DynLocation* loc, + NormalizedInstruction* firstInstr, + GuardType specType, + GuardType& relxType); DataTypeCategory getOperandConstraintCategory(NormalizedInstruction* instr, size_t opndIdx); GuardType getOperandConstraintType(NormalizedInstruction* instr,