diff --git a/hphp/doc/ir.specification b/hphp/doc/ir.specification index b698cdfb1..df8c1f4df 100755 --- a/hphp/doc/ir.specification +++ b/hphp/doc/ir.specification @@ -440,6 +440,12 @@ CheckDefinedClsEq -> L `classPtr'; if they aren't equal or if `className' is not defined, branch to L. +CheckCold -> L + + Check if the counter associated with translation TransID is cold + (i.e. within a fixed threshold). If it's not (i.e. such translation + has reached the "hotness threshold"), then branch to label L. + GuardRefs S0:FuncPtr S1:Int S2:Int S3:Int S4:Int S5:Int Perform reffiness guard checks. Operands: @@ -1223,6 +1229,12 @@ ReqRetranslate This instruction is used in exit traces for a type prediction that occurs at the first bytecode offset of a tracelet. +ReqRetranslateOpt + + Emit a service request to retranslate, with a higher optimization + gear, translation transID, which starts at bcOff. This instruction + is used in exit traces that trigger profile-guided optimizations. + ReqBindJmpGt ReqBindJmpGte ReqBindJmpLt diff --git a/hphp/runtime/base/program_functions.cpp b/hphp/runtime/base/program_functions.cpp index 69236dee6..57d540af7 100644 --- a/hphp/runtime/base/program_functions.cpp +++ b/hphp/runtime/base/program_functions.cpp @@ -533,7 +533,7 @@ void execute_command_line_begin(int argc, char **argv, int xhprof) { void execute_command_line_end(int xhprof, bool coverage, const char *program) { ThreadInfo *ti = ThreadInfo::s_threadInfo.getNoCheck(); - if (RuntimeOption::EvalJit && RuntimeOption::EvalDumpTC) { + if (RuntimeOption::EvalDumpTC) { HPHP::Transl::tc_dump(); } diff --git a/hphp/runtime/base/runtime_option.cpp b/hphp/runtime/base/runtime_option.cpp index 8190606e3..b707b2988 100644 --- a/hphp/runtime/base/runtime_option.cpp +++ b/hphp/runtime/base/runtime_option.cpp @@ -427,8 +427,8 @@ EVALFLAGS(); std::set RuntimeOption::DynamicInvokeFunctions; bool RuntimeOption::RecordCodeCoverage = false; std::string RuntimeOption::CodeCoverageOutputFile; -size_t 
RuntimeOption::VMTranslAHotSize = 2 << 20; -size_t RuntimeOption::VMTranslASize = 510 << 20; +size_t RuntimeOption::VMTranslAHotSize = 4 << 20; +size_t RuntimeOption::VMTranslASize = 508 << 20; size_t RuntimeOption::VMTranslAStubsSize = 512 << 20; size_t RuntimeOption::VMTranslGDataSize = RuntimeOption::VMTranslASize >> 2; diff --git a/hphp/runtime/base/runtime_option.h b/hphp/runtime/base/runtime_option.h index 1292c14a7..761b200a1 100644 --- a/hphp/runtime/base/runtime_option.h +++ b/hphp/runtime/base/runtime_option.h @@ -445,9 +445,12 @@ public: F(bool, HHIRPredictionOpts, true) \ F(bool, HHIRStressCodegenBlocks, false) \ F(string, JitRegionSelector, regionSelectorDefault()) \ + F(bool, JitPGO, false) \ + F(uint64_t, JitPGOThreshold, 2) \ /* DumpBytecode =1 dumps user php, =2 dumps systemlib & user php */ \ F(int32_t, DumpBytecode, 0) \ F(bool, DumpTC, false) \ + F(bool, DumpTCAnchors, false) \ F(bool, DumpAst, false) \ F(bool, MapTCHuge, true) \ F(uint32_t, TCNumHugeHotMB, 16) \ diff --git a/hphp/runtime/base/types.h b/hphp/runtime/base/types.h index fada97bee..9489a2212 100644 --- a/hphp/runtime/base/types.h +++ b/hphp/runtime/base/types.h @@ -466,6 +466,7 @@ const Id kInvalidId = Id(-1); // offsets. 
typedef int32_t Offset; constexpr Offset kInvalidOffset = std::numeric_limits::max(); +typedef hphp_hash_set OffsetSet; /* * Various fields in the VM's runtime have indexes that are addressed diff --git a/hphp/runtime/vm/func.cpp b/hphp/runtime/vm/func.cpp index c4977e745..898b62686 100644 --- a/hphp/runtime/vm/func.cpp +++ b/hphp/runtime/vm/func.cpp @@ -666,6 +666,19 @@ void Func::getFuncInfo(ClassInfo::MethodInfo* mi) const { } } +DVFuncletsVec Func::getDVFunclets() const { + DVFuncletsVec dvs; + int nParams = numParams(); + for (int i = 0; i < nParams; ++i) { + const ParamInfo& pi = params()[i]; + if (pi.hasDefaultValue()) { + dvs.push_back(std::make_pair(i, pi.funcletOff())); + } + } + return dvs; +} + + Func::SharedData::SharedData(PreClass* preClass, Id id, Offset base, Offset past, int line1, int line2, bool top, const StringData* docComment) diff --git a/hphp/runtime/vm/func.h b/hphp/runtime/vm/func.h index cf9c6c770..3a6509d5a 100644 --- a/hphp/runtime/vm/func.h +++ b/hphp/runtime/vm/func.h @@ -37,6 +37,11 @@ class PreClassEmitter; typedef uint32_t FuncId; constexpr FuncId InvalidFuncId = FuncId(-1LL); +/* + * Vector of pairs (param number, offset of corresponding DV funclet). + */ +typedef std::vector > DVFuncletsVec; + /* * Metadata about a php function or object method. */ @@ -248,6 +253,7 @@ struct Func { HphpArray* getStaticLocals() const; void getFuncInfo(ClassInfo::MethodInfo* mi) const; + DVFuncletsVec getDVFunclets() const; Unit* unit() const { return m_unit; } PreClass* preClass() const { return shared()->m_preClass; } diff --git a/hphp/runtime/vm/jit/abi-x64.h b/hphp/runtime/vm/jit/abi-x64.h index 0cca74554..df7241bbb 100644 --- a/hphp/runtime/vm/jit/abi-x64.h +++ b/hphp/runtime/vm/jit/abi-x64.h @@ -227,6 +227,12 @@ const int kNumServiceReqArgRegs = */ \ REQ(RETRANSLATE) \ \ + /* + * When PGO is enabled, this retranslates previous translations leveraging + * profiling data. 
+ */ \ + REQ(RETRANSLATE_OPT) \ + \ /* * If the max translations is reached for a SrcKey, the last * translation in the chain will jump to an interpret request stub. diff --git a/hphp/runtime/vm/jit/code-gen.cpp b/hphp/runtime/vm/jit/code-gen.cpp index 56e2e0766..48de69459 100755 --- a/hphp/runtime/vm/jit/code-gen.cpp +++ b/hphp/runtime/vm/jit/code-gen.cpp @@ -782,7 +782,11 @@ void CodeGenerator::emitReqBindJcc(ConditionCode cc, extra->notTaken, cc, m_tx64->ccArgInfo(cc)); + + tx64->setJmpTransID(a.frontier()); a. jcc (cc, jccStub); + + tx64->setJmpTransID(a.frontier()); a. jmp (jccStub); } @@ -2580,6 +2584,8 @@ void CodeGenerator::cgRetCtrl(IRInstruction* inst) { void CodeGenerator::emitReqBindAddr(const Func* func, TCA& dest, Offset offset) { + tx64->setJmpTransID((TCA)&dest); + dest = m_tx64->emitServiceReq(REQ_BIND_ADDR, &dest, offset); @@ -2600,6 +2606,8 @@ void CodeGenerator::cgJmpSwitchDest(IRInstruction* inst) { TCA def = m_tx64->emitServiceReq(REQ_BIND_JMPCC_SECOND, m_as.frontier(), data->defaultOff, CC_AE); + tx64->setJmpTransID(m_as.frontier()); + m_as. 
jae(def); } @@ -2913,6 +2921,12 @@ void CodeGenerator::cgReqRetranslateNoIR(IRInstruction* inst) { m_tx64->emitReqRetransNoIR(m_as, dest); } +void CodeGenerator::cgReqRetranslateOpt(IRInstruction* inst) { + auto extra = inst->extra(); + auto sk = SrcKey(curFunc(), extra->offset); + m_tx64->emitReqRetransOpt(m_as, sk, extra->transId); +} + void CodeGenerator::cgReqRetranslate(IRInstruction* inst) { auto const destSK = SrcKey(curFunc(), m_curTrace->bcOff()); auto const destSR = m_tx64->getSrcRec(destSK); @@ -5251,6 +5265,16 @@ void CodeGenerator::cgExitOnVarEnv(IRInstruction* inst) { emitFwdJcc(CC_NE, label); } +void CodeGenerator::cgCheckCold(IRInstruction* inst) { + Block* label = inst->taken(); + TransID transId = inst->extra()->transId; + auto counterAddr = m_tx64->profData()->transCounterAddr(transId); + + emitLoadImm(m_as, uint64_t(counterAddr), m_rScratch); + m_as.decq(m_rScratch[0]); + emitFwdJcc(CC_LE, label); +} + void CodeGenerator::cgReleaseVVOrExit(IRInstruction* inst) { auto* const label = inst->taken(); auto const rFp = m_regs[inst->src(0)].reg(); diff --git a/hphp/runtime/vm/jit/dce.cpp b/hphp/runtime/vm/jit/dce.cpp index 6c25c33cb..4e4048b95 100644 --- a/hphp/runtime/vm/jit/dce.cpp +++ b/hphp/runtime/vm/jit/dce.cpp @@ -493,7 +493,7 @@ void consumeIncRef(const IRInstruction* consumer, const SSATmp* src, if ((srcInst->op() == CheckType || srcInst->op() == AssertType) && srcInst->typeParam().maybeCounted()) { // srcInst is a CheckType/AsserType that guards to a refcounted type. We - // need to trace through to its source. If the instruciton guards to a + // need to trace through to its source. If the instruction guards to a // non-refcounted type then the reference is consumed by CheckType itself. 
consumeIncRef(consumer, srcInst->src(0), state); return; diff --git a/hphp/runtime/vm/jit/extra-data.h b/hphp/runtime/vm/jit/extra-data.h index f46cc5f75..83f5ebd9f 100644 --- a/hphp/runtime/vm/jit/extra-data.h +++ b/hphp/runtime/vm/jit/extra-data.h @@ -18,6 +18,7 @@ #define incl_HPHP_VM_EXTRADATA_H_ #include "hphp/runtime/vm/jit/ir.h" +#include "hphp/runtime/vm/jit/types.h" namespace HPHP { namespace JIT { @@ -255,6 +256,29 @@ struct BCOffset : IRExtraData { Offset offset; }; +/* + * Translation IDs. + */ +struct TransIDData : IRExtraData { + explicit TransIDData(Transl::TransID transId) : transId(transId) {} + std::string show() const { return folly::to(transId); } + Transl::TransID transId; +}; + +/* + * Information needed to generate a REQ_RETRANSLATE_OPT service request. + */ +struct ReqRetransOptData : IRExtraData { + explicit ReqRetransOptData(Transl::TransID transId, Offset offset) + : transId(transId) + , offset(offset) {} + std::string show() const { + return folly::to(transId, ", ", offset); + } + Transl::TransID transId; + Offset offset; +}; + /* * DefInlineFP is present when we need to create a frame for inlining. 
* This instruction also carries some metadata used by tracebuilder to @@ -401,6 +425,8 @@ X(DefInlineFP, DefInlineFPData); X(ReqBindJmp, BCOffset); X(ReqBindJmpNoIR, BCOffset); X(ReqRetranslateNoIR, BCOffset); +X(ReqRetranslateOpt, ReqRetransOptData); +X(CheckCold, TransIDData); X(CallArray, CallArrayData); X(LdClsCns, ClsCnsName); X(LookupClsCns, ClsCnsName); diff --git a/hphp/runtime/vm/jit/hhbc-translator.cpp b/hphp/runtime/vm/jit/hhbc-translator.cpp index 58050c72c..431e0bc9a 100755 --- a/hphp/runtime/vm/jit/hhbc-translator.cpp +++ b/hphp/runtime/vm/jit/hhbc-translator.cpp @@ -868,14 +868,15 @@ void HhbcTranslator::emitReqDoc(const StringData* name) { } template -SSATmp* HhbcTranslator::emitIterInitCommon(int offset, Lambda genFunc) { +SSATmp* HhbcTranslator::emitIterInitCommon(int offset, Lambda genFunc, + bool invertCond) { SSATmp* src = popC(); Type type = src->type(); if (!type.isArray() && type != Type::Obj) { PUNT(IterInit); } SSATmp* res = genFunc(src); - return emitJmpCondHelper(offset, true, res); + return emitJmpCondHelper(offset, !invertCond, res); } template @@ -895,39 +896,40 @@ SSATmp* HhbcTranslator::emitMIterInitCommon(int offset, Lambda genFunc) { void HhbcTranslator::emitIterInit(uint32_t iterId, int offset, - uint32_t valLocalId) { + uint32_t valLocalId, + bool invertCond) { emitIterInitCommon(offset, [&] (SSATmp* src) { - return gen( - IterInit, - Type::Bool, - src, - m_tb->fp(), - cns(iterId), - cns(valLocalId) - ); - }); + return gen(IterInit, + Type::Bool, + src, + m_tb->fp(), + cns(iterId), + cns(valLocalId)); + }, + invertCond); } void HhbcTranslator::emitIterInitK(uint32_t iterId, int offset, uint32_t valLocalId, - uint32_t keyLocalId) { + uint32_t keyLocalId, + bool invertCond) { emitIterInitCommon(offset, [&] (SSATmp* src) { - return gen( - IterInitK, - Type::Bool, - src, - m_tb->fp(), - cns(iterId), - cns(valLocalId), - cns(keyLocalId) - ); - }); + return gen(IterInitK, + Type::Bool, + src, + m_tb->fp(), + cns(iterId), + 
cns(valLocalId), + cns(keyLocalId)); + }, + invertCond); } void HhbcTranslator::emitIterNext(uint32_t iterId, int offset, - uint32_t valLocalId) { + uint32_t valLocalId, + bool invertCond) { SSATmp* res = gen( IterNext, Type::Bool, @@ -935,13 +937,14 @@ void HhbcTranslator::emitIterNext(uint32_t iterId, cns(iterId), cns(valLocalId) ); - emitJmpCondHelper(offset, false, res); + emitJmpCondHelper(offset, invertCond, res); } void HhbcTranslator::emitIterNextK(uint32_t iterId, int offset, uint32_t valLocalId, - uint32_t keyLocalId) { + uint32_t keyLocalId, + bool invertCond) { SSATmp* res = gen( IterNextK, Type::Bool, @@ -950,48 +953,47 @@ void HhbcTranslator::emitIterNextK(uint32_t iterId, cns(valLocalId), cns(keyLocalId) ); - emitJmpCondHelper(offset, false, res); + emitJmpCondHelper(offset, invertCond, res); } void HhbcTranslator::emitWIterInit(uint32_t iterId, int offset, - uint32_t valLocalId) { + uint32_t valLocalId, + bool invertCond) { emitIterInitCommon( offset, [&] (SSATmp* src) { - return gen( - WIterInit, - Type::Bool, - src, - m_tb->fp(), - cns(iterId), - cns(valLocalId) - ); - } - ); + return gen(WIterInit, + Type::Bool, + src, + m_tb->fp(), + cns(iterId), + cns(valLocalId)); + }, + invertCond); } void HhbcTranslator::emitWIterInitK(uint32_t iterId, int offset, uint32_t valLocalId, - uint32_t keyLocalId) { + uint32_t keyLocalId, + bool invertCond) { emitIterInitCommon( offset, [&] (SSATmp* src) { - return gen( - WIterInitK, - Type::Bool, - src, - m_tb->fp(), - cns(iterId), - cns(valLocalId), - cns(keyLocalId) - ); - } - ); + return gen(WIterInitK, + Type::Bool, + src, + m_tb->fp(), + cns(iterId), + cns(valLocalId), + cns(keyLocalId)); + }, + invertCond); } void HhbcTranslator::emitWIterNext(uint32_t iterId, int offset, - uint32_t valLocalId) { + uint32_t valLocalId, + bool invertCond) { SSATmp* res = gen( WIterNext, Type::Bool, @@ -999,13 +1001,14 @@ void HhbcTranslator::emitWIterNext(uint32_t iterId, cns(iterId), cns(valLocalId) ); - 
emitJmpCondHelper(offset, false, res); + emitJmpCondHelper(offset, invertCond, res); } void HhbcTranslator::emitWIterNextK(uint32_t iterId, int offset, uint32_t valLocalId, - uint32_t keyLocalId) { + uint32_t keyLocalId, + bool invertCond) { SSATmp* res = gen( WIterNextK, Type::Bool, @@ -1014,7 +1017,7 @@ void HhbcTranslator::emitWIterNextK(uint32_t iterId, cns(valLocalId), cns(keyLocalId) ); - emitJmpCondHelper(offset, false, res); + emitJmpCondHelper(offset, invertCond, res); } void HhbcTranslator::emitMIterInit(uint32_t iterId, @@ -1434,6 +1437,10 @@ void HhbcTranslator::emitIncTransCounter() { m_tb->gen(IncTransCounter); } +void HhbcTranslator::emitCheckCold(TransID transId) { + m_tb->gen(CheckCold, getExitOptTrace(transId), TransIDData(transId)); +} + SSATmp* HhbcTranslator::getStrName(const StringData* knownName) { SSATmp* name = popC(); assert(name->isA(Type::Str) || knownName); @@ -2589,7 +2596,7 @@ void HhbcTranslator::guardTypeLocal(uint32_t locId, Type type) { void HhbcTranslator::guardTypeLocation(const RegionDesc::Location& loc, Type type) { - assert(type.subtypeOf(Type::Gen | Type::Cls)); + assert(type.subtypeOf(Type::Gen)); typedef RegionDesc::Location::Tag T; switch (loc.tag()) { case T::Stack: guardTypeStack(loc.stackOffset(), type); break; @@ -2631,12 +2638,7 @@ void HhbcTranslator::assertTypeLocation(const RegionDesc::Location& loc, } void HhbcTranslator::guardTypeStack(uint32_t stackIndex, Type type) { - // Should not generate guards for class; instead assert their type - if (type.subtypeOf(Type::Cls)) { - assertTypeStack(stackIndex, type); - return; - } - + assert(type.subtypeOf(Type::Gen)); assert(m_evalStack.size() == 0); assert(m_stackDeficit == 0); // This should only be called at the beginning // of a trace, with a clean stack. 
@@ -2644,6 +2646,7 @@ void HhbcTranslator::guardTypeStack(uint32_t stackIndex, Type type) { } void HhbcTranslator::checkTypeStack(uint32_t idx, Type type, Offset dest) { + assert(type.subtypeOf(Type::Gen)); auto exitTrace = getExitTrace(dest); if (idx < m_evalStack.size()) { FTRACE(1, "checkTypeStack(){}: generating CheckType for {}\n", @@ -3907,15 +3910,15 @@ IRTrace* HhbcTranslator::getExitTrace(Offset targetBcOff /* = -1 */) { IRTrace* HhbcTranslator::getExitTrace(Offset targetBcOff, std::vector& spillValues) { if (targetBcOff == -1) targetBcOff = bcOff(); - return getExitTraceImpl(targetBcOff, ExitFlag::None, spillValues, - CustomExit{}); + return getExitTraceImpl(targetBcOff, ExitFlag::JIT, spillValues, + CustomExit{}); } IRTrace* HhbcTranslator::getExitTraceWarn(Offset targetBcOff, std::vector& spillValues, const StringData* warning) { assert(targetBcOff != -1); - return getExitTraceImpl(targetBcOff, ExitFlag::None, spillValues, + return getExitTraceImpl(targetBcOff, ExitFlag::JIT, spillValues, [&]() -> SSATmp* { gen(RaiseWarning, cns(warning)); return nullptr; @@ -3934,8 +3937,36 @@ IRTrace* HhbcTranslator::makeSideExit(Offset targetBcOff, ExitLambda exit) { IRTrace* HhbcTranslator::getExitSlowTrace() { auto spillValues = peekSpillValues(); - return getExitTraceImpl(bcOff(), ExitFlag::NoIR, spillValues, - CustomExit{}); + return getExitTraceImpl(bcOff(), ExitFlag::Interp, spillValues, + CustomExit{}); +} + +IRTrace* HhbcTranslator::getExitOptTrace(TransID transId) { + auto spillValues = peekSpillValues(); + Offset targetBcOff = bcOff(); + auto const exit = m_tb->makeExitTrace(targetBcOff); + + BCMarker exitMarker; + exitMarker.bcOff = targetBcOff; + exitMarker.spOff = m_tb->spOffset() + spillValues.size() - m_stackDeficit; + exitMarker.func = curFunc(); + + TracePusher tracePusher(*m_tb, exit, exitMarker); + + SSATmp* stack = nullptr; + if (m_stackDeficit != 0 || !spillValues.empty()) { + spillValues.insert(spillValues.begin(), + { m_tb->sp(), 
cns(int64_t(m_stackDeficit)) }); + stack = gen(SpillStack, + std::make_pair(spillValues.size(), &spillValues[0])); + } else { + stack = m_tb->sp(); + } + + gen(SyncABIRegs, m_tb->fp(), stack); + gen(ReqRetranslateOpt, ReqRetransOptData(transId, targetBcOff)); + + return exit; } IRTrace* HhbcTranslator::getExitTraceImpl(Offset targetBcOff, @@ -3985,7 +4016,7 @@ IRTrace* HhbcTranslator::getExitTraceImpl(Offset targetBcOff, gen(SyncABIRegs, m_tb->fp(), stack); - if (flag == ExitFlag::NoIR) { + if (flag == ExitFlag::Interp) { gen(targetBcOff == m_startBcOff ? ReqRetranslateNoIR : ReqBindJmpNoIR, BCOffset(targetBcOff)); return exit; @@ -3996,7 +4027,6 @@ IRTrace* HhbcTranslator::getExitTraceImpl(Offset targetBcOff, } else { gen(ReqBindJmp, BCOffset(targetBcOff)); } - return exit; } diff --git a/hphp/runtime/vm/jit/hhbc-translator.h b/hphp/runtime/vm/jit/hhbc-translator.h index cfcd5fae4..2f2047bdf 100755 --- a/hphp/runtime/vm/jit/hhbc-translator.h +++ b/hphp/runtime/vm/jit/hhbc-translator.h @@ -355,16 +355,24 @@ struct HhbcTranslator { void emitReqDoc(const StringData* name); // iterators - void emitIterInit(uint32_t iterId, int targetOffset, uint32_t valLocalId); + void emitIterInit(uint32_t iterId, + int targetOffset, + uint32_t valLocalId, + bool invertCond); void emitIterInitK(uint32_t iterId, int targetOffset, uint32_t valLocalId, - uint32_t keyLocalId); - void emitIterNext(uint32_t iterId, int targetOffset, uint32_t valLocalId); + uint32_t keyLocalId, + bool invertCond); + void emitIterNext(uint32_t iterId, + int targetOffset, + uint32_t valLocalId, + bool invertCond); void emitIterNextK(uint32_t iterId, int targetOffset, uint32_t valLocalId, - uint32_t keyLocalId); + uint32_t keyLocalId, + bool invertCond); void emitMIterInit(uint32_t iterId, int targetOffset, uint32_t valLocalId); void emitMIterInitK(uint32_t iterId, int targetOffset, @@ -375,16 +383,24 @@ struct HhbcTranslator { int targetOffset, uint32_t valLocalId, uint32_t keyLocalId); - void 
emitWIterInit(uint32_t iterId, int targetOffset, uint32_t valLocalId); + void emitWIterInit(uint32_t iterId, + int targetOffset, + uint32_t valLocalId, + bool invertCond); void emitWIterInitK(uint32_t iterId, int targetOffset, uint32_t valLocalId, - uint32_t keyLocalId); - void emitWIterNext(uint32_t iterId, int targetOffset, uint32_t valLocalId); + uint32_t keyLocalId, + bool invertCond); + void emitWIterNext(uint32_t iterId, + int targetOffset, + uint32_t valLocalId, + bool invertCond); void emitWIterNextK(uint32_t iterId, int targetOffset, uint32_t valLocalId, - uint32_t keyLocalId); + uint32_t keyLocalId, + bool invertCond); void emitIterFree(uint32_t iterId); void emitMIterFree(uint32_t iterId); @@ -414,6 +430,7 @@ struct HhbcTranslator { void emitStrlen(); void emitIncStat(int32_t counter, int32_t value, bool force = false); void emitIncTransCounter(); + void emitCheckCold(Transl::TransID transId); void emitArrayIdx(); private: @@ -682,7 +699,7 @@ private: SSATmp* emitIncDec(bool pre, bool inc, SSATmp* src); void emitBinaryArith(Opcode); template - SSATmp* emitIterInitCommon(int offset, Lambda genFunc); + SSATmp* emitIterInitCommon(int offset, Lambda genFunc, bool invertCond); BCMarker makeMarker(Offset bcOff); void updateMarker(); template @@ -724,6 +741,7 @@ private: // Exit trace creation routines. */ IRTrace* getExitSlowTrace(); IRTrace* getCatchTrace(); + IRTrace* getExitOptTrace(Transl::TransID transId); /* * Implementation for the above. Takes spillValues, target offset, @@ -734,16 +752,15 @@ private: // Exit trace creation routines. * on the stack before exiting. */ enum class ExitFlag { - None, - NoIR, - + Interp, // will bail to the interpreter to execute at least one BC instr + JIT, // will attempt to use the JIT to create a new translation // DelayedMarker means to use the current instruction marker // instead of one for targetBcOff. 
DelayedMarker, }; typedef std::function CustomExit; IRTrace* getExitTraceImpl(Offset targetBcOff, - ExitFlag noIRExit, + ExitFlag flag, std::vector& spillValues, const CustomExit&); diff --git a/hphp/runtime/vm/jit/ir-translator.cpp b/hphp/runtime/vm/jit/ir-translator.cpp index 39fd9409a..e5f97fa0c 100644 --- a/hphp/runtime/vm/jit/ir-translator.cpp +++ b/hphp/runtime/vm/jit/ir-translator.cpp @@ -117,11 +117,16 @@ void IRTranslator::checkType(const Transl::Location& l, using Transl::Location; switch (l.space) { - case Location::Stack: - m_hhbcTrans.guardTypeStack(locPhysicalOffset(l), - Type::fromRuntimeType(rtt)); + case Location::Stack: { + uint32_t stackOffset = locPhysicalOffset(l); + JIT::Type type = JIT::Type::fromRuntimeType(rtt); + if (type.subtypeOf(Type::Cls)) { + m_hhbcTrans.assertTypeStack(stackOffset, type); + } else { + m_hhbcTrans.guardTypeStack(stackOffset, type); + } break; - + } case Location::Local: m_hhbcTrans.guardTypeLocal(l.offset, Type::fromRuntimeType(rtt)); break; @@ -266,12 +271,27 @@ void IRTranslator::translateBranchOp(const NormalizedInstruction& i) { auto const op = i.op(); assert(op == OpJmpZ || op == OpJmpNZ); - assert(!i.next); + Offset takenOffset = i.offset() + i.imm[0].u_BA; + Offset fallthruOffset = i.offset() + instrLen((Op*)(i.pc())); + assert(i.breaksTracelet || + i.nextOffset == takenOffset || + i.nextOffset == fallthruOffset); + + if (i.breaksTracelet || i.nextOffset == fallthruOffset) { + if (op == OpJmpZ) { + HHIR_EMIT(JmpZ, takenOffset); + } else { + HHIR_EMIT(JmpNZ, takenOffset); + } + return; + } + assert(i.nextOffset == takenOffset); + // invert the branch if (op == OpJmpZ) { - HHIR_EMIT(JmpZ, i.offset() + i.imm[0].u_BA); + HHIR_EMIT(JmpNZ, fallthruOffset); } else { - HHIR_EMIT(JmpNZ, i.offset() + i.imm[0].u_BA); + HHIR_EMIT(JmpZ, fallthruOffset); } } @@ -1382,40 +1402,78 @@ IRTranslator::translateInstanceOfD(const NormalizedInstruction& i) { HHIR_EMIT(InstanceOfD, (i.imm[0].u_SA)); } +/* + * This function returns the 
offset of instruction i's branch target. + * This is normally the offset corresponding to the branch being + * taken. However, if i does not break a trace and it's followed in + * the trace by the instruction in the taken branch, then this + * function returns the offset of the i's fall-through instruction. + * In that case, the invertCond output argument is set to true; + * otherwise it's set to false. + */ +static Offset getBranchTarget(const NormalizedInstruction& i, + bool& invertCond) { + assert(instrJumpOffset((Op*)(i.pc())) != nullptr); + Offset targetOffset = i.offset() + i.imm[1].u_BA; + invertCond = false; + + if (!i.breaksTracelet && i.nextOffset == targetOffset) { + invertCond = true; + Offset fallthruOffset = i.offset() + instrLen((Op*)i.pc()); + targetOffset = fallthruOffset; + } + + return targetOffset; +} + void IRTranslator::translateIterInit(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); + HHIR_EMIT(IterInit, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, - i.imm[2].u_IVA); + targetOffset, + i.imm[2].u_IVA, + invertCond); } void IRTranslator::translateIterInitK(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); + HHIR_EMIT(IterInitK, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, + targetOffset, i.imm[2].u_IVA, - i.imm[3].u_IVA); + i.imm[3].u_IVA, + invertCond); } void IRTranslator::translateIterNext(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); HHIR_EMIT(IterNext, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, - i.imm[2].u_IVA); + targetOffset, + i.imm[2].u_IVA, + invertCond); } void IRTranslator::translateIterNextK(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); HHIR_EMIT(IterNextK, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, + targetOffset, i.imm[2].u_IVA, - i.imm[3].u_IVA); 
+ i.imm[3].u_IVA, + invertCond); } void @@ -1456,38 +1514,52 @@ IRTranslator::translateMIterNextK(const NormalizedInstruction& i) { void IRTranslator::translateWIterInit(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); + HHIR_EMIT(WIterInit, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, - i.imm[2].u_IVA); + targetOffset, + i.imm[2].u_IVA, + invertCond); } void IRTranslator::translateWIterInitK(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); + HHIR_EMIT(WIterInitK, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, + targetOffset, i.imm[2].u_IVA, - i.imm[3].u_IVA); + i.imm[3].u_IVA, + invertCond); } void IRTranslator::translateWIterNext(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); HHIR_EMIT(WIterNext, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, - i.imm[2].u_IVA); + targetOffset, + i.imm[2].u_IVA, + invertCond); } void IRTranslator::translateWIterNextK(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); HHIR_EMIT(WIterNextK, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, + targetOffset, i.imm[2].u_IVA, - i.imm[3].u_IVA); + i.imm[3].u_IVA, + invertCond); } void @@ -1613,6 +1685,8 @@ void IRTranslator::translateInstr(const NormalizedInstruction& i) { FTRACE(1, "\n{:-^60}\n", folly::format("translating {} with stack:\n{}", i.toString(), m_hhbcTrans.showStack())); + // When profiling, we disable type predictions to avoid side exits + assert(Transl::tx64->mode() != TransProfile || !i.outputPredicted); m_hhbcTrans.setBcOff(i.source.offset(), i.breaksTracelet && !m_hhbcTrans.isInlining()); diff --git a/hphp/runtime/vm/jit/ir.h b/hphp/runtime/vm/jit/ir.h index 345074e33..3bce1ce14 100755 --- a/hphp/runtime/vm/jit/ir.h +++ b/hphp/runtime/vm/jit/ir.h @@ -316,6 +316,7 @@ O(RaiseError, ND, S(Str), 
E|N|Mem|Refs|T|Er) \ O(RaiseWarning, ND, S(Str), E|N|Mem|Refs|Er) \ O(CheckInit, ND, S(Gen), NF) \ O(CheckInitMem, ND, S(PtrToGen) C(Int), NF) \ +O(CheckCold, ND, NA, E) \ O(AssertNonNull, DSubtract(0, Nullptr), S(Nullptr,CountedStr), NF) \ O(Unbox, DUnbox(0), S(Gen), NF) \ O(Box, DBox(0), S(Init), E|N|Mem|CRc|PRc) \ @@ -423,6 +424,7 @@ O(ExceptionBarrier, D(StkPtr), S(StkPtr), E) \ O(ReqBindJmp, ND, NA, T|E) \ O(ReqBindJmpNoIR, ND, NA, T|E) \ O(ReqRetranslateNoIR, ND, NA, T|E) \ +O(ReqRetranslateOpt, ND, NA, T|E) \ O(ReqRetranslate, ND, NA, T|E) \ O(SyncABIRegs, ND, S(FramePtr) S(StkPtr), E) \ O(Mov, DofS(0), SUnk, C|P) \ diff --git a/hphp/runtime/vm/jit/prof-data.cpp b/hphp/runtime/vm/jit/prof-data.cpp new file mode 100644 index 000000000..30793ae90 --- /dev/null +++ b/hphp/runtime/vm/jit/prof-data.cpp @@ -0,0 +1,233 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ +*/ + +#include "hphp/runtime/vm/jit/prof-data.h" + +#include +#include "hphp/util/base.h" +#include "hphp/runtime/vm/jit/translator.h" +#include "hphp/runtime/vm/jit/region-selection.h" + +namespace HPHP { +namespace JIT { + +static const Trace::Module TRACEMOD = Trace::pgo; + +using Transl::Tracelet; +using Transl::TransAnchor; +using Transl::TransProlog; +using Transl::TransProfile; + +/////////// Counters ////////// + +template +T ProfCounters::get(uint32_t id) const { + if (id / kCountersPerChunk >= m_chunks.size()) { + return m_initVal; + } + return m_chunks[id / kCountersPerChunk][id % kCountersPerChunk]; +} + +template +T* ProfCounters::getAddr(uint32_t id) { + // allocate a new chunk of counters if necessary + if (id >= m_chunks.size() * kCountersPerChunk) { + uint32_t size = sizeof(T) * kCountersPerChunk; + T* chunk = (T*)malloc(size); + std::fill_n(chunk, kCountersPerChunk, m_initVal); + m_chunks.push_back(chunk); + } + assert(id / kCountersPerChunk < m_chunks.size()); + return &(m_chunks[id / kCountersPerChunk][id % kCountersPerChunk]); +} + +/////////// ProfTransRec ////////// + +ProfTransRec::ProfTransRec(TransID id, + TransKind kind, + Offset lastBcOff, + const SrcKey& sk, + RegionDesc::BlockPtr block) + : m_id(id) + , m_kind(kind) + , m_lastBcOff(lastBcOff) + , m_block(block) + , m_sk(sk) { + assert(block == nullptr || block->start() == sk); +} + +ProfTransRec::ProfTransRec(TransID id, + TransKind kind, + const SrcKey& sk) + : m_id(id) + , m_kind(kind) + , m_lastBcOff(-1) + , m_block(nullptr) + , m_sk(sk) { + assert(kind == TransAnchor || kind == TransProlog); +} + +TransID ProfTransRec::transId() const { + return m_id; +} + +TransKind ProfTransRec::kind() const { + return m_kind; +} + +SrcKey ProfTransRec::srcKey() const { + return m_sk; +} + +Offset ProfTransRec::startBcOff() const { + return m_block->start().offset();; +} + +Offset ProfTransRec::lastBcOff() const { + 
return m_lastBcOff; +} + +Func* ProfTransRec::func() const { + return const_cast(m_block->func()); +} + +FuncId ProfTransRec::funcId() const { + return m_sk.getFuncId(); +} + +RegionDesc::BlockPtr ProfTransRec::block() const { + return m_block; +} + +/////////// ProfData ////////// + +ProfData::ProfData() + : m_numTrans(0) + , m_counters(RuntimeOption::EvalJitPGOThreshold) { +} + +uint32_t ProfData::numTrans() const { + return m_numTrans; +} + +TransID ProfData::curTransID() const { + return numTrans(); +} + +SrcKey ProfData::transSrcKey(TransID id) const { + assert(id < m_transRecs.size()); + return m_transRecs[id]->srcKey(); +} + +Offset ProfData::transStartBcOff(TransID id) const { + assert(id < m_transRecs.size()); + return m_transRecs[id]->startBcOff(); +} + +Offset ProfData::transLastBcOff(TransID id) const { + assert(id < m_transRecs.size()); + return m_transRecs[id]->lastBcOff(); +} + +Op* ProfData::transLastInstr(TransID id) const { + Unit* unit = transFunc(id)->unit(); + Offset lastBcOff = transLastBcOff(id); + return (Op*)(unit->at(lastBcOff)); +} + +Offset ProfData::transStopBcOff(TransID id) const { + Unit* unit = m_transRecs[id]->func()->unit(); + Offset lastBcOff = transLastBcOff(id); + return lastBcOff + instrLen((Op*)(unit->at(lastBcOff))); +} + +FuncId ProfData::transFuncId(TransID id) const { + assert(id < m_transRecs.size()); + return m_transRecs[id]->funcId(); +} + +Func* ProfData::transFunc(TransID id) const { + assert(id < m_transRecs.size()); + return m_transRecs[id]->func(); +} + +TransKind ProfData::transKind(TransID id) const { + assert(id < m_numTrans); + return m_transRecs[id]->kind(); +} + +int64_t ProfData::transCounter(TransID id) const { + assert(id < m_numTrans); + return m_counters.get(id); +} + +int64_t* ProfData::transCounterAddr(TransID id) { + return m_counters.getAddr(id); +} + +bool ProfData::optimized(const SrcKey& sk) const { + return mapContains(m_optimized, sk); +} + +void ProfData::setOptimized(const SrcKey& sk) { + 
m_optimized.insert(sk); +} + +RegionDesc::BlockPtr ProfData::transBlock(TransID id) const { + assert(id < m_transRecs.size()); + const ProfTransRec& pTransRec = *m_transRecs[id]; + return pTransRec.block(); +} + +/* + * Temporary work-around. + * + * TODO: get rid of this once translateRegion supports inlining + */ +static bool supportedTracelet(TransID transId, const Tracelet& tlet) { + for (auto instr = tlet.m_instrStream.first; instr; instr = instr->next) { + if (instr->calleeTrace) { + FTRACE(5, "supportedTracelet: unsupported {}: has inlining\n", transId); + return false; + } + } + + return true; +} + +TransID ProfData::addTrans(const Tracelet& tracelet, TransKind kind) { + TransID transId = m_numTrans++; + Offset lastBcOff = tracelet.m_instrStream.last->source.offset(); + auto block = kind == TransProfile && supportedTracelet(transId, tracelet) ? + createBlock(tracelet) : nullptr; + m_transRecs.emplace_back(new ProfTransRec(transId, kind, lastBcOff, + tracelet.m_sk, block)); + return transId; +} + +TransID ProfData::addTransProlog(const SrcKey& sk) { + TransID transId = m_numTrans++; + m_transRecs.emplace_back(new ProfTransRec(transId, TransProlog, sk)); + return transId; +} + +TransID ProfData::addTransAnchor(const SrcKey& sk) { + TransID transId = m_numTrans++; + m_transRecs.emplace_back(new ProfTransRec(transId, TransAnchor, sk)); + return transId; +} + +} } diff --git a/hphp/runtime/vm/jit/prof-data.h b/hphp/runtime/vm/jit/prof-data.h new file mode 100644 index 000000000..be4e12feb --- /dev/null +++ b/hphp/runtime/vm/jit/prof-data.h @@ -0,0 +1,137 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. 
(http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#ifndef incl_HPHP_PROF_TRANS_DATA_H_ +#define incl_HPHP_PROF_TRANS_DATA_H_ + +#include + +#include "hphp/util/base.h" +#include "hphp/runtime/base/types.h" +#include "hphp/runtime/vm/func.h" +#include "hphp/runtime/vm/srckey.h" +#include "hphp/runtime/vm/jit/types.h" +#include "hphp/runtime/vm/jit/runtime-type.h" +#include "hphp/runtime/vm/jit/region-selection.h" + +namespace HPHP { +namespace JIT { + +using Transl::TransID; +using Transl::TransKind; +using Transl::Tracelet; + +/** + * A simple class of a growable number of profiling counters with + * fixed addresses, suitable for being incremented from the TC. + */ +template +class ProfCounters { + public: + explicit ProfCounters(T initVal) + : m_initVal(initVal) + {} + + ProfCounters(const ProfCounters&) = delete; + ProfCounters& operator=(const ProfCounters&) = delete; + + ~ProfCounters() { + for (size_t i = 0; i < m_chunks.size(); i++) { + free(m_chunks[i]); + } + } + + T get(uint32_t id) const; + T* getAddr(uint32_t id); + + private: + static const uint32_t kCountersPerChunk = 2 * 1024 * 1024 / sizeof(T); + + T m_initVal; + vector m_chunks; +}; + + +/** + * A profiling record kept for each translation in JitPGO mode. 
+ */ +class ProfTransRec { + public: + ProfTransRec(TransID id, TransKind kind, Offset lastBcOff, const SrcKey& sk, + RegionDesc::BlockPtr block); + ProfTransRec(TransID id, TransKind kind, const SrcKey& sk); + + TransID transId() const; + TransKind kind() const; + SrcKey srcKey() const; + Offset startBcOff() const; + Offset lastBcOff() const; + Func* func() const; + FuncId funcId() const; + RegionDesc::BlockPtr block() const; + + private: + TransID m_id; // sequential ID of the assiciated translation + TransKind m_kind; + Offset m_lastBcOff; // offset of the last bytecode instr + RegionDesc::BlockPtr m_block; + SrcKey m_sk; +}; + +typedef std::unique_ptr ProfTransRecPtr; + +/** + * ProfData encapsulates the profiling data kept by the JIT. + */ +class ProfData { +public: + ProfData(); + + ProfData(const ProfData&) = delete; + ProfData& operator=(const ProfData&) = delete; + + TransID numTrans() const; + TransID curTransID() const; + + SrcKey transSrcKey(TransID id) const; + Offset transStartBcOff(TransID id) const; + Offset transLastBcOff(TransID id) const; + Op* transLastInstr(TransID id) const; + Offset transStopBcOff(TransID id) const; + FuncId transFuncId(TransID id) const; + Func* transFunc(TransID id) const; + RegionDesc::BlockPtr transBlock(TransID id) const; + TransKind transKind(TransID id) const; + int64_t transCounter(TransID id) const; + int64_t* transCounterAddr(TransID id); + + TransID addTrans(const Tracelet& tracelet, TransKind kind); + TransID addTransProlog(const SrcKey& sk); + TransID addTransAnchor(const SrcKey& sk); + + bool optimized(const SrcKey& sk) const; + void setOptimized(const SrcKey& sk); + +private: + uint32_t m_numTrans; + vector m_transRecs; + ProfCounters m_counters; + SrcKeySet m_optimized; // set of SrcKeys already optimized +}; + +} } + +#endif diff --git a/hphp/runtime/vm/jit/region-hot-block.cpp b/hphp/runtime/vm/jit/region-hot-block.cpp new file mode 100644 index 000000000..effbaf131 --- /dev/null +++ 
b/hphp/runtime/vm/jit/region-hot-block.cpp @@ -0,0 +1,37 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#include "hphp/runtime/vm/jit/trans-cfg.h" +#include "hphp/runtime/vm/jit/translator-inline.h" + +namespace HPHP { +namespace JIT { + +RegionDescPtr selectHotBlock(TransID transId, + const ProfData* profData, + const TransCFG& cfg) { + RegionDescPtr region = smart::make_unique(); + + RegionDesc::BlockPtr block = profData->transBlock(transId); + + if (block != nullptr) { + region->blocks.emplace_back(block); + } + + return region; +} + +} } diff --git a/hphp/runtime/vm/jit/region-hot-trace.cpp b/hphp/runtime/vm/jit/region-hot-trace.cpp new file mode 100644 index 000000000..96cdbe68d --- /dev/null +++ b/hphp/runtime/vm/jit/region-hot-trace.cpp @@ -0,0 +1,178 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. 
(http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#include "hphp/runtime/vm/jit/trans-cfg.h" +#include "hphp/runtime/vm/jit/translator-inline.h" + +namespace HPHP { +namespace JIT { + +static const Trace::Module TRACEMOD = Trace::pgo; + +/** + * This function returns true for control-flow bytecode instructions that + * are not support in the middle of a region yet. + */ +static bool breaksRegion(Op opc) { + switch (opc) { + case OpMIterNext: + case OpMIterNextK: + case OpSwitch: + case OpSSwitch: + case OpContSuspend: + case OpContRetC: + case OpRetC: + case OpRetV: + case OpExit: + case OpFatal: + case OpMIterInit: + case OpMIterInitK: + case OpIterBreak: + case OpDecodeCufIter: + case OpThrow: + case OpUnwind: + case OpEval: + case OpNativeImpl: + case OpContHandle: + return true; + + default: + return false; + } +} + +/** + * Returns the set of bytecode offsets for the instructions that may + * be executed immediately after opc. 
+ */ +static OffsetSet findSuccOffsets(Op* opc, const Unit* unit) { + OffsetSet succBcOffs; + Op* bcStart = (Op*)(unit->entry()); + + if (!instrIsControlFlow(*opc)) { + Offset succOff = opc + instrLen(opc) - bcStart; + succBcOffs.insert(succOff); + return succBcOffs; + } + + if (instrAllowsFallThru(*opc)) { + Offset succOff = opc + instrLen(opc) - bcStart; + succBcOffs.insert(succOff); + } + + if (isSwitch(*opc)) { + foreachSwitchTarget(opc, [&](Offset& offset) { + succBcOffs.insert(offset); + }); + } else { + Offset target = instrJumpTarget(bcStart, opc - bcStart); + if (target != InvalidAbsoluteOffset) { + succBcOffs.insert(target); + } + } + return succBcOffs; +} + +RegionDescPtr selectHotTrace(TransID triggerId, + const ProfData* profData, + TransCFG& cfg, + TransIDSet& selectedSet) { + JIT::RegionDescPtr region = smart::make_unique(); + TransID tid = triggerId; + TransID prevId = InvalidID; + selectedSet.clear(); + + while (!setContains(selectedSet, tid)) { + + RegionDesc::BlockPtr block = profData->transBlock(tid); + if (block == nullptr) break; + + // If the debugger is attached, only allow single-block regions. + if (prevId != InvalidID && isDebuggerAttachedProcess()) { + FTRACE(5, "selectHotRegion: breaking region at Translation {} " + "because of debugger is attached\n", tid); + break; + } + + // Break if block is not the first and requires reffiness checks. + // Task #2589970: fix translateRegion to support mid-region reffiness checks + if (prevId != InvalidID) { + auto nRefDeps = block->reffinessPreds().size(); + if (nRefDeps > 0) { + FTRACE(5, "selectHotRegion: breaking region because of refDeps ({}) at " + "Translation {}\n", nRefDeps, tid); + break; + } + } + + // Break trace if translation tid cannot follow the execution of + // the entire translation prevTd. This can only happen if the + // execution of prevId takes a side exit that leads to the + // execution of tid. 
+ if (prevId != InvalidID) { + Op* lastInstr = profData->transLastInstr(prevId); + const Unit* unit = profData->transFunc(prevId)->unit(); + OffsetSet succOffs = findSuccOffsets(lastInstr, unit); + if (!setContains(succOffs, profData->transSrcKey(tid).offset())) { + if (HPHP::Trace::moduleEnabled(HPHP::Trace::pgo, 5)) { + FTRACE(5, "selectHotTrace: WARNING: Breaking region @: {}\n", + JIT::show(*region)); + FTRACE(5, "selectHotTrace: next translation selected: tid = {}\n{}\n", + tid, JIT::show(*block)); + std::string succStr("succOffs = "); + for (auto succ : succOffs) { + succStr += lexical_cast(succ); + } + FTRACE(5, "\n{}\n", succStr); + } + break; + } + } + region->blocks.emplace_back(block); + selectedSet.insert(tid); + + Op lastOp = *(profData->transLastInstr(tid)); + if (breaksRegion(lastOp)) { + FTRACE(5, "selectHotTrace: breaking region because of last instruction " + "in Translation {}: {}\n", tid, opcodeToName(lastOp)); + break; + } + + auto outArcs = cfg.outArcs(tid); + if (outArcs.size() == 0) { + FTRACE(5, "selectHotTrace: breaking region because there's no successor " + "for Translation {}\n", tid); + break; + } + + auto maxWeight = std::numeric_limits::min(); + TransCFG::Arc* maxArc = nullptr; + for (auto arc : outArcs) { + if (arc->weight() >= maxWeight) { + maxWeight = arc->weight(); + maxArc = arc; + } + } + assert(maxArc != nullptr); + + prevId = tid; + tid = maxArc->dst(); + } + + return region; +} + +} } diff --git a/hphp/runtime/vm/jit/region-method.cpp b/hphp/runtime/vm/jit/region-method.cpp index 52aaf4c96..913bb2d0a 100644 --- a/hphp/runtime/vm/jit/region-method.cpp +++ b/hphp/runtime/vm/jit/region-method.cpp @@ -54,12 +54,12 @@ int numInstrs(PC start, PC end) { * back to the tracelet compiler. (This will happen for side-exits * from method regions, for example.) 
*/ -RegionDescPtr regionMethod(const RegionContext& context) { +RegionDescPtr selectMethod(const RegionContext& context) { using namespace HPHP::Verifier; if (!isFuncEntry(context.func, context.bcOffset)) return nullptr; - FTRACE(1, "function entry for {}: using regionMethod\n", - context.func->fullName()->data()); + FTRACE(1, "function entry for {}: using selectMethod\n", + context.func->fullName()->data()); auto ret = smart::make_unique(); diff --git a/hphp/runtime/vm/jit/region-onebc.cpp b/hphp/runtime/vm/jit/region-onebc.cpp index 31056ed9c..e11dc5428 100644 --- a/hphp/runtime/vm/jit/region-onebc.cpp +++ b/hphp/runtime/vm/jit/region-onebc.cpp @@ -25,7 +25,7 @@ namespace HPHP { namespace JIT { * A dummy (debugging) region selector that just uses a single HHBC * opcode as the region, and guards on everything. */ -RegionDescPtr regionOneBC(const RegionContext& ctx) { +RegionDescPtr selectOneBC(const RegionContext& ctx) { auto ret = smart::make_unique(); auto blk = smart::make_unique(ctx.func, ctx.bcOffset, 1); diff --git a/hphp/runtime/vm/jit/region-selection.cpp b/hphp/runtime/vm/jit/region-selection.cpp index 1f7dfe41a..a3c90b3e3 100644 --- a/hphp/runtime/vm/jit/region-selection.cpp +++ b/hphp/runtime/vm/jit/region-selection.cpp @@ -25,36 +25,57 @@ #include "hphp/util/map_walker.h" #include "hphp/runtime/base/runtime_option.h" #include "hphp/runtime/vm/jit/translator.h" +#include "hphp/runtime/vm/jit/trans-cfg.h" +#include "hphp/runtime/vm/jit/translator-inline.h" namespace HPHP { namespace JIT { TRACE_SET_MOD(region); +using Transl::TransID; +using Transl::TranslatorX64; + ////////////////////////////////////////////////////////////////////// -extern RegionDescPtr regionMethod(const RegionContext&); -extern RegionDescPtr regionOneBC(const RegionContext&); -extern RegionDescPtr regionTracelet(const RegionContext&); +extern RegionDescPtr selectMethod(const RegionContext&); +extern RegionDescPtr selectOneBC(const RegionContext&); +extern RegionDescPtr 
selectTracelet(const RegionContext&); +extern RegionDescPtr selectHotBlock(TransID transId, + const ProfData* profData, + const TransCFG& cfg); +extern RegionDescPtr selectHotTrace(TransID triggerId, + const ProfData* profData, + TransCFG& cfg, + TransIDSet& selectedSet); ////////////////////////////////////////////////////////////////////// namespace { enum class RegionMode { - None, - OneBC, - Method, - Tracelet, - Legacy, + None, // empty region + + // Modes that create a region by inspecting live VM state + OneBC, // region with a single bytecode instruction + Method, // region with a whole method + Tracelet, // single-entry, multiple-exits region that ends on conditional + // branches or when an instruction consumes a value of unknown type + Legacy, // same as Tracelet, but using the legacy analyze() code + + // Modes that create a region by leveraging profiling data + HotBlock, // single-entry, single-exit region + HotTrace, // single-entry, multiple-exits region }; RegionMode regionMode() { auto& s = RuntimeOption::EvalJitRegionSelector; - if (s == "") return RegionMode::None; - if (s == "onebc") return RegionMode::OneBC; - if (s == "method") return RegionMode::Method; + if (s == "" ) return RegionMode::None; + if (s == "onebc" ) return RegionMode::OneBC; + if (s == "method" ) return RegionMode::Method; if (s == "tracelet") return RegionMode::Tracelet; - if (s == "legacy") return RegionMode::Legacy; + if (s == "legacy" ) return RegionMode::Legacy; + if (s == "hotblock") return RegionMode::HotBlock; + if (s == "hottrace") return RegionMode::HotTrace; FTRACE(1, "unknown region mode {}: using none\n", s); if (debug) abort(); return RegionMode::None; @@ -163,7 +184,7 @@ void RegionDesc::Block::checkInvariants() const { ////////////////////////////////////////////////////////////////////// namespace { -RegionDescPtr createRegion(const Transl::Tracelet& tlet) { +RegionDescPtr selectTraceletLegacy(const Transl::Tracelet& tlet) { typedef 
Transl::NormalizedInstruction NI; typedef RegionDesc::Block Block; @@ -176,7 +197,7 @@ RegionDescPtr createRegion(const Transl::Tracelet& tlet) { Block* curBlock; auto newBlock = [&] { region->blocks.push_back( - smart::make_unique(tlet.m_func, sk.offset(), 0)); + std::make_shared(tlet.m_func, sk.offset(), 0)); curBlock = region->blocks.back().get(); }; newBlock(); @@ -253,6 +274,15 @@ RegionDescPtr createRegion(const Transl::Tracelet& tlet) { } } +RegionDesc::BlockPtr createBlock(const Transl::Tracelet& tlet) { + RegionDescPtr region = selectTraceletLegacy(tlet); + + if (region == nullptr) return nullptr; + + always_assert(region->blocks.size() == 1); + return region->blocks.front(); +} + RegionDescPtr selectRegion(const RegionContext& context, const Transl::Tracelet* t) { auto const mode = regionMode(); @@ -281,11 +311,15 @@ RegionDescPtr selectRegion(const RegionContext& context, auto region = [&]{ try { switch (mode) { - case RegionMode::None: return RegionDescPtr{nullptr}; - case RegionMode::OneBC: return regionOneBC(context); - case RegionMode::Method: return regionMethod(context); - case RegionMode::Tracelet: return regionTracelet(context); - case RegionMode::Legacy: always_assert(t); return createRegion(*t); + case RegionMode::None: return RegionDescPtr{nullptr}; + case RegionMode::OneBC: return selectOneBC(context); + case RegionMode::Method: return selectMethod(context); + case RegionMode::Tracelet: return selectTracelet(context); + case RegionMode::Legacy: + always_assert(t); return selectTraceletLegacy(*t); + case RegionMode::HotBlock: + case RegionMode::HotTrace: always_assert(0 && + "unsupported region mode"); } not_reached(); } catch (const std::exception& e) { @@ -303,6 +337,48 @@ RegionDescPtr selectRegion(const RegionContext& context, return region; } +RegionDescPtr selectHotRegion(TransID transId, + TranslatorX64* tx64) { + + assert(RuntimeOption::EvalJitPGO); + + const ProfData* profData = tx64->profData(); + FuncId funcId = 
profData->transFuncId(transId); + TransCFG cfg(funcId, profData, tx64->getSrcDB(), tx64->getJmpToTransIDMap()); + TransIDSet selectedTIDs; + RegionDescPtr region = nullptr; + RegionMode mode = regionMode(); + + switch (mode) { + case RegionMode::None: + region = RegionDescPtr{nullptr}; + break; + case RegionMode::HotBlock: + region = selectHotBlock(transId, profData, cfg); + break; + case RegionMode::HotTrace: + region = selectHotTrace(transId, profData, cfg, selectedTIDs); + break; + case RegionMode::OneBC: + case RegionMode::Method: + case RegionMode::Tracelet: + case RegionMode::Legacy: + always_assert(0 && "unsupported region mode"); + } + + if (Trace::moduleEnabled(HPHP::Trace::pgo, 5)) { + std::string dotFileName = string("/tmp/trans-cfg-") + + lexical_cast(transId) + ".dot"; + + cfg.print(dotFileName, profData, &selectedTIDs); + FTRACE(5, "selectHotRegion: New Translation {} (file: {}) {}\n", + tx64->profData()->curTransID(), dotFileName, + region ? show(*region) : std::string("empty region")); + } + + return region; +} + ////////////////////////////////////////////////////////////////////// std::string show(RegionDesc::Location l) { diff --git a/hphp/runtime/vm/jit/region-selection.h b/hphp/runtime/vm/jit/region-selection.h index 243037280..b004dd9cf 100644 --- a/hphp/runtime/vm/jit/region-selection.h +++ b/hphp/runtime/vm/jit/region-selection.h @@ -26,11 +26,15 @@ #include "hphp/runtime/base/smart_containers.h" #include "hphp/runtime/vm/srckey.h" #include "hphp/runtime/vm/jit/type.h" +#include "hphp/runtime/vm/jit/types.h" namespace HPHP { + namespace Transl { struct Tracelet; +struct TranslatorX64; } + namespace JIT { using boost::container::flat_map; @@ -52,7 +56,7 @@ struct RegionDesc { struct Location; struct TypePred; struct ReffinessPred; - typedef smart::unique_ptr::type BlockPtr; + typedef std::shared_ptr BlockPtr; enum class ParamByRef : uint8_t { Yes, No, @@ -158,7 +162,6 @@ public: if (debug) checkInvariants(); } - Block(const Block&) = delete; 
Block& operator=(const Block&) = delete; /* @@ -270,16 +273,34 @@ struct RegionContext::PreLiveAR { ////////////////////////////////////////////////////////////////////// /* - * Define a compilation region that starts with sk. + * Select a compilation region corresponding to the given context. + * The shape of the region selected is controlled by + * RuntimeOption::EvalJitRegionSelector. If the specified shape is + * 'tracelet', then the input argument t is used to build the region. * - * May return nullptr. + * This function may return nullptr. * * For now this is hooked up in TranslatorX64::translateWork, and * returning nullptr causes it to use the current level 0 tracelet * analyzer. Eventually we'd like analyze to occur underneath this as * well. */ -RegionDescPtr selectRegion(const RegionContext&, const Transl::Tracelet*); +RegionDescPtr selectRegion(const RegionContext& context, + const Transl::Tracelet* t); + +/* + * Select a compilation region based on profiling information. This + * is used in JitPGO mode. Argument transId specifies the profiling + * translation that triggered the profiling-based region selection. + */ +RegionDescPtr selectHotRegion(Transl::TransID transId, + Transl::TranslatorX64* tx64); + +/* + * Creates a Block corresponding to tracelet tlet. This function + * assumes that tlet contains a single block. + */ +RegionDesc::BlockPtr createBlock(const Transl::Tracelet& tlet); /* * Debug stringification for various things. diff --git a/hphp/runtime/vm/jit/region-tracelet.cpp b/hphp/runtime/vm/jit/region-tracelet.cpp index ce832adf9..8e93dfafb 100644 --- a/hphp/runtime/vm/jit/region-tracelet.cpp +++ b/hphp/runtime/vm/jit/region-tracelet.cpp @@ -195,7 +195,7 @@ RegionDescPtr regionTraceletImpl(const RegionContext& ctx, * attempts to consume an input with an insufficiently precise type. 
* */ -RegionDescPtr regionTracelet(const RegionContext& ctx) { +RegionDescPtr selectTracelet(const RegionContext& ctx) { InterpSet interp; RegionDescPtr region; uint32_t tries = 1; diff --git a/hphp/runtime/vm/jit/srcdb.cpp b/hphp/runtime/vm/jit/srcdb.cpp index 0f57974f4..a384f16a4 100644 --- a/hphp/runtime/vm/jit/srcdb.cpp +++ b/hphp/runtime/vm/jit/srcdb.cpp @@ -48,6 +48,11 @@ TCA SrcRec::getFallbackTranslation() const { } void SrcRec::chainFrom(IncomingBranch br) { + assert(br.type() == IncomingBranch::Tag::ADDR || + tx64->a. contains(br.toSmash()) || + tx64->ahot. contains(br.toSmash()) || + tx64->astubs. contains(br.toSmash()) || + tx64->atrampolines.contains(br.toSmash())); TCA destAddr = getTopTranslation(); m_incomingBranches.push_back(br); TRACE(1, "SrcRec(%p)::chainFrom %p -> %p (type %d); %zd incoming branches\n", @@ -175,14 +180,15 @@ void SrcRec::replaceOldTranslations() { * If we ever change that we'll have to change this to patch to * some sort of rebind requests. */ - assert(!RuntimeOption::RepoAuthoritative); + assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO); patchIncomingBranches(m_anchorTranslation); } void SrcRec::patch(IncomingBranch branch, TCA dest) { switch (branch.type()) { case IncomingBranch::Tag::JMP: { - auto& a = tx64->getAsmFor(branch.toSmash()); + auto toSmash = branch.toSmash(); + auto& a = tx64->getAsmFor(toSmash); CodeCursor cg(a, branch.toSmash()); TranslatorX64::smashJmp(a, branch.toSmash(), dest); break; diff --git a/hphp/runtime/vm/jit/srcdb.h b/hphp/runtime/vm/jit/srcdb.h index ab8375243..774431ed8 100644 --- a/hphp/runtime/vm/jit/srcdb.h +++ b/hphp/runtime/vm/jit/srcdb.h @@ -125,6 +125,10 @@ struct SrcRec { return m_inProgressTailJumps; } + const vector& incomingBranches() const { + return m_incomingBranches; + } + void clearInProgressTailJumps() { m_inProgressTailJumps.clear(); } diff --git a/hphp/runtime/vm/jit/trace-builder.cpp b/hphp/runtime/vm/jit/trace-builder.cpp index 78db6a1e6..1560c9497 100644 
--- a/hphp/runtime/vm/jit/trace-builder.cpp +++ b/hphp/runtime/vm/jit/trace-builder.cpp @@ -21,6 +21,7 @@ #include "hphp/util/trace.h" #include "hphp/runtime/vm/jit/target-cache.h" #include "hphp/runtime/vm/jit/ir-factory.h" +#include "hphp/util/assertions.h" namespace HPHP { namespace JIT { @@ -287,6 +288,7 @@ void TraceBuilder::updateTrackedState(IRInstruction* inst) { // fallthrough case AssertLoc: case GuardLoc: + case CheckLoc: setLocalType(inst->extra()->locId, inst->typeParam()); break; @@ -572,21 +574,39 @@ SSATmp* TraceBuilder::cseLookup(IRInstruction* inst, SSATmp* TraceBuilder::preOptimizeCheckLoc(IRInstruction* inst) { auto const locId = inst->extra()->locId; + Type typeParam = inst->typeParam(); if (auto const prevValue = getLocalValue(locId)) { - always_assert(false && "WTF"); - return gen( - CheckType, inst->typeParam(), inst->taken(), prevValue - ); + return gen(CheckType, typeParam, inst->taken(), prevValue); } auto const prevType = getLocalType(locId); - if (prevType != Type::None) { - always_assert(false && "WTF2"); - // It doesn't make sense to be checking something that's deemed to - // fail. - assert(prevType == inst->typeParam()); + + if (prevType == Type::None) { + return nullptr; + } + + if (prevType.subtypeOf(typeParam)) { inst->convertToNop(); + } else { + // + // Normally, it doesn't make sense to be checking something that's + // deemed to fail. Incompatible boxed types are ok though, since + // we don't track them precisely, but instead check them at every + // use. + // + // However, in JitPGO mode right now, this pathological case can + // happen, because profile counters are not accurate and we + // currently don't analyze Block post-conditions when picking its + // successors during region selection. This can lead to + // incompatible types in blocks selected for the same region. 
+ // + if (!typeParam.isBoxed() || !prevType.isBoxed()) { + if ((typeParam & prevType) == Type::Bottom) { + assert(RuntimeOption::EvalJitPGO); + return gen(Jmp_, inst->taken()); + } + } } return nullptr; @@ -599,12 +619,22 @@ SSATmp* TraceBuilder::preOptimizeAssertLoc(IRInstruction* inst) { if (!prevType.equals(Type::None) && !typeParam.strictSubtypeOf(prevType)) { if (!prevType.subtypeOf(typeParam)) { + /* Task #2553746 + * This is triggering for a case where the tracked state says the local is + * InitNull but the AssertLoc says it's Str. */ static auto const error = StringData::GetStaticString("Internal error: static analysis was " "wrong about a local variable's type."); auto* errorInst = m_irFactory.gen(RaiseError, inst->marker(), cns(error)); inst->become(&m_irFactory, errorInst); - assert(false && "Incorrect local type from static analysis"); + assert_log(false, [&]{ + IRTrace& mainTrace = trace()->isMain() ? *trace() + : *(trace()->main()); + return folly::format("\npreOptimizeAssertLoc: prevType: {} " + "typeParam: {}\nin instr: {}\nin trace: {}\n", + prevType.toString(), typeParam.toString(), + inst->toString(), mainTrace.toString()).str(); + }); } else { inst->convertToNop(); } @@ -810,6 +840,7 @@ SSATmp* TraceBuilder::optimizeWork(IRInstruction* inst, // Found a dominating instruction that can be used instead of inst FTRACE(1, " {}cse found: {}\n", indent(), result->inst()->toString()); + assert(!inst->consumesReferences()); if (inst->producesReference()) { // Replace with an IncRef FTRACE(1, " {}cse of refcount-producing instruction\n", indent()); diff --git a/hphp/runtime/vm/jit/trans-cfg.cpp b/hphp/runtime/vm/jit/trans-cfg.cpp new file mode 100644 index 000000000..6ec0bd62a --- /dev/null +++ b/hphp/runtime/vm/jit/trans-cfg.cpp @@ -0,0 +1,215 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 
Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#include "hphp/runtime/vm/jit/trans-cfg.h" + +namespace HPHP { +namespace JIT { + +static const Trace::Module TRACEMOD = Trace::pgo; + +static TransIDSet findPredTrans(const SrcRec* sr, + const TcaTransIDMap& jmpToTransID) { + assert(sr); + TransIDSet predSet; + + for (auto inBr : sr->incomingBranches()) { + TransID srcId = mapGet(jmpToTransID, inBr.toSmash(), InvalidID); + FTRACE(5, "findPredTrans: toSmash = {} srcId = {}\n", + inBr.toSmash(), srcId); + if (srcId != InvalidID) { + predSet.insert(srcId); + } + } + + return predSet; +} + +/** + * This function tries to infer the weight of any arc in the arcVec given the + * weights of other arcs in the list and totalWeight, which is the + * known sum of all their weights. + * Returns whether or not the weight of any arc was inferred and, in case of + * success, the weight of such arc is updated. 
+ */ +static bool inferredArcWeight(const TransCFG::ArcPtrVec& arcVec, + int64_t totalWeight) { + int64_t arcWeight = totalWeight; + TransCFG::Arc* unknownArc = nullptr; + for (auto arc : arcVec) { + if (arc->weight() == TransCFG::Arc::kUnknownWeight) { + if (unknownArc != nullptr) { + // More than one arc with unknown weight, so can't infer + return false; + } + unknownArc = arc; + } else { + arcWeight -= arc->weight(); + } + } + if (unknownArc == nullptr) return false; + // Avoid creating negative-weight arcs. Node weights are not required to be + // accurate and, since arc weights are derived from nodes' weights, they + // aren't accurate either. This can result in arcWeight to be negative here. + if (arcWeight < 0) arcWeight = 0; + unknownArc->setWeight(arcWeight); + return true; +} + +TransCFG::TransCFG(FuncId funcId, + const ProfData* profData, + const SrcDB& srcDB, + const TcaTransIDMap& jmpToTransID) { + assert(profData); + + // add nodes + for (TransID tid = 0; tid < profData->numTrans(); tid++) { + if (profData->transKind(tid) == TransProfile && + profData->transBlock(tid) != nullptr && + profData->transFuncId(tid) == funcId) { + int64_t counter = profData->transCounter(tid); + int64_t weight = RuntimeOption::EvalJitPGOThreshold - counter; + addNode(tid, weight); + } + } + + // add arcs + for (TransID dstId : nodes()) { + SrcKey dstSK = profData->transSrcKey(dstId); + const SrcRec* dstSR = srcDB.find(dstSK); + FTRACE(5, "TransCFG: adding incoming arcs in dstId = {}\n", dstId); + TransIDSet predIDs = findPredTrans(dstSR, jmpToTransID); + for (auto predId : predIDs) { + if (hasNode(predId)) { + FTRACE(5, "TransCFG: adding arc {} -> {}\n", predId, dstId); + addArc(predId, dstId, TransCFG::Arc::kUnknownWeight); + } + } + } + + // infer arc weights + bool changed; + do { + changed = false; + for (TransID tid : nodes()) { + int64_t nodeWeight = weight(tid); + if (inferredArcWeight(inArcs(tid), nodeWeight)) changed = true; + if (inferredArcWeight(outArcs(tid), 
nodeWeight)) changed = true; + } + } while (changed); + + // guess weight for non-inferred arcs + for (TransID tid : nodes()) { + for (auto arc : outArcs(tid)) { + if (arc->weight() == Arc::kUnknownWeight) { + arc->setGuessed(); + int64_t arcWgt = std::min(weight(arc->src()), weight(arc->dst())) / 2; + arc->setWeight(arcWgt); + } + } + } +} + +int64_t TransCFG::weight(TransID id) const { + assert(hasNode(id)); + size_t idx = mapGet(m_idToIdx, id); + return m_nodeInfo[idx].weight(); +} + +const TransCFG::ArcPtrVec& TransCFG::inArcs(TransID id) const { + assert(hasNode(id)); + size_t idx = mapGet(m_idToIdx, id); + return m_nodeInfo[idx].inArcs(); +} + +const TransCFG::ArcPtrVec& TransCFG::outArcs(TransID id) const { + assert(hasNode(id)); + size_t idx = mapGet(m_idToIdx, id); + return m_nodeInfo[idx].outArcs(); +} + +TransCFG::Node::~Node() { + for (auto arc : m_outArcs) { + delete arc; + } +} + +void TransCFG::addNode(TransID id, int64_t weight) { + size_t idx = m_transIds.size(); + m_transIds.push_back(id); + m_idToIdx[id] = idx; + m_nodeInfo.push_back(Node(id, weight)); +} + +bool TransCFG::hasNode(TransID id) const { + return m_idToIdx.find(id) != m_idToIdx.end(); +} + +void TransCFG::addArc(TransID srcId, TransID dstId, int64_t weight) { + assert(hasNode(srcId)); + assert(hasNode(dstId)); + size_t srcIdx = m_idToIdx[srcId]; + size_t dstIdx = m_idToIdx[dstId]; + Arc* arc = new Arc(srcId, dstId, weight); + m_nodeInfo[srcIdx].addOutArc(arc); + m_nodeInfo[dstIdx].addInArc(arc); +} + +void TransCFG::print(std::string fileName, const ProfData* profData, + const TransIDSet* selected) const { + FILE* file = fopen(fileName.c_str(), "wt"); + if (!file) return; + + fprintf(file, "digraph CFG {\n"); + + // find max node weight + int64_t maxWeight = 1; // 1 to avoid div by 0 + for (auto tid : nodes()) { + auto w = weight(tid); + if (w > maxWeight) maxWeight = w; + } + + // print nodes + for (auto tid : nodes()) { + int64_t w = weight(tid); + uint32_t coldness = 255 - (255 * w
/ maxWeight); + Offset bcStart = profData->transStartBcOff(tid); + Offset bcStop = profData->transStopBcOff(tid); + const char* shape = selected && setContains(*selected, tid) ? "oval" + : "box"; + fprintf(file, + "t%u [shape=%s,label=\"T: %u\\np: %" PRId64 "\\nbc: [0x%x-0x%x)\"," + "style=filled,fillcolor=\"#ff%02x%02x\"];\n", tid, shape, tid, w, + bcStart, bcStop, coldness, coldness); + } + + // print arcs + for (auto srcId : nodes()) { + for (auto arc : outArcs(srcId)) { + int64_t w = arc->weight(); + fprintf(file, "t%u -> t%u [color=\"%s\",label=\"%" PRId64 "\"] ;\n", + srcId, + arc->dst(), + arc->guessed() ? "red" : "green4", + w); + } + } + + fprintf(file, "}\n"); + fclose(file); +} + +} } diff --git a/hphp/runtime/vm/jit/trans-cfg.h b/hphp/runtime/vm/jit/trans-cfg.h new file mode 100644 index 000000000..5587f2cad --- /dev/null +++ b/hphp/runtime/vm/jit/trans-cfg.h @@ -0,0 +1,109 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately.
| + +----------------------------------------------------------------------+ +*/ + +#ifndef incl_HPHP_TRANS_CFG_H_ +#define incl_HPHP_TRANS_CFG_H_ + +#include + +#include "hphp/util/base.h" +#include "hphp/runtime/vm/jit/srcdb.h" +#include "hphp/runtime/vm/jit/translator.h" +#include "hphp/runtime/vm/jit/translator-x64.h" +#include "hphp/runtime/vm/jit/translator-inline.h" + +namespace HPHP { +namespace JIT { + +/** + * A dynamic control-flow graph of single-block translations. + */ +class TransCFG { + public: + class Arc { + public: + static const int64_t kUnknownWeight = -1; + + Arc(TransID src, TransID dst, int64_t w) + : m_src(src) + , m_dst(dst) + , m_weight(w) + , m_guessed(false) + {} + TransID src() const { return m_src; } + TransID dst() const { return m_dst; } + int64_t weight() const { return m_weight; } + bool guessed() const { return m_guessed; } + void setWeight(int64_t w) { m_weight = w; } + void setGuessed() { m_guessed = true; } + private: + TransID m_src; + TransID m_dst; + int64_t m_weight; + bool m_guessed; // whether or not m_weight was guessed + }; + + typedef std::vector ArcPtrVec; + + class Node { + public: + Node(TransID id, int64_t w) + : m_id(id) + , m_weight(w) + {} + ~Node(); + + TransID transId() const { return m_id; } + int64_t weight() const { return m_weight; } + const ArcPtrVec& inArcs() const { return m_inArcs; } + const ArcPtrVec& outArcs() const { return m_outArcs; } + void addInArc (Arc* arc) { m_inArcs.push_back(arc); } + void addOutArc(Arc* arc) { m_outArcs.push_back(arc); } + private: + TransID m_id; + int64_t m_weight; + ArcPtrVec m_inArcs; + ArcPtrVec m_outArcs; + }; + + TransCFG() {} + TransCFG(FuncId funcId, + const ProfData* profData, + const SrcDB& srcDB, + const TcaTransIDMap& jmpToTransID); + + const vector& nodes() const { return m_transIds; } + int64_t weight(TransID id) const; + void setNodeWeight(TransID id, int64_t weight); + const ArcPtrVec& inArcs(TransID id) const; + const ArcPtrVec& outArcs(TransID id) 
const; + void addNode(TransID id, int64_t weight); + bool hasNode(TransID id) const; + void addArc(TransID srcId, TransID dstId, int64_t weight=0); + void print(std::string fileName, + const ProfData* profData, + const TransIDSet* selected = nullptr) const; + + private: + vector m_transIds; // vector of TransIDs in the graph + vector m_nodeInfo; // info about each node + hphp_hash_map m_idToIdx; // map from TransIDs to indices + // in m_nodeInfo +}; + +} } + +#endif diff --git a/hphp/runtime/vm/jit/translator-x64-helpers.cpp b/hphp/runtime/vm/jit/translator-x64-helpers.cpp index f907c3ec3..132957b51 100644 --- a/hphp/runtime/vm/jit/translator-x64-helpers.cpp +++ b/hphp/runtime/vm/jit/translator-x64-helpers.cpp @@ -211,9 +211,7 @@ TCA funcBodyHelper(ActRec* fp) { TCA tca = tx64->getCallArrayProlog(func); - if (tca) { - func->setFuncBody(tca); - } else { + if (!tca) { tca = Translator::Get()->getResumeHelper(); } tl_regState = VMRegState::DIRTY; diff --git a/hphp/runtime/vm/jit/translator-x64.cpp b/hphp/runtime/vm/jit/translator-x64.cpp index e394f152d..08cae47d7 100644 --- a/hphp/runtime/vm/jit/translator-x64.cpp +++ b/hphp/runtime/vm/jit/translator-x64.cpp @@ -582,6 +582,19 @@ asm_label(a, release); size_t(a.frontier() - m_dtorGenericStub)); } +bool TranslatorX64::profileSrcKey(const SrcKey& sk) const { + if (!RuntimeOption::EvalJitPGO) return false; + + if (profData()->optimized(sk)) return false; + + // The TCA of closure bodies is stored in the func's prologue + // tables. So, to support retranslating them, we need to reset the + // prologue tables and the prologue cache appropriately. 
+ // (test/quick/floatcmp.php exposes this problem) + if (curFunc()->isClosureBody()) return false; + + return true; +} TCA TranslatorX64::retranslate(const TranslArgs& args) { if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), args.m_sk)) { @@ -593,6 +606,9 @@ TCA TranslatorX64::retranslate(const TranslArgs& args) { LeaseHolder writer(s_writeLease); if (!writer) return nullptr; SKTRACE(1, args.m_sk, "retranslate\n"); + if (m_mode == TransInvalid) { + m_mode = profileSrcKey(args.m_sk) ? TransProfile : TransLive; + } return translate(args); } @@ -617,6 +633,7 @@ TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk, // interpretation of this BB. return nullptr; } + m_mode = TransLive; TCA start = translate(TranslArgs(sk, align).interp(true)); if (start != nullptr) { smashJmp(getAsmFor(toSmash), toSmash, start); @@ -624,6 +641,54 @@ TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk, return start; } +TCA TranslatorX64::retranslateOpt(TransID transId, bool align) { + LeaseHolder writer(s_writeLease); + if (!writer) return nullptr; + + TRACE(1, "retranslateOpt: transId = %u\n", transId); + + Func* func = nullptr; + if (m_profData->transBlock(transId) == nullptr) { + // This can happen for profiling translations that have some + // feature not supported by translateRegion yet. For such translations, + // we don't have a Func* (since it's grabbed from the Block). + // Anyway, in this case, the region translator resorts to generating a + // TransLive translation, corresponding to the current live VM context. + func = const_cast(curFunc()); + } else { + func = m_profData->transFunc(transId); + } + + // We may get here multiple times because different translations of + // the same SrcKey hit the optimization threshold. Only the first + // time around we want to invalidate the existing translations.
+ const SrcKey& sk = m_profData->transSrcKey(transId); + bool alreadyOptimized = m_profData->optimized(sk); + m_profData->setOptimized(sk); + + bool setFuncBody = (!alreadyOptimized && + func->base() == sk.offset() && + func->getDVFunclets().size() == 0); + + if (!alreadyOptimized) { + if (setFuncBody) func->setFuncBody((TCA)funcBodyHelperThunk); + invalidateSrcKey(sk); + } else { + // Bail if we already reached the maximum number of translations per SrcKey. + // Note that this can only happen with multi-threading. + SrcRec* srcRec = getSrcRec(sk); + assert(srcRec); + size_t nTrans = srcRec->translations().size(); + if (nTrans >= RuntimeOption::EvalJitMaxTranslations + 1) return nullptr; + } + + m_mode = TransOptimize; + auto translArgs = TranslArgs(sk, align).transId(transId); + if (setFuncBody) translArgs.setFuncBody(); + + return retranslate(translArgs); +} + /* * Satisfy an alignment constraint. If we're in a reachable section * of code, bridge the gap with nops. Otherwise, int3's. @@ -772,6 +837,7 @@ TranslatorX64::createTranslation(const TranslArgs& args) { auto sk = args.m_sk; LeaseHolder writer(s_writeLease); if (!writer) return nullptr; + if (SrcRec* sr = m_srcDB.find(sk)) { TCA tca = sr->getTopTranslation(); if (tca) { @@ -803,9 +869,12 @@ TranslatorX64::createTranslation(const TranslArgs& args) { size_t asize = a.frontier() - astart; size_t stubsize = astubs.frontier() - stubstart; assert(asize == 0); - if (stubsize) { + if (stubsize && RuntimeOption::EvalDumpTCAnchors) { addTranslation(TransRec(sk, curUnit()->md5(), TransAnchor, astart, asize, stubstart, stubsize)); + if (m_profData) { + m_profData->addTransAnchor(sk); + } assert(!isTransDBEnabled() || getTransRec(stubstart)->kind == TransAnchor); } @@ -825,6 +894,8 @@ TranslatorX64::translate(const TranslArgs& args) { INC_TPC(translate); assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0); assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0); + assert(m_mode != TransInvalid); + SCOPE_EXIT{ m_mode = 
TransInvalid; }; if (!args.m_interp) { if (m_numHHIRTrans == RuntimeOption::EvalJitGlobalTranslationLimit) { @@ -834,7 +905,8 @@ TranslatorX64::translate(const TranslArgs& args) { } } - AHotSelector ahs(this, curFunc()->attrs() & AttrHot); + Func* func = const_cast(curFunc()); + AHotSelector ahs(this, func->attrs() & AttrHot); if (args.m_align) { moveToAlign(a, kNonFallthroughAlign); @@ -844,6 +916,9 @@ TranslatorX64::translate(const TranslArgs& args) { translateWork(args); + if (args.m_setFuncBody) { + func->setFuncBody(start); + } SKTRACE(1, args.m_sk, "translate moved head from %p to %p\n", getTopTranslation(args.m_sk), start); return start; @@ -1095,41 +1170,43 @@ TranslatorX64::trimExtraArgs(ActRec* ar) { tl_regState = VMRegState::DIRTY; } +TCA +TranslatorX64::emitCallArrayProlog(const Func* func, + const DVFuncletsVec& dvs) { + TCA start = a.frontier(); + if (dvs.size() == 1) { + a. cmp_imm32_disp_reg32(dvs[0].first, + AROFF(m_numArgsAndCtorFlag), rVmFp); + emitBindJcc(a, CC_LE, SrcKey(func, dvs[0].second)); + emitBindJmp(a, SrcKey(func, func->base())); + } else { + a. load_reg64_disp_reg32(rVmFp, AROFF(m_numArgsAndCtorFlag), rax); + for (unsigned i = 0; i < dvs.size(); i++) { + a. cmp_imm32_reg32(dvs[i].first, rax); + emitBindJcc(a, CC_LE, SrcKey(func, dvs[i].second)); + } + emitBindJmp(a, SrcKey(func, func->base())); + } + return start; +} + TCA TranslatorX64::getCallArrayProlog(Func* func) { TCA tca = func->getFuncBody(); if (tca != (TCA)funcBodyHelperThunk) return tca; - int numParams = func->numParams(); - std::vector > dvs; - for (int i = 0; i < numParams; ++i) { - const Func::ParamInfo& pi = func->params()[i]; - if (pi.hasDefaultValue()) { - dvs.push_back(std::make_pair(i, pi.funcletOff())); - } - } + DVFuncletsVec dvs = func->getDVFunclets(); + if (dvs.size()) { LeaseHolder writer(s_writeLease); if (!writer) return nullptr; tca = func->getFuncBody(); if (tca != (TCA)funcBodyHelperThunk) return tca; - tca = a.frontier(); - if (dvs.size() == 1) { - a. 
cmp_imm32_disp_reg32(dvs[0].first, - AROFF(m_numArgsAndCtorFlag), rVmFp); - emitBindJcc(a, CC_LE, SrcKey(func, dvs[0].second)); - emitBindJmp(a, SrcKey(func, func->base())); - } else { - a. load_reg64_disp_reg32(rVmFp, AROFF(m_numArgsAndCtorFlag), rax); - for (unsigned i = 0; i < dvs.size(); i++) { - a. cmp_imm32_reg32(dvs[i].first, rax); - emitBindJcc(a, CC_LE, SrcKey(func, dvs[i].second)); - } - emitBindJmp(a, SrcKey(func, func->base())); - } + tca = emitCallArrayProlog(func, dvs); + func->setFuncBody(tca); } else { SrcKey sk(func, func->base()); - tca = tx64->getTranslation(TranslArgs(sk, false)); + tca = tx64->getTranslation(TranslArgs(sk, false).setFuncBody()); } return tca; @@ -1511,6 +1588,10 @@ TranslatorX64::funcPrologue(Func* func, int nPassed, ActRec* ar) { TransProlog, aStart, a.frontier() - aStart, stubStart, astubs.frontier() - stubStart)); + if (m_profData) { + m_profData->addTransProlog(skFuncBody); + } + recordGdbTranslation(skFuncBody, func, a, aStart, false, true); @@ -1852,27 +1933,6 @@ int32_t TranslatorX64::emitNativeImpl(const Func* func, return sizeof(ActRec) + cellsToBytes(nLocalCells-1); } -// for documentation see bindJmpccFirst below -void -TranslatorX64::emitCondJmp(SrcKey skTaken, SrcKey skNotTaken, - ConditionCode cc) { - // should be true for SrcKeys generated via OpJmpZ/OpJmpNZ - assert(skTaken.getFuncId() == skNotTaken.getFuncId()); - - // reserve space for a smashable jnz/jmp pair; both initially point - // to our stub. 
- prepareForTestAndSmash(a, 0, TestAndSmashFlags::kAlignJccAndJmp); - TCA old = a.frontier(); - TCA stub = emitServiceReq(REQ_BIND_JMPCC_FIRST, - old, - skTaken.offset(), - skNotTaken.offset(), - cc, - ccArgInfo(cc)); - a.jcc(cc, stub); - a.jmp(stub); -} - /* * bindJmp -- * @@ -2020,6 +2080,8 @@ TranslatorX64::emitBindJ(X64Assembler& _a, ConditionCode cc, emitJmpOrJcc(_a, cc, toSmash); } + setJmpTransID(toSmash); + TCA sr = emitServiceReq(SRFlags::None, req, toSmash, dest.offset()); @@ -2098,6 +2160,12 @@ void TranslatorX64::emitReqRetransNoIR(Asm& as, const SrcKey& sk) { } } +void TranslatorX64::emitReqRetransOpt(Asm& as, const SrcKey& sk, + TransID transId) { + emitServiceReq(REQ_RETRANSLATE_OPT, + sk.getFuncId(), sk.offset(), transId); +} + void TranslatorX64::checkRefs(X64Assembler& a, SrcKey sk, @@ -2443,6 +2511,17 @@ bool TranslatorX64::handleServiceRequest(TReqInfo& info, SKTRACE(1, sk, "retranslated (without IR) @%p\n", start); } break; + case REQ_RETRANSLATE_OPT: { + FuncId funcId = (FuncId) args[0]; + Offset offset = (Offset) args[1]; + TransID transId = (TransID)args[2]; + sk = SrcKey(funcId, offset); + start = retranslateOpt(transId, false); + SKTRACE(2, sk, "retranslated-OPT: transId = %d start: @%p\n", transId, + start); + break; + } + case REQ_RETRANSLATE: { INC_TPC(retranslate); sk = SrcKey(curFunc(), (Offset)args[0]); @@ -3059,8 +3138,8 @@ int64_t switchObjHelper(ObjectData* o, int64_t base, int64_t nTargets) { } bool -TranslatorX64::checkTranslationLimit(SrcKey sk, - const SrcRec& srcRec) const { +TranslatorX64::reachedTranslationLimit(SrcKey sk, + const SrcRec& srcRec) const { if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) { INC_TPC(max_trans); if (debug && Trace::moduleEnabled(Trace::tx64, 2)) { @@ -3211,12 +3290,24 @@ TranslatorX64::translateWork(const TranslArgs& args) { assert(srcRec.inProgressTailJumps().empty()); }; - if (!args.m_interp && !checkTranslationLimit(sk, srcRec)) { + if (!args.m_interp && 
!reachedTranslationLimit(sk, srcRec)) { // Attempt to create a region at this SrcKey - JIT::RegionContext rContext { curFunc(), args.m_sk.offset(), curSpOff() }; - FTRACE(2, "populating live context for region\n"); - populateLiveContext(rContext); - auto region = JIT::selectRegion(rContext, &t); + JIT::RegionDescPtr region; + if (RuntimeOption::EvalJitPGO) { + if (m_mode == TransOptimize) { + TransID transId = args.m_transId; + assert(transId != InvalidID); + region = JIT::selectHotRegion(transId, this); + if (region && region->blocks.size() == 0) region = nullptr; + } else { + // We always go through the tracelet translator in this case + } + } else { + JIT::RegionContext rContext { curFunc(), sk.offset(), curSpOff() }; + FTRACE(2, "populating live context for region\n"); + populateLiveContext(rContext); + region = JIT::selectRegion(rContext, &t); + } TranslateResult result = Retry; RegionBlacklist regionInterps; @@ -3244,6 +3335,9 @@ TranslatorX64::translateWork(const TranslArgs& args) { if (!region || result == Failure) { FTRACE(1, "trying irTranslateTracelet\n"); assertCleanState(); + if (m_mode == TransOptimize) { + m_mode = TransLive; + } result = translateTracelet(t); DEBUG_ONLY static const bool reqRegion = getenv("HHVM_REQUIRE_REGION"); assert(IMPLIES(region && reqRegion, result != Success)); @@ -3258,8 +3352,10 @@ TranslatorX64::translateWork(const TranslArgs& args) { } if (result == Success) { - // Translation succeeded. Mark it as such. - transKind = TransNormalIR; + assert(m_mode == TransLive || + m_mode == TransProfile || + m_mode == TransOptimize); + transKind = m_mode; } } @@ -3295,11 +3391,14 @@ TranslatorX64::translateWork(const TranslArgs& args) { false, false); recordGdbTranslation(sk, curFunc(), astubs, stubStart, false, false); + if (RuntimeOption::EvalJitPGO) { + m_profData->addTrans(t, transKind); + } // SrcRec::newTranslation() makes this code reachable. 
Do this last; // otherwise there's some chance of hitting in the reader threads whose // metadata is not yet visible. TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n", - start, sk.getFuncId(), sk.offset()); + start, sk.getFuncId(), sk.offset()); srcRec.newTranslation(start); TRACE(1, "tx64: %zd-byte tracelet\n", a.frontier() - start); if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) { @@ -3326,6 +3425,10 @@ TranslatorX64::translateTracelet(Tracelet& t) { ht.emitIncTransCounter(); } + if (m_mode == TransProfile) { + ht.emitCheckCold(m_profData->curTransID()); + } + emitRB(a, RBTypeTraceletBody, t.m_sk); Stats::emitInc(a, Stats::Instr_TC, t.m_numOpcodes); @@ -3357,6 +3460,7 @@ TranslatorX64::translateTracelet(Tracelet& t) { ni = ni->next) { try { SKTRACE(1, ni->source, "HHIR: translateInstr\n"); + assert(!(m_mode == TransProfile && ni->outputPredicted && ni->next)); m_irTrans->translateInstr(*ni); } catch (JIT::FailedIRGen& fcg) { always_assert(!ni->interp); @@ -3600,6 +3704,7 @@ TranslatorX64::TranslatorX64() } } assert(base); + tcStart = base; base += -(uint64_t)base & (kRoundUp - 1); enhugen(base, RuntimeOption::EvalTCNumHugeHotMB); TRACE(1, "init atrampolines @%p\n", base); @@ -4139,7 +4244,7 @@ bool TranslatorX64::dumpTC(bool ignoreLease) { // Returns true on success bool tc_dump(void) { - return TranslatorX64::Get()->dumpTC(); + return TranslatorX64::Get() && TranslatorX64::Get()->dumpTC(); } // Returns true on success @@ -4176,7 +4281,7 @@ bool TranslatorX64::dumpTCData() { } void TranslatorX64::invalidateSrcKey(SrcKey sk) { - assert(!RuntimeOption::RepoAuthoritative); + assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO); assert(s_writeLease.amOwner()); /* * Reroute existing translations for SrcKey to an as-yet indeterminate @@ -4192,6 +4297,14 @@ void TranslatorX64::invalidateSrcKey(SrcKey sk) { sr->replaceOldTranslations(); } +void TranslatorX64::setJmpTransID(TCA jmp) { + if (m_mode != TransProfile) return; + + TransID transId 
= m_profData->curTransID(); + FTRACE(5, "setJmpTransID: adding {} => {}\n", jmp, transId); + m_jmpToTransID[jmp] = transId; +} + } // HPHP::Transl } // HPHP diff --git a/hphp/runtime/vm/jit/translator-x64.h b/hphp/runtime/vm/jit/translator-x64.h index ee1ebc8f8..07cdb0b44 100644 --- a/hphp/runtime/vm/jit/translator-x64.h +++ b/hphp/runtime/vm/jit/translator-x64.h @@ -99,6 +99,8 @@ static const int kNumFreeLocalsHelpers = 9; typedef X64Assembler Asm; +typedef hphp_hash_map TcaTransIDMap; + constexpr size_t kJmpTargetAlign = 16; constexpr size_t kNonFallthroughAlign = 64; constexpr int kJmpLen = 5; @@ -150,26 +152,32 @@ class TranslatorX64 : public Translator class AHotSelector { public: AHotSelector(TranslatorX64* tx, bool hot) : - m_tx(tx), m_hot(hot && - tx->ahot.available() > 8192 && - tx->a.base() != tx->ahot.base()) { - if (m_hot) { - m_save = tx->a; - tx->a = tx->ahot; + m_tx(tx), m_swap(hot && + tx->ahot.available() > 8192 && + // Only swap if a and ahot aren't swapped yet. + // This assumes ahot area is in lower address. + tx->a.base() > tx->ahot.base()) { + if (m_swap) { + // Swap a and ahot, so that 'a' contains the hot code region. + // Note that, although we don't write to tx->ahot directly, we + // still need to make sure that all assembler code areas are + // available in a, astubs, and ahot, for example when we call + // asmChoose(addr, a, ahot, astubs). + std::swap(m_tx->a, m_tx->ahot); } } ~AHotSelector() { - if (m_hot) { - m_tx->ahot = m_tx->a; - m_tx->a = m_save; + if (m_swap) { + // Swap a and ahot back. 
+ std::swap(m_tx->a, m_tx->ahot); } } private: TranslatorX64* m_tx; - Asm m_save; - bool m_hot; + bool m_swap; }; + TCA tcStart; Asm ahot; Asm a; Asm astubs; @@ -197,6 +205,9 @@ class TranslatorX64 : public Translator DataBlock m_globalData; + TcaTransIDMap m_jmpToTransID; // maps jump addresses to the ID + // of translation containing them + // Data structures for HHIR-based translation uint64_t m_numHHIRTrans; @@ -224,7 +235,12 @@ private: void drawCFG(std::ofstream& out) const; static vector x64TranslRegs(); - Asm& getAsmFor(TCA addr) { return asmChoose(addr, a, ahot, astubs); } + Asm& getAsmFor(TCA addr) { + assert(a.base() != ahot.base() && + a.base() != astubs.base() && + ahot.base() != astubs.base()); + return asmChoose(addr, a, ahot, astubs, atrampolines); + } void emitIncRef(X64Assembler &a, PhysReg base, DataType dtype); void emitIncRef(PhysReg base, DataType); void emitIncRefGenericRegSafe(PhysReg base, int disp, PhysReg tmp); @@ -239,7 +255,8 @@ public: TCA getCallArrayProlog(Func* func); void smashPrologueGuards(TCA* prologues, int numPrologues, const Func* func); private: - + TCA emitCallArrayProlog(const Func* func, + const DVFuncletsVec& dvs); void translateClassExistsImpl(const Tracelet& t, const NormalizedInstruction& i, Attr typeAttr); @@ -304,6 +321,14 @@ private: void fixup(VMExecutionContext* ec) const; TCA getTranslatedCaller() const; + const TcaTransIDMap& getJmpToTransIDMap() const { + return m_jmpToTransID; + } + + void setJmpTransID(TCA jmp); + + bool profileSrcKey(const SrcKey& sk) const; + TCA getTopTranslation(SrcKey sk) { return getSrcRec(sk)->getTopTranslation(); } @@ -325,7 +350,7 @@ private: } inline bool isValidCodeAddress(TCA tca) const { - return tca >= ahot.base() && tca < astubs.base() + astubs.capacity(); + return tca >= tcStart && tca < astubs.base() + astubs.capacity(); } // If we were to shove every little helper function into this class @@ -364,7 +389,7 @@ public: FreeStubList m_freeStubs; bool freeRequestStub(TCA stub); 
TCA getFreeStub(); - bool checkTranslationLimit(SrcKey, const SrcRec&) const; + bool reachedTranslationLimit(SrcKey, const SrcRec&) const; TranslateResult translateTracelet(Tracelet& t); void checkRefs(Asm&, SrcKey, const RefDeps&, SrcRec&); @@ -436,7 +461,6 @@ public: TCA emitRetFromInterpretedGeneratorFrame(); void emitPopRetIntoActRec(Asm& a); int32_t emitBindCall(SrcKey srcKey, const Func* funcd, int numArgs); - void emitCondJmp(SrcKey skTrue, SrcKey skFalse, ConditionCode cc); TCA funcPrologue(Func* func, int nArgs, ActRec* ar = nullptr); bool checkCachedPrologue(const Func* func, int param, TCA& plgOut) const; @@ -581,6 +605,7 @@ private: public: // Only for HackIR void emitReqRetransNoIR(Asm& as, const SrcKey& sk); + void emitReqRetransOpt(Asm& as, const SrcKey& sk, TransID transId); private: // asize + astubssize + gdatasize + trampolinesblocksize diff --git a/hphp/runtime/vm/jit/translator.cpp b/hphp/runtime/vm/jit/translator.cpp index cc94a7b41..20546dfca 100755 --- a/hphp/runtime/vm/jit/translator.cpp +++ b/hphp/runtime/vm/jit/translator.cpp @@ -554,6 +554,9 @@ predictOutputs(SrcKey startSk, const NormalizedInstruction* ni) { if (!RuntimeOption::EvalJitTypePrediction) return KindOfInvalid; + // In JitPGO mode, disable type prediction to avoid side exits + if (RuntimeOption::EvalJitPGO) return KindOfInvalid; + if (RuntimeOption::EvalJitStressTypePredPercent && RuntimeOption::EvalJitStressTypePredPercent > int(get_random() % 100)) { int dt; @@ -756,7 +759,8 @@ getDynLocType(const SrcKey startSk, return RuntimeType(tv->m_type); } tv = Unit::lookupCns(sd); - if (tv) { + // In JitPGO mode, we disable type predictions to avoid side exits + if (tv && !RuntimeOption::EvalJitPGO) { ni->outputPredicted = true; TRACE(1, "CNS %s: guessing runtime type %d\n", sd->data(), tv->m_type); return RuntimeType(tv->m_type); @@ -1508,6 +1512,9 @@ bool Translator::applyInputMetaData(Unit::MetaHandle& metaHand, ni->imm[0].u_IVA = info.m_data; break; case 
Unit::MetaInfo::Kind::DataTypePredicted: { + // In JitPGO, disable type predictions to avoid side exits + if (RuntimeOption::EvalJitPGO) break; + // If the original type was invalid or predicted, then use the // prediction in the meta-data. assert((unsigned) arg < inputInfos.size()); @@ -2363,7 +2370,10 @@ DynLocation* TraceletContext::recordRead(const InputInfo& ii, m_resolvedDeps[l] = dl; } } else { - RuntimeType rtt = tx64->liveType(l, *curUnit(), true); + // TODO: Once the region translator supports guard relaxation + // (task #2598894), we can enable specialization for all modes. + const bool specialize = tx64->mode() == TransLive; + RuntimeType rtt = tx64->liveType(l, *curUnit(), specialize); assert(rtt.isIter() || !rtt.isVagueValue()); // Allocate a new DynLocation to represent this and store it in the // current map. @@ -3183,6 +3193,12 @@ void Translator::analyzeCallee(TraceletContext& tas, fcall->calleeTrace = std::move(subTrace); } +static bool instrBreaksProfileBB(const NormalizedInstruction* instr) { + return (instrIsNonCallControlFlow(instr->op()) || + instr->outputPredicted || + instr->op() == OpClsCnsD); // side exits if misses in the target cache +} + /* * analyze -- * @@ -3311,6 +3327,15 @@ std::unique_ptr Translator::analyze(SrcKey sk, throwUnknownInput(); } } + if ((m_mode == TransProfile || m_mode == TransOptimize) && + t.m_numOpcodes > 0) { + // We want to break blocks at every instruction that consumes a ref, + // so that we avoid side exits. Therefore, instructions that consume a ref + // can only be the first in the tracelet/block. + if (rtt.isValue() && rtt.isRef()) { + throwUnknownInput(); + } + } } ni->inputs.push_back(dl); } @@ -3439,6 +3464,12 @@ std::unique_ptr Translator::analyze(SrcKey sk, tas.recordDelete(l); } + if (m_mode == TransProfile && instrBreaksProfileBB(ni)) { + SKTRACE(1, sk, "BB broken\n"); + sk.advance(unit); + goto breakBB; + } + // Check if we need to break the tracelet.
// // If we've gotten this far, it mostly boils down to control-flow @@ -3483,7 +3514,10 @@ breakBB: } } - relaxDeps(t, tas); + // translateRegion doesn't support guard relaxation/specialization yet + if (m_mode != TransProfile && m_mode != TransOptimize) { + relaxDeps(t, tas); + } // Mark the last instruction appropriately assert(t.m_instrStream.last); @@ -3504,12 +3538,19 @@ breakBB: Translator::Translator() : m_resumeHelper(nullptr) , m_createdTime(Timer::GetCurrentTimeMicros()) + , m_mode(TransInvalid) + , m_profData(nullptr) , m_analysisDepth(0) { initInstrInfo(); + if (RuntimeOption::EvalJitPGO) { + m_profData = new ProfData(); + } } Translator::~Translator() { + delete m_profData; + m_profData = nullptr; } Translator* @@ -3771,7 +3812,8 @@ Translator::translateRegion(const RegionDesc& region, const SrcKey startSk = region.blocks.front()->start(); Unit::MetaHandle metaHand; - for (auto const& block : region.blocks) { + for (auto b = 0; b < region.blocks.size(); b++) { + auto const& block = region.blocks[b]; SrcKey sk = block->start(); const Func* topFunc = nullptr; auto typePreds = makeMapWalker(block->typePreds()); @@ -3783,12 +3825,19 @@ Translator::translateRegion(const RegionDesc& region, // Emit prediction guards. If this is the first instruction in the // region the guards will go to a retranslate request. Otherwise, they'll // go to a side exit. 
+ bool isFirstRegionInstr = block == region.blocks.front() && i == 0; while (typePreds.hasNext(sk)) { auto const& pred = typePreds.next(); - if (block == region.blocks.front() && i == 0) { - ht.guardTypeLocation(pred.location, pred.type); + auto type = pred.type; + auto loc = pred.location; + if (type.subtypeOf(Type::Cls)) { + // Do not generate guards for class; instead assert the type + assert(loc.tag() == JIT::RegionDesc::Location::Tag::Stack); + ht.assertTypeLocation(loc, type); + } else if (isFirstRegionInstr) { + ht.guardTypeLocation(loc, type); } else { - ht.checkTypeLocation(pred.location, pred.type, sk.offset()); + ht.checkTypeLocation(loc, type, sk.offset()); } } @@ -3800,6 +3849,10 @@ Translator::translateRegion(const RegionDesc& region, ht.guardRefs(pred.arSpOffset, pred.mask, pred.vals); } + if (RuntimeOption::EvalJitTransCounters && isFirstRegionInstr) { + ht.emitIncTransCounter(); + } + // Update the current funcd, if we have a new one. if (knownFuncs.hasNext(sk)) { topFunc = knownFuncs.next(); @@ -3813,6 +3866,12 @@ Translator::translateRegion(const RegionDesc& region, i == block->length() - 1 && block == region.blocks.back(); inst.changesPC = opcodeChangesPC(inst.op()); inst.funcd = topFunc; + inst.nextOffset = kInvalidOffset; + if (instrIsNonCallControlFlow(inst.op()) && !inst.breaksTracelet) { + assert(b + 1 < region.blocks.size()); // block b+1 is read below + inst.nextOffset = region.blocks[b+1]->start().offset(); + } + inst.outputPredicted = false; populateImmediates(inst); // We can get a more precise output type for interpOne if we know all of @@ -3868,11 +3927,6 @@ Translator::translateRegion(const RegionDesc& region, return Retry; } - if (isFCallStar(inst.op()) || inst.op() == OpFCallBuiltin) { - // This is much more conservative than it needs to be. - ht.emitSmashLocals(); - } - // Check the prediction. If the predicted type is less specific than what // is currently on the eval stack, checkTypeLocation won't emit any code.
if (doPrediction) { @@ -3918,7 +3972,7 @@ uint64_t* Translator::getTransCounterAddr() { [id % transCountersPerChunk]); } -uint32_t Translator::addTranslation(const TransRec& transRec) { +void Translator::addTranslation(const TransRec& transRec) { if (Trace::moduleEnabledRelease(Trace::trans, 1)) { // Log the translation's size, creation time, SrcKey, and size Trace::traceRelease("New translation: %" PRId64 " %s %u %u %d\n", @@ -3932,7 +3986,7 @@ uint32_t Translator::addTranslation(const TransRec& transRec) { transRec.kind); } - if (!isTransDBEnabled()) return -1u; + if (!isTransDBEnabled()) return; uint32_t id = getCurrentTransID(); m_translations.push_back(transRec); m_translations[id].setID(id); @@ -3943,8 +3997,6 @@ uint32_t Translator::addTranslation(const TransRec& transRec) { if (transRec.astubsLen > 0) { m_transDB[transRec.astubsStart] = id; } - - return id; } uint64_t Translator::getTransCounter(TransID transId) const { @@ -3999,14 +4051,13 @@ void Translator::invalidateFile(Eval::PhpFile* f) { } static const char *transKindStr[] = { - "Normal_Tx64", - "Normal_HHIR", - "Anchor", - "Prologue", +#define DO(KIND) #KIND, + TRANS_KINDS +#undef DO }; const char *getTransKindName(TransKind kind) { - assert(kind >= 0 && kind <= TransProlog); + assert(kind >= 0 && kind < TransInvalid); return transKindStr[kind]; } diff --git a/hphp/runtime/vm/jit/translator.h b/hphp/runtime/vm/jit/translator.h index 301b08609..ff95296c5 100644 --- a/hphp/runtime/vm/jit/translator.h +++ b/hphp/runtime/vm/jit/translator.h @@ -41,6 +41,7 @@ #include "hphp/runtime/vm/jit/translator-instrs.h" #include "hphp/runtime/vm/jit/type.h" #include "hphp/runtime/vm/jit/write-lease.h" +#include "hphp/runtime/vm/jit/prof-data.h" #include "hphp/runtime/vm/debugger_hook.h" #include "hphp/runtime/vm/srckey.h" #include "hphp/runtime/base/md5.h" @@ -59,6 +60,7 @@ namespace Transl { using JIT::Type; using JIT::RegionDesc; using JIT::HhbcTranslator; +using JIT::ProfData; static const bool trustSigSegv = 
false; static const uint32_t transCountersPerChunk = 1024 * 1024 / 8; @@ -235,6 +237,8 @@ class NormalizedInstruction { // stack at tracelet entry. int stackOffset; int sequenceNum; + Offset nextOffset; // for intra-trace* non-call control-flow instructions, + // this is the offset of the next instruction in the trace* bool breaksTracelet:1; bool changesPC:1; bool fuseBranch:1; @@ -578,13 +582,6 @@ struct Tracelet : private boost::noncopyable { SrcKey nextSk() const; }; -enum TransKind { - TransInterp = 0, - TransNormalIR = 1, - TransAnchor = 2, - TransProlog = 3, -}; - const char* getTransKindName(TransKind kind); /* @@ -614,8 +611,6 @@ struct TransRec { uint8_t counterLen; vector bcMapping; - static const TransID InvalidID = -1LL; - TransRec() {} TransRec(SrcKey s, @@ -663,6 +658,8 @@ struct TranslArgs { , m_src(nullptr) , m_align(align) , m_interp(false) + , m_setFuncBody(false) + , m_transId(InvalidID) {} TranslArgs& sk(const SrcKey& sk) { @@ -681,11 +678,21 @@ struct TranslArgs { m_interp = interp; return *this; } + TranslArgs& setFuncBody() { + m_setFuncBody = true; + return *this; + } + TranslArgs& transId(TransID transId) { + m_transId = transId; + return *this; + } SrcKey m_sk; TCA m_src; bool m_align; bool m_interp; + bool m_setFuncBody; + TransID m_transId; }; /* @@ -863,7 +870,7 @@ public: uint64_t getTransCounter(TransID transId) const; void setTransCounter(TransID transId, uint64_t value); - uint32_t addTranslation(const TransRec& transRec); + void addTranslation(const TransRec& transRec); // helpers for srcDB. SrcRec* getSrcRec(SrcKey sk) { @@ -873,6 +880,10 @@ public: return m_srcDB.insert(sk); } + const SrcDB& getSrcDB() const { + return m_srcDB; + } + /* * Create a Tracelet for the given SrcKey, which must actually be * the current VM frame. 
@@ -909,6 +920,9 @@ protected: Mutex m_dbgBlacklistLock; bool isSrcKeyInBL(const Unit* unit, const SrcKey& sk); + TransKind m_mode; + ProfData* m_profData; + private: int m_analysisDepth; @@ -921,10 +935,19 @@ public: TCA getResumeHelper() { return m_resumeHelper; } + TCA getResumeHelperRet() { return m_resumeHelperRet; } + ProfData* profData() const { + return m_profData; + } + + TransKind mode() const { + return m_mode; + } + int analysisDepth() const { assert(m_analysisDepth >= 0); return m_analysisDepth; diff --git a/hphp/runtime/vm/jit/types.h b/hphp/runtime/vm/jit/types.h index 891b678c0..daa5d2508 100644 --- a/hphp/runtime/vm/jit/types.h +++ b/hphp/runtime/vm/jit/types.h @@ -39,10 +39,38 @@ struct ctca_identity_hash { } }; - typedef uint32_t TransID; typedef hphp_hash_set TransIDSet; +const TransID InvalidID = -1LL; + +/** + * The different kinds of translations that the JIT generates: + * + * - Anchor : a service request for retranslating + * - Prolog : function prologue + * - Interp : a service to interpret at least one instruction + * - Live : translate one tracelet by inspecting live VM state + * - Profile : translate one block by inspecting live VM state and + * inserting profiling counters + * - Optimize: translate one region performing optimizations that may + * leverage data collected by Profile translations + */ +#define TRANS_KINDS \ + DO(Anchor) \ + DO(Prolog) \ + DO(Interp) \ + DO(Live) \ + DO(Profile) \ + DO(Optimize) \ + DO(Invalid) \ + +enum TransKind { +#define DO(KIND) Trans##KIND, + TRANS_KINDS +#undef DO +}; + }} #endif diff --git a/hphp/runtime/vm/srckey.h b/hphp/runtime/vm/srckey.h index 86979ef5d..ff4b9d271 100644 --- a/hphp/runtime/vm/srckey.h +++ b/hphp/runtime/vm/srckey.h @@ -131,6 +131,8 @@ struct SrcKey::Hasher { } }; +typedef hphp_hash_set SrcKeySet; + ////////////////////////////////////////////////////////////////////// inline std::string show(SrcKey sk) { diff --git a/hphp/runtime/vm/unit.h b/hphp/runtime/vm/unit.h index 
218ae546f..71a5e0be3 100644 --- a/hphp/runtime/vm/unit.h +++ b/hphp/runtime/vm/unit.h @@ -17,7 +17,6 @@ #ifndef incl_HPHP_VM_UNIT_H_ #define incl_HPHP_VM_UNIT_H_ -// Expects that runtime/vm/core_types.h is already included. #include "hphp/runtime/base/runtime_option.h" #include "hphp/runtime/vm/hhbc.h" #include "hphp/runtime/base/complex_types.h" diff --git a/hphp/util/trace.h b/hphp/util/trace.h index 3ddecc9fd..5aa68cd70 100644 --- a/hphp/util/trace.h +++ b/hphp/util/trace.h @@ -89,6 +89,7 @@ namespace Trace { TM(typeProfile) \ TM(hhir) \ TM(printir) \ + TM(pgo) \ TM(hhirTracelets) \ TM(gc) \ TM(instancebits)\