From d078c2b895fa8a474c48f2d3b98292fadae08062 Mon Sep 17 00:00:00 2001 From: Guilherme Ottoni Date: Tue, 4 Jun 2013 17:25:42 -0700 Subject: [PATCH] Add initial framework for PGO in the JIT, and use it for trace-region selection This diff adds an initial framework for profile-guided optimizations in the JIT. Two new translation modes are added, effectively creating a multi-gear JIT: 1. Profile: this collects profiling data, which includes execution counters and type information. 2. Optimize: this uses data collected by Profiling translations to produce optimized translations. Right now, the Optimize gear is used solely to produce larger compilation regions (traces), which increase the scope of optimizations exposed to the JIT (compared to tracelets). This is still a work in progress, and it's disabled by default. This diff also fixes a number of bugs exposed by trace regions, gets tools/reduce working again, and makes a number of improvements to tc-print related to the new translation modes. 
--- hphp/doc/ir.specification | 12 + hphp/runtime/base/program_functions.cpp | 2 +- hphp/runtime/base/runtime_option.cpp | 4 +- hphp/runtime/base/runtime_option.h | 3 + hphp/runtime/base/types.h | 1 + hphp/runtime/vm/func.cpp | 13 + hphp/runtime/vm/func.h | 6 + hphp/runtime/vm/jit/abi-x64.h | 6 + hphp/runtime/vm/jit/code-gen.cpp | 24 ++ hphp/runtime/vm/jit/dce.cpp | 2 +- hphp/runtime/vm/jit/extra-data.h | 26 ++ hphp/runtime/vm/jit/hhbc-translator.cpp | 166 ++++++++----- hphp/runtime/vm/jit/hhbc-translator.h | 43 +++- hphp/runtime/vm/jit/ir-translator.cpp | 120 +++++++-- hphp/runtime/vm/jit/ir.h | 2 + hphp/runtime/vm/jit/prof-data.cpp | 233 ++++++++++++++++++ hphp/runtime/vm/jit/prof-data.h | 137 ++++++++++ hphp/runtime/vm/jit/region-hot-block.cpp | 37 +++ hphp/runtime/vm/jit/region-hot-trace.cpp | 178 +++++++++++++ hphp/runtime/vm/jit/region-method.cpp | 6 +- hphp/runtime/vm/jit/region-onebc.cpp | 2 +- hphp/runtime/vm/jit/region-selection.cpp | 114 +++++++-- hphp/runtime/vm/jit/region-selection.h | 31 ++- hphp/runtime/vm/jit/region-tracelet.cpp | 2 +- hphp/runtime/vm/jit/srcdb.cpp | 10 +- hphp/runtime/vm/jit/srcdb.h | 4 + hphp/runtime/vm/jit/trace-builder.cpp | 51 +++- hphp/runtime/vm/jit/trans-cfg.cpp | 215 ++++++++++++++++ hphp/runtime/vm/jit/trans-cfg.h | 109 ++++++++ .../runtime/vm/jit/translator-x64-helpers.cpp | 4 +- hphp/runtime/vm/jit/translator-x64.cpp | 229 ++++++++++++----- hphp/runtime/vm/jit/translator-x64.h | 57 +++-- hphp/runtime/vm/jit/translator.cpp | 93 +++++-- hphp/runtime/vm/jit/translator.h | 43 +++- hphp/runtime/vm/jit/types.h | 30 ++- hphp/runtime/vm/srckey.h | 2 + hphp/runtime/vm/unit.h | 1 - hphp/util/trace.h | 1 + 38 files changed, 1760 insertions(+), 259 deletions(-) create mode 100644 hphp/runtime/vm/jit/prof-data.cpp create mode 100644 hphp/runtime/vm/jit/prof-data.h create mode 100644 hphp/runtime/vm/jit/region-hot-block.cpp create mode 100644 hphp/runtime/vm/jit/region-hot-trace.cpp create mode 100644 hphp/runtime/vm/jit/trans-cfg.cpp 
create mode 100644 hphp/runtime/vm/jit/trans-cfg.h diff --git a/hphp/doc/ir.specification b/hphp/doc/ir.specification index b698cdfb1..df8c1f4df 100755 --- a/hphp/doc/ir.specification +++ b/hphp/doc/ir.specification @@ -440,6 +440,12 @@ CheckDefinedClsEq -> L `classPtr'; if they aren't equal or if `className' is not defined, branch to L. +CheckCold<TransID> -> L + + Check if the counter associated with translation TransID is cold + (i.e. still below a fixed threshold). If it's not (i.e. the translation + has reached the "hotness threshold"), then branch to label L. + GuardRefs S0:FuncPtr S1:Int S2:Int S3:Int S4:Int S5:Int Perform reffiness guard checks. Operands: @@ -1223,6 +1229,12 @@ ReqRetranslate This instruction is used in exit traces for a type prediction that occurs at the first bytecode offset of a tracelet. +ReqRetranslateOpt<transID,bcOff> + + Emit a service request to retranslate, with a higher optimization + gear, translation transID, which starts at bcOff. This instruction + is used in exit traces that trigger profile-guided optimizations. 
+ ReqBindJmpGt ReqBindJmpGte ReqBindJmpLt diff --git a/hphp/runtime/base/program_functions.cpp b/hphp/runtime/base/program_functions.cpp index 69236dee6..57d540af7 100644 --- a/hphp/runtime/base/program_functions.cpp +++ b/hphp/runtime/base/program_functions.cpp @@ -533,7 +533,7 @@ void execute_command_line_begin(int argc, char **argv, int xhprof) { void execute_command_line_end(int xhprof, bool coverage, const char *program) { ThreadInfo *ti = ThreadInfo::s_threadInfo.getNoCheck(); - if (RuntimeOption::EvalJit && RuntimeOption::EvalDumpTC) { + if (RuntimeOption::EvalDumpTC) { HPHP::Transl::tc_dump(); } diff --git a/hphp/runtime/base/runtime_option.cpp b/hphp/runtime/base/runtime_option.cpp index 8190606e3..b707b2988 100644 --- a/hphp/runtime/base/runtime_option.cpp +++ b/hphp/runtime/base/runtime_option.cpp @@ -427,8 +427,8 @@ EVALFLAGS(); std::set RuntimeOption::DynamicInvokeFunctions; bool RuntimeOption::RecordCodeCoverage = false; std::string RuntimeOption::CodeCoverageOutputFile; -size_t RuntimeOption::VMTranslAHotSize = 2 << 20; -size_t RuntimeOption::VMTranslASize = 510 << 20; +size_t RuntimeOption::VMTranslAHotSize = 4 << 20; +size_t RuntimeOption::VMTranslASize = 508 << 20; size_t RuntimeOption::VMTranslAStubsSize = 512 << 20; size_t RuntimeOption::VMTranslGDataSize = RuntimeOption::VMTranslASize >> 2; diff --git a/hphp/runtime/base/runtime_option.h b/hphp/runtime/base/runtime_option.h index 1292c14a7..761b200a1 100644 --- a/hphp/runtime/base/runtime_option.h +++ b/hphp/runtime/base/runtime_option.h @@ -445,9 +445,12 @@ public: F(bool, HHIRPredictionOpts, true) \ F(bool, HHIRStressCodegenBlocks, false) \ F(string, JitRegionSelector, regionSelectorDefault()) \ + F(bool, JitPGO, false) \ + F(uint64_t, JitPGOThreshold, 2) \ /* DumpBytecode =1 dumps user php, =2 dumps systemlib & user php */ \ F(int32_t, DumpBytecode, 0) \ F(bool, DumpTC, false) \ + F(bool, DumpTCAnchors, false) \ F(bool, DumpAst, false) \ F(bool, MapTCHuge, true) \ F(uint32_t, TCNumHugeHotMB, 
16) \ diff --git a/hphp/runtime/base/types.h b/hphp/runtime/base/types.h index fada97bee..9489a2212 100644 --- a/hphp/runtime/base/types.h +++ b/hphp/runtime/base/types.h @@ -466,6 +466,7 @@ const Id kInvalidId = Id(-1); // offsets. typedef int32_t Offset; constexpr Offset kInvalidOffset = std::numeric_limits::max(); +typedef hphp_hash_set OffsetSet; /* * Various fields in the VM's runtime have indexes that are addressed diff --git a/hphp/runtime/vm/func.cpp b/hphp/runtime/vm/func.cpp index c4977e745..898b62686 100644 --- a/hphp/runtime/vm/func.cpp +++ b/hphp/runtime/vm/func.cpp @@ -666,6 +666,19 @@ void Func::getFuncInfo(ClassInfo::MethodInfo* mi) const { } } +DVFuncletsVec Func::getDVFunclets() const { + DVFuncletsVec dvs; + int nParams = numParams(); + for (int i = 0; i < nParams; ++i) { + const ParamInfo& pi = params()[i]; + if (pi.hasDefaultValue()) { + dvs.push_back(std::make_pair(i, pi.funcletOff())); + } + } + return dvs; +} + + Func::SharedData::SharedData(PreClass* preClass, Id id, Offset base, Offset past, int line1, int line2, bool top, const StringData* docComment) diff --git a/hphp/runtime/vm/func.h b/hphp/runtime/vm/func.h index cf9c6c770..3a6509d5a 100644 --- a/hphp/runtime/vm/func.h +++ b/hphp/runtime/vm/func.h @@ -37,6 +37,11 @@ class PreClassEmitter; typedef uint32_t FuncId; constexpr FuncId InvalidFuncId = FuncId(-1LL); +/* + * Vector of pairs (param number, offset of corresponding DV funclet). + */ +typedef std::vector > DVFuncletsVec; + /* * Metadata about a php function or object method. 
*/ @@ -248,6 +253,7 @@ struct Func { HphpArray* getStaticLocals() const; void getFuncInfo(ClassInfo::MethodInfo* mi) const; + DVFuncletsVec getDVFunclets() const; Unit* unit() const { return m_unit; } PreClass* preClass() const { return shared()->m_preClass; } diff --git a/hphp/runtime/vm/jit/abi-x64.h b/hphp/runtime/vm/jit/abi-x64.h index 0cca74554..df7241bbb 100644 --- a/hphp/runtime/vm/jit/abi-x64.h +++ b/hphp/runtime/vm/jit/abi-x64.h @@ -227,6 +227,12 @@ const int kNumServiceReqArgRegs = */ \ REQ(RETRANSLATE) \ \ + /* + * When PGO is enabled, this retranslates previous translations leveraging + * profiling data. + */ \ + REQ(RETRANSLATE_OPT) \ + \ /* * If the max translations is reached for a SrcKey, the last * translation in the chain will jump to an interpret request stub. diff --git a/hphp/runtime/vm/jit/code-gen.cpp b/hphp/runtime/vm/jit/code-gen.cpp index 56e2e0766..48de69459 100755 --- a/hphp/runtime/vm/jit/code-gen.cpp +++ b/hphp/runtime/vm/jit/code-gen.cpp @@ -782,7 +782,11 @@ void CodeGenerator::emitReqBindJcc(ConditionCode cc, extra->notTaken, cc, m_tx64->ccArgInfo(cc)); + + tx64->setJmpTransID(a.frontier()); a. jcc (cc, jccStub); + + tx64->setJmpTransID(a.frontier()); a. jmp (jccStub); } @@ -2580,6 +2584,8 @@ void CodeGenerator::cgRetCtrl(IRInstruction* inst) { void CodeGenerator::emitReqBindAddr(const Func* func, TCA& dest, Offset offset) { + tx64->setJmpTransID((TCA)&dest); + dest = m_tx64->emitServiceReq(REQ_BIND_ADDR, &dest, offset); @@ -2600,6 +2606,8 @@ void CodeGenerator::cgJmpSwitchDest(IRInstruction* inst) { TCA def = m_tx64->emitServiceReq(REQ_BIND_JMPCC_SECOND, m_as.frontier(), data->defaultOff, CC_AE); + tx64->setJmpTransID(m_as.frontier()); + m_as. 
jae(def); } @@ -2913,6 +2921,12 @@ void CodeGenerator::cgReqRetranslateNoIR(IRInstruction* inst) { m_tx64->emitReqRetransNoIR(m_as, dest); } +void CodeGenerator::cgReqRetranslateOpt(IRInstruction* inst) { + auto extra = inst->extra(); + auto sk = SrcKey(curFunc(), extra->offset); + m_tx64->emitReqRetransOpt(m_as, sk, extra->transId); +} + void CodeGenerator::cgReqRetranslate(IRInstruction* inst) { auto const destSK = SrcKey(curFunc(), m_curTrace->bcOff()); auto const destSR = m_tx64->getSrcRec(destSK); @@ -5251,6 +5265,16 @@ void CodeGenerator::cgExitOnVarEnv(IRInstruction* inst) { emitFwdJcc(CC_NE, label); } +void CodeGenerator::cgCheckCold(IRInstruction* inst) { + Block* label = inst->taken(); + TransID transId = inst->extra()->transId; + auto counterAddr = m_tx64->profData()->transCounterAddr(transId); + + emitLoadImm(m_as, uint64_t(counterAddr), m_rScratch); + m_as.decq(m_rScratch[0]); + emitFwdJcc(CC_LE, label); +} + void CodeGenerator::cgReleaseVVOrExit(IRInstruction* inst) { auto* const label = inst->taken(); auto const rFp = m_regs[inst->src(0)].reg(); diff --git a/hphp/runtime/vm/jit/dce.cpp b/hphp/runtime/vm/jit/dce.cpp index 6c25c33cb..4e4048b95 100644 --- a/hphp/runtime/vm/jit/dce.cpp +++ b/hphp/runtime/vm/jit/dce.cpp @@ -493,7 +493,7 @@ void consumeIncRef(const IRInstruction* consumer, const SSATmp* src, if ((srcInst->op() == CheckType || srcInst->op() == AssertType) && srcInst->typeParam().maybeCounted()) { // srcInst is a CheckType/AsserType that guards to a refcounted type. We - // need to trace through to its source. If the instruciton guards to a + // need to trace through to its source. If the instruction guards to a // non-refcounted type then the reference is consumed by CheckType itself. 
consumeIncRef(consumer, srcInst->src(0), state); return; diff --git a/hphp/runtime/vm/jit/extra-data.h b/hphp/runtime/vm/jit/extra-data.h index f46cc5f75..83f5ebd9f 100644 --- a/hphp/runtime/vm/jit/extra-data.h +++ b/hphp/runtime/vm/jit/extra-data.h @@ -18,6 +18,7 @@ #define incl_HPHP_VM_EXTRADATA_H_ #include "hphp/runtime/vm/jit/ir.h" +#include "hphp/runtime/vm/jit/types.h" namespace HPHP { namespace JIT { @@ -255,6 +256,29 @@ struct BCOffset : IRExtraData { Offset offset; }; +/* + * Translation IDs. + */ +struct TransIDData : IRExtraData { + explicit TransIDData(Transl::TransID transId) : transId(transId) {} + std::string show() const { return folly::to(transId); } + Transl::TransID transId; +}; + +/* + * Information needed to generate a REQ_RETRANSLATE_OPT service request. + */ +struct ReqRetransOptData : IRExtraData { + explicit ReqRetransOptData(Transl::TransID transId, Offset offset) + : transId(transId) + , offset(offset) {} + std::string show() const { + return folly::to(transId, ", ", offset); + } + Transl::TransID transId; + Offset offset; +}; + /* * DefInlineFP is present when we need to create a frame for inlining. 
* This instruction also carries some metadata used by tracebuilder to @@ -401,6 +425,8 @@ X(DefInlineFP, DefInlineFPData); X(ReqBindJmp, BCOffset); X(ReqBindJmpNoIR, BCOffset); X(ReqRetranslateNoIR, BCOffset); +X(ReqRetranslateOpt, ReqRetransOptData); +X(CheckCold, TransIDData); X(CallArray, CallArrayData); X(LdClsCns, ClsCnsName); X(LookupClsCns, ClsCnsName); diff --git a/hphp/runtime/vm/jit/hhbc-translator.cpp b/hphp/runtime/vm/jit/hhbc-translator.cpp index 58050c72c..431e0bc9a 100755 --- a/hphp/runtime/vm/jit/hhbc-translator.cpp +++ b/hphp/runtime/vm/jit/hhbc-translator.cpp @@ -868,14 +868,15 @@ void HhbcTranslator::emitReqDoc(const StringData* name) { } template -SSATmp* HhbcTranslator::emitIterInitCommon(int offset, Lambda genFunc) { +SSATmp* HhbcTranslator::emitIterInitCommon(int offset, Lambda genFunc, + bool invertCond) { SSATmp* src = popC(); Type type = src->type(); if (!type.isArray() && type != Type::Obj) { PUNT(IterInit); } SSATmp* res = genFunc(src); - return emitJmpCondHelper(offset, true, res); + return emitJmpCondHelper(offset, !invertCond, res); } template @@ -895,39 +896,40 @@ SSATmp* HhbcTranslator::emitMIterInitCommon(int offset, Lambda genFunc) { void HhbcTranslator::emitIterInit(uint32_t iterId, int offset, - uint32_t valLocalId) { + uint32_t valLocalId, + bool invertCond) { emitIterInitCommon(offset, [&] (SSATmp* src) { - return gen( - IterInit, - Type::Bool, - src, - m_tb->fp(), - cns(iterId), - cns(valLocalId) - ); - }); + return gen(IterInit, + Type::Bool, + src, + m_tb->fp(), + cns(iterId), + cns(valLocalId)); + }, + invertCond); } void HhbcTranslator::emitIterInitK(uint32_t iterId, int offset, uint32_t valLocalId, - uint32_t keyLocalId) { + uint32_t keyLocalId, + bool invertCond) { emitIterInitCommon(offset, [&] (SSATmp* src) { - return gen( - IterInitK, - Type::Bool, - src, - m_tb->fp(), - cns(iterId), - cns(valLocalId), - cns(keyLocalId) - ); - }); + return gen(IterInitK, + Type::Bool, + src, + m_tb->fp(), + cns(iterId), + 
cns(valLocalId), + cns(keyLocalId)); + }, + invertCond); } void HhbcTranslator::emitIterNext(uint32_t iterId, int offset, - uint32_t valLocalId) { + uint32_t valLocalId, + bool invertCond) { SSATmp* res = gen( IterNext, Type::Bool, @@ -935,13 +937,14 @@ void HhbcTranslator::emitIterNext(uint32_t iterId, cns(iterId), cns(valLocalId) ); - emitJmpCondHelper(offset, false, res); + emitJmpCondHelper(offset, invertCond, res); } void HhbcTranslator::emitIterNextK(uint32_t iterId, int offset, uint32_t valLocalId, - uint32_t keyLocalId) { + uint32_t keyLocalId, + bool invertCond) { SSATmp* res = gen( IterNextK, Type::Bool, @@ -950,48 +953,47 @@ void HhbcTranslator::emitIterNextK(uint32_t iterId, cns(valLocalId), cns(keyLocalId) ); - emitJmpCondHelper(offset, false, res); + emitJmpCondHelper(offset, invertCond, res); } void HhbcTranslator::emitWIterInit(uint32_t iterId, int offset, - uint32_t valLocalId) { + uint32_t valLocalId, + bool invertCond) { emitIterInitCommon( offset, [&] (SSATmp* src) { - return gen( - WIterInit, - Type::Bool, - src, - m_tb->fp(), - cns(iterId), - cns(valLocalId) - ); - } - ); + return gen(WIterInit, + Type::Bool, + src, + m_tb->fp(), + cns(iterId), + cns(valLocalId)); + }, + invertCond); } void HhbcTranslator::emitWIterInitK(uint32_t iterId, int offset, uint32_t valLocalId, - uint32_t keyLocalId) { + uint32_t keyLocalId, + bool invertCond) { emitIterInitCommon( offset, [&] (SSATmp* src) { - return gen( - WIterInitK, - Type::Bool, - src, - m_tb->fp(), - cns(iterId), - cns(valLocalId), - cns(keyLocalId) - ); - } - ); + return gen(WIterInitK, + Type::Bool, + src, + m_tb->fp(), + cns(iterId), + cns(valLocalId), + cns(keyLocalId)); + }, + invertCond); } void HhbcTranslator::emitWIterNext(uint32_t iterId, int offset, - uint32_t valLocalId) { + uint32_t valLocalId, + bool invertCond) { SSATmp* res = gen( WIterNext, Type::Bool, @@ -999,13 +1001,14 @@ void HhbcTranslator::emitWIterNext(uint32_t iterId, cns(iterId), cns(valLocalId) ); - 
emitJmpCondHelper(offset, false, res); + emitJmpCondHelper(offset, invertCond, res); } void HhbcTranslator::emitWIterNextK(uint32_t iterId, int offset, uint32_t valLocalId, - uint32_t keyLocalId) { + uint32_t keyLocalId, + bool invertCond) { SSATmp* res = gen( WIterNextK, Type::Bool, @@ -1014,7 +1017,7 @@ void HhbcTranslator::emitWIterNextK(uint32_t iterId, cns(valLocalId), cns(keyLocalId) ); - emitJmpCondHelper(offset, false, res); + emitJmpCondHelper(offset, invertCond, res); } void HhbcTranslator::emitMIterInit(uint32_t iterId, @@ -1434,6 +1437,10 @@ void HhbcTranslator::emitIncTransCounter() { m_tb->gen(IncTransCounter); } +void HhbcTranslator::emitCheckCold(TransID transId) { + m_tb->gen(CheckCold, getExitOptTrace(transId), TransIDData(transId)); +} + SSATmp* HhbcTranslator::getStrName(const StringData* knownName) { SSATmp* name = popC(); assert(name->isA(Type::Str) || knownName); @@ -2589,7 +2596,7 @@ void HhbcTranslator::guardTypeLocal(uint32_t locId, Type type) { void HhbcTranslator::guardTypeLocation(const RegionDesc::Location& loc, Type type) { - assert(type.subtypeOf(Type::Gen | Type::Cls)); + assert(type.subtypeOf(Type::Gen)); typedef RegionDesc::Location::Tag T; switch (loc.tag()) { case T::Stack: guardTypeStack(loc.stackOffset(), type); break; @@ -2631,12 +2638,7 @@ void HhbcTranslator::assertTypeLocation(const RegionDesc::Location& loc, } void HhbcTranslator::guardTypeStack(uint32_t stackIndex, Type type) { - // Should not generate guards for class; instead assert their type - if (type.subtypeOf(Type::Cls)) { - assertTypeStack(stackIndex, type); - return; - } - + assert(type.subtypeOf(Type::Gen)); assert(m_evalStack.size() == 0); assert(m_stackDeficit == 0); // This should only be called at the beginning // of a trace, with a clean stack. 
@@ -2644,6 +2646,7 @@ void HhbcTranslator::guardTypeStack(uint32_t stackIndex, Type type) { } void HhbcTranslator::checkTypeStack(uint32_t idx, Type type, Offset dest) { + assert(type.subtypeOf(Type::Gen)); auto exitTrace = getExitTrace(dest); if (idx < m_evalStack.size()) { FTRACE(1, "checkTypeStack(){}: generating CheckType for {}\n", @@ -3907,15 +3910,15 @@ IRTrace* HhbcTranslator::getExitTrace(Offset targetBcOff /* = -1 */) { IRTrace* HhbcTranslator::getExitTrace(Offset targetBcOff, std::vector& spillValues) { if (targetBcOff == -1) targetBcOff = bcOff(); - return getExitTraceImpl(targetBcOff, ExitFlag::None, spillValues, - CustomExit{}); + return getExitTraceImpl(targetBcOff, ExitFlag::JIT, spillValues, + CustomExit{}); } IRTrace* HhbcTranslator::getExitTraceWarn(Offset targetBcOff, std::vector& spillValues, const StringData* warning) { assert(targetBcOff != -1); - return getExitTraceImpl(targetBcOff, ExitFlag::None, spillValues, + return getExitTraceImpl(targetBcOff, ExitFlag::JIT, spillValues, [&]() -> SSATmp* { gen(RaiseWarning, cns(warning)); return nullptr; @@ -3934,8 +3937,36 @@ IRTrace* HhbcTranslator::makeSideExit(Offset targetBcOff, ExitLambda exit) { IRTrace* HhbcTranslator::getExitSlowTrace() { auto spillValues = peekSpillValues(); - return getExitTraceImpl(bcOff(), ExitFlag::NoIR, spillValues, - CustomExit{}); + return getExitTraceImpl(bcOff(), ExitFlag::Interp, spillValues, + CustomExit{}); +} + +IRTrace* HhbcTranslator::getExitOptTrace(TransID transId) { + auto spillValues = peekSpillValues(); + Offset targetBcOff = bcOff(); + auto const exit = m_tb->makeExitTrace(targetBcOff); + + BCMarker exitMarker; + exitMarker.bcOff = targetBcOff; + exitMarker.spOff = m_tb->spOffset() + spillValues.size() - m_stackDeficit; + exitMarker.func = curFunc(); + + TracePusher tracePusher(*m_tb, exit, exitMarker); + + SSATmp* stack = nullptr; + if (m_stackDeficit != 0 || !spillValues.empty()) { + spillValues.insert(spillValues.begin(), + { m_tb->sp(), 
cns(int64_t(m_stackDeficit)) }); + stack = gen(SpillStack, + std::make_pair(spillValues.size(), &spillValues[0])); + } else { + stack = m_tb->sp(); + } + + gen(SyncABIRegs, m_tb->fp(), stack); + gen(ReqRetranslateOpt, ReqRetransOptData(transId, targetBcOff)); + + return exit; } IRTrace* HhbcTranslator::getExitTraceImpl(Offset targetBcOff, @@ -3985,7 +4016,7 @@ IRTrace* HhbcTranslator::getExitTraceImpl(Offset targetBcOff, gen(SyncABIRegs, m_tb->fp(), stack); - if (flag == ExitFlag::NoIR) { + if (flag == ExitFlag::Interp) { gen(targetBcOff == m_startBcOff ? ReqRetranslateNoIR : ReqBindJmpNoIR, BCOffset(targetBcOff)); return exit; @@ -3996,7 +4027,6 @@ IRTrace* HhbcTranslator::getExitTraceImpl(Offset targetBcOff, } else { gen(ReqBindJmp, BCOffset(targetBcOff)); } - return exit; } diff --git a/hphp/runtime/vm/jit/hhbc-translator.h b/hphp/runtime/vm/jit/hhbc-translator.h index cfcd5fae4..2f2047bdf 100755 --- a/hphp/runtime/vm/jit/hhbc-translator.h +++ b/hphp/runtime/vm/jit/hhbc-translator.h @@ -355,16 +355,24 @@ struct HhbcTranslator { void emitReqDoc(const StringData* name); // iterators - void emitIterInit(uint32_t iterId, int targetOffset, uint32_t valLocalId); + void emitIterInit(uint32_t iterId, + int targetOffset, + uint32_t valLocalId, + bool invertCond); void emitIterInitK(uint32_t iterId, int targetOffset, uint32_t valLocalId, - uint32_t keyLocalId); - void emitIterNext(uint32_t iterId, int targetOffset, uint32_t valLocalId); + uint32_t keyLocalId, + bool invertCond); + void emitIterNext(uint32_t iterId, + int targetOffset, + uint32_t valLocalId, + bool invertCond); void emitIterNextK(uint32_t iterId, int targetOffset, uint32_t valLocalId, - uint32_t keyLocalId); + uint32_t keyLocalId, + bool invertCond); void emitMIterInit(uint32_t iterId, int targetOffset, uint32_t valLocalId); void emitMIterInitK(uint32_t iterId, int targetOffset, @@ -375,16 +383,24 @@ struct HhbcTranslator { int targetOffset, uint32_t valLocalId, uint32_t keyLocalId); - void 
emitWIterInit(uint32_t iterId, int targetOffset, uint32_t valLocalId); + void emitWIterInit(uint32_t iterId, + int targetOffset, + uint32_t valLocalId, + bool invertCond); void emitWIterInitK(uint32_t iterId, int targetOffset, uint32_t valLocalId, - uint32_t keyLocalId); - void emitWIterNext(uint32_t iterId, int targetOffset, uint32_t valLocalId); + uint32_t keyLocalId, + bool invertCond); + void emitWIterNext(uint32_t iterId, + int targetOffset, + uint32_t valLocalId, + bool invertCond); void emitWIterNextK(uint32_t iterId, int targetOffset, uint32_t valLocalId, - uint32_t keyLocalId); + uint32_t keyLocalId, + bool invertCond); void emitIterFree(uint32_t iterId); void emitMIterFree(uint32_t iterId); @@ -414,6 +430,7 @@ struct HhbcTranslator { void emitStrlen(); void emitIncStat(int32_t counter, int32_t value, bool force = false); void emitIncTransCounter(); + void emitCheckCold(Transl::TransID transId); void emitArrayIdx(); private: @@ -682,7 +699,7 @@ private: SSATmp* emitIncDec(bool pre, bool inc, SSATmp* src); void emitBinaryArith(Opcode); template - SSATmp* emitIterInitCommon(int offset, Lambda genFunc); + SSATmp* emitIterInitCommon(int offset, Lambda genFunc, bool invertCond); BCMarker makeMarker(Offset bcOff); void updateMarker(); template @@ -724,6 +741,7 @@ private: // Exit trace creation routines. */ IRTrace* getExitSlowTrace(); IRTrace* getCatchTrace(); + IRTrace* getExitOptTrace(Transl::TransID transId); /* * Implementation for the above. Takes spillValues, target offset, @@ -734,16 +752,15 @@ private: // Exit trace creation routines. * on the stack before exiting. */ enum class ExitFlag { - None, - NoIR, - + Interp, // will bail to the interpreter to execute at least one BC instr + JIT, // will attempt to use the JIT to create a new translation // DelayedMarker means to use the current instruction marker // instead of one for targetBcOff. 
DelayedMarker, }; typedef std::function CustomExit; IRTrace* getExitTraceImpl(Offset targetBcOff, - ExitFlag noIRExit, + ExitFlag flag, std::vector& spillValues, const CustomExit&); diff --git a/hphp/runtime/vm/jit/ir-translator.cpp b/hphp/runtime/vm/jit/ir-translator.cpp index 39fd9409a..e5f97fa0c 100644 --- a/hphp/runtime/vm/jit/ir-translator.cpp +++ b/hphp/runtime/vm/jit/ir-translator.cpp @@ -117,11 +117,16 @@ void IRTranslator::checkType(const Transl::Location& l, using Transl::Location; switch (l.space) { - case Location::Stack: - m_hhbcTrans.guardTypeStack(locPhysicalOffset(l), - Type::fromRuntimeType(rtt)); + case Location::Stack: { + uint32_t stackOffset = locPhysicalOffset(l); + JIT::Type type = JIT::Type::fromRuntimeType(rtt); + if (type.subtypeOf(Type::Cls)) { + m_hhbcTrans.assertTypeStack(stackOffset, type); + } else { + m_hhbcTrans.guardTypeStack(stackOffset, type); + } break; - + } case Location::Local: m_hhbcTrans.guardTypeLocal(l.offset, Type::fromRuntimeType(rtt)); break; @@ -266,12 +271,27 @@ void IRTranslator::translateBranchOp(const NormalizedInstruction& i) { auto const op = i.op(); assert(op == OpJmpZ || op == OpJmpNZ); - assert(!i.next); + Offset takenOffset = i.offset() + i.imm[0].u_BA; + Offset fallthruOffset = i.offset() + instrLen((Op*)(i.pc())); + assert(i.breaksTracelet || + i.nextOffset == takenOffset || + i.nextOffset == fallthruOffset); + + if (i.breaksTracelet || i.nextOffset == fallthruOffset) { + if (op == OpJmpZ) { + HHIR_EMIT(JmpZ, takenOffset); + } else { + HHIR_EMIT(JmpNZ, takenOffset); + } + return; + } + assert(i.nextOffset == takenOffset); + // invert the branch if (op == OpJmpZ) { - HHIR_EMIT(JmpZ, i.offset() + i.imm[0].u_BA); + HHIR_EMIT(JmpNZ, fallthruOffset); } else { - HHIR_EMIT(JmpNZ, i.offset() + i.imm[0].u_BA); + HHIR_EMIT(JmpZ, fallthruOffset); } } @@ -1382,40 +1402,78 @@ IRTranslator::translateInstanceOfD(const NormalizedInstruction& i) { HHIR_EMIT(InstanceOfD, (i.imm[0].u_SA)); } +/* + * This function returns the 
offset of instruction i's branch target. + * This is normally the offset corresponding to the branch being + * taken. However, if i does not break a trace and it's followed in + * the trace by the instruction in the taken branch, then this + * function returns the offset of the i's fall-through instruction. + * In that case, the invertCond output argument is set to true; + * otherwise it's set to false. + */ +static Offset getBranchTarget(const NormalizedInstruction& i, + bool& invertCond) { + assert(instrJumpOffset((Op*)(i.pc())) != nullptr); + Offset targetOffset = i.offset() + i.imm[1].u_BA; + invertCond = false; + + if (!i.breaksTracelet && i.nextOffset == targetOffset) { + invertCond = true; + Offset fallthruOffset = i.offset() + instrLen((Op*)i.pc()); + targetOffset = fallthruOffset; + } + + return targetOffset; +} + void IRTranslator::translateIterInit(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); + HHIR_EMIT(IterInit, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, - i.imm[2].u_IVA); + targetOffset, + i.imm[2].u_IVA, + invertCond); } void IRTranslator::translateIterInitK(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); + HHIR_EMIT(IterInitK, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, + targetOffset, i.imm[2].u_IVA, - i.imm[3].u_IVA); + i.imm[3].u_IVA, + invertCond); } void IRTranslator::translateIterNext(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); HHIR_EMIT(IterNext, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, - i.imm[2].u_IVA); + targetOffset, + i.imm[2].u_IVA, + invertCond); } void IRTranslator::translateIterNextK(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); HHIR_EMIT(IterNextK, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, + targetOffset, i.imm[2].u_IVA, - i.imm[3].u_IVA); 
+ i.imm[3].u_IVA, + invertCond); } void @@ -1456,38 +1514,52 @@ IRTranslator::translateMIterNextK(const NormalizedInstruction& i) { void IRTranslator::translateWIterInit(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); + HHIR_EMIT(WIterInit, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, - i.imm[2].u_IVA); + targetOffset, + i.imm[2].u_IVA, + invertCond); } void IRTranslator::translateWIterInitK(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); + HHIR_EMIT(WIterInitK, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, + targetOffset, i.imm[2].u_IVA, - i.imm[3].u_IVA); + i.imm[3].u_IVA, + invertCond); } void IRTranslator::translateWIterNext(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); HHIR_EMIT(WIterNext, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, - i.imm[2].u_IVA); + targetOffset, + i.imm[2].u_IVA, + invertCond); } void IRTranslator::translateWIterNextK(const NormalizedInstruction& i) { + bool invertCond = false; + Offset targetOffset = getBranchTarget(i, invertCond); HHIR_EMIT(WIterNextK, i.imm[0].u_IVA, - i.offset() + i.imm[1].u_BA, + targetOffset, i.imm[2].u_IVA, - i.imm[3].u_IVA); + i.imm[3].u_IVA, + invertCond); } void @@ -1613,6 +1685,8 @@ void IRTranslator::translateInstr(const NormalizedInstruction& i) { FTRACE(1, "\n{:-^60}\n", folly::format("translating {} with stack:\n{}", i.toString(), m_hhbcTrans.showStack())); + // When profiling, we disable type predictions to avoid side exits + assert(Transl::tx64->mode() != TransProfile || !i.outputPredicted); m_hhbcTrans.setBcOff(i.source.offset(), i.breaksTracelet && !m_hhbcTrans.isInlining()); diff --git a/hphp/runtime/vm/jit/ir.h b/hphp/runtime/vm/jit/ir.h index 345074e33..3bce1ce14 100755 --- a/hphp/runtime/vm/jit/ir.h +++ b/hphp/runtime/vm/jit/ir.h @@ -316,6 +316,7 @@ O(RaiseError, ND, S(Str), 
E|N|Mem|Refs|T|Er) \ O(RaiseWarning, ND, S(Str), E|N|Mem|Refs|Er) \ O(CheckInit, ND, S(Gen), NF) \ O(CheckInitMem, ND, S(PtrToGen) C(Int), NF) \ +O(CheckCold, ND, NA, E) \ O(AssertNonNull, DSubtract(0, Nullptr), S(Nullptr,CountedStr), NF) \ O(Unbox, DUnbox(0), S(Gen), NF) \ O(Box, DBox(0), S(Init), E|N|Mem|CRc|PRc) \ @@ -423,6 +424,7 @@ O(ExceptionBarrier, D(StkPtr), S(StkPtr), E) \ O(ReqBindJmp, ND, NA, T|E) \ O(ReqBindJmpNoIR, ND, NA, T|E) \ O(ReqRetranslateNoIR, ND, NA, T|E) \ +O(ReqRetranslateOpt, ND, NA, T|E) \ O(ReqRetranslate, ND, NA, T|E) \ O(SyncABIRegs, ND, S(FramePtr) S(StkPtr), E) \ O(Mov, DofS(0), SUnk, C|P) \ diff --git a/hphp/runtime/vm/jit/prof-data.cpp b/hphp/runtime/vm/jit/prof-data.cpp new file mode 100644 index 000000000..30793ae90 --- /dev/null +++ b/hphp/runtime/vm/jit/prof-data.cpp @@ -0,0 +1,233 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ +*/ + +#include "hphp/runtime/vm/jit/prof-data.h" + +#include +#include "hphp/util/base.h" +#include "hphp/runtime/vm/jit/translator.h" +#include "hphp/runtime/vm/jit/region-selection.h" + +namespace HPHP { +namespace JIT { + +static const Trace::Module TRACEMOD = Trace::pgo; + +using Transl::Tracelet; +using Transl::TransAnchor; +using Transl::TransProlog; +using Transl::TransProfile; + +/////////// Counters ////////// + +template +T ProfCounters::get(uint32_t id) const { + if (id / kCountersPerChunk >= m_chunks.size()) { + return m_initVal; + } + return m_chunks[id / kCountersPerChunk][id % kCountersPerChunk]; +} + +template +T* ProfCounters::getAddr(uint32_t id) { + // allocate new chunks until `id` is covered (ids may skip whole chunks) + while (id >= m_chunks.size() * kCountersPerChunk) { + uint32_t size = sizeof(T) * kCountersPerChunk; + T* chunk = (T*)malloc(size); + std::fill_n(chunk, kCountersPerChunk, m_initVal); + m_chunks.push_back(chunk); + } + assert(id / kCountersPerChunk < m_chunks.size()); + return &(m_chunks[id / kCountersPerChunk][id % kCountersPerChunk]); +} + +/////////// ProfTransRec ////////// + +ProfTransRec::ProfTransRec(TransID id, + TransKind kind, + Offset lastBcOff, + const SrcKey& sk, + RegionDesc::BlockPtr block) + : m_id(id) + , m_kind(kind) + , m_lastBcOff(lastBcOff) + , m_block(block) + , m_sk(sk) { + assert(block == nullptr || block->start() == sk); +} + +ProfTransRec::ProfTransRec(TransID id, + TransKind kind, + const SrcKey& sk) + : m_id(id) + , m_kind(kind) + , m_lastBcOff(-1) + , m_block(nullptr) + , m_sk(sk) { + assert(kind == TransAnchor || kind == TransProlog); +} + +TransID ProfTransRec::transId() const { + return m_id; +} + +TransKind ProfTransRec::kind() const { + return m_kind; +} + +SrcKey ProfTransRec::srcKey() const { + return m_sk; +} + +Offset ProfTransRec::startBcOff() const { + return m_sk.offset(); // m_block may be null; ctor asserts start() == sk +} + +Offset ProfTransRec::lastBcOff() const { + 
return m_lastBcOff; +} + +Func* ProfTransRec::func() const { + return const_cast(m_block->func()); +} + +FuncId ProfTransRec::funcId() const { + return m_sk.getFuncId(); +} + +RegionDesc::BlockPtr ProfTransRec::block() const { + return m_block; +} + +/////////// ProfData ////////// + +ProfData::ProfData() + : m_numTrans(0) + , m_counters(RuntimeOption::EvalJitPGOThreshold) { +} + +uint32_t ProfData::numTrans() const { + return m_numTrans; +} + +TransID ProfData::curTransID() const { + return numTrans(); +} + +SrcKey ProfData::transSrcKey(TransID id) const { + assert(id < m_transRecs.size()); + return m_transRecs[id]->srcKey(); +} + +Offset ProfData::transStartBcOff(TransID id) const { + assert(id < m_transRecs.size()); + return m_transRecs[id]->startBcOff(); +} + +Offset ProfData::transLastBcOff(TransID id) const { + assert(id < m_transRecs.size()); + return m_transRecs[id]->lastBcOff(); +} + +Op* ProfData::transLastInstr(TransID id) const { + Unit* unit = transFunc(id)->unit(); + Offset lastBcOff = transLastBcOff(id); + return (Op*)(unit->at(lastBcOff)); +} + +Offset ProfData::transStopBcOff(TransID id) const { + Unit* unit = m_transRecs[id]->func()->unit(); + Offset lastBcOff = transLastBcOff(id); + return lastBcOff + instrLen((Op*)(unit->at(lastBcOff))); +} + +FuncId ProfData::transFuncId(TransID id) const { + assert(id < m_transRecs.size()); + return m_transRecs[id]->funcId(); +} + +Func* ProfData::transFunc(TransID id) const { + assert(id < m_transRecs.size()); + return m_transRecs[id]->func(); +} + +TransKind ProfData::transKind(TransID id) const { + assert(id < m_numTrans); + return m_transRecs[id]->kind(); +} + +int64_t ProfData::transCounter(TransID id) const { + assert(id < m_numTrans); + return m_counters.get(id); +} + +int64_t* ProfData::transCounterAddr(TransID id) { + return m_counters.getAddr(id); +} + +bool ProfData::optimized(const SrcKey& sk) const { + return mapContains(m_optimized, sk); +} + +void ProfData::setOptimized(const SrcKey& sk) { + 
m_optimized.insert(sk); +} + +RegionDesc::BlockPtr ProfData::transBlock(TransID id) const { + assert(id < m_transRecs.size()); + const ProfTransRec& pTransRec = *m_transRecs[id]; + return pTransRec.block(); +} + +/* + * Temporary work-around. + * + * TODO: get rid of this once translateRegion supports inlining + */ +static bool supportedTracelet(TransID transId, const Tracelet& tlet) { + for (auto instr = tlet.m_instrStream.first; instr; instr = instr->next) { + if (instr->calleeTrace) { + FTRACE(5, "supportedTracelet: unsupported {}: has inlining\n", transId); + return false; + } + } + + return true; +} + +TransID ProfData::addTrans(const Tracelet& tracelet, TransKind kind) { + TransID transId = m_numTrans++; + Offset lastBcOff = tracelet.m_instrStream.last->source.offset(); + auto block = kind == TransProfile && supportedTracelet(transId, tracelet) ? + createBlock(tracelet) : nullptr; + m_transRecs.emplace_back(new ProfTransRec(transId, kind, lastBcOff, + tracelet.m_sk, block)); + return transId; +} + +TransID ProfData::addTransProlog(const SrcKey& sk) { + TransID transId = m_numTrans++; + m_transRecs.emplace_back(new ProfTransRec(transId, TransProlog, sk)); + return transId; +} + +TransID ProfData::addTransAnchor(const SrcKey& sk) { + TransID transId = m_numTrans++; + m_transRecs.emplace_back(new ProfTransRec(transId, TransAnchor, sk)); + return transId; +} + +} } diff --git a/hphp/runtime/vm/jit/prof-data.h b/hphp/runtime/vm/jit/prof-data.h new file mode 100644 index 000000000..be4e12feb --- /dev/null +++ b/hphp/runtime/vm/jit/prof-data.h @@ -0,0 +1,137 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. 
(http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#ifndef incl_HPHP_PROF_TRANS_DATA_H_ +#define incl_HPHP_PROF_TRANS_DATA_H_ + +#include + +#include "hphp/util/base.h" +#include "hphp/runtime/base/types.h" +#include "hphp/runtime/vm/func.h" +#include "hphp/runtime/vm/srckey.h" +#include "hphp/runtime/vm/jit/types.h" +#include "hphp/runtime/vm/jit/runtime-type.h" +#include "hphp/runtime/vm/jit/region-selection.h" + +namespace HPHP { +namespace JIT { + +using Transl::TransID; +using Transl::TransKind; +using Transl::Tracelet; + +/** + * A simple class of a growable number of profiling counters with + * fixed addresses, suitable for being incremented from the TC. + */ +template +class ProfCounters { + public: + explicit ProfCounters(T initVal) + : m_initVal(initVal) + {} + + ProfCounters(const ProfCounters&) = delete; + ProfCounters& operator=(const ProfCounters&) = delete; + + ~ProfCounters() { + for (size_t i = 0; i < m_chunks.size(); i++) { + free(m_chunks[i]); + } + } + + T get(uint32_t id) const; + T* getAddr(uint32_t id); + + private: + static const uint32_t kCountersPerChunk = 2 * 1024 * 1024 / sizeof(T); + + T m_initVal; + vector m_chunks; +}; + + +/** + * A profiling record kept for each translation in JitPGO mode. 
+ */ +class ProfTransRec { + public: + ProfTransRec(TransID id, TransKind kind, Offset lastBcOff, const SrcKey& sk, + RegionDesc::BlockPtr block); + ProfTransRec(TransID id, TransKind kind, const SrcKey& sk); + + TransID transId() const; + TransKind kind() const; + SrcKey srcKey() const; + Offset startBcOff() const; + Offset lastBcOff() const; + Func* func() const; + FuncId funcId() const; + RegionDesc::BlockPtr block() const; + + private: + TransID m_id; // sequential ID of the associated translation + TransKind m_kind; + Offset m_lastBcOff; // offset of the last bytecode instr + RegionDesc::BlockPtr m_block; + SrcKey m_sk; +}; + +typedef std::unique_ptr ProfTransRecPtr; + +/** + * ProfData encapsulates the profiling data kept by the JIT. + */ +class ProfData { +public: + ProfData(); + + ProfData(const ProfData&) = delete; + ProfData& operator=(const ProfData&) = delete; + + TransID numTrans() const; + TransID curTransID() const; + + SrcKey transSrcKey(TransID id) const; + Offset transStartBcOff(TransID id) const; + Offset transLastBcOff(TransID id) const; + Op* transLastInstr(TransID id) const; + Offset transStopBcOff(TransID id) const; + FuncId transFuncId(TransID id) const; + Func* transFunc(TransID id) const; + RegionDesc::BlockPtr transBlock(TransID id) const; + TransKind transKind(TransID id) const; + int64_t transCounter(TransID id) const; + int64_t* transCounterAddr(TransID id); + + TransID addTrans(const Tracelet& tracelet, TransKind kind); + TransID addTransProlog(const SrcKey& sk); + TransID addTransAnchor(const SrcKey& sk); + + bool optimized(const SrcKey& sk) const; + void setOptimized(const SrcKey& sk); + +private: + uint32_t m_numTrans; + vector m_transRecs; + ProfCounters m_counters; + SrcKeySet m_optimized; // set of SrcKeys already optimized +}; + +} } + +#endif diff --git a/hphp/runtime/vm/jit/region-hot-block.cpp b/hphp/runtime/vm/jit/region-hot-block.cpp new file mode 100644 index 000000000..effbaf131 --- /dev/null +++ 
b/hphp/runtime/vm/jit/region-hot-block.cpp @@ -0,0 +1,37 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#include "hphp/runtime/vm/jit/trans-cfg.h" +#include "hphp/runtime/vm/jit/translator-inline.h" + +namespace HPHP { +namespace JIT { + +RegionDescPtr selectHotBlock(TransID transId, + const ProfData* profData, + const TransCFG& cfg) { + RegionDescPtr region = smart::make_unique(); + + RegionDesc::BlockPtr block = profData->transBlock(transId); + + if (block != nullptr) { + region->blocks.emplace_back(block); + } + + return region; +} + +} } diff --git a/hphp/runtime/vm/jit/region-hot-trace.cpp b/hphp/runtime/vm/jit/region-hot-trace.cpp new file mode 100644 index 000000000..96cdbe68d --- /dev/null +++ b/hphp/runtime/vm/jit/region-hot-trace.cpp @@ -0,0 +1,178 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. 
(http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#include "hphp/runtime/vm/jit/trans-cfg.h" +#include "hphp/runtime/vm/jit/translator-inline.h" + +namespace HPHP { +namespace JIT { + +static const Trace::Module TRACEMOD = Trace::pgo; + +/** + * This function returns true for control-flow bytecode instructions that + * are not supported in the middle of a region yet. + */ +static bool breaksRegion(Op opc) { + switch (opc) { + case OpMIterNext: + case OpMIterNextK: + case OpSwitch: + case OpSSwitch: + case OpContSuspend: + case OpContRetC: + case OpRetC: + case OpRetV: + case OpExit: + case OpFatal: + case OpMIterInit: + case OpMIterInitK: + case OpIterBreak: + case OpDecodeCufIter: + case OpThrow: + case OpUnwind: + case OpEval: + case OpNativeImpl: + case OpContHandle: + return true; + + default: + return false; + } +} + +/** + * Returns the set of bytecode offsets for the instructions that may + * be executed immediately after opc. 
+ */ +static OffsetSet findSuccOffsets(Op* opc, const Unit* unit) { + OffsetSet succBcOffs; + Op* bcStart = (Op*)(unit->entry()); + + if (!instrIsControlFlow(*opc)) { + Offset succOff = opc + instrLen(opc) - bcStart; + succBcOffs.insert(succOff); + return succBcOffs; + } + + if (instrAllowsFallThru(*opc)) { + Offset succOff = opc + instrLen(opc) - bcStart; + succBcOffs.insert(succOff); + } + + if (isSwitch(*opc)) { + foreachSwitchTarget(opc, [&](Offset& offset) { + succBcOffs.insert(offset); + }); + } else { + Offset target = instrJumpTarget(bcStart, opc - bcStart); + if (target != InvalidAbsoluteOffset) { + succBcOffs.insert(target); + } + } + return succBcOffs; +} + +RegionDescPtr selectHotTrace(TransID triggerId, + const ProfData* profData, + TransCFG& cfg, + TransIDSet& selectedSet) { + JIT::RegionDescPtr region = smart::make_unique(); + TransID tid = triggerId; + TransID prevId = InvalidID; + selectedSet.clear(); + + while (!setContains(selectedSet, tid)) { + + RegionDesc::BlockPtr block = profData->transBlock(tid); + if (block == nullptr) break; + + // If the debugger is attached, only allow single-block regions. + if (prevId != InvalidID && isDebuggerAttachedProcess()) { + FTRACE(5, "selectHotRegion: breaking region at Translation {} " + "because of debugger is attached\n", tid); + break; + } + + // Break if block is not the first and requires reffiness checks. + // Task #2589970: fix translateRegion to support mid-region reffiness checks + if (prevId != InvalidID) { + auto nRefDeps = block->reffinessPreds().size(); + if (nRefDeps > 0) { + FTRACE(5, "selectHotRegion: breaking region because of refDeps ({}) at " + "Translation {}\n", nRefDeps, tid); + break; + } + } + + // Break trace if translation tid cannot follow the execution of + // the entire translation prevId. This can only happen if the + // execution of prevId takes a side exit that leads to the + // execution of tid. 
+ if (prevId != InvalidID) { + Op* lastInstr = profData->transLastInstr(prevId); + const Unit* unit = profData->transFunc(prevId)->unit(); + OffsetSet succOffs = findSuccOffsets(lastInstr, unit); + if (!setContains(succOffs, profData->transSrcKey(tid).offset())) { + if (HPHP::Trace::moduleEnabled(HPHP::Trace::pgo, 5)) { + FTRACE(5, "selectHotTrace: WARNING: Breaking region @: {}\n", + JIT::show(*region)); + FTRACE(5, "selectHotTrace: next translation selected: tid = {}\n{}\n", + tid, JIT::show(*block)); + std::string succStr("succOffs = "); + for (auto succ : succOffs) { + succStr += lexical_cast(succ); + } + FTRACE(5, "\n{}\n", succStr); + } + break; + } + } + region->blocks.emplace_back(block); + selectedSet.insert(tid); + + Op lastOp = *(profData->transLastInstr(tid)); + if (breaksRegion(lastOp)) { + FTRACE(5, "selectHotTrace: breaking region because of last instruction " + "in Translation {}: {}\n", tid, opcodeToName(lastOp)); + break; + } + + auto outArcs = cfg.outArcs(tid); + if (outArcs.size() == 0) { + FTRACE(5, "selectHotTrace: breaking region because there's no successor " + "for Translation {}\n", tid); + break; + } + + auto maxWeight = std::numeric_limits::min(); + TransCFG::Arc* maxArc = nullptr; + for (auto arc : outArcs) { + if (arc->weight() >= maxWeight) { + maxWeight = arc->weight(); + maxArc = arc; + } + } + assert(maxArc != nullptr); + + prevId = tid; + tid = maxArc->dst(); + } + + return region; +} + +} } diff --git a/hphp/runtime/vm/jit/region-method.cpp b/hphp/runtime/vm/jit/region-method.cpp index 52aaf4c96..913bb2d0a 100644 --- a/hphp/runtime/vm/jit/region-method.cpp +++ b/hphp/runtime/vm/jit/region-method.cpp @@ -54,12 +54,12 @@ int numInstrs(PC start, PC end) { * back to the tracelet compiler. (This will happen for side-exits * from method regions, for example.) 
*/ -RegionDescPtr regionMethod(const RegionContext& context) { +RegionDescPtr selectMethod(const RegionContext& context) { using namespace HPHP::Verifier; if (!isFuncEntry(context.func, context.bcOffset)) return nullptr; - FTRACE(1, "function entry for {}: using regionMethod\n", - context.func->fullName()->data()); + FTRACE(1, "function entry for {}: using selectMethod\n", + context.func->fullName()->data()); auto ret = smart::make_unique(); diff --git a/hphp/runtime/vm/jit/region-onebc.cpp b/hphp/runtime/vm/jit/region-onebc.cpp index 31056ed9c..e11dc5428 100644 --- a/hphp/runtime/vm/jit/region-onebc.cpp +++ b/hphp/runtime/vm/jit/region-onebc.cpp @@ -25,7 +25,7 @@ namespace HPHP { namespace JIT { * A dummy (debugging) region selector that just uses a single HHBC * opcode as the region, and guards on everything. */ -RegionDescPtr regionOneBC(const RegionContext& ctx) { +RegionDescPtr selectOneBC(const RegionContext& ctx) { auto ret = smart::make_unique(); auto blk = smart::make_unique(ctx.func, ctx.bcOffset, 1); diff --git a/hphp/runtime/vm/jit/region-selection.cpp b/hphp/runtime/vm/jit/region-selection.cpp index 1f7dfe41a..a3c90b3e3 100644 --- a/hphp/runtime/vm/jit/region-selection.cpp +++ b/hphp/runtime/vm/jit/region-selection.cpp @@ -25,36 +25,57 @@ #include "hphp/util/map_walker.h" #include "hphp/runtime/base/runtime_option.h" #include "hphp/runtime/vm/jit/translator.h" +#include "hphp/runtime/vm/jit/trans-cfg.h" +#include "hphp/runtime/vm/jit/translator-inline.h" namespace HPHP { namespace JIT { TRACE_SET_MOD(region); +using Transl::TransID; +using Transl::TranslatorX64; + ////////////////////////////////////////////////////////////////////// -extern RegionDescPtr regionMethod(const RegionContext&); -extern RegionDescPtr regionOneBC(const RegionContext&); -extern RegionDescPtr regionTracelet(const RegionContext&); +extern RegionDescPtr selectMethod(const RegionContext&); +extern RegionDescPtr selectOneBC(const RegionContext&); +extern RegionDescPtr 
selectTracelet(const RegionContext&); +extern RegionDescPtr selectHotBlock(TransID transId, + const ProfData* profData, + const TransCFG& cfg); +extern RegionDescPtr selectHotTrace(TransID triggerId, + const ProfData* profData, + TransCFG& cfg, + TransIDSet& selectedSet); ////////////////////////////////////////////////////////////////////// namespace { enum class RegionMode { - None, - OneBC, - Method, - Tracelet, - Legacy, + None, // empty region + + // Modes that create a region by inspecting live VM state + OneBC, // region with a single bytecode instruction + Method, // region with a whole method + Tracelet, // single-entry, multiple-exits region that ends on conditional + // branches or when an instruction consumes a value of unknown type + Legacy, // same as Tracelet, but using the legacy analyze() code + + // Modes that create a region by leveraging profiling data + HotBlock, // single-entry, single-exit region + HotTrace, // single-entry, multiple-exits region }; RegionMode regionMode() { auto& s = RuntimeOption::EvalJitRegionSelector; - if (s == "") return RegionMode::None; - if (s == "onebc") return RegionMode::OneBC; - if (s == "method") return RegionMode::Method; + if (s == "" ) return RegionMode::None; + if (s == "onebc" ) return RegionMode::OneBC; + if (s == "method" ) return RegionMode::Method; if (s == "tracelet") return RegionMode::Tracelet; - if (s == "legacy") return RegionMode::Legacy; + if (s == "legacy" ) return RegionMode::Legacy; + if (s == "hotblock") return RegionMode::HotBlock; + if (s == "hottrace") return RegionMode::HotTrace; FTRACE(1, "unknown region mode {}: using none\n", s); if (debug) abort(); return RegionMode::None; @@ -163,7 +184,7 @@ void RegionDesc::Block::checkInvariants() const { ////////////////////////////////////////////////////////////////////// namespace { -RegionDescPtr createRegion(const Transl::Tracelet& tlet) { +RegionDescPtr selectTraceletLegacy(const Transl::Tracelet& tlet) { typedef 
Transl::NormalizedInstruction NI; typedef RegionDesc::Block Block; @@ -176,7 +197,7 @@ RegionDescPtr createRegion(const Transl::Tracelet& tlet) { Block* curBlock; auto newBlock = [&] { region->blocks.push_back( - smart::make_unique(tlet.m_func, sk.offset(), 0)); + std::make_shared(tlet.m_func, sk.offset(), 0)); curBlock = region->blocks.back().get(); }; newBlock(); @@ -253,6 +274,15 @@ RegionDescPtr createRegion(const Transl::Tracelet& tlet) { } } +RegionDesc::BlockPtr createBlock(const Transl::Tracelet& tlet) { + RegionDescPtr region = selectTraceletLegacy(tlet); + + if (region == nullptr) return nullptr; + + always_assert(region->blocks.size() == 1); + return region->blocks.front(); +} + RegionDescPtr selectRegion(const RegionContext& context, const Transl::Tracelet* t) { auto const mode = regionMode(); @@ -281,11 +311,15 @@ RegionDescPtr selectRegion(const RegionContext& context, auto region = [&]{ try { switch (mode) { - case RegionMode::None: return RegionDescPtr{nullptr}; - case RegionMode::OneBC: return regionOneBC(context); - case RegionMode::Method: return regionMethod(context); - case RegionMode::Tracelet: return regionTracelet(context); - case RegionMode::Legacy: always_assert(t); return createRegion(*t); + case RegionMode::None: return RegionDescPtr{nullptr}; + case RegionMode::OneBC: return selectOneBC(context); + case RegionMode::Method: return selectMethod(context); + case RegionMode::Tracelet: return selectTracelet(context); + case RegionMode::Legacy: + always_assert(t); return selectTraceletLegacy(*t); + case RegionMode::HotBlock: + case RegionMode::HotTrace: always_assert(0 && + "unsupported region mode"); } not_reached(); } catch (const std::exception& e) { @@ -303,6 +337,48 @@ RegionDescPtr selectRegion(const RegionContext& context, return region; } +RegionDescPtr selectHotRegion(TransID transId, + TranslatorX64* tx64) { + + assert(RuntimeOption::EvalJitPGO); + + const ProfData* profData = tx64->profData(); + FuncId funcId = 
profData->transFuncId(transId); + TransCFG cfg(funcId, profData, tx64->getSrcDB(), tx64->getJmpToTransIDMap()); + TransIDSet selectedTIDs; + RegionDescPtr region = nullptr; + RegionMode mode = regionMode(); + + switch (mode) { + case RegionMode::None: + region = RegionDescPtr{nullptr}; + break; + case RegionMode::HotBlock: + region = selectHotBlock(transId, profData, cfg); + break; + case RegionMode::HotTrace: + region = selectHotTrace(transId, profData, cfg, selectedTIDs); + break; + case RegionMode::OneBC: + case RegionMode::Method: + case RegionMode::Tracelet: + case RegionMode::Legacy: + always_assert(0 && "unsupported region mode"); + } + + if (Trace::moduleEnabled(HPHP::Trace::pgo, 5)) { + std::string dotFileName = string("/tmp/trans-cfg-") + + lexical_cast(transId) + ".dot"; + + cfg.print(dotFileName, profData, &selectedTIDs); + FTRACE(5, "selectHotRegion: New Translation {} (file: {}) {}\n", + tx64->profData()->curTransID(), dotFileName, + region ? show(*region) : std::string("empty region")); + } + + return region; +} + ////////////////////////////////////////////////////////////////////// std::string show(RegionDesc::Location l) { diff --git a/hphp/runtime/vm/jit/region-selection.h b/hphp/runtime/vm/jit/region-selection.h index 243037280..b004dd9cf 100644 --- a/hphp/runtime/vm/jit/region-selection.h +++ b/hphp/runtime/vm/jit/region-selection.h @@ -26,11 +26,15 @@ #include "hphp/runtime/base/smart_containers.h" #include "hphp/runtime/vm/srckey.h" #include "hphp/runtime/vm/jit/type.h" +#include "hphp/runtime/vm/jit/types.h" namespace HPHP { + namespace Transl { struct Tracelet; +struct TranslatorX64; } + namespace JIT { using boost::container::flat_map; @@ -52,7 +56,7 @@ struct RegionDesc { struct Location; struct TypePred; struct ReffinessPred; - typedef smart::unique_ptr::type BlockPtr; + typedef std::shared_ptr BlockPtr; enum class ParamByRef : uint8_t { Yes, No, @@ -158,7 +162,6 @@ public: if (debug) checkInvariants(); } - Block(const Block&) = delete; 
Block& operator=(const Block&) = delete; /* @@ -270,16 +273,34 @@ struct RegionContext::PreLiveAR { ////////////////////////////////////////////////////////////////////// /* - * Define a compilation region that starts with sk. + * Select a compilation region corresponding to the given context. + * The shape of the region selected is controlled by + * RuntimeOption::EvalJitRegionSelector. If the specified shape is + * 'legacy', then the input argument t is used to build the region. * - * May return nullptr. + * This function may return nullptr. * * For now this is hooked up in TranslatorX64::translateWork, and * returning nullptr causes it to use the current level 0 tracelet * analyzer. Eventually we'd like analyze to occur underneath this as * well. */ -RegionDescPtr selectRegion(const RegionContext&, const Transl::Tracelet*); +RegionDescPtr selectRegion(const RegionContext& context, + const Transl::Tracelet* t); + +/* + * Select a compilation region based on profiling information. This + * is used in JitPGO mode. Argument transId specifies the profiling + * translation that triggered the profiling-based region selection. + */ +RegionDescPtr selectHotRegion(Transl::TransID transId, + Transl::TranslatorX64* tx64); + +/* + * Creates a Block corresponding to tracelet tlet. This function + * assumes that tlet contains a single block. + */ +RegionDesc::BlockPtr createBlock(const Transl::Tracelet& tlet); /* * Debug stringification for various things. diff --git a/hphp/runtime/vm/jit/region-tracelet.cpp b/hphp/runtime/vm/jit/region-tracelet.cpp index ce832adf9..8e93dfafb 100644 --- a/hphp/runtime/vm/jit/region-tracelet.cpp +++ b/hphp/runtime/vm/jit/region-tracelet.cpp @@ -195,7 +195,7 @@ RegionDescPtr regionTraceletImpl(const RegionContext& ctx, * attempts to consume an input with an insufficiently precise type. 
* */ -RegionDescPtr regionTracelet(const RegionContext& ctx) { +RegionDescPtr selectTracelet(const RegionContext& ctx) { InterpSet interp; RegionDescPtr region; uint32_t tries = 1; diff --git a/hphp/runtime/vm/jit/srcdb.cpp b/hphp/runtime/vm/jit/srcdb.cpp index 0f57974f4..a384f16a4 100644 --- a/hphp/runtime/vm/jit/srcdb.cpp +++ b/hphp/runtime/vm/jit/srcdb.cpp @@ -48,6 +48,11 @@ TCA SrcRec::getFallbackTranslation() const { } void SrcRec::chainFrom(IncomingBranch br) { + assert(br.type() == IncomingBranch::Tag::ADDR || + tx64->a. contains(br.toSmash()) || + tx64->ahot. contains(br.toSmash()) || + tx64->astubs. contains(br.toSmash()) || + tx64->atrampolines.contains(br.toSmash())); TCA destAddr = getTopTranslation(); m_incomingBranches.push_back(br); TRACE(1, "SrcRec(%p)::chainFrom %p -> %p (type %d); %zd incoming branches\n", @@ -175,14 +180,15 @@ void SrcRec::replaceOldTranslations() { * If we ever change that we'll have to change this to patch to * some sort of rebind requests. */ - assert(!RuntimeOption::RepoAuthoritative); + assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO); patchIncomingBranches(m_anchorTranslation); } void SrcRec::patch(IncomingBranch branch, TCA dest) { switch (branch.type()) { case IncomingBranch::Tag::JMP: { - auto& a = tx64->getAsmFor(branch.toSmash()); + auto toSmash = branch.toSmash(); + auto& a = tx64->getAsmFor(toSmash); CodeCursor cg(a, branch.toSmash()); TranslatorX64::smashJmp(a, branch.toSmash(), dest); break; diff --git a/hphp/runtime/vm/jit/srcdb.h b/hphp/runtime/vm/jit/srcdb.h index ab8375243..774431ed8 100644 --- a/hphp/runtime/vm/jit/srcdb.h +++ b/hphp/runtime/vm/jit/srcdb.h @@ -125,6 +125,10 @@ struct SrcRec { return m_inProgressTailJumps; } + const vector& incomingBranches() const { + return m_incomingBranches; + } + void clearInProgressTailJumps() { m_inProgressTailJumps.clear(); } diff --git a/hphp/runtime/vm/jit/trace-builder.cpp b/hphp/runtime/vm/jit/trace-builder.cpp index 78db6a1e6..1560c9497 100644 
--- a/hphp/runtime/vm/jit/trace-builder.cpp +++ b/hphp/runtime/vm/jit/trace-builder.cpp @@ -21,6 +21,7 @@ #include "hphp/util/trace.h" #include "hphp/runtime/vm/jit/target-cache.h" #include "hphp/runtime/vm/jit/ir-factory.h" +#include "hphp/util/assertions.h" namespace HPHP { namespace JIT { @@ -287,6 +288,7 @@ void TraceBuilder::updateTrackedState(IRInstruction* inst) { // fallthrough case AssertLoc: case GuardLoc: + case CheckLoc: setLocalType(inst->extra()->locId, inst->typeParam()); break; @@ -572,21 +574,39 @@ SSATmp* TraceBuilder::cseLookup(IRInstruction* inst, SSATmp* TraceBuilder::preOptimizeCheckLoc(IRInstruction* inst) { auto const locId = inst->extra()->locId; + Type typeParam = inst->typeParam(); if (auto const prevValue = getLocalValue(locId)) { - always_assert(false && "WTF"); - return gen( - CheckType, inst->typeParam(), inst->taken(), prevValue - ); + return gen(CheckType, typeParam, inst->taken(), prevValue); } auto const prevType = getLocalType(locId); - if (prevType != Type::None) { - always_assert(false && "WTF2"); - // It doesn't make sense to be checking something that's deemed to - // fail. - assert(prevType == inst->typeParam()); + + if (prevType == Type::None) { + return nullptr; + } + + if (prevType.subtypeOf(typeParam)) { inst->convertToNop(); + } else { + // + // Normally, it doesn't make sense to be checking something that's + // deemed to fail. Incompatible boxed types are ok though, since + // we don't track them precisely, but instead check them at every + // use. + // + // However, in JitPGO mode right now, this pathological case can + // happen, because profile counters are not accurate and we + // currently don't analyze Block post-conditions when picking its + // successors during region selection. This can lead to + // incompatible types in blocks selected for the same region. 
+ // + if (!typeParam.isBoxed() || !prevType.isBoxed()) { + if ((typeParam & prevType) == Type::Bottom) { + assert(RuntimeOption::EvalJitPGO); + return gen(Jmp_, inst->taken()); + } + } } return nullptr; @@ -599,12 +619,22 @@ SSATmp* TraceBuilder::preOptimizeAssertLoc(IRInstruction* inst) { if (!prevType.equals(Type::None) && !typeParam.strictSubtypeOf(prevType)) { if (!prevType.subtypeOf(typeParam)) { + /* Task #2553746 + * This is triggering for a case where the tracked state says the local is + * InitNull but the AssertLoc says it's Str. */ static auto const error = StringData::GetStaticString("Internal error: static analysis was " "wrong about a local variable's type."); auto* errorInst = m_irFactory.gen(RaiseError, inst->marker(), cns(error)); inst->become(&m_irFactory, errorInst); - assert(false && "Incorrect local type from static analysis"); + assert_log(false, [&]{ + IRTrace& mainTrace = trace()->isMain() ? *trace() + : *(trace()->main()); + return folly::format("\npreOptimizeAssertLoc: prevType: {} " + "typeParam: {}\nin instr: {}\nin trace: {}\n", + prevType.toString(), typeParam.toString(), + inst->toString(), mainTrace.toString()).str(); + }); } else { inst->convertToNop(); } @@ -810,6 +840,7 @@ SSATmp* TraceBuilder::optimizeWork(IRInstruction* inst, // Found a dominating instruction that can be used instead of inst FTRACE(1, " {}cse found: {}\n", indent(), result->inst()->toString()); + assert(!inst->consumesReferences()); if (inst->producesReference()) { // Replace with an IncRef FTRACE(1, " {}cse of refcount-producing instruction\n", indent()); diff --git a/hphp/runtime/vm/jit/trans-cfg.cpp b/hphp/runtime/vm/jit/trans-cfg.cpp new file mode 100644 index 000000000..6ec0bd62a --- /dev/null +++ b/hphp/runtime/vm/jit/trans-cfg.cpp @@ -0,0 +1,215 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 
Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#include "hphp/runtime/vm/jit/trans-cfg.h" + +namespace HPHP { +namespace JIT { + +static const Trace::Module TRACEMOD = Trace::pgo; + +static TransIDSet findPredTrans(const SrcRec* sr, + const TcaTransIDMap& jmpToTransID) { + assert(sr); + TransIDSet predSet; + + for (auto inBr : sr->incomingBranches()) { + TransID srcId = mapGet(jmpToTransID, inBr.toSmash(), InvalidID); + FTRACE(5, "findPredTrans: toSmash = {} srcId = {}\n", + inBr.toSmash(), srcId); + if (srcId != InvalidID) { + predSet.insert(srcId); + } + } + + return predSet; +} + +/** + * This function tries to infer the weight of any arc in the arcVec given the + * weights of other arcs in the list and totalWeight, which is the + * known sum of all their weights. + * Returns whether or not the weight of any arc was inferred and, in case of + * success, the weight of such arc is updated. 
+ */ +static bool inferredArcWeight(const TransCFG::ArcPtrVec& arcVec, + int64_t totalWeight) { + int64_t arcWeight = totalWeight; + TransCFG::Arc* unknownArc = nullptr; + for (auto arc : arcVec) { + if (arc->weight() == TransCFG::Arc::kUnknownWeight) { + if (unknownArc != nullptr) { + // More than one arc with unknown weight, so can't infer + return false; + } + unknownArc = arc; + } else { + arcWeight -= arc->weight(); + } + } + if (unknownArc == nullptr) return false; + // Avoid creating negative-weight arcs. Node weights are not required to be + // accurate and, since arc weights are derived from nodes' weights, they + // aren't accurate either. This can result in arcWeight to be negative here. + if (arcWeight < 0) arcWeight = 0; + unknownArc->setWeight(arcWeight); + return true; +} + +TransCFG::TransCFG(FuncId funcId, + const ProfData* profData, + const SrcDB& srcDB, + const TcaTransIDMap& jmpToTransID) { + assert(profData); + + // add nodes + for (TransID tid = 0; tid < profData->numTrans(); tid++) { + if (profData->transKind(tid) == TransProfile && + profData->transBlock(tid) != nullptr && + profData->transFuncId(tid) == funcId) { + int64_t counter = profData->transCounter(tid); + int64_t weight = RuntimeOption::EvalJitPGOThreshold - counter; + addNode(tid, weight); + } + } + + // add arcs + for (TransID dstId : nodes()) { + SrcKey dstSK = profData->transSrcKey(dstId); + const SrcRec* dstSR = srcDB.find(dstSK); + FTRACE(5, "TransCFG: adding incoming arcs in dstId = {}\n", dstId); + TransIDSet predIDs = findPredTrans(dstSR, jmpToTransID); + for (auto predId : predIDs) { + if (hasNode(predId)) { + FTRACE(5, "TransCFG: adding arc {} -> {}\n", predId, dstId); + addArc(predId, dstId, TransCFG::Arc::kUnknownWeight); + } + } + } + + // infer arc weights + bool changed; + do { + changed = false; + for (TransID tid : nodes()) { + int64_t nodeWeight = weight(tid); + if (inferredArcWeight(inArcs(tid), nodeWeight)) changed = true; + if (inferredArcWeight(outArcs(tid), 
nodeWeight)) changed = true; + } + } while (changed); + + // guess weight or non-inferred arcs + for (TransID tid : nodes()) { + for (auto arc : outArcs(tid)) { + if (arc->weight() == Arc::kUnknownWeight) { + arc->setGuessed(); + int64_t arcWgt = std::min(weight(arc->src()), weight(arc->dst())) / 2; + arc->setWeight(arcWgt); + } + } + } +} + +int64_t TransCFG::weight(TransID id) const { + assert(hasNode(id)); + size_t idx = mapGet(m_idToIdx, id); + return m_nodeInfo[idx].weight(); +} + +const TransCFG::ArcPtrVec& TransCFG::inArcs(TransID id) const { + assert(hasNode(id)); + size_t idx = mapGet(m_idToIdx, id); + return m_nodeInfo[idx].inArcs(); +} + +const TransCFG::ArcPtrVec& TransCFG::outArcs(TransID id) const { + assert(hasNode(id)); + size_t idx = mapGet(m_idToIdx, id); + return m_nodeInfo[idx].outArcs(); +} + +TransCFG::Node::~Node() { + for (auto arc : m_outArcs) { + delete arc; + } +} + +void TransCFG::addNode(TransID id, int64_t weight) { + size_t idx = m_transIds.size(); + m_transIds.push_back(id); + m_idToIdx[id] = idx; + m_nodeInfo.push_back(Node(id, weight)); +} + +bool TransCFG::hasNode(TransID id) const { + return m_idToIdx.find(id) != m_idToIdx.end(); +} + +void TransCFG::addArc(TransID srcId, TransID dstId, int64_t weight) { + assert(hasNode(srcId)); + assert(hasNode(dstId)); + size_t srcIdx = m_idToIdx[srcId]; + size_t dstIdx = m_idToIdx[dstId]; + Arc* arc = new Arc(srcId, dstId, weight); + m_nodeInfo[srcIdx].addOutArc(arc); + m_nodeInfo[dstIdx].addInArc(arc); +} + +void TransCFG::print(std::string fileName, const ProfData* profData, + const TransIDSet* selected) const { + FILE* file = fopen(fileName.c_str(), "wt"); + if (!file) return; + + fprintf(file, "digraph CFG {\n"); + + // find max node weight + int64_t maxWeight = 1; // 1 to avoid div by 0 + for (auto tid : nodes()) { + auto w = weight(tid); + if (w > maxWeight) maxWeight = w; + } + + // print nodes + for (auto tid : nodes()) { + int64_t w = weight(tid); + uint32_t coldness = 255 - (255 * w 
/ maxWeight); + Offset bcStart = profData->transStartBcOff(tid); + Offset bcStop = profData->transStopBcOff(tid); + const char* shape = selected && setContains(*selected, tid) ? "oval" + : "box"; + fprintf(file, + "t%u [shape=%s,label=\"T: %u\\np: %" PRIu64 "\\nbc: [0x%x-0x%x)\"," + "style=filled,fillcolor=\"#ff%02x%02x\"];\n", tid, shape, tid, w, + bcStart, bcStop, coldness, coldness); + } + + // print arcs + for (auto srcId : nodes()) { + for (auto arc : outArcs(srcId)) { + int64_t w = arc->weight(); + fprintf(file, "t%u -> t%u [color=\"%s\",label=\"%ld\"] ;\n", + srcId, + arc->dst(), + arc->guessed() ? "red" : "green4", + w); + } + } + + fprintf(file, "}\n"); + fclose(file); +} + +} } diff --git a/hphp/runtime/vm/jit/trans-cfg.h b/hphp/runtime/vm/jit/trans-cfg.h new file mode 100644 index 000000000..5587f2cad --- /dev/null +++ b/hphp/runtime/vm/jit/trans-cfg.h @@ -0,0 +1,109 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ +*/ + +#ifndef incl_HPHP_TRANS_CFG_H_ +#define incl_HPHP_TRANS_CFG_H_ + +#include + +#include "hphp/util/base.h" +#include "hphp/runtime/vm/jit/srcdb.h" +#include "hphp/runtime/vm/jit/translator.h" +#include "hphp/runtime/vm/jit/translator-x64.h" +#include "hphp/runtime/vm/jit/translator-inline.h" + +namespace HPHP { +namespace JIT { + +/** + * A dynamic control-flow graph of single-block translations. + */ +class TransCFG { + public: + class Arc { + public: + static const int64_t kUnknownWeight = -1; + + Arc(TransID src, TransID dst, int64_t w) + : m_src(src) + , m_dst(dst) + , m_weight(w) + , m_guessed(false) + {} + TransID src() const { return m_src; } + TransID dst() const { return m_dst; } + int64_t weight() const { return m_weight; } + bool guessed() const { return m_guessed; } + void setWeight(int64_t w) { m_weight = w; } + void setGuessed() { m_guessed = true; } + private: + TransID m_src; + TransID m_dst; + int64_t m_weight; + bool m_guessed; // whether or not m_weight was guessed + }; + + typedef std::vector ArcPtrVec; + + class Node { + public: + Node(TransID id, int64_t w) + : m_id(id) + , m_weight(w) + {} + ~Node(); + + TransID transId() const { return m_id; } + int64_t weight() const { return m_weight; } + const ArcPtrVec& inArcs() const { return m_inArcs; } + const ArcPtrVec& outArcs() const { return m_outArcs; } + void addInArc (Arc* arc) { m_inArcs.push_back(arc); } + void addOutArc(Arc* arc) { m_outArcs.push_back(arc); } + private: + TransID m_id; + int64_t m_weight; + ArcPtrVec m_inArcs; + ArcPtrVec m_outArcs; + }; + + TransCFG() {} + TransCFG(FuncId funcId, + const ProfData* profData, + const SrcDB& srcDB, + const TcaTransIDMap& jmpToTransID); + + const vector& nodes() const { return m_transIds; } + int64_t weight(TransID id) const; + void setNodeWeight(TransID id, int64_t weight); + const ArcPtrVec& inArcs(TransID id) const; + const ArcPtrVec& outArcs(TransID id) 
const; + void addNode(TransID id, int64_t weight); + bool hasNode(TransID id) const; + void addArc(TransID srcId, TransID dstId, int64_t weight=0); + void print(std::string fileName, + const ProfData* profData, + const TransIDSet* selected = nullptr) const; + + private: + vector m_transIds; // vector of TransIDs in the graph + vector m_nodeInfo; // info about each node + hphp_hash_map m_idToIdx; // map from TransIDs to indices + // in m_nodeInfo +}; + +} } + +#endif diff --git a/hphp/runtime/vm/jit/translator-x64-helpers.cpp b/hphp/runtime/vm/jit/translator-x64-helpers.cpp index f907c3ec3..132957b51 100644 --- a/hphp/runtime/vm/jit/translator-x64-helpers.cpp +++ b/hphp/runtime/vm/jit/translator-x64-helpers.cpp @@ -211,9 +211,7 @@ TCA funcBodyHelper(ActRec* fp) { TCA tca = tx64->getCallArrayProlog(func); - if (tca) { - func->setFuncBody(tca); - } else { + if (!tca) { tca = Translator::Get()->getResumeHelper(); } tl_regState = VMRegState::DIRTY; diff --git a/hphp/runtime/vm/jit/translator-x64.cpp b/hphp/runtime/vm/jit/translator-x64.cpp index e394f152d..08cae47d7 100644 --- a/hphp/runtime/vm/jit/translator-x64.cpp +++ b/hphp/runtime/vm/jit/translator-x64.cpp @@ -582,6 +582,19 @@ asm_label(a, release); size_t(a.frontier() - m_dtorGenericStub)); } +bool TranslatorX64::profileSrcKey(const SrcKey& sk) const { + if (!RuntimeOption::EvalJitPGO) return false; + + if (profData()->optimized(sk)) return false; + + // The TCA of closure bodies is stored in the func's prologue + // tables. So, to support retranslating them, we need to reset the + // prologue tables and the prologue cache appropriately. 
+ // (test/quick/floatcmp.php exposes this problem) + if (curFunc()->isClosureBody()) return false; + + return true; +} TCA TranslatorX64::retranslate(const TranslArgs& args) { if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), args.m_sk)) { @@ -593,6 +606,9 @@ TCA TranslatorX64::retranslate(const TranslArgs& args) { LeaseHolder writer(s_writeLease); if (!writer) return nullptr; SKTRACE(1, args.m_sk, "retranslate\n"); + if (m_mode == TransInvalid) { + m_mode = profileSrcKey(args.m_sk) ? TransProfile : TransLive; + } return translate(args); } @@ -617,6 +633,7 @@ TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk, // interpretation of this BB. return nullptr; } + m_mode = TransLive; TCA start = translate(TranslArgs(sk, align).interp(true)); if (start != nullptr) { smashJmp(getAsmFor(toSmash), toSmash, start); @@ -624,6 +641,54 @@ TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk, return start; } +TCA TranslatorX64::retranslateOpt(TransID transId, bool align) { + LeaseHolder writer(s_writeLease); + if (!writer) return nullptr; + + TRACE(1, "retranslateOpt: transId = %u\n", transId); + + Func* func = nullptr; + if (m_profData->transBlock(transId) == nullptr) { + // This can happen for profiling translations that have some + // feature not supported by translateRegion yet. For such translations, + // we don't have a Func* (since it's grabbed from the Block). + // Anyway, in this case, the region translator resorts generates a + // TransLive translation, corresponding to the current live VM context. + func = const_cast(curFunc()); + } else { + func = m_profData->transFunc(transId); + } + + // We may get here multiple times because different translations of + // the same SrcKey hit the optimization threshold. Only the first + // time around we want to invalidate the existing translations. 
+ const SrcKey& sk = m_profData->transSrcKey(transId); + bool alreadyOptimized = m_profData->optimized(sk); + m_profData->setOptimized(sk); + + bool setFuncBody = (!alreadyOptimized && + func->base() == sk.offset() && + func->getDVFunclets().size() == 0); + + if (!alreadyOptimized) { + if (setFuncBody) func->setFuncBody((TCA)funcBodyHelperThunk); + invalidateSrcKey(sk); + } else { + // Bail if we already reached the maximum number of translations per SrcKey. + // Note that this can only happen with multi-threading. + SrcRec* srcRec = getSrcRec(sk); + assert(srcRec); + size_t nTrans = srcRec->translations().size(); + if (nTrans >= RuntimeOption::EvalJitMaxTranslations + 1) return nullptr; + } + + m_mode = TransOptimize; + auto translArgs = TranslArgs(sk, align).transId(transId); + if (setFuncBody) translArgs.setFuncBody(); + + return retranslate(translArgs); +} + /* * Satisfy an alignment constraint. If we're in a reachable section * of code, bridge the gap with nops. Otherwise, int3's. @@ -772,6 +837,7 @@ TranslatorX64::createTranslation(const TranslArgs& args) { auto sk = args.m_sk; LeaseHolder writer(s_writeLease); if (!writer) return nullptr; + if (SrcRec* sr = m_srcDB.find(sk)) { TCA tca = sr->getTopTranslation(); if (tca) { @@ -803,9 +869,12 @@ TranslatorX64::createTranslation(const TranslArgs& args) { size_t asize = a.frontier() - astart; size_t stubsize = astubs.frontier() - stubstart; assert(asize == 0); - if (stubsize) { + if (stubsize && RuntimeOption::EvalDumpTCAnchors) { addTranslation(TransRec(sk, curUnit()->md5(), TransAnchor, astart, asize, stubstart, stubsize)); + if (m_profData) { + m_profData->addTransAnchor(sk); + } assert(!isTransDBEnabled() || getTransRec(stubstart)->kind == TransAnchor); } @@ -825,6 +894,8 @@ TranslatorX64::translate(const TranslArgs& args) { INC_TPC(translate); assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0); assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0); + assert(m_mode != TransInvalid); + SCOPE_EXIT{ m_mode = 
TransInvalid; }; if (!args.m_interp) { if (m_numHHIRTrans == RuntimeOption::EvalJitGlobalTranslationLimit) { @@ -834,7 +905,8 @@ TranslatorX64::translate(const TranslArgs& args) { } } - AHotSelector ahs(this, curFunc()->attrs() & AttrHot); + Func* func = const_cast(curFunc()); + AHotSelector ahs(this, func->attrs() & AttrHot); if (args.m_align) { moveToAlign(a, kNonFallthroughAlign); @@ -844,6 +916,9 @@ TranslatorX64::translate(const TranslArgs& args) { translateWork(args); + if (args.m_setFuncBody) { + func->setFuncBody(start); + } SKTRACE(1, args.m_sk, "translate moved head from %p to %p\n", getTopTranslation(args.m_sk), start); return start; @@ -1095,41 +1170,43 @@ TranslatorX64::trimExtraArgs(ActRec* ar) { tl_regState = VMRegState::DIRTY; } +TCA +TranslatorX64::emitCallArrayProlog(const Func* func, + const DVFuncletsVec& dvs) { + TCA start = a.frontier(); + if (dvs.size() == 1) { + a. cmp_imm32_disp_reg32(dvs[0].first, + AROFF(m_numArgsAndCtorFlag), rVmFp); + emitBindJcc(a, CC_LE, SrcKey(func, dvs[0].second)); + emitBindJmp(a, SrcKey(func, func->base())); + } else { + a. load_reg64_disp_reg32(rVmFp, AROFF(m_numArgsAndCtorFlag), rax); + for (unsigned i = 0; i < dvs.size(); i++) { + a. cmp_imm32_reg32(dvs[i].first, rax); + emitBindJcc(a, CC_LE, SrcKey(func, dvs[i].second)); + } + emitBindJmp(a, SrcKey(func, func->base())); + } + return start; +} + TCA TranslatorX64::getCallArrayProlog(Func* func) { TCA tca = func->getFuncBody(); if (tca != (TCA)funcBodyHelperThunk) return tca; - int numParams = func->numParams(); - std::vector > dvs; - for (int i = 0; i < numParams; ++i) { - const Func::ParamInfo& pi = func->params()[i]; - if (pi.hasDefaultValue()) { - dvs.push_back(std::make_pair(i, pi.funcletOff())); - } - } + DVFuncletsVec dvs = func->getDVFunclets(); + if (dvs.size()) { LeaseHolder writer(s_writeLease); if (!writer) return nullptr; tca = func->getFuncBody(); if (tca != (TCA)funcBodyHelperThunk) return tca; - tca = a.frontier(); - if (dvs.size() == 1) { - a. 
cmp_imm32_disp_reg32(dvs[0].first, - AROFF(m_numArgsAndCtorFlag), rVmFp); - emitBindJcc(a, CC_LE, SrcKey(func, dvs[0].second)); - emitBindJmp(a, SrcKey(func, func->base())); - } else { - a. load_reg64_disp_reg32(rVmFp, AROFF(m_numArgsAndCtorFlag), rax); - for (unsigned i = 0; i < dvs.size(); i++) { - a. cmp_imm32_reg32(dvs[i].first, rax); - emitBindJcc(a, CC_LE, SrcKey(func, dvs[i].second)); - } - emitBindJmp(a, SrcKey(func, func->base())); - } + tca = emitCallArrayProlog(func, dvs); + func->setFuncBody(tca); } else { SrcKey sk(func, func->base()); - tca = tx64->getTranslation(TranslArgs(sk, false)); + tca = tx64->getTranslation(TranslArgs(sk, false).setFuncBody()); } return tca; @@ -1511,6 +1588,10 @@ TranslatorX64::funcPrologue(Func* func, int nPassed, ActRec* ar) { TransProlog, aStart, a.frontier() - aStart, stubStart, astubs.frontier() - stubStart)); + if (m_profData) { + m_profData->addTransProlog(skFuncBody); + } + recordGdbTranslation(skFuncBody, func, a, aStart, false, true); @@ -1852,27 +1933,6 @@ int32_t TranslatorX64::emitNativeImpl(const Func* func, return sizeof(ActRec) + cellsToBytes(nLocalCells-1); } -// for documentation see bindJmpccFirst below -void -TranslatorX64::emitCondJmp(SrcKey skTaken, SrcKey skNotTaken, - ConditionCode cc) { - // should be true for SrcKeys generated via OpJmpZ/OpJmpNZ - assert(skTaken.getFuncId() == skNotTaken.getFuncId()); - - // reserve space for a smashable jnz/jmp pair; both initially point - // to our stub. 
- prepareForTestAndSmash(a, 0, TestAndSmashFlags::kAlignJccAndJmp); - TCA old = a.frontier(); - TCA stub = emitServiceReq(REQ_BIND_JMPCC_FIRST, - old, - skTaken.offset(), - skNotTaken.offset(), - cc, - ccArgInfo(cc)); - a.jcc(cc, stub); - a.jmp(stub); -} - /* * bindJmp -- * @@ -2020,6 +2080,8 @@ TranslatorX64::emitBindJ(X64Assembler& _a, ConditionCode cc, emitJmpOrJcc(_a, cc, toSmash); } + setJmpTransID(toSmash); + TCA sr = emitServiceReq(SRFlags::None, req, toSmash, dest.offset()); @@ -2098,6 +2160,12 @@ void TranslatorX64::emitReqRetransNoIR(Asm& as, const SrcKey& sk) { } } +void TranslatorX64::emitReqRetransOpt(Asm& as, const SrcKey& sk, + TransID transId) { + emitServiceReq(REQ_RETRANSLATE_OPT, + sk.getFuncId(), sk.offset(), transId); +} + void TranslatorX64::checkRefs(X64Assembler& a, SrcKey sk, @@ -2443,6 +2511,17 @@ bool TranslatorX64::handleServiceRequest(TReqInfo& info, SKTRACE(1, sk, "retranslated (without IR) @%p\n", start); } break; + case REQ_RETRANSLATE_OPT: { + FuncId funcId = (FuncId) args[0]; + Offset offset = (Offset) args[1]; + TransID transId = (TransID)args[2]; + sk = SrcKey(funcId, offset); + start = retranslateOpt(transId, false); + SKTRACE(2, sk, "retranslated-OPT: transId = %d start: @%p\n", transId, + start); + break; + } + case REQ_RETRANSLATE: { INC_TPC(retranslate); sk = SrcKey(curFunc(), (Offset)args[0]); @@ -3059,8 +3138,8 @@ int64_t switchObjHelper(ObjectData* o, int64_t base, int64_t nTargets) { } bool -TranslatorX64::checkTranslationLimit(SrcKey sk, - const SrcRec& srcRec) const { +TranslatorX64::reachedTranslationLimit(SrcKey sk, + const SrcRec& srcRec) const { if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) { INC_TPC(max_trans); if (debug && Trace::moduleEnabled(Trace::tx64, 2)) { @@ -3211,12 +3290,24 @@ TranslatorX64::translateWork(const TranslArgs& args) { assert(srcRec.inProgressTailJumps().empty()); }; - if (!args.m_interp && !checkTranslationLimit(sk, srcRec)) { + if (!args.m_interp && 
!reachedTranslationLimit(sk, srcRec)) { // Attempt to create a region at this SrcKey - JIT::RegionContext rContext { curFunc(), args.m_sk.offset(), curSpOff() }; - FTRACE(2, "populating live context for region\n"); - populateLiveContext(rContext); - auto region = JIT::selectRegion(rContext, &t); + JIT::RegionDescPtr region; + if (RuntimeOption::EvalJitPGO) { + if (m_mode == TransOptimize) { + TransID transId = args.m_transId; + assert(transId != InvalidID); + region = JIT::selectHotRegion(transId, this); + if (region && region->blocks.size() == 0) region = nullptr; + } else { + // We always go through the tracelet translator in this case + } + } else { + JIT::RegionContext rContext { curFunc(), sk.offset(), curSpOff() }; + FTRACE(2, "populating live context for region\n"); + populateLiveContext(rContext); + region = JIT::selectRegion(rContext, &t); + } TranslateResult result = Retry; RegionBlacklist regionInterps; @@ -3244,6 +3335,9 @@ TranslatorX64::translateWork(const TranslArgs& args) { if (!region || result == Failure) { FTRACE(1, "trying irTranslateTracelet\n"); assertCleanState(); + if (m_mode == TransOptimize) { + m_mode = TransLive; + } result = translateTracelet(t); DEBUG_ONLY static const bool reqRegion = getenv("HHVM_REQUIRE_REGION"); assert(IMPLIES(region && reqRegion, result != Success)); @@ -3258,8 +3352,10 @@ TranslatorX64::translateWork(const TranslArgs& args) { } if (result == Success) { - // Translation succeeded. Mark it as such. - transKind = TransNormalIR; + assert(m_mode == TransLive || + m_mode == TransProfile || + m_mode == TransOptimize); + transKind = m_mode; } } @@ -3295,11 +3391,14 @@ TranslatorX64::translateWork(const TranslArgs& args) { false, false); recordGdbTranslation(sk, curFunc(), astubs, stubStart, false, false); + if (RuntimeOption::EvalJitPGO) { + m_profData->addTrans(t, transKind); + } // SrcRec::newTranslation() makes this code reachable. 
Do this last; // otherwise there's some chance of hitting in the reader threads whose // metadata is not yet visible. TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n", - start, sk.getFuncId(), sk.offset()); + start, sk.getFuncId(), sk.offset()); srcRec.newTranslation(start); TRACE(1, "tx64: %zd-byte tracelet\n", a.frontier() - start); if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) { @@ -3326,6 +3425,10 @@ TranslatorX64::translateTracelet(Tracelet& t) { ht.emitIncTransCounter(); } + if (m_mode == TransProfile) { + ht.emitCheckCold(m_profData->curTransID()); + } + emitRB(a, RBTypeTraceletBody, t.m_sk); Stats::emitInc(a, Stats::Instr_TC, t.m_numOpcodes); @@ -3357,6 +3460,7 @@ TranslatorX64::translateTracelet(Tracelet& t) { ni = ni->next) { try { SKTRACE(1, ni->source, "HHIR: translateInstr\n"); + assert(!(m_mode == TransProfile && ni->outputPredicted && ni->next)); m_irTrans->translateInstr(*ni); } catch (JIT::FailedIRGen& fcg) { always_assert(!ni->interp); @@ -3600,6 +3704,7 @@ TranslatorX64::TranslatorX64() } } assert(base); + tcStart = base; base += -(uint64_t)base & (kRoundUp - 1); enhugen(base, RuntimeOption::EvalTCNumHugeHotMB); TRACE(1, "init atrampolines @%p\n", base); @@ -4139,7 +4244,7 @@ bool TranslatorX64::dumpTC(bool ignoreLease) { // Returns true on success bool tc_dump(void) { - return TranslatorX64::Get()->dumpTC(); + return TranslatorX64::Get() && TranslatorX64::Get()->dumpTC(); } // Returns true on success @@ -4176,7 +4281,7 @@ bool TranslatorX64::dumpTCData() { } void TranslatorX64::invalidateSrcKey(SrcKey sk) { - assert(!RuntimeOption::RepoAuthoritative); + assert(!RuntimeOption::RepoAuthoritative || RuntimeOption::EvalJitPGO); assert(s_writeLease.amOwner()); /* * Reroute existing translations for SrcKey to an as-yet indeterminate @@ -4192,6 +4297,14 @@ void TranslatorX64::invalidateSrcKey(SrcKey sk) { sr->replaceOldTranslations(); } +void TranslatorX64::setJmpTransID(TCA jmp) { + if (m_mode != TransProfile) return; + + TransID transId 
= m_profData->curTransID(); + FTRACE(5, "setJmpTransID: adding {} => {}\n", jmp, transId); + m_jmpToTransID[jmp] = transId; +} + } // HPHP::Transl } // HPHP diff --git a/hphp/runtime/vm/jit/translator-x64.h b/hphp/runtime/vm/jit/translator-x64.h index ee1ebc8f8..07cdb0b44 100644 --- a/hphp/runtime/vm/jit/translator-x64.h +++ b/hphp/runtime/vm/jit/translator-x64.h @@ -99,6 +99,8 @@ static const int kNumFreeLocalsHelpers = 9; typedef X64Assembler Asm; +typedef hphp_hash_map TcaTransIDMap; + constexpr size_t kJmpTargetAlign = 16; constexpr size_t kNonFallthroughAlign = 64; constexpr int kJmpLen = 5; @@ -150,26 +152,32 @@ class TranslatorX64 : public Translator class AHotSelector { public: AHotSelector(TranslatorX64* tx, bool hot) : - m_tx(tx), m_hot(hot && - tx->ahot.available() > 8192 && - tx->a.base() != tx->ahot.base()) { - if (m_hot) { - m_save = tx->a; - tx->a = tx->ahot; + m_tx(tx), m_swap(hot && + tx->ahot.available() > 8192 && + // Only swap if a and ahot aren't swapped yet. + // This assumes ahot area is in lower address. + tx->a.base() > tx->ahot.base()) { + if (m_swap) { + // Swap a and ahot, so that 'a' contains the hot code region. + // Note that, although we don't write to tx->ahot directly, we + // still need to make sure that all assembler code areas are + // available in a, astubs, and ahot, for example when we call + // asmChoose(addr, a, ahot, astubs). + std::swap(m_tx->a, m_tx->ahot); } } ~AHotSelector() { - if (m_hot) { - m_tx->ahot = m_tx->a; - m_tx->a = m_save; + if (m_swap) { + // Swap a and ahot back. 
+ std::swap(m_tx->a, m_tx->ahot); } } private: TranslatorX64* m_tx; - Asm m_save; - bool m_hot; + bool m_swap; }; + TCA tcStart; Asm ahot; Asm a; Asm astubs; @@ -197,6 +205,9 @@ class TranslatorX64 : public Translator DataBlock m_globalData; + TcaTransIDMap m_jmpToTransID; // maps jump addresses to the ID + // of translation containing them + // Data structures for HHIR-based translation uint64_t m_numHHIRTrans; @@ -224,7 +235,12 @@ private: void drawCFG(std::ofstream& out) const; static vector x64TranslRegs(); - Asm& getAsmFor(TCA addr) { return asmChoose(addr, a, ahot, astubs); } + Asm& getAsmFor(TCA addr) { + assert(a.base() != ahot.base() && + a.base() != astubs.base() && + ahot.base() != astubs.base()); + return asmChoose(addr, a, ahot, astubs, atrampolines); + } void emitIncRef(X64Assembler &a, PhysReg base, DataType dtype); void emitIncRef(PhysReg base, DataType); void emitIncRefGenericRegSafe(PhysReg base, int disp, PhysReg tmp); @@ -239,7 +255,8 @@ public: TCA getCallArrayProlog(Func* func); void smashPrologueGuards(TCA* prologues, int numPrologues, const Func* func); private: - + TCA emitCallArrayProlog(const Func* func, + const DVFuncletsVec& dvs); void translateClassExistsImpl(const Tracelet& t, const NormalizedInstruction& i, Attr typeAttr); @@ -304,6 +321,14 @@ private: void fixup(VMExecutionContext* ec) const; TCA getTranslatedCaller() const; + const TcaTransIDMap& getJmpToTransIDMap() const { + return m_jmpToTransID; + } + + void setJmpTransID(TCA jmp); + + bool profileSrcKey(const SrcKey& sk) const; + TCA getTopTranslation(SrcKey sk) { return getSrcRec(sk)->getTopTranslation(); } @@ -325,7 +350,7 @@ private: } inline bool isValidCodeAddress(TCA tca) const { - return tca >= ahot.base() && tca < astubs.base() + astubs.capacity(); + return tca >= tcStart && tca < astubs.base() + astubs.capacity(); } // If we were to shove every little helper function into this class @@ -364,7 +389,7 @@ public: FreeStubList m_freeStubs; bool freeRequestStub(TCA stub); 
TCA getFreeStub(); - bool checkTranslationLimit(SrcKey, const SrcRec&) const; + bool reachedTranslationLimit(SrcKey, const SrcRec&) const; TranslateResult translateTracelet(Tracelet& t); void checkRefs(Asm&, SrcKey, const RefDeps&, SrcRec&); @@ -436,7 +461,6 @@ public: TCA emitRetFromInterpretedGeneratorFrame(); void emitPopRetIntoActRec(Asm& a); int32_t emitBindCall(SrcKey srcKey, const Func* funcd, int numArgs); - void emitCondJmp(SrcKey skTrue, SrcKey skFalse, ConditionCode cc); TCA funcPrologue(Func* func, int nArgs, ActRec* ar = nullptr); bool checkCachedPrologue(const Func* func, int param, TCA& plgOut) const; @@ -581,6 +605,7 @@ private: public: // Only for HackIR void emitReqRetransNoIR(Asm& as, const SrcKey& sk); + void emitReqRetransOpt(Asm& as, const SrcKey& sk, TransID transId); private: // asize + astubssize + gdatasize + trampolinesblocksize diff --git a/hphp/runtime/vm/jit/translator.cpp b/hphp/runtime/vm/jit/translator.cpp index cc94a7b41..20546dfca 100755 --- a/hphp/runtime/vm/jit/translator.cpp +++ b/hphp/runtime/vm/jit/translator.cpp @@ -554,6 +554,9 @@ predictOutputs(SrcKey startSk, const NormalizedInstruction* ni) { if (!RuntimeOption::EvalJitTypePrediction) return KindOfInvalid; + // In JitPGO mode, disable type prediction to avoid side exits + if (RuntimeOption::EvalJitPGO) return KindOfInvalid; + if (RuntimeOption::EvalJitStressTypePredPercent && RuntimeOption::EvalJitStressTypePredPercent > int(get_random() % 100)) { int dt; @@ -756,7 +759,8 @@ getDynLocType(const SrcKey startSk, return RuntimeType(tv->m_type); } tv = Unit::lookupCns(sd); - if (tv) { + // In JitPGO mode, we disable type predictions to avoid side exits + if (tv && !RuntimeOption::EvalJitPGO) { ni->outputPredicted = true; TRACE(1, "CNS %s: guessing runtime type %d\n", sd->data(), tv->m_type); return RuntimeType(tv->m_type); @@ -1508,6 +1512,9 @@ bool Translator::applyInputMetaData(Unit::MetaHandle& metaHand, ni->imm[0].u_IVA = info.m_data; break; case 
Unit::MetaInfo::Kind::DataTypePredicted: { + // In JitPGO, disable type predictions to avoid side exits + if (RuntimeOption::EvalJitPGO) break; + // If the original type was invalid or predicted, then use the // prediction in the meta-data. assert((unsigned) arg < inputInfos.size()); @@ -2363,7 +2370,10 @@ DynLocation* TraceletContext::recordRead(const InputInfo& ii, m_resolvedDeps[l] = dl; } } else { - RuntimeType rtt = tx64->liveType(l, *curUnit(), true); + // TODO: Once the region translator supports guard relaxation + // (task #2598894), we can enable specialization for all modes. + const bool specialize = tx64->mode() == TransLive; + RuntimeType rtt = tx64->liveType(l, *curUnit(), specialize); assert(rtt.isIter() || !rtt.isVagueValue()); // Allocate a new DynLocation to represent this and store it in the // current map. @@ -3183,6 +3193,12 @@ void Translator::analyzeCallee(TraceletContext& tas, fcall->calleeTrace = std::move(subTrace); } +static bool instrBreaksProfileBB(const NormalizedInstruction* instr) { + return (instrIsNonCallControlFlow(instr->op()) || + instr->outputPredicted || + instr->op() == OpClsCnsD); // side exits if misses in the target cache +} + /* * analyze -- * @@ -3311,6 +3327,15 @@ std::unique_ptr Translator::analyze(SrcKey sk, throwUnknownInput(); } } + if ((m_mode == TransProfile || m_mode == TransOptimize) && + t.m_numOpcodes > 0) { + // We want to break blocks at every instrution that consumes a ref, + // so that we avoid side exits. Therefore, instructions consume ref + // can only be the first in the tracelet/block. + if (rtt.isValue() && rtt.isRef()) { + throwUnknownInput(); + } + } } ni->inputs.push_back(dl); } @@ -3439,6 +3464,12 @@ std::unique_ptr Translator::analyze(SrcKey sk, tas.recordDelete(l); } + if (m_mode == TransProfile && instrBreaksProfileBB(ni)) { + SKTRACE(1, sk, "BB broken\n"); + sk.advance(unit); + goto breakBB; + } + // Check if we need to break the tracelet. 
// // If we've gotten this far, it mostly boils down to control-flow @@ -3483,7 +3514,10 @@ breakBB: } } - relaxDeps(t, tas); + // translateRegion doesn't support guard relaxation/specialization yet + if (m_mode != TransProfile && m_mode != TransOptimize) { + relaxDeps(t, tas); + } // Mark the last instruction appropriately assert(t.m_instrStream.last); @@ -3504,12 +3538,19 @@ breakBB: Translator::Translator() : m_resumeHelper(nullptr) , m_createdTime(Timer::GetCurrentTimeMicros()) + , m_mode(TransInvalid) + , m_profData(nullptr) , m_analysisDepth(0) { initInstrInfo(); + if (RuntimeOption::EvalJitPGO) { + m_profData = new ProfData(); + } } Translator::~Translator() { + delete m_profData; + m_profData = nullptr; } Translator* @@ -3771,7 +3812,8 @@ Translator::translateRegion(const RegionDesc& region, const SrcKey startSk = region.blocks.front()->start(); Unit::MetaHandle metaHand; - for (auto const& block : region.blocks) { + for (auto b = 0; b < region.blocks.size(); b++) { + auto const& block = region.blocks[b]; SrcKey sk = block->start(); const Func* topFunc = nullptr; auto typePreds = makeMapWalker(block->typePreds()); @@ -3783,12 +3825,19 @@ Translator::translateRegion(const RegionDesc& region, // Emit prediction guards. If this is the first instruction in the // region the guards will go to a retranslate request. Otherwise, they'll // go to a side exit. 
+ bool isFirstRegionInstr = block == region.blocks.front() && i == 0; while (typePreds.hasNext(sk)) { auto const& pred = typePreds.next(); - if (block == region.blocks.front() && i == 0) { - ht.guardTypeLocation(pred.location, pred.type); + auto type = pred.type; + auto loc = pred.location; + if (type.subtypeOf(Type::Cls)) { + // Do not generate guards for class; instead assert the type + assert(loc.tag() == JIT::RegionDesc::Location::Tag::Stack); + ht.assertTypeLocation(loc, type); + } else if (isFirstRegionInstr) { + ht.guardTypeLocation(loc, type); } else { - ht.checkTypeLocation(pred.location, pred.type, sk.offset()); + ht.checkTypeLocation(loc, type, sk.offset()); } } @@ -3800,6 +3849,10 @@ Translator::translateRegion(const RegionDesc& region, ht.guardRefs(pred.arSpOffset, pred.mask, pred.vals); } + if (RuntimeOption::EvalJitTransCounters && isFirstRegionInstr) { + ht.emitIncTransCounter(); + } + // Update the current funcd, if we have a new one. if (knownFuncs.hasNext(sk)) { topFunc = knownFuncs.next(); @@ -3813,6 +3866,12 @@ Translator::translateRegion(const RegionDesc& region, i == block->length() - 1 && block == region.blocks.back(); inst.changesPC = opcodeChangesPC(inst.op()); inst.funcd = topFunc; + inst.nextOffset = kInvalidOffset; + if (instrIsNonCallControlFlow(inst.op()) && !inst.breaksTracelet) { + assert(b < region.blocks.size()); + inst.nextOffset = region.blocks[b+1]->start().offset(); + } + inst.outputPredicted = false; populateImmediates(inst); // We can get a more precise output type for interpOne if we know all of @@ -3868,11 +3927,6 @@ Translator::translateRegion(const RegionDesc& region, return Retry; } - if (isFCallStar(inst.op()) || inst.op() == OpFCallBuiltin) { - // This is much more conservative than it needs to be. - ht.emitSmashLocals(); - } - // Check the prediction. If the predicted type is less specific than what // is currently on the eval stack, checkTypeLocation won't emit any code. 
if (doPrediction) { @@ -3918,7 +3972,7 @@ uint64_t* Translator::getTransCounterAddr() { [id % transCountersPerChunk]); } -uint32_t Translator::addTranslation(const TransRec& transRec) { +void Translator::addTranslation(const TransRec& transRec) { if (Trace::moduleEnabledRelease(Trace::trans, 1)) { // Log the translation's size, creation time, SrcKey, and size Trace::traceRelease("New translation: %" PRId64 " %s %u %u %d\n", @@ -3932,7 +3986,7 @@ uint32_t Translator::addTranslation(const TransRec& transRec) { transRec.kind); } - if (!isTransDBEnabled()) return -1u; + if (!isTransDBEnabled()) return; uint32_t id = getCurrentTransID(); m_translations.push_back(transRec); m_translations[id].setID(id); @@ -3943,8 +3997,6 @@ uint32_t Translator::addTranslation(const TransRec& transRec) { if (transRec.astubsLen > 0) { m_transDB[transRec.astubsStart] = id; } - - return id; } uint64_t Translator::getTransCounter(TransID transId) const { @@ -3999,14 +4051,13 @@ void Translator::invalidateFile(Eval::PhpFile* f) { } static const char *transKindStr[] = { - "Normal_Tx64", - "Normal_HHIR", - "Anchor", - "Prologue", +#define DO(KIND) #KIND, + TRANS_KINDS +#undef DO }; const char *getTransKindName(TransKind kind) { - assert(kind >= 0 && kind <= TransProlog); + assert(kind >= 0 && kind < TransInvalid); return transKindStr[kind]; } diff --git a/hphp/runtime/vm/jit/translator.h b/hphp/runtime/vm/jit/translator.h index 301b08609..ff95296c5 100644 --- a/hphp/runtime/vm/jit/translator.h +++ b/hphp/runtime/vm/jit/translator.h @@ -41,6 +41,7 @@ #include "hphp/runtime/vm/jit/translator-instrs.h" #include "hphp/runtime/vm/jit/type.h" #include "hphp/runtime/vm/jit/write-lease.h" +#include "hphp/runtime/vm/jit/prof-data.h" #include "hphp/runtime/vm/debugger_hook.h" #include "hphp/runtime/vm/srckey.h" #include "hphp/runtime/base/md5.h" @@ -59,6 +60,7 @@ namespace Transl { using JIT::Type; using JIT::RegionDesc; using JIT::HhbcTranslator; +using JIT::ProfData; static const bool trustSigSegv = 
false; static const uint32_t transCountersPerChunk = 1024 * 1024 / 8; @@ -235,6 +237,8 @@ class NormalizedInstruction { // stack at tracelet entry. int stackOffset; int sequenceNum; + Offset nextOffset; // for intra-trace* non-call control-flow instructions, + // this is the offset of the next instruction in the trace* bool breaksTracelet:1; bool changesPC:1; bool fuseBranch:1; @@ -578,13 +582,6 @@ struct Tracelet : private boost::noncopyable { SrcKey nextSk() const; }; -enum TransKind { - TransInterp = 0, - TransNormalIR = 1, - TransAnchor = 2, - TransProlog = 3, -}; - const char* getTransKindName(TransKind kind); /* @@ -614,8 +611,6 @@ struct TransRec { uint8_t counterLen; vector bcMapping; - static const TransID InvalidID = -1LL; - TransRec() {} TransRec(SrcKey s, @@ -663,6 +658,8 @@ struct TranslArgs { , m_src(nullptr) , m_align(align) , m_interp(false) + , m_setFuncBody(false) + , m_transId(InvalidID) {} TranslArgs& sk(const SrcKey& sk) { @@ -681,11 +678,21 @@ struct TranslArgs { m_interp = interp; return *this; } + TranslArgs& setFuncBody() { + m_setFuncBody = true; + return *this; + } + TranslArgs& transId(TransID transId) { + m_transId = transId; + return *this; + } SrcKey m_sk; TCA m_src; bool m_align; bool m_interp; + bool m_setFuncBody; + TransID m_transId; }; /* @@ -863,7 +870,7 @@ public: uint64_t getTransCounter(TransID transId) const; void setTransCounter(TransID transId, uint64_t value); - uint32_t addTranslation(const TransRec& transRec); + void addTranslation(const TransRec& transRec); // helpers for srcDB. SrcRec* getSrcRec(SrcKey sk) { @@ -873,6 +880,10 @@ public: return m_srcDB.insert(sk); } + const SrcDB& getSrcDB() const { + return m_srcDB; + } + /* * Create a Tracelet for the given SrcKey, which must actually be * the current VM frame. 
@@ -909,6 +920,9 @@ protected: Mutex m_dbgBlacklistLock; bool isSrcKeyInBL(const Unit* unit, const SrcKey& sk); + TransKind m_mode; + ProfData* m_profData; + private: int m_analysisDepth; @@ -921,10 +935,19 @@ public: TCA getResumeHelper() { return m_resumeHelper; } + TCA getResumeHelperRet() { return m_resumeHelperRet; } + ProfData* profData() const { + return m_profData; + } + + TransKind mode() const { + return m_mode; + } + int analysisDepth() const { assert(m_analysisDepth >= 0); return m_analysisDepth; diff --git a/hphp/runtime/vm/jit/types.h b/hphp/runtime/vm/jit/types.h index 891b678c0..daa5d2508 100644 --- a/hphp/runtime/vm/jit/types.h +++ b/hphp/runtime/vm/jit/types.h @@ -39,10 +39,38 @@ struct ctca_identity_hash { } }; - typedef uint32_t TransID; typedef hphp_hash_set TransIDSet; +const TransID InvalidID = -1LL; + +/** + * The different kinds of translations that the JIT generates: + * + * - Anchor : a service request for retranslating + * - Prolog : function prologue + * - Interp : a service to interpret at least one instruction + * - Live : translate one tracelet by inspecting live VM state + * - Profile : translate one block by inspecting live VM state and + * inserting profiling counters + * - Optimize: translate one region performing optimizations that may + * leverage data collected by Profile translations + */ +#define TRANS_KINDS \ + DO(Anchor) \ + DO(Prolog) \ + DO(Interp) \ + DO(Live) \ + DO(Profile) \ + DO(Optimize) \ + DO(Invalid) \ + +enum TransKind { +#define DO(KIND) Trans##KIND, + TRANS_KINDS +#undef DO +}; + }} #endif diff --git a/hphp/runtime/vm/srckey.h b/hphp/runtime/vm/srckey.h index 86979ef5d..ff4b9d271 100644 --- a/hphp/runtime/vm/srckey.h +++ b/hphp/runtime/vm/srckey.h @@ -131,6 +131,8 @@ struct SrcKey::Hasher { } }; +typedef hphp_hash_set SrcKeySet; + ////////////////////////////////////////////////////////////////////// inline std::string show(SrcKey sk) { diff --git a/hphp/runtime/vm/unit.h b/hphp/runtime/vm/unit.h index 
218ae546f..71a5e0be3 100644 --- a/hphp/runtime/vm/unit.h +++ b/hphp/runtime/vm/unit.h @@ -17,7 +17,6 @@ #ifndef incl_HPHP_VM_UNIT_H_ #define incl_HPHP_VM_UNIT_H_ -// Expects that runtime/vm/core_types.h is already included. #include "hphp/runtime/base/runtime_option.h" #include "hphp/runtime/vm/hhbc.h" #include "hphp/runtime/base/complex_types.h" diff --git a/hphp/util/trace.h b/hphp/util/trace.h index 3ddecc9fd..5aa68cd70 100644 --- a/hphp/util/trace.h +++ b/hphp/util/trace.h @@ -89,6 +89,7 @@ namespace Trace { TM(typeProfile) \ TM(hhir) \ TM(printir) \ + TM(pgo) \ TM(hhirTracelets) \ TM(gc) \ TM(instancebits)\