diff --git a/hphp/runtime/base/runtime_option.cpp b/hphp/runtime/base/runtime_option.cpp index a34f76ca3..94a1c2f17 100644 --- a/hphp/runtime/base/runtime_option.cpp +++ b/hphp/runtime/base/runtime_option.cpp @@ -401,10 +401,11 @@ EVALFLAGS(); std::set RuntimeOption::DynamicInvokeFunctions; bool RuntimeOption::RecordCodeCoverage = false; std::string RuntimeOption::CodeCoverageOutputFile; -size_t RuntimeOption::VMTranslAHotSize = 4 << 20; -size_t RuntimeOption::VMTranslASize = 508 << 20; +size_t RuntimeOption::VMTranslAHotSize = 4 << 20; +size_t RuntimeOption::VMTranslASize = 508 << 20; +size_t RuntimeOption::VMTranslAProfSize = 512 << 20; size_t RuntimeOption::VMTranslAStubsSize = 512 << 20; -size_t RuntimeOption::VMTranslGDataSize = RuntimeOption::VMTranslASize >> 2; +size_t RuntimeOption::VMTranslGDataSize = RuntimeOption::VMTranslASize >> 2; std::string RuntimeOption::RepoLocalMode; std::string RuntimeOption::RepoLocalPath; @@ -1141,6 +1142,7 @@ void RuntimeOption::Load(Hdf &config, StringVec *overwrites /* = NULL */, if (RecordCodeCoverage) CheckSymLink = true; CodeCoverageOutputFile = eval["CodeCoverageOutputFile"].getString(); VMTranslAHotSize = eval["JitAHotSize"].getUInt64(VMTranslAHotSize); + VMTranslAProfSize = eval["JitAProfSize"].getUInt64(VMTranslAProfSize); VMTranslASize = eval["JitASize"].getUInt64(VMTranslASize); VMTranslAStubsSize = eval["JitAStubsSize"].getUInt64(VMTranslAStubsSize); VMTranslGDataSize = eval["JitGlobalDataSize"].getUInt64(VMTranslGDataSize); diff --git a/hphp/runtime/base/runtime_option.h b/hphp/runtime/base/runtime_option.h index 986aec0e9..a05f8da08 100644 --- a/hphp/runtime/base/runtime_option.h +++ b/hphp/runtime/base/runtime_option.h @@ -447,6 +447,7 @@ public: // TranslatorX64 allocation options static size_t VMTranslASize; static size_t VMTranslAHotSize; + static size_t VMTranslAProfSize; static size_t VMTranslAStubsSize; static size_t VMTranslGDataSize; diff --git a/hphp/runtime/vm/jit/srcdb.cpp 
b/hphp/runtime/vm/jit/srcdb.cpp index a384f16a4..b0135b483 100644 --- a/hphp/runtime/vm/jit/srcdb.cpp +++ b/hphp/runtime/vm/jit/srcdb.cpp @@ -51,6 +51,7 @@ void SrcRec::chainFrom(IncomingBranch br) { assert(br.type() == IncomingBranch::Tag::ADDR || tx64->a. contains(br.toSmash()) || tx64->ahot. contains(br.toSmash()) || + tx64->aprof. contains(br.toSmash()) || tx64->astubs. contains(br.toSmash()) || tx64->atrampolines.contains(br.toSmash())); TCA destAddr = getTopTranslation(); diff --git a/hphp/runtime/vm/jit/translator-x64.cpp b/hphp/runtime/vm/jit/translator-x64.cpp index 65a8fb30f..2ff457b86 100644 --- a/hphp/runtime/vm/jit/translator-x64.cpp +++ b/hphp/runtime/vm/jit/translator-x64.cpp @@ -855,8 +855,6 @@ TranslatorX64::createTranslation(const TranslArgs& args) { // We put retranslate requests at the end of our slab to more frequently // allow conditional jump fall-throughs - AHotSelector ahs(this, curFunc()->attrs() & AttrHot); - TCA astart = a.frontier(); TCA stubstart = astubs.frontier(); TCA req = emitServiceReq(REQ_RETRANSLATE, sk.offset()); @@ -905,8 +903,9 @@ TranslatorX64::translate(const TranslArgs& args) { } } - Func* func = const_cast<Func*>(curFunc()); - AHotSelector ahs(this, func->attrs() & AttrHot); + Func* func = const_cast<Func*>(args.m_sk.func()); + AsmSelector asmSel(AsmSelector::Args(this).profile(m_mode == TransProfile) + .hot(func->attrs() & AttrHot)); if (args.m_align) { moveToAlign(a, kNonFallthroughAlign); @@ -1505,7 +1504,7 @@ TranslatorX64::funcPrologue(Func* func, int nPassed, ActRec* ar) { // in case another thread snuck in and set the prologue already. 
if (checkCachedPrologue(func, paramIndex, prologue)) return prologue; - AHotSelector ahs(this, func->attrs() & AttrHot); + AsmSelector asmSel(AsmSelector::Args(this).hot(func->attrs() & AttrHot)); SpaceRecorder sr("_FuncPrologue", a); // If we're close to a cache line boundary, just burn some space to @@ -3664,11 +3663,13 @@ TranslatorX64::TranslatorX64() m_catchTraceMap(128) { static const size_t kRoundUp = 2 << 20; - const size_t kAHotSize = RuntimeOption::VMTranslAHotSize; - const size_t kASize = RuntimeOption::VMTranslASize; + const size_t kAHotSize = RuntimeOption::VMTranslAHotSize; + const size_t kAProfSize = RuntimeOption::EvalJitPGO ? + RuntimeOption::VMTranslAProfSize : 0; + const size_t kASize = RuntimeOption::VMTranslASize; const size_t kAStubsSize = RuntimeOption::VMTranslAStubsSize; - const size_t kGDataSize = RuntimeOption::VMTranslGDataSize; - m_totalSize = kAHotSize + kASize + kAStubsSize + + const size_t kGDataSize = RuntimeOption::VMTranslGDataSize; + m_totalSize = kAHotSize + kASize + kAStubsSize + kAProfSize + kTrampolinesBlockSize + kGDataSize; TRACE(1, "TranslatorX64@%p startup\n", this); @@ -3737,7 +3738,11 @@ TranslatorX64::TranslatorX64() base += kAHotSize; TRACE(1, "init a @%p\n", base); a.init(base, kASize); + aStart = base; base += kASize; + TRACE(1, "init aprof @%p\n", base); + aprof.init(base, kAProfSize); + base += kAProfSize; base += -(uint64_t)base & (kRoundUp - 1); TRACE(1, "init astubs @%p\n", base); astubs.init(base, kAStubsSize); @@ -3747,7 +3752,7 @@ TranslatorX64::TranslatorX64() m_globalData.init(base, kGDataSize); // put the stubs into ahot, rather than a - AHotSelector ahs(this, true); + AsmSelector asmSel(AsmSelector::Args(this).hot(true)); // Emit some special helpers that are shared across translations. 
@@ -4098,23 +4103,26 @@ size_t TranslatorX64::getTargetCacheSize() { std::string TranslatorX64::getUsage() { std::string usage; - size_t aHotUsage = ahot.used(); - size_t aUsage = a.used(); + size_t aHotUsage = ahot.used(); + size_t aProfUsage = aprof.used(); + size_t aUsage = a.used(); size_t stubsUsage = astubs.used(); - size_t dataUsage = m_globalData.frontier - m_globalData.base; - size_t tcUsage = TargetCache::s_frontier; + size_t dataUsage = m_globalData.frontier - m_globalData.base; + size_t tcUsage = TargetCache::s_frontier; size_t persistentUsage = TargetCache::s_persistent_frontier - TargetCache::s_persistent_start; Util::string_printf( usage, "tx64: %9zd bytes (%zd%%) in ahot.code\n" "tx64: %9zd bytes (%zd%%) in a.code\n" + "tx64: %9zd bytes (%zd%%) in aprof.code\n" "tx64: %9zd bytes (%zd%%) in astubs.code\n" "tx64: %9zd bytes (%zd%%) in m_globalData\n" "tx64: %9zd bytes (%zd%%) in targetCache\n" "tx64: %9zd bytes (%zd%%) in persistentCache\n", aHotUsage, 100 * aHotUsage / ahot.capacity(), aUsage, 100 * aUsage / a.capacity(), + aProfUsage, aprof.capacity() ? 100 * aProfUsage / aprof.capacity() : 0, /* aprof is init'ed with size 0 when EvalJitPGO is off; avoid dividing by zero */ stubsUsage, 100 * stubsUsage / astubs.capacity(), dataUsage, 100 * dataUsage / m_globalData.size, tcUsage, @@ -4226,7 +4234,9 @@ bool TranslatorX64::dumpTCCode(const char* filename) { } // dump starting from the trampolines; this assumes processInit() places // trampolines before the translation cache - size_t count = a.frontier() - atrampolines.base(); + // Task #2649357: teach tc-print about aprof, to avoid dumping the entire + // 'a' code slab + size_t count = aprof.frontier() - atrampolines.base(); bool result = (fwrite(atrampolines.base(), 1, count, aFile) == count); if (result) { count = astubs.used(); @@ -4325,6 +4335,79 @@ void TranslatorX64::setJmpTransID(TCA jmp) { m_jmpToTransID[jmp] = transId; } +TranslatorX64::AsmSelector::AsmSelector(const Args& args) + : m_tx(args.getTranslator()) + , m_select(args.getSelection()) { + + // If an assembler other than 'a' has already been 
selected, then just + // keep that selection. + if (m_tx->a.base() != m_tx->aStart) { + m_select = AsmSelection::Default; + } + + swap(); +} + +/* + * Swap 'a' with 'ahot' or 'aprof'. + * Note that, although we don't write to either tx->ahot or tx->aprof directly, + * we still need to make sure that all assembler code areas are available + * in a, astubs, aprof, and ahot, for example when we call asmChoose(addr, ...). + */ +void TranslatorX64::AsmSelector::swap() { + switch (m_select) { + case AsmSelection::Profile: std::swap(m_tx->a, m_tx->aprof); break; + case AsmSelection::Hot : std::swap(m_tx->a, m_tx->ahot) ; break; + case AsmSelection::Default: break; // nothing to do + } +} + +TranslatorX64::AsmSelector::~AsmSelector() { + swap(); +} + +TranslatorX64::AsmSelector::Args::Args(TranslatorX64* tx) + : m_tx(tx) + , m_select(AsmSelection::Default) { + assert(m_tx != nullptr); +} + +static const int kMaxTranslationBytes = 8192; + +TranslatorX64::AsmSelector::Args& +TranslatorX64::AsmSelector::Args::hot(bool isHot) { + // Profile has precedence over Hot. + if (m_select == AsmSelection::Profile) return *this; + + // Make sure there's enough room left in ahot. 
+ if (isHot && m_tx->ahot.available() > kMaxTranslationBytes) { + m_select = AsmSelection::Hot; + } else { + m_select = AsmSelection::Default; + } + return *this; +} + +TranslatorX64::AsmSelector::Args& +TranslatorX64::AsmSelector::Args::profile(bool isProf) { + if (isProf) { + m_select = AsmSelection::Profile; + } else if (m_select == AsmSelection::Profile) { + m_select = AsmSelection::Default; + } + return *this; +} + +TranslatorX64::AsmSelection +TranslatorX64::AsmSelector::Args::getSelection() const { + return m_select; +} + +TranslatorX64* +TranslatorX64::AsmSelector::Args::getTranslator() const { + return m_tx; +} + } // HPHP::Transl } // HPHP diff --git a/hphp/runtime/vm/jit/translator-x64.h b/hphp/runtime/vm/jit/translator-x64.h index 07cdb0b44..740b4b7cb 100644 --- a/hphp/runtime/vm/jit/translator-x64.h +++ b/hphp/runtime/vm/jit/translator-x64.h @@ -149,38 +149,43 @@ class TranslatorX64 : public Translator typedef X64Assembler Asm; - class AHotSelector { + enum class AsmSelection { + Default, // 'a' + Hot, // 'ahot' + Profile, // 'aprof' -- highest precedence + }; + + class AsmSelector { public: - AHotSelector(TranslatorX64* tx, bool hot) : - m_tx(tx), m_swap(hot && - tx->ahot.available() > 8192 && - // Only swap if a and ahot aren't swapped yet. - // This assumes ahot area is in lower address. - tx->a.base() > tx->ahot.base()) { - if (m_swap) { - // Swap a and ahot, so that 'a' contains the hot code region. - // Note that, although we don't write to tx->ahot directly, we - // still need to make sure that all assembler code areas are - // available in a, astubs, and ahot, for example when we call - // asmChoose(addr, a, ahot, astubs). - std::swap(m_tx->a, m_tx->ahot); - } - } - ~AHotSelector() { - if (m_swap) { - // Swap a and ahot back. 
- std::swap(m_tx->a, m_tx->ahot); - } - } + class Args { + public: + explicit Args(TranslatorX64* tx); + Args& hot(bool isHot); + Args& profile(bool isProf); + AsmSelection getSelection() const; + TranslatorX64* getTranslator() const; + + private: + TranslatorX64* m_tx; + AsmSelection m_select; + }; + + explicit AsmSelector(const Args& args); + ~AsmSelector(); + private: + void swap(); + TranslatorX64* m_tx; - bool m_swap; + AsmSelection m_select; }; TCA tcStart; - Asm ahot; - Asm a; - Asm astubs; + TCA aStart; + Asm ahot; // used for hot code of AttrHot functions + Asm a; // used for hot code of non-AttrHot functions + Asm aprof; // used for hot code of profiling translations + Asm astubs; // used for cold code Asm atrampolines; PointerMap trampolineMap; int m_numNativeTrampolines; @@ -239,7 +244,7 @@ private: assert(a.base() != ahot.base() && a.base() != astubs.base() && ahot.base() != astubs.base()); - return asmChoose(addr, a, ahot, astubs, atrampolines); + return asmChoose(addr, a, ahot, aprof, astubs, atrampolines); } void emitIncRef(X64Assembler &a, PhysReg base, DataType dtype); void emitIncRef(PhysReg base, DataType);