Add an aprof code region and use it for profile translations in JitPGO mode
This diff generalizes the AHotSelector (now called AsmSelector) to select an assembler among 'a', 'ahot', and 'aprof'. 'aprof' is only allocated and used in JitPGO mode, and it's used for TransProfile translations.
Esse commit está contido em:
@@ -401,10 +401,11 @@ EVALFLAGS();
|
||||
std::set<string, stdltistr> RuntimeOption::DynamicInvokeFunctions;
|
||||
bool RuntimeOption::RecordCodeCoverage = false;
|
||||
std::string RuntimeOption::CodeCoverageOutputFile;
|
||||
size_t RuntimeOption::VMTranslAHotSize = 4 << 20;
|
||||
size_t RuntimeOption::VMTranslASize = 508 << 20;
|
||||
size_t RuntimeOption::VMTranslAHotSize = 4 << 20;
|
||||
size_t RuntimeOption::VMTranslASize = 508 << 20;
|
||||
size_t RuntimeOption::VMTranslAProfSize = 512 << 20;
|
||||
size_t RuntimeOption::VMTranslAStubsSize = 512 << 20;
|
||||
size_t RuntimeOption::VMTranslGDataSize = RuntimeOption::VMTranslASize >> 2;
|
||||
size_t RuntimeOption::VMTranslGDataSize = RuntimeOption::VMTranslASize >> 2;
|
||||
|
||||
std::string RuntimeOption::RepoLocalMode;
|
||||
std::string RuntimeOption::RepoLocalPath;
|
||||
@@ -1141,6 +1142,7 @@ void RuntimeOption::Load(Hdf &config, StringVec *overwrites /* = NULL */,
|
||||
if (RecordCodeCoverage) CheckSymLink = true;
|
||||
CodeCoverageOutputFile = eval["CodeCoverageOutputFile"].getString();
|
||||
VMTranslAHotSize = eval["JitAHotSize"].getUInt64(VMTranslAHotSize);
|
||||
VMTranslAProfSize = eval["JitAProfSize"].getUInt64(VMTranslAProfSize);
|
||||
VMTranslASize = eval["JitASize"].getUInt64(VMTranslASize);
|
||||
VMTranslAStubsSize = eval["JitAStubsSize"].getUInt64(VMTranslAStubsSize);
|
||||
VMTranslGDataSize = eval["JitGlobalDataSize"].getUInt64(VMTranslGDataSize);
|
||||
|
||||
@@ -447,6 +447,7 @@ public:
|
||||
// TranslatorX64 allocation options
|
||||
static size_t VMTranslASize;
|
||||
static size_t VMTranslAHotSize;
|
||||
static size_t VMTranslAProfSize;
|
||||
static size_t VMTranslAStubsSize;
|
||||
static size_t VMTranslGDataSize;
|
||||
|
||||
|
||||
@@ -51,6 +51,7 @@ void SrcRec::chainFrom(IncomingBranch br) {
|
||||
assert(br.type() == IncomingBranch::Tag::ADDR ||
|
||||
tx64->a. contains(br.toSmash()) ||
|
||||
tx64->ahot. contains(br.toSmash()) ||
|
||||
tx64->aprof. contains(br.toSmash()) ||
|
||||
tx64->astubs. contains(br.toSmash()) ||
|
||||
tx64->atrampolines.contains(br.toSmash()));
|
||||
TCA destAddr = getTopTranslation();
|
||||
|
||||
@@ -855,8 +855,6 @@ TranslatorX64::createTranslation(const TranslArgs& args) {
|
||||
|
||||
// We put retranslate requests at the end of our slab to more frequently
|
||||
// allow conditional jump fall-throughs
|
||||
AHotSelector ahs(this, curFunc()->attrs() & AttrHot);
|
||||
|
||||
TCA astart = a.frontier();
|
||||
TCA stubstart = astubs.frontier();
|
||||
TCA req = emitServiceReq(REQ_RETRANSLATE, sk.offset());
|
||||
@@ -905,8 +903,9 @@ TranslatorX64::translate(const TranslArgs& args) {
|
||||
}
|
||||
}
|
||||
|
||||
Func* func = const_cast<Func*>(curFunc());
|
||||
AHotSelector ahs(this, func->attrs() & AttrHot);
|
||||
Func* func = const_cast<Func*>(args.m_sk.func());
|
||||
AsmSelector asmSel(AsmSelector::Args(this).profile(m_mode == TransProfile)
|
||||
.hot(func->attrs() & AttrHot));
|
||||
|
||||
if (args.m_align) {
|
||||
moveToAlign(a, kNonFallthroughAlign);
|
||||
@@ -1505,7 +1504,7 @@ TranslatorX64::funcPrologue(Func* func, int nPassed, ActRec* ar) {
|
||||
// in case another thread snuck in and set the prologue already.
|
||||
if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
|
||||
|
||||
AHotSelector ahs(this, func->attrs() & AttrHot);
|
||||
AsmSelector asmSel(AsmSelector::Args(this).hot(func->attrs() & AttrHot));
|
||||
|
||||
SpaceRecorder sr("_FuncPrologue", a);
|
||||
// If we're close to a cache line boundary, just burn some space to
|
||||
@@ -3664,11 +3663,13 @@ TranslatorX64::TranslatorX64()
|
||||
m_catchTraceMap(128)
|
||||
{
|
||||
static const size_t kRoundUp = 2 << 20;
|
||||
const size_t kAHotSize = RuntimeOption::VMTranslAHotSize;
|
||||
const size_t kASize = RuntimeOption::VMTranslASize;
|
||||
const size_t kAHotSize = RuntimeOption::VMTranslAHotSize;
|
||||
const size_t kAProfSize = RuntimeOption::EvalJitPGO ?
|
||||
RuntimeOption::VMTranslAProfSize : 0;
|
||||
const size_t kASize = RuntimeOption::VMTranslASize;
|
||||
const size_t kAStubsSize = RuntimeOption::VMTranslAStubsSize;
|
||||
const size_t kGDataSize = RuntimeOption::VMTranslGDataSize;
|
||||
m_totalSize = kAHotSize + kASize + kAStubsSize +
|
||||
const size_t kGDataSize = RuntimeOption::VMTranslGDataSize;
|
||||
m_totalSize = kAHotSize + kASize + kAStubsSize + kAProfSize +
|
||||
kTrampolinesBlockSize + kGDataSize;
|
||||
|
||||
TRACE(1, "TranslatorX64@%p startup\n", this);
|
||||
@@ -3737,7 +3738,11 @@ TranslatorX64::TranslatorX64()
|
||||
base += kAHotSize;
|
||||
TRACE(1, "init a @%p\n", base);
|
||||
a.init(base, kASize);
|
||||
aStart = base;
|
||||
base += kASize;
|
||||
TRACE(1, "init aprof @%p\n", base);
|
||||
aprof.init(base, kAProfSize);
|
||||
base += kAProfSize;
|
||||
base += -(uint64_t)base & (kRoundUp - 1);
|
||||
TRACE(1, "init astubs @%p\n", base);
|
||||
astubs.init(base, kAStubsSize);
|
||||
@@ -3747,7 +3752,7 @@ TranslatorX64::TranslatorX64()
|
||||
m_globalData.init(base, kGDataSize);
|
||||
|
||||
// put the stubs into ahot, rather than a
|
||||
AHotSelector ahs(this, true);
|
||||
AsmSelector asmSel(AsmSelector::Args(this).hot(true));
|
||||
|
||||
// Emit some special helpers that are shared across translations.
|
||||
|
||||
@@ -4098,23 +4103,26 @@ size_t TranslatorX64::getTargetCacheSize() {
|
||||
|
||||
std::string TranslatorX64::getUsage() {
|
||||
std::string usage;
|
||||
size_t aHotUsage = ahot.used();
|
||||
size_t aUsage = a.used();
|
||||
size_t aHotUsage = ahot.used();
|
||||
size_t aProfUsage = aprof.used();
|
||||
size_t aUsage = a.used();
|
||||
size_t stubsUsage = astubs.used();
|
||||
size_t dataUsage = m_globalData.frontier - m_globalData.base;
|
||||
size_t tcUsage = TargetCache::s_frontier;
|
||||
size_t dataUsage = m_globalData.frontier - m_globalData.base;
|
||||
size_t tcUsage = TargetCache::s_frontier;
|
||||
size_t persistentUsage =
|
||||
TargetCache::s_persistent_frontier - TargetCache::s_persistent_start;
|
||||
Util::string_printf(
|
||||
usage,
|
||||
"tx64: %9zd bytes (%zd%%) in ahot.code\n"
|
||||
"tx64: %9zd bytes (%zd%%) in a.code\n"
|
||||
"tx64: %9zd bytes (%zd%%) in aprof.code\n"
|
||||
"tx64: %9zd bytes (%zd%%) in astubs.code\n"
|
||||
"tx64: %9zd bytes (%zd%%) in m_globalData\n"
|
||||
"tx64: %9zd bytes (%zd%%) in targetCache\n"
|
||||
"tx64: %9zd bytes (%zd%%) in persistentCache\n",
|
||||
aHotUsage, 100 * aHotUsage / ahot.capacity(),
|
||||
aUsage, 100 * aUsage / a.capacity(),
|
||||
aProfUsage, 100 * aProfUsage / aprof.capacity(),
|
||||
stubsUsage, 100 * stubsUsage / astubs.capacity(),
|
||||
dataUsage, 100 * dataUsage / m_globalData.size,
|
||||
tcUsage,
|
||||
@@ -4226,7 +4234,9 @@ bool TranslatorX64::dumpTCCode(const char* filename) {
|
||||
}
|
||||
// dump starting from the trampolines; this assumes processInit() places
|
||||
// trampolines before the translation cache
|
||||
size_t count = a.frontier() - atrampolines.base();
|
||||
// Task #2649357: teach tc-print about aprof, to avoid dumping the entire
|
||||
// 'a' code slab
|
||||
size_t count = aprof.frontier() - atrampolines.base();
|
||||
bool result = (fwrite(atrampolines.base(), 1, count, aFile) == count);
|
||||
if (result) {
|
||||
count = astubs.used();
|
||||
@@ -4325,6 +4335,79 @@ void TranslatorX64::setJmpTransID(TCA jmp) {
|
||||
m_jmpToTransID[jmp] = transId;
|
||||
}
|
||||
|
||||
// Scoped selector: captures the translator and requested selection from
// 'args', then calls swap() so that 'a' refers to the selected assembler.
// The destructor calls swap() again to undo this.
TranslatorX64::AsmSelector::AsmSelector(const Args& args)
|
||||
: m_tx(args.getTranslator())
|
||||
, m_select(args.getSelection()) {
|
||||
|
||||
// If an assembler other than 'a' has already been selected, then just
|
||||
// keep that selection.
// (Detected by a.base() no longer matching aStart. Forcing Default makes
// both this swap() and the one in the destructor no-ops.)
|
||||
if (m_tx->a.base() != m_tx->aStart) {
|
||||
m_select = AsmSelection::Default;
|
||||
}
|
||||
|
||||
swap();
|
||||
}
|
||||
|
||||
/*
|
||||
* Swap 'a' with 'ahot' or 'aprof'.
|
||||
* Note that, although we don't write to either tx->ahot or tx->aprof directly,
|
||||
* we still need to make sure that all assembler code areas are available
|
||||
* in a, astubs, aprof, and ahot, for example when we call asmChoose(addr, ...).
|
||||
*/
|
||||
// Exchanges 'a' with the assembler named by m_select. Calling it twice with
// the same m_select restores the original assignment; the destructor relies
// on this.
void TranslatorX64::AsmSelector::swap() {
|
||||
switch (m_select) {
|
||||
case AsmSelection::Profile: std::swap(m_tx->a, m_tx->aprof); break;
|
||||
case AsmSelection::Hot : std::swap(m_tx->a, m_tx->ahot) ; break;
|
||||
case AsmSelection::Default: break; // nothing to do
|
||||
}
|
||||
}
|
||||
|
||||
// Undoes the swap performed by the constructor (a no-op when the selection
// is Default).
TranslatorX64::AsmSelector::~AsmSelector() {
|
||||
swap();
|
||||
}
|
||||
|
||||
// Builder for AsmSelector arguments. Starts with the Default selection
// ('a'); use hot() / profile() to request another assembler.
// 'tx' must be non-null (checked by assert).
TranslatorX64::AsmSelector::Args::Args(TranslatorX64* tx)
|
||||
: m_tx(tx)
|
||||
, m_select(AsmSelection::Default) {
|
||||
assert(m_tx != nullptr);
|
||||
}
|
||||
|
||||
// Free space that ahot must exceed before hot() will select it.
// NOTE(review): presumably an upper bound on the size of one translation,
// judging by the name -- confirm.
static const int kMaxTranslationBytes = 8192;
|
||||
|
||||
// Requests the 'ahot' assembler. Selects Hot only when isHot is true and
// ahot has more than kMaxTranslationBytes available; otherwise the selection
// becomes Default. Leaves an existing Profile selection untouched.
// Returns *this so calls can be chained.
TranslatorX64::AsmSelector::Args&
|
||||
TranslatorX64::AsmSelector::Args::hot(bool isHot) {
|
||||
// Profile has precedence over Hot.
|
||||
if (m_select == AsmSelection::Profile) return *this;
|
||||
|
||||
// Make sure there's enough room left in ahot.
|
||||
if (isHot && m_tx->ahot.available() > kMaxTranslationBytes) {
|
||||
m_select = AsmSelection::Hot;
|
||||
} else {
|
||||
m_select = AsmSelection::Default;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Requests the 'aprof' assembler. When isProf is true the selection becomes
// Profile regardless of its previous value. When isProf is false, a previous
// Profile selection is reset to Default, while a Hot or Default selection is
// left as is. Returns *this so calls can be chained.
TranslatorX64::AsmSelector::Args&
|
||||
TranslatorX64::AsmSelector::Args::profile(bool isProf) {
|
||||
if (isProf) {
|
||||
m_select = AsmSelection::Profile;
|
||||
} else if (m_select == AsmSelection::Profile) {
|
||||
m_select = AsmSelection::Default;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Returns the selection accumulated so far by hot() / profile().
TranslatorX64::AsmSelection
|
||||
TranslatorX64::AsmSelector::Args::getSelection() const {
|
||||
return m_select;
|
||||
}
|
||||
|
||||
// Returns the translator passed to the Args constructor.
TranslatorX64*
|
||||
TranslatorX64::AsmSelector::Args::getTranslator() const {
|
||||
return m_tx;
|
||||
}
|
||||
|
||||
} // HPHP::Transl
|
||||
|
||||
} // HPHP
|
||||
|
||||
@@ -149,38 +149,43 @@ class TranslatorX64 : public Translator
|
||||
|
||||
typedef X64Assembler Asm;
|
||||
|
||||
class AHotSelector {
|
||||
enum class AsmSelection {
|
||||
Default, // 'a'
|
||||
Hot, // 'ahot'
|
||||
Profile, // 'aprof' -- highest precedence
|
||||
};
|
||||
|
||||
class AsmSelector {
|
||||
public:
|
||||
AHotSelector(TranslatorX64* tx, bool hot) :
|
||||
m_tx(tx), m_swap(hot &&
|
||||
tx->ahot.available() > 8192 &&
|
||||
// Only swap if a and ahot aren't swapped yet.
|
||||
// This assumes ahot area is in lower address.
|
||||
tx->a.base() > tx->ahot.base()) {
|
||||
if (m_swap) {
|
||||
// Swap a and ahot, so that 'a' contains the hot code region.
|
||||
// Note that, although we don't write to tx->ahot directly, we
|
||||
// still need to make sure that all assembler code areas are
|
||||
// available in a, astubs, and ahot, for example when we call
|
||||
// asmChoose(addr, a, ahot, astubs).
|
||||
std::swap(m_tx->a, m_tx->ahot);
|
||||
}
|
||||
}
|
||||
~AHotSelector() {
|
||||
if (m_swap) {
|
||||
// Swap a and ahot back.
|
||||
std::swap(m_tx->a, m_tx->ahot);
|
||||
}
|
||||
}
|
||||
class Args {
|
||||
public:
|
||||
explicit Args(TranslatorX64* tx);
|
||||
Args& hot(bool isHot);
|
||||
Args& profile(bool isProf);
|
||||
AsmSelection getSelection() const;
|
||||
TranslatorX64* getTranslator() const;
|
||||
|
||||
private:
|
||||
TranslatorX64* m_tx;
|
||||
AsmSelection m_select;
|
||||
};
|
||||
|
||||
explicit AsmSelector(const Args& args);
|
||||
~AsmSelector();
|
||||
|
||||
private:
|
||||
void swap();
|
||||
|
||||
TranslatorX64* m_tx;
|
||||
bool m_swap;
|
||||
AsmSelection m_select;
|
||||
};
|
||||
|
||||
TCA tcStart;
|
||||
Asm ahot;
|
||||
Asm a;
|
||||
Asm astubs;
|
||||
TCA aStart;
|
||||
Asm ahot; // used for hot code of AttrHot functions
|
||||
Asm a; // used for hot code of non-AttrHot functions
|
||||
Asm aprof; // used for hot code of profiling translations
|
||||
Asm astubs; // used for cold code
|
||||
Asm atrampolines;
|
||||
PointerMap trampolineMap;
|
||||
int m_numNativeTrampolines;
|
||||
@@ -239,7 +244,7 @@ private:
|
||||
assert(a.base() != ahot.base() &&
|
||||
a.base() != astubs.base() &&
|
||||
ahot.base() != astubs.base());
|
||||
return asmChoose(addr, a, ahot, astubs, atrampolines);
|
||||
return asmChoose(addr, a, ahot, aprof, astubs, atrampolines);
|
||||
}
|
||||
void emitIncRef(X64Assembler &a, PhysReg base, DataType dtype);
|
||||
void emitIncRef(PhysReg base, DataType);
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário