Partition the tc into hot/cold sections
We already generate the data for this, so let's use it.
Esse commit está contido em:
@@ -5309,6 +5309,14 @@ void EmitterVisitor::emitPostponedMeths() {
|
||||
attrs = attrs | AttrMayUseVV;
|
||||
}
|
||||
|
||||
auto fullName = p.m_meth->getOriginalFullName();
|
||||
auto it = Option::FunctionSections.find(fullName);
|
||||
if ((it != Option::FunctionSections.end() && it->second == "hot") ||
|
||||
(RuntimeOption::EvalRandomHotFuncs &&
|
||||
(hash_string_i(fullName.c_str()) & 8))) {
|
||||
attrs = attrs | AttrHot;
|
||||
}
|
||||
|
||||
if (Option::WholeProgram) {
|
||||
if (!funcScope->isRedeclaring()) {
|
||||
attrs = attrs | AttrUnique;
|
||||
|
||||
@@ -397,7 +397,8 @@ EVALFLAGS();
|
||||
std::set<string, stdltistr> RuntimeOption::DynamicInvokeFunctions;
|
||||
bool RuntimeOption::RecordCodeCoverage = false;
|
||||
std::string RuntimeOption::CodeCoverageOutputFile;
|
||||
size_t RuntimeOption::VMTranslASize = 512 << 20;
|
||||
size_t RuntimeOption::VMTranslAHotSize = 2 << 20;
|
||||
size_t RuntimeOption::VMTranslASize = 510 << 20;
|
||||
size_t RuntimeOption::VMTranslAStubsSize = 512 << 20;
|
||||
size_t RuntimeOption::VMTranslGDataSize = RuntimeOption::VMTranslASize >> 2;
|
||||
|
||||
@@ -1153,6 +1154,7 @@ void RuntimeOption::Load(Hdf &config, StringVec *overwrites /* = NULL */,
|
||||
}
|
||||
if (RecordCodeCoverage) CheckSymLink = true;
|
||||
CodeCoverageOutputFile = eval["CodeCoverageOutputFile"].getString();
|
||||
VMTranslAHotSize = eval["JitAHotSize"].getUInt64(VMTranslAHotSize);
|
||||
VMTranslASize = eval["JitASize"].getUInt64(VMTranslASize);
|
||||
VMTranslAStubsSize = eval["JitAStubsSize"].getUInt64(VMTranslAStubsSize);
|
||||
VMTranslGDataSize = eval["JitGlobalDataSize"].getUInt64(VMTranslGDataSize);
|
||||
|
||||
@@ -436,6 +436,7 @@ public:
|
||||
F(bool, DumpTC, false) \
|
||||
F(bool, DumpAst, false) \
|
||||
F(bool, MapTCHuge, true) \
|
||||
F(bool, RandomHotFuncs, false) \
|
||||
F(uint32_t, ConstEstimate, 10000)
|
||||
|
||||
#define F(type, name, unused) \
|
||||
@@ -449,6 +450,7 @@ public:
|
||||
|
||||
// TranslatorX64 allocation options
|
||||
static size_t VMTranslASize;
|
||||
static size_t VMTranslAHotSize;
|
||||
static size_t VMTranslAStubsSize;
|
||||
static size_t VMTranslGDataSize;
|
||||
|
||||
|
||||
@@ -103,7 +103,8 @@ enum Attr {
|
||||
AttrVariadicByRef = (1 << 15), // X //
|
||||
AttrMayUseVV = (1 << 16), // X //
|
||||
AttrPersistent= (1 << 17), // X X //
|
||||
AttrDeepInit = (1 << 18) // X
|
||||
AttrDeepInit = (1 << 18), // X //
|
||||
AttrHot = (1 << 19), // X //
|
||||
};
|
||||
|
||||
static inline Attr operator|(Attr a, Attr b) { return Attr((int)a | (int)b); }
|
||||
|
||||
@@ -463,6 +463,9 @@ void Func::prettyPrint(std::ostream& out) const {
|
||||
} else {
|
||||
out << "Function " << m_name->data();
|
||||
}
|
||||
|
||||
if (m_attrs & AttrHot) out << " (hot)";
|
||||
|
||||
out << " at " << base();
|
||||
if (shared()->m_id != -1) {
|
||||
out << " (ID " << shared()->m_id << ")";
|
||||
|
||||
@@ -1388,6 +1388,7 @@ TranslatorX64::createTranslation(SrcKey sk, bool align,
|
||||
|
||||
// We put retranslate requests at the end of our slab to more frequently
|
||||
// allow conditional jump fall-throughs
|
||||
AHotSelector ahs(this, curFunc()->attrs() & AttrHot);
|
||||
|
||||
TCA astart = a.code.frontier;
|
||||
TCA stubstart = astubs.code.frontier;
|
||||
@@ -1437,6 +1438,8 @@ TranslatorX64::translate(SrcKey sk, bool align, bool allowIR) {
|
||||
assert(m_useHHIR == false);
|
||||
}
|
||||
|
||||
AHotSelector ahs(this, curFunc()->attrs() & AttrHot);
|
||||
|
||||
if (align) {
|
||||
moveToAlign(a, kNonFallthroughAlign);
|
||||
}
|
||||
@@ -1545,12 +1548,16 @@ TranslatorX64::smash(X64Assembler &a, TCA src, TCA dest, bool isCall) {
|
||||
}
|
||||
|
||||
void TranslatorX64::protectCode() {
|
||||
mprotect(tx64->a.code.base, tx64->a.code.size, PROT_READ | PROT_EXEC);
|
||||
mprotect(tx64->ahot.code.base,
|
||||
tx64->astubs.code.base - tx64->ahot.code.base +
|
||||
tx64->astubs.code.size, PROT_READ | PROT_EXEC);
|
||||
|
||||
}
|
||||
|
||||
void TranslatorX64::unprotectCode() {
|
||||
mprotect(tx64->a.code.base, tx64->a.code.size,
|
||||
mprotect(tx64->ahot.code.base,
|
||||
tx64->astubs.code.base - tx64->ahot.code.base +
|
||||
tx64->astubs.code.size,
|
||||
PROT_READ | PROT_WRITE | PROT_EXEC);
|
||||
}
|
||||
|
||||
@@ -2090,6 +2097,8 @@ TranslatorX64::funcPrologue(Func* func, int nPassed, ActRec* ar) {
|
||||
// in case another thread snuck in and set the prologue already.
|
||||
if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
|
||||
|
||||
AHotSelector ahs(this, func->attrs() & AttrHot);
|
||||
|
||||
SpaceRecorder sr("_FuncPrologue", a);
|
||||
// If we're close to a cache line boundary, just burn some space to
|
||||
// try to keep the func and its body on fewer total lines.
|
||||
@@ -2693,7 +2702,7 @@ TranslatorX64::bindJmpccFirst(TCA toSmash,
|
||||
|
||||
Asm &as = getAsmFor(toSmash);
|
||||
// It's not clear where chainFrom should go if `as` is astubs
|
||||
assert(&as == &a);
|
||||
assert(&as != &astubs);
|
||||
|
||||
// can we just directly fall through?
|
||||
// a jmp + jz takes 5 + 6 = 11 bytes
|
||||
@@ -2721,7 +2730,7 @@ TranslatorX64::bindJmpccFirst(TCA toSmash,
|
||||
* toSmash+11: newHotness
|
||||
*/
|
||||
CodeCursor cg(as, toSmash);
|
||||
a.jcc(cc, stub);
|
||||
as.jcc(cc, stub);
|
||||
getSrcRec(dest)->chainFrom(as, IncomingBranch(as.code.frontier));
|
||||
TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
|
||||
return tDest;
|
||||
@@ -4144,19 +4153,13 @@ TCA TranslatorX64::getTranslatedCaller() const {
|
||||
ActRec* framePtr = fp; // can't directly mutate the register-mapped one
|
||||
for (; framePtr; framePtr = (ActRec*)framePtr->m_savedRbp) {
|
||||
TCA rip = (TCA)framePtr->m_savedRip;
|
||||
if (isCodeAddress(rip)) {
|
||||
if (isValidCodeAddress(rip)) {
|
||||
return rip;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
bool TranslatorX64::isCodeAddress(TCA addr) const {
|
||||
return a.code.isValidAddress(addr) ||
|
||||
astubs.code.isValidAddress(addr) ||
|
||||
atrampolines.code.isValidAddress(addr);
|
||||
}
|
||||
|
||||
void
|
||||
TranslatorX64::syncWork() {
|
||||
assert(tl_regState == REGSTATE_DIRTY);
|
||||
@@ -11413,14 +11416,16 @@ TranslatorX64::translateTracelet(SrcKey sk, bool considerHHIR/*=true*/,
|
||||
SKTRACE(1, sk, "translateTracelet\n");
|
||||
assert(m_srcDB.find(sk));
|
||||
assert(m_regMap.pristine());
|
||||
|
||||
TCA start = a.code.frontier;
|
||||
TCA stubStart = astubs.code.frontier;
|
||||
TCA counterStart = 0;
|
||||
uint8_t counterLen = 0;
|
||||
uint8_t counterLen = 0;
|
||||
SrcRec& srcRec = *getSrcRec(sk);
|
||||
vector<TransBCMapping> bcMapping;
|
||||
TransKind transKind = TransNormal;
|
||||
|
||||
|
||||
if (m_useHHIR) {
|
||||
TranslateTraceletResult result;
|
||||
do {
|
||||
@@ -11703,15 +11708,18 @@ TranslatorX64::TranslatorX64()
|
||||
m_curFunc(nullptr),
|
||||
m_vecState(nullptr)
|
||||
{
|
||||
const size_t kAHotSize = RuntimeOption::VMTranslAHotSize;
|
||||
const size_t kASize = RuntimeOption::VMTranslASize;
|
||||
const size_t kAStubsSize = RuntimeOption::VMTranslAStubsSize;
|
||||
const size_t kGDataSize = RuntimeOption::VMTranslGDataSize;
|
||||
m_totalSize = kASize + kAStubsSize + kTrampolinesBlockSize + kGDataSize;
|
||||
m_totalSize = kAHotSize + kASize + kAStubsSize +
|
||||
kTrampolinesBlockSize + kGDataSize;
|
||||
|
||||
TRACE(1, "TranslatorX64@%p startup\n", this);
|
||||
tx64 = this;
|
||||
|
||||
if ((kASize < (10 << 20)) ||
|
||||
if ((kAHotSize < (2 << 20)) ||
|
||||
(kASize < (10 << 20)) ||
|
||||
(kAStubsSize < (10 << 20)) ||
|
||||
(kGDataSize < (2 << 20))) {
|
||||
fprintf(stderr, "Allocation sizes ASize, AStubsSize, and GlobalDataSize "
|
||||
@@ -11761,9 +11769,13 @@ TranslatorX64::TranslatorX64()
|
||||
TRACE(1, "init atrampolines @%p\n", base);
|
||||
atrampolines.init(base, kTrampolinesBlockSize);
|
||||
base += kTrampolinesBlockSize;
|
||||
|
||||
m_unwindRegistrar = register_unwind_region(base, m_totalSize);
|
||||
TRACE(1, "init ahot @%p\n", base);
|
||||
ahot.init(base, kAHotSize);
|
||||
base += kAHotSize;
|
||||
TRACE(1, "init a @%p\n", base);
|
||||
a.init(base, kASize);
|
||||
m_unwindRegistrar = register_unwind_region(base, m_totalSize);
|
||||
base += kASize;
|
||||
TRACE(1, "init astubs @%p\n", base);
|
||||
astubs.init(base, kAStubsSize);
|
||||
@@ -11771,6 +11783,9 @@ TranslatorX64::TranslatorX64()
|
||||
TRACE(1, "init gdata @%p\n", base);
|
||||
m_globalData.init(base, kGDataSize);
|
||||
|
||||
// put the stubs into ahot, rather than a
|
||||
AHotSelector ahs(this, true);
|
||||
|
||||
// Emit some special helpers that are shared across translations.
|
||||
|
||||
// Emit a byte of padding. This is a kind of hacky way to
|
||||
@@ -12187,24 +12202,28 @@ size_t TranslatorX64::getTargetCacheSize() {
|
||||
|
||||
std::string TranslatorX64::getUsage() {
|
||||
std::string usage;
|
||||
size_t aHotUsage = ahot.code.frontier - ahot.code.base;
|
||||
size_t aUsage = a.code.frontier - a.code.base;
|
||||
size_t stubsUsage = astubs.code.frontier - astubs.code.base;
|
||||
size_t dataUsage = m_globalData.frontier - m_globalData.base;
|
||||
size_t tcUsage = TargetCache::s_frontier;
|
||||
Util::string_printf(usage,
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in a.code\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in astubs.code\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in a.code from ir\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in astubs.code from ir\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in m_globalData\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in targetCache\n",
|
||||
aUsage, 100 * aUsage / a.code.size,
|
||||
stubsUsage, 100 * stubsUsage / astubs.code.size,
|
||||
m_irAUsage, 100 * m_irAUsage / a.code.size,
|
||||
m_irAstubsUsage, 100 * m_irAstubsUsage / astubs.code.size,
|
||||
dataUsage, 100 * dataUsage / m_globalData.size,
|
||||
tcUsage,
|
||||
100 * tcUsage / RuntimeOption::EvalJitTargetCacheSize);
|
||||
Util::string_printf(
|
||||
usage,
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in ahot.code\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in a.code\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in astubs.code\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in a.code from ir\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in astubs.code from ir\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in m_globalData\n"
|
||||
"tx64: %9zd bytes (%" PRId64 "%%) in targetCache\n",
|
||||
aHotUsage, 100 * aHotUsage / ahot.code.size,
|
||||
aUsage, 100 * aUsage / a.code.size,
|
||||
stubsUsage, 100 * stubsUsage / astubs.code.size,
|
||||
m_irAUsage, 100 * m_irAUsage / a.code.size,
|
||||
m_irAstubsUsage, 100 * m_irAstubsUsage / astubs.code.size,
|
||||
dataUsage, 100 * dataUsage / m_globalData.size,
|
||||
tcUsage,
|
||||
100 * tcUsage / RuntimeOption::EvalJitTargetCacheSize);
|
||||
return usage;
|
||||
}
|
||||
|
||||
|
||||
@@ -128,6 +128,32 @@ class TranslatorX64 : public Translator
|
||||
typedef X64Assembler Asm;
|
||||
typedef std::map<int, int> ContParamMap;
|
||||
static const int kMaxInlineContLocals = 10;
|
||||
|
||||
// Scoped selector for the hot code area. If the constructor's checks pass,
// it saves tx->a and replaces it with tx->ahot for the lifetime of this
// object; the destructor then writes tx->a back into tx->ahot and restores
// the saved tx->a. If the checks fail, neither the constructor nor the
// destructor touches the translator.
class AHotSelector {
|
||||
public:
|
||||
AHotSelector(TranslatorX64* tx, bool hot) :
|
||||
// m_hot ends up true only when all three conditions hold:
//   - the caller asked for hot placement (`hot`),
//   - more than 8192 bytes remain between ahot's frontier and the end of
//     its region (base + size),
//   - `a` does not currently share ahot's base address (presumably this
//     detects an enclosing selector that already swapped them -- confirm).
m_tx(tx), m_hot(hot &&
|
||||
tx->ahot.code.base + tx->ahot.code.size -
|
||||
tx->ahot.code.frontier > 8192 &&
|
||||
tx->a.code.base != tx->ahot.code.base) {
|
||||
if (m_hot) {
|
||||
// Keep a copy of the current `a` so the destructor can put it back.
m_save = tx->a;
|
||||
// From here on, code that uses tx->a operates on a copy of ahot.
tx->a = tx->ahot;
|
||||
}
|
||||
}
|
||||
~AHotSelector() {
|
||||
if (m_hot) {
|
||||
// Order matters: copy `a` back into ahot before overwriting `a`, so ahot
// keeps whatever state changed while it stood in for `a` (presumably the
// advanced frontier -- confirm).
m_tx->ahot = m_tx->a;
|
||||
m_tx->a = m_save;
|
||||
}
|
||||
}
|
||||
private:
|
||||
// Translator whose `a` / `ahot` members are swapped.
TranslatorX64* m_tx;
|
||||
// Copy of tx->a taken by the constructor; only assigned when m_hot is true.
Asm m_save;
|
||||
// True when the constructor performed the swap and the destructor must
// undo it.
bool m_hot;
|
||||
};
|
||||
|
||||
Asm ahot;
|
||||
Asm a;
|
||||
Asm astubs;
|
||||
Asm atrampolines;
|
||||
@@ -225,7 +251,7 @@ private:
|
||||
return m_regMap.getReg(dl.location);
|
||||
}
|
||||
|
||||
Asm& getAsmFor(TCA addr) { return asmChoose(addr, a, astubs); }
|
||||
Asm& getAsmFor(TCA addr) { return asmChoose(addr, a, ahot, astubs); }
|
||||
void emitIncRef(X64Assembler &a, PhysReg base, DataType dtype);
|
||||
void emitIncRef(PhysReg base, DataType);
|
||||
void emitIncRefGenericRegSafe(PhysReg base, int disp, PhysReg tmp);
|
||||
@@ -320,8 +346,7 @@ private:
|
||||
PhysReg scr);
|
||||
|
||||
inline bool isValidCodeAddress(TCA tca) const {
|
||||
return a.code.isValidAddress(tca) || astubs.code.isValidAddress(tca) ||
|
||||
atrampolines.code.isValidAddress(tca);
|
||||
return tca >= ahot.code.base && tca < astubs.code.base + astubs.code.size;
|
||||
}
|
||||
template<int Arity> TCA emitNAryStub(Asm& a, Call c);
|
||||
TCA emitUnaryStub(Asm& a, Call c);
|
||||
@@ -704,7 +729,6 @@ PSEUDOINSTRS
|
||||
void fixupWork(VMExecutionContext* ec, ActRec* startRbp) const;
|
||||
void fixup(VMExecutionContext* ec) const;
|
||||
TCA getTranslatedCaller() const;
|
||||
bool isCodeAddress(TCA) const;
|
||||
|
||||
// helpers for srcDB.
|
||||
SrcRec* getSrcRec(SrcKey sk) {
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário