Partition the tc into hot/cold sections

We already generate the data for this, so let's use it.
Esse commit está contido em:
mwilliams
2013-03-25 15:35:06 -07:00
commit de Sara Golemon
commit 57a5cb6be7
7 arquivos alterados com 94 adições e 35 exclusões
+8
Ver Arquivo
@@ -5309,6 +5309,14 @@ void EmitterVisitor::emitPostponedMeths() {
attrs = attrs | AttrMayUseVV;
}
auto fullName = p.m_meth->getOriginalFullName();
auto it = Option::FunctionSections.find(fullName);
if ((it != Option::FunctionSections.end() && it->second == "hot") ||
(RuntimeOption::EvalRandomHotFuncs &&
(hash_string_i(fullName.c_str()) & 8))) {
attrs = attrs | AttrHot;
}
if (Option::WholeProgram) {
if (!funcScope->isRedeclaring()) {
attrs = attrs | AttrUnique;
+3 -1
Ver Arquivo
@@ -397,7 +397,8 @@ EVALFLAGS();
std::set<string, stdltistr> RuntimeOption::DynamicInvokeFunctions;
bool RuntimeOption::RecordCodeCoverage = false;
std::string RuntimeOption::CodeCoverageOutputFile;
size_t RuntimeOption::VMTranslASize = 512 << 20;
size_t RuntimeOption::VMTranslAHotSize = 2 << 20;
size_t RuntimeOption::VMTranslASize = 510 << 20;
size_t RuntimeOption::VMTranslAStubsSize = 512 << 20;
size_t RuntimeOption::VMTranslGDataSize = RuntimeOption::VMTranslASize >> 2;
@@ -1153,6 +1154,7 @@ void RuntimeOption::Load(Hdf &config, StringVec *overwrites /* = NULL */,
}
if (RecordCodeCoverage) CheckSymLink = true;
CodeCoverageOutputFile = eval["CodeCoverageOutputFile"].getString();
VMTranslAHotSize = eval["JitAHotSize"].getUInt64(VMTranslAHotSize);
VMTranslASize = eval["JitASize"].getUInt64(VMTranslASize);
VMTranslAStubsSize = eval["JitAStubsSize"].getUInt64(VMTranslAStubsSize);
VMTranslGDataSize = eval["JitGlobalDataSize"].getUInt64(VMTranslGDataSize);
+2
Ver Arquivo
@@ -436,6 +436,7 @@ public:
F(bool, DumpTC, false) \
F(bool, DumpAst, false) \
F(bool, MapTCHuge, true) \
F(bool, RandomHotFuncs, false) \
F(uint32_t, ConstEstimate, 10000)
#define F(type, name, unused) \
@@ -449,6 +450,7 @@ public:
// TranslatorX64 allocation options
static size_t VMTranslASize;
static size_t VMTranslAHotSize;
static size_t VMTranslAStubsSize;
static size_t VMTranslGDataSize;
+2 -1
Ver Arquivo
@@ -103,7 +103,8 @@ enum Attr {
AttrVariadicByRef = (1 << 15), // X //
AttrMayUseVV = (1 << 16), // X //
AttrPersistent= (1 << 17), // X X //
AttrDeepInit = (1 << 18) // X
AttrDeepInit = (1 << 18), // X //
AttrHot = (1 << 19), // X //
};
static inline Attr operator|(Attr a, Attr b) { return Attr((int)a | (int)b); }
+3
Ver Arquivo
@@ -463,6 +463,9 @@ void Func::prettyPrint(std::ostream& out) const {
} else {
out << "Function " << m_name->data();
}
if (m_attrs & AttrHot) out << " (hot)";
out << " at " << base();
if (shared()->m_id != -1) {
out << " (ID " << shared()->m_id << ")";
+48 -29
Ver Arquivo
@@ -1388,6 +1388,7 @@ TranslatorX64::createTranslation(SrcKey sk, bool align,
// We put retranslate requests at the end of our slab to more frequently
// allow conditional jump fall-throughs
AHotSelector ahs(this, curFunc()->attrs() & AttrHot);
TCA astart = a.code.frontier;
TCA stubstart = astubs.code.frontier;
@@ -1437,6 +1438,8 @@ TranslatorX64::translate(SrcKey sk, bool align, bool allowIR) {
assert(m_useHHIR == false);
}
AHotSelector ahs(this, curFunc()->attrs() & AttrHot);
if (align) {
moveToAlign(a, kNonFallthroughAlign);
}
@@ -1545,12 +1548,16 @@ TranslatorX64::smash(X64Assembler &a, TCA src, TCA dest, bool isCall) {
}
void TranslatorX64::protectCode() {
mprotect(tx64->a.code.base, tx64->a.code.size, PROT_READ | PROT_EXEC);
mprotect(tx64->ahot.code.base,
tx64->astubs.code.base - tx64->ahot.code.base +
tx64->astubs.code.size, PROT_READ | PROT_EXEC);
}
void TranslatorX64::unprotectCode() {
mprotect(tx64->a.code.base, tx64->a.code.size,
mprotect(tx64->ahot.code.base,
tx64->astubs.code.base - tx64->ahot.code.base +
tx64->astubs.code.size,
PROT_READ | PROT_WRITE | PROT_EXEC);
}
@@ -2090,6 +2097,8 @@ TranslatorX64::funcPrologue(Func* func, int nPassed, ActRec* ar) {
// in case another thread snuck in and set the prologue already.
if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
AHotSelector ahs(this, func->attrs() & AttrHot);
SpaceRecorder sr("_FuncPrologue", a);
// If we're close to a cache line boundary, just burn some space to
// try to keep the func and its body on fewer total lines.
@@ -2693,7 +2702,7 @@ TranslatorX64::bindJmpccFirst(TCA toSmash,
Asm &as = getAsmFor(toSmash);
// Its not clear where chainFrom should go to if as is astubs
assert(&as == &a);
assert(&as != &astubs);
// can we just directly fall through?
// a jmp + jz takes 5 + 6 = 11 bytes
@@ -2721,7 +2730,7 @@ TranslatorX64::bindJmpccFirst(TCA toSmash,
* toSmash+11: newHotness
*/
CodeCursor cg(as, toSmash);
a.jcc(cc, stub);
as.jcc(cc, stub);
getSrcRec(dest)->chainFrom(as, IncomingBranch(as.code.frontier));
TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
return tDest;
@@ -4144,19 +4153,13 @@ TCA TranslatorX64::getTranslatedCaller() const {
ActRec* framePtr = fp; // can't directly mutate the register-mapped one
for (; framePtr; framePtr = (ActRec*)framePtr->m_savedRbp) {
TCA rip = (TCA)framePtr->m_savedRip;
if (isCodeAddress(rip)) {
if (isValidCodeAddress(rip)) {
return rip;
}
}
return nullptr;
}
/*
 * Report whether addr falls inside one of the translator's code regions.
 *
 * The check is delegated to isValidAddress() on three regions, in order:
 * the main assembler (a), the stubs assembler (astubs) and the trampolines
 * assembler (atrampolines). The result is true as soon as any one of them
 * accepts the address. No other region is consulted here.
 */
bool TranslatorX64::isCodeAddress(TCA addr) const {
return a.code.isValidAddress(addr) ||
astubs.code.isValidAddress(addr) ||
atrampolines.code.isValidAddress(addr);
}
void
TranslatorX64::syncWork() {
assert(tl_regState == REGSTATE_DIRTY);
@@ -11413,14 +11416,16 @@ TranslatorX64::translateTracelet(SrcKey sk, bool considerHHIR/*=true*/,
SKTRACE(1, sk, "translateTracelet\n");
assert(m_srcDB.find(sk));
assert(m_regMap.pristine());
TCA start = a.code.frontier;
TCA stubStart = astubs.code.frontier;
TCA counterStart = 0;
uint8_t counterLen = 0;
uint8_t counterLen = 0;
SrcRec& srcRec = *getSrcRec(sk);
vector<TransBCMapping> bcMapping;
TransKind transKind = TransNormal;
if (m_useHHIR) {
TranslateTraceletResult result;
do {
@@ -11703,15 +11708,18 @@ TranslatorX64::TranslatorX64()
m_curFunc(nullptr),
m_vecState(nullptr)
{
const size_t kAHotSize = RuntimeOption::VMTranslAHotSize;
const size_t kASize = RuntimeOption::VMTranslASize;
const size_t kAStubsSize = RuntimeOption::VMTranslAStubsSize;
const size_t kGDataSize = RuntimeOption::VMTranslGDataSize;
m_totalSize = kASize + kAStubsSize + kTrampolinesBlockSize + kGDataSize;
m_totalSize = kAHotSize + kASize + kAStubsSize +
kTrampolinesBlockSize + kGDataSize;
TRACE(1, "TranslatorX64@%p startup\n", this);
tx64 = this;
if ((kASize < (10 << 20)) ||
if ((kAHotSize < (2 << 20)) ||
(kASize < (10 << 20)) ||
(kAStubsSize < (10 << 20)) ||
(kGDataSize < (2 << 20))) {
fprintf(stderr, "Allocation sizes ASize, AStubsSize, and GlobalDataSize "
@@ -11761,9 +11769,13 @@ TranslatorX64::TranslatorX64()
TRACE(1, "init atrampolines @%p\n", base);
atrampolines.init(base, kTrampolinesBlockSize);
base += kTrampolinesBlockSize;
m_unwindRegistrar = register_unwind_region(base, m_totalSize);
TRACE(1, "init ahot @%p\n", base);
ahot.init(base, kAHotSize);
base += kAHotSize;
TRACE(1, "init a @%p\n", base);
a.init(base, kASize);
m_unwindRegistrar = register_unwind_region(base, m_totalSize);
base += kASize;
TRACE(1, "init astubs @%p\n", base);
astubs.init(base, kAStubsSize);
@@ -11771,6 +11783,9 @@ TranslatorX64::TranslatorX64()
TRACE(1, "init gdata @%p\n", base);
m_globalData.init(base, kGDataSize);
// put the stubs into ahot, rather than a
AHotSelector ahs(this, true);
// Emit some special helpers that are shared across translations.
// Emit a byte of padding. This is a kind of hacky way to
@@ -12187,24 +12202,28 @@ size_t TranslatorX64::getTargetCacheSize() {
std::string TranslatorX64::getUsage() {
std::string usage;
size_t aHotUsage = ahot.code.frontier - ahot.code.base;
size_t aUsage = a.code.frontier - a.code.base;
size_t stubsUsage = astubs.code.frontier - astubs.code.base;
size_t dataUsage = m_globalData.frontier - m_globalData.base;
size_t tcUsage = TargetCache::s_frontier;
Util::string_printf(usage,
"tx64: %9zd bytes (%" PRId64 "%%) in a.code\n"
"tx64: %9zd bytes (%" PRId64 "%%) in astubs.code\n"
"tx64: %9zd bytes (%" PRId64 "%%) in a.code from ir\n"
"tx64: %9zd bytes (%" PRId64 "%%) in astubs.code from ir\n"
"tx64: %9zd bytes (%" PRId64 "%%) in m_globalData\n"
"tx64: %9zd bytes (%" PRId64 "%%) in targetCache\n",
aUsage, 100 * aUsage / a.code.size,
stubsUsage, 100 * stubsUsage / astubs.code.size,
m_irAUsage, 100 * m_irAUsage / a.code.size,
m_irAstubsUsage, 100 * m_irAstubsUsage / astubs.code.size,
dataUsage, 100 * dataUsage / m_globalData.size,
tcUsage,
100 * tcUsage / RuntimeOption::EvalJitTargetCacheSize);
Util::string_printf(
usage,
"tx64: %9zd bytes (%" PRId64 "%%) in ahot.code\n"
"tx64: %9zd bytes (%" PRId64 "%%) in a.code\n"
"tx64: %9zd bytes (%" PRId64 "%%) in astubs.code\n"
"tx64: %9zd bytes (%" PRId64 "%%) in a.code from ir\n"
"tx64: %9zd bytes (%" PRId64 "%%) in astubs.code from ir\n"
"tx64: %9zd bytes (%" PRId64 "%%) in m_globalData\n"
"tx64: %9zd bytes (%" PRId64 "%%) in targetCache\n",
aHotUsage, 100 * aHotUsage / ahot.code.size,
aUsage, 100 * aUsage / a.code.size,
stubsUsage, 100 * stubsUsage / astubs.code.size,
m_irAUsage, 100 * m_irAUsage / a.code.size,
m_irAstubsUsage, 100 * m_irAstubsUsage / astubs.code.size,
dataUsage, 100 * dataUsage / m_globalData.size,
tcUsage,
100 * tcUsage / RuntimeOption::EvalJitTargetCacheSize);
return usage;
}
+28 -4
Ver Arquivo
@@ -128,6 +128,32 @@ class TranslatorX64 : public Translator
typedef X64Assembler Asm;
typedef std::map<int, int> ContParamMap;
static const int kMaxInlineContLocals = 10;
/*
 * Scoped guard that temporarily makes the translator's main assembler (a)
 * emit into the hot section (ahot).
 *
 * On construction, if the switch is taken, the current `a` is stashed and
 * replaced by `ahot`. On destruction the (possibly advanced) `a` is written
 * back to `ahot` and the stashed assembler is restored to `a`.
 *
 * The switch is taken only when all of the following hold:
 *   - the caller asked for it (hot == true);
 *   - more than 8192 bytes remain between ahot's frontier and the end of
 *     its region;
 *   - a.code.base differs from ahot.code.base. After a switch `a` is a copy
 *     of `ahot`, so this presumably keeps a nested selector from switching
 *     (and later restoring) a second time.
 *
 * @param tx   translator whose `a` / `ahot` assemblers are swapped; must
 *             outlive the guard, since the destructor dereferences it.
 * @param hot  whether the code about to be emitted belongs in the hot section.
 */
class AHotSelector {
 public:
  AHotSelector(TranslatorX64* tx, bool hot) :
      m_tx(tx), m_hot(hot &&
                      tx->ahot.code.base + tx->ahot.code.size -
                      tx->ahot.code.frontier > 8192 &&
                      tx->a.code.base != tx->ahot.code.base) {
    if (m_hot) {
      m_save = tx->a;
      tx->a = tx->ahot;
    }
  }

  ~AHotSelector() {
    if (m_hot) {
      // Publish whatever was emitted while `a` stood in for `ahot`, then
      // put the original main assembler back.
      m_tx->ahot = m_tx->a;
      m_tx->a = m_save;
    }
  }

  // A copy would run the restore sequence twice with a stale m_save and
  // clobber the translator's assembler state, so the guard is non-copyable.
  AHotSelector(const AHotSelector&) = delete;
  AHotSelector& operator=(const AHotSelector&) = delete;

 private:
  TranslatorX64* m_tx;  // translator being redirected
  Asm m_save;           // main assembler as it was before the switch
  bool m_hot;           // true iff the switch was actually taken
};
Asm ahot;
Asm a;
Asm astubs;
Asm atrampolines;
@@ -225,7 +251,7 @@ private:
return m_regMap.getReg(dl.location);
}
Asm& getAsmFor(TCA addr) { return asmChoose(addr, a, astubs); }
Asm& getAsmFor(TCA addr) { return asmChoose(addr, a, ahot, astubs); }
void emitIncRef(X64Assembler &a, PhysReg base, DataType dtype);
void emitIncRef(PhysReg base, DataType);
void emitIncRefGenericRegSafe(PhysReg base, int disp, PhysReg tmp);
@@ -320,8 +346,7 @@ private:
PhysReg scr);
inline bool isValidCodeAddress(TCA tca) const {
return a.code.isValidAddress(tca) || astubs.code.isValidAddress(tca) ||
atrampolines.code.isValidAddress(tca);
return tca >= ahot.code.base && tca < astubs.code.base + astubs.code.size;
}
template<int Arity> TCA emitNAryStub(Asm& a, Call c);
TCA emitUnaryStub(Asm& a, Call c);
@@ -704,7 +729,6 @@ PSEUDOINSTRS
void fixupWork(VMExecutionContext* ec, ActRec* startRbp) const;
void fixup(VMExecutionContext* ec) const;
TCA getTranslatedCaller() const;
bool isCodeAddress(TCA) const;
// helpers for srcDB.
SrcRec* getSrcRec(SrcKey sk) {