Profile vector instruction shapes
This adds a new profiling mode for vector instruction shapes. I plan to use it to identify common cases that may be worth special-casing, as we already do for simple SetM, CGetM, and IssetM instructions. Instead of adding Yet Another Hashtable Stats Map, I created a generic grouped-stats map and changed TRACE=punt:1 to use it as well. I also changed emitInterpOneOrPunt to record a more specific punt name that includes the opcode being punted ("PuntDontInterp-<opcode>").
Esse commit está contido em:
@@ -157,6 +157,7 @@ int compiler_main(int argc, char **argv) {
|
||||
try {
|
||||
Hdf empty;
|
||||
RuntimeOption::Load(empty);
|
||||
VM::initialize_repo();
|
||||
|
||||
CompilerOptions po;
|
||||
#ifdef FACEBOOK
|
||||
|
||||
@@ -1151,6 +1151,10 @@ IncStat S0:ConstInt S1:ConstInt S2:ConstBool
|
||||
'force' flag. This opcode becomes a noop iff (force == false and runtime
|
||||
stats are not enabled) at translation time.
|
||||
|
||||
IncStatGrouped S0:ConstStr S1:ConstStr S2:ConstInt
|
||||
|
||||
Adds the value S2 to the counter named S1, in the category S0.
|
||||
|
||||
DbgAssertRefCount S0:{Counted|StaticStr|StaticArr}
|
||||
|
||||
Assert that S0 has a valid refcount. S0 must be a type with a valid
|
||||
|
||||
@@ -819,6 +819,11 @@ int instrNumPushes(const Opcode* opcode);
|
||||
StackTransInfo instrStackTransInfo(const Opcode* opcode);
|
||||
int instrSpToArDelta(const Opcode* opcode);
|
||||
|
||||
inline bool
|
||||
mcodeIsLiteral(MemberCode mcode) {
|
||||
return mcode == MET || mcode == MEI || mcode == MPT;
|
||||
}
|
||||
|
||||
inline bool
|
||||
mcodeMaybePropName(MemberCode mcode) {
|
||||
return mcode == MPC || mcode == MPL || mcode == MPT;
|
||||
|
||||
@@ -36,6 +36,9 @@ const char* g_counterNames[] = {
|
||||
__thread uint64_t tl_counters[kNumStatCounters];
|
||||
__thread uint64_t tl_helper_counters[kMaxNumTrampolines];
|
||||
|
||||
typedef hphp_const_char_map<hphp_const_char_map<uint64_t>> StatGroupMap;
|
||||
__thread StatGroupMap* tl_stat_groups = nullptr;
|
||||
|
||||
// Only the thread holding the write lease will set the entries in the
|
||||
// helperNames array but other threads may concurrently read these
|
||||
// entries, so each entry is volatile (or an atomic type per the new
|
||||
@@ -72,31 +75,75 @@ void emitIncTranslOp(X64Assembler& a, Opcode opc, bool force) {
|
||||
Transl::CC_None, force);
|
||||
}
|
||||
|
||||
void init() {
|
||||
if (!enabledAny()) return;
|
||||
assert(tl_stat_groups == nullptr);
|
||||
tl_stat_groups = new StatGroupMap();
|
||||
}
|
||||
|
||||
static __thread int64_t epoch;
|
||||
/*
 * Traces the current request's statistics:
 *
 *   1. a "STATS <epoch> <url>" header line,
 *   2. one "STAT" line for every nonzero entry of tl_counters and of
 *      tl_helper_counters,
 *   3. for each group in tl_stat_groups, a table of its counters sorted
 *      by descending count, with each row's share of the group total
 *      and the running (accumulated) share.
 *
 * Returns without output unless enabledAny() is true.
 */
void dump() {
  // The single guard must be enabledAny(): gating on enabled() alone
  // would skip the grouped tables when only the statgroups module is on.
  if (!enabledAny()) return;

  auto url = g_context->getRequestUrl(50);
  TRACE(0, "STATS %ld %s\n", epoch, url.c_str());

  // STATS expands to one STAT(...) per counter; each expansion prints
  // the counter only when it is nonzero.
#include "runtime/vm/stats-opcodeDef.h"
#define STAT(s) \
  if (!tl_counters[s]) {} else \
    TRACE(0, "STAT %-50s %15" PRId64 "\n", #s, tl_counters[s]);
  STATS
#undef STAT
#undef O

  // helperNames is scanned until its first null entry.
  for (int i = 0; helperNames[i]; i++) {
    if (tl_helper_counters[i]) {
      TRACE(0, "STAT %-50s %15ld\n",
            helperNames[i],
            tl_helper_counters[i]);
    }
  }

  typedef std::pair<const char*, uint64_t> StatPair;
  for (auto const& group : *tl_stat_groups) {
    auto const& map = group.second;

    // Copy the group's (name, count) pairs so they can be sorted.
    std::vector<StatPair> rows(map.begin(), map.end());

    uint64_t total = 0;
    for (auto const& row : rows) total += row.second;

    std::sort(rows.begin(), rows.end(),
              [](const StatPair& a, const StatPair& b) {
                return a.second > b.second;
              });

    // Avoid 0/0 (NaN percentages) when every counter in the group is 0.
    const double denom = total ? static_cast<double>(total) : 1.0;

    std::ostringstream stats;
    stats << folly::format("{:-^80}\n",
                           folly::format(" group {} ", group.first))
          << folly::format("{:>45} {:>9} {:>8} {:>8}\n",
                           "name", "count", "% total", "accum %");

    uint64_t accum = 0;
    for (auto const& row : rows) {
      accum += row.second;
      stats << folly::format("{:>45} {} {:9} {:8.2%} {:8.2%}\n",
                             row.first, ':', row.second,
                             row.second / denom, accum / denom);
    }
    FTRACE(0, "{}\n", stats.str());
  }
}
|
||||
|
||||
void clear() {
|
||||
if (!RuntimeOption::EnableInstructionCounts && !enabled()) return;
|
||||
if (!RuntimeOption::EnableInstructionCounts && !enabledAny()) return;
|
||||
++epoch;
|
||||
memset(&tl_counters[0], 0, sizeof(tl_counters));
|
||||
memset(&tl_helper_counters[0], 0, sizeof(tl_helper_counters));
|
||||
|
||||
assert(tl_stat_groups);
|
||||
delete tl_stat_groups;
|
||||
tl_stat_groups = nullptr;
|
||||
}
|
||||
|
||||
/*
 * Adds n to the counter `name` inside group `category`, creating the
 * group and/or counter entry on first use. The table must already have
 * been allocated (see the assert).
 *
 * The map keys are the raw character pointers returned by data().
 * NOTE(review): this presumably relies on callers passing strings that
 * outlive the table (e.g. static strings) -- confirm against callers.
 */
void incStatGrouped(const StringData* category, const StringData* name, int n) {
  assert(tl_stat_groups);
  auto& counters = (*tl_stat_groups)[category->data()];
  counters[name->data()] += n;
}
|
||||
|
||||
} } }
|
||||
|
||||
@@ -199,6 +199,10 @@ static inline bool enabled() {
|
||||
return Trace::moduleEnabled(Trace::stats, 1);
|
||||
}
|
||||
|
||||
static inline bool enabledAny() {
|
||||
return enabled() || Trace::moduleEnabled(Trace::statgroups);
|
||||
}
|
||||
|
||||
static inline bool enableInstrCount() {
|
||||
return Trace::moduleEnabled(Trace::stats, 2);
|
||||
}
|
||||
@@ -248,9 +252,12 @@ inline void emitInc(Transl::X64Assembler& a, StatCounter stat, int n = 1,
|
||||
|
||||
extern void emitIncTranslOp(Transl::X64Assembler& a, Opcode opc,
|
||||
bool force = false);
|
||||
extern void init();
|
||||
extern void dump();
|
||||
extern void clear();
|
||||
|
||||
void incStatGrouped(const StringData* cat, const StringData* name, int n = 1);
|
||||
|
||||
} } }
|
||||
|
||||
#endif
|
||||
|
||||
@@ -313,16 +313,17 @@ CALL_OPCODE(PrintStr)
|
||||
CALL_OPCODE(PrintInt)
|
||||
CALL_OPCODE(PrintBool)
|
||||
CALL_OPCODE(DbgAssertPtr)
|
||||
CALL_OPCODE(LdSwitchDblIndex);
|
||||
CALL_OPCODE(LdSwitchStrIndex);
|
||||
CALL_OPCODE(LdSwitchObjIndex);
|
||||
CALL_OPCODE(VerifyParamCallable);
|
||||
CALL_OPCODE(VerifyParamFail);
|
||||
CALL_OPCODE(LdSwitchDblIndex)
|
||||
CALL_OPCODE(LdSwitchStrIndex)
|
||||
CALL_OPCODE(LdSwitchObjIndex)
|
||||
CALL_OPCODE(VerifyParamCallable)
|
||||
CALL_OPCODE(VerifyParamFail)
|
||||
CALL_OPCODE(RaiseUninitLoc)
|
||||
CALL_OPCODE(WarnNonObjProp)
|
||||
CALL_OPCODE(ThrowNonObjProp)
|
||||
CALL_OPCODE(RaiseUndefProp)
|
||||
CALL_OPCODE(RaiseError)
|
||||
CALL_OPCODE(IncStatGrouped)
|
||||
|
||||
// Vector instruction helpers
|
||||
CALL_OPCODE(BaseG)
|
||||
|
||||
@@ -2423,7 +2423,10 @@ void HhbcTranslator::emitInterpOneOrPunt(Type type,
|
||||
int numDiscard, /* = 0 */
|
||||
Trace* target /* = NULL */) {
|
||||
if (RuntimeOption::EvalIRPuntDontInterp) {
|
||||
PUNT(PuntDontInterp);
|
||||
Op op = *(Op*)(getCurUnit()->entry() + m_bcOff);
|
||||
const char* name = StringData::GetStaticString(
|
||||
std::string("PuntDontInterp-") + opcodeToName(op))->data();
|
||||
SPUNT(name);
|
||||
} else {
|
||||
emitInterpOne(type, numDiscard, target);
|
||||
}
|
||||
|
||||
@@ -383,6 +383,7 @@ private:
|
||||
void emitMPre();
|
||||
void emitFinalMOp();
|
||||
void emitMPost();
|
||||
void emitMTrace();
|
||||
|
||||
// Bases
|
||||
void emitBaseOp();
|
||||
|
||||
@@ -515,6 +515,7 @@ O(EmptyElem, D(Bool), C(TCA) \
|
||||
S(Gen) \
|
||||
S(PtrToCell), E|N|Mem|Refs|Er) \
|
||||
O(IncStat, ND, C(Int) C(Int) C(Bool), E|Mem) \
|
||||
O(IncStatGrouped, ND, CStr CStr C(Int), E|N|Mem) \
|
||||
O(DbgAssertRefCount, ND, SUnk, N|E) \
|
||||
O(DbgAssertPtr, ND, S(PtrToGen), N|E) \
|
||||
O(Nop, ND, NA, NF) \
|
||||
|
||||
@@ -1071,7 +1071,7 @@ TranslatorX64::irTranslateFPassV(const Tracelet& t,
|
||||
|
||||
void
|
||||
TranslatorX64::irTranslateFPassR(const Tracelet& t,
|
||||
const NormalizedInstruction& i) {
|
||||
const NormalizedInstruction& i) {
|
||||
/*
|
||||
* Like FPassC, FPassR is able to cheat on boxing if the current
|
||||
* parameter is pass by reference but we have a cell: the box would refer
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "runtime/vm/translator/hopt/nativecalls.h"
|
||||
|
||||
#include "runtime/vm/runtime.h"
|
||||
#include "runtime/vm/stats.h"
|
||||
#include "runtime/vm/translator/targetcache.h"
|
||||
#include "runtime/vm/translator/translator-runtime.h"
|
||||
#include "runtime/vm/translator/hopt/ir.h"
|
||||
@@ -87,6 +88,8 @@ static CallMap s_callMap({
|
||||
{RaiseUndefProp, (TCA)raiseUndefProp, DNone, SSync,
|
||||
{{SSA, 0}, {SSA, 1}}},
|
||||
{RaiseError, (TCA)raise_error_sd, DNone, SSync, {{SSA, 0}}},
|
||||
{IncStatGrouped, (TCA)Stats::incStatGrouped, DNone, SNone,
|
||||
{{SSA, 0}, {SSA, 1}, {SSA, 2}}},
|
||||
|
||||
/* Switch helpers */
|
||||
{LdSwitchDblIndex, (TCA)switchDoubleHelper, DSSA, SSync,
|
||||
|
||||
@@ -355,6 +355,10 @@ void HhbcTranslator::VectorTranslator::checkMIState() {
|
||||
void HhbcTranslator::VectorTranslator::emitMPre() {
|
||||
checkMIState();
|
||||
|
||||
if (HPHP::Trace::moduleEnabled(HPHP::Trace::minstr, 1)) {
|
||||
emitMTrace();
|
||||
}
|
||||
|
||||
if (m_needMIS) {
|
||||
m_misBase = m_tb.gen(DefMIStateBase);
|
||||
SSATmp* uninit = m_tb.genDefUninit();
|
||||
@@ -382,6 +386,41 @@ void HhbcTranslator::VectorTranslator::emitMPre() {
|
||||
}
|
||||
}
|
||||
|
||||
void HhbcTranslator::VectorTranslator::emitMTrace() {
|
||||
auto rttStr = [this](int i) {
|
||||
return Type::fromRuntimeType(m_ni.inputs[i]->rtt).unbox().toString();
|
||||
};
|
||||
std::ostringstream shape;
|
||||
int iInd = m_mii.valCount();
|
||||
const char* separator = "";
|
||||
|
||||
shape << opcodeToName(m_ni.mInstrOp()) << " <";
|
||||
auto baseLoc = m_ni.immVec.locationCode();
|
||||
shape << folly::format("{}:{} ", locationCodeString(baseLoc), rttStr(iInd));
|
||||
++iInd;
|
||||
|
||||
for (int mInd = 0; mInd < m_ni.immVecM.size(); ++mInd) {
|
||||
auto mcode = m_ni.immVecM[mInd];
|
||||
shape << separator;
|
||||
if (mcode == MW) {
|
||||
shape << "MW";
|
||||
} else if (mcodeMaybeArrayKey(mcode)) {
|
||||
shape << "ME:" << rttStr(iInd);
|
||||
} else if (mcodeMaybePropName(mcode)) {
|
||||
shape << "MP:" << rttStr(iInd);
|
||||
} else {
|
||||
not_reached();
|
||||
}
|
||||
if (mcode != MW) ++iInd;
|
||||
separator = " ";
|
||||
}
|
||||
shape << '>';
|
||||
m_tb.gen(IncStatGrouped,
|
||||
cns(StringData::GetStaticString("vector instructions")),
|
||||
cns(StringData::GetStaticString(shape.str())),
|
||||
cns(1));
|
||||
}
|
||||
|
||||
// Build a map from (stack) input index to stack index.
|
||||
void HhbcTranslator::VectorTranslator::numberStackInputs() {
|
||||
// Stack inputs are pushed in the order they appear in the vector
|
||||
|
||||
@@ -149,52 +149,6 @@ __thread VMRegState tl_regState = REGSTATE_CLEAN;
|
||||
__thread JmpHitMap* tl_unlikelyHits = nullptr;
|
||||
__thread JmpHitMap* tl_jccHits = nullptr;
|
||||
|
||||
namespace {
|
||||
typedef hphp_hash_map<litstr, int64_t, pointer_hash<const char>> PuntMap;
|
||||
__thread PuntMap* tl_puntCounts = nullptr;
|
||||
|
||||
void recordPunt(litstr key) {
|
||||
assert(Trace::moduleEnabled(Trace::punt, 1));
|
||||
assert(tl_puntCounts);
|
||||
(*tl_puntCounts)[key]++;
|
||||
}
|
||||
|
||||
void initPuntCounts() {
|
||||
if (!Trace::moduleEnabled(Trace::punt, 1)) return;
|
||||
assert(!tl_puntCounts);
|
||||
tl_puntCounts = new PuntMap();
|
||||
}
|
||||
|
||||
void dumpPuntCounts() {
|
||||
if (!Trace::moduleEnabled(Trace::punt, 1)) return;
|
||||
assert(tl_puntCounts);
|
||||
TRACE_SET_MOD(punt);
|
||||
|
||||
int64_t total = 0;
|
||||
std::map<int64_t, litstr> sortedPunts;
|
||||
for (auto const& pair : *tl_puntCounts) {
|
||||
sortedPunts[pair.second] = pair.first;
|
||||
total += pair.second;
|
||||
}
|
||||
|
||||
TRACE(1, "-------------------- hhir punts for %s --------------------\n",
|
||||
g_context->getRequestUrl(50).c_str());
|
||||
TRACE(1, "%30s %9s %9s %9s\n",
|
||||
"name", "count", "% total", "accum %");
|
||||
int64_t accum = 0;
|
||||
for (auto const& pair : boost::adaptors::reverse(sortedPunts)) {
|
||||
accum += pair.first;
|
||||
TRACE(1, "%30s : %9ld %8.2f%% %8.2f%%\n",
|
||||
pair.second, pair.first,
|
||||
100.0 * pair.first / total, 100.0 * accum / total);
|
||||
}
|
||||
TRACE(1, "\n");
|
||||
|
||||
delete tl_puntCounts;
|
||||
tl_puntCounts = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
static StaticString s___call(LITSTR_INIT("__call"));
|
||||
static StaticString s___callStatic(LITSTR_INIT("__callStatic"));
|
||||
|
||||
@@ -2937,8 +2891,10 @@ void TranslatorX64::emitReqRetransNoIR(Asm& as, SrcKey& sk) {
|
||||
|
||||
// Emits, into assembler `a`, code that records a punt named `name`.
//
// As shown here the emitted sequence makes two calls:
//   1. recordPunt, with rdi holding the character data of the static
//      string for `name`;
//   2. Stats::incStatGrouped, with rdi = static string "hhir punts",
//      rsi = static string for `name`, rdx = 1.
//
// NOTE(review): both calls being present looks like the before and after
// lines of a change shown together -- confirm whether the recordPunt call
// is still intended.
// NOTE(review): PhysRegSaver presumably saves/restores the registers in
// kAllX64Regs around the calls -- confirm against its definition.
void TranslatorX64::emitRecordPunt(Asm& a, const std::string& name) {
|
||||
PhysRegSaver regs(a, kAllX64Regs);
|
||||
a. movq (StringData::GetStaticString(name)->data(), rdi);
|
||||
a. call ((TCA)recordPunt);
|
||||
a. movq (StringData::GetStaticString("hhir punts"), rdi);
|
||||
a. movq (StringData::GetStaticString(name), rsi);
|
||||
a. movq (1, rdx);
|
||||
a. call ((TCA)Stats::incStatGrouped);
|
||||
}
|
||||
|
||||
uint64_t TranslatorX64::packBitVec(const vector<bool>& bits, unsigned i) {
|
||||
@@ -12051,7 +12007,7 @@ TranslatorX64::requestInit() {
|
||||
Treadmill::startRequest(g_vmContext->m_currentThreadIdx);
|
||||
memset(&s_perfCounters, 0, sizeof(s_perfCounters));
|
||||
initJmpProfile();
|
||||
initPuntCounts();
|
||||
Stats::init();
|
||||
}
|
||||
|
||||
void
|
||||
@@ -12069,7 +12025,6 @@ TranslatorX64::requestExit() {
|
||||
Stats::dump();
|
||||
Stats::clear();
|
||||
dumpJmpProfile();
|
||||
dumpPuntCounts();
|
||||
|
||||
if (Trace::moduleEnabledRelease(Trace::tx64stats, 1)) {
|
||||
Trace::traceRelease("TranslatorX64 perf counters for %s:\n",
|
||||
|
||||
@@ -84,6 +84,9 @@ class Init {
|
||||
if (mod >= 0) {
|
||||
levels[mod] = level;
|
||||
}
|
||||
if (mod == Trace::minstr || mod == Trace::punt) {
|
||||
levels[Trace::statgroups] = std::max(levels[Trace::statgroups], 1);
|
||||
}
|
||||
}
|
||||
free(e);
|
||||
} else {
|
||||
|
||||
@@ -86,6 +86,8 @@ namespace Trace {
|
||||
TM(instancebits)\
|
||||
TM(hhas) \
|
||||
TM(punt) \
|
||||
TM(statgroups) \
|
||||
TM(minstr) \
|
||||
/* Stress categories, to exercise rare paths */ \
|
||||
TM(stress_txInterpPct) \
|
||||
TM(stress_txInterpSeed) \
|
||||
|
||||
Referência em uma Nova Issue
Bloquear um usuário