From b8ebb4842134991072bf9cbbedeb297877698e15 Mon Sep 17 00:00:00 2001 From: bsimmers Date: Tue, 26 Mar 2013 22:50:55 -0700 Subject: [PATCH] Profile vector instruction shapes This adds a new profiling mode for vector instruction shapes. I'm planning on using this to identify any common cases that may be worth special-casing, like we do for simple SetM, CGetM, and IssetM instructions. Instead of adding Yet Another Hashtable Stats Map, I created a generic version and changed TRACE=punt:1 to use it as well. I also changed emitInterpOneOrPunt to record a more specific punt name: PuntDontInterp- followed by the name of the opcode being translated. --- hphp/compiler/compiler.cpp | 1 + hphp/doc/ir.specification | 4 ++ hphp/runtime/vm/hhbc.h | 5 ++ hphp/runtime/vm/stats.cpp | 57 +++++++++++++++++-- hphp/runtime/vm/stats.h | 7 +++ hphp/runtime/vm/translator/hopt/codegen.cpp | 11 ++-- .../vm/translator/hopt/hhbctranslator.cpp | 5 +- .../vm/translator/hopt/hhbctranslator.h | 1 + hphp/runtime/vm/translator/hopt/ir.h | 1 + .../vm/translator/hopt/irtranslator.cpp | 2 +- .../vm/translator/hopt/nativecalls.cpp | 3 + .../vm/translator/hopt/vectortranslator.cpp | 39 +++++++++++++ hphp/runtime/vm/translator/translator-x64.cpp | 55 ++---------------- hphp/util/trace.cpp | 3 + hphp/util/trace.h | 2 + 15 files changed, 134 insertions(+), 62 deletions(-) diff --git a/hphp/compiler/compiler.cpp b/hphp/compiler/compiler.cpp index b858b4962..361fb36a7 100644 --- a/hphp/compiler/compiler.cpp +++ b/hphp/compiler/compiler.cpp @@ -157,6 +157,7 @@ int compiler_main(int argc, char **argv) { try { Hdf empty; RuntimeOption::Load(empty); + VM::initialize_repo(); CompilerOptions po; #ifdef FACEBOOK diff --git a/hphp/doc/ir.specification b/hphp/doc/ir.specification index 6cee6c189..8e6b5386d 100644 --- a/hphp/doc/ir.specification +++ b/hphp/doc/ir.specification @@ -1151,6 +1151,10 @@ IncStat S0:ConstInt S1:ConstInt S2:ConstBool 'force' flag. This opcode becomes a noop iff (force == false and runtime stats are not enabled) at translation time. 
+IncStatGrouped S0:ConstStr S1:ConstStr S2:ConstInt + + Adds the value S2 to the counter named S1, in the category S0. + DbgAssertRefCount S0:{Counted|StaticStr|StaticArr} Assert that S0 has a valid refcount. S0 must be a type with a valid diff --git a/hphp/runtime/vm/hhbc.h b/hphp/runtime/vm/hhbc.h index 880b06807..e7d5223cd 100644 --- a/hphp/runtime/vm/hhbc.h +++ b/hphp/runtime/vm/hhbc.h @@ -819,6 +819,11 @@ int instrNumPushes(const Opcode* opcode); StackTransInfo instrStackTransInfo(const Opcode* opcode); int instrSpToArDelta(const Opcode* opcode); +inline bool +mcodeIsLiteral(MemberCode mcode) { + return mcode == MET || mcode == MEI || mcode == MPT; +} + inline bool mcodeMaybePropName(MemberCode mcode) { return mcode == MPC || mcode == MPL || mcode == MPT; diff --git a/hphp/runtime/vm/stats.cpp b/hphp/runtime/vm/stats.cpp index de004a999..c9a34717d 100644 --- a/hphp/runtime/vm/stats.cpp +++ b/hphp/runtime/vm/stats.cpp @@ -36,6 +36,9 @@ const char* g_counterNames[] = { __thread uint64_t tl_counters[kNumStatCounters]; __thread uint64_t tl_helper_counters[kMaxNumTrampolines]; +typedef hphp_const_char_map> StatGroupMap; +__thread StatGroupMap* tl_stat_groups = nullptr; + // Only the thread holding the write lease will set the entries in the // helperNames array but other threads may concurrently read these // entries, so each entry is volatile (or an atomic type per the new @@ -72,31 +75,75 @@ void emitIncTranslOp(X64Assembler& a, Opcode opc, bool force) { Transl::CC_None, force); } +void init() { + if (!enabledAny()) return; + assert(tl_stat_groups == nullptr); + tl_stat_groups = new StatGroupMap(); +} + static __thread int64_t epoch; void dump() { - if (!enabled()) return; - TRACE(1, "STATS %ld %s\n", epoch, g_context->getRequestUrl(50).c_str()); + if (!enabledAny()) return; + auto url = g_context->getRequestUrl(50); + TRACE(0, "STATS %ld %s\n", epoch, url.c_str()); #include "runtime/vm/stats-opcodeDef.h" #define STAT(s) \ if (!tl_counters[s]) {} else \ - 
TRACE(1, "STAT %-50s %15" PRId64 "\n", #s, tl_counters[s]); + TRACE(0, "STAT %-50s %15" PRId64 "\n", #s, tl_counters[s]); STATS #undef STAT #undef O for (int i=0; helperNames[i]; i++) { if (tl_helper_counters[i]) { - TRACE(1, "STAT %-50s %15ld\n", + TRACE(0, "STAT %-50s %15ld\n", helperNames[i], tl_helper_counters[i]); } } + + typedef std::pair StatPair; + for (auto const& group : *tl_stat_groups) { + std::ostringstream stats; + auto const& map = group.second; + uint64_t total = 0, accum = 0;; + + std::vector rows(map.begin(), map.end()); + std::for_each(rows.begin(), rows.end(), + [&](const StatPair& p) { total += p.second; }); + auto gt = [](const StatPair& a, const StatPair& b) { + return a.second > b.second; + }; + std::sort(rows.begin(), rows.end(), gt); + + stats << folly::format("{:-^80}\n", + folly::format(" group {} ", + group.first, url)) + << folly::format("{:>45} {:>9} {:>8} {:>8}\n", + "name", "count", "% total", "accum %"); + for (auto const& row : rows) { + accum += row.second; + stats << folly::format("{:>45} {} {:9} {:8.2%} {:8.2%}\n", + row.first, ':', row.second, + (double)row.second / total, (double)accum / total); + } + FTRACE(0, "{}\n", stats.str()); + } } void clear() { - if (!RuntimeOption::EnableInstructionCounts && !enabled()) return; + if (!RuntimeOption::EnableInstructionCounts && !enabledAny()) return; ++epoch; memset(&tl_counters[0], 0, sizeof(tl_counters)); memset(&tl_helper_counters[0], 0, sizeof(tl_helper_counters)); + + assert(tl_stat_groups); + delete tl_stat_groups; + tl_stat_groups = nullptr; +} + +void incStatGrouped(const StringData* category, const StringData* name, int n) { + assert(tl_stat_groups); + (*tl_stat_groups)[category->data()][name->data()] += n; } } } } diff --git a/hphp/runtime/vm/stats.h b/hphp/runtime/vm/stats.h index 09a1b986f..d04af4a5d 100644 --- a/hphp/runtime/vm/stats.h +++ b/hphp/runtime/vm/stats.h @@ -199,6 +199,10 @@ static inline bool enabled() { return Trace::moduleEnabled(Trace::stats, 1); } +static 
inline bool enabledAny() { + return enabled() || Trace::moduleEnabled(Trace::statgroups); +} + static inline bool enableInstrCount() { return Trace::moduleEnabled(Trace::stats, 2); } @@ -248,9 +252,12 @@ inline void emitInc(Transl::X64Assembler& a, StatCounter stat, int n = 1, extern void emitIncTranslOp(Transl::X64Assembler& a, Opcode opc, bool force = false); +extern void init(); extern void dump(); extern void clear(); +void incStatGrouped(const StringData* cat, const StringData* name, int n = 1); + } } } #endif diff --git a/hphp/runtime/vm/translator/hopt/codegen.cpp b/hphp/runtime/vm/translator/hopt/codegen.cpp index 6d5269b4d..0b5347bf2 100644 --- a/hphp/runtime/vm/translator/hopt/codegen.cpp +++ b/hphp/runtime/vm/translator/hopt/codegen.cpp @@ -313,16 +313,17 @@ CALL_OPCODE(PrintStr) CALL_OPCODE(PrintInt) CALL_OPCODE(PrintBool) CALL_OPCODE(DbgAssertPtr) -CALL_OPCODE(LdSwitchDblIndex); -CALL_OPCODE(LdSwitchStrIndex); -CALL_OPCODE(LdSwitchObjIndex); -CALL_OPCODE(VerifyParamCallable); -CALL_OPCODE(VerifyParamFail); +CALL_OPCODE(LdSwitchDblIndex) +CALL_OPCODE(LdSwitchStrIndex) +CALL_OPCODE(LdSwitchObjIndex) +CALL_OPCODE(VerifyParamCallable) +CALL_OPCODE(VerifyParamFail) CALL_OPCODE(RaiseUninitLoc) CALL_OPCODE(WarnNonObjProp) CALL_OPCODE(ThrowNonObjProp) CALL_OPCODE(RaiseUndefProp) CALL_OPCODE(RaiseError) +CALL_OPCODE(IncStatGrouped) // Vector instruction helpers CALL_OPCODE(BaseG) diff --git a/hphp/runtime/vm/translator/hopt/hhbctranslator.cpp b/hphp/runtime/vm/translator/hopt/hhbctranslator.cpp index 820a8be5c..9959f8576 100644 --- a/hphp/runtime/vm/translator/hopt/hhbctranslator.cpp +++ b/hphp/runtime/vm/translator/hopt/hhbctranslator.cpp @@ -2423,7 +2423,10 @@ void HhbcTranslator::emitInterpOneOrPunt(Type type, int numDiscard, /* = 0 */ Trace* target /* = NULL */) { if (RuntimeOption::EvalIRPuntDontInterp) { - PUNT(PuntDontInterp); + Op op = *(Op*)(getCurUnit()->entry() + m_bcOff); + const char* name = StringData::GetStaticString( + 
std::string("PuntDontInterp-") + opcodeToName(op))->data(); + SPUNT(name); } else { emitInterpOne(type, numDiscard, target); } diff --git a/hphp/runtime/vm/translator/hopt/hhbctranslator.h b/hphp/runtime/vm/translator/hopt/hhbctranslator.h index fa63b681b..2c8e92830 100644 --- a/hphp/runtime/vm/translator/hopt/hhbctranslator.h +++ b/hphp/runtime/vm/translator/hopt/hhbctranslator.h @@ -383,6 +383,7 @@ private: void emitMPre(); void emitFinalMOp(); void emitMPost(); + void emitMTrace(); // Bases void emitBaseOp(); diff --git a/hphp/runtime/vm/translator/hopt/ir.h b/hphp/runtime/vm/translator/hopt/ir.h index b8e7552ca..a3180351c 100644 --- a/hphp/runtime/vm/translator/hopt/ir.h +++ b/hphp/runtime/vm/translator/hopt/ir.h @@ -515,6 +515,7 @@ O(EmptyElem, D(Bool), C(TCA) \ S(Gen) \ S(PtrToCell), E|N|Mem|Refs|Er) \ O(IncStat, ND, C(Int) C(Int) C(Bool), E|Mem) \ +O(IncStatGrouped, ND, CStr CStr C(Int), E|N|Mem) \ O(DbgAssertRefCount, ND, SUnk, N|E) \ O(DbgAssertPtr, ND, S(PtrToGen), N|E) \ O(Nop, ND, NA, NF) \ diff --git a/hphp/runtime/vm/translator/hopt/irtranslator.cpp b/hphp/runtime/vm/translator/hopt/irtranslator.cpp index d5663fb43..041ed74d6 100644 --- a/hphp/runtime/vm/translator/hopt/irtranslator.cpp +++ b/hphp/runtime/vm/translator/hopt/irtranslator.cpp @@ -1071,7 +1071,7 @@ TranslatorX64::irTranslateFPassV(const Tracelet& t, void TranslatorX64::irTranslateFPassR(const Tracelet& t, - const NormalizedInstruction& i) { + const NormalizedInstruction& i) { /* * Like FPassC, FPassR is able to cheat on boxing if the current * parameter is pass by reference but we have a cell: the box would refer diff --git a/hphp/runtime/vm/translator/hopt/nativecalls.cpp b/hphp/runtime/vm/translator/hopt/nativecalls.cpp index 05e63832b..40d844255 100644 --- a/hphp/runtime/vm/translator/hopt/nativecalls.cpp +++ b/hphp/runtime/vm/translator/hopt/nativecalls.cpp @@ -17,6 +17,7 @@ #include "runtime/vm/translator/hopt/nativecalls.h" #include "runtime/vm/runtime.h" +#include 
"runtime/vm/stats.h" #include "runtime/vm/translator/targetcache.h" #include "runtime/vm/translator/translator-runtime.h" #include "runtime/vm/translator/hopt/ir.h" @@ -87,6 +88,8 @@ static CallMap s_callMap({ {RaiseUndefProp, (TCA)raiseUndefProp, DNone, SSync, {{SSA, 0}, {SSA, 1}}}, {RaiseError, (TCA)raise_error_sd, DNone, SSync, {{SSA, 0}}}, + {IncStatGrouped, (TCA)Stats::incStatGrouped, DNone, SNone, + {{SSA, 0}, {SSA, 1}, {SSA, 2}}}, /* Switch helpers */ {LdSwitchDblIndex, (TCA)switchDoubleHelper, DSSA, SSync, diff --git a/hphp/runtime/vm/translator/hopt/vectortranslator.cpp b/hphp/runtime/vm/translator/hopt/vectortranslator.cpp index ac1cc2236..830c65389 100644 --- a/hphp/runtime/vm/translator/hopt/vectortranslator.cpp +++ b/hphp/runtime/vm/translator/hopt/vectortranslator.cpp @@ -355,6 +355,10 @@ void HhbcTranslator::VectorTranslator::checkMIState() { void HhbcTranslator::VectorTranslator::emitMPre() { checkMIState(); + if (HPHP::Trace::moduleEnabled(HPHP::Trace::minstr, 1)) { + emitMTrace(); + } + if (m_needMIS) { m_misBase = m_tb.gen(DefMIStateBase); SSATmp* uninit = m_tb.genDefUninit(); @@ -382,6 +386,41 @@ void HhbcTranslator::VectorTranslator::emitMPre() { } } +void HhbcTranslator::VectorTranslator::emitMTrace() { + auto rttStr = [this](int i) { + return Type::fromRuntimeType(m_ni.inputs[i]->rtt).unbox().toString(); + }; + std::ostringstream shape; + int iInd = m_mii.valCount(); + const char* separator = ""; + + shape << opcodeToName(m_ni.mInstrOp()) << " <"; + auto baseLoc = m_ni.immVec.locationCode(); + shape << folly::format("{}:{} ", locationCodeString(baseLoc), rttStr(iInd)); + ++iInd; + + for (int mInd = 0; mInd < m_ni.immVecM.size(); ++mInd) { + auto mcode = m_ni.immVecM[mInd]; + shape << separator; + if (mcode == MW) { + shape << "MW"; + } else if (mcodeMaybeArrayKey(mcode)) { + shape << "ME:" << rttStr(iInd); + } else if (mcodeMaybePropName(mcode)) { + shape << "MP:" << rttStr(iInd); + } else { + not_reached(); + } + if (mcode != MW) ++iInd; + 
separator = " "; + } + shape << '>'; + m_tb.gen(IncStatGrouped, + cns(StringData::GetStaticString("vector instructions")), + cns(StringData::GetStaticString(shape.str())), + cns(1)); +} + // Build a map from (stack) input index to stack index. void HhbcTranslator::VectorTranslator::numberStackInputs() { // Stack inputs are pushed in the order they appear in the vector diff --git a/hphp/runtime/vm/translator/translator-x64.cpp b/hphp/runtime/vm/translator/translator-x64.cpp index c8538635e..99d10c8c3 100644 --- a/hphp/runtime/vm/translator/translator-x64.cpp +++ b/hphp/runtime/vm/translator/translator-x64.cpp @@ -149,52 +149,6 @@ __thread VMRegState tl_regState = REGSTATE_CLEAN; __thread JmpHitMap* tl_unlikelyHits = nullptr; __thread JmpHitMap* tl_jccHits = nullptr; -namespace { -typedef hphp_hash_map> PuntMap; -__thread PuntMap* tl_puntCounts = nullptr; - -void recordPunt(litstr key) { - assert(Trace::moduleEnabled(Trace::punt, 1)); - assert(tl_puntCounts); - (*tl_puntCounts)[key]++; -} - -void initPuntCounts() { - if (!Trace::moduleEnabled(Trace::punt, 1)) return; - assert(!tl_puntCounts); - tl_puntCounts = new PuntMap(); -} - -void dumpPuntCounts() { - if (!Trace::moduleEnabled(Trace::punt, 1)) return; - assert(tl_puntCounts); - TRACE_SET_MOD(punt); - - int64_t total = 0; - std::map sortedPunts; - for (auto const& pair : *tl_puntCounts) { - sortedPunts[pair.second] = pair.first; - total += pair.second; - } - - TRACE(1, "-------------------- hhir punts for %s --------------------\n", - g_context->getRequestUrl(50).c_str()); - TRACE(1, "%30s %9s %9s %9s\n", - "name", "count", "% total", "accum %"); - int64_t accum = 0; - for (auto const& pair : boost::adaptors::reverse(sortedPunts)) { - accum += pair.first; - TRACE(1, "%30s : %9ld %8.2f%% %8.2f%%\n", - pair.second, pair.first, - 100.0 * pair.first / total, 100.0 * accum / total); - } - TRACE(1, "\n"); - - delete tl_puntCounts; - tl_puntCounts = nullptr; -} -} - static StaticString s___call(LITSTR_INIT("__call")); 
static StaticString s___callStatic(LITSTR_INIT("__callStatic")); @@ -2937,8 +2891,10 @@ void TranslatorX64::emitReqRetransNoIR(Asm& as, SrcKey& sk) { void TranslatorX64::emitRecordPunt(Asm& a, const std::string& name) { PhysRegSaver regs(a, kAllX64Regs); - a. movq (StringData::GetStaticString(name)->data(), rdi); - a. call ((TCA)recordPunt); + a. movq (StringData::GetStaticString("hhir punts"), rdi); + a. movq (StringData::GetStaticString(name), rsi); + a. movq (1, rdx); + a. call ((TCA)Stats::incStatGrouped); } uint64_t TranslatorX64::packBitVec(const vector& bits, unsigned i) { @@ -12051,7 +12007,7 @@ TranslatorX64::requestInit() { Treadmill::startRequest(g_vmContext->m_currentThreadIdx); memset(&s_perfCounters, 0, sizeof(s_perfCounters)); initJmpProfile(); - initPuntCounts(); + Stats::init(); } void @@ -12069,7 +12025,6 @@ TranslatorX64::requestExit() { Stats::dump(); Stats::clear(); dumpJmpProfile(); - dumpPuntCounts(); if (Trace::moduleEnabledRelease(Trace::tx64stats, 1)) { Trace::traceRelease("TranslatorX64 perf counters for %s:\n", diff --git a/hphp/util/trace.cpp b/hphp/util/trace.cpp index a1c5182aa..99405cf4c 100644 --- a/hphp/util/trace.cpp +++ b/hphp/util/trace.cpp @@ -84,6 +84,9 @@ class Init { if (mod >= 0) { levels[mod] = level; } + if (mod == Trace::minstr || mod == Trace::punt) { + levels[Trace::statgroups] = std::max(levels[Trace::statgroups], 1); + } } free(e); } else { diff --git a/hphp/util/trace.h b/hphp/util/trace.h index dcd1bce5d..884c77e6a 100644 --- a/hphp/util/trace.h +++ b/hphp/util/trace.h @@ -86,6 +86,8 @@ namespace Trace { TM(instancebits)\ TM(hhas) \ TM(punt) \ + TM(statgroups) \ + TM(minstr) \ /* Stress categories, to exercise rare paths */ \ TM(stress_txInterpPct) \ TM(stress_txInterpSeed) \