Use eager vmreganchor

This diff introduces a new "eager" vmreganchor, which we call
from some of the heaviest users of vmreganchor. For an eager
reganchor, we save the rbp, pcOff and spOff at the call site
in the MInstrState, and the reganchor itself reads them back
from the MInstrState (instead of following the rbp chain
and looking up the hash table). Currently, the set of functions
which use eager vmreganchor is based on profiling.
Esse commit está contido em:
aravind
2013-05-12 17:33:28 -07:00
commit de Sara Golemon
commit 64889059a2
7 arquivos alterados com 132 adições e 23 exclusões
+3 -2
Ver Arquivo
@@ -64,6 +64,7 @@ const int64_t k_UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_HIRAGANA_QUATERNARY_MODE;
const int64_t k_UCOL_NUMERIC_COLLATION = UCOL_NUMERIC_COLLATION;
using HPHP::Transl::CallerFrame;
using HPHP::Transl::EagerCallerFrame;
#define getCheckedArrayRetType(input, fail, type) \
Variant::TypedValueAccessor tva_##input = input.getTypedAccessor(); \
@@ -175,7 +176,7 @@ Variant f_array_filter(CVarRef input, CVarRef callback /* = null_variant */) {
return ArrayUtil::Filter(arr_input);
}
CallCtx ctx;
CallerFrame cf;
EagerCallerFrame cf;
vm_decode_function(callback, cf(), false, ctx);
if (ctx.func == NULL) {
return uninit_null();
@@ -263,7 +264,7 @@ Variant f_array_map(int _argc, CVarRef callback, CVarRef arr1, CArrRef _argv /*
CallCtx ctx;
ctx.func = NULL;
if (!callback.isNull()) {
CallerFrame cf;
EagerCallerFrame cf;
vm_decode_function(callback, cf(), false, ctx);
}
if (ctx.func == NULL) {
+4 -3
Ver Arquivo
@@ -32,6 +32,7 @@ namespace HPHP {
///////////////////////////////////////////////////////////////////////////////
using HPHP::Transl::CallerFrame;
using HPHP::Transl::EagerCallerFrame;
static const StaticString s_internal("internal");
static const StaticString s_user("user");
@@ -252,7 +253,7 @@ Variant f_forward_static_call(int _argc, CVarRef function,
}
Variant f_get_called_class() {
CallerFrame cf;
EagerCallerFrame cf;
ActRec* ar = cf();
if (ar == NULL) {
return Variant(false);
@@ -365,7 +366,7 @@ Array hhvm_get_frame_args(const ActRec* ar) {
}
Variant f_func_get_args() {
CallerFrame cf;
EagerCallerFrame cf;
ActRec* ar = cf();
if (ar && ar->hasVarEnv() && ar->getVarEnv()->isGlobalScope()) {
raise_warning(
@@ -397,7 +398,7 @@ Array func_get_args(int num_args, CArrRef params, CArrRef args) {
}
int64_t f_func_num_args() {
CallerFrame cf;
EagerCallerFrame cf;
ActRec* ar = cf();
if (ar == NULL) {
return -1;
+1 -1
Ver Arquivo
@@ -2245,7 +2245,7 @@ void VMExecutionContext::invokeContFunc(const Func* f,
assert(f);
assert(this_);
VMRegAnchor _;
EagerVMRegAnchor _;
this_->incRefCount();
@@ -3472,6 +3472,11 @@ void CodeGenerator::cgCallBuiltin(IRInstruction* inst) {
DataType funcReturnType = func->returnType();
int returnOffset = HHIR_MISOFF(tvBuiltinReturn);
if (TranslatorX64::eagerRecord(func)) {
const uchar* pc = curUnit()->entry() + m_state.lastMarker->bcOff;
// we have spilled all args to stack, so spDiff is 0
m_tx64->emitEagerSyncPoint(m_as, pc, 0);
}
// RSP points to the MInstrState we need to use.
// workaround the fact that rsp moves when we spill registers around call
PhysReg misReg = rScratch;
@@ -3607,9 +3612,13 @@ void CodeGenerator::cgNativeImpl(IRInstruction* inst) {
assert(func->isConst());
assert(func->type() == Type::Func);
const Func* fn = func->getValFunc();
BuiltinFunction builtinFuncPtr = func->getValFunc()->builtinFuncPtr();
emitMovRegReg(m_as, m_regs[fp].getReg(), argNumToRegName[0]);
if (TranslatorX64::eagerRecord(fn)) {
m_tx64->emitEagerSyncPoint(m_as, fn->getEntry(), 0);
}
m_as.call((TCA)builtinFuncPtr);
recordSyncPoint(m_as);
}
+45 -13
Ver Arquivo
@@ -111,23 +111,55 @@ struct VMRegAnchor : private boost::noncopyable {
}
};
/*
 * Scoped anchor that marks the VM register state as clean without
 * recomputing it.
 *
 * On construction it remembers the current tl_regState and sets it to
 * REGSTATE_CLEAN; on destruction it puts the remembered value back.
 *
 * In debug builds the constructor also cross-checks the current
 * vmfp()/vmsp()/vmpc() against what a regular VMRegAnchor produces and
 * asserts that they agree.
 *
 * Copying is disabled: a copy would carry its own m_old and restore
 * tl_regState a second time when destroyed.
 */
struct EagerVMRegAnchor {
  VMRegState m_old;  // tl_regState as it was when this anchor was created

  EagerVMRegAnchor() {
    if (debug) {
      const Cell* fp = vmfp();
      const Cell* sp = vmsp();
      const uchar* pc = vmpc();
      // Scoped to this block: the regular anchor is gone again before we
      // touch tl_regState below.
      VMRegAnchor _;
      assert(vmfp() == fp && vmsp() == sp && vmpc() == pc);
      // Only referenced by the assert; keep the compiler quiet when the
      // assert compiles away.
      (void)fp;
      (void)sp;
      (void)pc;
    }
    m_old = tl_regState;
    tl_regState = REGSTATE_CLEAN;
  }

  ~EagerVMRegAnchor() {
    tl_regState = m_old;
  }

  EagerVMRegAnchor(const EagerVMRegAnchor&) = delete;
  EagerVMRegAnchor& operator=(const EagerVMRegAnchor&) = delete;
};
/*
 * Picks the ActRec that callers of the frame helpers should see.
 *
 * In builtins, m_fp points to the caller's frame if called through
 * FCallBuiltin; otherwise it points to the builtin's own frame, and
 * getPrevVMState() is used to reach the caller's frame.
 *
 * Returns nullptr when there is no current frame, or when
 * getPrevVMState() hands back the very frame it was given.
 */
static inline ActRec* regAnchorFP() {
  VMExecutionContext* const ec = g_vmContext;
  ActRec* const fp = ec->getFP();
  if (fp == nullptr) {
    return nullptr;
  }
  if (!fp->skipFrame()) {
    // Current frame is already the one we want.
    return fp;
  }
  ActRec* const caller = ec->getPrevVMState(fp);
  return caller == fp ? nullptr : caller;
}
// Variant of the caller-frame helper that anchors through
// EagerVMRegAnchor (its base class) for the lifetime of the object.
// Calling the object returns whatever regAnchorFP() selects, which may
// be nullptr.
struct EagerCallerFrame : public EagerVMRegAnchor {
ActRec* operator()() {
return regAnchorFP();
}
};
// VM helper to retrieve the frame pointer from the TC. This is
// a common need for extensions.
//
// The object anchors the VM registers through its VMRegAnchor base for
// as long as it is alive. Calling it returns the frame selected by
// regAnchorFP(), which may be nullptr.
struct CallerFrame : public VMRegAnchor {
  ActRec* operator()() {
    // Frame selection lives in regAnchorFP() so that it is not
    // duplicated between CallerFrame and EagerCallerFrame.
    return regAnchorFP();
  }
};
+67 -4
Ver Arquivo
@@ -747,6 +747,35 @@ TranslatorX64::emitCall(X64Assembler& a, Call call, bool killRegs) {
}
}
// Emits, through emitTLSLoad<ExecutionContext>, code that loads
// g_context into the register `dest` using assembler `a`.
static void emitGetGContext(X64Assembler& a, PhysReg dest) {
emitTLSLoad<ExecutionContext>(a, g_context, dest);
}
/*
 * Emits code that writes the current VM registers straight into the
 * execution context:
 *
 *   m_fp            <- rVmFp
 *   top of m_stack  <- rVmSp + spDiff
 *   m_pc            <- pc
 *
 * `pc` is the bytecode address to record; `spDiff` is a byte adjustment
 * applied to rVmSp before it is stored (0 stores rVmSp unchanged).
 * rdi is used to hold the context pointer and is saved/restored around
 * the sequence with push/pop.
 */
void
TranslatorX64::emitEagerSyncPoint(X64Assembler& a, const Opcode* pc,
                                  const Offset spDiff) {
  static COff ecFpOff = offsetof(VMExecutionContext, m_fp);
  static COff ecSpOff = offsetof(VMExecutionContext, m_stack) +
                        Stack::topOfStackOffset();
  static COff ecPcOff = offsetof(VMExecutionContext, m_pc);

  /* rScratch cannot hold the context pointer: the pc store below uses
     it as a temporary. */
  Reg64 rCtx = reg::rdi;

  a.    push(rCtx);
  emitGetGContext(a, rCtx);
  a.    storeq(rVmFp, rCtx[ecFpOff]);
  if (spDiff == 0) {
    a.  storeq(rVmSp, rCtx[ecSpOff]);
  } else {
    // Materialize the adjusted stack pointer first, then store it.
    a.  lea(rVmSp[spDiff], rScratch);
    a.  storeq(rScratch, rCtx[ecSpOff]);
  }
  a.    storeq(pc, rCtx[ecPcOff]);
  a.    pop(rCtx);
}
void
TranslatorX64::recordSyncPoint(X64Assembler& a, Offset pcOff, Offset spOff) {
m_pendingFixups.push_back(PendingFixup(a.code.frontier,
@@ -827,6 +856,14 @@ TranslatorX64::recordCallImpl(X64Assembler& a,
}
}
/*
 * Emits an eager sync point for the instruction `i`: the recorded pc is
 * the current unit's entry plus the instruction's bytecode offset, and
 * the stack pointer adjustment is -i.stackOff cells, expressed in bytes.
 */
void
TranslatorX64::recordEagerCall(X64Assembler& a,
                               const NormalizedInstruction& i) {
  SrcKey sk = i.source;
  // Keep the arithmetic signed: multiplying the negated offset by a raw
  // sizeof() would promote it to an unsigned size_t and depend on
  // wrap-around when narrowing back to Offset.
  const Offset spDiff =
    -i.stackOff * static_cast<Offset>(sizeof(TypedValue));
  emitEagerSyncPoint(a, curUnit()->entry() + sk.offset(), spDiff);
}
void TranslatorX64::prepareCallSaveRegs() {
emitCallSaveRegs(); // Clean caller-saved regs.
m_pendingUnwindRegInfo.clear();
@@ -890,10 +927,6 @@ void TranslatorX64::emitIncRefGeneric(PhysReg base, int disp) {
emitIncRefGenericRegSafe(base, disp, r(tmpReg));
}
// Emits, through emitTLSLoad<ExecutionContext>, code that loads
// g_context into the register `dest` using assembler `a`.
static void emitGetGContext(X64Assembler& a, PhysReg dest) {
emitTLSLoad<ExecutionContext>(a, g_context, dest);
}
// emitEagerVMRegSave --
// Inline. Saves regs in-place in the TC. This is an unusual need;
// you probably want to lazily save these regs via recordCall and
@@ -6693,6 +6726,30 @@ TranslatorX64::translateRetV(const Tracelet& t,
translateRetC(t, i);
}
/* This is somewhat hacky. It decides which helpers/builtins should use
 * eager vmreganchor based on profile information. Using
 * eager vmreganchor for all helper calls is a perf regression.
 *
 * Returns true when `func` is one of the listed free functions (matched
 * by exact name), or is the method "join" on a class named "WaitHandle";
 * false otherwise. */
bool TranslatorX64::eagerRecord(const Func* func) {
  // Built once; the entries are string literals, so no per-call setup.
  static const char* const kEagerFuncs[] = {
    "func_get_args",
    "get_called_class",
    "func_num_args",
    "array_filter",
    "array_map",
  };

  const char* const name = func->name()->data();
  for (const char* candidate : kEagerFuncs) {
    if (!strcmp(name, candidate)) {
      return true;
    }
  }

  return func->cls()
    && !strcmp(func->cls()->name()->data(), "WaitHandle")
    && !strcmp(name, "join");
}
/*
* NativeImpl is a special operation in the sense that it must be the
* only opcode in a function body, and also functions as the return.
@@ -6719,6 +6776,9 @@ int32_t TranslatorX64::emitNativeImpl(const Func* func,
* will handle it for us.
*/
a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
if (eagerRecord(func)) {
emitEagerSyncPoint(a, func->getEntry(), 0);
}
emitCall(a, (TCA)builtinFuncPtr, false /* smash regs */);
/*
@@ -9828,6 +9888,9 @@ void TranslatorX64::translateFCallBuiltin(const Tracelet& t,
}
// Call builtin
BuiltinFunction nativeFuncPtr = func->nativeFuncPtr();
if (eagerRecord(func)) {
recordEagerCall(a, ni);
}
emitCall(a, (TCA)nativeFuncPtr, true);
recordReentrantCall(ni);
@@ -309,6 +309,7 @@ private:
const NormalizedInstruction& i,
Attr typeAttr);
void recordSyncPoint(Asm& a, Offset pcOff, Offset spOff);
void emitEagerSyncPoint(Asm& a, const Opcode* pc, const Offset spDiff);
void recordIndirectFixup(CTCA addr, int dwordsPushed);
template <bool reentrant>
void recordCallImpl(Asm& a, const NormalizedInstruction& i,
@@ -329,6 +330,7 @@ private:
void recordStubCall(const NormalizedInstruction& i) {
recordCall(astubs, i);
}
void recordEagerCall(Asm& a, const NormalizedInstruction& i);
void emitSideExit(Asm& a, const NormalizedInstruction& dest, bool next);
void emitStringToClass(const NormalizedInstruction& i);
void emitKnownClassCheck(const NormalizedInstruction& i,
@@ -960,6 +962,7 @@ private:
TCA funcPrologue(Func* func, int nArgs, ActRec* ar = nullptr);
bool checkCachedPrologue(const Func* func, int param, TCA& plgOut) const;
SrcKey emitPrologue(Func* func, int nArgs);
static bool eagerRecord(const Func* func);
int32_t emitNativeImpl(const Func*, bool emitSavedRIPReturn);
void emitBindJ(Asm& a, ConditionCode cc, SrcKey dest,
ServiceRequest req);