Some optimizations for LdClsCns

Change LdClsCns to side exit when the type is uninitialized.
On the exit path, do a LookupClsCns and ReqBindJmp for the next srckey
so forward progress is still made.  Add a predictionopts case for the
common pattern of a LdClsCns; CheckInit pair that is followed by a
CheckType---in this case, hoist all the checks into the LdClsCns.
Esse commit está contido em:
Jordan DeLong
2013-05-25 19:20:24 -07:00
commit de sgolemon
commit afa6d733c1
9 arquivos alterados com 251 adições e 124 exclusões
+18 -9
Ver Arquivo
@@ -757,16 +757,25 @@ D:Cls = LdClsCachedSafe S0:ConstStr [ -> L ]
Loads the class whose name is S0 out of the target cache. If the class is not
defined, returns null and optionally branches to L.
D:T = LdClsCns<T> S0:ConstStr S1:ConstStr [ -> L ]
D:T = LdClsCns<T,className,constName> [ -> L ]
Loads the class constant named S0 from a class named S1 via the
target cache. This instruction should generally be followed by
CheckInit, unless we know the class is already loaded. If the
optional label L is specified and the loaded value's type does not
match T, this instruction does not load into D and transfers control
to L. The result may be uninitialized if the class is not defined.
Note that no decref of the result is necessary because class
constants will always be static.
Loads the named class constant for a class via the target cache.
This instruction should generally be followed by CheckInit, unless
we know the class is already loaded.
If the optional label L is specified and the loaded value's type
does not match T, this instruction does not load into D and
transfers control to L.
The result may be uninitialized if the class is not defined. Note
that no decref of the result is necessary because class constants
will always be static.
D:T = LookupClsCns<T,className,constName>
Load a class constant for a class via the target cache, invoking
autoload if it is not defined. This instruction may raise an
undefined constant error if autoload cannot define the constant.
D:FuncCtx = LdClsMethodFCache S0:ConstStr S1:ConstStr
S2:{Obj|Cls|Ctx} L:Label
+22 -29
Ver Arquivo
@@ -4643,43 +4643,36 @@ void CodeGenerator::cgLdCls(IRInstruction* inst) {
ArgGroup(m_regs).imm(ch).ssa(className));
}
static StringData* fullConstName(SSATmp* cls, SSATmp* cnsName) {
static StringData* fullConstName(const StringData* cls,
const StringData* cnsName) {
return StringData::GetStaticString(
Util::toLower(cls->getValStr()->data()) + "::" +
cnsName->getValStr()->data());
Util::toLower(cls->data()) + "::" + cnsName->data()
);
}
void CodeGenerator::cgLdClsCns(IRInstruction* inst) {
SSATmp* cnsName = inst->src(0);
SSATmp* cls = inst->src(1);
StringData* fullName = fullConstName(cls, cnsName);
TargetCache::CacheHandle ch = TargetCache::allocClassConstant(fullName);
// note that we bail from the trace if the target cache entry is empty
// for this class constant or if the type assertion fails.
// TODO: handle the slow case helper call.
auto const extra = inst->extra<LdClsCns>();
auto const fullName = fullConstName(extra->clsName, extra->cnsName);
auto const ch = TargetCache::allocClassConstant(fullName);
cgLoad(rVmTl, ch, inst);
}
void CodeGenerator::cgLookupClsCns(IRInstruction* inst) {
SSATmp* cnsName = inst->src(0);
SSATmp* cls = inst->src(1);
assert(inst->typeParam() == Type::Cell);
assert(cnsName->isConst() && cnsName->type() == Type::StaticStr);
assert(cls->isConst() && cls->type() == Type::StaticStr);
StringData* fullName = fullConstName(cls, cnsName);
TargetCache::CacheHandle ch = TargetCache::allocClassConstant(fullName);
ArgGroup args(m_regs);
args.addr(rVmTl, ch)
.immPtr(Unit::GetNamedEntity(cls->getValStr()))
.immPtr(cls->getValStr())
.immPtr(cnsName->getValStr());
cgCallHelper(m_as, TCA(TargetCache::lookupClassConstantTv),
inst->dst(), kSyncPoint, args, DestType::TV);
auto const extra = inst->extra<LookupClsCns>();
auto const fullName = fullConstName(extra->clsName, extra->cnsName);
auto const ch = TargetCache::allocClassConstant(fullName);
cgCallHelper(
m_as,
TCA(TargetCache::lookupClassConstantTv),
inst->dst(),
kSyncPoint,
ArgGroup(m_regs)
.addr(rVmTl, ch)
.immPtr(Unit::GetNamedEntity(extra->clsName))
.immPtr(extra->clsName)
.immPtr(extra->cnsName),
DestType::TV
);
}
void CodeGenerator::cgLdCns(IRInstruction* inst) {
+19
Ver Arquivo
@@ -288,6 +288,23 @@ struct CallArrayData : IRExtraData {
Offset pc, after;
};
/*
 * Extra data identifying a class constant: the name of the class and
 * the name of the constant on that class.
 */
struct ClsCnsName : IRExtraData {
  const StringData* clsName;
  const StringData* cnsName;

  explicit ClsCnsName(const StringData* className,
                      const StringData* constantName)
    : clsName(className)
    , cnsName(constantName)
  {}

  // Renders the pair as "<clsName>::<cnsName>".
  std::string show() const {
    return folly::to<std::string>(
      clsName->data(),
      "::",
      cnsName->data()
    );
  }
};
//////////////////////////////////////////////////////////////////////
#define X(op, data) \
@@ -333,6 +350,8 @@ X(ReqBindJmpNoIR, BCOffset);
X(ReqRetranslateNoIR, BCOffset);
X(InlineCreateCont, CreateContData);
X(CallArray, CallArrayData);
X(LdClsCns, ClsCnsName);
X(LookupClsCns, ClsCnsName);
X(ReqBindJmpGt, ReqBindJccData);
X(ReqBindJmpGte, ReqBindJccData);
X(ReqBindJmpLt, ReqBindJccData);
+77 -66
Ver Arquivo
@@ -107,12 +107,6 @@ void HhbcTranslator::refineType(SSATmp* tmp, Type type) {
//
// FIXME: I think most of these shouldn't be possible still
// (except LdStack?).
//
// XXX These are possible once we remove the inferred/predicted
// type from emitCGetProp etc in HhbcTranslator. We need to
// delete label on these instructions if this is due to an
// assertType and also handled LdClsCns.
// TODO(#2035446): fix this for LdClsCns
assert(opc == LdLoc || opc == LdStack ||
opc == LdMem || opc == LdProp ||
opc == LdRef);
@@ -272,7 +266,7 @@ IRInstruction* HhbcTranslator::makeMarker(Offset bcOff) {
int32_t stackOff = m_tb->spOffset() +
m_evalStack.numCells() - m_stackDeficit;
FTRACE(2, "emitMarker: bc {} sp {} fn {}\n",
FTRACE(2, "makeMarker: bc {} sp {} fn {}\n",
bcOff, stackOff, curFunc()->fullName()->data());
MarkerData marker;
@@ -1521,40 +1515,25 @@ void HhbcTranslator::emitCmp(Opcode opc) {
gen(DecRef, src1);
}
void HhbcTranslator::emitClsCnsD(int32_t cnsNameStrId, int32_t clsNameStrId) {
// This bytecode re-enters if there is no class with the given name
// and can throw a fatal error.
const StringData* cnsNameStr = lookupStringId(cnsNameStrId);
const StringData* clsNameStr = lookupStringId(clsNameStrId);
SSATmp* cnsNameTmp = cns(cnsNameStr);
SSATmp* clsNameTmp = cns(clsNameStr);
if (0) {
// TODO: 2068502 pick one of these two implementations and remove the other.
Trace* exitTrace = getExitSlowTrace();
SSATmp* cns = gen(LdClsCns, Type::Cell, cnsNameTmp, clsNameTmp);
gen(CheckInit, exitTrace, cns);
push(cns);
} else {
// if-then-else
// todo: t2068502: refine the type? hhbc spec says null|bool|int|dbl|str
// and, str should always be static-str.
Type cnsType = Type::Cell;
SSATmp* c1 = gen(LdClsCns, cnsType, cnsNameTmp, clsNameTmp);
SSATmp* result = m_tb->cond(curFunc(),
[&] (Block* taken) { // branch
gen(CheckInit, taken, c1);
},
[&] { // Next: LdClsCns hit in TC
return c1;
},
[&] { // Taken: miss in TC, do lookup & init
m_tb->hint(Block::Unlikely);
return gen(LookupClsCns, getCatchTrace(),
cnsType, cnsNameTmp, clsNameTmp);
}
);
push(result);
}
void HhbcTranslator::emitClsCnsD(int32_t cnsNameId, int32_t clsNameId) {
auto const clsCnsName = ClsCnsName { lookupStringId(clsNameId),
lookupStringId(cnsNameId) };
// If we have to side exit, do the target cache lookup before
// chaining to another Tracelet so forward progress still happens.
auto const sideExit = makeSideExit(
nextBcOff(),
[&] (Trace* t) {
return genFor(t, LookupClsCns, Type::Cell, clsCnsName);
}
);
// TODO: ideally we'd load Uncounted here without guarding, since we
// know this value has to be a non-refcounted type, but the register
// allocator doesn't understand what we mean right now.
auto const cns = gen(LdClsCns, clsCnsName, Type::Cell);
gen(CheckInit, sideExit, cns);
push(cns);
}
void HhbcTranslator::emitAKExists() {
@@ -2822,7 +2801,7 @@ void HhbcTranslator::emitBindMem(SSATmp* ptr, SSATmp* src) {
pushIncRef(src);
gen(StMem, ptr, cns(0), src);
if (isRefCounted(src) && src->type().canRunDtor()) {
Block* exitBlock = getExitTrace(nextSrcKey().offset())->front();
Block* exitBlock = getExitTrace(nextBcOff())->front();
exitBlock->prepend(m_irFactory.gen(DecRef, prevValue));
gen(DecRefNZOrBranch, exitBlock, prevValue);
} else {
@@ -3076,7 +3055,7 @@ void HhbcTranslator::emitMod() {
// will raise a notice and produce the boolean false. Punch out
// here and resume after the Mod instruction; this should be rare.
auto const exit = getExitTraceWarn(
nextSrcKey().offset(),
nextBcOff(),
exitSpillValues,
StringData::GetStaticString(Strings::DIVISION_BY_ZERO)
);
@@ -3173,30 +3152,41 @@ Trace* HhbcTranslator::getExitTrace(Offset targetBcOff /* = -1 */) {
Trace* HhbcTranslator::getExitTrace(Offset targetBcOff,
std::vector<SSATmp*>& spillValues) {
if (targetBcOff == -1) targetBcOff = bcOff();
return getExitTraceImpl(targetBcOff, ExitFlag::None, spillValues, nullptr);
return getExitTraceImpl(targetBcOff, ExitFlag::None, spillValues,
CustomExit{});
}
Trace* HhbcTranslator::getExitTraceWarn(Offset targetBcOff,
std::vector<SSATmp*>& spillValues,
const StringData* warning) {
assert(targetBcOff != -1);
return getExitTraceImpl(targetBcOff, ExitFlag::None, spillValues, warning);
return getExitTraceImpl(targetBcOff, ExitFlag::None, spillValues,
[&](Trace* t) -> SSATmp* {
genFor(t, RaiseWarning, cns(warning));
return nullptr;
}
);
}
/*
 * Build a custom side exit that resumes at targetBcOff.  The values
 * returned by peekSpillValues() are spilled on the exit, the exit is
 * requested with ExitFlag::DelayedMarker, and `exit' is handed to
 * getExitTraceImpl as its custom-exit callback.
 */
template<class ExitLambda>
Trace* HhbcTranslator::makeSideExit(Offset targetBcOff, ExitLambda exit) {
  auto pendingSpills = peekSpillValues();
  return getExitTraceImpl(
    targetBcOff, ExitFlag::DelayedMarker, pendingSpills, exit
  );
}
/*
* Generates an exit trace which will continue execution without HHIR.
* This should be used in situations that HHIR cannot handle -- ideally
* only in slow paths.
*/
Trace* HhbcTranslator::getExitSlowTrace() {
auto spillValues = peekSpillValues();
return getExitTraceImpl(bcOff(), ExitFlag::NoIR, spillValues, nullptr);
return getExitTraceImpl(bcOff(), ExitFlag::NoIR, spillValues,
CustomExit{});
}
Trace* HhbcTranslator::getExitTraceImpl(Offset targetBcOff,
ExitFlag flag,
std::vector<SSATmp*>& stackValues,
const StringData* warning) {
const CustomExit& customFn) {
auto const exit = m_tb->makeExitTrace(targetBcOff);
MarkerData exitMarker;
@@ -3204,25 +3194,46 @@ Trace* HhbcTranslator::getExitTraceImpl(Offset targetBcOff,
exitMarker.stackOff = m_tb->spOffset() +
stackValues.size() - m_stackDeficit;
exitMarker.func = curFunc();
genFor(exit, Marker, exitMarker);
if (warning) {
genFor(exit, RaiseWarning, cns(warning));
MarkerData currentMarker;
currentMarker.bcOff = bcOff();
currentMarker.func = curFunc();
currentMarker.stackOff = m_tb->spOffset() +
m_evalStack.numCells() - m_stackDeficit;
genFor(exit, Marker,
flag == ExitFlag::DelayedMarker ? currentMarker : exitMarker);
// The stack value handed to the rest of the exit depends on whether
// we have to emit a SpillStack below.
auto stack = m_tb->sp();
// TODO(#2404447) move this conditional to the simplifier?
if (m_stackDeficit != 0 || !stackValues.empty()) {
stackValues.insert(
stackValues.begin(),
{ m_tb->sp(), cns(int64_t(m_stackDeficit)) }
);
stack = genFor(exit,
SpillStack, std::make_pair(stackValues.size(), &stackValues[0])
);
}
auto const stack = [&]{
// TODO(#2404447) move this conditional to the simplifier?
if (m_stackDeficit != 0 || !stackValues.empty()) {
stackValues.insert(
stackValues.begin(),
{ m_tb->sp(), cns(int64_t(m_stackDeficit)) }
);
return genFor(exit,
SpillStack, std::make_pair(stackValues.size(), &stackValues[0])
if (customFn) {
stack = genFor(exit, ExceptionBarrier, stack);
auto const customTmp = customFn(exit);
if (customTmp) {
SSATmp* spill2[] = { stack, cns(0), customTmp };
stack = genFor(exit,
SpillStack, std::make_pair(sizeof spill2 / sizeof spill2[0], spill2)
);
exitMarker.stackOff += 1;
}
return m_tb->sp();
}();
}
if (flag == ExitFlag::DelayedMarker) {
genFor(exit, Marker, exitMarker);
}
genFor(exit, SyncABIRegs, m_tb->fp(), stack);
+46 -2
Ver Arquivo
@@ -595,25 +595,63 @@ private:
IRInstruction* makeMarker(Offset bcOff);
void emitMarker();
// Exit trace creation routines.
private: // Exit trace creation routines.
Trace* getExitTrace(Offset targetBcOff = -1);
Trace* getExitTrace(Offset targetBcOff,
std::vector<SSATmp*>& spillValues);
Trace* getExitTraceWarn(Offset targetBcOff,
std::vector<SSATmp*>& spillValues,
const StringData* warning);
/*
* Create a custom side exit---that is, an exit that does some
* amount of work before leaving the trace.
*
* The exit trace will spill things with a Marker for the current bytecode.
*
* Then it will do an ExceptionBarrier, followed by whatever is done
* by the CustomExit(Trace*) function. The custom exit may add
* instructions to the exit trace, and optionally may return an
* additional SSATmp* to spill on the stack. If there is no
* additional SSATmp*, it should return nullptr.
*
* TODO(#2447661): this should be way better than this, should allow
* using gen/push/spillStack/etc.
*/
template<class ExitLambda>
Trace* makeSideExit(Offset targetBcOff, ExitLambda exit);
/*
* Generates an exit trace which will continue execution without HHIR.
* This should be used in situations that HHIR cannot handle -- ideally
* only in slow paths.
*/
Trace* getExitSlowTrace();
Trace* getCatchTrace();
/*
* Implementation for the above. Takes spillValues, target offset,
* and a flag for whether to make a no-IR exit.
*
* Also takes a CustomExit(Trace*) function that may perform more
* operations and optionally return a single additional SSATmp*
* (otherwise nullptr) to spill on the stack before exiting.
*/
enum class ExitFlag {
// Passed by getExitTrace() and getExitTraceWarn().
None,
// Passed by getExitSlowTrace(), whose exits continue execution
// without HHIR.
NoIR,
// DelayedMarker means to use the current instruction marker
// instead of one for targetBcOff.  Passed by makeSideExit().
DelayedMarker,
};
typedef std::function<SSATmp* (Trace*)> CustomExit;
Trace* getExitTraceImpl(Offset targetBcOff,
ExitFlag noIRExit,
std::vector<SSATmp*>& spillValues,
const StringData* warning);
const CustomExit&);
private:
/*
* Accessors for the current function being compiled and its
* class and unit.
@@ -634,6 +672,12 @@ private:
return srcKey;
}
/*
 * Bytecode offset of the instruction after the current one.  That
 * instruction is not necessarily part of this tracelet.
 */
Offset nextBcOff() const {
  return nextSrcKey().offset();
}
/*
* Helpers for resolving bytecode immediate ids.
*/
+1
Ver Arquivo
@@ -773,6 +773,7 @@ bool isRefCounted(SSATmp* tmp) {
}
IRInstruction* inst = tmp->inst();
Opcode opc = inst->op();
// TODO(#2448005): we shouldn't have to list LdClsCns here.
if (opc == DefConst || opc == LdConst || opc == LdClsCns) {
return false;
}
+2 -2
Ver Arquivo
@@ -302,8 +302,8 @@ O(LdClsCached, D(Cls), CStr, C|E|N|Refs|Er|Mem) \
O(LdClsCachedSafe, D(Cls), CStr, C) \
O(LdClsCtx, D(Cls), S(Ctx), C) \
O(LdClsCctx, D(Cls), S(Cctx), C) \
O(LdClsCns, DParam, CStr CStr, NF) \
O(LookupClsCns, DParam, CStr CStr, E|Refs|Er|N|Mem) \
O(LdClsCns, DParam, NA, NF) \
O(LookupClsCns, DParam, NA, E|Refs|Er|N|Mem) \
O(LdCns, DParam, CStr, NF) \
O(LookupCns, DParam, CStr, E|Refs|Er|N|Mem) \
O(LdClsMethodCache, D(FuncCls), C(Str) \
+64 -14
Ver Arquivo
@@ -81,13 +81,14 @@ void optimizePredictions(Trace* const trace, IRFactory* const irFactory) {
* generic LdMem/IncRef on the exit block, otherwise we do
* type-specialized versions.
*/
auto optLdMem = [&] (IRInstruction* checkType, IRInstruction* lastMarker) {
auto optLdMem = [&] (IRInstruction* checkType,
IRInstruction* lastMarker) -> bool {
auto const incRef = checkType->src(0)->inst();
if (incRef->op() != IncRef) return;
if (incRef->op() != IncRef) return false;
auto const ldMem = incRef->src(0)->inst();
if (ldMem->op() != LdMem) return;
if (ldMem->src(1)->getValInt() != 0) return;
if (!ldMem->typeParam().equals(Type::Cell)) return;
if (ldMem->op() != LdMem) return false;
if (ldMem->src(1)->getValInt() != 0) return false;
if (!ldMem->typeParam().equals(Type::Cell)) return false;
FTRACE(5, "candidate: {}\n", ldMem->toString());
@@ -95,13 +96,13 @@ void optimizePredictions(Trace* const trace, IRFactory* const irFactory) {
auto const exit = checkType->taken();
auto const specialized = checkType->block()->next();
if (mainBlock != checkType->block()) return;
if (exit->numPreds() != 1) return;
if (exit->isMain()) return;
if (mainBlock != checkType->block()) return false;
if (exit->numPreds() != 1) return false;
if (exit->isMain()) return false;
auto const sinkFirst = mainBlock->iteratorTo(ldMem);
auto const sinkLast = mainBlock->iteratorTo(checkType);
if (!instructionsAreSinkable(sinkFirst, sinkLast)) return;
if (!instructionsAreSinkable(sinkFirst, sinkLast)) return false;
FTRACE(5, "all sinkable\n");
auto const& rpoSort = sortedBlocks();
@@ -127,8 +128,8 @@ void optimizePredictions(Trace* const trace, IRFactory* const irFactory) {
/*
* Specialize the LdMem left on the main trace after cloning the
* generic version to the exit. We'll reflowTypes in a sec to get
* everything downstream specialized.
* generic version to the exit. We'll reflowTypes after we're
* done with all of this to get everything downstream specialized.
*/
ldMem->setTypeParam(checkType->typeParam());
@@ -148,7 +149,47 @@ void optimizePredictions(Trace* const trace, IRFactory* const irFactory) {
specialized->insert(specialized->skipHeader(),
irFactory->cloneInstruction(lastMarker));
reflowTypes(specialized, rpoSort);
return true;
};
/*
* When we have a type prediction for a LdClsCns that is followed
* immediately by CheckInit, we can merge both checks into the
* LdClsCns and change it to exit to the same location the CheckInit
* would've exited to.
*
* checkType is the CheckType instruction being considered.  Returns
* true if the rewrite was performed and false if the pattern did not
* match (in which case nothing is modified).  lastMarker is accepted
* for signature parity with optLdMem but is not used here.
*/
auto optLdClsCns = [&] (IRInstruction* checkType,
IRInstruction* lastMarker) -> bool {
// The value being type-checked must come straight from a LdClsCns
// that does not already have a taken branch.
auto const ldClsCns = checkType->src(0)->inst();
if (ldClsCns->op() != LdClsCns) return false;
if (ldClsCns->taken()) return false;
// The instruction right after the LdClsCns in its block must be a
// CheckInit.
auto const mainBlock = ldClsCns->block();
auto const nextIt = boost::next(mainBlock->iteratorTo(ldClsCns));
if (nextIt == mainBlock->end()) return false;
auto const checkInit = &*nextIt;
if (checkInit->op() != CheckInit) return false;
// Only proceed when the CheckInit's exit block has exactly one
// predecessor (presumably so retargeting it cannot affect any other
// branch into that block -- confirm).
auto const exit = checkInit->taken();
if (exit->numPreds() != 1) return false;
FTRACE(5, "candidate: {}\n", ldClsCns->toString());
// Change the LdClsCns to do the check on the more refined type,
// exiting to the trace we would've exited to, and get rid of the
// CheckInit.
// The CheckInit's branch is cleared before the instruction is erased,
// and only then is the same exit block attached to the LdClsCns.
checkInit->setTaken(nullptr);
mainBlock->erase(mainBlock->iteratorTo(checkInit));
ldClsCns->setTaken(exit);
ldClsCns->setTypeParam(checkType->typeParam());
// We don't need the checkType anymore.
// It is turned into a Mov of the LdClsCns result.
irFactory->replace(
checkType,
Mov,
ldClsCns->dst()
);
return true;
};
};
/*
@@ -160,6 +201,7 @@ void optimizePredictions(Trace* const trace, IRFactory* const irFactory) {
* visiting.
*/
if (!trace->isMain()) return;
bool needsReflow = false;
for (Block* b : trace->blocks()) {
IRInstruction* lastMarker = nullptr;
for (auto& inst : *b) {
@@ -171,11 +213,19 @@ void optimizePredictions(Trace* const trace, IRFactory* const irFactory) {
if (inst.op() == CheckType &&
inst.src(0)->type().equals(Type::Cell)) {
assert(lastMarker);
optLdMem(&inst, lastMarker);
break;
if (optLdMem(&inst, lastMarker) ||
optLdClsCns(&inst, lastMarker)) {
needsReflow = true;
break;
}
}
}
}
if (needsReflow) {
auto& cfg = sortedBlocks();
reflowTypes(cfg.front(), cfg);
}
}
//////////////////////////////////////////////////////////////////////
+2 -2
Ver Arquivo
@@ -2119,8 +2119,8 @@ void HhbcTranslator::VectorTranslator::emitMPost() {
void HhbcTranslator::VectorTranslator::emitSideExits(SSATmp* catchSp,
int nStack) {
const Offset nextOff = m_ht.nextSrcKey().offset();
auto op = m_ni.mInstrOp();
auto const nextOff = m_ht.nextBcOff();
auto const op = m_ni.mInstrOp();
const bool isSetWithRef = op == OpSetWithRefLM || op == OpSetWithRefRM;
if (m_failedSetTrace) {