Kill more of tx64

I started by just killing a few unused flags from
NormalizedInstruction and then deleted things that were even slightly
related (and things that depended on them, etc...).
Esse commit está contido em:
bsimmers
2013-05-30 17:32:45 -07:00
commit de sgolemon
commit 687164d21b
15 arquivos alterados com 132 adições e 3493 exclusões
+1 -1
Ver Arquivo
@@ -27,7 +27,7 @@
#define incl_HPHP_VM_RUNTIME_TRANSLATOR_ABI_X64_H_
#include "hphp/util/asm-x64.h"
#include "hphp/runtime/vm/translator/regalloc.h"
#include "hphp/runtime/vm/translator/physreg.h"
namespace HPHP { namespace Transl {
+2
Ver Arquivo
@@ -17,6 +17,8 @@
#ifndef incl_HPHP_VM_CFG_H_
#define incl_HPHP_VM_CFG_H_
#include <boost/dynamic_bitset.hpp>
#include "hphp/runtime/base/memory/memory_manager.h"
#include "hphp/runtime/vm/translator/hopt/block.h"
#include "hphp/runtime/vm/translator/hopt/trace.h"
@@ -2678,15 +2678,6 @@ static void emitExitNoIRStats(Asm& a,
Transl::CC_None,
true);
}
if (HPHP::Trace::moduleEnabled(HPHP::Trace::punt, 1)) {
auto const op = Op(*func->unit()->at(dest.m_offset));
auto const name = folly::format(
"exitSlow-{}",
opcodeToName(op)
).str();
tx64->emitRecordPunt(a, name);
}
}
void CodeGenerator::cgReqBindJmpNoIR(IRInstruction* inst) {
@@ -967,6 +967,29 @@ void HhbcTranslator::emitCIterFree(uint32_t iterId) {
gen(CIterFree, IterId(iterId), m_tb->getFp());
}
typedef std::map<int, int> ContParamMap;
/*
 * Fill `map` with, for each named local of origFunc (keyed by its
 * index), the id of the local with the same name in genFunc.
 *
 * Returns true only if every named local of origFunc was found in
 * genFunc. When that holds and there's no VarEnv at runtime, the
 * continuation's variables can be filled completely inline in the TC
 * (assuming there aren't too many).
 *
 * On failure the entries recorded before the first missing name are
 * left in `map`.
 */
static
bool mapContParams(ContParamMap& map,
                   const Func* origFunc, const Func* genFunc) {
  const StringData* const* names = origFunc->localNames();
  for (Id origId = 0; origId < origFunc->numNamedLocals(); ++origId) {
    const Id genId = genFunc->lookupVarId(names[origId]);
    if (genId == kInvalidId) {
      // No local of this name in the generator body: give up.
      return false;
    }
    map[origId] = genId;
  }
  return true;
}
void HhbcTranslator::emitCreateCont(bool getArgs,
Id funNameStrId) {
gen(ExitOnVarEnv, getExitSlowTrace()->front(), m_tb->getFp());
@@ -990,9 +1013,9 @@ void HhbcTranslator::emitCreateCont(bool getArgs,
cns(genFunc)
);
TranslatorX64::ContParamMap params;
ContParamMap params;
if (origLocals <= TranslatorX64::kMaxInlineContLocals &&
TranslatorX64::mapContParams(params, origFunc, genFunc)) {
mapContParams(params, origFunc, genFunc)) {
static auto const thisStr = StringData::GetStaticString("this");
Id thisId = kInvalidId;
const bool fillThis = origFunc->isNonClosureMethod() &&
@@ -1868,7 +1868,6 @@ TranslatorX64::irTranslateTracelet(Tracelet& t,
}
} catch (JIT::FailedCodeGen& fcg) {
transResult = Failure;
if (Trace::moduleEnabled(Trace::punt, 1)) m_lastHHIRPunt = fcg.func;
TRACE(1, "HHIR: FAILED to generate code for Translation %d "
"@ %s:%d (%s)\n", getCurrentTransID(),
fcg.file, fcg.line, fcg.func);
@@ -1877,7 +1876,6 @@ TranslatorX64::irTranslateTracelet(Tracelet& t,
fcg.file, fcg.line, fcg.func);
} catch (JIT::FailedIRGen& x) {
transResult = Failure;
if (Trace::moduleEnabled(Trace::punt, 1)) m_lastHHIRPunt = x.func;
TRACE(1, "HHIR: FAILED to translate @ %s:%d (%s)\n",
x.file, x.line, x.func);
} catch (TranslationFailedExc& tfe) {
+1
Ver Arquivo
@@ -22,6 +22,7 @@
#include "hphp/runtime/vm/translator/hopt/ir.h"
#include "hphp/runtime/vm/translator/hopt/irinstruction.h"
#include "hphp/runtime/vm/translator/hopt/ssatmp.h"
#include "hphp/runtime/vm/translator/translator.h"
using namespace HPHP::Transl;
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
-474
Ver Arquivo
@@ -1,474 +0,0 @@
/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#ifndef incl_HPHP_REG_ALLOC_H_
#define incl_HPHP_REG_ALLOC_H_
#include <boost/noncopyable.hpp>
#include "hphp/util/trace.h"
#include "hphp/util/asm-x64.h"
#include "hphp/runtime/vm/translator/translator.h"
#include "hphp/runtime/vm/translator/physreg.h"
namespace HPHP { namespace Transl {
// Assumption: the set interfaces are limited to the first 64 registers.
static const int kMaxRegs = 64;
/*
* We take a "virtual memory" approach to register allocation. The virtual
* registers are PHP Location's, and the physical registers are
* an opaque collection of integers that presumably map to machine regs.
* We assume that the machine has enough physical registers to get through
* a single normalized instruction with all inputs and outputs in
* registers.
*
* By analogy with page replacement, we do LRU to replace physical
* registers. While this is an imperfect analogy, it provides constant
* overheads per register, and a guarantee that any runnable code will
* actually be able to allocate registers.
*/
struct RegContent {
  // What a register currently holds: nothing meaningful, a PHP
  // Location, or an immediate integer.
  enum Kind {
    Invalid,
    Loc,
    Int
  } m_kind;
  int64_t m_int;
  Location m_loc;

  // Content naming a Location; intval is stored alongside it.
  explicit RegContent(const Location &loc, int64_t intval = 0)
    : m_kind(Loc)
    , m_int(intval)
    , m_loc(loc)
  {}

  // Content naming an immediate integer value.
  explicit RegContent(int64_t intval)
    : m_kind(Int)
    , m_int(intval)
    , m_loc(Location())
  {}

  // Default content is Invalid.
  RegContent()
    : m_kind(Invalid)
    , m_int(0)
    , m_loc(Location())
  {}

  bool isInt() const { return m_kind == Int; }
  bool isLoc() const { return m_kind == Loc; }
  bool isValid() const { return isInt() || isLoc(); }
  bool isInvalid() const { return !isValid(); }

  // True for a stack Location whose offset is at or beyond
  // firstUnreachable.
  bool isUnreachableStack(int firstUnreachable) const {
    if (!isLoc()) return false;
    if (!m_loc.isStack()) return false;
    return m_loc.offset >= firstUnreachable;
  }

  /*
   * Three-way comparison: first by kind, then by integer value for
   * Int contents, otherwise by Location (asserting the kind is Loc).
   */
  int cmp(const RegContent &other) const {
    if (m_kind != other.m_kind) {
      return m_kind - other.m_kind;
    }
    if (m_kind != Int) {
      assert(m_kind == Loc);
      return m_loc.cmp(other.m_loc);
    }
    if (m_int < other.m_int) return -1;
    return m_int > other.m_int ? 1 : 0;
  }

  bool operator==(const RegContent &other) const {
    return cmp(other) == 0;
  }
  bool operator!=(const RegContent &other) const {
    return !(*this == other);
  }

  // Printable name of m_kind; anything other than Int/Loc reads as
  // "Invalid".
  const char *kindStr() const {
    if (m_kind == Int) return "Int";
    if (m_kind == Loc) return "Loc";
    return "Invalid";
  }

  std::string pretty() const;

  // Hash function: combines the kind with either the integer value or
  // the Location's own hash.
  size_t operator()(const RegContent& cont) const {
    const auto payload =
      cont.isInt() ? cont.m_int : cont.m_loc(cont.m_loc);
    return HPHP::hash_int64_pair(cont.m_kind, payload);
  }
};
/*
* In the virtual memory analogy, a RegInfo is the PTE: it contains
* the v2p mapping and dirty bit.
*/
struct RegInfo {
#define REGSTATES \
REGSTATE(INVALID) \
REGSTATE(FREE) REGSTATE(CLEAN) REGSTATE(SCRATCH) REGSTATE(DIRTY)
enum State {
#define REGSTATE(x) x,
// Order is meaningful here; we use <, > to test "strength" of state
REGSTATES
#undef REGSTATE
};
RegContent m_cont;
uint64_t m_epoch;
PhysReg m_pReg;
State m_state;
DataType m_type;
std::string pretty() const {
const char* names[] = {
#define REGSTATE(x) #x,
REGSTATES
#undef REGSTATE
};
char buf[1024];
sprintf(buf, "Reg:%02d:%s:%lld:Type:%d",
int(m_pReg), names[m_state], static_cast<long long>(m_epoch),
m_type);
return Trace::prettyNode(buf, m_cont);
}
RegInfo() : m_cont(), m_state(FREE) { }
#undef REGSTATES
};
/**
* SpillFill -- machine-specific details about how to spill and fill
* registers in given virtual location. Should probably be a template
* parameter, but I'd like to not waste my short remaining years of health
* recompiling.
*/
// Abstract interface; every operation is pure virtual and must be
// supplied by the implementer.
// NOTE(review): the per-method descriptions below are inferred from the
// method and parameter names only -- confirm against an implementation.
class SpillFill {
public:
virtual ~SpillFill() { }
// Presumably stores reg (holding a value of type t) to loc; writeType
// presumably controls whether the type is written as well.
virtual void spill(const Location& loc, DataType t, PhysReg reg,
bool writeType) = 0;
// Presumably loads the value at loc into reg.
virtual void fill(const Location& loc, PhysReg reg) = 0;
// Presumably satisfies a fill with a register-to-register move.
virtual void fillByMov(PhysReg src, PhysReg dst) = 0;
// Presumably materializes the immediate immVal in reg.
virtual void loadImm(int64_t immVal, PhysReg reg) = 0;
// NOTE(review): purpose not visible here; name suggests a debugging aid.
virtual void poison(PhysReg reg) = 0;
};
class LazyScratchReg;
// Register allocator state: one RegInfo per physical register, an LRU
// ordering over registers, and a content -> register map. Most member
// functions are only declared here; their definitions are elsewhere.
class RegAlloc {
friend class LazyScratchReg;
// RegInfo: indexed by PhysReg.
RegInfo m_info[kMaxRegs];
// Secondary indices on m_info.
RegSet m_callerSaved; // Good short-lived regs
RegSet m_calleeSaved; // Good long-lived regs
int m_numRegs; // Number of real registers, <= kMaxRegs
RegSet m_allRegs;
PhysReg m_lru[kMaxRegs]; // lru order over registers
// RegContent serves as both the key type and the hasher (it defines
// operator()(const RegContent&)).
typedef hphp_hash_map<RegContent, PhysReg, RegContent> ContToRegMap;
ContToRegMap m_contToRegMap; // Content -> PhysReg
SpillFill* m_spf;
mutable int m_freezeCount; // support immutability
uint64_t m_epoch;
bool m_branchSynced;
RegInfo* alloc(const Location& loc, DataType t, RegInfo::State state,
bool needsFill, int64_t immVal = 0, PhysReg target = InvalidReg);
RegInfo* findFreeReg(const Location& loc);
void assignRegInfo(RegInfo *regInfo, const RegContent &cont,
RegInfo::State state, DataType type);
void stateTransition(RegInfo* r, RegInfo::State to);
// True when pr, converted to int, is a valid index into m_info.
static bool isValidReg(PhysReg pr) {
return int(pr) >= 0 && int(pr) < kMaxRegs;
}
// lru operations are O(numRegs), but numRegs is small.
void lruFront(RegInfo *r);
void lruBack(RegInfo* r);
// Note: const member function returning a non-const RegInfo*.
RegInfo *physRegToInfo(PhysReg pr) const;
void freeRegInfo(RegInfo* ri);
void spill(RegInfo *toSpill);
void trace();
void verify();
void smashRegImpl(RegInfo *r);
void reconcileOne(RegInfo* r, RegAlloc* branchRA, PhysReg branchPR);
bool checkNoScratch();
PhysReg allocScratchReg(PhysReg pr = InvalidReg);
void freeScratchReg(PhysReg r);
public:
RegAlloc(RegSet callerSaved, RegSet calleeSaved, SpillFill* spf);
// Copy construction is implemented via copy assignment: members are
// default-initialized first and then assigned from rhs.
RegAlloc(const RegAlloc& rhs) {
*this = rhs; // operator= invocation
}
// allocReg: allocate a single operand
// A fill is requested (needsFill) only when the requested state is CLEAN.
PhysReg allocReg(const Location& loc, DataType t, RegInfo::State state) {
RegInfo* ri = alloc(loc, t, state, state == RegInfo::CLEAN);
return ri->m_pReg;
}
// Sets the branch-synced flag; asserts that it was not already set.
void setBranchSynced() {
assert(!m_branchSynced);
m_branchSynced = true;
}
bool branchSynced() {
return m_branchSynced;
}
void assertNoScratch() { assert(checkNoScratch()); }
// Read-only view of the RegInfo for pr.
const RegInfo* getInfo(PhysReg pr) const {
return physRegToInfo(pr);
}
bool regIsDirty(PhysReg pr) const {
return getInfo(pr)->m_state == RegInfo::DIRTY;
}
bool regIsClean(PhysReg pr) const {
return getInfo(pr)->m_state == RegInfo::CLEAN;
}
bool regIsFree(PhysReg pr) const {
return getInfo(pr)->m_state == RegInfo::FREE;
}
// Only checks when `debug` is true: prints the register's description
// to std::cerr and then fails an always_assert.
void assertRegIsFree(PhysReg pr) const {
if (debug && !regIsFree(pr)) {
std::cerr << getInfo(pr)->pretty() << std::endl;
always_assert(false && "Expected register to be free");
}
}
DataType regType(PhysReg pr) const {
return getInfo(pr)->m_type;
}
// Declared const, but writes m_type through the non-const pointer
// returned by physRegToInfo.
void setRegType(PhysReg pr, DataType type) const {
physRegToInfo(pr)->m_type = type;
}
// Location held by pr, or a default-constructed Location when the
// register's content is not a Loc.
Location regLoc(PhysReg pr) const {
const RegInfo* info = getInfo(pr);
return info->m_cont.isLoc() ? info->m_cont.m_loc : Location();
}
// allocInputRegs: given an instruction, find/fill its inputs.
void allocInputReg(const DynLocation& dl, PhysReg target = InvalidReg);
void allocInputReg(const NormalizedInstruction& ni, int index,
PhysReg target = InvalidReg);
void allocInputRegs(const NormalizedInstruction& ni);
// allocOutputRegs: destructively mark output registers. Should only
// be done when we know that the code we're emitting will drive valid
// values into these outputs.
void allocOutputRegs(const NormalizedInstruction& ni);
void bind(PhysReg reg, const Location& loc, DataType t,
RegInfo::State state);
void bindScratch(LazyScratchReg& reg, const Location& loc, DataType t,
RegInfo::State state);
void markAsClean(const Location& loc);
/*
* Invalidating a location means to drop any register mapped to that
* location down to FREE state, regardless of the current state.
*
* (This differs from smashing a location in that it doesn't require
* that the register is not DIRTY.)
*/
void invalidate(const Location& loc);
void invalidateLocals(int first, int last);
bool hasReg(const Location &loc) const;
RegSet getRegsLike(RegInfo::State state) const;
bool hasDirtyRegs(int firstUnreachableStk) const;
PhysReg getReg(const Location &loc);
/*
* Returns a PhysReg containing the given immediate value (immVal),
* or InvalidReg if this could not be accomplished.
*
* If a physical register already contains `immVal', this function
* returns it. Otherwise, if `allowAllocate' is true and this
* register allocator is not frozen, a free register, if available,
* will be allocated and set to this value.
*
* Otherwise InvalidReg is returned.
*/
PhysReg getImmReg(int64_t immVal, bool allowAllocate = true);
/*
* Reset the register mapping to an empty state, epoch zero.
*
* Post: pristine() == true
*/
void reset();
/*
* Indicates whether the register map is in its initial, empty
* state. That is, empty() == true and bumpEpoch has not been
* called since the last time reset() was called.
*/
bool pristine() const;
/*
* Returns true if this RegMap has no non-FREE registers in it.
*/
bool empty() const;
/*
* Clean any dirty registers from various sets.
*
* For these functions, only registers in the DIRTY state are
* cleaned and transitioned to the CLEAN state. Scratch registers
* remain in scratch state.
*/
void cleanAll();
void cleanRegs(RegSet regsToPurge);
void cleanLoc(const Location& loc);
void cleanLocals();
void cleanReg(PhysReg reg);
/*
* Forget the mapping for all registers in the set. The regs must
* not be DIRTY (if you want to forget a dirty register without
* spilling, scrub it first).
*/
void smashRegs(RegSet smashedRegs);
void smashReg(PhysReg pr);
void smashLoc(const Location& loc);
/*
* Forget a mapping for a register, after cleaning it if it is DIRTY.
*
* This is equivalent to calling clean followed by smash.
*/
void cleanSmashRegs(RegSet set);
void cleanSmashReg(PhysReg pr);
void cleanSmashLoc(const Location& loc);
/*
* Scrubbing a register means to change it to the CLEAN state,
* regardless of whether we've actually spilled its contents to
* memory. These functions may not be called for a scratch
* register---it must be a program location---but it is legal to
* scrub an already-free register.
*
* This is often going to be followed by smashing the register.
* Special functions help for the case of dealing with discarding
* dead execution stack locations, since that's usually what this is
* about.
*/
void scrubStackEntries(int firstUnreachable);
void scrubStackRange(int firstToDiscard, int lastToDiscard);
void scrubReg(PhysReg pr);
void scrubRegs(RegSet regs);
void scrubLoc(const Location&);
void killImms(RegSet imms);
void swapRegisters(PhysReg r1, PhysReg r2);
/*
* Emit spills and fills in order to bring the `branch' state into
* the state of *this.
*
* This reconcile method is not symmetric: it will only emit
* spills/fills to the branch. In particular, this means that
* whatever is happening on the other side of branch.m_spf-> needs
* to be aware of the branch register state. Specifically: this
* means tx64->m_regMap must be the *branch* register map during
* reconciliation, because spill() may try to use/load immediate
* registers.
*/
void reconcile(RegAlloc& branch);
// freeze()/defrost() adjust a nesting counter (mutable, so usable on
// const objects); the allocator counts as frozen while it is positive.
void freeze() const { m_freezeCount++; }
void defrost() const { m_freezeCount--; assert(m_freezeCount >= 0); }
bool frozen() const { return m_freezeCount > 0; }
void bumpEpoch() { m_epoch++; }
std::string pretty() const;
};
// RAII ScratchReg holder:
// LazyScratchReg r(m_regMap);
// ...
// r.alloc();
// ... neg_reg64(*r);
//
// ScratchReg r(m_regMap);
// .. neg_reg64(*r);
// Holder for a scratch register obtained from a RegAlloc. Non-copyable.
// The register is acquired through alloc() rather than by the
// constructor shown here; the out-of-line definitions are elsewhere.
class LazyScratchReg : boost::noncopyable {
protected:
RegAlloc& m_regMap;
PhysReg m_reg;
public:
explicit LazyScratchReg(RegAlloc& regMap);
~LazyScratchReg();
// NOTE(review): compares against reg::noreg while the alloc/realloc
// defaults use InvalidReg -- presumably the same sentinel; confirm.
bool isAllocated() const { return m_reg != reg::noreg; }
void alloc(PhysReg pr = InvalidReg);
void dealloc();
void realloc(PhysReg pr = InvalidReg);
// Accessors for the held register at various operand widths.
friend PhysReg r(const LazyScratchReg& l) { return l.m_reg; }
friend Reg8 rbyte(const LazyScratchReg& l) { return rbyte(l.m_reg); }
friend Reg32 r32(const LazyScratchReg& l) { return r32(l.m_reg); }
friend Reg64 r64(const LazyScratchReg& l) { return r64(l.m_reg); }
};
// LazyScratchReg variant; per the usage example above, the register is
// usable right after construction without a separate alloc() call.
// Constructor definitions are not visible here.
class ScratchReg : public LazyScratchReg {
public:
explicit ScratchReg(RegAlloc& regMap);
// Use this constructor to reserve an already-selected register, which
// must be free.
ScratchReg(RegAlloc& regMap, PhysReg pr);
};
/*
* DumbScratch allocates a register out of a RegSet, putting it back
* when it's done. This is used for very simple register selection
* when we don't want to use the whole register allocator
* (e.g. between tracelets).
*
* Since this thing is dumb, there's no recourse if the set has no
* registers available. This thing will assert, then throw, in that
* case.
*/
// Non-copyable holder of one register taken from a caller-owned RegSet.
// The set is held by reference, so it must outlive this object.
struct DumbScratchReg : private boost::noncopyable {
explicit DumbScratchReg(RegSet& allocSet);
~DumbScratchReg();
// Accessors for the held register at various operand widths.
friend PhysReg r(const DumbScratchReg& d) { return d.m_reg; }
friend Reg32 r32(const DumbScratchReg& d) { return r32(d.m_reg); }
friend Reg64 r64(const DumbScratchReg& d) { return r64(d.m_reg); }
private:
RegSet& m_regPool;
// Fixed for the lifetime of the object.
const PhysReg m_reg;
};
} } // HPHP::Transl
#endif /* incl_HPHP_REG_ALLOC_H_ */
@@ -39,334 +39,6 @@ void ifThen(Transl::X64Assembler& a, ConditionCode cc, Then thenBlock) {
// RAII aids to machine code.
// Put FreezeRegs in a scope around any emit calls where the caller needs
// to be sure the callee will not modify the state of the register
// map. (Arises in situations with conditional code often.)
struct FreezeRegs : private boost::noncopyable {
  // Freezes `regs` for the lifetime of this object.
  explicit FreezeRegs(RegAlloc& regs)
    : m_regs(regs)
  {
    m_regs.freeze();
  }

  // Undoes the freeze taken by the constructor.
  ~FreezeRegs() {
    m_regs.defrost();
  }

 private:
  RegAlloc& m_regs;
};
// Scoped override of tx64->m_spillFillCode: points it at `newCode` for
// the lifetime of the object and puts the previous value back on
// destruction.
class RedirectSpillFill : boost::noncopyable {
  // Value of tx64->m_spillFillCode at construction time.
  X64Assembler* const m_oldSpf;

 public:
  explicit RedirectSpillFill(X64Assembler* newCode)
    : m_oldSpf(tx64->m_spillFillCode)
  {
    tx64->m_spillFillCode = newCode;
  }

  ~RedirectSpillFill() {
    tx64->m_spillFillCode = m_oldSpf;
  }
};
// DiamondGuard is a scoped way to protect register allocator state around
// control flow. When we enter some optional code that may affect the state
// of the register file, we copy the register file's state, and redirect any
// spills and fills to the branch's body.
//
// When we're ready to rejoin the main control flow, we bring the registers
// back into the state they were in, and restore the old spill/fill
// destinations.
class DiamondGuard : boost::noncopyable {
  // Redirects spill/fill code to the guarded assembler for our lifetime.
  RedirectSpillFill m_spfChanger;

 public:
  // Snapshot the current register map onto tx64->m_savedRegMaps, tagged
  // with `this` so the destructor can check it pops its own entry.
  explicit DiamondGuard(X64Assembler& a)
    : m_spfChanger(&a)
  {
    tx64->m_savedRegMaps.push(
      TranslatorX64::SavedRegState(this, tx64->m_regMap));
  }

  ~DiamondGuard() {
    assert(!tx64->m_savedRegMaps.empty());
    assert(tx64->m_savedRegMaps.top().saver == this);

    // Bring the register state back to its state in the main body.
    //
    // Note: it's important that tx64->m_regMap is the branch
    // RegAlloc during this. See RegAlloc::reconcile.
    tx64->m_savedRegMaps.top().savedState.reconcile(tx64->m_regMap);

    // Then adopt the saved (main-line) state and drop the snapshot.
    tx64->m_regMap = tx64->m_savedRegMaps.top().savedState;
    tx64->m_savedRegMaps.pop();
  }
};
// Helper for use with UnlikelyIfBlock when you have a complex else
// branch that needs to make changes to the register file.
//
// For an example usage see UnlikelyIfBlock.
class DiamondReturn : boost::noncopyable {
  X64Assembler* m_branchA;      // assembler holding the unlikely branch
  X64Assembler* m_mainA;        // main-line assembler; null once killed
  TCA m_branchJmp;              // address of the branch's return jmp
  TCA m_finishBranchFrontier;   // branch frontier right after that jmp

 private:
  friend class UnlikelyIfBlock;

  // Record the two code regions and snapshot the current register map
  // onto tx64->m_savedRegMaps, tagged with `this`.
  void initBranch(X64Assembler* branchA, X64Assembler* mainA) {
    /*
     * DiamondReturn must be used with branches going to different
     * code regions.
     */
    assert(branchA != mainA);

    m_branchA = branchA;
    m_mainA = mainA;
    tx64->m_savedRegMaps.push(
      TranslatorX64::SavedRegState(this, tx64->m_regMap));
  }

  // Called once the branch body and its return jmp have been emitted;
  // `jmp` is the address of that jmp and `frontier` the branch
  // frontier after it.
  void finishBranch(TCA jmp, TCA frontier) {
    m_branchJmp = jmp;
    m_finishBranchFrontier = frontier;

    // If there's some reason to do something other than this we have
    // to change the way this class works.
    const int UNUSED kJumpSize = 5;
    assert(m_finishBranchFrontier == m_branchJmp + kJumpSize);

    // We're done with the branch, so save the branch's state and
    // switch back to the main line's state.
    swapRegMaps();
  }

  bool finishedBranch() const { return m_branchJmp != 0; }

  // Exchange tx64->m_regMap with the snapshot this object pushed.
  void swapRegMaps() {
    assert(!tx64->m_savedRegMaps.empty());
    assert(tx64->m_savedRegMaps.top().saver == this);
    std::swap(tx64->m_savedRegMaps.top().savedState, tx64->m_regMap);
  }

  // Emit spills/fills into the branch so that its register state
  // matches the main line, then adopt the main-line state and pop our
  // snapshot.
  void emitReconciliation() {
    assert(!tx64->m_savedRegMaps.empty());
    assert(tx64->m_savedRegMaps.top().saver == this);

    RedirectSpillFill spfRedir(m_branchA);

    if (finishedBranch()) {
      // We need tx64->m_regMap to point at the branch during
      // reconciliation. See RegAlloc::reconcile().
      swapRegMaps();
    }
    RegAlloc& branchState = tx64->m_regMap;
    RegAlloc& currentState = tx64->m_savedRegMaps.top().savedState;
    currentState.reconcile(branchState);
    tx64->m_regMap = currentState;
    tx64->m_savedRegMaps.pop();
  }

 public:
  // Every member starts out null, so the destructor never reads an
  // indeterminate pointer, even if initBranch() was never called and
  // the assertion below is compiled out.
  explicit DiamondReturn()
    : m_branchA(nullptr)
    , m_mainA(nullptr)
    , m_branchJmp(0)
    , m_finishBranchFrontier(0)
  {}

  // Mark the diamond as dead: the destructor will then emit nothing.
  void kill() {
    m_mainA = nullptr;
  }

  ~DiamondReturn() {
    assert(m_branchA &&
      "DiamondReturn was created without being passed to UnlikelyIfBlock");

    if (!m_mainA) {
      /*
       * We were killed. eg the UnlikelyIfBlock took a side exit, so
       * no reconciliation/branch back to a is required.
       */
      return;
    }

    if (!finishedBranch()) {
      /*
       * In this case, we're reconciling the branch even though it
       * isn't really finished (no one ever called finishBranch()), so
       * we just need to emit spills/fills now and not be as clever as
       * below. See UnlikelyIfBlock::reconcileEarly.
       */
      emitReconciliation();
      return;
    }

    const TCA currentBranchFrontier = m_branchA->code.frontier;
    const bool branchFrontierMoved =
      currentBranchFrontier != m_finishBranchFrontier;

    /*
     * If the branch frontier hasn't moved since the branch was
     * finished, we don't need the jmp that was already emitted
     * anymore, so rewind so we can potentially overwrite it with
     * spills/fills.
     */
    if (!branchFrontierMoved) {
      m_branchA->code.frontier = m_branchJmp;
    }

    // Send out reconciliation code to the branch area. We want to
    // bring the state of the branch's register file into sync with
    // the main-line.
    const TCA spfStart = m_branchA->code.frontier;
    emitReconciliation();
    const bool hadAnySpf = spfStart != m_branchA->code.frontier;

    if (branchFrontierMoved) {
      /*
       * In this case, more than one DiamondReturn is being used and
       * there are multiple unlikely branches.
       *
       * If there was no reconciliation code it's no big deal, we'll
       * just patch the existing jmp to go to the return in m_mainA.
       * But if we needed reconciliation code, we'll instead want to
       * patch it to jump there.
       */
      if (hadAnySpf) {
        m_branchA->patchJmp(m_branchJmp, spfStart);
        m_branchA->jmp(m_mainA->code.frontier);
      } else {
        m_branchA->patchJmp(m_branchJmp, m_mainA->code.frontier);
      }
    } else {
      assert(spfStart == m_branchJmp);
      m_branchA->jmp(m_mainA->code.frontier);
    }
  }
};
// Code to profile how often our UnlikelyIfBlock branches are taken in
// practice. Enable with TRACE=unlikely:1
struct JmpHitRate {
  litstr key;       // label identifying the profiled branch site
  uint64_t check;   // number of times the branch was evaluated
  uint64_t take;    // number of those evaluations where it was taken

  JmpHitRate() : key(nullptr), check(0), take(0) {}

  // Percentage of checks that were taken. A site that was never
  // checked reports 0 rather than NaN, so that operator< below stays a
  // valid ordering.
  float rate() const {
    if (check == 0) return 0;
    return 100.0 * take / check;
  }

  // Orders entries by descending hit rate.
  bool operator<(const JmpHitRate& b) const {
    return rate() > b.rate();
  }
};
typedef hphp_hash_map<litstr, JmpHitRate, pointer_hash<const char>> JmpHitMap;
extern __thread JmpHitMap* tl_unlikelyHits;
extern __thread JmpHitMap* tl_jccHits;
// Runtime helper: bump the counters for the branch site identified by
// `key` in the thread-local table selected by `mod`. `take` is non-zero
// when the branch was taken.
template<Trace::Module mod>
static void recordJmpProfile(litstr key, int64_t take) {
  JmpHitMap* const table =
    (mod == Trace::unlikely) ? tl_unlikelyHits : tl_jccHits;
  JmpHitRate& entry = (*table)[key];
  entry.key = key;
  ++entry.check;
  if (take != 0) {
    ++entry.take;
  }
}
// Emit code into `a` that calls recordJmpProfile<mod> with a key
// describing the current emission site and with whether condition `cc`
// holds. Emits nothing unless tracing for `mod` is enabled.
template<Trace::Module mod>
void emitJmpProfile(X64Assembler& a, ConditionCode cc) {
using namespace reg;
if (!Trace::moduleEnabledRelease(mod)) return;
const ssize_t sz = 1024;
char key[sz];
// Clean up filename
std::string file =
boost::filesystem::path(tx64->m_curFile).filename().string();
// Get instruction if wanted
const NormalizedInstruction* ni = tx64->m_curNI;
std::string inst;
if (Trace::moduleEnabledRelease(mod, 2)) {
inst = std::string(", ") + (ni ? opcodeToName(ni->op()) : "<none>");
}
// Trace level 3 selects the comma-separated variant of the key format.
const char* fmt = Trace::moduleEnabledRelease(mod, 3) ?
"%-25s:%-5d, %-28s%s" :
"%-25s:%-5d (%-28s%s)";
// A return value >= sz means the key was truncated; the terminator is
// then (re)written at the end of the buffer.
if (snprintf(key, sz, fmt,
file.c_str(), tx64->m_curLine, tx64->m_curFunc,
inst.c_str()) >= sz) {
key[sz-1] = '\0';
}
// The key buffer is a local, so the emitted code is given the data
// pointer of the static string obtained for it instead.
litstr data = StringData::GetStaticString(key)->data();
// rsi is pushed/popped by hand below, so it is left out of the set
// handed to PhysRegSaver.
RegSet allRegs = kAllX64Regs;
allRegs.remove(rsi);
a. pushf ();
a. push (rsi);
// Materialize the condition as 0/1 in esi (second argument, `take`).
a. setcc (cc, sil);
a. movzbl (sil, esi);
{
PhysRegSaver regs(a, allRegs);
// First argument: the key.
a.emitImmReg((intptr_t)data, rdi);
// Never executed. NOTE(review): presumably here so the call below is
// type-checked against recordJmpProfile's real signature -- confirm.
if (false) {
recordJmpProfile<mod>("", 0);
}
a.call ((TCA)recordJmpProfile<mod>);
}
a. pop (rsi);
a. popf ();
}
// Allocate the thread-local hit tables for whichever of the `unlikely`
// and `jcc` trace modules are enabled.
inline void initJmpProfile() {
  const bool wantUnlikely = Trace::moduleEnabledRelease(Trace::unlikely);
  if (wantUnlikely) {
    tl_unlikelyHits = new JmpHitMap();
  }

  const bool wantJcc = Trace::moduleEnabledRelease(Trace::jcc);
  if (wantJcc) {
    tl_jccHits = new JmpHitMap();
  }
}
/*
 * Print the hit-rate table for `mod` (Trace::jcc selects the JccBlock
 * table, anything else the UnlikelyIfBlock table), then delete the
 * table and null its thread-local pointer.
 *
 * Does nothing if the table was never allocated. If the table exists
 * but is empty, returns without printing and without deleting it.
 */
inline void dumpProfileImpl(Trace::Module mod) {
  JmpHitMap*& table = mod == Trace::jcc ? tl_jccHits : tl_unlikelyHits;
  if (!table) return;

  // Flatten the map into a vector and accumulate the grand total.
  std::vector<JmpHitRate> hits;
  JmpHitRate overall;
  overall.key = "total";
  for (const auto& item : *table) {
    overall.check += item.second.check;
    overall.take += item.second.take;
    hits.push_back(item.second);
  }
  if (hits.empty()) return;

  // Most-taken sites first; ties broken by most-checked.
  auto cmp = [](const JmpHitRate& a, const JmpHitRate& b) {
    if (a.take != b.take) return a.take > b.take;
    return a.check > b.check;
  };
  std::sort(hits.begin(), hits.end(), cmp);

  Trace::traceRelease("%s hit rates for %s:\n",
                      mod == Trace::jcc ? "JccBlock" : "UnlikelyIfBlock",
                      g_context->getRequestUrl(50).c_str());

  // Both formats consume, in order: rate, take, check, percentage of
  // all taken branches, key.
  const char* fmt = Trace::moduleEnabledRelease(mod, 3) ?
    "%6.2f, %8llu, %8llu, %5.1f, %s\n" :
    "%6.2f%% (%8llu / %8llu, %5.1f%% of total): %s\n";
  auto printRate = [&](const JmpHitRate& hr) {
    // Avoid 0/0 when no branch was ever taken.
    const double pctOfTotal =
      overall.take ? 100.0 * hr.take / overall.take : 0.0;
    // Arguments follow the order of the conversions in fmt; the
    // counters are cast to match %llu exactly.
    Trace::traceRelease(fmt,
                        hr.rate(),
                        static_cast<unsigned long long>(hr.take),
                        static_cast<unsigned long long>(hr.check),
                        pctOfTotal,
                        hr.key);
  };
  printRate(overall);
  for (const auto& hr : hits) {
    printRate(hr);
  }
  Trace::traceRelease("\n");

  delete table;
  table = nullptr;
}
// Dump both jump-profile tables, provided at least one of the
// `unlikely` / `jcc` trace modules is enabled.
inline void dumpJmpProfile() {
  const bool anyEnabled = Trace::moduleEnabledRelease(Trace::unlikely) ||
                          Trace::moduleEnabledRelease(Trace::jcc);
  if (anyEnabled) {
    dumpProfileImpl(Trace::unlikely);
    dumpProfileImpl(Trace::jcc);
  }
}
// UnlikelyIfBlock:
//
// Branch to distant code (that we presumably don't expect to
@@ -408,59 +80,21 @@ struct UnlikelyIfBlock {
X64Assembler& m_unlikely;
TCA m_likelyPostBranch;
DiamondReturn* m_returnDiamond;
bool m_externalDiamond;
boost::optional<FreezeRegs> m_ice;
explicit UnlikelyIfBlock(ConditionCode cc,
X64Assembler& likely,
X64Assembler& unlikely,
DiamondReturn* returnDiamond = 0)
X64Assembler& unlikely)
: m_likely(likely)
, m_unlikely(unlikely)
, m_returnDiamond(returnDiamond ? returnDiamond : new DiamondReturn())
, m_externalDiamond(!!returnDiamond)
{
emitJmpProfile<Trace::unlikely>(m_likely, cc);
m_likely.jcc(cc, m_unlikely.code.frontier);
m_likelyPostBranch = m_likely.code.frontier;
m_returnDiamond->initBranch(&unlikely, &likely);
tx64->m_spillFillCode = &unlikely;
}
~UnlikelyIfBlock() {
TCA jmpAddr = m_unlikely.code.frontier;
m_unlikely.jmp(m_likelyPostBranch);
if (m_returnDiamond) {
m_returnDiamond->finishBranch(jmpAddr, m_unlikely.code.frontier);
if (!m_externalDiamond) {
delete m_returnDiamond;
}
}
tx64->m_spillFillCode = &m_likely;
}
/*
* Force early reconciliation between the branch and main line.
* Using this has some tricky cases, part of which is that you can't
* allocate registers anymore in the branch if you do this (so we'll
* freeze until ~UnlikelyIfBlock).
*
* It's also almost certainly an error if we have m_externalDiamond, so
* for now that's an assert.
*/
void reconcileEarly() {
assert(!m_externalDiamond);
delete m_returnDiamond;
m_returnDiamond = 0;
m_ice = boost::in_place<FreezeRegs>(boost::ref(tx64->m_regMap));
}
};
#define UnlikelyIfBlock \
m_curFile = __FILE__; m_curFunc = __FUNCTION__; m_curLine = __LINE__; \
UnlikelyIfBlock
// Helper structs for jcc vs. jcc8.
struct Jcc8 {
static void branch(X64Assembler& a, ConditionCode cc, TCA dest) {
@@ -487,19 +121,15 @@ template <ConditionCode Jcc, typename J=Jcc8>
struct JccBlock {
mutable X64Assembler* m_a;
TCA m_jcc;
mutable DiamondGuard* m_dg;
explicit JccBlock(X64Assembler& a)
: m_a(&a),
m_dg(new DiamondGuard(a)) {
emitJmpProfile<Trace::jcc>(a, Jcc);
: m_a(&a) {
m_jcc = a.code.frontier;
J::branch(a, Jcc, m_a->code.frontier);
}
~JccBlock() {
if (m_a) {
delete m_dg;
J::patch(*m_a, m_jcc, m_a->code.frontier);
}
}
@@ -509,54 +139,13 @@ private:
JccBlock& operator=(const JccBlock&);
};
#define JccBlock \
m_curFile = __FILE__; m_curFunc = __FUNCTION__; m_curLine = __LINE__; \
JccBlock
// Run `body` with a DiamondGuard on `a` held for the duration of the
// call; the guard is destroyed when this function returns.
template<class Lambda>
void guardDiamond(X64Assembler& a, Lambda body) {
  DiamondGuard scopedRegState(a);
  body();
}
template<ConditionCode Jcc, class Lambda>
void jccBlock(X64Assembler& a, Lambda body) {
Label exit;
guardDiamond(a, [&] {
exit.jcc8(a, Jcc);
body();
});
asm_label(a, exit);
exit.jcc8(a, Jcc);
body();
asm_label(a, exit);
}
/*
 * semiLikelyIfBlock emits a conditional block that is expected to be
 * unlikely, but not so unlikely that it should be shoved into astubs.
 *
 * Code layout, all in `a': a short conditional jump (on Jcc) into the
 * body, a short unconditional jump over the body, the body itself, and
 * then the join point. The jumps and the body are emitted under
 * guardDiamond.
 *
 * Usage example:
 *
 *   a.  test_reg64_reg64(*rFoo, *rFoo);
 *   semiLikelyIfBlock(CC_Z, a, [&]{
 *     EMIT_CALL(a, some_helper);
 *     emitMovRegReg(a, rax, *rFoo);
 *   });
 */
template<class Lambda>
void semiLikelyIfBlock(ConditionCode Jcc, X64Assembler& a, Lambda body) {
  Label skipBody;    // join point after the block
  Label enterBody;   // start of the conditional block
  guardDiamond(a, [&] {
    enterBody.jcc8(a, Jcc);
    skipBody.jmp8(a);
    asm_label(a, enterBody);
    body();
  });
  asm_label(a, skipBody);
}
// A CondBlock is an RAII structure for emitting conditional code. It
// compares the source register at fieldOffset with fieldValue, and
// conditionally branches over the enclosing block of assembly on the
@@ -578,10 +167,10 @@ struct CondBlock {
X64Assembler& m_a;
int m_off;
TCA m_jcc8;
DiamondGuard* m_dg;
CondBlock(X64Assembler& a, PhysReg reg, int offset = 0)
: m_a(a), m_off(offset), m_dg(new DiamondGuard(a)) {
: m_a(a)
, m_off(offset) {
int typeDisp = m_off + FieldOffset;
static_assert(sizeof(FieldType) == 1 || sizeof(FieldType) == 4,
"CondBlock of unimplemented field size");
@@ -596,7 +185,6 @@ struct CondBlock {
}
~CondBlock() {
delete m_dg;
m_a.patchJcc8(m_jcc8, m_a.code.frontier);
}
};
@@ -641,33 +229,6 @@ locToRegDisp(const Location& l, PhysReg *outbase, int *outdisp,
// Common code emission patterns.
// emitStoreImm --
//   Store the immediate `imm' to [r + off], choosing an encoding by
//   `size'. For qword stores, a register already known by `regAlloc'
//   to hold the value is preferred; failing that, the immediate form
//   is used when the value fits in a dword, and rAsm is loaded and
//   stored otherwise. Sizes other than qword, dword and byte are not
//   implemented.
static void
emitStoreImm(X64Assembler& a, uint64_t imm, PhysReg r, int off,
             int size = sz::qword, RegAlloc* regAlloc = nullptr) {
  using namespace reg;

  if (size == sz::dword) {
    a.  store_imm32_disp_reg(imm, off, r);
    return;
  }
  if (size == sz::byte) {
    a.  store_imm8_disp_reg(imm, off, r);
    return;
  }
  if (size != sz::qword) {
    not_implemented();
    return;
  }

  // qword: try to reuse a register that already holds the value.
  PhysReg srcReg = regAlloc ? regAlloc->getImmReg(imm) : InvalidReg;
  if (srcReg == InvalidReg) {
    if (deltaFits(imm, sz::dword)) {
      a.  store_imm64_disp_reg64(imm, off, r);
      return;
    }
    // Too wide for the immediate form: go through rAsm.
    emitImmReg(a, imm, rAsm);
    srcReg = rAsm;
  }
  a.  store_reg64_disp_reg64(srcReg, off, r);
}
// vstackOffset --
// emitVStackStore --
//
@@ -679,15 +240,7 @@ emitStoreImm(X64Assembler& a, uint64_t imm, PhysReg r, int off,
// to be a hardware size: 1, 2, 4, or 8 bytes.
static inline int
vstackOffset(const NormalizedInstruction& ni, COff off) {
return off - cellsToBytes(ni.stackOff);
}
static inline void
emitVStackStoreImm(X64Assembler &a, const NormalizedInstruction &ni,
uint64_t imm, int off, int size = sz::qword,
RegAlloc *regAlloc = nullptr) {
int hwOff = vstackOffset(ni, off);
emitStoreImm(a, imm, rVmSp, hwOff, size, regAlloc);
not_reached();
}
static inline void
@@ -910,107 +463,6 @@ inline void emitCopyToAligned(X64Assembler& a,
a. movdqa (xmm0, dest[destOff]);
}
// ArgManager -- support for passing VM-level data to helper functions.
// Collects a list of arguments (one add* call per argument) and then
// emits them via emitArguments(). Only emitArguments() is defined here;
// the add* methods and the private helpers are defined elsewhere.
class ArgManager {
typedef HPHP::Transl::X64Assembler& A;
public:
ArgManager(TranslatorX64 &tx64, A& a) : m_tx64(tx64), m_a(a) { }
void addImm(uint64_t imm);
void addLoc(const Location &loc);
void addLocRef(const Location &loc);
void addDeref(const Location &loc);
void addReg(PhysReg reg);
void addRegPlus(PhysReg reg, int32_t off);
// Convenience overloads that unwrap a LazyScratchReg to its PhysReg.
void addReg(const LazyScratchReg& l) { addReg(r(l)); }
void addRegPlus(const LazyScratchReg& l, int32_t off) {
addRegPlus(r(l), off);
}
void addLocAddr(const Location &loc);
// Emit the collected arguments. Asserts that there are at most
// kNumRegisterArgs of them, then runs cleanLocs, computeUsed,
// shuffleRegisters and emitValues in that order.
void emitArguments() {
size_t n = m_args.size();
assert((int)n <= kNumRegisterArgs);
cleanLocs();
std::map<PhysReg, size_t> used;
// One slot per argument, initially InvalidReg.
std::vector<PhysReg> actual(n, InvalidReg);
computeUsed(used, actual);
shuffleRegisters(used, actual);
emitValues(actual);
}
private:
// One recorded argument.
struct ArgContent {
enum ArgKind {
ArgImm, ArgLoc, ArgLocRef, ArgDeref, ArgReg, ArgRegPlus, ArgLocAddr
} m_kind;
PhysReg m_reg;
const Location *m_loc;
uint64_t m_imm;
// Register/immediate form: no Location.
ArgContent(ArgKind kind, PhysReg reg, uint64_t imm) :
m_kind(kind), m_reg(reg), m_loc(nullptr), m_imm(imm) { }
// Location form: stores the address of `loc`, not a copy, so the
// referenced Location must stay alive for as long as this entry is used.
ArgContent(ArgKind kind, const Location &loc) :
m_kind(kind), m_reg(InvalidReg), m_loc(&loc), m_imm(0) { }
};
TranslatorX64& m_tx64;
A& m_a;
std::vector<ArgContent> m_args;
ArgManager(); // Don't build without reference to translator
void cleanLocs();
void computeUsed(std::map<PhysReg, size_t> &used,
std::vector<PhysReg> &actual);
void shuffleRegisters(std::map<PhysReg, size_t> &used,
std::vector<PhysReg> &actual);
void emitValues(std::vector<PhysReg> &actual);
};
// Some macros to make writing calls palatable. You have to "type" the
// arguments
//
// Each of the seven macros below forwards to the matching add*() method of
// an ArgManager named _am; EMIT_CALL_PROLOGUE is what declares _am.
#define IMM(i) _am.addImm(i)
#define V(loc) _am.addLoc(loc)
#define VREF(loc) _am.addLocRef(loc)
#define DEREF(loc) _am.addDeref(loc)
#define R(r) _am.addReg(r)
#define RPLUS(r,off) _am.addRegPlus(r, off)
#define A(loc) _am.addLocAddr(loc)
// IE: conditional expression -- argIf when cond holds, otherwise argElse.
#define IE(cond, argIf, argElse) \
((cond) ? (argIf) : (argElse))
// No-op that serves as the "else" arm of ID.
static inline void voidFunc() {}
// ID: selects argDbg when `debug` is true, otherwise the voidFunc() no-op.
#define ID(argDbg) IE(debug, (argDbg), voidFunc())
// EMIT_CALL_PROLOGUE opens a "do {" scope, declares a SpaceRecorder and the
// ArgManager _am, and calls prepareCallSaveRegs(). The scope is left open on
// purpose: it is closed by EMIT_CALL_EPILOGUE, so the two must be paired.
#define EMIT_CALL_PROLOGUE(a) do { \
SpaceRecorder sr("_HCallInclusive", a); \
ArgManager _am(*this, a); \
prepareCallSaveRegs();
// EMIT_CALL_EPILOGUE emits the queued arguments, emits the call to `dest`
// inside its own SpaceRecorder scope, and closes the scope opened by the
// prologue with "} while(0)".
#define EMIT_CALL_EPILOGUE(a, dest) \
_am.emitArguments(); \
{ \
SpaceRecorder sr("_HCallExclusive", a); \
emitCall(a, (TCA)(dest), true); \
} \
} while(0)
// EMIT_CALL: prologue, then the caller's argument expressions (the variadic
// part, e.g. uses of IMM/V/R above), then the epilogue.
#define EMIT_CALL(a, dest, ...) \
EMIT_CALL_PROLOGUE(a) \
__VA_ARGS__ ; \
EMIT_CALL_EPILOGUE(a, dest)
// EMIT_RCALL: EMIT_CALL followed by recordReentrantCall(a, ni).
// NOTE(review): this expands to TWO statements, so under an unbraced
// if/else/loop only the EMIT_CALL part is governed by the condition.
#define EMIT_RCALL(a, ni, dest, ...) \
EMIT_CALL(a, dest, __VA_ARGS__); \
recordReentrantCall(a, ni);
// supportedPlan --
// nativePlan --
// simplePlan --
Diferenças do arquivo suprimidas por serem muito extensas Carregar Diff
+3 -151
Ver Arquivo
@@ -25,7 +25,6 @@
#include "hphp/util/asm-x64.h"
#include "hphp/runtime/vm/translator/srcdb.h"
#include "hphp/runtime/vm/translator/unwind-x64.h"
#include "hphp/runtime/vm/translator/regalloc.h"
#include "tbb/concurrent_hash_map.h"
#include "hphp/util/ringbuffer.h"
#include "hphp/runtime/vm/debug/debug.h"
@@ -128,11 +127,9 @@ void prepareForSmash(Asm&, int nBytes, int offset = 0);
bool isSmashable(Address frontier, int nBytes, int offset = 0);
class TranslatorX64 : public Translator
, SpillFill
, boost::noncopyable {
friend class SrcRec; // so it can smash code.
friend class SrcDB; // For write lock and code invalidation.
friend class ArgManager;
friend class WithCounters;
friend class DiamondGuard;
friend class DiamondReturn;
@@ -151,7 +148,6 @@ class TranslatorX64 : public Translator
typedef void (*sigaction_t)(int, siginfo_t*, void*);
typedef X64Assembler Asm;
typedef std::map<int, int> ContParamMap;
static const int kMaxInlineContLocals = 10;
class AHotSelector {
@@ -185,10 +181,6 @@ class TranslatorX64 : public Translator
PointerMap trampolineMap;
int m_numNativeTrampolines;
size_t m_trampolineSize; // size of each trampoline
// spillFillCode points to one of a or astubs. We need it to produce
// reconciliation code to the alternate buffer. Don't directly manipulate;
// use DiamondGuard instead.
Asm* m_spillFillCode;
SrcDB m_srcDB;
SignalStubMap m_segvStubs;
@@ -227,18 +219,6 @@ class TranslatorX64 : public Translator
void hhirTraceFree();
// A copy of a RegAlloc paired with an opaque pointer identifying who saved it.
struct SavedRegState {
  // For debugging: ensure these are popped in the right order.
  void* saver;
  // Copy of the RegAlloc passed to the constructor.
  RegAlloc savedState;

  explicit SavedRegState(void* owner, const RegAlloc& snapshot)
    : saver(owner), savedState(snapshot) {}
};
RegAlloc m_regMap;
std::stack<SavedRegState> m_savedRegMaps;
FixupMap m_fixupMap;
UnwindInfoHandle m_unwindRegistrar;
CatchTraceMap m_catchTraceMap;
@@ -266,33 +246,12 @@ private:
void drawCFG(std::ofstream& out) const;
static vector<PhysReg> x64TranslRegs();
// Returns whatever m_regMap.getReg() reports for `loc`.
PhysReg getReg(const Location& loc) {
return m_regMap.getReg(loc);
}
// Same lookup, keyed by the `location` member of a DynLocation.
PhysReg getReg(const DynLocation& dl) {
return m_regMap.getReg(dl.location);
}
// Picks one of the assemblers a, ahot or astubs for `addr` via asmChoose().
Asm& getAsmFor(TCA addr) { return asmChoose(addr, a, ahot, astubs); }
void emitIncRef(X64Assembler &a, PhysReg base, DataType dtype);
void emitIncRef(PhysReg base, DataType);
void emitIncRefGenericRegSafe(PhysReg base, int disp, PhysReg tmp);
void emitIncRefGeneric(PhysReg base, int disp = 0);
void emitDecRef(Asm& a, const NormalizedInstruction& i, PhysReg rDatum,
DataType type);
void emitDecRef(const NormalizedInstruction& i, PhysReg rDatum,
DataType type);
void emitDecRefGeneric(const NormalizedInstruction& i, PhysReg srcReg,
int disp = 0);
void emitDecRefGenericReg(PhysReg rData, PhysReg rType);
void emitDecRefInput(Asm& a, const NormalizedInstruction& i, int input);
static Call getDtorCall(DataType type);
void emitCopy(PhysReg srcCell, int disp, PhysReg destCell);
void emitCopyToStack(Asm& a,
const NormalizedInstruction& ni,
PhysReg src,
int off);
void emitCopyToStackRegSafe(Asm& a,
const NormalizedInstruction& ni,
PhysReg src,
@@ -300,72 +259,23 @@ private:
PhysReg tmpReg);
void emitThisCheck(const NormalizedInstruction& i, PhysReg reg);
void emitPushAR(const NormalizedInstruction& i, const Func* func,
const int bytesPopped = 0, bool isCtor = false,
bool clearThis = true, uintptr_t varEnvInvName = 0);
void emitCallSaveRegs();
// Stub: the body is only not_reached(), so this is not expected to be called.
void prepareCallSaveRegs() { not_reached(); }
void emitCallStaticLocHelper(X64Assembler& as,
const NormalizedInstruction& i,
ScratchReg& output,
ptrdiff_t ch);
public:
void emitCall(Asm& a, TCA dest, bool killRegs=false);
void emitCall(Asm& a, Call call, bool killRegs=false);
void emitCall(Asm& a, TCA dest);
void emitCall(Asm& a, Call call);
private:
/* Continuation-related helpers */
static bool mapContParams(ContParamMap& map, const Func* origFunc,
const Func* genFunc);
void emitCallFillCont(Asm& a, const Func* orig, const Func* gen);
void emitCallPack(Asm& a, const NormalizedInstruction& i);
void emitContRaiseCheck(Asm& a, const NormalizedInstruction& i);
void emitContExit();
void emitContPreNext(const NormalizedInstruction& i, ScratchReg& rCont);
void emitContStartedCheck(const NormalizedInstruction& i, ScratchReg& rCont);
template<bool raise>
void translateContSendImpl(const NormalizedInstruction& i);
void translateClassExistsImpl(const Tracelet& t,
const NormalizedInstruction& i,
Attr typeAttr);
void recordSyncPoint(Asm& a, Offset pcOff, Offset spOff);
void emitEagerSyncPoint(Asm& a, const Opcode* pc, const Offset spDiff);
void recordIndirectFixup(CTCA addr, int dwordsPushed);
// Shared implementation behind the record*Call wrappers below; declared
// here, defined elsewhere. The wrappers in this block only ever instantiate
// it with reentrant == true.
template <bool reentrant>
void recordCallImpl(Asm& a, const NormalizedInstruction& i,
bool advance = false, int adjust = 0);
// Forwards all four arguments to recordCallImpl<true>.
void recordReentrantCall(Asm& a, const NormalizedInstruction& i,
bool advance = false, int adjust = 0) {
recordCallImpl<true>(a, i, advance, adjust);
}
// Same, using the member assembler `a` and the default advance/adjust.
void recordReentrantCall(const NormalizedInstruction& i) {
recordCallImpl<true>(a, i);
}
// Same, but records against the `astubs` assembler.
void recordReentrantStubCall(const NormalizedInstruction& i,
bool advance = false) {
recordCallImpl<true>(astubs, i, advance);
}
// recordCall overloads are declared here and defined elsewhere.
void recordCall(Asm& a, const NormalizedInstruction& i);
void recordCall(const NormalizedInstruction& i);
// recordCall against the `astubs` assembler.
void recordStubCall(const NormalizedInstruction& i) {
recordCall(astubs, i);
}
void recordEagerCall(Asm& a, const NormalizedInstruction& i);
void emitSideExit(Asm& a, const NormalizedInstruction& dest, bool next);
void emitStringToClass(const NormalizedInstruction& i);
void emitKnownClassCheck(const NormalizedInstruction& i,
const StringData* clssName,
RegNumber reg);
void emitStringToKnownClass(const NormalizedInstruction& i,
const StringData* clssName);
void emitObjToClass(const NormalizedInstruction& i);
void emitClsAndPals(const NormalizedInstruction& i);
void emitStaticPropInlineLookup(const NormalizedInstruction& i,
int classInputIdx,
const DynLocation& propInput,
PhysReg scr);
template<int Arity> TCA emitNAryStub(Asm& a, Call c);
TCA emitUnaryStub(Asm& a, Call c);
@@ -377,36 +287,13 @@ private:
TCA emitPrologueRedispatch(Asm &a);
TCA emitFuncGuard(Asm& a, const Func *f);
// Shared implementation for calling a one-argument stub; declared here,
// defined elsewhere. The `reentrant` template flag is chosen by the two
// wrappers below.
template <bool reentrant>
void callUnaryStubImpl(Asm& a, const NormalizedInstruction& i, TCA stub,
PhysReg arg, int disp = 0);
// Forwards to callUnaryStubImpl with reentrant == true.
void callUnaryReentrantStub(Asm& a, const NormalizedInstruction& i, TCA stub,
PhysReg arg, int disp = 0) {
callUnaryStubImpl<true>(a, i, stub, arg, disp);
}
// Forwards to callUnaryStubImpl with reentrant == false.
void callUnaryStub(Asm& a, const NormalizedInstruction& i, TCA stub,
PhysReg arg, int disp = 0) {
callUnaryStubImpl<false>(a, i, stub, arg, disp);
}
void callBinaryStub(Asm& a, const NormalizedInstruction& i, TCA stub,
PhysReg arg1, PhysReg arg2);
void emitDerefStoreToLoc(PhysReg srcReg, const Location& destLoc);
void getInputsIntoXMMRegs(const NormalizedInstruction& ni,
PhysReg lr, PhysReg rr,
RegXMM lxmm, RegXMM rxmm);
void binaryIntegerArith(const NormalizedInstruction &i,
Opcode op, PhysReg srcReg, PhysReg srcDestReg);
void binaryMixedArith(const NormalizedInstruction &i,
Opcode op, PhysReg srcReg, PhysReg srcDestReg);
void binaryArithCell(const NormalizedInstruction &i,
Opcode op,
const DynLocation& in1,
const DynLocation& inout);
void binaryArithLocal(const NormalizedInstruction &i,
Opcode op,
const DynLocation& in1,
const DynLocation& in2,
const DynLocation& out);
void fpEq(const NormalizedInstruction& i, PhysReg lr, PhysReg rr);
void emitRB(Asm& a, Trace::RingBufferType t, SrcKey sk,
RegSet toSave = RegSet());
@@ -418,14 +305,8 @@ private:
ArgDontAllocate = -1,
ArgAnyReg = -2
};
void allocInputsForCall(const NormalizedInstruction& i,
const int* args);
private:
void invalidateOutStack(const NormalizedInstruction& ni);
void cleanOutLocal(const NormalizedInstruction& ni);
void invalidateOutLocal(const NormalizedInstruction& ni);
#define INSTRS \
CASE(PopC) \
CASE(PopV) \
@@ -658,17 +539,6 @@ private:
private:
virtual void syncWork();
void spillTo(DataType t, PhysReg reg, bool writeType,
PhysReg base, int disp);
// SpillFill interface
void spill(const Location& loc, DataType t, PhysReg reg,
bool writeType);
void fill(const Location& loc, PhysReg reg);
void fillByMov(PhysReg src, PhysReg dst);
void loadImm(int64_t immVal, PhysReg reg);
void poison(PhysReg dest);
public:
bool acquireWriteLease(bool blocking) {
return s_writeLease.acquire(blocking);
@@ -689,8 +559,6 @@ public:
bool freeRequestStub(TCA stub);
TCA getFreeStub();
private:
void translateInstr(const Tracelet& t, const NormalizedInstruction& i);
void translateInstrWork(const Tracelet& t, const NormalizedInstruction& i);
void irInterpretInstr(const NormalizedInstruction& i);
void irTranslateInstr(const Tracelet& t, const NormalizedInstruction& i);
void irTranslateInstrWork(const Tracelet& t, const NormalizedInstruction& i);
@@ -712,10 +580,6 @@ private:
vector<TransBCMapping>* bcMap);
void irPassPredictedAndInferredTypes(const NormalizedInstruction& i);
void emitStringCheck(Asm& _a, PhysReg base, int offset);
void emitTypeCheck(Asm& _a, DataType dt,
PhysReg base, int offset,
SrcRec* fail = nullptr);
void irAssertType(const Location& l, const RuntimeType& rtt);
void checkType(Asm&, const Location& l, const RuntimeType& rtt,
SrcRec& fail);
@@ -724,9 +588,6 @@ private:
void checkRefs(Asm&, SrcKey, const RefDeps&, SrcRec&);
void emitDecRefThis(const ScratchReg& tmpReg);
void emitVVRet(const ScratchReg&, Label& extraArgsReturn,
Label& varEnvReturn);
void emitInlineReturn(Location retvalSrcLoc, int retvalSrcDisp);
void emitGenericReturn(bool noThis, int retvalSrcDisp);
void dumpStack(const char* msg, int offset) const;
@@ -770,14 +631,6 @@ private:
PhysReg = reg::r13,
PhysReg = reg::r14,
PhysReg = reg::rax);
void emitCheckUncounted(X64Assembler& a,
PhysReg baseReg,
int offset,
SrcRec& fail);
void emitCheckUncountedInit(X64Assembler& a,
PhysReg baseReg,
int offset,
SrcRec& fail);
TCA emitServiceReq(ServiceRequest, int numArgs, ...);
TCA emitServiceReq(SRFlags flags, ServiceRequest, int numArgs, ...);
@@ -802,7 +655,7 @@ private:
void emitStackCheck(int funcDepth, Offset pc);
void emitStackCheckDynamic(int numArgs, Offset pc);
void emitTestSurpriseFlags(Asm& a);
void emitCheckSurpriseFlagsEnter(bool inTracelet, Fixup f);
void emitCheckSurpriseFlagsEnter(bool inTracelet, Fixup fixup);
TCA emitTransCounterInc(Asm& a);
static void trimExtraArgs(ActRec* ar);
@@ -919,7 +772,6 @@ private:
public: // Only for HackIR
void emitReqRetransNoIR(Asm& as, const SrcKey& sk);
void emitRecordPunt(Asm& as, const std::string& name);
#define DECLARE_FUNC(nm) \
void irTranslate ## nm(const Tracelet& t, \
const NormalizedInstruction& i);
+3 -70
Ver Arquivo
@@ -2357,69 +2357,6 @@ void Translator::getOutputs(/*inout*/ Tracelet& t,
}
}
// Tracks literal values through `stack` and marks on `ni` the instructions
// whose operand is a known constant (hasConstImm / constImm).
//
// OpInt and OpString push their literal onto `stack` and return right away.
// Every other opcode ends by handing the instruction's pc to
// stack.processOpcode().
void
Translator::findImmable(ImmStack &stack,
                        NormalizedInstruction* ni) {
  switch (ni->op()) {
    case OpInt:
      stack.pushInt(getImm(ni->pc(), 0).u_I64A);
      return;

    case OpString:
      stack.pushLitstr(getImm(ni->pc(), 0).u_SA);
      return;

    case OpAdd:
    case OpSub: {
      // Only the operand in slot 0 of the immediate stack is considered. If
      // both operands of a binary op happen to be immediates the generated
      // code is still correct, merely suboptimal.
      if (!stack.isInt(0)) {
        break;
      }
      SKTRACE(1, ni->source, "marking for immediate elision\n");
      ni->hasConstImm = true;
      ni->constImm.u_I64A = stack.get(0).i64a;
      // The OpInt that produced this integer is not removed from the
      // tracelet; the translator would need to support instruction removal
      // for that.
      break;
    }

    case OpFPassM:
    case OpCGetM:
    case OpSetM:
    case OpIssetM: {
      // Looking for "<VecInstr>M <... EC>": a member vector with more than
      // one entry whose last member decodes with code MET.
      const ImmVector& members = ni->immVec;
      assert(members.isValid());
      if (members.size() <= 1) {
        break;
      }
      MemberCode lastCode;
      StringData* key;
      int64_t keyId;
      if (!members.decodeLastMember(curUnit(), key, lastCode, &keyId)) {
        break;
      }
      if (lastCode != MET) {
        break;
      }
      // A literal string key that is not strictly an integer lets the
      // array-access helpers skip their integer check.
      int64_t asInt;
      if (LIKELY(!key->isStrictlyInteger(asInt))) {
        ni->hasConstImm = true;
        ni->constImm.u_SA = keyId;
      }
      break;
    }

    default:
      break;
  }

  stack.processOpcode(ni->pc());
}
void
Translator::requestResetHighLevelTranslator() {
if (dbgTranslateCoin) {
@@ -3135,7 +3072,7 @@ void Translator::analyzeCallee(TraceletContext& tas,
TypeMap initialMap;
LocationSet callerArgLocs;
for (int i = 0; i < numArgs; ++i) {
auto callerLoc = Location(Location::Stack, fcall->stackOff - i - 1);
auto callerLoc = Location(Location::Stack, fcall->stackOffset - i - 1);
auto calleeLoc = Location(Location::Local, numArgs - i - 1);
auto type = tas.currentType(callerLoc);
@@ -3340,14 +3277,13 @@ std::unique_ptr<Tracelet> Translator::analyze(SrcKey sk,
head:
NormalizedInstruction* ni = t.newNormalizedInstruction();
ni->source = sk;
ni->stackOff = stackFrameOffset;
ni->stackOffset = stackFrameOffset;
ni->funcd = (t.m_arState.getCurrentState() == ActRecState::KNOWN) ?
t.m_arState.getCurrentFunc() : nullptr;
ni->m_unit = unit;
ni->preppedByRef = false;
ni->breaksTracelet = false;
ni->changesPC = opcodeChangesPC(ni->op());
ni->manuallyAllocInputs = false;
ni->fuseBranch = false;
ni->outputPredicted = false;
ni->outputPredictionStatic = false;
@@ -3364,9 +3300,6 @@ std::unique_ptr<Tracelet> Translator::analyze(SrcKey sk,
ni->imm[0].u_IVA = 1;
}
// Use the basic block analyzer to follow the flow of immediate values.
findImmable(immStack, ni);
SKTRACE(1, sk, "stack args: virtual sfo now %d\n", stackFrameOffset);
// Translation could fail entirely (because of an unknown opcode), or
@@ -3390,7 +3323,7 @@ std::unique_ptr<Tracelet> Translator::analyze(SrcKey sk,
// beginning of the instruction, but getReffiness() wants the delta
// relative to the sp at the beginning of the tracelet, so we adjust
// by subtracting ni->stackOffset
int entryArDelta = instrSpToArDelta(ni->pc()) - ni->stackOff;
int entryArDelta = instrSpToArDelta(ni->pc()) - ni->stackOffset;
ni->preppedByRef = t.m_arState.getReffiness(argNum,
entryArDelta,
&t.m_refDeps);
+1 -8
Ver Arquivo
@@ -308,15 +308,13 @@ class NormalizedInstruction {
unsigned checkedInputs;
// stackOffset: logical delta at the *start* of this instruction relative to
// the stack at tracelet entry.
int stackOff;
int stackOffset;
int sequenceNum;
bool hasConstImm:1;
bool startsBB:1;
bool breaksTracelet:1;
bool changesPC:1;
bool fuseBranch:1;
bool preppedByRef:1; // For FPass*; indicates parameter reffiness
bool manuallyAllocInputs:1;
bool outputPredicted:1;
bool outputPredictionStatic:1;
bool ignoreInnerType:1;
@@ -363,8 +361,6 @@ class NormalizedInstruction {
// or decrefs).
boost::dynamic_bitset<> nonRefCountedLocals;
ArgUnion constImm;
Op op() const;
Op mInstrOp() const;
PC pc() const;
@@ -380,7 +376,6 @@ class NormalizedInstruction {
, outStack2(nullptr)
, outStack3(nullptr)
, checkedInputs(0)
, hasConstImm(false)
, ignoreInnerType(false)
, guardedThis(false)
, guardedCls(false)
@@ -814,8 +809,6 @@ private:
void produceDataRef(Tracelet* tlet, NormalizedInstruction* ni,
Location loc);
void findImmable(ImmStack &stack, NormalizedInstruction* ni);
virtual void syncWork() = 0;
virtual void invalidateSrcKey(SrcKey sk) = 0;
+1 -1
Ver Arquivo
@@ -86,7 +86,7 @@ class Init {
if (mod >= 0) {
levels[mod] = level;
}
if (mod == Trace::minstr || mod == Trace::punt) {
if (mod == Trace::minstr) {
levels[Trace::statgroups] = std::max(levels[Trace::statgroups], 1);
}
}
-3
Ver Arquivo
@@ -89,11 +89,8 @@ namespace Trace {
TM(printir) \
TM(hhirTracelets) \
TM(gc) \
TM(unlikely) \
TM(jcc) \
TM(instancebits)\
TM(hhas) \
TM(punt) \
TM(statgroups) \
TM(minstr) \
/* Stress categories, to exercise rare paths */ \