Arquivos
hhvm/hphp/runtime/vm/jit/translator-x64.cpp
T
Jordan DeLong f008eafffe Move SrcKey out of translator.h and Transl:: @override-unit-failures
I was going to #include translator.h in a header I had for
talking to the region selector thing and decided to just get this over
with instead.  (It shouldn't need to #include that.)  Found a few
other unused things to remove while at it.
2013-06-03 10:55:04 -07:00

4285 linhas
138 KiB
C++

/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/jit/translator-x64.h"
#include <cinttypes>
#include <stdint.h>
#include <assert.h>
#include <unistd.h>
#include <sys/mman.h>
#include <strstream>
#include <stdio.h>
#include <stdarg.h>
#include <string>
#include <queue>
#include <unwind.h>
#ifdef __FreeBSD__
# include <ucontext.h>
typedef __sighandler_t *sighandler_t;
# define RIP_REGISTER(v) (v).mc_rip
#else
# if defined(__x86_64__)
# define RIP_REGISTER(v) (v).gregs[REG_RIP]
# elif defined(__AARCH64EL__)
# define RIP_REGISTER(v) (v).pc
# endif
#endif
#include <boost/bind.hpp>
#include <boost/optional.hpp>
#include <boost/utility/typed_in_place_factory.hpp>
#include <boost/range/adaptors.hpp>
#include <boost/scoped_ptr.hpp>
#include "folly/Format.h"
#include "hphp/util/asm-x64.h"
#include "hphp/util/bitops.h"
#include "hphp/util/debug.h"
#include "hphp/util/disasm.h"
#include "hphp/util/maphuge.h"
#include "hphp/util/rank.h"
#include "hphp/util/ringbuffer.h"
#include "hphp/util/timer.h"
#include "hphp/util/trace.h"
#include "hphp/util/meta.h"
#include "hphp/util/util.h"
#include "hphp/util/repo_schema.h"
#include "hphp/runtime/vm/bytecode.h"
#include "hphp/runtime/vm/php_debug.h"
#include "hphp/runtime/vm/runtime.h"
#include "hphp/runtime/base/complex_types.h"
#include "hphp/runtime/base/execution_context.h"
#include "hphp/runtime/base/runtime_option.h"
#include "hphp/runtime/base/strings.h"
#include "hphp/runtime/base/strings.h"
#include "hphp/runtime/base/server/source_root_info.h"
#include "hphp/runtime/base/zend/zend_string.h"
#include "hphp/runtime/ext/ext_closure.h"
#include "hphp/runtime/ext/ext_continuation.h"
#include "hphp/runtime/ext/ext_function.h"
#include "hphp/runtime/vm/debug/debug.h"
#include "hphp/runtime/vm/jit/targetcache.h"
#include "hphp/runtime/vm/jit/translator-inline.h"
#include "hphp/runtime/vm/jit/srcdb.h"
#include "hphp/runtime/vm/jit/x64-util.h"
#include "hphp/runtime/vm/jit/unwind-x64.h"
#include "hphp/runtime/base/stats.h"
#include "hphp/runtime/vm/pendq.h"
#include "hphp/runtime/vm/treadmill.h"
#include "hphp/runtime/vm/repo.h"
#include "hphp/runtime/vm/type_profile.h"
#include "hphp/runtime/vm/member_operations.h"
#include "hphp/runtime/vm/jit/abi-x64.h"
#include "hphp/runtime/eval/runtime/file_repository.h"
#include "hphp/runtime/vm/jit/hhbctranslator.h"
#include "hphp/runtime/vm/jit/translator-x64-internal.h"
namespace HPHP {
namespace Transl {
using namespace reg;
using namespace Util;
using namespace Trace;
using std::max;
// X-macro listing the translator perf counters. Each use site defines TPC(n)
// to produce one item per counter, expands the list, then #undefs TPC.
#define TRANS_PERF_COUNTERS \
TPC(translate) \
TPC(retranslate) \
TPC(interp_bb) \
TPC(interp_instr) \
TPC(interp_one) \
TPC(max_trans) \
TPC(enter_tc) \
TPC(service_req)
// Names of the two instruction counters that are appended to the name table
// after the TPC-generated entries.
static const char* const kInstrCountTx64Name = "instr_tx64";
static const char* const kInstrCountIRName = "instr_hhir";
// Name table: one "trans_<counter>" string per TPC entry, followed by the two
// instruction-counter names. Note that this table therefore has two more
// entries than tpc_num_counters.
#define TPC(n) "trans_" #n,
static const char* const kPerfCounterNames[] = {
TRANS_PERF_COUNTERS
kInstrCountTx64Name,
kInstrCountIRName,
};
#undef TPC
// Enumerator per counter (tpc_<counter>); tpc_num_counters is the count of
// TPC entries and sizes s_perfCounters below.
#define TPC(n) tpc_ ## n,
enum TransPerfCounter {
TRANS_PERF_COUNTERS
tpc_num_counters
};
#undef TPC
// Per-thread counter storage, indexed by TransPerfCounter.
static __thread int64_t s_perfCounters[tpc_num_counters];
// Bump the named counter for the current thread. The macro already ends in a
// semicolon, so `INC_TPC(x);` expands to a statement plus an empty statement.
#define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
// nextTx64: Global shared state. The tx64 that should be used for
// new requests going forward.
// NOTE(review): `volatile` by itself does not order accesses between
// threads -- confirm how writers and readers of this pointer synchronize.
TranslatorX64* volatile nextTx64;
// tx64: Thread-local state. The tx64 we're using for the current request.
__thread TranslatorX64* tx64;
// Register dirtiness: thread-private.
__thread VMRegState tl_regState = REGSTATE_CLEAN;
// Interned names of the magic-call methods; used below to validate the
// callee in shuffleArgsForMagicCall.
static StaticString s___call(LITSTR_INIT("__call"));
static StaticString s___callStatic(LITSTR_INIT("__callStatic"));
// Initialize at most this many locals inline in function body prologue; more
// than this, and emitting a loop is more compact. To be precise, the actual
// crossover point in terms of code size is 6; 9 was determined by experiment to
// be the optimal point in certain benchmarks. #microoptimization
static const int kLocalsToInitializeInline = 9;
// An intentionally funny-looking-in-core-dumps constant for uninitialized
// instruction pointers.
static const uint64_t kUninitializedRIP = 0xba5eba11acc01ade;
// Yields the SrcKey of whatever should execute after instruction `i`.
// When `i` has a successor inside the tracelet, that successor's source key
// is used (analysis may have merged or rewritten instructions, so this is
// not necessarily the next key in the unit); otherwise the tracelet's own
// follow-on key is returned.
SrcKey nextSrcKey(const Tracelet& t, const NormalizedInstruction& i) {
  if (i.next) {
    return i.next->source;
  }
  return t.m_nextSk;
}
// stubBlock --
//   Emits `body` out of line in `cold`: the hot stream jumps unconditionally
//   to the cold code, and once the body has been emitted the cold stream
//   jumps back to wherever the hot stream's frontier is at that point.
template <typename L>
void stubBlock(X64Assembler& hot, X64Assembler& cold, const L& body) {
  auto const coldEntry = cold.code.frontier;
  hot.jmp(coldEntry);
  guardDiamond(cold, body);
  // Read the hot frontier only now, after the body has been emitted.
  auto const hotResume = hot.code.frontier;
  cold.jmp(hotResume);
}
// True for every DataType except KindOfObject and KindOfRef, the two kinds
// this file treats as never having a static refcount.
static bool
typeCanBeStatic(DataType t) {
  return !(t == KindOfObject || t == KindOfRef);
}
// IfCountNotStatic --
//   Emits if (%reg->_count != RefCountStaticValue) { ... }.
//   May short-circuit this check if the type is known to be
//   static already.
//
//   Scoped helper: the guarded code is whatever gets emitted between this
//   object's construction and destruction. When `t` is a type that cannot be
//   static (see typeCanBeStatic) no CondBlock is created and m_cb is null.
//
//   The object owns m_cb, so copying is disabled: an implicit copy would
//   delete the same CondBlock twice.
struct IfCountNotStatic {
  typedef CondBlock<FAST_REFCOUNT_OFFSET,
                    RefCountStaticValue,
                    CC_Z,
                    field_type(RefData, _count)> NonStaticCondBlock;
  NonStaticCondBlock *m_cb; // might be null

  // a:   assembler the check is emitted into.
  // reg: register holding the pointer to the refcounted value.
  // t:   statically known type, or KindOfInvalid when unknown.
  IfCountNotStatic(X64Assembler& a,
                   PhysReg reg,
                   DataType t = KindOfInvalid)
    // Objects and variants cannot be static
    : m_cb(typeCanBeStatic(t) ? new NonStaticCondBlock(a, reg) : nullptr) {}

  IfCountNotStatic(const IfCountNotStatic&) = delete;
  IfCountNotStatic& operator=(const IfCountNotStatic&) = delete;

  ~IfCountNotStatic() {
    delete m_cb;
  }
};
// A class counts as unique only in RepoAuthoritative mode, and only when it
// is non-null and carries the AttrUnique attribute.
bool
classIsUnique(const Class* cls) {
  if (!RuntimeOption::RepoAuthoritative) return false;
  if (!cls) return false;
  return static_cast<bool>(cls->attrs() & AttrUnique);
}
bool
classIsUniqueOrCtxParent(const Class* cls) {
if (!cls) return false;
if (classIsUnique(cls)) return true;
Class* ctx = arGetContextClass(curFrame());
if (!ctx) return false;
return ctx->classof(cls);
}
// Unique classes that are neither interfaces nor traits.
bool
classIsUniqueNormalClass(const Class* cls) {
  if (!classIsUnique(cls)) return false;
  return !(cls->attrs() & (AttrInterface | AttrTrait));
}
// Segfault handler: figure out if it's an intentional segfault
// (timeout exception) and if so, act appropriately. Otherwise, pass
// the signal on.
//
// signum/info/ctx are the standard three-argument signal handler arguments;
// ctx is treated as a ucontext_t* so the faulting instruction pointer can be
// read and rewritten.
void TranslatorX64::SEGVHandler(int signum, siginfo_t *info, void *ctx) {
TranslatorX64 *self = Get();
void *surprisePage =
ThreadInfo::s_threadInfo->m_reqInjectionData.surprisePage;
// A fault whose address is exactly this thread's surprise page is treated as
// intentional.
if (info->si_addr == surprisePage) {
ucontext_t *ucontext = (ucontext_t*)ctx;
TCA rip = (TCA)RIP_REGISTER(ucontext->uc_mcontext);
SignalStubMap::const_accessor a;
// Every faulting rip is expected to have a registered stub; a miss is
// treated as unreachable.
if (!self->m_segvStubs.find(a, rip)) {
NOT_REACHED();
}
TCA astubsCall = a->second;
// When this handler returns, "call" the astubs code for this
// surprise check.
RIP_REGISTER(ucontext->uc_mcontext) = (uintptr_t)astubsCall;
// We've processed this event; reset the page in case execution
// continues normally.
g_vmContext->m_stack.unprotect();
} else {
// Not ours: forward to whatever handler was saved in m_segvChain.
sighandler_t handler = (sighandler_t)self->m_segvChain;
if (handler == SIG_DFL || handler == SIG_IGN) {
// Default/ignore dispositions are not callable; reinstall the disposition
// and re-raise the signal so it is applied.
signal(signum, handler);
raise(signum);
} else {
self->m_segvChain(signum, info, ctx);
}
}
}
/*
 * Emit a copy of the heap cell pointed to by `src` into the VM stack slot
 * at byte offset `off` (which must be a multiple of sizeof(Cell)), using
 * `tmpReg` as the scratch register handed to emitCopyTo.
 *
 * Use emitCopyToStack when you can safely change the state of the
 * register map. With this RegSafe variant you'll need to invalidate the
 * stack location manually at an appropriate time.
 */
void
TranslatorX64::emitCopyToStackRegSafe(X64Assembler& a,
                                      const NormalizedInstruction& ni,
                                      PhysReg src,
                                      int off,
                                      PhysReg tmpReg) {
  assert(off % sizeof(Cell) == 0);
  auto const destDisp = vstackOffset(ni, off);
  emitCopyTo(a, src, 0, rVmSp, destDisp, tmpReg);
}
// Logical register move: ensures the value in src will be in dest
// after execution, but might do so in strange ways. Do not count on
// being able to smash dest to a different register in the future, e.g.
//
// No instruction is emitted when src and dest are the same register.
void
emitMovRegReg(X64Assembler& a, PhysReg src, PhysReg dest) {
  // The recorder must be a named local so that it stays alive while the move
  // is emitted; an unnamed temporary is destroyed at the end of its own
  // statement and would cover no code at all.
  SpaceRecorder sr("_RegMove", a);
  if (src != dest) {
    a.    movq (src, dest);
  }
}
// Emit dest = base + disp. A zero displacement degenerates to a plain
// register move (which itself is a no-op when base == dest).
void
emitLea(X64Assembler& a, PhysReg base, int disp, PhysReg dest) {
  if (disp != 0) {
    a.lea(base[disp], dest);
  } else {
    emitMovRegReg(a, base, dest);
  }
}
// Runtime target of the call emitted by emitDebugPrint: traces `message`,
// the three raw values r1..r3 (printed as pointers), and the full name of
// the function owning frame `fp` ("[?]" when fp->m_func is null).
// fp itself is dereferenced unconditionally.
static void UNUSED tc_debug_print(const char* message,
uintptr_t r1,
uintptr_t r2,
uintptr_t r3,
ActRec* fp) {
TRACE(1, "*********************** %s: %p %p %p (for : %s)\n",
message, (void*)r1, (void*)r2, (void*)r3,
fp->m_func ? fp->m_func->fullName()->data() : "[?]");
}
// Utility for debugging translations that will print a message,
// followed by the value of up to three registers.
//
// Emits into `a` a call to tc_debug_print(message, r1, r2, r3, rVmFp),
// bracketed by a register saver constructed over kAllX64Regs.
void TranslatorX64::emitDebugPrint(Asm& a,
const char* message,
PhysReg r1,
PhysReg r2,
PhysReg r3) {
// Exactly one of the two savers is engaged, chosen by whether `a` is the
// main assembler (this->a) or not.
boost::optional<PhysRegSaver> aSaver;
boost::optional<PhysRegSaverStub> astubsSaver;
if (&a == &this->a) {
aSaver = boost::in_place<PhysRegSaver>(boost::ref(a), kAllX64Regs);
} else {
astubsSaver = boost::in_place<PhysRegSaverStub>(boost::ref(a),
kAllX64Regs);
}
// The moves below are emitted strictly in sequence.
// NOTE(review): if r2 or r3 is itself one of the argument registers written
// by an earlier move, its value is overwritten before it is read -- confirm
// callers never pass such registers.
a. mov_imm64_reg (uintptr_t(message), argNumToRegName[0]);
a. mov_reg64_reg64(r1, argNumToRegName[1]);
a. mov_reg64_reg64(r2, argNumToRegName[2]);
a. mov_reg64_reg64(r3, argNumToRegName[3]);
a. mov_reg64_reg64(rVmFp, argNumToRegName[4]);
a. call((TCA)tc_debug_print);
}
// Emit a call to ringbufferEntry(t, funcId, offset) describing `sk`.
// Nothing is emitted unless tx64 tracing is enabled at level 3. The
// registers in `toSave` plus kSpecialCrossTraceRegs are preserved around
// the call.
void
TranslatorX64::emitRB(X64Assembler& a,
                      RingBufferType t,
                      SrcKey sk, RegSet toSave) {
  if (Trace::moduleEnabledRelease(Trace::tx64, 3)) {
    PhysRegSaver saver(a, toSave | kSpecialCrossTraceRegs);
    emitImmReg(a, t, argNumToRegName[0]);
    emitImmReg(a, sk.getFuncId(), argNumToRegName[1]);
    emitImmReg(a, sk.offset(), argNumToRegName[2]);
    a.call((TCA)ringbufferEntry);
  }
}
// Emit a call to ringbufferMsg(msg, strlen(msg), t). Nothing is emitted
// unless tx64 tracing is enabled at level 3. The address of `msg` is baked
// into the emitted code as an immediate. The registers in `toSave` plus
// kSpecialCrossTraceRegs are preserved around the call.
void
TranslatorX64::emitRB(X64Assembler& a,
                      RingBufferType t,
                      const char* msg,
                      RegSet toSave) {
  if (Trace::moduleEnabledRelease(Trace::tx64, 3)) {
    PhysRegSaver saver(a, toSave | kSpecialCrossTraceRegs);
    emitImmReg(a, (uintptr_t)msg, argNumToRegName[0]);
    emitImmReg(a, strlen(msg), argNumToRegName[1]);
    emitImmReg(a, t, argNumToRegName[2]);
    a.call((TCA)ringbufferMsg);
  }
}
// Emit a call to `dest`. The call goes through a native trampoline whenever
// `dest` is out of range for a direct call from `a`, or whenever stats are
// enabled; otherwise it is a direct call.
void
TranslatorX64::emitCall(X64Assembler& a, TCA dest) {
  if (!a.jmpDeltaFits(dest) || Stats::enabled()) {
    a.call(getNativeTrampoline(dest));
    return;
  }
  a.call(dest);
}
// Emit the call described by `call`: either a direct call to a fixed
// address, or an indirect call through a table reached via the object in rdi.
void
TranslatorX64::emitCall(X64Assembler& a, Call call) {
  if (!call.isDirect()) {
    // Virtual call.
    // Load method's address from proper offset off of object in rdi,
    // using rax as scratch.
    a.loadq(*rdi, rax);
    a.call(rax[call.getOffset()]);
    return;
  }
  emitCall(a, (TCA)call.getAddress());
}
// Emit a thread-local load of g_context into `dest`.
static void emitGetGContext(X64Assembler& a, PhysReg dest) {
emitTLSLoad<ExecutionContext>(a, g_context, dest);
}
// Emit code that eagerly stores the VM registers into the execution context:
// m_fp <- rVmFp, top-of-stack <- rVmSp + spDiff, m_pc <- `pc` (an immediate).
// rdi is used to hold the context pointer and is preserved via push/pop.
void
TranslatorX64::emitEagerSyncPoint(X64Assembler& a, const Opcode* pc,
const Offset spDiff) {
// Byte offsets of the three fields within VMExecutionContext.
static COff spOff = offsetof(VMExecutionContext, m_stack) +
Stack::topOfStackOffset();
static COff fpOff = offsetof(VMExecutionContext, m_fp);
static COff pcOff = offsetof(VMExecutionContext, m_pc);
/* we can't use rAsm because the pc store uses it as a
temporary */
Reg64 rEC = reg::rdi;
a. push(rEC);
emitGetGContext(a, rEC);
a. storeq(rVmFp, rEC[fpOff]);
if (spDiff) {
// rAsm is free at this point; it carries the adjusted stack pointer.
a. lea(rVmSp[spDiff], rAsm);
a. storeq(rAsm, rEC[spOff]);
} else {
a. storeq(rVmSp, rEC[spOff]);
}
a. storeq(pc, rEC[pcOff]);
a. pop(rEC);
}
// Queue a pending fixup (pcOff, spOff) keyed by the current frontier of `a`.
void
TranslatorX64::recordSyncPoint(X64Assembler& a, Offset pcOff, Offset spOff) {
  Fixup fixup(pcOff, spOff);
  m_pendingFixups.push_back(PendingFixup(a.code.frontier, fixup));
}
// Record an indirect fixup for the code address `addr`.
//
// addr:         address the fixup is keyed on (callers pass the frontier
//               just after the call they emitted).
// dwordsPushed: number of 8-byte slots pushed around the call; the recorded
//               offset is (2 + dwordsPushed) * 8 bytes.
//
// The fixup is keyed on `addr` rather than on the member assembler's
// frontier, so the entry is correct even when the caller emitted into a
// different assembler. For callers that pass a.code.frontier the behavior
// is unchanged.
void
TranslatorX64::recordIndirectFixup(CTCA addr, int dwordsPushed) {
  m_fixupMap.recordIndirectFixup(
    addr, IndirectFixup((2 + dwordsPushed) * 8));
}
// Convenience overload: emits the incref into the member assembler `a`.
void
TranslatorX64::emitIncRef(PhysReg base, DataType dtype) {
emitIncRef(a, base, dtype);
}
// Emit a refcount increment for the value whose pointer is in `base`.
// Nothing is emitted when `dtype` is known and not refcounted; a dtype of
// KindOfInvalid (unknown) still gets the increment. The increment is wrapped
// in a "count is not static" check, which IfCountNotStatic elides for types
// that cannot be static.
void
TranslatorX64::emitIncRef(X64Assembler &a, PhysReg base, DataType dtype) {
  const bool mayNeedIncRef =
    IS_REFCOUNTED_TYPE(dtype) || dtype == KindOfInvalid;
  if (!mayNeedIncRef) {
    return;
  }
  SpaceRecorder sr("_IncRef", a);
  assert(sizeof(Countable) == sizeof(int32_t));
  { // if !static then
    IfCountNotStatic notStatic(a, base, dtype);
    /*
     * The optimization guide cautions against using inc; while it is
     * compact, it only writes the low-order 8 bits of eflags, causing a
     * partial dependency for any downstream flags-dependent code.
     */
    a.incl(base[FAST_REFCOUNT_OFFSET]);
  } // endif
}
// Emit an incref for the TypedValue at base[disp] whose type is not known
// statically: the value's m_data is loaded into tmpReg and its count is
// incremented, guarded first by IfRefCounted and then by a not-static check.
// The nested scopes matter: each guard object brackets the code emitted
// while it is alive.
void
TranslatorX64::emitIncRefGenericRegSafe(PhysReg base,
int disp,
PhysReg tmpReg) {
{ // if RC
IfRefCounted irc(a, base, disp);
a. load_reg64_disp_reg64(base, disp + TVOFF(m_data),
tmpReg);
{ // if !static
IfCountNotStatic ins(a, tmpReg);
a. incl(tmpReg[FAST_REFCOUNT_OFFSET]);
} // endif
} // endif
}
// emitEagerVMRegSave --
// Inline. Saves regs in-place in the TC. This is an unusual need;
// you probably want to lazily save these regs via recordCall and
// its ilk.
//
// SaveFP uses rVmFp, as usual. SavePC requires the caller to have
// placed the PC offset of the instruction about to be executed in
// rdi.
// Bit flags selecting which registers emitEagerVMRegSave stores; the values
// may be OR'ed together.
enum RegSaveFlags {
  SaveFP = 1 << 0,
  SavePC = 1 << 1
};
// Emits the eager VM register save sequence into `a` and returns the address
// where the sequence starts. `flags` is a combination of RegSaveFlags; the
// stack pointer is always saved, FP and PC only when requested.
static TCA
emitEagerVMRegSave(X64Assembler& a,
int flags /* :: RegSaveFlags */) {
TCA start = a.code.frontier;
bool saveFP = bool(flags & SaveFP);
bool savePC = bool(flags & SavePC);
assert((flags & ~(SavePC | SaveFP)) == 0);
Reg64 pcReg = rdi;
PhysReg rEC = rAsm;
assert(!kSpecialCrossTraceRegs.contains(rdi));
emitGetGContext(a, rEC);
// rEC is advanced by spOff below, so the fp/pc offsets are expressed
// relative to the top-of-stack field rather than to the context base.
static COff spOff = offsetof(VMExecutionContext, m_stack) +
Stack::topOfStackOffset();
static COff fpOff = offsetof(VMExecutionContext, m_fp) - spOff;
static COff pcOff = offsetof(VMExecutionContext, m_pc) - spOff;
assert(spOff != 0);
// Instruction selection note: this is an lea, but add is more
// compact and we can afford the flags bash.
a. addq (spOff, r64(rEC));
a. storeq (rVmSp, *rEC);
if (savePC) {
// We're going to temporarily abuse rVmSp to hold the current unit.
Reg64 rBC = rVmSp;
a. push (rBC);
// m_fp -> m_func -> m_unit -> m_bc + pcReg
a. loadq (rVmFp[AROFF(m_func)], rBC);
a. loadq (rBC[Func::unitOff()], rBC);
a. loadq (rBC[Unit::bcOff()], rBC);
// pcReg (rdi) is modified here: it becomes the absolute bytecode address.
a. addq (rBC, pcReg);
a. storeq (pcReg, rEC[pcOff]);
a. pop (rBC);
}
if (saveFP) {
a. storeq (rVmFp, rEC[fpOff]);
}
return start;
}
// Map a refcounted DataType to a Call wrapping the matching release()
// member function. Any other type is a programming error.
Call TranslatorX64::getDtorCall(DataType type) {
  if (type == BitwiseKindOfString) {
    return Call(getMethodPtr(&StringData::release));
  }
  if (type == KindOfArray) {
    return Call(getMethodPtr(&ArrayData::release));
  }
  if (type == KindOfObject) {
    return Call(getMethodPtr(&ObjectData::release));
  }
  if (type == KindOfRef) {
    return Call(getMethodPtr(&RefData::release));
  }
  assert(false);
  NOT_REACHED();
}
/*
* callDestructor/jumpDestructor --
*
* Emit a call or jump to the appropriate destructor for a dynamically
* typed value.
*
* No registers are saved; most translated code should be using
* emitDecRefGeneric{Reg,} instead of this.
*
* Inputs:
*
* - typeReg is destroyed and may not be argNumToRegName[0].
* - argNumToRegName[0] should contain the m_data for this value.
* - scratch is destroyed.
*/
// Emits the code that turns the type in typeReg into an index into
// g_destructors, and returns the memory operand (scratch + typeReg*8) that
// addresses the selected destructor entry. Both typeReg and scratch are
// overwritten by the emitted code; neither may be argNumToRegName[0].
static IndexedMemoryRef lookupDestructor(X64Assembler& a,
PhysReg typeReg,
PhysReg scratch) {
assert(typeReg != r32(argNumToRegName[0]));
assert(scratch != argNumToRegName[0]);
// The shift below maps the four refcounted kinds onto table slots 0..3.
static_assert((BitwiseKindOfString >> kShiftDataTypeToDestrIndex == 0) &&
(KindOfArray >> kShiftDataTypeToDestrIndex == 1) &&
(KindOfObject >> kShiftDataTypeToDestrIndex == 2) &&
(KindOfRef >> kShiftDataTypeToDestrIndex == 3),
"lookup of destructors depends on KindOf* values");
a. shrl (kShiftDataTypeToDestrIndex, r32(typeReg));
a. movq (&g_destructors, scratch);
return scratch[typeReg*8];
}
// Emit an indirect call to the destructor selected by the type in typeReg.
// See lookupDestructor for the register constraints.
static void callDestructor(X64Assembler& a,
                           PhysReg typeReg,
                           PhysReg scratch) {
  IndexedMemoryRef dtorSlot = lookupDestructor(a, typeReg, scratch);
  a.call(dtorSlot);
}
// Emit an indirect jump to the destructor selected by the type in typeReg.
// See lookupDestructor for the register constraints.
static void jumpDestructor(X64Assembler& a,
                           PhysReg typeReg,
                           PhysReg scratch) {
  IndexedMemoryRef dtorSlot = lookupDestructor(a, typeReg, scratch);
  a.jmp(dtorSlot);
}
// Emits three entry points that share one body, each falling through into
// the next:
//   m_irPopRHelper        - sets rdi to the top-of-stack pointer first;
//   m_dtorGenericStub     - takes a TypedValue* in rdi;
//   m_dtorGenericStubRegs - takes m_type in rAsm and m_data in rdi.
// The shared body compares the count against RefCountStaticValue, decrements
// it, and calls the type's destructor when the decrement reaches zero.
void TranslatorX64::emitGenericDecRefHelpers() {
Label release;
// m_dtorGenericStub just takes a pointer to the TypedValue in rdi.
moveToAlign(a, kNonFallthroughAlign);
m_irPopRHelper = a.code.frontier;
// popR: Move top-of-stack pointer to rdi
emitMovRegReg(a, rVmSp, rdi);
// fall through
m_dtorGenericStub = a.code.frontier;
emitLoadTVType(a, rdi[TVOFF(m_type)], r32(rAsm));
a. loadq (rdi[TVOFF(m_data)], rdi);
// Fall through to the regs stub.
/*
* Custom calling convention: m_type goes in rAsm, m_data in
* rdi. We don't ever store program locations in rAsm, so the
* caller didn't need to spill anything. The assembler sometimes
* uses rAsm, but we know the stub won't need to and it makes it
* possible to share the code for both decref helpers.
*/
m_dtorGenericStubRegs = a.code.frontier;
a. cmpl (RefCountStaticValue, rdi[FAST_REFCOUNT_OFFSET]);
// NOTE(review): presumably jccBlock<CC_Z> skips the body when the compare
// above set ZF (i.e. the count is the static value) -- confirm against
// jccBlock's definition.
jccBlock<CC_Z>(a, [&] {
a. decl (rdi[FAST_REFCOUNT_OFFSET]);
release.jcc8(a, CC_Z);
});
a. ret ();
asm_label(a, release);
{
// Save caller-saved GP registers other than rdi around the destructor call;
// the saver's scope ends before the final ret is emitted.
PhysRegSaver prs(a, kGPCallerSaved - RegSet(rdi));
callDestructor(a, rAsm, rax);
recordIndirectFixup(a.code.frontier, prs.rspTotalAdjustmentRegs());
}
a. ret ();
TRACE(1, "HOTSTUB: generic dtor start: %lx\n",
uintptr_t(m_irPopRHelper));
TRACE(1, "HOTSTUB: genericDtorStub: %lx\n", uintptr_t(m_dtorGenericStub));
TRACE(1, "HOTSTUB: genericDtorStubRegs: %lx\n",
uintptr_t(m_dtorGenericStubRegs));
TRACE(1, "HOTSTUB: total dtor generic stubs %zu bytes\n",
size_t(a.code.frontier - m_dtorGenericStub));
}
// Produce another translation for args.m_sk. Returns nullptr without
// translating when the key is debugger-blacklisted in an attached process,
// or when the write lease cannot be acquired.
TCA TranslatorX64::retranslate(const TranslArgs& args) {
  const bool blockedByDebugger =
    isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), args.m_sk);
  if (blockedByDebugger) {
    // We are about to translate something known to be blacklisted by
    // debugger, exit early
    SKTRACE(1, args.m_sk, "retranslate abort due to debugger\n");
    return nullptr;
  }

  LeaseHolder writer(s_writeLease);
  if (!writer) {
    return nullptr;
  }

  SKTRACE(1, args.m_sk, "retranslate\n");
  return translate(args);
}
// Only use comes from HHIR's cgExitTrace() case TraceExitType::SlowNoProgress
//
// Creates an interp-flagged translation for `sk` and, on success, smashes
// the jump at `toSmash` to point at it. Returns the new translation's start,
// or nullptr when nothing was translated (debugger blacklist, no write
// lease, or the translation limit for this SrcKey has been reached).
TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk,
                                           bool align,
                                           TCA toSmash) {
  const bool blockedByDebugger =
    isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), sk);
  if (blockedByDebugger) {
    // We are about to translate something known to be blacklisted by
    // debugger, exit early
    SKTRACE(1, sk, "retranslateAndPatchNoIR abort due to debugger\n");
    return nullptr;
  }

  LeaseHolder writer(s_writeLease);
  if (!writer) {
    return nullptr;
  }
  SKTRACE(1, sk, "retranslateAndPatchNoIR\n");

  SrcRec* const rec = getSrcRec(sk);
  const bool limitReached =
    rec->translations().size() == RuntimeOption::EvalJitMaxTranslations + 1;
  if (limitReached) {
    // we've gone over the translation limit and already have an anchor
    // translation that will interpret, so just return NULL and force
    // interpretation of this BB.
    return nullptr;
  }

  TCA const entry = translate(TranslArgs(sk, align).interp(true));
  if (entry) {
    smashJmp(getAsmFor(toSmash), toSmash, entry);
  }
  return entry;
}
/*
 * Pad the frontier of `aa` up to the next multiple of `align` (a power of
 * two). Nothing is emitted when the frontier is already aligned.
 *
 * If the gap is reachable it is bridged with nops. Otherwise it is filled
 * with trapping instructions: a ud2 when more than two bytes are needed,
 * then int3's for the remainder.
 */
void
TranslatorX64::moveToAlign(X64Assembler &aa,
                           const size_t align /* =kJmpTargetAlign */,
                           bool unreachable /* =true */) {
  using namespace HPHP::Util;
  SpaceRecorder sr("_Align", aa);
  assert(isPowerOfTwo(align));

  const size_t misalignment = (align - 1) & uintptr_t(aa.code.frontier);
  if (misalignment == 0) {
    return;
  }
  size_t padBytes = align - misalignment;

  if (!unreachable) {
    aa.emitNop(padBytes);
    return;
  }

  if (padBytes > 2) {
    aa.ud2();
    padBytes -= 2;
  }
  if (padBytes > 0) {
    aa.emitInt3s(padBytes);
  }
}
/*
* Req machinery. We sometimes emit code that is unable to proceed
* without translator assistance; e.g., a basic block whose successor is
* unknown. We leave one of these request arg blobs in m_data, and point
* to it at callout-time.
*/
// REQ_BIND_CALL
// Argument blob for a bind-call service request. This declaration also
// defines an object of the type, named m_bindCall.
struct ReqBindCall {
SrcKey m_sourceInstr; // NOTE(review): presumably the call instruction's SrcKey -- confirm
TCA m_toSmash; // NOTE(review): presumably the code address to patch once bound -- confirm
int m_nArgs; // NOTE(review): presumably the argument count at the call site -- confirm
bool m_isImmutable; // call was to known func.
} m_bindCall;
// ID to name mapping for tracing.
//
// Returns the stringified name of service request `req`, taken from the
// SERVICE_REQUESTS X-macro list. `req` must be a valid index into that list;
// this is asserted so that a bad id is caught in debug builds instead of
// silently reading past the end of the table.
static inline const char*
reqName(int req) {
  static const char* reqNames[] = {
#define REQ(nm) #nm,
    SERVICE_REQUESTS
#undef REQ
  };
  assert(req >= 0 &&
         size_t(req) < sizeof(reqNames) / sizeof(reqNames[0]));
  return reqNames[req];
}
/*
 * Look up the "best" existing translation for args.m_sk, creating one via
 * createTranslation() when none exists yet. Returns the translation's TCA,
 * or NULL when a translation cannot be produced right now. Frames that have
 * a VarEnv in global scope (pseudo-mains) are never translated.
 */
TCA
TranslatorX64::getTranslation(const TranslArgs& args) {
  auto sk = args.m_sk;
  curFunc()->validate();
  SKTRACE(2, sk,
          "getTranslation: curUnit %s funcId %" PRIx64 " offset %d\n",
          curUnit()->filepath()->data(),
          sk.getFuncId(),
          sk.offset());
  SKTRACE(2, sk, " funcId: %" PRIx64 "\n",
          curFunc()->getFuncId());

  const bool inPseudoMain =
    curFrame()->hasVarEnv() && curFrame()->getVarEnv()->isGlobalScope();
  if (inPseudoMain) {
    SKTRACE(2, sk, "punting on pseudoMain\n");
    return nullptr;
  }

  const SrcRec* existing = m_srcDB.find(sk);
  if (existing) {
    if (TCA tca = existing->getTopTranslation()) {
      SKTRACE(2, sk, "getTranslation: found %p\n", tca);
      return tca;
    }
  }
  return createTranslation(args);
}
// Number of translations recorded for `sk`; 0 when `sk` has no SrcRec.
int
TranslatorX64::numTranslations(SrcKey sk) const {
  const SrcRec* rec = m_srcDB.find(sk);
  if (!rec) {
    return 0;
  }
  return rec->translations().size();
}
// Creates the first translation for args.m_sk: acquires the write lease,
// inserts a SrcRec whose anchor translation is a REQ_RETRANSLATE service
// request, then retranslates. Returns nullptr when the lease is unavailable.
TCA
TranslatorX64::createTranslation(const TranslArgs& args) {
/*
* Try to become the writer. We delay this until we *know* we will have
* a need to create new translations, instead of just trying to win the
* lottery at the dawn of time. Hopefully lots of requests won't require
* any new translation.
*/
auto retransl = [&] {
return retranslate(args);
};
auto sk = args.m_sk;
LeaseHolder writer(s_writeLease);
if (!writer) return nullptr;
if (SrcRec* sr = m_srcDB.find(sk)) {
TCA tca = sr->getTopTranslation();
if (tca) {
// Handle extremely unlikely race; someone may have just already
// added the first instance of this SrcRec while we did a
// non-blocking wait on the write lease.
return tca;
} else {
// Since we are holding the write lease, we know that sk is properly
// initialized, except that it has no translations (due to
// replaceOldTranslations)
return retransl();
}
}
// We put retranslate requests at the end of our slab to more frequently
// allow conditional jump fall-throughs
AHotSelector ahs(this, curFunc()->attrs() & AttrHot);
// Remember both frontiers so the size of the emitted anchor can be measured.
TCA astart = a.code.frontier;
TCA stubstart = astubs.code.frontier;
TCA req = emitServiceReq(SRFlags::None, REQ_RETRANSLATE,
1, uint64_t(sk.offset()));
SKTRACE(1, sk, "inserting anchor translation for (%p,%d) at %p\n",
curUnit(), sk.offset(), req);
SrcRec* sr = m_srcDB.insert(sk);
sr->setFuncInfo(curFunc());
sr->setAnchorTranslation(req);
size_t asize = a.code.frontier - astart;
size_t stubsize = astubs.code.frontier - stubstart;
// The service request is expected to have emitted nothing into `a`.
assert(asize == 0);
if (stubsize) {
addTranslation(TransRec(sk, curUnit()->md5(), TransAnchor,
astart, asize, stubstart, stubsize));
assert(!isTransDBEnabled() || getTransRec(stubstart)->kind == TransAnchor);
}
return retransl();
}
// Pure lookup: the top translation for `sk`, or nullptr when `sk` has no
// SrcRec. Never creates anything.
TCA
TranslatorX64::lookupTranslation(SrcKey sk) const {
  SrcRec* rec = m_srcDB.find(sk);
  return rec ? rec->getTopTranslation() : nullptr;
}
// Translates the tracelet starting at args.m_sk and returns the frontier of
// `a` as it was just before translation began (after optional alignment).
TCA
TranslatorX64::translate(const TranslArgs& args) {
INC_TPC(translate);
// The VM stack and frame pointers must be Cell-aligned.
assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
if (!args.m_interp) {
// Once the HHIR translation count equals the global limit, the JIT is
// switched off via the runtime option.
if (m_numHHIRTrans == RuntimeOption::EvalJitGlobalTranslationLimit) {
RuntimeOption::EvalJit = false;
ThreadInfo::s_threadInfo->m_reqInjectionData.updateJit();
}
}
AHotSelector ahs(this, curFunc()->attrs() & AttrHot);
if (args.m_align) {
moveToAlign(a, kNonFallthroughAlign);
}
TCA start = a.code.frontier;
m_lastHHIRPunt.clear();
translateTracelet(args);
SKTRACE(1, args.m_sk, "translate moved head from %p to %p\n",
getTopTranslation(args.m_sk), start);
return start;
}
/*
 * Returns true if an nBytes-long instruction written at `frontier` can be
 * smashed without any risk of cache-tearing: the bytes from `offset`
 * through the instruction's last byte must all lie in one cache line.
 */
bool isSmashable(Address frontier, int nBytes, int offset /* = 0 */) {
  assert(nBytes <= int(kX64CacheLineSize));
  const uintptr_t start = uintptr_t(frontier);
  const uintptr_t firstLine = (start + offset) & ~kX64CacheLineMask;
  const uintptr_t lastLine = (start + nBytes - 1) & ~kX64CacheLineMask;
  return firstLine == lastLine;
}
/*
* Call before emitting a test-jcc sequence. Inserts a nop gap such that after
* writing a testBytes-long instruction, the frontier will be smashable.
*
* `flags` selects what must end up smashable: the whole jcc, only the jcc's
* immediate, or both the jcc and a jmp that follows it.
*/
void prepareForTestAndSmash(Asm& a, int testBytes, TestAndSmashFlags flags) {
switch (flags) {
case kAlignJcc:
prepareForSmash(a, testBytes + kJmpccLen, testBytes);
assert(isSmashable(a.code.frontier + testBytes, kJmpccLen));
break;
case kAlignJccImmediate:
// Only the trailing kJmpImmBytes of the jcc need to share a cache line.
prepareForSmash(a,
testBytes + kJmpccLen,
testBytes + kJmpccLen - kJmpImmBytes);
assert(isSmashable(a.code.frontier + testBytes, kJmpccLen,
kJmpccLen - kJmpImmBytes));
break;
case kAlignJccAndJmp:
// Ensure that the entire jcc, and the entire jmp are smashable
// (but we dont need them both to be in the same cache line)
prepareForSmash(a, testBytes + kJmpccLen, testBytes);
prepareForSmash(a, testBytes + kJmpccLen + kJmpLen, testBytes + kJmpccLen);
// The second call may have moved the frontier; both properties are
// re-checked here.
assert(isSmashable(a.code.frontier + testBytes, kJmpccLen));
assert(isSmashable(a.code.frontier + testBytes + kJmpccLen, kJmpLen));
break;
}
}
// Make sure an nBytes-long instruction emitted at the frontier of `a` will
// be smashable from `offset` onward. If it would not be, nops are emitted
// so that frontier + offset lands on the start of the next cache line.
void prepareForSmash(X64Assembler& a, int nBytes, int offset /* = 0 */) {
  if (isSmashable(a.code.frontier, nBytes, offset)) {
    return;
  }
  const uintptr_t smashStart = uintptr_t(a.code.frontier) + offset;
  int gapSize = (~smashStart & kX64CacheLineMask) + 1;
  a.emitNop(gapSize);
  assert(isSmashable(a.code.frontier, nBytes, offset));
}
// Overwrite the smashable instruction at `src` so that control goes to
// `dest`: nops when dest lies at most kJmpLen bytes ahead of src (never for
// calls), otherwise a jmp or, when isCall is set, a call.
// Requires canWrite(). The CodeCursor repositions `a` at `src` for the
// duration of the write.
void
TranslatorX64::smash(X64Assembler &a, TCA src, TCA dest, bool isCall) {
assert(canWrite());
TRACE(2, "smash: %p -> %p\n", src, dest);
/*
* !
*
* We are about to smash reachable code in the translation cache. A
* hardware thread might be executing the very instruction we're
* modifying. This is safe because:
*
* 1. We align smashable instructions so that they reside on a single
* cache line;
*
* 2. We modify the instruction with a single processor store; and
*
* 3. The smashed region contains only a single instruction in the
* original instruction stream (see jmp() -> emitJ32() -> bytes() in
* the assembler).
*/
CodeCursor cg(a, src);
assert(isSmashable(a.code.frontier, kJmpLen));
if (dest > src && dest - src <= kJmpLen) {
assert(!isCall);
a. emitNop(dest - src);
} else if (!isCall) {
a. jmp(dest);
} else {
a. call(dest);
}
}
// Make the code region read+execute only. The region spans from the base of
// ahot through the end of astubs. The mprotect result is not checked.
void TranslatorX64::protectCode() {
  auto* const regionBase = tx64->ahot.code.base;
  auto const regionLen =
    tx64->astubs.code.base - regionBase + tx64->astubs.code.size;
  mprotect(regionBase, regionLen, PROT_READ | PROT_EXEC);
}
// Make the code region readable, writable and executable again. The region
// spans from the base of ahot through the end of astubs. The mprotect result
// is not checked.
void TranslatorX64::unprotectCode() {
  auto* const regionBase = tx64->ahot.code.base;
  auto const regionLen =
    tx64->astubs.code.base - regionBase + tx64->astubs.code.size;
  mprotect(regionBase, regionLen, PROT_READ | PROT_WRITE | PROT_EXEC);
}
// Emit a VM stack overflow check for a function needing `funcDepth` bytes:
// the stack pointer is masked down to its offset within the VM stack and the
// required depth (plus padding and the surprise page size) is subtracted;
// a negative result branches to m_stackOverflowHelper.
// The `pc` parameter is not used by this function.
void
TranslatorX64::emitStackCheck(int funcDepth, Offset pc) {
funcDepth += kStackCheckPadding * sizeof(Cell);
// NOTE(review): the mask is only meaningful if the stack's byte size is a
// power of two and the stack is aligned to it -- confirm.
uint64_t stackMask = cellsToBytes(RuntimeOption::EvalVMStackElms) - 1;
a. mov_reg64_reg64(rVmSp, rAsm); // copy to destroy
a. and_imm64_reg64(stackMask, rAsm);
a. sub_imm64_reg64(funcDepth + Stack::sSurprisePageSize, rAsm);
assert(m_stackOverflowHelper);
a. jl(m_stackOverflowHelper); // Unlikely branch to failure.
// Success.
}
// Tests the surprise flags for the current thread. Should be used
// before a jnz to surprise handling code.
//
// Emits a single byte-wide test against the condition flags slot in the
// target cache; the static_assert guarantees one byte covers every flag.
void
TranslatorX64::emitTestSurpriseFlags(Asm& a) {
static_assert(RequestInjectionData::LastFlag < (1 << 8),
"Translator assumes RequestInjectionFlags fit in one byte");
a. testb((int8_t)0xff, rVmTl[TargetCache::kConditionFlagsOff]);
}
// Emit a surprise-flag check on function entry. When any flag is set, an
// out-of-line block in astubs calls functionEnterHelper(rVmFp).
//
// inTracelet: selects how the fixup for the helper call is recorded.
// fixup:      pc/sp offsets to associate with the helper call.
void
TranslatorX64::emitCheckSurpriseFlagsEnter(bool inTracelet, Fixup fixup) {
emitTestSurpriseFlags(a);
{
UnlikelyIfBlock ifTracer(CC_NZ, a, astubs);
// Never executed; exists only so the compiler checks the helper's signature
// against the argument emitted below.
if (false) { // typecheck
const ActRec* ar = nullptr;
functionEnterHelper(ar);
}
astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
emitCall(astubs, (TCA)&functionEnterHelper);
if (inTracelet) {
recordSyncPoint(astubs, fixup.m_pcOffset, fixup.m_spOffset);
} else {
// If we're being called while generating a func prologue, we
// have to record the fixup directly in the fixup map instead of
// going through m_pendingFixups like normal.
m_fixupMap.recordFixup(astubs.code.frontier, fixup);
}
}
}
// Overwrite argument number `argNum` of the call described by `ar` with the
// raw payload `datum` and type `t`. Arguments live directly below the
// ActRec: argument i is the TypedValue at ar - (i + 1) * sizeof(TypedValue).
// The previous contents of the slot are not released.
void
TranslatorX64::setArgInActRec(ActRec* ar, int argNum, uint64_t datum,
                              DataType t) {
  const uintptr_t slotAddr =
    uintptr_t(ar) - (argNum+1) * sizeof(TypedValue);
  TypedValue* const slot = (TypedValue*)slotAddr;
  slot->m_data.num = datum;
  slot->m_type = t;
}
// Rewrites the arguments of a magic call (__call / __callStatic) in place:
// slot 0 becomes the invoked name, slot 1 becomes an array holding all of
// the original arguments, and numArgs becomes 2.
// Returns 0 (and changes nothing) when `ar` has no invName, 1 otherwise.
int
TranslatorX64::shuffleArgsForMagicCall(ActRec* ar) {
if (!ar->hasInvName()) {
return 0;
}
const Func* f UNUSED = ar->m_func;
f->validate();
assert(f->name()->isame(s___call.get())
|| f->name()->isame(s___callStatic.get()));
assert(f->numParams() == 2);
TRACE(1, "shuffleArgsForMagicCall: ar %p\n", ar);
assert(ar->hasInvName());
// Fetch the name before setVarEnv(nullptr) is called on the ActRec.
// NOTE(review): presumably invName and varEnv share storage -- confirm.
StringData* invName = ar->getInvName();
assert(invName);
ar->setVarEnv(nullptr);
int nargs = ar->numArgs();
// We need to make an array containing all the arguments passed by the
// caller and put it where the second argument is
HphpArray* argArray = ArrayData::Make(nargs);
argArray->incRefCount();
for (int i = 0; i < nargs; ++i) {
TypedValue* tv =
(TypedValue*)(uintptr_t(ar) - (i+1) * sizeof(TypedValue));
// Append the value to the array, then drop the stack slot's reference.
argArray->nvAppend(tv);
tvRefcountedDecRef(tv);
}
// Put invName in the slot for first argument
setArgInActRec(ar, 0, uint64_t(invName), BitwiseKindOfString);
// Put argArray in the slot for second argument
setArgInActRec(ar, 1, uint64_t(argArray), KindOfArray);
// Fix up ActRec's numArgs
ar->initNumArgs(2);
return 1;
}
/*
* The standard VMRegAnchor treatment won't work for some cases called
* during function preludes.
*
* The fp sync machinery is fundamentally based on the notion that
* instruction pointers in the TC are uniquely associated with source
* HHBC instructions, and that source HHBC instructions are in turn
* uniquely associated with SP->FP deltas.
*
* trimExtraArgs is called from the prologue of the callee.
* The prologue is 1) still in the caller frame for now,
* and 2) shared across multiple call sites. 1 means that we have the
* fp from the caller's frame, and 2 means that this fp is not enough
* to figure out sp.
*
* However, the prologue passes us the callee actRec, whose predecessor
* has to be the caller. So we can sync sp and fp by ourselves here.
* Geronimo!
*/
// Syncs the VM registers (stack top, fp, pc) in g_vmContext to the caller of
// the pre-live ActRec `preLive`, then marks the register state clean.
// Must be entered with the register state dirty.
static void sync_regstate_to_caller(ActRec* preLive) {
assert(tl_regState == REGSTATE_DIRTY);
VMExecutionContext* ec = g_vmContext;
// The arguments sit directly below the ActRec, so the stack top is numArgs
// cells below it.
ec->m_stack.top() = (TypedValue*)preLive - preLive->numArgs();
// For the first ActRec of this VM nesting level the caller's fp comes from
// the saved state of the enclosing VM; otherwise from the saved rbp.
ActRec* fp = preLive == ec->m_firstAR ?
ec->m_nestedVMs.back().m_savedState.fp : (ActRec*)preLive->m_savedRbp;
ec->m_fp = fp;
// m_soff is applied relative to the caller function's base offset.
ec->m_pc = fp->m_func->unit()->at(fp->m_func->base() + preLive->m_soff);
tl_regState = REGSTATE_CLEAN;
}
// Deals with a call that passed more arguments than the callee declares.
// Callees marked AttrMayUseVV get the surplus copied into an ExtraArgs
// attached to the ActRec; for all other callees the surplus values are
// decref'd and numArgs is reduced to the declared parameter count.
// VM registers are synced to the caller on entry and marked dirty again on
// normal exit.
void
TranslatorX64::trimExtraArgs(ActRec* ar) {
  assert(!ar->hasInvName());
  sync_regstate_to_caller(ar);

  const Func* callee = ar->m_func;
  const int declared = callee->numParams();
  const int passed = ar->numArgs();
  assert(passed > declared);
  const int surplus = passed - declared;
  TRACE(1, "trimExtraArgs: %d args, function %s takes only %d, ar %p\n",
        passed, callee->name()->data(), declared, ar);

  // The last-passed argument occupies the lowest address, so the surplus
  // arguments are the `surplus` cells starting here.
  TypedValue* const surplusBegin =
    (TypedValue*)(uintptr_t(ar) - passed * sizeof(TypedValue));

  if (!(callee->attrs() & AttrMayUseVV)) {
    // Function is not marked as "MayUseVV", so discard the extra arguments
    TypedValue* const surplusEnd = surplusBegin + surplus;
    for (TypedValue* tv = surplusBegin; tv != surplusEnd; ++tv) {
      tvRefcountedDecRef(tv);
    }
    ar->setNumArgs(declared);
  } else {
    assert(!ar->hasExtraArgs());
    ar->setExtraArgs(ExtraArgs::allocateCopy(surplusBegin, surplus));
  }

  // Only go back to dirty in a non-exception case.
  tl_regState = REGSTATE_DIRTY;
}
// Returns the entry point used when calling `func` with an argument array.
// If the func body is already set to something other than
// funcBodyHelperThunk, that is returned. Otherwise:
//  - for functions with default-valued params, a dispatcher is emitted that
//    compares the passed arg count against each such param's index and jumps
//    to the matching default-value funclet, falling back to the func base;
//  - for all other functions, the translation of the func base is returned.
// Returns nullptr when the write lease is needed but unavailable.
TCA
TranslatorX64::getCallArrayProlog(Func* func) {
TCA tca = func->getFuncBody();
if (tca != (TCA)funcBodyHelperThunk) return tca;
int numParams = func->numParams();
// (param index, funclet offset) for each parameter with a default value.
std::vector<std::pair<int,Offset> > dvs;
for (int i = 0; i < numParams; ++i) {
const Func::ParamInfo& pi = func->params()[i];
if (pi.hasDefaultValue()) {
dvs.push_back(std::make_pair(i, pi.funcletOff()));
}
}
if (dvs.size()) {
LeaseHolder writer(s_writeLease);
if (!writer) return nullptr;
// Re-check now that the lease is held.
tca = func->getFuncBody();
if (tca != (TCA)funcBodyHelperThunk) return tca;
tca = a.code.frontier;
if (dvs.size() == 1) {
// Single default: compare the in-memory count directly.
a. cmp_imm32_disp_reg32(dvs[0].first,
AROFF(m_numArgsAndCtorFlag), rVmFp);
emitBindJcc(a, CC_LE, SrcKey(func, dvs[0].second));
emitBindJmp(a, SrcKey(func, func->base()));
} else {
// Several defaults: load the count once into rax and test it repeatedly.
a. load_reg64_disp_reg32(rVmFp, AROFF(m_numArgsAndCtorFlag), rax);
for (unsigned i = 0; i < dvs.size(); i++) {
a. cmp_imm32_reg32(dvs[i].first, rax);
emitBindJcc(a, CC_LE, SrcKey(func, dvs[i].second));
}
emitBindJmp(a, SrcKey(func, func->base()));
}
} else {
SrcKey sk(func, func->base());
tca = tx64->getTranslation(TranslArgs(sk, false));
}
return tca;
}
/*
 * Emit the shared stub that a func guard jumps to when the callee in the
 * ActRec is not the Func the prologue was generated for. The stub looks up
 * the right prologue in the actual callee's prologue table and jumps to it.
 * Returns the (aligned) start address of the stub.
 */
TCA
TranslatorX64::emitPrologueRedispatch(X64Assembler& a) {
TCA retval;
moveToAlign(a);
retval = a.code.frontier;
TRACE(1, "HOTSTUB: emitPrologueRedispatch: %lx\n", uintptr_t(a.code.frontier));
// We're in the wrong func prologue.
assert(kScratchCrossTraceRegs.contains(rax));
assert(kScratchCrossTraceRegs.contains(rdx));
assert(kScratchCrossTraceRegs.contains(rcx));
// Get the called func in rax
a. load_reg64_disp_reg64(rStashedAR, AROFF(m_func), rax);
// Get the number of passed parameters in rdx; the and-mask clears the top
// bit of m_numArgsAndCtorFlag.
a. load_reg64_disp_reg32(rStashedAR, AROFF(m_numArgsAndCtorFlag), rdx);
a. and_imm32_reg32(0x7fffffff, rdx);
// Get the number of declared parameters in rcx
a. load_reg64_disp_reg32(rax, Func::numParamsOff(), rcx);
// If we didn't pass too many args, directly dereference
// func->m_prologues. Otherwise (declared < passed) branch to the
// numParamsCheck code below.
// NOTE(review): assumes cmp(src, dst) + CC_L means "dst < src", as in the
// loop emitted by emitPrologue -- confirm.
a. cmp_reg32_reg32(rdx, rcx);
// The jcc8 initially targets itself; it is patched below once the address
// of numParamsCheck is known.
TCA bToFixedProloguesCheck = a.code.frontier;
a. jcc8(CC_L, bToFixedProloguesCheck);
// dispatch:
TCA actualDispatch = a.code.frontier;
// rdx: prologue index (number of passed args)
// rax = func->prologues[rdx]
// jmp rax
a. loadq (rax[rdx*8 + Func::prologueTableOff()], rax);
a. jmp (rax);
a. ud2 ();
// Hmm, more parameters passed than the function expected. Did we pass
// kNumFixedPrologues or more? If not, %rdx is still a perfectly
// legitimate index into the func prologue table.
// numParamsCheck:
// cmp $kNumFixedPrologues, %rdx
// jl dispatch
a.patchJcc8(bToFixedProloguesCheck, a.code.frontier); // numParamsCheck:
a. cmp_imm32_reg32(kNumFixedPrologues, rdx);
a. jcc8(CC_L, actualDispatch);
// Too many gosh-darned parameters passed. Go to numExpected + 1, which
// is always a "too many params" entry point. Instead of incrementing the
// index register, the +1 is folded into the displacement (one extra
// sizeof(TCA)) of the table load, which is indexed by %rcx (the declared
// parameter count).
a. load_reg64_disp_index_reg64(rax,
// %rcx + 1
Func::prologueTableOff() + sizeof(TCA),
rcx,
rax);
a. jmp(rax);
a. ud2();
return retval;
}
// The funcGuard gets skipped and patched by other code, so we have some
// magic offsets. These must stay in sync with the instructions emitted by
// emitFuncGuard(), which asserts the relationship via funcPrologToGuard()
// and funcPrologHasGuard().
static const int kFuncMovImm = 6; // Offset to the immediate for 8 byte Func*
static const int kFuncCmpImm = 4; // Offset to the immediate for 4 byte Func*
// Guard lengths in bytes: funcPrologToGuard() subtracts one of these from a
// prologue address to find the start of its guard. The long form is used
// when the Func* does not fit in a signed 32-bit immediate, the short form
// when it does.
static const int kFuncGuardLen = 23;
static const int kFuncGuardShortLen = 14;
/*
 * Given the address of a prologue (the first byte after its func guard),
 * return a pointer to the Func* immediate embedded in the guard. T selects
 * the guard flavour: an 8-byte T for the long (mov imm64) guard, a 4-byte T
 * for the short (cmp imm32) guard.
 */
template<typename T>
static T*
funcPrologToGuardImm(TCA prolog) {
  assert(sizeof(T) == 4 || sizeof(T) == 8);
  // Distance from the immediate back up to the end of the guard.
  const int immToProlog = sizeof(T) == 8
    ? kFuncGuardLen - kFuncMovImm
    : kFuncGuardShortLen - kFuncCmpImm;
  T* imm = (T*)(prolog - immToProlog);
  // We padded these so the immediate would fit inside a cache line: the
  // first and last byte of the immediate must share a line.
  assert(((uintptr_t(imm) ^ (uintptr_t(imm + 1) - 1)) &
          ~(kX64CacheLineSize - 1)) == 0);
  return imm;
}
/*
 * True if the guard immediate stored in front of `prolog` equals `func`.
 * The immediate is read as 32 or 64 bits depending on whether the Func*
 * fits in a signed dword.
 */
static inline bool
funcPrologHasGuard(TCA prolog, const Func* func) {
  const intptr_t funcBits = uintptr_t(func);
  return deltaFits(funcBits, sz::dword)
    ? *funcPrologToGuardImm<int32_t>(prolog) == funcBits
    : *funcPrologToGuardImm<int64_t>(prolog) == funcBits;
}
/*
 * Map a prologue address back to the start of its func guard. A null
 * prologue and the fcallHelperThunk have no guard and are returned as-is.
 */
static TCA
funcPrologToGuard(TCA prolog, const Func* func) {
  if (!prolog) return prolog;
  if (prolog == (TCA)fcallHelperThunk) return prolog;

  const bool shortGuard = deltaFits(uintptr_t(func), sz::dword);
  return prolog - (shortGuard ? kFuncGuardShortLen : kFuncGuardLen);
}
/*
 * Emit a guard that compares the ActRec's m_func (via rStashedAR) against
 * `func` and jumps to m_funcPrologueRedispatch on mismatch. Returns the
 * address just past the guard, i.e. where the prologue proper begins.
 *
 * Two encodings are used: a short one (cmp imm32 against memory) when the
 * Func* fits in a signed dword, and a long one (load + mov imm64 + cmp)
 * otherwise.
 */
TCA
TranslatorX64::emitFuncGuard(X64Assembler& a, const Func* func) {
assert(kScratchCrossTraceRegs.contains(rax));
assert(kScratchCrossTraceRegs.contains(rdx));
const int kAlign = kX64CacheLineSize;
const int kAlignMask = kAlign - 1;
int loBits = uintptr_t(a.code.frontier) & kAlignMask;
int delta, size;
// Ensure the immediate is safely smashable: the immediate must not
// straddle a kAlign (cache line) boundary.
if (!deltaFits((intptr_t)func, sz::dword)) {
size = 8;
delta = loBits + kFuncMovImm;
} else {
size = 4;
delta = loBits + kFuncCmpImm;
}
// delta becomes the offset within its line of the immediate's last byte.
// If that is smaller than size - 1, the immediate would begin on the
// previous line, so pad with nops until it begins at offset 0.
delta = (delta + size - 1) & kAlignMask;
if (delta < size - 1) {
a.emitNop(size - 1 - delta);
}
TCA aStart DEBUG_ONLY = a.code.frontier;
if (!deltaFits((intptr_t)func, sz::dword)) {
a. load_reg64_disp_reg64(rStashedAR, AROFF(m_func), rax);
/*
Although func doesn't fit in a signed 32-bit immediate, it may still
fit in an unsigned one. Rather than deal with yet another case
(which only happens when we disable jemalloc) just force it to
be an 8-byte immediate, and patch it up afterwards.
*/
a. mov_imm64_reg(0xdeadbeeffeedface, rdx);
// The placeholder forces the 8-byte encoding; overwrite it in place with
// the real Func*.
assert(((uint64_t*)a.code.frontier)[-1] == 0xdeadbeeffeedface);
((uint64_t*)a.code.frontier)[-1] = uintptr_t(func);
a. cmp_reg64_reg64(rax, rdx);
} else {
a. cmp_imm32_disp_reg32(uint64_t(func), AROFF(m_func), rStashedAR);
}
assert(m_funcPrologueRedispatch);
a. jnz(m_funcPrologueRedispatch);
// Sanity-check that the magic guard offsets match what was just emitted.
assert(funcPrologToGuard(a.code.frontier, func) == aStart);
assert(funcPrologHasGuard(a.code.frontier, func));
return a.code.frontier;
}
/*
* funcPrologue --
*
* Given a callee and a number of args, match up to the callee's
* argument expectations and dispatch.
*
* Call/return hand-shaking is a bit funny initially. At translation time,
* we don't necessarily know what function we're calling. For instance,
*
* f(g());
*
* Will lead to a set of basic blocks like:
*
* b1: pushfuncd "f"
* pushfuncd "g"
* fcall
* b2: fcall
*
* The fcall labelled "b2" above is not statically bindable in our
* execution model.
*
* We decouple the call work into a per-callsite portion, responsible
* for recording the return address, and a per-(callee, numArgs) portion,
* responsible for fixing up arguments and dispatching to remaining
* code. We call the per-callee portion a "prologue."
*
* Also, we are called from two distinct environments. From REQ_BIND_CALL,
* we're running "between" basic blocks, with all VM registers sync'ed.
* However, we're also called in the middle of basic blocks, when dropping
* entries into func->m_prologues. So don't go around using the
* translation-time values of vmfp()/vmsp(), since they have an
* unpredictable relationship to the source.
*/
/*
 * Look up func's cached prologue for paramIdx and store it in `prologue`
 * (always, even on failure). Returns true only if that entry is a real
 * prologue (not the fcallHelperThunk) and no translator replacement is in
 * flight.
 */
bool
TranslatorX64::checkCachedPrologue(const Func* func, int paramIdx,
                                   TCA& prologue) const {
  prologue = (TCA)func->getPrologue(paramIdx);

  const bool usable =
    prologue != (TCA)fcallHelperThunk && !s_replaceInFlight;
  if (!usable) return false;

  TRACE(1, "cached prologue %s(%d) -> cached %p\n",
        func->fullName()->data(), paramIdx, prologue);
  assert(isValidCodeAddress(prologue));
  return true;
}
// Emits a single pop that moves the return address pushed by the call
// instruction from the hardware stack into the m_savedRip field of the
// ActRec addressed by rStashedAR.
void
TranslatorX64::emitPopRetIntoActRec(Asm& a) {
a. pop (rStashedAR[AROFF(m_savedRip)]);
}
// Publish a VM register state (fp, sp, pc) and mark it clean. The state
// must be dirty on entry. pcOff is resolved against curUnit().
static void interp_set_regs(ActRec* ar, Cell* sp, Offset pcOff) {
assert(tl_regState == REGSTATE_DIRTY);
tl_regState = REGSTATE_CLEAN;
vmfp() = (Cell*)ar;
vmsp() = sp;
// NOTE(review): curUnit() presumably depends on the vmfp() stored above, so
// this assignment has to stay last -- confirm.
vmpc() = curUnit()->at(pcOff);
}
/*
 * Return the prologue to use when calling `func` with nPassed arguments,
 * emitting and caching a new one if none is cached yet.
 *
 * Returns nullptr when the write lease cannot be acquired or a translator
 * replacement is in flight. For cloned closures no prologue is emitted: the
 * translation of the function body (or of the first applicable
 * default-value funclet) is used instead, and may be null.
 */
TCA
TranslatorX64::funcPrologue(Func* func, int nPassed, ActRec* ar) {
func->validate();
TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
int numParams = func->numParams();
// All "too many args" cases share the table slot numParams + 1.
int paramIndex = nPassed <= numParams ? nPassed : numParams + 1;
bool funcIsMagic = func->isMagic();
// Do a quick test before grabbing the write lease
TCA prologue;
if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
if (func->isClonedClosure()) {
assert(ar);
const Func::ParamInfoVec& paramInfo = func->params();
// Enter at the first default-value funclet among the params that were not
// passed, or at the function base if there is none.
Offset entry = func->base();
for (int i = nPassed; i < numParams; ++i) {
const Func::ParamInfo& pi = paramInfo[i];
if (pi.hasDefaultValue()) {
entry = pi.funcletOff();
break;
}
}
// Make the register state clean for getTranslation(), then restore dirty.
interp_set_regs(ar, (Cell*)ar - func->numSlotsInFrame(), entry);
SrcKey funcBody(func, entry);
TCA tca = getTranslation(TranslArgs(funcBody, false));
tl_regState = REGSTATE_DIRTY;
if (tca) {
// racy, but ok...
func->setPrologue(paramIndex, tca);
}
return tca;
}
// If the translator is getting replaced out from under us, refuse to
// provide a prologue; we don't know whether this request is running on the
// old or new context.
LeaseHolder writer(s_writeLease);
if (!writer || s_replaceInFlight) return nullptr;
// Double check the prologue array now that we have the write lease
// in case another thread snuck in and set the prologue already.
if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;
AHotSelector ahs(this, func->attrs() & AttrHot);
SpaceRecorder sr("_FuncPrologue", a);
// If we're close to a cache line boundary, just burn some space to
// try to keep the func and its body on fewer total lines.
if (((uintptr_t)a.code.frontier & kX64CacheLineMask) >= 32) {
moveToAlign(a, kX64CacheLineSize);
}
// Careful: this isn't necessarily the real entry point. For funcIsMagic
// prologues, this is just a possible prologue.
TCA aStart = a.code.frontier;
TCA start = aStart;
TCA stubStart = astubs.code.frontier;
// Guard: we're in the right callee. This happens in magicStart for
// magic callees.
if (!funcIsMagic) {
start = aStart = emitFuncGuard(a, func);
}
emitRB(a, RBTypeFuncPrologueTry, func->fullName()->data());
// NB: We have most of the register file to play with, since we know
// we're between BB's. So, we hardcode some registers here rather
// than using the scratch allocator.
TRACE(2, "funcPrologue: user function: %s\n", func->name()->data());
// Add a counter for the translation if requested
if (RuntimeOption::EvalJitTransCounters) {
emitTransCounterInc(a);
}
if (!funcIsMagic) {
emitPopRetIntoActRec(a);
// entry point for magic methods comes later
emitRB(a, RBTypeFuncEntry, func->fullName()->data());
/*
* Guard: we have enough stack space to complete this
* function. We omit overflow checks if it is a leaf function
* that can't use more than kStackCheckLeafPadding cells.
*/
auto const needStackCheck =
!(func->attrs() & AttrPhpLeafFn) ||
func->maxStackCells() >= kStackCheckLeafPadding;
if (needStackCheck) {
emitStackCheck(cellsToBytes(func->maxStackCells()), func->base());
}
}
SrcKey skFuncBody = emitPrologue(func, nPassed);
if (funcIsMagic) {
// The entry point for magic methods is here
TCA magicStart = emitFuncGuard(a, func);
emitPopRetIntoActRec(a);
emitRB(a, RBTypeFuncEntry, func->fullName()->data());
// Guard: we have enough stack space to complete this function.
emitStackCheck(cellsToBytes(func->maxStackCells()), func->base());
assert(numParams == 2);
// Special __call prologue
a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
emitCall(a, TCA(TranslatorX64::shuffleArgsForMagicCall));
// if shuffleArgs returns 0, that means this was not a magic call
// and we should proceed to a prologue specialized for nPassed;
// otherwise, proceed to a prologue specialized for nPassed==numParams (2).
if (nPassed == 2) {
// Both outcomes want the nPassed == 2 prologue emitted above.
a.jmp(start);
} else {
a.test_reg64_reg64(rax, rax);
// z ==> not a magic call, go to prologue for nPassed
// Use the short jcc encoding when the backwards displacement fits a byte.
if (deltaFits(start - (a.code.frontier + kJcc8Len), sz::byte)) {
a.jcc8(CC_Z, start);
} else {
a.jcc(CC_Z, start);
}
// this was a magic call
// nPassed == 2
// Fix up hardware stack pointer
nPassed = 2;
emitLea(a, rStashedAR, -cellsToBytes(nPassed), rVmSp);
// Optimization TODO: Reuse the prologue for args == 2
emitPrologue(func, nPassed);
}
start = magicStart;
}
assert(funcPrologHasGuard(start, func));
// Note: for the magic path nPassed may have been overwritten with 2 above.
TRACE(2, "funcPrologue tx64 %p %s(%d) setting prologue %p\n",
this, func->fullName()->data(), nPassed, start);
assert(isValidCodeAddress(start));
func->setPrologue(paramIndex, start);
addTranslation(TransRec(skFuncBody, func->unit()->md5(),
TransProlog, aStart, a.code.frontier - aStart,
stubStart, astubs.code.frontier - stubStart));
recordGdbTranslation(skFuncBody, func,
a, aStart,
false, true);
recordBCInstr(OpFuncPrologue, a, start);
return start;
}
/*
 * Raise the "missing argument(s)" warning for a call to `name` that
 * received `got` arguments but declares `expected` parameters. The
 * singular message is used when exactly one parameter is expected.
 */
static void raiseMissingArgument(const char* name, int expected, int got) {
  if (expected != 1) {
    raise_warning(Strings::MISSING_ARGUMENTS, name, expected, got);
    return;
  }
  raise_warning(Strings::MISSING_ARGUMENT, name, got);
}
/*
 * Emit the argument-fixup part of a prologue for `func`, specialized for
 * nPassed arguments, into the main assembler `a`:
 *
 *  - too many args: call out to trimExtraArgs;
 *  - too few args: push uninit nulls for the missing ones and pick the
 *    first applicable default-value funclet as the destination;
 *  - link the frame (rVmFp = rStashedAR), set up closure state if needed,
 *    initialize remaining locals, position rVmSp;
 *  - emit missing-argument warnings and the surprise-flag check;
 *  - jump to the destination.
 *
 * Returns the SrcKey of the destination (function body or DV funclet).
 */
SrcKey
TranslatorX64::emitPrologue(Func* func, int nPassed) {
int numParams = func->numParams();
const Func::ParamInfoVec& paramInfo = func->params();
Offset dvInitializer = InvalidAbsoluteOffset;
assert(IMPLIES(func->isGenerator(), nPassed == numParams));
if (nPassed > numParams) {
// Too many args; a weird case, so just callout. Stash ar
// somewhere callee-saved.
if (false) { // typecheck
TranslatorX64::trimExtraArgs((ActRec*)nullptr);
}
a. mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
emitCall(a, TCA(TranslatorX64::trimExtraArgs));
// We'll fix rVmSp below.
} else if (nPassed < numParams) {
// Figure out which, if any, default value initializer to go to
for (int i = nPassed; i < numParams; ++i) {
const Func::ParamInfo& pi = paramInfo[i];
if (pi.hasDefaultValue()) {
dvInitializer = pi.funcletOff();
break;
}
}
TRACE(1, "Only have %d of %d args; getting dvFunclet\n",
nPassed, numParams);
emitImmReg(a, nPassed, rax);
// do { *(--rVmSp) = NULL; nPassed++; } while (nPassed < numParams);
// This should be an unusual case, so optimize for code density
// rather than execution speed; i.e., don't unroll the loop.
TCA loopTop = a.code.frontier;
a. sub_imm32_reg64(sizeof(Cell), rVmSp);
a. incl(eax);
emitStoreUninitNull(a, 0, rVmSp);
a. cmp_imm32_reg32(numParams, rax);
a. jcc8(CC_L, loopTop);
}
// Entry point for numParams == nPassed is here.
// Args are kosher. Frame linkage: set fp = ar.
a. mov_reg64_reg64(rStashedAR, rVmFp);
int numLocals = numParams;
if (func->isClosureBody()) {
// Closure bodies: on entry m_this holds the closure object. Replace it
// with the closure's own this/class, switch m_func to the closure's func,
// and copy the closure plus its use vars into the locals that follow the
// params.
int numUseVars = func->cls()->numDeclProperties();
emitLea(a, rVmFp, -cellsToBytes(numParams), rVmSp);
PhysReg rClosure = rcx;
a. loadq(rVmFp[AROFF(m_this)], rClosure);
// Swap in the $this or late bound class
a. loadq(rClosure[c_Closure::thisOffset()], rAsm);
a. storeq(rAsm, rVmFp[AROFF(m_this)]);
// Shift out the low bit; the flags left by the shift decide below whether
// the value is a real $this, in which case it is shifted back and
// inc-ref'd.
a. shrq(1, rAsm);
if (func->attrs() & AttrStatic) {
UnlikelyIfBlock ifRealThis(CC_NBE, a, astubs);
astubs.shlq(1, rAsm);
emitIncRef(astubs, rAsm, KindOfObject);
} else {
JccBlock<CC_BE> ifRealThis(a);
a.shlq(1, rAsm);
emitIncRef(rAsm, KindOfObject);
}
// Put in the correct context
a. loadq(rClosure[c_Closure::funcOffset()], rAsm);
a. storeq(rAsm, rVmFp[AROFF(m_func)]);
// Copy in all the use vars
int baseUVOffset = sizeof(ObjectData) + func->cls()->builtinPropSize();
for (int i = 0; i < numUseVars + 1; i++) {
int spOffset = -cellsToBytes(i+1);
if (i == 0) {
// The closure is the first local.
// We don't incref because it used to be $this
// and now it is a local, so they cancel out
emitStoreTypedValue(a, KindOfObject, rClosure, spOffset, rVmSp);
continue;
}
int uvOffset = baseUVOffset + cellsToBytes(i-1);
emitCopyTo(a, rClosure, uvOffset, rVmSp, spOffset, rAsm);
emitIncRefGenericRegSafe(rVmSp, spOffset, rAsm);
}
// The closure local and the use vars are now initialized as well.
numLocals += numUseVars + 1;
}
// We're in the callee frame; initialize locals. Unroll the loop all
// the way if there are a modest number of locals to update;
// otherwise, do it in a compact loop. If we're in a generator body,
// named locals will be initialized by UnpackCont so we can leave
// them alone here.
int numUninitLocals = func->numLocals() - numLocals;
assert(numUninitLocals >= 0);
if (numUninitLocals > 0 && !func->isGenerator()) {
SpaceRecorder sr("_InitializeLocals", a);
// If there are too many locals, then emitting a loop to initialize locals
// is more compact, rather than emitting a slew of movs inline.
if (numUninitLocals > kLocalsToInitializeInline) {
PhysReg loopReg = rcx;
// rVmFp + rcx points to the count/type fields of the TypedValue we're
// about to write to.
int loopStart = -func->numLocals() * sizeof(TypedValue) + TVOFF(m_type);
int loopEnd = -numLocals * sizeof(TypedValue) + TVOFF(m_type);
emitImmReg(a, loopStart, loopReg);
emitImmReg(a, KindOfUninit, rdx);
TCA topOfLoop = a.code.frontier;
// do {
// rVmFp[loopReg].m_type = KindOfUninit;
// } while(++loopReg != loopEnd);
emitStoreTVType(a, edx, rVmFp[loopReg]);
a. addq (sizeof(Cell), loopReg);
a. cmpq (loopEnd, loopReg);
a. jcc8 (CC_NE, topOfLoop);
} else {
PhysReg base;
int disp, k;
// The inline form stores a zeroed eax as the type, so KindOfUninit must
// be 0.
static_assert(KindOfUninit == 0, "");
if (numParams < func->numLocals()) {
a.xorl (eax, eax);
}
for (k = numLocals; k < func->numLocals(); ++k) {
locToRegDisp(Location(Location::Local, k), &base, &disp, func);
emitStoreTVType(a, eax, base[disp + TVOFF(m_type)]);
}
}
}
// Destination: the function base, or the DV funclet chosen above.
const Opcode* destPC = func->unit()->entry() + func->base();
if (dvInitializer != InvalidAbsoluteOffset) {
// dispatch to funclet.
destPC = func->unit()->entry() + dvInitializer;
}
SrcKey funcBody(func, destPC);
// Move rVmSp to the right place: just past all locals
int frameCells = func->numSlotsInFrame();
if (func->isGenerator()) {
frameCells = 0;
} else {
emitLea(a, rVmFp, -cellsToBytes(frameCells), rVmSp);
}
// Fixup shared by the calls emitted below: pc offset of the destination
// relative to the function base, and the frame size in cells.
Fixup fixup(funcBody.offset() - func->base(), frameCells);
// Emit warnings for any missing arguments
if (!func->info()) {
for (int i = nPassed; i < numParams; ++i) {
// Only params without a default-value funclet produce a warning.
if (paramInfo[i].funcletOff() == InvalidAbsoluteOffset) {
emitImmReg(a, (intptr_t)func->name()->data(), argNumToRegName[0]);
emitImmReg(a, numParams, argNumToRegName[1]);
emitImmReg(a, i, argNumToRegName[2]);
emitCall(a, (TCA)raiseMissingArgument);
m_fixupMap.recordFixup(a.code.frontier, fixup);
}
}
}
// Check surprise flags in the same place as the interpreter: after
// setting up the callee's frame but before executing any of its
// code
emitCheckSurpriseFlagsEnter(false, fixup);
if (func->isClosureBody() && func->cls()) {
// m_func was replaced above with the closure's func, so dispatch
// indirectly through that func's prologue table for the same entry index.
int entry = nPassed <= numParams ? nPassed : numParams + 1;
// Relying on rStashedAR == rVmFp here
a. loadq (rStashedAR[AROFF(m_func)], rax);
a. loadq (rax[Func::prologueTableOff() + sizeof(TCA)*entry], rax);
a. jmp (rax);
} else {
emitBindJmp(funcBody);
}
return funcBody;
}
/*
 * True when the callee is statically known, has builtin info attached, and
 * is called with exactly its declared number of parameters, so that
 * emitBindCall() can emit the native call inline.
 */
static bool
isNativeImplCall(const Func* funcd, int numArgs) {
  if (!funcd) return false;
  if (!funcd->info()) return false;
  return numArgs == funcd->numParams();
}
/*
 * Emit the call sequence for an FCall at srcKey. funcd is the statically
 * known callee or null. Builtins called with exactly their declared
 * parameter count are emitted inline through emitNativeImpl(); everything
 * else goes through a smashable call set up by emitBindCallHelper().
 */
int32_t // returns the amount by which rVmSp should be adjusted
TranslatorX64::emitBindCall(SrcKey srcKey, const Func* funcd, int numArgs) {
// If this is a call to a builtin and we don't need any argument
// munging, we can skip the prologue system and do it inline.
if (isNativeImplCall(funcd, numArgs)) {
// Store a return address into the callee ActRec's m_savedRip. The value
// is a placeholder, patched below once the address after the inlined
// native call is known.
StoreImmPatcher patchIP(a, (uint64_t)a.code.frontier, reg::rax,
cellsToBytes(numArgs) + AROFF(m_savedRip),
rVmSp);
assert(funcd->numLocals() == funcd->numParams());
assert(funcd->numIterators() == 0);
// The callee's ActRec sits just above the numArgs argument cells.
emitLea(a, rVmSp, cellsToBytes(numArgs), rVmFp);
emitCheckSurpriseFlagsEnter(true, Fixup(0, numArgs));
// rVmSp is already correctly adjusted, because there's no locals
// other than the arguments passed.
auto retval = emitNativeImpl(funcd, false /* don't jump to return */);
patchIP.patch(uint64_t(a.code.frontier));
return retval;
}
if (debug) {
// Debug builds only: pre-fill the saved return address with a
// recognizable marker value.
a. storeq (kUninitializedRIP,
rVmSp[cellsToBytes(numArgs) + AROFF(m_savedRip)]);
}
// Stash callee's rVmFp into rStashedAR for the callee's prologue
emitLea(a, rVmSp, cellsToBytes(numArgs), rStashedAR);
emitBindCallHelper(srcKey, funcd, numArgs);
return 0;
}
/*
 * Emit a smashable call in `a` that initially targets a stub in astubs.
 * The stub issues a REQ_BIND_CALL service request carrying a ReqBindCall
 * record that describes the call site.
 */
void
TranslatorX64::emitBindCallHelper(SrcKey srcKey,
                                  const Func* funcd,
                                  int numArgs) {
  // Whatever prologue we're branching to will check at runtime that we
  // went to the right Func*, correcting if necessary. We treat the first
  // Func we encounter as a decent prediction. Make space to burn in a
  // TCA.
  ReqBindCall* bindReq = m_globalData.alloc<ReqBindCall>();

  prepareForSmash(a, kCallLen);
  TCA callSite = a.code.frontier;
  a.    call(astubs.code.frontier);

  // Stub: hand the ActRec to the service request and move the return
  // address pushed by the call above into it.
  astubs. mov_reg64_reg64(rStashedAR, serviceReqArgRegs[1]);
  emitPopRetIntoActRec(astubs);
  emitServiceReq(SRFlags::Persistent, REQ_BIND_CALL, 1ull, bindReq);

  TRACE(1, "will bind static call: tca %p, this %p, funcd %p, astubs %p\n",
        callSite, this, funcd, astubs.code.frontier);

  bindReq->m_toSmash = callSite;
  bindReq->m_nArgs = numArgs;
  bindReq->m_sourceInstr = srcKey;
  // The callee counts as immutable when it was known at translation time.
  bindReq->m_isImmutable = funcd != nullptr;
}
/*
 * NativeImpl is a special operation in the sense that it must be the
 * only opcode in a function body, and also functions as the return.
 *
 * If emitSavedRIPReturn is false, it returns the amount by which
 * rVmSp should be adjusted; otherwise, it emits code to perform
 * the adjustment (this allows us to combine updates to rVmSp)
 * followed by a ret, and returns 0.
 */
int32_t TranslatorX64::emitNativeImpl(const Func* func,
bool emitSavedRIPReturn) {
BuiltinFunction builtinFuncPtr = func->builtinFuncPtr();
if (false) { // typecheck
ActRec* ar = nullptr;
builtinFuncPtr(ar);
}
TRACE(2, "calling builtin preClass %p func %p\n", func->preClass(),
builtinFuncPtr);
/*
* Call the native implementation. This will free the locals for us in the
* normal case. In the case where an exception is thrown, the VM unwinder
* will handle it for us.
*/
a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
if (eagerRecord(func)) {
emitEagerSyncPoint(a, func->getEntry(), 0);
}
emitCall(a, (TCA)builtinFuncPtr);
/*
* We're sometimes calling this while curFunc() isn't really the
* builtin---make sure to properly record the sync point as if we
* are inside the builtin.
*
* The assumption here is that for builtins, the generated func
* contains only a single opcode (NativeImpl), and there are no
* non-argument locals.
*/
assert(func->numIterators() == 0 && func->isBuiltin());
assert(func->numLocals() == func->numParams());
assert(*func->getEntry() == OpNativeImpl);
assert(instrLen(func->getEntry()) == func->past() - func->base());
Offset pcOffset = 0; // NativeImpl is the only instruction in the func
Offset stackOff = func->numLocals(); // Builtin stubs have no
// non-arg locals
recordSyncPoint(a, pcOffset, stackOff);
if (emitSavedRIPReturn) {
// push the return address to get ready to ret.
a. push (rVmFp[AROFF(m_savedRip)]);
}
/*
* The native implementation already put the return value on the
* stack for us, and handled cleaning up the arguments. We have to
* update the frame pointer and the stack pointer, and load the
* return value into the return register so the trace we are
* returning to has it where it expects.
*
* TODO(#1273094): we should probably modify the actual builtins to
* return values via registers (rax:edx) using the C ABI and do a
* reg-to-reg move.
*/
int nLocalCells = func->numSlotsInFrame();
// The stack adjustment is the ActRec plus all frame cells but one; the
// same expression is returned below when the caller applies it itself.
if (emitSavedRIPReturn) {
a. add_imm64_reg64(sizeof(ActRec) + cellsToBytes(nLocalCells-1), rVmSp);
}
// Restore the caller's frame pointer from the ActRec.
a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
emitRB(a, RBTypeFuncExit, func->fullName()->data());
if (emitSavedRIPReturn) {
a. ret();
translator_not_reached(a);
return 0;
}
return sizeof(ActRec) + cellsToBytes(nLocalCells-1);
}
// Emits a smashable jcc/jmp pair at the end of a tracelet. Both branches
// initially target a stub in astubs that issues REQ_BIND_JMPCC_FIRST.
// For documentation of how the pair is rewritten, see bindJmpccFirst below.
void
TranslatorX64::emitCondJmp(SrcKey skTaken, SrcKey skNotTaken,
ConditionCode cc) {
// should be true for SrcKeys generated via OpJmpZ/OpJmpNZ
assert(skTaken.getFuncId() == skNotTaken.getFuncId());
// reserve space for a smashable jnz/jmp pair; both initially point
// to our stub.
prepareForTestAndSmash(a, 0, kAlignJccAndJmp);
TCA old = a.code.frontier;
TCA stub = astubs.code.frontier;
// begin code for the stub
// We need to be careful here, as we are passing an extra parameter to
// REQ_BIND_JMPCC_FIRST. However we can't pass this parameter via
// emitServiceReq because that only supports constants/immediates, so
// compute the last argument via setcc. It lands in serviceReqArgRegs[4],
// after the four immediate arguments passed below.
astubs.setcc(cc, rbyte(serviceReqArgRegs[4]));
emitServiceReq(SRFlags::Persistent, REQ_BIND_JMPCC_FIRST, 4ull,
old,
uint64_t(skTaken.offset()),
uint64_t(skNotTaken.offset()),
uint64_t(cc));
a.jcc(cc, stub); // MUST use 4-byte immediate form
a.jmp(stub); // MUST use 4-byte immediate form
}
/*
 * bindJmp --
 *
 *   Runtime service handler that obtains the translation for destSk and
 *   chains the branch (or stored address) at toSmash to it.
 *
 *   Returns the translation, or nullptr if none could be produced. When
 *   the write lease is unavailable the translation is still returned but
 *   nothing is chained and `smashed` is left untouched.
 */
TCA
TranslatorX64::bindJmp(TCA toSmash, SrcKey destSk,
                       ServiceRequest req, bool& smashed) {
  TCA target = getTranslation(
    TranslArgs(destSk, false).interp(req == REQ_BIND_JMP_NO_IR)
                             .src(toSmash));
  if (target == nullptr) return nullptr;

  LeaseHolder writer(s_writeLease);
  if (!writer) return target;

  smashed = true;
  SrcRec* destRec = getSrcRec(destSk);
  // The request kind tells us what sort of thing lives at toSmash.
  switch (req) {
    case REQ_BIND_ADDR:
      destRec->chainFrom(
        IncomingBranch::addr(reinterpret_cast<TCA*>(toSmash)));
      break;
    case REQ_BIND_JCC:
      destRec->chainFrom(IncomingBranch::jccFrom(toSmash));
      break;
    default:
      destRec->chainFrom(IncomingBranch::jmpFrom(toSmash));
      break;
  }
  return target;
}
/*
 * When we end a tracelet with a conditional jump, emitCondJmp first emits:
 *
 * 1: j<CC> stubJmpccFirst
 * jmp stubJmpccFirst
 *
 * Our "taken" argument tells us whether the branch at 1: was taken or
 * not; and therefore which of offTaken and offNotTaken to continue executing.
 * If we did take the branch, we now rewrite the code so that the branch is
 * straightened. This predicts that subsequent executions will go the same way
 * as the first execution.
 *
 * jn<CC> stubJmpccSecond:offNotTaken
 * nop5 ; fallthru, or jmp if there's already a translation.
 * offTaken:
 *
 * If we did not take the branch, we leave the sense of the condition
 * intact, while patching it up to go to the unexplored code:
 *
 * j<CC> stubJmpccSecond:offTaken
 * nop5
 * offNotTaken:
 *
 * Returns the translation for the side being explored, or null if the
 * write lease could not be acquired or no translation could be produced.
 * `smashed` is set to true only once the code at toSmash is rewritten.
 */
TCA
TranslatorX64::bindJmpccFirst(TCA toSmash,
Offset offTaken, Offset offNotTaken,
bool taken,
ConditionCode cc,
bool& smashed) {
const Func* f = curFunc();
LeaseHolder writer(s_writeLease);
if (!writer) return nullptr;
Offset offWillExplore = taken ? offTaken : offNotTaken;
Offset offWillDefer = taken ? offNotTaken : offTaken;
SrcKey dest(f, offWillExplore);
TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
"taken %d\n",
offWillExplore, offWillDefer, cc, taken);
// We want the branch to point to whichever side has not been explored
// yet.
if (taken) cc = ccNegate(cc);
// Stub for the deferred side; it fires REQ_BIND_JMPCC_SECOND when reached.
TCA stub =
emitServiceReq(SRFlags::None, REQ_BIND_JMPCC_SECOND, 3,
toSmash, uint64_t(offWillDefer), uint64_t(cc));
Asm& as = getAsmFor(toSmash);
// It's not clear where chainFrom should go to if as is astubs
assert(&as != &astubs);
// can we just directly fall through?
// a jmp + jz takes 5 + 6 = 11 bytes
// Fall-through is only possible when the jcc/jmp pair is the last thing
// emitted in `as` and dest has no SrcRec yet.
bool fallThru = toSmash + kJmpccLen + kJmpLen == as.code.frontier &&
!m_srcDB.find(dest);
TCA tDest;
tDest = getTranslation(TranslArgs(dest, !fallThru).src(toSmash));
if (!tDest) {
return 0;
}
smashed = true;
assert(s_writeLease.amOwner());
/*
* Roll over the jcc and the jmp/fallthru. E.g., from:
*
* toSmash: jcc <jmpccFirstStub>
* toSmash+6: jmp <jmpccFirstStub>
* toSmash+11: <probably the new translation == tdest>
*
* to:
*
* toSmash: j[n]z <jmpccSecondStub>
* toSmash+6: nop5
* toSmash+11: newHotness
*/
// The cursor redirects emission in `as` to toSmash while it is in scope.
CodeCursor cg(as, toSmash);
as.jcc(cc, stub);
// The jmp slot directly after the rewritten jcc is chained to dest.
getSrcRec(dest)->chainFrom(IncomingBranch::jmpFrom(as.code.frontier));
TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
return tDest;
}
/*
 * Handler for the deferred side of a conditional jump: obtains the
 * translation for `off` in the current function and, if the write lease
 * can be acquired, chains the jcc at toSmash to it. The translation (which
 * may be null) is returned whether or not the jcc was smashed.
 */
TCA
TranslatorX64::bindJmpccSecond(TCA toSmash, const Offset off,
                               ConditionCode cc, bool& smashed) {
  SrcKey dest(curFunc(), off);
  TCA target = getTranslation(TranslArgs(dest, true).src(toSmash));

  // The lease is only requested once we know there is something to chain.
  LeaseHolder writer(s_writeLease, NO_ACQUIRE);
  if (!target) return target;
  if (!writer.acquire()) return target;

  smashed = true;
  getSrcRec(dest)->chainFrom(IncomingBranch::jccFrom(toSmash));
  return target;
}
// Emit an unconditional jmp to addr when cc is CC_None, otherwise a jcc on
// cc to addr.
static void emitJmpOrJcc(X64Assembler& a, ConditionCode cc, TCA addr) {
  if (cc != CC_None) {
    a.  jcc(cc, addr);
    return;
  }
  a.    jmp(addr);
}
/*
 * emitBindJ --
 *
 *   Emit a smashable branch (unconditional when cc is CC_None, otherwise
 *   conditional on cc) that lazily binds to the translation of `dest` by
 *   way of the service request `req`.
 *
 *   Assumes current basic block is closed (outputs synced, etc.).
 */
void
TranslatorX64::emitBindJ(X64Assembler& _a, ConditionCode cc,
                         SrcKey dest, ServiceRequest req) {
  prepareForSmash(_a, cc == CC_None ? (int)kJmpLen : kJmpccLen);
  TCA branchAddr = _a.code.frontier;
  const bool intoStubs = (&_a == &astubs);

  if (intoStubs) {
    // Emit a placeholder branch first so its bytes are reserved before the
    // service request is emitted; it is overwritten below.
    // NOTE(review): presumably because emitServiceReq emits into astubs
    // too -- confirm.
    emitJmpOrJcc(_a, cc, branchAddr);
  }

  TCA stub = emitServiceReq(SRFlags::None, req, 2,
                            branchAddr, uint64_t(dest.offset()));

  if (intoStubs) {
    // Rewind to the placeholder and retarget it at the stub.
    CodeCursor rewind(_a, branchAddr);
    emitJmpOrJcc(_a, cc, stub);
    return;
  }
  emitJmpOrJcc(_a, cc, stub);
}
// Conditional flavour of emitBindJ: emits into _a a smashable jcc on `cc`
// bound lazily to `dest`.
void
TranslatorX64::emitBindJcc(X64Assembler& _a, ConditionCode cc,
SrcKey dest,
ServiceRequest req /* = REQ_BIND_JCC */) {
emitBindJ(_a, cc, dest, req);
}
// Unconditional flavour of emitBindJ: passes CC_None so a plain smashable
// jmp bound lazily to `dest` is emitted into _a.
void
TranslatorX64::emitBindJmp(X64Assembler& _a,
SrcKey dest,
ServiceRequest req /* = REQ_BIND_JMP */) {
emitBindJ(_a, CC_None, dest, req);
}
// Convenience overload: emits the bound jmp into the main assembler `a`
// with the default service request.
void
TranslatorX64::emitBindJmp(SrcKey dest) {
emitBindJmp(a, dest);
}
/*
 * Emit a type guard for location `l` against `rtt`, failing over to
 * `fail`. Vague values and $this are never guarded.
 */
void
TranslatorX64::checkType(X64Assembler& a,
                         const Location& l,
                         const RuntimeType& rtt,
                         SrcRec& fail) {
  // We can get invalid inputs as a side effect of reading invalid
  // items out of BBs we truncate; they don't need guards.
  if (!rtt.isVagueValue() && !l.isThis()) {
    irCheckType(a, l, rtt, fail);
  }
}
// Convenience overload: emits the conditional fallback jump into the main
// assembler `a`.
void
TranslatorX64::emitFallbackJmp(SrcRec& dest, ConditionCode cc /* = CC_NZ */) {
emitFallbackJmp(a, dest, cc);
}
// Reserve smashable room for a jcc (kJmpccLen) in `as`, then have `dest`
// emit its fallback jump on condition `cc` at the current frontier.
void
TranslatorX64::emitFallbackJmp(Asm& as, SrcRec& dest,
ConditionCode cc /* = CC_NZ */) {
prepareForSmash(as, kJmpccLen);
dest.emitFallbackJump(as.code.frontier, cc);
}
// Reserve smashable room for a jmp (kJmpLen) in `as`, then have `dest` emit
// its fallback jump at the current frontier. No condition code is passed.
void
TranslatorX64::emitFallbackUncondJmp(Asm& as, SrcRec& dest) {
prepareForSmash(as, kJmpLen);
dest.emitFallbackJump(as.code.frontier);
}
// Conditional fallback jump with a mandatory condition code. This performs
// exactly the steps of the three-argument emitFallbackJmp overload
// (reserve kJmpccLen smashable bytes, then emit dest's fallback jump on
// cc), so it simply forwards to it.
void
TranslatorX64::emitFallbackCondJmp(Asm& as, SrcRec& dest, ConditionCode cc) {
  emitFallbackJmp(as, dest, cc);
}
// Emit into `as` a smashable jmp to a stub that issues a
// REQ_RETRANSLATE_NO_IR service request for `sk`, passing the address of
// the jmp and sk's offset as arguments.
void TranslatorX64::emitReqRetransNoIR(Asm& as, const SrcKey& sk) {
prepareForSmash(as, kJmpLen);
TCA toSmash = as.code.frontier;
if (&as == &astubs) {
// Placeholder jmp (to itself) so its bytes are reserved before the
// service request is emitted; it is overwritten below.
as.jmp(toSmash);
}
TCA sr = emitServiceReq(REQ_RETRANSLATE_NO_IR, 2,
toSmash, sk.offset());
if (&as == &astubs) {
// Rewind to the placeholder and retarget it at the stub.
CodeCursor cc(as, toSmash);
as.jmp(sr);
} else {
as.jmp(sr);
}
}
/*
 * Pack up to 64 consecutive entries of `bits`, starting at index i, into a
 * 64-bit word: bits[i + k] becomes bit k of the result. Packing stops at
 * the end of the vector or at the next multiple of 64, whichever comes
 * first; positions past the end of the vector are left zero.
 *
 * i must be a multiple of 64 and a valid index into `bits`.
 */
uint64_t TranslatorX64::packBitVec(const vector<bool>& bits, unsigned i) {
  uint64_t retval = 0;
  assert(i % 64 == 0);
  assert(i < bits.size());
  while (i < bits.size()) {
    // Widen to 64 bits before shifting. bits[i] would otherwise be promoted
    // to int, which makes shift counts >= 32 undefined behaviour and
    // sign-extends bit 31 into the upper half of the result.
    retval |= uint64_t(bits[i]) << (i % 64);
    if ((++i % 64) == 0) {
      break;
    }
  }
  return retval;
}
/*
 * Emit reffiness guards for every pushed ActRec that we've made reffiness
 * assumptions about. Each entry of refDeps.m_arMap is handed to the HHBC
 * translator's guardRefs together with its mask and expected values.
 */
void
TranslatorX64::checkRefs(X64Assembler& a,
                         SrcKey sk,
                         const RefDeps& refDeps,
                         SrcRec& fail) {
  if (refDeps.size() == 0) return;

  // NOTE(review): the actual Func might have fewer refs than the number of
  // args being passed; handling of that case is presumably inside
  // guardRefs -- confirm.
  for (auto const& arDep : refDeps.m_arMap) {
    const int entryArDelta = arDep.first;
    m_hhbcTrans->guardRefs(entryArDelta,
                           arDep.second.m_mask,
                           arDep.second.m_vals);
  }
}
/*
 * emitRetFromInterpretedFrame --
 *
 * When the interpreter pushes a call frame, there is necessarily no
 * machine RIP available to return to. This helper fishes out the
 * destination from the frame and redirects execution to it via enterTC.
 *
 * Returns the (aligned) address of the stub, which is emitted into astubs.
 */
TCA
TranslatorX64::emitRetFromInterpretedFrame() {
// Offset subtracted from rVmSp to address the ActRec being returned from.
int32_t arBase = sizeof(ActRec) - sizeof(Cell);
moveToAlign(astubs);
TCA stub = astubs.code.frontier;
// Marshall our own args by hand here: arg 0 is the ActRec address, arg 1
// the current frame pointer. That is why the request below is issued with
// zero immediate arguments.
astubs. lea (rVmSp[-arBase], serviceReqArgRegs[0]);
astubs. movq (rVmFp, serviceReqArgRegs[1]);
(void) emitServiceReq(SRFlags::Persistent | SRFlags::JmpInsteadOfRet,
REQ_POST_INTERP_RET, 0ull);
return stub;
}
/*
 * Same as emitRetFromInterpretedFrame, except has different logic for
 * fetching the AR we are trying to return from, because generators have
 * ARs in different places.
 *
 * Returns the (aligned) address of the stub, which is emitted into astubs.
 */
TCA
TranslatorX64::emitRetFromInterpretedGeneratorFrame() {
// We have to get the Continuation object from the current AR's $this, then
// find where its embedded AR is.
moveToAlign(astubs);
TCA stub = astubs.code.frontier;
PhysReg rContAR = serviceReqArgRegs[0];
// arg 0: m_this of the current frame, then that object's m_arPtr.
astubs. loadq (rVmFp[AROFF(m_this)], rContAR);
astubs. loadq (rContAR[CONTOFF(m_arPtr)], rContAR);
// arg 1: the current frame pointer.
astubs. movq (rVmFp, serviceReqArgRegs[1]);
(void) emitServiceReq(SRFlags::Persistent | SRFlags::JmpInsteadOfRet,
REQ_POST_INTERP_RET, 0ull);
return stub;
}
class FreeRequestStubTrigger : public Treadmill::WorkItem {
TCA m_stub;
public:
explicit FreeRequestStubTrigger(TCA stub) : m_stub(stub) {
TRACE(3, "FreeStubTrigger @ %p, stub %p\n", this, m_stub);
}
virtual void operator()() {
TRACE(3, "FreeStubTrigger: Firing @ %p , stub %p\n", this, m_stub);
if (TranslatorX64::Get()->freeRequestStub(m_stub) != true) {
/* If we can't free the stub, enqueue again to retry */
enqueue(new FreeRequestStubTrigger(m_stub));
}
}
};
/*
 * DepthGuard --
 *
 *   In DEBUG builds, a scoped counter of how many DepthGuard objects
 *   are live on the current thread; depthOne() reports whether exactly
 *   one is live. In other builds it is an empty type whose depthOne()
 *   is always false.
 */
#ifdef DEBUG
struct DepthGuard {
  static __thread int m_depth;

  DepthGuard() {
    m_depth++;
    TRACE(2, "DepthGuard: %d {\n", m_depth);
  }

  ~DepthGuard() {
    TRACE(2, "DepthGuard: %d }\n", m_depth);
    m_depth--;
  }

  bool depthOne() const {
    return m_depth == 1;
  }
};
__thread int DepthGuard::m_depth;
#else
struct DepthGuard {
  bool depthOne() const {
    return false;
  }
};
#endif
/*
 * enterTCHelper does not save callee-saved registers except %rbp. This means
 * when we call it from C++, we have to tell gcc to clobber all the other
 * callee-saved registers.
 *
 * CALLEE_SAVED_BARRIER() expands to an empty asm statement whose clobber
 * list names those registers for the target architecture. Architectures
 * other than x86-64 and AArch64 (little-endian) fail to compile via #error.
 */
#if defined(__x86_64__)
# define CALLEE_SAVED_BARRIER() \
asm volatile("" : : : "rbx", "r12", "r13", "r14", "r15")
#elif defined(__AARCH64EL__)
# define CALLEE_SAVED_BARRIER() \
asm volatile("" : : : "x19", "x20", "x21", "x22", "x23", "x24", "x25", \
"x26", "x27", "x28")
#else
# error What are the callee-saved registers on your system?
#endif
/*
 * enterTCHelper is a handwritten assembly function that transfers control in
 * and out of the TC.
 *
 * The static_asserts below pin the assumptions named in their messages
 * (VM register assignment, reserved RSP scratch space, and the numeric
 * value of REQ_BIND_CALL); if one fires, the assembly has to be updated to
 * match.
 */
static_assert(rVmSp == rbx &&
rVmFp == rbp &&
rVmTl == r12 &&
rStashedAR == r15,
"__enterTCHelper needs to be modified to use the correct ABI");
static_assert(kReservedRSPScratchSpace == 0x280,
"enterTCHelper needs to be updated for changes to "
"kReservedRSPScratchSpace");
static_assert(REQ_BIND_CALL == 0x1,
"Update assembly test for REQ_BIND_CALL in __enterTCHelper");
// Declared extern "C" so the symbol is not C++-mangled. enterTC passes:
// vmsp(), vmfp(), the TCA to start at, a pointer to its TReqInfo,
// vmFirstAR(), and tl_targetCaches.
extern "C" void enterTCHelper(Cell* vm_sp,
Cell* vm_fp,
TCA start,
TReqInfo* infoPtr,
ActRec* firstAR,
void* targetCacheBase);
/*
 * Service-request record shared between enterTC and enterTCHelper.
 * enterTC seeds requestNum and saved_rStashedAr before entering the TC;
 * after the helper returns, enterTC and handleServiceRequest read
 * requestNum, args[] and stubAddr.
 * NOTE(review): enterTCHelper is hand-written assembly, so the field
 * order/offsets here are presumably relied upon there -- confirm before
 * changing the layout.
 */
struct TReqInfo {
// Which ServiceRequest is being made (compared against REQ_* values).
uintptr_t requestNum;
// Raw request arguments; each request kind casts them as it needs.
uintptr_t args[5];
// Some TC registers need to be preserved across service requests.
uintptr_t saved_rStashedAr;
// Stub addresses are passed back to allow us to recycle used stubs.
TCA stubAddr;
};
/*
 * enterTC --
 *
 *   Main loop for running translated code. Repeatedly enters the TC
 *   through enterTCHelper and services whatever request comes back, until
 *   a REQ_EXIT arrives, handleServiceRequest() asks to halt, or the
 *   interpreter fallback runs out of PC.
 *
 *   start: TCA to begin executing at, or nullptr.
 *   data:  when start is non-null, stored as the saved rStashedAR value
 *          (a non-null value also makes the initial request
 *          REQ_BIND_CALL); when start is null, it is read as a SrcKey*
 *          naming where to begin, and a translation is looked up for it.
 */
void
TranslatorX64::enterTC(TCA start, void* data) {
using namespace TargetCache;
if (debug) {
fflush(stdout);
fflush(stderr);
}
DepthGuard d;
TReqInfo info;
SrcKey sk;
if (LIKELY(start != nullptr)) {
info.requestNum = data ? REQ_BIND_CALL : -1;
info.saved_rStashedAr = (uintptr_t)data;
} else {
info.requestNum = -1;
info.saved_rStashedAr = 0;
sk = *(SrcKey*)data;
start = getTranslation(TranslArgs(sk, true));
}
for (;;) {
assert(sizeof(Cell) == 16);
assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
s_writeLease.gremlinUnlock();
// Keep dispatching until we end up somewhere the translator
// recognizes, or we luck out and the leaseholder exits.
while (!start) {
TRACE(2, "enterTC forwarding BB to interpreter\n");
g_vmContext->m_pc = curUnit()->at(sk.offset());
INC_TPC(interp_bb);
g_vmContext->dispatchBB();
PC newPc = g_vmContext->getPC();
// A null PC after interpreting means there is nothing left to run at
// this nesting level: clear the frame pointer and leave.
if (!newPc) { g_vmContext->m_fp = 0; return; }
sk = SrcKey(curFunc(), newPc);
start = getTranslation(TranslArgs(sk, true));
}
assert(start == (TCA)HPHP::Transl::funcBodyHelperThunk ||
isValidCodeAddress(start) ||
(start == (TCA)HPHP::Transl::fcallHelperThunk &&
info.saved_rStashedAr == (uintptr_t)data));
assert(!s_writeLease.amOwner());
// With no current frame, `data` is treated as the ActRec to read the
// function from.
const Func* func = (vmfp() ? (ActRec*)vmfp() : (ActRec*)data)->m_func;
func->validate();
INC_TPC(enter_tc);
TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start,
vmfp(), func->name()->data(), vmsp());
tl_regState = REGSTATE_DIRTY;
// We have to force C++ to spill anything that might be in a callee-saved
// register (aside from rbp). enterTCHelper does not save them.
CALLEE_SAVED_BARRIER();
enterTCHelper(vmsp(), vmfp(), start, &info, vmFirstAR(),
tl_targetCaches);
CALLEE_SAVED_BARRIER();
assert(g_vmContext->m_stack.isValidAddress((uintptr_t)vmsp()));
tl_regState = REGSTATE_CLEAN; // Careful: pc isn't sync'ed yet.
TRACE(1, "enterTC: %p fp%p sp%p } return\n", start,
vmfp(), vmsp());
if (debug) {
// Debugging code: cede the write lease half the time.
if (RuntimeOption::EvalJitStressLease) {
if (d.depthOne() == 1 && (rand() % 2) == 0) {
s_writeLease.gremlinLock();
}
}
// Ensure that each case either returns, or drives start to a valid
// value.
start = TCA(0xbee5face);
}
TRACE(2, "enterTC: request(%s) args: %" PRIx64 " %" PRIx64 " %"
PRIx64 " %" PRIx64 " %" PRIx64 "\n",
reqName(info.requestNum),
info.args[0], info.args[1], info.args[2], info.args[3],
info.args[4]);
// REQ_EXIT is handled here; every other request goes to
// handleServiceRequest(), which updates start/sk for the next iteration.
if (LIKELY(info.requestNum == REQ_EXIT)) {
vmfp() = nullptr;
return;
}
if (!handleServiceRequest(info, start, sk)) return;
}
}
/*
 * The contract is that each case will set sk to the place where
 * execution should resume, and optionally set start to the hardware
 * translation of the resumption point (or otherwise set it to null).
 * Returns false if we need to halt this nesting of the VM.
 *
 * start and sk might be subtly different; i.e., there are cases where
 * start != NULL && start != getTranslation(sk). For instance,
 * REQ_BIND_CALL has not finished executing the OpCall when it gets
 * here, and has even done some work on its behalf. sk == OpFCall,
 * while start == the point in the TC that's "half-way through" the
 * Call instruction. If we punt to the interpreter, the interpreter
 * will redo some of the work that the translator has already done.
 *
 * info:  the request record filled in on exit from the TC; info.args[]
 *        is decoded per request kind. REQ_BIND_CALL may rewrite
 *        info.requestNum (see below).
 * start: out-parameter, TCA to resume at (may be null).
 * sk:    out-parameter, SrcKey to resume at.
 *
 * When a request smashed its source and info.stubAddr is set, a
 * FreeRequestStubTrigger is enqueued on the Treadmill for that stub.
 */
bool TranslatorX64::handleServiceRequest(TReqInfo& info,
TCA& start,
SrcKey& sk) {
const uintptr_t& requestNum = info.requestNum;
auto* const args = info.args;
assert(requestNum != REQ_EXIT);
INC_TPC(service_req);
bool smashed = false;
switch (requestNum) {
case REQ_BIND_CALL: {
// args[0]: ReqBindCall record, args[1]: the callee's ActRec.
ReqBindCall* req = (ReqBindCall*)args[0];
ActRec* calleeFrame = (ActRec*)args[1];
TCA toSmash = req->m_toSmash;
Func *func = const_cast<Func*>(calleeFrame->m_func);
int nArgs = req->m_nArgs;
bool isImmutable = req->m_isImmutable;
TCA dest = tx64->funcPrologue(func, nArgs);
TRACE(2, "enterTC: bindCall %s -> %p\n", func->name()->data(), dest);
if (!isImmutable) {
// We dont know we're calling the right function, so adjust
// dest to point to the dynamic check of ar->m_func.
dest = funcPrologToGuard(dest, func);
} else {
TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
func->fullName()->data(), dest);
}
LeaseHolder writer(s_writeLease, NO_ACQUIRE);
if (dest && writer.acquire()) {
TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash, dest);
smashCall(tx64->getAsmFor(toSmash), toSmash, dest);
smashed = true;
// sk: stale, but doesn't matter since we have a valid dest TCA.
} else {
// We need translator help; we're not at the callee yet, so
// roll back. The prelude has done some work already, but it
// should be safe to redo.
TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
toSmash, dest);
sk = req->m_sourceInstr;
}
start = dest;
if (!start) {
// EnterTCHelper pushes the return ip onto the stack when the
// requestNum is REQ_BIND_CALL, but if start is NULL, it will
// interpret in doFCall, so we clear out the requestNum in this
// case to prevent enterTCHelper from pushing the return ip
// onto the stack.
info.requestNum = ~REQ_BIND_CALL;
}
} break;
case REQ_BIND_SIDE_EXIT:
case REQ_BIND_JMP:
case REQ_BIND_JCC:
case REQ_BIND_JMP_NO_IR:
case REQ_BIND_ADDR:
{
// args[0]: address to smash, args[1]: bytecode offset of the target.
TCA toSmash = (TCA)args[0];
Offset off = args[1];
sk = SrcKey(curFunc(), off);
if (requestNum == REQ_BIND_SIDE_EXIT) {
SKTRACE(3, sk, "side exit taken!\n");
}
start = bindJmp(toSmash, sk, (ServiceRequest)requestNum, smashed);
} break;
case REQ_BIND_JMPCC_FIRST: {
// args: [0] address to smash, [1] taken offset, [2] not-taken offset,
// [3] condition code, [4] low bit says whether the branch was taken.
TCA toSmash = (TCA)args[0];
Offset offTaken = (Offset)args[1];
Offset offNotTaken = (Offset)args[2];
ConditionCode cc = ConditionCode(args[3]);
bool taken = int64_t(args[4]) & 1;
start = bindJmpccFirst(toSmash, offTaken, offNotTaken,
taken, cc, smashed);
// SrcKey: we basically need to emulate the fail
sk = SrcKey(curFunc(), taken ? offTaken : offNotTaken);
} break;
case REQ_BIND_JMPCC_SECOND: {
// args: [0] address to smash, [1] target offset, [2] condition code.
TCA toSmash = (TCA)args[0];
Offset off = (Offset)args[1];
ConditionCode cc = ConditionCode(args[2]);
start = bindJmpccSecond(toSmash, off, cc, smashed);
sk = SrcKey(curFunc(), off);
} break;
case REQ_BIND_REQUIRE: {
// args: [0] ReqLitStaticArgs record, [1] Func*, [2] offset in that Func.
ReqLitStaticArgs* rlsa = (ReqLitStaticArgs*)args[0];
sk = SrcKey((Func*)args[1], (Offset)args[2]);
start = getTranslation(TranslArgs(sk, true));
if (start) {
// Chaining is only done when the write lease could be taken.
LeaseHolder writer(s_writeLease);
if (writer) {
smashed = true;
SrcRec* sr = getSrcRec(sk);
sr->chainFrom(IncomingBranch::addr(&rlsa->m_pseudoMain));
}
}
} break;
case REQ_RETRANSLATE_NO_IR: {
// args: [0] address to smash, [1] offset to retranslate.
TCA toSmash = (TCA)args[0];
sk = SrcKey(curFunc(), (Offset)args[1]);
start = retranslateAndPatchNoIR(sk, true, toSmash);
SKTRACE(1, sk, "retranslated (without IR) @%p\n", start);
} break;
case REQ_RETRANSLATE: {
// args[0]: offset to retranslate.
INC_TPC(retranslate);
sk = SrcKey(curFunc(), (Offset)args[0]);
start = retranslate(TranslArgs(sk, true));
SKTRACE(2, sk, "retranslated @%p\n", start);
} break;
case REQ_INTERPRET: {
// args: [0] offset to start interpreting at, [1] instruction count
// (0 means "until the basic block ends").
Offset off = args[0];
int numInstrs = args[1];
g_vmContext->m_pc = curUnit()->at(off);
/*
* We know the compilation unit has not changed; basic blocks do
* not span files. I claim even exceptions do not violate this
* axiom.
*/
assert(numInstrs >= 0);
SKTRACE(5, SrcKey(curFunc(), off), "interp: enter\n");
if (numInstrs) {
s_perfCounters[tpc_interp_instr] += numInstrs;
g_vmContext->dispatchN(numInstrs);
} else {
// numInstrs == 0 means it wants to dispatch until BB ends
INC_TPC(interp_bb);
g_vmContext->dispatchBB();
}
PC newPc = g_vmContext->getPC();
// No PC left after interpreting: halt this VM nesting level.
if (!newPc) { g_vmContext->m_fp = 0; return false; }
SrcKey newSk(curFunc(), newPc);
SKTRACE(5, newSk, "interp: exit\n");
sk = newSk;
start = getTranslation(TranslArgs(newSk, true));
} break;
case REQ_POST_INTERP_RET: {
// This is only responsible for the control-flow aspect of the Ret:
// getting to the destination's translation, if any.
ActRec* ar = (ActRec*)args[0];
ActRec* caller = (ActRec*)args[1];
assert((Cell*) caller == vmfp());
Unit* destUnit = caller->m_func->unit();
// Set PC so logging code in getTranslation doesn't get confused.
vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
SrcKey dest(caller->m_func, vmpc());
sk = dest;
start = getTranslation(TranslArgs(dest, true));
TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
ar->m_func->fullName()->data(),
caller->m_func->fullName()->data());
} break;
case REQ_RESUME: {
// Resume at whatever the current function/PC are.
SrcKey dest(curFunc(), vmpc());
sk = dest;
start = getTranslation(TranslArgs(dest, true));
} break;
case REQ_STACK_OVERFLOW: {
/*
* we need to construct the pc of the fcall from the return
* address (which will be after the fcall). Because fcall is
* a variable length instruction, and because we sometimes
* delete instructions from the instruction stream, we
* need to use fpi regions to find the fcall.
*/
const FPIEnt* fe = curFunc()->findPrecedingFPI(
curUnit()->offsetOf(vmpc()));
vmpc() = curUnit()->at(fe->m_fcallOff);
assert(isFCallStar(*vmpc()));
// No break: this case ends in NOT_REACHED() after raise_error().
raise_error("Stack overflow");
NOT_REACHED();
}
}
if (smashed && info.stubAddr) {
Treadmill::WorkItem::enqueue(new FreeRequestStubTrigger(info.stubAddr));
}
return true;
}
/*
 * Detaches and returns the head of the free-stub list, or a null TCA
 * when the list is empty. The node's m_freed marker is set to
 * ~kStubFree so it no longer reads as free.
 */
TCA FreeStubList::maybePop() {
  StubNode* const head = m_list;
  if (head != nullptr) {
    m_list = head->m_next;
    head->m_freed = ~kStubFree;
  }
  return (TCA)head;
}
/*
 * Puts `stub` at the head of the free list.
 *
 * The Treadmill may release the same stub more than once when several
 * threads ran the service request before it was freed, so a stub that
 * is already marked kStubFree is left alone.
 */
void FreeStubList::push(TCA stub) {
  StubNode* const node = (StubNode*)stub;
  const bool alreadyFree = (node->m_freed == kStubFree);
  if (!alreadyFree) {
    node->m_freed = kStubFree;
    node->m_next = m_list;
    m_list = node;
  }
}
/*
 * Returns `stub` to the free-stub list under the write lease.
 *
 * Returns false without touching the list when the lease could not be
 * acquired; the caller (FreeRequestStubTrigger) retries in that case.
 */
bool
TranslatorX64::freeRequestStub(TCA stub) {
  LeaseHolder writer(s_writeLease);
  if (writer) {
    assert(astubs.code.isValidAddress(stub));
    m_freeStubs.push(stub);
    return true;
  }
  return false;
}
/*
 * Picks the address at which to emit a reusable service-request stub:
 * a recycled stub from the free list when one is available, otherwise
 * the current astubs frontier. Bumps the matching stats counter.
 */
TCA TranslatorX64::getFreeStub() {
  if (TCA recycled = m_freeStubs.maybePop()) {
    Stats::inc(Stats::Astubs_Reused);
    assert(m_freeStubs.m_list == 0
           || astubs.code.isValidAddress(TCA(m_freeStubs.m_list)));
    return recycled;
  }
  TCA const fresh = astubs.code.frontier;
  Stats::inc(Stats::Astubs_New);
  return fresh;
}
/*
 * Scoped bookmark for temporarily moving a.code.frontier.
 *
 * Construction points the assembler's frontier at newFrontier and
 * remembers the previous value. Destruction puts the previous value
 * back, but only if the constructor actually moved the frontier.
 */
class ConditionalCodeCursor {
  typedef X64Assembler Asm;

  Asm& m_a;
  TCA m_oldFrontier;
  bool m_changed;

 public:
  ConditionalCodeCursor(Asm& a, TCA newFrontier)
    : m_a(a)
    , m_oldFrontier(a.code.frontier)
    , m_changed(newFrontier != m_oldFrontier)
  {
    m_a.code.frontier = newFrontier;
    TRACE_MOD(Trace::trans, 1, "RewindTo: %p (from %p)\n",
              m_a.code.frontier, m_oldFrontier);
  }

  ~ConditionalCodeCursor() {
    if (m_changed) m_a.code.frontier = m_oldFrontier;
    TRACE_MOD(Trace::trans, 1, "Restore: %p\n",
              m_a.code.frontier);
  }
};
/*
 * emitServiceReq --
 *
 *   Call a translator service co-routine. The code emitted here
 *   re-enters the enterTC loop, invoking the requested service. Control
 *   will be returned non-locally to the next logical instruction in
 *   the TC.
 *
 *   flags:   SRFlags::EmitInA emits into a instead of astubs (and
 *            disables alignment); SRFlags::Align aligns the stub;
 *            SRFlags::Persistent marks the stub as not reusable;
 *            SRFlags::JmpInsteadOfRet ends the stub with pop/jmp
 *            rather than ret.
 *   req:     the service request number, loaded into rdi.
 *   numArgs: number of uint64_t values to read from `args`; each one is
 *            loaded into serviceReqArgRegs[i], so it must not exceed
 *            the size of that array.
 *   args:    the va_list holding the argument values.
 *
 *   Return value is a destination; we emit the bulky service
 *   request code into astubs (or into a, with SRFlags::EmitInA).
 */
TCA
TranslatorX64::emitServiceReqVA(SRFlags flags, ServiceRequest req, int numArgs,
                                va_list args) {
  bool emitInA = flags & SRFlags::EmitInA;
  bool align = (flags & SRFlags::Align) && !emitInA;
  bool notReusable = flags & SRFlags::Persistent;
  Asm& as = emitInA ? a : astubs;
  TCA start = emitInA ? a.code.frontier :
              notReusable ? astubs.code.frontier :
              getFreeStub();
  // Emit at `start`; if that moved the frontier backwards onto a recycled
  // stub, the cursor restores the frontier when we leave this function.
  ConditionalCodeCursor cg(as, start);
  /* max space for moving to align, saving VM regs plus emitting args */
  static const int kVMRegSpace = 0x14;
  static const int kMovSize = 0xa;
  static const int kNumServiceRegs = sizeof(serviceReqArgRegs)/sizeof(PhysReg);
  static const int kMaxStubSpace = kJmpTargetAlign - 1
                                 + kVMRegSpace
                                 + kNumServiceRegs * kMovSize;
  // Every argument is placed in serviceReqArgRegs[i]; a larger count
  // would index past the end of that array.
  assert(numArgs >= 0 && numArgs <= kNumServiceRegs);
  if (align) {
    moveToAlign(as);
  }
  TCA retval = as.code.frontier;
  emitEagerVMRegSave(as, SaveFP);
  /*
   * Move args into appropriate regs.
   */
  TRACE(3, "Emit Service Req %s(", reqName(req));
  for (int i = 0; i < numArgs; i++) {
    uint64_t argVal = va_arg(args, uint64_t);
    TRACE(3, "%p,", (void*)argVal);
    emitImmReg(as, argVal, serviceReqArgRegs[i]);
  }
  if (notReusable) {
    // Persistent stubs report a null stub address in rAsm.
    emitImmReg(as, 0, rAsm);
  } else {
    /*
     * Make sure that the stub has enough space so it can be reused
     * for other service requests, with different number of arguments,
     * alignment, etc.
     */
    as.emitNop(start + kMaxStubSpace - as.code.frontier);
    emitImmReg(as, (uint64_t)start, rAsm);
  }
  TRACE(3, ")\n");
  emitImmReg(as, req, rdi);
  /*
   * Weird hand-shaking with enterTC: reverse-call a service routine.
   *
   * In the case of some special stubs (m_callToExit, m_retHelper), we
   * have already unbalanced the return stack by doing a ret to
   * something other than enterTCHelper. In that case
   * SRJmpInsteadOfRet indicates to fake the return.
   */
  if (flags & SRFlags::JmpInsteadOfRet) {
    as.pop(rax);
    as.jmp(rax);
  } else {
    as.ret();
  }
  recordBCInstr(OpServiceRequest, as, retval);
  translator_not_reached(as);
  return retval;
}
/*
 * Variadic convenience wrapper around emitServiceReqVA() that always
 * uses SRFlags::Align. The trailing arguments are the numArgs request
 * argument values.
 */
TCA
TranslatorX64::emitServiceReq(ServiceRequest req, int numArgs, ...) {
  va_list ap;
  va_start(ap, numArgs);
  TCA const stub = emitServiceReqVA(SRFlags::Align, req, numArgs, ap);
  va_end(ap);
  return stub;
}
/*
 * Variadic wrapper around emitServiceReqVA() with caller-supplied
 * flags. The trailing arguments are the numArgs request argument
 * values.
 */
TCA
TranslatorX64::emitServiceReq(SRFlags flags, ServiceRequest req,
                              int numArgs, ...) {
  va_list ap;
  va_start(ap, numArgs);
  TCA const stub = emitServiceReqVA(flags, req, numArgs, ap);
  va_end(ap);
  return stub;
}
/*
 * When the TransDB is enabled, emits a lock-prefixed increment of the
 * counter at getTransCounterAddr() (using rAsm as scratch). Emits
 * nothing otherwise. Returns the frontier as it was on entry.
 */
TCA
TranslatorX64::emitTransCounterInc(X64Assembler& a) {
  TCA const entry = a.code.frontier;
  if (isTransDBEnabled()) {
    a.  movq  (getTransCounterAddr(), rAsm);
    a.  lock  ();
    a.  incq  (*rAsm);
  }
  return entry;
}
void
TranslatorX64::getInputsIntoXMMRegs(const NormalizedInstruction& ni,
PhysReg lr, PhysReg rr,
RegXMM lxmm,
RegXMM rxmm) {
const DynLocation& l = *ni.inputs[0];
const DynLocation& r = *ni.inputs[1];
// Get the values into their appropriate xmm locations
auto intoXmm = [&](const DynLocation& l, PhysReg src, RegXMM xmm) {
if (l.isInt()) {
// cvtsi2sd doesn't modify the high bits of its target, which can
// cause false dependencies to prevent register renaming from kicking
// in. Break the dependency chain by zeroing out the destination reg.
a. pxor_xmm_xmm(xmm, xmm);
a. cvtsi2sd_reg64_xmm(src, xmm);
} else {
a. mov_reg64_xmm(src, xmm);
}
};
intoXmm(l, lr, lxmm);
intoXmm(r, rr, rxmm);
}
/*
 * Emits a scalar-double add, subtract or multiply for OpAdd, OpSub or
 * OpMul. The inputs are first moved into xmm1 (from srcReg) and xmm0
 * (from srcDestReg); the result in xmm0 is moved back to srcDestReg.
 * Any other opcode is not_reached().
 */
void
TranslatorX64::binaryMixedArith(const NormalizedInstruction& i,
                                Opcode op,
                                PhysReg srcReg,
                                PhysReg srcDestReg) {
  getInputsIntoXMMRegs(i, srcReg, srcDestReg, xmm1, xmm0);
  switch (op) {
    case OpAdd:
      a.  addsd_xmm_xmm(xmm1, xmm0);
      break;
    case OpSub:
      a.  subsd_xmm_xmm(xmm1, xmm0);
      break;
    case OpMul:
      a.  mulsd_xmm_xmm(xmm1, xmm0);
      break;
    default:
      not_reached();
  }
  a.  mov_xmm_reg64(xmm0, srcDestReg);
}
// Generates one interpOne<Opcode>(ActRec*, Cell*, Offset) function per
// entry of OPCODES. Each generated function:
//   - calls interp_set_regs() with its three arguments,
//   - asserts that the opcode at vmpc() is the expected one,
//   - bumps the interpOne stats and undoes one Instr_TC count,
//   - runs the interpreter's op<Opcode>() on g_vmContext,
//   - sets tl_regState to REGSTATE_DIRTY and returns g_vmContext.
// Only the `opcode` macro parameter is used; the others are ignored.
#define O(opcode, imm, pusph, pop, flags) \
/**
* The interpOne methods saves m_pc, m_fp, and m_sp ExecutionContext,
* calls into the interpreter, and then return a pointer to the
* current ExecutionContext.
*/ \
VMExecutionContext* \
interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) { \
interp_set_regs(ar, sp, pcOff); \
SKTRACE(5, SrcKey(curFunc(), vmpc()), "%40s %p %p\n", \
"interpOne" #opcode " before (fp,sp)", \
vmfp(), vmsp()); \
assert(*vmpc() == Op ## opcode); \
VMExecutionContext* ec = g_vmContext; \
Stats::inc(Stats::Instr_InterpOne ## opcode); \
INC_TPC(interp_one) \
/* Correct for over-counting in TC-stats. */ \
Stats::inc(Stats::Instr_TC, -1); \
ec->op##opcode(); \
/*
* Only set regstate back to dirty if an exception is not
* propagating. If an exception is throwing, regstate for this call
* is actually still correct, and we don't have information in the
* fixup map for interpOne calls anyway.
*/ \
tl_regState = REGSTATE_DIRTY; \
return ec; \
}
OPCODES
#undef O
// Table of the interpOne<Opcode> functions as untyped pointers, with one
// entry per OPCODES item in OPCODES order.
// NOTE(review): presumably indexed by opcode number -- confirm at the
// use sites.
void* interpOneEntryPoints[] = {
#define O(opcode, imm, pusph, pop, flags) \
(void*)(interpOne ## opcode),
OPCODES
#undef O
};
/*
 * fixupWork --
 *
 *   Walks the saved-rbp chain starting at `rbp`. For each frame whose
 *   saved rbp looks like a VM frame, asks m_fixupMap for the VM
 *   registers of that frame. On the first hit, writes the fp and pc into
 *   `ec`, the sp into vmsp(), and returns.
 *
 *   Reaching the end of the chain without a hit is NOT_REACHED().
 */
void TranslatorX64::fixupWork(VMExecutionContext* ec,
ActRec* rbp) const {
assert(RuntimeOption::EvalJit);
TRACE_SET_MOD(fixup);
TRACE(1, "fixup(begin):\n");
// A pointer is considered a VM frame when it lies outside the range
// [s_stackLimit, s_stackLimit + s_stackSize); the unsigned subtraction
// makes addresses below s_stackLimit wrap to large values.
auto isVMFrame = [] (ActRec* ar) {
assert(ar);
bool ret = uintptr_t(ar) - Util::s_stackLimit >= Util::s_stackSize;
assert(!ret ||
(ar >= g_vmContext->m_stack.getStackLowAddress() &&
ar < g_vmContext->m_stack.getStackHighAddress()) ||
ar->m_func->isGenerator());
return ret;
};
// Three consecutive links are tracked per iteration:
// prevRbp -> rbp -> nextRbp, walking towards the callers.
auto* nextRbp = rbp;
rbp = 0;
do {
auto* prevRbp = rbp;
rbp = nextRbp;
assert(rbp && "Missing fixup for native call");
nextRbp = reinterpret_cast<ActRec*>(rbp->m_savedRbp);
TRACE(2, "considering frame %p, %p\n", rbp, (void*)rbp->m_savedRip);
if (isVMFrame(nextRbp)) {
TRACE(2, "fixup checking vm frame %s\n",
nextRbp->m_func->name()->data());
FixupMap::VMRegs regs;
if (m_fixupMap.getFrameRegs(rbp, prevRbp, &regs)) {
TRACE(2, "fixup(end): func %s fp %p sp %p pc %p\n",
regs.m_fp->m_func->name()->data(),
regs.m_fp, regs.m_sp, regs.m_pc);
ec->m_fp = const_cast<ActRec*>(regs.m_fp);
ec->m_pc = regs.m_pc;
vmsp() = regs.m_sp;
return;
}
}
// Stop when the chain ends or a frame links to itself.
} while (rbp && rbp != nextRbp);
// OK, we've exhausted the entire actRec chain. We are only
// invoking ::fixup() from contexts that were known to be called out
// of the TC, so this cannot happen.
NOT_REACHED();
}
/*
 * Entry point for fixing up VM registers: hands fixupWork() the frame
 * pointer of this (C++) frame, from which it walks up the frames until
 * it finds a TC frame.
 */
void TranslatorX64::fixup(VMExecutionContext* ec) const {
  DECLARE_FRAME_POINTER(nativeFp);
  fixupWork(ec, nativeFp);
}
/*
 * Walks the saved-rbp chain from the current frame and returns the
 * first saved return address that is a valid code address, or nullptr
 * if the chain ends without one.
 */
TCA TranslatorX64::getTranslatedCaller() const {
  DECLARE_FRAME_POINTER(fp);
  // Work on a copy: the register-mapped variable can't be mutated
  // directly.
  ActRec* frame = fp;
  while (frame) {
    TCA const retAddr = (TCA)frame->m_savedRip;
    if (isValidCodeAddress(retAddr)) {
      return retAddr;
    }
    frame = (ActRec*)frame->m_savedRbp;
  }
  return nullptr;
}
/*
 * Brings the VM registers in g_vmContext up to date via fixup() and
 * marks the register state clean. Must only be called while the state
 * is REGSTATE_DIRTY. Counts the sync in Stats::TC_Sync.
 */
void
TranslatorX64::syncWork() {
assert(tl_regState == REGSTATE_DIRTY);
fixup(g_vmContext);
tl_regState = REGSTATE_CLEAN;
Stats::inc(Stats::TC_Sync);
}
// could be static but used in hopt/codegen.cpp
//
// Raises the "undefined variable" notice for the variable named `nm`,
// then drops one reference on `nm`. The decref is skipped if
// raise_notice() does not return normally (see FIXME below).
void raiseUndefVariable(StringData* nm) {
raise_notice(Strings::UNDEFINED_VARIABLE, nm->data());
// FIXME: do we need to decref the string if an exception is propagating?
decRefStr(nm);
}
// True for operand type pairs that involve the FPU: Double/Double,
// Int/Double and Double/Int. Int/Int is intentionally excluded; it is
// handled separately.
bool
mathEquivTypes(RuntimeType lt, RuntimeType rt) {
  if (lt.isDouble()) {
    return rt.isDouble() || rt.isInt();
  }
  return lt.isInt() && rt.isDouble();
}
/* This is somewhat hacky. It decides which helpers/builtins should
 * use eager vmreganchor based on profile information. Using eager
 * vmreganchor for all helper calls is a perf regression.
 *
 * Returns true when func's name is one of the listed builtins, or when
 * func is the method "join" of a class named "WaitHandle". */
bool TranslatorX64::eagerRecord(const Func* func) {
  // Static so the table is not rebuilt on every call.
  static const char* const kEagerFuncs[] = {
    "func_get_args",
    "get_called_class",
    "func_num_args",
    "array_filter",
    "array_map",
  };
  const char* const name = func->name()->data();
  for (const char* eager : kEagerFuncs) {
    if (!strcmp(name, eager)) {
      return true;
    }
  }
  if (func->cls() && !strcmp(func->cls()->name()->data(), "WaitHandle")
      && !strcmp(name, "join")) {
    return true;
  }
  return false;
}
/*
 * newInstanceHelper --
 *
 *   Allocates a new instance of `cls` and fills in `ar` as the ActRec
 *   for a call to the class constructor with `numArgs` arguments,
 *   linking it to `prevAr` via arSetSfp().
 *
 *   If the constructor is not public, lookupCtorMethod() is invoked
 *   with raise=true under a VMRegAnchor first.
 *
 *   Returns the new instance, whose refcount has been incremented twice.
 */
Instance*
HOT_FUNC_VM
newInstanceHelper(Class* cls, int numArgs, ActRec* ar, ActRec* prevAr) {
const Func* f = cls->getCtor();
Instance* ret = nullptr;
if (UNLIKELY(!(f->attrs() & AttrPublic))) {
VMRegAnchor _;
UNUSED MethodLookup::LookupResult res =
g_vmContext->lookupCtorMethod(f, cls, true /*raise*/);
assert(res == MethodLookup::MethodFoundWithThis);
}
// Don't start pushing the AR until newInstance returns; it may reenter.
ret = newInstance(cls);
f->validate();
ar->m_func = f;
ar->initNumArgs(numArgs, true /*fromCtor*/);
// Count stack and this.
ret->incRefCount();
ret->incRefCount();
ar->setThis(ret);
ar->setVarEnv(nullptr);
arSetSfp(ar, prevAr);
TRACE(2, "newInstanceHelper: AR %p: f %p, savedRbp %#lx, savedRip %#lx"
" this %p\n",
ar, ar->m_func, ar->m_savedRbp, ar->m_savedRip, ar->m_this);
return ret;
}
/*
 * findCuf --
 *
 *   Tries to resolve, at translation time, the callable consumed by an
 *   FPushCuf/FPushCufF/FPushCufSafe instruction. The callable input is
 *   inputs[1] for OpFPushCufSafe and inputs[0] otherwise.
 *
 *   Accepted forms are a known string ("func" or "Class::method") and
 *   a known two-element array of strings (class name, method name).
 *   The class names "self" and "parent" resolve against the current
 *   function's class and turn forwarding on; "static" is never resolved.
 *
 *   Out-parameters:
 *     cls     - the resolved class, or nullptr for a plain function.
 *     invName - the method name when the lookup reported a magic call,
 *               else nullptr.
 *     forward - true for OpFPushCufF or for self/parent calls.
 *
 *   Returns the resolved Func, or nullptr when the callable cannot be
 *   resolved here.
 */
const Func*
TranslatorX64::findCuf(const NormalizedInstruction& ni,
                       Class*& cls, StringData*& invName, bool& forward) {
  forward = (ni.op() == OpFPushCufF);
  cls = nullptr;
  invName = nullptr;
  DynLocation* callable = ni.inputs[ni.op() == OpFPushCufSafe ? 1 : 0];
  const StringData* str =
    callable->isString() ? callable->rtt.valueString() : nullptr;
  const ArrayData* arr =
    callable->isArray() ? callable->rtt.valueArray() : nullptr;
  StringData* sclass = nullptr;
  StringData* sname = nullptr;
  if (str) {
    Func* f = HPHP::Unit::lookupFunc(str);
    if (f) return f;
    String name(const_cast<StringData*>(str));
    int pos = name.find("::");
    // Require exactly one "::" with non-empty text on both sides.
    if (pos <= 0 || pos + 2 >= name.size() ||
        name.find("::", pos + 2) != String::npos) {
      return nullptr;
    }
    sclass = StringData::GetStaticString(name.substr(0, pos).get());
    sname = StringData::GetStaticString(name.substr(pos + 2).get());
  } else if (arr) {
    if (arr->size() != 2) return nullptr;
    CVarRef e0 = arr->get(int64_t(0), false);
    CVarRef e1 = arr->get(int64_t(1), false);
    if (!e0.isString() || !e1.isString()) return nullptr;
    sclass = e0.getStringData();
    sname = e1.getStringData();
    String name(sname);
    if (name.find("::") != String::npos) return nullptr;
  } else {
    return nullptr;
  }
  Class* ctx = curFunc()->cls();
  if (sclass->isame(s_self.get())) {
    if (!ctx) return nullptr;
    cls = ctx;
    forward = true;
  } else if (sclass->isame(s_parent.get())) {
    if (!ctx || !ctx->parent()) return nullptr;
    cls = ctx->parent();
    forward = true;
  } else if (sclass->isame(s_static.get())) {
    return nullptr;
  } else {
    cls = Unit::lookupUniqueClass(sclass);
    if (!cls) return nullptr;
  }
  bool magicCall = false;
  const Func* f = lookupImmutableMethod(cls, sname, magicCall, true);
  if (!f) return nullptr;
  if (forward) {
    // forward can be true purely because of OpFPushCufF, in which case
    // the current function may have no class at all. Without a context
    // class there is nothing to check against, so give up here rather
    // than dereference a null ctx.
    if (!ctx) return nullptr;
    /*
     * To preserve the invariant that the lsb class
     * is an instance of the context class, we require
     * that f's class is an instance of the context class.
     * This is conservative, but without it, we would need
     * a runtime check to decide whether or not to forward
     * the lsb class
     */
    if (!ctx->classof(f->cls())) return nullptr;
  }
  if (magicCall) invName = sname;
  return f;
}
/*
 * emitNativeTrampoline --
 *
 *   Emits, into atrampolines, a trampoline that jumps to helperAddr and
 *   records it in trampolineMap. When stats are enabled the trampoline
 *   also increments a per-helper counter, and the helper's name
 *   (truncated to 50 characters) is stored in Stats::helperNames.
 *
 *   Returns the trampoline address, or helperAddr itself when there is
 *   no room left to emit a trampoline.
 */
TCA
TranslatorX64::emitNativeTrampoline(TCA helperAddr) {
auto& a = atrampolines;
if (!a.code.canEmit(m_trampolineSize)) {
// not enough space to emit a trampoline, so just return the
// helper address and emitCall will then emit the right sequence
// to call it indirectly
TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr);
assert(false);
return helperAddr;
}
uint32_t index = m_numNativeTrampolines++;
TCA trampAddr = a.code.frontier;
if (Stats::enabled()) {
Stats::emitInc(a, &Stats::tl_helper_counters[0], index);
char* name = Util::getNativeFunctionName(helperAddr);
const size_t limit = 50;
if (strlen(name) > limit) {
name[limit] = '\0';
}
Stats::helperNames[index] = name;
}
/*
* For stubs that take arguments in rAsm, we need to make sure
* we're not damaging its contents here. (If !jmpDeltaFits, the jmp
* opcode will need to movabs the address into rAsm before
* jumping.)
*/
auto UNUSED stubUsingRScratch = [&](TCA tca) {
return tca == m_dtorGenericStubRegs;
};
assert(IMPLIES(stubUsingRScratch(helperAddr), a.jmpDeltaFits(helperAddr)));
a. jmp (helperAddr);
a. ud2 ();
trampolineMap[helperAddr] = trampAddr;
// The size of the first trampoline emitted is remembered and used for
// the canEmit() check above on later calls.
if (m_trampolineSize == 0) {
m_trampolineSize = a.code.frontier - trampAddr;
assert(m_trampolineSize >= kMinPerTrampolineSize);
}
recordBCInstr(OpNativeTrampoline, a, trampAddr);
return trampAddr;
}
/*
 * Returns the address to call for the native helper at helperAddr.
 *
 * With both EvalJitTrampolines and stats disabled this is helperAddr
 * itself. Otherwise it is the trampoline already recorded in
 * trampolineMap, or a newly emitted one.
 */
TCA
TranslatorX64::getNativeTrampoline(TCA helperAddr) {
  const bool useTrampolines =
    RuntimeOption::EvalJitTrampolines || Stats::enabled();
  if (!useTrampolines) {
    return helperAddr;
  }
  TCA existing = (TCA)mapGet<PointerMap>(trampolineMap, helperAddr);
  return existing ? existing : emitNativeTrampoline(helperAddr);
}
// Defines the class described by preClass via Unit::defClass(). Expects
// to be entered with tl_regState == REGSTATE_DIRTY; the state is CLEAN
// for the duration of the defClass call and set back to DIRTY only when
// that call returns normally.
static void defClsHelper(PreClass *preClass) {
assert(tl_regState == REGSTATE_DIRTY);
tl_regState = REGSTATE_CLEAN;
Unit::defClass(preClass);
/*
* m_defClsHelper sync'd the registers for us already. This means
* if an exception propagates we want to leave things as
* REGSTATE_CLEAN, since we're still in sync. Only set it to dirty
* if we are actually returning to run in the TC again.
*/
tl_regState = REGSTATE_DIRTY;
}
// Helpers answering "can v be converted to int64_t without overflow?".
// Converting a double that is NaN or outside the int64_t range is
// undefined behaviour, so doubles get an explicit range test. Every
// other type takes the generic overload, which always says yes; among
// the types callers in this file pass (int, int64_t, double) only
// double needs the test.
template <typename T>
static inline bool switchValFitsInt64(T) {
  return true;
}
static inline bool switchValFitsInt64(double v) {
  // +/-2^63 are exactly representable as doubles. Both comparisons are
  // false for NaN, so NaN is rejected too.
  return v >= -9223372036854775808.0 && v < 9223372036854775808.0;
}

// Maps a switch operand to a jump-table index.
//
// Returns v - base when v is an integral value within
// [base, base + nTargets); otherwise returns nTargets + 1. Non-integral
// doubles, NaN, infinities and doubles outside the int64_t range all
// take the "otherwise" path.
template <typename T>
static int64_t switchBoundsCheck(T v, int64_t base, int64_t nTargets) {
  // For integer T both conditions are trivially true; we rely on the
  // compiler to optimize them away.
  if (switchValFitsInt64(v) && int64_t(v) == v) {
    int64_t ival = v;
    if (ival >= base && ival < (base + nTargets)) {
      return ival - base;
    }
  }
  return nTargets + 1;
}
// Switch helper for double operands. `val` carries the raw bit pattern
// of a double; it is reinterpreted as a double and bounds-checked
// against [base, base + nTargets).
int64_t switchDoubleHelper(int64_t val, int64_t base, int64_t nTargets) {
  union {
    double asDouble;
    int64_t asBits;
  } pun;
  pun.asBits = val;
  const double operand = pun.asDouble;
  return switchBoundsCheck(operand, base, nTargets);
}
// Switch helper for string operands. The string is classified with
// isNumericWithVal(): a KindOfNull result is treated as 0, and integer
// or double results are bounds-checked with their parsed value. One
// reference on `s` is dropped before returning.
int64_t switchStringHelper(StringData* s, int64_t base, int64_t nTargets) {
  int64_t intVal;
  double dblVal;
  int64_t target;
  switch (s->isNumericWithVal(intVal, dblVal, 1)) {
    case KindOfInt64:
      target = switchBoundsCheck(intVal, base, nTargets);
      break;
    case KindOfDouble:
      target = switchBoundsCheck(dblVal, base, nTargets);
      break;
    case KindOfNull:
      target = switchBoundsCheck(0, base, nTargets);
      break;
    default:
      not_reached();
  }
  decRefStr(s);
  return target;
}
// Switch helper for object operands: converts the object with
// o_toInt64(), drops one reference on it, then bounds-checks the
// integer.
int64_t switchObjHelper(ObjectData* o, int64_t base, int64_t nTargets) {
  const int64_t asInt = o->o_toInt64();
  decRefObj(o);
  return switchBoundsCheck(asInt, base, nTargets);
}
// PSEUDOINSTR_DISPATCH is a switch() fragment that routes opcodes to their
// shared handlers, as per the PSEUDOINSTRS macro.
//
// It expands to groups of `case` labels, each group followed by
// func(<Handler>, t, i). `func` is supplied by the user of the macro and
// decides what happens for the group, including whether control falls
// through into the next group.
#define PSEUDOINSTR_DISPATCH(func) \
case OpBitAnd: \
case OpBitOr: \
case OpBitXor: \
case OpSub: \
case OpMul: \
func(BinaryArithOp, t, i) \
case OpSame: \
case OpNSame: \
func(SameOp, t, i) \
case OpEq: \
case OpNeq: \
func(EqOp, t, i) \
case OpLt: \
case OpLte: \
case OpGt: \
case OpGte: \
func(LtGtOp, t, i) \
case OpEmptyL: \
case OpCastBool: \
func(UnaryBooleanOp, t, i) \
case OpJmpZ: \
case OpJmpNZ: \
func(BranchOp, t, i) \
case OpSetL: \
case OpBindL: \
func(AssignToLocalOp, t, i) \
case OpFPassC: \
case OpFPassCW: \
case OpFPassCE: \
func(FPassCOp, t, i) \
case OpFPushCuf: \
case OpFPushCufF: \
case OpFPushCufSafe: \
func(FPushCufOp, t, i) \
case OpIssetL: \
case OpIsNullL: \
case OpIsStringL: \
case OpIsArrayL: \
case OpIsIntL: \
case OpIsObjectL: \
case OpIsBoolL: \
case OpIsDoubleL: \
case OpIsNullC: \
case OpIsStringC: \
case OpIsArrayC: \
case OpIsIntC: \
case OpIsObjectC: \
case OpIsBoolC: \
case OpIsDoubleC: \
func(CheckTypeOp, t, i)
// Returns false for every opcode that gets a case label below, and true
// for all others.
//
// The case labels come from INSTRS (with CASE defined to produce
// `case Op<name>:`) and from PSEUDOINSTR_DISPATCH, whose handler macro
// is defined to expand to nothing so that only its labels remain.
// NOTE(review): INSTRS is defined elsewhere; presumably it is a list of
// CASE(...) entries -- confirm.
bool
TranslatorX64::dontGuardAnyInputs(Opcode op) {
switch (op) {
#define CASE(iNm) case Op ## iNm:
#define NOOP(a, b, c)
INSTRS
PSEUDOINSTR_DISPATCH(NOOP)
return false;
}
return true;
#undef NOOP
#undef CASE
}
// Emit necessary guards for variants and pseudo-main locals before instr i.
// For HHIR, this only inserts guards for pseudo-main locals. Variants are
// guarded in a different way.
void
TranslatorX64::emitVariantGuards(const Tracelet& t,
const NormalizedInstruction& i) {
bool pseudoMain = Translator::liveFrameIsPseudoMain();
bool isFirstInstr = (&i == t.m_instrStream.first);
for (size_t in = 0; in < i.inputs.size(); ++in) {
DynLocation* input = i.inputs[in];
if (!input->isValue()) continue;
bool isRef = input->isRef() &&
!i.ignoreInnerType &&
input->rtt.innerType() != KindOfInvalid;
bool modifiableLocal = pseudoMain && input->isLocal() &&
!input->rtt.isVagueValue();
if (!modifiableLocal && !isRef) continue;
SKTRACE(1, i.source, "guarding %s: (%s:%d) :: %d!\n",
modifiableLocal ? "pseudoMain local" : "variant inner",
input->location.spaceName(),
input->location.offset,
input->rtt.valueType());
// TODO task 1122807: don't check the inner type if we've already
// checked it and have executed no possibly-aliasing instructions in
// the meanwhile.
if (modifiableLocal) {
RuntimeType& rtt = input->rtt;
JIT::Type type = JIT::Type::fromRuntimeType(rtt);
if (isFirstInstr) {
m_hhbcTrans->guardTypeLocal(input->location.offset, type);
} else {
m_hhbcTrans->checkTypeLocal(input->location.offset, type);
}
}
}
}
/*
 * checkTranslationLimit --
 *
 *   Returns true when srcRec already holds exactly
 *   RuntimeOption::EvalJitMaxTranslations translations, false otherwise.
 *
 *   On hitting the limit the max_trans counter is bumped and, in debug
 *   builds with tx64 tracing at level 2, every existing translation is
 *   dumped along with its guards.
 */
bool
TranslatorX64::checkTranslationLimit(SrcKey sk,
                                     const SrcRec& srcRec) const {
  if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) {
    INC_TPC(max_trans);
    if (debug && Trace::moduleEnabled(Trace::tx64, 2)) {
      const vector<TCA>& tns = srcRec.translations();
      // Sizes and indices below are size_t, so they are printed with %zu.
      TRACE(1, "Too many (%zu) translations: %s, BC offset %d\n",
            tns.size(), curUnit()->filepath()->data(),
            sk.offset());
      SKTRACE(2, sk, "{\n");
      TCA topTrans = srcRec.getTopTranslation();
      for (size_t i = 0; i < tns.size(); ++i) {
        const TransRec* rec = getTransRec(tns[i]);
        assert(rec);
        SKTRACE(2, sk, "%zu %p\n", i, tns[i]);
        if (tns[i] == topTrans) {
          SKTRACE(2, sk, "%zu: *Top*\n", i);
        }
        if (rec->kind == TransAnchor) {
          SKTRACE(2, sk, "%zu: Anchor\n", i);
        } else {
          SKTRACE(2, sk, "%zu: guards {\n", i);
          for (unsigned j = 0; j < rec->dependencies.size(); ++j) {
            TRACE(2, rec->dependencies[j]);
          }
          SKTRACE(2, sk, "%zu } guards\n", i);
        }
      }
      SKTRACE(2, sk, "} /* Too many translations */\n");
    }
    return true;
  }
  return false;
}
/*
 * emitGuardChecks --
 *
 *   Emits the guards at the head of a tracelet: a type check for each
 *   entry in `dependencies`, followed by the reffiness checks for
 *   `refDeps`. In pseudo-main, dependencies that are local values are
 *   skipped here. `fail` is passed to each check.
 *
 *   With stats tracing at level 2, counters are emitted before and after
 *   the guards.
 */
void
TranslatorX64::emitGuardChecks(X64Assembler& a,
                               SrcKey sk,
                               const ChangeMap& dependencies,
                               const RefDeps& refDeps,
                               SrcRec& fail) {
  if (Trace::moduleEnabled(Trace::stats, 2)) {
    Stats::emitInc(a, Stats::TraceletGuard_enter);
  }

  const bool inPseudoMain = Translator::liveFrameIsPseudoMain();
  emitRB(a, RBTypeTraceletGuards, sk);

  for (const auto& dep : dependencies) {
    const bool skip =
      inPseudoMain && dep.second->isLocal() && dep.second->isValue();
    if (skip) {
      TRACE(3, "Skipping tracelet guard for %s %d\n",
            dep.second->location.pretty().c_str(),
            (int)dep.second->rtt.outerType());
      continue;
    }
    checkType(a, dep.first, dep.second->rtt, fail);
  }

  checkRefs(a, sk, refDeps, fail);

  if (Trace::moduleEnabled(Trace::stats, 2)) {
    Stats::emitInc(a, Stats::TraceletGuard_execute);
  }
}
void dumpTranslationInfo(const Tracelet& t, TCA postGuards) {
if (!debug) return;
SrcKey sk = t.m_sk;
TRACE(3, "----------------------------------------------\n");
TRACE(3, " Translating from file %s:%d %s at %p:\n",
curUnit()->filepath()->data(),
curUnit()->getLineNumber(sk.offset()),
curFunc()->name()->data(),
postGuards);
TRACE(3, " preconds:\n");
TRACE(3, " types:\n");
for (DepMap::const_iterator i = t.m_dependencies.begin();
i != t.m_dependencies.end(); ++i) {
TRACE(3, " %-5s\n", i->second->pretty().c_str());
}
if (t.m_refDeps.size() != 0) {
TRACE(3, " refs:\n");
for (RefDeps::ArMap::const_iterator i = t.m_refDeps.m_arMap.begin();
i != t.m_refDeps.m_arMap.end();
++i) {
TRACE(3, " (ActRec %" PRId64 " : %-5s)\n", i->first,
i->second.pretty().c_str());
}
}
TRACE(3, " postconds:\n");
for (ChangeMap::const_iterator i = t.m_changes.begin();
i != t.m_changes.end(); ++i) {
TRACE(3, " %-5s\n", i->second->pretty().c_str());
}
for (auto ni = t.m_instrStream.first; ni; ni = ni->next) {
TRACE(3, " %6d: %s\n", ni->source.offset(),
instrToString(ni->pc()).c_str());
if (ni->breaksTracelet) break;
}
TRACE(3, "----------------------------------------------\n");
if (Trace::moduleEnabled(Trace::tx64, 5)) {
// prettyStack() expects to use vmpc(). Leave it in the state we
// found it since this code is debug-only, and we don't want behavior
// to vary across the optimized/debug builds.
PC oldPC = vmpc();
vmpc() = curUnit()->at(sk.offset());
TRACE(3, g_vmContext->prettyStack(string(" tx64 ")));
vmpc() = oldPC;
TRACE(3, "----------------------------------------------\n");
}
}
/*
 * Translates the tracelet starting at args.m_sk and publishes the result.
 *
 * Unless args.m_interp is set, the tracelet is handed to
 * irTranslateTracelet(), which is re-run for as long as it returns Retry.
 * If that does not end in Success (or was skipped), both code frontiers
 * are rewound and a jump to a REQ_INTERPRET service request is emitted
 * instead.  Finally the pending fixups are recorded, the translation is
 * registered, and the SrcRec is told about the new translation.
 */
void
TranslatorX64::translateTracelet(const TranslArgs& args) {
auto sk = args.m_sk;
std::unique_ptr<Tracelet> tp = analyze(sk);
Tracelet& t = *tp;
m_curTrace = &t;
// presumably resets m_curTrace when this scope exits -- confirm against
// Nuller's definition.
Nuller<Tracelet> ctNuller(&m_curTrace);
SKTRACE(1, sk, "translateTracelet\n");
assert(m_srcDB.find(sk));
// Remember where this translation begins in both code areas so a failed
// attempt can be rolled back.
TCA start = a.code.frontier;
TCA stubStart = astubs.code.frontier;
// NOTE(review): counterStart/counterLen are never updated in this
// function; they reach the TransRec below as zero.
TCA counterStart = 0;
uint8_t counterLen = 0;
SrcRec& srcRec = *getSrcRec(sk);
vector<TransBCMapping> bcMapping;
// Stays TransInterp unless the IR translation below succeeds.
TransKind transKind = TransInterp;
if (!args.m_interp) {
TranslateTraceletResult result;
do {
hhirTraceStart(sk.offset(), t.m_nextSk.offset());
SKTRACE(1, sk, "retrying irTranslateTracelet\n");
result = irTranslateTracelet(t, start, stubStart, &bcMapping);
if (result == Retry) {
// A retried attempt must have left both frontiers where it found them.
assert(a.code.frontier == start);
assert(astubs.code.frontier == stubStart);
}
} while (result == Retry);
if (result == Success) {
m_irAUsage += (a.code.frontier - start);
m_irAstubsUsage += (astubs.code.frontier - stubStart);
transKind = TransNormalIR;
}
}
if (transKind == TransInterp) {
assert(m_pendingFixups.size() == 0);
assert(srcRec.inProgressTailJumps().size() == 0);
bcMapping.clear();
// The whole translation failed; give up on this BB. Since it is not
// linked into srcDB yet, it is guaranteed not to be reachable.
// Permanent reset; nothing is reachable yet.
a.code.frontier = start;
astubs.code.frontier = stubStart;
// NOTE(review): bcMapping was already cleared a few lines above.
bcMapping.clear();
// Discard any pending fixups.
m_pendingFixups.clear();
srcRec.clearInProgressTailJumps();
TRACE(1,
"emitting %d-instr interp request for failed translation\n",
int(t.m_numOpcodes));
// Add a counter for the translation if requested
if (RuntimeOption::EvalJitTransCounters) {
emitTransCounterInc(a);
}
a. jmp(emitServiceReq(REQ_INTERPRET, 2ull, uint64_t(t.m_sk.offset()),
uint64_t(t.m_numOpcodes)));
// Fall through.
}
// Move every pending fixup into the fixup map, then drop the pending list.
for (uint i = 0; i < m_pendingFixups.size(); i++) {
TCA tca = m_pendingFixups[i].m_tca;
assert(isValidCodeAddress(tca));
m_fixupMap.recordFixup(tca, m_pendingFixups[i].m_fixup);
}
m_pendingFixups.clear();
addTranslation(TransRec(t.m_sk, curUnit()->md5(), transKind, t, start,
a.code.frontier - start, stubStart,
astubs.code.frontier - stubStart,
counterStart, counterLen,
bcMapping));
recordGdbTranslation(sk, curFunc(), a, start,
false, false);
recordGdbTranslation(sk, curFunc(), astubs, stubStart,
false, false);
// SrcRec::newTranslation() makes this code reachable. Do this last;
// otherwise reader threads could reach the new code before its
// metadata is visible to them.
TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n",
start, sk.getFuncId(), sk.offset());
srcRec.newTranslation(start);
TRACE(1, "tx64: %zd-byte tracelet\n", a.code.frontier - start);
if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
Trace::traceRelease(getUsage().c_str());
}
}
/*
 * Defines functions called by emitGenericReturn, and
 * cgGenericRetDecRefs.
 *
 * Three pieces are emitted into `a`:
 *   - a `release` routine that the per-local code below calls;
 *   - a loop whose address is stored in m_freeManyLocalsHelper;
 *   - kNumFreeLocalsHelpers unrolled copies of the per-local code, whose
 *     addresses are stored in m_freeLocalsHelpers[].
 */
void TranslatorX64::emitFreeLocalsHelpers() {
Label doRelease;
Label release;
Label loopHead;
/*
* Note: the IR currently requires that we preserve r13/r14 across
* calls to these free locals helpers.
*/
static_assert(rVmSp == rbx, "");
// rIter is rbx, the same register as rVmSp (see the static_assert above).
auto const rIter = rbx;
auto const rFinished = r15;
auto const rType = esi;
auto const rData = rdi;
moveToAlign(a, kNonFallthroughAlign);
TRACE(1, "HOTSTUB: freeLocalsHelpers starts %lx\n", uintptr_t(a.code.frontier));
// `release`: loads the m_data field of the TypedValue at rIter into
// rData and tests its refcount field against RefCountStaticValue.
// NOTE(review): the decrement looks like it is meant to be skipped for
// static values -- confirm the branch sense of jccBlock<CC_Z>.
asm_label(a, release);
a. loadq (rIter[TVOFF(m_data)], rData);
a. cmpl (RefCountStaticValue, rData[FAST_REFCOUNT_OFFSET]);
jccBlock<CC_Z>(a, [&] {
a. decl (rData[FAST_REFCOUNT_OFFSET]);
a. jz8 (doRelease);
});
a. ret ();
// Reached when the decrement above brought the count to zero.
asm_label(a, doRelease);
jumpDestructor(a, PhysReg(rType), rax);
moveToAlign(a, kJmpTargetAlign);
m_freeManyLocalsHelper = a.code.frontier;
// rFinished marks where the loop below stops: kNumFreeLocalsHelpers
// cells below rVmFp.
a. lea (rVmFp[-cellsToBytes(kNumFreeLocalsHelpers)], rFinished);
// Emits the check for the single TypedValue at rIter: load its type,
// skip when the type is <= KindOfRefCountThreshold, otherwise call
// `release` and record an indirect fixup at the return address.
auto emitDecLocal = [&] {
Label skipDecRef;
emitLoadTVType(a, rIter[TVOFF(m_type)], rType);
emitCmpTVType(a, KindOfRefCountThreshold, rType);
a. jle8 (skipDecRef);
a. call (release);
recordIndirectFixup(a.code.frontier, 0);
asm_label(a, skipDecRef);
};
// Loop for the first few locals, but unroll the final
// kNumFreeLocalsHelpers.
asm_label(a, loopHead);
emitDecLocal();
a. addq (sizeof(TypedValue), rIter);
a. cmpq (rIter, rFinished);
a. jnz8 (loopHead);
// Entry points are recorded from the highest index down, so the code for
// m_freeLocalsHelpers[k] falls through into the code for index k - 1.
for (int i = 0; i < kNumFreeLocalsHelpers; ++i) {
m_freeLocalsHelpers[kNumFreeLocalsHelpers - i - 1] = a.code.frontier;
TRACE(1, "HOTSTUB: m_freeLocalsHelpers[%d] = %p\n",
kNumFreeLocalsHelpers - i - 1, a.code.frontier);
emitDecLocal();
// No advance after the last unrolled copy.
if (i != kNumFreeLocalsHelpers - 1) {
a.addq (sizeof(TypedValue), rIter);
}
}
a. addq (AROFF(m_r) + sizeof(TypedValue), rVmSp);
a. ret (8);
TRACE(1, "STUB freeLocals helpers: %zu bytes\n",
size_t(a.code.frontier - m_freeManyLocalsHelper));
}
/*
 * Constructs the translator: validates the configured region sizes,
 * allocates one slab and carves it into atrampolines, ahot, a, astubs and
 * m_globalData, then emits the helper stubs that are shared by all
 * translations.  Exits the process if the sizes are out of range or the
 * slab cannot be allocated.
 */
TranslatorX64::TranslatorX64()
: m_numNativeTrampolines(0),
m_trampolineSize(0),
m_defClsHelper(0),
m_funcPrologueRedispatch(0),
m_irAUsage(0),
m_irAstubsUsage(0),
m_numHHIRTrans(0),
m_catchTraceMap(128),
m_curTrace(0),
m_curNI(0),
m_curFile(nullptr),
m_curLine(0),
m_curFunc(nullptr)
{
// Alignment granule (2 MiB) used when carving up the slab below.
static const size_t kRoundUp = 2 << 20;
const size_t kAHotSize = RuntimeOption::VMTranslAHotSize;
const size_t kASize = RuntimeOption::VMTranslASize;
const size_t kAStubsSize = RuntimeOption::VMTranslAStubsSize;
const size_t kGDataSize = RuntimeOption::VMTranslGDataSize;
m_totalSize = kAHotSize + kASize + kAStubsSize +
kTrampolinesBlockSize + kGDataSize;
TRACE(1, "TranslatorX64@%p startup\n", this);
tx64 = this;
// Enforce minimum sizes: 2 MiB for ahot and global data, 10 MiB for a
// and astubs.
if ((kAHotSize < (2 << 20)) ||
(kASize < (10 << 20)) ||
(kAStubsSize < (10 << 20)) ||
(kGDataSize < (2 << 20))) {
fprintf(stderr, "Allocation sizes ASize, AStubsSize, and GlobalDataSize "
"are too small.\n");
exit(1);
}
if (m_totalSize > (2ul << 30)) {
fprintf(stderr,"Combined size of ASize, AStubSize, and GlobalDataSize "
"must be < 2GiB to support 32-bit relative addresses\n");
exit(1);
}
// profileInit() runs once per process, not once per translator.
static bool profileUp = false;
if (!profileUp) {
profileInit();
profileUp = true;
}
// When EvalMapTCHuge is set, passes numMB megabytes starting at base
// (which must be kRoundUp-aligned) to hintHuge().
auto enhugen = [&](void* base, int numMB) {
if (RuntimeOption::EvalMapTCHuge) {
assert((uintptr_t(base) & (kRoundUp - 1)) == 0);
hintHuge(base, numMB << 20);
}
};
// We want to ensure that the block for "a", "astubs",
// "atrampolines", and "m_globalData" are nearby so that we can
// short jump/point between them. Thus we allocate one slab and
// divide it between "a", "astubs", and "atrampolines".
// Using sbrk to ensure it's in the bottom 2G, so we avoid
// the need for trampolines, and get to use shorter
// instructions for tc addresses.
// The extra kRoundUp - 1 bytes leave room to align the start of the slab.
const size_t allocationSize = m_totalSize + kRoundUp - 1;
uint8_t *base = (uint8_t*)sbrk(allocationSize);
if (base == (uint8_t*)-1) {
// sbrk failed: fall back to low_malloc, then to plain malloc.
base = (uint8_t*)low_malloc(allocationSize);
if (!base) {
base = (uint8_t*)malloc(allocationSize);
}
if (!base) {
fprintf(stderr, "could not allocate %zd bytes for translation cache\n",
allocationSize);
exit(1);
}
}
assert(base);
// Round base up to the next multiple of kRoundUp.
base += -(uint64_t)base & (kRoundUp - 1);
enhugen(base, RuntimeOption::EvalTCNumHugeHotMB);
TRACE(1, "init atrampolines @%p\n", base);
atrampolines.init(base, kTrampolinesBlockSize);
base += kTrampolinesBlockSize;
m_unwindRegistrar = register_unwind_region(base, m_totalSize);
TRACE(1, "init ahot @%p\n", base);
ahot.init(base, kAHotSize);
base += kAHotSize;
TRACE(1, "init a @%p\n", base);
a.init(base, kASize);
base += kASize;
// Re-align before astubs so that enhugen()'s alignment assert holds.
base += -(uint64_t)base & (kRoundUp - 1);
TRACE(1, "init astubs @%p\n", base);
astubs.init(base, kAStubsSize);
enhugen(base, RuntimeOption::EvalTCNumHugeColdMB);
base += kAStubsSize;
TRACE(1, "init gdata @%p\n", base);
m_globalData.init(base, kGDataSize);
// put the stubs into ahot, rather than a
AHotSelector ahs(this, true);
// Emit some special helpers that are shared across translations.
// Emit a byte of padding. This is a kind of hacky way to
// avoid hitting an assert in recordGdbStub when we call
// it with m_callToExit - 1 as the start address.
astubs.emitNop(1);
// Call to exit with whatever value the program leaves on
// the return stack.
m_callToExit = emitServiceReq(SRFlags::Align | SRFlags::JmpInsteadOfRet,
REQ_EXIT, 0ull);
/*
* Helpers for returning from a function where the ActRec was pushed
* by the interpreter.
*/
m_retHelper = emitRetFromInterpretedFrame();
m_genRetHelper = emitRetFromInterpretedGeneratorFrame();
/*
* Returning from a function where the ActRec was pushed by an
* inlined call. This is separate from m_retHelper just for
* debugability---it does the same thing.
*/
m_retInlHelper = emitRetFromInterpretedFrame();
FTRACE(1, "retInlHelper: {}\n", (void*)m_retInlHelper);
// m_resumeHelperRet and m_resumeHelper share one body: the former is the
// entry that first runs emitPopRetIntoActRec, the latter starts right
// after it.  The body reloads rVmFp and rVmSp from the execution context
// and ends in a REQ_RESUME service request.
moveToAlign(astubs);
m_resumeHelperRet = astubs.code.frontier;
emitPopRetIntoActRec(astubs);
m_resumeHelper = astubs.code.frontier;
emitGetGContext(astubs, rax);
astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
rVmFp);
astubs. load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
Stack::topOfStackOffset(), rVmSp);
emitServiceReq(SRFlags::Persistent, REQ_RESUME, 0ull);
// Helper for DefCls, in astubs.
{
auto& a = astubs;
// Never executed; presumably here only so the compiler checks that
// defClsHelper is callable with a PreClass* -- confirm.
if (false) {
PreClass *preClass = 0;
defClsHelper(preClass);
}
m_defClsHelper = TCA(a.code.frontier);
PhysReg rEC = argNumToRegName[2];
emitGetGContext(a, rEC);
// Store rVmFp, argument register 1 and rax into the execution context's
// m_fp, m_pc and top-of-stack slots, then tail-jump to defClsHelper.
a. storeq (rVmFp, rEC[offsetof(VMExecutionContext, m_fp)]);
a. storeq (argNumToRegName[1],
rEC[offsetof(VMExecutionContext, m_pc)]);
a. storeq (rax, rEC[offsetof(VMExecutionContext, m_stack) +
Stack::topOfStackOffset()]);
a. jmp (TCA(defClsHelper));
}
// The decRef helper for when we bring the count down to zero. Callee needs to
// bring the value into rdi. These can be burned in for all time, and for all
// translations.
typedef void* vp;
TCA strDtor, arrDtor, objDtor, refDtor;
strDtor = emitUnaryStub(astubs, Call(getMethodPtr(&StringData::release)));
arrDtor = emitUnaryStub(astubs, Call(getVTableOffset(&HphpArray::release)));
objDtor = emitUnaryStub(astubs, Call(getMethodPtr(&ObjectData::release)));
refDtor = emitUnaryStub(astubs, Call(vp(getMethodPtr(&RefData::release))));
m_dtorStubs[typeToDestrIndex(BitwiseKindOfString)] = strDtor;
m_dtorStubs[typeToDestrIndex(KindOfArray)] = arrDtor;
m_dtorStubs[typeToDestrIndex(KindOfObject)] = objDtor;
m_dtorStubs[typeToDestrIndex(KindOfRef)] = refDtor;
// Hot helper stubs in A:
emitGenericDecRefHelpers();
emitFreeLocalsHelpers();
m_funcPrologueRedispatch = emitPrologueRedispatch(a);
TRACE(1, "HOTSTUB: all stubs finished: %lx\n",
uintptr_t(a.code.frontier));
if (trustSigSegv) {
// Install SIGSEGV handler for timeout exceptions
struct sigaction sa;
struct sigaction old_sa;
sa.sa_sigaction = &TranslatorX64::SEGVHandler;
sa.sa_flags = SA_SIGINFO;
sigemptyset(&sa.sa_mask);
if (sigaction(SIGSEGV, &sa, &old_sa) != 0) {
throw std::runtime_error(
std::string("Failed to install SIGSEGV handler: ") +
strerror(errno));
}
// Keep the previously installed handler in m_segvChain.
m_segvChain = old_sa.sa_flags & SA_SIGINFO ?
old_sa.sa_sigaction : (sigaction_t)old_sa.sa_handler;
}
moveToAlign(astubs);
m_stackOverflowHelper = astubs.code.frontier;
// We are called from emitStackCheck, with the new stack frame in
// rStashedAR. Get the caller's PC into rdi and save it off.
astubs. load_reg64_disp_reg64(rVmFp, AROFF(m_func), rax);
astubs. load_reg64_disp_reg32(rStashedAR, AROFF(m_soff), rdi);
astubs. load_reg64_disp_reg64(rax, Func::sharedOffset(), rax);
astubs. load_reg64_disp_reg32(rax, Func::sharedBaseOffset(), rax);
astubs. add_reg32_reg32(rax, rdi);
emitEagerVMRegSave(astubs, SaveFP | SavePC);
emitServiceReq(SRFlags::Persistent, REQ_STACK_OVERFLOW, 0ull);
}
// do gdb specific initialization. This has to happen after
// the TranslatorX64 constructor is called, because gdb initialization
// calls backs into TranslatorX64::Get()
void TranslatorX64::initGdb() {
// On a backtrace, gdb tries to locate the calling frame at address
// returnRIP-1. However, for the first VM frame, there is no code at
// returnRIP-1, since the AR was set up manually. For this frame,
// record the tracelet address as starting from callToExit-1, so gdb
// does not barf
recordGdbStub(astubs, m_callToExit - 1, "HHVM::callToExit");
recordBCInstr(OpRetFromInterp, astubs, m_retHelper);
recordGdbStub(astubs, m_retHelper - 1, "HHVM::retHelper");
recordBCInstr(OpResumeHelper, astubs, m_resumeHelper);
recordBCInstr(OpDefClsHelper, astubs, m_defClsHelper);
recordBCInstr(OpDtorStub, astubs,
m_dtorStubs[typeToDestrIndex(BitwiseKindOfString)]);
recordGdbStub(astubs, m_dtorStubs[typeToDestrIndex(BitwiseKindOfString)],
"HHVM::destructorStub");
}
/*
 * Returns the current translator, creating the first one on demand.
 *
 * This can run in very early pre-main code; the first call allocates the
 * first translator space.
 */
TranslatorX64*
TranslatorX64::Get() {
  const bool needFirstTranslator = !nextTx64;
  if (needFirstTranslator) {
    // Publish the pointer before initGdb(), which calls back into Get().
    nextTx64 = new TranslatorX64();
    nextTx64->initGdb();
  }

  const bool needCurrent = !tx64;
  if (needCurrent) {
    tx64 = nextTx64;
  }

  assert(tx64);
  return tx64;
}
/*
 * Emits, at an aligned address in `a`, a stub that wraps the call `c`:
 * it sets up an rbp frame, preserves the caller-saved GP registers other
 * than the first Arity argument registers around the call, and returns
 * with `ret`.  Returns the address of the stub.
 */
template<int Arity>
TCA TranslatorX64::emitNAryStub(X64Assembler& a, Call c) {
BOOST_STATIC_ASSERT((Arity < kNumRegisterArgs));
// The callNAryStub has already saved these regs on a.
RegSet alreadySaved;
for (size_t i = 0; i < Arity; ++i) {
alreadySaved |= RegSet(argNumToRegName[i]);
}
/*
* We've made a call instruction, and pushed Arity args on the
* stack. So the stack address will be odd coming into the stub if
* Arity + 1 (for the call) is odd. We need to correct for this
* when saving other registers below to keep SSE-friendly alignment
* of the stack.
*/
const int Parity = (Arity + 1) % 2;
// These dtor stubs are meant to be called with the call
// instruction, unlike most translator code.
moveToAlign(a);
TCA start = a.code.frontier;
/*
* Preserve most caller-saved regs. The calling code has already
* preserved regs in `alreadySaved'; we push the rest of the caller
* saved regs and rbp. It should take 9 qwords in total, and the
* incoming call instruction made it 10. This is an even number of
* pushes, so we preserve the SSE-friendliness of our execution
* environment (without real intervention from PhysRegSaverParity).
*
* Note that we don't need to clean all registers because the only
* reason we could need those locations written back is if stack
* unwinding were to happen. These stubs can re-enter due to user
* destructors, but exceptions are not allowed to propagate out of
* those, so it's not a problem.
*/
a. push (rbp); // {
a. movq (rsp, rbp);
{
RegSet s = kGPCallerSaved - alreadySaved;
// presumably saves `s` on construction and restores it when this scope
// closes -- confirm against PhysRegSaverParity.
PhysRegSaverParity rs(Parity, a, s);
emitCall(a, c);
}
a. pop (rbp); // }
a. ret ();
return start;
}
/*
 * One-argument form of emitNAryStub: emits a stub wrapping `c` into `a`
 * and returns the stub's address.
 */
TCA TranslatorX64::emitUnaryStub(X64Assembler& a, Call c) {
  TCA const stubStart = emitNAryStub<1>(a, c);
  return stubStart;
}
/*
 * Records `trace` as the catch trace for the address `ip` in
 * m_catchTraceMap.
 */
void TranslatorX64::registerCatchTrace(CTCA ip, TCA trace) {
  FTRACE(1, "registerCatchTrace: afterCall: {} trace: {}\n", ip, trace);

  m_catchTraceMap.insert(ip, trace);
}
/*
 * Looks up the catch trace registered for `ip`.  Returns nullptr when
 * m_catchTraceMap has no entry for it.
 */
TCA TranslatorX64::getCatchTrace(CTCA ip) const {
  if (TCA* entry = m_catchTraceMap.find(ip)) {
    return *entry;
  }
  return nullptr;
}
namespace {
struct DeferredFileInvalidate : public DeferredWorkItem {
Eval::PhpFile* m_f;
explicit DeferredFileInvalidate(Eval::PhpFile* f) : m_f(f) {
TRACE(2, "DeferredFileInvalidate @ %p, m_f %p\n", this, m_f); }
void operator()() {
TRACE(2, "DeferredFileInvalidate: Firing @ %p , m_f %p\n", this, m_f);
tx64->invalidateFileWork(m_f);
}
};
struct DeferredPathInvalidate : public DeferredWorkItem {
const std::string m_path;
explicit DeferredPathInvalidate(const std::string& path) : m_path(path) {
assert(m_path.size() >= 1 && m_path[0] == '/');
}
void operator()() {
String spath(m_path);
/*
* inotify saw this path change. Now poke the file repository;
* it will notice the underlying PhpFile* has changed, and notify
* us via ::invalidateFile.
*
* We don't actually need to *do* anything with the PhpFile* from
* this lookup; since the path has changed, the file we'll get out is
* going to be some new file, not the old file that needs invalidation.
*/
UNUSED Eval::PhpFile* f =
g_vmContext->lookupPhpFile(spath.get(), "");
// We don't keep around the extra ref.
if (f) f->decRefAndDelete();
}
};
}
/*
 * Per-request setup: marks the register state clean, drains the pending
 * queue, resets the high-level translator, announces the request to the
 * treadmill, and zeroes the perf counters and stats.
 */
void
TranslatorX64::requestInit() {
TRACE(1, "in requestInit(%" PRId64 ")\n", g_vmContext->m_currentThreadIdx);
tl_regState = REGSTATE_CLEAN;
PendQ::drain();
requestResetHighLevelTranslator();
Treadmill::startRequest(g_vmContext->m_currentThreadIdx);
// Every request starts with all perf counters at zero.
memset(&s_perfCounters, 0, sizeof(s_perfCounters));
Stats::init();
}
/*
 * Per-request teardown: drops the write lease if this thread holds it,
 * drains the pending queue, tells the treadmill the request finished,
 * and dumps and clears the stats.  With tx64stats release tracing
 * enabled, the perf counters for the request are printed as well.
 */
void
TranslatorX64::requestExit() {
  if (s_writeLease.amOwner()) {
    s_writeLease.drop();
  }
  TRACE_MOD(txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
            " kept, %15" PRId64 " grabbed\n",
            pthread_self(), s_writeLease.m_hintKept,
            s_writeLease.m_hintGrabbed);

  PendQ::drain();
  Treadmill::finishRequest(g_vmContext->m_currentThreadIdx);
  TRACE(1, "done requestExit(%" PRId64 ")\n", g_vmContext->m_currentThreadIdx);

  Stats::dump();
  Stats::clear();

  if (!Trace::moduleEnabledRelease(Trace::tx64stats, 1)) {
    return;
  }

  Trace::traceRelease("TranslatorX64 perf counters for %s:\n",
                      g_context->getRequestUrl(50).c_str());
  int counter = 0;
  while (counter < tpc_num_counters) {
    Trace::traceRelease("%-20s %10lld\n",
                        kPerfCounterNames[counter], s_perfCounters[counter]);
    counter++;
  }
  Trace::traceRelease("\n");
}
/*
 * Returns true when `event` is exactly one of the names in
 * kPerfCounterNames, false otherwise.
 */
bool
TranslatorX64::isPseudoEvent(const char* event) {
  bool matched = false;
  for (auto counterName : kPerfCounterNames) {
    if (strcmp(event, counterName) == 0) {
      matched = true;
      break;
    }
  }
  return matched;
}
/*
 * Adds one entry per translator perf counter to `ret`, keyed by the
 * counter's name.  When RuntimeOption::EnableInstructionCounts is set,
 * two summed instruction counts are added too.
 */
void
TranslatorX64::getPerfCounters(Array& ret) {
  int idx = 0;
  while (idx < tpc_num_counters) {
    // Perflab cannot yet rescale what it is given, so the raw values are
    // multiplied up to resemble hardware-counter magnitudes.
    ret.set(String::FromCStr(kPerfCounterNames[idx]),
            s_perfCounters[idx] * 1000);
    idx++;
  }

  if (!RuntimeOption::EnableInstructionCounts) {
    return;
  }

  // Sums every STATS_PER_OPCODE-th counter from `first` up to (but not
  // including) Instr_InterpOneHighInvalid and stores the total as `label`.
  auto sumInstrCounts = [&](unsigned first, const char* const label) {
    int64_t total = 0;
    unsigned slot = first;
    while (slot < Stats::Instr_InterpOneHighInvalid) {
      total += Stats::tl_counters[Stats::StatCounter(slot)];
      slot += STATS_PER_OPCODE;
    }
    ret.set(String::FromCStr(label), total);
  };

  sumInstrCounts(Stats::Instr_TranslLowInvalid + STATS_PER_OPCODE,
                 kInstrCountTx64Name);
  sumInstrCounts(Stats::Instr_TranslIRPostLowInvalid + STATS_PER_OPCODE,
                 kInstrCountIRName);
}
/*
 * Hands m_totalSize bytes, starting at the base of the trampoline area,
 * to freeSlab().
 */
TranslatorX64::~TranslatorX64() {
  auto const slabStart = atrampolines.code.base;
  freeSlab(slabStart, m_totalSize);
}
/*
 * Builds the TCRange that runs from `addr` to the current frontier of
 * `a`.  `addr` must be a valid address in a's code block.
 */
static Debug::TCRange rangeFrom(const X64Assembler& a, const TCA addr,
                                bool isAstubs) {
  assert(a.code.isValidAddress(addr));
  const TCA rangeEnd = a.code.frontier;
  return Debug::TCRange(addr, rangeEnd, isAstubs);
}
void TranslatorX64::recordBCInstr(uint32_t op,
const X64Assembler& a,
const TCA addr) {
if (addr != a.code.frontier) {
m_debugInfo.recordBCInstr(Debug::TCRange(addr, a.code.frontier,
&a == &astubs ? true : false), op);
}
}
/*
 * Records the code from `start` to a's frontier as a translation of `sk`
 * in `srcFunc`: as a tracelet unless EvalJitNoGdb is set, and in the perf
 * map when EvalPerfPidMap is set.  Empty ranges are ignored.  The caller
 * must hold the write lease.
 */
void TranslatorX64::recordGdbTranslation(SrcKey sk,
                                         const Func* srcFunc,
                                         const X64Assembler& a,
                                         const TCA start,
                                         bool exit,
                                         bool inPrologue) {
  if (start == a.code.frontier) {
    return;
  }
  assert(s_writeLease.amOwner());

  const bool inAstubs = (&a == &astubs);

  if (!RuntimeOption::EvalJitNoGdb) {
    // A function without a unit has no bytecode address to report.
    auto const unit = srcFunc->unit();
    m_debugInfo.recordTracelet(rangeFrom(a, start, inAstubs),
                               srcFunc,
                               unit ? unit->at(sk.offset()) : nullptr,
                               exit, inPrologue);
  }
  if (RuntimeOption::EvalPerfPidMap) {
    m_debugInfo.recordPerfMap(rangeFrom(a, start, inAstubs),
                              srcFunc, exit, inPrologue);
  }
}
void TranslatorX64::recordGdbStub(const X64Assembler& a,
const TCA start, const char* name) {
if (!RuntimeOption::EvalJitNoGdb) {
m_debugInfo.recordStub(rangeFrom(a, start, &a == &astubs ? true : false),
name);
}
}
/* Returns the number of bytes between the base and frontier of `a`. */
size_t TranslatorX64::getCodeSize() {
  const size_t usedBytes = a.code.frontier - a.code.base;
  return usedBytes;
}
/* Returns the number of bytes between the base and frontier of `astubs`. */
size_t TranslatorX64::getStubSize() {
  const size_t usedBytes = astubs.code.frontier - astubs.code.base;
  return usedBytes;
}
/* Returns the current value of TargetCache::s_frontier. */
size_t TranslatorX64::getTargetCacheSize() {
  const size_t frontier = TargetCache::s_frontier;
  return frontier;
}
/*
 * Returns a multi-line, human-readable summary of how many bytes are in
 * use in each translation-cache region, with a percentage for each.
 */
std::string TranslatorX64::getUsage() {
  // Bytes in use per region.
  size_t aHotUsage = ahot.code.frontier - ahot.code.base;
  size_t aUsage = a.code.frontier - a.code.base;
  size_t stubsUsage = astubs.code.frontier - astubs.code.base;
  size_t dataUsage = m_globalData.frontier - m_globalData.base;
  size_t tcUsage = TargetCache::s_frontier;
  size_t persistentUsage =
    TargetCache::s_persistent_frontier - TargetCache::s_persistent_start;

  // Matching percentages, in the order they are printed.
  auto const aHotPct = 100 * aHotUsage / ahot.code.size;
  auto const aPct = 100 * aUsage / a.code.size;
  auto const stubsPct = 100 * stubsUsage / astubs.code.size;
  auto const irAPct = 100 * m_irAUsage / a.code.size;
  auto const irStubsPct = 100 * m_irAstubsUsage / astubs.code.size;
  auto const dataPct = 100 * dataUsage / m_globalData.size;
  auto const tcPct =
    400 * tcUsage / RuntimeOption::EvalJitTargetCacheSize / 3;
  auto const persistentPct =
    400 * persistentUsage / RuntimeOption::EvalJitTargetCacheSize;

  std::string usage;
  Util::string_printf(
    usage,
    "tx64: %9zd bytes (%" PRId64 "%%) in ahot.code\n"
    "tx64: %9zd bytes (%" PRId64 "%%) in a.code\n"
    "tx64: %9zd bytes (%" PRId64 "%%) in astubs.code\n"
    "tx64: %9zd bytes (%" PRId64 "%%) in a.code from ir\n"
    "tx64: %9zd bytes (%" PRId64 "%%) in astubs.code from ir\n"
    "tx64: %9zd bytes (%" PRId64 "%%) in m_globalData\n"
    "tx64: %9zd bytes (%" PRId64 "%%) in targetCache\n"
    "tx64: %9zd bytes (%" PRId64 "%%) in persistentCache\n",
    aHotUsage, aHotPct,
    aUsage, aPct,
    stubsUsage, stubsPct,
    m_irAUsage, irAPct,
    m_irAstubsUsage, irStubsPct,
    dataUsage, dataPct,
    tcUsage, tcPct,
    persistentUsage, persistentPct);
  return usage;
}
bool TranslatorX64::addDbgGuards(const Unit* unit) {
// TODO refactor
// It grabs the write lease and iterating through whole SrcDB...
bool locked = s_writeLease.acquire(true);
if (!locked) {
return false;
}
struct timespec tsBegin, tsEnd;
gettime(CLOCK_MONOTONIC, &tsBegin);
// Doc says even find _could_ invalidate iterator, in pactice it should
// be very rare, so go with it now.
for (SrcDB::iterator it = m_srcDB.begin(); it != m_srcDB.end(); ++it) {
SrcKey const sk = SrcKey::fromAtomicInt(it->first);
SrcRec& sr = *it->second;
if (sr.unitMd5() == unit->md5() &&
!sr.hasDebuggerGuard() &&
isSrcKeyInBL(unit, sk)) {
addDbgGuardImpl(sk, sr);
}
}
s_writeLease.drop();
gettime(CLOCK_MONOTONIC, &tsEnd);
int64_t elapsed = gettime_diff_us(tsBegin, tsEnd);
if (Trace::moduleEnabledRelease(Trace::tx64, 5)) {
Trace::traceRelease("addDbgGuards got lease for %" PRId64 " us\n", elapsed);
}
return true;
}
bool TranslatorX64::addDbgGuard(const Func* func, Offset offset) {
SrcKey sk(func, offset);
{
if (SrcRec* sr = m_srcDB.find(sk)) {
if (sr->hasDebuggerGuard()) {
return true;
}
} else {
// no translation yet
return true;
}
}
if (debug) {
if (!isSrcKeyInBL(func->unit(), sk)) {
TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
return false;
}
}
bool locked = s_writeLease.acquire(true);
if (!locked) {
return false;
}
{
if (SrcRec* sr = m_srcDB.find(sk)) {
addDbgGuardImpl(sk, *sr);
}
}
s_writeLease.drop();
return true;
}
/*
 * Emits a debugger guard for `sk` into `a` and registers it on `srcRec`.
 *
 * The guard loads a byte from the current thread's request-injection
 * data, branches to a REQ_INTERPRET service request when the byte is
 * non-zero, and otherwise jumps to srcRec's top translation.
 */
void TranslatorX64::addDbgGuardImpl(SrcKey sk, SrcRec& srcRec) {
  TCA dbgGuard = a.code.frontier;
  // Emit the checks for debugger attach
  emitTLSLoad<ThreadInfo>(a, ThreadInfo::s_threadInfo, rAsm);
  static COff dbgOff = offsetof(ThreadInfo, m_reqInjectionData) +
                       RequestInjectionData::debuggerReadOnlyOffset();
  a.   load_reg64_disp_reg32(rAsm, dbgOff, rAsm);
  a.   testb((int8_t)0xff, rbyte(rAsm));
  // Branch to a special REQ_INTERPRET if attached
  {
    // Pass the request arguments as 64-bit values, as the REQ_INTERPRET
    // request in translateTracelet does.  NOTE(review): emitServiceReq's
    // signature is not visible here; if it reads its arguments back as
    // uint64_t, a bare int literal would have the wrong width -- confirm.
    TCA fallback = emitServiceReq(REQ_INTERPRET, 2ull,
                                  uint64_t(sk.offset()), 0ull);
    a. jnz(fallback);
  }
  // Emit a jump to the actual code
  TCA realCode = srcRec.getTopTranslation();
  prepareForSmash(a, kJmpLen);
  TCA dbgBranchGuardSrc = a.code.frontier;
  a.   jmp(realCode);
  // Add it to srcRec
  srcRec.addDebuggerGuard(dbgGuard, dbgBranchGuardSrc);
}
bool TranslatorX64::dumpTCCode(const char* filename) {
string aFilename = string(filename).append("_a");
string astubFilename = string(filename).append("_astub");
FILE* aFile = fopen(aFilename.c_str(),"wb");
if (aFile == nullptr)
return false;
FILE* astubFile = fopen(astubFilename.c_str(),"wb");
if (astubFile == nullptr) {
fclose(aFile);
return false;
}
string helperAddrFilename = string(filename).append("_helpers_addrs.txt");
FILE* helperAddrFile = fopen(helperAddrFilename.c_str(),"wb");
if (helperAddrFile == nullptr) {
fclose(aFile);
fclose(astubFile);
return false;
}
// dump starting from the trampolines; this assumes processInit() places
// trampolines before the translation cache
size_t count = a.code.frontier-atrampolines.code.base;
bool result = (fwrite(atrampolines.code.base, 1, count, aFile) == count);
if (result) {
count = astubs.code.frontier - astubs.code.base;
result = (fwrite(astubs.code.base, 1, count, astubFile) == count);
}
if (result) {
for(PointerMap::iterator iter = trampolineMap.begin();
iter != trampolineMap.end();
iter++) {
void* helperAddr = iter->first;
void* trampAddr = iter->second;
char* functionName = Util::getNativeFunctionName(helperAddr);
fprintf(helperAddrFile,"%10p %10p %s\n",
trampAddr, helperAddr,
functionName);
free(functionName);
}
}
fclose(aFile);
fclose(astubFile);
fclose(helperAddrFile);
return result;
}
/*
 * Dumps the translation-cache metadata and then, if that worked, the
 * code (to /tmp/tc_dump*).  Unless ignoreLease is set the write lease is
 * held for the duration.  Returns true on success.
 */
bool TranslatorX64::dumpTC(bool ignoreLease) {
  const bool needLease = !ignoreLease;
  if (needLease && !s_writeLease.acquire(true)) {
    return false;
  }

  // The code dump is attempted only when the data dump succeeded.
  const bool success = dumpTCData() && dumpTCCode("/tmp/tc_dump");

  if (needLease) {
    s_writeLease.drop();
  }
  return success;
}
// Returns true on success
bool tc_dump(void) {
return TranslatorX64::Get()->dumpTC();
}
// Returns true on success
bool TranslatorX64::dumpTCData() {
gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
if (!tcDataFile) return false;
if (!gzprintf(tcDataFile,
"repo_schema = %s\n"
"a.base = %p\n"
"a.frontier = %p\n"
"astubs.base = %p\n"
"astubs.frontier = %p\n\n",
kRepoSchemaId,
atrampolines.code.base, a.code.frontier,
astubs.code.base, astubs.code.frontier)) {
return false;
}
if (!gzprintf(tcDataFile, "total_translations = %zu\n\n",
m_translations.size())) {
return false;
}
for (size_t t = 0; t < m_translations.size(); t++) {
if (gzputs(tcDataFile,
m_translations[t].print(getTransCounter(t)).c_str()) == -1) {
return false;
}
}
gzclose(tcDataFile);
return true;
}
/*
 * Invalidates the existing translations for `sk`.  Must not be used in
 * RepoAuthoritative mode, and the caller must hold the write lease.
 */
void TranslatorX64::invalidateSrcKey(SrcKey sk) {
  assert(!RuntimeOption::RepoAuthoritative);
  assert(s_writeLease.amOwner());

  // Existing translations for this SrcKey get rerouted to a new
  // translation that has yet to be created.
  SrcRec* const rec = m_srcDB.find(sk);
  assert(rec);

  // The old translations are unreachable from here on, which leaves
  // garbage in the TC; there is currently no way to reclaim it.
  rec->replaceOldTranslations();
}
/*
 * Invalidates all code in the SrcDB that depends on `f`.  If anything
 * was invalidated, a treadmill work item is queued that later drops the
 * corresponding references on `f`.
 */
void TranslatorX64::invalidateFileWork(Eval::PhpFile* f) {
  // Treadmill item that releases n references on a file and notifies the
  // file repository when the count reaches zero.
  class FileInvalidationTrigger : public Treadmill::WorkItem {
    Eval::PhpFile* m_f;
    int m_nRefs;
  public:
    FileInvalidationTrigger(Eval::PhpFile* f, int n)
      : m_f(f)
      , m_nRefs(n)
    {}
    virtual void operator()() {
      if (m_f->decRef(m_nRefs) == 0) {
        Eval::FileRepository::onDelete(m_f);
      }
    }
  };

  size_t nSmashed = m_srcDB.invalidateCode(f);
  if (!nSmashed) {
    return;
  }
  // The srcDB had entries for this file.  Each entry's dependency on the
  // file was counted as a reference and its code can no longer be
  // reached, but the references can only be dropped once the last
  // outstanding request has drained.
  Treadmill::WorkItem::enqueue(new FileInvalidationTrigger(f, nSmashed));
}
/*
 * Schedules invalidation of the code for `f`.  Returns false, doing
 * nothing, when the JIT is disabled; true once the work is deferred.
 */
bool TranslatorX64::invalidateFile(Eval::PhpFile* f) {
  // Callers are at high rank, while the invalidation itself needs the
  // write lease, so the work is deferred rather than done here.
  if (RuntimeOption::EvalJit) {
    assert(f != nullptr);
    PendQ::defer(new DeferredFileInvalidate(f));
    return true;
  }
  return false;
}
} // HPHP::Transl
// Trace module for the code that follows; presumably what the TRACE()
// macro consults -- confirm against the Trace header.
static const Trace::Module TRACEMOD = Trace::tx64;
/*
 * Queues a deferred work item that re-looks-up `path` in the file
 * repository.
 */
void invalidatePath(const std::string& path) {
  TRACE(1, "invalidatePath: abspath %s\n", path.c_str());
  auto* const work = new DeferredPathInvalidate(path);
  PendQ::defer(work);
}
} // HPHP::VM