f008eafffe
I was going to #include translator.h in a header I had for talking to the region selector thing and decided to just get this over with instead. (It shouldn't need to #include that.) Found a few other unused things to remove while at it.
4285 linhas
138 KiB
C++
4285 linhas
138 KiB
C++
/*
|
|
+----------------------------------------------------------------------+
|
|
| HipHop for PHP |
|
|
+----------------------------------------------------------------------+
|
|
| Copyright (c) 2010- Facebook, Inc. (http://www.facebook.com) |
|
|
+----------------------------------------------------------------------+
|
|
| This source file is subject to version 3.01 of the PHP license, |
|
|
| that is bundled with this package in the file LICENSE, and is |
|
|
| available through the world-wide-web at the following url: |
|
|
| http://www.php.net/license/3_01.txt |
|
|
| If you did not receive a copy of the PHP license and are unable to |
|
|
| obtain it through the world-wide-web, please send a note to |
|
|
| license@php.net so we can mail you a copy immediately. |
|
|
+----------------------------------------------------------------------+
|
|
*/
|
|
#include "hphp/runtime/vm/jit/translator-x64.h"
|
|
|
|
#include <cinttypes>
|
|
#include <stdint.h>
|
|
#include <assert.h>
|
|
#include <unistd.h>
|
|
#include <sys/mman.h>
|
|
#include <strstream>
|
|
#include <stdio.h>
|
|
#include <stdarg.h>
|
|
#include <string>
|
|
#include <queue>
|
|
#include <unwind.h>
|
|
|
|
#ifdef __FreeBSD__
|
|
# include <ucontext.h>
|
|
typedef __sighandler_t *sighandler_t;
|
|
# define RIP_REGISTER(v) (v).mc_rip
|
|
#else
|
|
# if defined(__x86_64__)
|
|
# define RIP_REGISTER(v) (v).gregs[REG_RIP]
|
|
# elif defined(__AARCH64EL__)
|
|
# define RIP_REGISTER(v) (v).pc
|
|
# endif
|
|
#endif
|
|
|
|
#include <boost/bind.hpp>
|
|
#include <boost/optional.hpp>
|
|
#include <boost/utility/typed_in_place_factory.hpp>
|
|
#include <boost/range/adaptors.hpp>
|
|
#include <boost/scoped_ptr.hpp>
|
|
|
|
#include "folly/Format.h"
|
|
|
|
#include "hphp/util/asm-x64.h"
|
|
#include "hphp/util/bitops.h"
|
|
#include "hphp/util/debug.h"
|
|
#include "hphp/util/disasm.h"
|
|
#include "hphp/util/maphuge.h"
|
|
#include "hphp/util/rank.h"
|
|
#include "hphp/util/ringbuffer.h"
|
|
#include "hphp/util/timer.h"
|
|
#include "hphp/util/trace.h"
|
|
#include "hphp/util/meta.h"
|
|
#include "hphp/util/util.h"
|
|
#include "hphp/util/repo_schema.h"
|
|
|
|
#include "hphp/runtime/vm/bytecode.h"
|
|
#include "hphp/runtime/vm/php_debug.h"
|
|
#include "hphp/runtime/vm/runtime.h"
|
|
#include "hphp/runtime/base/complex_types.h"
|
|
#include "hphp/runtime/base/execution_context.h"
|
|
#include "hphp/runtime/base/runtime_option.h"
|
|
#include "hphp/runtime/base/strings.h"
|
|
#include "hphp/runtime/base/strings.h"
|
|
#include "hphp/runtime/base/server/source_root_info.h"
|
|
#include "hphp/runtime/base/zend/zend_string.h"
|
|
#include "hphp/runtime/ext/ext_closure.h"
|
|
#include "hphp/runtime/ext/ext_continuation.h"
|
|
#include "hphp/runtime/ext/ext_function.h"
|
|
#include "hphp/runtime/vm/debug/debug.h"
|
|
#include "hphp/runtime/vm/jit/targetcache.h"
|
|
#include "hphp/runtime/vm/jit/translator-inline.h"
|
|
#include "hphp/runtime/vm/jit/srcdb.h"
|
|
#include "hphp/runtime/vm/jit/x64-util.h"
|
|
#include "hphp/runtime/vm/jit/unwind-x64.h"
|
|
#include "hphp/runtime/base/stats.h"
|
|
#include "hphp/runtime/vm/pendq.h"
|
|
#include "hphp/runtime/vm/treadmill.h"
|
|
#include "hphp/runtime/vm/repo.h"
|
|
#include "hphp/runtime/vm/type_profile.h"
|
|
#include "hphp/runtime/vm/member_operations.h"
|
|
#include "hphp/runtime/vm/jit/abi-x64.h"
|
|
#include "hphp/runtime/eval/runtime/file_repository.h"
|
|
#include "hphp/runtime/vm/jit/hhbctranslator.h"
|
|
|
|
#include "hphp/runtime/vm/jit/translator-x64-internal.h"
|
|
|
|
namespace HPHP {
|
|
namespace Transl {
|
|
|
|
using namespace reg;
|
|
using namespace Util;
|
|
using namespace Trace;
|
|
using std::max;
|
|
|
|
// X-macro naming every translator perf counter exactly once. It is expanded
// twice below, with two different definitions of TPC, to generate the counter
// name table and the matching enum, so both list the same counters in the
// same order.
#define TRANS_PERF_COUNTERS \
  TPC(translate) \
  TPC(retranslate) \
  TPC(interp_bb) \
  TPC(interp_instr) \
  TPC(interp_one) \
  TPC(max_trans) \
  TPC(enter_tc) \
  TPC(service_req)

// Names of the two extra entries appended after the trans_* names in
// kPerfCounterNames.
static const char* const kInstrCountTx64Name = "instr_tx64";
static const char* const kInstrCountIRName = "instr_hhir";

// Name table: one "trans_<counter>" string per TRANS_PERF_COUNTERS entry,
// followed by the two instruction-count names.
#define TPC(n) "trans_" #n,
static const char* const kPerfCounterNames[] = {
  TRANS_PERF_COUNTERS
  kInstrCountTx64Name,
  kInstrCountIRName,
};
#undef TPC

// Index enum: tpc_<counter> for each TRANS_PERF_COUNTERS entry, in order;
// tpc_num_counters is the number of such entries.
#define TPC(n) tpc_ ## n,
enum TransPerfCounter {
  TRANS_PERF_COUNTERS
  tpc_num_counters
};
#undef TPC

// Catch at compile time any edit that lets the name table and the enum drift
// out of step.
static_assert(sizeof(kPerfCounterNames) / sizeof(kPerfCounterNames[0]) ==
                size_t(tpc_num_counters) + 2,
              "kPerfCounterNames must hold one name per TransPerfCounter "
              "plus the two instruction-count names");

// Per-thread counter storage, indexed by TransPerfCounter.
static __thread int64_t s_perfCounters[tpc_num_counters];
// Increments one counter by name. Note the expansion already ends in ';'.
#define INC_TPC(n) ++s_perfCounters[tpc_ ## n];
|
|
|
|
// nextTx64: Global shared state. The tx64 that should be used for
|
|
// new requests going forward.
|
|
TranslatorX64* volatile nextTx64;
|
|
// tx64: Thread-local state. The tx64 we're using for the current request.
|
|
__thread TranslatorX64* tx64;
|
|
|
|
// Register dirtiness: thread-private.
|
|
__thread VMRegState tl_regState = REGSTATE_CLEAN;
|
|
|
|
static StaticString s___call(LITSTR_INIT("__call"));
|
|
static StaticString s___callStatic(LITSTR_INIT("__callStatic"));
|
|
|
|
// Initialize at most this many locals inline in function body prologue; more
|
|
// than this, and emitting a loop is more compact. To be precise, the actual
|
|
// crossover point in terms of code size is 6; 9 was determined by experiment to
|
|
// be the optimal point in certain benchmarks. #microoptimization
|
|
static const int kLocalsToInitializeInline = 9;
|
|
|
|
// An intentionally funny-looking-in-core-dumps constant for uninitialized
|
|
// instruction pointers.
|
|
static const uint64_t kUninitializedRIP = 0xba5eba11acc01ade;
|
|
|
|
// Return the SrcKey for the operation that should follow the supplied
|
|
// NormalizedInstruction. (This might not be the next SrcKey in the
|
|
// unit if we merged some instructions or otherwise modified them
|
|
// during analysis.)
|
|
SrcKey nextSrcKey(const Tracelet& t, const NormalizedInstruction& i) {
|
|
return i.next ? i.next->source : t.m_nextSk;
|
|
}
|
|
|
|
// stubBlock --
|
|
// Used to emit a bunch of outlined code that is unconditionally jumped to.
|
|
template <typename L>
|
|
void stubBlock(X64Assembler& hot, X64Assembler& cold, const L& body) {
|
|
hot. jmp(cold.code.frontier);
|
|
guardDiamond(cold, body);
|
|
cold. jmp(hot.code.frontier);
|
|
}
|
|
|
|
static bool
|
|
typeCanBeStatic(DataType t) {
|
|
return t != KindOfObject && t != KindOfRef;
|
|
}
|
|
|
|
// IfCountNotStatic --
|
|
// Emits if (%reg->_count != RefCountStaticValue) { ... }.
|
|
// May short-circuit this check if the type is known to be
|
|
// static already.
|
|
struct IfCountNotStatic {
|
|
typedef CondBlock<FAST_REFCOUNT_OFFSET,
|
|
RefCountStaticValue,
|
|
CC_Z,
|
|
field_type(RefData, _count)> NonStaticCondBlock;
|
|
NonStaticCondBlock *m_cb; // might be null
|
|
IfCountNotStatic(X64Assembler& a,
|
|
PhysReg reg,
|
|
DataType t = KindOfInvalid) {
|
|
// Objects and variants cannot be static
|
|
if (typeCanBeStatic(t)) {
|
|
m_cb = new NonStaticCondBlock(a, reg);
|
|
} else {
|
|
m_cb = nullptr;
|
|
}
|
|
}
|
|
|
|
~IfCountNotStatic() {
|
|
delete m_cb;
|
|
}
|
|
};
|
|
|
|
bool
|
|
classIsUnique(const Class* cls) {
|
|
return RuntimeOption::RepoAuthoritative &&
|
|
cls &&
|
|
(cls->attrs() & AttrUnique);
|
|
}
|
|
|
|
bool
|
|
classIsUniqueOrCtxParent(const Class* cls) {
|
|
if (!cls) return false;
|
|
if (classIsUnique(cls)) return true;
|
|
Class* ctx = arGetContextClass(curFrame());
|
|
if (!ctx) return false;
|
|
return ctx->classof(cls);
|
|
}
|
|
|
|
bool
|
|
classIsUniqueNormalClass(const Class* cls) {
|
|
return classIsUnique(cls) &&
|
|
!(cls->attrs() & (AttrInterface | AttrTrait));
|
|
}
|
|
|
|
// Segfault handler: figure out if it's an intentional segfault
|
|
// (timeout exception) and if so, act appropriately. Otherwise, pass
|
|
// the signal on.
|
|
void TranslatorX64::SEGVHandler(int signum, siginfo_t *info, void *ctx) {
|
|
TranslatorX64 *self = Get();
|
|
void *surprisePage =
|
|
ThreadInfo::s_threadInfo->m_reqInjectionData.surprisePage;
|
|
if (info->si_addr == surprisePage) {
|
|
ucontext_t *ucontext = (ucontext_t*)ctx;
|
|
TCA rip = (TCA)RIP_REGISTER(ucontext->uc_mcontext);
|
|
SignalStubMap::const_accessor a;
|
|
if (!self->m_segvStubs.find(a, rip)) {
|
|
NOT_REACHED();
|
|
}
|
|
TCA astubsCall = a->second;
|
|
|
|
// When this handler returns, "call" the astubs code for this
|
|
// surprise check.
|
|
RIP_REGISTER(ucontext->uc_mcontext) = (uintptr_t)astubsCall;
|
|
|
|
// We've processed this event; reset the page in case execution
|
|
// continues normally.
|
|
g_vmContext->m_stack.unprotect();
|
|
} else {
|
|
sighandler_t handler = (sighandler_t)self->m_segvChain;
|
|
if (handler == SIG_DFL || handler == SIG_IGN) {
|
|
signal(signum, handler);
|
|
raise(signum);
|
|
} else {
|
|
self->m_segvChain(signum, info, ctx);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Copy a heap cell from memory to the stack.
|
|
*
|
|
* Use emitCopyToStack when you can safely change the state of the
|
|
* register map. When using emitCopyToStackRegSafe, you'll need to
|
|
* invalidate the stack location manually at an appropriate time.
|
|
*/
|
|
|
|
void
|
|
TranslatorX64::emitCopyToStackRegSafe(X64Assembler& a,
|
|
const NormalizedInstruction& ni,
|
|
PhysReg src,
|
|
int off,
|
|
PhysReg tmpReg) {
|
|
assert(off % sizeof(Cell) == 0);
|
|
emitCopyTo(a, src, 0, rVmSp, vstackOffset(ni, off), tmpReg);
|
|
}
|
|
|
|
// Logical register move: ensures the value in src will be in dest
|
|
// after execution, but might do so in strange ways. Do not count on
|
|
// being able to smash dest to a different register in the future, e.g.
|
|
void
|
|
emitMovRegReg(X64Assembler& a, PhysReg src, PhysReg dest) {
|
|
SpaceRecorder("_RegMove", a);
|
|
if (src != dest) {
|
|
a. movq (src, dest);
|
|
}
|
|
}
|
|
|
|
void
|
|
emitLea(X64Assembler& a, PhysReg base, int disp, PhysReg dest) {
|
|
if (!disp) {
|
|
emitMovRegReg(a, base, dest);
|
|
return;
|
|
}
|
|
a. lea (base[disp], dest);
|
|
}
|
|
|
|
// Runtime half of emitDebugPrint: traces `message`, the three register
// values, and the full name of the function owning frame `fp` ("[?]" when the
// frame has no function). Everything is evaluated inside the TRACE macro.
static void UNUSED tc_debug_print(const char* message,
                                  uintptr_t r1,
                                  uintptr_t r2,
                                  uintptr_t r3,
                                  ActRec* fp) {
  TRACE(1, "*********************** %s: %p %p %p  (for : %s)\n",
        message, (void*)r1, (void*)r2, (void*)r3,
        !fp->m_func ? "[?]" : fp->m_func->fullName()->data());
}
|
|
|
|
// Utility for debugging translations that will print a message,
|
|
// followed by the value of up to three registers.
|
|
void TranslatorX64::emitDebugPrint(Asm& a,
|
|
const char* message,
|
|
PhysReg r1,
|
|
PhysReg r2,
|
|
PhysReg r3) {
|
|
boost::optional<PhysRegSaver> aSaver;
|
|
boost::optional<PhysRegSaverStub> astubsSaver;
|
|
|
|
if (&a == &this->a) {
|
|
aSaver = boost::in_place<PhysRegSaver>(boost::ref(a), kAllX64Regs);
|
|
} else {
|
|
astubsSaver = boost::in_place<PhysRegSaverStub>(boost::ref(a),
|
|
kAllX64Regs);
|
|
}
|
|
|
|
a. mov_imm64_reg (uintptr_t(message), argNumToRegName[0]);
|
|
a. mov_reg64_reg64(r1, argNumToRegName[1]);
|
|
a. mov_reg64_reg64(r2, argNumToRegName[2]);
|
|
a. mov_reg64_reg64(r3, argNumToRegName[3]);
|
|
a. mov_reg64_reg64(rVmFp, argNumToRegName[4]);
|
|
a. call((TCA)tc_debug_print);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitRB(X64Assembler& a,
|
|
RingBufferType t,
|
|
SrcKey sk, RegSet toSave) {
|
|
if (!Trace::moduleEnabledRelease(Trace::tx64, 3)) {
|
|
return;
|
|
}
|
|
PhysRegSaver rs(a, toSave | kSpecialCrossTraceRegs);
|
|
int arg = 0;
|
|
emitImmReg(a, t, argNumToRegName[arg++]);
|
|
emitImmReg(a, sk.getFuncId(), argNumToRegName[arg++]);
|
|
emitImmReg(a, sk.offset(), argNumToRegName[arg++]);
|
|
a. call((TCA)ringbufferEntry);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitRB(X64Assembler& a,
|
|
RingBufferType t,
|
|
const char* msg,
|
|
RegSet toSave) {
|
|
if (!Trace::moduleEnabledRelease(Trace::tx64, 3)) {
|
|
return;
|
|
}
|
|
PhysRegSaver save(a, toSave | kSpecialCrossTraceRegs);
|
|
int arg = 0;
|
|
emitImmReg(a, (uintptr_t)msg, argNumToRegName[arg++]);
|
|
emitImmReg(a, strlen(msg), argNumToRegName[arg++]);
|
|
emitImmReg(a, t, argNumToRegName[arg++]);
|
|
a. call((TCA)ringbufferMsg);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitCall(X64Assembler& a, TCA dest) {
|
|
if (a.jmpDeltaFits(dest) && !Stats::enabled()) {
|
|
a. call(dest);
|
|
} else {
|
|
a. call(getNativeTrampoline(dest));
|
|
}
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitCall(X64Assembler& a, Call call) {
|
|
if (call.isDirect()) {
|
|
return emitCall(a, (TCA)call.getAddress());
|
|
}
|
|
// Virtual call.
|
|
// Load method's address from proper offset off of object in rdi,
|
|
// using rax as scratch.
|
|
a.loadq(*rdi, rax);
|
|
a.call(rax[call.getOffset()]);
|
|
}
|
|
|
|
// Emits a thread-local load of g_context (an ExecutionContext) into `dest`.
static void emitGetGContext(X64Assembler& a, PhysReg dest) {
  emitTLSLoad<ExecutionContext>(a, g_context, dest);
}
|
|
|
|
void
|
|
TranslatorX64::emitEagerSyncPoint(X64Assembler& a, const Opcode* pc,
|
|
const Offset spDiff) {
|
|
static COff spOff = offsetof(VMExecutionContext, m_stack) +
|
|
Stack::topOfStackOffset();
|
|
static COff fpOff = offsetof(VMExecutionContext, m_fp);
|
|
static COff pcOff = offsetof(VMExecutionContext, m_pc);
|
|
|
|
/* we can't use rAsm because the pc store uses it as a
|
|
temporary */
|
|
Reg64 rEC = reg::rdi;
|
|
|
|
a. push(rEC);
|
|
emitGetGContext(a, rEC);
|
|
a. storeq(rVmFp, rEC[fpOff]);
|
|
if (spDiff) {
|
|
a. lea(rVmSp[spDiff], rAsm);
|
|
a. storeq(rAsm, rEC[spOff]);
|
|
} else {
|
|
a. storeq(rVmSp, rEC[spOff]);
|
|
}
|
|
a. storeq(pc, rEC[pcOff]);
|
|
a. pop(rEC);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::recordSyncPoint(X64Assembler& a, Offset pcOff, Offset spOff) {
|
|
m_pendingFixups.push_back(PendingFixup(a.code.frontier,
|
|
Fixup(pcOff, spOff)));
|
|
}
|
|
|
|
void
|
|
TranslatorX64::recordIndirectFixup(CTCA addr, int dwordsPushed) {
|
|
m_fixupMap.recordIndirectFixup(
|
|
a.code.frontier, IndirectFixup((2 + dwordsPushed) * 8));
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitIncRef(PhysReg base, DataType dtype) {
|
|
emitIncRef(a, base, dtype);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitIncRef(X64Assembler &a, PhysReg base, DataType dtype) {
|
|
if (!IS_REFCOUNTED_TYPE(dtype) && dtype != KindOfInvalid) {
|
|
return;
|
|
}
|
|
SpaceRecorder sr("_IncRef", a);
|
|
assert(sizeof(Countable) == sizeof(int32_t));
|
|
{ // if !static then
|
|
IfCountNotStatic ins(a, base, dtype);
|
|
/*
|
|
* The optimization guide cautions against using inc; while it is
|
|
* compact, it only writes the low-order 8 bits of eflags, causing a
|
|
* partial dependency for any downstream flags-dependent code.
|
|
*/
|
|
a. incl(base[FAST_REFCOUNT_OFFSET]);
|
|
} // endif
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitIncRefGenericRegSafe(PhysReg base,
|
|
int disp,
|
|
PhysReg tmpReg) {
|
|
{ // if RC
|
|
IfRefCounted irc(a, base, disp);
|
|
a. load_reg64_disp_reg64(base, disp + TVOFF(m_data),
|
|
tmpReg);
|
|
{ // if !static
|
|
IfCountNotStatic ins(a, tmpReg);
|
|
a. incl(tmpReg[FAST_REFCOUNT_OFFSET]);
|
|
} // endif
|
|
} // endif
|
|
}
|
|
|
|
// emitEagerVMRegSave --
//   Inline: saves the VM registers in place, in the TC. Needing this is
//   unusual; saving these registers lazily via recordCall and its ilk is
//   normally what you want.
//
//   RegSaveFlags are the bit flags selecting which registers to save in
//   addition to the stack pointer. SaveFP takes the value from rVmFp as
//   usual. SavePC requires the caller to have placed the PC offset of the
//   instruction about to be executed in rdi.
enum RegSaveFlags {
  SaveFP = 1 << 0,
  SavePC = 1 << 1
};
|
|
|
|
// Emits code that stores the VM registers into the execution context and
// returns the address at which that code starts. rVmSp is always stored;
// `flags` (a combination of RegSaveFlags, nothing else) chooses whether the
// PC and/or rVmFp are stored too. For SavePC the PC offset is expected in
// rdi.
static TCA
emitEagerVMRegSave(X64Assembler& a,
                   int flags /* :: RegSaveFlags */) {
  assert((flags & ~(SavePC | SaveFP)) == 0);
  const TCA start = a.code.frontier;
  const bool wantFP = bool(flags & SaveFP);
  const bool wantPC = bool(flags & SavePC);

  Reg64 pcReg = rdi;
  PhysReg rEC = rAsm;
  assert(!kSpecialCrossTraceRegs.contains(rdi));

  emitGetGContext(a, rEC);

  // The fp and pc offsets are relative to the top-of-stack slot because rEC
  // is advanced to that slot below.
  static COff spOff = offsetof(VMExecutionContext, m_stack) +
    Stack::topOfStackOffset();
  static COff fpOff = offsetof(VMExecutionContext, m_fp) - spOff;
  static COff pcOff = offsetof(VMExecutionContext, m_pc) - spOff;

  assert(spOff != 0);
  // Instruction selection note: this is an lea, but add is more compact and
  // we can afford the flags bash.
  a.addq(spOff, r64(rEC));
  a.storeq(rVmSp, *rEC);
  if (wantPC) {
    // rVmSp is temporarily abused as a scratch register (it ends up holding
    // the bytecode base); it is saved and restored around this sequence.
    Reg64 rBC = rVmSp;
    a.push(rBC);
    // m_fp -> m_func -> m_unit -> m_bc + pcReg
    a.loadq(rVmFp[AROFF(m_func)], rBC);
    a.loadq(rBC[Func::unitOff()], rBC);
    a.loadq(rBC[Unit::bcOff()], rBC);
    a.addq(rBC, pcReg);
    a.storeq(pcReg, rEC[pcOff]);
    a.pop(rBC);
  }
  if (wantFP) {
    a.storeq(rVmFp, rEC[fpOff]);
  }
  return start;
}
|
|
|
|
// Maps a refcounted DataType to a Call of the matching release() method
// (StringData, ArrayData, ObjectData or RefData). Any other type is a
// programming error and trips the assertion.
Call TranslatorX64::getDtorCall(DataType type) {
  switch (type) {
    case BitwiseKindOfString: return Call(getMethodPtr(&StringData::release));
    case KindOfArray:         return Call(getMethodPtr(&ArrayData::release));
    case KindOfObject:        return Call(getMethodPtr(&ObjectData::release));
    case KindOfRef:           return Call(getMethodPtr(&RefData::release));
    default:
      assert(false);
      NOT_REACHED();
  }
}
|
|
|
|
/*
|
|
* callDestructor/jumpDestructor --
|
|
*
|
|
* Emit a call or jump to the appropriate destructor for a dynamically
|
|
* typed value.
|
|
*
|
|
* No registers are saved; most translated code should be using
|
|
* emitDecRefGeneric{Reg,} instead of this.
|
|
*
|
|
* Inputs:
|
|
*
|
|
* - typeReg is destroyed and may not be argNumToRegName[0].
|
|
* - argNumToRegName[0] should contain the m_data for this value.
|
|
* - scratch is destoyed.
|
|
*/
|
|
|
|
static IndexedMemoryRef lookupDestructor(X64Assembler& a,
|
|
PhysReg typeReg,
|
|
PhysReg scratch) {
|
|
assert(typeReg != r32(argNumToRegName[0]));
|
|
assert(scratch != argNumToRegName[0]);
|
|
|
|
static_assert((BitwiseKindOfString >> kShiftDataTypeToDestrIndex == 0) &&
|
|
(KindOfArray >> kShiftDataTypeToDestrIndex == 1) &&
|
|
(KindOfObject >> kShiftDataTypeToDestrIndex == 2) &&
|
|
(KindOfRef >> kShiftDataTypeToDestrIndex == 3),
|
|
"lookup of destructors depends on KindOf* values");
|
|
|
|
a. shrl (kShiftDataTypeToDestrIndex, r32(typeReg));
|
|
a. movq (&g_destructors, scratch);
|
|
return scratch[typeReg*8];
|
|
}
|
|
|
|
// Emits an indirect call through the destructor table slot selected by the
// type in `typeReg`. Both `typeReg` and `scratch` are clobbered by the
// lookup.
static void callDestructor(X64Assembler& a,
                           PhysReg typeReg,
                           PhysReg scratch) {
  auto dtorSlot = lookupDestructor(a, typeReg, scratch);
  a.call(dtorSlot);
}
|
|
|
|
// Emits an indirect jump through the destructor table slot selected by the
// type in `typeReg`. Both `typeReg` and `scratch` are clobbered by the
// lookup.
static void jumpDestructor(X64Assembler& a,
                           PhysReg typeReg,
                           PhysReg scratch) {
  auto dtorSlot = lookupDestructor(a, typeReg, scratch);
  a.jmp(dtorSlot);
}
|
|
|
|
void TranslatorX64::emitGenericDecRefHelpers() {
|
|
Label release;
|
|
|
|
// m_dtorGenericStub just takes a pointer to the TypedValue in rdi.
|
|
moveToAlign(a, kNonFallthroughAlign);
|
|
m_irPopRHelper = a.code.frontier;
|
|
// popR: Move top-of-stack pointer to rdi
|
|
emitMovRegReg(a, rVmSp, rdi);
|
|
// fall through
|
|
m_dtorGenericStub = a.code.frontier;
|
|
emitLoadTVType(a, rdi[TVOFF(m_type)], r32(rAsm));
|
|
a. loadq (rdi[TVOFF(m_data)], rdi);
|
|
// Fall through to the regs stub.
|
|
|
|
/*
|
|
* Custom calling convention: m_type goes in rAsm, m_data in
|
|
* rdi. We don't ever store program locations in rAsm, so the
|
|
* caller didn't need to spill anything. The assembler sometimes
|
|
* uses rAsm, but we know the stub won't need to and it makes it
|
|
* possible to share the code for both decref helpers.
|
|
*/
|
|
m_dtorGenericStubRegs = a.code.frontier;
|
|
a. cmpl (RefCountStaticValue, rdi[FAST_REFCOUNT_OFFSET]);
|
|
jccBlock<CC_Z>(a, [&] {
|
|
a. decl (rdi[FAST_REFCOUNT_OFFSET]);
|
|
release.jcc8(a, CC_Z);
|
|
});
|
|
a. ret ();
|
|
|
|
asm_label(a, release);
|
|
{
|
|
PhysRegSaver prs(a, kGPCallerSaved - RegSet(rdi));
|
|
callDestructor(a, rAsm, rax);
|
|
recordIndirectFixup(a.code.frontier, prs.rspTotalAdjustmentRegs());
|
|
}
|
|
a. ret ();
|
|
|
|
TRACE(1, "HOTSTUB: generic dtor start: %lx\n",
|
|
uintptr_t(m_irPopRHelper));
|
|
TRACE(1, "HOTSTUB: genericDtorStub: %lx\n", uintptr_t(m_dtorGenericStub));
|
|
TRACE(1, "HOTSTUB: genericDtorStubRegs: %lx\n",
|
|
uintptr_t(m_dtorGenericStubRegs));
|
|
TRACE(1, "HOTSTUB: total dtor generic stubs %zu bytes\n",
|
|
size_t(a.code.frontier - m_dtorGenericStub));
|
|
}
|
|
|
|
|
|
// Translates args.m_sk again under the write lease and returns the result of
// translate(). Returns nullptr without translating when a debugger is
// attached and the source key is blacklisted, or when the write lease cannot
// be acquired.
TCA TranslatorX64::retranslate(const TranslArgs& args) {
  const bool blacklisted =
    isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), args.m_sk);
  if (blacklisted) {
    // We are about to translate something the debugger has blacklisted;
    // exit early.
    SKTRACE(1, args.m_sk, "retranslate abort due to debugger\n");
    return nullptr;
  }

  LeaseHolder writer(s_writeLease);
  if (!writer) {
    return nullptr;
  }

  SKTRACE(1, args.m_sk, "retranslate\n");
  return translate(args);
}
|
|
|
|
// Only use comes from HHIR's cgExitTrace() case TraceExitType::SlowNoProgress
|
|
TCA TranslatorX64::retranslateAndPatchNoIR(SrcKey sk,
|
|
bool align,
|
|
TCA toSmash) {
|
|
if (isDebuggerAttachedProcess() && isSrcKeyInBL(curUnit(), sk)) {
|
|
// We are about to translate something known to be blacklisted by
|
|
// debugger, exit early
|
|
SKTRACE(1, sk, "retranslateAndPatchNoIR abort due to debugger\n");
|
|
return nullptr;
|
|
}
|
|
LeaseHolder writer(s_writeLease);
|
|
if (!writer) return nullptr;
|
|
SKTRACE(1, sk, "retranslateAndPatchNoIR\n");
|
|
SrcRec* srcRec = getSrcRec(sk);
|
|
if (srcRec->translations().size() ==
|
|
RuntimeOption::EvalJitMaxTranslations + 1) {
|
|
// we've gone over the translation limit and already have an anchor
|
|
// translation that will interpret, so just return NULL and force
|
|
// interpretation of this BB.
|
|
return nullptr;
|
|
}
|
|
TCA start = translate(TranslArgs(sk, align).interp(true));
|
|
if (start != nullptr) {
|
|
smashJmp(getAsmFor(toSmash), toSmash, start);
|
|
}
|
|
return start;
|
|
}
|
|
|
|
/*
|
|
* Satisfy an alignment constraint. If we're in a reachable section
|
|
* of code, bridge the gap with nops. Otherwise, int3's.
|
|
*/
|
|
void
|
|
TranslatorX64::moveToAlign(X64Assembler &aa,
|
|
const size_t align /* =kJmpTargetAlign */,
|
|
bool unreachable /* =true */) {
|
|
using namespace HPHP::Util;
|
|
SpaceRecorder sr("_Align", aa);
|
|
assert(isPowerOfTwo(align));
|
|
size_t leftInBlock = align - ((align - 1) & uintptr_t(aa.code.frontier));
|
|
if (leftInBlock == align) return;
|
|
if (unreachable) {
|
|
if (leftInBlock > 2) {
|
|
aa.ud2();
|
|
leftInBlock -= 2;
|
|
}
|
|
if (leftInBlock > 0) {
|
|
aa.emitInt3s(leftInBlock);
|
|
}
|
|
return;
|
|
}
|
|
aa.emitNop(leftInBlock);
|
|
}
|
|
|
|
/*
|
|
* Req machinery. We sometimes emit code that is unable to proceed
|
|
* without translator assistance; e.g., a basic block whose successor is
|
|
* unknown. We leave one of these request arg blobs in m_data, and point
|
|
* to it at callout-time.
|
|
*/
|
|
|
|
// REQ_BIND_CALL
|
|
struct ReqBindCall {
|
|
SrcKey m_sourceInstr;
|
|
TCA m_toSmash;
|
|
int m_nArgs;
|
|
bool m_isImmutable; // call was to known func.
|
|
} m_bindCall;
|
|
|
|
// ID to name mapping for tracing.
|
|
static inline const char*
|
|
reqName(int req) {
|
|
static const char* reqNames[] = {
|
|
#define REQ(nm) #nm,
|
|
SERVICE_REQUESTS
|
|
#undef REQ
|
|
};
|
|
return reqNames[req];
|
|
}
|
|
|
|
/*
|
|
* Find or create a translation for sk. Returns TCA of "best" current
|
|
* translation. May return NULL if it is currently impossible to create
|
|
* a translation.
|
|
*/
|
|
TCA
|
|
TranslatorX64::getTranslation(const TranslArgs& args) {
|
|
auto sk = args.m_sk;
|
|
curFunc()->validate();
|
|
SKTRACE(2, sk,
|
|
"getTranslation: curUnit %s funcId %" PRIx64 " offset %d\n",
|
|
curUnit()->filepath()->data(),
|
|
sk.getFuncId(),
|
|
sk.offset());
|
|
SKTRACE(2, sk, " funcId: %" PRIx64 "\n",
|
|
curFunc()->getFuncId());
|
|
|
|
if (curFrame()->hasVarEnv() && curFrame()->getVarEnv()->isGlobalScope()) {
|
|
SKTRACE(2, sk, "punting on pseudoMain\n");
|
|
return nullptr;
|
|
}
|
|
if (const SrcRec* sr = m_srcDB.find(sk)) {
|
|
TCA tca = sr->getTopTranslation();
|
|
if (tca) {
|
|
SKTRACE(2, sk, "getTranslation: found %p\n", tca);
|
|
return tca;
|
|
}
|
|
}
|
|
return createTranslation(args);
|
|
}
|
|
|
|
int
|
|
TranslatorX64::numTranslations(SrcKey sk) const {
|
|
if (const SrcRec* sr = m_srcDB.find(sk)) {
|
|
return sr->translations().size();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Creates the first translation for args.m_sk. Takes the write lease (and
// returns nullptr if it is unavailable), re-checks the source DB, and
// otherwise inserts a new SrcRec whose anchor translation is a
// REQ_RETRANSLATE service request, then retranslates.
TCA
TranslatorX64::createTranslation(const TranslArgs& args) {
  /*
   * Try to become the writer. We delay this until we *know* we will have a
   * need to create new translations, instead of just trying to win the
   * lottery at the dawn of time. Hopefully lots of requests won't require
   * any new translation.
   */
  auto sk = args.m_sk;
  LeaseHolder writer(s_writeLease);
  if (!writer) return nullptr;

  if (SrcRec* existing = m_srcDB.find(sk)) {
    if (TCA tca = existing->getTopTranslation()) {
      // Handle extremely unlikely race; someone may have just already added
      // the first instance of this SrcRec while we did a non-blocking wait
      // on the write lease.
      return tca;
    }
    // Since we are holding the write lease, we know that sk is properly
    // initialized, except that it has no translations (due to
    // replaceOldTranslations).
    return retranslate(args);
  }

  // We put retranslate requests at the end of our slab to more frequently
  // allow conditional jump fall-throughs
  AHotSelector ahs(this, curFunc()->attrs() & AttrHot);

  TCA astart = a.code.frontier;
  TCA stubstart = astubs.code.frontier;
  TCA req = emitServiceReq(SRFlags::None, REQ_RETRANSLATE,
                           1, uint64_t(sk.offset()));
  SKTRACE(1, sk, "inserting anchor translation for (%p,%d) at %p\n",
          curUnit(), sk.offset(), req);
  SrcRec* sr = m_srcDB.insert(sk);
  sr->setFuncInfo(curFunc());
  sr->setAnchorTranslation(req);

  // The service request is expected to emit nothing into `a`.
  size_t asize = a.code.frontier - astart;
  size_t stubsize = astubs.code.frontier - stubstart;
  assert(asize == 0);
  if (stubsize) {
    addTranslation(TransRec(sk, curUnit()->md5(), TransAnchor,
                            astart, asize, stubstart, stubsize));
    assert(!isTransDBEnabled() || getTransRec(stubstart)->kind == TransAnchor);
  }

  return retranslate(args);
}
|
|
|
|
// Top translation currently recorded for `sk`, or nullptr when the source DB
// has no record for it. Never creates a translation.
TCA
TranslatorX64::lookupTranslation(SrcKey sk) const {
  SrcRec* sr = m_srcDB.find(sk);
  if (!sr) {
    return nullptr;
  }
  return sr->getTopTranslation();
}
|
|
|
|
// Translates the tracelet described by `args` and returns the frontier of
// `a` at which the translation starts (after optional alignment). Bumps the
// `translate` perf counter. For non-interp requests, the JIT is switched off
// once m_numHHIRTrans equals EvalJitGlobalTranslationLimit.
TCA
TranslatorX64::translate(const TranslArgs& args) {
  INC_TPC(translate);
  assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
  assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);

  const bool hitGlobalLimit =
    !args.m_interp &&
    m_numHHIRTrans == RuntimeOption::EvalJitGlobalTranslationLimit;
  if (hitGlobalLimit) {
    RuntimeOption::EvalJit = false;
    ThreadInfo::s_threadInfo->m_reqInjectionData.updateJit();
  }

  AHotSelector ahs(this, curFunc()->attrs() & AttrHot);

  if (args.m_align) {
    moveToAlign(a, kNonFallthroughAlign);
  }

  // Capture the start only after any alignment padding has been emitted.
  TCA start = a.code.frontier;
  m_lastHHIRPunt.clear();
  translateTracelet(args);

  SKTRACE(1, args.m_sk, "translate moved head from %p to %p\n",
          getTopTranslation(args.m_sk), start);
  return start;
}
|
|
|
|
/*
|
|
* Returns true if the given current frontier can have an nBytes-long
|
|
* instruction written without any risk of cache-tearing.
|
|
*/
|
|
bool isSmashable(Address frontier, int nBytes, int offset /* = 0 */) {
|
|
assert(nBytes <= int(kX64CacheLineSize));
|
|
uintptr_t iFrontier = uintptr_t(frontier) + offset;
|
|
uintptr_t lastByte = uintptr_t(frontier) + nBytes - 1;
|
|
return (iFrontier & ~kX64CacheLineMask) == (lastByte & ~kX64CacheLineMask);
|
|
}
|
|
|
|
/*
|
|
* Call before emitting a test-jcc sequence. Inserts a nop gap such that after
|
|
* writing a testBytes-long instruction, the frontier will be smashable.
|
|
*/
|
|
void prepareForTestAndSmash(Asm& a, int testBytes, TestAndSmashFlags flags) {
|
|
switch (flags) {
|
|
case kAlignJcc:
|
|
prepareForSmash(a, testBytes + kJmpccLen, testBytes);
|
|
assert(isSmashable(a.code.frontier + testBytes, kJmpccLen));
|
|
break;
|
|
case kAlignJccImmediate:
|
|
prepareForSmash(a,
|
|
testBytes + kJmpccLen,
|
|
testBytes + kJmpccLen - kJmpImmBytes);
|
|
assert(isSmashable(a.code.frontier + testBytes, kJmpccLen,
|
|
kJmpccLen - kJmpImmBytes));
|
|
break;
|
|
case kAlignJccAndJmp:
|
|
// Ensure that the entire jcc, and the entire jmp are smashable
|
|
// (but we dont need them both to be in the same cache line)
|
|
prepareForSmash(a, testBytes + kJmpccLen, testBytes);
|
|
prepareForSmash(a, testBytes + kJmpccLen + kJmpLen, testBytes + kJmpccLen);
|
|
assert(isSmashable(a.code.frontier + testBytes, kJmpccLen));
|
|
assert(isSmashable(a.code.frontier + testBytes + kJmpccLen, kJmpLen));
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Makes the frontier of `a` smashable for an nBytes-long instruction (see
// isSmashable). If it is not already, emits a nop gap sized so that
// frontier + offset lands on the start of the next cache line.
void prepareForSmash(X64Assembler& a, int nBytes, int offset /* = 0 */) {
  if (isSmashable(a.code.frontier, nBytes, offset)) {
    return;
  }
  int gapSize = (~(uintptr_t(a.code.frontier) + offset) &
                 kX64CacheLineMask) + 1;
  a.emitNop(gapSize);
  assert(isSmashable(a.code.frontier, nBytes, offset));
}
|
|
|
|
void
|
|
TranslatorX64::smash(X64Assembler &a, TCA src, TCA dest, bool isCall) {
|
|
assert(canWrite());
|
|
TRACE(2, "smash: %p -> %p\n", src, dest);
|
|
/*
|
|
* !
|
|
*
|
|
* We are about to smash reachable code in the translation cache. A
|
|
* hardware thread might be executing the very instruction we're
|
|
* modifying. This is safe because:
|
|
*
|
|
* 1. We align smashable instructions so that they reside on a single
|
|
* cache line;
|
|
*
|
|
* 2. We modify the instruction with a single processor store; and
|
|
*
|
|
* 3. The smashed region contains only a single instruction in the
|
|
* orignal instruction stream (see jmp() -> emitJ32() -> bytes() in
|
|
* the assembler.
|
|
*/
|
|
CodeCursor cg(a, src);
|
|
assert(isSmashable(a.code.frontier, kJmpLen));
|
|
if (dest > src && dest - src <= kJmpLen) {
|
|
assert(!isCall);
|
|
a. emitNop(dest - src);
|
|
} else if (!isCall) {
|
|
a. jmp(dest);
|
|
} else {
|
|
a. call(dest);
|
|
}
|
|
}
|
|
|
|
// Marks the region running from the base of tx64->ahot to the end of
// tx64->astubs as readable and executable, but not writable. The result of
// mprotect is not checked.
void TranslatorX64::protectCode() {
  auto* const regionStart = tx64->ahot.code.base;
  const size_t regionLen =
    tx64->astubs.code.base - tx64->ahot.code.base + tx64->astubs.code.size;
  mprotect(regionStart, regionLen, PROT_READ | PROT_EXEC);
}
|
|
|
|
// Marks the region running from the base of tx64->ahot to the end of
// tx64->astubs as readable, writable and executable. The result of mprotect
// is not checked.
void TranslatorX64::unprotectCode() {
  auto* const regionStart = tx64->ahot.code.base;
  const size_t regionLen =
    tx64->astubs.code.base - tx64->ahot.code.base + tx64->astubs.code.size;
  mprotect(regionStart, regionLen, PROT_READ | PROT_WRITE | PROT_EXEC);
}
|
|
|
|
void
|
|
TranslatorX64::emitStackCheck(int funcDepth, Offset pc) {
|
|
funcDepth += kStackCheckPadding * sizeof(Cell);
|
|
|
|
uint64_t stackMask = cellsToBytes(RuntimeOption::EvalVMStackElms) - 1;
|
|
a. mov_reg64_reg64(rVmSp, rAsm); // copy to destroy
|
|
a. and_imm64_reg64(stackMask, rAsm);
|
|
a. sub_imm64_reg64(funcDepth + Stack::sSurprisePageSize, rAsm);
|
|
assert(m_stackOverflowHelper);
|
|
a. jl(m_stackOverflowHelper); // Unlikely branch to failure.
|
|
// Success.
|
|
}
|
|
|
|
// Tests the surprise flags for the current thread. Should be used
|
|
// before a jnz to surprise handling code.
|
|
void
|
|
TranslatorX64::emitTestSurpriseFlags(Asm& a) {
|
|
static_assert(RequestInjectionData::LastFlag < (1 << 8),
|
|
"Translator assumes RequestInjectionFlags fit in one byte");
|
|
a. testb((int8_t)0xff, rVmTl[TargetCache::kConditionFlagsOff]);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitCheckSurpriseFlagsEnter(bool inTracelet, Fixup fixup) {
|
|
emitTestSurpriseFlags(a);
|
|
{
|
|
UnlikelyIfBlock ifTracer(CC_NZ, a, astubs);
|
|
if (false) { // typecheck
|
|
const ActRec* ar = nullptr;
|
|
functionEnterHelper(ar);
|
|
}
|
|
astubs.mov_reg64_reg64(rVmFp, argNumToRegName[0]);
|
|
emitCall(astubs, (TCA)&functionEnterHelper);
|
|
if (inTracelet) {
|
|
recordSyncPoint(astubs, fixup.m_pcOffset, fixup.m_spOffset);
|
|
} else {
|
|
// If we're being called while generating a func prologue, we
|
|
// have to record the fixup directly in the fixup map instead of
|
|
// going through m_pendingFixups like normal.
|
|
m_fixupMap.recordFixup(astubs.code.frontier, fixup);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
TranslatorX64::setArgInActRec(ActRec* ar, int argNum, uint64_t datum,
|
|
DataType t) {
|
|
TypedValue* tv =
|
|
(TypedValue*)(uintptr_t(ar) - (argNum+1) * sizeof(TypedValue));
|
|
tv->m_data.num = datum;
|
|
tv->m_type = t;
|
|
}
|
|
|
|
int
|
|
TranslatorX64::shuffleArgsForMagicCall(ActRec* ar) {
|
|
if (!ar->hasInvName()) {
|
|
return 0;
|
|
}
|
|
const Func* f UNUSED = ar->m_func;
|
|
f->validate();
|
|
assert(f->name()->isame(s___call.get())
|
|
|| f->name()->isame(s___callStatic.get()));
|
|
assert(f->numParams() == 2);
|
|
TRACE(1, "shuffleArgsForMagicCall: ar %p\n", ar);
|
|
assert(ar->hasInvName());
|
|
StringData* invName = ar->getInvName();
|
|
assert(invName);
|
|
ar->setVarEnv(nullptr);
|
|
int nargs = ar->numArgs();
|
|
// We need to make an array containing all the arguments passed by the
|
|
// caller and put it where the second argument is
|
|
HphpArray* argArray = ArrayData::Make(nargs);
|
|
argArray->incRefCount();
|
|
for (int i = 0; i < nargs; ++i) {
|
|
TypedValue* tv =
|
|
(TypedValue*)(uintptr_t(ar) - (i+1) * sizeof(TypedValue));
|
|
argArray->nvAppend(tv);
|
|
tvRefcountedDecRef(tv);
|
|
}
|
|
// Put invName in the slot for first argument
|
|
setArgInActRec(ar, 0, uint64_t(invName), BitwiseKindOfString);
|
|
// Put argArray in the slot for second argument
|
|
setArgInActRec(ar, 1, uint64_t(argArray), KindOfArray);
|
|
// Fix up ActRec's numArgs
|
|
ar->initNumArgs(2);
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* The standard VMRegAnchor treatment won't work for some cases called
|
|
* during function preludes.
|
|
*
|
|
* The fp sync machinery is fundamentally based on the notion that
|
|
* instruction pointers in the TC are uniquely associated with source
|
|
* HHBC instructions, and that source HHBC instructions are in turn
|
|
* uniquely associated with SP->FP deltas.
|
|
*
|
|
* trimExtraArgs is called from the prologue of the callee.
|
|
* The prologue is 1) still in the caller frame for now,
|
|
* and 2) shared across multiple call sites. 1 means that we have the
|
|
* fp from the caller's frame, and 2 means that this fp is not enough
|
|
* to figure out sp.
|
|
*
|
|
* However, the prologue passes us the callee actRec, whose predecessor
|
|
* has to be the caller. So we can sync sp and fp by ourselves here.
|
|
* Geronimo!
|
|
*/
|
|
static void sync_regstate_to_caller(ActRec* preLive) {
|
|
assert(tl_regState == REGSTATE_DIRTY);
|
|
VMExecutionContext* ec = g_vmContext;
|
|
ec->m_stack.top() = (TypedValue*)preLive - preLive->numArgs();
|
|
ActRec* fp = preLive == ec->m_firstAR ?
|
|
ec->m_nestedVMs.back().m_savedState.fp : (ActRec*)preLive->m_savedRbp;
|
|
ec->m_fp = fp;
|
|
ec->m_pc = fp->m_func->unit()->at(fp->m_func->base() + preLive->m_soff);
|
|
tl_regState = REGSTATE_CLEAN;
|
|
}
|
|
|
|
void
|
|
TranslatorX64::trimExtraArgs(ActRec* ar) {
|
|
assert(!ar->hasInvName());
|
|
|
|
sync_regstate_to_caller(ar);
|
|
const Func* f = ar->m_func;
|
|
int numParams = f->numParams();
|
|
int numArgs = ar->numArgs();
|
|
assert(numArgs > numParams);
|
|
int numExtra = numArgs - numParams;
|
|
|
|
TRACE(1, "trimExtraArgs: %d args, function %s takes only %d, ar %p\n",
|
|
numArgs, f->name()->data(), numParams, ar);
|
|
|
|
if (f->attrs() & AttrMayUseVV) {
|
|
assert(!ar->hasExtraArgs());
|
|
ar->setExtraArgs(ExtraArgs::allocateCopy(
|
|
(TypedValue*)(uintptr_t(ar) - numArgs * sizeof(TypedValue)),
|
|
numArgs - numParams));
|
|
} else {
|
|
// Function is not marked as "MayUseVV", so discard the extra arguments
|
|
TypedValue* tv = (TypedValue*)(uintptr_t(ar) - numArgs*sizeof(TypedValue));
|
|
for (int i = 0; i < numExtra; ++i) {
|
|
tvRefcountedDecRef(tv);
|
|
++tv;
|
|
}
|
|
ar->setNumArgs(numParams);
|
|
}
|
|
|
|
// Only go back to dirty in a non-exception case. (Same reason as
|
|
// above.)
|
|
tl_regState = REGSTATE_DIRTY;
|
|
}
|
|
|
|
/*
 * Return the entry point to use for `func' when it is called through the
 * call-array path.
 *
 * If the func already has a body other than funcBodyHelperThunk, that is
 * returned.  With no default-valued parameters the result is simply the
 * translation of the function's base.  Otherwise a small dispatcher is
 * emitted that compares the ActRec's arg count against each defaulted
 * parameter index and jumps to the matching default-value funclet,
 * falling back to the function base.  Returns nullptr when the write
 * lease cannot be acquired.
 */
TCA
TranslatorX64::getCallArrayProlog(Func* func) {
  TCA tca = func->getFuncBody();
  if (tca != (TCA)funcBodyHelperThunk) return tca;

  // (parameter index, funclet offset) for each defaulted parameter.
  std::vector<std::pair<int,Offset> > dvs;
  const int numParams = func->numParams();
  for (int i = 0; i < numParams; ++i) {
    const Func::ParamInfo& pi = func->params()[i];
    if (!pi.hasDefaultValue()) continue;
    dvs.push_back(std::make_pair(i, pi.funcletOff()));
  }

  if (dvs.empty()) {
    SrcKey sk(func, func->base());
    return tx64->getTranslation(TranslArgs(sk, false));
  }

  LeaseHolder writer(s_writeLease);
  if (!writer) return nullptr;
  // Re-check under the lease: someone may have installed a body already.
  tca = func->getFuncBody();
  if (tca != (TCA)funcBodyHelperThunk) return tca;

  tca = a.code.frontier;
  if (dvs.size() == 1) {
    // One candidate: compare against memory directly.
    a.cmp_imm32_disp_reg32(dvs[0].first,
                           AROFF(m_numArgsAndCtorFlag), rVmFp);
    emitBindJcc(a, CC_LE, SrcKey(func, dvs[0].second));
  } else {
    // Several candidates: load the count once and test each in order.
    a.load_reg64_disp_reg32(rVmFp, AROFF(m_numArgsAndCtorFlag), rax);
    for (auto const& dv : dvs) {
      a.cmp_imm32_reg32(dv.first, rax);
      emitBindJcc(a, CC_LE, SrcKey(func, dv.second));
    }
  }
  // All parameters supplied: go to the regular function entry.
  emitBindJmp(a, SrcKey(func, func->base()));
  return tca;
}
|
|
|
|
/*
 * Emit the stub a func guard jumps to when the ActRec's func is not the
 * one the prologue was compiled for.  The stub looks up the right
 * prologue in the actual callee's prologue table and jumps there.
 *
 * Register use in the emitted code:
 *   rax - the called Func*, later the prologue address
 *   rdx - number of args passed (ctor flag bit masked off)
 *   rcx - number of params the callee declares
 *
 * Returns the (aligned) address of the stub.
 */
TCA
TranslatorX64::emitPrologueRedispatch(X64Assembler& a) {
  moveToAlign(a);
  const TCA stubEntry = a.code.frontier;
  TRACE(1, "HOTSTUB: emitPrologueRedispatch: %lx\n", uintptr_t(a.code.frontier));

  // We're in the wrong func prologue.

  assert(kScratchCrossTraceRegs.contains(rax));
  assert(kScratchCrossTraceRegs.contains(rdx));
  assert(kScratchCrossTraceRegs.contains(rcx));

  // rax <- called func
  a.load_reg64_disp_reg64(rStashedAR, AROFF(m_func), rax);
  // rdx <- number of passed parameters
  a.load_reg64_disp_reg32(rStashedAR, AROFF(m_numArgsAndCtorFlag), rdx);
  a.and_imm32_reg32(0x7fffffff, rdx);
  // rcx <- number of declared parameters
  a.load_reg64_disp_reg32(rax, Func::numParamsOff(), rcx);

  // If we didn't pass too many args, index func's prologue table with
  // the passed count directly.  The branch target is a placeholder (its
  // own address) until it is patched further down.
  a.cmp_reg32_reg32(rdx, rcx);
  const TCA toFixedProloguesCheck = a.code.frontier;
  a.jcc8(CC_L, toFixedProloguesCheck);

  // dispatch:
  //   rax = func->prologues[rdx]
  //   jmp rax
  const TCA dispatch = a.code.frontier;
  a.loadq(rax[rdx*8 + Func::prologueTableOff()], rax);
  a.jmp(rax);
  a.ud2();

  // More parameters passed than the function expected.  If fewer than
  // kNumFixedPrologues were passed, rdx is still a legitimate index into
  // the prologue table, so reuse the dispatch sequence above.
  //
  // fixedProloguesCheck:
  //   cmp $kNumFixedPrologues, %rdx
  //   jl dispatch
  a.patchJcc8(toFixedProloguesCheck, a.code.frontier);
  a.cmp_imm32_reg32(kNumFixedPrologues, rdx);
  a.jcc8(CC_L, dispatch);

  // Too many gosh-darned parameters passed.  Use the entry at
  // numExpected + 1, which is always a "too many params" entry point:
  //   rax = func->prologues[rcx + 1]
  //   jmp rax
  a.load_reg64_disp_index_reg64(rax,
                                Func::prologueTableOff() + sizeof(TCA),
                                rcx,
                                rax);
  a.jmp(rax);
  a.ud2();
  return stubEntry;
}
|
|
|
|
// The func guard emitted ahead of each prologue gets skipped and patched by
|
|
// other code, so its layout is described by these magic offsets (in bytes).
// The guard comes in two forms, chosen by whether the Func* fits in a signed
// 32-bit immediate.
|
|
static const int kFuncMovImm = 6; // Long form: offset from guard start to the 8-byte Func* immediate
|
|
static const int kFuncCmpImm = 4; // Short form: offset from guard start to the 4-byte Func* immediate
|
|
static const int kFuncGuardLen = 23; // Total length of the long-form guard
|
|
static const int kFuncGuardShortLen = 14; // Total length of the short-form guard
|
|
|
|
template<typename T>
|
|
static T*
|
|
funcPrologToGuardImm(TCA prolog) {
|
|
assert(sizeof(T) == 4 || sizeof(T) == 8);
|
|
T* retval = (T*)(prolog - (sizeof(T) == 8 ?
|
|
kFuncGuardLen - kFuncMovImm :
|
|
kFuncGuardShortLen - kFuncCmpImm));
|
|
// We padded these so the immediate would fit inside a cache line
|
|
assert(((uintptr_t(retval) ^ (uintptr_t(retval + 1) - 1)) &
|
|
~(kX64CacheLineSize - 1)) == 0);
|
|
|
|
return retval;
|
|
}
|
|
|
|
static inline bool
|
|
funcPrologHasGuard(TCA prolog, const Func* func) {
|
|
intptr_t iptr = uintptr_t(func);
|
|
if (deltaFits(iptr, sz::dword)) {
|
|
return *funcPrologToGuardImm<int32_t>(prolog) == iptr;
|
|
}
|
|
return *funcPrologToGuardImm<int64_t>(prolog) == iptr;
|
|
}
|
|
|
|
/*
 * Map a prologue address back to the start of the func guard in front of
 * it.  A null prologue and fcallHelperThunk are returned unchanged.
 */
static TCA
funcPrologToGuard(TCA prolog, const Func* func) {
  if (!prolog || prolog == (TCA)fcallHelperThunk) return prolog;
  // The guard form, and hence its length, depends on whether the Func*
  // fits in a 32-bit immediate.
  if (deltaFits(uintptr_t(func), sz::dword)) {
    return prolog - kFuncGuardShortLen;
  }
  return prolog - kFuncGuardLen;
}
|
|
|
|
/*
 * Emit the guard that precedes a func prologue: compare the ActRec's
 * m_func against `func' and branch to m_funcPrologueRedispatch on a
 * mismatch.  Returns the address just past the guard, which is where the
 * prologue proper starts.
 *
 * Two encodings exist.  When the Func* fits in a signed 32-bit immediate
 * a single cmp against memory is used; otherwise the pointer is
 * materialized in rdx with an 8-byte mov and compared against rax.
 */
TCA
TranslatorX64::emitFuncGuard(X64Assembler& a, const Func* func) {
  assert(kScratchCrossTraceRegs.contains(rax));
  assert(kScratchCrossTraceRegs.contains(rdx));

  const int kAlign = kX64CacheLineSize;
  const int kAlignMask = kAlign - 1;
  const bool longForm = !deltaFits((intptr_t)func, sz::dword);
  const int immSize = longForm ? 8 : 4;
  const int loBits = uintptr_t(a.code.frontier) & kAlignMask;

  // Keep the immediate safely smashable: it must not straddle a cache
  // line.  immEnd is the in-line offset of the immediate's last byte; if
  // that is smaller than immSize - 1 the first byte would land on the
  // previous line, so pad with nops until the immediate starts the line.
  int immEnd = loBits + (longForm ? kFuncMovImm : kFuncCmpImm);
  immEnd = (immEnd + immSize - 1) & kAlignMask;
  if (immEnd < immSize - 1) {
    a.emitNop(immSize - 1 - immEnd);
  }

  TCA aStart DEBUG_ONLY = a.code.frontier;
  if (longForm) {
    a.load_reg64_disp_reg64(rStashedAR, AROFF(m_func), rax);
    /*
      Although func doesnt fit in a signed 32-bit immediate, it may still
      fit in an unsigned one. Rather than deal with yet another case
      (which only happens when we disable jemalloc) just force it to
      be an 8-byte immediate, and patch it up afterwards.
    */
    a.mov_imm64_reg(0xdeadbeeffeedface, rdx);
    uint64_t* const immSlot = (uint64_t*)a.code.frontier - 1;
    assert(*immSlot == 0xdeadbeeffeedface);
    *immSlot = uintptr_t(func);
    a.cmp_reg64_reg64(rax, rdx);
  } else {
    a.cmp_imm32_disp_reg32(uint64_t(func), AROFF(m_func), rStashedAR);
  }

  assert(m_funcPrologueRedispatch);

  a.jnz(m_funcPrologueRedispatch);
  assert(funcPrologToGuard(a.code.frontier, func) == aStart);
  assert(funcPrologHasGuard(a.code.frontier, func));
  return a.code.frontier;
}
|
|
|
|
/*
|
|
* funcPrologue --
|
|
*
|
|
* Given a callee and a number of args, match up to the callee's
|
|
* argument expectations and dispatch.
|
|
*
|
|
* Call/return hand-shaking is a bit funny initially. At translation time,
|
|
* we don't necessarily know what function we're calling. For instance,
|
|
*
|
|
* f(g());
|
|
*
|
|
* Will lead to a set of basic blocks like:
|
|
*
|
|
* b1: pushfuncd "f"
|
|
* pushfuncd "g"
|
|
* fcall
|
|
* b2: fcall
|
|
*
|
|
* The fcallc labelled "b2" above is not statically bindable in our
|
|
* execution model.
|
|
*
|
|
* We decouple the call work into a per-callsite portion, responsible
|
|
* for recording the return address, and a per-(callee, numArgs) portion,
|
|
* responsible for fixing up arguments and dispatching to remaining
|
|
* code. We call the per-callee portion a "prologue."
|
|
*
|
|
* Also, we are called from two distinct environments. From REQ_BIND_CALL,
|
|
* we're running "between" basic blocks, with all VM registers sync'ed.
|
|
* However, we're also called in the middle of basic blocks, when dropping
|
|
* entries into func->m_prologues. So don't go around using the
|
|
* translation-time values of vmfp()/vmsp(), since they have an
|
|
* unpredictable relationship to the source.
|
|
*/
|
|
bool
|
|
TranslatorX64::checkCachedPrologue(const Func* func, int paramIdx,
|
|
TCA& prologue) const {
|
|
prologue = (TCA)func->getPrologue(paramIdx);
|
|
if (prologue != (TCA)fcallHelperThunk && !s_replaceInFlight) {
|
|
TRACE(1, "cached prologue %s(%d) -> cached %p\n",
|
|
func->fullName()->data(), paramIdx, prologue);
|
|
assert(isValidCodeAddress(prologue));
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// pops the return address pushed by fcall and stores it into the actrec
|
|
void
|
|
TranslatorX64::emitPopRetIntoActRec(Asm& a) {
|
|
a. pop (rStashedAR[AROFF(m_savedRip)]);
|
|
}
|
|
|
|
// Installs the given frame pointer, stack pointer and bytecode offset as
// the current VM registers and marks the register state clean.  Must be
// entered with the register state dirty.
static void interp_set_regs(ActRec* ar, Cell* sp, Offset pcOff) {
|
|
assert(tl_regState == REGSTATE_DIRTY);
|
|
tl_regState = REGSTATE_CLEAN;
|
|
vmfp() = (Cell*)ar;
|
|
vmsp() = sp;
|
|
// NOTE(review): curUnit() presumably depends on the frame pointer set just
// above, so keep this assignment last -- confirm before reordering.
vmpc() = curUnit()->at(pcOff);
|
|
}
|
|
|
|
/*
 * Return (generating it if necessary) the prologue of `func' for a call
 * passing `nPassed' arguments, and cache it in the func's prologue
 * table.  Returns nullptr if the write lease is unavailable or a
 * translator replacement is in flight.
 *
 * `ar' is only used for cloned closures, where no prologue is emitted:
 * the entry translation of the body (or of the first applicable default
 * value funclet) is used instead.
 */
TCA
TranslatorX64::funcPrologue(Func* func, int nPassed, ActRec* ar) {
  func->validate();
  TRACE(1, "funcPrologue %s(%d)\n", func->fullName()->data(), nPassed);
  const int numParams = func->numParams();
  // Every "too many args" call shares the slot at numParams + 1.
  int paramIndex = numParams + 1;
  if (nPassed <= numParams) {
    paramIndex = nPassed;
  }

  const bool funcIsMagic = func->isMagic();

  // Do a quick test before grabbing the write lease
  TCA prologue;
  if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;

  if (func->isClonedClosure()) {
    assert(ar);
    const Func::ParamInfoVec& paramInfo = func->params();
    // Enter at the first default-value funclet among the missing
    // parameters, or at the function base if there is none.
    Offset entry = func->base();
    for (int i = nPassed; i < numParams; ++i) {
      if (paramInfo[i].hasDefaultValue()) {
        entry = paramInfo[i].funcletOff();
        break;
      }
    }
    interp_set_regs(ar, (Cell*)ar - func->numSlotsInFrame(), entry);
    SrcKey funcBody(func, entry);
    TCA tca = getTranslation(TranslArgs(funcBody, false));
    tl_regState = REGSTATE_DIRTY;
    if (tca) {
      // racy, but ok...
      func->setPrologue(paramIndex, tca);
    }
    return tca;
  }

  // If the translator is getting replaced out from under us, refuse to
  // provide a prologue; we don't know whether this request is running on the
  // old or new context.
  LeaseHolder writer(s_writeLease);
  if (!writer || s_replaceInFlight) return nullptr;
  // Now that we hold the write lease, look again: another thread may have
  // installed the prologue in the meantime.
  if (checkCachedPrologue(func, paramIndex, prologue)) return prologue;

  AHotSelector ahs(this, func->attrs() & AttrHot);

  SpaceRecorder sr("_FuncPrologue", a);
  // If we're close to a cache line boundary, just burn some space to
  // try to keep the func and its body on fewer total lines.
  if (((uintptr_t)a.code.frontier & kX64CacheLineMask) >= 32) {
    moveToAlign(a, kX64CacheLineSize);
  }
  // Careful: this isn't necessarily the real entry point. For funcIsMagic
  // prologues, this is just a possible prologue.
  TCA aStart = a.code.frontier;
  TCA start = aStart;
  TCA stubStart = astubs.code.frontier;

  // Guard: we're in the right callee. Magic callees get their guard
  // later, at magicStart.
  if (!funcIsMagic) {
    start = aStart = emitFuncGuard(a, func);
  }

  emitRB(a, RBTypeFuncPrologueTry, func->fullName()->data());

  // NB: We have most of the register file to play with, since we know
  // we're between BB's. So, we hardcode some registers here rather
  // than using the scratch allocator.
  TRACE(2, "funcPrologue: user function: %s\n", func->name()->data());

  // Add a counter for the translation if requested
  if (RuntimeOption::EvalJitTransCounters) {
    emitTransCounterInc(a);
  }

  if (!funcIsMagic) {
    emitPopRetIntoActRec(a);
    // entry point for magic methods comes later
    emitRB(a, RBTypeFuncEntry, func->fullName()->data());

    /*
     * Guard: we have enough stack space to complete this function.
     * The overflow check is omitted for leaf functions that can't use
     * more than kStackCheckLeafPadding cells.
     */
    auto const smallLeaf =
      (func->attrs() & AttrPhpLeafFn) &&
      func->maxStackCells() < kStackCheckLeafPadding;
    if (!smallLeaf) {
      emitStackCheck(cellsToBytes(func->maxStackCells()), func->base());
    }
  }

  SrcKey skFuncBody = emitPrologue(func, nPassed);

  if (funcIsMagic) {
    // The real entry point for magic methods is here.
    TCA magicStart = emitFuncGuard(a, func);
    emitPopRetIntoActRec(a);
    emitRB(a, RBTypeFuncEntry, func->fullName()->data());
    // Guard: we have enough stack space to complete this function.
    emitStackCheck(cellsToBytes(func->maxStackCells()), func->base());
    assert(numParams == 2);
    // Special __call prologue
    a.mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
    emitCall(a, TCA(TranslatorX64::shuffleArgsForMagicCall));
    // shuffleArgs returning 0 means this was not a magic call, so we
    // continue with the prologue specialized for nPassed; otherwise we
    // need the prologue specialized for nPassed == numParams (2).
    if (nPassed == 2) {
      // Both outcomes want the same prologue.
      a.jmp(start);
    } else {
      a.test_reg64_reg64(rax, rax);
      // z ==> not a magic call, go to prologue for nPassed
      if (deltaFits(start - (a.code.frontier + kJcc8Len), sz::byte)) {
        a.jcc8(CC_Z, start);
      } else {
        a.jcc(CC_Z, start);
      }
      // Fall through: this was a magic call, so exactly two arguments
      // are on the stack now.  Fix up the hardware stack pointer.
      nPassed = 2;
      emitLea(a, rStashedAR, -cellsToBytes(nPassed), rVmSp);
      // Optimization TODO: Reuse the prologue for args == 2
      emitPrologue(func, nPassed);
    }
    start = magicStart;
  }
  assert(funcPrologHasGuard(start, func));
  TRACE(2, "funcPrologue tx64 %p %s(%d) setting prologue %p\n",
        this, func->fullName()->data(), nPassed, start);
  assert(isValidCodeAddress(start));
  func->setPrologue(paramIndex, start);

  addTranslation(TransRec(skFuncBody, func->unit()->md5(),
                          TransProlog, aStart, a.code.frontier - aStart,
                          stubStart, astubs.code.frontier - stubStart));

  recordGdbTranslation(skFuncBody, func,
                       a, aStart,
                       false, true);
  recordBCInstr(OpFuncPrologue, a, start);

  return start;
}
|
|
|
|
/*
 * Runtime helper: raise the "missing argument" warning for function
 * `name', which expects `expected' arguments but received `got'.  A
 * separate message is used when exactly one argument is expected.
 */
static void raiseMissingArgument(const char* name, int expected, int got) {
  if (expected != 1) {
    raise_warning(Strings::MISSING_ARGUMENTS, name, expected, got);
    return;
  }
  raise_warning(Strings::MISSING_ARGUMENT, name, got);
}
|
|
|
|
/*
 * Emit the body of a prologue for `func' called with `nPassed'
 * arguments: reconcile passed vs. declared arguments, link the frame,
 * unpack closure state, initialize the remaining locals, raise
 * missing-argument warnings, check surprise flags and finally transfer
 * control to the function body (or a default-value funclet).
 *
 * Returns the SrcKey of the bytecode location the prologue dispatches to.
 */
SrcKey
TranslatorX64::emitPrologue(Func* func, int nPassed) {
  const int numParams = func->numParams();
  const Func::ParamInfoVec& paramInfo = func->params();

  Offset dvInitializer = InvalidAbsoluteOffset;

  assert(IMPLIES(func->isGenerator(), nPassed == numParams));
  if (nPassed > numParams) {
    // Too many args; a weird case, so just callout. The ActRec is
    // handed to the helper as its first argument.
    if (false) { // typecheck
      TranslatorX64::trimExtraArgs((ActRec*)nullptr);
    }
    a.mov_reg64_reg64(rStashedAR, argNumToRegName[0]);
    emitCall(a, TCA(TranslatorX64::trimExtraArgs));
    // We'll fix rVmSp below.
  } else if (nPassed < numParams) {
    // Pick the default value initializer to enter at: the funclet of the
    // first missing parameter that has a default, if any.
    for (int i = nPassed; i < numParams; ++i) {
      if (paramInfo[i].hasDefaultValue()) {
        dvInitializer = paramInfo[i].funcletOff();
        break;
      }
    }
    TRACE(1, "Only have %d of %d args; getting dvFunclet\n",
          nPassed, numParams);
    emitImmReg(a, nPassed, rax);
    // do { *(--rVmSp) = NULL; nPassed++; } while (nPassed < numParams);
    // This should be an unusual case, so optimize for code density
    // rather than execution speed; i.e., don't unroll the loop.
    const TCA fillLoop = a.code.frontier;
    a.sub_imm32_reg64(sizeof(Cell), rVmSp);
    a.incl(eax);
    emitStoreUninitNull(a, 0, rVmSp);
    a.cmp_imm32_reg32(numParams, rax);
    a.jcc8(CC_L, fillLoop);
  }

  // Entry point for numParams == nPassed is here.
  // Args are kosher. Frame linkage: set fp = ar.
  a.mov_reg64_reg64(rStashedAR, rVmFp);

  // Number of leading locals that hold a value by the time we are done
  // with arguments and closure state.
  int numLocals = numParams;
  if (func->isClosureBody()) {
    const int numUseVars = func->cls()->numDeclProperties();

    emitLea(a, rVmFp, -cellsToBytes(numParams), rVmSp);

    PhysReg rClosure = rcx;
    a.loadq(rVmFp[AROFF(m_this)], rClosure);

    // Swap in the $this or late bound class
    a.loadq(rClosure[c_Closure::thisOffset()], rAsm);
    a.storeq(rAsm, rVmFp[AROFF(m_this)]);

    // Shift the low bit out and branch on the resulting flags; the
    // guarded code restores the value and increfs it as an object.
    a.shrq(1, rAsm);
    if (func->attrs() & AttrStatic) {
      UnlikelyIfBlock ifRealThis(CC_NBE, a, astubs);
      astubs.shlq(1, rAsm);
      emitIncRef(astubs, rAsm, KindOfObject);
    } else {
      JccBlock<CC_BE> ifRealThis(a);
      a.shlq(1, rAsm);
      emitIncRef(rAsm, KindOfObject);
    }

    // Put in the correct context
    a.loadq(rClosure[c_Closure::funcOffset()], rAsm);
    a.storeq(rAsm, rVmFp[AROFF(m_func)]);

    const int baseUVOffset =
      sizeof(ObjectData) + func->cls()->builtinPropSize();

    // The closure itself becomes the first local.  No incref is needed:
    // it used to be $this and is now a local, so the counts cancel out.
    emitStoreTypedValue(a, KindOfObject, rClosure, -cellsToBytes(1), rVmSp);

    // Copy in all the use vars, each into the next local slot.
    for (int uv = 0; uv < numUseVars; uv++) {
      const int spOffset = -cellsToBytes(uv + 2);
      const int uvOffset = baseUVOffset + cellsToBytes(uv);

      emitCopyTo(a, rClosure, uvOffset, rVmSp, spOffset, rAsm);
      emitIncRefGenericRegSafe(rVmSp, spOffset, rAsm);
    }

    numLocals += numUseVars + 1;
  }

  // We're in the callee frame; initialize locals. Unroll the loop all
  // the way if there are a modest number of locals to update;
  // otherwise, do it in a compact loop. If we're in a generator body,
  // named locals will be initialized by UnpackCont so we can leave
  // them alone here.
  const int numUninitLocals = func->numLocals() - numLocals;
  assert(numUninitLocals >= 0);
  if (numUninitLocals > 0 && !func->isGenerator()) {
    SpaceRecorder sr("_InitializeLocals", a);

    if (numUninitLocals > kLocalsToInitializeInline) {
      // Too many locals for a slew of inline movs; a loop is more
      // compact.
      PhysReg loopReg = rcx;

      // rVmFp + loopReg addresses the type field of the TypedValue we're
      // about to write to.
      const int loopStart =
        -func->numLocals() * sizeof(TypedValue) + TVOFF(m_type);
      const int loopEnd = -numLocals * sizeof(TypedValue) + TVOFF(m_type);

      emitImmReg(a, loopStart, loopReg);
      emitImmReg(a, KindOfUninit, rdx);

      // do {
      //   rVmFp[loopReg].m_type = KindOfUninit;
      //   loopReg += sizeof(Cell);
      // } while (loopReg != loopEnd);
      const TCA initLoop = a.code.frontier;
      emitStoreTVType(a, edx, rVmFp[loopReg]);
      a.addq(sizeof(Cell), loopReg);
      a.cmpq(loopEnd, loopReg);
      a.jcc8(CC_NE, initLoop);
    } else {
      // Few locals: store the type tag of each one inline.  A zeroed
      // eax doubles as KindOfUninit.
      static_assert(KindOfUninit == 0, "");
      if (numParams < func->numLocals()) {
        a.xorl(eax, eax);
      }
      PhysReg base;
      int disp;
      for (int k = numLocals; k < func->numLocals(); ++k) {
        locToRegDisp(Location(Location::Local, k), &base, &disp, func);
        emitStoreTVType(a, eax, base[disp + TVOFF(m_type)]);
      }
    }
  }

  // Dispatch target: the funclet of the chosen default value
  // initializer if there is one, else the start of the function.
  const Opcode* destPC = func->unit()->entry() + func->base();
  if (dvInitializer != InvalidAbsoluteOffset) {
    destPC = func->unit()->entry() + dvInitializer;
  }
  SrcKey funcBody(func, destPC);

  // Move rVmSp to the right place: just past all locals.  Generators
  // are left alone and recorded with an empty frame.
  int frameCells = func->numSlotsInFrame();
  if (func->isGenerator()) {
    frameCells = 0;
  } else {
    emitLea(a, rVmFp, -cellsToBytes(frameCells), rVmSp);
  }

  Fixup fixup(funcBody.offset() - func->base(), frameCells);

  // Emit warnings for any missing arguments
  if (!func->info()) {
    for (int i = nPassed; i < numParams; ++i) {
      if (paramInfo[i].funcletOff() != InvalidAbsoluteOffset) continue;
      emitImmReg(a, (intptr_t)func->name()->data(), argNumToRegName[0]);
      emitImmReg(a, numParams, argNumToRegName[1]);
      emitImmReg(a, i, argNumToRegName[2]);
      emitCall(a, (TCA)raiseMissingArgument);
      m_fixupMap.recordFixup(a.code.frontier, fixup);
    }
  }

  // Check surprise flags in the same place as the interpreter: after
  // setting up the callee's frame but before executing any of its
  // code
  emitCheckSurpriseFlagsEnter(false, fixup);

  if (func->isClosureBody() && func->cls()) {
    // m_func was replaced above, so jump through that func's prologue
    // table, using the slot matching the number of passed args.
    const int entry = nPassed <= numParams ? nPassed : numParams + 1;
    // Relying on rStashedAR == rVmFp here
    a.loadq(rStashedAR[AROFF(m_func)], rax);
    a.loadq(rax[Func::prologueTableOff() + sizeof(TCA)*entry], rax);
    a.jmp(rax);
  } else {
    emitBindJmp(funcBody);
  }
  return funcBody;
}
|
|
|
|
static bool
|
|
isNativeImplCall(const Func* funcd, int numArgs) {
|
|
return funcd && funcd->info() && numArgs == funcd->numParams();
|
|
}
|
|
|
|
int32_t // returns the amount by which rVmSp should be adjusted
|
|
TranslatorX64::emitBindCall(SrcKey srcKey, const Func* funcd, int numArgs) {
|
|
// If this is a call to a builtin and we don't need any argument
|
|
// munging, we can skip the prologue system and do it inline.
|
|
if (isNativeImplCall(funcd, numArgs)) {
|
|
StoreImmPatcher patchIP(a, (uint64_t)a.code.frontier, reg::rax,
|
|
cellsToBytes(numArgs) + AROFF(m_savedRip),
|
|
rVmSp);
|
|
assert(funcd->numLocals() == funcd->numParams());
|
|
assert(funcd->numIterators() == 0);
|
|
emitLea(a, rVmSp, cellsToBytes(numArgs), rVmFp);
|
|
emitCheckSurpriseFlagsEnter(true, Fixup(0, numArgs));
|
|
// rVmSp is already correctly adjusted, because there's no locals
|
|
// other than the arguments passed.
|
|
auto retval = emitNativeImpl(funcd, false /* don't jump to return */);
|
|
patchIP.patch(uint64_t(a.code.frontier));
|
|
return retval;
|
|
}
|
|
if (debug) {
|
|
a. storeq (kUninitializedRIP,
|
|
rVmSp[cellsToBytes(numArgs) + AROFF(m_savedRip)]);
|
|
}
|
|
// Stash callee's rVmFp into rStashedAR for the callee's prologue
|
|
emitLea(a, rVmSp, cellsToBytes(numArgs), rStashedAR);
|
|
emitBindCallHelper(srcKey, funcd, numArgs);
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitBindCallHelper(SrcKey srcKey,
|
|
const Func* funcd,
|
|
int numArgs) {
|
|
// Whatever prologue we're branching to will check at runtime that we
|
|
// went to the right Func*, correcting if necessary. We treat the first
|
|
// Func we encounter as a decent prediction. Make space to burn in a
|
|
// TCA.
|
|
ReqBindCall* req = m_globalData.alloc<ReqBindCall>();
|
|
prepareForSmash(a, kCallLen);
|
|
TCA toSmash = a.code.frontier;
|
|
a. call(astubs.code.frontier);
|
|
|
|
astubs. mov_reg64_reg64(rStashedAR, serviceReqArgRegs[1]);
|
|
emitPopRetIntoActRec(astubs);
|
|
emitServiceReq(SRFlags::Persistent, REQ_BIND_CALL, 1ull, req);
|
|
|
|
TRACE(1, "will bind static call: tca %p, this %p, funcd %p, astubs %p\n",
|
|
toSmash, this, funcd, astubs.code.frontier);
|
|
req->m_toSmash = toSmash;
|
|
req->m_nArgs = numArgs;
|
|
req->m_sourceInstr = srcKey;
|
|
req->m_isImmutable = (bool)funcd;
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* NativeImpl is a special operation in the sense that it must be the
|
|
* only opcode in a function body, and also functions as the return.
|
|
*
|
|
* if emitSavedRIPReturn is false, it returns the amount by which
|
|
* rVmSp should be adjusted, otherwise, it emits code to perform
|
|
* the adjustment (this allows us to combine updates to rVmSp)
|
|
*/
|
|
int32_t TranslatorX64::emitNativeImpl(const Func* func,
|
|
bool emitSavedRIPReturn) {
|
|
BuiltinFunction builtinFuncPtr = func->builtinFuncPtr();
|
|
if (false) { // typecheck
|
|
ActRec* ar = nullptr;
|
|
builtinFuncPtr(ar);
|
|
}
|
|
|
|
TRACE(2, "calling builtin preClass %p func %p\n", func->preClass(),
|
|
builtinFuncPtr);
|
|
/*
|
|
* Call the native implementation. This will free the locals for us in the
|
|
* normal case. In the case where an exception is thrown, the VM unwinder
|
|
* will handle it for us.
|
|
*/
|
|
a. mov_reg64_reg64(rVmFp, argNumToRegName[0]);
|
|
if (eagerRecord(func)) {
|
|
emitEagerSyncPoint(a, func->getEntry(), 0);
|
|
}
|
|
emitCall(a, (TCA)builtinFuncPtr);
|
|
|
|
/*
|
|
* We're sometimes calling this while curFunc() isn't really the
|
|
* builtin---make sure to properly record the sync point as if we
|
|
* are inside the builtin.
|
|
*
|
|
* The assumption here is that for builtins, the generated func
|
|
* contains only a single opcode (NativeImpl), and there are no
|
|
* non-argument locals.
|
|
*/
|
|
assert(func->numIterators() == 0 && func->isBuiltin());
|
|
assert(func->numLocals() == func->numParams());
|
|
assert(*func->getEntry() == OpNativeImpl);
|
|
assert(instrLen(func->getEntry()) == func->past() - func->base());
|
|
Offset pcOffset = 0; // NativeImpl is the only instruction in the func
|
|
Offset stackOff = func->numLocals(); // Builtin stubs have no
|
|
// non-arg locals
|
|
recordSyncPoint(a, pcOffset, stackOff);
|
|
|
|
if (emitSavedRIPReturn) {
|
|
// push the return address to get ready to ret.
|
|
a. push (rVmFp[AROFF(m_savedRip)]);
|
|
}
|
|
|
|
/*
|
|
* The native implementation already put the return value on the
|
|
* stack for us, and handled cleaning up the arguments. We have to
|
|
* update the frame pointer and the stack pointer, and load the
|
|
* return value into the return register so the trace we are
|
|
* returning to has it where it expects.
|
|
*
|
|
* TODO(#1273094): we should probably modify the actual builtins to
|
|
* return values via registers (rax:edx) using the C ABI and do a
|
|
* reg-to-reg move.
|
|
*/
|
|
int nLocalCells = func->numSlotsInFrame();
|
|
if (emitSavedRIPReturn) {
|
|
a. add_imm64_reg64(sizeof(ActRec) + cellsToBytes(nLocalCells-1), rVmSp);
|
|
}
|
|
a. load_reg64_disp_reg64(rVmFp, AROFF(m_savedRbp), rVmFp);
|
|
|
|
emitRB(a, RBTypeFuncExit, func->fullName()->data());
|
|
if (emitSavedRIPReturn) {
|
|
a. ret();
|
|
translator_not_reached(a);
|
|
return 0;
|
|
}
|
|
return sizeof(ActRec) + cellsToBytes(nLocalCells-1);
|
|
}
|
|
|
|
// for documentation see bindJmpccFirst below
|
|
void
|
|
TranslatorX64::emitCondJmp(SrcKey skTaken, SrcKey skNotTaken,
|
|
ConditionCode cc) {
|
|
// should be true for SrcKeys generated via OpJmpZ/OpJmpNZ
|
|
assert(skTaken.getFuncId() == skNotTaken.getFuncId());
|
|
|
|
// reserve space for a smashable jnz/jmp pair; both initially point
|
|
// to our stub.
|
|
prepareForTestAndSmash(a, 0, kAlignJccAndJmp);
|
|
TCA old = a.code.frontier;
|
|
TCA stub = astubs.code.frontier;
|
|
|
|
// begin code for the stub
|
|
|
|
// We need to be careful here, as we are passing an extra paramter to
|
|
// REQ_BIND_JMPCC_FIRST. However we can't pass this parameter via
|
|
// emitServiceReq because that only supports constants/immediates, so
|
|
// compute the last argument via setcc.
|
|
astubs.setcc(cc, rbyte(serviceReqArgRegs[4]));
|
|
emitServiceReq(SRFlags::Persistent, REQ_BIND_JMPCC_FIRST, 4ull,
|
|
old,
|
|
uint64_t(skTaken.offset()),
|
|
uint64_t(skNotTaken.offset()),
|
|
uint64_t(cc));
|
|
|
|
a.jcc(cc, stub); // MUST use 4-byte immediate form
|
|
a.jmp(stub); // MUST use 4-byte immediate form
|
|
}
|
|
|
|
/*
|
|
* bindJmp --
|
|
*
|
|
* Runtime service handler that patches a jmp to the translation of
|
|
* u:dest from toSmash.
|
|
*/
|
|
TCA
|
|
TranslatorX64::bindJmp(TCA toSmash, SrcKey destSk,
|
|
ServiceRequest req, bool& smashed) {
|
|
TCA tDest = getTranslation(
|
|
TranslArgs(destSk, false).interp(req == REQ_BIND_JMP_NO_IR)
|
|
.src(toSmash));
|
|
if (!tDest) return nullptr;
|
|
LeaseHolder writer(s_writeLease);
|
|
if (!writer) return tDest;
|
|
smashed = true;
|
|
SrcRec* sr = getSrcRec(destSk);
|
|
if (req == REQ_BIND_ADDR) {
|
|
sr->chainFrom(IncomingBranch::addr(reinterpret_cast<TCA*>(toSmash)));
|
|
} else if (req == REQ_BIND_JCC) {
|
|
sr->chainFrom(IncomingBranch::jccFrom(toSmash));
|
|
} else {
|
|
sr->chainFrom(IncomingBranch::jmpFrom(toSmash));
|
|
}
|
|
return tDest;
|
|
}
|
|
|
|
/*
|
|
* When we end a tracelet with a conditional jump, emitCondJmp first emits:
|
|
*
|
|
* 1: j<CC> stubJmpccFirst
|
|
* jmp stubJmpccFirst
|
|
*
|
|
* Our "taken" argument tells us whether the branch at 1: was taken or
|
|
* not; and therefore which of offTaken and offNotTaken to continue executing.
|
|
* If we did take the branch, we now rewrite the code so that the branch is
|
|
* straightened. This predicts that subsequent executions will go the same way
|
|
* as the first execution.
|
|
*
|
|
* jn<CC> stubJmpccSecond:offNotTaken
|
|
* nop5 ; fallthru, or jmp if there's already a translation.
|
|
* offTaken:
|
|
*
|
|
* If we did not take the branch, we leave the sense of the condition
|
|
* intact, while patching it up to go to the unexplored code:
|
|
*
|
|
* j<CC> stubJmpccSecond:offTaken
|
|
* nop5
|
|
* offNotTaken:
|
|
*/
|
|
TCA
|
|
TranslatorX64::bindJmpccFirst(TCA toSmash,
|
|
Offset offTaken, Offset offNotTaken,
|
|
bool taken,
|
|
ConditionCode cc,
|
|
bool& smashed) {
|
|
const Func* f = curFunc();
|
|
LeaseHolder writer(s_writeLease);
|
|
if (!writer) return nullptr;
|
|
Offset offWillExplore = taken ? offTaken : offNotTaken;
|
|
Offset offWillDefer = taken ? offNotTaken : offTaken;
|
|
SrcKey dest(f, offWillExplore);
|
|
TRACE(3, "bindJmpccFirst: explored %d, will defer %d; overwriting cc%02x "
|
|
"taken %d\n",
|
|
offWillExplore, offWillDefer, cc, taken);
|
|
|
|
// We want the branch to point to whichever side has not been explored
|
|
// yet.
|
|
if (taken) cc = ccNegate(cc);
|
|
TCA stub =
|
|
emitServiceReq(SRFlags::None, REQ_BIND_JMPCC_SECOND, 3,
|
|
toSmash, uint64_t(offWillDefer), uint64_t(cc));
|
|
|
|
Asm& as = getAsmFor(toSmash);
|
|
// Its not clear where chainFrom should go to if as is astubs
|
|
assert(&as != &astubs);
|
|
|
|
// can we just directly fall through?
|
|
// a jmp + jz takes 5 + 6 = 11 bytes
|
|
bool fallThru = toSmash + kJmpccLen + kJmpLen == as.code.frontier &&
|
|
!m_srcDB.find(dest);
|
|
|
|
TCA tDest;
|
|
tDest = getTranslation(TranslArgs(dest, !fallThru).src(toSmash));
|
|
if (!tDest) {
|
|
return 0;
|
|
}
|
|
smashed = true;
|
|
assert(s_writeLease.amOwner());
|
|
/*
|
|
* Roll over the jcc and the jmp/fallthru. E.g., from:
|
|
*
|
|
* toSmash: jcc <jmpccFirstStub>
|
|
* toSmash+6: jmp <jmpccFirstStub>
|
|
* toSmash+11: <probably the new translation == tdest>
|
|
*
|
|
* to:
|
|
*
|
|
* toSmash: j[n]z <jmpccSecondStub>
|
|
* toSmash+6: nop5
|
|
* toSmash+11: newHotness
|
|
*/
|
|
CodeCursor cg(as, toSmash);
|
|
as.jcc(cc, stub);
|
|
getSrcRec(dest)->chainFrom(IncomingBranch::jmpFrom(as.code.frontier));
|
|
TRACE(5, "bindJmpccFirst: overwrote with cc%02x taken %d\n", cc, taken);
|
|
return tDest;
|
|
}
|
|
|
|
// smashes a jcc to point to a new destination
|
|
TCA
|
|
TranslatorX64::bindJmpccSecond(TCA toSmash, const Offset off,
|
|
ConditionCode cc, bool& smashed) {
|
|
const Func* f = curFunc();
|
|
SrcKey dest(f, off);
|
|
TCA branch = getTranslation(TranslArgs(dest, true).src(toSmash));
|
|
LeaseHolder writer(s_writeLease, NO_ACQUIRE);
|
|
if (branch && writer.acquire()) {
|
|
smashed = true;
|
|
SrcRec* destRec = getSrcRec(dest);
|
|
destRec->chainFrom(IncomingBranch::jccFrom(toSmash));
|
|
}
|
|
return branch;
|
|
}
|
|
|
|
/*
 * Emit an unconditional jmp to addr when cc is CC_None, otherwise a
 * jcc on cc.
 */
static void emitJmpOrJcc(X64Assembler& a, ConditionCode cc, TCA addr) {
  if (cc != CC_None) {
    a.    jcc(cc, addr);
    return;
  }
  a.    jmp(addr);
}
|
|
|
|
/*
|
|
* emitBindJ --
|
|
*
|
|
* Emit code to lazily branch (optionally on condition cc) to the
|
|
* srckey in next.
|
|
* Assumes current basic block is closed (outputs synced, etc.).
|
|
*/
|
|
void
|
|
TranslatorX64::emitBindJ(X64Assembler& _a, ConditionCode cc,
|
|
SrcKey dest, ServiceRequest req) {
|
|
prepareForSmash(_a, cc == CC_None ? (int)kJmpLen : kJmpccLen);
|
|
TCA toSmash = _a.code.frontier;
|
|
if (&_a == &astubs) {
|
|
emitJmpOrJcc(_a, cc, toSmash);
|
|
}
|
|
|
|
TCA sr = emitServiceReq(SRFlags::None, req, 2,
|
|
toSmash, uint64_t(dest.offset()));
|
|
|
|
if (&_a == &astubs) {
|
|
CodeCursor cursor(_a, toSmash);
|
|
emitJmpOrJcc(_a, cc, sr);
|
|
} else {
|
|
emitJmpOrJcc(_a, cc, sr);
|
|
}
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitBindJcc(X64Assembler& _a, ConditionCode cc,
|
|
SrcKey dest,
|
|
ServiceRequest req /* = REQ_BIND_JCC */) {
|
|
emitBindJ(_a, cc, dest, req);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitBindJmp(X64Assembler& _a,
|
|
SrcKey dest,
|
|
ServiceRequest req /* = REQ_BIND_JMP */) {
|
|
emitBindJ(_a, CC_None, dest, req);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitBindJmp(SrcKey dest) {
|
|
emitBindJmp(a, dest);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::checkType(X64Assembler& a,
|
|
const Location& l,
|
|
const RuntimeType& rtt,
|
|
SrcRec& fail) {
|
|
// We can get invalid inputs as a side effect of reading invalid
|
|
// items out of BBs we truncate; they don't need guards.
|
|
if (rtt.isVagueValue() || l.isThis()) return;
|
|
|
|
irCheckType(a, l, rtt, fail);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitFallbackJmp(SrcRec& dest, ConditionCode cc /* = CC_NZ */) {
|
|
emitFallbackJmp(a, dest, cc);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitFallbackJmp(Asm& as, SrcRec& dest,
|
|
ConditionCode cc /* = CC_NZ */) {
|
|
prepareForSmash(as, kJmpccLen);
|
|
dest.emitFallbackJump(as.code.frontier, cc);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitFallbackUncondJmp(Asm& as, SrcRec& dest) {
|
|
prepareForSmash(as, kJmpLen);
|
|
dest.emitFallbackJump(as.code.frontier);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitFallbackCondJmp(Asm& as, SrcRec& dest, ConditionCode cc) {
|
|
prepareForSmash(as, kJmpccLen);
|
|
dest.emitFallbackJump(as.code.frontier, cc);
|
|
}
|
|
|
|
/*
 * emitReqRetransNoIR --
 *
 *   Emit a smashable jmp to a REQ_RETRANSLATE_NO_IR service request
 *   stub for sk.  The stub receives the address of the jmp and sk's
 *   bytecode offset as its two arguments.
 */
void TranslatorX64::emitReqRetransNoIR(Asm& as, const SrcKey& sk) {
  prepareForSmash(as, kJmpLen);
  TCA toSmash = as.code.frontier;
  const bool emittingInStubs = (&as == &astubs);

  // When the jmp itself lives in astubs, emit a placeholder first so
  // the service request is emitted after it; it is rewritten below.
  if (emittingInStubs) {
    as.jmp(toSmash);
  }

  // emitServiceReq is variadic and reads every argument back with
  // va_arg(args, uint64_t), so the offset must be widened explicitly
  // here rather than being passed as a plain Offset.
  TCA sr = emitServiceReq(REQ_RETRANSLATE_NO_IR, 2,
                          toSmash, uint64_t(sk.offset()));

  if (emittingInStubs) {
    // Rewind to the placeholder and point it at the stub.
    CodeCursor cursor(as, toSmash);
    as.jmp(sr);
  } else {
    as.jmp(sr);
  }
}
|
|
|
|
/*
 * packBitVec --
 *
 *   Pack up to 64 consecutive entries of `bits`, starting at index i,
 *   into a single word: entry i + k ends up in bit k of the result.
 *   Packing stops at the end of the vector or at the next multiple of
 *   64, whichever comes first; missing entries are left as zero bits.
 *
 *   i must be a multiple of 64 and a valid index into bits.
 */
uint64_t TranslatorX64::packBitVec(const vector<bool>& bits, unsigned i) {
  uint64_t retval = 0;
  assert(i % 64 == 0);
  assert(i < bits.size());
  while (i < bits.size()) {
    // Widen before shifting: bits[i] would otherwise be promoted to
    // int, and shifting an int by 32 or more is undefined behaviour.
    retval |= static_cast<uint64_t>(bits[i]) << (i % 64);
    if ((++i % 64) == 0) {
      break;
    }
  }
  return retval;
}
|
|
|
|
void
|
|
TranslatorX64::checkRefs(X64Assembler& a,
|
|
SrcKey sk,
|
|
const RefDeps& refDeps,
|
|
SrcRec& fail) {
|
|
if (refDeps.size() == 0) {
|
|
return;
|
|
}
|
|
|
|
// Set up guards for each pushed ActRec that we've made reffiness
|
|
// assumptions about
|
|
for (RefDeps::ArMap::const_iterator it = refDeps.m_arMap.begin();
|
|
it != refDeps.m_arMap.end(); ++it) {
|
|
// Be careful! The actual Func might have fewer refs than the number
|
|
// of args we're passing. To forestall this, we're going to have to
|
|
// keep checking i against the number of params. We consider invocations
|
|
// with too many arguments to have passed their checks.
|
|
int entryArDelta = it->first;
|
|
|
|
m_hhbcTrans->guardRefs(entryArDelta,
|
|
it->second.m_mask,
|
|
it->second.m_vals);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* emitRetFromInterpretedFrame --
|
|
*
|
|
* When the interpreter pushes a call frame, there is necessarily no
|
|
* machine RIP available to return to. This helper fishes out the
|
|
* destination from the frame and redirects execution to it via enterTC.
|
|
*/
|
|
TCA
|
|
TranslatorX64::emitRetFromInterpretedFrame() {
|
|
int32_t arBase = sizeof(ActRec) - sizeof(Cell);
|
|
moveToAlign(astubs);
|
|
TCA stub = astubs.code.frontier;
|
|
// Marshall our own args by hand here.
|
|
astubs. lea (rVmSp[-arBase], serviceReqArgRegs[0]);
|
|
astubs. movq (rVmFp, serviceReqArgRegs[1]);
|
|
(void) emitServiceReq(SRFlags::Persistent | SRFlags::JmpInsteadOfRet,
|
|
REQ_POST_INTERP_RET, 0ull);
|
|
return stub;
|
|
}
|
|
|
|
/*
|
|
* Same as above, except has different logic for fetching the AR we are trying
|
|
* to return from, because generators have ARs in different places.
|
|
*/
|
|
TCA
|
|
TranslatorX64::emitRetFromInterpretedGeneratorFrame() {
|
|
// We have to get the Continuation object from the current AR's $this, then
|
|
// find where its embedded AR is.
|
|
moveToAlign(astubs);
|
|
TCA stub = astubs.code.frontier;
|
|
|
|
PhysReg rContAR = serviceReqArgRegs[0];
|
|
astubs. loadq (rVmFp[AROFF(m_this)], rContAR);
|
|
astubs. loadq (rContAR[CONTOFF(m_arPtr)], rContAR);
|
|
astubs. movq (rVmFp, serviceReqArgRegs[1]);
|
|
(void) emitServiceReq(SRFlags::Persistent | SRFlags::JmpInsteadOfRet,
|
|
REQ_POST_INTERP_RET, 0ull);
|
|
return stub;
|
|
}
|
|
|
|
class FreeRequestStubTrigger : public Treadmill::WorkItem {
|
|
TCA m_stub;
|
|
public:
|
|
explicit FreeRequestStubTrigger(TCA stub) : m_stub(stub) {
|
|
TRACE(3, "FreeStubTrigger @ %p, stub %p\n", this, m_stub);
|
|
}
|
|
virtual void operator()() {
|
|
TRACE(3, "FreeStubTrigger: Firing @ %p , stub %p\n", this, m_stub);
|
|
if (TranslatorX64::Get()->freeRequestStub(m_stub) != true) {
|
|
/* If we can't free the stub, enqueue again to retry */
|
|
enqueue(new FreeRequestStubTrigger(m_stub));
|
|
}
|
|
}
|
|
};
|
|
|
|
/*
 * DepthGuard keeps a per-thread count of live guards in DEBUG builds
 * (traced on construction and destruction).  Outside DEBUG builds it is
 * an empty shell whose depthOne() is always false.
 */
#ifndef DEBUG

struct DepthGuard { bool depthOne() const { return false; } };

#else

struct DepthGuard {
  static __thread int m_depth;

  DepthGuard() {
    m_depth++;
    TRACE(2, "DepthGuard: %d {\n", m_depth);
  }
  ~DepthGuard() {
    TRACE(2, "DepthGuard: %d }\n", m_depth);
    m_depth--;
  }

  // True while exactly one guard is live on this thread.
  bool depthOne() const { return m_depth == 1; }
};
__thread int DepthGuard::m_depth;

#endif
|
|
|
|
/*
 * CALLEE_SAVED_BARRIER --
 *
 *   enterTCHelper preserves no callee-saved register other than %rbp.
 *   Whenever it is called from C++ the compiler therefore has to be
 *   told that all the other callee-saved registers are clobbered; this
 *   empty asm statement with a clobber list does exactly that.
 */
#if defined(__x86_64__)
# define CALLEE_SAVED_BARRIER()                                  \
    asm volatile("" : : : "rbx", "r12", "r13", "r14", "r15")
#elif defined(__AARCH64EL__)
# define CALLEE_SAVED_BARRIER()                                  \
    asm volatile("" : : : "x19", "x20", "x21", "x22", "x23",     \
                 "x24", "x25", "x26", "x27", "x28")
#else
# error What are the callee-saved registers on your system?
#endif
|
|
|
|
/*
|
|
* enterTCHelper is a handwritten assembly function that transfers control in
|
|
* and out of the TC.
|
|
*/
|
|
static_assert(rVmSp == rbx &&
|
|
rVmFp == rbp &&
|
|
rVmTl == r12 &&
|
|
rStashedAR == r15,
|
|
"__enterTCHelper needs to be modified to use the correct ABI");
|
|
static_assert(kReservedRSPScratchSpace == 0x280,
|
|
"enterTCHelper needs to be updated for changes to "
|
|
"kReservedRSPScratchSpace");
|
|
static_assert(REQ_BIND_CALL == 0x1,
|
|
"Update assembly test for REQ_BIND_CALL in __enterTCHelper");
|
|
extern "C" void enterTCHelper(Cell* vm_sp,
|
|
Cell* vm_fp,
|
|
TCA start,
|
|
TReqInfo* infoPtr,
|
|
ActRec* firstAR,
|
|
void* targetCacheBase);
|
|
|
|
|
|
struct TReqInfo {
|
|
uintptr_t requestNum;
|
|
uintptr_t args[5];
|
|
|
|
// Some TC registers need to be preserved across service requests.
|
|
uintptr_t saved_rStashedAr;
|
|
|
|
// Stub addresses are passed back to allow us to recycle used stubs.
|
|
TCA stubAddr;
|
|
};
|
|
|
|
|
|
void
|
|
TranslatorX64::enterTC(TCA start, void* data) {
|
|
using namespace TargetCache;
|
|
|
|
if (debug) {
|
|
fflush(stdout);
|
|
fflush(stderr);
|
|
}
|
|
DepthGuard d;
|
|
TReqInfo info;
|
|
SrcKey sk;
|
|
|
|
if (LIKELY(start != nullptr)) {
|
|
info.requestNum = data ? REQ_BIND_CALL : -1;
|
|
info.saved_rStashedAr = (uintptr_t)data;
|
|
} else {
|
|
info.requestNum = -1;
|
|
info.saved_rStashedAr = 0;
|
|
sk = *(SrcKey*)data;
|
|
start = getTranslation(TranslArgs(sk, true));
|
|
}
|
|
for (;;) {
|
|
assert(sizeof(Cell) == 16);
|
|
assert(((uintptr_t)vmsp() & (sizeof(Cell) - 1)) == 0);
|
|
assert(((uintptr_t)vmfp() & (sizeof(Cell) - 1)) == 0);
|
|
|
|
s_writeLease.gremlinUnlock();
|
|
// Keep dispatching until we end up somewhere the translator
|
|
// recognizes, or we luck out and the leaseholder exits.
|
|
while (!start) {
|
|
TRACE(2, "enterTC forwarding BB to interpreter\n");
|
|
g_vmContext->m_pc = curUnit()->at(sk.offset());
|
|
INC_TPC(interp_bb);
|
|
g_vmContext->dispatchBB();
|
|
PC newPc = g_vmContext->getPC();
|
|
if (!newPc) { g_vmContext->m_fp = 0; return; }
|
|
sk = SrcKey(curFunc(), newPc);
|
|
start = getTranslation(TranslArgs(sk, true));
|
|
}
|
|
assert(start == (TCA)HPHP::Transl::funcBodyHelperThunk ||
|
|
isValidCodeAddress(start) ||
|
|
(start == (TCA)HPHP::Transl::fcallHelperThunk &&
|
|
info.saved_rStashedAr == (uintptr_t)data));
|
|
assert(!s_writeLease.amOwner());
|
|
const Func* func = (vmfp() ? (ActRec*)vmfp() : (ActRec*)data)->m_func;
|
|
func->validate();
|
|
INC_TPC(enter_tc);
|
|
|
|
TRACE(1, "enterTC: %p fp%p(%s) sp%p enter {\n", start,
|
|
vmfp(), func->name()->data(), vmsp());
|
|
tl_regState = REGSTATE_DIRTY;
|
|
|
|
// We have to force C++ to spill anything that might be in a callee-saved
|
|
// register (aside from rbp). enterTCHelper does not save them.
|
|
CALLEE_SAVED_BARRIER();
|
|
enterTCHelper(vmsp(), vmfp(), start, &info, vmFirstAR(),
|
|
tl_targetCaches);
|
|
CALLEE_SAVED_BARRIER();
|
|
assert(g_vmContext->m_stack.isValidAddress((uintptr_t)vmsp()));
|
|
|
|
tl_regState = REGSTATE_CLEAN; // Careful: pc isn't sync'ed yet.
|
|
TRACE(1, "enterTC: %p fp%p sp%p } return\n", start,
|
|
vmfp(), vmsp());
|
|
|
|
if (debug) {
|
|
// Debugging code: cede the write lease half the time.
|
|
if (RuntimeOption::EvalJitStressLease) {
|
|
if (d.depthOne() == 1 && (rand() % 2) == 0) {
|
|
s_writeLease.gremlinLock();
|
|
}
|
|
}
|
|
// Ensure that each case either returns, or drives start to a valid
|
|
// value.
|
|
start = TCA(0xbee5face);
|
|
}
|
|
|
|
TRACE(2, "enterTC: request(%s) args: %" PRIx64 " %" PRIx64 " %"
|
|
PRIx64 " %" PRIx64 " %" PRIx64 "\n",
|
|
reqName(info.requestNum),
|
|
info.args[0], info.args[1], info.args[2], info.args[3],
|
|
info.args[4]);
|
|
|
|
if (LIKELY(info.requestNum == REQ_EXIT)) {
|
|
vmfp() = nullptr;
|
|
return;
|
|
}
|
|
if (!handleServiceRequest(info, start, sk)) return;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* The contract is that each case will set sk to the place where
|
|
* execution should resume, and optionally set start to the hardware
|
|
* translation of the resumption point (or otherwise set it to null).
|
|
* Returns false if we need to halt this nesting of the VM.
|
|
*
|
|
* start and sk might be subtly different; i.e., there are cases where
|
|
* start != NULL && start != getTranslation(sk). For instance,
|
|
* REQ_BIND_CALL has not finished executing the OpCall when it gets
|
|
* here, and has even done some work on its behalf. sk == OpFCall,
|
|
* while start == the point in the TC that's "half-way through" the
|
|
* Call instruction. If we punt to the interpreter, the interpreter
|
|
* will redo some of the work that the translator has already done.
|
|
*/
|
|
bool TranslatorX64::handleServiceRequest(TReqInfo& info,
|
|
TCA& start,
|
|
SrcKey& sk) {
|
|
const uintptr_t& requestNum = info.requestNum;
|
|
auto* const args = info.args;
|
|
assert(requestNum != REQ_EXIT);
|
|
INC_TPC(service_req);
|
|
|
|
bool smashed = false;
|
|
switch (requestNum) {
|
|
case REQ_BIND_CALL: {
|
|
ReqBindCall* req = (ReqBindCall*)args[0];
|
|
ActRec* calleeFrame = (ActRec*)args[1];
|
|
TCA toSmash = req->m_toSmash;
|
|
Func *func = const_cast<Func*>(calleeFrame->m_func);
|
|
int nArgs = req->m_nArgs;
|
|
bool isImmutable = req->m_isImmutable;
|
|
TCA dest = tx64->funcPrologue(func, nArgs);
|
|
TRACE(2, "enterTC: bindCall %s -> %p\n", func->name()->data(), dest);
|
|
if (!isImmutable) {
|
|
// We dont know we're calling the right function, so adjust
|
|
// dest to point to the dynamic check of ar->m_func.
|
|
dest = funcPrologToGuard(dest, func);
|
|
} else {
|
|
TRACE(2, "enterTC: bindCall immutably %s -> %p\n",
|
|
func->fullName()->data(), dest);
|
|
}
|
|
LeaseHolder writer(s_writeLease, NO_ACQUIRE);
|
|
if (dest && writer.acquire()) {
|
|
TRACE(2, "enterTC: bindCall smash %p -> %p\n", toSmash, dest);
|
|
smashCall(tx64->getAsmFor(toSmash), toSmash, dest);
|
|
smashed = true;
|
|
// sk: stale, but doesn't matter since we have a valid dest TCA.
|
|
} else {
|
|
// We need translator help; we're not at the callee yet, so
|
|
// roll back. The prelude has done some work already, but it
|
|
// should be safe to redo.
|
|
TRACE(2, "enterTC: bindCall rollback smash %p -> %p\n",
|
|
toSmash, dest);
|
|
sk = req->m_sourceInstr;
|
|
}
|
|
start = dest;
|
|
if (!start) {
|
|
// EnterTCHelper pushes the return ip onto the stack when the
|
|
// requestNum is REQ_BIND_CALL, but if start is NULL, it will
|
|
// interpret in doFCall, so we clear out the requestNum in this
|
|
// case to prevent enterTCHelper from pushing the return ip
|
|
// onto the stack.
|
|
info.requestNum = ~REQ_BIND_CALL;
|
|
}
|
|
} break;
|
|
|
|
case REQ_BIND_SIDE_EXIT:
|
|
case REQ_BIND_JMP:
|
|
case REQ_BIND_JCC:
|
|
case REQ_BIND_JMP_NO_IR:
|
|
case REQ_BIND_ADDR:
|
|
{
|
|
TCA toSmash = (TCA)args[0];
|
|
Offset off = args[1];
|
|
sk = SrcKey(curFunc(), off);
|
|
if (requestNum == REQ_BIND_SIDE_EXIT) {
|
|
SKTRACE(3, sk, "side exit taken!\n");
|
|
}
|
|
start = bindJmp(toSmash, sk, (ServiceRequest)requestNum, smashed);
|
|
} break;
|
|
|
|
case REQ_BIND_JMPCC_FIRST: {
|
|
TCA toSmash = (TCA)args[0];
|
|
Offset offTaken = (Offset)args[1];
|
|
Offset offNotTaken = (Offset)args[2];
|
|
ConditionCode cc = ConditionCode(args[3]);
|
|
bool taken = int64_t(args[4]) & 1;
|
|
start = bindJmpccFirst(toSmash, offTaken, offNotTaken,
|
|
taken, cc, smashed);
|
|
// SrcKey: we basically need to emulate the fail
|
|
sk = SrcKey(curFunc(), taken ? offTaken : offNotTaken);
|
|
} break;
|
|
|
|
case REQ_BIND_JMPCC_SECOND: {
|
|
TCA toSmash = (TCA)args[0];
|
|
Offset off = (Offset)args[1];
|
|
ConditionCode cc = ConditionCode(args[2]);
|
|
start = bindJmpccSecond(toSmash, off, cc, smashed);
|
|
sk = SrcKey(curFunc(), off);
|
|
} break;
|
|
|
|
case REQ_BIND_REQUIRE: {
|
|
ReqLitStaticArgs* rlsa = (ReqLitStaticArgs*)args[0];
|
|
sk = SrcKey((Func*)args[1], (Offset)args[2]);
|
|
start = getTranslation(TranslArgs(sk, true));
|
|
if (start) {
|
|
LeaseHolder writer(s_writeLease);
|
|
if (writer) {
|
|
smashed = true;
|
|
SrcRec* sr = getSrcRec(sk);
|
|
sr->chainFrom(IncomingBranch::addr(&rlsa->m_pseudoMain));
|
|
}
|
|
}
|
|
} break;
|
|
|
|
case REQ_RETRANSLATE_NO_IR: {
|
|
TCA toSmash = (TCA)args[0];
|
|
sk = SrcKey(curFunc(), (Offset)args[1]);
|
|
start = retranslateAndPatchNoIR(sk, true, toSmash);
|
|
SKTRACE(1, sk, "retranslated (without IR) @%p\n", start);
|
|
} break;
|
|
|
|
case REQ_RETRANSLATE: {
|
|
INC_TPC(retranslate);
|
|
sk = SrcKey(curFunc(), (Offset)args[0]);
|
|
start = retranslate(TranslArgs(sk, true));
|
|
SKTRACE(2, sk, "retranslated @%p\n", start);
|
|
} break;
|
|
|
|
case REQ_INTERPRET: {
|
|
Offset off = args[0];
|
|
int numInstrs = args[1];
|
|
g_vmContext->m_pc = curUnit()->at(off);
|
|
/*
|
|
* We know the compilation unit has not changed; basic blocks do
|
|
* not span files. I claim even exceptions do not violate this
|
|
* axiom.
|
|
*/
|
|
assert(numInstrs >= 0);
|
|
SKTRACE(5, SrcKey(curFunc(), off), "interp: enter\n");
|
|
if (numInstrs) {
|
|
s_perfCounters[tpc_interp_instr] += numInstrs;
|
|
g_vmContext->dispatchN(numInstrs);
|
|
} else {
|
|
// numInstrs == 0 means it wants to dispatch until BB ends
|
|
INC_TPC(interp_bb);
|
|
g_vmContext->dispatchBB();
|
|
}
|
|
PC newPc = g_vmContext->getPC();
|
|
if (!newPc) { g_vmContext->m_fp = 0; return false; }
|
|
SrcKey newSk(curFunc(), newPc);
|
|
SKTRACE(5, newSk, "interp: exit\n");
|
|
sk = newSk;
|
|
start = getTranslation(TranslArgs(newSk, true));
|
|
} break;
|
|
|
|
case REQ_POST_INTERP_RET: {
|
|
// This is only responsible for the control-flow aspect of the Ret:
|
|
// getting to the destination's translation, if any.
|
|
ActRec* ar = (ActRec*)args[0];
|
|
ActRec* caller = (ActRec*)args[1];
|
|
assert((Cell*) caller == vmfp());
|
|
Unit* destUnit = caller->m_func->unit();
|
|
// Set PC so logging code in getTranslation doesn't get confused.
|
|
vmpc() = destUnit->at(caller->m_func->base() + ar->m_soff);
|
|
SrcKey dest(caller->m_func, vmpc());
|
|
sk = dest;
|
|
start = getTranslation(TranslArgs(dest, true));
|
|
TRACE(3, "REQ_POST_INTERP_RET: from %s to %s\n",
|
|
ar->m_func->fullName()->data(),
|
|
caller->m_func->fullName()->data());
|
|
} break;
|
|
|
|
case REQ_RESUME: {
|
|
SrcKey dest(curFunc(), vmpc());
|
|
sk = dest;
|
|
start = getTranslation(TranslArgs(dest, true));
|
|
} break;
|
|
|
|
case REQ_STACK_OVERFLOW: {
|
|
/*
|
|
* we need to construct the pc of the fcall from the return
|
|
* address (which will be after the fcall). Because fcall is
|
|
* a variable length instruction, and because we sometimes
|
|
* delete instructions from the instruction stream, we
|
|
* need to use fpi regions to find the fcall.
|
|
*/
|
|
const FPIEnt* fe = curFunc()->findPrecedingFPI(
|
|
curUnit()->offsetOf(vmpc()));
|
|
vmpc() = curUnit()->at(fe->m_fcallOff);
|
|
assert(isFCallStar(*vmpc()));
|
|
raise_error("Stack overflow");
|
|
NOT_REACHED();
|
|
}
|
|
}
|
|
|
|
if (smashed && info.stubAddr) {
|
|
Treadmill::WorkItem::enqueue(new FreeRequestStubTrigger(info.stubAddr));
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * Detach and return the stub at the head of the free list, or return
 * null when the list is empty.  The popped node's freed marker is
 * overwritten so that it no longer reads as free.
 */
TCA FreeStubList::maybePop() {
  StubNode* head = m_list;
  if (!head) {
    return (TCA)head;
  }
  m_list = head->m_next;
  head->m_freed = ~kStubFree;
  return (TCA)head;
}
|
|
|
|
/*
 * Put a stub on the free list.
 *
 * A freed stub may be released by Treadmill more than once if multiple
 * threads execute the service request before it is freed.  Freed stubs
 * are therefore marked, and a stub that already carries the mark is
 * ignored.
 */
void FreeStubList::push(TCA stub) {
  StubNode* node = (StubNode *)stub;
  const bool alreadyFreed = (node->m_freed == kStubFree);
  if (alreadyFreed) return;

  node->m_freed = kStubFree;
  node->m_next = m_list;
  m_list = node;
}
|
|
|
|
bool
|
|
TranslatorX64::freeRequestStub(TCA stub) {
|
|
LeaseHolder writer(s_writeLease);
|
|
/* If we can't acquire the write lock, the
|
|
* caller (FreeRequestStubTrigger) retries
|
|
*/
|
|
if (!writer) return false;
|
|
assert(astubs.code.isValidAddress(stub));
|
|
m_freeStubs.push(stub);
|
|
return true;
|
|
}
|
|
|
|
/*
 * Pick the address at which the next reusable stub should be emitted:
 * a recycled stub from the free list when one is available, otherwise
 * the current astubs frontier.  Either outcome is counted in Stats.
 */
TCA TranslatorX64::getFreeStub() {
  TCA recycled = m_freeStubs.maybePop();
  if (!recycled) {
    Stats::inc(Stats::Astubs_New);
    return astubs.code.frontier;
  }
  Stats::inc(Stats::Astubs_Reused);
  assert(m_freeStubs.m_list == 0
         || astubs.code.isValidAddress(TCA(m_freeStubs.m_list)));
  return recycled;
}
|
|
|
|
/*
|
|
* RAII bookmark for temporarily rewinding a.code.frontier.
|
|
*/
|
|
class ConditionalCodeCursor {
|
|
typedef X64Assembler Asm;
|
|
Asm& m_a;
|
|
TCA m_oldFrontier;
|
|
bool m_changed;
|
|
public:
|
|
ConditionalCodeCursor(Asm& a, TCA newFrontier) :
|
|
m_a(a), m_oldFrontier(a.code.frontier) {
|
|
m_a.code.frontier = newFrontier;
|
|
m_changed = (newFrontier != m_oldFrontier);
|
|
TRACE_MOD(Trace::trans, 1, "RewindTo: %p (from %p)\n",
|
|
m_a.code.frontier, m_oldFrontier);
|
|
}
|
|
~ConditionalCodeCursor() {
|
|
if (m_changed) {
|
|
m_a.code.frontier = m_oldFrontier;
|
|
}
|
|
TRACE_MOD(Trace::trans, 1, "Restore: %p\n",
|
|
m_a.code.frontier);
|
|
}
|
|
};
|
|
|
|
/*
|
|
* emitServiceReq --
|
|
*
|
|
* Call a translator service co-routine. The code emitted here is
|
|
* reenters the enterTC loop, invoking the requested service. Control
|
|
* will be returned non-locally to the next logical instruction in
|
|
* the TC.
|
|
*
|
|
* Return value is a destination; we emit the bulky service
|
|
* request code into astubs.
|
|
*/
|
|
|
|
TCA
|
|
TranslatorX64::emitServiceReqVA(SRFlags flags, ServiceRequest req, int numArgs,
|
|
va_list args) {
|
|
bool emitInA = flags & SRFlags::EmitInA;
|
|
bool align = (flags & SRFlags::Align) && !emitInA;
|
|
bool notReusable = flags & SRFlags::Persistent;
|
|
Asm& as = emitInA ? a : astubs;
|
|
TCA start = emitInA ? a.code.frontier :
|
|
notReusable ? astubs.code.frontier :
|
|
getFreeStub();
|
|
ConditionalCodeCursor cg(as, start);
|
|
/* max space for moving to align, saving VM regs plus emitting args */
|
|
static const int kVMRegSpace = 0x14;
|
|
static const int kMovSize = 0xa;
|
|
static const int kNumServiceRegs = sizeof(serviceReqArgRegs)/sizeof(PhysReg);
|
|
static const int kMaxStubSpace = kJmpTargetAlign - 1
|
|
+ kVMRegSpace
|
|
+ kNumServiceRegs * kMovSize;
|
|
if (align) {
|
|
moveToAlign(as);
|
|
}
|
|
TCA retval = as.code.frontier;
|
|
emitEagerVMRegSave(as, SaveFP);
|
|
/*
|
|
* Move args into appropriate regs.
|
|
*/
|
|
TRACE(3, "Emit Service Req %s(", reqName(req));
|
|
for (int i = 0; i < numArgs; i++) {
|
|
uint64_t argVal = va_arg(args, uint64_t);
|
|
TRACE(3, "%p,", (void*)argVal);
|
|
emitImmReg(as, argVal, serviceReqArgRegs[i]);
|
|
}
|
|
|
|
if (notReusable) {
|
|
emitImmReg(as, 0, rAsm);
|
|
} else {
|
|
/*
|
|
* Make sure that the stub has enough space so it can be reused
|
|
* for other service requests, with different number of arguments,
|
|
* alignment, etc.
|
|
*/
|
|
as.emitNop(start + kMaxStubSpace - as.code.frontier);
|
|
emitImmReg(as, (uint64_t)start, rAsm);
|
|
}
|
|
TRACE(3, ")\n");
|
|
emitImmReg(as, req, rdi);
|
|
|
|
/*
|
|
* Weird hand-shaking with enterTC: reverse-call a service routine.
|
|
*
|
|
* In the case of some special stubs (m_callToExit, m_retHelper), we
|
|
* have already unbalanced the return stack by doing a ret to
|
|
* something other than enterTCHelper. In that case
|
|
* SRJmpInsteadOfRet indicates to fake the return.
|
|
*/
|
|
if (flags & SRFlags::JmpInsteadOfRet) {
|
|
as.pop(rax);
|
|
as.jmp(rax);
|
|
} else {
|
|
as.ret();
|
|
}
|
|
recordBCInstr(OpServiceRequest, as, retval);
|
|
translator_not_reached(as);
|
|
return retval;
|
|
}
|
|
|
|
/*
 * Variadic front end for emitServiceReqVA using SRFlags::Align.
 * Every variadic argument is read back as a uint64_t, so callers must
 * pass 64-bit values.
 */
TCA
TranslatorX64::emitServiceReq(ServiceRequest req, int numArgs, ...) {
  va_list argList;
  va_start(argList, numArgs);
  TCA stub = emitServiceReqVA(SRFlags::Align, req, numArgs, argList);
  va_end(argList);
  return stub;
}
|
|
|
|
/*
 * Variadic front end for emitServiceReqVA with caller-supplied flags.
 * Every variadic argument is read back as a uint64_t, so callers must
 * pass 64-bit values.
 */
TCA
TranslatorX64::emitServiceReq(SRFlags flags, ServiceRequest req,
                              int numArgs, ...) {
  va_list argList;
  va_start(argList, numArgs);
  TCA stub = emitServiceReqVA(flags, req, numArgs, argList);
  va_end(argList);
  return stub;
}
|
|
|
|
/*
 * When the TransDB is enabled, emit a lock-prefixed increment of the
 * counter at getTransCounterAddr(); otherwise emit nothing.  Either
 * way the frontier at the time of the call is returned.
 */
TCA
TranslatorX64::emitTransCounterInc(X64Assembler& a) {
  TCA start = a.code.frontier;
  if (isTransDBEnabled()) {
    a.    movq  (getTransCounterAddr(), rAsm);
    a.    lock  ();
    a.    incq  (*rAsm);
  }
  return start;
}
|
|
|
|
void
|
|
TranslatorX64::getInputsIntoXMMRegs(const NormalizedInstruction& ni,
|
|
PhysReg lr, PhysReg rr,
|
|
RegXMM lxmm,
|
|
RegXMM rxmm) {
|
|
const DynLocation& l = *ni.inputs[0];
|
|
const DynLocation& r = *ni.inputs[1];
|
|
// Get the values into their appropriate xmm locations
|
|
auto intoXmm = [&](const DynLocation& l, PhysReg src, RegXMM xmm) {
|
|
if (l.isInt()) {
|
|
// cvtsi2sd doesn't modify the high bits of its target, which can
|
|
// cause false dependencies to prevent register renaming from kicking
|
|
// in. Break the dependency chain by zeroing out the destination reg.
|
|
a. pxor_xmm_xmm(xmm, xmm);
|
|
a. cvtsi2sd_reg64_xmm(src, xmm);
|
|
} else {
|
|
a. mov_reg64_xmm(src, xmm);
|
|
}
|
|
};
|
|
intoXmm(l, lr, lxmm);
|
|
intoXmm(r, rr, rxmm);
|
|
}
|
|
|
|
void
|
|
TranslatorX64::binaryMixedArith(const NormalizedInstruction& i,
|
|
Opcode op,
|
|
PhysReg srcReg,
|
|
PhysReg srcDestReg) {
|
|
getInputsIntoXMMRegs(i, srcReg, srcDestReg, xmm1, xmm0);
|
|
switch(op) {
|
|
#define CASEIMM(OpBc, x64op) \
|
|
case OpBc: a. x64op ##sd_xmm_xmm(xmm1, xmm0); break
|
|
CASEIMM(OpAdd, add);
|
|
CASEIMM(OpSub, sub);
|
|
CASEIMM(OpMul, mul);
|
|
#undef CASEIMM
|
|
default: not_reached();
|
|
}
|
|
a. mov_xmm_reg64(xmm0, srcDestReg);
|
|
}
|
|
|
|
#define O(opcode, imm, pusph, pop, flags) \
|
|
/**
|
|
* The interpOne methods saves m_pc, m_fp, and m_sp ExecutionContext,
|
|
* calls into the interpreter, and then return a pointer to the
|
|
* current ExecutionContext.
|
|
*/ \
|
|
VMExecutionContext* \
|
|
interpOne##opcode(ActRec* ar, Cell* sp, Offset pcOff) { \
|
|
interp_set_regs(ar, sp, pcOff); \
|
|
SKTRACE(5, SrcKey(curFunc(), vmpc()), "%40s %p %p\n", \
|
|
"interpOne" #opcode " before (fp,sp)", \
|
|
vmfp(), vmsp()); \
|
|
assert(*vmpc() == Op ## opcode); \
|
|
VMExecutionContext* ec = g_vmContext; \
|
|
Stats::inc(Stats::Instr_InterpOne ## opcode); \
|
|
INC_TPC(interp_one) \
|
|
/* Correct for over-counting in TC-stats. */ \
|
|
Stats::inc(Stats::Instr_TC, -1); \
|
|
ec->op##opcode(); \
|
|
/*
|
|
* Only set regstate back to dirty if an exception is not
|
|
* propagating. If an exception is throwing, regstate for this call
|
|
* is actually still correct, and we don't have information in the
|
|
* fixup map for interpOne calls anyway.
|
|
*/ \
|
|
tl_regState = REGSTATE_DIRTY; \
|
|
return ec; \
|
|
}
|
|
|
|
OPCODES
|
|
#undef O
|
|
|
|
void* interpOneEntryPoints[] = {
|
|
#define O(opcode, imm, pusph, pop, flags) \
|
|
(void*)(interpOne ## opcode),
|
|
OPCODES
|
|
#undef O
|
|
};
|
|
|
|
/*
 * fixupWork --
 *
 *   Walk the saved-rbp chain starting at rbp until a frame is found
 *   whose caller frame is a VM frame and for which the fixup map has
 *   an entry, then store the recovered fp and pc into ec and the
 *   recovered sp into vmsp().
 *
 *   Reaching the end of the chain without finding a fixup is treated
 *   as impossible.
 */
void TranslatorX64::fixupWork(VMExecutionContext* ec,
                              ActRec* rbp) const {
  assert(RuntimeOption::EvalJit);

  TRACE_SET_MOD(fixup);
  TRACE(1, "fixup(begin):\n");

  // A frame counts as a VM frame when its address lies outside the
  // range described by Util::s_stackLimit / Util::s_stackSize.
  auto isVMFrame = [] (ActRec* ar) {
    assert(ar);
    bool ret = uintptr_t(ar) - Util::s_stackLimit >= Util::s_stackSize;
    assert(!ret ||
           (ar >= g_vmContext->m_stack.getStackLowAddress() &&
            ar < g_vmContext->m_stack.getStackHighAddress()) ||
           ar->m_func->isGenerator());
    return ret;
  };

  auto* nextRbp = rbp;
  rbp = 0;
  do {
    auto* prevRbp = rbp;
    rbp = nextRbp;
    assert(rbp && "Missing fixup for native call");
    nextRbp = reinterpret_cast<ActRec*>(rbp->m_savedRbp);
    TRACE(2, "considering frame %p, %p\n", rbp, (void*)rbp->m_savedRip);

    if (isVMFrame(nextRbp)) {
      TRACE(2, "fixup checking vm frame %s\n",
            nextRbp->m_func->name()->data());
      FixupMap::VMRegs regs;
      if (m_fixupMap.getFrameRegs(rbp, prevRbp, &regs)) {
        TRACE(2, "fixup(end): func %s fp %p sp %p pc %p\n",
              regs.m_fp->m_func->name()->data(),
              regs.m_fp, regs.m_sp, regs.m_pc);
        ec->m_fp = const_cast<ActRec*>(regs.m_fp);
        ec->m_pc = regs.m_pc;
        vmsp() = regs.m_sp;
        return;
      }
    }
  } while (rbp && rbp != nextRbp);

  // OK, we've exhausted the entire actRec chain.  We are only
  // invoking ::fixup() from contexts that were known to be called out
  // of the TC, so this cannot happen.
  NOT_REACHED();
}
|
|
|
|
/*
 * Recover the VM registers into ec.  The search for fixup entries
 * starts at the current (C++) frame and walks the frames upward until
 * a TC frame is found.
 */
void TranslatorX64::fixup(VMExecutionContext* ec) const {
  DECLARE_FRAME_POINTER(currentFrame);
  fixupWork(ec, currentFrame);
}
|
|
|
|
/*
 * Walk the saved-rbp chain from the current frame and return the first
 * saved return address that is a valid code address, or nullptr if the
 * chain ends without one.
 */
TCA TranslatorX64::getTranslatedCaller() const {
  DECLARE_FRAME_POINTER(fp);
  // Walk with a copy: the register-mapped variable can't be mutated
  // directly.
  ActRec* frame = fp;
  while (frame) {
    TCA returnAddr = (TCA)frame->m_savedRip;
    if (isValidCodeAddress(returnAddr)) {
      return returnAddr;
    }
    frame = (ActRec*)frame->m_savedRbp;
  }
  return nullptr;
}
|
|
|
|
void
|
|
TranslatorX64::syncWork() {
|
|
assert(tl_regState == REGSTATE_DIRTY);
|
|
fixup(g_vmContext);
|
|
tl_regState = REGSTATE_CLEAN;
|
|
Stats::inc(Stats::TC_Sync);
|
|
}
|
|
|
|
// could be static but used in hopt/codegen.cpp
|
|
void raiseUndefVariable(StringData* nm) {
|
|
raise_notice(Strings::UNDEFINED_VARIABLE, nm->data());
|
|
// FIXME: do we need to decref the string if an exception is propagating?
|
|
decRefStr(nm);
|
|
}
|
|
|
|
// This intentionally excludes Int/Int, which is handled separately
|
|
// from cases involving the FPU.
|
|
bool
|
|
mathEquivTypes(RuntimeType lt, RuntimeType rt) {
|
|
return (lt.isDouble() && rt.isDouble()) ||
|
|
(lt.isInt() && rt.isDouble()) ||
|
|
(lt.isDouble() && rt.isInt());
|
|
}
|
|
|
|
/* This is somewhat hacky. It decides which helpers/builtins should
|
|
* use eager vmreganchor based on profile information. Using eager
|
|
* vmreganchor for all helper calls is a perf regression. */
|
|
bool TranslatorX64::eagerRecord(const Func* func) {
|
|
const char* list[] = {
|
|
"func_get_args",
|
|
"get_called_class",
|
|
"func_num_args",
|
|
"array_filter",
|
|
"array_map",
|
|
};
|
|
|
|
for (int i = 0; i < sizeof(list)/sizeof(list[0]); i++) {
|
|
if (!strcmp(func->name()->data(), list[i])) {
|
|
return true;
|
|
}
|
|
}
|
|
if (func->cls() && !strcmp(func->cls()->name()->data(), "WaitHandle")
|
|
&& !strcmp(func->name()->data(), "join")) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
 * newInstanceHelper --
 *
 *   Create an instance of cls and fill in ar as the ActRec for the
 *   call to cls's constructor with numArgs arguments, linked to prevAr.
 *   A non-public constructor is first run through lookupCtorMethod
 *   (with raise enabled).
 *
 *   Returns the new instance, which has had its refcount bumped twice.
 */
Instance*
HOT_FUNC_VM
newInstanceHelper(Class* cls, int numArgs, ActRec* ar, ActRec* prevAr) {
  const Func* ctor = cls->getCtor();
  if (UNLIKELY(!(ctor->attrs() & AttrPublic))) {
    VMRegAnchor _;
    UNUSED MethodLookup::LookupResult res =
      g_vmContext->lookupCtorMethod(ctor, cls, true /*raise*/);
    assert(res == MethodLookup::MethodFoundWithThis);
  }
  // Don't start pushing the AR until newInstance returns; it may reenter.
  Instance* inst = newInstance(cls);
  ctor->validate();
  ar->m_func = ctor;
  ar->initNumArgs(numArgs, true /*fromCtor*/);
  // Count stack and this.
  inst->incRefCount();
  inst->incRefCount();
  ar->setThis(inst);
  ar->setVarEnv(nullptr);
  arSetSfp(ar, prevAr);
  TRACE(2, "newInstanceHelper: AR %p: f %p, savedRbp %#lx, savedRip %#lx"
        " this %p\n",
        ar, ar->m_func, ar->m_savedRbp, ar->m_savedRip, ar->m_this);
  return inst;
}
|
|
|
|
/*
 * Try to statically resolve the callable consumed by an FPushCuf-family
 * instruction.
 *
 * The callable input (input 1 for FPushCufSafe, input 0 otherwise) is
 * examined when it is a known string ("func" or "Class::method") or a
 * known two-element array of strings [class, method].
 *
 * Outputs:
 *   cls      - resolved class, or nullptr.
 *   invName  - set to the method name when the lookup reported a magic
 *              call, otherwise nullptr.
 *   forward  - true for FPushCufF, and forced to true for "self" and
 *              "parent".
 *
 * Returns the resolved Func, or nullptr when the callable cannot be
 * resolved here.
 */
const Func*
TranslatorX64::findCuf(const NormalizedInstruction& ni,
                       Class*& cls, StringData*& invName, bool& forward) {
  forward = (ni.op() == OpFPushCufF);
  cls = nullptr;
  invName = nullptr;

  DynLocation* callable = ni.inputs[ni.op() == OpFPushCufSafe ? 1 : 0];

  const StringData* str =
    callable->isString() ? callable->rtt.valueString() : nullptr;
  const ArrayData* arr =
    callable->isArray() ? callable->rtt.valueArray() : nullptr;

  StringData* sclass = nullptr;
  StringData* sname = nullptr;
  if (str) {
    // Plain function name?
    Func* f = HPHP::Unit::lookupFunc(str);
    if (f) return f;

    // Otherwise require exactly one "::" with non-empty text on both sides.
    String name(const_cast<StringData*>(str));
    int pos = name.find("::");
    if (pos <= 0 || pos + 2 >= name.size() ||
        name.find("::", pos + 2) != String::npos) {
      return nullptr;
    }
    sclass = StringData::GetStaticString(name.substr(0, pos).get());
    sname = StringData::GetStaticString(name.substr(pos + 2).get());
  } else if (arr) {
    if (arr->size() != 2) return nullptr;
    CVarRef e0 = arr->get(int64_t(0), false);
    CVarRef e1 = arr->get(int64_t(1), false);
    if (!e0.isString() || !e1.isString()) return nullptr;
    sclass = e0.getStringData();
    sname = e1.getStringData();
    // A method name that itself contains "::" is not handled here.
    String name(sname);
    if (name.find("::") != String::npos) return nullptr;
  } else {
    return nullptr;
  }

  Class* ctx = curFunc()->cls();

  if (sclass->isame(s_self.get())) {
    if (!ctx) return nullptr;
    cls = ctx;
    forward = true;
  } else if (sclass->isame(s_parent.get())) {
    if (!ctx || !ctx->parent()) return nullptr;
    cls = ctx->parent();
    forward = true;
  } else if (sclass->isame(s_static.get())) {
    return nullptr;
  } else {
    cls = Unit::lookupUniqueClass(sclass);
    if (!cls) return nullptr;
  }

  bool magicCall = false;
  const Func* f = lookupImmutableMethod(cls, sname, magicCall, true);
  if (!f) return nullptr;

  if (forward) {
    /*
     * To preserve the invariant that the lsb class
     * is an instance of the context class, we require
     * that f's class is an instance of the context class.
     * This is conservative, but without it, we would need
     * a runtime check to decide whether or not to forward
     * the lsb class.
     *
     * forward can be set by FPushCufF while the current function has no
     * class (ctx == nullptr); bail out instead of calling through a
     * null ctx.
     */
    if (!ctx || !ctx->classof(f->cls())) {
      return nullptr;
    }
  }

  if (magicCall) invName = sname;
  return f;
}
|
|
|
|
/*
 * Emit a trampoline in atrampolines that jumps to helperAddr, record it
 * in trampolineMap, and return its address.  When there is no room for
 * another trampoline, helperAddr itself is returned.
 *
 * With stats enabled, the trampoline first increments a per-helper
 * counter and the helper's (truncated) name is stored in
 * Stats::helperNames.
 */
TCA
TranslatorX64::emitNativeTrampoline(TCA helperAddr) {
  auto& a = atrampolines;

  if (!a.code.canEmit(m_trampolineSize)) {
    // Not enough space to emit a trampoline, so just return the helper
    // address; emitCall will then emit the right sequence to call it
    // indirectly.
    TRACE(1, "Ran out of space to emit a trampoline for %p\n", helperAddr);
    assert(false);
    return helperAddr;
  }

  uint32_t trampIdx = m_numNativeTrampolines++;
  TCA trampAddr = a.code.frontier;

  if (Stats::enabled()) {
    Stats::emitInc(a, &Stats::tl_helper_counters[0], trampIdx);

    // Cap the recorded helper name at 50 characters.
    const size_t kMaxNameLen = 50;
    char* helperName = Util::getNativeFunctionName(helperAddr);
    if (strlen(helperName) > kMaxNameLen) {
      helperName[kMaxNameLen] = '\0';
    }
    Stats::helperNames[trampIdx] = helperName;
  }

  /*
   * For stubs that take arguments in rAsm, we need to make sure
   * we're not damaging its contents here.  (If !jmpDeltaFits, the jmp
   * opcode will need to movabs the address into rAsm before
   * jumping.)
   */
  auto UNUSED stubUsingRScratch = [&](TCA tca) {
    return tca == m_dtorGenericStubRegs;
  };
  assert(IMPLIES(stubUsingRScratch(helperAddr), a.jmpDeltaFits(helperAddr)));

  a.    jmp    (helperAddr);
  a.    ud2    ();

  trampolineMap[helperAddr] = trampAddr;

  // The first trampoline emitted determines the per-trampoline size.
  if (m_trampolineSize == 0) {
    m_trampolineSize = a.code.frontier - trampAddr;
    assert(m_trampolineSize >= kMinPerTrampolineSize);
  }

  recordBCInstr(OpNativeTrampoline, a, trampAddr);
  return trampAddr;
}
|
|
|
|
/*
 * Return the address to call for helperAddr: helperAddr itself when
 * neither trampolines nor stats are enabled, otherwise the cached
 * trampoline from trampolineMap, emitting a new one on a miss.
 */
TCA
TranslatorX64::getNativeTrampoline(TCA helperAddr) {
  const bool useTrampolines =
    RuntimeOption::EvalJitTrampolines || Stats::enabled();
  if (!useTrampolines) {
    return helperAddr;
  }

  if (TCA cached = (TCA)mapGet<PointerMap>(trampolineMap, helperAddr)) {
    return cached;
  }
  return emitNativeTrampoline(helperAddr);
}
|
|
|
|
/*
 * Target of the m_defClsHelper stub: defines the class described by
 * preClass via Unit::defClass.
 *
 * The register state is clean for the duration of the defClass call and
 * is set back to dirty only on normal return.
 */
static void defClsHelper(PreClass *preClass) {
  assert(tl_regState == REGSTATE_DIRTY);
  tl_regState = REGSTATE_CLEAN;

  Unit::defClass(preClass);

  /*
   * m_defClsHelper sync'd the registers for us already.  This means
   * if an exception propagates we want to leave things as
   * REGSTATE_CLEAN, since we're still in sync.  Only set it to dirty
   * if we are actually returning to run in the TC again.
   */
  tl_regState = REGSTATE_DIRTY;
}
|
|
|
|
/*
 * Overloads answering "is it safe to convert this value to int64_t?".
 * Integer inputs always are.  For a double the conversion is undefined
 * unless the value lies in [-2^63, 2^63); NaN fails both comparisons.
 */
static inline bool switchValFitsInt64(int) { return true; }
static inline bool switchValFitsInt64(int64_t) { return true; }
static inline bool switchValFitsInt64(double d) {
  return d >= -9223372036854775808.0 && d < 9223372036854775808.0;
}

/*
 * Map a switch operand to a jump-table index.
 *
 * Returns v - base when v is integral and base <= v < base + nTargets;
 * otherwise returns nTargets + 1.
 *
 * T is expected to be int, int64_t or double (the types the callers in
 * this file pass).
 */
template <typename T>
static int64_t switchBoundsCheck(T v, int64_t base, int64_t nTargets) {
  // The range test guards the cast: converting an out-of-range or NaN
  // double to int64_t is undefined behavior.
  if (switchValFitsInt64(v) && int64_t(v) == v) {
    const int64_t ival = int64_t(v);
    if (ival >= base && nTargets > 0) {
      // ival >= base, so the unsigned difference is the exact distance
      // and cannot overflow the way `base + nTargets` could.
      const uint64_t delta = uint64_t(ival) - uint64_t(base);
      if (delta < uint64_t(nTargets)) {
        return int64_t(delta);
      }
    }
  }
  return nTargets + 1;
}
|
|
|
|
int64_t switchDoubleHelper(int64_t val, int64_t base, int64_t nTargets) {
|
|
union {
|
|
int64_t intbits;
|
|
double dblval;
|
|
} u;
|
|
u.intbits = val;
|
|
return switchBoundsCheck(u.dblval, base, nTargets);
|
|
}
|
|
|
|
/*
 * Switch helper for string operands.  Classifies s with
 * isNumericWithVal: a non-numeric result (KindOfNull) is treated as 0,
 * otherwise the parsed double or integer value is bounds-checked.
 * Drops one reference on s before returning the jump-table index.
 */
int64_t switchStringHelper(StringData* s, int64_t base, int64_t nTargets) {
  int64_t parsedInt;
  double parsedDbl;
  int64_t target;

  switch (s->isNumericWithVal(parsedInt, parsedDbl, 1)) {
    case KindOfNull:
      target = switchBoundsCheck(0, base, nTargets);
      break;

    case KindOfDouble:
      target = switchBoundsCheck(parsedDbl, base, nTargets);
      break;

    case KindOfInt64:
      target = switchBoundsCheck(parsedInt, base, nTargets);
      break;

    default:
      not_reached();
  }

  decRefStr(s);
  return target;
}
|
|
|
|
/*
 * Switch helper for object operands: converts o to an integer with
 * o_toInt64(), drops one reference on o, then bounds-checks the value.
 */
int64_t switchObjHelper(ObjectData* o, int64_t base, int64_t nTargets) {
  const int64_t asInt = o->o_toInt64();
  decRefObj(o);
  return switchBoundsCheck(asInt, base, nTargets);
}
|
|
|
|
/*
 * PSEUDOINSTR_DISPATCH(func) expands to a switch() fragment that routes
 * groups of opcodes to their shared handlers, as per the PSEUDOINSTRS
 * macro.  Each group of case labels is followed by
 * func(<HandlerName>, t, i).
 */
#define PSEUDOINSTR_DISPATCH(func)              \
  case OpBitAnd:                                \
  case OpBitOr:                                 \
  case OpBitXor:                                \
  case OpSub:                                   \
  case OpMul:                                   \
    func(BinaryArithOp, t, i)                   \
  case OpSame:                                  \
  case OpNSame:                                 \
    func(SameOp, t, i)                          \
  case OpEq:                                    \
  case OpNeq:                                   \
    func(EqOp, t, i)                            \
  case OpLt:                                    \
  case OpLte:                                   \
  case OpGt:                                    \
  case OpGte:                                   \
    func(LtGtOp, t, i)                          \
  case OpEmptyL:                                \
  case OpCastBool:                              \
    func(UnaryBooleanOp, t, i)                  \
  case OpJmpZ:                                  \
  case OpJmpNZ:                                 \
    func(BranchOp, t, i)                        \
  case OpSetL:                                  \
  case OpBindL:                                 \
    func(AssignToLocalOp, t, i)                 \
  case OpFPassC:                                \
  case OpFPassCW:                               \
  case OpFPassCE:                               \
    func(FPassCOp, t, i)                        \
  case OpFPushCuf:                              \
  case OpFPushCufF:                             \
  case OpFPushCufSafe:                          \
    func(FPushCufOp, t, i)                      \
  case OpIssetL:                                \
  case OpIsNullL:                               \
  case OpIsStringL:                             \
  case OpIsArrayL:                              \
  case OpIsIntL:                                \
  case OpIsObjectL:                             \
  case OpIsBoolL:                               \
  case OpIsDoubleL:                             \
  case OpIsNullC:                               \
  case OpIsStringC:                             \
  case OpIsArrayC:                              \
  case OpIsIntC:                                \
  case OpIsObjectC:                             \
  case OpIsBoolC:                               \
  case OpIsDoubleC:                             \
    func(CheckTypeOp, t, i)
|
|
|
|
bool
|
|
TranslatorX64::dontGuardAnyInputs(Opcode op) {
|
|
switch (op) {
|
|
#define CASE(iNm) case Op ## iNm:
|
|
#define NOOP(a, b, c)
|
|
INSTRS
|
|
PSEUDOINSTR_DISPATCH(NOOP)
|
|
return false;
|
|
}
|
|
return true;
|
|
#undef NOOP
|
|
#undef CASE
|
|
}
|
|
|
|
// Emit necessary guards for variants and pseudo-main locals before instr i.
|
|
// For HHIR, this only inserts guards for pseudo-main locals. Variants are
|
|
// guarded in a different way.
|
|
void
|
|
TranslatorX64::emitVariantGuards(const Tracelet& t,
|
|
const NormalizedInstruction& i) {
|
|
bool pseudoMain = Translator::liveFrameIsPseudoMain();
|
|
bool isFirstInstr = (&i == t.m_instrStream.first);
|
|
for (size_t in = 0; in < i.inputs.size(); ++in) {
|
|
DynLocation* input = i.inputs[in];
|
|
if (!input->isValue()) continue;
|
|
bool isRef = input->isRef() &&
|
|
!i.ignoreInnerType &&
|
|
input->rtt.innerType() != KindOfInvalid;
|
|
bool modifiableLocal = pseudoMain && input->isLocal() &&
|
|
!input->rtt.isVagueValue();
|
|
|
|
if (!modifiableLocal && !isRef) continue;
|
|
|
|
SKTRACE(1, i.source, "guarding %s: (%s:%d) :: %d!\n",
|
|
modifiableLocal ? "pseudoMain local" : "variant inner",
|
|
input->location.spaceName(),
|
|
input->location.offset,
|
|
input->rtt.valueType());
|
|
// TODO task 1122807: don't check the inner type if we've already
|
|
// checked it and have executed no possibly-aliasing instructions in
|
|
// the meanwhile.
|
|
if (modifiableLocal) {
|
|
RuntimeType& rtt = input->rtt;
|
|
JIT::Type type = JIT::Type::fromRuntimeType(rtt);
|
|
if (isFirstInstr) {
|
|
m_hhbcTrans->guardTypeLocal(input->location.offset, type);
|
|
} else {
|
|
m_hhbcTrans->checkTypeLocal(input->location.offset, type);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool
|
|
TranslatorX64::checkTranslationLimit(SrcKey sk,
|
|
const SrcRec& srcRec) const {
|
|
if (srcRec.translations().size() == RuntimeOption::EvalJitMaxTranslations) {
|
|
INC_TPC(max_trans);
|
|
if (debug && Trace::moduleEnabled(Trace::tx64, 2)) {
|
|
const vector<TCA>& tns = srcRec.translations();
|
|
TRACE(1, "Too many (%" PRId64 ") translations: %s, BC offset %d\n",
|
|
tns.size(), curUnit()->filepath()->data(),
|
|
sk.offset());
|
|
SKTRACE(2, sk, "{\n", tns.size());
|
|
TCA topTrans = srcRec.getTopTranslation();
|
|
for (size_t i = 0; i < tns.size(); ++i) {
|
|
const TransRec* rec = getTransRec(tns[i]);
|
|
assert(rec);
|
|
SKTRACE(2, sk, "%d %p\n", i, tns[i]);
|
|
if (tns[i] == topTrans) {
|
|
SKTRACE(2, sk, "%d: *Top*\n", i);
|
|
}
|
|
if (rec->kind == TransAnchor) {
|
|
SKTRACE(2, sk, "%d: Anchor\n", i);
|
|
} else {
|
|
SKTRACE(2, sk, "%d: guards {\n", i);
|
|
for (unsigned j = 0; j < rec->dependencies.size(); ++j) {
|
|
TRACE(2, rec->dependencies[j]);
|
|
}
|
|
SKTRACE(2, sk, "%d } guards\n", i);
|
|
}
|
|
}
|
|
SKTRACE(2, sk, "} /* Too many translations */\n");
|
|
}
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void
|
|
TranslatorX64::emitGuardChecks(X64Assembler& a,
|
|
SrcKey sk,
|
|
const ChangeMap& dependencies,
|
|
const RefDeps& refDeps,
|
|
SrcRec& fail) {
|
|
if (Trace::moduleEnabled(Trace::stats, 2)) {
|
|
Stats::emitInc(a, Stats::TraceletGuard_enter);
|
|
}
|
|
|
|
bool pseudoMain = Translator::liveFrameIsPseudoMain();
|
|
|
|
emitRB(a, RBTypeTraceletGuards, sk);
|
|
for (DepMap::const_iterator dep = dependencies.begin();
|
|
dep != dependencies.end();
|
|
++dep) {
|
|
if (!pseudoMain || !dep->second->isLocal() || !dep->second->isValue()) {
|
|
checkType(a, dep->first, dep->second->rtt, fail);
|
|
} else {
|
|
TRACE(3, "Skipping tracelet guard for %s %d\n",
|
|
dep->second->location.pretty().c_str(),
|
|
(int)dep->second->rtt.outerType());
|
|
}
|
|
}
|
|
|
|
checkRefs(a, sk, refDeps, fail);
|
|
|
|
if (Trace::moduleEnabled(Trace::stats, 2)) {
|
|
Stats::emitInc(a, Stats::TraceletGuard_execute);
|
|
}
|
|
}
|
|
|
|
|
|
void dumpTranslationInfo(const Tracelet& t, TCA postGuards) {
|
|
if (!debug) return;
|
|
|
|
SrcKey sk = t.m_sk;
|
|
|
|
TRACE(3, "----------------------------------------------\n");
|
|
TRACE(3, " Translating from file %s:%d %s at %p:\n",
|
|
curUnit()->filepath()->data(),
|
|
curUnit()->getLineNumber(sk.offset()),
|
|
curFunc()->name()->data(),
|
|
postGuards);
|
|
TRACE(3, " preconds:\n");
|
|
TRACE(3, " types:\n");
|
|
for (DepMap::const_iterator i = t.m_dependencies.begin();
|
|
i != t.m_dependencies.end(); ++i) {
|
|
TRACE(3, " %-5s\n", i->second->pretty().c_str());
|
|
}
|
|
if (t.m_refDeps.size() != 0) {
|
|
TRACE(3, " refs:\n");
|
|
for (RefDeps::ArMap::const_iterator i = t.m_refDeps.m_arMap.begin();
|
|
i != t.m_refDeps.m_arMap.end();
|
|
++i) {
|
|
TRACE(3, " (ActRec %" PRId64 " : %-5s)\n", i->first,
|
|
i->second.pretty().c_str());
|
|
}
|
|
}
|
|
TRACE(3, " postconds:\n");
|
|
for (ChangeMap::const_iterator i = t.m_changes.begin();
|
|
i != t.m_changes.end(); ++i) {
|
|
TRACE(3, " %-5s\n", i->second->pretty().c_str());
|
|
}
|
|
for (auto ni = t.m_instrStream.first; ni; ni = ni->next) {
|
|
TRACE(3, " %6d: %s\n", ni->source.offset(),
|
|
instrToString(ni->pc()).c_str());
|
|
if (ni->breaksTracelet) break;
|
|
}
|
|
TRACE(3, "----------------------------------------------\n");
|
|
if (Trace::moduleEnabled(Trace::tx64, 5)) {
|
|
// prettyStack() expects to use vmpc(). Leave it in the state we
|
|
// found it since this code is debug-only, and we don't want behavior
|
|
// to vary across the optimized/debug builds.
|
|
PC oldPC = vmpc();
|
|
vmpc() = curUnit()->at(sk.offset());
|
|
TRACE(3, g_vmContext->prettyStack(string(" tx64 ")));
|
|
vmpc() = oldPC;
|
|
TRACE(3, "----------------------------------------------\n");
|
|
}
|
|
}
|
|
|
|
void
|
|
TranslatorX64::translateTracelet(const TranslArgs& args) {
|
|
auto sk = args.m_sk;
|
|
std::unique_ptr<Tracelet> tp = analyze(sk);
|
|
Tracelet& t = *tp;
|
|
m_curTrace = &t;
|
|
Nuller<Tracelet> ctNuller(&m_curTrace);
|
|
|
|
SKTRACE(1, sk, "translateTracelet\n");
|
|
assert(m_srcDB.find(sk));
|
|
|
|
TCA start = a.code.frontier;
|
|
TCA stubStart = astubs.code.frontier;
|
|
TCA counterStart = 0;
|
|
uint8_t counterLen = 0;
|
|
SrcRec& srcRec = *getSrcRec(sk);
|
|
vector<TransBCMapping> bcMapping;
|
|
TransKind transKind = TransInterp;
|
|
|
|
if (!args.m_interp) {
|
|
TranslateTraceletResult result;
|
|
do {
|
|
hhirTraceStart(sk.offset(), t.m_nextSk.offset());
|
|
SKTRACE(1, sk, "retrying irTranslateTracelet\n");
|
|
result = irTranslateTracelet(t, start, stubStart, &bcMapping);
|
|
if (result == Retry) {
|
|
assert(a.code.frontier == start);
|
|
assert(astubs.code.frontier == stubStart);
|
|
}
|
|
} while (result == Retry);
|
|
|
|
if (result == Success) {
|
|
m_irAUsage += (a.code.frontier - start);
|
|
m_irAstubsUsage += (astubs.code.frontier - stubStart);
|
|
transKind = TransNormalIR;
|
|
}
|
|
}
|
|
|
|
if (transKind == TransInterp) {
|
|
assert(m_pendingFixups.size() == 0);
|
|
assert(srcRec.inProgressTailJumps().size() == 0);
|
|
bcMapping.clear();
|
|
|
|
// The whole translation failed; give up on this BB. Since it is not
|
|
// linked into srcDB yet, it is guaranteed not to be reachable.
|
|
// Permanent reset; nothing is reachable yet.
|
|
a.code.frontier = start;
|
|
astubs.code.frontier = stubStart;
|
|
bcMapping.clear();
|
|
// Discard any pending fixups.
|
|
m_pendingFixups.clear();
|
|
srcRec.clearInProgressTailJumps();
|
|
TRACE(1,
|
|
"emitting %d-instr interp request for failed translation\n",
|
|
int(t.m_numOpcodes));
|
|
// Add a counter for the translation if requested
|
|
if (RuntimeOption::EvalJitTransCounters) {
|
|
emitTransCounterInc(a);
|
|
}
|
|
a. jmp(emitServiceReq(REQ_INTERPRET, 2ull, uint64_t(t.m_sk.offset()),
|
|
uint64_t(t.m_numOpcodes)));
|
|
// Fall through.
|
|
}
|
|
|
|
for (uint i = 0; i < m_pendingFixups.size(); i++) {
|
|
TCA tca = m_pendingFixups[i].m_tca;
|
|
assert(isValidCodeAddress(tca));
|
|
m_fixupMap.recordFixup(tca, m_pendingFixups[i].m_fixup);
|
|
}
|
|
m_pendingFixups.clear();
|
|
|
|
addTranslation(TransRec(t.m_sk, curUnit()->md5(), transKind, t, start,
|
|
a.code.frontier - start, stubStart,
|
|
astubs.code.frontier - stubStart,
|
|
counterStart, counterLen,
|
|
bcMapping));
|
|
|
|
recordGdbTranslation(sk, curFunc(), a, start,
|
|
false, false);
|
|
recordGdbTranslation(sk, curFunc(), astubs, stubStart,
|
|
false, false);
|
|
// SrcRec::newTranslation() makes this code reachable. Do this last;
|
|
// otherwise there's some chance of hitting in the reader threads whose
|
|
// metadata is not yet visible.
|
|
TRACE(1, "newTranslation: %p sk: (func %d, bcOff %d)\n",
|
|
start, sk.getFuncId(), sk.offset());
|
|
srcRec.newTranslation(start);
|
|
TRACE(1, "tx64: %zd-byte tracelet\n", a.code.frontier - start);
|
|
if (Trace::moduleEnabledRelease(Trace::tcspace, 1)) {
|
|
Trace::traceRelease(getUsage().c_str());
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Defines functions called by emitGenericReturn, and
|
|
* cgGenericRetDecRefs.
|
|
*/
|
|
void TranslatorX64::emitFreeLocalsHelpers() {
|
|
Label doRelease;
|
|
Label release;
|
|
Label loopHead;
|
|
|
|
/*
|
|
* Note: the IR currently requires that we preserve r13/r14 across
|
|
* calls to these free locals helpers.
|
|
*/
|
|
static_assert(rVmSp == rbx, "");
|
|
auto const rIter = rbx;
|
|
auto const rFinished = r15;
|
|
auto const rType = esi;
|
|
auto const rData = rdi;
|
|
|
|
moveToAlign(a, kNonFallthroughAlign);
|
|
|
|
TRACE(1, "HOTSTUB: freeLocalsHelpers starts %lx\n", uintptr_t(a.code.frontier));
|
|
|
|
asm_label(a, release);
|
|
a. loadq (rIter[TVOFF(m_data)], rData);
|
|
a. cmpl (RefCountStaticValue, rData[FAST_REFCOUNT_OFFSET]);
|
|
jccBlock<CC_Z>(a, [&] {
|
|
a. decl (rData[FAST_REFCOUNT_OFFSET]);
|
|
a. jz8 (doRelease);
|
|
});
|
|
a. ret ();
|
|
asm_label(a, doRelease);
|
|
jumpDestructor(a, PhysReg(rType), rax);
|
|
|
|
moveToAlign(a, kJmpTargetAlign);
|
|
m_freeManyLocalsHelper = a.code.frontier;
|
|
a. lea (rVmFp[-cellsToBytes(kNumFreeLocalsHelpers)], rFinished);
|
|
|
|
auto emitDecLocal = [&] {
|
|
Label skipDecRef;
|
|
|
|
emitLoadTVType(a, rIter[TVOFF(m_type)], rType);
|
|
emitCmpTVType(a, KindOfRefCountThreshold, rType);
|
|
a. jle8 (skipDecRef);
|
|
a. call (release);
|
|
recordIndirectFixup(a.code.frontier, 0);
|
|
asm_label(a, skipDecRef);
|
|
};
|
|
|
|
// Loop for the first few locals, but unroll the final
|
|
// kNumFreeLocalsHelpers.
|
|
asm_label(a, loopHead);
|
|
emitDecLocal();
|
|
a. addq (sizeof(TypedValue), rIter);
|
|
a. cmpq (rIter, rFinished);
|
|
a. jnz8 (loopHead);
|
|
|
|
for (int i = 0; i < kNumFreeLocalsHelpers; ++i) {
|
|
m_freeLocalsHelpers[kNumFreeLocalsHelpers - i - 1] = a.code.frontier;
|
|
TRACE(1, "HOTSTUB: m_freeLocalsHelpers[%d] = %p\n",
|
|
kNumFreeLocalsHelpers - i - 1, a.code.frontier);
|
|
emitDecLocal();
|
|
if (i != kNumFreeLocalsHelpers - 1) {
|
|
a.addq (sizeof(TypedValue), rIter);
|
|
}
|
|
}
|
|
|
|
a. addq (AROFF(m_r) + sizeof(TypedValue), rVmSp);
|
|
a. ret (8);
|
|
|
|
TRACE(1, "STUB freeLocals helpers: %zu bytes\n",
|
|
size_t(a.code.frontier - m_freeManyLocalsHelper));
|
|
}
|
|
|
|
/*
 * Construct the translator: validate the configured code-area sizes,
 * allocate one slab and carve it into atrampolines, ahot, a, astubs and
 * m_globalData, then emit the stubs shared by all translations
 * (exit/return/resume helpers, the DefCls helper, destructor stubs,
 * generic dec-ref and free-locals helpers, prologue redispatch, and the
 * stack-overflow helper).  Optionally installs the SIGSEGV handler.
 *
 * Exits the process when the sizes are invalid or the slab cannot be
 * allocated.
 */
TranslatorX64::TranslatorX64()
  : m_numNativeTrampolines(0),
    m_trampolineSize(0),
    m_defClsHelper(0),
    m_funcPrologueRedispatch(0),
    m_irAUsage(0),
    m_irAstubsUsage(0),
    m_numHHIRTrans(0),
    m_catchTraceMap(128),
    m_curTrace(0),
    m_curNI(0),
    m_curFile(nullptr),
    m_curLine(0),
    m_curFunc(nullptr)
{
  static const size_t kRoundUp = 2 << 20;
  const size_t kAHotSize   = RuntimeOption::VMTranslAHotSize;
  const size_t kASize      = RuntimeOption::VMTranslASize;
  const size_t kAStubsSize = RuntimeOption::VMTranslAStubsSize;
  const size_t kGDataSize  = RuntimeOption::VMTranslGDataSize;
  m_totalSize = kAHotSize + kASize + kAStubsSize +
                kTrampolinesBlockSize + kGDataSize;

  TRACE(1, "TranslatorX64@%p startup\n", this);
  tx64 = this;

  // Each area has a minimum size.
  const bool areaTooSmall = (kAHotSize < (2 << 20)) ||
                            (kASize < (10 << 20)) ||
                            (kAStubsSize < (10 << 20)) ||
                            (kGDataSize < (2 << 20));
  if (areaTooSmall) {
    fprintf(stderr, "Allocation sizes ASize, AStubsSize, and GlobalDataSize "
            "are too small.\n");
    exit(1);
  }

  if (m_totalSize > (2ul << 30)) {
    fprintf(stderr,"Combined size of ASize, AStubSize, and GlobalDataSize "
            "must be < 2GiB to support 32-bit relative addresses\n");
    exit(1);
  }

  // profileInit() runs once, no matter how many translators are built.
  static bool profileUp = false;
  if (!profileUp) {
    profileInit();
    profileUp = true;
  }

  // Hint numMB megabytes starting at addr as huge, when enabled.
  auto enhugen = [&](void* addr, int numMB) {
    if (RuntimeOption::EvalMapTCHuge) {
      assert((uintptr_t(addr) & (kRoundUp - 1)) == 0);
      hintHuge(addr, numMB << 20);
    }
  };

  // We want to ensure that the block for "a", "astubs",
  // "atrampolines", and "m_globalData" are nearby so that we can
  // short jump/point between them.  Thus we allocate one slab and
  // divide it between "a", "astubs", and "atrampolines".

  // Using sbrk to ensure its in the bottom 2G, so we avoid
  // the need for trampolines, and get to use shorter
  // instructions for tc addresses.
  const size_t slabBytes = m_totalSize + kRoundUp - 1;
  uint8_t* cur = (uint8_t*)sbrk(slabBytes);
  if (cur == (uint8_t*)-1) {
    // sbrk failed: fall back to low_malloc, then to plain malloc.
    cur = (uint8_t*)low_malloc(slabBytes);
    if (!cur) {
      cur = (uint8_t*)malloc(slabBytes);
    }
    if (!cur) {
      fprintf(stderr, "could not allocate %zd bytes for translation cache\n",
              slabBytes);
      exit(1);
    }
  }
  assert(cur);

  // Round up to the next kRoundUp boundary.
  cur += -(uint64_t)cur & (kRoundUp - 1);
  enhugen(cur, RuntimeOption::EvalTCNumHugeHotMB);
  TRACE(1, "init atrampolines @%p\n", cur);
  atrampolines.init(cur, kTrampolinesBlockSize);
  cur += kTrampolinesBlockSize;

  m_unwindRegistrar = register_unwind_region(cur, m_totalSize);
  TRACE(1, "init ahot @%p\n", cur);
  ahot.init(cur, kAHotSize);
  cur += kAHotSize;
  TRACE(1, "init a @%p\n", cur);
  a.init(cur, kASize);
  cur += kASize;
  cur += -(uint64_t)cur & (kRoundUp - 1);
  TRACE(1, "init astubs @%p\n", cur);
  astubs.init(cur, kAStubsSize);
  enhugen(cur, RuntimeOption::EvalTCNumHugeColdMB);
  cur += kAStubsSize;
  TRACE(1, "init gdata @%p\n", cur);
  m_globalData.init(cur, kGDataSize);

  // put the stubs into ahot, rather than a
  AHotSelector ahs(this, true);

  // Emit some special helpers that are shared across translations.

  // Emit a byte of padding.  This is a kind of hacky way to
  // avoid hitting an assert in recordGdbStub when we call
  // it with m_callToExit - 1 as the start address.
  astubs.emitNop(1);

  // Call to exit with whatever value the program leaves on
  // the return stack.
  m_callToExit = emitServiceReq(SRFlags::Align | SRFlags::JmpInsteadOfRet,
                                REQ_EXIT, 0ull);

  /*
   * Helpers for returning from a function where the ActRec was pushed
   * by the interpreter.
   */
  m_retHelper = emitRetFromInterpretedFrame();
  m_genRetHelper = emitRetFromInterpretedGeneratorFrame();

  /*
   * Returning from a function where the ActRec was pushed by an
   * inlined call.  This is separate from m_retHelper just for
   * debugability---it does the same thing.
   */
  m_retInlHelper = emitRetFromInterpretedFrame();
  FTRACE(1, "retInlHelper: {}\n", (void*)m_retInlHelper);

  moveToAlign(astubs);
  m_resumeHelperRet = astubs.code.frontier;
  emitPopRetIntoActRec(astubs);
  m_resumeHelper = astubs.code.frontier;
  emitGetGContext(astubs, rax);
  astubs.   load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_fp),
                                  rVmFp);
  astubs.   load_reg64_disp_reg64(rax, offsetof(VMExecutionContext, m_stack) +
                                  Stack::topOfStackOffset(), rVmSp);
  emitServiceReq(SRFlags::Persistent, REQ_RESUME, 0ull);

  // Helper for DefCls, in astubs.
  {
    auto& a = astubs;
    // Never executed; keeps the call to defClsHelper type-checked.
    if (false) {
      PreClass *preClass = 0;
      defClsHelper(preClass);
    }
    m_defClsHelper = TCA(a.code.frontier);
    PhysReg rEC = argNumToRegName[2];
    emitGetGContext(a, rEC);
    a.   storeq (rVmFp, rEC[offsetof(VMExecutionContext, m_fp)]);
    a.   storeq (argNumToRegName[1],
                 rEC[offsetof(VMExecutionContext, m_pc)]);
    a.   storeq (rax, rEC[offsetof(VMExecutionContext, m_stack) +
                          Stack::topOfStackOffset()]);
    a.   jmp    (TCA(defClsHelper));
  }

  // The decRef helper for when we bring the count down to zero.  Callee
  // needs to bring the value into rdi.  These can be burned in for all
  // time, and for all translations.
  typedef void* vp;

  TCA strDtor =
    emitUnaryStub(astubs, Call(getMethodPtr(&StringData::release)));
  TCA arrDtor =
    emitUnaryStub(astubs, Call(getVTableOffset(&HphpArray::release)));
  TCA objDtor =
    emitUnaryStub(astubs, Call(getMethodPtr(&ObjectData::release)));
  TCA refDtor =
    emitUnaryStub(astubs, Call(vp(getMethodPtr(&RefData::release))));

  m_dtorStubs[typeToDestrIndex(BitwiseKindOfString)] = strDtor;
  m_dtorStubs[typeToDestrIndex(KindOfArray)]         = arrDtor;
  m_dtorStubs[typeToDestrIndex(KindOfObject)]        = objDtor;
  m_dtorStubs[typeToDestrIndex(KindOfRef)]           = refDtor;

  // Hot helper stubs in A:
  emitGenericDecRefHelpers();
  emitFreeLocalsHelpers();
  m_funcPrologueRedispatch = emitPrologueRedispatch(a);
  TRACE(1, "HOTSTUB: all stubs finished: %lx\n",
        uintptr_t(a.code.frontier));

  if (trustSigSegv) {
    // Install SIGSEGV handler for timeout exceptions
    struct sigaction sa;
    struct sigaction old_sa;
    sa.sa_sigaction = &TranslatorX64::SEGVHandler;
    sa.sa_flags = SA_SIGINFO;
    sigemptyset(&sa.sa_mask);
    if (sigaction(SIGSEGV, &sa, &old_sa) != 0) {
      throw std::runtime_error(
        std::string("Failed to install SIGSEGV handler: ") +
          strerror(errno));
    }
    // Remember the previous handler so it can be chained to.
    m_segvChain = old_sa.sa_flags & SA_SIGINFO ?
      old_sa.sa_sigaction : (sigaction_t)old_sa.sa_handler;
  }

  moveToAlign(astubs);
  m_stackOverflowHelper = astubs.code.frontier;
  // We are called from emitStackCheck, with the new stack frame in
  // rStashedAR.  Get the caller's PC into rdi and save it off.
  astubs.    load_reg64_disp_reg64(rVmFp, AROFF(m_func), rax);
  astubs.    load_reg64_disp_reg32(rStashedAR, AROFF(m_soff), rdi);
  astubs.    load_reg64_disp_reg64(rax, Func::sharedOffset(), rax);
  astubs.    load_reg64_disp_reg32(rax, Func::sharedBaseOffset(), rax);
  astubs.    add_reg32_reg32(rax, rdi);
  emitEagerVMRegSave(astubs, SaveFP | SavePC);
  emitServiceReq(SRFlags::Persistent, REQ_STACK_OVERFLOW, 0ull);
}
|
|
|
|
// do gdb specific initialization. This has to happen after
|
|
// the TranslatorX64 constructor is called, because gdb initialization
|
|
// calls backs into TranslatorX64::Get()
|
|
void TranslatorX64::initGdb() {
|
|
// On a backtrace, gdb tries to locate the calling frame at address
|
|
// returnRIP-1. However, for the first VM frame, there is no code at
|
|
// returnRIP-1, since the AR was set up manually. For this frame,
|
|
// record the tracelet address as starting from callToExit-1, so gdb
|
|
// does not barf
|
|
recordGdbStub(astubs, m_callToExit - 1, "HHVM::callToExit");
|
|
|
|
recordBCInstr(OpRetFromInterp, astubs, m_retHelper);
|
|
recordGdbStub(astubs, m_retHelper - 1, "HHVM::retHelper");
|
|
recordBCInstr(OpResumeHelper, astubs, m_resumeHelper);
|
|
recordBCInstr(OpDefClsHelper, astubs, m_defClsHelper);
|
|
recordBCInstr(OpDtorStub, astubs,
|
|
m_dtorStubs[typeToDestrIndex(BitwiseKindOfString)]);
|
|
recordGdbStub(astubs, m_dtorStubs[typeToDestrIndex(BitwiseKindOfString)],
|
|
"HHVM::destructorStub");
|
|
}
|
|
|
|
/*
 * Return the current translator, creating the first one on demand.
 *
 * Called from outrageously early, pre-main code, and will allocate the
 * first translator space.
 */
TranslatorX64*
TranslatorX64::Get() {
  if (nextTx64 == nullptr) {
    nextTx64 = new TranslatorX64();
    nextTx64->initGdb();
  }
  if (tx64 == nullptr) {
    tx64 = nextTx64;
  }
  assert(tx64);
  return tx64;
}
|
|
|
|
template<int Arity>
|
|
TCA TranslatorX64::emitNAryStub(X64Assembler& a, Call c) {
|
|
BOOST_STATIC_ASSERT((Arity < kNumRegisterArgs));
|
|
|
|
// The callNAryStub has already saved these regs on a.
|
|
RegSet alreadySaved;
|
|
for (size_t i = 0; i < Arity; ++i) {
|
|
alreadySaved |= RegSet(argNumToRegName[i]);
|
|
}
|
|
|
|
/*
|
|
* We've made a call instruction, and pushed Arity args on the
|
|
* stack. So the stack address will be odd coming into the stub if
|
|
* Arity + 1 (for the call) is odd. We need to correct for this
|
|
* when saving other registers below to keep SSE-friendly alignment
|
|
* of the stack.
|
|
*/
|
|
const int Parity = (Arity + 1) % 2;
|
|
|
|
// These dtor stubs are meant to be called with the call
|
|
// instruction, unlike most translator code.
|
|
moveToAlign(a);
|
|
TCA start = a.code.frontier;
|
|
/*
|
|
* Preserve most caller-saved regs. The calling code has already
|
|
* preserved regs in `alreadySaved'; we push the rest of the caller
|
|
* saved regs and rbp. It should take 9 qwords in total, and the
|
|
* incoming call instruction made it 10. This is an even number of
|
|
* pushes, so we preserve the SSE-friendliness of our execution
|
|
* environment (without real intervention from PhysRegSaverParity).
|
|
*
|
|
* Note that we don't need to clean all registers because the only
|
|
* reason we could need those locations written back is if stack
|
|
* unwinding were to happen. These stubs can re-enter due to user
|
|
* destructors, but exceptions are not allowed to propagate out of
|
|
* those, so it's not a problem.
|
|
*/
|
|
a. push (rbp); // {
|
|
a. movq (rsp, rbp);
|
|
{
|
|
RegSet s = kGPCallerSaved - alreadySaved;
|
|
PhysRegSaverParity rs(Parity, a, s);
|
|
emitCall(a, c);
|
|
}
|
|
a. pop (rbp); // }
|
|
a. ret ();
|
|
return start;
|
|
}
|
|
|
|
/*
 * Emit a one-argument stub for the call c; a thin wrapper around
 * emitNAryStub<1>.  Returns the stub's start address.
 */
TCA TranslatorX64::emitUnaryStub(X64Assembler& a, Call c) {
  TCA stub = emitNAryStub<1>(a, c);
  return stub;
}
|
|
|
|
/*
 * Associate the address ip with the catch trace at `trace` in
 * m_catchTraceMap.
 */
void TranslatorX64::registerCatchTrace(CTCA ip, TCA trace) {
  FTRACE(1, "registerCatchTrace: afterCall: {} trace: {}\n", ip, trace);

  m_catchTraceMap.insert(ip, trace);
}
|
|
|
|
/*
 * Look up the catch trace registered for ip.  Returns nullptr when
 * m_catchTraceMap has no entry for it.
 */
TCA TranslatorX64::getCatchTrace(CTCA ip) const {
  if (TCA* entry = m_catchTraceMap.find(ip)) {
    return *entry;
  }
  return nullptr;
}
|
|
|
|
namespace {
|
|
|
|
struct DeferredFileInvalidate : public DeferredWorkItem {
|
|
Eval::PhpFile* m_f;
|
|
explicit DeferredFileInvalidate(Eval::PhpFile* f) : m_f(f) {
|
|
TRACE(2, "DeferredFileInvalidate @ %p, m_f %p\n", this, m_f); }
|
|
void operator()() {
|
|
TRACE(2, "DeferredFileInvalidate: Firing @ %p , m_f %p\n", this, m_f);
|
|
tx64->invalidateFileWork(m_f);
|
|
}
|
|
};
|
|
|
|
struct DeferredPathInvalidate : public DeferredWorkItem {
|
|
const std::string m_path;
|
|
explicit DeferredPathInvalidate(const std::string& path) : m_path(path) {
|
|
assert(m_path.size() >= 1 && m_path[0] == '/');
|
|
}
|
|
void operator()() {
|
|
String spath(m_path);
|
|
/*
|
|
* inotify saw this path change. Now poke the file repository;
|
|
* it will notice the underlying PhpFile* has changed, and notify
|
|
* us via ::invalidateFile.
|
|
*
|
|
* We don't actually need to *do* anything with the PhpFile* from
|
|
* this lookup; since the path has changed, the file we'll get out is
|
|
* going to be some new file, not the old file that needs invalidation.
|
|
*/
|
|
UNUSED Eval::PhpFile* f =
|
|
g_vmContext->lookupPhpFile(spath.get(), "");
|
|
// We don't keep around the extra ref.
|
|
if (f) f->decRefAndDelete();
|
|
}
|
|
};
|
|
|
|
}
|
|
|
|
void
|
|
TranslatorX64::requestInit() {
|
|
TRACE(1, "in requestInit(%" PRId64 ")\n", g_vmContext->m_currentThreadIdx);
|
|
tl_regState = REGSTATE_CLEAN;
|
|
PendQ::drain();
|
|
requestResetHighLevelTranslator();
|
|
Treadmill::startRequest(g_vmContext->m_currentThreadIdx);
|
|
memset(&s_perfCounters, 0, sizeof(s_perfCounters));
|
|
Stats::init();
|
|
}
|
|
|
|
void
|
|
TranslatorX64::requestExit() {
|
|
if (s_writeLease.amOwner()) {
|
|
s_writeLease.drop();
|
|
}
|
|
TRACE_MOD(txlease, 2, "%" PRIx64 " write lease stats: %15" PRId64
|
|
" kept, %15" PRId64 " grabbed\n",
|
|
pthread_self(), s_writeLease.m_hintKept,
|
|
s_writeLease.m_hintGrabbed);
|
|
PendQ::drain();
|
|
Treadmill::finishRequest(g_vmContext->m_currentThreadIdx);
|
|
TRACE(1, "done requestExit(%" PRId64 ")\n", g_vmContext->m_currentThreadIdx);
|
|
Stats::dump();
|
|
Stats::clear();
|
|
|
|
if (Trace::moduleEnabledRelease(Trace::tx64stats, 1)) {
|
|
Trace::traceRelease("TranslatorX64 perf counters for %s:\n",
|
|
g_context->getRequestUrl(50).c_str());
|
|
for (int i = 0; i < tpc_num_counters; i++) {
|
|
Trace::traceRelease("%-20s %10lld\n",
|
|
kPerfCounterNames[i], s_perfCounters[i]);
|
|
}
|
|
Trace::traceRelease("\n");
|
|
}
|
|
}
|
|
|
|
bool
|
|
TranslatorX64::isPseudoEvent(const char* event) {
|
|
for (auto name : kPerfCounterNames) {
|
|
if (!strcmp(event, name)) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void
|
|
TranslatorX64::getPerfCounters(Array& ret) {
|
|
for (int i = 0; i < tpc_num_counters; i++) {
|
|
// Until Perflab can automatically scale the values we give it to
|
|
// an appropriate range, we have to fudge these numbers so they
|
|
// look more like reasonable hardware counter values.
|
|
ret.set(String::FromCStr(kPerfCounterNames[i]),
|
|
s_perfCounters[i] * 1000);
|
|
}
|
|
|
|
if (RuntimeOption::EnableInstructionCounts) {
|
|
auto doCounts = [&](unsigned begin, const char* const name) {
|
|
int64_t count = 0;
|
|
for (; begin < Stats::Instr_InterpOneHighInvalid;
|
|
begin += STATS_PER_OPCODE) {
|
|
count += Stats::tl_counters[Stats::StatCounter(begin)];
|
|
}
|
|
ret.set(String::FromCStr(name), count);
|
|
};
|
|
|
|
doCounts(Stats::Instr_TranslLowInvalid + STATS_PER_OPCODE,
|
|
kInstrCountTx64Name);
|
|
doCounts(Stats::Instr_TranslIRPostLowInvalid + STATS_PER_OPCODE,
|
|
kInstrCountIRName);
|
|
}
|
|
}
|
|
|
|
/*
 * Hands the slab that begins at atrampolines.code.base back to freeSlab,
 * using m_totalSize as its length.
 */
TranslatorX64::~TranslatorX64() {
  freeSlab(atrampolines.code.base, m_totalSize);
}
|
|
|
|
/*
 * Builds the Debug::TCRange that runs from `addr` up to the current
 * frontier of assembler `a`.  `isAstubs` is forwarded to the range
 * unchanged.  `addr` must be a valid address for a.code (asserted).
 */
static Debug::TCRange rangeFrom(const X64Assembler& a, const TCA addr,
                                bool isAstubs) {
  assert(a.code.isValidAddress(addr));
  const auto rangeEnd = a.code.frontier;
  return Debug::TCRange(addr, rangeEnd, isAstubs);
}
|
|
|
|
void TranslatorX64::recordBCInstr(uint32_t op,
|
|
const X64Assembler& a,
|
|
const TCA addr) {
|
|
if (addr != a.code.frontier) {
|
|
m_debugInfo.recordBCInstr(Debug::TCRange(addr, a.code.frontier,
|
|
&a == &astubs ? true : false), op);
|
|
}
|
|
}
|
|
|
|
/*
 * Reports the code emitted between `start` and the current frontier of
 * `a` to m_debugInfo.
 *
 * Nothing happens if the range is empty.  Otherwise the caller must hold
 * the write lease (asserted); the range is recorded as a tracelet unless
 * EvalJitNoGdb is set, and added to the perf map if EvalPerfPidMap is set.
 */
void TranslatorX64::recordGdbTranslation(SrcKey sk,
                                         const Func* srcFunc,
                                         const X64Assembler& a,
                                         const TCA start,
                                         bool exit,
                                         bool inPrologue) {
  if (start == a.code.frontier) return;

  assert(s_writeLease.amOwner());
  const bool inStubs = (&a == &astubs);

  if (!RuntimeOption::EvalJitNoGdb) {
    m_debugInfo.recordTracelet(
      rangeFrom(a, start, inStubs),
      srcFunc,
      srcFunc->unit() ? srcFunc->unit()->at(sk.offset()) : nullptr,
      exit, inPrologue);
  }

  if (RuntimeOption::EvalPerfPidMap) {
    m_debugInfo.recordPerfMap(rangeFrom(a, start, inStubs),
                              srcFunc, exit, inPrologue);
  }
}
|
|
|
|
void TranslatorX64::recordGdbStub(const X64Assembler& a,
|
|
const TCA start, const char* name) {
|
|
if (!RuntimeOption::EvalJitNoGdb) {
|
|
m_debugInfo.recordStub(rangeFrom(a, start, &a == &astubs ? true : false),
|
|
name);
|
|
}
|
|
}
|
|
|
|
size_t TranslatorX64::getCodeSize() {
|
|
return a.code.frontier - a.code.base;
|
|
}
|
|
|
|
size_t TranslatorX64::getStubSize() {
|
|
return astubs.code.frontier - astubs.code.base;
|
|
}
|
|
|
|
size_t TranslatorX64::getTargetCacheSize() {
|
|
return TargetCache::s_frontier;
|
|
}
|
|
|
|
std::string TranslatorX64::getUsage() {
|
|
std::string usage;
|
|
size_t aHotUsage = ahot.code.frontier - ahot.code.base;
|
|
size_t aUsage = a.code.frontier - a.code.base;
|
|
size_t stubsUsage = astubs.code.frontier - astubs.code.base;
|
|
size_t dataUsage = m_globalData.frontier - m_globalData.base;
|
|
size_t tcUsage = TargetCache::s_frontier;
|
|
size_t persistentUsage =
|
|
TargetCache::s_persistent_frontier - TargetCache::s_persistent_start;
|
|
Util::string_printf(
|
|
usage,
|
|
"tx64: %9zd bytes (%" PRId64 "%%) in ahot.code\n"
|
|
"tx64: %9zd bytes (%" PRId64 "%%) in a.code\n"
|
|
"tx64: %9zd bytes (%" PRId64 "%%) in astubs.code\n"
|
|
"tx64: %9zd bytes (%" PRId64 "%%) in a.code from ir\n"
|
|
"tx64: %9zd bytes (%" PRId64 "%%) in astubs.code from ir\n"
|
|
"tx64: %9zd bytes (%" PRId64 "%%) in m_globalData\n"
|
|
"tx64: %9zd bytes (%" PRId64 "%%) in targetCache\n"
|
|
"tx64: %9zd bytes (%" PRId64 "%%) in persistentCache\n",
|
|
aHotUsage, 100 * aHotUsage / ahot.code.size,
|
|
aUsage, 100 * aUsage / a.code.size,
|
|
stubsUsage, 100 * stubsUsage / astubs.code.size,
|
|
m_irAUsage, 100 * m_irAUsage / a.code.size,
|
|
m_irAstubsUsage, 100 * m_irAstubsUsage / astubs.code.size,
|
|
dataUsage, 100 * dataUsage / m_globalData.size,
|
|
tcUsage,
|
|
400 * tcUsage / RuntimeOption::EvalJitTargetCacheSize / 3,
|
|
persistentUsage,
|
|
400 * persistentUsage / RuntimeOption::EvalJitTargetCacheSize);
|
|
return usage;
|
|
}
|
|
|
|
bool TranslatorX64::addDbgGuards(const Unit* unit) {
|
|
// TODO refactor
|
|
// It grabs the write lease and iterating through whole SrcDB...
|
|
bool locked = s_writeLease.acquire(true);
|
|
if (!locked) {
|
|
return false;
|
|
}
|
|
struct timespec tsBegin, tsEnd;
|
|
gettime(CLOCK_MONOTONIC, &tsBegin);
|
|
// Doc says even find _could_ invalidate iterator, in pactice it should
|
|
// be very rare, so go with it now.
|
|
for (SrcDB::iterator it = m_srcDB.begin(); it != m_srcDB.end(); ++it) {
|
|
SrcKey const sk = SrcKey::fromAtomicInt(it->first);
|
|
SrcRec& sr = *it->second;
|
|
if (sr.unitMd5() == unit->md5() &&
|
|
!sr.hasDebuggerGuard() &&
|
|
isSrcKeyInBL(unit, sk)) {
|
|
addDbgGuardImpl(sk, sr);
|
|
}
|
|
}
|
|
s_writeLease.drop();
|
|
gettime(CLOCK_MONOTONIC, &tsEnd);
|
|
int64_t elapsed = gettime_diff_us(tsBegin, tsEnd);
|
|
if (Trace::moduleEnabledRelease(Trace::tx64, 5)) {
|
|
Trace::traceRelease("addDbgGuards got lease for %" PRId64 " us\n", elapsed);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool TranslatorX64::addDbgGuard(const Func* func, Offset offset) {
|
|
SrcKey sk(func, offset);
|
|
{
|
|
if (SrcRec* sr = m_srcDB.find(sk)) {
|
|
if (sr->hasDebuggerGuard()) {
|
|
return true;
|
|
}
|
|
} else {
|
|
// no translation yet
|
|
return true;
|
|
}
|
|
}
|
|
if (debug) {
|
|
if (!isSrcKeyInBL(func->unit(), sk)) {
|
|
TRACE(5, "calling addDbgGuard on PC that is not in blacklist");
|
|
return false;
|
|
}
|
|
}
|
|
bool locked = s_writeLease.acquire(true);
|
|
if (!locked) {
|
|
return false;
|
|
}
|
|
{
|
|
if (SrcRec* sr = m_srcDB.find(sk)) {
|
|
addDbgGuardImpl(sk, *sr);
|
|
}
|
|
}
|
|
s_writeLease.drop();
|
|
return true;
|
|
}
|
|
|
|
/*
 * Emits a debugger guard for `sk` at the current frontier of `a` and
 * registers it with `srcRec`.
 *
 * The emitted sequence loads a byte from the current ThreadInfo at dbgOff,
 * tests it, branches to a REQ_INTERPRET service request when it is
 * non-zero, and otherwise jumps to srcRec's top translation.
 */
void TranslatorX64::addDbgGuardImpl(SrcKey sk, SrcRec& srcRec) {
  const TCA guardStart = a.code.frontier;

  // Emit the checks for debugger attach.
  emitTLSLoad<ThreadInfo>(a, ThreadInfo::s_threadInfo, rAsm);
  static COff dbgOff = offsetof(ThreadInfo, m_reqInjectionData) +
                       RequestInjectionData::debuggerReadOnlyOffset();
  a.   load_reg64_disp_reg32(rAsm, dbgOff, rAsm);
  a.   testb((int8_t)0xff, rbyte(rAsm));

  // Branch to a special REQ_INTERPRET if attached.
  const TCA interpStub =
    emitServiceReq(REQ_INTERPRET, 2, uint64_t(sk.offset()), 0);
  a.   jnz(interpStub);

  // Emit a jump to the actual code; remember where the jump itself lives.
  const TCA target = srcRec.getTopTranslation();
  prepareForSmash(a, kJmpLen);
  const TCA jmpAddr = a.code.frontier;
  a.   jmp(target);

  // Add it to srcRec.
  srcRec.addDebuggerGuard(guardStart, jmpAddr);
}
|
|
|
|
bool TranslatorX64::dumpTCCode(const char* filename) {
|
|
string aFilename = string(filename).append("_a");
|
|
string astubFilename = string(filename).append("_astub");
|
|
FILE* aFile = fopen(aFilename.c_str(),"wb");
|
|
if (aFile == nullptr)
|
|
return false;
|
|
FILE* astubFile = fopen(astubFilename.c_str(),"wb");
|
|
if (astubFile == nullptr) {
|
|
fclose(aFile);
|
|
return false;
|
|
}
|
|
string helperAddrFilename = string(filename).append("_helpers_addrs.txt");
|
|
FILE* helperAddrFile = fopen(helperAddrFilename.c_str(),"wb");
|
|
if (helperAddrFile == nullptr) {
|
|
fclose(aFile);
|
|
fclose(astubFile);
|
|
return false;
|
|
}
|
|
// dump starting from the trampolines; this assumes processInit() places
|
|
// trampolines before the translation cache
|
|
size_t count = a.code.frontier-atrampolines.code.base;
|
|
bool result = (fwrite(atrampolines.code.base, 1, count, aFile) == count);
|
|
if (result) {
|
|
count = astubs.code.frontier - astubs.code.base;
|
|
result = (fwrite(astubs.code.base, 1, count, astubFile) == count);
|
|
}
|
|
if (result) {
|
|
for(PointerMap::iterator iter = trampolineMap.begin();
|
|
iter != trampolineMap.end();
|
|
iter++) {
|
|
void* helperAddr = iter->first;
|
|
void* trampAddr = iter->second;
|
|
char* functionName = Util::getNativeFunctionName(helperAddr);
|
|
fprintf(helperAddrFile,"%10p %10p %s\n",
|
|
trampAddr, helperAddr,
|
|
functionName);
|
|
free(functionName);
|
|
}
|
|
}
|
|
fclose(aFile);
|
|
fclose(astubFile);
|
|
fclose(helperAddrFile);
|
|
return result;
|
|
}
|
|
|
|
// Returns true on success
|
|
bool TranslatorX64::dumpTC(bool ignoreLease) {
|
|
if (!ignoreLease && !s_writeLease.acquire(true)) return false;
|
|
bool success = dumpTCData();
|
|
if (success) {
|
|
success = dumpTCCode("/tmp/tc_dump");
|
|
}
|
|
if (!ignoreLease) s_writeLease.drop();
|
|
return success;
|
|
}
|
|
|
|
// Returns true on success
|
|
bool tc_dump(void) {
|
|
return TranslatorX64::Get()->dumpTC();
|
|
}
|
|
|
|
// Returns true on success
|
|
bool TranslatorX64::dumpTCData() {
|
|
gzFile tcDataFile = gzopen("/tmp/tc_data.txt.gz", "w");
|
|
if (!tcDataFile) return false;
|
|
|
|
if (!gzprintf(tcDataFile,
|
|
"repo_schema = %s\n"
|
|
"a.base = %p\n"
|
|
"a.frontier = %p\n"
|
|
"astubs.base = %p\n"
|
|
"astubs.frontier = %p\n\n",
|
|
kRepoSchemaId,
|
|
atrampolines.code.base, a.code.frontier,
|
|
astubs.code.base, astubs.code.frontier)) {
|
|
return false;
|
|
}
|
|
|
|
if (!gzprintf(tcDataFile, "total_translations = %zu\n\n",
|
|
m_translations.size())) {
|
|
return false;
|
|
}
|
|
|
|
for (size_t t = 0; t < m_translations.size(); t++) {
|
|
if (gzputs(tcDataFile,
|
|
m_translations[t].print(getTransCounter(t)).c_str()) == -1) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
gzclose(tcDataFile);
|
|
return true;
|
|
}
|
|
|
|
/*
 * Invalidates the existing translations for `sk`.
 *
 * Must not be used in RepoAuthoritative mode and requires the caller to
 * own the write lease; a SrcRec for `sk` must already exist (all three
 * are asserted).
 */
void TranslatorX64::invalidateSrcKey(SrcKey sk) {
  assert(!RuntimeOption::RepoAuthoritative);
  assert(s_writeLease.amOwner());

  // Existing translations for this SrcKey get rerouted to a new,
  // not-yet-determined translation.
  SrcRec* const rec = m_srcDB.find(sk);
  assert(rec);

  // The old translations are no longer reachable from here, which means
  // we have just produced garbage in the TC.  There is currently no
  // mechanism for reclaiming it.
  rec->replaceOldTranslations();
}
|
|
|
|
/*
 * Invalidates all code in m_srcDB that depends on `f`.  If anything was
 * invalidated, a Treadmill work item is enqueued that later releases the
 * corresponding references on `f`.
 */
void TranslatorX64::invalidateFileWork(Eval::PhpFile* f) {
  // Work item that drops `m_refCount` references from the file when run,
  // and notifies the FileRepository if decRef() reports zero.
  class FileInvalidationTrigger : public Treadmill::WorkItem {
    Eval::PhpFile* m_file;
    int m_refCount;
   public:
    FileInvalidationTrigger(Eval::PhpFile* file, int refCount)
      : m_file(file), m_refCount(refCount) { }
    virtual void operator()() {
      if (m_file->decRef(m_refCount) == 0) {
        Eval::FileRepository::onDelete(m_file);
      }
    }
  };

  size_t nSmashed = m_srcDB.invalidateCode(f);
  if (!nSmashed) return;

  // The srcDB had an entry for this file.  That entry's dependency on the
  // file was counted as a reference, and its code can no longer be
  // reached.  The reference can only really be dropped once the last
  // outstanding request has drained, hence the treadmill.
  Treadmill::WorkItem::enqueue(new FileInvalidationTrigger(f, nSmashed));
}
|
|
|
|
/*
 * Schedules invalidation of the code that depends on `f`.
 *
 * Returns false without doing anything when the JIT is disabled;
 * otherwise hands a DeferredFileInvalidate to PendQ::defer and returns
 * true.  `f` must not be null (asserted).
 */
bool TranslatorX64::invalidateFile(Eval::PhpFile* f) {
  if (!RuntimeOption::EvalJit) {
    return false;
  }
  assert(f != nullptr);

  // We are called from high rank but invalidating code needs the write
  // lease, so the actual work is handed to PendQ instead of done here.
  PendQ::defer(new DeferredFileInvalidate(f));
  return true;
}
|
|
|
|
} // HPHP::Transl
|
|
|
|
// Trace module for the code that follows in this namespace; presumably
// this is what the TRACE() macro below picks up -- confirm against trace.h.
static const Trace::Module TRACEMOD = Trace::tx64;
|
|
|
|
/*
 * Traces the request and then hands a DeferredPathInvalidate for `path`
 * to PendQ::defer.
 */
void invalidatePath(const std::string& path) {
  const char* const absPath = path.c_str();
  TRACE(1, "invalidatePath: abspath %s\n", absPath);
  PendQ::defer(new DeferredPathInvalidate(path));
}
|
|
|
|
} // HPHP::VM
|