Adding floor and ceil to interpreter/jit

Added floor and ceil to the interpreter and the JIT; also fixed some type-conversion simplifications.
Esse commit está contido em:
Paul Bissonnette
2013-07-03 09:36:36 -07:00
commit de Sara Golemon
commit d63459b8e5
20 arquivos alterados com 279 adições e 7 exclusões
+14
Ver Arquivo
@@ -3171,6 +3171,20 @@ bool EmitterVisitor::visitImpl(ConstructPtr node) {
e.Strlen();
return true;
}
} else if (call->isCallToFunction("floor")) {
if (params && params->getCount() == 1) {
visit((*params)[0]);
emitConvertToCell(e);
e.Floor();
return true;
}
} else if (call->isCallToFunction("ceil")) {
if (params && params->getCount() == 1) {
visit((*params)[0]);
emitConvertToCell(e);
e.Ceil();
return true;
}
} else if (call->isCallToFunction("define")) {
if (params && params->getCount() == 2) {
ExpressionPtr p0 = (*params)[0];
+10
Ver Arquivo
@@ -1058,6 +1058,16 @@ Shr [C C] -> [C:Int]
Shift right (>>). Pushes ((int)$2 >> (int)$1) onto the stack. This
instruction never throws a fatal error.
Floor [C] -> [C:Dbl]
Round $1 down to the nearest integral value not greater than $1. Converts $1
to a double and pushes the floor of the resulting value onto the stack.
Ceil [C] -> [C:Dbl]
Round $1 up to the nearest integral value not less than $1. Converts $1 to a
double and pushes the ceil of the resulting value onto the stack.
CastBool [C] -> [C:Bool]
Cast to boolean ((bool),(boolean)). Pushes (bool)$1 onto the stack.
+6
Ver Arquivo
@@ -477,6 +477,8 @@ D:Int = OpBitOr S0:Int S1:Int
D:Int = OpBitXor S0:Int S1:Int
D:Int = OpShl S0:Int S1:Int
D:Int = OpShr S0:Int S1:Int
D:Dbl = OpFloor S0:Dbl
D:Dbl = OpCeil S0:Dbl
Integer/boolean/double arithmetic. Performs the operation described by the
opcode name on S0 (and S1, for the binary opcodes), and puts the result in D.
@@ -491,6 +493,10 @@ D:Int = OpShr S0:Int S1:Int
Note that OpShr is an arithmetic right shift.
OpFloor returns the largest integral value not greater than its input, and
OpCeil returns the smallest integral value not less than its input. Both
require SSE 4.1; its availability must be checked before they are emitted.
D:Bool = OpLogicXor S0:Bool S1:Bool
Logical XOR of the two sources. (Note that && and || do not have
+1
Ver Arquivo
@@ -485,6 +485,7 @@ private:
Offset offset, Ref* r1, TypedValue* val, TypedValue* key);
void jmpSurpriseCheck(Offset o);
template<Op op> void jmpOpImpl(PC& pc);
template<class Op> void roundOpImpl(Op op);
#define O(name, imm, pusph, pop, flags) \
void iop##name(PC& pc);
OPCODES
+18
Ver Arquivo
@@ -6825,6 +6825,24 @@ inline void OPTBLD_INLINE VMExecutionContext::iopContHandle(PC& pc) {
throw exn.asObjRef();
}
/*
 * Shared body of the Floor and Ceil interpreter handlers.  Casts the value on
 * top of m_stack to a double in place, then overwrites that double with the
 * result of applying op to it.
 */
template<class Op>
inline void OPTBLD_INLINE VMExecutionContext::roundOpImpl(Op op) {
  TypedValue* top = m_stack.topTV();
  tvCastToDoubleInPlace(top);
  auto& payload = top->m_data.dbl;
  payload = op(payload);
}
/*
 * Interpreter handler for the Floor bytecode: runs NEXT() and then replaces
 * the value on top of the stack with floor() of its double conversion.
 */
inline void OPTBLD_INLINE VMExecutionContext::iopFloor(PC& pc) {
  NEXT();
  // Pass a lambda rather than the bare name `floor`: when the float and
  // long double overloads from <cmath> are visible, the template parameter
  // cannot be deduced from an overload set.  The double argument picks the
  // double overload here.
  roundOpImpl([](double d) { return floor(d); });
}
/*
 * Interpreter handler for the Ceil bytecode: runs NEXT() and then replaces
 * the value on top of the stack with ceil() of its double conversion.
 */
inline void OPTBLD_INLINE VMExecutionContext::iopCeil(PC& pc) {
  NEXT();
  // Pass a lambda rather than the bare name `ceil`: when the float and
  // long double overloads from <cmath> are visible, the template parameter
  // cannot be deduced from an overload set.  The double argument picks the
  // double overload here.
  roundOpImpl([](double d) { return ceil(d); });
}
inline void OPTBLD_INLINE VMExecutionContext::iopStrlen(PC& pc) {
NEXT();
TypedValue* subj = m_stack.topTV();
+2
Ver Arquivo
@@ -570,6 +570,8 @@ enum SetOpOp {
O(Strlen, NA, ONE(CV), ONE(CV), NF) \
O(IncStat, TWO(IVA,IVA), NOV, NOV, NF) \
O(ArrayIdx, NA, THREE(CV,CV,CV), ONE(CV), NF) \
O(Floor, NA, ONE(CV), ONE(CV), NF) \
O(Ceil, NA, ONE(CV), ONE(CV), NF) \
O(HighInvalid, NA, NOV, NOV, NF) \
enum class Op : uint8_t {
+20
Ver Arquivo
@@ -1385,6 +1385,26 @@ bool CodeGenerator::emitDec(SSATmp* dst, SSATmp* src1, SSATmp* src2) {
return emitIncDecHelper(dst, src1, src2, &Asm::decq);
}
/*
 * Shared code generation for OpFloor and OpCeil.  Emits a roundsd with the
 * requested rounding direction from the instruction's first source into its
 * destination.
 */
void CodeGenerator::cgRoundCommon(IRInstruction* inst, RoundDirection dir) {
  auto result  = inst->dst();
  auto operand = inst->src(0);
  auto finalReg = m_regs[result].reg();

  // Get the operand into an XMM register (rCgXMM0 is handed to prepXMMReg
  // as the register to use if it needs one).
  auto srcXmm = prepXMMReg(operand, m_as, m_regs, rCgXMM0);

  // Round straight into the destination when it is an XMM register;
  // otherwise round into rCgXMM1 and move the result over afterwards.
  auto roundedXmm = finalReg.isXMM() ? finalReg : PhysReg(rCgXMM1);
  m_as.roundsd(dir, srcXmm, roundedXmm);
  emitMovRegReg(m_as, roundedXmm, finalReg);
}
// Code generation for OpFloor: the shared rounding emitter with the floor
// rounding direction.
void CodeGenerator::cgOpFloor(IRInstruction* inst) {
  auto const direction = RoundDirection::floor;
  cgRoundCommon(inst, direction);
}
// Code generation for OpCeil: the shared rounding emitter with the ceil
// rounding direction.
void CodeGenerator::cgOpCeil(IRInstruction* inst) {
  auto const direction = RoundDirection::ceil;
  cgRoundCommon(inst, direction);
}
void CodeGenerator::cgOpAdd(IRInstruction* inst) {
SSATmp* dst = inst->dst();
SSATmp* src1 = inst->src(0);
+2
Ver Arquivo
@@ -206,6 +206,8 @@ private:
enum Commutativity { Commutative, NonCommutative };
void cgRoundCommon(IRInstruction* inst, RoundDirection dir);
template<class Oper, class RegType>
void cgBinaryOp(IRInstruction*,
void (Asm::*intImm)(Immed, RegType),
+25
Ver Arquivo
@@ -15,6 +15,7 @@
*/
#include "hphp/runtime/vm/jit/hhbc-translator.h"
#include "folly/CpuId.h"
#include "hphp/util/trace.h"
#include "hphp/runtime/ext/ext_closure.h"
#include "hphp/runtime/ext/ext_continuation.h"
@@ -3319,6 +3320,30 @@ void HhbcTranslator::emitNot() {
gen(DecRef, src);
}
/*
 * Translate the Floor bytecode to IR: pop a cell, convert it to a double,
 * dec-ref the popped cell, and push OpFloor of the double.
 */
void HhbcTranslator::emitFloor() {
  // OpFloor needs roundsd, which requires SSE 4.1; punt without it.
  const bool haveSse41 = folly::CpuId().sse41();
  if (!haveSse41) {
    PUNT(Floor);
  }

  auto cell = popC();
  auto asDouble = gen(ConvCellToDbl, cell);
  gen(DecRef, cell);

  auto rounded = gen(OpFloor, asDouble);
  push(rounded);
}
/*
 * Translate the Ceil bytecode to IR: pop a cell, convert it to a double,
 * dec-ref the popped cell, and push OpCeil of the double.
 */
void HhbcTranslator::emitCeil() {
  // OpCeil needs roundsd, which requires SSE 4.1; punt without it.
  const bool haveSse41 = folly::CpuId().sse41();
  if (!haveSse41) {
    PUNT(Ceil);
  }

  auto cell = popC();
  auto asDouble = gen(ConvCellToDbl, cell);
  gen(DecRef, cell);

  auto rounded = gen(OpCeil, asDouble);
  push(rounded);
}
#define BINOP(Opp) \
void HhbcTranslator::emit ## Opp() { \
emitBinaryArith(Op ## Opp); \
+3
Ver Arquivo
@@ -329,6 +329,9 @@ struct HhbcTranslator {
void emitRetC(bool freeInline);
void emitRetV(bool freeInline);
void emitFloor();
void emitCeil();
// binary arithmetic ops
void emitAdd();
void emitSub();
+10
Ver Arquivo
@@ -439,6 +439,16 @@ IRTranslator::translateAddElemC(const NormalizedInstruction& i) {
HHIR_EMIT(AddElemC);
}
// Translation entry point for the Floor bytecode.  The whole body is the
// HHIR_EMIT(Floor) macro invocation.
// NOTE(review): presumably this forwards to HhbcTranslator::emitFloor() --
// confirm against the HHIR_EMIT definition.
void
IRTranslator::translateFloor(const NormalizedInstruction& i) {
HHIR_EMIT(Floor);
}
// Translation entry point for the Ceil bytecode.  The whole body is the
// HHIR_EMIT(Ceil) macro invocation.
// NOTE(review): presumably this forwards to HhbcTranslator::emitCeil() --
// confirm against the HHIR_EMIT definition.
void
IRTranslator::translateCeil(const NormalizedInstruction& i) {
HHIR_EMIT(Ceil);
}
void
IRTranslator::translateAddNewElemC(const NormalizedInstruction& i) {
assert(i.inputs.size() == 2);
+2
Ver Arquivo
@@ -272,6 +272,8 @@ O(OpEq, D(Bool), S(Gen) S(Gen), C|N) \
O(OpNeq, D(Bool), S(Gen) S(Gen), C|N) \
O(OpSame, D(Bool), S(Gen) S(Gen), C|N) \
O(OpNSame, D(Bool), S(Gen) S(Gen), C|N) \
O(OpFloor, D(Dbl), S(Dbl), C) \
O(OpCeil, D(Dbl), S(Dbl), C) \
O(InstanceOfBitmask, D(Bool), S(Cls) CStr, C) \
O(NInstanceOfBitmask, D(Bool), S(Cls) CStr, C) \
O(IsType, D(Bool), S(Cell), C) \
+27
Ver Arquivo
@@ -348,6 +348,8 @@ SSATmp* Simplifier::simplify(IRInstruction* inst) {
case ConvCellToBool:return simplifyConvCellToBool(inst);
case ConvCellToInt: return simplifyConvCellToInt(inst);
case ConvCellToDbl: return simplifyConvCellToDbl(inst);
case OpFloor: return simplifyFloor(inst);
case OpCeil: return simplifyCeil(inst);
case Unbox: return simplifyUnbox(inst);
case UnboxPtr: return simplifyUnboxPtr(inst);
case IsType:
@@ -1425,6 +1427,7 @@ SSATmp* Simplifier::simplifyConvDblToBool(IRInstruction* inst) {
if (src->isConst()) {
return cns(bool(src->getValDbl()));
}
return nullptr;
}
@@ -1608,6 +1611,30 @@ SSATmp* Simplifier::simplifyConvCellToDbl(IRInstruction* inst) {
return nullptr;
}
/*
 * Shared simplification for OpFloor and OpCeil.
 *
 *  - A constant source is folded by applying op to its double value.
 *  - A source produced by ConvIntToDbl or ConvBoolToDbl is already integral,
 *    so the rounding is a no-op and the source itself is returned.
 *
 * Returns nullptr when no simplification applies.
 */
template<class Oper>
SSATmp* Simplifier::simplifyRoundCommon(IRInstruction* inst, Oper op) {
  auto const operand = inst->src(0);
  if (operand->isConst()) return cns(op(operand->getValDbl()));

  switch (operand->inst()->op()) {
    case ConvIntToDbl:
    case ConvBoolToDbl:
      return operand;
    default:
      return nullptr;
  }
}
// Simplify OpFloor through the shared rounding simplifier.
SSATmp* Simplifier::simplifyFloor(IRInstruction* inst) {
  // A lambda is used instead of the bare name `floor`: with the <cmath>
  // float/long double overloads visible, `Oper` cannot be deduced from an
  // overload set.  The double argument picks the double overload.
  return simplifyRoundCommon(inst, [](double d) { return floor(d); });
}
// Simplify OpCeil through the shared rounding simplifier.
SSATmp* Simplifier::simplifyCeil(IRInstruction* inst) {
  // A lambda is used instead of the bare name `ceil`: with the <cmath>
  // float/long double overloads visible, `Oper` cannot be deduced from an
  // overload set.  The double argument picks the double overload.
  return simplifyRoundCommon(inst, [](double d) { return ceil(d); });
}
SSATmp* Simplifier::simplifyLdClsPropAddr(IRInstruction* inst) {
SSATmp* propName = inst->src(1);
if (!propName->isConst()) return nullptr;
+3
Ver Arquivo
@@ -107,6 +107,8 @@ private:
SSATmp* simplifyConvCellToBool(IRInstruction*);
SSATmp* simplifyConvCellToInt(IRInstruction*);
SSATmp* simplifyConvCellToDbl(IRInstruction*);
SSATmp* simplifyFloor(IRInstruction*);
SSATmp* simplifyCeil(IRInstruction*);
SSATmp* simplifyUnbox(IRInstruction*);
SSATmp* simplifyUnboxPtr(IRInstruction*);
SSATmp* simplifyCheckInit(IRInstruction* inst);
@@ -141,6 +143,7 @@ private:
template<class Oper>
SSATmp* simplifyShift(SSATmp* src1, SSATmp* src2, Oper op);
template<class Oper> SSATmp* simplifyRoundCommon(IRInstruction*, Oper);
private: // tracebuilder forwarders
template<class... Args> SSATmp* cns(Args&&...);
+3 -1
Ver Arquivo
@@ -167,7 +167,9 @@
CASE(DecodeCufIter) \
CASE(Shl) \
CASE(Shr) \
CASE(Div)
CASE(Div) \
CASE(Floor) \
CASE(Ceil) \
// These are instruction-like functions which cover more than one
// opcode.
+2
Ver Arquivo
@@ -1261,6 +1261,8 @@ static const struct {
{ OpStrlen, {Stack1, Stack1, OutStrlen, 0 }},
{ OpIncStat, {None, None, OutNone, 0 }},
{ OpArrayIdx, {StackTop3, Stack1, OutUnknown, -2 }},
{ OpFloor, {Stack1, Stack1, OutDouble, 0 }},
{ OpCeil, {Stack1, Stack1, OutDouble, 0 }},
/*** 14. Continuation instructions ***/
+56
Ver Arquivo
@@ -0,0 +1,56 @@
<?php
// Exercises floor() and ceil() on arguments whose types are only known at
// run time and on literal operands, then dumps every result in assignment
// order. The call at the bottom of the file passes $v1 = 3 and $v2 = -3
// (ints), $v3 = 3.5 and $v4 = -3.5 (floats), $v5 = 0 (int), $v6 = 0.0 (float).
function main($v1, $v2, $v3, $v4, $v5, $v6) {
// Integer arguments.
$a = floor($v1);
$b = ceil($v1);
$c = floor($v2);
$d = ceil($v2);
// Float literals with a fractional part, both signs.
$e = floor(5.5);
$f = floor(-5.5);
$g = ceil(5.5);
$h = ceil(-5.5);
// Integer literals, both signs.
$i = floor(2);
$j = ceil(2);
$k = floor(-2);
$l = ceil(-2);
// Zero literals: int zero, float zero, and negated float zero.
$m = floor(0);
$n = ceil(0);
$o = floor(0.0);
$p = ceil(-0.0);
// Float arguments with a fractional part.
$q = floor($v3);
$r = ceil($v3);
$s = floor($v4);
$t = ceil($v4);
// Zero arguments: int zero, then float zero.
$u = floor($v5);
$v = ceil($v5);
$w = floor($v6);
$x = ceil($v6);
// Dump the results in the same order they were computed.
var_dump($a);
var_dump($b);
var_dump($c);
var_dump($d);
var_dump($e);
var_dump($f);
var_dump($g);
var_dump($h);
var_dump($i);
var_dump($j);
var_dump($k);
var_dump($l);
var_dump($m);
var_dump($n);
var_dump($o);
var_dump($p);
var_dump($q);
var_dump($r);
var_dump($s);
var_dump($t);
var_dump($u);
var_dump($v);
var_dump($w);
var_dump($x);
}
main(3, -3, 3.5, -3.5, 0, 0.0);
+24
Ver Arquivo
@@ -0,0 +1,24 @@
float(3)
float(3)
float(-3)
float(-3)
float(5)
float(-6)
float(6)
float(-5)
float(2)
float(2)
float(-2)
float(-2)
float(0)
float(0)
float(0)
float(0)
float(3)
float(4)
float(-4)
float(-3)
float(0)
float(0)
float(0)
float(0)
+32 -6
Ver Arquivo
@@ -678,6 +678,8 @@ enum instrFlags {
IF_66PREFIXED = 0x4000, // instruction requires a manditory 0x66 prefix
IF_F3PREFIXED = 0x8000, // instruction requires a manditory 0xf3 prefix
IF_F2PREFIXED = 0x10000, // instruction requires a manditory 0xf2 prefix
IF_THREEBYTEOP = 0x20000, // instruction requires a 0x0F 0x3A prefix
IF_ROUND = 0x40000, // instruction is round(sp)d
};
/*
@@ -773,6 +775,14 @@ const X64Instr instr_nop = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x90 }, 0x0500 };
const X64Instr instr_shld = { { 0xA5,0xF1,0xA4,0x00,0xF1,0xF1 }, 0x0082 };
const X64Instr instr_shrd = { { 0xAD,0xF1,0xAC,0x00,0xF1,0xF1 }, 0x0082 };
const X64Instr instr_int3 = { { 0xF1,0xF1,0xF1,0x00,0xF1,0xCC }, 0x0500 };
// Encoding entry for roundsd.  The opcode byte 0x0b sits in table slot 2.
// The flag word 0x64112 includes IF_ROUND (0x40000), IF_THREEBYTEOP (0x20000)
// and IF_66PREFIXED (0x4000).
// NOTE(review): the remaining 0x112 bits are flags defined outside this
// view -- confirm against the full instrFlags enum.
// Written with `= { ... }` to match the neighbouring instruction definitions.
const X64Instr instr_roundsd = { { 0xF1,0xF1,0x0b,0x00,0xF1,0xF1 }, 0x64112 };
// Rounding mode selector for roundsd.  The enumerator's numeric value is
// passed unchanged as the instruction's immediate operand by the roundsd()
// emitter, so these values must not be renumbered.
enum class RoundDirection : ssize_t {
nearest = 0,   // round to the nearest integral value
floor = 1,     // round down (result not greater than the input)
ceil = 2,      // round up (result not less than the input)
truncate = 3,  // NOTE(review): presumably round toward zero -- confirm
};
enum ConditionCode {
CC_None = -1,
@@ -1185,6 +1195,10 @@ public:
void shlq (Reg64 r) { instrR(instr_shl, r); }
void sarq (Reg64 r) { instrR(instr_sar, r); }
// Emit roundsd from src into dst.  The rounding direction d is encoded as
// the instruction's immediate operand.
void roundsd(RoundDirection d, RegXMM src, RegXMM dst) {
  const ssize_t mode = static_cast<ssize_t>(d);
  emitIRR(instr_roundsd, rn(dst), rn(src), mode);
}
/*
* Control-flow directives. Primitive labeling/patching facilities
* are available, as well as slightly higher-level ones via the
@@ -1645,19 +1659,31 @@ public:
int r2 = int(rn2);
bool reverse = ((op.flags & IF_REVERSE) != 0);
// Opsize prefix
prefixBytes(0, opSz);
prefixBytes(op.flags, opSz);
// REX
unsigned char rex = 0;
if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
bool highByteReg = false;
if (opSz == sz::byte || (op.flags & IF_BYTEREG)) {
if (byteRegNeedsRex(r1) ||
(!(op.flags & IF_BYTEREG) && byteRegNeedsRex(r2))) {
rex |= 0x40;
}
r1 = byteRegEncodeNumber(r1, highByteReg);
r2 = byteRegEncodeNumber(r2, highByteReg);
}
if (r1 & 8) rex |= (reverse ? 1 : 4);
if (r2 & 8) rex |= (reverse ? 4 : 1);
if (rex) byte(0x40 | rex);
if (rex) {
byte(0x40 | rex);
if (highByteReg) byteRegMisuse();
}
// Determine the size of the immediate
int immSize = computeImmediateSize(op, imm, opSz);
// Use 2-byte opcode for cmovcc, setcc, movsx, movzx, movsx8, movzx8
// instructions
if ((op.flags & IF_TWOBYTEOP) != 0) byte(0x0F);
int opcode = (immSize == sz::byte && opSz != sz::byte) ?
if (op.flags & IF_TWOBYTEOP || op.flags & IF_THREEBYTEOP) byte(0x0F);
if (op.flags & IF_THREEBYTEOP) byte(0x3a);
int opcode = (immSize == sz::byte && opSz != sz::byte &&
(op.flags & IF_ROUND) == 0) ?
(op.table[2] | 2) : op.table[2];
byte(opcode);
if (reverse) {
+19
Ver Arquivo
@@ -724,6 +724,25 @@ sar %cl,%r8
)");
}
// Checks the roundsd encoding for every RoundDirection, using both low
// (xmm0-7) and extended (xmm8-15) registers.  The body is skipped when the
// host CPU does not report SSE 4.1.
TEST(Asm, FloatRounding) {
  if (!folly::CpuId().sse41()) {
    return;
  }

  Asm assembler;
  assembler.init(10 << 24);

  assembler.roundsd(RoundDirection::nearest,  xmm1,  xmm2);
  assembler.roundsd(RoundDirection::floor,    xmm2,  xmm4);
  assembler.roundsd(RoundDirection::ceil,     xmm8,  xmm7);
  assembler.roundsd(RoundDirection::truncate, xmm12, xmm9);

  expect_asm(assembler, R"(
roundsd $0x0,%xmm1,%xmm2
roundsd $0x1,%xmm2,%xmm4
roundsd $0x2,%xmm8,%xmm7
roundsd $0x3,%xmm12,%xmm9
)");
}
TEST(Asm, SSEDivision) {
Asm a;
a.init(10 << 24);