Arquivos
hhvm/hphp/runtime/vm/jit/memcpy-x64.cpp
T
Tim Starling 998951619f update copyright date
We did not intend to imply our copyrights last forever

Closes #759
2013-06-03 12:43:56 -07:00

94 linhas
2.9 KiB
C++

/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#ifdef __x86_64__
#include <emmintrin.h>
#include <stdint.h>
#include "hphp/util/util.h"
#include "hphp/util/assertions.h"
extern "C" {
HOT_FUNC
void*
memcpy(void* vdest, const void* vsrc, size_t len) {
auto src = (const char*)vsrc;
auto dest = (char*) vdest;
// We do a lot of small memcpy's, so we don't want to neglect their
// performance. For smaller-than-a-cacheline entities, just use
// regular loads and stores.
if (len & 0x3f) {
unsigned nBytes = len & 7;
switch (nBytes) {
case 7: dest[6] = src[6];
case 6: dest[5] = src[5];
case 5: dest[4] = src[4];
case 4: dest[3] = src[3];
case 3: dest[2] = src[2];
case 2: dest[1] = src[1];
case 1: dest[0] = src[0];
case 0: break;
}
len -= nBytes;
dest += nBytes;
src += nBytes;
assert((len & 7) == 0);
unsigned nQwordBytes = len & 0x3f;
auto qdest = (uint64_t*)dest;
auto qsrc = (const uint64_t*)src;
unsigned nQwords = nQwordBytes >> 3;
switch (nQwords) {
case 7: qdest[6] = qsrc[6];
case 6: qdest[5] = qsrc[5];
case 5: qdest[4] = qsrc[4];
case 4: qdest[3] = qsrc[3];
case 3: qdest[2] = qsrc[2];
case 2: qdest[1] = qsrc[1];
case 1: qdest[0] = qsrc[0];
case 0: break;
}
len -= nQwordBytes;
dest += nQwordBytes;
src += nQwordBytes;
}
// Do the bulk with fat loads/stores.
assert((len & 0x3f) == 0);
while (len) {
auto dqdest = (__m128i*)dest;
auto dqsrc = (__m128i*)src;
__m128i xmm0 = _mm_loadu_si128(dqsrc + 0);
__m128i xmm1 = _mm_loadu_si128(dqsrc + 1);
__m128i xmm2 = _mm_loadu_si128(dqsrc + 2);
__m128i xmm3 = _mm_loadu_si128(dqsrc + 3);
len -= 64;
dest += 64;
src += 64;
_mm_storeu_si128(dqdest + 0, xmm0);
_mm_storeu_si128(dqdest + 1, xmm1);
_mm_storeu_si128(dqdest + 2, xmm2);
_mm_storeu_si128(dqdest + 3, xmm3);
}
return vdest;
}
}
#endif // __x86_64__