2a793e36dc
Dumps information about allocation events in smart allocator, and aggregates some information such as bytes allocated per callsite in the current request (or across requests). This information can be gathered in the heaptrace command, and the information collected during the trace is enough to find how much memory an object is responsible for (i.e. keeping reachable). It's all conditionally compiled because perflab showed a pretty significant instruction regression if it was a runtime option. You'll need to fbconfig with --extra-cxxflags=-DMEMORY_PROFILING in order to use it. This isn't done, but I wanted to get some feedback on where I should take this (general design, what to log, where to dump stuff, etc)
481 linhas
15 KiB
C++
481 linhas
15 KiB
C++
/*
|
|
+----------------------------------------------------------------------+
|
|
| HipHop for PHP |
|
|
+----------------------------------------------------------------------+
|
|
| Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) |
|
|
| Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
|
|
+----------------------------------------------------------------------+
|
|
| This source file is subject to version 2.00 of the Zend license, |
|
|
| that is bundled with this package in the file LICENSE, and is |
|
|
| available through the world-wide-web at the following url: |
|
|
| http://www.zend.com/license/2_00.txt. |
|
|
| If you did not receive a copy of the Zend license and are unable to |
|
|
| obtain it through the world-wide-web, please send a note to |
|
|
| license@zend.com so we can mail you a copy immediately. |
|
|
+----------------------------------------------------------------------+
|
|
*/
|
|
|
|
#include "hphp/runtime/base/memory_manager.h"
|
|
|
|
// Get SIZE_MAX definition. Do this before including any other files, to make
|
|
// sure that this is the first place that stdint.h is included.
|
|
#ifndef __STDC_LIMIT_MACROS
|
|
#define __STDC_LIMIT_MACROS
|
|
#endif
|
|
#define __STDC_LIMIT_MACROS
|
|
|
|
#include "hphp/runtime/base/smart_allocator.h"
|
|
#include "hphp/runtime/base/leak_detectable.h"
|
|
#include "hphp/runtime/base/sweepable.h"
|
|
#include "hphp/runtime/base/memory_profile.h"
|
|
#include "hphp/runtime/base/builtin_functions.h"
|
|
#include "hphp/runtime/base/runtime_option.h"
|
|
#include "hphp/runtime/server/http_server.h"
|
|
#include "hphp/util/alloc.h"
|
|
#include "hphp/util/process.h"
|
|
#include "hphp/util/trace.h"
|
|
|
|
#include <stdint.h>
|
|
|
|
namespace HPHP {
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
TRACE_SET_MOD(smartalloc);
|
|
#ifdef USE_JEMALLOC
|
|
// Set to true by threadStatsInit() only after every jemalloc MIB lookup
// below has succeeded.
bool MemoryManager::s_statsEnabled = false;

// MemTotal - footprint, as computed by threadStatsInit(). Stays 0 when the
// lookups fail or MemTotal does not exceed the footprint.
size_t MemoryManager::s_cactiveLimitCeiling = 0;

// MIBs resolved once by mallctlnametomib() for "thread.allocatedp",
// "thread.deallocatedp" and "stats.cactive", then reused by mallctlbymib().
static size_t threadAllocatedpMib[2];
static size_t threadDeallocatedpMib[2];
static size_t statsCactiveMib[2];

// pthread_once() control that makes threadStatsInit() run a single time.
static pthread_once_t threadStatsOnce = PTHREAD_ONCE_INIT;
|
|
static void threadStatsInit() {
|
|
if (!mallctlnametomib) return;
|
|
size_t miblen = sizeof(threadAllocatedpMib) / sizeof(size_t);
|
|
if (mallctlnametomib("thread.allocatedp", threadAllocatedpMib, &miblen)) {
|
|
return;
|
|
}
|
|
miblen = sizeof(threadDeallocatedpMib) / sizeof(size_t);
|
|
if (mallctlnametomib("thread.deallocatedp", threadDeallocatedpMib, &miblen)) {
|
|
return;
|
|
}
|
|
miblen = sizeof(statsCactiveMib) / sizeof(size_t);
|
|
if (mallctlnametomib("stats.cactive", statsCactiveMib, &miblen)) {
|
|
return;
|
|
}
|
|
MemoryManager::s_statsEnabled = true;
|
|
|
|
// In threadStats() we wish to solve for cactiveLimit in:
|
|
//
|
|
// footprint + cactiveLimit + headRoom == MemTotal
|
|
//
|
|
// However, headRoom comes from RuntimeOption::ServerMemoryHeadRoom, which
|
|
// isn't initialized until after the code here runs. Therefore, compute
|
|
// s_cactiveLimitCeiling here in order to amortize the cost of introspecting
|
|
// footprint and MemTotal.
|
|
//
|
|
// cactiveLimit == (MemTotal - footprint) - headRoom
|
|
//
|
|
// cactiveLimit == s_cactiveLimitCeiling - headRoom
|
|
// where
|
|
// s_cactiveLimitCeiling == MemTotal - footprint
|
|
size_t footprint = Process::GetCodeFootprint(Process::GetProcessId());
|
|
size_t MemTotal = 0;
|
|
#ifndef __APPLE__
|
|
size_t pageSize = size_t(sysconf(_SC_PAGESIZE));
|
|
MemTotal = size_t(sysconf(_SC_PHYS_PAGES)) * pageSize;
|
|
#else
|
|
int mib[2] = { CTL_HW, HW_MEMSIZE };
|
|
u_int namelen = sizeof(mib) / sizeof(mib[0]);
|
|
size_t len = sizeof(MemTotal);
|
|
sysctl(mib, namelen, &MemTotal, &len, nullptr, 0);
|
|
#endif
|
|
if (MemTotal > footprint) {
|
|
MemoryManager::s_cactiveLimitCeiling = MemTotal - footprint;
|
|
}
|
|
}
|
|
|
|
static inline void threadStats(uint64_t*& allocated, uint64_t*& deallocated,
|
|
size_t*& cactive, size_t& cactiveLimit) {
|
|
pthread_once(&threadStatsOnce, threadStatsInit);
|
|
if (!MemoryManager::s_statsEnabled) return;
|
|
|
|
size_t len = sizeof(allocated);
|
|
if (mallctlbymib(threadAllocatedpMib,
|
|
sizeof(threadAllocatedpMib) / sizeof(size_t),
|
|
&allocated, &len, nullptr, 0)) {
|
|
not_reached();
|
|
}
|
|
|
|
len = sizeof(deallocated);
|
|
if (mallctlbymib(threadDeallocatedpMib,
|
|
sizeof(threadDeallocatedpMib) / sizeof(size_t),
|
|
&deallocated, &len, nullptr, 0)) {
|
|
not_reached();
|
|
}
|
|
|
|
len = sizeof(cactive);
|
|
if (mallctlbymib(statsCactiveMib,
|
|
sizeof(statsCactiveMib) / sizeof(size_t),
|
|
&cactive, &len, nullptr, 0)) {
|
|
not_reached();
|
|
}
|
|
|
|
size_t headRoom = RuntimeOption::ServerMemoryHeadRoom;
|
|
// Compute cactiveLimit based on s_cactiveLimitCeiling, as computed in
|
|
// threadStatsInit().
|
|
if (headRoom != 0 && headRoom < MemoryManager::s_cactiveLimitCeiling) {
|
|
cactiveLimit = MemoryManager::s_cactiveLimitCeiling - headRoom;
|
|
} else {
|
|
cactiveLimit = SIZE_MAX;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
static void* MemoryManagerInit() {
|
|
// We store the free list pointers right at the start of each object,
|
|
// overlapping SmartHeader.data, and we also clobber _count as a
|
|
// free-object flag when the object is deallocated.
|
|
// This assert just makes sure they don't overflow.
|
|
static_assert(FAST_REFCOUNT_OFFSET + sizeof(int) <=
|
|
SmartAllocatorImpl::MinItemSize,
|
|
"MinItemSize is too small");
|
|
MemoryManager::TlsWrapper tls;
|
|
return (void*)tls.getNoCheck;
|
|
}
|
|
|
|
void* MemoryManager::TlsInitSetup = MemoryManagerInit();
|
|
|
|
/**
 * Constructs a MemoryManager in place inside caller-provided storage.
 *
 * @param storage  memory in which the object is built with placement new
 */
void MemoryManager::Create(void* storage) {
  void* const where = storage;
  new (where) MemoryManager();
}
|
|
|
|
/**
 * Runs the destructor of mm explicitly; the storage itself is not released
 * here.
 */
void MemoryManager::Delete(MemoryManager* mm) {
  MemoryManager& doomed = *mm;
  doomed.~MemoryManager();
}
|
|
|
|
/**
 * Thread-exit hook: explicitly destroys mm without freeing its storage.
 */
void MemoryManager::OnThreadExit(MemoryManager* mm) {
  MemoryManager& exiting = *mm;
  exiting.~MemoryManager();
}
|
|
|
|
/**
 * Creates an iterator positioned at the first SmartAllocatorImpl registered
 * with *mman. mman is dereferenced unconditionally.
 */
MemoryManager::AllocIterator::AllocIterator(const MemoryManager* mman)
    : m_mman(*mman),
      m_it(m_mman.m_smartAllocators.begin()) {
}
|
|
|
|
/**
 * @return the allocator the iterator currently points at, or null once the
 *         iterator has reached the end of the allocator list.
 */
SmartAllocatorImpl* MemoryManager::AllocIterator::current() const {
  if (m_it == m_mman.m_smartAllocators.end()) {
    return nullptr;
  }
  return *m_it;
}
|
|
|
|
/**
 * Advances to the next registered allocator. No end-of-list check is made
 * here; callers detect the end through current().
 */
void MemoryManager::AllocIterator::next() {
  ++m_it;
}
|
|
|
|
/**
 * Builds a manager with no current slab (m_front and m_limit are null),
 * zeroed statistics, an effectively unlimited maxBytes, and empty sweep and
 * string lists.
 */
MemoryManager::MemoryManager()
    : m_front(nullptr),
      m_limit(nullptr),
      m_enabled(RuntimeOption::EnableMemoryManager) {
#ifdef USE_JEMALLOC
  // Must run before resetStats(), which reads through m_allocated and
  // m_deallocated when stats are enabled.
  threadStats(m_allocated, m_deallocated, m_cactive, m_cactiveLimit);
#endif
  resetStats();
  m_stats.maxBytes = INT64_MAX;

  // An empty circular list is a sentinel whose links point back at itself.
  m_sweep.prev = &m_sweep;
  m_sweep.next = &m_sweep;
  m_strings.prev = &m_strings;
  m_strings.next = &m_strings;
}
|
|
|
|
/**
 * Zeroes the usage/alloc counters and their peaks. With jemalloc stats
 * enabled it also clears jemallocDebt and re-bases m_prevAllocated and
 * m_delta on the thread's current allocated/deallocated counters.
 */
void MemoryManager::resetStats() {
  m_stats.totalAlloc = 0;
  m_stats.peakAlloc = 0;
  m_stats.peakUsage = 0;
  m_stats.alloc = 0;
  m_stats.usage = 0;
#ifdef USE_JEMALLOC
  if (!s_statsEnabled) {
    return;
  }
  m_stats.jemallocDebt = 0;
  m_prevAllocated = int64_t(*m_allocated);
  m_delta = m_prevAllocated - int64_t(*m_deallocated);
#endif
}
|
|
|
|
/**
 * Out-of-line wrapper around refreshStats(); marked NEVER_INLINE so the call
 * sites stay small.
 */
NEVER_INLINE
void MemoryManager::refreshStatsHelper() {
  this->refreshStats();
}
|
|
|
|
void MemoryManager::refreshStatsHelperExceeded() {
|
|
ThreadInfo* info = ThreadInfo::s_threadInfo.getNoCheck();
|
|
info->m_reqInjectionData.setMemExceededFlag();
|
|
}
|
|
|
|
#ifdef USE_JEMALLOC
|
|
/**
 * Asks the HTTP server to stop, then lifts this manager's cactive limit.
 */
void MemoryManager::refreshStatsHelperStop() {
  HttpServer::Server->stop();
  // With the limit at its maximum the stop path cannot be triggered again
  // for this manager.
  m_cactiveLimit = SIZE_MAX;
}
|
|
#endif
|
|
|
|
/**
 * Registers a smart allocator with this manager.
 *
 * @param allocator  allocator to append to m_smartAllocators; must not be
 *                   null (asserted)
 */
void MemoryManager::add(SmartAllocatorImpl *allocator) {
  assert(allocator);
  m_smartAllocators.push_back(allocator);
}
|
|
|
|
/**
 * Delegates to Sweepable::SweepAll().
 */
void MemoryManager::sweepAll() {
  Sweepable::SweepAll();
}
|
|
|
|
// Header stored directly in front of every small smart_malloc block.
struct SmallNode {
  // Padded size of the block, header included. A value <= kMaxSmartSize
  // identifies the block as small.
  size_t padbytes;
};
|
|
|
|
// Read-only iterator over a vector of slab pointers.
using SlabIter = std::vector<char*>::const_iterator;
|
|
|
|
void MemoryManager::rollback() {
|
|
StringData::sweepAll();
|
|
for (unsigned int i = 0, n = m_smartAllocators.size(); i < n; i++) {
|
|
m_smartAllocators[i]->clear();
|
|
}
|
|
// free smart-malloc slabs
|
|
for (SlabIter i = m_slabs.begin(), end = m_slabs.end(); i != end; ++i) {
|
|
free(*i);
|
|
}
|
|
m_slabs.clear();
|
|
// free large allocation blocks
|
|
for (SweepNode *n = m_sweep.next, *next; n != &m_sweep; n = next) {
|
|
next = n->next;
|
|
free(n);
|
|
}
|
|
m_sweep.next = m_sweep.prev = &m_sweep;
|
|
// zero out freelists
|
|
for (unsigned i = 0; i < kNumSizes; i++) {
|
|
m_smartfree[i].clear();
|
|
}
|
|
m_front = m_limit = 0;
|
|
}
|
|
|
|
/**
 * Delegates to LeakDetectable::LogMallocStats().
 */
void MemoryManager::logStats() {
  LeakDetectable::LogMallocStats();
}
|
|
|
|
/**
 * Prints a summary for the calling thread to stdout: current and peak
 * usage/alloc (after refreshing the stats) and the total slab size in KiB.
 */
void MemoryManager::checkMemory() {
  printf("----- MemoryManager for Thread %ld -----\n", (long)pthread_self());

  refreshStats();
  printf("Current Usage: %" PRId64 " bytes\t", m_stats.usage);
  printf("Current Alloc: %" PRId64 " bytes\n", m_stats.alloc);
  printf("Peak Usage: %" PRId64 " bytes\t", m_stats.peakUsage);
  printf("Peak Alloc: %" PRId64 " bytes\n", m_stats.peakAlloc);

  // The slab total is a size_t quantity. %lu only matches it where size_t
  // happens to be unsigned long, so pin the type and print it with %zu.
  const size_t slabKiB = m_slabs.size() * SLAB_SIZE / 1024;
  printf("Slabs: %zu KiB\n", slabKiB);
}
|
|
|
|
//
|
|
// smart_malloc implementation notes
|
|
//
|
|
// These functions allocate all small blocks from a single slab,
|
|
// and defer larger allocations directly to malloc. When small blocks
|
|
// are freed they're placed on the appropriate size-segregated freelist
// (m_smartfree[i]). Small blocks have an 8-byte SmallNode and
|
|
// are swept en-masse when slabs are freed.
|
|
//
|
|
// Medium blocks use a 16-byte SweepNode header to maintain a doubly-linked
|
|
// list of blocks to free at request end. smart_free can distinguish
|
|
// SmallNode and SweepNode because valid next/prev pointers must be
|
|
// larger than kMaxSmartSize.
|
|
//
|
|
|
|
inline void* MemoryManager::smartMalloc(size_t nbytes) {
|
|
assert(nbytes > 0);
|
|
// add room for header before rounding up
|
|
size_t padbytes = (nbytes + sizeof(SmallNode) + kMask) & ~kMask;
|
|
if (LIKELY(padbytes <= kMaxSmartSize)) {
|
|
m_stats.usage += padbytes;
|
|
unsigned i = (padbytes - 1) >> kLgSizeQuantum;
|
|
assert(i < kNumSizes);
|
|
void* p = m_smartfree[i].maybePop();
|
|
if (LIKELY(p != 0)) return p;
|
|
char* mem = m_front;
|
|
if (LIKELY(mem + padbytes <= m_limit)) {
|
|
m_front = mem + padbytes;
|
|
SmallNode* n = (SmallNode*) mem;
|
|
n->padbytes = padbytes;
|
|
return n + 1;
|
|
}
|
|
return smartMallocSlab(padbytes);
|
|
}
|
|
return smartMallocBig(nbytes);
|
|
}
|
|
|
|
inline void MemoryManager::smartFree(void* ptr) {
|
|
assert(ptr != 0);
|
|
SweepNode* n = ((SweepNode*)ptr) - 1;
|
|
size_t padbytes = n->padbytes;
|
|
if (LIKELY(padbytes <= kMaxSmartSize)) {
|
|
assert(memset(ptr, kSmartFreeFill, padbytes - sizeof(SmallNode)));
|
|
unsigned i = (padbytes - 1) >> kLgSizeQuantum;
|
|
assert(i < kNumSizes);
|
|
m_smartfree[i].push(ptr);
|
|
m_stats.usage -= padbytes;
|
|
return;
|
|
}
|
|
smartFreeBig(n);
|
|
}
|
|
|
|
// quick-and-dirty realloc implementation. We could do better if the block
|
|
// is malloc'd, by deferring to the underlying realloc.
|
|
inline void* MemoryManager::smartRealloc(void* ptr, size_t nbytes) {
|
|
assert(ptr != 0 && nbytes > 0);
|
|
SweepNode* n = ((SweepNode*)ptr) - 1;
|
|
size_t old_padbytes = n->padbytes;
|
|
if (LIKELY(old_padbytes <= kMaxSmartSize)) {
|
|
void* newmem = smartMalloc(nbytes);
|
|
memcpy(newmem, ptr, std::min(old_padbytes - sizeof(SmallNode), nbytes));
|
|
smartFree(ptr);
|
|
return newmem;
|
|
}
|
|
SweepNode* next = n->next;
|
|
SweepNode* prev = n->prev;
|
|
SweepNode* n2 = (SweepNode*) realloc(n, nbytes + sizeof(SweepNode));
|
|
|
|
// ensure that we have not exceeded the per request memory limit (#2529805)
|
|
refreshStatsHelper();
|
|
if (n2 != n) {
|
|
// block moved; must re-link to sweeplist
|
|
next->prev = prev->next = n2;
|
|
}
|
|
return n2 + 1;
|
|
}
|
|
|
|
/**
|
|
* Get a new slab, then allocate nbytes from it and install it in our
|
|
* slab list. Return the newly allocated nbytes-sized block.
|
|
*/
|
|
NEVER_INLINE char* MemoryManager::newSlab(size_t nbytes) {
|
|
if (UNLIKELY(m_stats.usage > m_stats.maxBytes)) {
|
|
refreshStatsHelper();
|
|
}
|
|
char* slab = (char*) Util::safe_malloc(SLAB_SIZE);
|
|
JEMALLOC_STATS_ADJUST(&m_stats, SLAB_SIZE);
|
|
m_stats.alloc += SLAB_SIZE;
|
|
if (m_stats.alloc > m_stats.peakAlloc) {
|
|
m_stats.peakAlloc = m_stats.alloc;
|
|
}
|
|
m_slabs.push_back(slab);
|
|
m_front = slab + nbytes;
|
|
m_limit = slab + SLAB_SIZE;
|
|
return slab;
|
|
}
|
|
|
|
/**
 * Slow path of smartMalloc(): starts a new slab and carves the small block
 * from its beginning.
 *
 * @param padbytes  padded block size, header included
 * @return pointer just past the block's SmallNode header
 */
NEVER_INLINE
void* MemoryManager::smartMallocSlab(size_t padbytes) {
  SmallNode* header = (SmallNode*) newSlab(padbytes);
  header->padbytes = padbytes;
  void* const payload = header + 1;
  return payload;
}
|
|
|
|
/**
 * Links a freshly allocated big block at the head of the sweep list (right
 * after the m_sweep sentinel). Refreshes the stats first when usage is above
 * maxBytes.
 *
 * @param n  header of the new block
 * @return pointer to the payload that follows the header
 */
inline void* MemoryManager::smartEnlist(SweepNode* n) {
  if (UNLIKELY(m_stats.usage > m_stats.maxBytes)) {
    refreshStatsHelper();
  }

  SweepNode* const oldFirst = m_sweep.next;
  n->next = oldFirst;
  n->prev = &m_sweep;
  m_sweep.next = n;
  oldFirst->prev = n;

  // smartFree() tells big blocks from small ones by this field being larger
  // than any small-block size.
  assert(n->padbytes > kMaxSmartSize);
  return n + 1;
}
|
|
|
|
/**
 * Allocates a big block through Util::safe_malloc with room for a SweepNode
 * header in front, and links it into the sweep list.
 *
 * @param nbytes  payload size, > 0 (asserted)
 * @return pointer to the payload
 */
NEVER_INLINE
void* MemoryManager::smartMallocBig(size_t nbytes) {
  assert(nbytes > 0);
  const size_t withHeader = nbytes + sizeof(SweepNode);
  SweepNode* header = (SweepNode*) Util::safe_malloc(withHeader);
  return smartEnlist(header);
}
|
|
|
|
/**
 * Zero-initialized counterpart of smartMallocBig(): allocates through
 * Util::safe_calloc, header included, and links the block into the sweep
 * list.
 *
 * @param totalbytes  payload size, > 0 (asserted)
 * @return pointer to the payload
 */
NEVER_INLINE
void* MemoryManager::smartCallocBig(size_t totalbytes) {
  assert(totalbytes > 0);
  const size_t withHeader = totalbytes + sizeof(SweepNode);
  SweepNode* header = (SweepNode*) Util::safe_calloc(withHeader, 1);
  return smartEnlist(header);
}
|
|
|
|
/**
 * Unlinks a big block from the sweep list and frees it.
 *
 * @param n  header of the block to release
 */
NEVER_INLINE
void MemoryManager::smartFreeBig(SweepNode* n) {
  SweepNode* const after = n->next;
  SweepNode* const before = n->prev;
  // Bridge the neighbours over n before its memory goes away.
  before->next = after;
  after->prev = before;
  free(n);
}
|
|
|
|
// allocate nbytes from the current slab, aligned to 16-bytes
|
|
inline void* MemoryManager::slabAlloc(size_t nbytes) {
|
|
const size_t kAlignMask = 15;
|
|
assert((nbytes & 7) == 0);
|
|
char* ptr = (char*)(uintptr_t(m_front + kAlignMask) & ~kAlignMask);
|
|
if (ptr + nbytes <= m_limit) {
|
|
m_front = ptr + nbytes;
|
|
return ptr;
|
|
}
|
|
return newSlab(nbytes);
|
|
}
|
|
|
|
// Shorthand for the MemoryManager returned by
// MemoryManager::TheMemoryManager(), as a reference.
static inline MemoryManager& MM() {
  MemoryManager* const current = MemoryManager::TheMemoryManager();
  return *current;
}
|
|
|
|
// smart_malloc api entry points, with support for malloc/free corner cases.
|
|
|
|
/**
 * malloc-style entry point. A zero-byte request is served as a one-byte
 * allocation.
 */
HOT_FUNC
void* smart_malloc(size_t nbytes) {
  const size_t effective = nbytes != 0 ? nbytes : size_t(1);
  return MM().smartMalloc(effective);
}
|
|
|
|
/**
 * calloc-style entry point: returns zero-filled memory for count elements of
 * nbytes each. A zero-sized request is served as a one-byte allocation.
 *
 * @return the zeroed block, or nullptr when count * nbytes does not fit in a
 *         size_t (the same answer calloc() gives for an impossible request)
 */
HOT_FUNC
void* smart_calloc(size_t count, size_t nbytes) {
  // Without this check the product silently wraps around and the caller
  // receives a buffer far smaller than the count * nbytes it will write to.
  if (UNLIKELY(nbytes != 0 && count > SIZE_MAX / nbytes)) {
    return nullptr;
  }
  size_t totalbytes = std::max(nbytes * count, size_t(1));
  if (totalbytes <= MemoryManager::kMaxSmartSize) {
    // Small blocks may be recycled from a free list, so clear them by hand.
    return memset(MM().smartMalloc(totalbytes), 0, totalbytes);
  }
  return MM().smartCallocBig(totalbytes);
}
|
|
|
|
/**
 * realloc-style entry point with the usual corner cases:
 *  - null ptr behaves like smart_malloc(nbytes);
 *  - non-null ptr with nbytes == 0 frees the block and returns null;
 *  - otherwise the block is resized through MemoryManager::smartRealloc().
 */
HOT_FUNC
void* smart_realloc(void* ptr, size_t nbytes) {
  if (!ptr) {
    return MM().smartMalloc(std::max(nbytes, size_t(1)));
  }
  if (!nbytes) {
    // ptr is known to be non-null at this point.
    MM().smartFree(ptr);
    return (void*)0;
  }
  return MM().smartRealloc(ptr, nbytes);
}
|
|
|
|
/**
 * free-style entry point; a null pointer is ignored.
 */
HOT_FUNC
void smart_free(void* ptr) {
  if (!ptr) {
    return;
  }
  MM().smartFree(ptr);
}
|
|
|
|
// SmartAllocator facade
|
|
|
|
/**
 * Hands out one item of this allocator's fixed size: a recycled item from
 * the free list when there is one, otherwise fresh memory from the current
 * slab. The allocation is added to the usage stat and reported to the
 * memory profiler.
 *
 * @param nbytes  must equal m_itemSize (asserted)
 * @return the allocated item
 */
HOT_FUNC
void* SmartAllocatorImpl::alloc(size_t nbytes) {
  assert(nbytes == size_t(m_itemSize));
  MM().getStats().usage += nbytes;

  void* item = m_free.maybePop();
  if (UNLIKELY(item == nullptr)) {
    item = MM().slabAlloc(nbytes);
  }

  TRACE(1, "alloc %zu -> %p\n", nbytes, item);
  MemoryProfile::logAllocation(item, nbytes);
  return item;
}
|
|
|
|
/**
 * Reports the deallocation of ptr to the memory profiler.
 */
void SmartAllocatorImpl::logDealloc(void *ptr) {
  MemoryProfile::logDeallocation(ptr);
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
}
|