/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
   +----------------------------------------------------------------------+
   | This source file is subject to version 2.00 of the Zend license,     |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.zend.com/license/2_00.txt.                                |
   | If you did not receive a copy of the Zend license and are unable to  |
   | obtain it through the world-wide-web, please send a note to          |
   | license@zend.com so we can mail you a copy immediately.              |
   +----------------------------------------------------------------------+
*/

#include "hphp/runtime/base/memory_manager.h"

// Get SIZE_MAX definition.  Do this before including any other files, to make
// sure that this is the first place that stdint.h is included.
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#define __STDC_LIMIT_MACROS

#include "hphp/runtime/base/smart_allocator.h"
#include "hphp/runtime/base/leak_detectable.h"
#include "hphp/runtime/base/sweepable.h"
#include "hphp/runtime/base/builtin_functions.h"
#include "hphp/runtime/base/runtime_option.h"
#include "hphp/runtime/server/http_server.h"
#include "hphp/util/alloc.h"
#include "hphp/util/process.h"
#include "hphp/util/trace.h"

#include <stdint.h>

namespace HPHP {
///////////////////////////////////////////////////////////////////////////////

TRACE_SET_MOD(smartalloc);
#ifdef USE_JEMALLOC
// Set to true by threadStatsInit() once all three jemalloc MIBs below have
// been resolved; stays false when mallctlnametomib is null or a lookup fails.
bool MemoryManager::s_statsEnabled = false;
// MemTotal minus the code footprint, computed once in threadStatsInit().
// Stays 0 when MemTotal does not exceed the footprint.
size_t MemoryManager::s_cactiveLimitCeiling = 0;

// MIBs for the "thread.allocatedp", "thread.deallocatedp" and "stats.cactive"
// mallctl names; filled in by threadStatsInit() and consumed by threadStats().
static size_t threadAllocatedpMib[2];
static size_t threadDeallocatedpMib[2];
static size_t statsCactiveMib[2];
// pthread_once() control used by threadStats() to run threadStatsInit().
static pthread_once_t threadStatsOnce = PTHREAD_ONCE_INIT;
// One-time setup, run through pthread_once() from threadStats().
//
// Resolves the jemalloc MIBs for "thread.allocatedp", "thread.deallocatedp"
// and "stats.cactive".  If mallctlnametomib is null or any lookup fails, the
// function returns early and MemoryManager::s_statsEnabled stays false.
// Otherwise stats are enabled and s_cactiveLimitCeiling is computed.
static void threadStatsInit() {
  if (!mallctlnametomib) return;
  size_t miblen = sizeof(threadAllocatedpMib) / sizeof(size_t);
  if (mallctlnametomib("thread.allocatedp", threadAllocatedpMib, &miblen)) {
    return;
  }
  miblen = sizeof(threadDeallocatedpMib) / sizeof(size_t);
  if (mallctlnametomib("thread.deallocatedp", threadDeallocatedpMib, &miblen)) {
    return;
  }
  miblen = sizeof(statsCactiveMib) / sizeof(size_t);
  if (mallctlnametomib("stats.cactive", statsCactiveMib, &miblen)) {
    return;
  }
  MemoryManager::s_statsEnabled = true;

  // In threadStats() we wish to solve for cactiveLimit in:
  //
  //   footprint + cactiveLimit + headRoom == MemTotal
  //
  // However, headRoom comes from RuntimeOption::ServerMemoryHeadRoom, which
  // isn't initialized until after the code here runs.  Therefore, compute
  // s_cactiveLimitCeiling here in order to amortize the cost of introspecting
  // footprint and MemTotal.
  //
  //   cactiveLimit == (MemTotal - footprint) - headRoom
  //
  //   cactiveLimit == s_cactiveLimitCeiling - headRoom
  // where
  //   s_cactiveLimitCeiling == MemTotal - footprint
  size_t footprint = Process::GetCodeFootprint(Process::GetProcessId());
  size_t MemTotal  = 0;
#ifndef __APPLE__
  // sysconf() reports failure as -1.  Casting that straight to size_t and
  // multiplying would produce a meaningless MemTotal, so only trust positive
  // answers and otherwise leave MemTotal at 0 (ceiling stays 0).
  const long pageSize = sysconf(_SC_PAGESIZE);
  const long physPages = sysconf(_SC_PHYS_PAGES);
  if (pageSize > 0 && physPages > 0) {
    MemTotal = size_t(physPages) * size_t(pageSize);
  }
#else
  int mib[2] = { CTL_HW, HW_MEMSIZE };
  u_int namelen = sizeof(mib) / sizeof(mib[0]);
  size_t len = sizeof(MemTotal);
  if (sysctl(mib, namelen, &MemTotal, &len, nullptr, 0) != 0) {
    // Query failed: do not trust whatever may have been written.
    MemTotal = 0;
  }
#endif
  if (MemTotal > footprint) {
    MemoryManager::s_cactiveLimitCeiling = MemTotal - footprint;
  }
}

// Points the caller's references at this thread's jemalloc counters and works
// out the cactive limit.
//
// threadStatsInit() is run (once) first.  When stats are not enabled the four
// out-parameters are left untouched.  Otherwise `allocated`, `deallocated` and
// `cactive` receive the pointers reported by mallctlbymib(), and cactiveLimit
// becomes s_cactiveLimitCeiling - ServerMemoryHeadRoom, or SIZE_MAX when the
// head room is zero or not smaller than the ceiling.
static inline void threadStats(uint64_t*& allocated, uint64_t*& deallocated,
                               size_t*& cactive, size_t& cactiveLimit) {
  pthread_once(&threadStatsOnce, threadStatsInit);
  if (!MemoryManager::s_statsEnabled) {
    return;
  }

  const size_t allocatedMibLen = sizeof(threadAllocatedpMib) / sizeof(size_t);
  size_t allocatedLen = sizeof(allocated);
  if (mallctlbymib(threadAllocatedpMib, allocatedMibLen,
                   &allocated, &allocatedLen, nullptr, 0) != 0) {
    not_reached();
  }

  const size_t deallocatedMibLen =
    sizeof(threadDeallocatedpMib) / sizeof(size_t);
  size_t deallocatedLen = sizeof(deallocated);
  if (mallctlbymib(threadDeallocatedpMib, deallocatedMibLen,
                   &deallocated, &deallocatedLen, nullptr, 0) != 0) {
    not_reached();
  }

  const size_t cactiveMibLen = sizeof(statsCactiveMib) / sizeof(size_t);
  size_t cactiveLen = sizeof(cactive);
  if (mallctlbymib(statsCactiveMib, cactiveMibLen,
                   &cactive, &cactiveLen, nullptr, 0) != 0) {
    not_reached();
  }

  // Derive the limit from the ceiling cached by threadStatsInit().
  const size_t ceiling = MemoryManager::s_cactiveLimitCeiling;
  const size_t headRoom = RuntimeOption::ServerMemoryHeadRoom;
  const bool limited = headRoom != 0 && headRoom < ceiling;
  cactiveLimit = limited ? ceiling - headRoom : SIZE_MAX;
}
#endif

// Static-initialization hook whose result is stored in
// MemoryManager::TlsInitSetup.  It carries a compile-time layout check and
// returns the TlsWrapper's getNoCheck member converted to void*.
static void* MemoryManagerInit() {
  // A freed object keeps its free-list pointer at the very start of the
  // object (overlapping SmartHeader.data), and _count is clobbered to act as
  // a free-object flag.  The smallest item must therefore reach past _count.
  static_assert(SmartAllocatorImpl::MinItemSize >=
                FAST_REFCOUNT_OFFSET + sizeof(int),
                "MinItemSize is too small");
  MemoryManager::TlsWrapper tls;
  void* const accessor = (void*)tls.getNoCheck;
  return accessor;
}

// Initialized by calling MemoryManagerInit(); holds the value it returns.
void* MemoryManager::TlsInitSetup = MemoryManagerInit();

// Placement-constructs a MemoryManager in the caller-provided storage.
// NOTE(review): assumes `storage` is large enough and suitably aligned for a
// MemoryManager — confirm against the caller.
void MemoryManager::Create(void* storage) {
  new (storage) MemoryManager();
}

// Runs the destructor on `mm` in place.  The storage itself is not released
// here.
void MemoryManager::Delete(MemoryManager* mm) {
  mm->~MemoryManager();
}

// Runs the destructor on `mm` in place, exactly like Delete().
// Presumably invoked when the owning thread exits — confirm with the caller.
void MemoryManager::OnThreadExit(MemoryManager* mm) {
  mm->~MemoryManager();
}

// Starts an iteration over the allocators registered with *mman, positioned on
// the first one.  `mman` is dereferenced, so it must not be null.
MemoryManager::AllocIterator::AllocIterator(const MemoryManager* mman)
  : m_mman(*mman)
  , m_it(m_mman.m_smartAllocators.begin())
{}

// Returns the allocator the iterator is positioned on, or null once the
// iteration has run past the last registered allocator.
SmartAllocatorImpl*
MemoryManager::AllocIterator::current() const {
  if (m_it == m_mman.m_smartAllocators.end()) {
    return nullptr;
  }
  return *m_it;
}

// Advances to the next registered allocator.  There is no end check here;
// callers detect the end through current() returning null.
void MemoryManager::AllocIterator::next() {
  ++m_it;
}

// Builds a manager with no current slab, an empty sweep list, zeroed
// statistics and an effectively unlimited maxBytes.
MemoryManager::MemoryManager()
  : m_front(nullptr)
  , m_limit(nullptr)
  , m_enabled(RuntimeOption::EnableMemoryManager) {
  // The sweep list is circular: a sentinel linked to itself means "empty".
  m_sweep.prev = &m_sweep;
  m_sweep.next = &m_sweep;
  m_stats.maxBytes = INT64_MAX;
#ifdef USE_JEMALLOC
  // Must come before resetStats(), which reads through m_allocated and
  // m_deallocated when stats are enabled.
  threadStats(m_allocated, m_deallocated, m_cactive, m_cactiveLimit);
#endif
  resetStats();
}

// Zeroes the usage/alloc counters and their peaks.  With jemalloc stats
// enabled it also clears the jemalloc debt and re-baselines m_prevAllocated
// and m_delta from the thread's current allocated/deallocated counters.
void MemoryManager::resetStats() {
  m_stats.totalAlloc = 0;
  m_stats.peakAlloc = 0;
  m_stats.peakUsage = 0;
  m_stats.alloc = 0;
  m_stats.usage = 0;
#ifdef USE_JEMALLOC
  if (!s_statsEnabled) {
    return;
  }
  m_stats.jemallocDebt = 0;
  m_prevAllocated = int64_t(*m_allocated);
  m_delta = m_prevAllocated - int64_t(*m_deallocated);
#endif
}

// Thin wrapper that just calls refreshStats().  It is marked NEVER_INLINE,
// presumably so that inlined callers do not carry the refreshStats() body.
NEVER_INLINE
void MemoryManager::refreshStatsHelper() {
  refreshStats();
}

void MemoryManager::refreshStatsHelperExceeded() {
  ThreadInfo* info = ThreadInfo::s_threadInfo.getNoCheck();
  info->m_reqInjectionData.setMemExceededFlag();
}

#ifdef USE_JEMALLOC
// Asks the HTTP server to stop, then lifts this manager's cactive limit.
// NOTE(review): presumably reached from refreshStats() once cactive exceeds
// m_cactiveLimit — the call site is not visible in this file.
void MemoryManager::refreshStatsHelperStop() {
  HttpServer::Server->stop();
  // Increase the limit to the maximum possible value, so that this method
  // won't be called again.
  m_cactiveLimit = SIZE_MAX;
}
#endif

// Registers `allocator` with this manager by appending it to
// m_smartAllocators; rollback() later calls clear() on each registered
// allocator.  `allocator` must be non-null (asserted).
void MemoryManager::add(SmartAllocatorImpl *allocator) {
  assert(allocator);
  m_smartAllocators.push_back(allocator);
}

// Forwards to Sweepable::SweepAll().
void MemoryManager::sweepAll() {
  Sweepable::SweepAll();
}

// Header stored immediately in front of every small smart_malloc block.
// smartMalloc() records the padded block size in it, and smartFree() reads the
// size back to pick the matching freelist.
struct SmallNode {
  size_t padbytes; // <= kMaxSmartSize means small block
};

// Read-only iterator over a list of slab base pointers.
using SlabIter = std::vector<char*>::const_iterator;

void MemoryManager::rollback() {
  for (unsigned int i = 0, n = m_smartAllocators.size(); i < n; i++) {
    m_smartAllocators[i]->clear();
  }
  // free smart-malloc slabs
  for (SlabIter i = m_slabs.begin(), end = m_slabs.end(); i != end; ++i) {
    free(*i);
  }
  m_slabs.clear();
  // free large allocation blocks
  for (SweepNode *n = m_sweep.next, *next; n != &m_sweep; n = next) {
    next = n->next;
    free(n);
  }
  m_sweep.next = m_sweep.prev = &m_sweep;
  // zero out freelists
  for (unsigned i = 0; i < kNumSizes; i++) {
    m_smartfree[i].clear();
  }
  m_front = m_limit = 0;
}

// Forwards to LeakDetectable::LogMallocStats().
void MemoryManager::logStats() {
  LeakDetectable::LogMallocStats();
}

// Debugging aid: refreshes the stats, then prints this thread's current and
// peak usage/alloc counters and the total slab size (in KiB) to stdout.
void MemoryManager::checkMemory() {
  printf("----- MemoryManager for Thread %ld -----\n", (long)pthread_self());

  refreshStats();
  printf("Current Usage: %" PRId64 " bytes\t", m_stats.usage);
  printf("Current Alloc: %" PRId64 " bytes\n", m_stats.alloc);
  printf("Peak Usage: %" PRId64 " bytes\t", m_stats.peakUsage);
  printf("Peak Alloc: %" PRId64 " bytes\n", m_stats.peakAlloc);

  // The slab total is a size_t, so print it with %zu.  %lu only matches on
  // targets where size_t happens to be unsigned long.
  const size_t slabKiB = m_slabs.size() * SLAB_SIZE / 1024;
  printf("Slabs: %zu KiB\n", slabKiB);
}

//
// smart_malloc implementation notes
//
// These functions allocate all small blocks from a single slab,
// and defer larger allocations directly to malloc.  When small blocks
// are freed they're placed on the appropriate size-segregated freelist
// (m_smartfree[i]).  Small blocks have an 8-byte SmallNode and
// are swept en-masse when slabs are freed.
//
// Medium blocks use a 16-byte SweepNode header to maintain a doubly-linked
// list of blocks to free at request end.  smart_free can distinguish
// SmallNode and SweepNode because valid next/prev pointers must be
// larger than kMaxSmartSize.
//

inline void* MemoryManager::smartMalloc(size_t nbytes) {
  assert(nbytes > 0);
  // add room for header before rounding up
  size_t padbytes = (nbytes + sizeof(SmallNode) + kMask) & ~kMask;
  if (LIKELY(padbytes <= kMaxSmartSize)) {
    m_stats.usage += padbytes;
    unsigned i = (padbytes - 1) >> kLgSizeQuantum;
    assert(i < kNumSizes);
    void* p = m_smartfree[i].maybePop();
    if (LIKELY(p != 0)) return p;
    char* mem = m_front;
    if (LIKELY(mem + padbytes <= m_limit)) {
      m_front = mem + padbytes;
      SmallNode* n = (SmallNode*) mem;
      n->padbytes = padbytes;
      return n + 1;
    }
    return smartMallocSlab(padbytes);
  }
  return smartMallocBig(nbytes);
}

inline void MemoryManager::smartFree(void* ptr) {
  assert(ptr != 0);
  SweepNode* n = ((SweepNode*)ptr) - 1;
  size_t padbytes = n->padbytes;
  if (LIKELY(padbytes <= kMaxSmartSize)) {
    assert(memset(ptr, kSmartFreeFill, padbytes - sizeof(SmallNode)));
    unsigned i = (padbytes - 1) >> kLgSizeQuantum;
    assert(i < kNumSizes);
    m_smartfree[i].push(ptr);
    m_stats.usage -= padbytes;
    return;
  }
  smartFreeBig(n);
}

// quick-and-dirty realloc implementation.  We could do better if the block
// is malloc'd, by deferring to the underlying realloc.
inline void* MemoryManager::smartRealloc(void* ptr, size_t nbytes) {
  assert(ptr != 0 && nbytes > 0);
  SweepNode* n = ((SweepNode*)ptr) - 1;
  size_t old_padbytes = n->padbytes;
  if (LIKELY(old_padbytes <= kMaxSmartSize)) {
    void* newmem = smartMalloc(nbytes);
    memcpy(newmem, ptr, std::min(old_padbytes - sizeof(SmallNode), nbytes));
    smartFree(ptr);
    return newmem;
  }
  SweepNode* next = n->next;
  SweepNode* prev = n->prev;
  SweepNode* n2 = (SweepNode*) realloc(n, nbytes + sizeof(SweepNode));

  // ensure that we have not exceeded the per request memory limit (#2529805)
  refreshStatsHelper();
  if (n2 != n) {
    // block moved; must re-link to sweeplist
    next->prev = prev->next = n2;
  }
  return n2 + 1;
}

/**
 * Allocate a fresh slab, make it the current one, and reserve its first
 * nbytes for the caller.  The slab is recorded in m_slabs and counted in the
 * alloc statistics.  Returns the start of the slab, which is also the start
 * of the reserved nbytes-sized block.
 */
NEVER_INLINE char* MemoryManager::newSlab(size_t nbytes) {
  if (UNLIKELY(m_stats.usage > m_stats.maxBytes)) {
    refreshStatsHelper();
  }
  char* const fresh = static_cast<char*>(Util::safe_malloc(SLAB_SIZE));
  JEMALLOC_STATS_ADJUST(&m_stats, SLAB_SIZE);
  m_stats.alloc += SLAB_SIZE;
  if (m_stats.peakAlloc < m_stats.alloc) {
    m_stats.peakAlloc = m_stats.alloc;
  }
  m_slabs.push_back(fresh);
  m_limit = fresh + SLAB_SIZE;
  m_front = fresh + nbytes;
  return fresh;
}

// Slow path of smartMalloc(): the current slab is exhausted, so start a new
// one, write the SmallNode header at its beginning and return the payload
// that follows it.
NEVER_INLINE
void* MemoryManager::smartMallocSlab(size_t padbytes) {
  char* const slab = newSlab(padbytes);
  SmallNode* const header = reinterpret_cast<SmallNode*>(slab);
  header->padbytes = padbytes;
  return header + 1;
}

// Links a freshly allocated big block at the front of the sweep list (right
// after the m_sweep sentinel) and returns the payload that follows the
// SweepNode header.  Refreshes the stats first if usage is over maxBytes.
inline void* MemoryManager::smartEnlist(SweepNode* n) {
  if (UNLIKELY(m_stats.usage > m_stats.maxBytes)) {
    refreshStatsHelper();
  }
  SweepNode* const head = &m_sweep;
  SweepNode* const oldFirst = head->next;
  n->prev = head;
  n->next = oldFirst;
  head->next = n;
  oldFirst->prev = n;
  // smartFree() tells big from small blocks by this field, so it must now
  // read as larger than any small block size.
  assert(n->padbytes > kMaxSmartSize);
  return n + 1;
}

// Big-block path of smartMalloc(): obtains nbytes plus a SweepNode header
// from Util::safe_malloc() and puts the block on the sweep list.
NEVER_INLINE
void* MemoryManager::smartMallocBig(size_t nbytes) {
  assert(nbytes > 0);
  void* const raw = Util::safe_malloc(nbytes + sizeof(SweepNode));
  return smartEnlist(static_cast<SweepNode*>(raw));
}

// Big-block path of smart_calloc(): like smartMallocBig(), but the memory
// comes from Util::safe_calloc().
NEVER_INLINE
void* MemoryManager::smartCallocBig(size_t totalbytes) {
  assert(totalbytes > 0);
  void* const raw = Util::safe_calloc(totalbytes + sizeof(SweepNode), 1);
  return smartEnlist(static_cast<SweepNode*>(raw));
}

// Unlinks a big block from the sweep list and returns it to free().
NEVER_INLINE
void MemoryManager::smartFreeBig(SweepNode* n) {
  SweepNode* const before = n->prev;
  SweepNode* const after = n->next;
  before->next = after;
  after->prev = before;
  free(n);
}

// Carve nbytes (a multiple of 8) out of the current slab at a 16-byte aligned
// address.  When the aligned request does not fit, a new slab is started and
// the block is taken from its beginning.
inline void* MemoryManager::slabAlloc(size_t nbytes) {
  assert((nbytes & 7) == 0);
  const uintptr_t kAlign = 16;
  const uintptr_t rounded =
    (uintptr_t(m_front) + (kAlign - 1)) & ~(kAlign - 1);
  char* const aligned = reinterpret_cast<char*>(rounded);
  char* const end = aligned + nbytes;
  if (end > m_limit) {
    return newSlab(nbytes);
  }
  m_front = end;
  return aligned;
}

// Shorthand for the MemoryManager returned by
// MemoryManager::TheMemoryManager(), dereferenced without a null check.
static inline MemoryManager& MM() {
  return *MemoryManager::TheMemoryManager();
}

// smart_malloc api entry points, with support for malloc/free corner cases.

// malloc()-style entry point.  A zero-byte request is served as a one-byte
// request.
HOT_FUNC
void* smart_malloc(size_t nbytes) {
  const size_t request = nbytes != 0 ? nbytes : size_t(1);
  return MM().smartMalloc(request);
}

// calloc()-style entry point: returns zero-filled memory for `count` elements
// of `nbytes` each.  A zero-sized request is served as one byte.  Returns
// null if count * nbytes does not fit in size_t, as calloc() does.
HOT_FUNC
void* smart_calloc(size_t count, size_t nbytes) {
  // Without this check the product would silently wrap, and the caller would
  // receive a block far smaller than it believes it asked for.
  if (UNLIKELY(nbytes != 0 && count > SIZE_MAX / nbytes)) {
    return nullptr;
  }
  size_t totalbytes = std::max(nbytes * count, size_t(1));
  if (totalbytes <= MemoryManager::kMaxSmartSize) {
    // smartMalloc() does not clear its result, so zero it here.
    return memset(MM().smartMalloc(totalbytes), 0, totalbytes);
  }
  return MM().smartCallocBig(totalbytes);
}

// realloc()-style entry point with the usual corner cases: a null `ptr` acts
// like smart_malloc(nbytes), and a zero `nbytes` frees `ptr` and returns
// null.
HOT_FUNC
void* smart_realloc(void* ptr, size_t nbytes) {
  MemoryManager& mm = MM();
  if (ptr == nullptr) {
    return mm.smartMalloc(nbytes != 0 ? nbytes : size_t(1));
  }
  if (nbytes == 0) {
    mm.smartFree(ptr);
    return nullptr;
  }
  return mm.smartRealloc(ptr, nbytes);
}

// free()-style entry point; a null pointer is ignored.
HOT_FUNC
void smart_free(void* ptr) {
  if (ptr == nullptr) {
    return;
  }
  MM().smartFree(ptr);
}

// SmartAllocator facade

// Hands out one item of this allocator's item size.  The request is charged
// to the memory manager's usage first; the item then comes from the
// allocator's own free list or, failing that, from the manager's slab.
HOT_FUNC
void* SmartAllocatorImpl::alloc(size_t nbytes) {
  assert(nbytes == size_t(m_itemSize));
  MemoryManager& mm = MM();
  // Charge usage before any slab allocation, which may compare usage against
  // maxBytes when it needs a new slab.
  mm.getStats().usage += nbytes;
  void* item = m_free.maybePop();
  if (UNLIKELY(item == nullptr)) {
    item = mm.slabAlloc(nbytes);
  }
  TRACE(1, "alloc %zu -> %p\n", nbytes, item);
  return item;
}

///////////////////////////////////////////////////////////////////////////////
}