/*
   +----------------------------------------------------------------------+
   | HipHop for PHP                                                       |
   +----------------------------------------------------------------------+
   | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com)     |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
*/
#include "hphp/runtime/vm/backup_gc.h"

#include "hphp/runtime/base/runtime_error.h"

// NOTE(review): the names of the system/boost headers were missing from
// the text under review; this list is rebuilt from the std:: and boost::
// names the file uses.  Confirm against the build.
#include <algorithm>
#include <cerrno>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <ostream>
#include <set>
#include <string>
#include <utility>

#include <boost/format.hpp>
#include <boost/noncopyable.hpp>

#include "hphp/util/assertions.h"
#include "hphp/util/timer.h"
#include "hphp/util/trace.h"
#include "hphp/runtime/base/execution_context.h"
#include "hphp/runtime/base/smart_allocator.h"
#include "hphp/runtime/base/memory_manager.h"
#include "hphp/runtime/base/complex_types.h"
#include "hphp/runtime/base/hphp_array.h"
#include "hphp/runtime/vm/class.h"

namespace HPHP {

TRACE_SET_MOD(gc);

//////////////////////////////////////////////////////////////////////

namespace {

/*
 * Per-object mark used while walking the heap.  An object with no entry
 * in the color map is treated as Colorless.
 */
enum class Color {
  Colorless,
  Black,    // Known to be reachable
  Garbage   // Used for GarbageDetector
};

// NOTE(review): the template arguments of these three typedefs were
// missing from the text under review.  Key/value types follow from how
// the containers are used below; the pointer_hash<void> hasher is an
// assumption -- confirm against hphp/util.
typedef hphp_hash_map<void*, Color, pointer_hash<void> > ColorMap;
typedef std::pair<DataType, void*> TypedObj;
typedef std::set<TypedObj> TypedObjSet;

/*
 * Tag a heap pointer with the DataType it should be interpreted as, so
 * it can be stored in a TypedObjSet and cast back later.
 */
TypedObj make_typed_obj(RefData* p) {
  return std::make_pair(KindOfRef, static_cast<void*>(p));
}
TypedObj make_typed_obj(ObjectData* p) {
  return std::make_pair(KindOfObject, static_cast<void*>(p));
}
TypedObj make_typed_obj(ArrayData* p) {
  return std::make_pair(KindOfArray, static_cast<void*>(p));
}

/*
 * Book-keeping shared by all the heap walks of one collection or
 * detection run.
 */
struct GCState : private boost::noncopyable {
  GCState()
    : m_totalCount(0)
    , m_collectedCount(0)
  {}

  ColorMap m_colorMap;
  uint64_t m_totalCount;
  uint64_t m_collectedCount;

  // For cycle detection, we keep a set of all the leaked heap objects
  // here to make it easier to emit nodes and edges without doing it
  // during a heap iteration.
  TypedObjSet m_cyclicGarbage;

  // Set the color of an object to newColor, and return whatever its
  // old color was.  An object that had no entry yet reports Colorless.
  Color setColor(void* vp, Color newColor) {
    std::pair<ColorMap::iterator, bool> inserted =
      m_colorMap.insert(std::make_pair(vp, newColor));
    const Color oldColor =
      inserted.second ? Color::Colorless : inserted.first->second;
    inserted.first->second = newColor;
    return oldColor;
  }
};

/*
 * Address of the reference count field of a heap object, located
 * FAST_REFCOUNT_OFFSET bytes from the start of the object.
 */
int32_t* count_addr(void* obj) {
  void* addr = static_cast<char*>(obj) + FAST_REFCOUNT_OFFSET;
  return static_cast<int32_t*>(addr);
}

/*
 * Whether obj was handed out by one of the ObjectData smart allocators.
 */
bool is_smart_allocated(ObjectData* obj) {
  // XXX ObjectData is allocated from several smart allocators by
  // size, so currently we have to check them all.
  MemoryManager::AllocIterator aIter(MemoryManager::TheMemoryManager());
  while (SmartAllocatorImpl* sa = aIter.current()) {
    if (sa->getAllocatorType() == typeid(ObjectData)) {
      if (sa->isFromThisAllocator(obj)) return true;
    }
    aIter.next();
  }
  return false;
}

/*
 * Whether p was handed out by the single smart allocator for type T.
 */
template<class T>
bool UNUSED is_smart_allocated(T* p) {
  return T::AllocatorType::getNoCheck()->isFromThisAllocator(p);
}

/*
 * Whether the reference count of obj holds RefCountStaticValue.
 */
bool is_static(void* obj) {
  return *count_addr(obj) == RefCountStaticValue;
}

/*
 * Call visit(sa, p) for every object the iterator of allocator `sa`
 * yields, treating each one as an ObjectType.
 */
template<class ObjectType, class Visitor>
void walk_allocator(const Visitor& visit, SmartAllocatorImpl* sa) {
  SmartAllocatorImpl::Iterator saIter(sa);
  while (ObjectType* p = static_cast<ObjectType*>(saIter.current())) {
    visit(sa, p);
    saIter.next();
  }
}

/*
 * Iterate every live object in any smart-allocator, and call visit()
 * on it.
 *
 * The collection algorithm involves several heap walks (see
 * collect_algorithm below).
 */
template<class Visitor>
void walk_smart_heap(const Visitor& visit) {
  MemoryManager::AllocIterator aIter(MemoryManager::TheMemoryManager());
  for (; SmartAllocatorImpl* sa = aIter.current(); aIter.next()) {
    auto const& t = sa->getAllocatorType();
    if (t == typeid(HphpArray)) {
      walk_allocator<HphpArray>(visit, sa);
    } else if (t == typeid(RefData)) {
      walk_allocator<RefData>(visit, sa);
    } else if (t == typeid(ObjectData)) {
      walk_allocator<ObjectData>(visit, sa);
    } else if (t == typeid(StringData)) {
      // Unnecessary for the first level walk, because strings can't
      // have references to other objects.
    }
  }
}

template<class Visitor> void traceImpl(const Visitor&, ArrayData*);
template<class Visitor> void traceImpl(const Visitor&, ObjectData*);
template<class Visitor> void traceImpl(const Visitor&, RefData*);

// This just exists so that visitors that don't want to visit strings
// don't need to have an instantiable operator() for a StringData*
// argument.
template<class Visitor, bool visits = Visitor::visits_strings>
struct VisitStringHelper {
  static void visit(const Visitor& visit, StringData* sd) {
    visit(sd);
  }
};

template<class Visitor>
struct VisitStringHelper<Visitor, false> {
  static void visit(const Visitor&, StringData*) {}
};

/*
 * Visit whatever heap object (if any) the TypedValue points at.
 */
template<class Visitor>
void traceImpl(const Visitor& visit, TypedValue* tv) {
  switch (tv->m_type) {
  case KindOfRef:
    assert(!is_static(tv->m_data.pref));
    assert(is_smart_allocated(tv->m_data.pref));
    visit(tv->m_data.pref);
    break;
  case KindOfObject:
    /*
     * We need to check whether ObjectData*'s are actually from the
     * smart heap before tracing them (some ResourceData objects are
     * not smart allocated).
     *
     * Since we don't trace them, this means that garbage cycles
     * involving non-smart allocated extension objects can't be
     * collected.  On the other hand, if a garbage cycle has a
     * reference to one of these ResourceData objects, when we collect
     * it we will leave the reference to it despite collecting the
     * cycle (this is the same as what would happen if we let the
     * cycle leak, so this is semantically ok although it is a
     * resource leak).
     */
    assert(!is_static(tv->m_data.pobj));
    if (is_smart_allocated(tv->m_data.pobj)) {
      visit(tv->m_data.pobj);
    }
    break;
  case KindOfArray:
    if (!is_static(tv->m_data.parr)) {
      visit(tv->m_data.parr);
    }
    break;
  case KindOfString:
    if (!is_static(tv->m_data.pstr)) {
      assert(is_smart_allocated(tv->m_data.pstr));
      VisitStringHelper<Visitor>::visit(
        visit,
        tv->m_data.pstr
      );
    }
    break;
  default:
    break;
  }
}

/*
 * Visit everything an array refers to: every value, plus string keys
 * when the visitor asks for strings.
 */
template<class Visitor>
void traceImpl(const Visitor& visit, ArrayData* ad) {
  for (ssize_t i = ad->iter_begin();
       i != ArrayData::invalid_index;
       i = ad->iter_advance(i)) {
    /*
     * We need to visit the keys only if they are strings (for the
     * case where we are deallocating objects).  The ArrayData api
     * adds references when looking at keys, but this is ok because
     * GarbageCollector doesn't actually free StringData's outright
     * (just decrefs them).
     */
    if (Visitor::visits_strings) {
      Variant key(ad->getKey(i));
      if (key.isString()) {
        traceImpl(visit, key.asTypedValue());
      } else {
        assert(key.isInteger());
      }
    }

    // The key is either a string or an int, so we don't care.  Only
    // look at the value.  Trace the TypedValue so we visit whatever
    // it points to.
    traceImpl(visit, const_cast<TypedValue*>(
      ad->getValueRef(i).asTypedValue()));
  }
}

/*
 * Visit everything an object refers to: its dynamic property array and
 * whatever its declared properties point at.
 */
template<class Visitor>
void traceImpl(const Visitor& visit, ObjectData* obj) {
  // Dynamic properties.  We have to visit the dynamic property array
  // itself, since it is an object living in the smart heap (all the
  // top-level walks visit it anyway).
  if (ArrayData* dyn = obj->getDynProps().get()) {
    visit(dyn);
  }

  // Declared properties.  We need to indirect through the TypedValue
  // before visiting, since these are in-situ in the ObjectData.
  void* vpObj = obj;
  unsigned char* address = static_cast<unsigned char*>(vpObj);

  const size_t nProps = obj->getVMClass()->numDeclProperties();
  for (size_t i = 0; i < nProps; ++i) {
    const size_t off = obj->getVMClass()->declPropOffset(i);
    void* tvAddr = address + off;
    traceImpl(visit, static_cast<TypedValue*>(tvAddr));
  }
}

template<class Visitor>
void traceImpl(const Visitor& visit, RefData* ref) {
  visit(ref);
}

/*
 * Entry points for tracing one heap object: apply `visit` to each
 * object that `t` refers to directly.
 */
template<class Visitor, class T>
void trace(const Visitor& visit, T* t) {
  traceImpl(visit, t);
}

// A RefData is traced through the TypedValue it wraps.
template<class Visitor>
void trace(const Visitor& visit, RefData* ref) {
  traceImpl(visit, ref->tv());
}

// Tracing visitor: take one reference away from each referent.
struct RefDecrement {
  static const bool visits_strings = false;

  template<class T>
  void operator()(T* t) const {
    --*count_addr(t);
    assert(*count_addr(t) >= 0);
  }
};

// Tracing visitor: give one reference back to each referent.
struct RefIncrement {
  static const bool visits_strings = false;

  template<class T>
  void operator()(T* t) const {
    assert(*count_addr(t) >= 0);
    ++*count_addr(t);
  }
};

// Heap-walk visitor for step 1 of collect_algorithm.
struct InternalRefRemover {
  template<class T>
  void operator()(SmartAllocatorImpl*, T* t) const {
    trace(RefDecrement(), t);
  }
};

/*
 * Tracing visitor: restore the reference that step 1 removed from each
 * referent, and recurse into referents that were not yet colored.
 */
struct MarkLive {
  // Since we can't trace StringData, we don't need to bother marking
  // these.
  static const bool visits_strings = false;

  explicit MarkLive(GCState& state) : m_state(state) {}

  template<class T>
  void operator()(T* t) const {
    ++*count_addr(t);
    if (m_state.setColor(t, Color::Black) == Color::Colorless) {
      trace(*this, t);
    }
  }

  GCState& m_state;
};

// Heap-walk visitor for step 2 of collect_algorithm.
struct ExternalRefRestorer {
  explicit ExternalRefRestorer(GCState& state) : m_state(state) {}

  template<class T>
  void operator()(SmartAllocatorImpl*, T* t) const {
    // A zero count here means no reference from outside the smart heap.
    if (*count_addr(t) == 0) return;
    if (m_state.setColor(t, Color::Black) == Color::Colorless) {
      trace(MarkLive(m_state), t);
    }
  }

  GCState& m_state;
};

/*
 * Since strings haven't been involved in the heap walk stuff, they
 * won't get freed when they are part of a cycle unless we handle them
 * specially here.  It's only necessary to trace one level out looking
 * for strings---any strings involved in the cycle are at most one
 * level out from an object that GarbageCollector will visit.
 * Moreover, if a string is shared, we might visit it more than once
 * here---so just decref it, don't free it.
 */
struct StringDealloc {
  static const bool visits_strings = true;

  void operator()(StringData* s) const {
    decRefStr(s);
  }

  template<class T>
  void operator()(T*) const {}
};

// Heap-walk visitor for step 3 when collecting (gc_collect_cycles).
struct GarbageCollector {
  explicit GarbageCollector(GCState& state) : m_state(state) {}

  /*
   * Deallocation for these objects needs to go directly to the
   * SmartAllocator.  If we try to run their destructors or release
   * functions, they'll try to decref the things they refer to, even
   * though all those things have a zero _count now.  We also don't
   * want to run object destructors.
   *
   * Also, HphpArray and some objects have to be swept before we tell
   * the allocator it's done.
   */
  template<class T>
  void operator()(SmartAllocatorImpl* sa, T* t) const {
    const int32_t count = *count_addr(t);
    if (count == 0) {
      trace(StringDealloc(), t);
      dealloc(sa, t);
      ++m_state.m_collectedCount;
    }
    ++m_state.m_totalCount;
  }

  void dealloc(SmartAllocatorImpl* sa, ArrayData* ar) const {
    if (Sweepable* s = dynamic_cast<Sweepable*>(ar)) {
      s->sweep();
      s->unregister();
    }
    sa->dealloc(ar);
  }

  void dealloc(SmartAllocatorImpl* sa, ObjectData* obj) const {
    if (Sweepable* s = dynamic_cast<Sweepable*>(obj)) {
      s->sweep();
      s->unregister();
    }
    if (RuntimeOption::EnableObjDestructCall) {
      g_vmContext->m_liveBCObjs.erase(obj);
    }
    sa->dealloc(obj);
  }

  void dealloc(SmartAllocatorImpl* sa, RefData* rd) const {
    sa->dealloc(rd);
  }

  GCState& m_state;
};

// Heap-walk visitor for step 3 when only reporting (gc_detect_cycles).
struct GarbageDetector {
  explicit GarbageDetector(GCState& state) : m_state(state) {}

  template<class T>
  void operator()(SmartAllocatorImpl*, T* t) const {
    // Check a color flag instead of the count, because we might
    // increment it before seeing it.
    if (m_state.setColor(t, Color::Garbage) == Color::Colorless) {
      m_state.m_cyclicGarbage.insert(make_typed_obj(t));

      /*
       * To leave the heap in a consistent state, we need to add back all
       * the internal references for the garbage.
       */
      trace(RefIncrement(), t);
    }
  }

  GCState& m_state;
};

/*
 * Implements a garbage collection/detection algorithm that doesn't
 * require knowing about every reference in the system.
 *
 * References outside of the set of objects we evaluate (the smart
 * heap) will just keep objects alive.  (This means a cycle living
 * partially in native C++ objects won't be collectable.)
 */
template<class FinalStep>
void collect_algorithm(GCState& state) {
  /*
   * Step 1:
   *
   * Walk the entire heap, and for each object, subtract one from
   * the reference counts of all objects it refers to.
   *
   * We do this to objects that can have references to other
   * objects, but ignore strings.
   */
  walk_smart_heap(InternalRefRemover());

  /*
   * Step 2:
   *
   * Each (non-string) object that still has a non-zero reference
   * count has references that live outside of the smart heap, and
   * therefore is live.  Mark it live, and mark every object it
   * refers to (transitively) live.
   */
  walk_smart_heap(ExternalRefRestorer(state));

  /*
   * Step 3:
   *
   * Any object that still has a zero reference count is part of a
   * heap-internal cycle.  Do something with it.
   */
  walk_smart_heap(FinalStep(state));
}

/*
 * Tracing visitor that writes one GML "edge" record from the node
 * srcId to each referent that has a node id.
 */
struct EdgePrinter {
  static const bool visits_strings = false;

  explicit EdgePrinter(hphp_hash_map<void*, uint32_t, pointer_hash<void> >&
                         nodeIds,
                       uint32_t srcId,
                       std::ostream& out)
    : m_nodeIds(nodeIds)
    , m_srcId(srcId)
    , m_out(out)
  {}

  template<class T>
  void operator()(T* t) const {
    // Only garbage objects were given node ids.  A garbage object may
    // still point at an object outside that set; look the target up
    // without inserting, and skip it rather than emit an edge to a node
    // that was never written.
    auto const target = m_nodeIds.find(static_cast<void*>(t));
    if (target == m_nodeIds.end()) return;

    // TODO: could show which member or array key pointed to this?
    m_out << "  edge [\n"
             "    source " << m_srcId << '\n'
          << "    target " << target->second << '\n'
          << "  ]\n";
  }

  hphp_hash_map<void*, uint32_t, pointer_hash<void> >& m_nodeIds;
  uint32_t m_srcId;
  std::ostream& m_out;
};

}

//////////////////////////////////////////////////////////////////////

/*
 * Run the collector over the smart heap, deallocating every object found
 * to have no reference from outside it, and return a one-line summary
 * (objects released, survival percentage, cpu and wall time).
 */
std::string gc_collect_cycles() {
  TRACE(1, "GC: starting gc_collect_cycles\n");

  Timer cpuTimer(Timer::TotalCPU);
  Timer wallTimer(Timer::WallTime);

  GCState state;
  collect_algorithm<GarbageCollector>(state);

  const uint64_t live = state.m_totalCount - state.m_collectedCount;
  // Clamp the divisor to 1 so an empty heap does not divide by zero.
  const float survivalRate =
    100 * static_cast<float>(live) /
      std::max<uint64_t>(state.m_totalCount, 1);

  std::string ret = str(
    boost::format("released %d/%d objects; survival%% = %02.2f; "
                  "cpu time = %5lld; wall time = %5lld\n")
      % state.m_collectedCount
      % state.m_totalCount
      % survivalRate
      % cpuTimer.getMicroSeconds()
      % wallTimer.getMicroSeconds());
  TRACE(1, "%s", ret.c_str());
  return ret;
}

/*
 * Find the objects that are only kept alive by heap-internal cycles and
 * write them, with the references between them, to `filename` as a
 * directed graph in GML text form.  Nothing is deallocated.
 */
void gc_detect_cycles(const std::string& filename) {
  TRACE(1, "GC: starting gc_detect_cycles\n");

  GCState state;
  collect_algorithm<GarbageDetector>(state);

  std::ofstream out(filename.c_str());
  if (!out.is_open()) {
    raise_error("couldn't open output file for gc_detect_cycles, %s",
                strerror(errno));
    return;
  }

  uint32_t nextNodeId = 1;
  hphp_hash_map<void*, uint32_t, pointer_hash<void> > nodeIds;

  out << "graph [\n"
         "  directed 1\n";

  // Print nodes.
  for (auto const& typedObj : state.m_cyclicGarbage) {
    const uint32_t thisNodeId = nextNodeId++;
    nodeIds[typedObj.second] = thisNodeId;

    const char* name = nullptr;
    const char* color = nullptr;
    switch (typedObj.first) {
    case KindOfObject: {
      ObjectData* od = static_cast<ObjectData*>(typedObj.second);
      name = od->getVMClass()->nameRef().data();
      color = "#FFCC00";
      break;
    }
    case KindOfArray:
      name = "array()";
      color = "#CCCCFF";
      break;
    case KindOfRef:
      name = "RefData";
      color = "#33CCCC";
      break;
    default:
      not_reached();
    }

    out << "  node [ id " << thisNodeId << "\n"
           "    graphics [\n"
           "      type \"roundrectangle\"\n"
           "      fill \"" << color << "\"\n"
           "    ]\n"
           "    LabelGraphics [\n"
           "      anchor \"e\"\n"
           "      alignment \"left\"\n"
           "      fontName \"Consolas\"\n"
           "      text \"" << name << "\"\n"
           "    ]\n"
           "  ]\n";
  }

  // Print edges.
  for (auto const& typedObj : state.m_cyclicGarbage) {
    EdgePrinter p(nodeIds, nodeIds[typedObj.second], out);
    switch (typedObj.first) {
    case KindOfObject:
      trace(p, static_cast<ObjectData*>(typedObj.second));
      break;
    case KindOfRef:
      trace(p, static_cast<RefData*>(typedObj.second));
      break;
    case KindOfArray:
      trace(p, static_cast<ArrayData*>(typedObj.second));
      break;
    default:
      not_reached();
    }
  }

  out << "]\n";

  TRACE(1, "GC: %zu objects were part of cycles; wrote to %s\n",
        state.m_cyclicGarbage.size(),
        filename.c_str());
}

//////////////////////////////////////////////////////////////////////

}