From 82dc4550da9a0c4aa531cade1b55a55b274df4e7 Mon Sep 17 00:00:00 2001 From: Stephen Chen Date: Tue, 18 Jun 2013 11:23:08 -0700 Subject: [PATCH] A simple facility to track runtime statistics in hphp. This is a simpler / cleaner version of fbcode's ServiceData for hphp. Currently we support only flat counters, MultiLevelTimeSeries and Histograms. We can add more stats types later on as needed. ServiceData is a global entry point for all this stuff. The current idea is to completely decouple data input and export. ServiceData internally has three separate maps tracking flat counters, timeseries and histograms. These maps are wrapped by spin locks and protected by folly::Synchronized. ServiceData provides three functions to create/retrieve counter objects. The counter objects are thread safe (protected again by spin locks and folly::Synchronized). --- hphp/util/service_data-inl.h | 58 +++++++ hphp/util/service_data.cpp | 276 ++++++++++++++++++++++++++++++++ hphp/util/service_data.h | 229 ++++++++++++++++++++++++++ hphp/util/test/service_data.cpp | 123 ++++++++++++++ 4 files changed, 686 insertions(+) create mode 100644 hphp/util/service_data-inl.h create mode 100644 hphp/util/service_data.cpp create mode 100644 hphp/util/service_data.h create mode 100644 hphp/util/test/service_data.cpp diff --git a/hphp/util/service_data-inl.h b/hphp/util/service_data-inl.h new file mode 100644 index 000000000..5ff79837a --- /dev/null +++ b/hphp/util/service_data-inl.h @@ -0,0 +1,58 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. 
(http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +namespace HPHP { + +////////////////////////////////////////////////////////////////////// + +namespace ServiceData { + +namespace detail { + +inline std::chrono::seconds nowAsSeconds() { + auto now = std::chrono::system_clock::now(); + return std::chrono::duration_cast( + now.time_since_epoch()); +} + +} // namespace detail + +inline void ExportedTimeSeries::addValue(int64_t value) { + m_timeseries->addValue(detail::nowAsSeconds(), value); +} + +inline void ExportedTimeSeries::addValue(int64_t value, int64_t times) { + m_timeseries->addValue(detail::nowAsSeconds(), value, times); +} + +inline void ExportedTimeSeries::addValueAggregated(int64_t sum, + int64_t nsamples) { + m_timeseries->addValueAggregated(detail::nowAsSeconds(), sum, nsamples); +} + +inline void ExportedHistogram::addValue(int64_t value) { + m_histogram->addValue(value); +} + +inline void ExportedHistogram::removeValue(int64_t value) { + m_histogram->removeValue(value); +} + +} // namespace ServiceData + +////////////////////////////////////////////////////////////////////// + +} // namespace HPHP diff --git a/hphp/util/service_data.cpp b/hphp/util/service_data.cpp new file mode 100644 index 000000000..1bbb7c5c4 --- /dev/null +++ b/hphp/util/service_data.cpp @@ -0,0 +1,276 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + 
+----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#include "hphp/util/service_data.h" + +#include +#include + +#include "folly/Conv.h" +#include "folly/MapUtil.h" +#include "hphp/util/base.h" + +namespace HPHP { + +////////////////////////////////////////////////////////////////////// + +namespace ServiceData { + +ExportedTimeSeries::ExportedTimeSeries( + int numBuckets, + const std::vector& durations, + const std::vector& exportTypes) + : m_timeseries(folly::MultiLevelTimeSeries(numBuckets, + durations.size(), + &durations[0])), + m_exportTypes(exportTypes) { +} + +void ExportedTimeSeries::exportAll(const std::string& prefix, + std::map& statsMap) { + SYNCHRONIZED(m_timeseries) { + // must first call update to flush data. + m_timeseries.update(detail::nowAsSeconds()); + + for (int i = 0; i < m_timeseries.numLevels(); ++i) { + auto& level = m_timeseries.getLevel(i); + std::string suffix = + level.isAllTime() ? 
"" : + folly::to(".", level.duration().count()); + + for (auto type : m_exportTypes) { + if (type == ServiceData::StatsType::AVG) { + statsMap.insert( + std::make_pair(folly::to(prefix, ".avg", suffix), + level.avg())); + } else if (type == ServiceData::StatsType::SUM) { + statsMap.insert( + std::make_pair(folly::to(prefix, ".sum", suffix), + level.sum())); + } else if (type == ServiceData::StatsType::RATE) { + statsMap.insert( + std::make_pair(folly::to(prefix, ".rate", suffix), + level.rate())); + } else if (type == ServiceData::StatsType::COUNT) { + statsMap.insert( + std::make_pair(folly::to(prefix, ".count", suffix), + level.count())); + } else if (type == ServiceData::StatsType::PCT) { + statsMap.insert( + std::make_pair(folly::to(prefix, ".pct", suffix), + level.avg() * 100)); + } + } + } + } +} + +ExportedHistogram::ExportedHistogram( + int64_t bucketSize, + int64_t min, + int64_t max, + const std::vector& exportPercentiles) + : m_histogram(folly::Histogram(bucketSize, min, max)), + m_exportPercentiles(exportPercentiles) { +} + +void ExportedHistogram::exportAll(const std::string& prefix, + std::map& statsMap) { + SYNCHRONIZED(m_histogram) { + for (double percentile : m_exportPercentiles) { + statsMap.insert( + std::make_pair( + folly::to( + prefix, ".hist.p", folly::to(percentile * 100)), + m_histogram.getPercentileEstimate(percentile))); + } + } +} + +namespace { + +class Impl { + public: + ExportedCounter* createCounter(const std::string& name) { + SYNCHRONIZED(m_counterMap) { + auto iterator = m_counterMap.find(name); + if (iterator == m_counterMap.end()) { + return (m_counterMap[name] = new ExportedCounter()); + } + return iterator->second; + } + // make compiler happy. 
+ return nullptr; + } + + ExportedTimeSeries* createTimeseries( + const std::string& name, + const std::vector& types, + const std::vector& levels, + int numBuckets) { + SYNCHRONIZED(m_timeseriesMap) { + ExportedTimeSeries* counter = nullptr; + auto iterator = m_timeseriesMap.find(name); + if (iterator == m_timeseriesMap.end()) { + counter = new ExportedTimeSeries(numBuckets, levels, types); + m_timeseriesMap[name] = counter; + } else { + counter = iterator->second; + } + + return counter; + } + // make compiler happy. + return nullptr; + } + + ExportedHistogram* createHistogram( + const std::string& name, + int64_t bucketSize, + int64_t min, + int64_t max, + const std::vector& exportPercentiles) { + + SYNCHRONIZED(m_histogramMap) { + ExportedHistogram* histogram; + auto iterator = m_histogramMap.find(name); + if (iterator == m_histogramMap.end()) { + histogram = new ExportedHistogram(bucketSize, min, max, + exportPercentiles); + m_histogramMap[name] = histogram; + } else { + histogram = iterator->second; + } + + return histogram; + } + // make compiler happy. + return nullptr; + } + + void exportAll(std::map& statsMap) { + // make a copy of the counter map so we don't hold the lock on the map while + // we are exporting individual stats. + hphp_hash_map counters; + m_counterMap.copy(&counters); + for (auto iter : counters) { + statsMap.insert(std::make_pair(iter.first, iter.second->getValue())); + } + + // Same idea here. Make a copy first to iterate over so we don't hold the + // lock on the map while we export individual timeseries + hphp_hash_map timeseries; + m_timeseriesMap.copy(&timeseries); + + for (auto iter : timeseries) { + iter.second->exportAll(iter.first, statsMap); + } + + // And same here for histograms. + hphp_hash_map histograms; + m_histogramMap.copy(&histograms); + + for (auto iter : histograms) { + iter.second->exportAll(iter.first, statsMap); + } + } + + private: + // This is a singleton class. Once constructed, we never destroy it. 
See the + // implementation note below. + ~Impl() = delete; + + // Delete all the values from a STL style associative container. + template + static void containerDeleteSeconds(Container* container) { + for (auto iter : *container) { + delete iter.second; + iter.second = 0; + } + } + + typedef hphp_hash_map ExportedCounterMap; + typedef hphp_hash_map ExportedTimeSeriesMap; + typedef hphp_hash_map ExportedHistogramMap; + + folly::Synchronized m_counterMap; + folly::Synchronized m_timeseriesMap; + folly::Synchronized m_histogramMap; +}; + +// Implementation note: +// +// Impl data structure is a singleton and globally accessible. We need to +// initialize it before anyone tries to use it. It is possible and likely that +// another statically initialized object will call methods on it to create +// counters. Therefore, we need Impl to be initialized statically before main() +// starts. Unfortunately, there is no initialization order guarantees for the +// statically and globally constructed objects. To get around that, we wrap the +// initialization in a function so s_impl will get initialized the first time it +// gets called. +// +// For the same reason, we need s_impl to be destructed after all other +// statically created objects may reference it in their destructor. We achieve +// that by *intentionally* creating the object on heap and never delete it. It's +// better to leak memory here than to have random crashes on shutdown. +static Impl& getServiceDataInstance() { + static Impl *s_impl = new Impl(); + return *s_impl; +} +// One problem with getServiceDataInstance() is that it's not thread safe. If +// two threads are accessing this function for the first time concurrently, we +// might end up creating two Impl object. We work around that by making sure we +// trigger this function statically before main() starts. +// +// Note that it's still possible for the race condition to happen if we are +// creating and starting threads statically before main() starts. 
If that +// happens, we'll have to wrap getServiceDataInstance around a pthread_once and +// pay some runtime synchronization cost. +const Impl& s_dummy = getServiceDataInstance(); + +} // namespace + +ExportedCounter* createCounter(const std::string& name) { + return getServiceDataInstance().createCounter(name); +} + +ExportedTimeSeries* createTimeseries( + const std::string& name, + const std::vector& types, + const std::vector& levels, + int numBuckets) { + return getServiceDataInstance().createTimeseries( + name, types, levels, numBuckets); +} + +ExportedHistogram* createHistogram( + const std::string& name, + int64_t bucketSize, + int64_t min, + int64_t max, + const std::vector& exportPercentile) { + return getServiceDataInstance().createHistogram( + name, bucketSize, min, max, exportPercentile); +} + +void exportAll(std::map& statsMap) { + return getServiceDataInstance().exportAll(statsMap); +} + +} // namespace ServiceData. + +////////////////////////////////////////////////////////////////////// +} diff --git a/hphp/util/service_data.h b/hphp/util/service_data.h new file mode 100644 index 000000000..4e6e86b61 --- /dev/null +++ b/hphp/util/service_data.h @@ -0,0 +1,229 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ +*/ + +#ifndef incl_HPHP_SERVICE_DATA_H_ +#define incl_HPHP_SERVICE_DATA_H_ + +#include +#include +#include +#include +#include +#include + +#include "folly/RWSpinLock.h" +#include "folly/Synchronized.h" +#include "folly/stats/Histogram.h" +#include "folly/stats/MultiLevelTimeSeries.h" + +namespace HPHP { +/////////////////////////////////////////////////////////////////////////////// + +/* + * A globally accessible statistics tracking facility. This can be used to keep + * track of internal runtime statistics in the form of flat counters, timeseries + * counters or histograms. + * + * ServiceData provides a globally accessible entry point to all the internal + * statistics. A 'statistic counter' of different types could be created by + * calling createCounter(), createTimeseries() or createHistogram(). The caller + * can then add values at different time points to the statistic counters. The + * statistic can then be retrieved and reported via the exportAll() call on + * ServiceData. + * + * Thread safety: + * ============== + * All functions in ServiceData namespace are thread safe. It is safe + * (and recommended) to cache the object returned by create...() methods and + * repeatedly add data points to it. It is safe to call create...() with the + * same name from multiple threads. In this case, only one object will be + * created and passed back to different threads. + * + * All objects returned by the various create...() calls are thread + * safe. It is okay to add data points to them from multiple threads concurrently. + * These objects are internally synchronized with spin locks. + * + * Example Usage: + * ============== + * // create a flat counter named foo. + * auto counter = ServiceData::createCounter("foo"); + * counter->increment(); + * + * // create timeseries data named bar with default setting (avg value for the + * // last 1 minute, 10 minutes, hour and all time). 
+ * auto timeseries = ServiceData::createTimeseries("bar"); + * timeseries->addValue(3); + * + * // create a histogram with a bucket size of 10, min of 1, max of 100 and + * // export the 50th and 90th percentile values for reporting. + * auto histogram = ServiceData::createHistogram("blah", 10, 1, 100, + * {0.5, 0.9}); + * histogram->addValue(10); + * + * // You can report the data like so. + * std::map statsMap; + * ServiceData::exportAll(statsMap); + * + * and statsMap will contain these keys: + * + * "foo" + * "bar.avg.60", "bar.avg.600", "bar.avg.3600", "bar.avg" + * "blah.hist.p50", "blah.hist.p90" + * + * Anti pattern: + * ============= + * ServiceData::createCounter("foo")->increment(); // don't do this. + * Don't do this in performance critical code. You will incur the cost of a + * locked hash-map lookup whenever createCounter() is called. Rather, you should + * call ServiceData::createCounter("foo") just once, cache the returned pointer + * and repeatedly add data points to it. + */ +namespace ServiceData { + +class ExportedCounter; +class ExportedHistogram; +class ExportedTimeSeries; + +enum class StatsType { AVG, SUM, RATE, COUNT, PCT }; + +/* + * Create a flat counter named 'name'. Return an existing counter if it has + * already been created. + */ +ExportedCounter* createCounter(const std::string& name); + +/* + * Create a timeseries counter named 'name'. Return an existing one if it + * has already been created. + * + * Timeseries data is implemented as a number of buckets (bucketed by time). + * As data points are added and time rolls forward, new buckets are created and + * the earliest buckets expire. + * + * We keep multiple timeseries at different granularities and update + * them simultaneously. This allows us to compute statistics at different + * levels. For example, we can simultaneously compute the avg of some counter + * value over the last 5 minutes, 10 minutes and hour. 
This is a similar + * concept to the load counters from the unix 'uptime' command. + * + * 'exportTypes' specifies what kind of statistics to export for each level. + * The export types are fixed when the timeseries is first created; a later + * call with the same name returns the existing object unchanged. + * + * 'levels' specifies the granularities at which the stats should be tracked. + * The time durations must be strictly increasing. Special value '0' means all + * time and should always come last. + * + * 'numBuckets' specifies how many buckets to keep at each level. More buckets + * will produce more precise data at the expense of memory. + */ +ExportedTimeSeries* createTimeseries( + const std::string& name, + const std::vector& exportTypes = + std::vector{ StatsType::AVG }, + const std::vector& levels = + std::vector{ + std::chrono::seconds(60), + std::chrono::seconds(600), + std::chrono::seconds(3600), + std::chrono::seconds(0) /* all time */ }, + int numBuckets = 60); + +/* + * Create a histogram counter named 'name'. Return an existing one if it has + * already been created. + * + * 'bucketSize' is the width of each histogram bucket (it is passed straight + * through to folly::Histogram). + * 'min' is the minimal value in the histogram. + * 'max' is the maximal value in the histogram. + * 'exportPercentile' specifies at what percentile values we should report the + * stat. A set of doubles between 0 and 1.0. For example, 0.5 means p50 and + * 0.99 means p99. + */ +ExportedHistogram* createHistogram( + const std::string& name, + int64_t bucketSize, + int64_t min, + int64_t max, + const std::vector& exportPercentile); + +/* + * Export all the statistics as simple key, value pairs. + */ +void exportAll(std::map& statsMap); + +// Interface for a flat counter. All methods are thread safe. 
+class ExportedCounter { + public: + void increment() { m_value.fetch_add(1, std::memory_order_relaxed); } + void decrement() { m_value.fetch_sub(1, std::memory_order_relaxed); } + void setValue(int64_t value) { + m_value.store(value, std::memory_order_relaxed); + } + int64_t getValue() const { return m_value.load(std::memory_order_relaxed); } + + private: + std::atomic_int_fast64_t m_value; + + ~ExportedCounter() = delete; +}; + +// Interface for timeseries data. All methods are thread safe. +class ExportedTimeSeries { + public: + ExportedTimeSeries(int numBuckets, + const std::vector& durations, + const std::vector& exportTypes); + + void addValue(int64_t value); + void addValue(int64_t value, int64_t times); + void addValueAggregated(int64_t sum, int64_t nsamples); + + void exportAll(const std::string& prefix, + std::map& statsMap); + + private: + folly::Synchronized, + folly::RWSpinLock > m_timeseries; + const std::vector m_exportTypes; + + ~ExportedTimeSeries() = delete; +}; + +// Interface for histogram data. All methods are thread safe. 
+class ExportedHistogram { + public: + ExportedHistogram(int64_t bucketSize, int64_t min, int64_t max, + const std::vector& exportPercentiles); + void addValue(int64_t value); + void removeValue(int64_t value); + void exportAll(const std::string& prefix, + std::map& statsMap); + + private: + folly::Synchronized, folly::RWSpinLock> m_histogram; + const std::vector m_exportPercentiles; + + ~ExportedHistogram() = delete; +}; + +}; // namespace ServiceData + +/////////////////////////////////////////////////////////////////////////////// +} + +#include "hphp/util/service_data-inl.h" + +#endif // incl_HPHP_SERVICE_DATA_H_ diff --git a/hphp/util/test/service_data.cpp b/hphp/util/test/service_data.cpp new file mode 100644 index 000000000..f4020f151 --- /dev/null +++ b/hphp/util/test/service_data.cpp @@ -0,0 +1,123 @@ +/* + +----------------------------------------------------------------------+ + | HipHop for PHP | + +----------------------------------------------------------------------+ + | Copyright (c) 2010-2013 Facebook, Inc. (http://www.facebook.com) | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ +*/ + +#include "hphp/util/service_data.h" +#include "gtest/gtest.h" +namespace HPHP { + +TEST(ServiceDataTest, CounterTest) { + // Simple counter test. 
+ auto counter = ServiceData::createCounter("c1"); + counter->increment(); + { + std::map values; + ServiceData::exportAll(values); + EXPECT_EQ(1, values["c1"]); + } + counter->increment(); + counter->increment(); + { + std::map values; + ServiceData::exportAll(values); + EXPECT_EQ(3, values["c1"]); + } + counter->setValue(0); + { + std::map values; + ServiceData::exportAll(values); + EXPECT_EQ(0, values["c1"]); + } + + // Multiple counters. + auto counter1 = ServiceData::createCounter("c2"); + counter->increment(); + counter1->setValue(5); + { + std::map values; + ServiceData::exportAll(values); + EXPECT_EQ(1, values["c1"]); + EXPECT_EQ(5, values["c2"]); + } + + // Multiple counter object to the same underlying counter. + auto counter2 = ServiceData::createCounter("c2"); + counter1->setValue(5); + counter2->increment(); + counter1->increment(); + { + std::map values; + ServiceData::exportAll(values); + EXPECT_EQ(7, values["c2"]); + } +} + +TEST(ServiceDataTest, TimeSeriesTest) { + auto statsType = { + ServiceData::StatsType::AVG, + ServiceData::StatsType::SUM, + ServiceData::StatsType::COUNT, + ServiceData::StatsType::RATE + }; + + auto ts = ServiceData::createTimeseries("foo", statsType); + ts->addValue(1); + ts->addValue(1); + + { + std::map values; + ServiceData::exportAll(values); + EXPECT_EQ(1, values["foo.avg"]); + EXPECT_EQ(1, values["foo.avg.60"]); + EXPECT_EQ(1, values["foo.avg.600"]); + EXPECT_EQ(1, values["foo.avg.3600"]); + + EXPECT_EQ(2, values["foo.sum"]); + EXPECT_EQ(2, values["foo.sum.60"]); + EXPECT_EQ(2, values["foo.sum.600"]); + EXPECT_EQ(2, values["foo.sum.3600"]); + + EXPECT_EQ(2, values["foo.count"]); + EXPECT_EQ(2, values["foo.count.60"]); + EXPECT_EQ(2, values["foo.count.600"]); + EXPECT_EQ(2, values["foo.count.3600"]); + + EXPECT_EQ(2, values["foo.rate"]); + EXPECT_EQ(2, values["foo.rate.60"]); + EXPECT_EQ(2, values["foo.rate.600"]); + EXPECT_EQ(2, values["foo.rate.3600"]); + } +} + +TEST(ServiceDataTest, Histogram) { + auto hist = 
ServiceData::createHistogram( + "foo", 1, 0, 100, + {0.05, 0.5, 0.75, 0.95}); + + for (int i = 0; i < 100; ++i) { + hist->addValue(i); + } + + { + std::map values; + ServiceData::exportAll(values); + EXPECT_EQ(5, values["foo.hist.p5"]); + EXPECT_EQ(50, values["foo.hist.p50"]); + EXPECT_EQ(75, values["foo.hist.p75"]); + EXPECT_EQ(95, values["foo.hist.p95"]); + } +} + +}