xref: /5.5.2/platform/include/platform/histogram.h (revision 9c83cef1)
1/* -*- Mode: C++; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/*
3 *     Copyright 2015 Couchbase, Inc
4 *
5 *   Licensed under the Apache License, Version 2.0 (the "License");
6 *   you may not use this file except in compliance with the License.
7 *   You may obtain a copy of the License at
8 *
9 *       http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *   Unless required by applicable law or agreed to in writing, software
12 *   distributed under the License is distributed on an "AS IS" BASIS,
13 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 *   See the License for the specific language governing permissions and
15 *   limitations under the License.
16 */
17
18#pragma once
19
20#include <platform/make_unique.h>
21#include <platform/platform.h>
22#include <platform/processclock.h>
23#include <relaxed_atomic.h>
24
25#include <algorithm>
26#include <chrono>
27#include <cmath>
28#include <functional>
29#include <iostream>
30#include <numeric>
31#include <vector>
32
33// Custom microseconds duration used for measuring histogram stats.
34// Stats will never be negative, so an unsigned Rep is used.
35using UnsignedMicroseconds = std::chrono::duration<uint64_t, std::micro>;
36
37namespace cb {
38// std::chrono::durations are not supported by std::numeric_limits by default.
39// This struct acts as a traits class for durations. It should be specialised
40// for individual durations and implement a required subset of methods from
41// std::numeric_limits
42template <typename T>
43struct duration_limits {};
44
45template <>
46struct duration_limits<UnsignedMicroseconds> {
47    static UnsignedMicroseconds max() {
48        return UnsignedMicroseconds::max();
49    }
50    static UnsignedMicroseconds min() {
51        return UnsignedMicroseconds::min();
52    }
53};
54}
55
56// Forward declaration.
57template <typename T, template <class> class Limits>
58class Histogram;
59
60/**
61 * An individual bin of histogram data.
62 *
63 * @tparam T type of the histogram bin value.
64 * @tparam Limits traits class which provides min() and max() static
65 *         member functions which specify the min and max value of T.
66 */
67template <typename T, template <class> class Limits = std::numeric_limits>
68class HistogramBin {
69public:
70
71    /**
72     * Get a HistogramBin covering [s, e)
73     */
74    HistogramBin(T s, T e)
75        : _count(0),
76          _start(s),
77          _end(e) { }
78
79    HistogramBin(const HistogramBin& other) = delete;
80
81    /**
82     * The starting value of this histogram bin (inclusive).
83     */
84    T start() const { return _start; }
85
86    /**
87     * The ending value of this histogram bin (exclusive).
88     */
89    T end() const { return _end; }
90
91    /**
92     * The count in this bin.
93     */
94    size_t count() const { return _count.load(); }
95
96private:
97    friend class Histogram<T, Limits>;
98
99    /**
100     * Increment this bin by the given amount.
101     */
102    void incr(size_t amount) {
103        _count.fetch_add(amount);
104    }
105
106    /**
107     * Set a specific value for this bin.
108     */
109    void set(size_t val) {
110        _count.store(val);
111    }
112
113    /**
114     * Does this bin contain the given value?
115     *
116     * @param value a value that may be within this bin's boundaries
117     * @return true if this value is counted within this bin
118     */
119    bool accepts(T value) {
120        return value >= _start && (value < _end || value == Limits<T>::max());
121    }
122
123    Couchbase::RelaxedAtomic<size_t> _count;
124    T _start;
125    T _end;
126};
127
128/**
129 * Helper function object to sum sample.
130 */
131template <typename T, template <class> class Limits>
132class HistogramBinSampleAdder {
133public:
134    HistogramBinSampleAdder() { }
135
136    size_t operator()(size_t n,
137                      const typename Histogram<T, Limits>::value_type& b) {
138        return n + b->count();
139    }
140};
141
142/**
143 * A bin generator that generates buckets of a width that may increase
144 * in size a fixed growth amount over iterations.
145 */
146template <typename T, template <class> class Limits = std::numeric_limits>
147class GrowingWidthGenerator {
148public:
149
150    /**
151     * Construct a growing width generator.
152     *
153     * @param start the starting point for this generator (inclusive)
154     * @param width the starting width for this generator
155     * @param growth how far the width should increase each time
156     */
157    GrowingWidthGenerator(T start, T width, double growth = 1.0)
158        : _growth(growth),
159          _start(start) {
160        // Dividing two durations returns a value of the underlying Rep.
161        _width = width / T(1);
162    }
163
164    /**
165     * Generate the next bin.
166     */
167    typename Histogram<T, Limits>::value_type operator()() {
168        auto rv = std::make_unique<typename Histogram<T, Limits>::bin_type>(
169                _start, _start + static_cast<T>(uint64_t(_width)));
170        _start += static_cast<T>(uint64_t(_width));
171        _width = _width * _growth;
172        return rv;
173    }
174
175private:
176    double _growth;
177    T _start;
178    double _width;
179};
180
181/**
182 * A bin generator that generates buckets from a sequence of T where
183 * each bin is from [v[n], v[n+1]).
184 */
185template <typename T, template <class> class Limits = std::numeric_limits>
186class FixedInputGenerator {
187public:
188
189    /**
190     * Get a FixedInputGenerator with the given sequence of bin starts.
191     */
192    FixedInputGenerator(std::vector<T>& input)
193        : it(input.begin()),
194          end(input.end()) { }
195
196    typename Histogram<T, Limits>::value_type operator()() {
197        if (it + 1 >= end) {
198            throw std::overflow_error("FixedInputGenerator::operator()"
199                                              "would overflow input sequence");
200        }
201        T current = *it;
202        ++it;
203        T next = *it;
204        return std::make_unique<typename Histogram<T, Limits>::bin_type>(
205                current, next);
206    }
207
208private:
209    typename std::vector<T>::iterator it;
210    const typename std::vector<T>::iterator end;
211};
212
213/**
214 * A bin generator that [n^i, n^(i+1)).
215 */
216template <typename T, template <class> class Limits = std::numeric_limits>
217class ExponentialGenerator {
218public:
219
220    /**
221     * Get a FixedInputGenerator with the given sequence of bin starts.
222     */
223    ExponentialGenerator(uint64_t start, double power)
224        : _start(start),
225          _power(power) { }
226
227    typename Histogram<T, Limits>::value_type operator()() {
228        T start = T(uint64_t(std::pow(_power, double(_start))));
229        T end = T(uint64_t(std::pow(_power, double(++_start))));
230        return std::make_unique<typename Histogram<T, Limits>::bin_type>(start,
231                                                                         end);
232    }
233
234private:
235    uint64_t _start;
236    double _power;
237};
238
239// UnsignedMicroseconds stream operator required for Histogram::verify()
240// so the type can be printed.
241inline std::ostream& operator<<(std::ostream& os,
242                                const UnsignedMicroseconds& ms) {
243    os << ms.count();
244    return os;
245}
246
247/**
248 * A Histogram.
249 */
250template <typename T, template <class> class Limits = std::numeric_limits>
251class Histogram {
252public:
253    using bin_type = HistogramBin<T, Limits>;
254    using value_type = std::unique_ptr<bin_type>;
255    using container_type = std::vector<value_type>;
256    using const_iterator = typename container_type::const_iterator;
257    using iterator = typename container_type::const_iterator;
258
259    /**
260     * Build a histogram.
261     *
262     * @param generator a generator for the bins within this bucket
263     * @param n how many bins this histogram should contain
264     */
265    template<typename G>
266    Histogram(const G& generator, size_t n = 30)
267        : bins(n) {
268        if (n < 1){
269            throw std::invalid_argument("Histogram must have at least 1 bin");
270        }
271        fill(generator);
272    }
273
274    /**
275     * Build a default histogram.
276     *
277     * @param n how many bins this histogram should contain.
278     */
279    Histogram(size_t n = 30)
280        : Histogram(ExponentialGenerator<T, Limits>(0, 2.0), n) {
281    }
282
283    //Deleted to avoid copying large objects
284    Histogram(const Histogram& other) = delete;
285
286    Histogram(Histogram&& other) = default;
287
288    Histogram& operator=(Histogram&& other) = default;
289
290    Histogram& operator=(const Histogram& other) = delete;
291
292    /**
293     * Add a value to this histogram.
294     *
295     * Note: defined non-inline to reduce the code bloat from recording
296     *       histogram values - the inline size of this method is non-trivial.
297     *       If you get want to add a new instantiation of this class; check
298     *       the explicit template definition in histogram.cc.
299     *
300     * @param amount the size of the thing being added
301     * @param count the quantity at this size being added
302     */
303    void add(T amount, size_t count = 1);
304
305    /**
306     * Get the bin servicing the given sized input.
307     */
308    const HistogramBin<T, Limits>* getBin(T amount) {
309        return findBin(amount)->get();
310    }
311
312    /**
313     * Set all bins to 0.
314     */
315    void reset();
316
317    /**
318     * Get the total number of samples counted.
319     *
320     * This is the sum of all counts in each bin.
321     */
322    size_t total();
323
324    /**
325     * Get an iterator from the beginning of a histogram bin.
326     */
327    const_iterator begin() const {
328        return bins.begin();
329    }
330
331    iterator begin() {
332        return bins.begin();
333    }
334
335    /**
336     * Get the iterator at the end of the histogram bin.
337     */
338    const_iterator end() const {
339        return bins.end();
340    }
341
342    iterator end() {
343        return bins.end();
344    }
345
346private:
347
348    template<typename G>
349    void fill(G& generator) {
350        std::generate(bins.begin(), bins.end(), generator);
351
352        // If there will not naturally be one, create a bin for the
353        // smallest possible value
354        if (bins.front()->start() > Limits<T>::min()) {
355            bins.insert(bins.begin(),
356                        std::make_unique<bin_type>(Limits<T>::min(),
357                                                   bins.front()->start()));
358        }
359
360        // Also create one reaching to the largest possible value
361        if (bins.back()->end() < Limits<T>::max()) {
362            bins.push_back(std::make_unique<bin_type>(bins.back()->end(),
363                                                      Limits<T>::max()));
364        }
365
366        verify();
367    }
368
369    // This validates that we're sorted and have no gaps or overlaps. Returns
370    // true if tests pass, else false.
371    bool verify();
372
373    /* Finds the bin containing the specified amount. Returns iterator to end
374     * () if not found
375     */
376    iterator findBin(T amount);
377
378    template <typename type, template <class> class limits>
379    friend std::ostream& operator<<(std::ostream& out,
380                                    const Histogram<type, limits>& b);
381
382    container_type bins;
383};
384
385/**
386 * Histogram of durations measured in microseconds.
387 *
388 * Typesafe; calling add() with any chrono::duration specialization (seconds,
389 * milliseconds etc) will perform any necessary conversion.
390 */
391using MicrosecondHistogram =
392        Histogram<UnsignedMicroseconds, cb::duration_limits>;
393
394/**
395 * Adapter class to assist in recording the duration of an event into
396 * MicrosecondHistograms; which don't have start() / stop() methods.
397 *
398 * This class will record the startTime when start() is called; then when end()
399 * is called it will calculate the duration and pass that to Histogram.
400 *
401 * @tparam Histogram Type to record the duration into.
402 *
403 */
404class PLATFORM_PUBLIC_API MicrosecondStopwatch {
405public:
406    MicrosecondStopwatch(MicrosecondHistogram& histogram)
407        : histogram(histogram) {
408    }
409
410    void start(ProcessClock::time_point start_) {
411        startTime = start_;
412    }
413
414    void stop(ProcessClock::time_point end);
415
416private:
417    MicrosecondHistogram& histogram;
418    ProcessClock::time_point startTime;
419};
420
421/**
422 * Times blocks automatically and records the values in a histogram.
423 *
424 * If THRESHOLD_MS is greater than zero, then any blocks taking longer than
425 * THRESHOLD_MS to execute will be reported to stderr.
426 * Note this requires that a name is specified for the BlockTimer.
427 */
428template<typename HISTOGRAM, uint64_t THRESHOLD_MS>
429class GenericBlockTimer {
430public:
431
432    /**
433     * Get a BlockTimer that will store its values in the given
434     * histogram.
435     *
436     * @param d the histogram that will hold the result.
437     *        If it is 'nullptr', the BlockTimer is disabled so that
438     *        both constructor and destructor will do nothing.
439     * @param n the name to give the histogram, used when logging slow block
440     *        execution to the log.
441     */
442    GenericBlockTimer(HISTOGRAM* d,
443                      const char* n = nullptr,
444                      std::ostream* o = nullptr)
445        : dest(d),
446          start((dest) ? ProcessClock::now() : ProcessClock::time_point()),
447          name(n),
448          out(o) {
449    }
450
451    ~GenericBlockTimer() {
452        if (dest) {
453            auto spent = ProcessClock::now() - start;
454            dest->add(std::chrono::duration_cast<std::chrono::microseconds>(spent));
455            log(spent, name, out);
456        }
457    }
458
459    static void log(ProcessClock::duration spent,
460                    const char* name,
461                    std::ostream* o) {
462        if (o && name) {
463            *o << name << "\t" << spent.count() << "\n";
464        }
465        if (THRESHOLD_MS > 0) {
466            const auto msec = std::make_unsigned<ProcessClock::rep>::type(std::chrono::duration_cast<std::chrono::milliseconds>(spent).count());
467            if (name != nullptr && msec > THRESHOLD_MS) {
468                std::cerr << "BlockTimer<" << name
469                          << "> Took too long: " << msec << "ms" << std::endl;
470            }
471        }
472    }
473
474private:
475    HISTOGRAM* dest;
476    ProcessClock::time_point start;
477    const char* name;
478    std::ostream* out;
479};
480
481/* Convenience specialization which only records in a MicrosecondHistogram;
482 * doesn't log slow blocks.
483 */
484typedef GenericBlockTimer<MicrosecondHistogram, 0> BlockTimer;
485
486// How to print a bin.
487template <typename T, template <class> class Limits>
488std::ostream& operator<<(std::ostream& out, const HistogramBin<T, Limits>& b) {
489    out << "[" << b.start() << ", " << b.end() << ") = " << b.count();
490    return out;
491}
492
493// How to print a histogram.
494template <typename T, template <class> class Limits>
495std::ostream& operator<<(std::ostream& out, const Histogram<T, Limits>& b) {
496    out << "{Histogram: ";
497    bool needComma(false);
498    for (const auto& bin : b.bins) {
499        if (needComma) {
500            out << ", ";
501        }
502        out << (*bin);
503        needComma = true;
504    }
505    out << "}";
506    return out;
507}
508