benchmark 1.6.1
benchmark.h
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Support for registering benchmarks for functions.
16 
17 /* Example usage:
18 // Define a function that executes the code to be measured a
19 // specified number of times:
20 static void BM_StringCreation(benchmark::State& state) {
21  for (auto _ : state)
22  std::string empty_string;
23 }
24 
25 // Register the function as a benchmark
26 BENCHMARK(BM_StringCreation);
27 
28 // Define another benchmark
29 static void BM_StringCopy(benchmark::State& state) {
30  std::string x = "hello";
31  for (auto _ : state)
32  std::string copy(x);
33 }
34 BENCHMARK(BM_StringCopy);
35 
36 // Augment the main() program to invoke benchmarks if specified
37 // via the --benchmark_filter command line flag. E.g.,
38 // my_unittest --benchmark_filter=all
39 // my_unittest --benchmark_filter=BM_StringCreation
40 // my_unittest --benchmark_filter=String
41 // my_unittest --benchmark_filter='Copy|Creation'
42 int main(int argc, char** argv) {
43  benchmark::Initialize(&argc, argv);
44  benchmark::RunSpecifiedBenchmarks();
45  benchmark::Shutdown();
46  return 0;
47 }
48 
49 // Sometimes a family of microbenchmarks can be implemented with
50 // just one routine that takes an extra argument to specify which
51 // one of the family of benchmarks to run. For example, the following
52 // code defines a family of microbenchmarks for measuring the speed
53 // of memcpy() calls of different lengths:
54 
55 static void BM_memcpy(benchmark::State& state) {
56  char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
57  memset(src, 'x', state.range(0));
58  for (auto _ : state)
59  memcpy(dst, src, state.range(0));
60  state.SetBytesProcessed(state.iterations() * state.range(0));
61  delete[] src; delete[] dst;
62 }
63 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
64 
65 // The preceding code is quite repetitive, and can be replaced with the
66 // following short-hand. The following invocation will pick a few
67 // appropriate arguments in the specified range and will generate a
68 // microbenchmark for each such argument.
69 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
70 
71 // You might have a microbenchmark that depends on two inputs. For
72 // example, the following code defines a family of microbenchmarks for
73 // measuring the speed of set insertion.
74 static void BM_SetInsert(benchmark::State& state) {
75  set<int> data;
76  for (auto _ : state) {
77  state.PauseTiming();
78  data = ConstructRandomSet(state.range(0));
79  state.ResumeTiming();
80  for (int j = 0; j < state.range(1); ++j)
81  data.insert(RandomNumber());
82  }
83 }
84 BENCHMARK(BM_SetInsert)
85  ->Args({1<<10, 128})
86  ->Args({2<<10, 128})
87  ->Args({4<<10, 128})
88  ->Args({8<<10, 128})
89  ->Args({1<<10, 512})
90  ->Args({2<<10, 512})
91  ->Args({4<<10, 512})
92  ->Args({8<<10, 512});
93 
94 // The preceding code is quite repetitive, and can be replaced with
95 // the following short-hand. The following macro will pick a few
96 // appropriate arguments in the product of the two specified ranges
97 // and will generate a microbenchmark for each such pair.
98 BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
99 
100 // For more complex patterns of inputs, passing a custom function
101 // to Apply allows programmatic specification of an
102 // arbitrary set of arguments to run the microbenchmark on.
103 // The following example enumerates a dense range on
104 // one parameter, and a sparse range on the second.
105 static void CustomArguments(benchmark::internal::Benchmark* b) {
106  for (int i = 0; i <= 10; ++i)
107  for (int j = 32; j <= 1024*1024; j *= 8)
108  b->Args({i, j});
109 }
110 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
111 
112 // Templated microbenchmarks work the same way:
113 // Produce then consume 'size' messages 'iters' times
114 // Measures throughput in the absence of multiprogramming.
115 template <class Q> void BM_Sequential(benchmark::State& state) {
116  Q q;
117  typename Q::value_type v;
118  for (auto _ : state) {
119  for (int i = state.range(0); i--; )
120  q.push(v);
121  for (int e = state.range(0); e--; )
122  q.Wait(&v);
123  }
124  // actually messages, not bytes:
125  state.SetBytesProcessed(state.iterations() * state.range(0));
126 }
127 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
128 
129 Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
130 benchmark. This option overrides the `benchmark_min_time` flag.
131 
132 void BM_test(benchmark::State& state) {
133  ... body ...
134 }
135 BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
136 
137 In a multithreaded test, it is guaranteed that none of the threads will start
138 until all have reached the loop start, and all will have finished before any
139 thread exits the loop body. As such, any global setup or teardown you want to
140 do can be wrapped in a check against the thread index:
141 
142 static void BM_MultiThreaded(benchmark::State& state) {
143  if (state.thread_index() == 0) {
144  // Setup code here.
145  }
146  for (auto _ : state) {
147  // Run the test as normal.
148  }
149  if (state.thread_index() == 0) {
150  // Teardown code here.
151  }
152 }
153 BENCHMARK(BM_MultiThreaded)->Threads(4);
154 
155 
156 If a benchmark runs for a few milliseconds, it may be hard to visually compare
157 the measured times, since the output data is given in nanoseconds by default.
158 To set the time unit manually, specify it as follows:
159 
160 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
161 */
162 
163 #ifndef BENCHMARK_BENCHMARK_H_
164 #define BENCHMARK_BENCHMARK_H_
165 
166 // The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
167 #if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
168 #define BENCHMARK_HAS_CXX11
169 #endif
170 
171 // This _MSC_VER check should detect VS 2017 v15.3 and newer.
172 #if __cplusplus >= 201703L || \
173  (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L)
174 #define BENCHMARK_HAS_CXX17
175 #endif
176 
177 #include <stdint.h>
178 
179 #include <algorithm>
180 #include <cassert>
181 #include <cstddef>
182 #include <iosfwd>
183 #include <limits>
184 #include <map>
185 #include <set>
186 #include <string>
187 #include <utility>
188 #include <vector>
189 
190 #if defined(BENCHMARK_HAS_CXX11)
191 #include <atomic>
192 #include <initializer_list>
193 #include <type_traits>
194 #include <utility>
195 #endif
196 
197 #if defined(_MSC_VER)
198 #include <intrin.h> // for _ReadWriteBarrier
199 #endif
200 
201 #ifndef BENCHMARK_HAS_CXX11
202 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
203  TypeName(const TypeName&); \
204  TypeName& operator=(const TypeName&)
205 #else
206 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
207  TypeName(const TypeName&) = delete; \
208  TypeName& operator=(const TypeName&) = delete
209 #endif
210 
211 #ifdef BENCHMARK_HAS_CXX17
212 #define BENCHMARK_UNUSED [[maybe_unused]]
213 #elif defined(__GNUC__) || defined(__clang__)
214 #define BENCHMARK_UNUSED __attribute__((unused))
215 #else
216 #define BENCHMARK_UNUSED
217 #endif
218 
219 #if defined(__GNUC__) || defined(__clang__)
220 #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
221 #define BENCHMARK_NOEXCEPT noexcept
222 #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
223 #elif defined(_MSC_VER) && !defined(__clang__)
224 #define BENCHMARK_ALWAYS_INLINE __forceinline
225 #if _MSC_VER >= 1900
226 #define BENCHMARK_NOEXCEPT noexcept
227 #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
228 #else
229 #define BENCHMARK_NOEXCEPT
230 #define BENCHMARK_NOEXCEPT_OP(x)
231 #endif
232 #define __func__ __FUNCTION__
233 #else
234 #define BENCHMARK_ALWAYS_INLINE
235 #define BENCHMARK_NOEXCEPT
236 #define BENCHMARK_NOEXCEPT_OP(x)
237 #endif
238 
239 #define BENCHMARK_INTERNAL_TOSTRING2(x) #x
240 #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
241 
242 // clang-format off
243 #if defined(__GNUC__) || defined(__clang__)
244 #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
245 #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
246 #define BENCHMARK_DISABLE_DEPRECATED_WARNING \
247  _Pragma("GCC diagnostic push") \
248  _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
249 #define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
250 #else
251 #define BENCHMARK_BUILTIN_EXPECT(x, y) x
252 #define BENCHMARK_DEPRECATED_MSG(msg)
253 #define BENCHMARK_WARNING_MSG(msg) \
254  __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
255  __LINE__) ") : warning note: " msg))
256 #define BENCHMARK_DISABLE_DEPRECATED_WARNING
257 #define BENCHMARK_RESTORE_DEPRECATED_WARNING
258 #endif
259 // clang-format on
260 
261 #if defined(__GNUC__) && !defined(__clang__)
262 #define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
263 #endif
264 
265 #ifndef __has_builtin
266 #define __has_builtin(x) 0
267 #endif
268 
269 #if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
270 #define BENCHMARK_UNREACHABLE() __builtin_unreachable()
271 #elif defined(_MSC_VER)
272 #define BENCHMARK_UNREACHABLE() __assume(false)
273 #else
274 #define BENCHMARK_UNREACHABLE() ((void)0)
275 #endif
276 
277 #ifdef BENCHMARK_HAS_CXX11
278 #define BENCHMARK_OVERRIDE override
279 #else
280 #define BENCHMARK_OVERRIDE
281 #endif
282 
283 namespace benchmark {
284 class BenchmarkReporter;
285 
286 void Initialize(int* argc, char** argv);
287 void Shutdown();
288 
289 // Report to stdout all arguments in 'argv' as unrecognized except the first.
290 // Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
291 bool ReportUnrecognizedArguments(int argc, char** argv);
292 
293 // Returns the current value of --benchmark_filter.
294 std::string GetBenchmarkFilter();
295 
296 // Generate a list of benchmarks matching the specified --benchmark_filter flag
297 // and if --benchmark_list_tests is specified return after printing the name
298 // of each matching benchmark. Otherwise run each matching benchmark and
299 // report the results.
300 //
301 // spec : Specify the benchmarks to run. If users do not specify this arg,
302 //        then the value of FLAGS_benchmark_filter will be used.
303 //
304 // The second and third overloads use the specified 'display_reporter' and
305 // 'file_reporter' respectively. 'file_reporter' will write to the file
306 // specified by '--benchmark_output'. If '--benchmark_output' is not given,
307 // the 'file_reporter' is ignored.
310 //
311 // RETURNS: The number of matching benchmarks.
312 size_t RunSpecifiedBenchmarks();
313 size_t RunSpecifiedBenchmarks(std::string spec);
314 
315 size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
316 size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
317  std::string spec);
318 
319 size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
320  BenchmarkReporter* file_reporter);
321 size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
322  BenchmarkReporter* file_reporter,
323  std::string spec);
324 
325 // If a MemoryManager is registered (via RegisterMemoryManager()),
326 // it can be used to collect and report allocation metrics for a run of the
327 // benchmark.
328 class MemoryManager {
329  public:
330  static const int64_t TombstoneValue;
331 
332  struct Result {
333  Result()
334  : num_allocs(0),
335  max_bytes_used(0),
336  total_allocated_bytes(TombstoneValue),
337  net_heap_growth(TombstoneValue) {}
338 
339  // The number of allocations made in total between Start and Stop.
340  int64_t num_allocs;
341 
342  // The peak memory use between Start and Stop.
343  int64_t max_bytes_used;
344 
345  // The total memory allocated, in bytes, between Start and Stop.
346  // Init'ed to TombstoneValue if metric not available.
347  int64_t total_allocated_bytes;
348 
349  // The net change in memory, in bytes, between Start and Stop,
350  // i.e., total_allocated_bytes - total_deallocated_bytes.
351  // Init'ed to TombstoneValue if metric not available.
352  int64_t net_heap_growth;
353  };
354 
355  virtual ~MemoryManager() {}
356 
357  // Implement this to start recording allocation information.
358  virtual void Start() = 0;
359 
360  // Implement this to stop recording and fill out the given Result structure.
361  BENCHMARK_DEPRECATED_MSG("Use Stop(Result&) instead")
362  virtual void Stop(Result* result) = 0;
363 
364  // FIXME(vyng): Make this pure virtual once we've migrated current users.
365  BENCHMARK_DISABLE_DEPRECATED_WARNING
366  virtual void Stop(Result& result) { Stop(&result); }
367  BENCHMARK_RESTORE_DEPRECATED_WARNING
368 };
369 
370 // Register a MemoryManager instance that will be used to collect and report
371 // allocation measurements for benchmark runs.
372 void RegisterMemoryManager(MemoryManager* memory_manager);
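// A minimal sketch of a custom manager (illustrative only; 'MyMemoryManager'
// and the metric collection are left to the implementation). Note that this
// header still declares Stop(Result*) as the pure-virtual member, even though
// it is deprecated in favour of Stop(Result&):
//
//   class MyMemoryManager : public benchmark::MemoryManager {
//    public:
//     void Start() BENCHMARK_OVERRIDE { /* begin tracking allocations */ }
//     void Stop(Result* result) BENCHMARK_OVERRIDE {
//       /* fill in result->num_allocs, result->max_bytes_used, ... */
//     }
//   };
//
//   static MyMemoryManager mm;
//   benchmark::RegisterMemoryManager(&mm);  // before RunSpecifiedBenchmarks()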
373 
374 // Add a key-value pair to output as part of the context stanza in the report.
375 void AddCustomContext(const std::string& key, const std::string& value);
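// For example (the key/value strings here are arbitrary):
//   benchmark::AddCustomContext("machine", "bench-host-01");
//   benchmark::AddCustomContext("compiler_version", "clang 13.0.0");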
376 
377 namespace internal {
378 class Benchmark;
379 class BenchmarkImp;
380 class BenchmarkFamilies;
381 
382 void UseCharPointer(char const volatile*);
383 
384 // Take ownership of the pointer and register the benchmark. Return the
385 // registered benchmark.
386 Benchmark* RegisterBenchmarkInternal(Benchmark*);
387 
388 // Ensure that the standard streams are properly initialized in every TU.
389 int InitializeStreams();
390 BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
391 
392 } // namespace internal
393 
394 #if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
395  defined(__EMSCRIPTEN__)
396 #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
397 #endif
398 
399 // Force the compiler to flush pending writes to global memory. Acts as an
400 // effective read/write barrier
401 #ifdef BENCHMARK_HAS_CXX11
402 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
403  std::atomic_signal_fence(std::memory_order_acq_rel);
404 }
405 #endif
406 
407 // The DoNotOptimize(...) function can be used to prevent a value or
408 // expression from being optimized away by the compiler. This function is
409 // intended to add little to no overhead.
410 // See: https://youtu.be/nXaxk27zwlk?t=2441
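// A typical use, sketched below, keeps the compiler from discarding the
// result computed inside the timing loop (the workload itself is just a
// placeholder):
//
//   static void BM_Accumulate(benchmark::State& state) {
//     for (auto _ : state) {
//       int x = 0;
//       for (int i = 0; i < 1000; ++i) x += i;
//       benchmark::DoNotOptimize(x);   // force 'x' to be materialized
//       benchmark::ClobberMemory();    // flush pending writes (C++11 and up)
//     }
//   }
//   BENCHMARK(BM_Accumulate);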
411 #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
412 template <class Tp>
413 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
414  asm volatile("" : : "r,m"(value) : "memory");
415 }
416 
417 template <class Tp>
418 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
419 #if defined(__clang__)
420  asm volatile("" : "+r,m"(value) : : "memory");
421 #else
422  asm volatile("" : "+m,r"(value) : : "memory");
423 #endif
424 }
425 
426 #ifndef BENCHMARK_HAS_CXX11
427 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
428  asm volatile("" : : : "memory");
429 }
430 #endif
431 #elif defined(_MSC_VER)
432 template <class Tp>
433 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
434  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
435  _ReadWriteBarrier();
436 }
437 
438 #ifndef BENCHMARK_HAS_CXX11
439 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
440 #endif
441 #else
442 template <class Tp>
443 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
444  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
445 }
446 // FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
447 #endif
448 
449 // This class is used for user-defined counters.
450 class Counter {
451  public:
452  enum Flags {
453  kDefaults = 0,
454  // Mark the counter as a rate. It will be presented divided
455  // by the duration of the benchmark.
456  kIsRate = 1 << 0,
457  // Mark the counter as a thread-average quantity. It will be
458  // presented divided by the number of threads.
459  kAvgThreads = 1 << 1,
460  // Mark the counter as a thread-average rate. See above.
461  kAvgThreadsRate = kIsRate | kAvgThreads,
462  // Mark the counter as a constant value, valid/same for *every* iteration.
463  // When reporting, it will be *multiplied* by the iteration count.
464  kIsIterationInvariant = 1 << 2,
465  // Mark the counter as a constant rate.
466  // When reporting, it will be *multiplied* by the iteration count
467  // and then divided by the duration of the benchmark.
468  kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
469  // Mark the counter as an iteration-average quantity.
470  // It will be presented divided by the number of iterations.
471  kAvgIterations = 1 << 3,
472  // Mark the counter as an iteration-average rate. See above.
473  kAvgIterationsRate = kIsRate | kAvgIterations,
474 
475  // In the end, invert the result. This is always done last!
476  kInvert = 1 << 31
477  };
478 
479  enum OneK {
480  // 1'000 items per 1k
481  kIs1000 = 1000,
482  // 1'024 items per 1k
483  kIs1024 = 1024
484  };
485 
486  double value;
487  Flags flags;
488  OneK oneK;
489 
490  BENCHMARK_ALWAYS_INLINE
491  Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
492  : value(v), flags(f), oneK(k) {}
493 
494  BENCHMARK_ALWAYS_INLINE operator double const &() const { return value; }
495  BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
496 };
497 
498 // A helper for user code to create unforeseen combinations of Flags, without
499 // having to do this cast manually each time, or providing this operator.
500 Counter::Flags inline operator|(const Counter::Flags& LHS,
501  const Counter::Flags& RHS) {
502  return static_cast<Counter::Flags>(static_cast<int>(LHS) |
503  static_cast<int>(RHS));
504 }
505 
506 // This is the container for the user-defined counters.
507 typedef std::map<std::string, Counter> UserCounters;
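// For example, a benchmark can attach counters through State::counters
// ('DoSomeWork' and the counter name are illustrative):
//
//   static void BM_Process(benchmark::State& state) {
//     size_t processed = 0;
//     for (auto _ : state) {
//       processed += DoSomeWork();
//     }
//     state.counters["Processed"] = benchmark::Counter(
//         static_cast<double>(processed), benchmark::Counter::kIsRate);
//   }
//   BENCHMARK(BM_Process);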
508 
509 // TimeUnit is passed to a benchmark in order to specify the order of magnitude
510 // for the measured time.
511 enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };
512 
513 // BigO is passed to a benchmark in order to specify the asymptotic
514 // computational complexity for the benchmark. In case oAuto is selected,
515 // complexity will be calculated automatically to the best fit.
517 enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
518 
519 typedef uint64_t IterationCount;
520 
521 enum StatisticUnit { kTime, kPercentage };
522 
523 // BigOFunc is passed to a benchmark in order to specify the asymptotic
524 // computational complexity for the benchmark.
525 typedef double(BigOFunc)(IterationCount);
526 
527 // StatisticsFunc is passed to a benchmark in order to compute some descriptive
528 // statistics over all the measurements of some type
529 typedef double(StatisticsFunc)(const std::vector<double>&);
530 
531 namespace internal {
532 struct Statistics {
533  std::string name_;
534  StatisticsFunc* compute_;
535  StatisticUnit unit_;
536 
537  Statistics(const std::string& name, StatisticsFunc* compute,
538  StatisticUnit unit = kTime)
539  : name_(name), compute_(compute), unit_(unit) {}
540 };
541 
542 class BenchmarkInstance;
543 class ThreadTimer;
544 class ThreadManager;
545 class PerfCountersMeasurement;
546 
547 enum AggregationReportMode
548 #if defined(BENCHMARK_HAS_CXX11)
549  : unsigned
550 #else
551 #endif
552 {
553  // The mode has not been manually specified
554  ARM_Unspecified = 0,
555  // The mode is user-specified.
556  // This may or may not be set when the following bit-flags are set.
557  ARM_Default = 1U << 0U,
558  // File reporter should only output aggregates.
559  ARM_FileReportAggregatesOnly = 1U << 1U,
560  // Display reporter should only output aggregates
561  ARM_DisplayReportAggregatesOnly = 1U << 2U,
562  // Both reporters should only display aggregates.
563  ARM_ReportAggregatesOnly =
564  ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
565 };
566 
567 } // namespace internal
568 
569 // State is passed to a running Benchmark and contains state for the
570 // benchmark to use.
571 class State {
572  public:
573  struct StateIterator;
574  friend struct StateIterator;
575 
576  // Returns iterators used to run each iteration of a benchmark using a
577  // C++11 range-based for loop. These functions should not be called directly.
578  //
579  // REQUIRES: The benchmark has not started running yet. Neither begin nor end
580  // have been called previously.
581  //
582  // NOTE: KeepRunning may not be used after calling either of these functions.
583  BENCHMARK_ALWAYS_INLINE StateIterator begin();
584  BENCHMARK_ALWAYS_INLINE StateIterator end();
585 
586  // Returns true if the benchmark should continue through another iteration.
587  // NOTE: A benchmark may not return from the test until KeepRunning() has
588  // returned false.
589  bool KeepRunning();
590 
591  // Returns true iff the benchmark should run n more iterations.
592  // REQUIRES: 'n' > 0.
593  // NOTE: A benchmark must not return from the test until KeepRunningBatch()
594  // has returned false.
595  // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
596  //
597  // Intended usage:
598  // while (state.KeepRunningBatch(1000)) {
599  // // process 1000 elements
600  // }
601  bool KeepRunningBatch(IterationCount n);
602 
603  // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
604  // by the current thread.
605  // Stop the benchmark timer. If not called, the timer will be
606  // automatically stopped after the last iteration of the benchmark loop.
607  //
608  // For threaded benchmarks the PauseTiming() function only pauses the timing
609  // for the current thread.
610  //
611  // NOTE: The "real time" measurement is per-thread. If different threads
612  // report different measurements the largest one is reported.
613  //
614  // NOTE: PauseTiming()/ResumeTiming() are relatively
615  // heavyweight, and so their use should generally be avoided
616  // within each benchmark iteration, if possible.
617  void PauseTiming();
618 
619  // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
620  // by the current thread.
621  // Start the benchmark timer. The timer is NOT running on entrance to the
622  // benchmark function. It begins running after control flow enters the
623  // benchmark loop.
624  //
625  // NOTE: PauseTiming()/ResumeTiming() are relatively
626  // heavyweight, and so their use should generally be avoided
627  // within each benchmark iteration, if possible.
628  void ResumeTiming();
629 
630  // REQUIRES: 'SkipWithError(...)' has not been called previously by the
631  // current thread.
632  // Report the benchmark as resulting in an error with the specified 'msg'.
633  // After this call the user may explicitly 'return' from the benchmark.
634  //
635  // If the ranged-for style of benchmark loop is used, the user must explicitly
636  // break from the loop, otherwise all future iterations will be run.
637  // If the 'KeepRunning()' loop is used the current thread will automatically
638  // exit the loop at the end of the current iteration.
639  //
640  // For threaded benchmarks only the current thread stops executing and future
641  // calls to `KeepRunning()` will block until all threads have completed
642  // the `KeepRunning()` loop. If multiple threads report an error only the
643  // first error message is used.
644  //
645  // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
646  // the current scope immediately. If the function is called from within
647  // the 'KeepRunning()' loop the current iteration will finish. It is the user's
648  // responsibility to exit the scope as needed.
649  void SkipWithError(const char* msg);
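  // For example (the failing setup and 'OpenResource()' are hypothetical):
  //
  //   static void BM_WithError(benchmark::State& state) {
  //     auto resource = OpenResource();
  //     if (!resource) {
  //       state.SkipWithError("resource unavailable");
  //       return;  // inside the ranged-for loop, 'break' instead
  //     }
  //     for (auto _ : state) {
  //       // ...
  //     }
  //   }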
650 
651  // Returns true if an error has been reported with 'SkipWithError(...)'.
652  bool error_occurred() const { return error_occurred_; }
653 
654  // REQUIRES: called exactly once per iteration of the benchmarking loop.
655  // Set the manually measured time for this benchmark iteration, which
656  // is used instead of automatically measured time if UseManualTime() was
657  // specified.
658  //
659  // For threaded benchmarks the final value will be set to the largest
660  // reported values.
661  void SetIterationTime(double seconds);
662 
663  // Set the number of bytes processed by the current benchmark
664  // execution. This routine is typically called once at the end of a
665  // throughput oriented benchmark.
666  //
667  // REQUIRES: a benchmark has exited its benchmarking loop.
668  BENCHMARK_ALWAYS_INLINE
669  void SetBytesProcessed(int64_t bytes) {
670  counters["bytes_per_second"] =
671  Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
672  }
673 
674  BENCHMARK_ALWAYS_INLINE
675  int64_t bytes_processed() const {
676  if (counters.find("bytes_per_second") != counters.end())
677  return static_cast<int64_t>(counters.at("bytes_per_second"));
678  return 0;
679  }
680 
681  // If this routine is called with complexity_n > 0 and a complexity report is
682  // requested for the family benchmark, then the current benchmark will be part
683  // of the computation and complexity_n will represent the length of N.
686  BENCHMARK_ALWAYS_INLINE
687  void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }
688 
689  BENCHMARK_ALWAYS_INLINE
690  int64_t complexity_length_n() const { return complexity_n_; }
691 
692  // If this routine is called with items > 0, then an items/s
693  // label is printed on the benchmark report line for the currently
694  // executing benchmark. It is typically called at the end of a processing
695  // benchmark where a processing items/second output is desired.
696  //
697  // REQUIRES: a benchmark has exited its benchmarking loop.
698  BENCHMARK_ALWAYS_INLINE
699  void SetItemsProcessed(int64_t items) {
700  counters["items_per_second"] =
701  Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
702  }
703 
704  BENCHMARK_ALWAYS_INLINE
705  int64_t items_processed() const {
706  if (counters.find("items_per_second") != counters.end())
707  return static_cast<int64_t>(counters.at("items_per_second"));
708  return 0;
709  }
710 
711  // If this routine is called, the specified label is printed at the
712  // end of the benchmark report line for the currently executing
713  // benchmark. Example:
714  // static void BM_Compress(benchmark::State& state) {
715  // ...
716  // double compression = input_size / output_size;
717  // state.SetLabel(StrFormat("compress:%.1f%%", 100.0 * compression));
718  // }
719  // Produces output that looks like:
720  // BM_Compress 50 50 14115038 compress:27.3%
721  //
722  // REQUIRES: a benchmark has exited its benchmarking loop.
723  void SetLabel(const char* label);
724 
725  void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
726  this->SetLabel(str.c_str());
727  }
728 
729  // Range arguments for this run. CHECKs if the argument has been set.
730  BENCHMARK_ALWAYS_INLINE
731  int64_t range(std::size_t pos = 0) const {
732  assert(range_.size() > pos);
733  return range_[pos];
734  }
735 
736  BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
737  int64_t range_x() const { return range(0); }
738 
739  BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
740  int64_t range_y() const { return range(1); }
741 
742  // Number of threads concurrently executing the benchmark.
743  BENCHMARK_ALWAYS_INLINE
744  int threads() const { return threads_; }
745 
746  // Index of the executing thread. Values from [0, threads).
747  BENCHMARK_ALWAYS_INLINE
748  int thread_index() const { return thread_index_; }
749 
750  BENCHMARK_ALWAYS_INLINE
751  IterationCount iterations() const {
752  if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
753  return 0;
754  }
755  return max_iterations - total_iterations_ + batch_leftover_;
756  }
757 
758  private:
759  // items we expect on the first cache line (ie 64 bytes of the struct)
760  // When total_iterations_ is 0, KeepRunning() and friends will return false.
761  // May be larger than max_iterations.
762  IterationCount total_iterations_;
763 
764  // When using KeepRunningBatch(), batch_leftover_ holds the number of
765  // iterations beyond max_iters that were run. Used to track
766  // completed_iterations_ accurately.
767  IterationCount batch_leftover_;
768 
769  public:
770  const IterationCount max_iterations;
771 
772  private:
773  bool started_;
774  bool finished_;
775  bool error_occurred_;
776 
777  // items we don't need on the first cache line
778  std::vector<int64_t> range_;
779 
780  int64_t complexity_n_;
781 
782  public:
783  // Container for user-defined counters.
784  UserCounters counters;
785 
786  private:
787  State(IterationCount max_iters, const std::vector<int64_t>& ranges,
788  int thread_i, int n_threads, internal::ThreadTimer* timer,
789  internal::ThreadManager* manager,
790  internal::PerfCountersMeasurement* perf_counters_measurement);
791 
792  void StartKeepRunning();
793  // Implementation of KeepRunning() and KeepRunningBatch().
794  // is_batch must be true unless n is 1.
795  bool KeepRunningInternal(IterationCount n, bool is_batch);
796  void FinishKeepRunning();
797 
798  const int thread_index_;
799  const int threads_;
800 
801  internal::ThreadTimer* const timer_;
802  internal::ThreadManager* const manager_;
803  internal::PerfCountersMeasurement* const perf_counters_measurement_;
804 
805  friend class internal::BenchmarkInstance;
806 };
807 
808 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
809  return KeepRunningInternal(1, /*is_batch=*/false);
810 }
811 
812 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
813  return KeepRunningInternal(n, /*is_batch=*/true);
814 }
815 
816 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
817  bool is_batch) {
818  // total_iterations_ is set to 0 by the constructor, and always set to a
819  // nonzero value by StartKeepRunning().
820  assert(n > 0);
821  // n must be 1 unless is_batch is true.
822  assert(is_batch || n == 1);
823  if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
824  total_iterations_ -= n;
825  return true;
826  }
827  if (!started_) {
828  StartKeepRunning();
829  if (!error_occurred_ && total_iterations_ >= n) {
830  total_iterations_ -= n;
831  return true;
832  }
833  }
834  // For non-batch runs, total_iterations_ must be 0 by now.
835  if (is_batch && total_iterations_ != 0) {
836  batch_leftover_ = n - total_iterations_;
837  total_iterations_ = 0;
838  return true;
839  }
840  FinishKeepRunning();
841  return false;
842 }
843 
844 struct State::StateIterator {
845  struct BENCHMARK_UNUSED Value {};
846  typedef std::forward_iterator_tag iterator_category;
847  typedef Value value_type;
848  typedef Value reference;
849  typedef Value pointer;
850  typedef std::ptrdiff_t difference_type;
851 
852  private:
853  friend class State;
854  BENCHMARK_ALWAYS_INLINE
855  StateIterator() : cached_(0), parent_() {}
856 
857  BENCHMARK_ALWAYS_INLINE
858  explicit StateIterator(State* st)
859  : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}
860 
861  public:
862  BENCHMARK_ALWAYS_INLINE
863  Value operator*() const { return Value(); }
864 
865  BENCHMARK_ALWAYS_INLINE
866  StateIterator& operator++() {
867  assert(cached_ > 0);
868  --cached_;
869  return *this;
870  }
871 
872  BENCHMARK_ALWAYS_INLINE
873  bool operator!=(StateIterator const&) const {
874  if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
875  parent_->FinishKeepRunning();
876  return false;
877  }
878 
879  private:
880  IterationCount cached_;
881  State* const parent_;
882 };
883 
884 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
885  return StateIterator(this);
886 }
887 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
888  StartKeepRunning();
889  return StateIterator();
890 }
891 
892 namespace internal {
893 
894 typedef void(Function)(State&);
895 
896 // ------------------------------------------------------
897 // Benchmark registration object. The BENCHMARK() macro expands
898 // into an internal::Benchmark* object. Various methods can
899 // be called on this object to change the properties of the benchmark.
900 // Each method returns "this" so that multiple method calls can
901 // be chained into one expression.
902 class Benchmark {
903  public:
904  virtual ~Benchmark();
905 
906  // Note: the following methods all return "this" so that multiple
907  // method calls can be chained together in one expression.
908 
909  // Specify the name of the benchmark
910  Benchmark* Name(const std::string& name);
911 
912  // Run this benchmark once with "x" as the extra argument passed
913  // to the function.
914  // REQUIRES: The function passed to the constructor must accept an arg1.
915  Benchmark* Arg(int64_t x);
916 
917  // Run this benchmark with the given time unit for the generated output report
918  Benchmark* Unit(TimeUnit unit);
919 
920  // Run this benchmark once for a number of values picked from the
921  // range [start..limit]. (start and limit are always picked.)
922  // REQUIRES: The function passed to the constructor must accept an arg1.
923  Benchmark* Range(int64_t start, int64_t limit);
924 
925  // Run this benchmark once for all values in the range [start..limit] with
926  // specific step
927  // REQUIRES: The function passed to the constructor must accept an arg1.
928  Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
929 
930  // Run this benchmark once with "args" as the extra arguments passed
931  // to the function.
932  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
933  Benchmark* Args(const std::vector<int64_t>& args);
934 
935  // Equivalent to Args({x, y})
936  // NOTE: This is a legacy C++03 interface provided for compatibility only.
937  // New code should use 'Args'.
938  Benchmark* ArgPair(int64_t x, int64_t y) {
939  std::vector<int64_t> args;
940  args.push_back(x);
941  args.push_back(y);
942  return Args(args);
943  }
944 
945  // Run this benchmark once for a number of values picked from the
946  // ranges [start..limit]. (starts and limits are always picked.)
947  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
948  Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);
949 
950  // Run this benchmark once for each combination of values in the (cartesian)
951  // product of the supplied argument lists.
952  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
953  Benchmark* ArgsProduct(const std::vector<std::vector<int64_t> >& arglists);
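  // For example, the following registers one run per pair in the cartesian
  // product {1, 2, 4} x {64, 512} (the values are arbitrary):
  //
  //   BENCHMARK(BM_SetInsert)->ArgsProduct({{1, 2, 4}, {64, 512}});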
954 
955  // Equivalent to ArgNames({name})
956  Benchmark* ArgName(const std::string& name);
957 
958  // Set the argument names to display in the benchmark name. If not called,
959  // only argument values will be shown.
960  Benchmark* ArgNames(const std::vector<std::string>& names);
961 
962  // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
963  // NOTE: This is a legacy C++03 interface provided for compatibility only.
964  // New code should use 'Ranges'.
965  Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
966  std::vector<std::pair<int64_t, int64_t> > ranges;
967  ranges.push_back(std::make_pair(lo1, hi1));
968  ranges.push_back(std::make_pair(lo2, hi2));
969  return Ranges(ranges);
970  }
971 
972  // Have "setup" and/or "teardown" invoked once for every benchmark run.
973  // If the benchmark is multi-threaded (will run in k threads concurrently),
974  // the setup callback will be invoked exactly once (not k times) before
975  // each run with k threads. Time allowing (e.g. for a short benchmark), there
976  // may be multiple such runs per benchmark, each run with its own
977  // "setup"/"teardown".
978  //
979  // If the benchmark uses different size groups of threads (e.g. via
980  // ThreadRange), the above will be true for each size group.
981  //
982  // The callback will be passed a State object, which includes the number
983  // of threads, thread-index, benchmark arguments, etc.
984  //
985  // The callback must not be NULL or self-deleting.
986  Benchmark* Setup(void (*setup)(const benchmark::State&));
987  Benchmark* Teardown(void (*teardown)(const benchmark::State&));
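  // For example (both callbacks shown are placeholders):
  //
  //   static void DoSetup(const benchmark::State& state) { /* e.g. warm caches */ }
  //   static void DoTeardown(const benchmark::State& state) { /* e.g. free resources */ }
  //
  //   BENCHMARK(BM_MultiThreaded)->Setup(DoSetup)->Teardown(DoTeardown)->Threads(8);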
988 
989  // Pass this benchmark object to *func, which can customize
990  // the benchmark by calling various methods like Arg, Args,
991  // Threads, etc.
992  Benchmark* Apply(void (*func)(Benchmark* benchmark));
993 
994  // Set the range multiplier for non-dense range. If not called, the range
995  // multiplier kRangeMultiplier will be used.
996  Benchmark* RangeMultiplier(int multiplier);
997 
998  // Set the minimum amount of time to use when running this benchmark. This
999  // option overrides the `benchmark_min_time` flag.
1000  // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
1001  Benchmark* MinTime(double t);
1002 
1003  // Specify the number of iterations that should be run by this benchmark.
1004  // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
1005  //
1006  // NOTE: This function should only be used when *exact* iteration control is
1007  // needed and never to control or limit how long a benchmark runs, where
1008  // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
1009  Benchmark* Iterations(IterationCount n);
1010 
1011  // Specify the number of times to repeat this benchmark. This option overrides
1012  // the `benchmark_repetitions` flag.
1013  // REQUIRES: `n > 0`
1014  Benchmark* Repetitions(int n);
1015 
1016  // Specify if each repetition of the benchmark should be reported separately
1017  // or if only the final statistics should be reported. If the benchmark
1018  // is not repeated then the single result is always reported.
1019  // Applies to *ALL* reporters (display and file).
1020  Benchmark* ReportAggregatesOnly(bool value = true);
1021 
1022  // Same as ReportAggregatesOnly(), but applies to display reporter only.
1023  Benchmark* DisplayAggregatesOnly(bool value = true);
1024 
1025  // By default, the CPU time is measured only for the main thread, which may
1026  // be unrepresentative if the benchmark uses threads internally. If called,
1027  // the total CPU time spent by all the threads will be measured instead.
1028  // By default, only the main thread's CPU time will be measured.
1029  Benchmark* MeasureProcessCPUTime();
1030 
1031  // If a particular benchmark should use the Wall clock instead of the CPU time
1032  // (be it either the CPU time of the main thread only (default), or the
1033  // total CPU usage of the benchmark), call this method. If called, the elapsed
1034  // (wall) time will be used to control how many iterations are run, and in the
1035  // printing of items/second or MB/second values.
1036  // If not called, the CPU time used by the benchmark will be used.
1037  Benchmark* UseRealTime();
1038 
1039  // If a benchmark must measure time manually (e.g. if GPU execution time is
1040  // being measured), call this method. If called, each benchmark iteration
1041  // should call SetIterationTime(seconds) to report the measured time, which
1042  // will be used to control how many iterations are run, and in the printing
1043  // of items/second or MB/second values.
1046  Benchmark* UseManualTime();
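  // A sketch of manual timing ('DoWorkToMeasure' is a hypothetical workload;
  // std::chrono is used only for illustration):
  //
  //   static void BM_ManualTiming(benchmark::State& state) {
  //     for (auto _ : state) {
  //       auto start = std::chrono::high_resolution_clock::now();
  //       DoWorkToMeasure();
  //       auto end = std::chrono::high_resolution_clock::now();
  //       state.SetIterationTime(
  //           std::chrono::duration_cast<std::chrono::duration<double>>(end - start)
  //               .count());
  //     }
  //   }
  //   BENCHMARK(BM_ManualTiming)->UseManualTime();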
1047 
1048  // Set the asymptotic computational complexity for the benchmark. If called
1049  // the asymptotic computational complexity will be shown on the output.
1050  Benchmark* Complexity(BigO complexity = benchmark::oAuto);
1051 
1052  // Set the asymptotic computational complexity for the benchmark. If called
1053  // the asymptotic computational complexity will be shown on the output.
1054  Benchmark* Complexity(BigOFunc* complexity);
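  // For example ('BM_StringCompare' is illustrative; the benchmark body is
  // expected to call state.SetComplexityN(...)):
  //
  //   BENCHMARK(BM_StringCompare)
  //       ->RangeMultiplier(2)->Range(1 << 10, 1 << 18)->Complexity(benchmark::oN);
  //
  // or, with a user-supplied fit function (a capture-less lambda converts to
  // BigOFunc*):
  //
  //   BENCHMARK(BM_StringCompare)->Range(1 << 10, 1 << 18)
  //       ->Complexity([](benchmark::IterationCount n) { return static_cast<double>(n); });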
1055 
1056  // Add this statistic to be computed over all the values of the benchmark run.
1057  Benchmark* ComputeStatistics(const std::string& name,
1058  StatisticsFunc* statistics,
1059  StatisticUnit unit = kTime);
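  // For example, to additionally report the maximum across repetitions
  // ('BM_spin' is illustrative; a capture-less lambda converts to
  // StatisticsFunc*):
  //
  //   BENCHMARK(BM_spin)
  //       ->ComputeStatistics("max",
  //                           [](const std::vector<double>& v) -> double {
  //                             return *std::max_element(v.begin(), v.end());
  //                           })
  //       ->Repetitions(10);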
1060 
1061  // Support for running multiple copies of the same benchmark concurrently
1062  // in multiple threads. This may be useful when measuring the scaling
1063  // of some piece of code.
1064 
1065  // Run one instance of this benchmark concurrently in t threads.
1066  Benchmark* Threads(int t);
1067 
1068  // Pick a set of values T from [min_threads,max_threads].
1069  // min_threads and max_threads are always included in T. Run this
1070  // benchmark once for each value in T. The benchmark run for a
1071  // particular value t consists of t threads running the benchmark
1072  // function concurrently. For example, consider:
1073  // BENCHMARK(Foo)->ThreadRange(1,16);
1074  // This will run the following benchmarks:
1075  // Foo in 1 thread
1076  // Foo in 2 threads
1077  // Foo in 4 threads
1078  // Foo in 8 threads
1079  // Foo in 16 threads
1080  Benchmark* ThreadRange(int min_threads, int max_threads);
1081 
1082  // For each value n in the range, run this benchmark once using n threads.
1083  // min_threads and max_threads are always included in the range.
1084  // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
1085  // a benchmark with 1, 4, 7 and 8 threads.
1086  Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
1087 
1088  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
1089  Benchmark* ThreadPerCpu();
1090 
1091  virtual void Run(State& state) = 0;
1092 
1093  protected:
1094  explicit Benchmark(const char* name);
1095  Benchmark(Benchmark const&);
1096  void SetName(const char* name);
1097 
1098  int ArgsCnt() const;
1099 
1100  private:
1101  friend class BenchmarkFamilies;
1102  friend class BenchmarkInstance;
1103 
1104  std::string name_;
1105  AggregationReportMode aggregation_report_mode_;
1106  std::vector<std::string> arg_names_; // Args for all benchmark runs
1107  std::vector<std::vector<int64_t> > args_; // Args for all benchmark runs
1108  TimeUnit time_unit_;
1109  int range_multiplier_;
1110  double min_time_;
1111  IterationCount iterations_;
1112  int repetitions_;
1113  bool measure_process_cpu_time_;
1114  bool use_real_time_;
1115  bool use_manual_time_;
1116  BigO complexity_;
1117  BigOFunc* complexity_lambda_;
1118  std::vector<Statistics> statistics_;
1119  std::vector<int> thread_counts_;
1120 
1121  typedef void (*callback_function)(const benchmark::State&);
1122  callback_function setup_;
1123  callback_function teardown_;
1124 
1125  Benchmark& operator=(Benchmark const&);
1126 };
1127 
1128 } // namespace internal
1129 
1130 // Create and register a benchmark with the specified 'name' that invokes
1131 // the specified functor 'fn'.
1132 //
1133 // RETURNS: A pointer to the registered benchmark.
1134 internal::Benchmark* RegisterBenchmark(const char* name,
1135  internal::Function* fn);
1136 
1137 #if defined(BENCHMARK_HAS_CXX11)
1138 template <class Lambda>
1139 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
1140 #endif
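// For example, benchmarks can be registered at runtime before
// RunSpecifiedBenchmarks() is called (a minimal sketch; the lambda body is a
// placeholder and requires the C++11 overload above):
//
//   int main(int argc, char** argv) {
//     benchmark::RegisterBenchmark("BM_lambda", [](benchmark::State& st) {
//       for (auto _ : st) {
//         // code to measure
//       }
//     });
//     benchmark::Initialize(&argc, argv);
//     benchmark::RunSpecifiedBenchmarks();
//     benchmark::Shutdown();
//     return 0;
//   }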
1141 
1142 // Remove all registered benchmarks. All pointers to previously registered
1143 // benchmarks are invalidated.
1144 void ClearRegisteredBenchmarks();
1145 
1146 namespace internal {
1147 // The class used to hold all Benchmarks created from static functions
1148 // (i.e. those created using the BENCHMARK(...) macros).
1149 class FunctionBenchmark : public Benchmark {
1150  public:
1151  FunctionBenchmark(const char* name, Function* func)
1152  : Benchmark(name), func_(func) {}
1153 
1154  virtual void Run(State& st) BENCHMARK_OVERRIDE;
1155 
1156  private:
1157  Function* func_;
1158 };
1159 
1160 #ifdef BENCHMARK_HAS_CXX11
1161 template <class Lambda>
1162 class LambdaBenchmark : public Benchmark {
1163  public:
1164  virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }
1165 
1166  private:
1167  template <class OLambda>
1168  LambdaBenchmark(const char* name, OLambda&& lam)
1169  : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
1170 
1171  LambdaBenchmark(LambdaBenchmark const&) = delete;
1172 
1173  template <class Lam> // NOLINTNEXTLINE(readability-redundant-declaration)
1174  friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
1175 
1176  Lambda lambda_;
1177 };
1178 #endif
1179 
1180 } // namespace internal
1181 
1182 inline internal::Benchmark* RegisterBenchmark(const char* name,
1183  internal::Function* fn) {
1184  return internal::RegisterBenchmarkInternal(
1185  ::new internal::FunctionBenchmark(name, fn));
1186 }
1187 
1188 #ifdef BENCHMARK_HAS_CXX11
1189 template <class Lambda>
1190 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
1191  using BenchType =
1192  internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
1193  return internal::RegisterBenchmarkInternal(
1194  ::new BenchType(name, std::forward<Lambda>(fn)));
1195 }
1196 #endif
1197 
1198 #if defined(BENCHMARK_HAS_CXX11) && \
1199  (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
1200 template <class Lambda, class... Args>
1201 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
1202  Args&&... args) {
1203  return benchmark::RegisterBenchmark(
1204  name, [=](benchmark::State& st) { fn(st, args...); });
1205 }
1206 #else
1207 #define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
1208 #endif
1209 
1210 // The base class for all fixture tests.
1211 class Fixture : public internal::Benchmark {
1212  public:
1213  Fixture() : internal::Benchmark("") {}
1214 
1215  virtual void Run(State& st) BENCHMARK_OVERRIDE {
1216  this->SetUp(st);
1217  this->BenchmarkCase(st);
1218  this->TearDown(st);
1219  }
1220 
1221  // These will be deprecated ...
1222  virtual void SetUp(const State&) {}
1223  virtual void TearDown(const State&) {}
1224  // ... In favor of these.
1225  virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
1226  virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
1227 
1228  protected:
1229  virtual void BenchmarkCase(State&) = 0;
1230 };
1231 
1232 } // namespace benchmark
1233 
1234 // ------------------------------------------------------
1235 // Macro to register benchmarks
1236 
1237 // Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
1238 // every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
1239 // empty. If X is empty the expression becomes (+1 == +0).
1240 #if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
1241 #define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
1242 #else
1243 #define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
1244 #endif
1245 
1246 // Helpers for generating unique variable names
1247 #ifdef BENCHMARK_HAS_CXX11
1248 #define BENCHMARK_PRIVATE_NAME(...) \
1249  BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \
1250  __VA_ARGS__)
1251 #else
1252 #define BENCHMARK_PRIVATE_NAME(n) \
1253  BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
1254 #endif // BENCHMARK_HAS_CXX11
1255 
1256 #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
1257 #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
1258 // Helper for concatenation with macro name expansion
1259 #define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
1260  BaseClass##_##Method##_Benchmark
1261 
1262 #define BENCHMARK_PRIVATE_DECLARE(n) \
1263  static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
1264  BENCHMARK_UNUSED
1265 
1266 #ifdef BENCHMARK_HAS_CXX11
1267 #define BENCHMARK(...) \
1268  BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
1269  (::benchmark::internal::RegisterBenchmarkInternal( \
1270  new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \
1271  &__VA_ARGS__)))
1272 #else
1273 #define BENCHMARK(n) \
1274  BENCHMARK_PRIVATE_DECLARE(n) = \
1275  (::benchmark::internal::RegisterBenchmarkInternal( \
1276  new ::benchmark::internal::FunctionBenchmark(#n, n)))
1277 #endif // BENCHMARK_HAS_CXX11
1278 
1279 // Old-style macros
1280 #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
1281 #define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
1282 #define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
1283 #define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
1284 #define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
1285  BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
1286 
1287 #ifdef BENCHMARK_HAS_CXX11
1288 
1289 // Register a benchmark which invokes the function specified by `func`
1290 // with the additional arguments specified by `...`.
1291 //
1292 // For example:
1293 //
1294 // template <class ...ExtraArgs>
1295 // void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
1296 //   [...]
1297 // }
1298 // /* Registers a benchmark named "BM_takes_args/int_string_test" */
1299 // BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
1300 #define BENCHMARK_CAPTURE(func, test_case_name, ...) \
1301  BENCHMARK_PRIVATE_DECLARE(func) = \
1302  (::benchmark::internal::RegisterBenchmarkInternal( \
1303  new ::benchmark::internal::FunctionBenchmark( \
1304  #func "/" #test_case_name, \
1305  [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
1306 
1307 #endif // BENCHMARK_HAS_CXX11
1308 
1309 // This will register a benchmark for a templatized function. For example:
1310 //
1311 // template <int arg>
1312 // void BM_Foo(benchmark::State& state);
1313 //
1314 // BENCHMARK_TEMPLATE(BM_Foo, 1);
1315 //
1316 // will register BM_Foo<1> as a benchmark.
1317 #define BENCHMARK_TEMPLATE1(n, a) \
1318  BENCHMARK_PRIVATE_DECLARE(n) = \
1319  (::benchmark::internal::RegisterBenchmarkInternal( \
1320  new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))
1321 
1322 #define BENCHMARK_TEMPLATE2(n, a, b) \
1323  BENCHMARK_PRIVATE_DECLARE(n) = \
1324  (::benchmark::internal::RegisterBenchmarkInternal( \
1325  new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
1326  n<a, b>)))
1327 
1328 #ifdef BENCHMARK_HAS_CXX11
1329 #define BENCHMARK_TEMPLATE(n, ...) \
1330  BENCHMARK_PRIVATE_DECLARE(n) = \
1331  (::benchmark::internal::RegisterBenchmarkInternal( \
1332  new ::benchmark::internal::FunctionBenchmark( \
1333  #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
1334 #else
1335 #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
1336 #endif
1337 
1338 #define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1339  class BaseClass##_##Method##_Benchmark : public BaseClass { \
1340  public: \
1341  BaseClass##_##Method##_Benchmark() { \
1342  this->SetName(#BaseClass "/" #Method); \
1343  } \
1344  \
1345  protected: \
1346  virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
1347  };
1348 
1349 #define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1350  class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
1351  public: \
1352  BaseClass##_##Method##_Benchmark() { \
1353  this->SetName(#BaseClass "<" #a ">/" #Method); \
1354  } \
1355  \
1356  protected: \
1357  virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
1358  };
1359 
1360 #define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1361  class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
1362  public: \
1363  BaseClass##_##Method##_Benchmark() { \
1364  this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
1365  } \
1366  \
1367  protected: \
1368  virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
1369  };
1370 
1371 #ifdef BENCHMARK_HAS_CXX11
1372 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \
1373  class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
1374  public: \
1375  BaseClass##_##Method##_Benchmark() { \
1376  this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \
1377  } \
1378  \
1379  protected: \
1380  virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
1381  };
1382 #else
1383 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
1384  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a)
1385 #endif
1386 
1387 #define BENCHMARK_DEFINE_F(BaseClass, Method) \
1388  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1389  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1390 
1391 #define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \
1392  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1393  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1394 
1395 #define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \
1396  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1397  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1398 
1399 #ifdef BENCHMARK_HAS_CXX11
1400 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \
1401  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1402  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1403 #else
1404 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \
1405  BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
1406 #endif
1407 
1408 #define BENCHMARK_REGISTER_F(BaseClass, Method) \
1409  BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method))
1410 
1411 #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
1412  BENCHMARK_PRIVATE_DECLARE(TestName) = \
1413  (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))
1414 
1415 // This macro will define and register a benchmark within a fixture class.
1416 #define BENCHMARK_F(BaseClass, Method) \
1417  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1418  BENCHMARK_REGISTER_F(BaseClass, Method); \
1419  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
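// For example ('MyFixture' and its members are illustrative):
//
//   class MyFixture : public benchmark::Fixture {
//    public:
//     void SetUp(const ::benchmark::State& state) BENCHMARK_OVERRIDE { /* ... */ }
//     void TearDown(const ::benchmark::State& state) BENCHMARK_OVERRIDE { /* ... */ }
//   };
//
//   BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
//     for (auto _ : st) {
//       // run the test as usual
//     }
//   }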
1420 
1421 #define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a) \
1422  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1423  BENCHMARK_REGISTER_F(BaseClass, Method); \
1424  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1425 
1426 #define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b) \
1427  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1428  BENCHMARK_REGISTER_F(BaseClass, Method); \
1429  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1430 
1431 #ifdef BENCHMARK_HAS_CXX11
1432 #define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...) \
1433  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1434  BENCHMARK_REGISTER_F(BaseClass, Method); \
1435  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1436 #else
1437 #define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \
1438  BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
1439 #endif
1440 
1441 // Helper macro to create a main routine in a test that runs the benchmarks
1442 #define BENCHMARK_MAIN() \
1443  int main(int argc, char** argv) { \
1444  ::benchmark::Initialize(&argc, argv); \
1445  if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
1446  ::benchmark::RunSpecifiedBenchmarks(); \
1447  ::benchmark::Shutdown(); \
1448  return 0; \
1449  } \
1450  int main(int, char**)
1451 
1452 // ------------------------------------------------------
1453 // Benchmark Reporters
1454 
1455 namespace benchmark {
1456 
1457 struct CPUInfo {
1458  struct CacheInfo {
1459  std::string type;
1460  int level;
1461  int size;
1462  int num_sharing;
1463  };
1464 
1465  enum Scaling { UNKNOWN, ENABLED, DISABLED };
1466 
1467  int num_cpus;
1468  Scaling scaling;
1469  double cycles_per_second;
1470  std::vector<CacheInfo> caches;
1471  std::vector<double> load_avg;
1472 
1473  static const CPUInfo& Get();
1474 
1475  private:
1476  CPUInfo();
1477  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
1478 };
1479 
1480 // Struct for system information.
1481 struct SystemInfo {
1482  std::string name;
1483  static const SystemInfo& Get();
1484 
1485  private:
1486  SystemInfo();
1487  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
1488 };
1489 
1490 // BenchmarkName contains the components of the Benchmark's name
1491 // which allows individual fields to be modified or cleared before
1492 // building the final name using 'str()'.
1493 struct BenchmarkName {
1494  std::string function_name;
1495  std::string args;
1496  std::string min_time;
1497  std::string iterations;
1498  std::string repetitions;
1499  std::string time_type;
1500  std::string threads;
1501 
1502  // Return the full name of the benchmark with each non-empty
1503  // field separated by a '/'
1504  std::string str() const;
1505 };
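// For illustration (hypothetical values): with function_name "BM_Foo",
// args "64/512", threads "threads:4", and the remaining fields empty,
// str() would yield "BM_Foo/64/512/threads:4".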
1506 
1507 // Interface for custom benchmark result printers.
1508 // By default, benchmark reports are printed to stdout. However, an application
1509 // can control the destination of the reports by calling
1510 // RunSpecifiedBenchmarks and passing it a custom reporter object.
1511 // The reporter object must implement the following interface.
1512 class BenchmarkReporter {
1513  public:
1514  struct Context {
1515  CPUInfo const& cpu_info;
1516  SystemInfo const& sys_info;
1517  // The number of chars in the longest benchmark name.
1518  size_t name_field_width;
1519  static const char* executable_name;
1520  Context();
1521  };
1522 
1523  struct Run {
1524  static const int64_t no_repetition_index = -1;
1525  enum RunType { RT_Iteration, RT_Aggregate };
1526 
1527  Run()
1528  : run_type(RT_Iteration),
1529  aggregate_unit(kTime),
1530  error_occurred(false),
1531  iterations(1),
1532  threads(1),
1533  time_unit(kNanosecond),
1534  real_accumulated_time(0),
1535  cpu_accumulated_time(0),
1536  max_heapbytes_used(0),
1537  complexity(oNone),
1538  complexity_lambda(),
1539  complexity_n(0),
1540  report_big_o(false),
1541  report_rms(false),
1542  memory_result(NULL),
1543  allocs_per_iter(0.0) {}
1544 
1545  std::string benchmark_name() const;
1546  BenchmarkName run_name;
1547  int64_t family_index;
1548  int64_t per_family_instance_index;
1549  RunType run_type;
1550  std::string aggregate_name;
1551  StatisticUnit aggregate_unit;
1552  std::string report_label; // Empty if not set by benchmark.
1553  bool error_occurred;
1554  std::string error_message;
1555 
1556  IterationCount iterations;
1557  int64_t threads;
1558  int64_t repetition_index;
1559  int64_t repetitions;
1560  TimeUnit time_unit;
1561  double real_accumulated_time;
1562  double cpu_accumulated_time;
1563 
1564  // Return a value representing the real time per iteration in the unit
1565  // specified by 'time_unit'.
1566  // NOTE: If 'iterations' is zero the returned value represents the
1567  // accumulated time.
1568  double GetAdjustedRealTime() const;
1569 
1570  // Return a value representing the cpu time per iteration in the unit
1571  // specified by 'time_unit'.
1572  // NOTE: If 'iterations' is zero the returned value represents the
1573  // accumulated time.
1574  double GetAdjustedCPUTime() const;
1575 
1576  // This is set to 0.0 if memory tracing is not enabled.
1577  double max_heapbytes_used;
1578 
1579  // Keep track of arguments to compute asymptotic complexity
1580  BigO complexity;
1581  BigOFunc* complexity_lambda;
1582  int64_t complexity_n;
1583 
1584  // what statistics to compute from the measurements
1585  const std::vector<internal::Statistics>* statistics;
1586 
1587  // Inform print function whether the current run is a complexity report
1588  bool report_big_o;
1589  bool report_rms;
1590 
1591  UserCounters counters;
1592 
1593  // Memory metrics.
1594  const MemoryManager::Result* memory_result;
1595  double allocs_per_iter;
1596  };
1597 
1598  struct PerFamilyRunReports {
1599  PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}
1600 
1601  // How many runs will all instances of this benchmark perform?
1602  int num_runs_total;
1603 
1604  // How many runs have happened already?
1605  int num_runs_done;
1606 
1607  // The reports about (non-erroneous!) runs of this family.
1608  std::vector<BenchmarkReporter::Run> Runs;
1609  };
1610 
1611  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
1612  // and the error stream set to 'std::cerr'
1613  BenchmarkReporter();
1614 
1615  // Called once for every suite of benchmarks run.
1616  // The parameter "context" contains information that the
1617  // reporter may wish to use when generating its report, for example the
1618  // platform under which the benchmarks are running. The benchmark run is
1619  // never started if this function returns false, allowing the reporter
1620  // to skip runs based on the context information.
1621  virtual bool ReportContext(const Context& context) = 0;
1622 
1623  // Called once for each group of benchmark runs; gives information about
1624  // CPU time and heap memory usage during the benchmark run. If the group
1625  // of runs contained more than two entries, then 'report' contains additional
1626  // elements representing the mean and standard deviation of those runs.
1627  // Additionally, if this group of runs was the last in a family of benchmarks,
1628  // 'report' contains additional entries representing the asymptotic
1629  // complexity and RMS of that benchmark family.
1630  virtual void ReportRuns(const std::vector<Run>& report) = 0;
1631 
1632  // Called once and only once after every group of benchmarks is run and
1633  // reported.
1634  virtual void Finalize() {}
1635 
1636  // REQUIRES: The object referenced by 'out' is valid for the lifetime
1637  // of the reporter.
1638  void SetOutputStream(std::ostream* out) {
1639  assert(out);
1640  output_stream_ = out;
1641  }
1642 
1643  // REQUIRES: The object referenced by 'err' is valid for the lifetime
1644  // of the reporter.
1645  void SetErrorStream(std::ostream* err) {
1646  assert(err);
1647  error_stream_ = err;
1648  }
1649 
1650  std::ostream& GetOutputStream() const { return *output_stream_; }
1651 
1652  std::ostream& GetErrorStream() const { return *error_stream_; }
1653 
1654  virtual ~BenchmarkReporter();
1655 
1656  // Write a human-readable string to 'out' representing the specified
1657  // 'context'.
1658  // REQUIRES: 'out' is non-null.
1659  static void PrintBasicContext(std::ostream* out, Context const& context);
1660 
1661  private:
1662  std::ostream* output_stream_;
1663  std::ostream* error_stream_;
1664 };
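// A minimal sketch of a custom display reporter, assuming C++11 'override'
// and a hypothetical class name 'NullDisplayReporter'. RunSpecifiedBenchmarks()
// accepts such a reporter in place of the default console reporter:
//
//   class NullDisplayReporter : public benchmark::BenchmarkReporter {
//    public:
//     bool ReportContext(const Context&) override { return true; }
//     void ReportRuns(const std::vector<Run>&) override {}
//   };
//
//   int main(int argc, char** argv) {
//     benchmark::Initialize(&argc, argv);
//     NullDisplayReporter reporter;  // discards all console output
//     benchmark::RunSpecifiedBenchmarks(&reporter);
//     benchmark::Shutdown();
//     return 0;
//   }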
1665 
1666 // Simple reporter that outputs benchmark data to the console. This is the
1667 // default reporter used by RunSpecifiedBenchmarks().
1668 class ConsoleReporter : public BenchmarkReporter {
1669  public:
1670  enum OutputOptions {
1671  OO_None = 0,
1672  OO_Color = 1,
1673  OO_Tabular = 2,
1674  OO_ColorTabular = OO_Color | OO_Tabular,
1675  OO_Defaults = OO_ColorTabular
1676  };
1677  explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
1678  : output_options_(opts_), name_field_width_(0), printed_header_(false) {}
1679 
1680  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
1681  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
1682 
1683  protected:
1684  virtual void PrintRunData(const Run& report);
1685  virtual void PrintHeader(const Run& report);
1686 
1687  OutputOptions output_options_;
1688  size_t name_field_width_;
1689  UserCounters prev_counters_;
1690  bool printed_header_;
1691 };
1692 
1693 class JSONReporter : public BenchmarkReporter {
1694  public:
1695  JSONReporter() : first_report_(true) {}
1696  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
1697  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
1698  virtual void Finalize() BENCHMARK_OVERRIDE;
1699 
1700  private:
1701  void PrintRunData(const Run& report);
1702 
1703  bool first_report_;
1704 };
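// A sketch of redirecting a reporter's output to a file via SetOutputStream(),
// assuming the stream outlives the reporter (per the REQUIRES note above) and
// that the pre-set stream is honored when the reporter is passed to
// RunSpecifiedBenchmarks(). 'results.json' is a hypothetical file name and
// <fstream> is assumed to be included:
//
//   std::ofstream json_out("results.json");
//   benchmark::JSONReporter json_reporter;
//   json_reporter.SetOutputStream(&json_out);
//   benchmark::RunSpecifiedBenchmarks(&json_reporter);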
1705 
1706 class BENCHMARK_DEPRECATED_MSG(
1707  "The CSV Reporter will be removed in a future release") CSVReporter
1708  : public BenchmarkReporter {
1709  public:
1710  CSVReporter() : printed_header_(false) {}
1711  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
1712  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
1713 
1714  private:
1715  void PrintRunData(const Run& report);
1716 
1717  bool printed_header_;
1718  std::set<std::string> user_counter_names_;
1719 };
1720 
1721 inline const char* GetTimeUnitString(TimeUnit unit) {
1722  switch (unit) {
1723  case kSecond:
1724  return "s";
1725  case kMillisecond:
1726  return "ms";
1727  case kMicrosecond:
1728  return "us";
1729  case kNanosecond:
1730  return "ns";
1731  }
1732  BENCHMARK_UNREACHABLE();
1733 }
1734 
1735 inline double GetTimeUnitMultiplier(TimeUnit unit) {
1736  switch (unit) {
1737  case kSecond:
1738  return 1;
1739  case kMillisecond:
1740  return 1e3;
1741  case kMicrosecond:
1742  return 1e6;
1743  case kNanosecond:
1744  return 1e9;
1745  }
1746  BENCHMARK_UNREACHABLE();
1747 }
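// For example (illustrative), converting a duration measured in seconds into
// a run's reporting unit and appending its suffix:
//
//   double value = seconds * GetTimeUnitMultiplier(unit);  // e.g. 1e9 for ns
//   std::string label = std::to_string(value) + " " + GetTimeUnitString(unit);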
1748 
1749 // Creates a list of integer values for the given range and multiplier.
1750 // This can be used together with ArgsProduct() to allow multiple ranges
1751 // with different multipliers.
1752 // Example:
1753 // ArgsProduct({
1754 // CreateRange(0, 1024, /*multi=*/32),
1755 // CreateRange(0, 100, /*multi=*/4),
1756 // CreateDenseRange(0, 4, /*step=*/1),
1757 // });
1758 std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);
1759 
1760 // Creates a list of integer values for the given range and step.
1761 std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step);
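// A usage sketch, assuming a previously defined benchmark function
// 'BM_Example' (hypothetical):
//
//   BENCHMARK(BM_Example)->ArgsProduct({
//       benchmark::CreateRange(8, 8 << 10, /*multi=*/2),
//       benchmark::CreateDenseRange(1, 4, /*step=*/1)});
//
// This registers one instance of BM_Example for every combination drawn from
// the two argument lists.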
1762 
1763 } // namespace benchmark
1764 
1765 #endif // BENCHMARK_BENCHMARK_H_