Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/bench/base58.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,6 @@ static void Base58Decode(benchmark::State& state)
}


BENCHMARK(Base58Encode);
BENCHMARK(Base58CheckEncode);
BENCHMARK(Base58Decode);
BENCHMARK(Base58Encode, 470 * 1000);
BENCHMARK(Base58CheckEncode, 320 * 1000);
BENCHMARK(Base58Decode, 800 * 1000);
181 changes: 109 additions & 72 deletions src/bench/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,102 +8,139 @@
#include <assert.h>
#include <iostream>
#include <iomanip>
#include <algorithm>
#include <regex>
#include <numeric>

benchmark::BenchRunner::BenchmarkMap &benchmark::BenchRunner::benchmarks() {
static std::map<std::string, benchmark::BenchFunction> benchmarks_map;
// Writes the column-title line that precedes the per-benchmark rows on stdout.
void benchmark::ConsolePrinter::header()
{
    static const char* const column_titles = "# Benchmark, evals, iterations, total, min, max, median";
    std::cout << column_titles << std::endl;
}

// Prints one comma-separated row for a benchmark: name, number of evaluations,
// iterations per evaluation, total time, fastest sample, slowest sample and
// median sample. When no samples were recorded, total/min/max/median are 0.
void benchmark::ConsolePrinter::result(const State& state)
{
    // Work on a sorted copy; the State is const and keeps its recording order.
    auto results = state.m_elapsed_results;
    std::sort(results.begin(), results.end());

    // Samples are stored per iteration (UpdateTimer divides each measurement by
    // m_num_iters), so multiplying the sum back gives the overall elapsed time.
    double total = state.m_num_iters * std::accumulate(results.begin(), results.end(), 0.0);

    double front = 0;
    double back = 0;
    double median = 0;

    if (!results.empty()) {
        front = results.front();
        back = results.back();

        const size_t mid = results.size() / 2;
        median = results[mid];
        if (0 == results.size() % 2) {
            // Even count: the two central samples are at mid - 1 and mid.
            // (Indexing mid + 1 would read past the end for two samples and
            // pick the wrong pair for larger even counts.)
            median = (results[mid - 1] + results[mid]) / 2;
        }
    }

    std::cout << std::setprecision(6);
    std::cout << state.m_name << ", " << state.m_num_evals << ", " << state.m_num_iters << ", " << total << ", " << front << ", " << back << ", " << median << std::endl;
}

// The console output has no trailer: nothing is written after the result rows.
void benchmark::ConsolePrinter::footer()
{
}
// Stores the settings used later by header(): the URL placed in the <script>
// tag and the pixel width/height of the chart's <div>.
benchmark::PlotlyPrinter::PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height)
    : m_plotly_url(plotly_url),
      m_width(width),
      m_height(height)
{
}

// Emits the opening part of the HTML page: the plotly <script> include, the
// sized target <div>, and the start of the JavaScript `data` array that
// result() appends entries to.
void benchmark::PlotlyPrinter::header()
{
    std::cout << "<html><head>";
    std::cout << "<script src=\"" << m_plotly_url << "\"></script>";
    std::cout << "</head><body><div id=\"myDiv\" style=\"width:" << m_width << "px; height:" << m_height << "px\"></div>";
    std::cout << "<script> var data = [";
    std::cout << std::endl;
}

// Appends one box-plot trace to the JavaScript `data` array: the benchmark's
// name plus every recorded sample as the `y` values.
void benchmark::PlotlyPrinter::result(const State& state)
{
    std::cout << "{ " << std::endl;
    std::cout << " name: '" << state.m_name << "', " << std::endl;
    std::cout << " y: [";

    // Samples are comma-separated; no separator goes before the first one.
    bool is_first = true;
    for (const auto& sample : state.m_elapsed_results) {
        if (!is_first) {
            std::cout << ", ";
        }
        std::cout << std::setprecision(6) << sample;
        is_first = false;
    }

    std::cout << "]," << std::endl;
    std::cout << " boxpoints: 'all', jitter: 0.3, pointpos: 0, type: 'box'," << std::endl;
    std::cout << "}," << std::endl;
}

// Closes the `data` array opened by header(), defines the plot layout, issues
// the Plotly.newPlot call and closes the HTML document.
void benchmark::PlotlyPrinter::footer()
{
    std::cout << "]; var layout = { showlegend: false, yaxis: { rangemode: 'tozero', autorange: true } };";
    std::cout << "Plotly.newPlot('myDiv', data, layout);";
    std::cout << "</script></body></html>";
}


// Returns the process-wide registry of benchmarks, keyed by name.
// The map is a function-local static, so it is constructed on first use;
// BenchRunner objects inserting into it from their constructors therefore
// always find it initialized.
benchmark::BenchRunner::BenchmarkMap& benchmark::BenchRunner::benchmarks()
{
    static BenchmarkMap registry;
    return registry;
}

benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func)
benchmark::BenchRunner::BenchRunner(std::string name, benchmark::BenchFunction func, uint64_t num_iters_for_one_second)
{
benchmarks().insert(std::make_pair(name, func));
benchmarks().insert(std::make_pair(name, Bench{func, num_iters_for_one_second}));
}

void
benchmark::BenchRunner::RunAll(benchmark::duration elapsedTimeForOne)
void benchmark::BenchRunner::RunAll(Printer& printer, uint64_t num_evals, double scaling, const std::string& filter, bool is_list_only)
{
perf_init();
if (std::ratio_less_equal<benchmark::clock::period, std::micro>::value) {
if (!std::ratio_less_equal<benchmark::clock::period, std::micro>::value) {
std::cerr << "WARNING: Clock precision is worse than microsecond - benchmarks may be less accurate!\n";
}
#ifdef DEBUG
std::cerr << "WARNING: This is a debug build - may result in slower benchmarks.\n";
#endif

std::cout << "#Benchmark" << "," << "count" << "," << "min(ns)" << "," << "max(ns)" << "," << "average(ns)" << ","
<< "min_cycles" << "," << "max_cycles" << "," << "average_cycles" << "\n";
std::regex reFilter(filter);
std::smatch baseMatch;

for (const auto &p: benchmarks()) {
State state(p.first, elapsedTimeForOne);
p.second(state);
}
perf_fini();
}
printer.header();

bool benchmark::State::KeepRunning()
{
if (count & countMask) {
++count;
return true;
}
time_point now;
for (const auto& p : benchmarks()) {
if (!std::regex_match(p.first, baseMatch, reFilter)) {
continue;
}

uint64_t nowCycles;
if (count == 0) {
lastTime = beginTime = now = clock::now();
lastCycles = beginCycles = nowCycles = perf_cpucycles();
}
else {
now = clock::now();
auto elapsed = now - lastTime;
auto elapsedOne = elapsed / (countMask + 1);
if (elapsedOne < minTime) minTime = elapsedOne;
if (elapsedOne > maxTime) maxTime = elapsedOne;

// We only use relative values, so don't have to handle 64-bit wrap-around specially
nowCycles = perf_cpucycles();
uint64_t elapsedOneCycles = (nowCycles - lastCycles) / (countMask + 1);
if (elapsedOneCycles < minCycles) minCycles = elapsedOneCycles;
if (elapsedOneCycles > maxCycles) maxCycles = elapsedOneCycles;

if (elapsed*128 < maxElapsed) {
// If the execution was much too fast (1/128th of maxElapsed), increase the count mask by 8x and restart timing.
// The restart avoids including the overhead of this code in the measurement.
countMask = ((countMask<<3)|7) & ((1LL<<60)-1);
count = 0;
minTime = duration::max();
maxTime = duration::zero();
minCycles = std::numeric_limits<uint64_t>::max();
maxCycles = std::numeric_limits<uint64_t>::min();
return true;
uint64_t num_iters = static_cast<uint64_t>(p.second.num_iters_for_one_second * scaling);
if (0 == num_iters) {
num_iters = 1;
}
if (elapsed*16 < maxElapsed) {
uint64_t newCountMask = ((countMask<<1)|1) & ((1LL<<60)-1);
if ((count & newCountMask)==0) {
countMask = newCountMask;
}
State state(p.first, num_evals, num_iters, printer);
if (!is_list_only) {
p.second.func(state);
}
printer.result(state);
}
lastTime = now;
lastCycles = nowCycles;
++count;

if (now - beginTime < maxElapsed) return true; // Keep going
printer.footer();

--count;
perf_fini();
}

assert(count != 0 && "count == 0 => (now == 0 && beginTime == 0) => return above");
bool benchmark::State::UpdateTimer(const benchmark::time_point current_time)
{
if (m_start_time != time_point()) {
std::chrono::duration<double> diff = current_time - m_start_time;
m_elapsed_results.push_back(diff.count() / m_num_iters);

// Output results
// Duration casts are only necessary here because hardware with sub-nanosecond clocks
// will lose precision.
int64_t min_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(minTime).count();
int64_t max_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(maxTime).count();
int64_t avg_elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>((now-beginTime)/count).count();
int64_t averageCycles = (nowCycles-beginCycles)/count;
std::cout << std::fixed << std::setprecision(15) << name << "," << count << "," << min_elapsed << "," << max_elapsed << "," << avg_elapsed << ","
<< minCycles << "," << maxCycles << "," << averageCycles << "\n";
std::cout.copyfmt(std::ios(nullptr));
if (m_elapsed_results.size() == m_num_evals) {
return false;
}
}

return false;
m_num_iters_left = m_num_iters - 1;
return true;
}
143 changes: 95 additions & 48 deletions src/bench/bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <limits>
#include <map>
#include <string>
#include <vector>
#include <chrono>

#include <boost/preprocessor/cat.hpp>
Expand All @@ -32,64 +33,110 @@ static void CODE_TO_TIME(benchmark::State& state)
... do any cleanup needed...
}

BENCHMARK(CODE_TO_TIME);
// default to running benchmark for 5000 iterations
BENCHMARK(CODE_TO_TIME, 5000);

*/

namespace benchmark {
// In case high_resolution_clock is steady, prefer that, otherwise use steady_clock.
struct best_clock {
using hi_res_clock = std::chrono::high_resolution_clock;
using steady_clock = std::chrono::steady_clock;
using type = std::conditional<hi_res_clock::is_steady, hi_res_clock, steady_clock>::type;
};
using clock = best_clock::type;
using time_point = clock::time_point;
using duration = clock::duration;

class State {
std::string name;
duration maxElapsed;
time_point beginTime, lastTime;
duration minTime, maxTime;
uint64_t count;
uint64_t countMask;
uint64_t beginCycles;
uint64_t lastCycles;
uint64_t minCycles;
uint64_t maxCycles;
public:
State(std::string _name, duration _maxElapsed) :
name(_name),
maxElapsed(_maxElapsed),
minTime(duration::max()),
maxTime(duration::zero()),
count(0),
countMask(1),
beginCycles(0),
lastCycles(0),
minCycles(std::numeric_limits<uint64_t>::max()),
maxCycles(std::numeric_limits<uint64_t>::min()) {
}
bool KeepRunning();
};
// Compile-time clock selection for benchmark timing.
// In case high_resolution_clock is steady, prefer that, otherwise use steady_clock.
// Either way the selected `type` is a steady clock.
struct best_clock {
using hi_res_clock = std::chrono::high_resolution_clock;
using steady_clock = std::chrono::steady_clock;
// Picks hi_res_clock when its is_steady flag is true, steady_clock otherwise.
using type = std::conditional<hi_res_clock::is_steady, hi_res_clock, steady_clock>::type;
};
using clock = best_clock::type;
using time_point = clock::time_point;
using duration = clock::duration;

class Printer;

class State
{
public:
std::string m_name;
uint64_t m_num_iters_left;
const uint64_t m_num_iters;
const uint64_t m_num_evals;
std::vector<double> m_elapsed_results;
time_point m_start_time;

bool UpdateTimer(time_point finish_time);

typedef std::function<void(State&)> BenchFunction;
State(std::string name, uint64_t num_evals, double num_iters, Printer& printer) : m_name(name), m_num_iters_left(0), m_num_iters(num_iters), m_num_evals(num_evals)
{
}

class BenchRunner
inline bool KeepRunning()
{
typedef std::map<std::string, BenchFunction> BenchmarkMap;
static BenchmarkMap &benchmarks();
if (m_num_iters_left--) {
return true;
}

bool result = UpdateTimer(clock::now());
// measure again so runtime of UpdateTimer is not included
m_start_time = clock::now();
return result;
}
};

public:
BenchRunner(std::string name, BenchFunction func);
typedef std::function<void(State&)> BenchFunction;

static void RunAll(duration elapsedTimeForOne = std::chrono::seconds(1));
// Registry and driver for benchmarks. Constructing a BenchRunner registers one
// benchmark function under a name; RunAll() then executes the registered set.
class BenchRunner
{
    // One registered benchmark: the function to time and the iteration count
    // its author supplied as taking roughly one second.
    struct Bench {
        BenchFunction func;
        uint64_t num_iters_for_one_second;
    };
    using BenchmarkMap = std::map<std::string, Bench>;

    // Accessor for the shared name -> Bench registry.
    static BenchmarkMap& benchmarks();

public:
    // Registers `func` under `name` together with its iteration count.
    BenchRunner(std::string name, BenchFunction func, uint64_t num_iters_for_one_second);

    // Runs the registered benchmarks whose name fully matches the regular
    // expression `filter`, reporting through `printer`.
    //   num_evals    - number of timing samples to collect per benchmark.
    //   scaling      - multiplier applied to each benchmark's
    //                  num_iters_for_one_second (result is at least 1).
    //   is_list_only - when true the benchmark function is not invoked, but
    //                  printer.result() is still called for each match.
    static void RunAll(Printer& printer, uint64_t num_evals, double scaling, const std::string& filter, bool is_list_only);
};

// Abstract sink for benchmark output. RunAll() calls header() once, result()
// once per selected benchmark, and footer() once at the end.
class Printer
{
public:
    virtual ~Printer() = default;

    // Emitted before any result.
    virtual void header() = 0;

    // Emitted for each benchmark, with the State holding its samples.
    virtual void result(const State& state) = 0;

    // Emitted after the last result.
    virtual void footer() = 0;
};

// Printer that writes comma-separated text to stdout: a column-title line,
// then one row per benchmark with name, evals, iterations, total, min, max
// and median. It writes no footer.
class ConsolePrinter : public Printer
{
public:
// Writes the column-title line.
void header();
// Writes the row for one benchmark.
void result(const State& state);
// Writes nothing.
void footer();
};

// Printer that writes an HTML page to stdout which renders the samples of
// every benchmark as a box plot using plotly.js.
class PlotlyPrinter : public Printer
{
    std::string m_plotly_url; // URL used as the src of the page's <script> tag
    int64_t m_width;          // width of the chart <div>, in pixels
    int64_t m_height;         // height of the chart <div>, in pixels

public:
    PlotlyPrinter(std::string plotly_url, int64_t width, int64_t height);

    // Opens the HTML document and the JavaScript data array.
    void header();
    // Adds one box-plot trace for the given benchmark.
    void result(const State& state);
    // Closes the data array, plots it and closes the document.
    void footer();
};
}

// BENCHMARK(foo) expands to: benchmark::BenchRunner bench_11foo("foo", foo);
#define BENCHMARK(n) \
benchmark::BenchRunner BOOST_PP_CAT(bench_, BOOST_PP_CAT(__LINE__, n))(BOOST_PP_STRINGIZE(n), n);

// BENCHMARK(foo, num_iters_for_one_second) expands to (when used on line 11):
//   benchmark::BenchRunner bench_11foo("foo", foo, (num_iters_for_one_second));
// Choose a num_iters_for_one_second for which the benchmark takes roughly 1 second. The goal is that all
// benchmarks take approximately the same time; a scaling factor can then be used so that the total time
// is appropriate for your system.
#define BENCHMARK(n, num_iters_for_one_second) \
benchmark::BenchRunner BOOST_PP_CAT(bench_, BOOST_PP_CAT(__LINE__, n))(BOOST_PP_STRINGIZE(n), n, (num_iters_for_one_second));

#endif // BITCOIN_BENCH_BENCH_H
Loading