path: root/storage/tokudb/PerconaFT/tools/
diff options
Diffstat (limited to 'storage/tokudb/PerconaFT/tools/')
1 files changed, 0 insertions, 629 deletions
diff --git a/storage/tokudb/PerconaFT/tools/ b/storage/tokudb/PerconaFT/tools/
deleted file mode 100644
index cade7e5dfaf..00000000000
--- a/storage/tokudb/PerconaFT/tools/
+++ /dev/null
@@ -1,629 +0,0 @@
-/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
-// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
-#ident "$Id$"
-/*======
-This file is part of PerconaFT.
-Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License, version 2,
- as published by the Free Software Foundation.
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
- PerconaFT is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License, version 3,
- as published by the Free Software Foundation.
- PerconaFT is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
- You should have received a copy of the GNU Affero General Public License
- along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
-======= */
-#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
-// Replay a block allocator trace against different strategies and compare
-// the results
-#include <db.h>
-#include <getopt.h>
-#include <math.h>
-#include <stdio.h>
-#include <string.h>
-#include <map>
-#include <set>
-#include <string>
-#include <sstream>
-#include <vector>
-#include <portability/memory.h>
-#include <portability/toku_assert.h>
-#include <portability/toku_stdlib.h>
-#include "ft/serialize/block_allocator.h"
-using std::map;
-using std::set;
-using std::string;
-using std::vector;
-static int verbose = false; // an int rather than a bool because getopt_options hands its address to getopt_long as a flag
-static void ba_replay_assert(bool pred, const char *msg, const char *line, int line_num) { // when pred is false: print msg, line_num and the offending line to stderr, then abort()
- if (!pred) {
- fprintf(stderr, "%s, line (#%d): %s\n", msg, line_num, line);
- abort();
- }
-// Return a pointer to the first non-whitespace character of `line'
-// (or to its terminating NUL when the rest of the line is blank).
-static char *trim_whitespace(char *line) {
- // isspace() is undefined for negative arguments other than EOF, so
- // widen through unsigned char before classifying bytes >= 0x80.
- while (isspace(static_cast<unsigned char>(*line))) {
- line++;
- }
- return line;
-}
-static int64_t parse_number(char **ptr, int line_num, int base) { // parse a non-negative integer in `base' at *ptr and advance *ptr past it
- *ptr = trim_whitespace(*ptr);
- char *line = *ptr;
- char *new_ptr;
- int64_t n = strtoll(line, &new_ptr, base);
- ba_replay_assert(n >= 0, "malformed trace (bad numeric token)", line, line_num); // negative values are rejected
- ba_replay_assert(new_ptr > *ptr, "malformed trace (missing numeric token)", line, line_num); // strtoll consumed nothing, so there was no number here
- *ptr = new_ptr;
- return n;
-static uint64_t parse_uint64(char **ptr, int line_num) { // base-10 wrapper around parse_number; advances *ptr the same way
- int64_t n = parse_number(ptr, line_num, 10);
- // we happen to know that the uint64's we deal with will
- // take less than 63 bits (they come from pointers)
- return static_cast<uint64_t>(n);
-// Parse the next whitespace-delimited token at *ptr (at most 63 characters
-// are taken) and advance *ptr past what was read. Aborts via
-// ba_replay_assert when no token is present.
-static string parse_token(char **ptr, int line_num) {
- *ptr = trim_whitespace(*ptr);
- char *line = *ptr;
- // parse the first token, which represents the traced function.
- // the sscanf field width does not include the terminating NUL that
- // gets appended, so a 64-byte buffer can only accept %63s.
- char token[64];
- int r = sscanf(*ptr, "%63s", token);
- ba_replay_assert(r == 1, "malformed trace (missing string token)", line, line_num);
- *ptr += strlen(token);
- return string(token);
-}
-// Parse a blockpair written as "[offset size]" at *ptr and advance *ptr
-// past the closing bracket. Aborts via ba_replay_assert on a malformed pair.
-static block_allocator::blockpair parse_blockpair(char **ptr, int line_num) {
- *ptr = trim_whitespace(*ptr);
- char *line = *ptr;
- uint64_t offset, size;
- // %n is only stored once the closing ']' has matched and it does not
- // count towards sscanf's return value, so start from a sentinel to
- // catch a pair whose ']' is missing instead of advancing by garbage.
- int bytes_read = -1;
- int r = sscanf(line, "[%" PRIu64 " %" PRIu64 "]%n", &offset, &size, &bytes_read);
- ba_replay_assert(r == 2 && bytes_read > 0, "malformed trace (bad offset/size pair)", line, line_num);
- *ptr += bytes_read;
- return block_allocator::blockpair(offset, size);
-}
-static char *strip_newline(char *line, bool *found) { // cut `line' at its first '\n' and return `line'; `found' may be nullptr
- char *ptr = strchr(line, '\n');
- if (ptr != nullptr) {
- if (found != nullptr) {
- *found = true; // *found is left untouched when no newline is present
- }
- *ptr = '\0';
- }
- return line;
-// Read the next non-empty line from `file', however long it is, without
-// its trailing newline. Returns a toku_strdup'd copy, or nullptr once the
-// input is exhausted.
-static char *read_trace_line(FILE *file) {
- const int buf_size = 4096;
- char buf[buf_size];
- std::string s;
- bool eof = false;
- // an empty line used to be indistinguishable from end-of-file and
- // silently ended the whole trace, so keep reading until a line has
- // some content or the input really is exhausted.
- while (s.empty() && !eof) {
- while (true) {
- if (fgets(buf, buf_size, file) == nullptr) {
- eof = true;
- break;
- }
- bool has_newline = false;
- s += strip_newline(buf, &has_newline);
- if (has_newline) {
- // end of the line, we're done
- break;
- }
- }
- }
- return s.empty() ? nullptr : toku_strdup(s.c_str());
-}
-// Convert the raw allocator trace read from `file' into a canonical trace.
-//
-// Raw lines name the allocator by a base-16 id and allocations by the
-// offset they were given. The canonical form replaces the allocator id
-// with a sequence number and each allocated offset with an allocation
-// sequence number (asn). Returns one string per canonicalized line and
-// aborts via ba_replay_assert on a malformed or inconsistent trace.
-static vector<string> canonicalize_trace_from(FILE *file) {
- // new trace, canonicalized from a raw trace
- vector<string> canonicalized_trace;
- // raw allocator id -> canonical allocator id
- //
- // keeps track of allocators that were created as part of the trace,
- // and therefore will be part of the canonicalized trace.
- uint64_t allocator_id_seq_num = 0;
- map<uint64_t, uint64_t> allocator_ids;
- // allocated offset -> allocation seq num
- //
- uint64_t allocation_seq_num = 0;
- static const uint64_t ASN_NONE = (uint64_t) -1;
- typedef map<uint64_t, uint64_t> offset_seq_map;
- // raw allocator id -> offset_seq_map that tracks its allocations
- map<uint64_t, offset_seq_map> offset_to_seq_num_maps;
- int line_num = 0;
- char *line;
- while ((line = read_trace_line(file)) != nullptr) {
- line_num++;
- char *ptr = line;
- string fn = parse_token(&ptr, line_num);
- int64_t allocator_id = parse_number(&ptr, line_num, 16);
- std::stringstream ss;
- if (fn.find("ba_trace_create") != string::npos) {
- ba_replay_assert(allocator_ids.count(allocator_id) == 0, "corrupted trace: double create", line, line_num);
- ba_replay_assert(fn == "ba_trace_create" || fn == "ba_trace_create_from_blockpairs",
- "corrupted trace: bad fn", line, line_num);
- // we only convert the allocator_id to an allocator_id_seq_num
- // in the canonical trace and leave the rest of the line as-is.
- allocator_ids[allocator_id] = allocator_id_seq_num;
- ss << fn << ' ' << allocator_id_seq_num << ' ' << trim_whitespace(ptr) << std::endl;
- allocator_id_seq_num++;
- // First, read past the reserve / alignment values.
- (void) parse_uint64(&ptr, line_num);
- (void) parse_uint64(&ptr, line_num);
- if (fn == "ba_trace_create_from_blockpairs") {
- // For each blockpair created by this traceline, add its offset to the offset seq map
- // with asn ASN_NONE so that later canonicalizations of `free' know whether to write
- // down the asn or the raw offset.
- offset_seq_map *map = &offset_to_seq_num_maps[allocator_id];
- while (*trim_whitespace(ptr) != '\0') {
- const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num);
- (*map)[bp.offset] = ASN_NONE;
- }
- }
- } else {
- ba_replay_assert(allocator_ids.count(allocator_id) > 0, "corrupted trace: unknown allocator", line, line_num);
- uint64_t canonical_allocator_id = allocator_ids[allocator_id];
- // this is the map that tracks allocations for this allocator
- offset_seq_map *map = &offset_to_seq_num_maps[allocator_id];
- if (fn == "ba_trace_alloc") {
- const uint64_t size = parse_uint64(&ptr, line_num);
- const uint64_t heat = parse_uint64(&ptr, line_num);
- const uint64_t offset = parse_uint64(&ptr, line_num);
- ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num);
- // remember that an allocation at `offset' has the current alloc seq num
- (*map)[offset] = allocation_seq_num;
- // translate `offset = alloc(size)' to `asn = alloc(size)'
- ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl;
- allocation_seq_num++;
- } else if (fn == "ba_trace_free") {
- const uint64_t offset = parse_uint64(&ptr, line_num);
- ba_replay_assert(map->count(offset) != 0, "corrupted trace: invalid free", line, line_num);
- // get the alloc seq num for an allocation that occurred at `offset'
- const uint64_t asn = (*map)[offset];
- map->erase(offset);
- // if there's an asn, then a corresponding ba_trace_alloc occurred and we should
- // write `free(asn)'. otherwise, the blockpair was initialized from create_from_blockpairs
- // and we write the original offset.
- if (asn != ASN_NONE) {
- ss << "ba_trace_free_asn" << ' ' << canonical_allocator_id << ' ' << asn << std::endl;
- } else {
- ss << "ba_trace_free_offset" << ' ' << canonical_allocator_id << ' ' << offset << std::endl;
- }
- } else if (fn == "ba_trace_destroy") {
- // Remove this allocator from both maps
- allocator_ids.erase(allocator_id);
- offset_to_seq_num_maps.erase(allocator_id);
- // translate `destroy(ptr_id)' to `destroy(canonical_id)'
- ss << fn << ' ' << canonical_allocator_id << ' ' << std::endl;
- } else {
- ba_replay_assert(false, "corrupted trace: bad fn", line, line_num);
- }
- }
- canonicalized_trace.push_back(ss.str());
- toku_free(line);
- }
- if (allocator_ids.size() != 0) {
- // terminate the warning with a newline so it does not run into
- // whatever is written to the terminal next
- fprintf(stderr, "warning: leaked allocators. this might be ok if the tracing process is still running\n");
- }
- return canonicalized_trace;
-}
-struct streaming_variance_calculator { // running mean and variance of int64 samples, kept entirely in integer arithmetic
- int64_t n_samples; // not initialized here; canonical_trace_stats zeroes its calculators with memset
- int64_t mean;
- int64_t variance;
- // math credit: AoCP, Donald Knuth, '62
- void add_sample(int64_t x) {
- n_samples++;
- if (n_samples == 1) {
- mean = x;
- variance = 0;
- } else {
- int64_t old_mean = mean;
- mean = old_mean + ((x - old_mean) / n_samples); // NOTE(review): integer division truncates on every update
- variance = (((n_samples - 1) * variance) +
- ((x - old_mean) * (x - mean))) / n_samples;
- }
- }
-struct canonical_trace_stats { // counters gathered while replaying a canonicalized trace
- uint64_t n_lines_replayed;
- uint64_t n_create;
- uint64_t n_create_from_blockpairs;
- uint64_t n_alloc_hot;
- uint64_t n_alloc_cold;
- uint64_t n_free;
- uint64_t n_destroy;
- struct streaming_variance_calculator alloc_hot_bytes; // sizes of allocations replayed with non-zero heat
- struct streaming_variance_calculator alloc_cold_bytes; // sizes of allocations replayed with zero heat
- canonical_trace_stats() {
- memset(this, 0, sizeof(*this)); // zeroes every counter and both calculators
- }
-struct fragmentation_report { // NOTE(review): the `beginning' and `end' members used by merge() are not declared in the visible text
- fragmentation_report() {
- memset(this, 0, sizeof(*this));
- }
- void merge(const struct fragmentation_report &src_report) { // add every counter of src_report into this report
- for (int i = 0; i < 2; i++) { // i == 0 handles `beginning', i == 1 handles `end'
- TOKU_DB_FRAGMENTATION_S *dst = i == 0 ? &beginning : &end;
- const TOKU_DB_FRAGMENTATION_S *src = i == 0 ? &src_report.beginning : &src_report.end;
- dst->file_size_bytes += src->file_size_bytes;
- dst->data_bytes += src->data_bytes;
- dst->data_blocks += src->data_blocks;
- dst->checkpoint_bytes_additional += src->checkpoint_bytes_additional;
- dst->checkpoint_blocks_additional += src->checkpoint_blocks_additional;
- dst->unused_bytes += src->unused_bytes;
- dst->unused_blocks += src->unused_blocks;
- dst->largest_unused_block += src->largest_unused_block; // summed like the other fields, not combined with max
- }
- }
-static void replay_canonicalized_trace(const vector<string> &canonicalized_trace, // replay every canonical line against fresh allocators using `strategy'
- block_allocator::allocation_strategy strategy,
- map<uint64_t, struct fragmentation_report> *reports, // out: statistics per canonical allocator id, taken at create and at destroy
- struct canonical_trace_stats *stats) { // out: per-operation counters and allocation size samples
- // maps an allocator id to its block allocator
- map<uint64_t, block_allocator *> allocator_map;
- // maps allocation seq num to allocated offset
- map<uint64_t, uint64_t> seq_num_to_offset;
- for (vector<string>::const_iterator it = canonicalized_trace.begin();
- it != canonicalized_trace.end(); it++) {
- const int line_num = stats->n_lines_replayed++; // post-increment: the first line reports as 0 here, whereas canonicalization counts from 1
- char *line = toku_strdup(it->c_str());
- line = strip_newline(line, nullptr);
- char *ptr = trim_whitespace(line);
- // canonical allocator id is in base 10, not 16
- string fn = parse_token(&ptr, line_num);
- int64_t allocator_id = parse_number(&ptr, line_num, 10);
- if (fn.find("ba_trace_create") != string::npos) {
- const uint64_t reserve_at_beginning = parse_uint64(&ptr, line_num);
- const uint64_t alignment = parse_uint64(&ptr, line_num);
- ba_replay_assert(allocator_map.count(allocator_id) == 0,
- "corrupted canonical trace: double create", line, line_num);
- block_allocator *ba = new block_allocator(); // NOTE(review): no matching delete is visible in this function
- if (fn == "ba_trace_create") {
- ba->create(reserve_at_beginning, alignment);
- stats->n_create++;
- } else {
- ba_replay_assert(fn == "ba_trace_create_from_blockpairs",
- "corrupted canonical trace: bad create fn", line, line_num);
- vector<block_allocator::blockpair> pairs;
- while (*trim_whitespace(ptr) != '\0') {
- const block_allocator::blockpair bp = parse_blockpair(&ptr, line_num);
- pairs.push_back(bp);
- }
- ba->create_from_blockpairs(reserve_at_beginning, alignment, &pairs[0], pairs.size()); // NOTE(review): &pairs[0] indexes an empty vector when the line lists no blockpairs
- stats->n_create_from_blockpairs++;
- }
- ba->set_strategy(strategy);
- ba->get_statistics(&report); // NOTE(review): no declaration of `report' is visible in this text
- (*reports)[allocator_id].beginning = report;
- allocator_map[allocator_id] = ba;
- } else {
- ba_replay_assert(allocator_map.count(allocator_id) > 0,
- "corrupted canonical trace: no such allocator", line, line_num);
- block_allocator *ba = allocator_map[allocator_id];
- if (fn == "ba_trace_alloc") {
- // replay an `alloc' whose result will be associated with a certain asn
- const uint64_t size = parse_uint64(&ptr, line_num);
- const uint64_t heat = parse_uint64(&ptr, line_num);
- const uint64_t asn = parse_uint64(&ptr, line_num);
- ba_replay_assert(seq_num_to_offset.count(asn) == 0,
- "corrupted canonical trace: double alloc (asn in use)", line, line_num);
- uint64_t offset;
- ba->alloc_block(size, heat, &offset);
- seq_num_to_offset[asn] = offset;
- heat ? stats->n_alloc_hot++ : stats->n_alloc_cold++;
- heat ? stats->alloc_hot_bytes.add_sample(size) : stats->alloc_cold_bytes.add_sample(size);
- } else if (fn == "ba_trace_free_asn") {
- // replay a `free' on a block whose offset is the result of an alloc with an asn
- const uint64_t asn = parse_uint64(&ptr, line_num);
- ba_replay_assert(seq_num_to_offset.count(asn) == 1,
- "corrupted canonical trace: double free (asn unused)", line, line_num);
- const uint64_t offset = seq_num_to_offset[asn];
- ba->free_block(offset);
- seq_num_to_offset.erase(asn);
- stats->n_free++;
- } else if (fn == "ba_trace_free_offset") {
- // replay a `free' on a block whose offset was explicitly set during a create_from_blockpairs
- const uint64_t offset = parse_uint64(&ptr, line_num);
- ba->free_block(offset);
- stats->n_free++;
- } else if (fn == "ba_trace_destroy") {
- ba->get_statistics(&report); // statistics are captured before the allocator is torn down
- ba->destroy();
- (*reports)[allocator_id].end = report;
- allocator_map.erase(allocator_id);
- stats->n_destroy++;
- } else {
- ba_replay_assert(false, "corrupted canonical trace: bad fn", line, line_num);
- }
- }
- toku_free(line);
- }
-static const char *strategy_to_cstring(block_allocator::allocation_strategy strategy) { // map a strategy to the name used on the command line and in reports
- switch (strategy) {
- case block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT:
- return "first-fit";
- case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT:
- return "best-fit";
- case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE:
- return "heat-zone";
- case block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT:
- return "padded-fit";
- default:
- abort(); // any strategy not listed above aborts the process
- }
-static block_allocator::allocation_strategy cstring_to_strategy(const char *str) { // inverse of strategy_to_cstring; aborts on an unrecognized name
- if (strcmp(str, "first-fit") == 0) {
- return block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT;
- }
- if (strcmp(str, "best-fit") == 0) {
- return block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT;
- }
- if (strcmp(str, "heat-zone") == 0) {
- return block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE;
- }
- if (strcmp(str, "padded-fit") != 0) { // inverted test: everything except the last known name is rejected here
- fprintf(stderr, "bad strategy string: %s\n", str);
- abort();
- }
- return block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT;
-static void print_result_verbose(uint64_t allocator_id, // print the full BEFORE/AFTER statistics of one report to stdout
- block_allocator::allocation_strategy strategy,
- const struct fragmentation_report &report) {
- if (report.end.data_bytes + report.end.unused_bytes +
- report.beginning.data_bytes + report.beginning.unused_bytes
- < 32UL * 1024 * 1024) { // same 32mb threshold that print_result checks before calling this function
- printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id); // NOTE(review): PRId64 is used with a uint64_t argument here and below
- return;
- }
- printf(" allocator_id: %20" PRId64 "\n", allocator_id);
- printf(" strategy: %20s\n", strategy_to_cstring(strategy));
- for (int i = 0; i < 2; i++) { // i == 0 prints report.beginning, i == 1 prints report.end
- const TOKU_DB_FRAGMENTATION_S *r = i == 0 ? &report.beginning : &report.end;
- printf("%s\n", i == 0 ? "BEFORE" : "AFTER");
- uint64_t total_bytes = r->data_bytes + r->unused_bytes;
- uint64_t total_blocks = r->data_blocks + r->unused_blocks;
- // byte statistics
- printf(" total bytes: %20" PRId64 "\n", total_bytes);
- printf(" used bytes: %20" PRId64 " (%.3lf)\n", r->data_bytes,
- static_cast<double>(r->data_bytes) / total_bytes); // NOTE(review): total_bytes is not checked for zero before dividing
- printf(" unused bytes: %20" PRId64 " (%.3lf)\n", r->unused_bytes,
- static_cast<double>(r->unused_bytes) / total_bytes);
- // block statistics
- printf(" total blocks: %20" PRId64 "\n", total_blocks);
- printf(" used blocks: %20" PRId64 " (%.3lf)\n", r->data_blocks,
- static_cast<double>(r->data_blocks) / total_blocks);
- printf(" unused blocks: %20" PRId64 " (%.3lf)\n", r->unused_blocks,
- static_cast<double>(r->unused_blocks) / total_blocks);
- // misc
- printf(" largest unused: %20" PRId64 "\n", r->largest_unused_block);
- }
-static void print_result(uint64_t allocator_id, // print one report: a one-line summary, or the verbose form when `verbose' is set
- block_allocator::allocation_strategy strategy,
- const struct fragmentation_report &report) {
- const TOKU_DB_FRAGMENTATION_S *beginning = &report.beginning;
- const TOKU_DB_FRAGMENTATION_S *end = &report.end;
- uint64_t total_beginning_bytes = beginning->data_bytes + beginning->unused_bytes;
- uint64_t total_end_bytes = end->data_bytes + end->unused_bytes;
- if (total_end_bytes + total_beginning_bytes < 32UL * 1024 * 1024) { // reports under 32mb in total are skipped, silently unless verbose
- if (verbose) {
- printf("\n");
- printf(" ...skipping allocator_id %" PRId64 " (total bytes < 32mb)\n", allocator_id);
- }
- return;
- }
- printf("\n");
- if (verbose) {
- print_result_verbose(allocator_id, strategy, report);
- } else {
- printf(" %-15s: allocator %" PRId64 ", %.3lf used bytes (%.3lf before)\n",
- strategy_to_cstring(strategy), allocator_id,
- static_cast<double>(report.end.data_bytes) / total_end_bytes, // NOTE(review): either total may be zero on its own; only their sum is checked above
- static_cast<double>(report.beginning.data_bytes) / total_beginning_bytes);
- }
-static int only_aggregate_reports; // non-zero suppresses the per-allocator reports in main
-static struct option getopt_options[] = { // long options passed to getopt_long in main
- { "verbose", no_argument, &verbose, 1 },
- { "only-aggregate-reports", no_argument, &only_aggregate_reports, 1 },
- { "include-strategy", required_argument, nullptr, 'i' }, // handled by case 'i' in main
- { "exclude-strategy", required_argument, nullptr, 'x' }, // handled by case 'x' in main
- { nullptr, 0, nullptr, 0 },
-int main(int argc, char *argv[]) { // canonicalize the trace on stdin, replay it once per candidate strategy, print the reports
- int opt;
- set<block_allocator::allocation_strategy> candidate_strategies, excluded_strategies;
- while ((opt = getopt_long(argc, argv, "", getopt_options, nullptr)) != -1) {
- switch (opt) {
- case 0: // a flag-style option; getopt_long already stored its value
- break;
- case 'i':
- candidate_strategies.insert(cstring_to_strategy(optarg));
- break;
- case 'x':
- excluded_strategies.insert(cstring_to_strategy(optarg));
- break;
- case '?':
- default:
- abort();
- };
- }
- // Default to everything if nothing was explicitly included.
- if (candidate_strategies.empty()) {
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT);
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT);
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_PADDED_FIT);
- candidate_strategies.insert(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE);
- }
- // ..but remove anything that was explicitly excluded
- for (set<block_allocator::allocation_strategy>::const_iterator it = excluded_strategies.begin();
- it != excluded_strategies.end(); it++) {
- candidate_strategies.erase(*it);
- }
- // Run the real trace
- //
- // First, read the raw trace from stdin
- vector<string> canonicalized_trace = canonicalize_trace_from(stdin);
- if (!only_aggregate_reports) {
- printf("\n");
- printf("Individual reports, by allocator:\n");
- }
- struct canonical_trace_stats stats;
- map<block_allocator::allocation_strategy, struct fragmentation_report> reports_by_strategy;
- for (set<block_allocator::allocation_strategy>::const_iterator it = candidate_strategies.begin();
- it != candidate_strategies.end(); it++) {
- const block_allocator::allocation_strategy strategy(*it);
- // replay the canonicalized trace against the current strategy.
- //
- // we provided the allocator map so we can gather statistics later
- struct canonical_trace_stats dummy_stats;
- map<uint64_t, struct fragmentation_report> reports;
- replay_canonicalized_trace(canonicalized_trace, strategy, &reports,
- // Only need to gather canonical trace stats once
- it == candidate_strategies.begin() ? &stats : &dummy_stats);
- struct fragmentation_report aggregate_report;
- memset(&aggregate_report, 0, sizeof(aggregate_report));
- for (map<uint64_t, struct fragmentation_report>::iterator rp = reports.begin();
- rp != reports.end(); rp++) {
- const struct fragmentation_report &report = rp->second;
- aggregate_report.merge(report);
- if (!only_aggregate_reports) {
- print_result(rp->first, strategy, report);
- }
- }
- reports_by_strategy[strategy] = aggregate_report;
- }
- printf("\n");
- printf("Aggregate reports, by strategy:\n");
- for (map<block_allocator::allocation_strategy, struct fragmentation_report>::iterator it = reports_by_strategy.begin();
- it != reports_by_strategy.end(); it++) {
- print_result(0, it->first, it->second); // aggregate reports are all printed under allocator id 0
- }
- printf("\n");
- printf("Overall trace stats:\n");
- printf("\n");
- printf(" n_lines_played: %15" PRIu64 "\n", stats.n_lines_replayed);
- printf(" n_create: %15" PRIu64 "\n", stats.n_create);
- printf(" n_create_from_blockpairs: %15" PRIu64 "\n", stats.n_create_from_blockpairs);
- printf(" n_alloc_hot: %15" PRIu64 "\n", stats.n_alloc_hot);
- printf(" n_alloc_cold: %15" PRIu64 "\n", stats.n_alloc_cold);
- printf(" n_free: %15" PRIu64 "\n", stats.n_free);
- printf(" n_destroy: %15" PRIu64 "\n", stats.n_destroy);
- printf("\n");
- printf(" avg_alloc_hot: %15" PRIu64 "\n", stats.alloc_hot_bytes.mean); // NOTE(review): mean is an int64_t printed with PRIu64
- printf(" stddev_alloc_hot: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_hot_bytes.variance));
- printf(" avg_alloc_cold: %15" PRIu64 "\n", stats.alloc_cold_bytes.mean);
- printf(" stddev_alloc_cold: %15" PRIu64 "\n", (uint64_t) sqrt(stats.alloc_cold_bytes.variance));
- printf("\n");
- return 0;