//===-- Benchmark memory specific tools -------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // This file complements the `benchmark` header with memory specific tools and // benchmarking facilities. #ifndef LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H #define LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H #include "LibcBenchmark.h" #include "LibcFunctionPrototypes.h" #include "MemorySizeDistributions.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Alignment.h" #include #include #include namespace llvm { namespace libc_benchmarks { //-------------- // Configuration //-------------- struct StudyConfiguration { // One of 'memcpy', 'memset', 'memcmp'. // The underlying implementation is always the llvm libc one. // e.g. 'memcpy' will test '__llvm_libc::memcpy' std::string Function; // The number of trials to run for this benchmark. // If in SweepMode, each individual sizes are measured 'NumTrials' time. // i.e 'NumTrials' measurements for 0, 'NumTrials' measurements for 1 ... uint32_t NumTrials = 1; // Toggles between Sweep Mode and Distribution Mode (default). // See 'SweepModeMaxSize' and 'SizeDistributionName' below. bool IsSweepMode = false; // Maximum size to use when measuring a ramp of size values (SweepMode). // The benchmark measures all sizes from 0 to SweepModeMaxSize. // Note: in sweep mode the same size is sampled several times in a row this // will allow the processor to learn it and optimize the branching pattern. // The resulting measurement is likely to be idealized. uint32_t SweepModeMaxSize = 0; // inclusive // The name of the distribution to be used to randomize the size parameter. // This is used when SweepMode is false (default). std::string SizeDistributionName; // This parameter allows to control how the buffers are accessed during // benchmark: // None : Use a fixed address that is at least cache line aligned, // 1 : Use random address, // >1 : Use random address aligned to value. MaybeAlign AccessAlignment = std::nullopt; // When Function == 'memcmp', this is the buffers mismatch position. // 0 : Buffers always compare equal, // >0 : Buffers compare different at byte N-1. uint32_t MemcmpMismatchAt = 0; }; struct Runtime { // Details about the Host (cpu name, cpu frequency, cache hierarchy). HostState Host; // The framework will populate this value so all data accessed during the // benchmark will stay in L1 data cache. This includes bookkeeping data. uint32_t BufferSize = 0; // This is the number of distinct parameters used in a single batch. // The framework always tests a batch of randomized parameter to prevent the // cpu from learning branching patterns. uint32_t BatchParameterCount = 0; // The benchmark options that were used to perform the measurement. // This is decided by the framework. BenchmarkOptions BenchmarkOptions; }; //-------- // Results //-------- // The root object containing all the data (configuration and measurements). struct Study { std::string StudyName; Runtime Runtime; StudyConfiguration Configuration; std::vector Measurements; }; //------ // Utils //------ // Provides an aligned, dynamically allocated buffer. class AlignedBuffer { char *const Buffer = nullptr; size_t Size = 0; public: // Note: msan / asan can't handle Alignment > 512. static constexpr size_t Alignment = 512; explicit AlignedBuffer(size_t Size) : Buffer(static_cast(aligned_alloc(Alignment, Size))), Size(Size) {} ~AlignedBuffer() { free(Buffer); } inline char *operator+(size_t Index) { return Buffer + Index; } inline const char *operator+(size_t Index) const { return Buffer + Index; } inline char &operator[](size_t Index) { return Buffer[Index]; } inline const char &operator[](size_t Index) const { return Buffer[Index]; } inline char *begin() { return Buffer; } inline char *end() { return Buffer + Size; } }; // Helper to generate random buffer offsets that satisfy the configuration // constraints. class OffsetDistribution { std::uniform_int_distribution Distribution; uint32_t Factor; public: explicit OffsetDistribution(size_t BufferSize, size_t MaxSizeValue, MaybeAlign AccessAlignment); template uint32_t operator()(Generator &G) { return Distribution(G) * Factor; } }; // Helper to generate random buffer offsets that satisfy the configuration // constraints. It is specifically designed to benchmark `memcmp` functions // where we may want the Nth byte to differ. class MismatchOffsetDistribution { std::uniform_int_distribution MismatchIndexSelector; llvm::SmallVector MismatchIndices; const uint32_t MismatchAt; public: explicit MismatchOffsetDistribution(size_t BufferSize, size_t MaxSizeValue, size_t MismatchAt); explicit operator bool() const { return !MismatchIndices.empty(); } const llvm::SmallVectorImpl &getMismatchIndices() const { return MismatchIndices; } template uint32_t operator()(Generator &G, uint32_t Size) { const uint32_t MismatchIndex = MismatchIndices[MismatchIndexSelector(G)]; // We need to position the offset so that a mismatch occurs at MismatchAt. if (Size >= MismatchAt) return MismatchIndex - MismatchAt; // Size is too small to trigger the mismatch. return MismatchIndex - Size - 1; } }; /// This structure holds a vector of ParameterType. /// It makes sure that BufferCount x BufferSize Bytes and the vector of /// ParameterType can all fit in the L1 cache. struct ParameterBatch { struct ParameterType { unsigned OffsetBytes : 16; // max : 16 KiB - 1 unsigned SizeBytes : 16; // max : 16 KiB - 1 }; ParameterBatch(size_t BufferCount); /// Verifies that memory accessed through this parameter is valid. void checkValid(const ParameterType &) const; /// Computes the number of bytes processed during within this batch. size_t getBatchBytes() const; const size_t BufferSize; const size_t BatchSize; std::vector Parameters; }; /// Provides source and destination buffers for the Copy operation as well as /// the associated size distributions. struct CopySetup : public ParameterBatch { CopySetup(); inline static const ArrayRef getDistributions() { return getMemcpySizeDistributions(); } inline void *Call(ParameterType Parameter, MemcpyFunction Memcpy) { return Memcpy(DstBuffer + Parameter.OffsetBytes, SrcBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); } private: AlignedBuffer SrcBuffer; AlignedBuffer DstBuffer; }; /// Provides source and destination buffers for the Move operation as well as /// the associated size distributions. struct MoveSetup : public ParameterBatch { MoveSetup(); inline static const ArrayRef getDistributions() { return getMemmoveSizeDistributions(); } inline void *Call(ParameterType Parameter, MemmoveFunction Memmove) { return Memmove(Buffer + ParameterBatch::BufferSize / 3, Buffer + Parameter.OffsetBytes, Parameter.SizeBytes); } private: AlignedBuffer Buffer; }; /// Provides destination buffer for the Set operation as well as the associated /// size distributions. struct SetSetup : public ParameterBatch { SetSetup(); inline static const ArrayRef getDistributions() { return getMemsetSizeDistributions(); } inline void *Call(ParameterType Parameter, MemsetFunction Memset) { return Memset(DstBuffer + Parameter.OffsetBytes, Parameter.OffsetBytes % 0xFF, Parameter.SizeBytes); } inline void *Call(ParameterType Parameter, BzeroFunction Bzero) { Bzero(DstBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); return DstBuffer.begin(); } private: AlignedBuffer DstBuffer; }; /// Provides left and right buffers for the Comparison operation as well as the /// associated size distributions. struct ComparisonSetup : public ParameterBatch { ComparisonSetup(); inline static const ArrayRef getDistributions() { return getMemcmpSizeDistributions(); } inline int Call(ParameterType Parameter, MemcmpOrBcmpFunction MemcmpOrBcmp) { return MemcmpOrBcmp(LhsBuffer + Parameter.OffsetBytes, RhsBuffer + Parameter.OffsetBytes, Parameter.SizeBytes); } private: AlignedBuffer LhsBuffer; AlignedBuffer RhsBuffer; }; } // namespace libc_benchmarks } // namespace llvm #endif // LLVM_LIBC_UTILS_BENCHMARK_MEMORY_BENCHMARK_H