// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2007, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Arun Sharma
//
// A tcmalloc system allocator that uses a memory based filesystem such as
// tmpfs or hugetlbfs
//
// Since these only exist on Linux, we only register this allocator there.
#ifdef __linux #include #include // for errno, EINVAL #include // for PRId64 #include // for PATH_MAX #include // for size_t, NULL #ifdef HAVE_STDINT_H #include // for int64_t, uintptr_t #endif #include // for snprintf #include // for mkstemp #include // for strerror #include // for mmap, MAP_FAILED, etc #include // for fstatfs, statfs #include // for ftruncate, off_t, unlink #include // for operator new #include #include #include "base/basictypes.h" #include "base/googleinit.h" #include "base/sysinfo.h" #include "internal_logging.h" // TODO(sanjay): Move the code below into the tcmalloc namespace using tcmalloc::kLog; using tcmalloc::kCrash; using tcmalloc::Log; using std::string; DEFINE_string(memfs_malloc_path, EnvToString("TCMALLOC_MEMFS_MALLOC_PATH", ""), "Path where hugetlbfs or tmpfs is mounted. The caller is " "responsible for ensuring that the path is unique and does " "not conflict with another process"); DEFINE_int64(memfs_malloc_limit_mb, EnvToInt("TCMALLOC_MEMFS_LIMIT_MB", 0), "Limit total allocation size to the " "specified number of MiB. 0 == no limit."); DEFINE_bool(memfs_malloc_abort_on_fail, EnvToBool("TCMALLOC_MEMFS_ABORT_ON_FAIL", false), "abort() whenever memfs_malloc fails to satisfy an allocation " "for any reason."); DEFINE_bool(memfs_malloc_ignore_mmap_fail, EnvToBool("TCMALLOC_MEMFS_IGNORE_MMAP_FAIL", false), "Ignore failures from mmap"); DEFINE_bool(memfs_malloc_map_private, EnvToBool("TCMALLOC_MEMFS_MAP_PRIVATE", false), "Use MAP_PRIVATE with mmap"); DEFINE_bool(memfs_malloc_disable_fallback, EnvToBool("TCMALLOC_MEMFS_DISABLE_FALLBACK", false), "If we run out of hugepage memory don't fallback to default " "allocator."); // Hugetlbfs based allocator for tcmalloc class HugetlbSysAllocator: public SysAllocator { public: explicit HugetlbSysAllocator(SysAllocator* fallback) : failed_(true), // To disable allocator until Initialize() is called. 
big_page_size_(0), hugetlb_fd_(-1), hugetlb_base_(0), fallback_(fallback) { } void* Alloc(size_t size, size_t *actual_size, size_t alignment); bool Initialize(); bool failed_; // Whether failed to allocate memory. private: void* AllocInternal(size_t size, size_t *actual_size, size_t alignment); int64 big_page_size_; int hugetlb_fd_; // file descriptor for hugetlb off_t hugetlb_base_; SysAllocator* fallback_; // Default system allocator to fall back to. }; static union { char buf[sizeof(HugetlbSysAllocator)]; void *ptr; } hugetlb_space; // No locking needed here since we assume that tcmalloc calls // us with an internal lock held (see tcmalloc/system-alloc.cc). void* HugetlbSysAllocator::Alloc(size_t size, size_t *actual_size, size_t alignment) { if (!FLAGS_memfs_malloc_disable_fallback && failed_) { return fallback_->Alloc(size, actual_size, alignment); } // We don't respond to allocation requests smaller than big_page_size_ unless // the caller is ok to take more than they asked for. Used by MetaDataAlloc. if (!FLAGS_memfs_malloc_disable_fallback && actual_size == NULL && size < big_page_size_) { return fallback_->Alloc(size, actual_size, alignment); } // Enforce huge page alignment. Be careful to deal with overflow. 
size_t new_alignment = alignment; if (new_alignment < big_page_size_) new_alignment = big_page_size_; size_t aligned_size = ((size + new_alignment - 1) / new_alignment) * new_alignment; if (!FLAGS_memfs_malloc_disable_fallback && aligned_size < size) { return fallback_->Alloc(size, actual_size, alignment); } void* result = AllocInternal(aligned_size, actual_size, new_alignment); if (result != NULL) { return result; } else if (FLAGS_memfs_malloc_disable_fallback) { return NULL; } Log(kLog, __FILE__, __LINE__, "HugetlbSysAllocator: (failed, allocated)", failed_, hugetlb_base_); if (FLAGS_memfs_malloc_abort_on_fail) { Log(kCrash, __FILE__, __LINE__, "memfs_malloc_abort_on_fail is set"); } return fallback_->Alloc(size, actual_size, alignment); } void* HugetlbSysAllocator::AllocInternal(size_t size, size_t* actual_size, size_t alignment) { // Ask for extra memory if alignment > pagesize size_t extra = 0; if (alignment > big_page_size_) { extra = alignment - big_page_size_; } // Test if this allocation would put us over the limit. off_t limit = FLAGS_memfs_malloc_limit_mb*1024*1024; if (limit > 0 && hugetlb_base_ + size + extra > limit) { // Disable the allocator when there's less than one page left. if (limit - hugetlb_base_ < big_page_size_) { Log(kLog, __FILE__, __LINE__, "reached memfs_malloc_limit_mb"); failed_ = true; } else { Log(kLog, __FILE__, __LINE__, "alloc too large (size, bytes left)", size, limit-hugetlb_base_); } return NULL; } // This is not needed for hugetlbfs, but needed for tmpfs. Annoyingly // hugetlbfs returns EINVAL for ftruncate. 
int ret = ftruncate(hugetlb_fd_, hugetlb_base_ + size + extra); if (ret != 0 && errno != EINVAL) { Log(kLog, __FILE__, __LINE__, "ftruncate failed", strerror(errno)); failed_ = true; return NULL; } // Note: size + extra does not overflow since: // size + alignment < (1<(MAP_FAILED)) { if (!FLAGS_memfs_malloc_ignore_mmap_fail) { Log(kLog, __FILE__, __LINE__, "mmap failed (size, error)", size + extra, strerror(errno)); failed_ = true; } return NULL; } uintptr_t ptr = reinterpret_cast(result); // Adjust the return memory so it is aligned size_t adjust = 0; if ((ptr & (alignment - 1)) != 0) { adjust = alignment - (ptr & (alignment - 1)); } ptr += adjust; hugetlb_base_ += (size + extra); if (actual_size) { *actual_size = size + extra - adjust; } return reinterpret_cast(ptr); } bool HugetlbSysAllocator::Initialize() { char path[PATH_MAX]; const int pathlen = FLAGS_memfs_malloc_path.size(); if (pathlen + 8 > sizeof(path)) { Log(kCrash, __FILE__, __LINE__, "XX fatal: memfs_malloc_path too long"); return false; } memcpy(path, FLAGS_memfs_malloc_path.data(), pathlen); memcpy(path + pathlen, ".XXXXXX", 8); // Also copies terminating \0 int hugetlb_fd = mkstemp(path); if (hugetlb_fd == -1) { Log(kLog, __FILE__, __LINE__, "warning: unable to create memfs_malloc_path", path, strerror(errno)); return false; } // Cleanup memory on process exit if (unlink(path) == -1) { Log(kCrash, __FILE__, __LINE__, "fatal: error unlinking memfs_malloc_path", path, strerror(errno)); return false; } // Use fstatfs to figure out the default page size for memfs struct statfs sfs; if (fstatfs(hugetlb_fd, &sfs) == -1) { Log(kCrash, __FILE__, __LINE__, "fatal: error fstatfs of memfs_malloc_path", strerror(errno)); return false; } int64 page_size = sfs.f_bsize; hugetlb_fd_ = hugetlb_fd; big_page_size_ = page_size; failed_ = false; return true; } REGISTER_MODULE_INITIALIZER(memfs_malloc, { if (FLAGS_memfs_malloc_path.length()) { SysAllocator* alloc = MallocExtension::instance()->GetSystemAllocator(); 
HugetlbSysAllocator* hp = new (hugetlb_space.buf) HugetlbSysAllocator(alloc); if (hp->Initialize()) { MallocExtension::instance()->SetSystemAllocator(hp); } } }); #endif /* ifdef __linux */