/** * Copyright (C) 2018-present MongoDB, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the Server Side Public License, version 1, * as published by MongoDB, Inc. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * Server Side Public License for more details. * * You should have received a copy of the Server Side Public License * along with this program. If not, see * . * * As a special exception, the copyright holders give permission to link the * code of portions of this program with the OpenSSL library under certain * conditions as described in each individual source file and distribute * linked combinations including the program with the OpenSSL library. You * must comply with the Server Side Public License in all respects for * all of the code used other than as permitted herein. If you modify file(s) * with this exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do so, * delete this exception statement from your version. If you delete this * exception statement from all source files in the program, then also delete * it in the license file. */ #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kFTDC #include "mongo/platform/basic.h" #include "mongo/util/procparser.h" #include #include #include #include #include #include #include #include #include #include #include #include "mongo/base/parse_number.h" #include "mongo/base/status.h" #include "mongo/base/status_with.h" #include "mongo/base/string_data.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/util/log.h" #include "mongo/util/mongoutils/str.h" #include "mongo/util/scopeguard.h" #include "mongo/util/text.h" namespace mongo { namespace { /** * Get USER_HZ for the machine. See time(7) for an explanation. */ int64_t getTicksPerSecond() { int64_t ret = sysconf(_SC_CLK_TCK); return ret; } /** * Convert USER_HZ to milliseconds. */ double convertTicksToMilliSeconds(const int64_t ticks, const int64_t ticksPerSecond) { return static_cast(ticks) / (static_cast(ticksPerSecond) / 1000.0); } const size_t kFileBufferSize = 16384; const size_t kFileReadRetryCount = 5; constexpr auto kSysBlockDeviceDirectoryName = "device"; /** * Read a file from disk as a string with a null-terminating byte using the POSIX file api. * * This function is designed to get all the data it needs from small /proc files in a single read. * The /proc/stat and /proc/diskstats files can vary in size, but 16kb will cover most cases. * * Finally, we return errors instead of throwing to ensure that FTDC can return partial information * on failure instead of no information. Some container filesystems may overlay /proc so we may not * be reading directly from the kernel. */ StatusWith readFileAsString(StringData filename) { int fd = open(filename.toString().c_str(), 0); if (fd == -1) { int err = errno; return Status(ErrorCodes::FileOpenFailed, str::stream() << "Failed to open file " << filename << " with error: " << errnoWithDescription(err)); } auto scopedGuard = MakeGuard([fd] { close(fd); }); BufBuilder builder(kFileBufferSize); std::array buf; ssize_t size_read = 0; // Read until the end as needed do { // Retry if interrupted size_t retry = 0; do { size_read = read(fd, buf.data(), kFileBufferSize); if (size_read == -1) { int err = errno; // Retry if we hit EGAIN or EINTR a few times before giving up if (retry < kFileReadRetryCount && (err == EAGAIN || err == EINTR)) { ++retry; continue; } return Status(ErrorCodes::FileStreamFailed, str::stream() << "Failed to read file " << filename << " with error: " << errnoWithDescription(err)); } break; } while (true); if (size_read != 0) { builder.appendBuf(buf.data(), size_read); } } while (size_read != 0); // Null terminate the buffer since we are about to convert it to a string builder.appendChar(0); return std::string(builder.buf(), builder.len()); } const char* const kAdditionCpuFields[] = {"user_ms", "nice_ms", "system_ms", "idle_ms", "iowait_ms", "irq_ms", "softirq_ms", "steal_ms", "guest_ms", "guest_nice_ms"}; const size_t kAdditionCpuFieldCount = std::extent::value; const char* const kDiskFields[] = { "reads", "reads_merged", "read_sectors", "read_time_ms", "writes", "writes_merged", "write_sectors", "write_time_ms", "io_in_progress", "io_time_ms", "io_queued_ms", }; const size_t kDiskFieldCount = std::extent::value; } // namespace namespace procparser { // Here is an example of the type of string it supports. // Note: intr output has been trimmed // // The cpu field maps up to 10 individual fields depending on the kernel version. For other views, // this code assumes there is only a single value. // // For more information, see: // Documentation/filesystems/proc.txt in the Linux kernel // proc(5) man page // // > cat /proc/stat // cpu 41801 9179 32206 831134223 34279 0 947 0 0 0 // cpu0 2977 450 2475 69253074 1959 0 116 0 0 0 // cpu1 6213 4261 9400 69177349 845 0 539 0 0 0 // cpu2 1949 831 3699 69261035 645 0 0 0 0 0 // cpu3 2222 644 3283 69264801 783 0 0 0 0 0 // cpu4 16576 607 4757 69232589 8195 0 291 0 0 0 // cpu5 3742 391 4571 69257332 2322 0 0 0 0 0 // cpu6 2173 376 743 69284308 400 0 0 0 0 0 // cpu7 1232 375 704 69285753 218 0 0 0 0 0 // cpu8 960 127 576 69262851 18107 0 0 0 0 0 // cpu9 1755 227 744 69283938 362 0 0 0 0 0 // cpu10 1380 641 678 69285193 219 0 0 0 0 0 // cpu11 618 244 572 69285995 218 0 0 0 0 0 // intr 54084718 135 2 .... // ctxt 190305514 // btime 1463584038 // processes 47438 // procs_running 1 // procs_blocked 0 // softirq 102690251 8 26697410 115481 23345078 816026 0 2296 26068778 0 25645174 // Status parseProcStat(const std::vector& keys, StringData data, int64_t ticksPerSecond, BSONObjBuilder* builder) { bool foundKeys = false; using string_split_iterator = boost::split_iterator; // Split the file by lines. // token_compress_on means the iterator skips over consecutive '\n'. This should not be a // problem in normal /proc/stat output. for (string_split_iterator lineIt = string_split_iterator( data.begin(), data.end(), boost::token_finder([](char c) { return c == '\n'; }, boost::token_compress_on)); lineIt != string_split_iterator(); ++lineIt) { StringData line((*lineIt).begin(), (*lineIt).end()); // Split the line by spaces since that is the only delimiter for stat files. // token_compress_on means the iterator skips over consecutive ' '. This is needed for the // first line which is "cpu ". string_split_iterator partIt = string_split_iterator( line.begin(), line.end(), boost::token_finder([](char c) { return c == ' '; }, boost::token_compress_on)); // Skip processing this line if we do not have a key. if (partIt == string_split_iterator()) { continue; } StringData key((*partIt).begin(), (*partIt).end()); ++partIt; // Skip processing this line if we only have a key, and no number. if (partIt == string_split_iterator()) { continue; } // Check if the key is in the list. /proc/stat will have extra keys, and // may not have the keys we want. if (keys.empty() || std::find(keys.begin(), keys.end(), key) != keys.end()) { foundKeys = true; if (key == "cpu") { // Cpu is 10 fields, we need to chew through all of them. // Some kernels we support lack the last field or two: guest and/or guest_nice. for (size_t index = 0; partIt != string_split_iterator() && index < kAdditionCpuFieldCount; ++partIt, ++index) { StringData stringValue((*partIt).begin(), (*partIt).end() - (*partIt).begin()); uint64_t value; if (!parseNumberFromString(stringValue, &value).isOK()) { value = 0; } builder->appendNumber(kAdditionCpuFields[index], convertTicksToMilliSeconds(value, ticksPerSecond)); } } else { StringData stringValue((*partIt).begin(), (*partIt).end() - (*partIt).begin()); uint64_t value; if (!parseNumberFromString(stringValue, &value).isOK()) { value = 0; } builder->appendNumber(key, static_cast(value)); } } } return foundKeys ? Status::OK() : Status(ErrorCodes::NoSuchKey, "Failed to find any keys in stat string"); } Status parseProcStatFile(StringData filename, const std::vector& keys, BSONObjBuilder* builder) { auto swString = readFileAsString(filename); if (!swString.isOK()) { return swString.getStatus(); } return parseProcStat(keys, swString.getValue(), getTicksPerSecond(), builder); } // Here is an example of the type of string it supports: // Note: output has been trimmed // // For more information, see: // Documentation/filesystems/proc.txt in the Linux kernel // proc(5) man page // // > cat /proc/meminfo // MemTotal: 12294392 kB // MemFree: 3652612 kB // MemAvailable: 11831704 kB // Buffers: 568536 kB // Cached: 6421520 kB // SwapCached: 0 kB // HugePages_Total: 0 // // Note: HugePages_* do not end in kB, it is not a typo // Status parseProcMemInfo(const std::vector& keys, StringData data, BSONObjBuilder* builder) { bool foundKeys = false; using string_split_iterator = boost::split_iterator; // Split the file by lines. // token_compress_on means the iterator skips over consecutive '\n'. This should not be a // problem in normal /proc/memInfo output. for (string_split_iterator lineIt = string_split_iterator( data.begin(), data.end(), boost::token_finder([](char c) { return c == '\n'; }, boost::token_compress_on)); lineIt != string_split_iterator(); ++lineIt) { StringData line((*lineIt).begin(), (*lineIt).end()); // Split the line by spaces and colons since these are the delimiters for meminfo files. // token_compress_on means the iterator skips over consecutive ' '. This is needed for // every line. string_split_iterator partIt = string_split_iterator(line.begin(), line.end(), boost::token_finder([](char c) { return c == ' ' || c == ':'; }, boost::token_compress_on)); // Skip processing this line if we do not have a key. if (partIt == string_split_iterator()) { continue; } StringData key((*partIt).begin(), (*partIt).end()); ++partIt; // Skip processing this line if we only have a key, and no number. if (partIt == string_split_iterator()) { continue; } // Check if the key is in the list. /proc/meminfo will have extra keys, and may not have the // keys we want. if (keys.empty() || std::find(keys.begin(), keys.end(), key) != keys.end()) { foundKeys = true; StringData stringValue((*partIt).begin(), (*partIt).end()); uint64_t value; if (!parseNumberFromString(stringValue, &value).isOK()) { value = 0; } // Check if the line ends in "kB" ++partIt; // If there is one last token, check if it is actually "kB" if (partIt != string_split_iterator()) { StringData kb_token((*partIt).begin(), (*partIt).end()); auto keyWithSuffix = key.toString(); if (kb_token == "kB") { keyWithSuffix.append("_kb"); } builder->appendNumber(keyWithSuffix, static_cast(value)); } else { builder->appendNumber(key, static_cast(value)); } } } return foundKeys ? Status::OK() : Status(ErrorCodes::NoSuchKey, "Failed to find any keys in meminfo string"); } Status parseProcMemInfoFile(StringData filename, const std::vector& keys, BSONObjBuilder* builder) { auto swString = readFileAsString(filename); if (!swString.isOK()) { return swString.getStatus(); } return parseProcMemInfo(keys, swString.getValue(), builder); } // // Here is an example of the type of string it supports (long lines elided for clarity). // > cat /proc/net/netstat // TcpExt: SyncookiesSent SyncookiesRecv SyncookiesFailed ... // TcpExt: 3437 5938 13368 ... // IpExt: InNoRoutes InTruncatedPkts InMcastPkts ... // IpExt: 999 1 4819969 ... // // Parser assumes file consists of alternating lines of keys and values // key and value lines consist of space-separated tokens // first token is a key prefix that is prepended in the output to each key // all prefixed keys and corresponding values are copied to output as-is // Status parseProcNetstat(const std::vector& keys, StringData data, BSONObjBuilder* builder) { using string_split_iterator = boost::split_iterator; string_split_iterator keysIt; bool foundKeys = false; // Split the file by lines. uint32_t lineNum = 0; for (string_split_iterator lineIt = string_split_iterator( data.begin(), data.end(), boost::token_finder([](char c) { return c == '\n'; }, boost::token_compress_on)); lineIt != string_split_iterator(); ++lineIt, ++lineNum) { if (lineNum % 2 == 0) { // even numbered lines are keys keysIt = string_split_iterator( (*lineIt).begin(), (*lineIt).end(), boost::token_finder([](char c) { return c == ' '; }, boost::token_compress_on)); } else { // odd numbered lines are values string_split_iterator valuesIt = string_split_iterator( (*lineIt).begin(), (*lineIt).end(), boost::token_finder([](char c) { return c == ' '; }, boost::token_compress_on)); StringData prefix; // iterate over the keys and values in parallel for (uint32_t keyNum = 0; keysIt != string_split_iterator() && valuesIt != string_split_iterator(); ++keysIt, ++valuesIt, ++keyNum) { if (keyNum == 0) { // first token is a prefix to be applied to remaining keys prefix = StringData((*keysIt).begin(), (*keysIt).end()); // ignore line if prefix isn't in requested list if (!keys.empty() && std::find(keys.begin(), keys.end(), prefix) == keys.end()) break; } else { // remaining tokens are key/value pairs StringData key((*keysIt).begin(), (*keysIt).end()); StringData stringValue((*valuesIt).begin(), (*valuesIt).end()); uint64_t value; if (parseNumberFromString(stringValue, &value).isOK()) { builder->appendNumber(prefix.toString() + key.toString(), static_cast(value)); foundKeys = true; } } } } } return foundKeys ? Status::OK() : Status(ErrorCodes::NoSuchKey, "Failed to find any keys in netstats string"); } Status parseProcNetstatFile(const std::vector& keys, StringData filename, BSONObjBuilder* builder) { auto swString = readFileAsString(filename); if (!swString.isOK()) { return swString.getStatus(); } return parseProcNetstat(keys, swString.getValue(), builder); } // Here is an example of the type of string it supports: // // For more information, see: // Documentation/iostats.txt in the Linux kernel // proc(5) man page // // > cat /proc/diskstats // 8 0 sda 120611 33630 6297628 96550 349797 167398 11311562 2453603 0 117514 2554160 // 8 1 sda1 138 37 8642 315 3 0 18 14 0 292 329 // 8 2 sda2 120409 33593 6285754 96158 329029 167398 11311544 2450573 0 115611 2550739 // 8 16 sdb 12707 3876 1525418 57507 997 3561 297576 97976 0 37870 155619 // 8 17 sdb1 12601 3876 1521090 57424 992 3561 297576 97912 0 37738 155468 // 11 0 sr0 0 0 0 0 0 0 0 0 0 0 0 // 253 0 dm-0 154910 0 6279522 177681 506513 0 11311544 5674418 0 117752 5852275 // 253 1 dm-1 109 0 4584 226 0 0 0 0 0 172 226 // Status parseProcDiskStats(const std::vector& disks, StringData data, BSONObjBuilder* builder) { bool foundKeys = false; std::vector stats; stats.reserve(kDiskFieldCount); using string_split_iterator = boost::split_iterator; // Split the file by lines. // token_compress_on means the iterator skips over consecutive '\n'. This should not be a // problem in normal /proc/diskstats output. for (string_split_iterator lineIt = string_split_iterator( data.begin(), data.end(), boost::token_finder([](char c) { return c == '\n'; }, boost::token_compress_on)); lineIt != string_split_iterator(); ++lineIt) { StringData line((*lineIt).begin(), (*lineIt).end()); // Skip leading whitespace so that the split_iterator starts on non-whitespace otherwise we // get an empty first token. Device major numbers (the first number on each line) are right // aligned to 4 spaces and start from // single digits. auto beginNonWhitespace = std::find_if_not(line.begin(), line.end(), [](char c) { return c == ' '; }); // Split the line by spaces since that is the only delimiter for diskstats files. // token_compress_on means the iterator skips over consecutive ' '. string_split_iterator partIt = string_split_iterator( beginNonWhitespace, line.end(), boost::token_finder([](char c) { return c == ' '; }, boost::token_compress_on)); // Skip processing this line if the line is blank if (partIt == string_split_iterator()) { continue; } ++partIt; // Skip processing this line if we only have a device major number. if (partIt == string_split_iterator()) { continue; } ++partIt; // Skip processing this line if we only have a device major minor. if (partIt == string_split_iterator()) { continue; } StringData disk((*partIt).begin(), (*partIt).end()); // Skip processing this line if we only have a block device name. if (partIt == string_split_iterator()) { continue; } ++partIt; // Check if the disk is in the list. /proc/diskstats will have extra disks, and may not have // the disk we want. if (disks.empty() || std::find(disks.begin(), disks.end(), disk) != disks.end()) { foundKeys = true; stats.clear(); // Only generate a disk document if the disk has some activity. For instance, there // could be a CD-ROM drive that is not used. bool hasSomeNonZeroStats = false; for (size_t index = 0; partIt != string_split_iterator() && index < kDiskFieldCount; ++partIt, ++index) { StringData stringValue((*partIt).begin(), (*partIt).end()); uint64_t value; if (!parseNumberFromString(stringValue, &value).isOK()) { value = 0; } if (value != 0) { hasSomeNonZeroStats = true; } stats.push_back(value); } if (hasSomeNonZeroStats) { // Start a new document with disk as the name. BSONObjBuilder sub(builder->subobjStart(disk)); for (size_t index = 0; index < stats.size() && index < kDiskFieldCount; ++index) { sub.appendNumber(kDiskFields[index], static_cast(stats[index])); } sub.doneFast(); } } } return foundKeys ? Status::OK() : Status(ErrorCodes::NoSuchKey, "Failed to find any keys in diskstats string"); } Status parseProcDiskStatsFile(StringData filename, const std::vector& disks, BSONObjBuilder* builder) { auto swString = readFileAsString(filename); if (!swString.isOK()) { return swString.getStatus(); } return parseProcDiskStats(disks, swString.getValue(), builder); } namespace { /** * Is this a disk that is interesting to us? We only want physical disks, not multiple disk devices, * LVM2 devices, partitions, or RAM disks. * * A physical disk has a symlink to a directory at /sys/block//device. * * Note: returns false upon any errors such as access denied. */ bool isInterestingDisk(const boost::filesystem::path& path) { boost::filesystem::path blockDevicePath(path); blockDevicePath /= kSysBlockDeviceDirectoryName; boost::system::error_code ec; auto statusSysBlock = boost::filesystem::status(blockDevicePath, ec); if (!boost::filesystem::exists(statusSysBlock)) { return false; } if (ec) { warning() << "Error checking directory '" << blockDevicePath.generic_string() << "': " << ec.message(); return false; } if (!boost::filesystem::is_directory(statusSysBlock)) { return false; } return true; } } // namespace std::vector findPhysicalDisks(StringData sysBlockPath) { boost::system::error_code ec; auto sysBlockPathStr = sysBlockPath.toString(); auto statusSysBlock = boost::filesystem::status(sysBlockPathStr, ec); if (ec) { warning() << "Error checking directory '" << sysBlockPathStr << "': " << ec.message(); return {}; } if (!(boost::filesystem::exists(statusSysBlock) && boost::filesystem::is_directory(statusSysBlock))) { warning() << "Could not find directory '" << sysBlockPathStr << "': " << ec.message(); return {}; } std::vector files; // Iterate through directories in /sys/block. The directories in this directory can be physical // block devices (like SSD or HDD) or virtual devices like the LVM2 device mapper or a multiple // disk device. It does not contain disk partitions. boost::filesystem::directory_iterator di(sysBlockPathStr, ec); if (ec) { warning() << "Error getting directory iterator '" << sysBlockPathStr << "': " << ec.message(); return {}; } for (; di != boost::filesystem::directory_iterator(); di++) { auto path = (*di).path(); if (isInterestingDisk(path)) { files.push_back(path.filename().generic_string()); } } return files; } } // namespace procparser } // namespace mongo