From d2aa85049faf7b01370827357c039abf020c26d9 Mon Sep 17 00:00:00 2001 From: Gabriel Russell Date: Wed, 26 Aug 2020 11:42:31 -0400 Subject: SERVER-50123 Record number of physical cores on all platforms --- src/mongo/util/processinfo.h | 11 ++- src/mongo/util/processinfo_linux.cpp | 152 ++++++++++++++++++++++++--------- src/mongo/util/processinfo_windows.cpp | 135 +++++++++++++++++------------ 3 files changed, 201 insertions(+), 97 deletions(-) diff --git a/src/mongo/util/processinfo.h b/src/mongo/util/processinfo.h index 5040484b46b..416599a17c7 100644 --- a/src/mongo/util/processinfo.h +++ b/src/mongo/util/processinfo.h @@ -98,12 +98,19 @@ public: } /** - * Get the number of CPUs + * Get the number of (logical) CPUs */ static unsigned getNumCores() { return sysInfo().numCores; } + /** + * Get the number of physical CPUs + */ + static unsigned getNumPhysicalCores() { + return sysInfo().numPhysicalCores; + } + /** * Get the number of cores available. Make a best effort to get the cores for this process. * If that information is not available, get the total number of CPUs. @@ -198,6 +205,7 @@ private: unsigned long long memSize; unsigned long long memLimit; unsigned numCores; + unsigned numPhysicalCores; unsigned long long pageSize; std::string cpuArch; bool hasNuma; @@ -215,6 +223,7 @@ private: memSize(0), memLimit(0), numCores(0), + numPhysicalCores(0), pageSize(0), hasNuma(false), preferMsyncOverFSync(true) { diff --git a/src/mongo/util/processinfo_linux.cpp b/src/mongo/util/processinfo_linux.cpp index a968c54727f..de4b84bca5a 100644 --- a/src/mongo/util/processinfo_linux.cpp +++ b/src/mongo/util/processinfo_linux.cpp @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -53,28 +54,28 @@ #include #include #include +#include #include #include "mongo/logv2/log.h" #include "mongo/util/file.h" +#include "mongo/util/static_immortal.h" #define KLONG long #define KLF "l" namespace mongo { +using namespace fmt::literals; + class LinuxProc { public: LinuxProc(ProcessId pid) { - char name[128]; - sprintf(name, "/proc/%d/stat", pid.asUInt32()); - - FILE* f = fopen(name, "r"); + auto name = "/proc/{}/stat"_format(pid.asUInt32()); + FILE* f = fopen(name.c_str(), "r"); if (!f) { - std::stringstream ss; - ss << "couldn't open [" << name << "] " << errnoWithDescription(); - std::string s = ss.str(); - msgasserted(13538, s.c_str()); + auto e = errno; + msgasserted(13538, "couldn't open [{}] {}"_format(name, errnoWithDescription(e))); } int found = fscanf(f, "%d %127s %c " @@ -134,9 +135,7 @@ public: &_rtprio, &_sched */ ); - if (found == 0) { - std::cout << "system error: reading proc info" << std::endl; - } + massert(13539, "couldn't parse [{}]"_format(name).c_str(), found != 0); fclose(f); } @@ -313,6 +312,43 @@ void appendMountInfo(BSONObjBuilder& bob) { } } +class CpuInfoParser { +public: + struct LineProcessor { + pcrecpp::RE regex; + std::function f; + }; + std::vector lineProcessors; + std::function recordProcessor; + void run() { + std::ifstream f("/proc/cpuinfo"); + if (!f) + return; + + bool readSuccess; + bool unprocessed = false; + static StaticImmortal lineRegex(R"re((.*?)\s*:\s*(.*))re"); + do { + std::string fstr; + readSuccess = f && std::getline(f, fstr); + if (readSuccess && !fstr.empty()) { + std::string key; + std::string value; + if (!lineRegex->FullMatch(fstr, &key, &value)) + continue; + for (auto&& [lpr, lpf] : lineProcessors) { + if (lpr.FullMatch(key)) + lpf(value); + } + unprocessed = true; + } else if (unprocessed) { + recordProcessor(); + unprocessed = false; + } + } while (readSuccess); + } +}; + } // namespace class LinuxSysHelper { @@ -333,30 +369,63 @@ public: return fstr; } + + /** + * count the number of physical cores + */ + static void getNumPhysicalCores(int& physicalCores) { + + /* In /proc/cpuinfo core ids are only unique within a particular physical unit, AKA a cpu + * package, so to count the total cores we need to count the unique pairs of core id and + * physical id*/ + struct CpuId { + std::string core; + std::string physical; + }; + + CpuId parsedCpuId; + + auto cmp = [](auto&& a, auto&& b) { + auto tupLens = [](auto&& o) { return std::tie(o.core, o.physical); }; + return tupLens(a) < tupLens(b); + }; + std::set cpuIds(cmp); + + CpuInfoParser cpuInfoParser{ + { + {"physical id", [&](const std::string& value) { parsedCpuId.physical = value; }}, + {"core id", [&](const std::string& value) { parsedCpuId.core = value; }}, + }, + [&]() { + cpuIds.insert(parsedCpuId); + parsedCpuId = CpuId{}; + }}; + cpuInfoParser.run(); + + physicalCores = cpuIds.size(); + } + /** * Get some details about the CPU */ static void getCpuInfo(int& procCount, std::string& freq, std::string& features) { - FILE* f; - char fstr[1024] = {0}; - procCount = 0; - f = fopen("/proc/cpuinfo", "r"); - if (f == nullptr) - return; - - while (fgets(fstr, 1023, f) != nullptr && !feof(f)) { - // until the end of the file - fstr[strlen(fstr) < 1 ? 0 : strlen(fstr) - 1] = '\0'; - if (strncmp(fstr, "processor ", 10) == 0 || strncmp(fstr, "processor\t:", 11) == 0) - ++procCount; - if (strncmp(fstr, "cpu MHz\t\t:", 10) == 0) - freq = fstr + 11; - if (strncmp(fstr, "flags\t\t:", 8) == 0) - features = fstr + 9; - } + procCount = 0; - fclose(f); + CpuInfoParser cpuInfoParser{ + { +#ifdef __s390x__ + {R"re(processor\s+\d+)re", [&](const std::string& value) { procCount++; }}, + {"cpu MHz static", [&](const std::string& value) { freq = value; }}, + {"features", [&](const std::string& value) { features = value; }}, +#else + {"processor", [&](const std::string& value) { procCount++; }}, + {"cpu MHz", [&](const std::string& value) { freq = value; }}, + {"flags", [&](const std::string& value) { features = value; }}, +#endif + }, + []() {}}; + cpuInfoParser.run(); } /** @@ -585,15 +654,18 @@ void ProcessInfo::SystemInfo::collectSystemInfo() { std::string distroName, distroVersion; std::string cpuFreq, cpuFeatures; int cpuCount; + int physicalCores; std::string verSig = LinuxSysHelper::readLineFromFile("/proc/version_signature"); LinuxSysHelper::getCpuInfo(cpuCount, cpuFreq, cpuFeatures); + LinuxSysHelper::getNumPhysicalCores(physicalCores); LinuxSysHelper::getLinuxDistro(distroName, distroVersion); if (uname(&unameData) == -1) { + auto e = errno; LOGV2(23339, - "Unable to collect detailed system information: {strerror_errno}", - "strerror_errno"_attr = strerror(errno)); + "Unable to collect detailed system information", + "error"_attr = errnoWithDescription(e)); } osType = "Linux"; @@ -630,6 +702,7 @@ void ProcessInfo::SystemInfo::collectSystemInfo() { bExtra.append("pageSize", static_cast(pageSize)); bExtra.append("numPages", static_cast(sysconf(_SC_PHYS_PAGES))); bExtra.append("maxOpenFiles", static_cast(sysconf(_SC_OPEN_MAX))); + bExtra.append("physicalCores", physicalCores); appendMountInfo(bExtra); @@ -648,10 +721,9 @@ bool ProcessInfo::checkNumaEnabled() { hasNumaMaps = boost::filesystem::exists("/proc/self/numa_maps"); } catch (boost::filesystem::filesystem_error& e) { LOGV2(23340, - "WARNING: Cannot detect if NUMA interleaving is enabled. Failed to probe " - "\"{e_path1_string}\": {e_code_message}", - "e_path1_string"_attr = e.path1().string(), - "e_code_message"_attr = e.code().message()); + "WARNING: Cannot detect if NUMA interleaving is enabled. Failed to probe", + "path"_attr = e.path1().string(), + "reason"_attr = e.code().message()); return false; } @@ -677,9 +749,8 @@ bool ProcessInfo::blockCheckSupported() { bool ProcessInfo::blockInMemory(const void* start) { unsigned char x = 0; if (mincore(const_cast(alignToStartOfPage(start)), getPageSize(), &x)) { - LOGV2(23341, - "mincore failed: {errnoWithDescription}", - "errnoWithDescription"_attr = errnoWithDescription()); + auto e = errno; + LOGV2(23341, "mincore failed", "error"_attr = errnoWithDescription(e)); return 1; } return x & 0x1; @@ -690,9 +761,8 @@ bool ProcessInfo::pagesInMemory(const void* start, size_t numPages, std::vector< if (mincore(const_cast(alignToStartOfPage(start)), numPages * getPageSize(), reinterpret_cast(&out->front()))) { - LOGV2(23342, - "mincore failed: {errnoWithDescription}", - "errnoWithDescription"_attr = errnoWithDescription()); + auto e = errno; + LOGV2(23342, "mincore failed", "error"_attr = errnoWithDescription(e)); return false; } for (size_t i = 0; i < numPages; ++i) { diff --git a/src/mongo/util/processinfo_windows.cpp b/src/mongo/util/processinfo_windows.cpp index aeff1ba1ae9..1fd42a0bf5e 100644 --- a/src/mongo/util/processinfo_windows.cpp +++ b/src/mongo/util/processinfo_windows.cpp @@ -42,6 +42,65 @@ namespace mongo { +namespace { + +using Slpi = SYSTEM_LOGICAL_PROCESSOR_INFORMATION; +using SlpiBuf = std::aligned_storage_t; + +struct LpiRecords { + const Slpi* begin() const { + return reinterpret_cast(slpiRecords.get()); + } + + const Slpi* end() const { + return begin() + count; + } + + std::unique_ptr slpiRecords; + size_t count; +}; + +// Both the body of this getLogicalProcessorInformationRecords and the callers of +// getLogicalProcessorInformationRecords are largely modeled off of the example code at +// https://docs.microsoft.com/en-us/windows/win32/api/sysinfoapi/nf-sysinfoapi-getlogicalprocessorinformation +LpiRecords getLogicalProcessorInformationRecords() { + + DWORD returnLength = 0; + LpiRecords lpiRecords{}; + + DWORD returnCode = 0; + do { + returnCode = GetLogicalProcessorInformation( + reinterpret_cast(lpiRecords.slpiRecords.get()), &returnLength); + if (returnCode == FALSE) { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { + lpiRecords.slpiRecords = std::unique_ptr( + new SlpiBuf[((returnLength - 1) / sizeof(Slpi)) + 1]); + } else { + DWORD gle = GetLastError(); + LOGV2_WARNING(23811, + "GetLogicalProcessorInformation failed", + "error"_attr = errnoWithDescription(gle)); + return LpiRecords{}; + } + } + } while (returnCode == FALSE); + + + lpiRecords.count = returnLength / sizeof(Slpi); + return lpiRecords; +} + +int getPhysicalCores() { + int processorCoreCount = 0; + for (auto&& lpi : getLogicalProcessorInformationRecords()) { + if (lpi.Relationship == RelationProcessorCore) + processorCoreCount++; + } + return processorCoreCount; +} + +} // namespace int _wconvertmtos(SIZE_T s) { return (int)(s / (1024 * 1024)); } @@ -72,9 +131,7 @@ int ProcessInfo::getVirtualMemorySize() { BOOL status = GlobalMemoryStatusEx(&mse); if (!status) { DWORD gle = GetLastError(); - LOGV2_ERROR(23812, - "GlobalMemoryStatusEx failed with {errnoWithDescription_gle}", - "errnoWithDescription_gle"_attr = errnoWithDescription(gle)); + LOGV2_ERROR(23812, "GlobalMemoryStatusEx failed", "error"_attr = errnoWithDescription(gle)); fassert(28621, status); } @@ -88,9 +145,7 @@ int ProcessInfo::getResidentSize() { BOOL status = GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); if (!status) { DWORD gle = GetLastError(); - LOGV2_ERROR(23813, - "GetProcessMemoryInfo failed with {errnoWithDescription_gle}", - "errnoWithDescription_gle"_attr = errnoWithDescription(gle)); + LOGV2_ERROR(23813, "GetProcessMemoryInfo failed", "error"_attr = errnoWithDescription(gle)); fassert(28622, status); } @@ -161,22 +216,20 @@ bool getFileVersion(const char* filePath, DWORD& fileVersionMS, DWORD& fileVersi DWORD verSize = GetFileVersionInfoSizeA(filePath, NULL); if (verSize == 0) { DWORD gle = GetLastError(); - LOGV2_WARNING( - 23807, - "GetFileVersionInfoSizeA on {filePath} failed with {errnoWithDescription_gle}", - "filePath"_attr = filePath, - "errnoWithDescription_gle"_attr = errnoWithDescription(gle)); + LOGV2_WARNING(23807, + "GetFileVersionInfoSizeA failed", + "path"_attr = filePath, + "error"_attr = errnoWithDescription(gle)); return false; } std::unique_ptr verData(new char[verSize]); if (GetFileVersionInfoA(filePath, NULL, verSize, verData.get()) == 0) { DWORD gle = GetLastError(); - LOGV2_WARNING( - 23808, - "GetFileVersionInfoSizeA on {filePath} failed with {errnoWithDescription_gle}", - "filePath"_attr = filePath, - "errnoWithDescription_gle"_attr = errnoWithDescription(gle)); + LOGV2_WARNING(23808, + "GetFileVersionInfoSizeA failed", + "path"_attr = filePath, + "error"_attr = errnoWithDescription(gle)); return false; } @@ -185,16 +238,16 @@ bool getFileVersion(const char* filePath, DWORD& fileVersionMS, DWORD& fileVersi if (VerQueryValueA(verData.get(), "\\", (LPVOID*)&verInfo, &size) == 0) { DWORD gle = GetLastError(); LOGV2_WARNING(23809, - "VerQueryValueA on {filePath} failed with {errnoWithDescription_gle}", - "filePath"_attr = filePath, - "errnoWithDescription_gle"_attr = errnoWithDescription(gle)); + "VerQueryValueA failed", + "path"_attr = filePath, + "error"_attr = errnoWithDescription(gle)); return false; } if (size != sizeof(VS_FIXEDFILEINFO)) { LOGV2_WARNING(23810, - "VerQueryValueA on {filePath} returned structure with unexpected size", - "filePath"_attr = filePath); + "VerQueryValueA returned structure with unexpected size", + "path"_attr = filePath); return false; } @@ -214,8 +267,10 @@ void ProcessInfo::SystemInfo::collectSystemInfo() { GetNativeSystemInfo(&ntsysinfo); addrSize = (ntsysinfo.wProcessorArchitecture == PROCESSOR_ARCHITECTURE_AMD64 ? 64 : 32); numCores = ntsysinfo.dwNumberOfProcessors; + numPhysicalCores = getPhysicalCores(); pageSize = static_cast(ntsysinfo.dwPageSize); bExtra.append("pageSize", static_cast(pageSize)); + bExtra.append("physicalCores", static_cast(numPhysicalCores)); // get memory info mse.dwLength = sizeof(mse); @@ -309,43 +364,13 @@ void ProcessInfo::SystemInfo::collectSystemInfo() { _extraStats = bExtra.obj(); } -bool ProcessInfo::checkNumaEnabled() { - typedef BOOL(WINAPI * LPFN_GLPI)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD); - DWORD returnLength = 0; +bool ProcessInfo::checkNumaEnabled() { DWORD numaNodeCount = 0; - std::unique_ptr buffer; - - DWORD returnCode = 0; - do { - returnCode = GetLogicalProcessorInformation(buffer.get(), &returnLength); - - if (returnCode == FALSE) { - if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) { - buffer.reset(reinterpret_cast( - new BYTE[returnLength])); - } else { - DWORD gle = GetLastError(); - LOGV2_WARNING( - 23811, - "GetLogicalProcessorInformation failed with {errnoWithDescription_gle}", - "errnoWithDescription_gle"_attr = errnoWithDescription(gle)); - return false; - } - } - } while (returnCode == FALSE); - - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION ptr = buffer.get(); - - unsigned int byteOffset = 0; - while (byteOffset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= returnLength) { - if (ptr->Relationship == RelationNumaNode) { + for (auto&& lpi : getLogicalProcessorInformationRecords()) { + if (lpi.Relationship == RelationNumaNode) // Non-NUMA systems report a single record of this type. - numaNodeCount++; - } - - byteOffset += sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION); - ptr++; + ++numaNodeCount; } // For non-NUMA machines, the count is 1 -- cgit v1.2.1