diff options
41 files changed, 602 insertions, 231 deletions
diff --git a/buildscripts/linter/simplecpplint.py b/buildscripts/linter/simplecpplint.py index a1cd50f94d3..e0b3e4a04e2 100644 --- a/buildscripts/linter/simplecpplint.py +++ b/buildscripts/linter/simplecpplint.py @@ -126,6 +126,7 @@ class Linter: self._check_for_nonmongo_assert(linenum) self._check_for_mongo_unstructured_log(linenum) self._check_for_mongo_config_header(linenum) + self._check_for_ctype(linenum) # Relax the rule of commenting generic FCV references for files directly related to FCV # implementations. @@ -216,6 +217,12 @@ class Linter: linenum, 'mongodb/unstructuredlog', 'Illegal use of unstructured logging, ' 'this is only for local development use and should not be committed.') + def _check_for_ctype(self, linenum): + line = self.clean_lines[linenum] + if 'include <cctype>' in line or 'include <ctype.h>' in line: + self._error(linenum, 'mongodb/ctype', + 'Use of prohibited <ctype.h> or <cctype> header, use "mongo/util/ctype.h"') + def _check_for_server_side_public_license(self, copyright_offset): license_header = '''\ * This program is free software: you can redistribute it and/or modify diff --git a/src/mongo/base/parse_number.cpp b/src/mongo/base/parse_number.cpp index c904b539b42..2bb46f0ba10 100644 --- a/src/mongo/base/parse_number.cpp +++ b/src/mongo/base/parse_number.cpp @@ -32,7 +32,6 @@ #include "mongo/base/parse_number.h" #include <algorithm> -#include <cctype> #include <cerrno> #include <cstdint> #include <cstdlib> @@ -42,6 +41,7 @@ #include "mongo/base/status_with.h" #include "mongo/platform/decimal128.h" #include "mongo/platform/overflow_arithmetic.h" +#include "mongo/util/ctype.h" namespace mongo { namespace { @@ -159,8 +159,7 @@ inline StatusWith<uint64_t> parseMagnitudeFromStringWithBase(uint64_t base, StringData removeLeadingWhitespace(StringData s) { return s.substr(std::distance( - s.begin(), - std::find_if_not(s.begin(), s.end(), [](unsigned char c) { return isspace(c); }))); + s.begin(), std::find_if_not(s.begin(), s.end(), [](char c) { return ctype::isSpace(c); }))); } template <typename NumberType> @@ -210,24 +209,6 @@ Status parseNumberFromStringHelper(StringData s, return Status::OK(); } -#ifdef _WIN32 - -namespace { - -/** - * Converts ascii c-locale uppercase characters to lower case, leaves other char values - * unchanged. - */ -char toLowerAscii(char c) { - if (isascii(c) && isupper(c)) - return _tolower(c); - return c; -} - -} // namespace - -#endif // defined(_WIN32) - template <> Status parseNumberFromStringHelper<double>(StringData stringValue, double* result, @@ -241,7 +222,7 @@ Status parseNumberFromStringHelper<double>(StringData stringValue, if (stringValue.empty()) return Status(ErrorCodes::FailedToParse, "Empty string"); - if (!parser._skipLeadingWhitespace && isspace(stringValue[0])) + if (!parser._skipLeadingWhitespace && ctype::isSpace(stringValue[0])) return Status(ErrorCodes::FailedToParse, "Leading whitespace"); std::string str = stringValue.toString(); @@ -254,7 +235,8 @@ Status parseNumberFromStringHelper<double>(StringData stringValue, #ifdef _WIN32 // The Windows libc implementation of strtod cannot parse +/-infinity or nan, // so handle that here. - std::transform(str.begin(), str.end(), str.begin(), toLowerAscii); + for (char& c : str) + c = ctype::toLower(c); if (str == "nan"_sd) { *result = std::numeric_limits<double>::quiet_NaN(); if (endptr) diff --git a/src/mongo/base/string_data.h b/src/mongo/base/string_data.h index 3b7ed6f0e96..eb762771a77 100644 --- a/src/mongo/base/string_data.h +++ b/src/mongo/base/string_data.h @@ -40,6 +40,7 @@ #include "mongo/platform/compiler.h" #include "mongo/stdx/type_traits.h" +#include "mongo/util/ctype.h" #define MONGO_INCLUDE_INVARIANT_H_WHITELISTED #include "mongo/util/invariant.h" #undef MONGO_INCLUDE_INVARIANT_H_WHITELISTED @@ -238,20 +239,10 @@ inline int StringData::compare(StringData other) const { } inline bool StringData::equalCaseInsensitive(StringData other) const { - if (other.size() != size()) - return false; - - for (size_t x = 0; x < size(); x++) { - char a = _data[x]; - char b = other._data[x]; - if (a == b) - continue; - if (tolower(a) == tolower(b)) - continue; - return false; - } - - return true; + return size() == other.size() && + std::equal(begin(), end(), other.begin(), other.end(), [](char a, char b) { + return ctype::toLower(a) == ctype::toLower(b); + }); } inline void StringData::copyTo(char* dest, bool includeEndingNull) const { diff --git a/src/mongo/bson/json.cpp b/src/mongo/bson/json.cpp index a0ba4f0ec4c..8cbc550141e 100644 --- a/src/mongo/bson/json.cpp +++ b/src/mongo/bson/json.cpp @@ -31,6 +31,7 @@ #include "mongo/bson/json.h" +#include <algorithm> #include <cstdint> #include <fmt/format.h> @@ -40,6 +41,7 @@ #include "mongo/platform/decimal128.h" #include "mongo/platform/strtoll.h" #include "mongo/util/base64.h" +#include "mongo/util/ctype.h" #include "mongo/util/decimal_counter.h" #include "mongo/util/hex.h" #include "mongo/util/str.h" @@ -1199,10 +1201,7 @@ Status JParse::field(std::string* result) { return quotedString(result); } else { // Unquoted key - // 'isspace()' takes an 'int' (signed), so (default signed) 'char's get sign-extended - // and therefore 'corrupted' unless we force them to be unsigned ... 0x80 becomes - // 0xffffff80 as seen by isspace when sign-extended ... we want it to be 0x00000080 - while (_input < _input_end && isspace(*reinterpret_cast<const unsigned char*>(_input))) { + while (_input < _input_end && ctype::isSpace(*_input)) { ++_input; } if (_input >= _input_end) { @@ -1372,10 +1371,7 @@ bool JParse::readTokenImpl(const char* token, bool advance) { if (token == nullptr) { return false; } - // 'isspace()' takes an 'int' (signed), so (default signed) 'char's get sign-extended - // and therefore 'corrupted' unless we force them to be unsigned ... 0x80 becomes - // 0xffffff80 as seen by isspace when sign-extended ... we want it to be 0x00000080 - while (check < _input_end && isspace(*reinterpret_cast<const unsigned char*>(check))) { + while (check < _input_end && ctype::isSpace(*check)) { ++check; } while (*token != '\0') { @@ -1418,13 +1414,7 @@ inline bool JParse::match(char matchChar, const char* matchSet) const { bool JParse::isHexString(StringData str) const { MONGO_JSON_DEBUG("str: " << str); - std::size_t i; - for (i = 0; i < str.size(); i++) { - if (!isxdigit(str[i])) { - return false; - } - } - return true; + return std::all_of(str.begin(), str.end(), [](char c) { return ctype::isXdigit(c); }); } bool JParse::isBase64String(StringData str) const { diff --git a/src/mongo/client/mongo_uri.cpp b/src/mongo/client/mongo_uri.cpp index ae8322c086c..08454c6b3f7 100644 --- a/src/mongo/client/mongo_uri.cpp +++ b/src/mongo/client/mongo_uri.cpp @@ -47,6 +47,7 @@ #include "mongo/db/namespace_string.h" #include "mongo/stdx/utility.h" #include "mongo/util/assert_util.h" +#include "mongo/util/ctype.h" #include "mongo/util/dns_name.h" #include "mongo/util/dns_query.h" #include "mongo/util/hex.h" @@ -73,7 +74,7 @@ const std::vector<std::pair<std::string, std::string>> permittedTXTOptions = {{" */ void mongo::uriEncode(std::ostream& ss, StringData toEncode, StringData passthrough) { for (const auto& c : toEncode) { - if ((c == '-') || (c == '_') || (c == '.') || (c == '~') || isalnum(c) || + if ((c == '-') || (c == '_') || (c == '.') || (c == '~') || ctype::isAlnum(c) || (passthrough.find(c) != std::string::npos)) { ss << c; } else { diff --git a/src/mongo/client/sdam/server_selection_json_test_runner.cpp b/src/mongo/client/sdam/server_selection_json_test_runner.cpp index 943f933bf25..9fa60ea76a1 100644 --- a/src/mongo/client/sdam/server_selection_json_test_runner.cpp +++ b/src/mongo/client/sdam/server_selection_json_test_runner.cpp @@ -47,6 +47,7 @@ #include "mongo/logv2/log.h" #include "mongo/stdx/unordered_set.h" #include "mongo/util/clock_source_mock.h" +#include "mongo/util/ctype.h" #include "mongo/util/options_parser/environment.h" #include "mongo/util/options_parser/option_section.h" #include "mongo/util/options_parser/options_parser.h" @@ -286,7 +287,7 @@ private: // This can throw for test cases that have invalid read preferences. auto readPrefObj = _jsonTest.getObjectField("read_preference"); std::string mode = readPrefObj.getStringField("mode"); - mode[0] = std::tolower(mode[0]); + mode[0] = ctype::toLower(mode[0]); auto tagSetsObj = readPrefObj["tag_sets"]; auto tags = tagSetsObj ? BSONArray(readPrefObj["tag_sets"].Obj()) : BSONArray(); diff --git a/src/mongo/db/auth/security_file.cpp b/src/mongo/db/auth/security_file.cpp index 70af00a9c76..8f2d103c7b5 100644 --- a/src/mongo/db/auth/security_file.cpp +++ b/src/mongo/db/auth/security_file.cpp @@ -32,7 +32,6 @@ #include "mongo/db/auth/security_key.h" #include <algorithm> -#include <cctype> #include <string> #include <sys/stat.h> #include <vector> diff --git a/src/mongo/db/bson/dotted_path_support.cpp b/src/mongo/db/bson/dotted_path_support.cpp index 0595c8baf0f..8bfc0483f3b 100644 --- a/src/mongo/db/bson/dotted_path_support.cpp +++ b/src/mongo/db/bson/dotted_path_support.cpp @@ -31,13 +31,13 @@ #include "mongo/db/bson/dotted_path_support.h" -#include <cctype> #include <string> #include "mongo/bson/bsonelement.h" #include "mongo/bson/bsonmisc.h" #include "mongo/bson/bsonobj.h" #include "mongo/bson/bsonobjbuilder.h" +#include "mongo/util/ctype.h" namespace mongo { namespace dotted_path_support { @@ -74,9 +74,9 @@ void _extractAllElementsAlongPath(const BSONObj& obj, arrayComponents); } else if (e.type() == Array) { bool allDigits = false; - if (next.size() > 0 && std::isdigit(next[0])) { + if (next.size() > 0 && ctype::isDigit(next[0])) { unsigned temp = 1; - while (temp < next.size() && std::isdigit(next[temp])) + while (temp < next.size() && ctype::isDigit(next[temp])) temp++; allDigits = temp == next.size() || next[temp] == '.'; } diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp index c29a631ac29..a7153041314 100644 --- a/src/mongo/db/catalog/database_impl.cpp +++ b/src/mongo/db/catalog/database_impl.cpp @@ -95,8 +95,9 @@ Status validateDBNameForWindows(StringData dbname) { "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9"}; - std::string lower(dbname.toString()); - std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); + std::string lower{dbname}; + std::transform( + lower.begin(), lower.end(), lower.begin(), [](char c) { return ctype::toLower(c); }); if (std::count(windowsReservedNames.begin(), windowsReservedNames.end(), lower)) return Status(ErrorCodes::BadValue, diff --git a/src/mongo/db/field_ref.cpp b/src/mongo/db/field_ref.cpp index c9d5b9119a7..80a16b62d6e 100644 --- a/src/mongo/db/field_ref.cpp +++ b/src/mongo/db/field_ref.cpp @@ -32,9 +32,9 @@ #include "mongo/db/field_ref.h" #include <algorithm> -#include <cctype> #include "mongo/util/assert_util.h" +#include "mongo/util/ctype.h" namespace mongo { @@ -246,7 +246,7 @@ bool FieldRef::isNumericPathComponentStrict(StringData component) { bool FieldRef::isNumericPathComponentLenient(StringData component) { return !component.empty() && - std::all_of(component.begin(), component.end(), [](auto c) { return std::isdigit(c); }); + std::all_of(component.begin(), component.end(), [](auto c) { return ctype::isDigit(c); }); } bool FieldRef::isNumericPathComponentStrict(FieldIndex i) const { diff --git a/src/mongo/db/fts/fts_language.cpp b/src/mongo/db/fts/fts_language.cpp index a2765d350d2..654817cd261 100644 --- a/src/mongo/db/fts/fts_language.cpp +++ b/src/mongo/db/fts/fts_language.cpp @@ -30,7 +30,6 @@ #include "mongo/db/fts/fts_language.h" #include <algorithm> -#include <cctype> #include <fmt/format.h> #include <map> #include <memory> @@ -45,6 +44,7 @@ #include "mongo/db/fts/fts_unicode_phrase_matcher.h" #include "mongo/db/fts/fts_unicode_tokenizer.h" #include "mongo/util/assert_util.h" +#include "mongo/util/ctype.h" namespace mongo::fts { @@ -59,8 +59,8 @@ using namespace fmt::literals; struct LanguageStringCompare { bool operator()(StringData a, StringData b) const { return std::lexicographical_compare( - a.begin(), a.end(), b.begin(), b.end(), [](unsigned char a, unsigned char b) { - return std::tolower(a) < std::tolower(b); + a.begin(), a.end(), b.begin(), b.end(), [](char a, char b) { + return ctype::toLower(a) < ctype::toLower(b); }); } }; diff --git a/src/mongo/db/fts/unicode/string_test.cpp b/src/mongo/db/fts/unicode/string_test.cpp index a2943877b28..00931a22a10 100644 --- a/src/mongo/db/fts/unicode/string_test.cpp +++ b/src/mongo/db/fts/unicode/string_test.cpp @@ -29,11 +29,10 @@ #include "mongo/platform/basic.h" -#include <cctype> - #include "mongo/db/fts/unicode/string.h" #include "mongo/shell/linenoise_utf8.h" #include "mongo/unittest/unittest.h" +#include "mongo/util/ctype.h" #include "mongo/util/text.h" #ifdef MSC_VER @@ -114,7 +113,7 @@ TEST(UnicodeString, CaseFolding) { // Test all ascii chars. for (unsigned char ch = 0; ch <= 0x7F; ch++) { const auto upper = std::string(1, ch); - const auto lower = std::string(1, std::tolower(ch)); + const auto lower = std::string(1, ctype::toLower(ch)); if (ch) { // String's constructor doesn't handle embedded NUL bytes. ASSERT_EQUALS(lower, String(upper).toLowerToBuf(&buf, kNormal)); } diff --git a/src/mongo/db/matcher/path_internal.cpp b/src/mongo/db/matcher/path_internal.cpp index e8d4527de34..78d025445a6 100644 --- a/src/mongo/db/matcher/path_internal.cpp +++ b/src/mongo/db/matcher/path_internal.cpp @@ -29,14 +29,14 @@ #include "mongo/db/matcher/path_internal.h" +#include <algorithm> + +#include "mongo/util/ctype.h" + namespace mongo { bool isAllDigits(StringData str) { - for (unsigned i = 0; i < str.size(); i++) { - if (!isdigit(str[i])) - return false; - } - return true; + return std::all_of(str.begin(), str.end(), [](char c) { return ctype::isDigit(c); }); } BSONElement getFieldDottedOrArray(const BSONObj& doc, diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp index a8c2e8e31d6..cf45cd689f0 100644 --- a/src/mongo/db/pipeline/document_source_match.cpp +++ b/src/mongo/db/pipeline/document_source_match.cpp @@ -31,6 +31,7 @@ #include "mongo/db/pipeline/document_source_match.h" +#include <algorithm> #include <memory> #include "mongo/db/exec/document_value/document.h" @@ -43,6 +44,7 @@ #include "mongo/db/pipeline/document_path_support.h" #include "mongo/db/pipeline/expression.h" #include "mongo/db/pipeline/lite_parsed_document_source.h" +#include "mongo/util/ctype.h" #include "mongo/util/str.h" namespace mongo { @@ -144,14 +146,8 @@ namespace { // input is well formed. bool isAllDigits(StringData str) { - if (str.empty()) - return false; - - for (size_t i = 0; i < str.size(); i++) { - if (!isdigit(str[i])) - return false; - } - return true; + return !str.empty() && + std::all_of(str.begin(), str.end(), [](char c) { return ctype::isDigit(c); }); } bool isFieldnameRedactSafe(StringData fieldName) { diff --git a/src/mongo/db/query/collation/collator_interface_mock.cpp b/src/mongo/db/query/collation/collator_interface_mock.cpp index 0ace0847984..1c896c9f9eb 100644 --- a/src/mongo/db/query/collation/collator_interface_mock.cpp +++ b/src/mongo/db/query/collation/collator_interface_mock.cpp @@ -32,11 +32,11 @@ #include "mongo/db/query/collation/collator_interface_mock.h" #include <algorithm> -#include <cctype> #include <memory> #include <string> #include "mongo/util/assert_util.h" +#include "mongo/util/str.h" namespace mongo { @@ -77,15 +77,8 @@ int CollatorInterfaceMock::compare(StringData left, StringData right) const { StringData rightReversed(rightString); return leftReversed.compare(rightReversed); } - case MockType::kToLowerString: { - std::string leftString = left.toString(); - std::string rightString = right.toString(); - std::transform(leftString.begin(), leftString.end(), leftString.begin(), ::tolower); - std::transform(rightString.begin(), rightString.end(), rightString.begin(), ::tolower); - StringData leftLower(leftString); - StringData rightLower(rightString); - return leftLower.compare(rightLower); - } + case MockType::kToLowerString: + return str::toLower(left).compare(str::toLower(right)); case MockType::kAlwaysEqual: return 0; } @@ -101,12 +94,8 @@ CollatorInterface::ComparisonKey CollatorInterfaceMock::getComparisonKey( std::reverse(keyDataString.begin(), keyDataString.end()); return makeComparisonKey(std::move(keyDataString)); } - case MockType::kToLowerString: { - std::string keyDataString = stringData.toString(); - std::transform( - keyDataString.begin(), keyDataString.end(), keyDataString.begin(), ::tolower); - return makeComparisonKey(std::move(keyDataString)); - } + case MockType::kToLowerString: + return makeComparisonKey(str::toLower(stringData)); case MockType::kAlwaysEqual: return makeComparisonKey("always_equal"); } diff --git a/src/mongo/db/query/datetime/date_time_support.cpp b/src/mongo/db/query/datetime/date_time_support.cpp index a0c56878a89..9af89734beb 100644 --- a/src/mongo/db/query/datetime/date_time_support.cpp +++ b/src/mongo/db/query/datetime/date_time_support.cpp @@ -41,6 +41,7 @@ #include "mongo/bson/util/builder.h" #include "mongo/db/service_context.h" #include "mongo/util/assert_util.h" +#include "mongo/util/ctype.h" #include "mongo/util/duration.h" #include "mongo/util/str.h" @@ -312,7 +313,8 @@ boost::optional<Seconds> TimeZoneDatabase::parseUtcOffset(StringData offsetSpec) auto bias = offsetSpec[0] == '+' ? 1 : -1; // ±HH - if (offsetSpec.size() == 3 && isdigit(offsetSpec[1]) && isdigit(offsetSpec[2])) { + if (offsetSpec.size() == 3 && ctype::isDigit(offsetSpec[1]) && + ctype::isDigit(offsetSpec[2])) { int offset; if (NumberParser().base(10)(offsetSpec.substr(1, 2), &offset).isOK()) { return duration_cast<Seconds>(Hours(bias * offset)); @@ -321,8 +323,9 @@ boost::optional<Seconds> TimeZoneDatabase::parseUtcOffset(StringData offsetSpec) } // ±HHMM - if (offsetSpec.size() == 5 && isdigit(offsetSpec[1]) && isdigit(offsetSpec[2]) && - isdigit(offsetSpec[3]) && isdigit(offsetSpec[4])) { + if (offsetSpec.size() == 5 && ctype::isDigit(offsetSpec[1]) && + ctype::isDigit(offsetSpec[2]) && ctype::isDigit(offsetSpec[3]) && + ctype::isDigit(offsetSpec[4])) { int offset; if (NumberParser().base(10)(offsetSpec.substr(1, 4), &offset).isOK()) { return duration_cast<Seconds>(Hours(bias * (offset / 100L)) + @@ -332,8 +335,9 @@ boost::optional<Seconds> TimeZoneDatabase::parseUtcOffset(StringData offsetSpec) } // ±HH:MM - if (offsetSpec.size() == 6 && isdigit(offsetSpec[1]) && isdigit(offsetSpec[2]) && - offsetSpec[3] == ':' && isdigit(offsetSpec[4]) && isdigit(offsetSpec[5])) { + if (offsetSpec.size() == 6 && ctype::isDigit(offsetSpec[1]) && + ctype::isDigit(offsetSpec[2]) && offsetSpec[3] == ':' && + ctype::isDigit(offsetSpec[4]) && ctype::isDigit(offsetSpec[5])) { int hourOffset, minuteOffset; if (!NumberParser().base(10)(offsetSpec.substr(1, 2), &hourOffset).isOK()) { return boost::none; diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp index 98386d60579..54f2846cdc0 100644 --- a/src/mongo/db/query/index_bounds_builder.cpp +++ b/src/mongo/db/query/index_bounds_builder.cpp @@ -51,6 +51,7 @@ #include "mongo/db/query/planner_wildcard_helpers.h" #include "mongo/db/query/query_knobs_gen.h" #include "mongo/logv2/log.h" +#include "mongo/util/ctype.h" #include "mongo/util/str.h" #include "third_party/s2/s2cell.h" #include "third_party/s2/s2regioncoverer.h" @@ -264,7 +265,7 @@ string IndexBoundsBuilder::simpleRegex(const char* regex, // comment r = ss; break; - } else if (extended && isspace(c)) { + } else if (extended && ctype::isSpace(c)) { continue; } else { // self-matching char diff --git a/src/mongo/db/repl/idempotency_update_sequence_test.cpp b/src/mongo/db/repl/idempotency_update_sequence_test.cpp index 6402d3ee95c..d46607f194b 100644 --- a/src/mongo/db/repl/idempotency_update_sequence_test.cpp +++ b/src/mongo/db/repl/idempotency_update_sequence_test.cpp @@ -30,7 +30,6 @@ #include "mongo/platform/basic.h" #include <algorithm> -#include <cctype> #include <memory> #include "mongo/db/field_ref.h" diff --git a/src/mongo/platform/decimal128.cpp b/src/mongo/platform/decimal128.cpp index 3d0e06f42d5..65eaf7eaef4 100644 --- a/src/mongo/platform/decimal128.cpp +++ b/src/mongo/platform/decimal128.cpp @@ -32,7 +32,6 @@ #include "mongo/platform/basic.h" #include <algorithm> -#include <cctype> #include <cmath> #include <cstdlib> #include <iostream> @@ -51,21 +50,16 @@ #include "mongo/config.h" #include "mongo/platform/endian.h" #include "mongo/util/assert_util.h" +#include "mongo/util/ctype.h" #include "mongo/util/str.h" -namespace { +namespace mongo { -std::string toAsciiLowerCase(mongo::StringData input) { - std::string res = input.toString(); - for (char& c : res) { - c = tolower(c); - } - return res; -} +namespace { // Returns the number of characters consumed from input string. If unable to parse, // it returns 0. -size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlags) { +size_t validateInputString(StringData input, std::uint32_t* signalingFlags) { // Input must be of these forms: // * Valid decimal (standard or scientific notation): // /[-+]?\d*(.\d+)?([e][+\-]?\d+)?/ @@ -77,18 +71,18 @@ size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlag // Check for NaN and Infinity size_t start = (isSigned) ? 1 : 0; size_t charsConsumed = start; - mongo::StringData noSign = input.substr(start); + StringData noSign = input.substr(start); bool isNanOrInf = noSign == "nan" || noSign == "inf" || noSign == "infinity"; if (isNanOrInf) return start + noSign.size(); // Input starting with non digit - if (!std::isdigit(noSign[0])) { + if (!ctype::isDigit(noSign[0])) { if (noSign[0] != '.') { - *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid; + *signalingFlags = Decimal128::SignalingFlag::kInvalid; return 0; } else if (noSign.size() == 1) { - *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid; + *signalingFlags = Decimal128::SignalingFlag::kInvalid; return 0; } } @@ -102,11 +96,11 @@ size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlag char c = noSign[i]; if (c == '.') { if (parsedDot) { - *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid; + *signalingFlags = Decimal128::SignalingFlag::kInvalid; return 0; } parsedDot = true; - } else if (!std::isdigit(c)) { + } else if (!ctype::isDigit(c)) { break; } else { hasCoefficient = true; @@ -119,7 +113,7 @@ size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlag if (isZero) { // Override inexact/overflow flag set by the intel library - *signalingFlags = mongo::Decimal128::SignalingFlag::kNoFlag; + *signalingFlags = Decimal128::SignalingFlag::kNoFlag; } // Input is valid if we've parsed the entire string @@ -129,21 +123,21 @@ size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlag // String with empty coefficient and non-empty exponent if (!hasCoefficient) { - *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid; + *signalingFlags = Decimal128::SignalingFlag::kInvalid; return 0; } // Check exponent - mongo::StringData exponent = noSign.substr(i); + StringData exponent = noSign.substr(i); if (exponent[0] != 'e' || exponent.size() < 2) { - *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid; + *signalingFlags = Decimal128::SignalingFlag::kInvalid; return 0; } if (exponent[1] == '-' || exponent[1] == '+') { exponent = exponent.substr(2); if (exponent.size() == 0) { - *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid; + *signalingFlags = Decimal128::SignalingFlag::kInvalid; return 0; } charsConsumed += 2; @@ -152,21 +146,13 @@ size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlag ++charsConsumed; } - for (size_t j = 0; j < exponent.size(); j++) { - char c = exponent[j]; - if (!std::isdigit(c)) { - *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid; - return 0; - } - ++charsConsumed; + if (!std::all_of(exponent.begin(), exponent.end(), [](char c) { return ctype::isDigit(c); })) { + *signalingFlags = Decimal128::SignalingFlag::kInvalid; + return 0; } + charsConsumed += exponent.size(); return charsConsumed; } -} // namespace - -namespace mongo { - -namespace { // Determine system's endian ordering in order to construct decimal 128 values directly constexpr bool kNativeLittle = (endian::Order::kNative == endian::Order::kLittle); @@ -313,7 +299,7 @@ Decimal128::Decimal128(std::string stringValue, std::uint32_t* signalingFlags, RoundingMode roundMode, size_t* charsConsumed) { - std::string lower = toAsciiLowerCase(stringValue); + std::string lower = str::toLower(stringValue); BID_UINT128 dec128; // The intel library function requires a char * while c_str() returns a const char*. // We're using const_cast here since the library function should not modify the input. diff --git a/src/mongo/platform/strcasestr.cpp b/src/mongo/platform/strcasestr.cpp index a9c5a9ce09d..9c5ffe23a3c 100644 --- a/src/mongo/platform/strcasestr.cpp +++ b/src/mongo/platform/strcasestr.cpp @@ -39,7 +39,6 @@ #if defined(_WIN32) || defined(__sun) #include <algorithm> -#include <cctype> #include <cstring> #include <string> @@ -49,6 +48,9 @@ #define STRCASESTR_EMULATION_NAME strcasestr #endif +#include "mongo/util/ctype.h" +#include "mongo/util/str.h" + namespace mongo { namespace pal { @@ -60,18 +62,13 @@ namespace pal { * @return ptr to start of 'needle' within 'haystack' if found, NULL otherwise */ const char* STRCASESTR_EMULATION_NAME(const char* haystack, const char* needle) { - std::string haystackLower(haystack); - std::transform(haystackLower.begin(), haystackLower.end(), haystackLower.begin(), ::tolower); - - std::string needleLower(needle); - std::transform(needleLower.begin(), needleLower.end(), needleLower.begin(), ::tolower); - - // Use strstr() to find 'lowercased needle' in 'lowercased haystack' - // If found, use the location to compute the matching location in the original string - // If not found, return NULL - const char* haystackLowerStart = haystackLower.c_str(); - const char* location = strstr(haystackLowerStart, needleLower.c_str()); - return location ? (haystack + (location - haystackLowerStart)) : nullptr; + StringData hay(haystack); + StringData pat(needle); + auto caseEq = [](char a, char b) { return ctype::toLower(a) == ctype::toLower(b); }; + auto pos = std::search(hay.begin(), hay.end(), pat.begin(), pat.end(), caseEq); + if (pos == hay.end()) + return nullptr; + return haystack + (pos - hay.begin()); } #if defined(__sun) diff --git a/src/mongo/scripting/engine.cpp b/src/mongo/scripting/engine.cpp index 42ccc5f7154..90f29d579cf 100644 --- a/src/mongo/scripting/engine.cpp +++ b/src/mongo/scripting/engine.cpp @@ -33,15 +33,17 @@ #include "mongo/scripting/engine.h" +#include <algorithm> #include <boost/filesystem/operations.hpp> -#include <cctype> +#include "mongo/base/string_data.h" #include "mongo/client/dbclient_base.h" #include "mongo/client/dbclient_cursor.h" #include "mongo/db/operation_context.h" #include "mongo/db/service_context.h" #include "mongo/logv2/log.h" #include "mongo/scripting/dbdirectclient_factory.h" +#include "mongo/util/ctype.h" #include "mongo/util/fail_point.h" #include "mongo/util/file.h" #include "mongo/util/text.h" @@ -207,8 +209,10 @@ void Scope::storedFuncMod(OperationContext* opCtx) { void Scope::validateObjectIdString(const string& str) { uassert(10448, "invalid object id: length", str.size() == 24); - for (size_t i = 0; i < str.size(); i++) - uassert(10430, "invalid object id: not hex", std::isxdigit(str.at(i))); + auto isAllHex = [](StringData s) { + return std::all_of(s.begin(), s.end(), [](char c) { return ctype::isXdigit(c); }); + }; + uassert(10430, "invalid object id: not hex", isAllHex(str)); } void Scope::loadStored(OperationContext* opCtx, bool ignoreNotConnected) { @@ -615,12 +619,13 @@ bool hasJSReturn(const string& code) { // return is at start OR preceded by space // AND return is not followed by digit or letter - return (x == 0 || isspace(code[x - 1])) && !(isalpha(code[x + 6]) || isdigit(code[x + 6])); + return (x == 0 || ctype::isSpace(code[x - 1])) && + !(ctype::isAlpha(code[x + 6]) || ctype::isDigit(code[x + 6])); } const char* jsSkipWhiteSpace(const char* raw) { while (raw[0]) { - while (isspace(*raw)) { + while (ctype::isSpace(*raw)) { ++raw; } if (raw[0] != '/' || raw[1] != '/') diff --git a/src/mongo/scripting/mozjs/bindata.cpp b/src/mongo/scripting/mozjs/bindata.cpp index f66e6064956..36c37700e20 100644 --- a/src/mongo/scripting/mozjs/bindata.cpp +++ b/src/mongo/scripting/mozjs/bindata.cpp @@ -31,7 +31,6 @@ #include "mongo/scripting/mozjs/bindata.h" -#include <cctype> #include <iomanip> #include "mongo/bson/bsonobjbuilder.h" diff --git a/src/mongo/shell/linenoise.cpp b/src/mongo/shell/linenoise.cpp index 901b4f3c76f..7c6865e4392 100644 --- a/src/mongo/shell/linenoise.cpp +++ b/src/mongo/shell/linenoise.cpp @@ -97,7 +97,6 @@ #else /* _WIN32 */ -#include <cctype> #include <signal.h> #include <stdlib.h> #include <string.h> @@ -105,13 +104,13 @@ #include <sys/types.h> #include <termios.h> #include <unistd.h> -#include <wctype.h> #endif /* _WIN32 */ #include "linenoise.h" #include "linenoise_utf8.h" #include "mk_wcwidth.h" +#include <cwctype> #include <errno.h> #include <fcntl.h> #include <memory> @@ -1968,7 +1967,7 @@ int InputBuffer::incrementalHistorySearch(PromptBase& pi, int startChar) { } static bool isCharacterAlphanumeric(UChar32 testChar) { - return iswalnum(testChar); + return std::iswalnum(testChar); } int InputBuffer::getInputLine(PromptBase& pi) { diff --git a/src/mongo/shell/mongo_main.cpp b/src/mongo/shell/mongo_main.cpp index b9567df8f9b..4a4069077f7 100644 --- a/src/mongo/shell/mongo_main.cpp +++ b/src/mongo/shell/mongo_main.cpp @@ -38,7 +38,6 @@ #include <boost/log/attributes/value_extraction.hpp> #include <boost/log/core.hpp> #include <boost/log/sinks.hpp> -#include <cctype> #include <fstream> #include <iostream> #include <pcrecpp.h> @@ -73,6 +72,7 @@ #include "mongo/shell/shell_utils_launcher.h" #include "mongo/stdx/utility.h" #include "mongo/transport/transport_layer_asio.h" +#include "mongo/util/ctype.h" #include "mongo/util/errno_util.h" #include "mongo/util/exit.h" #include "mongo/util/file.h" @@ -469,7 +469,7 @@ std::string getURIFromArgs(const std::string& arg, const auto colonPos = arg.find(':'); if ((colonPos != std::string::npos) && ((colonPos + 1) < arg.size()) && - isdigit(arg[colonPos + 1])) { + ctype::isDigit(arg[colonPos + 1])) { // Assume IPv4 or hostname with port. return parseDbHost("test", arg); } @@ -544,7 +544,7 @@ static void edit(const std::string& whatToEdit) { // "whatToEdit" might look like a variable/property name bool editingVariable = true; for (const char* p = whatToEdit.c_str(); *p; ++p) { - if (!(isalnum(*p) || *p == '_' || *p == '.')) { + if (!(ctype::isAlnum(*p) || *p == '_' || *p == '.')) { editingVariable = false; break; } @@ -1062,7 +1062,7 @@ int mongo_main(int argc, char* argv[]) { shellHistoryAdd(linePtr); const char* s = linePtr + 5; // skip "edit " - while (*s && isspace(*s)) + while (*s && ctype::isSpace(*s)) s++; edit(s); diff --git a/src/mongo/shell/shell_utils.cpp b/src/mongo/shell/shell_utils.cpp index 4dc05908256..4fe70659c20 100644 --- a/src/mongo/shell/shell_utils.cpp +++ b/src/mongo/shell/shell_utils.cpp @@ -35,7 +35,6 @@ #include <algorithm> #include <boost/filesystem.hpp> -#include <cctype> #include <memory> #include <set> #include <stdlib.h> @@ -59,6 +58,7 @@ #include "mongo/shell/shell_options.h" #include "mongo/shell/shell_utils_extended.h" #include "mongo/shell/shell_utils_launcher.h" +#include "mongo/util/ctype.h" #include "mongo/util/fail_point.h" #include "mongo/util/processinfo.h" #include "mongo/util/quick_exit.h" @@ -256,7 +256,7 @@ bool isBalanced(const std::string& code) { } if ("~!%^&*-+=|:,<>/?."_sd.find(code[i]) != std::string::npos) danglingOp = true; - else if (!std::isspace(code[i])) + else if (!ctype::isSpace(code[i])) danglingOp = false; } diff --git a/src/mongo/shell/shell_utils_launcher.cpp b/src/mongo/shell/shell_utils_launcher.cpp index d52ebcfefc6..2de721aada7 100644 --- a/src/mongo/shell/shell_utils_launcher.cpp +++ b/src/mongo/shell/shell_utils_launcher.cpp @@ -39,7 +39,6 @@ #include <boost/iostreams/stream.hpp> #include <boost/iostreams/stream_buffer.hpp> #include <boost/iostreams/tee.hpp> -#include <cctype> #include <fcntl.h> #include <fmt/format.h> #include <iostream> @@ -71,6 +70,7 @@ #include "mongo/shell/shell_options.h" #include "mongo/shell/shell_utils.h" #include "mongo/util/assert_util.h" +#include "mongo/util/ctype.h" #include "mongo/util/destructor_guard.h" #include "mongo/util/exit.h" #include "mongo/util/net/hostandport.h" @@ -591,14 +591,8 @@ boost::filesystem::path ProgramRunner::findProgram(const string& prog) { // needs to be appended. // - auto isExtensionValid = [](std::string extension) { - for (auto c : extension) { - if (std::isdigit(c)) { - return false; - } - } - - return true; + auto isExtensionValid = [](std::string e) { + return std::all_of(e.begin(), e.end(), [](char c) { return !ctype::isDigit(c); }); }; if (!p.has_extension() || !isExtensionValid(p.extension().string())) { diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript index 7b4b56c6540..1e759de47dc 100644 --- a/src/mongo/util/SConscript +++ b/src/mongo/util/SConscript @@ -619,6 +619,7 @@ icuEnv.CppUnitTest( 'clock_source_mock_test.cpp', 'concepts_test.cpp', 'container_size_helper_test.cpp', + 'ctype_test.cpp', 'decimal_counter_test.cpp', 'decorable_test.cpp', 'diagnostic_info_test.cpp' if get_option('use-diagnostic-latches') == 'on' else [], @@ -768,3 +769,8 @@ stacktraceEnv.Benchmark( # See above for how to handle any future LIBDEPS additions here. # LIBDEPS=... ) + +env.Benchmark( + target='string_bm', + source='string_bm.cpp', +) diff --git a/src/mongo/util/ctype.h b/src/mongo/util/ctype.h new file mode 100644 index 00000000000..a3880e281a8 --- /dev/null +++ b/src/mongo/util/ctype.h @@ -0,0 +1,212 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +/** + * Replacements for <cctype> or <ctype.h> functions and macros. + * These should be used instead of the corresponding standard functions. + * Note the camel-case spelling to distinguish these from the C++ functions + * and especially the C macros. + * + * Regarding the capitalization of these functions: POSIX defines standard + * identifiers for the 12 character classes. Each "is"- function here directly + * references and evokes such a POSIX identifier, so they are not + * camel-cased as ordinary English phrases (so `isAlnum` not `isAlNum`). + * + * <https://en.wikipedia.org/wiki/Regular_expression#Character_classes> + * + * Problems with the standard functions: + * + * - They accept int (to accept the EOF of -1 and integrate with cstdio). + * Passing negative char values other than EOF is undefined behavior! + * They cannot be used directly in std algorithms operating on char + * arguments because of this, say to `std::transform` or `std::find_if` + * on a `std::string`. You need a lambda and it has to do a cast. + * - Most are locale dependent, so they have to be slow. Dropping + * locale makes the "is"- functions 200% faster. + * - They return int instead of bool for C compatibility. Undesirable in C++. + * - In C they are macros, so they are very different entities depending on + * the subtle choice of #include <cctype> vs #include <ctype.h>. + * - Support for the EOF value bloats the lookup tables and carves out a + * surprising special case. + * + * The `<cctype>` character classification functions are a subtle source of bugs. + * See warnings at <https://en.cppreference.com/w/cpp/header/cctype>. + * + * The proper call sequence is often not done, creating bugs. So + * here are some more suitable C++17 implementations. We can make our versions + * constexpr and noexcept because they don't depend on the locale or other + * dynamic program state. + */ + +#pragma once + +#include <array> + +namespace mongo::ctype { +namespace detail { + +/** Define a bit position for each character class queryable with this API. */ +enum ClassBit : uint16_t { + kUpper = 1 << 0, //< [upper] UPPERCASE + kLower = 1 << 1, //< [lower] lowercase + kAlpha = 1 << 2, //< [alpha] Alphabetic (upper case or lower case) + kDigit = 1 << 3, //< [digit] Decimal digit + kXdigit = 1 << 4, //< [xdigit] Hexadecimal digit (upper case or lower case: [0-9A-Fa-f]) + kSpace = 1 << 5, //< [space] Whitespace ([ \t\r\n\f\v]) + kPrint = 1 << 6, //< [print] Printing (non-control chars) + kGraph = 1 << 7, //< [graph] Graphical (non-control, non-whitespace) + kBlank = 1 << 8, //< [blank] Blank (' ', '\t') + kCntrl = 1 << 9, //< [cntrl] Control character: 0x00-0x1f, and 0x7f (DEL) + kPunct = 1 << 10, //< [punct] Punctuation (graphical, but not alphanumeric) + kAlnum = 1 << 11 //< [alnum] Alphanumeric (letter or digit) +}; + +/** Returns the bitwise-or of all `ClassBit` pertinent to character `c`. */ +constexpr uint16_t calculateClassBits(unsigned char c) { + if (c >= 0x80) + return 0; + uint16_t r = 0; + if (c <= 0x1f || c == 0x7f) + r |= kCntrl; + if (!(r & kCntrl)) + r |= kPrint; + if (c == '\t' || c == ' ') + r |= kBlank; + if ((r & kBlank) || c == '\n' || c == '\v' || c == '\f' || c == '\r') + r |= kSpace; + if (c >= 'A' && c <= 'Z') + r |= kUpper; + if (c >= 'a' && c <= 'z') + r |= kLower; + if (c >= '0' && c <= '9') + r |= kDigit; + if ((r & kUpper) || (r & kLower)) + r |= kAlpha; + if ((r & kDigit) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) + r |= kXdigit; + if ((r & kAlpha) || (r & kDigit)) + r |= kAlnum; + if ((r & kPrint) && !(r & kSpace)) + r |= kGraph; + if ((r & kGraph) && !(r & kAlnum)) + r |= kPunct; + return r; +} + +/** The character class memberships for each char. */ +constexpr auto chClassTable = [] { + std::array<uint16_t, 256> arr{}; + for (size_t i = 0; i < arr.size(); ++i) + arr[i] = calculateClassBits(i); + return arr; +}(); + +constexpr bool isMember(char c, uint16_t mask) { + return chClassTable[static_cast<unsigned char>(c)] & mask; +} + +/** Lookup table for `toUpper`. */ +constexpr auto chUpperTable = [] { + std::array<char, 256> arr{}; + for (size_t i = 0; i < arr.size(); ++i) + arr[i] = isMember(i, kLower) ? 'A' + (i - 'a') : i; + return arr; +}(); + +/** Lookup table for `toLower`. */ +constexpr auto chLowerTable = [] { + std::array<char, 256> arr{}; + for (size_t i = 0; i < arr.size(); ++i) + arr[i] = isMember(i, kUpper) ? 'a' + (i - 'A') : i; + return arr; +}(); + +} // namespace detail + + +/** + * These 12 "is"- functions exactly match the <cctype> definitions for the + * POSIX (or C) locale. See the corresponding definitions in <cctype>. + * <https://en.cppreference.com/w/cpp/header/cctype> + * See notes above. + */ +constexpr bool isAlnum(char c) noexcept { + return detail::isMember(c, detail::kAlnum); +} +constexpr bool isAlpha(char c) noexcept { + return detail::isMember(c, detail::kAlpha); +} +constexpr bool isLower(char c) noexcept { + return detail::isMember(c, detail::kLower); +} +constexpr bool isUpper(char c) noexcept { + return detail::isMember(c, detail::kUpper); +} +constexpr bool isDigit(char c) noexcept { + return detail::isMember(c, detail::kDigit); +} +constexpr bool isXdigit(char c) noexcept { + return detail::isMember(c, detail::kXdigit); +} +constexpr bool isCntrl(char c) noexcept { + return detail::isMember(c, detail::kCntrl); +} +constexpr bool isGraph(char c) noexcept { + return detail::isMember(c, detail::kGraph); +} +constexpr bool isSpace(char c) noexcept { + return detail::isMember(c, detail::kSpace); +} +constexpr bool isBlank(char c) noexcept { + return detail::isMember(c, detail::kBlank); +} +constexpr bool isPrint(char c) noexcept { + return detail::isMember(c, detail::kPrint); +} +constexpr bool isPunct(char c) noexcept { + return detail::isMember(c, detail::kPunct); +} + +/** + * Returns the upper case of `c` if `c` is lower case, otherwise `c`. + * Unlike `std::toupper`, is not affected by locale. See notes above. + */ +constexpr char toUpper(char c) noexcept { + return detail::chUpperTable[static_cast<unsigned char>(c)]; +} + +/** + * Returns the lower case of `c` if `c` is upper case, otherwise `c`. + * Unlike `std::tolower`, is not affected by locale. See notes above. + */ +constexpr char toLower(char c) noexcept { + return detail::chLowerTable[static_cast<unsigned char>(c)]; +} + +} // namespace mongo::ctype diff --git a/src/mongo/util/ctype_test.cpp b/src/mongo/util/ctype_test.cpp new file mode 100644 index 00000000000..55c122dfd6e --- /dev/null +++ b/src/mongo/util/ctype_test.cpp @@ -0,0 +1,109 @@ +/** + * Copyright (C) 2020-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest + +#include "mongo/platform/basic.h" + +#include <boost/optional.hpp> +#include <fmt/format.h> + +#include "mongo/logv2/log.h" +#include "mongo/unittest/unittest.h" +#include "mongo/util/ctype.h" +#include "mongo/util/hex.h" + +namespace mongo::ctype { +namespace { + +using namespace fmt::literals; + +TEST(Ctype, MatchesCxxStdlib) { + for (size_t i = 0; i < 256; ++i) { + char c = i; + unsigned char uc = i; + const std::string msg = " i={:02x}"_format(i); + ASSERT_EQ(isAlnum(c), (bool)std::isalnum(uc)) << msg; + ASSERT_EQ(isAlpha(c), (bool)std::isalpha(uc)) << msg; + ASSERT_EQ(isLower(c), (bool)std::islower(uc)) << msg; + ASSERT_EQ(isUpper(c), (bool)std::isupper(uc)) << msg; + ASSERT_EQ(isDigit(c), (bool)std::isdigit(uc)) << msg; + ASSERT_EQ(isXdigit(c), (bool)std::isxdigit(uc)) << msg; + ASSERT_EQ(isCntrl(c), (bool)std::iscntrl(uc)) << msg; + ASSERT_EQ(isGraph(c), (bool)std::isgraph(uc)) << msg; + ASSERT_EQ(isSpace(c), (bool)std::isspace(uc)) << msg; + ASSERT_EQ(isBlank(c), (bool)std::isblank(uc)) << msg; + ASSERT_EQ(isPrint(c), (bool)std::isprint(uc)) << msg; + ASSERT_EQ(isPunct(c), (bool)std::ispunct(uc)) << msg; + ASSERT_EQ(toLower(c), (char)std::tolower(uc)) << msg; + ASSERT_EQ(toUpper(c), (char)std::toupper(uc)) << msg; + } +} + +TEST(Ctype, MatchesCStdlib) { + for (size_t i = 0; i < 256; ++i) { + char c = i; + unsigned char uc = i; + const std::string msg = " i={:02x}"_format(i); + ASSERT_EQ(isAlnum(c), (bool)isalnum(uc)) << msg; + ASSERT_EQ(isAlpha(c), (bool)isalpha(uc)) << msg; + ASSERT_EQ(isLower(c), (bool)islower(uc)) << msg; + ASSERT_EQ(isUpper(c), (bool)isupper(uc)) << msg; + ASSERT_EQ(isDigit(c), (bool)isdigit(uc)) << msg; + ASSERT_EQ(isXdigit(c), (bool)isxdigit(uc)) << msg; + ASSERT_EQ(isCntrl(c), (bool)iscntrl(uc)) << msg; + ASSERT_EQ(isGraph(c), (bool)isgraph(uc)) << msg; + ASSERT_EQ(isSpace(c), (bool)isspace(uc)) << msg; + ASSERT_EQ(isBlank(c), (bool)isblank(uc)) << msg; + ASSERT_EQ(isPrint(c), (bool)isprint(uc)) << msg; + ASSERT_EQ(isPunct(c), (bool)ispunct(uc)) << msg; + ASSERT_EQ(toLower(c), (char)tolower(uc)) << msg; + ASSERT_EQ(toUpper(c), (char)toupper(uc)) << msg; + } +} + +TEST(Ctype, IsConstexpr) { + MONGO_STATIC_ASSERT(isAlnum('a')); + MONGO_STATIC_ASSERT(isAlpha('a')); + MONGO_STATIC_ASSERT(isLower('a')); + MONGO_STATIC_ASSERT(!isUpper('a')); + MONGO_STATIC_ASSERT(!isDigit('a')); + MONGO_STATIC_ASSERT(isXdigit('a')); + MONGO_STATIC_ASSERT(!isCntrl('a')); + MONGO_STATIC_ASSERT(isGraph('a')); + MONGO_STATIC_ASSERT(!isSpace('a')); + MONGO_STATIC_ASSERT(!isBlank('a')); + MONGO_STATIC_ASSERT(isPrint('a')); + MONGO_STATIC_ASSERT(!isPunct('a')); + MONGO_STATIC_ASSERT(toLower('a') == 'a'); + MONGO_STATIC_ASSERT(toUpper('a') == 'A'); +} + +} // namespace +} // namespace mongo::ctype diff --git a/src/mongo/util/dns_name.h b/src/mongo/util/dns_name.h index f48eb17a9b6..c1472769f07 100644 --- a/src/mongo/util/dns_name.h +++ b/src/mongo/util/dns_name.h @@ -30,7 +30,6 @@ #pragma once #include <algorithm> -#include <cctype> #include <iostream> #include <iterator> #include <sstream> @@ -41,6 +40,7 @@ #include "mongo/base/string_data.h" #include "mongo/bson/util/builder.h" #include "mongo/util/assert_util.h" +#include "mongo/util/ctype.h" namespace mongo { namespace dns { @@ -117,13 +117,13 @@ public: // We permit dashes and numbers. We also permit underscores for use with SRV records // and such. - if (!(ch == '-' || std::isalnum(ch) || (ch == '_' && parserState == kFirstLetter))) { + if (!(ch == '-' || ctype::isAlnum(ch) || (ch == '_' && parserState == kFirstLetter))) { uasserted(ErrorCodes::DNSRecordTypeMismatch, "A Domain Name cannot have tokens other than dash or alphanumerics."); } // All domain names are represented in lower-case letters, because DNS is case // insensitive. - name.push_back(std::tolower(ch)); + name.push_back(ctype::toLower(ch)); if (parserState == kFirstLetter) { parserState = kNonPeriod; } @@ -367,7 +367,7 @@ private: bool isEquivalentToIPv4DottedDecimal() const { return !_fullyQualified && _nameComponents.size() == 4 && std::all_of(begin(_nameComponents), end(_nameComponents), [](const auto& s) { - return std::all_of(begin(s), end(s), [](char c) { return std::isdigit(c); }); + return std::all_of(begin(s), end(s), [](char c) { return ctype::isDigit(c); }); }); } diff --git a/src/mongo/util/hex.cpp b/src/mongo/util/hex.cpp index 21a81a91dfc..5f92ca6a8a9 100644 --- a/src/mongo/util/hex.cpp +++ b/src/mongo/util/hex.cpp @@ -30,12 +30,12 @@ #include "mongo/util/hex.h" #include <algorithm> -#include <cctype> #include <fmt/format.h> #include <iterator> #include <string> #include "mongo/base/error_codes.h" +#include "mongo/util/ctype.h" namespace mongo { @@ -85,7 +85,7 @@ unsigned char decodePair(StringData c) { bool validate(StringData s) { // There must be an even number of characters, since each pair encodes a single byte. return s.size() % 2 == 0 && - std::all_of(s.begin(), s.end(), [](unsigned char c) { return std::isxdigit(c); }); + std::all_of(s.begin(), s.end(), [](auto c) { return ctype::isXdigit(c); }); } std::string encode(StringData data) { diff --git a/src/mongo/util/net/ssl_manager.cpp b/src/mongo/util/net/ssl_manager.cpp index f33cb9df0bf..73884cabae0 100644 --- a/src/mongo/util/net/ssl_manager.cpp +++ b/src/mongo/util/net/ssl_manager.cpp @@ -48,6 +48,7 @@ #include "mongo/platform/overflow_arithmetic.h" #include "mongo/transport/session.h" #include "mongo/transport/transport_layer_asio.h" +#include "mongo/util/ctype.h" #include "mongo/util/hex.h" #include "mongo/util/icu.h" #include "mongo/util/net/ssl_options.h" @@ -62,20 +63,6 @@ SSLManagerCoordinator* theSSLManagerCoordinator; namespace { -// Some of these duplicate the std::isalpha/std::isxdigit because we don't want them to be -// affected by the current locale. -inline bool isAlpha(char ch) { - return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'); -} - -inline bool isDigit(char ch) { - return (ch >= '0' && ch <= '9'); -} - -inline bool isHex(char ch) { - return isDigit(ch) || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f'); -} - // This function returns true if the character is supposed to be escaped according to the rules // in RFC4514. The exception to the RFC the space character ' ' and the '#', because we've not // required users to escape spaces or sharps in DNs in the past. @@ -164,12 +151,12 @@ std::string RFC4514Parser::extractAttributeName() { std::function<bool(char ch)> characterCheck; // If the first character is a digit, then this is an OID and can only contain // numbers and '.' - if (isDigit(ch)) { - characterCheck = [](char ch) { return (isDigit(ch) || ch == '.'); }; + if (ctype::isDigit(ch)) { + characterCheck = [](char ch) { return ctype::isDigit(ch) || ch == '.'; }; // If the first character is an alpha, then this is a short name and can only // contain alpha/digit/hyphen characters. - } else if (isAlpha(ch)) { - characterCheck = [](char ch) { return (isAlpha(ch) || isDigit(ch) || ch == '-'); }; + } else if (ctype::isAlpha(ch)) { + characterCheck = [](char ch) { return ctype::isAlnum(ch) || ch == '-'; }; // Otherwise this is an invalid attribute name } else { uasserted(ErrorCodes::BadValue, @@ -214,13 +201,13 @@ std::pair<std::string, RFC4514Parser::ValueTerminator> RFC4514Parser::extractVal if (isEscaped(ch)) { sb << ch; trailingSpaces = 0; - } else if (isHex(ch)) { + } else if (ctype::isXdigit(ch)) { const std::array<char, 2> hexValStr = {ch, _advance()}; uassert(ErrorCodes::BadValue, str::stream() << "Escaped hex value contains invalid character \'" << hexValStr[1] << "\'", - isHex(hexValStr[1])); + ctype::isXdigit(hexValStr[1])); const char hexVal = hexblob::decodePair(StringData(hexValStr.data(), 2)); sb << hexVal; if (hexVal != ' ') { diff --git a/src/mongo/util/net/ssl_options.cpp b/src/mongo/util/net/ssl_options.cpp index a142e3a917a..3cdf4da1610 100644 --- a/src/mongo/util/net/ssl_options.cpp +++ b/src/mongo/util/net/ssl_options.cpp @@ -36,6 +36,7 @@ #include "mongo/base/status.h" #include "mongo/config.h" #include "mongo/db/server_options.h" +#include "mongo/util/ctype.h" #include "mongo/util/hex.h" #include "mongo/util/options_parser/startup_options.h" #include "mongo/util/text.h" @@ -57,7 +58,7 @@ std::vector<uint8_t> hexToVector(StringData hex) { std::string data = hexblob::decode(hex); return std::vector<uint8_t>(data.begin(), data.end()); } catch (const ExceptionFor<ErrorCodes::FailedToParse>&) { - if (std::any_of(hex.begin(), hex.end(), [](unsigned char c) { return !isxdigit(c); })) { + if (std::any_of(hex.begin(), hex.end(), [](char c) { return !ctype::isXdigit(c); })) { uasserted(ErrorCodes::BadValue, "Not a valid hex string"); } if (hex.size() % 2) { diff --git a/src/mongo/util/options_parser/options_parser.cpp b/src/mongo/util/options_parser/options_parser.cpp index 39a38544805..28600197c77 100644 --- a/src/mongo/util/options_parser/options_parser.cpp +++ b/src/mongo/util/options_parser/options_parser.cpp @@ -37,7 +37,6 @@ #include <boost/iostreams/stream.hpp> #include <boost/iostreams/stream_buffer.hpp> #include <boost/program_options.hpp> -#include <cctype> #include <cerrno> #include <fcntl.h> #include <fstream> @@ -58,6 +57,7 @@ #include "mongo/db/json.h" #include "mongo/logv2/log.h" #include "mongo/util/assert_util.h" +#include "mongo/util/ctype.h" #include "mongo/util/hex.h" #include "mongo/util/net/hostandport.h" #include "mongo/util/net/http_client.h" @@ -450,10 +450,10 @@ public: if (_trim == Trim::kWhitespace) { size_t start = 0; size_t end = str.size(); - while ((start < end) && std::isspace(str[start])) { + while ((start < end) && ctype::isSpace(str[start])) { ++start; } - while ((start < end) && std::isspace(str[end - 1])) { + while ((start < end) && ctype::isSpace(str[end - 1])) { --end; } if ((start > 0) || (end < str.size())) { diff --git a/src/mongo/util/processinfo_linux.cpp b/src/mongo/util/processinfo_linux.cpp index b4b67c4b91b..96735c90792 100644 --- a/src/mongo/util/processinfo_linux.cpp +++ b/src/mongo/util/processinfo_linux.cpp @@ -58,6 +58,7 @@ #include <pcrecpp.h> #include "mongo/logv2/log.h" +#include "mongo/util/ctype.h" #include "mongo/util/file.h" #define KLONG long @@ -531,7 +532,7 @@ public: lineOff = 0; // trim whitespace and append 000 to replace kB. - while (isspace(meminfo.at(lineOff))) + while (ctype::isSpace(meminfo.at(lineOff))) lineOff++; meminfo = meminfo.substr(lineOff); diff --git a/src/mongo/util/stacktrace.cpp b/src/mongo/util/stacktrace.cpp index 2725d40d200..07ab0a8ab68 100644 --- a/src/mongo/util/stacktrace.cpp +++ b/src/mongo/util/stacktrace.cpp @@ -31,12 +31,11 @@ #include "mongo/util/stacktrace.h" -#include <cctype> - #include "mongo/bson/bsonobj.h" #include "mongo/bson/json.h" #include "mongo/logv2/log.h" #include "mongo/util/assert_util.h" +#include "mongo/util/ctype.h" namespace mongo::stack_trace_detail { namespace { @@ -82,7 +81,7 @@ StringData Hex::toHex(uint64_t x, Buf& buf, bool showBase) { uint64_t Hex::fromHex(StringData s) { uint64_t x = 0; for (char c : s) { - char uc = std::toupper(static_cast<unsigned char>(c)); + char uc = ctype::toUpper(c); if (size_t pos = kDigits<16>.find(uc); pos == std::string::npos) { return x; } else { diff --git a/src/mongo/util/stacktrace_threads.cpp b/src/mongo/util/stacktrace_threads.cpp index 05d6bcacb0d..62a5b020a2c 100644 --- a/src/mongo/util/stacktrace_threads.cpp +++ b/src/mongo/util/stacktrace_threads.cpp @@ -36,7 +36,6 @@ #include <array> #include <atomic> #include <boost/filesystem.hpp> -#include <cctype> #include <cstdint> #include <cstdlib> #include <dirent.h> diff --git a/src/mongo/util/str.cpp b/src/mongo/util/str.cpp index 024a6bca1e3..bc4d4ec4fe5 100644 --- a/src/mongo/util/str.cpp +++ b/src/mongo/util/str.cpp @@ -29,9 +29,8 @@ #include "mongo/platform/basic.h" -#include <cctype> - #include "mongo/base/parse_number.h" +#include "mongo/util/ctype.h" #include "mongo/util/hex.h" #include "mongo/util/str.h" @@ -90,8 +89,8 @@ int LexNumCmp::cmp(StringData sd1, StringData sd2, bool lexOnly) { return -1; if (!lexOnly) { - bool n1 = isdigit(sd1[s1]); - bool n2 = isdigit(sd2[s2]); + bool n1 = ctype::isDigit(sd1[s1]); + bool n2 = ctype::isDigit(sd2[s2]); if (n1 && n2) { // get rid of leading 0s @@ -105,9 +104,9 @@ int LexNumCmp::cmp(StringData sd1, StringData sd2, bool lexOnly) { size_t e1 = s1; size_t e2 = s2; - while (e1 < sd1.size() && isdigit(sd1[e1])) + while (e1 < sd1.size() && ctype::isDigit(sd1[e1])) e1++; - while (e2 < sd2.size() && isdigit(sd2[e2])) + while (e2 < sd2.size() && ctype::isDigit(sd2[e2])) e2++; size_t len1 = e1 - s1; @@ -225,7 +224,7 @@ std::string escape(StringData sd, bool escape_slash) { boost::optional<size_t> parseUnsignedBase10Integer(StringData fieldName) { // Do not accept positions like '-4' or '+4' - if (!std::isdigit(fieldName[0])) { + if (!ctype::isDigit(fieldName[0])) { return boost::none; } unsigned int index; diff --git a/src/mongo/util/str.h b/src/mongo/util/str.h index 1466dea2466..d90e8a10e93 100644 --- a/src/mongo/util/str.h +++ b/src/mongo/util/str.h @@ -35,8 +35,8 @@ * TODO: De-inline. */ +#include <algorithm> #include <boost/optional.hpp> -#include <ctype.h> #include <memory> #include <sstream> #include <string> @@ -45,6 +45,7 @@ #include "mongo/base/string_data.h" #include "mongo/bson/util/builder.h" #include "mongo/platform/bits.h" +#include "mongo/util/ctype.h" namespace mongo::str { @@ -201,7 +202,7 @@ inline unsigned toUnsigned(const std::string& a) { unsigned x = 0; const char* p = a.c_str(); while (1) { - if (!isdigit(*p)) + if (!ctype::isDigit(*p)) break; x = x * 10 + (*p - '0'); p++; @@ -365,17 +366,10 @@ void splitStringDelim(const std::string& str, std::vector<std::string>* res, cha void joinStringDelim(const std::vector<std::string>& strs, std::string* res, char delim); inline std::string toLower(StringData input) { - std::string::size_type sz = input.size(); - - std::unique_ptr<char[]> line(new char[sz + 1]); - char* copy = line.get(); - - for (std::string::size_type i = 0; i < sz; i++) { - char c = input[i]; - copy[i] = (char)tolower((int)c); - } - copy[sz] = 0; - return copy; + std::string r{input}; + for (char& c : r) + c = ctype::toLower(c); + return r; } /** Functor for combining lexical and numeric comparisons. */ diff --git a/src/mongo/util/str_test.cpp b/src/mongo/util/str_test.cpp index 68decc11214..31913d38e0c 100644 --- a/src/mongo/util/str_test.cpp +++ b/src/mongo/util/str_test.cpp @@ -32,6 +32,7 @@ #include "mongo/unittest/unittest.h" +#include "mongo/util/ctype.h" #include "mongo/util/hex.h" #include "mongo/util/str.h" @@ -74,7 +75,7 @@ void assertCmp(int expected, StringData s1, StringData s2, bool lexOnly = false) } TEST(StringUtilsTest, Simple2) { - ASSERT(!isdigit((char)255)); + ASSERT(!ctype::isDigit((char)255)); assertCmp(0, "a", "a"); assertCmp(-1, "a", "aa"); diff --git a/src/mongo/util/string_bm.cpp b/src/mongo/util/string_bm.cpp new file mode 100644 index 00000000000..3dc5fd2a24c --- /dev/null +++ b/src/mongo/util/string_bm.cpp @@ -0,0 +1,123 @@ +/** + * Copyright (C) 2018-present MongoDB, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the Server Side Public License, version 1, + * as published by MongoDB, Inc. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * Server Side Public License for more details. + * + * You should have received a copy of the Server Side Public License + * along with this program. If not, see + * <http://www.mongodb.com/licensing/server-side-public-license>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the Server Side Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#include <algorithm> +#include <cctype> // NOLINT +#include <functional> +#include <iomanip> +#include <sstream> +#include <string> +#include <vector> + +#include <benchmark/benchmark.h> + +#include "mongo/base/simple_string_data_comparator.h" +#include "mongo/base/string_data.h" +#include "mongo/util/ctype.h" + +// Verify the performance of our string processing algorithms. +// This can include StringData, util/str utilities, etc. + +namespace mongo { +namespace { + +std::string makeString(size_t size) { + StringData fill = "The quick brown fox jumped over the lazy dog. "; + std::string s; + while (s.size() < size) { + size_t avail = size - s.size(); + StringData fillSub = fill.substr(0, std::min(avail, fill.size())); + s.append(fillSub.begin(), fillSub.end()); + } + return s; +} + +void BM_StringDataEqualCaseInsensitive(benchmark::State& state) { + std::uint64_t items = 0; + std::string s1 = makeString(1000); + std::string s2 = s1; + StringData sd1 = s1; + for (auto _ : state) { + benchmark::DoNotOptimize(sd1.equalCaseInsensitive(s2)); + ++items; + } + state.SetItemsProcessed(items); +} +BENCHMARK(BM_StringDataEqualCaseInsensitive); + +void BM_StdToLower(benchmark::State& state) { + std::uint64_t items = 0; + std::string s1 = makeString(1000); + for (auto _ : state) { + for (char& c : s1) + benchmark::DoNotOptimize(c = std::tolower(c)); + ++items; + } + state.SetItemsProcessed(items); +} +BENCHMARK(BM_StdToLower); + +void BM_MongoCtypeToLower(benchmark::State& state) { + std::uint64_t items = 0; + std::string s1 = makeString(1000); + for (auto _ : state) { + for (char& c : s1) + benchmark::DoNotOptimize(c = ctype::toLower(c)); + ++items; + } + state.SetItemsProcessed(items); +} +BENCHMARK(BM_MongoCtypeToLower); + +void BM_StdIsAlpha(benchmark::State& state) { + std::uint64_t items = 0; + std::string s1 = makeString(1000); + for (auto _ : state) { + for (char& c : s1) + benchmark::DoNotOptimize(std::isalpha(c)); + ++items; + } + state.SetItemsProcessed(items); +} +BENCHMARK(BM_StdIsAlpha); + +void BM_MongoCtypeIsAlpha(benchmark::State& state) { + std::uint64_t items = 0; + std::string s1 = makeString(1000); + for (auto _ : state) { + for (char& c : s1) + benchmark::DoNotOptimize(ctype::isAlpha(c)); + ++items; + } + state.SetItemsProcessed(items); +} +BENCHMARK(BM_MongoCtypeIsAlpha); + +} // namespace +} // namespace mongo |