summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- buildscripts/linter/simplecpplint.py | 7
-rw-r--r-- src/mongo/base/parse_number.cpp | 28
-rw-r--r-- src/mongo/base/string_data.h | 19
-rw-r--r-- src/mongo/bson/json.cpp | 20
-rw-r--r-- src/mongo/client/mongo_uri.cpp | 3
-rw-r--r-- src/mongo/client/sdam/server_selection_json_test_runner.cpp | 3
-rw-r--r-- src/mongo/db/auth/security_file.cpp | 1
-rw-r--r-- src/mongo/db/bson/dotted_path_support.cpp | 6
-rw-r--r-- src/mongo/db/catalog/database_impl.cpp | 5
-rw-r--r-- src/mongo/db/field_ref.cpp | 4
-rw-r--r-- src/mongo/db/fts/fts_language.cpp | 6
-rw-r--r-- src/mongo/db/fts/unicode/string_test.cpp | 5
-rw-r--r-- src/mongo/db/matcher/path_internal.cpp | 10
-rw-r--r-- src/mongo/db/pipeline/document_source_match.cpp | 12
-rw-r--r-- src/mongo/db/query/collation/collator_interface_mock.cpp | 21
-rw-r--r-- src/mongo/db/query/datetime/date_time_support.cpp | 14
-rw-r--r-- src/mongo/db/query/index_bounds_builder.cpp | 3
-rw-r--r-- src/mongo/db/repl/idempotency_update_sequence_test.cpp | 1
-rw-r--r-- src/mongo/platform/decimal128.cpp | 54
-rw-r--r-- src/mongo/platform/strcasestr.cpp | 23
-rw-r--r-- src/mongo/scripting/engine.cpp | 15
-rw-r--r-- src/mongo/scripting/mozjs/bindata.cpp | 1
-rw-r--r-- src/mongo/shell/linenoise.cpp | 5
-rw-r--r-- src/mongo/shell/mongo_main.cpp | 8
-rw-r--r-- src/mongo/shell/shell_utils.cpp | 4
-rw-r--r-- src/mongo/shell/shell_utils_launcher.cpp | 12
-rw-r--r-- src/mongo/util/SConscript | 6
-rw-r--r-- src/mongo/util/ctype.h | 212
-rw-r--r-- src/mongo/util/ctype_test.cpp | 109
-rw-r--r-- src/mongo/util/dns_name.h | 8
-rw-r--r-- src/mongo/util/hex.cpp | 4
-rw-r--r-- src/mongo/util/net/ssl_manager.cpp | 27
-rw-r--r-- src/mongo/util/net/ssl_options.cpp | 3
-rw-r--r-- src/mongo/util/options_parser/options_parser.cpp | 6
-rw-r--r-- src/mongo/util/processinfo_linux.cpp | 3
-rw-r--r-- src/mongo/util/stacktrace.cpp | 5
-rw-r--r-- src/mongo/util/stacktrace_threads.cpp | 1
-rw-r--r-- src/mongo/util/str.cpp | 13
-rw-r--r-- src/mongo/util/str.h | 20
-rw-r--r-- src/mongo/util/str_test.cpp | 3
-rw-r--r-- src/mongo/util/string_bm.cpp | 123
41 files changed, 602 insertions, 231 deletions
diff --git a/buildscripts/linter/simplecpplint.py b/buildscripts/linter/simplecpplint.py
index a1cd50f94d3..e0b3e4a04e2 100644
--- a/buildscripts/linter/simplecpplint.py
+++ b/buildscripts/linter/simplecpplint.py
@@ -126,6 +126,7 @@ class Linter:
self._check_for_nonmongo_assert(linenum)
self._check_for_mongo_unstructured_log(linenum)
self._check_for_mongo_config_header(linenum)
+ self._check_for_ctype(linenum)
# Relax the rule of commenting generic FCV references for files directly related to FCV
# implementations.
@@ -216,6 +217,12 @@ class Linter:
linenum, 'mongodb/unstructuredlog', 'Illegal use of unstructured logging, '
'this is only for local development use and should not be committed.')
+ def _check_for_ctype(self, linenum):
+ line = self.clean_lines[linenum]
+ if 'include <cctype>' in line or 'include <ctype.h>' in line:
+ self._error(linenum, 'mongodb/ctype',
+ 'Use of prohibited <ctype.h> or <cctype> header, use "mongo/util/ctype.h"')
+
def _check_for_server_side_public_license(self, copyright_offset):
license_header = '''\
* This program is free software: you can redistribute it and/or modify
diff --git a/src/mongo/base/parse_number.cpp b/src/mongo/base/parse_number.cpp
index c904b539b42..2bb46f0ba10 100644
--- a/src/mongo/base/parse_number.cpp
+++ b/src/mongo/base/parse_number.cpp
@@ -32,7 +32,6 @@
#include "mongo/base/parse_number.h"
#include <algorithm>
-#include <cctype>
#include <cerrno>
#include <cstdint>
#include <cstdlib>
@@ -42,6 +41,7 @@
#include "mongo/base/status_with.h"
#include "mongo/platform/decimal128.h"
#include "mongo/platform/overflow_arithmetic.h"
+#include "mongo/util/ctype.h"
namespace mongo {
namespace {
@@ -159,8 +159,7 @@ inline StatusWith<uint64_t> parseMagnitudeFromStringWithBase(uint64_t base,
StringData removeLeadingWhitespace(StringData s) {
return s.substr(std::distance(
- s.begin(),
- std::find_if_not(s.begin(), s.end(), [](unsigned char c) { return isspace(c); })));
+ s.begin(), std::find_if_not(s.begin(), s.end(), [](char c) { return ctype::isSpace(c); })));
}
template <typename NumberType>
@@ -210,24 +209,6 @@ Status parseNumberFromStringHelper(StringData s,
return Status::OK();
}
-#ifdef _WIN32
-
-namespace {
-
-/**
- * Converts ascii c-locale uppercase characters to lower case, leaves other char values
- * unchanged.
- */
-char toLowerAscii(char c) {
- if (isascii(c) && isupper(c))
- return _tolower(c);
- return c;
-}
-
-} // namespace
-
-#endif // defined(_WIN32)
-
template <>
Status parseNumberFromStringHelper<double>(StringData stringValue,
double* result,
@@ -241,7 +222,7 @@ Status parseNumberFromStringHelper<double>(StringData stringValue,
if (stringValue.empty())
return Status(ErrorCodes::FailedToParse, "Empty string");
- if (!parser._skipLeadingWhitespace && isspace(stringValue[0]))
+ if (!parser._skipLeadingWhitespace && ctype::isSpace(stringValue[0]))
return Status(ErrorCodes::FailedToParse, "Leading whitespace");
std::string str = stringValue.toString();
@@ -254,7 +235,8 @@ Status parseNumberFromStringHelper<double>(StringData stringValue,
#ifdef _WIN32
// The Windows libc implementation of strtod cannot parse +/-infinity or nan,
// so handle that here.
- std::transform(str.begin(), str.end(), str.begin(), toLowerAscii);
+ for (char& c : str)
+ c = ctype::toLower(c);
if (str == "nan"_sd) {
*result = std::numeric_limits<double>::quiet_NaN();
if (endptr)
diff --git a/src/mongo/base/string_data.h b/src/mongo/base/string_data.h
index 3b7ed6f0e96..eb762771a77 100644
--- a/src/mongo/base/string_data.h
+++ b/src/mongo/base/string_data.h
@@ -40,6 +40,7 @@
#include "mongo/platform/compiler.h"
#include "mongo/stdx/type_traits.h"
+#include "mongo/util/ctype.h"
#define MONGO_INCLUDE_INVARIANT_H_WHITELISTED
#include "mongo/util/invariant.h"
#undef MONGO_INCLUDE_INVARIANT_H_WHITELISTED
@@ -238,20 +239,10 @@ inline int StringData::compare(StringData other) const {
}
inline bool StringData::equalCaseInsensitive(StringData other) const {
- if (other.size() != size())
- return false;
-
- for (size_t x = 0; x < size(); x++) {
- char a = _data[x];
- char b = other._data[x];
- if (a == b)
- continue;
- if (tolower(a) == tolower(b))
- continue;
- return false;
- }
-
- return true;
+ return size() == other.size() &&
+ std::equal(begin(), end(), other.begin(), other.end(), [](char a, char b) {
+ return ctype::toLower(a) == ctype::toLower(b);
+ });
}
inline void StringData::copyTo(char* dest, bool includeEndingNull) const {
diff --git a/src/mongo/bson/json.cpp b/src/mongo/bson/json.cpp
index a0ba4f0ec4c..8cbc550141e 100644
--- a/src/mongo/bson/json.cpp
+++ b/src/mongo/bson/json.cpp
@@ -31,6 +31,7 @@
#include "mongo/bson/json.h"
+#include <algorithm>
#include <cstdint>
#include <fmt/format.h>
@@ -40,6 +41,7 @@
#include "mongo/platform/decimal128.h"
#include "mongo/platform/strtoll.h"
#include "mongo/util/base64.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/decimal_counter.h"
#include "mongo/util/hex.h"
#include "mongo/util/str.h"
@@ -1199,10 +1201,7 @@ Status JParse::field(std::string* result) {
return quotedString(result);
} else {
// Unquoted key
- // 'isspace()' takes an 'int' (signed), so (default signed) 'char's get sign-extended
- // and therefore 'corrupted' unless we force them to be unsigned ... 0x80 becomes
- // 0xffffff80 as seen by isspace when sign-extended ... we want it to be 0x00000080
- while (_input < _input_end && isspace(*reinterpret_cast<const unsigned char*>(_input))) {
+ while (_input < _input_end && ctype::isSpace(*_input)) {
++_input;
}
if (_input >= _input_end) {
@@ -1372,10 +1371,7 @@ bool JParse::readTokenImpl(const char* token, bool advance) {
if (token == nullptr) {
return false;
}
- // 'isspace()' takes an 'int' (signed), so (default signed) 'char's get sign-extended
- // and therefore 'corrupted' unless we force them to be unsigned ... 0x80 becomes
- // 0xffffff80 as seen by isspace when sign-extended ... we want it to be 0x00000080
- while (check < _input_end && isspace(*reinterpret_cast<const unsigned char*>(check))) {
+ while (check < _input_end && ctype::isSpace(*check)) {
++check;
}
while (*token != '\0') {
@@ -1418,13 +1414,7 @@ inline bool JParse::match(char matchChar, const char* matchSet) const {
bool JParse::isHexString(StringData str) const {
MONGO_JSON_DEBUG("str: " << str);
- std::size_t i;
- for (i = 0; i < str.size(); i++) {
- if (!isxdigit(str[i])) {
- return false;
- }
- }
- return true;
+ return std::all_of(str.begin(), str.end(), [](char c) { return ctype::isXdigit(c); });
}
bool JParse::isBase64String(StringData str) const {
diff --git a/src/mongo/client/mongo_uri.cpp b/src/mongo/client/mongo_uri.cpp
index ae8322c086c..08454c6b3f7 100644
--- a/src/mongo/client/mongo_uri.cpp
+++ b/src/mongo/client/mongo_uri.cpp
@@ -47,6 +47,7 @@
#include "mongo/db/namespace_string.h"
#include "mongo/stdx/utility.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/dns_name.h"
#include "mongo/util/dns_query.h"
#include "mongo/util/hex.h"
@@ -73,7 +74,7 @@ const std::vector<std::pair<std::string, std::string>> permittedTXTOptions = {{"
*/
void mongo::uriEncode(std::ostream& ss, StringData toEncode, StringData passthrough) {
for (const auto& c : toEncode) {
- if ((c == '-') || (c == '_') || (c == '.') || (c == '~') || isalnum(c) ||
+ if ((c == '-') || (c == '_') || (c == '.') || (c == '~') || ctype::isAlnum(c) ||
(passthrough.find(c) != std::string::npos)) {
ss << c;
} else {
diff --git a/src/mongo/client/sdam/server_selection_json_test_runner.cpp b/src/mongo/client/sdam/server_selection_json_test_runner.cpp
index 943f933bf25..9fa60ea76a1 100644
--- a/src/mongo/client/sdam/server_selection_json_test_runner.cpp
+++ b/src/mongo/client/sdam/server_selection_json_test_runner.cpp
@@ -47,6 +47,7 @@
#include "mongo/logv2/log.h"
#include "mongo/stdx/unordered_set.h"
#include "mongo/util/clock_source_mock.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/options_parser/environment.h"
#include "mongo/util/options_parser/option_section.h"
#include "mongo/util/options_parser/options_parser.h"
@@ -286,7 +287,7 @@ private:
// This can throw for test cases that have invalid read preferences.
auto readPrefObj = _jsonTest.getObjectField("read_preference");
std::string mode = readPrefObj.getStringField("mode");
- mode[0] = std::tolower(mode[0]);
+ mode[0] = ctype::toLower(mode[0]);
auto tagSetsObj = readPrefObj["tag_sets"];
auto tags = tagSetsObj ? BSONArray(readPrefObj["tag_sets"].Obj()) : BSONArray();
diff --git a/src/mongo/db/auth/security_file.cpp b/src/mongo/db/auth/security_file.cpp
index 70af00a9c76..8f2d103c7b5 100644
--- a/src/mongo/db/auth/security_file.cpp
+++ b/src/mongo/db/auth/security_file.cpp
@@ -32,7 +32,6 @@
#include "mongo/db/auth/security_key.h"
#include <algorithm>
-#include <cctype>
#include <string>
#include <sys/stat.h>
#include <vector>
diff --git a/src/mongo/db/bson/dotted_path_support.cpp b/src/mongo/db/bson/dotted_path_support.cpp
index 0595c8baf0f..8bfc0483f3b 100644
--- a/src/mongo/db/bson/dotted_path_support.cpp
+++ b/src/mongo/db/bson/dotted_path_support.cpp
@@ -31,13 +31,13 @@
#include "mongo/db/bson/dotted_path_support.h"
-#include <cctype>
#include <string>
#include "mongo/bson/bsonelement.h"
#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/util/ctype.h"
namespace mongo {
namespace dotted_path_support {
@@ -74,9 +74,9 @@ void _extractAllElementsAlongPath(const BSONObj& obj,
arrayComponents);
} else if (e.type() == Array) {
bool allDigits = false;
- if (next.size() > 0 && std::isdigit(next[0])) {
+ if (next.size() > 0 && ctype::isDigit(next[0])) {
unsigned temp = 1;
- while (temp < next.size() && std::isdigit(next[temp]))
+ while (temp < next.size() && ctype::isDigit(next[temp]))
temp++;
allDigits = temp == next.size() || next[temp] == '.';
}
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp
index c29a631ac29..a7153041314 100644
--- a/src/mongo/db/catalog/database_impl.cpp
+++ b/src/mongo/db/catalog/database_impl.cpp
@@ -95,8 +95,9 @@ Status validateDBNameForWindows(StringData dbname) {
"con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5", "com6", "com7",
"com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9"};
- std::string lower(dbname.toString());
- std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower);
+ std::string lower{dbname};
+ std::transform(
+ lower.begin(), lower.end(), lower.begin(), [](char c) { return ctype::toLower(c); });
if (std::count(windowsReservedNames.begin(), windowsReservedNames.end(), lower))
return Status(ErrorCodes::BadValue,
diff --git a/src/mongo/db/field_ref.cpp b/src/mongo/db/field_ref.cpp
index c9d5b9119a7..80a16b62d6e 100644
--- a/src/mongo/db/field_ref.cpp
+++ b/src/mongo/db/field_ref.cpp
@@ -32,9 +32,9 @@
#include "mongo/db/field_ref.h"
#include <algorithm>
-#include <cctype>
#include "mongo/util/assert_util.h"
+#include "mongo/util/ctype.h"
namespace mongo {
@@ -246,7 +246,7 @@ bool FieldRef::isNumericPathComponentStrict(StringData component) {
bool FieldRef::isNumericPathComponentLenient(StringData component) {
return !component.empty() &&
- std::all_of(component.begin(), component.end(), [](auto c) { return std::isdigit(c); });
+ std::all_of(component.begin(), component.end(), [](auto c) { return ctype::isDigit(c); });
}
bool FieldRef::isNumericPathComponentStrict(FieldIndex i) const {
diff --git a/src/mongo/db/fts/fts_language.cpp b/src/mongo/db/fts/fts_language.cpp
index a2765d350d2..654817cd261 100644
--- a/src/mongo/db/fts/fts_language.cpp
+++ b/src/mongo/db/fts/fts_language.cpp
@@ -30,7 +30,6 @@
#include "mongo/db/fts/fts_language.h"
#include <algorithm>
-#include <cctype>
#include <fmt/format.h>
#include <map>
#include <memory>
@@ -45,6 +44,7 @@
#include "mongo/db/fts/fts_unicode_phrase_matcher.h"
#include "mongo/db/fts/fts_unicode_tokenizer.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/ctype.h"
namespace mongo::fts {
@@ -59,8 +59,8 @@ using namespace fmt::literals;
struct LanguageStringCompare {
bool operator()(StringData a, StringData b) const {
return std::lexicographical_compare(
- a.begin(), a.end(), b.begin(), b.end(), [](unsigned char a, unsigned char b) {
- return std::tolower(a) < std::tolower(b);
+ a.begin(), a.end(), b.begin(), b.end(), [](char a, char b) {
+ return ctype::toLower(a) < ctype::toLower(b);
});
}
};
diff --git a/src/mongo/db/fts/unicode/string_test.cpp b/src/mongo/db/fts/unicode/string_test.cpp
index a2943877b28..00931a22a10 100644
--- a/src/mongo/db/fts/unicode/string_test.cpp
+++ b/src/mongo/db/fts/unicode/string_test.cpp
@@ -29,11 +29,10 @@
#include "mongo/platform/basic.h"
-#include <cctype>
-
#include "mongo/db/fts/unicode/string.h"
#include "mongo/shell/linenoise_utf8.h"
#include "mongo/unittest/unittest.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/text.h"
#ifdef MSC_VER
@@ -114,7 +113,7 @@ TEST(UnicodeString, CaseFolding) {
// Test all ascii chars.
for (unsigned char ch = 0; ch <= 0x7F; ch++) {
const auto upper = std::string(1, ch);
- const auto lower = std::string(1, std::tolower(ch));
+ const auto lower = std::string(1, ctype::toLower(ch));
if (ch) { // String's constructor doesn't handle embedded NUL bytes.
ASSERT_EQUALS(lower, String(upper).toLowerToBuf(&buf, kNormal));
}
diff --git a/src/mongo/db/matcher/path_internal.cpp b/src/mongo/db/matcher/path_internal.cpp
index e8d4527de34..78d025445a6 100644
--- a/src/mongo/db/matcher/path_internal.cpp
+++ b/src/mongo/db/matcher/path_internal.cpp
@@ -29,14 +29,14 @@
#include "mongo/db/matcher/path_internal.h"
+#include <algorithm>
+
+#include "mongo/util/ctype.h"
+
namespace mongo {
bool isAllDigits(StringData str) {
- for (unsigned i = 0; i < str.size(); i++) {
- if (!isdigit(str[i]))
- return false;
- }
- return true;
+ return std::all_of(str.begin(), str.end(), [](char c) { return ctype::isDigit(c); });
}
BSONElement getFieldDottedOrArray(const BSONObj& doc,
diff --git a/src/mongo/db/pipeline/document_source_match.cpp b/src/mongo/db/pipeline/document_source_match.cpp
index a8c2e8e31d6..cf45cd689f0 100644
--- a/src/mongo/db/pipeline/document_source_match.cpp
+++ b/src/mongo/db/pipeline/document_source_match.cpp
@@ -31,6 +31,7 @@
#include "mongo/db/pipeline/document_source_match.h"
+#include <algorithm>
#include <memory>
#include "mongo/db/exec/document_value/document.h"
@@ -43,6 +44,7 @@
#include "mongo/db/pipeline/document_path_support.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/pipeline/lite_parsed_document_source.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/str.h"
namespace mongo {
@@ -144,14 +146,8 @@ namespace {
// input is well formed.
bool isAllDigits(StringData str) {
- if (str.empty())
- return false;
-
- for (size_t i = 0; i < str.size(); i++) {
- if (!isdigit(str[i]))
- return false;
- }
- return true;
+ return !str.empty() &&
+ std::all_of(str.begin(), str.end(), [](char c) { return ctype::isDigit(c); });
}
bool isFieldnameRedactSafe(StringData fieldName) {
diff --git a/src/mongo/db/query/collation/collator_interface_mock.cpp b/src/mongo/db/query/collation/collator_interface_mock.cpp
index 0ace0847984..1c896c9f9eb 100644
--- a/src/mongo/db/query/collation/collator_interface_mock.cpp
+++ b/src/mongo/db/query/collation/collator_interface_mock.cpp
@@ -32,11 +32,11 @@
#include "mongo/db/query/collation/collator_interface_mock.h"
#include <algorithm>
-#include <cctype>
#include <memory>
#include <string>
#include "mongo/util/assert_util.h"
+#include "mongo/util/str.h"
namespace mongo {
@@ -77,15 +77,8 @@ int CollatorInterfaceMock::compare(StringData left, StringData right) const {
StringData rightReversed(rightString);
return leftReversed.compare(rightReversed);
}
- case MockType::kToLowerString: {
- std::string leftString = left.toString();
- std::string rightString = right.toString();
- std::transform(leftString.begin(), leftString.end(), leftString.begin(), ::tolower);
- std::transform(rightString.begin(), rightString.end(), rightString.begin(), ::tolower);
- StringData leftLower(leftString);
- StringData rightLower(rightString);
- return leftLower.compare(rightLower);
- }
+ case MockType::kToLowerString:
+ return str::toLower(left).compare(str::toLower(right));
case MockType::kAlwaysEqual:
return 0;
}
@@ -101,12 +94,8 @@ CollatorInterface::ComparisonKey CollatorInterfaceMock::getComparisonKey(
std::reverse(keyDataString.begin(), keyDataString.end());
return makeComparisonKey(std::move(keyDataString));
}
- case MockType::kToLowerString: {
- std::string keyDataString = stringData.toString();
- std::transform(
- keyDataString.begin(), keyDataString.end(), keyDataString.begin(), ::tolower);
- return makeComparisonKey(std::move(keyDataString));
- }
+ case MockType::kToLowerString:
+ return makeComparisonKey(str::toLower(stringData));
case MockType::kAlwaysEqual:
return makeComparisonKey("always_equal");
}
diff --git a/src/mongo/db/query/datetime/date_time_support.cpp b/src/mongo/db/query/datetime/date_time_support.cpp
index a0c56878a89..9af89734beb 100644
--- a/src/mongo/db/query/datetime/date_time_support.cpp
+++ b/src/mongo/db/query/datetime/date_time_support.cpp
@@ -41,6 +41,7 @@
#include "mongo/bson/util/builder.h"
#include "mongo/db/service_context.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/duration.h"
#include "mongo/util/str.h"
@@ -312,7 +313,8 @@ boost::optional<Seconds> TimeZoneDatabase::parseUtcOffset(StringData offsetSpec)
auto bias = offsetSpec[0] == '+' ? 1 : -1;
// ±HH
- if (offsetSpec.size() == 3 && isdigit(offsetSpec[1]) && isdigit(offsetSpec[2])) {
+ if (offsetSpec.size() == 3 && ctype::isDigit(offsetSpec[1]) &&
+ ctype::isDigit(offsetSpec[2])) {
int offset;
if (NumberParser().base(10)(offsetSpec.substr(1, 2), &offset).isOK()) {
return duration_cast<Seconds>(Hours(bias * offset));
@@ -321,8 +323,9 @@ boost::optional<Seconds> TimeZoneDatabase::parseUtcOffset(StringData offsetSpec)
}
// ±HHMM
- if (offsetSpec.size() == 5 && isdigit(offsetSpec[1]) && isdigit(offsetSpec[2]) &&
- isdigit(offsetSpec[3]) && isdigit(offsetSpec[4])) {
+ if (offsetSpec.size() == 5 && ctype::isDigit(offsetSpec[1]) &&
+ ctype::isDigit(offsetSpec[2]) && ctype::isDigit(offsetSpec[3]) &&
+ ctype::isDigit(offsetSpec[4])) {
int offset;
if (NumberParser().base(10)(offsetSpec.substr(1, 4), &offset).isOK()) {
return duration_cast<Seconds>(Hours(bias * (offset / 100L)) +
@@ -332,8 +335,9 @@ boost::optional<Seconds> TimeZoneDatabase::parseUtcOffset(StringData offsetSpec)
}
// ±HH:MM
- if (offsetSpec.size() == 6 && isdigit(offsetSpec[1]) && isdigit(offsetSpec[2]) &&
- offsetSpec[3] == ':' && isdigit(offsetSpec[4]) && isdigit(offsetSpec[5])) {
+ if (offsetSpec.size() == 6 && ctype::isDigit(offsetSpec[1]) &&
+ ctype::isDigit(offsetSpec[2]) && offsetSpec[3] == ':' &&
+ ctype::isDigit(offsetSpec[4]) && ctype::isDigit(offsetSpec[5])) {
int hourOffset, minuteOffset;
if (!NumberParser().base(10)(offsetSpec.substr(1, 2), &hourOffset).isOK()) {
return boost::none;
diff --git a/src/mongo/db/query/index_bounds_builder.cpp b/src/mongo/db/query/index_bounds_builder.cpp
index 98386d60579..54f2846cdc0 100644
--- a/src/mongo/db/query/index_bounds_builder.cpp
+++ b/src/mongo/db/query/index_bounds_builder.cpp
@@ -51,6 +51,7 @@
#include "mongo/db/query/planner_wildcard_helpers.h"
#include "mongo/db/query/query_knobs_gen.h"
#include "mongo/logv2/log.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/str.h"
#include "third_party/s2/s2cell.h"
#include "third_party/s2/s2regioncoverer.h"
@@ -264,7 +265,7 @@ string IndexBoundsBuilder::simpleRegex(const char* regex,
// comment
r = ss;
break;
- } else if (extended && isspace(c)) {
+ } else if (extended && ctype::isSpace(c)) {
continue;
} else {
// self-matching char
diff --git a/src/mongo/db/repl/idempotency_update_sequence_test.cpp b/src/mongo/db/repl/idempotency_update_sequence_test.cpp
index 6402d3ee95c..d46607f194b 100644
--- a/src/mongo/db/repl/idempotency_update_sequence_test.cpp
+++ b/src/mongo/db/repl/idempotency_update_sequence_test.cpp
@@ -30,7 +30,6 @@
#include "mongo/platform/basic.h"
#include <algorithm>
-#include <cctype>
#include <memory>
#include "mongo/db/field_ref.h"
diff --git a/src/mongo/platform/decimal128.cpp b/src/mongo/platform/decimal128.cpp
index 3d0e06f42d5..65eaf7eaef4 100644
--- a/src/mongo/platform/decimal128.cpp
+++ b/src/mongo/platform/decimal128.cpp
@@ -32,7 +32,6 @@
#include "mongo/platform/basic.h"
#include <algorithm>
-#include <cctype>
#include <cmath>
#include <cstdlib>
#include <iostream>
@@ -51,21 +50,16 @@
#include "mongo/config.h"
#include "mongo/platform/endian.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/str.h"
-namespace {
+namespace mongo {
-std::string toAsciiLowerCase(mongo::StringData input) {
- std::string res = input.toString();
- for (char& c : res) {
- c = tolower(c);
- }
- return res;
-}
+namespace {
// Returns the number of characters consumed from input string. If unable to parse,
// it returns 0.
-size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlags) {
+size_t validateInputString(StringData input, std::uint32_t* signalingFlags) {
// Input must be of these forms:
// * Valid decimal (standard or scientific notation):
// /[-+]?\d*(.\d+)?([e][+\-]?\d+)?/
@@ -77,18 +71,18 @@ size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlag
// Check for NaN and Infinity
size_t start = (isSigned) ? 1 : 0;
size_t charsConsumed = start;
- mongo::StringData noSign = input.substr(start);
+ StringData noSign = input.substr(start);
bool isNanOrInf = noSign == "nan" || noSign == "inf" || noSign == "infinity";
if (isNanOrInf)
return start + noSign.size();
// Input starting with non digit
- if (!std::isdigit(noSign[0])) {
+ if (!ctype::isDigit(noSign[0])) {
if (noSign[0] != '.') {
- *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid;
+ *signalingFlags = Decimal128::SignalingFlag::kInvalid;
return 0;
} else if (noSign.size() == 1) {
- *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid;
+ *signalingFlags = Decimal128::SignalingFlag::kInvalid;
return 0;
}
}
@@ -102,11 +96,11 @@ size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlag
char c = noSign[i];
if (c == '.') {
if (parsedDot) {
- *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid;
+ *signalingFlags = Decimal128::SignalingFlag::kInvalid;
return 0;
}
parsedDot = true;
- } else if (!std::isdigit(c)) {
+ } else if (!ctype::isDigit(c)) {
break;
} else {
hasCoefficient = true;
@@ -119,7 +113,7 @@ size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlag
if (isZero) {
// Override inexact/overflow flag set by the intel library
- *signalingFlags = mongo::Decimal128::SignalingFlag::kNoFlag;
+ *signalingFlags = Decimal128::SignalingFlag::kNoFlag;
}
// Input is valid if we've parsed the entire string
@@ -129,21 +123,21 @@ size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlag
// String with empty coefficient and non-empty exponent
if (!hasCoefficient) {
- *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid;
+ *signalingFlags = Decimal128::SignalingFlag::kInvalid;
return 0;
}
// Check exponent
- mongo::StringData exponent = noSign.substr(i);
+ StringData exponent = noSign.substr(i);
if (exponent[0] != 'e' || exponent.size() < 2) {
- *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid;
+ *signalingFlags = Decimal128::SignalingFlag::kInvalid;
return 0;
}
if (exponent[1] == '-' || exponent[1] == '+') {
exponent = exponent.substr(2);
if (exponent.size() == 0) {
- *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid;
+ *signalingFlags = Decimal128::SignalingFlag::kInvalid;
return 0;
}
charsConsumed += 2;
@@ -152,21 +146,13 @@ size_t validateInputString(mongo::StringData input, std::uint32_t* signalingFlag
++charsConsumed;
}
- for (size_t j = 0; j < exponent.size(); j++) {
- char c = exponent[j];
- if (!std::isdigit(c)) {
- *signalingFlags = mongo::Decimal128::SignalingFlag::kInvalid;
- return 0;
- }
- ++charsConsumed;
+ if (!std::all_of(exponent.begin(), exponent.end(), [](char c) { return ctype::isDigit(c); })) {
+ *signalingFlags = Decimal128::SignalingFlag::kInvalid;
+ return 0;
}
+ charsConsumed += exponent.size();
return charsConsumed;
}
-} // namespace
-
-namespace mongo {
-
-namespace {
// Determine system's endian ordering in order to construct decimal 128 values directly
constexpr bool kNativeLittle = (endian::Order::kNative == endian::Order::kLittle);
@@ -313,7 +299,7 @@ Decimal128::Decimal128(std::string stringValue,
std::uint32_t* signalingFlags,
RoundingMode roundMode,
size_t* charsConsumed) {
- std::string lower = toAsciiLowerCase(stringValue);
+ std::string lower = str::toLower(stringValue);
BID_UINT128 dec128;
// The intel library function requires a char * while c_str() returns a const char*.
// We're using const_cast here since the library function should not modify the input.
diff --git a/src/mongo/platform/strcasestr.cpp b/src/mongo/platform/strcasestr.cpp
index a9c5a9ce09d..9c5ffe23a3c 100644
--- a/src/mongo/platform/strcasestr.cpp
+++ b/src/mongo/platform/strcasestr.cpp
@@ -39,7 +39,6 @@
#if defined(_WIN32) || defined(__sun)
#include <algorithm>
-#include <cctype>
#include <cstring>
#include <string>
@@ -49,6 +48,9 @@
#define STRCASESTR_EMULATION_NAME strcasestr
#endif
+#include "mongo/util/ctype.h"
+#include "mongo/util/str.h"
+
namespace mongo {
namespace pal {
@@ -60,18 +62,13 @@ namespace pal {
* @return ptr to start of 'needle' within 'haystack' if found, NULL otherwise
*/
const char* STRCASESTR_EMULATION_NAME(const char* haystack, const char* needle) {
- std::string haystackLower(haystack);
- std::transform(haystackLower.begin(), haystackLower.end(), haystackLower.begin(), ::tolower);
-
- std::string needleLower(needle);
- std::transform(needleLower.begin(), needleLower.end(), needleLower.begin(), ::tolower);
-
- // Use strstr() to find 'lowercased needle' in 'lowercased haystack'
- // If found, use the location to compute the matching location in the original string
- // If not found, return NULL
- const char* haystackLowerStart = haystackLower.c_str();
- const char* location = strstr(haystackLowerStart, needleLower.c_str());
- return location ? (haystack + (location - haystackLowerStart)) : nullptr;
+ StringData hay(haystack);
+ StringData pat(needle);
+ auto caseEq = [](char a, char b) { return ctype::toLower(a) == ctype::toLower(b); };
+ auto pos = std::search(hay.begin(), hay.end(), pat.begin(), pat.end(), caseEq);
+ if (pos == hay.end())
+ return nullptr;
+ return haystack + (pos - hay.begin());
}
#if defined(__sun)
diff --git a/src/mongo/scripting/engine.cpp b/src/mongo/scripting/engine.cpp
index 42ccc5f7154..90f29d579cf 100644
--- a/src/mongo/scripting/engine.cpp
+++ b/src/mongo/scripting/engine.cpp
@@ -33,15 +33,17 @@
#include "mongo/scripting/engine.h"
+#include <algorithm>
#include <boost/filesystem/operations.hpp>
-#include <cctype>
+#include "mongo/base/string_data.h"
#include "mongo/client/dbclient_base.h"
#include "mongo/client/dbclient_cursor.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/service_context.h"
#include "mongo/logv2/log.h"
#include "mongo/scripting/dbdirectclient_factory.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/file.h"
#include "mongo/util/text.h"
@@ -207,8 +209,10 @@ void Scope::storedFuncMod(OperationContext* opCtx) {
void Scope::validateObjectIdString(const string& str) {
uassert(10448, "invalid object id: length", str.size() == 24);
- for (size_t i = 0; i < str.size(); i++)
- uassert(10430, "invalid object id: not hex", std::isxdigit(str.at(i)));
+ auto isAllHex = [](StringData s) {
+ return std::all_of(s.begin(), s.end(), [](char c) { return ctype::isXdigit(c); });
+ };
+ uassert(10430, "invalid object id: not hex", isAllHex(str));
}
void Scope::loadStored(OperationContext* opCtx, bool ignoreNotConnected) {
@@ -615,12 +619,13 @@ bool hasJSReturn(const string& code) {
// return is at start OR preceded by space
// AND return is not followed by digit or letter
- return (x == 0 || isspace(code[x - 1])) && !(isalpha(code[x + 6]) || isdigit(code[x + 6]));
+ return (x == 0 || ctype::isSpace(code[x - 1])) &&
+ !(ctype::isAlpha(code[x + 6]) || ctype::isDigit(code[x + 6]));
}
const char* jsSkipWhiteSpace(const char* raw) {
while (raw[0]) {
- while (isspace(*raw)) {
+ while (ctype::isSpace(*raw)) {
++raw;
}
if (raw[0] != '/' || raw[1] != '/')
diff --git a/src/mongo/scripting/mozjs/bindata.cpp b/src/mongo/scripting/mozjs/bindata.cpp
index f66e6064956..36c37700e20 100644
--- a/src/mongo/scripting/mozjs/bindata.cpp
+++ b/src/mongo/scripting/mozjs/bindata.cpp
@@ -31,7 +31,6 @@
#include "mongo/scripting/mozjs/bindata.h"
-#include <cctype>
#include <iomanip>
#include "mongo/bson/bsonobjbuilder.h"
diff --git a/src/mongo/shell/linenoise.cpp b/src/mongo/shell/linenoise.cpp
index 901b4f3c76f..7c6865e4392 100644
--- a/src/mongo/shell/linenoise.cpp
+++ b/src/mongo/shell/linenoise.cpp
@@ -97,7 +97,6 @@
#else /* _WIN32 */
-#include <cctype>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
@@ -105,13 +104,13 @@
#include <sys/types.h>
#include <termios.h>
#include <unistd.h>
-#include <wctype.h>
#endif /* _WIN32 */
#include "linenoise.h"
#include "linenoise_utf8.h"
#include "mk_wcwidth.h"
+#include <cwctype>
#include <errno.h>
#include <fcntl.h>
#include <memory>
@@ -1968,7 +1967,7 @@ int InputBuffer::incrementalHistorySearch(PromptBase& pi, int startChar) {
}
static bool isCharacterAlphanumeric(UChar32 testChar) {
- return iswalnum(testChar);
+ return std::iswalnum(testChar);
}
int InputBuffer::getInputLine(PromptBase& pi) {
diff --git a/src/mongo/shell/mongo_main.cpp b/src/mongo/shell/mongo_main.cpp
index b9567df8f9b..4a4069077f7 100644
--- a/src/mongo/shell/mongo_main.cpp
+++ b/src/mongo/shell/mongo_main.cpp
@@ -38,7 +38,6 @@
#include <boost/log/attributes/value_extraction.hpp>
#include <boost/log/core.hpp>
#include <boost/log/sinks.hpp>
-#include <cctype>
#include <fstream>
#include <iostream>
#include <pcrecpp.h>
@@ -73,6 +72,7 @@
#include "mongo/shell/shell_utils_launcher.h"
#include "mongo/stdx/utility.h"
#include "mongo/transport/transport_layer_asio.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/errno_util.h"
#include "mongo/util/exit.h"
#include "mongo/util/file.h"
@@ -469,7 +469,7 @@ std::string getURIFromArgs(const std::string& arg,
const auto colonPos = arg.find(':');
if ((colonPos != std::string::npos) && ((colonPos + 1) < arg.size()) &&
- isdigit(arg[colonPos + 1])) {
+ ctype::isDigit(arg[colonPos + 1])) {
// Assume IPv4 or hostname with port.
return parseDbHost("test", arg);
}
@@ -544,7 +544,7 @@ static void edit(const std::string& whatToEdit) {
// "whatToEdit" might look like a variable/property name
bool editingVariable = true;
for (const char* p = whatToEdit.c_str(); *p; ++p) {
- if (!(isalnum(*p) || *p == '_' || *p == '.')) {
+ if (!(ctype::isAlnum(*p) || *p == '_' || *p == '.')) {
editingVariable = false;
break;
}
@@ -1062,7 +1062,7 @@ int mongo_main(int argc, char* argv[]) {
shellHistoryAdd(linePtr);
const char* s = linePtr + 5; // skip "edit "
- while (*s && isspace(*s))
+ while (*s && ctype::isSpace(*s))
s++;
edit(s);
diff --git a/src/mongo/shell/shell_utils.cpp b/src/mongo/shell/shell_utils.cpp
index 4dc05908256..4fe70659c20 100644
--- a/src/mongo/shell/shell_utils.cpp
+++ b/src/mongo/shell/shell_utils.cpp
@@ -35,7 +35,6 @@
#include <algorithm>
#include <boost/filesystem.hpp>
-#include <cctype>
#include <memory>
#include <set>
#include <stdlib.h>
@@ -59,6 +58,7 @@
#include "mongo/shell/shell_options.h"
#include "mongo/shell/shell_utils_extended.h"
#include "mongo/shell/shell_utils_launcher.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/fail_point.h"
#include "mongo/util/processinfo.h"
#include "mongo/util/quick_exit.h"
@@ -256,7 +256,7 @@ bool isBalanced(const std::string& code) {
}
if ("~!%^&*-+=|:,<>/?."_sd.find(code[i]) != std::string::npos)
danglingOp = true;
- else if (!std::isspace(code[i]))
+ else if (!ctype::isSpace(code[i]))
danglingOp = false;
}
diff --git a/src/mongo/shell/shell_utils_launcher.cpp b/src/mongo/shell/shell_utils_launcher.cpp
index d52ebcfefc6..2de721aada7 100644
--- a/src/mongo/shell/shell_utils_launcher.cpp
+++ b/src/mongo/shell/shell_utils_launcher.cpp
@@ -39,7 +39,6 @@
#include <boost/iostreams/stream.hpp>
#include <boost/iostreams/stream_buffer.hpp>
#include <boost/iostreams/tee.hpp>
-#include <cctype>
#include <fcntl.h>
#include <fmt/format.h>
#include <iostream>
@@ -71,6 +70,7 @@
#include "mongo/shell/shell_options.h"
#include "mongo/shell/shell_utils.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/destructor_guard.h"
#include "mongo/util/exit.h"
#include "mongo/util/net/hostandport.h"
@@ -591,14 +591,8 @@ boost::filesystem::path ProgramRunner::findProgram(const string& prog) {
// needs to be appended.
//
- auto isExtensionValid = [](std::string extension) {
- for (auto c : extension) {
- if (std::isdigit(c)) {
- return false;
- }
- }
-
- return true;
+ auto isExtensionValid = [](std::string e) {
+ return std::all_of(e.begin(), e.end(), [](char c) { return !ctype::isDigit(c); });
};
if (!p.has_extension() || !isExtensionValid(p.extension().string())) {
diff --git a/src/mongo/util/SConscript b/src/mongo/util/SConscript
index 7b4b56c6540..1e759de47dc 100644
--- a/src/mongo/util/SConscript
+++ b/src/mongo/util/SConscript
@@ -619,6 +619,7 @@ icuEnv.CppUnitTest(
'clock_source_mock_test.cpp',
'concepts_test.cpp',
'container_size_helper_test.cpp',
+ 'ctype_test.cpp',
'decimal_counter_test.cpp',
'decorable_test.cpp',
'diagnostic_info_test.cpp' if get_option('use-diagnostic-latches') == 'on' else [],
@@ -768,3 +769,8 @@ stacktraceEnv.Benchmark(
# See above for how to handle any future LIBDEPS additions here.
# LIBDEPS=...
)
+
+env.Benchmark(
+ target='string_bm',
+ source='string_bm.cpp',
+)
diff --git a/src/mongo/util/ctype.h b/src/mongo/util/ctype.h
new file mode 100644
index 00000000000..a3880e281a8
--- /dev/null
+++ b/src/mongo/util/ctype.h
@@ -0,0 +1,212 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+/**
+ * Replacements for <cctype> or <ctype.h> functions and macros.
+ * These should be used instead of the corresponding standard functions.
+ * Note the camel-case spelling to distinguish these from the C++ functions
+ * and especially the C macros.
+ *
+ * Regarding the capitalization of these functions: POSIX defines standard
+ * identifiers for the 12 character classes. Each "is"- function here directly
+ * references and evokes such a POSIX identifier, so they are not
+ * camel-cased as ordinary English phrases (so `isAlnum` not `isAlNum`).
+ *
+ * <https://en.wikipedia.org/wiki/Regular_expression#Character_classes>
+ *
+ * Problems with the standard functions:
+ *
+ * - They accept int (to accept the EOF of -1 and integrate with cstdio).
+ * Passing negative char values other than EOF is undefined behavior!
+ * They cannot be used directly in std algorithms operating on char
+ * arguments because of this, say to `std::transform` or `std::find_if`
+ * on a `std::string`. You need a lambda and it has to do a cast.
+ * - Most are locale dependent, so they have to be slow. Dropping
+ * locale makes the "is"- functions 200% faster.
+ * - They return int instead of bool for C compatibility. Undesirable in C++.
+ * - In C they are macros, so they are very different entities depending on
+ * the subtle choice of #include <cctype> vs #include <ctype.h>.
+ * - Support for the EOF value bloats the lookup tables and carves out a
+ * surprising special case.
+ *
+ * The `<cctype>` character classification functions are a subtle source of bugs.
+ * See warnings at <https://en.cppreference.com/w/cpp/header/cctype>.
+ *
+ * The required cast to `unsigned char` is often omitted at call sites, creating
+ * bugs. So here are some more suitable C++17 implementations. We can make our
+ * versions constexpr and noexcept because they don't depend on the locale or
+ * other dynamic program state.
+ */
+
+#pragma once
+
+#include <array>
+
+namespace mongo::ctype {
+namespace detail {
+
+/**
+ * Define a bit position for each character class queryable with this API.
+ * Every enumerator is a distinct single-bit mask, so the set of classes a
+ * character belongs to can be stored as the bitwise-or of these values in one
+ * `uint16_t` (12 of the 16 bits are used).
+ */
+enum ClassBit : uint16_t {
+ kUpper = 1 << 0, ///< [upper] UPPERCASE
+ kLower = 1 << 1, ///< [lower] lowercase
+ kAlpha = 1 << 2, ///< [alpha] Alphabetic (upper case or lower case)
+ kDigit = 1 << 3, ///< [digit] Decimal digit
+ kXdigit = 1 << 4, ///< [xdigit] Hexadecimal digit (upper case or lower case: [0-9A-Fa-f])
+ kSpace = 1 << 5, ///< [space] Whitespace ([ \t\r\n\f\v])
+ kPrint = 1 << 6, ///< [print] Printing (non-control chars)
+ kGraph = 1 << 7, ///< [graph] Graphical (non-control, non-whitespace)
+ kBlank = 1 << 8, ///< [blank] Blank (' ', '\t')
+ kCntrl = 1 << 9, ///< [cntrl] Control character: 0x00-0x1f, and 0x7f (DEL)
+ kPunct = 1 << 10, ///< [punct] Punctuation (graphical, but not alphanumeric)
+ kAlnum = 1 << 11 ///< [alnum] Alphanumeric (letter or digit)
+};
+
+/**
+ * Returns the bitwise-or of all `ClassBit` pertinent to character `c`.
+ * Values 0x80 and above belong to no class, so the result for them is 0.
+ * The checks run in dependency order: the derived classes (print, space,
+ * alpha, xdigit, alnum, graph, punct) are computed from bits that earlier
+ * checks have already set in `r`, so the statements must not be reordered.
+ */
+constexpr uint16_t calculateClassBits(unsigned char c) {
+ if (c >= 0x80)
+ return 0;
+ uint16_t r = 0;
+ if (c <= 0x1f || c == 0x7f)
+ r |= kCntrl;
+ if (!(r & kCntrl))
+ r |= kPrint;
+ if (c == '\t' || c == ' ')
+ r |= kBlank;
+ if ((r & kBlank) || c == '\n' || c == '\v' || c == '\f' || c == '\r')
+ r |= kSpace;
+ if (c >= 'A' && c <= 'Z')
+ r |= kUpper;
+ if (c >= 'a' && c <= 'z')
+ r |= kLower;
+ if (c >= '0' && c <= '9')
+ r |= kDigit;
+ if ((r & kUpper) || (r & kLower))
+ r |= kAlpha;
+ if ((r & kDigit) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'))
+ r |= kXdigit;
+ if ((r & kAlpha) || (r & kDigit))
+ r |= kAlnum;
+ if ((r & kPrint) && !(r & kSpace))
+ r |= kGraph;
+ if ((r & kGraph) && !(r & kAlnum))
+ r |= kPunct;
+ return r;
+}
+
+/**
+ * The character class memberships for each char.
+ * Built by an immediately-invoked lambda in a constexpr initializer: entry
+ * `i` holds `calculateClassBits(i)` for each of the 256 `unsigned char` values.
+ * NOTE(review): declared `constexpr` but not `inline` in a header, so each
+ * including translation unit presumably gets its own copy - confirm whether
+ * `inline constexpr` is wanted here.
+ */
+constexpr auto chClassTable = [] {
+ std::array<uint16_t, 256> arr{};
+ for (size_t i = 0; i < arr.size(); ++i)
+ arr[i] = calculateClassBits(i);
+ return arr;
+}();
+
+constexpr bool isMember(char c, uint16_t mask) {  // true iff `c` has any class bit that is set in `mask`
+ return chClassTable[static_cast<unsigned char>(c)] & mask;  // cast maps a negative `char` to index 128..255
+}
+
+/**
+ * Lookup table for `toUpper`.
+ * Entry `i` is the upper-case counterpart when `i` is in the `kLower` class
+ * ('a'-'z'), and `i` itself for every other value.
+ */
+constexpr auto chUpperTable = [] {
+ std::array<char, 256> arr{};
+ for (size_t i = 0; i < arr.size(); ++i)
+ arr[i] = isMember(i, kLower) ? 'A' + (i - 'a') : i;
+ return arr;
+}();
+
+/**
+ * Lookup table for `toLower`.
+ * Entry `i` is the lower-case counterpart when `i` is in the `kUpper` class
+ * ('A'-'Z'), and `i` itself for every other value.
+ */
+constexpr auto chLowerTable = [] {
+ std::array<char, 256> arr{};
+ for (size_t i = 0; i < arr.size(); ++i)
+ arr[i] = isMember(i, kUpper) ? 'a' + (i - 'A') : i;
+ return arr;
+}();
+
+} // namespace detail
+
+
+/**
+ * These 12 "is"- functions exactly match the <cctype> definitions for the
+ * POSIX (or C) locale. See the corresponding definitions in <cctype>.
+ * <https://en.cppreference.com/w/cpp/header/cctype>
+ * See notes above.
+ *
+ * Each takes a plain `char` and tests one `detail::ClassBit` through
+ * `detail::isMember`. Any `char` value is accepted, including negative ones,
+ * because the table lookup indexes by `static_cast<unsigned char>(c)`.
+ * Characters 0x80 and above are in no class, so every function returns
+ * false for them.
+ */
+constexpr bool isAlnum(char c) noexcept {
+ return detail::isMember(c, detail::kAlnum);
+}
+constexpr bool isAlpha(char c) noexcept {
+ return detail::isMember(c, detail::kAlpha);
+}
+constexpr bool isLower(char c) noexcept {
+ return detail::isMember(c, detail::kLower);
+}
+constexpr bool isUpper(char c) noexcept {
+ return detail::isMember(c, detail::kUpper);
+}
+constexpr bool isDigit(char c) noexcept {
+ return detail::isMember(c, detail::kDigit);
+}
+constexpr bool isXdigit(char c) noexcept {
+ return detail::isMember(c, detail::kXdigit);
+}
+constexpr bool isCntrl(char c) noexcept {
+ return detail::isMember(c, detail::kCntrl);
+}
+constexpr bool isGraph(char c) noexcept {
+ return detail::isMember(c, detail::kGraph);
+}
+constexpr bool isSpace(char c) noexcept {
+ return detail::isMember(c, detail::kSpace);
+}
+constexpr bool isBlank(char c) noexcept {
+ return detail::isMember(c, detail::kBlank);
+}
+constexpr bool isPrint(char c) noexcept {
+ return detail::isMember(c, detail::kPrint);
+}
+constexpr bool isPunct(char c) noexcept {
+ return detail::isMember(c, detail::kPunct);
+}
+
+/**
+ * Returns the upper case of `c` if `c` is lower case, otherwise `c`.
+ * Unlike `std::toupper`, is not affected by locale. See notes above.
+ * Only 'a'-'z' are mapped; every other value, including 0x80 and above, is
+ * returned unchanged. The result is a single lookup in `detail::chUpperTable`.
+ */
+constexpr char toUpper(char c) noexcept {
+ return detail::chUpperTable[static_cast<unsigned char>(c)];
+}
+
+/**
+ * Returns the lower case of `c` if `c` is upper case, otherwise `c`.
+ * Unlike `std::tolower`, is not affected by locale. See notes above.
+ * Only 'A'-'Z' are mapped; every other value, including 0x80 and above, is
+ * returned unchanged. The result is a single lookup in `detail::chLowerTable`.
+ */
+constexpr char toLower(char c) noexcept {
+ return detail::chLowerTable[static_cast<unsigned char>(c)];
+}
+
+} // namespace mongo::ctype
diff --git a/src/mongo/util/ctype_test.cpp b/src/mongo/util/ctype_test.cpp
new file mode 100644
index 00000000000..55c122dfd6e
--- /dev/null
+++ b/src/mongo/util/ctype_test.cpp
@@ -0,0 +1,109 @@
+/**
+ * Copyright (C) 2020-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
+
+#include "mongo/platform/basic.h"
+
+#include <boost/optional.hpp>
+#include <fmt/format.h>
+
+#include "mongo/logv2/log.h"
+#include "mongo/unittest/unittest.h"
+#include "mongo/util/ctype.h"
+#include "mongo/util/hex.h"
+
+namespace mongo::ctype {
+namespace {
+
+using namespace fmt::literals;
+
+TEST(Ctype, MatchesCxxStdlib) {  // every byte value must agree with the `std::` <cctype> functions
+ for (size_t i = 0; i < 256; ++i) {  // NOTE(review): presumably relies on the process running in the "C" locale - confirm
+ char c = i;
+ unsigned char uc = i;  // the std functions are only defined for unsigned char values (or EOF)
+ const std::string msg = " i={:02x}"_format(i);
+ ASSERT_EQ(isAlnum(c), (bool)std::isalnum(uc)) << msg;
+ ASSERT_EQ(isAlpha(c), (bool)std::isalpha(uc)) << msg;
+ ASSERT_EQ(isLower(c), (bool)std::islower(uc)) << msg;
+ ASSERT_EQ(isUpper(c), (bool)std::isupper(uc)) << msg;
+ ASSERT_EQ(isDigit(c), (bool)std::isdigit(uc)) << msg;
+ ASSERT_EQ(isXdigit(c), (bool)std::isxdigit(uc)) << msg;
+ ASSERT_EQ(isCntrl(c), (bool)std::iscntrl(uc)) << msg;
+ ASSERT_EQ(isGraph(c), (bool)std::isgraph(uc)) << msg;
+ ASSERT_EQ(isSpace(c), (bool)std::isspace(uc)) << msg;
+ ASSERT_EQ(isBlank(c), (bool)std::isblank(uc)) << msg;
+ ASSERT_EQ(isPrint(c), (bool)std::isprint(uc)) << msg;
+ ASSERT_EQ(isPunct(c), (bool)std::ispunct(uc)) << msg;
+ ASSERT_EQ(toLower(c), (char)std::tolower(uc)) << msg;
+ ASSERT_EQ(toUpper(c), (char)std::toupper(uc)) << msg;
+ }
+}
+
+TEST(Ctype, MatchesCStdlib) {  // same comparison, but through the unqualified C names (isalnum, ...)
+ for (size_t i = 0; i < 256; ++i) {  // NOTE(review): presumably relies on the process running in the "C" locale - confirm
+ char c = i;
+ unsigned char uc = i;  // the C functions are only defined for unsigned char values (or EOF)
+ const std::string msg = " i={:02x}"_format(i);
+ ASSERT_EQ(isAlnum(c), (bool)isalnum(uc)) << msg;
+ ASSERT_EQ(isAlpha(c), (bool)isalpha(uc)) << msg;
+ ASSERT_EQ(isLower(c), (bool)islower(uc)) << msg;
+ ASSERT_EQ(isUpper(c), (bool)isupper(uc)) << msg;
+ ASSERT_EQ(isDigit(c), (bool)isdigit(uc)) << msg;
+ ASSERT_EQ(isXdigit(c), (bool)isxdigit(uc)) << msg;
+ ASSERT_EQ(isCntrl(c), (bool)iscntrl(uc)) << msg;
+ ASSERT_EQ(isGraph(c), (bool)isgraph(uc)) << msg;
+ ASSERT_EQ(isSpace(c), (bool)isspace(uc)) << msg;
+ ASSERT_EQ(isBlank(c), (bool)isblank(uc)) << msg;
+ ASSERT_EQ(isPrint(c), (bool)isprint(uc)) << msg;
+ ASSERT_EQ(isPunct(c), (bool)ispunct(uc)) << msg;
+ ASSERT_EQ(toLower(c), (char)tolower(uc)) << msg;
+ ASSERT_EQ(toUpper(c), (char)toupper(uc)) << msg;
+ }
+}
+
+TEST(Ctype, IsConstexpr) {  // checks each function on 'a'; MONGO_STATIC_ASSERT is presumably a static_assert wrapper
+ MONGO_STATIC_ASSERT(isAlnum('a'));
+ MONGO_STATIC_ASSERT(isAlpha('a'));
+ MONGO_STATIC_ASSERT(isLower('a'));
+ MONGO_STATIC_ASSERT(!isUpper('a'));
+ MONGO_STATIC_ASSERT(!isDigit('a'));
+ MONGO_STATIC_ASSERT(isXdigit('a'));  // 'a' is a hexadecimal digit
+ MONGO_STATIC_ASSERT(!isCntrl('a'));
+ MONGO_STATIC_ASSERT(isGraph('a'));
+ MONGO_STATIC_ASSERT(!isSpace('a'));
+ MONGO_STATIC_ASSERT(!isBlank('a'));
+ MONGO_STATIC_ASSERT(isPrint('a'));
+ MONGO_STATIC_ASSERT(!isPunct('a'));
+ MONGO_STATIC_ASSERT(toLower('a') == 'a');
+ MONGO_STATIC_ASSERT(toUpper('a') == 'A');
+}
+
+} // namespace
+} // namespace mongo::ctype
diff --git a/src/mongo/util/dns_name.h b/src/mongo/util/dns_name.h
index f48eb17a9b6..c1472769f07 100644
--- a/src/mongo/util/dns_name.h
+++ b/src/mongo/util/dns_name.h
@@ -30,7 +30,6 @@
#pragma once
#include <algorithm>
-#include <cctype>
#include <iostream>
#include <iterator>
#include <sstream>
@@ -41,6 +40,7 @@
#include "mongo/base/string_data.h"
#include "mongo/bson/util/builder.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/ctype.h"
namespace mongo {
namespace dns {
@@ -117,13 +117,13 @@ public:
// We permit dashes and numbers. We also permit underscores for use with SRV records
// and such.
- if (!(ch == '-' || std::isalnum(ch) || (ch == '_' && parserState == kFirstLetter))) {
+ if (!(ch == '-' || ctype::isAlnum(ch) || (ch == '_' && parserState == kFirstLetter))) {
uasserted(ErrorCodes::DNSRecordTypeMismatch,
"A Domain Name cannot have tokens other than dash or alphanumerics.");
}
// All domain names are represented in lower-case letters, because DNS is case
// insensitive.
- name.push_back(std::tolower(ch));
+ name.push_back(ctype::toLower(ch));
if (parserState == kFirstLetter) {
parserState = kNonPeriod;
}
@@ -367,7 +367,7 @@ private:
bool isEquivalentToIPv4DottedDecimal() const {
return !_fullyQualified && _nameComponents.size() == 4 &&
std::all_of(begin(_nameComponents), end(_nameComponents), [](const auto& s) {
- return std::all_of(begin(s), end(s), [](char c) { return std::isdigit(c); });
+ return std::all_of(begin(s), end(s), [](char c) { return ctype::isDigit(c); });
});
}
diff --git a/src/mongo/util/hex.cpp b/src/mongo/util/hex.cpp
index 21a81a91dfc..5f92ca6a8a9 100644
--- a/src/mongo/util/hex.cpp
+++ b/src/mongo/util/hex.cpp
@@ -30,12 +30,12 @@
#include "mongo/util/hex.h"
#include <algorithm>
-#include <cctype>
#include <fmt/format.h>
#include <iterator>
#include <string>
#include "mongo/base/error_codes.h"
+#include "mongo/util/ctype.h"
namespace mongo {
@@ -85,7 +85,7 @@ unsigned char decodePair(StringData c) {
bool validate(StringData s) {
// There must be an even number of characters, since each pair encodes a single byte.
return s.size() % 2 == 0 &&
- std::all_of(s.begin(), s.end(), [](unsigned char c) { return std::isxdigit(c); });
+ std::all_of(s.begin(), s.end(), [](auto c) { return ctype::isXdigit(c); });
}
std::string encode(StringData data) {
diff --git a/src/mongo/util/net/ssl_manager.cpp b/src/mongo/util/net/ssl_manager.cpp
index f33cb9df0bf..73884cabae0 100644
--- a/src/mongo/util/net/ssl_manager.cpp
+++ b/src/mongo/util/net/ssl_manager.cpp
@@ -48,6 +48,7 @@
#include "mongo/platform/overflow_arithmetic.h"
#include "mongo/transport/session.h"
#include "mongo/transport/transport_layer_asio.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/hex.h"
#include "mongo/util/icu.h"
#include "mongo/util/net/ssl_options.h"
@@ -62,20 +63,6 @@ SSLManagerCoordinator* theSSLManagerCoordinator;
namespace {
-// Some of these duplicate the std::isalpha/std::isxdigit because we don't want them to be
-// affected by the current locale.
-inline bool isAlpha(char ch) {
- return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
-}
-
-inline bool isDigit(char ch) {
- return (ch >= '0' && ch <= '9');
-}
-
-inline bool isHex(char ch) {
- return isDigit(ch) || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
-}
-
// This function returns true if the character is supposed to be escaped according to the rules
// in RFC4514. The exception to the RFC is the space character ' ' and the '#', because we've not
// required users to escape spaces or sharps in DNs in the past.
@@ -164,12 +151,12 @@ std::string RFC4514Parser::extractAttributeName() {
std::function<bool(char ch)> characterCheck;
// If the first character is a digit, then this is an OID and can only contain
// numbers and '.'
- if (isDigit(ch)) {
- characterCheck = [](char ch) { return (isDigit(ch) || ch == '.'); };
+ if (ctype::isDigit(ch)) {
+ characterCheck = [](char ch) { return ctype::isDigit(ch) || ch == '.'; };
// If the first character is an alpha, then this is a short name and can only
// contain alpha/digit/hyphen characters.
- } else if (isAlpha(ch)) {
- characterCheck = [](char ch) { return (isAlpha(ch) || isDigit(ch) || ch == '-'); };
+ } else if (ctype::isAlpha(ch)) {
+ characterCheck = [](char ch) { return ctype::isAlnum(ch) || ch == '-'; };
// Otherwise this is an invalid attribute name
} else {
uasserted(ErrorCodes::BadValue,
@@ -214,13 +201,13 @@ std::pair<std::string, RFC4514Parser::ValueTerminator> RFC4514Parser::extractVal
if (isEscaped(ch)) {
sb << ch;
trailingSpaces = 0;
- } else if (isHex(ch)) {
+ } else if (ctype::isXdigit(ch)) {
const std::array<char, 2> hexValStr = {ch, _advance()};
uassert(ErrorCodes::BadValue,
str::stream() << "Escaped hex value contains invalid character \'"
<< hexValStr[1] << "\'",
- isHex(hexValStr[1]));
+ ctype::isXdigit(hexValStr[1]));
const char hexVal = hexblob::decodePair(StringData(hexValStr.data(), 2));
sb << hexVal;
if (hexVal != ' ') {
diff --git a/src/mongo/util/net/ssl_options.cpp b/src/mongo/util/net/ssl_options.cpp
index a142e3a917a..3cdf4da1610 100644
--- a/src/mongo/util/net/ssl_options.cpp
+++ b/src/mongo/util/net/ssl_options.cpp
@@ -36,6 +36,7 @@
#include "mongo/base/status.h"
#include "mongo/config.h"
#include "mongo/db/server_options.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/hex.h"
#include "mongo/util/options_parser/startup_options.h"
#include "mongo/util/text.h"
@@ -57,7 +58,7 @@ std::vector<uint8_t> hexToVector(StringData hex) {
std::string data = hexblob::decode(hex);
return std::vector<uint8_t>(data.begin(), data.end());
} catch (const ExceptionFor<ErrorCodes::FailedToParse>&) {
- if (std::any_of(hex.begin(), hex.end(), [](unsigned char c) { return !isxdigit(c); })) {
+ if (std::any_of(hex.begin(), hex.end(), [](char c) { return !ctype::isXdigit(c); })) {
uasserted(ErrorCodes::BadValue, "Not a valid hex string");
}
if (hex.size() % 2) {
diff --git a/src/mongo/util/options_parser/options_parser.cpp b/src/mongo/util/options_parser/options_parser.cpp
index 39a38544805..28600197c77 100644
--- a/src/mongo/util/options_parser/options_parser.cpp
+++ b/src/mongo/util/options_parser/options_parser.cpp
@@ -37,7 +37,6 @@
#include <boost/iostreams/stream.hpp>
#include <boost/iostreams/stream_buffer.hpp>
#include <boost/program_options.hpp>
-#include <cctype>
#include <cerrno>
#include <fcntl.h>
#include <fstream>
@@ -58,6 +57,7 @@
#include "mongo/db/json.h"
#include "mongo/logv2/log.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/hex.h"
#include "mongo/util/net/hostandport.h"
#include "mongo/util/net/http_client.h"
@@ -450,10 +450,10 @@ public:
if (_trim == Trim::kWhitespace) {
size_t start = 0;
size_t end = str.size();
- while ((start < end) && std::isspace(str[start])) {
+ while ((start < end) && ctype::isSpace(str[start])) {
++start;
}
- while ((start < end) && std::isspace(str[end - 1])) {
+ while ((start < end) && ctype::isSpace(str[end - 1])) {
--end;
}
if ((start > 0) || (end < str.size())) {
diff --git a/src/mongo/util/processinfo_linux.cpp b/src/mongo/util/processinfo_linux.cpp
index b4b67c4b91b..96735c90792 100644
--- a/src/mongo/util/processinfo_linux.cpp
+++ b/src/mongo/util/processinfo_linux.cpp
@@ -58,6 +58,7 @@
#include <pcrecpp.h>
#include "mongo/logv2/log.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/file.h"
#define KLONG long
@@ -531,7 +532,7 @@ public:
lineOff = 0;
// trim whitespace and append 000 to replace kB.
- while (isspace(meminfo.at(lineOff)))
+ while (ctype::isSpace(meminfo.at(lineOff)))
lineOff++;
meminfo = meminfo.substr(lineOff);
diff --git a/src/mongo/util/stacktrace.cpp b/src/mongo/util/stacktrace.cpp
index 2725d40d200..07ab0a8ab68 100644
--- a/src/mongo/util/stacktrace.cpp
+++ b/src/mongo/util/stacktrace.cpp
@@ -31,12 +31,11 @@
#include "mongo/util/stacktrace.h"
-#include <cctype>
-
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/json.h"
#include "mongo/logv2/log.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/ctype.h"
namespace mongo::stack_trace_detail {
namespace {
@@ -82,7 +81,7 @@ StringData Hex::toHex(uint64_t x, Buf& buf, bool showBase) {
uint64_t Hex::fromHex(StringData s) {
uint64_t x = 0;
for (char c : s) {
- char uc = std::toupper(static_cast<unsigned char>(c));
+ char uc = ctype::toUpper(c);
if (size_t pos = kDigits<16>.find(uc); pos == std::string::npos) {
return x;
} else {
diff --git a/src/mongo/util/stacktrace_threads.cpp b/src/mongo/util/stacktrace_threads.cpp
index 05d6bcacb0d..62a5b020a2c 100644
--- a/src/mongo/util/stacktrace_threads.cpp
+++ b/src/mongo/util/stacktrace_threads.cpp
@@ -36,7 +36,6 @@
#include <array>
#include <atomic>
#include <boost/filesystem.hpp>
-#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <dirent.h>
diff --git a/src/mongo/util/str.cpp b/src/mongo/util/str.cpp
index 024a6bca1e3..bc4d4ec4fe5 100644
--- a/src/mongo/util/str.cpp
+++ b/src/mongo/util/str.cpp
@@ -29,9 +29,8 @@
#include "mongo/platform/basic.h"
-#include <cctype>
-
#include "mongo/base/parse_number.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/hex.h"
#include "mongo/util/str.h"
@@ -90,8 +89,8 @@ int LexNumCmp::cmp(StringData sd1, StringData sd2, bool lexOnly) {
return -1;
if (!lexOnly) {
- bool n1 = isdigit(sd1[s1]);
- bool n2 = isdigit(sd2[s2]);
+ bool n1 = ctype::isDigit(sd1[s1]);
+ bool n2 = ctype::isDigit(sd2[s2]);
if (n1 && n2) {
// get rid of leading 0s
@@ -105,9 +104,9 @@ int LexNumCmp::cmp(StringData sd1, StringData sd2, bool lexOnly) {
size_t e1 = s1;
size_t e2 = s2;
- while (e1 < sd1.size() && isdigit(sd1[e1]))
+ while (e1 < sd1.size() && ctype::isDigit(sd1[e1]))
e1++;
- while (e2 < sd2.size() && isdigit(sd2[e2]))
+ while (e2 < sd2.size() && ctype::isDigit(sd2[e2]))
e2++;
size_t len1 = e1 - s1;
@@ -225,7 +224,7 @@ std::string escape(StringData sd, bool escape_slash) {
boost::optional<size_t> parseUnsignedBase10Integer(StringData fieldName) {
// Do not accept positions like '-4' or '+4'
- if (!std::isdigit(fieldName[0])) {
+ if (!ctype::isDigit(fieldName[0])) {
return boost::none;
}
unsigned int index;
diff --git a/src/mongo/util/str.h b/src/mongo/util/str.h
index 1466dea2466..d90e8a10e93 100644
--- a/src/mongo/util/str.h
+++ b/src/mongo/util/str.h
@@ -35,8 +35,8 @@
* TODO: De-inline.
*/
+#include <algorithm>
#include <boost/optional.hpp>
-#include <ctype.h>
#include <memory>
#include <sstream>
#include <string>
@@ -45,6 +45,7 @@
#include "mongo/base/string_data.h"
#include "mongo/bson/util/builder.h"
#include "mongo/platform/bits.h"
+#include "mongo/util/ctype.h"
namespace mongo::str {
@@ -201,7 +202,7 @@ inline unsigned toUnsigned(const std::string& a) {
unsigned x = 0;
const char* p = a.c_str();
while (1) {
- if (!isdigit(*p))
+ if (!ctype::isDigit(*p))
break;
x = x * 10 + (*p - '0');
p++;
@@ -365,17 +366,10 @@ void splitStringDelim(const std::string& str, std::vector<std::string>* res, cha
void joinStringDelim(const std::vector<std::string>& strs, std::string* res, char delim);
inline std::string toLower(StringData input) {
- std::string::size_type sz = input.size();
-
- std::unique_ptr<char[]> line(new char[sz + 1]);
- char* copy = line.get();
-
- for (std::string::size_type i = 0; i < sz; i++) {
- char c = input[i];
- copy[i] = (char)tolower((int)c);
- }
- copy[sz] = 0;
- return copy;
+ std::string r{input};
+ for (char& c : r)
+ c = ctype::toLower(c);
+ return r;
}
/** Functor for combining lexical and numeric comparisons. */
diff --git a/src/mongo/util/str_test.cpp b/src/mongo/util/str_test.cpp
index 68decc11214..31913d38e0c 100644
--- a/src/mongo/util/str_test.cpp
+++ b/src/mongo/util/str_test.cpp
@@ -32,6 +32,7 @@
#include "mongo/unittest/unittest.h"
+#include "mongo/util/ctype.h"
#include "mongo/util/hex.h"
#include "mongo/util/str.h"
@@ -74,7 +75,7 @@ void assertCmp(int expected, StringData s1, StringData s2, bool lexOnly = false)
}
TEST(StringUtilsTest, Simple2) {
- ASSERT(!isdigit((char)255));
+ ASSERT(!ctype::isDigit((char)255));
assertCmp(0, "a", "a");
assertCmp(-1, "a", "aa");
diff --git a/src/mongo/util/string_bm.cpp b/src/mongo/util/string_bm.cpp
new file mode 100644
index 00000000000..3dc5fd2a24c
--- /dev/null
+++ b/src/mongo/util/string_bm.cpp
@@ -0,0 +1,123 @@
+/**
+ * Copyright (C) 2018-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include <algorithm>
+#include <cctype> // NOLINT
+#include <functional>
+#include <iomanip>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <benchmark/benchmark.h>
+
+#include "mongo/base/simple_string_data_comparator.h"
+#include "mongo/base/string_data.h"
+#include "mongo/util/ctype.h"
+
+// Verify the performance of our string processing algorithms.
+// This can include StringData, util/str utilities, etc.
+
+namespace mongo {
+namespace {
+
+// Builds a string of exactly `size` characters by repeating a fixed sentence,
+// truncating the final repetition when it would overshoot.
+std::string makeString(size_t size) {
+    StringData pattern = "The quick brown fox jumped over the lazy dog. ";
+    std::string out;
+    for (size_t remaining = size; remaining > 0; remaining = size - out.size()) {
+        // Never copy more than what is still missing.
+        StringData piece = pattern.substr(0, std::min(remaining, pattern.size()));
+        out.append(piece.begin(), piece.end());
+    }
+    return out;
+}
+
+// Times StringData::equalCaseInsensitive on two equal strings produced by
+// makeString(1000). One processed item is reported per benchmark iteration.
+void BM_StringDataEqualCaseInsensitive(benchmark::State& state) {
+    std::string lhs = makeString(1000);
+    std::string rhs = lhs;
+    StringData lhsView = lhs;
+
+    std::uint64_t iterations = 0;
+    for (auto _ : state) {
+        // DoNotOptimize keeps the comparison result observable.
+        benchmark::DoNotOptimize(lhsView.equalCaseInsensitive(rhs));
+        iterations += 1;
+    }
+    state.SetItemsProcessed(iterations);
+}
+BENCHMARK(BM_StringDataEqualCaseInsensitive);
+
+// Same loop as BM_MongoCtypeToLower, but using std::tolower: lower-cases every
+// character of the string from makeString(1000) in place and reports one
+// processed item per benchmark iteration.
+void BM_StdToLower(benchmark::State& state) {
+    std::uint64_t items = 0;
+    std::string s1 = makeString(1000);
+    for (auto _ : state) {
+        for (char& c : s1) {
+            // std::tolower requires a value representable as unsigned char (or EOF);
+            // passing a plain char that happens to be negative is undefined behavior.
+            const int lowered = std::tolower(static_cast<unsigned char>(c));
+            benchmark::DoNotOptimize(c = static_cast<char>(lowered));
+        }
+        ++items;
+    }
+    state.SetItemsProcessed(items);
+}
+BENCHMARK(BM_StdToLower);
+
+// Times ctype::toLower applied in place to every character of the string from
+// makeString(1000). One processed item is reported per benchmark iteration.
+void BM_MongoCtypeToLower(benchmark::State& state) {
+    std::string text = makeString(1000);
+    std::uint64_t iterations = 0;
+    for (auto _ : state) {
+        for (char& ch : text) {
+            // The assignment is wrapped so the store cannot be optimized away.
+            benchmark::DoNotOptimize(ch = ctype::toLower(ch));
+        }
+        iterations += 1;
+    }
+    state.SetItemsProcessed(iterations);
+}
+BENCHMARK(BM_MongoCtypeToLower);
+
+// Same loop as BM_MongoCtypeIsAlpha, but using std::isalpha: classifies every
+// character of the string from makeString(1000) and reports one processed item
+// per benchmark iteration.
+void BM_StdIsAlpha(benchmark::State& state) {
+    std::uint64_t items = 0;
+    std::string s1 = makeString(1000);
+    for (auto _ : state) {
+        for (char& c : s1) {
+            // std::isalpha requires a value representable as unsigned char (or EOF);
+            // passing a plain char that happens to be negative is undefined behavior.
+            benchmark::DoNotOptimize(std::isalpha(static_cast<unsigned char>(c)));
+        }
+        ++items;
+    }
+    state.SetItemsProcessed(items);
+}
+BENCHMARK(BM_StdIsAlpha);
+
+// Times ctype::isAlpha over every character of the string from
+// makeString(1000). One processed item is reported per benchmark iteration.
+void BM_MongoCtypeIsAlpha(benchmark::State& state) {
+    std::string text = makeString(1000);
+    std::uint64_t iterations = 0;
+    for (auto _ : state) {
+        for (char& ch : text) {
+            // Only the classification result is consumed; the text is not modified.
+            benchmark::DoNotOptimize(ctype::isAlpha(ch));
+        }
+        iterations += 1;
+    }
+    state.SetItemsProcessed(iterations);
+}
+BENCHMARK(BM_MongoCtypeIsAlpha);
+
+} // namespace
+} // namespace mongo