diff options
author | Sara Golemon <sara.golemon@mongodb.com> | 2017-09-14 15:24:18 -0400 |
---|---|---|
committer | Sara Golemon <sara.golemon@mongodb.com> | 2017-09-21 11:19:26 -0400 |
commit | f3bf7e7920a51df7100238a55c304ed7cd3aed1f (patch) | |
tree | a43a89c9a0f960cea7a2f9ce31c5ecc3243d5131 /src/mongo/client/mongo_uri.cpp | |
parent | 6c4c5f47b2e3fbf841683448a5e117e86f72c4ef (diff) | |
download | mongo-f3bf7e7920a51df7100238a55c304ed7cd3aed1f.tar.gz |
SERVER-29921 rewrite URI parser
Diffstat (limited to 'src/mongo/client/mongo_uri.cpp')
-rw-r--r-- | src/mongo/client/mongo_uri.cpp | 338 |
1 files changed, 265 insertions, 73 deletions
diff --git a/src/mongo/client/mongo_uri.cpp b/src/mongo/client/mongo_uri.cpp index 6e6a7825b2d..1db9c10d2c0 100644 --- a/src/mongo/client/mongo_uri.cpp +++ b/src/mongo/client/mongo_uri.cpp @@ -32,130 +32,322 @@ #include "mongo/client/mongo_uri.h" -#include <regex> +#include <utility> #include "mongo/base/status_with.h" #include "mongo/bson/bsonobjbuilder.h" #include "mongo/client/dbclientinterface.h" #include "mongo/client/sasl_client_authenticate.h" +#include "mongo/db/namespace_string.h" +#include "mongo/util/hex.h" #include "mongo/util/mongoutils/str.h" -#include "mongo/util/password_digest.h" #include <boost/algorithm/string/case_conv.hpp> #include <boost/algorithm/string/classification.hpp> +#include <boost/algorithm/string/find_iterator.hpp> #include <boost/algorithm/string/predicate.hpp> -#include <boost/algorithm/string/split.hpp> + +namespace { +constexpr std::array<char, 16> hexits{ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; +const mongo::StringData kURIPrefix{"mongodb://"}; +} + +/** + * RFC 3986 Section 2.1 - Percent Encoding + * + * Encode data elements in a way which will allow them to be embedded + * into a mongodb:// URI safely. + */ +void mongo::uriEncode(std::ostream& ss, StringData toEncode) { + for (const auto& c : toEncode) { + if ((c == '-') || (c == '_') || (c == '.') || (c == '~') || isalnum(c)) { + ss << c; + } else { + // Encoding anything not included in section 2.3 "Unreserved characters" + ss << '%' << hexits[(c >> 4) & 0xF] << hexits[c & 0xF]; + } + } +} + +mongo::StatusWith<std::string> mongo::uriDecode(StringData toDecode) { + StringBuilder out; + for (size_t i = 0; i < toDecode.size(); ++i) { + const auto c = toDecode[i]; + if (c == '%') { + if (i + 2 > toDecode.size()) { + return Status(ErrorCodes::FailedToParse, + "Encountered partial escape sequence at end of string"); + } + out << fromHex(toDecode.substr(i + 1, 2)); + i += 2; + } else { + out << c; + } + } + return out.str(); +} namespace mongo { namespace { -const char kMongoDBURL[] = - // scheme: non-capturing - "mongodb://" - // credentials: two inner captures for user and password - "(?:([^:]+)(?::([^@]+))?@)?" +/** + * Helper Method for MongoURI::parse() to split a string into exactly 2 pieces by a char delimeter + */ +std::pair<StringData, StringData> partitionForward(StringData str, const char c) { + const auto delim = str.find(c); + if (delim == std::string::npos) { + return {str, StringData()}; + } + return {str.substr(0, delim), str.substr(delim + 1)}; +} + +/** + * Helper method for MongoURI::parse() to split a string into exactly 2 pieces by a char delimiter + * searching backward from the end of the string. + */ +std::pair<StringData, StringData> partitionBackward(StringData str, const char c) { + const auto delim = str.rfind(c); + if (delim == std::string::npos) { + return {StringData(), str}; + } + return {str.substr(0, delim), str.substr(delim + 1)}; +} - // servers: grabs all host:port or UNIX socket names - "((?:[^\\/]+|/.+\\.sock)(?:,(?:[^\\/]+|/.+\\.sock))*)" +/** + * Breakout method for parsing application/x-www-form-urlencoded option pairs + * + * foo=bar&baz=qux&... + */ +StatusWith<MongoURI::OptionsMap> parseOptions(StringData options, StringData url) { + MongoURI::OptionsMap ret; + if (options.empty()) { + return ret; + } - // database and options are grouped together - "(?:/" + if (options.find('?') != std::string::npos) { + return Status(ErrorCodes::FailedToParse, + str::stream() + << "URI Cannot Contain multiple questions marks for mongodb:// URL: " + << url); + } - // database: matches anything but the chars that cannot be part of a MongoDB database name which - // are (in order) - forward slash, back slash, dot, space, double-quote, dollar sign, asterisk, - // less than, greater than, colon, pipe, question mark. - "([^/\\\\\\.\\ \"\\$*<>:\\|\\?]*)?" + const auto optionsStr = options.toString(); + for (auto i = + boost::make_split_iterator(optionsStr, boost::first_finder("&", boost::is_iequal())); + i != std::remove_reference<decltype((i))>::type{}; + ++i) { + const auto opt = boost::copy_range<std::string>(*i); + if (opt.empty()) { + return Status(ErrorCodes::FailedToParse, + str::stream() + << "Missing a key/value pair in the options for mongodb:// URL: " + << url); + } + + const auto kvPair = partitionForward(opt, '='); + const auto keyRaw = kvPair.first; + if (keyRaw.empty()) { + return Status( + ErrorCodes::FailedToParse, + str::stream() + << "Missing a key for key/value pair in the options for mongodb:// URL: " + << url); + } + const auto key = uriDecode(keyRaw); + if (!key.isOK()) { + return Status( + ErrorCodes::FailedToParse, + str::stream() << "Key '" << keyRaw + << "' in options cannot properly be URL decoded for mongodb:// URL: " + << url); + } + const auto valRaw = kvPair.second; + if (valRaw.empty()) { + return Status(ErrorCodes::FailedToParse, + str::stream() << "Missing value for key '" << keyRaw + << "' in the options for mongodb:// URL: " + << url); + } + const auto val = uriDecode(valRaw); + if (!val.isOK()) { + return Status( + ErrorCodes::FailedToParse, + str::stream() << "Value '" << valRaw << "' for key '" << keyRaw + << "' in options cannot properly be URL decoded for mongodb:// URL: " + << url); + } - // options - "(?:\\?([^&=?]+=[^&=?]+(?:&[^&=?]+=[^&=?]+)*))?" + ret[key.getValue()] = val.getValue(); + } - // close db/options group - ")?"; + return ret; +} } // namespace - StatusWith<MongoURI> MongoURI::parse(const std::string& url) { - if (!boost::algorithm::starts_with(url, "mongodb://")) { - auto cs_status = ConnectionString::parse(url); + const StringData urlSD(url); + + // 1. Validate and remove the scheme prefix mongodb:// + if (!urlSD.startsWith(kURIPrefix)) { + const auto cs_status = ConnectionString::parse(url); if (!cs_status.isOK()) { return cs_status.getStatus(); } - return MongoURI(cs_status.getValue()); } - const std::regex mongoUrlRe(kMongoDBURL); - std::smatch matches; - if (!std::regex_match(url, matches, mongoUrlRe)) { - return Status(ErrorCodes::FailedToParse, - str::stream() << "Failed to parse mongodb:// URL: " << url); + const auto uriWithoutPrefix = urlSD.substr(kURIPrefix.size()); + + // 2. Split the string by the first, unescaped / (if any), yielding: + // split[0]: User information and host identifers + // split[1]: Auth database and connection options + const auto userAndDb = partitionForward(uriWithoutPrefix, '/'); + const auto userAndHostInfo = userAndDb.first; + const auto databaseAndOptions = userAndDb.second; + + // 2.b Make sure that there are no question marks in the left side of the / + // as any options after the ? must still have the / delimeter + if (databaseAndOptions.empty() && userAndHostInfo.find('?') != std::string::npos) { + return Status( + ErrorCodes::FailedToParse, + str::stream() + << "URI must contain slash delimeter between hosts and options for mongodb:// URL: " + << url); } - // We have the whole input plus 5 top level captures (user, password, host, db, options). - invariant(matches.size() == 6); - - if (!matches[3].matched) { - return Status(ErrorCodes::FailedToParse, "No server(s) specified"); + // 3. Split the user information and host identifiers string by the last, unescaped @, yielding: + // split[0]: User information + // split[1]: Host identifiers; + const auto userAndHost = partitionBackward(userAndHostInfo, '@'); + const auto userInfo = userAndHost.first; + const auto hostIdentifiers = userAndHost.second; + + // 4. Validate, split (if applicable), and URL decode the user information, yielding: + // split[0] = username + // split[1] = password + const auto userAndPass = partitionForward(userInfo, ':'); + const auto usernameSD = userAndPass.first; + const auto passwordSD = userAndPass.second; + + const auto containsColonOrAt = [](StringData str) { + return (str.find(':') != std::string::npos) || (str.find('@') != std::string::npos); + }; + + if (containsColonOrAt(usernameSD)) { + return Status(ErrorCodes::FailedToParse, + str::stream() << "Username must be URL Encoded for mongodb:// URL: " << url); + } + if (containsColonOrAt(passwordSD)) { + return Status(ErrorCodes::FailedToParse, + str::stream() << "Password must be URL Encoded for mongodb:// URL: " << url); } - std::map<std::string, std::string> options; + // Get the username and make sure it did not fail to decode + const auto usernameWithStatus = uriDecode(usernameSD); + if (!usernameWithStatus.isOK()) { + return Status( + ErrorCodes::FailedToParse, + str::stream() << "Username cannot properly be URL decoded for mongodb:// URL: " << url); + } + const auto username = usernameWithStatus.getValue(); + + // Get the password and make sure it did not fail to decode + const auto passwordWithStatus = uriDecode(passwordSD); + if (!passwordWithStatus.isOK()) + return Status( + ErrorCodes::FailedToParse, + str::stream() << "Password cannot properly be URL decoded for mongodb:// URL: " << url); + const auto password = passwordWithStatus.getValue(); + + // 5. Validate, split, and URL decode the host identifiers. + const auto hostIdentifiersStr = hostIdentifiers.toString(); + std::vector<HostAndPort> servers; + for (auto i = boost::make_split_iterator(hostIdentifiersStr, + boost::first_finder(",", boost::is_iequal())); + i != std::remove_reference<decltype((i))>::type{}; + ++i) { + const auto hostWithStatus = uriDecode(boost::copy_range<std::string>(*i)); + if (!hostWithStatus.isOK()) { + return Status( + ErrorCodes::FailedToParse, + str::stream() << "Host cannot properly be URL decoded for mongodb:// URL: " << url); + } - if (matches[5].matched) { - const std::string optionsMatch = matches[5].str(); + const auto host = hostWithStatus.getValue(); + if (host.empty()) { + continue; + } - std::vector<boost::iterator_range<std::string::const_iterator>> optionsTokens; - boost::algorithm::split(optionsTokens, optionsMatch, boost::algorithm::is_any_of("=&")); + if ((host.find('/') != std::string::npos) && !StringData(host).endsWith(".sock")) { + return Status( + ErrorCodes::FailedToParse, + str::stream() << "'" << host << "' in '" << url + << "' appears to be a unix socket, but does not end in '.sock'"); + } - if (optionsTokens.size() % 2 != 0) { - return Status(ErrorCodes::FailedToParse, - str::stream() - << "Missing a key or value in the options for mongodb:// URL: " - << url); - ; + const auto statusHostAndPort = HostAndPort::parse(host); + if (!statusHostAndPort.isOK()) { + return statusHostAndPort.getStatus(); } + servers.push_back(statusHostAndPort.getValue()); + } + if (servers.empty()) { + return Status(ErrorCodes::FailedToParse, "No server(s) specified"); + } - for (size_t i = 0; i != optionsTokens.size(); i = i + 2) - options[std::string(optionsTokens[i].begin(), optionsTokens[i].end())] = - std::string(optionsTokens[i + 1].begin(), optionsTokens[i + 1].end()); + // 6. Split the auth database and connection options string by the first, unescaped ?, yielding: + // split[0] = auth database + // split[1] = connection options + const auto dbAndOpts = partitionForward(databaseAndOptions, '?'); + const auto databaseSD = dbAndOpts.first; + const auto connectionOptions = dbAndOpts.second; + const auto databaseWithStatus = uriDecode(databaseSD); + if (!databaseWithStatus.isOK()) { + return Status(ErrorCodes::FailedToParse, + str::stream() + << "Database name cannot properly be URL decoded for mongodb:// URL: " + << url); + } + const auto database = databaseWithStatus.getValue(); + + // 7. Validate the database contains no prohibited characters + // Prohibited characters: + // slash ("/"), backslash ("\"), space (" "), double-quote ("""), or dollar sign ("$") + // period (".") is also prohibited, but drivers MAY allow periods + if (!database.empty() && + !NamespaceString::validDBName(database, + NamespaceString::DollarInDbNameBehavior::Disallow)) { + return Status(ErrorCodes::FailedToParse, + str::stream() + << "Database name cannot have reserved characters for mongodb:// URL: " + << url); } - OptionsMap::const_iterator optIter; + // 8. Validate, split, and URL decode the connection options + const auto optsWith = parseOptions(connectionOptions, url); + if (!optsWith.isOK()) { + return optsWith.getStatus(); + } + const auto options = optsWith.getValue(); // If a replica set option was specified, store it in the 'setName' field. - bool haveSetName; + const auto optIter = options.find("replicaSet"); std::string setName; - if ((haveSetName = ((optIter = options.find("replicaSet")) != options.end()))) { + if (optIter != options.end()) { setName = optIter->second; + invariant(!setName.empty()); } - std::vector<HostAndPort> servers; - - { - std::vector<std::string> servers_split; - const std::string serversStr = matches[3].str(); - boost::algorithm::split(servers_split, serversStr, boost::is_any_of(",")); - for (auto&& s : servers_split) { - auto statusHostAndPort = HostAndPort::parse(s); - if (!statusHostAndPort.isOK()) { - return statusHostAndPort.getStatus(); - } - - servers.push_back(statusHostAndPort.getValue()); - } - } - - const bool direct = !haveSetName && (servers.size() == 1); - - if (!direct && setName.empty()) { + if ((servers.size() > 1) && setName.empty()) { return Status(ErrorCodes::FailedToParse, "Cannot list multiple servers in URL without 'replicaSet' option"); } ConnectionString cs( - direct ? ConnectionString::MASTER : ConnectionString::SET, servers, setName); - return MongoURI( - std::move(cs), matches[1].str(), matches[2].str(), matches[4].str(), std::move(options)); + setName.empty() ? ConnectionString::MASTER : ConnectionString::SET, servers, setName); + return MongoURI(std::move(cs), username, password, database, std::move(options)); } } // namespace mongo |