diff options
Diffstat (limited to 'libproxy/url.cpp')
-rw-r--r-- | libproxy/url.cpp | 558 |
1 files changed, 0 insertions, 558 deletions
diff --git a/libproxy/url.cpp b/libproxy/url.cpp deleted file mode 100644 index 6925745..0000000 --- a/libproxy/url.cpp +++ /dev/null @@ -1,558 +0,0 @@ -/******************************************************************************* - * libproxy - A library for proxy configuration - * Copyright (C) 2006 Nathaniel McCallum <nathaniel@natemccallum.com> - * - * Based on work found in GLib GIO: - * Copyright (C) 2006-2007 Red Hat, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * - ******************************************************************************/ - -#include <fcntl.h> // For ::open() -#include <cstring> // For memcpy() -#include <sstream> // For int/string conversion (using stringstream) -#include <cstdio> // For sscanf() -#include <cstdlib> // For atoi() -#include <cerrno> // For errno and EINTR -#include <sys/stat.h> // For stat() -#include <algorithm> // For transform() - -#ifdef WIN32 -#include <io.h> -#define open _open -#define O_RDONLY _O_RDONLY -#define close _close -#define read _read -#define SHUT_RDWR SD_BOTH -#else -#include <unistd.h> // For read() close() -#define closesocket close -#endif - -#include "url.hpp" -using namespace libproxy; -using namespace std; - -// This mime type should be reported by the web server -#define PAC_MIME_TYPE "application/x-ns-proxy-autoconfig" -// Fall back to checking for this mime type, which servers often report wrong -#define PAC_MIME_TYPE_FB "text/plain" - -// This is the maximum pac size (to avoid memory attacks) -#define PAC_MAX_SIZE 0x800000 -// This is the default block size to use when receiving via HTTP -#define PAC_HTTP_BLOCK_SIZE 512 - -static inline int get_default_port(string scheme) { - struct servent *serv; - size_t plus = scheme.find('+'); - - if (plus != string::npos) - scheme = scheme.substr(plus + 1); - - if ((serv = getservbyname(scheme.c_str(), NULL))) - return ntohs(serv->s_port); - - return 0; -} - -template <class T> -static inline string to_string_ (const T& t) { - stringstream ss; - ss << t; - return ss.str(); -} - -#define _copyaddr_t(type, addr) (sockaddr*) memcpy(new type, &(addr), sizeof(type)) -static inline sockaddr* copyaddr(const struct sockaddr& addr) { - switch (addr.sa_family) { - case (AF_INET): - return _copyaddr_t(sockaddr_in, addr); - case (AF_INET6): - return _copyaddr_t(sockaddr_in6, addr); - default: - return NULL; - } -} - -bool url::is_valid(const string &url_) { - bool rtv = true; - - try { - url url(url_); - } catch (parse_error&) { - rtv = false; - } - - return rtv; -} - -string url::encode(const string &data, const string &valid_reserved) { - ostringstream encoded; - for (unsigned int i=0; i < data.size(); i++) { - if (isalnum((unsigned char)data[i]) - || valid_reserved.find(data[i]) != string::npos - || string("-._~").find(data[i]) != string::npos) - encoded << data[i]; - else - encoded << '%' - << ((unsigned char)data[i] < 16 ? "0" : "") - << hex << (((int)data[i]) & 0xff); - } - return encoded.str(); -} - -url::url(const string &url) - : m_orig(url), m_port(0), m_ips(NULL) { - size_t idx = 0; - size_t hier_part_start, hier_part_end; - size_t query_part_start; - size_t path_start, path_end; - string hier_part; - - /* From RFC 3986 Decodes: - * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] - */ - - idx = 0; - - /* Decode scheme: - * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - */ - - if (!isalpha(url[idx])) - throw parse_error("Invalid URL: " + url); - - while (1) { - char c = url[idx++]; - - if (c == ':') break; - - if (!(isalnum(c) || - c == '+' || - c == '-' || - c == '.')) - throw parse_error("Invalid URL: " + url); - } - - m_scheme = url.substr(0, idx - 1); - transform(m_scheme.begin(), m_scheme.end(), m_scheme.begin(), ::tolower); - - hier_part_start = idx; - hier_part_end = url.find('#', idx); - query_part_start = url.find('?', idx); - if (query_part_start != string::npos) - { - if (hier_part_end == string::npos) - m_query = url.substr(query_part_start); - else { - m_query = url.substr(query_part_start, hier_part_end - query_part_start); - } - hier_part_end = query_part_start; - } - - hier_part = url.substr(hier_part_start, - hier_part_end == string::npos ? - string::npos : hier_part_end - hier_part_start); - - /* 3: - * hier-part = "//" authority path-abempty - * / path-absolute - * / path-rootless - * / path-empty - */ - - if (hier_part.size() >= 2 && hier_part[0] == '/' && hier_part[1] == '/') { - size_t authority_start, authority_end; - size_t userinfo_start, userinfo_end; - size_t host_start, host_end; - - authority_start = 2; - /* authority is always followed by / or nothing */ - authority_end = hier_part.find('/', authority_start); - if (authority_end == string::npos) - authority_end = hier_part.size(); - path_start = authority_end; - - /* 3.2: - * authority = [ userinfo "@" ] host [ ":" port ] - */ - - /* Get user and password */ - userinfo_start = authority_start; - userinfo_end = hier_part.find('@', authority_start); - if (userinfo_end != string::npos) { - size_t user_end; - - user_end = hier_part.rfind(':', userinfo_end); - if (user_end == string::npos) - user_end = userinfo_end; - else - m_pass = hier_part.substr(user_end + 1, userinfo_end - (user_end + 1)); - - m_user = hier_part.substr(userinfo_start, user_end - userinfo_start); - } - - /* Get hostname */ - if (userinfo_end == string::npos) - host_start = authority_start; - else - host_start = userinfo_end + 1; - - /* Check for IPv6 IP */ - if (host_start < hier_part.size() - && hier_part[host_start] == '[') { - host_end = hier_part.find(']', host_start); - if (host_end == string::npos) - throw parse_error("Invalid URL: " + url); - host_end++; - if (hier_part[host_end] == '\0') - host_end = string::npos; - } else { - host_end = hier_part.find(':', host_start); - if (path_start < host_end) - host_end = path_start; - } - - /* If not port, host ends where path starts */ - if (host_end == string::npos) - host_end = path_start; - - m_host = hier_part.substr(host_start, host_end - host_start); - transform(m_host.begin(), m_host.end(), m_host.begin(), ::tolower); - - /* Get port */ - m_port = get_default_port(m_scheme); - - if (host_end < hier_part.size() - && hier_part[host_end] == ':') { - size_t port_start = host_end + 1; - m_port = atoi(hier_part.c_str() + port_start); - } - } else { - path_start = 0; - } - - /* Get path */ - if (path_start != string::npos) - { - path_end = hier_part_end; - if (path_end == string::npos) - m_path = hier_part.substr(path_start); - else - m_path = hier_part.substr(path_start, path_end - path_start); - } -} - -url::url(const url &url) : m_ips(NULL) { - *this = url; -} - -url::~url() { - empty_cache(); -} - -bool url::operator==(const url& url) const { - return m_orig == url.to_string(); -} - -url& url::operator=(const url& url) { - // Ensure these aren't the same objects - if (&url == this) - return *this; - - m_host = url.m_host; - m_orig = url.m_orig; - m_pass = url.m_pass; - m_path = url.m_path; - m_query = url.m_query; - m_port = url.m_port; - m_scheme = url.m_scheme; - m_user = url.m_user; - - empty_cache(); - - if (url.m_ips) { - int i; - - // Copy the new ip cache - for (i=0 ; url.m_ips[i] ; i++) {}; - m_ips = new sockaddr*[i]; - for (i=0 ; url.m_ips[i] ; i++) - m_ips[i] = copyaddr(*url.m_ips[i]); - } - - return *this; -} - -url& url::operator=(const string &strurl) { - url tmp(strurl); - *this = tmp; - return *this; -} - -string url::get_host() const { - return m_host; -} - -sockaddr const* const* url::get_ips(bool usedns) { - // Check the cache - if (m_ips) - return m_ips; - - // Check without DNS first - if (usedns && get_ips(false)) - return m_ips; - - // Check DNS for IPs - struct addrinfo* info; - struct addrinfo flags; - memset(&flags, '\0', sizeof(addrinfo)); - flags.ai_family = AF_UNSPEC; - flags.ai_socktype = 0; - flags.ai_protocol = 0; - flags.ai_flags = AI_NUMERICHOST; - if (!getaddrinfo(m_host.c_str(), NULL, usedns ? NULL : &flags, &info)) { - struct addrinfo* first = info; - unsigned int i = 0; - - // Get the size of our array - for (info = first ; info ; info = info->ai_next) - i++; - - // Return NULL if no IPs found - if (i == 0) - return m_ips = NULL; - - // Create our array since we actually have a result - m_ips = new sockaddr*[++i]; - memset(m_ips, '\0', sizeof(sockaddr*)*i); - - // Copy the sockaddr's into m_ips - for (i = 0, info = first ; info ; info = info->ai_next) { - if (info->ai_addr->sa_family == AF_INET || info->ai_addr->sa_family == AF_INET6) { - m_ips[i] = copyaddr(*(info->ai_addr)); - if (!m_ips[i]) break; - ((sockaddr_in **) m_ips)[i++]->sin_port = htons(m_port); - } - } - - freeaddrinfo(first); - return m_ips; - } - - // No addresses found - return NULL; -} - -string url::get_password() const { - return m_pass; -} - -string url::get_path() const { - return m_path; -} - -string url::get_query() const { - return m_query; -} - -uint16_t url::get_port() const { - return m_port; -} - -string url::get_scheme() const { - return m_scheme; -} - -string url::get_username() const { - return m_user; -} - -string url::to_string() const { - return m_orig; -} - -static string recvline(int fd) { - string line; - int ret; - - // Reserve arbitrary amount of space to avoid small memory reallocations. - line.reserve(128); - - do { - char c; - ret = recv(fd, &c, 1, 0); - if (ret == 1) { - if (c == '\n') - return line; - line += c; - } - } while (ret == 1 || (ret == -1 && errno == EINTR)); - - return line; -} - -char* url::get_pac() { - int sock = -1; - bool chunked = false; - unsigned long int content_length = 0, status = 0; - char* buffer = NULL; - string request; - - // In case of a file:// url we open the file and read it - if (m_scheme == "file" || m_scheme == "pac+file") { - struct stat st; - if ((sock = ::open(m_path.c_str(), O_RDONLY)) < 0) - return NULL; - - if (!fstat(sock, &st) && st.st_size < PAC_MAX_SIZE) { - buffer = new char[st.st_size+1]; - memset(buffer, 0, st.st_size+1); - if (read(sock, buffer, st.st_size) == 0) { - delete[] buffer; - buffer = NULL; - } - } - close(sock); - return buffer; - } - - // DNS lookup of host - if (!get_ips(true)) - return NULL; - - // Iterate through each IP trying to make a connection - // Stop at the first one - for (int i=0 ; m_ips[i] ; i++) { - sock = socket(m_ips[i]->sa_family, SOCK_STREAM, 0); - if (sock < 0) continue; - - if (m_ips[i]->sa_family == AF_INET && - !connect(sock, m_ips[i], sizeof(struct sockaddr_in))) - break; - else if (m_ips[i]->sa_family == AF_INET6 && - !connect(sock, m_ips[i], sizeof(struct sockaddr_in6))) - break; - - closesocket(sock); - sock = -1; - } - - // Test our socket - if (sock < 0) return NULL; - - // Build the request string - request = "GET " + (m_path.size() > 0 ? m_path : "/") + m_query + " HTTP/1.1\r\n"; - request += "Host: " + m_host + (m_port != 80 ? ":" + std::to_string(m_port) : "") + "\r\n"; - request += "Accept: " + string(PAC_MIME_TYPE) + "\r\n"; - request += "Connection: close\r\n"; - request += "\r\n"; - - // Send HTTP request - if ((size_t) send(sock, request.c_str(), request.size(), 0) != request.size()) { - closesocket(sock); - return NULL; - } - - /* Verify status line */ - string line = recvline(sock); - if (sscanf(line.c_str(), "HTTP/1.%*d %lu", &status) == 1 && status == 200) { - /* Check for correct mime type and content length */ - content_length = 0; - for (line = recvline(sock) ; line != "\r" && line != "" ; line = recvline(sock)) { - // Check for chunked encoding - if (line.find("Content-Transfer-Encoding: chunked") == 0 || line.find("Transfer-Encoding: chunked") == 0) - chunked = true; - - // Check for content length - else if (content_length == 0) - sscanf(line.c_str(), "Content-Length: %lu", &content_length); - } - - // Get content - std::vector<char> dynamic_buffer; - do { - unsigned int chunk_length; - - if (chunked) { - // Discard the empty line if we received a previous chunk - if (!dynamic_buffer.empty()) recvline(sock); - - // Get the chunk-length line as an integer - if (sscanf(recvline(sock).c_str(), "%x", &chunk_length) != 1 || chunk_length == 0) break; - - // Add this chunk to our content length, - // ensuring that we aren't over our max size - content_length += chunk_length; - } - - if (content_length >= PAC_MAX_SIZE) break; - - while (content_length == 0 || dynamic_buffer.size() != content_length) { - // Calculate length to recv - unsigned int length_to_read = PAC_HTTP_BLOCK_SIZE; - if (content_length > 0) - length_to_read = content_length - dynamic_buffer.size(); - - // Prepare buffer - dynamic_buffer.resize(dynamic_buffer.size() + length_to_read); - - int r = recv(sock, dynamic_buffer.data() + dynamic_buffer.size() - length_to_read, length_to_read, 0); - - // Shrink buffer to fit - if (r >= 0) - dynamic_buffer.resize(dynamic_buffer.size() - length_to_read + r); - - // PAC size too large, discard - if (dynamic_buffer.size() >= PAC_MAX_SIZE) { - chunked = false; - dynamic_buffer.clear(); - break; - } - - if (r <= 0) { - chunked = false; - break; - } - } - } while (chunked); - - if (content_length == 0 || content_length == dynamic_buffer.size()) { - buffer = new char[dynamic_buffer.size() + 1]; - if (!dynamic_buffer.empty()) { - memcpy(buffer, dynamic_buffer.data(), dynamic_buffer.size()); - } - buffer[dynamic_buffer.size()] = '\0'; - } - } - - // Clean up - shutdown(sock, SHUT_RDWR); - closesocket(sock); - return buffer; -} - -void url::empty_cache() -{ - if (m_ips) { - // Free any existing ip cache - for (int i=0 ; m_ips[i] ; i++) - delete m_ips[i]; - delete[] m_ips; - m_ips = NULL; - } -} |