summaryrefslogtreecommitdiff
path: root/lib/uri/common.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/uri/common.rb')
-rw-r--r--lib/uri/common.rb52
1 files changed, 41 insertions, 11 deletions
diff --git a/lib/uri/common.rb b/lib/uri/common.rb
index 26b179add2..ca38bec7ec 100644
--- a/lib/uri/common.rb
+++ b/lib/uri/common.rb
@@ -13,6 +13,8 @@ require_relative "rfc2396_parser"
require_relative "rfc3986_parser"
module URI
+ include RFC2396_REGEXP
+
REGEXP = RFC2396_REGEXP
Parser = RFC2396_Parser
RFC3986_PARSER = RFC3986_Parser.new
@@ -62,14 +64,17 @@ module URI
module_function :make_components_hash
end
- include REGEXP
-
module Schemes
end
private_constant :Schemes
+ #
+ # Register the given +klass+ to be instantiated when parsing URLs with the given +scheme+.
+ # Note that currently only schemes which after .upcase are valid constant names
+ # can be registered (no -/+/. allowed).
+ #
def self.register_scheme(scheme, klass)
- Schemes.const_set(scheme, klass)
+ Schemes.const_set(scheme.to_s.upcase, klass)
end
# Returns a Hash of the defined schemes.
@@ -295,6 +300,7 @@ module URI
256.times do |i|
TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
end
+ TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze
TBLENCWWWCOMP_[' '] = '+'
TBLENCWWWCOMP_.freeze
TBLDECWWWCOMP_ = {} # :nodoc:
@@ -320,6 +326,33 @@ module URI
#
# See URI.decode_www_form_component, URI.encode_www_form.
def self.encode_www_form_component(str, enc=nil)
+ _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_, str, enc)
+ end
+
+ # Decodes given +str+ of URL-encoded form data.
+ #
+ # This decodes + to SP.
+ #
+ # See URI.encode_www_form_component, URI.decode_www_form.
+ def self.decode_www_form_component(str, enc=Encoding::UTF_8)
+ _decode_uri_component(/\+|%\h\h/, str, enc)
+ end
+
+ # Encodes +str+ using URL encoding
+ #
+ # This encodes SP to %20 instead of +.
+ def self.encode_uri_component(str, enc=nil)
+ _encode_uri_component(/[^*\-.0-9A-Z_a-z]/, TBLENCURICOMP_, str, enc)
+ end
+
+ # Decodes given +str+ of URL-encoded data.
+ #
+ # This does not decode + to SP.
+ def self.decode_uri_component(str, enc=Encoding::UTF_8)
+ _decode_uri_component(/%\h\h/, str, enc)
+ end
+
+ def self._encode_uri_component(regexp, table, str, enc)
str = str.to_s.dup
if str.encoding != Encoding::ASCII_8BIT
if enc && enc != Encoding::ASCII_8BIT
@@ -328,19 +361,16 @@ module URI
end
str.force_encoding(Encoding::ASCII_8BIT)
end
- str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
+ str.gsub!(regexp, table)
str.force_encoding(Encoding::US_ASCII)
end
+ private_class_method :_encode_uri_component
- # Decodes given +str+ of URL-encoded form data.
- #
- # This decodes + to SP.
- #
- # See URI.encode_www_form_component, URI.decode_www_form.
- def self.decode_www_form_component(str, enc=Encoding::UTF_8)
+ def self._decode_uri_component(regexp, str, enc)
raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str)
- str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
+ str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc)
end
+ private_class_method :_decode_uri_component
# Generates URL-encoded form data from given +enum+.
#