summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Draper <matthew@trebex.net>2023-03-12 08:18:37 +1030
committerGitHub <noreply@github.com>2023-03-12 10:48:37 +1300
commit9f059d19647aeaef5c2cc683a333c06120caf939 (patch)
tree63a758a52706e85f706c4ef713dc2a820aa78f5c
parentc7088be5a2fa4e445b48b156570ad8e496104891 (diff)
downloadrack-9f059d19647aeaef5c2cc683a333c06120caf939.tar.gz
Split form/query parsing into two steps (#2038)
* Split form/query parsing into two steps. First, we parse the raw input into a stream of [key, value] pairs; only after that do we expand the pairs into the deep params hash. This allows a user to operate directly on the pair stream if they need to apply different semantics, without needing to rewind the input, and without creating a conflict with anything else (like a middleware) that wants to use Rack's standard GET / POST hash format.
-rw-r--r--lib/rack/constants.rb2
-rw-r--r--lib/rack/multipart.rb25
-rw-r--r--lib/rack/query_parser.rb51
-rw-r--r--lib/rack/request.rb83
-rw-r--r--test/spec_request.rb9
5 files changed, 129 insertions, 41 deletions
diff --git a/lib/rack/constants.rb b/lib/rack/constants.rb
index 13365935..5b0c181e 100644
--- a/lib/rack/constants.rb
+++ b/lib/rack/constants.rb
@@ -54,11 +54,13 @@ module Rack
RACK_RESPONSE_FINISHED = 'rack.response_finished'
RACK_REQUEST_FORM_INPUT = 'rack.request.form_input'
RACK_REQUEST_FORM_HASH = 'rack.request.form_hash'
+ RACK_REQUEST_FORM_PAIRS = 'rack.request.form_pairs'
RACK_REQUEST_FORM_VARS = 'rack.request.form_vars'
RACK_REQUEST_FORM_ERROR = 'rack.request.form_error'
RACK_REQUEST_COOKIE_HASH = 'rack.request.cookie_hash'
RACK_REQUEST_COOKIE_STRING = 'rack.request.cookie_string'
RACK_REQUEST_QUERY_HASH = 'rack.request.query_hash'
+ RACK_REQUEST_QUERY_PAIRS = 'rack.request.query_pairs'
RACK_REQUEST_QUERY_STRING = 'rack.request.query_string'
RACK_METHODOVERRIDE_ORIGINAL_METHOD = 'rack.methodoverride.original_method'
end
diff --git a/lib/rack/multipart.rb b/lib/rack/multipart.rb
index 165b4db3..4b02fb3e 100644
--- a/lib/rack/multipart.rb
+++ b/lib/rack/multipart.rb
@@ -19,6 +19,31 @@ module Rack
include BadRequest
end
+ # Accumulator for multipart form data, conforming to the QueryParser API.
+ # In future, the Parser could return the pair list directly, but that would
+ # change its API.
+ class ParamList # :nodoc:
+ def self.make_params
+ new
+ end
+
+ def self.normalize_params(params, key, value)
+ params << [key, value]
+ end
+
+ def initialize
+ @pairs = []
+ end
+
+ def <<(pair)
+ @pairs << pair
+ end
+
+ def to_params_hash
+ @pairs
+ end
+ end
+
class << self
def parse_multipart(env, params = Rack::Utils.default_query_parser)
unless io = env[RACK_INPUT]
diff --git a/lib/rack/query_parser.rb b/lib/rack/query_parser.rb
index 1592a01e..1c05ae82 100644
--- a/lib/rack/query_parser.rb
+++ b/lib/rack/query_parser.rb
@@ -37,19 +37,42 @@ module Rack
@param_depth_limit = param_depth_limit
end
- # Stolen from Mongrel, with some small modifications:
+ # Originally stolen from Mongrel, now with some modifications:
# Parses a query string by breaking it up at the '&'. You can also use this
# to parse cookies by changing the characters used in the second parameter
# (which defaults to '&').
- def parse_query(qs, separator = nil, &unescaper)
- unescaper ||= method(:unescape)
+ #
+ # Returns an array of 2-element arrays, where the first element is the
+ # key and the second element is the value.
+ def split_query(qs, separator = nil, &unescaper)
+ pairs = []
+
+ if qs && !qs.empty?
+ unescaper ||= method(:unescape)
+
+ qs.split(separator ? (COMMON_SEP[separator] || /[#{separator}] */n) : DEFAULT_SEP).each do |p|
+ next if p.empty?
+ pair = p.split('=', 2).map!(&unescaper)
+ pair << nil if pair.length == 1
+ pairs << pair
+ end
+ end
- params = make_params
+ pairs
+ rescue ArgumentError => e
+ raise InvalidParameterError, e.message, e.backtrace
+ end
- (qs || '').split(separator ? (COMMON_SEP[separator] || /[#{separator}] */n) : DEFAULT_SEP).each do |p|
- next if p.empty?
- k, v = p.split('=', 2).map!(&unescaper)
+ # Parses a query string by breaking it up at the '&'. You can also use this
+ # to parse cookies by changing the characters used in the second parameter
+ # (which defaults to '&').
+ #
+ # Returns a hash where each value is a string (when a key only appears once)
+ # or an array of strings (when a key appears more than once).
+ def parse_query(qs, separator = nil, &unescaper)
+ params = make_params
+ split_query(qs, separator, &unescaper).each do |k, v|
if cur = params[k]
if cur.class == Array
params[k] << v
@@ -61,7 +84,7 @@ module Rack
end
end
- return params.to_h
+ params.to_h
end
# parse_nested_query expands a query string into structural types. Supported
@@ -72,17 +95,11 @@ module Rack
def parse_nested_query(qs, separator = nil)
params = make_params
- unless qs.nil? || qs.empty?
- (qs || '').split(separator ? (COMMON_SEP[separator] || /[#{separator}] */n) : DEFAULT_SEP).each do |p|
- k, v = p.split('=', 2).map! { |s| unescape(s) }
-
- _normalize_params(params, k, v, 0)
- end
+ split_query(qs, separator).each do |k, v|
+ _normalize_params(params, k, v, 0)
end
- return params.to_h
- rescue ArgumentError => e
- raise InvalidParameterError, e.message, e.backtrace
+ params.to_h
end
# normalize_params recursively expands parameters into structural types. If
diff --git a/lib/rack/request.rb b/lib/rack/request.rb
index a3eb9926..e6969645 100644
--- a/lib/rack/request.rb
+++ b/lib/rack/request.rb
@@ -483,11 +483,22 @@ module Rack
# Returns the data received in the query string.
def GET
if get_header(RACK_REQUEST_QUERY_STRING) == query_string
- get_header(RACK_REQUEST_QUERY_HASH)
+ if query_hash = get_header(RACK_REQUEST_QUERY_HASH)
+ return query_hash
+ end
+ end
+
+ set_header(RACK_REQUEST_QUERY_HASH, expand_params(query_param_list))
+ end
+
+ def query_param_list
+ if get_header(RACK_REQUEST_QUERY_STRING) == query_string
+ get_header(RACK_REQUEST_QUERY_PAIRS)
else
- query_hash = parse_query(query_string, '&')
- set_header(RACK_REQUEST_QUERY_STRING, query_string)
- set_header(RACK_REQUEST_QUERY_HASH, query_hash)
+ query_pairs = split_query(query_string, '&')
+ set_header RACK_REQUEST_QUERY_STRING, query_string
+ set_header RACK_REQUEST_QUERY_HASH, nil
+ set_header(RACK_REQUEST_QUERY_PAIRS, query_pairs)
end
end
@@ -496,6 +507,16 @@ module Rack
# This method support both application/x-www-form-urlencoded and
# multipart/form-data.
def POST
+ if get_header(RACK_REQUEST_FORM_INPUT).equal?(get_header(RACK_INPUT))
+ if form_hash = get_header(RACK_REQUEST_FORM_HASH)
+ return form_hash
+ end
+ end
+
+ set_header(RACK_REQUEST_FORM_HASH, expand_params(body_param_list))
+ end
+
+ def body_param_list
if error = get_header(RACK_REQUEST_FORM_ERROR)
raise error.class, error.message, cause: error.cause
end
@@ -503,36 +524,36 @@ module Rack
begin
rack_input = get_header(RACK_INPUT)
- # If the form hash was already memoized:
- if form_hash = get_header(RACK_REQUEST_FORM_HASH)
- # And it was memoized from the same input:
- if get_header(RACK_REQUEST_FORM_INPUT).equal?(rack_input)
- return form_hash
+ form_pairs = nil
+
+ # If the form data has already been memoized from the same
+ # input:
+ if get_header(RACK_REQUEST_FORM_INPUT).equal?(rack_input)
+ if form_pairs = get_header(RACK_REQUEST_FORM_PAIRS)
+ return form_pairs
end
end
- # Otherwise, figure out how to parse the input:
if rack_input.nil?
- set_header RACK_REQUEST_FORM_INPUT, nil
- set_header(RACK_REQUEST_FORM_HASH, {})
+ form_pairs = []
elsif form_data? || parseable_data?
- unless set_header(RACK_REQUEST_FORM_HASH, parse_multipart)
- form_vars = get_header(RACK_INPUT).read
+ unless form_pairs = Rack::Multipart.extract_multipart(self, Rack::Multipart::ParamList)
+ form_vars = rack_input.read
# Fix for Safari Ajax postings that always append \0
# form_vars.sub!(/\0\z/, '') # performance replacement:
form_vars.slice!(-1) if form_vars.end_with?("\0")
set_header RACK_REQUEST_FORM_VARS, form_vars
- set_header RACK_REQUEST_FORM_HASH, parse_query(form_vars, '&')
+ form_pairs = split_query(form_vars, '&')
end
-
- set_header RACK_REQUEST_FORM_INPUT, get_header(RACK_INPUT)
- get_header RACK_REQUEST_FORM_HASH
else
- set_header RACK_REQUEST_FORM_INPUT, get_header(RACK_INPUT)
- set_header(RACK_REQUEST_FORM_HASH, {})
+ form_pairs = []
end
+
+ set_header RACK_REQUEST_FORM_INPUT, rack_input
+ set_header RACK_REQUEST_FORM_HASH, nil
+ set_header(RACK_REQUEST_FORM_PAIRS, form_pairs)
rescue => error
set_header(RACK_REQUEST_FORM_ERROR, error)
raise
@@ -672,6 +693,28 @@ module Rack
Rack::Multipart.extract_multipart(self, query_parser)
end
+ def split_query(query, d = '&')
+ query_parser = query_parser()
+ unless query_parser.respond_to?(:split_query)
+ query_parser = Utils.default_query_parser
+ unless query_parser.respond_to?(:split_query)
+ query_parser = QueryParser.make_default(0)
+ end
+ end
+
+ query_parser.split_query(query, d)
+ end
+
+ def expand_params(pairs, query_parser = query_parser())
+ params = query_parser.make_params
+
+ pairs.each do |key, value|
+ query_parser.normalize_params(params, key, value)
+ end
+
+ params.to_params_hash
+ end
+
def split_header(value)
value ? value.strip.split(/[,\s]+/) : []
end
diff --git a/test/spec_request.rb b/test/spec_request.rb
index 9a94b35f..2a3f792a 100644
--- a/test/spec_request.rb
+++ b/test/spec_request.rb
@@ -572,11 +572,12 @@ class RackRequestTest < Minitest::Spec
end
it "parse the query string" do
- request = make_request(Rack::MockRequest.env_for("/?foo=bar&quux=bla"))
- request.query_string.must_equal "foo=bar&quux=bla"
- request.GET.must_equal "foo" => "bar", "quux" => "bla"
+ request = make_request(Rack::MockRequest.env_for("/?foo=bar&quux=bla&nothing&empty="))
+ request.query_string.must_equal "foo=bar&quux=bla&nothing&empty="
+ request.GET.must_equal "foo" => "bar", "quux" => "bla", "nothing" => "", "empty" => ""
request.POST.must_be :empty?
- request.params.must_equal "foo" => "bar", "quux" => "bla"
+ request.params.must_equal "foo" => "bar", "quux" => "bla", "nothing" => "", "empty" => ""
+ request.query_param_list.must_equal [["foo", "bar"], ["quux", "bla"], ["nothing", nil], ["empty", ""]]
end
it "handles invalid unicode in query string value" do