summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeremy Evans <code@jeremyevans.net>2023-04-28 13:29:49 -0700
committerJeremy Evans <code@jeremyevans.net>2023-04-28 15:10:10 -0700
commit51b0c26bedf3b876188ff11d011d5c84e4941c49 (patch)
treeeca8aa112d2a308a0358c9a7ae03527ded7f237d
parentab360dd5ba16361c562bdf482af8ab14141807a6 (diff)
downloadrack-51b0c26bedf3b876188ff11d011d5c84e4941c49.tar.gz
Add Content-Disposition parameter parser
The ReDoS fix in ee25ab9a7ee981d7578f559701085b0cf39bde77 breaks valid requests, because colons are valid inside parameter values. You cannot use a regexp scan and ensure correct behavior, since values inside parameters can be escaped. Issues like this are the reason for the famous "now they have two problems" quote regarding regexps. Add a basic parser for parameters in Content-Disposition. This parser is based purely on String#{index,slice!,[],==}, usually with string arguments for #index (though one case uses a simple regexp). There are two loops (one nested in the other), but the use of slice! ensures that forward progress is always made on each loop iteration. In addition to fixing the bug introduced by the security fix, this removes multiple separate passes over the mime head, one pass to get the parameter name for Content-Disposition, and a separate pass to get the filename. It removes the get_filename method, though some of the code is kept in a smaller normalize_filename method. This removes 18 separate regexp contents that were previously used just for the separate parse to find the filename for the content disposition. Fixes #2076
-rw-r--r--lib/rack/multipart/parser.rb132
-rw-r--r--test/spec_multipart.rb80
2 files changed, 162 insertions, 50 deletions
diff --git a/lib/rack/multipart/parser.rb b/lib/rack/multipart/parser.rb
index cb33292e..f10d1832 100644
--- a/lib/rack/multipart/parser.rb
+++ b/lib/rack/multipart/parser.rb
@@ -32,28 +32,9 @@ module Rack
EOL = "\r\n"
MULTIPART = %r|\Amultipart/.*boundary=\"?([^\";,]+)\"?|ni
- TOKEN = /[^\s()<>,;:\\"\/\[\]?=]+/
- CONDISP = /Content-Disposition:\s*#{TOKEN}\s*/i
- VALUE = /"(?:\\"|[^"])*"|#{TOKEN}/
- BROKEN = /^#{CONDISP}.*;\s*filename=(#{VALUE})/i
MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{EOL}/ni
- MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:[^:]*;\s*name=(#{VALUE})/ni
+ MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:(.*)(?=#{EOL}(\S|\z))/ni
MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{EOL}]*)/ni
- # Updated definitions from RFC 2231
- ATTRIBUTE_CHAR = %r{[^ \x00-\x1f\x7f)(><@,;:\\"/\[\]?='*%]}
- ATTRIBUTE = /#{ATTRIBUTE_CHAR}+/
- SECTION = /\*[0-9]+/
- REGULAR_PARAMETER_NAME = /#{ATTRIBUTE}#{SECTION}?/
- REGULAR_PARAMETER = /(#{REGULAR_PARAMETER_NAME})=(#{VALUE})/
- EXTENDED_OTHER_NAME = /#{ATTRIBUTE}\*[1-9][0-9]*\*/
- EXTENDED_OTHER_VALUE = /%[0-9a-fA-F][0-9a-fA-F]|#{ATTRIBUTE_CHAR}/
- EXTENDED_OTHER_PARAMETER = /(#{EXTENDED_OTHER_NAME})=(#{EXTENDED_OTHER_VALUE}*)/
- EXTENDED_INITIAL_NAME = /#{ATTRIBUTE}(?:\*0)?\*/
- EXTENDED_INITIAL_VALUE = /[a-zA-Z0-9\-]*'[a-zA-Z0-9\-]*'#{EXTENDED_OTHER_VALUE}*/
- EXTENDED_INITIAL_PARAMETER = /(#{EXTENDED_INITIAL_NAME})=(#{EXTENDED_INITIAL_VALUE})/
- EXTENDED_PARAMETER = /#{EXTENDED_INITIAL_PARAMETER}|#{EXTENDED_OTHER_PARAMETER}/
- DISPPARM = /;\s*(?:#{REGULAR_PARAMETER}|#{EXTENDED_PARAMETER})\s*/
- RFC2183 = /^#{CONDISP}(#{DISPPARM})+$/i
class Parser
BUFSIZE = 1_048_576
@@ -316,13 +297,89 @@ module Rack
if @sbuf.scan_until(@head_regex)
head = @sbuf[1]
content_type = head[MULTIPART_CONTENT_TYPE, 1]
- if name = head[MULTIPART_CONTENT_DISPOSITION, 1]
- name = dequote(name)
+ if disposition = head[MULTIPART_CONTENT_DISPOSITION, 1]
+ # ignore actual content-disposition value (should always be form-data)
+ i = disposition.index(';')
+ disposition.slice!(0, i+1)
+ param = nil
+
+ # Parse parameter list
+ while i = disposition.index('=')
+ # Found end of parameter name, ensure forward progress in loop
+ param = disposition.slice!(0, i+1)
+
+ # Remove ending equals and preceding whitespace from parameter name
+ param.chomp!('=')
+ param.lstrip!
+
+ if disposition[0] == '"'
+ # Parameter value is quoted, parse it, handling backslash escapes
+ disposition.slice!(0, 1)
+ value = String.new
+
+ while i = disposition.index(/(["\\])/)
+ c = $1
+
+ # Append all content until ending quote or escape
+ value << disposition.slice!(0, i)
+
+ # Remove either backslash or ending quote,
+ # ensures forward progress in loop
+ disposition.slice!(0, 1)
+
+ # stop parsing parameter value if found ending quote
+ break if c == '"'
+
+ escaped_char = disposition.slice!(0, 1)
+ if param == 'filename' && escaped_char != '"'
+ # Possible IE uploaded filename, append both escape backslash and value
+ value << c << escaped_char
+ else
+ # Other only append escaped value
+ value << escaped_char
+ end
+ end
+ else
+ if i = disposition.index(';')
+ # Parameter value unquoted (which may be invalid), value ends at semicolon
+ value = disposition.slice!(0, i)
+ else
+ # If no ending semicolon, assume remainder of line is value and stop
+ # parsing
+ disposition.strip!
+ value = disposition
+ disposition = ''
+ end
+ end
+
+ case param
+ when 'name'
+ name = value
+ when 'filename'
+ filename = value
+ when 'filename*'
+ filename_star = value
+ # else
+ # ignore other parameters
+ end
+
+ # skip trailing semicolon, to proceed to next parameter
+ if i = disposition.index(';')
+ disposition.slice!(0, i+1)
+ end
+ end
else
name = head[MULTIPART_CONTENT_ID, 1]
end
- filename = get_filename(head)
+ if filename_star
+ encoding, _, filename = filename_star.split("'", 3)
+ filename = normalize_filename(filename || '')
+ filename.force_encoding(::Encoding.find(encoding))
+ elsif filename
+ filename = $1 if filename =~ /^"(.*)"$/
+ filename = normalize_filename(filename)
+ end
if name.nil? || name.empty?
name = filename || "#{content_type || TEXT_PLAIN}[]".dup
@@ -367,39 +424,14 @@ module Rack
end
end
- def get_filename(head)
- filename = nil
- case head
- when RFC2183
- params = Hash[*head.scan(DISPPARM).flat_map(&:compact)]
-
- if filename = params['filename*']
- encoding, _, filename = filename.split("'", 3)
- elsif filename = params['filename']
- filename = $1 if filename =~ /^"(.*)"$/
- end
- when BROKEN
- filename = $1
- filename = $1 if filename =~ /^"(.*)"$/
- end
-
- return unless filename
-
+ def normalize_filename(filename)
if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) }
filename = Utils.unescape_path(filename)
end
filename.scrub!
- if filename !~ /\\[^\\"]/
- filename = filename.gsub(/\\(.)/, '\1')
- end
-
- if encoding
- filename.force_encoding ::Encoding.find(encoding)
- end
-
- filename
+ filename.split(/[\/\\]/).last || String.new
end
CHARSET = "charset"
diff --git a/test/spec_multipart.rb b/test/spec_multipart.rb
index d5ebe717..c36fe50c 100644
--- a/test/spec_multipart.rb
+++ b/test/spec_multipart.rb
@@ -231,6 +231,86 @@ describe Rack::Multipart do
Timeout::timeout(10) { Rack::Multipart.parse_multipart(env) }
end
+ content_disposition_parse = lambda do |params|
+ boundary = '---------------------------932620571087722842402766118'
+
+ data = StringIO.new
+ data.write("--#{boundary}")
+ data.write("\r\n")
+ data.write("Content-Disposition: form-data;#{params}")
+ data.write("\r\n")
+ data.write("content-type:application/pdf\r\n")
+ data.write("\r\n")
+ data.write("--#{boundary}--\r\n")
+ data.rewind
+
+ fixture = {
+ "CONTENT_TYPE" => "multipart/form-data; boundary=#{boundary}",
+ "CONTENT_LENGTH" => data.length.to_s,
+ :input => data,
+ }
+
+ env = Rack::MockRequest.env_for '/', fixture
+ Rack::Multipart.parse_multipart(env)
+ end
+
+ # see https://github.com/rack/rack/issues/2076
+ it "parse content-disposition with modification date before name parameter" do
+ x = content_disposition_parse.call(' filename="sample.sql"; modification-date="Wed, 26 Apr 2023 11:01:34 GMT"; size=24; name="file"')
+ x.keys.must_equal ["file"]
+ x["file"][:filename].must_equal "sample.sql"
+ x["file"][:name].must_equal "file"
+ end
+
+ it "parse content-disposition with colon in parameter value before name parameter" do
+ x = content_disposition_parse.call(' filename="sam:ple.sql"; name="file"')
+ x.keys.must_equal ["file"]
+ x["file"][:filename].must_equal "sam:ple.sql"
+ x["file"][:name].must_equal "file"
+ end
+
+ it "parse content-disposition with name= in parameter value before name parameter" do
+ x = content_disposition_parse.call('filename="name=bar"; name="file"')
+ x.keys.must_equal ["file"]
+ x["file"][:filename].must_equal "name=bar"
+ x["file"][:name].must_equal "file"
+ end
+
+ it "parse content-disposition with unquoted parameter values" do
+ x = content_disposition_parse.call('filename=sam:ple.sql; name=file')
+ x.keys.must_equal ["file"]
+ x["file"][:filename].must_equal "sam:ple.sql"
+ x["file"][:name].must_equal "file"
+ end
+
+ it "parse content-disposition with backslash escaped parameter values" do
+ x = content_disposition_parse.call('filename="foo\"bar"; name=file')
+ x.keys.must_equal ["file"]
+ x["file"][:filename].must_equal "foo\"bar"
+ x["file"][:name].must_equal "file"
+ end
+
+ it "parse content-disposition with IE full paths in filename" do
+ x = content_disposition_parse.call('filename="c:\foo\bar"; name=file;')
+ x.keys.must_equal ["file"]
+ x["file"][:filename].must_equal "bar"
+ x["file"][:name].must_equal "file"
+ end
+
+ it "parse content-disposition with escaped parameter values in name" do
+ x = content_disposition_parse.call('filename="bar"; name="file\\\\-\\xfoo"')
+ x.keys.must_equal ["file\\-xfoo"]
+ x["file\\-xfoo"][:filename].must_equal "bar"
+ x["file\\-xfoo"][:name].must_equal "file\\-xfoo"
+ end
+
+ it "parse content-disposition with escaped parameter values in name" do
+ x = content_disposition_parse.call('filename="bar"; name="file\\\\-\\xfoo"')
+ x.keys.must_equal ["file\\-xfoo"]
+ x["file\\-xfoo"][:filename].must_equal "bar"
+ x["file\\-xfoo"][:name].must_equal "file\\-xfoo"
+ end
+
it 'raises an EOF error on content-length mismatch' do
env = Rack::MockRequest.env_for("/", multipart_fixture(:empty))
env['rack.input'] = StringIO.new