diff options
author | Jeremy Evans <code@jeremyevans.net> | 2023-04-28 13:29:49 -0700 |
---|---|---|
committer | Jeremy Evans <code@jeremyevans.net> | 2023-04-28 15:10:10 -0700 |
commit | 51b0c26bedf3b876188ff11d011d5c84e4941c49 (patch) | |
tree | eca8aa112d2a308a0358c9a7ae03527ded7f237d | |
parent | ab360dd5ba16361c562bdf482af8ab14141807a6 (diff) | |
download | rack-51b0c26bedf3b876188ff11d011d5c84e4941c49.tar.gz |
Add Content-Disposition parameter parser
The ReDoS fix in ee25ab9a7ee981d7578f559701085b0cf39bde77 breaks valid
requests, because colons are valid inside parameter values. You cannot
use a regexp scan and ensure correct behavior, since values inside
parameters can be escaped. Issues like this are the reason for the
famous "now they have two problems" quote regarding regexps.
Add a basic parser for parameters in Content-Disposition. This parser
is based purely on String#{index,slice!,[],==}, usually with string
arguments for #index (though one case uses a simple regexp). There
are two loops (one nested in the other), but the use of slice! ensures
that forward progress is always made on each loop iteration.
In addition to fixing the bug introduced by the security fix, this
removes multiple separate passes over the mime head, one pass to get
the parameter name for Content-Disposition, and a separate pass to get
the filename. It removes the get_filename method, though some of the
code is kept in a smaller normalize_filename method.
This removes 18 separate regexp contents that were previously used
just for the separate parse to find the filename for the content
disposition.
Fixes #2076
-rw-r--r-- | lib/rack/multipart/parser.rb | 132 | ||||
-rw-r--r-- | test/spec_multipart.rb | 80 |
2 files changed, 162 insertions, 50 deletions
diff --git a/lib/rack/multipart/parser.rb b/lib/rack/multipart/parser.rb index cb33292e..f10d1832 100644 --- a/lib/rack/multipart/parser.rb +++ b/lib/rack/multipart/parser.rb @@ -32,28 +32,9 @@ module Rack EOL = "\r\n" MULTIPART = %r|\Amultipart/.*boundary=\"?([^\";,]+)\"?|ni - TOKEN = /[^\s()<>,;:\\"\/\[\]?=]+/ - CONDISP = /Content-Disposition:\s*#{TOKEN}\s*/i - VALUE = /"(?:\\"|[^"])*"|#{TOKEN}/ - BROKEN = /^#{CONDISP}.*;\s*filename=(#{VALUE})/i MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{EOL}/ni - MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:[^:]*;\s*name=(#{VALUE})/ni + MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:(.*)(?=#{EOL}(\S|\z))/ni MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{EOL}]*)/ni - # Updated definitions from RFC 2231 - ATTRIBUTE_CHAR = %r{[^ \x00-\x1f\x7f)(><@,;:\\"/\[\]?='*%]} - ATTRIBUTE = /#{ATTRIBUTE_CHAR}+/ - SECTION = /\*[0-9]+/ - REGULAR_PARAMETER_NAME = /#{ATTRIBUTE}#{SECTION}?/ - REGULAR_PARAMETER = /(#{REGULAR_PARAMETER_NAME})=(#{VALUE})/ - EXTENDED_OTHER_NAME = /#{ATTRIBUTE}\*[1-9][0-9]*\*/ - EXTENDED_OTHER_VALUE = /%[0-9a-fA-F][0-9a-fA-F]|#{ATTRIBUTE_CHAR}/ - EXTENDED_OTHER_PARAMETER = /(#{EXTENDED_OTHER_NAME})=(#{EXTENDED_OTHER_VALUE}*)/ - EXTENDED_INITIAL_NAME = /#{ATTRIBUTE}(?:\*0)?\*/ - EXTENDED_INITIAL_VALUE = /[a-zA-Z0-9\-]*'[a-zA-Z0-9\-]*'#{EXTENDED_OTHER_VALUE}*/ - EXTENDED_INITIAL_PARAMETER = /(#{EXTENDED_INITIAL_NAME})=(#{EXTENDED_INITIAL_VALUE})/ - EXTENDED_PARAMETER = /#{EXTENDED_INITIAL_PARAMETER}|#{EXTENDED_OTHER_PARAMETER}/ - DISPPARM = /;\s*(?:#{REGULAR_PARAMETER}|#{EXTENDED_PARAMETER})\s*/ - RFC2183 = /^#{CONDISP}(#{DISPPARM})+$/i class Parser BUFSIZE = 1_048_576 @@ -316,13 +297,89 @@ module Rack if @sbuf.scan_until(@head_regex) head = @sbuf[1] content_type = head[MULTIPART_CONTENT_TYPE, 1] - if name = head[MULTIPART_CONTENT_DISPOSITION, 1] - name = dequote(name) + if disposition = head[MULTIPART_CONTENT_DISPOSITION, 1] + # ignore actual content-disposition value (should always be form-data) + i = disposition.index(';') + disposition.slice!(0, i+1) + param = nil + + # Parse parameter list + while i = disposition.index('=') + # Found end of parameter name, ensure forward progress in loop + param = disposition.slice!(0, i+1) + + # Remove ending equals and preceding whitespace from parameter name + param.chomp!('=') + param.lstrip! + + if disposition[0] == '"' + # Parameter value is quoted, parse it, handling backslash escapes + disposition.slice!(0, 1) + value = String.new + + while i = disposition.index(/(["\\])/) + c = $1 + + # Append all content until ending quote or escape + value << disposition.slice!(0, i) + + # Remove either backslash or ending quote, + # ensures forward progress in loop + disposition.slice!(0, 1) + + # stop parsing parameter value if found ending quote + break if c == '"' + + escaped_char = disposition.slice!(0, 1) + if param == 'filename' && escaped_char != '"' + # Possible IE uploaded filename, append both escape backslash and value + value << c << escaped_char + else + # Other only append escaped value + value << escaped_char + end + end + else + if i = disposition.index(';') + # Parameter value unquoted (which may be invalid), value ends at semicolon + value = disposition.slice!(0, i) + else + # If no ending semicolon, assume remainder of line is value and stop + # parsing + disposition.strip! + value = disposition + disposition = '' + end + end + + case param + when 'name' + name = value + when 'filename' + filename = value + when 'filename*' + filename_star = value + # else + # ignore other parameters + end + + # skip trailing semicolon, to proceed to next parameter + if i = disposition.index(';') + disposition.slice!(0, i+1) + end + end else name = head[MULTIPART_CONTENT_ID, 1] end - filename = get_filename(head) + if filename_star + encoding, _, filename = filename_star.split("'", 3) + filename = normalize_filename(filename || '') + filename.force_encoding(::Encoding.find(encoding)) + elsif filename + filename = $1 if filename =~ /^"(.*)"$/ + filename = normalize_filename(filename) + end if name.nil? || name.empty? name = filename || "#{content_type || TEXT_PLAIN}[]".dup @@ -367,39 +424,14 @@ module Rack end end - def get_filename(head) - filename = nil - case head - when RFC2183 - params = Hash[*head.scan(DISPPARM).flat_map(&:compact)] - - if filename = params['filename*'] - encoding, _, filename = filename.split("'", 3) - elsif filename = params['filename'] - filename = $1 if filename =~ /^"(.*)"$/ - end - when BROKEN - filename = $1 - filename = $1 if filename =~ /^"(.*)"$/ - end - - return unless filename - + def normalize_filename(filename) if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) } filename = Utils.unescape_path(filename) end filename.scrub! - if filename !~ /\\[^\\"]/ - filename = filename.gsub(/\\(.)/, '\1') - end - - if encoding - filename.force_encoding ::Encoding.find(encoding) - end - - filename + filename.split(/[\/\\]/).last || String.new end CHARSET = "charset" diff --git a/test/spec_multipart.rb b/test/spec_multipart.rb index d5ebe717..c36fe50c 100644 --- a/test/spec_multipart.rb +++ b/test/spec_multipart.rb @@ -231,6 +231,86 @@ describe Rack::Multipart do Timeout::timeout(10) { Rack::Multipart.parse_multipart(env) } end + content_disposition_parse = lambda do |params| + boundary = '---------------------------932620571087722842402766118' + + data = StringIO.new + data.write("--#{boundary}") + data.write("\r\n") + data.write("Content-Disposition: form-data;#{params}") + data.write("\r\n") + data.write("content-type:application/pdf\r\n") + data.write("\r\n") + data.write("--#{boundary}--\r\n") + data.rewind + + fixture = { + "CONTENT_TYPE" => "multipart/form-data; boundary=#{boundary}", + "CONTENT_LENGTH" => data.length.to_s, + :input => data, + } + + env = Rack::MockRequest.env_for '/', fixture + Rack::Multipart.parse_multipart(env) + end + + # see https://github.com/rack/rack/issues/2076 + it "parse content-disposition with modification date before name parameter" do + x = content_disposition_parse.call(' filename="sample.sql"; modification-date="Wed, 26 Apr 2023 11:01:34 GMT"; size=24; name="file"') + x.keys.must_equal ["file"] + x["file"][:filename].must_equal "sample.sql" + x["file"][:name].must_equal "file" + end + + it "parse content-disposition with colon in parameter value before name parameter" do + x = content_disposition_parse.call(' filename="sam:ple.sql"; name="file"') + x.keys.must_equal ["file"] + x["file"][:filename].must_equal "sam:ple.sql" + x["file"][:name].must_equal "file" + end + + it "parse content-disposition with name= in parameter value before name parameter" do + x = content_disposition_parse.call('filename="name=bar"; name="file"') + x.keys.must_equal ["file"] + x["file"][:filename].must_equal "name=bar" + x["file"][:name].must_equal "file" + end + + it "parse content-disposition with unquoted parameter values" do + x = content_disposition_parse.call('filename=sam:ple.sql; name=file') + x.keys.must_equal ["file"] + x["file"][:filename].must_equal "sam:ple.sql" + x["file"][:name].must_equal "file" + end + + it "parse content-disposition with backslash escaped parameter values" do + x = content_disposition_parse.call('filename="foo\"bar"; name=file') + x.keys.must_equal ["file"] + x["file"][:filename].must_equal "foo\"bar" + x["file"][:name].must_equal "file" + end + + it "parse content-disposition with IE full paths in filename" do + x = content_disposition_parse.call('filename="c:\foo\bar"; name=file;') + x.keys.must_equal ["file"] + x["file"][:filename].must_equal "bar" + x["file"][:name].must_equal "file" + end + + it "parse content-disposition with escaped parameter values in name" do + x = content_disposition_parse.call('filename="bar"; name="file\\\\-\\xfoo"') + x.keys.must_equal ["file\\-xfoo"] + x["file\\-xfoo"][:filename].must_equal "bar" + x["file\\-xfoo"][:name].must_equal "file\\-xfoo" + end + + it "parse content-disposition with escaped parameter values in name" do + x = content_disposition_parse.call('filename="bar"; name="file\\\\-\\xfoo"') + x.keys.must_equal ["file\\-xfoo"] + x["file\\-xfoo"][:filename].must_equal "bar" + x["file\\-xfoo"][:name].must_equal "file\\-xfoo" + end + it 'raises an EOF error on content-length mismatch' do env = Rack::MockRequest.env_for("/", multipart_fixture(:empty)) env['rack.input'] = StringIO.new |