summaryrefslogtreecommitdiff
path: root/lib/rack/multipart/parser.rb
diff options
context:
space:
mode:
authorJeremy Evans <code@jeremyevans.net>2023-04-28 13:29:49 -0700
committerJeremy Evans <code@jeremyevans.net>2023-04-28 15:10:10 -0700
commit51b0c26bedf3b876188ff11d011d5c84e4941c49 (patch)
treeeca8aa112d2a308a0358c9a7ae03527ded7f237d /lib/rack/multipart/parser.rb
parentab360dd5ba16361c562bdf482af8ab14141807a6 (diff)
downloadrack-51b0c26bedf3b876188ff11d011d5c84e4941c49.tar.gz
Add Content-Disposition parameter parser
The ReDoS fix in ee25ab9a7ee981d7578f559701085b0cf39bde77 breaks valid requests, because colons are valid inside parameter values. You cannot use a regexp scan and ensure correct behavior, since values inside parameters can be escaped. Issues like this are the reason for the famous "now they have two problems" quote regarding regexps. Add a basic parser for parameters in Content-Disposition. This parser is based purely on String#{index,slice!,[],==}, usually with string arguments for #index (though one case uses a simple regexp). There are two loops (one nested in the other), but the use of slice! ensures that forward progress is always made on each loop iteration. In addition to fixing the bug introduced by the security fix, this removes multiple separate passes over the mime head, one pass to get the parameter name for Content-Disposition, and a separate pass to get the filename. It removes the get_filename method, though some of the code is kept in a smaller normalize_filename method. This removes 18 separate regexp contents that were previously used just for the separate parse to find the filename for the content disposition. Fixes #2076
Diffstat (limited to 'lib/rack/multipart/parser.rb')
-rw-r--r--lib/rack/multipart/parser.rb132
1 files changed, 82 insertions, 50 deletions
diff --git a/lib/rack/multipart/parser.rb b/lib/rack/multipart/parser.rb
index cb33292e..f10d1832 100644
--- a/lib/rack/multipart/parser.rb
+++ b/lib/rack/multipart/parser.rb
@@ -32,28 +32,9 @@ module Rack
EOL = "\r\n"
MULTIPART = %r|\Amultipart/.*boundary=\"?([^\";,]+)\"?|ni
- TOKEN = /[^\s()<>,;:\\"\/\[\]?=]+/
- CONDISP = /Content-Disposition:\s*#{TOKEN}\s*/i
- VALUE = /"(?:\\"|[^"])*"|#{TOKEN}/
- BROKEN = /^#{CONDISP}.*;\s*filename=(#{VALUE})/i
MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{EOL}/ni
- MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:[^:]*;\s*name=(#{VALUE})/ni
+ MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:(.*)(?=#{EOL}(\S|\z))/ni
MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{EOL}]*)/ni
- # Updated definitions from RFC 2231
- ATTRIBUTE_CHAR = %r{[^ \x00-\x1f\x7f)(><@,;:\\"/\[\]?='*%]}
- ATTRIBUTE = /#{ATTRIBUTE_CHAR}+/
- SECTION = /\*[0-9]+/
- REGULAR_PARAMETER_NAME = /#{ATTRIBUTE}#{SECTION}?/
- REGULAR_PARAMETER = /(#{REGULAR_PARAMETER_NAME})=(#{VALUE})/
- EXTENDED_OTHER_NAME = /#{ATTRIBUTE}\*[1-9][0-9]*\*/
- EXTENDED_OTHER_VALUE = /%[0-9a-fA-F][0-9a-fA-F]|#{ATTRIBUTE_CHAR}/
- EXTENDED_OTHER_PARAMETER = /(#{EXTENDED_OTHER_NAME})=(#{EXTENDED_OTHER_VALUE}*)/
- EXTENDED_INITIAL_NAME = /#{ATTRIBUTE}(?:\*0)?\*/
- EXTENDED_INITIAL_VALUE = /[a-zA-Z0-9\-]*'[a-zA-Z0-9\-]*'#{EXTENDED_OTHER_VALUE}*/
- EXTENDED_INITIAL_PARAMETER = /(#{EXTENDED_INITIAL_NAME})=(#{EXTENDED_INITIAL_VALUE})/
- EXTENDED_PARAMETER = /#{EXTENDED_INITIAL_PARAMETER}|#{EXTENDED_OTHER_PARAMETER}/
- DISPPARM = /;\s*(?:#{REGULAR_PARAMETER}|#{EXTENDED_PARAMETER})\s*/
- RFC2183 = /^#{CONDISP}(#{DISPPARM})+$/i
class Parser
BUFSIZE = 1_048_576
@@ -316,13 +297,89 @@ module Rack
if @sbuf.scan_until(@head_regex)
head = @sbuf[1]
content_type = head[MULTIPART_CONTENT_TYPE, 1]
- if name = head[MULTIPART_CONTENT_DISPOSITION, 1]
- name = dequote(name)
+ if disposition = head[MULTIPART_CONTENT_DISPOSITION, 1]
+ # ignore actual content-disposition value (should always be form-data)
+ i = disposition.index(';')
+ disposition.slice!(0, i+1)
+ param = nil
+
+ # Parse parameter list
+ while i = disposition.index('=')
+ # Found end of parameter name, ensure forward progress in loop
+ param = disposition.slice!(0, i+1)
+
+ # Remove ending equals and preceding whitespace from parameter name
+ param.chomp!('=')
+ param.lstrip!
+
+ if disposition[0] == '"'
+ # Parameter value is quoted, parse it, handling backslash escapes
+ disposition.slice!(0, 1)
+ value = String.new
+
+ while i = disposition.index(/(["\\])/)
+ c = $1
+
+ # Append all content until ending quote or escape
+ value << disposition.slice!(0, i)
+
+ # Remove either backslash or ending quote,
+ # ensures forward progress in loop
+ disposition.slice!(0, 1)
+
+ # stop parsing parameter value if found ending quote
+ break if c == '"'
+
+ escaped_char = disposition.slice!(0, 1)
+ if param == 'filename' && escaped_char != '"'
+ # Possible IE uploaded filename, append both escape backslash and value
+ value << c << escaped_char
+ else
+ # Other only append escaped value
+ value << escaped_char
+ end
+ end
+ else
+ if i = disposition.index(';')
+ # Parameter value unquoted (which may be invalid), value ends at semicolon
+ value = disposition.slice!(0, i)
+ else
+ # If no ending semicolon, assume remainder of line is value and stop
+ # parsing
+ disposition.strip!
+ value = disposition
+ disposition = ''
+ end
+ end
+
+ case param
+ when 'name'
+ name = value
+ when 'filename'
+ filename = value
+ when 'filename*'
+ filename_star = value
+ # else
+ # ignore other parameters
+ end
+
+ # skip trailing semicolon, to proceed to next parameter
+ if i = disposition.index(';')
+ disposition.slice!(0, i+1)
+ end
+ end
else
name = head[MULTIPART_CONTENT_ID, 1]
end
- filename = get_filename(head)
+ if filename_star
+ encoding, _, filename = filename_star.split("'", 3)
+ filename = normalize_filename(filename || '')
+ filename.force_encoding(::Encoding.find(encoding))
+ elsif filename
+ filename = $1 if filename =~ /^"(.*)"$/
+ filename = normalize_filename(filename)
+ end
if name.nil? || name.empty?
name = filename || "#{content_type || TEXT_PLAIN}[]".dup
@@ -367,39 +424,14 @@ module Rack
end
end
- def get_filename(head)
- filename = nil
- case head
- when RFC2183
- params = Hash[*head.scan(DISPPARM).flat_map(&:compact)]
-
- if filename = params['filename*']
- encoding, _, filename = filename.split("'", 3)
- elsif filename = params['filename']
- filename = $1 if filename =~ /^"(.*)"$/
- end
- when BROKEN
- filename = $1
- filename = $1 if filename =~ /^"(.*)"$/
- end
-
- return unless filename
-
+ def normalize_filename(filename)
if filename.scan(/%.?.?/).all? { |s| /%[0-9a-fA-F]{2}/.match?(s) }
filename = Utils.unescape_path(filename)
end
filename.scrub!
- if filename !~ /\\[^\\"]/
- filename = filename.gsub(/\\(.)/, '\1')
- end
-
- if encoding
- filename.force_encoding ::Encoding.find(encoding)
- end
-
- filename
+ filename.split(/[\/\\]/).last || String.new
end
CHARSET = "charset"