1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
|
# frozen_string_literal: false
# HTTP response class.
#
# This class wraps together the response header and the response body (the
# entity requested).
#
# It mixes in the HTTPHeader module, which provides access to response
# header values both via hash-like methods and via individual readers.
#
# Note that each possible HTTP response code defines its own
# HTTPResponse subclass. All classes are defined under the Net module.
# Indentation indicates inheritance. For a list of the classes see Net::HTTP.
#
# Correspondence <code>HTTP code => class</code> is stored in CODE_TO_OBJ
# constant:
#
# Net::HTTPResponse::CODE_TO_OBJ['404'] #=> Net::HTTPNotFound
#
class Net::HTTPResponse
class << self
# true if the response has a body.
def body_permitted?
self::HAS_BODY
end
def exception_type # :nodoc: internal use only
self::EXCEPTION_TYPE
end
def read_new(sock) #:nodoc: internal use only
httpv, code, msg = read_status_line(sock)
res = response_class(code).new(httpv, code, msg)
each_response_header(sock) do |k,v|
res.add_field k, v
end
res
end
private
def read_status_line(sock)
str = sock.readline
m = /\AHTTP(?:\/(\d+\.\d+))?\s+(\d\d\d)(?:\s+(.*))?\z/in.match(str) or
raise Net::HTTPBadResponse, "wrong status line: #{str.dump}"
m.captures
end
def response_class(code)
CODE_TO_OBJ[code] or
CODE_CLASS_TO_OBJ[code[0,1]] or
Net::HTTPUnknownResponse
end
def each_response_header(sock)
key = value = nil
while true
line = sock.readuntil("\n", true).sub(/\s+\z/, '')
break if line.empty?
if line[0] == ?\s or line[0] == ?\t and value
value << ' ' unless value.empty?
value << line.strip
else
yield key, value if key
key, value = line.strip.split(/\s*:\s*/, 2)
raise Net::HTTPBadResponse, 'wrong header line format' if value.nil?
end
end
yield key, value if key
end
end
# next is to fix bug in RDoc, where the private inside class << self
# spills out.
public
include Net::HTTPHeader
def initialize(httpv, code, msg) #:nodoc: internal use only
@http_version = httpv
@code = code
@message = msg
initialize_http_header nil
@body = nil
@read = false
@uri = nil
@decode_content = false
@body_encoding = false
end
# The HTTP version supported by the server.
attr_reader :http_version
# The HTTP result code string. For example, '302'. You can also
# determine the response type by examining which response subclass
# the response object is an instance of.
attr_reader :code
# The HTTP result message sent by the server. For example, 'Not Found'.
attr_reader :message
alias msg message # :nodoc: obsolete
# The URI used to fetch this response. The response URI is only available
# if a URI was used to create the request.
attr_reader :uri
# Set to true automatically when the request did not contain an
# Accept-Encoding header from the user.
attr_accessor :decode_content
# The encoding to use for the response body. If Encoding, use that encoding.
# If other true value, attempt to detect the appropriate encoding, and use
# that.
attr_reader :body_encoding
# Set the encoding to use for the response body. If given a String, find
# the related Encoding.
def body_encoding=(value)
value = Encoding.find(value) if value.is_a?(String)
@body_encoding = value
end
def inspect
"#<#{self.class} #{@code} #{@message} readbody=#{@read}>"
end
#
# response <-> exception relationship
#
def code_type #:nodoc:
self.class
end
def error! #:nodoc:
message = @code
message += ' ' + @message.dump if @message
raise error_type().new(message, self)
end
def error_type #:nodoc:
self.class::EXCEPTION_TYPE
end
# Raises an HTTP error if the response is not 2xx (success).
def value
error! unless self.kind_of?(Net::HTTPSuccess)
end
def uri= uri # :nodoc:
@uri = uri.dup if uri
end
#
# header (for backward compatibility only; DO NOT USE)
#
def response #:nodoc:
warn "Net::HTTPResponse#response is obsolete", uplevel: 1 if $VERBOSE
self
end
def header #:nodoc:
warn "Net::HTTPResponse#header is obsolete", uplevel: 1 if $VERBOSE
self
end
def read_header #:nodoc:
warn "Net::HTTPResponse#read_header is obsolete", uplevel: 1 if $VERBOSE
self
end
#
# body
#
def reading_body(sock, reqmethodallowbody) #:nodoc: internal use only
@socket = sock
@body_exist = reqmethodallowbody && self.class.body_permitted?
begin
yield
self.body # ensure to read body
ensure
@socket = nil
end
end
# Gets the entity body returned by the remote HTTP server.
#
# If a block is given, the body is passed to the block, and
# the body is provided in fragments, as it is read in from the socket.
#
# If +dest+ argument is given, response is read into that variable,
# with <code>dest#<<</code> method (it could be String or IO, or any
# other object responding to <code><<</code>).
#
# Calling this method a second or subsequent time for the same
# HTTPResponse object will return the value already read.
#
# http.request_get('/index.html') {|res|
# puts res.read_body
# }
#
# http.request_get('/index.html') {|res|
# p res.read_body.object_id # 538149362
# p res.read_body.object_id # 538149362
# }
#
# # using iterator
# http.request_get('/index.html') {|res|
# res.read_body do |segment|
# print segment
# end
# }
#
def read_body(dest = nil, &block)
if @read
raise IOError, "#{self.class}\#read_body called twice" if dest or block
return @body
end
to = procdest(dest, block)
stream_check
if @body_exist
read_body_0 to
@body = to
else
@body = nil
end
@read = true
case enc = @body_encoding
when Encoding, false, nil
# Encoding: force given encoding
# false/nil: do not force encoding
else
# other value: detect encoding from body
enc = detect_encoding(@body)
end
@body.force_encoding(enc) if enc
@body
end
# Returns the full entity body.
#
# Calling this method a second or subsequent time will return the
# string already read.
#
# http.request_get('/index.html') {|res|
# puts res.body
# }
#
# http.request_get('/index.html') {|res|
# p res.body.object_id # 538149362
# p res.body.object_id # 538149362
# }
#
def body
read_body()
end
# Because it may be necessary to modify the body, Eg, decompression
# this method facilitates that.
def body=(value)
@body = value
end
alias entity body #:nodoc: obsolete
private
# :nodoc:
def detect_encoding(str, encoding=nil)
if encoding
elsif encoding = type_params['charset']
elsif encoding = check_bom(str)
else
encoding = case content_type&.downcase
when %r{text/x(?:ht)?ml|application/(?:[^+]+\+)?xml}
/\A<xml[ \t\r\n]+
version[ \t\r\n]*=[ \t\r\n]*(?:"[0-9.]+"|'[0-9.]*')[ \t\r\n]+
encoding[ \t\r\n]*=[ \t\r\n]*
(?:"([A-Za-z][\-A-Za-z0-9._]*)"|'([A-Za-z][\-A-Za-z0-9._]*)')/x =~ str
encoding = $1 || $2 || Encoding::UTF_8
when %r{text/html.*}
sniff_encoding(str)
end
end
return encoding
end
# :nodoc:
def sniff_encoding(str, encoding=nil)
# the encoding sniffing algorithm
# http://www.w3.org/TR/html5/parsing.html#determining-the-character-encoding
if enc = scanning_meta(str)
enc
# 6. last visited page or something
# 7. frequency
elsif str.ascii_only?
Encoding::US_ASCII
elsif str.dup.force_encoding(Encoding::UTF_8).valid_encoding?
Encoding::UTF_8
end
# 8. implementation-defined or user-specified
end
# :nodoc:
def check_bom(str)
case str.byteslice(0, 2)
when "\xFE\xFF"
return Encoding::UTF_16BE
when "\xFF\xFE"
return Encoding::UTF_16LE
end
if "\xEF\xBB\xBF" == str.byteslice(0, 3)
return Encoding::UTF_8
end
nil
end
# :nodoc:
def scanning_meta(str)
require 'strscan'
ss = StringScanner.new(str)
if ss.scan_until(/<meta[\t\n\f\r ]*/)
attrs = {} # attribute_list
got_pragma = false
need_pragma = nil
charset = nil
# step: Attributes
while attr = get_attribute(ss)
name, value = *attr
next if attrs[name]
attrs[name] = true
case name
when 'http-equiv'
got_pragma = true if value == 'content-type'
when 'content'
encoding = extracting_encodings_from_meta_elements(value)
unless charset
charset = encoding
end
need_pragma = true
when 'charset'
need_pragma = false
charset = value
end
end
# step: Processing
return if need_pragma.nil?
return if need_pragma && !got_pragma
charset = Encoding.find(charset) rescue nil
return unless charset
charset = Encoding::UTF_8 if charset == Encoding::UTF_16
return charset # tentative
end
nil
end
def get_attribute(ss)
ss.scan(/[\t\n\f\r \/]*/)
if ss.peek(1) == '>'
ss.getch
return nil
end
name = ss.scan(/[^=\t\n\f\r \/>]*/)
name.downcase!
raise if name.empty?
ss.skip(/[\t\n\f\r ]*/)
if ss.getch != '='
value = ''
return [name, value]
end
ss.skip(/[\t\n\f\r ]*/)
case ss.peek(1)
when '"'
ss.getch
value = ss.scan(/[^"]+/)
value.downcase!
ss.getch
when "'"
ss.getch
value = ss.scan(/[^']+/)
value.downcase!
ss.getch
when '>'
value = ''
else
value = ss.scan(/[^\t\n\f\r >]+/)
value.downcase!
end
[name, value]
end
def extracting_encodings_from_meta_elements(value)
# http://dev.w3.org/html5/spec/fetching-resources.html#algorithm-for-extracting-an-encoding-from-a-meta-element
if /charset[\t\n\f\r ]*=(?:"([^"]*)"|'([^']*)'|["']|\z|([^\t\n\f\r ;]+))/i =~ value
return $1 || $2 || $3
end
return nil
end
##
# Checks for a supported Content-Encoding header and yields an Inflate
# wrapper for this response's socket when zlib is present. If the
# Content-Encoding is not supported or zlib is missing, the plain socket is
# yielded.
#
# If a Content-Range header is present, a plain socket is yielded as the
# bytes in the range may not be a complete deflate block.
def inflater # :nodoc:
return yield @socket unless Net::HTTP::HAVE_ZLIB
return yield @socket unless @decode_content
return yield @socket if self['content-range']
v = self['content-encoding']
case v&.downcase
when 'deflate', 'gzip', 'x-gzip' then
self.delete 'content-encoding'
inflate_body_io = Inflater.new(@socket)
begin
yield inflate_body_io
success = true
ensure
begin
inflate_body_io.finish
rescue => err
# Ignore #finish's error if there is an exception from yield
raise err if success
end
end
when 'none', 'identity' then
self.delete 'content-encoding'
yield @socket
else
yield @socket
end
end
def read_body_0(dest)
inflater do |inflate_body_io|
if chunked?
read_chunked dest, inflate_body_io
return
end
@socket = inflate_body_io
clen = content_length()
if clen
@socket.read clen, dest, true # ignore EOF
return
end
clen = range_length()
if clen
@socket.read clen, dest
return
end
@socket.read_all dest
end
end
##
# read_chunked reads from +@socket+ for chunk-size, chunk-extension, CRLF,
# etc. and +chunk_data_io+ for chunk-data which may be deflate or gzip
# encoded.
#
# See RFC 2616 section 3.6.1 for definitions
def read_chunked(dest, chunk_data_io) # :nodoc:
total = 0
while true
line = @socket.readline
hexlen = line.slice(/[0-9a-fA-F]+/) or
raise Net::HTTPBadResponse, "wrong chunk size line: #{line}"
len = hexlen.hex
break if len == 0
begin
chunk_data_io.read len, dest
ensure
total += len
@socket.read 2 # \r\n
end
end
until @socket.readline.empty?
# none
end
end
def stream_check
raise IOError, 'attempt to read body out of block' if @socket.closed?
end
def procdest(dest, block)
raise ArgumentError, 'both arg and block given for HTTP method' if
dest and block
if block
Net::ReadAdapter.new(block)
else
dest || ''
end
end
##
# Inflater is a wrapper around Net::BufferedIO that transparently inflates
# zlib and gzip streams.
class Inflater # :nodoc:
##
# Creates a new Inflater wrapping +socket+
def initialize socket
@socket = socket
# zlib with automatic gzip detection
@inflate = Zlib::Inflate.new(32 + Zlib::MAX_WBITS)
end
##
# Finishes the inflate stream.
def finish
return if @inflate.total_in == 0
@inflate.finish
end
##
# Returns a Net::ReadAdapter that inflates each read chunk into +dest+.
#
# This allows a large response body to be inflated without storing the
# entire body in memory.
def inflate_adapter(dest)
if dest.respond_to?(:set_encoding)
dest.set_encoding(Encoding::ASCII_8BIT)
elsif dest.respond_to?(:force_encoding)
dest.force_encoding(Encoding::ASCII_8BIT)
end
block = proc do |compressed_chunk|
@inflate.inflate(compressed_chunk) do |chunk|
compressed_chunk.clear
dest << chunk
end
end
Net::ReadAdapter.new(block)
end
##
# Reads +clen+ bytes from the socket, inflates them, then writes them to
# +dest+. +ignore_eof+ is passed down to Net::BufferedIO#read
#
# Unlike Net::BufferedIO#read, this method returns more than +clen+ bytes.
# At this time there is no way for a user of Net::HTTPResponse to read a
# specific number of bytes from the HTTP response body, so this internal
# API does not return the same number of bytes as were requested.
#
# See https://bugs.ruby-lang.org/issues/6492 for further discussion.
def read clen, dest, ignore_eof = false
temp_dest = inflate_adapter(dest)
@socket.read clen, temp_dest, ignore_eof
end
##
# Reads the rest of the socket, inflates it, then writes it to +dest+.
def read_all dest
temp_dest = inflate_adapter(dest)
@socket.read_all temp_dest
end
end
end
|