Add JSON cookie coder. To be default after 1.6+

author: James Tucker <jftucker@gmail.com> 2013-02-06 14:13:10 -0800
committer: James Tucker <jftucker@gmail.com> 2013-02-07 18:32:49 -0800
commit: 71ce52cc7a7ecbbaa7c3db98ef6455de6c0aaca2 (patch)
tree: 657d1fe15ae5955a737bfd5611d9de3100b406a5
parent: c91f7373f4ed081d258d75cd208da50479148cdd (diff)
download: rack-71ce52cc7a7ecbbaa7c3db98ef6455de6c0aaca2.tar.gz
4 files changed, 635 insertions, 0 deletions
diff --git a/lib/rack.rb b/lib/rack.rb
index 0cfc501d..57119df3 100644
--- a/lib/rack.rb
+++ b/lib/rack.rb
@@ -80,4 +80,8 @@ module Rack
     autoload :Pool, "rack/session/pool"
     autoload :Memcache, "rack/session/memcache"
   end
+
+  module Utils
+    autoload :OkJson, "rack/utils/okjson" # file is required on first reference to Utils::OkJson
+  end
 end
diff --git a/lib/rack/session/cookie.rb b/lib/rack/session/cookie.rb
index 56469135..f2de5407 100644
--- a/lib/rack/session/cookie.rb
+++ b/lib/rack/session/cookie.rb
@@ -65,6 +65,19 @@ module Rack
             ::Marshal.load(super(str)) rescue nil
           end
         end
+
+        # Encodes to JSON text and hands the result to Base64. N.B. unlike the
+        # other coders, the encoded object itself must be a Hash or an Array.
+        class JSON < Base64
+          def encode(obj) # obj -> JSON text -> Base64#encode
+            super(::Rack::Utils::OkJson.encode(obj))
+          end
+
+          def decode(str) # nil for a nil/false str or when decoding raises a StandardError
+            return unless str
+            ::Rack::Utils::OkJson.decode(super(str)) rescue nil
+          end
+        end
       end
 
       # Use no encoding for session cookies
diff --git a/lib/rack/utils/okjson.rb b/lib/rack/utils/okjson.rb
new file mode 100644
index 00000000..dcf80e55
--- /dev/null
+++ b/lib/rack/utils/okjson.rb
@@ -0,0 +1,599 @@
+# encoding: UTF-8
+#
+# Copyright 2011, 2012 Keith Rarick
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+# See https://github.com/kr/okjson for updates.
+# Imported from the above repo @ d4e8643ad92e14b37d11326855499c7e4108ed17
+# Namespace modified for vendoring under Rack::Utils
+
+require 'stringio'
+
+# Some parts adapted from
+# http://golang.org/src/pkg/json/decode.go and
+# http://golang.org/src/pkg/utf8/utf8.go
+module Rack::Utils::OkJson
+  Upstream = 'LTD7LBKLZWFF7OZK' # NOTE(review): presumably an upstream okjson release id -- confirm
+  extend self # makes every method defined below callable as OkJson.method_name
+
+
+  # Decodes a json document in string s and
+  # returns the corresponding ruby value.
+  # String s must be valid UTF-8. If you have
+  # a string in some other encoding, convert
+  # it first.
+  #
+  # String values in the resulting structure
+  # will be UTF-8.
+  def decode(s)
+    ts = lex(s)
+    v, ts = textparse(ts)
+    if ts.length > 0
+      raise Error, 'trailing garbage'
+    end
+    v
+  end
+
+
+  # Parses a "json text" in the sense of RFC 4627.
+  # Returns the parsed value and any trailing tokens.
+  # Note: this is almost the same as valparse,
+  # except that it does not accept atomic values.
+  def textparse(ts)
+    if ts.length < 0
+      raise Error, 'empty'
+    end
+
+    typ, _, val = ts[0]
+    case typ
+    when '{' then objparse(ts)
+    when '[' then arrparse(ts)
+    else
+      raise Error, "unexpected #{val.inspect}"
+    end
+  end
+
+
+  # Parses a "value" in the sense of RFC 4627.
+  # Returns the parsed value and any trailing tokens.
+  def valparse(ts)
+    if ts.length < 0
+      raise Error, 'empty'
+    end
+
+    typ, _, val = ts[0]
+    case typ
+    when '{' then objparse(ts)
+    when '[' then arrparse(ts)
+    when :val,:str then [val, ts[1..-1]]
+    else
+      raise Error, "unexpected #{val.inspect}"
+    end
+  end
+
+
+  # Parses an "object" in the sense of RFC 4627.
+  # Returns the parsed value and any trailing tokens.
+  def objparse(ts)
+    ts = eat('{', ts)
+    obj = {}
+
+    if ts[0][0] == '}'
+      return obj, ts[1..-1]
+    end
+
+    k, v, ts = pairparse(ts)
+    obj[k] = v
+
+    if ts[0][0] == '}'
+      return obj, ts[1..-1]
+    end
+
+    loop do
+      ts = eat(',', ts)
+
+      k, v, ts = pairparse(ts)
+      obj[k] = v
+
+      if ts[0][0] == '}'
+        return obj, ts[1..-1]
+      end
+    end
+  end
+
+
+  # Parses a "member" in the sense of RFC 4627.
+  # Returns the parsed values and any trailing tokens.
+  def pairparse(ts)
+    (typ, _, k), ts = ts[0], ts[1..-1]
+    if typ != :str
+      raise Error, "unexpected #{k.inspect}"
+    end
+    ts = eat(':', ts)
+    v, ts = valparse(ts)
+    [k, v, ts]
+  end
+
+
+  # Parses an "array" in the sense of RFC 4627.
+  # Returns the parsed value and any trailing tokens.
+  def arrparse(ts)
+    ts = eat('[', ts)
+    arr = []
+
+    if ts[0][0] == ']'
+      return arr, ts[1..-1]
+    end
+
+    v, ts = valparse(ts)
+    arr << v
+
+    if ts[0][0] == ']'
+      return arr, ts[1..-1]
+    end
+
+    loop do
+      ts = eat(',', ts)
+
+      v, ts = valparse(ts)
+      arr << v
+
+      if ts[0][0] == ']'
+        return arr, ts[1..-1]
+      end
+    end
+  end
+
+
+  def eat(typ, ts)
+    if ts[0][0] != typ
+      raise Error, "expected #{typ} (got #{ts[0].inspect})"
+    end
+    ts[1..-1]
+  end
+
+
+  # Scans s and returns a list of json tokens,
+  # excluding white space (as defined in RFC 4627).
+  def lex(s)
+    ts = []
+    while s.length > 0
+      typ, lexeme, val = tok(s)
+      if typ == nil
+        raise Error, "invalid character at #{s[0,10].inspect}"
+      end
+      if typ != :space
+        ts << [typ, lexeme, val]
+      end
+      s = s[lexeme.length..-1]
+    end
+    ts
+  end
+
+
+  # Scans the first token in s and returns a
+  # 3-element list, or an empty list if s does
+  # not begin with a valid token.
+  #
+  # The first list element is one of
+  # '{', '}', ':', ',', '[', ']',
+  # :val, :str, and :space.
+  #
+  # The second element is the lexeme.
+  #
+  # The third element is the value of the
+  # token for :val and :str, otherwise
+  # it is the lexeme.
+  def tok(s)
+    case s[0] # dispatch on the first character of s
+    when ?{  then ['{', s[0,1], s[0,1]]
+    when ?}  then ['}', s[0,1], s[0,1]]
+    when ?:  then [':', s[0,1], s[0,1]]
+    when ?,  then [',', s[0,1], s[0,1]]
+    when ?[  then ['[', s[0,1], s[0,1]]
+    when ?]  then [']', s[0,1], s[0,1]]
+    when ?n  then nulltok(s)
+    when ?t  then truetok(s)
+    when ?f  then falsetok(s)
+    when ?"  then strtok(s)
+    when Spc then [:space, s[0,1], s[0,1]] # each white space character is its own token
+    when ?\t then [:space, s[0,1], s[0,1]]
+    when ?\n then [:space, s[0,1], s[0,1]]
+    when ?\r then [:space, s[0,1], s[0,1]]
+    else          numtok(s) # numtok returns [] when s does not start with a number
+    end
+  end
+
+
+  def nulltok(s);  s[0,4] != 'null'  ? [] : [:val, 'null',  nil]   end # [] means no token here
+  def truetok(s);  s[0,4] != 'true'  ? [] : [:val, 'true',  true]  end
+  def falsetok(s); s[0,5] != 'false' ? [] : [:val, 'false', false] end
+
+
+  def numtok(s)
+    m = /-?([1-9][0-9]+|[0-9])([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s)
+    if m && m.begin(0) == 0
+      if m[3] && !m[2]
+        [:val, m[0], Integer(m[1])*(10**Integer(m[3][1..-1]))]
+      elsif m[2]
+        [:val, m[0], Float(m[0])]
+      else
+        [:val, m[0], Integer(m[0])]
+      end
+    else
+      []
+    end
+  end
+
+
+  def strtok(s)
+    m = /"([^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s)
+    if ! m
+      raise Error, "invalid string literal at #{abbrev(s)}"
+    end
+    [:str, m[0], unquote(m[0])]
+  end
+
+
+  def abbrev(s)
+    t = s[0,10]
+    p = t['`']
+    t = t[0,p] if p
+    t = t + '...' if t.length < s.length
+    '`' + t + '`'
+  end
+
+
+  # Converts a quoted json string literal q into a UTF-8-encoded string.
+  # The rules are different than for Ruby, so we cannot use eval.
+  # Unquote will raise an error if q contains control characters.
+  def unquote(q)
+    q = q[1...-1]
+    a = q.dup # allocate a big enough string
+    rubydoesenc = false
+    # In ruby >= 1.9, a[w] is a codepoint, not a byte.
+    if a.class.method_defined?(:force_encoding)
+      a.force_encoding('UTF-8')
+      rubydoesenc = true
+    end
+    r, w = 0, 0
+    while r < q.length
+      c = q[r]
+      case true
+      when c == ?\\
+        r += 1
+        if r >= q.length
+          raise Error, "string literal ends with a \"\\\": \"#{q}\""
+        end
+
+        case q[r]
+        when ?",?\\,?/,?'
+          a[w] = q[r]
+          r += 1
+          w += 1
+        when ?b,?f,?n,?r,?t
+          a[w] = Unesc[q[r]]
+          r += 1
+          w += 1
+        when ?u
+          r += 1
+          uchar = begin
+            hexdec4(q[r,4])
+          rescue RuntimeError => e
+            raise Error, "invalid escape sequence \\u#{q[r,4]}: #{e}"
+          end
+          r += 4
+          if surrogate? uchar
+            if q.length >= r+6
+              uchar1 = hexdec4(q[r+2,4])
+              uchar = subst(uchar, uchar1)
+              if uchar != Ucharerr
+                # A valid pair; consume.
+                r += 6
+              end
+            end
+          end
+          if rubydoesenc
+            a[w] = '' << uchar
+            w += 1
+          else
+            w += ucharenc(a, w, uchar)
+          end
+        else
+          raise Error, "invalid escape char #{q[r]} in \"#{q}\""
+        end
+      when c == ?", c < Spc
+        raise Error, "invalid character in string literal \"#{q}\""
+      else
+        # Copy anything else byte-for-byte.
+        # Valid UTF-8 will remain valid UTF-8.
+        # Invalid UTF-8 will remain invalid UTF-8.
+        # In ruby >= 1.9, c is a codepoint, not a byte,
+        # in which case this is still what we want.
+        a[w] = c
+        r += 1
+        w += 1
+      end
+    end
+    a[0,w]
+  end
+
+
+  # Encodes unicode character u as UTF-8
+  # bytes in string a at position i.
+  # Returns the number of bytes written.
+  def ucharenc(a, i, u)
+    case true
+    when u <= Uchar1max # fits in 7 bits: a single byte
+      a[i] = (u & 0xff).chr
+      1
+    when u <= Uchar2max # fits in 11 bits: lead byte plus one continuation byte
+      a[i+0] = (Utag2 | ((u>>6)&0xff)).chr
+      a[i+1] = (Utagx | (u&Umaskx)).chr
+      2
+    when u <= Uchar3max # fits in 16 bits: lead byte plus two continuation bytes
+      a[i+0] = (Utag3 | ((u>>12)&0xff)).chr
+      a[i+1] = (Utagx | ((u>>6)&Umaskx)).chr
+      a[i+2] = (Utagx | (u&Umaskx)).chr
+      3
+    else # anything larger: lead byte plus three continuation bytes
+      a[i+0] = (Utag4 | ((u>>18)&0xff)).chr
+      a[i+1] = (Utagx | ((u>>12)&Umaskx)).chr
+      a[i+2] = (Utagx | ((u>>6)&Umaskx)).chr
+      a[i+3] = (Utagx | (u&Umaskx)).chr
+      4
+    end
+  end
+
+
+  def hexdec4(s)
+    if s.length != 4
+      raise Error, 'short'
+    end
+    (nibble(s[0])<<12) | (nibble(s[1])<<8) | (nibble(s[2])<<4) | nibble(s[3])
+  end
+
+
+  def subst(u1, u2)
+    if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3
+      return ((u1-Usurr1)<<10) | (u2-Usurr2) + Usurrself
+    end
+    return Ucharerr
+  end
+
+
+  def surrogate?(u)
+    Usurr1 <= u && u < Usurr3
+  end
+
+
+  def nibble(c)
+    case true
+    when ?0 <= c && c <= ?9 then c.ord - ?0.ord
+    when ?a <= c && c <= ?z then c.ord - ?a.ord + 10
+    when ?A <= c && c <= ?Z then c.ord - ?A.ord + 10
+    else
+      raise Error, "invalid hex code #{c}"
+    end
+  end
+
+
+  # Encodes x into a json text. It may contain only
+  # Array, Hash, String, Numeric, true, false, nil.
+  # (Note, this list excludes Symbol.)
+  # X itself must be an Array or a Hash.
+  # No other value can be encoded, and an error will
+  # be raised if x contains any other value, such as
+  # NaN, Infinity, Symbol, and Proc, or if a Hash key
+  # is not a String.
+  # Strings contained in x must be valid UTF-8.
+  def encode(x)
+    # Only a composite root value is accepted; dispatch on its class.
+    return objenc(x) if Hash === x
+    return arrenc(x) if Array === x
+
+    # Every other value is refused at the root.
+    raise Error, 'root value must be an Array or a Hash'
+  end
+
+
+  def valenc(x)
+    # Dispatches on the class (or literal) of x to the matching encoder.
+    if    Hash === x    then objenc(x)
+    elsif Array === x   then arrenc(x)
+    elsif String === x  then strenc(x)
+    elsif Numeric === x then numenc(x)
+    elsif true === x    then "true"
+    elsif false === x   then "false"
+    elsif nil === x     then "null"
+    else
+      raise Error, "cannot encode #{x.class}: #{x.inspect}"
+    end
+  end
+
+
+  def objenc(x)
+    '{' + x.map{|k,v| keyenc(k) + ':' + valenc(v)}.join(',') + '}'
+  end
+
+
+  def arrenc(a)
+    '[' + a.map{|x| valenc(x)}.join(',') + ']'
+  end
+
+
+  def keyenc(k)
+    # Only String keys can be encoded; anything else is rejected.
+    unless String === k
+      raise Error, "Hash key is not a string: #{k.inspect}"
+    end
+    strenc(k)
+  end
+
+
+  def strenc(s)
+    t = StringIO.new
+    t.putc(?")
+    r = 0
+
+    # In ruby >= 1.9, s[r] is a codepoint, not a byte.
+    rubydoesenc = s.class.method_defined?(:encoding)
+
+    while r < s.length
+      case s[r]
+      when ?"  then t.print('\\"')
+      when ?\\ then t.print('\\\\')
+      when ?\b then t.print('\\b')
+      when ?\f then t.print('\\f')
+      when ?\n then t.print('\\n')
+      when ?\r then t.print('\\r')
+      when ?\t then t.print('\\t')
+      else
+        c = s[r]
+        case true
+        when rubydoesenc
+          begin
+            c.ord # will raise an error if c is invalid UTF-8
+            t.write(c)
+          rescue
+            t.write(Ustrerr)
+          end
+        when Spc <= c && c <= ?~
+          t.putc(c)
+        else
+          n = ucharcopy(t, s, r) # ensure valid UTF-8 output
+          r += n - 1 # r is incremented below
+        end
+      end
+      r += 1
+    end
+    t.putc(?")
+    t.string
+  end
+
+
+  def numenc(x)
+    if ((x.nan? || x.infinite?) rescue false)
+      raise Error, "Numeric cannot be represented: #{x}"
+    end
+    "#{x}"
+  end
+
+
+  # Copies the valid UTF-8 bytes of a single character
+  # from string s at position i to I/O object t, and
+  # returns the number of bytes copied.
+  # If no valid UTF-8 char exists at position i,
+  # ucharcopy writes Ustrerr and returns 1.
+  def ucharcopy(t, s, i)
+    n = s.length - i # number of bytes available from position i
+    raise Utf8Error if n < 1
+
+    c0 = s[i].ord
+
+    # 1-byte, 7-bit sequence?
+    if c0 < Utagx
+      t.putc(c0)
+      return 1
+    end
+
+    raise Utf8Error if c0 < Utag2 # unexpected continuation byte?
+
+    raise Utf8Error if n < 2 # need continuation byte
+    c1 = s[i+1].ord
+    raise Utf8Error if c1 < Utagx || Utag2 <= c1 # c1 must look like 10xx xxxx
+
+    # 2-byte, 11-bit sequence?
+    if c0 < Utag3
+      raise Utf8Error if ((c0&Umask2)<<6 | (c1&Umaskx)) <= Uchar1max # value fits in 1 byte
+      t.putc(c0)
+      t.putc(c1)
+      return 2
+    end
+
+    # need second continuation byte
+    raise Utf8Error if n < 3
+
+    c2 = s[i+2].ord
+    raise Utf8Error if c2 < Utagx || Utag2 <= c2
+
+    # 3-byte, 16-bit sequence?
+    if c0 < Utag4
+      u = (c0&Umask3)<<12 | (c1&Umaskx)<<6 | (c2&Umaskx)
+      raise Utf8Error if u <= Uchar2max # value fits in 2 bytes
+      t.putc(c0)
+      t.putc(c1)
+      t.putc(c2)
+      return 3
+    end
+
+    # need third continuation byte
+    raise Utf8Error if n < 4
+    c3 = s[i+3].ord
+    raise Utf8Error if c3 < Utagx || Utag2 <= c3
+
+    # 4-byte, 21-bit sequence?
+    if c0 < Utag5
+      u = (c0&Umask4)<<18 | (c1&Umaskx)<<12 | (c2&Umaskx)<<6 | (c3&Umaskx)
+      raise Utf8Error if u <= Uchar3max # value fits in 3 bytes
+      t.putc(c0)
+      t.putc(c1)
+      t.putc(c2)
+      t.putc(c3)
+      return 4
+    end
+
+    raise Utf8Error # lead byte is Utag5 or above
+  rescue Utf8Error
+    t.write(Ustrerr) # every failure above lands here: emit the replacement char
+    return 1
+  end
+
+
+  class Utf8Error < ::StandardError # raised and rescued by ucharcopy
+  end
+
+
+  class Error < ::StandardError # raised by decode and encode for input they reject
+  end
+
+
+  Utagx = 0x80 # 1000 0000
+  Utag2 = 0xc0 # 1100 0000
+  Utag3 = 0xe0 # 1110 0000
+  Utag4 = 0xf0 # 1111 0000
+  Utag5 = 0xF8 # 1111 1000
+  Umaskx = 0x3f # 0011 1111
+  Umask2 = 0x1f # 0001 1111
+  Umask3 = 0x0f # 0000 1111
+  Umask4 = 0x07 # 0000 0111
+  Uchar1max = (1<<7) - 1 # largest value ucharenc writes as 1 byte
+  Uchar2max = (1<<11) - 1 # largest value ucharenc writes as 2 bytes
+  Uchar3max = (1<<16) - 1 # largest value ucharenc writes as 3 bytes
+  Ucharerr = 0xFFFD # unicode "replacement char"
+  Ustrerr = "\xef\xbf\xbd" # unicode "replacement char"
+  Usurrself = 0x10000 # offset added by subst to a combined surrogate pair
+  Usurr1 = 0xd800 # subst: first value of the pair lies in [Usurr1, Usurr2)
+  Usurr2 = 0xdc00 # subst: second value of the pair lies in [Usurr2, Usurr3)
+  Usurr3 = 0xe000 # exclusive upper bound used by surrogate?
+
+  Spc = ' '[0] # a byte value on ruby 1.8, a one-character string on ruby >= 1.9
+  Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t} # escape letter => character it denotes
+end
diff --git a/test/spec_session_cookie.rb b/test/spec_session_cookie.rb
index e325584d..8256f762 100644
--- a/test/spec_session_cookie.rb
+++ b/test/spec_session_cookie.rb
@@ -100,6 +100,25 @@ describe Rack::Session::Cookie do
         coder.decode('lulz').should.equal nil
       end
     end
+
+    describe 'JSON' do
+      it 'marshals and base64 encodes' do # encode == base64 of the OkJson text
+        coder = Rack::Session::Cookie::Base64::JSON.new
+        obj   = %w[fuuuuu]
+        coder.encode(obj).should.equal [::Rack::Utils::OkJson.encode(obj)].pack('m')
+      end
+
+      it 'marshals and base64 decodes' do # decode == OkJson.decode of the unpacked text
+        coder = Rack::Session::Cookie::Base64::JSON.new
+        str   = [::Rack::Utils::OkJson.encode(%w[fuuuuu])].pack('m')
+        coder.decode(str).should.equal ::Rack::Utils::OkJson.decode(str.unpack('m').first)
+      end
+
+      it 'rescues failures on decode' do # input that is not a valid cookie decodes to nil
+        coder = Rack::Session::Cookie::Base64::JSON.new
+        coder.decode('lulz').should.equal nil
+      end
+    end
   end
 
   it "warns if no secret is given" do
author	James Tucker <jftucker@gmail.com>	2013-02-06 14:13:10 -0800
committer	James Tucker <jftucker@gmail.com>	2013-02-07 18:32:49 -0800
commit	71ce52cc7a7ecbbaa7c3db98ef6455de6c0aaca2 (patch)
tree	657d1fe15ae5955a737bfd5611d9de3100b406a5
parent	c91f7373f4ed081d258d75cd208da50479148cdd (diff)
download	rack-71ce52cc7a7ecbbaa7c3db98ef6455de6c0aaca2.tar.gz