summaryrefslogtreecommitdiff
path: root/simplejson/encoder.py
diff options
context:
space:
mode:
authorNick Babcock <nbabcock19@hotmail.com>2015-07-10 17:25:50 -0400
committerNick Babcock <nbabcock19@hotmail.com>2015-07-10 17:25:50 -0400
commita9e5e7256c9fb27126813c9430be1dab5f27f7ba (patch)
tree488eb7357fd5e781395161b7aa6be5aabea6a8ed /simplejson/encoder.py
parent49724ee06f038d27ab3d4adbe4ed403692799aa1 (diff)
parent0bcdf20cc525c1343b796cb8f247ea5213c6557e (diff)
downloadsimplejson-a9e5e7256c9fb27126813c9430be1dab5f27f7ba.tar.gz
Merge branch 'master' into iterable_as_array-gh1
Conflicts: CHANGES.txt conf.py index.rst setup.py simplejson/__init__.py simplejson/_speedups.c simplejson/encoder.py simplejson/tests/test_tuple.py
Diffstat (limited to 'simplejson/encoder.py')
-rw-r--r--simplejson/encoder.py373
1 files changed, 248 insertions, 125 deletions
diff --git a/simplejson/encoder.py b/simplejson/encoder.py
index 75ba993..d771bb4 100644
--- a/simplejson/encoder.py
+++ b/simplejson/encoder.py
@@ -1,11 +1,14 @@
"""Implementation of JSONEncoder
"""
+from __future__ import absolute_import
import re
-from decimal import Decimal
-
+from operator import itemgetter
+# Do not import Decimal directly to avoid reload issues
+import decimal
+from .compat import u, unichr, binary_type, string_types, integer_types, PY3
def _import_speedups():
try:
- from simplejson import _speedups
+ from . import _speedups
return _speedups.encode_basestring_ascii, _speedups.make_encoder
except ImportError:
return None, None
@@ -13,7 +16,10 @@ c_encode_basestring_ascii, c_make_encoder = _import_speedups()
from simplejson.decoder import PosInf
-ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
+#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
+# This is required because u() will mangle the string and ur'' isn't valid
+# python3 syntax
+ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]')
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
HAS_UTF8 = re.compile(r'[\x80-\xff]')
ESCAPE_DCT = {
@@ -24,32 +30,40 @@ ESCAPE_DCT = {
'\n': '\\n',
'\r': '\\r',
'\t': '\\t',
- u'\u2028': '\\u2028',
- u'\u2029': '\\u2029',
}
for i in range(0x20):
#ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
+for i in [0x2028, 0x2029]:
+ ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,))
FLOAT_REPR = repr
-def encode_basestring(s):
+def encode_basestring(s, _PY3=PY3, _q=u('"')):
"""Return a JSON representation of a Python string
"""
- if isinstance(s, str) and HAS_UTF8.search(s) is not None:
- s = s.decode('utf-8')
+ if _PY3:
+ if isinstance(s, binary_type):
+ s = s.decode('utf-8')
+ else:
+ if isinstance(s, str) and HAS_UTF8.search(s) is not None:
+ s = s.decode('utf-8')
def replace(match):
return ESCAPE_DCT[match.group(0)]
- return u'"' + ESCAPE.sub(replace, s) + u'"'
+ return _q + ESCAPE.sub(replace, s) + _q
-def py_encode_basestring_ascii(s):
+def py_encode_basestring_ascii(s, _PY3=PY3):
"""Return an ASCII-only JSON representation of a Python string
"""
- if isinstance(s, str) and HAS_UTF8.search(s) is not None:
- s = s.decode('utf-8')
+ if _PY3:
+ if isinstance(s, binary_type):
+ s = s.decode('utf-8')
+ else:
+ if isinstance(s, str) and HAS_UTF8.search(s) is not None:
+ s = s.decode('utf-8')
def replace(match):
s = match.group(0)
try:
@@ -103,11 +117,14 @@ class JSONEncoder(object):
"""
item_separator = ', '
key_separator = ': '
+
def __init__(self, skipkeys=False, ensure_ascii=True,
- check_circular=True, allow_nan=True, sort_keys=False,
- indent=None, separators=None, encoding='utf-8', default=None,
- use_decimal=True, namedtuple_as_object=True,
- tuple_as_array=True, iterable_as_array=False):
+ check_circular=True, allow_nan=True, sort_keys=False,
+ indent=None, separators=None, encoding='utf-8', default=None,
+ use_decimal=True, namedtuple_as_object=True,
+ tuple_as_array=True, bigint_as_string=False,
+ item_sort_key=None, for_json=False, ignore_nan=False,
+ int_as_string_bitcount=None, iterable_as_array=False):
"""Constructor for JSONEncoder, with sensible defaults.
If skipkeys is false, then it is a TypeError to attempt
@@ -139,9 +156,10 @@ class JSONEncoder(object):
versions of simplejson earlier than 2.1.0, an integer is also accepted
and is converted to a string with that many spaces.
- If specified, separators should be a (item_separator, key_separator)
- tuple. The default is (', ', ': '). To get the most compact JSON
- representation you should specify (',', ':') to eliminate whitespace.
+ If specified, separators should be an (item_separator, key_separator)
+ tuple. The default is (', ', ': ') if *indent* is ``None`` and
+ (',', ': ') otherwise. To get the most compact JSON representation,
+ you should specify (',', ':') to eliminate whitespace.
If specified, default is a function that gets called for objects
that can't otherwise be serialized. It should return a JSON encodable
@@ -155,7 +173,7 @@ class JSONEncoder(object):
be supported directly by the encoder. For the inverse, decode JSON
with ``parse_float=decimal.Decimal``.
- If namedtuple_as_object is true (the default), tuple subclasses with
+ If namedtuple_as_object is true (the default), objects with
``_asdict()`` methods will be encoded as JSON objects.
If tuple_as_array is true (the default), tuple (and subclasses) will
@@ -165,6 +183,27 @@ class JSONEncoder(object):
any object not in the above table that implements ``__iter__()``
will be encoded as a JSON array.
+ If bigint_as_string is true (not the default), ints 2**53 and higher
+ or lower than -2**53 will be encoded as strings. This is to avoid the
+ rounding that happens in Javascript otherwise.
+
+ If int_as_string_bitcount is a positive number (n), then int of size
+ greater than or equal to 2**n or lower than or equal to -2**n will be
+ encoded as strings.
+
+ If specified, item_sort_key is a callable used to sort the items in
+ each dictionary. This is useful if you want to sort items other than
+ in alphabetical order by key.
+
+ If for_json is true (not the default), objects with a ``for_json()``
+ method will use the return value of that method for encoding as JSON
+ instead of the object.
+
+ If *ignore_nan* is true (default: ``False``), then out of range
+ :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
+ as ``null`` in compliance with the ECMA-262 specification. If true,
+ this will override *allow_nan*.
+
"""
self.skipkeys = skipkeys
@@ -176,8 +215,13 @@ class JSONEncoder(object):
self.namedtuple_as_object = namedtuple_as_object
self.tuple_as_array = tuple_as_array
self.iterable_as_array = iterable_as_array
- if isinstance(indent, (int, long)):
- indent = ' ' * indent
+ self.bigint_as_string = bigint_as_string
+ self.item_sort_key = item_sort_key
+ self.for_json = for_json
+ self.ignore_nan = ignore_nan
+ self.int_as_string_bitcount = int_as_string_bitcount
+ if indent is not None and not isinstance(indent, string_types):
+ indent = indent * ' '
self.indent = indent
if separators is not None:
self.item_separator, self.key_separator = separators
@@ -216,12 +260,11 @@ class JSONEncoder(object):
"""
# This is for extremely simple cases and benchmarks.
- if isinstance(o, basestring):
- if isinstance(o, str):
- _encoding = self.encoding
- if (_encoding is not None
- and not (_encoding == 'utf-8')):
- o = o.decode(_encoding)
+ if isinstance(o, binary_type):
+ _encoding = self.encoding
+ if (_encoding is not None and not (_encoding == 'utf-8')):
+ o = o.decode(_encoding)
+ if isinstance(o, string_types):
if self.ensure_ascii:
return encode_basestring_ascii(o)
else:
@@ -257,11 +300,11 @@ class JSONEncoder(object):
_encoder = encode_basestring
if self.encoding != 'utf-8':
def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
- if isinstance(o, str):
+ if isinstance(o, binary_type):
o = o.decode(_encoding)
return _orig_encoder(o)
- def floatstr(o, allow_nan=self.allow_nan,
+ def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
_repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
# Check for specials. Note that this type of test is processor
# and/or platform-specific, so do tests which don't depend on
@@ -274,17 +317,23 @@ class JSONEncoder(object):
elif o == _neginf:
text = '-Infinity'
else:
+ if type(o) != float:
+ # See #118, do not trust custom str/repr
+ o = float(o)
return _repr(o)
- if not allow_nan:
+ if ignore_nan:
+ text = 'null'
+ elif not allow_nan:
raise ValueError(
"Out of range float values are not JSON compliant: " +
repr(o))
return text
-
key_memo = {}
+ int_as_string_bitcount = (
+ 53 if self.bigint_as_string else self.int_as_string_bitcount)
if (_one_shot and c_make_encoder is not None
and self.indent is None):
_iterencode = c_make_encoder(
@@ -292,14 +341,18 @@ class JSONEncoder(object):
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
- self.iterable_as_array)
+ int_as_string_bitcount,
+ self.item_sort_key, self.encoding, self.for_json,
+ self.ignore_nan, decimal.Decimal, self.iterable_as_array)
else:
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot, self.use_decimal,
self.namedtuple_as_object, self.tuple_as_array,
- self.iterable_as_array)
+ int_as_string_bitcount,
+ self.item_sort_key, self.encoding, self.for_json,
+ self.iterable_as_array, Decimal=decimal.Decimal)
try:
return _iterencode(o, 0)
finally:
@@ -336,24 +389,53 @@ class JSONEncoderForHTML(JSONEncoder):
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
_use_decimal, _namedtuple_as_object, _tuple_as_array,
+ _int_as_string_bitcount, _item_sort_key,
+ _encoding,_for_json,
_iterable_as_array,
## HACK: hand-optimized bytecode; turn globals into locals
- False=False,
- True=True,
+ _PY3=PY3,
ValueError=ValueError,
- basestring=basestring,
- Decimal=Decimal,
+ string_types=string_types,
+ Decimal=None,
dict=dict,
float=float,
id=id,
- int=int,
+ integer_types=integer_types,
isinstance=isinstance,
list=list,
- long=long,
str=str,
tuple=tuple,
iter=iter,
):
+ if _use_decimal and Decimal is None:
+ Decimal = decimal.Decimal
+ if _item_sort_key and not callable(_item_sort_key):
+ raise TypeError("item_sort_key must be None or callable")
+ elif _sort_keys and not _item_sort_key:
+ _item_sort_key = itemgetter(0)
+
+ if (_int_as_string_bitcount is not None and
+ (_int_as_string_bitcount <= 0 or
+ not isinstance(_int_as_string_bitcount, integer_types))):
+ raise TypeError("int_as_string_bitcount must be a positive integer")
+
+ def _encode_int(value):
+ skip_quoting = (
+ _int_as_string_bitcount is None
+ or
+ _int_as_string_bitcount < 1
+ )
+ if type(value) not in integer_types:
+ # See #118, do not trust custom str/repr
+ value = int(value)
+ if (
+ skip_quoting or
+ (-1 << _int_as_string_bitcount)
+ < value <
+ (1 << _int_as_string_bitcount)
+ ):
+ return str(value)
+ return '"' + str(value) + '"'
def _iterencode_list(lst, _current_indent_level):
if not lst:
@@ -379,7 +461,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
first = False
else:
buf = separator
- if isinstance(value, basestring):
+ if (isinstance(value, string_types) or
+ (_PY3 and isinstance(value, binary_type))):
yield buf + _encoder(value)
elif value is None:
yield buf + 'null'
@@ -387,26 +470,30 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield buf + 'true'
elif value is False:
yield buf + 'false'
- elif isinstance(value, (int, long)):
- yield buf + str(value)
+ elif isinstance(value, integer_types):
+ yield buf + _encode_int(value)
elif isinstance(value, float):
yield buf + _floatstr(value)
elif _use_decimal and isinstance(value, Decimal):
yield buf + str(value)
else:
yield buf
- if isinstance(value, list):
- chunks = _iterencode_list(value, _current_indent_level)
- elif (_namedtuple_as_object and isinstance(value, tuple) and
- hasattr(value, '_asdict')):
- chunks = _iterencode_dict(value._asdict(),
- _current_indent_level)
- elif _tuple_as_array and isinstance(value, tuple):
+ for_json = _for_json and getattr(value, 'for_json', None)
+ if for_json and callable(for_json):
+ chunks = _iterencode(for_json(), _current_indent_level)
+ elif isinstance(value, list):
chunks = _iterencode_list(value, _current_indent_level)
- elif isinstance(value, dict):
- chunks = _iterencode_dict(value, _current_indent_level)
else:
- chunks = _iterencode(value, _current_indent_level)
+ _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
+ if _asdict and callable(_asdict):
+ chunks = _iterencode_dict(_asdict(),
+ _current_indent_level)
+ elif _tuple_as_array and isinstance(value, tuple):
+ chunks = _iterencode_list(value, _current_indent_level)
+ elif isinstance(value, dict):
+ chunks = _iterencode_dict(value, _current_indent_level)
+ else:
+ chunks = _iterencode(value, _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
@@ -416,6 +503,32 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
if markers is not None:
del markers[markerid]
+ def _stringify_key(key):
+ if isinstance(key, string_types): # pragma: no cover
+ pass
+ elif isinstance(key, binary_type):
+ key = key.decode(_encoding)
+ elif isinstance(key, float):
+ key = _floatstr(key)
+ elif key is True:
+ key = 'true'
+ elif key is False:
+ key = 'false'
+ elif key is None:
+ key = 'null'
+ elif isinstance(key, integer_types):
+ if type(key) not in integer_types:
+ # See #118, do not trust custom str/repr
+ key = int(key)
+ key = str(key)
+ elif _use_decimal and isinstance(key, Decimal):
+ key = str(key)
+ elif _skipkeys:
+ key = None
+ else:
+ raise TypeError("key " + repr(key) + " is not a string")
+ return key
+
def _iterencode_dict(dct, _current_indent_level):
if not dct:
yield '{}'
@@ -435,37 +548,35 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
newline_indent = None
item_separator = _item_separator
first = True
- if _sort_keys:
- items = dct.items()
- items.sort(key=lambda kv: kv[0])
+ if _PY3:
+ iteritems = dct.items()
+ else:
+ iteritems = dct.iteritems()
+ if _item_sort_key:
+ items = []
+ for k, v in dct.items():
+ if not isinstance(k, string_types):
+ k = _stringify_key(k)
+ if k is None:
+ continue
+ items.append((k, v))
+ items.sort(key=_item_sort_key)
else:
- items = dct.iteritems()
+ items = iteritems
for key, value in items:
- if isinstance(key, basestring):
- pass
- # JavaScript is weakly typed for these, so it makes sense to
- # also allow them. Many encoders seem to do something like this.
- elif isinstance(key, float):
- key = _floatstr(key)
- elif key is True:
- key = 'true'
- elif key is False:
- key = 'false'
- elif key is None:
- key = 'null'
- elif isinstance(key, (int, long)):
- key = str(key)
- elif _skipkeys:
- continue
- else:
- raise TypeError("key " + repr(key) + " is not a string")
+ if not (_item_sort_key or isinstance(key, string_types)):
+ key = _stringify_key(key)
+ if key is None:
+ # _skipkeys must be True
+ continue
if first:
first = False
else:
yield item_separator
yield _encoder(key)
yield _key_separator
- if isinstance(value, basestring):
+ if (isinstance(value, string_types) or
+ (_PY3 and isinstance(value, binary_type))):
yield _encoder(value)
elif value is None:
yield 'null'
@@ -473,25 +584,29 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield 'true'
elif value is False:
yield 'false'
- elif isinstance(value, (int, long)):
- yield str(value)
+ elif isinstance(value, integer_types):
+ yield _encode_int(value)
elif isinstance(value, float):
yield _floatstr(value)
elif _use_decimal and isinstance(value, Decimal):
yield str(value)
else:
- if isinstance(value, list):
- chunks = _iterencode_list(value, _current_indent_level)
- elif (_namedtuple_as_object and isinstance(value, tuple) and
- hasattr(value, '_asdict')):
- chunks = _iterencode_dict(value._asdict(),
- _current_indent_level)
- elif _tuple_as_array and isinstance(value, tuple):
+ for_json = _for_json and getattr(value, 'for_json', None)
+ if for_json and callable(for_json):
+ chunks = _iterencode(for_json(), _current_indent_level)
+ elif isinstance(value, list):
chunks = _iterencode_list(value, _current_indent_level)
- elif isinstance(value, dict):
- chunks = _iterencode_dict(value, _current_indent_level)
else:
- chunks = _iterencode(value, _current_indent_level)
+ _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
+ if _asdict and callable(_asdict):
+ chunks = _iterencode_dict(_asdict(),
+ _current_indent_level)
+ elif _tuple_as_array and isinstance(value, tuple):
+ chunks = _iterencode_list(value, _current_indent_level)
+ elif isinstance(value, dict):
+ chunks = _iterencode_dict(value, _current_indent_level)
+ else:
+ chunks = _iterencode(value, _current_indent_level)
for chunk in chunks:
yield chunk
if newline_indent is not None:
@@ -502,7 +617,8 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
del markers[markerid]
def _iterencode(o, _current_indent_level):
- if isinstance(o, basestring):
+ if (isinstance(o, string_types) or
+ (_PY3 and isinstance(o, binary_type))):
yield _encoder(o)
elif o is None:
yield 'null'
@@ -510,45 +626,52 @@ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
yield 'true'
elif o is False:
yield 'false'
- elif isinstance(o, (int, long)):
- yield str(o)
+ elif isinstance(o, integer_types):
+ yield _encode_int(o)
elif isinstance(o, float):
yield _floatstr(o)
- elif isinstance(o, list):
- for chunk in _iterencode_list(o, _current_indent_level):
- yield chunk
- elif (_namedtuple_as_object and isinstance(o, tuple) and
- hasattr(o, '_asdict')):
- for chunk in _iterencode_dict(o._asdict(), _current_indent_level):
- yield chunk
- elif (_tuple_as_array and isinstance(o, tuple)):
- for chunk in _iterencode_list(o, _current_indent_level):
- yield chunk
- elif isinstance(o, dict):
- for chunk in _iterencode_dict(o, _current_indent_level):
- yield chunk
- elif _use_decimal and isinstance(o, Decimal):
- yield str(o)
else:
- while _iterable_as_array:
- # Markers are not checked here because it is valid for an
- # iterable to return self.
- try:
- o = iter(o)
- except TypeError:
- break
+ for_json = _for_json and getattr(o, 'for_json', None)
+ if for_json and callable(for_json):
+ for chunk in _iterencode(for_json(), _current_indent_level):
+ yield chunk
+ elif isinstance(o, list):
for chunk in _iterencode_list(o, _current_indent_level):
yield chunk
- return
- if markers is not None:
- markerid = id(o)
- if markerid in markers:
- raise ValueError("Circular reference detected")
- markers[markerid] = o
- o = _default(o)
- for chunk in _iterencode(o, _current_indent_level):
- yield chunk
- if markers is not None:
- del markers[markerid]
+ else:
+ _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
+ if _asdict and callable(_asdict):
+ for chunk in _iterencode_dict(_asdict(),
+ _current_indent_level):
+ yield chunk
+ elif (_tuple_as_array and isinstance(o, tuple)):
+ for chunk in _iterencode_list(o, _current_indent_level):
+ yield chunk
+ elif isinstance(o, dict):
+ for chunk in _iterencode_dict(o, _current_indent_level):
+ yield chunk
+ elif _use_decimal and isinstance(o, Decimal):
+ yield str(o)
+ else:
+ while _iterable_as_array:
+ # Markers are not checked here because it is valid for
+ # an iterable to return self.
+ try:
+ o = iter(o)
+ except TypeError:
+ break
+ for chunk in _iterencode_list(o, _current_indent_level):
+ yield chunk
+ return
+ if markers is not None:
+ markerid = id(o)
+ if markerid in markers:
+ raise ValueError("Circular reference detected")
+ markers[markerid] = o
+ o = _default(o)
+ for chunk in _iterencode(o, _current_indent_level):
+ yield chunk
+ if markers is not None:
+ del markers[markerid]
return _iterencode