Clean up size calculation for binary_to_term/1

This commit cleans up the size calculation phase of binary_to_term/1: * Count sub terms in an **unsigned** 32-bit integer instead of in an **signed** 32-bit integer to ensure that the counter wraps on overflow. While the existing code worked on all platform we have tested it on, signed overflow is is undefined behavior in the C standard. * Be paranoid and use the overflow-checking ADDTERM() macro for **all** additions to the `term` variable. * Reject lists and large tuples early if the rest of the binary being decoded is obviously too short. (The same early rejection was already done for maps.) * Explicitly reject decoding maps with 2^31 or more elements on 64-bit platforms (on 32-bit platforms there is already code to reject maps with 2^30 or more elements). Decoding maps with that many elements has never worked correctly, but would fail in random ways (because the `term` variable would overflow and miscount the sub terms). We considered counting sub terms in a 64-bit counter on 64-bit machines to allow truly ginormous terms to be decoded, but decided that such terms are probably a mistake and that it is better to reject them.
author: Björn Gustavsson <bjorn@erlang.org> 2021-09-09 09:28:57 +0200
committer: Björn Gustavsson <bjorn@erlang.org> 2022-03-01 08:12:45 +0100
commit: ebcbb97b4ec223464cac3d94375739a248ddef6e (patch)
tree: 8d280f74d4d659fc325f2f91ecf82b41e7339370 /erts/emulator
parent: d469db4ddf72479a42b60f5337504fb53b65079e (diff)
download: erlang-ebcbb97b4ec223464cac3d94375739a248ddef6e.tar.gz
2 files changed, 46 insertions, 21 deletions
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index 48c5c20bdd..8f68e155e7 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -5373,11 +5373,21 @@ static Sint
 decoded_size(const byte *ep, const byte* endp, int internal_tags, B2TContext* ctx)
 {
     Sint heap_size;
-    int terms;
     int atom_extra_skip;
     Uint n;
     SWord reds;
 
+    /* Keep track of the current number of sub terms remaining to be decoded.
+     *
+     * We limit the number of sub terms to 2^32-1, even on 64-bit
+     * machines, because a term that has many sub-terms must be truly
+     * ginormous and is proably a mistake.
+     *
+     * This means that a map with 2^31 or more elements cannot be decoded,
+     * even on a 64-bit machine.
+     */
+    Uint32 terms;
+
     if (ctx) {
         reds = ctx->reds;
         if (ctx->u.sc.ep) {
@@ -5416,9 +5426,11 @@ init_done:
 	 if ((sz) > endp-ep) { goto error; }	\
     } while (0)
 
+/* Increment the number of terms that remain to decode
+ * and check for the term counter wrapping around. */
 #define ADDTERMS(n)				\
     do {					\
-        int before = terms;		        \
+        Uint32 before = terms;		        \
 	terms += (n);                           \
 	if (terms < before) goto error;     	\
     } while (0)
@@ -5503,7 +5515,7 @@ init_done:
 	case_PID:
 	    /* In case it is an external pid */
 	    heap_size += EXTERNAL_PID_HEAP_SIZE;
-	    terms++;
+            ADDTERMS(1);
 	    break;
         case V4_PORT_EXT:
 	    atom_extra_skip = 12;
@@ -5516,7 +5528,7 @@ init_done:
 	case_PORT:
 	    /* In case it is an external port */
 	    heap_size += EXTERNAL_PORT_HEAP_SIZE;
-	    terms++;
+            ADDTERMS(1);
 	    break;
 	case NEWER_REFERENCE_EXT:
 	    atom_extra_skip = 4;
@@ -5541,7 +5553,7 @@ init_done:
 #else
 		heap_size += EXTERNAL_THING_HEAD_SIZE + id_words;
 #endif
-		terms++;
+                ADDTERMS(1);
 		break;
 	    }
 	case REFERENCE_EXT:
@@ -5556,20 +5568,23 @@ init_done:
 	    CHKSIZE(4);
 	    n = get_uint32(ep);
 	    ep += 4;
-	    ADDTERMS(n);
-	    terms++;
+            CHKSIZE(n); /* Fail faster if the binary is too short. */
+            /* Count terms in two operations to avoid overflow. */
+            ADDTERMS(n);
+            ADDTERMS(1);
 	    heap_size += 2 * n;
 	    break;
 	case SMALL_TUPLE_EXT:
 	    CHKSIZE(1);
 	    n = *ep++;
-	    terms += n;
+            ADDTERMS(n);
 	    heap_size += n + 1;
 	    break;
 	case LARGE_TUPLE_EXT:
 	    CHKSIZE(4);
 	    n = get_uint32(ep);
 	    ep += 4;
+            CHKSIZE(n); /* Fail faster if the binary is too short. */
 	    ADDTERMS(n);
 	    heap_size += n + 1;
 	    break;
@@ -5577,19 +5592,25 @@ init_done:
 	    CHKSIZE(4);
 	    n = get_uint32(ep);
 	    ep += 4;
-	    ADDTERMS(2*n);
             if (n <= MAP_SMALL_MAP_LIMIT) {
                 heap_size += 3 + n + 1 + n;
-            } else {
-#if !defined(ARCH_64)
-                if ((n >> 30) != 0) {
-                    /* Can't possibly fit in memory. */
-                    goto error;
-                }
+#if defined(ARCH_64)
+            } else if ((n >> 31) != 0) {
+                /* Avoid overflow by limiting the number of elements in
+                 * a map to 2^31-1 (about 2 billions). */
+                goto error;
+#else
+            } else if ((n >> 30) != 0) {
+                /* Can't possibly fit in memory on 32-bit machine. */
+                goto error;
 #endif
-                CHKSIZE(2*n);   /* Conservative size check */
+            } else {
+                CHKSIZE(2*(Uint)n); /* Fail faster if the binary is too short. */
                 heap_size += HASHMAP_ESTIMATED_HEAP_SIZE(n);
             }
+            /* Count terms in two operations to avoid overflow. */
+            ADDTERMS(n);
+            ADDTERMS(n);
 	    break;
 	case STRING_EXT:
 	    CHKSIZE(2);
@@ -5628,7 +5649,7 @@ init_done:
 	    }
 	    break;
 	case EXPORT_EXT:
-	    terms += 3;
+	    ADDTERMS(3);
 	    heap_size += ERL_FUN_SIZE;
 	    break;
 	case NEW_FUN_EXT:
@@ -5646,7 +5667,7 @@ init_done:
 		if (num_free > MAX_ARG) {
 		    goto error;
 		}
-		terms += 4 + num_free;
+		ADDTERMS(4 + num_free);
 		heap_size += ERL_FUN_SIZE + num_free;
 		break;
 	    }
@@ -5684,7 +5705,7 @@ init_done:
 	}
         terms--;
 
-        if (ctx && --reds <= 0 && terms > 0) {
+        if (ctx && --reds <= 0 && terms != 0) {
             ctx->u.sc.heap_size = heap_size;
             ctx->u.sc.terms = terms;
             ctx->u.sc.ep = ep;
@@ -5692,9 +5713,8 @@ init_done:
             ctx->reds = 0;
             return 0;
         }
-    }while (terms > 0);
+    } while (terms != 0);
 
-    /* 'terms' may be non-zero if it has wrapped around */
     if (terms == 0) {
         if (ctx) {
             ctx->state = B2TDecodeInit;
diff --git a/erts/emulator/test/binary_SUITE.erl b/erts/emulator/test/binary_SUITE.erl
index 58d4f83a2c..c23b937fc3 100644
--- a/erts/emulator/test/binary_SUITE.erl
+++ b/erts/emulator/test/binary_SUITE.erl
@@ -1545,11 +1545,14 @@ test_terms(Test_Func) ->
     Test_Func("abcdef"),
     Test_Func([a, b, 1, 2]),
     Test_Func([a|b]),
+    Test_Func([make_port(), make_ref(), make_pid(), fun() -> ok end,
+               Very_Big | lists:seq(1, 75)]),
 
     Test_Func({}),
     Test_Func({1}),
     Test_Func({a, b}),
     Test_Func({a, b, c}),
+    Test_Func({make_port(), make_ref(), make_pid(), fun() -> ok end}),
     Test_Func(list_to_tuple(lists:seq(0, 255))),
     Test_Func(list_to_tuple(lists:seq(0, 256))),
 
@@ -1600,9 +1603,11 @@ test_terms(Test_Func) ->
     Test_Func(<<42:10>>),
     Test_Func(list_to_bitstring([<<5:6>>|lists:seq(0, 255)])),
 
+    %% Funs in a list.
     Test_Func(F = fun(A) -> 42*A end),
     Test_Func(lists:duplicate(32, F)),
 
+    %% External funs in a list.
     Test_Func(FF = fun binary_SUITE:all/0),
     Test_Func(lists:duplicate(32, FF)),
author	Björn Gustavsson <bjorn@erlang.org>	2021-09-09 09:28:57 +0200
committer	Björn Gustavsson <bjorn@erlang.org>	2022-03-01 08:12:45 +0100
commit	ebcbb97b4ec223464cac3d94375739a248ddef6e (patch)
tree	8d280f74d4d659fc325f2f91ecf82b41e7339370 /erts/emulator
parent	d469db4ddf72479a42b60f5337504fb53b65079e (diff)
download	erlang-ebcbb97b4ec223464cac3d94375739a248ddef6e.tar.gz