summaryrefslogtreecommitdiff
path: root/rts/StgStdThunks.cmm
diff options
context:
space:
mode:
Diffstat (limited to 'rts/StgStdThunks.cmm')
-rw-r--r--rts/StgStdThunks.cmm100
1 files changed, 100 insertions, 0 deletions
diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm
index 3c528f662f..c4f5d25881 100644
--- a/rts/StgStdThunks.cmm
+++ b/rts/StgStdThunks.cmm
@@ -13,6 +13,9 @@
#include "Cmm.h"
#include "Updates.h"
+import ghczmprim_GHCziCString_unpackCStringzh_info;
+import ghczmprim_GHCziCString_unpackCStringUtf8zh_info;
+
/* -----------------------------------------------------------------------------
The code for a thunk that simply extracts a field from a
single-constructor datatype depends only on the offset of the field
@@ -286,3 +289,100 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
StgThunk_payload(node,6));
}
}
+
+/* -----------------------------------------------------------------------------
+ Making strings
+ -------------------------------------------------------------------------- */
+
+/*
+ * Note [unpack_cstring closures]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Strings are extremely common. In Core they will typically manifest as the
+ * a pair of top-level bindings:
+ *
+ * s :: String
+ * s = unpackCString# s#
+ *
+ * s# :: Addr#
+ * s# = "hello world"#
+ *
+ * It turns out that `s` is a non-trivial amount of code which is duplicated
+ * for every `String` literal. To avoid this duplicate, we have a standard
+ * string-unpacking closure, unpack_cstring. Note that currently we only do
+ * this for ASCII strings; strings mentioning non-ASCII characters are
+ * represented by CAF applications of unpackCStringUtf8# as before.
+ *
+ * unpack_cstring closures are similar to standard THUNK_STATIC closures but
+ * with a non-GC pointer to a C-string at the end (the "extra" pointer).
+ * We must place this extra pointer at the end of the closure to ensure that
+ * it has a similar layout to a normal THUNK_STATIC closure, which has no space
+ * for free variables (since these would be contained in the thunk's code and SRT).
+ *
+ * When it is evaluated, an stg_unpack_cstring closure is updated to be an
+ * indirection to the resulting [Char], just as a normal unpackCString# thunk
+ * would be.
+ *
+ * Closure layout:
+ *
+ * ┌───────────────────┐ ┌──► ┌──────────────────────────┐
+ * │ stg_unpack_cstring│ │ │ "hello world ..." │
+ * ├───────────────────┤ │ └──────────────────────────┘
+ * │ indirectee │ │
+ * ├───────────────────┤ │
+ * │ static_link │ │
+ * ├───────────────────┤ │
+ * │ saved_info │ │
+ * ├───────────────────┤ │
+ * │ the_string ─┼───────┘
+ * └───────────────────┘
+ *
+ */
+
+stg_do_unpack_cstring(P_ node, P_ newCAF_ret) {
+ STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring, node, newCAF_ret);
+ W_ str;
+ str = StgThunk_payload(node, 2);
+ push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) {
+ jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringzh_info)(node, str);
+ }
+}
+
+INFO_TABLE(stg_unpack_cstring, 0, 0, THUNK_STATIC, "stg_unpack_cstring", "stg_unpack_cstring")
+ (P_ node)
+{
+ W_ newCAF_ret;
+ (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+ if (newCAF_ret == 0) {
+ // We raced with another thread to evaluate the CAF and they won;
+ // `node` should now be an indirection.
+ ENTER(node);
+ } else {
+ jump stg_do_unpack_cstring(node, newCAF_ret);
+ }
+}
+
+stg_do_unpack_cstring_utf8(P_ node, P_ newCAF_ret) {
+ STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring_utf8, node, newCAF_ret);
+ W_ str;
+ str = StgThunk_payload(node, 2);
+ push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) {
+ jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringUtf8zh_info)(node, str);
+ }
+}
+
+INFO_TABLE(stg_unpack_cstring_utf8, 0, 0, THUNK_STATIC, "stg_unpack_cstring_utf8", "stg_unpack_cstring_utf8")
+ (P_ node)
+{
+ W_ newCAF_ret;
+ (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+ if (newCAF_ret == 0) {
+ // We raced with another thread to evaluate the CAF and they won;
+ // `node` should now be an indirection.
+ ENTER(node);
+ } else {
+ jump stg_do_unpack_cstring_utf8(node, newCAF_ret);
+ }
+}
+