summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Pall <mike>2009-12-08 19:46:35 +0100
committerMike Pall <mike>2009-12-08 19:46:35 +0100
commit55b16959717084884fd4a0cbae6d19e3786c20c7 (patch)
treec8a07a43c13679751ed25a9d06796e9e7b2134a6
downloadluajit2-55b16959717084884fd4a0cbae6d19e3786c20c7.tar.gz
RELEASE LuaJIT-2.0.0-beta1v2.0.0-beta1
-rw-r--r--.gitignore11
-rw-r--r--Makefile84
-rw-r--r--README16
-rw-r--r--doc/api.html203
-rw-r--r--doc/bluequad-print.css166
-rw-r--r--doc/bluequad.css303
-rw-r--r--doc/changes.html281
-rw-r--r--doc/contact.html84
-rw-r--r--doc/faq.html141
-rw-r--r--doc/img/contact.pngbin0 -> 1340 bytes
-rw-r--r--doc/install.html216
-rw-r--r--doc/luajit.html120
-rw-r--r--doc/running.html233
-rw-r--r--doc/status.html235
-rw-r--r--dynasm/dasm_proto.h69
-rw-r--r--dynasm/dasm_x86.h467
-rw-r--r--dynasm/dasm_x86.lua1799
-rw-r--r--dynasm/dynasm.lua1070
-rw-r--r--etc/strict.lua41
-rw-r--r--lib/.gitignore1
-rw-r--r--lib/bc.lua182
-rw-r--r--lib/dis_x64.lua19
-rw-r--r--lib/dis_x86.lua824
-rw-r--r--lib/dump.lua567
-rw-r--r--lib/v.lua156
-rw-r--r--src/.gitignore8
-rw-r--r--src/Makefile326
-rw-r--r--src/Makefile.dep139
-rw-r--r--src/buildvm.c438
-rw-r--r--src/buildvm.h106
-rw-r--r--src/buildvm_asm.c220
-rw-r--r--src/buildvm_fold.c206
-rw-r--r--src/buildvm_lib.c365
-rw-r--r--src/buildvm_peobj.c303
-rw-r--r--src/buildvm_x86.dasc3592
-rw-r--r--src/lauxlib.h159
-rw-r--r--src/lib_aux.c438
-rw-r--r--src/lib_base.c560
-rw-r--r--src/lib_bit.c74
-rw-r--r--src/lib_debug.c366
-rw-r--r--src/lib_init.c37
-rw-r--r--src/lib_io.c538
-rw-r--r--src/lib_jit.c589
-rw-r--r--src/lib_math.c188
-rw-r--r--src/lib_os.c249
-rw-r--r--src/lib_package.c508
-rw-r--r--src/lib_string.c790
-rw-r--r--src/lib_table.c276
-rw-r--r--src/lj.supp6
-rw-r--r--src/lj_alloc.c1232
-rw-r--r--src/lj_alloc.h17
-rw-r--r--src/lj_api.c1046
-rw-r--r--src/lj_arch.h88
-rw-r--r--src/lj_asm.c3324
-rw-r--r--src/lj_asm.h17
-rw-r--r--src/lj_bc.c17
-rw-r--r--src/lj_bc.h235
-rw-r--r--src/lj_ctype.c44
-rw-r--r--src/lj_ctype.h40
-rw-r--r--src/lj_def.h226
-rw-r--r--src/lj_dispatch.c284
-rw-r--r--src/lj_dispatch.h64
-rw-r--r--src/lj_err.c763
-rw-r--r--src/lj_err.h40
-rw-r--r--src/lj_errmsg.h134
-rw-r--r--src/lj_ff.h18
-rw-r--r--src/lj_frame.h84
-rw-r--r--src/lj_func.c185
-rw-r--r--src/lj_func.h25
-rw-r--r--src/lj_gc.c800
-rw-r--r--src/lj_gc.h102
-rw-r--r--src/lj_gdbjit.c739
-rw-r--r--src/lj_gdbjit.h22
-rw-r--r--src/lj_ir.c461
-rw-r--r--src/lj_ir.h429
-rw-r--r--src/lj_iropt.h128
-rw-r--r--src/lj_jit.h279
-rw-r--r--src/lj_lex.c393
-rw-r--r--src/lj_lex.h63
-rw-r--r--src/lj_lib.c216
-rw-r--r--src/lj_lib.h84
-rw-r--r--src/lj_mcode.c260
-rw-r--r--src/lj_mcode.h23
-rw-r--r--src/lj_meta.c358
-rw-r--r--src/lj_meta.h33
-rw-r--r--src/lj_obj.c41
-rw-r--r--src/lj_obj.h676
-rw-r--r--src/lj_opt_dce.c79
-rw-r--r--src/lj_opt_fold.c1415
-rw-r--r--src/lj_opt_loop.c358
-rw-r--r--src/lj_opt_mem.c550
-rw-r--r--src/lj_opt_narrow.c430
-rw-r--r--src/lj_parse.c2198
-rw-r--r--src/lj_parse.h15
-rw-r--r--src/lj_record.c2136
-rw-r--r--src/lj_record.h17
-rw-r--r--src/lj_snap.c286
-rw-r--r--src/lj_snap.h19
-rw-r--r--src/lj_state.c255
-rw-r--r--src/lj_state.h31
-rw-r--r--src/lj_str.c301
-rw-r--r--src/lj_str.h45
-rw-r--r--src/lj_tab.c618
-rw-r--r--src/lj_tab.h41
-rw-r--r--src/lj_target.h132
-rw-r--r--src/lj_target_x86.h257
-rw-r--r--src/lj_trace.c591
-rw-r--r--src/lj_trace.h52
-rw-r--r--src/lj_traceerr.h59
-rw-r--r--src/lj_udata.c33
-rw-r--r--src/lj_udata.h14
-rw-r--r--src/lj_vm.h66
-rw-r--r--src/lj_vmevent.c56
-rw-r--r--src/lj_vmevent.h49
-rw-r--r--src/ljamalg.c70
-rw-r--r--src/lua.h388
-rw-r--r--src/lua.hpp9
-rw-r--r--src/luaconf.h133
-rw-r--r--src/luajit.c519
-rw-r--r--src/luajit.h68
-rw-r--r--src/lualib.h41
-rw-r--r--src/msvcbuild.bat53
122 files changed, 42143 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..1a07bf75
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+*.[oa]
+*.so
+*.obj
+*.lib
+*.exp
+*.dll
+*.exe
+*.manifest
+*.dmp
+*.swp
+.tags
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..67347041
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,84 @@
+##############################################################################
+# LuaJIT top level Makefile for installation. Requires GNU Make.
+#
+# Suitable for POSIX platforms (Linux, *BSD, OSX etc.).
+# Note: src/Makefile has many more configurable options.
+#
+# ##### This Makefile is NOT useful for installation on Windows! #####
+# For MSVC, please follow the instructions given in src/msvcbuild.bat.
+# For MinGW and Cygwin, cd to src and run make with the Makefile there.
+# NYI: add wininstall.bat
+#
+# Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+##############################################################################
+
+BASEVER= 2.0.0
+VERSION= 2.0.0-beta1
+
+##############################################################################
+#
+# Change the installation path as needed and modify src/luaconf.h accordingly.
+# Note: PREFIX must be an absolute path!
+#
+PREFIX= /usr/local
+##############################################################################
+
+INSTALL_BIN= $(PREFIX)/bin
+INSTALL_NAME= luajit-$(VERSION)
+INSTALL_T= $(INSTALL_BIN)/$(INSTALL_NAME)
+INSTALL_TSYM= $(INSTALL_BIN)/luajit
+INSTALL_INC= $(PREFIX)/include/luajit-$(BASEVER)
+INSTALL_JITLIB= $(PREFIX)/share/luajit-$(VERSION)/jit
+
+MKDIR= mkdir -p
+SYMLINK= ln -f -s
+INSTALL_X= install -m 0755
+INSTALL_F= install -m 0644
+
+FILES_T= luajit
+FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
+FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua vmdef.lua
+
+##############################################################################
+
+INSTALL_DEP= src/luajit
+
+all $(INSTALL_DEP):
+ @echo "==== Building LuaJIT $(VERSION) ===="
+ $(MAKE) -C src
+ @echo "==== Successfully built LuaJIT $(VERSION) ===="
+
+install: $(INSTALL_DEP)
+ @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ===="
+ $(MKDIR) $(INSTALL_BIN) $(INSTALL_INC) $(INSTALL_JITLIB)
+ cd src && $(INSTALL_X) $(FILES_T) $(INSTALL_T)
+ cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
+ cd lib && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
+ @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
+ @echo ""
+ @echo "Note: the beta releases deliberately do NOT install a symlink for luajit"
+ @echo "You can do this now by running this command (with sudo):"
+ @echo ""
+ @echo " $(SYMLINK) $(INSTALL_NAME) $(INSTALL_TSYM)"
+ @echo ""
+
+##############################################################################
+
+amalg:
+ @echo "Building LuaJIT $(VERSION)"
+ $(MAKE) -C src amalg
+
+clean:
+ $(MAKE) -C src clean
+
+cleaner:
+ $(MAKE) -C src cleaner
+
+distclean:
+ $(MAKE) -C src distclean
+
+SUB_TARGETS= amalg clean cleaner distclean
+
+.PHONY: all install $(SUB_TARGETS)
+
+##############################################################################
diff --git a/README b/README
new file mode 100644
index 00000000..43caf78e
--- /dev/null
+++ b/README
@@ -0,0 +1,16 @@
+README for LuaJIT 2.0.0-beta1
+-----------------------------
+
+LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
+
+Project Homepage: http://luajit.org/
+
+LuaJIT is Copyright (C) 2005-2009 Mike Pall.
+LuaJIT is free software, released under the MIT/X license.
+See full Copyright Notice in src/luajit.h
+
+Documentation for LuaJIT is available in HTML format.
+Please point your favorite browser to:
+
+ doc/luajit.html
+
diff --git a/doc/api.html b/doc/api.html
new file mode 100644
index 00000000..79788d95
--- /dev/null
+++ b/doc/api.html
@@ -0,0 +1,203 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>API Extensions</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>API Extensions</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li><li>
+<a class="current" href="api.html">API Extensions</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT is fully upwards-compatible with Lua 5.1. It supports all
+<a href="http://www.lua.org/manual/5.1/manual.html#5"><span class="ext">&raquo;</span>&nbsp;standard Lua
+library functions</a> and the full set of
+<a href="http://www.lua.org/manual/5.1/manual.html#3"><span class="ext">&raquo;</span>&nbsp;Lua/C API
+functions</a>.
+</p>
+<p>
+LuaJIT is also fully ABI-compatible to Lua 5.1 at the linker/dynamic
+loader level. This means you can compile a C&nbsp;module against the
+standard Lua headers and load the same shared library from either Lua
+or LuaJIT.
+</p>
+
+<h2 id="bit"><tt>bit.*</tt> &mdash; Bitwise Operations</h2>
+<p>
+LuaJIT supports all bitwise operations as defined by
+<a href="http://bitop.luajit.org"><span class="ext">&raquo;</span>&nbsp;Lua BitOp</a>:
+</p>
+<pre class="code">
+bit.tobit bit.tohex bit.bnot bit.band bit.bor bit.bxor
+bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap
+</pre>
+<p>
+This module is a LuaJIT built-in &mdash; you don't need to download or
+install Lua BitOp. The Lua BitOp site has full documentation for all
+<a href="http://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
+</p>
+<p>
+Please make sure to <tt>require</tt> the module before using any of
+its functions:
+</p>
+<pre class="code">
+local bit = require("bit")
+</pre>
+<p>
+An already installed Lua BitOp module is ignored by LuaJIT.
+This way you can use bit operations from both Lua and LuaJIT on a
+shared installation.
+</p>
+
+<h2 id="jit"><tt>jit.*</tt> &mdash; JIT compiler control</h2>
+<p>
+The functions in this built-in module control the behavior
+of the JIT compiler engine.
+</p>
+
+<h3 id="jit_onoff"><tt>jit.on()<br>
+jit.off()</tt></h3>
+<p>
+Turns the whole JIT compiler on (default) or off.
+</p>
+<p>
+These functions are typically used with the command line options
+<tt>-j on</tt> or <tt>-j off</tt>.
+</p>
+
+<h3 id="jit_flush"><tt>jit.flush()</tt></h3>
+<p>
+Flushes the whole cache of compiled code.
+</p>
+
+<h3 id="jit_flush_tr"><tt>jit.flush(tr)</tt></h3>
+<p>
+Flushes the code for the specified root trace and all of its
+side traces from the cache.
+</p>
+
+<h3 id="jit_onoff_func"><tt>jit.on(func|true [,true|false])<br>
+jit.off(func|true [,true|false])<br>
+jit.flush(func|true [,true|false])</tt></h3>
+<p>
+<tt>jit.on</tt> enables JIT compilation for a Lua function (this is
+the default).
+</p>
+<p>
+<tt>jit.off</tt> disables JIT compilation for a Lua function and
+flushes any already compiled code from the code cache.
+</p>
+<p>
+<tt>jit.flush</tt> flushes the code, but doesn't affect the
+enable/disable status.
+</p>
+<p>
+The current function, i.e. the Lua function calling this library
+function, can also be specified by passing <tt>true</tt> as the first
+argument.
+</p>
+<p>
+If the second argument is <tt>true</tt>, JIT compilation is also
+enabled, disabled or flushed recursively for all subfunctions of a
+function. With <tt>false</tt> only the subfunctions are affected.
+</p>
+<p>
+The <tt>jit.on</tt> and <tt>jit.off</tt> functions only set a flag
+which is checked when the function is about to be compiled. They do
+not trigger immediate compilation.
+</p>
+<p>
+Typical usage is <tt>jit.off(true, true)</tt> in the main chunk
+of a module to turn off JIT compilation for the whole module for
+debugging purposes.
+</p>
+
+<h3 id="jit_version"><tt>jit.version</tt></h3>
+<p>
+Contains the LuaJIT version string.
+</p>
+
+<h3 id="jit_version_num"><tt>jit.version_num</tt></h3>
+<p>
+Contains the version number of the LuaJIT core. Version xx.yy.zz
+is represented by the decimal number xxyyzz.
+</p>
+
+<h3 id="jit_arch"><tt>jit.arch</tt></h3>
+<p>
+Contains the target architecture name (CPU and optional ABI).
+</p>
+
+<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
+<p>
+This module provides the backend for the <tt>-O</tt> command line
+option.
+</p>
+<p>
+You can also use it programmatically, e.g.:
+</p>
+<pre class="code">
+jit.opt.start(2) -- same as -O2
+jit.opt.start("-dce")
+jit.opt.start("hotloop=10", "hotexit=2")
+</pre>
+<p>
+Unlike in LuaJIT 1.x, the module is built-in and
+<b>optimization is turned on by default!</b>
+It's no longer necessary to run <tt>require("jit.opt").start()</tt>,
+which was one of the ways to enable optimization.
+</p>
+
+<h2 id="jit_util"><tt>jit.util.*</tt> &mdash; JIT compiler introspection</h2>
+<p>
+This module holds functions to introspect the bytecode, generated
+traces, the IR and the generated machine code. The functionality
+provided by this module is still in flux and therefore undocumented.
+</p>
+<p>
+The debug modules <tt>-jbc</tt>, <tt>-jv</tt> and <tt>-jdump</tt> make
+extensive use of these functions. Please check out their source code,
+if you want to know more.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2009 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css
new file mode 100644
index 00000000..00a6b154
--- /dev/null
+++ b/doc/bluequad-print.css
@@ -0,0 +1,166 @@
+/* Copyright (C) 2004-2009 Mike Pall.
+ *
+ * You are welcome to use the general ideas of this design for your own sites.
+ * But please do not steal the stylesheet, the layout or the color scheme.
+ */
+body {
+ font-family: serif;
+ font-size: 11pt;
+ margin: 0 3em;
+ padding: 0;
+ border: none;
+}
+a:link, a:visited, a:hover, a:active {
+ text-decoration: none;
+ background: transparent;
+ color: #0000ff;
+}
+h1, h2, h3 {
+ font-family: sans-serif;
+ font-weight: bold;
+ text-align: left;
+ margin: 0.5em 0;
+ padding: 0;
+}
+h1 {
+ font-size: 200%;
+}
+h2 {
+ font-size: 150%;
+}
+h3 {
+ font-size: 125%;
+}
+p {
+ margin: 0 0 0.5em 0;
+ padding: 0;
+}
+ul, ol {
+ margin: 0.5em 0;
+ padding: 0 0 0 2em;
+}
+ul {
+ list-style: outside square;
+}
+ol {
+ list-style: outside decimal;
+}
+li {
+ margin: 0;
+ padding: 0;
+}
+dl {
+ margin: 1em 0;
+ padding: 1em;
+ border: 1px solid black;
+}
+dt {
+ font-weight: bold;
+ margin: 0;
+ padding: 0;
+}
+dt sup {
+ float: right;
+ margin-left: 1em;
+}
+dd {
+ margin: 0.5em 0 0 2em;
+ padding: 0;
+}
+table {
+ table-layout: fixed;
+ width: 100%;
+ margin: 1em 0;
+ padding: 0;
+ border: 1px solid black;
+ border-spacing: 0;
+ border-collapse: collapse;
+}
+tr {
+ margin: 0;
+ padding: 0;
+ border: none;
+}
+td {
+ text-align: left;
+ margin: 0;
+ padding: 0.2em 0.5em;
+ border-top: 1px solid black;
+ border-bottom: 1px solid black;
+}
+tr.separate td {
+ border-top: double;
+}
+tt, pre, code, kbd, samp {
+ font-family: monospace;
+ font-size: 75%;
+}
+kbd {
+ font-weight: bolder;
+}
+blockquote, pre {
+ margin: 1em 2em;
+ padding: 0;
+}
+img {
+ border: none;
+ vertical-align: baseline;
+ margin: 0;
+ padding: 0;
+}
+img.left {
+ float: left;
+ margin: 0.5em 1em 0.5em 0;
+}
+img.right {
+ float: right;
+ margin: 0.5em 0 0.5em 1em;
+}
+.flush {
+ clear: both;
+ visibility: hidden;
+}
+.hide, .noprint, #nav {
+ display: none !important;
+}
+.pagebreak {
+ page-break-before: always;
+}
+#site {
+ text-align: right;
+ font-family: sans-serif;
+ font-weight: bold;
+ margin: 0 1em;
+ border-bottom: 1pt solid black;
+}
+#site a {
+ font-size: 1.2em;
+}
+#site a:link, #site a:visited {
+ text-decoration: none;
+ font-weight: bold;
+ background: transparent;
+ color: #ffffff;
+}
+#logo {
+ color: #ff8000;
+}
+#head {
+ clear: both;
+ margin: 0 1em;
+}
+#main {
+ line-height: 1.3;
+ text-align: justify;
+ margin: 1em;
+}
+#foot {
+ clear: both;
+ font-size: 80%;
+ text-align: center;
+ margin: 0 1.25em;
+ padding: 0.5em 0 0 0;
+ border-top: 1pt solid black;
+ page-break-before: avoid;
+ page-break-after: avoid;
+}
diff --git a/doc/bluequad.css b/doc/bluequad.css
new file mode 100644
index 00000000..7e52102f
--- /dev/null
+++ b/doc/bluequad.css
@@ -0,0 +1,303 @@
+/* Copyright (C) 2004-2009 Mike Pall.
+ *
+ * You are welcome to use the general ideas of this design for your own sites.
+ * But please do not steal the stylesheet, the layout or the color scheme.
+ */
+/* colorscheme:
+ *
+ * site | head #4162bf/white | #6078bf/#e6ecff
+ * ------+------ ----------------+-------------------
+ * nav | main #bfcfff | #e6ecff/black
+ *
+ * nav: hiback loback #c5d5ff #b9c9f9
+ * hiborder loborder #e6ecff #97a7d7
+ * link hover #2142bf #ff0000
+ *
+ * link: link visited hover #2142bf #8122bf #ff0000
+ *
+ * main: boxback boxborder #f0f4ff #bfcfff
+ */
+body {
+ font-family: Verdana, Arial, Helvetica, sans-serif;
+ font-size: 10pt;
+ margin: 0;
+ padding: 0;
+ border: none;
+ background: #e0e0e0;
+ color: #000000;
+}
+a:link {
+ text-decoration: none;
+ background: transparent;
+ color: #2142bf;
+}
+a:visited {
+ text-decoration: none;
+ background: transparent;
+ color: #8122bf;
+}
+a:hover, a:active {
+ text-decoration: underline;
+ background: transparent;
+ color: #ff0000;
+}
+h1, h2, h3 {
+ font-weight: bold;
+ text-align: left;
+ margin: 0.5em 0;
+ padding: 0;
+ background: transparent;
+}
+h1 {
+ font-size: 200%;
+ line-height: 3em; /* really 6em relative to body, match #site span */
+ margin: 0;
+}
+h2 {
+ font-size: 150%;
+ color: #606060;
+}
+h3 {
+ font-size: 125%;
+ color: #404040;
+}
+p {
+ max-width: 600px;
+ margin: 0 0 0.5em 0;
+ padding: 0;
+}
+b {
+ color: #404040;
+}
+ul, ol {
+ max-width: 600px;
+ margin: 0.5em 0;
+ padding: 0 0 0 2em;
+}
+ul {
+ list-style: outside square;
+}
+ol {
+ list-style: outside decimal;
+}
+li {
+ margin: 0;
+ padding: 0;
+}
+dl {
+ max-width: 600px;
+ margin: 1em 0;
+ padding: 1em;
+ border: 1px solid #bfcfff;
+ background: #f0f4ff;
+}
+dt {
+ font-weight: bold;
+ margin: 0;
+ padding: 0;
+}
+dt sup {
+ float: right;
+ margin-left: 1em;
+ color: #808080;
+}
+dt a:visited {
+ text-decoration: none;
+ color: #2142bf;
+}
+dt a:hover, dt a:active {
+ text-decoration: none;
+ color: #ff0000;
+}
+dd {
+ margin: 0.5em 0 0 2em;
+ padding: 0;
+}
+div.tablewrap { /* for IE *sigh* */
+ max-width: 600px;
+}
+table {
+ table-layout: fixed;
+ border-spacing: 0;
+ border-collapse: collapse;
+ max-width: 600px;
+ width: 100%;
+ margin: 1em 0;
+ padding: 0;
+ border: 1px solid #bfcfff;
+}
+tr {
+ margin: 0;
+ padding: 0;
+ border: none;
+}
+tr.odd {
+ background: #f0f4ff;
+}
+tr.separate td {
+ border-top: 1px solid #bfcfff;
+}
+td {
+ text-align: left;
+ margin: 0;
+ padding: 0.2em 0.5em;
+ border: none;
+}
+tt, code, kbd, samp {
+ font-family: Courier New, Courier, monospace;
+ line-height: 1.2;
+ font-size: 110%;
+}
+kbd {
+ font-weight: bolder;
+}
+blockquote, pre {
+ max-width: 600px;
+ margin: 1em 2em;
+ padding: 0;
+}
+pre {
+ line-height: 1.1;
+}
+pre.code {
+ line-height: 1.4;
+ margin: 0.5em 0 1em 0.5em;
+ padding: 0.5em 1em;
+ border: 1px solid #bfcfff;
+ background: #f0f4ff;
+}
+img {
+ border: none;
+ vertical-align: baseline;
+ margin: 0;
+ padding: 0;
+}
+img.left {
+ float: left;
+ margin: 0.5em 1em 0.5em 0;
+}
+img.right {
+ float: right;
+ margin: 0.5em 0 0.5em 1em;
+}
+.indent {
+ padding-left: 1em;
+}
+.flush {
+ clear: both;
+ visibility: hidden;
+}
+.hide, .noscreen {
+ display: none !important;
+}
+.ext {
+ color: #ff8000;
+}
+#site {
+ clear: both;
+ float: left;
+ width: 13em;
+ text-align: center;
+ font-weight: bold;
+ margin: 0;
+ padding: 0;
+ background: transparent;
+ color: #ffffff;
+}
+#site a {
+ font-size: 200%;
+}
+#site a:link, #site a:visited {
+ text-decoration: none;
+ font-weight: bold;
+ background: transparent;
+ color: #ffffff;
+}
+#site span {
+ line-height: 3em; /* really 6em relative to body, match h1 */
+}
+#logo {
+ color: #ffb380;
+}
+#head {
+ margin: 0;
+ padding: 0 0 0 2em;
+ border-left: solid 13em #4162bf;
+ border-right: solid 3em #6078bf;
+ background: #6078bf;
+ color: #e6ecff;
+}
+#nav {
+ clear: both;
+ float: left;
+ overflow: hidden;
+ text-align: left;
+ line-height: 1.5;
+ width: 13em;
+ padding-top: 1em;
+ background: transparent;
+}
+#nav ul {
+ list-style: none outside;
+ margin: 0;
+ padding: 0;
+}
+#nav li {
+ margin: 0;
+ padding: 0;
+}
+#nav a {
+ display: block;
+ text-decoration: none;
+ font-weight: bold;
+ margin: 0;
+ padding: 2px 1em;
+ border-top: 1px solid transparent;
+ border-bottom: 1px solid transparent;
+ background: transparent;
+ color: #2142bf;
+}
+#nav a:hover, #nav a:active {
+ text-decoration: none;
+ border-top: 1px solid #97a7d7;
+ border-bottom: 1px solid #e6ecff;
+ background: #b9c9f9;
+ color: #ff0000;
+}
+#nav a.current, #nav a.current:hover, #nav a.current:active {
+ border-top: 1px solid #e6ecff;
+ border-bottom: 1px solid #97a7d7;
+ background: #c5d5ff;
+ color: #2142bf;
+}
+#nav ul ul a {
+ padding: 0 1em 0 2em;
+}
+#main {
+ line-height: 1.5;
+ text-align: left;
+ margin: 0;
+ padding: 1em 2em;
+ border-left: solid 13em #bfcfff;
+ border-right: solid 3em #e6ecff;
+ background: #e6ecff;
+}
+#foot {
+ clear: both;
+ font-size: 80%;
+ text-align: center;
+ margin: 0;
+ padding: 0.5em;
+ background: #6078bf;
+ color: #ffffff;
+}
+#foot a:link, #foot a:visited {
+ text-decoration: underline;
+ background: transparent;
+ color: #ffffff;
+}
+#foot a:hover, #foot a:active {
+ text-decoration: underline;
+ background: transparent;
+ color: #bfcfff;
+}
diff --git a/doc/changes.html b/doc/changes.html
new file mode 100644
index 00000000..6c34b8be
--- /dev/null
+++ b/doc/changes.html
@@ -0,0 +1,281 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>LuaJIT Change History</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+div.major { max-width: 600px; padding: 1em; margin: 1em 0 1em 0; }
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>LuaJIT Change History</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li><li>
+<a href="api.html">API Extensions</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a class="current" href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+This is a list of changes between the released versions of LuaJIT.<br>
+The current <span style="color: #c00000;">development version</span> is <strong>LuaJIT&nbsp;2.0.0-beta1</strong>.<br>
+The current <span style="color: #0000c0;">stable version</span> is <strong>LuaJIT&nbsp;1.1.5</strong>.
+</p>
+<p>
+Please check the
+<a href="http://luajit.org/luajit_changes.html"><span class="ext">&raquo;</span>&nbsp;Online Change History</a>
+to see whether newer versions are available.
+</p>
+
+<div class="major" style="background: #ffd0d0;">
+<h2 id="LuaJIT-2.0.0-beta1">LuaJIT 2.0.0-beta1 &mdash; 2009-10-31</h2>
+<ul>
+<li>This is the first public release of LuaJIT 2.0.</li>
+<li>The whole VM has been rewritten from the ground up, so there's
+no point in listing differences over earlier versions.</li>
+</ul>
+</div>
+
+<div class="major" style="background: #d0d0ff;">
+<h2 id="LuaJIT-1.1.5">LuaJIT 1.1.5 &mdash; 2008-10-25</h2>
+<ul>
+<li>Merged with Lua 5.1.4. Fixes all
+<a href="http://www.lua.org/bugs.html#5.1.3"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.3</a>.</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.4">LuaJIT 1.1.4 &mdash; 2008-02-05</h2>
+<ul>
+<li>Merged with Lua 5.1.3. Fixes all
+<a href="http://www.lua.org/bugs.html#5.1.2"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.2</a>.</li>
+<li>Fixed possible (but unlikely) stack corruption while compiling
+<tt>k^x</tt> expressions.</li>
+<li>Fixed DynASM template for cmpss instruction.</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.3">LuaJIT 1.1.3 &mdash; 2007-05-24</h2>
+<ul>
+<li>Merged with Lua 5.1.2. Fixes all
+<a href="http://www.lua.org/bugs.html#5.1.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1.1</a>.</li>
+<li>Merged pending Lua 5.1.x fixes: "return -nil" bug, spurious count hook call.</li>
+<li>Remove a (sometimes) wrong assertion in <tt>luaJIT_findpc()</tt>.</li>
+<li>DynASM now allows labels for displacements and <tt>.aword</tt>.</li>
+<li>Fix some compiler warnings for DynASM glue (internal API change).</li>
+<li>Correct naming for SSSE3 (temporarily known as SSE4) in DynASM and x86 disassembler.</li>
+<li>The loadable debug modules now handle redirection to stdout
+(e.g. <tt>-j&nbsp;trace=-</tt>).</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.2">LuaJIT 1.1.2 &mdash; 2006-06-24</h2>
+<ul>
+<li>Fix MSVC inline assembly: use only local variables with
+<tt>lua_number2int()</tt>.</li>
+<li>Fix "attempt to call a thread value" bug on Mac OS X:
+make values of consts used as lightuserdata keys unique
+to avoid joining by the compiler/linker.</li>
+</ul>
+
+<h2 id="LuaJIT-1.1.1">LuaJIT 1.1.1 &mdash; 2006-06-20</h2>
+<ul>
+<li>Merged with Lua 5.1.1. Fixes all
+<a href="http://www.lua.org/bugs.html#5.1"><span class="ext">&raquo;</span>&nbsp;known bugs in Lua 5.1</a>.</li>
+<li>Enforce (dynamic) linker error for EXE/DLL version mismatches.</li>
+<li>Minor changes to DynASM: faster preprocessing, smaller encoding
+for some immediates.</li>
+</ul>
+<p>
+This release is in sync with Coco 1.1.1 (see the
+<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
+</p>
+
+<h2 id="LuaJIT-1.1.0">LuaJIT 1.1.0 &mdash; 2006-03-13</h2>
+<ul>
+<li>Merged with Lua 5.1 (final).</li>
+
+<li>New JIT call frame setup:
+<ul>
+<li>The C stack is kept 16 byte aligned (faster).
+Mandatory for Mac OS X on Intel, too.</li>
+<li>Faster calling conventions for internal C helper functions.</li>
+<li>Better instruction scheduling for function prologue, OP_CALL and
+OP_RETURN.</li>
+</ul></li>
+
+<li>Miscellaneous optimizations:
+<ul>
+<li>Faster loads of FP constants. Remove narrow-to-wide store-to-load
+forwarding stalls.</li>
+<li>Use (scalar) SSE2 ops (if the CPU supports it) to speed up slot moves
+and FP to integer conversions.</li>
+<li>Optimized the two-argument form of <tt>OP_CONCAT</tt> (<tt>a..b</tt>).</li>
+<li>Inlined <tt>OP_MOD</tt> (<tt>a%b</tt>).
+With better accuracy than the C variant, too.</li>
+<li>Inlined <tt>OP_POW</tt> (<tt>a^b</tt>). Unroll <tt>x^k</tt> or
+use <tt>k^x = 2^(log2(k)*x)</tt> or call <tt>pow()</tt>.</li>
+</ul></li>
+
+<li>Changes in the optimizer:
+<ul>
+<li>Improved hinting for table keys derived from table values
+(<tt>t1[t2[x]]</tt>).</li>
+<li>Lookup hinting now works with arbitrary object types and
+supports index chains, too.</li>
+<li>Generate type hints for arithmetic and comparison operators,
+OP_LEN, OP_CONCAT and OP_FORPREP.</li>
+<li>Remove several hint definitions in favour of a generic COMBINE hint.</li>
+<li>Complete rewrite of <tt>jit.opt_inline</tt> module
+(ex <tt>jit.opt_lib</tt>).</li>
+</ul></li>
+
+<li>Use adaptive deoptimization:
+<ul>
+<li>If runtime verification of a contract fails, the affected
+instruction is recompiled and patched on-the-fly.
+Regular programs will trigger deoptimization only occasionally.</li>
+<li>This avoids generating code for uncommon fallback cases
+most of the time. Generated code is up to 30% smaller compared to
+LuaJIT&nbsp;1.0.3.</li>
+<li>Deoptimization is used for many opcodes and contracts:
+<ul>
+<li>OP_CALL, OP_TAILCALL: type mismatch for callable.</li>
+<li>Inlined calls: closure mismatch, parameter number and type mismatches.</li>
+<li>OP_GETTABLE, OP_SETTABLE: table or key type and range mismatches.</li>
+<li>All arithmetic and comparison operators, OP_LEN, OP_CONCAT,
+OP_FORPREP: operand type and range mismatches.</li>
+</ul></li>
+<li>Complete redesign of the debug and traceback info
+(bytecode &harr; mcode) to support deoptimization.
+Much more flexible and needs only 50% of the space.</li>
+<li>The modules <tt>jit.trace</tt>, <tt>jit.dumphints</tt> and
+<tt>jit.dump</tt> handle deoptimization.</li>
+</ul></li>
+
+<li>Inlined many popular library functions
+(for commonly used arguments only):
+<ul>
+<li>Most <tt>math.*</tt> functions (the 18 most used ones)
+[2x-10x faster].</li>
+<li><tt>string.len</tt>, <tt>string.sub</tt> and <tt>string.char</tt>
+[2x-10x faster].</li>
+<li><tt>table.insert</tt>, <tt>table.remove</tt> and <tt>table.getn</tt>
+[3x-5x faster].</li>
+<li><tt>coroutine.yield</tt> and <tt>coroutine.resume</tt>
+[3x-5x faster].</li>
+<li><tt>pairs</tt>, <tt>ipairs</tt> and the corresponding iterators
+[8x-15x faster].</li>
+</ul></li>
+
+<li>Changes in the core and loadable modules and the stand-alone executable:
+<ul>
+<li>Added <tt>jit.version</tt>, <tt>jit.version_num</tt>
+and <tt>jit.arch</tt>.</li>
+<li>Reorganized some internal API functions (<tt>jit.util.*mcode*</tt>).</li>
+<li>The <tt>-j dump</tt> output now shows JSUB names, too.</li>
+<li>New x86 disassembler module written in pure Lua. No dependency
+on ndisasm anymore. Flexible API, very compact (500 lines)
+and complete (x87, MMX, SSE, SSE2, SSE3, SSSE3, privileged instructions).</li>
+<li><tt>luajit -v</tt> prints the LuaJIT version and copyright
+on a separate line.</li>
+</ul></li>
+
+<li>Added SSE, SSE2, SSE3 and SSSE3 support to DynASM.</li>
+<li>Miscellaneous doc changes. Added a section about
+<a href="luajit_install.html#embedding">embedding LuaJIT</a>.</li>
+</ul>
+<p>
+This release is in sync with Coco 1.1.0 (see the
+<a href="http://coco.luajit.org/changes.html"><span class="ext">&raquo;</span>&nbsp;Coco Change History</a>).
+</p>
+</div>
+
+<div class="major" style="background: #ffffd0;">
+<h2 id="LuaJIT-1.0.3">LuaJIT 1.0.3 &mdash; 2005-09-08</h2>
+<ul>
+<li>Even more docs.</li>
+<li>Unified closure checks in <tt>jit.*</tt>.</li>
+<li>Fixed some range checks in <tt>jit.util.*</tt>.</li>
+<li>Fixed __newindex call originating from <tt>jit_settable_str()</tt>.</li>
+<li>Merged with Lua 5.1 alpha (including early bugfixes).</li>
+</ul>
+<p>
+This is the first public release of LuaJIT.
+</p>
+
+<h2 id="LuaJIT-1.0.2">LuaJIT 1.0.2 &mdash; 2005-09-02</h2>
+<ul>
+<li>Add support for flushing the Valgrind translation cache <br>
+(<tt>MYCFLAGS= -DUSE_VALGRIND</tt>).</li>
+<li>Add support for freeing executable mcode memory to the <tt>mmap()</tt>-based
+variant for POSIX systems.</li>
+<li>Reorganized the C&nbsp;function signature handling in
+<tt>jit.opt_lib</tt>.</li>
+<li>Changed to index-based hints for inlining C&nbsp;functions.
+Still no support in the backend for inlining.</li>
+<li>Hardcode <tt>HEAP_CREATE_ENABLE_EXECUTE</tt> value if undefined.</li>
+<li>Misc. changes to the <tt>jit.*</tt> modules.</li>
+<li>Misc. changes to the Makefiles.</li>
+<li>Lots of new docs.</li>
+<li>Complete doc reorg.</li>
+</ul>
+<p>
+Not released because Lua 5.1 alpha came out today.
+</p>
+
+<h2 id="LuaJIT-1.0.1">LuaJIT 1.0.1 &mdash; 2005-08-31</h2>
+<ul>
+<li>Missing GC step in <tt>OP_CONCAT</tt>.</li>
+<li>Fix result handling for C &ndash;> JIT calls.</li>
+<li>Detect CPU feature bits.</li>
+<li>Encode conditional moves (<tt>fucomip</tt>) only when supported.</li>
+<li>Add fallback instructions for FP compares.</li>
+<li>Add support for <tt>LUA_COMPAT_VARARG</tt>. Still disabled by default.</li>
+<li>MSVC needs a specific place for the <tt>CALLBACK</tt> attribute
+(David Burgess).</li>
+<li>Misc. doc updates.</li>
+</ul>
+<p>
+Interim non-public release.
+Special thanks to Adam D. Moss for reporting most of the bugs.
+</p>
+
+<h2 id="LuaJIT-1.0.0">LuaJIT 1.0.0 &mdash; 2005-08-29</h2>
+<p>
+This is the initial non-public release of LuaJIT.
+</p>
+</div>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2009 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/doc/contact.html b/doc/contact.html
new file mode 100644
index 00000000..36d5a825
--- /dev/null
+++ b/doc/contact.html
@@ -0,0 +1,84 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Contact</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Contact</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li><li>
+<a href="api.html">API Extensions</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+Please send general questions to the
+<a href="http://www.lua.org/lua-l.html"><span class="ext">&raquo;</span>&nbsp;Lua mailing list</a>.
+You can also send any questions you have directly to me:
+</p>
+
+<script type="text/javascript">
+<!--
+var xS="@-: .0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+function xD(s)
+{var len=s.length;var r="";for(var i=0;i<len;i++)
+{var c=s.charAt(i);var n=xS.indexOf(c);if(n!=-1)
+c=xS.charAt(66-n);r+=c;}
+document.write("<"+"p>"+r+"<"+"/p>\n");}
+//-->
+</script>
+<script type="text/javascript">
+<!--
+xD("ewYKA7vu-EIwslx7 K9A.t41C")
+//--></script>
+<noscript>
+<p><img src="img/contact.png" alt="Contact info in image" width="170" height="13">
+</p>
+</noscript>
+
+<h2>Copyright</h2>
+<p>
+All documentation is
+Copyright &copy; 2005-2009 Mike Pall.
+</p>
+
+
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2009 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/doc/faq.html b/doc/faq.html
new file mode 100644
index 00000000..6f62e1eb
--- /dev/null
+++ b/doc/faq.html
@@ -0,0 +1,141 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Frequently Asked Questions (FAQ)</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+dd { margin-left: 1.5em; }
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Frequently Asked Questions (FAQ)</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li><li>
+<a href="api.html">API Extensions</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a class="current" href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<dl>
+<dt>Q: Where can I learn more about Lua and LuaJIT?</dt>
+<dd>
+<ul style="padding: 0;">
+<li>The <a href="http://lua.org"><span class="ext">&raquo;</span>&nbsp;main Lua.org site</a> has complete
+<a href="http://www.lua.org/docs.html"><span class="ext">&raquo;</span>&nbsp;documentation</a> of the language
+and links to books and papers about Lua.</li>
+<li>The community-managed <a href="http://lua-users.org/wiki/"><span class="ext">&raquo;</span>&nbsp;Lua Wiki</a>
+has information about diverse topics.</li>
+<li>The primary source of information for the latest developments surrounding
+Lua is the <a href="http://www.lua.org/lua-l.html"><span class="ext">&raquo;</span>&nbsp;Lua mailing list</a>.
+You can check out the <a href="http://lua-users.org/lists/lua-l/"><span class="ext">&raquo;</span>&nbsp;mailing
+list archive</a> or
+<a href="http://bazar2.conectiva.com.br/mailman/listinfo/lua"><span class="ext">&raquo;</span>&nbsp;subscribe</a>
+to the list (you need to be subscribed before posting).<br>
+This is also the place where announcements and discussions about LuaJIT
+take place.</li>
+</ul>
+</dd>
+</dl>
+
+<dl>
+<dt>Q: Where can I learn more about the compiler technology used by LuaJIT?</dt>
+<dd>
+I'm planning to write more documentation about the internals of LuaJIT.
+In the meantime, please use the following Google Scholar searches
+to find relevant papers:<br>
+Search for: <a href="http://scholar.google.com/scholar?q=Trace+Compiler"><span class="ext">&raquo;</span>&nbsp;Trace Compiler</a><br>
+Search for: <a href="http://scholar.google.com/scholar?q=JIT+Compiler"><span class="ext">&raquo;</span>&nbsp;JIT Compiler</a><br>
+Search for: <a href="http://scholar.google.com/scholar?q=Dynamic+Language+Optimizations"><span class="ext">&raquo;</span>&nbsp;Dynamic Language Optimizations</a><br>
+Search for: <a href="http://scholar.google.com/scholar?q=SSA+Form"><span class="ext">&raquo;</span>&nbsp;SSA Form</a><br>
+Search for: <a href="http://scholar.google.com/scholar?q=Linear+Scan+Register+Allocation"><span class="ext">&raquo;</span>&nbsp;Linear Scan Register Allocation</a><br>
+And, you know, reading the source is of course the only way to enlightenment. :-)
+</dd>
+</dl>
+
+<dl>
+<dt>Q: Why do I get this error: "attempt to index global 'arg' (a nil value)"?<br>
+Q: My vararg functions fail after switching to LuaJIT!</dt>
+<dd>LuaJIT is compatible with the Lua 5.1 language standard. It doesn't
+support the implicit <tt>arg</tt> parameter for old-style vararg
+functions from Lua 5.0.<br>Please convert your code to the
+<a href="http://www.lua.org/manual/5.1/manual.html#2.5.9"><span class="ext">&raquo;</span>&nbsp;Lua 5.1
+vararg syntax</a>.</dd>
+</dl>
+
+<dl>
+<dt>Q: Sometimes Ctrl-C fails to stop my Lua program. Why?</dt>
+<dd>The interrupt signal handler sets a Lua debug hook. But this is
+currently ignored by compiled code (this will eventually be fixed). If
+your program is running in a tight loop and never falls back to the
+interpreter, the debug hook never runs and can't throw the
+"interrupted!" error.<br> In the meantime you have to press Ctrl-C
+twice to stop your program. That's similar to when it's stuck
+running inside a C function under the Lua interpreter.</dd>
+</dl>
+
+<dl>
+<dt>Q: Why doesn't my favorite power-patch for Lua apply against LuaJIT?</dt>
+<dd>Because it's a completely redesigned VM and has very little code
+in common with Lua anymore. Also, if the patch introduces changes to
+the Lua semantics, this would need to be reflected everywhere in the
+VM, from the interpreter up to all stages of the compiler.<br> Please
+use only standard Lua language constructs. For many common needs you
+can use source transformations or use wrapper or proxy functions.
+The compiler will happily optimize away such indirections.</dd>
+</dl>
+
+<dl>
+<dt>Q: Lua runs everywhere. Why doesn't LuaJIT support my CPU?</dt>
+<dd>Because it's a compiler &mdash; it needs to generate native
+machine code. This means the code generator must be ported to each
+architecture. And the fast interpreter is written in assembler and
+must be ported, too. This is quite an undertaking.<br> Currently only
+x86 CPUs are supported. x64 support is in the works. Other
+architectures will follow with sufficient demand and/or
+sponsoring.</dd>
+</dl>
+
+<dl>
+<dt>Q: When will feature X be added? When will the next version be released?</dt>
+<dd>When it's ready.<br>
+C'mon, it's open source &mdash; I'm doing it on my own time and you're
+getting it for free. You can either contribute a patch or sponsor
+the development of certain features, if they are important to you.
+</dd>
+</dl>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2009 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/doc/img/contact.png b/doc/img/contact.png
new file mode 100644
index 00000000..9c73dc59
--- /dev/null
+++ b/doc/img/contact.png
Binary files differ
diff --git a/doc/install.html b/doc/install.html
new file mode 100644
index 00000000..b7211d21
--- /dev/null
+++ b/doc/install.html
@@ -0,0 +1,216 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Installation</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Installation</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a class="current" href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li><li>
+<a href="api.html">API Extensions</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT is only distributed as a source package. This page explains
+how to build and install LuaJIT with different operating systems
+and C&nbsp;compilers.
+</p>
+<p>
+For the impatient (on POSIX systems):
+</p>
+<pre class="code">
+make &amp;&amp; sudo make install
+</pre>
+<p>
+LuaJIT currently builds out of the box on all popular x86 systems
+(Linux, Windows, OSX etc.). It builds and runs fine as a 32&nbsp;bit
+application under x64-based systems, too.
+</p>
+
+<h2>Configuring LuaJIT</h2>
+<p>
+The standard configuration should work fine for most installations.
+Usually there is no need to tweak the settings, except when you want to
+install to a non-standard path. The following three files hold all
+user-configurable settings:
+</p>
+<ul>
+<li><tt>src/luaconf.h</tt> sets some configuration variables, in
+particular the default paths for loading modules.</li>
+<li><tt>Makefile</tt> has settings for installing LuaJIT (POSIX
+only).</li>
+<li><tt>src/Makefile</tt> has settings for compiling LuaJIT under POSIX,
+MinGW and Cygwin.</li>
+<li><tt>src/msvcbuild.bat</tt> has settings for compiling LuaJIT with
+MSVC.</li>
+</ul>
+<p>
+Please read the instructions given in these files, before changing
+any settings.
+</p>
+
+<h2 id="posix">POSIX Systems (Linux, OSX, *BSD etc.)</h2>
+<h3>Prerequisites</h3>
+<p>
+Depending on your distribution, you may need to install a package for
+GCC (GCC 3.4 or later required), the development headers and/or a
+complete SDK.
+</p>
+<p>
+E.g. on a current Debian/Ubuntu, install <tt>libc6-dev</tt>
+with the package manager. Currently LuaJIT only builds as a 32&nbsp;bit
+application, so you actually need to install <tt>libc6-dev-i386</tt>
+when building on an x64 OS.
+</p>
+<p>
+Download the current source package (pick the .tar.gz), if you haven't
+already done so. Move it to a directory of your choice, open a
+terminal window and change to this directory. Now unpack the archive
+and change to the newly created directory:
+</p>
+<pre class="code">
+tar zxf LuaJIT-2.0.0-beta1.tar.gz
+cd LuaJIT-2.0.0-beta1
+</pre>
+<h3>Building LuaJIT</h3>
+<p>
+The supplied Makefiles try to auto-detect the settings needed for your
+operating system and your compiler. They need to be run with GNU Make,
+which is probably the default on your system, anyway. Simply run:
+</p>
+<pre class="code">
+make
+</pre>
+<h3>Installing LuaJIT</h3>
+<p>
+The top-level Makefile installs LuaJIT by default under
+<tt>/usr/local</tt>, i.e. the executable ends up in
+<tt>/usr/local/bin</tt> and so on. You need to have root privileges
+to write to this path. So, assuming sudo is installed on your system,
+run the following command and enter your sudo password:
+</p>
+<pre class="code">
+sudo make install
+</pre>
+<p>
+Otherwise specify the directory prefix as an absolute path, e.g.:
+</p>
+<pre class="code">
+sudo make install PREFIX=/opt/lj2
+</pre>
+<p>
+But note that the installation prefix and the prefix for the module paths
+(configured in <tt>src/luaconf.h</tt>) must match.
+</p>
+<p style="color: #c00000;">
+Note: to avoid overwriting a previous version, the beta test releases
+only install the LuaJIT executable under the versioned name (i.e.
+<tt>luajit-2.0.0-beta1</tt>). You probably want to create a symlink
+for convenience, with a command like this:
+</p>
+<pre class="code" style="color: #c00000;">
+sudo ln -sf luajit-2.0.0-beta1 /usr/local/bin/luajit
+</pre>
+
+<h2 id="windows">Windows Systems</h2>
+<h3>Prerequisites</h3>
+<p>
+Either install one of the open source SDKs
+(<a href="http://mingw.org/"><span class="ext">&raquo;</span>&nbsp;MinGW</a> or
+<a href="http://www.cygwin.com/"><span class="ext">&raquo;</span>&nbsp;Cygwin</a>) which come with modified
+versions of GCC plus the required development headers.
+</p>
+<p>
+Or install Microsoft's Visual C++ (MSVC) &mdash; the freely downloadable
+<a href="http://www.microsoft.com/Express/VC/"><span class="ext">&raquo;</span>&nbsp;Express Edition</a>
+works just fine.
+</p>
+<p>
+Next, download the source package and unpack it using an archive manager
+(e.g. the Windows Explorer) to a directory of your choice.
+</p>
+<h3>Building with MSVC</h3>
+<p>
+Open a "Visual Studio .NET Command Prompt" and <tt>cd</tt> to the
+directory where you've unpacked the sources. Then run this command:
+</p>
+<pre class="code">
+cd src
+msvcbuild
+</pre>
+<p>
+Then follow the installation instructions below.
+</p>
+<h3>Building with MinGW or Cygwin</h3>
+<p>
+Open a command prompt window and make sure the MinGW or Cygwin programs
+are in your path. Then <tt>cd</tt> to the directory where
+you've unpacked the sources and run this command for MinGW:
+</p>
+<pre class="code">
+cd src
+mingw32-make
+</pre>
+<p>
+Or this command for Cygwin:
+</p>
+<pre class="code">
+cd src
+make
+</pre>
+<p>
+Then follow the installation instructions below.
+</p>
+<h3>Installing LuaJIT</h3>
+<p>
+Copy <tt>luajit.exe</tt> and <tt>lua51.dll</tt>
+to a newly created directory (any location is ok). Add <tt>lua</tt>
+and <tt>lua\jit</tt> directories below it and copy all Lua files
+from the <tt>lib</tt> directory of the distribution to the latter directory.
+</p>
+<p>
+There are no hardcoded
+absolute path names &mdash; all modules are loaded relative to the
+directory where <tt>luajit.exe</tt> is installed
+(see <tt>src/luaconf.h</tt>).
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2009 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/doc/luajit.html b/doc/luajit.html
new file mode 100644
index 00000000..9b16ea37
--- /dev/null
+++ b/doc/luajit.html
@@ -0,0 +1,120 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>LuaJIT</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>LuaJIT</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a class="current" href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li><li>
+<a href="api.html">API Extensions</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT is a <b>Just-In-Time Compiler</b> for the Lua<sup>*</sup>
+programming language.
+</p>
+<p>
+LuaJIT is Copyright &copy; 2005-2009 Mike Pall.
+LuaJIT is open source software, released under the
+<a href="http://www.opensource.org/licenses/mit-license.php"><span class="ext">&raquo;</span>&nbsp;MIT/X license</a>.
+</p>
+<p class="indent" style="color: #606060;">
+* Lua is a powerful, dynamic and light-weight programming language
+designed for extending applications. Lua is also frequently used as a
+general-purpose, stand-alone language. More information about
+Lua can be found at: <a href="http://www.lua.org/"><span class="ext">&raquo;</span>&nbsp;http://www.lua.org/</a>
+</p>
+<h2>Compatibility</h2>
+<p>
+LuaJIT implements the full set of language features defined by Lua 5.1.
+The virtual machine (VM) is <b>API- and ABI-compatible</b> with the
+standard Lua interpreter and can be deployed as a drop-in replacement.
+</p>
+<p>
+LuaJIT offers more performance, at the expense of portability. It
+currently runs on all popular operating systems based on <b>x86 CPUs</b>
+(Linux, Windows, OSX etc.). It will be ported to x64 CPUs and other
+platforms in the future, based on user demand and sponsoring.
+</p>
+
+<h2>Overview</h2>
+<p>
+LuaJIT has been successfully used as a <b>scripting middleware</b> in
+games, 3D modellers, numerical simulations, trading platforms and many
+other specialty applications. It combines high flexibility with high
+performance and an unmatched <b>low memory footprint</b>: less than
+<b>120K</b> for the VM plus less than <b>80K</b> for the JIT compiler.
+</p>
+<p>
+LuaJIT has been in continuous development since 2005. It's widely
+considered to be <b>one of the fastest dynamic language
+implementations</b>. It has outperformed other dynamic languages on many
+cross-language benchmarks since its first release &mdash; often by a
+substantial margin. Only now, in 2009, other dynamic language VMs are
+starting to catch up with the performance of LuaJIT 1.x &hellip;
+</p>
+<p>
+2009 also marks the first release of the long-awaited <b>LuaJIT 2.0</b>.
+The whole VM has been rewritten from the ground up and relentlessly
+optimized for performance. It combines a high-speed interpreter,
+written in assembler, with a state-of-the-art JIT compiler.
+</p>
+<p>
+An innovative <b>trace compiler</b> is integrated with advanced,
+SSA-based optimizations and a highly tuned code generation backend. This
+allows a substantial reduction of the overhead associated with dynamic
+language features. It's destined to break into the performance range
+traditionally reserved for offline, static language compilers.
+</p>
+
+<h2>More ...</h2>
+<p>
+Click on the LuaJIT sub-topics in the navigation bar to learn more
+about LuaJIT.
+</p>
+<p>
+Click on the Logo in the upper left corner to visit
+the LuaJIT project page on the web. All other links to online
+resources are marked with a '<span class="ext">&raquo;</span>'.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2009 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/doc/running.html b/doc/running.html
new file mode 100644
index 00000000..db69578c
--- /dev/null
+++ b/doc/running.html
@@ -0,0 +1,233 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Running LuaJIT</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+table.opt {
+ line-height: 1.2;
+}
+tr.opthead td {
+ font-weight: bold;
+}
+td.flag_name {
+ width: 4em;
+}
+td.flag_level {
+ width: 2em;
+ text-align: center;
+}
+td.param_name {
+ width: 6em;
+}
+td.param_default {
+ width: 4em;
+ text-align: right;
+}
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Running LuaJIT</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a class="current" href="running.html">Running</a>
+</li><li>
+<a href="api.html">API Extensions</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT has only a single stand-alone executable, called <tt>luajit</tt> on
+POSIX systems or <tt>luajit.exe</tt> on Windows. It can be used to run simple
+Lua statements or whole Lua applications from the command line. It has an
+interactive mode, too.
+</p>
+<p class="indent" style="color: #c00000;">
+Note: the beta test releases only install under the versioned name on
+POSIX systems (to avoid overwriting a previous version). You either need
+to type <tt>luajit-2.0.0-beta1</tt> to start it or create a symlink
+with a command like this:
+</p>
+<pre class="code" style="color: #c00000;">
+sudo ln -sf luajit-2.0.0-beta1 /usr/local/bin/luajit
+</pre>
+<p>
+Unlike previous versions <b>optimization is turned on by default</b> in
+LuaJIT 2.0!<br>It's no longer necessary to use <tt>luajit&nbsp;-O</tt>.
+</p>
+
+<h2 id="options">Command Line Options</h2>
+<p>
+The <tt>luajit</tt> stand-alone executable is just a slightly modified
+version of the regular <tt>lua</tt> stand-alone executable.
+It supports the same basic options, too. <tt>luajit&nbsp;-h</tt>
+prints a short list of the available options. Please have a look at the
+<a href="http://www.lua.org/manual/5.1/manual.html#6"><span class="ext">&raquo;</span>&nbsp;Lua manual</a>
+for details.
+</p>
+<p>
+Two additional options control the behavior of LuaJIT:
+</p>
+
+<h3 id="opt_j"><tt>-j cmd[=arg[,arg...]]</tt></h3>
+<p>
+This option performs a LuaJIT control command or activates one of the
+loadable extension modules. The command is first looked up in the
+<tt>jit.*</tt> library. If no matching function is found, a module
+named <tt>jit.&lt;cmd&gt;</tt> is loaded and the <tt>start()</tt>
+function of the module is called with the specified arguments (if
+any). The space between <tt>-j</tt> and <tt>cmd</tt> is optional.
+</p>
+<p>
+Here are the available LuaJIT control commands:
+</p>
+<ul>
+<li id="j_on"><tt>-jon</tt> &mdash; Turns the JIT compiler on (default).</li>
+<li id="j_off"><tt>-joff</tt> &mdash; Turns the JIT compiler off (only use the interpreter).</li>
+<li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
+<li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
+<li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
+</ul>
+<p>
+The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
+written in Lua. They are mainly used for debugging the JIT compiler
+itself. For a description of their options and output format, please
+read the comment block at the start of their source.
+They can be found in the <tt>lib</tt> directory of the source
+distribution or installed under the <tt>jit</tt> directory. By default
+this is <tt>/usr/local/share/luajit-2.0.0-beta1/jit</tt> on POSIX
+systems.
+</p>
+
+<h3 id="opt_O"><tt>-O[level]</tt><br>
+<tt>-O[+]flag</tt> <tt>-O-flag</tt><br>
+<tt>-Oparam=value</tt></h3>
+<p>
+This option allows fine-tuned control of the optimizations used by
+the JIT compiler. This is mainly intended for debugging LuaJIT itself.
+Please note that the JIT compiler is extremely fast (we are talking
+about the microsecond to millisecond range). Disabling optimizations
+doesn't have any visible impact on its overhead, but usually generates
+code that runs slower.
+</p>
+<p>
+The first form sets an optimization level &mdash; this enables a
+specific mix of optimization flags. <tt>-O0</tt> turns off all
+optimizations and higher numbers enable more optimizations. Omitting
+the level (i.e. just <tt>-O</tt>) sets the default optimization level,
+which is <tt>-O3</tt> in the current version.
+</p>
+<p>
+The second form adds or removes individual optimization flags.
+The third form sets a parameter for the VM or the JIT compiler
+to a specific value.
+</p>
+<p>
+You can either use this option multiple times (like <tt>-Ocse
+-O-dce -Ohotloop=10</tt>) or separate several settings with a comma
+(like <tt>-O+cse,-dce,hotloop=10</tt>). The settings are applied from
+left to right and later settings override earlier ones. You can freely
+mix the three forms, but note that setting an optimization level
+overrides all earlier flags.
+</p>
+<p>
+Here are the available flags and at what optimization levels they
+are enabled:
+</p>
+<table class="opt">
+<tr class="opthead">
+<td class="flag_name">Flag</td>
+<td class="flag_level">-O1</td>
+<td class="flag_level">-O2</td>
+<td class="flag_level">-O3</td>
+<td class="flag_desc">&nbsp;</td>
+</tr>
+<tr class="odd separate">
+<td class="flag_name">fold</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Constant Folding, Simplifications and Reassociation</td></tr>
+<tr class="even">
+<td class="flag_name">cse</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Common-Subexpression Elimination</td></tr>
+<tr class="odd">
+<td class="flag_name">dce</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Dead-Code Elimination</td></tr>
+<tr class="even">
+<td class="flag_name">narrow</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Narrowing of numbers to integers</td></tr>
+<tr class="odd">
+<td class="flag_name">loop</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_level">&bull;</td><td class="flag_desc">Loop Optimizations (code hoisting)</td></tr>
+<tr class="even">
+<td class="flag_name">fwd</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Load Forwarding (L2L) and Store Forwarding (S2L)</td></tr>
+<tr class="odd">
+<td class="flag_name">dse</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Dead-Store Elimination</td></tr>
+<tr class="even">
+<td class="flag_name">fuse</td><td class="flag_level">&nbsp;</td><td class="flag_level">&nbsp;</td><td class="flag_level">&bull;</td><td class="flag_desc">Fusion of operands into instructions</td></tr>
+</table>
+<p>
+Here are the parameters and their default settings:
+</p>
+<table class="opt">
+<tr class="opthead">
+<td class="param_name">Parameter</td>
+<td class="param_default">Default</td>
+<td class="param_desc">&nbsp;</td>
+</tr>
+<tr class="odd separate">
+<td class="param_name">maxtrace</td><td class="param_default">1000</td><td class="param_desc">Max. number of traces in the cache</td></tr>
+<tr class="even">
+<td class="param_name">maxrecord</td><td class="param_default">2000</td><td class="param_desc">Max. number of recorded IR instructions</td></tr>
+<tr class="odd">
+<td class="param_name">maxirconst</td><td class="param_default">500</td><td class="param_desc">Max. number of IR constants of a trace</td></tr>
+<tr class="even">
+<td class="param_name">maxside</td><td class="param_default">100</td><td class="param_desc">Max. number of side traces of a root trace</td></tr>
+<tr class="odd">
+<td class="param_name">maxsnap</td><td class="param_default">100</td><td class="param_desc">Max. number of snapshots for a trace</td></tr>
+<tr class="even separate">
+<td class="param_name">hotloop</td><td class="param_default">57</td><td class="param_desc">Number of iterations to detect a hot loop</td></tr>
+<tr class="odd">
+<td class="param_name">hotexit</td><td class="param_default">10</td><td class="param_desc">Number of taken exits to start a side trace</td></tr>
+<tr class="even">
+<td class="param_name">tryside</td><td class="param_default">4</td><td class="param_desc">Number of attempts to compile a side trace</td></tr>
+<tr class="odd separate">
+<td class="param_name">instunroll</td><td class="param_default">4</td><td class="param_desc">Max. unroll factor for unstable loops</td></tr>
+<tr class="even">
+<td class="param_name">loopunroll</td><td class="param_default">7</td><td class="param_desc">Max. unroll factor for loop ops in side traces</td></tr>
+<tr class="odd">
+<td class="param_name">callunroll</td><td class="param_default">3</td><td class="param_desc">Max. unroll factor for pseudo-recursive calls</td></tr>
+<tr class="even separate">
+<td class="param_name">sizemcode</td><td class="param_default">32</td><td class="param_desc">Size of each machine code area in KBytes (Windows: 64K)</td></tr>
+<tr class="odd">
+<td class="param_name">maxmcode</td><td class="param_default">512</td><td class="param_desc">Max. total size of all machine code areas in KBytes</td></tr>
+</table>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2009 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/doc/status.html b/doc/status.html
new file mode 100644
index 00000000..23c14c76
--- /dev/null
+++ b/doc/status.html
@@ -0,0 +1,235 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>Status &amp; Roadmap</title>
+<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta name="Author" content="Mike Pall">
+<meta name="Copyright" content="Copyright (C) 2005-2009, Mike Pall">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+ul li { padding-bottom: 0.3em; }
+</style>
+</head>
+<body>
+<div id="site">
+<a href="http://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Status &amp; Roadmap</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li><li>
+<a href="api.html">API Extensions</a>
+</li></ul>
+</li><li>
+<a class="current" href="status.html">Status</a>
+<ul><li>
+<a href="changes.html">Changes</a>
+</li></ul>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+The <span style="color: #0000c0;">LuaJIT 1.x</span> series represents
+the current <span style="color: #0000c0;">stable branch</span>. As of
+this writing there have been no open bugs for about a year. So, if
+you need a rock-solid VM, you are encouraged to fetch the latest
+release of LuaJIT 1.x from the <a href="http://luajit.org/download.html"><span class="ext">&raquo;</span>&nbsp;Download</a>
+page.
+</p>
+<p>
+<span style="color: #c00000;">LuaJIT 2.0</span> is the currently active
+<span style="color: #c00000;">development branch</span>.
+It has <b>Beta Test</b> status and is still undergoing
+substantial changes. It's expected to quickly mature within the next
+few months. You should definitely start to evaluate it for new projects
+right now. But deploying it in production environments is not yet
+recommended.
+</p>
+
+<h2>Current Status</h2>
+<p>
+This is a list of the things you should know about the LuaJIT 2.0 beta test:
+</p>
+<ul>
+<li>
+The JIT compiler can only generate code for CPUs with <b>SSE2</b> at the
+moment. I.e. you need at least a P4, Core 2/i5/i7 or K8/K10 to use it. I
+plan to fix this during the beta phase and add support for emitting x87
+instructions to the backend.
+</li>
+<li>
+Obviously there will be many <b>bugs</b> in a VM which has been
+rewritten from the ground up. Please report your findings together with
+the circumstances needed to reproduce the bug. If possible reduce the
+problem down to a simple test case.<br>
+There is no formal bug tracker at the moment. The best place for
+discussion is the
+<a href="http://www.lua.org/lua-l.html"><span class="ext">&raquo;</span>&nbsp;Lua mailing list</a>. Of course
+you may also send your bug report directly to me, especially when it
+contains lengthy debug output. Please check the
+<a href="contact.html">Contact</a> page for details.
+</li>
+<li>
+The VM is complete in the sense that it <b>should</b> run all Lua code
+just fine. It's considered a serious bug if the VM crashes or produces
+unexpected results &mdash; please report it. There are only very few
+known incompatibilities with standard Lua:
+<ul>
+<li>
+The Lua <b>debug API</b> is missing a couple of features (call/return
+hooks) and shows slightly different behavior (no per-coroutine hooks).
+</li>
+<li>
+Most other issues you're likely to find (e.g. with the existing test
+suites) are differences in the <b>implementation-defined</b> behavior.
+These either have a good reason (like early tail call resolving which
+may cause differences in error reporting), are arbitrary design choices
+or are due to quirks in the VM. The latter cases may get fixed if a
+demonstrable need is shown.
+</li>
+</ul>
+</li>
+<li>
+The <b>JIT compiler</b> is not complete (yet) and falls back to the
+interpreter in some cases. All of this works transparently, so unless
+you use -jv, you'll probably never notice (the interpreter is quite
+fast, too). Here are the known issues:
+<ul>
+<li>
+Many known issues cause a <b>NYI</b> (not yet implemented) trace abort
+message. E.g. for calls to vararg functions or many string library
+functions. Reporting these is only mildly useful, except if you have good
+example code that shows the problem. Obviously, reports accompanied with
+a patch to fix the issue are more than welcome. But please check back
+with me, before writing major improvements, to avoid duplication of
+effort.
+</li>
+<li>
+<b>Recursion</b> is not traced yet. Often no trace will be generated at
+all or some unroll limit will catch it and abort the trace.
+</li>
+<li>
+The trace compiler currently does not back off specialization for
+function call dispatch. It should really fall back to specializing on
+the prototype, not the closure identity. This can lead to the so-called
+"trace explosion" problem with <b>closure-heavy programming</b>. The
+trace linking heuristics prevent this, but in the worst case this
+means the code always falls back to the interpreter.
+</li>
+<li>
+<b>Trace management</b> needs more tuning: better blacklisting of aborted
+traces, less drastic countermeasures against trace explosion and better
+heuristics in general.
+</li>
+<li>
+Some checks are missing in the JIT-compiled code for obscure situations
+with <b>open upvalues aliasing</b> one of the SSA slots later on (or
+vice versa). Bonus points, if you can find a real world test case for
+this.
+</li>
+</ul>
+</li>
+</ul>
+
+<h2>Roadmap</h2>
+<p>
+Rather than stating exact release dates (I'm well known for making
+spectacularly wrong guesses), this roadmap lists the general project
+plan, sorted by priority, as well as ideas for the future:
+</p>
+<ul>
+<li>
+The main goal right now is to stabilize LuaJIT 2.0 and get it out of
+beta test. <b>Correctness</b> has priority over completeness. This
+implies the first stable release will certainly NOT compile every
+library function call and will fall back to the interpreter from time
+to time. This is perfectly ok, since it still executes all Lua code,
+just not at the highest possible speed.
+</li>
+<li>
+The next step is to get it to compile more library functions and handle
+more cases where the compiler currently bails out. This doesn't mean it
+will compile every corner case. It's much more important that it
+performs well in a majority of use cases. Every compiler has to make
+these trade-offs &mdash; <b>completeness</b> just cannot be the
+overriding goal for a low-footprint, low-overhead JIT compiler.
+</li>
+<li>
+More <b>optimizations</b> will be added in parallel to the last step on
+an as-needed basis. Array-bounds-check (ABC) removal, sinking of stores
+to aggregates and sinking of allocations are high on the list. Faster
+handling of NEWREF and better alias analysis are desirable, too. More
+complex optimizations with less pay-off, such as value-range-propagation
+(VRP) will have to wait.
+</li>
+<li>
+LuaJIT 2.0 has been designed with <b>portability</b> in mind.
+Nonetheless, it compiles to native code and needs to be adapted to each
+architecture. Porting the compiler backend is probably the easier task,
+but a key element of its design is the fast interpreter, written in
+machine-specific assembler.<br>
+The code base and the internal structures are already prepared for
+easier porting to 64 bit architectures. The most likely next target is a
+port to <b>x64</b>, but this will have to wait until the x86 port
+stabilizes. Other ports will follow &mdash; companies which are
+interested in sponsoring a port to a particular architecture, please
+<a href="contact.html">contact me</a>.
+</li>
+<li>
+There are some planned <b>structural improvements</b> to the compiler,
+like compressed snapshot maps or generic handling of calls to helper
+methods. These are of lesser importance, unless other developments
+elevate their priority.
+</li>
+<li>
+<b>Documentation</b> about the <b>internals</b> of LuaJIT is still sorely
+missing. Although the source code is included and is IMHO well
+commented, many basic design decisions are in need of an explanation.
+The rather un-traditional compiler architecture and the many highly
+optimized data structures are a barrier for outside participation in
+the development. Alas, as I've repeatedly stated, I'm better at
+writing code than papers and I'm not in need of any academic merits.
+Someday I will find the time for it. :-)
+</li>
+<li>
+Producing good code for unbiased branches is a key problem for trace
+compilers. This is the main cause for "trace explosion".
+<b>Hyperblock scheduling</b> promises to solve this nicely at the
+price of a major redesign of the compiler. This would also pave the
+way for emitting predicated instructions, which is a prerequisite
+for efficient <b>vectorization</b>.
+</li>
+<li>
+Currently Lua is missing a standard library for access to <b>structured
+binary data</b> and <b>arrays/buffers</b> holding low-level data types.
+Allowing calls to arbitrary C functions (<b>FFI</b>) would obviate the
+need to write manual bindings. A variety of extension modules is floating
+around, with different scope and capabilities. Alas, none of them has been
+designed with a JIT compiler in mind.
+</li>
+</ul>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2009 Mike Pall
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
new file mode 100644
index 00000000..94d9a9e2
--- /dev/null
+++ b/dynasm/dasm_proto.h
@@ -0,0 +1,69 @@
+/*
+** DynASM encoding engine prototypes.
+** Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+** Released under the MIT/X license. See dynasm.lua for full copyright notice.
+*/
+
+#ifndef _DASM_PROTO_H
+#define _DASM_PROTO_H
+
+#include <stddef.h>
+#include <stdarg.h>
+
+#define DASM_IDENT "DynASM 1.2.1"
+#define DASM_VERSION 10201 /* 1.2.1 */
+
+#ifndef Dst_DECL
+#define Dst_DECL dasm_State *Dst
+#endif
+
+#ifndef Dst_GET
+#define Dst_GET (Dst)
+#endif
+
+#ifndef DASM_FDEF
+#define DASM_FDEF extern
+#endif
+
+
+/* Internal DynASM encoder state. */
+typedef struct dasm_State dasm_State;
+
+/* Action list type. */
+typedef const unsigned char *dasm_ActList;
+
+
+/* Initialize and free DynASM state. */
+DASM_FDEF void dasm_init(Dst_DECL, int maxsection);
+DASM_FDEF void dasm_free(Dst_DECL);
+
+/* Setup global array. Must be called before dasm_setup(). */
+DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl);
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc);
+
+/* Setup encoder. */
+DASM_FDEF void dasm_setup(Dst_DECL, dasm_ActList actionlist);
+
+/* Feed encoder with actions. Calls are generated by pre-processor. */
+DASM_FDEF void dasm_put(Dst_DECL, int start, ...);
+
+/* Link sections and return the resulting size. */
+DASM_FDEF int dasm_link(Dst_DECL, size_t *szp);
+
+/* Encode sections into buffer. */
+DASM_FDEF int dasm_encode(Dst_DECL, void *buffer);
+
+/* Get PC label offset. */
+DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc);
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch);
+#else
+#define dasm_checkstep(a, b) 0
+#endif
+
+
+#endif /* _DASM_PROTO_H */
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
new file mode 100644
index 00000000..dab33e5a
--- /dev/null
+++ b/dynasm/dasm_x86.h
@@ -0,0 +1,467 @@
+/*
+** DynASM x86 encoding engine.
+** Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+** Released under the MIT/X license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "x86"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. DASM_STOP must be 255. */
+enum {
+ DASM_DISP = 233,
+ DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB,
+ DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC,
+ DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN,
+ DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_VREG 0x15000000
+#define DASM_S_UNDEF_L 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
+ int *buf; /* True buffer pointer. */
+ size_t bsize; /* Buffer size in bytes. */
+ int pos; /* Biased buffer position. */
+ int epos; /* End of biased buffer position - max single put. */
+ int ofs; /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+ size_t psize; /* Allocated size of this structure. */
+ dasm_ActList actionlist; /* Current actionlist pointer. */
+ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize;
+ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize;
+ void **globals; /* Array of globals (bias -10). */
+ dasm_Section *section; /* Pointer to active section. */
+ size_t codesize; /* Total size of all code sections. */
+ int maxsection; /* 0 <= sectionidx < maxsection. */
+ int status; /* Status code. */
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+ dasm_State *D;
+ size_t psz = 0;
+ int i;
+ Dst_REF = NULL;
+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+ D = Dst_REF;
+ D->psize = psz;
+ D->lglabels = NULL;
+ D->lgsize = 0;
+ D->pclabels = NULL;
+ D->pcsize = 0;
+ D->globals = NULL;
+ D->maxsection = maxsection;
+ for (i = 0; i < maxsection; i++) {
+ D->sections[i].buf = NULL; /* Need this for pass3. */
+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+ D->sections[i].bsize = 0;
+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
+ }
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ for (i = 0; i < D->maxsection; i++)
+ if (D->sections[i].buf)
+ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+ DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+ dasm_State *D = Dst_REF;
+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+ dasm_State *D = Dst_REF;
+ size_t osz = D->pcsize;
+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, dasm_ActList actionlist)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ D->actionlist = actionlist;
+ D->status = DASM_S_OK;
+ D->section = &D->sections[0];
+ memset((void *)D->lglabels, 0, D->lgsize);
+ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+ for (i = 0; i < D->maxsection; i++) {
+ D->sections[i].pos = DASM_SEC2POS(i);
+ D->sections[i].ofs = 0;
+ }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) { \
+ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st) ((void)0)
+#define CKPL(kind, st) ((void)0)
+#endif
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+ va_list ap;
+ dasm_State *D = Dst_REF;
+ dasm_ActList p = D->actionlist + start;
+ dasm_Section *sec = D->section;
+ int pos = sec->pos, ofs = sec->ofs, mrm = 4;
+ int *b;
+
+ if (pos >= sec->epos) {
+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+ b[pos++] = start;
+
+ va_start(ap, start);
+ while (1) {
+ int action = *p++;
+ if (action < DASM_DISP) {
+ ofs++;
+ } else if (action <= DASM_REL_A) {
+ int n = va_arg(ap, int);
+ b[pos++] = n;
+ switch (action) {
+ case DASM_DISP:
+ if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
+ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
+ case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
+ case DASM_IMM_D: ofs += 4; break;
+ case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob;
+ case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break;
+ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob;
+ case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
+ case DASM_SPACE: p++; ofs += n; break;
+ case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
+ case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
+ if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue;
+ }
+ mrm = 4;
+ } else {
+ int *pl, n;
+ switch (action) {
+ case DASM_REL_LG:
+ case DASM_IMM_LG:
+ n = *p++; pl = D->lglabels + n;
+ if (n <= 246) { CKPL(lg, LG); goto putrel; } /* Bkwd rel or global. */
+ pl -= 246; n = *pl;
+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+ case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
+ putrel:
+ n = *pl;
+ if (n < 0) { /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ } else {
+ linkrel:
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ pos++;
+ ofs += 4; /* Maximum offset needed. */
+ if (action == DASM_REL_LG || action == DASM_REL_PC)
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
+ putlabel:
+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_ALIGN:
+ ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_EXTERN: p += 2; ofs += 4; break;
+ case DASM_ESC: p++; ofs++; break;
+ case DASM_MARK: mrm = p[-2]; break;
+ case DASM_SECTION:
+ n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n];
+ case DASM_STOP: goto stop;
+ }
+ }
+ }
+stop:
+ va_end(ap);
+ sec->pos = pos;
+ sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink branches/aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+ dasm_State *D = Dst_REF;
+ int secnum;
+ int ofs = 0;
+
+#ifdef DASM_CHECKS
+ *szp = 0;
+ if (D->status != DASM_S_OK) return D->status;
+ {
+ int pc;
+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+ }
+#endif
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+ }
+ }
+
+ /* Combine all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->rbuf;
+ int pos = DASM_SEC2POS(secnum);
+ int lastpos = sec->pos;
+
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+ int op, action = *p++;
+ switch (action) {
+ case DASM_REL_LG: p++; op = p[-3]; goto rel_pc;
+ case DASM_REL_PC: op = p[-2]; rel_pc: {
+ int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0);
+ if (shrink) { /* Shrinkable branch opcode? */
+ int lofs, lpos = b[pos];
+ if (lpos < 0) goto noshrink; /* Ext global? */
+ lofs = *DASM_POS2PTR(D, lpos);
+ if (lpos > pos) { /* Fwd label: add cumulative section offsets. */
+ int i;
+ for (i = secnum; i < DASM_POS2SEC(lpos); i++)
+ lofs += D->sections[i].ofs;
+ } else {
+ lofs -= ofs; /* Bkwd label: unfix offset. */
+ }
+ lofs -= b[pos+1]; /* Short branch ok? */
+ if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */
+ else { noshrink: shrink = 0; } /* No, cannot shrink op. */
+ }
+ b[pos+1] = shrink;
+ pos += 2;
+ break;
+ }
+ case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++;
+ case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W:
+ case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB:
+ case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break;
+ case DASM_LABEL_LG: p++;
+ case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */
+ case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */
+ case DASM_EXTERN: p += 2; break;
+ case DASM_ESC: p++; break;
+ case DASM_MARK: break;
+ case DASM_SECTION: case DASM_STOP: goto stop;
+ }
+ }
+ stop: (void)0;
+ }
+ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+}
+
+#define dasmb(x) *cp++ = (unsigned char)(x)
+#ifndef DASM_ALIGNED_WRITES
+#define dasmw(x) \
+ do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
+#define dasmd(x) \
+ do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
+#else
+#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
+#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+ dasm_State *D = Dst_REF;
+ unsigned char *base = (unsigned char *)buffer;
+ unsigned char *cp = base;
+ int secnum;
+
+ /* Encode all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->buf;
+ int *endb = sec->rbuf + sec->pos;
+
+ while (b != endb) {
+ dasm_ActList p = D->actionlist + *b++;
+ unsigned char *mark = NULL;
+ while (1) {
+ int action = *p++;
+ int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? *b++ : 0;
+ switch (action) {
+ case DASM_DISP: if (!mark) mark = cp; {
+ unsigned char *mm = mark;
+ if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL;
+ if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7;
+ if (mrm != 5) { mm[-1] -= 0x80; break; } }
+ if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40;
+ }
+ case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break;
+ case DASM_IMM_DB: if (((n+128)&-256) == 0) {
+ db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb;
+ } else mark = NULL;
+ case DASM_IMM_D: wd: dasmd(n); break;
+ case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
+ case DASM_IMM_W: dasmw(n); break;
+ case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
+ case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
+ b++; n = (int)(ptrdiff_t)D->globals[-n];
+ case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */
+ case DASM_REL_PC: rel_pc: {
+ int shrink = *b++;
+ int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { n = pb[1]; goto rel_a; }
+ n = *pb - ((int)(cp-base) + 4-shrink);
+ if (shrink == 0) goto wd;
+ if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb;
+ goto wb;
+ }
+ case DASM_IMM_LG:
+ p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
+ case DASM_IMM_PC: {
+ int *pb = DASM_POS2PTR(D, n);
+ n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
+ goto wd;
+ }
+ case DASM_LABEL_LG: {
+ int idx = *p++;
+ if (idx >= 10)
+ D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n));
+ break;
+ }
+ case DASM_LABEL_PC: case DASM_SETLABEL: break;
+ case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; }
+ case DASM_ALIGN:
+ n = *p++;
+ while (((cp-base) & n)) *cp++ = 0x90; /* nop */
+ break;
+ case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd;
+ case DASM_MARK: mark = cp; break;
+ case DASM_ESC: action = *p++;
+ default: *cp++ = action; break;
+ case DASM_SECTION: case DASM_STOP: goto stop;
+ }
+ }
+ stop: (void)0;
+ }
+ }
+
+ if (base + D->codesize != cp) /* Check for phase errors. */
+ return DASM_S_PHASE;
+ return DASM_S_OK;
+}
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+ dasm_State *D = Dst_REF;
+ if (pc*sizeof(int) < D->pcsize) {
+ int pos = D->pclabels[pc];
+ if (pos < 0) return *DASM_POS2PTR(D, -pos);
+ if (pos > 0) return -1; /* Undefined. */
+ }
+ return -2; /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+ dasm_State *D = Dst_REF;
+ if (D->status == DASM_S_OK) {
+ int i;
+ for (i = 1; i <= 9; i++) {
+ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_L|i; break; }
+ D->lglabels[i] = 0;
+ }
+ }
+ if (D->status == DASM_S_OK && secmatch >= 0 &&
+ D->section != &D->sections[secmatch])
+ D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+ return D->status;
+}
+#endif
+
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
new file mode 100644
index 00000000..82210806
--- /dev/null
+++ b/dynasm/dasm_x86.lua
@@ -0,0 +1,1799 @@
+------------------------------------------------------------------------------
+-- DynASM x86 module.
+--
+-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Module information:
+local _info = {
+ arch = "x86",
+ description = "DynASM x86 (i386) module",
+ version = "1.2.1",
+ vernum = 10201,
+ release = "2009-04-16",
+ author = "Mike Pall",
+ license = "MIT",
+}
+
+-- Exported glue functions for the arch-specific module.
+local _M = { _info = _info }
+
+-- Cache library functions.
+local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
+local assert, unpack = assert, unpack
+local _s = string
+local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
+local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
+local concat, sort = table.concat, table.sort
+-- NOTE(review): char and unpack are already cached above; this line is redundant.
+local char, unpack = string.char, unpack
+
+-- Inherited tables and callbacks.
+-- These are filled in by the generic dynasm frontend (wline/werror etc.).
+local g_opt, g_arch
+local wline, werror, wfatal, wwarn
+
+-- Action name list.
+-- CHECK: Keep this in sync with the C code!
+local action_names = {
+ -- int arg, 1 buffer pos:
+ "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
+ -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
+ "VREG", "SPACE",
+ -- ptrdiff_t arg, 1 buffer pos (address): !x64
+ "SETLABEL", "REL_A",
+ -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
+ "REL_LG", "REL_PC",
+ -- action arg (1 byte) or int arg, 1 buffer pos (link):
+ "IMM_LG", "IMM_PC",
+ -- action arg (1 byte) or int arg, 1 buffer pos (offset):
+ "LABEL_LG", "LABEL_PC",
+ -- action arg (1 byte), 1 buffer pos (offset):
+ "ALIGN",
+ -- action args (2 bytes), no buffer pos.
+ "EXTERN",
+ -- action arg (1 byte), no buffer pos.
+ "ESC",
+ -- no action arg, no buffer pos.
+ "MARK",
+ -- action arg (1 byte), no buffer pos, terminal action:
+ "SECTION",
+ -- no args, no buffer pos, terminal action:
+ "STOP"
+}
+
+-- Maximum number of section buffer positions for dasm_put().
+-- CHECK: Keep this in sync with the C code!
+local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.
+
+-- Action name -> action number (dynamically generated below).
+local map_action = {}
+-- First action number. Everything below does not need to be escaped.
+local actfirst = 256-#action_names
+
+-- Action list buffer and string (only used to remove dupes).
+local actlist = {}
+local actstr = ""
+
+-- Argument list for next dasm_put(). Start with offset 0 into action list.
+local actargs = { 0 }
+
+-- Current number of section buffer positions for dasm_put().
+local secpos = 1
+
+------------------------------------------------------------------------------
+
+-- Compute action numbers for action names.
+-- Actions occupy the topmost byte values (actfirst..255), in list order.
+for n,name in ipairs(action_names) do
+ local num = actfirst + n - 1
+ map_action[name] = num
+end
+
+-- Dump action names and numbers.
+local function dumpactions(out)
+ out:write("DynASM encoding engine action codes:\n")
+ for n,name in ipairs(action_names) do
+ local num = map_action[name]
+ out:write(format(" %-10s %02X %d\n", name, num, num))
+ end
+ out:write("\n")
+end
+
+-- Write action list buffer as a huge static C array.
+-- The last byte is popped and printed separately so the initializer has no
+-- trailing comma; lines are wrapped at ~75 chars to keep the C file readable.
+local function writeactions(out, name)
+ local nn = #actlist
+ local last = actlist[nn] or 255
+ actlist[nn] = nil -- Remove last byte.
+ if nn == 0 then nn = 1 end
+ out:write("static const unsigned char ", name, "[", nn, "] = {\n")
+ local s = " "
+ for n,b in ipairs(actlist) do
+ s = s..b..","
+ if #s >= 75 then
+ assert(out:write(s, "\n"))
+ s = " "
+ end
+ end
+ out:write(s, last, "\n};\n\n") -- Add last byte back.
+end
+
+------------------------------------------------------------------------------
+
+-- Add byte to action list.
+-- n must be an integer in 0..255.
+local function wputxb(n)
+ assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range")
+ actlist[#actlist+1] = n
+end
+
+-- Add action to list with optional arg. Advance buffer pos, too.
+-- a: extra C expression passed as a dasm_put() argument.
+-- num: number of buffer positions consumed (defaults to 1 if an arg is given).
+local function waction(action, a, num)
+ wputxb(assert(map_action[action], "bad action name `"..action.."'"))
+ if a then actargs[#actargs+1] = a end
+ if a or num then secpos = secpos + (num or 1) end
+end
+
+-- Add call to embedded DynASM C code.
+local function wcall(func, args)
+ wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
+end
+
+-- Delete duplicate action list chunks. A tad slow, but so what.
+-- The pending bytes are matched as a plain substring against all previously
+-- flushed action bytes (actstr); on a hit the dupe is dropped and the
+-- original offset is reused.
+local function dedupechunk(offset)
+ local al, as = actlist, actstr
+ local chunk = char(unpack(al, offset+1, #al))
+ local orig = find(as, chunk, 1, true)
+ if orig then
+ actargs[1] = orig-1 -- Replace with original offset.
+ for i=offset+1,#al do al[i] = nil end -- Kill dupe.
+ else
+ actstr = as..chunk
+ end
+end
+
+-- Flush action list (intervening C code or buffer pos overflow).
+-- term is true when the chunk already ends in a terminal action, so no
+-- extra STOP needs to be appended.
+local function wflush(term)
+ local offset = actargs[1]
+ if #actlist == offset then return end -- Nothing to flush.
+ if not term then waction("STOP") end -- Terminate action list.
+ dedupechunk(offset)
+ wcall("put", actargs) -- Add call to dasm_put().
+ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
+ secpos = 1 -- The actionlist offset occupies a buffer position, too.
+end
+
+-- Put escaped byte.
+-- Bytes >= actfirst collide with action codes and are prefixed with ESC.
+local function wputb(n)
+ if n >= actfirst then waction("ESC") end -- Need to escape byte.
+ wputxb(n)
+end
+
+------------------------------------------------------------------------------
+
+-- Global label name -> global label number. With auto assignment on 1st use.
+-- Globals occupy numbers 10..246; 1-9 and 247-255 are used for local label
+-- references (see immexpr below).
+local next_global = 10
+local map_global = setmetatable({}, { __index = function(t, name)
+ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
+ local n = next_global
+ if n > 246 then werror("too many global labels") end
+ next_global = n + 1
+ t[name] = n
+ return n
+end})
+
+-- Dump global labels.
+local function dumpglobals(out, lvl)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("Global labels:\n")
+ for i=10,next_global-1 do
+ out:write(format(" %s\n", t[i]))
+ end
+ out:write("\n")
+end
+
+-- Write global label enum.
+-- Emits one enum constant per global, in assignment order, plus a _MAX
+-- sentinel; prefix is prepended to each name.
+local function writeglobals(out, prefix)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("enum {\n")
+ for i=10,next_global-1 do
+ out:write(" ", prefix, t[i], ",\n")
+ end
+ out:write(" ", prefix, "_MAX\n};\n")
+end
+
+-- Write global label names.
+-- Emits a NULL-terminated C array of the global label name strings.
+local function writeglobalnames(out, name)
+ local t = {}
+ for name, n in pairs(map_global) do t[n] = name end
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=10,next_global-1 do
+ out:write(" \"", t[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Extern label name -> extern label number. With auto assignment on 1st use.
+-- Externs are numbered -1, -2, ... down to -256; consumers index by -n.
+local map_extern = setmetatable({}, { __index = function(t, name)
+ -- No restrictions on the name for now.
+ local n = next_extern
+ if n < -256 then werror("too many extern labels") end
+ next_extern = n - 1
+ t[name] = n
+ return n
+end})
+
+-- Dump extern labels.
+local function dumpexterns(out, lvl)
+ local t = {}
+ for name, n in pairs(map_extern) do t[-n] = name end
+ out:write("Extern labels:\n")
+ for i=1,-next_extern-1 do
+ out:write(format(" %s\n", t[i]))
+ end
+ out:write("\n")
+end
+
+-- Write extern label names.
+-- Emits a NULL-terminated C array of the extern label name strings.
+local function writeexternnames(out, name)
+ local t = {}
+ for name, n in pairs(map_extern) do t[-n] = name end
+ out:write("static const char *const ", name, "[] = {\n")
+ for i=1,-next_extern-1 do
+ out:write(" \"", t[i], "\",\n")
+ end
+ out:write(" (const char *)0\n};\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Arch-specific maps.
+local map_archdef = {} -- Ext. register name -> int. name.
+local map_reg_rev = {} -- Int. register name -> ext. name.
+local map_reg_num = {} -- Int. register name -> register number.
+local map_reg_opsize = {} -- Int. register name -> operand size.
+local map_reg_valid_base = {} -- Int. register name -> valid base register?
+local map_reg_valid_index = {} -- Int. register name -> valid index register?
+local reg_list = {} -- Canonical list of int. register names.
+
+local map_type = {} -- Type name -> { ctype, reg }
+local ctypenum = 0 -- Type number (for _PTx macros).
+
+local addrsize = "d" -- Size for address operands. !x64
+
+-- Helper function to fill register maps.
+-- sz: operand size code ("b"/"w"/"d"/"f"/"o"), cl: register class name
+-- (e.g. "Rd"), names: ordered list of register names (index-1 = reg number).
+-- Internal names are "@<sz><hexnum>"; the class itself maps to "@<sz>" with
+-- number -1, which marks a variable register (number supplied at use site).
+local function mkrmap(sz, cl, names)
+ local cname = format("@%s", sz)
+ reg_list[#reg_list+1] = cname
+ map_archdef[cl] = cname
+ map_reg_rev[cname] = cl
+ map_reg_num[cname] = -1
+ map_reg_opsize[cname] = sz
+ if sz == addrsize then
+ map_reg_valid_base[cname] = true
+ map_reg_valid_index[cname] = true
+ end
+ for n,name in ipairs(names) do
+ local iname = format("@%s%x", sz, n-1)
+ reg_list[#reg_list+1] = iname
+ map_archdef[name] = iname
+ map_reg_rev[iname] = name
+ map_reg_num[iname] = n-1
+ map_reg_opsize[iname] = sz
+ if sz == addrsize then
+ map_reg_valid_base[iname] = true
+ map_reg_valid_index[iname] = true
+ end
+ end
+ -- Empty string acts as a group separator (dumpregs prints a blank line).
+ reg_list[#reg_list+1] = ""
+end
+
+-- Integer registers (dword, word and byte sized).
+mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
+map_reg_valid_index[map_archdef.esp] = false
+mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
+mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
+map_archdef["Ra"] = "@"..addrsize
+
+-- FP registers (internally tword sized, but use "f" as operand size).
+mkrmap("f", "Rf", {"st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"})
+
+-- SSE registers (oword sized, but qword and dword accessible).
+mkrmap("o", "xmm", {"xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7"})
+
+-- Operand size prefixes to codes.
+local map_opsize = {
+ byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
+ aword = addrsize,
+}
+
+-- Operand size code to number.
+local map_opsizenum = {
+ b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
+}
+
+-- Operand size code to name.
+local map_opsizename = {
+ b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
+ f = "fpword",
+}
+
+-- Valid index register scale factors.
+local map_xsc = {
+ ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
+}
+
+-- Condition codes.
+local map_cc = {
+ o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7,
+ s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15,
+ c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7,
+ pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15,
+}
+
+
+-- Reverse defines for registers.
+-- Substitutes internal "@..." register names in s with their external names.
+function _M.revdef(s)
+ return gsub(s, "@%w+", map_reg_rev)
+end
+
+-- Dump register names and numbers
+local function dumpregs(out)
+ out:write("Register names, sizes and internal numbers:\n")
+ for _,reg in ipairs(reg_list) do
+ if reg == "" then
+ -- Empty entry separates the register groups (see mkrmap).
+ out:write("\n")
+ else
+ local name = map_reg_rev[reg]
+ local num = map_reg_num[reg]
+ local opsize = map_opsizename[map_reg_opsize[reg]]
+ out:write(format(" %-5s %-8s %s\n", name, opsize,
+ num < 0 and "(variable)" or num))
+ end
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
+-- imm: number >= 0 -> global label, number < 0 -> extern label (encoded as
+-- EXTERN with a 0/1 flag for IMM_/REL_ and index -imm-1), otherwise a pc
+-- label expression handled via the *_PC action.
+local function wputlabel(aprefix, imm, num)
+ if type(imm) == "number" then
+ if imm < 0 then
+ waction("EXTERN")
+ wputxb(aprefix == "IMM_" and 0 or 1)
+ imm = -imm-1
+ else
+ waction(aprefix.."LG", nil, num);
+ end
+ wputxb(imm)
+ else
+ waction(aprefix.."PC", imm, num)
+ end
+end
+
+-- Put signed byte or arg.
+-- Non-numbers are deferred to runtime via the IMM_S action.
+local function wputsbarg(n)
+ if type(n) == "number" then
+ if n < -128 or n > 127 then
+ werror("signed immediate byte out of range")
+ end
+ if n < 0 then n = n + 256 end
+ wputb(n)
+ else waction("IMM_S", n) end
+end
+
+-- Put unsigned byte or arg.
+local function wputbarg(n)
+ if type(n) == "number" then
+ if n < 0 or n > 255 then
+ werror("unsigned immediate byte out of range")
+ end
+ wputb(n)
+ else waction("IMM_B", n) end
+end
+
+-- Put unsigned word or arg.
+-- Emitted little-endian (low byte first).
+local function wputwarg(n)
+ if type(n) == "number" then
+ if n < 0 or n > 65535 then
+ werror("unsigned immediate word out of range")
+ end
+ local r = n%256; n = (n-r)/256; wputb(r); wputb(n);
+ else waction("IMM_W", n) end
+end
+
+-- Put signed or unsigned dword or arg.
+-- Numbers are normalized to unsigned and emitted little-endian; a table
+-- wraps a label reference (see wputlabel); anything else becomes IMM_D.
+local function wputdarg(n)
+ local tn = type(n)
+ if tn == "number" then
+ if n < 0 then n = n + 4294967296 end
+ local r = n%256; n = (n-r)/256; wputb(r);
+ r = n%256; n = (n-r)/256; wputb(r);
+ r = n%256; n = (n-r)/256; wputb(r); wputb(n);
+ elseif tn == "table" then
+ wputlabel("IMM_", n[1], 1)
+ else
+ waction("IMM_D", n)
+ end
+end
+
+-- Put operand-size dependent number or arg (defaults to dword).
+local function wputszarg(sz, n)
+ if not sz or sz == "d" then wputdarg(n)
+ elseif sz == "w" then wputwarg(n)
+ elseif sz == "b" then wputbarg(n)
+ elseif sz == "s" then wputsbarg(n)
+ else werror("bad operand size") end
+end
+
+-- Put multi-byte opcode with operand-size dependent modifications.
+-- For word size an o16 prefix (102 = 0x66) is emitted first; for byte size
+-- the final opcode byte is decremented by one (the standard opcode mod).
+-- Multi-byte opcodes are emitted most-significant byte first.
+local function wputop(sz, op)
+ local r
+ if sz == "w" then wputb(102) end
+ -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
+ if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
+ if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end
+ if op >= 65536 then r = op % 65536 wputb((op-r) / 65536) op = r end
+ if op >= 256 then r = op % 256 wputb((op-r) / 256) op = r end
+ if sz == "b" then op = op - 1 end
+ wputb(op)
+end
+
+-- Put ModRM or SIB formatted byte.
+-- m = mod/scale (2 bits), s = reg/index (3 bits), rm = r/m/base (3 bits).
+-- vs/vrm are accepted but unused here.
+local function wputmodrm(m, s, rm, vs, vrm)
+ assert(m < 4 and s < 8 and rm < 8, "bad modrm operands")
+ wputb(64*m + 8*s + rm)
+end
+
+-- Put ModRM/SIB plus optional displacement.
+-- t: parsed operand (see parseoperand); s: value for the ModRM reg field
+-- (spare bits); imark/vsreg request MARK/VREG actions so the C side can
+-- patch positions or variable register numbers at runtime.
+-- Register numbers < 0 denote variable registers: the number is emitted as
+-- 0 here and fixed up via a VREG action (the trailing byte selects which
+-- field of the preceding ModRM/SIB byte to patch).
+local function wputmrmsib(t, imark, s, vsreg)
+ local vreg, vxreg
+ local reg, xreg = t.reg, t.xreg
+ if reg and reg < 0 then reg = 0; vreg = t.vreg end
+ if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
+ if s < 0 then s = 0 end
+
+ -- Register mode.
+ if sub(t.mode, 1, 1) == "r" then
+ wputmodrm(3, s, reg)
+ if vsreg then waction("VREG", vsreg); wputxb(2) end
+ if vreg then waction("VREG", vreg); wputxb(0) end
+ return
+ end
+
+ local disp = t.disp
+ local tdisp = type(disp)
+ -- No base register?
+ if not reg then
+ if xreg then
+ -- Indexed mode with index register only.
+ -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
+ wputmodrm(0, s, 4)
+ if imark then waction("MARK") end
+ if vsreg then waction("VREG", vsreg); wputxb(2) end
+ wputmodrm(t.xsc, xreg, 5)
+ if vxreg then waction("VREG", vxreg); wputxb(3) end
+ else
+ -- Pure displacement.
+ wputmodrm(0, s, 5) -- [disp] -> (0, s, ebp)
+ if imark then waction("MARK") end
+ if vsreg then waction("VREG", vsreg); wputxb(2) end
+ end
+ -- Both no-base forms always carry a 32 bit displacement.
+ wputdarg(disp)
+ return
+ end
+
+ -- Pick the mod field from the displacement: 0 = none, 1 = byte, 2 = dword.
+ -- m stays nil for runtime expressions -> deferred via the DISP action.
+ local m
+ if tdisp == "number" then -- Check displacement size at assembly time.
+ if disp == 0 and reg ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
+ if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
+ elseif disp >= -128 and disp <= 127 then m = 1
+ else m = 2 end
+ elseif tdisp == "table" then
+ m = 2
+ end
+
+ -- Index register present or esp as base register: need SIB encoding.
+ if xreg or reg == 4 then
+ wputmodrm(m or 2, s, 4) -- ModRM.
+ if m == nil or imark then waction("MARK") end
+ if vsreg then waction("VREG", vsreg); wputxb(2) end
+ wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
+ if vxreg then waction("VREG", vxreg); wputxb(3) end
+ if vreg then waction("VREG", vreg); wputxb(1) end
+ else
+ wputmodrm(m or 2, s, reg) -- ModRM.
+ if (imark and (m == 1 or m == 2)) or
+ (m == nil and (vsreg or vreg)) then waction("MARK") end
+ if vsreg then waction("VREG", vsreg); wputxb(2) end
+ if vreg then waction("VREG", vreg); wputxb(1) end
+ end
+
+ -- Put displacement.
+ if m == 1 then wputsbarg(disp)
+ elseif m == 2 then wputdarg(disp)
+ elseif m == nil then waction("DISP", disp) end
+end
+
+------------------------------------------------------------------------------
+
+-- Return human-readable operand mode string.
+-- Used for error messages: op plus one "<mode-char><size>" item per operand.
+local function opmodestr(op, args)
+ local m = {}
+ for i=1,#args do
+ local a = args[i]
+ m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?")
+ end
+ return op.." "..concat(m, ",")
+end
+
+-- Convert number to valid integer or nil.
+-- Accepts whole numbers in -2^31 .. 2^32-1; errors on other numerics,
+-- returns nil if expr is not numeric at all.
+local function toint(expr)
+ local n = tonumber(expr)
+ if n then
+ if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
+ werror("bad integer number `"..expr.."'")
+ end
+ return n
+ end
+end
+
+-- Parse immediate expression.
+-- Returns a mode string plus the (possibly rewritten) expression:
+-- "iPJ" for &expr pointers, "iJ" for label references (=>pc, ->global,
+-- <n/>n local, extern name), "iI" for a plain immediate expression.
+local function immexpr(expr)
+ -- &expr (pointer)
+ if sub(expr, 1, 1) == "&" then
+ return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2))
+ end
+
+ local prefix = sub(expr, 1, 2)
+ -- =>expr (pc label reference)
+ if prefix == "=>" then
+ return "iJ", sub(expr, 3)
+ end
+ -- ->name (global label reference)
+ if prefix == "->" then
+ return "iJ", map_global[sub(expr, 3)]
+ end
+
+ -- [<>][1-9] (local label reference)
+ local dir, lnum = match(expr, "^([<>])([1-9])$")
+ if dir then -- Fwd: 247-255, Bkwd: 1-9.
+ return "iJ", lnum + (dir == ">" and 246 or 0)
+ end
+
+ local extname = match(expr, "^extern%s+(%S+)$")
+ if extname then
+ return "iJ", map_extern[extname]
+ end
+
+ -- expr (interpreted as immediate)
+ return "iI", expr
+end
+
+-- Parse displacement expression: +-num, +-expr, +-opsize*num
+-- Returns a number for constants, a { label } table for label references,
+-- or the original signed expression string for runtime displacements.
+local function dispexpr(expr)
+ local disp = expr == "" and 0 or toint(expr)
+ if disp then return disp end
+ local c, dispt = match(expr, "^([+-])%s*(.+)$")
+ if c == "+" then
+ expr = dispt
+ elseif not c then
+ werror("bad displacement expression `"..expr.."'")
+ end
+ local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$")
+ local ops, imm = map_opsize[opsize], toint(tailops)
+ if ops and imm then
+ if c == "-" then imm = -imm end
+ return imm*map_opsizenum[ops]
+ end
+ local mode, iexpr = immexpr(dispt)
+ if mode == "iJ" then
+ if c == "-" then werror("cannot invert label reference") end
+ return { iexpr }
+ end
+ return expr -- Need to return original signed expression.
+end
+
+-- Parse register or type expression.
+-- expr may be a plain register, a type name, or "type:@reg" (register
+-- override). Returns the internal register name, its number and, for
+-- typed expressions, the type table from map_type.
+local function rtexpr(expr)
+ if not expr then return end
+ local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
+ local tp = map_type[tname or expr]
+ if tp then
+ local reg = ovreg or tp.reg
+ local rnum = map_reg_num[reg]
+ if not rnum then
+ werror("type `"..(tname or expr).."' needs a register override")
+ end
+ if not map_reg_valid_base[reg] then
+ werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
+ end
+ return reg, rnum, tp
+ end
+ return expr, map_reg_num[expr]
+end
+
+-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
+-- mode is the match string used against the instruction templates:
+-- "rm.." register, "xm.." memory, "i.." immediate (see the template
+-- description further below). reg/xreg numbers of -1 mean a variable
+-- register; the C expression is kept in vreg/vxreg.
+-- The repeat ... until true construct is only used to allow break as a
+-- structured early exit.
+local function parseoperand(param)
+ local t = {}
+
+ local expr = param
+ -- Optional leading operand size override, e.g. "dword [eax]".
+ local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
+ if opsize then
+ t.opsize = map_opsize[opsize]
+ if t.opsize then expr = tailops end
+ end
+
+ local br = match(expr, "^%[%s*(.-)%s*%]$")
+ repeat
+ if br then
+ t.mode = "xm"
+
+ -- [disp]
+ t.disp = toint(br)
+ if t.disp then
+ t.mode = "xmO"
+ break
+ end
+
+ -- [reg...]
+ local tp
+ local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
+ reg, t.reg, tp = rtexpr(reg)
+ if not t.reg then
+ -- [expr]
+ t.mode = "xmO"
+ t.disp = dispexpr("+"..br)
+ break
+ end
+
+ if t.reg == -1 then
+ -- Variable register: capture the parenthesized C expression.
+ t.vreg, tailr = match(tailr, "^(%b())(.*)$")
+ if not t.vreg then werror("bad variable register expression") end
+ end
+
+ -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
+ local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
+ if xsc then
+ if not map_reg_valid_index[reg] then
+ werror("bad index register `"..map_reg_rev[reg].."'")
+ end
+ t.xsc = map_xsc[xsc]
+ t.xreg = t.reg
+ t.vxreg = t.vreg
+ t.reg = nil
+ t.vreg = nil
+ t.disp = dispexpr(tailsc)
+ break
+ end
+ if not map_reg_valid_base[reg] then
+ werror("bad base register `"..map_reg_rev[reg].."'")
+ end
+
+ -- [reg] or [reg+-disp]
+ t.disp = toint(tailr) or (tailr == "" and 0)
+ if t.disp then break end
+
+ -- [reg+xreg...]
+ local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
+ xreg, t.xreg, tp = rtexpr(xreg)
+ if not t.xreg then
+ -- [reg+-expr]
+ t.disp = dispexpr(tailr)
+ break
+ end
+ if not map_reg_valid_index[xreg] then
+ werror("bad index register `"..map_reg_rev[xreg].."'")
+ end
+
+ if t.xreg == -1 then
+ t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
+ if not t.vxreg then werror("bad variable register expression") end
+ end
+
+ -- [reg+xreg*xsc...]
+ local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
+ if xsc then
+ t.xsc = map_xsc[xsc]
+ tailx = tailsc
+ end
+
+ -- [...] or [...+-disp] or [...+-expr]
+ t.disp = dispexpr(tailx)
+ else
+ -- imm or opsize*imm
+ local imm = toint(expr)
+ if not imm and sub(expr, 1, 1) == "*" and t.opsize then
+ imm = toint(sub(expr, 2))
+ if imm then
+ imm = imm * map_opsizenum[t.opsize]
+ t.opsize = nil
+ end
+ end
+ if imm then
+ if t.opsize then werror("bad operand size override") end
+ local m = "i"
+ if imm == 1 then m = m.."1" end
+ -- Fold unsigned values with the top bit set back to signed range.
+ if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
+ if imm >= -128 and imm <= 127 then m = m.."S" end
+ t.imm = imm
+ t.mode = m
+ break
+ end
+
+ local tp
+ local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
+ reg, t.reg, tp = rtexpr(reg)
+ if t.reg then
+ if t.reg == -1 then
+ t.vreg, tailr = match(tailr, "^(%b())(.*)$")
+ if not t.vreg then werror("bad variable register expression") end
+ end
+ -- reg
+ if tailr == "" then
+ if t.opsize then werror("bad operand size override") end
+ t.opsize = map_reg_opsize[reg]
+ if t.opsize == "f" then
+ t.mode = t.reg == 0 and "fF" or "f"
+ else
+ if reg == "@w4" then wwarn("bad idea, try again with `esp'") end
+ t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
+ end
+ break
+ end
+
+ -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
+ if not tp then werror("bad operand `"..param.."'") end
+ t.mode = "xm"
+ t.disp = format(tp.ctypefmt, tailr)
+ else
+ t.mode, t.imm = immexpr(expr)
+ if sub(t.mode, -1) == "J" then
+ if t.opsize and t.opsize ~= addrsize then
+ werror("bad operand size override")
+ end
+ t.opsize = addrsize
+ end
+ end
+ end
+ until true
+ return t
+end
+
+------------------------------------------------------------------------------
+-- x86 Template String Description
+-- ===============================
+--
+-- Each template string is a list of [match:]pattern pairs,
+-- separated by "|". The first match wins. No match means a
+-- bad or unsupported combination of operand modes or sizes.
+--
+-- The match part and the ":" is omitted if the operation has
+-- no operands. Otherwise the first N characters are matched
+-- against the mode strings of each of the N operands.
+--
+-- The mode string for each operand type is (see parseoperand()):
+-- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
+-- FP register: "f", +"F" for st0
+-- Index operand: "xm", +"O" for [disp] (pure offset)
+-- Immediate: "i", +"S" for signed 8 bit, +"1" for 1,
+-- +"I" for arg, +"P" for pointer
+-- Any: +"J" for valid jump targets
+--
+-- So a match character "m" (mixed) matches both an integer register
+-- and an index operand (to be encoded with the ModRM/SIB scheme).
+-- But "r" matches only a register and "x" only an index operand
+-- (e.g. for FP memory access operations).
+--
+-- The operand size match string starts right after the mode match
+-- characters and ends before the ":". "dwb" is assumed, if empty.
+-- The effective data size of the operation is matched against this list.
+--
+-- If only the regular "b", "w", "d", "q", "t" operand sizes are
+-- present, then all operands must be the same size. Unspecified sizes
+-- are ignored, but at least one operand must have a size or the pattern
+-- won't match (use the "byte", "word", "dword", "qword", "tword"
+-- operand size overrides. E.g.: mov dword [eax], 1).
+--
+-- If the list has a "1" or "2" prefix, the operand size is taken
+-- from the respective operand and any other operand sizes are ignored.
+-- If the list contains only ".", all operand sizes are ignored.
+-- If the list has a "/" prefix, the concatenated (mixed) operand sizes
+-- are compared to the match.
+--
+-- E.g. "rrdw" matches for either two dword registers or two word
+-- registers. "Fx2dq" matches an st0 operand plus an index operand
+-- pointing to a dword (float) or qword (double).
+--
+-- Every character after the ":" is part of the pattern string:
+-- Hex chars are accumulated to form the opcode (left to right).
+-- "n" disables the standard opcode mods
+-- (otherwise: -1 for "b", o16 prefix for "w")
+-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
+-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
+-- The spare 3 bits are either filled with the last hex digit or
+-- the result from a previous "r"/"R". The opcode is restored.
+--
+-- All of the following characters force a flush of the opcode:
+-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
+-- "S" stores a signed 8 bit immediate from the last operand.
+-- "U" stores an unsigned 8 bit immediate from the last operand.
+-- "W" stores an unsigned 16 bit immediate from the last operand.
+-- "i" stores an operand sized immediate from the last operand.
+-- "I" dito, but generates an action code to optionally modify
+-- the opcode (+2) for a signed 8 bit immediate.
+-- "J" generates one of the REL action codes from the last operand.
+--
+------------------------------------------------------------------------------
+
+-- Template strings for x86 instructions. Ordered by first opcode byte.
+-- Unimplemented opcodes (deliberate omissions) are marked with *.
+local map_op = {
+ -- 00-05: add...
+ -- 06: *push es
+ -- 07: *pop es
+ -- 08-0D: or...
+ -- 0E: *push cs
+ -- 0F: two byte opcode prefix
+ -- 10-15: adc...
+ -- 16: *push ss
+ -- 17: *pop ss
+ -- 18-1D: sbb...
+ -- 1E: *push ds
+ -- 1F: *pop ds
+ -- 20-25: and...
+ es_0 = "26",
+ -- 27: *daa
+ -- 28-2D: sub...
+ cs_0 = "2E",
+ -- 2F: *das
+ -- 30-35: xor...
+ ss_0 = "36",
+ -- 37: *aaa
+ -- 38-3D: cmp...
+ ds_0 = "3E",
+ -- 3F: *aas
+ inc_1 = "rdw:40r|m:FF0m",
+ dec_1 = "rdw:48r|m:FF1m",
+ push_1 = "rdw:50r|mdw:FF6m|S.:6AS|ib:n6Ai|i.:68i",
+ pop_1 = "rdw:58r|mdw:8F0m",
+ -- 60: *pusha, *pushad, *pushaw
+ -- 61: *popa, *popad, *popaw
+ -- 62: *bound rdw,x
+ -- 63: *arpl mw,rw
+ fs_0 = "64",
+ gs_0 = "65",
+ o16_0 = "66",
+ a16_0 = "67",
+ -- 68: push idw
+ -- 69: imul rdw,mdw,idw
+ -- 6A: push ib
+ -- 6B: imul rdw,mdw,S
+ -- 6C: *insb
+ -- 6D: *insd, *insw
+ -- 6E: *outsb
+ -- 6F: *outsd, *outsw
+ -- 70-7F: jcc lb
+ -- 80: add... mb,i
+ -- 81: add... mdw,i
+ -- 82: *undefined
+ -- 83: add... mdw,S
+ test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
+ -- 86: xchg rb,mb
+ -- 87: xchg rdw,mdw
+ -- 88: mov mb,r
+ -- 89: mov mdw,r
+ -- 8A: mov r,mb
+ -- 8B: mov r,mdw
+ -- 8C: *mov mdw,seg
+ lea_2 = "rxd:8DrM",
+ -- 8E: *mov seg,mdw
+ -- 8F: pop mdw
+ nop_0 = "90",
+ xchg_2 = "Rrdw:90R|rRdw:90r|rm:87rM|mr:87Rm",
+ cbw_0 = "6698",
+ cwde_0 = "98",
+ cwd_0 = "6699",
+ cdq_0 = "99",
+ -- 9A: *call iw:idw
+ wait_0 = "9B",
+ fwait_0 = "9B",
+ pushf_0 = "9C",
+ pushfw_0 = "669C",
+ pushfd_0 = "9C",
+ popf_0 = "9D",
+ popfw_0 = "669D",
+ popfd_0 = "9D",
+ sahf_0 = "9E",
+ lahf_0 = "9F",
+ mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
+ movsb_0 = "A4",
+ movsw_0 = "66A5",
+ movsd_0 = "A5",
+ cmpsb_0 = "A6",
+ cmpsw_0 = "66A7",
+ cmpsd_0 = "A7",
+ -- A8: test Rb,i
+ -- A9: test Rdw,i
+ stosb_0 = "AA",
+ stosw_0 = "66AB",
+ stosd_0 = "AB",
+ lodsb_0 = "AC",
+ lodsw_0 = "66AD",
+ lodsd_0 = "AD",
+ scasb_0 = "AE",
+ scasw_0 = "66AF",
+ scasd_0 = "AF",
+ -- B0-B7: mov rb,i
+ -- B8-BF: mov rdw,i
+ -- C0: rol... mb,i
+ -- C1: rol... mdw,i
+ ret_1 = "i.:nC2W",
+ ret_0 = "C3",
+ -- C4: *les rdw,mq
+ -- C5: *lds rdw,mq
+ -- C6: mov mb,i
+ -- C7: mov mdw,i
+ -- C8: *enter iw,ib
+ leave_0 = "C9",
+ -- CA: *retf iw
+ -- CB: *retf
+ int3_0 = "CC",
+ int_1 = "i.:nCDU",
+ into_0 = "CE",
+ -- CF: *iret
+ -- D0: rol... mb,1
+ -- D1: rol... mdw,1
+ -- D2: rol... mb,cl
+ -- D3: rol... mb,cl
+ -- D4: *aam ib
+ -- D5: *aad ib
+ -- D6: *salc
+ -- D7: *xlat
+ -- D8-DF: floating point ops
+ -- E0: *loopne
+ -- E1: *loope
+ -- E2: *loop
+ -- E3: *jcxz, *jecxz
+ -- E4: *in Rb,ib
+ -- E5: *in Rdw,ib
+ -- E6: *out ib,Rb
+ -- E7: *out ib,Rdw
+ call_1 = "md:FF2m|J.:E8J",
+ jmp_1 = "md:FF4m|J.:E9J", -- short: EB
+ -- EA: *jmp iw:idw
+ -- EB: jmp ib
+ -- EC: *in Rb,dx
+ -- ED: *in Rdw,dx
+ -- EE: *out dx,Rb
+ -- EF: *out dx,Rdw
+ -- F0: *lock
+ int1_0 = "F1",
+ repne_0 = "F2",
+ repnz_0 = "F2",
+ rep_0 = "F3",
+ repe_0 = "F3",
+ repz_0 = "F3",
+ -- F4: *hlt
+ cmc_0 = "F5",
+ -- F6: test... mb,i; div... mb
+ -- F7: test... mdw,i; div... mdw
+ clc_0 = "F8",
+ stc_0 = "F9",
+ -- FA: *cli
+ cld_0 = "FC",
+ std_0 = "FD",
+ -- FE: inc... mb
+ -- FF: inc... mdw
+
+ -- misc ops
+ not_1 = "m:F72m",
+ neg_1 = "m:F73m",
+ mul_1 = "m:F74m",
+ imul_1 = "m:F75m",
+ div_1 = "m:F76m",
+ idiv_1 = "m:F77m",
+
+ imul_2 = "rmdw:0FAFrM|rIdw:69rmI|rSdw:6BrmS|ridw:69rmi",
+ imul_3 = "rmIdw:69rMI|rmSdw:6BrMS|rmidw:69rMi",
+
+ movzx_2 = "rm/db:0FB6rM|rm/wb:0FB6rM|rm/dw:0FB7rM",
+ movsx_2 = "rm/db:0FBErM|rm/wb:0FBErM|rm/dw:0FBFrM",
+
+ bswap_1 = "rd:0FC8r",
+ bsf_2 = "rmdw:0FBCrM",
+ bsr_2 = "rmdw:0FBDrM",
+ bt_2 = "mrdw:0FA3Rm|midw:0FBA4mU",
+ btc_2 = "mrdw:0FBBRm|midw:0FBA7mU",
+ btr_2 = "mrdw:0FB3Rm|midw:0FBA6mU",
+ bts_2 = "mrdw:0FABRm|midw:0FBA5mU",
+
+ rdtsc_0 = "0F31", -- P1+
+ cpuid_0 = "0FA2", -- P1+
+
+ -- floating point ops
+ fst_1 = "ff:DDD0r|xd:D92m|xq:DD2m",
+ fstp_1 = "ff:DDD8r|xd:D93m|xq:DD3m|xt:DB7m",
+ fld_1 = "ff:D9C0r|xd:D90m|xq:DD0m|xt:DB5m",
+
+ fpop_0 = "DDD8", -- Alias for fstp st0.
+
+ fist_1 = "xw:nDF2m|xd:DB2m",
+ fistp_1 = "xw:nDF3m|xd:DB3m|xq:DF7m",
+ fild_1 = "xw:nDF0m|xd:DB0m|xq:DF5m",
+
+ fxch_0 = "D9C9",
+ fxch_1 = "ff:D9C8r",
+ fxch_2 = "fFf:D9C8r|Fff:D9C8R",
+
+ fucom_1 = "ff:DDE0r",
+ fucom_2 = "Fff:DDE0R",
+ fucomp_1 = "ff:DDE8r",
+ fucomp_2 = "Fff:DDE8R",
+ fucomi_1 = "ff:DBE8r", -- P6+
+ fucomi_2 = "Fff:DBE8R", -- P6+
+ fucomip_1 = "ff:DFE8r", -- P6+
+ fucomip_2 = "Fff:DFE8R", -- P6+
+ fcomi_1 = "ff:DBF0r", -- P6+
+ fcomi_2 = "Fff:DBF0R", -- P6+
+ fcomip_1 = "ff:DFF0r", -- P6+
+ fcomip_2 = "Fff:DFF0R", -- P6+
+ fucompp_0 = "DAE9",
+ fcompp_0 = "DED9",
+
+ fldcw_1 = "xw:nD95m",
+ fstcw_1 = "xw:n9BD97m",
+ fnstcw_1 = "xw:nD97m",
+ fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m",
+ fnstsw_1 = "Rw:nDFE0|xw:nDD7m",
+ fclex_0 = "9BDBE2",
+ fnclex_0 = "DBE2",
+
+ fnop_0 = "D9D0",
+ -- D9D1-D9DF: unassigned
+
+ fchs_0 = "D9E0",
+ fabs_0 = "D9E1",
+ -- D9E2: unassigned
+ -- D9E3: unassigned
+ ftst_0 = "D9E4",
+ fxam_0 = "D9E5",
+ -- D9E6: unassigned
+ -- D9E7: unassigned
+ fld1_0 = "D9E8",
+ fldl2t_0 = "D9E9",
+ fldl2e_0 = "D9EA",
+ fldpi_0 = "D9EB",
+ fldlg2_0 = "D9EC",
+ fldln2_0 = "D9ED",
+ fldz_0 = "D9EE",
+ -- D9EF: unassigned
+
+ f2xm1_0 = "D9F0",
+ fyl2x_0 = "D9F1",
+ fptan_0 = "D9F2",
+ fpatan_0 = "D9F3",
+ fxtract_0 = "D9F4",
+ fprem1_0 = "D9F5",
+ fdecstp_0 = "D9F6",
+ fincstp_0 = "D9F7",
+ fprem_0 = "D9F8",
+ fyl2xp1_0 = "D9F9",
+ fsqrt_0 = "D9FA",
+ fsincos_0 = "D9FB",
+ frndint_0 = "D9FC",
+ fscale_0 = "D9FD",
+ fsin_0 = "D9FE",
+ fcos_0 = "D9FF",
+
+ -- SSE, SSE2
+ andnpd_2 = "rmo:660F55rM",
+ andnps_2 = "rmo:0F55rM",
+ andpd_2 = "rmo:660F54rM",
+ andps_2 = "rmo:0F54rM",
+ clflush_1 = "x.:0FAE7m",
+ cmppd_3 = "rmio:660FC2rMU",
+ cmpps_3 = "rmio:0FC2rMU",
+ cmpsd_3 = "rmio:F20FC2rMU",
+ cmpss_3 = "rmio:F30FC2rMU",
+ comisd_2 = "rmo:660F2FrM",
+ comiss_2 = "rmo:0F2FrM",
+ cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:",
+ cvtdq2ps_2 = "rmo:0F5BrM",
+ cvtpd2dq_2 = "rmo:F20FE6rM",
+ cvtpd2ps_2 = "rmo:660F5ArM",
+ cvtpi2pd_2 = "rx/oq:660F2ArM",
+ cvtpi2ps_2 = "rx/oq:0F2ArM",
+ cvtps2dq_2 = "rmo:660F5BrM",
+ cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
+ cvtsd2si_2 = "rr/do:F20F2DrM|rx/dq:",
+ cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
+ cvtsi2sd_2 = "rm/od:F20F2ArM",
+ cvtsi2ss_2 = "rm/od:F30F2ArM",
+ cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
+ cvtss2si_2 = "rr/do:F20F2CrM|rx/dd:",
+ cvttpd2dq_2 = "rmo:660FE6rM",
+ cvttps2dq_2 = "rmo:F30F5BrM",
+ cvttsd2si_2 = "rr/do:F20F2CrM|rx/dq:",
+ cvttss2si_2 = "rr/do:F30F2CrM|rx/dd:",
+ ldmxcsr_1 = "xd:0FAE2m",
+ lfence_0 = "0FAEE8",
+ maskmovdqu_2 = "rro:660FF7rM",
+ mfence_0 = "0FAEF0",
+ movapd_2 = "rmo:660F28rM|mro:660F29Rm",
+ movaps_2 = "rmo:0F28rM|mro:0F29Rm",
+ movd_2 = "rm/od:660F6ErM|mr/do:660F7ERm",
+ movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
+ movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
+ movhlps_2 = "rro:0F12rM",
+ movhpd_2 = "rx/oq:660F16rM|xr/qo:660F17Rm",
+ movhps_2 = "rx/oq:0F16rM|xr/qo:0F17Rm",
+ movlhps_2 = "rro:0F16rM",
+ movlpd_2 = "rx/oq:660F12rM|xr/qo:660F13Rm",
+ movlps_2 = "rx/oq:0F12rM|xr/qo:0F13Rm",
+ movmskpd_2 = "rr/do:660F50rM",
+ movmskps_2 = "rr/do:0F50rM",
+ movntdq_2 = "xro:660FE7Rm",
+ movnti_2 = "xrd:0FC3Rm",
+ movntpd_2 = "xro:660F2BRm",
+ movntps_2 = "xro:0F2BRm",
+ movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:660FD6Rm",
+ movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:F20F11Rm",
+ movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
+ movupd_2 = "rmo:660F10rM|mro:660F11Rm",
+ movups_2 = "rmo:0F10rM|mro:0F11Rm",
+ orpd_2 = "rmo:660F56rM",
+ orps_2 = "rmo:0F56rM",
+ packssdw_2 = "rmo:660F6BrM",
+ packsswb_2 = "rmo:660F63rM",
+ packuswb_2 = "rmo:660F67rM",
+ paddb_2 = "rmo:660FFCrM",
+ paddd_2 = "rmo:660FFErM",
+ paddq_2 = "rmo:660FD4rM",
+ paddsb_2 = "rmo:660FECrM",
+ paddsw_2 = "rmo:660FEDrM",
+ paddusb_2 = "rmo:660FDCrM",
+ paddusw_2 = "rmo:660FDDrM",
+ paddw_2 = "rmo:660FFDrM",
+ pand_2 = "rmo:660FDBrM",
+ pandn_2 = "rmo:660FDFrM",
+ pause_0 = "F390",
+ pavgb_2 = "rmo:660FE0rM",
+ pavgw_2 = "rmo:660FE3rM",
+ pcmpeqb_2 = "rmo:660F74rM",
+ pcmpeqd_2 = "rmo:660F76rM",
+ pcmpeqw_2 = "rmo:660F75rM",
+ pcmpgtb_2 = "rmo:660F64rM",
+ pcmpgtd_2 = "rmo:660F66rM",
+ pcmpgtw_2 = "rmo:660F65rM",
+ pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nrMU", -- Mem op: SSE4.1 only.
+ pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
+ pmaddwd_2 = "rmo:660FF5rM",
+ pmaxsw_2 = "rmo:660FEErM",
+ pmaxub_2 = "rmo:660FDErM",
+ pminsw_2 = "rmo:660FEArM",
+ pminub_2 = "rmo:660FDArM",
+ pmovmskb_2 = "rr/do:660FD7rM",
+ pmulhuw_2 = "rmo:660FE4rM",
+ pmulhw_2 = "rmo:660FE5rM",
+ pmullw_2 = "rmo:660FD5rM",
+ pmuludq_2 = "rmo:660FF4rM",
+ por_2 = "rmo:660FEBrM",
+ prefetchnta_1 = "xb:n0F180m",
+ prefetcht0_1 = "xb:n0F181m",
+ prefetcht1_1 = "xb:n0F182m",
+ prefetcht2_1 = "xb:n0F183m",
+ psadbw_2 = "rmo:660FF6rM",
+ pshufd_3 = "rmio:660F70rMU",
+ pshufhw_3 = "rmio:F30F70rMU",
+ pshuflw_3 = "rmio:F20F70rMU",
+ pslld_2 = "rmo:660FF2rM|rio:660F726mU",
+ pslldq_2 = "rio:660F737mU",
+ psllq_2 = "rmo:660FF3rM|rio:660F736mU",
+ psllw_2 = "rmo:660FF1rM|rio:660F716mU",
+ psrad_2 = "rmo:660FE2rM|rio:660F724mU",
+ psraw_2 = "rmo:660FE1rM|rio:660F714mU",
+ psrld_2 = "rmo:660FD2rM|rio:660F722mU",
+ psrldq_2 = "rio:660F733mU",
+ psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
+ psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
+ psubb_2 = "rmo:660FF8rM",
+ psubd_2 = "rmo:660FFArM",
+ psubq_2 = "rmo:660FFBrM",
+ psubsb_2 = "rmo:660FE8rM",
+ psubsw_2 = "rmo:660FE9rM",
+ psubusb_2 = "rmo:660FD8rM",
+ psubusw_2 = "rmo:660FD9rM",
+ psubw_2 = "rmo:660FF9rM",
+ punpckhbw_2 = "rmo:660F68rM",
+ punpckhdq_2 = "rmo:660F6ArM",
+ punpckhqdq_2 = "rmo:660F6DrM",
+ punpckhwd_2 = "rmo:660F69rM",
+ punpcklbw_2 = "rmo:660F60rM",
+ punpckldq_2 = "rmo:660F62rM",
+ punpcklqdq_2 = "rmo:660F6CrM",
+ punpcklwd_2 = "rmo:660F61rM",
+ pxor_2 = "rmo:660FEFrM",
+ rcpps_2 = "rmo:0F53rM",
+ rcpss_2 = "rmo:F30F53rM",
+ rsqrtps_2 = "rmo:0F52rM",
+ rsqrtss_2 = "rmo:F30F52rM",
+ sfence_0 = "0FAEF8",
+ shufpd_3 = "rmio:660FC6rMU",
+ shufps_3 = "rmio:0FC6rMU",
+ stmxcsr_1 = "xd:0FAE3m",
+ ucomisd_2 = "rmo:660F2ErM",
+ ucomiss_2 = "rmo:0F2ErM",
+ unpckhpd_2 = "rmo:660F15rM",
+ unpckhps_2 = "rmo:0F15rM",
+ unpcklpd_2 = "rmo:660F14rM",
+ unpcklps_2 = "rmo:0F14rM",
+ xorpd_2 = "rmo:660F57rM",
+ xorps_2 = "rmo:0F57rM",
+
+ -- SSE3 ops
+ fisttp_1 = "xw:nDF1m|xd:DB1m|xq:DD1m",
+ addsubpd_2 = "rmo:660FD0rM",
+ addsubps_2 = "rmo:F20FD0rM",
+ haddpd_2 = "rmo:660F7CrM",
+ haddps_2 = "rmo:F20F7CrM",
+ hsubpd_2 = "rmo:660F7DrM",
+ hsubps_2 = "rmo:F20F7DrM",
+ lddqu_2 = "rxo:F20FF0rM",
+ movddup_2 = "rmo:F20F12rM",
+ movshdup_2 = "rmo:F30F16rM",
+ movsldup_2 = "rmo:F30F12rM",
+
+ -- SSSE3 ops
+ pabsb_2 = "rmo:660F381CrM",
+ pabsd_2 = "rmo:660F381ErM",
+ pabsw_2 = "rmo:660F381DrM",
+ palignr_3 = "rmio:660F3A0FrMU",
+ phaddd_2 = "rmo:660F3802rM",
+ phaddsw_2 = "rmo:660F3803rM",
+ phaddw_2 = "rmo:660F3801rM",
+ phsubd_2 = "rmo:660F3806rM",
+ phsubsw_2 = "rmo:660F3807rM",
+ phsubw_2 = "rmo:660F3805rM",
+ pmaddubsw_2 = "rmo:660F3804rM",
+ pmulhrsw_2 = "rmo:660F380BrM",
+ pshufb_2 = "rmo:660F3800rM",
+ psignb_2 = "rmo:660F3808rM",
+ psignd_2 = "rmo:660F380ArM",
+ psignw_2 = "rmo:660F3809rM",
+
+ -- SSE4.1 ops
+ blendpd_3 = "rmio:660F3A0DrMU",
+ blendps_3 = "rmio:660F3A0CrMU",
+ blendvpd_3 = "rmRo:660F3815rM",
+ blendvps_3 = "rmRo:660F3814rM",
+ dppd_3 = "rmio:660F3A41rMU",
+ dpps_3 = "rmio:660F3A40rMU",
+ extractps_3 = "mri/do:660F3A17RmU",
+ insertps_3 = "rrio:660F3A41rMU|rxi/od:",
+ movntdqa_2 = "rmo:660F382ArM",
+ mpsadbw_3 = "rmio:660F3A42rMU",
+ packusdw_2 = "rmo:660F382BrM",
+ pblendvb_3 = "rmRo:660F3810rM",
+ pblendw_3 = "rmio:660F3A0ErMU",
+ pcmpeqq_2 = "rmo:660F3829rM",
+ pextrb_3 = "rri/do:660F3A14nRmU|xri/bo:",
+ pextrd_3 = "mri/do:660F3A16RmU",
+ -- x64: pextrq
+ -- pextrw is SSE2, mem operand is SSE4.1 only
+ phminposuw_2 = "rmo:660F3841rM",
+ pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
+ pinsrd_3 = "rmi/od:660F3A22rMU",
+ -- x64: pinsrq
+ pmaxsb_2 = "rmo:660F383CrM",
+ pmaxsd_2 = "rmo:660F383DrM",
+ pmaxud_2 = "rmo:660F383FrM",
+ pmaxuw_2 = "rmo:660F383ErM",
+ pminsb_2 = "rmo:660F3838rM",
+ pminsd_2 = "rmo:660F3839rM",
+ pminud_2 = "rmo:660F383BrM",
+ pminuw_2 = "rmo:660F383ArM",
+ pmovsxbd_2 = "rro:660F3821rM|rx/od:",
+ pmovsxbq_2 = "rro:660F3822rM|rx/ow:",
+ pmovsxbw_2 = "rro:660F3820rM|rx/oq:",
+ pmovsxdq_2 = "rro:660F3825rM|rx/oq:",
+ pmovsxwd_2 = "rro:660F3823rM|rx/oq:",
+ pmovsxwq_2 = "rro:660F3824rM|rx/od:",
+ pmovzxbd_2 = "rro:660F3831rM|rx/od:",
+ pmovzxbq_2 = "rro:660F3832rM|rx/ow:",
+ pmovzxbw_2 = "rro:660F3830rM|rx/oq:",
+ pmovzxdq_2 = "rro:660F3835rM|rx/oq:",
+ pmovzxwd_2 = "rro:660F3833rM|rx/oq:",
+ pmovzxwq_2 = "rro:660F3834rM|rx/od:",
+ pmuldq_2 = "rmo:660F3828rM",
+ pmulld_2 = "rmo:660F3840rM",
+ ptest_2 = "rmo:660F3817rM",
+ roundpd_3 = "rmio:660F3A09rMU",
+ roundps_3 = "rmio:660F3A08rMU",
+ roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:",
+ roundss_3 = "rrio:660F3A0ArMU|rxi/od:",
+
+ -- SSE4.2 ops
+ crc32_2 = "rmd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0nrM",
+ pcmpestri_3 = "rmio:660F3A61rMU",
+ pcmpestrm_3 = "rmio:660F3A60rMU",
+ pcmpgtq_2 = "rmo:660F3837rM",
+ pcmpistri_3 = "rmio:660F3A63rMU",
+ pcmpistrm_3 = "rmio:660F3A62rMU",
+ popcnt_2 = "rmdw:F30FB8rM",
+
+ -- SSE4a
+ extrq_2 = "rro:660F79rM",
+ extrq_3 = "riio:660F780mUU",
+ insertq_2 = "rro:F20F79rM",
+ insertq_4 = "rriio:F20F78rMUU",
+ lzcnt_2 = "rmdw:F30FBDrM",
+ movntsd_2 = "xr/qo:F20F2BRm",
+ movntss_2 = "xr/do:F30F2BRm",
+ -- popcnt is also in SSE4.2
+}
+
+------------------------------------------------------------------------------
+
+-- Arithmetic ops.
+for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
+ ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
+ local n8 = n * 8
+ map_op[name.."_2"] = format(
+ "mr:%02XRm|rm:%02XrM|mI1dw:81%XmI|mS1dw:83%XmS|Ri1dwb:%02Xri|mi1dwb:81%Xmi",
+ 1+n8, 3+n8, n, n, 5+n8, n)
+end
+
+-- Shift ops.
+for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
+ shl = 4, shr = 5, sar = 7, sal = 4 } do
+ map_op[name.."_2"] = format("m1:D1%Xm|mC1dwb:D3%Xm|mi:C1%XmU", n, n, n)
+end
+
+-- Conditional ops.
+for cc,n in pairs(map_cc) do
+ map_op["j"..cc.."_1"] = format("J.:0F8%XJ", n) -- short: 7%X
+ map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
+ map_op["cmov"..cc.."_2"] = format("rmdw:0F4%XrM", n) -- P6+
+end
+
+-- FP arithmetic ops.
+for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
+ sub = 4, subr = 5, div = 6, divr = 7 } do
+ local nc = 192 + n * 8
+ local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
+ local fn = "f"..name
+ map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:DC%Xm", nc, n, n)
+ if n == 2 or n == 3 then
+ map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n)
+ else
+ map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:DC%XM", nc, nr, n, n)
+ map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
+ map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
+ end
+ map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
+end
+
+-- FP conditional moves.
+for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
+ local n4 = n % 4
+ local nc = 56000 + n4 * 8 + (n-n4) * 64
+ map_op["fcmov"..cc.."_1"] = format("ff:%04Xr", nc) -- P6+
+ map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
+end
+
+-- SSE FP arithmetic ops.
+for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
+ sub = 12, min = 13, div = 14, max = 15 } do
+ map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
+ map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
+ map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
+ map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
+end
+
+------------------------------------------------------------------------------
+
+-- Process pattern string.
+local function dopattern(pat, args, sz, op)
+ local digit, addin
+ local opcode = 0
+ local szov = sz
+ local narg = 1
+
+ -- Limit number of section buffer positions used by a single dasm_put().
+ -- A single opcode needs a maximum of 2 positions. !x64
+ if secpos+2 > maxsecpos then wflush() end
+
+ -- Process each character.
+ for c in gmatch(pat.."|", ".") do
+ if match(c, "%x") then -- Hex digit.
+ digit = byte(c) - 48
+ if digit > 48 then digit = digit - 39
+ elseif digit > 16 then digit = digit - 7 end
+ opcode = opcode*16 + digit
+ addin = nil
+ elseif c == "n" then -- Disable operand size mods for opcode.
+ szov = nil
+ elseif c == "r" then -- Merge 1st operand regno. into opcode.
+ addin = args[1]; opcode = opcode + addin.reg
+ if narg < 2 then narg = 2 end
+ elseif c == "R" then -- Merge 2nd operand regno. into opcode.
+ addin = args[2]; opcode = opcode + addin.reg
+ narg = 3
+ elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
+ local s
+ if addin then
+ s = addin.reg
+ opcode = opcode - s -- Undo regno opcode merge.
+ else
+ s = opcode % 16 -- Undo last digit.
+ opcode = (opcode - s) / 16
+ end
+ wputop(szov, opcode); opcode = nil
+ local imark = (sub(pat, -1) == "I") -- Force a mark (ugly).
+ -- Put ModRM/SIB with regno/last digit as spare.
+ local nn = c == "m" and 1 or 2
+ wputmrmsib(args[nn], imark, s, addin and addin.vreg)
+ if narg <= nn then narg = nn + 1 end
+ addin = nil
+ else
+ if opcode then -- Flush opcode.
+ if addin and addin.reg == -1 then
+ wputop(szov, opcode + 1)
+ waction("VREG", addin.vreg); wputxb(0)
+ else
+ wputop(szov, opcode)
+ end
+ opcode = nil
+ end
+ if c == "|" then break end
+ if c == "o" then -- Offset (pure 32 bit displacement).
+ wputdarg(args[1].disp); if narg < 2 then narg = 2 end
+ elseif c == "O" then
+ wputdarg(args[2].disp); narg = 3
+ else
+ -- Anything else is an immediate operand.
+ local a = args[narg]
+ narg = narg + 1
+ local mode, imm = a.mode, a.imm
+ if mode == "iJ" and not match("iIJ", c) then
+ werror("bad operand size for label")
+ end
+ if c == "S" then
+ wputsbarg(imm)
+ elseif c == "U" then
+ wputbarg(imm)
+ elseif c == "W" then
+ wputwarg(imm)
+ elseif c == "i" or c == "I" then
+ if mode == "iJ" then
+ wputlabel("IMM_", imm, 1)
+ elseif mode == "iI" and c == "I" then
+ waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
+ else
+ wputszarg(sz, imm)
+ end
+ elseif c == "J" then
+ if mode == "iPJ" then
+ waction("REL_A", imm) -- !x64 (secpos)
+ else
+ wputlabel("REL_", imm, 2)
+ end
+ else
+ werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
+ end
+ end
+ end
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Mapping of operand modes to short names. Suppress output with '#'.
+local map_modename = {
+ r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
+ f = "stx", F = "st0", J = "lbl", ["1"] = "1",
+ I = "#", S = "#", O = "#",
+}
+
+-- Return a table/string showing all possible operand modes.
+local function templatehelp(template, nparams)
+ if nparams == 0 then return "" end
+ local t = {}
+ for tm in gmatch(template, "[^%|]+") do
+ local s = map_modename[sub(tm, 1, 1)]
+ s = s..gsub(sub(tm, 2, nparams), ".", function(c)
+ return ", "..map_modename[c]
+ end)
+ if not match(s, "#") then t[#t+1] = s end
+ end
+ return t
+end
+
+-- Match operand modes against mode match part of template.
+local function matchtm(tm, args)
+ for i=1,#args do
+ if not match(args[i].mode, sub(tm, i, i)) then return end
+ end
+ return true
+end
+
+-- Handle opcodes defined with template strings.
+map_op[".template__"] = function(params, template, nparams)
+ if not params then return templatehelp(template, nparams) end
+ local args = {}
+
+ -- Zero-operand opcodes have no match part.
+ if #params == 0 then
+ dopattern(template, args, "d", params.op)
+ return
+ end
+
+ -- Determine common operand size (coerce undefined size) or flag as mixed.
+ local sz, szmix
+ for i,p in ipairs(params) do
+ args[i] = parseoperand(p)
+ local nsz = args[i].opsize
+ if nsz then
+ if sz and sz ~= nsz then szmix = true else sz = nsz end
+ end
+ end
+
+ -- Try all match:pattern pairs (separated by '|').
+ local gotmatch, lastpat
+ for tm in gmatch(template, "[^%|]+") do
+ -- Split off size match (starts after mode match) and pattern string.
+ local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
+ if pat == "" then pat = lastpat else lastpat = pat end
+ if matchtm(tm, args) then
+ local prefix = sub(szm, 1, 1)
+ if prefix == "/" then -- Match both operand sizes.
+ if args[1].opsize == sub(szm, 2, 2) and
+ args[2].opsize == sub(szm, 3, 3) then
+ dopattern(pat, args, sz, params.op) -- Process pattern string.
+ return
+ end
+ else -- Match common operand size.
+ local szp = sz
+ if szm == "" then szm = "dwb" end -- Default size match.
+ if prefix == "1" then szp = args[1].opsize; szmix = nil
+ elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
+ if not szmix and (prefix == "." or match(szm, szp or "#")) then
+ dopattern(pat, args, szp, params.op) -- Process pattern string.
+ return
+ end
+ end
+ gotmatch = true
+ end
+ end
+
+ local msg = "bad operand mode"
+ if gotmatch then
+ if szmix then
+ msg = "mixed operand size"
+ else
+ msg = sz and "bad operand size" or "missing operand size"
+ end
+ end
+
+ werror(msg.." in `"..opmodestr(params.op, args).."'")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcodes for data storage.
+local function op_data(params)
+ if not params then return "imm..." end
+ local sz = sub(params.op, 2, 2)
+ if sz == "a" then sz = addrsize end
+ for _,p in ipairs(params) do
+ local a = parseoperand(p)
+ if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
+ werror("bad mode or size in `"..p.."'")
+ end
+ if a.mode == "iJ" then
+ wputlabel("IMM_", a.imm, 1)
+ else
+ wputszarg(sz, a.imm)
+ end
+ end
+end
+
+map_op[".byte_*"] = op_data
+map_op[".sbyte_*"] = op_data
+map_op[".word_*"] = op_data
+map_op[".dword_*"] = op_data
+map_op[".aword_*"] = op_data
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode to mark the position where the action list is to be emitted.
+map_op[".actionlist_1"] = function(params)
+ if not params then return "cvar" end
+ local name = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeactions(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the global enum is to be emitted.
+map_op[".globals_1"] = function(params)
+ if not params then return "prefix" end
+ local prefix = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeglobals(out, prefix) end)
+end
+
+-- Pseudo-opcode to mark the position where the global names are to be emitted.
+map_op[".globalnames_1"] = function(params)
+ if not params then return "cvar" end
+ local name = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeglobalnames(out, name) end)
+end
+
+-- Pseudo-opcode to mark the position where the extern names are to be emitted.
+map_op[".externnames_1"] = function(params)
+ if not params then return "cvar" end
+ local name = params[1] -- No syntax check. You get to keep the pieces.
+ wline(function(out) writeexternnames(out, name) end)
+end
+
+------------------------------------------------------------------------------
+
+-- Label pseudo-opcode (converted from trailing colon form).
+map_op[".label_2"] = function(params)
+ if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
+ local a = parseoperand(params[1])
+ local mode, imm = a.mode, a.imm
+ if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
+ -- Local label (1: ... 9:) or global label (->global:).
+ waction("LABEL_LG", nil, 1)
+ wputxb(imm)
+ elseif mode == "iJ" then
+ -- PC label (=>pcexpr:).
+ waction("LABEL_PC", imm)
+ else
+ werror("bad label definition")
+ end
+ -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
+ local addr = params[2]
+ if addr then
+ local a = parseoperand(params[2])
+ if a.mode == "iPJ" then
+ waction("SETLABEL", a.imm) -- !x64 (secpos)
+ else
+ werror("bad label assignment")
+ end
+ end
+end
+map_op[".label_1"] = map_op[".label_2"]
+
+------------------------------------------------------------------------------
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+ if not params then return "numpow2" end
+ local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
+ if align then
+ local x = align
+ -- Must be a power of 2 in the range (2 ... 256).
+ for i=1,8 do
+ x = x / 2
+ if x == 1 then
+ waction("ALIGN", nil, 1)
+ wputxb(align-1) -- Action byte is 2**n-1.
+ return
+ end
+ end
+ end
+ werror("bad alignment")
+end
+
+-- Spacing pseudo-opcode.
+map_op[".space_2"] = function(params)
+ if not params then return "num [, filler]" end
+ waction("SPACE", params[1])
+ local fill = params[2]
+ if fill then
+ fill = tonumber(fill)
+ if not fill or fill < 0 or fill > 255 then werror("bad filler") end
+ end
+ wputxb(fill or 0)
+end
+map_op[".space_1"] = map_op[".space_2"]
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+ if not params then
+ return nparams == 2 and "name, ctype" or "name, ctype, reg"
+ end
+ local name, ctype, reg = params[1], params[2], params[3]
+ if not match(name, "^[%a_][%w_]*$") then
+ werror("bad type name `"..name.."'")
+ end
+ local tp = map_type[name]
+ if tp then
+ werror("duplicate type `"..name.."'")
+ end
+ if reg and not map_reg_valid_base[reg] then
+ werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
+ end
+ -- Add #type to defines. A bit unclean to put it in map_archdef.
+ map_archdef["#"..name] = "sizeof("..ctype..")"
+ -- Add new type and emit shortcut define.
+ local num = ctypenum + 1
+ map_type[name] = {
+ ctype = ctype,
+ ctypefmt = format("Dt%X(%%s)", num),
+ reg = reg,
+ }
+ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+ ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+ local t = {}
+ for name in pairs(map_type) do t[#t+1] = name end
+ sort(t)
+ out:write("Type definitions:\n")
+ for _,name in ipairs(t) do
+ local tp = map_type[name]
+ local reg = tp.reg and map_reg_rev[tp.reg] or ""
+ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
+ end
+ out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+ waction("SECTION")
+ wputxb(num)
+ wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+ out:write(format("DynASM %s version %s, released %s\n\n",
+ _info.arch, _info.version, _info.release))
+ dumpregs(out)
+ dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+ dumptypes(out, lvl)
+ dumpglobals(out, lvl)
+ dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+ wline, werror, wfatal, wwarn = wl, we, wf, ww
+ return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+ g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+ setmetatable(map_op, { __index = map_coreop })
+ setmetatable(map_def, { __index = map_archdef })
+ return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
new file mode 100644
index 00000000..20ff9cf5
--- /dev/null
+++ b/dynasm/dynasm.lua
@@ -0,0 +1,1070 @@
+------------------------------------------------------------------------------
+-- DynASM. A dynamic assembler for code generation engines.
+-- Originally designed and implemented for LuaJIT.
+--
+-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+-- See below for full copyright notice.
+------------------------------------------------------------------------------
+
+-- Application information.
+local _info = {
+ name = "DynASM",
+ description = "A dynamic assembler for code generation engines",
+ version = "1.2.1",
+ vernum = 10201,
+ release = "2009-04-16",
+ author = "Mike Pall",
+ url = "http://luajit.org/dynasm.html",
+ license = "MIT",
+ copyright = [[
+Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+[ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+]],
+}
+
+-- Cache library functions.
+local type, pairs, ipairs = type, pairs, ipairs
+local pcall, error, assert = pcall, error, assert
+local _s = string
+local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub
+local format, rep, upper = _s.format, _s.rep, _s.upper
+local _t = table
+local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort
+local exit = os.exit
+local io = io
+local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr
+
+------------------------------------------------------------------------------
+
+-- Program options.
+local g_opt = {}
+
+-- Global state for current file.
+local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch
+local g_errcount = 0
+
+-- Write buffer for output file.
+local g_wbuffer, g_capbuffer
+
+------------------------------------------------------------------------------
+
+-- Write an output line (or callback function) to the buffer.
+local function wline(line, needindent)
+ local buf = g_capbuffer or g_wbuffer
+ buf[#buf+1] = needindent and g_indent..line or line
+ g_synclineno = g_synclineno + 1
+end
+
+-- Write assembler line as a comment, if requestd.
+local function wcomment(aline)
+ if g_opt.comment then
+ wline(g_opt.comment..aline..g_opt.endcomment, true)
+ end
+end
+
+-- Resync CPP line numbers.
+local function wsync()
+ if g_synclineno ~= g_lineno and g_opt.cpp then
+ wline("# "..g_lineno..' "'..g_fname..'"')
+ g_synclineno = g_lineno
+ end
+end
+
+-- Dummy action flush function. Replaced with arch-specific function later.
+local function wflush(term)
+end
+
+-- Dump all buffered output lines.
+local function wdumplines(out, buf)
+ for _,line in ipairs(buf) do
+ if type(line) == "string" then
+ assert(out:write(line, "\n"))
+ else
+ -- Special callback to dynamically insert lines after end of processing.
+ line(out)
+ end
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Emit an error. Processing continues with next statement.
+local function werror(msg)
+ error(format("%s:%s: error: %s:\n%s", g_fname, g_lineno, msg, g_curline), 0)
+end
+
+-- Emit a fatal error. Processing stops.
+local function wfatal(msg)
+ g_errcount = "fatal"
+ werror(msg)
+end
+
+-- Print a warning. Processing continues.
+local function wwarn(msg)
+ stderr:write(format("%s:%s: warning: %s:\n%s\n",
+ g_fname, g_lineno, msg, g_curline))
+end
+
+-- Print caught error message. But suppress excessive errors.
+local function wprinterr(...)
+ if type(g_errcount) == "number" then
+ -- Regular error.
+ g_errcount = g_errcount + 1
+ if g_errcount < 21 then -- Seems to be a reasonable limit.
+ stderr:write(...)
+ elseif g_errcount == 21 then
+ stderr:write(g_fname,
+ ":*: warning: too many errors (suppressed further messages).\n")
+ end
+ else
+ -- Fatal error.
+ stderr:write(...)
+ return true -- Stop processing.
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Map holding all option handlers.
+local opt_map = {}
+local opt_current
+
+-- Print error and exit with error status.
+local function opterror(...)
+ stderr:write("dynasm.lua: ERROR: ", ...)
+ stderr:write("\n")
+ exit(1)
+end
+
+-- Get option parameter.
+local function optparam(args)
+ local argn = args.argn
+ local p = args[argn]
+ if not p then
+ opterror("missing parameter for option `", opt_current, "'.")
+ end
+ args.argn = argn + 1
+ return p
+end
+
+------------------------------------------------------------------------------
+
+-- Core pseudo-opcodes.
+local map_coreop = {}
+-- Dummy opcode map. Replaced by arch-specific map.
+local map_op = {}
+
+-- Forward declarations.
+local dostmt
+local readfile
+
+------------------------------------------------------------------------------
+
+-- Map for defines (initially empty, chains to arch-specific map).
+local map_def = {}
+
+-- Pseudo-opcode to define a substitution.
+map_coreop[".define_2"] = function(params, nparams)
+ if not params then return nparams == 1 and "name" or "name, subst" end
+ local name, def = params[1], params[2] or "1"
+ if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end
+ map_def[name] = def
+end
+map_coreop[".define_1"] = map_coreop[".define_2"]
+
+-- Define a substitution on the command line.
+function opt_map.D(args)
+ local namesubst = optparam(args)
+ local name, subst = match(namesubst, "^([%a_][%w_]*)=(.*)$")
+ if name then
+ map_def[name] = subst
+ elseif match(namesubst, "^[%a_][%w_]*$") then
+ map_def[namesubst] = "1"
+ else
+ opterror("bad define")
+ end
+end
+
+-- Undefine a substitution on the command line.
+function opt_map.U(args)
+ local name = optparam(args)
+ if match(name, "^[%a_][%w_]*$") then
+ map_def[name] = nil
+ else
+ opterror("bad define")
+ end
+end
+
+-- Helper for definesubst.
+local gotsubst
+
+local function definesubst_one(word)
+ local subst = map_def[word]
+ if subst then gotsubst = word; return subst else return word end
+end
+
+-- Iteratively substitute defines.
+local function definesubst(stmt)
+ -- Limit number of iterations.
+ for i=1,100 do
+ gotsubst = false
+ stmt = gsub(stmt, "#?[%w_]+", definesubst_one)
+ if not gotsubst then break end
+ end
+ if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end
+ return stmt
+end
+
+-- Dump all defines.
+local function dumpdefines(out, lvl)
+ local t = {}
+ for name in pairs(map_def) do
+ t[#t+1] = name
+ end
+ sort(t)
+ out:write("Defines:\n")
+ for _,name in ipairs(t) do
+ local subst = map_def[name]
+ if g_arch then subst = g_arch.revdef(subst) end
+ out:write(format(" %-20s %s\n", name, subst))
+ end
+ out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Support variables for conditional assembly.
+local condlevel = 0
+local condstack = {}
+
+-- Evaluate condition with a Lua expression. Substitutions already performed.
+local function cond_eval(cond)
+ local func, err = loadstring("return "..cond)
+ if func then
+ setfenv(func, {}) -- No globals. All unknown identifiers evaluate to nil.
+ local ok, res = pcall(func)
+ if ok then
+ if res == 0 then return false end -- Oh well.
+ return not not res
+ end
+ err = res
+ end
+ wfatal("bad condition: "..err)
+end
+
+-- Skip statements until next conditional pseudo-opcode at the same level.
+local function stmtskip()
+ local dostmt_save = dostmt
+ local lvl = 0
+ dostmt = function(stmt)
+ local op = match(stmt, "^%s*(%S+)")
+ if op == ".if" then
+ lvl = lvl + 1
+ elseif lvl ~= 0 then
+ if op == ".endif" then lvl = lvl - 1 end
+ elseif op == ".elif" or op == ".else" or op == ".endif" then
+ dostmt = dostmt_save
+ dostmt(stmt)
+ end
+ end
+end
+
+-- Pseudo-opcodes for conditional assembly.
+map_coreop[".if_1"] = function(params)
+ if not params then return "condition" end
+ local lvl = condlevel + 1
+ local res = cond_eval(params[1])
+ condlevel = lvl
+ condstack[lvl] = res
+ if not res then stmtskip() end
+end
+
+map_coreop[".elif_1"] = function(params)
+ if not params then return "condition" end
+ if condlevel == 0 then wfatal(".elif without .if") end
+ local lvl = condlevel
+ local res = condstack[lvl]
+ if res then
+ if res == "else" then wfatal(".elif after .else") end
+ else
+ res = cond_eval(params[1])
+ if res then
+ condstack[lvl] = res
+ return
+ end
+ end
+ stmtskip()
+end
+
+map_coreop[".else_0"] = function(params)
+ if condlevel == 0 then wfatal(".else without .if") end
+ local lvl = condlevel
+ local res = condstack[lvl]
+ condstack[lvl] = "else"
+ if res then
+ if res == "else" then wfatal(".else after .else") end
+ stmtskip()
+ end
+end
+
+map_coreop[".endif_0"] = function(params)
+ local lvl = condlevel
+ if lvl == 0 then wfatal(".endif without .if") end
+ condlevel = lvl - 1
+end
+
+-- Check for unfinished conditionals.
+local function checkconds()
+ if g_errcount ~= "fatal" and condlevel ~= 0 then
+ wprinterr(g_fname, ":*: error: unbalanced conditional\n")
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Search for a file in the given path and open it for reading.
+local function pathopen(path, name)
+ local dirsep = match(package.path, "\\") and "\\" or "/"
+ for _,p in ipairs(path) do
+ local fullname = p == "" and name or p..dirsep..name
+ local fin = io.open(fullname, "r")
+ if fin then
+ g_fname = fullname
+ return fin
+ end
+ end
+end
+
+-- Include a file.
+map_coreop[".include_1"] = function(params)
+ if not params then return "filename" end
+ local name = params[1]
+ -- Save state. Ugly, I know. but upvalues are fast.
+ local gf, gl, gcl, gi = g_fname, g_lineno, g_curline, g_indent
+ -- Read the included file.
+ local fatal = readfile(pathopen(g_opt.include, name) or
+ wfatal("include file `"..name.."' not found"))
+ -- Restore state.
+ g_synclineno = -1
+ g_fname, g_lineno, g_curline, g_indent = gf, gl, gcl, gi
+ if fatal then wfatal("in include file") end
+end
+
+-- Make .include initially available, too.
+map_op[".include_1"] = map_coreop[".include_1"]
+
+------------------------------------------------------------------------------
+
+-- Support variables for macros.
+local mac_capture, mac_lineno, mac_name
+local mac_active = {}
+local mac_list = {}
+
+-- Pseudo-opcode to define a macro.
+map_coreop[".macro_*"] = function(mparams)
+ if not mparams then return "name [, params...]" end
+ -- Split off and validate macro name.
+ local name = remove(mparams, 1)
+ if not name then werror("missing macro name") end
+ if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]+$")) then
+ wfatal("bad macro name `"..name.."'")
+ end
+ -- Validate macro parameter names.
+ local mdup = {}
+ for _,mp in ipairs(mparams) do
+ if not match(mp, "^[%a_][%w_]*$") then
+ wfatal("bad macro parameter name `"..mp.."'")
+ end
+ if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end
+ mdup[mp] = true
+ end
+ -- Check for duplicate or recursive macro definitions.
+ local opname = name.."_"..#mparams
+ if map_op[opname] or map_op[name.."_*"] then
+ wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)")
+ end
+ if mac_capture then wfatal("recursive macro definition") end
+
+ -- Enable statement capture.
+ local lines = {}
+ mac_lineno = g_lineno
+ mac_name = name
+ mac_capture = function(stmt) -- Statement capture function.
+ -- Stop macro definition with .endmacro pseudo-opcode (dot escaped: %.).
+ if not match(stmt, "^%s*%.endmacro%s*$") then
+ lines[#lines+1] = stmt
+ return
+ end
+ mac_capture = nil
+ mac_lineno = nil
+ mac_name = nil
+ mac_list[#mac_list+1] = opname
+ -- Add macro-op definition.
+ map_op[opname] = function(params)
+ if not params then return mparams, lines end
+ -- Protect against recursive macro invocation.
+ if mac_active[opname] then wfatal("recursive macro invocation") end
+ mac_active[opname] = true
+ -- Setup substitution map.
+ local subst = {}
+ for i,mp in ipairs(mparams) do subst[mp] = params[i] end
+ local mcom
+ if g_opt.maccomment and g_opt.comment then
+ mcom = " MACRO "..name.." ("..#mparams..")"
+ wcomment("{"..mcom)
+ end
+ -- Loop through all captured statements.
+ for _,stmt in ipairs(lines) do
+ -- Substitute macro parameters.
+ local st = gsub(stmt, "[%w_]+", subst)
+ st = definesubst(st)
+ st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b.
+ if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end
+ -- Emit statement. Use a protected call for better diagnostics.
+ local ok, err = pcall(dostmt, st)
+ if not ok then
+ -- Add the captured statement to the error.
+ wprinterr(err, "\n", g_indent, "| ", stmt,
+ "\t[MACRO ", name, " (", #mparams, ")]\n")
+ end
+ end
+ if mcom then wcomment("}"..mcom) end
+ mac_active[opname] = nil
+ end
+ end
+end
+
+-- An .endmacro pseudo-opcode outside of a macro definition is an error.
+map_coreop[".endmacro_0"] = function(params)
+ wfatal(".endmacro without .macro")
+end
+
+-- Dump all macros and their contents (with -PP only).
+local function dumpmacros(out, lvl)
+ sort(mac_list)
+ out:write("Macros:\n")
+ for _,opname in ipairs(mac_list) do
+ local name = sub(opname, 1, -3)
+ local params, lines = map_op[opname]()
+ out:write(format(" %-20s %s\n", name, concat(params, ", ")))
+ if lvl > 1 then
+ for _,line in ipairs(lines) do
+ out:write(" |", line, "\n")
+ end
+ out:write("\n")
+ end
+ end
+ out:write("\n")
+end
+
+-- Check for unfinished macro definitions.
+local function checkmacros()
+ if mac_capture then
+ wprinterr(g_fname, ":", mac_lineno,
+ ": error: unfinished .macro `", mac_name ,"'\n")
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Support variables for captures.
+local cap_lineno, cap_name
+local cap_buffers = {}
+local cap_used = {}
+
+-- Start a capture.
+map_coreop[".capture_1"] = function(params)
+ if not params then return "name" end
+ wflush()
+ local name = params[1]
+ if not match(name, "^[%a_][%w_]*$") then
+ wfatal("bad capture name `"..name.."'")
+ end
+ if cap_name then
+ wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno)
+ end
+ cap_name = name
+ cap_lineno = g_lineno
+ -- Create or continue a capture buffer and start the output line capture.
+ local buf = cap_buffers[name]
+ if not buf then buf = {}; cap_buffers[name] = buf end
+ g_capbuffer = buf
+ g_synclineno = 0
+end
+
+-- Stop a capture.
+map_coreop[".endcapture_0"] = function(params)
+ wflush()
+ if not cap_name then wfatal(".endcapture without a valid .capture") end
+ cap_name = nil
+ cap_lineno = nil
+ g_capbuffer = nil
+ g_synclineno = 0
+end
+
+-- Dump a capture buffer.
+map_coreop[".dumpcapture_1"] = function(params)
+ if not params then return "name" end
+ wflush()
+ local name = params[1]
+ if not match(name, "^[%a_][%w_]*$") then
+ wfatal("bad capture name `"..name.."'")
+ end
+ cap_used[name] = true
+ wline(function(out)
+ local buf = cap_buffers[name]
+ if buf then wdumplines(out, buf) end
+ end)
+ g_synclineno = 0
+end
+
+-- Dump all captures and their buffers (with -PP only).
+local function dumpcaptures(out, lvl)
+ out:write("Captures:\n")
+ for name,buf in pairs(cap_buffers) do
+ out:write(format(" %-20s %4s)\n", name, "("..#buf))
+ if lvl > 1 then
+ local bar = rep("=", 76)
+ out:write(" ", bar, "\n")
+ for _,line in ipairs(buf) do
+ out:write(" ", line, "\n")
+ end
+ out:write(" ", bar, "\n\n")
+ end
+ end
+ out:write("\n")
+end
+
+-- Check for unfinished or unused captures.
+local function checkcaptures()
+ if cap_name then
+ wprinterr(g_fname, ":", cap_lineno,
+ ": error: unfinished .capture `", cap_name,"'\n")
+ return
+ end
+ for name in pairs(cap_buffers) do
+ if not cap_used[name] then
+ wprinterr(g_fname, ":*: error: missing .dumpcapture ", name ,"\n")
+ end
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Section names.
+local map_sections = {}
+
+-- Pseudo-opcode to define code sections.
+-- TODO: Data sections, BSS sections. Needs extra C code and API.
+map_coreop[".section_*"] = function(params)
+ if not params then return "name..." end
+ if #map_sections > 0 then werror("duplicate section definition") end
+ wflush()
+ for sn,name in ipairs(params) do
+ local opname = "."..name.."_0"
+ if not match(name, "^[%a][%w_]*$") or
+ map_op[opname] or map_op["."..name.."_*"] then
+ werror("bad section name `"..name.."'")
+ end
+ map_sections[#map_sections+1] = name
+ wline(format("#define DASM_SECTION_%s\t%d", upper(name), sn-1))
+ map_op[opname] = function(params) g_arch.section(sn-1) end
+ end
+ wline(format("#define DASM_MAXSECTION\t\t%d", #map_sections))
+end
+
+-- Dump all sections.
+local function dumpsections(out, lvl)
+ out:write("Sections:\n")
+ for _,name in ipairs(map_sections) do
+ out:write(format(" %s\n", name))
+ end
+ out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Load architecture-specific module (dasm_<arch>); returns an error string on failure, nil on success.
+local function loadarch(arch)
+ if not match(arch, "^[%w_]+$") then return "bad arch name" end
+ local ok, m_arch = pcall(require, "dasm_"..arch)
+ if not ok then return "cannot load module: "..m_arch end
+ g_arch = m_arch
+ wflush = m_arch.passcb(wline, werror, wfatal, wwarn) -- Arch module hands back its flush callback.
+ m_arch.setup(arch, g_opt)
+ map_op, map_def = m_arch.mergemaps(map_coreop, map_def) -- Replace op map with merged core+arch ops.
+end
+
+-- Dump architecture description (-A/--dumparch): load arch and list all opcodes.
+function opt_map.dumparch(args)
+ local name = optparam(args)
+ if not g_arch then
+ local err = loadarch(name)
+ if err then opterror(err) end
+ end
+
+ local t = {}
+ for name in pairs(map_coreop) do t[#t+1] = name end
+ for name in pairs(map_op) do t[#t+1] = name end
+ sort(t)
+
+ local out = stdout
+ -- Header uses this module's own _info; the arch module prints its details below.
+ out:write(format("%s version %s, released %s, %s\n",
+ _info.name, _info.version, _info.release, _info.url))
+ g_arch.dumparch(out)
+
+ local pseudo = true
+ out:write("Pseudo-Opcodes:\n")
+ for _,sname in ipairs(t) do
+ local name, nparam = match(sname, "^(.+)_([0-9%*])$")
+ if name then
+ if pseudo and sub(name, 1, 1) ~= "." then
+ out:write("\nOpcodes:\n")
+ pseudo = false
+ end
+ local f = map_op[sname]
+ local s
+ if nparam ~= "*" then nparam = nparam + 0 end
+ if nparam == 0 then
+ s = ""
+ elseif type(f) == "string" then
+ s = map_op[".template__"](nil, f, nparam)
+ else
+ s = f(nil, nparam)
+ end
+ if type(s) == "table" then
+ for _,s2 in ipairs(s) do
+ out:write(format(" %-12s %s\n", name, s2))
+ end
+ else
+ out:write(format(" %-12s %s\n", name, s))
+ end
+ end
+ end
+ out:write("\n")
+ exit(0)
+end
+
+-- Pseudo-opcode to set the architecture.
+-- Only initially available (map_op is replaced when called).
+map_op[".arch_1"] = function(params)
+ if not params then return "name" end
+ local err = loadarch(params[1])
+ if err then wfatal(err) end
+end
+
+-- Dummy .arch pseudo-opcode to improve the error report.
+map_coreop[".arch_1"] = function(params)
+ if not params then return "name" end
+ wfatal("duplicate .arch statement")
+end
+
+------------------------------------------------------------------------------
+
+-- Dummy pseudo-opcode. Don't confuse '.nop' with 'nop'.
+map_coreop[".nop_*"] = function(params)
+ if not params then return "[ignored...]" end
+end
+
+-- Pseudo-opcodes to raise errors.
+map_coreop[".error_1"] = function(params)
+ if not params then return "message" end
+ werror(params[1])
+end
+
+map_coreop[".fatal_1"] = function(params)
+ if not params then return "message" end
+ wfatal(params[1])
+end
+
+-- Dump all user defined elements.
+local function dumpdef(out)
+ local lvl = g_opt.dumpdef
+ if lvl == 0 then return end
+ dumpsections(out, lvl)
+ dumpdefines(out, lvl)
+ if g_arch then g_arch.dumpdef(out, lvl) end
+ dumpmacros(out, lvl)
+ dumpcaptures(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Helper state and gsub callback for splitstmt below.
+local splitlvl -- Stack of expected closing chars, kept as a string.
+
+local function splitstmt_one(c)
+ if c == "(" then
+ splitlvl = ")"..splitlvl -- Push expected closer.
+ elseif c == "[" then
+ splitlvl = "]"..splitlvl
+ elseif c == ")" or c == "]" then
+ if sub(splitlvl, 1, 1) ~= c then werror("unbalanced () or []") end
+ splitlvl = sub(splitlvl, 2) -- Pop.
+ elseif splitlvl == "" then
+ return " \0 " -- Top-level comma: replace with NUL as a param separator.
+ end
+ return c
+end
+
+-- Split statement into (pseudo-)opcode and params.
+local function splitstmt(stmt)
+ -- Convert label with trailing-colon into .label statement.
+ local label = match(stmt, "^%s*(.+):%s*$")
+ if label then return ".label", {label} end
+
+ -- Split at top-level commas, obeying parentheses and brackets.
+ splitlvl = ""
+ stmt = gsub(stmt, "[,%(%)%[%]]", splitstmt_one)
+ if splitlvl ~= "" then werror("unbalanced () or []") end
+
+ -- Split off opcode.
+ local op, other = match(stmt, "^%s*([^%s%z]+)%s*(.*)$")
+ if not op then werror("bad statement syntax") end
+
+ -- Split parameters.
+ local params = {}
+ for p in gmatch(other, "%s*(%Z+)%z?") do -- %Z = any char except NUL.
+ params[#params+1] = gsub(p, "%s+$", "") -- Strip trailing whitespace.
+ end
+ if #params > 16 then werror("too many parameters") end
+
+ params.op = op
+ return op, params
+end
+
+-- Process a single statement.
+dostmt = function(stmt) -- Assigned to upvalue, presumably forward-declared earlier -- confirm.
+ -- Ignore empty statements.
+ if match(stmt, "^%s*$") then return end
+
+ -- Capture macro defs before substitution.
+ if mac_capture then return mac_capture(stmt) end
+ stmt = definesubst(stmt)
+
+ -- Emit C code without parsing the line.
+ if sub(stmt, 1, 1) == "|" then
+ local tail = sub(stmt, 2)
+ wflush()
+ if sub(tail, 1, 2) == "//" then wcomment(tail) else wline(tail, true) end
+ return
+ end
+
+ -- Split into (pseudo-)opcode and params.
+ local op, params = splitstmt(stmt)
+
+ -- Get opcode handler (matching # of parameters or generic "_*" handler).
+ local f = map_op[op.."_"..#params] or map_op[op.."_*"]
+ if not f then
+ if not g_arch then wfatal("first statement must be .arch") end
+ -- Improve error report: distinguish wrong arity from unknown op.
+ for i=0,16 do
+ if map_op[op.."_"..i] then
+ werror("wrong number of parameters for `"..op.."'")
+ end
+ end
+ werror("unknown statement `"..op.."'")
+ end
+
+ -- Call opcode handler or special handler for template strings.
+ if type(f) == "string" then
+ map_op[".template__"](params, f)
+ else
+ f(params)
+ end
+end
+
+-- Process a single input line.
+local function doline(line)
+ if g_opt.flushline then wflush() end
+
+ -- Assembler line? (optional whitespace followed by "|")
+ local indent, aline = match(line, "^(%s*)%|(.*)$")
+ if not aline then
+ -- No, plain C code line, need to flush first.
+ wflush()
+ wsync()
+ wline(line, false)
+ return
+ end
+
+ g_indent = indent -- Remember current line indentation.
+
+ -- Emit C code (even from macros). Avoids echo and line parsing.
+ if sub(aline, 1, 1) == "|" then
+ if not mac_capture then
+ wsync()
+ elseif g_opt.comment then
+ wsync()
+ wcomment(aline)
+ end
+ dostmt(aline)
+ return
+ end
+
+ -- Echo assembler line as a comment.
+ if g_opt.comment then
+ wsync()
+ wcomment(aline)
+ end
+
+ -- Strip assembler comments.
+ aline = gsub(aline, "//.*$", "")
+
+ -- Split line into statements at semicolons.
+ if match(aline, ";") then
+ for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end
+ else
+ dostmt(aline)
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Write DynASM header.
+local function dasmhead(out)
+ out:write(format([[
+/*
+** This file has been pre-processed with DynASM.
+** %s
+** DynASM version %s, DynASM %s version %s
+** DO NOT EDIT! The original file is in "%s".
+*/
+
+#if DASM_VERSION != %d
+#error "Version mismatch between DynASM and included encoding engine"
+#endif
+
+]], _info.url,
+ _info.version, g_arch._info.arch, g_arch._info.version,
+ g_fname, _info.vernum))
+end
+
+-- Read input file.
+readfile = function(fin)
+ g_indent = ""
+ g_lineno = 0
+ g_synclineno = -1
+
+ -- Process all lines.
+ for line in fin:lines() do
+ g_lineno = g_lineno + 1
+ g_curline = line
+ local ok, err = pcall(doline, line)
+ if not ok and wprinterr(err, "\n") then return true end
+ end
+ wflush()
+
+ -- Close input file.
+ assert(fin == stdin or fin:close())
+end
+
+-- Write output file.
+local function writefile(outfile)
+ local fout
+
+ -- Open output file.
+ if outfile == nil or outfile == "-" then
+ fout = stdout
+ else
+ fout = assert(io.open(outfile, "w"))
+ end
+
+ -- Write all buffered lines
+ wdumplines(fout, g_wbuffer)
+
+ -- Close output file.
+ assert(fout == stdout or fout:close())
+
+ -- Optionally dump definitions.
+ dumpdef(fout == stdout and stderr or stdout)
+end
+
+-- Translate an input file to an output file.
+local function translate(infile, outfile)
+ g_wbuffer = {}
+ g_indent = ""
+ g_lineno = 0
+ g_synclineno = -1
+
+ -- Put header.
+ wline(dasmhead)
+
+ -- Read input file.
+ local fin
+ if infile == "-" then
+ g_fname = "(stdin)"
+ fin = stdin
+ else
+ g_fname = infile
+ fin = assert(io.open(infile, "r"))
+ end
+ readfile(fin)
+
+ -- Check for errors.
+ if not g_arch then
+ wprinterr(g_fname, ":*: error: missing .arch directive\n")
+ end
+ checkconds()
+ checkmacros()
+ checkcaptures()
+
+ if g_errcount ~= 0 then
+ stderr:write(g_fname, ":*: info: ", g_errcount, " error",
+ (type(g_errcount) == "number" and g_errcount > 1) and "s" or "",
+ " in input file -- no output file generated.\n")
+ dumpdef(stderr)
+ exit(1)
+ end
+
+ -- Write output file.
+ writefile(outfile)
+end
+
+------------------------------------------------------------------------------
+
+-- Print help text.
+function opt_map.help()
+ stdout:write("DynASM -- ", _info.description, ".\n")
+ stdout:write("DynASM ", _info.version, " ", _info.release, " ", _info.url, "\n")
+ stdout:write[[
+
+Usage: dynasm [OPTION]... INFILE.dasc|-
+
+ -h, --help Display this help text.
+ -V, --version Display version and copyright information.
+
+ -o, --outfile FILE Output file name (default is stdout).
+ -I, --include DIR Add directory to the include search path.
+
+ -c, --ccomment Use /* */ comments for assembler lines.
+ -C, --cppcomment Use // comments for assembler lines (default).
+ -N, --nocomment Suppress assembler lines in output.
+ -M, --maccomment Show macro expansions as comments (default off).
+
+ -L, --nolineno Suppress CPP line number information in output.
+ -F, --flushline Flush action list for every line.
+
+ -D NAME[=SUBST] Define a substitution.
+ -U NAME Undefine a substitution.
+
+ -P, --dumpdef Dump defines, macros, etc. Repeat for more output.
+ -A, --dumparch ARCH Load architecture ARCH and dump description.
+]]
+ exit(0)
+end
+
+-- Print version information.
+function opt_map.version()
+ stdout:write(format("%s version %s, released %s\n%s\n\n%s",
+ _info.name, _info.version, _info.release, _info.url, _info.copyright))
+ exit(0)
+end
+
+-- Misc. options.
+function opt_map.outfile(args) g_opt.outfile = optparam(args) end
+function opt_map.include(args) insert(g_opt.include, 1, optparam(args)) end
+function opt_map.ccomment() g_opt.comment = "/*|"; g_opt.endcomment = " */" end
+function opt_map.cppcomment() g_opt.comment = "//|"; g_opt.endcomment = "" end
+function opt_map.nocomment() g_opt.comment = false end
+function opt_map.maccomment() g_opt.maccomment = true end
+function opt_map.nolineno() g_opt.cpp = false end
+function opt_map.flushline() g_opt.flushline = true end
+function opt_map.dumpdef() g_opt.dumpdef = g_opt.dumpdef + 1 end
+
+------------------------------------------------------------------------------
+
+-- Short aliases for long options.
+local opt_alias = {
+ h = "help", ["?"] = "help", V = "version",
+ o = "outfile", I = "include",
+ c = "ccomment", C = "cppcomment", N = "nocomment", M = "maccomment",
+ L = "nolineno", F = "flushline",
+ P = "dumpdef", A = "dumparch",
+}
+
+-- Parse single option.
+local function parseopt(opt, args)
+ opt_current = #opt == 1 and "-"..opt or "--"..opt
+ local f = opt_map[opt] or opt_map[opt_alias[opt]]
+ if not f then
+ opterror("unrecognized option `", opt_current, "'. Try `--help'.\n")
+ end
+ f(args)
+end
+
+-- Parse arguments.
+local function parseargs(args)
+ -- Default options.
+ g_opt.comment = "//|"
+ g_opt.endcomment = ""
+ g_opt.cpp = true
+ g_opt.dumpdef = 0
+ g_opt.include = { "" }
+
+ -- Process all option arguments.
+ args.argn = 1
+ repeat
+ local a = args[args.argn]
+ if not a then break end
+ local lopt, opt = match(a, "^%-(%-?)(.+)")
+ if not opt then break end
+ args.argn = args.argn + 1
+ if lopt == "" then
+ -- Loop through short options.
+ for o in gmatch(opt, ".") do parseopt(o, args) end
+ else
+ -- Long option.
+ parseopt(opt, args)
+ end
+ until false
+
+ -- Check for proper number of arguments.
+ local nargs = #args - args.argn + 1
+ if nargs ~= 1 then
+ if nargs == 0 then
+ if g_opt.dumpdef > 0 then return dumpdef(stdout) end
+ end
+ opt_map.help()
+ end
+
+ -- Translate a single input file to a single output file
+ -- TODO: Handle multiple files?
+ translate(args[args.argn], g_opt.outfile)
+end
+
+------------------------------------------------------------------------------
+
+-- Add the directory dynasm.lua resides in to the Lua module search path.
+local arg = arg
+if arg and arg[0] then
+ local prefix = match(arg[0], "^(.*[/\\])")
+ if prefix then package.path = prefix.."?.lua;"..package.path end
+end
+
+-- Start DynASM.
+parseargs{...}
+
+------------------------------------------------------------------------------
+
diff --git a/etc/strict.lua b/etc/strict.lua
new file mode 100644
index 00000000..604619dd
--- /dev/null
+++ b/etc/strict.lua
@@ -0,0 +1,41 @@
+--
+-- strict.lua
+-- checks uses of undeclared global variables
+-- All global variables must be 'declared' through a regular assignment
+-- (even assigning nil will do) in a main chunk before being used
+-- anywhere or assigned to inside a function.
+--
+
+local getinfo, error, rawset, rawget = debug.getinfo, error, rawset, rawget
+
+local mt = getmetatable(_G)
+if mt == nil then
+ mt = {}
+ setmetatable(_G, mt)
+end
+
+mt.__declared = {}
+
+local function what ()
+ local d = getinfo(3, "S")
+ return d and d.what or "C"
+end
+
+mt.__newindex = function (t, n, v)
+ if not mt.__declared[n] then
+ local w = what()
+ if w ~= "main" and w ~= "C" then
+ error("assign to undeclared variable '"..n.."'", 2)
+ end
+ mt.__declared[n] = true
+ end
+ rawset(t, n, v)
+end
+
+mt.__index = function (t, n)
+ if not mt.__declared[n] and what() ~= "C" then
+ error("variable '"..n.."' is not declared", 2)
+ end
+ return rawget(t, n)
+end
+
diff --git a/lib/.gitignore b/lib/.gitignore
new file mode 100644
index 00000000..500e2855
--- /dev/null
+++ b/lib/.gitignore
@@ -0,0 +1 @@
+vmdef.lua
diff --git a/lib/bc.lua b/lib/bc.lua
new file mode 100644
index 00000000..532f2493
--- /dev/null
+++ b/lib/bc.lua
@@ -0,0 +1,182 @@
+----------------------------------------------------------------------------
+-- LuaJIT bytecode listing module.
+--
+-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+-- Released under the MIT/X license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module lists the bytecode of a Lua function. If it's loaded by -jbc
+-- it hooks into the parser and lists all functions of a chunk as they
+-- are parsed.
+--
+-- Example usage:
+--
+-- luajit -jbc -e 'local x=0; for i=1,1e6 do x=x+i end; print(x)'
+-- luajit -jbc=- foo.lua
+-- luajit -jbc=foo.list foo.lua
+--
+-- Default output is to stderr. To redirect the output to a file, pass a
+-- filename as an argument (use '-' for stdout) or set the environment
+-- variable LUAJIT_LISTFILE. The file is overwritten every time the module
+-- is started.
+--
+-- This module can also be used programmatically:
+--
+-- local bc = require("jit.bc")
+--
+-- local function foo() print("hello") end
+--
+-- bc.dump(foo) --> -- BYTECODE -- [...]
+-- print(bc.line(foo, 2)) --> 0002 KSTR 1 1 ; "hello"
+--
+-- local out = {
+-- -- Do something with each line:
+-- write = function(t, ...) io.write(...) end,
+-- close = function(t) end,
+-- flush = function(t) end,
+-- }
+-- bc.dump(foo, out)
+--
+------------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20000, "LuaJIT core/library version mismatch")
+local jutil = require("jit.util")
+local vmdef = require("jit.vmdef")
+local bit = require("bit")
+local sub, gsub, format = string.sub, string.gsub, string.format
+local byte, band, shr = string.byte, bit.band, bit.rshift
+local funcinfo, funcbc, funck = jutil.funcinfo, jutil.funcbc, jutil.funck
+local funcuvname = jutil.funcuvname
+local bcnames = vmdef.bcnames
+local stdout, stderr = io.stdout, io.stderr
+
+------------------------------------------------------------------------------
+
+-- Map a control character to a readable escape sequence for listings.
+local function ctlsub(c)
+ if c == "\n" then return "\\n"
+ elseif c == "\r" then return "\\r"
+ elseif c == "\t" then return "\\t"
+ else return format("\\%03d", byte(c))
+ end
+end
+
+-- Return one bytecode line.
+local function bcline(func, pc, prefix)
+ local ins, m = funcbc(func, pc)
+ if not ins then return end
+ local ma, mb, mc = band(m, 7), band(m, 15*8), band(m, 15*128)
+ local a = band(shr(ins, 8), 0xff)
+ local oidx = 6*band(ins, 0xff)
+ local s = format("%04d %s %-6s %3s ",
+ pc, prefix or " ", sub(bcnames, oidx+1, oidx+6), ma == 0 and "" or a)
+ local d = shr(ins, 16)
+ if mc == 13*128 then -- BCMjump
+ if ma == 0 then
+ return format("%s=> %04d\n", sub(s, 1, -3), pc+d-0x7fff)
+ end
+ return format("%s=> %04d\n", s, pc+d-0x7fff)
+ end
+ if mb ~= 0 then d = band(d, 0xff) end
+ local kc
+ if mc == 10*128 then -- BCMstr
+ kc = funck(func, -d-1)
+ kc = format(#kc > 40 and '"%.40s"~' or '"%s"', gsub(kc, "%c", ctlsub))
+ elseif mc == 9*128 then -- BCMnum
+ kc = funck(func, d)
+ elseif mc == 12*128 then -- BCMfunc
+ local fi = funcinfo(funck(func, -d-1))
+ if fi.ffid then
+ kc = vmdef.ffnames[fi.ffid]
+ else
+ kc = fi.loc
+ end
+ elseif mc == 5*128 then -- BCMuv
+ kc = funcuvname(func, d)
+ end
+ if ma == 5 then -- BCMuv
+ local ka = funcuvname(func, a)
+ if kc then kc = ka.." ; "..kc else kc = ka end
+ end
+ if mb ~= 0 then
+ local b = shr(ins, 24)
+ if kc then return format("%s%3d %3d ; %s\n", s, b, d, kc) end
+ return format("%s%3d %3d\n", s, b, d)
+ end
+ if kc then return format("%s%3d ; %s\n", s, d, kc) end
+ if mc == 7*128 and d > 32767 then d = d - 65536 end -- BCMlits
+ return format("%s%3d\n", s, d)
+end
+
+-- Collect branch targets of a function.
+local function bctargets(func)
+ local target = {}
+ for pc=1,1000000000 do
+ local ins, m = funcbc(func, pc)
+ if not ins then break end
+ if band(m, 15*128) == 13*128 then target[pc+shr(ins, 16)-0x7fff] = true end
+ end
+ return target
+end
+
+-- Dump bytecode instructions of a function.
+local function bcdump(func, out)
+ if not out then out = stdout end
+ local fi = funcinfo(func)
+ out:write(format("-- BYTECODE -- %s-%d\n", fi.loc, fi.lastlinedefined))
+ local target = bctargets(func)
+ for pc=1,1000000000 do
+ local s = bcline(func, pc, target[pc] and "=>")
+ if not s then break end
+ out:write(s)
+ end
+ out:write("\n")
+ out:flush()
+end
+
+------------------------------------------------------------------------------
+
+-- Active flag and output file handle.
+local active, out
+
+-- List handler.
+local function h_list(func)
+ return bcdump(func, out)
+end
+
+-- Detach list handler.
+local function bclistoff()
+ if active then
+ active = false
+ jit.attach(h_list)
+ if out and out ~= stdout and out ~= stderr then out:close() end
+ out = nil
+ end
+end
+
+-- Open the output file and attach list handler.
+local function bcliston(outfile)
+ if active then bclistoff() end
+ if not outfile then outfile = os.getenv("LUAJIT_LISTFILE") end
+ if outfile then
+ out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+ else
+ out = stderr
+ end
+ jit.attach(h_list, "bc")
+ active = true
+end
+
+-- Public module functions.
+module(...)
+
+line = bcline
+dump = bcdump
+targets = bctargets
+
+on = bcliston
+off = bclistoff
+start = bcliston -- For -j command line option.
+
diff --git a/lib/dis_x64.lua b/lib/dis_x64.lua
new file mode 100644
index 00000000..da3d63f8
--- /dev/null
+++ b/lib/dis_x64.lua
@@ -0,0 +1,19 @@
+----------------------------------------------------------------------------
+-- LuaJIT x64 disassembler wrapper module.
+--
+-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+-- Released under the MIT/X license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the 64 bit functions from the combined
+-- x86/x64 disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+local require = require
+
+module(...)
+
+local dis_x86 = require(_PACKAGE.."dis_x86")
+
+create = dis_x86.create64
+disass = dis_x86.disass64
+
diff --git a/lib/dis_x86.lua b/lib/dis_x86.lua
new file mode 100644
index 00000000..8f127bee
--- /dev/null
+++ b/lib/dis_x86.lua
@@ -0,0 +1,824 @@
+----------------------------------------------------------------------------
+-- LuaJIT x86/x64 disassembler module.
+--
+-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+-- Released under the MIT/X license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- Sending small code snippets to an external disassembler and mixing the
+-- output with our own stuff was too fragile. So I had to bite the bullet
+-- and write yet another x86 disassembler. Oh well ...
+--
+-- The output format is very similar to what ndisasm generates. But it has
+-- been developed independently by looking at the opcode tables from the
+-- Intel and AMD manuals. The supported instruction set is quite extensive
+-- and reflects what a current generation Intel or AMD CPU implements in
+-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
+-- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
+-- instructions.
+--
+-- Notes:
+-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
+-- * No attempt at optimization has been made -- it's fast enough for my needs.
+-- * The public API may change when more architectures are added.
+------------------------------------------------------------------------------
+
+-- Cache the used library functions in locals: faster access, and they
+-- stay reachable after module() switches the environment at end of file.
+local type = type
+local sub, byte, format = string.sub, string.byte, string.format
+local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local lower, rep = string.lower, string.rep
+
+-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
+-- Entry notation: lowercase mnemonic followed by uppercase size chars and
+-- lowercase operand chars (decoded by putpat below). Special forms,
+-- handled by dispatch()/map_act: "name!..." = opcode group keyed by the
+-- ModRM reg field, "name*..." = chain to a special handler, "name:" =
+-- prefix collector, "a|b|c|d" = variant by prefix (none|F3|66|F2),
+-- "a$b" = register form a / memory form b (either may be empty),
+-- "sz*a,b[,c]" = variant by operand size (o16,o32[,o64]).
+local map_opc1_32 = {
+--0x
+[0]="addBmr","addVmr","addBrm","addVrm","addBai","addVai","push es","pop es",
+"orBmr","orVmr","orBrm","orVrm","orBai","orVai","push cs","opc2*",
+--1x
+"adcBmr","adcVmr","adcBrm","adcVrm","adcBai","adcVai","push ss","pop ss",
+"sbbBmr","sbbVmr","sbbBrm","sbbVrm","sbbBai","sbbVai","push ds","pop ds",
+--2x
+"andBmr","andVmr","andBrm","andVrm","andBai","andVai","es:seg","daa",
+"subBmr","subVmr","subBrm","subVrm","subBai","subVai","cs:seg","das",
+--3x
+"xorBmr","xorVmr","xorBrm","xorVrm","xorBai","xorVai","ss:seg","aaa",
+"cmpBmr","cmpVmr","cmpBrm","cmpVrm","cmpBai","cmpVai","ds:seg","aas",
+--4x
+"incVR","incVR","incVR","incVR","incVR","incVR","incVR","incVR",
+"decVR","decVR","decVR","decVR","decVR","decVR","decVR","decVR",
+--5x
+"pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR","pushUR",
+"popUR","popUR","popUR","popUR","popUR","popUR","popUR","popUR",
+--6x
+"sz*pushaw,pusha","sz*popaw,popa","boundVrm","arplWmr",
+"fs:seg","gs:seg","o16:","a16",
+"pushUi","imulVrmi","pushBs","imulVrms",
+"insb","insVS","outsb","outsVS",
+--7x
+"joBj","jnoBj","jbBj","jnbBj","jzBj","jnzBj","jbeBj","jaBj",
+"jsBj","jnsBj","jpeBj","jpoBj","jlBj","jgeBj","jleBj","jgBj",
+--8x
+"arith!Bmi","arith!Vmi","arith!Bmi","arith!Vms",
+"testBmr","testVmr","xchgBrm","xchgVrm",
+"movBmr","movVmr","movBrm","movVrm",
+"movVmg","leaVrm","movWgm","popUm",
+--9x
+"nop*xchgVaR|pause|xchgWaR|repne nop","xchgVaR","xchgVaR","xchgVaR",
+"xchgVaR","xchgVaR","xchgVaR","xchgVaR",
+"sz*cbw,cwde,cdqe","sz*cwd,cdq,cqo","call farViw","wait",
+"sz*pushfw,pushf","sz*popfw,popf","sahf","lahf",
+--Ax
+"movBao","movVao","movBoa","movVoa",
+"movsb","movsVS","cmpsb","cmpsVS",
+"testBai","testVai","stosb","stosVS",
+"lodsb","lodsVS","scasb","scasVS",
+--Bx
+"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
+"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
+--Cx
+"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
+"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
+--Dx
+"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
+"fp*0","fp*1","fp*2","fp*3","fp*4","fp*5","fp*6","fp*7",
+--Ex
+"loopneBj","loopeBj","loopBj","sz*jcxzBj,jecxzBj,jrcxzBj",
+"inBau","inVau","outBua","outVua",
+"callVj","jmpVj","jmp farViw","jmpBj","inBad","inVad","outBda","outVda",
+--Fx
+"lock:","int1","repne:rep","rep:","hlt","cmc","testb!Bm","testv!Vm",
+"clc","stc","cli","sti","cld","std","incb!Bm","incd!Vm",
+}
+assert(#map_opc1_32 == 255)
+
+-- Map for 1st opcode byte in 64 bit mode (overrides only).
+-- 'false' marks opcodes that are invalid in 64 bit mode; all other
+-- bytes fall back to map_opc1_32 via __index. 0x40-0x4F become the
+-- REX prefixes and 0x63 becomes movsxd.
+local map_opc1_64 = setmetatable({
+ [0x06]=false, [0x07]=false, [0x0e]=false,
+ [0x16]=false, [0x17]=false, [0x1e]=false, [0x1f]=false,
+ [0x27]=false, [0x2f]=false, [0x37]=false, [0x3f]=false,
+ [0x60]=false, [0x61]=false, [0x62]=false, [0x63]="movsxdVrDmt", [0x67]="a32:",
+ [0x40]="rex*", [0x41]="rex*b", [0x42]="rex*x", [0x43]="rex*xb",
+ [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb",
+ [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb",
+ [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
+ [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false,
+ [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
+}, { __index = map_opc1_32 })
+
+-- Map for 2nd opcode byte (0F xx). True CISC hell. Hey, I told you.
+-- Prefix-dependent MMX/SSE opcodes: (none)|rep|o16|repne, i.e. the four
+-- "|"-separated variants are selected by the -|F3|66|F2 prefix bytes.
+-- nil entries are undefined opcodes, reported as "(unknown)".
+local map_opc2 = {
+--0x
+[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
+"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
+--1x
+"movupsXrm|movssXrm|movupdXrm|movsdXrm",
+"movupsXmr|movssXmr|movupdXmr|movsdXmr",
+"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
+"movlpsXmr||movlpdXmr",
+"unpcklpsXrm||unpcklpdXrm",
+"unpckhpsXrm||unpckhpdXrm",
+"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
+"movhpsXmr||movhpdXmr",
+"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
+"hintnopVm","hintnopVm","hintnopVm","hintnopVm",
+--2x
+"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
+"movapsXrm||movapdXrm",
+"movapsXmr||movapdXmr",
+"cvtpi2psXrMm|cvtsi2ssXrVm|cvtpi2pdXrMm|cvtsi2sdXrVm",
+"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
+"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
+"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
+"ucomissXrm||ucomisdXrm",
+"comissXrm||comisdXrm",
+--3x
+"wrmsr","rdtsc","rdmsr","rdpmc","sysenter","sysexit",nil,"getsec",
+"opc3*38",nil,"opc3*3a",nil,nil,nil,nil,nil,
+--4x
+"cmovoVrm","cmovnoVrm","cmovbVrm","cmovnbVrm",
+"cmovzVrm","cmovnzVrm","cmovbeVrm","cmovaVrm",
+"cmovsVrm","cmovnsVrm","cmovpeVrm","cmovpoVrm",
+"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
+--5x
+"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
+"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
+"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
+"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
+"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
+"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
+"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
+"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
+"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
+--6x
+"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
+"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
+"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
+"||punpcklqdqXrm","||punpckhqdqXrm",
+"movPrVSm","movqMrm|movdquXrm|movdqaXrm",
+--7x
+"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
+"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
+"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
+"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
+nil,nil,
+"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
+"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
+--8x
+"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
+"jsVj","jnsVj","jpeVj","jpoVj","jlVj","jgeVj","jleVj","jgVj",
+--9x
+"setoBm","setnoBm","setbBm","setnbBm","setzBm","setnzBm","setbeBm","setaBm",
+"setsBm","setnsBm","setpeBm","setpoBm","setlBm","setgeBm","setleBm","setgBm",
+--Ax
+"push fs","pop fs","cpuid","btVmr","shldVmru","shldVmrc",nil,nil,
+"push gs","pop gs","rsm","btsVmr","shrdVmru","shrdVmrc","fxsave!Dmp","imulVrm",
+--Bx
+"cmpxchgBmr","cmpxchgVmr","$lssVrm","btrVmr",
+"$lfsVrm","$lgsVrm","movzxVrBmt","movzxVrWmt",
+"|popcntVrm","ud2Dp","bt!Vmu","btcVmr",
+"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
+--Cx
+"xaddBmr","xaddVmr",
+"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
+"pinsrwPrWmu","pextrwDrPmu",
+"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
+"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
+--Dx
+"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
+"paddqPrm","pmullwPrm",
+"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
+"psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
+"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
+--Ex
+"pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
+"pmulhuwPrm","pmulhwPrm",
+"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
+"psubsbPrm","psubswPrm","pminswPrm","porPrm",
+"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
+--Fx
+"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
+"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
+"psubbPrm","psubwPrm","psubdPrm","psubqPrm",
+"paddbPrm","paddwPrm","padddPrm","ud",
+}
+assert(map_opc2[255] == "ud")
+
+-- Map for three-byte opcodes. Can't wait for their next invention.
+-- Keyed by the second escape byte ("38" or "3a"), then by the third
+-- opcode byte; reached via the "opc3*38"/"opc3*3a" entries above.
+local map_opc3 = {
+["38"] = { -- [66] 0f 38 xx
+--0x
+[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
+"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
+"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
+nil,nil,nil,nil,
+--1x
+"||pblendvbXrma",nil,nil,nil,
+"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
+nil,nil,nil,nil,
+"pabsbPrm","pabswPrm","pabsdPrm",nil,
+--2x
+"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
+"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
+"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
+nil,nil,nil,nil,
+--3x
+"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
+"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
+"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
+"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
+--4x
+"||pmulddXrm","||phminposuwXrm",
+--Fx
+[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
+},
+
+["3a"] = { -- [66] 0f 3a xx
+--0x
+[0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
+"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
+"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
+--1x
+nil,nil,nil,nil,
+"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
+nil,nil,nil,nil,nil,nil,nil,nil,
+--2x
+"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
+--4x
+[0x40] = "||dppsXrmu",
+[0x41] = "||dppdXrmu",
+[0x42] = "||mpsadbwXrmu",
+--6x
+[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
+[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
+},
+}
+
+-- Map for VMX/SVM opcodes 0F 01 C0-FF (sgdt group with register operands).
+-- Reached via the "vm*..." entries in map_opcgroup.sgdt when the ModRM
+-- byte selects a register operand (see map_act.vm).
+local map_opcvm = {
+[0xc1]="vmcall",[0xc2]="vmlaunch",[0xc3]="vmresume",[0xc4]="vmxoff",
+[0xc8]="monitor",[0xc9]="mwait",
+[0xd8]="vmrun",[0xd9]="vmmcall",[0xda]="vmload",[0xdb]="vmsave",
+[0xdc]="stgi",[0xdd]="clgi",[0xde]="skinit",[0xdf]="invlpga",
+[0xf8]="swapgs",[0xf9]="rdtscp",
+}
+
+-- Map for FP opcodes. And you thought stack machines are simple?
+-- Indexed with (opcode-0xD8)*8 + ModRM.reg, plus 64 for ModRM >= 0xC0
+-- (register operands); subtables are further indexed by ModRM.rm+1.
+-- See the "fp" handler in map_act below.
+local map_opcfp = {
+-- D8-DF 00-BF: opcodes with a memory operand.
+-- D8
+[0]="faddFm","fmulFm","fcomFm","fcompFm","fsubFm","fsubrFm","fdivFm","fdivrFm",
+"fldFm",nil,"fstFm","fstpFm","fldenvVm","fldcwWm","fnstenvVm","fnstcwWm",
+-- DA
+"fiaddDm","fimulDm","ficomDm","ficompDm",
+"fisubDm","fisubrDm","fidivDm","fidivrDm",
+-- DB
+"fildDm","fisttpDm","fistDm","fistpDm",nil,"fld twordFmp",nil,"fstp twordFmp",
+-- DC
+"faddGm","fmulGm","fcomGm","fcompGm","fsubGm","fsubrGm","fdivGm","fdivrGm",
+-- DD
+"fldGm","fisttpQm","fstGm","fstpGm","frstorDmp",nil,"fnsaveDmp","fnstswWm",
+-- DE
+"fiaddWm","fimulWm","ficomWm","ficompWm",
+"fisubWm","fisubrWm","fidivWm","fidivrWm",
+-- DF
+"fildWm","fisttpWm","fistWm","fistpWm",
+"fbld twordFmp","fildQm","fbstp twordFmp","fistpQm",
+-- xx C0-FF: opcodes with a pseudo-register operand.
+-- D8
+"faddFf","fmulFf","fcomFf","fcompFf","fsubFf","fsubrFf","fdivFf","fdivrFf",
+-- D9
+"fldFf","fxchFf",{"fnop"},nil,
+{"fchs","fabs",nil,nil,"ftst","fxam"},
+{"fld1","fldl2t","fldl2e","fldpi","fldlg2","fldln2","fldz"},
+{"f2xm1","fyl2x","fptan","fpatan","fxtract","fprem1","fdecstp","fincstp"},
+{"fprem","fyl2xp1","fsqrt","fsincos","frndint","fscale","fsin","fcos"},
+-- DA
+"fcmovbFf","fcmoveFf","fcmovbeFf","fcmovuFf",nil,{nil,"fucompp"},nil,nil,
+-- DB
+"fcmovnbFf","fcmovneFf","fcmovnbeFf","fcmovnuFf",
+{nil,nil,"fnclex","fninit"},"fucomiFf","fcomiFf",nil,
+-- DC
+"fadd toFf","fmul toFf",nil,nil,
+"fsub toFf","fsubr toFf","fdivr toFf","fdiv toFf",
+-- DD
+"ffreeFf",nil,"fstFf","fstpFf","fucomFf","fucompFf",nil,nil,
+-- DE
+"faddpFf","fmulpFf",nil,{nil,"fcompp"},
+"fsubrpFf","fsubpFf","fdivrpFf","fdivpFf",
+-- DF
+nil,nil,nil,nil,{"fnstsw ax"},"fucomipFf","fcomipFf",nil,
+}
+assert(map_opcfp[126] == "fcomipFf")
+
+-- Map for opcode groups. The subkey is sp (the reg field) from the
+-- ModRM byte; entries are re-dispatched, so they may use the same
+-- pattern notation as the opcode maps above.
+local map_opcgroup = {
+ arith = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" },
+ shift = { "rol", "ror", "rcl", "rcr", "shl", "shr", "sal", "sar" },
+ testb = { "testBmi", "testBmi", "not", "neg", "mul", "imul", "div", "idiv" },
+ testv = { "testVmi", "testVmi", "not", "neg", "mul", "imul", "div", "idiv" },
+ incb = { "inc", "dec" },
+ incd = { "inc", "dec", "callDmp", "$call farDmp",
+ "jmpDmp", "$jmp farDmp", "pushUm" },
+ sldt = { "sldt", "str", "lldt", "ltr", "verr", "verw" },
+ sgdt = { "vm*$sgdt", "vm*$sidt", "$lgdt", "vm*$lidt",
+ "smsw", nil, "lmsw", "vm*$invlpg" },
+ bt = { nil, nil, nil, nil, "bt", "bts", "btr", "btc" },
+ cmpxchg = { nil, "sz*,cmpxchg8bQmp,cmpxchg16bXmp", nil, nil,
+ nil, nil, "vmptrld|vmxon|vmclear", "vmptrst" },
+ pshiftw = { nil, nil, "psrlw", nil, "psraw", nil, "psllw" },
+ pshiftd = { nil, nil, "psrld", nil, "psrad", nil, "pslld" },
+ pshiftq = { nil, nil, "psrlq", nil, nil, nil, "psllq" },
+ pshiftdq = { nil, nil, "psrlq", "psrldq", nil, nil, "psllq", "pslldq" },
+ fxsave = { "$fxsave", "$fxrstor", "$ldmxcsr", "$stmxcsr",
+ nil, "lfenceDp$", "mfenceDp$", "sfenceDp$clflush" },
+ prefetch = { "prefetch", "prefetchw" },
+ prefetcht = { "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2" },
+}
+
+------------------------------------------------------------------------------
+
+-- Maps for register names, indexed by register number + 1. B is used
+-- without a REX prefix (ah-bh), B64 with any REX prefix (spl-dil).
+local map_regs = {
+ B = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
+ "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
+ B64 = { "al", "cl", "dl", "bl", "spl", "bpl", "sil", "dil",
+ "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b" },
+ W = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
+ "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w" },
+ D = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
+ "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d" },
+ Q = { "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" },
+ M = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
+ "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
+ X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
+}
+local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
+
+-- Maps for size names: map_sz2n gives the operand size in bytes (for
+-- immediates), map_sz2prefix the memory operand size prefix string.
+local map_sz2n = {
+ B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16,
+}
+local map_sz2prefix = {
+ B = "byte", W = "word", D = "dword",
+ Q = "qword",
+ M = "qword", X = "xword",
+ F = "dword", G = "qword", -- No need for sizes/register names for these two.
+}
+
+------------------------------------------------------------------------------
+
+-- Output a nicely formatted line with an opcode and operands.
+-- Flushes one finished instruction: prepends any pending prefixes
+-- (o16/a32/rep/rex/seg/lock), adds the hex dump of the consumed bytes
+-- and an optional symbol-table annotation for ctx.imm, emits the line
+-- via ctx.out and resets the per-instruction state.
+local function putop(ctx, text, operands)
+  local code, pos, hex = ctx.code, ctx.pos, ""
+  local hmax = ctx.hexdump
+  if hmax > 0 then
+    -- Hex dump of the bytes consumed since ctx.start; truncated with ". ".
+    for i=ctx.start,pos-1 do
+      hex = hex..format("%02X", byte(code, i, i))
+    end
+    if #hex > hmax then hex = sub(hex, 1, hmax)..". "
+    else hex = hex..rep(" ", hmax-#hex+2) end
+  end
+  if operands then text = text.." "..operands end
+  if ctx.o16 then text = "o16 "..text; ctx.o16 = false end
+  if ctx.a32 then text = "a32 "..text; ctx.a32 = false end
+  if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
+  if ctx.rex then
+    -- Print only REX prefixes with at least one flag bit set.
+    local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
+              (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")
+    if t ~= "" then text = "rex."..t.." "..text end
+    ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
+    ctx.rex = false
+  end
+  if ctx.seg then
+    -- Fold a segment override into the first memory operand, if any;
+    -- otherwise print it as a standalone prefix.
+    local text2, n = gsub(text, "%[", "["..ctx.seg..":")
+    if n == 0 then text = ctx.seg.." "..text else text = text2 end
+    ctx.seg = false
+  end
+  if ctx.lock then text = "lock "..text; ctx.lock = false end
+  local imm = ctx.imm
+  if imm then
+    -- Annotate a recorded immediate/target with its symbol, if known.
+    local sym = ctx.symtab[imm]
+    if sym then text = text.."\t->"..sym end
+  end
+  ctx.out(format("%08x  %s%s\n", ctx.addr+ctx.start, hex, text))
+  ctx.mrm = false
+  ctx.start = pos
+  ctx.imm = nil
+end
+
+-- Clear all prefix flags.
+-- Used between instructions and on the fallback paths below, so stray
+-- prefixes never leak into the next decoded instruction.
+local function clearprefixes(ctx)
+  ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
+  ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
+  ctx.rex = false; ctx.a32 = false
+end
+
+-- Fallback for incomplete opcodes at the end.
+-- Consumes the remaining bytes and prints "(incomplete)". Returns
+-- putop()'s nil result, so callers (e.g. getimm) can bail out on it.
+local function incomplete(ctx)
+  ctx.pos = ctx.stop+1
+  clearprefixes(ctx)
+  return putop(ctx, "(incomplete)")
+end
+
+-- Fallback for unknown opcodes.
+-- Prints "(unknown)" for the bytes consumed so far.
+local function unknown(ctx)
+  clearprefixes(ctx)
+  return putop(ctx, "(unknown)")
+end
+
+-- Return an immediate of the specified size.
+-- Reads an n-byte (n = 1, 2 or 4) little-endian immediate at 'pos'.
+-- Note: only 4-byte immediates are recorded in ctx.imm (used by putop
+-- for the symbol table annotation). Returns nil -- via incomplete() --
+-- when the immediate would run past the end of the code.
+local function getimm(ctx, pos, n)
+  if pos+n-1 > ctx.stop then return incomplete(ctx) end
+  local code = ctx.code
+  if n == 1 then
+    local b1 = byte(code, pos, pos)
+    return b1
+  elseif n == 2 then
+    local b1, b2 = byte(code, pos, pos+1)
+    return b1+b2*256
+  else
+    local b1, b2, b3, b4 = byte(code, pos, pos+3)
+    local imm = b1+b2*256+b3*65536+b4*16777216
+    ctx.imm = imm
+    return imm
+  end
+end
+
+-- Process pattern string and generate the operands.
+-- 'pat' is scanned char by char: uppercase chars select the operand
+-- size and register table, lowercase chars emit immediates, registers
+-- or ModRM-based operands (decoded lazily on first use). Emits the
+-- finished line via putop(). Returns nil -- after printing
+-- "(incomplete)" -- if the code ends mid-instruction.
+local function putpat(ctx, name, pat)
+  local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
+  local code, pos, stop = ctx.code, ctx.pos, ctx.stop
+
+  -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "V" or p == "U" then
+      -- Variable operand size: REX.W -> 64 bit, 66h -> 16 bit, else
+      -- 32 bit ("U" defaults to 64 bit in x64 mode, e.g. push/pop).
+      if ctx.rexw then sz = "Q"; ctx.rexw = false
+      elseif ctx.o16 then sz = "W"; ctx.o16 = false
+      elseif p == "U" and ctx.x64 then sz = "Q"
+      else sz = "D" end
+      regs = map_regs[sz]
+    elseif p == "T" then
+      if ctx.rexw then sz = "Q"; ctx.rexw = false else sz = "D" end
+      regs = map_regs[sz]
+    elseif p == "B" then
+      -- Any REX prefix switches to the spl-dil byte register names.
+      sz = "B"
+      regs = ctx.rex and map_regs.B64 or map_regs.B
+    elseif match(p, "[WDQMXFG]") then
+      sz = p
+      regs = map_regs[sz]
+    elseif p == "P" then
+      -- MMX register, or SSE register with a 66h prefix.
+      sz = ctx.o16 and "X" or "M"; ctx.o16 = false
+      regs = map_regs[sz]
+    elseif p == "S" then
+      -- Append the operand size to the mnemonic (e.g. movsd/movsq).
+      name = name..lower(sz)
+    elseif p == "s" then
+      -- Sign-extended byte immediate.
+      local imm = getimm(ctx, pos, 1); if not imm then return end
+      x = imm <= 127 and format("+0x%02x", imm)
+                     or format("-0x%02x", 256-imm)
+      pos = pos+1
+    elseif p == "u" then
+      -- Unsigned byte immediate.
+      local imm = getimm(ctx, pos, 1); if not imm then return end
+      x = format("0x%02x", imm)
+      pos = pos+1
+    elseif p == "w" then
+      -- Word immediate.
+      local imm = getimm(ctx, pos, 2); if not imm then return end
+      x = format("0x%x", imm)
+      pos = pos+2
+    elseif p == "o" then -- [offset]
+      -- Absolute memory offset: 8 bytes in x64 mode, read as two
+      -- 4-byte halves, else 4 bytes.
+      if ctx.x64 then
+        local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
+        local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
+        x = format("[0x%08x%08x]", imm2, imm1)
+        pos = pos+8
+      else
+        local imm = getimm(ctx, pos, 4); if not imm then return end
+        x = format("[0x%08x]", imm)
+        pos = pos+4
+      end
+    elseif p == "i" or p == "I" then
+      -- Immediate of the current operand size; "I" allows a full
+      -- 8-byte immediate in x64 mode, "i" is limited to 4 bytes.
+      local n = map_sz2n[sz]
+      if n == 8 and ctx.x64 and p == "I" then
+        local imm1 = getimm(ctx, pos, 4); if not imm1 then return end
+        local imm2 = getimm(ctx, pos+4, 4); if not imm2 then return end
+        x = format("0x%08x%08x", imm2, imm1)
+      else
+        if n == 8 then n = 4 end
+        local imm = getimm(ctx, pos, n); if not imm then return end
+        if sz == "Q" and (imm < 0 or imm > 0x7fffffff) then
+          -- Negative 32 bit immediate, sign-extended to 64 bit.
+          imm = (0xffffffff+1)-imm
+          x = format(imm > 65535 and "-0x%08x" or "-0x%x", imm)
+        else
+          x = format(imm > 65535 and "0x%08x" or "0x%x", imm)
+        end
+      end
+      pos = pos+n
+    elseif p == "j" then
+      -- Relative jump/call target: sign-extend, add to the address of
+      -- the next instruction and record it for symbol annotation.
+      local n = map_sz2n[sz]
+      if n == 8 then n = 4 end
+      local imm = getimm(ctx, pos, n); if not imm then return end
+      if sz == "B" and imm > 127 then imm = imm-256
+      elseif imm > 2147483647 then imm = imm-4294967296 end
+      pos = pos+n
+      imm = imm + pos + ctx.addr
+      if imm > 4294967295 and not ctx.x64 then imm = imm-4294967296 end
+      ctx.imm = imm
+      if sz == "W" then
+        x = format("word 0x%04x", imm%65536)
+      elseif ctx.x64 then
+        local lo = imm % 0x1000000
+        x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
+      else
+        x = format("0x%08x", imm)
+      end
+    elseif p == "R" then
+      -- Register encoded in the low 3 bits of the opcode byte itself.
+      local r = byte(code, pos-1, pos-1)%8
+      if ctx.rexb then r = r + 8; ctx.rexb = false end
+      x = regs[r+1]
+    elseif p == "a" then x = regs[1]
+    elseif p == "c" then x = "cl"
+    elseif p == "d" then x = "dx"
+    elseif p == "1" then x = "1"
+    else
+      -- ModRM-addressed operands (m/r/g/f/x/y/z/t): decode the ModRM
+      -- byte, and a trailing SIB byte/displacement, on first use.
+      if not mode then
+        mode = ctx.mrm
+        if not mode then
+          if pos > stop then return incomplete(ctx) end
+          mode = byte(code, pos, pos)
+          pos = pos+1
+        end
+        rm = mode%8; mode = (mode-rm)/8
+        sp = mode%8; mode = (mode-sp)/8
+        sdisp = ""
+        if mode < 3 then
+          -- Memory operand; rm == 4 means a SIB byte follows.
+          if rm == 4 then
+            if pos > stop then return incomplete(ctx) end
+            sc = byte(code, pos, pos)
+            pos = pos+1
+            rm = sc%8; sc = (sc-rm)/8
+            rx = sc%8; sc = (sc-rx)/8
+            if ctx.rexx then rx = rx + 8; ctx.rexx = false end
+            if rx == 4 then rx = nil end
+          end
+          -- Displacement: mode 1 = disp8, mode 2 = disp32,
+          -- mode 0 + rm 5 = disp32 only (rip-relative in x64 mode).
+          if mode > 0 or rm == 5 then
+            local dsz = mode
+            if dsz ~= 1 then dsz = 4 end
+            local disp = getimm(ctx, pos, dsz); if not disp then return end
+            if mode == 0 then rm = nil end
+            if rm or rx or (not sc and ctx.x64 and not ctx.a32) then
+              if dsz == 1 and disp > 127 then
+                sdisp = format("-0x%x", 256-disp)
+              elseif disp >= 0 and disp <= 0x7fffffff then
+                sdisp = format("+0x%x", disp)
+              else
+                sdisp = format("-0x%x", (0xffffffff+1)-disp)
+              end
+            else
+              sdisp = format(ctx.x64 and not ctx.a32 and
+                             not (disp >= 0 and disp <= 0x7fffffff)
+                             and "0xffffffff%08x" or "0x%08x", disp)
+            end
+            pos = pos+dsz
+          end
+        end
+        if rm and ctx.rexb then rm = rm + 8; ctx.rexb = false end
+        if ctx.rexr then sp = sp + 8; ctx.rexr = false end
+      end
+      if p == "m" then
+        if mode == 3 then x = regs[rm+1]
+        else
+          -- Build "[base+index*scale+disp]" from the decoded fields.
+          local aregs = ctx.a32 and map_regs.D or ctx.aregs
+          local srm, srx = "", ""
+          if rm then srm = aregs[rm+1]
+          elseif not sc and ctx.x64 and not ctx.a32 then srm = "rip" end
+          ctx.a32 = false
+          if rx then
+            if rm then srm = srm.."+" end
+            srx = aregs[rx+1]
+            if sc > 0 then srx = srx.."*"..(2^sc) end
+          end
+          x = format("[%s%s%s]", srm, srx, sdisp)
+        end
+        -- Add a size prefix unless a register operand implies the size.
+        if mode < 3 and
+           (not match(pat, "[aRrgp]") or match(pat, "t")) then -- Yuck.
+          x = map_sz2prefix[sz].." "..x
+        end
+      elseif p == "r" then x = regs[sp+1]
+      elseif p == "g" then x = map_segregs[sp+1]
+      elseif p == "p" then -- Suppress prefix.
+      elseif p == "f" then x = "st"..rm
+      elseif p == "x" then
+        -- Control register; lock prefix selects CR8 in 32 bit mode.
+        if sp == 0 and ctx.lock and not ctx.x64 then
+          x = "CR8"; ctx.lock = false
+        else
+          x = "CR"..sp
+        end
+      elseif p == "y" then x = "DR"..sp
+      elseif p == "z" then x = "TR"..sp
+      elseif p == "t" then
+      else
+        error("bad pattern `"..pat.."'")
+      end
+    end
+    if x then operands = operands and operands..", "..x or x end
+  end
+  ctx.pos = pos
+  return putop(ctx, name, operands)
+end
+
+-- Forward declaration (map_act references the dispatchers and vice versa).
+local map_act
+
+-- Fetch and cache MRM byte.
+-- The cached byte (ctx.mrm) is shared between dispatch steps and
+-- putpat(), and cleared by putop() after each instruction.
+-- Returns nil at the end of the code.
+local function getmrm(ctx)
+  local mrm = ctx.mrm
+  if not mrm then
+    local pos = ctx.pos
+    if pos > ctx.stop then return nil end
+    mrm = byte(ctx.code, pos, pos)
+    ctx.pos = pos+1
+    ctx.mrm = mrm
+  end
+  return mrm
+end
+
+-- Dispatch to handler depending on pattern.
+-- 'opat' is the pattern string from an opcode map (nil/false for invalid
+-- opcodes); 'patgrp' optionally supplies the operand pattern for opcode
+-- group entries that only name the mnemonic.
+local function dispatch(ctx, opat, patgrp)
+  if not opat then return unknown(ctx) end
+  if match(opat, "%|") then -- MMX/SSE variants depending on prefix.
+    -- Variant order is none|F3(rep)|66(o16)|F2(repne); the matching
+    -- prefix flag is consumed here.
+    local p
+    if ctx.rep then
+      p = ctx.rep=="rep" and "%|([^%|]*)" or "%|[^%|]*%|[^%|]*%|([^%|]*)"
+      ctx.rep = false
+    elseif ctx.o16 then p = "%|[^%|]*%|([^%|]*)"; ctx.o16 = false
+    else p = "^[^%|]*" end
+    opat = match(opat, p)
+    if not opat then return unknown(ctx) end
+-- ctx.rep = false; ctx.o16 = false
+    --XXX fails for 66 f2 0f 38 f1 06 crc32 eax,WORD PTR [esi]
+    --XXX remove in branches?
+  end
+  if match(opat, "%$") then -- reg$mem variants.
+    -- Part before "$" is the register form (ModRM >= C0), part after
+    -- is the memory form; an empty part means the form is invalid.
+    local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
+    opat = match(opat, mrm >= 192 and "^[^%$]*" or "%$(.*)")
+    if opat == "" then return unknown(ctx) end
+  end
+  if opat == "" then return unknown(ctx) end
+  -- Split into mnemonic and pattern; dispatch on the first pattern char.
+  local name, pat = match(opat, "^([a-z0-9 ]*)(.*)")
+  if pat == "" and patgrp then pat = patgrp end
+  return map_act[sub(pat, 1, 1)](ctx, name, pat)
+end
+
+-- Get a pattern from an opcode map and dispatch to handler.
+-- Consumes one opcode byte; a missing map entry ends up in unknown().
+local function dispatchmap(ctx, opcmap)
+  local pos = ctx.pos
+  local opat = opcmap[byte(ctx.code, pos, pos)]
+  pos = pos + 1
+  ctx.pos = pos
+  return dispatch(ctx, opat)
+end
+
+-- Map for action codes. The key is the first char after the name.
+map_act = {
+  -- Simple opcodes without operands.
+  [""] = function(ctx, name, pat)
+    return putop(ctx, name)
+  end,
+
+  -- Operand size chars fall right through.
+  B = putpat, W = putpat, D = putpat, Q = putpat,
+  V = putpat, U = putpat, T = putpat,
+  M = putpat, X = putpat, P = putpat,
+  F = putpat, G = putpat,
+
+  -- Collect prefixes.
+  -- The prefix value is stored under the key given after ":" (e.g.
+  -- "repne:rep" sets ctx.rep = "repne"), or under its own name when the
+  -- pattern is just ":" (e.g. "lock:" sets ctx.lock = "lock").
+  [":"] = function(ctx, name, pat)
+    ctx[pat == ":" and name or sub(pat, 2)] = name
+    if ctx.pos - ctx.start > 5 then return unknown(ctx) end -- Limit #prefixes.
+  end,
+
+  -- Chain to special handler specified by name.
+  ["*"] = function(ctx, name, pat)
+    return map_act[name](ctx, name, sub(pat, 2))
+  end,
+
+  -- Use named subtable for opcode group.
+  -- The subopcode is the reg field of the ModRM byte.
+  ["!"] = function(ctx, name, pat)
+    local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
+    return dispatch(ctx, map_opcgroup[name][((mrm-(mrm%8))/8)%8+1], sub(pat, 2))
+  end,
+
+  -- o16,o32[,o64] variants.
+  -- Picks the o16 variant with a 66h prefix, the o64 one (if present)
+  -- with REX.W, and the o32 one otherwise.
+  sz = function(ctx, name, pat)
+    if ctx.o16 then ctx.o16 = false
+    else
+      pat = match(pat, ",(.*)")
+      if ctx.rexw then
+        local p = match(pat, ",(.*)")
+        if p then pat = p; ctx.rexw = false end
+      end
+    end
+    pat = match(pat, "^[^,]*")
+    return dispatch(ctx, pat)
+  end,
+
+  -- Two-byte opcode dispatch.
+  opc2 = function(ctx, name, pat)
+    return dispatchmap(ctx, map_opc2)
+  end,
+
+  -- Three-byte opcode dispatch.
+  -- pat is the second escape byte ("38" or "3a").
+  opc3 = function(ctx, name, pat)
+    return dispatchmap(ctx, map_opc3[pat])
+  end,
+
+  -- VMX/SVM dispatch.
+  -- Keyed by the cached ModRM byte (register forms of the sgdt group).
+  vm = function(ctx, name, pat)
+    return dispatch(ctx, map_opcvm[ctx.mrm])
+  end,
+
+  -- Floating point opcode dispatch.
+  -- Index is (opcode-D8)*8 + ModRM.reg, plus 64 for ModRM >= C0;
+  -- subtable entries are further keyed by ModRM.rm.
+  fp = function(ctx, name, pat)
+    local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
+    local rm = mrm%8
+    local idx = pat*8 + ((mrm-rm)/8)%8
+    if mrm >= 192 then idx = idx + 64 end
+    local opat = map_opcfp[idx]
+    if type(opat) == "table" then opat = opat[rm+1] end
+    return dispatch(ctx, opat)
+  end,
+
+  -- REX prefix.
+  -- pat holds the flag chars (w/r/x/b) to set on the context.
+  rex = function(ctx, name, pat)
+    if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed.
+    for p in gmatch(pat, ".") do ctx["rex"..p] = true end
+    ctx.rex = true
+  end,
+
+  -- Special case for nop with REX prefix.
+  nop = function(ctx, name, pat)
+    return dispatch(ctx, ctx.rex and pat or "nop")
+  end,
+}
+
+------------------------------------------------------------------------------
+
+-- Disassemble a block of code.
+-- ofs is a 0-based byte offset into ctx.code (default 0); len defaults
+-- to the rest of the string. Converted to 1-based string indices here.
+local function disass_block(ctx, ofs, len)
+  if not ofs then ofs = 0 end
+  local stop = len and ofs+len or #ctx.code
+  ofs = ofs + 1
+  ctx.start = ofs
+  ctx.pos = ofs
+  ctx.stop = stop
+  ctx.imm = nil
+  ctx.mrm = false
+  clearprefixes(ctx)
+  while ctx.pos <= stop do dispatchmap(ctx, ctx.map1) end
+  -- Report a trailing truncated instruction, if any bytes were consumed.
+  if ctx.pos ~= ctx.start then incomplete(ctx) end
+end
+
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+-- code: machine code as a string; addr: display base address (default 0);
+-- out: output function (default io.write). Defaults to 32 bit mode.
+local function create_(code, addr, out)
+  local ctx = {}
+  ctx.code = code
+  -- Store addr-1: putop adds the 1-based string position to get the
+  -- display address.
+  ctx.addr = (addr or 0) - 1
+  ctx.out = out or io.write
+  ctx.symtab = {}       -- Optional address -> symbol name table.
+  ctx.disass = disass_block
+  ctx.hexdump = 16      -- Max width of the hex dump column (0 = off).
+  ctx.x64 = false
+  ctx.map1 = map_opc1_32
+  ctx.aregs = map_regs.D
+  return ctx
+end
+
+-- Same as create_, but set up for 64 bit mode: 64 bit opcode map and
+-- 64 bit address registers.
+local function create64_(code, addr, out)
+  local ctx = create_(code, addr, out)
+  ctx.x64 = true
+  ctx.map1 = map_opc1_64
+  ctx.aregs = map_regs.Q
+  return ctx
+end
+
+-- Simple API: disassemble code (a string) at address and output via out.
+local function disass_(code, addr, out)
+  create_(code, addr, out):disass()
+end
+
+-- Same for 64 bit mode.
+local function disass64_(code, addr, out)
+  create64_(code, addr, out):disass()
+end
+
+
+-- Public module functions.
+-- module(...) (Lua 5.1) replaces the environment; the assignments below
+-- export the local implementations defined above.
+module(...)
+
+create = create_
+create64 = create64_
+disass = disass_
+disass64 = disass64_
+
diff --git a/lib/dump.lua b/lib/dump.lua
new file mode 100644
index 00000000..9fde87c1
--- /dev/null
+++ b/lib/dump.lua
@@ -0,0 +1,567 @@
+----------------------------------------------------------------------------
+-- LuaJIT compiler dump module.
+--
+-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+-- Released under the MIT/X license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module can be used to debug the JIT compiler itself. It dumps the
+-- code representations and structures used in various compiler stages.
+--
+-- Example usage:
+--
+-- luajit -jdump -e "local x=0; for i=1,1e6 do x=x+i end; print(x)"
+-- luajit -jdump=im -e "for i=1,1000 do for j=1,1000 do end end" | less -R
+-- luajit -jdump=is myapp.lua | less -R
+-- luajit -jdump=-b myapp.lua
+-- luajit -jdump=+aH,myapp.html myapp.lua
+-- luajit -jdump=ixT,myapp.dump myapp.lua
+--
+-- The first argument specifies the dump mode. The second argument gives
+-- the output file name. Default output is to stdout, unless the environment
+-- variable LUAJIT_DUMPFILE is set. The file is overwritten every time the
+-- module is started.
+--
+-- Different features can be turned on or off with the dump mode. If the
+-- mode starts with a '+', the following features are added to the default
+-- set of features; a '-' removes them. Otherwise the features are replaced.
+--
+-- The following dump features are available (* marks the default):
+--
+-- * t Print a line for each started, ended or aborted trace (see also -jv).
+-- * b Dump the traced bytecode.
+-- * i Dump the IR (intermediate representation).
+-- r Augment the IR with register/stack slots.
+-- s Dump the snapshot map.
+-- * m Dump the generated machine code.
+-- x Print each taken trace exit.
+-- X Print each taken trace exit and the contents of all registers.
+--
+-- The output format can be set with the following characters:
+--
+-- T Plain text output.
+-- A ANSI-colored text output.
+-- H Colorized HTML + CSS output.
+--
+-- The default output format is plain text. It's set to ANSI-colored text
+-- if the COLORTERM variable is set. Note: this is independent of any output
+-- redirection, which is actually considered a feature.
+--
+-- You probably want to use less -R to enjoy viewing ANSI-colored text from
+-- a pipe or a file. Add this to your ~/.bashrc: export LESS="-R"
+--
+------------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20000, "LuaJIT core/library version mismatch")
+local jutil = require("jit.util")
+local vmdef = require("jit.vmdef")
+local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
+local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
+local tracemc, traceexitstub = jutil.tracemc, jutil.traceexitstub
+local tracesnap = jutil.tracesnap
+local bit = require("bit")
+local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local sub, gsub, format = string.sub, string.gsub, string.format
+local byte, char, rep = string.byte, string.char, string.rep
+local type, tostring = type, tostring
+local stdout, stderr = io.stdout, io.stderr
+
+-- Load other modules on-demand.
+local bcline, discreate
+
+-- Active flag, output file handle and dump mode.
+local active, out, dumpmode
+
+------------------------------------------------------------------------------
+
+local symtab = {}
+local nexitsym = 0
+
+-- Fill symbol table with trace exit addresses.
+local function fillsymtab(nexit)
+ local t = symtab
+ if nexit > nexitsym then
+ for i=nexitsym,nexit-1 do t[traceexitstub(i)] = tostring(i) end
+ nexitsym = nexit
+ end
+ return t
+end
+
+local function dumpwrite(s)
+ out:write(s)
+end
+
+-- Disassemble machine code.
+local function dump_mcode(tr)
+ local info = traceinfo(tr)
+ if not info then return end
+ local mcode, addr, loop = tracemc(tr)
+ if not mcode then return end
+ if not discreate then
+ discreate = require("jit.dis_"..jit.arch).create
+ end
+ out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
+ local ctx = discreate(mcode, addr, dumpwrite)
+ ctx.hexdump = 0
+ ctx.symtab = fillsymtab(info.nexit)
+ if loop ~= 0 then
+ symtab[addr+loop] = "LOOP"
+ ctx:disass(0, loop)
+ out:write("->LOOP:\n")
+ ctx:disass(loop, #mcode-loop)
+ symtab[addr+loop] = nil
+ else
+ ctx:disass(0, #mcode)
+ end
+end
+
+------------------------------------------------------------------------------
+
+local irtype_text = {
+ [0] = "nil",
+ "fal",
+ "tru",
+ "lud",
+ "str",
+ "ptr",
+ "thr",
+ "pro",
+ "fun",
+ "t09",
+ "tab",
+ "udt",
+ "num",
+ "int",
+ "i8 ",
+ "u8 ",
+ "i16",
+ "u16",
+}
+
+local colortype_ansi = {
+ [0] = "%s",
+ "%s",
+ "%s",
+ "%s",
+ "\027[32m%s\027[m",
+ "%s",
+ "\027[1m%s\027[m",
+ "%s",
+ "\027[1m%s\027[m",
+ "%s",
+ "\027[31m%s\027[m",
+ "\027[36m%s\027[m",
+ "\027[34m%s\027[m",
+ "\027[35m%s\027[m",
+ "\027[35m%s\027[m",
+ "\027[35m%s\027[m",
+ "\027[35m%s\027[m",
+ "\027[35m%s\027[m",
+}
+
+local function colorize_text(s, t)
+ return s
+end
+
+local function colorize_ansi(s, t)
+ return format(colortype_ansi[t], s)
+end
+
+local irtype_ansi = setmetatable({},
+ { __index = function(tab, t)
+ local s = colorize_ansi(irtype_text[t], t); tab[t] = s; return s; end })
+
+local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
+
+local function colorize_html(s, t)
+ s = gsub(s, "[<>&]", html_escape)
+ return format('<span class="irt_%s">%s</span>', irtype_text[t], s)
+end
+
+local irtype_html = setmetatable({},
+ { __index = function(tab, t)
+ local s = colorize_html(irtype_text[t], t); tab[t] = s; return s; end })
+
+local header_html = [[
+<style type="text/css">
+background { background: #ffffff; color: #000000; }
+pre.ljdump {
+font-size: 10pt;
+background: #f0f4ff;
+color: #000000;
+border: 1px solid #bfcfff;
+padding: 0.5em;
+margin-left: 2em;
+margin-right: 2em;
+}
+span.irt_str { color: #00a000; }
+span.irt_thr, span.irt_fun { color: #404040; font-weight: bold; }
+span.irt_tab { color: #c00000; }
+span.irt_udt { color: #00c0c0; }
+span.irt_num { color: #0000c0; }
+span.irt_int { color: #c000c0; }
+</style>
+]]
+
+local colorize, irtype
+
+-- Lookup table to convert some literals into names.
+local litname = {
+ ["SLOAD "] = { [0] = "", "I", "R", "RI", "P", "PI", "PR", "PRI", },
+ ["XLOAD "] = { [0] = "", "unaligned", },
+ ["TOINT "] = { [0] = "check", "index", "", },
+ ["FLOAD "] = vmdef.irfield,
+ ["FREF "] = vmdef.irfield,
+ ["FPMATH"] = vmdef.irfpm,
+}
+
+local function ctlsub(c)
+  if c == "\n" then return "\\n"
+  elseif c == "\r" then return "\\r"
+  elseif c == "\t" then return "\\t"
+  -- NOTE: a duplicate, unreachable "\r" branch was removed here.
+  else return format("\\%03d", byte(c))
+  end
+end
+
+local function formatk(tr, idx)
+ local k, t, slot = tracek(tr, idx)
+ local tn = type(k)
+ local s
+ if tn == "number" then
+ if k == 2^52+2^51 then
+ s = "bias"
+ else
+ s = format("%+.14g", k)
+ end
+ elseif tn == "string" then
+ s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
+ elseif tn == "function" then
+ local fi = funcinfo(k)
+ if fi.ffid then
+ s = vmdef.ffnames[fi.ffid]
+ else
+ s = fi.loc
+ end
+ elseif tn == "table" then
+ s = format("{%p}", k)
+ elseif tn == "userdata" then
+ if t == 11 then
+ s = format("userdata:%p", k)
+ else
+ s = format("[%p]", k)
+ if s == "[0x00000000]" then s = "NULL" end
+ end
+ else
+ s = tostring(k) -- For primitives.
+ end
+ s = colorize(format("%-4s", s), t)
+ if slot then
+ s = format("%s @%d", s, slot)
+ end
+ return s
+end
+
+local function printsnap(tr, snap)
+ for i=1,#snap do
+ local ref = snap[i]
+ if not ref then
+ out:write("---- ")
+ elseif ref < 0 then
+ out:write(formatk(tr, ref), " ")
+ else
+ local m, ot, op1, op2 = traceir(tr, ref)
+ local t = band(ot, 15)
+ local sep = " "
+ if t == 8 then
+ local oidx = 6*shr(ot, 8)
+ local op = sub(vmdef.irnames, oidx+1, oidx+6)
+ if op == "FRAME " then
+ sep = "|"
+ end
+ end
+ out:write(colorize(format("%04d", ref), t), sep)
+ end
+ end
+ out:write("]\n")
+end
+
+-- Dump snapshots (not interleaved with IR).
+local function dump_snap(tr)
+ out:write("---- TRACE ", tr, " snapshots\n")
+ for i=0,1000000000 do
+ local snap = tracesnap(tr, i)
+ if not snap then break end
+ out:write(format("#%-3d %04d [ ", i, snap[0]))
+ printsnap(tr, snap)
+ end
+end
+
+-- NYI: should really get the register map from the disassembler.
+local reg_map = {
+ [0] = "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
+}
+
+-- Return a register name or stack slot for a rid/sp location.
+local function ridsp_name(ridsp)
+ local rid = band(ridsp, 0xff)
+ if ridsp > 255 then return format("[%x]", shr(ridsp, 8)*4) end
+ if rid < 128 then return reg_map[rid] end
+ return ""
+end
+
+-- Dump IR and interleaved snapshots.
+local function dump_ir(tr, dumpsnap, dumpreg)
+ local info = traceinfo(tr)
+ if not info then return end
+ local nins = info.nins
+ out:write("---- TRACE ", tr, " IR\n")
+ local irnames = vmdef.irnames
+ local snapref = 65536
+ local snap, snapno
+ if dumpsnap then
+ snap = tracesnap(tr, 0)
+ snapref = snap[0]
+ snapno = 0
+ end
+ for ins=1,nins do
+ if ins >= snapref then
+ if dumpreg then
+ out:write(format(".... SNAP #%-3d [ ", snapno))
+ else
+ out:write(format(".... SNAP #%-3d [ ", snapno))
+ end
+ printsnap(tr, snap)
+ snapno = snapno + 1
+ snap = tracesnap(tr, snapno)
+ snapref = snap and snap[0] or 65536
+ end
+ local m, ot, op1, op2, ridsp = traceir(tr, ins)
+ local oidx, t = 6*shr(ot, 8), band(ot, 31)
+ local op = sub(irnames, oidx+1, oidx+6)
+ if op == "LOOP " then
+ if dumpreg then
+ out:write(format("%04d ------------ LOOP ------------\n", ins))
+ else
+ out:write(format("%04d ------ LOOP ------------\n", ins))
+ end
+ elseif op ~= "NOP " and (dumpreg or op ~= "RENAME") then
+ if dumpreg then
+ out:write(format("%04d %-5s ", ins, ridsp_name(ridsp)))
+ else
+ out:write(format("%04d ", ins))
+ end
+ out:write(format("%s%s %s %s ",
+ band(ot, 64) == 0 and " " or ">",
+ band(ot, 128) == 0 and " " or "+",
+ irtype[t], op))
+ local m1 = band(m, 3)
+ if m1 ~= 3 then -- op1 != IRMnone
+ if op1 < 0 then
+ out:write(formatk(tr, op1))
+ else
+ out:write(format(m1 == 0 and "%04d" or "#%-3d", op1))
+ end
+ local m2 = band(m, 3*4)
+ if m2 ~= 3*4 then -- op2 != IRMnone
+ if m2 == 1*4 then -- op2 == IRMlit
+ local litn = litname[op]
+ if litn and litn[op2] then
+ out:write(" ", litn[op2])
+ else
+ out:write(format(" #%-3d", op2))
+ end
+ elseif op2 < 0 then
+ out:write(" ", formatk(tr, op2))
+ else
+ out:write(format(" %04d", op2))
+ end
+ end
+ end
+ out:write("\n")
+ end
+ end
+ if snap then
+ if dumpreg then
+ out:write(format(".... SNAP #%-3d [ ", snapno))
+ else
+ out:write(format(".... SNAP #%-3d [ ", snapno))
+ end
+ printsnap(tr, snap)
+ end
+end
+
+------------------------------------------------------------------------------
+
+local recprefix = ""
+local recdepth = 0
+
+-- Format trace error message.
+local function fmterr(err, info)
+ if type(err) == "number" then
+ if type(info) == "function" then
+ local fi = funcinfo(info)
+ if fi.ffid then
+ info = vmdef.ffnames[fi.ffid]
+ else
+ info = fi.loc
+ end
+ end
+ err = format(vmdef.traceerr[err], info)
+ end
+ return err
+end
+
+-- Dump trace states.
+local function dump_trace(what, tr, func, pc, otr, oex)
+ if what == "stop" or (what == "abort" and dumpmode.a) then
+ if dumpmode.i then dump_ir(tr, dumpmode.s, dumpmode.r and what == "stop")
+ elseif dumpmode.s then dump_snap(tr) end
+ if dumpmode.m then dump_mcode(tr) end
+ end
+ if what == "start" then
+ if dumpmode.H then out:write('<pre class="ljdump">\n') end
+ out:write("---- TRACE ", tr, " ", what)
+ if otr then out:write(" ", otr, "/", oex) end
+ local fi = funcinfo(func, pc)
+ out:write(" ", fi.loc, "\n")
+ recprefix = ""
+ reclevel = 0
+ elseif what == "stop" or what == "abort" then
+ out:write("---- TRACE ", tr, " ", what)
+ recprefix = nil
+ if what == "abort" then
+ local fi = funcinfo(func, pc)
+ out:write(" ", fi.loc, " -- ", fmterr(otr, oex), "\n")
+ else
+ local link = traceinfo(tr).link
+ if link == tr then
+ link = "loop"
+ elseif link == 0 then
+ link = "interpreter"
+ end
+ out:write(" -> ", link, "\n")
+ end
+ if dumpmode.H then out:write("</pre>\n\n") else out:write("\n") end
+ else
+ out:write("---- TRACE ", what, "\n\n")
+ end
+ out:flush()
+end
+
+-- Dump recorded bytecode.
+local function dump_record(tr, func, pc, depth, callee)
+ if depth ~= recdepth then
+ recdepth = depth
+ recprefix = rep(" .", depth)
+ end
+ local line = bcline(func, pc, recprefix)
+ if dumpmode.H then line = gsub(line, "[<>&]", html_escape) end
+ if type(callee) == "function" then
+ local fi = funcinfo(callee)
+ if fi.ffid then
+ out:write(sub(line, 1, -2), " ; ", vmdef.ffnames[fi.ffid], "\n")
+ else
+ out:write(sub(line, 1, -2), " ; ", fi.loc, "\n")
+ end
+ else
+ out:write(line)
+ end
+ if band(funcbc(func, pc), 0xff) < 16 then -- Write JMP for cond. ORDER BC
+ out:write(bcline(func, pc+1, recprefix))
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Dump taken trace exits.
+local function dump_texit(tr, ex, ngpr, nfpr, ...)
+ out:write("---- TRACE ", tr, " exit ", ex, "\n")
+ if dumpmode.X then
+ local regs = {...}
+ for i=1,ngpr do
+ out:write(format(" %08x", regs[i]))
+ if i % 8 == 0 then out:write("\n") end
+ end
+ for i=1,nfpr do
+ out:write(format(" %+17.14g", regs[ngpr+i]))
+ if i % 4 == 0 then out:write("\n") end
+ end
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Detach dump handlers.
+local function dumpoff()
+ if active then
+ active = false
+ jit.attach(dump_texit)
+ jit.attach(dump_record)
+ jit.attach(dump_trace)
+ if out and out ~= stdout and out ~= stderr then out:close() end
+ out = nil
+ end
+end
+
+-- Open the output file and attach dump handlers.
+local function dumpon(opt, outfile)
+ if active then dumpoff() end
+
+ local colormode = os.getenv("COLORTERM") and "A" or "T"
+ if opt then
+ opt = gsub(opt, "[TAH]", function(mode) colormode = mode; return ""; end)
+ end
+
+ local m = { t=true, b=true, i=true, m=true, }
+ if opt and opt ~= "" then
+ local o = sub(opt, 1, 1)
+ if o ~= "+" and o ~= "-" then m = {} end
+ for i=1,#opt do m[sub(opt, i, i)] = (o ~= "-") end
+ end
+ dumpmode = m
+
+ if m.t or m.b or m.i or m.s or m.m then
+ jit.attach(dump_trace, "trace")
+ end
+ if m.b then
+ jit.attach(dump_record, "record")
+ if not bcline then bcline = require("jit.bc").line end
+ end
+ if m.x or m.X then
+ jit.attach(dump_texit, "texit")
+ end
+
+ if not outfile then outfile = os.getenv("LUAJIT_DUMPFILE") end
+ if outfile then
+ out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+ else
+ out = stdout
+ end
+
+ m[colormode] = true
+ if colormode == "A" then
+ colorize = colorize_ansi
+ irtype = irtype_ansi
+ elseif colormode == "H" then
+ colorize = colorize_html
+ irtype = irtype_html
+ out:write(header_html)
+ else
+ colorize = colorize_text
+ irtype = irtype_text
+ end
+
+ active = true
+end
+
+-- Public module functions.
+module(...)
+
+on = dumpon
+off = dumpoff
+start = dumpon -- For -j command line option.
+
diff --git a/lib/v.lua b/lib/v.lua
new file mode 100644
index 00000000..39fb8ed5
--- /dev/null
+++ b/lib/v.lua
@@ -0,0 +1,156 @@
+----------------------------------------------------------------------------
+-- Verbose mode of the LuaJIT compiler.
+--
+-- Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+-- Released under the MIT/X license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module shows verbose information about the progress of the
+-- JIT compiler. It prints one line for each generated trace. This module
+-- is useful to see which code has been compiled or where the compiler
+-- punts and falls back to the interpreter.
+--
+-- Example usage:
+--
+-- luajit -jv -e "for i=1,1000 do for j=1,1000 do end end"
+-- luajit -jv=myapp.out myapp.lua
+--
+-- Default output is to stderr. To redirect the output to a file, pass a
+-- filename as an argument (use '-' for stdout) or set the environment
+-- variable LUAJIT_VERBOSEFILE. The file is overwritten every time the
+-- module is started.
+--
+-- The output from the first example should look like this:
+--
+-- [TRACE 1 (command line):1]
+-- [TRACE 2 (1/3) (command line):1 -> 1]
+--
+-- The first number in each line is the internal trace number. Next are
+-- the file name ('(command line)') and the line number (':1') where the
+-- trace has started. Side traces also show the parent trace number and
+-- the exit number they are attached to, in parentheses ('(1/3)').
+-- An arrow at the end shows where the trace links to ('-> 1'), unless
+-- it loops to itself.
+--
+-- In this case the inner loop gets hot and is traced first, generating
+-- a root trace. Then the last exit from the 1st trace gets hot, too,
+-- and triggers generation of the 2nd trace. The side trace follows the
+-- path along the outer loop and *around* the inner loop, back to its
+-- start, and then links to the 1st trace. Yes, this may seem unusual,
+-- if you know how traditional compilers work. Trace compilers are full
+-- of surprises like this -- have fun! :-)
+--
+-- Aborted traces are shown like this:
+--
+-- [TRACE --- foo.lua:44 -- leaving loop in root trace at foo.lua:50]
+--
+-- Don't worry -- trace aborts are quite common, even in programs which
+-- can be fully compiled. The compiler may retry several times until it
+-- finds a suitable trace.
+--
+-- Of course this doesn't work with features that are not-yet-implemented
+-- (NYI error messages). The VM simply falls back to the interpreter. This
+-- may not matter at all if the particular trace is not very high up in
+-- the CPU usage profile. Oh, and the interpreter is quite fast, too.
+--
+-- Also check out the -jdump module, which prints all the gory details.
+--
+------------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20000, "LuaJIT core/library version mismatch")
+local jutil = require("jit.util")
+local vmdef = require("jit.vmdef")
+local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
+local type, format = type, string.format
+local stdout, stderr = io.stdout, io.stderr
+
+-- Active flag and output file handle.
+local active, out
+
+------------------------------------------------------------------------------
+
+local startloc, startex
+
+-- Format trace error message.
+local function fmterr(err, info)
+ if type(err) == "number" then
+ if type(info) == "function" then
+ local fi = funcinfo(info)
+ if fi.ffid then
+ info = vmdef.ffnames[fi.ffid]
+ else
+ info = fi.loc
+ end
+ end
+ err = format(vmdef.traceerr[err], info)
+ end
+ return err
+end
+
+-- Dump trace states.
+local function dump_trace(what, tr, func, pc, otr, oex)
+ if what == "start" then
+ startloc = funcinfo(func, pc).loc
+ startex = otr and "("..otr.."/"..oex..") " or ""
+ else
+ if what == "abort" then
+ local loc = funcinfo(func, pc).loc
+ if loc ~= startloc then
+ out:write(format("[TRACE --- %s%s -- %s at %s]\n",
+ startex, startloc, fmterr(otr, oex), loc))
+ else
+ out:write(format("[TRACE --- %s%s -- %s]\n",
+ startex, startloc, fmterr(otr, oex)))
+ end
+ elseif what == "stop" then
+ local link = traceinfo(tr).link
+ if link == 0 then
+ out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
+ tr, startex, startloc))
+ elseif link == tr then
+ out:write(format("[TRACE %3s %s%s]\n", tr, startex, startloc))
+ else
+ out:write(format("[TRACE %3s %s%s -> %d]\n",
+ tr, startex, startloc, link))
+ end
+ else
+ out:write(format("[TRACE %s]\n", what))
+ end
+ out:flush()
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Detach dump handlers.
+local function dumpoff()
+ if active then
+ active = false
+ jit.attach(dump_trace)
+ if out and out ~= stdout and out ~= stderr then out:close() end
+ out = nil
+ end
+end
+
+-- Open the output file and attach dump handlers.
+local function dumpon(outfile)
+ if active then dumpoff() end
+ if not outfile then outfile = os.getenv("LUAJIT_VERBOSEFILE") end
+ if outfile then
+ out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+ else
+ out = stderr
+ end
+ jit.attach(dump_trace, "trace")
+ active = true
+end
+
+-- Public module functions.
+module(...)
+
+on = dumpon
+off = dumpoff
+start = dumpon -- For -j command line option.
+
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 00000000..e9f998ce
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,8 @@
+luajit
+buildvm
+buildvm_*.h
+lj_ffdef.h
+lj_libdef.h
+lj_recdef.h
+lj_folddef.h
+lj_vm.s
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 00000000..bb1839d1
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,326 @@
+##############################################################################
+# LuaJIT Makefile. Requires GNU Make.
+#
+# Suitable for POSIX platforms (Linux, *BSD, OSX etc.).
+# Also works with MinGW and Cygwin on Windows.
+# Please check msvcbuild.bat for building with MSVC on Windows.
+#
+# Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+##############################################################################
+
+##############################################################################
+# Compiler options: change them as needed. This mainly affects the speed of
+# the JIT compiler itself, not the speed of the JIT compiled code.
+# Turn any of the optional settings on by removing the '#' in front of them.
+#
+# Note: LuaJIT can only be compiled for x86, and not for x64 (yet)!
+# In the meantime, the x86 binary runs fine under an x64 OS.
+#
+# It's recommended to compile at least for i686. By default the assembler part
+# of the interpreter makes use of CMOV/FCOMI*/FUCOMI* instructions, anyway.
+CC= gcc -m32 -march=i686
+# Use this for GCC 4.2 or higher if you don't intend to distribute the
+# binaries to a different machine:
+#CC= gcc -m32 -march=native
+#
+# Since the assembler part does NOT maintain a frame pointer, it's pointless
+# to slow down the C part by not omitting it. Debugging and tracebacks are
+# not affected -- the assembler part has frame unwind information and GCC
+# emits it with -g (see CCDEBUG below).
+CCOPT= -O2 -fomit-frame-pointer
+# Use this if you want to generate a smaller binary (but it's slower):
+#CCOPT= -Os -fomit-frame-pointer
+# Note: it's no longer recommended to use -O3 with GCC 4.x.
+# The I-Cache bloat usually outweighs the benefits from aggressive inlining.
+#
+CCDEBUG=
+# Uncomment the next line to generate debug information:
+#CCDEBUG= -g
+#
+CCWARN= -Wall
+# Uncomment the next line to enable more warnings:
+#CCWARN+= -Wextra -Wdeclaration-after-statement -Wredundant-decls -Wshadow -Wpointer-arith
+#
+##############################################################################
+
+##############################################################################
+# Compile time definitions: change them as needed, but make sure you force
+# a full recompile with "make clean", followed by "make".
+# Note that most of these are NOT suitable for benchmarking or release mode!
+XCFLAGS=
+#
+# Disable the use of CMOV and FCOMI*/FUCOMI* instructions in the interpreter.
+# This is only necessary if you intend to run the code on REALLY ANCIENT CPUs
+# (before Pentium Pro, or on the VIA C3). This generally slows down the
+# interpreter. Don't bother if your OS wouldn't run on them, anyway.
+#XCFLAGS+= -DLUAJIT_CPU_NOCMOV
+#
+# Disable the JIT compiler, i.e. turn LuaJIT into a pure interpreter:
+#XCFLAGS+= -DLUAJIT_DISABLE_JIT
+#
+# Use the system provided memory allocator (realloc) instead of the
+# bundled memory allocator. This is slower, but sometimes helpful for
+# debugging. It's mandatory for Valgrind's memcheck tool, too.
+#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
+#
+# This define is required to run LuaJIT under Valgrind. The Valgrind
+# header files must be installed. You should enable debug information, too.
+#XCFLAGS+= -DLUAJIT_USE_VALGRIND
+#
+# This is the client for the GDB JIT API. GDB 7.0 or higher is required
+# to make use of it. See lj_gdbjit.c for details. Enabling this causes
+# a non-negligible overhead, even when not running under GDB.
+#XCFLAGS+= -DLUAJIT_USE_GDBJIT
+#
+# Turn on assertions for the Lua/C API to debug problems with lua_* calls.
+# This is rather slow -- use only while developing C libraries/embeddings.
+#XCFLAGS+= -DLUA_USE_APICHECK
+#
+# Turn on assertions for the whole LuaJIT VM. This significantly slows down
+# everything. Use only if you suspect a problem with LuaJIT itself.
+#XCFLAGS+= -DLUA_USE_ASSERT
+#
+##############################################################################
+# You probably don't need to change anything below this line.
+##############################################################################
+
+CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(CFLAGS) $(XCFLAGS)
+LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
+
+HOST_CC= $(CC)
+HOST_RM= rm -f
+HOST_XCFLAGS=
+HOST_XLDFLAGS=
+HOST_XLIBS=
+
+TARGET_CC= $(CC)
+TARGET_STRIP= strip
+TARGET_XCFLAGS= -D_FILE_OFFSET_BITS=64
+TARGET_XLDFLAGS=
+TARGET_XSHLDFLAGS= -shared
+TARGET_XLIBS=
+TARGET_ARCH= $(patsubst %,-DLUAJIT_TARGET=LUAJIT_ARCH_%,$(TARGET))
+TARGET_DISABLE= -U_FORTIFY_SOURCE
+ifneq (,$(findstring stack-protector,$(shell $(CC) -dumpspecs)))
+ TARGET_DISABLE+= -fno-stack-protector
+endif
+
+ifneq (,$(findstring Windows,$(OS)))
+ TARGET_SYS= Windows
+else
+ TARGET_SYS:= $(shell uname -s)
+ ifneq (,$(findstring CYGWIN,$(TARGET_SYS)))
+ TARGET_SYS= Windows
+ endif
+endif
+
+ifeq (Linux,$(TARGET_SYS))
+ TARGET_XLIBS= -ldl
+ TARGET_XLDFLAGS= -Wl,-E
+else
+ifeq (Windows,$(TARGET_SYS))
+ HOST_RM= del
+ TARGET_STRIP= strip --strip-unneeded
+else
+ifeq (Darwin,$(TARGET_SYS))
+ TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup
+ TARGET_STRIP= strip -x
+ export MACOSX_DEPLOYMENT_TARGET=10.3
+else
+ TARGET_XLDFLAGS= -Wl,-E
+endif
+endif
+endif
+
+# NOTE: The LuaJIT distribution comes with a pre-generated buildvm_*.h.
+# You DO NOT NEED an installed copy of (plain) Lua 5.1 to run DynASM unless
+# you want to MODIFY the corresponding *.dasc file. You can also use LuaJIT
+# itself (bootstrapped from the pre-generated file) to run DynASM of course.
+DASM_LUA= lua
+
+Q= @
+E= @echo
+#Q=
+#E= @:
+
+##############################################################################
+
+TARGET_CFLAGS= $(CCOPTIONS) $(TARGET_DISABLE) $(TARGET_XCFLAGS)
+TARGET_LDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS)
+TARGET_SHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS)
+TARGET_LIBS= -lm $(TARGET_XLIBS)
+ifneq (,$(CCDEBUG))
+ TARGET_STRIP= @:
+endif
+
+HOST_CFLAGS= $(CCOPTIONS) $(HOST_XCFLAGS) $(TARGET_ARCH)
+HOST_LDFLAGS= $(LDOPTIONS) $(HOST_XLDFLAGS)
+HOST_LIBS= $(HOST_XLIBS)
+
+DASM_DIR= ../dynasm
+DASM= $(DASM_LUA) $(DASM_DIR)/dynasm.lua
+DASM_FLAGS=
+DASM_DISTFLAGS= -LN
+
+BUILDVM_O= buildvm.o buildvm_asm.o buildvm_peobj.o buildvm_lib.o buildvm_fold.o
+BUILDVM_T= buildvm
+
+HOST_O= $(BUILDVM_O)
+HOST_T= $(BUILDVM_T)
+
+LJVM_S= lj_vm.s
+LJVM_O= lj_vm.o
+LJVM_BOUT= $(LJVM_S)
+LJVM_MODE= asm
+
+LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
+ lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o
+LJLIB_C= $(LJLIB_O:.o=.c)
+
+LJCORE_O= lj_gc.o lj_err.o lj_ctype.o lj_bc.o lj_obj.o \
+ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o \
+ lj_state.o lj_dispatch.o lj_vmevent.o lj_api.o \
+ lj_lex.o lj_parse.o \
+ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
+ lj_opt_dce.o lj_opt_loop.o \
+ lj_mcode.o lj_snap.o lj_record.o lj_asm.o lj_trace.o lj_gdbjit.o \
+ lj_lib.o lj_alloc.o lib_aux.o \
+ $(LJLIB_O) lib_init.o
+
+LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
+
+# NYI: Need complete support for building as a shared library on POSIX.
+# This is currently *only* suitable for MinGW and Cygwin, see below.
+LUAJIT_O= luajit.o
+LUAJIT_SO= luajit.so
+LUAJIT_T= luajit
+
+LIB_VMDEF= ../lib/vmdef.lua
+
+TARGET_DEP= $(LIB_VMDEF)
+TARGET_O= $(LJVMCORE_O) $(LUAJIT_O)
+TARGET_T= $(LUAJIT_T)
+
+ALL_GEN= $(LJVM_S) lj_ffdef.h lj_libdef.h lj_recdef.h $(LIB_VMDEF) lj_folddef.h
+ALL_DYNGEN= buildvm_x86.h
+WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest
+ALL_RM= $(LUAJIT_T) $(LUAJIT_SO) $(HOST_T) $(ALL_GEN) *.o $(WIN_RM)
+
+ifeq (Windows,$(TARGET_SYS))
+ LJVM_BOUT= $(LJVM_O)
+ LJVM_MODE= peobj
+ LIB_VMDEF= ..\lib\vmdef.lua
+ # Imported symbols are bound to a specific DLL name under Windows.
+ LUAJIT_SO= lua51.dll
+ LUAJIT_T= luajit.exe
+ BUILDVM_T= buildvm.exe
+ #
+ # You can comment out the following two lines to build a static executable.
+ # But then you won't be able to dynamically load any C modules, because
+ # they bind to lua51.dll.
+ #
+ TARGET_XCFLAGS+= -DLUA_BUILD_AS_DLL
+ TARGET_O= $(LUAJIT_SO) $(LUAJIT_O)
+endif
+
+##############################################################################
+
+default: $(TARGET_T)
+
+all: $(TARGET_T)
+
+amalg:
+ @grep "^[+|]" ljamalg.c
+ $(MAKE) all "LJCORE_O=ljamalg.o"
+
+MAKE_TARGETS= amalg
+
+##############################################################################
+
+buildvm_x86.h: buildvm_x86.dasc
+ $(E) "DYNASM $@"
+ $(Q)$(DASM) $(DASM_FLAGS) -o $@ buildvm_x86.dasc
+
+$(BUILDVM_T): $(BUILDVM_O)
+ $(E) "HOSTLINK $@"
+ $(Q)$(HOST_CC) $(HOST_LDFLAGS) -o $@ $(BUILDVM_O) $(HOST_LIBS)
+
+$(LJVM_BOUT): $(BUILDVM_T)
+ $(E) "BUILDVM $@"
+ $(Q)./$(BUILDVM_T) -m $(LJVM_MODE) -o $@
+
+lj_ffdef.h: $(BUILDVM_T) $(LJLIB_C)
+ $(E) "BUILDVM $@"
+ $(Q)./$(BUILDVM_T) -m ffdef -o $@ $(LJLIB_C)
+
+lj_libdef.h: $(BUILDVM_T) $(LJLIB_C)
+ $(E) "BUILDVM $@"
+ $(Q)./$(BUILDVM_T) -m libdef -o $@ $(LJLIB_C)
+
+lj_recdef.h: $(BUILDVM_T) $(LJLIB_C)
+ $(E) "BUILDVM $@"
+ $(Q)./$(BUILDVM_T) -m recdef -o $@ $(LJLIB_C)
+
+$(LIB_VMDEF): $(BUILDVM_T) $(LJLIB_C)
+ $(E) "BUILDVM $@"
+ $(Q)./$(BUILDVM_T) -m vmdef -o $@ $(LJLIB_C)
+
+lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
+ $(E) "BUILDVM $@"
+ $(Q)./$(BUILDVM_T) -m folddef -o $@ lj_opt_fold.c
+
+$(LUAJIT_SO): $(LJVMCORE_O)
+ $(E) "LINK $@"
+ $(Q)$(TARGET_CC) $(TARGET_SHLDFLAGS) -o $@ $(LJVMCORE_O) $(TARGET_LIBS)
+ $(Q)$(TARGET_STRIP) $@
+
+$(LUAJIT_T): $(TARGET_O) $(TARGET_DEP)
+ $(E) "LINK $@"
+ $(Q)$(TARGET_CC) $(TARGET_LDFLAGS) -o $@ $(TARGET_O) $(TARGET_LIBS)
+ $(Q)$(TARGET_STRIP) $@
+ $(E) "OK Successfully built LuaJIT"
+
+##############################################################################
+
+%.o: %.c
+ $(E) "CC $@"
+ $(Q)$(TARGET_CC) $(TARGET_CFLAGS) -c -o $@ $<
+
+%.o: %.s
+ $(E) "ASM $@"
+ $(Q)$(TARGET_CC) $(TARGET_CFLAGS) -c -o $@ $<
+
+$(HOST_O): %.o: %.c
+ $(E) "HOSTCC $@"
+ $(Q)$(HOST_CC) $(HOST_CFLAGS) -c -o $@ $<
+
+include Makefile.dep
+
+##############################################################################
+
+clean:
+ $(HOST_RM) $(ALL_RM)
+
+cleaner: clean
+ $(HOST_RM) $(ALL_DYNGEN)
+
+distclean: clean
+ $(E) "DYNASM $@"
+ $(Q)$(DASM) $(DASM_DISTFLAGS) -o buildvm_x86.h buildvm_x86.dasc
+
+depend:
+ @test -f lj_ffdef.h || touch lj_ffdef.h
+ @test -f lj_libdef.h || touch lj_libdef.h
+ @test -f lj_recdef.h || touch lj_recdef.h
+ @test -f lj_folddef.h || touch lj_folddef.h
+ @test -f buildvm_x86.h || touch buildvm_x86.h
+ @$(HOST_CC) $(HOST_CFLAGS) -MM *.c | sed "s|$(DASM_DIR)|\$$(DASM_DIR)|g" >Makefile.dep
+ @test -s lj_ffdef.h || $(HOST_RM) lj_ffdef.h
+ @test -s lj_libdef.h || $(HOST_RM) lj_libdef.h
+ @test -s lj_recdef.h || $(HOST_RM) lj_recdef.h
+ @test -s lj_folddef.h || $(HOST_RM) lj_folddef.h
+ @test -s buildvm_x86.h || $(HOST_RM) buildvm_x86.h
+
+.PHONY: default all $(MAKE_TARGETS) clean cleaner distclean depend
+
+##############################################################################
diff --git a/src/Makefile.dep b/src/Makefile.dep
new file mode 100644
index 00000000..b1cdd93b
--- /dev/null
+++ b/src/Makefile.dep
@@ -0,0 +1,139 @@
+buildvm.o: buildvm.c lua.h luaconf.h luajit.h lj_obj.h lj_def.h lj_arch.h \
+ lj_gc.h lj_bc.h lj_ir.h lj_frame.h lj_dispatch.h lj_jit.h lj_target.h \
+ lj_target_x86.h buildvm.h $(DASM_DIR)/dasm_proto.h $(DASM_DIR)/dasm_x86.h \
+ buildvm_x86.h lj_traceerr.h
+buildvm_asm.o: buildvm_asm.c buildvm.h lj_def.h lua.h luaconf.h lj_arch.h \
+ lj_bc.h
+buildvm_fold.o: buildvm_fold.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_ir.h buildvm.h
+buildvm_lib.o: buildvm_lib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_lib.h buildvm.h
+buildvm_peobj.o: buildvm_peobj.c buildvm.h lj_def.h lua.h luaconf.h \
+ lj_arch.h lj_bc.h
+lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_alloc.h
+lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
+ lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h
+lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
+lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
+lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h
+lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_gc.h lj_ff.h lj_ffdef.h lj_lib.h \
+ lj_libdef.h
+lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
+ lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ir.h \
+ lj_jit.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_vmevent.h lj_lib.h \
+ luajit.h lj_libdef.h
+lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_lib.h lj_libdef.h
+lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
+lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
+lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_state.h \
+ lj_ff.h lj_ffdef.h lj_ctype.h lj_lib.h lj_libdef.h
+lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \
+ lj_libdef.h
+lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
+lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
+ lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h lj_lex.h lj_parse.h
+lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h \
+ lj_target.h lj_target_x86.h
+lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h
+lj_ctype.o: lj_ctype.c lj_ctype.h lj_def.h lua.h luaconf.h
+lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_state.h lj_frame.h lj_bc.h lj_jit.h lj_ir.h \
+ lj_trace.h lj_dispatch.h lj_traceerr.h lj_vm.h luajit.h
+lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
+ lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
+ lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
+lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_traceerr.h lj_vm.h
+lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
+ lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h
+lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_jit.h \
+ lj_ir.h lj_dispatch.h
+lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
+ lj_traceerr.h
+lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_lex.h lj_parse.h lj_ctype.h
+lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_vm.h \
+ lj_lib.h
+lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h lj_dispatch.h lj_bc.h \
+ lj_traceerr.h
+lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_bc.h lj_vm.h
+lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
+lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_ir.h lj_jit.h lj_iropt.h
+lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h \
+ lj_traceerr.h lj_vm.h lj_folddef.h
+lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
+lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
+lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_str.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h
+lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_state.h \
+ lj_bc.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
+lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_state.h lj_frame.h \
+ lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_record.h lj_snap.h lj_asm.h lj_vm.h \
+ lj_recdef.h
+lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h lj_target_x86.h
+lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
+ lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
+lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ctype.h
+lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_tab.h
+lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_frame.h lj_bc.h lj_state.h \
+ lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
+ lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h lj_vm.h \
+ lj_vmevent.h lj_target.h lj_target_x86.h
+lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_gc.h lj_udata.h
+lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
+ lj_vm.h lj_vmevent.h
+ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \
+ lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_trace.h lj_jit.h \
+ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c lj_ctype.c \
+ lj_ctype.h lj_bc.c lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c \
+ lj_meta.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c luajit.h \
+ lj_vmevent.c lj_vmevent.h lj_api.c lj_parse.h lj_lex.c lj_parse.c \
+ lj_lib.c lj_lib.h lj_ir.c lj_iropt.h lj_opt_mem.c lj_opt_fold.c \
+ lj_folddef.h lj_opt_narrow.c lj_opt_dce.c lj_opt_loop.c lj_snap.h \
+ lj_mcode.c lj_mcode.h lj_snap.c lj_target.h lj_target_x86.h lj_record.c \
+ lj_ff.h lj_ffdef.h lj_record.h lj_asm.h lj_recdef.h lj_asm.c lj_trace.c \
+ lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c lib_base.c lualib.h \
+ lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c lib_os.c \
+ lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_init.c
+luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h
diff --git a/src/buildvm.c b/src/buildvm.c
new file mode 100644
index 00000000..b3738db4
--- /dev/null
+++ b/src/buildvm.c
@@ -0,0 +1,438 @@
+/*
+** LuaJIT VM builder.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** This is a tool to build the hand-tuned assembler code required for
+** LuaJIT's bytecode interpreter. It supports a variety of output formats
+** to feed different toolchains (see usage() below).
+**
+** This tool is not particularly optimized because it's only used while
+** _building_ LuaJIT. There's no point in distributing or installing it.
+** Only the object code generated by this tool is linked into LuaJIT.
+**
+** Caveat: some memory is not free'd, error handling is lazy.
+** It's a one-shot tool -- any effort fixing this would be wasted.
+*/
+
+#include "lua.h"
+#include "luajit.h"
+
+#ifdef LUA_USE_WIN
+#include <fcntl.h>
+#include <io.h>
+#endif
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_bc.h"
+#include "lj_ir.h"
+#include "lj_frame.h"
+#include "lj_dispatch.h"
+#include "lj_target.h"
+
+#include "buildvm.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* DynASM glue definitions. */
+#define Dst ctx
+#define Dst_DECL BuildCtx *ctx
+#define Dst_REF (ctx->D)
+
+#include "../dynasm/dasm_proto.h"
+
+/* Glue macros for DynASM. */
+#define DASM_M_GROW(ctx, t, p, sz, need) \
+ do { \
+ size_t _sz = (sz), _need = (need); \
+ if (_sz < _need) { \
+ if (_sz < 16) _sz = 16; \
+ while (_sz < _need) _sz += _sz; \
+ (p) = (t *)realloc((p), _sz); \
+ if ((p) == NULL) exit(1); \
+ (sz) = _sz; \
+ } \
+ } while(0)
+
+#define DASM_M_FREE(ctx, p, sz) free(p)
+
+static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
+
+#define DASM_EXTERN(ctx, addr, idx, type) \
+ collect_reloc(ctx, addr, idx, type)
+
+/* ------------------------------------------------------------------------ */
+
+/* Avoid trouble if cross-compiling for an x86 target. Speed doesn't matter. */
+#define DASM_ALIGNED_WRITES 1
+
+/* Embed architecture-specific DynASM encoder and backend. */
+#if LJ_TARGET_X86
+#include "../dynasm/dasm_x86.h"
+#include "buildvm_x86.h"
+#else
+#error "No support for this architecture (yet)"
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+void owrite(BuildCtx *ctx, const void *ptr, size_t sz)
+{
+ if (fwrite(ptr, 1, sz, ctx->fp) != sz) {
+ fprintf(stderr, "Error: cannot write to output file: %s\n",
+ strerror(errno));
+ exit(1);
+ }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Emit code as raw bytes. Only used for DynASM debugging. */
+static void emit_raw(BuildCtx *ctx)
+{
+ owrite(ctx, ctx->code, ctx->codesz);
+}
+
+/* -- Build machine code -------------------------------------------------- */
+
+/* Collect external relocations. */
+static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type)
+{
+ if (ctx->nreloc >= BUILD_MAX_RELOC) {
+ fprintf(stderr, "Error: too many relocations, increase BUILD_MAX_RELOC.\n");
+ exit(1);
+ }
+ ctx->reloc[ctx->nreloc].ofs = (int32_t)(addr - ctx->code);
+ ctx->reloc[ctx->nreloc].sym = idx;
+ ctx->reloc[ctx->nreloc].type = type;
+ ctx->nreloc++;
+ return 0; /* Encode symbol offset of 0. */
+}
+
+/* Naive insertion sort. Performance doesn't matter here. */
+static void perm_insert(int *perm, int32_t *ofs, int i)
+{
+ perm[i] = i;
+ while (i > 0) {
+ int a = perm[i-1];
+ int b = perm[i];
+ if (ofs[a] <= ofs[b]) break;
+ perm[i] = a;
+ perm[i-1] = b;
+ i--;
+ }
+}
+
+/* Build the machine code. */
+static int build_code(BuildCtx *ctx)
+{
+ int status;
+ int i, j;
+
+ /* Initialize DynASM structures. */
+ ctx->nglob = GLOB__MAX;
+ ctx->glob = (void **)malloc(ctx->nglob*sizeof(void *));
+ memset(ctx->glob, 0, ctx->nglob*sizeof(void *));
+ ctx->nreloc = 0;
+
+ ctx->extnames = extnames;
+ ctx->globnames = globnames;
+
+ ctx->dasm_ident = DASM_IDENT;
+ ctx->dasm_arch = DASM_ARCH;
+
+ dasm_init(Dst, DASM_MAXSECTION);
+ dasm_setupglobal(Dst, ctx->glob, ctx->nglob);
+ dasm_setup(Dst, build_actionlist);
+
+ /* Call arch-specific backend to emit the code. */
+ ctx->npc = build_backend(ctx);
+
+ /* Finalize the code. */
+ (void)dasm_checkstep(Dst, DASM_SECTION_CODE);
+ if ((status = dasm_link(Dst, &ctx->codesz))) return status;
+ ctx->code = (uint8_t *)malloc(ctx->codesz);
+ if ((status = dasm_encode(Dst, (void *)ctx->code))) return status;
+
+ /* Allocate the symbol offset and permutation tables. */
+ ctx->nsym = ctx->npc + ctx->nglob;
+ ctx->perm = (int *)malloc((ctx->nsym+1)*sizeof(int *));
+ ctx->sym_ofs = (int32_t *)malloc((ctx->nsym+1)*sizeof(int32_t));
+
+ /* Collect the opcodes (PC labels). */
+ for (i = 0; i < ctx->npc; i++) {
+ int32_t n = dasm_getpclabel(Dst, i);
+ if (n < 0) return 0x22000000|i;
+ ctx->sym_ofs[i] = n;
+ perm_insert(ctx->perm, ctx->sym_ofs, i);
+ }
+
+ /* Collect the globals (named labels). */
+ for (j = 0; j < ctx->nglob; j++, i++) {
+ const char *gl = globnames[j];
+ int len = (int)strlen(gl);
+ if (!ctx->glob[j]) {
+ fprintf(stderr, "Error: undefined global %s\n", gl);
+ exit(2);
+ }
+ if (len >= 2 && gl[len-2] == '_' && gl[len-1] == 'Z')
+ ctx->sym_ofs[i] = -1; /* Skip the _Z symbols. */
+ else
+ ctx->sym_ofs[i] = (int32_t)((uint8_t *)(ctx->glob[j]) - ctx->code);
+ perm_insert(ctx->perm, ctx->sym_ofs, i);
+ }
+
+ /* Close the address range. */
+ ctx->sym_ofs[i] = (int32_t)ctx->codesz;
+ perm_insert(ctx->perm, ctx->sym_ofs, i);
+
+ dasm_free(Dst);
+
+ return 0;
+}
+
+/* -- Generate VM enums --------------------------------------------------- */
+
+const char *const bc_names[] = {
+#define BCNAME(name, ma, mb, mc, mt) #name,
+BCDEF(BCNAME)
+#undef BCNAME
+ NULL
+};
+
+const char *const ir_names[] = {
+#define IRNAME(name, m, m1, m2) #name,
+IRDEF(IRNAME)
+#undef IRNAME
+ NULL
+};
+
+const char *const irfpm_names[] = {
+#define FPMNAME(name) #name,
+IRFPMDEF(FPMNAME)
+#undef FPMNAME
+ NULL
+};
+
+const char *const irfield_names[] = {
+#define FLNAME(name, type, field) #name,
+IRFLDEF(FLNAME)
+#undef FLNAME
+ NULL
+};
+
+static const char *const trace_errors[] = {
+#define TREDEF(name, msg) msg,
+#include "lj_traceerr.h"
+ NULL
+};
+
+static const char *lower(char *buf, const char *s)
+{
+ char *p = buf;
+ while (*s) {
+ *p++ = (*s >= 'A' && *s <= 'Z') ? *s+0x20 : *s;
+ s++;
+ }
+ *p = '\0';
+ return buf;
+}
+
+/* Emit VM definitions as Lua code for debug modules. */
+static void emit_vmdef(BuildCtx *ctx)
+{
+ char buf[80];
+ int i;
+ fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
+ fprintf(ctx->fp, "module(...)\n\n");
+
+ fprintf(ctx->fp, "bcnames = \"");
+ for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
+ fprintf(ctx->fp, "\"\n\n");
+
+ fprintf(ctx->fp, "irnames = \"");
+ for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
+ fprintf(ctx->fp, "\"\n\n");
+
+ fprintf(ctx->fp, "irfpm = { [0]=");
+ for (i = 0; irfpm_names[i]; i++)
+ fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
+ fprintf(ctx->fp, "}\n\n");
+
+ fprintf(ctx->fp, "irfield = { [0]=");
+ for (i = 0; irfield_names[i]; i++) {
+ char *p;
+ lower(buf, irfield_names[i]);
+ p = strchr(buf, '_');
+ if (p) *p = '.';
+ fprintf(ctx->fp, "\"%s\", ", buf);
+ }
+ fprintf(ctx->fp, "}\n\n");
+
+ fprintf(ctx->fp, "traceerr = {\n[0]=");
+ for (i = 0; trace_errors[i]; i++)
+ fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
+ fprintf(ctx->fp, "}\n\n");
+}
+
+/* -- Argument parsing ---------------------------------------------------- */
+
+/* Build mode names. */
+static const char *const modenames[] = {
+#define BUILDNAME(name) #name,
+BUILDDEF(BUILDNAME)
+#undef BUILDNAME
+ NULL
+};
+
+/* Print usage information and exit. */
+static void usage(void)
+{
+ int i;
+ fprintf(stderr, LUAJIT_VERSION " VM builder.\n");
+ fprintf(stderr, LUAJIT_COPYRIGHT ", " LUAJIT_URL "\n");
+ fprintf(stderr, "Target architecture: " LJ_ARCH_NAME "\n\n");
+ fprintf(stderr, "Usage: buildvm -m mode [-o outfile] [infiles...]\n\n");
+ fprintf(stderr, "Available modes:\n");
+ for (i = 0; i < BUILD__MAX; i++)
+ fprintf(stderr, " %s\n", modenames[i]);
+ exit(1);
+}
+
+/* Parse the output mode name. */
+static BuildMode parsemode(const char *mode)
+{
+ int i;
+ for (i = 0; modenames[i]; i++)
+ if (!strcmp(mode, modenames[i]))
+ return (BuildMode)i;
+ usage();
+ return (BuildMode)-1;
+}
+
+/* Parse arguments. */
+static void parseargs(BuildCtx *ctx, char **argv)
+{
+ const char *a;
+ int i;
+ ctx->mode = (BuildMode)-1;
+ ctx->outname = "-";
+ for (i = 1; (a = argv[i]) != NULL; i++) {
+ if (a[0] != '-')
+ break;
+ switch (a[1]) {
+ case '-':
+ if (a[2]) goto err;
+ i++;
+ goto ok;
+ case '\0':
+ goto ok;
+ case 'm':
+ i++;
+ if (a[2] || argv[i] == NULL) goto err;
+ ctx->mode = parsemode(argv[i]);
+ break;
+ case 'o':
+ i++;
+ if (a[2] || argv[i] == NULL) goto err;
+ ctx->outname = argv[i];
+ break;
+ default: err:
+ usage();
+ break;
+ }
+ }
+ok:
+ ctx->args = argv+i;
+ if (ctx->mode == (BuildMode)-1) goto err;
+}
+
+int main(int argc, char **argv)
+{
+ BuildCtx ctx_;
+ BuildCtx *ctx = &ctx_;
+ int status, binmode;
+
+ UNUSED(argc);
+ parseargs(ctx, argv);
+
+ if ((status = build_code(ctx))) {
+ fprintf(stderr,"Error: DASM error %08x\n", status);
+ return 1;
+ }
+
+ switch (ctx->mode) {
+#if LJ_TARGET_X86ORX64
+ case BUILD_peobj:
+#endif
+ case BUILD_raw:
+ binmode = 1;
+ break;
+ default:
+ binmode = 0;
+ break;
+ }
+
+ if (ctx->outname[0] == '-' && ctx->outname[1] == '\0') {
+ ctx->fp = stdout;
+#ifdef LUA_USE_WIN
+ if (binmode)
+ _setmode(_fileno(stdout), _O_BINARY); /* Yuck. */
+#endif
+ } else if (!(ctx->fp = fopen(ctx->outname, binmode ? "wb" : "w"))) {
+ fprintf(stderr, "Error: cannot open output file '%s': %s\n",
+ ctx->outname, strerror(errno));
+ exit(1);
+ }
+
+ switch (ctx->mode) {
+ case BUILD_asm:
+#if defined(__ELF__)
+ ctx->mode = BUILD_elfasm;
+#elif defined(__MACH__)
+ ctx->mode = BUILD_machasm;
+#else
+ fprintf(stderr,"Error: auto-guessing the system assembler failed\n");
+ return 1;
+#endif
+ /* fallthrough */
+ case BUILD_elfasm:
+ case BUILD_coffasm:
+ case BUILD_machasm:
+ emit_asm(ctx);
+ emit_asm_debug(ctx);
+ break;
+#if LJ_TARGET_X86ORX64
+ case BUILD_peobj:
+ emit_peobj(ctx);
+ break;
+#endif
+ case BUILD_raw:
+ emit_raw(ctx);
+ break;
+ case BUILD_vmdef:
+ emit_vmdef(ctx);
+ /* fallthrough */
+ case BUILD_ffdef:
+ case BUILD_libdef:
+ case BUILD_recdef:
+ emit_lib(ctx);
+ break;
+ case BUILD_folddef:
+ emit_fold(ctx);
+ break;
+ default:
+ break;
+ }
+
+ fflush(ctx->fp);
+ if (ferror(ctx->fp)) {
+ fprintf(stderr, "Error: cannot write to output file: %s\n",
+ strerror(errno));
+ exit(1);
+ }
+ fclose(ctx->fp);
+
+ return 0;
+}
+
diff --git a/src/buildvm.h b/src/buildvm.h
new file mode 100644
index 00000000..e55527fd
--- /dev/null
+++ b/src/buildvm.h
@@ -0,0 +1,106 @@
+/*
+** LuaJIT VM builder.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _BUILDVM_H
+#define _BUILDVM_H
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "lj_def.h"
+#include "lj_arch.h"
+
+/* Hardcoded limits. Increase as needed. */
+#define BUILD_MAX_RELOC 100 /* Max. number of relocations. */
+#define BUILD_MAX_FOLD 4096 /* Max. number of fold rules. */
+
+/* Prefix for scanned library definitions. */
+#define LIBDEF_PREFIX "LJLIB_"
+
+/* Prefix for scanned fold definitions. */
+#define FOLDDEF_PREFIX "LJFOLD"
+
+/* Prefixes for generated labels. */
+#define LABEL_PREFIX "lj_"
+#define LABEL_PREFIX_BC LABEL_PREFIX "BC_"
+#define LABEL_PREFIX_FF LABEL_PREFIX "ff_"
+#define LABEL_PREFIX_CF LABEL_PREFIX "cf_"
+#define LABEL_PREFIX_FFH LABEL_PREFIX "ffh_"
+#define LABEL_PREFIX_LIBCF LABEL_PREFIX "lib_cf_"
+#define LABEL_PREFIX_LIBINIT LABEL_PREFIX "lib_init_"
+
+/* Extra labels. */
+#define LABEL_ASM_BEGIN LABEL_PREFIX "vm_asm_begin"
+#define LABEL_OP_OFS LABEL_PREFIX "vm_op_ofs"
+
+/* Forward declaration. */
+struct dasm_State;
+
+/* Build modes. */
+#if LJ_TARGET_X86ORX64
+#define BUILDDEFX(_) _(peobj)
+#else
+#define BUILDDEFX(_)
+#endif
+
+#define BUILDDEF(_) \
+ _(asm) _(elfasm) _(coffasm) _(machasm) BUILDDEFX(_) _(raw) \
+ _(ffdef) _(libdef) _(recdef) _(vmdef) \
+ _(folddef)
+
+typedef enum {
+#define BUILDENUM(name) BUILD_##name,
+BUILDDEF(BUILDENUM)
+#undef BUILDENUM
+ BUILD__MAX
+} BuildMode;
+
+/* Code relocation. */
+typedef struct BuildReloc {
+ int32_t ofs;
+ int sym;
+ int type;
+} BuildReloc;
+
+/* Build context structure. */
+typedef struct BuildCtx {
+ /* DynASM state pointer. Should be first member. */
+ struct dasm_State *D;
+ /* Parsed command line. */
+ BuildMode mode;
+ FILE *fp;
+ const char *outname;
+ char **args;
+ /* Code and symbols generated by DynASM. */
+ uint8_t *code;
+ size_t codesz;
+ int npc, nglob, nsym, nreloc;
+ void **glob;
+ int *perm;
+ int32_t *sym_ofs;
+ /* Strings generated by DynASM. */
+ const char *const *extnames;
+ const char *const *globnames;
+ const char *dasm_ident;
+ const char *dasm_arch;
+ /* Relocations. */
+ BuildReloc reloc[BUILD_MAX_RELOC];
+} BuildCtx;
+
+extern void owrite(BuildCtx *ctx, const void *ptr, size_t sz);
+extern void emit_asm(BuildCtx *ctx);
+extern void emit_peobj(BuildCtx *ctx);
+extern void emit_lib(BuildCtx *ctx);
+extern void emit_fold(BuildCtx *ctx);
+
+extern const char *const bc_names[];
+extern const char *const ir_names[];
+extern const char *const irfpm_names[];
+extern const char *const irfield_names[];
+
+#endif
diff --git a/src/buildvm_asm.c b/src/buildvm_asm.c
new file mode 100644
index 00000000..e6972bd5
--- /dev/null
+++ b/src/buildvm_asm.c
@@ -0,0 +1,220 @@
+/*
+** LuaJIT VM builder: Assembler source code emitter.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include "buildvm.h"
+#include "lj_bc.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Emit bytes piecewise as assembler text. */
+static void emit_asm_bytes(BuildCtx *ctx, uint8_t *p, int n)
+{
+ int i;
+ for (i = 0; i < n; i++) {
+ if ((i & 15) == 0)
+ fprintf(ctx->fp, "\t.byte %d", p[i]);
+ else
+ fprintf(ctx->fp, ",%d", p[i]);
+ if ((i & 15) == 15) putc('\n', ctx->fp);
+ }
+ if ((n & 15) != 0) putc('\n', ctx->fp);
+}
+
+/* Emit relocation */
+static void emit_asm_reloc(BuildCtx *ctx, BuildReloc *r)
+{
+ const char *sym = ctx->extnames[r->sym];
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ if (r->type)
+ fprintf(ctx->fp, "\t.long %s-.-4\n", sym);
+ else
+ fprintf(ctx->fp, "\t.long %s\n", sym);
+ break;
+ case BUILD_coffasm:
+ fprintf(ctx->fp, "\t.def _%s; .scl 3; .type 32; .endef\n", sym);
+ if (r->type)
+ fprintf(ctx->fp, "\t.long _%s-.-4\n", sym);
+ else
+ fprintf(ctx->fp, "\t.long _%s\n", sym);
+ break;
+ default: /* BUILD_machasm for relative relocations handled below. */
+ fprintf(ctx->fp, "\t.long _%s\n", sym);
+ break;
+ }
+}
+
+static const char *const jccnames[] = {
+ "jo", "jno", "jb", "jnb", "jz", "jnz", "jbe", "ja",
+ "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
+};
+
+/* Emit relocation for the incredibly stupid OSX assembler. */
+static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n,
+ const char *sym)
+{
+ const char *opname = NULL;
+ if (--n < 0) goto err;
+ if (cp[n] == 0xe8) {
+ opname = "call";
+ } else if (cp[n] == 0xe9) {
+ opname = "jmp";
+ } else if (cp[n] >= 0x80 && cp[n] <= 0x8f && n > 0 && cp[n-1] == 0x0f) {
+ opname = jccnames[cp[n]-0x80];
+ n--;
+ } else {
+err:
+ fprintf(stderr, "Error: unsupported opcode for %s symbol relocation.\n",
+ sym);
+ exit(1);
+ }
+ emit_asm_bytes(ctx, cp, n);
+ if (!strncmp(sym, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
+ fprintf(ctx->fp, "\t%s _%s\n", opname, sym);
+ else
+ fprintf(ctx->fp, "\t%s _" LABEL_PREFIX "wrapper_%s\n", opname, sym);
+}
+
+/* Emit an assembler label. */
+static void emit_asm_label(BuildCtx *ctx, const char *name, int size, int isfunc)
+{
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp,
+ "\n\t.globl %s\n"
+ "\t.hidden %s\n"
+ "\t.type %s, @%s\n"
+ "\t.size %s, %d\n"
+ "%s:\n",
+ name, name, name, isfunc ? "function" : "object", name, size, name);
+ break;
+ case BUILD_coffasm:
+ fprintf(ctx->fp, "\n\t.globl _%s\n", name);
+ if (isfunc)
+ fprintf(ctx->fp, "\t.def _%s; .scl 3; .type 32; .endef\n", name);
+ fprintf(ctx->fp, "_%s:\n", name);
+ break;
+ case BUILD_machasm:
+ fprintf(ctx->fp,
+ "\n\t.private_extern _%s\n"
+ "_%s:\n", name, name);
+ break;
+ default:
+ break;
+ }
+}
+
+/* Emit alignment. */
+static void emit_asm_align(BuildCtx *ctx, int bits)
+{
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ case BUILD_coffasm:
+ fprintf(ctx->fp, "\t.p2align %d\n", bits);
+ break;
+ case BUILD_machasm:
+ fprintf(ctx->fp, "\t.align %d\n", bits);
+ break;
+ default:
+ break;
+ }
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Emit assembler source code. */
+void emit_asm(BuildCtx *ctx)
+{
+ char name[80];
+ int32_t prev;
+ int i, pi, rel;
+
+ fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch);
+ fprintf(ctx->fp, "\t.text\n");
+ emit_asm_align(ctx, 4);
+
+ emit_asm_label(ctx, LABEL_ASM_BEGIN, 0, 1);
+ if (ctx->mode == BUILD_elfasm)
+ fprintf(ctx->fp, ".Lbegin:\n");
+
+ i = 0;
+ do {
+ pi = ctx->perm[i++];
+ prev = ctx->sym_ofs[pi];
+ } while (prev < 0); /* Skip the _Z symbols. */
+
+ for (rel = 0; i <= ctx->nsym; i++) {
+ int ni = ctx->perm[i];
+ int32_t next = ctx->sym_ofs[ni];
+ int size = (int)(next - prev);
+ int32_t stop = next;
+ if (pi >= ctx->npc) {
+ sprintf(name, LABEL_PREFIX "%s", ctx->globnames[pi-ctx->npc]);
+ emit_asm_label(ctx, name, size, 1);
+#if LJ_HASJIT
+ } else {
+#else
+ } else if (!(pi == BC_JFORI || pi == BC_JFORL || pi == BC_JITERL ||
+ pi == BC_JLOOP || pi == BC_IFORL || pi == BC_IITERL ||
+ pi == BC_ILOOP)) {
+#endif
+ sprintf(name, LABEL_PREFIX_BC "%s", bc_names[pi]);
+ emit_asm_label(ctx, name, size, 1);
+ }
+ while (rel < ctx->nreloc && ctx->reloc[rel].ofs < stop) {
+ int n = ctx->reloc[rel].ofs - prev;
+ if (ctx->mode == BUILD_machasm && ctx->reloc[rel].type != 0) {
+ emit_asm_reloc_mach(ctx, ctx->code+prev, n,
+ ctx->extnames[ctx->reloc[rel].sym]);
+ } else {
+ emit_asm_bytes(ctx, ctx->code+prev, n);
+ emit_asm_reloc(ctx, &ctx->reloc[rel]);
+ }
+ prev += n+4;
+ rel++;
+ }
+ emit_asm_bytes(ctx, ctx->code+prev, stop-prev);
+ prev = next;
+ pi = ni;
+ }
+
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\n\t.section .rodata\n");
+ break;
+ case BUILD_coffasm:
+ fprintf(ctx->fp, "\n\t.section .rdata,\"dr\"\n");
+ break;
+ case BUILD_machasm:
+ fprintf(ctx->fp, "\n\t.const\n");
+ break;
+ default:
+ break;
+ }
+ emit_asm_align(ctx, 5);
+
+ emit_asm_label(ctx, LABEL_OP_OFS, 2*ctx->npc, 0);
+ for (i = 0; i < ctx->npc; i++)
+ fprintf(ctx->fp, "\t.short %d\n", ctx->sym_ofs[i]);
+
+ fprintf(ctx->fp, "\n");
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\",@progbits\n");
+ /* fallthrough */
+ case BUILD_coffasm:
+ fprintf(ctx->fp, "\t.ident \"%s\"\n", ctx->dasm_ident);
+ break;
+ case BUILD_machasm:
+ fprintf(ctx->fp,
+ "\t.cstring\n"
+ "\t.ascii \"%s\\0\"\n", ctx->dasm_ident);
+ break;
+ default:
+ break;
+ }
+ fprintf(ctx->fp, "\n");
+}
+
diff --git a/src/buildvm_fold.c b/src/buildvm_fold.c
new file mode 100644
index 00000000..5f065643
--- /dev/null
+++ b/src/buildvm_fold.c
@@ -0,0 +1,206 @@
+/*
+** LuaJIT VM builder: IR folding hash table generator.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include "lj_obj.h"
+#include "lj_ir.h"
+
+#include "buildvm.h"
+
+/* Context for the folding hash table generator. */
+static int lineno;
+static int funcidx;
+static uint32_t foldkeys[BUILD_MAX_FOLD];
+static uint32_t nkeys;
+
+/* Try to fill the hash table with keys using the hash parameters. */
+static int tryhash(uint32_t *htab, uint32_t sz, uint32_t r, int dorol)
+{
+ uint32_t i;
+ if (dorol && ((r & 31) == 0 || (r>>5) == 0))
+ return 0; /* Avoid zero rotates. */
+ memset(htab, 0xff, (sz+1)*sizeof(uint32_t));
+ for (i = 0; i < nkeys; i++) {
+ uint32_t key = foldkeys[i];
+ uint32_t k = key & 0xffffff;
+ uint32_t h = (dorol ? lj_rol(lj_rol(k, r>>5) - k, r&31) :
+ (((k << (r>>5)) - k) << (r&31))) % sz;
+ if (htab[h] != 0xffffffff) { /* Collision on primary slot. */
+ if (htab[h+1] != 0xffffffff) { /* Collision on secondary slot. */
+ /* Try to move the colliding key, if possible. */
+ if (h < sz-1 && htab[h+2] == 0xffffffff) {
+ uint32_t k2 = htab[h+1] & 0xffffff;
+ uint32_t h2 = (dorol ? lj_rol(lj_rol(k2, r>>5) - k2, r&31) :
+ (((k2 << (r>>5)) - k2) << (r&31))) % sz;
+ if (h2 != h+1) return 0; /* Cannot resolve collision. */
+ htab[h+2] = htab[h+1]; /* Move colliding key to secondary slot. */
+ } else {
+ return 0; /* Collision. */
+ }
+ }
+ htab[h+1] = key;
+ } else {
+ htab[h] = key;
+ }
+ }
+ return 1; /* Success, all keys could be stored. */
+}
+
+/* Print the generated hash table. */
+static void printhash(BuildCtx *ctx, uint32_t *htab, uint32_t sz)
+{
+ uint32_t i;
+ fprintf(ctx->fp, "static const uint32_t fold_hash[%d] = {\n0x%08x",
+ sz+1, htab[0]);
+ for (i = 1; i < sz+1; i++)
+ fprintf(ctx->fp, ",\n0x%08x", htab[i]);
+ fprintf(ctx->fp, "\n};\n\n");
+}
+
+/* Exhaustive search for the shortest semi-perfect hash table. */
+static void makehash(BuildCtx *ctx)
+{
+ uint32_t htab[BUILD_MAX_FOLD*2+1];
+ uint32_t sz, r;
+ /* Search for the smallest hash table with an odd size. */
+ for (sz = (nkeys|1); sz < BUILD_MAX_FOLD*2; sz += 2) {
+ /* First try all shift hash combinations. */
+ for (r = 0; r < 32*32; r++) {
+ if (tryhash(htab, sz, r, 0)) {
+ printhash(ctx, htab, sz);
+ fprintf(ctx->fp,
+ "#define fold_hashkey(k)\t(((((k)<<%u)-(k))<<%u)%%%u)\n\n",
+ r>>5, r&31, sz);
+ return;
+ }
+ }
+ /* Then try all rotate hash combinations. */
+ for (r = 0; r < 32*32; r++) {
+ if (tryhash(htab, sz, r, 1)) {
+ printhash(ctx, htab, sz);
+ fprintf(ctx->fp,
+ "#define fold_hashkey(k)\t(lj_rol(lj_rol((k),%u)-(k),%u)%%%u)\n\n",
+ r>>5, r&31, sz);
+ return;
+ }
+ }
+ }
+ fprintf(stderr, "Error: search for perfect hash failed\n");
+ exit(1);
+}
+
+/* Parse one token of a fold rule. */
+static uint32_t nexttoken(char **pp, int allowlit, int allowany)
+{
+ char *p = *pp;
+ if (p) {
+ uint32_t i;
+ char *q = strchr(p, ' ');
+ if (q) *q++ = '\0';
+ *pp = q;
+ if (allowlit && !strncmp(p, "IRFPM_", 6)) {
+ for (i = 0; irfpm_names[i]; i++)
+ if (!strcmp(irfpm_names[i], p+6))
+ return i;
+ } else if (allowlit && !strncmp(p, "IRFL_", 5)) {
+ for (i = 0; irfield_names[i]; i++)
+ if (!strcmp(irfield_names[i], p+5))
+ return i;
+ } else if (allowany && !strcmp("any", p)) {
+ return 0xff;
+ } else {
+ for (i = 0; ir_names[i]; i++)
+ if (!strcmp(ir_names[i], p))
+ return i;
+ }
+ fprintf(stderr, "Error: bad fold definition token \"%s\" at line %d\n", p, lineno);
+ exit(1);
+ }
+ return 0;
+}
+
+/* Parse a fold rule. */
+static void foldrule(char *p)
+{
+ uint32_t op = nexttoken(&p, 0, 0);
+ uint32_t left = nexttoken(&p, 0, 1);
+ uint32_t right = nexttoken(&p, 1, 1);
+ uint32_t key = (funcidx << 24) | (op << 16) | (left << 8) | right;
+ uint32_t i;
+ if (nkeys >= BUILD_MAX_FOLD) {
+ fprintf(stderr, "Error: too many fold rules, increase BUILD_MAX_FOLD.\n");
+ exit(1);
+ }
+ /* Simple insertion sort to detect duplicates. */
+ for (i = nkeys; i > 0; i--) {
+ if ((foldkeys[i-1]&0xffffff) < (key & 0xffffff))
+ break;
+ if ((foldkeys[i-1]&0xffffff) == (key & 0xffffff)) {
+ fprintf(stderr, "Error: duplicate fold definition at line %d\n", lineno);
+ exit(1);
+ }
+ foldkeys[i] = foldkeys[i-1];
+ }
+ foldkeys[i] = key;
+ nkeys++;
+}
+
+/* Emit C source code for IR folding hash table. */
+void emit_fold(BuildCtx *ctx)
+{
+ char buf[256]; /* We don't care about analyzing lines longer than that. */
+ const char *fname = ctx->args[0];
+ FILE *fp;
+
+ if (fname == NULL) {
+ fprintf(stderr, "Error: missing input filename\n");
+ exit(1);
+ }
+
+ if (fname[0] == '-' && fname[1] == '\0') {
+ fp = stdin;
+ } else {
+ fp = fopen(fname, "r");
+ if (!fp) {
+ fprintf(stderr, "Error: cannot open input file '%s': %s\n",
+ fname, strerror(errno));
+ exit(1);
+ }
+ }
+
+ fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n");
+ fprintf(ctx->fp, "static const FoldFunc fold_func[] = {\n");
+
+ lineno = 0;
+ funcidx = 0;
+ nkeys = 0;
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ lineno++;
+ /* The prefix must be at the start of a line, otherwise it's ignored. */
+ if (!strncmp(buf, FOLDDEF_PREFIX, sizeof(FOLDDEF_PREFIX)-1)) {
+ char *p = buf+sizeof(FOLDDEF_PREFIX)-1;
+ char *q = strchr(p, ')');
+ if (p[0] == '(' && q) {
+ p++;
+ *q = '\0';
+ foldrule(p);
+ } else if ((p[0] == 'F' || p[0] == 'X') && p[1] == '(' && q) {
+ p += 2;
+ *q = '\0';
+ fprintf(ctx->fp, funcidx ? ",\n %s" : " %s", p);
+ funcidx++;
+ } else {
+ buf[strlen(buf)-1] = '\0';
+ fprintf(stderr, "Error: unknown fold definition tag %s%s at line %d\n",
+ FOLDDEF_PREFIX, p, lineno);
+ exit(1);
+ }
+ }
+ }
+ fclose(fp);
+ fprintf(ctx->fp, "\n};\n\n");
+
+ makehash(ctx);
+}
+
diff --git a/src/buildvm_lib.c b/src/buildvm_lib.c
new file mode 100644
index 00000000..cc572200
--- /dev/null
+++ b/src/buildvm_lib.c
@@ -0,0 +1,365 @@
+/*
+** LuaJIT VM builder: library definition compiler.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include "lj_obj.h"
+#include "lj_lib.h"
+
+#include "buildvm.h"
+
+/* Context for library definitions. */
+static uint8_t obuf[8192]; /* Output buffer for the binary init stream. */
+static uint8_t *optr; /* Current write position in obuf. */
+static char modname[80]; /* Name of the module currently being defined. */
+static size_t modnamelen; /* strlen(modname), used for prefix stripping. */
+static char funcname[80]; /* Last function name seen (for REC "." refs). */
+static int modstate, regfunc; /* Module emit state / registration mode. */
+static int ffid, recffid; /* Running fast function id / last recorded id. */
+
+/* Registration mode for the next function definition (see libdef_func). */
+enum {
+ REGFUNC_OK, /* Register normally. */
+ REGFUNC_NOREG, /* Don't register: only a LIBINIT_FFID byte is emitted. */
+ REGFUNC_NOREGUV /* Register with empty name, no hash table size bump. */
+};
+
+/* Append a length/kind-prefixed name to the output buffer.
+** kind is OR'ed into the length byte (LIBINIT_* tag). For non-string
+** names a leading "modname_" prefix is stripped first. Reserves two
+** spare bytes for the caller (used for the 16 bit ASM offset).
+*/
+static void libdef_name(char *p, int kind)
+{
+ size_t n = strlen(p);
+ if (kind != LIBINIT_STRING) {
+ if (n > modnamelen && p[modnamelen] == '_' &&
+ !strncmp(p, modname, modnamelen)) { /* Strip "modname_" prefix. */
+ p += modnamelen+1;
+ n -= modnamelen+1;
+ }
+ }
+ if (n > LIBINIT_MAXSTR) {
+ fprintf(stderr, "Error: string too long: '%s'\n", p);
+ exit(1);
+ }
+ if (optr+1+n+2 > obuf+sizeof(obuf)) { /* +2 for caller. */
+ fprintf(stderr, "Error: output buffer overflow\n");
+ exit(1);
+ }
+ *optr++ = (uint8_t)(n | kind); /* Combined length/kind byte. */
+ memcpy(optr, p, n);
+ optr += n;
+}
+
+/* Finish the current module, if one is open (modstate != 0):
+** close the lua_CFunction array, dump the collected init byte stream
+** as a C array terminated by LIBINIT_END, and close the #ifdef guard
+** that libdef_module() opened.
+*/
+static void libdef_endmodule(BuildCtx *ctx)
+{
+ if (modstate != 0) {
+ char line[80];
+ const uint8_t *p;
+ int n;
+ if (modstate == 1) /* No entry was emitted yet: add a dummy one. */
+ fprintf(ctx->fp, " (lua_CFunction)0");
+ fprintf(ctx->fp, "\n};\n");
+ fprintf(ctx->fp, "static const uint8_t %s%s[] = {\n",
+ LABEL_PREFIX_LIBINIT, modname);
+ line[0] = '\0';
+ for (n = 0, p = obuf; p < optr; p++) { /* Wrap output at ~75 chars. */
+ n += sprintf(line+n, "%d,", *p);
+ if (n >= 75) {
+ fprintf(ctx->fp, "%s\n", line);
+ n = 0;
+ line[0] = '\0';
+ }
+ }
+ fprintf(ctx->fp, "%s%d\n};\n#endif\n\n", line, LIBINIT_END);
+ }
+}
+
+/* Start a new module definition: end any previous module, reset the
+** output buffer (header bytes: starting ffid, hash table size) and emit
+** the guarded lua_CFunction array header. The module name is remembered
+** for prefix stripping in libdef_name().
+*/
+static void libdef_module(BuildCtx *ctx, char *p, int arg)
+{
+ UNUSED(arg);
+ if (ctx->mode == BUILD_libdef) {
+ libdef_endmodule(ctx);
+ optr = obuf;
+ *optr++ = (uint8_t)ffid; /* Stream header: first ffid of this module. */
+ *optr++ = 0; /* Hash table size, bumped per registered function. */
+ modstate = 1;
+ fprintf(ctx->fp, "#ifdef %sMODULE_%s\n", LIBDEF_PREFIX, p);
+ fprintf(ctx->fp, "#undef %sMODULE_%s\n", LIBDEF_PREFIX, p);
+ fprintf(ctx->fp, "static const lua_CFunction %s%s[] = {\n",
+ LABEL_PREFIX_LIBCF, p);
+ }
+ modnamelen = strlen(p);
+ if (modnamelen > sizeof(modname)-1) {
+ fprintf(stderr, "Error: module name too long: '%s'\n", p);
+ exit(1);
+ }
+ strcpy(modname, p);
+}
+
+/* Find the code offset of the fast function handler "ff_<name>" among
+** the global symbols. Exits with an error if the symbol is undefined,
+** so the exit(1) also serves as the missing-return path.
+*/
+static int find_ffofs(BuildCtx *ctx, const char *name)
+{
+ int i;
+ for (i = 0; i < ctx->nglob; i++) {
+ const char *gl = ctx->globnames[i];
+ if (gl[0] == 'f' && gl[1] == 'f' && gl[2] == '_' && !strcmp(gl+3, name)) {
+ return (int)((uint8_t *)ctx->glob[i] - ctx->code); /* Offset into code. */
+ }
+ }
+ fprintf(stderr, "Error: undefined fast function %s%s\n",
+ LABEL_PREFIX_FF, name);
+ exit(1);
+}
+
+/* Process a function definition; behavior depends on the build mode:
+** libdef: emit the C function table entry and init stream record
+** (plus a 16 bit code offset for ASM fast functions).
+** ffdef: emit an FFDEF(name) line.
+** recdef: remember the name for a following REC(.) tag.
+** vmdef: emit the name with "modname_" rewritten as "modname.".
+** Always advances ffid and resets regfunc to REGFUNC_OK.
+*/
+static void libdef_func(BuildCtx *ctx, char *p, int arg)
+{
+ if (ctx->mode == BUILD_libdef) {
+ int ofs = arg != LIBINIT_CF ? find_ffofs(ctx, p) : 0; /* ASM funcs need their code offset. */
+ if (modstate == 0) {
+ fprintf(stderr, "Error: no module for function definition %s\n", p);
+ exit(1);
+ }
+ if (regfunc == REGFUNC_NOREG) { /* Skip registration: just bump the ffid. */
+ if (optr+1 > obuf+sizeof(obuf)) {
+ fprintf(stderr, "Error: output buffer overflow\n");
+ exit(1);
+ }
+ *optr++ = LIBINIT_FFID;
+ } else {
+ if (arg != LIBINIT_ASM_) {
+ if (modstate != 1) fprintf(ctx->fp, ",\n"); /* Separator for 2nd+ entry. */
+ modstate = 2;
+ fprintf(ctx->fp, " %s%s", arg ? LABEL_PREFIX_FFH : LABEL_PREFIX_CF, p);
+ }
+ if (regfunc != REGFUNC_NOREGUV) obuf[1]++; /* Bump hash table size. */
+ libdef_name(regfunc == REGFUNC_NOREGUV ? "" : p, arg);
+ if (arg) { /* Append 16 bit little-endian offset for ASM functions. */
+ *optr++ = (uint8_t)ofs;
+ *optr++ = (uint8_t)(ofs >> 8);
+ }
+ }
+ } else if (ctx->mode == BUILD_ffdef) {
+ fprintf(ctx->fp, "FFDEF(%s)\n", p);
+ } else if (ctx->mode == BUILD_recdef) {
+ if (strlen(p) > sizeof(funcname)-1) {
+ fprintf(stderr, "Error: function name too long: '%s'\n", p);
+ exit(1);
+ }
+ strcpy(funcname, p); /* Remembered for a following REC(.) tag. */
+ } else if (ctx->mode == BUILD_vmdef) {
+ int i;
+ for (i = 1; p[i] && modname[i-1]; i++) /* modname_func -> modname.func */
+ if (p[i] == '_') p[i] = '.';
+ fprintf(ctx->fp, "\"%s\",\n", p);
+ }
+ ffid++;
+ regfunc = REGFUNC_OK;
+}
+
+/* Find (or append) a record handler name and return its index.
+** obuf is reused here as a list of \0-terminated strings; indexes start
+** at 2 because slots 0/1 are recff_nyi/recff_c (see emit_lib).
+*/
+static uint32_t find_rec(char *name)
+{
+ char *p = (char *)obuf;
+ uint32_t n;
+ for (n = 2; *p; n++) { /* Linear scan over existing names. */
+ if (strcmp(p, name) == 0)
+ return n;
+ p += strlen(p)+1;
+ }
+ if (p+strlen(name)+1 >= (char *)obuf+sizeof(obuf)) {
+ fprintf(stderr, "Error: output buffer overflow\n");
+ exit(1);
+ }
+ strcpy(p, name); /* Append new name at the end of the list. */
+ return n;
+}
+
+/* Process a REC() definition (recdef mode only): pad recff_idmap with
+** zeros up to the current ffid, then emit the handler index (high byte)
+** plus optional auxiliary data (after a space in the argument).
+** A name of "." refers to the last name seen by libdef_func().
+*/
+static void libdef_rec(BuildCtx *ctx, char *p, int arg)
+{
+ UNUSED(arg);
+ if (ctx->mode == BUILD_recdef) {
+ char *q;
+ uint32_t n;
+ for (; recffid+1 < ffid; recffid++) /* Zero-fill unrecorded functions. */
+ fprintf(ctx->fp, ",\n0");
+ recffid = ffid;
+ if (*p == '.') p = funcname; /* "." = same name as the function. */
+ q = strchr(p, ' '); /* Optional "name auxdata" split. */
+ if (q) *q++ = '\0';
+ n = find_rec(p);
+ if (q)
+ fprintf(ctx->fp, ",\n0x%02x00+(%s)", n, q)
;
+ else
+ fprintf(ctx->fp, ",\n0x%02x00", n);
+ }
+}
+
+/* Copy n bytes from src to dst in target byte order.
+** If host and target endianness (LJ_ENDIAN_SELECT) agree this is a
+** plain memcpy, otherwise the byte order is reversed.
+*/
+static void memcpy_endian(void *dst, void *src, size_t n)
+{
+ union { uint8_t b; uint32_t u; } host_endian;
+ host_endian.u = 1; /* host_endian.b == 1 iff the host is little-endian. */
+ if (host_endian.b == LJ_ENDIAN_SELECT(1, 0)) {
+ memcpy(dst, src, n);
+ } else {
+ size_t i;
+ for (i = 0; i < n; i++)
+ ((uint8_t *)dst)[i] = ((uint8_t *)src)[n-i-1]; /* Fix: was n-i, which read src[n] out of bounds and never copied src[0]. */
+ }
+}
+
+/* Process a PUSH() definition (libdef mode only). Accepted arguments:
+** "string" -> LIBINIT_STRING, a number -> LIBINIT_NUMBER (stored in
+** target byte order), lastcl -> LIBINIT_LASTCL, top-N -> LIBINIT_COPY N.
+** Anything else is a fatal error.
+*/
+static void libdef_push(BuildCtx *ctx, char *p, int arg)
+{
+ UNUSED(arg);
+ if (ctx->mode == BUILD_libdef) {
+ int len = (int)strlen(p);
+ if (*p == '"') {
+ if (len > 1 && p[len-1] == '"') {
+ p[len-1] = '\0'; /* Strip trailing quote; +1 below skips the leading one. */
+ libdef_name(p+1, LIBINIT_STRING);
+ return;
+ }
+ } else if (*p >= '0' && *p <= '9') {
+ char *ep;
+ double d = strtod(p, &ep);
+ if (*ep == '\0') { /* Only if the whole argument parsed as a number. */
+ if (optr+1+sizeof(double) > obuf+sizeof(obuf)) {
+ fprintf(stderr, "Error: output buffer overflow\n");
+ exit(1);
+ }
+ *optr++ = LIBINIT_NUMBER;
+ memcpy_endian(optr, &d, sizeof(double)); /* Target byte order. */
+ optr += sizeof(double);
+ return;
+ }
+ } else if (!strcmp(p, "lastcl")) {
+ if (optr+1 > obuf+sizeof(obuf)) {
+ fprintf(stderr, "Error: output buffer overflow\n");
+ exit(1);
+ }
+ *optr++ = LIBINIT_LASTCL;
+ return;
+ } else if (len > 4 && !strncmp(p, "top-", 4)) {
+ if (optr+2 > obuf+sizeof(obuf)) {
+ fprintf(stderr, "Error: output buffer overflow\n");
+ exit(1);
+ }
+ *optr++ = LIBINIT_COPY;
+ *optr++ = (uint8_t)atoi(p+4); /* Slot offset from the stack top. */
+ return;
+ }
+ fprintf(stderr, "Error: bad value for %sPUSH(%s)\n", LIBDEF_PREFIX, p);
+ exit(1);
+ }
+}
+
+/* Process a SET() definition (libdef mode only): emit the name followed
+** by LIBINIT_SET and bump the hash table size. "!" means an empty name
+** (set environment). The extra byte written after libdef_name() uses
+** the spare space that libdef_name() reserved for the caller.
+*/
+static void libdef_set(BuildCtx *ctx, char *p, int arg)
+{
+ UNUSED(arg);
+ if (ctx->mode == BUILD_libdef) {
+ if (p[0] == '!' && p[1] == '\0') p[0] = '\0'; /* Set env. */
+ libdef_name(p, LIBINIT_STRING);
+ *optr++ = LIBINIT_SET;
+ obuf[1]++; /* Bump hash table size. */
+ }
+}
+
+/* Process a NOREG/NOREGUV tag: set the registration mode (REGFUNC_*)
+** for the immediately following function definition. libdef_func()
+** resets the mode back to REGFUNC_OK after each definition.
+*/
+static void libdef_regfunc(BuildCtx *ctx, char *p, int arg)
+{
+ UNUSED(ctx); UNUSED(p);
+ regfunc = arg;
+}
+
+/* Handler callback for one library definition tag. */
+typedef void (*LibDefFunc)(BuildCtx *ctx, char *p, int arg);
+
+/* One entry of the tag dispatch table below. */
+typedef struct LibDefHandler {
+ const char *suffix; /* Tag suffix matched after LIBDEF_PREFIX. */
+ const char *stop; /* Chars ending the argument (NULL = no argument). */
+ const LibDefFunc func; /* Handler to invoke with the argument. */
+ const int arg; /* Extra argument passed to the handler. */
+} LibDefHandler;
+
+/* Tag handler table. Matched first-wins, so the longer "NOREGUV" must
+** precede its prefix "NOREG". Terminated by a NULL suffix.
+*/
+static const LibDefHandler libdef_handlers[] = {
+ { "MODULE_", " \t\r\n", libdef_module, 0 },
+ { "CF(", ")", libdef_func, LIBINIT_CF },
+ { "ASM(", ")", libdef_func, LIBINIT_ASM },
+ { "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
+ { "REC(", ")", libdef_rec, 0 },
+ { "PUSH(", ")", libdef_push, 0 },
+ { "SET(", ")", libdef_set, 0 },
+ { "NOREGUV", NULL, libdef_regfunc, REGFUNC_NOREGUV },
+ { "NOREG", NULL, libdef_regfunc, REGFUNC_NOREG },
+ { NULL, NULL, (LibDefFunc)0, 0 }
+};
+
+/* Emit C source code for library function definitions. */
+/* Scans each input file named in ctx->args ('-' reads stdin) for
+** LIBDEF_PREFIX tags and dispatches them via libdef_handlers[].
+** The surrounding boilerplate written before/after the scan depends
+** on ctx->mode (libdef/ffdef/recdef/vmdef).
+*/
+void emit_lib(BuildCtx *ctx)
+{
+ const char *fname;
+
+ if (ctx->mode == BUILD_ffdef || ctx->mode == BUILD_libdef ||
+ ctx->mode == BUILD_recdef)
+ fprintf(ctx->fp, "/* This is a generated file. DO NOT EDIT! */\n\n");
+ else if (ctx->mode == BUILD_vmdef)
+ fprintf(ctx->fp, "ffnames = {\n[0]=\"Lua\",\n\"C\",\n");
+ if (ctx->mode == BUILD_recdef)
+ fprintf(ctx->fp, "static const uint16_t recff_idmap[] = {\n0,\n0x0100");
+ recffid = ffid = FF_C+1; /* First free fast function id. */
+
+ while ((fname = *ctx->args++)) { /* Process all input files in order. */
+ char buf[256]; /* We don't care about analyzing lines longer than that. */
+ FILE *fp;
+ if (fname[0] == '-' && fname[1] == '\0') {
+ fp = stdin;
+ } else {
+ fp = fopen(fname, "r");
+ if (!fp) {
+ fprintf(stderr, "Error: cannot open input file '%s': %s\n",
+ fname, strerror(errno));
+ exit(1);
+ }
+ }
+ modstate = 0;
+ regfunc = REGFUNC_OK;
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ char *p;
+ /* A line may hold several tags; resume scanning after each one. */
+ for (p = buf; (p = strstr(p, LIBDEF_PREFIX)) != NULL; ) {
+ const LibDefHandler *ldh;
+ p += sizeof(LIBDEF_PREFIX)-1;
+ for (ldh = libdef_handlers; ldh->suffix != NULL; ldh++) {
+ size_t n, len = strlen(ldh->suffix);
+ if (!strncmp(p, ldh->suffix, len)) {
+ p += len;
+ n = ldh->stop ? strcspn(p, ldh->stop) : 0;
+ if (!p[n]) break; /* Unterminated argument: treat as unknown. */
+ p[n] = '\0';
+ ldh->func(ctx, p, ldh->arg);
+ p += n+1;
+ break;
+ }
+ }
+ if (ldh->suffix == NULL) { /* No handler matched. */
+ buf[strlen(buf)-1] = '\0';
+ fprintf(stderr, "Error: unknown library definition tag %s%s\n",
+ LIBDEF_PREFIX, p);
+ exit(1);
+ }
+ }
+ }
+ fclose(fp);
+ if (ctx->mode == BUILD_libdef) {
+ libdef_endmodule(ctx); /* Flush the last open module of this file. */
+ }
+ }
+
+ if (ctx->mode == BUILD_ffdef) {
+ fprintf(ctx->fp, "\n#undef FFDEF\n\n");
+ } else if (ctx->mode == BUILD_vmdef) {
+ fprintf(ctx->fp, "}\n\n");
+ } else if (ctx->mode == BUILD_recdef) {
+ char *p = (char *)obuf; /* Names collected by find_rec(). */
+ fprintf(ctx->fp, "\n};\n\n");
+ fprintf(ctx->fp, "static const RecordFunc recff_func[] = {\n"
+ "recff_nyi,\n"
+ "recff_c");
+ while (*p) {
+ fprintf(ctx->fp, ",\nrecff_%s", p);
+ p += strlen(p)+1;
+ }
+ fprintf(ctx->fp, "\n};\n\n");
+ }
+}
+
diff --git a/src/buildvm_peobj.c b/src/buildvm_peobj.c
new file mode 100644
index 00000000..9acf6b76
--- /dev/null
+++ b/src/buildvm_peobj.c
@@ -0,0 +1,303 @@
+/*
+** LuaJIT VM builder: PE object emitter.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Only used for building on Windows, since we cannot assume the presence
+** of a suitable assembler. The host and target byte order must match.
+*/
+
+#include "buildvm.h"
+#include "lj_bc.h"
+
+#if LJ_TARGET_X86ORX64
+
+/* Context for PE object emitter. */
+static char *strtab; /* NULL during pass 1 (sizing), allocated in pass 2. */
+static size_t strtabofs; /* Current string table offset (starts at 4). */
+
+/* -- PE object definitions ----------------------------------------------- */
+
+/* PE header. */
+typedef struct PEheader {
+ uint16_t arch;
+ uint16_t nsects;
+ uint32_t time;
+ uint32_t symtabofs;
+ uint32_t nsyms;
+ uint16_t opthdrsz;
+ uint16_t flags;
+} PEheader;
+
+/* PE section. */
+typedef struct PEsection {
+ char name[8];
+ uint32_t vsize;
+ uint32_t vaddr;
+ uint32_t size;
+ uint32_t ofs;
+ uint32_t relocofs;
+ uint32_t lineofs;
+ uint16_t nreloc;
+ uint16_t nline;
+ uint32_t flags;
+} PEsection;
+
+/* PE relocation. */
+typedef struct PEreloc {
+ uint32_t vaddr;
+ uint32_t symidx;
+ uint16_t type;
+} PEreloc;
+
+/* Cannot use sizeof, because it pads up to the max. alignment. */
+#define PEOBJ_RELOC_SIZE (4+4+2)
+
+/* PE symbol table entry. */
+typedef struct PEsym {
+ union {
+ char name[8]; /* Names of <= 8 chars are stored inline. */
+ uint32_t nameref[2]; /* Else [0] = 0, [1] = string table offset. */
+ } n;
+ uint32_t value;
+ int16_t sect;
+ uint16_t type;
+ uint8_t scl;
+ uint8_t naux;
+} PEsym;
+
+/* PE symbol table auxiliary entry for a section. */
+typedef struct PEsymaux {
+ uint32_t size;
+ uint16_t nreloc;
+ uint16_t nline;
+ uint32_t cksum;
+ uint16_t assoc;
+ uint8_t comdatsel;
+ uint8_t unused[3];
+} PEsymaux;
+
+/* Cannot use sizeof, because it pads up to the max. alignment. */
+#define PEOBJ_SYM_SIZE (8+4+2+2+1+1)
+
+/* PE object CPU specific defines. */
+#if LJ_TARGET_X86
+#define PEOBJ_ARCH_TARGET 0x014c /* IMAGE_FILE_MACHINE_I386. */
+#define PEOBJ_RELOC_REL32 0x14 /* MS: REL32, GNU: DISP32. */
+#define PEOBJ_RELOC_DIR32 0x06
+#define PEOBJ_SYM_PREFIX "_" /* x86 C symbols carry a leading underscore. */
+#elif LJ_TARGET_X64
+#define PEOBJ_ARCH_TARGET 0x8664 /* IMAGE_FILE_MACHINE_AMD64. */
+#define PEOBJ_RELOC_REL32 0x04 /* MS: REL32, GNU: DISP32. */
+#define PEOBJ_RELOC_DIR32 0x02
+#define PEOBJ_SYM_PREFIX ""
+#endif
+
+/* Section numbers (0-based). */
+enum {
+ PEOBJ_SECT_ABS = -2,
+ PEOBJ_SECT_UNDEF = -1,
+ PEOBJ_SECT_TEXT,
+ /* TODO: add .pdata/.xdata for x64. */
+ PEOBJ_SECT_RDATA,
+ PEOBJ_SECT_RDATA_Z,
+ PEOBJ_NSECTIONS
+};
+
+/* Symbol types. */
+#define PEOBJ_TYPE_NULL 0
+#define PEOBJ_TYPE_FUNC 0x20
+
+/* Symbol storage class. */
+#define PEOBJ_SCL_EXTERN 2
+#define PEOBJ_SCL_STATIC 3
+/* -- PE object emitter --------------------------------------------------- */
+
+/* Emit PE object symbol. */
+/* Two-pass helper: while strtab == NULL (pass 1), only the string table
+** size is accumulated. In pass 2 the COFF symbol record is written;
+** names longer than 8 chars go into the string table and are referenced
+** by offset (nameref[0] == 0 marks a long name).
+*/
+static void emit_peobj_sym(BuildCtx *ctx, const char *name, uint32_t value,
+ int sect, int type, int scl)
+{
+ PEsym sym;
+ size_t len = strlen(name);
+ if (!strtab) { /* Pass 1: only calculate string table length. */
+ if (len > 8) strtabofs += len+1;
+ return;
+ }
+ if (len <= 8) { /* Short name: stored inline, zero-padded. */
+ memcpy(sym.n.name, name, len);
+ memset(sym.n.name+len, 0, 8-len);
+ } else { /* Long name: append to string table, reference by offset. */
+ sym.n.nameref[0] = 0;
+ sym.n.nameref[1] = strtabofs;
+ memcpy(strtab + strtabofs, name, len);
+ strtab[strtabofs+len] = 0;
+ strtabofs += len+1;
+ }
+ sym.value = value;
+ sym.sect = (int16_t)(sect+1); /* 1-based section number. */
+ sym.type = (uint16_t)type;
+ sym.scl = (uint8_t)scl;
+ sym.naux = 0;
+ owrite(ctx, &sym, PEOBJ_SYM_SIZE);
+}
+
+/* Emit PE object section symbol. */
+/* Writes the section's static symbol plus one auxiliary record holding
+** its size and relocation count. No-op in pass 1, since the 8-char
+** section names never touch the string table.
+*/
+static void emit_peobj_sym_sect(BuildCtx *ctx, PEsection *pesect, int sect)
+{
+ PEsym sym;
+ PEsymaux aux;
+ if (!strtab) return; /* Pass 1: no output. */
+ memcpy(sym.n.name, pesect[sect].name, 8);
+ sym.value = 0;
+ sym.sect = (int16_t)(sect+1); /* 1-based section number. */
+ sym.type = PEOBJ_TYPE_NULL;
+ sym.scl = PEOBJ_SCL_STATIC;
+ sym.naux = 1; /* One auxiliary entry follows. */
+ owrite(ctx, &sym, PEOBJ_SYM_SIZE);
+ memset(&aux, 0, sizeof(PEsymaux));
+ aux.size = pesect[sect].size;
+ aux.nreloc = pesect[sect].nreloc;
+ owrite(ctx, &aux, PEOBJ_SYM_SIZE); /* Aux records share the symbol size. */
+}
+
+/* Shorthands: an extern function symbol in .text, resp. an extern data
+** symbol in .rdata. */
+#define emit_peobj_sym_func(ctx, name, ofs) \
+ emit_peobj_sym(ctx, name, (uint32_t)(ofs), \
+ PEOBJ_SECT_TEXT, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN)
+#define emit_peobj_sym_rdata(ctx, name, ofs) \
+ emit_peobj_sym(ctx, name, (uint32_t)(ofs), \
+ PEOBJ_SECT_RDATA, PEOBJ_TYPE_NULL, PEOBJ_SCL_EXTERN)
+
+/* Emit Windows PE object file. */
+/* Layout: COFF header, three sections (.text + relocs, .rdata with the
+** bytecode offsets, .rdata$Z with the DynASM ident), the symbol table
+** (written in two passes to size the string table first) and finally
+** the string table. Requires matching host/target byte order.
+*/
+void emit_peobj(BuildCtx *ctx)
+{
+ PEheader pehdr;
+ PEsection pesect[PEOBJ_NSECTIONS];
+ int nzsym, relocsyms;
+ uint32_t sofs;
+ int i;
+ union { uint8_t b; uint32_t u; } host_endian;
+
+ host_endian.u = 1; /* Runtime endianness probe. */
+ if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) {
+ fprintf(stderr, "Error: different byte order for host and target\n");
+ exit(1);
+ }
+
+ sofs = sizeof(PEheader) + PEOBJ_NSECTIONS*sizeof(PEsection);
+
+ /* Fill in PE sections. File offsets are accumulated in sofs. */
+ memset(&pesect, 0, PEOBJ_NSECTIONS*sizeof(PEsection));
+ memcpy(pesect[PEOBJ_SECT_TEXT].name, ".text", sizeof(".text")-1);
+ pesect[PEOBJ_SECT_TEXT].ofs = sofs;
+ sofs += (pesect[PEOBJ_SECT_TEXT].size = (uint32_t)ctx->codesz);
+ pesect[PEOBJ_SECT_TEXT].relocofs = sofs;
+ sofs += (pesect[PEOBJ_SECT_TEXT].nreloc = (uint16_t)ctx->nreloc) * PEOBJ_RELOC_SIZE;
+ /* Flags: 60 = read+execute, 50 = align16, 20 = code. */
+ pesect[PEOBJ_SECT_TEXT].flags = 0x60500020;
+
+ memcpy(pesect[PEOBJ_SECT_RDATA].name, ".rdata", sizeof(".rdata")-1);
+ pesect[PEOBJ_SECT_RDATA].ofs = sofs;
+ sofs += (pesect[PEOBJ_SECT_RDATA].size = ctx->npc*sizeof(uint16_t));
+ /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
+ pesect[PEOBJ_SECT_RDATA].flags = 0x40300040;
+
+ memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
+ pesect[PEOBJ_SECT_RDATA_Z].ofs = sofs;
+ sofs += (pesect[PEOBJ_SECT_RDATA_Z].size = (uint32_t)strlen(ctx->dasm_ident)+1);
+ /* Flags: 40 = read, 30 = align4, 40 = initialized data. */
+ pesect[PEOBJ_SECT_RDATA_Z].flags = 0x40300040;
+
+ /* Fill in PE header. */
+ pehdr.arch = PEOBJ_ARCH_TARGET;
+ pehdr.nsects = PEOBJ_NSECTIONS;
+ pehdr.time = 0; /* Timestamp is optional. */
+ pehdr.symtabofs = sofs;
+ pehdr.opthdrsz = 0;
+ pehdr.flags = 0;
+
+ /* Compute the size of the symbol table:
+ ** @feat.00 + nsections*2
+ ** + asm_start + (nsyms-nzsym) + op_ofs
+ ** + relocsyms
+ */
+ /* Skip _Z syms. */
+ for (nzsym = 0; ctx->sym_ofs[ctx->perm[nzsym]] < 0; nzsym++) ;
+ for (relocsyms = 0; ctx->extnames[relocsyms]; relocsyms++) ;
+ pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+(ctx->nsym-nzsym)+1 + relocsyms;
+
+ /* Write PE object header and all sections. */
+ owrite(ctx, &pehdr, sizeof(PEheader));
+ owrite(ctx, &pesect, sizeof(PEsection)*PEOBJ_NSECTIONS);
+
+ /* Write .text section. */
+ owrite(ctx, ctx->code, ctx->codesz);
+ for (i = 0; i < ctx->nreloc; i++) {
+ PEreloc reloc;
+ reloc.vaddr = (uint32_t)ctx->reloc[i].ofs;
+ reloc.symidx = 1+2+ctx->reloc[i].sym; /* Reloc syms are after .text sym. */
+ reloc.type = ctx->reloc[i].type ? PEOBJ_RELOC_REL32 : PEOBJ_RELOC_DIR32;
+ owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
+ }
+
+ /* Write .rdata section: one 16 bit offset per bytecode. */
+ for (i = 0; i < ctx->npc; i++) {
+ uint16_t pcofs = (uint16_t)ctx->sym_ofs[i];
+ owrite(ctx, &pcofs, 2);
+ }
+
+ /* Write .rdata$Z section. */
+ owrite(ctx, ctx->dasm_ident, strlen(ctx->dasm_ident)+1);
+
+ /* Write symbol table. */
+ strtab = NULL; /* 1st pass: collect string sizes. */
+ for (;;) {
+ char name[80];
+
+ strtabofs = 4; /* First 4 bytes hold the string table length. */
+ /* Mark as SafeSEH compliant. */
+ emit_peobj_sym(ctx, "@feat.00", 1,
+ PEOBJ_SECT_ABS, PEOBJ_TYPE_NULL, PEOBJ_SCL_STATIC);
+
+ emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_TEXT);
+ for (i = 0; ctx->extnames[i]; i++) { /* Undefined extern refs. */
+ sprintf(name, PEOBJ_SYM_PREFIX "%s", ctx->extnames[i]);
+ emit_peobj_sym(ctx, name, 0,
+ PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
+ }
+ emit_peobj_sym_func(ctx, PEOBJ_SYM_PREFIX LABEL_ASM_BEGIN, 0);
+ for (i = nzsym; i < ctx->nsym; i++) {
+ int pi = ctx->perm[i];
+ if (pi >= ctx->npc) { /* Global labels follow the bytecode labels. */
+ sprintf(name, PEOBJ_SYM_PREFIX LABEL_PREFIX "%s",
+ ctx->globnames[pi-ctx->npc]);
+ emit_peobj_sym_func(ctx, name, ctx->sym_ofs[pi]);
+#if LJ_HASJIT
+ } else {
+#else
+ } else if (!(pi == BC_JFORI || pi == BC_JFORL || pi == BC_JITERL ||
+ pi == BC_JLOOP || pi == BC_IFORL || pi == BC_IITERL ||
+ pi == BC_ILOOP)) {
+#endif
+ sprintf(name, PEOBJ_SYM_PREFIX LABEL_PREFIX_BC "%s",
+ bc_names[pi]);
+ emit_peobj_sym_func(ctx, name, ctx->sym_ofs[pi]);
+ }
+ }
+
+ emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_RDATA);
+ emit_peobj_sym_rdata(ctx, PEOBJ_SYM_PREFIX LABEL_OP_OFS, 0);
+
+ emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_RDATA_Z);
+
+ if (strtab)
+ break; /* Pass 2 done. */
+ /* 2nd pass: alloc strtab, write syms and copy strings. */
+ strtab = (char *)malloc(strtabofs);
+ *(uint32_t *)strtab = strtabofs; /* Store total length from pass 1. */
+ }
+
+ /* Write string table. */
+ owrite(ctx, strtab, strtabofs);
+}
+
+#endif
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
new file mode 100644
index 00000000..add00c9d
--- /dev/null
+++ b/src/buildvm_x86.dasc
@@ -0,0 +1,3592 @@
+|// Low-level VM code for x86 CPUs.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+|
+|.arch x86
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|//-----------------------------------------------------------------------
+|
+|// Fixed register assignments for the interpreter.
+|// This is very fragile and has many dependencies. Caveat emptor.
+|.define BASE, edx // Not C callee-save, refetched anyway.
+|.define KBASE, edi // Must be C callee-save.
+|.define PC, esi // Must be C callee-save.
+|.define DISPATCH, ebx // Must be C callee-save.
+|
+|.define RA, ecx
+|.define RAL, cl
+|.define RB, ebp // Must be ebp (C callee-save).
+|.define RC, eax // Must be eax (fcomparepp and others).
+|.define RCW, ax
+|.define RCH, ah
+|.define RCL, al
+|.define OP, RB
+|.define RD, RC
+|.define RDL, RCL
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L, lua_State
+|.type GL, global_State
+|.type TVALUE, TValue
+|.type GCOBJ, GCobj
+|.type STR, GCstr
+|.type TAB, GCtab
+|.type LFUNC, GCfuncL
+|.type CFUNC, GCfuncC
+|.type PROTO, GCproto
+|.type UPVAL, GCupval
+|.type NODE, Node
+|.type NARGS, int
+|.type TRACE, Trace
+|.type EXITINFO, ExitInfo
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.macro saveregs
+| push ebp; push edi; push esi; push ebx
+|.endmacro
+|.macro restoreregs
+| pop ebx; pop esi; pop edi; pop ebp
+|.endmacro
+|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
+|
+|.define INARG_4, aword [esp+aword*15]
+|.define INARG_3, aword [esp+aword*14]
+|.define INARG_2, aword [esp+aword*13]
+|.define INARG_1, aword [esp+aword*12]
+|//----- 16 byte aligned, ^^^ arguments from C caller
+|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
+|.define SAVE_R4, aword [esp+aword*10]
+|.define SAVE_R3, aword [esp+aword*9]
+|.define SAVE_R2, aword [esp+aword*8]
+|//----- 16 byte aligned
+|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
+|.define SAVE_PC, aword [esp+aword*6]
+|.define ARG6, aword [esp+aword*5]
+|.define ARG5, aword [esp+aword*4]
+|//----- 16 byte aligned
+|.define ARG4, aword [esp+aword*3]
+|.define ARG3, aword [esp+aword*2]
+|.define ARG2, aword [esp+aword*1]
+|.define ARG1, aword [esp] //<-- esp while in interpreter.
+|//----- 16 byte aligned, ^^^ arguments for C callee
+|
+|// FPARGx overlaps ARGx and ARG(x+1) on x86.
+|.define FPARG5, qword [esp+qword*2]
+|.define FPARG3, qword [esp+qword*1]
+|.define FPARG1, qword [esp]
+|// NRESULTS overlaps ARG6 (and FPARG5)
+|.define NRESULTS, ARG6
+|
+|// Arguments for vm_call and vm_pcall.
+|.define INARG_P_ERRF, INARG_4 // vm_pcall only.
+|.define INARG_NRES, INARG_3
+|.define INARG_BASE, INARG_2
+|.define SAVE_L, INARG_1
+|
+|.define SAVE_CFRAME, INARG_BASE // Overwrites INARG_BASE!
+|
+|// Arguments for vm_cpcall.
+|.define INARG_CP_UD, INARG_4
+|.define INARG_CP_FUNC, INARG_3
+|.define INARG_CP_CALL, INARG_2
+|
+|//-----------------------------------------------------------------------
+|
+|// Instruction headers.
+|.macro ins_A; .endmacro
+|.macro ins_AD; .endmacro
+|.macro ins_AJ; .endmacro
+|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
+|.macro ins_AB_; movzx RB, RCH; .endmacro
+|.macro ins_A_C; movzx RC, RCL; .endmacro
+|.macro ins_AND; not RD; .endmacro
+|
+|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
+|.macro ins_NEXT
+| mov RC, [PC]
+| movzx RA, RCH
+| movzx OP, RCL
+| add PC, 4
+| shr RC, 16
+| jmp aword [DISPATCH+OP*4]
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+| .define ins_next, ins_NEXT
+| .define ins_next_, ins_NEXT
+|.else
+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+| // Affects only certain kinds of benchmarks (and only with -j off).
+| // Around 10%-30% slower on Core2, a lot more slower on P4.
+| .macro ins_next
+| jmp ->ins_next
+| .endmacro
+| .macro ins_next_
+| ->ins_next:
+| ins_NEXT
+| .endmacro
+|.endif
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to test operand types.
+|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
+|.macro checknum, reg, target; checktp reg, LJ_TISNUM; ja target; .endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
+|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
+|
+|// These operands must be used with movzx.
+|.define PC_OP, byte [PC-4]
+|.define PC_RA, byte [PC-3]
+|.define PC_RB, byte [PC-1]
+|.define PC_RC, byte [PC-2]
+|.define PC_RD, word [PC-2]
+|
+|.macro branchPC, reg
+| lea PC, [PC+reg*4-BCBIAS_J*4]
+|.endmacro
+|
+|// Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
+|
+|// Decrement hashed hotcount and trigger trace recorder if zero.
+|.macro hotloop, reg
+| mov reg, PC
+| shr reg, 1
+| and reg, HOTCOUNT_PCMASK
+| sub word [DISPATCH+reg+GG_DISP2HOT], 1
+| jz ->vm_hotloop
+|.endmacro
+|
+|.macro hotcall, reg
+| mov reg, PC
+| shr reg, 1
+| and reg, HOTCOUNT_PCMASK
+| sub word [DISPATCH+reg+GG_DISP2HOT], 1
+| jz ->vm_hotcall
+|.endmacro
+|
+|// Set current VM state.
+|.macro set_vmstate, st
+| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
+|.endmacro
+|
+|// Annoying x87 stuff: support for two compare variants.
+|.macro fcomparepp // Compare and pop st0 >< st1.
+||if (cmov) {
+| fucomip st1
+| fpop
+||} else {
+| fucompp
+| fnstsw ax // eax modified!
+| sahf
+||}
+|.endmacro
+|
+|.macro fdup; fld st0; .endmacro
+|.macro fpop1; fstp st1; .endmacro
+|
+|// Move table write barrier back. Overwrites reg.
+|.macro barrierback, tab, reg
+| and byte tab->marked, cast_byte(~LJ_GC_BLACK) // black2gray(tab)
+| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
+| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
+| mov tab->gclist, reg
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx, int cmov)
+{
+ |.code_sub
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Call and return handling -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Reminder: A call gate may be called with func/args above L->maxstack,
+ |// i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
+ |// too. This means all call gates (L*, C and fast functions) must check
+ |// for stack overflow _before_ adding more slots!
+ |
+ |//-- Call gates ---------------------------------------------------------
+ |
+ |->gate_lf: // Call gate for fixarg Lua functions.
+ | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
+ | // DISPATCH initialized
+ | mov BASE, RA
+ | mov PROTO:RB, LFUNC:RB->pt
+ | mov [BASE-4], PC // Store caller PC.
+ | movzx RA, byte PROTO:RB->framesize
+ | mov PC, PROTO:RB->bc
+ | mov KBASE, PROTO:RB->k
+ | mov L:RB, SAVE_L
+ | lea RA, [BASE+RA*8] // Top of frame.
+ | lea RC, [BASE+NARGS:RC*8-4] // Points to tag of 1st free slot.
+ | cmp RA, L:RB->maxstack
+ | ja ->gate_lf_growstack
+ |9: // Entry point from vararg setup below.
+ | mov RB, LJ_TNIL
+ |1: // Clear free slots until top of frame.
+ | mov [RC], RB
+ | mov [RC+8], RB
+ | add RC, 16
+ | cmp RC, RA
+ | jb <1
+#if LJ_HASJIT
+ | // NYI: Disabled, until the tracer supports recursion/upcalls/leaves.
+ | // hotcall RB
+#endif
+ | ins_next
+ |
+ |->gate_lv: // Call gate for vararg Lua functions.
+ | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
+ | // DISPATCH initialized
+ | mov [RA-4], PC // Store caller PC.
+ | lea PC, [NARGS:RC*8+FRAME_VARG]
+ | lea BASE, [RA+PC-FRAME_VARG]
+ | mov [BASE-8], LFUNC:RB // Store copy of LFUNC.
+ | mov PROTO:RB, LFUNC:RB->pt
+ | mov [BASE-4], PC // Store delta + FRAME_VARG.
+ | movzx PC, byte PROTO:RB->framesize
+ | lea KBASE, [BASE+PC*8]
+ | mov L:PC, SAVE_L
+ | lea RC, [BASE+4]
+ | cmp KBASE, L:PC->maxstack
+ | ja ->gate_lv_growstack // Need to grow stack.
+ | movzx PC, byte PROTO:RB->numparams
+ | test PC, PC
+ | jz >2
+ |1: // Copy fixarg slots up.
+ | add RA, 8
+ | cmp RA, BASE
+ | jnb >2
+ | mov KBASE, [RA-8]
+ | mov [RC-4], KBASE
+ | mov KBASE, [RA-4]
+ | mov [RC], KBASE
+ | add RC, 8
+ | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
+ | sub PC, 1
+ | jnz <1
+ |2:
+ | movzx RA, byte PROTO:RB->framesize
+ | mov PC, PROTO:RB->bc
+ | mov KBASE, PROTO:RB->k
+ | lea RA, [BASE+RA*8]
+ | jmp <9
+ |
+ |->gate_c: // Call gate for C functions.
+ | // RA = new base, RB = CFUNC, RC = nargs+1, (BASE = old base), PC = return
+ | mov [RA-4], PC
+ | mov KBASE, CFUNC:RB->f
+ | mov L:RB, SAVE_L
+ | lea RC, [RA+NARGS:RC*8-8]
+ | mov L:RB->base, RA
+ | lea RA, [RC+8*LUA_MINSTACK]
+ | mov ARG1, L:RB
+ | mov L:RB->top, RC
+ | cmp RA, L:RB->maxstack
+ | ja ->gate_c_growstack // Need to grow stack.
+ | set_vmstate C
+ | call KBASE // (lua_State *L)
+ | set_vmstate INTERP
+ | // nresults returned in eax (RD).
+ | mov BASE, L:RB->base
+ | lea RA, [BASE+RD*8]
+ | neg RA
+ | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
+ |->vm_returnc:
+ | add RD, 1 // RD = nresults+1
+ | mov NRESULTS, RD
+ | test PC, FRAME_TYPE
+ | jz ->BC_RET_Z // Handle regular return to Lua.
+ |
+ |//-- Return handling (non-inline) ---------------------------------------
+ |
+ |->vm_return:
+ | // BASE = base, RA = resultofs, RD = nresults+1 (= NRESULTS), PC = return
+ | test PC, FRAME_C
+ | jz ->vm_returnp
+ |
+ | // Return to C.
+ | set_vmstate C
+ | and PC, -8
+ | sub PC, BASE
+ | neg PC // Previous base = BASE - delta.
+ |
+ | sub RD, 1
+ | jz >2
+ |1:
+ | mov RB, [BASE+RA] // Move results down.
+ | mov [BASE-8], RB
+ | mov RB, [BASE+RA+4]
+ | mov [BASE-4], RB
+ | add BASE, 8
+ | sub RD, 1
+ | jnz <1
+ |2:
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, PC
+ |3:
+ | mov RD, NRESULTS
+ | mov RA, INARG_NRES // RA = wanted nresults+1
+ |4:
+ | cmp RA, RD
+ | jne >6 // More/less results wanted?
+ |5:
+ | sub BASE, 8
+ | mov L:RB->top, BASE
+ |
+ |->vm_leave_cp:
+ | mov RA, SAVE_CFRAME // Restore previous C frame.
+ | mov L:RB->cframe, RA
+ | xor eax, eax // Ok return status for vm_pcall.
+ |
+ |->vm_leave_unw:
+ | add esp, CFRAME_SPACE
+ | restoreregs
+ | ret
+ |
+ |6:
+ | jb >7 // Less results wanted?
+ | // More results wanted. Check stack size and fill up results with nil.
+ | cmp BASE, L:RB->maxstack
+ | ja >8
+ | mov dword [BASE-4], LJ_TNIL
+ | add BASE, 8
+ | add RD, 1
+ | jmp <4
+ |
+ |7: // Less results wanted.
+ | test RA, RA
+ | jz <5 // But check for LUA_MULTRET+1.
+ | sub RA, RD // Negative result!
+ | lea BASE, [BASE+RA*8] // Correct top.
+ | jmp <5
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+ | // This can happen if:
+ | // - A C function grows the stack (a lot).
+ | // - The GC shrinks the stack in between.
+ | // - A return back from a lua_call() with (high) nresults adjustment.
+ | mov L:RB->top, BASE // Save current top held in BASE (yes).
+ | mov NRESULTS, RD // Need to fill only remainder with nil.
+ | mov ARG2, RA // Grow by wanted nresults+1.
+ | mov ARG1, L:RB
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
+ | jmp <3
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
+ | // (void *cframe, int errcode)
+ | mov ecx, [esp+4]
+ | mov eax, [esp+8] // Error return status for vm_pcall.
+ | and ecx, CFRAME_RAWMASK
+ | mov esp, ecx
+ | mov L:RB, SAVE_L
+ | mov GL:RB, L:RB->glref
+ | mov dword GL:RB->vmstate, ~LJ_VMST_C
+ | jmp ->vm_leave_unw
+ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ | mov ecx, [esp+4]
+ | and ecx, CFRAME_RAWMASK
+ | mov esp, ecx
+ | mov L:RB, SAVE_L
+ | mov RA, -8 // Results start at BASE+RA = BASE-8.
+ | mov RD, 1+1 // Really 1+2 results, incr. later.
+ | mov BASE, L:RB->base
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | add DISPATCH, GG_G2DISP
+ | mov PC, [BASE-4] // Fetch PC of previous frame.
+ | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
+ | set_vmstate INTERP
+ | jmp ->vm_returnc // Increments RD/NRESULTS and returns.
+ |
+ |->vm_returnp:
+ | test PC, FRAME_P
+ | jz ->cont_dispatch
+ |
+ | // Return from pcall or xpcall fast func.
+ | and PC, -8
+ | sub BASE, PC // Restore caller base.
+ | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
+ | mov PC, [BASE-4] // Fetch PC of previous frame.
+ | // Prepending may overwrite the pcall frame, so do it at the end.
+ | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
+ | jmp ->vm_returnc // Increments RD/NRESULTS and returns.
+ |
+ |//-- Grow stack on-demand -----------------------------------------------
+ |
+ |->gate_c_growstack: // Grow stack for C function.
+ | mov ARG2, LUA_MINSTACK
+ | jmp >1
+ |
+ |->gate_lv_growstack: // Grow stack for vararg Lua function.
+ | sub RC, 8
+ | mov BASE, RA
+ | mov RA, KBASE
+ | mov PC, PROTO:RB->bc
+ | mov L:RB, SAVE_L
+ |
+ |->gate_lf_growstack: // Grow stack for fixarg Lua function.
+ | // BASE = new base, RA = requested top, RC = top (offset +4 bytes)
+ | // RB = L, PC = first PC of called function (or anything if C function)
+ | sub RC, 4 // Adjust top.
+ | sub RA, BASE
+ | shr RA, 3 // n = pt->framesize - L->top
+ | add PC, 4 // Must point after first instruction.
+ | mov L:RB->base, BASE
+ | mov L:RB->top, RC
+ | mov SAVE_PC, PC
+ | mov ARG2, RA
+ | mov ARG1, L:RB
+ |1:
+ | // L:RB = L, L->base = new base, L->top = top
+ | // SAVE_PC = initial PC+1 (undefined for C functions)
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov RA, L:RB->base
+ | mov RC, L:RB->top
+ | mov LFUNC:RB, [RA-8]
+ | mov PC, [RA-4]
+ | sub RC, RA
+ | shr RC, 3
+ | add NARGS:RC, 1
+ | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = invalid), PC restored.
+ | jmp aword LFUNC:RB->gate // Just retry call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
+ | // (lua_State *L, StkId base, int nres1 = 0, ptrdiff_t ef = 0)
+ | saveregs
+ | mov PC, FRAME_C
+ | sub esp, CFRAME_SPACE
+ | xor RD, RD
+ | mov L:RB, SAVE_L
+ | lea KBASE, [esp+CFRAME_RESUME]
+ | mov RA, INARG_BASE
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | add DISPATCH, GG_G2DISP
+ | mov L:RB->cframe, KBASE
+ | mov SAVE_CFRAME, RD // Caveat: overlaps INARG_BASE!
+ | mov SAVE_PC, RD // Any value outside of bytecode is ok.
+ | cmp byte L:RB->status, RDL
+ | je >3 // Initial resume (like a call).
+ |
+ | // Resume after yield (like a return).
+ | set_vmstate INTERP
+ | mov byte L:RB->status, RDL
+ | mov BASE, L:RB->base
+ | mov RD, L:RB->top
+ | sub RD, RA
+ | shr RD, 3
+ | add RD, 1 // RD = nresults+1
+ | sub RA, BASE // RA = resultofs
+ | mov PC, [BASE-4]
+ | mov NRESULTS, RD
+ | test PC, FRAME_TYPE
+ | jz ->BC_RET_Z
+ | jmp ->vm_return
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
+ | // (lua_State *L, StkId base, int nres1, ptrdiff_t ef)
+ | saveregs
+ | mov PC, FRAME_CP
+ | jmp >1
+ |
+ |->vm_call: // Setup C frame and enter VM.
+ | // (lua_State *L, StkId base, int nres1)
+ | saveregs
+ | mov PC, FRAME_C
+ |
+ |1: // Entry point for vm_pcall above (PC = ftype).
+ | sub esp, CFRAME_SPACE
+ | mov L:RB, SAVE_L
+ | mov RA, INARG_BASE
+ |
+ |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype).
+ | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
+ | mov SAVE_CFRAME, KBASE // Caveat: overlaps INARG_BASE!
+ | mov SAVE_PC, esp // Any value outside of bytecode is ok.
+ | mov L:RB->cframe, esp
+ |
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | add DISPATCH, GG_G2DISP
+ |
+ |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
+ | set_vmstate INTERP
+ | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
+ | add PC, RA
+ | sub PC, BASE // PC = frame delta + frame type
+ |
+ | mov RC, L:RB->top
+ | sub RC, RA
+ | shr NARGS:RC, 3
+ | add NARGS:RC, 1 // RC = nargs+1
+ |
+ | mov LFUNC:RB, [RA-8]
+ | cmp dword [RA-4], LJ_TFUNC
+ | jne ->vmeta_call // Ensure KBASE defined and != BASE.
+ | jmp aword LFUNC:RB->gate
+ | // RA = new base, RB = LFUNC/CFUNC, RC = nargs+1.
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CPFunction cp, lua_CFunction func, void *ud)
+ | saveregs
+ | sub esp, CFRAME_SPACE
+ |
+ | mov L:RB, SAVE_L
+ | mov RC, INARG_CP_UD
+ | mov RA, INARG_CP_FUNC
+ | mov BASE, INARG_CP_CALL
+ | mov SAVE_PC, esp // Any value outside of bytecode is ok.
+ |
+ | // Caveat: INARG_P_* and INARG_CP_* overlap!
+ | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
+ | sub KBASE, L:RB->top
+ | mov INARG_P_ERRF, 0 // No error function.
+ | mov INARG_NRES, KBASE // Neg. delta means cframe w/o frame.
+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+ |
+ | mov ARG3, RC
+ | mov ARG2, RA
+ | mov ARG1, L:RB
+ |
+ | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
+ | mov SAVE_CFRAME, KBASE // Caveat: overlaps INARG_CP_CALL!
+ | mov L:RB->cframe, esp
+ |
+ | call BASE // (lua_State *L, lua_CFunction func, void *ud)
+ | // StkId (new base) or NULL returned in eax (RC).
+ | test RC, RC
+ | jz ->vm_leave_cp // No base? Just remove C frame.
+ | mov RA, RC
+ | mov PC, FRAME_CP
+ | jmp <2 // Else continue with the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in NRESULTS)
+ | add RA, BASE
+ | and PC, -8
+ | mov RB, BASE
+ | sub BASE, PC // Restore caller BASE.
+ | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
+ | mov RC, RA // ... in [RC]
+ | mov PC, [RB-12] // Restore PC from [cont|PC].
+ | mov LFUNC:KBASE, [BASE-8]
+ | mov PROTO:KBASE, LFUNC:KBASE->pt
+ | mov KBASE, PROTO:KBASE->k
+ | // BASE = base, RC = result, RB = meta base
+ | jmp dword [RB-16] // Jump to continuation.
+ |
+ |->cont_cat: // BASE = base, RC = result, RB = mbase
+ | movzx RA, PC_RB
+ | sub RB, 16
+ | lea RA, [BASE+RA*8]
+ | sub RA, RB
+ | je ->cont_ra
+ | neg RA
+ | shr RA, 3
+ | mov ARG3, RA
+ | mov RA, [RC+4]
+ | mov RC, [RC]
+ | mov [RB+4], RA
+ | mov [RB], RC
+ | mov ARG2, RB
+ | jmp ->BC_CAT_Z
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets:
+ | mov ARG5, RC // RC = GCstr *
+ | mov ARG6, LJ_TSTR
+ | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
+ | cmp PC_OP, BC_GGET
+ | jne >1
+ | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
+ | mov [RA], TAB:RB // RB = GCtab *
+ | mov dword [RA+4], LJ_TTAB
+ | mov RB, RA
+ | jmp >2
+ |
+ |->vmeta_tgetb:
+ | movzx RC, PC_RC // Ugly, cannot fild from a byte.
+ | mov ARG4, RC
+ | fild ARG4
+ | fstp FPARG5
+ | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
+ | jmp >1
+ |
+ |->vmeta_tgetv:
+ | movzx RC, PC_RC // Reload TValue *k from RC.
+ | lea RC, [BASE+RC*8]
+ |1:
+ | movzx RB, PC_RB // Reload TValue *t from RB.
+ | lea RB, [BASE+RB*8]
+ |2:
+ | mov ARG2, RB
+ | mov L:RB, SAVE_L
+ | mov ARG3, RC
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ | test RC, RC
+ | jz >3
+ |->cont_ra: // BASE = base, RC = result
+ | movzx RA, PC_RA
+ | mov RB, [RC+4]
+ | mov RC, [RC]
+ | mov [BASE+RA*8+4], RB
+ | mov [BASE+RA*8], RC
+ | ins_next
+ |
+ |3: // Call __index metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | mov RA, L:RB->top
+ | mov [RA-12], PC // [cont|PC]
+ | lea PC, [RA+FRAME_CONT]
+ | sub PC, BASE
+ | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
+ | mov NARGS:RC, 3 // 2+1 args for func(t, k).
+ | jmp aword LFUNC:RB->gate
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets:
+ | mov ARG5, RC // RC = GCstr *
+ | mov ARG6, LJ_TSTR
+ | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
+ | cmp PC_OP, BC_GSET
+ | jne >1
+ | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
+ | mov [RA], TAB:RB // RB = GCtab *
+ | mov dword [RA+4], LJ_TTAB
+ | mov RB, RA
+ | jmp >2
+ |
+ |->vmeta_tsetb:
+ | movzx RC, PC_RC // Ugly, cannot fild from a byte.
+ | mov ARG4, RC
+ | fild ARG4
+ | fstp FPARG5
+ | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
+ | jmp >1
+ |
+ |->vmeta_tsetv:
+ | movzx RC, PC_RC // Reload TValue *k from RC.
+ | lea RC, [BASE+RC*8]
+ |1:
+ | movzx RB, PC_RB // Reload TValue *t from RB.
+ | lea RB, [BASE+RB*8]
+ |2:
+ | mov ARG2, RB
+ | mov L:RB, SAVE_L
+ | mov ARG3, RC
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ | test RC, RC
+ | jz >3
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | movzx RA, PC_RA
+ | mov RB, [BASE+RA*8+4]
+ | mov RA, [BASE+RA*8]
+ | mov [RC+4], RB
+ | mov [RC], RA
+ |->cont_nop: // BASE = base, (RC = result)
+ | ins_next
+ |
+ |3: // Call __newindex metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+ | mov RA, L:RB->top
+ | mov [RA-12], PC // [cont|PC]
+ | movzx RC, PC_RA
+ | mov RB, [BASE+RC*8+4] // Copy value to third argument.
+ | mov RC, [BASE+RC*8]
+ | mov [RA+20], RB
+ | mov [RA+16], RC
+ | lea PC, [RA+FRAME_CONT]
+ | sub PC, BASE
+ | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
+ | mov NARGS:RC, 4 // 3+1 args for func(t, k, v).
+ | jmp aword LFUNC:RB->gate
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+ |->vmeta_comp:
+ | movzx RB, PC_OP
+ | lea RD, [BASE+RD*8]
+ | lea RA, [BASE+RA*8]
+ | mov ARG4, RB
+ | mov L:RB, SAVE_L
+ | mov ARG3, RD
+ | mov ARG2, RA
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
+ |3:
+ | mov BASE, L:RB->base
+ | cmp RC, 1
+ | ja ->vmeta_binop
+ |4:
+ | lea PC, [PC+4]
+ | jb >6
+ |5:
+ | movzx RD, PC_RD
+ | branchPC RD
+ |6:
+ | ins_next
+ |
+ |->cont_condt: // BASE = base, RC = result
+ | add PC, 4
+ | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
+ | jb <5
+ | jmp <6
+ |
+ |->cont_condf: // BASE = base, RC = result
+ | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
+ | jmp <4
+ |
+ |->vmeta_equal:
+ | mov ARG4, RB
+ | mov L:RB, SAVE_L
+ | sub PC, 4
+ | mov ARG3, RD
+ | mov ARG2, RA
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
+ | jmp <3
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |->vmeta_arith_vn:
+ | lea RC, [KBASE+RC*8]
+ | jmp >1
+ |
+ |->vmeta_arith_nv:
+ | lea RC, [KBASE+RC*8]
+ | lea RB, [BASE+RB*8]
+ | xchg RB, RC
+ | jmp >2
+ |
+ |->vmeta_unm:
+ | lea RC, [BASE+RD*8]
+ | mov RB, RC
+ | jmp >2
+ |
+ |->vmeta_arith_vv:
+ | lea RC, [BASE+RC*8]
+ |1:
+ | lea RB, [BASE+RB*8]
+ |2:
+ | lea RA, [BASE+RA*8]
+ | mov ARG3, RB
+ | mov L:RB, SAVE_L
+ | mov ARG4, RC
+ | movzx RC, PC_OP
+ | mov ARG2, RA
+ | mov ARG5, RC
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ | test RC, RC
+ | jz ->cont_nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+ | // BASE = base, RC = new base, stack = cont/func/o1/o2
+ | mov RA, RC
+ | sub RC, BASE
+ | mov [RA-12], PC // [cont|PC]
+ | lea PC, [RC+FRAME_CONT]
+ | mov LFUNC:RB, [RA-8]
+ | mov NARGS:RC, 3 // 2+1 args for func(o1, o2).
+ | cmp dword [RA-4], LJ_TFUNC
+ | jne ->vmeta_call
+ | jmp aword LFUNC:RB->gate
+ |
+ |->vmeta_len:
+ | lea RD, [BASE+RD*8]
+ | mov L:RB, SAVE_L
+ | mov ARG2, RD
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_meta_len // (lua_State *L, TValue *o)
+ | // TValue * (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ | jmp ->vmeta_binop // Binop call for compatibility.
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call: // Resolve and call __call metamethod.
+ | // RA = new base, RC = nargs+1, BASE = old base, PC = return
+ | mov ARG4, RA // Save RA, RC for us.
+ | mov ARG5, NARGS:RC
+ | sub RA, 8
+ | lea RC, [RA+NARGS:RC*8]
+ | mov L:RB, SAVE_L
+ | mov ARG2, RA
+ | mov ARG3, RC
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE // This is the callers base!
+ | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | mov BASE, L:RB->base
+ | mov RA, ARG4
+ | mov NARGS:RC, ARG5
+ | mov LFUNC:RB, [RA-8]
+ | add NARGS:RC, 1
+ | // This is fragile. L->base must not move, KBASE must always be defined.
+ | cmp KBASE, BASE // Continue with CALLT if flag set.
+ | je ->BC_CALLT_Z
+ | jmp aword LFUNC:RB->gate // Otherwise call resolved metamethod.
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+ | mov L:RB, SAVE_L
+ | mov ARG2, RA
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_meta_for // (lua_State *L, StkId base)
+ | mov BASE, L:RB->base
+ | mov RC, [PC-4]
+ | movzx RA, RCH
+ | movzx OP, RCL
+ | shr RC, 16
+ | jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Retry FORI or JFORI.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro .ffunc, name
+ |->ff_ .. name:
+ |.endmacro
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
+ | cmp NARGS:RC, 1+1; jb ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
+ | cmp NARGS:RC, 2+1; jb ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_n, name
+ | .ffunc_1 name
+ | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
+ | fld qword [RA]
+ |.endmacro
+ |
+ |.macro .ffunc_n, name, op
+ | .ffunc_1 name
+ | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
+ | op
+ | fld qword [RA]
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name
+ | .ffunc_2 name
+ | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
+ | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
+ | fld qword [RA]
+ | fld qword [RA+8]
+ |.endmacro
+ |
+ |.macro .ffunc_nnr, name
+ | .ffunc_2 name
+ | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
+ | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
+ | fld qword [RA+8]
+ | fld qword [RA]
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses label 1.
+ |.macro ffgccheck
+ | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
+ | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
+ | jb >1
+ | call ->fff_gcstep
+ |1:
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |.ffunc_1 assert
+ | mov RB, [RA+4]
+ | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback
+ | mov NRESULTS, RD
+ | mov [RA-4], RB
+ | mov RB, [RA]
+ | mov [RA-8], RB
+ | sub RD, 2
+ | jz >2
+ | mov ARG1, RA
+ |1:
+ | add RA, 8
+ | mov RB, [RA+4]
+ | mov [RA-4], RB
+ | mov RB, [RA]
+ | mov [RA-8], RB
+ | sub RD, 1
+ | jnz <1
+ | mov RA, ARG1
+ |2:
+ | mov RD, NRESULTS
+ | jmp ->fff_res_
+ |
+ |.ffunc_1 type
+ | mov RB, [RA+4]
+ | mov RC, ~LJ_TNUMX
+ | not RB
+ | cmp RC, RB
+ ||if (cmov) {
+ | cmova RC, RB
+ ||} else {
+ | jbe >1; mov RC, RB; 1:
+ ||}
+ | mov CFUNC:RB, [RA-8]
+ | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
+ | mov dword [RA-4], LJ_TSTR
+ | mov [RA-8], STR:RC
+ | jmp ->fff_res1
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+ | mov RB, [RA+4]
+ | cmp RB, LJ_TTAB; jne >6
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | mov TAB:RB, [RA]
+ | mov TAB:RB, TAB:RB->metatable
+ |2:
+ | test TAB:RB, TAB:RB
+ | mov dword [RA-4], LJ_TNIL
+ | jz ->fff_res1
+ | mov CFUNC:RC, [RA-8]
+ | mov STR:RC, [DISPATCH+DISPATCH_GL(mmname)+4*MM_metatable]
+ | mov dword [RA-4], LJ_TTAB // Store metatable as default result.
+ | mov [RA-8], TAB:RB
+ | mov ARG1, RA // Save result pointer.
+ | mov RA, TAB:RB->hmask
+ | and RA, STR:RC->hash
+ | imul RA, #NODE
+ | add NODE:RA, TAB:RB->node
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+ | cmp dword NODE:RA->key.it, LJ_TSTR
+ | jne >4
+ | cmp dword NODE:RA->key.gcr, STR:RC
+ | je >5
+ |4:
+ | mov NODE:RA, NODE:RA->next
+ | test NODE:RA, NODE:RA
+ | jnz <3
+ | jmp ->fff_res1 // Not found, keep default result.
+ |5:
+ | mov RB, [RA+4]
+ | cmp RB, LJ_TNIL; je ->fff_res1 // Dito for nil value.
+ | mov RC, [RA]
+ | mov RA, ARG1 // Restore result pointer.
+ | mov [RA-4], RB // Return value of mt.__metatable.
+ | mov [RA-8], RC
+ | jmp ->fff_res1
+ |
+ |6:
+ | cmp RB, LJ_TUDATA; je <1
+ | cmp RB, LJ_TISNUM; ja >7
+ | mov RB, LJ_TNUMX
+ |7:
+ | not RB
+ | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)]
+ | jmp <2
+ |
+ |.ffunc_2 setmetatable
+ | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
+ | // Fast path: no mt for table yet and not clearing the mt.
+ | mov TAB:RB, [RA]
+ | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
+ | cmp dword [RA+12], LJ_TTAB; jne ->fff_fallback
+ | mov TAB:RC, [RA+8]
+ | mov TAB:RB->metatable, TAB:RC
+ | mov dword [RA-4], LJ_TTAB // Return original table.
+ | mov [RA-8], TAB:RB
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jz >1
+ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
+ | barrierback TAB:RB, RC
+ |1:
+ | jmp ->fff_res1
+ |
+ |.ffunc_2 rawget
+ | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
+ | mov TAB:RC, [RA]
+ | mov L:RB, SAVE_L
+ | mov ARG2, TAB:RC
+ | mov ARG1, L:RB
+ | mov RB, RA
+ | mov ARG4, BASE // Save BASE and RA.
+ | add RA, 8
+ | mov ARG3, RA
+ | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // cTValue * returned in eax (RC).
+ | mov RA, RB
+ | mov BASE, ARG4
+ | mov RB, [RC] // Copy table slot.
+ | mov RC, [RC+4]
+ | mov [RA-8], RB
+ | mov [RA-4], RC
+ | jmp ->fff_res1
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+ | // Only handles the number case inline (without a base argument).
+ | cmp NARGS:RC, 1+1; jne ->fff_fallback // Exactly one argument.
+ | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
+ | fld qword [RA]
+ | jmp ->fff_resn
+ |
+ |.ffunc_1 tostring
+ | // Only handles the string or number case inline.
+ | cmp dword [RA+4], LJ_TSTR; jne >3
+ | // A __tostring method in the string base metatable is ignored.
+ | mov STR:RC, [RA]
+ |2:
+ | mov dword [RA-4], LJ_TSTR
+ | mov [RA-8], STR:RC
+ | jmp ->fff_res1
+ |3: // Handle numbers inline, unless a number base metatable is present.
+ | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
+ | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0
+ | jne ->fff_fallback
+ | ffgccheck // Caveat: uses label 1.
+ | mov L:RB, SAVE_L
+ | mov ARG1, L:RB
+ | mov ARG2, RA
+ | mov L:RB->base, RA // Add frame since C call can throw.
+ | mov [RA-4], PC
+ | mov SAVE_PC, PC // Redundant (but a defined value).
+ | mov ARG3, BASE // Save BASE.
+ | call extern lj_str_fromnum // (lua_State *L, lua_Number *np)
+ | // GCstr returned in eax (RC).
+ | mov RA, L:RB->base
+ | mov BASE, ARG3
+ | jmp <2
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+ | je >2 // Missing 2nd arg?
+ |1:
+ | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
+ | mov TAB:RB, [RA]
+ | mov ARG2, TAB:RB
+ | mov L:RB, SAVE_L
+ | mov ARG1, L:RB
+ | mov L:RB->base, RA // Add frame since C call can throw.
+ | mov [RA-4], PC
+ | mov SAVE_PC, PC // Redundant (but a defined value).
+ | mov ARG4, BASE // Save BASE.
+ | add RA, 8
+ | mov ARG3, RA
+ | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
+ | // Flag returned in eax (RC).
+ | mov RA, L:RB->base
+ | mov BASE, ARG4
+ | test RC, RC; jz >3 // End of traversal?
+ | mov RB, [RA+8] // Copy key and value to results.
+ | mov RC, [RA+12]
+ | mov [RA-8], RB
+ | mov [RA-4], RC
+ | mov RB, [RA+16]
+ | mov RC, [RA+20]
+ | mov [RA], RB
+ | mov [RA+4], RC
+ |->fff_res2:
+ | mov RD, 1+2
+ | jmp ->fff_res
+ |2: // Set missing 2nd arg to nil.
+ | mov dword [RA+12], LJ_TNIL
+ | jmp <1
+ |3: // End of traversal: return nil.
+ | mov dword [RA-4], LJ_TNIL
+ | jmp ->fff_res1
+ |
+ |.ffunc_1 pairs
+ | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
+ | mov CFUNC:RC, CFUNC:RB->upvalue[0]
+ | mov dword [RA-4], LJ_TFUNC
+ | mov [RA-8], CFUNC:RC
+ | mov dword [RA+12], LJ_TNIL
+ | mov RD, 1+3
+ | jmp ->fff_res
+ |
+ |.ffunc_1 ipairs_aux
+ | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
+ | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
+ | fld qword [RA+8]
+ | fld1
+ | faddp st1
+ | fist ARG2
+ | fstp qword [RA-8]
+ | mov TAB:RB, [RA]
+ | mov RC, ARG2
+ | cmp RC, TAB:RB->asize; jae >2 // Not in array part?
+ | shl RC, 3
+ | add RC, TAB:RB->array
+ |1:
+ | cmp dword [RC+4], LJ_TNIL; je ->fff_res0
+ | mov RB, [RC] // Copy array slot.
+ | mov RC, [RC+4]
+ | mov [RA], RB
+ | mov [RA+4], RC
+ | jmp ->fff_res2
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | cmp dword TAB:RB->hmask, 0; je ->fff_res0
+ | mov ARG1, TAB:RB
+ | mov ARG3, BASE // Save BASE and RA.
+ | mov RB, RA
+ | call extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in eax (RC).
+ | mov RA, RB
+ | mov BASE, ARG3
+ | test RC, RC
+ | jnz <1
+ |->fff_res0:
+ | mov RD, 1+0
+ | jmp ->fff_res
+ |
+ |.ffunc_1 ipairs
+ | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
+ | mov CFUNC:RC, CFUNC:RB->upvalue[0]
+ | mov dword [RA-4], LJ_TFUNC
+ | mov [RA-8], CFUNC:RC
+ | fldz
+ | fstp qword [RA+8]
+ | mov RD, 1+3
+ | jmp ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc_1 pcall
+ | mov [RA-4], PC
+ | mov PC, 8+FRAME_PCALL
+ | mov BASE, RA
+ | add RA, 8
+ | sub NARGS:RC, 1
+ | mov LFUNC:RB, [RA-8]
+ |1:
+ | test byte [DISPATCH+DISPATCH_GL(hookmask)], HOOK_ACTIVE
+ | jnz >3 // Hook active before pcall?
+ |2:
+ | cmp dword [RA-4], LJ_TFUNC
+ | jne ->vmeta_call // Ensure KBASE defined and != BASE.
+ | jmp aword LFUNC:RB->gate
+ |3:
+ | add PC, 1 // Use FRAME_PCALLH if hook was active.
+ | jmp <2
+ |
+ |.ffunc_2 xpcall
+ | cmp dword [RA+12], LJ_TFUNC; jne ->fff_fallback
+ | mov [RA-4], PC
+ | mov RB, [RA+4] // Swap function and traceback.
+ | mov [RA+12], RB
+ | mov dword [RA+4], LJ_TFUNC
+ | mov LFUNC:RB, [RA]
+ | mov PC, [RA+8]
+ | mov [RA+8], LFUNC:RB
+ | mov [RA], PC
+ | mov PC, 2*8+FRAME_PCALL
+ | mov BASE, RA
+ | add RA, 2*8
+ | sub NARGS:RC, 2
+ | jmp <1
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |.macro coroutine_resume_wrap, resume
+ |9: // Need to restore PC for fallback handler.
+ | mov PC, SAVE_PC
+ | jmp ->fff_fallback
+ |
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | mov L:RB, [RA]
+ |.else
+ |.ffunc coroutine_wrap_aux
+ | mov L:RB, CFUNC:RB->upvalue[0].gcr
+ |.endif
+ | mov [RA-4], PC
+ | mov SAVE_PC, PC
+ | mov ARG1, L:RB
+ |.if resume
+ | cmp dword [RA+4], LJ_TTHREAD; jne <9
+ |.endif
+ | cmp aword L:RB->cframe, 0; jne <9
+ | cmp byte L:RB->status, LUA_YIELD; ja <9
+ | mov PC, L:RB->top
+ | mov ARG2, PC
+ | je >1 // Status != LUA_YIELD (i.e. 0)?
+ | cmp PC, L:RB->base; je <9 // Check for presence of initial func.
+ |1:
+ |.if resume
+ | lea PC, [PC+NARGS:RC*8-16] // Check stack space (-1-thread).
+ |.else
+ | lea PC, [PC+NARGS:RC*8-8] // Check stack space (-1).
+ |.endif
+ | cmp PC, L:RB->maxstack; ja <9
+ | mov L:RB->top, PC
+ |
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, RA
+ |.if resume
+ | add RA, 8 // Keep resumed thread in stack for GC.
+ |.endif
+ | mov L:RB->top, RA
+ | mov RB, ARG2
+ |.if resume
+ | lea RA, [RA+NARGS:RC*8-24] // RA = end of source for stack move.
+ |.else
+ | lea RA, [RA+NARGS:RC*8-16] // RA = end of source for stack move.
+ |.endif
+ | sub RA, PC // Relative to PC.
+ |
+ | cmp PC, RB
+ | je >3
+ |2: // Move args to coroutine.
+ | mov RC, [PC+RA+4]
+ | mov [PC-4], RC
+ | mov RC, [PC+RA]
+ | mov [PC-8], RC
+ | sub PC, 8
+ | cmp PC, RB
+ | jne <2
+ |3:
+ | xor RA, RA
+ | mov ARG4, RA
+ | mov ARG3, RA
+ | call ->vm_resume // (lua_State *L, StkId base, 0, 0)
+ | set_vmstate INTERP
+ |
+ | mov L:RB, SAVE_L
+ | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
+ | mov BASE, L:RB->base
+ | cmp eax, LUA_YIELD
+ | ja >8
+ |4:
+ | mov RA, L:PC->base
+ | mov KBASE, L:PC->top
+ | mov L:PC->top, RA // Clear coroutine stack.
+ | mov PC, KBASE
+ | sub PC, RA
+ | je >6 // No results?
+ | lea RD, [BASE+PC]
+ | shr PC, 3
+ | cmp RD, L:RB->maxstack
+ | ja >9 // Need to grow stack?
+ |
+ | mov RB, BASE
+ | sub RB, RA
+ |5: // Move results from coroutine.
+ | mov RD, [RA]
+ | mov [RA+RB], RD
+ | mov RD, [RA+4]
+ | mov [RA+RB+4], RD
+ | add RA, 8
+ | cmp RA, KBASE
+ | jne <5
+ |6:
+ |.if resume
+ | lea RD, [PC+2] // nresults+1 = 1 + true + results.
+ | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
+ |.else
+ | lea RD, [PC+1] // nresults+1 = 1 + results.
+ |.endif
+ |7:
+ | mov PC, SAVE_PC
+ | mov NRESULTS, RD
+ |.if resume
+ | mov RA, -8
+ |.else
+ | xor RA, RA
+ |.endif
+ | test PC, FRAME_TYPE
+ | jz ->BC_RET_Z
+ | jmp ->vm_return
+ |
+ |8: // Coroutine returned with error (at co->top-1).
+ |.if resume
+ | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
+ | mov RA, L:PC->top
+ | sub RA, 8
+ | mov L:PC->top, RA // Clear error from coroutine stack.
+ | mov RD, [RA] // Copy error message.
+ | mov [BASE], RD
+ | mov RD, [RA+4]
+ | mov [BASE+4], RD
+ | mov RD, 1+2 // nresults+1 = 1 + false + error.
+ | jmp <7
+ |.else
+ | mov ARG2, L:PC
+ | mov ARG1, L:RB
+ | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
+ | // Error function does not return.
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+ | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
+ | mov L:RA->top, KBASE // Undo coroutine stack clearing.
+ | mov ARG2, PC
+ | mov ARG1, L:RB
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov BASE, L:RB->base
+ | jmp <4 // Retry the stack move.
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+ | mov L:RB, SAVE_L
+ | mov [RA-4], PC
+ | test aword L:RB->cframe, CFRAME_CANYIELD
+ | jz ->fff_fallback
+ | mov L:RB->base, RA
+ | lea RC, [RA+NARGS:RC*8-8]
+ | mov L:RB->top, RC
+ | xor eax, eax
+ | mov aword L:RB->cframe, eax
+ | mov al, LUA_YIELD
+ | mov byte L:RB->status, al
+ | jmp ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+ |.ffunc_n math_abs
+ | fabs
+ | // fallthrough
+ |->fff_resn:
+ | fstp qword [RA-8]
+ |->fff_res1:
+ | mov RD, 1+1
+ |->fff_res:
+ | mov NRESULTS, RD
+ |->fff_res_:
+ | test PC, FRAME_TYPE
+ | jnz >7
+ |5:
+ | cmp PC_RB, RDL // More results expected?
+ | ja >6
+ | // BASE and KBASE are assumed to be set for the calling frame.
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | mov dword [RA+RD*8-12], LJ_TNIL
+ | add RD, 1
+ | jmp <5
+ |
+ |7: // Non-standard return case.
+ | mov BASE, RA
+ | mov RA, -8 // Results start at BASE+RA = BASE-8.
+ | jmp ->vm_return
+ |
+ |.ffunc_n math_floor; call ->vm_floor; jmp ->fff_resn
+ |.ffunc_n math_ceil; call ->vm_ceil; jmp ->fff_resn
+ |
+ |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
+ |
+ |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn
+ |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
+ |.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn
+ |
+ |.ffunc_n math_sin; fsin; jmp ->fff_resn
+ |.ffunc_n math_cos; fcos; jmp ->fff_resn
+ |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn
+ |
+ |.ffunc_n math_asin
+ | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
+ | jmp ->fff_resn
+ |.ffunc_n math_acos
+ | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
+ | jmp ->fff_resn
+ |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
+ |
+ |.macro math_extern, func
+ |.ffunc_n math_ .. func
+ | mov ARG5, RA
+ | fstp FPARG1
+ | mov RB, BASE
+ | call extern func
+ | mov RA, ARG5
+ | mov BASE, RB
+ | jmp ->fff_resn
+ |.endmacro
+ |
+ | math_extern sinh
+ | math_extern cosh
+ | math_extern tanh
+ |
+ |->ff_math_deg:
+ |.ffunc_n math_rad; fmul qword CFUNC:RB->upvalue[0]; jmp ->fff_resn
+ |
+ |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
+ |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
+ |
+ |.ffunc_1 math_frexp
+ | mov RB, [RA+4]
+ | cmp RB, LJ_TISNUM; ja ->fff_fallback
+ | mov RC, [RA]
+ | mov [RA-4], RB; mov [RA-8], RC
+ | shl RB, 1; cmp RB, 0xffe00000; jae >3
+ | or RC, RB; jz >3
+ | mov RC, 1022
+ | cmp RB, 0x00200000; jb >4
+ |1:
+ | shr RB, 21; sub RB, RC // Extract and unbias exponent.
+ | mov ARG1, RB; fild ARG1
+ | mov RB, [RA-4]
+ | and RB, 0x800fffff // Mask off exponent.
+ | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
+ | mov [RA-4], RB
+ |2:
+ | fstp qword [RA]
+ | mov RD, 1+2
+ | jmp ->fff_res
+ |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
+ | fldz; jmp <2
+ |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
+ | fld qword [RA]
+ | mov ARG1, 0x5a800000; fmul ARG1 // x = x*2^54
+ | fstp qword [RA-8]
+ | mov RB, [RA-4]; mov RC, 1076; shl RB, 1; jmp <1
+ |
+ |.ffunc_n math_modf
+ | mov RB, [RA+4]
+ | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
+ | fdup
+ | call ->vm_trunc
+ | fsub st1, st0
+ |1:
+ | fstp qword [RA-8]; fstp qword [RA]
+ | mov RC, [RA-4]; mov RB, [RA+4]
+ | xor RC, RB; js >3 // Need to adjust sign?
+ |2:
+ | mov RD, 1+2
+ | jmp ->fff_res
+ |3:
+ | xor RB, 0x80000000; mov [RA+4], RB; jmp <2 // Flip sign of fraction.
+ |4:
+ | fldz; fxch; jmp <1 // Return +-Inf and +-0.
+ |
+ |.ffunc_nnr math_fmod
+ |1: ; fprem; fnstsw ax; sahf; jp <1
+ | fpop1
+ | jmp ->fff_resn
+ |
+ |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
+ |
+ |.macro math_minmax, name, cmovop, nocmovop
+ |.ffunc_n name
+ | mov RB, 2
+ |1:
+ | cmp RB, RD; jae ->fff_resn
+ | cmp dword [RA+RB*8-4], LJ_TISNUM; ja >5
+ | fld qword [RA+RB*8-8]
+ ||if (cmov) {
+ | fucomi st1; cmovop st1; fpop1
+ ||} else {
+ | push eax
+ | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop
+ | pop eax
+ ||}
+ | add RB, 1
+ | jmp <1
+ |.endmacro
+ |
+ | math_minmax math_min, fcmovnbe, jz
+ | math_minmax math_max, fcmovbe, jnz
+ |5:
+ | fpop; jmp ->fff_fallback
+ |
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc_1 string_len
+ | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
+ | mov STR:RB, [RA]
+ | fild dword STR:RB->len
+ | jmp ->fff_resn
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+ | cmp NARGS:RC, 1+1; jne ->fff_fallback
+ | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
+ | mov STR:RB, [RA]
+ | cmp dword STR:RB->len, 1
+ | jb ->fff_res0 // Return no results for empty string.
+ | movzx RB, byte STR:RB[1]
+ | mov ARG1, RB
+ | fild ARG1
+ | jmp ->fff_resn
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
+ | cmp NARGS:RC, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
+ | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
+ | fld qword [RA]
+ | fistp ARG4
+ | cmp ARG4, 255; ja ->fff_fallback
+ | lea RC, ARG4 // Little-endian.
+ | mov ARG5, RA // Save RA.
+ | mov ARG3, 1
+ | mov ARG2, RC
+ |->fff_newstr:
+ | mov L:RB, SAVE_L
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_str_new // (lua_State *L, char *str, size_t l)
+ | // GCstr * returned in eax (RC).
+ | mov RA, ARG5
+ | mov BASE, L:RB->base
+ | mov dword [RA-4], LJ_TSTR
+ | mov [RA-8], STR:RC
+ | jmp ->fff_res1
+ |
+ |.ffunc string_sub
+ | ffgccheck
+ | mov ARG5, RA // Save RA.
+ | mov ARG4, -1
+ | cmp NARGS:RC, 1+2; jb ->fff_fallback
+ | jna >1
+ | cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback
+ | fld qword [RA+16]
+ | fistp ARG4
+ |1:
+ | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
+ | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
+ | mov STR:RB, [RA]
+ | mov ARG2, STR:RB
+ | mov RB, STR:RB->len
+ | fld qword [RA+8]
+ | fistp ARG3
+ | mov RC, ARG4
+ | cmp RB, RC // len < end? (unsigned compare)
+ | jb >5
+ |2:
+ | mov RA, ARG3
+ | test RA, RA // start <= 0?
+ | jle >7
+ |3:
+ | mov STR:RB, ARG2
+ | sub RC, RA // start > end?
+ | jl ->fff_emptystr
+ | lea RB, [STR:RB+RA+#STR-1]
+ | add RC, 1
+ |4:
+ | mov ARG2, RB
+ | mov ARG3, RC
+ | jmp ->fff_newstr
+ |
+ |5: // Negative end or overflow.
+ | jl >6
+ | lea RC, [RC+RB+1] // end = end+(len+1)
+ | jmp <2
+ |6: // Overflow.
+ | mov RC, RB // end = len
+ | jmp <2
+ |
+ |7: // Negative start or underflow.
+ | je >8
+ | add RA, RB // start = start+(len+1)
+ | add RA, 1
+ | jg <3 // start > 0?
+ |8: // Underflow.
+ | mov RA, 1 // start = 1
+ | jmp <3
+ |
+ |->fff_emptystr: // Range underflow.
+ | xor RC, RC // Zero length. Any ptr in RB is ok.
+ | jmp <4
+ |
+ |.ffunc_2 string_rep // Only handle the 1-char case inline.
+ | ffgccheck
+ | mov ARG5, RA // Save RA.
+ | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
+ | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
+ | mov STR:RB, [RA]
+ | fld qword [RA+8]
+ | fistp ARG4
+ | mov RC, ARG4
+ | test RC, RC
+ | jle ->fff_emptystr // Count <= 0? (or non-int)
+ | cmp dword STR:RB->len, 1
+ | jb ->fff_emptystr // Zero length string?
+ | jne ->fff_fallback_2 // Fallback for > 1-char strings.
+ | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
+ | movzx RA, byte STR:RB[1]
+ | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
+ | mov ARG3, RC
+ | mov ARG2, RB
+ |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
+ | mov [RB], RAL
+ | add RB, 1
+ | sub RC, 1
+ | jnz <1
+ | jmp ->fff_newstr
+ |
+ |.ffunc_1 string_reverse
+ | ffgccheck
+ | mov ARG5, RA // Save RA.
+ | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
+ | mov STR:RB, [RA]
+ | mov RC, STR:RB->len
+ | test RC, RC
+ | jz ->fff_emptystr // Zero length string?
+ | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
+ | add RB, #STR
+ | mov ARG4, PC // Need another temp register.
+ | mov ARG3, RC
+ | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
+ | mov ARG2, PC
+ |1:
+ | movzx RA, byte [RB]
+ | add RB, 1
+ | sub RC, 1
+ | mov [PC+RC], RAL
+ | jnz <1
+ | mov PC, ARG4
+ | jmp ->fff_newstr
+ |
+ |.macro ffstring_case, name, lo, hi
+ | .ffunc_1 name
+ | ffgccheck
+ | mov ARG5, RA // Save RA.
+ | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
+ | mov STR:RB, [RA]
+ | mov RC, STR:RB->len
+ | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
+ | add RB, #STR
+ | mov ARG4, PC // Need another temp register.
+ | mov ARG3, RC
+ | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
+ | mov ARG2, PC
+ | jmp >3
+ |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
+ | movzx RA, byte [RB+RC]
+ | cmp RA, lo
+ | jb >2
+ | cmp RA, hi
+ | ja >2
+ | xor RA, 0x20
+ |2:
+ | mov [PC+RC], RAL
+ |3:
+ | sub RC, 1
+ | jns <1
+ | mov PC, ARG4
+ | jmp ->fff_newstr
+ |.endmacro
+ |
+ |ffstring_case string_lower, 0x41, 0x5a
+ |ffstring_case string_upper, 0x61, 0x7a
+ |
+ |//-- Table library ------------------------------------------------------
+ |
+ |.ffunc_1 table_getn
+ | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
+ | mov TAB:RB, [RA]
+ | mov ARG1, TAB:RB
+ | mov RB, RA // Save RA and BASE.
+ | mov ARG2, BASE
+ | call extern lj_tab_len // (GCtab *t)
+ | // Length of table returned in eax (RC).
+ | mov ARG1, RC
+ | mov RA, RB // Restore RA and BASE.
+ | mov BASE, ARG2
+ | fild ARG1
+ | jmp ->fff_resn
+ |
+ |//-- Bit library --------------------------------------------------------
+ |
+ |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
+ |
+ |.ffunc_n bit_tobit
+ | mov ARG5, TOBIT_BIAS
+ | fadd ARG5
+ | fstp FPARG1 // 64 bit FP store.
+ | fild ARG1 // 32 bit integer load (s2lfwd ok).
+ | jmp ->fff_resn
+ |
+ |.macro .ffunc_bit, name
+ | .ffunc_n name
+ | mov ARG5, TOBIT_BIAS
+ | fadd ARG5
+ | fstp FPARG1
+ | mov RB, ARG1
+ |.endmacro
+ |
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name
+ | mov NRESULTS, NARGS:RC // Save for fallback.
+ | lea RC, [RA+NARGS:RC*8-16]
+ |1:
+ | cmp RC, RA
+ | jbe ->fff_resbit
+ | cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op
+ | fld qword [RC]
+ | fadd ARG5
+ | fstp FPARG1
+ | ins RB, ARG1
+ | sub RC, 8
+ | jmp <1
+ |.endmacro
+ |
+ |.ffunc_bit_op bit_band, and
+ |.ffunc_bit_op bit_bor, or
+ |.ffunc_bit_op bit_bxor, xor
+ |
+ |.ffunc_bit bit_bswap
+ | bswap RB
+ | jmp ->fff_resbit
+ |
+ |.ffunc_bit bit_bnot
+ | not RB
+ |->fff_resbit:
+ | mov ARG1, RB
+ | fild ARG1
+ | jmp ->fff_resn
+ |
+ |->fff_fallback_bit_op:
+ | mov NARGS:RC, NRESULTS // Restore for fallback
+ | jmp ->fff_fallback
+ |
+ |.macro .ffunc_bit_sh, name, ins
+ | .ffunc_nn name
+ | mov ARG5, TOBIT_BIAS
+ | fadd ARG5
+ | fstp FPARG3
+ | fadd ARG5
+ | fstp FPARG1
+ | mov RC, RA // Assumes RA is ecx.
+ | mov RA, ARG3
+ | mov RB, ARG1
+ | ins RB, cl
+ | mov RA, RC
+ | jmp ->fff_resbit
+ |.endmacro
+ |
+ |.ffunc_bit_sh bit_lshift, shl
+ |.ffunc_bit_sh bit_rshift, shr
+ |.ffunc_bit_sh bit_arshift, sar
+ |.ffunc_bit_sh bit_rol, rol
+ |.ffunc_bit_sh bit_ror, ror
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback_2:
+ | mov NARGS:RC, 1+2 // Other args are ignored, anyway.
+ | jmp ->fff_fallback
+ |->fff_fallback_1:
+ | mov NARGS:RC, 1+1 // Other args are ignored, anyway.
+ |->fff_fallback: // Call fast function fallback handler.
+ | // RA = new base, RC = nargs+1
+ | mov L:RB, SAVE_L
+ | sub BASE, RA
+ | mov [RA-4], PC
+ | mov SAVE_PC, PC // Redundant (but a defined value).
+ | mov ARG3, BASE // Save old BASE (relative).
+ | mov L:RB->base, RA
+ | lea RC, [RA+NARGS:RC*8-8]
+ | mov ARG1, L:RB
+ | lea BASE, [RC+8*LUA_MINSTACK] // Ensure enough space for handler.
+ | mov L:RB->top, RC
+ | mov CFUNC:RA, [RA-8]
+ | cmp BASE, L:RB->maxstack
+ | ja >5 // Need to grow stack.
+ | call aword CFUNC:RA->f // (lua_State *L)
+ | // Either throws an error or recovers and returns 0 or NRESULTS (+1).
+ | test RC, RC; jnz >3
+ |1: // Returned 0: retry fast path.
+ | mov RA, L:RB->base
+ | mov RC, L:RB->top
+ | sub RC, RA
+ | shr RC, 3
+ | add NARGS:RC, 1
+ | mov LFUNC:RB, [RA-8]
+ | mov BASE, ARG3 // Restore old BASE.
+ | add BASE, RA
+ | cmp [RA-4], PC; jne >2 // Callable modified by handler?
+ | jmp aword LFUNC:RB->gate // Retry the call.
+ |
+ |2: // Run modified callable.
+ | cmp dword [RA-4], LJ_TFUNC
+ | jne ->vmeta_call
+ | jmp aword LFUNC:RB->gate // Retry the call.
+ |
+ |3: // Returned NRESULTS (already in RC/RD).
+ | mov RA, L:RB->base
+ | mov BASE, ARG3 // Restore old BASE.
+ | add BASE, RA
+ | jmp ->fff_res
+ |
+ |5: // Grow stack for fallback handler.
+ | mov ARG2, LUA_MINSTACK
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | jmp <1 // Dumb retry (goes through ff first).
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // RA = new base, RC = nargs+1
+ | pop RB // Must keep stack at same level.
+ | mov ARG3, RB // Save return address
+ | mov L:RB, SAVE_L
+ | sub BASE, RA
+ | mov ARG2, BASE // Save old BASE (relative).
+ | mov [RA-4], PC
+ | mov SAVE_PC, PC // Redundant (but a defined value).
+ | mov L:RB->base, RA
+ | lea RC, [RA+NARGS:RC*8-8]
+ | mov ARG1, L:RB
+ | mov L:RB->top, RC
+ | call extern lj_gc_step // (lua_State *L)
+ | mov RA, L:RB->base
+ | mov RC, L:RB->top
+ | sub RC, RA
+ | shr RC, 3
+ | add NARGS:RC, 1
+ | mov PC, [RA-4]
+ | mov BASE, ARG2 // Restore old BASE.
+ | add BASE, RA
+ | mov RB, ARG3
+ | push RB // Restore return address.
+ | mov LFUNC:RB, [RA-8]
+ | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+#if LJ_HASJIT
+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
+ | test RDL, HOOK_VMEVENT // No recording while in vmevent.
+ | jnz >5
+ | // Decrement the hookcount for consistency, but always do the call.
+ | test RDL, HOOK_ACTIVE
+ | jnz >1
+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
+ | jz >1
+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
+ | jmp >1
+#endif
+ |
+ |->vm_hook: // Dispatch target with enabled hooks.
+ | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
+ | test RDL, HOOK_ACTIVE // Hook already active?
+ | jnz >5
+ |
+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
+ | jz >5
+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
+ | jz >1
+ | test RDL, LUA_MASKLINE
+ | jz >5
+ |1:
+ | mov L:RB, SAVE_L
+ | mov RD, NRESULTS // Dynamic top for *M instructions.
+ | mov ARG3, RD
+ | mov L:RB->base, BASE
+ | mov ARG2, PC
+ | mov ARG1, L:RB
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+ | call extern lj_dispatch_ins // (lua_State *L, BCIns *pc, int nres)
+ |4:
+ | mov BASE, L:RB->base
+ | movzx RA, PC_RA
+ |5:
+ | movzx OP, PC_OP
+ | movzx RD, PC_RD
+ | jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Re-dispatch to static ins.
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+#if LJ_HASJIT
+ | mov L:RB, SAVE_L
+ | lea RA, [DISPATCH+GG_DISP2J]
+ | mov ARG2, PC
+ | mov ARG1, RA
+ | mov [DISPATCH+DISPATCH_J(L)], L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
+ | jmp <4
+#endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+#if LJ_HASJIT
+ | mov L:RB, SAVE_L
+ | lea RA, [DISPATCH+GG_DISP2J]
+ | mov ARG2, PC
+ | mov ARG1, RA
+ | mov [DISPATCH+DISPATCH_J(L)], L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
+ | mov BASE, L:RB->base
+ | // Dispatch the first instruction and optionally record it.
+ | ins_next
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Called from an exit stub with the exit number on the stack.
+ |// The 16 bit exit number is stored with two (sign-extended) push imm8.
+ |->vm_exit_handler:
+#if LJ_HASJIT
+ | push ebp; lea ebp, [esp+12]; push ebp
+ | push ebx; push edx; push ecx; push eax
+ | movzx RC, byte [ebp-4] // Reconstruct exit number.
+ | mov RCH, byte [ebp-8]
+ | mov [ebp-4], edi; mov [ebp-8], esi
+ | // Caveat: DISPATCH is ebx.
+ | mov DISPATCH, [ebp]
+ | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
+ | set_vmstate EXIT
+ | mov [DISPATCH+DISPATCH_J(exitno)], RC
+ | mov [DISPATCH+DISPATCH_J(parent)], RA
+ | sub esp, 8*8+16 // Room for SSE regs + args.
+ |
+ | // Must not access SSE regs if SSE2 is not present.
+ | test dword [DISPATCH+DISPATCH_J(flags)], JIT_F_SSE2
+ | jz >1
+ | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
+ | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
+ | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
+ | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
+ |1:
+ | // Caveat: RB is ebp.
+ | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
+ | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
+ | mov [DISPATCH+DISPATCH_J(L)], L:RB
+ | lea RC, [esp+16]
+ | mov L:RB->base, BASE
+ | lea RA, [DISPATCH+GG_DISP2J]
+ | mov ARG2, RC
+ | mov ARG1, RA
+ | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
+ | // Interpreter C frame returned in eax.
+ | mov esp, eax // Reposition stack to C frame.
+ | mov BASE, L:RB->base
+ | mov PC, SAVE_PC
+ | mov SAVE_L, L:RB // Needed for on-trace resume/yield.
+#endif
+ |->vm_exit_interp:
+#if LJ_HASJIT
+ | mov LFUNC:KBASE, [BASE-8]
+ | mov PROTO:KBASE, LFUNC:KBASE->pt
+ | mov KBASE, PROTO:KBASE->k
+ | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
+ | set_vmstate INTERP
+ | ins_next
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// FP value rounding. Called by math.floor/math.ceil fast functions
+ |// and from JIT code. Arg/ret on x87 stack. No int/xmm registers modified.
+ |// Temporarily switches the x87 rounding mode, rounds st0 with frndint,
+ |// then restores the original control word. mode1 is OR'ed into the
+ |// control word (sets rounding-control bits), mode2 is AND'ed afterwards
+ |// (clears the unwanted rounding-control bits).
+ |.macro vm_round, mode1, mode2
+ | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
+ | mov [esp+8], eax // Preserve eax (no int regs may be modified).
+ | mov ax, mode1 // New rounding-control bits.
+ | or ax, [esp+4] // Merge into saved control word.
+ |.if mode2 ~= 0xffff
+ | and ax, mode2 // Clear rounding-control bits not wanted.
+ |.endif
+ | mov [esp+6], ax
+ | fldcw word [esp+6] // Activate modified control word.
+ | frndint // Round st0 per current rounding mode.
+ | fldcw word [esp+4] // Restore original control word.
+ | mov eax, [esp+8]
+ | ret
+ |.endmacro
+ |
+ |->vm_floor:
+ | vm_round 0x0400, 0xf7ff // x87 RC = 01: round toward -Inf.
+ |
+ |->vm_ceil:
+ | vm_round 0x0800, 0xfbff // x87 RC = 10: round toward +Inf.
+ |
+ |->vm_trunc:
+ | vm_round 0x0c00, 0xffff // x87 RC = 11: truncate (toward zero).
+ |
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+ |// Args/ret on x87 stack (y on top). No xmm registers modified.
+ |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
+ |// Computes x - floor(x/y)*y, i.e. floored-division modulo.
+ |->vm_mod:
+ | fld st1 // Push x: st0=x, st1=y, st2=x.
+ | fdiv st1 // st0 = x/y.
+ | fnstcw word [esp+4] // Save FP control word.
+ | mov ax, 0x0400 // RC = round toward -Inf (for floor).
+ | or ax, [esp+4]
+ | and ax, 0xf7ff
+ | mov [esp+6], ax
+ | fldcw word [esp+6]
+ | frndint // st0 = floor(x/y).
+ | fldcw word [esp+4] // Restore control word.
+ | fmulp st1 // st0 = floor(x/y)*y, st1 = x.
+ | fsubp st1 // st0 = x - floor(x/y)*y.
+ | ret
+ |
+ |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
+ |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
+ |// Caveat: needs 3 slots on x87 stack!
+ |->vm_exp:
+ | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
+ |->vm_exp2:
+ | // The dword compares test the single-precision bit patterns of
+ | // +Inf (0x7f800000) and -Inf (0xff800000) of the rounded store.
+ | fst dword [esp+4] // Caveat: overwrites ARG1.
+ | cmp dword [esp+4], 0x7f800000; je >1 // Special case: e^+Inf = +Inf
+ | cmp dword [esp+4], 0xff800000; je >2 // Special case: e^-Inf = 0
+ |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
+ | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
+ | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
+ |1:
+ | ret
+ |2:
+ | fpop; fldz; ret // Return +0.
+ |
+ |// Generic power function x^y. Called by BC_POW, math.pow fast function
+ |// and vm_arith. Args/ret on x87 stack (y on top). No int/xmm regs modified.
+ |// Caveat: needs 3 slots on x87 stack!
+ |// Tests whether y is an exactly representable integer by a convert/
+ |// reload round-trip: if fild([esp+4]) compares equal to y, the integer
+ |// copy left at [esp+4] is consumed by vm_powi below.
+ |->vm_pow:
+ | fist dword [esp+4] // Store/reload int before comparison.
+ | fild dword [esp+4] // Integral exponent used in vm_powi.
+ ||if (cmov) {
+ | fucomip st1
+ ||} else {
+ | push eax; fucomp st1; fnstsw ax; sahf; pop eax
+ ||}
+ | jnz >8 // Branch for FP exponents.
+ | jp >9 // Branch for NaN exponent.
+ | fpop // Pop y and fallthrough to vm_powi.
+ |
+ |// FP/int power function x^i. Called from JIT code. Arg1/ret on x87 stack.
+ |// Arg2 (int) on C stack. No int/xmm regs modified.
+ |// Caveat: needs 2 slots on x87 stack!
+ |// Binary (square-and-multiply) exponentiation. Negative exponents are
+ |// handled by inverting x first: x^-i == (1/x)^i.
+ |->vm_powi:
+ | push eax
+ | mov eax, [esp+8] // Fetch integer exponent i.
+ | cmp eax, 1; jle >6 // i<=1?
+ | // Now 1 < (unsigned)i <= 0x80000000.
+ |1: // Handle leading zeros.
+ | test eax, 1; jnz >2
+ | fmul st0 // Square x while low bit of i is 0.
+ | shr eax, 1
+ | jmp <1
+ |2:
+ | shr eax, 1; jz >5 // Only one set bit left? Result is st0.
+ | fdup // Keep running square in st0, result in st1.
+ |3: // Handle trailing bits.
+ | fmul st0 // Square the running power.
+ | shr eax, 1; jz >4
+ | jnc <3
+ | fmul st1, st0 // Bit set: multiply into the result.
+ | jmp <3
+ |4:
+ | fmulp st1 // Fold last square into the result.
+ |5:
+ | pop eax
+ | ret
+ |6:
+ | je <5 // x^1 ==> x
+ | jb >7
+ | fld1; fdivrp st1 // Negative i: replace x with 1/x.
+ | neg eax
+ | cmp eax, 1; je <5 // x^-1 ==> 1/x
+ | jmp <1 // x^-i ==> (1/x)^i
+ |7:
+ | fpop; fld1 // x^0 ==> 1
+ | pop eax
+ | ret
+ |
+ |8: // FP/FP power function x^y.
+ | // Store single-precision copies for bit-pattern tests:
+ | // [esp+8] = y, [esp+12] = x. Note the fxch leaves st0=x, st1=y.
+ | push eax
+ | fst dword [esp+8]
+ | fxch
+ | fst dword [esp+12]
+ | mov eax, [esp+8]; shl eax, 1 // Shift out sign bit of y.
+ | cmp eax, 0xff000000; je >2 // x^+-Inf?
+ | mov eax, [esp+12]; shl eax, 1; je >4 // +-0^y?
+ | cmp eax, 0xff000000; je >4 // +-Inf^y?
+ | pop eax
+ | fyl2x // st0 = y*log2(x).
+ | jmp ->vm_exp2raw // Finish as 2^(y*log2(x)).
+ |
+ |9: // Handle x^NaN.
+ | fld1
+ ||if (cmov) {
+ | fucomip st2
+ ||} else {
+ | push eax; fucomp st2; fnstsw ax; sahf; pop eax
+ ||}
+ | je >1 // 1^NaN ==> 1
+ | fxch // x^NaN ==> NaN
+ |1:
+ | fpop
+ | ret
+ |
+ |2: // Handle x^+-Inf.
+ | fabs // st0 = |x|, st1 = y.
+ | fld1
+ ||if (cmov) {
+ | fucomip st1
+ ||} else {
+ | fucomp st1; fnstsw ax; sahf
+ ||}
+ | je >3 // +-1^+-Inf ==> 1
+ | // NOTE(review): CF from the compare above is set iff |x| > 1;
+ | // combined with the sign of y (from [esp+8]) this selects +Inf vs +0.
+ | fpop; fabs; fldz; mov eax, 0; setc al
+ | ror eax, 1; xor eax, [esp+8]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
+ | fxch
+ |3:
+ | fpop1; fabs; pop eax
+ | ret
+ |
+ |4: // Handle +-0^y or +-Inf^y.
+ | cmp dword [esp+8], 0; jge <3 // y >= 0, x^y ==> |x|
+ | fpop; fpop
+ | test eax, eax; pop eax; jz >5 // y < 0, +-0^y ==> +Inf
+ | fldz // y < 0, +-Inf^y ==> 0
+ | ret
+ |5:
+ | mov dword [esp+8], 0x7f800000 // Return +Inf.
+ | fld dword [esp+8]
+ | ret
+ |
+ |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
+ |// Computes fpm(x) for extended math functions. ORDER FPM.
+ |// Dispatch ladder: 0=floor 1=ceil 2=trunc 3=sqrt 4=exp 5=exp2
+ |// 6=log 7=log2 8=log10 9=sin 10=cos 11=tan.
+ |->vm_foldfpm:
+ | mov eax, [esp+12] // fpm selector.
+ | fld qword [esp+4] // Load x onto the x87 stack.
+ | cmp eax, 1; jb ->vm_floor; je ->vm_ceil
+ | cmp eax, 3; jb ->vm_trunc; ja >1
+ | fsqrt; ret // fpm == 3: sqrt(x).
+ |1: ; cmp eax, 5; jb ->vm_exp; je ->vm_exp2
+ | cmp eax, 7; je >1; ja >2
+ | fldln2; fxch; fyl2x; ret // fpm == 6: ln(2)*log2(x) = log(x).
+ |1: ; fld1; fxch; fyl2x; ret // fpm == 7: 1*log2(x) = log2(x).
+ |2: ; cmp eax, 9; je >1; ja >2
+ | fldlg2; fxch; fyl2x; ret // fpm == 8: log10(2)*log2(x) = log10(x).
+ |1: ; fsin; ret // fpm == 9: sin(x).
+ |2: ; cmp eax, 11; je >1; ja >9
+ | fcos; ret // fpm == 10: cos(x).
+ |1: ; fptan; fpop; ret // fpm == 11: tan(x); pop the 1.0 fptan pushes.
+ |9: ; int3 // Bad fpm.
+ |
+ |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
+ |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
+ |// and basic math functions. ORDER ARITH
+ |// Dispatch: 0=add 1=sub 2=mul 3=div 4=mod 5=pow 6=unm 7=abs
+ |// 8=atan2 9=ldexp 10=min 11=max.
+ |->vm_foldarith:
+ | mov eax, [esp+20] // op selector.
+ | fld qword [esp+4] // st1 = x.
+ | fld qword [esp+12] // st0 = y.
+ | cmp eax, 1; je >1; ja >2
+ | faddp st1; ret // op == 0: x+y.
+ |1: ; fsubp st1; ret // op == 1: x-y.
+ |2: ; cmp eax, 3; je >1; ja >2
+ | fmulp st1; ret // op == 2: x*y.
+ |1: ; fdivp st1; ret // op == 3: x/y.
+ |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow // op == 4/5: mod/pow.
+ | cmp eax, 7; je >1; ja >2
+ | fpop; fchs; ret // op == 6: -x (y unused).
+ |1: ; fpop; fabs; ret // op == 7: |x| (y unused).
+ |2: ; cmp eax, 9; je >1; ja >2
+ | fpatan; ret // op == 8: atan2(x, y).
+ |1: ; fxch; fscale; fpop1; ret // op == 9: x*2^trunc(y) (ldexp).
+ |2: ; cmp eax, 11; je >1; ja >9
+ ||if (cmov) {
+ | fucomi st1; fcmovnbe st1; fpop1; ret // op == 10: min(x, y).
+ |1: ; fucomi st1; fcmovbe st1; fpop1; ret // op == 11: max(x, y).
+ ||} else {
+ | fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret
+ |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret
+ ||}
+ |9: ; int3 // Bad op.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
+ |// Detects CPUID support by trying to toggle the ID bit (bit 21) in
+ |// EFLAGS; if the bit sticks, executes CPUID leaf f and stores
+ |// eax/ebx/ecx/edx into res[0..3]. Returns 0 if CPUID is unsupported.
+ |->vm_cpuid:
+ | pushfd
+ | pop edx // edx = current EFLAGS.
+ | mov ecx, edx // Keep original copy for comparison.
+ | xor edx, 0x00200000 // Toggle ID bit in flags.
+ | push edx
+ | popfd // Try to write the toggled EFLAGS.
+ | pushfd
+ | pop edx // Read EFLAGS back.
+ | xor eax, eax // Zero means no features supported.
+ | cmp ecx, edx
+ | jz >1 // No ID toggle means no CPUID support.
+ | mov eax, [esp+4] // Argument 1 is function number.
+ | push edi
+ | push ebx // Save callee-saved regs clobbered by cpuid.
+ | cpuid
+ | mov edi, [esp+16] // Argument 2 is result area.
+ | mov [edi], eax
+ | mov [edi+4], ebx
+ | mov [edi+8], ecx
+ | mov [edi+12], edx // res[0..3] = eax/ebx/ecx/edx.
+ | pop ebx
+ | pop edi
+ |1:
+ | ret
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
+{
+ int vk = 0;
+ |// Note: aligning all instructions does not pay off.
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1, RD = src2, JMP with RD = target
+ | ins_AD
+ | checknum RA, ->vmeta_comp
+ | checknum RD, ->vmeta_comp
+ | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
+ | fld qword [BASE+RD*8]
+ | add PC, 4
+ | fcomparepp // eax (RD) modified!
+ | // Unordered: all of ZF CF PF set, ordered: PF clear.
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+ switch (op) {
+ case BC_ISLT:
+ | jbe >2
+ break;
+ case BC_ISGE:
+ | ja >2
+ break;
+ case BC_ISLE:
+ | jb >2
+ break;
+ case BC_ISGT:
+ | jae >2
+ break;
+ default: break; /* Shut up GCC. */
+ }
+ |1:
+ | movzx RD, PC_RD
+ | branchPC RD
+ |2:
+ | ins_next
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
+ | mov RB, [BASE+RD*8+4]
+ | add PC, 4
+ | cmp RB, LJ_TISNUM; ja >5
+ | checknum RA, >5
+ | fld qword [BASE+RA*8]
+ | fld qword [BASE+RD*8]
+ | fcomparepp // eax (RD) modified!
+ iseqne_fp:
+ if (vk) {
+ | jp >2 // Unordered means not equal.
+ | jne >2
+ } else {
+ | jp >2 // Unordered means not equal.
+ | je >1
+ }
+ iseqne_end:
+ if (vk) {
+ |1: // EQ: Branch to the target.
+ | movzx RD, PC_RD
+ | branchPC RD
+ |2: // NE: Fallthrough to next instruction.
+ } else {
+ |2: // NE: Branch to the target.
+ | movzx RD, PC_RD
+ | branchPC RD
+ |1: // EQ: Fallthrough to next instruction.
+ }
+ | ins_next
+ |
+ if (op == BC_ISEQV || op == BC_ISNEV) {
+ |5: // Either or both types are not numbers.
+ | checktp RA, RB // Compare types.
+ | jne <2 // Not the same type?
+ | cmp RB, LJ_TISPRI
+ | jae <1 // Same type and primitive type?
+ |
+ | // Same types and not a primitive type. Compare GCobj or pvalue.
+ | mov RA, [BASE+RA*8]
+ | mov RD, [BASE+RD*8]
+ | cmp RA, RD
+ | je <1 // Same GCobjs or pvalues?
+ | cmp RB, LJ_TISTABUD
+ | ja <2 // Different objects and not table/ud?
+ |
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | mov TAB:RB, TAB:RA->metatable
+ | test TAB:RB, TAB:RB
+ | jz <2 // No metatable?
+ | test byte TAB:RB->nomm, 1<<MM_eq
+ | jnz <2 // Or 'no __eq' flag set?
+ if (vk) {
+ | xor RB, RB // ne = 0
+ } else {
+ | mov RB, 1 // ne = 1
+ }
+ | jmp ->vmeta_equal // Handle __eq metamethod.
+ }
+ break;
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | ins_AND // RA = src, RD = str const, JMP with RD = target
+ | add PC, 4
+ | checkstr RA, >2
+ | mov RA, [BASE+RA*8]
+ | cmp RA, [KBASE+RD*4]
+ iseqne_test:
+ if (vk) {
+ | jne >2
+ } else {
+ | je >1
+ }
+ goto iseqne_end;
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | ins_AD // RA = src, RD = num const, JMP with RD = target
+ | add PC, 4
+ | checknum RA, >2
+ | fld qword [BASE+RA*8]
+ | fld qword [KBASE+RD*8]
+ | fcomparepp // eax (RD) modified!
+ goto iseqne_fp;
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
+ | add PC, 4
+ | checktp RA, RD
+ goto iseqne_test;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
+ | mov RB, [BASE+RD*8+4]
+ | add PC, 4
+ | cmp RB, LJ_TISTRUECOND
+ if (op == BC_IST || op == BC_ISTC) {
+ | jae >1
+ } else {
+ | jb >1
+ }
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | mov [BASE+RA*8+4], RB
+ | mov RB, [BASE+RD*8]
+ | mov [BASE+RA*8], RB
+ }
+ | movzx RD, PC_RD
+ | branchPC RD
+ |1: // Fallthrough to the next instruction.
+ | ins_next
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_MOV:
+ | ins_AD // RA = dst, RD = src
+ | mov RB, [BASE+RD*8+4]
+ | mov RD, [BASE+RD*8] // Overwrites RD.
+ | mov [BASE+RA*8+4], RB
+ | mov [BASE+RA*8], RD
+ | ins_next_
+ break;
+ case BC_NOT:
+ | ins_AD // RA = dst, RD = src
+ | xor RB, RB
+ | checktp RD, LJ_TISTRUECOND
+ | adc RB, LJ_TTRUE
+ | mov [BASE+RA*8+4], RB
+ | ins_next
+ break;
+ case BC_UNM:
+ | ins_AD // RA = dst, RD = src
+ | checknum RD, ->vmeta_unm
+ | fld qword [BASE+RD*8]
+ | fchs
+ | fstp qword [BASE+RA*8]
+ | ins_next
+ break;
+ case BC_LEN:
+ | ins_AD // RA = dst, RD = src
+ | checkstr RD, >2
+ | mov STR:RD, [BASE+RD*8]
+ | fild dword STR:RD->len
+ |1:
+ | fstp qword [BASE+RA*8]
+ | ins_next
+ |2:
+ | checktab RD, ->vmeta_len
+ | mov TAB:RD, [BASE+RD*8]
+ | mov ARG1, TAB:RD
+ | mov RB, BASE // Save BASE.
+ | call extern lj_tab_len // (GCtab *t)
+ | // Length of table returned in eax (RC).
+ | mov ARG1, RC
+ | mov BASE, RB // Restore BASE.
+ | fild ARG1
+ | movzx RA, PC_RA
+ | jmp <1
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |.macro ins_arithpre, ins
+ | ins_ABC
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+ | checknum RB, ->vmeta_arith_vn
+ | fld qword [BASE+RB*8]
+ | ins qword [KBASE+RC*8]
+ || break;
+ ||case 1:
+ | checknum RB, ->vmeta_arith_nv
+ | fld qword [KBASE+RC*8]
+ | ins qword [BASE+RB*8]
+ || break;
+ ||default:
+ | checknum RB, ->vmeta_arith_vv
+ | checknum RC, ->vmeta_arith_vv
+ | fld qword [BASE+RB*8]
+ | ins qword [BASE+RC*8]
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arith, ins
+ | ins_arithpre ins
+ | fstp qword [BASE+RA*8]
+ | ins_next
+ |.endmacro
+
+ | // RA = dst, RB = src1 or num const, RC = src2 or num const
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+ | ins_arith fadd
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+ | ins_arith fsub
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | ins_arith fmul
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arith fdiv
+ break;
+ case BC_MODVN:
+ | ins_arithpre fld
+ |->BC_MODVN_Z:
+ | call ->vm_mod
+ | fstp qword [BASE+RA*8]
+ | ins_next
+ break;
+ case BC_MODNV: case BC_MODVV:
+ | ins_arithpre fld
+ | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
+ break;
+ case BC_POW:
+ | ins_arithpre fld
+ | call ->vm_pow
+ | fstp qword [BASE+RA*8]
+ | ins_next
+ break;
+
+ case BC_CAT:
+ | ins_ABC // RA = dst, RB = src_start, RC = src_end
+ | lea RA, [BASE+RC*8]
+ | sub RC, RB
+ | mov ARG2, RA
+ | mov ARG3, RC
+ |->BC_CAT_Z:
+ | mov L:RB, SAVE_L
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ | test RC, RC
+ | jnz ->vmeta_binop
+ | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
+ | movzx RA, PC_RA
+ | mov RC, [BASE+RB*8+4]
+ | mov RB, [BASE+RB*8]
+ | mov [BASE+RA*8+4], RC
+ | mov [BASE+RA*8], RB
+ | ins_next
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR:
+ | ins_AND // RA = dst, RD = str const (~)
+ | mov RD, [KBASE+RD*4]
+ | mov dword [BASE+RA*8+4], LJ_TSTR
+ | mov [BASE+RA*8], RD
+ | ins_next
+ break;
+ case BC_KSHORT:
+ | ins_AD // RA = dst, RD = signed int16 literal
+ | fild PC_RD // Refetch signed RD from instruction.
+ | fstp qword [BASE+RA*8]
+ | ins_next
+ break;
+ case BC_KNUM:
+ | ins_AD // RA = dst, RD = num const
+ | fld qword [KBASE+RD*8]
+ | fstp qword [BASE+RA*8]
+ | ins_next
+ break;
+ case BC_KPRI:
+ | ins_AND // RA = dst, RD = primitive type (~)
+ | mov [BASE+RA*8+4], RD
+ | ins_next
+ break;
+ case BC_KNIL:
+ | ins_AD // RA = dst_start, RD = dst_end
+ | lea RA, [BASE+RA*8+12]
+ | lea RD, [BASE+RD*8+4]
+ | mov RB, LJ_TNIL
+ | mov [RA-8], RB // Sets minimum 2 slots.
+ |1:
+ | mov [RA], RB
+ | add RA, 8
+ | cmp RA, RD
+ | jbe <1
+ | ins_next
+ break;
+
+ /* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ | ins_AD // RA = dst, RD = upvalue #
+ | mov LFUNC:RB, [BASE-8]
+ | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
+ | mov RB, UPVAL:RB->v
+ | mov RD, [RB+4]
+ | mov RB, [RB]
+ | mov [BASE+RA*8+4], RD
+ | mov [BASE+RA*8], RB
+ | ins_next
+ break;
+ case BC_USETV:
+ | ins_AD // RA = upvalue #, RD = src
+ | // Really ugly code due to the lack of a 4th free register.
+ | mov LFUNC:RB, [BASE-8]
+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
+ | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
+ | jnz >4
+ |1:
+ | mov RA, [BASE+RD*8]
+ |2:
+ | mov RB, UPVAL:RB->v
+ | mov RD, [BASE+RD*8+4]
+ | mov [RB], RA
+ | mov [RB+4], RD
+ |3:
+ | ins_next
+ |
+ |4: // Upvalue is black. Check if new value is collectable and white.
+ | mov RA, [BASE+RD*8+4]
+ | sub RA, LJ_TISGCV
+ | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
+ | jbe <1
+ | mov GCOBJ:RA, [BASE+RD*8]
+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
+ | jz <2
+ | // Crossed a write barrier. So move the barrier forward.
+ | mov ARG2, UPVAL:RB
+ | mov ARG3, GCOBJ:RA
+ | mov RB, UPVAL:RB->v
+ | mov RD, [BASE+RD*8+4]
+ | mov [RB], GCOBJ:RA
+ | mov [RB+4], RD
+ |->BC_USETV_Z:
+ | mov L:RB, SAVE_L
+ | lea GL:RA, [DISPATCH+GG_DISP2G]
+ | mov L:RB->base, BASE
+ | mov ARG1, GL:RA
+ | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v)
+ | mov BASE, L:RB->base
+ | jmp <3
+ break;
+ case BC_USETS:
+ | ins_AND // RA = upvalue #, RD = str const (~)
+ | mov LFUNC:RB, [BASE-8]
+ | mov GCOBJ:RD, [KBASE+RD*4]
+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
+ | mov RA, UPVAL:RB->v
+ | mov dword [RA+4], LJ_TSTR
+ | mov [RA], GCOBJ:RD
+ | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
+ | jnz >2
+ |1:
+ | ins_next
+ |
+ |2: // Upvalue is black. Check if string is white.
+ | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str)
+ | jz <1
+ | // Crossed a write barrier. So move the barrier forward.
+ | mov ARG3, GCOBJ:RD
+ | mov ARG2, UPVAL:RB
+ | jmp ->BC_USETV_Z
+ break;
+ case BC_USETN:
+ | ins_AD // RA = upvalue #, RD = num const
+ | mov LFUNC:RB, [BASE-8]
+ | fld qword [KBASE+RD*8]
+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
+ | mov RA, UPVAL:RB->v
+ | fstp qword [RA]
+ | ins_next
+ break;
+ case BC_USETP:
+ | ins_AND // RA = upvalue #, RD = primitive type (~)
+ | mov LFUNC:RB, [BASE-8]
+ | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
+ | mov RA, UPVAL:RB->v
+ | mov [RA+4], RD
+ | ins_next
+ break;
+ case BC_UCLO:
+ | ins_AD // RA = level, RD = target
+ | branchPC RD // Do this first to free RD.
+ | mov L:RB, SAVE_L
+ | cmp dword L:RB->openupval, 0
+ | je >1
+ | lea RA, [BASE+RA*8]
+ | mov ARG2, RA
+ | mov ARG1, L:RB
+ | mov L:RB->base, BASE
+ | call extern lj_func_closeuv // (lua_State *L, StkId level)
+ | mov BASE, L:RB->base
+ |1:
+ | ins_next
+ break;
+
+ case BC_FNEW:
+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
+ | mov LFUNC:RA, [BASE-8]
+ | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
+ | mov L:RB, SAVE_L
+ | mov ARG3, LFUNC:RA
+ | mov ARG2, PROTO:RD
+ | mov SAVE_PC, PC
+ | mov ARG1, L:RB
+ | mov L:RB->base, BASE
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | call extern lj_func_newL_gc
+ | // GCfuncL * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | movzx RA, PC_RA
+ | mov [BASE+RA*8], LFUNC:RC
+ | mov dword [BASE+RA*8+4], LJ_TFUNC
+ | ins_next
+ break;
+
+ /* -- Table ops --------------------------------------------------------- */
+
+ case BC_TNEW:
+ | ins_AD // RA = dst, RD = hbits|asize
+ | mov RB, RD
+ | and RD, 0x7ff
+ | shr RB, 11
+ | cmp RD, 0x7ff // Turn 0x7ff into 0x801.
+ | sete RAL
+ | mov ARG3, RB
+ | add RD, RA
+ | mov L:RB, SAVE_L
+ | add RD, RA
+ | mov ARG2, RD
+ | mov SAVE_PC, PC
+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
+ | mov ARG1, L:RB
+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
+ | mov L:RB->base, BASE
+ | jae >2
+ |1:
+ | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
+ | // Table * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | movzx RA, PC_RA
+ | mov [BASE+RA*8], TAB:RC
+ | mov dword [BASE+RA*8+4], LJ_TTAB
+ | ins_next
+ |2:
+ | call extern lj_gc_step_fixtop // (lua_State *L)
+ | mov ARG1, L:RB // Args owned by callee. Set it again.
+ | jmp <1
+ break;
+ case BC_TDUP:
+ | ins_AND // RA = dst, RD = table const (~) (holding template table)
+ | mov TAB:RD, [KBASE+RD*4]
+ | mov L:RB, SAVE_L
+ | mov ARG2, TAB:RD
+ | mov ARG1, L:RB
+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
+ | mov SAVE_PC, PC
+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
+ | mov L:RB->base, BASE
+ | jae >3
+ |2:
+ | call extern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Table * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | movzx RA, PC_RA
+ | mov [BASE+RA*8], TAB:RC
+ | mov dword [BASE+RA*8+4], LJ_TTAB
+ | ins_next
+ |3:
+ | call extern lj_gc_step_fixtop // (lua_State *L)
+ | mov ARG1, L:RB // Args owned by callee. Set it again.
+ | jmp <2
+ break;
+
+ case BC_GGET:
+ | ins_AND // RA = dst, RD = str const (~)
+ | mov LFUNC:RB, [BASE-8]
+ | mov TAB:RB, LFUNC:RB->env
+ | mov STR:RC, [KBASE+RD*4]
+ | jmp ->BC_TGETS_Z
+ break;
+ case BC_GSET:
+ | ins_AND // RA = src, RD = str const (~)
+ | mov LFUNC:RB, [BASE-8]
+ | mov TAB:RB, LFUNC:RB->env
+ | mov STR:RC, [KBASE+RD*4]
+ | jmp ->BC_TSETS_Z
+ break;
+
+ case BC_TGETV:
+ | ins_ABC // RA = dst, RB = table, RC = key
+ | checktab RB, ->vmeta_tgetv
+ | mov TAB:RB, [BASE+RB*8]
+ |
+ | // Integer key? Convert number to int and back and compare.
+ | checknum RC, >5
+ | fld qword [BASE+RC*8]
+ | fist ARG1
+ | fild ARG1
+ | fcomparepp // eax (RC) modified!
+ | mov RC, ARG1
+ | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
+ | cmp RC, TAB:RB->asize // Takes care of unordered, too.
+ | jae ->vmeta_tgetv // Not in array part? Use fallback.
+ | shl RC, 3
+ | add RC, TAB:RB->array
+ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
+ | je >2
+ |1:
+ | mov RB, [RC] // Get array slot.
+ | mov RC, [RC+4]
+ | mov [BASE+RA*8], RB
+ | mov [BASE+RA*8+4], RC
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+ | jz <1
+ | mov TAB:RA, TAB:RB->metatable
+ | test byte TAB:RA->nomm, 1<<MM_index
+ | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <1
+ |
+ |5: // String key?
+ | checkstr RC, ->vmeta_tgetv
+ | mov STR:RC, [BASE+RC*8]
+ | jmp ->BC_TGETS_Z
+ break;
+ case BC_TGETS:
+ | ins_ABC // RA = dst, RB = table, RC = str const (~)
+ | not RC
+ | mov STR:RC, [KBASE+RC*4]
+ | checktab RB, ->vmeta_tgets
+ | mov TAB:RB, [BASE+RB*8]
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
+ | mov RA, TAB:RB->hmask
+ | and RA, STR:RC->hash
+ | imul RA, #NODE
+ | add NODE:RA, TAB:RB->node
+ |1:
+ | cmp dword NODE:RA->key.it, LJ_TSTR
+ | jne >4
+ | cmp dword NODE:RA->key.gcr, STR:RC
+ | jne >4
+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
+ | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
+ | je >5 // Key found, but nil value?
+ | movzx RC, PC_RA
+ | mov RB, [RA] // Get node value.
+ | mov RA, [RA+4]
+ | mov [BASE+RC*8], RB
+ |2:
+ | mov [BASE+RC*8+4], RA
+ | ins_next
+ |
+ |3:
+ | movzx RC, PC_RA
+ | mov RA, LJ_TNIL
+ | jmp <2
+ |
+ |4: // Follow hash chain.
+ | mov NODE:RA, NODE:RA->next
+ | test NODE:RA, NODE:RA
+ | jnz <1
+ | // End of hash chain: key not found, nil result.
+ |
+ |5: // Check for __index if table value is nil.
+ | mov TAB:RA, TAB:RB->metatable
+ | test TAB:RA, TAB:RA
+ | jz <3 // No metatable: done.
+ | test byte TAB:RA->nomm, 1<<MM_index
+ | jnz <3 // 'no __index' flag set: done.
+ | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
+ break;
+ case BC_TGETB:
+ | ins_ABC // RA = dst, RB = table, RC = byte literal
+ | checktab RB, ->vmeta_tgetb
+ | mov TAB:RB, [BASE+RB*8]
+ | cmp RC, TAB:RB->asize
+ | jae ->vmeta_tgetb
+ | shl RC, 3
+ | add RC, TAB:RB->array
+ | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
+ | je >2
+ |1:
+ | mov RB, [RC] // Get array slot.
+ | mov RC, [RC+4]
+ | mov [BASE+RA*8], RB
+ | mov [BASE+RA*8+4], RC
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+ | jz <1
+ | mov TAB:RA, TAB:RB->metatable
+ | test byte TAB:RA->nomm, 1<<MM_index
+ | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <1
+ break;
+
+ case BC_TSETV:
+ | ins_ABC // RA = src, RB = table, RC = key
+ | checktab RB, ->vmeta_tsetv
+ | mov TAB:RB, [BASE+RB*8]
+ |
+ | // Integer key? Convert number to int and back and compare.
+ | checknum RC, >5
+ | fld qword [BASE+RC*8]
+ | fist ARG1
+ | fild ARG1
+ | fcomparepp // eax (RC) modified!
+ | mov RC, ARG1
+ | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
+ | cmp RC, TAB:RB->asize // Takes care of unordered, too.
+ | jae ->vmeta_tsetv
+ | shl RC, 3
+ | add RC, TAB:RB->array
+ | cmp dword [RC+4], LJ_TNIL
+ | je >3 // Previous value is nil?
+ |1:
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2:
+ | mov RB, [BASE+RA*8+4] // Set array slot.
+ | mov RA, [BASE+RA*8]
+ | mov [RC+4], RB
+ | mov [RC], RA
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+ | jz <1
+ | mov TAB:RA, TAB:RB->metatable
+ | test byte TAB:RA->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <1
+ |
+ |5: // String key?
+ | checkstr RC, ->vmeta_tsetv
+ | mov STR:RC, [BASE+RC*8]
+ | jmp ->BC_TSETS_Z
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, RA
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <2
+ break;
+ case BC_TSETS:
+ | ins_ABC // RA = src, RB = table, RC = str const (~)
+ | not RC
+ | mov STR:RC, [KBASE+RC*4]
+ | checktab RB, ->vmeta_tsets
+ | mov TAB:RB, [BASE+RB*8]
+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
+ | mov RA, TAB:RB->hmask
+ | and RA, STR:RC->hash
+ | imul RA, #NODE
+ | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
+ | add NODE:RA, TAB:RB->node
+ |1:
+ | cmp dword NODE:RA->key.it, LJ_TSTR
+ | jne >5
+ | cmp dword NODE:RA->key.gcr, STR:RC
+ | jne >5
+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
+ | cmp dword [RA+4], LJ_TNIL
+ | je >4 // Previous value is nil?
+ |2:
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |3:
+ | movzx RC, PC_RA
+ | mov RB, [BASE+RC*8+4] // Set node value.
+ | mov RC, [BASE+RC*8]
+ | mov [RA+4], RB
+ | mov [RA], RC
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+ | jz <2
+ | mov ARG1, RA // Save RA.
+ | mov TAB:RA, TAB:RB->metatable
+ | test byte TAB:RA->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ | mov RA, ARG1 // Restore RA.
+ | jmp <2
+ |
+ |5: // Follow hash chain.
+ | mov NODE:RA, NODE:RA->next
+ | test NODE:RA, NODE:RA
+ | jnz <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | mov TAB:RA, TAB:RB->metatable
+ | test TAB:RA, TAB:RA
+ | jz >6 // No metatable: continue.
+ | test byte TAB:RA->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | mov ARG5, STR:RC
+ | mov ARG6, LJ_TSTR
+ | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
+ | mov ARG4, TAB:RB // Save TAB:RB for us.
+ | mov ARG2, TAB:RB
+ | mov L:RB, SAVE_L
+ | mov ARG3, RC
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Handles write barrier for the new key. TValue * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | mov TAB:RB, ARG4 // Need TAB:RB for barrier.
+ | mov RA, eax
+ | jmp <2 // Must check write barrier for value.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, RC // Destroys STR:RC.
+ | jmp <3
+ break;
+ case BC_TSETB:
+ | ins_ABC // RA = src, RB = table, RC = byte literal
+ | checktab RB, ->vmeta_tsetb
+ | mov TAB:RB, [BASE+RB*8]
+ | cmp RC, TAB:RB->asize
+ | jae ->vmeta_tsetb
+ | shl RC, 3
+ | add RC, TAB:RB->array
+ | cmp dword [RC+4], LJ_TNIL
+ | je >3 // Previous value is nil?
+ |1:
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2:
+ | mov RB, [BASE+RA*8+4] // Set array slot.
+ | mov RA, [BASE+RA*8]
+ | mov [RC+4], RB
+ | mov [RC], RA
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
+ | jz <1
+ | mov TAB:RA, TAB:RB->metatable
+ | test byte TAB:RA->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <1
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, RA
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <2
+ break;
+
+ case BC_TSETM:
+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
+ | mov ARG5, KBASE // Need one more free register.
+ | fld qword [KBASE+RD*8]
+ | fistp ARG4 // Const is guaranteed to be an int.
+ |1:
+ | lea RA, [BASE+RA*8]
+ | mov TAB:RB, [RA-8] // Guaranteed to be a table.
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2:
+ | mov RD, NRESULTS
+ | mov KBASE, ARG4
+ | sub RD, 1
+ | jz >4 // Nothing to copy?
+ | add RD, KBASE // Compute needed size.
+ | cmp RD, TAB:RB->asize
+ | jae >5 // Does not fit into array part?
+ | sub RD, KBASE
+ | shl KBASE, 3
+ | add KBASE, TAB:RB->array
+ |3: // Copy result slots to table.
+ | mov RB, [RA]
+ | mov [KBASE], RB
+ | mov RB, [RA+4]
+ | add RA, 8
+ | mov [KBASE+4], RB
+ | add KBASE, 8
+ | sub RD, 1
+ | jnz <3
+ |4:
+ | mov KBASE, ARG5
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | mov ARG2, TAB:RB
+ | mov L:RB, SAVE_L
+ | mov ARG3, RD
+ | mov ARG1, L:RB
+ | mov SAVE_PC, PC
+ | mov L:RB->base, BASE
+ | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ | mov BASE, L:RB->base
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <1 // Retry.
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:RB, RD
+ | jmp <2
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALL: case BC_CALLM:
+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+ if (op == BC_CALLM) {
+ | add NARGS:RC, NRESULTS
+ }
+ | lea RA, [BASE+RA*8+8]
+ | mov LFUNC:RB, [RA-8]
+ | cmp dword [RA-4], LJ_TFUNC
+ | jne ->vmeta_call
+ | jmp aword LFUNC:RB->gate
+ break;
+
+ case BC_CALLMT:
+ | ins_AD // RA = base, RD = extra_nargs
+ | add NARGS:RD, NRESULTS
+ | // Fall through. Assumes BC_CALLMT follows and ins_AD is a no-op.
+ break;
+ case BC_CALLT:
+ | ins_AD // RA = base, RD = nargs+1
+ | lea RA, [BASE+RA*8+8]
+ | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
+ | mov LFUNC:RB, [RA-8]
+ | cmp dword [RA-4], LJ_TFUNC
+ | jne ->vmeta_call
+ |->BC_CALLT_Z:
+ | mov PC, [BASE-4]
+ | test PC, FRAME_TYPE
+ | jnz >7
+ |1:
+ | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
+ | mov NRESULTS, NARGS:RD
+ | sub NARGS:RD, 1
+ | jz >3
+ |2:
+ | mov RB, [RA] // Move args down.
+ | mov [KBASE], RB
+ | mov RB, [RA+4]
+ | mov [KBASE+4], RB
+ | add KBASE, 8
+ | add RA, 8
+ | sub NARGS:RD, 1
+ | jnz <2
+ |
+ | mov LFUNC:RB, [BASE-8]
+ |3:
+ | mov RA, BASE // BASE is ignored, except when ...
+ | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
+ | ja >5
+ |4:
+ | mov NARGS:RD, NRESULTS
+ | jmp aword LFUNC:RB->gate
+ |
+ |5: // Tailcall to a fast function.
+ | test PC, FRAME_TYPE // Lua frame below?
+ | jnz <4
+ | movzx RD, PC_RA // Need to prepare BASE/KBASE.
+ | not RD
+ | lea BASE, [BASE+RD*8]
+ | mov LFUNC:KBASE, [BASE-8]
+ | mov PROTO:KBASE, LFUNC:KBASE->pt
+ | mov KBASE, PROTO:KBASE->k
+ | jmp <4
+ |
+ |7: // Tailcall from a vararg function.
+ | jnp <1 // Vararg frame below?
+ | and PC, -8
+ | sub BASE, PC // Need to relocate BASE/KBASE down.
+ | mov KBASE, BASE
+ | mov PC, [BASE-4]
+ | jmp <1
+ break;
+
+ case BC_ITERC:
+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
+ | lea RA, [BASE+RA*8+8] // fb = base+1
+ | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
+ | mov RC, [RA-20]
+ | mov [RA], RB
+ | mov [RA+4], RC
+ | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
+ | mov RC, [RA-12]
+ | mov [RA+8], RB
+ | mov [RA+12], RC
+ | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
+ | mov RC, [RA-28]
+ | mov [RA-8], LFUNC:RB
+ | mov [RA-4], RC
+ | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
+ | mov NARGS:RC, 3
+ | jne ->vmeta_call
+ | jmp aword LFUNC:RB->gate
+ break;
+
+ case BC_VARG:
+ | ins_AB_ // RA = base, RB = nresults+1, (RC = 1)
+ | mov LFUNC:RC, [BASE-8]
+ | lea RA, [BASE+RA*8]
+ | mov PROTO:RC, LFUNC:RC->pt
+ | movzx RC, byte PROTO:RC->numparams
+ | mov ARG3, KBASE // Need one more free register.
+ | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
+ | sub KBASE, [BASE-4]
+ | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
+ | test RB, RB
+ | jz >5 // Copy all varargs?
+ | lea RB, [RA+RB*8-8]
+ | cmp KBASE, BASE // No vararg slots?
+ | jnb >2
+ |1: // Copy vararg slots to destination slots.
+ | mov RC, [KBASE-8]
+ | mov [RA], RC
+ | mov RC, [KBASE-4]
+ | add KBASE, 8
+ | mov [RA+4], RC
+ | add RA, 8
+ | cmp RA, RB // All destination slots filled?
+ | jnb >3
+ | cmp KBASE, BASE // No more vararg slots?
+ | jb <1
+ |2: // Fill up remainder with nil.
+ | mov dword [RA+4], LJ_TNIL
+ | add RA, 8
+ | cmp RA, RB
+ | jb <2
+ |3:
+ | mov KBASE, ARG3
+ | ins_next
+ |
+ |5: // Copy all varargs.
+ | mov NRESULTS, 1 // NRESULTS = 0+1
+ | mov RC, BASE
+ | sub RC, KBASE
+ | jbe <3 // No vararg slots?
+ | mov RB, RC
+ | shr RB, 3
+ | mov ARG2, RB // Store this for stack growth below.
+ | add RB, 1
+ | mov NRESULTS, RB // NRESULTS = #varargs+1
+ | mov L:RB, SAVE_L
+ | add RC, RA
+ | cmp RC, L:RB->maxstack
+ | ja >7 // Need to grow stack?
+ |6: // Copy all vararg slots.
+ | mov RC, [KBASE-8]
+ | mov [RA], RC
+ | mov RC, [KBASE-4]
+ | add KBASE, 8
+ | mov [RA+4], RC
+ | add RA, 8
+ | cmp KBASE, BASE // No more vararg slots?
+ | jb <6
+ | jmp <3
+ |
+ |7: // Grow stack for varargs.
+ | mov L:RB->base, BASE
+ | mov L:RB->top, RA
+ | mov SAVE_PC, PC
+ | sub KBASE, BASE // Need delta, because BASE may change.
+ | mov ARG1, L:RB
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov BASE, L:RB->base
+ | mov RA, L:RB->top
+ | add KBASE, BASE
+ | jmp <6
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM:
+ | ins_AD // RA = results, RD = extra_nresults
+ | add RD, NRESULTS // NRESULTS >=1, so RD >=1.
+ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
+ break;
+
+ case BC_RET: case BC_RET0: case BC_RET1:
+ | ins_AD // RA = results, RD = nresults+1
+ if (op != BC_RET0) {
+ | shl RA, 3
+ }
+ |1:
+ | mov PC, [BASE-4]
+ | mov NRESULTS, RD // Save nresults+1.
+ | test PC, FRAME_TYPE // Check frame type marker.
+ | jnz >7 // Not returning to a fixarg Lua func?
+ switch (op) {
+ case BC_RET:
+ |->BC_RET_Z:
+ | mov KBASE, BASE // Use KBASE for result move.
+ | sub RD, 1
+ | jz >3
+ |2:
+ | mov RB, [KBASE+RA] // Move results down.
+ | mov [KBASE-8], RB
+ | mov RB, [KBASE+RA+4]
+ | mov [KBASE-4], RB
+ | add KBASE, 8
+ | sub RD, 1
+ | jnz <2
+ |3:
+ | mov RD, NRESULTS // Note: NRESULTS may be >255.
+ | movzx RB, PC_RB // So cannot compare with RDL!
+ |5:
+ | cmp RB, RD // More results expected?
+ | ja >6
+ break;
+ case BC_RET1:
+ | mov RB, [BASE+RA+4]
+ | mov [BASE-4], RB
+ | mov RB, [BASE+RA]
+ | mov [BASE-8], RB
+ /* fallthrough */
+ case BC_RET0:
+ |5:
+ | cmp PC_RB, RDL // More results expected?
+ | ja >6
+ default:
+ break;
+ }
+ | movzx RA, PC_RA
+ | not RA // Note: ~RA = -(RA+1)
+ | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
+ | mov LFUNC:KBASE, [BASE-8]
+ | mov PROTO:KBASE, LFUNC:KBASE->pt
+ | mov KBASE, PROTO:KBASE->k
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ if (op == BC_RET) {
+ | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
+ | add KBASE, 8
+ } else {
+ | mov dword [BASE+RD*8-12], LJ_TNIL
+ }
+ | add RD, 1
+ | jmp <5
+ |
+ |7: // Non-standard return case.
+ | jnp ->vm_return
+ | // Return from vararg function: relocate BASE down and RA up.
+ | and PC, -8
+ | sub BASE, PC
+ if (op != BC_RET0) {
+ | add RA, PC
+ }
+ | jmp <1
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ |.define FOR_IDX, qword [RA]; .define FOR_TIDX, dword [RA+4]
+ |.define FOR_STOP, qword [RA+8]; .define FOR_TSTOP, dword [RA+12]
+ |.define FOR_STEP, qword [RA+16]; .define FOR_TSTEP, dword [RA+20]
+ |.define FOR_EXT, qword [RA+24]; .define FOR_TEXT, dword [RA+28]
+
+ case BC_FORL:
+#if LJ_HASJIT
+ | hotloop RB
+#endif
+ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
+ break;
+
+ case BC_JFORI:
+ case BC_JFORL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_FORI:
+ case BC_IFORL:
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
+ | lea RA, [BASE+RA*8]
+ if (!vk) {
+ | cmp FOR_TIDX, LJ_TISNUM; ja ->vmeta_for // Type checks
+ | cmp FOR_TSTOP, LJ_TISNUM; ja ->vmeta_for
+ }
+ | mov RB, FOR_TSTEP // Load type/hiword of for step.
+ if (!vk) {
+ | cmp RB, LJ_TISNUM; ja ->vmeta_for
+ }
+ | fld FOR_STOP
+ | fld FOR_IDX
+ if (vk) {
+ | fadd FOR_STEP // nidx = idx + step
+ | fst FOR_IDX
+ }
+ | fst FOR_EXT
+ | test RB, RB // Swap lim/(n)idx if step non-negative.
+ | js >1
+ | fxch
+ |1:
+ | fcomparepp // eax (RD) modified if !cmov.
+ if (!cmov) {
+ | movzx RD, PC_RD // Need to reload RD.
+ }
+ if (op == BC_FORI) {
+ | jnb >2
+ | branchPC RD
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | movzx RD, PC_RD
+ | jnb =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ | jb >2
+ | branchPC RD
+ } else {
+ | jnb =>BC_JLOOP
+ }
+ |2:
+ | ins_next
+ break;
+
+ case BC_ITERL:
+#if LJ_HASJIT
+ | hotloop RB
+#endif
+ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | ins_AJ // RA = base, RD = target
+ | lea RA, [BASE+RA*8]
+ | mov RB, [RA+4]
+ | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
+ if (op == BC_JITERL) {
+ | mov [RA-4], RB
+ | mov RB, [RA]
+ | mov [RA-8], RB
+ | jmp =>BC_JLOOP
+ } else {
+ | branchPC RD // Otherwise save control var + branch.
+ | mov RD, [RA]
+ | mov [RA-4], RB
+ | mov [RA-8], RD
+ }
+ |1:
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | ins_A // RA = base, RD = target (loop extent)
+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
+ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
+#if LJ_HASJIT
+ | hotloop RB
+#endif
+ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
+ break;
+
+ case BC_ILOOP:
+ | ins_A // RA = base, RD = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+#if LJ_HASJIT
+ | ins_AD // RA = base (ignored), RD = traceno
+ | mov RA, [DISPATCH+DISPATCH_J(trace)]
+ | mov TRACE:RD, [RA+RD*4]
+ | mov RD, TRACE:RD->mcode
+ | mov L:RB, SAVE_L
+ | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
+ | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
+ | jmp RD
+#endif
+ break;
+
+ case BC_JMP:
+ | ins_AJ // RA = unused, RD = target
+ | branchPC RD
+ | ins_next
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+ default:
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+ exit(2);
+ break;
+ }
+}
+
+static int build_backend(BuildCtx *ctx)
+{
+ int op;
+ int cmov = 1;
+#ifdef LUAJIT_CPU_NOCMOV
+ cmov = 0;
+#endif
+
+ dasm_growpc(Dst, BC__MAX);
+
+ build_subroutines(ctx, cmov);
+
+ |.code_op
+ for (op = 0; op < BC__MAX; op++)
+ build_ins(ctx, (BCOp)op, op, cmov);
+
+ return BC__MAX;
+}
+
+/* Emit pseudo frame-info for all assembler functions. */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe0:\n"
+ "\t.long .LECIE0-.LSCIE0\n"
+ ".LSCIE0:\n"
+ "\t.long 0xffffffff\n"
+ "\t.byte 0x1\n"
+ "\t.string \"\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -4\n"
+ "\t.byte 0x8\n"
+ "\t.byte 0xc\n\t.uleb128 0x4\n\t.uleb128 0x4\n"
+ "\t.byte 0x88\n\t.uleb128 0x1\n"
+ "\t.align 4\n"
+ ".LECIE0:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE0:\n"
+ "\t.long .LEFDE0-.LASFDE0\n"
+ ".LASFDE0:\n"
+ "\t.long .Lframe0\n"
+ "\t.long .Lbegin\n"
+ "\t.long %d\n"
+ "\t.byte 0xe\n\t.uleb128 0x30\n" /* def_cfa_offset */
+ "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
+ "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
+ "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
+ "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
+ "\t.align 4\n"
+ ".LEFDE0:\n\n", (int)ctx->codesz);
+ break;
+ default: /* Difficult for other modes. */
+ break;
+ }
+}
+
diff --git a/src/lauxlib.h b/src/lauxlib.h
new file mode 100644
index 00000000..505a9f52
--- /dev/null
+++ b/src/lauxlib.h
@@ -0,0 +1,159 @@
+/*
+** $Id: lauxlib.h,v 1.88.1.1 2007/12/27 13:02:25 roberto Exp $
+** Auxiliary functions for building Lua libraries
+** See Copyright Notice in lua.h
+*/
+
+
+#ifndef lauxlib_h
+#define lauxlib_h
+
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include "lua.h"
+
+
+#define luaL_getn(L,i) ((int)lua_objlen(L, i))
+#define luaL_setn(L,i,j) ((void)0) /* no op! */
+
+/* extra error code for `luaL_load' */
+#define LUA_ERRFILE (LUA_ERRERR+1)
+
+typedef struct luaL_Reg {
+ const char *name;
+ lua_CFunction func;
+} luaL_Reg;
+
+LUALIB_API void (luaL_openlib) (lua_State *L, const char *libname,
+ const luaL_Reg *l, int nup);
+LUALIB_API void (luaL_register) (lua_State *L, const char *libname,
+ const luaL_Reg *l);
+LUALIB_API int (luaL_getmetafield) (lua_State *L, int obj, const char *e);
+LUALIB_API int (luaL_callmeta) (lua_State *L, int obj, const char *e);
+LUALIB_API int (luaL_typerror) (lua_State *L, int narg, const char *tname);
+LUALIB_API int (luaL_argerror) (lua_State *L, int numarg, const char *extramsg);
+LUALIB_API const char *(luaL_checklstring) (lua_State *L, int numArg,
+ size_t *l);
+LUALIB_API const char *(luaL_optlstring) (lua_State *L, int numArg,
+ const char *def, size_t *l);
+LUALIB_API lua_Number (luaL_checknumber) (lua_State *L, int numArg);
+LUALIB_API lua_Number (luaL_optnumber) (lua_State *L, int nArg, lua_Number def);
+
+LUALIB_API lua_Integer (luaL_checkinteger) (lua_State *L, int numArg);
+LUALIB_API lua_Integer (luaL_optinteger) (lua_State *L, int nArg,
+ lua_Integer def);
+
+LUALIB_API void (luaL_checkstack) (lua_State *L, int sz, const char *msg);
+LUALIB_API void (luaL_checktype) (lua_State *L, int narg, int t);
+LUALIB_API void (luaL_checkany) (lua_State *L, int narg);
+
+LUALIB_API int (luaL_newmetatable) (lua_State *L, const char *tname);
+LUALIB_API void *(luaL_checkudata) (lua_State *L, int ud, const char *tname);
+
+LUALIB_API void (luaL_where) (lua_State *L, int lvl);
+LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...);
+
+LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def,
+ const char *const lst[]);
+
+LUALIB_API int (luaL_ref) (lua_State *L, int t);
+LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref);
+
+LUALIB_API int (luaL_loadfile) (lua_State *L, const char *filename);
+LUALIB_API int (luaL_loadbuffer) (lua_State *L, const char *buff, size_t sz,
+ const char *name);
+LUALIB_API int (luaL_loadstring) (lua_State *L, const char *s);
+
+LUALIB_API lua_State *(luaL_newstate) (void);
+
+
+LUALIB_API const char *(luaL_gsub) (lua_State *L, const char *s, const char *p,
+ const char *r);
+
+LUALIB_API const char *(luaL_findtable) (lua_State *L, int idx,
+ const char *fname, int szhint);
+
+
+
+
+/*
+** ===============================================================
+** some useful macros
+** ===============================================================
+*/
+
+#define luaL_argcheck(L, cond,numarg,extramsg) \
+ ((void)((cond) || luaL_argerror(L, (numarg), (extramsg))))
+#define luaL_checkstring(L,n) (luaL_checklstring(L, (n), NULL))
+#define luaL_optstring(L,n,d) (luaL_optlstring(L, (n), (d), NULL))
+#define luaL_checkint(L,n) ((int)luaL_checkinteger(L, (n)))
+#define luaL_optint(L,n,d) ((int)luaL_optinteger(L, (n), (d)))
+#define luaL_checklong(L,n) ((long)luaL_checkinteger(L, (n)))
+#define luaL_optlong(L,n,d) ((long)luaL_optinteger(L, (n), (d)))
+
+#define luaL_typename(L,i) lua_typename(L, lua_type(L,(i)))
+
+#define luaL_dofile(L, fn) \
+ (luaL_loadfile(L, fn) || lua_pcall(L, 0, LUA_MULTRET, 0))
+
+#define luaL_dostring(L, s) \
+ (luaL_loadstring(L, s) || lua_pcall(L, 0, LUA_MULTRET, 0))
+
+#define luaL_getmetatable(L,n) (lua_getfield(L, LUA_REGISTRYINDEX, (n)))
+
+#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n)))
+
+/*
+** {======================================================
+** Generic Buffer manipulation
+** =======================================================
+*/
+
+
+
+typedef struct luaL_Buffer {
+ char *p; /* current position in buffer */
+ int lvl; /* number of strings in the stack (level) */
+ lua_State *L;
+ char buffer[LUAL_BUFFERSIZE];
+} luaL_Buffer;
+
+#define luaL_addchar(B,c) \
+ ((void)((B)->p < ((B)->buffer+LUAL_BUFFERSIZE) || luaL_prepbuffer(B)), \
+ (*(B)->p++ = (char)(c)))
+
+/* compatibility only */
+#define luaL_putchar(B,c) luaL_addchar(B,c)
+
+#define luaL_addsize(B,n) ((B)->p += (n))
+
+LUALIB_API void (luaL_buffinit) (lua_State *L, luaL_Buffer *B);
+LUALIB_API char *(luaL_prepbuffer) (luaL_Buffer *B);
+LUALIB_API void (luaL_addlstring) (luaL_Buffer *B, const char *s, size_t l);
+LUALIB_API void (luaL_addstring) (luaL_Buffer *B, const char *s);
+LUALIB_API void (luaL_addvalue) (luaL_Buffer *B);
+LUALIB_API void (luaL_pushresult) (luaL_Buffer *B);
+
+
+/* }====================================================== */
+
+
+/* compatibility with ref system */
+
+/* pre-defined references */
+#define LUA_NOREF (-2)
+#define LUA_REFNIL (-1)
+
+#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \
+ (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0))
+
+#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref))
+
+#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref))
+
+
+#define luaL_reg luaL_Reg
+
+#endif
diff --git a/src/lib_aux.c b/src/lib_aux.c
new file mode 100644
index 00000000..1ae32dbc
--- /dev/null
+++ b/src/lib_aux.c
@@ -0,0 +1,438 @@
+/*
+** Auxiliary library for the Lua/C API.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major parts taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+#define lib_aux_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_lib.h"
+
+/* convert a stack index to positive */
+#define abs_index(L, i) \
+ ((i) > 0 || (i) <= LUA_REGISTRYINDEX ? (i) : lua_gettop(L) + (i) + 1)
+
+/* -- Type checks --------------------------------------------------------- */
+
+LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg)
+{
+ if (!lua_checkstack(L, size))
+ lj_err_callerv(L, LJ_ERR_STKOVM, msg);
+}
+
+LUALIB_API void luaL_checktype(lua_State *L, int narg, int tt)
+{
+ if (lua_type(L, narg) != tt)
+ lj_err_argt(L, narg, tt);
+}
+
+LUALIB_API void luaL_checkany(lua_State *L, int narg)
+{
+ lj_lib_checkany(L, narg);
+}
+
+LUALIB_API const char *luaL_checklstring(lua_State *L, int narg, size_t *len)
+{
+ GCstr *s = lj_lib_checkstr(L, narg);
+ if (len != NULL) *len = s->len;
+ return strdata(s);
+}
+
+LUALIB_API const char *luaL_optlstring(lua_State *L, int narg,
+ const char *def, size_t *len)
+{
+ GCstr *s = lj_lib_optstr(L, narg);
+ if (s) {
+ if (len != NULL) *len = s->len;
+ return strdata(s);
+ }
+ if (len != NULL) *len = def ? strlen(def) : 0;
+ return def;
+}
+
+LUALIB_API lua_Number luaL_checknumber(lua_State *L, int narg)
+{
+ return lj_lib_checknum(L, narg);
+}
+
+LUALIB_API lua_Number luaL_optnumber(lua_State *L, int narg, lua_Number def)
+{
+ lj_lib_opt(L, narg,
+ return lj_lib_checknum(L, narg);
+ ,
+ return def;
+ )
+}
+
+LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int narg)
+{
+#if LJ_64
+ return (lua_Integer)lj_lib_checknum(L, narg);
+#else
+ return lj_lib_checkint(L, narg);
+#endif
+}
+
+LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int narg, lua_Integer def)
+{
+#if LJ_64
+ lj_lib_opt(L, narg,
+ return (lua_Integer)lj_lib_checknum(L, narg);
+ ,
+ return def;
+ )
+#else
+ return lj_lib_optint(L, narg, def);
+#endif
+}
+
+LUALIB_API int luaL_checkoption(lua_State *L, int narg, const char *def,
+ const char *const lst[])
+{
+ GCstr *s = lj_lib_optstr(L, narg);
+ const char *opt = s ? strdata(s) : def;
+ uint32_t i;
+ if (!opt) lj_err_argt(L, narg, LUA_TSTRING);
+ for (i = 0; lst[i]; i++)
+ if (strcmp(lst[i], opt) == 0)
+ return (int)i;
+ lj_err_argv(L, narg, LJ_ERR_INVOPTM, opt);
+}
+
+/* -- Module registration ------------------------------------------------- */
+
+LUALIB_API const char *luaL_findtable(lua_State *L, int idx,
+ const char *fname, int szhint)
+{
+ const char *e;
+ lua_pushvalue(L, idx);
+ do {
+ e = strchr(fname, '.');
+ if (e == NULL) e = fname + strlen(fname);
+ lua_pushlstring(L, fname, (size_t)(e - fname));
+ lua_rawget(L, -2);
+ if (lua_isnil(L, -1)) { /* no such field? */
+ lua_pop(L, 1); /* remove this nil */
+ lua_createtable(L, 0, (*e == '.' ? 1 : szhint)); /* new table for field */
+ lua_pushlstring(L, fname, (size_t)(e - fname));
+ lua_pushvalue(L, -2);
+ lua_settable(L, -4); /* set new table into field */
+ } else if (!lua_istable(L, -1)) { /* field has a non-table value? */
+ lua_pop(L, 2); /* remove table and value */
+ return fname; /* return problematic part of the name */
+ }
+ lua_remove(L, -2); /* remove previous table */
+ fname = e + 1;
+ } while (*e == '.');
+ return NULL;
+}
+
+static int libsize(const luaL_Reg *l)
+{
+ int size = 0;
+ for (; l->name; l++) size++;
+ return size;
+}
+
+LUALIB_API void luaL_openlib(lua_State *L, const char *libname,
+ const luaL_Reg *l, int nup)
+{
+ if (libname) {
+ int size = libsize(l);
+ /* check whether lib already exists */
+ luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
+ lua_getfield(L, -1, libname); /* get _LOADED[libname] */
+ if (!lua_istable(L, -1)) { /* not found? */
+ lua_pop(L, 1); /* remove previous result */
+ /* try global variable (and create one if it does not exist) */
+ if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL)
+ lj_err_callerv(L, LJ_ERR_BADMODN, libname);
+ lua_pushvalue(L, -1);
+ lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */
+ }
+ lua_remove(L, -2); /* remove _LOADED table */
+ lua_insert(L, -(nup+1)); /* move library table to below upvalues */
+ }
+ for (; l->name; l++) {
+ int i;
+ for (i = 0; i < nup; i++) /* copy upvalues to the top */
+ lua_pushvalue(L, -nup);
+ lua_pushcclosure(L, l->func, nup);
+ lua_setfield(L, -(nup+2), l->name);
+ }
+ lua_pop(L, nup); /* remove upvalues */
+}
+
+LUALIB_API void luaL_register(lua_State *L, const char *libname,
+ const luaL_Reg *l)
+{
+ luaL_openlib(L, libname, l, 0);
+}
+
+LUALIB_API const char *luaL_gsub(lua_State *L, const char *s,
+ const char *p, const char *r)
+{
+ const char *wild;
+ size_t l = strlen(p);
+ luaL_Buffer b;
+ luaL_buffinit(L, &b);
+ while ((wild = strstr(s, p)) != NULL) {
+ luaL_addlstring(&b, s, (size_t)(wild - s)); /* push prefix */
+ luaL_addstring(&b, r); /* push replacement in place of pattern */
+ s = wild + l; /* continue after `p' */
+ }
+ luaL_addstring(&b, s); /* push last suffix */
+ luaL_pushresult(&b);
+ return lua_tostring(L, -1);
+}
+
+/* -- Buffer handling ----------------------------------------------------- */
+
+#define bufflen(B) ((size_t)((B)->p - (B)->buffer))
+#define bufffree(B) ((size_t)(LUAL_BUFFERSIZE - bufflen(B)))
+
+static int emptybuffer(luaL_Buffer *B)
+{
+ size_t l = bufflen(B);
+ if (l == 0)
+ return 0; /* put nothing on stack */
+ lua_pushlstring(B->L, B->buffer, l);
+ B->p = B->buffer;
+ B->lvl++;
+ return 1;
+}
+
+static void adjuststack(luaL_Buffer *B)
+{
+ if (B->lvl > 1) {
+ lua_State *L = B->L;
+ int toget = 1; /* number of levels to concat */
+ size_t toplen = lua_strlen(L, -1);
+ do {
+ size_t l = lua_strlen(L, -(toget+1));
+ if (!(B->lvl - toget + 1 >= LUA_MINSTACK/2 || toplen > l))
+ break;
+ toplen += l;
+ toget++;
+ } while (toget < B->lvl);
+ lua_concat(L, toget);
+ B->lvl = B->lvl - toget + 1;
+ }
+}
+
+LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
+{
+ if (emptybuffer(B))
+ adjuststack(B);
+ return B->buffer;
+}
+
+LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
+{
+ while (l--)
+ luaL_addchar(B, *s++);
+}
+
+LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
+{
+ luaL_addlstring(B, s, strlen(s));
+}
+
+LUALIB_API void luaL_pushresult(luaL_Buffer *B)
+{
+ emptybuffer(B);
+ lua_concat(B->L, B->lvl);
+ B->lvl = 1;
+}
+
+LUALIB_API void luaL_addvalue(luaL_Buffer *B)
+{
+ lua_State *L = B->L;
+ size_t vl;
+ const char *s = lua_tolstring(L, -1, &vl);
+ if (vl <= bufffree(B)) { /* fit into buffer? */
+ memcpy(B->p, s, vl); /* put it there */
+ B->p += vl;
+ lua_pop(L, 1); /* remove from stack */
+ } else {
+ if (emptybuffer(B))
+ lua_insert(L, -2); /* put buffer before new value */
+ B->lvl++; /* add new value into B stack */
+ adjuststack(B);
+ }
+}
+
+LUALIB_API void luaL_buffinit(lua_State *L, luaL_Buffer *B)
+{
+ B->L = L;
+ B->p = B->buffer;
+ B->lvl = 0;
+}
+
+/* -- Reference management ------------------------------------------------ */
+
+#define FREELIST_REF 0
+
+LUALIB_API int luaL_ref(lua_State *L, int t)
+{
+ int ref;
+ t = abs_index(L, t);
+ if (lua_isnil(L, -1)) {
+ lua_pop(L, 1); /* remove from stack */
+ return LUA_REFNIL; /* `nil' has a unique fixed reference */
+ }
+ lua_rawgeti(L, t, FREELIST_REF); /* get first free element */
+ ref = (int)lua_tointeger(L, -1); /* ref = t[FREELIST_REF] */
+ lua_pop(L, 1); /* remove it from stack */
+ if (ref != 0) { /* any free element? */
+ lua_rawgeti(L, t, ref); /* remove it from list */
+ lua_rawseti(L, t, FREELIST_REF); /* (t[FREELIST_REF] = t[ref]) */
+ } else { /* no free elements */
+ ref = (int)lua_objlen(L, t);
+ ref++; /* create new reference */
+ }
+ lua_rawseti(L, t, ref);
+ return ref;
+}
+
+LUALIB_API void luaL_unref(lua_State *L, int t, int ref)
+{
+ if (ref >= 0) {
+ t = abs_index(L, t);
+ lua_rawgeti(L, t, FREELIST_REF);
+ lua_rawseti(L, t, ref); /* t[ref] = t[FREELIST_REF] */
+ lua_pushinteger(L, ref);
+ lua_rawseti(L, t, FREELIST_REF); /* t[FREELIST_REF] = ref */
+ }
+}
+
+/* -- Load Lua code ------------------------------------------------------- */
+
+typedef struct FileReaderCtx {
+ FILE *fp;
+ char buf[LUAL_BUFFERSIZE];
+} FileReaderCtx;
+
+static const char *reader_file(lua_State *L, void *ud, size_t *size)
+{
+ FileReaderCtx *ctx = (FileReaderCtx *)ud;
+ UNUSED(L);
+ if (feof(ctx->fp)) return NULL;
+ *size = fread(ctx->buf, 1, sizeof(ctx->buf), ctx->fp);
+ return *size > 0 ? ctx->buf : NULL;
+}
+
+LUALIB_API int luaL_loadfile(lua_State *L, const char *filename)
+{
+ FileReaderCtx ctx;
+ int status;
+ const char *chunkname;
+ if (filename) {
+ ctx.fp = fopen(filename, "r");
+ if (ctx.fp == NULL) {
+ lua_pushfstring(L, "cannot open %s: %s", filename, strerror(errno));
+ return LUA_ERRFILE;
+ }
+ chunkname = lua_pushfstring(L, "@%s", filename);
+ } else {
+ ctx.fp = stdin;
+ chunkname = "=stdin";
+ }
+ status = lua_load(L, reader_file, &ctx, chunkname);
+ if (ferror(ctx.fp)) {
+ L->top -= filename ? 2 : 1;
+ lua_pushfstring(L, "cannot read %s: %s", chunkname+1, strerror(errno));
+ if (filename)
+ fclose(ctx.fp);
+ return LUA_ERRFILE;
+ }
+ if (filename) {
+ L->top--;
+ copyTV(L, L->top-1, L->top);
+ fclose(ctx.fp);
+ }
+ return status;
+}
+
+typedef struct StringReaderCtx {
+ const char *str;
+ size_t size;
+} StringReaderCtx;
+
+static const char *reader_string(lua_State *L, void *ud, size_t *size)
+{
+ StringReaderCtx *ctx = (StringReaderCtx *)ud;
+ UNUSED(L);
+ if (ctx->size == 0) return NULL;
+ *size = ctx->size;
+ ctx->size = 0;
+ return ctx->str;
+}
+
+LUALIB_API int luaL_loadbuffer(lua_State *L, const char *buf, size_t size,
+ const char *name)
+{
+ StringReaderCtx ctx;
+ ctx.str = buf;
+ ctx.size = size;
+ return lua_load(L, reader_string, &ctx, name);
+}
+
+LUALIB_API int luaL_loadstring(lua_State *L, const char *s)
+{
+ return luaL_loadbuffer(L, s, strlen(s), s);
+}
+
+/* -- Default allocator and panic function -------------------------------- */
+
+#ifdef LUAJIT_USE_SYSMALLOC
+
+static void *mem_alloc(void *ud, void *ptr, size_t osize, size_t nsize)
+{
+ (void)ud;
+ (void)osize;
+ if (nsize == 0) {
+ free(ptr);
+ return NULL;
+ } else {
+ return realloc(ptr, nsize);
+ }
+}
+
+#define mem_create() NULL
+
+#else
+
+#include "lj_alloc.h"
+
+#define mem_alloc lj_alloc_f
+#define mem_create lj_alloc_create
+
+#endif
+
+static int panic(lua_State *L)
+{
+ fprintf(stderr, "PANIC: unprotected error in call to Lua API (%s)\n",
+ lua_tostring(L, -1));
+ return 0;
+}
+
+LUALIB_API lua_State *luaL_newstate(void)
+{
+ lua_State *L = lua_newstate(mem_alloc, mem_create());
+ if (L) G(L)->panic = panic;
+ return L;
+}
+
diff --git a/src/lib_base.c b/src/lib_base.c
new file mode 100644
index 00000000..6b9e8eef
--- /dev/null
+++ b/src/lib_base.c
@@ -0,0 +1,560 @@
+/*
+** Base and coroutine library.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <stdio.h>
+
+#define lib_base_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_meta.h"
+#include "lj_state.h"
+#include "lj_ff.h"
+#include "lj_ctype.h"
+#include "lj_lib.h"
+
/* -- Base library: checks ------------------------------------------------ */

#define LJLIB_MODULE_base

/* assert(v [, message]): only the failure path reaches this C part --
** both branches raise, so it never returns. NOTE(review): the success
** case is presumably handled directly in the assembler VM (LJLIB_ASM).
*/
LJLIB_ASM(assert)		LJLIB_REC(.)
{
  GCstr *s;
  lj_lib_checkany(L, 1);
  s = lj_lib_optstr(L, 2);  /* Optional custom error message. */
  if (s)
    lj_err_callermsg(L, strdata(s));
  else
    lj_err_caller(L, LJ_ERR_ASSERT);
  return FFH_UNREACHABLE;
}

/* Upvalues for type(): one type name per internal tag. ORDER LJ_T */
LJLIB_PUSH("nil")
LJLIB_PUSH("boolean")
LJLIB_PUSH(top-1)  /* boolean */
LJLIB_PUSH("userdata")
LJLIB_PUSH("string")
LJLIB_PUSH("upval")
LJLIB_PUSH("thread")
LJLIB_PUSH("proto")
LJLIB_PUSH("function")
LJLIB_PUSH("deadkey")
LJLIB_PUSH("table")
LJLIB_PUSH(top-8)  /* userdata */
LJLIB_PUSH("number")
LJLIB_ASM_(type)		LJLIB_REC(.)
/* Recycle the lj_lib_checkany(L, 1) from assert. */
+
/* -- Base library: getters and setters ----------------------------------- */

LJLIB_ASM_(getmetatable)	LJLIB_REC(.)
/* Recycle the lj_lib_checkany(L, 1) from assert. */

/* setmetatable(t, mt): install mt (or nil) as metatable of table t.
** Raises if the current metatable is protected by a __metatable field.
*/
LJLIB_ASM(setmetatable)		LJLIB_REC(.)
{
  GCtab *t = lj_lib_checktab(L, 1);
  GCtab *mt = lj_lib_checktabornil(L, 2);
  if (!tvisnil(lj_meta_lookup(L, L->base, MM_metatable)))
    lj_err_caller(L, LJ_ERR_PROTMT);
  setgcref(t->metatable, obj2gco(mt));
  if (mt) { lj_gc_objbarriert(L, t, mt); }  /* GC write barrier for the link. */
  settabV(L, L->base-1, t);  /* Return t itself. */
  return FFH_RES(1);
}

/* getfenv([f|level]): environment of a function or of a stack level. */
LJLIB_CF(getfenv)
{
  GCfunc *fn;
  cTValue *o = L->base;
  if (!(o < L->top && tvisfunc(o))) {
    int level = lj_lib_optint(L, 1, 1);  /* Default: the calling function. */
    o = lj_err_getframe(L, level, &level);
    if (o == NULL)
      lj_err_arg(L, 1, LJ_ERR_INVLVL);
  }
  fn = &gcval(o)->fn;
  /* C functions report the thread environment instead of their own. */
  settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
  return 1;
}

/* setfenv(f|level, table): replace the environment of a Lua function.
** Level 0 sets the environment of the running thread itself.
*/
LJLIB_CF(setfenv)
{
  GCfunc *fn;
  GCtab *t = lj_lib_checktab(L, 2);
  cTValue *o = L->base;
  if (!(o < L->top && tvisfunc(o))) {
    int level = lj_lib_checkint(L, 1);
    if (level == 0) {
      /* NOBARRIER: A thread (i.e. L) is never black. */
      setgcref(L->env, obj2gco(t));
      return 0;
    }
    o = lj_err_getframe(L, level, &level);
    if (o == NULL)
      lj_err_arg(L, 1, LJ_ERR_INVLVL);
  }
  fn = &gcval(o)->fn;
  if (!isluafunc(fn))
    lj_err_caller(L, LJ_ERR_SETFENV);  /* Cannot change a C function's env. */
  setgcref(fn->l.env, obj2gco(t));
  lj_gc_objbarrier(L, obj2gco(fn), t);
  setfuncV(L, L->top++, fn);  /* Return the modified function. */
  return 1;
}

/* rawget(t, k): argument checks only; the VM performs the lookup. */
LJLIB_ASM(rawget)		LJLIB_REC(.)
{
  lj_lib_checktab(L, 1);
  lj_lib_checkany(L, 2);
  return FFH_UNREACHABLE;
}

/* rawset(t, k, v): store without invoking metamethods; returns t. */
LJLIB_CF(rawset)		LJLIB_REC(.)
{
  lj_lib_checktab(L, 1);
  lj_lib_checkany(L, 2);
  L->top = 1+lj_lib_checkany(L, 3);  /* Truncate to exactly 3 arguments. */
  lua_rawset(L, 1);
  return 1;
}

/* rawequal(a, b): primitive equality, no __eq metamethod. */
LJLIB_CF(rawequal)		LJLIB_REC(.)
{
  cTValue *o1 = lj_lib_checkany(L, 1);
  cTValue *o2 = lj_lib_checkany(L, 2);
  setboolV(L->top-1, lj_obj_equal(o1, o2));
  return 1;
}

/* unpack(t [, i [, e]]): return t[i], t[i+1], ..., t[e]. */
LJLIB_CF(unpack)
{
  GCtab *t = lj_lib_checktab(L, 1);
  int32_t n, i = lj_lib_optint(L, 2, 1);
  /* Default end index is the table length. */
  int32_t e = (L->base+3-1 < L->top && !tvisnil(L->base+3-1)) ?
	      lj_lib_checkint(L, 3) : (int32_t)lj_tab_len(t);
  if (i > e) return 0;  /* Empty range. */
  n = e - i + 1;
  if (n <= 0 || !lua_checkstack(L, n))
    lj_err_caller(L, LJ_ERR_UNPACK);  /* Overflowed range or stack limit. */
  do {
    cTValue *tv = lj_tab_getint(t, i);
    if (tv) {
      copyTV(L, L->top++, tv);
    } else {
      setnilV(L->top++);  /* Holes become nil. */
    }
  } while (i++ < e);
  return n;
}

/* select('#', ...) returns the argument count;
** select(i, ...) returns the arguments starting at index i.
*/
LJLIB_CF(select)
{
  int32_t n = (int32_t)(L->top - L->base);
  if (n >= 1 && tvisstr(L->base) && *strVdata(L->base) == '#') {
    setintV(L->top-1, n-1);
    return 1;
  } else {
    int32_t i = lj_lib_checkint(L, 1);
    if (i < 0) i = n + i; else if (i > n) i = n;  /* Negative i counts back. */
    if (i < 1)
      lj_err_arg(L, 1, LJ_ERR_IDXRNG);
    return n - i;
  }
}
+
/* -- Base library: conversions ------------------------------------------- */

/* tonumber(x [, base]): base 10 accepts numbers and numeric strings;
** other bases (2..36) parse via strtoul. Returns nil on failure.
*/
LJLIB_ASM(tonumber)		LJLIB_REC(.)
{
  int32_t base = lj_lib_optint(L, 2, 10);
  if (base == 10) {
    TValue *o = lj_lib_checkany(L, 1);
    if (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o))) {
      setnumV(L->base-1, numV(o));
      return FFH_RES(1);
    }
  } else {
    const char *p = strdata(lj_lib_checkstr(L, 1));
    char *ep;
    unsigned long ul;
    if (base < 2 || base > 36)
      lj_err_arg(L, 2, LJ_ERR_BASERNG);
    ul = strtoul(p, &ep, base);
    if (p != ep) {  /* At least one digit was consumed. */
      while (lj_ctype_isspace((unsigned char)(*ep))) ep++;
      if (*ep == '\0') {  /* Only trailing whitespace allowed. */
	setnumV(L->base-1, cast_num(ul));
	return FFH_RES(1);
      }
    }
  }
  setnilV(L->base-1);  /* Conversion failed. */
  return FFH_RES(1);
}

/* tostring(x): honors a __tostring metamethod, otherwise formats
** the value itself (numbers, nil/false/true, or "type: ptr").
*/
LJLIB_ASM(tostring)		LJLIB_REC(.)
{
  TValue *o = lj_lib_checkany(L, 1);
  cTValue *mo;
  L->top = o+1;  /* Only keep one argument. */
  if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
    copyTV(L, L->base-1, mo);  /* Replace callable. */
    return FFH_RETRY;  /* Re-dispatch to call the metamethod. */
  } else {
    GCstr *s;
    if (tvisnum(o)) {
      s = lj_str_fromnum(L, &o->n);
    } else if (tvisnil(o)) {
      s = lj_str_newlit(L, "nil");
    } else if (tvisfalse(o)) {
      s = lj_str_newlit(L, "false");
    } else if (tvistrue(o)) {
      s = lj_str_newlit(L, "true");
    } else {
      if (tvisfunc(o) && isffunc(funcV(o)))
	lua_pushfstring(L, "function: fast#%d", funcV(o)->c.ffid);
      else
	lua_pushfstring(L, "%s: %p", typename(o), lua_topointer(L, 1));
      /* Note: lua_pushfstring calls the GC which may invalidate o. */
      s = strV(L->top-1);
    }
    setstrV(L, L->base-1, s);
    return FFH_RES(1);
  }
}
+
/* -- Base library: iterators --------------------------------------------- */

/* next(t, k): argument checks only; iteration happens in the VM. */
LJLIB_ASM(next)
{
  lj_lib_checktab(L, 1);
  lj_lib_checknum(L, 2);  /* For ipairs_aux. */
  return FFH_UNREACHABLE;
}

/* pairs/ipairs: fast functions with their iterator closure as upvalue. */
LJLIB_PUSH(lastcl)
LJLIB_ASM_(pairs)

LJLIB_NOREGUV LJLIB_ASM_(ipairs_aux)	LJLIB_REC(.)

LJLIB_PUSH(lastcl)
LJLIB_ASM_(ipairs)		LJLIB_REC(.)
+
/* -- Base library: throw and catch errors -------------------------------- */

/* error(message [, level]): prefix position info for level > 0
** when the message is a string, then raise.
*/
LJLIB_CF(error)
{
  int32_t level = lj_lib_optint(L, 2, 1);
  lua_settop(L, 1);
  if (lua_isstring(L, 1) && level > 0) {
    luaL_where(L, level);   /* "file:line: " prefix. */
    lua_pushvalue(L, 1);
    lua_concat(L, 2);
  }
  return lua_error(L);  /* Never returns. */
}

/* pcall/xpcall: argument checks only; protected call is done in the VM. */
LJLIB_ASM(pcall)		LJLIB_REC(.)
{
  lj_lib_checkany(L, 1);
  lj_lib_checkfunc(L, 2);  /* For xpcall only. */
  return FFH_UNREACHABLE;
}
LJLIB_ASM_(xpcall)		LJLIB_REC(.)
+
/* -- Base library: load Lua code ----------------------------------------- */

/* Convert a lua_load status into Lua results:
** success -> the chunk; failure -> nil plus the error message.
*/
static int load_aux(lua_State *L, int status)
{
  if (status == 0)
    return 1;
  copyTV(L, L->top, L->top-1);  /* Move error message up ... */
  setnilV(L->top-1);            /* ... and put nil before it. */
  L->top++;
  return 2;
}

/* loadstring(s [, chunkname]): compile a string, default name is s itself. */
LJLIB_CF(loadstring)
{
  GCstr *s = lj_lib_checkstr(L, 1);
  GCstr *name = lj_lib_optstr(L, 2);
  return load_aux(L,
	   luaL_loadbuffer(L, strdata(s), s->len, strdata(name ? name : s)));
}

/* loadfile([filename]): compile a file, or stdin if no name is given. */
LJLIB_CF(loadfile)
{
  GCstr *fname = lj_lib_optstr(L, 1);
  return load_aux(L, luaL_loadfile(L, fname ? strdata(fname) : NULL));
}

/* lua_Reader for load(): call the user function at stack slot 1 and
** anchor the returned string in reserved stack slot 3 so the GC
** cannot collect it while the parser consumes it.
*/
static const char *reader_func(lua_State *L, void *ud, size_t *size)
{
  UNUSED(ud);
  luaL_checkstack(L, 2, "too many nested functions");
  copyTV(L, L->top++, L->base);
  lua_call(L, 0, 1);  /* Call user-supplied function. */
  L->top--;
  if (tvisnil(L->top)) {
    *size = 0;  /* nil signals end of chunk. */
    return NULL;
  } else if (tvisstr(L->top) || tvisnum(L->top)) {
    copyTV(L, L->base+2, L->top);  /* Anchor string in reserved stack slot. */
    return lua_tolstring(L, 3, size);
  } else {
    lj_err_caller(L, LJ_ERR_RDRSTR);  /* Reader must return a string. */
    return NULL;
  }
}

/* load(func [, chunkname]): compile a chunk produced piecewise by func. */
LJLIB_CF(load)
{
  GCstr *name = lj_lib_optstr(L, 2);
  lj_lib_checkfunc(L, 1);
  lua_settop(L, 3);  /* Reserve a slot for the string from the reader. */
  return load_aux(L,
	   lua_load(L, reader_func, NULL, name ? strdata(name) : "=(load)"));
}

/* dofile([filename]): load and run a file (or stdin); propagates errors. */
LJLIB_CF(dofile)
{
  GCstr *fname = lj_lib_optstr(L, 1);
  setnilV(L->top);
  L->top = L->base+1;
  if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0)
    lua_error(L);
  lua_call(L, 0, LUA_MULTRET);
  return (L->top - L->base) - 1;  /* All results of the chunk. */
}
+
/* -- Base library: GC control -------------------------------------------- */

/* gcinfo(): total GC memory in KBytes (legacy Lua 5.0 function). */
LJLIB_CF(gcinfo)
{
  setintV(L->top++, (G(L)->gc.total >> 10));
  return 1;
}

/* collectgarbage([opt [, arg]]): dispatch on the option name.
** "count" is answered directly (fractional KBytes); everything else
** goes through lua_gc.
*/
LJLIB_CF(collectgarbage)
{
  int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT,  /* ORDER LUA_GC* */
    "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul");
  int32_t data = lj_lib_optint(L, 2, 0);
  if (opt == LUA_GCCOUNT) {
    setnumV(L->top-1, cast_num((int32_t)G(L)->gc.total)/1024.0);
  } else {
    int res = lua_gc(L, opt, data);
    if (opt == LUA_GCSTEP)
      setboolV(L->top-1, res);  /* step reports cycle completion as boolean. */
    else
      setintV(L->top-1, res);
  }
  return 1;
}
+
/* -- Base library: miscellaneous functions ------------------------------- */

/* newproxy([arg]): zero-size userdata with an optional metatable.
** The upvalue is a weak table remembering which metatables were
** created here, so only genuine proxies can be used as templates.
*/
LJLIB_PUSH(top-2)  /* Upvalue holds weak table. */
LJLIB_CF(newproxy)
{
  lua_settop(L, 1);
  lua_newuserdata(L, 0);
  if (lua_toboolean(L, 1) == 0) {  /* newproxy(): without metatable. */
    return 1;
  } else if (lua_isboolean(L, 1)) {  /* newproxy(true): with metatable. */
    lua_newtable(L);
    lua_pushvalue(L, -1);
    lua_pushboolean(L, 1);
    lua_rawset(L, lua_upvalueindex(1));  /* Remember mt in weak table. */
  } else {  /* newproxy(proxy): inherit metatable. */
    int validproxy = 0;
    if (lua_getmetatable(L, 1)) {
      lua_rawget(L, lua_upvalueindex(1));  /* Was this mt made by newproxy? */
      validproxy = lua_toboolean(L, -1);
      lua_pop(L, 1);
    }
    if (!validproxy)
      lj_err_arg(L, 1, LJ_ERR_NOPROXY);
    lua_getmetatable(L, 1);
  }
  lua_setmetatable(L, 2);
  return 1;
}

/* print(...): write tostring() of each argument to stdout, TAB-separated.
** Takes a shortcut for plain strings/numbers when the global tostring
** is still the unmodified fast function.
*/
LJLIB_PUSH("tostring")
LJLIB_CF(print)
{
  ptrdiff_t i, nargs = L->top - L->base;
  cTValue *tv = lj_tab_getstr(tabref(L->env), strV(lj_lib_upvalue(L, 1)));
  int shortcut = (tv && tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring);
  copyTV(L, L->top++, tv ? tv : niltv(L));
  for (i = 0; i < nargs; i++) {
    const char *str;
    size_t size;
    cTValue *o = &L->base[i];
    if (shortcut && tvisstr(o)) {
      str = strVdata(o);
      size = strV(o)->len;
    } else if (shortcut && tvisnum(o)) {
      char buf[LUAI_MAXNUMBER2STR];
      lua_Number n = numV(o);
      size = (size_t)lua_number2str(buf, n);
      str = buf;
    } else {
      /* Slow path: call tostring(o) and use its string result. */
      copyTV(L, L->top+1, o);
      copyTV(L, L->top, L->top-1);
      L->top += 2;
      lua_call(L, 1, 1);
      str = lua_tolstring(L, -1, &size);
      if (!str)
	lj_err_caller(L, LJ_ERR_PRTOSTR);
      L->top--;
    }
    if (i)
      putchar('\t');
    fwrite(str, 1, size, stdout);
  }
  putchar('\n');
  return 0;
}

/* _VERSION string, pushed earlier in luaopen_base. */
LJLIB_PUSH(top-3)
LJLIB_SET(_VERSION)

#include "lj_libdef.h"
+
/* -- Coroutine library --------------------------------------------------- */

#define LJLIB_MODULE_coroutine

/* coroutine.status(co): classify from co's internal state:
** running / suspended / normal / dead.
*/
LJLIB_CF(coroutine_status)
{
  const char *s;
  lua_State *co;
  if (!(L->top > L->base && tvisthread(L->base)))
    lj_err_arg(L, 1, LJ_ERR_NOCORO);
  co = threadV(L->base);
  if (co == L) s = "running";
  else if (co->status == LUA_YIELD) s = "suspended";
  else if (co->status != 0) s = "dead";  /* Error status: terminated. */
  else if (co->base > co->stack+1) s = "normal";  /* Has frames: resumed us. */
  else if (co->top == co->base) s = "dead";  /* Finished: empty stack. */
  else s = "suspended";  /* Not yet started. */
  lua_pushstring(L, s);
  return 1;
}

/* coroutine.running(): the running coroutine, or nil on the main thread. */
LJLIB_CF(coroutine_running)
{
  if (lua_pushthread(L))
    setnilV(L->top++);  /* Main thread: return nil instead. */
  return 1;
}

/* coroutine.create(f): new thread with Lua function f as its body. */
LJLIB_CF(coroutine_create)
{
  lua_State *L1 = lua_newthread(L);
  if (!(L->top > L->base && tvisfunc(L->base) && isluafunc(funcV(L->base))))
    lj_err_arg(L, 1, LJ_ERR_NOLFUNC);  /* C functions cannot be coroutines. */
  setfuncV(L, L1->top++, funcV(L->base));
  return 1;
}

/* coroutine.yield outside a coroutine: always an error in this C part. */
LJLIB_ASM(coroutine_yield)
{
  lj_err_caller(L, LJ_ERR_CYIELD);
  return FFH_UNREACHABLE;
}

/* Common resume precondition check for resume and wrap.
** A coroutine is unresumable if it is running/normal (has a C frame),
** errored out, or already finished. wrap raises; resume returns
** false plus the error message.
*/
static int ffh_resume(lua_State *L, lua_State *co, int wrap)
{
  if (co->cframe != NULL || co->status > LUA_YIELD ||
      (co->status == 0 && co->top == co->base)) {
    ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
    if (wrap) lj_err_caller(L, em);
    setboolV(L->base-1, 0);
    setstrV(L, L->base, lj_err_str(L, em));
    return FFH_RES(2);
  }
  /* Make room on the coroutine's stack for the passed arguments. */
  lj_state_growstack(co, (MSize)(L->top - L->base - 1));
  return FFH_RETRY;  /* Actual transfer happens in the assembler VM. */
}

/* coroutine.resume(co, ...): validate and delegate to ffh_resume. */
LJLIB_ASM(coroutine_resume)
{
  if (!(L->top > L->base && tvisthread(L->base)))
    lj_err_arg(L, 1, LJ_ERR_NOCORO);
  return ffh_resume(L, threadV(L->base), 0);
}

/* Hidden helper behind coroutine.wrap closures; coroutine is upvalue 1. */
LJLIB_NOREG LJLIB_ASM(coroutine_wrap_aux)
{
  return ffh_resume(L, threadV(lj_lib_upvalue(L, 1)), 1);
}
+
/* Inline declarations. */
LJ_ASMF void lj_ff_coroutine_wrap_aux(void);
LJ_FUNCA_NORET void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co);

/* Error handler, called from assembler VM.
** Moves the error value from the dead coroutine to L and re-raises it.
*/
void lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co)
{
  co->top--; copyTV(L, L->top, co->top); L->top++;
  if (tvisstr(L->top-1))
    lj_err_callermsg(L, strVdata(L->top-1));
  else
    lj_err_run(L);  /* Non-string error values are rethrown as-is. */
}

/* coroutine.wrap(f): create(f) plus a closure that resumes it and
** propagates errors. The closure is patched to use the dedicated
** assembler gate and fast-function id.
*/
LJLIB_CF(coroutine_wrap)
{
  GCfunc *fn;
  lj_cf_coroutine_create(L);
  lua_pushcclosure(L, lj_ffh_coroutine_wrap_aux, 1);
  fn = funcV(L->top-1);
  fn->c.gate = lj_ff_coroutine_wrap_aux;
  fn->c.ffid = FF_coroutine_wrap_aux;
  return 1;
}

#include "lj_libdef.h"

/* ------------------------------------------------------------------------ */

/* Build the weak table used by newproxy; it is its own metatable with
** __mode = "kv" so remembered metatables don't leak.
*/
static void newproxy_weaktable(lua_State *L)
{
  /* NOBARRIER: The table is new (marked white). */
  GCtab *t = lj_tab_new(L, 0, 1);
  settabV(L, L->top++, t);
  setgcref(t->metatable, obj2gco(t));
  setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "__mode")),
	  lj_str_newlit(L, "kv"));
  t->nomm = cast_byte(~(1u<<MM_mode));  /* Only __mode is present. */
}

/* Open the base and coroutine libraries; also defines _G and _VERSION. */
LUALIB_API int luaopen_base(lua_State *L)
{
  /* NOBARRIER: Table and value are the same. */
  GCtab *env = tabref(L->env);
  settabV(L, lj_tab_setstr(L, env, lj_str_newlit(L, "_G")), env);
  lua_pushliteral(L, LUA_VERSION);  /* top-3. */
  newproxy_weaktable(L);  /* top-2. */
  LJ_LIB_REG_(L, "_G", base);
  LJ_LIB_REG(L, coroutine);
  return 2;
}
+
diff --git a/src/lib_bit.c b/src/lib_bit.c
new file mode 100644
index 00000000..2f727e68
--- /dev/null
+++ b/src/lib_bit.c
@@ -0,0 +1,74 @@
+/*
+** Bit manipulation library.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_bit_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
#define LJLIB_MODULE_bit

/* Unary ops: tobit/bnot/bswap share the single-operand check;
** the actual bit operation is done in the VM (and recorded as IR).
*/
LJLIB_ASM(bit_tobit)		LJLIB_REC(bit_unary IR_TOBIT)
{
  lj_lib_checknum(L, 1);
  return FFH_RETRY;
}
LJLIB_ASM_(bit_bnot)		LJLIB_REC(bit_unary IR_BNOT)
LJLIB_ASM_(bit_bswap)		LJLIB_REC(bit_unary IR_BSWAP)

/* Shift/rotate ops: two numeric operands (value, shift count). */
LJLIB_ASM(bit_lshift)		LJLIB_REC(bit_shift IR_BSHL)
{
  lj_lib_checknum(L, 1);
  lj_lib_checknum(L, 2);
  return FFH_RETRY;
}
LJLIB_ASM_(bit_rshift)		LJLIB_REC(bit_shift IR_BSHR)
LJLIB_ASM_(bit_arshift)		LJLIB_REC(bit_shift IR_BSAR)
LJLIB_ASM_(bit_rol)		LJLIB_REC(bit_shift IR_BROL)
LJLIB_ASM_(bit_ror)		LJLIB_REC(bit_shift IR_BROR)

/* N-ary ops: band/bor/bxor check every argument (at least one). */
LJLIB_ASM(bit_band)		LJLIB_REC(bit_nary IR_BAND)
{
  int i = 0;
  do { lj_lib_checknum(L, ++i); } while (L->base+i < L->top);
  return FFH_RETRY;
}
LJLIB_ASM_(bit_bor)		LJLIB_REC(bit_nary IR_BOR)
LJLIB_ASM_(bit_bxor)		LJLIB_REC(bit_nary IR_BXOR)

/* ------------------------------------------------------------------------ */

/* bit.tohex(x [, n]): hex string of the low 4*|n| bits (default 8 digits);
** a negative n selects uppercase digits.
*/
LJLIB_CF(bit_tohex)
{
  uint32_t b = (uint32_t)lj_num2bit(lj_lib_checknum(L, 1));
  int32_t i, n = L->base+1 >= L->top ? 8 : lj_num2bit(lj_lib_checknum(L, 2));
  const char *hexdigits = "0123456789abcdef";
  char buf[8];
  if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
  if (n > 8) n = 8;  /* Clamp to 32 bits. */
  for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
  lua_pushlstring(L, buf, (size_t)n);
  return 1;
}

/* ------------------------------------------------------------------------ */

#include "lj_libdef.h"

/* Open the bit library. */
LUALIB_API int luaopen_bit(lua_State *L)
{
  LJ_LIB_REG(L, bit);
  return 1;
}
+
diff --git a/src/lib_debug.c b/src/lib_debug.c
new file mode 100644
index 00000000..0e6c35e5
--- /dev/null
+++ b/src/lib_debug.c
@@ -0,0 +1,366 @@
+/*
+** Debug library.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lib_debug_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_lib.h"
+
/* ------------------------------------------------------------------------ */

#define LJLIB_MODULE_debug

/* debug.getregistry(): expose the registry table. */
LJLIB_CF(debug_getregistry)
{
  copyTV(L, L->top++, registry(L));
  return 1;
}

/* debug.getmetatable(obj): raw metatable of any value, or nil. */
LJLIB_CF(debug_getmetatable)
{
  lj_lib_checkany(L, 1);
  if (!lua_getmetatable(L, 1)) {
    setnilV(L->top-1);  /* No metatable: return nil. */
  }
  return 1;
}

/* debug.setmetatable(obj, mt): set metatable of any value; returns true. */
LJLIB_CF(debug_setmetatable)
{
  lj_lib_checktabornil(L, 2);
  L->top = L->base+2;  /* Truncate to exactly two arguments. */
  lua_setmetatable(L, 1);
  setboolV(L->top-1, 1);
  return 1;
}

/* debug.getfenv(obj): environment of any value. */
LJLIB_CF(debug_getfenv)
{
  lj_lib_checkany(L, 1);
  lua_getfenv(L, 1);
  return 1;
}

/* debug.setfenv(obj, table): set environment; raises if not possible. */
LJLIB_CF(debug_setfenv)
{
  lj_lib_checktab(L, 2);
  L->top = L->base+2;
  if (!lua_setfenv(L, 1))
    lj_err_caller(L, LJ_ERR_SETFENV);
  return 1;
}
+
+/* ------------------------------------------------------------------------ */
+
+static void settabss(lua_State *L, const char *i, const char *v)
+{
+ lua_pushstring(L, v);
+ lua_setfield(L, -2, i);
+}
+
+static void settabsi(lua_State *L, const char *i, int v)
+{
+ lua_pushinteger(L, v);
+ lua_setfield(L, -2, i);
+}
+
+static lua_State *getthread(lua_State *L, int *arg)
+{
+ if (L->base < L->top && tvisthread(L->base)) {
+ *arg = 1;
+ return threadV(L->base);
+ } else {
+ *arg = 0;
+ return L;
+ }
+}
+
/* Move the value produced by lua_getinfo (on L1's stack, or below the
** result table when L == L1) into field `fname` of the result table.
*/
static void treatstackoption(lua_State *L, lua_State *L1, const char *fname)
{
  if (L == L1) {
    lua_pushvalue(L, -2);  /* Duplicate value above the table ... */
    lua_remove(L, -3);     /* ... and drop the original. */
  }
  else
    lua_xmove(L1, L, 1);  /* Transfer value from the other thread. */
  lua_setfield(L, -2, fname);
}
+
/* debug.getinfo([thread,] f|level [, what]): collect the requested
** fields from lua_getinfo into a fresh table. A function argument is
** passed to lua_getinfo via the ">" option prefix.
*/
LJLIB_CF(debug_getinfo)
{
  lua_Debug ar;
  int arg;
  lua_State *L1 = getthread(L, &arg);
  const char *options = luaL_optstring(L, arg+2, "flnSu");  /* Default: all basics. */
  if (lua_isnumber(L, arg+1)) {
    if (!lua_getstack(L1, (int)lua_tointeger(L, arg+1), &ar)) {
      setnilV(L->top-1);  /* Level out of range: return nil. */
      return 1;
    }
  } else if (L->base+arg < L->top && tvisfunc(L->base+arg)) {
    options = lua_pushfstring(L, ">%s", options);  /* Function, not level. */
    setfuncV(L1, L1->top++, funcV(L->base+arg));
  } else {
    lj_err_arg(L, arg+1, LJ_ERR_NOFUNCL);
  }
  if (!lua_getinfo(L1, options, &ar))
    lj_err_arg(L, arg+2, LJ_ERR_INVOPT);
  lua_createtable(L, 0, 16);  /* Result table. */
  if (strchr(options, 'S')) {
    settabss(L, "source", ar.source);
    settabss(L, "short_src", ar.short_src);
    settabsi(L, "linedefined", ar.linedefined);
    settabsi(L, "lastlinedefined", ar.lastlinedefined);
    settabss(L, "what", ar.what);
  }
  if (strchr(options, 'l'))
    settabsi(L, "currentline", ar.currentline);
  if (strchr(options, 'u'))
    settabsi(L, "nups", ar.nups);
  if (strchr(options, 'n')) {
    settabss(L, "name", ar.name);
    settabss(L, "namewhat", ar.namewhat);
  }
  if (strchr(options, 'L'))
    treatstackoption(L, L1, "activelines");
  if (strchr(options, 'f'))
    treatstackoption(L, L1, "func");
  return 1;  /* return table */
}
+
/* debug.getlocal([thread,] level, n): name and value of local n at the
** given stack level, or nil if out of range.
*/
LJLIB_CF(debug_getlocal)
{
  int arg;
  lua_State *L1 = getthread(L, &arg);
  lua_Debug ar;
  const char *name;
  if (!lua_getstack(L1, lj_lib_checkint(L, arg+1), &ar))
    lj_err_arg(L, arg+1, LJ_ERR_LVLRNG);
  name = lua_getlocal(L1, &ar, lj_lib_checkint(L, arg+2));
  if (name) {
    lua_xmove(L1, L, 1);     /* Bring the value over from the thread. */
    lua_pushstring(L, name);
    lua_pushvalue(L, -2);    /* Reorder to (name, value). */
    return 2;
  } else {
    setnilV(L->top-1);
    return 1;
  }
}

/* debug.setlocal([thread,] level, n, value): assign local n; returns
** its name, or nil if no such local.
*/
LJLIB_CF(debug_setlocal)
{
  int arg;
  lua_State *L1 = getthread(L, &arg);
  lua_Debug ar;
  TValue *tv;
  if (!lua_getstack(L1, lj_lib_checkint(L, arg+1), &ar))
    lj_err_arg(L, arg+1, LJ_ERR_LVLRNG);
  tv = lj_lib_checkany(L, arg+3);
  copyTV(L1, L1->top++, tv);  /* Value must be on the target thread. */
  lua_pushstring(L, lua_setlocal(L1, &ar, lj_lib_checkint(L, arg+2)));
  return 1;
}
+
/* Shared implementation of debug.getupvalue/setupvalue: only Lua
** functions have accessible upvalues here. get selects the direction.
*/
static int debug_getupvalue(lua_State *L, int get)
{
  int32_t n = lj_lib_checkint(L, 2);
  if (isluafunc(lj_lib_checkfunc(L, 1))) {
    const char *name = get ? lua_getupvalue(L, 1, n) : lua_setupvalue(L, 1, n);
    if (name) {
      lua_pushstring(L, name);
      if (!get) return 1;  /* setupvalue returns only the name. */
      copyTV(L, L->top, L->top-2);  /* getupvalue returns (name, value). */
      L->top++;
      return 2;
    }
  }
  return 0;  /* No such upvalue (or C function). */
}

/* debug.getupvalue(f, n). */
LJLIB_CF(debug_getupvalue)
{
  return debug_getupvalue(L, 1);
}

/* debug.setupvalue(f, n, value). */
LJLIB_CF(debug_setupvalue)
{
  lj_lib_checkany(L, 3);
  return debug_getupvalue(L, 0);
}
+
/* ------------------------------------------------------------------------ */

/* Registry key (by address) under which the Lua hook function is stored. */
static const char KEY_HOOK = 'h';

/* C hook installed by debug.sethook: look up the Lua hook function in
** the registry and call it with (event name, current line or nil).
*/
static void hookf(lua_State *L, lua_Debug *ar)
{
  static const char *const hooknames[] =
    {"call", "return", "line", "count", "tail return"};
  lua_pushlightuserdata(L, (void *)&KEY_HOOK);
  lua_rawget(L, LUA_REGISTRYINDEX);
  if (lua_isfunction(L, -1)) {
    lua_pushstring(L, hooknames[(int)ar->event]);
    if (ar->currentline >= 0)
      lua_pushinteger(L, ar->currentline);
    else lua_pushnil(L);  /* No line info for this event. */
    lua_call(L, 2, 0);
  }
}
+
+static int makemask(const char *smask, int count)
+{
+ int mask = 0;
+ if (strchr(smask, 'c')) mask |= LUA_MASKCALL;
+ if (strchr(smask, 'r')) mask |= LUA_MASKRET;
+ if (strchr(smask, 'l')) mask |= LUA_MASKLINE;
+ if (count > 0) mask |= LUA_MASKCOUNT;
+ return mask;
+}
+
+static char *unmakemask(int mask, char *smask)
+{
+ int i = 0;
+ if (mask & LUA_MASKCALL) smask[i++] = 'c';
+ if (mask & LUA_MASKRET) smask[i++] = 'r';
+ if (mask & LUA_MASKLINE) smask[i++] = 'l';
+ smask[i] = '\0';
+ return smask;
+}
+
/* debug.sethook([thread,] hook, mask [, count]) or sethook() to clear:
** store the Lua hook in the registry and install the C trampoline.
*/
LJLIB_CF(debug_sethook)
{
  int arg, mask, count;
  lua_Hook func;
  (void)getthread(L, &arg);
  if (lua_isnoneornil(L, arg+1)) {
    lua_settop(L, arg+1);
    func = NULL; mask = 0; count = 0;  /* turn off hooks */
  } else {
    const char *smask = luaL_checkstring(L, arg+2);
    luaL_checktype(L, arg+1, LUA_TFUNCTION);
    count = luaL_optint(L, arg+3, 0);
    func = hookf; mask = makemask(smask, count);
  }
  lua_pushlightuserdata(L, (void *)&KEY_HOOK);
  lua_pushvalue(L, arg+1);  /* Remember the Lua hook (or nil). */
  lua_rawset(L, LUA_REGISTRYINDEX);
  lua_sethook(L, func, mask, count);
  return 0;
}

/* debug.gethook(): returns hook (or "external hook"), mask string, count. */
LJLIB_CF(debug_gethook)
{
  char buff[5];
  int mask = lua_gethookmask(L);
  lua_Hook hook = lua_gethook(L);
  if (hook != NULL && hook != hookf) {  /* external hook? */
    lua_pushliteral(L, "external hook");
  } else {
    lua_pushlightuserdata(L, (void *)&KEY_HOOK);
    lua_rawget(L, LUA_REGISTRYINDEX);  /* get hook */
  }
  lua_pushstring(L, unmakemask(mask, buff));
  lua_pushinteger(L, lua_gethookcount(L));
  return 3;
}
+
/* ------------------------------------------------------------------------ */

/* debug.debug(): interactive line-by-line REPL on stdin/stderr;
** terminates on EOF or the line "cont".
*/
LJLIB_CF(debug_debug)
{
  for (;;) {
    char buffer[250];
    fputs("lua_debug> ", stderr);
    if (fgets(buffer, sizeof(buffer), stdin) == 0 ||
	strcmp(buffer, "cont\n") == 0)
      return 0;
    if (luaL_loadbuffer(L, buffer, strlen(buffer), "=(debug command)") ||
	lua_pcall(L, 0, 0, 0)) {
      fputs(lua_tostring(L, -1), stderr);  /* Report but keep looping. */
      fputs("\n", stderr);
    }
    lua_settop(L, 0);  /* remove eventual returns */
  }
}
+
/* ------------------------------------------------------------------------ */

#define LEVELS1	12	/* size of the first part of the stack */
#define LEVELS2	10	/* size of the second part of the stack */

/* debug.traceback([thread,] [message [, level]]): build a traceback
** string. Shows the top LEVELS1 and bottom LEVELS2 frames, eliding the
** middle with "..." for deep stacks.
*/
LJLIB_CF(debug_traceback)
{
  int level;
  int firstpart = 1;  /* still before eventual `...' */
  int arg;
  lua_State *L1 = getthread(L, &arg);
  lua_Debug ar;
  if (lua_isnumber(L, arg+2)) {
    level = (int)lua_tointeger(L, arg+2);
    lua_pop(L, 1);
  }
  else
    level = (L == L1) ? 1 : 0;  /* level 0 may be this own function */
  if (lua_gettop(L) == arg)
    lua_pushliteral(L, "");
  else if (!lua_isstring(L, arg+1)) return 1;  /* message is not a string */
  else lua_pushliteral(L, "\n");
  lua_pushliteral(L, "stack traceback:");
  while (lua_getstack(L1, level++, &ar)) {
    if (level > LEVELS1 && firstpart) {
      /* no more than `LEVELS2' more levels? */
      if (!lua_getstack(L1, level+LEVELS2, &ar)) {
	level--;  /* keep going */
      } else {
	lua_pushliteral(L, "\n\t...");  /* too many levels */
	/* This only works with LuaJIT 2.x. Avoids O(n^2) behaviour. */
	lua_getstack(L1, -10, &ar);  /* Negative level: query frame count. */
	level = ar.i_ci - LEVELS2;   /* Jump straight to the bottom part. */
      }
      firstpart = 0;
      continue;
    }
    lua_pushliteral(L, "\n\t");
    lua_getinfo(L1, "Snl", &ar);
    lua_pushfstring(L, "%s:", ar.short_src);
    if (ar.currentline > 0)
      lua_pushfstring(L, "%d:", ar.currentline);
    if (*ar.namewhat != '\0') {  /* is there a name? */
      lua_pushfstring(L, " in function " LUA_QS, ar.name);
    } else {
      if (*ar.what == 'm')  /* main? */
	lua_pushfstring(L, " in main chunk");
      else if (*ar.what == 'C' || *ar.what == 't')
	lua_pushliteral(L, " ?");  /* C function or tail call */
      else
	lua_pushfstring(L, " in function <%s:%d>",
			ar.short_src, ar.linedefined);
    }
    lua_concat(L, lua_gettop(L) - arg);  /* Keep the stack small. */
  }
  lua_concat(L, lua_gettop(L) - arg);
  return 1;
}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
/* Open the debug library. */
LUALIB_API int luaopen_debug(lua_State *L)
{
  LJ_LIB_REG(L, debug);
  return 1;
}
+
diff --git a/src/lib_init.c b/src/lib_init.c
new file mode 100644
index 00000000..04ca60d9
--- /dev/null
+++ b/src/lib_init.c
@@ -0,0 +1,37 @@
+/*
+** Library initialization.
+** Major parts taken verbatim from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lib_init_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
/* Standard libraries opened by luaL_openlibs, in load order.
** The base library registers under the empty name (i.e. into _G).
*/
static const luaL_Reg lualibs[] = {
  { "",			luaopen_base },
  { LUA_LOADLIBNAME,	luaopen_package },
  { LUA_TABLIBNAME,	luaopen_table },
  { LUA_IOLIBNAME,	luaopen_io },
  { LUA_OSLIBNAME,	luaopen_os },
  { LUA_STRLIBNAME,	luaopen_string },
  { LUA_MATHLIBNAME,	luaopen_math },
  { LUA_DBLIBNAME,	luaopen_debug },
  { LUA_BITLIBNAME,	luaopen_bit },
  { LUA_JITLIBNAME,	luaopen_jit },
  { NULL,		NULL }
};

/* Open all standard libraries, passing each its module name. */
LUALIB_API void luaL_openlibs(lua_State *L)
{
  const luaL_Reg *lib = lualibs;
  for (; lib->func; lib++) {
    lua_pushcfunction(L, lib->func);
    lua_pushstring(L, lib->name);
    lua_call(L, 1, 0);
  }
}
+
diff --git a/src/lib_io.c b/src/lib_io.c
new file mode 100644
index 00000000..01623258
--- /dev/null
+++ b/src/lib_io.c
@@ -0,0 +1,538 @@
+/*
+** I/O library.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <errno.h>
+#include <stdio.h>
+
+#define lib_io_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_gc.h"
+#include "lj_ff.h"
+#include "lj_lib.h"
+
/* Index of standard handles in function environment. */
#define IO_INPUT	1
#define IO_OUTPUT	2

/* -- Error handling ------------------------------------------------------ */

/* Standard io result convention: true on success, or
** nil, error message (optionally "fname: msg"), errno on failure.
*/
static int io_pushresult(lua_State *L, int ok, const char *fname)
{
  if (ok) {
    setboolV(L->top++, 1);
    return 1;
  } else {
    int en = errno;  /* Lua API calls may change this value. */
    lua_pushnil(L);
    if (fname)
      lua_pushfstring(L, "%s: %s", fname, strerror(en));
    else
      lua_pushfstring(L, "%s", strerror(en));
    lua_pushinteger(L, en);
    return 3;
  }
}

/* Raise an argument error of the form "fname: strerror(errno)". */
static void io_file_error(lua_State *L, int arg, const char *fname)
{
  lua_pushfstring(L, "%s: %s", fname, strerror(errno));
  luaL_argerror(L, arg, lua_tostring(L, -1));
}
+
/* -- Open helpers -------------------------------------------------------- */

/* A file handle is a userdata holding a single FILE*; NULL means closed. */
#define io_tofilep(L)	((FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE))

/* Get the open FILE* at stack slot 1; raises if already closed. */
static FILE *io_tofile(lua_State *L)
{
  FILE **f = io_tofilep(L);
  if (*f == NULL)
    lj_err_caller(L, LJ_ERR_IOCLFL);
  return *f;
}

/* Push a new closed file handle userdata with the file metatable. */
static FILE **io_file_new(lua_State *L)
{
  FILE **pf = (FILE **)lua_newuserdata(L, sizeof(FILE *));
  *pf = NULL;  /* Closed until the caller stores an open FILE*. */
  luaL_getmetatable(L, LUA_FILEHANDLE);
  lua_setmetatable(L, -2);
  return pf;
}

/* -- Close helpers ------------------------------------------------------- */

/* __close for stdin/stdout/stderr: refuse to close them. */
static int lj_cf_io_std_close(lua_State *L)
{
  lua_pushnil(L);
  lua_pushliteral(L, "cannot close standard file");
  return 2;
}

/* __close for popen'd files: pclose where the platform supports it. */
static int lj_cf_io_pipe_close(lua_State *L)
{
  FILE **p = io_tofilep(L);
#if defined(LUA_USE_POSIX)
  int ok = (pclose(*p) != -1);
#elif defined(LUA_USE_WIN)
  int ok = (_pclose(*p) != -1);
#else
  int ok = 0;  /* No pipe support on this platform. */
#endif
  *p = NULL;  /* Mark handle as closed either way. */
  return io_pushresult(L, ok, NULL);
}

/* __close for regular files: fclose. */
static int lj_cf_io_file_close(lua_State *L)
{
  FILE **p = io_tofilep(L);
  int ok = (fclose(*p) == 0);
  *p = NULL;
  return io_pushresult(L, ok, NULL);
}

/* Dispatch to the __close function stored in the handle's environment,
** so each handle kind (file/pipe/std) closes appropriately.
*/
static int io_file_close(lua_State *L)
{
  lua_getfenv(L, 1);
  lua_getfield(L, -1, "__close");
  return (lua_tocfunction(L, -1))(L);
}
+
/* -- Read/write helpers -------------------------------------------------- */

/* "*n" format: scan one number; pushes it and returns 1 on success. */
static int io_file_readnum(lua_State *L, FILE *fp)
{
  lua_Number d;
  if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
    lua_pushnumber(L, d);
    return 1;
  } else {
    return 0;  /* read fails */
  }
}

/* Zero-byte read: pushes "" and reports whether EOF was reached. */
static int test_eof(lua_State *L, FILE *fp)
{
  int c = getc(fp);
  ungetc(c, fp);  /* Peek only; put the byte back. */
  lua_pushlstring(L, NULL, 0);
  return (c != EOF);
}

/* "*l" format: read one line without the trailing newline. */
static int io_file_readline(lua_State *L, FILE *fp)
{
  luaL_Buffer b;
  luaL_buffinit(L, &b);
  for (;;) {
    size_t len;
    char *p = luaL_prepbuffer(&b);
    if (fgets(p, LUAL_BUFFERSIZE, fp) == NULL) {  /* EOF? */
      luaL_pushresult(&b);
      return (strV(L->top-1)->len > 0);  /* Anything read? */
    }
    len = strlen(p);
    if (len == 0 || p[len-1] != '\n') {  /* Partial line? */
      luaL_addsize(&b, len);  /* Keep accumulating. */
    } else {
      luaL_addsize(&b, len - 1);  /* Don't include EOL. */
      luaL_pushresult(&b);
      return 1;  /* Got at least an EOL. */
    }
  }
}

/* Numeric format / "*a": read up to n bytes into a string. */
static int io_file_readchars(lua_State *L, FILE *fp, size_t n)
{
  size_t rlen;  /* how much to read */
  size_t nr;  /* number of chars actually read */
  luaL_Buffer b;
  luaL_buffinit(L, &b);
  rlen = LUAL_BUFFERSIZE;  /* try to read that much each time */
  do {
    char *p = luaL_prepbuffer(&b);
    if (rlen > n) rlen = n;  /* cannot read more than asked */
    nr = fread(p, 1, rlen, fp);
    luaL_addsize(&b, nr);
    n -= nr;  /* still have to read `n' chars */
  } while (n > 0 && nr == rlen);  /* until end of count or eof */
  luaL_pushresult(&b);  /* close buffer */
  return (n == 0 || lua_objlen(L, -1) > 0);
}
+
/* Shared body of file:read()/io.read(): interpret the format arguments
** starting at stack slot `start`. No arguments means "read a line".
** Returns one value per format; the last is nil if its read failed.
*/
static int io_file_read(lua_State *L, FILE *fp, int start)
{
  int ok, n, nargs = (L->top - L->base) - start;
  clearerr(fp);
  if (nargs == 0) {
    ok = io_file_readline(L, fp);
    n = start+1;  /* Return 1 result. */
  } else {
    /* The results plus the buffers go on top of the args. */
    luaL_checkstack(L, nargs+LUA_MINSTACK, "too many arguments");
    ok = 1;
    for (n = start; nargs-- && ok; n++) {
      if (tvisstr(L->base+n)) {
	const char *p = strVdata(L->base+n);
	if (p[0] != '*')
	  lj_err_arg(L, n+1, LJ_ERR_INVOPT);
	if (p[1] == 'n')
	  ok = io_file_readnum(L, fp);
	else if (p[1] == 'l')
	  ok = io_file_readline(L, fp);
	else if (p[1] == 'a')
	  io_file_readchars(L, fp, ~((size_t)0));  /* "*a" never fails. */
	else
	  lj_err_arg(L, n+1, LJ_ERR_INVFMT);
      } else if (tvisnum(L->base+n)) {
	size_t len = (size_t)lj_lib_checkint(L, n+1);
	ok = len ? io_file_readchars(L, fp, len) : test_eof(L, fp);
      } else {
	lj_err_arg(L, n+1, LJ_ERR_INVOPT);
      }
    }
  }
  if (ferror(fp))
    return io_pushresult(L, 0, NULL);  /* I/O error: nil, msg, errno. */
  if (!ok)
    setnilV(L->top-1);  /* Replace last result with nil. */
  return n - start;
}

/* Shared body of file:write()/io.write(): write each string or number
** argument; any other type raises. Returns the io_pushresult triple.
*/
static int io_file_write(lua_State *L, FILE *fp, int start)
{
  cTValue *tv;
  int status = 1;
  for (tv = L->base+start; tv < L->top; tv++) {
    if (tvisstr(tv)) {
      MSize len = strV(tv)->len;
      status = status && (fwrite(strVdata(tv), 1, len, fp) == len);
    } else if (tvisnum(tv)) {
      status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
    } else {
      lj_lib_checkstr(L, tv-L->base+1);  /* Raises: not a string/number. */
    }
  }
  return io_pushresult(L, status, NULL);
}
+
+/* -- I/O file methods ---------------------------------------------------- */
+
+#define LJLIB_MODULE_io_method
+
+/* file:close([file]) -- with no argument, closes the default output file. */
+LJLIB_CF(io_method_close)
+{
+  if (lua_isnone(L, 1))
+    lua_rawgeti(L, LUA_ENVIRONINDEX, IO_OUTPUT);
+  io_tofile(L);  /* Validate the file handle (throws on closed/invalid). */
+  return io_file_close(L);
+}
+
+/* file:read(...) -- formats start at stack slot 1 (after the file object). */
+LJLIB_CF(io_method_read)
+{
+  return io_file_read(L, io_tofile(L), 1);
+}
+
+/* file:write(...) -- values start at stack slot 1 (after the file object). */
+LJLIB_CF(io_method_write)
+{
+  return io_file_write(L, io_tofile(L), 1);
+}
+
+/* file:flush() */
+LJLIB_CF(io_method_flush)
+{
+  return io_pushresult(L, fflush(io_tofile(L)) == 0, NULL);
+}
+
+/* file:seek([whence [, offset]]) -- whence defaults to "cur", offset to 0.
+** Uses a 64-bit capable seek/tell where the platform provides one, so
+** files > 2GB work on POSIX, MSVC >= 2005 and MinGW. Returns the new
+** position as a number, or nil+message+errno on failure.
+*/
+LJLIB_CF(io_method_seek)
+{
+  FILE *fp = io_tofile(L);
+  int opt = lj_lib_checkopt(L, 2, 1, "\3set\3cur\3end");
+  lua_Number ofs;
+  int res;
+  /* Map option index to the stdio whence constant. */
+  if (opt == 0) opt = SEEK_SET;
+  else if (opt == 1) opt = SEEK_CUR;
+  else if (opt == 2) opt = SEEK_END;
+  /* lj_lib_opt: first statement list if arg 3 is present, else second. */
+  lj_lib_opt(L, 3,
+    ofs = lj_lib_checknum(L, 3);
+  ,
+    ofs = 0;
+  )
+#if defined(LUA_USE_POSIX)
+  res = fseeko(fp, (int64_t)ofs, opt);
+#elif _MSC_VER >= 1400
+  res = _fseeki64(fp, (int64_t)ofs, opt);
+#elif defined(__MINGW32__)
+  res = fseeko64(fp, (int64_t)ofs, opt);
+#else
+  res = fseek(fp, (long)ofs, opt);  /* Fallback: limited to long range. */
+#endif
+  if (res)
+    return io_pushresult(L, 0, NULL);
+#if defined(LUA_USE_POSIX)
+  ofs = cast_num(ftello(fp));
+#elif _MSC_VER >= 1400
+  ofs = cast_num(_ftelli64(fp));
+#elif defined(__MINGW32__)
+  ofs = cast_num(ftello64(fp));
+#else
+  ofs = cast_num(ftell(fp));
+#endif
+  setnumV(L->top-1, ofs);
+  return 1;
+}
+
+/* file:setvbuf(mode [, size]) -- mode is "full", "line" or "no";
+** size defaults to LUAL_BUFFERSIZE. Returns true or nil+message+errno.
+*/
+LJLIB_CF(io_method_setvbuf)
+{
+  FILE *fp = io_tofile(L);
+  int opt = lj_lib_checkopt(L, 2, -1, "\4full\4line\2no");
+  size_t sz = (size_t)lj_lib_optint(L, 3, LUAL_BUFFERSIZE);
+  /* Map option index to the stdio buffering-mode constant. */
+  if (opt == 0) opt = _IOFBF;
+  else if (opt == 1) opt = _IOLBF;
+  else if (opt == 2) opt = _IONBF;
+  return io_pushresult(L, (setvbuf(fp, NULL, opt, sz) == 0), NULL);
+}
+
+/* Forward declaration. */
+static void io_file_lines(lua_State *L, int idx, int toclose);
+
+/* file:lines() -- returns an iterator; does NOT close the file at EOF
+** (toclose == 0), unlike io.lines(fname).
+*/
+LJLIB_CF(io_method_lines)
+{
+  io_tofile(L);
+  io_file_lines(L, 1, 0);
+  return 1;
+}
+
+/* __gc metamethod: close the file if it is still open. */
+LJLIB_CF(io_method___gc)
+{
+  FILE *fp = *io_tofilep(L);
+  if (fp != NULL) io_file_close(L);
+  return 0;
+}
+
+/* __tostring metamethod: "file (closed)" or "file (0x...)". */
+LJLIB_CF(io_method___tostring)
+{
+  FILE *fp = *io_tofilep(L);
+  if (fp == NULL)
+    lua_pushliteral(L, "file (closed)");
+  else
+    lua_pushfstring(L, "file (%p)", fp);
+  return 1;
+}
+
+LJLIB_PUSH(top-1) LJLIB_SET(__index)
+
+#include "lj_libdef.h"
+
+/* -- I/O library functions ----------------------------------------------- */
+
+#define LJLIB_MODULE_io
+
+LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
+
+/* Fetch the default input/output file handle (IO_INPUT/IO_OUTPUT slot of
+** the function environment table). Throws if the standard file was closed.
+*/
+static FILE *io_file_get(lua_State *L, int findex)
+{
+  GCtab *fenv = tabref(curr_func(L)->c.env);
+  GCudata *ud = udataV(&tvref(fenv->array)[findex]);
+  FILE *fp = *(FILE **)uddata(ud);
+  if (fp == NULL)
+    lj_err_caller(L, LJ_ERR_IOSTDCL);
+  return fp;
+}
+
+/* io.open(fname [, mode]) -- mode defaults to "r".
+** Returns a file object, or nil+message+errno on failure.
+*/
+LJLIB_CF(io_open)
+{
+  const char *fname = luaL_checkstring(L, 1);
+  const char *mode = luaL_optstring(L, 2, "r");
+  FILE **pf = io_file_new(L);  /* Create userdata first, so GC can clean up. */
+  *pf = fopen(fname, mode);
+  return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
+}
+
+/* io.tmpfile() -- temporary file, removed automatically on close. */
+LJLIB_CF(io_tmpfile)
+{
+  FILE **pf = io_file_new(L);
+  *pf = tmpfile();
+  return (*pf == NULL) ? io_pushresult(L, 0, NULL) : 1;
+}
+
+/* io.close([file]) -- same as file:close(); defaults to default output. */
+LJLIB_CF(io_close)
+{
+  return lj_cf_io_method_close(L);
+}
+
+/* io.read(...) -- read from the default input file. */
+LJLIB_CF(io_read)
+{
+  return io_file_read(L, io_file_get(L, IO_INPUT), 0);
+}
+
+/* io.write(...) -- write to the default output file. */
+LJLIB_CF(io_write)
+{
+  return io_file_write(L, io_file_get(L, IO_OUTPUT), 0);
+}
+
+/* io.flush() -- flush the default output file. */
+LJLIB_CF(io_flush)
+{
+  return io_pushresult(L, fflush(io_file_get(L, IO_OUTPUT)) == 0, NULL);
+}
+
+/* Iterator closure for io.lines()/file:lines(). Upvalue 1 is the file
+** userdata, upvalue 2 a boolean: close the file when EOF is reached.
+** Throws on reading a closed file or on a stdio error.
+*/
+LJLIB_NOREG LJLIB_CF(io_lines_iter)
+{
+  FILE *fp = *(FILE **)uddata(udataV(lj_lib_upvalue(L, 1)));
+  int ok;
+  if (fp == NULL)
+    lj_err_caller(L, LJ_ERR_IOCLFL);
+  ok = io_file_readline(L, fp);
+  if (ferror(fp))
+    return luaL_error(L, "%s", strerror(errno));
+  if (ok)
+    return 1;
+  if (tvistrue(lj_lib_upvalue(L, 2))) {  /* Need to close file? */
+    L->top = L->base+1;
+    setudataV(L, L->base, udataV(lj_lib_upvalue(L, 1)));
+    io_file_close(L);
+  }
+  return 0;
+}
+
+/* Push a lines iterator closing over the file at stack slot idx. */
+static void io_file_lines(lua_State *L, int idx, int toclose)
+{
+  lua_pushvalue(L, idx);
+  lua_pushboolean(L, toclose);
+  lua_pushcclosure(L, lj_cf_io_lines_iter, 2);
+  funcV(L->top-1)->c.ffid = FF_io_lines_iter;  /* Mark as fast function. */
+}
+
+/* io.lines([fname]) -- iterate over fname (file is closed at EOF) or, with
+** no argument, over the default input file (which is left open).
+*/
+LJLIB_CF(io_lines)
+{
+  if (lua_isnoneornil(L, 1)) {  /* no arguments? */
+    /* will iterate over default input */
+    lua_rawgeti(L, LUA_ENVIRONINDEX, IO_INPUT);
+    return lj_cf_io_method_lines(L);
+  } else {
+    const char *fname = luaL_checkstring(L, 1);
+    FILE **pf = io_file_new(L);
+    *pf = fopen(fname, "r");
+    if (*pf == NULL)
+      io_file_error(L, 1, fname);
+    io_file_lines(L, lua_gettop(L), 1);  /* toclose=1: close at EOF. */
+    return 1;
+  }
+}
+
+/* Common driver for io.input()/io.output(). With an argument (a filename
+** to open with 'mode', or an open file object) it replaces the default
+** file stored at environment slot 'fp'. Always returns the current value.
+*/
+static int io_std_get(lua_State *L, int fp, const char *mode)
+{
+  if (!lua_isnoneornil(L, 1)) {
+    const char *fname = lua_tostring(L, 1);
+    if (fname) {
+      FILE **pf = io_file_new(L);
+      *pf = fopen(fname, mode);
+      if (*pf == NULL)
+        io_file_error(L, 1, fname);
+    } else {
+      io_tofile(L);  /* check that it's a valid file handle */
+      lua_pushvalue(L, 1);
+    }
+    lua_rawseti(L, LUA_ENVIRONINDEX, fp);
+  }
+  /* return current value */
+  lua_rawgeti(L, LUA_ENVIRONINDEX, fp);
+  return 1;
+}
+
+/* io.input([file]) -- get/set the default input file. */
+LJLIB_CF(io_input)
+{
+  return io_std_get(L, IO_INPUT, "r");
+}
+
+/* io.output([file]) -- get/set the default output file. */
+LJLIB_CF(io_output)
+{
+  return io_std_get(L, IO_OUTPUT, "w");
+}
+
+/* io.type(obj) -- "file", "closed file", or nil if obj is not a file
+** handle. A handle is recognized by its metatable being the one stored
+** under LUA_FILEHANDLE in the registry.
+*/
+LJLIB_CF(io_type)
+{
+  void *ud;
+  luaL_checkany(L, 1);
+  ud = lua_touserdata(L, 1);
+  lua_getfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
+  if (ud == NULL || !lua_getmetatable(L, 1) || !lua_rawequal(L, -2, -1))
+    lua_pushnil(L);  /* not a file */
+  else if (*((FILE **)ud) == NULL)
+    lua_pushliteral(L, "closed file");
+  else
+    lua_pushliteral(L, "file");
+  return 1;
+}
+
+LJLIB_PUSH(top-3) LJLIB_SET(!) /* Set environment. */
+
+/* io.popen(prog [, mode]) -- only available on POSIX and Windows; raises
+** an error elsewhere. Note: the returned handle lives in the pipe
+** environment, so closing it uses pclose()/_pclose().
+*/
+LJLIB_CF(io_popen)
+{
+#if defined(LUA_USE_POSIX) || defined(LUA_USE_WIN)
+  const char *fname = luaL_checkstring(L, 1);
+  const char *mode = luaL_optstring(L, 2, "r");
+  FILE **pf = io_file_new(L);
+#ifdef LUA_USE_POSIX
+  fflush(NULL);  /* Flush all streams before fork (POSIX recommendation). */
+  *pf = popen(fname, mode);
+#else
+  *pf = _popen(fname, mode);
+#endif
+  return (*pf == NULL) ? io_pushresult(L, 0, fname) : 1;
+#else
+  luaL_error(L, LUA_QL("popen") " not supported");
+#endif
+}
+
+#include "lj_libdef.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Wrap a stdio stream (stdin/stdout/stderr) in a file userdata, give it
+** the environment table at stack slot -2, register it in that table at
+** index k (if k > 0, i.e. IO_INPUT/IO_OUTPUT), and set it as field
+** 'fname' of the io table.
+*/
+static void io_std_new(lua_State *L, FILE *fp, int k, const char *fname)
+{
+  FILE **pf = io_file_new(L);
+  GCudata *ud = udataV(L->top-1);
+  GCtab *envt = tabV(L->top-2);
+  *pf = fp;
+  setgcref(ud->env, obj2gco(envt));
+  lj_gc_objbarrier(L, obj2gco(ud), envt);  /* GC write barrier for env ref. */
+  if (k > 0) {
+    lua_pushvalue(L, -1);
+    lua_rawseti(L, -5, k);
+  }
+  lua_setfield(L, -3, fname);
+}
+
+/* Create a file environment table with a __close handler 'cls'. */
+static void io_fenv_new(lua_State *L, int narr, lua_CFunction cls)
+{
+  lua_createtable(L, narr, 1);
+  lua_pushcfunction(L, cls);
+  lua_setfield(L, -2, "__close");
+}
+
+/* Open the io library: register the file metatable, create the three
+** environments (pipe/file/std, distinguished by their __close handlers),
+** register the io.* functions and wrap the three standard streams.
+** Stack layout comments below refer to the LJLIB_PUSH(top-N) annotations.
+*/
+LUALIB_API int luaopen_io(lua_State *L)
+{
+  LJ_LIB_REG_(L, NULL, io_method);
+  lua_setfield(L, LUA_REGISTRYINDEX, LUA_FILEHANDLE);
+  io_fenv_new(L, 0, lj_cf_io_pipe_close);  /* top-3 */
+  io_fenv_new(L, 2, lj_cf_io_file_close);  /* top-2 */
+  LJ_LIB_REG(L, io);
+  io_fenv_new(L, 0, lj_cf_io_std_close);  /* Env for the std streams. */
+  io_std_new(L, stdin, IO_INPUT, "stdin");
+  io_std_new(L, stdout, IO_OUTPUT, "stdout");
+  io_std_new(L, stderr, 0, "stderr");  /* k=0: not a default file. */
+  lua_pop(L, 1);
+  return 1;
+}
+
diff --git a/src/lib_jit.c b/src/lib_jit.c
new file mode 100644
index 00000000..4a57f3b4
--- /dev/null
+++ b/src/lib_jit.c
@@ -0,0 +1,589 @@
+/*
+** JIT library.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_jit_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_arch.h"
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#if LJ_HASJIT
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+#endif
+#include "lj_dispatch.h"
+#include "lj_vm.h"
+#include "lj_vmevent.h"
+#include "lj_lib.h"
+
+#include "luajit.h"
+
+/* -- jit.* functions ----------------------------------------------------- */
+
+#define LJLIB_MODULE_jit
+
+/* Common driver for jit.on/off/flush. Dispatches on the argument shape:
+** no arg/nil = whole engine; function/prototype (optionally with a second
+** boolean selecting all-funcs vs. all-subfuncs) = per-function; 'true' as
+** first arg = current function. NOTE: the err: label is jumped into from
+** outside the if-body -- keep the control flow exactly as is.
+*/
+static int setjitmode(lua_State *L, int mode)
+{
+  int idx = 0;
+  if (L->base == L->top || tvisnil(L->base)) {  /* jit.on/off/flush([nil]) */
+    mode |= LUAJIT_MODE_ENGINE;
+  } else {
+    /* jit.on/off/flush(func|proto, nil|true|false) */
+    if (tvisfunc(L->base) || tvisproto(L->base))
+      idx = 1;
+    else if (!tvistrue(L->base))  /* jit.on/off/flush(true, nil|true|false) */
+      goto err;
+    if (L->base+1 < L->top && tvisbool(L->base+1))
+      mode |= boolV(L->base+1) ? LUAJIT_MODE_ALLFUNC : LUAJIT_MODE_ALLSUBFUNC;
+    else
+      mode |= LUAJIT_MODE_FUNC;
+  }
+  if (luaJIT_setmode(L, idx, mode) != 1) {
+  err:
+#if LJ_HASJIT
+    lj_err_arg(L, 1, LJ_ERR_NOLFUNC);
+#else
+    lj_err_caller(L, LJ_ERR_NOJIT);
+#endif
+  }
+  return 0;
+}
+
+/* jit.on(...) */
+LJLIB_CF(jit_on)
+{
+  return setjitmode(L, LUAJIT_MODE_ON);
+}
+
+/* jit.off(...) */
+LJLIB_CF(jit_off)
+{
+  return setjitmode(L, LUAJIT_MODE_OFF);
+}
+
+/* jit.flush([tr]) -- with a trace number, flush just that trace. */
+LJLIB_CF(jit_flush)
+{
+#if LJ_HASJIT
+  if (L->base < L->top && (tvisnum(L->base) || tvisstr(L->base))) {
+    int traceno = lj_lib_checkint(L, 1);
+    luaJIT_setmode(L, traceno, LUAJIT_MODE_FLUSH|LUAJIT_MODE_TRACE);
+    return 0;
+  }
+#endif
+  return setjitmode(L, LUAJIT_MODE_FLUSH);
+}
+
+#if LJ_HASJIT
+/* Push a string for every flag bit that is set.
+** 'str' is a concatenation of length-prefixed names (first byte = length),
+** terminated by a zero byte; 'base' is the bit matching the first name and
+** is shifted left once per name.
+*/
+static void flagbits_to_strings(lua_State *L, uint32_t flags, uint32_t base,
+                                const char *str)
+{
+  for (; *str; base <<= 1, str += 1+*str)
+    if (flags & base)
+      setstrV(L, L->top++, lj_str_new(L, str+1, *(uint8_t *)str));
+}
+#endif
+
+/* jit.status() -- returns a boolean (JIT on/off) followed by one string
+** per active CPU and optimization flag. Without JIT support it returns
+** just false.
+*/
+LJLIB_CF(jit_status)
+{
+#if LJ_HASJIT
+  jit_State *J = L2J(L);
+  L->top = L->base;  /* Discard any arguments. */
+  setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
+  flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING);
+  flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING);
+  return L->top - L->base;
+#else
+  setboolV(L->top++, 0);
+  return 1;
+#endif
+}
+
+/* jit.attach(func [, event]) -- attach func as a handler for a named VM
+** event, or (without an event name) detach func from all events.
+*/
+LJLIB_CF(jit_attach)
+{
+#ifdef LUAJIT_DISABLE_VMEVENT
+  luaL_error(L, "vmevent API disabled");
+#else
+  GCfunc *fn = lj_lib_checkfunc(L, 1);
+  GCstr *s = lj_lib_optstr(L, 2);
+  luaL_findtable(L, LUA_REGISTRYINDEX, LJ_VMEVENTS_REGKEY, LJ_VMEVENTS_HSIZE);
+  if (s) {  /* Attach to given event. */
+    lua_pushvalue(L, 1);
+    lua_rawseti(L, -2, VMEVENT_HASHIDX(s->hash));
+    G(L)->vmevmask = VMEVENT_NOCACHE;  /* Invalidate cache. */
+  } else {  /* Detach if no event given. */
+    setnilV(L->top++);
+    while (lua_next(L, -2)) {
+      L->top--;  /* Drop the value, keep the key for the next iteration. */
+      if (tvisfunc(L->top) && funcV(L->top) == fn) {
+        setnilV(lj_tab_set(L, tabV(L->top-2), L->top-1));
+      }
+    }
+  }
+#endif
+  return 0;
+}
+
+LJLIB_PUSH(top-4) LJLIB_SET(arch)
+LJLIB_PUSH(top-3) LJLIB_SET(version_num)
+LJLIB_PUSH(top-2) LJLIB_SET(version)
+
+#include "lj_libdef.h"
+
+/* -- jit.util.* functions ------------------------------------------------ */
+
+#define LJLIB_MODULE_jit_util
+
+/* -- Reflection API for Lua functions ------------------------------------ */
+
+/* Return the prototype of the first argument (a Lua function or a proto
+** object). With nolua set, returns NULL for a non-Lua (C) function
+** instead of throwing; otherwise throws an argument type error.
+*/
+static GCproto *check_Lproto(lua_State *L, int nolua)
+{
+  TValue *o = L->base;
+  if (L->top > o) {
+    if (tvisproto(o)) {
+      return protoV(o);
+    } else if (tvisfunc(o)) {
+      if (isluafunc(funcV(o)))
+        return funcproto(funcV(o));
+      else if (nolua)
+        return NULL;
+    }
+  }
+  lj_err_argt(L, 1, LUA_TFUNCTION);
+  return NULL;  /* unreachable */
+}
+
+/* Set t[name] = val for an integer field. */
+static void setintfield(lua_State *L, GCtab *t, const char *name, int32_t val)
+{
+  setintV(lj_tab_setstr(L, t, lj_str_newz(L, name)), val);
+}
+
+/* local info = jit.util.funcinfo(func [,pc])
+** Returns a table describing a Lua function/prototype (line range, slot
+** counts, source, location); for a C function only ffid and upvalues.
+*/
+LJLIB_CF(jit_util_funcinfo)
+{
+  GCproto *pt = check_Lproto(L, 1);  /* NULL for C functions. */
+  if (pt) {
+    BCPos pc = (BCPos)lj_lib_optint(L, 2, 0);
+    GCtab *t;
+    lua_createtable(L, 0, 16);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);
+    setintfield(L, t, "linedefined", pt->linedefined);
+    setintfield(L, t, "lastlinedefined", pt->lastlinedefined);
+    setintfield(L, t, "stackslots", pt->framesize);
+    setintfield(L, t, "params", pt->numparams);
+    setintfield(L, t, "bytecodes", (int32_t)pt->sizebc);
+    setintfield(L, t, "gcconsts", (int32_t)pt->sizekgc);
+    setintfield(L, t, "nconsts", (int32_t)pt->sizekn);
+    setintfield(L, t, "upvalues", (int32_t)pt->sizeuv);
+    if (pc > 0)
+      setintfield(L, t, "currentline", pt->lineinfo ? pt->lineinfo[pc-1] : 0);
+    lua_pushboolean(L, (pt->flags & PROTO_IS_VARARG));
+    lua_setfield(L, -2, "isvararg");
+    setstrV(L, L->top++, pt->chunkname);
+    lua_setfield(L, -2, "source");
+    lj_err_pushloc(L, pt, pc);
+    lua_setfield(L, -2, "loc");
+  } else {
+    GCfunc *fn = funcV(L->base);
+    GCtab *t;
+    lua_createtable(L, 0, 2);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);
+    setintfield(L, t, "ffid", fn->c.ffid);
+    setintfield(L, t, "upvalues", fn->c.nupvalues);
+  }
+  return 1;
+}
+
+/* local ins, m = jit.util.funcbc(func, pc)
+** Returns the raw bytecode instruction at 1-based position pc and its
+** operand-mode descriptor, or nothing if pc is out of range.
+*/
+LJLIB_CF(jit_util_funcbc)
+{
+  GCproto *pt = check_Lproto(L, 0);
+  BCPos pc = (BCPos)lj_lib_checkint(L, 2) - 1;  /* 1-based to 0-based. */
+  if (pc < pt->sizebc) {
+    BCIns ins = pt->bc[pc];
+    BCOp op = bc_op(ins);
+    lua_assert(op < BC__MAX);
+    setintV(L->top, ins);
+    setintV(L->top+1, lj_bc_mode[op]);
+    L->top += 2;
+    return 2;
+  }
+  return 0;
+}
+
+/* local k = jit.util.funck(func, idx)
+** Returns a constant of the prototype: idx >= 0 selects a number
+** constant, idx < 0 selects GC constant ~idx.
+*/
+LJLIB_CF(jit_util_funck)
+{
+  GCproto *pt = check_Lproto(L, 0);
+  MSize idx = (MSize)lj_lib_checkint(L, 2);
+  if ((int32_t)idx >= 0) {
+    if (idx < pt->sizekn) {
+      setnumV(L->top-1, pt->k.n[idx]);
+      return 1;
+    }
+  } else {
+    if (~idx < pt->sizekgc) {
+      GCobj *gc = gcref(pt->k.gc[idx]);  /* Negative index from k pointer. */
+      setgcV(L, L->top-1, &gc->gch, ~gc->gch.gct);
+      return 1;
+    }
+  }
+  return 0;
+}
+
+/* local name = jit.util.funcuvname(func, idx)
+** Returns the name of upvalue idx (0-based), or nothing if out of range.
+*/
+LJLIB_CF(jit_util_funcuvname)
+{
+  GCproto *pt = check_Lproto(L, 0);
+  uint32_t idx = (uint32_t)lj_lib_checkint(L, 2);
+  if (idx < pt->sizeuvname) {
+    setstrV(L, L->top-1, pt->uvname[idx]);
+    return 1;
+  }
+  return 0;
+}
+
+/* -- Reflection API for traces ------------------------------------------- */
+
+#if LJ_HASJIT
+
+/* Check trace argument. Must not throw for non-existent trace numbers,
+** since the reflection API reports "no such trace" by returning nothing.
+*/
+static Trace *jit_checktrace(lua_State *L)
+{
+  TraceNo tr = (TraceNo)lj_lib_checkint(L, 1);
+  jit_State *J = L2J(L);
+  if (tr > 0 && tr < J->sizetrace)
+    return J->trace[tr];  /* May still be NULL for a freed slot. */
+  return NULL;
+}
+
+/* local info = jit.util.traceinfo(tr)
+** Returns a table with IR/constant counts, link and exit count for a
+** trace, or nothing for an invalid trace number.
+*/
+LJLIB_CF(jit_util_traceinfo)
+{
+  Trace *T = jit_checktrace(L);
+  if (T) {
+    GCtab *t;
+    lua_createtable(L, 0, 4);  /* Increment hash size if fields are added. */
+    t = tabV(L->top-1);
+    setintfield(L, t, "nins", (int32_t)T->nins - REF_BIAS - 1);
+    setintfield(L, t, "nk", REF_BIAS - (int32_t)T->nk);
+    setintfield(L, t, "link", T->link);
+    setintfield(L, t, "nexit", T->nsnap);
+    /* There are many more fields. Add them only when needed. */
+    return 1;
+  }
+  return 0;
+}
+
+/* local m, ot, op1, op2, prev = jit.util.traceir(tr, idx)
+** Returns one IR instruction; reference operands are un-biased so the
+** caller sees the same numbering as for idx.
+*/
+LJLIB_CF(jit_util_traceir)
+{
+  Trace *T = jit_checktrace(L);
+  IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS;
+  if (T && ref >= REF_BIAS && ref < T->nins) {
+    IRIns *ir = &T->ir[ref];
+    int32_t m = lj_ir_mode[ir->o];
+    setintV(L->top-2, m);
+    setintV(L->top-1, ir->ot);
+    setintV(L->top++, (int32_t)ir->op1 - (irm_op1(m)==IRMref ? REF_BIAS : 0));
+    setintV(L->top++, (int32_t)ir->op2 - (irm_op2(m)==IRMref ? REF_BIAS : 0));
+    setintV(L->top++, ir->prev);
+    return 5;
+  }
+  return 0;
+}
+
+/* local k, t [, slot] = jit.util.tracek(tr, idx)
+** Returns an IR constant (negative reference range) plus its IR type;
+** KSLOT constants additionally return the slot number.
+*/
+LJLIB_CF(jit_util_tracek)
+{
+  Trace *T = jit_checktrace(L);
+  IRRef ref = (IRRef)lj_lib_checkint(L, 2) + REF_BIAS;
+  if (T && ref >= T->nk && ref < REF_BIAS) {
+    IRIns *ir = &T->ir[ref];
+    int32_t slot = -1;
+    if (ir->o == IR_KSLOT) {  /* Unwrap KSLOT to the underlying constant. */
+      slot = ir->op2;
+      ir = &T->ir[ir->op1];
+    }
+    lj_ir_kvalue(L, L->top-2, ir);
+    setintV(L->top-1, (int32_t)irt_type(ir->t));
+    if (slot == -1)
+      return 2;
+    setintV(L->top++, slot);
+    return 3;
+  }
+  return 0;
+}
+
+/* local snap = jit.util.tracesnap(tr, sn)
+** Returns snapshot sn as an array: [0] = un-biased IR ref of the snapshot,
+** [1..nslots] = un-biased slot refs (false for unused slots).
+*/
+LJLIB_CF(jit_util_tracesnap)
+{
+  Trace *T = jit_checktrace(L);
+  SnapNo sn = (SnapNo)lj_lib_checkint(L, 2);
+  if (T && sn < T->nsnap) {
+    SnapShot *snap = &T->snap[sn];
+    IRRef2 *map = &T->snapmap[snap->mapofs];
+    BCReg s, nslots = snap->nslots;
+    GCtab *t;
+    lua_createtable(L, nslots ? (int)nslots : 1, 0);
+    t = tabV(L->top-1);
+    setintV(lj_tab_setint(L, t, 0), (int32_t)snap->ref - REF_BIAS);
+    for (s = 0; s < nslots; s++) {
+      TValue *o = lj_tab_setint(L, t, (int32_t)(s+1));
+      IRRef ref = snap_ref(map[s]);
+      if (ref)
+        setintV(o, (int32_t)ref - REF_BIAS);
+      else
+        setboolV(o, 0);  /* Unused slot. */
+    }
+    return 1;
+  }
+  return 0;
+}
+
+/* local mcode, addr, loop = jit.util.tracemc(tr)
+** Returns the machine code of a trace as a string, its start address as
+** a number, and the offset of the loop entry point.
+*/
+LJLIB_CF(jit_util_tracemc)
+{
+  Trace *T = jit_checktrace(L);
+  if (T && T->mcode != NULL) {
+    setstrV(L, L->top-1, lj_str_new(L, (const char *)T->mcode, T->szmcode));
+    setnumV(L->top++, cast_num((intptr_t)T->mcode));  /* Address as double. */
+    setintV(L->top++, T->mcloop);
+    return 3;
+  }
+  return 0;
+}
+
+/* local addr = jit.util.traceexitstub(idx)
+** Returns the machine address of exit stub idx, or nothing if out of range.
+*/
+LJLIB_CF(jit_util_traceexitstub)
+{
+  ExitNo exitno = (ExitNo)lj_lib_checkint(L, 1);
+  jit_State *J = L2J(L);
+  if (exitno < EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR) {
+    setnumV(L->top-1, cast_num((intptr_t)exitstub_addr(J, exitno)));
+    return 1;
+  }
+  return 0;
+}
+
+#else
+
+/* Stub used for all jit.util.trace* functions when built without the JIT
+** compiler: silently return no results.
+*/
+static int trace_nojit(lua_State *L)
+{
+  UNUSED(L);
+  return 0;
+}
+#define lj_cf_jit_util_traceinfo	trace_nojit
+#define lj_cf_jit_util_traceir	trace_nojit
+#define lj_cf_jit_util_tracek	trace_nojit
+#define lj_cf_jit_util_tracesnap	trace_nojit
+#define lj_cf_jit_util_tracemc	trace_nojit
+#define lj_cf_jit_util_traceexitstub	trace_nojit
+
+#endif
+
+#include "lj_libdef.h"
+
+/* -- jit.opt module ------------------------------------------------------ */
+
+#define LJLIB_MODULE_jit_opt
+
+#if LJ_HASJIT
+/* Parse optimization level: a single digit '0'..'9' selects one of the
+** predefined flag sets (anything above '2' maps to level 3).
+** Returns 1 on success, 0 if str is not a level.
+*/
+static int jitopt_level(jit_State *J, const char *str)
+{
+  if (str[0] >= '0' && str[0] <= '9' && str[1] == '\0') {
+    uint32_t flags;
+    if (str[0] == '0') flags = JIT_F_OPT_0;
+    else if (str[0] == '1') flags = JIT_F_OPT_1;
+    else if (str[0] == '2') flags = JIT_F_OPT_2;
+    else flags = JIT_F_OPT_3;
+    J->flags = (J->flags & ~JIT_F_OPT_MASK) | flags;
+    return 1;  /* Ok. */
+  }
+  return 0;  /* No match. */
+}
+
+/* Parse optimization flag: "name", "+name", "-name", "noname" or
+** "no-name"; the latter three clear the flag. Returns 1 on success,
+** 0 if no flag name matched.
+*/
+static int jitopt_flag(jit_State *J, const char *str)
+{
+  const char *lst = JIT_F_OPTSTRING;  /* Length-prefixed name list. */
+  uint32_t opt;
+  int set = 1;
+  if (str[0] == '+') {
+    str++;
+  } else if (str[0] == '-') {
+    str++;
+    set = 0;
+  } else if (str[0] == 'n' && str[1] == 'o') {
+    str += str[2] == '-' ? 3 : 2;
+    set = 0;
+  }
+  for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) {
+    size_t len = *(const uint8_t *)lst;
+    if (len == 0)
+      break;
+    if (strncmp(str, lst+1, len) == 0 && str[len] == '\0') {
+      if (set) J->flags |= opt; else J->flags &= ~opt;
+      return 1;  /* Ok. */
+    }
+    lst += 1+len;
+  }
+  return 0;  /* No match. */
+}
+
+/* Forward declaration. */
+static void jit_init_hotcount(jit_State *J);
+
+/* Parse optimization parameter of the form "name=number". Updating
+** 'hotloop' additionally re-seeds the hotcount table. Returns 1 on
+** success, 0 if no parameter name matched.
+*/
+static int jitopt_param(jit_State *J, const char *str)
+{
+  const char *lst = JIT_P_STRING;  /* Length-prefixed name list. */
+  int i;
+  for (i = 0; i < JIT_P__MAX; i++) {
+    size_t len = *(const uint8_t *)lst;
+    TValue tv;
+    lua_assert(len != 0);
+    if (strncmp(str, lst+1, len) == 0 && str[len] == '=' &&
+        lj_str_numconv(&str[len+1], &tv)) {
+      J->param[i] = lj_num2int(tv.n);
+      if (i == JIT_P_hotloop)
+        jit_init_hotcount(J);
+      return 1;  /* Ok. */
+    }
+    lst += 1+len;
+  }
+  return 0;  /* No match. */
+}
+#endif
+
+/* jit.opt.start(flags...)
+** With no arguments, reset the optimization flags to the defaults.
+** Otherwise each argument is tried as a level, then a flag, then a
+** parameter; anything unrecognized raises an error.
+*/
+LJLIB_CF(jit_opt_start)
+{
+#if LJ_HASJIT
+  jit_State *J = L2J(L);
+  int nargs = (int)(L->top - L->base);
+  if (nargs == 0) {
+    J->flags = (J->flags & ~JIT_F_OPT_MASK) | JIT_F_OPT_DEFAULT;
+  } else {
+    int i;
+    for (i = 1; i <= nargs; i++) {
+      const char *str = strdata(lj_lib_checkstr(L, i));
+      if (!jitopt_level(J, str) &&
+          !jitopt_flag(J, str) &&
+          !jitopt_param(J, str))
+        lj_err_callerv(L, LJ_ERR_JITOPT, str);
+    }
+  }
+#else
+  lj_err_caller(L, LJ_ERR_NOJIT);
+#endif
+  return 0;
+}
+
+#include "lj_libdef.h"
+
+/* -- JIT compiler initialization ----------------------------------------- */
+
+#if LJ_HASJIT
+/* Default values for JIT parameters. */
+static const int32_t jit_param_default[JIT_P__MAX+1] = {
+#define JIT_PARAMINIT(len, name, value) (value),
+JIT_PARAMDEF(JIT_PARAMINIT)
+#undef JIT_PARAMINIT
+ 0
+};
+
+/* Initialize the hotcount table: every counter starts at the current
+** 'hotloop' parameter value and counts down towards the trace trigger.
+*/
+static void jit_init_hotcount(jit_State *J)
+{
+  HotCount start = (HotCount)J->param[JIT_P_hotloop];
+  HotCount *hotcount = J2GG(J)->hotcount;
+  uint32_t i;
+  for (i = 0; i < HOTCOUNT_SIZE; i++)
+    hotcount[i] = start;
+}
+#endif
+
+/* Arch-dependent CPU detection. Queries CPUID on x86/x64 and returns the
+** JIT_F_* feature flags. Errors out on CPUs lacking CMOV (unless built
+** with -DLUAJIT_CPU_NOCMOV) or SSE2 (required by this beta's JIT).
+*/
+static uint32_t jit_cpudetect(lua_State *L)
+{
+  uint32_t flags = 0;
+#if LJ_TARGET_X86ORX64
+  uint32_t vendor[4];
+  uint32_t features[4];
+  if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
+#if !LJ_HASJIT
+#define JIT_F_CMOV	1
+#endif
+    flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;  /* EDX bit 15: CMOV. */
+#if LJ_HASJIT
+    flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;  /* EDX bit 26: SSE2. */
+    flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;  /* ECX bit 19: SSE4.1. */
+    if (vendor[2] == 0x6c65746e) {  /* Intel. */
+      if ((features[0] & 0x0ff00f00) == 0x00000f00)  /* P4. */
+	flags |= JIT_F_P4;  /* Currently unused. */
+      else if ((features[0] & 0x0fff0ff0) == 0x000106c0)  /* Atom. */
+	flags |= JIT_F_LEA_AGU;
+    } else if (vendor[2] == 0x444d4163) {  /* AMD. */
+      uint32_t fam = (features[0] & 0x0ff00f00);
+      if (fam == 0x00000f00)  /* K8. */
+	flags |= JIT_F_SPLIT_XMM;
+      if (fam >= 0x00000f00)  /* K8, K10. */
+	flags |= JIT_F_PREFER_IMUL;
+    }
+#endif
+  }
+#ifndef LUAJIT_CPU_NOCMOV
+  if (!(flags & JIT_F_CMOV))
+    luaL_error(L, "Ancient CPU lacks CMOV support (recompile with -DLUAJIT_CPU_NOCMOV)");
+#endif
+#if LJ_HASJIT
+  if (!(flags & JIT_F_SSE2))
+    luaL_error(L, "Sorry, SSE2 CPU support required for this beta release");
+#endif
+  UNUSED(L);
+#else
+#error "Missing CPU detection for this architecture"
+#endif
+  return flags;
+}
+
+/* Initialize JIT compiler: detect CPU features, enable the JIT with the
+** default optimization flags and parameters, and update the dispatch
+** table. A no-op (apart from CPU checks) without JIT support.
+*/
+static void jit_init(lua_State *L)
+{
+  uint32_t flags = jit_cpudetect(L);
+#if LJ_HASJIT
+  jit_State *J = L2J(L);
+  J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
+  memcpy(J->param, jit_param_default, sizeof(J->param));
+  jit_init_hotcount(J);
+  lj_dispatch_update(G(L));
+#else
+  UNUSED(flags);
+#endif
+}
+
+/* Open the jit library. The three pushed literals become the arch,
+** version_num and version fields via the LJLIB_PUSH annotations above.
+*/
+LUALIB_API int luaopen_jit(lua_State *L)
+{
+  lua_pushliteral(L, LJ_ARCH_NAME);
+  lua_pushinteger(L, LUAJIT_VERSION_NUM);
+  lua_pushliteral(L, LUAJIT_VERSION);
+  LJ_LIB_REG(L, jit);
+#ifndef LUAJIT_DISABLE_JITUTIL
+  LJ_LIB_REG_(L, "jit.util", jit_util);
+#endif
+  LJ_LIB_REG_(L, "jit.opt", jit_opt);
+  L->top -= 2;  /* Drop the two remaining pushed literals. */
+  jit_init(L);
+  return 1;
+}
+
diff --git a/src/lib_math.c b/src/lib_math.c
new file mode 100644
index 00000000..ec8b0c2b
--- /dev/null
+++ b/src/lib_math.c
@@ -0,0 +1,188 @@
+/*
+** Math library.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include <math.h>
+
+#define lib_math_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_math
+
+/* math.abs(x) -- argument check only; the actual computation is done by
+** the assembler fast function (FFH_RETRY re-dispatches to it).
+*/
+LJLIB_ASM(math_abs)		LJLIB_REC(.)
+{
+  lj_lib_checknum(L, 1);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(math_floor) LJLIB_REC(math_round IRFPM_FLOOR)
+LJLIB_ASM_(math_ceil) LJLIB_REC(math_round IRFPM_CEIL)
+LJLIB_ASM_(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT)
+LJLIB_ASM_(math_log) LJLIB_REC(math_unary IRFPM_LOG)
+LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10)
+LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP)
+LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN)
+LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS)
+LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN)
+LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin)
+LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos)
+LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan)
+LJLIB_ASM_(math_sinh)
+LJLIB_ASM_(math_cosh)
+LJLIB_ASM_(math_tanh)
+LJLIB_ASM_(math_frexp)
+LJLIB_ASM_(math_modf) LJLIB_REC(.)
+
+LJLIB_PUSH(57.29577951308232)
+LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad)
+
+LJLIB_PUSH(0.017453292519943295)
+LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
+
+/* math.atan2(y, x) -- argument checks only; computed by the assembler
+** fast function. The following LJLIB_ASM_ entries share this fallback.
+*/
+LJLIB_ASM(math_atan2)		LJLIB_REC(math_binary IR_ATAN2)
+{
+  lj_lib_checknum(L, 1);
+  lj_lib_checknum(L, 2);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(math_ldexp)		LJLIB_REC(math_binary IR_LDEXP)
+LJLIB_ASM_(math_pow)		LJLIB_REC(.)
+LJLIB_ASM_(math_fmod)
+
+/* math.min(x, ...) -- checks that every vararg is a number, then
+** re-dispatches to the assembler fast function.
+*/
+LJLIB_ASM(math_min)		LJLIB_REC(math_minmax IR_MIN)
+{
+  int i = 0;
+  do { lj_lib_checknum(L, ++i); } while (L->base+i < L->top);
+  return FFH_RETRY;
+}
+LJLIB_ASM_(math_max)		LJLIB_REC(math_minmax IR_MAX)
+LJLIB_ASM_(math_max) LJLIB_REC(math_minmax IR_MAX)
+
+LJLIB_PUSH(3.14159265358979323846) LJLIB_SET(pi)
+LJLIB_PUSH(1e310) LJLIB_SET(huge)
+
+#ifdef __MACH__
+LJ_FUNCA double lj_wrapper_sinh(double x) { return sinh(x); }
+LJ_FUNCA double lj_wrapper_cosh(double x) { return cosh(x); }
+LJ_FUNCA double lj_wrapper_tanh(double x) { return tanh(x); }
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+/* This implements a Tausworthe PRNG with period 2^223. Based on:
+** Tables of maximally-equidistributed combined LFSR generators,
+** Pierre L'Ecuyer, 1991, table 3, 1st entry.
+** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
+*/
+
+/* PRNG state. */
+typedef struct TW223State {
+ uint64_t gen[4]; /* State of the 4 LFSR generators. */
+ int valid; /* State is valid. */
+} TW223State;
+
+/* Union needed for bit-pattern conversion between uint64_t and double. */
+typedef union { uint64_t u64; double d; } U64double;
+
+/* Update generator i and compute a running xor of all states. */
+#define TW223_GEN(i, k, q, s) \
+ z = tw->gen[i]; \
+ z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
+ r ^= z; tw->gen[i] = z;
+
+/* PRNG step function. Returns a double in the range 0.0 <= d < 1.0.
+** The xor of the four LFSR states supplies 52 random mantissa bits; the
+** exponent is forced to 0x3ff so u.d lies in [1,2), then 1.0 is
+** subtracted to map it to [0,1).
+*/
+static double tw223_step(TW223State *tw)
+{
+  uint64_t z, r = 0;
+  U64double u;
+  TW223_GEN(0, 63, 31, 18)
+  TW223_GEN(1, 58, 19, 28)
+  TW223_GEN(2, 55, 24, 7)
+  TW223_GEN(3, 47, 21, 8)
+  u.u64 = (r & (((uint64_t)1 << 52)-1)) | ((uint64_t)0x3ff << 52);
+#if defined(__GNUC__) && LJ_TARGET_X86 && __pic__
+  /* Compensate for unbelievable GCC pessimization. */
+  {
+    volatile U64double u1;
+    u1.u64 = (uint64_t)0x3f8 << 52;  /* Bit pattern of 1.0. */
+    return u.d - u1.d;
+  }
+#else
+  return u.d - 1.0;
+#endif
+}
+
+/* PRNG initialization function. Derives the four 64-bit LFSR states from
+** the seed d by repeated affine mixing, forces the required non-zero MSBs
+** of each state, then discards the first 10 outputs to decorrelate.
+*/
+static void tw223_init(TW223State *tw, double d)
+{
+  uint32_t r = 0x11090601;  /* 64-k[i] as four 8 bit constants. */
+  int i;
+  for (i = 0; i < 4; i++) {
+    U64double u;
+    uint32_t m = 1u << (r&255);
+    r >>= 8;
+    u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
+    if (u.u64 < m) u.u64 += m;  /* Ensure k[i] MSB of gen[i] are non-zero. */
+    tw->gen[i] = u.u64;
+  }
+  tw->valid = 1;
+  for (i = 0; i < 10; i++)
+    tw223_step(tw);
+}
+
+/* PRNG extract function: math.random([m [, n]]).
+** No args: uniform double in [0,1). One arg m: integer in [1,m].
+** Two args m,n: integer in [m,n]. The PRNG state lives in a userdata
+** held as an upvalue and is lazily seeded on first use.
+*/
+LJLIB_PUSH(top-2)  /* Upvalue holds userdata with TW223State. */
+LJLIB_CF(math_random)
+{
+  int n = cast_int(L->top - L->base);
+  TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+  double d;
+  if (LJ_UNLIKELY(!tw->valid)) tw223_init(tw, 0.0);  /* Lazy default seed. */
+  d = tw223_step(tw);
+  if (n > 0) {
+    double r1 = lj_lib_checknum(L, 1);
+    if (n == 1) {
+      d = floor(d*r1) + 1.0;  /* d is an int in range [1, r1] */
+    } else {
+      double r2 = lj_lib_checknum(L, 2);
+      d = floor(d*(r2-r1+1.0)) + r1;  /* d is an int in range [r1, r2] */
+    }
+  }  /* else: d is a double in range [0, 1] */
+  setnumV(L->top++, d);
+  return 1;
+}
+
+/* PRNG seed function: math.randomseed(x) re-seeds the shared PRNG state. */
+LJLIB_PUSH(top-2)  /* Upvalue holds userdata with TW223State. */
+LJLIB_CF(math_randomseed)
+{
+  TW223State *tw = (TW223State *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+  tw223_init(tw, lj_lib_checknum(L, 1));
+  return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+/* Open the math library. The PRNG-state userdata pushed first is picked
+** up by the LJLIB_PUSH(top-2) annotations as the random/randomseed upvalue.
+*/
+LUALIB_API int luaopen_math(lua_State *L)
+{
+  TW223State *tw;
+  tw = (TW223State *)lua_newuserdata(L, sizeof(TW223State));
+  tw->valid = 0;  /* Use lazy initialization to save some time on startup. */
+  LJ_LIB_REG(L, math);
+#if defined(LUA_COMPAT_MOD)
+  lua_getfield(L, -1, "fmod");
+  lua_setfield(L, -2, "mod");  /* math.mod alias for 5.0 compatibility. */
+#endif
+  return 1;
+}
+
diff --git a/src/lib_os.c b/src/lib_os.c
new file mode 100644
index 00000000..bee7216a
--- /dev/null
+++ b/src/lib_os.c
@@ -0,0 +1,249 @@
+/*
+** OS library.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <errno.h>
+#include <locale.h>
+#include <time.h>
+
+#define lib_os_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#ifdef LUA_USE_POSIX
+#include <unistd.h>
+#else
+#include <stdio.h>
+#endif
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_os
+
+/* Push the conventional OS-call result: true on success, otherwise
+** nil, "filename: strerror", errno (3 results).
+*/
+static int os_pushresult(lua_State *L, int i, const char *filename)
+{
+  int en = errno;  /* calls to Lua API may change this value */
+  if (i) {
+    setboolV(L->top-1, 1);
+    return 1;
+  } else {
+    setnilV(L->top-1);
+    lua_pushfstring(L, "%s: %s", filename, strerror(en));
+    lua_pushinteger(L, en);
+    return 3;
+  }
+}
+
+/* os.execute([command]) -- run command via the C library; with no
+** argument, system(NULL) reports shell availability. Returns the raw
+** status code from system().
+*/
+LJLIB_CF(os_execute)
+{
+  const char *cmd = luaL_optstring(L, 1, NULL);
+  int status = system(cmd);
+  lua_pushinteger(L, status);
+  return 1;
+}
+
+/* os.remove(filename) -- returns true or nil+message+errno. */
+LJLIB_CF(os_remove)
+{
+  const char *filename = luaL_checkstring(L, 1);
+  return os_pushresult(L, remove(filename) == 0, filename);
+}
+
+/* os.rename(from, to) -- returns true or nil+message+errno (message
+** names the source file).
+*/
+LJLIB_CF(os_rename)
+{
+  const char *fromname = luaL_checkstring(L, 1);
+  const char *toname = luaL_checkstring(L, 2);
+  return os_pushresult(L, rename(fromname, toname) == 0, fromname);
+}
+
+/* os.tmpname() -- generate a unique temporary filename. On POSIX, uses
+** mkstemp() (which also creates the file, avoiding the tmpnam() race)
+** and immediately closes the descriptor; elsewhere falls back to tmpnam().
+*/
+LJLIB_CF(os_tmpname)
+{
+#ifdef LUA_USE_POSIX
+  char buf[15+1];  /* Exactly fits "/tmp/lua_XXXXXX" + NUL. */
+  int fp;
+  strcpy(buf, "/tmp/lua_XXXXXX");
+  fp = mkstemp(buf);
+  if (fp != -1)
+    close(fp);
+  else
+    lj_err_caller(L, LJ_ERR_OSUNIQF);
+#else
+  char buf[L_tmpnam];
+  if (tmpnam(buf) == NULL)
+    lj_err_caller(L, LJ_ERR_OSUNIQF);
+#endif
+  lua_pushstring(L, buf);
+  return 1;
+}
+
+/* os.getenv(name) -- returns the value or nil if unset. */
+LJLIB_CF(os_getenv)
+{
+  lua_pushstring(L, getenv(luaL_checkstring(L, 1)));  /* if NULL push nil */
+  return 1;
+}
+
+/* os.exit([code]) -- terminate the process; code defaults to EXIT_SUCCESS. */
+LJLIB_CF(os_exit)
+{
+  exit(lj_lib_optint(L, 1, EXIT_SUCCESS));
+  return 0;  /* to avoid warnings */
+}
+
+/* os.clock() -- CPU time used by the program, in seconds. */
+LJLIB_CF(os_clock)
+{
+  setnumV(L->top++, ((lua_Number)clock())*(1.0/(lua_Number)CLOCKS_PER_SEC));
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Set table-at-top[key] = value (integer). Used to fill os.date("*t"). */
+static void setfield(lua_State *L, const char *key, int value)
+{
+  lua_pushinteger(L, value);
+  lua_setfield(L, -2, key);
+}
+
+/* Set table-at-top[key] = value (boolean); value < 0 means "unknown",
+** which leaves the field unset (matches tm_isdst semantics).
+*/
+static void setboolfield(lua_State *L, const char *key, int value)
+{
+  if (value < 0)  /* undefined? */
+    return;  /* does not set field */
+  lua_pushboolean(L, value);
+  lua_setfield(L, -2, key);
+}
+
+/* Read a boolean field from the table at the top: -1 if absent,
+** else 0/1.
+*/
+static int getboolfield(lua_State *L, const char *key)
+{
+  int res;
+  lua_getfield(L, -1, key);
+  res = lua_isnil(L, -1) ? -1 : lua_toboolean(L, -1);
+  lua_pop(L, 1);
+  return res;
+}
+
+/* Read an integer field from the table at the top. d is the default for
+** an absent field; d < 0 marks the field as mandatory (throws if absent).
+*/
+static int getfield(lua_State *L, const char *key, int d)
+{
+  int res;
+  lua_getfield(L, -1, key);
+  if (lua_isnumber(L, -1)) {
+    res = (int)lua_tointeger(L, -1);
+  } else {
+    if (d < 0)  /* Mandatory field missing? */
+      lj_err_callerv(L, LJ_ERR_OSDATEF, key);
+    res = d;
+  }
+  lua_pop(L, 1);
+  return res;
+}
+
+/* os.date([format [, time]]) -- format defaults to "%c", time to now.
+** A leading '!' selects UTC. Format "*t" returns a table of broken-down
+** fields; any other format is passed one %-specifier at a time to
+** strftime(). Returns nil for an unrepresentable time.
+*/
+LJLIB_CF(os_date)
+{
+  const char *s = luaL_optstring(L, 1, "%c");
+  time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL));
+  struct tm *stm;
+  if (*s == '!') {  /* UTC? */
+    stm = gmtime(&t);
+    s++;  /* skip `!' */
+  } else {
+    stm = localtime(&t);
+  }
+  if (stm == NULL) {  /* invalid date? */
+    setnilV(L->top-1);
+  } else if (strcmp(s, "*t") == 0) {
+    lua_createtable(L, 0, 9);  /* 9 = number of fields */
+    setfield(L, "sec", stm->tm_sec);
+    setfield(L, "min", stm->tm_min);
+    setfield(L, "hour", stm->tm_hour);
+    setfield(L, "day", stm->tm_mday);
+    setfield(L, "month", stm->tm_mon+1);
+    setfield(L, "year", stm->tm_year+1900);
+    setfield(L, "wday", stm->tm_wday+1);
+    setfield(L, "yday", stm->tm_yday+1);
+    setboolfield(L, "isdst", stm->tm_isdst);
+  } else {
+    char cc[3];
+    luaL_Buffer b;
+    cc[0] = '%'; cc[2] = '\0';
+    luaL_buffinit(L, &b);
+    for (; *s; s++) {
+      if (*s != '%' || *(s + 1) == '\0') {  /* no conversion specifier? */
+	luaL_addchar(&b, *s);
+      } else {
+	size_t reslen;
+	char buff[200];  /* should be big enough for any conversion result */
+	cc[1] = *(++s);
+	reslen = strftime(buff, sizeof(buff), cc, stm);
+	luaL_addlstring(&b, buff, reslen);
+      }
+    }
+    luaL_pushresult(&b);
+  }
+  return 1;
+}
+
+/* os.time([table]) -- current time, or the time described by a table of
+** fields (day/month/year mandatory; sec/min default 0, hour defaults 12).
+** Returns nil if mktime() cannot represent the date.
+*/
+LJLIB_CF(os_time)
+{
+  time_t t;
+  if (lua_isnoneornil(L, 1)) {  /* called without args? */
+    t = time(NULL);  /* get current time */
+  } else {
+    struct tm ts;
+    luaL_checktype(L, 1, LUA_TTABLE);
+    lua_settop(L, 1);  /* make sure table is at the top */
+    ts.tm_sec = getfield(L, "sec", 0);
+    ts.tm_min = getfield(L, "min", 0);
+    ts.tm_hour = getfield(L, "hour", 12);
+    ts.tm_mday = getfield(L, "day", -1);  /* -1: mandatory. */
+    ts.tm_mon = getfield(L, "month", -1) - 1;
+    ts.tm_year = getfield(L, "year", -1) - 1900;
+    ts.tm_isdst = getboolfield(L, "isdst");
+    t = mktime(&ts);
+  }
+  if (t == (time_t)(-1))
+    lua_pushnil(L);
+  else
+    lua_pushnumber(L, (lua_Number)t);
+  return 1;
+}
+
+/* os.difftime(t2 [, t1]) -- seconds from t1 (default 0) to t2. */
+LJLIB_CF(os_difftime)
+{
+  lua_Number t2 = luaL_checknumber(L, 1);
+  lua_Number t1 = luaL_optnumber(L, 2, (lua_Number)0);
+  lua_pushnumber(L, difftime((time_t)t2, (time_t)t1));
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* os.setlocale([locale [, category]]) -- category defaults to "all".
+** Option list is length-prefixed; the "\1\377" entry is an unmatchable
+** placeholder so "all" maps to index 6. Returns the new locale string or
+** nil on failure.
+*/
+LJLIB_CF(os_setlocale)
+{
+  GCstr *s = lj_lib_optstr(L, 1);
+  const char *str = s ? strdata(s) : NULL;  /* NULL queries the locale. */
+  int opt = lj_lib_checkopt(L, 2, 6,
+			    "\5ctype\7numeric\4time\7collate\10monetary\1\377\3all");
+  if (opt == 0) opt = LC_CTYPE;
+  else if (opt == 1) opt = LC_NUMERIC;
+  else if (opt == 2) opt = LC_TIME;
+  else if (opt == 3) opt = LC_COLLATE;
+  else if (opt == 4) opt = LC_MONETARY;
+  else if (opt == 6) opt = LC_ALL;
+  lua_pushstring(L, setlocale(opt, str));
+  return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+/* Open the os library. */
+LUALIB_API int luaopen_os(lua_State *L)
+{
+  LJ_LIB_REG(L, os);
+  return 1;
+}
+
diff --git a/src/lib_package.c b/src/lib_package.c
new file mode 100644
index 00000000..69fa1db9
--- /dev/null
+++ b/src/lib_package.c
@@ -0,0 +1,508 @@
+/*
+** Package library.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lib_package_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Error codes for ll_loadfunc. */
+#define PACKAGE_ERR_LIB 1
+#define PACKAGE_ERR_FUNC 2
+
+/* Redefined in platform specific part. */
+#define PACKAGE_LIB_FAIL "open"
+#define setprogdir(L) ((void)0)
+
+#if defined(LUA_DL_DLOPEN)
+
+#include <dlfcn.h>
+
+static void ll_unloadlib(void *lib)
+{
+ dlclose(lib);
+}
+
+static void *ll_load(lua_State *L, const char *path)
+{
+ void *lib = dlopen(path, RTLD_NOW);
+ if (lib == NULL) lua_pushstring(L, dlerror());
+ return lib;
+}
+
+static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
+{
+ lua_CFunction f = (lua_CFunction)dlsym(lib, sym);
+ if (f == NULL) lua_pushstring(L, dlerror());
+ return f;
+}
+
+#elif defined(LUA_DL_DLL)
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#undef setprogdir
+
+static void setprogdir(lua_State *L)
+{
+ char buff[MAX_PATH + 1];
+ char *lb;
+ DWORD nsize = sizeof(buff);
+ DWORD n = GetModuleFileNameA(NULL, buff, nsize);
+ if (n == 0 || n == nsize || (lb = strrchr(buff, '\\')) == NULL) {
+ luaL_error(L, "unable to get ModuleFileName");
+ } else {
+ *lb = '\0';
+ luaL_gsub(L, lua_tostring(L, -1), LUA_EXECDIR, buff);
+ lua_remove(L, -2); /* remove original string */
+ }
+}
+
+static void pusherror(lua_State *L)
+{
+ DWORD error = GetLastError();
+ char buffer[128];
+ if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL, error, 0, buffer, sizeof(buffer), NULL))
+ lua_pushstring(L, buffer);
+ else
+ lua_pushfstring(L, "system error %d\n", error);
+}
+
+static void ll_unloadlib(void *lib)
+{
+ FreeLibrary((HINSTANCE)lib);
+}
+
+static void *ll_load(lua_State *L, const char *path)
+{
+ HINSTANCE lib = LoadLibraryA(path);
+ if (lib == NULL) pusherror(L);
+ return lib;
+}
+
+static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
+{
+ lua_CFunction f = (lua_CFunction)GetProcAddress((HINSTANCE)lib, sym);
+ if (f == NULL) pusherror(L);
+ return f;
+}
+
+#else
+
+#undef PACKAGE_LIB_FAIL
+#define PACKAGE_LIB_FAIL "absent"
+
+#define DLMSG "dynamic libraries not enabled; check your Lua installation"
+
+static void ll_unloadlib(void *lib)
+{
+ (void)lib;
+}
+
+static void *ll_load(lua_State *L, const char *path)
+{
+ (void)path;
+ lua_pushliteral(L, DLMSG);
+ return NULL;
+}
+
+static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
+{
+ (void)lib; (void)sym;
+ lua_pushliteral(L, DLMSG);
+ return NULL;
+}
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+static void **ll_register(lua_State *L, const char *path)
+{
+ void **plib;
+ lua_pushfstring(L, "LOADLIB: %s", path);
+ lua_gettable(L, LUA_REGISTRYINDEX); /* check library in registry? */
+ if (!lua_isnil(L, -1)) { /* is there an entry? */
+ plib = (void **)lua_touserdata(L, -1);
+ } else { /* no entry yet; create one */
+ lua_pop(L, 1);
+ plib = (void **)lua_newuserdata(L, sizeof(void *));
+ *plib = NULL;
+ luaL_getmetatable(L, "_LOADLIB");
+ lua_setmetatable(L, -2);
+ lua_pushfstring(L, "LOADLIB: %s", path);
+ lua_pushvalue(L, -2);
+ lua_settable(L, LUA_REGISTRYINDEX);
+ }
+ return plib;
+}
+
+static int ll_loadfunc(lua_State *L, const char *path, const char *sym)
+{
+ void **reg = ll_register(L, path);
+ if (*reg == NULL) *reg = ll_load(L, path);
+ if (*reg == NULL) {
+ return PACKAGE_ERR_LIB; /* unable to load library */
+ } else {
+ lua_CFunction f = ll_sym(L, *reg, sym);
+ if (f == NULL)
+ return PACKAGE_ERR_FUNC; /* unable to find function */
+ lua_pushcfunction(L, f);
+ return 0; /* return function */
+ }
+}
+
+static int lj_cf_package_loadlib(lua_State *L)
+{
+ const char *path = luaL_checkstring(L, 1);
+ const char *init = luaL_checkstring(L, 2);
+ int stat = ll_loadfunc(L, path, init);
+ if (stat == 0) { /* no errors? */
+ return 1; /* return the loaded function */
+ } else { /* error; error message is on stack top */
+ lua_pushnil(L);
+ lua_insert(L, -2);
+ lua_pushstring(L, (stat == PACKAGE_ERR_LIB) ? PACKAGE_LIB_FAIL : "init");
+ return 3; /* return nil, error message, and where */
+ }
+}
+
+static int lj_cf_package_unloadlib(lua_State *L)
+{
+ void **lib = (void **)luaL_checkudata(L, 1, "_LOADLIB");
+ if (*lib) ll_unloadlib(*lib);
+ *lib = NULL; /* mark library as closed */
+ return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static int readable(const char *filename)
+{
+ FILE *f = fopen(filename, "r"); /* try to open file */
+ if (f == NULL) return 0; /* open failed */
+ fclose(f);
+ return 1;
+}
+
+static const char *pushnexttemplate(lua_State *L, const char *path)
+{
+ const char *l;
+ while (*path == *LUA_PATHSEP) path++; /* skip separators */
+ if (*path == '\0') return NULL; /* no more templates */
+ l = strchr(path, *LUA_PATHSEP); /* find next separator */
+ if (l == NULL) l = path + strlen(path);
+ lua_pushlstring(L, path, (size_t)(l - path)); /* template */
+ return l;
+}
+
+static const char *findfile(lua_State *L, const char *name,
+ const char *pname)
+{
+ const char *path;
+ name = luaL_gsub(L, name, ".", LUA_DIRSEP);
+ lua_getfield(L, LUA_ENVIRONINDEX, pname);
+ path = lua_tostring(L, -1);
+ if (path == NULL)
+ luaL_error(L, LUA_QL("package.%s") " must be a string", pname);
+ lua_pushliteral(L, ""); /* error accumulator */
+ while ((path = pushnexttemplate(L, path)) != NULL) {
+ const char *filename;
+ filename = luaL_gsub(L, lua_tostring(L, -1), LUA_PATH_MARK, name);
+ lua_remove(L, -2); /* remove path template */
+ if (readable(filename)) /* does file exist and is readable? */
+ return filename; /* return that file name */
+ lua_pushfstring(L, "\n\tno file " LUA_QS, filename);
+ lua_remove(L, -2); /* remove file name */
+ lua_concat(L, 2); /* add entry to possible error message */
+ }
+ return NULL; /* not found */
+}
+
+static void loaderror(lua_State *L, const char *filename)
+{
+ luaL_error(L, "error loading module " LUA_QS " from file " LUA_QS ":\n\t%s",
+ lua_tostring(L, 1), filename, lua_tostring(L, -1));
+}
+
+static int lj_cf_package_loader_lua(lua_State *L)
+{
+ const char *filename;
+ const char *name = luaL_checkstring(L, 1);
+ filename = findfile(L, name, "path");
+ if (filename == NULL) return 1; /* library not found in this path */
+ if (luaL_loadfile(L, filename) != 0)
+ loaderror(L, filename);
+ return 1; /* library loaded successfully */
+}
+
+static const char *mkfuncname(lua_State *L, const char *modname)
+{
+ const char *funcname;
+ const char *mark = strchr(modname, *LUA_IGMARK);
+ if (mark) modname = mark + 1;
+ funcname = luaL_gsub(L, modname, ".", "_");
+ funcname = lua_pushfstring(L, "luaopen_%s", funcname);
+ lua_remove(L, -2); /* remove 'gsub' result */
+ return funcname;
+}
+
+static int lj_cf_package_loader_c(lua_State *L)
+{
+ const char *funcname;
+ const char *name = luaL_checkstring(L, 1);
+ const char *filename = findfile(L, name, "cpath");
+ if (filename == NULL) return 1; /* library not found in this path */
+ funcname = mkfuncname(L, name);
+ if (ll_loadfunc(L, filename, funcname) != 0)
+ loaderror(L, filename);
+ return 1; /* library loaded successfully */
+}
+
+static int lj_cf_package_loader_croot(lua_State *L)
+{
+ const char *funcname;
+ const char *filename;
+ const char *name = luaL_checkstring(L, 1);
+ const char *p = strchr(name, '.');
+ int stat;
+ if (p == NULL) return 0; /* is root */
+ lua_pushlstring(L, name, (size_t)(p - name));
+ filename = findfile(L, lua_tostring(L, -1), "cpath");
+ if (filename == NULL) return 1; /* root not found */
+ funcname = mkfuncname(L, name);
+ if ((stat = ll_loadfunc(L, filename, funcname)) != 0) {
+ if (stat != PACKAGE_ERR_FUNC) loaderror(L, filename); /* real error */
+ lua_pushfstring(L, "\n\tno module " LUA_QS " in file " LUA_QS,
+ name, filename);
+ return 1; /* function not found */
+ }
+ return 1;
+}
+
+static int lj_cf_package_loader_preload(lua_State *L)
+{
+ const char *name = luaL_checkstring(L, 1);
+ lua_getfield(L, LUA_ENVIRONINDEX, "preload");
+ if (!lua_istable(L, -1))
+ luaL_error(L, LUA_QL("package.preload") " must be a table");
+ lua_getfield(L, -1, name);
+ if (lua_isnil(L, -1)) /* not found? */
+ lua_pushfstring(L, "\n\tno field package.preload['%s']", name);
+ return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static const int sentinel_ = 0;
+#define sentinel ((void *)&sentinel_)
+
+static int lj_cf_package_require(lua_State *L)
+{
+ const char *name = luaL_checkstring(L, 1);
+ int i;
+ lua_settop(L, 1); /* _LOADED table will be at index 2 */
+ lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
+ lua_getfield(L, 2, name);
+ if (lua_toboolean(L, -1)) { /* is it there? */
+ if (lua_touserdata(L, -1) == sentinel) /* check loops */
+ luaL_error(L, "loop or previous error loading module " LUA_QS, name);
+ return 1; /* package is already loaded */
+ }
+ /* else must load it; iterate over available loaders */
+ lua_getfield(L, LUA_ENVIRONINDEX, "loaders");
+ if (!lua_istable(L, -1))
+ luaL_error(L, LUA_QL("package.loaders") " must be a table");
+ lua_pushliteral(L, ""); /* error message accumulator */
+ for (i = 1; ; i++) {
+ lua_rawgeti(L, -2, i); /* get a loader */
+ if (lua_isnil(L, -1))
+ luaL_error(L, "module " LUA_QS " not found:%s",
+ name, lua_tostring(L, -2));
+ lua_pushstring(L, name);
+ lua_call(L, 1, 1); /* call it */
+ if (lua_isfunction(L, -1)) /* did it find module? */
+ break; /* module loaded successfully */
+ else if (lua_isstring(L, -1)) /* loader returned error message? */
+ lua_concat(L, 2); /* accumulate it */
+ else
+ lua_pop(L, 1);
+ }
+ lua_pushlightuserdata(L, sentinel);
+ lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */
+ lua_pushstring(L, name); /* pass name as argument to module */
+ lua_call(L, 1, 1); /* run loaded module */
+ if (!lua_isnil(L, -1)) /* non-nil return? */
+ lua_setfield(L, 2, name); /* _LOADED[name] = returned value */
+ lua_getfield(L, 2, name);
+ if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */
+ lua_pushboolean(L, 1); /* use true as result */
+ lua_pushvalue(L, -1); /* extra copy to be returned */
+ lua_setfield(L, 2, name); /* _LOADED[name] = true */
+ }
+ return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static void setfenv(lua_State *L)
+{
+ lua_Debug ar;
+ if (lua_getstack(L, 1, &ar) == 0 ||
+ lua_getinfo(L, "f", &ar) == 0 || /* get calling function */
+ lua_iscfunction(L, -1))
+ luaL_error(L, LUA_QL("module") " not called from a Lua function");
+ lua_pushvalue(L, -2);
+ lua_setfenv(L, -2);
+ lua_pop(L, 1);
+}
+
+static void dooptions(lua_State *L, int n)
+{
+ int i;
+ for (i = 2; i <= n; i++) {
+ lua_pushvalue(L, i); /* get option (a function) */
+ lua_pushvalue(L, -2); /* module */
+ lua_call(L, 1, 0);
+ }
+}
+
+static void modinit(lua_State *L, const char *modname)
+{
+ const char *dot;
+ lua_pushvalue(L, -1);
+ lua_setfield(L, -2, "_M"); /* module._M = module */
+ lua_pushstring(L, modname);
+ lua_setfield(L, -2, "_NAME");
+ dot = strrchr(modname, '.'); /* look for last dot in module name */
+ if (dot == NULL) dot = modname; else dot++;
+ /* set _PACKAGE as package name (full module name minus last part) */
+ lua_pushlstring(L, modname, (size_t)(dot - modname));
+ lua_setfield(L, -2, "_PACKAGE");
+}
+
+static int lj_cf_package_module(lua_State *L)
+{
+ const char *modname = luaL_checkstring(L, 1);
+ int loaded = lua_gettop(L) + 1; /* index of _LOADED table */
+ lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
+ lua_getfield(L, loaded, modname); /* get _LOADED[modname] */
+ if (!lua_istable(L, -1)) { /* not found? */
+ lua_pop(L, 1); /* remove previous result */
+ /* try global variable (and create one if it does not exist) */
+ if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL)
+ lj_err_callerv(L, LJ_ERR_BADMODN, modname);
+ lua_pushvalue(L, -1);
+ lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */
+ }
+ /* check whether table already has a _NAME field */
+ lua_getfield(L, -1, "_NAME");
+ if (!lua_isnil(L, -1)) { /* is table an initialized module? */
+ lua_pop(L, 1);
+ } else { /* no; initialize it */
+ lua_pop(L, 1);
+ modinit(L, modname);
+ }
+ lua_pushvalue(L, -1);
+ setfenv(L);
+ dooptions(L, loaded - 1);
+ return 0;
+}
+
+static int lj_cf_package_seeall(lua_State *L)
+{
+ luaL_checktype(L, 1, LUA_TTABLE);
+ if (!lua_getmetatable(L, 1)) {
+ lua_createtable(L, 0, 1); /* create new metatable */
+ lua_pushvalue(L, -1);
+ lua_setmetatable(L, 1);
+ }
+ lua_pushvalue(L, LUA_GLOBALSINDEX);
+ lua_setfield(L, -2, "__index"); /* mt.__index = _G */
+ return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#define AUXMARK "\1"
+
+static void setpath(lua_State *L, const char *fieldname, const char *envname,
+ const char *def)
+{
+ const char *path = getenv(envname);
+ if (path == NULL) {
+ lua_pushstring(L, def);
+ } else {
+ path = luaL_gsub(L, path, LUA_PATHSEP LUA_PATHSEP,
+ LUA_PATHSEP AUXMARK LUA_PATHSEP);
+ luaL_gsub(L, path, AUXMARK, def);
+ lua_remove(L, -2);
+ }
+ setprogdir(L);
+ lua_setfield(L, -2, fieldname);
+}
+
+static const luaL_Reg package_lib[] = {
+ { "loadlib", lj_cf_package_loadlib },
+ { "seeall", lj_cf_package_seeall },
+ { NULL, NULL }
+};
+
+static const luaL_Reg package_global[] = {
+ { "module", lj_cf_package_module },
+ { "require", lj_cf_package_require },
+ { NULL, NULL }
+};
+
+static const lua_CFunction package_loaders[] =
+{
+ lj_cf_package_loader_preload,
+ lj_cf_package_loader_lua,
+ lj_cf_package_loader_c,
+ lj_cf_package_loader_croot,
+ NULL
+};
+
+LUALIB_API int luaopen_package(lua_State *L)
+{
+ int i;
+ luaL_newmetatable(L, "_LOADLIB");
+ lua_pushcfunction(L, lj_cf_package_unloadlib);
+ lua_setfield(L, -2, "__gc");
+ luaL_register(L, LUA_LOADLIBNAME, package_lib);
+ lua_pushvalue(L, -1);
+ lua_replace(L, LUA_ENVIRONINDEX);
+ lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
+ for (i = 0; package_loaders[i] != NULL; i++) {
+ lua_pushcfunction(L, package_loaders[i]);
+ lua_rawseti(L, -2, i+1);
+ }
+ lua_setfield(L, -2, "loaders");
+ setpath(L, "path", LUA_PATH, LUA_PATH_DEFAULT);
+ setpath(L, "cpath", LUA_CPATH, LUA_CPATH_DEFAULT);
+ lua_pushliteral(L, LUA_PATH_CONFIG);
+ lua_setfield(L, -2, "config");
+ luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
+ lua_setfield(L, -2, "loaded");
+ lua_newtable(L);
+ lua_setfield(L, -2, "preload");
+ lua_pushvalue(L, LUA_GLOBALSINDEX);
+ luaL_register(L, NULL, package_global);
+ lua_pop(L, 1);
+ return 1;
+}
+
diff --git a/src/lib_string.c b/src/lib_string.c
new file mode 100644
index 00000000..fdd7fbcb
--- /dev/null
+++ b/src/lib_string.c
@@ -0,0 +1,790 @@
+/*
+** String library.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <stdio.h>
+
+#define lib_string_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_state.h"
+#include "lj_ff.h"
+#include "lj_ctype.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_string
+
+LJLIB_ASM(string_len) LJLIB_REC(.)
+{
+ lj_lib_checkstr(L, 1);
+ return FFH_RETRY;
+}
+
+LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
+{
+ GCstr *s = lj_lib_checkstr(L, 1);
+ int32_t len = (int32_t)s->len;
+ int32_t start = lj_lib_optint(L, 2, 1);
+ int32_t stop = lj_lib_optint(L, 3, start);
+ int32_t n, i;
+ const unsigned char *p;
+ if (stop < 0) stop += len+1;
+ if (start < 0) start += len+1;
+ if (start <= 0) start = 1;
+ if (stop > len) stop = len;
+ if (start > stop) return FFH_RES(0); /* Empty interval: return no results. */
+ start--;
+ n = stop - start;
+ if ((uint32_t)n > LUAI_MAXCSTACK)
+ lj_err_caller(L, LJ_ERR_STRSLC);
+ lj_state_checkstack(L, (MSize)n);
+ p = (const unsigned char *)strdata(s) + start;
+ for (i = 0; i < n; i++)
+ setintV(L->base + i-1, p[i]);
+ return FFH_RES(n);
+}
+
+LJLIB_ASM(string_char)
+{
+ int i, nargs = cast_int(L->top - L->base);
+ char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (size_t)nargs);
+ for (i = 1; i <= nargs; i++) {
+ int32_t k = lj_lib_checkint(L, i);
+ if (!checku8(k))
+ lj_err_arg(L, i, LJ_ERR_BADVAL);
+ buf[i-1] = (char)k;
+ }
+ setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs));
+ return FFH_RES(1);
+}
+
+LJLIB_ASM(string_sub) LJLIB_REC(string_range 1)
+{
+ lj_lib_checkstr(L, 1);
+ lj_lib_checkint(L, 2);
+ setintV(L->base+2, lj_lib_optint(L, 3, -1));
+ return FFH_RETRY;
+}
+
+LJLIB_ASM(string_rep)
+{
+ GCstr *s = lj_lib_checkstr(L, 1);
+ int32_t len = (int32_t)s->len;
+ int32_t k = lj_lib_checkint(L, 2);
+ int64_t tlen = (int64_t)k * len;
+ const char *src;
+ char *buf;
+ if (k <= 0) return FFH_RETRY;
+ if (tlen > LJ_MAX_STR)
+ lj_err_caller(L, LJ_ERR_STROV);
+ buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)tlen);
+ if (len <= 1) return FFH_RETRY; /* ASM code only needed buffer resize. */
+ src = strdata(s);
+ do {
+ int32_t i = 0;
+ do { *buf++ = src[i++]; } while (i < len);
+ } while (--k > 0);
+ setstrV(L, L->base-1, lj_str_new(L, G(L)->tmpbuf.buf, (size_t)tlen));
+ return FFH_RES(1);
+}
+
+LJLIB_ASM(string_reverse)
+{
+ GCstr *s = lj_lib_checkstr(L, 1);
+ lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
+ return FFH_RETRY;
+}
+LJLIB_ASM_(string_lower)
+LJLIB_ASM_(string_upper)
+
+/* ------------------------------------------------------------------------ */
+
+LJLIB_CF(string_dump)
+{
+ lj_err_caller(L, LJ_ERR_STRDUMP);
+ return 0; /* unreachable */
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* macro to `unsign' a character */
+#define uchar(c) ((unsigned char)(c))
+
+#define CAP_UNFINISHED (-1)
+#define CAP_POSITION (-2)
+
+typedef struct MatchState {
+ const char *src_init; /* init of source string */
+ const char *src_end; /* end (`\0') of source string */
+ lua_State *L;
+ int level; /* total number of captures (finished or unfinished) */
+ struct {
+ const char *init;
+ ptrdiff_t len;
+ } capture[LUA_MAXCAPTURES];
+} MatchState;
+
+#define L_ESC '%'
+#define SPECIALS "^$*+?.([%-"
+
+static int check_capture(MatchState *ms, int l)
+{
+ l -= '1';
+ if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
+ lj_err_caller(ms->L, LJ_ERR_STRCAPI);
+ return l;
+}
+
+static int capture_to_close(MatchState *ms)
+{
+ int level = ms->level;
+ for (level--; level>=0; level--)
+ if (ms->capture[level].len == CAP_UNFINISHED) return level;
+ lj_err_caller(ms->L, LJ_ERR_STRPATC);
+ return 0; /* unreachable */
+}
+
+static const char *classend(MatchState *ms, const char *p)
+{
+ switch (*p++) {
+ case L_ESC:
+ if (*p == '\0')
+ lj_err_caller(ms->L, LJ_ERR_STRPATE);
+ return p+1;
+ case '[':
+ if (*p == '^') p++;
+ do { /* look for a `]' */
+ if (*p == '\0')
+ lj_err_caller(ms->L, LJ_ERR_STRPATM);
+ if (*(p++) == L_ESC && *p != '\0')
+ p++; /* skip escapes (e.g. `%]') */
+ } while (*p != ']');
+ return p+1;
+ default:
+ return p;
+ }
+}
+
+static const unsigned char match_class_map[32] = {
+ 0, LJ_CTYPE_ALPHA, 0, LJ_CTYPE_CNTRL, LJ_CTYPE_DIGIT, 0,0,0,0,0,0,0,
+ LJ_CTYPE_LOWER, 0,0,0, LJ_CTYPE_PUNCT, 0,0, LJ_CTYPE_SPACE, 0,
+ LJ_CTYPE_UPPER, 0, LJ_CTYPE_ALNUM, LJ_CTYPE_XDIGIT, 0,0,0,0,0,0,0
+};
+
+static int match_class(int c, int cl)
+{
+ if ((cl & 0xc0) == 0x40) {
+ int t = match_class_map[(cl&0x1f)];
+ if (t) {
+ t = lj_ctype_isa(c, t);
+ return (cl & 0x20) ? t : !t;
+ }
+ if (cl == 'z') return c == 0;
+ if (cl == 'Z') return c != 0;
+ }
+ return (cl == c);
+}
+
+static int matchbracketclass(int c, const char *p, const char *ec)
+{
+ int sig = 1;
+ if (*(p+1) == '^') {
+ sig = 0;
+ p++; /* skip the `^' */
+ }
+ while (++p < ec) {
+ if (*p == L_ESC) {
+ p++;
+ if (match_class(c, uchar(*p)))
+ return sig;
+ }
+ else if ((*(p+1) == '-') && (p+2 < ec)) {
+ p+=2;
+ if (uchar(*(p-2)) <= c && c <= uchar(*p))
+ return sig;
+ }
+ else if (uchar(*p) == c) return sig;
+ }
+ return !sig;
+}
+
+static int singlematch(int c, const char *p, const char *ep)
+{
+ switch (*p) {
+ case '.': return 1; /* matches any char */
+ case L_ESC: return match_class(c, uchar(*(p+1)));
+ case '[': return matchbracketclass(c, p, ep-1);
+ default: return (uchar(*p) == c);
+ }
+}
+
+static const char *match(MatchState *ms, const char *s, const char *p);
+
+static const char *matchbalance(MatchState *ms, const char *s, const char *p)
+{
+ if (*p == 0 || *(p+1) == 0)
+ lj_err_caller(ms->L, LJ_ERR_STRPATU);
+ if (*s != *p) {
+ return NULL;
+ } else {
+ int b = *p;
+ int e = *(p+1);
+ int cont = 1;
+ while (++s < ms->src_end) {
+ if (*s == e) {
+ if (--cont == 0) return s+1;
+ } else if (*s == b) {
+ cont++;
+ }
+ }
+ }
+ return NULL; /* string ends out of balance */
+}
+
+static const char *max_expand(MatchState *ms, const char *s,
+ const char *p, const char *ep)
+{
+ ptrdiff_t i = 0; /* counts maximum expand for item */
+ while ((s+i)<ms->src_end && singlematch(uchar(*(s+i)), p, ep))
+ i++;
+ /* keeps trying to match with the maximum repetitions */
+ while (i>=0) {
+ const char *res = match(ms, (s+i), ep+1);
+ if (res) return res;
+ i--; /* else didn't match; reduce 1 repetition to try again */
+ }
+ return NULL;
+}
+
+static const char *min_expand(MatchState *ms, const char *s,
+ const char *p, const char *ep)
+{
+ for (;;) {
+ const char *res = match(ms, s, ep+1);
+ if (res != NULL)
+ return res;
+ else if (s<ms->src_end && singlematch(uchar(*s), p, ep))
+ s++; /* try with one more repetition */
+ else
+ return NULL;
+ }
+}
+
+static const char *start_capture(MatchState *ms, const char *s,
+ const char *p, int what)
+{
+ const char *res;
+ int level = ms->level;
+ if (level >= LUA_MAXCAPTURES) lj_err_caller(ms->L, LJ_ERR_STRCAPN);
+ ms->capture[level].init = s;
+ ms->capture[level].len = what;
+ ms->level = level+1;
+ if ((res=match(ms, s, p)) == NULL) /* match failed? */
+ ms->level--; /* undo capture */
+ return res;
+}
+
+static const char *end_capture(MatchState *ms, const char *s,
+ const char *p)
+{
+ int l = capture_to_close(ms);
+ const char *res;
+ ms->capture[l].len = s - ms->capture[l].init; /* close capture */
+ if ((res = match(ms, s, p)) == NULL) /* match failed? */
+ ms->capture[l].len = CAP_UNFINISHED; /* undo capture */
+ return res;
+}
+
+static const char *match_capture(MatchState *ms, const char *s, int l)
+{
+ size_t len;
+ l = check_capture(ms, l);
+ len = (size_t)ms->capture[l].len;
+ if ((size_t)(ms->src_end-s) >= len &&
+ memcmp(ms->capture[l].init, s, len) == 0)
+ return s+len;
+ else
+ return NULL;
+}
+
+static const char *match(MatchState *ms, const char *s, const char *p)
+{
+ init: /* using goto's to optimize tail recursion */
+ switch (*p) {
+ case '(': /* start capture */
+ if (*(p+1) == ')') /* position capture? */
+ return start_capture(ms, s, p+2, CAP_POSITION);
+ else
+ return start_capture(ms, s, p+1, CAP_UNFINISHED);
+ case ')': /* end capture */
+ return end_capture(ms, s, p+1);
+ case L_ESC:
+ switch (*(p+1)) {
+ case 'b': /* balanced string? */
+ s = matchbalance(ms, s, p+2);
+ if (s == NULL) return NULL;
+ p+=4;
+ goto init; /* else return match(ms, s, p+4); */
+ case 'f': { /* frontier? */
+ const char *ep; char previous;
+ p += 2;
+ if (*p != '[')
+ lj_err_caller(ms->L, LJ_ERR_STRPATB);
+ ep = classend(ms, p); /* points to what is next */
+ previous = (s == ms->src_init) ? '\0' : *(s-1);
+ if (matchbracketclass(uchar(previous), p, ep-1) ||
+ !matchbracketclass(uchar(*s), p, ep-1)) return NULL;
+ p=ep;
+ goto init; /* else return match(ms, s, ep); */
+ }
+ default:
+ if (lj_ctype_isdigit(uchar(*(p+1)))) { /* capture results (%0-%9)? */
+ s = match_capture(ms, s, uchar(*(p+1)));
+ if (s == NULL) return NULL;
+ p+=2;
+ goto init; /* else return match(ms, s, p+2) */
+ }
+ goto dflt; /* case default */
+ }
+ case '\0': /* end of pattern */
+ return s; /* match succeeded */
+ case '$':
+ if (*(p+1) == '\0') /* is the `$' the last char in pattern? */
+ return (s == ms->src_end) ? s : NULL; /* check end of string */
+ else
+ goto dflt;
+ default: dflt: { /* it is a pattern item */
+ const char *ep = classend(ms, p); /* points to what is next */
+ int m = s<ms->src_end && singlematch(uchar(*s), p, ep);
+ switch (*ep) {
+ case '?': { /* optional */
+ const char *res;
+ if (m && ((res=match(ms, s+1, ep+1)) != NULL))
+ return res;
+ p=ep+1;
+ goto init; /* else return match(ms, s, ep+1); */
+ }
+ case '*': /* 0 or more repetitions */
+ return max_expand(ms, s, p, ep);
+ case '+': /* 1 or more repetitions */
+ return (m ? max_expand(ms, s+1, p, ep) : NULL);
+ case '-': /* 0 or more repetitions (minimum) */
+ return min_expand(ms, s, p, ep);
+ default:
+ if (!m) return NULL;
+ s++; p=ep;
+ goto init; /* else return match(ms, s+1, ep); */
+ }
+ }
+ }
+}
+
+static const char *lmemfind(const char *s1, size_t l1,
+ const char *s2, size_t l2)
+{
+ if (l2 == 0) {
+ return s1; /* empty strings are everywhere */
+ } else if (l2 > l1) {
+ return NULL; /* avoids a negative `l1' */
+ } else {
+ const char *init; /* to search for a `*s2' inside `s1' */
+ l2--; /* 1st char will be checked by `memchr' */
+ l1 = l1-l2; /* `s2' cannot be found after that */
+ while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
+ init++; /* 1st char is already checked */
+ if (memcmp(init, s2+1, l2) == 0) {
+ return init-1;
+ } else { /* correct `l1' and `s1' to try again */
+ l1 -= (size_t)(init-s1);
+ s1 = init;
+ }
+ }
+ return NULL; /* not found */
+ }
+}
+
+static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
+{
+ if (i >= ms->level) {
+ if (i == 0) /* ms->level == 0, too */
+ lua_pushlstring(ms->L, s, (size_t)(e - s)); /* add whole match */
+ else
+ lj_err_caller(ms->L, LJ_ERR_STRCAPI);
+ } else {
+ ptrdiff_t l = ms->capture[i].len;
+ if (l == CAP_UNFINISHED) lj_err_caller(ms->L, LJ_ERR_STRCAPU);
+ if (l == CAP_POSITION)
+ lua_pushinteger(ms->L, ms->capture[i].init - ms->src_init + 1);
+ else
+ lua_pushlstring(ms->L, ms->capture[i].init, (size_t)l);
+ }
+}
+
+static int push_captures(MatchState *ms, const char *s, const char *e)
+{
+ int i;
+ int nlevels = (ms->level == 0 && s) ? 1 : ms->level;
+ luaL_checkstack(ms->L, nlevels, "too many captures");
+ for (i = 0; i < nlevels; i++)
+ push_onecapture(ms, i, s, e);
+ return nlevels; /* number of strings pushed */
+}
+
+static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
+{
+ /* relative string position: negative means back from end */
+ if (pos < 0) pos += (ptrdiff_t)len + 1;
+ return (pos >= 0) ? pos : 0;
+}
+
+static int str_find_aux(lua_State *L, int find)
+{
+ size_t l1, l2;
+ const char *s = luaL_checklstring(L, 1, &l1);
+ const char *p = luaL_checklstring(L, 2, &l2);
+ ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
+ if (init < 0)
+ init = 0;
+ else if ((size_t)(init) > l1)
+ init = (ptrdiff_t)l1;
+ if (find && (lua_toboolean(L, 4) || /* explicit request? */
+ strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */
+ /* do a plain search */
+ const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2);
+ if (s2) {
+ lua_pushinteger(L, s2-s+1);
+ lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
+ return 2;
+ }
+ } else {
+ MatchState ms;
+ int anchor = (*p == '^') ? (p++, 1) : 0;
+ const char *s1=s+init;
+ ms.L = L;
+ ms.src_init = s;
+ ms.src_end = s+l1;
+ do {
+ const char *res;
+ ms.level = 0;
+ if ((res=match(&ms, s1, p)) != NULL) {
+ if (find) {
+ lua_pushinteger(L, s1-s+1); /* start */
+ lua_pushinteger(L, res-s); /* end */
+ return push_captures(&ms, NULL, 0) + 2;
+ } else {
+ return push_captures(&ms, s1, res);
+ }
+ }
+ } while (s1++ < ms.src_end && !anchor);
+ }
+ lua_pushnil(L); /* not found */
+ return 1;
+}
+
+LJLIB_CF(string_find)
+{
+ return str_find_aux(L, 1);
+}
+
+LJLIB_CF(string_match)
+{
+ return str_find_aux(L, 0);
+}
+
+LJLIB_NOREG LJLIB_CF(string_gmatch_aux)
+{
+ const char *p = strVdata(lj_lib_upvalue(L, 2));
+ GCstr *str = strV(lj_lib_upvalue(L, 1));
+ const char *s = strdata(str);
+ TValue *tvpos = lj_lib_upvalue(L, 3);
+ const char *src = s + tvpos->u32.lo;
+ MatchState ms;
+ ms.L = L;
+ ms.src_init = s;
+ ms.src_end = s + str->len;
+ for (; src <= ms.src_end; src++) {
+ const char *e;
+ ms.level = 0;
+ if ((e = match(&ms, src, p)) != NULL) {
+ int32_t pos = (int32_t)(e - s);
+ if (e == src) pos++; /* Ensure progress for empty match. */
+ tvpos->u32.lo = (uint32_t)pos;
+ return push_captures(&ms, src, e);
+ }
+ }
+ return 0; /* not found */
+}
+
+LJLIB_CF(string_gmatch)
+{
+ lj_lib_checkstr(L, 1);
+ lj_lib_checkstr(L, 2);
+ L->top = L->base+3;
+ (L->top-1)->u64 = 0;
+ lua_pushcclosure(L, lj_cf_string_gmatch_aux, 3);
+ funcV(L->top-1)->c.ffid = FF_string_gmatch_aux;
+ return 1;
+}
+
+static void add_s(MatchState *ms, luaL_Buffer *b, const char *s, const char *e)
+{
+ size_t l, i;
+ const char *news = lua_tolstring(ms->L, 3, &l);
+ for (i = 0; i < l; i++) {
+ if (news[i] != L_ESC) {
+ luaL_addchar(b, news[i]);
+ } else {
+ i++; /* skip ESC */
+ if (!lj_ctype_isdigit(uchar(news[i]))) {
+ luaL_addchar(b, news[i]);
+ } else if (news[i] == '0') {
+ luaL_addlstring(b, s, (size_t)(e - s));
+ } else {
+ push_onecapture(ms, news[i] - '1', s, e);
+ luaL_addvalue(b); /* add capture to accumulated result */
+ }
+ }
+ }
+}
+
+static void add_value(MatchState *ms, luaL_Buffer *b,
+ const char *s, const char *e)
+{
+ lua_State *L = ms->L;
+ switch (lua_type(L, 3)) {
+ case LUA_TNUMBER:
+ case LUA_TSTRING: {
+ add_s(ms, b, s, e);
+ return;
+ }
+ case LUA_TFUNCTION: {
+ int n;
+ lua_pushvalue(L, 3);
+ n = push_captures(ms, s, e);
+ lua_call(L, n, 1);
+ break;
+ }
+ case LUA_TTABLE: {
+ push_onecapture(ms, 0, s, e);
+ lua_gettable(L, 3);
+ break;
+ }
+ }
+ if (!lua_toboolean(L, -1)) { /* nil or false? */
+ lua_pop(L, 1);
+ lua_pushlstring(L, s, (size_t)(e - s)); /* keep original text */
+ } else if (!lua_isstring(L, -1)) {
+ lj_err_callerv(L, LJ_ERR_STRGSRV, luaL_typename(L, -1));
+ }
+ luaL_addvalue(b); /* add result to accumulator */
+}
+
+LJLIB_CF(string_gsub)
+{
+ size_t srcl;
+ const char *src = luaL_checklstring(L, 1, &srcl);
+ const char *p = luaL_checkstring(L, 2);
+ int tr = lua_type(L, 3);
+ int max_s = luaL_optint(L, 4, (int)(srcl+1));
+ int anchor = (*p == '^') ? (p++, 1) : 0;
+ int n = 0;
+ MatchState ms;
+ luaL_Buffer b;
+ if (!(tr == LUA_TNUMBER || tr == LUA_TSTRING ||
+ tr == LUA_TFUNCTION || tr == LUA_TTABLE))
+ lj_err_arg(L, 3, LJ_ERR_NOSFT);
+ luaL_buffinit(L, &b);
+ ms.L = L;
+ ms.src_init = src;
+ ms.src_end = src+srcl;
+ while (n < max_s) {
+ const char *e;
+ ms.level = 0;
+ e = match(&ms, src, p);
+ if (e) {
+ n++;
+ add_value(&ms, &b, src, e);
+ }
+ if (e && e>src) /* non empty match? */
+ src = e; /* skip it */
+ else if (src < ms.src_end)
+ luaL_addchar(&b, *src++);
+ else
+ break;
+ if (anchor)
+ break;
+ }
+ luaL_addlstring(&b, src, (size_t)(ms.src_end-src));
+ luaL_pushresult(&b);
+ lua_pushinteger(L, n); /* number of substitutions */
+ return 2;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
+#define MAX_FMTITEM 512
+/* valid flags in a format specification */
+#define FMT_FLAGS "-+ #0"
+/*
+** maximum size of each format specification (such as '%-099.99d')
+** (+10 accounts for %99.99x plus margin of error)
+*/
+#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
+
+static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
+{
+ GCstr *str = lj_lib_checkstr(L, arg);
+ int32_t len = (int32_t)str->len;
+ const char *s = strdata(str);
+ luaL_addchar(b, '"');
+ while (len--) {
+ switch (*s) {
+ case '"': case '\\': case '\n':
+ luaL_addchar(b, '\\');
+ luaL_addchar(b, *s);
+ break;
+ case '\r':
+ luaL_addlstring(b, "\\r", 2);
+ break;
+ case '\0':
+ luaL_addlstring(b, "\\000", 4);
+ break;
+ default:
+ luaL_addchar(b, *s);
+ break;
+ }
+ s++;
+ }
+ luaL_addchar(b, '"');
+}
+
+static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
+{
+ const char *p = strfrmt;
+ while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */
+ if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
+ lj_err_caller(L, LJ_ERR_STRFMTR);
+ if (lj_ctype_isdigit(uchar(*p))) p++; /* skip width */
+ if (lj_ctype_isdigit(uchar(*p))) p++; /* (2 digits at most) */
+ if (*p == '.') {
+ p++;
+ if (lj_ctype_isdigit(uchar(*p))) p++; /* skip precision */
+ if (lj_ctype_isdigit(uchar(*p))) p++; /* (2 digits at most) */
+ }
+ if (lj_ctype_isdigit(uchar(*p)))
+ lj_err_caller(L, LJ_ERR_STRFMTW);
+ *(form++) = '%';
+ strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
+ form += p - strfrmt + 1;
+ *form = '\0';
+ return p;
+}
+
+static void addintlen(char *form)
+{
+ size_t l = strlen(form);
+ char spec = form[l - 1];
+ strcpy(form + l - 1, LUA_INTFRMLEN);
+ form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
+ form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
+}
+
+LJLIB_CF(string_format)
+{
+ int arg = 1;
+ GCstr *fmt = lj_lib_checkstr(L, arg);
+ const char *strfrmt = strdata(fmt);
+ const char *strfrmt_end = strfrmt + fmt->len;
+ luaL_Buffer b;
+ luaL_buffinit(L, &b);
+ while (strfrmt < strfrmt_end) {
+ if (*strfrmt != L_ESC) {
+ luaL_addchar(&b, *strfrmt++);
+ } else if (*++strfrmt == L_ESC) {
+ luaL_addchar(&b, *strfrmt++); /* %% */
+ } else { /* format item */
+ char form[MAX_FMTSPEC]; /* to store the format (`%...') */
+ char buff[MAX_FMTITEM]; /* to store the formatted item */
+ arg++;
+ strfrmt = scanformat(L, strfrmt, form);
+ switch (*strfrmt++) {
+ case 'c':
+ sprintf(buff, form, lj_lib_checkint(L, arg));
+ break;
+ case 'd': case 'i':
+ addintlen(form);
+ sprintf(buff, form, (LUA_INTFRM_T)lj_lib_checknum(L, arg));
+ break;
+ case 'o': case 'u': case 'x': case 'X':
+ addintlen(form);
+ sprintf(buff, form, (unsigned LUA_INTFRM_T)lj_lib_checknum(L, arg));
+ break;
+ case 'e': case 'E': case 'f': case 'g': case 'G':
+ sprintf(buff, form, (double)lj_lib_checknum(L, arg));
+ break;
+ case 'q':
+ addquoted(L, &b, arg);
+ continue;
+ case 'p':
+ lj_str_pushf(L, "%p", lua_topointer(L, arg));
+ luaL_addvalue(&b);
+ continue;
+ case 's': {
+ GCstr *str = lj_lib_checkstr(L, arg);
+ if (!strchr(form, '.') && str->len >= 100) {
+ /* no precision and string is too long to be formatted;
+ keep original string */
+ setstrV(L, L->top++, str);
+ luaL_addvalue(&b);
+ continue;
+ }
+ sprintf(buff, form, strdata(str));
+ break;
+ }
+ default:
+ lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
+ break;
+ }
+ luaL_addlstring(&b, buff, strlen(buff));
+ }
+ }
+ luaL_pushresult(&b);
+ return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_string(lua_State *L)
+{
+ GCtab *mt;
+ LJ_LIB_REG(L, string);
+#if defined(LUA_COMPAT_GFIND)
+ lua_getfield(L, -1, "gmatch");
+ lua_setfield(L, -2, "gfind");
+#endif
+ mt = lj_tab_new(L, 0, 1);
+ /* NOBARRIER: G(L)->mmname[] is a GC root. */
+ setgcref(G(L)->basemt[~LJ_TSTR], obj2gco(mt));
+ settabV(L, lj_tab_setstr(L, mt, strref(G(L)->mmname[MM_index])),
+ tabV(L->top-1));
+ mt->nomm = cast_byte(~(1u<<MM_index));
+ return 1;
+}
+
diff --git a/src/lib_table.c b/src/lib_table.c
new file mode 100644
index 00000000..68dc825b
--- /dev/null
+++ b/src/lib_table.c
@@ -0,0 +1,276 @@
+/*
+** Table library.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lib_table_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_tab.h"
+#include "lj_lib.h"
+
+/* ------------------------------------------------------------------------ */
+
+#define LJLIB_MODULE_table
+
+LJLIB_CF(table_foreachi)
+{
+ GCtab *t = lj_lib_checktab(L, 1);
+ GCfunc *func = lj_lib_checkfunc(L, 2);
+ MSize i, n = lj_tab_len(t);
+ for (i = 1; i <= n; i++) {
+ cTValue *val;
+ setfuncV(L, L->top, func);
+ setintV(L->top+1, i);
+ val = lj_tab_getint(t, (int32_t)i);
+ if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
+ L->top += 3;
+ lua_call(L, 2, 1);
+ if (!tvisnil(L->top-1))
+ return 1;
+ L->top--;
+ }
+ return 0;
+}
+
+LJLIB_CF(table_foreach)
+{
+ GCtab *t = lj_lib_checktab(L, 1);
+ GCfunc *func = lj_lib_checkfunc(L, 2);
+ L->top = L->base+3;
+ setnilV(L->top-1);
+ while (lj_tab_next(L, t, L->top-1)) {
+ copyTV(L, L->top+2, L->top);
+ copyTV(L, L->top+1, L->top-1);
+ setfuncV(L, L->top, func);
+ L->top += 3;
+ lua_call(L, 2, 1);
+ if (!tvisnil(L->top-1))
+ return 1;
+ L->top--;
+ }
+ return 0;
+}
+
+LJLIB_ASM(table_getn) LJLIB_REC(.)
+{
+ lj_lib_checktab(L, 1);
+ return FFH_UNREACHABLE;
+}
+
+LJLIB_CF(table_maxn)
+{
+ GCtab *t = lj_lib_checktab(L, 1);
+ TValue *array = tvref(t->array);
+ Node *node;
+ lua_Number m = 0;
+ uint32_t i;
+ for (i = 0; i < t->asize; i++)
+ if (!tvisnil(&array[i])) {
+ m = (lua_Number)i;
+ break;
+ }
+ node = noderef(t->node);
+ for (i = 0; i <= t->hmask; i++)
+ if (tvisnum(&node[i].key) && numV(&node[i].key) > m)
+ m = numV(&node[i].key);
+ setnumV(L->top-1, m);
+ return 1;
+}
+
+LJLIB_CF(table_insert)
+{
+ GCtab *t = lj_lib_checktab(L, 1);
+ int32_t n, i = (int32_t)lj_tab_len(t) + 1;
+ int nargs = (int)((char *)L->top - (char *)L->base);
+ if (nargs != 2*sizeof(TValue)) {
+ if (nargs != 3*sizeof(TValue))
+ lj_err_caller(L, LJ_ERR_TABINS);
+ /* NOBARRIER: This just moves existing elements around. */
+ for (n = lj_lib_checkint(L, 2); i > n; i--) {
+ /* The set may invalidate the get pointer, so need to do it first! */
+ TValue *dst = lj_tab_setint(L, t, i);
+ cTValue *src = lj_tab_getint(t, i-1);
+ if (src) {
+ copyTV(L, dst, src);
+ } else {
+ setnilV(dst);
+ }
+ }
+ i = n;
+ }
+ {
+ TValue *dst = lj_tab_setint(L, t, i);
+ copyTV(L, dst, L->top-1);
+ lj_gc_barriert(L, t, dst);
+ }
+ return 0;
+}
+
+LJLIB_CF(table_remove)
+{
+ GCtab *t = lj_lib_checktab(L, 1);
+ int32_t e = (int32_t)lj_tab_len(t);
+ int32_t pos = lj_lib_optint(L, 2, e);
+ if (!(1 <= pos && pos <= e)) /* position is outside bounds? */
+ return 0; /* nothing to remove */
+ lua_rawgeti(L, 1, pos);
+ /* NOBARRIER: This just moves existing elements around. */
+ for (; pos < e; pos++) {
+ cTValue *src = lj_tab_getint(t, pos+1);
+ TValue *dst = lj_tab_setint(L, t, pos);
+ if (src) {
+ copyTV(L, dst, src);
+ } else {
+ setnilV(dst);
+ }
+ }
+ setnilV(lj_tab_setint(L, t, e));
+ return 1;
+}
+
+LJLIB_CF(table_concat)
+{
+ luaL_Buffer b;
+ GCtab *t = lj_lib_checktab(L, 1);
+ GCstr *sep = lj_lib_optstr(L, 2);
+ MSize seplen = sep ? sep->len : 0;
+ int32_t i = lj_lib_optint(L, 3, 1);
+ int32_t e = L->base+3 < L->top ? lj_lib_checkint(L, 4) :
+ (int32_t)lj_tab_len(t);
+ luaL_buffinit(L, &b);
+ if (i <= e) {
+ for (;;) {
+ cTValue *o;
+ lua_rawgeti(L, 1, i);
+ o = L->top-1;
+ if (!(tvisstr(o) || tvisnum(o)))
+ lj_err_callerv(L, LJ_ERR_TABCAT, typename(o), i);
+ luaL_addvalue(&b);
+ if (i++ == e) break;
+ if (seplen)
+ luaL_addlstring(&b, strdata(sep), seplen);
+ }
+ }
+ luaL_pushresult(&b);
+ return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+static void set2(lua_State *L, int i, int j)
+{
+ lua_rawseti(L, 1, i);
+ lua_rawseti(L, 1, j);
+}
+
+static int sort_comp(lua_State *L, int a, int b)
+{
+ if (!lua_isnil(L, 2)) { /* function? */
+ int res;
+ lua_pushvalue(L, 2);
+ lua_pushvalue(L, a-1); /* -1 to compensate function */
+ lua_pushvalue(L, b-2); /* -2 to compensate function and `a' */
+ lua_call(L, 2, 1);
+ res = lua_toboolean(L, -1);
+ lua_pop(L, 1);
+ return res;
+ } else { /* a < b? */
+ return lua_lessthan(L, a, b);
+ }
+}
+
+static void auxsort(lua_State *L, int l, int u)
+{
+ while (l < u) { /* for tail recursion */
+ int i, j;
+ /* sort elements a[l], a[(l+u)/2] and a[u] */
+ lua_rawgeti(L, 1, l);
+ lua_rawgeti(L, 1, u);
+ if (sort_comp(L, -1, -2)) /* a[u] < a[l]? */
+ set2(L, l, u); /* swap a[l] - a[u] */
+ else
+ lua_pop(L, 2);
+ if (u-l == 1) break; /* only 2 elements */
+ i = (l+u)/2;
+ lua_rawgeti(L, 1, i);
+ lua_rawgeti(L, 1, l);
+ if (sort_comp(L, -2, -1)) { /* a[i]<a[l]? */
+ set2(L, i, l);
+ } else {
+ lua_pop(L, 1); /* remove a[l] */
+ lua_rawgeti(L, 1, u);
+ if (sort_comp(L, -1, -2)) /* a[u]<a[i]? */
+ set2(L, i, u);
+ else
+ lua_pop(L, 2);
+ }
+ if (u-l == 2) break; /* only 3 elements */
+ lua_rawgeti(L, 1, i); /* Pivot */
+ lua_pushvalue(L, -1);
+ lua_rawgeti(L, 1, u-1);
+ set2(L, i, u-1);
+ /* a[l] <= P == a[u-1] <= a[u], only need to sort from l+1 to u-2 */
+ i = l; j = u-1;
+ for (;;) { /* invariant: a[l..i] <= P <= a[j..u] */
+ /* repeat ++i until a[i] >= P */
+ while (lua_rawgeti(L, 1, ++i), sort_comp(L, -1, -2)) {
+ if (i>u) lj_err_caller(L, LJ_ERR_TABSORT);
+ lua_pop(L, 1); /* remove a[i] */
+ }
+ /* repeat --j until a[j] <= P */
+ while (lua_rawgeti(L, 1, --j), sort_comp(L, -3, -1)) {
+ if (j<l) lj_err_caller(L, LJ_ERR_TABSORT);
+ lua_pop(L, 1); /* remove a[j] */
+ }
+ if (j<i) {
+ lua_pop(L, 3); /* pop pivot, a[i], a[j] */
+ break;
+ }
+ set2(L, i, j);
+ }
+ lua_rawgeti(L, 1, u-1);
+ lua_rawgeti(L, 1, i);
+ set2(L, u-1, i); /* swap pivot (a[u-1]) with a[i] */
+ /* a[l..i-1] <= a[i] == P <= a[i+1..u] */
+ /* adjust so that smaller half is in [j..i] and larger one in [l..u] */
+ if (i-l < u-i) {
+ j=l; i=i-1; l=i+2;
+ } else {
+ j=i+1; i=u; u=j-2;
+ }
+ auxsort(L, j, i); /* call recursively the smaller one */
+ } /* repeat the routine for the larger one */
+}
+
+LJLIB_CF(table_sort)
+{
+ GCtab *t = lj_lib_checktab(L, 1);
+ int32_t n = (int32_t)lj_tab_len(t);
+ lua_settop(L, 2);
+ if (!tvisnil(L->base+1))
+ lj_lib_checkfunc(L, 2);
+ auxsort(L, 1, n);
+ return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+LUALIB_API int luaopen_table(lua_State *L)
+{
+ LJ_LIB_REG(L, table);
+ return 1;
+}
+
diff --git a/src/lj.supp b/src/lj.supp
new file mode 100644
index 00000000..9a1379d7
--- /dev/null
+++ b/src/lj.supp
@@ -0,0 +1,6 @@
+# Valgrind suppression file for LuaJIT 2.x.
+{
+ Optimized string compare
+ Memcheck:Addr4
+ fun:lj_str_cmp
+}
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
new file mode 100644
index 00000000..8ad4f8fb
--- /dev/null
+++ b/src/lj_alloc.c
@@ -0,0 +1,1232 @@
+/*
+** Bundled memory allocator.
+**
+** Beware: this is a HEAVILY CUSTOMIZED version of dlmalloc.
+** The original bears the following remark:
+**
+** This is a version (aka dlmalloc) of malloc/free/realloc written by
+** Doug Lea and released to the public domain, as explained at
+** http://creativecommons.org/licenses/publicdomain.
+**
+** * Version pre-2.8.4 Wed Mar 29 19:46:29 2006 (dl at gee)
+**
+** No additional copyright is claimed over the customizations.
+** Please do NOT bother the original author about this version here!
+**
+** If you want to use dlmalloc in another project, you should get
+** the original from: ftp://gee.cs.oswego.edu/pub/misc/
+** For thread-safe derivatives, take a look at:
+** - ptmalloc: http://www.malloc.de/
+** - nedmalloc: http://www.nedprod.com/programs/portable/nedmalloc/
+*/
+
+#define lj_alloc_c
+#define LUA_CORE
+
+/* To get the mremap prototype. Must be defind before any system includes. */
+#if defined(__linux__) && !defined(_GNU_SOURCE)
+#define _GNU_SOURCE
+#endif
+
+#include "lj_def.h"
+#include "lj_arch.h"
+#include "lj_alloc.h"
+
+#ifndef LUAJIT_USE_SYSMALLOC
+
+#define MAX_SIZE_T (~(size_t)0)
+#define MALLOC_ALIGNMENT ((size_t)8U)
+
+#define DEFAULT_GRANULARITY ((size_t)128U * (size_t)1024U)
+#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U)
+#define DEFAULT_MMAP_THRESHOLD ((size_t)128U * (size_t)1024U)
+#define MAX_RELEASE_CHECK_RATE 255
+
+/* ------------------- size_t and alignment properties -------------------- */
+
+/* The byte and bit size of a size_t */
+#define SIZE_T_SIZE (sizeof(size_t))
+#define SIZE_T_BITSIZE (sizeof(size_t) << 3)
+
+/* Some constants coerced to size_t */
+/* Annoying but necessary to avoid errors on some platforms */
+#define SIZE_T_ZERO ((size_t)0)
+#define SIZE_T_ONE ((size_t)1)
+#define SIZE_T_TWO ((size_t)2)
+#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1)
+#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2)
+#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)
+
+/* The bit mask value corresponding to MALLOC_ALIGNMENT */
+#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE)
+
+/* the number of bytes to offset an address to align it */
+#define align_offset(A)\
+ ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
+ ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
+
+/* -------------------------- MMAP support ------------------------------- */
+
+#define MFAIL ((void *)(MAX_SIZE_T))
+#define CMFAIL ((char *)(MFAIL)) /* defined for convenience */
+
+#define IS_DIRECT_BIT (SIZE_T_ONE)
+
+#ifdef LUA_USE_WIN
+
+#if LJ_64
+#error "missing support for WIN64 to allocate in lower 2G"
+#endif
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/* Win32 MMAP via VirtualAlloc */
+static LJ_AINLINE void *CALL_MMAP(size_t size)
+{
+ void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+ return (ptr != 0)? ptr: MFAIL;
+}
+
+/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
+static LJ_AINLINE void *DIRECT_MMAP(size_t size)
+{
+ void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+ PAGE_READWRITE);
+ return (ptr != 0)? ptr: MFAIL;
+}
+
+/* This function supports releasing coalesed segments */
+static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
+{
+ MEMORY_BASIC_INFORMATION minfo;
+ char *cptr = (char *)ptr;
+ while (size) {
+ if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
+ return -1;
+ if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
+ minfo.State != MEM_COMMIT || minfo.RegionSize > size)
+ return -1;
+ if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
+ return -1;
+ cptr += minfo.RegionSize;
+ size -= minfo.RegionSize;
+ }
+ return 0;
+}
+
+#else
+
+#include <sys/mman.h>
+
+#define MMAP_PROT (PROT_READ|PROT_WRITE)
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+#define MAP_ANONYMOUS MAP_ANON
+#endif /* MAP_ANON */
+
+#if LJ_64
+#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS|MAP_32BIT)
+#else
+#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
+#endif
+
+#define CALL_MMAP(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0)
+#define DIRECT_MMAP(s) CALL_MMAP(s)
+#define CALL_MUNMAP(a, s) munmap((a), (s))
+
+#ifdef __linux__
+/* Need to define _GNU_SOURCE to get the mremap prototype. */
+#define CALL_MREMAP(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv))
+#endif
+
+#endif
+
+#ifndef CALL_MREMAP
+#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
+#endif
+
+/* ----------------------- Chunk representations ------------------------ */
+
+struct malloc_chunk {
+ size_t prev_foot; /* Size of previous chunk (if free). */
+ size_t head; /* Size and inuse bits. */
+ struct malloc_chunk *fd; /* double links -- used only if free. */
+ struct malloc_chunk *bk;
+};
+
+typedef struct malloc_chunk mchunk;
+typedef struct malloc_chunk *mchunkptr;
+typedef struct malloc_chunk *sbinptr; /* The type of bins of chunks */
+typedef unsigned int bindex_t; /* Described below */
+typedef unsigned int binmap_t; /* Described below */
+typedef unsigned int flag_t; /* The type of various bit flag sets */
+
+/* ------------------- Chunks sizes and alignments ----------------------- */
+
+#define MCHUNK_SIZE (sizeof(mchunk))
+
+#define CHUNK_OVERHEAD (SIZE_T_SIZE)
+
+/* Direct chunks need a second word of overhead ... */
+#define DIRECT_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES)
+/* ... and additional padding for fake next-chunk at foot */
+#define DIRECT_FOOT_PAD (FOUR_SIZE_T_SIZES)
+
+/* The smallest size we can malloc is an aligned minimal chunk */
+#define MIN_CHUNK_SIZE\
+ ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* conversion from malloc headers to user pointers, and back */
+#define chunk2mem(p) ((void *)((char *)(p) + TWO_SIZE_T_SIZES))
+#define mem2chunk(mem) ((mchunkptr)((char *)(mem) - TWO_SIZE_T_SIZES))
+/* chunk associated with aligned address A */
+#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A)))
+
+/* Bounds on request (not chunk) sizes. */
+#define MAX_REQUEST ((~MIN_CHUNK_SIZE+1) << 2)
+#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)
+
+/* pad request bytes into a usable size */
+#define pad_request(req) \
+ (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)
+
+/* pad request, checking for minimum (but not maximum) */
+#define request2size(req) \
+ (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
+
+/* ------------------ Operations on head and foot fields ----------------- */
+
+#define PINUSE_BIT (SIZE_T_ONE)
+#define CINUSE_BIT (SIZE_T_TWO)
+#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT)
+
+/* Head value for fenceposts */
+#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE)
+
+/* extraction of fields from head words */
+#define cinuse(p) ((p)->head & CINUSE_BIT)
+#define pinuse(p) ((p)->head & PINUSE_BIT)
+#define chunksize(p) ((p)->head & ~(INUSE_BITS))
+
+#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT)
+#define clear_cinuse(p) ((p)->head &= ~CINUSE_BIT)
+
+/* Treat space at ptr +/- offset as a chunk */
+#define chunk_plus_offset(p, s) ((mchunkptr)(((char *)(p)) + (s)))
+#define chunk_minus_offset(p, s) ((mchunkptr)(((char *)(p)) - (s)))
+
+/* Ptr to next or previous physical malloc_chunk. */
+#define next_chunk(p) ((mchunkptr)(((char *)(p)) + ((p)->head & ~INUSE_BITS)))
+#define prev_chunk(p) ((mchunkptr)(((char *)(p)) - ((p)->prev_foot) ))
+
+/* extract next chunk's pinuse bit */
+#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT)
+
+/* Get/set size at footer */
+#define get_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot)
+#define set_foot(p, s) (((mchunkptr)((char *)(p) + (s)))->prev_foot = (s))
+
+/* Set size, pinuse bit, and foot */
+#define set_size_and_pinuse_of_free_chunk(p, s)\
+ ((p)->head = (s|PINUSE_BIT), set_foot(p, s))
+
+/* Set size, pinuse bit, foot, and clear next pinuse */
+#define set_free_with_pinuse(p, s, n)\
+ (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))
+
+#define is_direct(p)\
+ (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_DIRECT_BIT))
+
+/* Get the internal overhead associated with chunk p */
+#define overhead_for(p)\
+ (is_direct(p)? DIRECT_CHUNK_OVERHEAD : CHUNK_OVERHEAD)
+
+/* ---------------------- Overlaid data structures ----------------------- */
+
+struct malloc_tree_chunk {
+ /* The first four fields must be compatible with malloc_chunk */
+ size_t prev_foot;
+ size_t head;
+ struct malloc_tree_chunk *fd;
+ struct malloc_tree_chunk *bk;
+
+ struct malloc_tree_chunk *child[2];
+ struct malloc_tree_chunk *parent;
+ bindex_t index;
+};
+
+typedef struct malloc_tree_chunk tchunk;
+typedef struct malloc_tree_chunk *tchunkptr;
+typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */
+
+/* A little helper macro for trees */
+#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])
+
+/* ----------------------------- Segments -------------------------------- */
+
+struct malloc_segment {
+ char *base; /* base address */
+ size_t size; /* allocated size */
+ struct malloc_segment *next; /* ptr to next segment */
+};
+
+typedef struct malloc_segment msegment;
+typedef struct malloc_segment *msegmentptr;
+
+/* ---------------------------- malloc_state ----------------------------- */
+
+/* Bin types, widths and sizes */
+#define NSMALLBINS (32U)
+#define NTREEBINS (32U)
+#define SMALLBIN_SHIFT (3U)
+#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT)
+#define TREEBIN_SHIFT (8U)
+#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT)
+#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE)
+#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
+
+struct malloc_state {
+ binmap_t smallmap;
+ binmap_t treemap;
+ size_t dvsize;
+ size_t topsize;
+ mchunkptr dv;
+ mchunkptr top;
+ size_t trim_check;
+ size_t release_checks;
+ mchunkptr smallbins[(NSMALLBINS+1)*2];
+ tbinptr treebins[NTREEBINS];
+ msegment seg;
+};
+
+typedef struct malloc_state *mstate;
+
+#define is_initialized(M) ((M)->top != 0)
+
+/* -------------------------- system alloc setup ------------------------- */
+
+/* page-align a size */
+#define page_align(S)\
+ (((S) + (LJ_PAGESIZE - SIZE_T_ONE)) & ~(LJ_PAGESIZE - SIZE_T_ONE))
+
+/* granularity-align a size */
+#define granularity_align(S)\
+ (((S) + (DEFAULT_GRANULARITY - SIZE_T_ONE))\
+ & ~(DEFAULT_GRANULARITY - SIZE_T_ONE))
+
+#ifdef LUA_USE_WIN
+#define mmap_align(S) granularity_align(S)
+#else
+#define mmap_align(S) page_align(S)
+#endif
+
+/* True if segment S holds address A */
+#define segment_holds(S, A)\
+ ((char *)(A) >= S->base && (char *)(A) < S->base + S->size)
+
+/* Return segment holding given address */
+static msegmentptr segment_holding(mstate m, char *addr)
+{
+ msegmentptr sp = &m->seg;
+ for (;;) {
+ if (addr >= sp->base && addr < sp->base + sp->size)
+ return sp;
+ if ((sp = sp->next) == 0)
+ return 0;
+ }
+}
+
+/* Return true if segment contains a segment link */
+static int has_segment_link(mstate m, msegmentptr ss)
+{
+ msegmentptr sp = &m->seg;
+ for (;;) {
+ if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size)
+ return 1;
+ if ((sp = sp->next) == 0)
+ return 0;
+ }
+}
+
+/*
+ TOP_FOOT_SIZE is padding at the end of a segment, including space
+ that may be needed to place segment records and fenceposts when new
+ noncontiguous segments are added.
+*/
+#define TOP_FOOT_SIZE\
+ (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+
+/* ---------------------------- Indexing Bins ---------------------------- */
+
+#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
+#define small_index(s) ((s) >> SMALLBIN_SHIFT)
+#define small_index2size(i) ((i) << SMALLBIN_SHIFT)
+#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE))
+
+/* addressing by index. See above about smallbin repositioning */
+#define smallbin_at(M, i) ((sbinptr)((char *)&((M)->smallbins[(i)<<1])))
+#define treebin_at(M,i) (&((M)->treebins[i]))
+
+/* assign tree index for size S to variable I */
+#define compute_tree_index(S, I)\
+{\
+ unsigned int X = S >> TREEBIN_SHIFT;\
+ if (X == 0) {\
+ I = 0;\
+ } else if (X > 0xFFFF) {\
+ I = NTREEBINS-1;\
+ } else {\
+ unsigned int K = lj_fls(X);\
+ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
+ }\
+}
+
+/* Bit representing maximum resolved size in a treebin at i */
+#define bit_for_tree_index(i) \
+ (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)
+
+/* Shift placing maximum resolved bit in a treebin at i as sign bit */
+#define leftshift_for_tree_index(i) \
+ ((i == NTREEBINS-1)? 0 : \
+ ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))
+
+/* The size of the smallest chunk held in bin with index i */
+#define minsize_for_tree_index(i) \
+ ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \
+ (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))
+
+/* ------------------------ Operations on bin maps ----------------------- */
+
+/* bit corresponding to given index */
+#define idx2bit(i) ((binmap_t)(1) << (i))
+
+/* Mark/Clear bits with given index */
+#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i))
+#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i))
+#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i))
+
+#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i))
+#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i))
+#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i))
+
+/* mask with all bits to left of least bit of x on */
+#define left_bits(x) ((x<<1) | (~(x<<1)+1))
+
+/* Set cinuse bit and pinuse bit of next chunk */
+#define set_inuse(M,p,s)\
+ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
+ ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
+#define set_inuse_and_pinuse(M,p,s)\
+ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
+ ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)
+
+/* Set size, cinuse and pinuse bit of this chunk */
+#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
+ ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))
+
+/* ----------------------- Operations on smallbins ----------------------- */
+
+/* Link a free chunk into a smallbin */
+#define insert_small_chunk(M, P, S) {\
+ bindex_t I = small_index(S);\
+ mchunkptr B = smallbin_at(M, I);\
+ mchunkptr F = B;\
+ if (!smallmap_is_marked(M, I))\
+ mark_smallmap(M, I);\
+ else\
+ F = B->fd;\
+ B->fd = P;\
+ F->bk = P;\
+ P->fd = F;\
+ P->bk = B;\
+}
+
+/* Unlink a chunk from a smallbin */
+#define unlink_small_chunk(M, P, S) {\
+ mchunkptr F = P->fd;\
+ mchunkptr B = P->bk;\
+ bindex_t I = small_index(S);\
+ if (F == B) {\
+ clear_smallmap(M, I);\
+ } else {\
+ F->bk = B;\
+ B->fd = F;\
+ }\
+}
+
+/* Unlink the first chunk from a smallbin */
+#define unlink_first_small_chunk(M, B, P, I) {\
+ mchunkptr F = P->fd;\
+ if (B == F) {\
+ clear_smallmap(M, I);\
+ } else {\
+ B->fd = F;\
+ F->bk = B;\
+ }\
+}
+
+/* Replace dv node, binning the old one */
+/* Used only when dvsize known to be small */
+#define replace_dv(M, P, S) {\
+ size_t DVS = M->dvsize;\
+ if (DVS != 0) {\
+ mchunkptr DV = M->dv;\
+ insert_small_chunk(M, DV, DVS);\
+ }\
+ M->dvsize = S;\
+ M->dv = P;\
+}
+
+/* ------------------------- Operations on trees ------------------------- */
+
+/* Insert chunk into tree */
+#define insert_large_chunk(M, X, S) {\
+ tbinptr *H;\
+ bindex_t I;\
+ compute_tree_index(S, I);\
+ H = treebin_at(M, I);\
+ X->index = I;\
+ X->child[0] = X->child[1] = 0;\
+ if (!treemap_is_marked(M, I)) {\
+ mark_treemap(M, I);\
+ *H = X;\
+ X->parent = (tchunkptr)H;\
+ X->fd = X->bk = X;\
+ } else {\
+ tchunkptr T = *H;\
+ size_t K = S << leftshift_for_tree_index(I);\
+ for (;;) {\
+ if (chunksize(T) != S) {\
+ tchunkptr *C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
+ K <<= 1;\
+ if (*C != 0) {\
+ T = *C;\
+ } else {\
+ *C = X;\
+ X->parent = T;\
+ X->fd = X->bk = X;\
+ break;\
+ }\
+ } else {\
+ tchunkptr F = T->fd;\
+ T->fd = F->bk = X;\
+ X->fd = F;\
+ X->bk = T;\
+ X->parent = 0;\
+ break;\
+ }\
+ }\
+ }\
+}
+
+#define unlink_large_chunk(M, X) {\
+ tchunkptr XP = X->parent;\
+ tchunkptr R;\
+ if (X->bk != X) {\
+ tchunkptr F = X->fd;\
+ R = X->bk;\
+ F->bk = R;\
+ R->fd = F;\
+ } else {\
+ tchunkptr *RP;\
+ if (((R = *(RP = &(X->child[1]))) != 0) ||\
+ ((R = *(RP = &(X->child[0]))) != 0)) {\
+ tchunkptr *CP;\
+ while ((*(CP = &(R->child[1])) != 0) ||\
+ (*(CP = &(R->child[0])) != 0)) {\
+ R = *(RP = CP);\
+ }\
+ *RP = 0;\
+ }\
+ }\
+ if (XP != 0) {\
+ tbinptr *H = treebin_at(M, X->index);\
+ if (X == *H) {\
+ if ((*H = R) == 0) \
+ clear_treemap(M, X->index);\
+ } else {\
+ if (XP->child[0] == X) \
+ XP->child[0] = R;\
+ else \
+ XP->child[1] = R;\
+ }\
+ if (R != 0) {\
+ tchunkptr C0, C1;\
+ R->parent = XP;\
+ if ((C0 = X->child[0]) != 0) {\
+ R->child[0] = C0;\
+ C0->parent = R;\
+ }\
+ if ((C1 = X->child[1]) != 0) {\
+ R->child[1] = C1;\
+ C1->parent = R;\
+ }\
+ }\
+ }\
+}
+
+/* Relays to large vs small bin operations */
+
+#define insert_chunk(M, P, S)\
+ if (is_small(S)) { insert_small_chunk(M, P, S)\
+ } else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }
+
+#define unlink_chunk(M, P, S)\
+ if (is_small(S)) { unlink_small_chunk(M, P, S)\
+ } else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }
+
+/* ----------------------- Direct-mmapping chunks ----------------------- */
+
+static void *direct_alloc(size_t nb)
+{
+ size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */
+ char *mm = (char *)(DIRECT_MMAP(mmsize));
+ if (mm != CMFAIL) {
+ size_t offset = align_offset(chunk2mem(mm));
+ size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
+ mchunkptr p = (mchunkptr)(mm + offset);
+ p->prev_foot = offset | IS_DIRECT_BIT;
+ p->head = psize|CINUSE_BIT;
+ chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
+ chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
+ return chunk2mem(p);
+ }
+ }
+ return NULL;
+}
+
+static mchunkptr direct_resize(mchunkptr oldp, size_t nb)
+{
+ size_t oldsize = chunksize(oldp);
+ if (is_small(nb)) /* Can't shrink direct regions below small size */
+ return NULL;
+ /* Keep old chunk if big enough but not too big */
+ if (oldsize >= nb + SIZE_T_SIZE &&
+ (oldsize - nb) <= (DEFAULT_GRANULARITY << 1)) {
+ return oldp;
+ } else {
+ size_t offset = oldp->prev_foot & ~IS_DIRECT_BIT;
+ size_t oldmmsize = oldsize + offset + DIRECT_FOOT_PAD;
+ size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ char *cp = (char *)CALL_MREMAP((char *)oldp - offset,
+ oldmmsize, newmmsize, 1);
+ if (cp != CMFAIL) {
+ mchunkptr newp = (mchunkptr)(cp + offset);
+ size_t psize = newmmsize - offset - DIRECT_FOOT_PAD;
+ newp->head = psize|CINUSE_BIT;
+ chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
+ chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
+ return newp;
+ }
+ }
+ return NULL;
+}
+
+/* -------------------------- mspace management -------------------------- */
+
+/* Initialize top chunk and its size */
+static void init_top(mstate m, mchunkptr p, size_t psize)
+{
+ /* Ensure alignment */
+ size_t offset = align_offset(chunk2mem(p));
+ p = (mchunkptr)((char *)p + offset);
+ psize -= offset;
+
+ m->top = p;
+ m->topsize = psize;
+ p->head = psize | PINUSE_BIT;
+ /* set size of fake trailing chunk holding overhead space only once */
+ chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
+ m->trim_check = DEFAULT_TRIM_THRESHOLD; /* reset on each update */
+}
+
+/* Initialize bins for a new mstate that is otherwise zeroed out */
+static void init_bins(mstate m)
+{
+ /* Establish circular links for smallbins */
+ bindex_t i;
+ for (i = 0; i < NSMALLBINS; i++) {
+ sbinptr bin = smallbin_at(m,i);
+ bin->fd = bin->bk = bin;
+ }
+}
+
+/* Allocate chunk and prepend remainder with chunk in successor base. */
+static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb)
+{
+ mchunkptr p = align_as_chunk(newbase);
+ mchunkptr oldfirst = align_as_chunk(oldbase);
+ size_t psize = (size_t)((char *)oldfirst - (char *)p);
+ mchunkptr q = chunk_plus_offset(p, nb);
+ size_t qsize = psize - nb;
+ set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+
+ /* consolidate remainder with first chunk of old base */
+ if (oldfirst == m->top) {
+ size_t tsize = m->topsize += qsize;
+ m->top = q;
+ q->head = tsize | PINUSE_BIT;
+ } else if (oldfirst == m->dv) {
+ size_t dsize = m->dvsize += qsize;
+ m->dv = q;
+ set_size_and_pinuse_of_free_chunk(q, dsize);
+ } else {
+ if (!cinuse(oldfirst)) {
+ size_t nsize = chunksize(oldfirst);
+ unlink_chunk(m, oldfirst, nsize);
+ oldfirst = chunk_plus_offset(oldfirst, nsize);
+ qsize += nsize;
+ }
+ set_free_with_pinuse(q, qsize, oldfirst);
+ insert_chunk(m, q, qsize);
+ }
+
+ return chunk2mem(p);
+}
+
+/* Add a segment to hold a new noncontiguous region */
+static void add_segment(mstate m, char *tbase, size_t tsize)
+{
+ /* Determine locations and sizes of segment, fenceposts, old top */
+ char *old_top = (char *)m->top;
+ msegmentptr oldsp = segment_holding(m, old_top);
+ char *old_end = oldsp->base + oldsp->size;
+ size_t ssize = pad_request(sizeof(struct malloc_segment));
+ char *rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
+ size_t offset = align_offset(chunk2mem(rawsp));
+ char *asp = rawsp + offset;
+ char *csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
+ mchunkptr sp = (mchunkptr)csp;
+ msegmentptr ss = (msegmentptr)(chunk2mem(sp));
+ mchunkptr tnext = chunk_plus_offset(sp, ssize);
+ mchunkptr p = tnext;
+
+ /* reset top to new space */
+ init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
+
+ /* Set up segment record */
+ set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
+ *ss = m->seg; /* Push current record */
+ m->seg.base = tbase;
+ m->seg.size = tsize;
+ m->seg.next = ss;
+
+ /* Insert trailing fenceposts */
+ for (;;) {
+ mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
+ p->head = FENCEPOST_HEAD;
+ if ((char *)(&(nextp->head)) < old_end)
+ p = nextp;
+ else
+ break;
+ }
+
+ /* Insert the rest of old top into a bin as an ordinary free chunk */
+ if (csp != old_top) {
+ mchunkptr q = (mchunkptr)old_top;
+ size_t psize = (size_t)(csp - old_top);
+ mchunkptr tn = chunk_plus_offset(q, psize);
+ set_free_with_pinuse(q, psize, tn);
+ insert_chunk(m, q, psize);
+ }
+}
+
+/* -------------------------- System allocation -------------------------- */
+
+static void *alloc_sys(mstate m, size_t nb)
+{
+ char *tbase = CMFAIL;
+ size_t tsize = 0;
+
+ /* Directly map large chunks */
+ if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
+ void *mem = direct_alloc(nb);
+ if (mem != 0)
+ return mem;
+ }
+
+ {
+ size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
+ size_t rsize = granularity_align(req);
+ if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
+ char *mp = (char *)(CALL_MMAP(rsize));
+ if (mp != CMFAIL) {
+ tbase = mp;
+ tsize = rsize;
+ }
+ }
+ }
+
+ if (tbase != CMFAIL) {
+ msegmentptr sp = &m->seg;
+ /* Try to merge with an existing segment */
+ while (sp != 0 && tbase != sp->base + sp->size)
+ sp = sp->next;
+ if (sp != 0 && segment_holds(sp, m->top)) { /* append */
+ sp->size += tsize;
+ init_top(m, m->top, m->topsize + tsize);
+ } else {
+ sp = &m->seg;
+ while (sp != 0 && sp->base != tbase + tsize)
+ sp = sp->next;
+ if (sp != 0) {
+ char *oldbase = sp->base;
+ sp->base = tbase;
+ sp->size += tsize;
+ return prepend_alloc(m, tbase, oldbase, nb);
+ } else {
+ add_segment(m, tbase, tsize);
+ }
+ }
+
+ if (nb < m->topsize) { /* Allocate from new or extended top space */
+ size_t rsize = m->topsize -= nb;
+ mchunkptr p = m->top;
+ mchunkptr r = m->top = chunk_plus_offset(p, nb);
+ r->head = rsize | PINUSE_BIT;
+ set_size_and_pinuse_of_inuse_chunk(m, p, nb);
+ return chunk2mem(p);
+ }
+ }
+
+ return NULL;
+}
+
+/* ----------------------- system deallocation -------------------------- */
+
+/* Unmap and unlink any mmapped segments that don't contain used chunks */
+static size_t release_unused_segments(mstate m)
+{
+ size_t released = 0;
+ size_t nsegs = 0;
+ msegmentptr pred = &m->seg;
+ msegmentptr sp = pred->next;
+ while (sp != 0) {
+ char *base = sp->base;
+ size_t size = sp->size;
+ msegmentptr next = sp->next;
+ nsegs++;
+ {
+ mchunkptr p = align_as_chunk(base);
+ size_t psize = chunksize(p);
+ /* Can unmap if first chunk holds entire segment and not pinned */
+ if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) {
+ tchunkptr tp = (tchunkptr)p;
+ if (p == m->dv) {
+ m->dv = 0;
+ m->dvsize = 0;
+ } else {
+ unlink_large_chunk(m, tp);
+ }
+ if (CALL_MUNMAP(base, size) == 0) {
+ released += size;
+ /* unlink obsoleted record */
+ sp = pred;
+ sp->next = next;
+ } else { /* back out if cannot unmap */
+ insert_large_chunk(m, tp, psize);
+ }
+ }
+ }
+ pred = sp;
+ sp = next;
+ }
+ /* Reset check counter */
+ m->release_checks = nsegs > MAX_RELEASE_CHECK_RATE ?
+ nsegs : MAX_RELEASE_CHECK_RATE;
+ return released;
+}
+
+/* Trim excess top-segment memory back to the system, keeping at least
+** 'pad' usable bytes in top. Returns 1 if any memory was released.
+*/
+static int alloc_trim(mstate m, size_t pad)
+{
+  size_t released = 0;
+  if (pad < MAX_REQUEST && is_initialized(m)) {
+    pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */
+
+    if (m->topsize > pad) {
+      /* Shrink top space in granularity-size units, keeping at least one. */
+      size_t unit = DEFAULT_GRANULARITY;
+      size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
+                      SIZE_T_ONE) * unit;
+      msegmentptr sp = segment_holding(m, (char *)m->top);
+
+      if (sp->size >= extra &&
+          !has_segment_link(m, sp)) { /* can't shrink if pinned */
+        size_t newsize = sp->size - extra;
+        /* Prefer mremap, fall back to munmap of the tail. */
+        if ((CALL_MREMAP(sp->base, sp->size, newsize, 0) != MFAIL) ||
+            (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
+          released = extra;
+        }
+      }
+
+      if (released != 0) {  /* Keep segment record and top in sync. */
+        sp->size -= released;
+        init_top(m, m->top, m->topsize - released);
+      }
+    }
+
+    /* Unmap any unused mmapped segments. */
+    released += release_unused_segments(m);
+
+    /* On failure, disable autotrim to avoid repeated failed future calls. */
+    if (released == 0 && m->topsize > m->trim_check)
+      m->trim_check = MAX_SIZE_T;
+  }
+
+  return (released != 0)? 1 : 0;
+}
+
+/* ---------------------------- malloc support --------------------------- */
+
+/* Allocate a large request from the best-fitting chunk in a treebin.
+** Returns NULL if no fit was found or if the dv chunk is a better fit
+** (the caller then allocates from dv).
+*/
+static void *tmalloc_large(mstate m, size_t nb)
+{
+  tchunkptr v = 0;  /* Best-fit candidate found so far. */
+  size_t rsize = ~nb+1; /* Unsigned negation == maximum possible remainder. */
+  tchunkptr t;
+  bindex_t idx;
+  compute_tree_index(nb, idx);
+
+  if ((t = *treebin_at(m, idx)) != 0) {
+    /* Traverse tree for this bin looking for node with size == nb. */
+    size_t sizebits = nb << leftshift_for_tree_index(idx);
+    tchunkptr rst = 0; /* The deepest untaken right subtree. */
+    for (;;) {
+      tchunkptr rt;
+      size_t trem = chunksize(t) - nb;
+      if (trem < rsize) {
+        v = t;
+        if ((rsize = trem) == 0)  /* Exact fit: stop searching. */
+          break;
+      }
+      rt = t->child[1];
+      /* Branch left/right on the next bit of the requested size. */
+      t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
+      if (rt != 0 && rt != t)
+        rst = rt;
+      if (t == 0) {
+        t = rst; /* set t to least subtree holding sizes > nb */
+        break;
+      }
+      sizebits <<= 1;
+    }
+  }
+
+  if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
+    binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
+    if (leftbits != 0)
+      t = *treebin_at(m, lj_ffs(leftbits));
+  }
+
+  while (t != 0) { /* find smallest of tree or subtree */
+    size_t trem = chunksize(t) - nb;
+    if (trem < rsize) {
+      rsize = trem;
+      v = t;
+    }
+    t = leftmost_child(t);
+  }
+
+  /* If dv is a better fit, return NULL so malloc will use it.
+  ** NOTE: if m->dvsize < nb the subtraction wraps to a huge unsigned
+  ** value, so the comparison still correctly prefers v. */
+  if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
+    mchunkptr r = chunk_plus_offset(v, nb);
+    unlink_large_chunk(m, v);
+    if (rsize < MIN_CHUNK_SIZE) {  /* Remainder too small to split off. */
+      set_inuse_and_pinuse(m, v, (rsize + nb));
+    } else {  /* Split: free the remainder back into a bin. */
+      set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+      set_size_and_pinuse_of_free_chunk(r, rsize);
+      insert_chunk(m, r, rsize);
+    }
+    return chunk2mem(v);
+  }
+  return NULL;
+}
+
+/* Allocate a small request from the best-fitting chunk in a treebin.
+** Caller guarantees m->treemap != 0 (see lj_alloc_malloc).
+*/
+static void *tmalloc_small(mstate m, size_t nb)
+{
+  tchunkptr t, v;
+  mchunkptr r;
+  size_t rsize;
+  bindex_t i = lj_ffs(m->treemap);  /* Smallest non-empty treebin. */
+
+  v = t = *treebin_at(m, i);
+  rsize = chunksize(t) - nb;
+
+  /* Walk down the leftmost path to find the smallest chunk >= nb. */
+  while ((t = leftmost_child(t)) != 0) {
+    size_t trem = chunksize(t) - nb;
+    if (trem < rsize) {
+      rsize = trem;
+      v = t;
+    }
+  }
+
+  r = chunk_plus_offset(v, nb);
+  unlink_large_chunk(m, v);
+  if (rsize < MIN_CHUNK_SIZE) {  /* Remainder too small to split off. */
+    set_inuse_and_pinuse(m, v, (rsize + nb));
+  } else {  /* Split; remainder becomes the new designated victim. */
+    set_size_and_pinuse_of_inuse_chunk(m, v, nb);
+    set_size_and_pinuse_of_free_chunk(r, rsize);
+    replace_dv(m, r, rsize);
+  }
+  return chunk2mem(v);
+}
+
+/* ----------------------------------------------------------------------- */
+
+/* Create a new allocator state. The state itself is carved out of the
+** head of the first mmapped segment. Returns NULL on mmap failure.
+*/
+void *lj_alloc_create(void)
+{
+  size_t tsize = DEFAULT_GRANULARITY;
+  char *tbase = (char *)(CALL_MMAP(tsize));
+  if (tbase != CMFAIL) {
+    size_t msize = pad_request(sizeof(struct malloc_state));
+    mchunkptr mn;
+    mchunkptr msp = align_as_chunk(tbase);
+    mstate m = (mstate)(chunk2mem(msp));  /* State lives in the first chunk. */
+    memset(m, 0, msize);
+    msp->head = (msize|PINUSE_BIT|CINUSE_BIT);  /* Mark state chunk in-use. */
+    m->seg.base = tbase;
+    m->seg.size = tsize;
+    m->release_checks = MAX_RELEASE_CHECK_RATE;
+    init_bins(m);
+    mn = next_chunk(mem2chunk(m));
+    /* Everything after the state chunk becomes the initial top chunk. */
+    init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE);
+    return m;
+  }
+  return NULL;
+}
+
+/* Destroy an allocator state and unmap all of its segments. */
+void lj_alloc_destroy(void *msp)
+{
+  mstate ms = (mstate)msp;
+  msegmentptr sp = &ms->seg;
+  while (sp != 0) {
+    char *base = sp->base;
+    size_t size = sp->size;
+    /* Read all fields and advance BEFORE unmapping: the segment record
+    ** (and ms itself, for the first segment) lives inside the mapping. */
+    sp = sp->next;
+    CALL_MUNMAP(base, size);
+  }
+}
+
+/* Allocate nsize bytes from allocator state msp.
+** Strategy: exact/next smallbin fit, then dv, then treebins, then split
+** of dv or top, and finally the system allocator (alloc_sys).
+*/
+static LJ_NOINLINE void *lj_alloc_malloc(void *msp, size_t nsize)
+{
+  mstate ms = (mstate)msp;
+  void *mem;
+  size_t nb;  /* Padded internal request size. */
+  if (nsize <= MAX_SMALL_REQUEST) {
+    bindex_t idx;
+    binmap_t smallbits;
+    nb = (nsize < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(nsize);
+    idx = small_index(nb);
+    smallbits = ms->smallmap >> idx;
+
+    if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
+      mchunkptr b, p;
+      idx += ~smallbits & 1; /* Uses next bin if idx empty */
+      b = smallbin_at(ms, idx);
+      p = b->fd;
+      unlink_first_small_chunk(ms, b, p, idx);
+      set_inuse_and_pinuse(ms, p, small_index2size(idx));
+      mem = chunk2mem(p);
+      return mem;
+    } else if (nb > ms->dvsize) {  /* dv too small: search higher bins. */
+      if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
+        mchunkptr b, p, r;
+        size_t rsize;
+        binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
+        bindex_t i = lj_ffs(leftbits);
+        b = smallbin_at(ms, i);
+        p = b->fd;
+        unlink_first_small_chunk(ms, b, p, i);
+        rsize = small_index2size(i) - nb;
+        /* Fit here cannot be remainderless if 4byte sizes */
+        if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) {
+          set_inuse_and_pinuse(ms, p, small_index2size(i));
+        } else {  /* Split; remainder becomes the new designated victim. */
+          set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+          r = chunk_plus_offset(p, nb);
+          set_size_and_pinuse_of_free_chunk(r, rsize);
+          replace_dv(ms, r, rsize);
+        }
+        mem = chunk2mem(p);
+        return mem;
+      } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
+        return mem;
+      }
+    }
+  } else if (nsize >= MAX_REQUEST) {
+    nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
+  } else {
+    nb = pad_request(nsize);
+    if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
+      return mem;
+    }
+  }
+
+  /* Fall through: serve from the designated victim or the top chunk. */
+  if (nb <= ms->dvsize) {
+    size_t rsize = ms->dvsize - nb;
+    mchunkptr p = ms->dv;
+    if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
+      mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
+      ms->dvsize = rsize;
+      set_size_and_pinuse_of_free_chunk(r, rsize);
+      set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+    } else { /* exhaust dv */
+      size_t dvs = ms->dvsize;
+      ms->dvsize = 0;
+      ms->dv = 0;
+      set_inuse_and_pinuse(ms, p, dvs);
+    }
+    mem = chunk2mem(p);
+    return mem;
+  } else if (nb < ms->topsize) { /* Split top */
+    size_t rsize = ms->topsize -= nb;
+    mchunkptr p = ms->top;
+    mchunkptr r = ms->top = chunk_plus_offset(p, nb);
+    r->head = rsize | PINUSE_BIT;
+    set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
+    mem = chunk2mem(p);
+    return mem;
+  }
+  /* Nothing fits: grow via the system allocator. */
+  return alloc_sys(ms, nb);
+}
+
+/* Free a block previously returned by lj_alloc_malloc/realloc.
+** Coalesces with free neighbors, special-cases top/dv merges and
+** directly-mmapped blocks. Always returns NULL (lua_Alloc free result).
+*/
+static LJ_NOINLINE void *lj_alloc_free(void *msp, void *ptr)
+{
+  if (ptr != 0) {
+    mchunkptr p = mem2chunk(ptr);
+    mstate fm = (mstate)msp;
+    size_t psize = chunksize(p);
+    mchunkptr next = chunk_plus_offset(p, psize);
+    if (!pinuse(p)) {  /* Previous chunk is free: consolidate backward. */
+      size_t prevsize = p->prev_foot;
+      if ((prevsize & IS_DIRECT_BIT) != 0) {  /* Directly mmapped block. */
+        prevsize &= ~IS_DIRECT_BIT;
+        psize += prevsize + DIRECT_FOOT_PAD;
+        CALL_MUNMAP((char *)p - prevsize, psize);
+        return NULL;
+      } else {
+        mchunkptr prev = chunk_minus_offset(p, prevsize);
+        psize += prevsize;
+        p = prev;
+        /* consolidate backward */
+        if (p != fm->dv) {
+          unlink_chunk(fm, p, prevsize);
+        } else if ((next->head & INUSE_BITS) == INUSE_BITS) {
+          /* Merged into dv and next is in use: just grow dv and stop. */
+          fm->dvsize = psize;
+          set_free_with_pinuse(p, psize, next);
+          return NULL;
+        }
+      }
+    }
+    if (!cinuse(next)) { /* consolidate forward */
+      if (next == fm->top) {  /* Merge into the top chunk. */
+        size_t tsize = fm->topsize += psize;
+        fm->top = p;
+        p->head = tsize | PINUSE_BIT;
+        if (p == fm->dv) {  /* dv was absorbed into top. */
+          fm->dv = 0;
+          fm->dvsize = 0;
+        }
+        if (tsize > fm->trim_check)  /* Top grew large: try to trim. */
+          alloc_trim(fm, 0);
+        return NULL;
+      } else if (next == fm->dv) {  /* Merge into the designated victim. */
+        size_t dsize = fm->dvsize += psize;
+        fm->dv = p;
+        set_size_and_pinuse_of_free_chunk(p, dsize);
+        return NULL;
+      } else {  /* Merge with an ordinary free neighbor. */
+        size_t nsize = chunksize(next);
+        psize += nsize;
+        unlink_chunk(fm, next, nsize);
+        set_size_and_pinuse_of_free_chunk(p, psize);
+        if (p == fm->dv) {
+          fm->dvsize = psize;
+          return NULL;
+        }
+      }
+    } else {  /* Next is in use: mark this chunk free in place. */
+      set_free_with_pinuse(p, psize, next);
+    }
+
+    /* Rebin the coalesced chunk. */
+    if (is_small(psize)) {
+      insert_small_chunk(fm, p, psize);
+    } else {
+      tchunkptr tp = (tchunkptr)p;
+      insert_large_chunk(fm, tp, psize);
+      /* Periodically scan for fully-free mmapped segments to release. */
+      if (--fm->release_checks == 0)
+        release_unused_segments(fm);
+    }
+  }
+  return NULL;
+}
+
+/* Resize the block at ptr to nsize bytes. Tries an in-place shrink or
+** an extension into top; otherwise falls back to malloc-copy-free.
+** Returns NULL if nsize is out of range or allocation fails.
+*/
+static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize)
+{
+  if (nsize >= MAX_REQUEST) {
+    return NULL;
+  } else {
+    mstate m = (mstate)msp;
+    mchunkptr oldp = mem2chunk(ptr);
+    size_t oldsize = chunksize(oldp);
+    mchunkptr next = chunk_plus_offset(oldp, oldsize);
+    mchunkptr newp = 0;
+    size_t nb = request2size(nsize);
+
+    /* Try to either shrink or extend into top. Else malloc-copy-free. */
+    if (is_direct(oldp)) {  /* Directly mmapped: resize the mapping. */
+      newp = direct_resize(oldp, nb); /* this may return NULL. */
+    } else if (oldsize >= nb) { /* already big enough */
+      size_t rsize = oldsize - nb;
+      newp = oldp;
+      if (rsize >= MIN_CHUNK_SIZE) {  /* Give back the tail if splittable. */
+        mchunkptr remainder = chunk_plus_offset(newp, nb);
+        set_inuse(m, newp, nb);
+        set_inuse(m, remainder, rsize);
+        lj_alloc_free(m, chunk2mem(remainder));
+      }
+    } else if (next == m->top && oldsize + m->topsize > nb) {
+      /* Expand into top. */
+      size_t newsize = oldsize + m->topsize;
+      size_t newtopsize = newsize - nb;
+      mchunkptr newtop = chunk_plus_offset(oldp, nb);
+      set_inuse(m, oldp, nb);
+      newtop->head = newtopsize |PINUSE_BIT;
+      m->top = newtop;
+      m->topsize = newtopsize;
+      newp = oldp;
+    }
+
+    if (newp != 0) {  /* In-place resize succeeded. */
+      return chunk2mem(newp);
+    } else {  /* Fall back to allocate + copy + free. */
+      void *newmem = lj_alloc_malloc(m, nsize);
+      if (newmem != 0) {
+        size_t oc = oldsize - overhead_for(oldp);  /* Old usable size. */
+        memcpy(newmem, ptr, oc < nsize ? oc : nsize);
+        lj_alloc_free(m, ptr);
+      }
+      return newmem;
+    }
+  }
+}
+
+/* lua_Alloc-compatible entry point: nsize == 0 frees, ptr == NULL
+** allocates, otherwise reallocates. osize is ignored.
+*/
+void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize)
+{
+  (void)osize;
+  if (nsize == 0) {
+    return lj_alloc_free(msp, ptr);
+  } else if (ptr == NULL) {
+    return lj_alloc_malloc(msp, nsize);
+  } else {
+    return lj_alloc_realloc(msp, ptr, nsize);
+  }
+}
+
+#endif
diff --git a/src/lj_alloc.h b/src/lj_alloc.h
new file mode 100644
index 00000000..f87a7cf3
--- /dev/null
+++ b/src/lj_alloc.h
@@ -0,0 +1,17 @@
+/*
+** Bundled memory allocator.
+** Donated to the public domain.
+*/
+
+#ifndef _LJ_ALLOC_H
+#define _LJ_ALLOC_H
+
+#include "lj_def.h"
+
+#ifndef LUAJIT_USE_SYSMALLOC
+LJ_FUNC void *lj_alloc_create(void);
+LJ_FUNC void lj_alloc_destroy(void *msp);
+LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize);
+#endif
+
+#endif
diff --git a/src/lj_api.c b/src/lj_api.c
new file mode 100644
index 00000000..ea4eaf66
--- /dev/null
+++ b/src/lj_api.c
@@ -0,0 +1,1046 @@
+/*
+** Public Lua/C API.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_api_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_func.h"
+#include "lj_udata.h"
+#include "lj_meta.h"
+#include "lj_state.h"
+#include "lj_frame.h"
+#include "lj_trace.h"
+#include "lj_vm.h"
+#include "lj_lex.h"
+#include "lj_parse.h"
+
+/* -- Common helper functions --------------------------------------------- */
+
+#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base))
+#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L))
+
+/* Convert a stack index or pseudo-index to a TValue pointer.
+** Out-of-range positive indexes and missing upvalues map to niltv(L),
+** the shared immutable nil constant (see api_checkvalidindex).
+*/
+static TValue *index2adr(lua_State *L, int idx)
+{
+  if (idx > 0) {  /* Positive index: 1-based, relative to frame base. */
+    TValue *o = L->base + (idx - 1);
+    return o < L->top ? o : niltv(L);
+  } else if (idx > LUA_REGISTRYINDEX) {  /* Negative index: from the top. */
+    api_check(L, idx != 0 && -idx <= L->top - L->base);
+    return L->top + idx;
+  } else if (idx == LUA_GLOBALSINDEX) {
+    /* Globals table is stored as a GCRef: box it in the global tmptv. */
+    TValue *o = &G(L)->tmptv;
+    settabV(L, o, tabref(L->env));
+    return o;
+  } else if (idx == LUA_REGISTRYINDEX) {
+    return registry(L);
+  } else {  /* Environment or upvalue pseudo-index of the running C func. */
+    GCfunc *fn = curr_func(L);
+    api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn));
+    if (idx == LUA_ENVIRONINDEX) {
+      TValue *o = &G(L)->tmptv;  /* Box env table, same as globals above. */
+      settabV(L, o, tabref(fn->c.env));
+      return o;
+    } else {
+      idx = LUA_GLOBALSINDEX - idx;  /* 1-based upvalue number. */
+      return idx <= fn->c.nupvalues ? &fn->c.upvalue[idx-1] : niltv(L);
+    }
+  }
+}
+
+/* Convert a plain stack index (no pseudo-indexes) to a TValue pointer.
+** Out-of-range positive indexes map to niltv(L).
+*/
+static TValue *stkindex2adr(lua_State *L, int idx)
+{
+  if (idx > 0) {  /* 1-based, relative to frame base. */
+    TValue *o = L->base + (idx - 1);
+    return o < L->top ? o : niltv(L);
+  } else {  /* Negative: relative to the stack top. */
+    api_check(L, idx != 0 && -idx <= L->top - L->base);
+    return L->top + idx;
+  }
+}
+
+/* Environment table of the running C function, or the thread's own
+** environment when no C function frame is active.
+*/
+static GCtab *getcurrenv(lua_State *L)
+{
+  GCfunc *fn = curr_func(L);
+  return fn->c.gct == ~LJ_TFUNC ? tabref(fn->c.env) : tabref(L->env);
+}
+
+/* -- Miscellaneous API functions ----------------------------------------- */
+
+LUA_API int lua_status(lua_State *L)
+{
+ return L->status;
+}
+
+LUA_API int lua_checkstack(lua_State *L, int size)
+{
+ if (size > LUAI_MAXCSTACK || (L->top - L->base + size) > LUAI_MAXCSTACK) {
+ return 0; /* Stack overflow. */
+ } else if (size > 0) {
+ lj_state_checkstack(L, (MSize)size);
+ }
+ return 1;
+}
+
+LUA_API void lua_xmove(lua_State *from, lua_State *to, int n)
+{
+ TValue *f, *t;
+ if (from == to) return;
+ api_checknelems(from, n);
+ api_check(from, G(from) == G(to));
+ lj_state_checkstack(to, (MSize)n);
+ f = from->top;
+ t = to->top = to->top + n;
+ while (--n >= 0) copyTV(to, --t, --f);
+ from->top = f;
+}
+
+/* -- Stack manipulation -------------------------------------------------- */
+
+LUA_API int lua_gettop(lua_State *L)
+{
+ return cast_int(L->top - L->base);
+}
+
+LUA_API void lua_settop(lua_State *L, int idx)
+{
+ if (idx >= 0) {
+ api_check(L, idx <= L->maxstack - L->base);
+ if (L->base + idx > L->top) {
+ if (L->base + idx >= L->maxstack)
+ lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base));
+ do { setnilV(L->top++); } while (L->top < L->base + idx);
+ } else {
+ L->top = L->base + idx;
+ }
+ } else {
+ api_check(L, -(idx+1) <= (L->top - L->base));
+ L->top += idx+1; /* Shrinks top (idx < 0). */
+ }
+}
+
+LUA_API void lua_remove(lua_State *L, int idx)
+{
+ TValue *p = stkindex2adr(L, idx);
+ api_checkvalidindex(L, p);
+ while (++p < L->top) copyTV(L, p-1, p);
+ L->top--;
+}
+
+LUA_API void lua_insert(lua_State *L, int idx)
+{
+ TValue *q, *p = stkindex2adr(L, idx);
+ api_checkvalidindex(L, p);
+ for (q = L->top; q > p; q--) copyTV(L, q, q-1);
+ copyTV(L, p, L->top);
+}
+
+LUA_API void lua_replace(lua_State *L, int idx)
+{
+ api_checknelems(L, 1);
+ if (idx == LUA_GLOBALSINDEX) {
+ api_check(L, tvistab(L->top-1));
+ /* NOBARRIER: A thread (i.e. L) is never black. */
+ setgcref(L->env, obj2gco(tabV(L->top-1)));
+ } else if (idx == LUA_ENVIRONINDEX) {
+ GCfunc *fn = curr_func(L);
+ if (fn->c.gct != ~LJ_TFUNC)
+ lj_err_msg(L, LJ_ERR_NOENV);
+ api_check(L, tvistab(L->top-1));
+ setgcref(fn->c.env, obj2gco(tabV(L->top-1)));
+ lj_gc_barrier(L, fn, L->top-1);
+ } else {
+ TValue *o = index2adr(L, idx);
+ api_checkvalidindex(L, o);
+ copyTV(L, o, L->top-1);
+ if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */
+ lj_gc_barrier(L, curr_func(L), L->top-1);
+ }
+ L->top--;
+}
+
+LUA_API void lua_pushvalue(lua_State *L, int idx)
+{
+ copyTV(L, L->top, index2adr(L, idx));
+ incr_top(L);
+}
+
+/* -- Stack getters ------------------------------------------------------- */
+
+LUA_API int lua_type(lua_State *L, int idx)
+{
+  cTValue *o = index2adr(L, idx);
+  if (tvisnum(o)) {
+    return LUA_TNUMBER;
+#if LJ_64
+  } else if (tvislightud(o)) {
+    return LUA_TLIGHTUSERDATA;
+#endif
+  } else if (o == niltv(L)) {  /* Sentinel for out-of-range indexes. */
+    return LUA_TNONE;
+  } else { /* Magic internal/external tag conversion. ORDER LJ_T */
+    int t = ~itype(o);
+    /* The constants are packed 4-bit lookup tables: nibble t (or t-8)
+    ** holds the external LUA_T* value for internal tag t. */
+    return (int)(((t < 8 ? 0x98a42110 : 0x75b6) >> 4*(t&7)) & 15u);
+  }
+}
+
+LUA_API const char *lua_typename(lua_State *L, int t)
+{
+ UNUSED(L);
+ return lj_obj_typename[t+1];
+}
+
+LUA_API int lua_iscfunction(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ return !isluafunc(funcV(o));
+}
+
+LUA_API int lua_isnumber(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ TValue tmp;
+ return (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp)));
+}
+
+LUA_API int lua_isstring(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ return (tvisstr(o) || tvisnum(o));
+}
+
+LUA_API int lua_isuserdata(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ return (tvisudata(o) || tvislightud(o));
+}
+
+LUA_API int lua_rawequal(lua_State *L, int idx1, int idx2)
+{
+ cTValue *o1 = index2adr(L, idx1);
+ cTValue *o2 = index2adr(L, idx2);
+ return (o1 == niltv(L) || o2 == niltv(L)) ? 0 : lj_obj_equal(o1, o2);
+}
+
+LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
+{
+  cTValue *o1 = index2adr(L, idx1);
+  cTValue *o2 = index2adr(L, idx2);
+  if (tvisnum(o1) && tvisnum(o2)) {  /* Numbers compare by value. */
+    return numV(o1) == numV(o2);
+  } else if (itype(o1) != itype(o2)) {  /* Different types: never equal. */
+    return 0;
+  } else if (tvispri(o1)) {
+    /* nil/false/true: equal unless either index was out of range. */
+    return o1 != niltv(L) && o2 != niltv(L);
+#if LJ_64
+  } else if (tvislightud(o1)) {
+    return o1->u64 == o2->u64;  /* Compare the raw 64 bit pattern. */
+#endif
+  } else if (gcrefeq(o1->gcr, o2->gcr)) {  /* Same GC object. */
+    return 1;
+  } else if (!tvistabud(o1)) {
+    /* Distinct non-table/userdata objects can't compare via metamethod. */
+    return 0;
+  } else {
+    /* lj_meta_equal returns 0/1 directly or a call base for the
+    ** equality metamethod, which we then invoke. */
+    TValue *base = lj_meta_equal(L, gcV(o1), gcV(o2), 0);
+    if ((uintptr_t)base <= 1) {
+      return (int)(uintptr_t)base;
+    } else {
+      L->top = base+2;
+      lj_vm_call(L, base, 1+1);  /* Request one result. */
+      L->top -= 2;
+      return tvistruecond(L->top+1);
+    }
+  }
+}
+
+LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
+{
+ cTValue *o1 = index2adr(L, idx1);
+ cTValue *o2 = index2adr(L, idx2);
+ if (o1 == niltv(L) || o2 == niltv(L)) {
+ return 0;
+ } else if (tvisnum(o1) && tvisnum(o2)) {
+ return numV(o1) < numV(o2);
+ } else {
+ TValue *base = lj_meta_comp(L, o1, o2, 0);
+ if ((uintptr_t)base <= 1) {
+ return (int)(uintptr_t)base;
+ } else {
+ L->top = base+2;
+ lj_vm_call(L, base, 1+1);
+ L->top -= 2;
+ return tvistruecond(L->top+1);
+ }
+ }
+}
+
+LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ TValue tmp;
+ if (LJ_LIKELY(tvisnum(o)))
+ return numV(o);
+ else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
+ return numV(&tmp);
+ else
+ return 0;
+}
+
+LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ TValue tmp;
+ lua_Number n;
+ if (LJ_LIKELY(tvisnum(o)))
+ n = numV(o);
+ else if (tvisstr(o) && lj_str_numconv(strVdata(o), &tmp))
+ n = numV(&tmp);
+ else
+ return 0;
+#if LJ_64
+ return (lua_Integer)n;
+#else
+ return lj_num2int(n);
+#endif
+}
+
+LUA_API int lua_toboolean(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ return tvistruecond(o);
+}
+
+LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
+{
+ TValue *o = index2adr(L, idx);
+ GCstr *s;
+ if (LJ_LIKELY(tvisstr(o))) {
+ s = strV(o);
+ } else if (tvisnum(o)) {
+ lj_gc_check(L);
+ o = index2adr(L, idx); /* GC may move the stack. */
+ s = lj_str_fromnum(L, &o->n);
+ } else {
+ if (len != NULL) *len = 0;
+ return NULL;
+ }
+ if (len != NULL) *len = s->len;
+ return strdata(s);
+}
+
+LUA_API size_t lua_objlen(lua_State *L, int idx)
+{
+ TValue *o = index2adr(L, idx);
+ if (tvisstr(o))
+ return strV(o)->len;
+ else if (tvistab(o))
+ return cast(size_t, lj_tab_len(tabV(o)));
+ else if (tvisudata(o))
+ return udataV(o)->len;
+ else if (tvisnum(o))
+ return lj_str_fromnum(L, &o->n)->len;
+ else
+ return 0;
+}
+
+LUA_API lua_CFunction lua_tocfunction(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ return funcV(o)->c.gate == lj_gate_c ? funcV(o)->c.f : NULL;
+}
+
+LUA_API void *lua_touserdata(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ if (tvisudata(o))
+ return uddata(udataV(o));
+ else if (tvislightud(o))
+ return lightudV(o);
+ else
+ return NULL;
+}
+
+LUA_API lua_State *lua_tothread(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ return (!tvisthread(o)) ? NULL : threadV(o);
+}
+
+LUA_API const void *lua_topointer(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ if (tvisudata(o))
+ return uddata(udataV(o));
+ else if (tvislightud(o))
+ return lightudV(o);
+ else if (tvisgcv(o))
+ return gcV(o);
+ else
+ return NULL;
+}
+
+/* -- Stack setters (object creation) ------------------------------------- */
+
+LUA_API void lua_pushnil(lua_State *L)
+{
+ setnilV(L->top);
+ incr_top(L);
+}
+
+LUA_API void lua_pushnumber(lua_State *L, lua_Number n)
+{
+ setnumV(L->top, n);
+ if (LJ_UNLIKELY(tvisnan(L->top)))
+ setnanV(L->top); /* Canonicalize injected NaNs. */
+ incr_top(L);
+}
+
+LUA_API void lua_pushinteger(lua_State *L, lua_Integer n)
+{
+ setnumV(L->top, cast_num(n));
+ incr_top(L);
+}
+
+LUA_API void lua_pushlstring(lua_State *L, const char *str, size_t len)
+{
+ GCstr *s;
+ lj_gc_check(L);
+ s = lj_str_new(L, str, len);
+ setstrV(L, L->top, s);
+ incr_top(L);
+}
+
+LUA_API void lua_pushstring(lua_State *L, const char *str)
+{
+ if (str == NULL) {
+ setnilV(L->top);
+ } else {
+ GCstr *s;
+ lj_gc_check(L);
+ s = lj_str_newz(L, str);
+ setstrV(L, L->top, s);
+ }
+ incr_top(L);
+}
+
+LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
+ va_list argp)
+{
+ lj_gc_check(L);
+ return lj_str_pushvf(L, fmt, argp);
+}
+
+LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
+{
+ const char *ret;
+ va_list argp;
+ lj_gc_check(L);
+ va_start(argp, fmt);
+ ret = lj_str_pushvf(L, fmt, argp);
+ va_end(argp);
+ return ret;
+}
+
+LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n)
+{
+ GCfunc *fn;
+ lj_gc_check(L);
+ api_checknelems(L, n);
+ fn = lj_func_newC(L, (MSize)n, getcurrenv(L));
+ fn->c.f = f;
+ L->top -= n;
+ while (n--)
+ copyTV(L, &fn->c.upvalue[n], L->top+n);
+ setfuncV(L, L->top, fn);
+ lua_assert(iswhite(obj2gco(fn)));
+ incr_top(L);
+}
+
+LUA_API void lua_pushboolean(lua_State *L, int b)
+{
+ setboolV(L->top, (b != 0));
+ incr_top(L);
+}
+
+LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
+{
+ setlightudV(L->top, checklightudptr(L, p));
+ incr_top(L);
+}
+
+LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
+{
+ GCtab *t;
+ lj_gc_check(L);
+ t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec));
+ settabV(L, L->top, t);
+ incr_top(L);
+}
+
+LUALIB_API int luaL_newmetatable(lua_State *L, const char *tname)
+{
+ GCtab *regt = tabV(registry(L));
+ TValue *tv = lj_tab_setstr(L, regt, lj_str_newz(L, tname));
+ if (tvisnil(tv)) {
+ GCtab *mt = lj_tab_new(L, 0, 1);
+ settabV(L, tv, mt);
+ settabV(L, L->top++, mt);
+ lj_gc_objbarriert(L, regt, mt);
+ return 1;
+ } else {
+ copyTV(L, L->top++, tv);
+ return 0;
+ }
+}
+
+LUA_API int lua_pushthread(lua_State *L)
+{
+ setthreadV(L, L->top, L);
+ incr_top(L);
+ return (mainthread(G(L)) == L);
+}
+
+LUA_API lua_State *lua_newthread(lua_State *L)
+{
+ lua_State *L1;
+ lj_gc_check(L);
+ L1 = lj_state_new(L);
+ setthreadV(L, L->top, L1);
+ incr_top(L);
+ return L1;
+}
+
+LUA_API void *lua_newuserdata(lua_State *L, size_t size)
+{
+ GCudata *ud;
+ lj_gc_check(L);
+ if (size > LJ_MAX_UDATA)
+ lj_err_msg(L, LJ_ERR_UDATAOV);
+ ud = lj_udata_new(L, (MSize)size, getcurrenv(L));
+ setudataV(L, L->top, ud);
+ incr_top(L);
+ return uddata(ud);
+}
+
+LUA_API void lua_concat(lua_State *L, int n)
+{
+ api_checknelems(L, n);
+ if (n >= 2) {
+ n--;
+ do {
+ TValue *top = lj_meta_cat(L, L->top-1, n);
+ if (top == NULL) {
+ L->top -= n;
+ break;
+ }
+ n -= cast_int(L->top - top);
+ L->top = top+2;
+ lj_vm_call(L, top, 1+1);
+ L->top--;
+ copyTV(L, L->top-1, L->top);
+ } while (--n > 0);
+ } else if (n == 0) { /* Push empty string. */
+ setstrV(L, L->top, lj_str_new(L, "", 0));
+ incr_top(L);
+ }
+ /* else n == 1: nothing to do. */
+}
+
+/* -- Object getters ------------------------------------------------------ */
+
+LUA_API void lua_gettable(lua_State *L, int idx)
+{
+ cTValue *v, *t = index2adr(L, idx);
+ api_checkvalidindex(L, t);
+ v = lj_meta_tget(L, t, L->top-1);
+ if (v == NULL) {
+ L->top += 2;
+ lj_vm_call(L, L->top-2, 1+1);
+ L->top -= 2;
+ v = L->top+1;
+ }
+ copyTV(L, L->top-1, v);
+}
+
+LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
+{
+ cTValue *v, *t = index2adr(L, idx);
+ TValue key;
+ api_checkvalidindex(L, t);
+ setstrV(L, &key, lj_str_newz(L, k));
+ v = lj_meta_tget(L, t, &key);
+ if (v == NULL) {
+ L->top += 2;
+ lj_vm_call(L, L->top-2, 1+1);
+ L->top -= 2;
+ v = L->top+1;
+ }
+ copyTV(L, L->top, v);
+ incr_top(L);
+}
+
+LUA_API void lua_rawget(lua_State *L, int idx)
+{
+ cTValue *t = index2adr(L, idx);
+ api_check(L, tvistab(t));
+ copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1));
+}
+
+LUA_API void lua_rawgeti(lua_State *L, int idx, int n)
+{
+ cTValue *v, *t = index2adr(L, idx);
+ api_check(L, tvistab(t));
+ v = lj_tab_getint(tabV(t), n);
+ if (v) {
+ copyTV(L, L->top, v);
+ } else {
+ setnilV(L->top);
+ }
+ incr_top(L);
+}
+
+LUA_API int lua_getmetatable(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ GCtab *mt = NULL;
+ if (tvistab(o))
+ mt = tabref(tabV(o)->metatable);
+ else if (tvisudata(o))
+ mt = tabref(udataV(o)->metatable);
+ else
+ mt = tabref(G(L)->basemt[itypemap(o)]);
+ if (mt == NULL)
+ return 0;
+ settabV(L, L->top, mt);
+ incr_top(L);
+ return 1;
+}
+
+LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field)
+{
+ if (lua_getmetatable(L, idx)) {
+ cTValue *tv = lj_tab_getstr(tabV(L->top-1), lj_str_newz(L, field));
+ if (tv && !tvisnil(tv)) {
+ copyTV(L, L->top-1, tv);
+ return 1;
+ }
+ L->top--;
+ }
+ return 0;
+}
+
+LUA_API void lua_getfenv(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ api_checkvalidindex(L, o);
+ if (tvisfunc(o)) {
+ settabV(L, L->top, tabref(funcV(o)->c.env));
+ } else if (tvisudata(o)) {
+ settabV(L, L->top, tabref(udataV(o)->env));
+ } else if (tvisthread(o)) {
+ settabV(L, L->top, tabref(threadV(o)->env));
+ } else {
+ setnilV(L->top);
+ }
+ incr_top(L);
+}
+
+LUA_API int lua_next(lua_State *L, int idx)
+{
+ cTValue *t = index2adr(L, idx);
+ int more;
+ api_check(L, tvistab(t));
+ more = lj_tab_next(L, tabV(t), L->top-1);
+ if (more) {
+ incr_top(L); /* Return new key and value slot. */
+ } else { /* End of traversal. */
+ L->top--; /* Remove key slot. */
+ }
+ return more;
+}
+
+/* Shared helper for lua_getupvalue/lua_setupvalue: store a pointer to
+** upvalue idx (0-based) of function f in *val and return its name.
+** C function upvalues are unnamed (""). Returns NULL if f is not a
+** function or idx is out of range.
+*/
+static const char *aux_upvalue(cTValue *f, uint32_t idx, TValue **val)
+{
+  GCfunc *fn;
+  if (!tvisfunc(f)) return NULL;
+  fn = funcV(f);
+  if (isluafunc(fn)) {  /* Lua function: named upvalues via the prototype. */
+    GCproto *pt = funcproto(fn);
+    if (idx < pt->sizeuvname) {
+      *val = gcref(fn->l.uvptr[idx])->uv.v;
+      return strdata(pt->uvname[idx]);
+    }
+  } else {  /* C function: upvalues stored inline, no names. */
+    if (idx < fn->c.nupvalues) {
+      *val = &fn->c.upvalue[idx];
+      return "";
+    }
+  }
+  return NULL;
+}
+
+LUA_API const char *lua_getupvalue(lua_State *L, int idx, int n)
+{
+ TValue *val;
+ const char *name = aux_upvalue(index2adr(L, idx), (uint32_t)(n-1), &val);
+ if (name) {
+ copyTV(L, L->top, val);
+ incr_top(L);
+ }
+ return name;
+}
+
+LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
+{
+ cTValue *o = index2adr(L, idx);
+ if (tvisudata(o)) {
+ GCudata *ud = udataV(o);
+ cTValue *tv = lj_tab_getstr(tabV(registry(L)), lj_str_newz(L, tname));
+ if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable))
+ return uddata(ud);
+ }
+ lj_err_argtype(L, idx, tname);
+ return NULL; /* unreachable */
+}
+
+/* -- Object setters ------------------------------------------------------ */
+
+LUA_API void lua_settable(lua_State *L, int idx)
+{
+ TValue *o;
+ cTValue *t = index2adr(L, idx);
+ api_checknelems(L, 2);
+ api_checkvalidindex(L, t);
+ o = lj_meta_tset(L, t, L->top-2);
+ if (o) {
+ /* NOBARRIER: lj_meta_tset ensures the table is not black. */
+ copyTV(L, o, L->top-1);
+ L->top -= 2;
+ } else {
+ L->top += 3;
+ copyTV(L, L->top-1, L->top-6);
+ lj_vm_call(L, L->top-3, 0+1);
+ L->top -= 3;
+ }
+}
+
+LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
+{
+ TValue *o;
+ TValue key;
+ cTValue *t = index2adr(L, idx);
+ api_checknelems(L, 1);
+ api_checkvalidindex(L, t);
+ setstrV(L, &key, lj_str_newz(L, k));
+ o = lj_meta_tset(L, t, &key);
+ if (o) {
+ L->top--;
+ /* NOBARRIER: lj_meta_tset ensures the table is not black. */
+ copyTV(L, o, L->top);
+ } else {
+ L->top += 3;
+ copyTV(L, L->top-1, L->top-6);
+ lj_vm_call(L, L->top-3, 0+1);
+ L->top -= 2;
+ }
+}
+
+LUA_API void lua_rawset(lua_State *L, int idx)
+{
+ GCtab *t = tabV(index2adr(L, idx));
+ TValue *dst, *key;
+ api_checknelems(L, 2);
+ key = L->top-2;
+ dst = lj_tab_set(L, t, key);
+ copyTV(L, dst, key+1);
+ lj_gc_barriert(L, t, dst);
+ L->top = key;
+}
+
+LUA_API void lua_rawseti(lua_State *L, int idx, int n)
+{
+ GCtab *t = tabV(index2adr(L, idx));
+ TValue *dst, *src;
+ api_checknelems(L, 1);
+ dst = lj_tab_setint(L, t, n);
+ src = L->top-1;
+ copyTV(L, dst, src);
+ lj_gc_barriert(L, t, dst);
+ L->top = src;
+}
+
+LUA_API int lua_setmetatable(lua_State *L, int idx)
+{
+ global_State *g;
+ GCtab *mt;
+ cTValue *o = index2adr(L, idx);
+ api_checknelems(L, 1);
+ api_checkvalidindex(L, o);
+ if (tvisnil(L->top-1)) {
+ mt = NULL;
+ } else {
+ api_check(L, tvistab(L->top-1));
+ mt = tabV(L->top-1);
+ }
+ g = G(L);
+ if (tvistab(o)) {
+ setgcref(tabV(o)->metatable, obj2gco(mt));
+ if (mt)
+ lj_gc_objbarriert(L, tabV(o), mt);
+ } else if (tvisudata(o)) {
+ setgcref(udataV(o)->metatable, obj2gco(mt));
+ if (mt)
+ lj_gc_objbarrier(L, udataV(o), mt);
+ } else {
+ /* Flush cache, since traces specialize to basemt. But not during __gc. */
+ if (lj_trace_flushall(L))
+ lj_err_caller(L, LJ_ERR_NOGCMM);
+ if (tvisbool(o)) {
+ /* NOBARRIER: g->basemt[] is a GC root. */
+ setgcref(g->basemt[~LJ_TTRUE], obj2gco(mt));
+ setgcref(g->basemt[~LJ_TFALSE], obj2gco(mt));
+ } else {
+ /* NOBARRIER: g->basemt[] is a GC root. */
+ setgcref(g->basemt[itypemap(o)], obj2gco(mt));
+ }
+ }
+ L->top--;
+ return 1;
+}
+
+LUA_API int lua_setfenv(lua_State *L, int idx)
+{
+ cTValue *o = index2adr(L, idx);
+ GCtab *t;
+ api_checknelems(L, 1);
+ api_checkvalidindex(L, o);
+ api_check(L, tvistab(L->top-1));
+ t = tabV(L->top-1);
+ if (tvisfunc(o)) {
+ setgcref(funcV(o)->c.env, obj2gco(t));
+ } else if (tvisudata(o)) {
+ setgcref(udataV(o)->env, obj2gco(t));
+ } else if (tvisthread(o)) {
+ setgcref(threadV(o)->env, obj2gco(t));
+ } else {
+ L->top--;
+ return 0;
+ }
+ lj_gc_objbarrier(L, gcV(o), t);
+ L->top--;
+ return 1;
+}
+
+LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
+{
+ cTValue *f = index2adr(L, idx);
+ TValue *val;
+ const char *name;
+ api_checknelems(L, 1);
+ name = aux_upvalue(f, (uint32_t)(n-1), &val);
+ if (name) {
+ L->top--;
+ copyTV(L, val, L->top);
+ lj_gc_barrier(L, funcV(f), L->top);
+ }
+ return name;
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+LUA_API void lua_call(lua_State *L, int nargs, int nresults)
+{
+ api_checknelems(L, nargs+1);
+ lj_vm_call(L, L->top - nargs, nresults+1);
+}
+
+LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
+{
+ global_State *g = G(L);
+ uint8_t oldh = hook_save(g);
+ ptrdiff_t ef;
+ int status;
+ api_checknelems(L, nargs+1);
+ if (errfunc == 0) {
+ ef = 0;
+ } else {
+ cTValue *o = stkindex2adr(L, errfunc);
+ api_checkvalidindex(L, o);
+ ef = savestack(L, o);
+ }
+ status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef);
+ if (status) hook_restore(g, oldh);
+ return status;
+}
+
+static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
+{
+ GCfunc *fn;
+ fn = lj_func_newC(L, 0, getcurrenv(L));
+ fn->c.f = func;
+ setfuncV(L, L->top, fn);
+ setlightudV(L->top+1, checklightudptr(L, ud));
+ cframe_nres(L->cframe) = 1+0; /* Zero results. */
+ L->top += 2;
+ return L->top-1; /* Now call the newly allocated C function. */
+}
+
+LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
+{
+ global_State *g = G(L);
+ uint8_t oldh = hook_save(g);
+ int status = lj_vm_cpcall(L, cpcall, func, ud);
+ if (status) hook_restore(g, oldh);
+ return status;
+}
+
+LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
+{
+ if (luaL_getmetafield(L, idx, field)) {
+ TValue *base = L->top--;
+ copyTV(L, base, index2adr(L, idx));
+ L->top = base+1;
+ lj_vm_call(L, base, 1+1);
+ return 1;
+ }
+ return 0;
+}
+
+/* -- Coroutine yield and resume ------------------------------------------ */
+
+LUA_API int lua_yield(lua_State *L, int nresults)
+{
+  void *cf = L->cframe;
+  cTValue *f;
+  if (!cframe_canyield(cf))  /* Yield not possible from this C frame. */
+    lj_err_msg(L, LJ_ERR_CYIELD);
+  f = L->top - nresults;
+  if (f > L->base) {  /* Relocate the results down to the frame base. */
+    TValue *t = L->base;
+    while (--nresults >= 0) copyTV(L, t++, f++);
+    L->top = t;
+  }
+  L->cframe = NULL;
+  L->status = LUA_YIELD;
+  lj_vm_unwind_c(cf, LUA_YIELD);  /* Unwinds the C stack; does not return. */
+  return -1;  /* unreachable */
+}
+
+LUA_API int lua_resume(lua_State *L, int nargs)
+{
+ if (L->cframe == NULL && L->status <= LUA_YIELD)
+ return lj_vm_resume(L, L->top - nargs, 0, 0);
+ L->top = L->base;
+ setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
+ incr_top(L);
+ return LUA_ERRRUN;
+}
+
+/* -- Load and dump Lua code ---------------------------------------------- */
+
+static TValue *cpparser(lua_State *L, lua_CFunction dummy, void *ud)
+{
+ LexState *ls = cast(LexState *, ud);
+ GCfunc *fn;
+ UNUSED(dummy);
+ cframe_errfunc(L->cframe) = -1; /* Inherit error function. */
+ lj_lex_start(L, ls);
+ fn = lj_func_newL(L, lj_parse(ls), tabref(L->env));
+ /* Parser may realloc stack. Don't combine above/below into one statement. */
+ setfuncV(L, L->top++, fn);
+ return NULL;
+}
+
+LUA_API int lua_load(lua_State *L, lua_Reader reader, void *data,
+ const char *chunkname)
+{
+ LexState ls;
+ int status;
+ global_State *g;
+ ls.rfunc = reader;
+ ls.rdata = data;
+ ls.chunkarg = chunkname ? chunkname : "?";
+ lj_str_initbuf(L, &ls.sb);
+ status = lj_vm_cpcall(L, cpparser, NULL, &ls);
+ g = G(L);
+ lj_str_freebuf(g, &ls.sb);
+ lj_gc_check(L);
+ return status;
+}
+
+LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data)
+{
+ api_checknelems(L, 1);
+ UNUSED(L); UNUSED(writer); UNUSED(data);
+ return 1; /* Error, not supported. */
+}
+
+/* -- GC and memory management -------------------------------------------- */
+
+LUA_API int lua_gc(lua_State *L, int what, int data)
+{
+ global_State *g = G(L);
+ int res = 0;
+ switch (what) {
+ case LUA_GCSTOP:
+ g->gc.threshold = LJ_MAX_MEM;
+ break;
+ case LUA_GCRESTART:
+ g->gc.threshold = g->gc.total;
+ break;
+ case LUA_GCCOLLECT:
+ lj_gc_fullgc(L);
+ break;
+ case LUA_GCCOUNT:
+ res = cast_int(g->gc.total >> 10);
+ break;
+ case LUA_GCCOUNTB:
+ res = cast_int(g->gc.total & 0x3ff);
+ break;
+ case LUA_GCSTEP: {
+ MSize a = (MSize)data << 10;
+ g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
+ while (g->gc.total >= g->gc.threshold)
+ if (lj_gc_step(L)) {
+ res = 1;
+ break;
+ }
+ break;
+ }
+ case LUA_GCSETPAUSE:
+ res = cast_int(g->gc.pause);
+ g->gc.pause = (MSize)data;
+ break;
+ case LUA_GCSETSTEPMUL:
+ res = cast_int(g->gc.stepmul);
+ g->gc.stepmul = (MSize)data;
+ break;
+ default:
+ res = -1; /* Invalid option. */
+ }
+ return res;
+}
+
+LUA_API lua_Alloc lua_getallocf(lua_State *L, void **ud)
+{
+ global_State *g = G(L);
+ if (ud) *ud = g->allocd;
+ return g->allocf;
+}
+
+LUA_API void lua_setallocf(lua_State *L, lua_Alloc f, void *ud)
+{
+ global_State *g = G(L);
+ g->allocd = ud;
+ g->allocf = f;
+}
+
diff --git a/src/lj_arch.h b/src/lj_arch.h
new file mode 100644
index 00000000..abdb5af9
--- /dev/null
+++ b/src/lj_arch.h
@@ -0,0 +1,88 @@
+/*
+** Target architecture selection.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_ARCH_H
+#define _LJ_ARCH_H
+
+#include "lua.h"
+
+
+/* Target endianess. */
+#define LUAJIT_LE 0
+#define LUAJIT_BE 1
+
+/* Target architectures. */
+#define LUAJIT_ARCH_X86 1
+#define LUAJIT_ARCH_x86 1
+#define LUAJIT_ARCH_X64 2
+#define LUAJIT_ARCH_x64 2
+
+
+/* Select native target if no target defined. */
+#ifndef LUAJIT_TARGET
+
+#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
+#define LUAJIT_TARGET LUAJIT_ARCH_X86
+#elif defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+#define LUAJIT_TARGET LUAJIT_ARCH_X64
+#else
+#error "No support for this architecture (yet)"
+#endif
+
+#endif
+
+/* Set target properties. */
+#if LUAJIT_TARGET == LUAJIT_ARCH_X86
+#define LJ_ARCH_NAME "x86"
+#define LJ_ARCH_BITS 32
+#define LJ_ARCH_ENDIAN LUAJIT_LE
+#define LJ_TARGET_X86 1
+#define LJ_TARGET_X86ORX64 1
+#define LJ_PAGESIZE 4096
+#elif LUAJIT_TARGET == LUAJIT_ARCH_X64
+#define LJ_ARCH_NAME "x64"
+#define LJ_ARCH_BITS 64
+#define LJ_ARCH_ENDIAN LUAJIT_LE
+#define LJ_TARGET_X64 1
+#define LJ_TARGET_X86ORX64 1
+#define LJ_PAGESIZE 4096
+#error "No support for x64 architecture (yet)"
+#else
+#error "No target architecture defined"
+#endif
+
+/* Disable or enable the JIT compiler. */
+#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT)
+#define LJ_HASJIT 0
+#else
+#define LJ_HASJIT 1
+#endif
+
+#if LJ_ARCH_ENDIAN == LUAJIT_BE
+#define LJ_ENDIAN_SELECT(le, be) be
+#define LJ_ENDIAN_LOHI(lo, hi) hi lo
+#else
+#define LJ_ENDIAN_SELECT(le, be) le
+#define LJ_ENDIAN_LOHI(lo, hi) lo hi
+#endif
+
+#if LJ_ARCH_BITS == 32
+#define LJ_32 1
+#define LJ_64 0
+#elif LJ_ARCH_BITS == 64
+#define LJ_32 0
+#define LJ_64 1
+#else
+#error "Bad LJ_ARCH_BITS setting"
+#endif
+
+/* Whether target CPU masks the shift count by the operand length or not. */
+#if LJ_TARGET_X86ORX64
+#define LJ_TARGET_MASKEDSHIFT 1
+#else
+#define LJ_TARGET_MASKEDSHIFT 0
+#endif
+
+#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
new file mode 100644
index 00000000..b89b8543
--- /dev/null
+++ b/src/lj_asm.c
@@ -0,0 +1,3324 @@
+/*
+** IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_asm_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_gc.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+#include "lj_mcode.h"
+#include "lj_iropt.h"
+#include "lj_trace.h"
+#include "lj_snap.h"
+#include "lj_asm.h"
+#include "lj_dispatch.h"
+#include "lj_vm.h"
+#include "lj_target.h"
+
+/* -- Assembler state and common macros ----------------------------------- */
+
+/* Assembler state. */
+typedef struct ASMState {
+ RegCost cost[RID_MAX]; /* Reference and blended allocation cost for regs. */
+
+ MCode *mcp; /* Current MCode pointer (grows down). */
+ MCode *mclim; /* Lower limit for MCode memory + red zone. */
+
+ IRIns *ir; /* Copy of pointer to IR instructions/constants. */
+ jit_State *J; /* JIT compiler state. */
+
+ x86ModRM mrm; /* Fused x86 address operand. */
+
+ RegSet freeset; /* Set of free registers. */
+ RegSet modset; /* Set of registers modified inside the loop. */
+ RegSet phiset; /* Set of PHI registers. */
+
+ uint32_t flags; /* Copy of JIT compiler flags. */
+ int loopinv; /* Loop branch inversion (0:no, 1:yes, 2:yes+CC_P). */
+
+ int32_t evenspill; /* Next even spill slot. */
+ int32_t oddspill; /* Next odd spill slot (or 0). */
+
+ IRRef curins; /* Reference of current instruction. */
+ IRRef stopins; /* Stop assembly before hitting this instruction. */
+ IRRef orignins; /* Original T->nins. */
+
+ IRRef snapref; /* Current snapshot is active after this reference. */
+ IRRef snaprename; /* Rename highwater mark for snapshot check. */
+ SnapNo snapno; /* Current snapshot number. */
+ SnapNo loopsnapno; /* Loop snapshot number. */
+
+ Trace *T; /* Trace to assemble. */
+ Trace *parent; /* Parent trace (or NULL). */
+
+ IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
+ IRRef sectref; /* Section base reference (loopref or 0). */
+ IRRef loopref; /* Reference of LOOP instruction (or 0). */
+
+ BCReg topslot; /* Number of slots for stack check (unless 0). */
+ MSize gcsteps; /* Accumulated number of GC steps (per section). */
+
+ MCode *mcbot; /* Bottom of reserved MCode. */
+ MCode *mctop; /* Top of generated MCode. */
+ MCode *mcloop; /* Pointer to loop MCode (or NULL). */
+ MCode *invmcp; /* Points to invertible loop branch (or NULL). */
+ MCode *testmcp; /* Pending opportunity to remove test r,r. */
+ MCode *realign; /* Realign loop if not NULL. */
+
+ IRRef1 phireg[RID_MAX]; /* PHI register references. */
+ uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent slot to RegSP map. */
+} ASMState;
+
+#define IR(ref) (&as->ir[(ref)])
+
+/* Check for variant to invariant references. */
+#define iscrossref(as, ref) ((ref) < as->sectref)
+
+/* Inhibit memory op fusion from variant to invariant references. */
+#define FUSE_DISABLED (~(IRRef)0)
+#define mayfuse(as, ref) ((ref) > as->fuseref)
+#define neverfuse(as) (as->fuseref == FUSE_DISABLED)
+#define opisfusableload(o) \
+ ((o) == IR_ALOAD || (o) == IR_HLOAD || (o) == IR_ULOAD || \
+ (o) == IR_FLOAD || (o) == IR_SLOAD || (o) == IR_XLOAD)
+
+/* Instruction selection for XMM moves. */
+#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
+#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
+
+/* Sparse limit checks using a red zone before the actual limit. */
+#define MCLIM_REDZONE 64
+#define checkmclim(as) \
+ if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as)
+
+static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
+{
+ lj_mcode_limiterr(as->J, (size_t)(as->mctop - as->mcp + 4*MCLIM_REDZONE));
+}
+
+/* -- Emit x86 instructions ----------------------------------------------- */
+
+#define MODRM(mode, r1, r2) ((MCode)((mode)+(((r1)&7)<<3)+((r2)&7)))
+
+#if LJ_64
+#define REXRB(p, rr, rb) \
+ { MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
+ if (rex != 0x40) *--(p) = rex; }
+#define FORCE_REX 0x200
+#else
+#define REXRB(p, rr, rb) ((void)0)
+#define FORCE_REX 0
+#endif
+
+#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
+#define emit_i32(as, i) (*(int32_t *)(as->mcp-4) = (i), as->mcp -= 4)
+
+#define emit_x87op(as, xo) \
+ (*(uint16_t *)(as->mcp-2) = (uint16_t)(xo), as->mcp -= 2)
+
+/* op */
+static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
+ MCode *p, int delta)
+{
+ int n = (int8_t)xo;
+#if defined(__GNUC__)
+ if (__builtin_constant_p(xo) && n == -2)
+ p[delta-2] = (MCode)(xo >> 24);
+ else if (__builtin_constant_p(xo) && n == -3)
+ *(uint16_t *)(p+delta-3) = (uint16_t)(xo >> 16);
+ else
+#endif
+ *(uint32_t *)(p+delta-5) = (uint32_t)xo;
+ p += n + delta;
+#if LJ_64
+ {
+ uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
+ if (rex != 0x40) {
+ if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
+ *--p = (MCode)rex;
+ }
+ }
+#else
+ UNUSED(rr); UNUSED(rb); UNUSED(rx);
+#endif
+ return p;
+}
+
+/* op + modrm */
+#define emit_opm(xo, mode, rr, rb, p, delta) \
+ (p[(delta)-1] = MODRM((mode), (rr), (rb)), \
+ emit_op((xo), (rr), (rb), 0, (p), (delta)))
+
+/* op + modrm + sib */
+#define emit_opmx(xo, mode, scale, rr, rb, rx, p) \
+ (p[-1] = MODRM((scale), (rx), (rb)), \
+ p[-2] = MODRM((mode), (rr), RID_ESP), \
+ emit_op((xo), (rr), (rb), (rx), (p), -1))
+
+/* op r1, r2 */
+static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
+{
+ MCode *p = as->mcp;
+ as->mcp = emit_opm(xo, XM_REG, r1, r2, p, 0);
+}
+
+#if LJ_64 && defined(LUA_USE_ASSERT)
+/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
+static int32_t ptr2addr(void *p)
+{
+ lua_assert((uintptr_t)p < (uintptr_t)0x80000000);
+ return i32ptr(p);
+}
+#else
+#define ptr2addr(p) (i32ptr((p)))
+#endif
+
+/* op r, [addr] */
+static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
+{
+ MCode *p = as->mcp;
+ *(int32_t *)(p-4) = ptr2addr(addr);
+#if LJ_64
+ p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
+ as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
+#else
+ as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
+#endif
+}
+
+/* op r, [base+ofs] */
+static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
+{
+ MCode *p = as->mcp;
+ x86Mode mode;
+ if (ra_hasreg(rb)) {
+ if (ofs == 0 && (rb&7) != RID_EBP) {
+ mode = XM_OFS0;
+ } else if (checki8(ofs)) {
+ *--p = (MCode)ofs;
+ mode = XM_OFS8;
+ } else {
+ p -= 4;
+ *(int32_t *)p = ofs;
+ mode = XM_OFS32;
+ }
+ if ((rb&7) == RID_ESP)
+ *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
+ } else {
+ *(int32_t *)(p-4) = ofs;
+#if LJ_64
+ p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
+ p -= 5;
+ rb = RID_ESP;
+#else
+ p -= 4;
+ rb = RID_EBP;
+#endif
+ mode = XM_OFS0;
+ }
+ as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
+}
+
+/* op r, [base+idx*scale+ofs] */
+static void emit_rmrxo(ASMState *as, x86Op xo, Reg rr, Reg rb, Reg rx,
+ x86Mode scale, int32_t ofs)
+{
+ MCode *p = as->mcp;
+ x86Mode mode;
+ if (ofs == 0 && (rb&7) != RID_EBP) {
+ mode = XM_OFS0;
+ } else if (checki8(ofs)) {
+ mode = XM_OFS8;
+ *--p = (MCode)ofs;
+ } else {
+ mode = XM_OFS32;
+ p -= 4;
+ *(int32_t *)p = ofs;
+ }
+ as->mcp = emit_opmx(xo, mode, scale, rr, rb, rx, p);
+}
+
+/* op r, i */
+static void emit_gri(ASMState *as, x86Group xg, Reg rb, int32_t i)
+{
+ MCode *p = as->mcp;
+ if (checki8(i)) {
+ p -= 3;
+ p[2] = (MCode)i;
+ p[0] = (MCode)(xg >> 16);
+ } else {
+ p -= 6;
+ *(int32_t *)(p+2) = i;
+ p[0] = (MCode)(xg >> 8);
+ }
+ p[1] = MODRM(XM_REG, xg, rb);
+ REXRB(p, 0, rb);
+ as->mcp = p;
+}
+
+/* op [base+ofs], i */
+static void emit_gmroi(ASMState *as, x86Group xg, Reg rb, int32_t ofs,
+ int32_t i)
+{
+ x86Op xo;
+ if (checki8(i)) {
+ emit_i8(as, i);
+ xo = (x86Op)(((xg >> 16) << 24)+0xfe);
+ } else {
+ emit_i32(as, i);
+ xo = (x86Op)(((xg >> 8) << 24)+0xfe);
+ }
+ emit_rmro(as, xo, (Reg)xg, rb, ofs);
+}
+
+#define emit_shifti(as, xg, r, i) \
+ (emit_i8(as, (i)), emit_rr(as, XO_SHIFTi, (Reg)(xg), (r)))
+
+/* op r, rm/mrm */
+static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
+{
+ MCode *p = as->mcp;
+ x86Mode mode = XM_REG;
+ if (rb == RID_MRM) {
+ rb = as->mrm.base;
+ if (rb == RID_NONE) {
+ rb = RID_EBP;
+ mode = XM_OFS0;
+ p -= 4;
+ *(int32_t *)p = as->mrm.ofs;
+ if (as->mrm.idx != RID_NONE)
+ goto mrmidx;
+#if LJ_64
+ *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
+ rb = RID_ESP;
+#endif
+ } else {
+ if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
+ mode = XM_OFS0;
+ } else if (checki8(as->mrm.ofs)) {
+ *--p = (MCode)as->mrm.ofs;
+ mode = XM_OFS8;
+ } else {
+ p -= 4;
+ *(int32_t *)p = as->mrm.ofs;
+ mode = XM_OFS32;
+ }
+ if (as->mrm.idx != RID_NONE) {
+ mrmidx:
+ as->mcp = emit_opmx(xo, mode, as->mrm.scale, rr, rb, as->mrm.idx, p);
+ return;
+ }
+ if ((rb&7) == RID_ESP)
+ *--p = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
+ }
+ }
+ as->mcp = emit_opm(xo, mode, rr, rb, p, 0);
+}
+
+static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
+{
+ if (ofs) {
+ if ((as->flags & JIT_F_LEA_AGU))
+ emit_rmro(as, XO_LEA, r, r, ofs);
+ else
+ emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
+ }
+}
+
+/* -- Emit moves ---------------------------------------------------------- */
+
+/* Generic move between two regs. */
+static void emit_movrr(ASMState *as, Reg r1, Reg r2)
+{
+ emit_rr(as, r1 < RID_MAX_GPR ? XO_MOV : XMM_MOVRR(as), r1, r2);
+}
+
+/* Generic move from [base+ofs]. */
+static void emit_movrmro(ASMState *as, Reg rr, Reg rb, int32_t ofs)
+{
+ emit_rmro(as, rr < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), rr, rb, ofs);
+}
+
+/* mov [base+ofs], i */
+static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
+{
+ emit_i32(as, i);
+ emit_rmro(as, XO_MOVmi, 0, base, ofs);
+}
+
+/* mov [base+ofs], r */
+#define emit_movtomro(as, r, base, ofs) \
+ emit_rmro(as, XO_MOVto, (r), (base), (ofs))
+
+/* Get/set global_State fields. */
+#define emit_opgl(as, xo, r, field) \
+ emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
+#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field)
+#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field)
+#define emit_setgli(as, field, i) \
+ (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, field))
+
+/* mov r, i / xor r, r */
+static void emit_loadi(ASMState *as, Reg r, int32_t i)
+{
+ if (i == 0) {
+ emit_rr(as, XO_ARITH(XOg_XOR), r, r);
+ } else {
+ MCode *p = as->mcp;
+ *(int32_t *)(p-4) = i;
+ p[-5] = (MCode)(XI_MOVri+(r&7));
+ p -= 5;
+ REXRB(p, 0, r);
+ as->mcp = p;
+ }
+}
+
+/* mov r, addr */
+#define emit_loada(as, r, addr) \
+ emit_loadi(as, (r), ptr2addr((addr)))
+
+/* movsd r, [&tv->n] / xorps r, r */
+static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
+{
+ if (tvispzero(tv)) /* Use xor only for +0. */
+ emit_rr(as, XO_XORPS, r, r);
+ else
+ emit_rma(as, XMM_MOVRM(as), r, &tv->n);
+}
+
+/* -- Emit branches ------------------------------------------------------- */
+
+/* Label for short jumps. */
+typedef MCode *MCLabel;
+
+/* jcc short target */
+static void emit_sjcc(ASMState *as, int cc, MCLabel target)
+{
+ MCode *p = as->mcp;
+ p[-1] = (MCode)(int8_t)(target-p);
+ p[-2] = (MCode)(XI_JCCs+(cc&15));
+ as->mcp = p - 2;
+}
+
+/* jcc short (pending target) */
+static MCLabel emit_sjcc_label(ASMState *as, int cc)
+{
+ MCode *p = as->mcp;
+ p[-1] = 0;
+ p[-2] = (MCode)(XI_JCCs+(cc&15));
+ as->mcp = p - 2;
+ return p;
+}
+
+/* Fixup jcc short target. */
+static void emit_sfixup(ASMState *as, MCLabel source)
+{
+ source[-1] = (MCode)(as->mcp-source);
+}
+
+/* Return label pointing to current PC. */
+#define emit_label(as) ((as)->mcp)
+
+/* jcc target */
+static void emit_jcc(ASMState *as, int cc, MCode *target)
+{
+ MCode *p = as->mcp;
+ int32_t addr = (int32_t)(target - p);
+ *(int32_t *)(p-4) = addr;
+ p[-5] = (MCode)(XI_JCCn+(cc&15));
+ p[-6] = 0x0f;
+ as->mcp = p - 6;
+}
+
+/* call target */
+static void emit_call_(ASMState *as, MCode *target)
+{
+ MCode *p = as->mcp;
+ *(int32_t *)(p-4) = (int32_t)(target - p);
+ p[-5] = XI_CALL;
+ as->mcp = p - 5;
+}
+
+#define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f))
+
+/* Argument setup for C calls. Up to 3 args need no stack adjustment. */
+#define emit_setargr(as, narg, r) \
+ emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4);
+#define emit_setargi(as, narg, imm) \
+ emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
+#define emit_setargp(as, narg, ptr) \
+ emit_setargi(as, (narg), ptr2addr((ptr)))
+
+/* -- Register allocator debugging ---------------------------------------- */
+
+/* #define LUAJIT_DEBUG_RA */
+
+#ifdef LUAJIT_DEBUG_RA
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#define RIDNAME(name) #name,
+static const char *const ra_regname[] = {
+ GPRDEF(RIDNAME)
+ FPRDEF(RIDNAME)
+ "mrm",
+ NULL
+};
+#undef RIDNAME
+
+static char ra_dbg_buf[65536];
+static char *ra_dbg_p;
+static char *ra_dbg_merge;
+static MCode *ra_dbg_mcp;
+
+static void ra_dstart(void)
+{
+ ra_dbg_p = ra_dbg_buf;
+ ra_dbg_merge = NULL;
+ ra_dbg_mcp = NULL;
+}
+
+static void ra_dflush(void)
+{
+ fwrite(ra_dbg_buf, 1, (size_t)(ra_dbg_p-ra_dbg_buf), stdout);
+ ra_dstart();
+}
+
+static void ra_dprintf(ASMState *as, const char *fmt, ...)
+{
+ char *p;
+ va_list argp;
+ va_start(argp, fmt);
+ p = ra_dbg_mcp == as->mcp ? ra_dbg_merge : ra_dbg_p;
+ ra_dbg_mcp = NULL;
+ p += sprintf(p, "%08x \e[36m%04d ", (uintptr_t)as->mcp, as->curins-REF_BIAS);
+ for (;;) {
+ const char *e = strchr(fmt, '$');
+ if (e == NULL) break;
+ memcpy(p, fmt, (size_t)(e-fmt));
+ p += e-fmt;
+ if (e[1] == 'r') {
+ Reg r = va_arg(argp, Reg) & RID_MASK;
+ if (r <= RID_MAX) {
+ const char *q;
+ for (q = ra_regname[r]; *q; q++)
+ *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
+ } else {
+ *p++ = '?';
+ lua_assert(0);
+ }
+ } else if (e[1] == 'f' || e[1] == 'i') {
+ IRRef ref;
+ if (e[1] == 'f')
+ ref = va_arg(argp, IRRef);
+ else
+ ref = va_arg(argp, IRIns *) - as->ir;
+ if (ref >= REF_BIAS)
+ p += sprintf(p, "%04d", ref - REF_BIAS);
+ else
+ p += sprintf(p, "K%03d", REF_BIAS - ref);
+ } else if (e[1] == 's') {
+ uint32_t slot = va_arg(argp, uint32_t);
+ p += sprintf(p, "[esp+0x%x]", sps_scale(slot));
+ } else {
+ lua_assert(0);
+ }
+ fmt = e+2;
+ }
+ va_end(argp);
+ while (*fmt)
+ *p++ = *fmt++;
+ *p++ = '\e'; *p++ = '['; *p++ = 'm'; *p++ = '\n';
+ if (p > ra_dbg_buf+sizeof(ra_dbg_buf)-256) {
+ fwrite(ra_dbg_buf, 1, (size_t)(p-ra_dbg_buf), stdout);
+ p = ra_dbg_buf;
+ }
+ ra_dbg_p = p;
+}
+
+#define RA_DBG_START() ra_dstart()
+#define RA_DBG_FLUSH() ra_dflush()
+#define RA_DBG_REF() \
+ do { char *_p = ra_dbg_p; ra_dprintf(as, ""); \
+ ra_dbg_merge = _p; ra_dbg_mcp = as->mcp; } while (0)
+#define RA_DBGX(x) ra_dprintf x
+
+#else
+#define RA_DBG_START() ((void)0)
+#define RA_DBG_FLUSH() ((void)0)
+#define RA_DBG_REF() ((void)0)
+#define RA_DBGX(x) ((void)0)
+#endif
+
+/* -- Register allocator -------------------------------------------------- */
+
+#define ra_free(as, r) rset_set(as->freeset, (r))
+#define ra_modified(as, r) rset_set(as->modset, (r))
+
+#define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s))
+
+/* Setup register allocator. */
+static void ra_setup(ASMState *as)
+{
+ /* Initially all regs (except the stack pointer) are free for use. */
+ as->freeset = RSET_ALL;
+ as->modset = RSET_EMPTY;
+ as->phiset = RSET_EMPTY;
+ memset(as->phireg, 0, sizeof(as->phireg));
+ memset(as->cost, 0, sizeof(as->cost));
+ as->cost[RID_ESP] = REGCOST(~0u, 0u);
+
+ /* Start slots for spill slot allocation. */
+ as->evenspill = (SPS_FIRST+1)&~1;
+ as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0;
+}
+
+/* Rematerialize constants. */
+static Reg ra_rematk(ASMState *as, IRIns *ir)
+{
+ Reg r = ir->r;
+ lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+ ra_free(as, r);
+ ra_modified(as, r);
+ ir->r = RID_INIT; /* Do not keep any hint. */
+ RA_DBGX((as, "remat $i $r", ir, r));
+ if (ir->o == IR_KNUM) {
+ emit_loadn(as, r, ir_knum(ir));
+ } else if (ir->o == IR_BASE) {
+ ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
+ emit_getgl(as, r, jit_base);
+ } else {
+ lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
+ ir->o == IR_KPTR || ir->o == IR_KNULL);
+ emit_loadi(as, r, ir->i);
+ }
+ return r;
+}
+
+/* Force a spill. Allocate a new spill slot if needed. */
+static int32_t ra_spill(ASMState *as, IRIns *ir)
+{
+ int32_t slot = ir->s;
+ if (!ra_hasspill(slot)) {
+ if (irt_isnum(ir->t)) {
+ slot = as->evenspill;
+ as->evenspill += 2;
+ } else if (as->oddspill) {
+ slot = as->oddspill;
+ as->oddspill = 0;
+ } else {
+ slot = as->evenspill;
+ as->oddspill = slot+1;
+ as->evenspill += 2;
+ }
+ if (as->evenspill > 256)
+ lj_trace_err(as->J, LJ_TRERR_SPILLOV);
+ ir->s = (uint8_t)slot;
+ }
+ return sps_scale(slot);
+}
+
+/* Restore a register (marked as free). Rematerialize or force a spill. */
+static Reg ra_restore(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ if (irref_isk(ref) || ref == REF_BASE) {
+ return ra_rematk(as, ir);
+ } else {
+ Reg r = ir->r;
+ lua_assert(ra_hasreg(r));
+ ra_free(as, r);
+ ra_modified(as, r);
+ ra_sethint(ir->r, r); /* Keep hint. */
+ RA_DBGX((as, "restore $i $r", ir, r));
+ emit_movrmro(as, r, RID_ESP, ra_spill(as, ir)); /* Force a spill. */
+ return r;
+ }
+}
+
+/* Save a register to a spill slot. */
+static LJ_AINLINE void ra_save(ASMState *as, IRIns *ir, Reg r)
+{
+ RA_DBGX((as, "save $i $r", ir, r));
+ emit_rmro(as, r < RID_MAX_GPR ? XO_MOVto : XO_MOVSDto,
+ r, RID_ESP, sps_scale(ir->s));
+}
+
+#define MINCOST(r) \
+ if (LJ_LIKELY(allow&RID2RSET(r)) && as->cost[r] < cost) \
+ cost = as->cost[r]
+
+/* Evict the register with the lowest cost, forcing a restore. */
+static Reg ra_evict(ASMState *as, RegSet allow)
+{
+ RegCost cost = ~(RegCost)0;
+ if (allow < RID2RSET(RID_MAX_GPR)) {
+ MINCOST(RID_EAX);MINCOST(RID_ECX);MINCOST(RID_EDX);MINCOST(RID_EBX);
+ MINCOST(RID_EBP);MINCOST(RID_ESI);MINCOST(RID_EDI);
+#if LJ_64
+ MINCOST(RID_R8D);MINCOST(RID_R9D);MINCOST(RID_R10D);MINCOST(RID_R11D);
+ MINCOST(RID_R12D);MINCOST(RID_R13D);MINCOST(RID_R14D);MINCOST(RID_R15D);
+#endif
+ } else {
+ MINCOST(RID_XMM0);MINCOST(RID_XMM1);MINCOST(RID_XMM2);MINCOST(RID_XMM3);
+ MINCOST(RID_XMM4);MINCOST(RID_XMM5);MINCOST(RID_XMM6);MINCOST(RID_XMM7);
+#if LJ_64
+ MINCOST(RID_XMM8);MINCOST(RID_XMM9);MINCOST(RID_XMM10);MINCOST(RID_XMM11);
+ MINCOST(RID_XMM12);MINCOST(RID_XMM13);MINCOST(RID_XMM14);MINCOST(RID_XMM15);
+#endif
+ }
+ lua_assert(allow != RSET_EMPTY);
+ lua_assert(regcost_ref(cost) >= as->T->nk && regcost_ref(cost) < as->T->nins);
+ return ra_restore(as, regcost_ref(cost));
+}
+
+/* Pick any register (marked as free). Evict on-demand. */
+static LJ_AINLINE Reg ra_pick(ASMState *as, RegSet allow)
+{
+ RegSet pick = as->freeset & allow;
+ if (!pick)
+ return ra_evict(as, allow);
+ else
+ return rset_picktop(pick);
+}
+
+/* Get a scratch register (marked as free). */
+static LJ_AINLINE Reg ra_scratch(ASMState *as, RegSet allow)
+{
+ Reg r = ra_pick(as, allow);
+ ra_modified(as, r);
+ RA_DBGX((as, "scratch $r", r));
+ return r;
+}
+
+/* Evict all registers from a set (if not free). */
+static void ra_evictset(ASMState *as, RegSet drop)
+{
+ as->modset |= drop;
+ drop &= ~as->freeset;
+ while (drop) {
+ Reg r = rset_picktop(drop);
+ ra_restore(as, regcost_ref(as->cost[r]));
+ rset_clear(drop, r);
+ checkmclim(as);
+ }
+}
+
+/* Allocate a register for ref from the allowed set of registers.
+** Note: this function assumes the ref does NOT have a register yet!
+** Picks an optimal register, sets the cost and marks the register as non-free.
+*/
+static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
+{
+ IRIns *ir = IR(ref);
+ RegSet pick = as->freeset & allow;
+ Reg r;
+ lua_assert(ra_noreg(ir->r));
+ if (pick) {
+ /* First check register hint from propagation or PHI. */
+ if (ra_hashint(ir->r)) {
+ r = ra_gethint(ir->r);
+ if (rset_test(pick, r)) /* Use hint register if possible. */
+ goto found;
+ /* Rematerialization is cheaper than missing a hint. */
+ if (rset_test(allow, r) && irref_isk(regcost_ref(as->cost[r]))) {
+ ra_rematk(as, IR(regcost_ref(as->cost[r])));
+ goto found;
+ }
+ RA_DBGX((as, "hintmiss $f $r", ref, r));
+ }
+ /* Invariants should preferably get unused registers. */
+ if (ref < as->loopref && !irt_isphi(ir->t))
+ r = rset_pickbot(pick);
+ else
+ r = rset_picktop(pick);
+ } else {
+ r = ra_evict(as, allow);
+ }
+found:
+ RA_DBGX((as, "alloc $f $r", ref, r));
+ ir->r = (uint8_t)r;
+ rset_clear(as->freeset, r);
+ as->cost[r] = REGCOST_REF_T(ref, irt_t(ir->t));
+ return r;
+}
+
+/* Allocate a register on-demand. */
+static LJ_INLINE Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
+{
+ Reg r = IR(ref)->r;
+ /* Note: allow is ignored if the register is already allocated. */
+ if (ra_noreg(r)) r = ra_allocref(as, ref, allow);
+ return r;
+}
+
+/* Rename register allocation and emit move. */
+static void ra_rename(ASMState *as, Reg down, Reg up)
+{
+ IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+ IR(ref)->r = (uint8_t)up;
+ as->cost[down] = 0;
+ lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
+ lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
+ rset_set(as->freeset, down); /* 'down' is free ... */
+ rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */
+ RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
+ emit_movrr(as, down, up); /* Backwards code generation needs inverse move. */
+ if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
+ lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
+ ren = tref_ref(lj_ir_emit(as->J));
+ as->ir = as->T->ir; /* The IR may have been reallocated. */
+ IR(ren)->r = (uint8_t)down;
+ IR(ren)->s = SPS_NONE;
+ }
+}
+
+/* Pick a destination register (marked as free).
+** Caveat: allow is ignored if there's already a destination register.
+** Use ra_destreg() to get a specific register.
+*/
+static Reg ra_dest(ASMState *as, IRIns *ir, RegSet allow)
+{
+ Reg dest = ir->r;
+ if (ra_hasreg(dest)) {
+ ra_free(as, dest);
+ ra_modified(as, dest);
+ } else {
+ dest = ra_scratch(as, allow);
+ }
+ if (LJ_UNLIKELY(ra_hasspill(ir->s))) ra_save(as, ir, dest);
+ return dest;
+}
+
+/* Force a specific destination register (marked as free). */
+static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
+{
+ Reg dest = ra_dest(as, ir, RID2RSET(r));
+ if (dest != r) {
+ ra_scratch(as, RID2RSET(r));
+ emit_movrr(as, dest, r);
+ }
+}
+
+/* Propagate dest register to left reference. Emit moves as needed.
+** This is a required fixup step for all 2-operand machine instructions.
+*/
+static void ra_left(ASMState *as, Reg dest, IRRef lref)
+{
+ IRIns *ir = IR(lref);
+ Reg left = ir->r;
+ if (ra_noreg(left)) {
+ if (irref_isk(lref)) {
+ if (ir->o == IR_KNUM) {
+ cTValue *tv = ir_knum(ir);
+ /* FP remat needs a load except for +0. Still better than eviction. */
+ if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
+ emit_loadn(as, dest, tv);
+ return;
+ }
+ } else {
+ lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
+ ir->o == IR_KPTR || ir->o == IR_KNULL);
+ emit_loadi(as, dest, ir->i);
+ return;
+ }
+ }
+ if (!ra_hashint(left) && !iscrossref(as, lref))
+ ra_sethint(ir->r, dest); /* Propagate register hint. */
+ left = ra_allocref(as, lref, dest < RID_MAX_GPR ? RSET_GPR : RSET_FPR);
+ }
+ /* Move needed for true 3-operand instruction: y=a+b ==> y=a; y+=b. */
+ if (dest != left) {
+ /* Use register renaming if dest is the PHI reg. */
+ if (irt_isphi(ir->t) && as->phireg[dest] == lref) {
+ ra_modified(as, left);
+ ra_rename(as, left, dest);
+ } else {
+ emit_movrr(as, dest, left);
+ }
+ }
+}
+
+/* -- Exit stubs ---------------------------------------------------------- */
+
+/* Generate an exit stub group at the bottom of the reserved MCode memory. */
+static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
+{
+ ExitNo i, groupofs = (group*EXITSTUBS_PER_GROUP) & 0xff;
+ MCode *mxp = as->mcbot;
+ MCode *mxpstart = mxp;
+ if (mxp + (2+2)*EXITSTUBS_PER_GROUP+8+5 >= as->mctop)
+ asm_mclimit(as);
+ /* Push low byte of exitno for each exit stub. */
+ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)groupofs;
+ for (i = 1; i < EXITSTUBS_PER_GROUP; i++) {
+ *mxp++ = XI_JMPs; *mxp++ = (MCode)((2+2)*(EXITSTUBS_PER_GROUP - i) - 2);
+ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)(groupofs + i);
+ }
+ /* Push the high byte of the exitno for each exit stub group. */
+ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
+ /* Store DISPATCH in ExitInfo->dispatch. Account for the two push ops. */
+ *mxp++ = XI_MOVmi;
+ *mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
+ *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
+ *mxp++ = 2*sizeof(void *);
+ *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
+ /* Jump to exit handler which fills in the ExitState. */
+ *mxp++ = XI_JMP; mxp += 4;
+ *((int32_t *)(mxp-4)) = (int32_t)((MCode *)lj_vm_exit_handler - mxp);
+ /* Commit the code for this group (even if assembly fails later on). */
+ lj_mcode_commitbot(as->J, mxp);
+ as->mcbot = mxp;
+ as->mclim = as->mcbot + MCLIM_REDZONE;
+ return mxpstart;
+}
+
+/* Setup all needed exit stubs. */
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+ ExitNo i;
+ if (nexits >= EXITSTUBS_PER_GROUP*LJ_MAX_EXITSTUBGR)
+ lj_trace_err(as->J, LJ_TRERR_SNAPOV);
+ for (i = 0; i < (nexits+EXITSTUBS_PER_GROUP-1)/EXITSTUBS_PER_GROUP; i++)
+ if (as->J->exitstubgroup[i] == NULL)
+ as->J->exitstubgroup[i] = asm_exitstub_gen(as, i);
+}
+
+/* -- Snapshot and guard handling ----------------------------------------- */
+
+/* Can we rematerialize a KNUM instead of forcing a spill? */
+static int asm_snap_canremat(ASMState *as)
+{
+ Reg r;
+ for (r = RID_MIN_FPR; r < RID_MAX_FPR; r++)
+ if (irref_isk(regcost_ref(as->cost[r])))
+ return 1;
+ return 0;
+}
+
+/* Allocate registers or spill slots for refs escaping to a snapshot. */
+static void asm_snap_alloc(ASMState *as)
+{
+ SnapShot *snap = &as->T->snap[as->snapno];
+ IRRef2 *map = &as->T->snapmap[snap->mapofs];
+ BCReg s, nslots = snap->nslots;
+ for (s = 0; s < nslots; s++) {
+ IRRef ref = snap_ref(map[s]);
+ if (!irref_isk(ref)) {
+ IRIns *ir = IR(ref);
+ if (!ra_used(ir) && ir->o != IR_FRAME) {
+ RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+ /* Not a var-to-invar ref and got a free register (or a remat)? */
+ if ((!iscrossref(as, ref) || irt_isphi(ir->t)) &&
+ ((as->freeset & allow) ||
+ (allow == RSET_FPR && asm_snap_canremat(as)))) {
+ ra_allocref(as, ref, allow); /* Allocate a register. */
+ checkmclim(as);
+ RA_DBGX((as, "snapreg $f $r", ref, ir->r));
+ } else {
+ ra_spill(as, ir); /* Otherwise force a spill slot. */
+ RA_DBGX((as, "snapspill $f $s", ref, ir->s));
+ }
+ }
+ }
+ }
+}
+
+/* All guards for a snapshot use the same exitno. This is currently the
+** same as the snapshot number. Since the exact origin of the exit cannot
+** be determined, all guards for the same snapshot must exit with the same
+** RegSP mapping.
+** A renamed ref which has been used in a prior guard for the same snapshot
+** would cause an inconsistency. The easy way out is to force a spill slot.
+*/
+static int asm_snap_checkrename(ASMState *as, IRRef ren)
+{
+ SnapShot *snap = &as->T->snap[as->snapno];
+ IRRef2 *map = &as->T->snapmap[snap->mapofs];
+ BCReg s, nslots = snap->nslots;
+ for (s = 0; s < nslots; s++) {
+ IRRef ref = snap_ref(map[s]);
+ if (ref == ren) {
+ IRIns *ir = IR(ref);
+ ra_spill(as, ir); /* Register renamed, so force a spill slot. */
+ RA_DBGX((as, "snaprensp $f $s", ref, ir->s));
+ return 1; /* Found. */
+ }
+ }
+ return 0; /* Not found. */
+}
+
+/* Prepare snapshot for next guard instruction. */
+static void asm_snap_prep(ASMState *as)
+{
+ if (as->curins < as->snapref) {
+ do {
+ lua_assert(as->snapno != 0);
+ as->snapno--;
+ as->snapref = as->T->snap[as->snapno].ref;
+ } while (as->curins < as->snapref);
+ asm_snap_alloc(as);
+ as->snaprename = as->T->nins;
+ } else {
+ /* Process any renames above the highwater mark. */
+ for (; as->snaprename < as->T->nins; as->snaprename++) {
+ IRIns *ir = IR(as->snaprename);
+ if (asm_snap_checkrename(as, ir->op1))
+ ir->op2 = REF_BIAS-1; /* Kill rename. */
+ }
+ }
+}
+
+/* Emit conditional branch to exit for guard.
+** It's important to emit this *after* all registers have been allocated,
+** because rematerializations may invalidate the flags.
+*/
+static void asm_guardcc(ASMState *as, int cc)
+{
+ MCode *target = exitstub_addr(as->J, as->snapno);
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+ *(int32_t *)(p+1) = target - (p+5);
+ target = p;
+ cc ^= 1;
+ if (as->realign) {
+ emit_sjcc(as, cc, target);
+ return;
+ }
+ }
+ emit_jcc(as, cc, target);
+}
+
+/* -- Memory operand fusion ----------------------------------------------- */
+
+/* Arch-specific field offsets. */
+static const uint8_t field_ofs[IRFL__MAX+1] = {
+#define FLOFS(name, type, field) (uint8_t)offsetof(type, field),
+IRFLDEF(FLOFS)
+#undef FLOFS
+ 0
+};
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM 15
+
+/* Check if there's no conflicting instruction between curins and ref. */
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+ IRIns *ir = as->ir;
+ IRRef i = as->curins;
+ if (i > ref + CONFLICT_SEARCH_LIM)
+ return 0; /* Give up, ref is too far away. */
+ while (--i > ref)
+ if (ir[i].o == conflict)
+ return 0; /* Conflict found. */
+ return 1; /* Ok, no conflict. */
+}
+
+/* Fuse array reference into memory operand. */
+/* Builds as->mrm (base/idx/scale/ofs) for an IR_AREF whose op1 is the
+** FLOAD of t->array and op2 the index. May allocate GPRs for base/idx.
+*/
+static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow)
+{
+ IRIns *irb = IR(ir->op1);
+ IRIns *ira, *irx;
+ lua_assert(ir->o == IR_AREF);
+ lua_assert(irb->o == IR_FLOAD && irb->op2 == IRFL_TAB_ARRAY);
+ ira = IR(irb->op1);
+ if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
+ noconflict(as, irb->op1, IR_NEWREF)) {
+ /* We can avoid the FLOAD of t->array for colocated arrays. */
+ as->mrm.base = (uint8_t)ra_alloc1(as, irb->op1, allow); /* Table obj. */
+ as->mrm.ofs = -(int32_t)(ira->op1*sizeof(TValue)); /* Ofs to colo array. */
+ } else {
+ as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); /* Array base. */
+ as->mrm.ofs = 0;
+ }
+ irx = IR(ir->op2);
+ if (irref_isk(ir->op2)) {
+ /* Constant index folds entirely into the displacement (slots are 8b). */
+ as->mrm.ofs += 8*irx->i;
+ as->mrm.idx = RID_NONE;
+ } else {
+ rset_clear(allow, as->mrm.base);
+ as->mrm.scale = XM_SCALE8;
+ /* Fuse a constant ADD (e.g. t[i+1]) into the offset.
+ ** Doesn't help much without ABCelim, but reduces register pressure.
+ */
+ if (mayfuse(as, ir->op2) && ra_noreg(irx->r) &&
+ irx->o == IR_ADD && irref_isk(irx->op2)) {
+ as->mrm.ofs += 8*IR(irx->op2)->i;
+ as->mrm.idx = (uint8_t)ra_alloc1(as, irx->op1, allow);
+ } else {
+ as->mrm.idx = (uint8_t)ra_alloc1(as, ir->op2, allow);
+ }
+ }
+}
+
+/* Fuse array/hash/upvalue reference into memory operand.
+** Caveat: this may allocate GPRs for the base/idx registers. Be sure to
+** pass the final allow mask, excluding any GPRs used for other inputs.
+** In particular: 2-operand GPR instructions need to call ra_dest() first!
+*/
+static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
+{
+ IRIns *ir = IR(ref);
+ if (ra_noreg(ir->r)) {
+ switch ((IROp)ir->o) {
+ case IR_AREF:
+ if (mayfuse(as, ref)) {
+ asm_fusearef(as, ir, allow);
+ return;
+ }
+ break;
+ case IR_HREFK:
+ /* Constant hash slot: node pointer plus a fixed Node offset. */
+ if (mayfuse(as, ref)) {
+ as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
+ as->mrm.ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+ as->mrm.idx = RID_NONE;
+ return;
+ }
+ break;
+ case IR_UREFC:
+ /* Closed upvalue of a constant function: absolute address, no regs. */
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ GCupval *uv = &gcref(fn->l.uvptr[ir->op2])->uv;
+ as->mrm.ofs = ptr2addr(&uv->tv);
+ as->mrm.base = as->mrm.idx = RID_NONE;
+ return;
+ }
+ break;
+ default:
+ lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO);
+ break;
+ }
+ }
+ /* Fallback: materialize the reference in a register, zero displacement. */
+ as->mrm.base = (uint8_t)ra_alloc1(as, ref, allow);
+ as->mrm.ofs = 0;
+ as->mrm.idx = RID_NONE;
+}
+
+/* Fuse FLOAD/FREF reference into memory operand. */
+/* Field offset comes from the field_ofs table indexed by op2; a constant
+** object (op1) folds into an absolute address with no base register.
+*/
+static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
+{
+ lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
+ as->mrm.ofs = field_ofs[ir->op2];
+ as->mrm.idx = RID_NONE;
+ if (irref_isk(ir->op1)) {
+ as->mrm.ofs += IR(ir->op1)->i;
+ as->mrm.base = RID_NONE;
+ } else {
+ as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
+ }
+}
+
+/* Fuse string reference into memory operand. */
+/* op1 = string, op2 = byte index. The displacement starts at sizeof(GCstr)
+** to skip the string header; constant parts fold into the displacement,
+** non-constant parts occupy base then idx.
+*/
+static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
+{
+ IRIns *irr;
+ lua_assert(ir->o == IR_STRREF);
+ as->mrm.idx = as->mrm.base = RID_NONE;
+ as->mrm.scale = XM_SCALE1;
+ as->mrm.ofs = sizeof(GCstr);
+ if (irref_isk(ir->op1)) {
+ as->mrm.ofs += IR(ir->op1)->i;
+ } else {
+ Reg r = ra_alloc1(as, ir->op1, allow);
+ rset_clear(allow, r);
+ as->mrm.base = (uint8_t)r;
+ }
+ irr = IR(ir->op2);
+ if (irref_isk(ir->op2)) {
+ as->mrm.ofs += irr->i;
+ } else {
+ Reg r;
+ /* Fuse a constant add into the offset, e.g. string.sub(s, i+10). */
+ if (mayfuse(as, ir->op2) && irr->o == IR_ADD && irref_isk(irr->op2)) {
+ as->mrm.ofs += IR(irr->op2)->i;
+ r = ra_alloc1(as, irr->op1, allow);
+ } else {
+ r = ra_alloc1(as, ir->op2, allow);
+ }
+ if (as->mrm.base == RID_NONE)
+ as->mrm.base = (uint8_t)r;
+ else
+ as->mrm.idx = (uint8_t)r;
+ }
+}
+
+/* Fuse load into memory operand. */
+/* Returns either a plain register holding ref, or RID_MRM with as->mrm
+** describing a memory operand the caller can fold into the instruction.
+** Pass allow = RSET_EMPTY to force a memory operand (asm_x87load).
+*/
+static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
+{
+ IRIns *ir = IR(ref);
+ if (ra_hasreg(ir->r)) {
+ if (allow != RSET_EMPTY) return ir->r; /* Fast path. */
+ fusespill:
+ /* Force a spill if only memory operands are allowed (asm_x87load). */
+ as->mrm.base = RID_ESP;
+ as->mrm.ofs = ra_spill(as, ir);
+ as->mrm.idx = RID_NONE;
+ return RID_MRM;
+ }
+ if (ir->o == IR_KNUM) {
+ lua_assert(allow != RSET_EMPTY);
+ /* Use the constant in memory only under FPR pressure. */
+ if (!(as->freeset & ~as->modset & RSET_FPR)) {
+ as->mrm.ofs = ptr2addr(ir_knum(ir));
+ as->mrm.base = as->mrm.idx = RID_NONE;
+ return RID_MRM;
+ }
+ } else if (mayfuse(as, ref)) {
+ RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
+ if (ir->o == IR_SLOAD) {
+ /* Fuse a stack-slot load: BASE-relative, 8 bytes per slot. */
+ if (!irt_isint(ir->t) && !(ir->op2 & IRSLOAD_PARENT)) {
+ as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
+ as->mrm.ofs = 8*((int32_t)ir->op1-1);
+ as->mrm.idx = RID_NONE;
+ return RID_MRM;
+ }
+ } else if (ir->o == IR_FLOAD) {
+ /* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */
+ if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) {
+ asm_fusefref(as, ir, xallow);
+ return RID_MRM;
+ }
+ } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
+ /* Safe only if no matching store (opcode + IRDELTA_L2S) intervenes. */
+ if (noconflict(as, ref, ir->o + IRDELTA_L2S)) {
+ asm_fuseahuref(as, ir->op1, xallow);
+ return RID_MRM;
+ }
+ } else if (ir->o == IR_XLOAD) {
+ /* Generic fusion is only ok for IRT_INT operand (but see asm_comp).
+ ** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
+ */
+ if (irt_isint(ir->t)) {
+ asm_fusestrref(as, IR(ir->op1), xallow);
+ return RID_MRM;
+ }
+ }
+ }
+ /* Prefer a spill-slot operand over evicting a register. */
+ if (!(as->freeset & allow) &&
+ (allow == RSET_EMPTY || ra_hasspill(ir->s) || ref < as->loopref))
+ goto fusespill;
+ return ra_allocref(as, ref, allow);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+/* IR_TONUM: convert int (op1) to double via CVTSI2SD. */
+static void asm_tonum(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg left = asm_fuseload(as, ir->op1, RSET_GPR);
+ emit_mrm(as, XO_CVTSI2SD, dest, left);
+ if (!(as->flags & JIT_F_SPLIT_XMM))
+ emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
+}
+
+/* Convert double in 'left' to int with an exactness guard.
+** Code runs (in execution order): dest = cvttsd2si(left); tmp = cvtsi2sd
+** (dest); ucomisd(left, tmp); then guard on NE (inexact) and P (NaN).
+** Note: emission is bottom-up, so the emit_* calls appear reversed.
+*/
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ asm_guardcc(as, CC_P);
+ asm_guardcc(as, CC_NE);
+ emit_rr(as, XO_UCOMISD, left, tmp);
+ emit_rr(as, XO_CVTSI2SD, tmp, dest);
+ if (!(as->flags & JIT_F_SPLIT_XMM))
+ emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
+ emit_rr(as, XO_CVTTSD2SI, dest, left);
+ /* Can't fuse since left is needed twice. */
+}
+
+/* IR_TOINT (unguarded): convert double (op1) to int via CVTSD2SI. */
+static void asm_toint(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
+ emit_mrm(as, XO_CVTSD2SI, dest, left);
+}
+
+/* IR_TOBIT: number to raw 32 bit integer.
+** Executes: tmp = op1; tmp += op2; dest = low 32 bits of tmp (MOVD).
+** NOTE(review): presumably op2 holds the usual 2^52+2^51 bias constant so
+** the integer ends up in the low mantissa word -- confirm at IR emission.
+*/
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg tmp = ra_noreg(IR(ir->op1)->r) ?
+ ra_alloc1(as, ir->op1, RSET_FPR) :
+ ra_scratch(as, RSET_FPR);
+ Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
+ emit_rr(as, XO_MOVDto, tmp, dest);
+ emit_mrm(as, XO_ADDSD, tmp, right);
+ ra_left(as, tmp, ir->op1);
+}
+
+/* IR_STRTO: convert string (op1) to number via lj_str_numconv().
+** The result TValue is written to the ins' spill slot (or the SPS_TEMP
+** slots when unspilled); a guard on the zero return exits on failure.
+*/
+static void asm_strto(ASMState *as, IRIns *ir)
+{
+ Reg str;
+ int32_t ofs;
+ RegSet drop = RSET_SCRATCH;
+ /* Force a spill slot for the destination register (if any). */
+ if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
+ rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */
+ ra_evictset(as, drop);
+ asm_guardcc(as, CC_E);
+ emit_rr(as, XO_TEST, RID_RET, RID_RET);
+ /* int lj_str_numconv(const char *s, TValue *n) */
+ emit_call(as, lj_str_numconv);
+ ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
+ if (ofs == 0) {
+ emit_setargr(as, 2, RID_ESP);
+ } else {
+ emit_setargr(as, 2, RID_RET);
+ emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs);
+ }
+ emit_setargr(as, 1, RID_RET);
+ str = ra_alloc1(as, ir->op1, RSET_GPR);
+ /* Arg 1 points past the GCstr header to the character data. */
+ emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr));
+}
+
+/* IR_TOSTR: convert number or int (op1) to a GCstr in RID_RET.
+** Dispatches to lj_str_fromnum (double, passed via spill slot pointer) or
+** lj_str_fromint (int in a register). Allocates, so bump gcsteps.
+*/
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+ IRIns *irl = IR(ir->op1);
+ ra_destreg(as, ir, RID_RET);
+ ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
+ as->gcsteps++;
+ if (irt_isnum(irl->t)) {
+ /* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */
+ emit_call(as, lj_str_fromnum);
+ emit_setargr(as, 1, RID_RET);
+ emit_getgl(as, RID_RET, jit_L);
+ emit_setargr(as, 2, RID_RET);
+ emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl));
+ } else {
+ /* GCstr *lj_str_fromint(lua_State *L, int32_t k) */
+ emit_call(as, lj_str_fromint);
+ emit_setargr(as, 1, RID_RET);
+ emit_getgl(as, RID_RET, jit_L);
+ emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR));
+ }
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+/* IR_AREF: materialize an array element address with a single LEA.
+** When the fused operand degenerates to just a base register, fall back
+** to a plain MOV (elided entirely if base already equals dest).
+*/
+static void asm_aref(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ asm_fusearef(as, ir, RSET_GPR);
+ if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
+ emit_mrm(as, XO_LEA, dest, RID_MRM);
+ else if (as->mrm.base != dest)
+ emit_rr(as, XO_MOV, dest, as->mrm.base);
+}
+
+/* Must match with hashkey() and hashrot() in lj_tab.c. */
+/* Compute the table hash of a constant IR key at assembly time, so the
+** hash-slot index can be baked into the generated code (see asm_href).
+** Strings carry a precomputed hash; primitives hash by type id; numbers
+** and GC objects go through the same lo/hi rotate mix as the interpreter.
+*/
+static uint32_t ir_khash(IRIns *ir)
+{
+ uint32_t lo, hi;
+ if (irt_isstr(ir->t)) {
+ return ir_kstr(ir)->hash;
+ } else if (irt_isnum(ir->t)) {
+ lo = ir_knum(ir)->u32.lo;
+ hi = ir_knum(ir)->u32.hi & 0x7fffffff; /* Canonicalize -0.0 sign. */
+ } else if (irt_ispri(ir->t)) {
+ lua_assert(!irt_isnil(ir->t));
+ return irt_type(ir->t)-IRT_FALSE;
+ } else {
+ lua_assert(irt_isaddr(ir->t));
+ lo = u32ptr(ir_kgc(ir));
+ hi = lo - 0x04c11db7;
+ }
+ lo ^= hi; hi = lj_rol(hi, 14);
+ lo -= hi; hi = lj_rol(hi, 5);
+ hi ^= lo; hi -= lj_rol(lo, 27);
+ return hi;
+}
+
+/* Merge NE(HREF, niltv) check. */
+/* If the following ins is NE comparing this HREF against niltv, drop the
+** already-emitted "cmp reg, imm32; jz exit" (7+6 bytes when REX-prefixed
+** on x64, else 6+6) by advancing mcp, and return the exit address decoded
+** from the killed jz's rel32 so asm_href can branch there directly.
+*/
+static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
+{
+ /* Assumes nothing else generates NE of HREF. */
+ if (ir[1].o == IR_NE && ir[1].op1 == as->curins) {
+ if (LJ_64 && *as->mcp != XI_ARITHi)
+ as->mcp += 7+6;
+ else
+ as->mcp += 6+6; /* Kill cmp reg, imm32 + jz exit. */
+ return as->mcp + *(int32_t *)(as->mcp-4); /* Return exit address. */
+ }
+ return NULL;
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+**   Node *n = hashkey(t, key);
+**   do {
+**     if (lj_obj_equal(&n->key, key)) return &n->val;
+**   } while ((n = nextnode(n)));
+**   return niltv(L);
+** Emission is bottom-up: read the emit_* calls in reverse to get the
+** runtime order (main position -> compare loop -> miss handling).
+*/
+static void asm_href(ASMState *as, IRIns *ir)
+{
+ MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */
+ RegSet allow = RSET_GPR;
+ Reg dest = ra_dest(as, ir, allow);
+ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+ Reg key = RID_NONE, tmp = RID_NONE;
+ IRIns *irkey = IR(ir->op2);
+ int isk = irref_isk(ir->op2);
+ IRType1 kt = irkey->t;
+ uint32_t khash;
+ MCLabel l_end, l_loop, l_next;
+
+ if (!isk) {
+ rset_clear(allow, tab);
+ key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
+ if (!irt_isstr(kt))
+ tmp = ra_scratch(as, rset_exclude(allow, key));
+ }
+
+ /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+ l_end = emit_label(as);
+ if (nilexit)
+ emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */
+ else
+ emit_loada(as, dest, niltvg(J2G(as->J)));
+
+ /* Follow hash chain until the end. */
+ l_loop = emit_sjcc_label(as, CC_NZ);
+ emit_rr(as, XO_TEST, dest, dest);
+ emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
+ l_next = emit_label(as);
+
+ /* Type and value comparison. */
+ emit_sjcc(as, CC_E, l_end);
+ if (irt_isnum(kt)) {
+ if (isk) {
+ /* Assumes -0.0 is already canonicalized to +0.0. */
+ /* Constant double: compare both 32-bit halves against immediates. */
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
+ (int32_t)ir_knum(irkey)->u32.lo);
+ emit_sjcc(as, CC_NE, l_next);
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
+ (int32_t)ir_knum(irkey)->u32.hi);
+ } else {
+ emit_sjcc(as, CC_P, l_next); /* Unordered (NaN) -> next node. */
+ emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
+ emit_sjcc(as, CC_A, l_next);
+ /* The type check avoids NaN penalties and complaints from Valgrind. */
+ emit_i8(as, ~IRT_NUM);
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
+ }
+ } else {
+ if (!irt_ispri(kt)) {
+ lua_assert(irt_isaddr(kt));
+ if (isk)
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr),
+ ptr2addr(ir_kgc(irkey)));
+ else
+ emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr));
+ emit_sjcc(as, CC_NE, l_next);
+ }
+ lua_assert(!irt_isnil(kt));
+ emit_i8(as, ~irt_type(kt));
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
+ }
+ emit_sfixup(as, l_loop);
+ checkmclim(as);
+
+ /* Load main position relative to tab->node into dest. */
+ khash = isk ? ir_khash(irkey) : 1;
+ if (khash == 0) {
+ emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
+ } else {
+ /* dest = tab->node + (hash & hmask) * sizeof(Node). */
+ emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
+ if ((as->flags & JIT_F_PREFER_IMUL)) {
+ emit_i8(as, sizeof(Node));
+ emit_rr(as, XO_IMULi8, dest, dest);
+ } else {
+ /* (dest + dest*2) << 3 == dest * 24 without IMUL. */
+ emit_shifti(as, XOg_SHL, dest, 3);
+ emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
+ }
+ if (isk) {
+ emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
+ emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
+ } else if (irt_isstr(kt)) {
+ emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash));
+ emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
+ } else { /* Must match with hashrot() in lj_tab.c. */
+ emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask));
+ emit_rr(as, XO_ARITH(XOg_SUB), dest, tmp);
+ emit_shifti(as, XOg_ROL, tmp, 27);
+ emit_rr(as, XO_ARITH(XOg_XOR), dest, tmp);
+ emit_shifti(as, XOg_ROL, dest, 5);
+ emit_rr(as, XO_ARITH(XOg_SUB), tmp, dest);
+ emit_shifti(as, XOg_ROL, dest, 14);
+ emit_rr(as, XO_ARITH(XOg_XOR), tmp, dest);
+ if (irt_isnum(kt)) {
+ /* lo/hi words of the double, hi with the sign bit masked off. */
+ emit_rmro(as, XO_ARITH(XOg_AND), dest, RID_ESP, ra_spill(as, irkey)+4);
+ emit_loadi(as, dest, 0x7fffffff);
+ emit_rr(as, XO_MOVDto, key, tmp);
+ } else {
+ emit_rr(as, XO_MOV, tmp, key);
+ emit_rmro(as, XO_LEA, dest, key, -0x04c11db7);
+ }
+ }
+ }
+}
+
+/* IR_HREFK: hash slot lookup with a constant, pre-resolved slot index.
+** Guards that the key stored in node[slot] still matches the constant key
+** (type + value), then optionally computes the slot address into dest.
+*/
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{
+ IRIns *kslot = IR(ir->op2);
+ IRIns *irkey = IR(kslot->op1);
+ int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+ Reg dest = ra_used(ir) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+ Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+ MCLabel l_exit;
+ lua_assert(ofs % sizeof(Node) == 0);
+ if (ra_hasreg(dest)) {
+ if (ofs != 0) {
+ if (dest == node && !(as->flags & JIT_F_LEA_AGU))
+ emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
+ else
+ emit_rmro(as, XO_LEA, dest, node, ofs);
+ } else if (dest != node) {
+ emit_rr(as, XO_MOV, dest, node);
+ }
+ }
+ asm_guardcc(as, CC_NE);
+ l_exit = emit_label(as);
+ if (irt_isnum(irkey->t)) {
+ /* Assumes -0.0 is already canonicalized to +0.0. */
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
+ ofs + (int32_t)offsetof(Node, key.u32.lo),
+ (int32_t)ir_knum(irkey)->u32.lo);
+ emit_sjcc(as, CC_NE, l_exit);
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
+ ofs + (int32_t)offsetof(Node, key.u32.hi),
+ (int32_t)ir_knum(irkey)->u32.hi);
+ } else {
+ if (!irt_ispri(irkey->t)) {
+ lua_assert(irt_isgcv(irkey->t));
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
+ ofs + (int32_t)offsetof(Node, key.gcr),
+ ptr2addr(ir_kgc(irkey)));
+ emit_sjcc(as, CC_NE, l_exit);
+ }
+ lua_assert(!irt_isnil(irkey->t));
+ emit_i8(as, ~irt_type(irkey->t));
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
+ ofs + (int32_t)offsetof(Node, key.it));
+ }
+}
+
+/* IR_NEWREF: create a new table key via lj_tab_newkey().
+** The key must be passed as a cTValue*: constant numbers use the constant
+** itself, variable numbers their spill slot, everything else is staged
+** through g->tmptv (type tag at +4, value at +0).
+*/
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+ IRRef keyref = ir->op2;
+ IRIns *irkey = IR(keyref);
+ RegSet allow = RSET_GPR;
+ Reg tab, tmp;
+ ra_destreg(as, ir, RID_RET);
+ ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
+ tab = ra_alloc1(as, ir->op1, allow);
+ tmp = ra_scratch(as, rset_clear(allow, tab));
+ /* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */
+ emit_call(as, lj_tab_newkey);
+ emit_setargr(as, 1, tmp);
+ emit_setargr(as, 2, tab);
+ emit_getgl(as, tmp, jit_L);
+ if (irt_isnum(irkey->t)) {
+ /* For numbers use the constant itself or a spill slot as a TValue. */
+ if (irref_isk(keyref)) {
+ emit_setargp(as, 3, ir_knum(irkey));
+ } else {
+ emit_setargr(as, 3, tmp);
+ emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
+ }
+ } else {
+ /* Otherwise use g->tmptv to hold the TValue. */
+ lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t));
+ emit_setargr(as, 3, tmp);
+ if (!irref_isk(keyref)) {
+ Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp));
+ emit_movtomro(as, src, tmp, 0);
+ } else if (!irt_ispri(irkey->t)) {
+ emit_movmroi(as, tmp, 0, irkey->i);
+ }
+ emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
+ emit_loada(as, tmp, &J2G(as->J)->tmptv);
+ }
+}
+
+/* IR_UREFO/IR_UREFC: address of an upvalue's value.
+** Constant function: load the value pointer directly from the upvalue.
+** Otherwise load the GCupval from the function's uvptr array; UREFC also
+** guards that the upvalue is closed (uv->closed == 1) before taking &uv->tv.
+*/
+static void asm_uref(ASMState *as, IRIns *ir)
+{
+ /* NYI: Check that UREFO is still open and not aliasing a slot. */
+ if (ra_used(ir)) {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ TValue **v = &gcref(fn->l.uvptr[ir->op2])->uv.v;
+ emit_rma(as, XO_MOV, dest, v);
+ } else {
+ Reg uv = ra_scratch(as, RSET_GPR);
+ Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->o == IR_UREFC) {
+ emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
+ asm_guardcc(as, CC_NE);
+ emit_i8(as, 1);
+ emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
+ } else {
+ emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
+ }
+ emit_rmro(as, XO_MOV, uv, func,
+ (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)ir->op2);
+ }
+ }
+}
+
+/* IR_FREF: materialize a field address (object + field_ofs) with LEA. */
+static void asm_fref(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ asm_fusefref(as, ir, RSET_GPR);
+ emit_mrm(as, XO_LEA, dest, RID_MRM);
+}
+
+/* IR_STRREF: materialize a string byte address.
+** Fully constant -> load immediate; base-only with base==dest -> ADD imm;
+** general case -> LEA of the fused operand.
+*/
+static void asm_strref(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ asm_fusestrref(as, ir, RSET_GPR);
+ if (as->mrm.base == RID_NONE)
+ emit_loadi(as, dest, as->mrm.ofs);
+ else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
+ emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
+ else
+ emit_mrm(as, XO_LEA, dest, RID_MRM);
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+/* IR_FLOAD: load an object field, sign-/zero-extending sub-word types. */
+static void asm_fload(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ x86Op xo;
+ asm_fusefref(as, ir, RSET_GPR);
+ switch (irt_type(ir->t)) {
+ case IRT_I8: xo = XO_MOVSXb; break;
+ case IRT_U8: xo = XO_MOVZXb; break;
+ case IRT_I16: xo = XO_MOVSXw; break;
+ case IRT_U16: xo = XO_MOVZXw; break;
+ default:
+ lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
+ xo = XO_MOV;
+ break;
+ }
+ emit_mrm(as, xo, dest, RID_MRM);
+}
+
+/* IR_FSTORE: store op2 into the object field referenced by op1.
+** Uses immediate forms for constants where the encoding is benign; byte
+** stores need a byte-addressable source register (RSET_GPR8 / FORCE_REX).
+*/
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+ RegSet allow = RSET_GPR;
+ Reg src = RID_NONE;
+ /* The IRT_I16/IRT_U16 stores should never be simplified for constant
+ ** values since mov word [mem], imm16 has a length-changing prefix.
+ */
+ if (!irref_isk(ir->op2) || irt_isi16(ir->t) || irt_isu16(ir->t)) {
+ RegSet allow8 = (irt_isi8(ir->t) || irt_isu8(ir->t)) ? RSET_GPR8 : RSET_GPR;
+ src = ra_alloc1(as, ir->op2, allow8);
+ rset_clear(allow, src);
+ }
+ asm_fusefref(as, IR(ir->op1), allow);
+ if (ra_hasreg(src)) {
+ x86Op xo;
+ switch (irt_type(ir->t)) {
+ case IRT_I8: case IRT_U8: xo = XO_MOVtob; src |= FORCE_REX; break;
+ case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
+ default:
+ lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
+ xo = XO_MOVto;
+ break;
+ }
+ emit_mrm(as, xo, src, RID_MRM);
+ } else {
+ /* Constant source: store the immediate directly. */
+ if (irt_isi8(ir->t) || irt_isu8(ir->t)) {
+ emit_i8(as, IR(ir->op2)->i);
+ emit_mrm(as, XO_MOVmib, 0, RID_MRM);
+ } else {
+ lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
+ emit_i32(as, IR(ir->op2)->i);
+ emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+ }
+ }
+}
+
+/* IR_ALOAD/HLOAD/ULOAD: load a TValue from array/hash/upvalue slot.
+** Guards the type tag at slot+4 (CC_A for numbers, CC_NE otherwise) even
+** when the loaded value itself is unused.
+*/
+static void asm_ahuload(ASMState *as, IRIns *ir)
+{
+ RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+ lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t));
+ if (ra_used(ir)) {
+ Reg dest = ra_dest(as, ir, allow);
+ asm_fuseahuref(as, ir->op1, RSET_GPR);
+ emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+ } else {
+ asm_fuseahuref(as, ir->op1, RSET_GPR);
+ }
+ /* Always do the type check, even if the load result is unused. */
+ asm_guardcc(as, irt_isnum(ir->t) ? CC_A : CC_NE);
+ emit_i8(as, ~irt_type(ir->t));
+ as->mrm.ofs += 4; /* Type tag lives in the high word of the TValue. */
+ emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
+}
+
+/* IR_ASTORE/HSTORE/USTORE: store a TValue to array/hash/upvalue slot.
+** Numbers store as one 8-byte MOVSD; other types store the value word and
+** then the type tag at +4 separately.
+*/
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t)) {
+ Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
+ asm_fuseahuref(as, ir->op1, RSET_GPR);
+ emit_mrm(as, XO_MOVSDto, src, RID_MRM);
+ } else {
+ IRIns *irr = IR(ir->op2);
+ RegSet allow = RSET_GPR;
+ Reg src = RID_NONE;
+ if (!irref_isk(ir->op2)) {
+ src = ra_alloc1(as, ir->op2, allow);
+ rset_clear(allow, src);
+ }
+ asm_fuseahuref(as, ir->op1, allow);
+ if (ra_hasreg(src)) {
+ emit_mrm(as, XO_MOVto, src, RID_MRM);
+ } else if (!irt_ispri(irr->t)) {
+ /* Primitives carry no value word -- only the tag is written below. */
+ lua_assert(irt_isaddr(ir->t));
+ emit_i32(as, irr->i);
+ emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+ }
+ as->mrm.ofs += 4;
+ emit_i32(as, (int32_t)~irt_type(ir->t));
+ emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+ }
+}
+
+/* IR_SLOAD: load a Lua stack slot (BASE-relative, 8 bytes per slot).
+** Int-typed slots load the double and convert with an exactness guard via
+** asm_tointg; guarded loads always emit the tag check at slot+4, even if
+** the result is unused.
+*/
+static void asm_sload(ASMState *as, IRIns *ir)
+{
+ int32_t ofs = 8*((int32_t)ir->op1-1);
+ IRType1 t = ir->t;
+ Reg base;
+ lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
+ if (irt_isint(t)) {
+ Reg left = ra_scratch(as, RSET_FPR);
+ asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
+ base = ra_alloc1(as, REF_BASE, RSET_GPR);
+ emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+ t.irt = IRT_NUM; /* Continue with a regular number type check. */
+ } else if (ra_used(ir)) {
+ RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+ Reg dest = ra_dest(as, ir, allow);
+ lua_assert(irt_isnum(ir->t) || irt_isaddr(ir->t));
+ base = ra_alloc1(as, REF_BASE, RSET_GPR);
+ emit_movrmro(as, dest, base, ofs);
+ } else {
+ if (!irt_isguard(ir->t))
+ return; /* No type check: avoid base alloc. */
+ base = ra_alloc1(as, REF_BASE, RSET_GPR);
+ }
+ if (irt_isguard(ir->t)) {
+ /* Need type check, even if the load result is unused. */
+ asm_guardcc(as, irt_isnum(t) ? CC_A : CC_NE);
+ emit_i8(as, ~irt_type(t));
+ emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
+ }
+}
+
+/* IR_XLOAD: load from a raw (string) reference with width extension. */
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ x86Op xo;
+ asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */
+ /* ir->op2 is ignored -- unaligned loads are ok on x86. */
+ switch (irt_type(ir->t)) {
+ case IRT_I8: xo = XO_MOVSXb; break;
+ case IRT_U8: xo = XO_MOVZXb; break;
+ case IRT_I16: xo = XO_MOVSXw; break;
+ case IRT_U16: xo = XO_MOVZXw; break;
+ default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break;
+ }
+ emit_mrm(as, xo, dest, RID_MRM);
+}
+
+/* -- String ops ---------------------------------------------------------- */
+
+/* IR_SNEW: intern a new string via lj_str_new(L, str, len).
+** op1 is a STRREF (pointer arg), op2 the length; fully-constant STRREFs
+** are passed as an immediate address instead of allocating a register.
+** Allocates, so bump gcsteps.
+*/
+static void asm_snew(ASMState *as, IRIns *ir)
+{
+ RegSet allow = RSET_GPR;
+ Reg left, right;
+ IRIns *irl;
+ ra_destreg(as, ir, RID_RET);
+ ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
+ irl = IR(ir->op1);
+ left = irl->r;
+ right = IR(ir->op2)->r;
+ if (ra_noreg(left)) {
+ lua_assert(irl->o == IR_STRREF);
+ /* Get register only for non-const STRREF. */
+ if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) {
+ if (ra_hasreg(right)) rset_clear(allow, right);
+ left = ra_allocref(as, ir->op1, allow);
+ }
+ }
+ if (ra_noreg(right) && !irref_isk(ir->op2)) {
+ if (ra_hasreg(left)) rset_clear(allow, left);
+ right = ra_allocref(as, ir->op2, allow);
+ }
+ /* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */
+ emit_call(as, lj_str_new);
+ emit_setargr(as, 1, RID_RET);
+ emit_getgl(as, RID_RET, jit_L);
+ if (ra_noreg(left)) /* Use immediate for const STRREF. */
+ emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i +
+ (int32_t)sizeof(GCstr));
+ else
+ emit_setargr(as, 2, left);
+ if (ra_noreg(right))
+ emit_setargi(as, 3, IR(ir->op2)->i);
+ else
+ emit_setargr(as, 3, right);
+ as->gcsteps++;
+}
+
+/* -- Table ops ----------------------------------------------------------- */
+
+/* IR_TNEW: allocate a new table via lj_tab_new(L, asize, hbits). */
+static void asm_tnew(ASMState *as, IRIns *ir)
+{
+ ra_destreg(as, ir, RID_RET);
+ ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
+ /* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */
+ emit_call(as, lj_tab_new);
+ emit_setargr(as, 1, RID_RET);
+ emit_setargi(as, 2, ir->op1);
+ emit_setargi(as, 3, ir->op2);
+ emit_getgl(as, RID_RET, jit_L);
+ as->gcsteps++;
+}
+
+/* IR_TDUP: duplicate a constant template table via lj_tab_dup(). */
+static void asm_tdup(ASMState *as, IRIns *ir)
+{
+ ra_destreg(as, ir, RID_RET);
+ ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
+ /* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */
+ emit_call(as, lj_tab_dup);
+ emit_setargr(as, 1, RID_RET);
+ emit_setargp(as, 2, ir_kgc(IR(ir->op1)));
+ emit_getgl(as, RID_RET, jit_L);
+ as->gcsteps++;
+}
+
+/* IR_TLEN: table length via lj_tab_len(t), result in RID_RET. */
+static void asm_tlen(ASMState *as, IRIns *ir)
+{
+ ra_destreg(as, ir, RID_RET);
+ ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
+ emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */
+ emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR));
+}
+
+/* IR_TBAR: GC write barrier for a table.
+** Executes (reverse of emission): test marked & BLACK; if black, clear the
+** black bit and link the table onto g->gc.grayagain; otherwise skip.
+*/
+static void asm_tbar(ASMState *as, IRIns *ir)
+{
+ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+ MCLabel l_end = emit_label(as);
+ emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
+ emit_setgl(as, tab, gc.grayagain);
+ emit_getgl(as, tmp, gc.grayagain);
+ emit_i8(as, ~LJ_GC_BLACK);
+ emit_rmro(as, XO_ARITHib, XOg_AND, tab, offsetof(GCtab, marked));
+ emit_sjcc(as, CC_Z, l_end);
+ emit_i8(as, LJ_GC_BLACK);
+ emit_rmro(as, XO_GROUP3b, XOg_TEST, tab, offsetof(GCtab, marked));
+}
+
+/* IR_OBAR: GC write barrier for storing value op2 into object op1.
+** Only closed upvalues are barriered (op1 must be UREFC); calls
+** lj_gc_barrieruv(g, o, v) when the object is black and the value white.
+*/
+static void asm_obar(ASMState *as, IRIns *ir)
+{
+ RegSet allow = RSET_GPR;
+ Reg obj, val;
+ GCobj *valp;
+ MCLabel l_end;
+ int32_t ofs;
+ ra_evictset(as, RSET_SCRATCH);
+ if (irref_isk(ir->op2)) {
+ valp = ir_kgc(IR(ir->op2));
+ val = RID_NONE;
+ } else {
+ valp = NULL;
+ val = ra_alloc1(as, ir->op2, allow);
+ rset_clear(allow, val);
+ }
+ obj = ra_alloc1(as, ir->op1, allow);
+ l_end = emit_label(as);
+ /* No need for other object barriers (yet). */
+ lua_assert(IR(ir->op1)->o == IR_UREFC);
+ /* obj points at &uv->tv; back up to the GCupval header for marked/arg. */
+ ofs = -(int32_t)offsetof(GCupval, tv);
+ /* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */
+ emit_call(as, lj_gc_barrieruv);
+ if (ofs == 0) {
+ emit_setargr(as, 2, obj);
+ } else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) {
+ emit_setargr(as, 2, obj);
+ emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs);
+ } else {
+ emit_setargr(as, 2, RID_RET);
+ emit_rmro(as, XO_LEA, RID_RET, obj, ofs);
+ }
+ emit_setargp(as, 1, J2G(as->J));
+ if (valp)
+ emit_setargp(as, 3, valp);
+ else
+ emit_setargr(as, 3, val);
+ emit_sjcc(as, CC_Z, l_end);
+ emit_i8(as, LJ_GC_WHITES);
+ if (valp)
+ emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked);
+ else
+ emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
+ emit_sjcc(as, CC_Z, l_end);
+ emit_i8(as, LJ_GC_BLACK);
+ emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
+ ofs + (int32_t)offsetof(GChead, marked));
+}
+
+/* -- FP/int arithmetic and logic operations ------------------------------ */
+
+/* Load reference onto x87 stack. Force a spill to memory if needed. */
+/* Constants use FLDZ/FLD1 when possible, else FLD from the constant; an
+** unused TONUM of a fusable int loads the int directly with FILD from its
+** spill slot; everything else FLDs a (possibly forced-to-memory) operand.
+*/
+static void asm_x87load(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ if (ir->o == IR_KNUM) {
+ cTValue *tv = ir_knum(ir);
+ if (tvispzero(tv)) /* Use fldz only for +0. */
+ emit_x87op(as, XI_FLDZ);
+ else if (tvispone(tv))
+ emit_x87op(as, XI_FLD1);
+ else
+ emit_rma(as, XO_FLDq, XOg_FLDq, tv);
+ } else if (ir->o == IR_TONUM && !ra_used(ir) &&
+ !irref_isk(ir->op1) && mayfuse(as, ir->op1)) {
+ IRIns *iri = IR(ir->op1);
+ emit_rmro(as, XO_FILDd, XOg_FILDd, RID_ESP, ra_spill(as, iri));
+ } else {
+ /* RSET_EMPTY forces asm_fuseload to hand back a memory operand. */
+ emit_mrm(as, XO_FLDq, XOg_FLDq, asm_fuseload(as, ref, RSET_EMPTY));
+ }
+}
+
+/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
+/* Pattern: EXP2(MUL(LOG2(x), y)) == x^y. Requires the MUL and LOG2 to be
+** the two immediately preceding, otherwise-unused instructions. Emits a
+** call to lj_vm_pow with x and y on the x87 stack; returns 1 on success.
+*/
+static int fpmjoin_pow(ASMState *as, IRIns *ir)
+{
+ IRIns *irp = IR(ir->op1);
+ if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
+ IRIns *irpp = IR(irp->op1);
+ if (irpp == ir-2 && irpp->o == IR_FPMATH &&
+ irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
+ emit_call(as, lj_vm_pow); /* st0 = lj_vm_pow(st1, st0) */
+ asm_x87load(as, irp->op2);
+ asm_x87load(as, irpp->op1);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* IR_FPMATH and friends (ATAN2/LDEXP/POWI): FP math operations.
+** SQRT uses SSE2 SQRTSD; floor/ceil/trunc use SSE4.1 ROUNDSD when
+** available; everything else goes through the x87 unit or a lj_vm_*
+** helper, staging operands/results via a spill slot.
+*/
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+ IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+ if (fpm == IRFPM_SQRT) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
+ emit_mrm(as, XO_SQRTSD, dest, left);
+ } else if ((as->flags & JIT_F_SSE4_1) && fpm <= IRFPM_TRUNC) {
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
+ /* Round down/up/trunc == 1001/1010/1011. */
+ emit_i8(as, 0x09 + fpm);
+ /* ROUNDSD has a 4-byte opcode which doesn't fit in x86Op. */
+ emit_mrm(as, XO_ROUNDSD, dest, left);
+ /* Let's pretend it's a 3-byte opcode, and compensate afterwards. */
+ /* This is atrocious, but the alternatives are much worse. */
+ if (LJ_64 && as->mcp[1] != (MCode)(XO_ROUNDSD >> 16)) {
+ as->mcp[0] = as->mcp[1]; as->mcp[1] = 0x0f; /* Swap 0F and REX. */
+ }
+ *--as->mcp = 0x66; /* 1st byte of ROUNDSD opcode. */
+ } else {
+ /* x87 path: operand in via asm_x87load, result out via FSTP to a
+ ** spill slot, then reloaded into an XMM dest if one was assigned.
+ */
+ int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
+ Reg dest = ir->r;
+ if (ra_hasreg(dest)) {
+ ra_free(as, dest);
+ ra_modified(as, dest);
+ emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
+ }
+ emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+ switch (fpm) { /* st0 = lj_vm_*(st0) */
+ case IRFPM_FLOOR: emit_call(as, lj_vm_floor); break;
+ case IRFPM_CEIL: emit_call(as, lj_vm_ceil); break;
+ case IRFPM_TRUNC: emit_call(as, lj_vm_trunc); break;
+ case IRFPM_EXP: emit_call(as, lj_vm_exp); break;
+ case IRFPM_EXP2:
+ if (fpmjoin_pow(as, ir)) return;
+ emit_call(as, lj_vm_exp2); /* st0 = lj_vm_exp2(st0) */
+ break;
+ case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
+ case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
+ case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
+ case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
+ /* Note: the use of fyl2xp1 would be pointless here. When computing
+ ** log(1.0+eps) the precision is already lost after 1.0 is added.
+ ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
+ */
+ emit_x87op(as, XI_FYL2X); break;
+ case IRFPM_OTHER:
+ switch (ir->o) {
+ case IR_ATAN2:
+ emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
+ case IR_LDEXP:
+ emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
+ case IR_POWI:
+ emit_call(as, lj_vm_powi); /* st0 = lj_vm_powi(st0, [esp]) */
+ emit_rmro(as, XO_MOVto, ra_alloc1(as, ir->op2, RSET_GPR), RID_ESP, 0);
+ break;
+ default: lua_assert(0); break;
+ }
+ break;
+ default: lua_assert(0); break;
+ }
+ asm_x87load(as, ir->op1);
+ /* Pre-load the second x87 operand for the log variants and LDEXP
+ ** (runs before the op above, since emission is bottom-up).
+ */
+ switch (fpm) {
+ case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
+ case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
+ case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
+ case IRFPM_OTHER:
+ if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
+ break;
+ default: break;
+ }
+ }
+}
+
+/* Find out whether swapping operands might be beneficial. */
+/* Heuristic for commutative 2-operand ops: returns 1 when the left operand
+** should move to the right (which may then be fused or left in place).
+** Precondition: the right operand has no register yet.
+*/
+static int swapops(ASMState *as, IRIns *ir)
+{
+ IRIns *irl = IR(ir->op1);
+ IRIns *irr = IR(ir->op2);
+ lua_assert(ra_noreg(irr->r));
+ if (!irm_iscomm(lj_ir_mode[ir->o]))
+ return 0; /* Can't swap non-commutative operations. */
+ if (irref_isk(ir->op2))
+ return 0; /* Don't swap constants to the left. */
+ if (ra_hasreg(irl->r))
+ return 1; /* Swap if left already has a register. */
+ if (ra_samehint(ir->r, irr->r))
+ return 1; /* Swap if dest and right have matching hints. */
+ if (ir->op1 < as->loopref && !irt_isphi(irl->t) &&
+ !(ir->op2 < as->loopref && !irt_isphi(irr->t)))
+ return 1; /* Swap invariants to the right. */
+ if (opisfusableload(irl->o))
+ return 1; /* Swap fusable loads to the right. */
+ return 0; /* Otherwise don't swap. */
+}
+
+/* Generic two-operand SSE2 FP arithmetic: dest = op1 <xo> op2.
+** Ties dest to the left operand (ra_left) and tries to fuse the right
+** operand into a memory operand, possibly after a commutative swap.
+*/
+static void asm_fparith(ASMState *as, IRIns *ir, x86Op xo)
+{
+ IRRef lref = ir->op1;
+ IRRef rref = ir->op2;
+ RegSet allow = RSET_FPR;
+ Reg dest;
+ Reg right = IR(rref)->r;
+ if (ra_hasreg(right))
+ rset_clear(allow, right);
+ dest = ra_dest(as, ir, allow);
+ if (lref == rref) {
+ right = dest; /* Same operand twice, e.g. x*x. */
+ } else if (ra_noreg(right)) {
+ if (swapops(as, ir)) {
+ IRRef tmp = lref; lref = rref; rref = tmp;
+ }
+ right = asm_fuseload(as, rref, rset_clear(allow, dest));
+ }
+ emit_mrm(as, xo, dest, right);
+ ra_left(as, dest, lref);
+}
+
+/* Generic two-operand integer arithmetic: dest = op1 <xa> op2.
+** Constants use the immediate form; otherwise the right operand may be
+** swapped/fused like asm_fparith. Overflow-checked variants (IR_ADDOV
+** etc.) guard on CC_O. A directly preceding "test r,r" (recorded in
+** as->testmcp) is dropped, since this instruction sets the flags itself.
+*/
+static void asm_intarith(ASMState *as, IRIns *ir, x86Arith xa)
+{
+ IRRef lref = ir->op1;
+ IRRef rref = ir->op2;
+ RegSet allow = RSET_GPR;
+ Reg dest, right;
+ if (as->testmcp == as->mcp) { /* Drop test r,r instruction. */
+ as->testmcp = NULL;
+ as->mcp += (LJ_64 && *as->mcp != XI_TEST) ? 3 : 2;
+ }
+ right = IR(rref)->r;
+ if (ra_hasreg(right))
+ rset_clear(allow, right);
+ dest = ra_dest(as, ir, allow);
+ if (lref == rref) {
+ right = dest;
+ } else if (ra_noreg(right) && !irref_isk(rref)) {
+ if (swapops(as, ir)) {
+ IRRef tmp = lref; lref = rref; rref = tmp;
+ }
+ right = asm_fuseload(as, rref, rset_clear(allow, dest));
+ /* Note: fuses only with IR_FLOAD for now. */
+ }
+ if (irt_isguard(ir->t)) /* For IR_ADDOV etc. */
+ asm_guardcc(as, CC_O);
+ if (ra_hasreg(right))
+ emit_mrm(as, XO_ARITH(xa), dest, right);
+ else
+ emit_gri(as, XG_ARITHi(xa), dest, IR(ir->op2)->i);
+ ra_left(as, dest, lref);
+}
+
+/* LEA is really a 4-operand ADD with an independent destination register,
+** up to two source registers and an immediate. One register can be scaled
+** by 1, 2, 4 or 8. This can be used to avoid moves or to fuse several
+** instructions.
+**
+** Currently only a few common cases are supported:
+** - 3-operand ADD: y = a+b; y = a+k with a and b already allocated
+** - Left ADD fusion: y = (a+b)+k; y = (a+k)+b
+** - Right ADD fusion: y = a+(b+k)
+** The omitted variants have already been reduced by FOLD.
+**
+** There are more fusion opportunities, like gathering shifts or joining
+** common references. But these are probably not worth the trouble, since
+** array indexing is not decomposed and already makes use of all fields
+** of the ModRM operand.
+*/
+/* Returns 1 on success (LEA emitted), 0 if the caller should fall back
+** to a plain integer ADD (asm_intarith).
+*/
+static int asm_lea(ASMState *as, IRIns *ir)
+{
+ IRIns *irl = IR(ir->op1);
+ IRIns *irr = IR(ir->op2);
+ RegSet allow = RSET_GPR;
+ Reg dest;
+ as->mrm.base = as->mrm.idx = RID_NONE;
+ as->mrm.scale = XM_SCALE1;
+ as->mrm.ofs = 0;
+ if (ra_hasreg(irl->r)) { /* 3-operand case: left already allocated. */
+  rset_clear(allow, irl->r);
+  as->mrm.base = irl->r;
+  if (irref_isk(ir->op2) || ra_hasreg(irr->r)) {
+   /* The PHI renaming logic does a better job in some cases. */
+   if (ra_hasreg(ir->r) &&
+   ((irt_isphi(irl->t) && as->phireg[ir->r] == ir->op1) ||
+   (irt_isphi(irr->t) && as->phireg[ir->r] == ir->op2)))
+    return 0;
+   if (irref_isk(ir->op2)) {
+    as->mrm.ofs = irr->i; /* y = a+k */
+   } else {
+    rset_clear(allow, irr->r);
+    as->mrm.idx = irr->r; /* y = a+b */
+   }
+  } else if (irr->o == IR_ADD && mayfuse(as, ir->op2) &&
+  irref_isk(irr->op2)) {
+   /* Right ADD fusion: y = a+(b+k). */
+   Reg idx = ra_alloc1(as, irr->op1, allow);
+   rset_clear(allow, idx);
+   as->mrm.idx = (uint8_t)idx;
+   as->mrm.ofs = IR(irr->op2)->i;
+  } else {
+   return 0;
+  }
+ } else if (ir->op1 != ir->op2 && irl->o == IR_ADD && mayfuse(as, ir->op1) &&
+ (irref_isk(ir->op2) || irref_isk(irl->op2))) {
+  /* Left ADD fusion: y = (a+b)+k or y = (a+k)+b. */
+  Reg idx, base = ra_alloc1(as, irl->op1, allow);
+  rset_clear(allow, base);
+  as->mrm.base = (uint8_t)base;
+  if (irref_isk(ir->op2)) {
+   as->mrm.ofs = irr->i;
+   idx = ra_alloc1(as, irl->op2, allow);
+  } else {
+   as->mrm.ofs = IR(irl->op2)->i;
+   idx = ra_alloc1(as, ir->op2, allow);
+  }
+  rset_clear(allow, idx);
+  as->mrm.idx = (uint8_t)idx;
+ } else {
+  return 0;
+ }
+ dest = ra_dest(as, ir, allow);
+ emit_mrm(as, XO_LEA, dest, RID_MRM);
+ return 1; /* Success. */
+}
+
+/* IR_ADD: FP add via SSE, integer add preferably via LEA.
+** Fall back to a plain ADD when LEA fusion fails, when the CPU flag
+** JIT_F_LEA_AGU is unset, or when a pending test r,r could be dropped
+** (only asm_intarith can drop it; LEA doesn't set the flags).
+*/
+static void asm_add(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t))
+  asm_fparith(as, ir, XO_ADDSD);
+ else if ((as->flags & JIT_F_LEA_AGU) || as->testmcp == as->mcp ||
+ !asm_lea(as, ir))
+  asm_intarith(as, ir, XOg_ADD);
+}
+
+/* IR_BNOT: bitwise complement, destination coalesced with the operand. */
+static void asm_bitnot(ASMState *as, IRIns *ir)
+{
+ Reg r = ra_dest(as, ir, RSET_GPR);
+ emit_rr(as, XO_GROUP3, XOg_NOT, r); /* not r */
+ ra_left(as, r, ir->op1);
+}
+
+/* IR_BSWAP: byte-swap a GPR. BSWAP (0F C8+r) has no ModRM form, so the
+** opcode bytes are written directly into the (backwards-growing) mcode
+** buffer, with an optional REX prefix for high registers on x64.
+*/
+static void asm_bitswap(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ MCode *p = as->mcp;
+ p[-1] = (MCode)(XI_BSWAP+(dest&7)); /* Low 3 bits of reg in the opcode. */
+ p[-2] = 0x0f;
+ p -= 2;
+ REXRB(p, 0, dest); /* Prepend REX.B if dest needs it. */
+ as->mcp = p;
+ ra_left(as, dest, ir->op1);
+}
+
+/* Emit a shift/rotate op. Constant shift counts use the imm8 (or the
+** shorter shift-by-1) encodings; variable counts must go through CL,
+** so ECX is reserved/loaded as needed.
+*/
+static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
+{
+ IRRef rref = ir->op2;
+ IRIns *irr = IR(rref);
+ Reg dest;
+ if (irref_isk(rref)) { /* Constant shifts. */
+  int shift;
+  dest = ra_dest(as, ir, RSET_GPR);
+  shift = irr->i & 31; /* Handle shifts of 0..31 bits. */
+  switch (shift) {
+  case 0: return; /* Shift by 0 is a no-op (dest coalesced below). */
+  case 1: emit_rr(as, XO_SHIFT1, (Reg)xs, dest); break;
+  default: emit_shifti(as, xs, dest, shift); break;
+  }
+ } else { /* Variable shifts implicitly use register cl (i.e. ecx). */
+  RegSet allow = rset_exclude(RSET_GPR, RID_ECX);
+  Reg right = irr->r;
+  if (ra_noreg(right)) {
+   right = ra_allocref(as, rref, RID2RSET(RID_ECX)); /* Put count in ECX. */
+  } else if (right != RID_ECX) {
+   rset_clear(allow, right);
+   ra_scratch(as, RID2RSET(RID_ECX)); /* Evict ECX for the copy below. */
+  }
+  dest = ra_dest(as, ir, allow);
+  emit_rr(as, XO_SHIFTcl, (Reg)xs, dest);
+  if (right != RID_ECX)
+   emit_rr(as, XO_MOV, RID_ECX, right); /* Executed before the shift. */
+ }
+ ra_left(as, dest, ir->op1);
+ /*
+ ** Note: avoid using the flags resulting from a shift or rotate!
+ ** All of them cause a partial flag stall, except for r,1 shifts
+ ** (but not rotates). And a shift count of 0 leaves the flags unmodified.
+ */
+}
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* Virtual flags for unordered FP comparisons. */
+#define VCC_U 0x100 /* Unordered. */
+#define VCC_P 0x200 /* Needs extra CC_P branch. */
+#define VCC_S 0x400 /* Swap avoids CC_P branch. */
+#define VCC_PS (VCC_P|VCC_S)
+
+/* Emit a guarded comparison. cc packs three fields (see asm_comp below):
+** bits 0-3 integer condition code, bits 4-7 FP condition code,
+** bits 8+ the VCC_* flags for unordered FP handling.
+*/
+static void asm_comp_(ASMState *as, IRIns *ir, int cc)
+{
+ if (irt_isnum(ir->t)) {
+  IRRef lref = ir->op1;
+  IRRef rref = ir->op2;
+  Reg left, right;
+  MCLabel l_around;
+  /*
+  ** An extra CC_P branch is required to preserve ordered/unordered
+  ** semantics for FP comparisons. This can be avoided by swapping
+  ** the operands and inverting the condition (except for EQ and UNE).
+  ** So always try to swap if possible.
+  **
+  ** Another option would be to swap operands to achieve better memory
+  ** operand fusion. But it's unlikely that this outweighs the cost
+  ** of the extra branches.
+  */
+  if (cc & VCC_S) { /* Swap? */
+   IRRef tmp = lref; lref = rref; rref = tmp;
+   cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
+  }
+  left = ra_alloc1(as, lref, RSET_FPR);
+  right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
+  l_around = emit_label(as);
+  asm_guardcc(as, cc >> 4); /* Use the FP condition code. */
+  if (cc & VCC_P) { /* Extra CC_P branch required? */
+   if (!(cc & VCC_U)) {
+    asm_guardcc(as, CC_P); /* Branch to exit for ordered comparisons. */
+   } else if (l_around != as->invmcp) {
+    emit_sjcc(as, CC_P, l_around); /* Branch around for unordered. */
+   } else {
+    /* Patched to mcloop by asm_loop_fixup. */
+    as->loopinv = 2;
+    if (as->realign)
+     emit_sjcc(as, CC_P, as->mcp);
+    else
+     emit_jcc(as, CC_P, as->mcp);
+   }
+  }
+  emit_mrm(as, XO_UCOMISD, left, right);
+ } else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) {
+  IRRef lref = ir->op1, rref = ir->op2;
+  IROp leftop = (IROp)(IR(lref)->o);
+  lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
+  /* Swap constants (only for ABC) and fusable loads to the right. */
+  if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
+   if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */
+   else if ((cc & 0xa) == 0x2) cc ^= 5; /* A <-> B, AE <-> BE */
+   lref = ir->op2; rref = ir->op1;
+  }
+  if (irref_isk(rref)) {
+   IRIns *irl = IR(lref);
+   int32_t imm = IR(rref)->i;
+   /* Check whether we can use test ins. Not for unsigned, since CF=0. */
+   int usetest = (imm == 0 && (cc & 0xa) != 0x2);
+   if (usetest && irl->o == IR_BAND && irl+1 == ir && !ra_used(irl)) {
+    /* Combine comp(BAND(ref, r/imm), 0) into test mrm, r/imm. */
+    Reg right, left = RID_NONE;
+    RegSet allow = RSET_GPR;
+    if (!irref_isk(irl->op2)) {
+     left = ra_alloc1(as, irl->op2, allow);
+     rset_clear(allow, left);
+    }
+    right = asm_fuseload(as, irl->op1, allow);
+    asm_guardcc(as, cc);
+    if (irref_isk(irl->op2)) {
+     emit_i32(as, IR(irl->op2)->i);
+     emit_mrm(as, XO_GROUP3, XOg_TEST, right);
+    } else {
+     emit_mrm(as, XO_TEST, left, right);
+    }
+   } else {
+    Reg left;
+    if (opisfusableload((IROp)irl->o) &&
+    ((irt_isi8(irl->t) && checki8(imm)) ||
+    (irt_isu8(irl->t) && checku8(imm)))) {
+     /* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8
+     ** loads are handled here. The IRT_I16/IRT_U16 loads should never be
+     ** fused, since cmp word [mem], imm16 has a length-changing prefix.
+     */
+     IRType1 origt = irl->t; /* Temporarily flip types. */
+     irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
+     left = asm_fuseload(as, lref, RSET_GPR);
+     irl->t = origt;
+     if (left == RID_MRM) { /* Fusion succeeded? */
+      asm_guardcc(as, cc);
+      emit_i8(as, imm);
+      emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM);
+      return;
+     } /* Otherwise handle register case as usual. */
+    } else {
+     left = asm_fuseload(as, lref, RSET_GPR);
+    }
+    asm_guardcc(as, cc);
+    if (usetest && left != RID_MRM) {
+     /* Use test r,r instead of cmp r,0. */
+     if (irl+1 == ir) /* Referencing previous ins? */
+      as->testmcp = as->mcp; /* Set flag to drop test r,r if possible. */
+     emit_rr(as, XO_TEST, left, left);
+    } else {
+     x86Op xo;
+     if (checki8(imm)) { /* Prefer the shorter imm8 encoding. */
+      emit_i8(as, imm);
+      xo = XO_ARITHi8;
+     } else {
+      emit_i32(as, imm);
+      xo = XO_ARITHi;
+     }
+     emit_mrm(as, xo, XOg_CMP, left);
+    }
+   }
+  } else {
+   Reg left = ra_alloc1(as, lref, RSET_GPR);
+   Reg right = asm_fuseload(as, rref, rset_exclude(RSET_GPR, left));
+   asm_guardcc(as, cc);
+   emit_mrm(as, XO_CMP, left, right);
+  }
+ } else { /* Handle ordered string compares. */
+  RegSet allow = RSET_GPR;
+  /* This assumes lj_str_cmp never uses any SSE registers. */
+  ra_evictset(as, (RSET_SCRATCH & RSET_GPR));
+  asm_guardcc(as, cc);
+  emit_rr(as, XO_TEST, RID_RET, RID_RET); /* Compare result against 0. */
+  emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */
+  if (irref_isk(ir->op1)) {
+   emit_setargi(as, 1, IR(ir->op1)->i);
+  } else {
+   Reg left = ra_alloc1(as, ir->op1, allow);
+   rset_clear(allow, left);
+   emit_setargr(as, 1, left);
+  }
+  if (irref_isk(ir->op2)) {
+   emit_setargi(as, 2, IR(ir->op2)->i);
+  } else {
+   Reg right = ra_alloc1(as, ir->op2, allow);
+   emit_setargr(as, 2, right);
+  }
+ }
+}
+
+/* Pack integer cc, FP cc and VCC flags into a single argument. */
+#define asm_comp(as, ir, ci, cf, cu) \
+ asm_comp_(as, ir, (ci)+((cf)<<4)+(cu))
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Sync all live GC values to Lua stack slots. */
+/* Walks the snapshot map and stores every non-constant GC value (with its
+** type tag) back to its stack slot, so the GC sees consistent stack state.
+*/
+static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
+{
+ IRRef2 *map = &as->T->snapmap[snap->mapofs];
+ BCReg s, nslots = snap->nslots;
+ for (s = 0; s < nslots; s++) {
+  IRRef ref = snap_ref(map[s]);
+  if (!irref_isk(ref)) {
+   IRIns *ir = IR(ref);
+   if (ir->o == IR_FRAME) {
+    /* NYI: sync the frame, bump base, set topslot, clear new slots. */
+    lj_trace_err(as->J, LJ_TRERR_NYIGCF);
+   } else if (irt_isgcv(ir->t) &&
+   !(ir->o == IR_SLOAD && ir->op1 < nslots && map[ir->op1] == 0)) {
+    Reg src = ra_alloc1(as, ref, allow);
+    int32_t ofs = 8*(int32_t)(s-1); /* 8-byte slots, biased by one. */
+    emit_movtomro(as, src, base, ofs); /* Store value ... */
+    emit_movmroi(as, base, ofs+4, irt_toitype(ir->t)); /* ... and type tag. */
+    checkmclim(as);
+   }
+  }
+ }
+}
+
+/* Check GC threshold and do one or more GC steps. */
+/* Note: code is emitted backwards, so read the emit_* calls bottom-up:
+** compare gc.total vs. gc.threshold, jump around the slow path if below,
+** otherwise sync GC values, call lj_gc_step_jit and reload L->base.
+*/
+static void asm_gc_check(ASMState *as, SnapShot *snap)
+{
+ MCLabel l_end;
+ const BCIns *pc;
+ Reg tmp, base;
+ RegSet drop = RSET_SCRATCH;
+ /* Must evict BASE because the stack may be reallocated by the GC. */
+ if (ra_hasreg(IR(REF_BASE)->r))
+  drop |= RID2RSET(IR(REF_BASE)->r);
+ ra_evictset(as, drop);
+ base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET));
+ l_end = emit_label(as);
+ /* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */
+ emit_call(as, lj_gc_step_jit);
+ emit_movtomro(as, base, RID_RET, offsetof(lua_State, base)); /* Reload BASE. */
+ emit_setargr(as, 1, RID_RET);
+ emit_setargi(as, 3, (int32_t)as->gcsteps);
+ emit_getgl(as, RID_RET, jit_L); /* L for arg 1 and the base reload above. */
+ pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots];
+ emit_setargp(as, 2, pc);
+ asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base));
+ if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */
+  ra_restore(as, REF_BASE); /* Better do it inside the slow path. */
+ /* Jump around GC step if GC total < GC threshold. */
+ tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR);
+ emit_sjcc(as, CC_B, l_end);
+ emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
+ emit_getgl(as, tmp, gc.total);
+ as->gcsteps = 0;
+ checkmclim(as);
+}
+
+/* -- PHI and loop handling ----------------------------------------------- */
+
+/* Break a PHI cycle by renaming to a free register (evict if needed). */
+/* blocked: PHI regs that could not be shuffled; blockedby: the regs that
+** block them; allow: the register file (GPR or FPR) to operate on.
+*/
+static void asm_phi_break(ASMState *as, RegSet blocked, RegSet blockedby,
+ RegSet allow)
+{
+ RegSet candidates = blocked & allow;
+ if (candidates) { /* If this register file has candidates. */
+  /* Note: the set for ra_pick cannot be empty, since each register file
+  ** has some registers never allocated to PHIs.
+  */
+  Reg down, up = ra_pick(as, ~blocked & allow); /* Get a free register. */
+  if (candidates & ~blockedby) /* Optimize shifts, else it's a cycle. */
+   candidates = candidates & ~blockedby;
+  down = rset_picktop(candidates); /* Pick candidate PHI register. */
+  ra_rename(as, down, up); /* And rename it to the free register. */
+ }
+}
+
+/* PHI register shuffling.
+**
+** The allocator tries hard to preserve PHI register assignments across
+** the loop body. Most of the time this loop does nothing, since there
+** are no register mismatches.
+**
+** If a register mismatch is detected and ...
+** - the register is currently free: rename it.
+** - the register is blocked by an invariant: restore/remat and rename it.
+** - Otherwise the register is used by another PHI, so mark it as blocked.
+**
+** The renames are order-sensitive, so just retry the loop if a register
+** is marked as blocked, but has been freed in the meantime. A cycle is
+** detected if all of the blocked registers are allocated. To break the
+** cycle rename one of them to a free register and retry.
+**
+** Note that PHI spill slots are kept in sync and don't need to be shuffled.
+*/
+static void asm_phi_shuffle(ASMState *as)
+{
+ RegSet work;
+
+ /* Find and resolve PHI register mismatches. */
+ for (;;) {
+  RegSet blocked = RSET_EMPTY;
+  RegSet blockedby = RSET_EMPTY;
+  RegSet phiset = as->phiset;
+  while (phiset) { /* Check all left PHI operand registers. */
+   Reg r = rset_picktop(phiset);
+   IRIns *irl = IR(as->phireg[r]); /* Left PHI for this PHI register. */
+   Reg left = irl->r;
+   if (r != left) { /* Mismatch? */
+    if (!rset_test(as->freeset, r)) { /* PHI register blocked? */
+     IRRef ref = regcost_ref(as->cost[r]);
+     if (irt_ismarked(IR(ref)->t)) { /* Blocked by other PHI (w/reg)? */
+      rset_set(blocked, r);
+      if (ra_hasreg(left))
+       rset_set(blockedby, left);
+      left = RID_NONE; /* Don't rename this one yet. */
+     } else { /* Otherwise grab register from invariant. */
+      ra_restore(as, ref);
+      checkmclim(as);
+     }
+    }
+    if (ra_hasreg(left)) {
+     ra_rename(as, left, r);
+     checkmclim(as);
+    }
+   }
+   rset_clear(phiset, r);
+  }
+  if (!blocked) break; /* Finished. */
+  if (!(as->freeset & blocked)) { /* Break cycles if none are free. */
+   asm_phi_break(as, blocked, blockedby, RSET_GPR);
+   asm_phi_break(as, blocked, blockedby, RSET_FPR);
+   checkmclim(as);
+  } /* Else retry some more renames. */
+ }
+
+ /* Restore/remat invariants whose registers are modified inside the loop. */
+ work = as->modset & ~(as->freeset | as->phiset);
+ while (work) {
+  Reg r = rset_picktop(work);
+  ra_restore(as, regcost_ref(as->cost[r]));
+  rset_clear(work, r);
+  checkmclim(as);
+ }
+
+ /* Allocate and save all unsaved PHI regs and clear marks. */
+ work = as->phiset;
+ while (work) {
+  Reg r = rset_picktop(work);
+  IRRef lref = as->phireg[r];
+  IRIns *ir = IR(lref);
+  if (ra_hasspill(ir->s)) { /* Left PHI gained a spill slot? */
+   irt_clearmark(ir->t); /* Handled here, so clear marker now. */
+   ra_alloc1(as, lref, RID2RSET(r));
+   ra_save(as, ir, r); /* Save to spill slot inside the loop. */
+   checkmclim(as);
+  }
+  rset_clear(work, r);
+ }
+}
+
+/* Emit renames for left PHIs which are only spilled outside the loop. */
+/* Appends IR_RENAME instructions so the snapshot/restore machinery knows
+** the value lives in register r up to the loop snapshot.
+*/
+static void asm_phi_fixup(ASMState *as)
+{
+ RegSet work = as->phiset;
+ while (work) {
+  Reg r = rset_picktop(work);
+  IRRef lref = as->phireg[r];
+  IRIns *ir = IR(lref);
+  /* Left PHI gained a spill slot before the loop? */
+  if (irt_ismarked(ir->t) && ra_hasspill(ir->s)) {
+   IRRef ren;
+   lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
+   ren = tref_ref(lj_ir_emit(as->J));
+   as->ir = as->T->ir; /* The IR may have been reallocated. */
+   IR(ren)->r = (uint8_t)r;
+   IR(ren)->s = SPS_NONE;
+  }
+  irt_clearmark(ir->t); /* Always clear marker. */
+  rset_clear(work, r);
+ }
+}
+
+/* Setup right PHI reference. */
+/* Assigns a register (preferred) or a shared spill slot to the PHI and
+** records the left operand in as->phireg for asm_phi_shuffle.
+*/
+static void asm_phi(ASMState *as, IRIns *ir)
+{
+ RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+ RegSet afree = (as->freeset & allow);
+ IRIns *irl = IR(ir->op1);
+ IRIns *irr = IR(ir->op2);
+ /* Spill slot shuffling is not implemented yet (but rarely needed). */
+ if (ra_hasspill(irl->s) || ra_hasspill(irr->s))
+  lj_trace_err(as->J, LJ_TRERR_NYIPHI);
+ /* Leave at least one register free for non-PHIs (and PHI cycle breaking). */
+ if ((afree & (afree-1))) { /* Two or more free registers? */
+  Reg r;
+  if (ra_noreg(irr->r)) { /* Get a register for the right PHI. */
+   r = ra_allocref(as, ir->op2, allow);
+  } else { /* Duplicate right PHI, need a copy (rare). */
+   r = ra_scratch(as, allow);
+   emit_movrr(as, r, irr->r);
+  }
+  ir->r = (uint8_t)r;
+  rset_set(as->phiset, r);
+  as->phireg[r] = (IRRef1)ir->op1;
+  irt_setmark(irl->t); /* Marks left PHIs _with_ register. */
+  if (ra_noreg(irl->r))
+   ra_sethint(irl->r, r); /* Set register hint for left PHI. */
+ } else { /* Otherwise allocate a spill slot. */
+  /* This is overly restrictive, but it triggers only on synthetic code. */
+  if (ra_hasreg(irl->r) || ra_hasreg(irr->r))
+   lj_trace_err(as->J, LJ_TRERR_NYIPHI);
+  ra_spill(as, ir);
+  irl->s = irr->s = ir->s; /* Sync left/right PHI spill slots. */
+ }
+}
+
+/* Fixup the loop branch. */
+/* Patches the backward branch at the end of the trace (at as->mctop) to
+** jump to the loop start (as->mcp). Small loops trigger a realign retry
+** so they can use short jumps and start 16-byte aligned.
+*/
+static void asm_loop_fixup(ASMState *as)
+{
+ MCode *p = as->mctop;
+ MCode *target = as->mcp;
+ if (as->realign) { /* Realigned loops use short jumps. */
+  as->realign = NULL; /* Stop another retry. */
+  lua_assert(((intptr_t)target & 15) == 0); /* 16-byte aligned loop start. */
+  if (as->loopinv) { /* Inverted loop branch? */
+   p -= 5;
+   p[0] = XI_JMP;
+   lua_assert(target - p >= -128); /* Must fit in a short displacement. */
+   p[-1] = (MCode)(target - p); /* Patch sjcc. */
+   if (as->loopinv == 2)
+    p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */
+  } else {
+   lua_assert(target - p >= -128);
+   p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */
+   p[-2] = XI_JMPs;
+  }
+ } else {
+  MCode *newloop;
+  p[-5] = XI_JMP;
+  if (as->loopinv) { /* Inverted loop branch? */
+   /* asm_guardcc already inverted the jcc and patched the jmp. */
+   p -= 5;
+   newloop = target+4;
+   *(int32_t *)(p-4) = (int32_t)(target - p); /* Patch jcc. */
+   if (as->loopinv == 2) {
+    *(int32_t *)(p-10) = (int32_t)(target - p + 6); /* Patch opt. jp. */
+    newloop = target+8;
+   }
+  } else { /* Otherwise just patch jmp. */
+   *(int32_t *)(p-4) = (int32_t)(target - p);
+   newloop = target+3;
+  }
+  /* Realign small loops and shorten the loop branch. */
+  if (newloop >= p - 128) {
+   as->realign = newloop; /* Force a retry and remember alignment. */
+   as->curins = as->stopins; /* Abort asm_trace now. */
+   as->T->nins = as->orignins; /* Remove any added renames. */
+  }
+ }
+}
+
+/* Middle part of a loop. */
+/* Called when the IR_LOOP instruction is reached: runs a pending GC check,
+** shuffles PHI registers and patches the loop branch.
+*/
+static void asm_loop(ASMState *as)
+{
+ /* LOOP is a guard, so the snapno is up to date. */
+ as->loopsnapno = as->snapno;
+ if (as->gcsteps)
+  asm_gc_check(as, &as->T->snap[as->loopsnapno]);
+ /* LOOP marks the transition from the variant to the invariant part. */
+ as->testmcp = as->invmcp = NULL;
+ as->sectref = 0;
+ if (!neverfuse(as)) as->fuseref = 0; /* Allow fusion across the boundary. */
+ asm_phi_shuffle(as);
+ asm_loop_fixup(as);
+ as->mcloop = as->mcp; /* Remember the loop entry point. */
+ RA_DBGX((as, "===== LOOP ====="));
+ if (!as->realign) RA_DBG_FLUSH();
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Rematerialize all remaining constants in registers. */
+static void asm_const_remat(ASMState *as)
+{
+ RegSet remain = ~as->freeset & RSET_ALL; /* All allocated registers. */
+ while (remain) {
+  Reg r = rset_pickbot(remain);
+  IRRef ref;
+  rset_clear(remain, r);
+  ref = regcost_ref(as->cost[r]);
+  /* Only constants and BASE are rematerializable here. */
+  if (irref_isk(ref) || ref == REF_BASE) {
+   ra_rematk(as, IR(ref));
+   checkmclim(as);
+  }
+ }
+}
+
+/* Head of a root trace: store the trace number into the VM state and
+** grow the stack frame. Code is emitted backwards, so the stack
+** adjustment executes first at runtime.
+*/
+static void asm_head_root(ASMState *as)
+{
+ int32_t adjust;
+ emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
+ adjust = sps_adjust(as);
+ as->T->spadjust = (uint16_t)adjust; /* Recorded for side traces/tail. */
+ emit_addptr(as, RID_ESP, -adjust);
+}
+
+/* Handle BASE coalescing for a root trace. */
+/* If REF_BASE ended up in a register other than RID_BASE, emit a move
+** from RID_BASE at the trace head.
+*/
+static void asm_head_base(ASMState *as)
+{
+ IRIns *ir = IR(REF_BASE);
+ Reg r = ir->r;
+ lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+ ra_free(as, r);
+ if (r != RID_BASE) {
+  ra_scratch(as, RID2RSET(RID_BASE));
+  emit_rr(as, XO_MOV, r, RID_BASE);
+ }
+}
+
+/* Check Lua stack size for overflow at the start of a side trace.
+** Stack overflow is rare, so let the regular exit handling fix this up.
+** This is done in the context of the *parent* trace and parent exitno!
+*/
+/* Emitted sequence (read bottom-up, codegen is backwards): load L, load
+** L->maxstack, subtract jit_base, compare against 8*topslot bytes and
+** branch to the parent exit stub if below.
+*/
+static void asm_checkstack(ASMState *as, RegSet allow)
+{
+ /* Try to get an unused temp. register, otherwise spill/restore eax. */
+ Reg r = allow ? rset_pickbot(allow) : RID_EAX;
+ emit_jcc(as, CC_B, exitstub_addr(as->J, as->J->exitno));
+ if (allow == RSET_EMPTY) /* Restore temp. register. */
+  emit_rmro(as, XO_MOV, r, RID_ESP, sps_scale(SPS_TEMP1));
+ emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*as->topslot));
+ emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, ptr2addr(&J2G(as->J)->jit_base));
+ emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
+ emit_getgl(as, r, jit_L);
+ if (allow == RSET_EMPTY) /* Spill temp. register. */
+  emit_rmro(as, XO_MOVto, r, RID_ESP, sps_scale(SPS_TEMP1));
+}
+
+/* Head of a side trace.
+**
+** The current simplistic algorithm requires that all slots inherited
+** from the parent are live in a register between pass 2 and pass 3. This
+** avoids the complexity of stack slot shuffling. But of course this may
+** overflow the register set in some cases and cause the dreaded error:
+** "NYI: register coalescing too complex". A refined algorithm is needed.
+*/
+static void asm_head_side(ASMState *as)
+{
+ IRRef1 sloadins[RID_MAX]; /* Parent reg -> SLOAD ref wanting that reg. */
+ RegSet allow = RSET_ALL; /* Inverse of all coalesced registers. */
+ RegSet live = RSET_EMPTY; /* Live parent registers. */
+ int32_t spadj, spdelta;
+ int pass2 = 0;
+ int pass3 = 0;
+ IRRef i;
+
+ /* Scan all parent SLOADs and collect register dependencies. */
+ for (i = as->curins; i > REF_BASE; i--) {
+  IRIns *ir = IR(i);
+  lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+  ir->o == IR_FRAME);
+  if (ir->o == IR_SLOAD) {
+   RegSP rs = as->parentmap[ir->op1]; /* Parent-side register/spill. */
+   if (ra_hasreg(ir->r)) {
+    rset_clear(allow, ir->r);
+    if (ra_hasspill(ir->s))
+     ra_save(as, ir, ir->r);
+   } else if (ra_hasspill(ir->s)) {
+    irt_setmark(ir->t); /* Spilled target: reload in pass 2. */
+    pass2 = 1;
+   }
+   if (ir->r == rs) { /* Coalesce matching registers right now. */
+    ra_free(as, ir->r);
+   } else if (ra_hasspill(regsp_spill(rs))) {
+    if (ra_hasreg(ir->r))
+     pass3 = 1; /* Parent spill slot -> target register in pass 3. */
+   } else if (ra_used(ir)) {
+    sloadins[rs] = (IRRef1)i;
+    rset_set(live, rs); /* Block live parent register. */
+   }
+  }
+ }
+
+ /* Calculate stack frame adjustment. */
+ spadj = sps_adjust(as);
+ spdelta = spadj - (int32_t)as->parent->spadjust;
+ if (spdelta < 0) { /* Don't shrink the stack frame. */
+  spadj = (int32_t)as->parent->spadjust;
+  spdelta = 0;
+ }
+ as->T->spadjust = (uint16_t)spadj;
+
+ /* Reload spilled target registers. */
+ if (pass2) {
+  for (i = as->curins; i > REF_BASE; i--) {
+   IRIns *ir = IR(i);
+   if (irt_ismarked(ir->t)) {
+    RegSet mask;
+    Reg r;
+    RegSP rs;
+    irt_clearmark(ir->t);
+    rs = as->parentmap[ir->op1];
+    if (!ra_hasspill(regsp_spill(rs)))
+     ra_sethint(ir->r, rs); /* Hint may be gone, set it again. */
+    else if (sps_scale(regsp_spill(rs))+spdelta == sps_scale(ir->s))
+     continue; /* Same spill slot, do nothing. */
+    mask = (irt_isnum(ir->t) ? RSET_FPR : RSET_GPR) & allow;
+    if (mask == RSET_EMPTY)
+     lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+    r = ra_allocref(as, i, mask);
+    ra_save(as, ir, r);
+    rset_clear(allow, r);
+    if (r == rs) { /* Coalesce matching registers right now. */
+     ra_free(as, r);
+     rset_clear(live, r);
+    } else if (ra_hasspill(regsp_spill(rs))) {
+     pass3 = 1;
+    }
+    checkmclim(as);
+   }
+  }
+ }
+
+ /* Store trace number and adjust stack frame relative to the parent. */
+ emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
+ emit_addptr(as, RID_ESP, -spdelta);
+
+ /* Restore target registers from parent spill slots. */
+ if (pass3) {
+  RegSet work = ~as->freeset & RSET_ALL;
+  while (work) {
+   Reg r = rset_pickbot(work);
+   IRIns *ir = IR(regcost_ref(as->cost[r]));
+   RegSP rs = as->parentmap[ir->op1];
+   rset_clear(work, r);
+   if (ra_hasspill(regsp_spill(rs))) {
+    int32_t ofs = sps_scale(regsp_spill(rs));
+    ra_free(as, r);
+    emit_movrmro(as, r, RID_ESP, ofs);
+    checkmclim(as);
+   }
+  }
+ }
+
+ /* Shuffle registers to match up target regs with parent regs. */
+ for (;;) {
+  RegSet work;
+
+  /* Repeatedly coalesce free live registers by moving to their target. */
+  while ((work = as->freeset & live) != RSET_EMPTY) {
+   Reg rp = rset_pickbot(work);
+   IRIns *ir = IR(sloadins[rp]);
+   rset_clear(live, rp);
+   rset_clear(allow, rp);
+   ra_free(as, ir->r);
+   emit_movrr(as, ir->r, rp);
+   checkmclim(as);
+  }
+
+  /* We're done if no live registers remain. */
+  if (live == RSET_EMPTY)
+   break;
+
+  /* Break cycles by renaming one target to a temp. register. */
+  if (live & RSET_GPR) {
+   RegSet tmpset = as->freeset & ~live & allow & RSET_GPR;
+   if (tmpset == RSET_EMPTY)
+    lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+   ra_rename(as, rset_pickbot(live & RSET_GPR), rset_pickbot(tmpset));
+  }
+  if (live & RSET_FPR) {
+   RegSet tmpset = as->freeset & ~live & allow & RSET_FPR;
+   if (tmpset == RSET_EMPTY)
+    lj_trace_err(as->J, LJ_TRERR_NYICOAL);
+   ra_rename(as, rset_pickbot(live & RSET_FPR), rset_pickbot(tmpset));
+  }
+  checkmclim(as);
+  /* Continue with coalescing to fix up the broken cycle(s). */
+ }
+
+ /* Check Lua stack size if frames have been added. */
+ if (as->topslot)
+  asm_checkstack(as, allow & RSET_GPR);
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Sync Lua stack slots to match the last snapshot.
+** Note: code generation is backwards, so this is best read bottom-up.
+*/
+static void asm_tail_sync(ASMState *as)
+{
+ SnapShot *snap = &as->T->snap[as->T->nsnap-1]; /* Last snapshot. */
+ BCReg s, nslots = snap->nslots;
+ IRRef2 *map = &as->T->snapmap[snap->mapofs];
+ IRRef2 *flinks = map + nslots + snap->nframelinks; /* Walked backwards. */
+ BCReg newbase = 0;
+ BCReg secondbase = ~(BCReg)0;
+ BCReg topslot = 0;
+
+ checkmclim(as);
+ ra_allocref(as, REF_BASE, RID2RSET(RID_BASE));
+
+ /* Must check all frames to find topslot (outer can be larger than inner). */
+ for (s = 0; s < nslots; s++) {
+  IRRef ref = snap_ref(map[s]);
+  if (!irref_isk(ref)) {
+   IRIns *ir = IR(ref);
+   if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
+    GCfunc *fn = ir_kfunc(IR(ir->op2));
+    if (isluafunc(fn)) {
+     BCReg fs = s + funcproto(fn)->framesize;
+     newbase = s; /* Last frame wins as the new base. */
+     if (secondbase == ~(BCReg)0) secondbase = s;
+     if (fs > topslot) topslot = fs;
+    }
+   }
+  }
+ }
+ as->topslot = topslot; /* Used in asm_head_side(). */
+
+ if (as->T->link == TRACE_INTERP) {
+  /* Setup fixed registers for exit to interpreter. */
+  emit_loada(as, RID_DISPATCH, J2GG(as->J)->dispatch);
+  emit_loadi(as, RID_PC, (int32_t)map[nslots]);
+ } else if (newbase) {
+  /* Save modified BASE for linking to trace with higher start frame. */
+  emit_setgl(as, RID_BASE, jit_base);
+ }
+
+ emit_addptr(as, RID_BASE, 8*(int32_t)newbase);
+
+ /* Clear stack slots of newly added frames. */
+ if (nslots <= topslot) {
+  if (nslots < topslot) { /* Several slots: reuse LJ_TNIL in EAX. */
+   for (s = nslots; s <= topslot; s++) {
+    emit_movtomro(as, RID_EAX, RID_BASE, 8*(int32_t)s-4);
+    checkmclim(as);
+   }
+   emit_loadi(as, RID_EAX, LJ_TNIL);
+  } else { /* Single slot: store the immediate directly. */
+   emit_movmroi(as, RID_BASE, 8*(int32_t)nslots-4, LJ_TNIL);
+  }
+ }
+
+ /* Store the value of all modified slots to the Lua stack. */
+ for (s = 0; s < nslots; s++) {
+  int32_t ofs = 8*((int32_t)s-1); /* 8-byte slots, biased by one. */
+  IRRef ref = snap_ref(map[s]);
+  if (ref) {
+   IRIns *ir = IR(ref);
+   /* No need to restore readonly slots and unmodified non-parent slots. */
+   if (ir->o == IR_SLOAD && ir->op1 == s &&
+   (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
+    continue;
+   if (irt_isnum(ir->t)) {
+    Reg src = ra_alloc1(as, ref, RSET_FPR);
+    emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
+   } else if (ir->o == IR_FRAME) {
+    emit_movmroi(as, RID_BASE, ofs, ptr2addr(ir_kgc(IR(ir->op2))));
+    if (s != 0) /* Do not overwrite link to previous frame. */
+     emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*--flinks));
+   } else {
+    lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t));
+    if (!irref_isk(ref)) {
+     Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+     emit_movtomro(as, src, RID_BASE, ofs);
+    } else if (!irt_ispri(ir->t)) {
+     emit_movmroi(as, RID_BASE, ofs, ir->i);
+    }
+    emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); /* Type tag. */
+   }
+  } else if (s > secondbase) {
+   emit_movmroi(as, RID_BASE, ofs+4, LJ_TNIL);
+  }
+  checkmclim(as);
+ }
+ lua_assert(map + nslots == flinks-1); /* All frame links consumed. */
+}
+
+/* Fixup the tail code. */
+/* Patches the stack readjustment (add/lea esp) and the final jump to
+** either the interpreter exit or the linked trace lnk, then trims the
+** unused machine-code tail.
+*/
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+ /* Note: don't use as->mcp swap + emit_*: emit_op overwrites more bytes. */
+ MCode *p = as->mctop;
+ MCode *target, *q;
+ int32_t spadj = as->T->spadjust;
+ if (spadj == 0) {
+  p -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6; /* Drop unneeded esp adjust. */
+ } else {
+  MCode *p1;
+  /* Patch stack adjustment. */
+  if (checki8(spadj)) {
+   p -= 3; /* imm8 form is 3 bytes shorter. */
+   p1 = p-6;
+   *p1 = (MCode)spadj;
+  } else {
+   p1 = p-9;
+   *(int32_t *)p1 = spadj;
+  }
+  if ((as->flags & JIT_F_LEA_AGU)) { /* lea esp, [esp+spadj] */
+   p1[-3] = (MCode)XI_LEA;
+   p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
+   p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
+  } else { /* add esp, spadj */
+   p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
+   p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
+  }
+ }
+ /* Patch exit branch. */
+ target = lnk == TRACE_INTERP ? (MCode *)lj_vm_exit_interp :
+ as->J->trace[lnk]->mcode;
+ *(int32_t *)(p-4) = (int32_t)(target - p);
+ p[-5] = XI_JMP;
+ /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
+ for (q = as->mctop-1; q >= p; q--)
+  *q = XI_NOP;
+ as->mctop = p;
+}
+
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+/* Central dispatch: maps each IROp to its emitter. Comparison cases pass
+** (int cc, FP cc, VCC flags) via the asm_comp macro; unknown ops abort
+** the trace with LJ_TRERR_NYIIR.
+** Fix: the IR_TOINT case had a duplicated 'break' statement fused onto
+** the else branch; the dead duplicate has been removed (no behavior
+** change).
+*/
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+ switch ((IROp)ir->o) {
+ /* Miscellaneous ops. */
+ case IR_LOOP: asm_loop(as); break;
+ case IR_NOP: break;
+ case IR_PHI: asm_phi(as, ir); break;
+
+ /* Guarded assertions. */
+ case IR_LT: asm_comp(as, ir, CC_GE, CC_AE, VCC_PS); break;
+ case IR_GE: asm_comp(as, ir, CC_L, CC_B, 0); break;
+ case IR_LE: asm_comp(as, ir, CC_G, CC_A, VCC_PS); break;
+ case IR_GT: asm_comp(as, ir, CC_LE, CC_BE, 0); break;
+ case IR_ULT: asm_comp(as, ir, CC_AE, CC_AE, VCC_U); break;
+ case IR_UGE: asm_comp(as, ir, CC_B, CC_B, VCC_U|VCC_PS); break;
+ case IR_ULE: asm_comp(as, ir, CC_A, CC_A, VCC_U); break;
+ case IR_ABC:
+ case IR_UGT: asm_comp(as, ir, CC_BE, CC_BE, VCC_U|VCC_PS); break;
+
+ case IR_FRAME:
+ if (ir->op1 == ir->op2) break; /* No check needed for placeholder. */
+ /* fallthrough */
+ case IR_EQ: asm_comp(as, ir, CC_NE, CC_NE, VCC_P); break;
+ case IR_NE: asm_comp(as, ir, CC_E, CC_E, VCC_U|VCC_P); break;
+
+ /* Bit ops. */
+ case IR_BNOT: asm_bitnot(as, ir); break;
+ case IR_BSWAP: asm_bitswap(as, ir); break;
+
+ case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
+ case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
+ case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
+
+ case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
+ case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
+ case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
+ case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
+ case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
+
+ /* Arithmetic ops. */
+ case IR_ADD: asm_add(as, ir); break;
+ case IR_SUB:
+ if (irt_isnum(ir->t))
+ asm_fparith(as, ir, XO_SUBSD);
+ else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+ asm_intarith(as, ir, XOg_SUB);
+ break;
+ case IR_MUL: asm_fparith(as, ir, XO_MULSD); break;
+ case IR_DIV: asm_fparith(as, ir, XO_DIVSD); break;
+
+ case IR_NEG: asm_fparith(as, ir, XO_XORPS); break;
+ case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
+
+ case IR_MIN: asm_fparith(as, ir, XO_MINSD); break;
+ case IR_MAX: asm_fparith(as, ir, XO_MAXSD); break;
+
+ case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: case IR_POWI:
+ asm_fpmath(as, ir);
+ break;
+
+ /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
+ case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
+ case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
+
+ /* Memory references. */
+ case IR_AREF: asm_aref(as, ir); break;
+ case IR_HREF: asm_href(as, ir); break;
+ case IR_HREFK: asm_hrefk(as, ir); break;
+ case IR_NEWREF: asm_newref(as, ir); break;
+ case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+ case IR_FREF: asm_fref(as, ir); break;
+ case IR_STRREF: asm_strref(as, ir); break;
+
+ /* Loads and stores. */
+ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break;
+ case IR_FLOAD: asm_fload(as, ir); break;
+ case IR_SLOAD: asm_sload(as, ir); break;
+ case IR_XLOAD: asm_xload(as, ir); break;
+
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+ case IR_FSTORE: asm_fstore(as, ir); break;
+
+ /* String ops. */
+ case IR_SNEW: asm_snew(as, ir); break;
+
+ /* Table ops. */
+ case IR_TNEW: asm_tnew(as, ir); break;
+ case IR_TDUP: asm_tdup(as, ir); break;
+ case IR_TLEN: asm_tlen(as, ir); break;
+ case IR_TBAR: asm_tbar(as, ir); break;
+ case IR_OBAR: asm_obar(as, ir); break;
+
+ /* Type conversions. */
+ case IR_TONUM: asm_tonum(as, ir); break;
+ case IR_TOINT:
+ if (irt_isguard(ir->t))
+ asm_tointg(as, ir, ra_alloc1(as, ir->op1, RSET_FPR));
+ else
+ asm_toint(as, ir);
+ break;
+ case IR_TOBIT: asm_tobit(as, ir); break;
+ case IR_TOSTR: asm_tostr(as, ir); break;
+ case IR_STRTO: asm_strto(as, ir); break;
+
+ default:
+ setintV(&as->J->errinfo, ir->o);
+ lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+ break;
+ }
+}
+
+/* Assemble a trace in linear backwards order. */
+static void asm_trace(ASMState *as)
+{
+ for (as->curins--; as->curins > as->stopins; as->curins--) {
+ IRIns *ir = IR(as->curins);
+ if (irt_isguard(ir->t))
+ asm_snap_prep(as);
+ else if (!ra_used(ir) && !irm_sideeff(lj_ir_mode[ir->o]) &&
+ (as->flags & JIT_F_OPT_DCE))
+ continue; /* Dead-code elimination can be soooo easy. */
+ RA_DBG_REF();
+ checkmclim(as);
+ asm_ir(as, ir);
+ }
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Clear reg/sp for all instructions and add register hints. */
+static void asm_setup_regsp(ASMState *as, Trace *T)
+{
+ IRRef i, nins;
+ int inloop;
+
+ /* Clear reg/sp for constants. */
+ for (i = T->nk; i < REF_BIAS; i++)
+ IR(i)->prev = REGSP_INIT;
+
+ /* REF_BASE is used for implicit references to the BASE register. */
+ IR(REF_BASE)->prev = REGSP_HINT(RID_BASE);
+
+ nins = T->nins;
+ if (IR(nins-1)->o == IR_RENAME) {
+ do { nins--; } while (IR(nins-1)->o == IR_RENAME);
+ T->nins = nins; /* Remove any renames left over from ASM restart. */
+ }
+ as->snaprename = nins;
+ as->snapref = nins;
+ as->snapno = T->nsnap;
+
+ as->stopins = REF_BASE;
+ as->orignins = nins;
+ as->curins = nins;
+
+ inloop = 0;
+ for (i = REF_FIRST; i < nins; i++) {
+ IRIns *ir = IR(i);
+ switch (ir->o) {
+ case IR_LOOP:
+ inloop = 1;
+ break;
+ /* Set hints for slot loads from a parent trace. */
+ case IR_SLOAD:
+ if ((ir->op2 & IRSLOAD_PARENT)) {
+ RegSP rs = as->parentmap[ir->op1];
+ lua_assert(regsp_used(rs));
+ as->stopins = i;
+ if (!ra_hasspill(regsp_spill(rs)) && ra_hasreg(regsp_reg(rs))) {
+ ir->prev = (uint16_t)REGSP_HINT(regsp_reg(rs));
+ continue;
+ }
+ }
+ break;
+ case IR_FRAME:
+ if (i == as->stopins+1 && ir->op1 == ir->op2)
+ as->stopins++;
+ break;
+ /* C calls evict all scratch regs and return results in RID_RET. */
+ case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR:
+ case IR_NEWREF:
+ ir->prev = REGSP_HINT(RID_RET);
+ if (inloop)
+ as->modset = RSET_SCRATCH;
+ continue;
+ case IR_STRTO: case IR_OBAR:
+ if (inloop)
+ as->modset = RSET_SCRATCH;
+ break;
+ /* Ordered string compares evict all integer scratch registers. */
+ case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+ if (irt_isstr(ir->t) && inloop)
+ as->modset |= (RSET_SCRATCH & RSET_GPR);
+ break;
+ /* Non-constant shift counts need to be in RID_ECX. */
+ case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+ if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
+ IR(ir->op2)->r = REGSP_HINT(RID_ECX);
+ break;
+ /* Do not propagate hints across type conversions. */
+ case IR_TONUM: case IR_TOINT: case IR_TOBIT:
+ break;
+ default:
+ /* Propagate hints across likely 'op reg, imm' or 'op reg'. */
+ if (irref_isk(ir->op2) && !irref_isk(ir->op1)) {
+ ir->prev = IR(ir->op1)->prev;
+ continue;
+ }
+ break;
+ }
+ ir->prev = REGSP_INIT;
+ }
+}
+
+/* -- Assembler core ------------------------------------------------------ */
+
+/* Define this if you want to run LuaJIT with Valgrind. */
+#ifdef LUAJIT_USE_VALGRIND
+#include <valgrind/valgrind.h>
+#define VG_INVALIDATE(p, sz) VALGRIND_DISCARD_TRANSLATIONS(p, sz)
+#else
+#define VG_INVALIDATE(p, sz) ((void)0)
+#endif
+
+/* Assemble a trace. */
+void lj_asm_trace(jit_State *J, Trace *T)
+{
+ ASMState as_;
+ ASMState *as = &as_;
+
+ /* Setup initial state. Copy some fields to reduce indirections. */
+ as->J = J;
+ as->T = T;
+ as->ir = T->ir;
+ as->flags = J->flags;
+ as->loopref = J->loopref;
+ as->realign = NULL;
+ as->loopinv = 0;
+ if (J->parent) {
+ as->parent = J->trace[J->parent];
+ lj_snap_regspmap(as->parentmap, as->parent, J->exitno);
+ } else {
+ as->parent = NULL;
+ }
+ as->mctop = lj_mcode_reserve(J, &as->mcbot); /* Reserve MCode memory. */
+ as->mcp = as->mctop;
+ as->mclim = as->mcbot + MCLIM_REDZONE;
+ asm_exitstub_setup(as, T->nsnap);
+
+ do {
+ as->mcp = as->mctop;
+ as->curins = T->nins;
+ RA_DBG_START();
+ RA_DBGX((as, "===== STOP ====="));
+ /* Realign and leave room for backwards loop branch or exit branch. */
+ if (as->realign) {
+ int i = ((int)(intptr_t)as->realign) & 15;
+ MCode *p = as->mctop;
+ /* Fill unused mcode tail with NOPs to make the prefetcher happy. */
+ while (i-- > 0)
+ *--p = XI_NOP;
+ as->mctop = p;
+ as->mcp = p - (as->loopinv ? 5 : 2); /* Space for short/near jmp. */
+ } else {
+ as->mcp = as->mctop - 5; /* Space for exit branch (near jmp). */
+ }
+ as->invmcp = as->mcp;
+ as->mcloop = NULL;
+ as->testmcp = NULL;
+ as->topslot = 0;
+ as->gcsteps = 0;
+ as->sectref = as->loopref;
+ as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
+
+ /* Setup register allocation. */
+ ra_setup(as);
+ asm_setup_regsp(as, T);
+
+ if (!as->loopref) {
+ /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
+ as->mcp -= (as->flags & JIT_F_LEA_AGU) ? 7 : 6;
+ as->invmcp = NULL;
+ asm_tail_sync(as);
+ }
+ asm_trace(as);
+ } while (as->realign); /* Retry in case the MCode needs to be realigned. */
+
+ RA_DBG_REF();
+ checkmclim(as);
+ if (as->gcsteps)
+ asm_gc_check(as, &as->T->snap[0]);
+ if (!J->parent)
+ asm_head_base(as);
+ asm_const_remat(as);
+ if (J->parent)
+ asm_head_side(as);
+ else
+ asm_head_root(as);
+ asm_phi_fixup(as);
+
+ RA_DBGX((as, "===== START ===="));
+ RA_DBG_FLUSH();
+ if (as->freeset != RSET_ALL)
+ lj_trace_err(as->J, LJ_TRERR_BADRA); /* Ouch! Should never happen. */
+
+ /* Set trace entry point before fixing up tail to allow link to self. */
+ T->mcode = as->mcp;
+ T->mcloop = as->mcloop ? (MSize)(as->mcloop - as->mcp) : 0;
+ if (!as->loopref)
+ asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
+ T->szmcode = (MSize)(as->mctop - as->mcp);
+ VG_INVALIDATE(T->mcode, T->szmcode);
+}
+
+/* Patch exit jumps of existing machine code to a new target. */
+void lj_asm_patchexit(jit_State *J, Trace *T, ExitNo exitno, MCode *target)
+{
+ MCode *p = T->mcode;
+ MCode *mcarea = lj_mcode_patch(J, p, 0);
+ MSize len = T->szmcode;
+ MCode *px = exitstub_addr(J, exitno) - 6;
+ MCode *pe = p+len-6;
+ if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
+ *(int32_t *)(p+len-4) = (int32_t)(target - (p+len));
+ for (; p < pe; p++) {
+ if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) {
+ *(int32_t *)(p+2) = (int32_t)(target - (p+6));
+ p += 5;
+ }
+ }
+ lj_mcode_patch(J, mcarea, 1);
+ VG_INVALIDATE(T->mcode, T->szmcode);
+}
+
+#undef IR
+
+#endif
diff --git a/src/lj_asm.h b/src/lj_asm.h
new file mode 100644
index 00000000..84122b43
--- /dev/null
+++ b/src/lj_asm.h
@@ -0,0 +1,17 @@
+/*
+** IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_ASM_H
+#define _LJ_ASM_H
+
+#include "lj_jit.h"
+
+#if LJ_HASJIT
+LJ_FUNC void lj_asm_trace(jit_State *J, Trace *T);
+LJ_FUNC void lj_asm_patchexit(jit_State *J, Trace *T, ExitNo exitno,
+ MCode *target);
+#endif
+
+#endif
diff --git a/src/lj_bc.c b/src/lj_bc.c
new file mode 100644
index 00000000..79846325
--- /dev/null
+++ b/src/lj_bc.c
@@ -0,0 +1,17 @@
+/*
+** Bytecode instruction modes.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_bc_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_bc.h"
+
+/* Bytecode instruction modes. */
+LJ_DATADEF const uint16_t lj_bc_mode[BC__MAX+1] = {
+BCDEF(BCMODE)
+ 0
+};
+
diff --git a/src/lj_bc.h b/src/lj_bc.h
new file mode 100644
index 00000000..d906011c
--- /dev/null
+++ b/src/lj_bc.h
@@ -0,0 +1,235 @@
+/*
+** Bytecode instruction format.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_BC_H
+#define _LJ_BC_H
+
+#include "lj_def.h"
+#include "lj_arch.h"
+
+/* Bytecode instruction format, 32 bit wide, fields of 8 or 16 bit:
+**
+** +----+----+----+----+
+** | B | C | A | OP | Format ABC
+** +----+----+----+----+
+** | D | A | OP | Format AD
+** +--------------------
+** MSB LSB
+**
+** In-memory instructions are always stored in host byte order.
+*/
+
+/* Operand ranges and related constants. */
+#define BCMAX_A 0xff
+#define BCMAX_B 0xff
+#define BCMAX_C 0xff
+#define BCMAX_D 0xffff
+#define BCBIAS_J 0x8000
+#define NO_REG BCMAX_A
+#define NO_JMP (~(BCPos)0)
+
+/* Macros to get instruction fields. */
+#define bc_op(i) (cast(BCOp, (i)&0xff))
+#define bc_a(i) (cast(BCReg, ((i)>>8)&0xff))
+#define bc_b(i) (cast(BCReg, (i)>>24))
+#define bc_c(i) (cast(BCReg, ((i)>>16)&0xff))
+#define bc_d(i) (cast(BCReg, (i)>>16))
+#define bc_j(i) ((ptrdiff_t)bc_d(i)-BCBIAS_J)
+
+/* Macros to set instruction fields. */
+#define setbc_byte(p, x, ofs) \
+ ((uint8_t *)(p))[LJ_ENDIAN_SELECT(ofs, 3-ofs)] = cast_byte(x)
+#define setbc_op(p, x) setbc_byte(p, (x), 0)
+#define setbc_a(p, x) setbc_byte(p, (x), 1)
+#define setbc_b(p, x) setbc_byte(p, (x), 3)
+#define setbc_c(p, x) setbc_byte(p, (x), 2)
+#define setbc_d(p, x) \
+ ((uint16_t *)(p))[LJ_ENDIAN_SELECT(1, 0)] = cast(uint16_t, (x))
+#define setbc_j(p, x) setbc_d(p, (BCPos)((int32_t)(x)+BCBIAS_J))
+
+/* Macros to compose instructions. */
+#define BCINS_ABC(o, a, b, c) \
+ (cast(BCIns, o)|(cast(BCIns, a)<<8)|\
+ (cast(BCIns, b)<<24)|(cast(BCIns, c)<<16))
+#define BCINS_AD(o, a, d) \
+ (cast(BCIns, o)|(cast(BCIns, a)<<8)|(cast(BCIns, d)<<16))
+#define BCINS_AJ(o, a, j) BCINS_AD(o, a, (BCPos)((int32_t)(j)+BCBIAS_J))
+
+/* Bytecode instruction definition. Order matters, see below.
+**
+** (name, filler, Amode, Bmode, Cmode or Dmode, metamethod)
+**
+** The opcode name suffixes specify the type for RB/RC or RD:
+** V = variable slot
+** S = string const
+** N = number const
+** P = primitive type (~itype)
+** B = unsigned byte literal
+** M = multiple args/results
+*/
+#define BCDEF(_) \
+ /* Comparison ops. ORDER OPR. */ \
+ _(ISLT, var, ___, var, lt) \
+ _(ISGE, var, ___, var, lt) \
+ _(ISLE, var, ___, var, le) \
+ _(ISGT, var, ___, var, le) \
+ \
+ _(ISEQV, var, ___, var, eq) \
+ _(ISNEV, var, ___, var, eq) \
+ _(ISEQS, var, ___, str, eq) \
+ _(ISNES, var, ___, str, eq) \
+ _(ISEQN, var, ___, num, eq) \
+ _(ISNEN, var, ___, num, eq) \
+ _(ISEQP, var, ___, pri, eq) \
+ _(ISNEP, var, ___, pri, eq) \
+ \
+ /* Unary test and copy ops. */ \
+ _(ISTC, dst, ___, var, ___) \
+ _(ISFC, dst, ___, var, ___) \
+ _(IST, ___, ___, var, ___) \
+ _(ISF, ___, ___, var, ___) \
+ \
+ /* Unary ops. */ \
+ _(MOV, dst, ___, var, ___) \
+ _(NOT, dst, ___, var, ___) \
+ _(UNM, dst, ___, var, unm) \
+ _(LEN, dst, ___, var, len) \
+ \
+ /* Binary ops. ORDER OPR. VV last, POW must be next. */ \
+ _(ADDVN, dst, var, num, add) \
+ _(SUBVN, dst, var, num, sub) \
+ _(MULVN, dst, var, num, mul) \
+ _(DIVVN, dst, var, num, div) \
+ _(MODVN, dst, var, num, mod) \
+ \
+ _(ADDNV, dst, var, num, add) \
+ _(SUBNV, dst, var, num, sub) \
+ _(MULNV, dst, var, num, mul) \
+ _(DIVNV, dst, var, num, div) \
+ _(MODNV, dst, var, num, mod) \
+ \
+ _(ADDVV, dst, var, var, add) \
+ _(SUBVV, dst, var, var, sub) \
+ _(MULVV, dst, var, var, mul) \
+ _(DIVVV, dst, var, var, div) \
+ _(MODVV, dst, var, var, mod) \
+ \
+ _(POW, dst, var, var, pow) \
+ _(CAT, dst, rbase, rbase, concat) \
+ \
+ /* Constant ops. */ \
+ _(KSTR, dst, ___, str, ___) \
+ _(KSHORT, dst, ___, lits, ___) \
+ _(KNUM, dst, ___, num, ___) \
+ _(KPRI, dst, ___, pri, ___) \
+ _(KNIL, base, ___, base, ___) \
+ \
+ /* Upvalue and function ops. */ \
+ _(UGET, dst, ___, uv, ___) \
+ _(USETV, uv, ___, var, ___) \
+ _(USETS, uv, ___, str, ___) \
+ _(USETN, uv, ___, num, ___) \
+ _(USETP, uv, ___, pri, ___) \
+ _(UCLO, rbase, ___, jump, ___) \
+ _(FNEW, dst, ___, func, gc) \
+ \
+ /* Table ops. */ \
+ _(TNEW, dst, ___, lit, gc) \
+ _(TDUP, dst, ___, tab, gc) \
+ _(GGET, dst, ___, str, index) \
+ _(GSET, var, ___, str, newindex) \
+ _(TGETV, dst, var, var, index) \
+ _(TGETS, dst, var, str, index) \
+ _(TGETB, dst, var, lit, index) \
+ _(TSETV, var, var, var, newindex) \
+ _(TSETS, var, var, str, newindex) \
+ _(TSETB, var, var, lit, newindex) \
+ _(TSETM, base, ___, num, newindex) \
+ \
+ /* Calls and vararg handling. T = tail call. */ \
+ _(CALLM, base, lit, lit, call) \
+ _(CALL, base, lit, lit, call) \
+ _(CALLMT, base, ___, lit, call) \
+ _(CALLT, base, ___, lit, call) \
+ _(ITERC, base, lit, lit, call) \
+ _(VARG, base, lit, lit, ___) \
+ \
+ /* Returns. */ \
+ _(RETM, base, ___, lit, ___) \
+ _(RET, rbase, ___, lit, ___) \
+ _(RET0, rbase, ___, lit, ___) \
+ _(RET1, rbase, ___, lit, ___) \
+ \
+ /* Loops and branches. I/J = interp/JIT, I/C/L = init/call/loop. */ \
+ _(FORI, base, ___, jump, ___) \
+ _(JFORI, base, ___, jump, ___) \
+ \
+ _(FORL, base, ___, jump, ___) \
+ _(IFORL, base, ___, jump, ___) \
+ _(JFORL, base, ___, lit, ___) \
+ \
+ _(ITERL, base, ___, jump, ___) \
+ _(IITERL, base, ___, jump, ___) \
+ _(JITERL, base, ___, lit, ___) \
+ \
+ _(LOOP, rbase, ___, jump, ___) \
+ _(ILOOP, rbase, ___, jump, ___) \
+ _(JLOOP, rbase, ___, lit, ___) \
+ \
+ _(JMP, rbase, ___, jump, ___)
+
+/* Bytecode opcode numbers. */
+typedef enum {
+#define BCENUM(name, ma, mb, mc, mt) BC_##name,
+BCDEF(BCENUM)
+#undef BCENUM
+ BC__MAX
+} BCOp;
+
+LJ_STATIC_ASSERT((int)BC_ISEQV+1 == (int)BC_ISNEV);
+LJ_STATIC_ASSERT(((int)BC_ISEQV^1) == (int)BC_ISNEV);
+LJ_STATIC_ASSERT(((int)BC_ISEQS^1) == (int)BC_ISNES);
+LJ_STATIC_ASSERT(((int)BC_ISEQN^1) == (int)BC_ISNEN);
+LJ_STATIC_ASSERT(((int)BC_ISEQP^1) == (int)BC_ISNEP);
+LJ_STATIC_ASSERT(((int)BC_ISLT^1) == (int)BC_ISGE);
+LJ_STATIC_ASSERT(((int)BC_ISLE^1) == (int)BC_ISGT);
+LJ_STATIC_ASSERT(((int)BC_ISLT^3) == (int)BC_ISGT);
+LJ_STATIC_ASSERT((int)BC_IST-(int)BC_ISTC == (int)BC_ISF-(int)BC_ISFC);
+LJ_STATIC_ASSERT((int)BC_CALLT-(int)BC_CALL == (int)BC_CALLMT-(int)BC_CALLM);
+LJ_STATIC_ASSERT((int)BC_CALLMT + 1 == (int)BC_CALLT);
+LJ_STATIC_ASSERT((int)BC_RETM + 1 == (int)BC_RET);
+LJ_STATIC_ASSERT((int)BC_FORL + 1 == (int)BC_IFORL);
+LJ_STATIC_ASSERT((int)BC_FORL + 2 == (int)BC_JFORL);
+LJ_STATIC_ASSERT((int)BC_ITERL + 1 == (int)BC_IITERL);
+LJ_STATIC_ASSERT((int)BC_ITERL + 2 == (int)BC_JITERL);
+LJ_STATIC_ASSERT((int)BC_LOOP + 1 == (int)BC_ILOOP);
+LJ_STATIC_ASSERT((int)BC_LOOP + 2 == (int)BC_JLOOP);
+
+/* Stack slots used by FORI/FORL, relative to operand A. */
+enum {
+ FORL_IDX, FORL_STOP, FORL_STEP, FORL_EXT
+};
+
+/* Bytecode operand modes. ORDER BCMode */
+typedef enum {
+ BCMnone, BCMdst, BCMbase, BCMvar, BCMrbase, BCMuv, /* Mode A must be <= 7 */
+ BCMlit, BCMlits, BCMpri, BCMnum, BCMstr, BCMtab, BCMfunc, BCMjump,
+ BCM_max
+} BCMode;
+#define BCM___ BCMnone
+
+#define bcmode_a(op) (cast(BCMode, lj_bc_mode[op] & 7))
+#define bcmode_b(op) (cast(BCMode, (lj_bc_mode[op]>>3) & 15))
+#define bcmode_c(op) (cast(BCMode, (lj_bc_mode[op]>>7) & 15))
+#define bcmode_d(op) bcmode_c(op)
+#define bcmode_hasd(op) ((lj_bc_mode[op] & (15<<3)) == (BCMnone<<3))
+#define bcmode_mm(op) (cast(MMS, lj_bc_mode[op]>>11))
+
+#define BCMODE(name, ma, mb, mc, mm) \
+ (BCM##ma|(BCM##mb<<3)|(BCM##mc<<7)|(MM_##mm<<11)),
+
+LJ_DATA const uint16_t lj_bc_mode[BC__MAX+1];
+
+#endif
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
new file mode 100644
index 00000000..9f19b879
--- /dev/null
+++ b/src/lj_ctype.c
@@ -0,0 +1,44 @@
+/*
+** Internal CTYPE replacement.
+** Donated to the public domain.
+**
+** This is intended to replace the problematic libc single-byte NLS functions.
+** These just don't make sense anymore with UTF-8 locales becoming the norm
+** on POSIX systems. It never worked too well on Windows systems since hardly
+** anyone bothered to call setlocale().
+**
+** Instead this table is hardcoded for ASCII, except for identifiers. These
+** include the characters 128-255, too. This allows for the use of all
+** non-ASCII chars as identifiers in the lexer. This is a broad definition,
+** but works well in practice for both UTF-8 locales and most single-byte
+** locales (such as ISO-8859-*).
+**
+** If you really need proper ctypes for UTF-8 strings, please use an add-on
+** library such as slnunicode: http://luaforge.net/projects/sln/
+*/
+
+#define lj_ctype_c
+#define LUA_CORE
+
+#include "lj_ctype.h"
+
+LJ_DATADEF const uint8_t lj_ctype_bits[257] = {
+ 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+ 152,152,152,152,152,152,152,152,152,152, 4, 4, 4, 4, 4, 4,
+ 4,176,176,176,176,176,176,160,160,160,160,160,160,160,160,160,
+ 160,160,160,160,160,160,160,160,160,160,160, 4, 4, 4, 4,132,
+ 4,208,208,208,208,208,208,192,192,192,192,192,192,192,192,192,
+ 192,192,192,192,192,192,192,192,192,192,192, 4, 4, 4, 4, 1,
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,
+ 128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+};
+
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
new file mode 100644
index 00000000..c4cdff84
--- /dev/null
+++ b/src/lj_ctype.h
@@ -0,0 +1,40 @@
+/*
+** Internal CTYPE replacement.
+** Donated to the public domain.
+*/
+
+#ifndef _LJ_CTYPE_H
+#define _LJ_CTYPE_H
+
+#include "lj_def.h"
+
+#define LJ_CTYPE_CNTRL 0x01
+#define LJ_CTYPE_SPACE 0x02
+#define LJ_CTYPE_PUNCT 0x04
+#define LJ_CTYPE_DIGIT 0x08
+#define LJ_CTYPE_XDIGIT 0x10
+#define LJ_CTYPE_UPPER 0x20
+#define LJ_CTYPE_LOWER 0x40
+#define LJ_CTYPE_IDENT 0x80
+#define LJ_CTYPE_ALPHA (LJ_CTYPE_LOWER|LJ_CTYPE_UPPER)
+#define LJ_CTYPE_ALNUM (LJ_CTYPE_ALPHA|LJ_CTYPE_DIGIT)
+
+/* Only pass -1 or 0..255 to these macros. Never pass a signed char! */
+#define lj_ctype_isa(c, t) (lj_ctype_bits[(c)+1] & t)
+#define lj_ctype_iscntrl(c) lj_ctype_isa((c), LJ_CTYPE_CNTRL)
+#define lj_ctype_isspace(c) lj_ctype_isa((c), LJ_CTYPE_SPACE)
+#define lj_ctype_ispunct(c) lj_ctype_isa((c), LJ_CTYPE_PUNCT)
+#define lj_ctype_isdigit(c) lj_ctype_isa((c), LJ_CTYPE_DIGIT)
+#define lj_ctype_isxdigit(c) lj_ctype_isa((c), LJ_CTYPE_XDIGIT)
+#define lj_ctype_isupper(c) lj_ctype_isa((c), LJ_CTYPE_UPPER)
+#define lj_ctype_islower(c) lj_ctype_isa((c), LJ_CTYPE_LOWER)
+#define lj_ctype_isident(c) lj_ctype_isa((c), LJ_CTYPE_IDENT)
+#define lj_ctype_isalpha(c) lj_ctype_isa((c), LJ_CTYPE_ALPHA)
+#define lj_ctype_isalnum(c) lj_ctype_isa((c), LJ_CTYPE_ALNUM)
+
+#define lj_ctype_toupper(c) ((c) - (lj_ctype_islower(c) >> 1))
+#define lj_ctype_tolower(c) ((c) + lj_ctype_isupper(c))
+
+LJ_DATA const uint8_t lj_ctype_bits[257];
+
+#endif
diff --git a/src/lj_def.h b/src/lj_def.h
new file mode 100644
index 00000000..dbfd5bf5
--- /dev/null
+++ b/src/lj_def.h
@@ -0,0 +1,226 @@
+/*
+** LuaJIT common internal definitions.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_DEF_H
+#define _LJ_DEF_H
+
+#include "lua.h"
+
+#ifdef _MSC_VER
+/* MSVC is stuck in the last century and doesn't have C99's stdint.h. */
+typedef __int8 int8_t;
+typedef __int16 int16_t;
+typedef __int32 int32_t;
+typedef __int64 int64_t;
+typedef unsigned __int8 uint8_t;
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#ifdef _WIN64
+typedef __int64 intptr_t;
+typedef unsigned __int64 uintptr_t;
+#else
+typedef __int32 intptr_t;
+typedef unsigned __int32 uintptr_t;
+#endif
+#else
+#include <stdint.h>
+#endif
+
+/* Needed everywhere. */
+#include <string.h>
+#include <stdlib.h>
+
+/* Various VM limits. */
+#define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */
+#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */
+#define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */
+#define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */
+
+#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */
+#define LJ_MAX_HBITS 26 /* Max. hash bits. */
+#define LJ_MAX_ABITS 28 /* Max. bits of array key. */
+#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */
+#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */
+
+#define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */
+#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */
+#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */
+#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */
+#define LJ_MAX_LOCVAR 200 /* Max. # of local variables. */
+#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */
+
+#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
+#define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */
+
+/* Minimum table/buffer sizes. */
+#define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */
+#define LJ_MIN_REGISTRY 2 /* Min. registry size (hbits). */
+#define LJ_MIN_STRTAB 256 /* Min. string table size (pow2). */
+#define LJ_MIN_SBUF 32 /* Min. string buffer length. */
+#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
+#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */
+#define LJ_MIN_KNUMSZ 16 /* Min. size for chained KNUM array. */
+
+/* JIT compiler limits. */
+#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
+#define LJ_MAX_PHI 32 /* Max. # of PHIs for a loop. */
+#define LJ_MAX_EXITSTUBGR 8 /* Max. # of exit stub groups. */
+
+/* Various macros. */
+#ifndef UNUSED
+#define UNUSED(x) ((void)(x)) /* to avoid warnings */
+#endif
+
+#ifndef cast
+#define cast(t, exp) ((t)(exp))
+#endif
+
+#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
+#define cast_byte(i) cast(uint8_t, (i))
+#define cast_num(i) cast(lua_Number, (i))
+#define cast_int(i) cast(int, (i))
+#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
+#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
+
+#define checki8(x) ((x) == (int32_t)(int8_t)(x))
+#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
+#define checki16(x) ((x) == (int32_t)(int16_t)(x))
+
+/* Every half-decent C compiler transforms this into a rotate instruction. */
+#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(32-(n))))
+#define lj_ror(x, n) (((x)<<(32-(n))) | ((x)>>(n)))
+
+/* A really naive Bloom filter. But sufficient for our needs. */
+typedef uintptr_t BloomFilter;
+#define BLOOM_MASK (8*sizeof(BloomFilter) - 1)
+#define bloombit(x) ((uintptr_t)1 << ((x) & BLOOM_MASK))
+#define bloomset(b, x) ((b) |= bloombit((x)))
+#define bloomtest(b, x) ((b) & bloombit((x)))
+
+#if defined(__GNUC__)
+
+#if (__GNUC__ < 3) || ((__GNUC__ == 3) && __GNUC_MINOR__ < 4)
+#error "sorry, need GCC 3.4 or newer"
+#endif
+
+#define LJ_NORET __attribute__((noreturn))
+#define LJ_ALIGN(n) __attribute__((aligned(n)))
+#define LJ_INLINE inline
+#define LJ_AINLINE inline __attribute__((always_inline))
+#define LJ_NOINLINE __attribute__((noinline))
+
+#if defined(__ELF__) || defined(__MACH__)
+#define LJ_NOAPI extern __attribute__((visibility("hidden")))
+#endif
+
+/* Note: it's only beneficial to use fastcall on x86 and then only for up to
+** two non-FP args. The amalgamated compile covers all LJ_FUNC cases. Only
+** indirect calls and related tail-called C functions are marked as fastcall.
+*/
+#if defined(__i386__)
+#define LJ_FASTCALL __attribute__((fastcall))
+#endif
+
+#define LJ_LIKELY(x) __builtin_expect(!!(x), 1)
+#define LJ_UNLIKELY(x) __builtin_expect(!!(x), 0)
+
+#define lj_ffs(x) ((uint32_t)__builtin_ctz(x))
+/* Don't ask ... */
+#if defined(__INTEL_COMPILER) && (defined(__i386__) || defined(__x86_64__))
+static LJ_AINLINE uint32_t lj_fls(uint32_t x)
+{
+ uint32_t r; __asm__("bsrl %1, %0" : "=r" (r) : "rm" (x) : "cc"); return r;
+}
+#else
+#define lj_fls(x) ((uint32_t)(__builtin_clz(x)^31))
+#endif
+
+#if defined(__i386__) || defined(__x86_64__)
+static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
+{
+ uint32_t r; __asm__("bswap %0" : "=r" (r) : "0" (x)); return r;
+}
+#else
+#error "missing define for lj_bswap()"
+#endif
+
+#elif defined(_MSC_VER)
+
+#define LJ_NORET __declspec(noreturn)
+#define LJ_ALIGN(n) __declspec(align(n))
+#define LJ_INLINE __inline
+#define LJ_AINLINE __forceinline
+#define LJ_NOINLINE __declspec(noinline)
+#if defined(_M_IX86)
+#define LJ_FASTCALL __fastcall
+#endif
+
+static LJ_AINLINE uint32_t lj_ffs(uint32_t x)
+{
+ uint32_t r; _BitScanForward(&r, x); return r;
+}
+
+static LJ_AINLINE uint32_t lj_fls(uint32_t x)
+{
+ uint32_t r; _BitScanReverse(&r, x); return r;
+}
+
+#define lj_bswap(x) (_byteswap_ulong((x)))
+
+#else
+#error "missing defines for your compiler"
+#endif
+
+/* Optional defines. */
+#ifndef LJ_FASTCALL
+#define LJ_FASTCALL
+#endif
+#ifndef LJ_NORET
+#define LJ_NORET
+#endif
+#ifndef LJ_NOAPI
+#define LJ_NOAPI extern
+#endif
+#ifndef LJ_LIKELY
+#define LJ_LIKELY(x) (x)
+#define LJ_UNLIKELY(x) (x)
+#endif
+
+/* Attributes for internal functions. */
+#if defined(ljamalg_c)
+#define LJ_DATA static
+#define LJ_DATADEF static
+#define LJ_FUNC static
+#define LJ_ASMF LJ_NOAPI
+#define LJ_FUNCA LJ_NOAPI
+#else
+#define LJ_DATA LJ_NOAPI
+#define LJ_DATADEF
+#define LJ_FUNC LJ_NOAPI
+#define LJ_ASMF LJ_NOAPI
+#define LJ_FUNCA LJ_NOAPI
+#endif
+#define LJ_FUNC_NORET LJ_FUNC LJ_NORET
+#define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET
+#define LJ_ASMF_NORET LJ_ASMF LJ_NORET
+
+/* Runtime assertions. */
+#ifdef lua_assert
+#define check_exp(c, e) (lua_assert(c), (e))
+#define api_check(l, e) lua_assert(e)
+#else
+#define lua_assert(c) ((void)0)
+#define check_exp(c, e) (e)
+#define api_check luai_apicheck
+#endif
+
+/* Static assertions. */
+#define LJ_ASSERT_NAME2(name, line) name ## line
+#define LJ_ASSERT_NAME(line) LJ_ASSERT_NAME2(lj_assert_, line)
+#define LJ_STATIC_ASSERT(cond) \
+ extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1])
+
+#endif
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
new file mode 100644
index 00000000..d2fce2e0
--- /dev/null
+++ b/src/lj_dispatch.c
@@ -0,0 +1,284 @@
+/*
+** Instruction dispatch handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_dispatch_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_state.h"
+#include "lj_frame.h"
+#include "lj_bc.h"
+#if LJ_HASJIT
+#include "lj_jit.h"
+#endif
+#include "lj_trace.h"
+#include "lj_dispatch.h"
+#include "lj_vm.h"
+#include "luajit.h"
+
+/* -- Dispatch table management ------------------------------------------- */
+
+/* Initialize instruction dispatch table and hot counters. */
+void lj_dispatch_init(GG_State *GG)
+{
+ uint32_t i;
+ ASMFunction *disp = GG->dispatch;
+ for (i = 0; i < BC__MAX; i++)
+ disp[GG_DISP_STATIC+i] = disp[i] = makeasmfunc(lj_vm_op_ofs[i]);
+ /* The JIT engine is off by default. luaopen_jit() turns it on. */
+ disp[BC_FORL] = disp[BC_IFORL];
+ disp[BC_ITERL] = disp[BC_IITERL];
+ disp[BC_LOOP] = disp[BC_ILOOP];
+}
+
+/* Update dispatch table depending on various flags. */
+void lj_dispatch_update(global_State *g)
+{
+ uint8_t oldmode = g->dispatchmode;
+ uint8_t mode = 0;
+#if LJ_HASJIT
+ mode |= (G2J(g)->flags & JIT_F_ON) ? 1 : 0;
+ mode |= G2J(g)->state != LJ_TRACE_IDLE ? 6 : 0;
+#endif
+ mode |= (g->hookmask & HOOK_EVENTMASK) ? 2 : 0;
+ if (oldmode != mode) { /* Mode changed? */
+ ASMFunction *disp = G2GG(g)->dispatch;
+ ASMFunction f_forl, f_iterl, f_loop;
+ g->dispatchmode = mode;
+ if ((mode & 5) == 1) { /* Hotcount if JIT is on, but not when recording. */
+ f_forl = makeasmfunc(lj_vm_op_ofs[BC_FORL]);
+ f_iterl = makeasmfunc(lj_vm_op_ofs[BC_ITERL]);
+ f_loop = makeasmfunc(lj_vm_op_ofs[BC_LOOP]);
+ } else { /* Otherwise use the non-hotcounting instructions. */
+ f_forl = disp[GG_DISP_STATIC+BC_IFORL];
+ f_iterl = disp[GG_DISP_STATIC+BC_IITERL];
+ f_loop = disp[GG_DISP_STATIC+BC_ILOOP];
+ }
+ /* Set static loop ins first (may be copied below). */
+ disp[GG_DISP_STATIC+BC_FORL] = f_forl;
+ disp[GG_DISP_STATIC+BC_ITERL] = f_iterl;
+ disp[GG_DISP_STATIC+BC_LOOP] = f_loop;
+ if ((oldmode & 6) != (mode & 6)) { /* Need to change whole table? */
+ if ((mode & 6) == 0) { /* No hooks and no recording? */
+ /* Copy static dispatch table to dynamic dispatch table. */
+ memcpy(&disp[0], &disp[GG_DISP_STATIC], sizeof(ASMFunction)*BC__MAX);
+ } else {
+ /* The recording dispatch also checks for hooks. */
+ ASMFunction f = (mode & 6) == 6 ? lj_vm_record : lj_vm_hook;
+ uint32_t i;
+ for (i = 0; i < BC__MAX; i++)
+ disp[i] = f;
+ }
+ } else if ((mode & 6) == 0) { /* Fix dynamic loop ins unless overriden. */
+ disp[BC_FORL] = f_forl;
+ disp[BC_ITERL] = f_iterl;
+ disp[BC_LOOP] = f_loop;
+ }
+ }
+}
+
+/* -- JIT mode setting ---------------------------------------------------- */
+
+#if LJ_HASJIT
+/* Set JIT mode for a single prototype. */
+static void setptmode(global_State *g, GCproto *pt, int mode)
+{
+ if ((mode & LUAJIT_MODE_ON)) { /* (Re-)enable JIT compilation. */
+ pt->flags &= ~PROTO_NO_JIT;
+ lj_trace_reenableproto(pt); /* Unpatch all ILOOP etc. bytecodes. */
+ } else { /* Flush and/or disable JIT compilation. */
+ if (!(mode & LUAJIT_MODE_FLUSH))
+ pt->flags |= PROTO_NO_JIT;
+ lj_trace_flushproto(g, pt); /* Flush all traces of prototype. */
+ }
+}
+
+/* Recursively set the JIT mode for all children of a prototype. */
+static void setptmode_all(global_State *g, GCproto *pt, int mode)
+{
+ ptrdiff_t i;
+ for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) {
+ GCobj *o = gcref(pt->k.gc[i]);
+ if (o->gch.gct == ~LJ_TPROTO) {
+ setptmode(g, gco2pt(o), mode);
+ setptmode_all(g, gco2pt(o), mode);
+ }
+ }
+}
+#endif
+
+/* Public API function: control the JIT engine. */
+int luaJIT_setmode(lua_State *L, int idx, int mode)
+{
+ global_State *g = G(L);
+ int mm = mode & LUAJIT_MODE_MASK;
+ lj_trace_abort(g); /* Abort recording on any state change. */
+ /* Avoid pulling the rug from under our own feet. */
+ if ((g->hookmask & HOOK_GC))
+ lj_err_caller(L, LJ_ERR_NOGCMM);
+ switch (mm) {
+#if LJ_HASJIT
+ case LUAJIT_MODE_ENGINE:
+ if ((mode & LUAJIT_MODE_FLUSH)) {
+ lj_trace_flushall(L);
+ } else {
+ if ((mode & LUAJIT_MODE_ON))
+ G2J(g)->flags |= (uint32_t)JIT_F_ON;
+ else
+ G2J(g)->flags &= ~(uint32_t)JIT_F_ON;
+ lj_dispatch_update(g);
+ }
+ break;
+ case LUAJIT_MODE_FUNC:
+ case LUAJIT_MODE_ALLFUNC:
+ case LUAJIT_MODE_ALLSUBFUNC: {
+ cTValue *tv = idx == 0 ? frame_prev(L->base-1) :
+ idx > 0 ? L->base + (idx-1) : L->top + idx;
+ GCproto *pt;
+ if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn))
+ pt = funcproto(&gcval(tv)->fn); /* Cannot use funcV() for frame slot. */
+ else if (tvisproto(tv))
+ pt = protoV(tv);
+ else
+ return 0; /* Failed. */
+ if (mm != LUAJIT_MODE_ALLSUBFUNC)
+ setptmode(g, pt, mode);
+ if (mm != LUAJIT_MODE_FUNC)
+ setptmode_all(g, pt, mode);
+ break;
+ }
+ case LUAJIT_MODE_TRACE:
+ if (!(mode & LUAJIT_MODE_FLUSH))
+ return 0; /* Failed. */
+ lj_trace_flush(G2J(g), idx);
+ break;
+#else
+ case LUAJIT_MODE_ENGINE:
+ case LUAJIT_MODE_FUNC:
+ case LUAJIT_MODE_ALLFUNC:
+ case LUAJIT_MODE_ALLSUBFUNC:
+ UNUSED(idx);
+ if ((mode & LUAJIT_MODE_ON))
+ return 0; /* Failed. */
+ break;
+#endif
+ default:
+ return 0; /* Failed. */
+ }
+ return 1; /* OK. */
+}
+
+/* Enforce (dynamic) linker error for version mismatches. See luajit.c. */
+LUA_API void LUAJIT_VERSION_SYM(void)
+{
+}
+
+/* -- Hooks --------------------------------------------------------------- */
+
+/* This function can be called asynchronously (e.g. during a signal). */
+LUA_API int lua_sethook(lua_State *L, lua_Hook func, int mask, int count)
+{
+ global_State *g = G(L);
+ mask &= HOOK_EVENTMASK;
+ if (func == NULL || mask == 0) { mask = 0; func = NULL; } /* Consistency. */
+ g->hookf = func;
+ g->hookcount = g->hookcstart = (int32_t)count;
+ g->hookmask = (uint8_t)((g->hookmask & ~HOOK_EVENTMASK) | mask);
+ lj_trace_abort(g); /* Abort recording on any hook change. */
+ lj_dispatch_update(g);
+ return 1;
+}
+
+LUA_API lua_Hook lua_gethook(lua_State *L)
+{
+ return G(L)->hookf;
+}
+
+LUA_API int lua_gethookmask(lua_State *L)
+{
+ return G(L)->hookmask & HOOK_EVENTMASK;
+}
+
+LUA_API int lua_gethookcount(lua_State *L)
+{
+ return (int)G(L)->hookcstart;
+}
+
+/* Call a hook. */
+static void callhook(lua_State *L, int event, BCLine line)
+{
+ global_State *g = G(L);
+ lua_Hook hookf = g->hookf;
+ if (hookf && !hook_active(g)) {
+ lua_Debug ar;
+ lj_trace_abort(g); /* Abort recording on any hook call. */
+ ar.event = event;
+ ar.currentline = line;
+ ar.i_ci = cast_int((L->base-1) - L->stack); /* Top frame, nextframe=NULL. */
+ lj_state_checkstack(L, 1+LUA_MINSTACK);
+ hook_enter(g);
+ hookf(L, &ar);
+ lua_assert(hook_active(g));
+ hook_leave(g);
+ }
+}
+
+/* -- Instruction dispatch callbacks -------------------------------------- */
+
+/* Calculate number of used stack slots in the current frame. */
+static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres)
+{
+ BCIns ins = pc[-1];
+ for (;;) {
+ switch (bc_op(ins)) {
+ case BC_UCLO: ins = pc[bc_j(ins)]; break;
+ case BC_CALLM:
+ case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1;
+ case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
+ case BC_TSETM: return bc_a(ins) + nres-1;
+ default: return pt->framesize;
+ }
+ }
+}
+
+/* Instruction dispatch callback for instr/line hooks or when recording. */
+void lj_dispatch_ins(lua_State *L, const BCIns *pc, uint32_t nres)
+{
+ GCfunc *fn = curr_func(L);
+ GCproto *pt = funcproto(fn);
+ BCReg slots = cur_topslot(pt, pc, nres);
+ global_State *g = G(L);
+ const BCIns *oldpc = cframe_Lpc(L);
+ cframe_Lpc(L) = pc;
+ L->top = L->base + slots; /* Fix top. */
+#if LJ_HASJIT
+ {
+ jit_State *J = G2J(g);
+ if (J->state != LJ_TRACE_IDLE) {
+ J->L = L;
+ J->pc = pc-1;
+ J->fn = fn;
+ J->pt = pt;
+ lj_trace_ins(J);
+ }
+ }
+#endif
+ if ((g->hookmask & LUA_MASKCOUNT) && g->hookcount == 0) {
+ g->hookcount = g->hookcstart;
+ callhook(L, LUA_HOOKCOUNT, -1);
+ }
+ if ((g->hookmask & LUA_MASKLINE) && pt->lineinfo) {
+ BCPos npc = (BCPos)(pc - pt->bc)-1;
+ BCPos opc = (BCPos)(oldpc - pt->bc)-1;
+ BCLine line = pt->lineinfo[npc];
+ if (npc == 0 || pc <= oldpc ||
+ opc >= pt->sizebc || line != pt->lineinfo[opc]) {
+ L->top = L->base + slots; /* Fix top again after instruction hook. */
+ callhook(L, LUA_HOOKLINE, line);
+ }
+ }
+}
+
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
new file mode 100644
index 00000000..298aa166
--- /dev/null
+++ b/src/lj_dispatch.h
@@ -0,0 +1,64 @@
+/*
+** Instruction dispatch handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_DISPATCH_H
+#define _LJ_DISPATCH_H
+
+#include "lj_obj.h"
+#include "lj_bc.h"
+#if LJ_HASJIT
+#include "lj_jit.h"
+#endif
+
+/* Type of hot counter. Must match the code in the assembler VM. */
+/* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */
+typedef uint16_t HotCount;
+
+/* Number of hot counter hash table entries (must be a power of two). */
+#define HOTCOUNT_SIZE 64
+#define HOTCOUNT_PCMASK ((HOTCOUNT_SIZE-1)*sizeof(HotCount))
+#define HOTCOUNT_MIN_PENALTY 103
+#define HOTCOUNT_MAX_PENALTY 60000
+
+/* Global state, main thread and extra fields are allocated together. */
+typedef struct GG_State {
+ lua_State L; /* Main thread. */
+ global_State g; /* Global state. */
+#if LJ_HASJIT
+ jit_State J; /* JIT state. */
+ HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */
+#endif
+ ASMFunction dispatch[2*BC__MAX]; /* Instruction dispatch tables. */
+} GG_State;
+
+#define GG_DISP_STATIC BC__MAX
+
+#define GG_OFS(field) ((int)offsetof(GG_State, field))
+#define G2GG(gl) \
+ ((GG_State *)(((char *)(gl))-((char *)(&((GG_State *)0)->g))))
+#define J2GG(j) \
+ ((GG_State *)(((char *)(j))-((char *)(&((GG_State *)0)->J))))
+#define L2GG(L) G2GG(G(L))
+#define J2G(J) (&J2GG(J)->g)
+#define G2J(gl) (&G2GG(gl)->J)
+#define L2J(L) (&L2GG(L)->J)
+#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g))
+#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch))
+#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch))
+#define GG_DISP2HOT (GG_OFS(hotcount) - GG_OFS(dispatch))
+
+#define hotcount_get(gg, pc) \
+ (gg)->hotcount[(u32ptr(pc)>>2) & (HOTCOUNT_SIZE-1)]
+#define hotcount_set(gg, pc, val) \
+ (hotcount_get((gg), (pc)) = (HotCount)(val))
+
+/* Dispatch table management. */
+LJ_FUNC void lj_dispatch_init(GG_State *GG);
+LJ_FUNC void lj_dispatch_update(global_State *g);
+
+/* Instruction dispatch callback for instr/line hooks or when recording. */
+LJ_FUNCA void lj_dispatch_ins(lua_State *L, const BCIns *pc, uint32_t nres);
+
+#endif
diff --git a/src/lj_err.c b/src/lj_err.c
new file mode 100644
index 00000000..a723af48
--- /dev/null
+++ b/src/lj_err.c
@@ -0,0 +1,763 @@
+/*
+** Error handling and debugging API.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_err_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_func.h"
+#include "lj_state.h"
+#include "lj_frame.h"
+#include "lj_bc.h"
+#include "lj_trace.h"
+#include "lj_vm.h"
+
+/* -- Error messages ------------------------------------------------------ */
+
+/* Error message strings. */
+static const char *lj_err_allmsg =
+#define ERRDEF(name, msg) msg "\0"
+#include "lj_errmsg.h"
+;
+
+#define err2msg(em) (lj_err_allmsg+(int)(em))
+
+/* -- Frame and function introspection ------------------------------------ */
+
+static BCPos currentpc(lua_State *L, GCfunc *fn, cTValue *nextframe)
+{
+ const BCIns *ins;
+ lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD);
+ if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */
+ return ~(BCPos)0;
+ } else if (nextframe == NULL) { /* Lua function on top. */
+ ins = cframe_Lpc(L); /* Only happens during error/hook handling. */
+ } else {
+ if (frame_islua(nextframe)) {
+ ins = frame_pc(nextframe);
+ } else if (frame_iscont(nextframe)) {
+ ins = frame_contpc(nextframe);
+ } else {
+ /* Lua function below errfunc/gc/hook: find cframe to get the PC. */
+ void *cf = cframe_raw(L->cframe);
+ TValue *f = L->base-1;
+ while (f > nextframe) {
+ if (frame_islua(f)) {
+ f = frame_prevl(f);
+ } else {
+ if (frame_isc(f))
+ cf = cframe_raw(cframe_prev(cf));
+ f = frame_prevd(f);
+ }
+ }
+ if (cframe_prev(cf))
+ cf = cframe_raw(cframe_prev(cf));
+ ins = cframe_pc(cf);
+ }
+ }
+ return (BCPos)((ins - funcproto(fn)->bc) - 1);
+}
+
+static BCLine currentline(lua_State *L, GCfunc *fn, cTValue *nextframe)
+{
+ BCPos pc = currentpc(L, fn, nextframe);
+ if (pc != ~(BCPos)0) {
+ GCproto *pt = funcproto(fn);
+ lua_assert(pc < pt->sizebc);
+ return pt->lineinfo ? pt->lineinfo[pc] : 0;
+ } else {
+ return -1;
+ }
+}
+
+static const char *getvarname(const GCproto *pt, BCPos pc, BCReg slot)
+{
+ MSize i;
+ for (i = 0; i < pt->sizevarinfo && pt->varinfo[i].startpc <= pc; i++)
+ if (pc < pt->varinfo[i].endpc && slot-- == 0)
+ return strdata(pt->varinfo[i].name);
+ return NULL;
+}
+
+static const char *getobjname(GCproto *pt, const BCIns *ip, BCReg slot,
+ const char **name)
+{
+ const char *lname;
+restart:
+ lname = getvarname(pt, (BCPos)(ip - pt->bc), slot);
+ if (lname != NULL) { *name = lname; return "local"; }
+ while (--ip >= pt->bc) {
+ BCIns ins = *ip;
+ BCOp op = bc_op(ins);
+ BCReg ra = bc_a(ins);
+ if (bcmode_a(op) == BCMbase) {
+ if (slot >= ra && (op != BC_KNIL || slot <= bc_d(ins)))
+ return NULL;
+ } else if (bcmode_a(op) == BCMdst && ra == slot) {
+ switch (bc_op(ins)) {
+ case BC_MOV:
+ if (ra == slot) { slot = bc_d(ins); goto restart; }
+ break;
+ case BC_GGET:
+ *name = strdata(gco2str(gcref(pt->k.gc[~bc_d(ins)])));
+ return "global";
+ case BC_TGETS:
+ *name = strdata(gco2str(gcref(pt->k.gc[~bc_c(ins)])));
+ if (ip > pt->bc) {
+ BCIns insp = ip[-1];
+ if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 &&
+ bc_d(insp) == bc_b(ins))
+ return "method";
+ }
+ return "field";
+ case BC_UGET:
+ *name = pt->uvname ? strdata(pt->uvname[bc_d(ins)]) : "?";
+ return "upvalue";
+ default:
+ return NULL;
+ }
+ }
+ }
+ return NULL;
+}
+
+static const char *getfuncname(lua_State *L, TValue *frame, const char **name)
+{
+ MMS mm;
+ const BCIns *ip;
+ TValue *pframe;
+ GCfunc *fn;
+ BCPos pc;
+ if (frame_isvarg(frame))
+ frame = frame_prevd(frame);
+ pframe = frame_prev(frame);
+ fn = frame_func(pframe);
+ pc = currentpc(L, fn, frame);
+ if (pc == ~(BCPos)0)
+ return NULL;
+ lua_assert(pc < funcproto(fn)->sizebc);
+ ip = &funcproto(fn)->bc[pc];
+ mm = bcmode_mm(bc_op(*ip));
+ if (mm == MM_call) {
+ BCReg slot = bc_a(*ip);
+ if (bc_op(*ip) == BC_ITERC) slot -= 3;
+ return getobjname(funcproto(fn), ip, slot, name);
+ } else if (mm != MM_MAX) {
+ *name = strdata(strref(G(L)->mmname[mm]));
+ return "metamethod";
+ } else {
+ return NULL;
+ }
+}
+
+void lj_err_pushloc(lua_State *L, GCproto *pt, BCPos pc)
+{
+ GCstr *name = pt->chunkname;
+ if (name) {
+ const char *s = strdata(name);
+ MSize i, len = name->len;
+ BCLine line;
+ if (pc)
+ line = pt->lineinfo ? pt->lineinfo[pc-1] : 0;
+ else
+ line = pt->linedefined;
+ if (*s == '@') {
+ s++; len--;
+ for (i = len; i > 0; i--)
+ if (s[i] == '/' || s[i] == '\\') {
+ s += i+1;
+ break;
+ }
+ lj_str_pushf(L, "%s:%d", s, line);
+ } else if (len > 40) {
+ lj_str_pushf(L, "%p:%d", pt, line);
+ } else if (*s == '=') {
+ lj_str_pushf(L, "%s:%d", s+1, line);
+ } else {
+ lj_str_pushf(L, "\"%s\":%d", s, line);
+ }
+ } else {
+ lj_str_pushf(L, "%p:%u", pt, pc);
+ }
+}
+
+static void err_chunkid(char *out, const char *src)
+{
+ if (*src == '=') {
+ strncpy(out, src+1, LUA_IDSIZE); /* remove first char */
+ out[LUA_IDSIZE-1] = '\0'; /* ensures null termination */
+ } else if (*src == '@') { /* out = "source", or "...source" */
+ size_t l = strlen(++src); /* skip the `@' */
+ if (l >= LUA_IDSIZE) {
+ src += l-(LUA_IDSIZE-4); /* get last part of file name */
+ strcpy(out, "...");
+ out += 3;
+ }
+ strcpy(out, src);
+ } else { /* out = [string "string"] */
+ size_t len; /* Length, up to first control char. */
+ for (len = 0; len < LUA_IDSIZE-11; len++)
+ if (((const unsigned char *)src)[len] < ' ') break;
+ strcpy(out, "[string \""); out += 9;
+ if (src[len] != '\0') { /* must truncate? */
+ if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
+ strncpy(out, src, len); out += len;
+ strcpy(out, "..."); out += 3;
+ } else {
+ strcpy(out, src); out += len;
+ }
+ strcpy(out, "\"]");
+ }
+}
+
+/* -- Public debug API ---------------------------------------------------- */
+
+static TValue *findlocal(lua_State *L, const lua_Debug *ar,
+ const char **name, BCReg slot)
+{
+ uint32_t offset = (uint32_t)ar->i_ci & 0xffff;
+ uint32_t size = (uint32_t)ar->i_ci >> 16;
+ TValue *frame = L->stack + offset;
+ TValue *nextframe = size ? frame + size : NULL;
+ GCfunc *fn = frame_func(frame);
+ BCPos pc = currentpc(L, fn, nextframe);
+ if (pc != ~(BCPos)0 &&
+ (*name = getvarname(funcproto(fn), pc, slot-1)) != NULL)
+ ;
+ else if (slot > 0 && frame + slot < (nextframe ? nextframe : L->top))
+ *name = "(*temporary)";
+ else
+ *name = NULL;
+ return frame+slot;
+}
+
+LUA_API const char *lua_getlocal(lua_State *L, const lua_Debug *ar, int n)
+{
+ const char *name;
+ TValue *o = findlocal(L, ar, &name, (BCReg)n);
+ if (name) {
+ copyTV(L, L->top, o);
+ incr_top(L);
+ }
+ return name;
+}
+
+
+LUA_API const char *lua_setlocal(lua_State *L, const lua_Debug *ar, int n)
+{
+ const char *name;
+ TValue *o = findlocal(L, ar, &name, (BCReg)n);
+ if (name)
+ copyTV(L, o, L->top-1);
+ L->top--;
+ return name;
+}
+
+LUA_API int lua_getinfo(lua_State *L, const char *what, lua_Debug *ar)
+{
+ int status = 1;
+ TValue *frame = NULL;
+ TValue *nextframe = NULL;
+ GCfunc *fn;
+ if (*what == '>') {
+ TValue *func = L->top - 1;
+ api_check(L, tvisfunc(func));
+ fn = funcV(func);
+ L->top--;
+ what++;
+ } else {
+ uint32_t offset = (uint32_t)ar->i_ci & 0xffff;
+ uint32_t size = (uint32_t)ar->i_ci >> 16;
+ lua_assert(offset != 0);
+ frame = L->stack + offset;
+ if (size) nextframe = frame + size;
+ lua_assert(frame<=L->maxstack && (!nextframe || nextframe<=L->maxstack));
+ fn = frame_func(frame);
+ lua_assert(fn->c.gct == ~LJ_TFUNC);
+ }
+ for (; *what; what++) {
+ switch (*what) {
+ case 'S':
+ if (isluafunc(fn)) {
+ ar->source = strdata(funcproto(fn)->chunkname);
+ ar->linedefined = cast_int(funcproto(fn)->linedefined);
+ ar->lastlinedefined = cast_int(funcproto(fn)->lastlinedefined);
+ ar->what = (ar->linedefined == 0) ? "main" : "Lua";
+ } else {
+ ar->source = "=[C]";
+ ar->linedefined = -1;
+ ar->lastlinedefined = -1;
+ ar->what = "C";
+ }
+ err_chunkid(ar->short_src, ar->source);
+ break;
+ case 'l':
+ ar->currentline = frame ? currentline(L, fn, nextframe) : -1;
+ break;
+ case 'u':
+ ar->nups = fn->c.nupvalues;
+ break;
+ case 'n':
+ ar->namewhat = frame ? getfuncname(L, frame, &ar->name) : NULL;
+ if (ar->namewhat == NULL) {
+ ar->namewhat = "";
+ ar->name = NULL;
+ }
+ break;
+ case 'f':
+ setfuncV(L, L->top, fn);
+ incr_top(L);
+ break;
+ case 'L':
+ if (isluafunc(fn)) {
+ GCtab *t = lj_tab_new(L, 0, 0);
+ BCLine *lineinfo = funcproto(fn)->lineinfo;
+ uint32_t i, szl = funcproto(fn)->sizelineinfo;
+ for (i = 0; i < szl; i++)
+ setboolV(lj_tab_setint(L, t, lineinfo[i]), 1);
+ settabV(L, L->top, t);
+ } else {
+ setnilV(L->top);
+ }
+ incr_top(L);
+ break;
+ default:
+ status = 0; /* Bad option. */
+ break;
+ }
+ }
+ return status;
+}
+
+cTValue *lj_err_getframe(lua_State *L, int level, int *size)
+{
+ cTValue *frame, *nextframe;
+ /* Traverse frames backwards. */
+ for (nextframe = frame = L->base-1; frame > L->stack; ) {
+ if (frame_gc(frame) == obj2gco(L))
+ level++; /* Skip dummy frames. See lj_meta_call(). */
+ if (level-- == 0) {
+ *size = cast_int(nextframe - frame);
+ return frame; /* Level found. */
+ }
+ nextframe = frame;
+ if (frame_islua(frame)) {
+ frame = frame_prevl(frame);
+ } else {
+ if (frame_isvarg(frame))
+ level++; /* Skip vararg pseudo-frame. */
+ frame = frame_prevd(frame);
+ }
+ }
+ *size = level;
+ return NULL; /* Level not found. */
+}
+
+LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
+{
+ int size;
+ cTValue *frame = lj_err_getframe(L, level, &size);
+ if (frame) {
+ ar->i_ci = (size << 16) + cast_int(frame - L->stack);
+ return 1;
+ } else {
+ ar->i_ci = level - size;
+ return 0;
+ }
+}
+
+/* -- Error handling ------------------------------------------------------ */
+
+/* Return string object for error message. */
+LJ_NOINLINE GCstr *lj_err_str(lua_State *L, ErrMsg em)
+{
+ return lj_str_newz(L, err2msg(em));
+}
+
+/* Unwind Lua stack and add error message on top. */
+LJ_NOINLINE static void unwindstack(lua_State *L, TValue *top, int errcode)
+{
+ lj_func_closeuv(L, top);
+ switch (errcode) {
+ case LUA_ERRMEM:
+ setstrV(L, top, lj_err_str(L, LJ_ERR_ERRMEM));
+ break;
+ case LUA_ERRERR:
+ setstrV(L, top, lj_err_str(L, LJ_ERR_ERRERR));
+ break;
+ case LUA_ERRSYNTAX:
+ case LUA_ERRRUN:
+ copyTV(L, top, L->top - 1);
+ break;
+ default:
+ lua_assert(0);
+ break;
+ }
+ L->top = top+1;
+ lj_state_relimitstack(L);
+}
+
+/* Throw error. Find catch frame, unwind stack and continue. */
+LJ_NOINLINE void lj_err_throw(lua_State *L, int errcode)
+{
+ TValue *frame = L->base-1;
+ void *cf = L->cframe;
+ global_State *g = G(L);
+ if (L->status == LUA_ERRERR+1) { /* Don't touch the stack during lua_open. */
+ lj_vm_unwind_c(cf, errcode);
+ goto uncaught; /* unreachable */
+ }
+ lj_trace_abort(g);
+ setgcrefnull(g->jit_L);
+ L->status = 0;
+ while (cf) {
+ if (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */
+ TValue *top = restorestack(L, -cframe_nres(cf));
+ if (frame < top) {
+ L->cframe = cframe_prev(cf);
+ L->base = frame+1;
+ unwindstack(L, top, errcode);
+ lj_vm_unwind_c(cf, errcode);
+ goto uncaught; /* unreachable */
+ }
+ }
+ if (frame <= L->stack)
+ break;
+ switch (frame_typep(frame)) {
+ case FRAME_LUA:
+ case FRAME_LUAP:
+ frame = frame_prevl(frame);
+ break;
+ case FRAME_C:
+ if (cframe_canyield(cf)) goto uncaught;
+ cf = cframe_prev(cf);
+ /* fallthrough */
+ case FRAME_CONT:
+ case FRAME_VARG:
+ frame = frame_prevd(frame);
+ break;
+ case FRAME_CP:
+ L->cframe = cframe_prev(cf);
+ L->base = frame_prevd(frame) + 1;
+ unwindstack(L, frame, errcode);
+ lj_vm_unwind_c(cf, errcode);
+ goto uncaught; /* unreachable */
+ case FRAME_PCALL:
+ hook_leave(g);
+ /* fallthrough */
+ case FRAME_PCALLH:
+ L->cframe = cf;
+ L->base = frame_prevd(frame) + 1;
+ unwindstack(L, L->base, errcode);
+ lj_vm_unwind_ff(cf);
+ goto uncaught; /* unreachable */
+ default:
+ lua_assert(0);
+ goto uncaught;
+ }
+ }
+ /* No catch frame found. Must be a resume or an unprotected error. */
+uncaught:
+ L->status = cast_byte(errcode);
+ L->cframe = NULL;
+ if (cframe_canyield(cf)) { /* Resume? */
+ unwindstack(L, L->top, errcode);
+ lj_vm_unwind_c(cf, errcode);
+ }
+ /* Better rethrow on main thread than panic. */
+ {
+ if (L != mainthread(g))
+ lj_err_throw(mainthread(g), errcode);
+ if (g->panic) {
+ L->base = L->stack+1;
+ unwindstack(L, L->base, errcode);
+ g->panic(L);
+ }
+ }
+ exit(EXIT_FAILURE);
+}
+
+/* Find error function for runtime errors. Requires an extra stack traversal. */
+static ptrdiff_t finderrfunc(lua_State *L)
+{
+ TValue *frame = L->base-1;
+ void *cf = L->cframe;
+ while (frame > L->stack) {
+ lua_assert(cf != NULL);
+ while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */
+ if (frame >= restorestack(L, -cframe_nres(cf)))
+ break;
+ if (cframe_errfunc(cf) >= 0) /* Error handler not inherited (-1)? */
+ return cframe_errfunc(cf);
+ cf = cframe_prev(cf); /* Else unwind cframe and continue searching. */
+ if (cf == NULL)
+ return 0;
+ }
+ switch (frame_typep(frame)) {
+ case FRAME_LUA:
+ case FRAME_LUAP:
+ frame = frame_prevl(frame);
+ break;
+ case FRAME_C:
+ if (cframe_canyield(cf)) return 0;
+ cf = cframe_prev(cf);
+ /* fallthrough */
+ case FRAME_CONT:
+ case FRAME_VARG:
+ frame = frame_prevd(frame);
+ break;
+ case FRAME_CP:
+ if (cframe_errfunc(cf) >= 0)
+ return cframe_errfunc(cf);
+ frame = frame_prevd(frame);
+ break;
+ case FRAME_PCALL:
+ case FRAME_PCALLH:
+ if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */
+ return savestack(L, frame-1); /* Point to xpcall's errorfunc. */
+ return 0;
+ default:
+ lua_assert(0);
+ return 0;
+ }
+ }
+ return 0;
+}
+
+/* Runtime error. */
+LJ_NOINLINE void lj_err_run(lua_State *L)
+{
+ ptrdiff_t ef = finderrfunc(L);
+ if (ef) {
+ TValue *errfunc = restorestack(L, ef);
+ TValue *top = L->top;
+ lj_trace_abort(G(L));
+ if (!tvisfunc(errfunc) || L->status == LUA_ERRERR)
+ lj_err_throw(L, LUA_ERRERR);
+ L->status = LUA_ERRERR;
+ copyTV(L, top, top-1);
+ copyTV(L, top-1, errfunc);
+ L->top = top+1;
+ lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */
+ }
+ lj_err_throw(L, LUA_ERRRUN);
+}
+
+/* Add location to error message. */
+LJ_NOINLINE static void err_loc(lua_State *L, const char *msg,
+ cTValue *frame, cTValue *nextframe)
+{
+ if (frame) {
+ GCfunc *fn = frame_func(frame);
+ if (isluafunc(fn)) {
+ char buff[LUA_IDSIZE];
+ BCLine line = currentline(L, fn, nextframe);
+ err_chunkid(buff, strdata(funcproto(fn)->chunkname));
+ lj_str_pushf(L, "%s:%d: %s", buff, line, msg);
+ return;
+ }
+ }
+ lj_str_pushf(L, "%s", msg);
+}
+
+/* Formatted runtime error message. */
+LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
+{
+ const char *msg;
+ va_list argp;
+ va_start(argp, em);
+ if (curr_funcisL(L)) L->top = curr_topL(L);
+ msg = lj_str_pushvf(L, err2msg(em), argp);
+ va_end(argp);
+ err_loc(L, msg, L->base-1, NULL);
+ lj_err_run(L);
+}
+
+/* Non-vararg variant for better calling conventions. */
+LJ_NOINLINE void lj_err_msg(lua_State *L, ErrMsg em)
+{
+ err_msgv(L, em);
+}
+
+/* Lexer error. */
+LJ_NOINLINE void lj_err_lex(lua_State *L, const char *src, const char *tok,
+ BCLine line, ErrMsg em, va_list argp)
+{
+ char buff[LUA_IDSIZE];
+ const char *msg;
+ err_chunkid(buff, src);
+ msg = lj_str_pushvf(L, err2msg(em), argp);
+ msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg);
+ if (tok)
+ lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
+ lj_err_throw(L, LUA_ERRSYNTAX);
+}
+
+/* Typecheck error for operands. */
+LJ_NOINLINE void lj_err_optype(lua_State *L, cTValue *o, ErrMsg opm)
+{
+ const char *tname = typename(o);
+ const char *oname = NULL;
+ const char *opname = err2msg(opm);
+ if (curr_funcisL(L)) {
+ GCproto *pt = curr_proto(L);
+ const BCIns *pc = cframe_Lpc(L) - 1;
+ const char *kind = getobjname(pt, pc, (BCReg)(o - L->base), &oname);
+ if (kind)
+ err_msgv(L, LJ_ERR_BADOPRT, opname, kind, oname, tname);
+ }
+ err_msgv(L, LJ_ERR_BADOPRV, opname, tname);
+}
+
+/* Typecheck error for ordered comparisons. */
+LJ_NOINLINE void lj_err_comp(lua_State *L, cTValue *o1, cTValue *o2)
+{
+ const char *t1 = typename(o1);
+ const char *t2 = typename(o2);
+ err_msgv(L, t1 == t2 ? LJ_ERR_BADCMPV : LJ_ERR_BADCMPT, t1, t2);
+ /* This assumes the two "boolean" entries are commoned by the C compiler. */
+}
+
+/* Typecheck error for __call. */
+LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
+{
+ /* Gross hack if lua_[p]call or pcall/xpcall fail for a non-callable object:
+ ** L->base still points to the caller. So add a dummy frame with L instead
+ ** of a function. See lua_getstack().
+ */
+ const BCIns *pc = cframe_Lpc(L);
+ if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
+ const char *tname = typename(o);
+ setframe_pc(o, pc);
+ setframe_gc(o, obj2gco(L));
+ L->top = L->base = o+1;
+ err_msgv(L, LJ_ERR_BADCALL, tname);
+ }
+ lj_err_optype(L, o, LJ_ERR_OPCALL);
+}
+
+/* Error in context of caller. */
+LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
+{
+ cTValue *frame = L->base-1;
+ cTValue *pframe = frame_islua(frame) ? frame_prevl(frame) : NULL;
+ err_loc(L, msg, pframe, frame);
+ lj_err_run(L);
+}
+
+/* Formatted error in context of caller. */
+LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
+{
+ const char *msg;
+ va_list argp;
+ va_start(argp, em);
+ msg = lj_str_pushvf(L, err2msg(em), argp);
+ va_end(argp);
+ lj_err_callermsg(L, msg);
+}
+
+/* Error in context of caller. */
+LJ_NOINLINE void lj_err_caller(lua_State *L, ErrMsg em)
+{
+ lj_err_callermsg(L, err2msg(em));
+}
+
+/* Argument error message. */
+LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
+ const char *msg)
+{
+ const char *fname = "?";
+ const char *ftype = getfuncname(L, L->base - 1, &fname);
+ if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */
+ msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
+ else
+ msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
+ lj_err_callermsg(L, msg);
+}
+
+/* Formatted argument error. */
+LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
+{
+ const char *msg;
+ va_list argp;
+ va_start(argp, em);
+ msg = lj_str_pushvf(L, err2msg(em), argp);
+ va_end(argp);
+ err_argmsg(L, narg, msg);
+}
+
+/* Argument error. */
+LJ_NOINLINE void lj_err_arg(lua_State *L, int narg, ErrMsg em)
+{
+ err_argmsg(L, narg, err2msg(em));
+}
+
+/* Typecheck error for arguments. */
+LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
+{
+ TValue *o = L->base + narg-1;
+ const char *tname = o < L->top ? typename(o) : lj_obj_typename[0];
+ const char *msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
+ err_argmsg(L, narg, msg);
+}
+
+/* Typecheck error for arguments. */
+LJ_NOINLINE void lj_err_argt(lua_State *L, int narg, int tt)
+{
+ lj_err_argtype(L, narg, lj_obj_typename[tt+1]);
+}
+
+/* -- Public error handling API ------------------------------------------- */
+
+LUA_API lua_CFunction lua_atpanic(lua_State *L, lua_CFunction panicf)
+{
+ lua_CFunction old = G(L)->panic;
+ G(L)->panic = panicf;
+ return old;
+}
+
+/* Forwarders for the public API (C calling convention and no LJ_NORET). */
+LUA_API int lua_error(lua_State *L)
+{
+ lj_err_run(L);
+ return 0; /* unreachable */
+}
+
+LUALIB_API int luaL_argerror(lua_State *L, int narg, const char *msg)
+{
+ err_argmsg(L, narg, msg);
+ return 0; /* unreachable */
+}
+
+LUALIB_API int luaL_typerror(lua_State *L, int narg, const char *xname)
+{
+ lj_err_argtype(L, narg, xname);
+ return 0; /* unreachable */
+}
+
+LUALIB_API void luaL_where(lua_State *L, int level)
+{
+ int size;
+ cTValue *frame = lj_err_getframe(L, level, &size);
+ err_loc(L, "", frame, size ? frame+size : NULL);
+}
+
+LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
+{
+ const char *msg;
+ va_list argp;
+ va_start(argp, fmt);
+ msg = lj_str_pushvf(L, fmt, argp);
+ va_end(argp);
+ lj_err_callermsg(L, msg);
+ return 0; /* unreachable */
+}
+
diff --git a/src/lj_err.h b/src/lj_err.h
new file mode 100644
index 00000000..e794d44c
--- /dev/null
+++ b/src/lj_err.h
@@ -0,0 +1,40 @@
+/*
+** Error handling and debugging support.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_ERR_H
+#define _LJ_ERR_H
+
+#include <stdarg.h>
+
+#include "lj_obj.h"
+
+typedef enum {
+#define ERRDEF(name, msg) \
+ LJ_ERR_##name, LJ_ERR_##name##_ = LJ_ERR_##name + sizeof(msg)-1,
+#include "lj_errmsg.h"
+ LJ_ERR__MAX
+} ErrMsg;
+
+LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
+LJ_FUNC_NORET void lj_err_throw(lua_State *L, int errcode);
+LJ_FUNC_NORET void lj_err_run(lua_State *L);
+LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em);
+LJ_FUNC_NORET void lj_err_lex(lua_State *L, const char *src, const char *tok,
+ BCLine line, ErrMsg em, va_list argp);
+LJ_FUNC_NORET void lj_err_optype(lua_State *L, cTValue *o, ErrMsg opm);
+LJ_FUNC_NORET void lj_err_comp(lua_State *L, cTValue *o1, cTValue *o2);
+LJ_FUNC_NORET void lj_err_optype_call(lua_State *L, TValue *o);
+LJ_FUNC_NORET void lj_err_callermsg(lua_State *L, const char *msg);
+LJ_FUNC_NORET void lj_err_callerv(lua_State *L, ErrMsg em, ...);
+LJ_FUNC_NORET void lj_err_caller(lua_State *L, ErrMsg em);
+LJ_FUNC_NORET void lj_err_arg(lua_State *L, int narg, ErrMsg em);
+LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...);
+LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname);
+LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt);
+
+LJ_FUNC void lj_err_pushloc(lua_State *L, GCproto *pt, BCPos pc);
+LJ_FUNC cTValue *lj_err_getframe(lua_State *L, int level, int *size);
+
+#endif
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
new file mode 100644
index 00000000..03abd59b
--- /dev/null
+++ b/src/lj_errmsg.h
@@ -0,0 +1,134 @@
+/*
+** VM error messages.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* This file may be included multiple times with different ERRDEF macros. */
+
+/* Basic error handling. */
+ERRDEF(ERRMEM, "not enough memory")
+ERRDEF(ERRERR, "error in error handling")
+
+/* Allocations. */
+ERRDEF(STROV, "string length overflow")
+ERRDEF(UDATAOV, "userdata length overflow")
+ERRDEF(STKOV, "stack overflow")
+ERRDEF(STKOVM, "stack overflow (%s)")
+ERRDEF(TABOV, "table overflow")
+
+/* Table indexing. */
+ERRDEF(NANIDX, "table index is NaN")
+ERRDEF(NILIDX, "table index is nil")
+ERRDEF(NEXTIDX, "invalid key to " LUA_QL("next"))
+
+/* Metamethod resolving. */
+ERRDEF(BADCALL, "attempt to call a %s value")
+ERRDEF(BADOPRT, "attempt to %s %s " LUA_QS " (a %s value)")
+ERRDEF(BADOPRV, "attempt to %s a %s value")
+ERRDEF(BADCMPT, "attempt to compare %s with %s")
+ERRDEF(BADCMPV, "attempt to compare two %s values")
+ERRDEF(GETLOOP, "loop in gettable")
+ERRDEF(SETLOOP, "loop in settable")
+ERRDEF(OPCALL, "call")
+ERRDEF(OPINDEX, "index")
+ERRDEF(OPARITH, "perform arithmetic on")
+ERRDEF(OPCAT, "concatenate")
+ERRDEF(OPLEN, "get length of")
+
+/* Type checks. */
+ERRDEF(BADSELF, "calling " LUA_QS " on bad self (%s)")
+ERRDEF(BADARG, "bad argument #%d to " LUA_QS " (%s)")
+ERRDEF(BADTYPE, "%s expected, got %s")
+ERRDEF(BADVAL, "invalid value")
+ERRDEF(NOVAL, "value expected")
+ERRDEF(NOCORO, "coroutine expected")
+ERRDEF(NOTABN, "nil or table expected")
+ERRDEF(NOLFUNC, "Lua function expected")
+ERRDEF(NOFUNCL, "function or level expected")
+ERRDEF(NOSFT, "string/function/table expected")
+ERRDEF(NOPROXY, "boolean or proxy expected")
+ERRDEF(FORINIT, LUA_QL("for") " initial value must be a number")
+ERRDEF(FORLIM, LUA_QL("for") " limit must be a number")
+ERRDEF(FORSTEP, LUA_QL("for") " step must be a number")
+
+/* C API checks. */
+ERRDEF(NOENV, "no calling environment")
+ERRDEF(CYIELD, "attempt to yield across C-call boundary")
+ERRDEF(BADLU, "bad light userdata pointer")
+ERRDEF(NOGCMM, "bad action while in __gc metamethod")
+
+/* Standard library function errors. */
+ERRDEF(ASSERT, "assertion failed!")
+ERRDEF(PROTMT, "cannot change a protected metatable")
+ERRDEF(UNPACK, "too many results to unpack")
+ERRDEF(RDRSTR, "reader function must return a string")
+ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print"))
+ERRDEF(IDXRNG, "index out of range")
+ERRDEF(BASERNG, "base out of range")
+ERRDEF(LVLRNG, "level out of range")
+ERRDEF(INVLVL, "invalid level")
+ERRDEF(INVOPT, "invalid option")
+ERRDEF(INVOPTM, "invalid option " LUA_QS)
+ERRDEF(INVFMT, "invalid format")
+ERRDEF(SETFENV, LUA_QL("setfenv") " cannot change environment of given object")
+ERRDEF(CORUN, "cannot resume running coroutine")
+ERRDEF(CODEAD, "cannot resume dead coroutine")
+ERRDEF(COSUSP, "cannot resume non-suspended coroutine")
+ERRDEF(TABINS, "wrong number of arguments to " LUA_QL("insert"))
+ERRDEF(TABCAT, "invalid value (%s) at index %d in table for " LUA_QL("concat"))
+ERRDEF(TABSORT, "invalid order function for sorting")
+ERRDEF(IOCLFL, "attempt to use a closed file")
+ERRDEF(IOSTDCL, "standard file is closed")
+ERRDEF(OSUNIQF, "unable to generate a unique filename")
+ERRDEF(OSDATEF, "field " LUA_QS " missing in date table")
+ERRDEF(STRDUMP, "cannot dump functions")
+ERRDEF(STRSLC, "string slice too long")
+ERRDEF(STRPATB, "missing " LUA_QL("[") " after " LUA_QL("%f") " in pattern")
+ERRDEF(STRPATC, "invalid pattern capture")
+ERRDEF(STRPATE, "malformed pattern (ends with " LUA_QL("%") ")")
+ERRDEF(STRPATM, "malformed pattern (missing " LUA_QL("]") ")")
+ERRDEF(STRPATU, "unbalanced pattern")
+ERRDEF(STRCAPI, "invalid capture index")
+ERRDEF(STRCAPN, "too many captures")
+ERRDEF(STRCAPU, "unfinished capture")
+ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format"))
+ERRDEF(STRFMTR, "invalid format (repeated flags)")
+ERRDEF(STRFMTW, "invalid format (width or precision too long)")
+ERRDEF(STRGSRV, "invalid replacement value (a %s)")
+ERRDEF(BADMODN, "name conflict for module " LUA_QS)
+ERRDEF(NOJIT, "JIT compiler permanently disabled")
+ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
+
+/* Lexer/parser errors. */
+ERRDEF(XNEAR, "%s near " LUA_QS)
+ERRDEF(XELEM, "lexical element too long")
+ERRDEF(XLINES, "chunk has too many lines")
+ERRDEF(XLEVELS, "chunk has too many syntax levels")
+ERRDEF(XNUMBER, "malformed number")
+ERRDEF(XLSTR, "unfinished long string")
+ERRDEF(XLCOM, "unfinished long comment")
+ERRDEF(XSTR, "unfinished string")
+ERRDEF(XESC, "escape sequence too large")
+ERRDEF(XLDELIM, "invalid long string delimiter")
+ERRDEF(XBCLOAD, "cannot load Lua bytecode")
+ERRDEF(XTOKEN, LUA_QS " expected")
+ERRDEF(XJUMP, "control structure too long")
+ERRDEF(XSLOTS, "function or expression too complex")
+ERRDEF(XLIMM, "main function has more than %d %s")
+ERRDEF(XLIMF, "function at line %d has more than %d %s")
+ERRDEF(XMATCH, LUA_QS " expected (to close " LUA_QS " at line %d)")
+ERRDEF(XFIXUP, "function too long for return fixup")
+ERRDEF(XPARAM, "<name> or " LUA_QL("...") " expected")
+ERRDEF(XAMBIG, "ambiguous syntax (function call x new statement)")
+ERRDEF(XFUNARG, "function arguments expected")
+ERRDEF(XSYMBOL, "unexpected symbol")
+ERRDEF(XDOTS, "cannot use " LUA_QL("...") " outside a vararg function")
+ERRDEF(XSYNTAX, "syntax error")
+ERRDEF(XBREAK, "no loop to break")
+ERRDEF(XFOR, LUA_QL("=") " or " LUA_QL("in") " expected")
+
+#undef ERRDEF
+
+/* Detecting unused error messages:
+ awk -F, '/^ERRDEF/ { gsub(/ERRDEF./, ""); printf "grep -q LJ_ERR_%s *.[ch] || echo %s\n", $1, $1}' lj_errmsg.h | sh
+*/
diff --git a/src/lj_ff.h b/src/lj_ff.h
new file mode 100644
index 00000000..6dfd73a7
--- /dev/null
+++ b/src/lj_ff.h
@@ -0,0 +1,18 @@
+/*
+** Fast function IDs.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_FF_H
+#define _LJ_FF_H
+
+/* Fast function ID. */
+typedef enum {
+ FF_LUA_ = FF_LUA, /* Lua function (must be 0). */
+ FF_C_ = FF_C, /* Regular C function (must be 1). */
+#define FFDEF(name) FF_##name,
+#include "lj_ffdef.h"
+ FF__MAX
+} FastFunc;
+
+#endif
diff --git a/src/lj_frame.h b/src/lj_frame.h
new file mode 100644
index 00000000..1c03e3e1
--- /dev/null
+++ b/src/lj_frame.h
@@ -0,0 +1,84 @@
+/*
+** Stack frames.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_FRAME_H
+#define _LJ_FRAME_H
+
+#include "lj_obj.h"
+#include "lj_bc.h"
+
+/* -- Lua stack frame ----------------------------------------------------- */
+
+/* Frame type markers in callee function slot (callee base-1). */
+enum {
+ FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
+ FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
+};
+#define FRAME_TYPE 3
+#define FRAME_P 4
+#define FRAME_TYPEP (FRAME_TYPE|FRAME_P)
+
+/* Macros to access and modify Lua frames. */
+#define frame_gc(f) (gcref((f)->fr.func))
+#define frame_func(f) (&frame_gc(f)->fn)
+#define frame_ftsz(f) ((f)->fr.tp.ftsz)
+
+#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE)
+#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP)
+#define frame_islua(f) (frame_type(f) == FRAME_LUA)
+#define frame_isc(f) (frame_type(f) == FRAME_C)
+#define frame_iscont(f) (frame_typep(f) == FRAME_CONT)
+#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG)
+#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL)
+
+#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns))
+#define frame_contpc(f) (frame_pc((f)-1))
+#if LJ_64
+#define frame_contf(f) \
+ ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin+(((f)-1)->u64 & 0xffffffff)))
+#else
+#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void))
+#endif
+#define frame_delta(f) (frame_ftsz(f) >> 3)
+#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP)
+
+#define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1])))
+#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f)))
+#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f))
+/* Note: this macro does not skip over FRAME_VARG. */
+
+#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
+#define setframe_gc(f, p) (setgcref((f)->fr.func, (p)))
+
+/* -- C stack frame ------------------------------------------------------- */
+
+/* Macros to access and modify the C stack frame chain. */
+
+/* These definitions must match with the arch-specific *.dasc files. */
+#if LJ_TARGET_X86
+#define CFRAME_OFS_ERRF (15*sizeof(void *))
+#define CFRAME_OFS_NRES (14*sizeof(void *))
+#define CFRAME_OFS_PREV (13*sizeof(void *))
+#define CFRAME_OFS_L (12*sizeof(void *))
+#define CFRAME_OFS_PC (6*sizeof(void *))
+#define CFRAME_SIZE (12*sizeof(void *))
+#else
+#error "Missing CFRAME_* definitions for this architecture"
+#endif
+
+#define CFRAME_RESUME 1
+#define CFRAME_CANYIELD ((intptr_t)(CFRAME_RESUME))
+#define CFRAME_RAWMASK (~CFRAME_CANYIELD)
+
+#define cframe_errfunc(cf) (*(ptrdiff_t *)(((char *)cf)+CFRAME_OFS_ERRF))
+#define cframe_nres(cf) (*(ptrdiff_t *)(((char *)cf)+CFRAME_OFS_NRES))
+#define cframe_prev(cf) (*(void **)(((char *)cf)+CFRAME_OFS_PREV))
+#define cframe_L(cf) (*(lua_State **)(((char *)cf)+CFRAME_OFS_L))
+#define cframe_pc(cf) (*(const BCIns **)(((char *)cf)+CFRAME_OFS_PC))
+#define cframe_canyield(cf) ((intptr_t)(cf) & CFRAME_CANYIELD)
+#define cframe_raw(cf) ((void *)((intptr_t)(cf) & CFRAME_RAWMASK))
+#define cframe_Lpc(L) cframe_pc(cframe_raw(L->cframe))
+
+#endif
diff --git a/src/lj_func.c b/src/lj_func.c
new file mode 100644
index 00000000..92cdeda2
--- /dev/null
+++ b/src/lj_func.c
@@ -0,0 +1,185 @@
+/*
+** Function handling (prototypes, functions and upvalues).
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_func_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_func.h"
+#include "lj_trace.h"
+#include "lj_vm.h"
+
+/* -- Prototypes ---------------------------------------------------------- */
+
+GCproto *lj_func_newproto(lua_State *L)
+{
+ GCproto *pt = lj_mem_newobj(L, GCproto);
+ pt->gct = ~LJ_TPROTO;
+ pt->numparams = 0;
+ pt->framesize = 0;
+ pt->sizeuv = 0;
+ pt->flags = 0;
+ pt->trace = 0;
+ pt->k.n = NULL;
+ pt->bc = NULL;
+ pt->uv = NULL;
+ pt->sizebc = 0;
+ pt->sizekgc = 0;
+ pt->sizekn = 0;
+ pt->sizelineinfo = 0;
+ pt->sizevarinfo = 0;
+ pt->sizeuvname = 0;
+ pt->linedefined = 0;
+ pt->lastlinedefined = 0;
+ pt->lineinfo = NULL;
+ pt->varinfo = NULL;
+ pt->uvname = NULL;
+ pt->chunkname = NULL;
+ return pt;
+}
+
+void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt)
+{
+ MSize nkgc = round_nkgc(pt->sizekgc);
+ MSize sizek = nkgc*(MSize)sizeof(GCobj *) +
+ pt->sizekn*(MSize)sizeof(lua_Number);
+ lj_mem_free(g, pt->k.gc - nkgc, sizek);
+ lj_mem_freevec(g, pt->bc, pt->sizebc, BCIns);
+ lj_mem_freevec(g, pt->uv, pt->sizeuv, int16_t);
+ lj_mem_freevec(g, pt->lineinfo, pt->sizelineinfo, int32_t);
+ lj_mem_freevec(g, pt->varinfo, pt->sizevarinfo, struct VarInfo);
+ lj_mem_freevec(g, pt->uvname, pt->sizeuvname, GCstr *);
+ lj_trace_freeproto(g, pt);
+ lj_mem_freet(g, pt);
+}
+
+/* -- Upvalues ------------------------------------------------------------ */
+
+static void unlinkuv(GCupval *uv)
+{
+ lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
+ setgcrefr(uvnext(uv)->prev, uv->prev);
+ setgcrefr(uvprev(uv)->next, uv->next);
+}
+
+/* Find existing open upvalue for a stack slot or create a new one. */
+static GCupval *func_finduv(lua_State *L, TValue *slot)
+{
+ global_State *g = G(L);
+ GCRef *pp = &L->openupval;
+ GCupval *p;
+ GCupval *uv;
+ /* Search the sorted list of open upvalues. */
+ while (gcref(*pp) != NULL && (p = gco2uv(gcref(*pp)))->v >= slot) {
+ lua_assert(!p->closed && p->v != &p->tv);
+ if (p->v == slot) { /* Found open upvalue pointing to same slot? */
+ if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */
+ flipwhite(obj2gco(p));
+ return p;
+ }
+ pp = &p->nextgc;
+ }
+ /* No matching upvalue found. Create a new one. */
+ uv = lj_mem_newt(L, sizeof(GCupval), GCupval);
+ newwhite(g, uv);
+ uv->gct = ~LJ_TUPVAL;
+ uv->closed = 0; /* Still open. */
+ uv->v = slot; /* Pointing to the stack slot. */
+ /* NOBARRIER: The GCupval is new (marked white) and open. */
+ setgcrefr(uv->nextgc, *pp); /* Insert into sorted list of open upvalues. */
+ setgcref(*pp, obj2gco(uv));
+ setgcref(uv->prev, obj2gco(&g->uvhead)); /* Insert into GC list, too. */
+ setgcrefr(uv->next, g->uvhead.next);
+ setgcref(uvnext(uv)->prev, obj2gco(uv));
+ setgcref(g->uvhead.next, obj2gco(uv));
+ lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
+ return uv;
+}
+
+/* Close all open upvalues pointing to some stack level or above. */
+void lj_func_closeuv(lua_State *L, TValue *level)
+{
+ GCupval *uv;
+ global_State *g = G(L);
+ while (gcref(L->openupval) != NULL &&
+ (uv = gco2uv(gcref(L->openupval)))->v >= level) {
+ GCobj *o = obj2gco(uv);
+ lua_assert(!isblack(o) && !uv->closed && uv->v != &uv->tv);
+ setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */
+ if (isdead(g, o)) {
+ lj_func_freeuv(g, uv);
+ } else {
+ unlinkuv(uv);
+ lj_gc_closeuv(g, uv);
+ }
+ }
+}
+
+void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv)
+{
+ if (!uv->closed)
+ unlinkuv(uv);
+ lj_mem_freet(g, uv);
+}
+
+/* -- Functions (closures) ------------------------------------------------ */
+
+GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env)
+{
+ GCfunc *fn = cast(GCfunc *, lj_mem_newgco(L, sizeCfunc(nelems)));
+ fn->c.gct = ~LJ_TFUNC;
+ fn->c.ffid = FF_C;
+ fn->c.nupvalues = cast_byte(nelems);
+ /* NOBARRIER: The GCfunc is new (marked white). */
+ setgcref(fn->c.env, obj2gco(env));
+ fn->c.gate = lj_gate_c;
+ return fn;
+}
+
+GCfunc *lj_func_newL(lua_State *L, GCproto *pt, GCtab *env)
+{
+ GCfunc *fn = cast(GCfunc *, lj_mem_newgco(L, sizeLfunc((MSize)pt->sizeuv)));
+ fn->l.gct = ~LJ_TFUNC;
+ fn->l.ffid = FF_LUA;
+ fn->l.nupvalues = cast_byte(pt->sizeuv);
+ /* NOBARRIER: The GCfunc is new (marked white). */
+ setgcref(fn->l.pt, obj2gco(pt));
+ setgcref(fn->l.env, obj2gco(env));
+ fn->l.gate = (pt->flags & PROTO_IS_VARARG) ? lj_gate_lv : lj_gate_lf;
+ return fn;
+}
+
+/* Do a GC check and create a new Lua function with inherited upvalues. */
+GCfunc *lj_func_newL_gc(lua_State *L, GCproto *pt, GCfuncL *parent)
+{
+ GCfunc *fn;
+ GCRef *puv;
+ uint32_t i, nuv;
+ TValue *base;
+ lj_gc_check_fixtop(L);
+ fn = lj_func_newL(L, pt, tabref(parent->env));
+ /* NOBARRIER: The GCfunc is new (marked white). */
+ puv = parent->uvptr;
+ nuv = fn->l.nupvalues;
+ base = L->base;
+ for (i = 0; i < nuv; i++) {
+ int v = pt->uv[i];
+ GCupval *uv = v < 0 ? &gcref(puv[~v])->uv : func_finduv(L, base + v);
+ setgcref(fn->l.uvptr[i], obj2gco(uv));
+ }
+ return fn;
+}
+
+void LJ_FASTCALL lj_func_free(global_State *g, GCfunc *fn)
+{
+ MSize size = isluafunc(fn) ? sizeLfunc((MSize)fn->l.nupvalues) :
+ sizeCfunc((MSize)fn->c.nupvalues);
+ lj_mem_free(g, fn, size);
+}
+
diff --git a/src/lj_func.h b/src/lj_func.h
new file mode 100644
index 00000000..ee7942ea
--- /dev/null
+++ b/src/lj_func.h
@@ -0,0 +1,25 @@
+/*
+** Function handling (prototypes, functions and upvalues).
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_FUNC_H
+#define _LJ_FUNC_H
+
+#include "lj_obj.h"
+
+/* Prototypes. */
+LJ_FUNC GCproto *lj_func_newproto(lua_State *L);
+LJ_FUNC void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt);
+
+/* Upvalues. */
+LJ_FUNCA void lj_func_closeuv(lua_State *L, TValue *level);
+LJ_FUNC void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv);
+
+/* Functions (closures). */
+LJ_FUNC GCfunc *lj_func_newC(lua_State *L, MSize nelems, GCtab *env);
+LJ_FUNC GCfunc *lj_func_newL(lua_State *L, GCproto *pt, GCtab *env);
+LJ_FUNCA GCfunc *lj_func_newL_gc(lua_State *L, GCproto *pt, GCfuncL *parent);
+LJ_FUNC void LJ_FASTCALL lj_func_free(global_State *g, GCfunc *c);
+
+#endif
diff --git a/src/lj_gc.c b/src/lj_gc.c
new file mode 100644
index 00000000..e479b567
--- /dev/null
+++ b/src/lj_gc.c
@@ -0,0 +1,800 @@
+/*
+** Garbage collector.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_gc_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_func.h"
+#include "lj_udata.h"
+#include "lj_meta.h"
+#include "lj_state.h"
+#include "lj_frame.h"
+#include "lj_trace.h"
+#include "lj_vm.h"
+
+#define GCSTEPSIZE 1024u
+#define GCSWEEPMAX 40
+#define GCSWEEPCOST 10
+#define GCFINALIZECOST 100
+
+/* Macros to set GCobj colors and flags. */
+#define white2gray(x) ((x)->gch.marked &= cast_byte(~LJ_GC_WHITES))
+#define black2gray(x) ((x)->gch.marked &= cast_byte(~LJ_GC_BLACK))
+#define gray2black(x) ((x)->gch.marked |= LJ_GC_BLACK)
+#define makewhite(g, x) \
+ ((x)->gch.marked = ((x)->gch.marked & cast_byte(~LJ_GC_COLORS)) | curwhite(g))
+#define isfinalized(u) ((u)->marked & LJ_GC_FINALIZED)
+#define markfinalized(u) ((u)->marked |= LJ_GC_FINALIZED)
+
+/* -- Mark phase ---------------------------------------------------------- */
+
+/* Mark a TValue (if needed). */
+#define gc_marktv(g, tv) \
+ { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \
+ if (tviswhite(tv)) gc_mark(g, gcV(tv)); }
+
+/* Mark a GCobj (if needed). */
+#define gc_markobj(g, o) \
+ { if (iswhite(obj2gco(o))) gc_mark(g, obj2gco(o)); }
+
+/* Mark a string object. */
+#define gc_mark_str(s) ((s)->marked &= cast_byte(~LJ_GC_WHITES))
+
+/* Mark a white GCobj. */
+static void gc_mark(global_State *g, GCobj *o)
+{
+ lua_assert(iswhite(o) && !isdead(g, o));
+ white2gray(o);
+ if (LJ_UNLIKELY(o->gch.gct == ~LJ_TUDATA)) {
+ GCtab *mt = tabref(gco2ud(o)->metatable);
+ gray2black(o); /* Userdata are never gray. */
+ if (mt) gc_markobj(g, mt);
+ gc_markobj(g, tabref(gco2ud(o)->env));
+ } else if (LJ_UNLIKELY(o->gch.gct == ~LJ_TUPVAL)) {
+ GCupval *uv = gco2uv(o);
+ gc_marktv(g, uv->v);
+ if (uv->closed)
+ gray2black(o); /* Closed upvalues are never gray. */
+ } else if (o->gch.gct != ~LJ_TSTR) {
+ lua_assert(o->gch.gct == ~LJ_TFUNC || o->gch.gct == ~LJ_TTAB ||
+ o->gch.gct == ~LJ_TTHREAD || o->gch.gct == ~LJ_TPROTO);
+ setgcrefr(o->gch.gclist, g->gc.gray);
+ setgcref(g->gc.gray, o);
+ }
+}
+
+/* Mark the base metatables. */
+static void gc_mark_basemt(global_State *g)
+{
+ int i;
+ for (i = 0; i < BASEMT_MAX; i++)
+ if (tabref(g->basemt[i]) != NULL)
+ gc_markobj(g, tabref(g->basemt[i]));
+}
+
+/* Start a GC cycle and mark the root set. */
+static void gc_mark_start(global_State *g)
+{
+ setgcrefnull(g->gc.gray);
+ setgcrefnull(g->gc.grayagain);
+ setgcrefnull(g->gc.weak);
+ gc_markobj(g, mainthread(g));
+ gc_markobj(g, tabref(mainthread(g)->env));
+ gc_marktv(g, &g->registrytv);
+ gc_mark_basemt(g);
+ g->gc.state = GCSpropagate;
+}
+
+/* Mark open upvalues. */
+static void gc_mark_uv(global_State *g)
+{
+ GCupval *uv;
+ for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) {
+ lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
+ if (isgray(obj2gco(uv)))
+ gc_marktv(g, uv->v);
+ }
+}
+
+/* Mark userdata in mmudata list. */
+static void gc_mark_mmudata(global_State *g)
+{
+ GCobj *root = gcref(g->gc.mmudata);
+ GCobj *u = root;
+ if (u) {
+ do {
+ u = gcnext(u);
+ makewhite(g, u); /* Could be from previous GC. */
+ gc_mark(g, u);
+ } while (u != root);
+ }
+}
+
+/* Separate userdata which needs finalization to mmudata list. */
+size_t lj_gc_separateudata(global_State *g, int all)
+{
+ size_t m = 0;
+ GCRef *p = &mainthread(g)->nextgc;
+ GCobj *o;
+ while ((o = gcref(*p)) != NULL) {
+ if (!(iswhite(o) || all) || isfinalized(gco2ud(o))) {
+ p = &o->gch.nextgc; /* Nothing to do. */
+ } else if (!lj_meta_fastg(g, tabref(gco2ud(o)->metatable), MM_gc)) {
+ markfinalized(gco2ud(o)); /* Done, as there's no __gc metamethod. */
+ p = &o->gch.nextgc;
+ } else { /* Otherwise move userdata to be finalized to mmudata list. */
+ m += sizeudata(gco2ud(o));
+ markfinalized(gco2ud(o));
+ *p = o->gch.nextgc;
+ if (gcref(g->gc.mmudata)) { /* Link to end of mmudata list. */
+ GCobj *root = gcref(g->gc.mmudata);
+ setgcrefr(o->gch.nextgc, root->gch.nextgc);
+ setgcref(root->gch.nextgc, o);
+ setgcref(g->gc.mmudata, o);
+ } else { /* Create circular list. */
+ setgcref(o->gch.nextgc, o);
+ setgcref(g->gc.mmudata, o);
+ }
+ }
+ }
+ return m;
+}
+
+/* -- Propagation phase --------------------------------------------------- */
+
+/* Traverse a table. */
+static int gc_traverse_tab(global_State *g, GCtab *t)
+{
+ int weak = 0;
+ cTValue *mode;
+ GCtab *mt = tabref(t->metatable);
+ if (mt)
+ gc_markobj(g, mt);
+ mode = lj_meta_fastg(g, mt, MM_mode);
+ if (mode && tvisstr(mode)) { /* Valid __mode field? */
+ const char *modestr = strVdata(mode);
+ int c;
+ while ((c = *modestr++)) {
+ if (c == 'k') weak |= LJ_GC_WEAKKEY;
+ else if (c == 'v') weak |= LJ_GC_WEAKVAL;
+ }
+ if (weak) { /* Weak tables are cleared in the atomic phase. */
+ t->marked = cast_byte((t->marked & ~LJ_GC_WEAK) | weak);
+ setgcrefr(t->gclist, g->gc.weak);
+ setgcref(g->gc.weak, obj2gco(t));
+ }
+ }
+ if (weak == LJ_GC_WEAK) /* Nothing to mark if both keys/values are weak. */
+ return 1;
+ if (!(weak & LJ_GC_WEAKVAL)) { /* Mark array part. */
+ MSize i, asize = t->asize;
+ for (i = 0; i < asize; i++)
+ gc_marktv(g, arrayslot(t, i));
+ }
+ if (t->hmask > 0) { /* Mark hash part. */
+ Node *node = noderef(t->node);
+ MSize i, hmask = t->hmask;
+ for (i = 0; i <= hmask; i++) {
+ Node *n = &node[i];
+ lua_assert(itype(&n->key) != LJ_TDEADKEY || tvisnil(&n->val));
+ if (!tvisnil(&n->val)) { /* Mark non-empty slot. */
+ lua_assert(!tvisnil(&n->key));
+ if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key);
+ if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val);
+ } else if (tvisgcv(&n->key)) { /* Leave GC key in, but mark as dead. */
+ setitype(&n->key, LJ_TDEADKEY);
+ }
+ }
+ }
+ return weak;
+}
+
+/* Traverse a function. */
+static void gc_traverse_func(global_State *g, GCfunc *fn)
+{
+ gc_markobj(g, tabref(fn->c.env));
+ if (isluafunc(fn)) {
+ uint32_t i;
+ lua_assert(fn->l.nupvalues == funcproto(fn)->sizeuv);
+ gc_markobj(g, funcproto(fn));
+ for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */
+ gc_markobj(g, &gcref(fn->l.uvptr[i])->uv);
+ } else {
+ uint32_t i;
+ for (i = 0; i < fn->c.nupvalues; i++) /* Mark C function upvalues. */
+ gc_marktv(g, &fn->c.upvalue[i]);
+ }
+}
+
+#if LJ_HASJIT
+/* Traverse a trace. */
+static void gc_traverse_trace(global_State *g, Trace *T)
+{
+ IRRef ref;
+ for (ref = T->nk; ref < REF_TRUE; ref++) {
+ IRIns *ir = &T->ir[ref];
+ if (ir->o == IR_KGC)
+ gc_markobj(g, ir_kgc(ir));
+ }
+}
+
+/* The current trace is a GC root while not anchored in the prototype (yet). */
+#define gc_mark_curtrace(g) \
+ { if (G2J(g)->state != LJ_TRACE_IDLE && G2J(g)->curtrace != 0) \
+ gc_traverse_trace(g, &G2J(g)->cur); }
+#else
+#define gc_mark_curtrace(g) UNUSED(g)
+#endif
+
+/* Traverse a prototype. */
+static void gc_traverse_proto(global_State *g, GCproto *pt)
+{
+ ptrdiff_t i;
+#if LJ_HASJIT
+ jit_State *J = G2J(g);
+ TraceNo root, side;
+ /* Mark all root traces and attached side traces. */
+ for (root = pt->trace; root != 0; root = J->trace[root]->nextroot) {
+ for (side = J->trace[root]->nextside; side != 0;
+ side = J->trace[side]->nextside)
+ gc_traverse_trace(g, J->trace[side]);
+ gc_traverse_trace(g, J->trace[root]);
+ }
+#endif
+ /* GC during prototype creation could cause NULL fields. */
+ if (pt->chunkname)
+ gc_mark_str(pt->chunkname);
+ for (i = -(ptrdiff_t)pt->sizekgc; i < 0; i++) /* Mark collectable consts. */
+ gc_markobj(g, gcref(pt->k.gc[i]));
+ for (i = 0; i < (ptrdiff_t)pt->sizeuvname; i++) /* Mark upvalue names. */
+ if (pt->uvname[i])
+ gc_mark_str(pt->uvname[i]);
+ for (i = 0; i < (ptrdiff_t)pt->sizevarinfo; i++) /* Mark names of locals. */
+ if (pt->varinfo[i].name)
+ gc_mark_str(pt->varinfo[i].name);
+}
+
+/* Traverse the frame structure of a stack. */
+static TValue *gc_traverse_frames(global_State *g, lua_State *th)
+{
+ TValue *frame, *top = th->top-1;
+ /* Note: extra vararg frame not skipped, marks function twice (harmless). */
+ for (frame = th->base-1; frame > th->stack; frame = frame_prev(frame)) {
+ GCfunc *fn = frame_func(frame);
+ TValue *ftop = frame;
+ if (isluafunc(fn)) ftop += funcproto(fn)->framesize;
+ if (ftop > top) top = ftop;
+ gc_markobj(g, frame_gc(frame)); /* Need to mark hidden function (or L). */
+ }
+ top++; /* Correct bias of -1 (frame == base-1). */
+ if (top > th->maxstack) top = th->maxstack;
+ return top;
+}
+
+/* Traverse a thread object. */
+static void gc_traverse_thread(global_State *g, lua_State *th)
+{
+ TValue *o, *lim;
+ gc_markobj(g, tabref(th->env));
+ for (o = th->stack+1; o < th->top; o++)
+ gc_marktv(g, o);
+ lim = gc_traverse_frames(g, th);
+ /* Extra cleanup required to avoid this marking problem:
+ **
+ ** [aa[bb.X| X created.
+ ** [aa[cc| GC called from (small) inner frame, X destroyed.
+ ** [aa....X.| GC called again in (larger) outer frame, X resurrected (ouch).
+ **
+ ** During GC in step 2 the stack must be cleaned up to the max. frame extent:
+ **
+ ** ***| Slots cleaned
+ ** [cc| from top of last frame
+ ** [aa......| to max. frame extent.
+ */
+ for (; o <= lim; o++)
+ setnilV(o);
+ lj_state_shrinkstack(th, (MSize)(lim - th->stack));
+}
+
+/* Propagate one gray object. Traverse it and turn it black. */
+static size_t propagatemark(global_State *g)
+{
+ GCobj *o = gcref(g->gc.gray);
+ lua_assert(isgray(o));
+ gray2black(o);
+ setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */
+ if (LJ_LIKELY(o->gch.gct == ~LJ_TTAB)) {
+ GCtab *t = gco2tab(o);
+ if (gc_traverse_tab(g, t))
+ black2gray(o); /* Keep weak tables gray. */
+ return sizeof(GCtab) + sizeof(TValue) * t->asize +
+ sizeof(Node) * (t->hmask + 1);
+ } else if (LJ_LIKELY(o->gch.gct == ~LJ_TFUNC)) {
+ GCfunc *fn = gco2func(o);
+ gc_traverse_func(g, fn);
+ return isluafunc(fn) ? sizeLfunc((MSize)fn->l.nupvalues) :
+ sizeCfunc((MSize)fn->c.nupvalues);
+ } else if (LJ_LIKELY(o->gch.gct == ~LJ_TPROTO)) {
+ GCproto *pt = gco2pt(o);
+ gc_traverse_proto(g, pt);
+ return sizeof(GCproto) + sizeof(BCIns) * pt->sizebc +
+ sizeof(GCobj *) * pt->sizekgc +
+ sizeof(lua_Number) * pt->sizekn +
+ sizeof(int16_t) * pt->sizeuv +
+ sizeof(int32_t) * pt->sizelineinfo +
+ sizeof(VarInfo) * pt->sizevarinfo +
+ sizeof(GCstr *) * pt->sizeuvname;
+ } else {
+ lua_State *th = gco2th(o);
+ setgcrefr(th->gclist, g->gc.grayagain);
+ setgcref(g->gc.grayagain, o);
+ black2gray(o); /* Threads are never black. */
+ gc_traverse_thread(g, th);
+ return sizeof(lua_State) + sizeof(TValue) * th->stacksize;
+ }
+}
+
+/* Propagate all gray objects. */
+static size_t gc_propagate_gray(global_State *g)
+{
+ size_t m = 0;
+ while (gcref(g->gc.gray) != NULL)
+ m += propagatemark(g);
+ return m;
+}
+
+/* -- Sweep phase --------------------------------------------------------- */
+
+/* Try to shrink some common data structures. */
+static void gc_shrink(global_State *g, lua_State *L)
+{
+ if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
+ lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
+ if (g->tmpbuf.sz > LJ_MIN_SBUF*2)
+ lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */
+}
+
+/* Type of GC free functions. */
+typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o);
+
+/* GC free functions for LJ_TSTR .. LJ_TUDATA. ORDER LJ_T */
+static const GCFreeFunc gc_freefunc[] = {
+ (GCFreeFunc)lj_str_free,
+ (GCFreeFunc)lj_func_freeuv,
+ (GCFreeFunc)lj_state_free,
+ (GCFreeFunc)lj_func_freeproto,
+ (GCFreeFunc)lj_func_free,
+ (GCFreeFunc)0,
+ (GCFreeFunc)lj_tab_free,
+ (GCFreeFunc)lj_udata_free
+};
+
+/* Full sweep of a GC list. */
+#define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM)
+
+/* Partial sweep of a GC list. */
+static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
+{
+ /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */
+ int ow = otherwhite(g);
+ GCobj *o;
+ while ((o = gcref(*p)) != NULL && lim-- > 0) {
+ if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */
+ gc_fullsweep(g, &gco2th(o)->openupval);
+ if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
+ lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED));
+ makewhite(g, o); /* Value is alive, change to the current white. */
+ p = &o->gch.nextgc;
+ } else { /* Otherwise value is dead, free it. */
+ lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED);
+ setgcrefr(*p, o->gch.nextgc);
+ if (o == gcref(g->gc.root))
+ setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */
+ gc_freefunc[o->gch.gct - ~LJ_TSTR](g, o);
+ }
+ }
+ return p;
+}
+
+/* Check whether we can clear a key or a value slot from a table. */
+static int gc_mayclear(cTValue *o, int val)
+{
+ if (tvisgcv(o)) { /* Only collectable objects can be weak references. */
+ if (tvisstr(o)) { /* But strings cannot be used as weak references. */
+ gc_mark_str(strV(o)); /* And need to be marked. */
+ return 0;
+ }
+ if (iswhite(gcV(o)))
+ return 1; /* Object is about to be collected. */
+ if (tvisudata(o) && val && isfinalized(udataV(o)))
+ return 1; /* Finalized userdata is dropped only from values. */
+ }
+ return 0; /* Cannot clear. */
+}
+
+/* Clear collected entries from weak tables. */
+static void gc_clearweak(GCobj *o)
+{
+ while (o) {
+ GCtab *t = gco2tab(o);
+ lua_assert((t->marked & LJ_GC_WEAK));
+ if ((t->marked & LJ_GC_WEAKVAL)) {
+ MSize i, asize = t->asize;
+ for (i = 0; i < asize; i++) {
+ /* Clear array slot when value is about to be collected. */
+ TValue *tv = arrayslot(t, i);
+ if (gc_mayclear(tv, 1))
+ setnilV(tv);
+ }
+ }
+ if (t->hmask > 0) {
+ Node *node = noderef(t->node);
+ MSize i, hmask = t->hmask;
+ for (i = 0; i <= hmask; i++) {
+ Node *n = &node[i];
+ /* Clear hash slot when key or value is about to be collected. */
+ if (!tvisnil(&n->val) && (gc_mayclear(&n->key, 0) ||
+ gc_mayclear(&n->val, 1))) {
+ setnilV(&n->val);
+ if (tvisgcv(&n->key)) /* Leave GC key in, but mark as dead. */
+ setitype(&n->key, LJ_TDEADKEY);
+ }
+ }
+ }
+ o = gcref(t->gclist);
+ }
+}
+
+/* Finalize one userdata object from mmudata list. */
+static void gc_finalize(lua_State *L)
+{
+ global_State *g = G(L);
+ GCobj *o = gcnext(gcref(g->gc.mmudata));
+ GCudata *ud = gco2ud(o);
+ cTValue *mo;
+ /* Unchain from list of userdata to be finalized. */
+ if (o == gcref(g->gc.mmudata))
+ setgcrefnull(g->gc.mmudata);
+ else
+ setgcrefr(gcref(g->gc.mmudata)->gch.nextgc, ud->nextgc);
+ /* Add it back to the main userdata list and make it white. */
+ setgcrefr(ud->nextgc, mainthread(g)->nextgc);
+ setgcref(mainthread(g)->nextgc, o);
+ makewhite(g, o);
+ /* Resolve the __gc metamethod. */
+ mo = lj_meta_fastg(g, tabref(ud->metatable), MM_gc);
+ if (mo) {
+ /* Save and restore lots of state around the __gc callback. */
+ uint8_t oldh = hook_save(g);
+ MSize oldt = g->gc.threshold;
+ GCobj *oldjl = gcref(g->jit_L);
+ MSize oldjs = 0;
+ ptrdiff_t oldjb = 0;
+ int errcode;
+ TValue *top;
+ if (oldjl) {
+ oldjs = gco2th(oldjl)->stacksize;
+ oldjb = savestack(gco2th(oldjl), mref(g->jit_base, TValue ));
+ setgcrefnull(g->jit_L);
+ }
+ lj_trace_abort(g);
+ top = L->top;
+ L->top = top+2;
+ hook_entergc(g); /* Disable hooks and new traces during __gc. */
+ g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
+ copyTV(L, top, mo);
+ setudataV(L, top+1, ud);
+ errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|ud| -> | */
+ hook_restore(g, oldh);
+ g->gc.threshold = oldt; /* Restore GC threshold. */
+ if (oldjl) {
+ if (gco2th(oldjl)->stacksize < oldjs)
+ lj_state_growstack(gco2th(oldjl), oldjs - gco2th(oldjl)->stacksize);
+ setgcref(g->jit_L, oldjl);
+ setmref(g->jit_base, restorestack(gco2th(oldjl), oldjb));
+ }
+ if (errcode)
+ lj_err_throw(L, errcode); /* Propagate errors. */
+ }
+}
+
+/* Finalize all userdata objects from mmudata list. */
+void lj_gc_finalizeudata(lua_State *L)
+{
+ while (gcref(G(L)->gc.mmudata) != NULL)
+ gc_finalize(L);
+}
+
+/* Free all remaining GC objects. */
+void lj_gc_freeall(global_State *g)
+{
+ MSize i, strmask;
+ /* Free everything, except super-fixed objects (the main thread). */
+ g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED;
+ gc_fullsweep(g, &g->gc.root);
+ strmask = g->strmask;
+ for (i = 0; i <= strmask; i++) /* Free all string hash chains. */
+ gc_fullsweep(g, &g->strhash[i]);
+}
+
+/* -- Collector ----------------------------------------------------------- */
+
+/* Atomic part of the GC cycle, transitioning from mark to sweep phase. */
+static void atomic(global_State *g, lua_State *L)
+{
+ size_t udsize;
+
+ gc_mark_uv(g); /* Need to remark open upvalues (the thread may be dead). */
+ gc_propagate_gray(g); /* Propagate any left-overs. */
+
+ setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */
+ setgcrefnull(g->gc.weak);
+ lua_assert(!iswhite(obj2gco(mainthread(g))));
+ gc_markobj(g, L); /* Mark running thread. */
+ gc_mark_curtrace(g); /* Mark current trace. */
+ gc_mark_basemt(g); /* Mark base metatables (again). */
+ gc_propagate_gray(g); /* Propagate all of the above. */
+
+ setgcrefr(g->gc.gray, g->gc.grayagain); /* Empty the 2nd chance list. */
+ setgcrefnull(g->gc.grayagain);
+ gc_propagate_gray(g); /* Propagate it. */
+
+ udsize = lj_gc_separateudata(g, 0); /* Separate userdata to be finalized. */
+ gc_mark_mmudata(g); /* Mark them. */
+ udsize += gc_propagate_gray(g); /* And propagate the marks. */
+
+ /* All marking done, clear weak tables. */
+ gc_clearweak(gcref(g->gc.weak));
+
+ /* Prepare for sweep phase. */
+ g->gc.currentwhite = cast_byte(otherwhite(g)); /* Flip current white. */
+ g->gc.sweepstr = 0;
+ g->gc.sweep = &g->gc.root;
+ g->gc.state = GCSsweepstring;
+ g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */
+}
+
+/* GC state machine. Returns a cost estimate for each step performed. */
+static size_t gc_onestep(lua_State *L)
+{
+ global_State *g = G(L);
+ switch (g->gc.state) {
+ case GCSpause:
+ gc_mark_start(g); /* Start a new GC cycle by marking all GC roots. */
+ return 0;
+ case GCSpropagate:
+ if (gcref(g->gc.gray) != NULL)
+ return propagatemark(g); /* Propagate one gray object. */
+ atomic(g, L); /* End of mark phase. */
+ return 0;
+ case GCSsweepstring: {
+ MSize old = g->gc.total;
+ gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
+ if (g->gc.sweepstr > g->strmask)
+ g->gc.state = GCSsweep; /* All string hash chains swept. */
+ lua_assert(old >= g->gc.total);
+ g->gc.estimate -= old - g->gc.total;
+ return GCSWEEPCOST;
+ }
+ case GCSsweep: {
+ MSize old = g->gc.total;
+ g->gc.sweep = gc_sweep(g, g->gc.sweep, GCSWEEPMAX); /* Partial sweep. */
+ if (gcref(*g->gc.sweep) == NULL) {
+ gc_shrink(g, L);
+ g->gc.state = GCSfinalize; /* End of sweep phase. */
+ }
+ lua_assert(old >= g->gc.total);
+ g->gc.estimate -= old - g->gc.total;
+ return GCSWEEPMAX*GCSWEEPCOST;
+ }
+ case GCSfinalize:
+ if (gcref(g->gc.mmudata) != NULL) {
+ gc_finalize(L); /* Finalize one userdata object. */
+ if (g->gc.estimate > GCFINALIZECOST)
+ g->gc.estimate -= GCFINALIZECOST;
+ return GCFINALIZECOST;
+ }
+ g->gc.state = GCSpause; /* End of GC cycle. */
+ g->gc.debt = 0;
+ return 0;
+ default:
+ lua_assert(0);
+ return 0;
+ }
+}
+
+/* Perform a limited amount of incremental GC steps. */
+int lj_gc_step(lua_State *L)
+{
+ global_State *g = G(L);
+ MSize lim;
+ int32_t ostate = g->vmstate;
+ setvmstate(g, GC);
+ lim = (GCSTEPSIZE/100) * g->gc.stepmul;
+ if (lim == 0)
+ lim = LJ_MAX_MEM;
+ g->gc.debt += g->gc.total - g->gc.threshold;
+ do {
+ lim -= (MSize)gc_onestep(L);
+ if (g->gc.state == GCSpause) {
+ lua_assert(g->gc.total >= g->gc.estimate);
+ g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
+ g->vmstate = ostate;
+ return 1; /* Finished a GC cycle. */
+ }
+ } while ((int32_t)lim > 0);
+ if (g->gc.debt < GCSTEPSIZE) {
+ g->gc.threshold = g->gc.total + GCSTEPSIZE;
+ } else {
+ g->gc.debt -= GCSTEPSIZE;
+ g->gc.threshold = g->gc.total;
+ }
+ g->vmstate = ostate;
+ return 0;
+}
+
+/* Ditto, but fix the stack top first. */
+void lj_gc_step_fixtop(lua_State *L)
+{
+ if (curr_funcisL(L)) L->top = curr_topL(L);
+ lj_gc_step(L);
+}
+
+/* Perform multiple GC steps. Called from JIT-compiled code. */
+void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps)
+{
+ cframe_pc(cframe_raw(L->cframe)) = pc;
+ L->top = curr_topL(L);
+ while (steps-- > 0 && lj_gc_step(L) == 0)
+ ;
+}
+
+/* Perform a full GC cycle. */
+void lj_gc_fullgc(lua_State *L)
+{
+ global_State *g = G(L);
+ int32_t ostate = g->vmstate;
+ setvmstate(g, GC);
+ if (g->gc.state <= GCSpropagate) { /* Caught somewhere in the middle. */
+ g->gc.sweepstr = 0;
+ g->gc.sweep = &g->gc.root; /* Sweep everything (preserving it). */
+ setgcrefnull(g->gc.gray); /* Reset lists from partial propagation. */
+ setgcrefnull(g->gc.grayagain);
+ setgcrefnull(g->gc.weak);
+ g->gc.state = GCSsweepstring; /* Fast forward to the sweep phase. */
+ }
+ lua_assert(g->gc.state != GCSpause && g->gc.state != GCSpropagate);
+ while (g->gc.state != GCSfinalize) { /* Finish sweep. */
+ lua_assert(g->gc.state == GCSsweepstring || g->gc.state == GCSsweep);
+ gc_onestep(L);
+ }
+ /* Now perform a full GC. */
+ gc_mark_start(g);
+ while (g->gc.state != GCSpause)
+ gc_onestep(L);
+ g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
+ g->vmstate = ostate;
+}
+
+/* -- Write barriers ------------------------------------------------------ */
+
+/* Move the GC propagation frontier back for tables (make it gray again). */
+void lj_gc_barrierback(global_State *g, GCtab *t)
+{
+ GCobj *o = obj2gco(t);
+ lua_assert(isblack(o) && !isdead(g, o));
+ lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
+ black2gray(o);
+ setgcrefr(t->gclist, g->gc.grayagain);
+ setgcref(g->gc.grayagain, o);
+}
+
+/* Move the GC propagation frontier forward. */
+void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
+{
+ lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
+ lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
+ lua_assert(o->gch.gct != ~LJ_TTAB);
+ /* Preserve invariant during propagation. Otherwise it doesn't matter. */
+ if (g->gc.state == GCSpropagate)
+ gc_mark(g, v); /* Move frontier forward. */
+ else
+ makewhite(g, o); /* Make it white to avoid the following barrier. */
+}
+
+/* The reason for duplicating this is that it needs to be visible from ASM. */
+void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v)
+{
+ lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
+ lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
+ lua_assert(o->gch.gct == ~LJ_TUPVAL);
+ /* Preserve invariant during propagation. Otherwise it doesn't matter. */
+ if (g->gc.state == GCSpropagate)
+ gc_mark(g, v); /* Move frontier forward. */
+ else
+ makewhite(g, o); /* Make it white to avoid the following barrier. */
+}
+
+/* Close upvalue. Also needs a write barrier. */
+void lj_gc_closeuv(global_State *g, GCupval *uv)
+{
+ GCobj *o = obj2gco(uv);
+ /* Copy stack slot to upvalue itself and point to the copy. */
+ copyTV(mainthread(g), &uv->tv, uv->v);
+ uv->v = &uv->tv;
+ uv->closed = 1;
+ setgcrefr(o->gch.nextgc, g->gc.root);
+ setgcref(g->gc.root, o);
+ if (isgray(o)) { /* A closed upvalue is never gray, so fix this. */
+ if (g->gc.state == GCSpropagate) {
+ gray2black(o); /* Make it black and preserve invariant. */
+ if (tviswhite(uv->v))
+ lj_gc_barrierf(g, o, gcV(uv->v));
+ } else {
+ makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */
+ lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
+ }
+ }
+}
+
+#if LJ_HASJIT
+/* Mark a trace if it's saved during the propagation phase. */
+void lj_gc_barriertrace(global_State *g, void *T)
+{
+ if (g->gc.state == GCSpropagate)
+ gc_traverse_trace(g, (Trace *)T);
+}
+#endif
+
+/* -- Allocator ----------------------------------------------------------- */
+
+/* Call pluggable memory allocator to allocate or resize a fragment. */
+void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz)
+{
+ global_State *g = G(L);
+ lua_assert((osz == 0) == (p == NULL));
+ p = g->allocf(g->allocd, p, osz, nsz);
+ if (p == NULL && nsz > 0)
+ lj_err_throw(L, LUA_ERRMEM);
+ lua_assert((nsz == 0) == (p == NULL));
+ g->gc.total = (g->gc.total - osz) + nsz;
+ return p;
+}
+
+/* Allocate new GC object and link it to the root set. */
+void *lj_mem_newgco(lua_State *L, MSize size)
+{
+ global_State *g = G(L);
+ GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
+ if (o == NULL)
+ lj_err_throw(L, LUA_ERRMEM);
+ g->gc.total += size;
+ setgcrefr(o->gch.nextgc, g->gc.root);
+ setgcref(g->gc.root, o);
+ newwhite(g, o);
+ return o;
+}
+
+/* Resize growable vector. */
+void *lj_mem_grow(lua_State *L, void *p, MSize *szp, MSize lim, MSize esz)
+{
+ MSize sz = (*szp) << 1;
+ if (sz < LJ_MIN_VECSZ)
+ sz = LJ_MIN_VECSZ;
+ if (sz > lim)
+ sz = lim;
+ p = lj_mem_realloc(L, p, (*szp)*esz, sz*esz);
+ *szp = sz;
+ return p;
+}
+
diff --git a/src/lj_gc.h b/src/lj_gc.h
new file mode 100644
index 00000000..192066d3
--- /dev/null
+++ b/src/lj_gc.h
@@ -0,0 +1,102 @@
+/*
+** Garbage collector.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_GC_H
+#define _LJ_GC_H
+
+#include "lj_obj.h"
+
+/* Garbage collector states. Order matters. */
+enum { GCSpause, GCSpropagate, GCSsweepstring, GCSsweep, GCSfinalize };
+
+/* Bitmasks for marked field of GCobj. */
+#define LJ_GC_WHITE0 0x01
+#define LJ_GC_WHITE1 0x02
+#define LJ_GC_BLACK 0x04
+#define LJ_GC_FINALIZED 0x08
+#define LJ_GC_WEAKKEY 0x08
+#define LJ_GC_WEAKVAL 0x10
+#define LJ_GC_FIXED 0x20
+#define LJ_GC_SFIXED 0x40
+
+#define LJ_GC_WHITES (LJ_GC_WHITE0 | LJ_GC_WHITE1)
+#define LJ_GC_COLORS (LJ_GC_WHITES | LJ_GC_BLACK)
+#define LJ_GC_WEAK (LJ_GC_WEAKKEY | LJ_GC_WEAKVAL)
+
+/* Macros to test and set GCobj colors. */
+#define iswhite(x) ((x)->gch.marked & LJ_GC_WHITES)
+#define isblack(x) ((x)->gch.marked & LJ_GC_BLACK)
+#define isgray(x) (!((x)->gch.marked & (LJ_GC_BLACK|LJ_GC_WHITES)))
+#define tviswhite(x) (tvisgcv(x) && iswhite(gcV(x)))
+#define otherwhite(g) (g->gc.currentwhite ^ LJ_GC_WHITES)
+#define isdead(g, v) ((v)->gch.marked & otherwhite(g) & LJ_GC_WHITES)
+
+#define curwhite(g) ((g)->gc.currentwhite & LJ_GC_WHITES)
+#define newwhite(g, x) (obj2gco(x)->gch.marked = (uint8_t)curwhite(g))
+#define flipwhite(x) ((x)->gch.marked ^= LJ_GC_WHITES)
+#define fixstring(s) ((s)->marked |= LJ_GC_FIXED)
+
+/* Collector. */
+LJ_FUNC size_t lj_gc_separateudata(global_State *g, int all);
+LJ_FUNC void lj_gc_finalizeudata(lua_State *L);
+LJ_FUNC void lj_gc_freeall(global_State *g);
+LJ_FUNCA int lj_gc_step(lua_State *L);
+LJ_FUNCA void lj_gc_step_fixtop(lua_State *L);
+LJ_FUNCA void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps);
+LJ_FUNC void lj_gc_fullgc(lua_State *L);
+
+/* GC check: drive collector forward if the GC threshold has been reached. */
+#define lj_gc_check(L) \
+ { if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) \
+ lj_gc_step(L); }
+#define lj_gc_check_fixtop(L) \
+ { if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) \
+ lj_gc_step_fixtop(L); }
+
+/* Write barriers. */
+LJ_FUNC void lj_gc_barrierback(global_State *g, GCtab *t);
+LJ_FUNC void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v);
+LJ_FUNCA void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v);
+LJ_FUNC void lj_gc_closeuv(global_State *g, GCupval *uv);
+LJ_FUNC void lj_gc_barriertrace(global_State *g, void *T);
+
+/* Barrier for stores to table objects. TValue and GCobj variant. */
+#define lj_gc_barriert(L, t, tv) \
+ { if (tviswhite(tv) && isblack(obj2gco(t))) \
+ lj_gc_barrierback(G(L), (t)); }
+#define lj_gc_objbarriert(L, t, o) \
+ { if (iswhite(obj2gco(o)) && isblack(obj2gco(t))) \
+ lj_gc_barrierback(G(L), (t)); }
+
+/* Barrier for stores to any other object. TValue and GCobj variant. */
+#define lj_gc_barrier(L, p, tv) \
+ { if (tviswhite(tv) && isblack(obj2gco(p))) \
+ lj_gc_barrierf(G(L), obj2gco(p), gcV(tv)); }
+#define lj_gc_objbarrier(L, p, o) \
+ { if (iswhite(obj2gco(o)) && isblack(obj2gco(p))) \
+ lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); }
+
+/* Allocator. */
+LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz);
+LJ_FUNC void *lj_mem_newgco(lua_State *L, MSize size);
+LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
+ MSize *szp, MSize lim, MSize esz);
+
+#define lj_mem_new(L, s) lj_mem_realloc(L, NULL, 0, (s))
+#define lj_mem_free(g, p, osize) \
+ (g->gc.total -= (MSize)(osize), g->allocf(g->allocd, (p), (osize), 0))
+
+#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t))))
+#define lj_mem_reallocvec(L, p, on, n, t) \
+ ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t))))
+#define lj_mem_growvec(L, p, n, m, t) \
+ ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t)))
+#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t))
+
+#define lj_mem_newobj(L, t) ((t *)lj_mem_newgco(L, sizeof(t)))
+#define lj_mem_newt(L, s, t) ((t *)lj_mem_new(L, (s)))
+#define lj_mem_freet(g, p) lj_mem_free(g, (p), sizeof(*(p)))
+
+#endif
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
new file mode 100644
index 00000000..dfec188a
--- /dev/null
+++ b/src/lj_gdbjit.c
@@ -0,0 +1,739 @@
+/*
+** Client for the GDB JIT API.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_gdbjit_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_frame.h"
+#include "lj_jit.h"
+#include "lj_dispatch.h"
+
+/* This is not compiled in by default.
+** Enable with -DLUAJIT_USE_GDBJIT in the Makefile and recompile everything.
+*/
+#ifdef LUAJIT_USE_GDBJIT
+
+/* The GDB JIT API allows JIT compilers to pass debug information about
+** JIT-compiled code back to GDB. You need at least GDB 7.0 or higher
+** to see it in action.
+**
+** This is a passive API, so it works even when not running under GDB
+** or when attaching to an already running process. Alas, this implies
+** enabling it always has a non-negligible overhead -- do not use in
+** release mode!
+**
+** The LuaJIT GDB JIT client is rather minimal at the moment. It gives
+** each trace a symbol name and adds a source location and frame unwind
+** information. Obviously LuaJIT itself and any embedding C application
+** should be compiled with debug symbols, too (see the Makefile).
+**
+** Traces are named TRACE_1, TRACE_2, ... these correspond to the trace
+** numbers from -jv or -jdump. Use "break TRACE_1" or "tbreak TRACE_1" etc.
+** to set breakpoints on specific traces (even ahead of their creation).
+**
+** The source location for each trace allows listing the corresponding
+** source lines with the GDB command "list" (but only if the Lua source
+** has been loaded from a file). Currently this is always set to the
+** location where the trace has been started.
+**
+** Frame unwind information can be inspected with the GDB command
+** "info frame". This also allows proper backtraces across JIT-compiled
+** code with the GDB command "bt".
+**
+** You probably want to add the following settings to a .gdbinit file
+** (or add them to ~/.gdbinit):
+** set disassembly-flavor intel
+** set breakpoint pending on
+**
+** Here's a sample GDB session:
+** ------------------------------------------------------------------------
+
+$ cat >x.lua
+for outer=1,100 do
+ for inner=1,100 do end
+end
+^D
+
+$ luajit -jv x.lua
+[TRACE 1 x.lua:2]
+[TRACE 2 (1/3) x.lua:1 -> 1]
+
+$ gdb --quiet --args luajit x.lua
+(gdb) tbreak TRACE_1
+Function "TRACE_1" not defined.
+Temporary breakpoint 1 (TRACE_1) pending.
+(gdb) run
+Starting program: luajit x.lua
+
+Temporary breakpoint 1, TRACE_1 () at x.lua:2
+2 for inner=1,100 do end
+(gdb) list
+1 for outer=1,100 do
+2 for inner=1,100 do end
+3 end
+(gdb) bt
+#0 TRACE_1 () at x.lua:2
+#1 0x08053690 in lua_pcall [...]
+[...]
+#7 0x0806ff90 in main [...]
+(gdb) disass TRACE_1
+Dump of assembler code for function TRACE_1:
+0xf7fd9fba <TRACE_1+0>: mov DWORD PTR ds:0xf7e0e2a0,0x1
+0xf7fd9fc4 <TRACE_1+10>: movsd xmm7,QWORD PTR [edx+0x20]
+[...]
+0xf7fd9ff8 <TRACE_1+62>: jmp 0xf7fd2014
+End of assembler dump.
+(gdb) tbreak TRACE_2
+Function "TRACE_2" not defined.
+Temporary breakpoint 2 (TRACE_2) pending.
+(gdb) cont
+Continuing.
+
+Temporary breakpoint 2, TRACE_2 () at x.lua:1
+1 for outer=1,100 do
+(gdb) info frame
+Stack level 0, frame at 0xffffd7c0:
+ eip = 0xf7fd9f60 in TRACE_2 (x.lua:1); saved eip 0x8053690
+ called by frame at 0xffffd7e0
+ source language unknown.
+ Arglist at 0xffffd78c, args:
+ Locals at 0xffffd78c, Previous frame's sp is 0xffffd7c0
+ Saved registers:
+ ebx at 0xffffd7ac, ebp at 0xffffd7b8, esi at 0xffffd7b0, edi at 0xffffd7b4,
+ eip at 0xffffd7bc
+(gdb)
+
+** ------------------------------------------------------------------------
+*/
+
+/* -- GDB JIT API --------------------------------------------------------- */
+
+/* GDB JIT actions. */
+enum {
+ GDBJIT_NOACTION = 0,
+ GDBJIT_REGISTER,
+ GDBJIT_UNREGISTER
+};
+
+/* GDB JIT entry. */
+typedef struct GDBJITentry {
+ struct GDBJITentry *next_entry;
+ struct GDBJITentry *prev_entry;
+ const char *symfile_addr;
+ uint64_t symfile_size;
+} GDBJITentry;
+
+/* GDB JIT descriptor. */
+typedef struct GDBJITdesc {
+ uint32_t version;
+ uint32_t action_flag;
+ GDBJITentry *relevant_entry;
+ GDBJITentry *first_entry;
+} GDBJITdesc;
+
+GDBJITdesc __jit_debug_descriptor = {
+ 1, GDBJIT_NOACTION, NULL, NULL
+};
+
+/* GDB sets a breakpoint at this function. */
+void LJ_NOINLINE __jit_debug_register_code()
+{
+ __asm__ __volatile__("");
+};
+
+/* -- In-memory ELF object definitions ------------------------------------ */
+
+/* ELF definitions. */
+typedef struct ELFheader {
+ uint8_t emagic[4];
+ uint8_t eclass;
+ uint8_t eendian;
+ uint8_t eversion;
+ uint8_t eosabi;
+ uint8_t eabiversion;
+ uint8_t epad[7];
+ uint16_t type;
+ uint16_t machine;
+ uint32_t version;
+ uintptr_t entry;
+ uintptr_t phofs;
+ uintptr_t shofs;
+ uint32_t flags;
+ uint16_t ehsize;
+ uint16_t phentsize;
+ uint16_t phnum;
+ uint16_t shentsize;
+ uint16_t shnum;
+ uint16_t shstridx;
+} ELFheader;
+
+typedef struct ELFsectheader {
+ uint32_t name;
+ uint32_t type;
+ uintptr_t flags;
+ uintptr_t addr;
+ uintptr_t ofs;
+ uintptr_t size;
+ uint32_t link;
+ uint32_t info;
+ uintptr_t align;
+ uintptr_t entsize;
+} ELFsectheader;
+
+#define ELFSECT_IDX_ABS 0xfff1
+
+enum {
+ ELFSECT_TYPE_PROGBITS = 1,
+ ELFSECT_TYPE_SYMTAB = 2,
+ ELFSECT_TYPE_STRTAB = 3,
+ ELFSECT_TYPE_NOBITS = 8
+};
+
+#define ELFSECT_FLAGS_WRITE 1
+#define ELFSECT_FLAGS_ALLOC 2
+#define ELFSECT_FLAGS_EXEC 4
+
+typedef struct ELFsymbol {
+#if LJ_64
+ uint32_t name;
+ uint8_t info;
+ uint8_t other;
+ uint16_t sectidx;
+ uintptr_t value;
+ uint64_t size;
+#else
+ uint32_t name;
+ uintptr_t value;
+ uint32_t size;
+ uint8_t info;
+ uint8_t other;
+ uint16_t sectidx;
+#endif
+} ELFsymbol;
+
+enum {
+ ELFSYM_TYPE_FUNC = 2,
+ ELFSYM_TYPE_FILE = 4,
+ ELFSYM_BIND_LOCAL = 0 << 4,
+ ELFSYM_BIND_GLOBAL = 1 << 4,
+};
+
+/* DWARF definitions. */
+#define DW_CIE_VERSION 1
+
+enum {
+ DW_CFA_nop = 0x0,
+ DW_CFA_def_cfa = 0xc,
+ DW_CFA_def_cfa_offset = 0xe,
+ DW_CFA_advance_loc = 0x40,
+ DW_CFA_offset = 0x80
+};
+
+enum {
+ DW_EH_PE_udata4 = 3,
+ DW_EH_PE_textrel = 0x20
+};
+
+enum {
+ DW_TAG_compile_unit = 0x11
+};
+
+enum {
+ DW_children_no = 0,
+ DW_children_yes = 1
+};
+
+enum {
+ DW_AT_name = 0x03,
+ DW_AT_stmt_list = 0x10,
+ DW_AT_low_pc = 0x11,
+ DW_AT_high_pc = 0x12
+};
+
+enum {
+ DW_FORM_addr = 0x01,
+ DW_FORM_data4 = 0x06,
+ DW_FORM_string = 0x08
+};
+
+enum {
+ DW_LNS_extended_op = 0,
+ DW_LNS_copy = 1,
+ DW_LNS_advance_pc = 2,
+ DW_LNS_advance_line = 3
+};
+
+enum {
+ DW_LNE_end_sequence = 1,
+ DW_LNE_set_address = 2
+};
+
+enum {
+#if LJ_TARGET_X86
+ DW_REG_AX, DW_REG_CX, DW_REG_DX, DW_REG_BX,
+ DW_REG_SP, DW_REG_BP, DW_REG_SI, DW_REG_DI,
+ DW_REG_RA,
+#elif LJ_TARGET_X64
+ /* Yes, the order is strange, but correct. */
+ DW_REG_AX, DW_REG_DX, DW_REG_CX, DW_REG_BX,
+ DW_REG_SI, DW_REG_DI, DW_REG_BP, DW_REG_SP,
+ DW_REG_8, DW_REG_9, DW_REG_10, DW_REG_11,
+ DW_REG_12, DW_REG_13, DW_REG_14, DW_REG_15,
+ DW_REG_RA,
+#else
+#error "Unsupported target architecture"
+#endif
+};
+
+/* Minimal list of sections for the in-memory ELF object. */
+enum {
+ GDBJIT_SECT_NULL,
+ GDBJIT_SECT_text,
+ GDBJIT_SECT_eh_frame,
+ GDBJIT_SECT_shstrtab,
+ GDBJIT_SECT_strtab,
+ GDBJIT_SECT_symtab,
+ GDBJIT_SECT_debug_info,
+ GDBJIT_SECT_debug_abbrev,
+ GDBJIT_SECT_debug_line,
+ GDBJIT_SECT__MAX
+};
+
+enum {
+ GDBJIT_SYM_UNDEF,
+ GDBJIT_SYM_FILE,
+ GDBJIT_SYM_FUNC,
+ GDBJIT_SYM__MAX
+};
+
+/* In-memory ELF object. */
+typedef struct GDBJITobj {
+ ELFheader hdr; /* ELF header. */
+ ELFsectheader sect[GDBJIT_SECT__MAX]; /* ELF sections. */
+ ELFsymbol sym[GDBJIT_SYM__MAX]; /* ELF symbol table. */
+ uint8_t space[4096]; /* Space for various section data. */
+} GDBJITobj;
+
+/* Combined structure for GDB JIT entry and ELF object. */
+typedef struct GDBJITentryobj {
+ GDBJITentry entry;
+ size_t sz;
+ GDBJITobj obj;
+} GDBJITentryobj;
+
+/* Template for in-memory ELF header. */
+static const ELFheader elfhdr_template = {
+ .emagic = { 0x7f, 'E', 'L', 'F' },
+ .eclass = LJ_64 ? 2 : 1,
+ .eendian = LJ_ENDIAN_SELECT(1, 2),
+ .eversion = 1,
+#if defined(__linux__)
+ .eosabi = 0, /* Nope, it's not 3. */
+#elif defined(__FreeBSD__)
+ .eosabi = 9,
+#elif defined(__NetBSD__)
+ .eosabi = 2,
+#elif defined(__OpenBSD__)
+ .eosabi = 12,
+#elif defined(__solaris__)
+ .eosabi = 6,
+#else
+ .eosabi = 0,
+#endif
+ .eabiversion = 0,
+ .epad = { 0, 0, 0, 0, 0, 0, 0 },
+ .type = 1,
+#if LJ_TARGET_X86
+ .machine = 3,
+#elif LJ_TARGET_X64
+ .machine = 62,
+#else
+#error "Unsupported target architecture"
+#endif
+ .version = 1,
+ .entry = 0,
+ .phofs = 0,
+ .shofs = offsetof(GDBJITobj, sect),
+ .flags = 0,
+ .ehsize = sizeof(ELFheader),
+ .phentsize = 0,
+ .phnum = 0,
+ .shentsize = sizeof(ELFsectheader),
+ .shnum = GDBJIT_SECT__MAX,
+ .shstridx = GDBJIT_SECT_shstrtab
+};
+
+/* -- In-memory ELF object generation ------------------------------------- */
+
+/* Context for generating the ELF object for the GDB JIT API. */
+typedef struct GDBJITctx {
+ uint8_t *p; /* Pointer to next address in obj.space. */
+ uint8_t *startp; /* Pointer to start address in obj.space. */
+ Trace *T; /* Generate symbols for this trace. */
+ uintptr_t mcaddr; /* Machine code address. */
+ MSize szmcode; /* Size of machine code. */
+ MSize spadjp; /* Stack adjustment for parent trace or interpreter. */
+ MSize spadj; /* Stack adjustment for trace itself. */
+ BCLine lineno; /* Starting line number. */
+ const char *filename; /* Starting file name. */
+ const char *trname; /* Name of trace. */
+ size_t objsize; /* Final size of ELF object. */
+ GDBJITobj obj; /* In-memory ELF object. */
+} GDBJITctx;
+
+/* Add a zero-terminated string. */
+static uint32_t gdbjit_strz(GDBJITctx *ctx, const char *str)
+{
+ uint8_t *p = ctx->p;
+ uint32_t ofs = (uint32_t)(p - ctx->startp);
+ do {
+ *p++ = (uint8_t)*str;
+ } while (*str++);
+ ctx->p = p;
+ return ofs;
+}
+
+/* Add a ULEB128 value. */
+static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
+{
+ uint8_t *p = ctx->p;
+ for (; v >= 0x80; v >>= 7)
+ *p++ = (uint8_t)((v & 0x7f) | 0x80);
+ *p++ = (uint8_t)v;
+ ctx->p = p;
+}
+
+/* Add a SLEB128 value. */
+static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
+{
+ uint8_t *p = ctx->p;
+ for (; (uint32_t)(v+0x40) >= 0x80; v >>= 7)
+ *p++ = (uint8_t)((v & 0x7f) | 0x80);
+ *p++ = (uint8_t)(v & 0x7f);
+ ctx->p = p;
+}
+
+/* Shortcuts to generate DWARF structures. */
+#define DB(x) (*p++ = (x))
+#define DI8(x) (*(int8_t *)p = (x), p++)
+#define DU16(x) (*(uint16_t *)p = (x), p += 2)
+#define DU32(x) (*(uint32_t *)p = (x), p += 4)
+#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
+#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p)
+#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
+#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
+#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
+#define DSECT(name, stmt) \
+ { uint32_t *szp_##name = (uint32_t *)p; p += 4; stmt \
+ *szp_##name = (uint32_t)((p-(uint8_t *)szp_##name)-4); } \
+
+/* Initialize ELF section headers. */
+static void LJ_FASTCALL gdbjit_secthdr(GDBJITctx *ctx)
+{
+ ELFsectheader *sect;
+
+ *ctx->p++ = '\0'; /* Empty string at start of string table. */
+
+#define SECTDEF(id, tp, al) \
+ sect = &ctx->obj.sect[GDBJIT_SECT_##id]; \
+ sect->name = gdbjit_strz(ctx, "." #id); \
+ sect->type = ELFSECT_TYPE_##tp; \
+ sect->align = (al)
+
+ SECTDEF(text, NOBITS, 16);
+ sect->flags = ELFSECT_FLAGS_ALLOC|ELFSECT_FLAGS_EXEC;
+ sect->addr = ctx->mcaddr;
+ sect->ofs = 0;
+ sect->size = ctx->szmcode;
+
+ SECTDEF(eh_frame, PROGBITS, sizeof(uintptr_t));
+ sect->flags = ELFSECT_FLAGS_ALLOC;
+
+ SECTDEF(shstrtab, STRTAB, 1);
+ SECTDEF(strtab, STRTAB, 1);
+
+ SECTDEF(symtab, SYMTAB, sizeof(uintptr_t));
+ sect->ofs = offsetof(GDBJITobj, sym);
+ sect->size = sizeof(ctx->obj.sym);
+ sect->link = GDBJIT_SECT_strtab;
+ sect->entsize = sizeof(ELFsymbol);
+ sect->info = GDBJIT_SYM_FUNC;
+
+ SECTDEF(debug_info, PROGBITS, 1);
+ SECTDEF(debug_abbrev, PROGBITS, 1);
+ SECTDEF(debug_line, PROGBITS, 1);
+
+#undef SECTDEF
+}
+
+/* Initialize symbol table. */
+static void LJ_FASTCALL gdbjit_symtab(GDBJITctx *ctx)
+{
+ ELFsymbol *sym;
+
+ *ctx->p++ = '\0'; /* Empty string at start of string table. */
+
+ sym = &ctx->obj.sym[GDBJIT_SYM_FILE];
+ sym->name = gdbjit_strz(ctx, "JIT mcode");
+ sym->sectidx = ELFSECT_IDX_ABS;
+ sym->info = ELFSYM_TYPE_FILE|ELFSYM_BIND_LOCAL;
+
+ sym = &ctx->obj.sym[GDBJIT_SYM_FUNC];
+ sym->name = gdbjit_strz(ctx, ctx->trname);
+ sym->sectidx = GDBJIT_SECT_text;
+ sym->value = 0;
+ sym->size = ctx->szmcode;
+ sym->info = ELFSYM_TYPE_FUNC|ELFSYM_BIND_GLOBAL;
+}
+
+/* Initialize .eh_frame section. */
+static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
+{
+ uint8_t *p = ctx->p;
+ uint8_t *framep = p;
+
+ /* Emit DWARF EH CIE. */
+ DSECT(CIE,
+ DU32(0); /* Offset to CIE itself. */
+ DB(DW_CIE_VERSION);
+ DSTR("zR"); /* Augmentation. */
+ DUV(1); /* Code alignment factor. */
+ DSV(-(int32_t)sizeof(uintptr_t)); /* Data alignment factor. */
+ DB(DW_REG_RA); /* Return address register. */
+ DB(1); DB(DW_EH_PE_textrel|DW_EH_PE_udata4); /* Augmentation data. */
+ DB(DW_CFA_def_cfa); DUV(DW_REG_SP); DUV(sizeof(uintptr_t));
+ DB(DW_CFA_offset|DW_REG_RA); DUV(1);
+ DALIGNNOP(sizeof(uintptr_t));
+ )
+
+ /* Emit DWARF EH FDE. */
+ DSECT(FDE,
+ DU32((uint32_t)(p-framep)); /* Offset to CIE. */
+ DU32(0); /* Machine code offset relative to .text. */
+ DU32(ctx->szmcode); /* Machine code length. */
+ DB(0); /* Augmentation data. */
+ /* Registers saved in CFRAME. */
+#if LJ_TARGET_X86
+ DB(DW_CFA_offset|DW_REG_BP); DUV(2);
+ DB(DW_CFA_offset|DW_REG_DI); DUV(3);
+ DB(DW_CFA_offset|DW_REG_SI); DUV(4);
+ DB(DW_CFA_offset|DW_REG_BX); DUV(5);
+#elif LJ_TARGET_X64
+ /* Add saved registers for x64 CFRAME. */
+#else
+#error "Unsupported target architecture"
+#endif
+ if (ctx->spadjp != ctx->spadj) { /* Parent/interpreter stack frame size. */
+ DB(DW_CFA_def_cfa_offset); DUV(ctx->spadjp);
+ DB(DW_CFA_advance_loc|1); /* Only an approximation. */
+ }
+ DB(DW_CFA_def_cfa_offset); DUV(ctx->spadj); /* Trace stack frame size. */
+ DALIGNNOP(sizeof(uintptr_t));
+ )
+
+ ctx->p = p;
+}
+
+/* Initialize .debug_info section. */
+static void LJ_FASTCALL gdbjit_debuginfo(GDBJITctx *ctx)
+{
+ uint8_t *p = ctx->p;
+
+ DSECT(info,
+ DU16(2); /* DWARF version. */
+ DU32(0); /* Abbrev offset. */
+ DB(sizeof(uintptr_t)); /* Pointer size. */
+
+ DUV(1); /* Abbrev #1: DW_TAG_compile_unit. */
+ DSTR(ctx->filename); /* DW_AT_name. */
+ DADDR(ctx->mcaddr); /* DW_AT_low_pc. */
+ DADDR(ctx->mcaddr + ctx->szmcode); /* DW_AT_high_pc. */
+ DU32(0); /* DW_AT_stmt_list. */
+ )
+
+ ctx->p = p;
+}
+
+/* Initialize .debug_abbrev section. */
+static void LJ_FASTCALL gdbjit_debugabbrev(GDBJITctx *ctx)
+{
+ uint8_t *p = ctx->p;
+
+ /* Abbrev #1: DW_TAG_compile_unit. */
+ DUV(1); DUV(DW_TAG_compile_unit);
+ DB(DW_children_no);
+ DUV(DW_AT_name); DUV(DW_FORM_string);
+ DUV(DW_AT_low_pc); DUV(DW_FORM_addr);
+ DUV(DW_AT_high_pc); DUV(DW_FORM_addr);
+ DUV(DW_AT_stmt_list); DUV(DW_FORM_data4);
+ DB(0); DB(0);
+
+ ctx->p = p;
+}
+
+#define DLNE(op, s) (DB(DW_LNS_extended_op), DUV(1+(s)), DB((op)))
+
+/* Initialize .debug_line section. */
+static void LJ_FASTCALL gdbjit_debugline(GDBJITctx *ctx)
+{
+ uint8_t *p = ctx->p;
+
+ DSECT(line,
+ DU16(2); /* DWARF version. */
+ DSECT(header,
+ DB(1); /* Minimum instruction length. */
+ DB(1); /* is_stmt. */
+ DI8(0); /* Line base for special opcodes. */
+ DB(2); /* Line range for special opcodes. */
+ DB(3+1); /* Opcode base at DW_LNS_advance_line+1. */
+ DB(0); DB(1); DB(1); /* Standard opcode lengths. */
+ /* Directory table. */
+ DB(0);
+ /* File name table. */
+ DSTR(ctx->filename); DUV(0); DUV(0); DUV(0);
+ DB(0);
+ )
+
+ DLNE(DW_LNE_set_address, sizeof(uintptr_t)); DADDR(ctx->mcaddr);
+ if (ctx->lineno) {
+ DB(DW_LNS_advance_line); DSV(ctx->lineno-1);
+ }
+ DB(DW_LNS_copy);
+ DB(DW_LNS_advance_pc); DUV(ctx->szmcode);
+ DLNE(DW_LNE_end_sequence, 0);
+ )
+
+ ctx->p = p;
+}
+
+#undef DLNE
+
+/* Undef shortcuts. */
+#undef DB
+#undef DI8
+#undef DU16
+#undef DU32
+#undef DADDR
+#undef DUV
+#undef DSV
+#undef DSTR
+#undef DALIGNNOP
+#undef DSECT
+
+/* Type of a section initializer callback. */
+typedef void (LJ_FASTCALL *GDBJITinitf)(GDBJITctx *ctx);
+
+/* Call section initializer and set the section offset and size. */
+static void gdbjit_initsect(GDBJITctx *ctx, int sect, GDBJITinitf initf)
+{
+ ctx->startp = ctx->p;
+ ctx->obj.sect[sect].ofs = (uintptr_t)((char *)ctx->p - (char *)&ctx->obj);
+ initf(ctx);
+ ctx->obj.sect[sect].size = (uintptr_t)(ctx->p - ctx->startp);
+}
+
+#define SECTALIGN(p, a) \
+ ((p) = (uint8_t *)(((uintptr_t)(p) + ((a)-1)) & ~(uintptr_t)((a)-1)))
+
+/* Build in-memory ELF object. */
+static void gdbjit_buildobj(GDBJITctx *ctx)
+{
+ GDBJITobj *obj = &ctx->obj;
+ /* Fill in ELF header and clear structures. */
+ memcpy(&obj->hdr, &elfhdr_template, sizeof(ELFheader));
+ memset(&obj->sect, 0, sizeof(ELFsectheader)*GDBJIT_SECT__MAX);
+ memset(&obj->sym, 0, sizeof(ELFsymbol)*GDBJIT_SYM__MAX);
+ /* Initialize sections. */
+ ctx->p = obj->space;
+ gdbjit_initsect(ctx, GDBJIT_SECT_shstrtab, gdbjit_secthdr);
+ gdbjit_initsect(ctx, GDBJIT_SECT_strtab, gdbjit_symtab);
+ gdbjit_initsect(ctx, GDBJIT_SECT_debug_info, gdbjit_debuginfo);
+ gdbjit_initsect(ctx, GDBJIT_SECT_debug_abbrev, gdbjit_debugabbrev);
+ gdbjit_initsect(ctx, GDBJIT_SECT_debug_line, gdbjit_debugline);
+ SECTALIGN(ctx->p, sizeof(uintptr_t));
+ gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe);
+ ctx->objsize = (size_t)((char *)ctx->p - (char *)obj);
+ lua_assert(ctx->objsize < sizeof(GDBJITobj));
+}
+
+#undef SECTALIGN
+
+/* -- Interface to GDB JIT API -------------------------------------------- */
+
+/* Add new entry to GDB JIT symbol chain. */
+static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
+{
+ /* Allocate memory for GDB JIT entry and ELF object. */
+ MSize sz = (MSize)(sizeof(GDBJITentryobj) - sizeof(GDBJITobj) + ctx->objsize);
+ GDBJITentryobj *eo = lj_mem_newt(L, sz, GDBJITentryobj);
+ memcpy(&eo->obj, &ctx->obj, ctx->objsize); /* Copy ELF object. */
+ eo->sz = sz;
+ ctx->T->gdbjit_entry = (void *)eo;
+ /* Link new entry to chain and register it. */
+ eo->entry.prev_entry = NULL;
+ eo->entry.next_entry = __jit_debug_descriptor.first_entry;
+ if (eo->entry.next_entry)
+ eo->entry.next_entry->prev_entry = &eo->entry;
+ eo->entry.symfile_addr = (const char *)&eo->obj;
+ eo->entry.symfile_size = ctx->objsize;
+ __jit_debug_descriptor.first_entry = &eo->entry;
+ __jit_debug_descriptor.relevant_entry = &eo->entry;
+ __jit_debug_descriptor.action_flag = GDBJIT_REGISTER;
+ __jit_debug_register_code();
+}
+
+/* Add debug info for newly compiled trace and notify GDB. */
+void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno)
+{
+ GDBJITctx ctx;
+ lua_State *L = J->L;
+ GCproto *pt = &gcref(T->startpt)->pt;
+ TraceNo parent = T->ir[REF_BASE].op1;
+ uintptr_t pcofs = (uintptr_t)(T->snap[0].mapofs+T->snap[0].nslots);
+ const BCIns *startpc = (const BCIns *)(uintptr_t)T->snapmap[pcofs];
+ ctx.T = T;
+ ctx.mcaddr = (uintptr_t)T->mcode;
+ ctx.szmcode = T->szmcode;
+ ctx.spadjp = CFRAME_SIZE + (MSize)(parent ? J->trace[parent]->spadjust : 0);
+ ctx.spadj = CFRAME_SIZE + T->spadjust;
+ ctx.lineno = pt->lineinfo ? pt->lineinfo[startpc - pt->bc] : 0;
+ ctx.filename = strdata(pt->chunkname);
+ if (*ctx.filename == '@' || *ctx.filename == '=')
+ ctx.filename++;
+ else
+ ctx.filename = "(string)";
+ ctx.trname = lj_str_pushf(L, "TRACE_%d", traceno);
+ L->top--;
+ gdbjit_buildobj(&ctx);
+ gdbjit_newentry(L, &ctx);
+}
+
+/* Delete debug info for trace and notify GDB. */
+void lj_gdbjit_deltrace(jit_State *J, Trace *T)
+{
+ GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry;
+ if (eo) {
+ if (eo->entry.prev_entry)
+ eo->entry.prev_entry->next_entry = eo->entry.next_entry;
+ else
+ __jit_debug_descriptor.first_entry = eo->entry.next_entry;
+ if (eo->entry.next_entry)
+ eo->entry.next_entry->prev_entry = eo->entry.prev_entry;
+ __jit_debug_descriptor.relevant_entry = &eo->entry;
+ __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER;
+ __jit_debug_register_code();
+ lj_mem_free(J2G(J), eo, eo->sz);
+ }
+}
+
+#endif
+#endif
diff --git a/src/lj_gdbjit.h b/src/lj_gdbjit.h
new file mode 100644
index 00000000..2221948f
--- /dev/null
+++ b/src/lj_gdbjit.h
@@ -0,0 +1,22 @@
+/*
+** Client for the GDB JIT API.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_GDBJIT_H
+#define _LJ_GDBJIT_H
+
+#include "lj_obj.h"
+#include "lj_jit.h"
+
+#if LJ_HASJIT && defined(LUAJIT_USE_GDBJIT)
+
+LJ_FUNC void lj_gdbjit_addtrace(jit_State *J, Trace *T, TraceNo traceno);
+LJ_FUNC void lj_gdbjit_deltrace(jit_State *J, Trace *T);
+
+#else
+#define lj_gdbjit_addtrace(J, T, tn) UNUSED(T)  /* No-op stubs when GDB JIT support is disabled. */
+#define lj_gdbjit_deltrace(J, T) UNUSED(T)
+#endif
+
+#endif
diff --git a/src/lj_ir.c b/src/lj_ir.c
new file mode 100644
index 00000000..2ff54821
--- /dev/null
+++ b/src/lj_ir.c
@@ -0,0 +1,461 @@
+/*
+** SSA IR (Intermediate Representation) emitter.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_ir_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_gc.h"
+#include "lj_str.h"
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+#include "lj_trace.h"
+
+/* Some local macros to save typing. Undef'd at the end. */
+#define IR(ref) (&J->cur.ir[(ref)])
+#define fins (&J->fold.ins)
+
+/* Pass IR on to next optimization in chain (FOLD). */
+#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
+
+/* -- IR tables ----------------------------------------------------------- */
+
+/* IR instruction modes. Generated from the IRDEF table in lj_ir.h. */
+LJ_DATADEF const uint8_t lj_ir_mode[IR__MAX+1] = {
+IRDEF(IRMODE)
+ 0  /* Extra sentinel entry for IR__MAX. */
+};
+
+/* -- IR emitter ---------------------------------------------------------- */
+
+/* Grow IR buffer at the top. */
+void LJ_FASTCALL lj_ir_growtop(jit_State *J)
+{
+ IRIns *baseir = J->irbuf + J->irbotlim;  /* Start of the actual allocation. */
+ MSize szins = J->irtoplim - J->irbotlim;  /* Current capacity in instructions. */
+ if (szins) {
+ baseir = (IRIns *)lj_mem_realloc(J->L, baseir, szins*sizeof(IRIns),
+ 2*szins*sizeof(IRIns));  /* Double the capacity. */
+ J->irtoplim = J->irbotlim + 2*szins;
+ } else {  /* First allocation. */
+ baseir = (IRIns *)lj_mem_realloc(J->L, NULL, 0, LJ_MIN_IRSZ*sizeof(IRIns));
+ J->irbotlim = REF_BASE - LJ_MIN_IRSZ/4;  /* Reserve a quarter below for constants. */
+ J->irtoplim = J->irbotlim + LJ_MIN_IRSZ;
+ }
+ J->cur.ir = J->irbuf = baseir - J->irbotlim;  /* Biased base pointer, indexable by IRRef. */
+}
+
+/* Grow IR buffer at the bottom or shift it up. */
+static void lj_ir_growbot(jit_State *J)
+{
+ IRIns *baseir = J->irbuf + J->irbotlim;
+ MSize szins = J->irtoplim - J->irbotlim;
+ lua_assert(szins != 0);
+ lua_assert(J->cur.nk == J->irbotlim);  /* Only called when constants hit the bottom. */
+ if (J->cur.nins + (szins >> 1) < J->irtoplim) {
+ /* More than half of the buffer is free on top: shift up by a quarter. */
+ MSize ofs = szins >> 2;
+ memmove(baseir + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns));
+ J->irbotlim -= ofs;  /* Capacity unchanged, only the window moves. */
+ J->irtoplim -= ofs;
+ J->cur.ir = J->irbuf = baseir - J->irbotlim;
+ } else {
+ /* Double the buffer size, but split the growth amongst top/bottom. */
+ IRIns *newbase = lj_mem_newt(J->L, 2*szins*sizeof(IRIns), IRIns);
+ MSize ofs = szins >= 256 ? 128 : (szins >> 1); /* Limit bottom growth. */
+ memcpy(newbase + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns));
+ lj_mem_free(G(J->L), baseir, szins*sizeof(IRIns));
+ J->irbotlim -= ofs;
+ J->irtoplim = J->irbotlim + 2*szins;
+ J->cur.ir = J->irbuf = newbase - J->irbotlim;  /* Rebias onto the new allocation. */
+ }
+}
+
+/* Emit IR without any optimizations. */
+TRef LJ_FASTCALL lj_ir_emit(jit_State *J)
+{
+ IRRef ref = lj_ir_nextins(J);  /* May reallocate the IR buffer. */
+ IRIns *ir = IR(ref);
+ IROp op = fins->o;
+ ir->prev = J->chain[op];  /* Link into the per-opcode chain. */
+ J->chain[op] = (IRRef1)ref;
+ ir->o = op;
+ ir->op1 = fins->op1;
+ ir->op2 = fins->op2;
+ J->guardemit.irt |= fins->t.irt;  /* Accumulate type/guard flags of emitted instructions. */
+ return TREF(ref, irt_t((ir->t = fins->t)));
+}
+
+/* -- Interning of constants ---------------------------------------------- */
+
+/*
+** IR instructions for constants are kept between J->cur.nk <= ref < REF_BIAS.
+** They are chained like all other instructions, but grow downwards.
+** They are interned (like strings in the VM) to facilitate reference
+** comparisons. The same constant must get the same reference.
+*/
+
+/* Get ref of next IR constant and optionally grow IR.
+** Note: this may invalidate all IRIns *!
+*/
+static LJ_AINLINE IRRef ir_nextk(jit_State *J)
+{
+ IRRef ref = J->cur.nk;
+ if (LJ_UNLIKELY(ref <= J->irbotlim)) lj_ir_growbot(J);  /* Constants grow downwards. */
+ J->cur.nk = --ref;
+ return ref;
+}
+
+/* Intern int32_t constant. */
+TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
+{
+ IRIns *ir, *cir = J->cur.ir;
+ IRRef ref;
+ for (ref = J->chain[IR_KINT]; ref; ref = cir[ref].prev)  /* Linear search of the KINT chain. */
+ if (cir[ref].i == k)
+ goto found;  /* Reuse the existing constant. */
+ ref = ir_nextk(J);  /* Not found: add it. May invalidate cir! */
+ ir = IR(ref);
+ ir->i = k;
+ ir->t.irt = IRT_INT;
+ ir->o = IR_KINT;
+ ir->prev = J->chain[IR_KINT];  /* Push onto the chain head. */
+ J->chain[IR_KINT] = (IRRef1)ref;
+found:
+ return TREF(ref, IRT_INT);
+}
+
+/* The MRef inside the KNUM IR instruction holds the address of the constant
+** (an aligned double or a special 64 bit pattern). The KNUM constants
+** themselves are stored in a chained array and shared across traces.
+**
+** Rationale for choosing this data structure:
+** - The address of the constants is embedded in the generated machine code
+** and must never move. A resizable array or hash table wouldn't work.
+** - Most apps need very few non-integer constants (less than a dozen).
+** - Linear search is hard to beat in terms of speed and low complexity.
+*/
+typedef struct KNumArray {
+ MRef next; /* Pointer to next array in the chain (NULL at the end). */
+ MSize numk; /* Number of used elements in this array. */
+ TValue k[LJ_MIN_KNUMSZ]; /* Array of constants. Addresses must never move. */
+} KNumArray;
+
+/* Free all chained arrays. */
+void lj_ir_knum_freeall(jit_State *J)
+{
+ KNumArray *kn;
+ for (kn = mref(J->knum, KNumArray); kn; ) {  /* Walk the list, freeing as we go. */
+ KNumArray *next = mref(kn->next, KNumArray);  /* Read link before freeing the node. */
+ lj_mem_free(J2G(J), kn, sizeof(KNumArray));
+ kn = next;
+ }
+}
+
+/* Find KNUM constant in chained array or add it. */
+static cTValue *ir_knum_find(jit_State *J, uint64_t nn)
+{
+ KNumArray *kn, *knp = NULL;
+ TValue *ntv;
+ MSize idx;
+ /* Search for the constant in the whole chain of arrays. */
+ for (kn = mref(J->knum, KNumArray); kn; kn = mref(kn->next, KNumArray)) {
+ knp = kn; /* Remember previous element in list. */
+ for (idx = 0; idx < kn->numk; idx++) { /* Search one array. */
+ TValue *tv = &kn->k[idx];
+ if (tv->u64 == nn) /* Needed for +-0/NaN/absmask. */
+ return tv;
+ }
+ }
+ /* Constant was not found, need to add it. */
+ if (!(knp && knp->numk < LJ_MIN_KNUMSZ)) { /* Allocate a new array. */
+ KNumArray *nkn = lj_mem_newt(J->L, sizeof(KNumArray), KNumArray);
+ setmref(nkn->next, NULL);
+ nkn->numk = 0;
+ if (knp)
+ setmref(knp->next, nkn); /* Chain to the end of the list. */
+ else
+ setmref(J->knum, nkn); /* Link first array. */
+ knp = nkn;
+ }
+ ntv = &knp->k[knp->numk++]; /* Add to current array. */
+ ntv->u64 = nn;  /* Store the raw 64 bit pattern, not a rounded double. */
+ return ntv;
+}
+
+/* Intern FP constant, given by its address. */
+TRef lj_ir_knum_addr(jit_State *J, cTValue *tv)
+{
+ IRIns *ir, *cir = J->cur.ir;
+ IRRef ref;
+ for (ref = J->chain[IR_KNUM]; ref; ref = cir[ref].prev)
+ if (ir_knum(&cir[ref]) == tv)  /* Compare by address, not by value. */
+ goto found;
+ ref = ir_nextk(J);  /* May invalidate cir! */
+ ir = IR(ref);
+ setmref(ir->ptr, tv);  /* Store the address of the shared constant. */
+ ir->t.irt = IRT_NUM;
+ ir->o = IR_KNUM;
+ ir->prev = J->chain[IR_KNUM];
+ J->chain[IR_KNUM] = (IRRef1)ref;
+found:
+ return TREF(ref, IRT_NUM);
+}
+
+/* Intern FP constant, given by its 64 bit pattern. */
+TRef lj_ir_knum_nn(jit_State *J, uint64_t nn)
+{
+ return lj_ir_knum_addr(J, ir_knum_find(J, nn));  /* Get shared storage, then intern by address. */
+}
+
+/* Special 16 byte aligned SIMD constants. */
+LJ_DATADEF LJ_ALIGN(16) cTValue lj_ir_knum_tv[4] = {
+ { U64x(7fffffff,ffffffff) }, { U64x(7fffffff,ffffffff) },  /* [0..1]: abs mask, clears sign bits. */
+ { U64x(80000000,00000000) }, { U64x(80000000,00000000) }  /* [2..3]: neg mask, sign bits only. */
+};
+
+/* Check whether a number is int and return it. -0 is NOT considered an int. */
+static int numistrueint(lua_Number n, int32_t *kp)
+{
+ int32_t k = lj_num2int(n);
+ if (n == cast_num(k)) {  /* Round-trips through int32_t without loss? */
+ if (kp) *kp = k;
+ if (k == 0) { /* Special check for -0. */
+ TValue tv;
+ setnumV(&tv, n);
+ if (tv.u32.hi != 0)  /* High word non-zero for 0.0 means the sign bit is set (-0). */
+ return 0;
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/* Intern number as int32_t constant if possible, otherwise as FP constant. */
+TRef lj_ir_knumint(jit_State *J, lua_Number n)
+{
+ int32_t k;
+ if (numistrueint(n, &k))  /* Prefer the integer representation. */
+ return lj_ir_kint(J, k);
+ else
+ return lj_ir_knum(J, n);
+}
+
+/* Intern GC object "constant". */
+TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
+{
+ IRIns *ir, *cir = J->cur.ir;
+ IRRef ref;
+ for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
+ if (ir_kgc(&cir[ref]) == o)  /* Identity comparison suffices for GC objects. */
+ goto found;
+ ref = ir_nextk(J);  /* May invalidate cir! */
+ ir = IR(ref);
+ /* NOBARRIER: Current trace is a GC root. */
+ setgcref(ir->gcr, o);
+ ir->t.irt = (uint8_t)t;  /* Caller supplies the IR type (STR/TAB/FUNC/...). */
+ ir->o = IR_KGC;
+ ir->prev = J->chain[IR_KGC];
+ J->chain[IR_KGC] = (IRRef1)ref;
+found:
+ return TREF(ref, t);
+}
+
+/* Intern 32 bit pointer constant. */
+TRef lj_ir_kptr(jit_State *J, void *ptr)
+{
+ IRIns *ir, *cir = J->cur.ir;
+ IRRef ref;
+ lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr);  /* Pointer must fit in 32 bits. */
+ for (ref = J->chain[IR_KPTR]; ref; ref = cir[ref].prev)
+ if (mref(cir[ref].ptr, void) == ptr)
+ goto found;
+ ref = ir_nextk(J);  /* May invalidate cir! */
+ ir = IR(ref);
+ setmref(ir->ptr, ptr);
+ ir->t.irt = IRT_PTR;
+ ir->o = IR_KPTR;
+ ir->prev = J->chain[IR_KPTR];
+ J->chain[IR_KPTR] = (IRRef1)ref;
+found:
+ return TREF(ref, IRT_PTR);
+}
+
+/* Intern typed NULL constant. */
+TRef lj_ir_knull(jit_State *J, IRType t)
+{
+ IRIns *ir, *cir = J->cur.ir;
+ IRRef ref;
+ for (ref = J->chain[IR_KNULL]; ref; ref = cir[ref].prev)
+ if (irt_t(cir[ref].t) == t)  /* At most one NULL constant per type. */
+ goto found;
+ ref = ir_nextk(J);  /* May invalidate cir! */
+ ir = IR(ref);
+ ir->i = 0;  /* The value is always 0/NULL; only the type varies. */
+ ir->t.irt = (uint8_t)t;
+ ir->o = IR_KNULL;
+ ir->prev = J->chain[IR_KNULL];
+ J->chain[IR_KNULL] = (IRRef1)ref;
+found:
+ return TREF(ref, t);
+}
+
+/* Intern key slot. */
+TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot)
+{
+ IRIns *ir, *cir = J->cur.ir;
+ IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot);  /* Pack key ref (lo) and slot index (hi). */
+ IRRef ref;
+ /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */
+ lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot);
+ for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev)
+ if (cir[ref].op12 == op12)  /* Match on the packed pair. */
+ goto found;
+ ref = ir_nextk(J);  /* May invalidate cir! */
+ ir = IR(ref);
+ ir->op12 = op12;
+ ir->t.irt = IRT_PTR;
+ ir->o = IR_KSLOT;
+ ir->prev = J->chain[IR_KSLOT];
+ J->chain[IR_KSLOT] = (IRRef1)ref;
+found:
+ return TREF(ref, IRT_PTR);
+}
+
+/* -- Access to IR constants ---------------------------------------------- */
+
+/* Copy value of IR constant. */
+void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
+{
+ UNUSED(L);
+ lua_assert(ir->o != IR_KSLOT); /* Common mistake. */
+ if (irt_isint(ir->t)) {
+ lua_assert(ir->o == IR_KINT);
+ setintV(tv, ir->i);  /* Inline 32 bit value. */
+ } else if (irt_isnum(ir->t)) {
+ lua_assert(ir->o == IR_KNUM);
+ setnumV(tv, ir_knum(ir)->n);  /* Dereference the shared FP constant. */
+ } else if (irt_ispri(ir->t)) {
+ lua_assert(ir->o == IR_KPRI);
+ setitype(tv, irt_toitype(ir->t));  /* Primitives: the type tag is the value. */
+ } else {
+ if (ir->o == IR_KGC) {
+ lua_assert(irt_isgcv(ir->t));
+ setgcV(L, tv, &ir_kgc(ir)->gch, irt_toitype(ir->t));
+ } else {
+ lua_assert(ir->o == IR_KPTR || ir->o == IR_KNULL);
+ setlightudV(tv, mref(ir->ptr, void));  /* Pointers surface as light userdata. */
+ }
+ }
+}
+
+/* -- Convert IR operand types -------------------------------------------- */
+
+/* Convert from integer or string to number. */
+TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr)
+{
+ if (!tref_isnum(tr)) {
+ if (tref_isinteger(tr))
+ tr = emitir(IRTN(IR_TONUM), tr, 0);
+ else if (tref_isstr(tr))
+ tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);  /* Guarded: string may fail to parse. */
+ else
+ lj_trace_err(J, LJ_TRERR_BADTYPE);  /* Not convertible: raise a trace error. */
+ }
+ return tr;
+}
+
+/* Convert from integer or number to string. */
+TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
+{
+ if (!tref_isstr(tr)) {
+ if (!tref_isnumber(tr))
+ lj_trace_err(J, LJ_TRERR_BADTYPE);  /* Only numbers are convertible here. */
+ tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
+ }
+ return tr;
+}
+
+/* Convert from number or string to bitop operand (overflow wrapped). */
+TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr)
+{
+ if (!tref_isinteger(tr)) {
+ if (tref_isstr(tr))
+ tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);  /* String -> number first (guarded). */
+ else if (!tref_isnum(tr))
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ tr = emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J));  /* op2 is the 2^52+2^51 bias constant. */
+ }
+ return tr;
+}
+
+/* Convert from number or string to integer (overflow undefined). */
+TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr)
+{
+ if (!tref_isinteger(tr)) {
+ if (tref_isstr(tr))
+ tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);  /* String -> number first (guarded). */
+ else if (!tref_isnum(tr))
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ tr = emitir(IRTI(IR_TOINT), tr, IRTOINT_ANY);  /* Any FP number is ok (see lj_ir.h). */
+ }
+ return tr;
+}
+
+/* -- Miscellaneous IR ops ------------------------------------------------ */
+
+/* Evaluate numeric comparison. */
+int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op)
+{
+ switch (op) {
+ case IR_EQ: return (a == b);
+ case IR_NE: return (a != b);
+ case IR_LT: return (a < b);
+ case IR_GE: return (a >= b);
+ case IR_LE: return (a <= b);
+ case IR_GT: return (a > b);
+ case IR_ULT: return !(a >= b);  /* Unordered variants: true if either operand is NaN. */
+ case IR_UGE: return !(a < b);
+ case IR_ULE: return !(a > b);
+ case IR_UGT: return !(a <= b);
+ default: lua_assert(0); return 0;  /* Not a numeric comparison opcode. */
+ }
+}
+
+/* Evaluate string comparison. */
+int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op)
+{
+ int res = lj_str_cmp(a, b);  /* strcmp-style result: <0, 0 or >0. */
+ switch (op) {
+ case IR_LT: return (res < 0);
+ case IR_GE: return (res >= 0);
+ case IR_LE: return (res <= 0);
+ case IR_GT: return (res > 0);
+ default: lua_assert(0); return 0;  /* EQ/NE are handled elsewhere (by identity). NOTE(review): confirm. */
+ }
+}
+
+/* Rollback IR to previous state. */
+void lj_ir_rollback(jit_State *J, IRRef ref)
+{
+ IRRef nins = J->cur.nins;
+ while (nins > ref) {  /* Unlink all instructions emitted after ref, newest first. */
+ IRIns *ir;
+ nins--;
+ ir = IR(nins);
+ J->chain[ir->o] = ir->prev;  /* Restore the per-opcode chain head. */
+ }
+ J->cur.nins = nins;
+}
+
+#undef IR
+#undef fins
+#undef emitir
+
+#endif
diff --git a/src/lj_ir.h b/src/lj_ir.h
new file mode 100644
index 00000000..a6973a81
--- /dev/null
+++ b/src/lj_ir.h
@@ -0,0 +1,429 @@
+/*
+** SSA IR (Intermediate Representation) format.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_IR_H
+#define _LJ_IR_H
+
+#include "lj_obj.h"
+
+/* IR instruction definition. Order matters, see below. */
+#define IRDEF(_) \
+ /* Miscellaneous ops. */ \
+ _(NOP, N , ___, ___) \
+ _(BASE, N , lit, lit) \
+ _(LOOP, G , ___, ___) \
+ _(PHI, S , ref, ref) \
+ _(RENAME, S , ref, lit) \
+ \
+ /* Constants. */ \
+ _(KPRI, N , ___, ___) \
+ _(KINT, N , cst, ___) \
+ _(KGC, N , cst, ___) \
+ _(KPTR, N , cst, ___) \
+ _(KNULL, N , cst, ___) \
+ _(KNUM, N , cst, ___) \
+ _(KSLOT, N , ref, lit) \
+ \
+ /* Guarded assertions. */ \
+ /* Must be properly aligned to flip opposites (^1) and (un)ordered (^4). */ \
+ _(EQ, GC, ref, ref) \
+ _(NE, GC, ref, ref) \
+ \
+ _(ABC, G , ref, ref) \
+ _(FRAME, G , ref, ref) \
+ \
+ _(LT, G , ref, ref) \
+ _(GE, G , ref, ref) \
+ _(LE, G , ref, ref) \
+ _(GT, G , ref, ref) \
+ \
+ _(ULT, G , ref, ref) \
+ _(UGE, G , ref, ref) \
+ _(ULE, G , ref, ref) \
+ _(UGT, G , ref, ref) \
+ \
+ /* Bit ops. */ \
+ _(BNOT, N , ref, ___) \
+ _(BSWAP, N , ref, ___) \
+ _(BAND, C , ref, ref) \
+ _(BOR, C , ref, ref) \
+ _(BXOR, C , ref, ref) \
+ _(BSHL, N , ref, ref) \
+ _(BSHR, N , ref, ref) \
+ _(BSAR, N , ref, ref) \
+ _(BROL, N , ref, ref) \
+ _(BROR, N , ref, ref) \
+ \
+ /* Arithmetic ops. ORDER ARITH (FPMATH/POWI take the space for MOD/POW). */ \
+ _(ADD, C , ref, ref) \
+ _(SUB, N , ref, ref) \
+ _(MUL, C , ref, ref) \
+ _(DIV, N , ref, ref) \
+ \
+ _(FPMATH, N , ref, lit) \
+ _(POWI, N , ref, ref) \
+ \
+ _(NEG, N , ref, ref) \
+ _(ABS, N , ref, ref) \
+ _(ATAN2, N , ref, ref) \
+ _(LDEXP, N , ref, ref) \
+ _(MIN, C , ref, ref) \
+ _(MAX, C , ref, ref) \
+ \
+ /* Overflow-checking arithmetic ops. */ \
+ _(ADDOV, GC, ref, ref) \
+ _(SUBOV, G , ref, ref) \
+ \
+ /* Memory ops. A = array, H = hash, U = upvalue, F = field, S = stack. */ \
+ \
+ /* Memory references. */ \
+ _(AREF, R , ref, ref) \
+ _(HREFK, RG, ref, ref) \
+ _(HREF, L , ref, ref) \
+ _(NEWREF, S , ref, ref) \
+ _(UREFO, LG, ref, lit) \
+ _(UREFC, LG, ref, lit) \
+ _(FREF, R , ref, lit) \
+ _(STRREF, N , ref, ref) \
+ \
+ /* Loads and Stores. These must be in the same order. */ \
+ _(ALOAD, LG, ref, ___) \
+ _(HLOAD, LG, ref, ___) \
+ _(ULOAD, LG, ref, ___) \
+ _(FLOAD, L , ref, lit) \
+ _(SLOAD, LG, lit, lit) \
+ _(XLOAD, L , ref, lit) \
+ \
+ _(ASTORE, S , ref, ref) \
+ _(HSTORE, S , ref, ref) \
+ _(USTORE, S , ref, ref) \
+ _(FSTORE, S , ref, ref) \
+ \
+ /* String ops. */ \
+ _(SNEW, N , ref, ref) \
+ \
+ /* Table ops. */ \
+ _(TNEW, A , lit, lit) \
+ _(TDUP, A , ref, ___) \
+ _(TLEN, L , ref, ___) \
+ _(TBAR, S , ref, ___) \
+ _(OBAR, S , ref, ref) \
+ \
+ /* Type conversions. */ \
+ _(TONUM, N , ref, ___) \
+ _(TOINT, N , ref, lit) \
+ _(TOBIT, N , ref, ref) \
+ _(TOSTR, N , ref, ___) \
+ _(STRTO, G , ref, ___) \
+ \
+ /* End of list. */
+
+/* IR opcodes (max. 256). */
+typedef enum {
+#define IRENUM(name, m, m1, m2) IR_##name,
+IRDEF(IRENUM)
+#undef IRENUM
+ IR__MAX
+} IROp;
+
+/* Stored opcode. */
+typedef uint8_t IROp1;
+
+LJ_STATIC_ASSERT(((int)IR_EQ^1) == (int)IR_NE);
+LJ_STATIC_ASSERT(((int)IR_LT^1) == (int)IR_GE);
+LJ_STATIC_ASSERT(((int)IR_LE^1) == (int)IR_GT);
+LJ_STATIC_ASSERT(((int)IR_LT^3) == (int)IR_GT);
+LJ_STATIC_ASSERT(((int)IR_LT^4) == (int)IR_ULT);
+
+/* Delta between xLOAD and xSTORE. */
+#define IRDELTA_L2S ((int)IR_ASTORE - (int)IR_ALOAD)
+
+LJ_STATIC_ASSERT((int)IR_HLOAD + IRDELTA_L2S == (int)IR_HSTORE);
+LJ_STATIC_ASSERT((int)IR_ULOAD + IRDELTA_L2S == (int)IR_USTORE);
+LJ_STATIC_ASSERT((int)IR_FLOAD + IRDELTA_L2S == (int)IR_FSTORE);
+
+/* FPMATH sub-functions. ORDER FPM. */
+#define IRFPMDEF(_) \
+ _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
+ _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \
+ _(SIN) _(COS) _(TAN) \
+ _(OTHER)
+
+typedef enum {
+#define FPMENUM(name) IRFPM_##name,
+IRFPMDEF(FPMENUM)
+#undef FPMENUM
+ IRFPM__MAX
+} IRFPMathOp;
+
+/* FLOAD field IDs. */
+#define IRFLDEF(_) \
+ _(STR_LEN, GCstr, len) \
+ _(FUNC_ENV, GCfunc, l.env) \
+ _(TAB_META, GCtab, metatable) \
+ _(TAB_ARRAY, GCtab, array) \
+ _(TAB_NODE, GCtab, node) \
+ _(TAB_ASIZE, GCtab, asize) \
+ _(TAB_HMASK, GCtab, hmask) \
+ _(TAB_NOMM, GCtab, nomm) \
+ _(UDATA_META, GCudata, metatable)
+
+typedef enum {
+#define FLENUM(name, type, field) IRFL_##name,
+IRFLDEF(FLENUM)
+#undef FLENUM
+ IRFL__MAX
+} IRFieldID;
+
+/* SLOAD mode bits, stored in op2. */
+#define IRSLOAD_INHERIT 1 /* Inherited by exits/side traces. */
+#define IRSLOAD_READONLY 2 /* Read-only, omit slot store. */
+#define IRSLOAD_PARENT 4 /* Coalesce with parent trace. */
+
+/* XLOAD mode, stored in op2. */
+#define IRXLOAD_UNALIGNED 1
+
+/* TOINT mode, stored in op2. Ordered by strength of the checks. */
+#define IRTOINT_CHECK 0 /* Number checked for integerness. */
+#define IRTOINT_INDEX 1 /* Checked + special backprop rules. */
+#define IRTOINT_ANY 2 /* Any FP number is ok. */
+#define IRTOINT_TOBIT 3 /* Cache only: TOBIT conversion. */
+
+/* IR operand mode (2 bit). */
+typedef enum {
+ IRMref, /* IR reference. */
+ IRMlit, /* 16 bit unsigned literal. */
+ IRMcst, /* Constant literal: i, gcr or ptr. */
+ IRMnone /* Unused operand. */
+} IRMode;
+#define IRM___ IRMnone
+
+/* Mode bits: Commutative, {Normal/Ref, Alloc, Load, Store}, Guard. */
+#define IRM_C 0x10
+
+#define IRM_N 0x00
+#define IRM_R IRM_N
+#define IRM_A 0x20
+#define IRM_L 0x40
+#define IRM_S 0x60
+
+#define IRM_G 0x80
+
+#define IRM_GC (IRM_G|IRM_C)
+#define IRM_RG (IRM_R|IRM_G)
+#define IRM_LG (IRM_L|IRM_G)
+
+#define irm_op1(m) (cast(IRMode, (m)&3))
+#define irm_op2(m) (cast(IRMode, ((m)>>2)&3))
+#define irm_iscomm(m) ((m) & IRM_C)
+#define irm_kind(m) ((m) & IRM_S)
+#define irm_isguard(m) ((m) & IRM_G)
+/* Stores or any other op with a guard has a side-effect. */
+#define irm_sideeff(m) ((m) >= IRM_S)
+
+#define IRMODE(name, m, m1, m2) ((IRM##m1)|((IRM##m2)<<2)|(IRM_##m)),
+
+LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
+
+/* IR result type and flags (8 bit). */
+typedef enum {
+ /* Map of itypes to non-negative numbers. ORDER LJ_T */
+ IRT_NIL,
+ IRT_FALSE,
+ IRT_TRUE,
+ IRT_LIGHTUD,
+ /* GCobj types are from here ... */
+ IRT_STR,
+ IRT_PTR, /* IRT_PTR never escapes the IR (map of LJ_TUPVAL). */
+ IRT_THREAD,
+ IRT_PROTO,
+ IRT_FUNC,
+ IRT_9, /* LJ_TDEADKEY is never used in the IR. */
+ IRT_TAB,
+ IRT_UDATA,
+ /* ... until here. */
+ IRT_NUM,
+ /* The various integers are only used in the IR and can only escape to
+ ** a TValue after implicit or explicit conversion (TONUM). Their types
+ ** must be contiguous and next to IRT_NUM (see the typerange macros below).
+ */
+ IRT_INT,
+ IRT_I8,
+ IRT_U8,
+ IRT_I16,
+ IRT_U16,
+ /* There is room for 14 more types. */
+
+ /* Additional flags. */
+ IRT_MARK = 0x20, /* Marker for misc. purposes. */
+ IRT_GUARD = 0x40, /* Instruction is a guard. */
+ IRT_ISPHI = 0x80, /* Instruction is left or right PHI operand. */
+
+ /* Masks. */
+ IRT_TYPE = 0x1f,
+ IRT_T = 0xff
+} IRType;
+
+#define irtype_ispri(irt) ((uint32_t)(irt) <= IRT_TRUE)
+
+/* Stored IRType. */
+typedef struct IRType1 { uint8_t irt; } IRType1;
+
+#define IRT(o, t) ((uint32_t)(((o)<<8) | (t)))
+#define IRTI(o) (IRT((o), IRT_INT))
+#define IRTN(o) (IRT((o), IRT_NUM))
+#define IRTG(o, t) (IRT((o), IRT_GUARD|(t)))
+#define IRTGI(o) (IRT((o), IRT_GUARD|IRT_INT))
+
+#define irt_t(t) (cast(IRType, (t).irt))
+#define irt_type(t) (cast(IRType, (t).irt & IRT_TYPE))
+#define irt_sametype(t1, t2) ((((t1).irt ^ (t2).irt) & IRT_TYPE) == 0)
+#define irt_typerange(t, first, last) \
+ ((uint32_t)((t).irt & IRT_TYPE) - (uint32_t)(first) <= (uint32_t)(last-first))
+
+#define irt_isnil(t) (irt_type(t) == IRT_NIL)
+#define irt_ispri(t) ((uint32_t)irt_type(t) <= IRT_TRUE)
+#define irt_isstr(t) (irt_type(t) == IRT_STR)
+#define irt_isfunc(t) (irt_type(t) == IRT_FUNC)
+#define irt_istab(t) (irt_type(t) == IRT_TAB)
+#define irt_isnum(t) (irt_type(t) == IRT_NUM)
+#define irt_isint(t) (irt_type(t) == IRT_INT)
+#define irt_isi8(t) (irt_type(t) == IRT_I8)
+#define irt_isu8(t) (irt_type(t) == IRT_U8)
+#define irt_isi16(t) (irt_type(t) == IRT_I16)
+#define irt_isu16(t) (irt_type(t) == IRT_U16)
+
+#define irt_isinteger(t) (irt_typerange((t), IRT_INT, IRT_U16))
+#define irt_isgcv(t) (irt_typerange((t), IRT_STR, IRT_UDATA))
+#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
+
+#define itype2irt(tv) \
+ (~uitype(tv) < IRT_NUM ? cast(IRType, ~uitype(tv)) : IRT_NUM)
+#define irt_toitype(t) ((int32_t)~(uint32_t)irt_type(t))
+
+#define irt_isguard(t) ((t).irt & IRT_GUARD)
+#define irt_ismarked(t) ((t).irt & IRT_MARK)
+#define irt_setmark(t) ((t).irt |= IRT_MARK)
+#define irt_clearmark(t) ((t).irt &= ~IRT_MARK)
+#define irt_isphi(t) ((t).irt & IRT_ISPHI)
+#define irt_setphi(t) ((t).irt |= IRT_ISPHI)
+#define irt_clearphi(t) ((t).irt &= ~IRT_ISPHI)
+
+/* Stored combined IR opcode and type. */
+typedef uint16_t IROpT;
+
+/* IR references. */
+typedef uint16_t IRRef1; /* One stored reference. */
+typedef uint32_t IRRef2; /* Two stored references. */
+typedef uint32_t IRRef; /* Used to pass around references. */
+
+/* Fixed references. */
+enum {
+ REF_BIAS = 0x8000,
+ REF_TRUE = REF_BIAS-3,
+ REF_FALSE = REF_BIAS-2,
+ REF_NIL = REF_BIAS-1, /* \--- Constants grow downwards. */
+ REF_BASE = REF_BIAS, /* /--- IR grows upwards. */
+ REF_FIRST = REF_BIAS+1,
+ REF_DROP = 0xffff
+};
+
+/* Note: IRMlit operands must be < REF_BIAS, too!
+** This allows for fast and uniform manipulation of all operands
+** without looking up the operand mode in lj_ir_mode:
+** - CSE calculates the maximum reference of two operands.
+** This must work with mixed reference/literal operands, too.
+** - DCE marking only checks for operand >= REF_BIAS.
+** - LOOP needs to substitute reference operands.
+** Constant references and literals must not be modified.
+*/
+
+#define IRREF2(lo, hi) ((IRRef2)(lo) | ((IRRef2)(hi) << 16))
+
+#define irref_isk(ref) ((ref) < REF_BIAS)
+
+/* Tagged IR references. */
+typedef uint32_t TRef;
+
+#define TREF(ref, t) (cast(TRef, (ref) + ((t)<<16)))
+
+#define tref_ref(tr) (cast(IRRef1, (tr)))
+#define tref_t(tr) (cast(IRType, (tr)>>16))
+#define tref_type(tr) (cast(IRType, ((tr)>>16) & IRT_TYPE))
+#define tref_typerange(tr, first, last) \
+ ((((tr)>>16) & IRT_TYPE) - (TRef)(first) <= (TRef)(last-first))
+
+#define tref_istype(tr, t) (((tr) & (IRT_TYPE<<16)) == ((t)<<16))
+#define tref_isnil(tr) (tref_istype((tr), IRT_NIL))
+#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE))
+#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE))
+#define tref_isstr(tr) (tref_istype((tr), IRT_STR))
+#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC))
+#define tref_istab(tr) (tref_istype((tr), IRT_TAB))
+#define tref_isudata(tr) (tref_istype((tr), IRT_UDATA))
+#define tref_isnum(tr) (tref_istype((tr), IRT_NUM))
+#define tref_isint(tr) (tref_istype((tr), IRT_INT))
+
+#define tref_isbool(tr) (tref_typerange((tr), IRT_FALSE, IRT_TRUE))
+#define tref_ispri(tr) (tref_typerange((tr), IRT_NIL, IRT_TRUE))
+#define tref_istruecond(tr) (!tref_typerange((tr), IRT_NIL, IRT_FALSE))
+#define tref_isinteger(tr) (tref_typerange((tr), IRT_INT, IRT_U16))
+#define tref_isnumber(tr) (tref_typerange((tr), IRT_NUM, IRT_U16))
+#define tref_isnumber_str(tr) (tref_isnumber((tr)) || tref_isstr((tr)))
+#define tref_isgcv(tr) (tref_typerange((tr), IRT_STR, IRT_UDATA))
+
+#define tref_isk(tr) (irref_isk(tref_ref((tr))))
+#define tref_isk2(tr1, tr2) (irref_isk(tref_ref((tr1) | (tr2))))
+
+#define TREF_PRI(t) (TREF(REF_NIL-(t), (t)))
+#define TREF_NIL (TREF_PRI(IRT_NIL))
+#define TREF_FALSE (TREF_PRI(IRT_FALSE))
+#define TREF_TRUE (TREF_PRI(IRT_TRUE))
+
+/* IR instruction format (64 bit).
+**
+** 16 16 8 8 8 8
+** +-------+-------+---+---+---+---+
+** | op1 | op2 | t | o | r | s |
+** +-------+-------+---+---+---+---+
+** | op12/i/gco | ot | prev | (alternative fields in union)
+** +---------------+-------+-------+
+** 32 16 16
+**
+** prev is only valid prior to register allocation and then reused for r + s.
+*/
+
+typedef union IRIns {
+ struct {
+ LJ_ENDIAN_LOHI(
+ IRRef1 op1; /* IR operand 1. */
+ , IRRef1 op2; /* IR operand 2. */
+ )
+ IROpT ot; /* IR opcode and type (overlaps t and o). */
+ IRRef1 prev; /* Previous ins in same chain (overlaps r and s). */
+ };
+ struct {
+ IRRef2 op12; /* IR operand 1 and 2 (overlaps op1 and op2). */
+ LJ_ENDIAN_LOHI(
+ IRType1 t; /* IR type. */
+ , IROp1 o; /* IR opcode. */
+ )
+ LJ_ENDIAN_LOHI(
+ uint8_t r; /* Register allocation (overlaps prev). */
+ , uint8_t s; /* Spill slot allocation (overlaps prev). */
+ )
+ };
+ int32_t i; /* 32 bit signed integer literal (overlaps op12). */
+ GCRef gcr; /* GCobj constant (overlaps op12). */
+ MRef ptr; /* Pointer constant (overlaps op12). */
+} IRIns;
+
+#define ir_kgc(ir) (gcref((ir)->gcr))
+#define ir_kstr(ir) (gco2str(ir_kgc((ir))))
+#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
+#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
+#define ir_knum(ir) (mref((ir)->ptr, cTValue))
+
+#endif
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
new file mode 100644
index 00000000..69b0a955
--- /dev/null
+++ b/src/lj_iropt.h
@@ -0,0 +1,128 @@
+/*
+** Common header for IR emitter and optimizations.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_IROPT_H
+#define _LJ_IROPT_H
+
+#include "lj_obj.h"
+#include "lj_jit.h"
+
+#if LJ_HASJIT
+/* IR emitter. */
+LJ_FUNC void LJ_FASTCALL lj_ir_growtop(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_ir_emit(jit_State *J);
+
+/* Save current IR in J->fold.ins, but do not emit it (yet). */
+static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b)
+{
+ J->fold.ins.ot = ot; J->fold.ins.op1 = a; J->fold.ins.op2 = b;  /* ot packs opcode and type. */
+}
+
+#define lj_ir_set(J, ot, a, b) \
+ lj_ir_set_(J, (uint16_t)(ot), (IRRef1)(a), (IRRef1)(b))
+
+/* Get ref of next IR instruction and optionally grow IR.
+** Note: this may invalidate all IRIns*!
+*/
+static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
+{
+ IRRef ref = J->cur.nins;
+ if (LJ_UNLIKELY(ref >= J->irtoplim)) lj_ir_growtop(J);  /* Non-constant IR grows upwards. */
+ J->cur.nins = ref + 1;
+ return ref;
+}
+
+/* Interning of constants. */
+LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
+LJ_FUNC void lj_ir_knum_freeall(jit_State *J);
+LJ_FUNC TRef lj_ir_knum_addr(jit_State *J, cTValue *tv);
+LJ_FUNC TRef lj_ir_knum_nn(jit_State *J, uint64_t nn);
+LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
+LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t);
+LJ_FUNC TRef lj_ir_kptr(jit_State *J, void *ptr);
+LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t);
+LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot);
+
+/* Intern FP constant from a lua_Number. */
+static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
+{
+ TValue tv;
+ tv.n = n;
+ return lj_ir_knum_nn(J, tv.u64);  /* Intern via the raw 64 bit pattern. */
+}
+
+#define lj_ir_kstr(J, str) lj_ir_kgc(J, obj2gco((str)), IRT_STR)
+#define lj_ir_ktab(J, tab) lj_ir_kgc(J, obj2gco((tab)), IRT_TAB)
+#define lj_ir_kfunc(J, func) lj_ir_kgc(J, obj2gco((func)), IRT_FUNC)
+
+/* Special FP constants. */
+#define lj_ir_knum_zero(J) lj_ir_knum_nn(J, U64x(00000000,00000000))
+#define lj_ir_knum_one(J) lj_ir_knum_nn(J, U64x(3ff00000,00000000))
+#define lj_ir_knum_tobit(J) lj_ir_knum_nn(J, U64x(43380000,00000000))
+
+/* Special 16 byte aligned SIMD constants. */
+LJ_DATA LJ_ALIGN(16) cTValue lj_ir_knum_tv[4];
+#define lj_ir_knum_abs(J) lj_ir_knum_addr(J, &lj_ir_knum_tv[0])
+#define lj_ir_knum_neg(J) lj_ir_knum_addr(J, &lj_ir_knum_tv[2])
+
+/* Access to constants. */
+LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
+
+/* Convert IR operand types. */
+LJ_FUNC TRef LJ_FASTCALL lj_ir_tonum(jit_State *J, TRef tr);
+LJ_FUNC TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr);
+LJ_FUNC TRef LJ_FASTCALL lj_ir_tobit(jit_State *J, TRef tr);
+LJ_FUNC TRef LJ_FASTCALL lj_ir_toint(jit_State *J, TRef tr);
+
+/* Miscellaneous IR ops. */
+LJ_FUNC int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op);
+LJ_FUNC int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op);
+LJ_FUNC void lj_ir_rollback(jit_State *J, IRRef ref);
+
+/* Emit IR instructions with on-the-fly optimizations. */
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fold(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_cse(jit_State *J);
+
+/* Special return values for the fold functions. */
+enum {
+ NEXTFOLD, /* Couldn't fold, pass on. */
+ RETRYFOLD, /* Retry fold with modified fins. */
+ KINTFOLD, /* Return ref for int constant in fins->i. */
+ FAILFOLD, /* Guard would always fail. */
+ DROPFOLD, /* Guard eliminated. */
+ MAX_FOLD
+};
+
+#define INTFOLD(k) ((J->fold.ins.i = (k)), (TRef)KINTFOLD)
+#define CONDFOLD(cond) ((TRef)FAILFOLD + (TRef)(cond))
+#define LEFTFOLD (J->fold.ins.op1)
+#define RIGHTFOLD (J->fold.ins.op2)
+#define CSEFOLD (lj_opt_cse(J))
+#define EMITFOLD (lj_ir_emit(J))
+
+/* Load/store forwarding. */
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J);
+LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
+
+/* Dead-store elimination. */
+LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J);
+
+/* Narrowing. */
+LJ_FUNC TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J);
+LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc);
+LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc);
+LJ_FUNC IRType lj_opt_narrow_forl(cTValue *forbase);
+
+/* Optimization passes. */
+LJ_FUNC void lj_opt_dce(jit_State *J);
+LJ_FUNC int lj_opt_loop(jit_State *J);
+#endif
+
+#endif
diff --git a/src/lj_jit.h b/src/lj_jit.h
new file mode 100644
index 00000000..280eff41
--- /dev/null
+++ b/src/lj_jit.h
@@ -0,0 +1,279 @@
+/*
+** Common definitions for the JIT compiler.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_JIT_H
+#define _LJ_JIT_H
+
+#include "lj_obj.h"
+#include "lj_ir.h"
+
+/* JIT engine flags. */
+#define JIT_F_ON 0x00000001
+
+/* CPU-specific JIT engine flags. */
+#if LJ_TARGET_X86ORX64
+#define JIT_F_CMOV 0x00000100
+#define JIT_F_SSE2 0x00000200
+#define JIT_F_SSE4_1 0x00000400
+#define JIT_F_P4 0x00000800
+#define JIT_F_PREFER_IMUL 0x00001000
+#define JIT_F_SPLIT_XMM 0x00002000
+#define JIT_F_LEA_AGU 0x00004000
+
+/* Names for the CPU-specific flags. Must match the order above. */
+#define JIT_F_CPU_FIRST JIT_F_CMOV
+#define JIT_F_CPUSTRING "\4CMOV\4SSE2\6SSE4.1\2P4\3AMD\2K8\4ATOM"
+#else
+#error "Missing CPU-specific JIT engine flags"
+#endif
+
+/* Optimization flags. */
+#define JIT_F_OPT_MASK 0x00ff0000
+
+#define JIT_F_OPT_FOLD 0x00010000
+#define JIT_F_OPT_CSE 0x00020000
+#define JIT_F_OPT_DCE 0x00040000
+#define JIT_F_OPT_FWD 0x00080000
+#define JIT_F_OPT_DSE 0x00100000
+#define JIT_F_OPT_NARROW 0x00200000
+#define JIT_F_OPT_LOOP 0x00400000
+#define JIT_F_OPT_FUSE 0x00800000
+
+/* Optimizations names for -O. Must match the order above. */
+#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
+#define JIT_F_OPTSTRING \
+ "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\4fuse"
+
+/* Optimization levels set a fixed combination of flags. */
+#define JIT_F_OPT_0 0
+#define JIT_F_OPT_1 (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
+#define JIT_F_OPT_2 (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
+#define JIT_F_OPT_3 (JIT_F_OPT_2|JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_FUSE)
+#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
+
+#ifdef LUA_USE_WIN
+/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
+#define JIT_P_sizemcode_DEFAULT 64
+#else
+/* Could go as low as 4K, but the mmap() overhead would be rather high. */
+#define JIT_P_sizemcode_DEFAULT 32
+#endif
+
+/* Optimization parameters and their defaults. Length is a char in octal! */
+#define JIT_PARAMDEF(_) \
+ _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \
+ _(\011, maxrecord, 2000) /* Max. # of recorded IR instructions. */ \
+ _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
+ _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
+ _(\007, maxsnap, 100) /* Max. # of snapshots for a trace. */ \
+ \
+ _(\007, hotloop, 57) /* # of iterations to detect a hot loop. */ \
+ _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
+ _(\007, tryside, 4) /* # of attempts to compile a side trace. */ \
+ \
+ _(\012, instunroll, 4) /* Max. unroll for instable loops. */ \
+ _(\012, loopunroll, 7) /* Max. unroll for loop ops in side traces. */ \
+ _(\012, callunroll, 3) /* Max. unroll for recursive calls. */ \
+ _(\011, recunroll, 0) /* Max. unroll for true recursion. */ \
+ \
+ /* Size of each machine code area (in KBytes). */ \
+ _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \
+ /* Max. total size of all machine code areas (in KBytes). */ \
+ _(\010, maxmcode, 512) \
+ /* End of list. */
+
+enum {
+#define JIT_PARAMENUM(len, name, value) JIT_P_##name,
+JIT_PARAMDEF(JIT_PARAMENUM)
+#undef JIT_PARAMENUM
+ JIT_P__MAX
+};
+
+#define JIT_PARAMSTR(len, name, value) #len #name
+#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR)
+
+/* Trace compiler state. */
+typedef enum {
+ LJ_TRACE_IDLE, /* Trace compiler idle. */
+ LJ_TRACE_ACTIVE = 0x10,
+ LJ_TRACE_RECORD, /* Bytecode recording active. */
+ LJ_TRACE_START, /* New trace started. */
+ LJ_TRACE_END, /* End of trace. */
+ LJ_TRACE_ASM, /* Assemble trace. */
+ LJ_TRACE_ERR, /* Trace aborted with error. */
+} TraceState;
+
+/* Machine code type. */
+typedef uint8_t MCode;
+
+/* Stack snapshot header. */
+typedef struct SnapShot {
+ uint16_t mapofs; /* Offset into snapshot map. */
+ IRRef1 ref; /* First IR ref for this snapshot. */
+ uint8_t nslots; /* Number of stack slots. */
+ uint8_t nframelinks; /* Number of frame links. */
+ uint8_t count; /* Count of taken exits for this snapshot. */
+ uint8_t unused1;
+} SnapShot;
+
+#define SNAPCOUNT_DONE 255 /* Already compiled and linked a side trace. */
+#define snap_ref(sn) ((IRRef)(IRRef1)(sn))
+#define snap_ridsp(sn) ((sn) >> 16)
+
+/* Snapshot and exit numbers. */
+typedef uint32_t SnapNo;
+typedef uint32_t ExitNo;
+
+/* Trace number. */
+typedef uint32_t TraceNo; /* Used to pass around trace numbers. */
+typedef uint16_t TraceNo1; /* Stored trace number. */
+
+#define TRACE_INTERP 0 /* Fallback to interpreter. */
+
+/* Trace anchor. */
+typedef struct Trace {
+ IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
+ IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
+ IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
+ SnapShot *snap; /* Snapshot array. */
+ IRRef2 *snapmap; /* Snapshot map. */
+ uint16_t nsnap; /* Number of snapshots. */
+ uint16_t nsnapmap; /* Number of snapshot map elements. */
+ GCRef startpt; /* Starting prototype. */
+ BCIns startins; /* Original bytecode of starting instruction. */
+ MCode *mcode; /* Start of machine code. */
+ MSize szmcode; /* Size of machine code. */
+ MSize mcloop; /* Offset of loop start in machine code. */
+ TraceNo1 link; /* Linked trace (or self for loops). */
+ TraceNo1 root; /* Root trace of side trace (or 0 for root traces). */
+ TraceNo1 nextroot; /* Next root trace for same prototype. */
+ TraceNo1 nextside; /* Next side trace of same root trace. */
+ uint16_t nchild; /* Number of child traces (root trace only). */
+ uint16_t spadjust; /* Stack pointer adjustment (offset in bytes). */
+#ifdef LUAJIT_USE_GDBJIT
+ void *gdbjit_entry; /* GDB JIT entry. */
+#endif
+} Trace;
+
+/* Round-robin penalty cache for bytecodes leading to aborted traces. */
+typedef struct HotPenalty {
+ const BCIns *pc; /* Starting bytecode PC. */
+ uint16_t val; /* Penalty value, i.e. hotcount start. */
+ uint16_t reason; /* Abort reason (really TraceErr). */
+} HotPenalty;
+
+/* Number of slots for the penalty cache. Must be a power of 2. */
+#define PENALTY_SLOTS 16
+
+/* Round-robin backpropagation cache for narrowing conversions. */
+typedef struct BPropEntry {
+ IRRef1 key; /* Key: original reference. */
+ IRRef1 val; /* Value: reference after conversion. */
+ IRRef mode; /* Mode for this entry (currently IRTOINT_*). */
+} BPropEntry;
+
+/* Number of slots for the backpropagation cache. Must be a power of 2. */
+#define BPROP_SLOTS 16
+
+/* Fold state is used to fold instructions on-the-fly. */
+typedef struct FoldState {
+ IRIns ins; /* Currently emitted instruction. */
+ IRIns left; /* Instruction referenced by left operand. */
+ IRIns right; /* Instruction referenced by right operand. */
+} FoldState;
+
+/* JIT compiler state. */
+typedef struct jit_State {
+ Trace cur; /* Current trace. */
+
+ lua_State *L; /* Current Lua state. */
+ const BCIns *pc; /* Current PC. */
+ BCReg maxslot; /* Relative to baseslot. */
+
+ uint32_t flags; /* JIT engine flags. */
+ TRef *base; /* Current frame base, points into J->slots. */
+ BCReg baseslot; /* Current frame base, offset into J->slots. */
+ GCfunc *fn; /* Current function. */
+ GCproto *pt; /* Current prototype. */
+
+ FoldState fold; /* Fold state. */
+
+ uint8_t mergesnap; /* Allowed to merge with next snapshot. */
+ uint8_t needsnap; /* Need snapshot before recording next bytecode. */
+ IRType1 guardemit; /* Accumulated IRT_GUARD for emitted instructions. */
+ uint8_t unused1;
+
+ const BCIns *bc_min; /* Start of allowed bytecode range for root trace. */
+ MSize bc_extent; /* Extent of the range. */
+
+ TraceState state; /* Trace compiler state. */
+
+ int32_t instunroll; /* Unroll counter for instable loops. */
+ int32_t loopunroll; /* Unroll counter for loop ops in side traces. */
+ int32_t tailcalled; /* Number of successive tailcalls. */
+ int32_t framedepth; /* Current frame depth. */
+
+ MRef knum; /* Pointer to chained array of KNUM constants. */
+
+ IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
+ IRRef irtoplim; /* Upper limit of instuction buffer (biased). */
+ IRRef irbotlim; /* Lower limit of instuction buffer (biased). */
+ IRRef loopref; /* Last loop reference or ref of final LOOP (or 0). */
+
+ SnapShot *snapbuf; /* Temp. snapshot buffer. */
+ IRRef2 *snapmapbuf; /* Temp. snapshot map buffer. */
+ MSize sizesnap; /* Size of temp. snapshot buffer. */
+ MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
+
+ Trace **trace; /* Array of traces. */
+ TraceNo curtrace; /* Current trace number (if not 0). Kept in J->cur. */
+ TraceNo freetrace; /* Start of scan for next free trace. */
+ MSize sizetrace; /* Size of trace array. */
+
+ IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
+ TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
+
+ int32_t param[JIT_P__MAX]; /* JIT engine parameters. */
+
+ MCode *exitstubgroup[LJ_MAX_EXITSTUBGR]; /* Exit stub group addresses. */
+
+ HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */
+ uint32_t penaltyslot; /* Round-robin index into penalty slots. */
+
+ BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */
+ uint32_t bpropslot; /* Round-robin index into bpropcache slots. */
+
+ const BCIns *startpc; /* Bytecode PC of starting instruction. */
+ TraceNo parent; /* Parent of current side trace (0 for root traces). */
+ ExitNo exitno; /* Exit number in parent of current side trace. */
+
+ TValue errinfo; /* Additional info element for trace errors. */
+
+ MCode *mcarea; /* Base of current mcode area. */
+ MCode *mctop; /* Top of current mcode area. */
+ MCode *mcbot; /* Bottom of current mcode area. */
+ size_t szmcarea; /* Size of current mcode area. */
+ size_t szallmcarea; /* Total size of all allocated mcode areas. */
+ int mcprot; /* Protection of current mcode area. */
+} jit_State;
+
+/* Exit stubs. */
+#if LJ_TARGET_X86ORX64
+/* Limited by the range of a short fwd jump (127): (2+2)*(32-1)-2 = 122. */
+#define EXITSTUB_SPACING (2+2)
+#define EXITSTUBS_PER_GROUP 32
+#else
+#error "Missing CPU-specific exit stub definitions"
+#endif
+
+/* Return the address of an exit stub. */
+static LJ_AINLINE MCode *exitstub_addr(jit_State *J, ExitNo exitno)
+{
+ lua_assert(J->exitstubgroup[exitno / EXITSTUBS_PER_GROUP] != NULL);
+ return J->exitstubgroup[exitno / EXITSTUBS_PER_GROUP] +
+ EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
+}
+
+#endif
diff --git a/src/lj_lex.c b/src/lj_lex.c
new file mode 100644
index 00000000..38b0a7d4
--- /dev/null
+++ b/src/lj_lex.c
@@ -0,0 +1,393 @@
+/*
+** Lexical analyzer.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_lex_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_lex.h"
+#include "lj_parse.h"
+#include "lj_ctype.h"
+
+/* Lua lexer token names. */
+static const char *const tokennames[] = {
+#define TKSTR1(name) #name,
+#define TKSTR2(name, sym) #sym,
+TKDEF(TKSTR1, TKSTR2)
+#undef TKSTR1
+#undef TKSTR2
+ NULL
+};
+
+/* -- Buffer handling ----------------------------------------------------- */
+
+#define char2int(c) cast(int, cast(uint8_t, (c)))
+#define next(ls) \
+ (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
+#define save_and_next(ls) (save(ls, ls->current), next(ls))
+#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
+#define END_OF_STREAM (-1)
+
+static int fillbuf(LexState *ls)
+{
+ size_t sz;
+ const char *buf = ls->rfunc(ls->L, ls->rdata, &sz);
+ if (buf == NULL || sz == 0) return END_OF_STREAM;
+ ls->n = (MSize)sz - 1;
+ ls->p = buf;
+ return char2int(*(ls->p++));
+}
+
+static void save(LexState *ls, int c)
+{
+ if (ls->sb.n + 1 > ls->sb.sz) {
+ MSize newsize;
+ if (ls->sb.sz >= LJ_MAX_STR/2)
+ lj_lex_error(ls, 0, LJ_ERR_XELEM);
+ newsize = ls->sb.sz * 2;
+ lj_str_resizebuf(ls->L, &ls->sb, newsize);
+ }
+ ls->sb.buf[ls->sb.n++] = cast(char, c);
+}
+
+static int check_next(LexState *ls, const char *set)
+{
+ if (!strchr(set, ls->current))
+ return 0;
+ save_and_next(ls);
+ return 1;
+}
+
+static void inclinenumber(LexState *ls)
+{
+ int old = ls->current;
+ lua_assert(currIsNewline(ls));
+ next(ls); /* skip `\n' or `\r' */
+ if (currIsNewline(ls) && ls->current != old)
+ next(ls); /* skip `\n\r' or `\r\n' */
+ if (++ls->linenumber >= LJ_MAX_LINE)
+ lj_lex_error(ls, ls->token, LJ_ERR_XLINES);
+}
+
+/* -- Scanner for terminals ----------------------------------------------- */
+
+static void read_numeral(LexState *ls, TValue *tv)
+{
+ lua_assert(lj_ctype_isdigit(ls->current));
+ do {
+ save_and_next(ls);
+ } while (lj_ctype_isdigit(ls->current) || ls->current == '.');
+ if (check_next(ls, "Ee")) /* `E'? */
+ check_next(ls, "+-"); /* optional exponent sign */
+ while (lj_ctype_isident(ls->current))
+ save_and_next(ls);
+ save(ls, '\0');
+ if (!lj_str_numconv(ls->sb.buf, tv))
+ lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
+}
+
+static int skip_sep(LexState *ls)
+{
+ int count = 0;
+ int s = ls->current;
+ lua_assert(s == '[' || s == ']');
+ save_and_next(ls);
+ while (ls->current == '=') {
+ save_and_next(ls);
+ count++;
+ }
+ return (ls->current == s) ? count : (-count) - 1;
+}
+
+static void read_long_string(LexState *ls, TValue *tv, int sep)
+{
+ save_and_next(ls); /* skip 2nd `[' */
+ if (currIsNewline(ls)) /* string starts with a newline? */
+ inclinenumber(ls); /* skip it */
+ for (;;) {
+ switch (ls->current) {
+ case END_OF_STREAM:
+ lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
+ break;
+ case ']':
+ if (skip_sep(ls) == sep) {
+ save_and_next(ls); /* skip 2nd `]' */
+ goto endloop;
+ }
+ break;
+ case '\n':
+ case '\r':
+ save(ls, '\n');
+ inclinenumber(ls);
+ if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */
+ break;
+ default:
+ if (tv) save_and_next(ls);
+ else next(ls);
+ break;
+ }
+ } endloop:
+ if (tv) {
+ GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep),
+ ls->sb.n - 2*(2 + (MSize)sep));
+ setstrV(ls->L, tv, str);
+ }
+}
+
+static void read_string(LexState *ls, int delim, TValue *tv)
+{
+ save_and_next(ls);
+ while (ls->current != delim) {
+ switch (ls->current) {
+ case END_OF_STREAM:
+ lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
+ continue;
+ case '\n':
+ case '\r':
+ lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
+ continue;
+ case '\\': {
+ int c;
+ next(ls); /* do not save the `\' */
+ switch (ls->current) {
+ case 'a': c = '\a'; break;
+ case 'b': c = '\b'; break;
+ case 'f': c = '\f'; break;
+ case 'n': c = '\n'; break;
+ case 'r': c = '\r'; break;
+ case 't': c = '\t'; break;
+ case 'v': c = '\v'; break;
+ case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue;
+ case END_OF_STREAM: continue; /* will raise an error next loop */
+ default:
+ if (!lj_ctype_isdigit(ls->current)) {
+ save_and_next(ls); /* handles \\, \", \', and \? */
+ } else { /* \xxx */
+ int i = 0;
+ c = 0;
+ do {
+ c = 10*c + (ls->current-'0');
+ next(ls);
+ } while (++i<3 && lj_ctype_isdigit(ls->current));
+ if (c > UCHAR_MAX)
+ lj_lex_error(ls, TK_string, LJ_ERR_XESC);
+ save(ls, c);
+ }
+ continue;
+ }
+ save(ls, c);
+ next(ls);
+ continue;
+ }
+ default:
+ save_and_next(ls);
+ break;
+ }
+ }
+ save_and_next(ls); /* skip delimiter */
+ setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));
+}
+
+/* -- Main lexical scanner ------------------------------------------------ */
+
+static int llex(LexState *ls, TValue *tv)
+{
+ lj_str_resetbuf(&ls->sb);
+ for (;;) {
+ if (lj_ctype_isident(ls->current)) {
+ GCstr *s;
+ if (lj_ctype_isdigit(ls->current)) { /* Numeric literal. */
+ read_numeral(ls, tv);
+ return TK_number;
+ }
+ /* Identifier or reserved word. */
+ do {
+ save_and_next(ls);
+ } while (lj_ctype_isident(ls->current));
+ s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
+ if (s->reserved > 0) /* Reserved word? */
+ return TK_OFS + s->reserved;
+ setstrV(ls->L, tv, s);
+ return TK_name;
+ }
+ switch (ls->current) {
+ case '\n':
+ case '\r':
+ inclinenumber(ls);
+ continue;
+ case ' ':
+ case '\t':
+ case '\v':
+ case '\f':
+ next(ls);
+ continue;
+ case '-':
+ next(ls);
+ if (ls->current != '-') return '-';
+ /* else is a comment */
+ next(ls);
+ if (ls->current == '[') {
+ int sep = skip_sep(ls);
+ lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
+ if (sep >= 0) {
+ read_long_string(ls, NULL, sep); /* long comment */
+ lj_str_resetbuf(&ls->sb);
+ continue;
+ }
+ }
+ /* else short comment */
+ while (!currIsNewline(ls) && ls->current != END_OF_STREAM)
+ next(ls);
+ continue;
+ case '[': {
+ int sep = skip_sep(ls);
+ if (sep >= 0) {
+ read_long_string(ls, tv, sep);
+ return TK_string;
+ } else if (sep == -1) {
+ return '[';
+ } else {
+ lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
+ continue;
+ }
+ }
+ case '=':
+ next(ls);
+ if (ls->current != '=') return '='; else { next(ls); return TK_eq; }
+ case '<':
+ next(ls);
+ if (ls->current != '=') return '<'; else { next(ls); return TK_le; }
+ case '>':
+ next(ls);
+ if (ls->current != '=') return '>'; else { next(ls); return TK_ge; }
+ case '~':
+ next(ls);
+ if (ls->current != '=') return '~'; else { next(ls); return TK_ne; }
+ case '"':
+ case '\'':
+ read_string(ls, ls->current, tv);
+ return TK_string;
+ case '.':
+ save_and_next(ls);
+ if (check_next(ls, ".")) {
+ if (check_next(ls, "."))
+ return TK_dots; /* ... */
+ else
+ return TK_concat; /* .. */
+ } else if (!lj_ctype_isdigit(ls->current)) {
+ return '.';
+ } else {
+ read_numeral(ls, tv);
+ return TK_number;
+ }
+ case END_OF_STREAM:
+ return TK_eof;
+ default: {
+ int c = ls->current;
+ next(ls);
+ return c; /* Single-char tokens (+ - / ...). */
+ }
+ }
+ }
+}
+
+/* -- Lexer API ----------------------------------------------------------- */
+
+void lj_lex_start(lua_State *L, LexState *ls)
+{
+ ls->L = L;
+ ls->fs = NULL;
+ ls->n = 0;
+ ls->p = NULL;
+ ls->lookahead = TK_eof; /* No look-ahead token. */
+ ls->linenumber = 1;
+ ls->lastline = 1;
+ lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF);
+ next(ls); /* Read-ahead first char. */
+ if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb &&
+ char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
+ ls->n -= 2;
+ ls->p += 2;
+ next(ls);
+ }
+ if (ls->current == '#') { /* Skip POSIX #! header line. */
+ do {
+ next(ls);
+ if (ls->current == END_OF_STREAM) return;
+ } while (!currIsNewline(ls));
+ inclinenumber(ls);
+ }
+ if (ls->current == LUA_SIGNATURE[0]) {
+ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_XBCLOAD));
+ lj_err_throw(L, LUA_ERRSYNTAX);
+ }
+ /* This is an unanchored GCstr before it's stored in the prototype.
+ ** Do this last since next() calls the reader which may call the GC.
+ */
+ ls->chunkname = lj_str_newz(L, ls->chunkarg);
+}
+
+void lj_lex_next(LexState *ls)
+{
+ ls->lastline = ls->linenumber;
+ if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
+ ls->token = llex(ls, &ls->tokenval); /* Get next token. */
+ } else { /* Otherwise return lookahead token. */
+ ls->token = ls->lookahead;
+ ls->lookahead = TK_eof;
+ ls->tokenval = ls->lookaheadval;
+ }
+}
+
+LexToken lj_lex_lookahead(LexState *ls)
+{
+ lua_assert(ls->lookahead == TK_eof);
+ ls->lookahead = llex(ls, &ls->lookaheadval);
+ return ls->lookahead;
+}
+
+const char *lj_lex_token2str(LexState *ls, LexToken token)
+{
+ if (token > TK_OFS)
+ return tokennames[token-TK_OFS-1];
+ else if (!lj_ctype_iscntrl(token))
+ return lj_str_pushf(ls->L, "%c", token);
+ else
+ return lj_str_pushf(ls->L, "char(%d)", token);
+}
+
+void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...)
+{
+ const char *tok;
+ va_list argp;
+ if (token == 0) {
+ tok = NULL;
+ } else if (token == TK_name || token == TK_string || token == TK_number) {
+ save(ls, '\0');
+ tok = ls->sb.buf;
+ } else {
+ tok = lj_lex_token2str(ls, token);
+ }
+ va_start(argp, em);
+ lj_err_lex(ls->L, strdata(ls->chunkname), tok, ls->linenumber, em, argp);
+ va_end(argp);
+}
+
+void lj_lex_init(lua_State *L)
+{
+ uint32_t i;
+ for (i = 0; i < TK_RESERVED; i++) {
+ GCstr *s = lj_str_newz(L, tokennames[i]);
+ fixstring(s); /* Reserved words are never collected. */
+ s->reserved = cast_byte(i+1);
+ }
+}
+
diff --git a/src/lj_lex.h b/src/lj_lex.h
new file mode 100644
index 00000000..cc5d5a9f
--- /dev/null
+++ b/src/lj_lex.h
@@ -0,0 +1,63 @@
+/*
+** Lexical analyzer.
+** Major parts taken verbatim from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#ifndef _LJ_LEX_H
+#define _LJ_LEX_H
+
+#include <stdarg.h>
+
+#include "lj_obj.h"
+#include "lj_err.h"
+
+/* Lua lexer tokens. */
+#define TKDEF(_, __) \
+ _(and) _(break) _(do) _(else) _(elseif) _(end) _(false) \
+ _(for) _(function) _(if) _(in) _(local) _(nil) _(not) _(or) \
+ _(repeat) _(return) _(then) _(true) _(until) _(while) \
+ __(concat, ..) __(dots, ...) __(eq, ==) __(ge, >=) __(le, <=) __(ne, ~=) \
+ __(number, <number>) __(name, <name>) __(string, <string>) __(eof, <eof>)
+
+enum {
+ TK_OFS = 256,
+#define TKENUM1(name) TK_##name,
+#define TKENUM2(name, sym) TK_##name,
+TKDEF(TKENUM1, TKENUM2)
+#undef TKENUM1
+#undef TKENUM2
+ TK_RESERVED = TK_while - TK_OFS
+};
+
+typedef int LexToken;
+
+/* Lua lexer state. */
+typedef struct LexState {
+ struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
+ struct lua_State *L; /* Lua state. */
+ TValue tokenval; /* Current token value. */
+ TValue lookaheadval; /* Lookahead token value. */
+ int current; /* Current character (charint). */
+ LexToken token; /* Current token. */
+ LexToken lookahead; /* Lookahead token. */
+ SBuf sb; /* String buffer for tokens. */
+ const char *p; /* Current position in input buffer. */
+ MSize n; /* Bytes left in input buffer. */
+ lua_Reader rfunc; /* Reader callback. */
+ void *rdata; /* Reader callback data. */
+ BCLine linenumber; /* Input line counter. */
+ BCLine lastline; /* Line of last token. */
+ GCstr *chunkname; /* Current chunk name (interned string). */
+ const char *chunkarg; /* Chunk name argument. */
+ uint32_t level; /* Syntactical nesting level. */
+} LexState;
+
+LJ_FUNC void lj_lex_start(lua_State *L, LexState *ls);
+LJ_FUNC void lj_lex_next(LexState *ls);
+LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
+LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token);
+LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...);
+LJ_FUNC void lj_lex_init(lua_State *L);
+
+#endif
diff --git a/src/lj_lib.c b/src/lj_lib.c
new file mode 100644
index 00000000..683c66d6
--- /dev/null
+++ b/src/lj_lib.c
@@ -0,0 +1,216 @@
+/*
+** Library function support.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_lib_c
+#define LUA_CORE
+
+#include "lauxlib.h"
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_func.h"
+#include "lj_vm.h"
+#include "lj_lib.h"
+
+/* -- Library initialization ---------------------------------------------- */
+
+static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
+{
+ if (libname) {
+ luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
+ lua_getfield(L, -1, libname);
+ if (!tvistab(L->top-1)) {
+ L->top--;
+ if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, hsize) != NULL)
+ lj_err_callerv(L, LJ_ERR_BADMODN, libname);
+ settabV(L, L->top, tabV(L->top-1));
+ L->top++;
+ lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */
+ }
+ L->top--;
+ settabV(L, L->top-1, tabV(L->top));
+ } else {
+ lua_createtable(L, 0, hsize);
+ }
+ return tabV(L->top-1);
+}
+
+void lj_lib_register(lua_State *L, const char *libname,
+ const uint8_t *p, const lua_CFunction *cf)
+{
+ GCtab *env = tabref(L->env);
+ GCfunc *ofn = NULL;
+ int ffid = *p++;
+ GCtab *tab = lib_create_table(L, libname, *p++);
+ ptrdiff_t tpos = L->top - L->base;
+
+ /* Avoid barriers further down. */
+ if (isblack(obj2gco(tab))) lj_gc_barrierback(G(L), tab);
+ tab->nomm = 0;
+
+ for (;;) {
+ uint32_t tag = *p++;
+ MSize len = tag & LIBINIT_LENMASK;
+ tag &= LIBINIT_TAGMASK;
+ if (tag != LIBINIT_STRING) {
+ const char *name;
+ MSize nuv = (MSize)(L->top - L->base - tpos);
+ GCfunc *fn = lj_func_newC(L, nuv, env);
+ if (nuv) {
+ L->top = L->base + tpos;
+ memcpy(fn->c.upvalue, L->top, sizeof(TValue)*nuv);
+ }
+ fn->c.ffid = (uint8_t)(ffid++);
+ name = (const char *)p;
+ p += len;
+ if (tag != LIBINIT_CF) {
+ fn->c.gate = makeasmfunc(p[0] + (p[1] << 8));
+ p += 2;
+ }
+ if (tag == LIBINIT_ASM_)
+ fn->c.f = ofn->c.f; /* Copy handler from previous function. */
+ else
+ fn->c.f = *cf++; /* Get cf or handler from C function table. */
+ if (len) {
+ /* NOBARRIER: See above for common barrier. */
+ setfuncV(L, lj_tab_setstr(L, tab, lj_str_new(L, name, len)), fn);
+ }
+ ofn = fn;
+ } else {
+ switch (tag | len) {
+ case LIBINIT_SET:
+ L->top -= 2;
+ if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
+ env = tabV(L->top);
+ else /* NOBARRIER: See above for common barrier. */
+ copyTV(L, lj_tab_set(L, tab, L->top+1), L->top);
+ break;
+ case LIBINIT_NUMBER:
+ memcpy(&L->top->n, p, sizeof(double));
+ L->top++;
+ p += sizeof(double);
+ break;
+ case LIBINIT_COPY:
+ copyTV(L, L->top, L->top - *p++);
+ L->top++;
+ break;
+ case LIBINIT_LASTCL:
+ setfuncV(L, L->top++, ofn);
+ break;
+ case LIBINIT_FFID:
+ ffid++;
+ break;
+ case LIBINIT_END:
+ return;
+ default:
+ setstrV(L, L->top++, lj_str_new(L, (const char *)p, len));
+ p += len;
+ break;
+ }
+ }
+ }
+}
+
+/* -- Type checks --------------------------------------------------------- */
+
+TValue *lj_lib_checkany(lua_State *L, int narg)
+{
+ TValue *o = L->base + narg-1;
+ if (o >= L->top)
+ lj_err_arg(L, narg, LJ_ERR_NOVAL);
+ return o;
+}
+
+GCstr *lj_lib_checkstr(lua_State *L, int narg)
+{
+ TValue *o = L->base + narg-1;
+ if (o < L->top) {
+ if (LJ_LIKELY(tvisstr(o))) {
+ return strV(o);
+ } else if (tvisnum(o)) {
+ GCstr *s = lj_str_fromnum(L, &o->n);
+ setstrV(L, o, s);
+ return s;
+ }
+ }
+ lj_err_argt(L, narg, LUA_TSTRING);
+ return NULL; /* unreachable */
+}
+
+GCstr *lj_lib_optstr(lua_State *L, int narg)
+{
+ TValue *o = L->base + narg-1;
+ return (o < L->top && !tvisnil(o)) ? lj_lib_checkstr(L, narg) : NULL;
+}
+
+lua_Number lj_lib_checknum(lua_State *L, int narg)
+{
+ TValue *o = L->base + narg-1;
+ if (!(o < L->top &&
+ (tvisnum(o) || (tvisstr(o) && lj_str_numconv(strVdata(o), o)))))
+ lj_err_argt(L, narg, LUA_TNUMBER);
+ return numV(o);
+}
+
+int32_t lj_lib_checkint(lua_State *L, int narg)
+{
+ return lj_num2int(lj_lib_checknum(L, narg));
+}
+
+int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
+{
+ TValue *o = L->base + narg-1;
+ return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def;
+}
+
+GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
+{
+ TValue *o = L->base + narg-1;
+ if (!(o < L->top && tvisfunc(o)))
+ lj_err_argt(L, narg, LUA_TFUNCTION);
+ return funcV(o);
+}
+
+GCtab *lj_lib_checktab(lua_State *L, int narg)
+{
+ TValue *o = L->base + narg-1;
+ if (!(o < L->top && tvistab(o)))
+ lj_err_argt(L, narg, LUA_TTABLE);
+ return tabV(o);
+}
+
+GCtab *lj_lib_checktabornil(lua_State *L, int narg)
+{
+ TValue *o = L->base + narg-1;
+ if (o < L->top) {
+ if (tvistab(o))
+ return tabV(o);
+ else if (tvisnil(o))
+ return NULL;
+ }
+ lj_err_arg(L, narg, LJ_ERR_NOTABN);
+ return NULL; /* unreachable */
+}
+
+int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst)
+{
+ GCstr *s = def >= 0 ? lj_lib_optstr(L, narg) : lj_lib_checkstr(L, narg);
+ if (s) {
+ const char *opt = strdata(s);
+ MSize len = s->len;
+ int i;
+ for (i = 0; *(const uint8_t *)lst; i++) {
+ if (*(const uint8_t *)lst == len && memcmp(opt, lst+1, len) == 0)
+ return i;
+ lst += 1+*(const uint8_t *)lst;
+ }
+ lj_err_argv(L, narg, LJ_ERR_INVOPTM, opt);
+ }
+ return def;
+}
+
diff --git a/src/lj_lib.h b/src/lj_lib.h
new file mode 100644
index 00000000..1cba3778
--- /dev/null
+++ b/src/lj_lib.h
@@ -0,0 +1,84 @@
+/*
+** Library function support.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_LIB_H
+#define _LJ_LIB_H
+
+#include "lj_obj.h"
+
+/*
+** A fallback handler is called by the assembler VM if the fast path fails:
+**
+** - too few arguments: unrecoverable.
+** - wrong argument type: recoverable, if coercion succeeds.
+** - bad argument value: unrecoverable.
+** - stack overflow: recoverable, if stack reallocation succeeds.
+** - extra handling: recoverable.
+**
+** The unrecoverable cases throw an error with lj_err_arg(), lj_err_argtype(),
+** lj_err_caller() or lj_err_callermsg().
+** The recoverable cases return 0 or the number of results + 1.
+** The assembler VM retries the fast path only if 0 is returned.
+** This time the fallback must not be called again or it gets stuck in a loop.
+*/
+
+/* Return values from fallback handler. */
+#define FFH_RETRY 0
+#define FFH_UNREACHABLE FFH_RETRY
+#define FFH_RES(n) ((n)+1)
+
+LJ_FUNC TValue *lj_lib_checkany(lua_State *L, int narg);
+LJ_FUNC GCstr *lj_lib_checkstr(lua_State *L, int narg);
+LJ_FUNC GCstr *lj_lib_optstr(lua_State *L, int narg);
+LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
+LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
+LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
+LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
+LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
+LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
+LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
+
+#define lj_lib_opt(L, narg, gotarg, noarg) \
+ { TValue *_o = L->base + (narg)-1; \
+ if (_o < L->top && !tvisnil(_o)) { gotarg } else { noarg } }
+
+/* Avoid including lj_frame.h. */
+#define lj_lib_upvalue(L, n) \
+ (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1])
+
+/* Library function declarations. Scanned by buildvm. */
+#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
+#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
+#define LJLIB_ASM_(name)
+#define LJLIB_SET(name)
+#define LJLIB_PUSH(arg)
+#define LJLIB_REC(handler)
+#define LJLIB_NOREGUV
+#define LJLIB_NOREG
+
+#define LJ_LIB_REG(L, name) \
+ lj_lib_register(L, #name, lj_lib_init_##name, lj_lib_cf_##name)
+#define LJ_LIB_REG_(L, regname, name) \
+ lj_lib_register(L, regname, lj_lib_init_##name, lj_lib_cf_##name)
+
+LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
+ const uint8_t *init, const lua_CFunction *cf);
+
+/* Library init data tags. */
+#define LIBINIT_LENMASK 0x3f
+#define LIBINIT_TAGMASK 0xc0
+#define LIBINIT_CF 0x00
+#define LIBINIT_ASM 0x40
+#define LIBINIT_ASM_ 0x80
+#define LIBINIT_STRING 0xc0
+#define LIBINIT_MAXSTR 0x39
+#define LIBINIT_SET 0xfa
+#define LIBINIT_NUMBER 0xfb
+#define LIBINIT_COPY 0xfc
+#define LIBINIT_LASTCL 0xfd
+#define LIBINIT_FFID 0xfe
+#define LIBINIT_END 0xff
+
+#endif
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
new file mode 100644
index 00000000..e5791e9f
--- /dev/null
+++ b/src/lj_mcode.c
@@ -0,0 +1,260 @@
+/*
+** Machine code management.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_mcode_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_gc.h"
+#include "lj_jit.h"
+#include "lj_mcode.h"
+#include "lj_trace.h"
+#include "lj_dispatch.h"
+
+/* -- OS-specific functions ----------------------------------------------- */
+
+#if defined(LUA_USE_WIN)
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#define MCPROT_RW PAGE_READWRITE
+#define MCPROT_RX PAGE_EXECUTE_READ
+#define MCPROT_RWX PAGE_EXECUTE_READWRITE
+
+static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, DWORD prot)
+{
+ void *p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
+ if (!p)
+ lj_trace_err(J, LJ_TRERR_MCODEAL);
+ return p;
+}
+
+static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz)
+{
+ UNUSED(J); UNUSED(sz);
+ VirtualFree(p, 0, MEM_RELEASE);
+}
+
+static LJ_AINLINE void mcode_setprot(void *p, size_t sz, DWORD prot)
+{
+ DWORD oprot;
+ VirtualProtect(p, sz, prot, &oprot);
+}
+
+#elif defined(LUA_USE_POSIX)
+
+#include <sys/mman.h>
+
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#define MCPROT_RW (PROT_READ|PROT_WRITE)
+#define MCPROT_RX (PROT_READ|PROT_EXEC)
+#define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC)
+
+static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, int prot)
+{
+ void *p = mmap(NULL, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (p == MAP_FAILED)
+ lj_trace_err(J, LJ_TRERR_MCODEAL);
+ return p;
+}
+
+static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz)
+{
+ UNUSED(J);
+ munmap(p, sz);
+}
+
+static LJ_AINLINE void mcode_setprot(void *p, size_t sz, int prot)
+{
+ mprotect(p, sz, prot);
+}
+
+#else
+
+/* Fallback allocator. This will fail if memory is not executable by default. */
+#define LUAJIT_UNPROTECT_MCODE
+#define MCPROT_RW 0
+#define MCPROT_RX 0
+#define MCPROT_RWX 0
+
+static LJ_AINLINE void *mcode_alloc(jit_State *J, size_t sz, int prot)
+{
+ UNUSED(prot);
+ return lj_mem_new(J->L, sz);
+}
+
+static LJ_AINLINE void mcode_free(jit_State *J, void *p, size_t sz)
+{
+ lj_mem_free(J2G(J), p, sz);
+}
+
+#define mcode_setprot(p, sz, prot) UNUSED(p)
+
+#endif
+
+/* -- MCode area management ----------------------------------------------- */
+
+/* Define this ONLY if the page protection twiddling becomes a bottleneck. */
+#ifdef LUAJIT_UNPROTECT_MCODE
+
+/* It's generally considered to be a potential security risk to have
+** pages with simultaneous write *and* execute access in a process.
+**
+** Do not even think about using this mode for server processes or
+** apps handling untrusted external data (such as a browser).
+**
+** The security risk is not in LuaJIT itself -- but if an adversary finds
+** any *other* flaw in your C application logic, then any RWX memory page
+** simplifies writing an exploit considerably.
+*/
+#define MCPROT_GEN MCPROT_RWX
+#define MCPROT_RUN MCPROT_RWX
+
+#else
+
+/* This is the default behaviour and much safer:
+**
+** Most of the time the memory pages holding machine code are executable,
+** but NONE of them is writable.
+**
+** The current memory area is marked read-write (but NOT executable) only
+** during the short time window while the assembler generates machine code.
+*/
+#define MCPROT_GEN MCPROT_RW
+#define MCPROT_RUN MCPROT_RX
+
+#endif
+
+/* Change protection of MCode area. */
+static void mcode_protect(jit_State *J, int prot)
+{
+#ifdef LUAJIT_UNPROTECT_MCODE
+ UNUSED(J); UNUSED(prot);
+#else
+ if (J->mcprot != prot) {
+ mcode_setprot(J->mcarea, J->szmcarea, prot);
+ J->mcprot = prot;
+ }
+#endif
+}
+
+/* Linked list of MCode areas. */
+typedef struct MCLink {
+ MCode *next; /* Next area. */
+ size_t size; /* Size of current area. */
+} MCLink;
+
+/* Allocate a new MCode area. */
+static void mcode_allocarea(jit_State *J)
+{
+ MCode *oldarea = J->mcarea;
+ size_t sz = (size_t)J->param[JIT_P_sizemcode] << 10;
+ sz = (sz + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
+ J->mcarea = (MCode *)mcode_alloc(J, sz, MCPROT_GEN);
+ J->szmcarea = sz;
+ J->mcprot = MCPROT_GEN;
+ J->mctop = (MCode *)((char *)J->mcarea + J->szmcarea);
+ J->mcbot = (MCode *)((char *)J->mcarea + sizeof(MCLink));
+ ((MCLink *)J->mcarea)->next = oldarea;
+ ((MCLink *)J->mcarea)->size = sz;
+ J->szallmcarea += sz;
+}
+
+/* Free all MCode areas. */
+void lj_mcode_free(jit_State *J)
+{
+ MCode *mc = J->mcarea;
+ J->mcarea = NULL;
+ J->szallmcarea = 0;
+ while (mc) {
+ MCode *next = ((MCLink *)mc)->next;
+ mcode_free(J, mc, ((MCLink *)mc)->size);
+ mc = next;
+ }
+}
+
+/* -- MCode transactions -------------------------------------------------- */
+
+/* Reserve the remainder of the current MCode area. */
+MCode *lj_mcode_reserve(jit_State *J, MCode **lim)
+{
+ if (!J->mcarea)
+ mcode_allocarea(J);
+ else
+ mcode_protect(J, MCPROT_GEN);
+ *lim = J->mcbot;
+ return J->mctop;
+}
+
+/* Commit the top part of the current MCode area. */
+void lj_mcode_commit(jit_State *J, MCode *top)
+{
+ J->mctop = top;
+ mcode_protect(J, MCPROT_RUN);
+}
+
+/* Abort the reservation. */
+void lj_mcode_abort(jit_State *J)
+{
+ mcode_protect(J, MCPROT_RUN);
+}
+
+/* Set/reset protection to allow patching of MCode areas. */
+MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
+{
+#ifdef LUAJIT_UNPROTECT_MCODE
+ UNUSED(J); UNUSED(ptr); UNUSED(finish);
+ return NULL;
+#else
+ if (finish) {
+ if (J->mcarea == ptr)
+ mcode_protect(J, MCPROT_RUN);
+ else
+ mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN);
+ return NULL;
+ } else {
+ MCode *mc = J->mcarea;
+ /* Try current area first to use the protection cache. */
+ if (ptr >= mc && ptr < mc + J->szmcarea) {
+ mcode_protect(J, MCPROT_GEN);
+ return mc;
+ }
+ /* Otherwise search through the list of MCode areas. */
+ for (;;) {
+ mc = ((MCLink *)mc)->next;
+ lua_assert(mc != NULL);
+ if (ptr >= mc && ptr < mc + ((MCLink *)mc)->size) {
+ mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN);
+ return mc;
+ }
+ }
+ }
+#endif
+}
+
+/* Limit of MCode reservation reached. */
+void lj_mcode_limiterr(jit_State *J, size_t need)
+{
+ size_t sizemcode, maxmcode;
+ lj_mcode_abort(J);
+ sizemcode = (size_t)J->param[JIT_P_sizemcode] << 10;
+ sizemcode = (sizemcode + LJ_PAGESIZE-1) & ~(size_t)(LJ_PAGESIZE - 1);
+ maxmcode = (size_t)J->param[JIT_P_maxmcode] << 10;
+ if ((size_t)need > sizemcode)
+ lj_trace_err(J, LJ_TRERR_MCODEOV); /* Too long for any area. */
+ if (J->szallmcarea + sizemcode > maxmcode)
+ lj_trace_err(J, LJ_TRERR_MCODEAL);
+ mcode_allocarea(J);
+ lj_trace_err(J, LJ_TRERR_MCODELM); /* Retry with new area. */
+}
+
+#endif
diff --git a/src/lj_mcode.h b/src/lj_mcode.h
new file mode 100644
index 00000000..d4573bf4
--- /dev/null
+++ b/src/lj_mcode.h
@@ -0,0 +1,23 @@
+/*
+** Machine code management.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_MCODE_H
+#define _LJ_MCODE_H
+
+#include "lj_jit.h"
+
+#if LJ_HASJIT
+LJ_FUNC void lj_mcode_free(jit_State *J);
+LJ_FUNC MCode *lj_mcode_reserve(jit_State *J, MCode **lim);
+LJ_FUNC void lj_mcode_commit(jit_State *J, MCode *m);
+LJ_FUNC void lj_mcode_abort(jit_State *J);
+LJ_FUNC MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish);
+LJ_FUNC_NORET void lj_mcode_limiterr(jit_State *J, size_t need);
+
+#define lj_mcode_commitbot(J, m) (J->mcbot = (m))
+
+#endif
+
+#endif
diff --git a/src/lj_meta.c b/src/lj_meta.c
new file mode 100644
index 00000000..dff01f85
--- /dev/null
+++ b/src/lj_meta.c
@@ -0,0 +1,358 @@
+/*
+** Metamethod handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_meta_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_meta.h"
+#include "lj_bc.h"
+#include "lj_vm.h"
+
+/* -- Metamethod handling ------------------------------------------------- */
+
+/* String interning of metamethod names for fast indexing. */
+void lj_meta_init(lua_State *L)
+{
+#define MMNAME(name) "__" #name
+ const char *metanames = MMDEF(MMNAME);
+#undef MMNAME
+ global_State *g = G(L);
+ const char *p, *q;
+ uint32_t i;
+ for (i = 0, p = metanames; *p; i++, p = q) {
+ GCstr *s;
+ for (q = p+2; *q && *q != '_'; q++) ;
+ s = lj_str_new(L, p, (size_t)(q-p));
+ fixstring(s); /* Never collect these names. */
+ /* NOBARRIER: g->mmname[] is a GC root. */
+ setgcref(g->mmname[i], obj2gco(s));
+ }
+}
+
+/* Negative caching of a few fast metamethods. See the lj_meta_fast() macro. */
+cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name)
+{
+ cTValue *mo = lj_tab_getstr(mt, name);
+ lua_assert(mm <= MM_FAST);
+ if (!mo || tvisnil(mo)) { /* No metamethod? */
+ mt->nomm |= cast_byte(1u<<mm); /* Set negative cache flag. */
+ return NULL;
+ }
+ return mo;
+}
+
+/* Lookup metamethod for object. */
+cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm)
+{
+ GCtab *mt;
+ if (tvistab(o))
+ mt = tabref(tabV(o)->metatable);
+ else if (tvisudata(o))
+ mt = tabref(udataV(o)->metatable);
+ else
+ mt = tabref(G(L)->basemt[itypemap(o)]);
+ if (mt) {
+ cTValue *mo = lj_tab_getstr(mt, strref(G(L)->mmname[mm]));
+ if (mo)
+ return mo;
+ }
+ return niltv(L);
+}
+
+/* Setup call to metamethod to be run by Assembler VM. */
+static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo,
+ cTValue *a, cTValue *b)
+{
+ /*
+ ** |-- framesize -> top top+1 top+2 top+3
+ ** before: [func slots ...]
+ ** mm setup: [func slots ...] [cont|?] [mo|tmtype] [a] [b]
+ ** in asm: [func slots ...] [cont|PC] [mo|delta] [a] [b]
+ ** ^-- func base ^-- mm base
+ ** after mm: [func slots ...] [result]
+ ** ^-- copy to base[PC_RA] --/ for lj_cont_ra
+ ** istruecond + branch for lj_cont_cond*
+ ** ignore for lj_cont_nop
+ ** next PC: [func slots ...]
+ */
+ TValue *top = L->top;
+ if (curr_funcisL(L)) top = curr_topL(L);
+ setcont(top, cont); /* Assembler VM stores PC in upper word. */
+ copyTV(L, top+1, mo); /* Store metamethod and two arguments. */
+ copyTV(L, top+2, a);
+ copyTV(L, top+3, b);
+ return top+2; /* Return new base. */
+}
+
+/* -- C helpers for some instructions, called from assembler VM ----------- */
+
+/* Helper for TGET*. __index chain and metamethod. */
+cTValue *lj_meta_tget(lua_State *L, cTValue *o, cTValue *k)
+{
+ int loop;
+ for (loop = 0; loop < LJ_MAX_IDXCHAIN; loop++) {
+ cTValue *mo;
+ if (tvistab(o)) {
+ GCtab *t = tabV(o);
+ cTValue *tv = lj_tab_get(L, t, k);
+ if (!tvisnil(tv) ||
+ !(mo = lj_meta_fast(L, tabref(t->metatable), MM_index)))
+ return tv;
+ } else if (tvisnil(mo = lj_meta_lookup(L, o, MM_index))) {
+ lj_err_optype(L, o, LJ_ERR_OPINDEX);
+ return NULL; /* unreachable */
+ }
+ if (tvisfunc(mo)) {
+ L->top = mmcall(L, lj_cont_ra, mo, o, k);
+ return NULL; /* Trigger metamethod call. */
+ }
+ o = mo;
+ }
+ lj_err_msg(L, LJ_ERR_GETLOOP);
+ return NULL; /* unreachable */
+}
+
+/* Helper for TSET*. __newindex chain and metamethod. */
+TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k)
+{
+ TValue tmp;
+ int loop;
+ for (loop = 0; loop < LJ_MAX_IDXCHAIN; loop++) {
+ cTValue *mo;
+ if (tvistab(o)) {
+ GCtab *t = tabV(o);
+ TValue *tv = lj_tab_set(L, t, k);
+ if (!tvisnil(tv) ||
+ !(mo = lj_meta_fast(L, tabref(t->metatable), MM_newindex))) {
+ if (isblack(obj2gco(t))) lj_gc_barrierback(G(L), t);
+ return tv;
+ }
+ } else if (tvisnil(mo = lj_meta_lookup(L, o, MM_newindex))) {
+ lj_err_optype(L, o, LJ_ERR_OPINDEX);
+ return NULL; /* unreachable */
+ }
+ if (tvisfunc(mo)) {
+ L->top = mmcall(L, lj_cont_nop, mo, o, k);
+ /* L->top+2 = v filled in by caller. */
+ return NULL; /* Trigger metamethod call. */
+ }
+ copyTV(L, &tmp, mo);
+ o = &tmp;
+ }
+ lj_err_msg(L, LJ_ERR_SETLOOP);
+ return NULL; /* unreachable */
+}
+
+static cTValue *str2num(cTValue *o, TValue *n)
+{
+ if (tvisnum(o))
+ return o;
+ else if (tvisstr(o) && lj_str_numconv(strVdata(o), n))
+ return n;
+ else
+ return NULL;
+}
+
+/* Helper for arithmetic instructions. Coercion, metamethod. */
+TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc,
+ BCReg op)
+{
+ MMS mm = bcmode_mm(op);
+ TValue tempb, tempc;
+ cTValue *b, *c;
+ if ((b = str2num(rb, &tempb)) != NULL &&
+ (c = str2num(rc, &tempc)) != NULL) { /* Try coercion first. */
+ setnumV(ra, lj_vm_foldarith(numV(b), numV(c), (int)mm-MM_add));
+ return NULL;
+ } else {
+ cTValue *mo = lj_meta_lookup(L, rb, mm);
+ if (tvisnil(mo)) {
+ mo = lj_meta_lookup(L, rc, mm);
+ if (tvisnil(mo)) {
+ if (str2num(rb, &tempb) == NULL) rc = rb;
+ lj_err_optype(L, rc, LJ_ERR_OPARITH);
+ return NULL; /* unreachable */
+ }
+ }
+ return mmcall(L, lj_cont_ra, mo, rb, rc);
+ }
+}
+
+/* In-place coercion of a number to a string. */
+static LJ_AINLINE int tostring(lua_State *L, TValue *o)
+{
+ if (tvisstr(o)) {
+ return 1;
+ } else if (tvisnum(o)) {
+ setstrV(L, o, lj_str_fromnum(L, &o->n));
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */
+TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
+{
+ do {
+ int n = 1;
+ if (!(tvisstr(top-1) || tvisnum(top-1)) || !tostring(L, top)) {
+ cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
+ if (tvisnil(mo)) {
+ mo = lj_meta_lookup(L, top, MM_concat);
+ if (tvisnil(mo)) {
+ if (tvisstr(top-1) || tvisnum(top-1)) top++;
+ lj_err_optype(L, top-1, LJ_ERR_OPCAT);
+ return NULL; /* unreachable */
+ }
+ }
+    /* One of the top two elements is not a string, call __concat metamethod:
+ **
+ ** before: [...][CAT stack .........................]
+ ** top-1 top top+1 top+2
+ ** pick two: [...][CAT stack ...] [o1] [o2]
+ ** setup mm: [...][CAT stack ...] [cont|?] [mo|tmtype] [o1] [o2]
+ ** in asm: [...][CAT stack ...] [cont|PC] [mo|delta] [o1] [o2]
+ ** ^-- func base ^-- mm base
+ ** after mm: [...][CAT stack ...] <--push-- [result]
+ ** next step: [...][CAT stack .............]
+ */
+ copyTV(L, top+2, top) /* Careful with the order of stack copies! */
+ copyTV(L, top+1, top-1)
+ copyTV(L, top, mo)
+ setcont(top-1, lj_cont_cat);
+ return top+1; /* Trigger metamethod call. */
+ } else if (strV(top)->len == 0) { /* Shortcut. */
+ (void)tostring(L, top-1);
+ } else {
+ /* Pick as many strings as possible from the top and concatenate them:
+ **
+ ** before: [...][CAT stack ...........................]
+ ** pick str: [...][CAT stack ...] [...... strings ......]
+ ** concat: [...][CAT stack ...] [result]
+ ** next step: [...][CAT stack ............]
+ */
+ MSize tlen = strV(top)->len;
+ char *buffer;
+ int i;
+ for (n = 1; n <= left && tostring(L, top-n); n++) {
+ MSize len = strV(top-n)->len;
+ if (len >= LJ_MAX_STR - tlen)
+ lj_err_msg(L, LJ_ERR_STROV);
+ tlen += len;
+ }
+ buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen);
+ n--;
+ tlen = 0;
+ for (i = n; i >= 0; i--) {
+ MSize len = strV(top-i)->len;
+ memcpy(buffer + tlen, strVdata(top-i), len);
+ tlen += len;
+ }
+ setstrV(L, top-n, lj_str_new(L, buffer, tlen));
+ }
+ left -= n;
+ top -= n;
+ } while (left >= 1);
+ lj_gc_check_fixtop(L);
+ return NULL;
+}
+
+/* Helper for LEN. __len metamethod. */
+TValue *lj_meta_len(lua_State *L, cTValue *o)
+{
+ cTValue *mo = lj_meta_lookup(L, o, MM_len);
+ if (tvisnil(mo)) {
+ lj_err_optype(L, o, LJ_ERR_OPLEN);
+ return NULL; /* unreachable */
+ }
+ return mmcall(L, lj_cont_ra, mo, o, niltv(L));
+}
+
+/* Helper for equality comparisons. __eq metamethod. */
+TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
+{
+ /* Field metatable must be at same offset for GCtab and GCudata! */
+ cTValue *mo = lj_meta_fast(L, tabref(o1->gch.metatable), MM_eq);
+ if (mo) {
+ TValue *top;
+ int it;
+ if (tabref(o1->gch.metatable) != tabref(o2->gch.metatable)) {
+ cTValue *mo2 = lj_meta_fast(L, tabref(o2->gch.metatable), MM_eq);
+ if (mo2 == NULL || !lj_obj_equal(mo, mo2))
+ return cast(TValue *, (intptr_t)ne);
+ }
+ top = curr_top(L);
+ setcont(top, ne ? lj_cont_condf : lj_cont_condt);
+ copyTV(L, top+1, mo);
+ it = o1->gch.gct == ~LJ_TTAB ? LJ_TTAB : LJ_TUDATA;
+ setgcV(L, top+2, &o1->gch, it);
+ setgcV(L, top+3, &o2->gch, it);
+ return top+2; /* Trigger metamethod call. */
+ }
+ return cast(TValue *, (intptr_t)ne);
+}
+
+/* Helper for ordered comparisons. String compare, __lt/__le metamethods. */
+TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
+{
+ if (itype(o1) == itype(o2)) { /* Never called with two numbers. */
+ if (tvisstr(o1) && tvisstr(o2)) {
+ int32_t res = lj_str_cmp(strV(o1), strV(o2));
+ return cast(TValue *, (intptr_t)(((op&2) ? res <= 0 : res < 0) ^ (op&1)));
+ } else {
+ trymt:
+ while (1) {
+ ASMFunction cont = (op & 1) ? lj_cont_condf : lj_cont_condt;
+ MMS mm = (op & 2) ? MM_le : MM_lt;
+ cTValue *mo = lj_meta_lookup(L, o1, mm);
+ cTValue *mo2 = lj_meta_lookup(L, o2, mm);
+ if (tvisnil(mo) || !lj_obj_equal(mo, mo2)) {
+ if (op & 2) { /* MM_le not found: retry with MM_lt. */
+ cTValue *ot = o1; o1 = o2; o2 = ot; /* Swap operands. */
+ op ^= 3; /* Use LT and flip condition. */
+ continue;
+ }
+ goto err;
+ }
+ return mmcall(L, cont, mo, o1, o2);
+ }
+ }
+ } else if (tvisbool(o1) && tvisbool(o2)) {
+ goto trymt;
+ } else {
+ err:
+ lj_err_comp(L, o1, o2);
+ return NULL;
+ }
+}
+
+/* Helper for calls. __call metamethod. */
+void lj_meta_call(lua_State *L, TValue *func, TValue *top)
+{
+ cTValue *mo = lj_meta_lookup(L, func, MM_call);
+ TValue *p;
+ if (!tvisfunc(mo))
+ lj_err_optype_call(L, func);
+ for (p = top; p > func; p--) copyTV(L, p, p-1);
+ copyTV(L, func, mo);
+}
+
+/* Helper for FORI. Coercion. */
+void lj_meta_for(lua_State *L, TValue *base)
+{
+ if (!str2num(base, base)) lj_err_msg(L, LJ_ERR_FORINIT);
+ if (!str2num(base+1, base+1)) lj_err_msg(L, LJ_ERR_FORLIM);
+ if (!str2num(base+2, base+2)) lj_err_msg(L, LJ_ERR_FORSTEP);
+}
+
diff --git a/src/lj_meta.h b/src/lj_meta.h
new file mode 100644
index 00000000..60d1e79e
--- /dev/null
+++ b/src/lj_meta.h
@@ -0,0 +1,33 @@
+/*
+** Metamethod handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_META_H
+#define _LJ_META_H
+
+#include "lj_obj.h"
+
+/* Metamethod handling */
+LJ_FUNC void lj_meta_init(lua_State *L);
+LJ_FUNC cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name);
+LJ_FUNC cTValue *lj_meta_lookup(lua_State *L, cTValue *o, MMS mm);
+
+#define lj_meta_fastg(g, mt, mm) \
+ ((mt) == NULL ? NULL : ((mt)->nomm & (1u<<(mm))) ? NULL : \
+ lj_meta_cache(mt, mm, strref((g)->mmname[mm])))
+#define lj_meta_fast(L, mt, mm) lj_meta_fastg(G(L), mt, mm)
+
+/* C helpers for some instructions, called from assembler VM. */
+LJ_FUNCA cTValue *lj_meta_tget(lua_State *L, cTValue *o, cTValue *k);
+LJ_FUNCA TValue *lj_meta_tset(lua_State *L, cTValue *o, cTValue *k);
+LJ_FUNCA TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb,
+ cTValue *rc, BCReg op);
+LJ_FUNCA TValue *lj_meta_cat(lua_State *L, TValue *top, int left);
+LJ_FUNCA TValue *lj_meta_len(lua_State *L, cTValue *o);
+LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
+LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
+LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
+LJ_FUNCA void lj_meta_for(lua_State *L, TValue *base);
+
+#endif
diff --git a/src/lj_obj.c b/src/lj_obj.c
new file mode 100644
index 00000000..d26a6b38
--- /dev/null
+++ b/src/lj_obj.c
@@ -0,0 +1,41 @@
+/*
+** Miscellaneous object handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_obj_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+/* Object type names. */
+LJ_DATADEF const char *const lj_obj_typename[] = { /* ORDER LUA_T */
+ "no value", "nil", "boolean", "userdata", "number", "string",
+ "table", "function", "userdata", "thread", "proto", "upval"
+};
+
+LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */
+ "nil", "boolean", "boolean", "userdata", "string", "upval", "thread",
+ "proto", "function", "deadkey", "table", "userdata", "number"
+};
+
+/* Compare two objects without calling metamethods. */
+int lj_obj_equal(cTValue *o1, cTValue *o2)
+{
+ if (itype(o1) == itype(o2)) {
+ if (tvispri(o1))
+ return 1;
+ if (!tvisnum(o1)) {
+#if LJ_64
+ if (tvislightud(o1))
+ return o1->u64 == o2->u64;
+ else
+#endif
+ return gcrefeq(o1->gcr, o2->gcr);
+ }
+ } else if (!tvisnum(o1) || !tvisnum(o2)) {
+ return 0;
+ }
+ return numV(o1) == numV(o2);
+}
+
diff --git a/src/lj_obj.h b/src/lj_obj.h
new file mode 100644
index 00000000..e5ea713d
--- /dev/null
+++ b/src/lj_obj.h
@@ -0,0 +1,676 @@
+/*
+** LuaJIT VM tags, values and objects.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#ifndef _LJ_OBJ_H
+#define _LJ_OBJ_H
+
+#include "lua.h"
+#include "lj_def.h"
+#include "lj_arch.h"
+
+/* -- Memory references (32 bit address space) ---------------------------- */
+
+/* Memory size. */
+typedef uint32_t MSize;
+
+/* Memory reference */
+typedef struct MRef {
+ uint32_t ptr32; /* Pseudo 32 bit pointer. */
+} MRef;
+
+#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32)
+
+#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
+#define setmrefr(r, v) ((r).ptr32 = (v).ptr32)
+
+/* -- GC object references (32 bit address space) ------------------------- */
+
+/* GCobj reference */
+typedef struct GCRef {
+ uint32_t gcptr32; /* Pseudo 32 bit pointer. */
+} GCRef;
+
+/* Common GC header for all collectable objects. */
+#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct
+/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */
+
+#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32)
+#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32)
+#define gcrefu(r) ((r).gcptr32)
+#define gcrefi(r) ((int32_t)(r).gcptr32)
+#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32)
+#define gcnext(gc) (gcref((gc)->gch.nextgc))
+
+#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch)
+#define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i))
+#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p))
+#define setgcrefnull(r) ((r).gcptr32 = 0)
+#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32)
+
+/* IMPORTANT NOTE:
+**
+** All uses of the setgcref* macros MUST be accompanied with a write barrier.
+**
+** This is to ensure the integrity of the incremental GC. The invariant
+** to preserve is that a black object never points to a white object.
+** I.e. never store a white object into a field of a black object.
+**
+** It's ok to LEAVE OUT the write barrier ONLY in the following cases:
+** - The source is not a GC object (NULL).
+** - The target is a GC root. I.e. everything in global_State.
+** - The target is a lua_State field (threads are never black).
+** - The target is a stack slot, see setgcV et al.
+** - The target is an open upvalue, i.e. pointing to a stack slot.
+** - The target is a newly created object (i.e. marked white). But make
+**   sure nothing invokes the GC in between.
+** - The target and the source are the same object (self-reference).
+** - The target already contains the object (e.g. moving elements around).
+**
+** The most common case is a store to a stack slot. All other cases where
+** a barrier has been omitted are annotated with a NOBARRIER comment.
+**
+** The same logic applies for stores to table slots (array part or hash
+** part). ALL uses of lj_tab_set* require a barrier for the stored *value*
+** (if it's a GC object). The barrier for the *key* is already handled
+** internally by lj_tab_newkey.
+*/
+
+/* -- Common type definitions --------------------------------------------- */
+
+/* Types for handling bytecodes. Need this here, details in lj_bc.h. */
+typedef uint32_t BCIns; /* Bytecode instruction. */
+typedef uint32_t BCPos; /* Bytecode position. */
+typedef uint32_t BCReg; /* Bytecode register. */
+typedef int32_t BCLine; /* Bytecode line number. */
+
+/* Internal assembler functions. Never call these directly from C. */
+typedef void (*ASMFunction)(void);
+
+/* Resizable string buffer. Need this here, details in lj_str.h. */
+typedef struct SBuf {
+ char *buf; /* String buffer base. */
+ MSize n; /* String buffer length. */
+ MSize sz; /* String buffer size. */
+} SBuf;
+
+/* -- Tags and values ----------------------------------------------------- */
+
+/* Frame link. */
+typedef union {
+ int32_t ftsz; /* Frame type and size of previous frame. */
+ MRef pcr; /* Overlaps PC for Lua frames. */
+} FrameLink;
+
+/* Tagged value. */
+typedef LJ_ALIGN(8) union TValue {
+ uint64_t u64; /* 64 bit pattern overlaps number. */
+ lua_Number n; /* Number object overlaps split tag/value object. */
+ struct {
+ LJ_ENDIAN_LOHI(
+ GCRef gcr; /* GCobj reference (if any). */
+ , int32_t it; /* Internal object tag. Must overlap MSW of number. */
+ )
+ };
+ struct {
+ LJ_ENDIAN_LOHI(
+ GCRef func; /* Function for next frame (or dummy L). */
+ , FrameLink tp; /* Link to previous frame. */
+ )
+ } fr;
+ struct {
+ LJ_ENDIAN_LOHI(
+ uint32_t lo; /* Lower 32 bits of number. */
+ , uint32_t hi; /* Upper 32 bits of number. */
+ )
+ } u32;
+} TValue;
+
+typedef const TValue cTValue;
+
+#define tvref(r) (mref(r, TValue))
+
+/* More external and GCobj tags for internal objects. */
+#define LAST_TT LUA_TTHREAD
+
+#define LUA_TPROTO (LAST_TT+1)
+#define LUA_TUPVAL (LAST_TT+2)
+#define LUA_TDEADKEY (LAST_TT+3)
+
+/* Internal object tags.
+**
+** Internal tags overlap the MSW of a number object (must be a double).
+** Interpreted as a double these are special NaNs. The FPU only generates
+** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available
+** for use as internal tags. Small negative numbers are used to shorten the
+** encoding of type comparisons (reg/mem against sign-ext. 8 bit immediate).
+**
+** ---MSW---.---LSW---
+** primitive types | itype | |
+** lightuserdata | itype | void * | (32 bit platforms)
+** lightuserdata |fffc| void * | (64 bit platforms, 48 bit pointers)
+** GC objects | itype | GCRef |
+** number -------double------
+**
+** ORDER LJ_T
+** Primitive types nil/false/true must be first, lightuserdata next.
+** GC objects are at the end, table/userdata must be lowest.
+** Also check lj_ir.h for similar ordering constraints.
+*/
+#define LJ_TNIL (-1)
+#define LJ_TFALSE (-2)
+#define LJ_TTRUE (-3)
+#define LJ_TLIGHTUD (-4)
+#define LJ_TSTR (-5)
+#define LJ_TUPVAL (-6)
+#define LJ_TTHREAD (-7)
+#define LJ_TPROTO (-8)
+#define LJ_TFUNC (-9)
+#define LJ_TDEADKEY (-10)
+#define LJ_TTAB (-11)
+#define LJ_TUDATA (-12)
+/* This is just the canonical number type used in some places. */
+#define LJ_TNUMX (-13)
+
+#if LJ_64
+#define LJ_TISNUM ((uint32_t)0xfff80000)
+#else
+#define LJ_TISNUM ((uint32_t)LJ_TNUMX)
+#endif
+#define LJ_TISTRUECOND ((uint32_t)LJ_TFALSE)
+#define LJ_TISPRI ((uint32_t)LJ_TTRUE)
+#define LJ_TISGCV ((uint32_t)(LJ_TSTR+1))
+#define LJ_TISTABUD ((uint32_t)LJ_TTAB)
+
+/* -- TValue getters/setters ---------------------------------------------- */
+
+/* Macros to test types. */
+#define itype(o) ((o)->it)
+#define uitype(o) ((uint32_t)itype(o))
+#define tvisnil(o) (itype(o) == LJ_TNIL)
+#define tvisfalse(o) (itype(o) == LJ_TFALSE)
+#define tvistrue(o) (itype(o) == LJ_TTRUE)
+#define tvisbool(o) (tvisfalse(o) || tvistrue(o))
+#if LJ_64
+#define tvislightud(o) ((itype(o) >> 16) == LJ_TLIGHTUD)
+#else
+#define tvislightud(o) (itype(o) == LJ_TLIGHTUD)
+#endif
+#define tvisstr(o) (itype(o) == LJ_TSTR)
+#define tvisfunc(o) (itype(o) == LJ_TFUNC)
+#define tvisthread(o) (itype(o) == LJ_TTHREAD)
+#define tvisproto(o) (itype(o) == LJ_TPROTO)
+#define tvistab(o) (itype(o) == LJ_TTAB)
+#define tvisudata(o) (itype(o) == LJ_TUDATA)
+#define tvisnum(o) (uitype(o) <= LJ_TISNUM)
+
+#define tvistruecond(o) (uitype(o) < LJ_TISTRUECOND)
+#define tvispri(o) (uitype(o) >= LJ_TISPRI)
+#define tvistabud(o) (uitype(o) <= LJ_TISTABUD) /* && !tvisnum() */
+#define tvisgcv(o) \
+ ((uitype(o) - LJ_TISGCV) > ((uint32_t)LJ_TNUMX - LJ_TISGCV))
+
+/* Special macros to test numbers for NaN, +0, -0, +1 and raw equality. */
+#define tvisnan(o) ((o)->n != (o)->n)
+#define tvispzero(o) ((o)->u64 == 0)
+#define tvismzero(o) ((o)->u64 == U64x(80000000,00000000))
+#define tvispone(o) ((o)->u64 == U64x(3ff00000,00000000))
+#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64)
+
+/* Macros to convert type ids. */
+#if LJ_64
+#define itypemap(o) \
+ (tvisnum(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o))
+#else
+#define itypemap(o) (tvisnum(o) ? ~LJ_TNUMX : ~itype(o))
+#endif
+
+/* Macros to get tagged values. */
+#define gcval(o) (gcref((o)->gcr))
+#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it))
+#if LJ_64
+#define lightudV(o) check_exp(tvislightud(o), \
+ (void *)((o)->u64 & U64x(0000ffff,ffffffff)))
+#else
+#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void))
+#endif
+#define gcV(o) check_exp(tvisgcv(o), gcval(o))
+#define strV(o) check_exp(tvisstr(o), &gcval(o)->str)
+#define funcV(o) check_exp(tvisfunc(o), &gcval(o)->fn)
+#define threadV(o) check_exp(tvisthread(o), &gcval(o)->th)
+#define protoV(o) check_exp(tvisproto(o), &gcval(o)->pt)
+#define tabV(o) check_exp(tvistab(o), &gcval(o)->tab)
+#define udataV(o) check_exp(tvisudata(o), &gcval(o)->ud)
+#define numV(o) check_exp(tvisnum(o), (o)->n)
+
+/* Macros to set tagged values. */
+#define setitype(o, i) ((o)->it = (i))
+#define setnilV(o) ((o)->it = LJ_TNIL)
+#define setboolV(o, x) ((o)->it = LJ_TFALSE-(x))
+
+#if LJ_64
+#define checklightudptr(L, p) \
+ (((uint64_t)(p) >> 48) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p))
+#define setlightudV(o, x) \
+ ((o)->u64 = (uint64_t)(x) | (((uint64_t)LJ_TLIGHTUD) << 48))
+#define setcont(o, x) \
+ ((o)->u64 = (uint64_t)(x) - (uint64_t)lj_vm_asm_begin)
+#else
+#define checklightudptr(L, p) (p)
+#define setlightudV(o, x) \
+ { TValue *i_o = (o); \
+ setgcrefp(i_o->gcr, (x)); i_o->it = LJ_TLIGHTUD; }
+#define setcont(o, x) \
+ { TValue *i_o = (o); \
+ setgcrefp(i_o->gcr, (x)); i_o->it = LJ_TLIGHTUD; }
+#endif
+
+#define tvchecklive(g, o) \
+ lua_assert(!tvisgcv(o) || \
+ ((~itype(o) == gcval(o)->gch.gct) && !isdead(g, gcval(o))))
+
+#define setgcV(L, o, x, itype) \
+ { TValue *i_o = (o); \
+ setgcrefp(i_o->gcr, &(x)->nextgc); i_o->it = itype; \
+ tvchecklive(G(L), i_o); }
+#define setstrV(L, o, x) setgcV(L, o, x, LJ_TSTR)
+#define setthreadV(L, o, x) setgcV(L, o, x, LJ_TTHREAD)
+#define setprotoV(L, o, x) setgcV(L, o, x, LJ_TPROTO)
+#define setfuncV(L, o, x) setgcV(L, o, &(x)->l, LJ_TFUNC)
+#define settabV(L, o, x) setgcV(L, o, x, LJ_TTAB)
+#define setudataV(L, o, x) setgcV(L, o, x, LJ_TUDATA)
+
+#define setnumV(o, x) ((o)->n = (x))
+#define setnanV(o) ((o)->u64 = U64x(fff80000,00000000))
+#define setintV(o, i) ((o)->n = cast_num((int32_t)(i)))
+
+/* Copy tagged values. */
+#define copyTV(L, o1, o2) \
+ { cTValue *i_o2 = (o2); TValue *i_o1 = (o1); \
+ *i_o1 = *i_o2; tvchecklive(G(L), i_o1); }
+
+/* -- String object ------------------------------------------------------- */
+
+/* String object header. String payload follows. */
+typedef struct GCstr {
+ GCHeader;
+ uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */
+ uint8_t unused;
+ MSize hash; /* Hash of string. */
+ MSize len; /* Size of string. */
+} GCstr;
+
+#define strref(r) (&gcref((r))->str)
+#define strdata(s) ((const char *)((s)+1))
+#define strdatawr(s) ((char *)((s)+1))
+#define strVdata(o) strdata(strV(o))
+#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1)
+
+/* -- Userdata object ----------------------------------------------------- */
+
+/* Userdata object. Payload follows. */
+typedef struct GCudata {
+ GCHeader;
+ uint8_t unused1;
+ uint8_t unused2;
+ GCRef env; /* Should be at same offset in GCfunc. */
+ MSize len; /* Size of payload. */
+ GCRef metatable; /* Must be at same offset in GCtab. */
+ uint32_t align1; /* To force 8 byte alignment of the payload. */
+} GCudata;
+
+#define uddata(u) ((void *)((u)+1))
+#define sizeudata(u) (sizeof(struct GCudata)+(u)->len)
+
+/* -- Prototype object ---------------------------------------------------- */
+
+/* Split constant array. Collectables are below, numbers above pointer. */
+typedef union ProtoK {
+ lua_Number *n; /* Numbers. */
+ GCRef *gc; /* Collectable objects (strings/table/proto). */
+} ProtoK;
+
+#define SCALE_NUM_GCO ((int32_t)sizeof(lua_Number)/sizeof(GCRef))
+#define round_nkgc(n) (((n) + SCALE_NUM_GCO-1) & ~(SCALE_NUM_GCO-1))
+
+typedef struct VarInfo {
+ GCstr *name; /* Local variable name. */
+ BCPos startpc; /* First point where the local variable is active. */
+ BCPos endpc; /* First point where the local variable is dead. */
+} VarInfo;
+
+typedef struct GCproto {
+ GCHeader;
+ uint8_t numparams; /* Number of parameters. */
+ uint8_t framesize; /* Fixed frame size. */
+ MSize sizebc; /* Number of bytecode instructions. */
+ GCRef gclist;
+ ProtoK k; /* Split constant array (points to the middle). */
+ BCIns *bc; /* Array of bytecode instructions. */
+ int16_t *uv; /* Upvalue list. local >= 0. parent uv < 0. */
+ MSize sizekgc; /* Number of collectable constants. */
+ MSize sizekn; /* Number of lua_Number constants. */
+ uint8_t sizeuv; /* Number of upvalues. */
+ uint8_t flags; /* Miscellaneous flags (see below). */
+ uint16_t trace; /* Anchor for chain of root traces. */
+ /* ------ The following fields are for debugging/tracebacks only ------ */
+ MSize sizelineinfo; /* Size of lineinfo array (may be 0). */
+ MSize sizevarinfo; /* Size of local var info array (may be 0). */
+ MSize sizeuvname; /* Size of upvalue names array (may be 0). */
+ BCLine linedefined; /* First line of the function definition. */
+ BCLine lastlinedefined; /* Last line of the function definition. */
+ BCLine *lineinfo; /* Map from bytecode instructions to source lines. */
+ struct VarInfo *varinfo; /* Names and extents of local variables. */
+ GCstr **uvname; /* Upvalue names. */
+ GCstr *chunkname; /* Name of the chunk this function was defined in. */
+} GCproto;
+
+#define PROTO_IS_VARARG 0x01
+#define PROTO_HAS_FNEW 0x02
+#define PROTO_HAS_RETURN 0x04
+#define PROTO_FIXUP_RETURN 0x08
+#define PROTO_NO_JIT 0x10
+#define PROTO_HAS_ILOOP 0x20
+
+/* -- Upvalue object ------------------------------------------------------ */
+
+typedef struct GCupval {
+ GCHeader;
+ uint8_t closed; /* Set if closed (i.e. uv->v == &uv->u.value). */
+ uint8_t unused;
+ union {
+ TValue tv; /* If closed: the value itself. */
+ struct { /* If open: double linked list, anchored at thread. */
+ GCRef prev;
+ GCRef next;
+ };
+ };
+ TValue *v; /* Points to stack slot (open) or above (closed). */
+#if LJ_32
+ int32_t unusedv; /* For consistent alignment (32 bit only). */
+#endif
+} GCupval;
+
+#define uvprev(uv_) (&gcref((uv_)->prev)->uv)
+#define uvnext(uv_) (&gcref((uv_)->next)->uv)
+
+/* -- Function object (closures) ------------------------------------------ */
+
+/* Common header for functions. env should be at same offset in GCudata. */
+#define GCfuncHeader \
+ GCHeader; uint8_t ffid; uint8_t nupvalues; \
+ GCRef env; GCRef gclist; ASMFunction gate
+
+typedef struct GCfuncC {
+ GCfuncHeader;
+ lua_CFunction f; /* C function to be called. */
+ TValue upvalue[1]; /* Array of upvalues (TValue). */
+} GCfuncC;
+
+typedef struct GCfuncL {
+ GCfuncHeader;
+ GCRef pt; /* Link to prototype this function is based on. */
+ GCRef uvptr[1]; /* Array of _pointers_ to upvalue objects (GCupval). */
+} GCfuncL;
+
+typedef union GCfunc {
+ GCfuncC c;
+ GCfuncL l;
+} GCfunc;
+
+#define FF_LUA 0
+#define FF_C 1
+#define isluafunc(fn) ((fn)->c.ffid == FF_LUA)
+#define iscfunc(fn) ((fn)->c.ffid == FF_C)
+#define isffunc(fn) ((fn)->c.ffid > FF_C)
+#define funcproto(fn) check_exp(isluafunc(fn), &gcref((fn)->l.pt)->pt)
+#define sizeCfunc(n) (sizeof(GCfuncC) + sizeof(TValue)*((n)-1))
+#define sizeLfunc(n) (sizeof(GCfuncL) + sizeof(TValue *)*((n)-1))
+
+/* -- Table object -------------------------------------------------------- */
+
+/* Hash node. */
+typedef struct Node {
+ TValue val; /* Value object. Must be first field. */
+ TValue key; /* Key object. */
+ MRef next; /* Hash chain. */
+ int32_t unused; /* For consistent alignment. */
+} Node;
+
+LJ_STATIC_ASSERT(offsetof(Node, val) == 0);
+
+typedef struct GCtab {
+ GCHeader;
+ uint8_t nomm; /* Negative cache for fast metamethods. */
+ int8_t colo; /* Array colocation. */
+ MRef array; /* Array part. */
+ GCRef gclist;
+ GCRef metatable; /* Must be at same offset in GCudata. */
+ MRef node; /* Hash part. */
+ uint32_t asize; /* Size of array part (keys [0, asize-1]). */
+ uint32_t hmask; /* Hash part mask (size of hash part - 1). */
+ MRef lastfree; /* Any free position is before this position. */
+} GCtab;
+
+#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab))
+#define tabref(r) (&gcref((r))->tab)
+#define noderef(r) (mref((r), Node))
+#define nextnode(n) (mref((n)->next, Node))
+
+/* -- State objects ------------------------------------------------------- */
+
+/* VM states. */
+enum {
+ LJ_VMST_INTERP, /* Interpreter. */
+ LJ_VMST_C, /* C function. */
+ LJ_VMST_GC, /* Garbage collector. */
+ LJ_VMST_EXIT, /* Trace exit handler. */
+ LJ_VMST_RECORD, /* Trace recorder. */
+ LJ_VMST_OPT, /* Optimizer. */
+ LJ_VMST_ASM, /* Assembler. */
+ LJ_VMST__MAX
+};
+
+#define setvmstate(g, st) ((g)->vmstate = ~LJ_VMST_##st)
+
+/* Metamethods. */
+#define MMDEF(_) \
+ _(index) _(newindex) _(gc) _(mode) _(eq) \
+ /* Only the above (fast) metamethods are negative cached (max. 8). */ \
+ _(len) _(lt) _(le) _(concat) _(call) \
+ /* The following must be in ORDER ARITH. */ \
+ _(add) _(sub) _(mul) _(div) _(mod) _(pow) _(unm) \
+ /* The following are used in the standard libraries. */ \
+ _(metatable) _(tostring)
+
+typedef enum {
+#define MMENUM(name) MM_##name,
+MMDEF(MMENUM)
+#undef MMENUM
+ MM_MAX,
+ MM____ = MM_MAX,
+ MM_FAST = MM_eq
+} MMS;
+
+#define BASEMT_MAX ((~LJ_TNUMX)+1)
+
+typedef struct GCState {
+ MSize total; /* Memory currently allocated. */
+ MSize threshold; /* Memory threshold. */
+ uint8_t currentwhite; /* Current white color. */
+ uint8_t state; /* GC state. */
+ uint8_t unused1;
+ uint8_t unused2;
+ MSize sweepstr; /* Sweep position in string table. */
+ GCRef root; /* List of all collectable objects. */
+ GCRef *sweep; /* Sweep position in root list. */
+ GCRef gray; /* List of gray objects. */
+ GCRef grayagain; /* List of objects for atomic traversal. */
+ GCRef weak; /* List of weak tables (to be cleared). */
+ GCRef mmudata; /* List of userdata (to be finalized). */
+ MSize stepmul; /* Incremental GC step granularity. */
+ MSize debt; /* Debt (how much GC is behind schedule). */
+ MSize estimate; /* Estimate of memory actually in use. */
+ MSize pause; /* Pause between successive GC cycles. */
+} GCState;
+
+/* Global state, shared by all threads of a Lua universe. */
+typedef struct global_State {
+ GCRef *strhash; /* String hash table (hash chain anchors). */
+ MSize strmask; /* String hash mask (size of hash table - 1). */
+ MSize strnum; /* Number of strings in hash table. */
+ lua_Alloc allocf; /* Memory allocator. */
+ void *allocd; /* Memory allocator data. */
+ GCState gc; /* Garbage collector. */
+ SBuf tmpbuf; /* Temporary buffer for string concatenation. */
+ Node nilnode; /* Fallback 1-element hash part (nil key and value). */
+ uint8_t hookmask; /* Hook mask. */
+ uint8_t dispatchmode; /* Dispatch mode. */
+ uint8_t vmevmask; /* VM event mask. */
+ uint8_t unused1;
+ GCRef mainthref; /* Link to main thread. */
+ TValue registrytv; /* Anchor for registry. */
+ TValue tmptv; /* Temporary TValue. */
+ GCupval uvhead; /* Head of double-linked list of all open upvalues. */
+ int32_t hookcount; /* Instruction hook countdown. */
+ int32_t hookcstart; /* Start count for instruction hook counter. */
+ lua_Hook hookf; /* Hook function. */
+ lua_CFunction panic; /* Called as a last resort for errors. */
+ volatile int32_t vmstate; /* VM state or current JIT code trace number. */
+ GCRef jit_L; /* Current JIT code lua_State or NULL. */
+ MRef jit_base; /* Current JIT code L->base. */
+ GCRef basemt[BASEMT_MAX]; /* Metatables for base types. */
+ GCRef mmname[MM_MAX]; /* Array holding metamethod names. */
+} global_State;
+
+#define mainthread(g) (&gcref(g->mainthref)->th)
+#define niltv(L) \
+ check_exp(tvisnil(&G(L)->nilnode.val), &G(L)->nilnode.val)
+#define niltvg(g) \
+ check_exp(tvisnil(&(g)->nilnode.val), &(g)->nilnode.val)
+
+/* Hook management. Hook event masks are defined in lua.h. */
+#define HOOK_EVENTMASK 0x0f
+#define HOOK_ACTIVE 0x10
+#define HOOK_VMEVENT 0x20
+#define HOOK_GC 0x40
+#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
+#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
+#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC))
+#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT))
+#define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE)
+#define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK)
+#define hook_restore(g, h) \
+ ((g)->hookmask = ((g)->hookmask & HOOK_EVENTMASK) | (h))
+
+/* Per-thread state object. */
+struct lua_State {
+ GCHeader;
+ uint8_t dummy_ffid; /* Fake FF_C for curr_funcisL() on dummy frames. */
+ uint8_t status; /* Thread status. */
+ MRef glref; /* Link to global state. */
+ GCRef gclist; /* GC chain. */
+ TValue *base; /* Base of currently executing function. */
+ TValue *top; /* First free slot in the stack. */
+ TValue *maxstack; /* Last free slot in the stack. */
+ TValue *stack; /* Stack base. */
+ GCRef openupval; /* List of open upvalues in the stack. */
+ GCRef env; /* Thread environment (table of globals). */
+ void *cframe; /* End of C stack frame chain. */
+ MSize stacksize; /* True stack size (incl. LJ_STACK_EXTRA). */
+};
+
+#define G(L) (mref(L->glref, global_State))
+#define registry(L) (&G(L)->registrytv)
+
+/* Macros to access the currently executing (Lua) function. */
+#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn)
+#define curr_funcisL(L) (isluafunc(curr_func(L)))
+#define curr_proto(L) (funcproto(curr_func(L)))
+#define curr_topL(L) (L->base + curr_proto(L)->framesize)
+#define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top)
+
+/* -- GC object definition and conversions -------------------------------- */
+
+/* GC header for generic access to common fields of GC objects. */
+typedef struct GChead {
+ GCHeader;
+ uint8_t unused1;
+ uint8_t unused2;
+ GCRef env;
+ GCRef gclist;
+ GCRef metatable;
+} GChead;
+
+/* The env field SHOULD be at the same offset for all GC objects. */
+LJ_STATIC_ASSERT(offsetof(GChead, env) == offsetof(GCfuncL, env));
+LJ_STATIC_ASSERT(offsetof(GChead, env) == offsetof(GCudata, env));
+
+/* The metatable field MUST be at the same offset for all GC objects. */
+LJ_STATIC_ASSERT(offsetof(GChead, metatable) == offsetof(GCtab, metatable));
+LJ_STATIC_ASSERT(offsetof(GChead, metatable) == offsetof(GCudata, metatable));
+
+/* The gclist field MUST be at the same offset for all GC objects. */
+LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(lua_State, gclist));
+LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCproto, gclist));
+LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCfuncL, gclist));
+LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCtab, gclist));
+
+typedef union GCobj {
+ GChead gch;
+ GCstr str;
+ GCupval uv;
+ lua_State th;
+ GCproto pt;
+ GCfunc fn;
+ GCtab tab;
+ GCudata ud;
+} GCobj;
+
+/* Macros to convert a GCobj pointer into a specific value. */
+#define gco2str(o) check_exp((o)->gch.gct == ~LJ_TSTR, &(o)->str)
+#define gco2uv(o) check_exp((o)->gch.gct == ~LJ_TUPVAL, &(o)->uv)
+#define gco2th(o) check_exp((o)->gch.gct == ~LJ_TTHREAD, &(o)->th)
+#define gco2pt(o) check_exp((o)->gch.gct == ~LJ_TPROTO, &(o)->pt)
+#define gco2func(o) check_exp((o)->gch.gct == ~LJ_TFUNC, &(o)->fn)
+#define gco2tab(o) check_exp((o)->gch.gct == ~LJ_TTAB, &(o)->tab)
+#define gco2ud(o) check_exp((o)->gch.gct == ~LJ_TUDATA, &(o)->ud)
+
+/* Macro to convert any collectable object into a GCobj pointer. */
+#define obj2gco(v) (cast(GCobj *, (v)))
+
+/* -- Number to integer conversion ---------------------------------------- */
+
+static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
+{
+ TValue o;
+ o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */
+ return (int32_t)o.u32.lo;
+}
+
+#if (defined(__i386__) || defined(_M_IX86)) && !defined(__SSE2__)
+#define lj_num2int(n) lj_num2bit((n))
+#else
+#define lj_num2int(n) ((int32_t)(n))
+#endif
+
+/* -- Miscellaneous object handling --------------------------------------- */
+
+/* Names and maps for internal and external object tags. */
+LJ_DATA const char *const lj_obj_typename[1+LUA_TUPVAL+1];
+LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
+
+#define typename(o) (lj_obj_itypename[itypemap(o)])
+
+/* Compare two objects without calling metamethods. */
+LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2);
+
+#ifdef LUA_USE_ASSERT
+#include "lj_gc.h"
+#endif
+
+#endif
diff --git a/src/lj_opt_dce.c b/src/lj_opt_dce.c
new file mode 100644
index 00000000..0cd60830
--- /dev/null
+++ b/src/lj_opt_dce.c
@@ -0,0 +1,79 @@
+/*
+** DCE: Dead Code Elimination. Pre-LOOP only -- ASM already performs DCE.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_opt_dce_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+
+/* Some local macros to save typing. Undef'd at the end. */
+#define IR(ref) (&J->cur.ir[(ref)])
+
+/* Scan through all snapshots and mark all referenced instructions. */
+static void dce_marksnap(jit_State *J)
+{
+ SnapNo i, nsnap = J->cur.nsnap;
+ for (i = 0; i < nsnap; i++) {
+ SnapShot *snap = &J->cur.snap[i];
+ IRRef2 *map = &J->cur.snapmap[snap->mapofs];
+ BCReg s, nslots = snap->nslots;
+ for (s = 0; s < nslots; s++) {
+ IRRef ref = snap_ref(map[s]);
+ if (!irref_isk(ref))
+ irt_setmark(IR(ref)->t);
+ }
+ }
+}
+
+/* Backwards propagate marks. Replace unused instructions with NOPs. */
+static void dce_propagate(jit_State *J)
+{
+ IRRef1 *pchain[IR__MAX];
+ IRRef ins;
+ uint32_t i;
+ for (i = 0; i < IR__MAX; i++) pchain[i] = &J->chain[i];
+ for (ins = J->cur.nins-1; ins >= REF_FIRST; ins--) {
+ IRIns *ir = IR(ins);
+ if (irt_ismarked(ir->t)) {
+ irt_clearmark(ir->t);
+ pchain[ir->o] = &ir->prev;
+ } else if (!(irt_isguard(ir->t) || irm_sideeff(lj_ir_mode[ir->o]))) {
+ *pchain[ir->o] = ir->prev; /* Reroute original instruction chain. */
+ *pchain[IR_NOP] = (IRRef1)ins;
+ ir->t.irt = IRT_NIL;
+ ir->o = IR_NOP; /* Replace instruction with NOP. */
+ ir->op1 = ir->op2 = 0;
+ pchain[IR_NOP] = &ir->prev;
+ continue;
+ }
+ if (!irref_isk(ir->op1)) irt_setmark(IR(ir->op1)->t);
+ if (!irref_isk(ir->op2)) irt_setmark(IR(ir->op2)->t);
+ }
+ *pchain[IR_NOP] = 0; /* Terminate NOP chain. */
+}
+
+/* Dead Code Elimination.
+**
+** First backpropagate marks for all used instructions. Then replace
+** the unused ones with a NOP. Note that compressing the IR to eliminate
+** the NOPs does not pay off.
+*/
+void lj_opt_dce(jit_State *J)
+{
+ if ((J->flags & JIT_F_OPT_DCE)) {
+ dce_marksnap(J);
+ dce_propagate(J);
+ }
+}
+
+#undef IR
+
+#endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
new file mode 100644
index 00000000..e5d98162
--- /dev/null
+++ b/src/lj_opt_fold.c
@@ -0,0 +1,1415 @@
+/*
+** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
+** CSE: Common-Subexpression Elimination.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_opt_fold_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_str.h"
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+#include "lj_trace.h"
+#include "lj_vm.h"
+
+/* Here's a short description how the FOLD engine processes instructions:
+**
+** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
+** The instruction and its operands are used to select matching fold rules.
+** These are applied iteratively until a fixed point is reached.
+**
+** The 8 bit opcode of the instruction itself plus the opcodes of the
+** two instructions referenced by its operands form a 24 bit key
+** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
+**
+** This key is used for partial matching against the fold rules. The
+** left/right operand fields of the key are successively masked with
+** the 'any' wildcard, from most specific to least specific:
+**
+** ins left right
+** ins any right
+** ins left any
+** ins any any
+**
+** The masked key is used to lookup a matching fold rule in a semi-perfect
+** hash table. If a matching rule is found, the related fold function is run.
+** Multiple rules can share the same fold function. A fold rule may return
+** one of several special values:
+**
+** - NEXTFOLD means no folding was applied, because an additional test
+** inside the fold function failed. Matching continues against less
+** specific fold rules. Finally the instruction is passed on to CSE.
+**
+** - RETRYFOLD means the instruction was modified in-place. Folding is
+** retried as if this instruction had just been received.
+**
+** All other return values are terminal actions -- no further folding is
+** applied:
+**
+** - INTFOLD(i) returns a reference to the integer constant i.
+**
+** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
+** without emitting an instruction.
+**
+** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
+** it without passing through any further optimizations.
+**
+** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
+** no result (e.g. guarded assertions): FAILFOLD means the guard would
+** always fail, i.e. the current trace is pointless. DROPFOLD means
+** the guard is always true and has been eliminated. CONDFOLD is a
+** shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
+**
+** - Any other return value is interpreted as an IRRef or TRef. This
+** can be a reference to an existing or a newly created instruction.
+** Only the least-significant 16 bits (IRRef1) are used to form a TRef
+** which is finally returned to the caller.
+**
+** The FOLD engine receives instructions both from the trace recorder and
+** substituted instructions from LOOP unrolling. This means all types
+** of instructions may end up here, even though the recorder bypasses
+** FOLD in some cases. Thus all loads, stores and allocations must have
+** an any/any rule to avoid being passed on to CSE.
+**
+** Carefully read the following requirements before adding or modifying
+** any fold rules:
+**
+** Requirement #1: All fold rules must preserve their destination type.
+**
+** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
+** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
+**
+** Requirement #2: Fold rules should not create *new* instructions which
+** reference operands *across* PHIs.
+**
+** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
+** left operand is a PHI. Then fleft->op1 would point across the PHI
+** frontier to an invariant instruction. Adding a PHI for this instruction
+** would be counterproductive. The solution is to add a barrier which
+** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
+** The only exception is for recurrences with high latencies like
+** repeated int->num->int conversions.
+**
+** One could relax this condition a bit if the referenced instruction is
+** a PHI, too. But this often leads to worse code due to excessive
+** register shuffling.
+**
+** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
+** Even returning fleft->op1 would be ok, because a new PHI will added,
+** if needed. But again, this leads to excessive register shuffling and
+** should be avoided.
+**
+** Requirement #3: The set of all fold rules must be monotonic to guarantee
+** termination.
+**
+** The goal is optimization, so one primarily wants to add strength-reducing
+** rules. This means eliminating an instruction or replacing an instruction
+** with one or more simpler instructions. Don't add fold rules which point
+** into the other direction.
+**
+** Some rules (like commutativity) do not directly reduce the strength of
+** an instruction, but enable other fold rules (e.g. by moving constants
+** to the right operand). These rules must be made unidirectional to avoid
+** cycles.
+**
+** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
+*/
+
+/* Some local macros to save typing. Undef'd at the end. */
+#define IR(ref) (&J->cur.ir[(ref)])
+#define fins (&J->fold.ins)
+#define fleft (&J->fold.left)
+#define fright (&J->fold.right)
+#define knumleft (ir_knum(fleft)->n)
+#define knumright (ir_knum(fright)->n)
+
+/* Pass IR on to next optimization in chain (FOLD). */
+#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
+
+/* Fold function type. Fastcall on x86 significantly reduces their size. */
+typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
+
+/* Macros for the fold specs, so buildvm can recognize them. */
+#define LJFOLD(x)
+#define LJFOLDX(x)
+#define LJFOLDF(name) static TRef LJ_FASTCALL name(jit_State *J)
+/* Note: They must be at the start of a line or buildvm ignores them! */
+
+/* Barrier to prevent using operands across PHIs. */
+#define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD
+
+/* Barrier to prevent folding across a GC step.
+** GC steps can only happen at the head of a trace and at LOOP.
+** And the GC is only driven forward if there is at least one allocation.
+*/
+#define gcstep_barrier(J, ref) \
+ ((ref) < J->chain[IR_LOOP] && \
+ (J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
+ J->chain[IR_SNEW] || J->chain[IR_TOSTR]))
+
+/* -- Constant folding ---------------------------------------------------- */
+
+LJFOLD(ADD KNUM KNUM)
+LJFOLD(SUB KNUM KNUM)
+LJFOLD(MUL KNUM KNUM)
+LJFOLD(DIV KNUM KNUM)
+LJFOLD(NEG KNUM KNUM)
+LJFOLD(ABS KNUM KNUM)
+LJFOLD(ATAN2 KNUM KNUM)
+LJFOLD(LDEXP KNUM KNUM)
+LJFOLD(MIN KNUM KNUM)
+LJFOLD(MAX KNUM KNUM)
+LJFOLDF(kfold_numarith)
+{
+ lua_Number a = knumleft;
+ lua_Number b = knumright;
+ lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
+ return lj_ir_knum(J, y);
+}
+
+LJFOLD(FPMATH KNUM any)
+LJFOLDF(kfold_fpmath)
+{
+ lua_Number a = knumleft;
+ lua_Number y = lj_vm_foldfpm(a, fins->op2);
+ return lj_ir_knum(J, y);
+}
+
+LJFOLD(POWI KNUM KINT)
+LJFOLDF(kfold_powi)
+{
+ lua_Number a = knumleft;
+ lua_Number b = cast_num(fright->i);
+ lua_Number y = lj_vm_foldarith(a, b, IR_POWI - IR_ADD);
+ return lj_ir_knum(J, y);
+}
+
+static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
+{
+ switch (op) {
+ case IR_ADD: k1 += k2; break;
+ case IR_SUB: k1 -= k2; break;
+ case IR_BAND: k1 &= k2; break;
+ case IR_BOR: k1 |= k2; break;
+ case IR_BXOR: k1 ^= k2; break;
+ case IR_BSHL: k1 <<= (k2 & 31); break;
+ case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
+ case IR_BSAR: k1 >>= (k2 & 31); break;
+ case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
+ case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
+ default: lua_assert(0); break;
+ }
+ return k1;
+}
+
+LJFOLD(ADD KINT KINT)
+LJFOLD(SUB KINT KINT)
+LJFOLD(BAND KINT KINT)
+LJFOLD(BOR KINT KINT)
+LJFOLD(BXOR KINT KINT)
+LJFOLD(BSHL KINT KINT)
+LJFOLD(BSHR KINT KINT)
+LJFOLD(BSAR KINT KINT)
+LJFOLD(BROL KINT KINT)
+LJFOLD(BROR KINT KINT)
+LJFOLDF(kfold_intarith)
+{
+ return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
+}
+
+LJFOLD(BNOT KINT)
+LJFOLDF(kfold_bnot)
+{
+ return INTFOLD(~fleft->i);
+}
+
+LJFOLD(BSWAP KINT)
+LJFOLDF(kfold_bswap)
+{
+ return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
+}
+
+LJFOLD(TONUM KINT)
+LJFOLDF(kfold_tonum)
+{
+ return lj_ir_knum(J, cast_num(fleft->i));
+}
+
+LJFOLD(TOBIT KNUM KNUM)
+LJFOLDF(kfold_tobit)
+{
+ TValue tv;
+ tv.n = knumleft + knumright;
+ return INTFOLD((int32_t)tv.u32.lo);
+}
+
+LJFOLD(TOINT KNUM any)
+LJFOLDF(kfold_toint)
+{
+ lua_Number n = knumleft;
+ int32_t k = lj_num2int(n);
+ if (irt_isguard(fins->t) && n != cast_num(k)) {
+ /* We're about to create a guard which always fails, like TOINT +1.5.
+ ** Some pathological loops cause this during LICM, e.g.:
+ ** local x,k,t = 0,1.5,{1,[1.5]=2}
+ ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
+ ** assert(x == 300)
+ */
+ return FAILFOLD;
+ }
+ return INTFOLD(k);
+}
+
+LJFOLD(TOSTR KNUM)
+LJFOLDF(kfold_tostr_knum)
+{
+ return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
+}
+
+LJFOLD(TOSTR KINT)
+LJFOLDF(kfold_tostr_kint)
+{
+ return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
+}
+
+LJFOLD(STRTO KGC)
+LJFOLDF(kfold_strto)
+{
+ TValue n;
+ if (lj_str_numconv(strdata(ir_kstr(fleft)), &n))
+ return lj_ir_knum(J, numV(&n));
+ return FAILFOLD;
+}
+
+LJFOLD(SNEW STRREF KINT)
+LJFOLDF(kfold_snew)
+{
+ if (fright->i == 0)
+ return lj_ir_kstr(J, lj_str_new(J->L, "", 0));
+ PHIBARRIER(fleft);
+ if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
+ const char *s = strdata(ir_kstr(IR(fleft->op1)));
+ int32_t ofs = IR(fleft->op2)->i;
+ return lj_ir_kstr(J, lj_str_new(J->L, s+ofs, (size_t)fright->i));
+ }
+ return NEXTFOLD;
+}
+
+/* Must not use kfold_kref for numbers (could be NaN). */
+LJFOLD(EQ KNUM KNUM)
+LJFOLD(NE KNUM KNUM)
+LJFOLD(LT KNUM KNUM)
+LJFOLD(GE KNUM KNUM)
+LJFOLD(LE KNUM KNUM)
+LJFOLD(GT KNUM KNUM)
+LJFOLD(ULT KNUM KNUM)
+LJFOLD(UGE KNUM KNUM)
+LJFOLD(ULE KNUM KNUM)
+LJFOLD(UGT KNUM KNUM)
+LJFOLDF(kfold_numcomp)
+{
+ return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
+}
+
+LJFOLD(LT KINT KINT)
+LJFOLD(GE KINT KINT)
+LJFOLD(LE KINT KINT)
+LJFOLD(GT KINT KINT)
+LJFOLD(ULT KINT KINT)
+LJFOLD(UGE KINT KINT)
+LJFOLD(ULE KINT KINT)
+LJFOLD(UGT KINT KINT)
+LJFOLD(ABC KINT KINT)
+LJFOLDF(kfold_intcomp)
+{
+ int32_t a = fleft->i, b = fright->i;
+ switch ((IROp)fins->o) {
+ case IR_LT: return CONDFOLD(a < b);
+ case IR_GE: return CONDFOLD(a >= b);
+ case IR_LE: return CONDFOLD(a <= b);
+ case IR_GT: return CONDFOLD(a > b);
+ case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
+ case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
+ case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
+ case IR_ABC:
+ case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
+ default: lua_assert(0); return FAILFOLD;
+ }
+}
+
+LJFOLD(LT KGC KGC)
+LJFOLD(GE KGC KGC)
+LJFOLD(LE KGC KGC)
+LJFOLD(GT KGC KGC)
+LJFOLDF(kfold_strcomp)
+{
+ if (irt_isstr(fins->t)) {
+ GCstr *a = ir_kstr(fleft);
+ GCstr *b = ir_kstr(fright);
+ return CONDFOLD(lj_ir_strcmp(a, b, (IROp)fins->o));
+ }
+ return NEXTFOLD;
+}
+
+/* Don't constant-fold away FLOAD checks against KNULL. */
+LJFOLD(EQ FLOAD KNULL)
+LJFOLD(NE FLOAD KNULL)
+LJFOLDX(lj_opt_cse)
+
+/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
+LJFOLD(EQ any KNULL)
+LJFOLD(NE any KNULL)
+LJFOLD(EQ KNULL any)
+LJFOLD(NE KNULL any)
+LJFOLD(EQ KINT KINT) /* Constants are unique, so same refs <==> same value. */
+LJFOLD(NE KINT KINT)
+LJFOLD(EQ KGC KGC)
+LJFOLD(NE KGC KGC)
+LJFOLDF(kfold_kref)
+{
+ return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
+}
+
+/* -- Algebraic shortcuts ------------------------------------------------- */
+
+LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
+LJFOLD(FPMATH FPMATH IRFPM_CEIL)
+LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
+LJFOLDF(shortcut_round)
+{
+ IRFPMathOp op = (IRFPMathOp)fleft->op2;
+ if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
+ return LEFTFOLD; /* round(round_left(x)) = round_left(x) */
+ return NEXTFOLD;
+}
+
+LJFOLD(FPMATH TONUM IRFPM_FLOOR)
+LJFOLD(FPMATH TONUM IRFPM_CEIL)
+LJFOLD(FPMATH TONUM IRFPM_TRUNC)
+LJFOLD(ABS ABS KNUM)
+LJFOLDF(shortcut_left)
+{
+ return LEFTFOLD; /* f(g(x)) ==> g(x) */
+}
+
+LJFOLD(ABS NEG KNUM)
+LJFOLDF(shortcut_dropleft)
+{
+ PHIBARRIER(fleft);
+ fins->op1 = fleft->op1; /* abs(neg(x)) ==> abs(x) */
+ return RETRYFOLD;
+}
+
+/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
+LJFOLD(NEG NEG KNUM)
+LJFOLD(BNOT BNOT)
+LJFOLD(BSWAP BSWAP)
+LJFOLDF(shortcut_leftleft)
+{
+ PHIBARRIER(fleft); /* See above. Fold would be ok, but not beneficial. */
+ return fleft->op1; /* f(g(x)) ==> x */
+}
+
+LJFOLD(TONUM TOINT)
+LJFOLDF(shortcut_leftleft_toint)
+{
+ PHIBARRIER(fleft);
+ if (irt_isguard(fleft->t)) /* Only safe with a guarded TOINT. */
+ return fleft->op1; /* f(g(x)) ==> x */
+ return NEXTFOLD;
+}
+
+LJFOLD(TOINT TONUM any)
+LJFOLD(TOBIT TONUM KNUM) /* The inverse must NOT be shortcut! */
+LJFOLDF(shortcut_leftleft_across_phi)
+{
+ /* Fold even across PHI to avoid expensive int->num->int conversions. */
+ return fleft->op1; /* f(g(x)) ==> x */
+}
+
+/* -- FP algebraic simplifications ---------------------------------------- */
+
+/* FP arithmetic is tricky -- there's not much to simplify.
+** Please note the following common pitfalls before sending "improvements":
+** x+0 ==> x is INVALID for x=-0
+** 0-x ==> -x is INVALID for x=+0
+** x*0 ==> 0 is INVALID for x=-0, x=+-Inf or x=NaN
+*/
+
+LJFOLD(ADD NEG any)
+LJFOLDF(simplify_numadd_negx)
+{
+ PHIBARRIER(fleft);
+ fins->o = IR_SUB; /* (-a) + b ==> b - a */
+ fins->op1 = fins->op2;
+ fins->op2 = fleft->op1;
+ return RETRYFOLD;
+}
+
+LJFOLD(ADD any NEG)
+LJFOLDF(simplify_numadd_xneg)
+{
+ PHIBARRIER(fright);
+ fins->o = IR_SUB; /* a + (-b) ==> a - b */
+ fins->op2 = fright->op1;
+ return RETRYFOLD;
+}
+
+LJFOLD(SUB any KNUM)
+LJFOLDF(simplify_numsub_k)
+{
+ lua_Number n = knumright;
+ if (n == 0.0) /* x - (+-0) ==> x */
+ return LEFTFOLD;
+ return NEXTFOLD;
+}
+
+LJFOLD(SUB NEG KNUM)
+LJFOLDF(simplify_numsub_negk)
+{
+ PHIBARRIER(fleft);
+ fins->op2 = fleft->op1; /* (-x) - k ==> (-k) - x */
+ fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
+ return RETRYFOLD;
+}
+
+LJFOLD(SUB any NEG)
+LJFOLDF(simplify_numsub_xneg)
+{
+ PHIBARRIER(fright);
+ fins->o = IR_ADD; /* a - (-b) ==> a + b */
+ fins->op2 = fright->op1;
+ return RETRYFOLD;
+}
+
+LJFOLD(MUL any KNUM)
+LJFOLD(DIV any KNUM)
+LJFOLDF(simplify_nummuldiv_k)
+{
+ lua_Number n = knumright;
+ if (n == 1.0) { /* x o 1 ==> x */
+ return LEFTFOLD;
+ } else if (n == -1.0) { /* x o -1 ==> -x */
+ fins->o = IR_NEG;
+ fins->op2 = (IRRef1)lj_ir_knum_neg(J);
+ return RETRYFOLD;
+ } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
+ fins->o = IR_ADD;
+ fins->op2 = fins->op1;
+ return RETRYFOLD;
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(MUL NEG KNUM)
+LJFOLD(DIV NEG KNUM)
+LJFOLDF(simplify_nummuldiv_negk)
+{
+ PHIBARRIER(fleft);
+ fins->op1 = fleft->op1; /* (-a) o k ==> a o (-k) */
+ fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
+ return RETRYFOLD;
+}
+
+LJFOLD(MUL NEG NEG)
+LJFOLD(DIV NEG NEG)
+LJFOLDF(simplify_nummuldiv_negneg)
+{
+ PHIBARRIER(fleft);
+ PHIBARRIER(fright);
+ fins->op1 = fleft->op1; /* (-a) o (-b) ==> a o b */
+ fins->op2 = fright->op1;
+ return RETRYFOLD;
+}
+
+LJFOLD(POWI any KINT)
+LJFOLDF(simplify_powi_xk)
+{
+ int32_t k = fright->i;
+ TRef ref = fins->op1;
+ if (k == 0) /* x ^ 0 ==> 1 */
+ return lj_ir_knum_one(J); /* Result must be a number, not an int. */
+ if (k == 1) /* x ^ 1 ==> x */
+ return LEFTFOLD;
+ if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */
+ return NEXTFOLD;
+ if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */
+ ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
+ k = -k;
+ }
+ /* Unroll x^k for 1 <= k <= 65536. */
+ for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */
+ ref = emitir(IRTN(IR_MUL), ref, ref);
+ if ((k >>= 1) != 0) { /* Handle trailing bits. */
+ TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
+ for (; k != 1; k >>= 1) {
+ if (k & 1)
+ ref = emitir(IRTN(IR_MUL), ref, tmp);
+ tmp = emitir(IRTN(IR_MUL), tmp, tmp);
+ }
+ ref = emitir(IRTN(IR_MUL), ref, tmp);
+ }
+ return ref;
+}
+
+LJFOLD(POWI KNUM any)
+LJFOLDF(simplify_powi_kx)
+{
+ lua_Number n = knumleft;
+ if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
+ fins->o = IR_TONUM; /* Temporarily repurpose fins as TONUM(i). */
+ fins->op1 = fins->op2;
+ fins->op2 = 0;
+ fins->op2 = (IRRef1)lj_opt_fold(J); /* Fold/emit the TONUM; ref becomes op2. */
+ fins->op1 = (IRRef1)lj_ir_knum_one(J);
+ fins->o = IR_LDEXP; /* Rewrite fins itself to LDEXP(1.0, tonum(i)). */
+ return RETRYFOLD;
+ }
+ return NEXTFOLD;
+}
+
+/* -- FP conversion narrowing --------------------------------------------- */
+
+LJFOLD(TOINT ADD any)
+LJFOLD(TOINT SUB any)
+LJFOLD(TOBIT ADD KNUM)
+LJFOLD(TOBIT SUB KNUM)
+LJFOLDF(narrow_convert)
+{
+ PHIBARRIER(fleft);
+ /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
+ if (J->chain[IR_LOOP])
+ return NEXTFOLD;
+ return lj_opt_narrow_convert(J);
+}
+
+/* Relaxed CSE rule for TOINT allows commoning with stronger checks, too. */
+LJFOLD(TOINT any any)
+LJFOLDF(cse_toint)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+ IRRef ref, op1 = fins->op1;
+ uint8_t guard = irt_isguard(fins->t);
+ for (ref = J->chain[IR_TOINT]; ref > op1; ref = IR(ref)->prev)
+ if (IR(ref)->op1 == op1 && irt_isguard(IR(ref)->t) >= guard)
+ return ref; /* Reuse prior TOINT with an equal or stronger guard. */
+ }
+ return EMITFOLD; /* No fallthrough to regular CSE. */
+}
+
+/* -- Integer algebraic simplifications ----------------------------------- */
+
+LJFOLD(ADD any KINT)
+LJFOLD(ADDOV any KINT)
+LJFOLD(SUBOV any KINT)
+LJFOLDF(simplify_intadd_k)
+{
+ if (fright->i == 0) /* i o 0 ==> i */
+ return LEFTFOLD;
+ return NEXTFOLD;
+}
+
+LJFOLD(SUB any KINT)
+LJFOLDF(simplify_intsub_k)
+{
+ if (fright->i == 0) /* i - 0 ==> i */
+ return LEFTFOLD;
+ fins->o = IR_ADD; /* i - k ==> i + (-k) */
+ fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i); /* Overflow for -2^31 ok. */
+ return RETRYFOLD;
+}
+
+LJFOLD(SUB any any)
+LJFOLD(SUBOV any any)
+LJFOLDF(simplify_intsub)
+{
+ if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) /* i - i ==> 0 */
+ return INTFOLD(0); /* Not for FP: NaN/Inf - itself need not be 0. */
+ return NEXTFOLD;
+}
+
+LJFOLD(SUB ADD any)
+LJFOLDF(simplify_intsubadd_leftcancel)
+{
+ if (!irt_isnum(fins->t)) { /* Integer only: FP cancellation is inexact. */
+ PHIBARRIER(fleft);
+ if (fins->op2 == fleft->op1) /* (i + j) - i ==> j */
+ return fleft->op2;
+ if (fins->op2 == fleft->op2) /* (i + j) - j ==> i */
+ return fleft->op1;
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(SUB SUB any)
+LJFOLDF(simplify_intsubsub_leftcancel)
+{
+ if (!irt_isnum(fins->t)) { /* Integer only: FP cancellation is inexact. */
+ PHIBARRIER(fleft);
+ if (fins->op1 == fleft->op1) { /* (i - j) - i ==> 0 - j */
+ fins->op1 = (IRRef1)lj_ir_kint(J, 0);
+ fins->op2 = fleft->op2;
+ return RETRYFOLD;
+ }
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(SUB any SUB)
+LJFOLDF(simplify_intsubsub_rightcancel)
+{
+ if (!irt_isnum(fins->t)) { /* Integer only: FP cancellation is inexact. */
+ PHIBARRIER(fright);
+ if (fins->op1 == fright->op1) /* i - (i - j) ==> j */
+ return fright->op2;
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(SUB any ADD)
+LJFOLDF(simplify_intsubadd_rightcancel)
+{
+ if (!irt_isnum(fins->t)) { /* Integer only: FP cancellation is inexact. */
+ PHIBARRIER(fright);
+ if (fins->op1 == fright->op1) { /* i - (i + j) ==> 0 - j */
+ fins->op2 = fright->op2;
+ fins->op1 = (IRRef1)lj_ir_kint(J, 0);
+ return RETRYFOLD;
+ }
+ if (fins->op1 == fright->op2) { /* i - (j + i) ==> 0 - j */
+ fins->op2 = fright->op1;
+ fins->op1 = (IRRef1)lj_ir_kint(J, 0);
+ return RETRYFOLD;
+ }
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(SUB ADD ADD)
+LJFOLDF(simplify_intsubaddadd_cancel)
+{
+ if (!irt_isnum(fins->t)) { /* Integer only: FP cancellation is inexact. */
+ PHIBARRIER(fleft);
+ PHIBARRIER(fright);
+ if (fleft->op1 == fright->op1) { /* (i + j1) - (i + j2) ==> j1 - j2 */
+ fins->op1 = fleft->op2;
+ fins->op2 = fright->op2;
+ return RETRYFOLD;
+ }
+ if (fleft->op1 == fright->op2) { /* (i + j1) - (j2 + i) ==> j1 - j2 */
+ fins->op1 = fleft->op2;
+ fins->op2 = fright->op1;
+ return RETRYFOLD;
+ }
+ if (fleft->op2 == fright->op1) { /* (j1 + i) - (i + j2) ==> j1 - j2 */
+ fins->op1 = fleft->op1;
+ fins->op2 = fright->op2;
+ return RETRYFOLD;
+ }
+ if (fleft->op2 == fright->op2) { /* (j1 + i) - (j2 + i) ==> j1 - j2 */
+ fins->op1 = fleft->op1;
+ fins->op2 = fright->op1;
+ return RETRYFOLD;
+ }
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(BAND any KINT)
+LJFOLDF(simplify_band_k)
+{
+ if (fright->i == 0) /* i & 0 ==> 0 */
+ return RIGHTFOLD;
+ if (fright->i == -1) /* i & -1 ==> i */
+ return LEFTFOLD;
+ return NEXTFOLD;
+}
+
+LJFOLD(BOR any KINT)
+LJFOLDF(simplify_bor_k)
+{
+ if (fright->i == 0) /* i | 0 ==> i */
+ return LEFTFOLD;
+ if (fright->i == -1) /* i | -1 ==> -1 */
+ return RIGHTFOLD;
+ return NEXTFOLD;
+}
+
+LJFOLD(BXOR any KINT)
+LJFOLDF(simplify_bxor_k)
+{
+ if (fright->i == 0) /* i xor 0 ==> i */
+ return LEFTFOLD;
+ if (fright->i == -1) { /* i xor -1 ==> ~i */
+ fins->o = IR_BNOT; /* Rewrite in place as a unary BNOT. */
+ fins->op2 = 0;
+ return RETRYFOLD;
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(BSHL any KINT)
+LJFOLD(BSHR any KINT)
+LJFOLD(BSAR any KINT)
+LJFOLD(BROL any KINT)
+LJFOLD(BROR any KINT)
+LJFOLDF(simplify_shift_ik)
+{
+ int32_t k = (fright->i & 31); /* Shift counts are taken mod 32. */
+ if (k == 0) /* i o 0 ==> i */
+ return LEFTFOLD;
+ if (k != fright->i) { /* i o k ==> i o (k & 31) */
+ fins->op2 = (IRRef1)lj_ir_kint(J, k);
+ return RETRYFOLD;
+ }
+ if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&31) */
+ fins->o = IR_BROL; /* Canonicalize rotates to left-rotate only. */
+ fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&31);
+ return RETRYFOLD;
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(BSHL any BAND)
+LJFOLD(BSHR any BAND)
+LJFOLD(BSAR any BAND)
+LJFOLD(BROL any BAND)
+LJFOLD(BROR any BAND)
+LJFOLDF(simplify_shift_andk)
+{
+#if LJ_TARGET_MASKEDSHIFT
+ IRIns *irk = IR(fright->op2);
+ PHIBARRIER(fright);
+ if (irk->o == IR_KINT) { /* i o (j & 31) ==> i o j */
+ int32_t k = irk->i & 31;
+ if (k == 31) {
+ fins->op2 = fright->op1;
+ return RETRYFOLD;
+ }
+ }
+#endif
+ return NEXTFOLD;
+}
+
+LJFOLD(BSHL KINT any)
+LJFOLD(BSHR KINT any)
+LJFOLDF(simplify_shift1_ki)
+{
+ if (fleft->i == 0) /* 0 o i ==> 0 */
+ return LEFTFOLD;
+ return NEXTFOLD;
+}
+
+LJFOLD(BSAR KINT any)
+LJFOLD(BROL KINT any)
+LJFOLD(BROR KINT any)
+LJFOLDF(simplify_shift2_ki)
+{
+ if (fleft->i == 0 || fleft->i == -1) /* 0 o i ==> 0; -1 o i ==> -1 */
+ return LEFTFOLD;
+ return NEXTFOLD;
+}
+
+/* -- Reassociation ------------------------------------------------------- */
+
+LJFOLD(ADD ADD KINT)
+LJFOLD(BAND BAND KINT)
+LJFOLD(BOR BOR KINT)
+LJFOLD(BXOR BXOR KINT)
+LJFOLDF(reassoc_intarith_k)
+{
+ IRIns *irk = IR(fleft->op2);
+ if (irk->o == IR_KINT) {
+ int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
+ if (k == irk->i) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
+ return LEFTFOLD;
+ PHIBARRIER(fleft);
+ fins->op1 = fleft->op1;
+ fins->op2 = (IRRef1)lj_ir_kint(J, k);
+ return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(MIN MIN any)
+LJFOLD(MAX MAX any)
+LJFOLD(BAND BAND any)
+LJFOLD(BOR BOR any)
+LJFOLDF(reassoc_dup)
+{
+ PHIBARRIER(fleft);
+ if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
+ return LEFTFOLD; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
+ return NEXTFOLD;
+}
+
+LJFOLD(BXOR BXOR any)
+LJFOLDF(reassoc_bxor)
+{
+ PHIBARRIER(fleft);
+ if (fins->op2 == fleft->op1) /* (a xor b) xor a ==> b */
+ return fleft->op2;
+ if (fins->op2 == fleft->op2) /* (a xor b) xor b ==> a */
+ return fleft->op1;
+ return NEXTFOLD;
+}
+
+LJFOLD(BSHL BSHL KINT)
+LJFOLD(BSHR BSHR KINT)
+LJFOLD(BSAR BSAR KINT)
+LJFOLD(BROL BROL KINT)
+LJFOLD(BROR BROR KINT)
+LJFOLDF(reassoc_shift)
+{
+ IRIns *irk = IR(fleft->op2);
+ PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */
+ if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */
+ int32_t k = (irk->i & 31) + (fright->i & 31);
+ if (k > 31) { /* Combined shift too wide? */
+ if (fins->o == IR_BSHL || fins->o == IR_BSHR)
+ return INTFOLD(0);
+ else if (fins->o == IR_BSAR)
+ k = 31;
+ else
+ k &= 31;
+ }
+ fins->op1 = fleft->op1;
+ fins->op2 = (IRRef1)lj_ir_kint(J, k);
+ return RETRYFOLD;
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(MIN MIN KNUM)
+LJFOLD(MAX MAX KNUM)
+LJFOLDF(reassoc_minmax_k)
+{
+ IRIns *irk = IR(fleft->op2);
+ if (irk->o == IR_KNUM) {
+ lua_Number a = ir_knum(irk)->n;
+ lua_Number b = knumright;
+ lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
+ if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
+ return LEFTFOLD;
+ PHIBARRIER(fleft);
+ fins->op1 = fleft->op1;
+ fins->op2 = (IRRef1)lj_ir_knum(J, y);
+ return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(MIN MAX any)
+LJFOLD(MAX MIN any)
+LJFOLDF(reassoc_minmax_left)
+{
+ if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
+ return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
+ return NEXTFOLD;
+}
+
+LJFOLD(MIN any MAX)
+LJFOLD(MAX any MIN)
+LJFOLDF(reassoc_minmax_right)
+{
+ if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
+ return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
+ return NEXTFOLD;
+}
+
+/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
+** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
+** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
+*/
+LJFOLD(ABC any ADD)
+LJFOLDF(reassoc_abc)
+{
+ if (irref_isk(fright->op2)) {
+ IRIns *add2 = IR(fright->op1);
+ if (add2->o == IR_ADD && irref_isk(add2->op2) &&
+ IR(fright->op2)->i == -IR(add2->op2)->i) { /* Constants cancel out? */
+ IRRef ref = J->chain[IR_ABC];
+ IRRef lim = add2->op1; /* Search no further back than either operand. */
+ if (fins->op1 > lim) lim = fins->op1;
+ while (ref > lim) {
+ IRIns *ir = IR(ref);
+ if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
+ return DROPFOLD; /* Identical bounds check already exists ==> drop. */
+ ref = ir->prev;
+ }
+ }
+ }
+ return NEXTFOLD;
+}
+
+/* -- Commutativity ------------------------------------------------------- */
+
+/* The refs of commutative ops are canonicalized. Lower refs go to the right.
+** Rationale behind this:
+** - It (also) moves constants to the right.
+** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
+** - It helps CSE to find more matches.
+** - The assembler generates better code with constants at the right.
+*/
+
+LJFOLD(ADD any any)
+LJFOLD(MUL any any)
+LJFOLD(ADDOV any any)
+LJFOLDF(comm_swap)
+{
+ if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
+ IRRef1 tmp = fins->op1;
+ fins->op1 = fins->op2;
+ fins->op2 = tmp;
+ return RETRYFOLD;
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(EQ any any)
+LJFOLD(NE any any)
+LJFOLDF(comm_equal)
+{
+ /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
+ if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
+ return CONDFOLD(fins->o == IR_EQ);
+ return comm_swap(J); /* Otherwise just canonicalize operand order. */
+}
+
+LJFOLD(LT any any)
+LJFOLD(GE any any)
+LJFOLD(LE any any)
+LJFOLD(GT any any)
+LJFOLD(ULT any any)
+LJFOLD(UGE any any)
+LJFOLD(ULE any any)
+LJFOLD(UGT any any)
+LJFOLDF(comm_comp)
+{
+ /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
+ if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
+ return CONDFOLD(fins->o & 1); /* LSB picks drop vs. fail -- relies on IR opcode numbering. */
+ if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
+ IRRef1 tmp = fins->op1;
+ fins->op1 = fins->op2;
+ fins->op2 = tmp;
+ fins->o ^= 3; /* GT <-> LT, GE <-> LE, does not affect U */
+ return RETRYFOLD;
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(BAND any any)
+LJFOLD(BOR any any)
+LJFOLD(MIN any any)
+LJFOLD(MAX any any)
+LJFOLDF(comm_dup)
+{
+ if (fins->op1 == fins->op2) /* x o x ==> x */
+ return LEFTFOLD;
+ return comm_swap(J);
+}
+
+LJFOLD(BXOR any any)
+LJFOLDF(comm_bxor)
+{
+ if (fins->op1 == fins->op2) /* i xor i ==> 0 */
+ return INTFOLD(0);
+ return comm_swap(J);
+}
+
+/* -- Simplification of compound expressions ------------------------------ */
+
+static int32_t kfold_xload(IRIns *ir, const void *p) /* Constant-fold an XLOAD of type ir->t from p (possibly unaligned). */
+{
+#if !LJ_TARGET_X86ORX64
+#error "Missing support for unaligned loads"
+#endif
+ switch (irt_type(ir->t)) { /* Widen narrow loads to int32_t. */
+ case IRT_I8: return (int32_t)*(int8_t *)p;
+ case IRT_U8: return (int32_t)*(uint8_t *)p;
+ case IRT_I16: return (int32_t)*(int16_t *)p;
+ case IRT_U16: return (int32_t)*(uint16_t *)p;
+ default: lua_assert(irt_isint(ir->t)); return (int32_t)*(int32_t *)p;
+ }
+}
+
+/* Turn: string.sub(str, a, b) == kstr
+** into: string.byte(str, a) == string.byte(kstr, 1) etc.
+** Note: this creates unaligned XLOADs!
+*/
+LJFOLD(EQ SNEW KGC)
+LJFOLD(NE SNEW KGC)
+LJFOLDF(merge_eqne_snew_kgc)
+{
+ GCstr *kstr = ir_kstr(fright); /* The constant string operand. */
+ int32_t len = (int32_t)kstr->len;
+ lua_assert(irt_isstr(fins->t));
+ if (len <= 4) { /* Handle string lengths 0, 1, 2, 3, 4. */
+ IROp op = (IROp)fins->o;
+ IRRef strref = fleft->op1;
+ lua_assert(IR(strref)->o == IR_STRREF);
+ if (op == IR_EQ) {
+ emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len)); /* Guard the length. */
+ /* Caveat: fins/fleft/fright is no longer valid after emitir. */
+ } else {
+ /* NE is not expanded since this would need an OR of two conds. */
+ if (!irref_isk(fleft->op2)) /* Only handle the constant length case. */
+ return NEXTFOLD;
+ if (IR(fleft->op2)->i != len) /* Different lengths ==> never equal. */
+ return DROPFOLD;
+ }
+ if (len > 0) {
+ /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
+ uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, IRT_I8) :
+ len == 2 ? IRT(IR_XLOAD, IRT_U16) :
+ IRTI(IR_XLOAD));
+ TRef tmp = emitir(ot, strref, len > 1 ? IRXLOAD_UNALIGNED : 0);
+ TRef val = lj_ir_kint(J, kfold_xload(IR(tref_ref(tmp)), strdata(kstr)));
+ if (len == 3) /* Mask out the 4th byte (the NUL) of the 4 byte load. */
+ tmp = emitir(IRTI(IR_BAND), tmp,
+ lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
+ fins->op1 = (IRRef1)tmp;
+ fins->op2 = (IRRef1)val;
+ fins->ot = (IROpT)IRTGI(op); /* Retry as a guarded integer EQ/NE. */
+ return RETRYFOLD;
+ } else {
+ return DROPFOLD;
+ }
+ }
+ return NEXTFOLD;
+}
+
+/* -- Loads --------------------------------------------------------------- */
+
+/* Loads cannot be folded or passed on to CSE in general.
+** Alias analysis is needed to check for forwarding opportunities.
+**
+** Caveat: *all* loads must be listed here or they end up at CSE!
+*/
+
+LJFOLD(ALOAD any)
+LJFOLDX(lj_opt_fwd_aload)
+
+LJFOLD(HLOAD any)
+LJFOLDX(lj_opt_fwd_hload)
+
+LJFOLD(ULOAD any)
+LJFOLDX(lj_opt_fwd_uload)
+
+LJFOLD(TLEN any)
+LJFOLDX(lj_opt_fwd_tlen)
+
+/* Upvalue refs are really loads, but there are no corresponding stores.
+** So CSE is ok for them, except for UREFO across a GC step (see below).
+** If the referenced function is const, its upvalue addresses are const, too.
+** This can be used to improve CSE by looking for the same address,
+** even if the upvalues originate from a different function.
+*/
+LJFOLD(UREFO KGC any)
+LJFOLD(UREFC KGC any)
+LJFOLDF(cse_uref)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+ IRRef ref = J->chain[fins->o];
+ GCfunc *fn = ir_kfunc(fleft);
+ GCupval *uv = gco2uv(gcref(fn->l.uvptr[fins->op2]));
+ while (ref > 0) {
+ IRIns *ir = IR(ref);
+ if (irref_isk(ir->op1)) { /* Candidate must reference a const function. */
+ GCfunc *fn2 = ir_kfunc(IR(ir->op1));
+ if (gco2uv(gcref(fn2->l.uvptr[ir->op2])) == uv) { /* Same upvalue object? */
+ if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
+ break; /* No CSE of open upvalue refs across a GC step. */
+ return ref;
+ }
+ }
+ ref = ir->prev;
+ }
+ }
+ return EMITFOLD;
+}
+
+/* We can safely FOLD/CSE array/hash refs and field loads, since there
+** are no corresponding stores. But NEWREF may invalidate all of them.
+** Lacking better disambiguation for table references, these optimizations
+** are simply disabled across any NEWREF.
+** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
+** FLOADs. And NEWREF itself is treated like a store (see below).
+*/
+LJFOLD(HREF any any)
+LJFOLDF(cse_href)
+{
+ TRef tr = lj_opt_cse(J);
+ return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr;
+}
+
+LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
+LJFOLDF(fload_tab_tnew_asize)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
+ return INTFOLD(fleft->op1);
+ return NEXTFOLD;
+}
+
+LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
+LJFOLDF(fload_tab_tnew_hmask)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
+ return INTFOLD((1 << fleft->op2)-1);
+ return NEXTFOLD;
+}
+
+LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
+LJFOLDF(fload_tab_tdup_asize)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
+ return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
+ return NEXTFOLD;
+}
+
+LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
+LJFOLDF(fload_tab_tdup_hmask)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fins->op1 > J->chain[IR_NEWREF])
+ return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
+ return NEXTFOLD;
+}
+
+LJFOLD(FLOAD any IRFL_TAB_ARRAY)
+LJFOLD(FLOAD any IRFL_TAB_NODE)
+LJFOLD(FLOAD any IRFL_TAB_ASIZE)
+LJFOLD(FLOAD any IRFL_TAB_HMASK)
+LJFOLDF(fload_tab_ah)
+{
+ TRef tr = lj_opt_cse(J);
+ return tref_ref(tr) < J->chain[IR_NEWREF] ? EMITFOLD : tr;
+}
+
+/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
+LJFOLD(FLOAD KGC IRFL_STR_LEN)
+LJFOLDF(fload_str_len)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
+ return INTFOLD((int32_t)ir_kstr(fleft)->len);
+ return NEXTFOLD;
+}
+
+LJFOLD(FLOAD any IRFL_STR_LEN)
+LJFOLDX(lj_opt_cse)
+
+/* All other field loads need alias analysis. */
+LJFOLD(FLOAD any any)
+LJFOLDX(lj_opt_fwd_fload)
+
+/* This is for LOOP only. Recording handles SLOADs internally. */
+LJFOLD(SLOAD any any)
+LJFOLDF(fwd_sload)
+{
+ lua_assert(J->slot[fins->op1] != 0);
+ return J->slot[fins->op1];
+}
+
+/* Strings are immutable, so we can safely FOLD/CSE an XLOAD of a string. */
+LJFOLD(XLOAD STRREF any)
+LJFOLDF(xload_str)
+{
+ if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
+ GCstr *str = ir_kstr(IR(fleft->op1));
+ int32_t ofs = IR(fleft->op2)->i;
+ lua_assert((MSize)ofs < str->len);
+ lua_assert((MSize)(ofs + (1<<((fins->op2>>8)&3))) <= str->len);
+ return INTFOLD(kfold_xload(fins, strdata(str)+ofs));
+ }
+ return CSEFOLD;
+}
+/* No XLOAD of non-strings (yet), so we don't need a (XLOAD any any) rule. */
+
+/* -- Write barriers ------------------------------------------------------ */
+
+/* Write barriers are amenable to CSE, but not across any incremental
+** GC steps.
+**
+** The same logic applies to open upvalue references, because the stack
+** may be resized during a GC step.
+*/
+LJFOLD(TBAR any)
+LJFOLD(OBAR any any)
+LJFOLD(UREFO any any)
+LJFOLDF(barrier_tab)
+{
+ TRef tr = lj_opt_cse(J);
+ if (gcstep_barrier(J, tref_ref(tr))) /* CSE across GC step? */
+ return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */
+ return tr;
+}
+
+LJFOLD(TBAR TNEW)
+LJFOLD(TBAR TDUP)
+LJFOLDF(barrier_tnew_tdup)
+{
+ /* New tables are always white and never need a barrier. */
+ if (fins->op1 < J->chain[IR_LOOP]) /* Except across a GC step. */
+ return NEXTFOLD;
+ return DROPFOLD;
+}
+
+/* -- Stores and allocations ---------------------------------------------- */
+
+/* Stores and allocations cannot be folded or passed on to CSE in general.
+** But some stores can be eliminated with dead-store elimination (DSE).
+**
+** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
+*/
+
+LJFOLD(ASTORE any any)
+LJFOLD(HSTORE any any)
+LJFOLDX(lj_opt_dse_ahstore)
+
+LJFOLD(USTORE any any)
+LJFOLDX(lj_opt_dse_ustore)
+
+LJFOLD(FSTORE any any)
+LJFOLDX(lj_opt_dse_fstore)
+
+LJFOLD(NEWREF any any) /* Treated like a store. */
+LJFOLD(TNEW any any)
+LJFOLD(TDUP any)
+LJFOLDF(store_raw)
+{
+ return EMITFOLD;
+}
+
+/* ------------------------------------------------------------------------ */
+
+/* Every entry in the generated hash table is a 32 bit pattern:
+**
+** xxxxxxxx iiiiiiii llllllll rrrrrrrr
+**
+** xxxxxxxx = 8 bit index into fold function table
+** iiiiiiii = 8 bit folded instruction opcode
+** llllllll = 8 bit left instruction opcode
+** rrrrrrrr = 8 bit right instruction opcode or 8 bits from literal field
+*/
+
+#include "lj_folddef.h"
+
+/* ------------------------------------------------------------------------ */
+
+/* Fold IR instruction. */
+TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
+{
+ uint32_t key, any;
+ IRRef ref;
+
+ if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
+ lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
+ JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
+ /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
+ if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
+ return lj_opt_cse(J);
+
+ /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
+ if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
+ (JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
+ irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
+ return lj_ir_emit(J);
+
+ /* DSE disabled? Emit raw IR for stores. */
+ if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
+ return lj_ir_emit(J);
+ }
+
+ /* Fold engine start/retry point. */
+retry:
+ /* Construct key from opcode and operand opcodes (unless literal/none). */
+ key = ((uint32_t)fins->o << 16);
+ if (fins->op1 >= J->cur.nk) { /* Ops below nk are literal fields, not refs. */
+ key += (uint32_t)IR(fins->op1)->o << 8;
+ *fleft = *IR(fins->op1); /* Copy operand ins for the fold functions. */
+ }
+ if (fins->op2 >= J->cur.nk) {
+ key += (uint32_t)IR(fins->op2)->o;
+ *fright = *IR(fins->op2);
+ } else {
+ key += (fins->op2 & 0xffu); /* For IRFPM_* and IRFL_*. */
+ }
+
+ /* Check for a match in order from most specific to least specific. */
+ any = 0;
+ for (;;) {
+ uint32_t k = key | any;
+ uint32_t h = fold_hashkey(k);
+ uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */
+ if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
+ ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
+ if (ref != NEXTFOLD)
+ break;
+ }
+ if (any == 0xffff) /* Exhausted folding. Pass on to CSE. */
+ return lj_opt_cse(J);
+ any = (any | (any >> 8)) ^ 0xff00; /* Wildcards: 0 -> ff00 -> 00ff -> ffff. */
+ }
+
+ /* Return value processing, ordered by frequency. */
+ if (LJ_LIKELY(ref >= MAX_FOLD))
+ return TREF(ref, irt_t(IR(ref)->t)); /* Regular ref: wrap as TRef. */
+ if (ref == RETRYFOLD)
+ goto retry; /* Fold function rewrote fins; run the engine again. */
+ if (ref == KINTFOLD)
+ return lj_ir_kint(J, fins->i);
+ if (ref == FAILFOLD)
+ lj_trace_err(J, LJ_TRERR_GFAIL); /* Guard would always fail. */
+ lua_assert(ref == DROPFOLD);
+ return REF_DROP;
+}
+
+/* -- Common-Subexpression Elimination ------------------------------------ */
+
+/* CSE an IR instruction. This is very fast due to the skip-list chains. */
+TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
+{
+ /* Avoid narrow to wide store-to-load forwarding stall */
+ IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); /* Pack operands. */
+ IROp op = fins->o;
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+ /* Limited search for same operands in per-opcode chain. */
+ IRRef ref = J->chain[op];
+ IRRef lim = fins->op1;
+ if (fins->op2 > lim) lim = fins->op2; /* Relies on lit < REF_BIAS. */
+ while (ref > lim) { /* A match must come after both of its operands. */
+ if (IR(ref)->op12 == op12)
+ return TREF(ref, irt_t(IR(ref)->t)); /* Common subexpression found. */
+ ref = IR(ref)->prev;
+ }
+ }
+ /* Otherwise emit IR (inlined for speed). */
+ {
+ IRRef ref = lj_ir_nextins(J);
+ IRIns *ir = IR(ref);
+ ir->prev = J->chain[op]; /* Link new ins into the per-opcode chain. */
+ ir->op12 = op12;
+ J->chain[op] = (IRRef1)ref;
+ ir->o = fins->o;
+ J->guardemit.irt |= fins->t.irt; /* Accumulate guard bits for snapshots. */
+ return TREF(ref, irt_t((ir->t = fins->t)));
+ }
+}
+
+/* ------------------------------------------------------------------------ */
+
+#undef IR
+#undef fins
+#undef fleft
+#undef fright
+#undef knumleft
+#undef knumright
+#undef emitir
+
+#endif
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
new file mode 100644
index 00000000..adc0c476
--- /dev/null
+++ b/src/lj_opt_loop.c
@@ -0,0 +1,358 @@
+/*
+** LOOP: Loop Optimizations.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_opt_loop_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+#include "lj_trace.h"
+#include "lj_snap.h"
+#include "lj_vm.h"
+
+/* Loop optimization:
+**
+** Traditional Loop-Invariant Code Motion (LICM) splits the instructions
+** of a loop into invariant and variant instructions. The invariant
+** instructions are hoisted out of the loop and only the variant
+** instructions remain inside the loop body.
+**
+** Unfortunately LICM is mostly useless for compiling dynamic languages.
+** The IR has many guards and most of the subsequent instructions are
+** control-dependent on them. The first non-hoistable guard would
+** effectively prevent hoisting of all subsequent instructions.
+**
+** That's why we use a special form of unrolling using copy-substitution,
+** combined with redundancy elimination:
+**
+** The recorded instruction stream is re-emitted to the compiler pipeline
+** with substituted operands. The substitution table is filled with the
+** refs returned by re-emitting each instruction. This can be done
+** on-the-fly, because the IR is in strict SSA form, where every ref is
+** defined before its use.
+**
+** This approach generates two code sections, separated by the LOOP
+** instruction:
+**
+** 1. The recorded instructions form a kind of pre-roll for the loop. It
+** contains a mix of invariant and variant instructions and performs
+** exactly one loop iteration (but not necessarily the 1st iteration).
+**
+** 2. The loop body contains only the variant instructions and performs
+** all remaining loop iterations.
+**
+** On first sight that looks like a waste of space, because the variant
+** instructions are present twice. But the key insight is that the
+** pre-roll honors the control-dependencies for *both* the pre-roll itself
+** *and* the loop body!
+**
+** It also means one doesn't have to explicitly model control-dependencies
+** (which, BTW, wouldn't help LICM much). And it's much easier to
+** integrate sparse snapshotting with this approach.
+**
+** One of the nicest aspects of this approach is that all of the
+** optimizations of the compiler pipeline (FOLD, CSE, FWD, etc.) can be
+** reused with only minor restrictions (e.g. one should not fold
+** instructions across loop-carried dependencies).
+**
+** But in general all optimizations can be applied which only need to look
+** backwards into the generated instruction stream. At any point in time
+** during the copy-substitution process this contains both a static loop
+** iteration (the pre-roll) and a dynamic one (from the to-be-copied
+** instruction up to the end of the partial loop body).
+**
+** Since control-dependencies are implicitly kept, CSE also applies to all
+** kinds of guards. The major advantage is that all invariant guards can
+** be hoisted, too.
+**
+** Load/store forwarding works across loop iterations, too. This is
+** important if loop-carried dependencies are kept in upvalues or tables.
+** E.g. 'self.idx = self.idx + 1' deep down in some OO-style method may
+** become a forwarded loop-recurrence after inlining.
+**
+** Since the IR is in SSA form, loop-carried dependencies have to be
+** modeled with PHI instructions. The potential candidates for PHIs are
+** collected on-the-fly during copy-substitution. After eliminating the
+** redundant ones, PHI instructions are emitted *below* the loop body.
+**
+** Note that this departure from traditional SSA form doesn't change the
+** semantics of the PHI instructions themselves. But it greatly simplifies
+** on-the-fly generation of the IR and the machine code.
+*/
+
+/* Some local macros to save typing. Undef'd at the end. */
+#define IR(ref) (&J->cur.ir[(ref)])
+
+/* Pass IR on to next optimization in chain (FOLD). */
+#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
+
+/* Emit raw IR without passing through optimizations. */
+#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
+
+/* -- PHI elimination ----------------------------------------------------- */
+
+/* Emit or eliminate collected PHIs. */
+static void loop_emit_phi(jit_State *J, IRRef1 *subst, IRRef1 *phi, IRRef nphi)
+{
+ int pass2 = 0;
+ IRRef i, nslots;
+ IRRef invar = J->chain[IR_LOOP]; /* Refs at or below LOOP are invariant. */
+ /* Pass #1: mark redundant and potentially redundant PHIs. */
+ for (i = 0; i < nphi; i++) {
+ IRRef lref = phi[i];
+ IRRef rref = subst[lref];
+ if (lref == rref || rref == REF_DROP) { /* Invariants are redundant. */
+ irt_setmark(IR(lref)->t);
+ } else if (!(IR(rref)->op1 == lref || IR(rref)->op2 == lref)) {
+ /* Quick check for simple recurrences failed, need pass2. */
+ irt_setmark(IR(lref)->t);
+ pass2 = 1;
+ }
+ }
+ /* Pass #2: traverse variant part and clear marks of non-redundant PHIs. */
+ if (pass2) {
+ for (i = J->cur.nins-1; i > invar; i--) { /* Walk loop body backwards. */
+ IRIns *ir = IR(i);
+ if (!irref_isk(ir->op1)) irt_clearmark(IR(ir->op1)->t);
+ if (!irref_isk(ir->op2)) irt_clearmark(IR(ir->op2)->t);
+ }
+ }
+ /* Pass #3: add PHIs for variant slots without a corresponding SLOAD. */
+ nslots = J->baseslot+J->maxslot;
+ for (i = 1; i < nslots; i++) {
+ IRRef ref = tref_ref(J->slot[i]);
+ if (!irref_isk(ref) && ref != subst[ref]) { /* Non-constant, variant slot? */
+ IRIns *ir = IR(ref);
+ irt_clearmark(ir->t); /* Unmark potential uses, too. */
+ if (!irt_isphi(ir->t) && !irt_ispri(ir->t)) {
+ irt_setphi(ir->t);
+ if (nphi >= LJ_MAX_PHI)
+ lj_trace_err(J, LJ_TRERR_PHIOV); /* Too many PHIs: abort the trace. */
+ phi[nphi++] = (IRRef1)ref;
+ }
+ }
+ }
+ /* Pass #4: emit PHI instructions or eliminate PHIs. */
+ for (i = 0; i < nphi; i++) {
+ IRRef lref = phi[i];
+ IRIns *ir = IR(lref);
+ if (!irt_ismarked(ir->t)) { /* Emit PHI if not marked. */
+ IRRef rref = subst[lref];
+ if (rref > invar)
+ irt_setphi(IR(rref)->t);
+ emitir_raw(IRT(IR_PHI, irt_type(ir->t)), lref, rref); /* PHI(left, right). */
+ } else { /* Otherwise eliminate PHI. */
+ irt_clearmark(ir->t);
+ irt_clearphi(ir->t);
+ }
+ }
+}
+
+/* -- Loop unrolling using copy-substitution ------------------------------ */
+
+/* Unroll loop. */
+static void loop_unroll(jit_State *J)
+{
+ IRRef1 phi[LJ_MAX_PHI];
+ uint32_t nphi = 0;
+ IRRef1 *subst;
+ SnapShot *osnap, *snap;
+ IRRef2 *loopmap;
+ BCReg loopslots;
+ MSize nsnap, nsnapmap;
+ IRRef ins, invar, osnapref;
+
+ /* Use temp buffer for substitution table.
+ ** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
+ ** Note: don't call into the VM or run the GC or the buffer may be gone.
+ */
+ invar = J->cur.nins;
+ subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
+ (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS;
+ subst[REF_BASE] = REF_BASE; /* BASE always substitutes to itself. */
+
+ /* LOOP separates the pre-roll from the loop body. */
+ emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);
+
+ /* Ensure size for copy-substituted snapshots (minus #0 and loop snapshot). */
+ nsnap = J->cur.nsnap;
+ if (LJ_UNLIKELY(2*nsnap-2 > J->sizesnap)) {
+ MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
+ if (2*nsnap-2 > maxsnap)
+ lj_trace_err(J, LJ_TRERR_SNAPOV); /* Snapshot buffer overflow: abort. */
+ lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
+ J->cur.snap = J->snapbuf;
+ }
+ nsnapmap = J->cur.nsnapmap; /* Use temp. copy to avoid undo. */
+ if (LJ_UNLIKELY(nsnapmap*2 > J->sizesnapmap)) {
+ J->snapmapbuf = (IRRef2 *)lj_mem_realloc(J->L, J->snapmapbuf,
+ J->sizesnapmap*sizeof(IRRef2),
+ 2*J->sizesnapmap*sizeof(IRRef2));
+ J->cur.snapmap = J->snapmapbuf;
+ J->sizesnapmap *= 2;
+ }
+
+ /* The loop snapshot is used for fallback substitutions. */
+ snap = &J->cur.snap[nsnap-1];
+ loopmap = &J->cur.snapmap[snap->mapofs];
+ loopslots = snap->nslots;
+ /* The PC of snapshot #0 and the loop snapshot must match. */
+ lua_assert(loopmap[loopslots] == J->cur.snapmap[J->cur.snap[0].nslots]);
+
+ /* Start substitution with snapshot #1 (#0 is empty for root traces). */
+ osnap = &J->cur.snap[1];
+ osnapref = osnap->ref;
+
+ /* Copy and substitute all recorded instructions and snapshots. */
+ for (ins = REF_FIRST; ins < invar; ins++) {
+ IRIns *ir;
+ IRRef op1, op2;
+
+ /* Copy-substitute snapshot. */
+ if (ins >= osnapref) {
+ IRRef2 *nmap, *omap = &J->cur.snapmap[osnap->mapofs];
+ BCReg s, nslots;
+ uint32_t nmapofs, nframelinks;
+ if (irt_isguard(J->guardemit)) { /* Guard emitted in between? */
+ nmapofs = nsnapmap;
+ snap++; /* Add new snapshot. */
+ } else {
+ nmapofs = snap->mapofs; /* Overwrite previous snapshot. */
+ }
+ J->guardemit.irt = 0; /* Reset accumulated guard bits. */
+ nslots = osnap->nslots;
+ nframelinks = osnap->nframelinks;
+ snap->mapofs = (uint16_t)nmapofs;
+ snap->ref = (IRRef1)J->cur.nins;
+ snap->nslots = (uint8_t)nslots;
+ snap->nframelinks = (uint8_t)nframelinks;
+ snap->count = 0;
+ osnap++;
+ osnapref = osnap->ref;
+ nsnapmap = nmapofs + nslots + nframelinks;
+ nmap = &J->cur.snapmap[nmapofs];
+ /* Substitute snapshot slots. */
+ for (s = 0; s < nslots; s++) {
+ IRRef ref = snap_ref(omap[s]);
+ if (ref) {
+ if (!irref_isk(ref))
+ ref = subst[ref];
+ } else if (s < loopslots) {
+ ref = loopmap[s]; /* Empty slot: fall back to the loop snapshot. */
+ }
+ nmap[s] = ref;
+ }
+ /* Copy frame links. */
+ nmap += nslots;
+ omap += nslots;
+ for (s = 0; s < nframelinks; s++)
+ nmap[s] = omap[s];
+ }
+
+ /* Substitute instruction operands. */
+ ir = IR(ins);
+ op1 = ir->op1;
+ if (!irref_isk(op1)) op1 = subst[op1];
+ op2 = ir->op2;
+ if (!irref_isk(op2)) op2 = subst[op2];
+ if (irm_kind(lj_ir_mode[ir->o]) == IRM_N &&
+ op1 == ir->op1 && op2 == ir->op2) { /* Regular invariant ins? */
+ subst[ins] = (IRRef1)ins; /* Shortcut. */
+ } else {
+ /* Re-emit substituted instruction to the FOLD/CSE/etc. pipeline. */
+ IRType1 t = ir->t; /* Get this first, since emitir may invalidate ir. */
+ IRRef ref = tref_ref(emitir(ir->ot & ~IRT_ISPHI, op1, op2));
+ subst[ins] = (IRRef1)ref;
+ if (ref != ins && ref < invar) { /* Loop-carried dependency? */
+ IRIns *irr = IR(ref);
+ /* Potential PHI? */
+ if (!irref_isk(ref) && !irt_isphi(irr->t) && !irt_ispri(irr->t)) {
+ irt_setphi(irr->t);
+ if (nphi >= LJ_MAX_PHI)
+ lj_trace_err(J, LJ_TRERR_PHIOV); /* Too many PHIs: abort. */
+ phi[nphi++] = (IRRef1)ref;
+ }
+ /* Check all loop-carried dependencies for type instability. */
+ if (!irt_sametype(t, irr->t)) {
+ if (irt_isnum(t) && irt_isinteger(irr->t)) /* Fix int->num case. */
+ subst[ins] = tref_ref(emitir(IRTN(IR_TONUM), ref, 0));
+ else
+ lj_trace_err(J, LJ_TRERR_TYPEINS); /* Unfixable type instability. */
+ }
+ }
+ }
+ }
+ if (irt_isguard(J->guardemit)) { /* Guard emitted in between? */
+ J->cur.nsnapmap = (uint16_t)nsnapmap;
+ snap++;
+ } else {
+ J->cur.nsnapmap = (uint16_t)snap->mapofs; /* Last snapshot is redundant. */
+ }
+ J->cur.nsnap = (uint16_t)(snap - J->cur.snap);
+ lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
+
+ loop_emit_phi(J, subst, phi, nphi);
+}
+
+/* Undo any partial changes made by the loop optimization. */
+static void loop_undo(jit_State *J, IRRef ins)
+{
+ lj_ir_rollback(J, ins); /* Discard all IR emitted past the saved top. */
+ for (ins--; ins >= REF_FIRST; ins--) { /* Remove flags. */
+ IRIns *ir = IR(ins);
+ irt_clearphi(ir->t); /* Clear PHI/mark flags set during unrolling. */
+ irt_clearmark(ir->t);
+ }
+}
+
+/* Protected callback for loop optimization. */
+static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
+{
+ UNUSED(L); UNUSED(dummy);
+ loop_unroll((jit_State *)ud); /* ud carries the jit_State. */
+ return NULL; /* No result; errors propagate through the protected call. */
+}
+
+/* Loop optimization. */
+int lj_opt_loop(jit_State *J)
+{
+ IRRef nins = J->cur.nins; /* Remember IR top for a possible undo. */
+ int errcode = lj_vm_cpcall(J->L, cploop_opt, NULL, J);
+ if (LJ_UNLIKELY(errcode)) {
+ lua_State *L = J->L;
+ if (errcode == LUA_ERRRUN && tvisnum(L->top-1)) { /* Trace error? */
+ int32_t e = lj_num2int(numV(L->top-1));
+ switch ((TraceError)e) {
+ case LJ_TRERR_TYPEINS: /* Type instability. */
+ case LJ_TRERR_GFAIL: /* Guard would always fail. */
+ /* Unrolling via recording fixes many cases, e.g. a flipped boolean. */
+ if (--J->instunroll < 0) /* But do not unroll forever. */
+ break;
+ L->top--; /* Remove error object. */
+ J->guardemit.irt = 0;
+ loop_undo(J, nins); /* Roll back the partial unroll before retrying. */
+ return 1; /* Loop optimization failed, continue recording. */
+ default:
+ break;
+ }
+ }
+ lj_err_throw(L, errcode); /* Propagate all other errors. */
+ }
+ return 0; /* Loop optimization is ok. */
+}
+
+#undef IR
+#undef emitir
+#undef emitir_raw
+
+#endif
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
new file mode 100644
index 00000000..77a9c0e7
--- /dev/null
+++ b/src/lj_opt_mem.c
@@ -0,0 +1,550 @@
+/*
+** Memory access optimizations.
+** AA: Alias Analysis using high-level semantic disambiguation.
+** FWD: Load Forwarding (L2L) + Store Forwarding (S2L).
+** DSE: Dead-Store Elimination.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_opt_mem_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_tab.h"
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+
+/* Some local macros to save typing. Undef'd at the end. */
+#define IR(ref) (&J->cur.ir[(ref)])
+#define fins (&J->fold.ins)
+
+/*
+** Caveat #1: return value is not always a TRef -- only use with tref_ref().
+** Caveat #2: FWD relies on active CSE for xREF operands -- see lj_opt_fold().
+*/
+
+/* Return values from alias analysis. */
+typedef enum {
+ ALIAS_NO, /* The two refs CANNOT alias (exact). */
+ ALIAS_MAY, /* The two refs MAY alias (inexact). */
+ ALIAS_MUST /* The two refs MUST alias (exact). */
+} AliasRet;
+
+/* -- ALOAD/HLOAD forwarding and ASTORE/HSTORE elimination ---------------- */
+
+/* Alias analysis for array and hash access using key-based disambiguation. */
+static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
+{
+ IRRef ka = refa->op2;
+ IRRef kb = refb->op2;
+ IRIns *keya, *keyb;
+ if (refa == refb)
+ return ALIAS_MUST; /* Shortcut for same refs. */
+ keya = IR(ka);
+ if (keya->o == IR_KSLOT) { ka = keya->op1; keya = IR(ka); }
+ keyb = IR(kb);
+ if (keyb->o == IR_KSLOT) { kb = keyb->op1; keyb = IR(kb); }
+ if (ka == kb) {
+ /* Same key. Check for same table with different ref (NEWREF vs. HREF). */
+ IRIns *ta = refa;
+ IRIns *tb = refb;
+ if (ta->o == IR_HREFK || ta->o == IR_AREF) ta = IR(ta->op1);
+ if (tb->o == IR_HREFK || tb->o == IR_AREF) tb = IR(tb->op1);
+ if (ta->op1 == tb->op1)
+ return ALIAS_MUST; /* Same key, same table. */
+ else
+ return ALIAS_MAY; /* Same key, possibly different table. */
+ }
+ if (irref_isk(ka) && irref_isk(kb))
+ return ALIAS_NO; /* Different constant keys. */
+ if (refa->o == IR_AREF) {
+ /* Disambiguate array references based on index arithmetic. */
+ lua_assert(refb->o == IR_AREF);
+ if (refa->op1 == refb->op1) {
+ /* Same table, different non-const array keys. */
+ int32_t ofsa = 0, ofsb = 0;
+ IRRef basea = ka, baseb = kb;
+ /* Gather base and offset from t[base] or t[base+-ofs]. */
+ if (keya->o == IR_ADD && irref_isk(keya->op2)) {
+ basea = keya->op1;
+ ofsa = IR(keya->op2)->i;
+ if (basea == kb && ofsa != 0)
+ return ALIAS_NO; /* t[base+-ofs] vs. t[base]. */
+ }
+ if (keyb->o == IR_ADD && irref_isk(keyb->op2)) {
+ baseb = keyb->op1;
+ ofsb = IR(keyb->op2)->i;
+ if (ka == baseb && ofsb != 0)
+ return ALIAS_NO; /* t[base] vs. t[base+-ofs]. */
+ }
+ if (basea == baseb && ofsa != ofsb)
+ return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */
+ }
+ } else {
+ /* Disambiguate hash references based on the type of their keys. */
+ lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
+ (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF));
+ if (!irt_sametype(keya->t, keyb->t))
+ return ALIAS_NO; /* Different key types. */
+ }
+ return ALIAS_MAY; /* Anything else: we just don't know. */
+}
+
+/* Array and hash load forwarding. */
+static TRef fwd_ahload(jit_State *J, IRRef xref)
+{
+ IRIns *xr = IR(xref);
+ IRRef lim = xref; /* Search limit. */
+ IRRef ref;
+
+ /* Search for conflicting stores. */
+ ref = J->chain[fins->o+IRDELTA_L2S];
+ while (ref > xref) {
+ IRIns *store = IR(ref);
+ switch (aa_ahref(J, xr, IR(store->op1))) {
+ case ALIAS_NO: break; /* Continue searching. */
+ case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */
+ case ALIAS_MUST: return store->op2; /* Store forwarding. */
+ }
+ ref = store->prev;
+ }
+
+ /* No conflicting store (yet): const-fold loads from allocations. */
+ {
+ IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr;
+ IRRef tab = ir->op1;
+ ir = IR(tab);
+ if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) {
+ /* A NEWREF with a number key may end up pointing to the array part.
+ ** But it's referenced from HSTORE and not found in the ASTORE chain.
+ ** For now simply consider this a conflict without forwarding anything.
+ */
+ if (xr->o == IR_AREF) {
+ IRRef ref2 = J->chain[IR_NEWREF];
+ while (ref2 > tab) {
+ IRIns *newref = IR(ref2);
+ if (irt_isnum(IR(newref->op2)->t))
+ goto conflict;
+ ref2 = newref->prev;
+ }
+ }
+ /* NEWREF inhibits CSE for HREF, and dependent FLOADs from HREFK/AREF.
+ ** But the above search for conflicting stores was limited by xref.
+ ** So continue searching, limited by the TNEW/TDUP. Store forwarding
+ ** is ok, too. A conflict does NOT limit the search for a matching load.
+ */
+ while (ref > tab) {
+ IRIns *store = IR(ref);
+ switch (aa_ahref(J, xr, IR(store->op1))) {
+ case ALIAS_NO: break; /* Continue searching. */
+ case ALIAS_MAY: goto conflict; /* Conflicting store. */
+ case ALIAS_MUST: return store->op2; /* Store forwarding. */
+ }
+ ref = store->prev;
+ }
+ lua_assert(ir->o != IR_TNEW || irt_isnil(fins->t));
+ if (irt_ispri(fins->t)) {
+ return TREF_PRI(irt_type(fins->t));
+ } else if (irt_isnum(fins->t) || irt_isstr(fins->t)) {
+ TValue keyv;
+ cTValue *tv;
+ IRIns *key = IR(xr->op2);
+ if (key->o == IR_KSLOT) key = IR(key->op1);
+ lj_ir_kvalue(J->L, &keyv, key);
+ tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
+ lua_assert(itype2irt(tv) == irt_type(fins->t));
+ if (irt_isnum(fins->t))
+ return lj_ir_knum_nn(J, tv->u64);
+ else
+ return lj_ir_kstr(J, strV(tv));
+ }
+ /* Otherwise: don't intern as a constant. */
+ }
+ }
+
+conflict:
+ /* Try to find a matching load. Below the conflicting store, if any. */
+ ref = J->chain[fins->o];
+ while (ref > lim) {
+ IRIns *load = IR(ref);
+ if (load->op1 == xref)
+ return ref; /* Load forwarding. */
+ ref = load->prev;
+ }
+ return 0; /* Conflict or no match. */
+}
+
+/* Reassociate ALOAD across PHIs to handle t[i-1] forwarding case. */
+static TRef fwd_aload_reassoc(jit_State *J)
+{
+ IRIns *irx = IR(fins->op1);
+ IRIns *key = IR(irx->op2);
+ if (key->o == IR_ADD && irref_isk(key->op2)) {
+ IRIns *add2 = IR(key->op1);
+ if (add2->o == IR_ADD && irref_isk(add2->op2) &&
+ IR(key->op2)->i == -IR(add2->op2)->i) {
+ IRRef ref = J->chain[IR_AREF];
+ IRRef lim = add2->op1;
+ if (irx->op1 > lim) lim = irx->op1;
+ while (ref > lim) {
+ IRIns *ir = IR(ref);
+ if (ir->op1 == irx->op1 && ir->op2 == add2->op1)
+ return fwd_ahload(J, ref);
+ ref = ir->prev;
+ }
+ }
+ }
+ return 0;
+}
+
+/* ALOAD forwarding. */
+TRef LJ_FASTCALL lj_opt_fwd_aload(jit_State *J)
+{
+ IRRef ref;
+ if ((ref = fwd_ahload(J, fins->op1)) ||
+ (ref = fwd_aload_reassoc(J)))
+ return ref;
+ return EMITFOLD;
+}
+
+/* HLOAD forwarding. */
+TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J)
+{
+ IRRef ref = fwd_ahload(J, fins->op1);
+ if (ref)
+ return ref;
+ return EMITFOLD;
+}
+
+/* ASTORE/HSTORE elimination. */
+TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
+{
+ IRRef xref = fins->op1; /* xREF reference. */
+ IRRef val = fins->op2; /* Stored value reference. */
+ IRIns *xr = IR(xref);
+ IRRef1 *refp = &J->chain[fins->o];
+ IRRef ref = *refp;
+ while (ref > xref) { /* Search for redundant or conflicting stores. */
+ IRIns *store = IR(ref);
+ switch (aa_ahref(J, xr, IR(store->op1))) {
+ case ALIAS_NO:
+ break; /* Continue searching. */
+ case ALIAS_MAY: /* Store to MAYBE the same location. */
+ if (store->op2 != val) /* Conflict if the value is different. */
+ goto doemit;
+ break; /* Otherwise continue searching. */
+ case ALIAS_MUST: /* Store to the same location. */
+ if (store->op2 == val) /* Same value: drop the new store. */
+ return DROPFOLD;
+ /* Different value: try to eliminate the redundant store. */
+ if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
+ IRIns *ir;
+ /* Check for any intervening guards (includes conflicting loads). */
+ for (ir = IR(J->cur.nins-1); ir > store; ir--)
+ if (irt_isguard(ir->t))
+ goto doemit; /* No elimination possible. */
+ /* Remove redundant store from chain and replace with NOP. */
+ *refp = store->prev;
+ store->o = IR_NOP; /* Unchained NOP -- does anybody care? */
+ store->t.irt = IRT_NIL;
+ store->op1 = store->op2 = 0;
+ store->prev = 0;
+ /* Now emit the new store instead. */
+ }
+ goto doemit;
+ }
+ ref = *(refp = &store->prev);
+ }
+doemit:
+ return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+}
+
+/* -- ULOAD forwarding ---------------------------------------------------- */
+
+/* The current alias analysis for upvalues is very simplistic. It only
+** disambiguates between the unique upvalues of the same function.
+** This is good enough for now, since most upvalues are read-only.
+**
+** A more precise analysis would be feasible with the help of the parser:
+** generate a unique key for every upvalue, even across all prototypes.
+** Lacking a realistic use-case, it's unclear whether this is beneficial.
+*/
+static AliasRet aa_uref(IRIns *refa, IRIns *refb)
+{
+ if (refa->o != refb->o)
+ return ALIAS_NO; /* Different UREFx type. */
+ if (refa->op1 != refb->op1)
+ return ALIAS_MAY; /* Different function. */
+ else if (refa->op2 == refb->op2)
+ return ALIAS_MUST; /* Same function, same upvalue idx. */
+ else
+ return ALIAS_NO; /* Same function, different upvalue idx. */
+}
+
+/* ULOAD forwarding. */
+TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
+{
+ IRRef uref = fins->op1;
+ IRRef lim = uref; /* Search limit. */
+ IRIns *xr = IR(uref);
+ IRRef ref;
+
+ /* Search for conflicting stores. */
+ ref = J->chain[IR_USTORE];
+ while (ref > uref) {
+ IRIns *store = IR(ref);
+ switch (aa_uref(xr, IR(store->op1))) {
+ case ALIAS_NO: break; /* Continue searching. */
+ case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */
+ case ALIAS_MUST: return store->op2; /* Store forwarding. */
+ }
+ ref = store->prev;
+ }
+
+conflict:
+ /* Try to find a matching load. Below the conflicting store, if any. */
+ ref = J->chain[IR_ULOAD];
+ while (ref > lim) {
+ IRIns *load = IR(ref);
+ if (load->op1 == uref)
+ return ref; /* Load forwarding. */
+ ref = load->prev;
+ }
+ return EMITFOLD; /* Conflict or no match. */
+}
+
+/* USTORE elimination. */
+TRef LJ_FASTCALL lj_opt_dse_ustore(jit_State *J)
+{
+ IRRef xref = fins->op1; /* xREF reference. */
+ IRRef val = fins->op2; /* Stored value reference. */
+ IRIns *xr = IR(xref);
+ IRRef1 *refp = &J->chain[IR_USTORE];
+ IRRef ref = *refp;
+ while (ref > xref) { /* Search for redundant or conflicting stores. */
+ IRIns *store = IR(ref);
+ switch (aa_uref(xr, IR(store->op1))) {
+ case ALIAS_NO:
+ break; /* Continue searching. */
+ case ALIAS_MAY: /* Store to MAYBE the same location. */
+ if (store->op2 != val) /* Conflict if the value is different. */
+ goto doemit;
+ break; /* Otherwise continue searching. */
+ case ALIAS_MUST: /* Store to the same location. */
+ if (store->op2 == val) /* Same value: drop the new store. */
+ return DROPFOLD;
+ /* Different value: try to eliminate the redundant store. */
+ if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
+ IRIns *ir;
+ /* Check for any intervening guards (includes conflicting loads). */
+ for (ir = IR(J->cur.nins-1); ir > store; ir--)
+ if (irt_isguard(ir->t))
+ goto doemit; /* No elimination possible. */
+ /* Remove redundant store from chain and replace with NOP. */
+ *refp = store->prev;
+ store->o = IR_NOP; /* Unchained NOP -- does anybody care? */
+ store->t.irt = IRT_NIL;
+ store->op1 = store->op2 = 0;
+ store->prev = 0;
+ /* Now emit the new store instead. */
+ }
+ goto doemit;
+ }
+ ref = *(refp = &store->prev);
+ }
+doemit:
+ return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+}
+
+/* -- FLOAD forwarding and FSTORE elimination ----------------------------- */
+
+/* Alias analysis for field access.
+** Field loads are cheap and field stores are rare.
+** Simple disambiguation based on field types is good enough.
+*/
+static AliasRet aa_fref(IRIns *refa, IRIns *refb)
+{
+ if (refa->op2 != refb->op2)
+ return ALIAS_NO; /* Different fields. */
+ if (refa->op1 == refb->op1)
+ return ALIAS_MUST; /* Same field, same object. */
+ else
+ return ALIAS_MAY; /* Same field, possibly different object. */
+}
+
+/* Only the loads for mutable fields end up here (see FOLD). */
+TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J)
+{
+ IRRef oref = fins->op1; /* Object reference. */
+ IRRef fid = fins->op2; /* Field ID. */
+ IRRef lim = oref; /* Search limit. */
+ IRRef ref;
+
+ /* Search for conflicting stores. */
+ ref = J->chain[IR_FSTORE];
+ while (ref > oref) {
+ IRIns *store = IR(ref);
+ switch (aa_fref(fins, IR(store->op1))) {
+ case ALIAS_NO: break; /* Continue searching. */
+ case ALIAS_MAY: lim = ref; goto conflict; /* Limit search for load. */
+ case ALIAS_MUST: return store->op2; /* Store forwarding. */
+ }
+ ref = store->prev;
+ }
+
+ /* No conflicting store: const-fold field loads from allocations. */
+ if (fid == IRFL_TAB_META) {
+ IRIns *ir = IR(oref);
+ if (ir->o == IR_TNEW || ir->o == IR_TDUP)
+ return lj_ir_knull(J, IRT_TAB);
+ }
+
+conflict:
+ /* Try to find a matching load. Below the conflicting store, if any. */
+ ref = J->chain[IR_FLOAD];
+ while (ref > lim) {
+ IRIns *load = IR(ref);
+ if (load->op1 == oref && load->op2 == fid)
+ return ref; /* Load forwarding. */
+ ref = load->prev;
+ }
+ return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+}
+
+/* FSTORE elimination. */
+TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
+{
+ IRRef fref = fins->op1; /* FREF reference. */
+ IRRef val = fins->op2; /* Stored value reference. */
+ IRIns *xr = IR(fref);
+ IRRef1 *refp = &J->chain[IR_FSTORE];
+ IRRef ref = *refp;
+ while (ref > fref) { /* Search for redundant or conflicting stores. */
+ IRIns *store = IR(ref);
+ switch (aa_fref(xr, IR(store->op1))) {
+ case ALIAS_NO:
+ break; /* Continue searching. */
+ case ALIAS_MAY:
+ if (store->op2 != val) /* Conflict if the value is different. */
+ goto doemit;
+ break; /* Otherwise continue searching. */
+ case ALIAS_MUST:
+ if (store->op2 == val) /* Same value: drop the new store. */
+ return DROPFOLD;
+ /* Different value: try to eliminate the redundant store. */
+ if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
+ IRIns *ir;
+ /* Check for any intervening guards or conflicting loads. */
+ for (ir = IR(J->cur.nins-1); ir > store; ir--)
+ if (irt_isguard(ir->t) || (ir->o == IR_FLOAD && ir->op2 == xr->op2))
+ goto doemit; /* No elimination possible. */
+ /* Remove redundant store from chain and replace with NOP. */
+ *refp = store->prev;
+ store->o = IR_NOP; /* Unchained NOP -- does anybody care? */
+ store->t.irt = IRT_NIL;
+ store->op1 = store->op2 = 0;
+ store->prev = 0;
+ /* Now emit the new store instead. */
+ }
+ goto doemit;
+ }
+ ref = *(refp = &store->prev);
+ }
+doemit:
+ return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+}
+
+/* -- TLEN forwarding ----------------------------------------------------- */
+
+/* This is rather simplistic right now, but better than nothing. */
+TRef LJ_FASTCALL lj_opt_fwd_tlen(jit_State *J)
+{
+ IRRef tab = fins->op1; /* Table reference. */
+ IRRef lim = tab; /* Search limit. */
+ IRRef ref;
+
+ /* Any ASTORE is a conflict and limits the search. */
+ if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE];
+
+ /* Search for conflicting HSTORE with numeric key. */
+ ref = J->chain[IR_HSTORE];
+ while (ref > lim) {
+ IRIns *store = IR(ref);
+ IRIns *href = IR(store->op1);
+ IRIns *key = IR(href->op2);
+ if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
+ lim = ref; /* Conflicting store found, limits search for TLEN. */
+ break;
+ }
+ ref = store->prev;
+ }
+
+ /* Try to find a matching load. Below the conflicting store, if any. */
+ ref = J->chain[IR_TLEN];
+ while (ref > lim) {
+ IRIns *tlen = IR(ref);
+ if (tlen->op1 == tab)
+ return ref; /* Load forwarding. */
+ ref = tlen->prev;
+ }
+ return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
+}
+
+/* -- ASTORE/HSTORE previous type analysis -------------------------------- */
+
+/* Check whether the previous value for a table store is non-nil.
+** This can be derived either from a previous store or from a previous
+** load (because all loads from tables perform a type check).
+**
+** The result of the analysis can be used to avoid the metatable check
+** and the guard against HREF returning niltv. Both of these are cheap,
+** so let's not spend too much effort on the analysis.
+**
+** A result of 1 is exact: previous value CANNOT be nil.
+** A result of 0 is inexact: previous value MAY be nil.
+*/
+int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref)
+{
+ /* First check stores. */
+ IRRef ref = J->chain[loadop+IRDELTA_L2S];
+ while (ref > xref) {
+ IRIns *store = IR(ref);
+ if (store->op1 == xref) { /* Same xREF. */
+ /* A nil store MAY alias, but a non-nil store MUST alias. */
+ return !irt_isnil(store->t);
+ } else if (irt_isnil(store->t)) { /* Must check any nil store. */
+ IRRef skref = IR(store->op1)->op2;
+ IRRef xkref = IR(xref)->op2;
+ /* Same key type MAY alias. */
+ if (irt_sametype(IR(skref)->t, IR(xkref)->t)) {
+ if (skref == xkref || !irref_isk(skref) || !irref_isk(xkref))
+ return 0; /* A nil store with same const key or var key MAY alias. */
+ /* Different const keys CANNOT alias. */
+ } /* Different key types CANNOT alias. */
+ } /* Other non-nil stores MAY alias. */
+ ref = store->prev;
+ }
+
+ /* Check loads since nothing could be derived from stores. */
+ ref = J->chain[loadop];
+ while (ref > xref) {
+ IRIns *load = IR(ref);
+ if (load->op1 == xref) { /* Same xREF. */
+ /* A nil load MAY alias, but a non-nil load MUST alias. */
+ return !irt_isnil(load->t);
+ } /* Other non-nil loads MAY alias. */
+ ref = load->prev;
+ }
+ return 0; /* Nothing derived at all, previous value MAY be nil. */
+}
+
+/* ------------------------------------------------------------------------ */
+
+#undef IR
+#undef fins
+
+#endif
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
new file mode 100644
index 00000000..60a6afb8
--- /dev/null
+++ b/src/lj_opt_narrow.c
@@ -0,0 +1,430 @@
+/*
+** NARROW: Narrowing of numbers to integers (double to int32_t).
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_opt_narrow_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_str.h"
+#include "lj_bc.h"
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+#include "lj_trace.h"
+
+/* Rationale for narrowing optimizations:
+**
+** Lua has only a single number type and this is a FP double by default.
+** Narrowing doubles to integers does not pay off for the interpreter on a
+** current-generation x86/x64 machine. Most FP operations need the same
+** amount of execution resources as their integer counterparts, except
+** with slightly longer latencies. Longer latencies are a non-issue for
+** the interpreter, since they are usually hidden by other overhead.
+**
+** The total CPU execution bandwidth is the sum of the bandwidth of the FP
+** and the integer units, because they execute in parallel. The FP units
+** have an equal or higher bandwidth than the integer units. Not using
+** them means losing execution bandwidth. Moving work away from them to
+** the already quite busy integer units is a losing proposition.
+**
+** The situation for JIT-compiled code is a bit different: the higher code
+** density makes the extra latencies much more visible. Tight loops expose
+** the latencies for updating the induction variables. Array indexing
+** requires narrowing conversions with high latencies and additional
+** guards (to check that the index is really an integer). And many common
+** optimizations only work on integers.
+**
+** One solution would be speculative, eager narrowing of all number loads.
+** This causes many problems, like losing -0 or the need to resolve type
+** mismatches between traces. It also effectively forces the integer type
+** to have overflow-checking semantics. This impedes many basic
+** optimizations and requires adding overflow checks to all integer
+** arithmetic operations (whereas FP arithmetics can do without).
+**
+** Always replacing an FP op with an integer op plus an overflow check is
+** counter-productive on a current-generation super-scalar CPU. Although
+** the overflow check branches are highly predictable, they will clog the
+** execution port for the branch unit and tie up reorder buffers. This is
+** turning a pure data-flow dependency into a different data-flow
+** dependency (with slightly lower latency) *plus* a control dependency.
+** In general, you don't want to do this since latencies due to data-flow
+** dependencies can be well hidden by out-of-order execution.
+**
+** A better solution is to keep all numbers as FP values and only narrow
+** when it's beneficial to do so. LuaJIT uses predictive narrowing for
+** induction variables and demand-driven narrowing for index expressions
+** and bit operations. Additionally it can eliminate or hoist most of the
+** resulting overflow checks. Regular arithmetic computations are never
+** narrowed to integers.
+**
+** The integer type in the IR has convenient wrap-around semantics and
+** ignores overflow. Extra operations have been added for
+** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type.
+** Apart from reducing overall complexity of the compiler, this also
+** nicely solves the problem where you want to apply algebraic
+** simplifications to ADD, but not to ADDOV. And the assembler can use lea
+** instead of an add for integer ADD, but not for ADDOV (lea does not
+** affect the flags, but it helps to avoid register moves).
+**
+** Note that all of the above has to be reconsidered if LuaJIT is to be
+** ported to architectures with slow FP operations or with no hardware FPU
+** at all. In the latter case an integer-only port may be the best overall
+** solution (if this still meets user demands).
+*/
+
+/* Some local macros to save typing. Undef'd at the end. */
+#define IR(ref) (&J->cur.ir[(ref)])
+#define fins (&J->fold.ins)
+
+/* Pass IR on to next optimization in chain (FOLD). */
+#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
+
+#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
+
+/* -- Elimination of narrowing type conversions --------------------------- */
+
+/* Narrowing of index expressions and bit operations is demand-driven. The
+** trace recorder emits a narrowing type conversion (TOINT or TOBIT) in
+** all of these cases (e.g. array indexing or string indexing). FOLD
+** already takes care of eliminating simple redundant conversions like
+** TOINT(TONUM(x)) ==> x.
+**
+** But the surrounding code is FP-heavy and all arithmetic operations are
+** performed on FP numbers. Consider a common example such as 'x=t[i+1]',
+** with 'i' already an integer (due to induction variable narrowing). The
+** index expression would be recorded as TOINT(ADD(TONUM(i), 1)), which is
+** clearly suboptimal.
+**
+** One can do better by recursively backpropagating the narrowing type
+** conversion across FP arithmetic operations. This turns FP ops into
+** their corresponding integer counterparts. Depending on the semantics of
+** the conversion they also need to check for overflow. Currently only ADD
+** and SUB are supported.
+**
+** The above example can be rewritten as ADDOV(TOINT(TONUM(i)), 1) and
+** then into ADDOV(i, 1) after folding of the conversions. The original FP
+** ops remain in the IR and are eliminated by DCE since all references to
+** them are gone.
+**
+** Special care has to be taken to avoid narrowing across an operation
+** which is potentially operating on non-integral operands. One obvious
+** case is when an expression contains a non-integral constant, but ends
+** up as an integer index at runtime (like t[x+1.5] with x=0.5).
+**
+** Operations with two non-constant operands illustrate a similar problem
+** (like t[a+b] with a=1.5 and b=2.5). Backpropagation has to stop there,
+** unless it can be proven that either operand is integral (e.g. by CSEing
+** a previous conversion). As a not-so-obvious corollary this logic also
+** applies for a whole expression tree (e.g. t[(a+1)+(b+1)]).
+**
+** Correctness of the transformation is guaranteed by avoiding to expand
+** the tree by adding more conversions than the one we would need to emit
+** if not backpropagating. TOBIT employs a more optimistic rule, because
+** the conversion has special semantics, designed to make the life of the
+** compiler writer easier. ;-)
+**
+** Using on-the-fly backpropagation of an expression tree doesn't work
+** because it's unknown whether the transform is correct until the end.
+** This either requires IR rollback and cache invalidation for every
+** subtree or a two-pass algorithm. The former didn't work out too well,
+** so the code now combines a recursive collector with a stack-based
+** emitter.
+**
+** [A recursive backpropagation algorithm with backtracking, employing
+** skip-list lookup and round-robin caching, emitting stack operations
+** on-the-fly for a stack-based interpreter -- and all of that in a meager
+** kilobyte? Yep, compilers are a great treasure chest. Throw away your
+** textbooks and read the codebase of a compiler today!]
+**
+** There's another optimization opportunity for array indexing: it's
+** always accompanied by an array bounds-check. The outermost overflow
+** check may be delegated to the ABC operation. This works because ABC is
+** an unsigned comparison and wrap-around due to overflow creates negative
+** numbers.
+**
+** But this optimization is only valid for constants that cannot overflow
+** an int32_t into the range of valid array indexes [0..2^27+1). A check
+** for +-2^30 is safe since -2^31 - 2^30 wraps to 2^30 and 2^31-1 + 2^30
+** wraps to -2^30-1.
+**
+** It's also good enough in practice, since e.g. t[i+1] or t[i-10] are
+** quite common. So the above example finally ends up as ADD(i, 1)!
+**
+** Later on, the assembler is able to fuse the whole array reference and
+** the ADD into the memory operands of loads and other instructions. This
+** is why LuaJIT is able to generate very pretty (and fast) machine code
+** for array indexing. And that, my dear, concludes another story about
+** one of the hidden secrets of LuaJIT ...
+*/
+
+/* Maximum backpropagation depth and maximum stack size. */
+#define NARROW_MAX_BACKPROP 100
+#define NARROW_MAX_STACK 256
+
+/* Context used for narrowing of type conversions. */
+typedef struct NarrowConv {
+ jit_State *J; /* JIT compiler state. */
+ IRRef2 *sp; /* Current stack pointer. */
+ IRRef2 *maxsp; /* Maximum stack pointer minus redzone. */
+ int lim; /* Limit on the number of emitted conversions. */
+ IRRef mode; /* Conversion mode (IRTOINT_*). */
+ IRRef2 stack[NARROW_MAX_STACK]; /* Stack holding the stack-machine code. */
+} NarrowConv;
+
+/* The stack machine has a 32 bit instruction format: [IROpT | IRRef1]
+** The lower 16 bits hold a reference (or 0). The upper 16 bits hold
+** the IR opcode + type or one of the following special opcodes:
+*/
+enum {
+ NARROW_REF, /* Push ref. */
+ NARROW_CONV, /* Push conversion of ref. */
+ NARROW_INT /* Push KINT ref. The next code holds an int32_t. */
+};
+
+/* Lookup a reference in the backpropagation cache. */
+static IRRef narrow_bpc_get(jit_State *J, IRRef1 key, IRRef mode)
+{
+ ptrdiff_t i;
+ for (i = 0; i < BPROP_SLOTS; i++) {
+ BPropEntry *bp = &J->bpropcache[i];
+ if (bp->key == key && bp->mode <= mode) /* Stronger checks are ok, too. */
+ return bp->val;
+ }
+ return 0;
+}
+
+/* Add an entry to the backpropagation cache. */
+static void narrow_bpc_set(jit_State *J, IRRef1 key, IRRef1 val, IRRef mode)
+{
+ uint32_t slot = J->bpropslot;
+ BPropEntry *bp = &J->bpropcache[slot];
+ J->bpropslot = (slot + 1) & (BPROP_SLOTS-1);
+ bp->key = key;
+ bp->val = val;
+ bp->mode = mode;
+}
+
+/* Backpropagate narrowing conversion. Return number of needed conversions. */
+static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
+{
+ jit_State *J = nc->J;
+ IRIns *ir = IR(ref);
+ IRRef cref;
+
+ /* Check the easy cases first. */
+ if (ir->o == IR_TONUM) { /* Undo inverse conversion. */
+ *nc->sp++ = IRREF2(ir->op1, NARROW_REF);
+ return 0;
+ } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
+ lua_Number n = ir_knum(ir)->n;
+ if (nc->mode == IRTOINT_TOBIT) { /* Allows a wider range of constants. */
+ int64_t k64 = (int64_t)n;
+ if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */
+ *nc->sp++ = IRREF2(0, NARROW_INT);
+ *nc->sp++ = (IRRef2)k64; /* But always truncate to 32 bits. */
+ return 0;
+ }
+ } else {
+ int32_t k = lj_num2int(n);
+ if (n == cast_num(k)) { /* Only if constant is really an integer. */
+ *nc->sp++ = IRREF2(0, NARROW_INT);
+ *nc->sp++ = (IRRef2)k;
+ return 0;
+ }
+ }
+ return 10; /* Never narrow other FP constants (this is rare). */
+ }
+
+ /* Try to CSE the conversion. Stronger checks are ok, too. */
+ for (cref = J->chain[fins->o]; cref > ref; cref = IR(cref)->prev)
+ if (IR(cref)->op1 == ref &&
+ irt_isguard(IR(cref)->t) >= irt_isguard(fins->t)) {
+ *nc->sp++ = IRREF2(cref, NARROW_REF);
+ return 0; /* Already there, no additional conversion needed. */
+ }
+
+ /* Backpropagate across ADD/SUB. */
+ if (ir->o == IR_ADD || ir->o == IR_SUB) {
+ /* Try cache lookup first. */
+ IRRef bpref, mode = nc->mode;
+ if (mode == IRTOINT_INDEX && depth > 0)
+ mode = IRTOINT_CHECK; /* Inner conversions need a stronger check. */
+ bpref = narrow_bpc_get(nc->J, (IRRef1)ref, mode);
+ if (bpref) {
+ *nc->sp++ = IRREF2(bpref, NARROW_REF);
+ return 0;
+ }
+ if (++depth < NARROW_MAX_BACKPROP && nc->sp < nc->maxsp) {
+ IRRef2 *savesp = nc->sp;
+ int count = narrow_conv_backprop(nc, ir->op1, depth);
+ count += narrow_conv_backprop(nc, ir->op2, depth);
+ if (count <= nc->lim) { /* Limit total number of conversions. */
+ *nc->sp++ = IRREF2(ref, IRTI(ir->o));
+ return count;
+ }
+ nc->sp = savesp; /* Too many conversions, need to backtrack. */
+ }
+ }
+
+ /* Otherwise add a conversion. */
+ *nc->sp++ = IRREF2(ref, NARROW_CONV);
+ return 1;
+}
+
+/* Emit the conversions collected during backpropagation. */
+static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
+{
+ /* The fins fields must be saved now -- emitir() overwrites them. */
+ IROpT guardot = irt_isguard(fins->t) ? IRTG(IR_ADDOV-IR_ADD, 0) : 0;
+ IROpT convot = fins->ot;
+ IRRef1 convop2 = fins->op2;
+ IRRef2 *next = nc->stack; /* List of instructions from backpropagation. */
+ IRRef2 *last = nc->sp;
+ IRRef2 *sp = nc->stack; /* Recycle the stack to store operands. */
+ while (next < last) { /* Simple stack machine to process the ins. list. */
+ IRRef2 ref = *next++;
+ IROpT op = ref >> 16;
+ if (op == NARROW_REF) {
+ *sp++ = ref;
+ } else if (op == NARROW_CONV) {
+ *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */
+ } else if (op == NARROW_INT) {
+ lua_assert(next < last);
+ *sp++ = lj_ir_kint(J, *next++);
+ } else { /* Regular IROpT. Pops two operands and pushes one result. */
+ IRRef mode = nc->mode;
+ lua_assert(sp >= nc->stack+2);
+ sp--;
+ /* Omit some overflow checks for array indexing. See comments above. */
+ if (mode == IRTOINT_INDEX) {
+ if (next == last && irref_isk((IRRef1)sp[0]) &&
+ (uint32_t)IR((IRRef1)sp[0])->i + 0x40000000 < 0x80000000)
+ guardot = 0;
+ else
+ mode = IRTOINT_CHECK; /* Otherwise cache a stronger check. */
+ }
+ sp[-1] = emitir(op+guardot, sp[-1], sp[0]);
+ narrow_bpc_set(J, (IRRef1)ref, (IRRef1)sp[-1], mode); /* Add to cache. */
+ }
+ }
+ lua_assert(sp == nc->stack+1);
+ return nc->stack[0];
+}
+
+/* Narrow a type conversion of an arithmetic operation. */
+TRef LJ_FASTCALL lj_opt_narrow_convert(jit_State *J)
+{
+ if ((J->flags & JIT_F_OPT_NARROW)) {
+ NarrowConv nc;
+ nc.J = J;
+ nc.sp = nc.stack;
+ nc.maxsp = &nc.stack[NARROW_MAX_STACK-4];
+ if (fins->o == IR_TOBIT) {
+ nc.mode = IRTOINT_TOBIT; /* Used only in the backpropagation cache. */
+ nc.lim = 2; /* TOBIT can use a more optimistic rule. */
+ } else {
+ nc.mode = fins->op2;
+ nc.lim = 1;
+ }
+ if (narrow_conv_backprop(&nc, fins->op1, 0) <= nc.lim)
+ return narrow_conv_emit(J, &nc);
+ }
+ return NEXTFOLD;
+}
+
+/* -- Narrowing of arithmetic operators ----------------------------------- */
+
+/* Check whether a number fits into an int32_t (-0 is ok, too). */
+static int numisint(lua_Number n)
+{
+ return (n == cast_num(lj_num2int(n)));
+}
+
+/* Narrowing of modulo operator. */
+TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc)
+{
+ TRef tmp;
+ if ((J->flags & JIT_F_OPT_NARROW) &&
+ tref_isk(rc) && tref_isint(rc)) { /* Optimize x % k. */
+ int32_t k = IR(tref_ref(rc))->i;
+ if (k > 0 && (k & (k-1)) == 0) { /* i % 2^k ==> band(i, 2^k-1) */
+ if (tref_isint(rb))
+ return emitir(IRTI(IR_BAND), rb, lj_ir_kint(J, k-1));
+ }
+ }
+ /* b % c ==> b - floor(b/c)*c */
+ rb = lj_ir_tonum(J, rb);
+ rc = lj_ir_tonum(J, rc);
+ tmp = emitir(IRTN(IR_DIV), rb, rc);
+ tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_FLOOR);
+ tmp = emitir(IRTN(IR_MUL), tmp, rc);
+ return emitir(IRTN(IR_SUB), rb, tmp);
+}
+
+/* Narrowing of power operator or math.pow. */
+TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
+{
+ lua_Number n;
+ if (tvisstr(vc) && !lj_str_numconv(strVdata(vc), vc))
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ n = numV(vc);
+ /* Limit narrowing for pow to small exponents (or for two constants). */
+ if ((tref_isint(rc) && tref_isk(rc) && tref_isk(rb)) ||
+ ((J->flags & JIT_F_OPT_NARROW) &&
+ (numisint(n) && n >= -65536.0 && n <= 65536.0))) {
+ TRef tmp;
+ if (!tref_isinteger(rc)) {
+ if (tref_isstr(rc))
+ rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
+ rc = emitir(IRTGI(IR_TOINT), rc, IRTOINT_CHECK); /* Guarded TOINT! */
+ }
+ if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
+ tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1));
+ emitir(IRTGI(IR_LE), tmp, lj_ir_kint(J, 2*65536-2147483647-1));
+ }
+ return emitir(IRTN(IR_POWI), rb, rc);
+ }
+ /* FOLD covers most cases, but some are easier to do here. */
+ if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
+ return rb; /* 1 ^ x ==> 1 */
+ rc = lj_ir_tonum(J, rc);
+ if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5)
+ return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */
+ /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */
+ rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2);
+ rc = emitir(IRTN(IR_MUL), rb, rc);
+ return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2);
+}
+
+/* -- Predictive narrowing of induction variables ------------------------- */
+
+/* Narrow the FORL index type by looking at the runtime values. */
+IRType lj_opt_narrow_forl(cTValue *forbase)
+{
+ lua_assert(tvisnum(&forbase[FORL_IDX]) &&
+ tvisnum(&forbase[FORL_STOP]) &&
+ tvisnum(&forbase[FORL_STEP]));
+ /* Narrow only if the runtime values of start/stop/step are all integers. */
+ if (numisint(numV(&forbase[FORL_IDX])) &&
+ numisint(numV(&forbase[FORL_STOP])) &&
+ numisint(numV(&forbase[FORL_STEP]))) {
+ /* And if the loop index can't possibly overflow. */
+ lua_Number step = numV(&forbase[FORL_STEP]);
+ lua_Number sum = numV(&forbase[FORL_STOP]) + step;
+ if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0)
+ return IRT_INT;
+ }
+ return IRT_NUM;
+}
+
+#undef IR
+#undef fins
+#undef emitir
+#undef emitir_raw
+
+#endif
diff --git a/src/lj_parse.c b/src/lj_parse.c
new file mode 100644
index 00000000..663525ab
--- /dev/null
+++ b/src/lj_parse.c
@@ -0,0 +1,2198 @@
+/*
+** Lua parser (source code -> bytecode).
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_parse_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_func.h"
+#include "lj_state.h"
+#include "lj_bc.h"
+#include "lj_lex.h"
+#include "lj_parse.h"
+#include "lj_vm.h"
+#include "lj_vmevent.h"
+
+/* -- Parser structures and definitions ----------------------------------- */
+
+/* Expression kinds. */
+typedef enum {
+ /* Constant expressions must be first and in this order: */
+ VKNIL,
+ VKFALSE,
+ VKTRUE,
+ VKSTR, /* sval = string value */
+ VKNUM, /* nval = numerical value */
+ VKLAST = VKNUM,
+ /* Non-constant expressions follow: */
+ VLOCAL, /* info = local register */
+ VUPVAL, /* info = upvalue index */
+ VGLOBAL, /* sval = string value */
+ VINDEXED, /* info = table register, aux = index reg/byte/string const */
+ VJMP, /* info = instruction PC */
+ VRELOCABLE, /* info = instruction PC */
+ VNONRELOC, /* info = result register */
+ VCALL, /* info = instruction PC, aux = base */
+ VVOID
+} ExpKind;
+
+/* Expression descriptor. */
+typedef struct ExpDesc {
+ union {
+ struct { uint32_t info, aux; } s;
+ TValue nval;
+ GCstr *sval;
+ } u;
+ ExpKind k;
+ BCPos t; /* true condition exit list */
+ BCPos f; /* false condition exit list */
+} ExpDesc;
+
+/* Tests for expression types */
+#define isK(e) ((uint32_t)((e)->k) <= VKLAST)
+#define isnumK(e) ((e)->k == VKNUM)
+#define isstrK(e) ((e)->k == VKSTR)
+#define expnumV(e) check_exp(isnumK((e)), numV(&(e)->u.nval))
+
+#define hasjumps(e) ((e)->t != (e)->f)
+#define isKexp(e) (isK(e) && !hasjumps(e))
+#define isnumKexp(e) (isnumK(e) && !hasjumps(e))
+
+#define priKk(k) check_exp((k) <= VKTRUE, (k) - VKNIL)
+#define priK(e) priKk((e)->k)
+
+/* Per-function linked list of blocks. */
+typedef struct FuncBlock {
+ struct FuncBlock *previous; /* chain */
+ BCPos breaklist; /* list of jumps out of this loop */
+ uint8_t nactvar; /* # active locals outside the breakable structure */
+ uint8_t upval; /* true if some variable in the block is an upvalue */
+ uint8_t isbreakable; /* true if `block' is a loop */
+} FuncBlock;
+
+typedef struct UpValDesc {
+ uint8_t k;
+ uint8_t info;
+} UpValDesc;
+
+/* Per-function state. */
+typedef struct FuncState {
+ GCproto *pt; /* current function header */
+ GCtab *kt; /* table to find (and reuse) elements in `k' */
+ struct FuncState *prev; /* enclosing function */
+ struct LexState *ls; /* lexical state */
+ struct lua_State *L; /* copy of the Lua state */
+ struct FuncBlock *bl; /* chain of current blocks */
+ BCPos pc; /* next bytecode position */
+ BCPos lasttarget; /* PC of last jump target */
+ BCPos jpc; /* list of pending jumps to PC */
+ BCReg freereg; /* first free register */
+ BCReg nkn, nkgc; /* number of lua_Number/GCobj constants */
+ uint16_t nlocvars; /* number of elements in `locvars' */
+ uint8_t nactvar; /* number of active local variables */
+ uint8_t nuv; /* number of upvalues */
+ UpValDesc upvalues[LJ_MAX_UPVAL]; /* upvalues */
+ uint16_t actvar[LJ_MAX_LOCVAR]; /* declared-variable stack */
+} FuncState;
+
+/* Binary and unary operators. ORDER OPR */
+typedef enum BinOpr {
+ OPR_ADD, OPR_SUB, OPR_MUL, OPR_DIV, OPR_MOD, OPR_POW, /* ORDER ARITH */
+ OPR_CONCAT,
+ OPR_NE, OPR_EQ,
+ OPR_LT, OPR_GE, OPR_LE, OPR_GT,
+ OPR_AND, OPR_OR,
+ OPR_NOBINOPR
+} BinOpr;
+
+LJ_STATIC_ASSERT((int)BC_ISGE-(int)BC_ISLT == (int)OPR_GE-(int)OPR_LT);
+LJ_STATIC_ASSERT((int)BC_ISLE-(int)BC_ISLT == (int)OPR_LE-(int)OPR_LT);
+LJ_STATIC_ASSERT((int)BC_ISGT-(int)BC_ISLT == (int)OPR_GT-(int)OPR_LT);
+LJ_STATIC_ASSERT((int)BC_SUBVV-(int)BC_ADDVV == (int)OPR_SUB-(int)OPR_ADD);
+LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD);
+LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD);
+LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
+
+typedef enum UnOpr { OPR_MINUS, OPR_NOT, OPR_LEN, OPR_NOUNOPR } UnOpr;
+
+/* -- Error handling ------------------------------------------------------ */
+
+LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
+{
+ lj_lex_error(ls, ls->token, em);
+}
+
+LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token)
+{
+ lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token));
+}
+
+LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
+{
+ if (fs->pt->linedefined == 0)
+ lj_lex_error(fs->ls, 0, LJ_ERR_XLIMM, limit, what);
+ else
+ lj_lex_error(fs->ls, 0, LJ_ERR_XLIMF, fs->pt->linedefined, limit, what);
+}
+
+#define checklimit(fs, v, l, m) if ((v) >= (l)) err_limit(fs, l, m)
+#define checklimitgt(fs, v, l, m) if ((v) > (l)) err_limit(fs, l, m)
+#define checkcond(ls, c, em) { if (!(c)) err_syntax(ls, em); }
+
+/* -- Code emitter: branches ---------------------------------------------- */
+
+static BCPos getjump(FuncState *fs, BCPos pc)
+{
+ ptrdiff_t delta = bc_j(fs->pt->bc[pc]);
+ if ((BCPos)delta == NO_JMP)
+ return NO_JMP;
+ else
+ return (BCPos)(((ptrdiff_t)pc+1)+delta);
+}
+
+static int need_value(FuncState *fs, BCPos list)
+{
+ for (; list != NO_JMP; list = getjump(fs, list)) {
+ BCOp op = bc_op(fs->pt->bc[list >= 1 ? list-1 : list]);
+ if (!(op == BC_ISTC || op == BC_ISFC)) return 1;
+ }
+ return 0; /* Not found. */
+}
+
+static int patchtestreg(FuncState *fs, BCPos pc, BCReg reg)
+{
+ BCIns *i = &fs->pt->bc[pc >= 1 ? pc-1 : pc];
+ BCOp op = bc_op(*i);
+ if (!(op == BC_ISTC || op == BC_ISFC))
+ return 0; /* cannot patch other instructions */
+ if (reg != NO_REG && reg != bc_d(*i)) {
+ setbc_a(i, reg);
+ } else { /* no register to put value or register already has the value */
+ setbc_op(i, op+(BC_IST-BC_ISTC));
+ setbc_a(i, 0);
+ }
+ return 1;
+}
+
+static void removevalues(FuncState *fs, BCPos list)
+{
+ for (; list != NO_JMP; list = getjump(fs, list))
+ patchtestreg(fs, list, NO_REG);
+}
+
+static void fixjump(FuncState *fs, BCPos pc, BCPos dest)
+{
+ BCIns *jmp = &fs->pt->bc[pc];
+ BCPos offset = dest-(pc+1)+BCBIAS_J;
+ lua_assert(dest != NO_JMP);
+ if (offset > BCMAX_D)
+ err_syntax(fs->ls, LJ_ERR_XJUMP);
+ setbc_d(jmp, offset);
+}
+
+static void concatjumps(FuncState *fs, BCPos *l1, BCPos l2)
+{
+ if (l2 == NO_JMP) return;
+ else if (*l1 == NO_JMP) {
+ *l1 = l2;
+ } else {
+ BCPos list = *l1;
+ BCPos next;
+ while ((next = getjump(fs, list)) != NO_JMP) /* find last element */
+ list = next;
+ fixjump(fs, list, l2);
+ }
+}
+
+static void patchlistaux(FuncState *fs, BCPos list, BCPos vtarget,
+ BCReg reg, BCPos dtarget)
+{
+ while (list != NO_JMP) {
+ BCPos next = getjump(fs, list);
+ if (patchtestreg(fs, list, reg))
+ fixjump(fs, list, vtarget);
+ else
+ fixjump(fs, list, dtarget); /* jump to default target */
+ list = next;
+ }
+}
+
+static void patchtohere(FuncState *fs, BCPos list)
+{
+ fs->lasttarget = fs->pc;
+ concatjumps(fs, &fs->jpc, list);
+}
+
+static void patchlist(FuncState *fs, BCPos list, BCPos target)
+{
+ if (target == fs->pc) {
+ patchtohere(fs, list);
+ } else {
+ lua_assert(target < fs->pc);
+ patchlistaux(fs, list, target, NO_REG, target);
+ }
+}
+
+/* -- Code emitter: instructions ------------------------------------------ */
+
+static BCPos emitINS(FuncState *fs, BCIns i)
+{
+ GCproto *pt;
+ patchlistaux(fs, fs->jpc, fs->pc, NO_REG, fs->pc);
+ fs->jpc = NO_JMP;
+ pt = fs->pt;
+ if (LJ_UNLIKELY(fs->pc >= pt->sizebc)) {
+ checklimit(fs, fs->pc, LJ_MAX_BCINS, "bytecode instructions");
+ lj_mem_growvec(fs->L, pt->bc, pt->sizebc, LJ_MAX_BCINS, BCIns);
+ lj_mem_growvec(fs->L, pt->lineinfo, pt->sizelineinfo, LJ_MAX_BCINS, BCLine);
+ }
+ pt->bc[fs->pc] = i;
+ pt->lineinfo[fs->pc] = fs->ls->lastline;
+ return fs->pc++;
+}
+
+#define emitABC(fs, o, a, b, c) emitINS(fs, BCINS_ABC(o, a, b, c))
+#define emitAD(fs, o, a, d) emitINS(fs, BCINS_AD(o, a, d))
+#define emitAJ(fs, o, a, j) emitINS(fs, BCINS_AJ(o, a, j))
+
+#define bcptr(fs, e) (&(fs)->pt->bc[(e)->u.s.info])
+
+static BCPos emit_jump(FuncState *fs)
+{
+ BCPos jpc = fs->jpc; /* save list of jumps to here */
+ BCPos j = fs->pc - 1;
+ fs->jpc = NO_JMP;
+ if ((int32_t)j >= (int32_t)fs->lasttarget && bc_op(fs->pt->bc[j]) == BC_UCLO)
+ setbc_j(&fs->pt->bc[j], NO_JMP);
+ else
+ j = emitAJ(fs, BC_JMP, fs->freereg, NO_JMP);
+ concatjumps(fs, &j, jpc); /* keep them on hold */
+ return j;
+}
+
+/* -- Code emitter: constants --------------------------------------------- */
+
+static BCReg numK(FuncState *fs, ExpDesc *e)
+{
+ lua_State *L = fs->L;
+ TValue *val;
+ lua_assert(isnumK(e));
+ val = lj_tab_set(L, fs->kt, &e->u.nval);
+ if (tvisnum(val))
+ return val->u32.lo;
+ val->u64 = fs->nkn;
+ return fs->nkn++;
+}
+
+static BCReg gcK(FuncState *fs, GCobj *gc, int itype)
+{
+ lua_State *L = fs->L;
+ TValue o, *val;
+ setgcV(L, &o, &gc->gch, itype);
+ val = lj_tab_set(L, fs->kt, &o);
+ if (tvisnum(val))
+ return val->u32.lo;
+ val->u64 = fs->nkgc;
+ return fs->nkgc++;
+}
+
+static BCReg strK(FuncState *fs, ExpDesc *e)
+{
+ lua_assert(isstrK(e) || e->k == VGLOBAL);
+ return gcK(fs, obj2gco(e->u.sval), LJ_TSTR);
+}
+
+GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t len)
+{
+ lua_State *L = ls->L;
+ GCstr *s = lj_str_new(L, str, len);
+ TValue *tv = lj_tab_setstr(L, ls->fs->kt, s);
+ if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */
+ return s;
+}
+
+static void keep_token(LexState *ls)
+{
+ if (ls->token == TK_name || ls->token == TK_string) {
+ TValue *tv = lj_tab_setstr(ls->L, ls->fs->kt, strV(&ls->tokenval));
+ if (tvisnil(tv)) setboolV(tv, 1); /* Anchor string to avoid GC. */
+ }
+}
+
+static void nilK(FuncState *fs, BCReg from, BCReg n)
+{
+ BCIns *pr;
+ if (fs->pc > fs->lasttarget) { /* no jumps to current position? */
+ BCReg pfrom, pto;
+ pr = &fs->pt->bc[fs->pc-1];
+ pfrom = bc_a(*pr);
+ switch (bc_op(*pr)) {
+ case BC_KPRI:
+ if (bc_d(*pr) != ~LJ_TNIL) break;
+ if (from == pfrom) {
+ if (n == 1) return;
+ } else if (from == pfrom+1) {
+ from = pfrom;
+ n++;
+ } else {
+ break;
+ }
+ fs->pc--;
+ break;
+ case BC_KNIL:
+ pto = bc_d(*pr);
+ if (pfrom <= from && from <= pto+1) { /* can connect both? */
+ if (from+n-1 > pto)
+ setbc_d(pr, from+n-1);
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ emitINS(fs, n == 1 ? BCINS_AD(BC_KPRI, from, priKk(VKNIL))
+ : BCINS_AD(BC_KNIL, from, from+n-1));
+}
+
+/* -- Code emitter: registers --------------------------------------------- */
+
+static void checkframe(FuncState *fs, BCReg n)
+{
+ BCReg sz = fs->freereg + n;
+ if (sz > fs->pt->framesize) {
+ if (sz >= LJ_MAX_SLOTS)
+ err_syntax(fs->ls, LJ_ERR_XSLOTS);
+ fs->pt->framesize = cast_byte(sz);
+ }
+}
+
+static void reserveregs(FuncState *fs, BCReg n)
+{
+ checkframe(fs, n);
+ fs->freereg += n;
+}
+
+static void freereg(FuncState *fs, BCReg reg)
+{
+ if (reg >= fs->nactvar) {
+ fs->freereg--;
+ lua_assert(reg == fs->freereg);
+ }
+}
+
+static void freeexp(FuncState *fs, ExpDesc *e)
+{
+ if (e->k == VNONRELOC)
+ freereg(fs, e->u.s.info);
+}
+
+/* -- Code emitter: expressions ------------------------------------------- */
+
+static void dischargevars(FuncState *fs, ExpDesc *e)
+{
+ BCIns ins;
+ switch (e->k) {
+ case VUPVAL:
+ ins = BCINS_AD(BC_UGET, 0, e->u.s.info);
+ break;
+ case VGLOBAL:
+ ins = BCINS_AD(BC_GGET, 0, strK(fs, e));
+ break;
+ case VINDEXED: {
+ /* TGET[VSB] key = reg, string const or byte const */
+ BCReg rc = e->u.s.aux;
+ if ((int32_t)rc < 0) {
+ ins = BCINS_ABC(BC_TGETS, 0, e->u.s.info, ~rc);
+ } else if (rc > BCMAX_C) {
+ ins = BCINS_ABC(BC_TGETB, 0, e->u.s.info, rc-(BCMAX_C+1));
+ } else {
+ freereg(fs, rc);
+ ins = BCINS_ABC(BC_TGETV, 0, e->u.s.info, rc);
+ }
+ freereg(fs, e->u.s.info);
+ break;
+ }
+ case VCALL:
+ e->u.s.info = e->u.s.aux;
+ /* fallthrough */
+ case VLOCAL:
+ e->k = VNONRELOC;
+ /* fallthrough */
+ default:
+ return;
+ }
+ e->u.s.info = emitINS(fs, ins);
+ e->k = VRELOCABLE;
+}
+
+static void discharge2reg(FuncState *fs, ExpDesc *e, BCReg reg)
+{
+ BCIns ins;
+ dischargevars(fs, e);
+ switch (e->k) {
+ case VKNIL: case VKFALSE: case VKTRUE:
+ ins = BCINS_AD(BC_KPRI, reg, priK(e));
+ break;
+ case VKSTR:
+ ins = BCINS_AD(BC_KSTR, reg, strK(fs, e));
+ break;
+ case VKNUM: {
+ lua_Number n = expnumV(e);
+ int32_t k = lj_num2int(n);
+ if (checki16(k) && n == cast_num(k))
+ ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k);
+ else
+ ins = BCINS_AD(BC_KNUM, reg, numK(fs, e));
+ break;
+ }
+ case VRELOCABLE:
+ setbc_a(bcptr(fs, e), reg);
+ goto noins;
+ case VNONRELOC:
+ if (reg == e->u.s.info)
+ goto noins;
+ ins = BCINS_AD(BC_MOV, reg, e->u.s.info);
+ break;
+ default:
+ lua_assert(e->k == VVOID || e->k == VJMP);
+ return; /* nothing to do... */
+ }
+ emitINS(fs, ins);
+noins:
+ e->u.s.info = reg;
+ e->k = VNONRELOC;
+}
+
+static void exp2reg(FuncState *fs, ExpDesc *e, BCReg reg)
+{
+ discharge2reg(fs, e, reg);
+ if (e->k == VJMP)
+ concatjumps(fs, &e->t, e->u.s.info); /* put this jump in `t' list */
+ if (hasjumps(e)) {
+ BCPos final; /* position after whole expression */
+ BCPos p_f = NO_JMP; /* position of an eventual LOAD false */
+ BCPos p_t = NO_JMP; /* position of an eventual LOAD true */
+ if (need_value(fs, e->t) || need_value(fs, e->f)) {
+ BCPos fj = (e->k == VJMP) ? NO_JMP : emit_jump(fs);
+ p_f = emitAD(fs, BC_KPRI, reg, priKk(VKFALSE));
+ emitAJ(fs, BC_JMP, fs->freereg, 1);
+ p_t = emitAD(fs, BC_KPRI, reg, priKk(VKTRUE));
+ patchtohere(fs, fj);
+ }
+ final = fs->pc;
+ fs->lasttarget = final;
+ patchlistaux(fs, e->f, final, reg, p_f);
+ patchlistaux(fs, e->t, final, reg, p_t);
+ }
+ e->f = e->t = NO_JMP;
+ e->u.s.info = reg;
+ e->k = VNONRELOC;
+}
+
+static void exp2nextreg(FuncState *fs, ExpDesc *e)
+{
+ dischargevars(fs, e);
+ freeexp(fs, e);
+ reserveregs(fs, 1);
+ exp2reg(fs, e, fs->freereg - 1);
+}
+
+static BCReg exp2anyreg(FuncState *fs, ExpDesc *e)
+{
+ dischargevars(fs, e);
+ if (e->k == VNONRELOC) {
+ if (!hasjumps(e)) return e->u.s.info; /* exp is already in a register */
+ if (e->u.s.info >= fs->nactvar) { /* reg. is not a local? */
+ exp2reg(fs, e, e->u.s.info); /* put value on it */
+ return e->u.s.info;
+ }
+ }
+ exp2nextreg(fs, e); /* default */
+ return e->u.s.info;
+}
+
+static void exp2val(FuncState *fs, ExpDesc *e)
+{
+ if (hasjumps(e))
+ exp2anyreg(fs, e);
+ else
+ dischargevars(fs, e);
+}
+
+static void storevar(FuncState *fs, ExpDesc *var, ExpDesc *e)
+{
+ BCIns ins;
+ switch (var->k) {
+ case VLOCAL:
+ freeexp(fs, e);
+ exp2reg(fs, e, var->u.s.info);
+ return;
+ case VUPVAL:
+ exp2val(fs, e);
+ switch (e->k) {
+ case VKNIL: case VKFALSE: case VKTRUE:
+ ins = BCINS_AD(BC_USETP, var->u.s.info, priK(e));
+ break;
+ case VKSTR:
+ ins = BCINS_AD(BC_USETS, var->u.s.info, strK(fs, e));
+ break;
+ case VKNUM:
+ ins = BCINS_AD(BC_USETN, var->u.s.info, numK(fs, e));
+ break;
+ default:
+ ins = BCINS_AD(BC_USETV, var->u.s.info, exp2anyreg(fs, e));
+ break;
+ }
+ break;
+ case VGLOBAL: {
+ BCReg ra = exp2anyreg(fs, e);
+ ins = BCINS_AD(BC_GSET, ra, strK(fs, var));
+ break;
+ }
+ case VINDEXED: {
+ /* TSET[VSB] key = reg, string const or byte const */
+ BCReg ra = exp2anyreg(fs, e);
+ BCReg rc = var->u.s.aux;
+ if ((int32_t)rc < 0) {
+ ins = BCINS_ABC(BC_TSETS, ra, var->u.s.info, ~rc);
+ } else if (rc > BCMAX_C) {
+ ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1));
+ } else {
+ /* Free late alloced key reg to avoid assert on free of value reg. */
+ /* This can only happen when called from constructor(). */
+ lua_assert(e->k != VNONRELOC || ra < fs->nactvar ||
+ rc < ra || (freereg(fs, rc),1));
+ ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc);
+ }
+ break;
+ }
+ default:
+ lua_assert(0); /* invalid var kind to store */
+ return;
+ }
+ emitINS(fs, ins);
+ freeexp(fs, e);
+}
+
+static void indexexp(FuncState *fs, ExpDesc *t, ExpDesc *e)
+{
+ /* already called: exp2val(fs, e) */
+ t->k = VINDEXED;
+ if (isnumK(e)) {
+ lua_Number n = expnumV(e);
+ int32_t k = lj_num2int(n);
+ if (checku8(k) && n == cast_num(k)) {
+ t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */
+ return;
+ }
+ } else if (isstrK(e)) {
+ BCReg idx = strK(fs, e);
+ if (idx <= BCMAX_C) {
+ t->u.s.aux = ~idx; /* -256..-1: const string key */
+ return;
+ }
+ }
+ t->u.s.aux = exp2anyreg(fs, e); /* 0..255: register */
+}
+
+static void methodexp(FuncState *fs, ExpDesc *e, ExpDesc *key)
+{
+ BCReg idx, func, tab = exp2anyreg(fs, e);
+ freeexp(fs, e);
+ func = fs->freereg;
+ emitAD(fs, BC_MOV, func+1, tab);
+ lua_assert(isstrK(key));
+ idx = strK(fs, key);
+ if (idx <= BCMAX_C) {
+ reserveregs(fs, 2);
+ emitABC(fs, BC_TGETS, func, tab, idx);
+ } else {
+ reserveregs(fs, 3);
+ emitAD(fs, BC_KSTR, func+2, idx);
+ emitABC(fs, BC_TGETV, func, tab, func+2);
+ fs->freereg--;
+ }
+ e->u.s.info = func;
+ e->k = VNONRELOC;
+}
+
+/* -- Code emitter: conditionals ------------------------------------------ */
+
+static void invertjump(FuncState *fs, ExpDesc *e)
+{
+ BCIns *i = bcptr(fs, e) - 1;
+ setbc_op(i, bc_op(*i)^1);
+}
+
+static BCPos jumponcond(FuncState *fs, ExpDesc *e, int cond)
+{
+ if (e->k == VRELOCABLE) {
+ BCIns *i = bcptr(fs, e);
+ if (bc_op(*i) == BC_NOT) {
+ *i = BCINS_AD(cond ? BC_ISF : BC_IST, 0, bc_d(*i));
+ return emit_jump(fs);
+ }
+ /* else go through */
+ }
+ if (e->k != VNONRELOC) {
+ reserveregs(fs, 1);
+ discharge2reg(fs, e, fs->freereg-1);
+ }
+ freeexp(fs, e);
+ emitAD(fs, cond ? BC_ISTC : BC_ISFC, NO_REG, e->u.s.info);
+ return emit_jump(fs);
+}
+
+static void goiftrue(FuncState *fs, ExpDesc *e)
+{
+ BCPos pc; /* PC of last jump. */
+ dischargevars(fs, e);
+ switch (e->k) {
+ case VKSTR: case VKNUM: case VKTRUE:
+ pc = NO_JMP; /* always true; do nothing */
+ break;
+ case VJMP:
+ invertjump(fs, e);
+ pc = e->u.s.info;
+ break;
+ case VKFALSE:
+ if (!hasjumps(e)) {
+ pc = emit_jump(fs); /* always jump */
+ break;
+ }
+ /* fallthrough */
+ default:
+ pc = jumponcond(fs, e, 0);
+ break;
+ }
+ concatjumps(fs, &e->f, pc); /* insert last jump in `f' list */
+ patchtohere(fs, e->t);
+ e->t = NO_JMP;
+}
+
+static void goiffalse(FuncState *fs, ExpDesc *e)
+{
+ BCPos pc; /* PC of last jump. */
+ dischargevars(fs, e);
+ switch (e->k) {
+ case VKNIL: case VKFALSE:
+ pc = NO_JMP; /* always false; do nothing */
+ break;
+ case VJMP:
+ pc = e->u.s.info;
+ break;
+ case VKTRUE:
+ if (!hasjumps(e)) {
+ pc = emit_jump(fs); /* always jump */
+ break;
+ }
+ /* fallthrough */
+ default:
+ pc = jumponcond(fs, e, 1);
+ break;
+ }
+ concatjumps(fs, &e->t, pc); /* insert last jump in `t' list */
+ patchtohere(fs, e->f);
+ e->f = NO_JMP;
+}
+
+/* -- Code emitter: operators --------------------------------------------- */
+
+static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2)
+{
+ TValue o;
+ if (!isnumKexp(e1) || !isnumKexp(e2)) return 0;
+ setnumV(&o, lj_vm_foldarith(expnumV(e1), expnumV(e2), (int)opr-OPR_ADD));
+ if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */
+ setnumV(&e1->u.nval, numV(&o));
+ return 1;
+}
+
+static void codearith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2)
+{
+ BCReg rb, rc, t;
+ uint32_t op;
+ if (foldarith(opr, e1, e2))
+ return;
+ if (opr == OPR_POW) {
+ op = BC_POW;
+ rc = exp2anyreg(fs, e2);
+ rb = exp2anyreg(fs, e1);
+ } else {
+ op = opr-OPR_ADD+BC_ADDVV;
+ /* must discharge 2nd operand first since VINDEXED might free regs */
+ exp2val(fs, e2);
+ if (isnumK(e2) && (rc = numK(fs, e2)) <= BCMAX_C)
+ op -= BC_ADDVV-BC_ADDVN;
+ else
+ rc = exp2anyreg(fs, e2);
+ /* emit_prebinop discharges 1st operand, but may need to use KNUM/KSHORT */
+ lua_assert(isnumK(e1) || e1->k == VNONRELOC);
+ exp2val(fs, e1);
+ /* avoid two consts to satisfy bytecode constraints */
+ if (isnumK(e1) && !isnumK(e2) && (t = numK(fs, e1)) <= BCMAX_B) {
+ rb = rc; rc = t; op -= BC_ADDVV-BC_ADDNV;
+ } else {
+ rb = exp2anyreg(fs, e1);
+ }
+ }
+ /* using freeexp might cause asserts if the order is wrong */
+ if (e1->k == VNONRELOC && e1->u.s.info >= fs->nactvar) fs->freereg--;
+ if (e2->k == VNONRELOC && e2->u.s.info >= fs->nactvar) fs->freereg--;
+ e1->u.s.info = emitABC(fs, op, 0, rb, rc);
+ e1->k = VRELOCABLE;
+}
+
+static void codecomp(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2)
+{
+ ExpDesc *eret = e1;
+ BCIns ins;
+ exp2val(fs, e1);
+ if (opr == OPR_EQ || opr == OPR_NE) {
+ BCOp op = opr == OPR_EQ ? BC_ISEQV : BC_ISNEV;
+ BCReg ra;
+ if (isK(e1)) { e1 = e2; e2 = eret; } /* need constant in 2nd arg */
+ ra = exp2anyreg(fs, e1); /* first arg must be in a reg */
+ exp2val(fs, e2);
+ switch (e2->k) {
+ case VKNIL: case VKFALSE: case VKTRUE:
+ ins = BCINS_AD(op+(BC_ISEQP-BC_ISEQV), ra, priK(e2));
+ break;
+ case VKSTR:
+ ins = BCINS_AD(op+(BC_ISEQS-BC_ISEQV), ra, strK(fs, e2));
+ break;
+ case VKNUM:
+ ins = BCINS_AD(op+(BC_ISEQN-BC_ISEQV), ra, numK(fs, e2));
+ break;
+ default:
+ ins = BCINS_AD(op, ra, exp2anyreg(fs, e2));
+ break;
+ }
+ } else {
+ uint32_t op = opr-OPR_LT+BC_ISLT;
+ BCReg ra;
+ if ((op-BC_ISLT) & 1) { /* GT -> LT, GE -> LE */
+ e1 = e2; e2 = eret; /* swap operands */
+ op = ((op-BC_ISLT)^3)+BC_ISLT;
+ }
+ ra = exp2anyreg(fs, e1);
+ ins = BCINS_AD(op, ra, exp2anyreg(fs, e2));
+ }
+ /* using freeexp might cause asserts if the order is wrong */
+ if (e1->k == VNONRELOC && e1->u.s.info >= fs->nactvar) fs->freereg--;
+ if (e2->k == VNONRELOC && e2->u.s.info >= fs->nactvar) fs->freereg--;
+ emitINS(fs, ins);
+ eret->u.s.info = emit_jump(fs);
+ eret->k = VJMP;
+}
+
+static void emit_unop(FuncState *fs, UnOpr uop, ExpDesc *e)
+{
+ BCOp op = BC_LEN;
+ switch (uop) {
+ case OPR_MINUS:
+ if (isnumKexp(e) && expnumV(e) != 0) { /* Avoid const-folding to -0. */
+ setnumV(&e->u.nval, -expnumV(e));
+ return;
+ }
+ op = BC_UNM;
+ /* fallthrough */
+ case OPR_LEN:
+ exp2anyreg(fs, e);
+ break;
+ case OPR_NOT:
+ /* interchange true and false lists */
+ { BCPos temp = e->f; e->f = e->t; e->t = temp; }
+ removevalues(fs, e->f);
+ removevalues(fs, e->t);
+ dischargevars(fs, e);
+ switch (e->k) {
+ case VKNIL: case VKFALSE:
+ e->k = VKTRUE;
+ return;
+ case VKSTR: case VKNUM: case VKTRUE:
+ e->k = VKFALSE;
+ return;
+ case VJMP:
+ invertjump(fs, e);
+ return;
+ case VRELOCABLE:
+ reserveregs(fs, 1);
+ setbc_a(bcptr(fs, e), fs->freereg-1);
+ e->u.s.info = fs->freereg-1;
+ e->k = VNONRELOC;
+ break;
+ case VNONRELOC:
+ break;
+ default: lua_assert(0); return;
+ }
+ op = BC_NOT;
+ break;
+ default: lua_assert(0); return;
+ }
+ freeexp(fs, e);
+ e->u.s.info = emitAD(fs, op, 0, e->u.s.info);
+ e->k = VRELOCABLE;
+}
+
+static void prepare_binop(FuncState *fs, BinOpr op, ExpDesc *e)
+{
+ switch (op) {
+ case OPR_AND:
+ goiftrue(fs, e);
+ break;
+ case OPR_OR:
+ goiffalse(fs, e);
+ break;
+ case OPR_CONCAT:
+ exp2nextreg(fs, e); /* operand must be on the `stack' */
+ break;
+ case OPR_EQ: case OPR_NE:
+ if (!isKexp(e)) exp2anyreg(fs, e);
+ break;
+ default:
+ if (!isnumKexp(e)) exp2anyreg(fs, e);
+ break;
+ }
+}
+
+static void emit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
+{
+ switch (op) {
+ case OPR_AND:
+ lua_assert(e1->t == NO_JMP); /* list must be closed */
+ dischargevars(fs, e2);
+ concatjumps(fs, &e2->f, e1->f);
+ *e1 = *e2;
+ break;
+ case OPR_OR:
+ lua_assert(e1->f == NO_JMP); /* list must be closed */
+ dischargevars(fs, e2);
+ concatjumps(fs, &e2->t, e1->t);
+ *e1 = *e2;
+ break;
+ case OPR_CONCAT:
+ exp2val(fs, e2);
+ if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) {
+ lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1);
+ freeexp(fs, e1);
+ setbc_b(bcptr(fs, e2), e1->u.s.info);
+ e1->u.s.info = e2->u.s.info;
+ } else {
+ exp2nextreg(fs, e2);
+ freeexp(fs, e2);
+ freeexp(fs, e1);
+ e1->u.s.info = emitABC(fs, BC_CAT, 0, e1->u.s.info, e2->u.s.info);
+ }
+ e1->k = VRELOCABLE;
+ break;
+ case OPR_ADD: case OPR_SUB: case OPR_MUL:
+ case OPR_DIV: case OPR_MOD: case OPR_POW:
+ codearith(fs, op, e1, e2);
+ break;
+ case OPR_EQ: case OPR_NE:
+ case OPR_LT: case OPR_LE: case OPR_GT: case OPR_GE:
+ codecomp(fs, op, e1, e2);
+ break;
+ default: lua_assert(0); break;
+ }
+}
+
+/* -- Lexer support ------------------------------------------------------- */
+
+static int testnext(LexState *ls, LexToken tok)
+{
+ if (ls->token == tok) {
+ lj_lex_next(ls);
+ return 1;
+ }
+ return 0;
+}
+
+static void checknext(LexState *ls, LexToken tok)
+{
+ if (ls->token != tok)
+ err_token(ls, tok);
+ lj_lex_next(ls);
+}
+
+static void checkmatch(LexState *ls, LexToken what, LexToken who, BCLine line)
+{
+ if (!testnext(ls, what)) {
+ if (line == ls->linenumber) {
+ err_token(ls, what);
+ } else {
+ const char *swhat = lj_lex_token2str(ls, what);
+ const char *swho = lj_lex_token2str(ls, who);
+ lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line);
+ }
+ }
+}
+
+static GCstr *str_checkname(LexState *ls)
+{
+ GCstr *s;
+ if (ls->token != TK_name)
+ err_token(ls, TK_name);
+ s = strV(&ls->tokenval);
+ lj_lex_next(ls);
+ return s;
+}
+
+static void init_exp(ExpDesc *e, ExpKind k, uint32_t info)
+{
+ e->k = k;
+ e->u.s.info = info;
+ e->f = e->t = NO_JMP;
+}
+
+static void checkname(LexState *ls, ExpDesc *e)
+{
+ init_exp(e, VKSTR, 0);
+ e->u.sval = str_checkname(ls);
+}
+
+/* -- Variable handling --------------------------------------------------- */
+
+#define getlocvar(fs, i) ((fs)->pt->varinfo[(fs)->actvar[(i)]])
+
+static BCReg registerlocalvar(LexState *ls, GCstr *name)
+{
+ FuncState *fs = ls->fs;
+ GCproto *pt = fs->pt;
+ if (LJ_UNLIKELY(fs->nlocvars >= pt->sizevarinfo)) {
+ MSize oldsize = pt->sizevarinfo;
+ checklimit(fs, fs->nlocvars, 32767, "local variables");
+ lj_mem_growvec(fs->L, pt->varinfo, pt->sizevarinfo, 32767, VarInfo);
+ while (oldsize < pt->sizevarinfo) pt->varinfo[oldsize++].name = NULL;
+ }
+ pt->varinfo[fs->nlocvars].name = name;
+ lj_gc_objbarrier(ls->L, pt, name);
+ return fs->nlocvars++;
+}
+
+static void new_localvar(LexState *ls, GCstr *name, BCReg n)
+{
+ FuncState *fs = ls->fs;
+ checklimit(fs, fs->nactvar+n, LJ_MAX_LOCVAR, "local variables");
+ fs->actvar[fs->nactvar+n] = cast(uint16_t, registerlocalvar(ls, name));
+}
+
+#define new_localvarliteral(ls,v,n) \
+ new_localvar(ls, lj_parse_keepstr(ls, "" v, sizeof(v)-1), n)
+
+static void adjustlocalvars(LexState *ls, BCReg nvars)
+{
+ FuncState *fs = ls->fs;
+ fs->nactvar = cast_byte(fs->nactvar + nvars);
+ for (; nvars; nvars--)
+ getlocvar(fs, fs->nactvar - nvars).startpc = fs->pc;
+}
+
+static void removevars(LexState *ls, BCReg tolevel)
+{
+ FuncState *fs = ls->fs;
+ while (fs->nactvar > tolevel)
+ getlocvar(fs, --fs->nactvar).endpc = fs->pc;
+}
+
+static uint32_t indexupvalue(FuncState *fs, GCstr *name, ExpDesc *v)
+{
+ uint32_t i;
+ GCproto *pt = fs->pt;
+ for (i = 0; i < fs->nuv; i++) {
+ if (fs->upvalues[i].k == v->k && fs->upvalues[i].info == v->u.s.info) {
+ lua_assert(pt->uvname[i] == name);
+ return i;
+ }
+ }
+ /* Not found, create a new upvalue for this name. */
+ if (LJ_UNLIKELY(fs->nuv >= pt->sizeuvname)) {
+ MSize oldsize = pt->sizeuvname;
+ checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues");
+ lj_mem_growvec(fs->L, pt->uvname, pt->sizeuvname, LJ_MAX_UPVAL, GCstr *);
+ while (oldsize < pt->sizeuvname) pt->uvname[oldsize++] = NULL;
+ }
+ pt->uvname[fs->nuv] = name;
+ lj_gc_objbarrier(fs->L, pt, name);
+ lua_assert(v->k == VLOCAL || v->k == VUPVAL);
+ fs->upvalues[fs->nuv].k = cast_byte(v->k);
+ fs->upvalues[fs->nuv].info = cast_byte(v->u.s.info);
+ return fs->nuv++;
+}
+
+static BCReg searchvar(FuncState *fs, GCstr *n)
+{
+ int i;
+ for (i = fs->nactvar-1; i >= 0; i--) {
+ if (n == getlocvar(fs, i).name)
+ return (BCReg)i;
+ }
+ return (BCReg)-1; /* Not found. */
+}
+
+static void markupval(FuncState *fs, BCReg level)
+{
+ FuncBlock *bl = fs->bl;
+ while (bl && bl->nactvar > level) bl = bl->previous;
+ if (bl) bl->upval = 1;
+}
+
+static int singlevaraux(FuncState *fs, GCstr *name, ExpDesc *e, int first)
+{
+ if (fs == NULL) { /* no more levels? */
+ init_exp(e, VGLOBAL, 0); /* default is global variable */
+ e->u.sval = name;
+ return 1;
+ } else {
+ BCReg reg = searchvar(fs, name); /* look up at current level */
+ if ((int32_t)reg >= 0) {
+ init_exp(e, VLOCAL, reg);
+ if (!first)
+ markupval(fs, reg); /* local will be used as an upval */
+ return 0;
+ } else { /* not found at current level; try upper one */
+ if (singlevaraux(fs->prev, name, e, 0)) /* global? */
+ return 1;
+ e->u.s.info = indexupvalue(fs, name, e); /* else was local or upvalue */
+ e->k = VUPVAL; /* upvalue in this level */
+ return 0;
+ }
+ }
+}
+
+#define singlevar(ls, e) singlevaraux((ls)->fs, str_checkname(ls), (e), 1)
+
+static void adjust_assign(LexState *ls, BCReg nvars, BCReg nexps, ExpDesc *e)
+{
+ FuncState *fs = ls->fs;
+ int32_t extra = (int32_t)nvars - (int32_t)nexps;
+ if (e->k == VCALL) {
+ extra++; /* includes call itself */
+ if (extra < 0) extra = 0;
+ setbc_b(bcptr(fs, e), extra+1);
+ if (extra > 1) reserveregs(fs, (BCReg)extra-1);
+ } else {
+ if (e->k != VVOID) exp2nextreg(fs, e); /* close last expression */
+ if (extra > 0) {
+ BCReg reg = fs->freereg;
+ reserveregs(fs, (BCReg)extra);
+ nilK(fs, reg, (BCReg)extra);
+ }
+ }
+}
+
+/* -- Function handling --------------------------------------------------- */
+
+/* Forward declaration. */
+static void chunk(LexState *ls);
+
+static void open_func(LexState *ls, FuncState *fs)
+{
+ lua_State *L = ls->L;
+ GCproto *pt = lj_func_newproto(L);
+ fs->pt = pt;
+ fs->prev = ls->fs; /* linked list of funcstates */
+ fs->ls = ls;
+ fs->L = L;
+ ls->fs = fs;
+ fs->pc = 0;
+ fs->lasttarget = 0;
+ fs->jpc = NO_JMP;
+ fs->freereg = 0;
+ fs->nkgc = 0;
+ fs->nkn = 0;
+ fs->nlocvars = 0;
+ fs->nactvar = 0;
+ fs->nuv = 0;
+ fs->bl = NULL;
+ pt->chunkname = ls->chunkname;
+ pt->framesize = 2; /* registers 0/1 are always valid */
+ fs->kt = lj_tab_new(L, 0, 0);
+ /* anchor table of constants and prototype (to avoid being collected) */
+ settabV(L, L->top, fs->kt);
+ incr_top(L);
+ setprotoV(L, L->top, pt);
+ incr_top(L);
+}
+
+static void collectk(FuncState *fs, GCproto *pt)
+{
+ GCtab *kt;
+ TValue *array;
+ Node *node;
+ BCReg nkgc;
+ MSize i, hmask, sizek;
+ GCRef *kstart;
+ checklimitgt(fs, fs->nkn, BCMAX_D+1, "constants");
+ checklimitgt(fs, fs->nkgc, BCMAX_D+1, "constants");
+ nkgc = round_nkgc(fs->nkgc);
+ sizek = (MSize)(nkgc*sizeof(MRef) + fs->nkn*sizeof(lua_Number));
+ kstart = lj_mem_newt(fs->L, sizek, GCRef);
+ if (nkgc) setgcrefnull(kstart[0]); /* May be uninitialized otherwise. */
+ pt->k.gc = kstart + nkgc;
+ pt->sizekn = fs->nkn;
+ pt->sizekgc = fs->nkgc;
+ kt = fs->kt;
+ array = tvref(kt->array);
+ for (i = 0; i < kt->asize; i++)
+ if (tvisnum(&array[i]))
+ pt->k.n[array[i].u32.lo] = cast_num(i);
+ node = noderef(kt->node);
+ hmask = kt->hmask;
+ for (i = 0; i <= hmask; i++) {
+ Node *n = &node[i];
+ if (tvisnum(&n->val)) {
+ ptrdiff_t kidx = (ptrdiff_t)n->val.u32.lo;
+ if (tvisnum(&n->key)) {
+ pt->k.n[kidx] = numV(&n->key);
+ } else {
+ GCobj *o = gcV(&n->key);
+ setgcref(pt->k.gc[~kidx], o);
+ lj_gc_objbarrier(fs->L, pt, o);
+ }
+ }
+ }
+}
+
+static void collectuv(FuncState *fs, GCproto *pt)
+{
+ uint32_t i;
+ pt->uv = lj_mem_newvec(fs->L, fs->nuv, int16_t);
+ pt->sizeuv = fs->nuv;
+ for (i = 0; i < pt->sizeuv; i++) {
+ uint32_t v = fs->upvalues[i].info;
+ if (fs->upvalues[i].k == VUPVAL) v = ~v;
+ pt->uv[i] = (int16_t)v;
+ }
+}
+
+/* Emit the implicit final return of a function, or suppress it when the
+** last instruction already returns. If closures were created after some
+** returns were emitted (PROTO_FIXUP_RETURN), rewrite those early returns
+** so upvalues get closed via BC_UCLO before returning.
+*/
+static void finalret(FuncState *fs, GCproto *pt)
+{
+ BCPos lastpc = fs->pc;
+ /* Only elide the return if the last ins is not a jump target. */
+ if (lastpc > fs->lasttarget) {
+ switch (bc_op(pt->bc[lastpc-1])) {
+ case BC_CALLMT: case BC_CALLT:
+ case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1:
+ goto suppress_return; /* already got a return */
+ default: break;
+ }
+ }
+ if (fs->pt->flags & PROTO_HAS_FNEW)
+ emitAJ(fs, BC_UCLO, 0, 0); /* Close upvalues before returning. */
+ emitAD(fs, BC_RET0, 0, 1); /* final return */
+suppress_return:
+ /* may need to fixup returns encoded before first function was created */
+ if (fs->pt->flags & PROTO_FIXUP_RETURN) {
+ BCPos pc;
+ for (pc = 0; pc < lastpc; pc++) {
+ BCIns i = pt->bc[pc];
+ BCPos offset;
+ switch (bc_op(i)) {
+ case BC_CALLMT: case BC_CALLT:
+ case BC_RETM: case BC_RET: case BC_RET0: case BC_RET1:
+ /* Append a copy of the return at the end and branch to it. */
+ offset = emitINS(fs, i)-(pc+1)+BCBIAS_J; /* copy return ins */
+ if (offset > BCMAX_D)
+ err_syntax(fs->ls, LJ_ERR_XFIXUP);
+ pt->bc[pc] = BCINS_AD(BC_UCLO, 0, offset); /* replace w/ UCLO+branch */
+ break;
+ case BC_UCLO: return; /* we're done */
+ default: break;
+ }
+ }
+ }
+}
+
+/* Finish the function being parsed: emit the final return, shrink all
+** per-prototype vectors to their final sizes, collect constants and
+** upvalues, and signal a BC vmevent. Pops back to the enclosing FuncState.
+*/
+static void close_func(LexState *ls)
+{
+ lua_State *L = ls->L;
+ FuncState *fs = ls->fs;
+ GCproto *pt = fs->pt;
+ removevars(ls, 0); /* Close all remaining local variable scopes. */
+ finalret(fs, pt);
+ lj_mem_reallocvec(L, pt->bc, pt->sizebc, fs->pc, BCIns);
+ pt->sizebc = fs->pc;
+ collectk(fs, pt);
+ collectuv(fs, pt);
+ lj_mem_reallocvec(L, pt->lineinfo, pt->sizelineinfo, fs->pc, BCLine);
+ pt->sizelineinfo = fs->pc;
+ lj_mem_reallocvec(L, pt->varinfo, pt->sizevarinfo, fs->nlocvars, VarInfo);
+ pt->sizevarinfo = fs->nlocvars;
+ lj_mem_reallocvec(L, pt->uvname, pt->sizeuvname, fs->nuv, GCstr *);
+ pt->sizeuvname = fs->nuv;
+ lua_assert(fs->bl == NULL);
+ lj_vmevent_send(L, BC,
+ setprotoV(L, L->top++, pt);
+ );
+ ls->fs = fs->prev;
+ L->top -= 2; /* Remove table and prototype from the stack. */
+ lua_assert(ls->fs != NULL || ls->token == TK_eof);
+ keep_token(ls); /* Re-anchor last token. */
+}
+
+/* Parser entry point: parse the whole chunk from the lexer state and
+** return the prototype of the main function (always a vararg function
+** without upvalues).
+*/
+GCproto *lj_parse(LexState *ls)
+{
+ struct FuncState fs;
+ ls->level = 0;
+ open_func(ls, &fs);
+ fs.pt->flags |= PROTO_IS_VARARG; /* Main chunk is always a vararg func. */
+ lj_lex_next(ls); /* Read-ahead first token. */
+ chunk(ls);
+ if (ls->token != TK_eof)
+ err_token(ls, TK_eof);
+ fs.pt->lastlinedefined = ls->linenumber;
+ close_func(ls);
+ lua_assert(fs.prev == NULL);
+ lua_assert(fs.pt->sizeuv == 0);
+ lua_assert(ls->fs == NULL);
+ return fs.pt;
+}
+
+/* -- Expressions --------------------------------------------------------- */
+
+/* forward declaration */
+static void expr(LexState *ls, ExpDesc *v);
+
+/* Parse a field selector after '.' or ':' and turn v into an indexed
+** expression (table in a register, constant string key).
+*/
+static void field(LexState *ls, ExpDesc *v)
+{
+ /* field -> ['.' | ':'] NAME */
+ FuncState *fs = ls->fs;
+ ExpDesc key;
+ exp2anyreg(fs, v);
+ lj_lex_next(ls); /* skip the dot or colon */
+ checkname(ls, &key);
+ indexexp(fs, v, &key);
+}
+
+/* Parse a bracketed index expression and force it into a value/constant. */
+static void yindex(LexState *ls, ExpDesc *v)
+{
+ /* index -> '[' expr ']' */
+ lj_lex_next(ls); /* skip the '[' */
+ expr(ls, v);
+ exp2val(ls->fs, v);
+ checknext(ls, ']');
+}
+
+/* Convert a constant expression (nil/false/true/string/number) into a
+** TValue, e.g. for storing into a template table.
+*/
+static void kexp2tv(TValue *v, ExpDesc *e)
+{
+ if (e->k == VKSTR) {
+ setgcref(v->gcr, obj2gco(e->u.sval)); v->it = LJ_TSTR;
+ } else if (e->k == VKNUM) {
+ setnumV(v, expnumV(e));
+ } else if (e->k == VKNIL || e->k == VKFALSE || e->k == VKTRUE) {
+ /* Primitive tags are encoded as the complement of the expression kind. */
+ v->it = ~(int32_t)e->k;
+ } else {
+ lua_assert(0);
+ }
+}
+
+/* Parse a table constructor '{ ... }'. Emits BC_TNEW with a placeholder
+** operand; if enough constant key/value pairs are seen, a template table
+** is built instead and the instruction is patched to BC_TDUP. A trailing
+** multi-result expression is folded into BC_TSETM.
+*/
+static void constructor(LexState *ls, ExpDesc *e)
+{
+ FuncState *fs = ls->fs;
+ BCLine line = ls->linenumber;
+ GCtab *t = NULL;
+ int vcall = 0, needarr = 0;
+ int32_t narr = 1; /* first array index */
+ uint32_t nhash = 0; /* number of hash entries */
+ BCReg freg = fs->freereg;
+ BCPos pc = emitAD(fs, BC_TNEW, freg, 0); /* D operand patched below. */
+ init_exp(e, VNONRELOC, freg);
+ reserveregs(fs, 1);
+ freg++;
+ checknext(ls, '{');
+ while (ls->token != '}') {
+ ExpDesc key, val;
+ vcall = 0;
+ if (ls->token == '[') {
+ yindex(ls, &key); /* already calls exp2val */
+ if (!isK(&key)) indexexp(fs, e, &key);
+ if (isnumK(&key) && expnumV(&key) == 0) needarr = 1; else nhash++;
+ checknext(ls, '=');
+ } else if (ls->token == TK_name && lj_lex_lookahead(ls) == '=') {
+ checkname(ls, &key);
+ checknext(ls, '=');
+ nhash++;
+ } else {
+ /* List-style entry: implicit integer key. */
+ init_exp(&key, VKNUM, 0);
+ setintV(&key.u.nval, narr);
+ narr++;
+ needarr = vcall = 1;
+ }
+ expr(ls, &val);
+ if (isKexp(&val) && isK(&key) && key.k != VKNIL) {
+ TValue k;
+ if (!t) { /* create template table on demand */
+ BCReg kidx;
+ t = lj_tab_new(fs->L, 0, 0);
+ kidx = gcK(fs, obj2gco(t), LJ_TTAB);
+ fs->pt->bc[pc] = BCINS_AD(BC_TDUP, freg-1, kidx);
+ }
+ vcall = 0;
+ kexp2tv(&k, &key);
+ kexp2tv(lj_tab_set(fs->L, t, &k), &val);
+ if (val.k == VKSTR)
+ lj_gc_objbarriert(fs->L, t, val.u.sval); /* Template holds a GC ref. */
+ } else {
+ if (isK(&key)) indexexp(fs, e, &key);
+ if (val.k != VCALL) vcall = 0;
+ storevar(fs, e, &val);
+ }
+ fs->freereg = freg;
+ if (!testnext(ls, ',') && !testnext(ls, ';')) break;
+ }
+ checkmatch(ls, '}', '{', line);
+ if (vcall) {
+ /* Last list entry was a call: replace its store with BC_TSETM so all
+ ** results land in the table (B of the call is set to 0 = open results).
+ */
+ BCIns *i = &fs->pt->bc[fs->pc-1];
+ ExpDesc en;
+ lua_assert(bc_a(*i)==freg && bc_op(*i) == (narr>256?BC_TSETV:BC_TSETB));
+ init_exp(&en, VKNUM, 0);
+ setintV(&en.u.nval, narr-1);
+ if (narr > 256) { fs->pc--; i--; }
+ *i = BCINS_AD(BC_TSETM, freg, numK(fs, &en));
+ setbc_b(i-1, 0);
+ }
+ if (pc == fs->pc-1) { /* make expr relocable if possible */
+ e->u.s.info = pc;
+ fs->freereg--;
+ e->k = VRELOCABLE;
+ } else {
+ e->k = VNONRELOC; /* indexexp may have changed it */
+ }
+ if (!t) { /* Construct TNEW RD: hhhhhaaaaaaaaaaa. */
+ if (!needarr) narr = 0;
+ else if (narr < 3) narr = 3;
+ else if (narr > 0x7ff) narr = 0x7ff;
+ setbc_d(&fs->pt->bc[pc], (uint32_t)narr | (hsize2hbits(nhash) << 11));
+ }
+}
+
+/* Parse the parameter list of a function. Note: '...' must be the last
+** parameter (the loop stops once PROTO_IS_VARARG is set).
+*/
+static void parlist(LexState *ls)
+{
+ /* parlist -> [ param { `,' param } ] */
+ FuncState *fs = ls->fs;
+ GCproto *pt = fs->pt;
+ BCReg nparams = 0;
+ if (ls->token != ')') { /* is `parlist' not empty? */
+ do {
+ switch (ls->token) {
+ case TK_name: /* param -> NAME */
+ new_localvar(ls, str_checkname(ls), nparams++);
+ break;
+ case TK_dots: /* param -> `...' */
+ lj_lex_next(ls);
+ pt->flags |= PROTO_IS_VARARG;
+ break;
+ default:
+ err_syntax(ls, LJ_ERR_XPARAM);
+ break;
+ }
+ } while (!(pt->flags & PROTO_IS_VARARG) && testnext(ls, ','));
+ }
+ adjustlocalvars(ls, nparams);
+ pt->numparams = cast_byte(fs->nactvar);
+ reserveregs(fs, fs->nactvar); /* reserve register for parameters */
+}
+
+/* Parse a function body in a fresh FuncState and emit BC_FNEW in the
+** enclosing function. The first closure created marks the enclosing
+** prototype with PROTO_HAS_FNEW; returns emitted before that point must
+** later be fixed up to close upvalues (PROTO_FIXUP_RETURN).
+*/
+static void body(LexState *ls, ExpDesc *e, int needself, BCLine line)
+{
+ /* body -> `(' parlist `)' chunk END */
+ FuncState *fs, new_fs;
+ BCReg kidx;
+ open_func(ls, &new_fs);
+ new_fs.pt->linedefined = line;
+ checknext(ls, '(');
+ if (needself) { /* Method definition: implicit `self' parameter. */
+ new_localvarliteral(ls, "self", 0);
+ adjustlocalvars(ls, 1);
+ }
+ parlist(ls);
+ checknext(ls, ')');
+ chunk(ls);
+ new_fs.pt->lastlinedefined = ls->linenumber;
+ checkmatch(ls, TK_end, TK_function, line);
+ close_func(ls);
+ fs = ls->fs;
+ kidx = gcK(fs, obj2gco(new_fs.pt), LJ_TPROTO);
+ init_exp(e, VRELOCABLE, emitAD(fs, BC_FNEW, 0, kidx));
+ if (!(fs->pt->flags & PROTO_HAS_FNEW)) {
+ if (fs->pt->flags & PROTO_HAS_RETURN)
+ fs->pt->flags |= PROTO_FIXUP_RETURN;
+ fs->pt->flags |= PROTO_HAS_FNEW;
+ }
+}
+
+/* Parse a non-empty expression list; returns the number of expressions.
+** All but the last expression are forced into consecutive registers;
+** the last one is left open in *v.
+*/
+static BCReg explist1(LexState *ls, ExpDesc *v)
+{
+ /* explist1 -> expr { `,' expr } */
+ BCReg n = 1; /* at least one expression */
+ expr(ls, v);
+ while (testnext(ls, ',')) {
+ exp2nextreg(ls->fs, v);
+ expr(ls, v);
+ n++;
+ }
+ return n;
+}
+
+/* Parse the arguments of a function call and emit BC_CALL or BC_CALLM
+** (the latter when the last argument is itself a call with open results).
+** On entry e is the function in register `base'; on exit e is a VCALL
+** expression with aux = base.
+*/
+static void funcargs(LexState *ls, ExpDesc *e)
+{
+ FuncState *fs = ls->fs;
+ ExpDesc args;
+ BCIns ins;
+ BCReg base;
+ BCLine line = ls->linenumber;
+ switch (ls->token) {
+ case '(': { /* funcargs -> `(' [ explist1 ] `)' */
+ /* Reject `f\n(...)' which is ambiguous (call vs. new statement). */
+ if (line != ls->lastline)
+ err_syntax(ls, LJ_ERR_XAMBIG);
+ lj_lex_next(ls);
+ if (ls->token == ')') { /* arg list is empty? */
+ args.k = VVOID;
+ } else {
+ explist1(ls, &args);
+ if (args.k == VCALL) /* Pass on all results of the last call. */
+ setbc_b(bcptr(fs, &args), 0);
+ }
+ checkmatch(ls, ')', '(', line);
+ break;
+ }
+ case '{': { /* funcargs -> constructor */
+ constructor(ls, &args);
+ break;
+ }
+ case TK_string: { /* funcargs -> STRING */
+ init_exp(&args, VKSTR, 0);
+ args.u.sval = strV(&ls->tokenval);
+ lj_lex_next(ls); /* must use `seminfo' before `next' */
+ break;
+ }
+ default: {
+ err_syntax(ls, LJ_ERR_XFUNARG);
+ return;
+ }
+ }
+ lua_assert(e->k == VNONRELOC);
+ base = e->u.s.info; /* base register for call */
+ if (args.k == VCALL) {
+ ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1);
+ } else {
+ if (args.k != VVOID)
+ exp2nextreg(fs, &args); /* close last argument */
+ ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base);
+ }
+ init_exp(e, VCALL, emitINS(fs, ins));
+ e->u.s.aux = base;
+ fs->pt->lineinfo[fs->pc - 1] = line; /* Credit the call to the open paren. */
+ fs->freereg = base+1; /* call removes function and arguments and leaves
+ (unless changed) one result */
+}
+
+/* Parse a prefix expression: a variable name or a parenthesized
+** expression (which truncates multiple results to one).
+*/
+static void prefixexp(LexState *ls, ExpDesc *v)
+{
+ /* prefixexp -> NAME | '(' expr ')' */
+ if (ls->token == '(') {
+ BCLine line = ls->linenumber;
+ lj_lex_next(ls);
+ expr(ls, v);
+ checkmatch(ls, ')', '(', line);
+ dischargevars(ls->fs, v);
+ } else if (ls->token == TK_name) {
+ singlevar(ls, v);
+ } else {
+ err_syntax(ls, LJ_ERR_XSYMBOL);
+ }
+}
+
+/* Parse a primary expression: a prefix expression followed by any number
+** of field selectors, index expressions, method calls or call arguments.
+*/
+static void primaryexp(LexState *ls, ExpDesc *v)
+{
+ /* primaryexp ->
+ prefixexp { `.' NAME | `[' exp `]' | `:' NAME funcargs | funcargs } */
+ FuncState *fs = ls->fs;
+ prefixexp(ls, v);
+ for (;;) {
+ switch (ls->token) {
+ case '.': /* field */
+ field(ls, v);
+ break;
+ case '[': { /* `[' exp1 `]' */
+ ExpDesc key;
+ exp2anyreg(fs, v);
+ yindex(ls, &key);
+ indexexp(fs, v, &key);
+ break;
+ }
+ case ':': { /* `:' NAME funcargs */
+ ExpDesc key;
+ lj_lex_next(ls);
+ checkname(ls, &key);
+ methodexp(fs, v, &key);
+ funcargs(ls, v);
+ break;
+ }
+ case '(': case TK_string: case '{': /* funcargs */
+ exp2nextreg(fs, v);
+ funcargs(ls, v);
+ break;
+ default: return;
+ }
+ }
+}
+
+/* Parse a simple expression (literal, vararg, constructor, function or
+** primary expression). Literal cases fall through to consume the token
+** at the end; the composite cases return early.
+*/
+static void simpleexp(LexState *ls, ExpDesc *v)
+{
+ /* simpleexp -> NUMBER | STRING | NIL | true | false | ... |
+ constructor | FUNCTION body | primaryexp */
+ switch (ls->token) {
+ case TK_number:
+ init_exp(v, VKNUM, 0);
+ setnumV(&v->u.nval, numV(&ls->tokenval));
+ break;
+ case TK_string:
+ init_exp(v, VKSTR, 0);
+ v->u.sval = strV(&ls->tokenval);
+ break;
+ case TK_nil:
+ init_exp(v, VKNIL, 0);
+ break;
+ case TK_true:
+ init_exp(v, VKTRUE, 0);
+ break;
+ case TK_false:
+ init_exp(v, VKFALSE, 0);
+ break;
+ case TK_dots: { /* vararg */
+ FuncState *fs = ls->fs;
+ BCReg base;
+ /* `...' is only valid inside a vararg function. */
+ checkcond(ls, fs->pt->flags & PROTO_IS_VARARG, LJ_ERR_XDOTS);
+ reserveregs(fs, 1);
+ base = fs->freereg-1;
+ init_exp(v, VCALL, emitABC(fs, BC_VARG, base, 2, 1));
+ v->u.s.aux = base;
+ break;
+ }
+ case '{': /* constructor */
+ constructor(ls, v);
+ return;
+ case TK_function:
+ lj_lex_next(ls);
+ body(ls, v, 0, ls->linenumber);
+ return;
+ default:
+ primaryexp(ls, v);
+ return;
+ }
+ lj_lex_next(ls);
+}
+
+/* Increment the syntactic nesting level and error out on runaway
+** recursion (protects the C stack of the recursive-descent parser).
+*/
+static void enterlevel(LexState *ls)
+{
+ if (++ls->level >= LJ_MAX_XLEVEL)
+ lj_lex_error(ls, 0, LJ_ERR_XLEVELS);
+}
+
+/* Decrement the syntactic nesting level (counterpart of enterlevel). */
+#define leavelevel(ls) ((ls)->level--)
+
+/* Map a token to its unary operator, or OPR_NOUNOPR if it is none. */
+static UnOpr getunopr(LexToken tok)
+{
+ if (tok == TK_not) return OPR_NOT;
+ if (tok == '-') return OPR_MINUS;
+ if (tok == '#') return OPR_LEN;
+ return OPR_NOUNOPR;
+}
+
+/* Map a token to its binary operator, or OPR_NOBINOPR if it is none. */
+static BinOpr getbinopr(LexToken tok)
+{
+ switch (tok) {
+ case '+': return OPR_ADD;
+ case '-': return OPR_SUB;
+ case '*': return OPR_MUL;
+ case '/': return OPR_DIV;
+ case '%': return OPR_MOD;
+ case '^': return OPR_POW;
+ case TK_concat: return OPR_CONCAT;
+ case TK_eq: return OPR_EQ;
+ case TK_ne: return OPR_NE;
+ case '<': return OPR_LT;
+ case '>': return OPR_GT;
+ case TK_le: return OPR_LE;
+ case TK_ge: return OPR_GE;
+ case TK_and: return OPR_AND;
+ case TK_or: return OPR_OR;
+ default: return OPR_NOBINOPR;
+ }
+}
+
+/* Binding priorities, indexed by BinOpr (must match enum order, see
+** ORDER OPR). right < left makes an operator right-associative.
+*/
+static const struct {
+ uint8_t left; /* left priority for each binary operator */
+ uint8_t right; /* right priority */
+} priority[] = { /* ORDER OPR */
+ {6,6}, {6,6}, {7,7}, {7,7}, {7,7}, /* ADD SUB MUL DIV MOD */
+ {10,9}, {5,4}, /* POW CONCAT (right associative) */
+ {3,3}, {3,3}, /* EQ NE */
+ {3,3}, {3,3}, {3,3}, {3,3}, /* LT GE GT LE */
+ {2,2}, {1,1} /* AND OR */
+};
+
+#define UNARY_PRIORITY 8 /* priority for unary operators */
+
+/*
+** subexpr -> (simpleexp | unop subexpr) { binop subexpr }
+** where `binop' is any binary operator with a priority higher than `limit'
+**
+** Classic operator-precedence climbing: each recursion consumes the
+** right-hand side using the operator's right priority, so POW/CONCAT
+** (right < left) associate to the right.
+*/
+static BinOpr subexpr(LexState *ls, ExpDesc *v, uint32_t limit)
+{
+ BinOpr op;
+ UnOpr uop;
+ enterlevel(ls);
+ uop = getunopr(ls->token);
+ if (uop != OPR_NOUNOPR) {
+ lj_lex_next(ls);
+ subexpr(ls, v, UNARY_PRIORITY);
+ emit_unop(ls->fs, uop, v);
+ } else {
+ simpleexp(ls, v);
+ }
+ /* expand while operators have priorities higher than `limit' */
+ op = getbinopr(ls->token);
+ while (op != OPR_NOBINOPR && priority[op].left > limit) {
+ ExpDesc v2;
+ BinOpr nextop;
+ lj_lex_next(ls);
+ prepare_binop(ls->fs, op, v); /* Fixes the left operand first. */
+ /* read sub-expression with higher priority */
+ nextop = subexpr(ls, &v2, priority[op].right);
+ emit_binop(ls->fs, op, v, &v2);
+ op = nextop;
+ }
+ leavelevel(ls);
+ return op; /* return first untreated operator */
+}
+
+/* Parse a full expression (no binding-priority limit). */
+static void expr(LexState *ls, ExpDesc *v)
+{
+ subexpr(ls, v, 0);
+}
+
+static BCPos condexpr(LexState *ls)
+{
+ /* cond -> exp */
+ ExpDesc v;
+ expr(ls, &v); /* read condition */
+ if (v.k == VKNIL) v.k = VKFALSE; /* `falses' are all equal here */
+ goiftrue(ls->fs, &v);
+ return v.f;
+}
+
+/* -- Scope handling ------------------------------------------------------ */
+
+/* Open a new lexical block and link it onto the FuncState's block chain.
+** isbreakable marks loop blocks that `break' may target.
+*/
+static void enterblock(FuncState *fs, FuncBlock *bl, int isbreakable)
+{
+ bl->breaklist = NO_JMP;
+ bl->isbreakable = (uint8_t)isbreakable;
+ bl->nactvar = fs->nactvar;
+ bl->upval = 0;
+ bl->previous = fs->bl;
+ fs->bl = bl;
+ lua_assert(fs->freereg == fs->nactvar);
+}
+
+/* Close the innermost lexical block: unlink it, remove its local
+** variables, free their registers, close upvalues if any were captured,
+** and resolve any pending `break' jumps.
+*/
+static void leaveblock(FuncState *fs)
+{
+ FuncBlock *bl = fs->bl;
+ fs->bl = bl->previous;
+ removevars(fs->ls, bl->nactvar);
+ fs->freereg = fs->nactvar; /* free registers */
+ lua_assert(bl->nactvar == fs->nactvar);
+ /* a block either controls scope or breaks (never both) */
+ lua_assert(!bl->isbreakable || !bl->upval);
+ if (bl->upval)
+ emitAJ(fs, BC_UCLO, bl->nactvar, 0);
+ else /* avoid in upval case, it clears lasttarget and kills UCLO+JMP join */
+ patchtohere(fs, bl->breaklist);
+}
+
+/* Parse a chunk inside its own (non-breakable) scope block. */
+static void block(LexState *ls)
+{
+ /* block -> chunk */
+ FuncState *fs = ls->fs;
+ FuncBlock bl;
+ enterblock(fs, &bl, 0);
+ chunk(ls);
+ lua_assert(bl.breaklist == NO_JMP); /* No breaks target this block. */
+ leaveblock(fs);
+}
+
+/* -- Statements ---------------------------------------------------------- */
+
+/*
+** structure to chain all variables in the left-hand side of an
+** assignment (linked from the innermost/last parsed target outward)
+*/
+struct LHS_assign {
+ ExpDesc v; /* variable (global, local, upvalue, or indexed) */
+ struct LHS_assign *prev;
+};
+
+/*
+** check whether, in an assignment to a local variable, the local variable
+** is needed in a previous assignment (to a table). If so, save original
+** local value in a safe place and use this safe copy in the previous
+** assignment.
+**
+** NOTE(review): for a VINDEXED entry, u.s.info/u.s.aux appear to hold the
+** table and key registers respectively -- confirm against indexexp.
+*/
+static void check_conflict(LexState *ls, struct LHS_assign *lh,
+ const ExpDesc *v)
+{
+ FuncState *fs = ls->fs;
+ BCReg reg = fs->freereg; /* eventual position to save local variable */
+ int conflict = 0;
+ for (; lh; lh = lh->prev) {
+ if (lh->v.k == VINDEXED) {
+ if (lh->v.u.s.info == v->u.s.info) { /* conflict? */
+ conflict = 1;
+ lh->v.u.s.info = reg; /* previous assignment will use safe copy */
+ }
+ if (lh->v.u.s.aux == v->u.s.info) { /* conflict? */
+ conflict = 1;
+ lh->v.u.s.aux = reg; /* previous assignment will use safe copy */
+ }
+ }
+ }
+ if (conflict) {
+ emitAD(fs, BC_MOV, reg, v->u.s.info); /* make copy */
+ reserveregs(fs, 1);
+ }
+}
+
+/* Parse (the tail of) a multiple assignment. Recurses once per extra
+** LHS target, chaining them via LHS_assign, then parses the RHS list
+** at the innermost level and stores values right-to-left on unwind.
+*/
+static void assignment(LexState *ls, struct LHS_assign *lh, BCReg nvars)
+{
+ ExpDesc e;
+ checkcond(ls, VLOCAL <= lh->v.k && lh->v.k <= VINDEXED, LJ_ERR_XSYNTAX);
+ if (testnext(ls, ',')) { /* assignment -> `,' primaryexp assignment */
+ struct LHS_assign nv;
+ nv.prev = lh;
+ primaryexp(ls, &nv.v);
+ if (nv.v.k == VLOCAL)
+ check_conflict(ls, lh, &nv.v);
+ checklimit(ls->fs, ls->level + nvars, LJ_MAX_XLEVEL, "variable names");
+ assignment(ls, &nv, nvars+1);
+ } else { /* assignment -> `=' explist1 */
+ BCReg nexps;
+ checknext(ls, '=');
+ nexps = explist1(ls, &e);
+ if (nexps == nvars) {
+ if (e.k == VCALL) {
+ if (bc_op(*bcptr(ls->fs, &e)) == BC_VARG) { /* Vararg? */
+ ls->fs->freereg--;
+ e.k = VRELOCABLE;
+ } else { /* Use the call's base register directly. */
+ e.u.s.info = e.u.s.aux;
+ e.k = VNONRELOC;
+ }
+ }
+ storevar(ls->fs, &lh->v, &e);
+ return;
+ }
+ adjust_assign(ls, nvars, nexps, &e);
+ if (nexps > nvars)
+ ls->fs->freereg -= nexps - nvars; /* remove extra values */
+ }
+ init_exp(&e, VNONRELOC, ls->fs->freereg-1); /* default assignment */
+ storevar(ls->fs, &lh->v, &e);
+}
+
+static void breakstat(LexState *ls)
+{
+ FuncState *fs = ls->fs;
+ FuncBlock *bl = fs->bl;
+ int upval = 0;
+ while (bl && !bl->isbreakable) {
+ upval |= bl->upval;
+ bl = bl->previous;
+ }
+ if (!bl)
+ err_syntax(ls, LJ_ERR_XBREAK);
+ if (upval)
+ emitAJ(fs, BC_UCLO, bl->nactvar, 0);
+ concatjumps(fs, &bl->breaklist, emit_jump(fs));
+}
+
+static void whilestat(LexState *ls, BCLine line)
+{
+ /* whilestat -> WHILE cond DO block END */
+ FuncState *fs = ls->fs;
+ BCPos start, loop, condexit;
+ FuncBlock bl;
+ lj_lex_next(ls); /* skip WHILE */
+ start = fs->lasttarget = fs->pc;
+ condexit = condexpr(ls);
+ enterblock(fs, &bl, 1);
+ checknext(ls, TK_do);
+ loop = emitAD(fs, BC_LOOP, fs->nactvar, 0);
+ block(ls);
+ patchlist(fs, emit_jump(fs), start);
+ checkmatch(ls, TK_end, TK_while, line);
+ leaveblock(fs);
+ patchtohere(fs, condexit); /* false conditions finish the loop */
+ fixjump(fs, loop, fs->pc);
+}
+
+/* Parse a repeat-until loop. Two nested blocks are used: the outer one
+** is the breakable loop, the inner one scopes the body's locals, which
+** remain visible in the `until' condition.
+*/
+static void repeatstat(LexState *ls, BCLine line)
+{
+ /* repeatstat -> REPEAT block UNTIL cond */
+ FuncState *fs = ls->fs;
+ BCPos loop = fs->lasttarget = fs->pc;
+ BCPos condexit;
+ FuncBlock bl1, bl2;
+ enterblock(fs, &bl1, 1); /* loop block */
+ enterblock(fs, &bl2, 0); /* scope block */
+ lj_lex_next(ls); /* skip REPEAT */
+ emitAD(fs, BC_LOOP, fs->nactvar, 0);
+ chunk(ls);
+ checkmatch(ls, TK_until, TK_repeat, line);
+ condexit = condexpr(ls); /* read condition (inside scope block) */
+ if (!bl2.upval) { /* no upvalues? */
+ leaveblock(fs); /* finish scope */
+ } else { /* complete semantics when there are upvalues */
+ breakstat(ls); /* if condition then break */
+ patchtohere(fs, condexit); /* else... */
+ leaveblock(fs); /* finish scope... */
+ condexit = emit_jump(fs); /* and repeat */
+ }
+ patchlist(fs, condexit, loop); /* close the loop */
+ fixjump(fs, loop, fs->pc);
+ leaveblock(fs); /* finish loop */
+}
+
+/* Parse a single expression and force its value into the next register. */
+static void exp1(LexState *ls)
+{
+ ExpDesc e;
+ expr(ls, &e);
+ exp2nextreg(ls->fs, &e);
+}
+
+/* Parse the body shared by numeric and generic for loops. Numeric loops
+** use BC_FORI/BC_FORL; generic loops jump over the body to BC_ITERC and
+** loop back via BC_ITERL. `base' is the register of the first control var.
+*/
+static void forbody(LexState *ls, BCReg base, BCLine line, BCReg nvars,
+ int isnum)
+{
+ /* forbody -> DO block */
+ FuncBlock bl;
+ FuncState *fs = ls->fs;
+ BCPos loop, loopend;
+ adjustlocalvars(ls, 3); /* control variables */
+ checknext(ls, TK_do);
+ loop = isnum ? emitAJ(fs, BC_FORI, base, NO_JMP) :
+ emitAJ(fs, BC_JMP, fs->freereg, NO_JMP);
+ enterblock(fs, &bl, 0); /* scope for declared variables */
+ adjustlocalvars(ls, nvars);
+ reserveregs(fs, nvars);
+ block(ls);
+ leaveblock(fs); /* end of scope for declared variables */
+ if (isnum) {
+ loopend = emitAJ(fs, BC_FORL, base, NO_JMP);
+ fixjump(fs, loop, fs->pc);
+ } else {
+ fixjump(fs, loop, fs->pc); /* Initial jump targets the iterator call. */
+ emitABC(fs, BC_ITERC, base+3, nvars+1, 2+1);
+ loopend = emitAJ(fs, BC_ITERL, base+3, NO_JMP);
+ fs->pt->lineinfo[loopend-1] = line;
+ }
+ fs->pt->lineinfo[loopend] = line; /* pretend last op starts the loop */
+ fixjump(fs, loopend, loop+1); /* Loop back to just after FORI/JMP. */
+}
+
+/* Parse a numeric for loop: three hidden control variables (index, limit,
+** step) precede the visible loop variable in consecutive registers.
+*/
+static void fornum(LexState *ls, GCstr *varname, BCLine line)
+{
+ /* fornum -> NAME = exp1,exp1[,exp1] forbody */
+ FuncState *fs = ls->fs;
+ BCReg base = fs->freereg;
+ new_localvarliteral(ls, "(for index)", FORL_IDX);
+ new_localvarliteral(ls, "(for limit)", FORL_STOP);
+ new_localvarliteral(ls, "(for step)", FORL_STEP);
+ new_localvar(ls, varname, FORL_EXT);
+ checknext(ls, '=');
+ exp1(ls); /* initial value */
+ checknext(ls, ',');
+ exp1(ls); /* limit */
+ if (testnext(ls, ',')) {
+ exp1(ls); /* optional step */
+ } else { /* default step = 1 */
+ emitAD(fs, BC_KSHORT, fs->freereg, 1);
+ reserveregs(fs, 1);
+ }
+ forbody(ls, base, line, 1, 1);
+}
+
+/* Parse a generic for loop: three hidden control variables (generator,
+** state, control) precede the declared loop variables.
+*/
+static void forlist(LexState *ls, GCstr *indexname)
+{
+ /* forlist -> NAME {,NAME} IN explist1 forbody */
+ FuncState *fs = ls->fs;
+ ExpDesc e;
+ BCReg nvars = 0;
+ BCLine line;
+ BCReg base = fs->freereg;
+ /* create control variables */
+ new_localvarliteral(ls, "(for generator)", nvars++);
+ new_localvarliteral(ls, "(for state)", nvars++);
+ new_localvarliteral(ls, "(for control)", nvars++);
+ /* create declared variables */
+ new_localvar(ls, indexname, nvars++);
+ while (testnext(ls, ','))
+ new_localvar(ls, str_checkname(ls), nvars++);
+ checknext(ls, TK_in);
+ line = ls->linenumber;
+ adjust_assign(ls, 3, explist1(ls, &e), &e); /* Exactly 3 control values. */
+ checkframe(fs, 3); /* extra space to call generator */
+ forbody(ls, base, line, nvars - 3, 0);
+}
+
+/* Parse a for statement and dispatch to the numeric or generic variant
+** based on the token following the first variable name.
+*/
+static void forstat(LexState *ls, BCLine line)
+{
+ /* forstat -> FOR (fornum | forlist) END */
+ FuncState *fs = ls->fs;
+ GCstr *varname;
+ FuncBlock bl;
+ enterblock(fs, &bl, 1); /* scope for loop and control variables */
+ lj_lex_next(ls); /* skip `for' */
+ varname = str_checkname(ls); /* first variable name */
+ switch (ls->token) {
+ case '=': fornum(ls, varname, line); break;
+ case ',': case TK_in: forlist(ls, varname); break;
+ default: err_syntax(ls, LJ_ERR_XFOR);
+ }
+ checkmatch(ls, TK_end, TK_for, line);
+ leaveblock(fs); /* loop scope (`break' jumps to this point) */
+}
+
+/* Parse one condition + `then' branch; returns the false-jump list of
+** the condition for the caller (ifstat) to patch.
+*/
+static BCPos test_then_block(LexState *ls)
+{
+ /* test_then_block -> [IF | ELSEIF] cond THEN block */
+ BCPos condexit;
+ lj_lex_next(ls); /* skip IF or ELSEIF */
+ condexit = condexpr(ls);
+ checknext(ls, TK_then);
+ block(ls); /* `then' part */
+ return condexit;
+}
+
+/* Parse an if statement. `flist' collects jumps taken when the current
+** condition is false; `escapelist' collects jumps past the remaining
+** branches after a taken branch completes.
+*/
+static void ifstat(LexState *ls, BCLine line)
+{
+ /* ifstat -> IF cond THEN block {ELSEIF cond THEN block} [ELSE block] END */
+ FuncState *fs = ls->fs;
+ BCPos flist;
+ BCPos escapelist = NO_JMP;
+ flist = test_then_block(ls); /* IF cond THEN block */
+ while (ls->token == TK_elseif) {
+ concatjumps(fs, &escapelist, emit_jump(fs));
+ patchtohere(fs, flist);
+ flist = test_then_block(ls); /* ELSEIF cond THEN block */
+ }
+ if (ls->token == TK_else) {
+ concatjumps(fs, &escapelist, emit_jump(fs));
+ patchtohere(fs, flist);
+ lj_lex_next(ls); /* skip ELSE (after patch, for correct line info) */
+ block(ls); /* `else' part */
+ } else {
+ concatjumps(fs, &escapelist, flist);
+ }
+ patchtohere(fs, escapelist);
+ checkmatch(ls, TK_end, TK_if, line);
+}
+
+static void localfunc(LexState *ls)
+{
+ ExpDesc v, b;
+ FuncState *fs = ls->fs;
+ new_localvar(ls, str_checkname(ls), 0);
+ init_exp(&v, VLOCAL, fs->freereg);
+ reserveregs(fs, 1);
+ adjustlocalvars(ls, 1);
+ body(ls, &b, 0, ls->linenumber);
+ storevar(fs, &v, &b);
+ /* debug information will only see the variable after this point! */
+ getlocvar(fs, fs->nactvar - 1).startpc = fs->pc;
+}
+
+static void localstat(LexState *ls)
+{
+ /* stat -> LOCAL NAME {`,' NAME} [`=' explist1] */
+ BCReg nvars = 0;
+ BCReg nexps;
+ ExpDesc e;
+ do {
+ new_localvar(ls, str_checkname(ls), nvars++);
+ } while (testnext(ls, ','));
+ if (testnext(ls, '=')) {
+ nexps = explist1(ls, &e);
+ } else {
+ e.k = VVOID;
+ nexps = 0;
+ }
+ adjust_assign(ls, nvars, nexps, &e);
+ adjustlocalvars(ls, nvars);
+}
+
+/* Parse the name of a function statement into an assignable expression.
+** Returns 1 if it is a method definition (`:' NAME), which needs an
+** implicit `self' parameter.
+*/
+static int func_name(LexState *ls, ExpDesc *v)
+{
+ /* func_name -> NAME {field} [`:' NAME] */
+ int needself = 0;
+ singlevar(ls, v);
+ while (ls->token == '.')
+ field(ls, v);
+ if (ls->token == ':') {
+ needself = 1;
+ field(ls, v);
+ }
+ return needself;
+}
+
+/* Parse a function statement and store the closure into its name. */
+static void funcstat(LexState *ls, BCLine line)
+{
+ /* funcstat -> FUNCTION func_name body */
+ FuncState *fs;
+ int needself;
+ ExpDesc v, b;
+ lj_lex_next(ls); /* skip FUNCTION */
+ needself = func_name(ls, &v);
+ body(ls, &b, needself, line);
+ fs = ls->fs;
+ storevar(fs, &v, &b);
+ fs->pt->lineinfo[fs->pc - 1] = line; /* Credit the store to the def line. */
+}
+
+/* Parse an expression statement: either a call (results discarded) or
+** the start of a (multiple) assignment.
+*/
+static void exprstat(LexState *ls)
+{
+ /* stat -> func | assignment */
+ FuncState *fs = ls->fs;
+ struct LHS_assign v;
+ primaryexp(ls, &v.v);
+ if (v.v.k == VCALL) { /* stat -> func */
+ setbc_b(bcptr(fs, &v.v), 1); /* call statement uses no results */
+ } else { /* stat -> assignment */
+ v.prev = NULL;
+ assignment(ls, &v, 1);
+ }
+}
+
+/* Return 1 if the token may legally follow (i.e. terminate) a block. */
+static int block_follow(LexToken token)
+{
+ return (token == TK_else || token == TK_elseif || token == TK_end ||
+ token == TK_until || token == TK_eof);
+}
+
+/* Parse a return statement. A single returned call becomes a tail call
+** (BC_CALL/BC_CALLM rewritten to BC_CALLT/BC_CALLMT); otherwise emit
+** BC_RET0/BC_RET1/BC_RET/BC_RETM, preceded by BC_UCLO if closures exist.
+*/
+static void retstat(LexState *ls)
+{
+ /* stat -> RETURN explist */
+ BCIns ins;
+ FuncState *fs = ls->fs;
+ lj_lex_next(ls); /* skip RETURN */
+ fs->pt->flags |= PROTO_HAS_RETURN;
+ if (block_follow(ls->token) || ls->token == ';') {
+ ins = BCINS_AD(BC_RET0, 0, 1); /* return no values */
+ } else {
+ ExpDesc e;
+ BCReg nret = explist1(ls, &e); /* optional return values */
+ if (nret == 1) {
+ if (e.k == VCALL) {
+ BCIns *i = bcptr(fs, &e);
+ /* It doesn't pay off to add BC_VARGT just for 'return ...'. */
+ if (bc_op(*i) == BC_VARG) goto notailcall;
+ fs->pc--; /* Drop the call ins; re-emit as a tail call below. */
+ ins = BCINS_AD(bc_op(*i)-BC_CALL+BC_CALLT, bc_a(*i), bc_c(*i));
+ } else {
+ ins = BCINS_AD(BC_RET1, exp2anyreg(fs, &e), 2);
+ }
+ } else {
+ if (e.k == VCALL) {
+ notailcall:
+ setbc_b(bcptr(fs, &e), 0); /* Return all results of the call. */
+ ins = BCINS_AD(BC_RETM, fs->nactvar, e.u.s.aux - fs->nactvar);
+ } else {
+ exp2nextreg(fs, &e); /* values must go to the `stack' */
+ ins = BCINS_AD(BC_RET, fs->nactvar, nret+1);
+ }
+ }
+ }
+ if (fs->pt->flags & PROTO_HAS_FNEW)
+ emitAJ(fs, BC_UCLO, 0, 0); /* Close upvalues before returning. */
+ emitINS(fs, ins);
+}
+
+static int statement(LexState *ls)
+{
+ BCLine line = ls->linenumber; /* may be needed for error messages */
+ switch (ls->token) {
+ case TK_if:
+ ifstat(ls, line);
+ return 0;
+ case TK_while:
+ whilestat(ls, line);
+ return 0;
+ case TK_do:
+ lj_lex_next(ls); /* skip DO */
+ block(ls);
+ checkmatch(ls, TK_end, TK_do, line);
+ return 0;
+ case TK_for:
+ forstat(ls, line);
+ return 0;
+ case TK_repeat:
+ repeatstat(ls, line);
+ return 0;
+ case TK_function:
+ funcstat(ls, line);
+ return 0;
+ case TK_local:
+ lj_lex_next(ls); /* skip LOCAL */
+ if (testnext(ls, TK_function)) /* local function? */
+ localfunc(ls);
+ else
+ localstat(ls);
+ return 0;
+ case TK_return:
+ retstat(ls);
+ return 1; /* must be last statement */
+ case TK_break:
+ lj_lex_next(ls); /* skip BREAK */
+ breakstat(ls);
+ return 1; /* must be last statement */
+ default:
+ exprstat(ls);
+ return 0;
+ }
+}
+
+/* Parse a sequence of statements. Temporary registers are released after
+** every statement (freereg is reset down to the active local count).
+*/
+static void chunk(LexState *ls)
+{
+ /* chunk -> { stat [`;'] } */
+ int islast = 0;
+ enterlevel(ls);
+ while (!islast && !block_follow(ls->token)) {
+ islast = statement(ls);
+ testnext(ls, ';');
+ lua_assert(ls->fs->pt->framesize >= ls->fs->freereg &&
+ ls->fs->freereg >= ls->fs->nactvar);
+ ls->fs->freereg = ls->fs->nactvar; /* free registers */
+ }
+ leavelevel(ls);
+}
+
diff --git a/src/lj_parse.h b/src/lj_parse.h
new file mode 100644
index 00000000..72aac2c6
--- /dev/null
+++ b/src/lj_parse.h
@@ -0,0 +1,15 @@
+/*
+** Lua parser (source code -> bytecode).
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_PARSE_H
+#define _LJ_PARSE_H
+
+#include "lj_obj.h"
+#include "lj_lex.h"
+
+/* Parse a chunk from a lexer state; returns the main function prototype. */
+LJ_FUNC GCproto *lj_parse(LexState *ls);
+/* NOTE(review): definition not in view -- presumably interns the string
+** and anchors it against GC for the parser; confirm in lj_parse.c. */
+LJ_FUNC GCstr *lj_parse_keepstr(LexState *ls, const char *str, size_t l);
+
+#endif
diff --git a/src/lj_record.c b/src/lj_record.c
new file mode 100644
index 00000000..e101ba23
--- /dev/null
+++ b/src/lj_record.c
@@ -0,0 +1,2136 @@
+/*
+** Trace recorder (bytecode -> SSA IR).
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_record_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_state.h"
+#include "lj_frame.h"
+#include "lj_bc.h"
+#include "lj_ff.h"
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+#include "lj_trace.h"
+#include "lj_record.h"
+#include "lj_snap.h"
+#include "lj_asm.h"
+#include "lj_dispatch.h"
+#include "lj_vm.h"
+
+/* Some local macros to save typing. Undef'd at the end. */
+#define IR(ref) (&J->cur.ir[(ref)])
+
+/* Pass IR on to next optimization in chain (FOLD). */
+#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
+
+/* Emit raw IR without passing through optimizations. */
+#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
+
+/* Context for recording an indexed load/store.
+** The *v fields mirror the runtime values behind the TRefs, so the
+** recorder can specialize to the objects actually seen at record time.
+*/
+typedef struct RecordIndex {
+ TValue tabv; /* Runtime value of table (or indexed object). */
+ TValue keyv; /* Runtime value of key. */
+ TValue valv; /* Runtime value of stored value. */
+ TValue mobjv; /* Runtime value of metamethod object. */
+ GCtab *mtv; /* Runtime value of metatable object. */
+ cTValue *oldv; /* Runtime value of previously stored value. */
+ TRef tab; /* Table (or indexed object) reference. */
+ TRef key; /* Key reference. */
+ TRef val; /* Value reference for a store or 0 for a load. */
+ TRef mt; /* Metatable reference. */
+ TRef mobj; /* Metamethod object reference. */
+ int idxchain; /* Index indirections left or 0 for raw lookup. */
+} RecordIndex;
+
+/* Requested results from rec_call(). */
+enum {
+ /* Non-negative numbers are number of requested results. */
+ CALLRES_MULTI = -1, /* Return multiple results. */
+ CALLRES_TAILCALL = -2, /* Tail call. */
+ CALLRES_PENDING = -3, /* Call is pending, no results yet. */
+ CALLRES_CONT = -4 /* Continuation call. */
+};
+
+/* Forward declarations. */
+static TRef rec_idx(jit_State *J, RecordIndex *ix);
+static int rec_call(jit_State *J, BCReg func, int cres, int nargs);
+
+/* -- Sanity checks ------------------------------------------------------- */
+
+#ifdef LUA_USE_ASSERT
+/* Sanity check the whole IR -- sloooow. */
+/* Verifies per-opcode operand modes and the operand ordering invariant
+** relative to REF_BIAS (constants below, instructions above).
+*/
+static void rec_check_ir(jit_State *J)
+{
+ IRRef i, nins = J->cur.nins, nk = J->cur.nk;
+ lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536);
+ for (i = nins-1; i >= nk; i--) {
+ IRIns *ir = IR(i);
+ uint32_t mode = lj_ir_mode[ir->o];
+ IRRef op1 = ir->op1;
+ IRRef op2 = ir->op2;
+ switch (irm_op1(mode)) {
+ case IRMnone: lua_assert(op1 == 0); break;
+ case IRMref: lua_assert(op1 >= nk);
+ lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
+ case IRMlit: break;
+ case IRMcst: lua_assert(i < REF_BIAS); continue;
+ }
+ switch (irm_op2(mode)) {
+ case IRMnone: lua_assert(op2 == 0); break;
+ case IRMref: lua_assert(op2 >= nk);
+ lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break;
+ case IRMlit: break;
+ case IRMcst: lua_assert(0); break; /* op2 can never be a constant. */
+ }
+ /* The prev chain must link instructions of the same opcode. */
+ if (ir->prev) {
+ lua_assert(ir->prev >= nk);
+ lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i);
+ lua_assert(IR(ir->prev)->o == ir->o);
+ }
+ }
+}
+
+/* Sanity check the slots. */
+/* Checks that every live slot TRef points at a valid IR instruction
+** whose type tag agrees with the TRef's type tag.
+*/
+static void rec_check_slots(jit_State *J)
+{
+ BCReg s, nslots = J->baseslot + J->maxslot;
+ lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS);
+ lua_assert(nslots < LJ_MAX_JSLOTS);
+ for (s = 0; s < nslots; s++) {
+ TRef tr = J->slot[s];
+ if (tr) {
+ IRRef ref = tref_ref(tr);
+ lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
+ lua_assert(irt_t(IR(ref)->t) == tref_t(tr));
+ }
+ }
+}
+#endif
+
+/* -- Type handling and specialization ------------------------------------ */
+
+/* Note: these functions return tagged references (TRef). */
+
+/* Specialize a slot to a specific type. Note: slot can be negative! */
+/* Emits an unguarded SLOAD of the given type and caches the resulting
+** ref in J->base[slot].
+*/
+static TRef sloadt(jit_State *J, int32_t slot, IRType t, int mode)
+{
+ /* No guard, since none of the callers need a type-checking SLOAD. */
+ TRef ref = emitir_raw(IRT(IR_SLOAD, t), (int32_t)J->baseslot+slot, mode);
+ J->base[slot] = ref;
+ return ref;
+}
+
+/* Specialize a slot to the runtime type. Note: slot can be negative! */
+/* Emits a guarded SLOAD specialized to the type currently in the
+** interpreter stack slot; caches the ref in J->base[slot].
+*/
+static TRef sload(jit_State *J, int32_t slot)
+{
+ IRType t = itype2irt(&J->L->base[slot]);
+ TRef ref = emitir_raw(IRTG(IR_SLOAD, t), (int32_t)J->baseslot+slot, 0);
+ if (irtype_ispri(t)) ref = TREF_PRI(t); /* Canonicalize primitive refs. */
+ J->base[slot] = ref;
+ return ref;
+}
+
+/* Get TRef from slot. Load slot and specialize if not done already. */
+#define getslot(J, s) (J->base[(s)] ? J->base[(s)] : sload(J, (int32_t)(s)))
+
+/* Get TRef for current function. */
+/* The function lives in slot -1, one below the frame base. */
+static TRef getcurrf(jit_State *J)
+{
+ if (J->base[-1]) {
+ IRIns *ir = IR(tref_ref(J->base[-1]));
+ if (ir->o == IR_FRAME) /* Shortcut if already specialized. */
+ return TREF(ir->op2, IRT_FUNC); /* Return TRef of KFUNC. */
+ return J->base[-1];
+ } else {
+ /* Slot not loaded yet: only legal in the outermost frame. */
+ lua_assert(J->baseslot == 1);
+ return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY);
+ }
+}
+
+/* Compare for raw object equality.
+** Returns 0 if the objects are the same.
+** Returns 1 if they are different, but the same type.
+** Returns 2 for two different types.
+** Comparisons between primitives always return 1 -- no caller cares about it.
+*/
+static int rec_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv)
+{
+ int diff = !lj_obj_equal(av, bv);
+ if (!tref_isk2(a, b)) { /* Shortcut, also handles primitives. */
+ IRType ta = tref_type(a);
+ IRType tb = tref_type(b);
+ if (ta != tb) {
+ /* Widen mixed number/int comparisons to number/number comparison. */
+ if (ta == IRT_INT && tb == IRT_NUM) {
+ a = emitir(IRTN(IR_TONUM), a, 0);
+ ta = IRT_NUM;
+ } else if (ta == IRT_NUM && tb == IRT_INT) {
+ b = emitir(IRTN(IR_TONUM), b, 0);
+ } else {
+ return 2; /* Two different types are never equal. */
+ }
+ }
+ /* Guard on the runtime outcome so the trace stays specialized. */
+ emitir(IRTG(diff ? IR_NE : IR_EQ, ta), a, b);
+ }
+ return diff;
+}
+
+/* -- Record loop ops ----------------------------------------------------- */
+
+/* Loop event. */
+/* Returned by rec_for/rec_iterl/rec_loop, consumed by rec_loop_*. */
+typedef enum {
+ LOOPEV_LEAVE, /* Loop is left or not entered. */
+ LOOPEV_ENTER /* Loop is entered. */
+} LoopEvent;
+
+/* Canonicalize slots: convert integers to numbers. */
+/* Widens every integer slot back to a number with TONUM, except refs
+** from read-only SLOADs which keep their integer type.
+*/
+static void canonicalize_slots(jit_State *J)
+{
+ BCReg s;
+ for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
+ TRef tr = J->slot[s];
+ if (tref_isinteger(tr)) {
+ IRIns *ir = IR(tref_ref(tr));
+ if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY)))
+ J->slot[s] = emitir(IRTN(IR_TONUM), tr, 0);
+ }
+ }
+}
+
+/* Stop recording. */
+/* Ends the trace and sets its link target. lnk is the trace number to
+** link to; lnk == J->curtrace forms a self-loop.
+*/
+static void rec_stop(jit_State *J, TraceNo lnk)
+{
+ lj_trace_end(J);
+ J->cur.link = (uint16_t)lnk;
+ if (lnk == J->curtrace) { /* Looping back? */
+ if ((J->flags & JIT_F_OPT_LOOP)) /* Shall we try to create a loop? */
+ goto nocanon; /* Do not canonicalize or we lose the narrowing. */
+ if (J->cur.root) /* Otherwise ensure we always link to the root trace. */
+ J->cur.link = J->cur.root;
+ }
+ canonicalize_slots(J);
+nocanon:
+ /* Note: all loop ops must set J->pc to the following instruction! */
+ lj_snap_add(J); /* Add loop snapshot. */
+ J->needsnap = 0;
+ J->mergesnap = 1; /* In case recording continues. */
+}
+
+/* Peek before FORI to find a const initializer, otherwise load from slot. */
+/* Returns a constant ref of type t when the bytecode preceding the FORI
+** provably assigns a KSHORT/KNUM to the slot, otherwise a read-only,
+** inherited SLOAD of the slot.
+*/
+static TRef fori_arg(jit_State *J, const BCIns *pc, BCReg slot, IRType t)
+{
+ /* A store to slot-1 means there's no conditional assignment for slot. */
+ if (bc_a(pc[-1]) == slot-1 && bcmode_a(bc_op(pc[-1])) == BCMdst) {
+ BCIns ins = pc[0];
+ if (bc_a(ins) == slot) {
+ if (bc_op(ins) == BC_KSHORT) {
+ int32_t k = (int32_t)(int16_t)bc_d(ins); /* Sign-extend 16 bit. */
+ if (t == IRT_INT)
+ return lj_ir_kint(J, k);
+ else
+ return lj_ir_knum(J, cast_num(k));
+ } else if (bc_op(ins) == BC_KNUM) {
+ lua_Number n = J->pt->k.n[bc_d(ins)];
+ if (t == IRT_INT)
+ return lj_ir_kint(J, lj_num2int(n));
+ else
+ return lj_ir_knum(J, n);
+ }
+ }
+ }
+ if (J->base[slot])
+ return J->base[slot];
+ else
+ return sloadt(J, (int32_t)slot, t, IRSLOAD_READONLY|IRSLOAD_INHERIT);
+}
+
+/* Simulate the runtime behavior of the FOR loop iterator.
+** It's important to exactly reproduce the semantics of the interpreter.
+*/
+/* Computes whether the loop is (re-)entered or left, and picks the IR
+** comparison op to guard that outcome. The step's sign is read from the
+** hi word of its TValue (sign bit of the IEEE double).
+*/
+static LoopEvent for_iter(jit_State *J, IROp *op, BCReg ra, int isforl)
+{
+ cTValue *forbase = &J->L->base[ra];
+ lua_Number stopv = numV(&forbase[FORL_STOP]);
+ lua_Number idxv = numV(&forbase[FORL_IDX]);
+ if (isforl) /* FORL re-check: advance the index first, like the VM. */
+ idxv += numV(&forbase[FORL_STEP]);
+ if ((int32_t)forbase[FORL_STEP].u32.hi >= 0) { /* Non-negative step. */
+ if (idxv <= stopv) { *op = IR_LE; return LOOPEV_ENTER; }
+ *op = IR_GT; return LOOPEV_LEAVE;
+ } else {
+ if (stopv <= idxv) { *op = IR_GE; return LOOPEV_ENTER; }
+ *op = IR_LT; return LOOPEV_LEAVE;
+ }
+}
+
+/* Record FORL/JFORL or FORI/JFORI. */
+static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
+{
+ BCReg ra = bc_a(*fori);
+ IROp op;
+ LoopEvent ev = for_iter(J, &op, ra, isforl);
+ TRef *tr = &J->base[ra];
+ TRef idx, stop;
+ IRType t;
+ if (isforl) { /* Handle FORL/JFORL opcodes. */
+ TRef step;
+ idx = tr[FORL_IDX];
+ if (!idx) idx = sloadt(J, (int32_t)(ra+FORL_IDX), IRT_NUM, 0);
+ t = tref_type(idx);
+ /* STOP/STEP may be const-foldable from the preceding bytecode. */
+ stop = fori_arg(J, fori-2, ra+FORL_STOP, t);
+ step = fori_arg(J, fori-1, ra+FORL_STEP, t);
+ tr[FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step);
+ } else { /* Handle FORI/JFORI opcodes. */
+ BCReg i;
+ t = IRT_NUM;
+ for (i = FORL_IDX; i <= FORL_STEP; i++) {
+ lua_assert(J->base[ra+i] != 0); /* Assumes the slots are already set. */
+ tr[i] = lj_ir_tonum(J, J->base[ra+i]);
+ }
+ idx = tr[FORL_IDX];
+ stop = tr[FORL_STOP];
+ if (!tref_isk(tr[FORL_STEP])) /* Non-const step: need direction guard. */
+ emitir(IRTG(((op-IR_LT)>>1)+IR_LT, IRT_NUM),
+ tr[FORL_STEP], lj_ir_knum_zero(J));
+ }
+
+ tr[FORL_EXT] = idx;
+ /* Set the slot/pc state for the opposite outcome first: this is the
+ ** state the pre-guard snapshot must describe (taken on guard failure).
+ */
+ if (ev == LOOPEV_LEAVE) {
+ J->maxslot = ra+FORL_EXT+1;
+ J->pc = fori+1;
+ } else {
+ J->maxslot = ra;
+ J->pc = fori+bc_j(*fori)+1;
+ }
+ lj_snap_add(J);
+
+ emitir(IRTG(op, t), idx, stop); /* Guard the observed loop direction. */
+
+ /* Now switch to the state for the path actually being recorded. */
+ if (ev == LOOPEV_LEAVE) {
+ J->maxslot = ra;
+ J->pc = fori+bc_j(*fori)+1;
+ } else {
+ J->maxslot = ra+FORL_EXT+1;
+ J->pc = fori+1;
+ }
+ J->needsnap = 1;
+ return ev;
+}
+
+/* Record ITERL/JITERL. */
+/* A nil first result from the preceding ITERC terminates the loop. */
+static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
+{
+ BCReg ra = bc_a(iterins);
+ lua_assert(J->base[ra] != 0);
+ if (!tref_isnil(J->base[ra])) { /* Looping back? */
+ J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
+ J->maxslot = ra-1+bc_b(J->pc[-1]);
+ J->pc += bc_j(iterins)+1;
+ return LOOPEV_ENTER;
+ } else {
+ J->maxslot = ra-3;
+ J->pc++;
+ return LOOPEV_LEAVE;
+ }
+}
+
+/* Record LOOP/JLOOP. Now, that was easy. */
+/* Unconditional loops only shrink maxslot and advance the pc. */
+static LoopEvent rec_loop(jit_State *J, BCReg ra)
+{
+ J->maxslot = ra;
+ J->pc++;
+ return LOOPEV_ENTER;
+}
+
+/* Check if a loop repeatedly failed to trace because it didn't loop back. */
+/* Scans the penalty cache for this pc; a high penalty with reason
+** LJ_TRERR_LLEAVE indicates an inner loop with a low trip count.
+*/
+static int innerloopleft(jit_State *J, const BCIns *pc)
+{
+ ptrdiff_t i;
+ for (i = 0; i < PENALTY_SLOTS; i++)
+ if (J->penalty[i].pc == pc) {
+ if (J->penalty[i].reason == LJ_TRERR_LLEAVE &&
+ J->penalty[i].val >= 2*HOTCOUNT_MIN_PENALTY)
+ return 1;
+ break; /* At most one penalty slot per pc. */
+ }
+ return 0;
+}
+
+/* Handle the case when an interpreted loop op is hit. */
+/* Either closes the root trace into a loop, aborts via lj_trace_err,
+** or continues recording (unrolling) subject to the unroll limits.
+*/
+static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
+{
+ if (J->parent == 0) {
+ if (pc == J->startpc && J->framedepth == 0) { /* Same loop? */
+ if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
+ lj_trace_err(J, LJ_TRERR_LLEAVE);
+ rec_stop(J, J->curtrace); /* Root trace forms a loop. */
+ } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */
+ /* It's usually better to abort here and wait until the inner loop
+ ** is traced. But if the inner loop repeatedly didn't loop back,
+ ** this indicates a low trip count. In this case try unrolling
+ ** an inner loop even in a root trace. But it's better to be a bit
+ ** more conservative here and only do it for very short loops.
+ */
+ if (!innerloopleft(J, pc))
+ lj_trace_err(J, LJ_TRERR_LINNER); /* Root trace hit an inner loop. */
+ if ((J->loopref && J->cur.nins - J->loopref > 8) || --J->loopunroll < 0)
+ lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */
+ J->loopref = J->cur.nins;
+ }
+ } else if (ev != LOOPEV_LEAVE) { /* Side trace enters an inner loop. */
+ J->loopref = J->cur.nins;
+ if (--J->loopunroll < 0)
+ lj_trace_err(J, LJ_TRERR_LUNROLL); /* Limit loop unrolling. */
+ } /* Side trace continues across a loop that's left or not entered. */
+}
+
+/* Handle the case when an already compiled loop op is hit. */
+/* lnk is the already-compiled trace covering the loop op. */
+static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
+{
+ if (J->parent == 0) { /* Root trace hit an inner loop. */
+ /* Better let the inner loop spawn a side trace back here. */
+ lj_trace_err(J, LJ_TRERR_LINNER);
+ } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */
+ J->instunroll = 0; /* Cannot continue across a compiled loop op. */
+ if (J->pc == J->startpc && J->framedepth == 0)
+ lnk = J->curtrace; /* Can form an extra loop. */
+ rec_stop(J, lnk); /* Link to the loop. */
+ } /* Side trace continues across a loop that's left or not entered. */
+}
+
+/* -- Metamethod handling ------------------------------------------------- */
+
+/* Prepare to record call to metamethod. */
+/* Sets up a continuation frame above the current frame and returns the
+** base slot for the metamethod call. cont is the ASM continuation to
+** run when the call returns. Slots between maxslot and top are cleared.
+*/
+static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
+{
+ BCReg s, top = curr_proto(J->L)->framesize;
+ TRef trcont;
+ setcont(&J->L->base[top], cont);
+#if LJ_64
+ /* Continuations are encoded relative to lj_vm_asm_begin on 64 bit. */
+ trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin));
+#else
+ trcont = lj_ir_kptr(J, (void *)cont);
+#endif
+ J->base[top] = emitir(IRTG(IR_FRAME, IRT_PTR), trcont, trcont);
+ for (s = J->maxslot; s < top; s++)
+ J->base[s] = 0;
+ return top+1;
+}
+
+/* Record metamethod lookup. */
+/* Loads the metatable of ix->tab (or the base metatable for other
+** types), then does a raw lookup of the metamethod name in it.
+** Fills in ix->mt/mtv/mobj/mobjv. Returns 1 if a metamethod was found.
+*/
+static int rec_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
+{
+ RecordIndex mix;
+ GCtab *mt;
+ if (tref_istab(ix->tab)) {
+ mt = tabref(tabV(&ix->tabv)->metatable);
+ mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META);
+ } else if (tref_isudata(ix->tab)) {
+ mt = tabref(udataV(&ix->tabv)->metatable);
+ mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META);
+ } else {
+ /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */
+ mt = tabref(J2G(J)->basemt[itypemap(&ix->tabv)]);
+ if (mt == NULL)
+ return 0; /* No metamethod. */
+ mix.tab = lj_ir_ktab(J, mt);
+ goto nocheck;
+ }
+ ix->mt = mix.tab;
+ /* Guard for presence/absence of the metatable. */
+ emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mix.tab, lj_ir_knull(J, IRT_TAB));
+nocheck:
+ if (mt) {
+ GCstr *mmstr = strref(J2G(J)->mmname[mm]);
+ cTValue *mo = lj_tab_getstr(mt, mmstr);
+ if (mo && !tvisnil(mo))
+ copyTV(J->L, &ix->mobjv, mo);
+ ix->mtv = mt;
+ settabV(J->L, &mix.tabv, mt);
+ setstrV(J->L, &mix.keyv, mmstr);
+ mix.key = lj_ir_kstr(J, mmstr);
+ mix.val = 0;
+ mix.idxchain = 0; /* Raw lookup: no metamethod chain on the mt itself. */
+ ix->mobj = rec_idx(J, &mix);
+ return !tref_isnil(ix->mobj); /* 1 if metamethod found, 0 if not. */
+ }
+ return 0; /* No metamethod. */
+}
+
+/* Record call to arithmetic metamethod (and MM_len). */
+/* Returns the result ref, or 0 if the call could not be resolved yet. */
+static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
+{
+ /* Set up metamethod call first to save ix->tab and ix->tabv. */
+ BCReg func = rec_mm_prep(J, lj_cont_ra);
+ TRef *base = J->base + func;
+ TValue *basev = J->L->base + func;
+ base[1] = ix->tab; base[2] = ix->key;
+ copyTV(J->L, basev+1, &ix->tabv);
+ copyTV(J->L, basev+2, &ix->keyv);
+ if (!rec_mm_lookup(J, ix, mm)) { /* Lookup metamethod on 1st operand. */
+ if (mm != MM_len) { /* __len is unary: no 2nd operand to try. */
+ ix->tab = ix->key;
+ copyTV(J->L, &ix->tabv, &ix->keyv);
+ if (rec_mm_lookup(J, ix, mm)) /* Lookup metamethod on 2nd operand. */
+ goto ok;
+ }
+ lj_trace_err(J, LJ_TRERR_NOMM); /* Throws: does not fall through. */
+ }
+ok:
+ base[0] = ix->mobj;
+ copyTV(J->L, basev+0, &ix->mobjv);
+ return rec_call(J, func, CALLRES_CONT, 2) ? J->base[func] : 0;
+}
+
+/* Call a comparison metamethod. */
+/* op bit 0 selects the false/true condition continuation. */
+static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
+{
+ BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
+ TRef *base = J->base + func;
+ TValue *tv = J->L->base + func;
+ base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
+ copyTV(J->L, tv+0, &ix->mobjv);
+ copyTV(J->L, tv+1, &ix->valv);
+ copyTV(J->L, tv+2, &ix->keyv);
+ rec_call(J, func, CALLRES_CONT, 2);
+ /* It doesn't matter whether this is immediately resolved or not.
+ ** Type specialization of the return type suffices to specialize
+ ** the control flow.
+ */
+}
+
+/* Record call to equality comparison metamethod (for tab and udata only). */
+/* Operands arrive in ix->val (1st) and ix->key (2nd). Both metamethods
+** must compare equal for the call to be recorded, matching the VM.
+*/
+static void rec_mm_equal(jit_State *J, RecordIndex *ix, int op)
+{
+ ix->tab = ix->val;
+ copyTV(J->L, &ix->tabv, &ix->valv);
+ if (rec_mm_lookup(J, ix, MM_eq)) { /* Lookup metamethod on 1st operand. */
+ cTValue *bv;
+ TRef mo1 = ix->mobj;
+ TValue mo1v;
+ copyTV(J->L, &mo1v, &ix->mobjv);
+ /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */
+ bv = &ix->keyv;
+ if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) {
+ TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META);
+ emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
+ } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) {
+ TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META);
+ emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
+ } else { /* Lookup metamethod on 2nd operand and compare both. */
+ ix->tab = ix->key;
+ copyTV(J->L, &ix->tabv, bv);
+ if (!rec_mm_lookup(J, ix, MM_eq) ||
+ rec_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv))
+ return; /* No common __eq: interpreter semantics take over. */
+ }
+ rec_mm_callcomp(J, ix, op);
+ }
+}
+
+/* Record call to ordered comparison metamethods (for arbitrary objects). */
+/* op bit 1 selects __le vs. __lt; a failed __le lookup retries as
+** NOT (swapped __lt), mirroring the interpreter's fallback.
+*/
+static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op)
+{
+ ix->tab = ix->val;
+ copyTV(J->L, &ix->tabv, &ix->valv);
+ while (1) {
+ MMS mm = (op & 2) ? MM_le : MM_lt; /* Try __le + __lt or only __lt. */
+ if (rec_mm_lookup(J, ix, mm)) { /* Lookup metamethod on 1st operand. */
+ cTValue *bv;
+ TRef mo1 = ix->mobj;
+ TValue mo1v;
+ copyTV(J->L, &mo1v, &ix->mobjv);
+ /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */
+ bv = &ix->keyv;
+ if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) {
+ TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META);
+ emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
+ } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) {
+ TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META);
+ emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
+ } else { /* Lookup metamethod on 2nd operand and compare both. */
+ ix->tab = ix->key;
+ copyTV(J->L, &ix->tabv, bv);
+ if (!rec_mm_lookup(J, ix, mm) ||
+ rec_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv))
+ goto nomatch;
+ }
+ rec_mm_callcomp(J, ix, op);
+ return;
+ }
+ nomatch:
+ /* First lookup failed. Retry with __lt and swapped operands. */
+ if (!(op & 2)) break; /* Already at __lt. Interpreter will throw. */
+ ix->tab = ix->key; ix->key = ix->val; ix->val = ix->tab;
+ copyTV(J->L, &ix->tabv, &ix->keyv);
+ copyTV(J->L, &ix->keyv, &ix->valv);
+ copyTV(J->L, &ix->valv, &ix->tabv);
+ op ^= 3; /* Switch comparison variant to match the operand swap. */
+ }
+}
+
+/* -- Indexed access ------------------------------------------------------ */
+
+/* Record indexed key lookup. */
+/* Emits the IR for locating the table slot of ix->key and sets
+** ix->oldv to the current value. Returns an AREF for array keys,
+** an HREFK for constant hash keys, or a generic HREF otherwise.
+*/
+static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
+{
+ TRef key;
+ GCtab *t = tabV(&ix->tabv);
+ ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */
+
+ /* Integer keys are looked up in the array part first. */
+ key = ix->key;
+ if (tref_isnumber(key)) {
+ lua_Number n = numV(&ix->keyv);
+ int32_t k = lj_num2int(n);
+ lua_assert(tvisnum(&ix->keyv));
+ /* Potential array key? */
+ if ((MSize)k < LJ_MAX_ASIZE && n == cast_num(k)) {
+ TRef asizeref, ikey = key;
+ if (!tref_isinteger(ikey))
+ ikey = emitir(IRTGI(IR_TOINT), ikey, IRTOINT_INDEX);
+ asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
+ if ((MSize)k < t->asize) { /* Currently an array key? */
+ TRef arrayref;
+ emitir(IRTGI(IR_ABC), asizeref, ikey); /* Bounds check. */
+ arrayref = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_ARRAY);
+ return emitir(IRT(IR_AREF, IRT_PTR), arrayref, ikey);
+ } else { /* Currently not in array (may be an array extension)? */
+ emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */
+ if (k == 0 && tref_isk(key))
+ key = lj_ir_knum_zero(J); /* Canonicalize 0 or +-0.0 to +0.0. */
+ /* And continue with the hash lookup. */
+ }
+ } else if (!tref_isk(key)) {
+ /* We can rule out const numbers which failed the integerness test
+ ** above. But all other numbers are potential array keys.
+ */
+ if (t->asize == 0) { /* True sparse tables have an empty array part. */
+ /* Guard that the array part stays empty. */
+ TRef tmp = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
+ emitir(IRTGI(IR_EQ), tmp, lj_ir_kint(J, 0));
+ } else {
+ lj_trace_err(J, LJ_TRERR_NYITMIX); /* NYI: mixed array/hash keys. */
+ }
+ }
+ }
+
+ /* Otherwise the key is located in the hash part. */
+ if (tref_isinteger(key)) /* Hash keys are based on numbers, not ints. */
+ ix->key = key = emitir(IRTN(IR_TONUM), key, 0);
+ if (tref_isk(key)) {
+ /* Optimize lookup of constant hash keys. */
+ MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
+ if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) &&
+ hslot <= 65535*(MSize)sizeof(Node)) {
+ TRef node, kslot;
+ /* Guard on the hash mask so the node slot stays valid. */
+ TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
+ emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
+ node = emitir(IRT(IR_FLOAD, IRT_PTR), ix->tab, IRFL_TAB_NODE);
+ kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
+ return emitir(IRTG(IR_HREFK, IRT_PTR), node, kslot);
+ }
+ }
+ /* Fall back to a regular hash lookup. */
+ return emitir(IRT(IR_HREF, IRT_PTR), ix->tab, key);
+}
+
+/* Determine whether a key is NOT one of the fast metamethod names. */
+static int nommstr(jit_State *J, TRef key)
+{
+ if (tref_isstr(key)) {
+ if (tref_isk(key)) {
+ GCstr *str = ir_kstr(IR(tref_ref(key)));
+ uint32_t i;
+ for (i = 0; i <= MM_FAST; i++)
+ if (strref(J2G(J)->mmname[i]) == str)
+ return 0; /* MUST be one of the fast metamethod names. */
+ } else {
+ return 0; /* Variable string key MAY be a metamethod name. */
+ }
+ }
+ return 1; /* CANNOT be a metamethod name. */
+}
+
+/* Record indexed load/store. */
+/* Returns the loaded value ref for a load (ix->val == 0), or 0 for a
+** store. Follows __index/__newindex chains up to ix->idxchain levels.
+*/
+static TRef rec_idx(jit_State *J, RecordIndex *ix)
+{
+ TRef xref;
+ IROp xrefop, loadop;
+ cTValue *oldv;
+
+ while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
+ lua_assert(ix->idxchain != 0); /* Never call raw rec_idx() on non-table. */
+ if (!rec_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index))
+ lj_trace_err(J, LJ_TRERR_NOMM);
+ handlemm:
+ if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
+ BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
+ TRef *base = J->base + func;
+ TValue *tv = J->L->base + func;
+ base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
+ setfuncV(J->L, tv+0, funcV(&ix->mobjv));
+ copyTV(J->L, tv+1, &ix->tabv);
+ copyTV(J->L, tv+2, &ix->keyv);
+ if (ix->val) {
+ base[3] = ix->val;
+ copyTV(J->L, tv+3, &ix->valv);
+ rec_call(J, func, CALLRES_CONT, 3); /* mobj(tab, key, val) */
+ return 0;
+ } else {
+ /* res = mobj(tab, key) */
+ return rec_call(J, func, CALLRES_CONT, 2) ? J->base[func] : 0;
+ }
+ }
+ /* Otherwise retry lookup with metaobject. */
+ ix->tab = ix->mobj;
+ copyTV(J->L, &ix->tabv, &ix->mobjv);
+ if (--ix->idxchain == 0)
+ lj_trace_err(J, LJ_TRERR_IDXLOOP);
+ }
+
+ /* First catch nil and NaN keys for tables. */
+ if (tvisnil(&ix->keyv) || (tvisnum(&ix->keyv) && tvisnan(&ix->keyv))) {
+ if (ix->val) /* Better fail early. */
+ lj_trace_err(J, LJ_TRERR_STORENN);
+ if (tref_isk(ix->key)) {
+ /* Const nil/NaN key always misses: only the __index chain matters. */
+ if (ix->idxchain && rec_mm_lookup(J, ix, MM_index))
+ goto handlemm;
+ return TREF_NIL;
+ }
+ }
+
+ /* Record the key lookup. */
+ xref = rec_idx_key(J, ix);
+ xrefop = IR(tref_ref(xref))->o;
+ loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
+ oldv = ix->oldv;
+
+ if (ix->val == 0) { /* Indexed load */
+ IRType t = itype2irt(oldv);
+ TRef res = emitir(IRTG(loadop, t), xref, 0);
+ if (t == IRT_NIL && ix->idxchain && rec_mm_lookup(J, ix, MM_index))
+ goto handlemm; /* A nil result may be overridden by __index. */
+ if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */
+ return res;
+ } else { /* Indexed store. */
+ GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
+ if (tvisnil(oldv)) { /* Previous value was nil? */
+ /* Need to duplicate the hasmm check for the early guards. */
+ int hasmm = 0;
+ if (ix->idxchain && mt) {
+ cTValue *mo = lj_tab_getstr(mt, strref(J2G(J)->mmname[MM_newindex]));
+ hasmm = mo && !tvisnil(mo);
+ }
+ if (hasmm || oldv == niltvg(J2G(J)))
+ emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */
+ else if (xrefop == IR_HREF)
+ emitir(IRTG(IR_NE, IRT_PTR), xref, lj_ir_kptr(J, niltvg(J2G(J))));
+ if (ix->idxchain && rec_mm_lookup(J, ix, MM_newindex)) { /* Metamethod? */
+ lua_assert(hasmm);
+ goto handlemm;
+ }
+ lua_assert(!hasmm);
+ if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */
+ TRef key = ix->key;
+ if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
+ key = emitir(IRTN(IR_TONUM), key, 0);
+ xref = emitir(IRT(IR_NEWREF, IRT_PTR), ix->tab, key);
+ }
+ } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
+ /* Cannot derive that the previous value was non-nil, must do checks. */
+ if (xrefop == IR_HREF) /* Guard against store to niltv. */
+ emitir(IRTG(IR_NE, IRT_PTR), xref, lj_ir_kptr(J, niltvg(J2G(J))));
+ if (ix->idxchain) { /* Metamethod lookup required? */
+ /* A check for NULL metatable is cheaper (hoistable) than a load. */
+ if (!mt) {
+ TRef mtref = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META);
+ emitir(IRTG(IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
+ } else {
+ IRType t = itype2irt(oldv);
+ emitir(IRTG(loadop, t), xref, 0); /* Guard for non-nil value. */
+ }
+ }
+ }
+ if (tref_isinteger(ix->val)) /* Convert int to number before storing. */
+ ix->val = emitir(IRTN(IR_TONUM), ix->val, 0);
+ emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val);
+ if (tref_isgcv(ix->val))
+ emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); /* GC write barrier. */
+ /* Invalidate neg. metamethod cache for stores with certain string keys. */
+ if (!nommstr(J, ix->key)) {
+ TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ix->tab, IRFL_TAB_NOMM);
+ emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
+ }
+ J->needsnap = 1;
+ return 0;
+ }
+}
+
+/* -- Upvalue access ------------------------------------------------------ */
+
+/* Record upvalue load/store. */
+/* val == 0 records a load and returns the value ref; otherwise records
+** a store of val and returns 0. Open upvalues aliasing a trace slot
+** are accessed directly via J->base.
+*/
+static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
+{
+ GCupval *uvp = &gcref(J->fn->l.uvptr[uv])->uv;
+ TRef fn = getcurrf(J);
+ IRRef uref;
+ int needbarrier = 0;
+ if (!uvp->closed) {
+ /* In current stack? */
+ if (uvp->v >= J->L->stack && uvp->v < J->L->maxstack) {
+ int32_t slot = (int32_t)(uvp->v - (J->L->base - J->baseslot));
+ if (slot >= 0) { /* Aliases an SSA slot? */
+ slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
+ /* NYI: add IR to guard that it's still aliasing the same slot. */
+ if (val == 0) {
+ return getslot(J, slot);
+ } else {
+ J->base[slot] = val;
+ if (slot >= (int32_t)J->maxslot) J->maxslot = (BCReg)(slot+1);
+ return 0;
+ }
+ }
+ }
+ uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PTR), fn, uv));
+ } else {
+ needbarrier = 1; /* Closed upvalues are GC objects of their own. */
+ uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PTR), fn, uv));
+ }
+ if (val == 0) { /* Upvalue load */
+ IRType t = itype2irt(uvp->v);
+ TRef res = emitir(IRTG(IR_ULOAD, t), uref, 0);
+ if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitive refs. */
+ return res;
+ } else { /* Upvalue store. */
+ if (tref_isinteger(val)) /* Convert int to number before storing. */
+ val = emitir(IRTN(IR_TONUM), val, 0);
+ emitir(IRT(IR_USTORE, tref_type(val)), uref, val);
+ if (needbarrier && tref_isgcv(val))
+ emitir(IRT(IR_OBAR, IRT_NIL), uref, val);
+ J->needsnap = 1;
+ return 0;
+ }
+}
+
+/* -- Record calls to fast functions -------------------------------------- */
+
+/* Note: The function and the arguments for the bytecode CALL instructions
+** always occupy _new_ stack slots (above the highest active variable).
+** This means they must have been stored there by previous instructions
+** (MOV, K*, ADD etc.) which must be part of the same trace. This in turn
+** means their reference slots are already valid and their types have
+** already been specialized (i.e. getslot() would be redundant).
+** The 1st slot beyond the arguments is set to 0 before calling recff_*.
+*/
+
+/* Data used by handlers to record a fast function. */
+/* Data used by handlers to record a fast function. */
+typedef struct RecordFFData {
+ TValue *argv; /* Runtime argument values. */
+ GCfunc *fn; /* The currently recorded function. */
+ int nargs; /* Number of passed arguments. */
+ int nres; /* Number of returned results (defaults to 1). */
+ int cres; /* Wanted number of call results. */
+ uint32_t data; /* Per-ffid auxiliary data (opcode, literal etc.). */
+} RecordFFData;
+
+/* Type of handler to record a fast function. */
+typedef void (*RecordFunc)(jit_State *J, TRef *res, RecordFFData *rd);
+
+/* Avoid carrying two pointers around: args live directly after res. */
+#define arg (res+1)
+
+/* Get runtime value of int argument. */
+/* Coerces a string argument to a number in-place; aborts the trace on
+** failure.
+*/
+static int32_t argv2int(jit_State *J, TValue *o)
+{
+ if (tvisstr(o) && !lj_str_numconv(strVdata(o), o))
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ return lj_num2bit(numV(o));
+}
+
+/* Get runtime value of string argument. */
+/* Coerces a number argument to a string in-place (mirrors the VM's
+** implicit number->string coercion).
+*/
+static GCstr *argv2str(jit_State *J, TValue *o)
+{
+ if (LJ_LIKELY(tvisstr(o))) {
+ return strV(o);
+ } else {
+ GCstr *s;
+ lua_assert(tvisnum(o));
+ s = lj_str_fromnum(J->L, &o->n);
+ setstrV(J->L, o, s); /* Store back so later handlers see the string. */
+ return s;
+ }
+}
+
+/* Fallback handler for all fast functions that are not recorded (yet). */
+static void recff_nyi(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ UNUSED(res);
+ setfuncV(J->L, &J->errinfo, rd->fn); /* Report which function aborted. */
+ lj_trace_err_info(J, LJ_TRERR_NYIFF);
+}
+
+/* Abort recording: fast function used in an unsupported way. */
+LJ_NORET static void recff_err_ffu(jit_State *J, RecordFFData *rd)
+{
+ setfuncV(J->L, &J->errinfo, rd->fn);
+ lj_trace_err_info(J, LJ_TRERR_NYIFFU);
+}
+
+/* C functions can have arbitrary side-effects and are not recorded (yet). */
+static void recff_c(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ UNUSED(res);
+ setlightudV(&J->errinfo, (void *)rd->fn->c.f); /* Report the C function. */
+ lj_trace_err_info(J, LJ_TRERR_NYICF);
+}
+
+/* -- Base library fast functions ----------------------------------------- */
+
+/* assert() passes all arguments through unchanged on success. */
+static void recff_assert(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ /* Arguments already specialized. The interpreter throws for nil/false. */
+ BCReg i;
+ for (i = 0; arg[i]; i++) /* Need to pass through all arguments. */
+ res[i] = arg[i];
+ rd->nres = (int)i;
+ UNUSED(J);
+}
+
+/* The type-name strings are stored in the fast function's upvalues,
+** indexed by IRType (int is folded into num).
+*/
+static void recff_type(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ /* Arguments already specialized. Result is a constant string. Neat, huh? */
+ IRType t = tref_isinteger(arg[0]) ? IRT_NUM : tref_type(arg[0]);
+ res[0] = lj_ir_kstr(J, strV(&rd->fn->c.upvalue[t]));
+}
+
+/* Returns __metatable if present, else the metatable itself (tables
+** only; other types fall back to the interpreter).
+*/
+static void recff_getmetatable(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = arg[0];
+ if (tref_istab(tr)) {
+ RecordIndex ix;
+ ix.tab = tr;
+ copyTV(J->L, &ix.tabv, &rd->argv[0]);
+ if (rec_mm_lookup(J, &ix, MM_metatable))
+ res[0] = ix.mobj;
+ else
+ res[0] = ix.mt;
+ } /* else: Interpreter will throw. */
+}
+
+static void recff_setmetatable(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = arg[0];
+ TRef mt = arg[1];
+ if (tref_istab(tr) && (tref_istab(mt) || (mt && tref_isnil(mt)))) {
+ TRef fref, mtref;
+ RecordIndex ix;
+ ix.tab = tr;
+ copyTV(J->L, &ix.tabv, &rd->argv[0]);
+ rec_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable field. */
+ fref = emitir(IRT(IR_FREF, IRT_PTR), tr, IRFL_TAB_META);
+ mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt;
+ emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref);
+ if (!tref_isnil(mt)) /* Storing a GC ref needs a write barrier. */
+ emitir(IRT(IR_TBAR, IRT_TAB), tr, 0);
+ res[0] = tr; /* setmetatable() returns the table. */
+ J->needsnap = 1;
+ } /* else: Interpreter will throw. */
+}
+
+/* idxchain = 0 makes rec_idx() a raw lookup without metamethods. */
+static void recff_rawget(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ if (tref_istab(arg[0]) && arg[1]) {
+ RecordIndex ix;
+ ix.tab = arg[0]; ix.key = arg[1]; ix.val = 0; ix.idxchain = 0;
+ settabV(J->L, &ix.tabv, tabV(&rd->argv[0]));
+ copyTV(J->L, &ix.keyv, &rd->argv[1]);
+ res[0] = rec_idx(J, &ix);
+ } /* else: Interpreter will throw. */
+}
+
+/* Record rawset(): table index store with metamethod chain disabled. */
+static void recff_rawset(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ if (tref_istab(arg[0]) && arg[1] && arg[2]) {
+ RecordIndex ix;
+ ix.tab = arg[0]; ix.key = arg[1]; ix.val = arg[2]; ix.idxchain = 0;
+ settabV(J->L, &ix.tabv, tabV(&rd->argv[0]));
+ copyTV(J->L, &ix.keyv, &rd->argv[1]);
+ copyTV(J->L, &ix.valv, &rd->argv[2]);
+ rec_idx(J, &ix); /* ix.val != 0 makes this a store, not a load. */
+ res[0] = arg[0]; /* Returns table. */
+ } /* else: Interpreter will throw. */
+}
+
+/* Record rawequal(): object comparison without __eq metamethod. */
+static void recff_rawequal(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ if (arg[0] && arg[1]) {
+ /* rec_objcmp emits the guards; its result folds to a constant here. */
+ int diff = rec_objcmp(J, arg[0], arg[1], &rd->argv[0], &rd->argv[1]);
+ res[0] = diff ? TREF_FALSE : TREF_TRUE;
+ } /* else: Interpreter will throw. */
+}
+
+/* Record tonumber(): numbers pass through, strings get a guarded STRTO.
+** Only base 10 is supported; any other base punts to the interpreter.
+*/
+static void recff_tonumber(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = arg[0];
+ if (tref_isnumber_str(tr)) {
+ if (arg[1]) {
+ /* Explicit base: must be a constant 10, otherwise not recordable. */
+ TRef base = lj_ir_toint(J, arg[1]);
+ if (!tref_isk(base) || IR(tref_ref(base))->i != 10)
+ recff_err_ffu(J, rd);
+ }
+ if (tref_isstr(tr))
+ tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); /* Guarded conversion. */
+ } else {
+ tr = TREF_NIL; /* Non-convertible argument: tonumber() returns nil. */
+ }
+ res[0] = tr;
+ UNUSED(rd); /* NOTE(review): stale — rd is used above; harmless no-op. */
+}
+
+/* Record tostring(): strings pass through, __tostring is recorded as a
+** call, numbers use IR_TOSTR; anything else is not recordable.
+*/
+static void recff_tostring(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = arg[0];
+ if (tref_isstr(tr)) {
+ /* Ignore __tostring in the string base metatable. */
+ res[0] = tr;
+ } else {
+ RecordIndex ix;
+ ix.tab = tr;
+ copyTV(J->L, &ix.tabv, &rd->argv[0]);
+ if (rec_mm_lookup(J, &ix, MM_tostring)) { /* Has __tostring metamethod? */
+ res[0] = ix.mobj;
+ copyTV(J->L, rd->argv - 1, &ix.mobjv);
+ if (!rec_call(J, (BCReg)(res - J->base), 1, 1)) /* Pending call? */
+ rd->cres = CALLRES_PENDING;
+ /* Otherwise res[0] already contains the result. */
+ } else if (tref_isnumber(tr)) {
+ res[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
+ } else {
+ recff_err_ffu(J, rd); /* Fall back to the interpreter. */
+ }
+ }
+}
+
+/* Record the ipairs() iterator function: returns key+1 and t[key+1],
+** or no results when the value is nil (loop termination).
+*/
+static void recff_ipairs_aux(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ RecordIndex ix;
+ ix.tab = arg[0];
+ if (tref_istab(ix.tab)) {
+ if (!tvisnum(&rd->argv[1])) /* No support for string coercion. */
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ setnumV(&ix.keyv, numV(&rd->argv[1])+(lua_Number)1);
+ settabV(J->L, &ix.tabv, tabV(&rd->argv[0]));
+ ix.val = 0; ix.idxchain = 0; /* Raw lookup, no metamethods. */
+ ix.key = lj_ir_toint(J, arg[1]);
+ res[0] = ix.key = emitir(IRTI(IR_ADD), ix.key, lj_ir_kint(J, 1));
+ res[1] = rec_idx(J, &ix);
+ /* nil value ends the iteration: return 0 results in that case. */
+ rd->nres = tref_isnil(res[1]) ? 0 : 2;
+ } /* else: Interpreter will throw. */
+}
+
+/* Record ipairs(): return the iterator (from upvalue 0), the table and 0. */
+static void recff_ipairs(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tab = arg[0];
+ if (tref_istab(tab)) {
+ res[0] = lj_ir_kfunc(J, funcV(&rd->fn->c.upvalue[0]));
+ res[1] = tab;
+ res[2] = lj_ir_kint(J, 0); /* Initial control value. */
+ rd->nres = 3;
+ } /* else: Interpreter will throw. */
+}
+
+/* Record pcall(): record the inner call and prepend a true result.
+** Only the success path is recorded; an actual error exits the trace.
+*/
+static void recff_pcall(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ if (rd->nargs >= 1) {
+ BCReg parg = (BCReg)(arg - J->base);
+ if (rec_call(J, parg, CALLRES_MULTI, rd->nargs - 1)) { /* Resolved call. */
+ res[0] = TREF_TRUE; /* Prepend true result. No need to move results. */
+ rd->nres = (int)((J->maxslot - parg) + 1);
+ } else { /* Propagate pending call. */
+ rd->cres = CALLRES_PENDING;
+ }
+ } /* else: Interpreter will throw. */
+}
+
+/* Struct to pass context across lj_vm_cpcall. */
+typedef struct RecordXpcall {
+ BCReg parg; /* Base slot of the call to record. */
+ int nargs; /* Number of arguments for the recorded call. */
+ int resolved; /* Out: rec_call() result (1 resolved, 0 pending). */
+} RecordXpcall;
+
+/* Protected callback for recff_xpcall(): runs rec_call() under cpcall,
+** since the recorder itself may throw (e.g. trace aborts).
+*/
+static TValue *recff_xpcall_cp(lua_State *L, lua_CFunction dummy, void *ud)
+{
+ jit_State *J = L2J(L);
+ RecordXpcall *rx = (RecordXpcall *)ud;
+ UNUSED(dummy);
+ rx->resolved = rec_call(J, rx->parg, CALLRES_MULTI, rx->nargs);
+ return NULL; /* No stack adjustment needed. */
+}
+
+/* Record xpcall(f, err, ...): like pcall, but the function and the error
+** handler must be swapped on the Lua stack before recording the call,
+** and swapped back afterwards regardless of the outcome.
+*/
+static void recff_xpcall(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ if (rd->nargs >= 2) {
+ RecordXpcall rx;
+ BCReg parg = (BCReg)(arg - J->base) + 1; /* +1: skip the error handler. */
+ TRef tmp;
+ TValue argv0, argv1;
+ ptrdiff_t oargv;
+ int errcode;
+ /* Swap function and traceback. */
+ tmp = arg[0]; arg[0] = arg[1]; arg[1] = tmp;
+ copyTV(J->L, &argv0, &rd->argv[0]);
+ copyTV(J->L, &argv1, &rd->argv[1]);
+ copyTV(J->L, &rd->argv[0], &argv1);
+ copyTV(J->L, &rd->argv[1], &argv0);
+ oargv = savestack(J->L, rd->argv); /* Save as offset, not pointer. */
+ /* Need to protect rec_call because the recorder may throw. */
+ rx.parg = parg;
+ rx.nargs = rd->nargs - 2;
+ errcode = lj_vm_cpcall(J->L, recff_xpcall_cp, NULL, &rx);
+ /* Always undo Lua stack swap to avoid confusing the interpreter. */
+ rd->argv = restorestack(J->L, oargv); /* Stack may have been resized. */
+ copyTV(J->L, &rd->argv[0], &argv0);
+ copyTV(J->L, &rd->argv[1], &argv1);
+ if (errcode)
+ lj_err_throw(J->L, errcode); /* Propagate errors. */
+ if (rx.resolved) { /* Resolved call. */
+ int i, nres = (int)(J->maxslot - parg);
+ rd->nres = nres + 1;
+ res[0] = TREF_TRUE; /* Prepend true result. */
+ for (i = 1; i <= nres; i++) /* Move results down. */
+ res[i] = res[i+1];
+ } else { /* Propagate pending call. */
+ rd->cres = CALLRES_PENDING;
+ }
+ } /* else: Interpreter will throw. */
+}
+
+/* -- Math library fast functions ----------------------------------------- */
+
+/* Record math.abs() as IR_ABS on the number-coerced argument. */
+static void recff_math_abs(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = lj_ir_tonum(J, arg[0]);
+ res[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J));
+ UNUSED(rd);
+}
+
+/* Record rounding functions math.floor and math.ceil.
+** rd->data selects the IR_FPMATH rounding mode for the function.
+*/
+static void recff_math_round(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ if (tref_isinteger(arg[0]))
+ res[0] = arg[0]; /* Integers are already rounded. */
+ else
+ res[0] = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, arg[0]), rd->data);
+ /* Note: result is integral (or NaN/Inf), but may not fit into an integer. */
+}
+
+/* Record unary math.* functions, mapped to IR_FPMATH opcode.
+** rd->data carries the IRFPM_* sub-opcode selected per fast function.
+*/
+static void recff_math_unary(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ res[0] = emitir(IRTN(IR_FPMATH), lj_ir_tonum(J, arg[0]), rd->data);
+}
+
+/* Record binary math.* functions math.atan2 and math.ldexp.
+** rd->data carries the binary IR opcode to emit.
+*/
+static void recff_math_binary(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = lj_ir_tonum(J, arg[0]);
+ res[0] = emitir(IRTN(rd->data), tr, lj_ir_tonum(J, arg[1]));
+}
+
+/* Record math.asin, math.acos, math.atan.
+** All three are reduced to IR_ATAN2: atan(y) = atan2(y, 1),
+** asin(y) = atan2(y, sqrt(1-y^2)), acos(y) = atan2(sqrt(1-y^2), y).
+*/
+static void recff_math_atrig(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef y = lj_ir_tonum(J, arg[0]);
+ TRef x = lj_ir_knum_one(J);
+ uint32_t ffid = rd->data; /* Which of the three functions this is. */
+ if (ffid != FF_math_atan) {
+ TRef tmp = emitir(IRTN(IR_MUL), y, y);
+ tmp = emitir(IRTN(IR_SUB), x, tmp);
+ tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT); /* sqrt(1 - y*y) */
+ if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; }
+ }
+ res[0] = emitir(IRTN(IR_ATAN2), y, x);
+}
+
+/* Record math.modf(): returns integral part (via trunc) and fraction. */
+static void recff_math_modf(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = arg[0];
+ if (tref_isinteger(arg[0])) {
+ res[0] = tr; /* Integer input: fraction is exactly 0. */
+ res[1] = lj_ir_kint(J, 0);
+ } else {
+ tr = lj_ir_tonum(J, tr);
+ res[0] = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC);
+ res[1] = emitir(IRTN(IR_SUB), tr, res[0]); /* frac = x - trunc(x) */
+ }
+ rd->nres = 2;
+}
+
+/* Record math.deg()/math.rad(): multiply by the constant conversion
+** factor stored in upvalue 0 of the respective fast function.
+*/
+static void recff_math_degrad(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = lj_ir_tonum(J, arg[0]);
+ res[0] = emitir(IRTN(IR_MUL), tr, lj_ir_knum(J, numV(&rd->fn->c.upvalue[0])));
+}
+
+/* Record math.pow(); narrowing of the exponent is handled by the
+** optimizer (lj_opt_narrow_pow), which needs the runtime exponent value.
+*/
+static void recff_math_pow(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ if (!tref_isnumber_str(arg[1]))
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ res[0] = lj_opt_narrow_pow(J, lj_ir_tonum(J, arg[0]), arg[1], &rd->argv[1]);
+ UNUSED(rd); /* NOTE(review): stale — rd->argv is used above; harmless. */
+}
+
+/* Record math.min()/math.max(): fold all arguments left-to-right.
+** rd->data carries the IR opcode (min or max variant).
+*/
+static void recff_math_minmax(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = lj_ir_tonum(J, arg[0]);
+ uint32_t op = rd->data;
+ BCReg i;
+ for (i = 1; arg[i]; i++)
+ tr = emitir(IRTN(op), tr, lj_ir_tonum(J, arg[i]));
+ res[0] = tr;
+}
+
+/* -- Bit library fast functions ------------------------------------------ */
+
+/* Record unary bit.tobit, bit.bnot, bit.bswap.
+** bit.tobit is just the coercion itself; the others emit one extra IR op.
+*/
+static void recff_bit_unary(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = lj_ir_tobit(J, arg[0]);
+ res[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0);
+}
+
+/* Record N-ary bit.band, bit.bor, bit.bxor.
+** Folds all arguments left-to-right with the opcode from rd->data.
+*/
+static void recff_bit_nary(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = lj_ir_tobit(J, arg[0]);
+ uint32_t op = rd->data;
+ BCReg i;
+ for (i = 1; arg[i]; i++)
+ tr = emitir(IRTI(op), tr, lj_ir_tobit(J, arg[i]));
+ res[0] = tr;
+}
+
+/* Record bit shifts. rd->data carries the shift IR opcode. */
+static void recff_bit_shift(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef tr = lj_ir_tobit(J, arg[0]);
+ TRef tsh = lj_ir_tobit(J, arg[1]);
+#if !LJ_TARGET_MASKEDSHIFT
+ /* Target doesn't mask shift counts in hardware: mask to 0..31 manually. */
+ if (!tref_isk(tsh))
+ tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
+#endif
+ res[0] = emitir(IRTI(rd->data), tr, tsh);
+}
+
+/* -- String library fast functions --------------------------------------- */
+
+/* Record string.len() as a field load of the string's length. */
+static void recff_string_len(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ res[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, arg[0]), IRFL_STR_LEN);
+ UNUSED(rd);
+}
+
+/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1).
+** Emits guards so the trace stays valid only for start/end indices that
+** fall in the same range class (negative / in-bounds / clamped) as the
+** values observed at recording time, then normalizes both to 0-based
+** offsets before building the result.
+*/
+static void recff_string_range(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ TRef trstr = lj_ir_tostr(J, arg[0]);
+ TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
+ TRef tr0 = lj_ir_kint(J, 0);
+ TRef trstart, trend;
+ GCstr *str = argv2str(J, &rd->argv[0]); /* Runtime string for guards. */
+ int32_t start, end;
+ if (rd->data) { /* string.sub(str, start [,end]) */
+ trstart = lj_ir_toint(J, arg[1]);
+ trend = tref_isnil(arg[2]) ? lj_ir_kint(J, -1) : lj_ir_toint(J, arg[2]);
+ start = argv2int(J, &rd->argv[1]);
+ end = tref_isnil(arg[2]) ? -1 : argv2int(J, &rd->argv[2]);
+ } else { /* string.byte(str, [,start [,end]]) */
+ if (arg[1]) {
+ trstart = lj_ir_toint(J, arg[1]);
+ trend = tref_isnil(arg[2]) ? trstart : lj_ir_toint(J, arg[2]);
+ start = argv2int(J, &rd->argv[1]);
+ end = tref_isnil(arg[2]) ? start : argv2int(J, &rd->argv[2]);
+ } else {
+ trend = trstart = lj_ir_kint(J, 1); /* Default: first byte only. */
+ end = start = 1;
+ }
+ }
+ /* Normalize the end index: negative counts from the end of the string. */
+ if (end < 0) {
+ emitir(IRTGI(IR_LT), trend, tr0);
+ trend = emitir(IRTI(IR_ADD), emitir(IRTI(IR_ADD), trlen, trend),
+ lj_ir_kint(J, 1));
+ end = end+(int32_t)str->len+1;
+ } else if ((MSize)end <= str->len) {
+ emitir(IRTGI(IR_ULE), trend, trlen);
+ } else {
+ emitir(IRTGI(IR_GT), trend, trlen); /* Out of range: clamp to length. */
+ end = (int32_t)str->len;
+ trend = trlen;
+ }
+ /* Normalize the start index to a 0-based offset, clamped to 0. */
+ if (start < 0) {
+ emitir(IRTGI(IR_LT), trstart, tr0);
+ trstart = emitir(IRTI(IR_ADD), trlen, trstart);
+ start = start+(int32_t)str->len;
+ emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
+ if (start < 0) {
+ trstart = tr0;
+ start = 0;
+ }
+ } else {
+ if (start == 0) {
+ emitir(IRTGI(IR_EQ), trstart, tr0);
+ trstart = tr0;
+ } else {
+ trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1)); /* 1-based. */
+ emitir(IRTGI(IR_GE), trstart, tr0);
+ start--;
+ }
+ }
+ if (rd->data) { /* Return string.sub result. */
+ if (end - start >= 0) {
+ /* Also handle empty range here, to avoid extra traces. */
+ TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart);
+ emitir(IRTGI(IR_GE), trslen, tr0);
+ trptr = emitir(IRT(IR_STRREF, IRT_PTR), trstr, trstart);
+ res[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
+ } else { /* Range underflow: return empty string. */
+ emitir(IRTGI(IR_LT), trend, trstart);
+ res[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0));
+ }
+ } else { /* Return string.byte result(s). */
+ int32_t i, len = end - start;
+ if (len > 0) {
+ /* Guard that the runtime range length matches the recorded one. */
+ TRef trslen = emitir(IRTI(IR_SUB), trend, trstart);
+ emitir(IRTGI(IR_EQ), trslen, lj_ir_kint(J, len));
+ if (res + len > J->slot + LJ_MAX_JSLOTS)
+ lj_trace_err(J, LJ_TRERR_STACKOV);
+ rd->nres = len;
+ for (i = 0; i < len; i++) {
+ TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, i));
+ tmp = emitir(IRT(IR_STRREF, IRT_PTR), trstr, tmp);
+ res[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, 0);
+ }
+ } else { /* Empty range or range underflow: return no results. */
+ emitir(IRTGI(IR_LE), trend, trstart);
+ rd->nres = 0;
+ }
+ }
+}
+
+/* -- Table library fast functions ---------------------------------------- */
+
+/* Record table.getn() as IR_TLEN (table length). */
+static void recff_table_getn(jit_State *J, TRef *res, RecordFFData *rd)
+{
+ if (tref_istab(arg[0])) {
+ res[0] = emitir(IRTI(IR_TLEN), arg[0], 0);
+ } /* else: Interpreter will throw. */
+ UNUSED(rd);
+}
+
+/* -- Record calls and returns -------------------------------------------- */
+
+#undef arg
+
+#include "lj_recdef.h"
+
+/* Record return.
+** Unwinds pcall frames (prepending true results), then dispatches on the
+** kind of the returned-to frame: a Lua CALL site (copy results into the
+** caller's slots) or a metamethod continuation (resume the continuation).
+** rbase is the first result slot, gotresults the number of results.
+*/
+static void rec_ret(jit_State *J, BCReg rbase, int gotresults)
+{
+ TValue *frame = J->L->base - 1;
+ TRef *res = J->base + rbase;
+ J->tailcalled = 0;
+ /* Unwind any protected-call frames first. */
+ while (frame_ispcall(frame)) {
+ BCReg cbase = (BCReg)frame_delta(frame);
+ lua_assert(J->baseslot > 1);
+ J->baseslot -= (BCReg)cbase;
+ J->base -= cbase;
+ *--res = TREF_TRUE; /* Prepend true to results. */
+ gotresults++;
+ J->framedepth--;
+ frame = frame_prevd(frame);
+ }
+ /* Returning past the bottom frame of the trace is not supported. */
+ if (J->framedepth-- <= 0)
+ lj_trace_err(J, LJ_TRERR_NYIRETL);
+ lua_assert(J->baseslot > 1);
+ if (frame_islua(frame)) { /* Return to a Lua CALL site. */
+ BCIns callins = *(J->pc = frame_pc(frame)-1);
+ /* bc_b == 0 means the call site accepts all results (multres). */
+ ptrdiff_t nresults = bc_b(callins) ? (int)bc_b(callins)-1 : gotresults;
+ BCReg cbase = bc_a(callins);
+ int i;
+ for (i = 0; i < nresults; i++)
+ J->base[i-1] = i < gotresults ? res[i] : TREF_NIL;
+ J->maxslot = cbase+(BCReg)nresults;
+ J->baseslot -= cbase+1;
+ J->base -= cbase+1;
+ } else if (frame_iscont(frame)) { /* Return to a metamethod continuation. */
+ ASMFunction cont = frame_contf(frame);
+ BCReg i, cbase = (BCReg)frame_delta(frame);
+ J->pc = frame_contpc(frame)-1;
+ J->baseslot -= (BCReg)cbase;
+ J->base -= cbase;
+ /* Shrink maxslot as much as possible after return from continuation. */
+ for (i = cbase-2; i > 0 && J->base[i] == 0; i--) ;
+ J->maxslot = i;
+ if (cont == lj_cont_ra) {
+ /* Copy result to destination slot. */
+ BCReg dst = bc_a(*J->pc);
+ J->base[dst] = res[0];
+ if (dst > J->maxslot) J->maxslot = dst+1;
+ } else if (cont == lj_cont_nop) {
+ /* Nothing to do here. */
+ } else if (cont == lj_cont_cat) {
+ lua_assert(0); /* Concat continuations are never recorded (BC_CAT NYI). */
+ } else {
+ /* Result type already specialized. */
+ lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
+ }
+ } else {
+ lua_assert(0); /* No other frame types expected here. */
+ }
+ lua_assert(J->baseslot >= 1);
+}
+
+/* Check unroll limits for calls.
+** Counts how often fn already appears on the recorded frame stack and
+** aborts the trace when the applicable unroll limit is exceeded:
+** JIT_P_recunroll for directly recursive root traces started at a CALL,
+** JIT_P_callunroll otherwise.
+*/
+static void check_call_unroll(jit_State *J, GCfunc *fn)
+{
+ TValue *first = J->L->base - J->baseslot;
+ TValue *frame = J->L->base - 1;
+ int count = 0;
+ while (frame > first) {
+ if (frame_func(frame) == fn)
+ count++;
+ if (frame_isvarg(frame))
+ frame = frame_prevd(frame); /* Skip the extra vararg frame. */
+ frame = frame_prev(frame);
+ }
+ if (frame_func(first) == fn && bc_op(J->cur.startins) == BC_CALL) {
+ if (count >= J->param[JIT_P_recunroll])
+ lj_trace_err(J, LJ_TRERR_NYIRECU);
+ } else {
+ if (count >= J->param[JIT_P_callunroll])
+ lj_trace_err(J, LJ_TRERR_CUNROLL);
+ }
+}
+
+/* Record call. Returns 0 for pending calls and 1 for resolved calls.
+** func is the slot holding the called object, cres the caller's result
+** policy (fixed count, CALLRES_MULTI/TAILCALL/CONT), nargs the argument
+** count. Resolves __call, specializes to the callee, then either enters
+** a Lua frame (returns 0) or dispatches to a recff_* handler.
+*/
+static int rec_call(jit_State *J, BCReg func, int cres, int nargs)
+{
+ RecordFFData rd;
+ TRef *res = &J->base[func];
+ TValue *tv = &J->L->base[func];
+
+ if (tref_isfunc(res[0])) { /* Regular function call. */
+ rd.fn = funcV(tv);
+ rd.argv = tv+1;
+ } else { /* Otherwise resolve __call metamethod for called object. */
+ RecordIndex ix;
+ int i;
+ ix.tab = res[0];
+ copyTV(J->L, &ix.tabv, tv);
+ if (!rec_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
+ lj_trace_err(J, LJ_TRERR_NOMM);
+ /* Update the recorder state, but not the Lua stack. */
+ for (i = ++nargs; i > 0; i--)
+ res[i] = res[i-1]; /* Shift arguments up by one slot. */
+ res[0] = ix.mobj;
+ rd.fn = funcV(&ix.mobjv);
+ rd.argv = tv; /* The called object is the 1st arg. */
+ }
+
+ /* Specialize to the runtime value of the called function. */
+ res[0] = emitir(IRTG(IR_FRAME, IRT_FUNC), res[0], lj_ir_kfunc(J, rd.fn));
+
+ if (isluafunc(rd.fn)) { /* Record call to Lua function. */
+ GCproto *pt = funcproto(rd.fn);
+ if ((pt->flags & PROTO_NO_JIT))
+ lj_trace_err(J, LJ_TRERR_CJITOFF);
+ if ((pt->flags & PROTO_IS_VARARG)) {
+ if (rd.fn->l.gate != lj_gate_lv)
+ lj_trace_err(J, LJ_TRERR_NYILNKF);
+ lj_trace_err(J, LJ_TRERR_NYIVF); /* Vararg calls not recorded yet. */
+ } else {
+ if (rd.fn->l.gate != lj_gate_lf)
+ lj_trace_err(J, LJ_TRERR_NYILNKF);
+ }
+ check_call_unroll(J, rd.fn);
+ if (cres == CALLRES_TAILCALL) {
+ int i;
+ /* Tailcalls can form a loop, so count towards the loop unroll limit. */
+ if (++J->tailcalled > J->loopunroll)
+ lj_trace_err(J, LJ_TRERR_LUNROLL);
+ for (i = 0; i <= nargs; i++) /* Move func + args down. */
+ J->base[i-1] = res[i];
+ /* Note: the new FRAME is now at J->base[-1] (even for slot #0). */
+ } else { /* Regular call. */
+ J->base += func+1; /* Shift recorder base into the callee frame. */
+ J->baseslot += func+1;
+ J->framedepth++;
+ }
+ if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS)
+ lj_trace_err(J, LJ_TRERR_STACKOV);
+ /* Fill up missing args with nil. */
+ while (nargs < pt->numparams)
+ J->base[nargs++] = TREF_NIL;
+ /* The remaining slots should never be read before they are written. */
+ J->maxslot = pt->numparams;
+ return 0; /* No result yet. */
+ } else { /* Record call to C function or fast function. */
+ uint32_t m = 0;
+ res[1+nargs] = 0; /* Terminate the argument list for recff handlers. */
+ rd.nargs = nargs;
+ /* recff_idmap packs handler index (high byte) and rd->data (low byte). */
+ if (rd.fn->c.ffid < sizeof(recff_idmap)/sizeof(recff_idmap[0]))
+ m = recff_idmap[rd.fn->c.ffid];
+ rd.data = m & 0xff;
+ rd.cres = cres;
+ rd.nres = 1; /* Default is one result. */
+ (recff_func[m >> 8])(J, res, &rd); /* Call recff_* handler. */
+ cres = rd.cres;
+ if (cres >= 0) {
+ /* Caller takes fixed number of results: local a,b = f() */
+ J->maxslot = func + (BCReg)cres;
+ while (rd.nres < cres) /* Fill up missing results with nil. */
+ res[rd.nres++] = TREF_NIL;
+ } else if (cres == CALLRES_MULTI) {
+ /* Caller takes any number of results: return 1,f() */
+ J->maxslot = func + (BCReg)rd.nres;
+ } else if (cres == CALLRES_TAILCALL) {
+ /* Tail call: return f() */
+ rec_ret(J, func, rd.nres);
+ } else if (cres == CALLRES_CONT) {
+ /* Note: immediately resolved continuations must not change J->maxslot. */
+ res[rd.nres] = TREF_NIL; /* Turn 0 results into nil result. */
+ } else {
+ J->framedepth++;
+ lua_assert(cres == CALLRES_PENDING);
+ return 0; /* Pending call, no result yet. */
+ }
+ return 1; /* Result resolved immediately. */
+ }
+}
+
+/* -- Record allocations -------------------------------------------------- */
+
+/* Record table creation (BC_TNEW): decode the packed size hints. */
+static TRef rec_tnew(jit_State *J, uint32_t ah)
+{
+ uint32_t asize = ah & 0x7ff; /* Low 11 bits: array part size hint. */
+ uint32_t hbits = ah >> 11; /* Remaining bits: hash part size hint. */
+ if (asize == 0x7ff) asize = 0x801; /* presumably the "oversize" escape — TODO confirm */
+ return emitir(IRT(IR_TNEW, IRT_TAB), asize, hbits);
+}
+
+/* -- Record bytecode ops ------------------------------------------------- */
+
+/* Optimize state after comparison.
+** Stores the PC taken after the comparison into the last snapshot (so a
+** side trace doesn't re-record the comparison) and shrinks the snapshot
+** when the following JMP frees slots.
+*/
+static void optstate_comp(jit_State *J, int cond)
+{
+ BCIns jmpins = J->pc[1]; /* The JMP that follows every comparison op. */
+ const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0);
+ SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
+ /* Avoid re-recording the comparison in side traces. */
+ J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc);
+ J->needsnap = 1;
+ /* Shrink last snapshot if possible. */
+ if (bc_a(jmpins) < J->maxslot) {
+ J->maxslot = bc_a(jmpins);
+ lj_snap_shrink(J);
+ }
+}
+
+/* Record the next bytecode instruction (_before_ it's executed).
+** Main recorder dispatch: decodes the instruction's operands (keeping a
+** copy of their runtime values in ix for guards/metamethods), then emits
+** IR per opcode. rc carries the result TRef for BCMdst opcodes; rc == 0
+** means the result is still pending (e.g. a metamethod call).
+*/
+void lj_record_ins(jit_State *J)
+{
+ cTValue *lbase;
+ RecordIndex ix;
+ const BCIns *pc;
+ BCIns ins;
+ BCOp op;
+ TRef ra, rb, rc;
+
+ /* Need snapshot before recording next bytecode (e.g. after a store). */
+ if (J->needsnap) {
+ J->needsnap = 0;
+ lj_snap_add(J);
+ J->mergesnap = 1;
+ }
+
+ /* Record only closed loops for root traces. */
+ pc = J->pc;
+ if (J->framedepth == 0 &&
+ (MSize)((char *)pc - (char *)J->bc_min) >= J->bc_extent)
+ lj_trace_err(J, LJ_TRERR_LLEAVE);
+
+#ifdef LUA_USE_ASSERT
+ rec_check_slots(J);
+ rec_check_ir(J);
+#endif
+
+ /* Keep a copy of the runtime values of var/num/str operands. */
+#define rav (&ix.valv)
+#define rbv (&ix.tabv)
+#define rcv (&ix.keyv)
+
+ lbase = J->L->base;
+ ins = *pc;
+ op = bc_op(ins);
+ ra = bc_a(ins);
+ ix.val = 0;
+ switch (bcmode_a(op)) {
+ case BCMvar:
+ copyTV(J->L, rav, &lbase[ra]); ix.val = ra = getslot(J, ra); break;
+ default: break; /* Handled later. */
+ }
+ rb = bc_b(ins);
+ rc = bc_c(ins);
+ switch (bcmode_b(op)) {
+ case BCMnone: rb = 0; rc = bc_d(ins); break; /* Upgrade rc to 'rd'. */
+ case BCMvar:
+ copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break;
+ case BCMnum: { lua_Number n = J->pt->k.n[rb];
+ setnumV(rbv, n); ix.tab = rb = lj_ir_knumint(J, n); } break;
+ default: break; /* Handled later. */
+ }
+ switch (bcmode_c(op)) {
+ case BCMvar:
+ copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
+ case BCMpri: setitype(rcv, (int32_t)~rc); rc = TREF_PRI(IRT_NIL+rc); break;
+ case BCMnum: { lua_Number n = J->pt->k.n[rc];
+ setnumV(rcv, n); ix.key = rc = lj_ir_knumint(J, n); } break;
+ case BCMstr: { GCstr *s = strref(J->pt->k.gc[~rc]);
+ setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break;
+ default: break; /* Handled later. */
+ }
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ /* Emit nothing for two numeric or string consts. */
+ if (!(tref_isk2(ra,rc) && tref_isnumber_str(ra) && tref_isnumber_str(rc))) {
+ IRType ta = tref_type(ra);
+ IRType tc = tref_type(rc);
+ int irop;
+ if (ta != tc) {
+ /* Widen mixed number/int comparisons to number/number comparison. */
+ if (ta == IRT_INT && tc == IRT_NUM) {
+ ra = emitir(IRTN(IR_TONUM), ra, 0);
+ ta = IRT_NUM;
+ } else if (ta == IRT_NUM && tc == IRT_INT) {
+ rc = emitir(IRTN(IR_TONUM), rc, 0);
+ } else if (!((ta == IRT_FALSE || ta == IRT_TRUE) &&
+ (tc == IRT_FALSE || tc == IRT_TRUE))) {
+ break; /* Interpreter will throw for two different types. */
+ }
+ }
+ lj_snap_add(J);
+ irop = (int)op - (int)BC_ISLT + (int)IR_LT;
+ /* Flip the IR op so the emitted guard holds for the observed result. */
+ if (ta == IRT_NUM) {
+ if ((irop & 1)) irop ^= 4; /* ISGE/ISGT are unordered. */
+ if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 5;
+ } else if (ta == IRT_INT) {
+ if (!lj_ir_numcmp(numV(rav), numV(rcv), (IROp)irop)) irop ^= 1;
+ } else if (ta == IRT_STR) {
+ if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1;
+ } else {
+ rec_mm_comp(J, &ix, (int)op); /* Fall back to __lt/__le metamethods. */
+ break;
+ }
+ emitir(IRTG(irop, ta), ra, rc);
+ optstate_comp(J, ((int)op ^ irop) & 1);
+ }
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+ case BC_ISEQS: case BC_ISNES:
+ case BC_ISEQN: case BC_ISNEN:
+ case BC_ISEQP: case BC_ISNEP:
+ /* Emit nothing for two non-table, non-udata consts. */
+ if (!(tref_isk2(ra, rc) && !(tref_istab(ra) || tref_isudata(ra)))) {
+ int diff;
+ lj_snap_add(J);
+ diff = rec_objcmp(J, ra, rc, rav, rcv);
+ if (diff == 1 && (tref_istab(ra) || tref_isudata(ra))) {
+ /* Only check __eq if different, but the same type (table or udata). */
+ rec_mm_equal(J, &ix, (int)op);
+ break;
+ }
+ optstate_comp(J, ((int)op & 1) == !diff);
+ }
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC:
+ if ((op & 1) == tref_istruecond(rc))
+ rc = 0; /* Don't store if condition is not true. */
+ /* fallthrough */
+ case BC_IST: case BC_ISF: /* Type specialization suffices. */
+ if (bc_a(pc[1]) < J->maxslot)
+ J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_NOT:
+ /* Type specialization already forces const result. */
+ rc = tref_istruecond(rc) ? TREF_FALSE : TREF_TRUE;
+ break;
+
+ case BC_LEN:
+ if (tref_isstr(rc)) {
+ rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
+ } else if (tref_istab(rc)) {
+ rc = emitir(IRTI(IR_TLEN), rc, 0);
+ } else {
+ ix.tab = rc;
+ copyTV(J->L, &ix.tabv, &ix.keyv);
+ ix.key = IRT_NIL;
+ setnilV(&ix.keyv);
+ rc = rec_mm_arith(J, &ix, MM_len); /* __len metamethod. */
+ }
+ break;
+
+ /* -- Arithmetic ops ---------------------------------------------------- */
+
+ case BC_UNM:
+ if (tref_isnumber_str(rc)) {
+ rc = lj_ir_tonum(J, rc);
+ rc = emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J));
+ } else {
+ ix.tab = rc;
+ copyTV(J->L, &ix.tabv, &ix.keyv);
+ rc = rec_mm_arith(J, &ix, MM_unm);
+ }
+ break;
+
+ case BC_ADDNV: case BC_SUBNV: case BC_MULNV: case BC_DIVNV: case BC_MODNV:
+ /* NV variants have the operands reversed: swap them back. */
+ ix.tab = rc; ix.key = rc = rb; rb = ix.tab;
+ copyTV(J->L, &ix.valv, &ix.tabv);
+ copyTV(J->L, &ix.tabv, &ix.keyv);
+ copyTV(J->L, &ix.keyv, &ix.valv);
+ if (op == BC_MODNV)
+ goto recmod;
+ /* fallthrough */
+ case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN:
+ case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: {
+ MMS mm = bcmode_mm(op);
+ if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) {
+ rb = lj_ir_tonum(J, rb);
+ rc = lj_ir_tonum(J, rc);
+ rc = emitir(IRTN((int)mm - (int)MM_add + (int)IR_ADD), rb, rc);
+ } else {
+ rc = rec_mm_arith(J, &ix, mm);
+ }
+ break;
+ }
+
+ case BC_MODVN: case BC_MODVV:
+ recmod:
+ if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
+ rc = lj_opt_narrow_mod(J, rb, rc);
+ else
+ rc = rec_mm_arith(J, &ix, MM_mod);
+ break;
+
+ case BC_POW:
+ if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
+ rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv);
+ else
+ rc = rec_mm_arith(J, &ix, MM_pow);
+ break;
+
+ /* -- Constant and move ops --------------------------------------------- */
+
+ case BC_KSTR: case BC_KNUM: case BC_KPRI: case BC_MOV:
+ break; /* rc already holds the operand TRef; stored below via BCMdst. */
+ case BC_KSHORT:
+ rc = lj_ir_kint(J, (int32_t)(int16_t)rc);
+ break;
+ case BC_KNIL:
+ while (ra <= rc)
+ J->base[ra++] = TREF_NIL;
+ if (rc >= J->maxslot) J->maxslot = rc+1;
+ break;
+
+ /* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ rc = rec_upvalue(J, rc, 0);
+ break;
+ case BC_USETV: case BC_USETS: case BC_USETN: case BC_USETP:
+ rec_upvalue(J, ra, rc);
+ break;
+
+ /* -- Table ops --------------------------------------------------------- */
+
+ case BC_GGET: case BC_GSET:
+ settabV(J->L, &ix.tabv, tabref(J->fn->l.env));
+ ix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), getcurrf(J), IRFL_FUNC_ENV);
+ ix.idxchain = LJ_MAX_IDXCHAIN;
+ rc = rec_idx(J, &ix);
+ break;
+
+ case BC_TGETB: case BC_TSETB:
+ setintV(&ix.keyv, (int32_t)rc);
+ ix.key = lj_ir_kint(J, (int32_t)rc);
+ /* fallthrough */
+ case BC_TGETV: case BC_TGETS: case BC_TSETV: case BC_TSETS:
+ ix.idxchain = LJ_MAX_IDXCHAIN;
+ rc = rec_idx(J, &ix);
+ break;
+
+ case BC_TNEW:
+ rc = rec_tnew(J, rc);
+ break;
+ case BC_TDUP:
+ rc = emitir(IRT(IR_TDUP, IRT_TAB),
+ lj_ir_ktab(J, tabref(J->pt->k.gc[~rc])), 0);
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_ITERC:
+ J->base[ra] = getslot(J, ra-3);
+ J->base[ra+1] = getslot(J, ra-2);
+ J->base[ra+2] = getslot(J, ra-1);
+ { /* Have to do the actual copy now because rec_call needs the values. */
+ TValue *b = &J->L->base[ra];
+ copyTV(J->L, b, b-3);
+ copyTV(J->L, b+1, b-2);
+ copyTV(J->L, b+2, b-1);
+ }
+ goto callop;
+
+ case BC_CALLMT:
+ rb = (TRef)(CALLRES_TAILCALL+1); /* Sentinel: decoded as rb-1 below. */
+ /* fallthrough */
+ case BC_CALLM:
+ /* L->top is set to L->base+ra+rc+NRESULTS-1+1, see lj_dispatch_ins(). */
+ rc = (BCReg)(J->L->top - J->L->base) - ra;
+ goto callop;
+
+ case BC_CALLT:
+ rb = (TRef)(CALLRES_TAILCALL+1); /* Sentinel: decoded as rb-1 below. */
+ /* fallthrough */
+ case BC_CALL:
+ callop:
+ /* NOTE(review): empty branch — appears to be dead code; the tail-call
+ ** case is handled inside rec_call() via cres == CALLRES_TAILCALL. */
+ if (rb == (TRef)(CALLRES_TAILCALL+1)) { /* Tail call. */
+ }
+ rec_call(J, ra, (int)(rb-1), (int)(rc-1));
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM:
+ /* L->top is set to L->base+ra+rc+NRESULTS-1, see lj_dispatch_ins(). */
+ rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
+ /* fallthrough */
+ case BC_RET: case BC_RET0: case BC_RET1:
+ rec_ret(J, ra, (int)(rc-1));
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ case BC_FORI:
+ if (rec_for(J, pc, 0) != LOOPEV_LEAVE)
+ J->loopref = J->cur.nins;
+ break;
+ case BC_JFORI:
+ lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
+ if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
+ rec_stop(J, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
+ /* Continue tracing if the loop is not entered. */
+ break;
+
+ case BC_FORL:
+ rec_loop_interp(J, pc, rec_for(J, pc+((ptrdiff_t)rc-BCBIAS_J), 1));
+ break;
+ case BC_ITERL:
+ rec_loop_interp(J, pc, rec_iterl(J, *pc));
+ break;
+ case BC_LOOP:
+ rec_loop_interp(J, pc, rec_loop(J, ra));
+ break;
+
+ case BC_JFORL:
+ rec_loop_jit(J, rc, rec_for(J, pc+bc_j(J->trace[rc]->startins), 1));
+ break;
+ case BC_JITERL:
+ rec_loop_jit(J, rc, rec_iterl(J, J->trace[rc]->startins));
+ break;
+ case BC_JLOOP:
+ rec_loop_jit(J, rc, rec_loop(J, ra));
+ break;
+
+ case BC_IFORL:
+ case BC_IITERL:
+ case BC_ILOOP:
+ /* Blacklisted loop instructions: recording is not allowed here. */
+ lj_trace_err_info(J, LJ_TRERR_LBLACKL);
+ break;
+
+ case BC_JMP:
+ if (ra < J->maxslot)
+ J->maxslot = ra; /* Shrink used slots. */
+ break;
+
+ case BC_CAT:
+ case BC_UCLO:
+ case BC_FNEW:
+ case BC_TSETM:
+ case BC_VARG:
+ default:
+ /* Not-yet-implemented bytecodes abort the trace. */
+ setintV(&J->errinfo, (int32_t)op);
+ lj_trace_err_info(J, LJ_TRERR_NYIBC);
+ break;
+ }
+
+ /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
+ if (bcmode_a(op) == BCMdst && rc) {
+ J->base[ra] = rc;
+ if (ra >= J->maxslot) J->maxslot = ra+1;
+ }
+
+#undef rav
+#undef rbv
+#undef rcv
+
+ /* Limit the number of recorded IR instructions. */
+ if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord])
+ lj_trace_err(J, LJ_TRERR_TRACEOV);
+}
+
+/* -- Recording setup ----------------------------------------------------- */
+
+/* Setup recording for a FORL loop.
+** Loads stop/step, optionally narrows the index to an integer and emits
+** the guards needed to keep a narrowed loop free of overflow checks in
+** the loop body: a direction guard for non-constant steps plus range or
+** ADDOV checks depending on which of stop/step are constants.
+*/
+static void rec_setup_forl(jit_State *J, const BCIns *fori)
+{
+ BCReg ra = bc_a(*fori);
+ cTValue *forbase = &J->L->base[ra];
+ IRType t = (J->flags & JIT_F_OPT_NARROW) ? lj_opt_narrow_forl(forbase)
+ : IRT_NUM;
+ TRef stop = fori_arg(J, fori-2, ra+FORL_STOP, t);
+ TRef step = fori_arg(J, fori-1, ra+FORL_STEP, t);
+ int dir = (0 <= numV(&forbase[FORL_STEP])); /* 1 = counting up. */
+ lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
+ if (!tref_isk(step)) {
+ /* Non-constant step: need a guard for the direction. */
+ TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J);
+ emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero);
+ /* Add hoistable overflow checks for a narrowed FORL index. */
+ if (t == IRT_INT) {
+ if (tref_isk(stop)) {
+ /* Constant stop: optimize check away or to a range check for step. */
+ int32_t k = IR(tref_ref(stop))->i;
+ if (dir) {
+ if (k > 0)
+ emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k));
+ } else {
+ if (k < 0)
+ emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k));
+ }
+ } else {
+ /* Stop+step variable: need full overflow check (with dead result). */
+ emitir(IRTGI(IR_ADDOV), step, stop);
+ }
+ }
+ } else if (t == IRT_INT && !tref_isk(stop)) {
+ /* Constant step: optimize overflow check to a range check for stop. */
+ int32_t k = IR(tref_ref(step))->i;
+ k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k;
+ emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k));
+ }
+ /* The visible loop variable inherits the (possibly narrowed) index. */
+ J->base[ra+FORL_EXT] = sloadt(J, (int32_t)(ra+FORL_IDX), t, IRSLOAD_INHERIT);
+ J->maxslot = ra+FORL_EXT+1;
+}
+
+/* Setup recording for a root trace started by a hot loop.
+** Returns the PC of the first instruction to record and sets
+** J->bc_min/J->bc_extent to the loop's bytecode range (leaving that
+** range aborts the trace, see lj_record_ins).
+*/
+static const BCIns *rec_setup_root(jit_State *J)
+{
+ /* Determine the next PC and the bytecode range for the loop. */
+ const BCIns *pcj, *pc = J->pc;
+ BCIns ins = *pc;
+ BCReg ra = bc_a(ins);
+ switch (bc_op(ins)) {
+ case BC_FORL:
+ J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
+ pc += 1+bc_j(ins); /* Jump back to the start of the loop body. */
+ J->bc_min = pc;
+ break;
+ case BC_ITERL:
+ lua_assert(bc_op(pc[-1]) == BC_ITERC);
+ J->maxslot = ra + bc_b(pc[-1]) - 1;
+ J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
+ pc += 1+bc_j(ins);
+ lua_assert(bc_op(pc[-1]) == BC_JMP);
+ J->bc_min = pc;
+ break;
+ case BC_LOOP:
+ /* Only check BC range for real loops, but not for "repeat until true". */
+ pcj = pc + bc_j(ins);
+ ins = *pcj;
+ if (bc_op(ins) == BC_JMP && bc_j(ins) < 0) {
+ J->bc_min = pcj+1 + bc_j(ins);
+ J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
+ }
+ J->maxslot = ra;
+ pc++;
+ break;
+ default:
+ lua_assert(0); /* Hot loops only start at the ops above. */
+ break;
+ }
+ return pc;
+}
+
+/* Setup recording for a side trace.
+** Replays the parent trace's snapshot at the exit: each live snapshot
+** slot is materialized as a constant, a placeholder FRAME or an inherited
+** (guard-free) SLOAD, de-duplicating repeated refs via a bloom filter.
+*/
+static void rec_setup_side(jit_State *J, Trace *T)
+{
+ SnapShot *snap = &T->snap[J->exitno];
+ IRRef2 *map = &T->snapmap[snap->mapofs];
+ BCReg s, nslots = snap->nslots;
+ BloomFilter seen = 0;
+ for (s = 0; s < nslots; s++) {
+ IRRef ref = snap_ref(map[s]);
+ if (ref) {
+ IRIns *ir = &T->ir[ref];
+ TRef tr = 0;
+ /* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */
+ if (bloomtest(seen, ref)) {
+ BCReg j;
+ for (j = 0; j < s; j++)
+ if (snap_ref(map[j]) == ref) {
+ if (ir->o == IR_FRAME && irt_isfunc(ir->t))
+ J->baseslot = s+1; /* Frame slot: base starts above it. */
+ tr = J->slot[j]; /* Reuse the already-materialized TRef. */
+ goto dupslot;
+ }
+ }
+ bloomset(seen, ref);
+ switch ((IROp)ir->o) {
+ case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
+ case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
+ case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
+ case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
+ case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
+ if (irt_isfunc(ir->t)) {
+ J->baseslot = s+1;
+ J->framedepth++;
+ tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
+ tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
+ } else {
+ tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void));
+ tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr);
+ }
+ break;
+ case IR_SLOAD: /* Inherited SLOADs don't need a guard. */
+ tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
+ (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
+ break;
+ default: /* Parent refs are already typed and don't need a guard. */
+ tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
+ IRSLOAD_INHERIT|IRSLOAD_PARENT);
+ break;
+ }
+ dupslot:
+ J->slot[s] = tr;
+ }
+ }
+ J->base = J->slot + J->baseslot;
+ J->maxslot = nslots - J->baseslot;
+ lj_snap_add(J); /* Snapshot #0 of the side trace. */
+}
+
+/* Setup for recording a new trace.
+** Resets per-trace recorder state, emits the fixed IR references
+** (BASE and the nil/false/true constants), then branches on root vs.
+** side trace: side traces replay the parent snapshot, root traces
+** determine the loop's bytecode range and take snapshot #0.
+*/
+void lj_record_setup(jit_State *J)
+{
+ uint32_t i;
+
+ /* Initialize state related to current trace. */
+ memset(J->slot, 0, sizeof(J->slot));
+ memset(J->chain, 0, sizeof(J->chain));
+ memset(J->bpropcache, 0, sizeof(J->bpropcache));
+
+ J->baseslot = 1; /* Invoking function is at base[-1]. */
+ J->base = J->slot + J->baseslot;
+ J->maxslot = 0;
+ J->framedepth = 0;
+
+ J->instunroll = J->param[JIT_P_instunroll];
+ J->loopunroll = J->param[JIT_P_loopunroll];
+ J->tailcalled = 0;
+ J->loopref = 0;
+
+ J->bc_min = NULL; /* Means no limit. */
+ J->bc_extent = ~(MSize)0;
+
+ /* Emit instructions for fixed references. Also triggers initial IR alloc. */
+ emitir_raw(IRT(IR_BASE, IRT_PTR), J->parent, J->exitno);
+ for (i = 0; i <= 2; i++) {
+ IRIns *ir = IR(REF_NIL-i);
+ ir->i = 0;
+ ir->t.irt = (uint8_t)(IRT_NIL+i); /* Constants for nil/false/true. */
+ ir->o = IR_KPRI;
+ ir->prev = 0;
+ }
+ J->cur.nk = REF_TRUE;
+
+ setgcref(J->cur.startpt, obj2gco(J->pt));
+ J->startpc = J->pc;
+ if (J->parent) { /* Side trace. */
+ Trace *T = J->trace[J->parent];
+ TraceNo root = T->root ? T->root : J->parent;
+ J->cur.root = (uint16_t)root;
+ J->cur.startins = BCINS_AD(BC_JMP, 0, 0);
+ /* Check whether we could at least potentially form an extra loop. */
+ if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) {
+ /* We can narrow a FORL for some side traces, too. */
+ if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI &&
+ bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) {
+ lj_snap_add(J);
+ rec_setup_forl(J, J->pc-1);
+ goto sidecheck;
+ }
+ } else {
+ J->startpc = NULL; /* Prevent forming an extra loop. */
+ }
+ rec_setup_side(J, T);
+ sidecheck:
+ /* Give up early when the parent has too many side traces already or
+ ** this exit isn't hot enough relative to the configured thresholds. */
+ if (J->trace[J->cur.root]->nchild >= J->param[JIT_P_maxside] ||
+ T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
+ J->param[JIT_P_tryside])
+ rec_stop(J, TRACE_INTERP);
+ } else { /* Root trace. */
+ J->cur.root = 0;
+ if (J->pc >= J->pt->bc) { /* Not a hot CALL? */
+ J->cur.startins = *J->pc;
+ J->pc = rec_setup_root(J);
+ /* Note: the loop instruction itself is recorded at the end and not
+ ** at the start! So snapshot #0 needs to point to the *next* instruction.
+ */
+ } else {
+ J->cur.startins = BCINS_ABC(BC_CALL, 0, 0, 0);
+ }
+ lj_snap_add(J);
+ if (bc_op(J->cur.startins) == BC_FORL)
+ rec_setup_forl(J, J->pc-1);
+ if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
+ lj_trace_err(J, LJ_TRERR_STACKOV);
+ }
+}
+
+#undef IR
+#undef emitir_raw
+#undef emitir
+
+#endif
diff --git a/src/lj_record.h b/src/lj_record.h
new file mode 100644
index 00000000..7bb7952c
--- /dev/null
+++ b/src/lj_record.h
@@ -0,0 +1,17 @@
+/*
+** Trace recorder (bytecode -> SSA IR).
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_RECORD_H
+#define _LJ_RECORD_H
+
+#include "lj_obj.h"
+#include "lj_jit.h"
+
+#if LJ_HASJIT
+LJ_FUNC void lj_record_ins(jit_State *J);
+LJ_FUNC void lj_record_setup(jit_State *J);
+#endif
+
+#endif
diff --git a/src/lj_snap.c b/src/lj_snap.c
new file mode 100644
index 00000000..09cd095c
--- /dev/null
+++ b/src/lj_snap.c
@@ -0,0 +1,286 @@
+/*
+** Snapshot handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_snap_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_gc.h"
+#include "lj_state.h"
+#include "lj_frame.h"
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+#include "lj_trace.h"
+#include "lj_snap.h"
+#include "lj_target.h"
+
+/* Some local macros to save typing. Undef'd at the end. */
+#define IR(ref) (&J->cur.ir[(ref)])
+
+/* -- Snapshot generation ------------------------------------------------- */
+
+/* NYI: Snapshots are in need of a redesign. The current storage model for
+** snapshot maps is too wasteful. They could be compressed (1D or 2D) and
+** made more flexible at the same time. Iterators should no longer need to
+** skip unmodified slots. IR_FRAME should be eliminated, too.
+*/
+
+/* Add all modified slots to the snapshot. */
+static void snapshot_slots(jit_State *J, IRRef2 *map, BCReg nslots)
+{
+ BCReg s;
+ for (s = 0; s < nslots; s++) {
+ IRRef ref = tref_ref(J->slot[s]);
+ if (ref) {
+ IRIns *ir = IR(ref);
+ if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT))
+ ref = 0;
+ }
+ map[s] = (IRRef2)ref;
+ }
+}
+
+/* Add frame links at the end of the snapshot. */
+static MSize snapshot_framelinks(jit_State *J, IRRef2 *map)
+{
+ cTValue *frame = J->L->base - 1;
+ cTValue *lim = J->L->base - J->baseslot;
+ MSize f = 0;
+ map[f++] = u32ptr(J->pc);
+ while (frame > lim) {
+ if (frame_islua(frame)) {
+ map[f++] = u32ptr(frame_pc(frame));
+ frame = frame_prevl(frame);
+ } else if (frame_ispcall(frame)) {
+ map[f++] = (uint32_t)frame_ftsz(frame);
+ frame = frame_prevd(frame);
+ } else if (frame_iscont(frame)) {
+ map[f++] = (uint32_t)frame_ftsz(frame);
+ map[f++] = u32ptr(frame_contpc(frame));
+ frame = frame_prevd(frame);
+ } else {
+ lua_assert(0);
+ }
+ }
+ return f;
+}
+
+/* Take a snapshot of the current stack. */
+static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
+{
+ BCReg nslots = J->baseslot + J->maxslot;
+ MSize nsm, nframelinks;
+ IRRef2 *p;
+ /* Conservative estimate. Continuation frames need 2 slots. */
+ nsm = nsnapmap + nslots + (uint32_t)J->framedepth*2+1;
+ if (LJ_UNLIKELY(nsm > J->sizesnapmap)) { /* Need to grow snapshot map? */
+ if (nsm < 2*J->sizesnapmap)
+ nsm = 2*J->sizesnapmap;
+ else if (nsm < 64)
+ nsm = 64;
+ J->snapmapbuf = (IRRef2 *)lj_mem_realloc(J->L, J->snapmapbuf,
+ J->sizesnapmap*sizeof(IRRef2), nsm*sizeof(IRRef2));
+ J->cur.snapmap = J->snapmapbuf;
+ J->sizesnapmap = nsm;
+ }
+ p = &J->cur.snapmap[nsnapmap];
+ snapshot_slots(J, p, nslots);
+ nframelinks = snapshot_framelinks(J, p + nslots);
+ J->cur.nsnapmap = (uint16_t)(nsnapmap + nslots + nframelinks);
+ snap->mapofs = (uint16_t)nsnapmap;
+ snap->ref = (IRRef1)J->cur.nins;
+ snap->nslots = (uint8_t)nslots;
+ snap->nframelinks = (uint8_t)nframelinks;
+ snap->count = 0;
+}
+
+/* Add or merge a snapshot. */
+void lj_snap_add(jit_State *J)
+{
+ MSize nsnap = J->cur.nsnap;
+ MSize nsnapmap = J->cur.nsnapmap;
+ /* Merge if no ins. in between or if requested and no guard in between. */
+ if (J->mergesnap ? !irt_isguard(J->guardemit) :
+ (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
+ nsnapmap = J->cur.snap[--nsnap].mapofs;
+ } else {
+ /* Need to grow snapshot buffer? */
+ if (LJ_UNLIKELY(nsnap >= J->sizesnap)) {
+ MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
+ if (nsnap >= maxsnap)
+ lj_trace_err(J, LJ_TRERR_SNAPOV);
+ lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
+ J->cur.snap = J->snapbuf;
+ }
+ J->cur.nsnap = (uint16_t)(nsnap+1);
+ }
+ J->mergesnap = 0;
+ J->guardemit.irt = 0;
+ snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
+}
+
+/* Shrink last snapshot. */
+void lj_snap_shrink(jit_State *J)
+{
+ BCReg nslots = J->baseslot + J->maxslot;
+ SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
+ IRRef2 *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots];
+ IRRef2 *nflinks = &J->cur.snapmap[snap->mapofs + nslots];
+ uint32_t s, nframelinks = snap->nframelinks;
+ lua_assert(nslots < snap->nslots);
+ snap->nslots = (uint8_t)nslots;
+ J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks);
+ for (s = 0; s < nframelinks; s++) /* Move frame links down. */
+ nflinks[s] = oflinks[s];
+}
+
+/* -- Snapshot access ----------------------------------------------------- */
+
+/* Initialize a Bloom Filter with all renamed refs.
+** There are very few renames (often none), so the filter has
+** very few bits set. This makes it suitable for negative filtering.
+*/
+static BloomFilter snap_renamefilter(Trace *T, SnapNo lim)
+{
+ BloomFilter rfilt = 0;
+ IRIns *ir;
+ for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
+ if (ir->op2 <= lim)
+ bloomset(rfilt, ir->op1);
+ return rfilt;
+}
+
+/* Process matching renames to find the original RegSP. */
+static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs)
+{
+ IRIns *ir;
+ for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
+ if (ir->op1 == ref && ir->op2 <= lim)
+ rs = ir->prev;
+ return rs;
+}
+
+/* Convert a snapshot into a linear slot -> RegSP map. */
+void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno)
+{
+ SnapShot *snap = &T->snap[snapno];
+ BCReg s, nslots = snap->nslots;
+ IRRef2 *map = &T->snapmap[snap->mapofs];
+ BloomFilter rfilt = snap_renamefilter(T, snapno);
+ for (s = 0; s < nslots; s++) {
+ IRRef ref = snap_ref(map[s]);
+ if (!irref_isk(ref)) {
+ IRIns *ir = &T->ir[ref];
+ uint32_t rs = ir->prev;
+ if (bloomtest(rfilt, ref))
+ rs = snap_renameref(T, snapno, ref, rs);
+ rsmap[s] = (uint16_t)rs;
+ }
+ }
+}
+
+/* Restore interpreter state from exit state with the help of a snapshot. */
+void lj_snap_restore(jit_State *J, void *exptr)
+{
+ ExitState *ex = (ExitState *)exptr;
+ SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
+ Trace *T = J->trace[J->parent];
+ SnapShot *snap = &T->snap[snapno];
+ BCReg s, nslots = snap->nslots;
+ IRRef2 *map = &T->snapmap[snap->mapofs];
+ IRRef2 *flinks = map + nslots + snap->nframelinks;
+ TValue *o, *newbase, *ntop;
+ BloomFilter rfilt = snap_renamefilter(T, snapno);
+ lua_State *L = J->L;
+
+ /* Make sure the stack is big enough for the slots from the snapshot. */
+ if (L->base + nslots >= L->maxstack) {
+ L->top = curr_topL(L);
+ lj_state_growstack(L, nslots - curr_proto(L)->framesize);
+ }
+
+ /* Fill stack slots with data from the registers and spill slots. */
+ newbase = NULL;
+ ntop = L->base;
+ for (s = 0, o = L->base-1; s < nslots; s++, o++) {
+ IRRef ref = snap_ref(map[s]);
+ if (ref) {
+ IRIns *ir = &T->ir[ref];
+ if (irref_isk(ref)) { /* Restore constant slot. */
+ lj_ir_kvalue(L, o, ir);
+ } else {
+ IRType1 t = ir->t;
+ RegSP rs = ir->prev;
+ if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
+ rs = snap_renameref(T, snapno, ref, rs);
+ if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
+ int32_t *sps = &ex->spill[regsp_spill(rs)];
+ if (irt_isinteger(t)) {
+ setintV(o, *sps);
+ } else if (irt_isnum(t)) {
+ o->u64 = *(uint64_t *)sps;
+ } else {
+ lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
+ setgcrefi(o->gcr, *sps);
+ setitype(o, irt_toitype(t));
+ }
+ } else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */
+ Reg r = regsp_reg(rs);
+ if (irt_isinteger(t)) {
+ setintV(o, ex->gpr[r-RID_MIN_GPR]);
+ } else if (irt_isnum(t)) {
+ setnumV(o, ex->fpr[r-RID_MIN_FPR]);
+ } else {
+ if (!irt_ispri(t))
+ setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
+ setitype(o, irt_toitype(t));
+ }
+ } else { /* Restore frame slot. */
+ lua_assert(ir->o == IR_FRAME);
+ /* This works for both PTR and FUNC IR_FRAME. */
+ setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
+ if (s != 0) /* Do not overwrite link to previous frame. */
+ o->fr.tp.ftsz = (int32_t)*--flinks;
+ if (irt_isfunc(ir->t)) {
+ GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
+ if (isluafunc(fn)) {
+ TValue *fs;
+ newbase = o+1;
+ fs = newbase + funcproto(fn)->framesize;
+ if (fs > ntop) ntop = fs; /* Update top for newly added frames. */
+ }
+ }
+ }
+ }
+ } else if (newbase) {
+ setnilV(o); /* Clear unreferenced slots of newly added frames. */
+ }
+ }
+ if (newbase) { /* Clear remainder of newly added frames. */
+ L->base = newbase;
+ if (ntop >= L->maxstack) { /* Need to grow the stack again. */
+ MSize need = (MSize)(ntop - o);
+ L->top = o;
+ lj_state_growstack(L, need);
+ o = L->top;
+ ntop = o + need;
+ }
+ L->top = curr_topL(L);
+ for (; o < ntop; o++)
+ setnilV(o);
+ } else { /* Must not clear slots of existing frame. */
+ L->top = curr_topL(L);
+ }
+ lua_assert(map + nslots == flinks-1);
+ J->pc = (const BCIns *)(uintptr_t)(*--flinks);
+}
+
+#undef IR
+
+#endif
diff --git a/src/lj_snap.h b/src/lj_snap.h
new file mode 100644
index 00000000..806047b1
--- /dev/null
+++ b/src/lj_snap.h
@@ -0,0 +1,19 @@
+/*
+** Snapshot handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_SNAP_H
+#define _LJ_SNAP_H
+
+#include "lj_obj.h"
+#include "lj_jit.h"
+
+#if LJ_HASJIT
+LJ_FUNC void lj_snap_add(jit_State *J);
+LJ_FUNC void lj_snap_shrink(jit_State *J);
+LJ_FUNC void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno);
+LJ_FUNC void lj_snap_restore(jit_State *J, void *exptr);
+#endif
+
+#endif
diff --git a/src/lj_state.c b/src/lj_state.c
new file mode 100644
index 00000000..b4bc7a0c
--- /dev/null
+++ b/src/lj_state.c
@@ -0,0 +1,255 @@
+/*
+** State and stack handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#define lj_state_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_func.h"
+#include "lj_meta.h"
+#include "lj_state.h"
+#include "lj_frame.h"
+#include "lj_trace.h"
+#include "lj_dispatch.h"
+#include "lj_vm.h"
+#include "lj_lex.h"
+#include "lj_alloc.h"
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Stack sizes. */
+#define LJ_STACK_MIN LUA_MINSTACK /* Min. stack size. */
+#define LJ_STACK_MAX LUAI_MAXSTACK /* Max. stack size. */
+#define LJ_STACK_START (2*LJ_STACK_MIN) /* Starting stack size. */
+#define LJ_STACK_MAXEX (LJ_STACK_MAX + 1 + LJ_STACK_EXTRA)
+
+/* Explanation of LJ_STACK_EXTRA:
+**
+** Calls to metamethods store their arguments beyond the current top
+** without checking for the stack limit. This avoids stack resizes which
+** would invalidate passed TValue pointers. The stack check is performed
+** later by the call gate. This can safely resize the stack or raise an
+** error. Thus we need some extra slots beyond the current stack limit.
+**
+** Most metamethods need 4 slots above top (cont, mobj, arg1, arg2) plus
+** one extra slot if mobj is not a function. Only lj_meta_tset needs 5
+** slots above top, but then mobj is always a function. So we can get by
+** with 5 extra slots.
+*/
+
+/* Resize stack slots and adjust pointers in state. */
+static void resizestack(lua_State *L, MSize n)
+{
+ TValue *oldst = L->stack;
+ ptrdiff_t delta;
+ MSize realsize = n + 1 + LJ_STACK_EXTRA;
+ GCobj *up;
+ lua_assert((MSize)(L->maxstack-L->stack) == L->stacksize-LJ_STACK_EXTRA-1);
+ lj_mem_reallocvec(L, L->stack, L->stacksize, realsize, TValue);
+ delta = (char *)L->stack - (char *)oldst;
+ L->maxstack = L->stack + n;
+ L->stacksize = realsize;
+ L->base = (TValue *)((char *)L->base + delta);
+ L->top = (TValue *)((char *)L->top + delta);
+ for (up = gcref(L->openupval); up != NULL; up = gcnext(up))
+ gco2uv(up)->v = (TValue *)((char *)gco2uv(up)->v + delta);
+ if (obj2gco(L) == gcref(G(L)->jit_L))
+ setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
+}
+
+/* Relimit stack after error, in case the limit was overdrawn. */
+void lj_state_relimitstack(lua_State *L)
+{
+ if (L->stacksize > LJ_STACK_MAXEX && L->top - L->stack < LJ_STACK_MAX-1)
+ resizestack(L, LJ_STACK_MAX);
+}
+
+/* Try to shrink the stack (called from GC). */
+void lj_state_shrinkstack(lua_State *L, MSize used)
+{
+ if (L->stacksize > LJ_STACK_MAXEX)
+ return; /* Avoid stack shrinking while handling stack overflow. */
+ if (4*used < L->stacksize &&
+ 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize &&
+ obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */
+ resizestack(L, L->stacksize >> 1);
+}
+
+/* Try to grow stack. */
+void lj_state_growstack(lua_State *L, MSize need)
+{
+ if (L->stacksize > LJ_STACK_MAXEX) /* Overflow while handling overflow? */
+ lj_err_throw(L, LUA_ERRERR);
+ resizestack(L, L->stacksize + (need > L->stacksize ? need : L->stacksize));
+ if (L->stacksize > LJ_STACK_MAXEX) {
+ if (curr_funcisL(L)) { /* Clear slots of incomplete Lua frame. */
+ TValue *top = curr_topL(L);
+ while (--top >= L->top) setnilV(top);
+ }
+ lj_err_msg(L, LJ_ERR_STKOV); /* ... to allow L->top = curr_topL(L). */
+ }
+}
+
+void lj_state_growstack1(lua_State *L)
+{
+ lj_state_growstack(L, 1);
+}
+
+/* Allocate basic stack for new state. */
+static void stack_init(lua_State *L1, lua_State *L)
+{
+ L1->stack = lj_mem_newvec(L, LJ_STACK_START + LJ_STACK_EXTRA, TValue);
+ L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA;
+ L1->top = L1->stack;
+ L1->maxstack = L1->stack+(L1->stacksize - LJ_STACK_EXTRA)-1;
+ setthreadV(L1, L1->top, L1); /* needed for curr_funcisL() on empty stack */
+ setnilV(L1->top); /* but clear its type */
+ L1->base = ++L1->top;
+}
+
+/* -- State handling ------------------------------------------------------ */
+
+/* Open parts that may cause memory-allocation errors. */
+static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
+{
+ global_State *g = G(L);
+ UNUSED(dummy);
+ UNUSED(ud);
+ stack_init(L, L);
+ /* NOBARRIER: State initialization, all objects are white. */
+ setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL)));
+ settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY));
+ lj_str_resize(L, LJ_MIN_STRTAB-1);
+ lj_meta_init(L);
+ lj_lex_init(L);
+ fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */
+ g->gc.threshold = 4*g->gc.total;
+ return NULL;
+}
+
+static void close_state(lua_State *L)
+{
+ global_State *g = G(L);
+#ifndef LUAJIT_USE_SYSMALLOC
+ if (g->allocf == lj_alloc_f) {
+ lj_alloc_destroy(g->allocd);
+ } else
+#endif
+ {
+ lj_func_closeuv(L, L->stack);
+ lj_gc_freeall(g);
+ lua_assert(gcref(g->gc.root) == obj2gco(L));
+ lua_assert(g->strnum == 0);
+ lj_trace_freestate(g);
+ lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *);
+ lj_str_freebuf(g, &g->tmpbuf);
+ lj_mem_freevec(g, L->stack, L->stacksize, TValue);
+ lua_assert(g->gc.total == sizeof(GG_State));
+ g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0);
+ }
+}
+
+LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
+{
+ GG_State *GG = cast(GG_State *, f(ud, NULL, 0, sizeof(GG_State)));
+ lua_State *L = &GG->L;
+ global_State *g = &GG->g;
+ if (GG == NULL) return NULL;
+ memset(GG, 0, sizeof(GG_State));
+ L->gct = ~LJ_TTHREAD;
+ L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */
+ L->dummy_ffid = FF_C;
+ setmref(L->glref, g);
+ g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED;
+ g->allocf = f;
+ g->allocd = ud;
+ setgcref(g->mainthref, obj2gco(L));
+ setgcref(g->uvhead.prev, obj2gco(&g->uvhead));
+ setgcref(g->uvhead.next, obj2gco(&g->uvhead));
+ g->strmask = ~(MSize)0;
+ setnilV(registry(L));
+ setnilV(&g->nilnode.val);
+ setnilV(&g->nilnode.key);
+ lj_str_initbuf(L, &g->tmpbuf);
+ g->gc.state = GCSpause;
+ setgcref(g->gc.root, obj2gco(L));
+ g->gc.sweep = &g->gc.root;
+ g->gc.total = sizeof(GG_State);
+ g->gc.pause = LUAI_GCPAUSE;
+ g->gc.stepmul = LUAI_GCMUL;
+ lj_dispatch_init((GG_State *)L);
+ L->status = LUA_ERRERR+1; /* Avoid touching the stack upon memory error. */
+ if (lj_vm_cpcall(L, cpluaopen, NULL, NULL) != 0) {
+ /* Memory allocation error: free partial state. */
+ close_state(L);
+ return NULL;
+ }
+ L->status = 0;
+ return L;
+}
+
+static TValue *cpfinalize(lua_State *L, lua_CFunction dummy, void *ud)
+{
+ UNUSED(dummy);
+ UNUSED(ud);
+ lj_gc_finalizeudata(L);
+ /* Frame pop omitted. */
+ return NULL;
+}
+
+LUA_API void lua_close(lua_State *L)
+{
+ global_State *g = G(L);
+ L = mainthread(g); /* Only the main thread can be closed. */
+ lj_func_closeuv(L, L->stack);
+ lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */
+#if LJ_HASJIT
+ G2J(g)->flags &= ~JIT_F_ON;
+ G2J(g)->state = LJ_TRACE_IDLE;
+ lj_dispatch_update(g);
+#endif
+ do {
+ hook_enter(g);
+ L->status = 0;
+ L->cframe = NULL;
+ L->base = L->top = L->stack + 1;
+ } while (lj_vm_cpcall(L, cpfinalize, NULL, NULL) != 0);
+ close_state(L);
+}
+
+lua_State *lj_state_new(lua_State *L)
+{
+ lua_State *L1 = lj_mem_newobj(L, lua_State);
+ L1->gct = ~LJ_TTHREAD;
+ L1->dummy_ffid = FF_C;
+ L1->status = 0;
+ L1->stacksize = 0;
+ L1->stack = NULL;
+ L1->cframe = NULL;
+ /* NOBARRIER: The lua_State is new (marked white). */
+ setgcrefnull(L1->openupval);
+ setmrefr(L1->glref, L->glref);
+ setgcrefr(L1->env, L->env);
+ stack_init(L1, L); /* init stack */
+ lua_assert(iswhite(obj2gco(L1)));
+ return L1;
+}
+
+void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
+{
+ lua_assert(L != mainthread(g));
+ lj_func_closeuv(L, L->stack);
+ lua_assert(gcref(L->openupval) == NULL);
+ lj_mem_freevec(g, L->stack, L->stacksize, TValue);
+ lj_mem_freet(g, L);
+}
+
diff --git a/src/lj_state.h b/src/lj_state.h
new file mode 100644
index 00000000..54e85405
--- /dev/null
+++ b/src/lj_state.h
@@ -0,0 +1,31 @@
+/*
+** State and stack handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_STATE_H
+#define _LJ_STATE_H
+
+#include "lj_obj.h"
+
+#define incr_top(L) \
+ (++L->top >= L->maxstack && (lj_state_growstack1(L), 0))
+
+#define savestack(L, p) ((char *)(p) - (char *)L->stack)
+#define restorestack(L, n) ((TValue *)((char *)L->stack + (n)))
+
+LJ_FUNC void lj_state_relimitstack(lua_State *L);
+LJ_FUNC void lj_state_shrinkstack(lua_State *L, MSize used);
+LJ_FUNCA void lj_state_growstack(lua_State *L, MSize need);
+LJ_FUNCA void lj_state_growstack1(lua_State *L);
+
+static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
+{
+ if ((MSize)((char *)L->maxstack-(char *)L->top) <= need*(MSize)sizeof(TValue))
+ lj_state_growstack(L, need);
+}
+
+LJ_FUNC lua_State *lj_state_new(lua_State *L);
+LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L);
+
+#endif
diff --git a/src/lj_str.c b/src/lj_str.c
new file mode 100644
index 00000000..26f91cba
--- /dev/null
+++ b/src/lj_str.c
@@ -0,0 +1,301 @@
+/*
+** String handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <stdio.h>
+
+#define lj_str_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_state.h"
+#include "lj_ctype.h"
+
+/* -- String interning ---------------------------------------------------- */
+
+/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
+int32_t lj_str_cmp(GCstr *a, GCstr *b)
+{
+ MSize i, n = a->len > b->len ? b->len : a->len;
+ for (i = 0; i < n; i += 4) {
+ /* Note: innocuous access up to end of string + 3. */
+ uint32_t va = *(const uint32_t *)(strdata(a)+i);
+ uint32_t vb = *(const uint32_t *)(strdata(b)+i);
+ if (va != vb) {
+#if LJ_ARCH_ENDIAN == LUAJIT_LE
+ va = lj_bswap(va); vb = lj_bswap(vb);
+#endif
+ i -= n;
+ if ((int32_t)i >= -3) {
+ va >>= 32+(i<<3); vb >>= 32+(i<<3);
+ if (va == vb) break;
+ }
+ return (int32_t)(va - vb);
+ }
+ }
+ return (int32_t)(a->len - b->len);
+}
+
+/* Resize the string hash table (grow and shrink). */
+void lj_str_resize(lua_State *L, MSize newmask)
+{
+ global_State *g = G(L);
+ GCRef *newhash;
+ MSize i;
+ if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
+ return; /* No resizing during GC traversal or if already too big. */
+ newhash = lj_mem_newvec(L, newmask+1, GCRef);
+ memset(newhash, 0, (newmask+1)*sizeof(GCRef));
+ for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
+ GCobj *p = gcref(g->strhash[i]);
+ while (p) { /* Follow each hash chain and reinsert all strings. */
+ MSize h = gco2str(p)->hash & newmask;
+ GCobj *next = gcnext(p);
+ /* NOBARRIER: The string table is a GC root. */
+ setgcrefr(p->gch.nextgc, newhash[h]);
+ setgcref(newhash[h], p);
+ p = next;
+ }
+ }
+ lj_mem_freevec(g, g->strhash, g->strmask+1, GCstr *);
+ g->strmask = newmask;
+ g->strhash = newhash;
+}
+
+/* Intern a string and return string object. */
+GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
+{
+ global_State *g;
+ GCstr *s;
+ GCobj *o;
+ MSize len = (MSize)lenx;
+ MSize h = len;
+ MSize step = (len>>5)+1; /* Partial hash. */
+ MSize l1;
+ if (lenx >= LJ_MAX_STR)
+ lj_err_msg(L, LJ_ERR_STROV);
+ for (l1 = len; l1 >= step; l1 -= step) /* Compute string hash. */
+ h = h ^ ((h<<5)+(h>>2)+cast(unsigned char, str[l1-1]));
+ /* Check if the string has already been interned. */
+ g = G(L);
+ for (o = gcref(g->strhash[h & g->strmask]); o != NULL; o = gcnext(o)) {
+ GCstr *tso = gco2str(o);
+ if (tso->len == len && (memcmp(str, strdata(tso), len) == 0)) {
+ if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */
+ return tso; /* Return existing string. */
+ }
+ }
+ /* Nope, create a new string. */
+ s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
+ newwhite(g, s);
+ s->gct = ~LJ_TSTR;
+ s->len = len;
+ s->hash = h;
+ s->reserved = 0;
+ memcpy(strdatawr(s), str, len);
+ strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
+ /* Add it to string hash table. */
+ h &= g->strmask;
+ s->nextgc = g->strhash[h];
+ /* NOBARRIER: The string table is a GC root. */
+ setgcref(g->strhash[h], obj2gco(s));
+ if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
+ lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
+ return s; /* Return newly interned string. */
+}
+
+void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
+{
+ g->strnum--;
+ lj_mem_free(g, s, sizestring(s));
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+/* Convert string to number. */
+int lj_str_numconv(const char *s, TValue *n)
+{
+ lua_Number sign = 1;
+ const uint8_t *p = (const uint8_t *)s;
+ while (lj_ctype_isspace(*p)) p++;
+ if (*p == '-') { p++; sign = -1; } else if (*p == '+') { p++; }
+ if ((uint32_t)(*p - '0') < 10) {
+ uint32_t k = (uint32_t)(*p++ - '0');
+ if (k == 0 && ((*p & ~0x20) == 'X')) {
+ p++;
+ while (lj_ctype_isxdigit(*p)) {
+ if (k >= 0x10000000) goto parsedbl;
+ k = (k << 4) + (*p & 15u);
+ if (!lj_ctype_isdigit(*p)) k += 9;
+ p++;
+ }
+ } else {
+ while ((uint32_t)(*p - '0') < 10) {
+ if (k >= 0x19999999) goto parsedbl;
+ k = k * 10u + (uint32_t)(*p++ - '0');
+ }
+ }
+ while (LJ_UNLIKELY(lj_ctype_isspace(*p))) p++;
+ if (LJ_LIKELY(*p == '\0')) {
+ setnumV(n, sign * cast_num(k));
+ return 1;
+ }
+ }
+parsedbl:
+ {
+ TValue tv;
+ char *endptr;
+ setnumV(&tv, lua_str2number(s, &endptr));
+ if (endptr == s) return 0; /* conversion failed */
+ if (LJ_UNLIKELY(*endptr != '\0')) {
+ while (lj_ctype_isspace((uint8_t)*endptr)) endptr++;
+ if (*endptr != '\0') return 0; /* invalid trailing characters? */
+ }
+ if (LJ_LIKELY(!tvisnan(&tv)))
+ setnumV(n, numV(&tv));
+ else
+ setnanV(n); /* Canonicalize injected NaNs. */
+ return 1;
+ }
+}
+
+/* Convert number to string. */
+GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np)
+{
+ char s[LUAI_MAXNUMBER2STR];
+ lua_Number n = *np;
+ size_t len = (size_t)lua_number2str(s, n);
+ return lj_str_new(L, s, len);
+}
+
+/* Convert integer to string. */
+GCstr *lj_str_fromint(lua_State *L, int32_t k)
+{
+ char s[1+10];
+ char *p = s+sizeof(s);
+ uint32_t i = (uint32_t)(k < 0 ? -k : k);
+ do { *--p = (char)('0' + i % 10); } while (i /= 10);
+ if (k < 0) *--p = '-';
+ return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
+}
+
+/* -- String formatting --------------------------------------------------- */
+
+static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
+{
+ char *p;
+ MSize i;
+ if (sb->n + len > sb->sz) {
+ MSize sz = sb->sz * 2;
+ while (sb->n + len > sz) sz = sz * 2;
+ lj_str_resizebuf(L, sb, sz);
+ }
+ p = sb->buf + sb->n;
+ sb->n += len;
+ for (i = 0; i < len; i++) p[i] = str[i];
+}
+
+static void addchar(lua_State *L, SBuf *sb, int c)
+{
+ if (sb->n + 1 > sb->sz) {
+ MSize sz = sb->sz * 2;
+ lj_str_resizebuf(L, sb, sz);
+ }
+ sb->buf[sb->n++] = cast(char, c);
+}
+
+/* Push formatted message as a string object to Lua stack. va_list variant. */
+const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
+{
+ SBuf *sb = &G(L)->tmpbuf;
+ lj_str_needbuf(L, sb, (MSize)strlen(fmt));
+ lj_str_resetbuf(sb);
+ for (;;) {
+ const char *e = strchr(fmt, '%');
+ if (e == NULL) break;
+ addstr(L, sb, fmt, (MSize)(e-fmt));
+ /* This function only handles %s, %c, %d, %f and %p formats. */
+ switch (e[1]) {
+ case 's': {
+ const char *s = va_arg(argp, char *);
+ if (s == NULL) s = "(null)";
+ addstr(L, sb, s, (MSize)strlen(s));
+ break;
+ }
+ case 'c':
+ addchar(L, sb, va_arg(argp, int));
+ break;
+ case 'd': {
+ char buff[1+10];
+ char *p = buff+sizeof(buff);
+ int32_t k = va_arg(argp, int32_t);
+ uint32_t i = (uint32_t)(k < 0 ? -k : k);
+ do { *--p = (char)('0' + i % 10); } while (i /= 10);
+ if (k < 0) *--p = '-';
+ addstr(L, sb, p, (MSize)(buff+sizeof(buff)-p));
+ break;
+ }
+ case 'f': {
+ char buff[LUAI_MAXNUMBER2STR];
+ lua_Number n = cast_num(va_arg(argp, LUAI_UACNUMBER));
+ MSize len = (MSize)lua_number2str(buff, n);
+ addstr(L, sb, buff, len);
+ break;
+ }
+ case 'p': {
+#define FMTP_CHARS (2*sizeof(ptrdiff_t))
+ char buff[2+FMTP_CHARS];
+ ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
+ int i;
+ buff[0] = '0';
+ buff[1] = 'x';
+ for (i = 2+FMTP_CHARS-1; i >= 2; i--, p >>= 4)
+ buff[i] = "0123456789abcdef"[(p & 15)];
+ addstr(L, sb, buff, 2+FMTP_CHARS);
+ break;
+ }
+ case '%':
+ addchar(L, sb, '%');
+ break;
+ default:
+ addchar(L, sb, '%');
+ addchar(L, sb, e[1]);
+ break;
+ }
+ fmt = e+2;
+ }
+ addstr(L, sb, fmt, (MSize)strlen(fmt));
+ setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
+ incr_top(L);
+ return strVdata(L->top - 1);
+}
+
+/* Push formatted message as a string object to Lua stack. Vararg variant. */
+const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
+{
+ const char *msg;
+ va_list argp;
+ va_start(argp, fmt);
+ msg = lj_str_pushvf(L, fmt, argp);
+ va_end(argp);
+ return msg;
+}
+
+/* -- Buffer handling ----------------------------------------------------- */
+
+char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
+{
+ if (sz > sb->sz) {
+ if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
+ lj_str_resizebuf(L, sb, sz);
+ }
+ return sb->buf;
+}
+
diff --git a/src/lj_str.h b/src/lj_str.h
new file mode 100644
index 00000000..f7e56d16
--- /dev/null
+++ b/src/lj_str.h
@@ -0,0 +1,45 @@
+/*
+** String handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_STR_H
+#define _LJ_STR_H
+
+#include <stdarg.h>
+
+#include "lj_obj.h"
+
+/* String interning. */
+LJ_FUNCA int32_t lj_str_cmp(GCstr *a, GCstr *b);
+LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
+LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
+LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
+
+#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
+#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
+
+/* Type conversions. */
+LJ_FUNCA int lj_str_numconv(const char *s, TValue *n);
+LJ_FUNCA GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np);
+LJ_FUNCA GCstr *lj_str_fromint(lua_State *L, int32_t k);
+
+/* String formatting. */
+LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
+LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
+#if defined(__GNUC__)
+ __attribute__ ((format (printf, 2, 3)))
+#endif
+ ;
+
+/* Resizable string buffers. Struct definition in lj_obj.h. */
+LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
+
+#define lj_str_initbuf(L, sb) ((sb)->buf = NULL, (sb)->sz = 0)
+#define lj_str_resetbuf(sb) ((sb)->n = 0)
+#define lj_str_resizebuf(L, sb, size) \
+ ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
+ (sb)->sz = (size))
+#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
+
+#endif
diff --git a/src/lj_tab.c b/src/lj_tab.c
new file mode 100644
index 00000000..633ea20c
--- /dev/null
+++ b/src/lj_tab.c
@@ -0,0 +1,618 @@
+/*
+** Table handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
#define lj_tab_c
#define LUA_CORE

#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
#include "lj_tab.h"

/* -- Object hashing ------------------------------------------------------ */

/* Hash values are masked with the table hash mask and used as an index. */
#define hashmask(t, x)		(&noderef(t->node)[(x) & t->hmask])

/* String hashes are precomputed when they are interned. */
#define hashstr(t, s)		hashmask(t, (s)->hash)

/* Numbers hash both 32-bit halves of the double; the top bit of the hi word
** is masked off -- NOTE(review): presumably so -0 and +0 hash alike. */
#define hashnum(t, o)		hashrot(t, (o)->u32.lo, (o)->u32.hi&0x7fffffff)
/* GC objects hash their reference twice, decorrelated by a constant. */
#define hashgcref(t, r)		hashrot(t, gcrefu(r), gcrefu(r)-0x04c11db7)
+
/* Scramble the bits of numbers and pointers. */
/* Mix lo/hi with xor/sub/rotate steps, then mask into the node array.
** The exact sequence of operations determines the hash -- do not reorder. */
static LJ_AINLINE Node *hashrot(const GCtab *t, uint32_t lo, uint32_t hi)
{
  lo ^= hi; hi = lj_rol(hi, 14);
  lo -= hi; hi = lj_rol(hi, 5);
  hi ^= lo; hi -= lj_rol(lo, 27);
  return hashmask(t, hi);
}
+
+/* Hash an arbitrary key and return its anchor position in the hash table. */
+static Node *hashkey(const GCtab *t, cTValue *key)
+{
+ if (tvisstr(key))
+ return hashstr(t, strV(key));
+ else if (tvisnum(key))
+ return hashnum(t, key);
+ else if (tvisbool(key))
+ return hashmask(t, boolV(key));
+ else
+ return hashgcref(t, key->gcr);
+ /* Only hash 32 bits of lightuserdata on a 64 bit CPU. Good enough? */
+}
+
+/* -- Table creation and destruction -------------------------------------- */
+
/* Create new hash part for table. */
/* Allocates 2^hbits nodes (uninitialized -- caller must clearhpart),
** sets the mask, and points lastfree one past the end of the node array.
** Throws LJ_ERR_TABOV if hbits exceeds LJ_MAX_HBITS.
*/
static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
{
  uint32_t hsize;
  Node *node;
  lua_assert(hbits != 0);
  if (hbits > LJ_MAX_HBITS)
    lj_err_msg(L, LJ_ERR_TABOV);
  hsize = 1u << hbits;
  node = lj_mem_newvec(L, hsize, Node);
  setmref(t->node, node);
  t->hmask = hsize-1;  /* Power-of-two size, so size-1 is the index mask. */
  setmref(t->lastfree, &node[hsize]);  /* Free-slot search starts at the end. */
}
+
/*
** Q: Why all of these copies of t->hmask, t->node etc. to local variables?
** A: Because alias analysis for C is _really_ tough.
**    Even state-of-the-art C compilers won't produce good code without this.
*/

/* Clear hash part of table. */
/* Resets every node to nil key/value with no chain link. Must only be
** called when a real hash part exists (hmask != 0, i.e. not the shared
** nilnode sentinel). */
static LJ_AINLINE void clearhpart(GCtab *t)
{
  uint32_t i, hmask = t->hmask;
  Node *node = noderef(t->node);
  lua_assert(t->hmask != 0);
  for (i = 0; i <= hmask; i++) {  /* hmask is inclusive: hmask+1 nodes. */
    Node *n = &node[i];
    setmref(n->next, NULL);
    setnilV(&n->key);
    setnilV(&n->val);
  }
}
+
+/* Clear array part of table. */
+static LJ_AINLINE void clearapart(GCtab *t)
+{
+ uint32_t i, asize = t->asize;
+ TValue *array = tvref(t->array);
+ for (i = 0; i < asize; i++)
+ setnilV(&array[i]);
+}
+
/* Create a new table. Note: the slots are not initialized (yet). */
static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
{
  GCtab *t;
  global_State *g;
  /* First try to colocate the array part. */
  if (LJ_MAX_COLOSIZE && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
    /* This is ugly. (sizeof(GCtab)&7) != 0. So prepend the colocated array. */
    TValue *array = lj_mem_newt(L, sizetabcolo(asize), TValue);
    t = cast(GCtab *, array + asize);  /* Table header follows the array. */
    g = G(L);
    /* Manually link the object into the GC root list and color it white,
    ** since the raw allocation above bypasses lj_mem_newobj. */
    setgcrefr(t->nextgc, g->gc.root);
    setgcref(g->gc.root, obj2gco(t));
    newwhite(g, t);
    t->gct = ~LJ_TTAB;
    t->nomm = cast_byte(~0);  /* Negative metamethod cache: nothing present. */
    t->colo = (int8_t)asize;  /* Positive colo size marks colocated array. */
    setmref(t->array, array);
    setgcrefnull(t->metatable);
    t->asize = asize;
    t->hmask = 0;
    /* Empty hash part aliases the shared nilnode sentinel in global_State. */
    setmref(t->node, &g->nilnode);
    setmref(t->lastfree, &g->nilnode);
  } else {  /* Otherwise separately allocate the array part. */
    t = lj_mem_newobj(L, GCtab);
    t->gct = ~LJ_TTAB;
    t->nomm = cast_byte(~0);
    t->colo = 0;  /* colo == 0: array (if any) is a separate allocation. */
    setmref(t->array, NULL);
    setgcrefnull(t->metatable);
    t->asize = 0;  /* In case the array allocation fails. */
    t->hmask = 0;
    g = G(L);
    setmref(t->node, &g->nilnode);
    setmref(t->lastfree, &g->nilnode);
    if (asize > 0) {
      if (asize > LJ_MAX_ASIZE)
	lj_err_msg(L, LJ_ERR_TABOV);
      setmref(t->array, lj_mem_newvec(L, asize, TValue));
      t->asize = asize;
    }
  }
  if (hbits)
    newhpart(L, t, hbits);
  return t;
}
+
+/* Create a new table.
+**
+** IMPORTANT NOTE: The API differs from lua_createtable()!
+**
+** The array size is non-inclusive. E.g. asize=128 creates array slots
+** for 0..127, but not for 128. If you need slots 1..128, pass asize=129
+** (slot 0 is wasted in this case).
+**
+** The hash size is given in hash bits. hbits=0 means no hash part.
+** hbits=1 creates 2 hash slots, hbits=2 creates 4 hash slots and so on.
+*/
+GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
+{
+ GCtab *t = newtab(L, asize, hbits);
+ clearapart(t);
+ if (t->hmask > 0) clearhpart(t);
+ return t;
+}
+
/* Duplicate a table. */
/* Creates a table with the same array size and hash size as kt, then copies
** both parts. Hash chain pointers are relocated by the byte delta between
** the old and new node arrays, so chain structure is preserved verbatim.
** The metatable is NOT copied (newtab leaves it null).
*/
GCtab *lj_tab_dup(lua_State *L, const GCtab *kt)
{
  GCtab *t;
  uint32_t asize, hmask;
  t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0);
  lua_assert(kt->asize == t->asize && kt->hmask == t->hmask);
  t->nomm = 0;  /* Keys with metamethod names may be present. */
  asize = kt->asize;
  if (asize > 0) {
    TValue *array = tvref(t->array);
    TValue *karray = tvref(kt->array);
    if (asize < 64) {  /* An inlined loop beats memcpy for < 512 bytes. */
      uint32_t i;
      for (i = 0; i < asize; i++)
	copyTV(L, &array[i], &karray[i]);
    } else {
      memcpy(array, karray, asize*sizeof(TValue));
    }
  }
  hmask = kt->hmask;
  if (hmask > 0) {
    uint32_t i;
    Node *node = noderef(t->node);
    Node *knode = noderef(kt->node);
    /* Byte offset between the two node arrays, used to rebase next links. */
    ptrdiff_t d = (char *)node - (char *)knode;
    setmref(t->lastfree, (Node *)((char *)noderef(kt->lastfree) + d));
    for (i = 0; i <= hmask; i++) {
      Node *kn = &knode[i];
      Node *n = &node[i];
      Node *next = nextnode(kn);
      /* Don't use copyTV here, since it asserts on a copy of a dead key. */
      copyTV(L, &n->val, &kn->val);
      copyTV(L, &n->key, &kn->key);
      setmref(n->next, next == NULL? next : (Node *)((char *)next + d));
    }
  }
  return t;
}
+
/* Free a table. */
/* Frees the hash part (if any), then the array part and the table header.
** Colocated tables (colo != 0) were allocated as one block of n TValues
** followed by the GCtab header, so that whole block is freed at once;
** a negative colo means the array part was later separated by resizetab
** and must be freed on its own first.
*/
void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
{
  if (t->hmask > 0)
    lj_mem_freevec(g, noderef(t->node), t->hmask+1, Node);
  if (LJ_MAX_COLOSIZE && t->colo) {
    ptrdiff_t n;
    if (t->colo < 0 && t->asize > 0)  /* Array part was separated. */
      lj_mem_freevec(g, tvref(t->array), t->asize, TValue);
    n = t->colo & 0x7f;  /* Original colocated size (sign bit stripped). */
    lj_mem_free(g, (TValue *)t - n, sizetabcolo((uint32_t)n));
  } else {
    if (t->asize > 0)
      lj_mem_freevec(g, tvref(t->array), t->asize, TValue);
    lj_mem_freet(g, t);
  }
}
+
+/* -- Table resizing ------------------------------------------------------ */
+
/* Resize a table to fit the new array/hash part sizes. */
/* Order matters throughout: the old node array is captured first, a fresh
** hash part is built, shrunk-away array values are reinserted, and only
** then are the old hash entries rehashed into the new parts and freed.
*/
static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
{
  Node *oldnode = noderef(t->node);  /* Keep old hash part for reinsertion. */
  uint32_t oldasize = t->asize;
  uint32_t oldhmask = t->hmask;
  if (asize > oldasize) {  /* Array part grows? */
    TValue *array;
    uint32_t i;
    if (asize > LJ_MAX_ASIZE)
      lj_err_msg(L, LJ_ERR_TABOV);
    if (LJ_MAX_COLOSIZE && t->colo > 0) {
      /* A colocated array must be separated and copied. */
      TValue *oarray = tvref(t->array);
      array = lj_mem_newvec(L, asize, TValue);
      t->colo = (int8_t)(t->colo | 0x80);  /* Mark as separated (colo < 0). */
      for (i = 0; i < oldasize; i++)
	copyTV(L, &array[i], &oarray[i]);
    } else {
      array = (TValue *)lj_mem_realloc(L, tvref(t->array),
			  oldasize*sizeof(TValue), asize*sizeof(TValue));
    }
    setmref(t->array, array);
    t->asize = asize;
    for (i = oldasize; i < asize; i++)  /* Clear newly allocated slots. */
      setnilV(&array[i]);
  }
  /* Create new (empty) hash part. */
  if (hbits) {
    newhpart(L, t, hbits);
    clearhpart(t);
  } else {  /* No hash part: point at the shared nilnode sentinel. */
    global_State *g = G(L);
    setmref(t->node, &g->nilnode);
    setmref(t->lastfree, &g->nilnode);
    t->hmask = 0;
  }
  if (asize < oldasize) {  /* Array part shrinks? */
    TValue *array = tvref(t->array);
    uint32_t i;
    t->asize = asize;  /* Note: This 'shrinks' even colocated arrays. */
    for (i = asize; i < oldasize; i++)  /* Reinsert old array values. */
      if (!tvisnil(&array[i]))
	copyTV(L, lj_tab_setinth(L, t, (int32_t)i), &array[i]);
    /* Physically shrink only separated arrays. */
    if (LJ_MAX_COLOSIZE && t->colo <= 0)
      setmref(t->array, lj_mem_realloc(L, array,
	      oldasize*sizeof(TValue), asize*sizeof(TValue)));
  }
  if (oldhmask > 0) {  /* Reinsert pairs from old hash part. */
    global_State *g;
    uint32_t i;
    for (i = 0; i <= oldhmask; i++) {
      Node *n = &oldnode[i];
      if (!tvisnil(&n->val))
	copyTV(L, lj_tab_set(L, t, &n->key), &n->val);
    }
    g = G(L);
    lj_mem_freevec(g, oldnode, oldhmask+1, Node);
  }
}
+
+static uint32_t countint(cTValue *key, uint32_t *bins)
+{
+ if (tvisnum(key)) {
+ lua_Number nk = numV(key);
+ int32_t k = lj_num2int(nk);
+ if ((uint32_t)k < LJ_MAX_ASIZE && nk == cast_num(k)) {
+ bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++;
+ return 1;
+ }
+ }
+ return 0;
+}
+
/* Count non-nil entries of the array part into power-of-two size bins.
** Bin b accumulates the keys in (2^b, 2^(b+1)]; 'i' advances monotonically
** across bins so each slot is visited exactly once. Returns the total
** number of non-nil array entries.
*/
static uint32_t countarray(const GCtab *t, uint32_t *bins)
{
  uint32_t na, b, i;
  if (t->asize == 0) return 0;
  for (na = i = b = 0; b < LJ_MAX_ABITS; b++) {
    uint32_t n, top = 2u << b;  /* Inclusive upper index for this bin. */
    TValue *array;
    if (top >= t->asize) {  /* Clamp the final bin to the array size. */
      top = t->asize-1;
      if (i > top)
	break;
    }
    array = tvref(t->array);
    for (n = 0; i <= top; i++)
      if (!tvisnil(&array[i]))
	n++;
    bins[b] += n;
    na += n;
  }
  return na;
}
+
+static uint32_t counthash(const GCtab *t, uint32_t *bins, uint32_t *narray)
+{
+ uint32_t total, na, i, hmask = t->hmask;
+ Node *node = noderef(t->node);
+ for (total = na = 0, i = 0; i <= hmask; i++) {
+ Node *n = &node[i];
+ if (!tvisnil(&n->val)) {
+ na += countint(&n->key, bins);
+ total++;
+ }
+ }
+ *narray += na;
+ return total;
+}
+
/* Pick the best new array size from the key-distribution bins.
** Chooses the largest power-of-two size 2^b such that more than half of
** the slots up to 2^b would be occupied (sum of bins so far >= 2^b).
** On return *narray holds the chosen array size (+1 for slot 0, or 0);
** the return value is the number of integer keys that size would absorb.
*/
static uint32_t bestasize(uint32_t bins[], uint32_t *narray)
{
  uint32_t b, sum, na = 0, sz = 0, nn = *narray;
  for (b = 0, sum = 0; (1u<<b) <= nn && sum != nn; b++)
    if (bins[b] > 0 && (sum += bins[b]) >= (1u<<b)) {
      sz = (2u<<b)+1;  /* Covers keys 0..2^(b+1); +1 for the 0 slot. */
      na = sum;
    }
  *narray = sz;
  return na;
}
+
/* Grow and rehash a full table, making room for the extra key ek.
** Counts all current keys (plus ek) into size bins, picks the optimal
** array size, and resizes so the remaining keys (+1 spare slot for ek)
** fit in the hash part.
*/
static void rehashtab(lua_State *L, GCtab *t, cTValue *ek)
{
  uint32_t bins[LJ_MAX_ABITS];
  uint32_t total, asize, na, i;
  for (i = 0; i < LJ_MAX_ABITS; i++) bins[i] = 0;
  asize = countarray(t, bins);
  total = 1 + asize;  /* +1 accounts for the pending extra key. */
  total += counthash(t, bins, &asize);
  asize += countint(ek, bins);
  na = bestasize(bins, &asize);
  total -= na;  /* Keys absorbed by the array don't need hash slots. */
  resizetab(L, t, asize, hsize2hbits(total));
}
+
+void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
+{
+ resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);
+}
+
+/* -- Table getters ------------------------------------------------------- */
+
+cTValue *lj_tab_getinth(GCtab *t, int32_t key)
+{
+ TValue k;
+ Node *n;
+ k.n = cast_num(key);
+ n = hashnum(t, &k);
+ do {
+ if (tvisnum(&n->key) && n->key.n == k.n)
+ return &n->val;
+ } while ((n = nextnode(n)));
+ return NULL;
+}
+
+cTValue *lj_tab_getstr(GCtab *t, GCstr *key)
+{
+ Node *n = hashstr(t, key);
+ do {
+ if (tvisstr(&n->key) && strV(&n->key) == key)
+ return &n->val;
+ } while ((n = nextnode(n)));
+ return NULL;
+}
+
/* Generic table lookup. Never returns NULL: a missing key yields the
** shared nil TValue (niltv). Dispatches to the specialized string/integer
** getters first; non-integral numbers and other key types fall through to
** a generic hash-chain walk.
*/
cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key)
{
  if (tvisstr(key)) {
    cTValue *tv = lj_tab_getstr(t, strV(key));
    if (tv)
      return tv;
  } else if (tvisnum(key)) {
    lua_Number nk = numV(key);
    int32_t k = lj_num2int(nk);
    if (nk == cast_num(k)) {  /* Exact integer? Use array/int path. */
      cTValue *tv = lj_tab_getint(t, k);
      if (tv)
	return tv;
    } else {
      goto genlookup;  /* Else use the generic lookup. */
    }
  } else if (!tvisnil(key)) {
    Node *n;
  genlookup:
    n = hashkey(t, key);
    do {
      if (lj_obj_equal(&n->key, key))
	return &n->val;
    } while ((n = nextnode(n)));
  }
  return niltv(L);  /* Not found (or nil key): shared immutable nil. */
}
+
+/* -- Table setters ------------------------------------------------------- */
+
+static Node *getfreepos(GCtab *t)
+{
+ Node *node = noderef(t->node);
+ Node *lastfree = noderef(t->lastfree);
+ while (lastfree > node) {
+ lastfree--;
+ setmref(t->lastfree, lastfree);
+ if (tvisnil(&lastfree->key))
+ return lastfree;
+ }
+ return NULL; /* could not find a free place */
+}
+
/*
** inserts a new key into a hash table; first, check whether key's main
** position is free. If not, check whether colliding node is in its main
** position or not: if it is not, move colliding node to an empty place and
** put new key in its main position; otherwise (colliding node is in its main
** position), new key goes to an empty position.
**
** Returns a pointer to the (nil) value slot for the new key. May trigger
** a rehash/resize when no free node is available, in which case the whole
** insert is retried on the grown table.
*/
TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
{
  Node *mp = hashkey(t, key);
  /* hmask == 0 means mp is the shared nilnode sentinel -- must not write. */
  if (!tvisnil(&mp->val) || t->hmask == 0) {
    Node *othern;
    Node *n = getfreepos(t);  /* get a free place */
    if (n == NULL) {  /* cannot find a free place? */
      rehashtab(L, t, key);  /* grow table */
      return lj_tab_set(L, t, key);  /* re-insert key into grown table */
    }
    lua_assert(n != &G(L)->nilnode);
    othern = hashkey(t, &mp->key);
    if (othern != mp) {  /* is colliding node out of its main position? */
      /* yes; move colliding node into free position */
      while (noderef(othern->next) != mp)
	othern = nextnode(othern);  /* find previous */
      setmref(othern->next, n);  /* redo the chain with `n' in place of `mp' */
      *n = *mp;  /* copy colliding node into free pos. (mp->next also goes) */
      setmref(mp->next, NULL);  /* now `mp' is free */
      setnilV(&mp->val);
    } else {  /* colliding node is in its own main position */
      /* new node will go into free position */
      setmrefr(n->next, mp->next);  /* chain new position */
      setmref(mp->next, n);
      mp = n;
    }
  }
  mp->key.u64 = key->u64;  /* Raw copy of the full tagged value. */
  if (LJ_UNLIKELY(tvismzero(&mp->key)))
    mp->key.u64 = 0;  /* Canonicalize -0 keys to +0. */
  lj_gc_barriert(L, t, key);
  lua_assert(tvisnil(&mp->val));
  return &mp->val;
}
+
+TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key)
+{
+ TValue k;
+ Node *n;
+ k.n = cast_num(key);
+ n = hashnum(t, &k);
+ do {
+ if (tvisnum(&n->key) && n->key.n == k.n)
+ return &n->val;
+ } while ((n = nextnode(n)));
+ return lj_tab_newkey(L, t, &k);
+}
+
+TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key)
+{
+ TValue k;
+ Node *n = hashstr(t, key);
+ do {
+ if (tvisstr(&n->key) && strV(&n->key) == key)
+ return &n->val;
+ } while ((n = nextnode(n)));
+ setstrV(L, &k, key);
+ return lj_tab_newkey(L, t, &k);
+}
+
/* Generic set: get or create the value slot for an arbitrary key.
** Rejects nil and NaN keys with an error. Invalidates the negative
** metamethod cache unconditionally, since the key might be a metamethod
** name. Caller is responsible for the write barrier on the stored value.
*/
TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key)
{
  Node *n;
  t->nomm = 0;  /* Invalidate negative metamethod cache. */
  if (tvisstr(key)) {
    return lj_tab_setstr(L, t, strV(key));
  } else if (tvisnum(key)) {
    lua_Number nk = numV(key);
    int32_t k = lj_num2int(nk);
    if (nk == cast_num(k))
      return lj_tab_setint(L, t, k);
    if (tvisnan(key))
      lj_err_msg(L, LJ_ERR_NANIDX);
    /* Else use the generic lookup. */
  } else if (tvisnil(key)) {
    lj_err_msg(L, LJ_ERR_NILIDX);
  }
  n = hashkey(t, key);
  do {
    if (lj_obj_equal(&n->key, key))
      return &n->val;
  } while ((n = nextnode(n)));
  return lj_tab_newkey(L, t, key);
}
+
+/* -- Table traversal ----------------------------------------------------- */
+
/* Get the traversal index of a key. */
/* Maps a key to a single linear index space: array slots first, then hash
** nodes. Dead keys (collected during traversal) are still matched via the
** raw GC pointer so iteration can continue past them. Errors out when the
** key cannot be found (invalid key passed to next()).
*/
static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key)
{
  if (tvisnum(key)) {
    lua_Number nk = numV(key);
    int32_t k = lj_num2int(nk);
    if ((uint32_t)k < t->asize && nk == cast_num(k))
      return (uint32_t)k;  /* Array key indexes: [0..t->asize-1] */
  }
  if (!tvisnil(key)) {
    Node *n = hashkey(t, key);
    do {
      if (lj_obj_equal(&n->key, key) ||
	  (itype(&n->key) == LJ_TDEADKEY && tvisgcv(key) &&
	   gcV(&n->key) == gcV(key)))
	return t->asize + (uint32_t)(n - noderef(t->node));
	/* Hash key indexes: [t->asize..t->asize+t->hmask] */
    } while ((n = nextnode(n)));
    lj_err_msg(L, LJ_ERR_NEXTIDX);
    return 0;  /* unreachable */
  }
  return ~0u;  /* A nil key starts the traversal. */
}
+
/* Advance to the next step in a table traversal. */
/* On success writes the next key into key[0] and its value into key[1]
** and returns 1; returns 0 at the end of the traversal. A nil input key
** (index ~0u, wrapping to 0 after the increment) starts from the front.
*/
int lj_tab_next(lua_State *L, GCtab *t, TValue *key)
{
  uint32_t i = keyindex(L, t, key);  /* Find predecessor key index. */
  for (i++; i < t->asize; i++)  /* First traverse the array keys. */
    if (!tvisnil(arrayslot(t, i))) {
      setintV(key, i);
      copyTV(L, key+1, arrayslot(t, i));
      return 1;
    }
  for (i -= t->asize; i <= t->hmask; i++) {  /* Then traverse the hash keys. */
    Node *n = &noderef(t->node)[i];
    if (!tvisnil(&n->val)) {
      copyTV(L, key, &n->key);
      copyTV(L, key+1, &n->val);
      return 1;
    }
  }
  return 0;  /* End of traversal. */
}
+
+/* -- Table length calculation -------------------------------------------- */
+
/* Search for a boundary beyond the array part.
** Doubles j until t[j] is nil to bracket a boundary, then binary-searches
** the bracket. Falls back to a linear scan if the doubling overflows
** (pathological table). Precondition: t[i] is present (or i == 0).
*/
static MSize unbound_search(GCtab *t, MSize j)
{
  cTValue *tv;
  MSize i = j;  /* i is zero or a present index */
  j++;
  /* find `i' and `j' such that i is present and j is not */
  while ((tv = lj_tab_getint(t, cast(int32_t, j))) && !tvisnil(tv)) {
    i = j;
    j *= 2;
    if (j > (MSize)(INT_MAX-2)) {  /* overflow? */
      /* table was built with bad purposes: resort to linear search */
      i = 1;
      while ((tv = lj_tab_getint(t, cast(int32_t, i))) && !tvisnil(tv)) i++;
      return i - 1;
    }
  }
  /* now do a binary search between them */
  while (j - i > 1) {
    MSize m = (i+j)/2;
    cTValue *tvb = lj_tab_getint(t, cast(int32_t, m));
    if (tvb && !tvisnil(tvb)) i = m; else j = m;
  }
  return i;
}
+
/*
** Try to find a boundary in table `t'. A `boundary' is an integer index
** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
**
** If the last array slot is nil, a binary search inside the array part
** suffices. Otherwise the boundary may lie in the hash part, which is
** handled by unbound_search().
*/
MSize lj_tab_len(GCtab *t)
{
  MSize j = (MSize)t->asize;
  if (j > 1 && tvisnil(arrayslot(t, j-1))) {
    /* Binary search: slot i-1 non-nil (or i==1), slot j-1 nil. */
    MSize i = 1;
    while (j - i > 1) {
      MSize m = (i+j)/2;
      if (tvisnil(arrayslot(t, m-1))) j = m; else i = m;
    }
    return i-1;  /* Convert slot index back to 1-based table index. */
  }
  if (j) j--;  /* asize counts slot 0, which is not a sequence index. */
  if (t->hmask <= 0)
    return j;
  return unbound_search(t, j);
}
+
diff --git a/src/lj_tab.h b/src/lj_tab.h
new file mode 100644
index 00000000..e9e8bcd1
--- /dev/null
+++ b/src/lj_tab.h
@@ -0,0 +1,41 @@
/*
** Table handling.
** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
*/

#ifndef _LJ_TAB_H
#define _LJ_TAB_H

#include "lj_obj.h"

/* Number of hash bits needed for a hash part of size s (0 for s == 0). */
#define hsize2hbits(s)	((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)

LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
LJ_FUNCA GCtab *lj_tab_dup(lua_State *L, const GCtab *kt);
LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);

/* Caveat: all getters except lj_tab_get() can return NULL! */

LJ_FUNCA cTValue *lj_tab_getinth(GCtab *t, int32_t key);
LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key);
LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);

/* Caveat: all setters require a write barrier for the stored value. */

LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);

/* Fast-path integer access: array slot if in range, hash part otherwise.
** Note: 'key' and 't' are evaluated more than once in these macros. */
#define inarray(t, key)		((MSize)(key) < (MSize)(t)->asize)
#define arrayslot(t, i)		(&tvref((t)->array)[(i)])
#define lj_tab_getint(t, key) \
  (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_getinth((t), (key)))
#define lj_tab_setint(L, t, key) \
  (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key)))

LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
LJ_FUNCA MSize lj_tab_len(GCtab *t);

#endif
diff --git a/src/lj_target.h b/src/lj_target.h
new file mode 100644
index 00000000..0b464d3f
--- /dev/null
+++ b/src/lj_target.h
@@ -0,0 +1,132 @@
/*
** Definitions for target CPU.
** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
*/

#ifndef _LJ_TARGET_H
#define _LJ_TARGET_H

#include "lj_def.h"
#include "lj_arch.h"

/* -- Registers and spill slots ------------------------------------------- */

/* Register type (uint8_t in ir->r). */
typedef uint32_t Reg;

/* The hi-bit is NOT set for an allocated register. This means the value
** can be directly used without masking. The hi-bit is set for a register
** allocation hint or for RID_INIT.
*/
#define RID_NONE		0x80
#define RID_MASK		0x7f
#define RID_INIT		(RID_NONE|RID_MASK)

#define ra_noreg(r)		((r) & RID_NONE)
#define ra_hasreg(r)		(!((r) & RID_NONE))

/* The ra_hashint() macro assumes a previous test for ra_noreg(). */
#define ra_hashint(r)		((r) != RID_INIT)
#define ra_gethint(r)		((Reg)((r) & RID_MASK))
#define ra_sethint(rr, r)	rr = (uint8_t)((r)|RID_NONE)
#define ra_samehint(r1, r2)	(ra_gethint((r1)^(r2)) == 0)

/* Spill slot 0 means no spill slot has been allocated. */
#define SPS_NONE		0

#define ra_hasspill(s)		((s) != SPS_NONE)

/* Combined register and spill slot (uint16_t in ir->prev). */
/* Layout: low 8 bits register id, high bits spill slot number. */
typedef uint32_t RegSP;

#define REGSP(r, s)		((r) + ((s) << 8))
#define REGSP_HINT(r)		((r)|RID_NONE)
#define REGSP_INIT		REGSP(RID_INIT, 0)

#define regsp_reg(rs)		((rs) & 255)
#define regsp_spill(rs)		((rs) >> 8)
#define regsp_used(rs) \
  (((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0))

/* -- Register sets ------------------------------------------------------- */

/* Bitset for registers. 32 registers suffice right now.
** Note that one set holds bits for both GPRs and FPRs.
*/
typedef uint32_t RegSet;

#define RID2RSET(r)		(((RegSet)1) << (r))
#define RSET_EMPTY		0
/* Half-open range [lo, hi): bit hi itself is NOT included. */
#define RSET_RANGE(lo, hi)	((RID2RSET((hi)-(lo))-1) << (lo))

#define rset_test(rs, r)	(((rs) >> (r)) & 1)
#define rset_set(rs, r)		(rs |= RID2RSET(r))
#define rset_clear(rs, r)	(rs &= ~RID2RSET(r))
#define rset_exclude(rs, r)	(rs & ~RID2RSET(r))
#define rset_picktop(rs)	((Reg)lj_fls(rs))
#define rset_pickbot(rs)	((Reg)lj_ffs(rs))

/* -- Register allocation cost -------------------------------------------- */

/* The register allocation heuristic keeps track of the cost for allocating
** a specific register:
**
** A free register (obviously) has a cost of 0 and a 1-bit in the free mask.
**
** An already allocated register has the (non-zero) IR reference in the lowest
** bits and the result of a blended cost-model in the higher bits.
**
** The allocator first checks the free mask for a hit. Otherwise an (unrolled)
** linear search for the minimum cost is used. The search doesn't need to
** keep track of the position of the minimum, which makes it very fast.
** The lowest bits of the minimum cost show the desired IR reference whose
** register is the one to evict.
**
** Without the cost-model this degenerates to the standard heuristics for
** (reverse) linear-scan register allocation. Since code generation is done
** in reverse, a live interval extends from the last use to the first def.
** For an SSA IR the IR reference is the first (and only) def and thus
** trivially marks the end of the interval. The LSRA heuristics says to pick
** the register whose live interval has the furthest extent, i.e. the lowest
** IR reference in our case.
**
** A cost-model should take into account other factors, like spill-cost and
** restore- or rematerialization-cost, which depend on the kind of instruction.
** E.g. constants have zero spill costs, variant instructions have higher
** costs than invariants and PHIs should preferably never be spilled.
**
** Here's a first cut at simple, but effective blended cost-model for R-LSRA:
** - Due to careful design of the IR, constants already have lower IR
**   references than invariants and invariants have lower IR references
**   than variants.
** - The cost in the upper 16 bits is the sum of the IR reference and a
**   weighted score. The score currently only takes into account whether
**   the IRT_ISPHI bit is set in the instruction type.
** - The PHI weight is the minimum distance (in IR instructions) a PHI
**   reference has to be further apart from a non-PHI reference to be spilled.
** - It should be a power of two (for speed) and must be between 2 and 32768.
**   Good values for the PHI weight seem to be between 40 and 150.
** - Further study is required.
*/
#define REGCOST_PHI_WEIGHT	64

/* Cost for allocating a specific register. */
typedef uint32_t RegCost;

/* Note: assumes 16 bit IRRef1. */
#define REGCOST(cost, ref)	((RegCost)(ref) + ((RegCost)(cost) << 16))
#define regcost_ref(rc)		((IRRef1)(rc))

#define REGCOST_T(t) \
  ((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI))
#define REGCOST_REF_T(ref, t)	(REGCOST((ref), (ref)) + REGCOST_T((t)))

/* -- Target-specific definitions ----------------------------------------- */

#if LJ_TARGET_X86ORX64
#include "lj_target_x86.h"
#else
#error "Missing include for target CPU"
#endif

#endif
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
new file mode 100644
index 00000000..3ee4fa00
--- /dev/null
+++ b/src/lj_target_x86.h
@@ -0,0 +1,257 @@
/*
** Definitions for x86 and x64 CPUs.
** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
*/

#ifndef _LJ_TARGET_X86_H
#define _LJ_TARGET_X86_H

/* -- Registers IDs ------------------------------------------------------- */

#if LJ_64
#define GPRDEF(_) \
  _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \
  _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D)
#define FPRDEF(_) \
  _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \
  _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15)
#else
#define GPRDEF(_) \
  _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI)
#define FPRDEF(_) \
  _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
#endif

#define RIDENUM(name)	RID_##name,

enum {
  GPRDEF(RIDENUM)		/* General-purpose registers (GPRs). */
  FPRDEF(RIDENUM)		/* Floating-point registers (FPRs). */
  RID_MAX,
  RID_MRM = RID_MAX,		/* Pseudo-id for ModRM operand. */

  /* Calling conventions. */
  RID_RET = RID_EAX,

  /* These definitions must match with the *.dasc file(s): */
  RID_BASE = RID_EDX,		/* Interpreter BASE. */
  RID_PC = RID_ESI,		/* Interpreter PC. */
  RID_DISPATCH = RID_EBX,	/* Interpreter DISPATCH table. */

  /* Register ranges [min, max) and number of registers. */
  RID_MIN_GPR = RID_EAX,
  RID_MIN_FPR = RID_XMM0,
  RID_MAX_GPR = RID_MIN_FPR,
  RID_MAX_FPR = RID_MAX,
  RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
  RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR,
};

/* -- Register sets ------------------------------------------------------- */

/* Make use of all registers, except the stack pointer. */
#define RSET_GPR	(RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
#define RSET_FPR	(RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
#define RSET_ALL	(RSET_GPR|RSET_FPR)

/* Registers that can hold byte-sized operands. */
#if LJ_64
/* Note: this requires the use of FORCE_REX! */
#define RSET_GPR8	RSET_GPR
#else
#define RSET_GPR8	(RSET_RANGE(RID_EAX, RID_EBX+1))
#endif

/* ABI-specific register sets. */
#define RSET_ACD	(RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX))
#if LJ_64
#ifdef _WIN64
/* Windows x64 ABI. */
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1))
#else
/* The rest of the civilized x64 world has a common ABI. */
#define RSET_SCRATCH \
  (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR)
#endif
#else
/* Common x86 ABI. */
#define RSET_SCRATCH	(RSET_ACD|RSET_FPR)
#endif

#if LJ_64
/* Prefer the low 8 regs of each type to reduce REX prefixes. */
#undef rset_picktop
#define rset_picktop(rs)	(lj_fls(lj_bswap(rs)) ^ 0x18)
#endif

/* -- Spill slots --------------------------------------------------------- */

/* Stack layout for the compiled machine code (after stack adjustment). */
enum {
  SPS_TEMP1,		/* Temps (3*dword) for calls and asm_x87load. */
  SPS_TEMP2,
  SPS_TEMP3,
  SPS_FIRST,		/* First spill slot for general use. */

  /* This definition must match with the *.dasc file(s). */
  SPS_FIXED = 6		/* Available fixed spill slots in interpreter frame. */
};

/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. */
#define sps_scale(slot)		(4 * (int32_t)(slot))
/* Stack adjustment for the trace, rounded for alignment. */
#define sps_adjust(as)		(sps_scale((as->evenspill-SPS_FIXED+3)&~3))

/* -- Exit state ---------------------------------------------------------- */

/* This definition must match with the *.dasc file(s). */
typedef struct {
  lua_Number fpr[RID_NUM_FPR];	/* Floating-point registers. */
  int32_t gpr[RID_NUM_GPR];	/* General-purpose registers. */
  int32_t spill[256];		/* Spill slots. */
} ExitState;

/* -- x86 ModRM operand encoding ------------------------------------------ */

typedef enum {
  XM_OFS0 = 0x00, XM_OFS8 = 0x40, XM_OFS32 = 0x80, XM_REG = 0xc0,
  XM_SCALE1 = 0x00, XM_SCALE2 = 0x40, XM_SCALE4 = 0x80, XM_SCALE8 = 0xc0,
  XM_MASK = 0xc0
} x86Mode;

/* Structure to hold variable ModRM operand. */
typedef struct {
  int32_t ofs;		/* Offset. */
  uint8_t base;		/* Base register or RID_NONE. */
  uint8_t idx;		/* Index register or RID_NONE. */
  uint8_t scale;	/* Index scale (XM_SCALE1 .. XM_SCALE8). */
} x86ModRM;

/* -- Opcodes ------------------------------------------------------------- */

/* Macros to construct variable-length x86 opcodes. -(len+1) is in LSB. */
#define XO_(o)		((uint32_t)(0x0000fe + (0x##o<<24)))
#define XO_FPU(a,b)	((uint32_t)(0x00fd + (0x##a<<16)+(0x##b<<24)))
#define XO_0f(o)	((uint32_t)(0x0f00fd + (0x##o<<24)))
#define XO_66(o)	((uint32_t)(0x6600fd + (0x##o<<24)))
#define XO_660f(o)	((uint32_t)(0x0f66fc + (0x##o<<24)))
#define XO_f20f(o)	((uint32_t)(0x0ff2fc + (0x##o<<24)))
#define XO_f30f(o)	((uint32_t)(0x0ff3fc + (0x##o<<24)))

/* This list of x86 opcodes is not intended to be complete. Opcodes are only
** included when needed. Take a look at DynASM or jit.dis_x86 to see the
** whole mess.
*/
typedef enum {
  /* Fixed length opcodes. XI_* prefix. */
  XI_NOP =	0x90,
  XI_CALL =	0xe8,
  XI_JMP =	0xe9,
  XI_JMPs =	0xeb,
  XI_JCCs =	0x70, /* Really 7x. */
  XI_JCCn =	0x80, /* Really 0f8x. */
  XI_LEA =	0x8d,
  XI_MOVri =	0xb8, /* Really b8+r. */
  XI_ARITHib =	0x80,
  XI_ARITHi =	0x81,
  XI_ARITHi8 =	0x83,
  XI_PUSHi8 =	0x6a,
  XI_TEST =	0x85,
  XI_MOVmi =	0xc7,
  XI_BSWAP =	0xc8, /* Really 0fc8+r. */

  /* Note: little-endian byte-order! */
  XI_FLDZ =	0xeed9,
  XI_FLD1 =	0xe8d9,
  XI_FLDLG2 =	0xecd9,
  XI_FLDLN2 =	0xedd9,
  XI_FPOP =	0xd8dd, /* Really fstp st0. */
  XI_FPOP1 =	0xd9dd, /* Really fstp st1. */
  XI_FRNDINT =	0xfcd9,
  XI_FSIN =	0xfed9,
  XI_FCOS =	0xffd9,
  XI_FPTAN =	0xf2d9,
  XI_FPATAN =	0xf3d9,
  XI_FSCALE =	0xfdd9,
  XI_FYL2X =	0xf1d9,

  /* Variable-length opcodes. XO_* prefix. */
  XO_MOV =	XO_(8b),
  XO_MOVto =	XO_(89),
  XO_MOVtow =	XO_66(89),
  XO_MOVtob =	XO_(88),
  XO_MOVmi =	XO_(c7),
  XO_MOVmib =	XO_(c6),
  XO_LEA =	XO_(8d),
  XO_ARITHib =	XO_(80),
  XO_ARITHi =	XO_(81),
  XO_ARITHi8 =	XO_(83),
  XO_SHIFTi =	XO_(c1),
  XO_SHIFT1 =	XO_(d1),
  XO_SHIFTcl =	XO_(d3),
  XO_IMULi8 =	XO_(6b),
  XO_CMP =	XO_(3b),
  XO_TEST =	XO_(85),
  XO_GROUP3b =	XO_(f6),
  XO_GROUP3 =	XO_(f7),
  XO_MOVZXb =	XO_0f(b6),
  XO_MOVZXw =	XO_0f(b7),
  XO_MOVSXb =	XO_0f(be),
  XO_MOVSXw =	XO_0f(bf),

  XO_MOVSD =	XO_f20f(10),
  XO_MOVSDto =	XO_f20f(11),
  XO_MOVLPD =	XO_660f(12),
  XO_MOVAPS =	XO_0f(28),
  XO_XORPS =	XO_0f(57),
  XO_ANDPS =	XO_0f(54),
  XO_ADDSD =	XO_f20f(58),
  XO_SUBSD =	XO_f20f(5c),
  XO_MULSD =	XO_f20f(59),
  XO_DIVSD =	XO_f20f(5e),
  XO_SQRTSD =	XO_f20f(51),
  XO_MINSD =	XO_f20f(5d),
  XO_MAXSD =	XO_f20f(5f),
  XO_ROUNDSD =	0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
  XO_UCOMISD =	XO_660f(2e),
  XO_CVTSI2SD =	XO_f20f(2a),
  XO_CVTSD2SI =	XO_f20f(2d),
  XO_CVTTSD2SI= XO_f20f(2c),
  XO_MOVDto =	XO_660f(7e),

  /* x87 load/store ops; XOg_* holds the /r group digit for each. */
  XO_FLDq =	XO_(dd), XOg_FLDq = 0,
  XO_FILDd =	XO_(db), XOg_FILDd = 0,
  XO_FSTPq =	XO_(dd), XOg_FSTPq = 3,
  XO_FISTPq =	XO_(df), XOg_FISTPq = 7,
} x86Op;

/* x86 opcode groups. */
/* Packs the imm8 opcode, imm32 opcode and the /r group digit into one word. */
typedef uint32_t x86Group;

#define XG_(i8, i, g)	((x86Group)(((i8) << 16) + ((i) << 8) + (g)))
#define XG_ARITHi(g)	XG_(XI_ARITHi8, XI_ARITHi, g)

#define XO_ARITH(a)	((x86Op)(0x030000fe + ((a)<<27)))

typedef enum {
  XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP
} x86Arith;

typedef enum {
  XOg_ROL, XOg_ROR, XOg_RCL, XOg_RCR, XOg_SHL, XOg_SHR, XOg_SAL, XOg_SAR
} x86Shift;

typedef enum {
  XOg_TEST, XOg_TEST_, XOg_NOT, XOg_NEG, XOg_MUL, XOg_IMUL, XOg_DIV, XOg_IDIV
} x86Group3;

/* x86 condition codes. */
typedef enum {
  CC_O, CC_NO, CC_B, CC_NB, CC_E, CC_NE, CC_BE, CC_NBE,
  CC_S, CC_NS, CC_P, CC_NP, CC_L, CC_NL, CC_LE, CC_NLE,
  CC_C = CC_B, CC_NAE = CC_C, CC_NC = CC_NB, CC_AE = CC_NB,
  CC_Z = CC_E, CC_NZ = CC_NE, CC_NA = CC_BE, CC_A = CC_NBE,
  CC_PE = CC_P, CC_PO = CC_NP, CC_NGE = CC_L, CC_GE = CC_NL,
  CC_NG = CC_LE, CC_G = CC_NLE
} x86CC;

#endif
diff --git a/src/lj_trace.c b/src/lj_trace.c
new file mode 100644
index 00000000..6ceb5633
--- /dev/null
+++ b/src/lj_trace.c
@@ -0,0 +1,591 @@
+/*
+** Trace management.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_trace_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASJIT
+
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_str.h"
+#include "lj_frame.h"
+#include "lj_state.h"
+#include "lj_bc.h"
+#include "lj_ir.h"
+#include "lj_jit.h"
+#include "lj_iropt.h"
+#include "lj_mcode.h"
+#include "lj_trace.h"
+#include "lj_snap.h"
+#include "lj_gdbjit.h"
+#include "lj_record.h"
+#include "lj_asm.h"
+#include "lj_dispatch.h"
+#include "lj_vm.h"
+#include "lj_vmevent.h"
+#include "lj_target.h"
+
+/* -- Error handling ------------------------------------------------------ */
+
+/* Synchronous abort with error message. */
+void lj_trace_err(jit_State *J, TraceError e)
+{
+ setnilV(&J->errinfo); /* No error info. */
+ setintV(J->L->top++, (int32_t)e);
+ lj_err_throw(J->L, LUA_ERRRUN);
+}
+
+/* Synchronous abort with error message and error info. */
+void lj_trace_err_info(jit_State *J, TraceError e)
+{
+ setintV(J->L->top++, (int32_t)e);
+ lj_err_throw(J->L, LUA_ERRRUN);
+}
+
+/* -- Trace management ---------------------------------------------------- */
+
+/* The current trace is first assembled in J->cur. The variable length
+** arrays point to shared, growable buffers (J->irbuf etc.). The trace is
+** kept in this state until a new trace needs to be created. Then the current
+** trace and its data structures are copied to a new (compact) Trace object.
+*/
+
+/* Find a free trace number. */
+static TraceNo trace_findfree(jit_State *J)
+{
+ MSize osz, lim;
+ if (J->freetrace == 0)
+ J->freetrace = 1;
+ for (; J->freetrace < J->sizetrace; J->freetrace++)
+ if (J->trace[J->freetrace] == NULL)
+ return J->freetrace++;
+ /* Need to grow trace array. */
+ lim = (MSize)J->param[JIT_P_maxtrace] + 1;
+ if (lim < 2) lim = 2; else if (lim > 65535) lim = 65535;
+ osz = J->sizetrace;
+ if (osz >= lim)
+ return 0; /* Too many traces. */
+ lj_mem_growvec(J->L, J->trace, J->sizetrace, lim, Trace *);
+ while (osz < J->sizetrace)
+ J->trace[osz++] = NULL;
+ return J->freetrace;
+}
+
+#define TRACE_COPYELEM(field, szfield, tp) \
+ T2->field = (tp *)p; \
+ memcpy(p, T->field, T->szfield*sizeof(tp)); \
+ p += T->szfield*sizeof(tp);
+
+/* Save a trace by copying and compacting it. */
+static Trace *trace_save(jit_State *J, Trace *T)
+{
+ size_t sztr = ((sizeof(Trace)+7)&~7);
+ size_t szins = (T->nins-T->nk)*sizeof(IRIns);
+ size_t sz = sztr + szins +
+ T->nsnap*sizeof(SnapShot) +
+ T->nsnapmap*sizeof(IRRef2);
+ Trace *T2 = lj_mem_newt(J->L, (MSize)sz, Trace);
+ char *p = (char *)T2 + sztr;
+ memcpy(T2, T, sizeof(Trace));
+ T2->ir = (IRIns *)p - T->nk;
+ memcpy(p, T->ir+T->nk, szins);
+ p += szins;
+ TRACE_COPYELEM(snap, nsnap, SnapShot)
+ TRACE_COPYELEM(snapmap, nsnapmap, IRRef2)
+ lj_gc_barriertrace(J2G(J), T);
+ return T2;
+}
+
+/* Free a trace. */
+static void trace_free(jit_State *J, TraceNo traceno)
+{
+ lua_assert(traceno != 0);
+ if (traceno < J->freetrace)
+ J->freetrace = traceno;
+ lj_gdbjit_deltrace(J, J->trace[traceno]);
+ if (traceno == J->curtrace) {
+ lua_assert(J->trace[traceno] == &J->cur);
+ J->trace[traceno] = NULL;
+ J->curtrace = 0;
+ } else {
+ Trace *T = J->trace[traceno];
+ lua_assert(T != NULL && T != &J->cur);
+ J->trace[traceno] = NULL;
+ lj_mem_free(J2G(J), T,
+ ((sizeof(Trace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) +
+ T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(IRRef2));
+ }
+}
+
+/* Free all traces associated with a prototype. No unpatching needed. */
+void lj_trace_freeproto(global_State *g, GCproto *pt)
+{
+ jit_State *J = G2J(g);
+ TraceNo traceno;
+ /* Free all root traces. */
+ for (traceno = pt->trace; traceno != 0; ) {
+ TraceNo side, nextroot = J->trace[traceno]->nextroot;
+ /* Free all side traces. */
+ for (side = J->trace[traceno]->nextside; side != 0; ) {
+ TraceNo next = J->trace[side]->nextside;
+ trace_free(J, side);
+ side = next;
+ }
+ /* Now free the trace itself. */
+ trace_free(J, traceno);
+ traceno = nextroot;
+ }
+}
+
+/* Re-enable compiling a prototype by unpatching any modified bytecode. */
+void lj_trace_reenableproto(GCproto *pt)
+{
+ if ((pt->flags & PROTO_HAS_ILOOP)) {
+ BCIns *bc = pt->bc;
+  BCPos i, sizebc = pt->sizebc;
+ pt->flags &= ~PROTO_HAS_ILOOP;
+ for (i = 0; i < sizebc; i++) {
+ BCOp op = bc_op(bc[i]);
+ if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP)
+ setbc_op(&bc[i], (int)op+(int)BC_LOOP-(int)BC_ILOOP);
+ }
+ }
+}
+
+/* Unpatch the bytecode modified by a root trace. */
+static void trace_unpatch(jit_State *J, Trace *T)
+{
+ BCOp op = bc_op(T->startins);
+ uint32_t pcofs = T->snap[0].mapofs + T->snap[0].nslots;
+ BCIns *pc = ((BCIns *)(uintptr_t)T->snapmap[pcofs]) - 1;
+ switch (op) {
+ case BC_FORL:
+ lua_assert(bc_op(*pc) == BC_JFORI);
+ setbc_op(pc, BC_FORI); /* Unpatch JFORI, too. */
+ pc += bc_j(*pc);
+ lua_assert(bc_op(*pc) == BC_JFORL && J->trace[bc_d(*pc)] == T);
+ *pc = T->startins;
+ break;
+ case BC_LOOP:
+ lua_assert(bc_op(*pc) == BC_JLOOP && J->trace[bc_d(*pc)] == T);
+ *pc = T->startins;
+ break;
+ case BC_ITERL:
+ lua_assert(bc_op(*pc) == BC_JMP);
+ pc += bc_j(*pc)+2;
+ lua_assert(bc_op(*pc) == BC_JITERL && J->trace[bc_d(*pc)] == T);
+ *pc = T->startins;
+ break;
+ case BC_CALL:
+ lj_trace_err(J, LJ_TRERR_NYILNKF);
+ break;
+ case BC_JMP: /* No need to unpatch branches in parent traces (yet). */
+ default:
+ lua_assert(0);
+ break;
+ }
+}
+
+/* Flush a root trace and any attached side traces. */
+void lj_trace_flush(jit_State *J, TraceNo traceno)
+{
+ Trace *T = NULL;
+ GCproto *pt;
+ if (traceno > 0 && traceno <= J->sizetrace)
+ T = J->trace[traceno];
+ if (T == NULL)
+ return;
+ pt = &gcref(T->startpt)->pt;
+ if (T->root == 0 && pt != NULL) {
+ TraceNo side;
+ /* First unpatch any modified bytecode. */
+ trace_unpatch(J, T);
+ /* Unlink root trace from chain anchored in prototype. */
+ if (pt->trace == traceno) { /* Trace is first in chain. Easy. */
+ pt->trace = T->nextroot;
+ } else { /* Otherwise search in chain of root traces. */
+ Trace *T2 = J->trace[pt->trace];
+ while (T2->nextroot != traceno) {
+ lua_assert(T2->nextroot != 0);
+ T2 = J->trace[T2->nextroot];
+ }
+ T2->nextroot = T->nextroot; /* Unlink from chain. */
+ }
+ /* Free all side traces. */
+ for (side = T->nextside; side != 0; ) {
+ TraceNo next = J->trace[side]->nextside;
+ trace_free(J, side);
+ side = next;
+ }
+ /* Now free the trace itself. */
+ trace_free(J, traceno);
+ } /* Flush for non-root traces is currently ignored. */
+}
+
+/* Flush all traces associated with a prototype. */
+void lj_trace_flushproto(global_State *g, GCproto *pt)
+{
+ while (pt->trace != 0)
+ lj_trace_flush(G2J(g), pt->trace);
+}
+
+/* Flush all traces. */
+int lj_trace_flushall(lua_State *L)
+{
+ jit_State *J = L2J(L);
+ ptrdiff_t i;
+ if ((J2G(J)->hookmask & HOOK_GC))
+ return 1;
+ for (i = (ptrdiff_t)J->sizetrace-1; i > 0; i--)
+ lj_trace_flush(J, (TraceNo)i);
+#ifdef LUA_USE_ASSERT
+ for (i = 0; i < (ptrdiff_t)J->sizetrace; i++)
+ lua_assert(J->trace[i] == NULL);
+#endif
+ J->freetrace = 0;
+ /* Free the whole machine code and invalidate all exit stub groups. */
+ lj_mcode_free(J);
+ memset(J->exitstubgroup, 0, sizeof(J->exitstubgroup));
+ lj_vmevent_send(L, TRACE,
+ setstrV(L, L->top++, lj_str_newlit(L, "flush"));
+ );
+ return 0;
+}
+
+/* Free everything associated with the JIT compiler state. */
+void lj_trace_freestate(global_State *g)
+{
+ jit_State *J = G2J(g);
+#ifdef LUA_USE_ASSERT
+ { /* This assumes all traces have already been freed. */
+ ptrdiff_t i;
+ for (i = 0; i < (ptrdiff_t)J->sizetrace; i++)
+ lua_assert(J->trace[i] == NULL);
+ }
+#endif
+ lj_mcode_free(J);
+ lj_ir_knum_freeall(J);
+ lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, IRRef2);
+ lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
+ lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
+ lj_mem_freevec(g, J->trace, J->sizetrace, Trace *);
+}
+
+/* -- Trace compiler state machine ---------------------------------------- */
+
+/* Penalize a bytecode instruction by bumping its hot counter. */
+static void hotpenalty(jit_State *J, const BCIns *pc, TraceError e)
+{
+ uint32_t i, val = HOTCOUNT_MIN_PENALTY;
+ for (i = 0; i < PENALTY_SLOTS; i++)
+ if (J->penalty[i].pc == pc) {
+ val = ((uint32_t)J->penalty[i].val << 1) + 1;
+ if (val > HOTCOUNT_MAX_PENALTY) val = HOTCOUNT_MAX_PENALTY;
+ goto setpenalty;
+ }
+ i = J->penaltyslot;
+ J->penaltyslot = (J->penaltyslot + 1) & (PENALTY_SLOTS-1);
+ J->penalty[i].pc = pc;
+setpenalty:
+ J->penalty[i].val = (uint16_t)val;
+ J->penalty[i].reason = e;
+ hotcount_set(J2GG(J), pc+1, val);
+}
+
+/* Start tracing. */
+static void trace_start(jit_State *J)
+{
+ lua_State *L;
+
+ if (J->curtrace != 0 && J->trace[J->curtrace] == &J->cur) {
+ J->trace[J->curtrace] = trace_save(J, &J->cur); /* Save current trace. */
+ J->curtrace = 0;
+ }
+
+ if ((J->pt->flags & PROTO_NO_JIT)) { /* JIT disabled for this proto? */
+ if (J->parent == 0) {
+ if (J->pc >= J->pt->bc) {
+ /* Lazy bytecode patching to disable hotcount events. */
+ setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP);
+ J->pt->flags |= PROTO_HAS_ILOOP;
+ } else {
+ /* NYI: lazy closure patching to disable hotcall events. */
+ lua_assert(0);
+ }
+ }
+ J->state = LJ_TRACE_IDLE; /* Silently ignored. */
+ return;
+ }
+
+ /* Get a new trace number. */
+ J->curtrace = trace_findfree(J);
+ if (LJ_UNLIKELY(J->curtrace == 0)) { /* No free trace? */
+ lua_assert((J2G(J)->hookmask & HOOK_GC) == 0);
+ lj_trace_flushall(J->L);
+ J->state = LJ_TRACE_IDLE; /* Silently ignored. */
+ return;
+ }
+ J->trace[J->curtrace] = &J->cur;
+
+ /* Setup enough of the current trace to be able to send the vmevent. */
+ memset(&J->cur, 0, sizeof(Trace));
+ J->cur.nins = J->cur.nk = REF_BASE;
+ J->cur.ir = J->irbuf;
+ J->cur.snap = J->snapbuf;
+ J->cur.snapmap = J->snapmapbuf;
+ /* J->cur.nsnapmap = 0; */
+ J->mergesnap = 0;
+ J->needsnap = 0;
+ J->guardemit.irt = 0;
+
+ L = J->L;
+ lj_vmevent_send(L, TRACE,
+ setstrV(L, L->top++, lj_str_newlit(L, "start"));
+ setintV(L->top++, J->curtrace);
+ setfuncV(L, L->top++, J->fn);
+ setintV(L->top++, J->pc - J->pt->bc + 1);
+ if (J->parent) {
+ setintV(L->top++, J->parent);
+ setintV(L->top++, J->exitno);
+ }
+ );
+ lj_record_setup(J);
+}
+
+/* Stop tracing. */
+static void trace_stop(jit_State *J)
+{
+ BCIns *pc = (BCIns *)J->startpc; /* Not const here. */
+ BCOp op = bc_op(J->cur.startins);
+ GCproto *pt = &gcref(J->cur.startpt)->pt;
+ lua_State *L;
+
+ switch (op) {
+ case BC_FORL:
+ setbc_op(pc+bc_j(J->cur.startins), BC_JFORI); /* Patch FORI, too. */
+ /* fallthrough */
+ case BC_LOOP:
+ case BC_ITERL:
+ /* Patch bytecode of starting instruction in root trace. */
+ setbc_op(pc, (int)op+(int)BC_JLOOP-(int)BC_LOOP);
+ setbc_d(pc, J->curtrace);
+ /* Add to root trace chain in prototype. */
+ J->cur.nextroot = pt->trace;
+ pt->trace = (TraceNo1)J->curtrace;
+ break;
+ case BC_CALL:
+ lj_trace_err(J, LJ_TRERR_NYILNKF);
+ break;
+ case BC_JMP:
+ /* Patch exit branch in parent to side trace entry. */
+ lua_assert(J->parent != 0 && J->cur.root != 0);
+ lj_asm_patchexit(J, J->trace[J->parent], J->exitno, J->cur.mcode);
+ /* Avoid compiling a side trace twice (stack resizing uses parent exit). */
+ J->trace[J->parent]->snap[J->exitno].count = SNAPCOUNT_DONE;
+ /* Add to side trace chain in root trace. */
+ {
+ Trace *root = J->trace[J->cur.root];
+ root->nchild++;
+ J->cur.nextside = root->nextside;
+ root->nextside = (TraceNo1)J->curtrace;
+ }
+ break;
+ default:
+ lua_assert(0);
+ break;
+ }
+
+ /* Commit new mcode only after all patching is done. */
+ lj_mcode_commit(J, J->cur.mcode);
+ lj_gdbjit_addtrace(J, &J->cur, J->curtrace);
+
+ L = J->L;
+ lj_vmevent_send(L, TRACE,
+ setstrV(L, L->top++, lj_str_newlit(L, "stop"));
+ setintV(L->top++, J->curtrace);
+ );
+}
+
+/* Abort tracing. */
+static int trace_abort(jit_State *J)
+{
+ lua_State *L = J->L;
+ TraceError e = LJ_TRERR_RECERR;
+ lj_mcode_abort(J);
+ if (tvisnum(L->top-1))
+ e = (TraceError)lj_num2int(numV(L->top-1));
+ if (e == LJ_TRERR_MCODELM) {
+ J->state = LJ_TRACE_ASM;
+ return 1; /* Retry ASM with new MCode area. */
+ }
+ if (J->parent == 0)
+ hotpenalty(J, J->startpc, e); /* Penalize starting instruction. */
+ if (J->curtrace) { /* Is there anything to abort? */
+ ptrdiff_t errobj = savestack(L, L->top-1); /* Stack may be resized. */
+ lj_vmevent_send(L, TRACE,
+ setstrV(L, L->top++, lj_str_newlit(L, "abort"));
+ setintV(L->top++, J->curtrace);
+ setfuncV(L, L->top++, J->fn);
+ setintV(L->top++, J->pc - J->pt->bc + 1);
+ copyTV(L, L->top++, restorestack(L, errobj));
+ copyTV(L, L->top++, &J->errinfo);
+ );
+ /* Drop aborted trace after the vmevent (which may still access it). */
+ J->trace[J->curtrace] = NULL;
+ if (J->curtrace < J->freetrace)
+ J->freetrace = J->curtrace;
+ J->curtrace = 0;
+ }
+ L->top--; /* Remove error object */
+ if (e == LJ_TRERR_MCODEAL)
+ lj_trace_flushall(L);
+ return 0;
+}
+
+/* State machine for the trace compiler. Protected callback. */
+static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
+{
+ jit_State *J = (jit_State *)ud;
+ UNUSED(dummy);
+ do {
+ switch (J->state) {
+ case LJ_TRACE_START:
+ J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */
+ trace_start(J);
+ lj_dispatch_update(J2G(J));
+ break;
+
+ case LJ_TRACE_RECORD:
+ setvmstate(J2G(J), RECORD);
+ lj_vmevent_send(L, RECORD,
+ setintV(L->top++, J->curtrace);
+ setfuncV(L, L->top++, J->fn);
+ setintV(L->top++, J->pc - J->pt->bc + 1);
+ setintV(L->top++, J->framedepth);
+ if (bcmode_mm(bc_op(*J->pc)) == MM_call) {
+ cTValue *o = &L->base[bc_a(*J->pc)];
+ if (bc_op(*J->pc) == BC_ITERC) o -= 3;
+ copyTV(L, L->top++, o);
+ }
+ );
+ lj_record_ins(J);
+ break;
+
+ case LJ_TRACE_END:
+ J->loopref = 0;
+ if ((J->flags & JIT_F_OPT_LOOP) && J->cur.link == J->curtrace) {
+ setvmstate(J2G(J), OPT);
+ lj_opt_dce(J);
+ if (lj_opt_loop(J)) { /* Loop optimization failed? */
+ J->loopref = J->cur.nins;
+ J->state = LJ_TRACE_RECORD; /* Try to continue recording. */
+ break;
+ }
+ J->loopref = J->chain[IR_LOOP]; /* Needed by assembler. */
+ }
+ J->state = LJ_TRACE_ASM;
+ break;
+
+ case LJ_TRACE_ASM:
+ setvmstate(J2G(J), ASM);
+ lj_asm_trace(J, &J->cur);
+ trace_stop(J);
+ setvmstate(J2G(J), INTERP);
+ J->state = LJ_TRACE_IDLE;
+ lj_dispatch_update(J2G(J));
+ return NULL;
+
+ default: /* Trace aborted asynchronously. */
+ setintV(L->top++, (int32_t)LJ_TRERR_RECERR);
+ /* fallthrough */
+ case LJ_TRACE_ERR:
+ if (trace_abort(J))
+ break; /* Retry. */
+ setvmstate(J2G(J), INTERP);
+ J->state = LJ_TRACE_IDLE;
+ lj_dispatch_update(J2G(J));
+ return NULL;
+ }
+ } while (J->state > LJ_TRACE_RECORD);
+ return NULL;
+}
+
+/* -- Event handling ------------------------------------------------------ */
+
+/* A bytecode instruction is about to be executed. Record it. */
+void lj_trace_ins(jit_State *J)
+{
+ while (lj_vm_cpcall(J->L, trace_state, NULL, (void *)J) != 0)
+ J->state = LJ_TRACE_ERR;
+}
+
+/* Start recording a new trace. */
+static void trace_new(jit_State *J)
+{
+ /* Only start a new trace if not inside __gc call or vmevent. */
+ if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
+ lua_assert(J->state == LJ_TRACE_IDLE);
+ J->state = LJ_TRACE_START;
+ J->fn = curr_func(J->L);
+ J->pt = funcproto(J->fn);
+ lj_trace_ins(J);
+ }
+}
+
+/* A hotcount triggered. Start recording a root trace. */
+void lj_trace_hot(jit_State *J, const BCIns *pc)
+{
+ lua_State *L = J->L;
+ L->top = curr_topL(L); /* Only called from Lua and NRESULTS is not used. */
+ hotcount_set(J2GG(J), pc, J->param[JIT_P_hotloop]+1); /* Reset hotcount. */
+ J->parent = 0; /* Root trace. */
+ J->exitno = 0;
+ J->pc = pc-1; /* The interpreter bytecode PC is offset by 1. */
+ trace_new(J);
+}
+
+/* A trace exited. Restore interpreter state and check for hot exits. */
+void *lj_trace_exit(jit_State *J, void *exptr)
+{
+ lua_State *L = J->L;
+ void *cf;
+
+ /* Restore interpreter state. */
+ lj_snap_restore(J, exptr);
+ cf = cframe_raw(L->cframe);
+ cframe_pc(cf) = J->pc;
+
+ lj_vmevent_send(L, TEXIT,
+ ExitState *ex = (ExitState *)exptr;
+ uint32_t i;
+ lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
+ setintV(L->top++, J->parent);
+ setintV(L->top++, J->exitno);
+ setintV(L->top++, RID_NUM_GPR);
+ setintV(L->top++, RID_NUM_FPR);
+ for (i = 0; i < RID_NUM_GPR; i++)
+ setintV(L->top++, ex->gpr[i]);
+ for (i = 0; i < RID_NUM_FPR; i++) {
+ setnumV(L->top, ex->fpr[i]);
+ if (LJ_UNLIKELY(tvisnan(L->top)))
+ setnanV(L->top);
+ L->top++;
+ }
+ );
+
+ { /* Check for a hot exit. */
+ SnapShot *snap = &J->trace[J->parent]->snap[J->exitno];
+ if (snap->count != SNAPCOUNT_DONE &&
+ ++snap->count >= J->param[JIT_P_hotexit])
+ trace_new(J); /* Start recording a side trace. */
+ }
+
+ return cf; /* Return the interpreter C frame. */
+}
+
+#endif
diff --git a/src/lj_trace.h b/src/lj_trace.h
new file mode 100644
index 00000000..9d8eb790
--- /dev/null
+++ b/src/lj_trace.h
@@ -0,0 +1,52 @@
+/*
+** Trace management.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_TRACE_H
+#define _LJ_TRACE_H
+
+#if LJ_HASJIT
+
+#include "lj_obj.h"
+#include "lj_jit.h"
+#include "lj_dispatch.h"
+
+/* Trace errors. */
+typedef enum {
+#define TREDEF(name, msg) LJ_TRERR_##name,
+#include "lj_traceerr.h"
+ LJ_TRERR__MAX
+} TraceError;
+
+LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e);
+LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e);
+
+/* Trace management. */
+LJ_FUNC void lj_trace_freeproto(global_State *g, GCproto *pt);
+LJ_FUNC void lj_trace_reenableproto(GCproto *pt);
+LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt);
+LJ_FUNC void lj_trace_flush(jit_State *J, TraceNo traceno);
+LJ_FUNC int lj_trace_flushall(lua_State *L);
+LJ_FUNC void lj_trace_freestate(global_State *g);
+
+/* Event handling. */
+LJ_FUNC void lj_trace_ins(jit_State *J);
+LJ_FUNCA void lj_trace_hot(jit_State *J, const BCIns *pc);
+LJ_FUNCA void *lj_trace_exit(jit_State *J, void *exptr);
+
+/* Signal asynchronous abort of trace or end of trace. */
+#define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE)
+#define lj_trace_end(J) (J->state = LJ_TRACE_END)
+
+#else
+
+#define lj_trace_flushall(L) (UNUSED(L), 0)
+#define lj_trace_freestate(g) UNUSED(g)
+#define lj_trace_freeproto(g, pt) (UNUSED(g), UNUSED(pt), (void)0)
+#define lj_trace_abort(g) UNUSED(g)
+#define lj_trace_end(J) UNUSED(J)
+
+#endif
+
+#endif
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
new file mode 100644
index 00000000..2c8347b0
--- /dev/null
+++ b/src/lj_traceerr.h
@@ -0,0 +1,59 @@
+/*
+** Trace compiler error messages.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/* This file may be included multiple times with different TREDEF macros. */
+
+/* Recording. */
+TREDEF(RECERR, "error thrown or hook called during recording")
+TREDEF(TRACEOV, "trace too long")
+TREDEF(STACKOV, "trace too deep")
+TREDEF(SNAPOV, "too many snapshots")
+TREDEF(NYIBC, "NYI: bytecode %d")
+
+/* Recording loop ops. */
+TREDEF(LLEAVE, "leaving loop in root trace")
+TREDEF(LINNER, "inner loop in root trace")
+TREDEF(LUNROLL, "loop unroll limit reached")
+TREDEF(LBLACKL, "blacklisted loop")
+
+/* Recording calls/returns. */
+TREDEF(BADTYPE, "bad argument type")
+TREDEF(CJITOFF, "call to JIT-disabled function")
+TREDEF(CUNROLL, "call unroll limit reached")
+TREDEF(NYIRECU, "NYI: recursive calls")
+TREDEF(NYILNKF, "NYI: linking/patching function calls")
+TREDEF(NYIVF, "NYI: vararg function")
+TREDEF(NYICF, "NYI: C function %p")
+TREDEF(NYIFF, "NYI: FastFunc %s")
+TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
+TREDEF(NYIRETL, "NYI: return to lower frame")
+
+/* Recording indexed load/store. */
+TREDEF(STORENN, "store with nil or NaN key")
+TREDEF(NOMM, "missing metamethod")
+TREDEF(IDXLOOP, "looping index lookup")
+TREDEF(NYITMIX, "NYI: mixed sparse/dense table")
+
+/* Optimizations. */
+TREDEF(GFAIL, "guard would always fail")
+TREDEF(PHIOV, "too many PHIs")
+TREDEF(TYPEINS, "persistent type instability")
+
+/* Assembler. */
+TREDEF(MCODEAL, "failed to allocate mcode memory")
+TREDEF(MCODEOV, "machine code too long")
+TREDEF(MCODELM, "hit mcode limit (retrying)")
+TREDEF(SPILLOV, "too many spill slots")
+TREDEF(BADRA, "inconsistent register allocation")
+TREDEF(NYIIR, "NYI: cannot assemble IR instruction %d")
+TREDEF(NYIPHI, "NYI: PHI shuffling too complex")
+TREDEF(NYICOAL, "NYI: register coalescing too complex")
+TREDEF(NYIGCF, "NYI: gcstep sync with frames")
+
+#undef TREDEF
+
+/* Detecting unused error messages:
+ awk -F, '/^TREDEF/ { gsub(/TREDEF./, ""); printf "grep -q LJ_TRERR_%s *.[ch] || echo %s\n", $1, $1}' lj_traceerr.h | sh
+*/
diff --git a/src/lj_udata.c b/src/lj_udata.c
new file mode 100644
index 00000000..863889c9
--- /dev/null
+++ b/src/lj_udata.c
@@ -0,0 +1,33 @@
+/*
+** Userdata handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_udata_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_udata.h"
+
+GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
+{
+ GCudata *ud = lj_mem_newt(L, sizeof(GCudata) + sz, GCudata);
+ global_State *g = G(L);
+ newwhite(g, ud); /* Not finalized. */
+ ud->gct = ~LJ_TUDATA;
+ ud->len = sz;
+ /* NOBARRIER: The GCudata is new (marked white). */
+ setgcrefnull(ud->metatable);
+ setgcref(ud->env, obj2gco(env));
+ /* Chain to userdata list (after main thread). */
+ setgcrefr(ud->nextgc, mainthread(g)->nextgc);
+ setgcref(mainthread(g)->nextgc, obj2gco(ud));
+ return ud;
+}
+
+void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud)
+{
+ lj_mem_free(g, ud, sizeudata(ud));
+}
+
diff --git a/src/lj_udata.h b/src/lj_udata.h
new file mode 100644
index 00000000..981304f8
--- /dev/null
+++ b/src/lj_udata.h
@@ -0,0 +1,14 @@
+/*
+** Userdata handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_UDATA_H
+#define _LJ_UDATA_H
+
+#include "lj_obj.h"
+
+LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env);
+LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud);
+
+#endif
diff --git a/src/lj_vm.h b/src/lj_vm.h
new file mode 100644
index 00000000..f50614bb
--- /dev/null
+++ b/src/lj_vm.h
@@ -0,0 +1,66 @@
+/*
+** Assembler VM interface definitions.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_VM_H
+#define _LJ_VM_H
+
+#include "lj_obj.h"
+
+/* Entry points for ASM parts of VM. */
+LJ_ASMF void lj_vm_call(lua_State *L, TValue *base, int nres1);
+LJ_ASMF int lj_vm_pcall(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
+typedef TValue *(*lua_CPFunction)(lua_State *L, lua_CFunction func, void *ud);
+LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CPFunction cp, lua_CFunction func,
+ void *ud);
+LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
+LJ_ASMF_NORET void lj_vm_unwind_c(void *cframe, int errcode);
+LJ_ASMF_NORET void lj_vm_unwind_ff(void *cframe);
+
+/* Miscellaneous functions. */
+#if LJ_TARGET_X86ORX64
+LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]);
+#endif
+LJ_ASMF double lj_vm_foldarith(double x, double y, int op);
+LJ_ASMF double lj_vm_foldfpm(double x, int op);
+
+/* Dispatch targets for recording and hooks. */
+LJ_ASMF void lj_vm_record(void);
+LJ_ASMF void lj_vm_hook(void);
+
+/* Trace exit handling. */
+LJ_ASMF void lj_vm_exit_handler(void);
+LJ_ASMF void lj_vm_exit_interp(void);
+
+/* Handlers callable from compiled code. */
+LJ_ASMF void lj_vm_floor(void);
+LJ_ASMF void lj_vm_ceil(void);
+LJ_ASMF void lj_vm_trunc(void);
+LJ_ASMF void lj_vm_exp(void);
+LJ_ASMF void lj_vm_exp2(void);
+LJ_ASMF void lj_vm_pow(void);
+LJ_ASMF void lj_vm_powi(void);
+
+/* Call gates for functions. */
+LJ_ASMF void lj_gate_lf(void);
+LJ_ASMF void lj_gate_lv(void);
+LJ_ASMF void lj_gate_c(void);
+
+/* Continuations for metamethods. */
+LJ_ASMF void lj_cont_cat(void); /* Continue with concatenation. */
+LJ_ASMF void lj_cont_ra(void); /* Store result in RA from instruction. */
+LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */
+LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */
+LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
+
+/* Start of the ASM code. */
+LJ_ASMF void lj_vm_asm_begin(void);
+
+/* Opcode handler offsets, relative to lj_vm_asm_begin. */
+LJ_ASMF const uint16_t lj_vm_op_ofs[];
+
+#define makeasmfunc(ofs) \
+ ((ASMFunction)((char *)lj_vm_asm_begin + (ofs)))
+
+#endif
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c
new file mode 100644
index 00000000..657eb8d7
--- /dev/null
+++ b/src/lj_vmevent.c
@@ -0,0 +1,56 @@
+/*
+** VM event handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include <stdio.h>
+
+#define lj_vmevent_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_state.h"
+#include "lj_dispatch.h"
+#include "lj_vm.h"
+#include "lj_vmevent.h"
+
+ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
+{
+ global_State *g = G(L);
+ GCstr *s = lj_str_newlit(L, LJ_VMEVENTS_REGKEY);
+ cTValue *tv = lj_tab_getstr(tabV(registry(L)), s);
+ if (tvistab(tv)) {
+ int hash = VMEVENT_HASH(ev);
+ tv = lj_tab_getint(tabV(tv), hash);
+ if (tv && tvisfunc(tv)) {
+ lj_state_checkstack(L, LUA_MINSTACK);
+ setfuncV(L, L->top++, funcV(tv));
+ return savestack(L, L->top);
+ }
+ }
+ g->vmevmask &= ~VMEVENT_MASK(ev); /* No handler: cache this fact. */
+ return 0;
+}
+
+void lj_vmevent_call(lua_State *L, ptrdiff_t argbase)
+{
+ global_State *g = G(L);
+ uint8_t oldmask = g->vmevmask;
+ uint8_t oldh = hook_save(g);
+ int status;
+ g->vmevmask = 0; /* Disable all events. */
+ hook_vmevent(g);
+ status = lj_vm_pcall(L, restorestack(L, argbase), 0+1, 0);
+ if (LJ_UNLIKELY(status)) {
+ /* Really shouldn't use stderr here, but where else to complain? */
+ L->top--;
+ fprintf(stderr, "VM handler failed: %s\n",
+ tvisstr(L->top) ? strVdata(L->top) : "?");
+ }
+ hook_restore(g, oldh);
+ if (g->vmevmask != VMEVENT_NOCACHE)
+ g->vmevmask = oldmask; /* Restore event mask, but not if not modified. */
+}
+
diff --git a/src/lj_vmevent.h b/src/lj_vmevent.h
new file mode 100644
index 00000000..9eaa52e1
--- /dev/null
+++ b/src/lj_vmevent.h
@@ -0,0 +1,49 @@
+/*
+** VM event handling.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_VMEVENT_H
+#define _LJ_VMEVENT_H
+
+#include "lj_obj.h"
+
+/* Registry key for VM event handler table. */
+#define LJ_VMEVENTS_REGKEY "_VMEVENTS"
+#define LJ_VMEVENTS_HSIZE 4
+
+#define VMEVENT_MASK(ev) ((uint8_t)1 << ((int)(ev) & 7))
+#define VMEVENT_HASH(ev) ((int)(ev) & ~7)
+#define VMEVENT_HASHIDX(h) ((int)(h) << 3)
+#define VMEVENT_NOCACHE 255
+
+#define VMEVENT_DEF(name, hash) \
+ LJ_VMEVENT_##name##_, \
+ LJ_VMEVENT_##name = ((LJ_VMEVENT_##name##_) & 7)|((hash) << 3)
+
+/* VM event IDs. */
+typedef enum {
+ VMEVENT_DEF(BC, 0x0000140b),
+ VMEVENT_DEF(TRACE, 0x10ea574d),
+ VMEVENT_DEF(RECORD, 0x5698231c),
+ VMEVENT_DEF(TEXIT, 0x12d984a7),
+ LJ_VMEVENT__MAX
+} VMEvent;
+
+#ifdef LUAJIT_DISABLE_VMEVENT
+#define lj_vmevent_send(L, ev, args) UNUSED(L)
+#else
+#define lj_vmevent_send(L, ev, args) \
+ if (G(L)->vmevmask & VMEVENT_MASK(LJ_VMEVENT_##ev)) { \
+ ptrdiff_t argbase = lj_vmevent_prepare(L, LJ_VMEVENT_##ev); \
+ if (argbase) { \
+ args \
+ lj_vmevent_call(L, argbase); \
+ } \
+ }
+
+LJ_FUNC ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev);
+LJ_FUNC void lj_vmevent_call(lua_State *L, ptrdiff_t argbase);
+#endif
+
+#endif
diff --git a/src/ljamalg.c b/src/ljamalg.c
new file mode 100644
index 00000000..46d0e21f
--- /dev/null
+++ b/src/ljamalg.c
@@ -0,0 +1,70 @@
+/*
+** LuaJIT core and libraries amalgamation.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+/*
++--------------------------------------------------------------------------+
+| WARNING: Compiling the amalgamation needs a lot of virtual memory |
+| (around 160 MB with GCC 4.x)! If you don't have enough physical memory |
+| your machine will start swapping to disk and the compile will not finish |
+| within a reasonable amount of time. |
+| So either compile on a bigger machine or use the non-amalgamated build. |
++--------------------------------------------------------------------------+
+*/
+
+#define ljamalg_c
+#define LUA_CORE
+
+/* To get the mremap prototype. Must be defined before any system includes. */
+#if defined(__linux__) && !defined(_GNU_SOURCE)
+#define _GNU_SOURCE
+#endif
+
+#include "lua.h"
+#include "lauxlib.h"
+
+#include "lj_gc.c"
+#include "lj_err.c"
+#include "lj_ctype.c"
+#include "lj_bc.c"
+#include "lj_obj.c"
+#include "lj_str.c"
+#include "lj_tab.c"
+#include "lj_func.c"
+#include "lj_udata.c"
+#include "lj_meta.c"
+#include "lj_state.c"
+#include "lj_dispatch.c"
+#include "lj_vmevent.c"
+#include "lj_api.c"
+#include "lj_lex.c"
+#include "lj_parse.c"
+#include "lj_lib.c"
+#include "lj_ir.c"
+#include "lj_opt_mem.c"
+#include "lj_opt_fold.c"
+#include "lj_opt_narrow.c"
+#include "lj_opt_dce.c"
+#include "lj_opt_loop.c"
+#include "lj_mcode.c"
+#include "lj_snap.c"
+#include "lj_record.c"
+#include "lj_asm.c"
+#include "lj_trace.c"
+#include "lj_gdbjit.c"
+#include "lj_alloc.c"
+
+#include "lib_aux.c"
+#include "lib_base.c"
+#include "lib_math.c"
+#include "lib_string.c"
+#include "lib_table.c"
+#include "lib_io.c"
+#include "lib_os.c"
+#include "lib_package.c"
+#include "lib_debug.c"
+#include "lib_bit.c"
+#include "lib_jit.c"
+#include "lib_init.c"
+
diff --git a/src/lua.h b/src/lua.h
new file mode 100644
index 00000000..0e98b374
--- /dev/null
+++ b/src/lua.h
@@ -0,0 +1,388 @@
+/*
+** $Id: lua.h,v 1.218.1.5 2008/08/06 13:30:12 roberto Exp $
+** Lua - An Extensible Extension Language
+** Lua.org, PUC-Rio, Brazil (http://www.lua.org)
+** See Copyright Notice at the end of this file
+*/
+
+
+#ifndef lua_h
+#define lua_h
+
+#include <stdarg.h>
+#include <stddef.h>
+
+
+#include "luaconf.h"
+
+
+#define LUA_VERSION "Lua 5.1"
+#define LUA_RELEASE "Lua 5.1.4"
+#define LUA_VERSION_NUM 501
+#define LUA_COPYRIGHT "Copyright (C) 1994-2008 Lua.org, PUC-Rio"
+#define LUA_AUTHORS "R. Ierusalimschy, L. H. de Figueiredo & W. Celes"
+
+
+/* mark for precompiled code (`<esc>Lua') */
+#define LUA_SIGNATURE "\033Lua"
+
+/* option for multiple returns in `lua_pcall' and `lua_call' */
+#define LUA_MULTRET (-1)
+
+
+/*
+** pseudo-indices
+*/
+#define LUA_REGISTRYINDEX (-10000)
+#define LUA_ENVIRONINDEX (-10001)
+#define LUA_GLOBALSINDEX (-10002)
+#define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i))
+
+
+/* thread status; 0 is OK */
+#define LUA_YIELD 1
+#define LUA_ERRRUN 2
+#define LUA_ERRSYNTAX 3
+#define LUA_ERRMEM 4
+#define LUA_ERRERR 5
+
+
+typedef struct lua_State lua_State;
+
+typedef int (*lua_CFunction) (lua_State *L);
+
+
+/*
+** functions that read/write blocks when loading/dumping Lua chunks
+*/
+typedef const char * (*lua_Reader) (lua_State *L, void *ud, size_t *sz);
+
+typedef int (*lua_Writer) (lua_State *L, const void* p, size_t sz, void* ud);
+
+
+/*
+** prototype for memory-allocation functions
+*/
+typedef void * (*lua_Alloc) (void *ud, void *ptr, size_t osize, size_t nsize);
+
+
+/*
+** basic types
+*/
+#define LUA_TNONE (-1)
+
+#define LUA_TNIL 0
+#define LUA_TBOOLEAN 1
+#define LUA_TLIGHTUSERDATA 2
+#define LUA_TNUMBER 3
+#define LUA_TSTRING 4
+#define LUA_TTABLE 5
+#define LUA_TFUNCTION 6
+#define LUA_TUSERDATA 7
+#define LUA_TTHREAD 8
+
+
+
+/* minimum Lua stack available to a C function */
+#define LUA_MINSTACK 20
+
+
+/*
+** generic extra include file
+*/
+#if defined(LUA_USER_H)
+#include LUA_USER_H
+#endif
+
+
+/* type of numbers in Lua */
+typedef LUA_NUMBER lua_Number;
+
+
+/* type for integer functions */
+typedef LUA_INTEGER lua_Integer;
+
+
+
+/*
+** state manipulation
+*/
+LUA_API lua_State *(lua_newstate) (lua_Alloc f, void *ud);
+LUA_API void (lua_close) (lua_State *L);
+LUA_API lua_State *(lua_newthread) (lua_State *L);
+
+LUA_API lua_CFunction (lua_atpanic) (lua_State *L, lua_CFunction panicf);
+
+
+/*
+** basic stack manipulation
+*/
+LUA_API int (lua_gettop) (lua_State *L);
+LUA_API void (lua_settop) (lua_State *L, int idx);
+LUA_API void (lua_pushvalue) (lua_State *L, int idx);
+LUA_API void (lua_remove) (lua_State *L, int idx);
+LUA_API void (lua_insert) (lua_State *L, int idx);
+LUA_API void (lua_replace) (lua_State *L, int idx);
+LUA_API int (lua_checkstack) (lua_State *L, int sz);
+
+LUA_API void (lua_xmove) (lua_State *from, lua_State *to, int n);
+
+
+/*
+** access functions (stack -> C)
+*/
+
+LUA_API int (lua_isnumber) (lua_State *L, int idx);
+LUA_API int (lua_isstring) (lua_State *L, int idx);
+LUA_API int (lua_iscfunction) (lua_State *L, int idx);
+LUA_API int (lua_isuserdata) (lua_State *L, int idx);
+LUA_API int (lua_type) (lua_State *L, int idx);
+LUA_API const char *(lua_typename) (lua_State *L, int tp);
+
+LUA_API int (lua_equal) (lua_State *L, int idx1, int idx2);
+LUA_API int (lua_rawequal) (lua_State *L, int idx1, int idx2);
+LUA_API int (lua_lessthan) (lua_State *L, int idx1, int idx2);
+
+LUA_API lua_Number (lua_tonumber) (lua_State *L, int idx);
+LUA_API lua_Integer (lua_tointeger) (lua_State *L, int idx);
+LUA_API int (lua_toboolean) (lua_State *L, int idx);
+LUA_API const char *(lua_tolstring) (lua_State *L, int idx, size_t *len);
+LUA_API size_t (lua_objlen) (lua_State *L, int idx);
+LUA_API lua_CFunction (lua_tocfunction) (lua_State *L, int idx);
+LUA_API void *(lua_touserdata) (lua_State *L, int idx);
+LUA_API lua_State *(lua_tothread) (lua_State *L, int idx);
+LUA_API const void *(lua_topointer) (lua_State *L, int idx);
+
+
+/*
+** push functions (C -> stack)
+*/
+LUA_API void (lua_pushnil) (lua_State *L);
+LUA_API void (lua_pushnumber) (lua_State *L, lua_Number n);
+LUA_API void (lua_pushinteger) (lua_State *L, lua_Integer n);
+LUA_API void (lua_pushlstring) (lua_State *L, const char *s, size_t l);
+LUA_API void (lua_pushstring) (lua_State *L, const char *s);
+LUA_API const char *(lua_pushvfstring) (lua_State *L, const char *fmt,
+ va_list argp);
+LUA_API const char *(lua_pushfstring) (lua_State *L, const char *fmt, ...);
+LUA_API void (lua_pushcclosure) (lua_State *L, lua_CFunction fn, int n);
+LUA_API void (lua_pushboolean) (lua_State *L, int b);
+LUA_API void (lua_pushlightuserdata) (lua_State *L, void *p);
+LUA_API int (lua_pushthread) (lua_State *L);
+
+
+/*
+** get functions (Lua -> stack)
+*/
+LUA_API void (lua_gettable) (lua_State *L, int idx);
+LUA_API void (lua_getfield) (lua_State *L, int idx, const char *k);
+LUA_API void (lua_rawget) (lua_State *L, int idx);
+LUA_API void (lua_rawgeti) (lua_State *L, int idx, int n);
+LUA_API void (lua_createtable) (lua_State *L, int narr, int nrec);
+LUA_API void *(lua_newuserdata) (lua_State *L, size_t sz);
+LUA_API int (lua_getmetatable) (lua_State *L, int objindex);
+LUA_API void (lua_getfenv) (lua_State *L, int idx);
+
+
+/*
+** set functions (stack -> Lua)
+*/
+LUA_API void (lua_settable) (lua_State *L, int idx);
+LUA_API void (lua_setfield) (lua_State *L, int idx, const char *k);
+LUA_API void (lua_rawset) (lua_State *L, int idx);
+LUA_API void (lua_rawseti) (lua_State *L, int idx, int n);
+LUA_API int (lua_setmetatable) (lua_State *L, int objindex);
+LUA_API int (lua_setfenv) (lua_State *L, int idx);
+
+
+/*
+** `load' and `call' functions (load and run Lua code)
+*/
+LUA_API void (lua_call) (lua_State *L, int nargs, int nresults);
+LUA_API int (lua_pcall) (lua_State *L, int nargs, int nresults, int errfunc);
+LUA_API int (lua_cpcall) (lua_State *L, lua_CFunction func, void *ud);
+LUA_API int (lua_load) (lua_State *L, lua_Reader reader, void *dt,
+ const char *chunkname);
+
+LUA_API int (lua_dump) (lua_State *L, lua_Writer writer, void *data);
+
+
+/*
+** coroutine functions
+*/
+LUA_API int (lua_yield) (lua_State *L, int nresults);
+LUA_API int (lua_resume) (lua_State *L, int narg);
+LUA_API int (lua_status) (lua_State *L);
+
+/*
+** garbage-collection function and options
+*/
+
+#define LUA_GCSTOP 0
+#define LUA_GCRESTART 1
+#define LUA_GCCOLLECT 2
+#define LUA_GCCOUNT 3
+#define LUA_GCCOUNTB 4
+#define LUA_GCSTEP 5
+#define LUA_GCSETPAUSE 6
+#define LUA_GCSETSTEPMUL 7
+
+LUA_API int (lua_gc) (lua_State *L, int what, int data);
+
+
+/*
+** miscellaneous functions
+*/
+
+LUA_API int (lua_error) (lua_State *L);
+
+LUA_API int (lua_next) (lua_State *L, int idx);
+
+LUA_API void (lua_concat) (lua_State *L, int n);
+
+LUA_API lua_Alloc (lua_getallocf) (lua_State *L, void **ud);
+LUA_API void lua_setallocf (lua_State *L, lua_Alloc f, void *ud);
+
+
+
+/*
+** ===============================================================
+** some useful macros
+** ===============================================================
+*/
+
+#define lua_pop(L,n) lua_settop(L, -(n)-1)
+
+#define lua_newtable(L) lua_createtable(L, 0, 0)
+
+#define lua_register(L,n,f) (lua_pushcfunction(L, (f)), lua_setglobal(L, (n)))
+
+#define lua_pushcfunction(L,f) lua_pushcclosure(L, (f), 0)
+
+#define lua_strlen(L,i) lua_objlen(L, (i))
+
+#define lua_isfunction(L,n) (lua_type(L, (n)) == LUA_TFUNCTION)
+#define lua_istable(L,n) (lua_type(L, (n)) == LUA_TTABLE)
+#define lua_islightuserdata(L,n) (lua_type(L, (n)) == LUA_TLIGHTUSERDATA)
+#define lua_isnil(L,n) (lua_type(L, (n)) == LUA_TNIL)
+#define lua_isboolean(L,n) (lua_type(L, (n)) == LUA_TBOOLEAN)
+#define lua_isthread(L,n) (lua_type(L, (n)) == LUA_TTHREAD)
+#define lua_isnone(L,n) (lua_type(L, (n)) == LUA_TNONE)
+#define lua_isnoneornil(L, n) (lua_type(L, (n)) <= 0)
+
+#define lua_pushliteral(L, s) \
+ lua_pushlstring(L, "" s, (sizeof(s)/sizeof(char))-1)
+
+#define lua_setglobal(L,s) lua_setfield(L, LUA_GLOBALSINDEX, (s))
+#define lua_getglobal(L,s) lua_getfield(L, LUA_GLOBALSINDEX, (s))
+
+#define lua_tostring(L,i) lua_tolstring(L, (i), NULL)
+
+
+
+/*
+** compatibility macros and functions
+*/
+
+#define lua_open() luaL_newstate()
+
+#define lua_getregistry(L) lua_pushvalue(L, LUA_REGISTRYINDEX)
+
+#define lua_getgccount(L) lua_gc(L, LUA_GCCOUNT, 0)
+
+#define lua_Chunkreader lua_Reader
+#define lua_Chunkwriter lua_Writer
+
+
+/* hack */
+LUA_API void lua_setlevel (lua_State *from, lua_State *to);
+
+
+/*
+** {======================================================================
+** Debug API
+** =======================================================================
+*/
+
+
+/*
+** Event codes
+*/
+#define LUA_HOOKCALL 0
+#define LUA_HOOKRET 1
+#define LUA_HOOKLINE 2
+#define LUA_HOOKCOUNT 3
+#define LUA_HOOKTAILRET 4
+
+
+/*
+** Event masks
+*/
+#define LUA_MASKCALL (1 << LUA_HOOKCALL)
+#define LUA_MASKRET (1 << LUA_HOOKRET)
+#define LUA_MASKLINE (1 << LUA_HOOKLINE)
+#define LUA_MASKCOUNT (1 << LUA_HOOKCOUNT)
+
+typedef struct lua_Debug lua_Debug; /* activation record */
+
+
+/* Functions to be called by the debugger in specific events */
+typedef void (*lua_Hook) (lua_State *L, lua_Debug *ar);
+
+
+LUA_API int lua_getstack (lua_State *L, int level, lua_Debug *ar);
+LUA_API int lua_getinfo (lua_State *L, const char *what, lua_Debug *ar);
+LUA_API const char *lua_getlocal (lua_State *L, const lua_Debug *ar, int n);
+LUA_API const char *lua_setlocal (lua_State *L, const lua_Debug *ar, int n);
+LUA_API const char *lua_getupvalue (lua_State *L, int funcindex, int n);
+LUA_API const char *lua_setupvalue (lua_State *L, int funcindex, int n);
+
+LUA_API int lua_sethook (lua_State *L, lua_Hook func, int mask, int count);
+LUA_API lua_Hook lua_gethook (lua_State *L);
+LUA_API int lua_gethookmask (lua_State *L);
+LUA_API int lua_gethookcount (lua_State *L);
+
+
+struct lua_Debug {
+ int event;
+ const char *name; /* (n) */
+ const char *namewhat; /* (n) `global', `local', `field', `method' */
+ const char *what; /* (S) `Lua', `C', `main', `tail' */
+ const char *source; /* (S) */
+ int currentline; /* (l) */
+ int nups; /* (u) number of upvalues */
+ int linedefined; /* (S) */
+ int lastlinedefined; /* (S) */
+ char short_src[LUA_IDSIZE]; /* (S) */
+ /* private part */
+ int i_ci; /* active function */
+};
+
+/* }====================================================================== */
+
+
+/******************************************************************************
+* Copyright (C) 1994-2008 Lua.org, PUC-Rio. All rights reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+******************************************************************************/
+
+
+#endif
diff --git a/src/lua.hpp b/src/lua.hpp
new file mode 100644
index 00000000..ec417f59
--- /dev/null
+++ b/src/lua.hpp
@@ -0,0 +1,9 @@
+// lua.hpp
+// Lua header files for C++
+// <<extern "C">> not supplied automatically because Lua also compiles as C++
+
+extern "C" {
+#include "lua.h"
+#include "lualib.h"
+#include "lauxlib.h"
+}
diff --git a/src/luaconf.h b/src/luaconf.h
new file mode 100644
index 00000000..4d4f1099
--- /dev/null
+++ b/src/luaconf.h
@@ -0,0 +1,133 @@
+/*
+** Configuration header.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef luaconf_h
+#define luaconf_h
+
+#include <limits.h>
+#include <stddef.h>
+
+/* Try to determine supported features for a couple of standard platforms. */
+#if defined(_WIN32)
+#define LUA_USE_WIN
+#define LUA_DL_DLL
+#elif defined(__linux__) || defined(__solaris__) || defined(__CYGWIN__) || \
+ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
+ (defined(__MACH__) && defined(__APPLE__))
+#define LUA_USE_POSIX
+#define LUA_DL_DLOPEN
+#endif
+
+/* Default path for loading Lua and C modules with require(). */
+#ifdef LUA_USE_WIN
+/*
+** In Windows, any exclamation mark ('!') in the path is replaced by the
+** path of the directory of the executable file of the current process.
+*/
+#define LUA_LDIR "!\\lua\\"
+#define LUA_CDIR "!\\"
+#define LUA_PATH_DEFAULT \
+ ".\\?.lua;" LUA_LDIR"?.lua;" LUA_LDIR"?\\init.lua;"
+#define LUA_CPATH_DEFAULT \
+ ".\\?.dll;" LUA_CDIR"?.dll;" LUA_CDIR"loadall.dll"
+#else
+#define LUA_ROOT "/usr/local/"
+#define LUA_JDIR LUA_ROOT "share/luajit-2.0.0-beta1/"
+#define LUA_LDIR LUA_ROOT "share/lua/5.1/"
+#define LUA_CDIR LUA_ROOT "lib/lua/5.1/"
+#define LUA_PATH_DEFAULT \
+ "./?.lua;" LUA_JDIR"?.lua;" LUA_LDIR"?.lua;" LUA_LDIR"?/init.lua;"
+#define LUA_CPATH_DEFAULT \
+ "./?.so;" LUA_CDIR"?.so;" LUA_CDIR"loadall.so"
+#endif
+
+/* Environment variable names for path overrides and initialization code. */
+#define LUA_PATH "LUA_PATH"
+#define LUA_CPATH "LUA_CPATH"
+#define LUA_INIT "LUA_INIT"
+
+/* Special file system characters. */
+#ifdef LUA_USE_WIN
+#define LUA_DIRSEP "\\"
+#else
+#define LUA_DIRSEP "/"
+#endif
+#define LUA_PATHSEP ";"
+#define LUA_PATH_MARK "?"
+#define LUA_EXECDIR "!"
+#define LUA_IGMARK "-"
+#define LUA_PATH_CONFIG \
+ LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \
+ LUA_EXECDIR "\n" LUA_IGMARK
+
+/* Quoting in error messages. */
+#define LUA_QL(x) "'" x "'"
+#define LUA_QS LUA_QL("%s")
+
+/* Various tunables. */
+#define LUAI_MAXSTACK 65500 /* Max. # of stack slots for a thread (<64K). */
+#define LUAI_MAXCSTACK 8000 /* Max. # of stack slots for a C func (<10K). */
+#define LUAI_GCPAUSE 200 /* Pause GC until memory is at 200%. */
+#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */
+#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */
+
+/* Compatibility with older library function names. */
+#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */
+#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */
+
+/* Configuration for the frontend (the luajit executable). */
+#if defined(luajit_c)
+#define LUA_PROGNAME "luajit" /* Fallback frontend name. */
+#define LUA_PROMPT "> " /* Interactive prompt. */
+#define LUA_PROMPT2 ">> " /* Continuation prompt. */
+#define LUA_MAXINPUT 512 /* Max. input line length. */
+#endif
+
+/* Note: changing the following defines breaks the Lua 5.1 ABI. */
+#define LUA_INTEGER ptrdiff_t
+#define LUA_IDSIZE 60 /* Size of lua_Debug.short_src. */
+#define LUAL_BUFFERSIZE BUFSIZ /* Size of lauxlib and io.* buffers. */
+
+/* The following defines are here only for compatibility with luaconf.h
+** from the standard Lua distribution. They must not be changed for LuaJIT.
+*/
+#define LUA_NUMBER_DOUBLE
+#define LUA_NUMBER double
+#define LUAI_UACNUMBER double
+#define LUA_NUMBER_SCAN "%lf"
+#define LUA_NUMBER_FMT "%.14g"
+#define lua_number2str(s, n) sprintf((s), LUA_NUMBER_FMT, (n))
+#define LUAI_MAXNUMBER2STR 32
+#define lua_str2number(s, p) strtod((s), (p))
+#define LUA_INTFRMLEN "l"
+#define LUA_INTFRM_T long
+
+/* Linkage of public API functions. */
+#if defined(LUA_BUILD_AS_DLL)
+#if defined(LUA_CORE) || defined(LUA_LIB)
+#define LUA_API __declspec(dllexport)
+#else
+#define LUA_API __declspec(dllimport)
+#endif
+#else
+#define LUA_API extern
+#endif
+
+#define LUALIB_API LUA_API
+
+/* Support for internal assertions. */
+#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
+#include <assert.h>
+#endif
+#ifdef LUA_USE_ASSERT
+#define lua_assert(x) assert(x)
+#endif
+#ifdef LUA_USE_APICHECK
+#define luai_apicheck(L, o) { (void)L; assert(o); }
+#else
+#define luai_apicheck(L, o) { (void)L; }
+#endif
+
+#endif
diff --git a/src/luajit.c b/src/luajit.c
new file mode 100644
index 00000000..9153975b
--- /dev/null
+++ b/src/luajit.c
@@ -0,0 +1,519 @@
+/*
+** LuaJIT frontend. Runs commands, scripts, read-eval-print (REPL) etc.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+**
+** Major portions taken verbatim or adapted from the Lua interpreter.
+** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
+*/
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define luajit_c
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+#include "luajit.h"
+
+#if defined(LUA_USE_POSIX)
+#include <unistd.h>
+#define lua_stdin_is_tty() isatty(0)
+#elif defined(LUA_USE_WIN)
+#include <io.h>
+#ifdef __BORLANDC__
+#define lua_stdin_is_tty() isatty(_fileno(stdin))
+#else
+#define lua_stdin_is_tty() _isatty(_fileno(stdin))
+#endif
+#else
+#define lua_stdin_is_tty() 1
+#endif
+
+static lua_State *globalL = NULL;
+static const char *progname = LUA_PROGNAME;
+
+static void lstop(lua_State *L, lua_Debug *ar)
+{
+ (void)ar; /* unused arg. */
+ lua_sethook(L, NULL, 0, 0);
+ /* Avoid luaL_error -- a C hook doesn't add an extra frame. */
+ luaL_where(L, 0);
+ lua_pushfstring(L, "%sinterrupted!", lua_tostring(L, -1));
+ lua_error(L);
+}
+
+static void laction(int i)
+{
+ signal(i, SIG_DFL); /* if another SIGINT happens before lstop,
+ terminate process (default action) */
+ lua_sethook(globalL, lstop, LUA_MASKCALL | LUA_MASKRET | LUA_MASKCOUNT, 1);
+}
+
+static void print_usage(void)
+{
+ fprintf(stderr,
+ "usage: %s [options] [script [args]].\n"
+ "Available options are:\n"
+ " -e stat execute string " LUA_QL("stat") "\n"
+ " -l name require library " LUA_QL("name") "\n"
+ " -j cmd perform LuaJIT control command\n"
+ " -O[lvl] set LuaJIT optimization level\n"
+ " -i enter interactive mode after executing " LUA_QL("script") "\n"
+ " -v show version information\n"
+ " -- stop handling options\n"
+ " - execute stdin and stop handling options\n"
+ ,
+ progname);
+ fflush(stderr);
+}
+
+static void l_message(const char *pname, const char *msg)
+{
+ if (pname) fprintf(stderr, "%s: ", pname);
+ fprintf(stderr, "%s\n", msg);
+ fflush(stderr);
+}
+
+static int report(lua_State *L, int status)
+{
+ if (status && !lua_isnil(L, -1)) {
+ const char *msg = lua_tostring(L, -1);
+ if (msg == NULL) msg = "(error object is not a string)";
+ l_message(progname, msg);
+ lua_pop(L, 1);
+ }
+ return status;
+}
+
+static int traceback(lua_State *L)
+{
+ if (!lua_isstring(L, 1)) /* 'message' not a string? */
+ return 1; /* keep it intact */
+ lua_getfield(L, LUA_GLOBALSINDEX, "debug");
+ if (!lua_istable(L, -1)) {
+ lua_pop(L, 1);
+ return 1;
+ }
+ lua_getfield(L, -1, "traceback");
+ if (!lua_isfunction(L, -1)) {
+ lua_pop(L, 2);
+ return 1;
+ }
+ lua_pushvalue(L, 1); /* pass error message */
+ lua_pushinteger(L, 2); /* skip this function and traceback */
+ lua_call(L, 2, 1); /* call debug.traceback */
+ return 1;
+}
+
+static int docall(lua_State *L, int narg, int clear)
+{
+ int status;
+ int base = lua_gettop(L) - narg; /* function index */
+ lua_pushcfunction(L, traceback); /* push traceback function */
+ lua_insert(L, base); /* put it under chunk and args */
+ signal(SIGINT, laction);
+ status = lua_pcall(L, narg, (clear ? 0 : LUA_MULTRET), base);
+ signal(SIGINT, SIG_DFL);
+ lua_remove(L, base); /* remove traceback function */
+ /* force a complete garbage collection in case of errors */
+ if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0);
+ return status;
+}
+
+static void print_version(void)
+{
+ fprintf(stderr,
+ LUAJIT_VERSION " -- " LUAJIT_COPYRIGHT ". " LUAJIT_URL "\n");
+}
+
+static void print_jit_status(lua_State *L)
+{
+ int n;
+ const char *s;
+ lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
+ lua_getfield(L, -1, "jit"); /* Get jit.* module table. */
+ lua_remove(L, -2);
+ lua_getfield(L, -1, "status");
+ lua_remove(L, -2);
+ n = lua_gettop(L);
+ lua_call(L, 0, LUA_MULTRET);
+ fputs(lua_toboolean(L, n) ? "JIT: ON" : "JIT: OFF", stderr);
+ for (n++; (s = lua_tostring(L, n)); n++)
+ fprintf(stderr, " %s", s);
+ fputs("\n", stdout);
+}
+
+static int getargs(lua_State *L, char **argv, int n)
+{
+ int narg;
+ int i;
+ int argc = 0;
+ while (argv[argc]) argc++; /* count total number of arguments */
+ narg = argc - (n + 1); /* number of arguments to the script */
+ luaL_checkstack(L, narg + 3, "too many arguments to script");
+ for (i = n+1; i < argc; i++)
+ lua_pushstring(L, argv[i]);
+ lua_createtable(L, narg, n + 1);
+ for (i = 0; i < argc; i++) {
+ lua_pushstring(L, argv[i]);
+ lua_rawseti(L, -2, i - n);
+ }
+ return narg;
+}
+
+static int dofile(lua_State *L, const char *name)
+{
+ int status = luaL_loadfile(L, name) || docall(L, 0, 1);
+ return report(L, status);
+}
+
+static int dostring(lua_State *L, const char *s, const char *name)
+{
+ int status = luaL_loadbuffer(L, s, strlen(s), name) || docall(L, 0, 1);
+ return report(L, status);
+}
+
+static int dolibrary(lua_State *L, const char *name)
+{
+ lua_getglobal(L, "require");
+ lua_pushstring(L, name);
+ return report(L, docall(L, 1, 1));
+}
+
+static void write_prompt(lua_State *L, int firstline)
+{
+ const char *p;
+ lua_getfield(L, LUA_GLOBALSINDEX, firstline ? "_PROMPT" : "_PROMPT2");
+ p = lua_tostring(L, -1);
+ if (p == NULL) p = firstline ? LUA_PROMPT : LUA_PROMPT2;
+ fputs(p, stdout);
+ fflush(stdout);
+ lua_pop(L, 1); /* remove global */
+}
+
+static int incomplete(lua_State *L, int status)
+{
+ if (status == LUA_ERRSYNTAX) {
+ size_t lmsg;
+ const char *msg = lua_tolstring(L, -1, &lmsg);
+ const char *tp = msg + lmsg - (sizeof(LUA_QL("<eof>")) - 1);
+ if (strstr(msg, LUA_QL("<eof>")) == tp) {
+ lua_pop(L, 1);
+ return 1;
+ }
+ }
+ return 0; /* else... */
+}
+
+static int pushline(lua_State *L, int firstline)
+{
+ char buf[LUA_MAXINPUT];
+ write_prompt(L, firstline);
+ if (fgets(buf, LUA_MAXINPUT, stdin)) {
+ size_t len = strlen(buf);
+ if (len > 0 && buf[len-1] == '\n')
+ buf[len-1] = '\0';
+ if (firstline && buf[0] == '=')
+ lua_pushfstring(L, "return %s", buf+1);
+ else
+ lua_pushstring(L, buf);
+ return 1;
+ }
+ return 0;
+}
+
+static int loadline(lua_State *L)
+{
+ int status;
+ lua_settop(L, 0);
+ if (!pushline(L, 1))
+ return -1; /* no input */
+ for (;;) { /* repeat until gets a complete line */
+ status = luaL_loadbuffer(L, lua_tostring(L, 1), lua_strlen(L, 1), "=stdin");
+ if (!incomplete(L, status)) break; /* cannot try to add lines? */
+ if (!pushline(L, 0)) /* no more input? */
+ return -1;
+ lua_pushliteral(L, "\n"); /* add a new line... */
+ lua_insert(L, -2); /* ...between the two lines */
+ lua_concat(L, 3); /* join them */
+ }
+ lua_remove(L, 1); /* remove line */
+ return status;
+}
+
+static void dotty(lua_State *L)
+{
+ int status;
+ const char *oldprogname = progname;
+ progname = NULL;
+ while ((status = loadline(L)) != -1) {
+ if (status == 0) status = docall(L, 0, 0);
+ report(L, status);
+ if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */
+ lua_getglobal(L, "print");
+ lua_insert(L, 1);
+ if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0)
+ l_message(progname,
+ lua_pushfstring(L, "error calling " LUA_QL("print") " (%s)",
+ lua_tostring(L, -1)));
+ }
+ }
+ lua_settop(L, 0); /* clear stack */
+ fputs("\n", stdout);
+ fflush(stdout);
+ progname = oldprogname;
+}
+
+static int handle_script(lua_State *L, char **argv, int n)
+{
+ int status;
+ const char *fname;
+ int narg = getargs(L, argv, n); /* collect arguments */
+ lua_setglobal(L, "arg");
+ fname = argv[n];
+ if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0)
+ fname = NULL; /* stdin */
+ status = luaL_loadfile(L, fname);
+ lua_insert(L, -(narg+1));
+ if (status == 0)
+ status = docall(L, narg, 0);
+ else
+ lua_pop(L, narg);
+ return report(L, status);
+}
+
+/* Load add-on module. */
+static int loadjitmodule(lua_State *L, const char *notfound)
+{
+ lua_getglobal(L, "require");
+ lua_pushliteral(L, "jit.");
+ lua_pushvalue(L, -3);
+ lua_concat(L, 2);
+ if (lua_pcall(L, 1, 1, 0)) {
+ const char *msg = lua_tostring(L, -1);
+ if (msg && !strncmp(msg, "module ", 7)) {
+ err:
+ l_message(progname, notfound);
+ return 1;
+ } else {
+ return report(L, 1);
+ }
+ }
+ lua_getfield(L, -1, "start");
+ if (lua_isnil(L, -1)) goto err;
+ lua_remove(L, -2); /* Drop module table. */
+ return 0;
+}
+
+/* Run command with options. */
+static int runcmdopt(lua_State *L, const char *opt)
+{
+ int narg = 0;
+ if (opt && *opt) {
+ for (;;) { /* Split arguments. */
+ const char *p = strchr(opt, ',');
+ narg++;
+ if (!p) break;
+ if (p == opt)
+ lua_pushnil(L);
+ else
+ lua_pushlstring(L, opt, (size_t)(p - opt));
+ opt = p + 1;
+ }
+ if (*opt)
+ lua_pushstring(L, opt);
+ else
+ lua_pushnil(L);
+ }
+ return report(L, lua_pcall(L, narg, 0, 0));
+}
+
+/* JIT engine control command: try jit library first or load add-on module. */
+static int dojitcmd(lua_State *L, const char *cmd)
+{
+ const char *opt = strchr(cmd, '=');
+ lua_pushlstring(L, cmd, opt ? (size_t)(opt - cmd) : strlen(cmd));
+ lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
+ lua_getfield(L, -1, "jit"); /* Get jit.* module table. */
+ lua_remove(L, -2);
+ lua_pushvalue(L, -2);
+ lua_gettable(L, -2); /* Lookup library function. */
+ if (!lua_isfunction(L, -1)) {
+ lua_pop(L, 2); /* Drop non-function and jit.* table, keep module name. */
+ if (loadjitmodule(L, "unknown luaJIT command"))
+ return 1;
+ } else {
+ lua_remove(L, -2); /* Drop jit.* table. */
+ }
+ lua_remove(L, -2); /* Drop module name. */
+ return runcmdopt(L, opt ? opt+1 : opt);
+}
+
+/* Optimization flags. */
+static int dojitopt(lua_State *L, const char *opt)
+{
+ lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
+ lua_getfield(L, -1, "jit.opt"); /* Get jit.opt.* module table. */
+ lua_remove(L, -2);
+ lua_getfield(L, -1, "start");
+ lua_remove(L, -2);
+ return runcmdopt(L, opt);
+}
+
+/* check that argument has no extra characters at the end */
+#define notail(x) {if ((x)[2] != '\0') return -1;}
+
+static int collectargs(char **argv, int *pi, int *pv, int *pe)
+{
+ int i;
+ for (i = 1; argv[i] != NULL; i++) {
+ if (argv[i][0] != '-') /* not an option? */
+ return i;
+ switch (argv[i][1]) { /* option */
+ case '-':
+ notail(argv[i]);
+ return (argv[i+1] != NULL ? i+1 : 0);
+ case '\0':
+ return i;
+ case 'i':
+ notail(argv[i]);
+ *pi = 1; /* go through */
+ case 'v':
+ notail(argv[i]);
+ *pv = 1;
+ break;
+ case 'e':
+ *pe = 1; /* go through */
+ case 'j': /* LuaJIT extension */
+ case 'l':
+ if (argv[i][2] == '\0') {
+ i++;
+ if (argv[i] == NULL) return -1;
+ }
+ break;
+ case 'O': break; /* LuaJIT extension */
+ default: return -1; /* invalid option */
+ }
+ }
+ return 0;
+}
+
+static int runargs(lua_State *L, char **argv, int n)
+{
+ int i;
+ for (i = 1; i < n; i++) {
+ if (argv[i] == NULL) continue;
+ lua_assert(argv[i][0] == '-');
+ switch (argv[i][1]) { /* option */
+ case 'e': {
+ const char *chunk = argv[i] + 2;
+ if (*chunk == '\0') chunk = argv[++i];
+ lua_assert(chunk != NULL);
+ if (dostring(L, chunk, "=(command line)") != 0)
+ return 1;
+ break;
+ }
+ case 'l': {
+ const char *filename = argv[i] + 2;
+ if (*filename == '\0') filename = argv[++i];
+ lua_assert(filename != NULL);
+ if (dolibrary(L, filename))
+ return 1; /* stop if file fails */
+ break;
+ }
+ case 'j': { /* LuaJIT extension */
+ const char *cmd = argv[i] + 2;
+ if (*cmd == '\0') cmd = argv[++i];
+ lua_assert(cmd != NULL);
+ if (dojitcmd(L, cmd))
+ return 1;
+ break;
+ }
+ case 'O': /* LuaJIT extension */
+ if (dojitopt(L, argv[i] + 2))
+ return 1;
+ break;
+ default: break;
+ }
+ }
+ return 0;
+}
+
+static int handle_luainit(lua_State *L)
+{
+ const char *init = getenv(LUA_INIT);
+ if (init == NULL)
+ return 0; /* status OK */
+ else if (init[0] == '@')
+ return dofile(L, init+1);
+ else
+ return dostring(L, init, "=" LUA_INIT);
+}
+
+struct Smain {
+ int argc;
+ char **argv;
+ int status;
+};
+
+static int pmain(lua_State *L)
+{
+ struct Smain *s = (struct Smain *)lua_touserdata(L, 1);
+ char **argv = s->argv;
+ int script;
+ int has_i = 0, has_v = 0, has_e = 0;
+ globalL = L;
+ if (argv[0] && argv[0][0]) progname = argv[0];
+ LUAJIT_VERSION_SYM(); /* linker-enforced version check */
+ lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */
+ luaL_openlibs(L); /* open libraries */
+ lua_gc(L, LUA_GCRESTART, 0);
+ s->status = handle_luainit(L);
+ if (s->status != 0) return 0;
+ script = collectargs(argv, &has_i, &has_v, &has_e);
+ if (script < 0) { /* invalid args? */
+ print_usage();
+ s->status = 1;
+ return 0;
+ }
+ if (has_v) print_version();
+ s->status = runargs(L, argv, (script > 0) ? script : s->argc);
+ if (s->status != 0) return 0;
+ if (script)
+ s->status = handle_script(L, argv, script);
+ if (s->status != 0) return 0;
+ if (has_i) {
+ print_jit_status(L);
+ dotty(L);
+ } else if (script == 0 && !has_e && !has_v) {
+ if (lua_stdin_is_tty()) {
+ print_version();
+ print_jit_status(L);
+ dotty(L);
+ } else {
+ dofile(L, NULL); /* executes stdin as a file */
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int status;
+ struct Smain s;
+ lua_State *L = lua_open(); /* create state */
+ if (L == NULL) {
+ l_message(argv[0], "cannot create state: not enough memory");
+ return EXIT_FAILURE;
+ }
+ s.argc = argc;
+ s.argv = argv;
+ status = lua_cpcall(L, pmain, &s);
+ report(L, status);
+ lua_close(L);
+ return (status || s.status) ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
diff --git a/src/luajit.h b/src/luajit.h
new file mode 100644
index 00000000..01913755
--- /dev/null
+++ b/src/luajit.h
@@ -0,0 +1,68 @@
+/*
+** LuaJIT -- a Just-In-Time Compiler for Lua. http://luajit.org/
+**
+** Copyright (C) 2005-2009 Mike Pall. All rights reserved.
+**
+** Permission is hereby granted, free of charge, to any person obtaining
+** a copy of this software and associated documentation files (the
+** "Software"), to deal in the Software without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Software, and to
+** permit persons to whom the Software is furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be
+** included in all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**
+** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+*/
+
+#ifndef _LUAJIT_H
+#define _LUAJIT_H
+
+#include "lua.h"
+
+#define LUAJIT_VERSION "LuaJIT 2.0.0-beta1"
+#define LUAJIT_VERSION_NUM 20000 /* Version 2.0.0 = 02.00.00. */
+#define LUAJIT_VERSION_SYM luaJIT_version_2_0_0_beta1
+#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2009 Mike Pall"
+#define LUAJIT_URL "http://luajit.org/"
+
+/* Modes for luaJIT_setmode. */
+#define LUAJIT_MODE_MASK 0x00ff
+
+enum {
+ LUAJIT_MODE_ENGINE, /* Set mode for whole JIT engine. */
+ LUAJIT_MODE_DEBUG, /* Set debug mode (idx = level). */
+
+ LUAJIT_MODE_FUNC, /* Change mode for a function. */
+ LUAJIT_MODE_ALLFUNC, /* Recurse into subroutine protos. */
+ LUAJIT_MODE_ALLSUBFUNC, /* Change only the subroutines. */
+
+ LUAJIT_MODE_TRACE, /* Flush a compiled trace. */
+
+ LUAJIT_MODE_MAX
+};
+
+/* Flags or'ed in to the mode. */
+#define LUAJIT_MODE_OFF 0x0000 /* Disable JIT compilation. */
+#define LUAJIT_MODE_ON 0x0100 /* (Re-)enable JIT compilation. */
+#define LUAJIT_MODE_FLUSH 0x0200 /* Flush JIT-compiled code. */
+
+/* LuaJIT public C API. */
+
+/* Control the JIT engine. */
+LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
+
+/* Enforce (dynamic) linker error for version mismatches. Call from main. */
+LUA_API void LUAJIT_VERSION_SYM(void);
+
+#endif
diff --git a/src/lualib.h b/src/lualib.h
new file mode 100644
index 00000000..c1ceb613
--- /dev/null
+++ b/src/lualib.h
@@ -0,0 +1,41 @@
+/*
+** Standard library header.
+** Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LUALIB_H
+#define _LUALIB_H
+
+#include "lua.h"
+
+/* Type name used to tag io library file handles. */
+#define LUA_FILEHANDLE "FILE*"
+
+/* Names under which the standard libraries are registered. */
+#define LUA_COLIBNAME "coroutine"
+#define LUA_MATHLIBNAME "math"
+#define LUA_STRLIBNAME "string"
+#define LUA_TABLIBNAME "table"
+#define LUA_IOLIBNAME "io"
+#define LUA_OSLIBNAME "os"
+#define LUA_LOADLIBNAME "package"
+#define LUA_DBLIBNAME "debug"
+#define LUA_BITLIBNAME "bit"
+#define LUA_JITLIBNAME "jit"
+
+/* Loaders for the individual standard libraries. */
+LUALIB_API int luaopen_base(lua_State *L);
+LUALIB_API int luaopen_math(lua_State *L);
+LUALIB_API int luaopen_string(lua_State *L);
+LUALIB_API int luaopen_table(lua_State *L);
+LUALIB_API int luaopen_io(lua_State *L);
+LUALIB_API int luaopen_os(lua_State *L);
+LUALIB_API int luaopen_package(lua_State *L);
+LUALIB_API int luaopen_debug(lua_State *L);
+LUALIB_API int luaopen_bit(lua_State *L);
+LUALIB_API int luaopen_jit(lua_State *L);
+
+/* Convenience entry point: open all standard libraries listed above. */
+LUALIB_API void luaL_openlibs(lua_State *L);
+
+/* Internal assertions default to no-ops unless the embedder defines
+** lua_assert before including this header. */
+#ifndef lua_assert
+#define lua_assert(x) ((void)0)
+#endif
+
+#endif
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
new file mode 100644
index 00000000..8bdc4d8a
--- /dev/null
+++ b/src/msvcbuild.bat
@@ -0,0 +1,53 @@
+@rem Script to build LuaJIT with MSVC.
+@rem Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
+@rem
+@rem Open a "Visual Studio .NET Command Prompt", cd to this directory
+@rem and run this script.
+
+@rem The VS command prompt defines INCLUDE; abort early if it is not set.
+@if not defined INCLUDE goto :FAIL
+
+@setlocal
+@rem Compiler, linker and manifest-tool command lines, plus helper vars.
+@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
+@set LJLINK=link /nologo
+@set LJMT=mt /nologo
+@set DASMDIR=..\dynasm
+@set DASM=lua %DASMDIR%\dynasm.lua
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c
+
+@rem Generate buildvm_x86.h from the DynASM source only if it is missing.
+@rem NOTE: the trailing ^ continues the if-statement onto the next line.
+if not exist buildvm_x86.h^
+ %DASM% -LN -o buildvm_x86.h buildvm_x86.dasc
+
+@rem Build the buildvm helper tool; embed its manifest if one was produced.
+%LJCOMPILE% /I "." /I %DASMDIR% buildvm*.c
+%LJLINK% /out:buildvm.exe buildvm*.obj
+if exist buildvm.exe.manifest^
+ %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+
+@rem Use buildvm to emit the VM object file and the generated headers.
+buildvm -m peobj -o lj_vm.obj
+buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
+buildvm -m libdef -o lj_libdef.h %ALL_LIB%
+buildvm -m recdef -o lj_recdef.h %ALL_LIB%
+buildvm -m vmdef -o ..\lib\vmdef.lua %ALL_LIB%
+buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+
+@rem Build lua51.dll: from the individual sources by default, or from the
+@rem single amalgamated unit (ljamalg.c) when invoked as "msvcbuild amalg".
+@if "%1"=="amalg" goto :AMALGDLL
+%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
+%LJLINK% /DLL /out:lua51.dll lj_*.obj lib_*.obj
+@goto :MTDLL
+:AMALGDLL
+%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c
+%LJLINK% /DLL /out:lua51.dll ljamalg.obj lj_vm.obj
+:MTDLL
+if exist lua51.dll.manifest^
+ %LJMT% -manifest lua51.dll.manifest -outputresource:lua51.dll;2
+
+@rem Build the luajit.exe frontend against the import library (lua51.lib)
+@rem produced by the DLL link above.
+%LJCOMPILE% luajit.c
+%LJLINK% /out:luajit.exe luajit.obj lua51.lib
+if exist luajit.exe.manifest^
+ %LJMT% -manifest luajit.exe.manifest -outputresource:luajit.exe
+
+@rem Remove intermediate build products.
+del *.obj *.manifest buildvm.exe
+
+@goto :END
+:FAIL
+@echo You must open a "Visual Studio .NET Command Prompt" to run this script
+:END