summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile25
-rw-r--r--README4
-rw-r--r--doc/contact.html8
-rw-r--r--doc/ext_buffer.html697
-rw-r--r--doc/ext_c_api.html8
-rw-r--r--doc/ext_ffi.html8
-rw-r--r--doc/ext_ffi_api.html16
-rw-r--r--doc/ext_ffi_semantics.html35
-rw-r--r--doc/ext_ffi_tutorial.html8
-rw-r--r--doc/ext_jit.html10
-rw-r--r--doc/ext_profiler.html361
-rw-r--r--doc/extensions.html112
-rw-r--r--doc/faq.html8
-rw-r--r--doc/install.html148
-rw-r--r--doc/luajit.html14
-rw-r--r--doc/running.html9
-rw-r--r--doc/status.html16
-rw-r--r--dynasm/dasm_arm.h7
-rw-r--r--dynasm/dasm_arm.lua6
-rw-r--r--dynasm/dasm_arm64.h563
-rw-r--r--dynasm/dasm_arm64.lua1219
-rw-r--r--dynasm/dasm_mips.h37
-rw-r--r--dynasm/dasm_mips.lua684
-rw-r--r--dynasm/dasm_mips64.lua12
-rw-r--r--dynasm/dasm_ppc.h20
-rw-r--r--dynasm/dasm_ppc.lua702
-rw-r--r--dynasm/dasm_proto.h4
-rw-r--r--dynasm/dasm_x86.h62
-rw-r--r--dynasm/dasm_x86.lua648
-rw-r--r--dynasm/dynasm.lua7
-rw-r--r--etc/luajit.pc6
-rw-r--r--src/.gitignore2
-rw-r--r--src/Makefile109
-rw-r--r--src/Makefile.dep255
-rw-r--r--src/host/buildvm.c36
-rw-r--r--src/host/buildvm.h1
-rw-r--r--src/host/buildvm_asm.c56
-rw-r--r--src/host/buildvm_fold.c7
-rw-r--r--src/host/buildvm_lib.c63
-rw-r--r--src/host/buildvm_libbc.h81
-rw-r--r--src/host/buildvm_peobj.c43
-rw-r--r--src/host/genlibbc.lua225
-rw-r--r--src/jit/bc.lua19
-rw-r--r--src/jit/bcsave.lua137
-rw-r--r--src/jit/dis_arm.lua18
-rw-r--r--src/jit/dis_arm64.lua1216
-rw-r--r--src/jit/dis_arm64be.lua12
-rw-r--r--src/jit/dis_mips.lua372
-rw-r--r--src/jit/dis_mips64.lua17
-rw-r--r--src/jit/dis_mips64el.lua17
-rw-r--r--src/jit/dis_mips64r6.lua17
-rw-r--r--src/jit/dis_mips64r6el.lua17
-rw-r--r--src/jit/dis_mipsel.lua15
-rw-r--r--src/jit/dis_ppc.lua18
-rw-r--r--src/jit/dis_x64.lua15
-rw-r--r--src/jit/dis_x86.lua297
-rw-r--r--src/jit/dump.lua78
-rw-r--r--src/jit/p.lua312
-rw-r--r--src/jit/v.lua17
-rw-r--r--src/jit/zone.lua45
-rw-r--r--src/lauxlib.h34
-rw-r--r--src/lib_aux.c82
-rw-r--r--src/lib_base.c156
-rw-r--r--src/lib_bit.c134
-rw-r--r--src/lib_buffer.c360
-rw-r--r--src/lib_debug.c14
-rw-r--r--src/lib_ffi.c83
-rw-r--r--src/lib_io.c57
-rw-r--r--src/lib_jit.c233
-rw-r--r--src/lib_math.c92
-rw-r--r--src/lib_os.c39
-rw-r--r--src/lib_package.c73
-rw-r--r--src/lib_string.c427
-rw-r--r--src/lib_table.c187
-rw-r--r--src/lj.supp41
-rw-r--r--src/lj_alloc.c275
-rw-r--r--src/lj_alloc.h3
-rw-r--r--src/lj_api.c393
-rw-r--r--src/lj_arch.h424
-rw-r--r--src/lj_asm.c1016
-rw-r--r--src/lj_asm_arm.h663
-rw-r--r--src/lj_asm_arm64.h2070
-rw-r--r--src/lj_asm_mips.h1798
-rw-r--r--src/lj_asm_ppc.h920
-rw-r--r--src/lj_asm_x86.h1363
-rw-r--r--src/lj_assert.c28
-rw-r--r--src/lj_bc.h4
-rw-r--r--src/lj_bcdump.h6
-rw-r--r--src/lj_bcread.c158
-rw-r--r--src/lj_bcwrite.c245
-rw-r--r--src/lj_buf.c305
-rw-r--r--src/lj_buf.h198
-rw-r--r--src/lj_carith.c83
-rw-r--r--src/lj_carith.h10
-rw-r--r--src/lj_ccall.c397
-rw-r--r--src/lj_ccall.h49
-rw-r--r--src/lj_ccallback.c280
-rw-r--r--src/lj_cconv.c66
-rw-r--r--src/lj_cconv.h5
-rw-r--r--src/lj_cdata.c67
-rw-r--r--src/lj_cdata.h14
-rw-r--r--src/lj_clib.c47
-rw-r--r--src/lj_cparse.c178
-rw-r--r--src/lj_cparse.h2
-rw-r--r--src/lj_crecord.c391
-rw-r--r--src/lj_crecord.h12
-rw-r--r--src/lj_ctype.c30
-rw-r--r--src/lj_ctype.h27
-rw-r--r--src/lj_debug.c204
-rw-r--r--src/lj_debug.h8
-rw-r--r--src/lj_def.h56
-rw-r--r--src/lj_dispatch.c113
-rw-r--r--src/lj_dispatch.h49
-rw-r--r--src/lj_emit_arm.h71
-rw-r--r--src/lj_emit_arm64.h424
-rw-r--r--src/lj_emit_mips.h161
-rw-r--r--src/lj_emit_ppc.h34
-rw-r--r--src/lj_emit_x86.h200
-rw-r--r--src/lj_err.c692
-rw-r--r--src/lj_err.h19
-rw-r--r--src/lj_errmsg.h23
-rw-r--r--src/lj_ffrecord.c1013
-rw-r--r--src/lj_frame.h160
-rw-r--r--src/lj_func.c18
-rw-r--r--src/lj_gc.c183
-rw-r--r--src/lj_gc.h16
-rw-r--r--src/lj_gdbjit.c55
-rw-r--r--src/lj_ir.c175
-rw-r--r--src/lj_ir.h119
-rw-r--r--src/lj_ircall.h238
-rw-r--r--src/lj_iropt.h17
-rw-r--r--src/lj_jit.h233
-rw-r--r--src/lj_lex.c391
-rw-r--r--src/lj_lex.h23
-rw-r--r--src/lj_lib.c131
-rw-r--r--src/lj_lib.h40
-rw-r--r--src/lj_load.c6
-rw-r--r--src/lj_mcode.c78
-rw-r--r--src/lj_meta.c134
-rw-r--r--src/lj_meta.h1
-rw-r--r--src/lj_obj.c18
-rw-r--r--src/lj_obj.h303
-rw-r--r--src/lj_opt_fold.c626
-rw-r--r--src/lj_opt_loop.c45
-rw-r--r--src/lj_opt_mem.c195
-rw-r--r--src/lj_opt_narrow.c66
-rw-r--r--src/lj_opt_sink.c14
-rw-r--r--src/lj_opt_split.c198
-rw-r--r--src/lj_parse.c301
-rw-r--r--src/lj_prng.c259
-rw-r--r--src/lj_prng.h24
-rw-r--r--src/lj_profile.c367
-rw-r--r--src/lj_profile.h21
-rw-r--r--src/lj_record.c973
-rw-r--r--src/lj_record.h3
-rw-r--r--src/lj_serialize.c539
-rw-r--r--src/lj_serialize.h28
-rw-r--r--src/lj_snap.c271
-rw-r--r--src/lj_snap.h3
-rw-r--r--src/lj_state.c112
-rw-r--r--src/lj_state.h4
-rw-r--r--src/lj_str.c519
-rw-r--r--src/lj_str.h39
-rw-r--r--src/lj_strfmt.c606
-rw-r--r--src/lj_strfmt.h131
-rw-r--r--src/lj_strfmt_num.c592
-rw-r--r--src/lj_strscan.c80
-rw-r--r--src/lj_strscan.h3
-rw-r--r--src/lj_tab.c222
-rw-r--r--src/lj_tab.h34
-rw-r--r--src/lj_target.h9
-rw-r--r--src/lj_target_arm.h5
-rw-r--r--src/lj_target_arm64.h336
-rw-r--r--src/lj_target_mips.h195
-rw-r--r--src/lj_target_ppc.h2
-rw-r--r--src/lj_target_x86.h41
-rw-r--r--src/lj_trace.c257
-rw-r--r--src/lj_trace.h5
-rw-r--r--src/lj_traceerr.h4
-rw-r--r--src/lj_udata.c28
-rw-r--r--src/lj_udata.h3
-rw-r--r--src/lj_vm.h36
-rw-r--r--src/lj_vmevent.c1
-rw-r--r--src/lj_vmmath.c97
-rw-r--r--src/ljamalg.c18
-rw-r--r--src/lua.h11
-rw-r--r--src/luaconf.h10
-rw-r--r--src/luajit.c133
-rw-r--r--src/luajit.h15
-rw-r--r--src/lualib.h1
-rw-r--r--src/msvcbuild.bat14
-rw-r--r--src/nxbuild.bat159
-rw-r--r--src/ps4build.bat34
-rw-r--r--src/ps5build.bat123
-rw-r--r--src/psvitabuild.bat2
-rw-r--r--src/vm_arm.dasc466
-rw-r--r--src/vm_arm64.dasc4158
-rw-r--r--src/vm_mips.dasc2673
-rw-r--r--src/vm_mips64.dasc5538
-rw-r--r--src/vm_ppc.dasc1680
-rw-r--r--src/vm_ppcspe.dasc3691
-rw-r--r--src/vm_x64.dasc4935
-rw-r--r--src/vm_x86.dasc1773
-rw-r--r--src/xb1build.bat101
-rw-r--r--src/xedkbuild.bat2
205 files changed, 47044 insertions, 13713 deletions
diff --git a/Makefile b/Makefile
index 35da2e73..10c83c73 100644
--- a/Makefile
+++ b/Makefile
@@ -14,9 +14,10 @@
##############################################################################
MAJVER= 2
-MINVER= 0
-RELVER= 5
-VERSION= $(MAJVER).$(MINVER).$(RELVER)
+MINVER= 1
+RELVER= 0
+PREREL= -beta3
+VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL)
ABIVER= 5.1
##############################################################################
@@ -84,8 +85,10 @@ FILE_SO= libluajit.so
FILE_MAN= luajit.1
FILE_PC= luajit.pc
FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h
-FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \
- dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua
+FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \
+ dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \
+ dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \
+ dis_mips64.lua dis_mips64el.lua vmdef.lua
ifeq (,$(findstring Windows,$(OS)))
HOST_SYS:= $(shell uname -s)
@@ -115,7 +118,7 @@ install: $(INSTALL_DEP)
$(MKDIR) $(INSTALL_DIRS)
cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T)
cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || :
- $(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
+ $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2)
cd src && test -f $(FILE_SO) && \
$(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \
( $(LDCONFIG) $(INSTALL_LIB) || : ) && \
@@ -127,12 +130,18 @@ install: $(INSTALL_DEP)
$(RM) $(FILE_PC).tmp
cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC)
cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB)
- $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)
@echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ===="
+ @echo ""
+ @echo "Note: the development releases deliberately do NOT install a symlink for luajit"
+ @echo "You can do this now by running this command (with sudo):"
+ @echo ""
+ @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)"
+ @echo ""
+
uninstall:
@echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ===="
- $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
+ $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC)
for file in $(FILES_JITLIB); do \
$(UNINSTALL) $(INSTALL_JITLIB)/$$file; \
done
diff --git a/README b/README
index 107ff66c..1faef255 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
-README for LuaJIT 2.0.5
------------------------
+README for LuaJIT 2.1.0-beta3
+-----------------------------
LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language.
diff --git a/doc/contact.html b/doc/contact.html
index 5629eb23..89c1af95 100644
--- a/doc/contact.html
+++ b/doc/contact.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>Contact</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html
new file mode 100644
index 00000000..192110a7
--- /dev/null
+++ b/doc/ext_buffer.html
@@ -0,0 +1,697 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title>String Buffer Library</title>
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2022">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+<style type="text/css">
+.lib {
+ vertical-align: middle;
+ margin-left: 5px;
+ padding: 0 5px;
+ font-size: 60%;
+ border-radius: 5px;
+ background: #c5d5ff;
+ color: #000;
+}
+</style>
+</head>
+<body>
+<div id="site">
+<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>String Buffer Library</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a class="current" href="ext_buffer.html">String Buffers</a>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+The string buffer library allows <b>high-performance manipulation of
+string-like data</b>.
+</p>
+<p>
+Unlike Lua strings, which are constants, string buffers are
+<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data
+can be stored, formatted and encoded into a string buffer and later
+converted, extracted or decoded.
+</p>
+<p>
+The convenient string buffer API simplifies common string manipulation
+tasks, that would otherwise require creating many intermediate strings.
+String buffers improve performance by eliminating redundant memory
+copies, object creation, string interning and garbage collection
+overhead. In conjunction with the FFI library, they allow zero-copy
+operations.
+</p>
+<p>
+The string buffer library also includes a high-performance
+<a href="serialize">serializer</a> for Lua objects.
+</p>
+
+<h2 id="wip" style="color:#ff0000">Work in Progress</h2>
+<p>
+<b style="color:#ff0000">This library is a work in progress. More
+functionality will be added soon.</b>
+</p>
+
+<h2 id="use">Using the String Buffer Library</h2>
+<p>
+The string buffer library is built into LuaJIT by default, but it's not
+loaded by default. Add this to the start of every Lua file that needs
+one of its functions:
+</p>
+<pre class="code">
+local buffer = require("string.buffer")
+</pre>
+<p>
+The convention for the syntax shown on this page is that <tt>buffer</tt>
+refers to the buffer library and <tt>buf</tt> refers to an individual
+buffer object.
+</p>
+<p>
+Please note the difference between a Lua function call, e.g.
+<tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g.
+<tt>buf:reset()</tt> (with a colon).
+</p>
+
+<h3 id="buffer_object">Buffer Objects</h3>
+<p>
+A buffer object is a garbage-collected Lua object. After creation with
+<tt>buffer.new()</tt>, it can (and should) be reused for many operations.
+When the last reference to a buffer object is gone, it will eventually
+be freed by the garbage collector, along with the allocated buffer
+space.
+</p>
+<p>
+Buffers operate like a FIFO (first-in first-out) data structure. Data
+can be appended (written) to the end of the buffer and consumed (read)
+from the front of the buffer. These operations may be freely mixed.
+</p>
+<p>
+The buffer space that holds the characters is managed automatically
+&mdash; it grows as needed and already consumed space is recycled. Use
+<tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need more
+control.
+</p>
+<p>
+The maximum size of a single buffer is the same as the maximum size of a
+Lua string, which is slightly below two gigabytes. For huge data sizes,
+neither strings nor buffers are the right data structure &mdash; use the
+FFI library to directly map memory or files up to the virtual memory
+limit of your OS.
+</p>
+
+<h3 id="buffer_overview">Buffer Method Overview</h3>
+<ul>
+<li>
+The <tt>buf:put*()</tt>-like methods append (write) characters to the
+end of the buffer.
+</li>
+<li>
+The <tt>buf:get*()</tt>-like methods consume (read) characters from the
+front of the buffer.
+</li>
+<li>
+Other methods, like <tt>buf:tostring()</tt> only read the buffer
+contents, but don't change the buffer.
+</li>
+<li>
+The <tt>buf:set()</tt> method allows zero-copy consumption of a string
+or an FFI cdata object as a buffer.
+</li>
+<li>
+The FFI-specific methods allow zero-copy read/write-style operations or
+modifying the buffer contents in-place. Please check the
+<a href="#ffi_caveats">FFI caveats</a> below, too.
+</li>
+<li>
+Methods that don't need to return anything specific, return the buffer
+object itself as a convenience. This allows method chaining, e.g.:
+<tt>buf:reset():encode(obj)</tt> or <tt>buf:skip(len):get()</tt>
+</li>
+</ul>
+
+<h2 id="create">Buffer Creation and Management</h2>
+
+<h3 id="buffer_new"><tt>local buf = buffer.new([size [,options]])<br>
+local buf = buffer.new([options])</tt></h3>
+<p>
+Creates a new buffer object.
+</p>
+<p>
+The optional <tt>size</tt> argument ensures a minimum initial buffer
+size. This is strictly an optimization when the required buffer size is
+known beforehand. The buffer space will grow as needed, in any case.
+</p>
+<p>
+The optional table <tt>options</tt> sets various
+<a href="#serialize_options">serialization options</a>.
+</p>
+
+<h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3>
+<p>
+Reset (empty) the buffer. The allocated buffer space is not freed and
+may be reused.
+</p>
+
+<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3>
+<p>
+The buffer space of the buffer object is freed. The object itself
+remains intact, empty and may be reused.
+</p>
+<p>
+Note: you normally don't need to use this method. The garbage collector
+automatically frees the buffer space, when the buffer object is
+collected. Use this method, if you need to free the associated memory
+immediately.
+</p>
+
+<h2 id="write">Buffer Writers</h2>
+
+<h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [,…])</tt></h3>
+<p>
+Appends a string <tt>str</tt>, a number <tt>num</tt> or any object
+<tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer.
+Multiple arguments are appended in the given order.
+</p>
+<p>
+Appending a buffer to a buffer is possible and short-circuited
+internally. But it still involves a copy. Better combine the buffer
+writes to use a single buffer.
+</p>
+
+<h3 id="buffer_putf"><tt>buf = buf:putf(format, …)</tt></h3>
+<p>
+Appends the formatted arguments to the buffer. The <tt>format</tt>
+string supports the same options as <tt>string.format()</tt>.
+</p>
+
+<h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata, len)</tt><span class="lib">FFI</span></h3>
+<p>
+Appends the given <tt>len</tt> number of bytes from the memory pointed
+to by the FFI <tt>cdata</tt> object to the buffer. The object needs to
+be convertible to a (constant) pointer.
+</p>
+
+<h3 id="buffer_set"><tt>buf = buf:set(str)<br>
+buf = buf:set(cdata, len)</tt><span class="lib">FFI</span></h3>
+<p>
+This method allows zero-copy consumption of a string or an FFI cdata
+object as a buffer. It stores a reference to the passed string
+<tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any buffer
+space originally allocated is freed. This is <i>not</i> an append
+operation, unlike the <tt>buf:put*()</tt> methods.
+</p>
+<p>
+After calling this method, the buffer behaves as if
+<tt>buf:free():put(str)</tt> or <tt>buf:free():put(cdata,&nbsp;len)</tt>
+had been called. However, the data is only referenced and not copied, as
+long as the buffer is only consumed.
+</p>
+<p>
+In case the buffer is written to later on, the referenced data is copied
+and the object reference is removed (copy-on-write semantics).
+</p>
+<p>
+The stored reference is an anchor for the garbage collector and keeps the
+originally passed string or FFI cdata object alive.
+</p>
+
+<h3 id="buffer_reserve"><tt>ptr, len = buf:reserve(size)</tt><span class="lib">FFI</span><br>
+<tt>buf = buf:commit(used)</tt><span class="lib">FFI</span></h3>
+<p>
+The <tt>reserve</tt> method reserves at least <tt>size</tt> bytes of
+write space in the buffer. It returns an <tt>uint8_t&nbsp;*</tt> FFI
+cdata pointer <tt>ptr</tt> that points to this space.
+</p>
+<p>
+The available length in bytes is returned in <tt>len</tt>. This is at
+least <tt>size</tt> bytes, but may be more to facilitate efficient
+buffer growth. You can either make use of the additional space or ignore
+<tt>len</tt> and only use <tt>size</tt> bytes.
+</p>
+<p>
+The <tt>commit</tt> method appends the <tt>used</tt> bytes of the
+previously returned write space to the buffer data.
+</p>
+<p>
+This pair of methods allows zero-copy use of C read-style APIs:
+</p>
+<pre class="code">
+local MIN_SIZE = 65536
+repeat
+ local ptr, len = buf:reserve(MIN_SIZE)
+ local n = C.read(fd, ptr, len)
+ if n == 0 then break end -- EOF.
+ if n &lt; 0 then error("read error") end
+ buf:commit(n)
+until false
+</pre>
+<p>
+The reserved write space is <i>not</i> initialized. At least the
+<tt>used</tt> bytes <b>must</b> be written to before calling the
+<tt>commit</tt> method. There's no need to call the <tt>commit</tt>
+method, if nothing is added to the buffer (e.g. on error).
+</p>
+
+<h2 id="read">Buffer Readers</h2>
+
+<h3 id="buffer_length"><tt>len = #buf</tt></h3>
+<p>
+Returns the current length of the buffer data in bytes.
+</p>
+
+<h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf […]</tt></h3>
+<p>
+The Lua concatenation operator <tt>..</tt> also accepts buffers, just
+like strings or numbers. It always returns a string and not a buffer.
+</p>
+<p>
+Note that although this is supported for convenience, this thwarts one
+of the main reasons to use buffers, which is to avoid string
+allocations. Rewrite it with <tt>buf:put()</tt> and <tt>buf:get()</tt>.
+</p>
+<p>
+Mixing this with unrelated objects that have a <tt>__concat</tt>
+metamethod may not work, since these probably only expect strings.
+</p>
+
+<h3 id="buffer_skip"><tt>buf = buf:skip(len)</tt></h3>
+<p>
+Skips (consumes) <tt>len</tt> bytes from the buffer up to the current
+length of the buffer data.
+</p>
+
+<h3 id="buffer_get"><tt>str, … = buf:get([len|nil] [,…])</tt></h3>
+<p>
+Consumes the buffer data and returns one or more strings. If called
+without arguments, the whole buffer data is consumed. If called with a
+number, up to <tt>len</tt> bytes are consumed. A <tt>nil</tt> argument
+consumes the remaining buffer space (this only makes sense as the last
+argument). Multiple arguments consume the buffer data in the given
+order.
+</p>
+<p>
+Note: a zero length or no remaining buffer data returns an empty string
+and not <tt>nil</tt>.
+</p>
+
+<h3 id="buffer_tostring"><tt>str = buf:tostring()<br>
+str = tostring(buf)</tt></h3>
+<p>
+Creates a string from the buffer data, but doesn't consume it. The
+buffer remains unchanged.
+</p>
+<p>
+Buffer objects also define a <tt>__tostring</tt> metamethod. This means
+buffers can be passed to the global <tt>tostring()</tt> function and
+many other functions that accept this in place of strings. The important
+internal uses in functions like <tt>io.write()</tt> are short-circuited
+to avoid the creation of an intermediate string object.
+</p>
+
+<h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span class="lib">FFI</span></h3>
+<p>
+Returns an <tt>uint8_t&nbsp;*</tt> FFI cdata pointer <tt>ptr</tt> that
+points to the buffer data. The length of the buffer data in bytes is
+returned in <tt>len</tt>.
+</p>
+<p>
+The returned pointer can be directly passed to C functions that expect a
+buffer and a length. You can also do bytewise reads
+(<tt>local&nbsp;x&nbsp;=&nbsp;ptr[i]</tt>) or writes
+(<tt>ptr[i]&nbsp;=&nbsp;0x40</tt>) of the buffer data.
+</p>
+<p>
+In conjunction with the <tt>skip</tt> method, this allows zero-copy use
+of C write-style APIs:
+</p>
+<pre class="code">
+repeat
+ local ptr, len = buf:ref()
+ if len == 0 then break end
+ local n = C.write(fd, ptr, len)
+ if n &lt; 0 then error("write error") end
+ buf:skip(n)
+until n >= len
+</pre>
+<p>
+Unlike Lua strings, buffer data is <i>not</i> implicitly
+zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that
+expect zero-terminated strings. If you're not using <tt>len</tt>, then
+you're doing something wrong.
+</p>
+
+<h2 id="serialize">Serialization of Lua Objects</h2>
+<p>
+The following functions and methods allow <b>high-speed serialization</b>
+(encoding) of a Lua object into a string and decoding it back to a Lua
+object. This allows convenient storage and transport of <b>structured
+data</b>.
+</p>
+<p>
+The encoded data is in an <a href="#serialize_format">internal binary
+format</a>. The data can be stored in files, binary-transparent
+databases or transmitted to other LuaJIT instances across threads,
+processes or networks.
+</p>
+<p>
+Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or
+server-class system, even when serializing many small objects. Decoding
+speed is mostly constrained by object creation cost.
+</p>
+<p>
+The serializer handles most Lua types, common FFI number types and
+nested structures. Functions, thread objects, other FFI cdata and full
+userdata cannot be serialized (yet).
+</p>
+<p>
+The encoder serializes nested structures as trees. Multiple references
+to a single object will be stored separately and create distinct objects
+after decoding. Circular references cause an error.
+</p>
+
+<h3 id="serialize_methods">Serialization Functions and Methods</h3>
+
+<h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br>
+buf = buf:encode(obj)</tt></h3>
+<p>
+Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone
+function returns a string <tt>str</tt>. The buffer method appends the
+encoding to the buffer.
+</p>
+<p>
+<tt>obj</tt> can be any of the supported Lua types &mdash; it doesn't
+need to be a Lua table.
+</p>
+<p>
+This function may throw an error when attempting to serialize
+unsupported object types, circular references or deeply nested tables.
+</p>
+
+<h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br>
+obj = buf:decode()</tt></h3>
+<p>
+The stand-alone function deserializes (decodes) the string
+<tt>str</tt>, the buffer method deserializes one object from the
+buffer. Both return a Lua object <tt>obj</tt>.
+</p>
+<p>
+The returned object may be any of the supported Lua types &mdash;
+even <tt>nil</tt>.
+</p>
+<p>
+This function may throw an error when fed with malformed or incomplete
+encoded data. The stand-alone function throws when there's left-over
+data after decoding a single top-level object. The buffer method leaves
+any left-over data in the buffer.
+</p>
+<p>
+Attempting to deserialize an FFI type will throw an error, if the FFI
+library is not built-in or has not been loaded, yet.
+</p>
+
+<h3 id="serialize_options">Serialization Options</h3>
+<p>
+The <tt>options</tt> table passed to <tt>buffer.new()</tt> may contain
+the following members (all optional):
+</p>
+<ul>
+<li>
+<tt>dict</tt> is a Lua table holding a <b>dictionary of strings</b> that
+commonly occur as table keys of objects you are serializing. These keys
+are compactly encoded as indexes during serialization. A well-chosen
+dictionary saves space and improves serialization performance.
+</li>
+<li>
+<tt>metatable</tt> is a Lua table holding a <b>dictionary of metatables</b>
+for the table objects you are serializing.
+</li>
+</ul>
+<p>
+<tt>dict</tt> needs to be an array of strings and <tt>metatable</tt> needs
+to be an array of tables. Both starting at index 1 and without holes (no
+<tt>nil</tt> in between). The tables are anchored in the buffer object and
+internally modified into a two-way index (don't do this yourself, just pass
+a plain array). The tables must not be modified after they have been passed
+to <tt>buffer.new()</tt>.
+</p>
+<p>
+The <tt>dict</tt> and <tt>metatable</tt> tables used by the encoder and
+decoder must be the same. Put the most common entries at the front. Extend
+at the end to ensure backwards-compatibility &mdash; older encodings can
+then still be read. You may also set some indexes to <tt>false</tt> to
+explicitly drop backwards-compatibility. Old encodings that use these
+indexes will throw an error when decoded.
+</p>
+<p>
+Metatables that are not found in the <tt>metatable</tt> dictionary are
+ignored when encoding. Decoding returns a table with a <tt>nil</tt>
+metatable.
+</p>
+<p>
+Note: parsing and preparation of the options table is somewhat
+expensive. Create a buffer object only once and recycle it for multiple
+uses. Avoid mixing encoder and decoder buffers, since the
+<tt>buf:set()</tt> method frees the already allocated buffer space:
+</p>
+<pre class="code">
+local options = {
+ dict = { "commonly", "used", "string", "keys" },
+}
+local buf_enc = buffer.new(options)
+local buf_dec = buffer.new(options)
+
+local function encode(obj)
+ return buf_enc:reset():encode(obj):get()
+end
+
+local function decode(str)
+ return buf_dec:set(str):decode()
+end
+</pre>
+
+<h3 id="serialize_stream">Streaming Serialization</h3>
+<p>
+In some contexts, it's desirable to do piecewise serialization of large
+datasets, also known as <i>streaming</i>.
+</p>
+<p>
+This serialization format can be safely concatenated and supports streaming.
+Multiple encodings can simply be appended to a buffer and later decoded
+individually:
+</p>
+<pre class="code">
+local buf = buffer.new()
+buf:encode(obj1)
+buf:encode(obj2)
+local copy1 = buf:decode()
+local copy2 = buf:decode()
+</pre>
+<p>
+Here's how to iterate over a stream:
+</p>
+<pre class="code">
+while #buf ~= 0 do
+ local obj = buf:decode()
+ -- Do something with obj.
+end
+</pre>
+<p>
+Since the serialization format doesn't prepend a length to its encoding,
+network applications may need to transmit the length, too.
+</p>
+
+<h3 id="serialize_format">Serialization Format Specification</h3>
+<p>
+This serialization format is designed for <b>internal use</b> by LuaJIT
+applications. Serialized data is upwards-compatible and portable across
+all supported LuaJIT platforms.
+</p>
+<p>
+It's an <b>8-bit binary format</b> and not human-readable. It uses e.g.
+embedded zeroes and stores embedded Lua string objects unmodified, which
+are 8-bit-clean, too. Encoded data can be safely concatenated for
+streaming and later decoded one top-level object at a time.
+</p>
+<p>
+The encoding is reasonably compact, but tuned for maximum performance,
+not for minimum space usage. It compresses well with any of the common
+byte-oriented data compression algorithms.
+</p>
+<p>
+Although documented here for reference, this format is explicitly
+<b>not</b> intended to be a 'public standard' for structured data
+interchange across computer languages (like JSON or MessagePack). Please
+do not use it as such.
+</p>
+<p>
+The specification is given below as a context-free grammar with a
+top-level <tt>object</tt> as the starting point. Alternatives are
+separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats.
+Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are
+either plain hex numbers, encoded as bytes, or have a <tt>.format</tt>
+suffix.
+</p>
+<pre>
+object → nil | false | true
+ | null | lightud32 | lightud64
+ | int | num | tab | tab_mt
+ | int64 | uint64 | complex
+ | string
+
+nil → 0x00
+false → 0x01
+true → 0x02
+
+null → 0x03 // NULL lightuserdata
+lightud32 → 0x04 data.I // 32 bit lightuserdata
+lightud64 → 0x05 data.L // 64 bit lightuserdata
+
+int → 0x06 int.I // int32_t
+num → 0x07 double.L
+
+tab → 0x08 // Empty table
+ | 0x09 h.U h*{object object} // Key/value hash
+ | 0x0a a.U a*object // 0-based array
+ | 0x0b a.U a*object h.U h*{object object} // Mixed
+ | 0x0c a.U (a-1)*object // 1-based array
+ | 0x0d a.U (a-1)*object h.U h*{object object} // Mixed
+tab_mt → 0x0e (index-1).U tab // Metatable dict entry
+
+int64 → 0x10 int.L // FFI int64_t
+uint64 → 0x11 uint.L // FFI uint64_t
+complex → 0x12 re.L im.L // FFI complex
+
+string → (0x20+len).U len*char.B
+ | 0x0f (index-1).U // String dict entry
+
+.B = 8 bit
+.I = 32 bit little-endian
+.L = 64 bit little-endian
+.U = prefix-encoded 32 bit unsigned number n:
+ 0x00..0xdf → n.B
+ 0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B
+ 0x1fe0.. → 0xff n.I
+</pre>
+
+<h2 id="error">Error handling</h2>
+<p>
+Many of the buffer methods can throw an error. Out-of-memory or usage
+errors are best caught with an outer wrapper for larger parts of code.
+There's not much one can do after that, anyway.
+</p>
+<p>
+OTOH, you may want to catch some errors individually. Buffer methods need
+to receive the buffer object as the first argument. The Lua colon-syntax
+<tt>obj:method()</tt> does that implicitly. But to wrap a method with
+<tt>pcall()</tt>, the arguments need to be passed like this:
+</p>
+<pre class="code">
+local ok, err = pcall(buf.encode, buf, obj)
+if not ok then
+ -- Handle error in err.
+end
+</pre>
+
+<h2 id="ffi_caveats">FFI caveats</h2>
+<p>
+The string buffer library has been designed to work well together with
+the FFI library. But due to the low-level nature of the FFI library,
+some care needs to be taken:
+</p>
+<p>
+First, please remember that FFI pointers are zero-indexed. The space
+returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts at the
+returned pointer and ends before <tt>len</tt> bytes after that.
+</p>
+<p>
+I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index
+is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no valid
+index at all. The returned pointer may even be <tt>NULL</tt>.
+</p>
+<p>
+The space pointed to by the returned pointer is only valid as long as
+the buffer is not modified in any way (neither append, nor consume, nor
+reset, etc.). The pointer is also not a GC anchor for the buffer object
+itself.
+</p>
+<p>
+Buffer data is only guaranteed to be byte-aligned. Casting the returned
+pointer to a data type with higher alignment may cause unaligned
+accesses. It depends on the CPU architecture whether this is allowed or
+not (it's always OK on x86/x64 and mostly OK on other modern
+architectures).
+</p>
+<p>
+FFI pointers or references do not count as GC anchors for an underlying
+object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt> is
+anchored by <tt>buf:set(array,&nbsp;len)</tt>, but not by
+<tt>buf:set(array+offset,&nbsp;len)</tt>. The addition of the offset
+creates a new pointer, even when the offset is zero. In this case, you
+need to make sure there's still a reference to the original array as
+long as its contents are in use by the buffer.
+</p>
+<p>
+Even though each LuaJIT VM instance is single-threaded (but you can
+create multiple VMs), FFI data structures can be accessed concurrently.
+Be careful when reading/writing FFI cdata from/to buffers to avoid
+concurrent accesses or modifications. In particular, the memory
+referenced by <tt>buf:set(cdata,&nbsp;len)</tt> must not be modified
+while buffer readers are working on it. Shared, but read-only memory
+mappings of files are OK, but only if the file does not change.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2022
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html
index b328047a..21dc1310 100644
--- a/doc/ext_c_api.html
+++ b/doc/ext_c_api.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>Lua/C API Extensions</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a class="current" href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html
index 04b78d98..b39ae314 100644
--- a/doc/ext_ffi.html
+++ b/doc/ext_ffi.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>FFI Library</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html
index 962db6dc..b8da1c95 100644
--- a/doc/ext_ffi_api.html
+++ b/doc/ext_ffi_api.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>ffi.* API Functions</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -42,9 +42,13 @@ td.abiparam { font-weight: bold; width: 6em; }
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
@@ -460,6 +464,10 @@ otherwise. The following parameters are currently defined:
<td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr>
<tr class="odd">
<td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr>
+<tr class="even">
+<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr>
+<tr class="odd">
+<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr>
</table>
<h3 id="ffi_os"><tt>ffi.os</tt></h3>
@@ -536,8 +544,8 @@ corresponding ctype.
The parser for Lua source code treats numeric literals with the
suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64&nbsp;bit
integers. Case doesn't matter, but uppercase is recommended for
-readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal
-(<tt>0x2aLL</tt>) literals.
+readability. It handles decimal (<tt>42LL</tt>), hexadecimal
+(<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals.
</p>
<p>
The imaginary part of complex numbers can be specified by suffixing
diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html
index 6c6f8ad7..22f7f17c 100644
--- a/doc/ext_ffi_semantics.html
+++ b/doc/ext_ffi_semantics.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>FFI Semantics</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -42,9 +42,13 @@ td.convop { font-style: italic; width: 40%; }
<a class="current" href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
@@ -177,6 +181,8 @@ a <tt>typedef</tt>, except re-declarations will be ignored):
<tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>,
<tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li>
+<li>From <tt>&lt;unistd.h&gt;</tt> (POSIX): <tt>ssize_t</tt>.</li>
+
</ul>
<p>
You're encouraged to use these types in preference to
@@ -724,6 +730,22 @@ You'll have to explicitly convert a 64&nbsp;bit integer to a Lua
number (e.g. for regular floating-point calculations) with
<tt>tonumber()</tt>. But note this may incur a precision loss.</li>
+<li><b>64&nbsp;bit bitwise operations</b>: the rules for 64&nbsp;bit
+arithmetic operators apply analogously.<br>
+
+Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt>
+converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and
+returns a Lua number.<br>
+
+For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the
+conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to
+<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br>
+
+For all other operations, only the first argument is used to determine
+the output type. This implies that a cdata number as a shift count for
+shifts and rotates is accepted, but that alone does <em>not</em> cause
+a cdata number output.
+
</ul>
<h3 id="cdata_comp">Comparisons of cdata objects</h3>
@@ -1195,14 +1217,12 @@ The following operations are currently not compiled and may exhibit
suboptimal performance, especially when used in inner loops:
</p>
<ul>
-<li>Bitfield accesses and initializations.</li>
<li>Vector operations.</li>
<li>Table initializers.</li>
<li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li>
-<li>Allocations of variable-length arrays or structs.</li>
-<li>Allocations of C&nbsp;types with a size &gt; 128&nbsp;bytes or an
-alignment &gt; 8&nbsp;bytes.</li>
-<li>Conversions from lightuserdata to <tt>void&nbsp;*</tt>.</li>
+<li>Non-default initialization of VLA/VLS or large C&nbsp;types
+(&gt; 128&nbsp;bytes or &gt; 16 array elements).</li>
+<li>Bitfield initializations.</li>
<li>Pointer differences for element sizes that are not a power of
two.</li>
<li>Calls to C&nbsp;functions with aggregates passed or returned by
@@ -1218,7 +1238,6 @@ value.</li>
Other missing features:
</p>
<ul>
-<li>Bit operations for 64&nbsp;bit types.</li>
<li>Arithmetic for <tt>complex</tt> numbers.</li>
<li>Passing structs by value to vararg C&nbsp;functions.</li>
<li><a href="extensions.html#exceptions">C++ exception interoperability</a>
diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html
index de6b6f5e..e0bf9040 100644
--- a/doc/ext_ffi_tutorial.html
+++ b/doc/ext_ffi_tutorial.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>FFI Tutorial</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -44,9 +44,13 @@ td.idiomlua b { font-weight: normal; color: #2142bf; }
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
diff --git a/doc/ext_jit.html b/doc/ext_jit.html
index 8f58a0c7..b8a25967 100644
--- a/doc/ext_jit.html
+++ b/doc/ext_jit.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>jit.* Library</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -37,9 +37,13 @@
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a class="current" href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
@@ -145,7 +149,7 @@ Contains the target OS name:
<h3 id="jit_arch"><tt>jit.arch</tt></h3>
<p>
Contains the target architecture name:
-"x86", "x64", "arm", "ppc", "ppcspe", or "mips".
+"x86", "x64", "arm", "arm64", "arm64be", "ppc", "mips", "mipsel", "mips64", "mips64el", "mips64r6", or "mips64r6el".
</p>
<h2 id="jit_opt"><tt>jit.opt.*</tt> &mdash; JIT compiler optimization control</h2>
diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html
new file mode 100644
index 00000000..b645a204
--- /dev/null
+++ b/doc/ext_profiler.html
@@ -0,0 +1,361 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title>Profiler</title>
+<meta charset="utf-8">
+<meta name="Copyright" content="Copyright (C) 2005-2022">
+<meta name="Language" content="en">
+<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
+<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print">
+</head>
+<body>
+<div id="site">
+<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a>
+</div>
+<div id="head">
+<h1>Profiler</h1>
+</div>
+<div id="nav">
+<ul><li>
+<a href="luajit.html">LuaJIT</a>
+<ul><li>
+<a href="https://luajit.org/download.html">Download <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="install.html">Installation</a>
+</li><li>
+<a href="running.html">Running</a>
+</li></ul>
+</li><li>
+<a href="extensions.html">Extensions</a>
+<ul><li>
+<a href="ext_ffi.html">FFI Library</a>
+<ul><li>
+<a href="ext_ffi_tutorial.html">FFI Tutorial</a>
+</li><li>
+<a href="ext_ffi_api.html">ffi.* API</a>
+</li><li>
+<a href="ext_ffi_semantics.html">FFI Semantics</a>
+</li></ul>
+</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
+<a href="ext_jit.html">jit.* Library</a>
+</li><li>
+<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a class="current" href="ext_profiler.html">Profiler</a>
+</li></ul>
+</li><li>
+<a href="status.html">Status</a>
+</li><li>
+<a href="faq.html">FAQ</a>
+</li><li>
+<a href="http://wiki.luajit.org/">Wiki <span class="ext">&raquo;</span></a>
+</li><li>
+<a href="https://luajit.org/list.html">Mailing List <span class="ext">&raquo;</span></a>
+</li></ul>
+</div>
+<div id="main">
+<p>
+LuaJIT has an integrated statistical profiler with very low overhead. It
+allows sampling the currently executing stack and other parameters in
+regular intervals.
+</p>
+<p>
+The integrated profiler can be accessed from three levels:
+</p>
+<ul>
+<li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the
+<a href="#j_p"><tt>-jp</tt></a> command line option.</li>
+<li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li>
+<li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li>
+</ul>
+
+<h2 id="hl_profiler">High-Level Profiler</h2>
+<p>
+The bundled high-level profiler offers basic profiling functionality. It
+generates simple textual summaries or source code annotations. It can be
+accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option
+or from Lua code by loading the underlying <tt>jit.p</tt> module.
+</p>
+<p>
+To cut to the chase &mdash; run this to get a CPU usage profile by
+function name:
+</p>
+<pre class="code">
+luajit -jp myapp.lua
+</pre>
+<p>
+It's <em>not</em> a stated goal of the bundled profiler to add every
+possible option or to cater for special profiling needs. The low-level
+profiler APIs are documented below. They may be used by third-party
+authors to implement advanced functionality, e.g. IDE integration or
+graphical profilers.
+</p>
+<p>
+Note: Sampling works for both interpreted and JIT-compiled code. The
+results for JIT-compiled code may sometimes be surprising. LuaJIT
+heavily optimizes and inlines Lua code &mdash; there's no simple
+one-to-one correspondence between source code lines and the sampled
+machine code.
+</p>
+
+<h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3>
+<p>
+The <tt>-jp</tt> command line option starts the high-level profiler.
+When the application run by the command line terminates, the profiler
+stops and writes the results to <tt>stdout</tt> or to the specified
+<tt>output</tt> file.
+</p>
+<p>
+The <tt>options</tt> argument specifies how the profiling is to be
+performed:
+</p>
+<ul>
+<li><tt>f</tt> &mdash; Stack dump: function name, otherwise module:line.
+This is the default mode.</li>
+<li><tt>F</tt> &mdash; Stack dump: ditto, but dump module:name.</li>
+<li><tt>l</tt> &mdash; Stack dump: module:line.</li>
+<li><tt>&lt;number&gt;</tt> &mdash; Stack dump depth (callee &larr;
+caller). Default: 1.</li>
+<li><tt>-&lt;number&gt;</tt> &mdash; Inverse stack dump depth (caller
+&rarr; callee).</li>
+<li><tt>s</tt> &mdash; Split stack dump after first stack level. Implies
+depth&nbsp;&ge;&nbsp;2 or depth&nbsp;&le;&nbsp;-2.</li>
+<li><tt>p</tt> &mdash; Show full path for module names.</li>
+<li><tt>v</tt> &mdash; Show VM states.</li>
+<li><tt>z</tt> &mdash; Show <a href="#jit_zone">zones</a>.</li>
+<li><tt>r</tt> &mdash; Show raw sample counts. Default: show percentages.</li>
+<li><tt>a</tt> &mdash; Annotate excerpts from source code files.</li>
+<li><tt>A</tt> &mdash; Annotate complete source code files.</li>
+<li><tt>G</tt> &mdash; Produce raw output suitable for graphical tools.</li>
+<li><tt>m&lt;number&gt;</tt> &mdash; Minimum sample percentage to be shown.
+Default: 3%.</li>
+<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds.
+Default: 10ms.<br>
+Note: The actual sampling precision is OS-dependent.</li>
+</ul>
+<p>
+The default output for <tt>-jp</tt> is a list of the most CPU consuming
+spots in the application. Increasing the stack dump depth with (say)
+<tt>-jp=2</tt> may help to point out the main callers or callees of
+hotspots. But sample aggregation is still flat per unique stack dump.
+</p>
+<p>
+To get a two-level view (split view) of callers/callees, use
+<tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second
+level are relative to the first level.
+</p>
+<p>
+To see how much time is spent in each line relative to a function, use
+<tt>-jp=fl</tt>.
+</p>
+<p>
+To see how much time is spent in different VM states or
+<a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>.
+</p>
+<p>
+Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level
+views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time
+spent in a VM state or zone vs. hotspots. This can be used to answer
+questions like "Which time-consuming functions are only interpreted?" or
+"What's the garbage collector overhead for a specific function?".
+</p>
+<p>
+Multiple options can be combined &mdash; but not all combinations make
+sense, see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels
+deep in 4ms intervals and shows a split view of the CPU consuming
+functions and their callers with a 1% threshold.
+</p>
+<p>
+Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are
+always flat and at the line level. Obviously, the source code files need
+to be readable by the profiler script.
+</p>
+<p>
+The high-level profiler can also be started and stopped from Lua code with:
+</p>
+<pre class="code">
+require("jit.p").start(options, output)
+...
+require("jit.p").stop()
+</pre>
+
+<h3 id="jit_zone"><tt>jit.zone</tt> &mdash; Zones</h3>
+<p>
+Zones can be used to provide information about different parts of an
+application to the high-level profiler. E.g. a game could make use of an
+<tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical,
+organized as a stack.
+</p>
+<p>
+The <tt>jit.zone</tt> module needs to be loaded explicitly:
+</p>
+<pre class="code">
+local zone = require("jit.zone")
+</pre>
+<ul>
+<li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li>
+<li><tt>zone()</tt> pops the current zone from the zone stack and
+returns its name.</li>
+<li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li>
+<li><tt>zone:flush()</tt> flushes the zone stack.</li>
+</ul>
+<p>
+To show the time spent in each zone use <tt>-jp=z</tt>. To show the time
+spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>.
+</p>
+
+<h2 id="ll_lua_api">Low-level Lua API</h2>
+<p>
+The <tt>jit.profile</tt> module gives access to the low-level API of the
+profiler from Lua code. This module needs to be loaded explicitly:
+<pre class="code">
+local profile = require("jit.profile")
+</pre>
+<p>
+This module can be used to implement your own higher-level profiler.
+A typical profiling run starts the profiler, captures stack dumps in
+the profiler callback, adds them to a hash table to aggregate the number
+of samples, stops the profiler and then analyzes all captured
+stack dumps. Other parameters can be sampled in the profiler callback,
+too. But it's important not to spend too much time in the callback,
+since this may skew the statistics.
+</p>
+
+<h3 id="profile_start"><tt>profile.start(mode, cb)</tt>
+&mdash; Start profiler</h3>
+<p>
+This function starts the profiler. The <tt>mode</tt> argument is a
+string holding options:
+</p>
+<ul>
+<li><tt>f</tt> &mdash; Profile with precision down to the function level.</li>
+<li><tt>l</tt> &mdash; Profile with precision down to the line level.</li>
+<li><tt>i&lt;number&gt;</tt> &mdash; Sampling interval in milliseconds (default
+10ms).<br>
+Note: The actual sampling precision is OS-dependent.
+</li>
+</ul>
+<p>
+The <tt>cb</tt> argument is a callback function which is called with
+three arguments: <tt>(thread, samples, vmstate)</tt>. The callback is
+called on a separate coroutine; the <tt>thread</tt> argument is the
+state that holds the stack to sample for profiling. Note: do
+<em>not</em> modify the stack of that state or call functions on it.
+</p>
+<p>
+<tt>samples</tt> gives the number of accumulated samples since the last
+callback (usually 1).
+</p>
+<p>
+<tt>vmstate</tt> holds the VM state at the time the profiling timer
+triggered. This may or may not correspond to the state of the VM when
+the profiling callback is called. The state is either <tt>'N'</tt>
+native (compiled) code, <tt>'I'</tt> interpreted code, <tt>'C'</tt>
+C&nbsp;code, <tt>'G'</tt> the garbage collector, or <tt>'J'</tt> the JIT
+compiler.
+</p>
+
+<h3 id="profile_stop"><tt>profile.stop()</tt>
+&mdash; Stop profiler</h3>
+<p>
+This function stops the profiler.
+</p>
+
+<h3 id="profile_dump"><tt>dump = profile.dumpstack([thread,] fmt, depth)</tt>
+&mdash; Dump stack </h3>
+<p>
+This function allows taking stack dumps in an efficient manner. It
+returns a string with a stack dump for the <tt>thread</tt> (coroutine),
+formatted according to the <tt>fmt</tt> argument:
+</p>
+<ul>
+<li><tt>p</tt> &mdash; Preserve the full path for module names. Otherwise,
+only the file name is used.</li>
+<li><tt>f</tt> &mdash; Dump the function name if it can be derived. Otherwise,
+use module:line.</li>
+<li><tt>F</tt> &mdash; Ditto, but dump module:name.</li>
+<li><tt>l</tt> &mdash; Dump module:line.</li>
+<li><tt>Z</tt> &mdash; Zap the following characters for the last dumped
+frame.</li>
+<li>All other characters are added verbatim to the output string.</li>
+</ul>
+<p>
+The <tt>depth</tt> argument gives the number of frames to dump, starting
+at the topmost frame of the thread. A negative number dumps the frames in
+inverse order.
+</p>
+<p>
+The first example prints a list of the current module names and line
+numbers of up to 10 frames in separate lines. The second example prints
+semicolon-separated function names for all frames (up to 100) in inverse
+order:
+</p>
+<pre class="code">
+print(profile.dumpstack(thread, "l\n", 10))
+print(profile.dumpstack(thread, "lZ;", -100))
+</pre>
+
+<h2 id="ll_c_api">Low-level C API</h2>
+<p>
+The profiler can be controlled directly from C&nbsp;code, e.g. for
+use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see
+<a href="ext_c_api.html">Lua/C API</a> extensions).
+</p>
+
+<h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt>
+&mdash; Start profiler</h3>
+<p>
+This function starts the profiler. <a href="#profile_start">See
+above</a> for a description of the <tt>mode</tt> argument.
+</p>
+<p>
+The <tt>cb</tt> argument is a callback function with the following
+declaration:
+</p>
+<pre class="code">
+typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
+ int samples, int vmstate);
+</pre>
+<p>
+<tt>data</tt> is available for use by the callback. <tt>L</tt> is the
+state that holds the stack to sample for profiling. Note: do
+<em>not</em> modify this stack or call functions on this stack &mdash;
+use a separate coroutine for this purpose. <a href="#profile_start">See
+above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>.
+</p>
+
+<h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt>
+&mdash; Stop profiler</h3>
+<p>
+This function stops the profiler.
+</p>
+
+<h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt>
+&mdash; Dump stack </h3>
+<p>
+This function allows taking stack dumps in an efficient manner.
+<a href="#profile_dump">See above</a> for a description of <tt>fmt</tt>
+and <tt>depth</tt>.
+</p>
+<p>
+This function returns a <tt>const&nbsp;char&nbsp;*</tt> pointing to a
+private string buffer of the profiler. The <tt>int&nbsp;*len</tt>
+argument returns the length of the output string. The buffer is
+overwritten on the next call and deallocated when the profiler stops.
+You either need to consume the content immediately or copy it for later
+use.
+</p>
+<br class="flush">
+</div>
+<div id="foot">
+<hr class="hide">
+Copyright &copy; 2005-2022
+<span class="noprint">
+&middot;
+<a href="contact.html">Contact</a>
+</span>
+</div>
+</body>
+</html>
diff --git a/doc/extensions.html b/doc/extensions.html
index 3ed13804..040fc588 100644
--- a/doc/extensions.html
+++ b/doc/extensions.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>Extensions</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -54,9 +54,13 @@ td.excinterop {
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
@@ -107,6 +111,9 @@ bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap
This module is a LuaJIT built-in &mdash; you don't need to download or
install Lua BitOp. The Lua BitOp site has full documentation for all
<a href="https://bitop.luajit.org/api.html"><span class="ext">&raquo;</span>&nbsp;Lua BitOp API functions</a>.
+The FFI adds support for
+<a href="ext_ffi_semantics.html#cdata_arith">64&nbsp;bit bitwise operations</a>,
+using the same API functions.
</p>
<p>
Please make sure to <tt>require</tt> the module before using any of
@@ -140,6 +147,11 @@ LuaJIT adds some
<a href="ext_c_api.html">extra functions to the Lua/C API</a>.
</p>
+<h3 id="profiler">Profiler</h3>
+<p>
+LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>.
+</p>
+
<h2 id="library">Enhanced Standard Library Functions</h2>
<h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3>
@@ -167,7 +179,7 @@ in <tt>"-inf"</tt>.
<h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3>
<p>
All string-to-number conversions consistently convert integer and
-floating-point inputs in decimal and hexadecimal on all platforms.
+floating-point inputs in decimal, hexadecimal and binary on all platforms.
<tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous
problems with poor C library implementations. The builtin conversion
function provides full precision according to the IEEE-754 standard, it
@@ -191,6 +203,36 @@ for dot releases (x.y.0 &rarr; x.y.1), but may change with major or
minor releases (2.0 &rarr; 2.1) or between any beta release. Foreign
bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded.
</p>
+<p>
+Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies
+a different, incompatible bytecode format for all 64 bit ports. This may be
+rectified in the future.
+</p>
+
+<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3>
+<p>
+An extra library function <tt>table.new()</tt> can be made available via
+<tt>require("table.new")</tt>. This creates a pre-sized table, just like
+the C API equivalent <tt>lua_createtable()</tt>. This is useful for big
+tables if the final table size is known and automatic table resizing is
+too expensive.
+</p>
+
+<h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3>
+<p>
+An extra library function <tt>table.clear()</tt> can be made available
+via <tt>require("table.clear")</tt>. This clears all keys and values
+from a table, but preserves the allocated array/hash sizes. This is
+useful when a table, which is linked from multiple places, needs to be
+cleared and/or when recycling a table for use by the same context. This
+avoids managing backlinks, saves an allocation and the overhead of
+incremental array/hash part growth.
+</p>
+<p>
+Please note, this function is meant for very specific situations. In most
+cases it's better to replace the (usually single) link with a new table
+and let the GC do its work.
+</p>
<h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3>
<p>
@@ -269,6 +311,26 @@ indexes for varargs.</li>
<li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle
C&nbsp;functions.</li>
<li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li>
+<li>Lua/C API extensions:
+<tt>lua_version()</tt>
+<tt>lua_upvalueid()</tt>
+<tt>lua_upvaluejoin()</tt>
+<tt>lua_loadx()</tt>
+<tt>lua_copy()</tt>
+<tt>lua_tonumberx()</tt>
+<tt>lua_tointegerx()</tt>
+<tt>luaL_fileresult()</tt>
+<tt>luaL_execresult()</tt>
+<tt>luaL_loadfilex()</tt>
+<tt>luaL_loadbufferx()</tt>
+<tt>luaL_traceback()</tt>
+<tt>luaL_setfuncs()</tt>
+<tt>luaL_pushmodule()</tt>
+<tt>luaL_newlibtable()</tt>
+<tt>luaL_newlib()</tt>
+<tt>luaL_testudata()</tt>
+<tt>luaL_setmetatable()</tt>
+</li>
<li>Command line option <tt>-E</tt>.</li>
<li>Command line checks <tt>__tostring</tt> for errors.</li>
</ul>
@@ -294,6 +356,8 @@ exit status.</li>
<li><tt>debug.setmetatable()</tt> returns object.</li>
<li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li>
<li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li>
+<li><tt>package.searchers</tt>.</li>
+<li><tt>module()</tt> returns the module table.</li>
</ul>
<p>
Note: this provides only partial compatibility with Lua 5.2 at the
@@ -302,6 +366,21 @@ Lua&nbsp;5.1, which prevents implementing features that would otherwise
break the Lua/C API and ABI (e.g. <tt>_ENV</tt>).
</p>
+<h2 id="lua53">Extensions from Lua 5.3</h2>
+<p>
+LuaJIT supports some extensions from Lua&nbsp;5.3:</p>
+<ul>
+<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li>
+<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li>
+<li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li>
+<li><tt>assert()</tt> accepts any type of error object.</li>
+<li><tt>table.move(a1, f, e, t [,a2])</tt>.</li>
+<li><tt>coroutine.isyieldable()</tt>.</li>
+<li>Lua/C API extensions:
+<tt>lua_isyieldable()</tt>
+</li>
+</ul>
+
<h2 id="exceptions">C++ Exception Interoperability</h2>
<p>
LuaJIT has built-in support for interoperating with C++&nbsp;exceptions.
@@ -315,26 +394,21 @@ the toolchain used to compile LuaJIT:
<td class="excinterop">Interoperability</td>
</tr>
<tr class="odd separate">
-<td class="excplatform">POSIX/x64, DWARF2 unwinding</td>
-<td class="exccompiler">GCC 4.3+</td>
+<td class="excplatform">External frame unwinding</td>
+<td class="exccompiler">GCC, Clang, MSVC</td>
<td class="excinterop"><b style="color: #00a000;">Full</b></td>
</tr>
<tr class="even">
-<td class="excplatform">Other platforms, DWARF2 unwinding</td>
-<td class="exccompiler">GCC</td>
+<td class="excplatform">Internal frame unwinding + DWARF2</td>
+<td class="exccompiler">GCC, Clang</td>
<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
</tr>
<tr class="odd">
-<td class="excplatform">Windows/x64</td>
-<td class="exccompiler">MSVC</td>
-<td class="excinterop"><b style="color: #00a000;">Full</b></td>
+<td class="excplatform">Windows 64 bit</td>
+<td class="exccompiler">non-MSVC</td>
+<td class="excinterop"><b style="color: #c06000;">Limited</b></td>
</tr>
<tr class="even">
-<td class="excplatform">Windows/x86</td>
-<td class="exccompiler">Any</td>
-<td class="excinterop"><b style="color: #a00000;">No</b></td>
-</tr>
-<tr class="odd">
<td class="excplatform">Other platforms</td>
<td class="exccompiler">Other compilers</td>
<td class="excinterop"><b style="color: #a00000;">No</b></td>
@@ -385,14 +459,6 @@ C++ destructors.</li>
<li>Lua errors <b>cannot</b> be caught on the C++ side.</li>
<li>Throwing Lua errors across C++ frames will <b>not</b> call
C++ destructors.</li>
-<li>Additionally, on Windows/x86 with SEH-based C++&nbsp;exceptions:
-it's <b>not</b> safe to throw a Lua error across any frames containing
-a C++ function with any try/catch construct or using variables with
-(implicit) destructors. This also applies to any functions which may be
-inlined in such a function. It doesn't matter whether <tt>lua_error()</tt>
-is called inside or outside of a try/catch or whether any object actually
-needs to be destroyed: the SEH chain is corrupted and this will eventually
-lead to the termination of the process.</li>
</ul>
<br class="flush">
</div>
diff --git a/doc/faq.html b/doc/faq.html
index 0b4b2df0..9effeb16 100644
--- a/doc/faq.html
+++ b/doc/faq.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>Frequently Asked Questions (FAQ)</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -40,9 +40,13 @@ dd { margin-left: 1.5em; }
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
diff --git a/doc/install.html b/doc/install.html
index fe89fc5c..9fad31f4 100644
--- a/doc/install.html
+++ b/doc/install.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>Installation</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -60,9 +60,13 @@ td.compatx {
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
@@ -118,7 +122,7 @@ LuaJIT currently builds out-of-the box on most systems:
<td class="compatx">v2.0 &ndash;</td>
</tr>
<tr class="even">
-<td class="compatname"><a href="#osx">macOS (OSX)</a></td>
+<td class="compatname"><a href="#posix">macOS (OSX)</a></td>
<td class="compatver">10.4</td>
<td class="compatx">&nbsp;</td>
<td class="compatx">v2.1 &ndash;</td>
@@ -304,6 +308,13 @@ MSVC (Visual Studio).</li>
Please read the instructions given in these files, before changing
any settings.
</p>
+<p>
+All LuaJIT 64 bit ports use 64 bit GC objects by default (<tt>LJ_GC64</tt>).
+For x64, you can select the old 32-on-64 bit mode by adding
+<tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command.
+Please check the note about the
+<a href="extensions.html#string_dump">bytecode format</a> differences, too.
+</p>
<h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2>
<h3>Prerequisites</h3>
@@ -349,9 +360,12 @@ You can add an extra prefix to the search paths by appending the
make PREFIX=/home/myself/lj2
</pre>
<p>
-Please use the LuaJIT 2.1 branch to compile for
-<b id="osx">macOS (OSX)</b>.
+Note for macOS: you <b>must</b> set the <tt>MACOSX_DEPLOYMENT_TARGET</tt>
+environment variable to a value supported by your toolchain:
</p>
+<pre class="code">
+MACOSX_DEPLOYMENT_TARGET=XX.YY make
+</pre>
<h3>Installing LuaJIT</h3>
<p>
The top-level Makefile installs LuaJIT by default under
@@ -435,25 +449,36 @@ directory where <tt>luajit.exe</tt> is installed
<h2 id="cross">Cross-compiling LuaJIT</h2>
<p>
+First, let's clear up some terminology:
+</p>
+<ul>
+<li>Host: This is your development system, usually based on a x64 or x86 CPU.</li>
+<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li>
+<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li>
+<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li>
+<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li>
+</ul>
+<p>
The GNU Makefile-based build system allows cross-compiling on any host
-for any supported target, as long as both architectures have the same
-pointer size. If you want to cross-compile to any 32 bit target on an
-x64 OS, you need to install the multilib development package (e.g.
-<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part
-(<tt>HOST_CC="gcc -m32"</tt>).
+for any supported target:
</p>
+<ul>
+<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li>
+<li>Both host and target architectures must have the same pointer size.</li>
+<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li>
+<li>64 bit targets always require compilation on a 64 bit host.</li>
+</ul>
<p>
You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the
-target OS differ, or you'll get assembler or linker errors. E.g. if
-you're compiling on a Windows or macOS host for embedded Linux or Android,
-you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a
-minimal target OS, you may need to disable the built-in allocator in
-<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. Don't forget to
-specify the same <tt>TARGET_SYS</tt> for the install step, too.
+target OS differ, or you'll get assembler or linker errors:
</p>
+<ul>
+<li>E.g. if you're compiling on a Windows or macOS host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li>
+<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li>
+<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li>
+</ul>
<p>
-The examples below only show some popular targets &mdash; please check
-the comments in <tt>src/Makefile</tt> for more details.
+Here are some examples where host and target have the same CPU:
</p>
<pre class="code">
# Cross-compile to a 32 bit binary on a multilib x64 OS
@@ -471,34 +496,44 @@ use the canonical toolchain triplets for Linux.
</p>
<p>
Since there's often no easy way to detect CPU features at runtime, it's
-important to compile with the proper CPU or architecture settings. You
-can specify these when building the toolchain yourself. Or add
-<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For
-ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting,
-too. Otherwise, LuaJIT may not run at the full performance of your target
-CPU.
+important to compile with the proper CPU or architecture settings:
+</p>
+<ul>
+<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li>
+<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li>
+<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li>
+<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li>
+</ul>
+<p>
+Here are some examples for targets with a different CPU than the host:
</p>
<pre class="code">
# ARM soft-float
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
TARGET_CFLAGS="-mfloat-abi=soft"
-# ARM soft-float ABI with VFP (example for Cortex-A8)
+# ARM soft-float ABI with VFP (example for Cortex-A9)
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \
- TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp"
+ TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp"
-# ARM hard-float ABI with VFP (armhf, requires recent toolchain)
+# ARM hard-float ABI with VFP (armhf, most modern toolchains)
make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf-
+# ARM64
+make CROSS=aarch64-linux-
+
# PPC
make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
-# PPC/e500v2 (fast interpreter only)
-make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe-
-# MIPS big-endian
+# MIPS32 big-endian
make HOST_CC="gcc -m32" CROSS=mips-linux-
-# MIPS little-endian
+# MIPS32 little-endian
make HOST_CC="gcc -m32" CROSS=mipsel-linux-
+
+# MIPS64 big-endian
+make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
+# MIPS64 little-endian
+make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64"
</pre>
<p>
You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/"><span class="ext">&raquo;</span>&nbsp;Android NDK</a>.
@@ -506,8 +541,17 @@ Please adapt the environment variables to match the install locations and the
desired target platform. E.g. Android&nbsp;4.1 corresponds to ABI level&nbsp;16.
</p>
<pre class="code">
-# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB)
+# Android/ARM64, aarch64, Android 5.0+ (L)
+NDKDIR=/opt/android/ndk
+NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
+NDKCROSS=$NDKBIN/aarch64-linux-android-
+NDKCC=$NDKBIN/aarch64-linux-android21-clang
+make CROSS=$NDKCROSS \
+ STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \
+ TARGET_LD=$NDKCC TARGET_AR="$NDKBIN/llvm-ar rcus" \
+ TARGET_STRIP=$NDKBIN/llvm-strip
+# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB)
NDKDIR=/opt/android/ndk
NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin
NDKCROSS=$NDKBIN/arm-linux-androideabi-
@@ -518,9 +562,23 @@ make HOST_CC="gcc -m32" CROSS=$NDKCROSS \
TARGET_STRIP=$NDKBIN/llvm-strip
</pre>
<p>
-Please use the LuaJIT 2.1 branch to compile for
-<b id="ios">iOS</b> (iPhone/iPad).
+You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="https://developer.apple.com/ios/"><span class="ext">&raquo;</span>&nbsp;iOS SDK</a>:
+</p>
+<p style="font-size: 8pt;">
+Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps
+are not allowed to generate code at runtime. You'll only get the performance
+of the LuaJIT interpreter on iOS. This is still faster than plain Lua, but
+much slower than the JIT compiler. Please complain to Apple, not me.
+Or use Android. :-p
</p>
+<pre class="code">
+# iOS/ARM64
+ISDKP=$(xcrun --sdk iphoneos --show-sdk-path)
+ICC=$(xcrun --sdk iphoneos --find clang)
+ISDKF="-arch arm64 -isysroot $ISDKP"
+make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \
+ TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS
+</pre>
<h3 id="consoles">Cross-compiling for consoles</h3>
<p>
@@ -564,15 +622,35 @@ unpacked the sources and run the build command given in the table:
<td class="compatx"><tt>ps4build</tt></td>
</tr>
<tr class="even">
+<td class="compatname"><b id="ps5">PS5</b></td>
+<td class="compatbits">64</td>
+<td class="compatx"><tt>ps5build</tt></td>
+</tr>
+<tr class="odd">
<td class="compatname"><b id="psvita">PS Vita</b></td>
<td class="compatbits">32</td>
<td class="compatx"><tt>psvitabuild</tt></td>
</tr>
-<tr class="odd">
+<tr class="even">
<td class="compatname"><b id="xbox360">Xbox 360</b></td>
<td class="compatbits">32</td>
<td class="compatx"><tt>xedkbuild</tt></td>
</tr>
+<tr class="odd">
+<td class="compatname"><b id="xboxone">Xbox One</b></td>
+<td class="compatbits">64</td>
+<td class="compatx"><tt>xb1build</tt></td>
+</tr>
+<tr class="even">
+<td class="compatname"><b id="nx32">Nintendo Switch NX32</b></td>
+<td class="compatbits">32</td>
+<td class="compatx"><tt>nxbuild</tt></td>
+</tr>
+<tr class="odd">
+<td class="compatname"><b id="nx64">Nintendo Switch NX64</b></td>
+<td class="compatbits">64</td>
+<td class="compatx"><tt>nxbuild</tt></td>
+</tr>
</table>
<p>
Please check out the comments in the corresponding <tt>*.bat</tt>
diff --git a/doc/luajit.html b/doc/luajit.html
index 88d4c194..03a29528 100644
--- a/doc/luajit.html
+++ b/doc/luajit.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>LuaJIT</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -122,9 +122,13 @@ table.feature small {
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
@@ -158,13 +162,13 @@ LuaJIT is Copyright &copy; 2005-2022 Mike Pall, released under the
<tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr>
</table>
<table class="feature os os3">
-<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td></tr>
+<tr><td>PS3</td><td>PS4<br>PS5</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td><td>Nintendo<br>Switch</td></tr>
</table>
<table class="feature compiler">
-<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr>
+<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr>
</table>
<table class="feature cpu">
-<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr>
+<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr>
</table>
<table class="feature fcompat">
<tr><td>Lua&nbsp;5.1<br>API+ABI</td><td>+&nbsp;JIT</td><td>+&nbsp;BitOp</td><td>+&nbsp;FFI</td><td>Drop-in<br>DLL/.so</td></tr>
diff --git a/doc/running.html b/doc/running.html
index 3f408141..177e6357 100644
--- a/doc/running.html
+++ b/doc/running.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>Running LuaJIT</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -59,9 +59,13 @@ td.param_default {
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a href="status.html">Status</a>
@@ -172,6 +176,7 @@ Here are the available LuaJIT control commands:
<li id="j_flush"><tt>-jflush</tt> &mdash; Flushes the whole cache of compiled code.</li>
<li id="j_v"><tt>-jv</tt> &mdash; Shows verbose information about the progress of the JIT compiler.</li>
<li id="j_dump"><tt>-jdump</tt> &mdash; Dumps the code and structures used in various compiler stages.</li>
+<li id="j_p"><tt>-jp</tt> &mdash; Start the <a href="ext_profiler.html">integrated profiler</a>.</li>
</ul>
<p>
The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules
diff --git a/doc/status.html b/doc/status.html
index 7ecedf3d..1c86c103 100644
--- a/doc/status.html
+++ b/doc/status.html
@@ -1,8 +1,8 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<!DOCTYPE html>
<html>
<head>
<title>Status</title>
-<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+<meta charset="utf-8">
<meta name="Copyright" content="Copyright (C) 2005-2022">
<meta name="Language" content="en">
<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen">
@@ -40,9 +40,13 @@ ul li { padding-bottom: 0.3em; }
<a href="ext_ffi_semantics.html">FFI Semantics</a>
</li></ul>
</li><li>
+<a href="ext_buffer.html">String Buffers</a>
+</li><li>
<a href="ext_jit.html">jit.* Library</a>
</li><li>
<a href="ext_c_api.html">Lua/C API</a>
+</li><li>
+<a href="ext_profiler.html">Profiler</a>
</li></ul>
</li><li>
<a class="current" href="status.html">Status</a>
@@ -56,7 +60,7 @@ ul li { padding-bottom: 0.3em; }
</div>
<div id="main">
<p>
-This documentation is for LuaJIT 2.0.5. Please check the <tt>doc</tt>
+This documentation is for LuaJIT 2.1.0-beta3. Please check the <tt>doc</tt>
directory in each git branch for the version-specific documentation.
</p>
<p>
@@ -88,12 +92,6 @@ The Lua <b>debug API</b> is missing a couple of features (return
hooks for non-Lua functions) and shows slightly different behavior
in LuaJIT (no per-coroutine hooks, no tail call counting).
</li>
-<li>
-Currently, some <b>out-of-memory</b> errors from <b>on-trace code</b> are not
-handled correctly. The error may fall through an on-trace
-<tt>pcall</tt> or it may be passed on to the function set with
-<tt>lua_atpanic</tt> on x64.
-</li>
</ul>
<br class="flush">
</div>
diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h
index f7f3d0db..fbfebee0 100644
--- a/dynasm/dasm_arm.h
+++ b/dynasm/dasm_arm.h
@@ -294,7 +294,7 @@ int dasm_link(Dst_DECL, size_t *szp)
{ /* Handle globals not defined in this translation unit. */
int idx;
- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
int n = D->lglabels[idx];
/* Undefined label: Collapse rel chain and replace with marker (< 0). */
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -371,7 +371,10 @@ int dasm_encode(Dst_DECL, void *buffer)
ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000;
break;
case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
+ if (n < 0) {
+ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4);
+ goto patchrel;
+ }
/* fallthrough */
case DASM_REL_PC:
CK(n >= 0, UNDEF_PC);
diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua
index 5dedaea1..3b4db86e 100644
--- a/dynasm/dasm_arm.lua
+++ b/dynasm/dasm_arm.lua
@@ -9,9 +9,9 @@
local _info = {
arch = "arm",
description = "DynASM ARM module",
- version = "1.3.0",
- vernum = 10300,
- release = "2011-05-05",
+ version = "1.5.0",
+ vernum = 10500,
+ release = "2021-05-02",
author = "Mike Pall",
license = "MIT",
}
diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h
new file mode 100644
index 00000000..47c9c37d
--- /dev/null
+++ b/dynasm/dasm_arm64.h
@@ -0,0 +1,563 @@
+/*
+** DynASM ARM64 encoding engine.
+** Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+** Released under the MIT license. See dynasm.lua for full copyright notice.
+*/
+
+#include <stddef.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define DASM_ARCH "arm64"
+
+#ifndef DASM_EXTERN
+#define DASM_EXTERN(a,b,c,d) 0
+#endif
+
+/* Action definitions. */
+enum {
+ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT,
+ /* The following actions need a buffer position. */
+ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
+ /* The following actions also have an argument. */
+ DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A,
+ DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML,
+ DASM_IMMV, DASM_VREG,
+ DASM__MAX
+};
+
+/* Maximum number of section buffer positions for a single dasm_put() call. */
+#define DASM_MAXSECPOS 25
+
+/* DynASM encoder status codes. Action list offset or number are or'ed in. */
+#define DASM_S_OK 0x00000000
+#define DASM_S_NOMEM 0x01000000
+#define DASM_S_PHASE 0x02000000
+#define DASM_S_MATCH_SEC 0x03000000
+#define DASM_S_RANGE_I 0x11000000
+#define DASM_S_RANGE_SEC 0x12000000
+#define DASM_S_RANGE_LG 0x13000000
+#define DASM_S_RANGE_PC 0x14000000
+#define DASM_S_RANGE_REL 0x15000000
+#define DASM_S_RANGE_VREG 0x16000000
+#define DASM_S_UNDEF_LG 0x21000000
+#define DASM_S_UNDEF_PC 0x22000000
+
+/* Macros to convert positions (8 bit section + 24 bit index). */
+#define DASM_POS2IDX(pos) ((pos)&0x00ffffff)
+#define DASM_POS2BIAS(pos) ((pos)&0xff000000)
+#define DASM_SEC2POS(sec) ((sec)<<24)
+#define DASM_POS2SEC(pos) ((pos)>>24)
+#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos))
+
+/* Action list type. */
+typedef const unsigned int *dasm_ActList;
+
+/* Per-section structure. */
+typedef struct dasm_Section {
+ int *rbuf; /* Biased buffer pointer (negative section bias). */
+ int *buf; /* True buffer pointer. */
+ size_t bsize; /* Buffer size in bytes. */
+ int pos; /* Biased buffer position. */
+ int epos; /* End of biased buffer position - max single put. */
+ int ofs; /* Byte offset into section. */
+} dasm_Section;
+
+/* Core structure holding the DynASM encoding state. */
+struct dasm_State {
+ size_t psize; /* Allocated size of this structure. */
+ dasm_ActList actionlist; /* Current actionlist pointer. */
+ int *lglabels; /* Local/global chain/pos ptrs. */
+ size_t lgsize;
+ int *pclabels; /* PC label chains/pos ptrs. */
+ size_t pcsize;
+ void **globals; /* Array of globals (bias -10). */
+ dasm_Section *section; /* Pointer to active section. */
+ size_t codesize; /* Total size of all code sections. */
+ int maxsection; /* 0 <= sectionidx < maxsection. */
+ int status; /* Status code. */
+ dasm_Section sections[1]; /* All sections. Alloc-extended. */
+};
+
+/* The size of the core structure depends on the max. number of sections. */
+#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section))
+
+
+/* Initialize DynASM state. */
+void dasm_init(Dst_DECL, int maxsection)
+{
+ dasm_State *D;
+ size_t psz = 0;
+ int i;
+ Dst_REF = NULL;
+ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection));
+ D = Dst_REF;
+ D->psize = psz;
+ D->lglabels = NULL;
+ D->lgsize = 0;
+ D->pclabels = NULL;
+ D->pcsize = 0;
+ D->globals = NULL;
+ D->maxsection = maxsection;
+ for (i = 0; i < maxsection; i++) {
+ D->sections[i].buf = NULL; /* Need this for pass3. */
+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i);
+ D->sections[i].bsize = 0;
+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */
+ }
+}
+
+/* Free DynASM state. */
+void dasm_free(Dst_DECL)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ for (i = 0; i < D->maxsection; i++)
+ if (D->sections[i].buf)
+ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize);
+ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize);
+ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize);
+ DASM_M_FREE(Dst, D, D->psize);
+}
+
+/* Setup global label array. Must be called before dasm_setup(). */
+void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl)
+{
+ dasm_State *D = Dst_REF;
+ D->globals = gl - 10; /* Negative bias to compensate for locals. */
+ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int));
+}
+
+/* Grow PC label array. Can be called after dasm_setup(), too. */
+void dasm_growpc(Dst_DECL, unsigned int maxpc)
+{
+ dasm_State *D = Dst_REF;
+ size_t osz = D->pcsize;
+ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int));
+ memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz);
+}
+
+/* Setup encoder. */
+void dasm_setup(Dst_DECL, const void *actionlist)
+{
+ dasm_State *D = Dst_REF;
+ int i;
+ D->actionlist = (dasm_ActList)actionlist;
+ D->status = DASM_S_OK;
+ D->section = &D->sections[0];
+ memset((void *)D->lglabels, 0, D->lgsize);
+ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize);
+ for (i = 0; i < D->maxsection; i++) {
+ D->sections[i].pos = DASM_SEC2POS(i);
+ D->sections[i].ofs = 0;
+ }
+}
+
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) { \
+ D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
+#define CKPL(kind, st) \
+ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
+ D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
+#else
+#define CK(x, st) ((void)0)
+#define CKPL(kind, st) ((void)0)
+#endif
+
+static int dasm_imm12(unsigned int n)
+{
+ if ((n >> 12) == 0)
+ return n;
+ else if ((n & 0xff000fff) == 0)
+ return (n >> 12) | 0x1000;
+ else
+ return -1;
+}
+
+static int dasm_ffs(unsigned long long x)
+{
+ int n = -1;
+ while (x) { x >>= 1; n++; }
+ return n;
+}
+
+static int dasm_imm13(int lo, int hi)
+{
+ int inv = 0, w = 64, s = 0xfff, xa, xb;
+ unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo;
+ unsigned long long m = 1ULL, a, b, c;
+ if (n & 1) { n = ~n; inv = 1; }
+ a = n & (unsigned long long)-(long long)n;
+ b = (n+a)&(unsigned long long)-(long long)(n+a);
+ c = (n+a-b)&(unsigned long long)-(long long)(n+a-b);
+ xa = dasm_ffs(a); xb = dasm_ffs(b);
+ if (c) {
+ w = dasm_ffs(c) - xa;
+ if (w == 32) m = 0x0000000100000001UL;
+ else if (w == 16) m = 0x0001000100010001UL;
+ else if (w == 8) m = 0x0101010101010101UL;
+ else if (w == 4) m = 0x1111111111111111UL;
+ else if (w == 2) m = 0x5555555555555555UL;
+ else return -1;
+ s = (-2*w & 0x3f) - 1;
+ } else if (!a) {
+ return -1;
+ } else if (xb == -1) {
+ xb = 64;
+ }
+ if ((b-a) * m != n) return -1;
+ if (inv) {
+ return ((w - xb) << 6) | (s+w+xa-xb);
+ } else {
+ return ((w - xa) << 6) | (s+xb-xa);
+ }
+ return -1;
+}
+
+/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */
+void dasm_put(Dst_DECL, int start, ...)
+{
+ va_list ap;
+ dasm_State *D = Dst_REF;
+ dasm_ActList p = D->actionlist + start;
+ dasm_Section *sec = D->section;
+ int pos = sec->pos, ofs = sec->ofs;
+ int *b;
+
+ if (pos >= sec->epos) {
+ DASM_M_GROW(Dst, int, sec->buf, sec->bsize,
+ sec->bsize + 2*DASM_MAXSECPOS*sizeof(int));
+ sec->rbuf = sec->buf - DASM_POS2BIAS(pos);
+ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos);
+ }
+
+ b = sec->rbuf;
+ b[pos++] = start;
+
+ va_start(ap, start);
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ if (action >= DASM__MAX) {
+ ofs += 4;
+ } else {
+ int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0;
+ switch (action) {
+ case DASM_STOP: goto stop;
+ case DASM_SECTION:
+ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC);
+ D->section = &D->sections[n]; goto stop;
+ case DASM_ESC: p++; ofs += 4; break;
+ case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break;
+ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break;
+ case DASM_REL_LG:
+ n = (ins & 2047) - 10; pl = D->lglabels + n;
+ /* Bkwd rel or global. */
+ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; }
+ pl += 10; n = *pl;
+ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */
+ goto linkrel;
+ case DASM_REL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+ putrel:
+ n = *pl;
+ if (n < 0) { /* Label exists. Get label pos and store it. */
+ b[pos] = -n;
+ } else {
+ linkrel:
+ b[pos] = n; /* Else link to rel chain, anchored at label. */
+ *pl = pos;
+ }
+ pos++;
+ if ((ins & 0x8000)) ofs += 8;
+ break;
+ case DASM_REL_A:
+ b[pos++] = n;
+ b[pos++] = va_arg(ap, int);
+ break;
+ case DASM_LABEL_LG:
+ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel;
+ case DASM_LABEL_PC:
+ pl = D->pclabels + n; CKPL(pc, PC);
+ putlabel:
+ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos;
+ }
+ *pl = -pos; /* Label exists now. */
+ b[pos++] = ofs; /* Store pass1 offset estimate. */
+ break;
+ case DASM_IMM:
+ CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
+ n >>= ((ins>>10)&31);
+#ifdef DASM_CHECKS
+ if ((ins & 0x8000))
+ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I);
+ else
+ CK((n>>((ins>>5)&31)) == 0, RANGE_I);
+#endif
+ b[pos++] = n;
+ break;
+ case DASM_IMM6:
+ CK((n >> 6) == 0, RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_IMM12:
+ CK(dasm_imm12((unsigned int)n) != -1, RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_IMM13W:
+ CK(dasm_imm13(n, n) != -1, RANGE_I);
+ b[pos++] = n;
+ break;
+ case DASM_IMM13X: {
+ int m = va_arg(ap, int);
+ CK(dasm_imm13(n, m) != -1, RANGE_I);
+ b[pos++] = n;
+ b[pos++] = m;
+ break;
+ }
+ case DASM_IMML: {
+#ifdef DASM_CHECKS
+ int scale = (ins & 3);
+ CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ||
+ (unsigned int)(n+256) < 512, RANGE_I);
+#endif
+ b[pos++] = n;
+ break;
+ }
+ case DASM_IMMV:
+ ofs += 4;
+ b[pos++] = n;
+ break;
+ case DASM_VREG:
+ CK(n < 32, RANGE_VREG);
+ b[pos++] = n;
+ break;
+ }
+ }
+ }
+stop:
+ va_end(ap);
+ sec->pos = pos;
+ sec->ofs = ofs;
+}
+#undef CK
+
+/* Pass 2: Link sections, shrink aligns, fix label offsets. */
+int dasm_link(Dst_DECL, size_t *szp)
+{
+ dasm_State *D = Dst_REF;
+ int secnum;
+ int ofs = 0;
+
+#ifdef DASM_CHECKS
+ *szp = 0;
+ if (D->status != DASM_S_OK) return D->status;
+ {
+ int pc;
+ for (pc = 0; pc*sizeof(int) < D->pcsize; pc++)
+ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc;
+ }
+#endif
+
+ { /* Handle globals not defined in this translation unit. */
+ int idx;
+ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
+ int n = D->lglabels[idx];
+ /* Undefined label: Collapse rel chain and replace with marker (< 0). */
+ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
+ }
+ }
+
+ /* Combine all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->rbuf;
+ int pos = DASM_SEC2POS(secnum);
+ int lastpos = sec->pos;
+
+ while (pos != lastpos) {
+ dasm_ActList p = D->actionlist + b[pos++];
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: p++; break;
+ case DASM_REL_EXT: break;
+ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
+ case DASM_REL_LG: case DASM_REL_PC: pos++; break;
+ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
+ case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W:
+ case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break;
+ case DASM_IMM13X: case DASM_REL_A: pos += 2; break;
+ }
+ }
+ stop: (void)0;
+ }
+ ofs += sec->ofs; /* Next section starts right after current section. */
+ }
+
+ D->codesize = ofs; /* Total size of all code sections */
+ *szp = ofs;
+ return DASM_S_OK;
+}
+
+#ifdef DASM_CHECKS
+#define CK(x, st) \
+ do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
+#else
+#define CK(x, st) ((void)0)
+#endif
+
+/* Pass 3: Encode sections. */
+int dasm_encode(Dst_DECL, void *buffer)
+{
+ dasm_State *D = Dst_REF;
+ char *base = (char *)buffer;
+ unsigned int *cp = (unsigned int *)buffer;
+ int secnum;
+
+ /* Encode all code sections. No support for data sections (yet). */
+ for (secnum = 0; secnum < D->maxsection; secnum++) {
+ dasm_Section *sec = D->sections + secnum;
+ int *b = sec->buf;
+ int *endb = sec->rbuf + sec->pos;
+
+ while (b != endb) {
+ dasm_ActList p = D->actionlist + *b++;
+ while (1) {
+ unsigned int ins = *p++;
+ unsigned int action = (ins >> 16);
+ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0;
+ switch (action) {
+ case DASM_STOP: case DASM_SECTION: goto stop;
+ case DASM_ESC: *cp++ = *p++; break;
+ case DASM_REL_EXT:
+ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048));
+ goto patchrel;
+ case DASM_ALIGN:
+ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f;
+ break;
+ case DASM_REL_LG:
+ if (n < 0) {
+ ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4;
+ n = (int)na;
+ CK((ptrdiff_t)n == na, RANGE_REL);
+ goto patchrel;
+ }
+ /* fallthrough */
+ case DASM_REL_PC:
+ CK(n >= 0, UNDEF_PC);
+ n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4;
+ patchrel:
+ if (!(ins & 0xf800)) { /* B, BL */
+ CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL);
+ cp[-1] |= ((n >> 2) & 0x03ffffff);
+ } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */
+ CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL);
+ cp[-1] |= ((n << 3) & 0x00ffffe0);
+ } else if ((ins & 0x3000) == 0x2000) { /* ADR */
+ CK(((n+0x00100000) >> 21) == 0, RANGE_REL);
+ cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29);
+ } else if ((ins & 0x3000) == 0x3000) { /* ADRP */
+ cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29);
+ } else if ((ins & 0x1000)) { /* TBZ, TBNZ */
+ CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL);
+ cp[-1] |= ((n << 3) & 0x0007ffe0);
+ } else if ((ins & 0x8000)) { /* absolute */
+ cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n);
+ cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32);
+ cp += 2;
+ }
+ break;
+ case DASM_REL_A: {
+ ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n);
+ if ((ins & 0x3000) == 0x3000) { /* ADRP */
+ ins &= ~0x1000;
+ na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12);
+ } else {
+ na = na - (ptrdiff_t)cp + 4;
+ }
+ n = (int)na;
+ CK((ptrdiff_t)n == na, RANGE_REL);
+ goto patchrel;
+ }
+ case DASM_LABEL_LG:
+ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
+ break;
+ case DASM_LABEL_PC: break;
+ case DASM_IMM:
+ cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
+ break;
+ case DASM_IMM6:
+ cp[-1] |= ((n&31) << 19) | ((n&32) << 26);
+ break;
+ case DASM_IMM12:
+ cp[-1] |= (dasm_imm12((unsigned int)n) << 10);
+ break;
+ case DASM_IMM13W:
+ cp[-1] |= (dasm_imm13(n, n) << 10);
+ break;
+ case DASM_IMM13X:
+ cp[-1] |= (dasm_imm13(n, *b++) << 10);
+ break;
+ case DASM_IMML: {
+ int scale = (ins & 3);
+ cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ?
+ ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12);
+ break;
+ }
+ case DASM_IMMV:
+ *cp++ = n;
+ break;
+ case DASM_VREG:
+ cp[-1] |= (n & 0x1f) << (ins & 0x1f);
+ break;
+ default: *cp++ = ins; break;
+ }
+ }
+ stop: (void)0;
+ }
+ }
+
+ if (base + D->codesize != (char *)cp) /* Check for phase errors. */
+ return DASM_S_PHASE;
+ return DASM_S_OK;
+}
+#undef CK
+
+/* Get PC label offset. */
+int dasm_getpclabel(Dst_DECL, unsigned int pc)
+{
+ dasm_State *D = Dst_REF;
+ if (pc*sizeof(int) < D->pcsize) {
+ int pos = D->pclabels[pc];
+ if (pos < 0) return *DASM_POS2PTR(D, -pos);
+ if (pos > 0) return -1; /* Undefined. */
+ }
+ return -2; /* Unused or out of range. */
+}
+
+#ifdef DASM_CHECKS
+/* Optional sanity checker to call between isolated encoding steps. */
+int dasm_checkstep(Dst_DECL, int secmatch)
+{
+ dasm_State *D = Dst_REF;
+ if (D->status == DASM_S_OK) {
+ int i;
+ for (i = 1; i <= 9; i++) {
+ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; }
+ D->lglabels[i] = 0;
+ }
+ }
+ if (D->status == DASM_S_OK && secmatch >= 0 &&
+ D->section != &D->sections[secmatch])
+ D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
+ return D->status;
+}
+#endif
+
diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua
new file mode 100644
index 00000000..1f581ba0
--- /dev/null
+++ b/dynasm/dasm_arm64.lua
@@ -0,0 +1,1219 @@
+------------------------------------------------------------------------------
+-- DynASM ARM64 module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+
-- Module information:
local _info = {
  -- Arch name must match the dasm_<arch> module name (was wrongly "arm",
  -- which misidentifies this ARM64 module; cf. dasm_x86.lua/dasm_mips.lua).
  arch = "arm64",
  description = "DynASM ARM64 module",
  version = "1.5.0",
  vernum = 10500,
  release = "2021-05-02",
  author = "Mike Pall",
  license = "MIT",
}

-- Exported glue functions for the arch-specific module.
local _M = { _info = _info }
+
-- Cache library functions.
-- Local upvalue aliases avoid repeated global/table lookups in hot paths.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, setmetatable, rawget = assert, setmetatable, rawget
local _s = string
local format, byte, char = _s.format, _s.byte, _s.char
local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
local concat, sort, insert = table.concat, table.sort, table.insert
-- The bit library is built into LuaJIT; plain Lua needs the BitOp module.
local bit = bit or require("bit")
local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit
+
-- Inherited tables and callbacks. Set up by the dynasm.lua front end.
local g_opt, g_arch
local wline, werror, wfatal, wwarn

-- Action name list.
-- CHECK: Keep this in sync with the C code!
local action_names = {
  "STOP", "SECTION", "ESC", "REL_EXT",
  "ALIGN", "REL_LG", "LABEL_LG",
  "REL_PC", "LABEL_PC", "REL_A",
  "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV",
  "VREG",
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number.
-- Zero-based, to match the C-side action enum ordering.
local map_action = {}
for n,name in ipairs(action_names) do
  map_action[name] = n-1
end

-- Action list buffer.
local actlist = {}

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1
+
+------------------------------------------------------------------------------
+
-- Dump action names and numbers (for the -D option of dynasm.lua).
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for i=1,#action_names do
    local name = action_names[i]
    local num = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, num, num))
  end
  out:write("\n")
end
+
-- Write out the accumulated action list as one static C array.
local function writeactions(out, name)
  local count = #actlist
  -- An empty list still emits a single STOP action.
  if count == 0 then count = 1; actlist[0] = map_action.STOP end
  out:write("static const unsigned int ", name, "[", count, "] = {\n")
  local i = 1
  while i < count do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
    i = i + 1
  end
  -- Last entry has no trailing comma.
  assert(out:write("0x", tohex(actlist[count]), "\n};\n\n"))
end
+
+------------------------------------------------------------------------------
+
-- Append one 32 bit word to the action list.
local function wputxw(w)
  assert(w % 1 == 0 and w >= 0 and w <= 0xffffffff, "word out of range")
  actlist[#actlist+1] = w
end
+
-- Append an action word (action number in the high half, val in the low
-- half). An optional argument a is queued for dasm_put() and advances the
-- section buffer position by num (default 1).
local function waction(action, val, a, num)
  local anum = map_action[action]
  assert(anum, "bad action name `"..action.."'")
  wputxw(anum * 0x10000 + (val or 0))
  if a then actargs[#actargs+1] = a end
  if a or num then secpos = secpos + (num or 1) end
end
+
-- Flush the pending action range as a dasm_put() call (used on intervening
-- C code or on buffer position overflow).
local function wflush(term)
  -- Nothing emitted since the last flush?
  if #actlist == actargs[1] then return end
  if not term then waction("STOP") end -- Regular flushes terminate the list.
  wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true)
  -- Restart: the current actionlist offset becomes the 1st argument of the
  -- next dasm_put() and occupies one buffer position, too.
  actargs = { #actlist }
  secpos = 1
end
+
-- Put a data word, escaping it when needed. Values up to 0x000fffff would
-- otherwise be interpreted as actions, so they get an ESC prefix.
local function wputw(n)
  if n > 0x000fffff then
    wputxw(n)
  else
    waction("ESC")
    wputxw(n)
  end
end
+
-- Reserve an action list slot for a word to be filled in later.
local function wpos()
  local idx = #actlist + 1
  actlist[idx] = ""
  return idx
end
+
-- Store a word into a previously reserved position (see wpos above).
local function wputpos(pos, n)
  assert(n % 1 == 0 and n >= 0 and n <= 0xffffffff, "word out of range")
  if n > 0x000fffff then
    actlist[pos] = n
  else
    -- Small values need an ESC action: shift the value one slot down and
    -- put the ESC marker into the reserved slot.
    insert(actlist, pos+1, n)
    actlist[pos] = map_action.ESC * 0x10000
  end
end
+
+------------------------------------------------------------------------------
+
-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbers 1-19 are reserved for local labels (see parse_label); globals
-- therefore start at 20.
local next_global = 20
local map_global = setmetatable({}, { __index = function(t, name)
  if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end
  local n = next_global
  -- The label number must fit into the 11 bit action value field.
  if n > 2047 then werror("too many global labels") end
  next_global = n + 1
  t[name] = n
  return n
end})
+
-- Dump global labels (for the -D option of dynasm.lua).
local function dumpglobals(out, lvl)
  local names = {}
  for name, n in pairs(map_global) do names[n] = name end
  out:write("Global labels:\n")
  for i=20,next_global-1 do
    out:write(format(" %s\n", names[i]))
  end
  out:write("\n")
end
+
-- Emit the C enum of global labels, each prefixed and in numeric order.
local function writeglobals(out, prefix)
  local names = {}
  for name, n in pairs(map_global) do names[n] = name end
  out:write("enum {\n")
  for i=20,next_global-1 do
    out:write(" ", prefix, names[i], ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
+
-- Emit the NULL-terminated C array of global label name strings.
local function writeglobalnames(out, name)
  local names = {}
  for gname, n in pairs(map_global) do names[n] = gname end
  out:write("static const char *const ", name, "[] = {\n")
  for i=20,next_global-1 do
    out:write(" \"", names[i], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
+
+------------------------------------------------------------------------------
+
-- Extern label name -> extern label number. With auto assignment on 1st use.
local next_extern = 0
local map_extern_ = {} -- Reverse map: extern number -> name.
local map_extern = setmetatable({}, { __index = function(t, name)
  -- No restrictions on the name for now.
  local n = next_extern
  -- The label number must fit into the 11 bit action value field.
  if n > 2047 then werror("too many extern labels") end
  next_extern = n + 1
  t[name] = n
  map_extern_[n] = name
  return n
end})
+
-- Dump extern labels (for the -D option of dynasm.lua).
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  local i = 0
  while i < next_extern do
    out:write(format(" %s\n", map_extern_[i]))
    i = i + 1
  end
  out:write("\n")
end
+
-- Emit the NULL-terminated C array of extern label name strings.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  local idx = 0
  while idx < next_extern do
    out:write(" \"", map_extern_[idx], "\",\n")
    idx = idx + 1
  end
  out:write(" (const char *)0\n};\n")
end
+
+------------------------------------------------------------------------------
+
-- Arch-specific maps.

-- Ext. register name -> int. name.
-- The "@" prefix marks register 31, which only some instructions accept
-- (see the ok31 handling in parse_reg below).
local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }

-- Int. register name -> ext. name.
local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", }

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for Dt... macros).

-- Reverse defines for registers.
function _M.revdef(s)
  return map_reg_rev[s] or s
end

-- Shift operand name -> shift type encoding.
local map_shift = { lsl = 0, lsr = 1, asr = 2, }

-- Extend operand name -> extend type encoding.
local map_extend = {
  uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3,
  sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7,
}

-- Condition name -> condition code. hs/lo are aliases for cs/cc.
local map_cond = {
  eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7,
  hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14,
  hs = 2, lo = 3,
}
+
+------------------------------------------------------------------------------
+
-- Register size type ("w", "x", "s", "d", ...) of the most recently parsed
-- register in the current template, or false when none was parsed yet.
-- Used to cross-check that all registers of an instruction agree in size.
local parse_reg_type

-- Parse a register operand. Returns the register number shifted into bit
-- position `shift`, plus the matched .type table (if any).
-- Accepts "type:reg" overrides, plain names like x0-x30/w0-w30 (r31 only
-- with the internal "@" prefix, i.e. via xzr/wzr), and Rx(expr)-style
-- C-expression substitutions. no_vreg suppresses the VREG action emission
-- for the latter (used by the alias rewriters which only need the type).
local function parse_reg(expr, shift, no_vreg)
  if not expr then werror("expected register name") end
  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
  if not tname then
    tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$")
  end
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  -- Plain register name: optional "@" prefix allows r31 (zero register).
  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
  if r then
    r = tonumber(r)
    if r <= 30 or (r == 31 and ok31 ~= "" or (rt ~= "w" and rt ~= "x")) then
      if not parse_reg_type then
        parse_reg_type = rt
      elseif parse_reg_type ~= rt then
        werror("register size mismatch")
      end
      return shl(r, shift), tp
    end
  end
  -- Rx(expr): register number supplied by a C expression (VREG action).
  local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$")
  if vreg then
    if not parse_reg_type then
      parse_reg_type = vrt
    elseif parse_reg_type ~= vrt then
      werror("register size mismatch")
    end
    if not no_vreg then waction("VREG", shift, vreg) end
    return 0
  end
  werror("bad register name `"..expr.."'")
end
+
-- Parse a base register for an address operand (bits 5-9). Only 64 bit
-- registers are valid here; "sp" is encoded as register 31.
local function parse_reg_base(expr)
  if expr == "sp" then return 0x3e0 end
  local base, ttype = parse_reg(expr, 5)
  if parse_reg_type ~= "x" then werror("bad register type") end
  parse_reg_type = false
  return base, ttype
end
+
-- Empty sandbox environment for evaluating immediate expressions.
local parse_ctx = {}

-- Compile a code string with parse_ctx as its environment. Uses setfenv on
-- Lua 5.1/LuaJIT and the env parameter of load() on Lua 5.2+.
local loadenv = setfenv and function(s)
  local code = loadstring(s, "")
  if code then setfenv(code, parse_ctx) end
  return code
end or function(s)
  return load(s, "", nil, parse_ctx)
end
+
-- Try to parse simple arithmetic, too, since some basic ops are aliases.
-- Returns the numeric value, or nil if the expression is not a constant.
local function parse_number(n)
  local v = tonumber(n)
  if v then return v end
  local f = loadenv("return "..n)
  if f then
    local ok, res = pcall(f)
    if ok and type(res) == "number" then return res end
  end
  return nil
end
+
-- Parse a generic immediate operand "#expr".
-- bits/shift select the field width and bit position, scale requires the
-- value to be a multiple of 2^scale (the field holds value >> scale), and
-- signed selects a two's-complement instead of an unsigned range check.
-- Non-constant expressions defer to C via an IMM action that packs all of
-- these parameters into the action value.
local function parse_imm(imm, bits, shift, scale, signed)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if n then
    local m = sar(n, scale)
    if shl(m, scale) == n then -- Must be a multiple of 2^scale.
      if signed then
        local s = sar(m, bits-1)
        if s == 0 then return shl(m, shift)
        elseif s == -1 then return shl(m + shl(1, bits), shift) end
      else
        if sar(m, bits) == 0 then return shl(m, shift) end
      end
    end
    werror("out of range immediate `"..imm.."'")
  else
    -- Action value layout: signed flag (bit 15), scale, bits, shift.
    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
    return 0
  end
end
+
-- Parse a 12 bit immediate "#expr" for ADD/SUB-class instructions.
-- The value may be unshifted or pre-shifted left by 12 (LSL #12 form).
local function parse_imm12(imm)
  local expr = match(imm, "^#(.*)$")
  if not expr then werror("expected immediate operand") end
  local n = parse_number(expr)
  if not n then
    -- Not a constant: defer the encoding to C via an IMM12 action.
    waction("IMM12", 0, expr)
    return 0
  end
  if shr(n, 12) == 0 then return shl(n, 10) end
  if band(n, 0xff000fff) == 0 then return shr(n, 2) + 0x00400000 end
  werror("out of range immediate `"..expr.."'")
end
+
-- Parse a logical immediate "#expr" (AND/ORR/EOR class bitmask immediate):
-- a rotated run of ones, replicated across the register in power-of-two
-- sized elements. Returns the N:immr:imms field encoding.
local function parse_imm13(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  local r64 = parse_reg_type == "x"
  if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
    -- Work on the inverted value if bit 0 is set, so the pattern always
    -- starts with a run of zeros.
    local inv = false
    if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
    -- Build the bit string, LSB first, widened to 64 bits for x registers.
    local t = {}
    for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
    local b = table.concat(t)
    b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
    local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
    if p0 then
      -- Element width must be a power of two and the element must repeat
      -- exactly across the whole register.
      local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
      if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
        local s = band(-2*w, 0x3f) - 1
        if w == 64 then s = s + 0x1000 end
        if inv then
          return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
        else
          return shl(w-#p0, 16) + shl(s+#p1, 10)
        end
      end
    end
    werror("out of range immediate `"..imm.."'")
  elseif r64 then
    -- Non-constant 64 bit value: pass both halves to the C encoder.
    waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
    actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
    return 0
  else
    waction("IMM13W", 0, imm)
    return 0
  end
end
+
-- Parse a bit-number immediate "#expr" (0-63) for TBZ/TBNZ.
-- Bits 0-4 go to bit 19, bit 5 goes to bit 31 of the opcode.
local function parse_imm6(imm)
  local expr = match(imm, "^#(.*)$")
  if not expr then werror("expected immediate operand") end
  local n = parse_number(expr)
  if not n then
    waction("IMM6", 0, expr)
    return 0
  end
  if n < 0 or n > 63 then werror("out of range immediate `"..expr.."'") end
  return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0)
end
+
-- Parse a load/store offset immediate (already stripped of '#').
-- Prefers the scaled, unsigned 12 bit form; falls back to the unscaled,
-- signed 9 bit form (LDUR/STUR-style encodings).
local function parse_imm_load(imm, scale)
  local n = parse_number(imm)
  if n then
    local m = sar(n, scale)
    if shl(m, scale) == n and m >= 0 and m < 0x1000 then
      return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
    elseif n >= -256 and n < 256 then
      return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
    end
    werror("out of range immediate `"..imm.."'")
  else
    -- Non-constant: the C side (IMML action) picks the encoding.
    waction("IMML", scale, imm)
    return 0
  end
end
+
-- Parse a floating-point immediate "#expr" (FMOV with constant form).
-- Only values representable with a 4 bit mantissa and a 3 bit exponent
-- (plus sign) are encodable; everything else is rejected.
local function parse_fpimm(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  if n then
    local m, e = math.frexp(n)
    local s, e2 = 0, band(e-2, 7)
    if m < 0 then m = -m; s = 0x00100000 end
    m = m*32-16 -- Map mantissa [0.5,1) to the 4 bit field range 0-15.
    -- The sign-extension check verifies the exponent fits into 3 bits.
    if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then
      return s + shl(e2, 17) + shl(m, 13)
    end
    werror("out of range immediate `"..imm.."'")
  else
    -- Runtime FP immediates are not supported.
    werror("NYI fpimm action")
  end
end
+
-- Parse a shifted-register modifier, e.g. "lsl #3".
local function parse_shift(expr)
  local name, rest = match(expr, "^(%S+)%s*(.*)$")
  local code = map_shift[name]
  if not code then werror("expected shift operand") end
  return parse_imm(rest, 6, 10, 0, false) + shl(code, 22)
end
+
-- Parse the "lsl #n" modifier of MOVN/MOVZ/MOVK, where n must be a
-- multiple of 16 (0-48 for x registers, 0-16 for w registers).
local function parse_lslx16(expr)
  local n = tonumber(match(expr, "^lsl%s*#(%d+)$"))
  if not n then werror("expected shift operand") end
  local badmask = parse_reg_type == "x" and 0xffffffcf or 0xffffffef
  if band(n, badmask) ~= 0 then
    werror("bad shift amount")
  end
  return shl(n, 17)
end
+
-- Parse an extended-register modifier, e.g. "uxtw #2" or "lsl #3".
-- "lsl" maps to uxtx/uxtw depending on the current register size.
local function parse_extend(expr)
  local name, rest = match(expr, "^(%S+)%s*(.*)$")
  local code
  if name == "lsl" then
    code = parse_reg_type == "x" and 3 or 2
  else
    code = map_extend[name]
  end
  if not code then werror("expected extend operand") end
  if rest == "" then return shl(code, 13) end
  return parse_imm(rest, 3, 10, 0, false) + shl(code, 13)
end
+
-- Parse a condition operand into bits 12-15. inv=1 inverts the condition
-- (used by the cinc/cinv/cneg/cset-style aliases).
local function parse_cond(expr, inv)
  local code = map_cond[expr]
  if not code then werror("expected condition operand") end
  return shl(bit.bxor(code, inv), 12)
end
+
-- Parse a load/store address operand starting at params[n] and merge the
-- addressing mode into opcode op. Handled forms:
--   TYPE:reg tail        typed base with C offset expression (IMML action)
--   [base], #imm         post-index
--   [base, #imm]!        pre-index
--   [base {, #imm}]      scaled unsigned / unscaled signed offset
--   [base, Rm {, ext}]   register offset with optional extend/shift
local function parse_load(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local scale = shr(op, 30) -- Access size from the opcode's size field.
  local pn, p2 = params[n], params[n+1]
  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  if not p1 then
    if not p2 then
      -- Not bracketed: try a .type'd base register with a C expression.
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          waction("IMML", scale, format(tp.ctypefmt, tailr))
          return op + base
        end
      end
    end
    werror("expected address operand")
  end
  if p2 then
    if wb == "!" then werror("bad use of '!'") end
    op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
  elseif wb == "!" then
    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
    if not p1a then werror("bad use of '!'") end
    op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
  else
    local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
    op = op + parse_reg_base(p1a)
    if p2a ~= "" then
      local imm = match(p2a, "^,%s*#(.*)$")
      if imm then
        op = op + parse_imm_load(imm, scale)
      else
        -- Register offset with optional extend/shift specifier.
        local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
        op = op + parse_reg(p2b, 16) + 0x00200800
        if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
          werror("bad index register type")
        end
        if p3b == "" then
          -- No specifier: plain 64 bit index register.
          if parse_reg_type ~= "x" then werror("bad index register type") end
          op = op + 0x6000
        else
          -- Shift amount must be 0 or match the access size scale.
          if p3s == "" or p3s == "#0" then
          elseif p3s == "#"..scale then
            op = op + 0x1000
          else
            werror("bad scale")
          end
          if parse_reg_type == "x" then
            if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
            elseif p3b == "sxtx" then op = op + 0xe000
            else
              werror("bad extend/shift specifier")
            end
          else
            if p3b == "uxtw" then op = op + 0x4000
            elseif p3b == "sxtw" then op = op + 0xc000
            else
              werror("bad extend/shift specifier")
            end
          end
        end
      end
    else
      -- [base] alone: scaled unsigned offset form with offset 0.
      if wb == "!" then werror("bad use of '!'") end
      op = op + 0x01000000
    end
  end
  return op
end
+
-- Parse a load/store pair address operand starting at params[n] and merge
-- the addressing mode into opcode op. The 7 bit offset is scaled by the
-- access size (4 or 8 bytes). Handles post-index, pre-index ('!') and
-- signed-offset forms, plus a .type'd base with C offset expression.
local function parse_load_pair(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local pn, p2 = params[n], params[n+1]
  local scale = shr(op, 30) == 0 and 2 or 3
  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  if not p1 then
    if not p2 then
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          -- Signed 7 bit field at bit 15, scaled (see parse_imm layout).
          waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
          return op + base + 0x01000000
        end
      end
    end
    werror("expected address operand")
  end
  if p2 then
    -- Post-index: [base], #imm.
    if wb == "!" then werror("bad use of '!'") end
    op = op + 0x00800000
  else
    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
    if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
    -- Pre-index with '!', plain signed offset otherwise.
    op = op + (wb == "!" and 0x01800000 or 0x01000000)
  end
  return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
end
+
-- Parse a label reference or definition (def=true). Returns the action
-- mode suffix plus its value and an optional expression:
--   "PC"  =>expr pc label    "LG" local/global label
--   "EXT" extern label       "A"  &expr absolute address (refs only)
-- Local label numbering: definitions use 11-19; forward references use
-- 1-9 and backward references 11-19.
local function parse_label(label, def)
  local prefix = label:sub(1, 2)
  -- =>label (pc label reference)
  if prefix == "=>" then
    return "PC", 0, label:sub(3)
  end
  -- ->name (global label reference)
  if prefix == "->" then
    return "LG", map_global[label:sub(3)]
  end
  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
  else
    -- [<>][1-9] (local label reference)
    local dir, lnum = match(label, "^([<>])([1-9])$")
    if dir then -- Fwd: 1-9, Bkwd: 11-19.
      return "LG", lnum + (dir == ">" and 0 or 10)
    end
    -- extern label (extern label reference)
    local extname = match(label, "^extern%s+(%S+)$")
    if extname then
      return "EXT", map_extern[extname]
    end
    -- &expr (pointer)
    if label:sub(1, 1) == "&" then
      return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2))
    end
  end
  -- Implicitly returns nil for anything unrecognized.
end
+
-- Classify a branch/pc-relative opcode and return the relocation subtype
-- bits to merge into the REL_*/ALIGN action value.
local function branch_type(op)
  if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL
  elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
         band(op, 0x3b000000) == 0x18000000 then
    return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
  elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ
  elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR
  -- NOTE: the single-argument band() normalizes the constant 0x90000000 to
  -- a signed 32 bit value, matching the (possibly negative) result of
  -- band(op, 0x9f000000). This is intentional, not a typo.
  elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP
  else
    assert(false, "unknown branch type")
  end
end
+
+------------------------------------------------------------------------------
+
-- Forward declarations (both are defined further below).
local map_op, op_template

-- Create a handler for an alias opcode: f rewrites the operand list in
-- place, then the underlying opcode opname ("name_N" form) is assembled.
-- Without params it returns a help string pointing at the aliased opcode.
local function op_alias(opname, f)
  return function(params, nparams)
    if not params then return "-> "..opname:sub(1, -3) end
    f(params, nparams)
    op_template(params, map_op[opname], nparams)
  end
end
+
-- Rewrite (lsb, width) operands into (immr, imms) = (lsb, lsb+width-1)
-- for the underlying *bfm instruction (sbfx/bfxil/ubfx aliases).
local function alias_bfx(p)
  p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1"
end
+
-- Rewrite (lsb, width) operands into (immr, imms) for the *bfm-based
-- insert-style aliases (sbfiz/bfi/ubfiz). The rotation is taken modulo
-- the register width (32 or 64).
local function alias_bfiz(p)
  parse_reg(p[1], 0, true)
  local bits = parse_reg_type == "w" and 32 or 64
  p[3] = "#("..bits.."-("..p[3]:sub(2).."))%"..bits
  p[4] = "#("..p[4]:sub(2)..")-1"
end
+
-- lsl Rd, Rn, #sh is an alias for ubfm with rewritten immediates.
local alias_lslimm = op_alias("ubfm_4", function(p)
  parse_reg(p[1], 0, true)
  local sh = p[3]:sub(2)
  local bits = parse_reg_type == "w" and 32 or 64
  p[3] = "#("..bits.."-("..sh.."))%"..bits
  p[4] = "#"..(bits-1).."-("..sh..")"
end)
+
-- Template strings for ARM64 instructions.
-- Each "|"-separated alternative consists of 8 hex digits (the base opcode)
-- followed by one template character per operand; see parse_template below
-- for the meaning of the individual template characters.
map_op = {
  -- Basic data processing instructions.
  add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
  add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
  adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
  adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
  cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
  cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",

  sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
  sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
  subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
  subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
  cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
  cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",

  neg_2 = "4b0003e0DMg",
  neg_3 = "4b0003e0DMSg",
  negs_2 = "6b0003e0DMg",
  negs_3 = "6b0003e0DMSg",

  adc_3 = "1a000000DNMg",
  adcs_3 = "3a000000DNMg",
  sbc_3 = "5a000000DNMg",
  sbcs_3 = "7a000000DNMg",
  ngc_2 = "5a0003e0DMg",
  ngcs_2 = "7a0003e0DMg",

  and_3 = "0a000000DNMg|12000000pDNig",
  and_4 = "0a000000DNMSg",
  orr_3 = "2a000000DNMg|32000000pDNig",
  orr_4 = "2a000000DNMSg",
  eor_3 = "4a000000DNMg|52000000pDNig",
  eor_4 = "4a000000DNMSg",
  ands_3 = "6a000000DNMg|72000000DNig",
  ands_4 = "6a000000DNMSg",
  tst_2 = "6a00001fNMg|7200001fNig",
  tst_3 = "6a00001fNMSg",

  bic_3 = "0a200000DNMg",
  bic_4 = "0a200000DNMSg",
  orn_3 = "2a200000DNMg",
  orn_4 = "2a200000DNMSg",
  eon_3 = "4a200000DNMg",
  eon_4 = "4a200000DNMSg",
  bics_3 = "6a200000DNMg",
  bics_4 = "6a200000DNMSg",

  movn_2 = "12800000DWg",
  movn_3 = "12800000DWRg",
  movz_2 = "52800000DWg",
  movz_3 = "52800000DWRg",
  movk_2 = "72800000DWg",
  movk_3 = "72800000DWRg",

  -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
  mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
  mov_3 = "2a0003e0DMSg",
  mvn_2 = "2a2003e0DMg",
  mvn_3 = "2a2003e0DMSg",

  adr_2 = "10000000DBx",
  adrp_2 = "90000000DBx",

  csel_4 = "1a800000DNMCg",
  csinc_4 = "1a800400DNMCg",
  csinv_4 = "5a800000DNMCg",
  csneg_4 = "5a800400DNMCg",
  cset_2 = "1a9f07e0Dcg",
  csetm_2 = "5a9f03e0Dcg",
  cinc_3 = "1a800400DNmcg",
  cinv_3 = "5a800000DNmcg",
  cneg_3 = "5a800400DNmcg",

  ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
  ccmp_4 = "7a400000NMVCg|7a400800N5VCg",

  madd_4 = "1b000000DNMAg",
  msub_4 = "1b008000DNMAg",
  mul_3 = "1b007c00DNMg",
  mneg_3 = "1b00fc00DNMg",

  smaddl_4 = "9b200000DxNMwAx",
  smsubl_4 = "9b208000DxNMwAx",
  smull_3 = "9b207c00DxNMw",
  smnegl_3 = "9b20fc00DxNMw",
  smulh_3 = "9b407c00DNMx",
  umaddl_4 = "9ba00000DxNMwAx",
  umsubl_4 = "9ba08000DxNMwAx",
  umull_3 = "9ba07c00DxNMw",
  umnegl_3 = "9ba0fc00DxNMw",
  umulh_3 = "9bc07c00DNMx",

  udiv_3 = "1ac00800DNMg",
  sdiv_3 = "1ac00c00DNMg",

  -- Bit operations.
  sbfm_4 = "13000000DN12w|93400000DN12x",
  bfm_4 = "33000000DN12w|b3400000DN12x",
  ubfm_4 = "53000000DN12w|d3400000DN12x",
  extr_4 = "13800000DNM2w|93c00000DNM2x",

  sxtb_2 = "13001c00DNw|93401c00DNx",
  sxth_2 = "13003c00DNw|93403c00DNx",
  sxtw_2 = "93407c00DxNw",
  uxtb_2 = "53001c00DNw",
  uxth_2 = "53003c00DNw",

  sbfx_4 = op_alias("sbfm_4", alias_bfx),
  bfxil_4 = op_alias("bfm_4", alias_bfx),
  ubfx_4 = op_alias("ubfm_4", alias_bfx),
  sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
  bfi_4 = op_alias("bfm_4", alias_bfiz),
  ubfiz_4 = op_alias("ubfm_4", alias_bfiz),

  -- lsl with an immediate (3rd operand starts with '#', byte 35) is an
  -- ubfm alias; the register form uses the regular template.
  lsl_3 = function(params, nparams)
    if params and params[3]:byte() == 35 then
      return alias_lslimm(params, nparams)
    else
      return op_template(params, "1ac02000DNMg", nparams)
    end
  end,
  lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
  asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
  ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",

  clz_2 = "5ac01000DNg",
  cls_2 = "5ac01400DNg",
  rbit_2 = "5ac00000DNg",
  rev_2 = "5ac00800DNw|dac00c00DNx",
  rev16_2 = "5ac00400DNg",
  rev32_2 = "dac00800DNx",

  -- Loads and stores.
  ["strb_*"] = "38000000DwL",
  ["ldrb_*"] = "38400000DwL",
  ["ldrsb_*"] = "38c00000DwL|38800000DxL",
  ["strh_*"] = "78000000DwL",
  ["ldrh_*"] = "78400000DwL",
  ["ldrsh_*"] = "78c00000DwL|78800000DxL",
  ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
  ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
  ["ldrsw_*"] = "98000000DxB|b8800000DxL",
  -- NOTE: ldur etc. are handled by ldr et al.

  ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP",
  ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP",
  ["ldpsw_*"] = "68400000DAxP",

  -- Branches.
  b_1 = "14000000B",
  bl_1 = "94000000B",
  blr_1 = "d63f0000Nx",
  br_1 = "d61f0000Nx",
  ret_0 = "d65f03c0",
  ret_1 = "d65f0000Nx",
  -- b.cond is added below.
  cbz_2 = "34000000DBg",
  cbnz_2 = "35000000DBg",
  tbz_3 = "36000000DTBw|36000000DTBx",
  tbnz_3 = "37000000DTBw|37000000DTBx",

  -- Miscellaneous instructions.
  -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
  -- TODO: sys, sysl, ic, dc, at, tlbi
  -- TODO: hint, yield, wfe, wfi, sev, sevl
  -- TODO: clrex, dsb, dmb, isb
  nop_0 = "d503201f",
  brk_0 = "d4200000",
  brk_1 = "d4200000W",

  -- Floating point instructions.
  fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
  fabs_2 = "1e20c000DNf",
  fneg_2 = "1e214000DNf",
  fsqrt_2 = "1e21c000DNf",

  fcvt_2 = "1e22c000DdNs|1e624000DsNd",

  -- TODO: half-precision and fixed-point conversions.
  fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
  fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
  fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
  fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
  fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
  fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
  fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
  fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
  fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
  fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",

  scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
  ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",

  frintn_2 = "1e244000DNf",
  frintp_2 = "1e24c000DNf",
  frintm_2 = "1e254000DNf",
  frintz_2 = "1e25c000DNf",
  frinta_2 = "1e264000DNf",
  frintx_2 = "1e274000DNf",
  frinti_2 = "1e27c000DNf",

  fadd_3 = "1e202800DNMf",
  fsub_3 = "1e203800DNMf",
  fmul_3 = "1e200800DNMf",
  fnmul_3 = "1e208800DNMf",
  fdiv_3 = "1e201800DNMf",

  fmadd_4 = "1f000000DNMAf",
  fmsub_4 = "1f008000DNMAf",
  fnmadd_4 = "1f200000DNMAf",
  fnmsub_4 = "1f208000DNMAf",

  fmax_3 = "1e204800DNMf",
  fmaxnm_3 = "1e206800DNMf",
  fmin_3 = "1e205800DNMf",
  fminnm_3 = "1e207800DNMf",

  fcmp_2 = "1e202000NMf|1e202008NZf",
  fcmpe_2 = "1e202010NMf|1e202018NZf",

  fccmp_4 = "1e200400NMVCf",
  fccmpe_4 = "1e200410NMVCf",

  fcsel_4 = "1e200c00DNMCf",

  -- TODO: crc32*, aes*, sha*, pmull
  -- TODO: SIMD instructions.
}
+
-- Synthesize the b.<cond> opcodes from the condition map.
for name,ccode in pairs(map_cond) do
  map_op["b"..name.."_1"] = tohex(0x54000000 + ccode).."B"
end
+
+------------------------------------------------------------------------------
+
-- Handle opcodes defined with template strings.
-- Template characters (each consumes one operand unless noted otherwise):
--   D/N/M/A  register at bit position 0/5/16/10
--   m        repeat previous operand's register at bit 16 (no operand)
--   p        allow "sp" for the next register operand (no operand)
--   g/f      set 64 bit / double flag from register type (no operand)
--   x/w/d/s  require this register type and reset it (no operand)
--   L/P      load/store (pair) address operand(s)
--   B        branch/pc-relative label
--   I        12 bit immediate; i: logical (bitmask) immediate
--   W        16 bit immediate; T: bit number; F: fp immediate; Z: #0
--   1/2/5/V  6/6/5/4 bit immediate at bit 16/10/16/0
--   S/X/R    shift / extend / "lsl #n*16" modifier
--   C/c      condition code (c inverted)
local function parse_template(params, template, nparams, pos)
  local op = tonumber(template:sub(1, 8), 16)
  local n = 1
  local rtt = {}

  parse_reg_type = false

  -- Process each character.
  for p in gmatch(template:sub(9), ".") do
    local q = params[n]
    if p == "D" then
      op = op + parse_reg(q, 0); n = n + 1
    elseif p == "N" then
      op = op + parse_reg(q, 5); n = n + 1
    elseif p == "M" then
      op = op + parse_reg(q, 16); n = n + 1
    elseif p == "A" then
      op = op + parse_reg(q, 10); n = n + 1
    elseif p == "m" then
      op = op + parse_reg(params[n-1], 16)

    elseif p == "p" then
      if q == "sp" then params[n] = "@x31" end
    elseif p == "g" then
      -- General purpose register: bit 31 selects 64 bit operation.
      if parse_reg_type == "x" then
        op = op + 0x80000000
      elseif parse_reg_type ~= "w" then
        werror("bad register type")
      end
      parse_reg_type = false
    elseif p == "f" then
      -- FP register: bit 22 selects double precision.
      if parse_reg_type == "d" then
        op = op + 0x00400000
      elseif parse_reg_type ~= "s" then
        werror("bad register type")
      end
      parse_reg_type = false
    elseif p == "x" or p == "w" or p == "d" or p == "s" then
      if parse_reg_type ~= p then
        werror("register size mismatch")
      end
      parse_reg_type = false

    elseif p == "L" then
      op = parse_load(params, nparams, n, op)
    elseif p == "P" then
      op = parse_load_pair(params, nparams, n, op)

    elseif p == "B" then
      local mode, v, s = parse_label(q, false); n = n + 1
      if not mode then werror("bad label `"..q.."'") end
      local m = branch_type(op)
      if mode == "A" then
        -- Absolute address: pass both 32 bit halves to the C encoder.
        waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s))
        actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s)
      else
        waction("REL_"..mode, v+m, s, 1)
      end

    elseif p == "I" then
      op = op + parse_imm12(q); n = n + 1
    elseif p == "i" then
      op = op + parse_imm13(q); n = n + 1
    elseif p == "W" then
      op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
    elseif p == "T" then
      op = op + parse_imm6(q); n = n + 1
    elseif p == "1" then
      op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
    elseif p == "2" then
      op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
    elseif p == "5" then
      op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
    elseif p == "V" then
      op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
    elseif p == "F" then
      op = op + parse_fpimm(q); n = n + 1
    elseif p == "Z" then
      if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
      n = n + 1

    elseif p == "S" then
      op = op + parse_shift(q); n = n + 1
    elseif p == "X" then
      op = op + parse_extend(q); n = n + 1
    elseif p == "R" then
      op = op + parse_lslx16(q); n = n + 1
    elseif p == "C" then
      op = op + parse_cond(q, 0); n = n + 1
    elseif p == "c" then
      op = op + parse_cond(q, 1); n = n + 1

    else
      assert(false)
    end
  end
  wputpos(pos, op)
end
+
-- Assemble an opcode with a (possibly multi-alternative) template string.
-- Tries each "|"-separated alternative in turn; when one fails, any
-- actions/args it emitted are rolled back before the next attempt. Without
-- params it returns the template with the hex opcodes stripped (help text).
function op_template(params, template, nparams)
  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 4 positions.
  if secpos+4 > maxsecpos then wflush() end
  local pos = wpos()
  local lpos, apos, spos = #actlist, #actargs, secpos

  local ok, err
  for t in gmatch(template, "[^|]+") do
    ok, err = pcall(parse_template, params, t, nparams, pos)
    if ok then return end
    -- Roll back the failed attempt (max. 4 entries each, see above).
    secpos = spos
    actlist[lpos+1] = nil
    actlist[lpos+2] = nil
    actlist[lpos+3] = nil
    actlist[lpos+4] = nil
    actargs[apos+1] = nil
    actargs[apos+2] = nil
    actargs[apos+3] = nil
    actargs[apos+4] = nil
  end
  -- All alternatives failed: re-raise the last error.
  error(err, 0)
end

map_op[".template__"] = op_template
+
+------------------------------------------------------------------------------
+
-- Pseudo-opcode to mark the position where the action list is to be emitted.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  -- The parameter is used verbatim as the C variable name.
  local cvar = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeactions(out, cvar) end)
end
+
-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  -- The parameter is used verbatim as the enum member prefix.
  local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobals(out, enumprefix) end)
end
+
-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  -- The parameter is used verbatim as the C variable name.
  local cvar = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobalnames(out, cvar) end)
end
+
-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  -- The parameter is used verbatim as the C variable name.
  local cvar = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeexternnames(out, cvar) end)
end
+
+------------------------------------------------------------------------------
+
-- Label pseudo-opcode (converted from trailing colon form).
map_op[".label_1"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr" end
  if secpos+1 > maxsecpos then wflush() end
  local mode, num, pcexpr = parse_label(params[1], true)
  -- Extern labels cannot be defined here, only referenced.
  if mode == nil or mode == "EXT" then werror("bad label definition") end
  waction("LABEL_"..mode, num, pcexpr, 1)
end
+
+------------------------------------------------------------------------------
+
-- Pseudo-opcodes for data storage: .long (32 bit), .quad/.addr (64 bit).
-- Constants are emitted directly (with sign extension of the high word for
-- 64 bit); labels and non-constant expressions go through REL_*/IMMV
-- actions.
local function op_data(params)
  if not params then return "imm..." end
  local sz = params.op == ".long" and 4 or 8
  for _,p in ipairs(params) do
    local imm = parse_number(p)
    if imm then
      local n = tobit(imm)
      -- Accept values that fit into 32 bits (as signed or unsigned).
      if n == imm or (n < 0 and n + 2^32 == imm) then
        wputw(n < 0 and n + 2^32 or n)
        if sz == 8 then
          -- Sign-extend into the high word.
          wputw(imm < 0 and 0xffffffff or 0)
        end
      elseif sz == 4 then
        werror("bad immediate `"..p.."'")
      else
        -- Doesn't fit into 32 bits: fall through to the expression path.
        imm = nil
      end
    end
    if not imm then
      local mode, v, s = parse_label(p, false)
      if sz == 4 then
        if mode then werror("label does not fit into .long") end
        waction("IMMV", 0, p)
      elseif mode and mode ~= "A" then
        waction("REL_"..mode, v+0x8000, s, 1)
      else
        if mode == "A" then p = s end
        -- Emit a 64 bit value as two 32 bit words.
        waction("IMMV", 0, format("(unsigned int)(%s)", p))
        waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", p))
      end
    end
    if secpos+2 > maxsecpos then wflush() end
  end
end
map_op[".long_*"] = op_data
map_op[".quad_*"] = op_data
map_op[".addr_*"] = op_data
+
+-- Alignment pseudo-opcode.
+map_op[".align_1"] = function(params)
+ if not params then return "numpow2" end
+ if secpos+1 > maxsecpos then wflush() end
+ local align = tonumber(params[1])
+ if align then
+ local x = align
+ -- Must be a power of 2 in the range (2 ... 256).
+ for i=1,8 do
+ x = x / 2
+ if x == 1 then
+ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
+ return
+ end
+ end
+ end
+ werror("bad alignment")
+end
+
+------------------------------------------------------------------------------
+
+-- Pseudo-opcode for (primitive) type definitions (map to C types).
+map_op[".type_3"] = function(params, nparams)
+ if not params then
+ return nparams == 2 and "name, ctype" or "name, ctype, reg"
+ end
+ local name, ctype, reg = params[1], params[2], params[3]
+ if not match(name, "^[%a_][%w_]*$") then
+ werror("bad type name `"..name.."'")
+ end
+ local tp = map_type[name]
+ if tp then
+ werror("duplicate type `"..name.."'")
+ end
+ -- Add #type to defines. A bit unclean to put it in map_archdef.
+ map_archdef["#"..name] = "sizeof("..ctype..")"
+ -- Add new type and emit shortcut define.
+ local num = ctypenum + 1
+ map_type[name] = {
+ ctype = ctype,
+ ctypefmt = format("Dt%X(%%s)", num),
+ reg = reg,
+ }
+ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
+ ctypenum = num
+end
+map_op[".type_2"] = map_op[".type_3"]
+
+-- Dump type definitions.
+local function dumptypes(out, lvl)
+ local t = {}
+ for name in pairs(map_type) do t[#t+1] = name end
+ sort(t)
+ out:write("Type definitions:\n")
+ for _,name in ipairs(t) do
+ local tp = map_type[name]
+ local reg = tp.reg or ""
+ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
+ end
+ out:write("\n")
+end
+
+------------------------------------------------------------------------------
+
+-- Set the current section.
+function _M.section(num)
+ waction("SECTION", num)
+ wflush(true) -- SECTION is a terminal action.
+end
+
+------------------------------------------------------------------------------
+
+-- Dump architecture description.
+function _M.dumparch(out)
+ out:write(format("DynASM %s version %s, released %s\n\n",
+ _info.arch, _info.version, _info.release))
+ dumpactions(out)
+end
+
+-- Dump all user defined elements.
+function _M.dumpdef(out, lvl)
+ dumptypes(out, lvl)
+ dumpglobals(out, lvl)
+ dumpexterns(out, lvl)
+end
+
+------------------------------------------------------------------------------
+
+-- Pass callbacks from/to the DynASM core.
+function _M.passcb(wl, we, wf, ww)
+ wline, werror, wfatal, wwarn = wl, we, wf, ww
+ return wflush
+end
+
+-- Setup the arch-specific module.
+function _M.setup(arch, opt)
+ g_arch, g_opt = arch, opt
+end
+
+-- Merge the core maps and the arch-specific maps.
+function _M.mergemaps(map_coreop, map_def)
+ setmetatable(map_op, { __index = map_coreop })
+ setmetatable(map_def, { __index = map_archdef })
+ return map_op, map_def
+end
+
+return _M
+
+------------------------------------------------------------------------------
+
diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h
index 15b4b137..3e99a005 100644
--- a/dynasm/dasm_mips.h
+++ b/dynasm/dasm_mips.h
@@ -21,7 +21,7 @@ enum {
/* The following actions need a buffer position. */
DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
/* The following actions also have an argument. */
- DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS,
DASM__MAX
};
@@ -155,10 +155,10 @@ void dasm_setup(Dst_DECL, const void *actionlist)
#ifdef DASM_CHECKS
#define CK(x, st) \
do { if (!(x)) { \
- D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0)
+ D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0)
#define CKPL(kind, st) \
do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \
- D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0)
+ D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0)
#else
#define CK(x, st) ((void)0)
#define CKPL(kind, st) ((void)0)
@@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...)
*pl = -pos; /* Label exists now. */
b[pos++] = ofs; /* Store pass1 offset estimate. */
break;
- case DASM_IMM:
+ case DASM_IMM: case DASM_IMMS:
#ifdef DASM_CHECKS
CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I);
#endif
@@ -273,7 +273,7 @@ int dasm_link(Dst_DECL, size_t *szp)
{ /* Handle globals not defined in this translation unit. */
int idx;
- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
int n = D->lglabels[idx];
/* Undefined label: Collapse rel chain and replace with marker (< 0). */
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp)
case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
case DASM_REL_LG: case DASM_REL_PC: pos++; break;
case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
- case DASM_IMM: pos++; break;
+ case DASM_IMM: case DASM_IMMS: pos++; break;
}
}
stop: (void)0;
@@ -314,7 +314,7 @@ int dasm_link(Dst_DECL, size_t *szp)
#ifdef DASM_CHECKS
#define CK(x, st) \
- do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0)
+ do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0)
#else
#define CK(x, st) ((void)0)
#endif
@@ -349,25 +349,32 @@ int dasm_encode(Dst_DECL, void *buffer)
ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
break;
case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
+ if (n < 0) {
+ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
+ goto patchrel;
+ }
/* fallthrough */
case DASM_REL_PC:
CK(n >= 0, UNDEF_PC);
n = *DASM_POS2PTR(D, n);
if (ins & 2048)
- n = n - (int)((char *)cp - base);
+ n = (n + (int)(size_t)base) & 0x0fffffff;
else
- n = (n + (int)base) & 0x0fffffff;
- patchrel:
+ n = n - (int)((char *)cp - base);
+ patchrel: {
+ unsigned int e = 16 + ((ins >> 12) & 15);
CK((n & 3) == 0 &&
- ((n + ((ins & 2048) ? 0x00020000 : 0)) >>
- ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL);
- cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff));
+ ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL);
+ cp[-1] |= ((n>>2) & ((1<<e)-1));
+ }
break;
case DASM_LABEL_LG:
ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n);
break;
case DASM_LABEL_PC: break;
+ case DASM_IMMS:
+ cp[-1] |= ((n>>3) & 4); n &= 0x1f;
+ /* fallthrough */
case DASM_IMM:
cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
break;
@@ -410,7 +417,7 @@ int dasm_checkstep(Dst_DECL, int secmatch)
}
if (D->status == DASM_S_OK && secmatch >= 0 &&
D->section != &D->sections[secmatch])
- D->status = DASM_S_MATCH_SEC|(D->section-D->sections);
+ D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections);
return D->status;
}
#endif
diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua
index dfa1f72e..6f893fe0 100644
--- a/dynasm/dasm_mips.lua
+++ b/dynasm/dasm_mips.lua
@@ -1,17 +1,20 @@
------------------------------------------------------------------------------
--- DynASM MIPS module.
+-- DynASM MIPS32/MIPS64 module.
--
-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------
+local mips64 = mips64
+local mipsr6 = _map_def.MIPSR6
+
-- Module information:
local _info = {
- arch = "mips",
- description = "DynASM MIPS module",
- version = "1.3.0",
- vernum = 10300,
- release = "2012-01-23",
+ arch = mips64 and "mips64" or "mips",
+ description = "DynASM MIPS32/MIPS64 module",
+ version = "1.5.0",
+ vernum = 10500,
+ release = "2021-05-02",
author = "Mike Pall",
license = "MIT",
}
@@ -27,7 +30,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local match, gmatch = _s.match, _s.gmatch
local concat, sort = table.concat, table.sort
local bit = bit or require("bit")
-local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex
+local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
+local tohex = bit.tohex
-- Inherited tables and callbacks.
local g_opt, g_arch
@@ -38,7 +42,7 @@ local wline, werror, wfatal, wwarn
local action_names = {
"STOP", "SECTION", "ESC", "REL_EXT",
"ALIGN", "REL_LG", "LABEL_LG",
- "REL_PC", "LABEL_PC", "IMM",
+ "REL_PC", "LABEL_PC", "IMM", "IMMS",
}
-- Maximum number of section buffer positions for dasm_put().
@@ -235,7 +239,6 @@ local map_op = {
bne_3 = "14000000STB",
blez_2 = "18000000SB",
bgtz_2 = "1c000000SB",
- addi_3 = "20000000TSI",
li_2 = "24000000TI",
addiu_3 = "24000000TSI",
slti_3 = "28000000TSI",
@@ -245,70 +248,52 @@ local map_op = {
ori_3 = "34000000TSU",
xori_3 = "38000000TSU",
lui_2 = "3c000000TU",
- beqzl_2 = "50000000SB",
- beql_3 = "50000000STB",
- bnezl_2 = "54000000SB",
- bnel_3 = "54000000STB",
- blezl_2 = "58000000SB",
- bgtzl_2 = "5c000000SB",
+ daddiu_3 = mips64 and "64000000TSI",
+ ldl_2 = mips64 and "68000000TO",
+ ldr_2 = mips64 and "6c000000TO",
lb_2 = "80000000TO",
lh_2 = "84000000TO",
- lwl_2 = "88000000TO",
lw_2 = "8c000000TO",
lbu_2 = "90000000TO",
lhu_2 = "94000000TO",
- lwr_2 = "98000000TO",
+ lwu_2 = mips64 and "9c000000TO",
sb_2 = "a0000000TO",
sh_2 = "a4000000TO",
- swl_2 = "a8000000TO",
sw_2 = "ac000000TO",
- swr_2 = "b8000000TO",
- cache_2 = "bc000000NO",
- ll_2 = "c0000000TO",
lwc1_2 = "c4000000HO",
- pref_2 = "cc000000NO",
ldc1_2 = "d4000000HO",
- sc_2 = "e0000000TO",
+ ld_2 = mips64 and "dc000000TO",
swc1_2 = "e4000000HO",
sdc1_2 = "f4000000HO",
+ sd_2 = mips64 and "fc000000TO",
-- Opcode SPECIAL.
nop_0 = "00000000",
sll_3 = "00000000DTA",
- movf_2 = "00000001DS",
- movf_3 = "00000001DSC",
- movt_2 = "00010001DS",
- movt_3 = "00010001DSC",
+ sextw_2 = "00000000DT",
srl_3 = "00000002DTA",
rotr_3 = "00200002DTA",
sra_3 = "00000003DTA",
sllv_3 = "00000004DTS",
srlv_3 = "00000006DTS",
rotrv_3 = "00000046DTS",
+ drotrv_3 = mips64 and "00000056DTS",
srav_3 = "00000007DTS",
- jr_1 = "00000008S",
jalr_1 = "0000f809S",
jalr_2 = "00000009DS",
- movz_3 = "0000000aDST",
- movn_3 = "0000000bDST",
syscall_0 = "0000000c",
syscall_1 = "0000000cY",
break_0 = "0000000d",
break_1 = "0000000dY",
sync_0 = "0000000f",
- mfhi_1 = "00000010D",
- mthi_1 = "00000011S",
- mflo_1 = "00000012D",
- mtlo_1 = "00000013S",
- mult_2 = "00000018ST",
- multu_2 = "00000019ST",
- div_2 = "0000001aST",
- divu_2 = "0000001bST",
+ dsllv_3 = mips64 and "00000014DTS",
+ dsrlv_3 = mips64 and "00000016DTS",
+ dsrav_3 = mips64 and "00000017DTS",
add_3 = "00000020DST",
- move_2 = "00000021DS",
+ move_2 = mips64 and "00000025DS" or "00000021DS",
addu_3 = "00000021DST",
sub_3 = "00000022DST",
- negu_2 = "00000023DT",
+ negu_2 = mips64 and "0000002fDT" or "00000023DT",
subu_3 = "00000023DST",
and_3 = "00000024DST",
or_3 = "00000025DST",
@@ -317,6 +302,10 @@ local map_op = {
nor_3 = "00000027DST",
slt_3 = "0000002aDST",
sltu_3 = "0000002bDST",
+ dadd_3 = mips64 and "0000002cDST",
+ daddu_3 = mips64 and "0000002dDST",
+ dsub_3 = mips64 and "0000002eDST",
+ dsubu_3 = mips64 and "0000002fDST",
tge_2 = "00000030ST",
tge_3 = "00000030STZ",
tgeu_2 = "00000031ST",
@@ -329,40 +318,36 @@ local map_op = {
teq_3 = "00000034STZ",
tne_2 = "00000036ST",
tne_3 = "00000036STZ",
+ dsll_3 = mips64 and "00000038DTa",
+ dsrl_3 = mips64 and "0000003aDTa",
+ drotr_3 = mips64 and "0020003aDTa",
+ dsra_3 = mips64 and "0000003bDTa",
+ dsll32_3 = mips64 and "0000003cDTA",
+ dsrl32_3 = mips64 and "0000003eDTA",
+ drotr32_3 = mips64 and "0020003eDTA",
+ dsra32_3 = mips64 and "0000003fDTA",
-- Opcode REGIMM.
bltz_2 = "04000000SB",
bgez_2 = "04010000SB",
bltzl_2 = "04020000SB",
bgezl_2 = "04030000SB",
- tgei_2 = "04080000SI",
- tgeiu_2 = "04090000SI",
- tlti_2 = "040a0000SI",
- tltiu_2 = "040b0000SI",
- teqi_2 = "040c0000SI",
- tnei_2 = "040e0000SI",
- bltzal_2 = "04100000SB",
bal_1 = "04110000B",
- bgezal_2 = "04110000SB",
- bltzall_2 = "04120000SB",
- bgezall_2 = "04130000SB",
synci_1 = "041f0000O",
- -- Opcode SPECIAL2.
- madd_2 = "70000000ST",
- maddu_2 = "70000001ST",
- mul_3 = "70000002DST",
- msub_2 = "70000004ST",
- msubu_2 = "70000005ST",
- clz_2 = "70000020DS=",
- clo_2 = "70000021DS=",
- sdbbp_0 = "7000003f",
- sdbbp_1 = "7000003fY",
-
-- Opcode SPECIAL3.
ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1
+ dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32
+ dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1
+ dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1
+ zextw_2 = mips64 and "7c00f803TS",
ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1
+ dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33
+ dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33
+ dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1
wsbh_2 = "7c0000a0DT",
+ dsbh_2 = mips64 and "7c0000a4DT",
+ dshd_2 = mips64 and "7c000164DT",
seb_2 = "7c000420DT",
seh_2 = "7c000620DT",
rdhwr_2 = "7c00003bTD",
@@ -370,8 +355,12 @@ local map_op = {
-- Opcode COP0.
mfc0_2 = "40000000TD",
mfc0_3 = "40000000TDW",
+ dmfc0_2 = mips64 and "40200000TD",
+ dmfc0_3 = mips64 and "40200000TDW",
mtc0_2 = "40800000TD",
mtc0_3 = "40800000TDW",
+ dmtc0_2 = mips64 and "40a00000TD",
+ dmtc0_3 = mips64 and "40a00000TDW",
rdpgpr_2 = "41400000DT",
di_0 = "41606000",
di_1 = "41606000T",
@@ -388,21 +377,14 @@ local map_op = {
-- Opcode COP1.
mfc1_2 = "44000000TG",
+ dmfc1_2 = mips64 and "44200000TG",
cfc1_2 = "44400000TG",
mfhc1_2 = "44600000TG",
mtc1_2 = "44800000TG",
+ dmtc1_2 = mips64 and "44a00000TG",
ctc1_2 = "44c00000TG",
mthc1_2 = "44e00000TG",
- bc1f_1 = "45000000B",
- bc1f_2 = "45000000CB",
- bc1t_1 = "45010000B",
- bc1t_2 = "45010000CB",
- bc1fl_1 = "45020000B",
- bc1fl_2 = "45020000CB",
- bc1tl_1 = "45030000B",
- bc1tl_2 = "45030000CB",
-
["add.s_3"] = "46000000FGH",
["sub.s_3"] = "46000001FGH",
["mul.s_3"] = "46000002FGH",
@@ -419,51 +401,11 @@ local map_op = {
["trunc.w.s_2"] = "4600000dFG",
["ceil.w.s_2"] = "4600000eFG",
["floor.w.s_2"] = "4600000fFG",
- ["movf.s_2"] = "46000011FG",
- ["movf.s_3"] = "46000011FGC",
- ["movt.s_2"] = "46010011FG",
- ["movt.s_3"] = "46010011FGC",
- ["movz.s_3"] = "46000012FGT",
- ["movn.s_3"] = "46000013FGT",
["recip.s_2"] = "46000015FG",
["rsqrt.s_2"] = "46000016FG",
["cvt.d.s_2"] = "46000021FG",
["cvt.w.s_2"] = "46000024FG",
["cvt.l.s_2"] = "46000025FG",
- ["cvt.ps.s_3"] = "46000026FGH",
- ["c.f.s_2"] = "46000030GH",
- ["c.f.s_3"] = "46000030VGH",
- ["c.un.s_2"] = "46000031GH",
- ["c.un.s_3"] = "46000031VGH",
- ["c.eq.s_2"] = "46000032GH",
- ["c.eq.s_3"] = "46000032VGH",
- ["c.ueq.s_2"] = "46000033GH",
- ["c.ueq.s_3"] = "46000033VGH",
- ["c.olt.s_2"] = "46000034GH",
- ["c.olt.s_3"] = "46000034VGH",
- ["c.ult.s_2"] = "46000035GH",
- ["c.ult.s_3"] = "46000035VGH",
- ["c.ole.s_2"] = "46000036GH",
- ["c.ole.s_3"] = "46000036VGH",
- ["c.ule.s_2"] = "46000037GH",
- ["c.ule.s_3"] = "46000037VGH",
- ["c.sf.s_2"] = "46000038GH",
- ["c.sf.s_3"] = "46000038VGH",
- ["c.ngle.s_2"] = "46000039GH",
- ["c.ngle.s_3"] = "46000039VGH",
- ["c.seq.s_2"] = "4600003aGH",
- ["c.seq.s_3"] = "4600003aVGH",
- ["c.ngl.s_2"] = "4600003bGH",
- ["c.ngl.s_3"] = "4600003bVGH",
- ["c.lt.s_2"] = "4600003cGH",
- ["c.lt.s_3"] = "4600003cVGH",
- ["c.nge.s_2"] = "4600003dGH",
- ["c.nge.s_3"] = "4600003dVGH",
- ["c.le.s_2"] = "4600003eGH",
- ["c.le.s_3"] = "4600003eVGH",
- ["c.ngt.s_2"] = "4600003fGH",
- ["c.ngt.s_3"] = "4600003fVGH",
-
["add.d_3"] = "46200000FGH",
["sub.d_3"] = "46200001FGH",
["mul.d_3"] = "46200002FGH",
@@ -480,130 +422,410 @@ local map_op = {
["trunc.w.d_2"] = "4620000dFG",
["ceil.w.d_2"] = "4620000eFG",
["floor.w.d_2"] = "4620000fFG",
- ["movf.d_2"] = "46200011FG",
- ["movf.d_3"] = "46200011FGC",
- ["movt.d_2"] = "46210011FG",
- ["movt.d_3"] = "46210011FGC",
- ["movz.d_3"] = "46200012FGT",
- ["movn.d_3"] = "46200013FGT",
["recip.d_2"] = "46200015FG",
["rsqrt.d_2"] = "46200016FG",
["cvt.s.d_2"] = "46200020FG",
["cvt.w.d_2"] = "46200024FG",
["cvt.l.d_2"] = "46200025FG",
- ["c.f.d_2"] = "46200030GH",
- ["c.f.d_3"] = "46200030VGH",
- ["c.un.d_2"] = "46200031GH",
- ["c.un.d_3"] = "46200031VGH",
- ["c.eq.d_2"] = "46200032GH",
- ["c.eq.d_3"] = "46200032VGH",
- ["c.ueq.d_2"] = "46200033GH",
- ["c.ueq.d_3"] = "46200033VGH",
- ["c.olt.d_2"] = "46200034GH",
- ["c.olt.d_3"] = "46200034VGH",
- ["c.ult.d_2"] = "46200035GH",
- ["c.ult.d_3"] = "46200035VGH",
- ["c.ole.d_2"] = "46200036GH",
- ["c.ole.d_3"] = "46200036VGH",
- ["c.ule.d_2"] = "46200037GH",
- ["c.ule.d_3"] = "46200037VGH",
- ["c.sf.d_2"] = "46200038GH",
- ["c.sf.d_3"] = "46200038VGH",
- ["c.ngle.d_2"] = "46200039GH",
- ["c.ngle.d_3"] = "46200039VGH",
- ["c.seq.d_2"] = "4620003aGH",
- ["c.seq.d_3"] = "4620003aVGH",
- ["c.ngl.d_2"] = "4620003bGH",
- ["c.ngl.d_3"] = "4620003bVGH",
- ["c.lt.d_2"] = "4620003cGH",
- ["c.lt.d_3"] = "4620003cVGH",
- ["c.nge.d_2"] = "4620003dGH",
- ["c.nge.d_3"] = "4620003dVGH",
- ["c.le.d_2"] = "4620003eGH",
- ["c.le.d_3"] = "4620003eVGH",
- ["c.ngt.d_2"] = "4620003fGH",
- ["c.ngt.d_3"] = "4620003fVGH",
-
- ["add.ps_3"] = "46c00000FGH",
- ["sub.ps_3"] = "46c00001FGH",
- ["mul.ps_3"] = "46c00002FGH",
- ["abs.ps_2"] = "46c00005FG",
- ["mov.ps_2"] = "46c00006FG",
- ["neg.ps_2"] = "46c00007FG",
- ["movf.ps_2"] = "46c00011FG",
- ["movf.ps_3"] = "46c00011FGC",
- ["movt.ps_2"] = "46c10011FG",
- ["movt.ps_3"] = "46c10011FGC",
- ["movz.ps_3"] = "46c00012FGT",
- ["movn.ps_3"] = "46c00013FGT",
- ["cvt.s.pu_2"] = "46c00020FG",
- ["cvt.s.pl_2"] = "46c00028FG",
- ["pll.ps_3"] = "46c0002cFGH",
- ["plu.ps_3"] = "46c0002dFGH",
- ["pul.ps_3"] = "46c0002eFGH",
- ["puu.ps_3"] = "46c0002fFGH",
- ["c.f.ps_2"] = "46c00030GH",
- ["c.f.ps_3"] = "46c00030VGH",
- ["c.un.ps_2"] = "46c00031GH",
- ["c.un.ps_3"] = "46c00031VGH",
- ["c.eq.ps_2"] = "46c00032GH",
- ["c.eq.ps_3"] = "46c00032VGH",
- ["c.ueq.ps_2"] = "46c00033GH",
- ["c.ueq.ps_3"] = "46c00033VGH",
- ["c.olt.ps_2"] = "46c00034GH",
- ["c.olt.ps_3"] = "46c00034VGH",
- ["c.ult.ps_2"] = "46c00035GH",
- ["c.ult.ps_3"] = "46c00035VGH",
- ["c.ole.ps_2"] = "46c00036GH",
- ["c.ole.ps_3"] = "46c00036VGH",
- ["c.ule.ps_2"] = "46c00037GH",
- ["c.ule.ps_3"] = "46c00037VGH",
- ["c.sf.ps_2"] = "46c00038GH",
- ["c.sf.ps_3"] = "46c00038VGH",
- ["c.ngle.ps_2"] = "46c00039GH",
- ["c.ngle.ps_3"] = "46c00039VGH",
- ["c.seq.ps_2"] = "46c0003aGH",
- ["c.seq.ps_3"] = "46c0003aVGH",
- ["c.ngl.ps_2"] = "46c0003bGH",
- ["c.ngl.ps_3"] = "46c0003bVGH",
- ["c.lt.ps_2"] = "46c0003cGH",
- ["c.lt.ps_3"] = "46c0003cVGH",
- ["c.nge.ps_2"] = "46c0003dGH",
- ["c.nge.ps_3"] = "46c0003dVGH",
- ["c.le.ps_2"] = "46c0003eGH",
- ["c.le.ps_3"] = "46c0003eVGH",
- ["c.ngt.ps_2"] = "46c0003fGH",
- ["c.ngt.ps_3"] = "46c0003fVGH",
-
["cvt.s.w_2"] = "46800020FG",
["cvt.d.w_2"] = "46800021FG",
-
["cvt.s.l_2"] = "46a00020FG",
["cvt.d.l_2"] = "46a00021FG",
-
- -- Opcode COP1X.
- lwxc1_2 = "4c000000FX",
- ldxc1_2 = "4c000001FX",
- luxc1_2 = "4c000005FX",
- swxc1_2 = "4c000008FX",
- sdxc1_2 = "4c000009FX",
- suxc1_2 = "4c00000dFX",
- prefx_2 = "4c00000fMX",
- ["alnv.ps_4"] = "4c00001eFGHS",
- ["madd.s_4"] = "4c000020FRGH",
- ["madd.d_4"] = "4c000021FRGH",
- ["madd.ps_4"] = "4c000026FRGH",
- ["msub.s_4"] = "4c000028FRGH",
- ["msub.d_4"] = "4c000029FRGH",
- ["msub.ps_4"] = "4c00002eFRGH",
- ["nmadd.s_4"] = "4c000030FRGH",
- ["nmadd.d_4"] = "4c000031FRGH",
- ["nmadd.ps_4"] = "4c000036FRGH",
- ["nmsub.s_4"] = "4c000038FRGH",
- ["nmsub.d_4"] = "4c000039FRGH",
- ["nmsub.ps_4"] = "4c00003eFRGH",
}
+if mipsr6 then -- Instructions added with MIPSR6.
+
+ for k,v in pairs({
+
+ -- Add immediate to upper bits.
+ aui_3 = "3c000000TSI",
+ daui_3 = mips64 and "74000000TSI",
+ dahi_2 = mips64 and "04060000SI",
+ dati_2 = mips64 and "041e0000SI",
+
+ -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc.
+
+ -- Compact branches.
+ blezalc_2 = "18000000TB", -- rt != 0.
+ bgezalc_2 = "18000000T=SB", -- rt != 0.
+ bgtzalc_2 = "1c000000TB", -- rt != 0.
+ bltzalc_2 = "1c000000T=SB", -- rt != 0.
+
+ blezc_2 = "58000000TB", -- rt != 0.
+ bgezc_2 = "58000000T=SB", -- rt != 0.
+ bgec_3 = "58000000STB", -- rs != rt.
+ blec_3 = "58000000TSB", -- rt != rs.
+
+ bgtzc_2 = "5c000000TB", -- rt != 0.
+ bltzc_2 = "5c000000T=SB", -- rt != 0.
+ bltc_3 = "5c000000STB", -- rs != rt.
+ bgtc_3 = "5c000000TSB", -- rt != rs.
+
+ bgeuc_3 = "18000000STB", -- rs != rt.
+ bleuc_3 = "18000000TSB", -- rt != rs.
+ bltuc_3 = "1c000000STB", -- rs != rt.
+ bgtuc_3 = "1c000000TSB", -- rt != rs.
+
+ beqzalc_2 = "20000000TB", -- rt != 0.
+ bnezalc_2 = "60000000TB", -- rt != 0.
+ beqc_3 = "20000000STB", -- rs < rt.
+ bnec_3 = "60000000STB", -- rs < rt.
+ bovc_3 = "20000000STB", -- rs >= rt.
+ bnvc_3 = "60000000STB", -- rs >= rt.
+
+ beqzc_2 = "d8000000SK", -- rs != 0.
+ bnezc_2 = "f8000000SK", -- rs != 0.
+ jic_2 = "d8000000TI",
+ jialc_2 = "f8000000TI",
+ bc_1 = "c8000000L",
+ balc_1 = "e8000000L",
+
+ -- Opcode SPECIAL.
+ jr_1 = "00000009S",
+ sdbbp_0 = "0000000e",
+ sdbbp_1 = "0000000eY",
+ lsa_4 = "00000005DSTA",
+ dlsa_4 = mips64 and "00000015DSTA",
+ seleqz_3 = "00000035DST",
+ selnez_3 = "00000037DST",
+ clz_2 = "00000050DS",
+ clo_2 = "00000051DS",
+ dclz_2 = mips64 and "00000052DS",
+ dclo_2 = mips64 and "00000053DS",
+ mul_3 = "00000098DST",
+ muh_3 = "000000d8DST",
+ mulu_3 = "00000099DST",
+ muhu_3 = "000000d9DST",
+ div_3 = "0000009aDST",
+ mod_3 = "000000daDST",
+ divu_3 = "0000009bDST",
+ modu_3 = "000000dbDST",
+ dmul_3 = mips64 and "0000009cDST",
+ dmuh_3 = mips64 and "000000dcDST",
+ dmulu_3 = mips64 and "0000009dDST",
+ dmuhu_3 = mips64 and "000000ddDST",
+ ddiv_3 = mips64 and "0000009eDST",
+ dmod_3 = mips64 and "000000deDST",
+ ddivu_3 = mips64 and "0000009fDST",
+ dmodu_3 = mips64 and "000000dfDST",
+
+ -- Opcode SPECIAL3.
+ align_4 = "7c000220DSTA",
+ dalign_4 = mips64 and "7c000224DSTA",
+ bitswap_2 = "7c000020DT",
+ dbitswap_2 = mips64 and "7c000024DT",
+
+ -- Opcode COP1.
+ bc1eqz_2 = "45200000HB",
+ bc1nez_2 = "45a00000HB",
+
+ ["sel.s_3"] = "46000010FGH",
+ ["seleqz.s_3"] = "46000014FGH",
+ ["selnez.s_3"] = "46000017FGH",
+ ["maddf.s_3"] = "46000018FGH",
+ ["msubf.s_3"] = "46000019FGH",
+ ["rint.s_2"] = "4600001aFG",
+ ["class.s_2"] = "4600001bFG",
+ ["min.s_3"] = "4600001cFGH",
+ ["mina.s_3"] = "4600001dFGH",
+ ["max.s_3"] = "4600001eFGH",
+ ["maxa.s_3"] = "4600001fFGH",
+ ["cmp.af.s_3"] = "46800000FGH",
+ ["cmp.un.s_3"] = "46800001FGH",
+ ["cmp.or.s_3"] = "46800011FGH",
+ ["cmp.eq.s_3"] = "46800002FGH",
+ ["cmp.une.s_3"] = "46800012FGH",
+ ["cmp.ueq.s_3"] = "46800003FGH",
+ ["cmp.ne.s_3"] = "46800013FGH",
+ ["cmp.lt.s_3"] = "46800004FGH",
+ ["cmp.ult.s_3"] = "46800005FGH",
+ ["cmp.le.s_3"] = "46800006FGH",
+ ["cmp.ule.s_3"] = "46800007FGH",
+ ["cmp.saf.s_3"] = "46800008FGH",
+ ["cmp.sun.s_3"] = "46800009FGH",
+ ["cmp.sor.s_3"] = "46800019FGH",
+ ["cmp.seq.s_3"] = "4680000aFGH",
+ ["cmp.sune.s_3"] = "4680001aFGH",
+ ["cmp.sueq.s_3"] = "4680000bFGH",
+ ["cmp.sne.s_3"] = "4680001bFGH",
+ ["cmp.slt.s_3"] = "4680000cFGH",
+ ["cmp.sult.s_3"] = "4680000dFGH",
+ ["cmp.sle.s_3"] = "4680000eFGH",
+ ["cmp.sule.s_3"] = "4680000fFGH",
+
+ ["sel.d_3"] = "46200010FGH",
+ ["seleqz.d_3"] = "46200014FGH",
+ ["selnez.d_3"] = "46200017FGH",
+ ["maddf.d_3"] = "46200018FGH",
+ ["msubf.d_3"] = "46200019FGH",
+ ["rint.d_2"] = "4620001aFG",
+ ["class.d_2"] = "4620001bFG",
+ ["min.d_3"] = "4620001cFGH",
+ ["mina.d_3"] = "4620001dFGH",
+ ["max.d_3"] = "4620001eFGH",
+ ["maxa.d_3"] = "4620001fFGH",
+ ["cmp.af.d_3"] = "46a00000FGH",
+ ["cmp.un.d_3"] = "46a00001FGH",
+ ["cmp.or.d_3"] = "46a00011FGH",
+ ["cmp.eq.d_3"] = "46a00002FGH",
+ ["cmp.une.d_3"] = "46a00012FGH",
+ ["cmp.ueq.d_3"] = "46a00003FGH",
+ ["cmp.ne.d_3"] = "46a00013FGH",
+ ["cmp.lt.d_3"] = "46a00004FGH",
+ ["cmp.ult.d_3"] = "46a00005FGH",
+ ["cmp.le.d_3"] = "46a00006FGH",
+ ["cmp.ule.d_3"] = "46a00007FGH",
+ ["cmp.saf.d_3"] = "46a00008FGH",
+ ["cmp.sun.d_3"] = "46a00009FGH",
+ ["cmp.sor.d_3"] = "46a00019FGH",
+ ["cmp.seq.d_3"] = "46a0000aFGH",
+ ["cmp.sune.d_3"] = "46a0001aFGH",
+ ["cmp.sueq.d_3"] = "46a0000bFGH",
+ ["cmp.sne.d_3"] = "46a0001bFGH",
+ ["cmp.slt.d_3"] = "46a0000cFGH",
+ ["cmp.sult.d_3"] = "46a0000dFGH",
+ ["cmp.sle.d_3"] = "46a0000eFGH",
+ ["cmp.sule.d_3"] = "46a0000fFGH",
+
+ }) do map_op[k] = v end
+
+else -- Instructions removed by MIPSR6.
+
+ for k,v in pairs({
+ -- Traps, don't use.
+ addi_3 = "20000000TSI",
+ daddi_3 = mips64 and "60000000TSI",
+
+ -- Branch on likely, don't use.
+ beqzl_2 = "50000000SB",
+ beql_3 = "50000000STB",
+ bnezl_2 = "54000000SB",
+ bnel_3 = "54000000STB",
+ blezl_2 = "58000000SB",
+ bgtzl_2 = "5c000000SB",
+
+ lwl_2 = "88000000TO",
+ lwr_2 = "98000000TO",
+ swl_2 = "a8000000TO",
+ sdl_2 = mips64 and "b0000000TO",
+ sdr_2 = mips64 and "b1000000TO",
+ swr_2 = "b8000000TO",
+ cache_2 = "bc000000NO",
+ ll_2 = "c0000000TO",
+ pref_2 = "cc000000NO",
+ sc_2 = "e0000000TO",
+ scd_2 = mips64 and "f0000000TO",
+
+ -- Opcode SPECIAL.
+ movf_2 = "00000001DS",
+ movf_3 = "00000001DSC",
+ movt_2 = "00010001DS",
+ movt_3 = "00010001DSC",
+ jr_1 = "00000008S",
+ movz_3 = "0000000aDST",
+ movn_3 = "0000000bDST",
+ mfhi_1 = "00000010D",
+ mthi_1 = "00000011S",
+ mflo_1 = "00000012D",
+ mtlo_1 = "00000013S",
+ mult_2 = "00000018ST",
+ multu_2 = "00000019ST",
+ div_3 = "0000001aST",
+ divu_3 = "0000001bST",
+ ddiv_3 = mips64 and "0000001eST",
+ ddivu_3 = mips64 and "0000001fST",
+ dmult_2 = mips64 and "0000001cST",
+ dmultu_2 = mips64 and "0000001dST",
+
+ -- Opcode REGIMM.
+ tgei_2 = "04080000SI",
+ tgeiu_2 = "04090000SI",
+ tlti_2 = "040a0000SI",
+ tltiu_2 = "040b0000SI",
+ teqi_2 = "040c0000SI",
+ tnei_2 = "040e0000SI",
+ bltzal_2 = "04100000SB",
+ bgezal_2 = "04110000SB",
+ bltzall_2 = "04120000SB",
+ bgezall_2 = "04130000SB",
+
+ -- Opcode SPECIAL2.
+ madd_2 = "70000000ST",
+ maddu_2 = "70000001ST",
+ mul_3 = "70000002DST",
+ msub_2 = "70000004ST",
+ msubu_2 = "70000005ST",
+ clz_2 = "70000020D=TS",
+ clo_2 = "70000021D=TS",
+ dclz_2 = mips64 and "70000024D=TS",
+ dclo_2 = mips64 and "70000025D=TS",
+ sdbbp_0 = "7000003f",
+ sdbbp_1 = "7000003fY",
+
+ -- Opcode COP1.
+ bc1f_1 = "45000000B",
+ bc1f_2 = "45000000CB",
+ bc1t_1 = "45010000B",
+ bc1t_2 = "45010000CB",
+ bc1fl_1 = "45020000B",
+ bc1fl_2 = "45020000CB",
+ bc1tl_1 = "45030000B",
+ bc1tl_2 = "45030000CB",
+
+ ["movf.s_2"] = "46000011FG",
+ ["movf.s_3"] = "46000011FGC",
+ ["movt.s_2"] = "46010011FG",
+ ["movt.s_3"] = "46010011FGC",
+ ["movz.s_3"] = "46000012FGT",
+ ["movn.s_3"] = "46000013FGT",
+ ["cvt.ps.s_3"] = "46000026FGH",
+ ["c.f.s_2"] = "46000030GH",
+ ["c.f.s_3"] = "46000030VGH",
+ ["c.un.s_2"] = "46000031GH",
+ ["c.un.s_3"] = "46000031VGH",
+ ["c.eq.s_2"] = "46000032GH",
+ ["c.eq.s_3"] = "46000032VGH",
+ ["c.ueq.s_2"] = "46000033GH",
+ ["c.ueq.s_3"] = "46000033VGH",
+ ["c.olt.s_2"] = "46000034GH",
+ ["c.olt.s_3"] = "46000034VGH",
+ ["c.ult.s_2"] = "46000035GH",
+ ["c.ult.s_3"] = "46000035VGH",
+ ["c.ole.s_2"] = "46000036GH",
+ ["c.ole.s_3"] = "46000036VGH",
+ ["c.ule.s_2"] = "46000037GH",
+ ["c.ule.s_3"] = "46000037VGH",
+ ["c.sf.s_2"] = "46000038GH",
+ ["c.sf.s_3"] = "46000038VGH",
+ ["c.ngle.s_2"] = "46000039GH",
+ ["c.ngle.s_3"] = "46000039VGH",
+ ["c.seq.s_2"] = "4600003aGH",
+ ["c.seq.s_3"] = "4600003aVGH",
+ ["c.ngl.s_2"] = "4600003bGH",
+ ["c.ngl.s_3"] = "4600003bVGH",
+ ["c.lt.s_2"] = "4600003cGH",
+ ["c.lt.s_3"] = "4600003cVGH",
+ ["c.nge.s_2"] = "4600003dGH",
+ ["c.nge.s_3"] = "4600003dVGH",
+ ["c.le.s_2"] = "4600003eGH",
+ ["c.le.s_3"] = "4600003eVGH",
+ ["c.ngt.s_2"] = "4600003fGH",
+ ["c.ngt.s_3"] = "4600003fVGH",
+ ["movf.d_2"] = "46200011FG",
+ ["movf.d_3"] = "46200011FGC",
+ ["movt.d_2"] = "46210011FG",
+ ["movt.d_3"] = "46210011FGC",
+ ["movz.d_3"] = "46200012FGT",
+ ["movn.d_3"] = "46200013FGT",
+ ["c.f.d_2"] = "46200030GH",
+ ["c.f.d_3"] = "46200030VGH",
+ ["c.un.d_2"] = "46200031GH",
+ ["c.un.d_3"] = "46200031VGH",
+ ["c.eq.d_2"] = "46200032GH",
+ ["c.eq.d_3"] = "46200032VGH",
+ ["c.ueq.d_2"] = "46200033GH",
+ ["c.ueq.d_3"] = "46200033VGH",
+ ["c.olt.d_2"] = "46200034GH",
+ ["c.olt.d_3"] = "46200034VGH",
+ ["c.ult.d_2"] = "46200035GH",
+ ["c.ult.d_3"] = "46200035VGH",
+ ["c.ole.d_2"] = "46200036GH",
+ ["c.ole.d_3"] = "46200036VGH",
+ ["c.ule.d_2"] = "46200037GH",
+ ["c.ule.d_3"] = "46200037VGH",
+ ["c.sf.d_2"] = "46200038GH",
+ ["c.sf.d_3"] = "46200038VGH",
+ ["c.ngle.d_2"] = "46200039GH",
+ ["c.ngle.d_3"] = "46200039VGH",
+ ["c.seq.d_2"] = "4620003aGH",
+ ["c.seq.d_3"] = "4620003aVGH",
+ ["c.ngl.d_2"] = "4620003bGH",
+ ["c.ngl.d_3"] = "4620003bVGH",
+ ["c.lt.d_2"] = "4620003cGH",
+ ["c.lt.d_3"] = "4620003cVGH",
+ ["c.nge.d_2"] = "4620003dGH",
+ ["c.nge.d_3"] = "4620003dVGH",
+ ["c.le.d_2"] = "4620003eGH",
+ ["c.le.d_3"] = "4620003eVGH",
+ ["c.ngt.d_2"] = "4620003fGH",
+ ["c.ngt.d_3"] = "4620003fVGH",
+ ["add.ps_3"] = "46c00000FGH",
+ ["sub.ps_3"] = "46c00001FGH",
+ ["mul.ps_3"] = "46c00002FGH",
+ ["abs.ps_2"] = "46c00005FG",
+ ["mov.ps_2"] = "46c00006FG",
+ ["neg.ps_2"] = "46c00007FG",
+ ["movf.ps_2"] = "46c00011FG",
+ ["movf.ps_3"] = "46c00011FGC",
+ ["movt.ps_2"] = "46c10011FG",
+ ["movt.ps_3"] = "46c10011FGC",
+ ["movz.ps_3"] = "46c00012FGT",
+ ["movn.ps_3"] = "46c00013FGT",
+ ["cvt.s.pu_2"] = "46c00020FG",
+ ["cvt.s.pl_2"] = "46c00028FG",
+ ["pll.ps_3"] = "46c0002cFGH",
+ ["plu.ps_3"] = "46c0002dFGH",
+ ["pul.ps_3"] = "46c0002eFGH",
+ ["puu.ps_3"] = "46c0002fFGH",
+ ["c.f.ps_2"] = "46c00030GH",
+ ["c.f.ps_3"] = "46c00030VGH",
+ ["c.un.ps_2"] = "46c00031GH",
+ ["c.un.ps_3"] = "46c00031VGH",
+ ["c.eq.ps_2"] = "46c00032GH",
+ ["c.eq.ps_3"] = "46c00032VGH",
+ ["c.ueq.ps_2"] = "46c00033GH",
+ ["c.ueq.ps_3"] = "46c00033VGH",
+ ["c.olt.ps_2"] = "46c00034GH",
+ ["c.olt.ps_3"] = "46c00034VGH",
+ ["c.ult.ps_2"] = "46c00035GH",
+ ["c.ult.ps_3"] = "46c00035VGH",
+ ["c.ole.ps_2"] = "46c00036GH",
+ ["c.ole.ps_3"] = "46c00036VGH",
+ ["c.ule.ps_2"] = "46c00037GH",
+ ["c.ule.ps_3"] = "46c00037VGH",
+ ["c.sf.ps_2"] = "46c00038GH",
+ ["c.sf.ps_3"] = "46c00038VGH",
+ ["c.ngle.ps_2"] = "46c00039GH",
+ ["c.ngle.ps_3"] = "46c00039VGH",
+ ["c.seq.ps_2"] = "46c0003aGH",
+ ["c.seq.ps_3"] = "46c0003aVGH",
+ ["c.ngl.ps_2"] = "46c0003bGH",
+ ["c.ngl.ps_3"] = "46c0003bVGH",
+ ["c.lt.ps_2"] = "46c0003cGH",
+ ["c.lt.ps_3"] = "46c0003cVGH",
+ ["c.nge.ps_2"] = "46c0003dGH",
+ ["c.nge.ps_3"] = "46c0003dVGH",
+ ["c.le.ps_2"] = "46c0003eGH",
+ ["c.le.ps_3"] = "46c0003eVGH",
+ ["c.ngt.ps_2"] = "46c0003fGH",
+ ["c.ngt.ps_3"] = "46c0003fVGH",
+
+ -- Opcode COP1X.
+ lwxc1_2 = "4c000000FX",
+ ldxc1_2 = "4c000001FX",
+ luxc1_2 = "4c000005FX",
+ swxc1_2 = "4c000008FX",
+ sdxc1_2 = "4c000009FX",
+ suxc1_2 = "4c00000dFX",
+ prefx_2 = "4c00000fMX",
+ ["alnv.ps_4"] = "4c00001eFGHS",
+ ["madd.s_4"] = "4c000020FRGH",
+ ["madd.d_4"] = "4c000021FRGH",
+ ["madd.ps_4"] = "4c000026FRGH",
+ ["msub.s_4"] = "4c000028FRGH",
+ ["msub.d_4"] = "4c000029FRGH",
+ ["msub.ps_4"] = "4c00002eFRGH",
+ ["nmadd.s_4"] = "4c000030FRGH",
+ ["nmadd.d_4"] = "4c000031FRGH",
+ ["nmadd.ps_4"] = "4c000036FRGH",
+ ["nmsub.s_4"] = "4c000038FRGH",
+ ["nmsub.d_4"] = "4c000039FRGH",
+ ["nmsub.ps_4"] = "4c00003eFRGH",
+
+ }) do map_op[k] = v end
+
+end
+
------------------------------------------------------------------------------
local function parse_gpr(expr)
@@ -633,7 +855,7 @@ local function parse_fpr(expr)
werror("bad register name `"..expr.."'")
end
-local function parse_imm(imm, bits, shift, scale, signed)
+local function parse_imm(imm, bits, shift, scale, signed, action)
local n = tonumber(imm)
if n then
local m = sar(n, scale)
@@ -651,7 +873,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then
werror("expected immediate operand, got register")
else
- waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm)
+ waction(action or "IMM",
+ (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm)
return 0
end
end
@@ -756,13 +979,18 @@ map_op[".template__"] = function(params, template, nparams)
op = op + parse_disp(params[n]); n = n + 1
elseif p == "X" then
op = op + parse_index(params[n]); n = n + 1
- elseif p == "B" or p == "J" then
+ elseif p == "B" or p == "J" or p == "K" or p == "L" then
local mode, m, s = parse_label(params[n], false)
- if p == "B" then m = m + 2048 end
+ if p == "J" then m = m + 0xa800
+ elseif p == "K" then m = m + 0x5000
+ elseif p == "L" then m = m + 0xa000 end
waction("REL_"..mode, m, s, 1)
n = n + 1
elseif p == "A" then
op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1
+ elseif p == "a" then
+ local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1
+ op = op + band(m, 0x7c0) + band(shr(m, 9), 4)
elseif p == "M" then
op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1
elseif p == "N" then
@@ -778,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams)
elseif p == "Z" then
op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1
elseif p == "=" then
- op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo.
+ n = n - 1 -- Re-use previous parameter for next template char.
else
assert(false)
end
diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua
new file mode 100644
index 00000000..b4f8707d
--- /dev/null
+++ b/dynasm/dasm_mips64.lua
@@ -0,0 +1,12 @@
+------------------------------------------------------------------------------
+-- DynASM MIPS64 module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- See dynasm.lua for full copyright notice.
+------------------------------------------------------------------------------
+-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module.
+-- All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+mips64 = true -- Using a global is an ugly, but effective solution.
+return require("dasm_mips")
diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h
index 4ea6729f..fdb89bce 100644
--- a/dynasm/dasm_ppc.h
+++ b/dynasm/dasm_ppc.h
@@ -1,5 +1,5 @@
/*
-** DynASM PPC encoding engine.
+** DynASM PPC/PPC64 encoding engine.
** Copyright (C) 2005-2022 Mike Pall. All rights reserved.
** Released under the MIT license. See dynasm.lua for full copyright notice.
*/
@@ -21,7 +21,7 @@ enum {
/* The following actions need a buffer position. */
DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG,
/* The following actions also have an argument. */
- DASM_REL_PC, DASM_LABEL_PC, DASM_IMM,
+ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH,
DASM__MAX
};
@@ -244,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...)
#endif
b[pos++] = n;
break;
+ case DASM_IMMSH:
+ CK((n >> 6) == 0, RANGE_I);
+ b[pos++] = n;
+ break;
}
}
}
@@ -273,7 +277,7 @@ int dasm_link(Dst_DECL, size_t *szp)
{ /* Handle globals not defined in this translation unit. */
int idx;
- for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) {
+ for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) {
int n = D->lglabels[idx];
/* Undefined label: Collapse rel chain and replace with marker (< 0). */
while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; }
@@ -299,7 +303,7 @@ int dasm_link(Dst_DECL, size_t *szp)
case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break;
case DASM_REL_LG: case DASM_REL_PC: pos++; break;
case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break;
- case DASM_IMM: pos++; break;
+ case DASM_IMM: case DASM_IMMSH: pos++; break;
}
}
stop: (void)0;
@@ -349,7 +353,10 @@ int dasm_encode(Dst_DECL, void *buffer)
ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000;
break;
case DASM_REL_LG:
- CK(n >= 0, UNDEF_LG);
+ if (n < 0) {
+ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp);
+ goto patchrel;
+ }
/* fallthrough */
case DASM_REL_PC:
CK(n >= 0, UNDEF_PC);
@@ -367,6 +374,9 @@ int dasm_encode(Dst_DECL, void *buffer)
case DASM_IMM:
cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31);
break;
+ case DASM_IMMSH:
+ cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32);
+ break;
default: *cp++ = ins; break;
}
}
diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua
index c05d6573..3624e882 100644
--- a/dynasm/dasm_ppc.lua
+++ b/dynasm/dasm_ppc.lua
@@ -1,17 +1,19 @@
------------------------------------------------------------------------------
--- DynASM PPC module.
+-- DynASM PPC/PPC64 module.
--
-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
+--
+-- Support for various extensions contributed by Caio Souza Oliveira.
------------------------------------------------------------------------------
-- Module information:
local _info = {
arch = "ppc",
description = "DynASM PPC module",
- version = "1.3.0",
- vernum = 10300,
- release = "2011-05-05",
+ version = "1.5.0",
+ vernum = 10500,
+ release = "2021-05-02",
author = "Mike Pall",
license = "MIT",
}
@@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn
local action_names = {
"STOP", "SECTION", "ESC", "REL_EXT",
"ALIGN", "REL_LG", "LABEL_LG",
- "REL_PC", "LABEL_PC", "IMM",
+ "REL_PC", "LABEL_PC", "IMM", "IMMSH"
}
-- Maximum number of section buffer positions for dasm_put().
@@ -228,8 +230,18 @@ local map_cond = {
------------------------------------------------------------------------------
+local map_op, op_template
+
+local function op_alias(opname, f)
+ return function(params, nparams)
+ if not params then return "-> "..opname:sub(1, -3) end
+ f(params, nparams)
+ op_template(params, map_op[opname], nparams)
+ end
+end
+
-- Template strings for PPC instructions.
-local map_op = {
+map_op = {
tdi_3 = "08000000ARI",
twi_3 = "0c000000ARI",
mulli_3 = "1c000000RRI",
@@ -297,6 +309,250 @@ local map_op = {
std_2 = "f8000000RD",
stdu_2 = "f8000001RD",
+ subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end),
+ subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end),
+ subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end),
+ ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end),
+
+ rotlwi_3 = op_alias("rlwinm_5", function(p)
+ p[4] = "0"; p[5] = "31"
+ end),
+ rotrwi_3 = op_alias("rlwinm_5", function(p)
+ p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31"
+ end),
+ rotlw_3 = op_alias("rlwnm_5", function(p)
+ p[4] = "0"; p[5] = "31"
+ end),
+ slwi_3 = op_alias("rlwinm_5", function(p)
+ p[5] = "31-("..p[3]..")"; p[4] = "0"
+ end),
+ srwi_3 = op_alias("rlwinm_5", function(p)
+ p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31"
+ end),
+ clrlwi_3 = op_alias("rlwinm_5", function(p)
+ p[4] = p[3]; p[3] = "0"; p[5] = "31"
+ end),
+ clrrwi_3 = op_alias("rlwinm_5", function(p)
+ p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0"
+ end),
+
+ -- Primary opcode 4:
+ mulhhwu_3 = "10000010RRR.",
+ machhwu_3 = "10000018RRR.",
+ mulhhw_3 = "10000050RRR.",
+ nmachhw_3 = "1000005cRRR.",
+ machhwsu_3 = "10000098RRR.",
+ machhws_3 = "100000d8RRR.",
+ nmachhws_3 = "100000dcRRR.",
+ mulchwu_3 = "10000110RRR.",
+ macchwu_3 = "10000118RRR.",
+ mulchw_3 = "10000150RRR.",
+ macchw_3 = "10000158RRR.",
+ nmacchw_3 = "1000015cRRR.",
+ macchwsu_3 = "10000198RRR.",
+ macchws_3 = "100001d8RRR.",
+ nmacchws_3 = "100001dcRRR.",
+ mullhw_3 = "10000350RRR.",
+ maclhw_3 = "10000358RRR.",
+ nmaclhw_3 = "1000035cRRR.",
+ maclhwsu_3 = "10000398RRR.",
+ maclhws_3 = "100003d8RRR.",
+ nmaclhws_3 = "100003dcRRR.",
+ machhwuo_3 = "10000418RRR.",
+ nmachhwo_3 = "1000045cRRR.",
+ machhwsuo_3 = "10000498RRR.",
+ machhwso_3 = "100004d8RRR.",
+ nmachhwso_3 = "100004dcRRR.",
+ macchwuo_3 = "10000518RRR.",
+ macchwo_3 = "10000558RRR.",
+ nmacchwo_3 = "1000055cRRR.",
+ macchwsuo_3 = "10000598RRR.",
+ macchwso_3 = "100005d8RRR.",
+ nmacchwso_3 = "100005dcRRR.",
+ maclhwo_3 = "10000758RRR.",
+ nmaclhwo_3 = "1000075cRRR.",
+ maclhwsuo_3 = "10000798RRR.",
+ maclhwso_3 = "100007d8RRR.",
+ nmaclhwso_3 = "100007dcRRR.",
+
+ vaddubm_3 = "10000000VVV",
+ vmaxub_3 = "10000002VVV",
+ vrlb_3 = "10000004VVV",
+ vcmpequb_3 = "10000006VVV",
+ vmuloub_3 = "10000008VVV",
+ vaddfp_3 = "1000000aVVV",
+ vmrghb_3 = "1000000cVVV",
+ vpkuhum_3 = "1000000eVVV",
+ vmhaddshs_4 = "10000020VVVV",
+ vmhraddshs_4 = "10000021VVVV",
+ vmladduhm_4 = "10000022VVVV",
+ vmsumubm_4 = "10000024VVVV",
+ vmsummbm_4 = "10000025VVVV",
+ vmsumuhm_4 = "10000026VVVV",
+ vmsumuhs_4 = "10000027VVVV",
+ vmsumshm_4 = "10000028VVVV",
+ vmsumshs_4 = "10000029VVVV",
+ vsel_4 = "1000002aVVVV",
+ vperm_4 = "1000002bVVVV",
+ vsldoi_4 = "1000002cVVVP",
+ vpermxor_4 = "1000002dVVVV",
+ vmaddfp_4 = "1000002eVVVV~",
+ vnmsubfp_4 = "1000002fVVVV~",
+ vaddeuqm_4 = "1000003cVVVV",
+ vaddecuq_4 = "1000003dVVVV",
+ vsubeuqm_4 = "1000003eVVVV",
+ vsubecuq_4 = "1000003fVVVV",
+ vadduhm_3 = "10000040VVV",
+ vmaxuh_3 = "10000042VVV",
+ vrlh_3 = "10000044VVV",
+ vcmpequh_3 = "10000046VVV",
+ vmulouh_3 = "10000048VVV",
+ vsubfp_3 = "1000004aVVV",
+ vmrghh_3 = "1000004cVVV",
+ vpkuwum_3 = "1000004eVVV",
+ vadduwm_3 = "10000080VVV",
+ vmaxuw_3 = "10000082VVV",
+ vrlw_3 = "10000084VVV",
+ vcmpequw_3 = "10000086VVV",
+ vmulouw_3 = "10000088VVV",
+ vmuluwm_3 = "10000089VVV",
+ vmrghw_3 = "1000008cVVV",
+ vpkuhus_3 = "1000008eVVV",
+ vaddudm_3 = "100000c0VVV",
+ vmaxud_3 = "100000c2VVV",
+ vrld_3 = "100000c4VVV",
+ vcmpeqfp_3 = "100000c6VVV",
+ vcmpequd_3 = "100000c7VVV",
+ vpkuwus_3 = "100000ceVVV",
+ vadduqm_3 = "10000100VVV",
+ vmaxsb_3 = "10000102VVV",
+ vslb_3 = "10000104VVV",
+ vmulosb_3 = "10000108VVV",
+ vrefp_2 = "1000010aV-V",
+ vmrglb_3 = "1000010cVVV",
+ vpkshus_3 = "1000010eVVV",
+ vaddcuq_3 = "10000140VVV",
+ vmaxsh_3 = "10000142VVV",
+ vslh_3 = "10000144VVV",
+ vmulosh_3 = "10000148VVV",
+ vrsqrtefp_2 = "1000014aV-V",
+ vmrglh_3 = "1000014cVVV",
+ vpkswus_3 = "1000014eVVV",
+ vaddcuw_3 = "10000180VVV",
+ vmaxsw_3 = "10000182VVV",
+ vslw_3 = "10000184VVV",
+ vmulosw_3 = "10000188VVV",
+ vexptefp_2 = "1000018aV-V",
+ vmrglw_3 = "1000018cVVV",
+ vpkshss_3 = "1000018eVVV",
+ vmaxsd_3 = "100001c2VVV",
+ vsl_3 = "100001c4VVV",
+ vcmpgefp_3 = "100001c6VVV",
+ vlogefp_2 = "100001caV-V",
+ vpkswss_3 = "100001ceVVV",
+ vadduhs_3 = "10000240VVV",
+ vminuh_3 = "10000242VVV",
+ vsrh_3 = "10000244VVV",
+ vcmpgtuh_3 = "10000246VVV",
+ vmuleuh_3 = "10000248VVV",
+ vrfiz_2 = "1000024aV-V",
+ vsplth_3 = "1000024cVV3",
+ vupkhsh_2 = "1000024eV-V",
+ vminuw_3 = "10000282VVV",
+ vminud_3 = "100002c2VVV",
+ vcmpgtud_3 = "100002c7VVV",
+ vrfim_2 = "100002caV-V",
+ vcmpgtsb_3 = "10000306VVV",
+ vcfux_3 = "1000030aVVA~",
+ vaddshs_3 = "10000340VVV",
+ vminsh_3 = "10000342VVV",
+ vsrah_3 = "10000344VVV",
+ vcmpgtsh_3 = "10000346VVV",
+ vmulesh_3 = "10000348VVV",
+ vcfsx_3 = "1000034aVVA~",
+ vspltish_2 = "1000034cVS",
+ vupkhpx_2 = "1000034eV-V",
+ vaddsws_3 = "10000380VVV",
+ vminsw_3 = "10000382VVV",
+ vsraw_3 = "10000384VVV",
+ vcmpgtsw_3 = "10000386VVV",
+ vmulesw_3 = "10000388VVV",
+ vctuxs_3 = "1000038aVVA~",
+ vspltisw_2 = "1000038cVS",
+ vminsd_3 = "100003c2VVV",
+ vsrad_3 = "100003c4VVV",
+ vcmpbfp_3 = "100003c6VVV",
+ vcmpgtsd_3 = "100003c7VVV",
+ vctsxs_3 = "100003caVVA~",
+ vupklpx_2 = "100003ceV-V",
+ vsububm_3 = "10000400VVV",
+ ["bcdadd._4"] = "10000401VVVy.",
+ vavgub_3 = "10000402VVV",
+ vand_3 = "10000404VVV",
+ ["vcmpequb._3"] = "10000406VVV",
+ vmaxfp_3 = "1000040aVVV",
+ vsubuhm_3 = "10000440VVV",
+ ["bcdsub._4"] = "10000441VVVy.",
+ vavguh_3 = "10000442VVV",
+ vandc_3 = "10000444VVV",
+ ["vcmpequh._3"] = "10000446VVV",
+ vminfp_3 = "1000044aVVV",
+ vpkudum_3 = "1000044eVVV",
+ vsubuwm_3 = "10000480VVV",
+ vavguw_3 = "10000482VVV",
+ vor_3 = "10000484VVV",
+ ["vcmpequw._3"] = "10000486VVV",
+ vpmsumw_3 = "10000488VVV",
+ ["vcmpeqfp._3"] = "100004c6VVV",
+ ["vcmpequd._3"] = "100004c7VVV",
+ vpkudus_3 = "100004ceVVV",
+ vavgsb_3 = "10000502VVV",
+ vavgsh_3 = "10000542VVV",
+ vorc_3 = "10000544VVV",
+ vbpermq_3 = "1000054cVVV",
+ vpksdus_3 = "1000054eVVV",
+ vavgsw_3 = "10000582VVV",
+ vsld_3 = "100005c4VVV",
+ ["vcmpgefp._3"] = "100005c6VVV",
+ vpksdss_3 = "100005ceVVV",
+ vsububs_3 = "10000600VVV",
+ mfvscr_1 = "10000604V--",
+ vsum4ubs_3 = "10000608VVV",
+ vsubuhs_3 = "10000640VVV",
+ mtvscr_1 = "10000644--V",
+ ["vcmpgtuh._3"] = "10000646VVV",
+ vsum4shs_3 = "10000648VVV",
+ vupkhsw_2 = "1000064eV-V",
+ vsubuws_3 = "10000680VVV",
+ vshasigmaw_4 = "10000682VVYp",
+ veqv_3 = "10000684VVV",
+ vsum2sws_3 = "10000688VVV",
+ vmrgow_3 = "1000068cVVV",
+ vshasigmad_4 = "100006c2VVYp",
+ vsrd_3 = "100006c4VVV",
+ ["vcmpgtud._3"] = "100006c7VVV",
+ vupklsw_2 = "100006ceV-V",
+ vupkslw_2 = "100006ceV-V",
+ vsubsbs_3 = "10000700VVV",
+ vclzb_2 = "10000702V-V",
+ vpopcntb_2 = "10000703V-V",
+ ["vcmpgtsb._3"] = "10000706VVV",
+ vsum4sbs_3 = "10000708VVV",
+ vsubshs_3 = "10000740VVV",
+ vclzh_2 = "10000742V-V",
+ vpopcnth_2 = "10000743V-V",
+ ["vcmpgtsh._3"] = "10000746VVV",
+ vsubsws_3 = "10000780VVV",
+ vclzw_2 = "10000782V-V",
+ vpopcntw_2 = "10000783V-V",
+ ["vcmpgtsw._3"] = "10000786VVV",
+ vsumsws_3 = "10000788VVV",
+ vmrgew_3 = "1000078cVVV",
+ vclzd_2 = "100007c2V-V",
+ vpopcntd_2 = "100007c3V-V",
+ ["vcmpbfp._3"] = "100007c6VVV",
+ ["vcmpgtsd._3"] = "100007c7VVV",
+
-- Primary opcode 19:
mcrf_2 = "4c000000XX",
isync_0 = "4c00012c",
@@ -316,6 +572,8 @@ local map_op = {
bclrl_2 = "4c000021AA",
bcctr_2 = "4c000420AA",
bcctrl_2 = "4c000421AA",
+ bctar_2 = "4c000460AA",
+ bctarl_2 = "4c000461AA",
blr_0 = "4e800020",
blrl_0 = "4e800021",
bctr_0 = "4e800420",
@@ -327,6 +585,7 @@ local map_op = {
cmpd_3 = "7c200000XRR",
cmpd_2 = "7c200000-RR",
tw_3 = "7c000008ARR",
+ lvsl_3 = "7c00000cVRR",
subfc_3 = "7c000010RRR.",
subc_3 = "7c000010RRR~.",
mulhdu_3 = "7c000012RRR.",
@@ -351,50 +610,68 @@ local map_op = {
cmplw_2 = "7c000040-RR",
cmpld_3 = "7c200040XRR",
cmpld_2 = "7c200040-RR",
+ lvsr_3 = "7c00004cVRR",
subf_3 = "7c000050RRR.",
sub_3 = "7c000050RRR~.",
+ lbarx_3 = "7c000068RR0R",
ldux_3 = "7c00006aRR0R",
dcbst_2 = "7c00006c-RR",
lwzux_3 = "7c00006eRR0R",
cntlzd_2 = "7c000074RR~",
andc_3 = "7c000078RR~R.",
td_3 = "7c000088ARR",
+ lvewx_3 = "7c00008eVRR",
mulhd_3 = "7c000092RRR.",
+ addg6s_3 = "7c000094RRR",
mulhw_3 = "7c000096RRR.",
+ dlmzb_3 = "7c00009cRR~R.",
ldarx_3 = "7c0000a8RR0R",
dcbf_2 = "7c0000ac-RR",
lbzx_3 = "7c0000aeRR0R",
+ lvx_3 = "7c0000ceVRR",
neg_2 = "7c0000d0RR.",
+ lharx_3 = "7c0000e8RR0R",
lbzux_3 = "7c0000eeRR0R",
popcntb_2 = "7c0000f4RR~",
not_2 = "7c0000f8RR~%.",
nor_3 = "7c0000f8RR~R.",
+ stvebx_3 = "7c00010eVRR",
subfe_3 = "7c000110RRR.",
sube_3 = "7c000110RRR~.",
adde_3 = "7c000114RRR.",
stdx_3 = "7c00012aRR0R",
- stwcx_3 = "7c00012cRR0R.",
+ ["stwcx._3"] = "7c00012dRR0R.",
stwx_3 = "7c00012eRR0R",
prtyw_2 = "7c000134RR~",
+ stvehx_3 = "7c00014eVRR",
stdux_3 = "7c00016aRR0R",
+ ["stqcx._3"] = "7c00016dR:R0R.",
stwux_3 = "7c00016eRR0R",
prtyd_2 = "7c000174RR~",
+ stvewx_3 = "7c00018eVRR",
subfze_2 = "7c000190RR.",
addze_2 = "7c000194RR.",
- stdcx_3 = "7c0001acRR0R.",
+ ["stdcx._3"] = "7c0001adRR0R.",
stbx_3 = "7c0001aeRR0R",
+ stvx_3 = "7c0001ceVRR",
subfme_2 = "7c0001d0RR.",
mulld_3 = "7c0001d2RRR.",
addme_2 = "7c0001d4RR.",
mullw_3 = "7c0001d6RRR.",
dcbtst_2 = "7c0001ec-RR",
stbux_3 = "7c0001eeRR0R",
+ bpermd_3 = "7c0001f8RR~R",
+ lvepxl_3 = "7c00020eVRR",
add_3 = "7c000214RRR.",
+ lqarx_3 = "7c000228R:R0R",
dcbt_2 = "7c00022c-RR",
lhzx_3 = "7c00022eRR0R",
+ cdtbcd_2 = "7c000234RR~",
eqv_3 = "7c000238RR~R.",
+ lvepx_3 = "7c00024eVRR",
eciwx_3 = "7c00026cRR0R",
lhzux_3 = "7c00026eRR0R",
+ cbcdtd_2 = "7c000274RR~",
xor_3 = "7c000278RR~R.",
mfspefscr_1 = "7c0082a6R",
mfxer_1 = "7c0102a6R",
@@ -404,8 +681,12 @@ local map_op = {
lhax_3 = "7c0002aeRR0R",
mftb_1 = "7c0c42e6R",
mftbu_1 = "7c0d42e6R",
+ lvxl_3 = "7c0002ceVRR",
lwaux_3 = "7c0002eaRR0R",
lhaux_3 = "7c0002eeRR0R",
+ popcntw_2 = "7c0002f4RR~",
+ divdeu_3 = "7c000312RRR.",
+ divweu_3 = "7c000316RRR.",
sthx_3 = "7c00032eRR0R",
orc_3 = "7c000338RR~R.",
ecowx_3 = "7c00036cRR0R",
@@ -420,10 +701,14 @@ local map_op = {
mtctr_1 = "7c0903a6R",
dcbi_2 = "7c0003ac-RR",
nand_3 = "7c0003b8RR~R.",
+ dsn_2 = "7c0003c6-RR",
+ stvxl_3 = "7c0003ceVRR",
divd_3 = "7c0003d2RRR.",
divw_3 = "7c0003d6RRR.",
+ popcntd_2 = "7c0003f4RR~",
cmpb_3 = "7c0003f8RR~R.",
mcrxr_1 = "7c000400X",
+ lbdx_3 = "7c000406RRR",
subfco_3 = "7c000410RRR.",
subco_3 = "7c000410RRR~.",
addco_3 = "7c000414RRR.",
@@ -433,16 +718,20 @@ local map_op = {
lfsx_3 = "7c00042eFR0R",
srw_3 = "7c000430RR~R.",
srd_3 = "7c000436RR~R.",
+ lhdx_3 = "7c000446RRR",
subfo_3 = "7c000450RRR.",
subo_3 = "7c000450RRR~.",
lfsux_3 = "7c00046eFR0R",
+ lwdx_3 = "7c000486RRR",
lswi_3 = "7c0004aaRR0A",
sync_0 = "7c0004ac",
lwsync_0 = "7c2004ac",
ptesync_0 = "7c4004ac",
lfdx_3 = "7c0004aeFR0R",
+ lddx_3 = "7c0004c6RRR",
nego_2 = "7c0004d0RR.",
lfdux_3 = "7c0004eeFR0R",
+ stbdx_3 = "7c000506RRR",
subfeo_3 = "7c000510RRR.",
subeo_3 = "7c000510RRR~.",
addeo_3 = "7c000514RRR.",
@@ -450,27 +739,42 @@ local map_op = {
stswx_3 = "7c00052aRR0R",
stwbrx_3 = "7c00052cRR0R",
stfsx_3 = "7c00052eFR0R",
+ sthdx_3 = "7c000546RRR",
+ ["stbcx._3"] = "7c00056dRRR",
stfsux_3 = "7c00056eFR0R",
+ stwdx_3 = "7c000586RRR",
subfzeo_2 = "7c000590RR.",
addzeo_2 = "7c000594RR.",
stswi_3 = "7c0005aaRR0A",
+ ["sthcx._3"] = "7c0005adRRR",
stfdx_3 = "7c0005aeFR0R",
+ stddx_3 = "7c0005c6RRR",
subfmeo_2 = "7c0005d0RR.",
mulldo_3 = "7c0005d2RRR.",
addmeo_2 = "7c0005d4RR.",
mullwo_3 = "7c0005d6RRR.",
dcba_2 = "7c0005ec-RR",
stfdux_3 = "7c0005eeFR0R",
+ stvepxl_3 = "7c00060eVRR",
addo_3 = "7c000614RRR.",
lhbrx_3 = "7c00062cRR0R",
+ lfdpx_3 = "7c00062eF:RR",
sraw_3 = "7c000630RR~R.",
srad_3 = "7c000634RR~R.",
+ lfddx_3 = "7c000646FRR",
+ stvepx_3 = "7c00064eVRR",
srawi_3 = "7c000670RR~A.",
sradi_3 = "7c000674RR~H.",
eieio_0 = "7c0006ac",
lfiwax_3 = "7c0006aeFR0R",
+ divdeuo_3 = "7c000712RRR.",
+ divweuo_3 = "7c000716RRR.",
sthbrx_3 = "7c00072cRR0R",
+ stfdpx_3 = "7c00072eF:RR",
extsh_2 = "7c000734RR~.",
+ stfddx_3 = "7c000746FRR",
+ divdeo_3 = "7c000752RRR.",
+ divweo_3 = "7c000756RRR.",
extsb_2 = "7c000774RR~.",
divduo_3 = "7c000792RRR.",
divwou_3 = "7c000796RRR.",
@@ -481,6 +785,40 @@ local map_op = {
divwo_3 = "7c0007d6RRR.",
dcbz_2 = "7c0007ec-RR",
+ ["tbegin._1"] = "7c00051d1",
+ ["tbegin._0"] = "7c00051d",
+ ["tend._1"] = "7c00055dY",
+ ["tend._0"] = "7c00055d",
+ ["tendall._0"] = "7e00055d",
+ tcheck_1 = "7c00059cX",
+ ["tsr._1"] = "7c0005dd1",
+ ["tsuspend._0"] = "7c0005dd",
+ ["tresume._0"] = "7c2005dd",
+ ["tabortwc._3"] = "7c00061dARR",
+ ["tabortdc._3"] = "7c00065dARR",
+ ["tabortwci._3"] = "7c00069dARS",
+ ["tabortdci._3"] = "7c0006ddARS",
+ ["tabort._1"] = "7c00071d-R-",
+ ["treclaim._1"] = "7c00075d-R",
+ ["trechkpt._0"] = "7c0007dd",
+
+ lxsiwzx_3 = "7c000018QRR",
+ lxsiwax_3 = "7c000098QRR",
+ mfvsrd_2 = "7c000066-Rq",
+ mfvsrwz_2 = "7c0000e6-Rq",
+ stxsiwx_3 = "7c000118QRR",
+ mtvsrd_2 = "7c000166QR",
+ mtvsrwa_2 = "7c0001a6QR",
+ lxvdsx_3 = "7c000298QRR",
+ lxsspx_3 = "7c000418QRR",
+ lxsdx_3 = "7c000498QRR",
+ stxsspx_3 = "7c000518QRR",
+ stxsdx_3 = "7c000598QRR",
+ lxvw4x_3 = "7c000618QRR",
+ lxvd2x_3 = "7c000698QRR",
+ stxvw4x_3 = "7c000718QRR",
+ stxvd2x_3 = "7c000798QRR",
+
-- Primary opcode 30:
rldicl_4 = "78000000RR~HM.",
rldicr_4 = "78000004RR~HM.",
@@ -489,6 +827,34 @@ local map_op = {
rldcl_4 = "78000010RR~RM.",
rldcr_4 = "78000012RR~RM.",
+ rotldi_3 = op_alias("rldicl_4", function(p)
+ p[4] = "0"
+ end),
+ rotrdi_3 = op_alias("rldicl_4", function(p)
+ p[3] = "64-("..p[3]..")"; p[4] = "0"
+ end),
+ rotld_3 = op_alias("rldcl_4", function(p)
+ p[4] = "0"
+ end),
+ sldi_3 = op_alias("rldicr_4", function(p)
+ p[4] = "63-("..p[3]..")"
+ end),
+ srdi_3 = op_alias("rldicl_4", function(p)
+ p[4] = p[3]; p[3] = "64-("..p[3]..")"
+ end),
+ clrldi_3 = op_alias("rldicl_4", function(p)
+ p[4] = p[3]; p[3] = "0"
+ end),
+ clrrdi_3 = op_alias("rldicr_4", function(p)
+ p[4] = "63-("..p[3]..")"; p[3] = "0"
+ end),
+
+ -- Primary opcode 56:
+ lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8.
+
+ -- Primary opcode 57:
+ lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4.
+
-- Primary opcode 59:
fdivs_3 = "ec000024FFF.",
fsubs_3 = "ec000028FFF.",
@@ -501,6 +867,200 @@ local map_op = {
fmadds_4 = "ec00003aFFFF~.",
fnmsubs_4 = "ec00003cFFFF~.",
fnmadds_4 = "ec00003eFFFF~.",
+ fcfids_2 = "ec00069cF-F.",
+ fcfidus_2 = "ec00079cF-F.",
+
+ dadd_3 = "ec000004FFF.",
+ dqua_4 = "ec000006FFFZ.",
+ dmul_3 = "ec000044FFF.",
+ drrnd_4 = "ec000046FFFZ.",
+ dscli_3 = "ec000084FF6.",
+ dquai_4 = "ec000086SF~FZ.",
+ dscri_3 = "ec0000c4FF6.",
+ drintx_4 = "ec0000c61F~FZ.",
+ dcmpo_3 = "ec000104XFF",
+ dtstex_3 = "ec000144XFF",
+ dtstdc_3 = "ec000184XF6",
+ dtstdg_3 = "ec0001c4XF6",
+ drintn_4 = "ec0001c61F~FZ.",
+ dctdp_2 = "ec000204F-F.",
+ dctfix_2 = "ec000244F-F.",
+ ddedpd_3 = "ec000284ZF~F.",
+ dxex_2 = "ec0002c4F-F.",
+ dsub_3 = "ec000404FFF.",
+ ddiv_3 = "ec000444FFF.",
+ dcmpu_3 = "ec000504XFF",
+ dtstsf_3 = "ec000544XFF",
+ drsp_2 = "ec000604F-F.",
+ dcffix_2 = "ec000644F-F.",
+ denbcd_3 = "ec000684YF~F.",
+ diex_3 = "ec0006c4FFF.",
+
+ -- Primary opcode 60:
+ xsaddsp_3 = "f0000000QQQ",
+ xsmaddasp_3 = "f0000008QQQ",
+ xxsldwi_4 = "f0000010QQQz",
+ xsrsqrtesp_2 = "f0000028Q-Q",
+ xssqrtsp_2 = "f000002cQ-Q",
+ xxsel_4 = "f0000030QQQQ",
+ xssubsp_3 = "f0000040QQQ",
+ xsmaddmsp_3 = "f0000048QQQ",
+ xxpermdi_4 = "f0000050QQQz",
+ xsresp_2 = "f0000068Q-Q",
+ xsmulsp_3 = "f0000080QQQ",
+ xsmsubasp_3 = "f0000088QQQ",
+ xxmrghw_3 = "f0000090QQQ",
+ xsdivsp_3 = "f00000c0QQQ",
+ xsmsubmsp_3 = "f00000c8QQQ",
+ xsadddp_3 = "f0000100QQQ",
+ xsmaddadp_3 = "f0000108QQQ",
+ xscmpudp_3 = "f0000118XQQ",
+ xscvdpuxws_2 = "f0000120Q-Q",
+ xsrdpi_2 = "f0000124Q-Q",
+ xsrsqrtedp_2 = "f0000128Q-Q",
+ xssqrtdp_2 = "f000012cQ-Q",
+ xssubdp_3 = "f0000140QQQ",
+ xsmaddmdp_3 = "f0000148QQQ",
+ xscmpodp_3 = "f0000158XQQ",
+ xscvdpsxws_2 = "f0000160Q-Q",
+ xsrdpiz_2 = "f0000164Q-Q",
+ xsredp_2 = "f0000168Q-Q",
+ xsmuldp_3 = "f0000180QQQ",
+ xsmsubadp_3 = "f0000188QQQ",
+ xxmrglw_3 = "f0000190QQQ",
+ xsrdpip_2 = "f00001a4Q-Q",
+ xstsqrtdp_2 = "f00001a8X-Q",
+ xsrdpic_2 = "f00001acQ-Q",
+ xsdivdp_3 = "f00001c0QQQ",
+ xsmsubmdp_3 = "f00001c8QQQ",
+ xsrdpim_2 = "f00001e4Q-Q",
+ xstdivdp_3 = "f00001e8XQQ",
+ xvaddsp_3 = "f0000200QQQ",
+ xvmaddasp_3 = "f0000208QQQ",
+ xvcmpeqsp_3 = "f0000218QQQ",
+ xvcvspuxws_2 = "f0000220Q-Q",
+ xvrspi_2 = "f0000224Q-Q",
+ xvrsqrtesp_2 = "f0000228Q-Q",
+ xvsqrtsp_2 = "f000022cQ-Q",
+ xvsubsp_3 = "f0000240QQQ",
+ xvmaddmsp_3 = "f0000248QQQ",
+ xvcmpgtsp_3 = "f0000258QQQ",
+ xvcvspsxws_2 = "f0000260Q-Q",
+ xvrspiz_2 = "f0000264Q-Q",
+ xvresp_2 = "f0000268Q-Q",
+ xvmulsp_3 = "f0000280QQQ",
+ xvmsubasp_3 = "f0000288QQQ",
+ xxspltw_3 = "f0000290QQg~",
+ xvcmpgesp_3 = "f0000298QQQ",
+ xvcvuxwsp_2 = "f00002a0Q-Q",
+ xvrspip_2 = "f00002a4Q-Q",
+ xvtsqrtsp_2 = "f00002a8X-Q",
+ xvrspic_2 = "f00002acQ-Q",
+ xvdivsp_3 = "f00002c0QQQ",
+ xvmsubmsp_3 = "f00002c8QQQ",
+ xvcvsxwsp_2 = "f00002e0Q-Q",
+ xvrspim_2 = "f00002e4Q-Q",
+ xvtdivsp_3 = "f00002e8XQQ",
+ xvadddp_3 = "f0000300QQQ",
+ xvmaddadp_3 = "f0000308QQQ",
+ xvcmpeqdp_3 = "f0000318QQQ",
+ xvcvdpuxws_2 = "f0000320Q-Q",
+ xvrdpi_2 = "f0000324Q-Q",
+ xvrsqrtedp_2 = "f0000328Q-Q",
+ xvsqrtdp_2 = "f000032cQ-Q",
+ xvsubdp_3 = "f0000340QQQ",
+ xvmaddmdp_3 = "f0000348QQQ",
+ xvcmpgtdp_3 = "f0000358QQQ",
+ xvcvdpsxws_2 = "f0000360Q-Q",
+ xvrdpiz_2 = "f0000364Q-Q",
+ xvredp_2 = "f0000368Q-Q",
+ xvmuldp_3 = "f0000380QQQ",
+ xvmsubadp_3 = "f0000388QQQ",
+ xvcmpgedp_3 = "f0000398QQQ",
+ xvcvuxwdp_2 = "f00003a0Q-Q",
+ xvrdpip_2 = "f00003a4Q-Q",
+ xvtsqrtdp_2 = "f00003a8X-Q",
+ xvrdpic_2 = "f00003acQ-Q",
+ xvdivdp_3 = "f00003c0QQQ",
+ xvmsubmdp_3 = "f00003c8QQQ",
+ xvcvsxwdp_2 = "f00003e0Q-Q",
+ xvrdpim_2 = "f00003e4Q-Q",
+ xvtdivdp_3 = "f00003e8XQQ",
+ xsnmaddasp_3 = "f0000408QQQ",
+ xxland_3 = "f0000410QQQ",
+ xscvdpsp_2 = "f0000424Q-Q",
+ xscvdpspn_2 = "f000042cQ-Q",
+ xsnmaddmsp_3 = "f0000448QQQ",
+ xxlandc_3 = "f0000450QQQ",
+ xsrsp_2 = "f0000464Q-Q",
+ xsnmsubasp_3 = "f0000488QQQ",
+ xxlor_3 = "f0000490QQQ",
+ xscvuxdsp_2 = "f00004a0Q-Q",
+ xsnmsubmsp_3 = "f00004c8QQQ",
+ xxlxor_3 = "f00004d0QQQ",
+ xscvsxdsp_2 = "f00004e0Q-Q",
+ xsmaxdp_3 = "f0000500QQQ",
+ xsnmaddadp_3 = "f0000508QQQ",
+ xxlnor_3 = "f0000510QQQ",
+ xscvdpuxds_2 = "f0000520Q-Q",
+ xscvspdp_2 = "f0000524Q-Q",
+ xscvspdpn_2 = "f000052cQ-Q",
+ xsmindp_3 = "f0000540QQQ",
+ xsnmaddmdp_3 = "f0000548QQQ",
+ xxlorc_3 = "f0000550QQQ",
+ xscvdpsxds_2 = "f0000560Q-Q",
+ xsabsdp_2 = "f0000564Q-Q",
+ xscpsgndp_3 = "f0000580QQQ",
+ xsnmsubadp_3 = "f0000588QQQ",
+ xxlnand_3 = "f0000590QQQ",
+ xscvuxddp_2 = "f00005a0Q-Q",
+ xsnabsdp_2 = "f00005a4Q-Q",
+ xsnmsubmdp_3 = "f00005c8QQQ",
+ xxleqv_3 = "f00005d0QQQ",
+ xscvsxddp_2 = "f00005e0Q-Q",
+ xsnegdp_2 = "f00005e4Q-Q",
+ xvmaxsp_3 = "f0000600QQQ",
+ xvnmaddasp_3 = "f0000608QQQ",
+ ["xvcmpeqsp._3"] = "f0000618QQQ",
+ xvcvspuxds_2 = "f0000620Q-Q",
+ xvcvdpsp_2 = "f0000624Q-Q",
+ xvminsp_3 = "f0000640QQQ",
+ xvnmaddmsp_3 = "f0000648QQQ",
+ ["xvcmpgtsp._3"] = "f0000658QQQ",
+ xvcvspsxds_2 = "f0000660Q-Q",
+ xvabssp_2 = "f0000664Q-Q",
+ xvcpsgnsp_3 = "f0000680QQQ",
+ xvnmsubasp_3 = "f0000688QQQ",
+ ["xvcmpgesp._3"] = "f0000698QQQ",
+ xvcvuxdsp_2 = "f00006a0Q-Q",
+ xvnabssp_2 = "f00006a4Q-Q",
+ xvnmsubmsp_3 = "f00006c8QQQ",
+ xvcvsxdsp_2 = "f00006e0Q-Q",
+ xvnegsp_2 = "f00006e4Q-Q",
+ xvmaxdp_3 = "f0000700QQQ",
+ xvnmaddadp_3 = "f0000708QQQ",
+ ["xvcmpeqdp._3"] = "f0000718QQQ",
+ xvcvdpuxds_2 = "f0000720Q-Q",
+ xvcvspdp_2 = "f0000724Q-Q",
+ xvmindp_3 = "f0000740QQQ",
+ xvnmaddmdp_3 = "f0000748QQQ",
+ ["xvcmpgtdp._3"] = "f0000758QQQ",
+ xvcvdpsxds_2 = "f0000760Q-Q",
+ xvabsdp_2 = "f0000764Q-Q",
+ xvcpsgndp_3 = "f0000780QQQ",
+ xvnmsubadp_3 = "f0000788QQQ",
+ ["xvcmpgedp._3"] = "f0000798QQQ",
+ xvcvuxddp_2 = "f00007a0Q-Q",
+ xvnabsdp_2 = "f00007a4Q-Q",
+ xvnmsubmdp_3 = "f00007c8QQQ",
+ xvcvsxddp_2 = "f00007e0Q-Q",
+ xvnegdp_2 = "f00007e4Q-Q",
+
+ -- Primary opcode 61:
+ stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4.
+
+ -- Primary opcode 62:
+ stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8.
-- Primary opcode 63:
fdiv_3 = "fc000024FFF.",
@@ -526,8 +1086,12 @@ local map_op = {
frsp_2 = "fc000018F-F.",
fctiw_2 = "fc00001cF-F.",
fctiwz_2 = "fc00001eF-F.",
+ ftdiv_2 = "fc000100X-F.",
+ fctiwu_2 = "fc00011cF-F.",
+ fctiwuz_2 = "fc00011eF-F.",
mtfsfi_2 = "fc00010cAA", -- NYI: upshift.
fnabs_2 = "fc000110F-F.",
+ ftsqrt_2 = "fc000140X-F.",
fabs_2 = "fc000210F-F.",
frin_2 = "fc000310F-F.",
friz_2 = "fc000350F-F.",
@@ -537,7 +1101,38 @@ local map_op = {
-- NYI: mtfsf, mtfsb0, mtfsb1.
fctid_2 = "fc00065cF-F.",
fctidz_2 = "fc00065eF-F.",
+ fmrgow_3 = "fc00068cFFF",
fcfid_2 = "fc00069cF-F.",
+ fctidu_2 = "fc00075cF-F.",
+ fctiduz_2 = "fc00075eF-F.",
+ fmrgew_3 = "fc00078cFFF",
+ fcfidu_2 = "fc00079cF-F.",
+
+ daddq_3 = "fc000004F:F:F:.",
+ dquaq_4 = "fc000006F:F:F:Z.",
+ dmulq_3 = "fc000044F:F:F:.",
+ drrndq_4 = "fc000046F:F:F:Z.",
+ dscliq_3 = "fc000084F:F:6.",
+ dquaiq_4 = "fc000086SF:~F:Z.",
+ dscriq_3 = "fc0000c4F:F:6.",
+ drintxq_4 = "fc0000c61F:~F:Z.",
+ dcmpoq_3 = "fc000104XF:F:",
+ dtstexq_3 = "fc000144XF:F:",
+ dtstdcq_3 = "fc000184XF:6",
+ dtstdgq_3 = "fc0001c4XF:6",
+ drintnq_4 = "fc0001c61F:~F:Z.",
+ dctqpq_2 = "fc000204F:-F:.",
+ dctfixq_2 = "fc000244F:-F:.",
+ ddedpdq_3 = "fc000284ZF:~F:.",
+ dxexq_2 = "fc0002c4F:-F:.",
+ dsubq_3 = "fc000404F:F:F:.",
+ ddivq_3 = "fc000444F:F:F:.",
+ dcmpuq_3 = "fc000504XF:F:",
+ dtstsfq_3 = "fc000544XF:F:",
+ drdpq_2 = "fc000604F:-F:.",
+ dcffixq_2 = "fc000644F:-F:.",
+ denbcdq_3 = "fc000684YF:~F:.",
+ diexq_3 = "fc0006c4F:FF:.",
-- Primary opcode 4, SPE APU extension:
evaddw_3 = "10000200RRR",
@@ -822,7 +1417,7 @@ local map_op = {
do
local t = {}
for k,v in pairs(map_op) do
- if sub(v, -1) == "." then
+ if type(v) == "string" and sub(v, -1) == "." then
local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2)
t[sub(k, 1, -3).."."..sub(k, -2)] = v2
end
@@ -884,6 +1479,24 @@ local function parse_fpr(expr)
werror("bad register name `"..expr.."'")
end
+local function parse_vr(expr)
+ local r = match(expr, "^v([1-3]?[0-9])$")
+ if r then
+ r = tonumber(r)
+ if r <= 31 then return r end
+ end
+ werror("bad register name `"..expr.."'")
+end
+
+local function parse_vs(expr)
+ local r = match(expr, "^vs([1-6]?[0-9])$")
+ if r then
+ r = tonumber(r)
+ if r <= 63 then return r end
+ end
+ werror("bad register name `"..expr.."'")
+end
+
local function parse_cr(expr)
local r = match(expr, "^cr([0-7])$")
if r then return tonumber(r) end
@@ -900,8 +1513,30 @@ local function parse_cond(expr)
werror("bad condition bit name `"..expr.."'")
end
+local parse_ctx = {}
+
+local loadenv = setfenv and function(s)
+ local code = loadstring(s, "")
+ if code then setfenv(code, parse_ctx) end
+ return code
+end or function(s)
+ return load(s, "", nil, parse_ctx)
+end
+
+-- Try to parse simple arithmetic, too, since some basic ops are aliases.
+local function parse_number(n)
+ local x = tonumber(n)
+ if x then return x end
+ local code = loadenv("return "..n)
+ if code then
+ local ok, y = pcall(code)
+ if ok then return y end
+ end
+ return nil
+end
+
local function parse_imm(imm, bits, shift, scale, signed)
- local n = tonumber(imm)
+ local n = parse_number(imm)
if n then
local m = sar(n, scale)
if shl(m, scale) == n then
@@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed)
end
end
werror("out of range immediate `"..imm.."'")
- elseif match(imm, "^r([1-3]?[0-9])$") or
+ elseif match(imm, "^[rfv]([1-3]?[0-9])$") or
+ match(imm, "^vs([1-6]?[0-9])$") or
match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
werror("expected immediate operand, got register")
else
@@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed)
end
local function parse_shiftmask(imm, isshift)
- local n = tonumber(imm)
+ local n = parse_number(imm)
if n then
if shr(n, 6) == 0 then
- local lsb = band(imm, 31)
- local msb = imm - lsb
+ local lsb = band(n, 31)
+ local msb = n - lsb
return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb)
end
werror("out of range immediate `"..imm.."'")
@@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift)
match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then
werror("expected immediate operand, got register")
else
- werror("NYI: parameterized 64 bit shift/mask")
+ waction("IMMSH", isshift and 1 or 0, imm)
+ return 0;
end
end
@@ -1011,7 +1648,7 @@ end
------------------------------------------------------------------------------
-- Handle opcodes defined with template strings.
-map_op[".template__"] = function(params, template, nparams)
+op_template = function(params, template, nparams)
if not params then return sub(template, 9) end
local op = tonumber(sub(template, 1, 8), 16)
local n, rs = 1, 26
@@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams)
rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1
elseif p == "F" then
rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1
+ elseif p == "V" then
+ rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1
+ elseif p == "Q" then
+ local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5
+ local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3)
+ op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh)
+ elseif p == "q" then
+ local vs = parse_vs(params[n]); n = n + 1
+ op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5)
elseif p == "A" then
rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1
elseif p == "S" then
@@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams)
rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1
elseif p == "X" then
rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1
+ elseif p == "1" then
+ rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1
+ elseif p == "g" then
+ rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1
+ elseif p == "3" then
+ rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1
+ elseif p == "P" then
+ rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+ elseif p == "p" then
+ op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1
+ elseif p == "6" then
+ rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1
+ elseif p == "Y" then
+ rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1
+ elseif p == "y" then
+ rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1
+ elseif p == "Z" then
+ rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1
+ elseif p == "z" then
+ rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1
elseif p == "W" then
op = op + parse_cr(params[n]); n = n + 1
elseif p == "G" then
@@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams)
local lo = band(op, mm)
local hi = band(op, shl(mm, 5))
op = op - lo - hi + shl(lo, 5) + shr(hi, 5)
+ elseif p == ":" then
+ if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end
elseif p == "-" then
rs = rs - 5
elseif p == "." then
@@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams)
wputpos(pos, op)
end
+map_op[".template__"] = op_template
+
------------------------------------------------------------------------------
-- Pseudo-opcode to mark the position where the action list is to be emitted.
diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h
index a16fadcc..d798554b 100644
--- a/dynasm/dasm_proto.h
+++ b/dynasm/dasm_proto.h
@@ -10,8 +10,8 @@
#include <stddef.h>
#include <stdarg.h>
-#define DASM_IDENT "DynASM 1.3.0"
-#define DASM_VERSION 10300 /* 1.3.0 */
+#define DASM_IDENT "DynASM 1.5.0"
+#define DASM_VERSION 10500 /* 1.5.0 */
#ifndef Dst_DECL
#define Dst_DECL dasm_State **Dst
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
index c3089d91..f0327302 100644
--- a/dynasm/dasm_x86.h
+++ b/dynasm/dasm_x86.h
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
dasm_State *D = Dst_REF;
dasm_ActList p = D->actionlist + start;
dasm_Section *sec = D->section;
- int pos = sec->pos, ofs = sec->ofs, mrm = 4;
+ int pos = sec->pos, ofs = sec->ofs, mrm = -1;
int *b;
if (pos >= sec->epos) {
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
b[pos++] = n;
switch (action) {
case DASM_DISP:
- if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; }
+ if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
/* fallthrough */
case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */
case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
@@ -204,11 +204,17 @@ void dasm_put(Dst_DECL, int start, ...)
case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
case DASM_SPACE: p++; ofs += n; break;
case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
- case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG);
- if (*p++ == 1 && *p == DASM_DISP) mrm = n;
+ case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
+ if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
+ if (*p < 0x20 && (n&7) == 4) ofs++;
+ switch ((*p++ >> 3) & 3) {
+ case 3: n |= b[pos-3]; /* fallthrough */
+ case 2: n |= b[pos-2]; /* fallthrough */
+ case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
+ }
continue;
}
- mrm = 4;
+ mrm = -1;
} else {
int *pl, n;
switch (action) {
@@ -233,8 +239,11 @@ void dasm_put(Dst_DECL, int start, ...)
}
pos++;
ofs += 4; /* Maximum offset needed. */
- if (action == DASM_REL_LG || action == DASM_REL_PC)
+ if (action == DASM_REL_LG || action == DASM_REL_PC) {
b[pos++] = ofs; /* Store pass1 offset estimate. */
+ } else if (sizeof(ptrdiff_t) == 8) {
+ ofs += 4;
+ }
break;
case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel;
case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC);
@@ -359,10 +368,22 @@ int dasm_link(Dst_DECL, size_t *szp)
do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0)
#define dasmd(x) \
do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0)
+#define dasmq(x) \
+ do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0)
#else
#define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0)
#define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0)
+#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0)
#endif
+static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x)
+{
+ if (sizeof(ptrdiff_t) == 8)
+ dasmq((unsigned long long)x);
+ else
+ dasmd((unsigned int)x);
+ return cp;
+}
+#define dasma(x) (cp = dasma_(cp, (x)))
/* Pass 3: Encode sections. */
int dasm_encode(Dst_DECL, void *buffer)
@@ -402,7 +423,27 @@ int dasm_encode(Dst_DECL, void *buffer)
case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL;
/* fallthrough */
case DASM_IMM_W: dasmw(n); break;
- case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; }
+ case DASM_VREG: {
+ int t = *p++;
+ unsigned char *ex = cp - (t&7);
+ if ((n & 8) && t < 0xa0) {
+ if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
+ n &= 7;
+ } else if (n & 0x10) {
+ if (*ex & 0x80) {
+ *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
+ }
+ while (++ex < cp) ex[-1] = *ex;
+ if (mark) mark--;
+ cp--;
+ n &= 7;
+ }
+ if (t >= 0xc0) n <<= 4;
+ else if (t >= 0x40) n <<= 3;
+ else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
+ cp[-1] ^= n;
+ break;
+ }
case DASM_REL_LG: p++; if (n >= 0) goto rel_pc;
b++; n = (int)(ptrdiff_t)D->globals[-n];
/* fallthrough */
@@ -417,12 +458,13 @@ int dasm_encode(Dst_DECL, void *buffer)
goto wb;
}
case DASM_IMM_LG:
- p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; }
+ p++;
+ if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; }
/* fallthrough */
case DASM_IMM_PC: {
int *pb = DASM_POS2PTR(D, n);
- n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base);
- goto wd;
+ dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base));
+ break;
}
case DASM_LABEL_LG: {
int idx = *p++;
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
index 24c07f37..b442cd0d 100644
--- a/dynasm/dasm_x86.lua
+++ b/dynasm/dasm_x86.lua
@@ -11,9 +11,9 @@ local x64 = x64
local _info = {
arch = x64 and "x64" or "x86",
description = "DynASM x86/x64 module",
- version = "1.3.0",
- vernum = 10300,
- release = "2011-05-05",
+ version = "1.5.0",
+ vernum = 10500,
+ release = "2021-05-02",
author = "Mike Pall",
license = "MIT",
}
@@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
-local concat, sort = table.concat, table.sort
+local concat, sort, remove = table.concat, table.sort, table.remove
local bit = bit or require("bit")
-local band, shl, shr = bit.band, bit.lshift, bit.rshift
+local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
-- Inherited tables and callbacks.
local g_opt, g_arch
@@ -41,7 +41,7 @@ local action_names = {
-- int arg, 1 buffer pos:
"DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
-- action arg (1 byte), int arg, 1 buffer pos (reg/num):
- "VREG", "SPACE", -- !x64: VREG support NYI.
+ "VREG", "SPACE",
-- ptrdiff_t arg, 1 buffer pos (address): !x64
"SETLABEL", "REL_A",
-- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
-- Current number of section buffer positions for dasm_put().
local secpos = 1
+-- VREG kind encodings, pre-shifted by 5 bits.
+local map_vreg = {
+ ["modrm.rm.m"] = 0x00,
+ ["modrm.rm.r"] = 0x20,
+ ["opcode"] = 0x20,
+ ["sib.base"] = 0x20,
+ ["sib.index"] = 0x40,
+ ["modrm.reg"] = 0x80,
+ ["vex.v"] = 0xa0,
+ ["imm.hi"] = 0xc0,
+}
+
+-- Current number of VREG actions contributing to REX/VEX shrinkage.
+local vreg_shrink_count = 0
+
------------------------------------------------------------------------------
-- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
if a or num then secpos = secpos + (num or 1) end
end
+-- Optionally add a VREG action.
+local function wvreg(kind, vreg, psz, sk, defer)
+ if not vreg then return end
+ waction("VREG", vreg)
+ local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'")
+ if b < (sk or 0) then
+ vreg_shrink_count = vreg_shrink_count + 1
+ end
+ if not defer then
+ b = b + vreg_shrink_count * 8
+ vreg_shrink_count = 0
+ end
+ wputxb(b + (psz or 0))
+end
+
-- Add call to embedded DynASM C code.
local function wcall(func, args)
wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names)
local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
if needrex then map_reg_needrex[iname] = true end
local name
- if sz == "o" then name = format("xmm%d", i)
+ if sz == "o" or sz == "y" then name = format("%s%d", cl, i)
elseif sz == "f" then name = format("st%d", i)
else name = format("r%d%s", i, sz == addrsize and "" or sz) end
map_archdef[name] = iname
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
map_reg_valid_index[map_archdef.esp] = false
if x64 then map_reg_valid_index[map_archdef.rsp] = false end
+if x64 then map_reg_needrex[map_archdef.Rb] = true end
map_archdef["Ra"] = "@"..addrsize
-- FP registers (internally tword sized, but use "f" as operand size).
@@ -334,21 +365,24 @@ mkrmap("f", "Rf")
-- SSE registers (oword sized, but qword and dword accessible).
mkrmap("o", "xmm")
+-- AVX registers (yword sized, but oword, qword and dword accessible).
+mkrmap("y", "ymm")
+
-- Operand size prefixes to codes.
local map_opsize = {
- byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t",
- aword = addrsize,
+ byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
+ tword = "t", aword = addrsize,
}
-- Operand size code to number.
local map_opsizenum = {
- b = 1, w = 2, d = 4, q = 8, o = 16, t = 10,
+ b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
}
-- Operand size code to name.
local map_opsizename = {
- b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword",
- f = "fpword",
+ b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
+ t = "tword", f = "fpword",
}
-- Valid index register scale factors.
@@ -450,6 +484,22 @@ local function wputdarg(n)
end
end
+-- Put signed or unsigned qword or arg.
+local function wputqarg(n)
+ local tn = type(n)
+ if tn == "number" then -- This is only used for numbers from -2^31..2^32-1.
+ wputb(band(n, 255))
+ wputb(band(shr(n, 8), 255))
+ wputb(band(shr(n, 16), 255))
+ wputb(shr(n, 24))
+ local sign = n < 0 and 255 or 0
+ wputb(sign); wputb(sign); wputb(sign); wputb(sign)
+ else
+ waction("IMM_D", format("(unsigned int)(%s)", n))
+ waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n))
+ end
+end
+
-- Put operand-size dependent number or arg (defaults to dword).
local function wputszarg(sz, n)
if not sz or sz == "d" or sz == "q" then wputdarg(n)
@@ -460,9 +510,45 @@ local function wputszarg(sz, n)
end
-- Put multi-byte opcode with operand-size dependent modifications.
-local function wputop(sz, op, rex)
+local function wputop(sz, op, rex, vex, vregr, vregxb)
+ local psz, sk = 0, nil
+ if vex then
+ local tail
+ if vex.m == 1 and band(rex, 11) == 0 then
+ if x64 and vregxb then
+ sk = map_vreg["modrm.reg"]
+ else
+ wputb(0xc5)
+ tail = shl(bxor(band(rex, 4), 4), 5)
+ psz = 3
+ end
+ end
+ if not tail then
+ wputb(0xc4)
+ wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
+ tail = shl(band(rex, 8), 4)
+ psz = 4
+ end
+ local reg, vreg = 0, nil
+ if vex.v then
+ reg = vex.v.reg
+ if not reg then werror("bad vex operand") end
+ if reg < 0 then reg = 0; vreg = vex.v.vreg end
+ end
+ if sz == "y" or vex.l then tail = tail + 4 end
+ wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
+ wvreg("vex.v", vreg)
+ rex = 0
+ if op >= 256 then werror("bad vex opcode") end
+ else
+ if rex ~= 0 then
+ if not x64 then werror("bad operand size") end
+ elseif (vregr or vregxb) and x64 then
+ rex = 0x10
+ sk = map_vreg["vex.v"]
+ end
+ end
local r
- if rex ~= 0 and not x64 then werror("bad operand size") end
if sz == "w" then wputb(102) end
-- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -471,20 +557,20 @@ local function wputop(sz, op, rex)
if rex ~= 0 then
local opc3 = band(op, 0xffff00)
if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
- wputb(64 + band(rex, 15)); rex = 0
+ wputb(64 + band(rex, 15)); rex = 0; psz = 2
end
end
- wputb(shr(op, 16)); op = band(op, 0xffff)
+ wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
end
if op >= 256 then
local b = shr(op, 8)
- if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end
- wputb(b)
- op = band(op, 255)
+ if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
+ wputb(b); op = band(op, 255); psz = psz + 1
end
- if rex ~= 0 then wputb(64 + band(rex, 15)) end
+ if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
if sz == "b" then op = op - 1 end
wputb(op)
+ return psz, sk
end
-- Put ModRM or SIB formatted byte.
@@ -494,7 +580,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
end
-- Put ModRM/SIB plus optional displacement.
-local function wputmrmsib(t, imark, s, vsreg)
+local function wputmrmsib(t, imark, s, vsreg, psz, sk)
local vreg, vxreg
local reg, xreg = t.reg, t.xreg
if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -504,8 +590,8 @@ local function wputmrmsib(t, imark, s, vsreg)
-- Register mode.
if sub(t.mode, 1, 1) == "r" then
wputmodrm(3, s, reg)
- if vsreg then waction("VREG", vsreg); wputxb(2) end
- if vreg then waction("VREG", vreg); wputxb(0) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+ wvreg("modrm.rm.r", vreg, psz+1, sk)
return
end
@@ -519,21 +605,22 @@ local function wputmrmsib(t, imark, s, vsreg)
-- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
wputmodrm(0, s, 4)
if imark == "I" then waction("MARK") end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
wputmodrm(t.xsc, xreg, 5)
- if vxreg then waction("VREG", vxreg); wputxb(3) end
+ wvreg("sib.index", vxreg, psz+2, sk)
else
-- Pure 32 bit displacement.
if x64 and tdisp ~= "table" then
wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
+ wvreg("modrm.reg", vsreg, psz+1, sk)
if imark == "I" then waction("MARK") end
wputmodrm(0, 4, 5)
else
riprel = x64
wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
+ wvreg("modrm.reg", vsreg, psz+1, sk)
if imark == "I" then waction("MARK") end
end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
end
if riprel then -- Emit rip-relative displacement.
if match("UWSiI", imark) then
@@ -561,16 +648,16 @@ local function wputmrmsib(t, imark, s, vsreg)
if xreg or band(reg, 7) == 4 then
wputmodrm(m or 2, s, 4) -- ModRM.
if m == nil or imark == "I" then waction("MARK") end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
- if vxreg then waction("VREG", vxreg); wputxb(3) end
- if vreg then waction("VREG", vreg); wputxb(1) end
+ wvreg("sib.index", vxreg, psz+2, sk, vreg)
+ wvreg("sib.base", vreg, psz+2, sk)
else
wputmodrm(m or 2, s, reg) -- ModRM.
if (imark == "I" and (m == 1 or m == 2)) or
(m == nil and (vsreg or vreg)) then waction("MARK") end
- if vsreg then waction("VREG", vsreg); wputxb(2) end
- if vreg then waction("VREG", vreg); wputxb(1) end
+ wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
+ wvreg("modrm.rm.m", vreg, psz+1, sk)
end
-- Put displacement.
@@ -592,10 +679,16 @@ local function opmodestr(op, args)
end
-- Convert number to valid integer or nil.
-local function toint(expr)
+local function toint(expr, isqword)
local n = tonumber(expr)
if n then
- if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
+ if n % 1 ~= 0 then
+ werror("not an integer number `"..expr.."'")
+ elseif isqword then
+ if n < -2147483648 or n > 2147483647 then
+ n = nil -- Handle it as an expression to avoid precision loss.
+ end
+ elseif n < -2147483648 or n > 4294967295 then
werror("bad integer number `"..expr.."'")
end
return n
@@ -678,7 +771,7 @@ local function rtexpr(expr)
end
-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
-local function parseoperand(param)
+local function parseoperand(param, isqword)
local t = {}
local expr = param
@@ -766,7 +859,7 @@ local function parseoperand(param)
t.disp = dispexpr(tailx)
else
-- imm or opsize*imm
- local imm = toint(expr)
+ local imm = toint(expr, isqword)
if not imm and sub(expr, 1, 1) == "*" and t.opsize then
imm = toint(sub(expr, 2))
if imm then
@@ -881,9 +974,16 @@ end
-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
-- The spare 3 bits are either filled with the last hex digit or
-- the result from a previous "r"/"R". The opcode is restored.
+-- "u" Use VEX encoding, vvvv unused.
+-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
+-- removed from the list used by future characters).
+-- "w" Use VEX encoding, vvvv from 3rd operand.
+-- "L" Force VEX.L
--
-- All of the following characters force a flush of the opcode:
-- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
+-- "s" stores a 4 bit immediate from the last register operand,
+-- followed by 4 zero bits.
-- "S" stores a signed 8 bit immediate from the last operand.
-- "U" stores an unsigned 8 bit immediate from the last operand.
-- "W" stores an unsigned 16 bit immediate from the last operand.
@@ -1226,46 +1326,14 @@ local map_op = {
movups_2 = "rmo:0F10rM|mro:0F11Rm",
orpd_2 = "rmo:660F56rM",
orps_2 = "rmo:0F56rM",
- packssdw_2 = "rmo:660F6BrM",
- packsswb_2 = "rmo:660F63rM",
- packuswb_2 = "rmo:660F67rM",
- paddb_2 = "rmo:660FFCrM",
- paddd_2 = "rmo:660FFErM",
- paddq_2 = "rmo:660FD4rM",
- paddsb_2 = "rmo:660FECrM",
- paddsw_2 = "rmo:660FEDrM",
- paddusb_2 = "rmo:660FDCrM",
- paddusw_2 = "rmo:660FDDrM",
- paddw_2 = "rmo:660FFDrM",
- pand_2 = "rmo:660FDBrM",
- pandn_2 = "rmo:660FDFrM",
pause_0 = "F390",
- pavgb_2 = "rmo:660FE0rM",
- pavgw_2 = "rmo:660FE3rM",
- pcmpeqb_2 = "rmo:660F74rM",
- pcmpeqd_2 = "rmo:660F76rM",
- pcmpeqw_2 = "rmo:660F75rM",
- pcmpgtb_2 = "rmo:660F64rM",
- pcmpgtd_2 = "rmo:660F66rM",
- pcmpgtw_2 = "rmo:660F65rM",
pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
- pmaddwd_2 = "rmo:660FF5rM",
- pmaxsw_2 = "rmo:660FEErM",
- pmaxub_2 = "rmo:660FDErM",
- pminsw_2 = "rmo:660FEArM",
- pminub_2 = "rmo:660FDArM",
pmovmskb_2 = "rr/do:660FD7rM",
- pmulhuw_2 = "rmo:660FE4rM",
- pmulhw_2 = "rmo:660FE5rM",
- pmullw_2 = "rmo:660FD5rM",
- pmuludq_2 = "rmo:660FF4rM",
- por_2 = "rmo:660FEBrM",
prefetchnta_1 = "xb:n0F180m",
prefetcht0_1 = "xb:n0F181m",
prefetcht1_1 = "xb:n0F182m",
prefetcht2_1 = "xb:n0F183m",
- psadbw_2 = "rmo:660FF6rM",
pshufd_3 = "rmio:660F70rMU",
pshufhw_3 = "rmio:F30F70rMU",
pshuflw_3 = "rmio:F20F70rMU",
@@ -1279,23 +1347,6 @@ local map_op = {
psrldq_2 = "rio:660F733mU",
psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
- psubb_2 = "rmo:660FF8rM",
- psubd_2 = "rmo:660FFArM",
- psubq_2 = "rmo:660FFBrM",
- psubsb_2 = "rmo:660FE8rM",
- psubsw_2 = "rmo:660FE9rM",
- psubusb_2 = "rmo:660FD8rM",
- psubusw_2 = "rmo:660FD9rM",
- psubw_2 = "rmo:660FF9rM",
- punpckhbw_2 = "rmo:660F68rM",
- punpckhdq_2 = "rmo:660F6ArM",
- punpckhqdq_2 = "rmo:660F6DrM",
- punpckhwd_2 = "rmo:660F69rM",
- punpcklbw_2 = "rmo:660F60rM",
- punpckldq_2 = "rmo:660F62rM",
- punpcklqdq_2 = "rmo:660F6CrM",
- punpcklwd_2 = "rmo:660F61rM",
- pxor_2 = "rmo:660FEFrM",
rcpps_2 = "rmo:0F53rM",
rcpss_2 = "rro:F30F53rM|rx/od:",
rsqrtps_2 = "rmo:0F52rM",
@@ -1413,6 +1464,327 @@ local map_op = {
movntsd_2 = "xr/qo:nF20F2BRm",
movntss_2 = "xr/do:F30F2BRm",
-- popcnt is also in SSE4.2
+
+ -- AES-NI
+ aesdec_2 = "rmo:660F38DErM",
+ aesdeclast_2 = "rmo:660F38DFrM",
+ aesenc_2 = "rmo:660F38DCrM",
+ aesenclast_2 = "rmo:660F38DDrM",
+ aesimc_2 = "rmo:660F38DBrM",
+ aeskeygenassist_3 = "rmio:660F3ADFrMU",
+ pclmulqdq_3 = "rmio:660F3A44rMU",
+
+ -- AVX FP ops
+ vaddsubpd_3 = "rrmoy:660FVD0rM",
+ vaddsubps_3 = "rrmoy:F20FVD0rM",
+ vandpd_3 = "rrmoy:660FV54rM",
+ vandps_3 = "rrmoy:0FV54rM",
+ vandnpd_3 = "rrmoy:660FV55rM",
+ vandnps_3 = "rrmoy:0FV55rM",
+ vblendpd_4 = "rrmioy:660F3AV0DrMU",
+ vblendps_4 = "rrmioy:660F3AV0CrMU",
+ vblendvpd_4 = "rrmroy:660F3AV4BrMs",
+ vblendvps_4 = "rrmroy:660F3AV4ArMs",
+ vbroadcastf128_2 = "rx/yo:660F38u1ArM",
+ vcmppd_4 = "rrmioy:660FVC2rMU",
+ vcmpps_4 = "rrmioy:0FVC2rMU",
+ vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
+ vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
+ vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
+ vcomiss_2 = "rro:0Fu2FrM|rx/od:",
+ vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
+ vcvtdq2ps_2 = "rmoy:0Fu5BrM",
+ vcvtpd2dq_2 = "rmoy:F20FuE6rM",
+ vcvtpd2ps_2 = "rmoy:660Fu5ArM",
+ vcvtps2dq_2 = "rmoy:660Fu5BrM",
+ vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
+ vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
+ vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
+ vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
+ vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
+ vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
+ vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
+ vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
+ vcvttps2dq_2 = "rmoy:F30Fu5BrM",
+ vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
+ vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
+ vdppd_4 = "rrmio:660F3AV41rMU",
+ vdpps_4 = "rrmioy:660F3AV40rMU",
+ vextractf128_3 = "mri/oy:660F3AuL19RmU",
+ vextractps_3 = "mri/do:660F3Au17RmU",
+ vhaddpd_3 = "rrmoy:660FV7CrM",
+ vhaddps_3 = "rrmoy:F20FV7CrM",
+ vhsubpd_3 = "rrmoy:660FV7DrM",
+ vhsubps_3 = "rrmoy:F20FV7DrM",
+ vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
+ vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
+ vldmxcsr_1 = "xd:0FuAE2m",
+ vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
+ vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
+ vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
+ vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
+ vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
+ vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
+ vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
+ vmovhlps_3 = "rrro:0FV12rM",
+ vmovhpd_2 = "xr/qo:660Fu17Rm",
+ vmovhpd_3 = "rrx/ooq:660FV16rM",
+ vmovhps_2 = "xr/qo:0Fu17Rm",
+ vmovhps_3 = "rrx/ooq:0FV16rM",
+ vmovlhps_3 = "rrro:0FV16rM",
+ vmovlpd_2 = "xr/qo:660Fu13Rm",
+ vmovlpd_3 = "rrx/ooq:660FV12rM",
+ vmovlps_2 = "xr/qo:0Fu13Rm",
+ vmovlps_3 = "rrx/ooq:0FV12rM",
+ vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
+ vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
+ vmovntpd_2 = "xroy:660Fu2BRm",
+ vmovntps_2 = "xroy:0Fu2BRm",
+ vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
+ vmovsd_3 = "rrro:F20FV10rM",
+ vmovshdup_2 = "rmoy:F30Fu16rM",
+ vmovsldup_2 = "rmoy:F30Fu12rM",
+ vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
+ vmovss_3 = "rrro:F30FV10rM",
+ vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
+ vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
+ vorpd_3 = "rrmoy:660FV56rM",
+ vorps_3 = "rrmoy:0FV56rM",
+ vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
+ vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
+ vperm2f128_4 = "rrmiy:660F3AV06rMU",
+ vptestpd_2 = "rmoy:660F38u0FrM",
+ vptestps_2 = "rmoy:660F38u0ErM",
+ vrcpps_2 = "rmoy:0Fu53rM",
+ vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
+ vrsqrtps_2 = "rmoy:0Fu52rM",
+ vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
+ vroundpd_3 = "rmioy:660F3Au09rMU",
+ vroundps_3 = "rmioy:660F3Au08rMU",
+ vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
+ vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
+ vshufpd_4 = "rrmioy:660FVC6rMU",
+ vshufps_4 = "rrmioy:0FVC6rMU",
+ vsqrtps_2 = "rmoy:0Fu51rM",
+ vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
+ vsqrtpd_2 = "rmoy:660Fu51rM",
+ vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
+ vstmxcsr_1 = "xd:0FuAE3m",
+ vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
+ vucomiss_2 = "rro:0Fu2ErM|rx/od:",
+ vunpckhpd_3 = "rrmoy:660FV15rM",
+ vunpckhps_3 = "rrmoy:0FV15rM",
+ vunpcklpd_3 = "rrmoy:660FV14rM",
+ vunpcklps_3 = "rrmoy:0FV14rM",
+ vxorpd_3 = "rrmoy:660FV57rM",
+ vxorps_3 = "rrmoy:0FV57rM",
+ vzeroall_0 = "0FuL77",
+ vzeroupper_0 = "0Fu77",
+
+ -- AVX2 FP ops
+ vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
+ vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
+ -- *vgather* (!vsib)
+ vpermpd_3 = "rmiy:660F3AuX01rMU",
+ vpermps_3 = "rrmy:660F38V16rM",
+
+ -- AVX, AVX2 integer ops
+ -- In general, xmm requires AVX, ymm requires AVX2.
+ vaesdec_3 = "rrmo:660F38VDErM",
+ vaesdeclast_3 = "rrmo:660F38VDFrM",
+ vaesenc_3 = "rrmo:660F38VDCrM",
+ vaesenclast_3 = "rrmo:660F38VDDrM",
+ vaesimc_2 = "rmo:660F38uDBrM",
+ vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
+ vlddqu_2 = "rxoy:F20FuF0rM",
+ vmaskmovdqu_2 = "rro:660FuF7rM",
+ vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
+ vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
+ vmovntdq_2 = "xroy:660FuE7Rm",
+ vmovntdqa_2 = "rxoy:660F38u2ArM",
+ vmpsadbw_4 = "rrmioy:660F3AV42rMU",
+ vpabsb_2 = "rmoy:660F38u1CrM",
+ vpabsd_2 = "rmoy:660F38u1ErM",
+ vpabsw_2 = "rmoy:660F38u1DrM",
+ vpackusdw_3 = "rrmoy:660F38V2BrM",
+ vpalignr_4 = "rrmioy:660F3AV0FrMU",
+ vpblendvb_4 = "rrmroy:660F3AV4CrMs",
+ vpblendw_4 = "rrmioy:660F3AV0ErMU",
+ vpclmulqdq_4 = "rrmio:660F3AV44rMU",
+ vpcmpeqq_3 = "rrmoy:660F38V29rM",
+ vpcmpestri_3 = "rmio:660F3Au61rMU",
+ vpcmpestrm_3 = "rmio:660F3Au60rMU",
+ vpcmpgtq_3 = "rrmoy:660F38V37rM",
+ vpcmpistri_3 = "rmio:660F3Au63rMU",
+ vpcmpistrm_3 = "rmio:660F3Au62rMU",
+ vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
+ vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
+ vpextrd_3 = "mri/do:660F3Au16RmU",
+ vpextrq_3 = "mri/qo:660F3Au16RmU",
+ vphaddw_3 = "rrmoy:660F38V01rM",
+ vphaddd_3 = "rrmoy:660F38V02rM",
+ vphaddsw_3 = "rrmoy:660F38V03rM",
+ vphminposuw_2 = "rmo:660F38u41rM",
+ vphsubw_3 = "rrmoy:660F38V05rM",
+ vphsubd_3 = "rrmoy:660F38V06rM",
+ vphsubsw_3 = "rrmoy:660F38V07rM",
+ vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
+ vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
+ vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
+ vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
+ vpmaddubsw_3 = "rrmoy:660F38V04rM",
+ vpmaxsb_3 = "rrmoy:660F38V3CrM",
+ vpmaxsd_3 = "rrmoy:660F38V3DrM",
+ vpmaxuw_3 = "rrmoy:660F38V3ErM",
+ vpmaxud_3 = "rrmoy:660F38V3FrM",
+ vpminsb_3 = "rrmoy:660F38V38rM",
+ vpminsd_3 = "rrmoy:660F38V39rM",
+ vpminuw_3 = "rrmoy:660F38V3ArM",
+ vpminud_3 = "rrmoy:660F38V3BrM",
+ vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
+ vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
+ vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
+ vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
+ vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
+ vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
+ vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
+ vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
+ vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
+ vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
+ vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
+ vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
+ vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
+ vpmuldq_3 = "rrmoy:660F38V28rM",
+ vpmulhrsw_3 = "rrmoy:660F38V0BrM",
+ vpmulld_3 = "rrmoy:660F38V40rM",
+ vpshufb_3 = "rrmoy:660F38V00rM",
+ vpshufd_3 = "rmioy:660Fu70rMU",
+ vpshufhw_3 = "rmioy:F30Fu70rMU",
+ vpshuflw_3 = "rmioy:F20Fu70rMU",
+ vpsignb_3 = "rrmoy:660F38V08rM",
+ vpsignw_3 = "rrmoy:660F38V09rM",
+ vpsignd_3 = "rrmoy:660F38V0ArM",
+ vpslldq_3 = "rrioy:660Fv737mU",
+ vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
+ vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
+ vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
+ vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
+ vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
+ vpsrldq_3 = "rrioy:660Fv733mU",
+ vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
+ vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
+ vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
+ vptest_2 = "rmoy:660F38u17rM",
+
+ -- AVX2 integer ops
+ vbroadcasti128_2 = "rx/yo:660F38u5ArM",
+ vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
+ vextracti128_3 = "mri/oy:660F3AuL39RmU",
+ vpblendd_4 = "rrmioy:660F3AV02rMU",
+ vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
+ vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
+ vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
+ vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
+ vpermd_3 = "rrmy:660F38V36rM",
+ vpermq_3 = "rmiy:660F3AuX00rMU",
+ -- *vpgather* (!vsib)
+ vperm2i128_4 = "rrmiy:660F3AV46rMU",
+ vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
+ vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
+ vpsllvd_3 = "rrmoy:660F38V47rM",
+ vpsllvq_3 = "rrmoy:660F38VX47rM",
+ vpsravd_3 = "rrmoy:660F38V46rM",
+ vpsrlvd_3 = "rrmoy:660F38V45rM",
+ vpsrlvq_3 = "rrmoy:660F38VX45rM",
+
+ -- Intel ADX
+ adcx_2 = "rmqd:660F38F6rM",
+ adox_2 = "rmqd:F30F38F6rM",
+
+ -- BMI1
+ andn_3 = "rrmqd:0F38VF2rM",
+ bextr_3 = "rmrqd:0F38wF7rM",
+ blsi_2 = "rmqd:0F38vF33m",
+ blsmsk_2 = "rmqd:0F38vF32m",
+ blsr_2 = "rmqd:0F38vF31m",
+ tzcnt_2 = "rmqdw:F30FBCrM",
+
+ -- BMI2
+ bzhi_3 = "rmrqd:0F38wF5rM",
+ mulx_3 = "rrmqd:F20F38VF6rM",
+ pdep_3 = "rrmqd:F20F38VF5rM",
+ pext_3 = "rrmqd:F30F38VF5rM",
+ rorx_3 = "rmSqd:F20F3AuF0rMS",
+ sarx_3 = "rmrqd:F30F38wF7rM",
+ shrx_3 = "rmrqd:F20F38wF7rM",
+ shlx_3 = "rmrqd:660F38wF7rM",
+
+ -- FMA3
+ vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
+ vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
+ vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
+ vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
+ vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
+ vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
+
+ vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
+ vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
+ vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
+ vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
+ vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
+ vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
+
+ vfmadd132pd_3 = "rrmoy:660F38VX98rM",
+ vfmadd132ps_3 = "rrmoy:660F38V98rM",
+ vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
+ vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
+ vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
+ vfmadd213ps_3 = "rrmoy:660F38VA8rM",
+ vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
+ vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
+ vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
+ vfmadd231ps_3 = "rrmoy:660F38VB8rM",
+ vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
+ vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
+
+ vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
+ vfmsub132ps_3 = "rrmoy:660F38V9ArM",
+ vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
+ vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
+ vfmsub213pd_3 = "rrmoy:660F38VXAArM",
+ vfmsub213ps_3 = "rrmoy:660F38VAArM",
+ vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
+ vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
+ vfmsub231pd_3 = "rrmoy:660F38VXBArM",
+ vfmsub231ps_3 = "rrmoy:660F38VBArM",
+ vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
+ vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
+
+ vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
+ vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
+ vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
+ vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
+ vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
+ vfnmadd213ps_3 = "rrmoy:660F38VACrM",
+ vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
+ vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
+ vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
+ vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
+ vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
+ vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
+
+ vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
+ vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
+ vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
+ vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
+ vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
+ vfnmsub213ps_3 = "rrmoy:660F38VAErM",
+ vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
+ vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
+ vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
+ vfnmsub231ps_3 = "rrmoy:660F38VBErM",
+ vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
+ vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
}
------------------------------------------------------------------------------
@@ -1463,28 +1835,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+
end
--- SSE FP arithmetic ops.
+-- SSE / AVX FP arithmetic ops.
for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
sub = 12, min = 13, div = 14, max = 15 } do
map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
+ if n ~= 1 then
+ map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
+ map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
+ map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
+ map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
+ end
+end
+
+-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
+for name,n in pairs{
+ paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
+ paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
+ packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
+ paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
+ pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
+ pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
+ pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
+ pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
+ pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
+ pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
+ psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
+ psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
+ punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
+ punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
+ punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
+} do
+ map_op[name.."_2"] = format("rmo:660F%02XrM", n)
+ map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
end
------------------------------------------------------------------------------
+local map_vexarg = { u = false, v = 1, V = 2, w = 3 }
+
-- Process pattern string.
local function dopattern(pat, args, sz, op, needrex)
- local digit, addin
+ local digit, addin, vex
local opcode = 0
local szov = sz
local narg = 1
local rex = 0
-- Limit number of section buffer positions used by a single dasm_put().
- -- A single opcode needs a maximum of 5 positions.
- if secpos+5 > maxsecpos then wflush() end
+ -- A single opcode needs a maximum of 6 positions.
+ if secpos+6 > maxsecpos then wflush() end
-- Process each character.
for c in gmatch(pat.."|", ".") do
@@ -1498,6 +1900,8 @@ local function dopattern(pat, args, sz, op, needrex)
szov = nil
elseif c == "X" then -- Force REX.W.
rex = 8
+ elseif c == "L" then -- Force VEX.L.
+ vex.l = true
elseif c == "r" then -- Merge 1st operand regno. into opcode.
addin = args[1]; opcode = opcode + (addin.reg % 8)
if narg < 2 then narg = 2 end
@@ -1521,21 +1925,42 @@ local function dopattern(pat, args, sz, op, needrex)
if t.xreg and t.xreg > 7 then rex = rex + 2 end
if s > 7 then rex = rex + 4 end
if needrex then rex = rex + 16 end
- wputop(szov, opcode, rex); opcode = nil
+ local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
+ opcode = nil
local imark = sub(pat, -1) -- Force a mark (ugly).
-- Put ModRM/SIB with regno/last digit as spare.
- wputmrmsib(t, imark, s, addin and addin.vreg)
+ wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
addin = nil
+ elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
+ local b = band(opcode, 255); opcode = shr(opcode, 8)
+ local m = 1
+ if b == 0x38 then m = 2
+ elseif b == 0x3a then m = 3 end
+ if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
+ if b ~= 0x0f then
+ werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
+ "' in pattern `"..pat.."' for `"..op.."'")
+ end
+ local v = map_vexarg[c]
+ if v then v = remove(args, v) end
+ b = band(opcode, 255)
+ local p = 0
+ if b == 0x66 then p = 1
+ elseif b == 0xf3 then p = 2
+ elseif b == 0xf2 then p = 3 end
+ if p ~= 0 then opcode = shr(opcode, 8) end
+ if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
+ vex = { m = m, p = p, v = v }
else
if opcode then -- Flush opcode.
if szov == "q" and rex == 0 then rex = rex + 8 end
if needrex then rex = rex + 16 end
if addin and addin.reg == -1 then
- wputop(szov, opcode - 7, rex)
- waction("VREG", addin.vreg); wputxb(0)
+ local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
+ wvreg("opcode", addin.vreg, psz, sk)
else
if addin and addin.reg > 7 then rex = rex + 1 end
- wputop(szov, opcode, rex)
+ wputop(szov, opcode, rex, vex)
end
opcode = nil
end
@@ -1549,7 +1974,7 @@ local function dopattern(pat, args, sz, op, needrex)
local a = args[narg]
narg = narg + 1
local mode, imm = a.mode, a.imm
- if mode == "iJ" and not match("iIJ", c) then
+ if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
werror("bad operand size for label")
end
if c == "S" then
@@ -1572,6 +1997,14 @@ local function dopattern(pat, args, sz, op, needrex)
else
wputlabel("REL_", imm, 2)
end
+ elseif c == "s" then
+ local reg = a.reg
+ if reg < 0 then
+ wputb(0)
+ wvreg("imm.hi", a.vreg)
+ else
+ wputb(shl(reg, 4))
+ end
else
werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
end
@@ -1648,11 +2081,14 @@ map_op[".template__"] = function(params, template, nparams)
if pat == "" then pat = lastpat else lastpat = pat end
if matchtm(tm, args) then
local prefix = sub(szm, 1, 1)
- if prefix == "/" then -- Match both operand sizes.
- if args[1].opsize == sub(szm, 2, 2) and
- args[2].opsize == sub(szm, 3, 3) then
- dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
- return
+ if prefix == "/" then -- Exactly match leading operand sizes.
+ for i = #szm,1,-1 do
+ if i == 1 then
+ dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
+ return
+ elseif args[i-1].opsize ~= sub(szm, i, i) then
+ break
+ end
end
else -- Match common operand size.
local szp = sz
@@ -1717,8 +2153,8 @@ if x64 then
rex = a.reg > 7 and 9 or 8
end
end
- wputop(sz, opcode, rex)
- if vreg then waction("VREG", vreg); wputxb(0) end
+ local psz, sk = wputop(sz, opcode, rex, nil, vreg)
+ wvreg("opcode", vreg, psz, sk)
waction("IMM_D", format("(unsigned int)(%s)", op64))
waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
end
@@ -1730,14 +2166,16 @@ end
local function op_data(params)
if not params then return "imm..." end
local sz = sub(params.op, 2, 2)
- if sz == "a" then sz = addrsize end
+ if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end
for _,p in ipairs(params) do
- local a = parseoperand(p)
+ local a = parseoperand(p, sz == "q")
if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
werror("bad mode or size in `"..p.."'")
end
if a.mode == "iJ" then
wputlabel("IMM_", a.imm, 1)
+ elseif sz == "q" then
+ wputqarg(a.imm)
else
wputszarg(sz, a.imm)
end
@@ -1749,7 +2187,11 @@ map_op[".byte_*"] = op_data
map_op[".sbyte_*"] = op_data
map_op[".word_*"] = op_data
map_op[".dword_*"] = op_data
+map_op[".qword_*"] = op_data
map_op[".aword_*"] = op_data
+map_op[".long_*"] = op_data
+map_op[".quad_*"] = op_data
+map_op[".addr_*"] = op_data
------------------------------------------------------------------------------
diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua
index 2c773cb6..95251b93 100644
--- a/dynasm/dynasm.lua
+++ b/dynasm/dynasm.lua
@@ -10,9 +10,9 @@
local _info = {
name = "DynASM",
description = "A dynamic assembler for code generation engines",
- version = "1.3.0",
- vernum = 10300,
- release = "2011-05-05",
+ version = "1.5.0",
+ vernum = 10500,
+ release = "2021-05-02",
author = "Mike Pall",
url = "https://luajit.org/dynasm.html",
license = "MIT",
@@ -630,6 +630,7 @@ end
-- Load architecture-specific module.
local function loadarch(arch)
if not match(arch, "^[%w_]+$") then return "bad arch name" end
+ _G._map_def = map_def
local ok, m_arch = pcall(require, "dasm_"..arch)
if not ok then return "cannot load module: "..m_arch end
g_arch = m_arch
diff --git a/etc/luajit.pc b/etc/luajit.pc
index 9bac3a8b..39e1e577 100644
--- a/etc/luajit.pc
+++ b/etc/luajit.pc
@@ -1,8 +1,8 @@
# Package information for LuaJIT to be used by pkg-config.
majver=2
-minver=0
-relver=5
-version=${majver}.${minver}.${relver}
+minver=1
+relver=0
+version=${majver}.${minver}.${relver}-beta3
abiver=5.1
prefix=/usr/local
diff --git a/src/.gitignore b/src/.gitignore
index fc94e82c..1a30573c 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -4,4 +4,4 @@ lj_ffdef.h
lj_libdef.h
lj_recdef.h
lj_folddef.h
-lj_vm.s
+lj_vm.[sS]
diff --git a/src/Makefile b/src/Makefile
index c4d0b14d..30d64be2 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -11,8 +11,8 @@
##############################################################################
MAJVER= 2
-MINVER= 0
-RELVER= 5
+MINVER= 1
+RELVER= 0
ABIVER= 5.1
NODOTABIVER= 51
@@ -44,17 +44,14 @@ CCOPT= -O2 -fomit-frame-pointer
#
# Target-specific compiler options:
#
-# x86 only: it's recommended to compile at least for i686. Better yet,
-# compile for an architecture that has SSE2, too (-msse -msse2).
-#
# x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute
# the binaries to a different machine you could also use: -march=native
#
-CCOPT_x86= -march=i686
+CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse
CCOPT_x64=
CCOPT_arm=
+CCOPT_arm64=
CCOPT_ppc=
-CCOPT_ppcspe=
CCOPT_mips=
#
CCDEBUG=
@@ -113,6 +110,9 @@ XCFLAGS=
#XCFLAGS+= -DLUAJIT_NUMMODE=1
#XCFLAGS+= -DLUAJIT_NUMMODE=2
#
+# Disable LJ_GC64 mode for x64.
+#XCFLAGS+= -DLUAJIT_DISABLE_GC64
+#
##############################################################################
##############################################################################
@@ -124,15 +124,14 @@ XCFLAGS=
#
# Use the system provided memory allocator (realloc) instead of the
# bundled memory allocator. This is slower, but sometimes helpful for
-# debugging. This option cannot be enabled on x64, since realloc usually
-# doesn't return addresses in the right address range.
+# debugging. This option cannot be enabled on x64 without GC64, since
+# realloc usually doesn't return addresses in the right address range.
# OTOH this option is mandatory for Valgrind's memcheck tool on x64 and
# the only way to get useful results from it for all other architectures.
#XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
#
# This define is required to run LuaJIT under Valgrind. The Valgrind
# header files must be installed. You should enable debug information, too.
-# Use --suppressions=lj.supp to avoid some false positives.
#XCFLAGS+= -DLUAJIT_USE_VALGRIND
#
# This is the client for the GDB JIT API. GDB 7.0 or higher is required
@@ -189,7 +188,8 @@ endif
# make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows
# make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu-
-CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS)
+CCOPTIONS= $(CCDEBUG) $(ASOPTIONS)
LDOPTIONS= $(CCDEBUG) $(LDFLAGS)
HOST_CC= $(CC)
@@ -229,6 +229,7 @@ TARGET_XLDFLAGS=
TARGET_XLIBS= -lm
TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
+TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS)
TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS)
TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS)
TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS)
@@ -243,17 +244,29 @@ else
ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH)))
TARGET_LJARCH= arm
else
+ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH)))
+ ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH)))
+ TARGET_ARCH= -D__AARCH64EB__=1
+ endif
+ TARGET_LJARCH= arm64
+else
ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH)))
+ ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
+ TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE
+ else
+ TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE
+ endif
TARGET_LJARCH= ppc
else
-ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH)))
- TARGET_LJARCH= ppcspe
-else
ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH)))
ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH)))
TARGET_ARCH= -D__MIPSEL__=1
endif
- TARGET_LJARCH= mips
+ ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH)))
+ TARGET_LJARCH= mips64
+ else
+ TARGET_LJARCH= mips
+ endif
else
$(error Unsupported target architecture)
endif
@@ -267,6 +280,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH)))
TARGET_SYS= PS3
TARGET_ARCH+= -D__CELLOS_LV2__
TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC
+ TARGET_XLIBS+= -lpthread
endif
TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH))
@@ -306,20 +320,27 @@ ifeq (Darwin,$(TARGET_SYS))
$(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY)
endif
TARGET_STRIP+= -x
+ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
- ifeq (x64,$(TARGET_LJARCH))
- TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000
- TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000
- endif
else
ifeq (iOS,$(TARGET_SYS))
TARGET_STRIP+= -x
TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC
TARGET_DYNXLDOPTS=
TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER)
+ ifeq (arm64,$(TARGET_LJARCH))
+ TARGET_XCFLAGS+= -fno-omit-frame-pointer
+ endif
else
+ ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH)))
+ # Find out whether the target toolchain always generates unwind tables.
+ TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o)
+ ifneq (,$(findstring E,$(TARGET_TESTUNWIND)))
+ TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL
+ endif
+ endif
ifneq (SunOS,$(TARGET_SYS))
ifneq (PS3,$(TARGET_SYS))
TARGET_XLDFLAGS+= -Wl,-E
@@ -346,7 +367,7 @@ ifneq ($(HOST_SYS),$(TARGET_SYS))
HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
else
ifeq (iOS,$(TARGET_SYS))
- HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX
+ HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1
else
HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER
endif
@@ -379,6 +400,11 @@ DASM_XFLAGS=
DASM_AFLAGS=
DASM_ARCH= $(TARGET_LJARCH)
+ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D ENDIAN_LE
+else
+ DASM_AFLAGS+= -D ENDIAN_BE
+endif
ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D P64
endif
@@ -411,19 +437,19 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs
ifeq (Windows,$(TARGET_SYS))
DASM_AFLAGS+= -D WIN
endif
-ifeq (x86,$(TARGET_LJARCH))
- ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH)))
- DASM_AFLAGS+= -D SSE
- endif
-else
ifeq (x64,$(TARGET_LJARCH))
- DASM_ARCH= x86
+ ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH)))
+ DASM_ARCH= x86
+ endif
else
ifeq (arm,$(TARGET_LJARCH))
ifeq (iOS,$(TARGET_SYS))
DASM_AFLAGS+= -D IOS
endif
else
+ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH)))
+ DASM_AFLAGS+= -D MIPSR6
+endif
ifeq (ppc,$(TARGET_LJARCH))
ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D SQRT
@@ -431,7 +457,7 @@ ifeq (ppc,$(TARGET_LJARCH))
ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D ROUND
endif
- ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH)))
+ ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH)))
DASM_AFLAGS+= -D GPR64
endif
ifeq (PS3,$(TARGET_SYS))
@@ -440,7 +466,6 @@ ifeq (ppc,$(TARGET_LJARCH))
endif
endif
endif
-endif
DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS)
DASM_DASC= vm_$(DASM_ARCH).dasc
@@ -453,19 +478,22 @@ BUILDVM_X= $(BUILDVM_T)
HOST_O= $(MINILUA_O) $(BUILDVM_O)
HOST_T= $(MINILUA_T) $(BUILDVM_T)
-LJVM_S= lj_vm.s
+LJVM_S= lj_vm.S
LJVM_O= lj_vm.o
LJVM_BOUT= $(LJVM_S)
LJVM_MODE= elfasm
LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \
- lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o
+ lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \
+ lib_buffer.o
LJLIB_C= $(LJLIB_O:.o=.c)
-LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \
+LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \
- lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \
- lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
+ lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \
+ lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \
+ lj_api.o lj_profile.o \
+ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \
lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \
lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \
lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \
@@ -580,12 +608,15 @@ E= @echo
default all: $(TARGET_T)
amalg:
- @grep "^[+|]" ljamalg.c
$(MAKE) all "LJCORE_O=ljamalg.o"
clean:
$(HOST_RM) $(ALL_RM)
+libbc:
+ ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C)
+ $(MAKE) all
+
depend:
@for file in $(ALL_HDRGEN); do \
test -f $$file || touch $$file; \
@@ -600,7 +631,7 @@ depend:
test -s $$file || $(HOST_RM) $$file; \
done
-.PHONY: default all amalg clean depend
+.PHONY: default all amalg clean libbc depend
##############################################################################
# Rules for generated files.
@@ -610,7 +641,7 @@ $(MINILUA_T): $(MINILUA_O)
$(E) "HOSTLINK $@"
$(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS)
-host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) lj_arch.h lua.h luaconf.h
+host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua lj_arch.h lua.h luaconf.h
$(E) "DYNASM $@"
$(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC)
@@ -657,10 +688,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c
$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
-%.o: %.s
+%.o: %.S
$(E) "ASM $@"
- $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
- $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
+ $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $<
+ $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $<
$(LUAJIT_O):
$(E) "CC $@"
diff --git a/src/Makefile.dep b/src/Makefile.dep
index 9e14d617..1ad6701a 100644
--- a/src/Makefile.dep
+++ b/src/Makefile.dep
@@ -1,66 +1,79 @@
lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h
lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \
- lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \
- lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \
- lj_lib.h lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \
+ lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \
+ lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \
+ lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h
lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \
+ lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \
+ lj_ffdef.h lj_lib.h lj_libdef.h
+lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \
+ lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h
lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \
lj_libdef.h
lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \
lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \
- lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+ lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \
+ lj_libdef.h
lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h
lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \
- lj_ffdef.h lj_lib.h lj_libdef.h
-lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \
- lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
- lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \
- lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \
- lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \
+ lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \
+ lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h
lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h
+ lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h
lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \
+ lj_libdef.h
lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h
lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \
- lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \
- lj_lib.h lj_libdef.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \
+ lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h
lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \
- lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \
- lj_libdef.h
-lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h
+ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \
+ lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h
+lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \
+ lj_prng.h
lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h
+ lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h
lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \
- lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \
- lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \
- lj_asm_*.h
+ lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \
+ lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \
+ lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \
+ lj_emit_*.h lj_asm_*.h
+lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \
lj_bcdef.h
lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \
- lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \
+ lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \
+ lj_strfmt.h
lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \
- lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+ lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \
+ lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h
+lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h
lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_carith.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \
+ lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h
lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \
+ lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
lj_traceerr.h
lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \
@@ -68,110 +81,127 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \
lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \
lj_traceerr.h lj_vm.h
lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \
- lj_ccallback.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \
+ lj_cdata.h lj_cconv.h lj_ccallback.h
lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \
- lj_cdata.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h
lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h
lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \
- lj_cdata.h lj_clib.h
+ lj_cdata.h lj_clib.h lj_strfmt.h
lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \
- lj_bc.h lj_vm.h lj_char.h lj_strscan.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h
lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \
- lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \
- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
+ lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \
+ lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \
+ lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \
- lj_crecord.h
+ lj_crecord.h lj_strfmt.h
lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \
+ lj_ccallback.h lj_buf.h
lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \
- lj_bc.h lj_vm.h lj_jit.h lj_ir.h
+ lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
+ lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h
lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \
- lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \
- lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \
- lj_vm.h luajit.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \
+ lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \
+ lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \
+ lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h
lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \
lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \
lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
- lj_traceerr.h lj_vm.h
+ lj_traceerr.h lj_vm.h lj_strfmt.h
lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \
- lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \
- lj_vm.h lj_strscan.h lj_recdef.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \
+ lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \
+ lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h
lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \
lj_traceerr.h lj_vm.h
lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \
- lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \
- lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \
+ lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h
lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \
- lj_ir.h lj_dispatch.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \
+ lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h
lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \
- lj_vm.h lj_strscan.h lj_lib.h
+ lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \
+ lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \
+ lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h
lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \
- lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \
+ lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \
+ lj_strfmt.h
lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \
- lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h
+ lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \
+ lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h
lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \
- lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
+ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \
+ lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h
lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h
+ lj_dispatch.h lj_bc.h lj_traceerr.h lj_prng.h lj_vm.h
lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
- lj_vm.h lj_strscan.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \
+ lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h
lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h
lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.h lj_jit.h lj_iropt.h
lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
- lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \
- lj_strscan.h lj_folddef.h
+ lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \
+ lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h
lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \
- lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \
+ lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \
+ lj_vm.h
lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_tab.h lj_ir.h lj_jit.h lj_iropt.h
+ lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_dispatch.h lj_bc.h
lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \
lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \
lj_traceerr.h lj_vm.h lj_strscan.h
lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h
lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \
- lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_vm.h
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \
+ lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h
lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \
- lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \
+ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \
+ lj_vm.h lj_vmevent.h
+lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h
+lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \
+ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h
lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \
- lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \
- lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \
- lj_ffrecord.h lj_snap.h lj_vm.h
+ lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \
+ lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \
+ lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h
+lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \
+ lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h
lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \
lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \
lj_target_*.h lj_ctype.h lj_cdata.h
lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \
- lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \
- lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \
+ lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \
+ lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \
+ lj_alloc.h luajit.h
lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
- lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h
+ lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h
+lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
+ lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \
+ lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h
+lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \
+ lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h
lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_char.h lj_strscan.h
lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \
@@ -180,35 +210,37 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \
lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \
lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \
- lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h
+ lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h
lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
- lj_gc.h lj_udata.h
+ lj_gc.h lj_err.h lj_errmsg.h lj_udata.h
lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \
lj_vm.h lj_vmevent.h
lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \
lj_ir.h lj_vm.h
-ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \
- lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \
- lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \
- lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \
- lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \
- lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \
- lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \
- luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \
- lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \
- lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \
- lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \
- lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \
- lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \
- lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \
- lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \
- lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
+ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \
+ lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \
+ lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \
+ lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \
+ lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \
+ lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \
+ lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \
+ lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \
+ lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \
+ lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \
+ lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \
+ lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \
+ lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \
+ lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \
+ lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \
+ lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \
+ lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \
+ lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \
lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \
lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \
lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \
lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \
- lib_init.c
+ lib_buffer.c lib_init.c
luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h
host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \
lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \
@@ -220,7 +252,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \
host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \
luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h
host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \
- lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h
+ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \
+ host/buildvm_libbc.h
host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \
luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h
host/minilua.o: host/minilua.c
diff --git a/src/host/buildvm.c b/src/host/buildvm.c
index a12245fd..9ee47ada 100644
--- a/src/host/buildvm.c
+++ b/src/host/buildvm.c
@@ -18,8 +18,10 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_bc.h"
+#if LJ_HASJIT
#include "lj_ir.h"
#include "lj_ircall.h"
+#endif
#include "lj_frame.h"
#include "lj_dispatch.h"
#if LJ_HASFFI
@@ -59,10 +61,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type);
#include "../dynasm/dasm_x86.h"
#elif LJ_TARGET_ARM
#include "../dynasm/dasm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "../dynasm/dasm_arm64.h"
#elif LJ_TARGET_PPC
#include "../dynasm/dasm_ppc.h"
-#elif LJ_TARGET_PPCSPE
-#include "../dynasm/dasm_ppc.h"
#elif LJ_TARGET_MIPS
#include "../dynasm/dasm_mips.h"
#else
@@ -110,11 +112,11 @@ static const char *sym_decorate(BuildCtx *ctx,
if (p) {
#if LJ_TARGET_X86ORX64
if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj))
- name[0] = '@';
+ name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */
else
*p = '\0';
-#elif (LJ_TARGET_PPC || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE
- /* Keep @plt. */
+#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE
+ /* Keep @plt etc. */
#else
*p = '\0';
#endif
@@ -179,6 +181,7 @@ static int build_code(BuildCtx *ctx)
ctx->nreloc = 0;
ctx->globnames = globnames;
+ ctx->extnames = extnames;
ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *));
ctx->nrelocsym = 0;
for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1;
@@ -249,6 +252,7 @@ BCDEF(BCNAME)
NULL
};
+#if LJ_HASJIT
const char *const ir_names[] = {
#define IRNAME(name, m, m1, m2) #name,
IRDEF(IRNAME)
@@ -289,7 +293,9 @@ static const char *const trace_errors[] = {
#include "lj_traceerr.h"
NULL
};
+#endif
+#if LJ_HASJIT
static const char *lower(char *buf, const char *s)
{
char *p = buf;
@@ -300,6 +306,7 @@ static const char *lower(char *buf, const char *s)
*p = '\0';
return buf;
}
+#endif
/* Emit C source code for bytecode-related definitions. */
static void emit_bcdef(BuildCtx *ctx)
@@ -317,23 +324,26 @@ static void emit_bcdef(BuildCtx *ctx)
/* Emit VM definitions as Lua code for debug modules. */
static void emit_vmdef(BuildCtx *ctx)
{
+#if LJ_HASJIT
char buf[80];
+#endif
int i;
fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n");
- fprintf(ctx->fp, "module(...)\n\n");
+ fprintf(ctx->fp, "return {\n\n");
fprintf(ctx->fp, "bcnames = \"");
for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]);
- fprintf(ctx->fp, "\"\n\n");
+ fprintf(ctx->fp, "\",\n\n");
+#if LJ_HASJIT
fprintf(ctx->fp, "irnames = \"");
for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]);
- fprintf(ctx->fp, "\"\n\n");
+ fprintf(ctx->fp, "\",\n\n");
fprintf(ctx->fp, "irfpm = { [0]=");
for (i = 0; irfpm_names[i]; i++)
fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i]));
- fprintf(ctx->fp, "}\n\n");
+ fprintf(ctx->fp, "},\n\n");
fprintf(ctx->fp, "irfield = { [0]=");
for (i = 0; irfield_names[i]; i++) {
@@ -343,17 +353,18 @@ static void emit_vmdef(BuildCtx *ctx)
if (p) *p = '.';
fprintf(ctx->fp, "\"%s\", ", buf);
}
- fprintf(ctx->fp, "}\n\n");
+ fprintf(ctx->fp, "},\n\n");
fprintf(ctx->fp, "ircall = {\n[0]=");
for (i = 0; ircall_names[i]; i++)
fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]);
- fprintf(ctx->fp, "}\n\n");
+ fprintf(ctx->fp, "},\n\n");
fprintf(ctx->fp, "traceerr = {\n[0]=");
for (i = 0; trace_errors[i]; i++)
fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]);
- fprintf(ctx->fp, "}\n\n");
+ fprintf(ctx->fp, "},\n\n");
+#endif
}
/* -- Argument parsing ---------------------------------------------------- */
@@ -490,6 +501,7 @@ int main(int argc, char **argv)
case BUILD_vmdef:
emit_vmdef(ctx);
emit_lib(ctx);
+ fprintf(ctx->fp, "}\n\n");
break;
case BUILD_ffdef:
case BUILD_libdef:
diff --git a/src/host/buildvm.h b/src/host/buildvm.h
index 3b3110fb..18cd8848 100644
--- a/src/host/buildvm.h
+++ b/src/host/buildvm.h
@@ -82,6 +82,7 @@ typedef struct BuildCtx {
const char *beginsym;
/* Strings generated by DynASM. */
const char *const *globnames;
+ const char *const *extnames;
const char *dasm_ident;
const char *dasm_arch;
/* Relocations. */
diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c
index 390abbdd..7baa011f 100644
--- a/src/host/buildvm_asm.c
+++ b/src/host/buildvm_asm.c
@@ -51,8 +51,8 @@ static const char *const jccnames[] = {
"js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg"
};
-/* Emit relocation for the incredibly stupid OSX assembler. */
-static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n,
+/* Emit x86/x64 text relocations. */
+static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n,
const char *sym)
{
const char *opname = NULL;
@@ -71,6 +71,20 @@ err:
exit(1);
}
emit_asm_bytes(ctx, cp, n);
+ if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) {
+ /* Various fixups for external symbols outside of our binary. */
+ if (ctx->mode == BUILD_elfasm) {
+ if (LJ_32)
+ fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym);
+ fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym);
+ if (LJ_32)
+ fprintf(ctx->fp, "#endif\n");
+ return;
+ } else if (LJ_32 && ctx->mode == BUILD_machasm) {
+ fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym);
+ return;
+ }
+ }
fprintf(ctx->fp, "\t%s %s\n", opname, sym);
}
#else
@@ -79,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n)
{
int i;
for (i = 0; i < n; i += 4) {
+ uint32_t ins = *(uint32_t *)(p+i);
+#if LJ_TARGET_ARM64 && LJ_BE
+ ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */
+#endif
if ((i & 15) == 0)
- fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i));
+ fprintf(ctx->fp, "\t.long 0x%08x", ins);
else
- fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i));
+ fprintf(ctx->fp, ",0x%08x", ins);
if ((i & 15) == 12) putc('\n', ctx->fp);
}
if ((n & 15) != 0) putc('\n', ctx->fp);
@@ -107,7 +125,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n,
ins, sym);
exit(1);
}
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#elif LJ_TARGET_ARM64
+ if ((ins >> 26) == 0x25u) {
+ fprintf(ctx->fp, "\tbl %s\n", sym);
+ } else {
+ fprintf(stderr,
+ "Error: unsupported opcode %08x for %s symbol relocation.\n",
+ ins, sym);
+ exit(1);
+ }
+#elif LJ_TARGET_PPC
#if LJ_TARGET_PS3
#define TOCPREFIX "."
#else
@@ -228,11 +255,20 @@ void emit_asm(BuildCtx *ctx)
#if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND
/* This should really be moved into buildvm_arm.dasc. */
+#if LJ_ARCH_HASFPU
+ fprintf(ctx->fp,
+ ".fnstart\n"
+ ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n"
+ ".vsave {d8-d15}\n"
+ ".save {r4}\n"
+ ".pad #28\n");
+#else
fprintf(ctx->fp,
".fnstart\n"
".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n"
".pad #28\n");
#endif
+#endif
#if LJ_TARGET_MIPS
fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n");
#endif
@@ -255,8 +291,9 @@ void emit_asm(BuildCtx *ctx)
BuildReloc *r = &ctx->reloc[rel];
int n = r->ofs - ofs;
#if LJ_TARGET_X86ORX64
- if (ctx->mode == BUILD_machasm && r->type != 0) {
- emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
+ if (r->type != 0 &&
+ (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) {
+ emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]);
} else {
emit_asm_bytes(ctx, ctx->code+ofs, n);
emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]);
@@ -290,10 +327,7 @@ void emit_asm(BuildCtx *ctx)
#if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA)
fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n");
#endif
-#if LJ_TARGET_PPCSPE
- /* Soft-float ABI + SPE. */
- fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n");
-#elif LJ_TARGET_PPC && !LJ_TARGET_PS3
+#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP
/* Hard-float ABI. */
fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n");
#endif
diff --git a/src/host/buildvm_fold.c b/src/host/buildvm_fold.c
index 7f9ac058..edb55768 100644
--- a/src/host/buildvm_fold.c
+++ b/src/host/buildvm_fold.c
@@ -5,6 +5,7 @@
#include "buildvm.h"
#include "lj_obj.h"
+#if LJ_HASJIT
#include "lj_ir.h"
/* Context for the folding hash table generator. */
@@ -226,4 +227,10 @@ void emit_fold(BuildCtx *ctx)
makehash(ctx);
}
+#else
+void emit_fold(BuildCtx *ctx)
+{
+ UNUSED(ctx);
+}
+#endif
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c
index a9829d0d..b125ea12 100644
--- a/src/host/buildvm_lib.c
+++ b/src/host/buildvm_lib.c
@@ -5,7 +5,9 @@
#include "buildvm.h"
#include "lj_obj.h"
+#include "lj_bc.h"
#include "lj_lib.h"
+#include "buildvm_libbc.h"
/* Context for library definitions. */
static uint8_t obuf[8192];
@@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg)
regfunc = REGFUNC_OK;
}
+static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv)
+{
+ uint32_t v = *p++;
+ if (v >= 0x80) {
+ int sh = 0; v &= 0x7f;
+ do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
+ }
+ *vv = v;
+ return p;
+}
+
+static void libdef_fixupbc(uint8_t *p)
+{
+ uint32_t i, sizebc;
+ p += 4;
+ p = libdef_uleb128(p, &sizebc);
+ p = libdef_uleb128(p, &sizebc);
+ p = libdef_uleb128(p, &sizebc);
+ for (i = 0; i < sizebc; i++, p += 4) {
+ uint8_t op = p[libbc_endian ? 3 : 0];
+ uint8_t ra = p[libbc_endian ? 2 : 1];
+ uint8_t rc = p[libbc_endian ? 1 : 2];
+ uint8_t rb = p[libbc_endian ? 0 : 3];
+ if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) {
+ op = BC_ISNUM; rc++;
+ }
+ p[LJ_ENDIAN_SELECT(0, 3)] = op;
+ p[LJ_ENDIAN_SELECT(1, 2)] = ra;
+ p[LJ_ENDIAN_SELECT(2, 1)] = rc;
+ p[LJ_ENDIAN_SELECT(3, 0)] = rb;
+ }
+}
+
+static void libdef_lua(BuildCtx *ctx, char *p, int arg)
+{
+ UNUSED(arg);
+ if (ctx->mode == BUILD_libdef) {
+ int i;
+ for (i = 0; libbc_map[i].name != NULL; i++) {
+ if (!strcmp(libbc_map[i].name, p)) {
+ int ofs = libbc_map[i].ofs;
+ int len = libbc_map[i+1].ofs - ofs;
+ obuf[2]++; /* Bump hash table size. */
+ *optr++ = LIBINIT_LUA;
+ libdef_name(p, 0);
+ memcpy(optr, libbc_code + ofs, len);
+ libdef_fixupbc(optr);
+ optr += len;
+ return;
+ }
+ }
+ fprintf(stderr, "Error: missing libbc definition for %s\n", p);
+ exit(1);
+ }
+}
+
static uint32_t find_rec(char *name)
{
char *p = (char *)obuf;
@@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = {
{ "CF(", ")", libdef_func, LIBINIT_CF },
{ "ASM(", ")", libdef_func, LIBINIT_ASM },
{ "ASM_(", ")", libdef_func, LIBINIT_ASM_ },
+ { "LUA(", ")", libdef_lua, 0 },
{ "REC(", ")", libdef_rec, 0 },
{ "PUSH(", ")", libdef_push, 0 },
{ "SET(", ")", libdef_set, 0 },
@@ -326,6 +385,8 @@ void emit_lib(BuildCtx *ctx)
ok = LJ_HASJIT;
else if (!strcmp(buf, "#if LJ_HASFFI\n"))
ok = LJ_HASFFI;
+ else if (!strcmp(buf, "#if LJ_HASBUFFER\n"))
+ ok = LJ_HASBUFFER;
if (!ok) {
int lvl = 1;
while (fgets(buf, sizeof(buf), fp) != NULL) {
@@ -373,7 +434,7 @@ void emit_lib(BuildCtx *ctx)
"#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n",
ffasmfunc);
} else if (ctx->mode == BUILD_vmdef) {
- fprintf(ctx->fp, "}\n\n");
+ fprintf(ctx->fp, "},\n\n");
} else if (ctx->mode == BUILD_bcdef) {
int i;
fprintf(ctx->fp, "\n};\n\n");
diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h
new file mode 100644
index 00000000..276463b2
--- /dev/null
+++ b/src/host/buildvm_libbc.h
@@ -0,0 +1,81 @@
+/* This is a generated file. DO NOT EDIT! */
+
+static const int libbc_endian = 0;
+
+static const uint8_t libbc_code[] = {
+#if LJ_FR2
+/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3,
+220,203,178,130,4,
+/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20,
+198,190,199,252,3,
+/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
+/* table.foreachi */ 0,2,10,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,
+BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0,
+BC_MOV,8,5,0,BC_TGETR,9,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128,
+BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0,
+/* table.foreach */ 0,2,11,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI,
+2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,9,5,0,
+BC_MOV,10,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0,
+BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15,
+/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
+/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0,
+0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0,
+BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0,
+BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0,
+BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7,
+BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0,
+BC_RET1,3,2,0,BC_RET0,0,1,0,0,2,
+/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE,
+2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE,
+4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4,
+128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0,
+BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,
+BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0,
+BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR,
+11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0,
+#else
+/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3,
+220,203,178,130,4,
+/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20,
+198,190,199,252,3,
+/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
+/* table.foreachi */ 0,2,9,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,
+BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0,
+BC_MOV,7,5,0,BC_TGETR,8,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128,
+BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0,
+/* table.foreach */ 0,2,10,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI,
+2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,8,5,0,
+BC_MOV,9,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0,
+BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15,
+/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0,
+/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0,
+0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0,
+BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0,
+BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0,
+BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7,
+BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0,
+BC_RET1,3,2,0,BC_RET0,0,1,0,0,2,
+/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE,
+2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE,
+4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4,
+128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0,
+BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,
+BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0,
+BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR,
+11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0,
+#endif
+0
+};
+
+static const struct { const char *name; int ofs; } libbc_map[] = {
+{"math_deg",0},
+{"math_rad",25},
+{"string_len",50},
+{"table_foreachi",69},
+{"table_foreach",136},
+{"table_getn",213},
+{"table_remove",232},
+{"table_move",361},
+{NULL,508}
+};
+
diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c
index 97c0698e..b030f234 100644
--- a/src/host/buildvm_peobj.c
+++ b/src/host/buildvm_peobj.c
@@ -9,7 +9,7 @@
#include "buildvm.h"
#include "lj_bc.h"
-#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC
+#if LJ_TARGET_X86ORX64
/* Context for PE object emitter. */
static char *strtab;
@@ -93,12 +93,6 @@ typedef struct PEsymaux {
#define PEOBJ_RELOC_ADDR32NB 0x03
#define PEOBJ_RELOC_OFS 0
#define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */
-#elif LJ_TARGET_PPC
-#define PEOBJ_ARCH_TARGET 0x01f2
-#define PEOBJ_RELOC_REL32 0x06
-#define PEOBJ_RELOC_DIR32 0x02
-#define PEOBJ_RELOC_OFS (-4)
-#define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */
#endif
/* Section numbers (0-based). */
@@ -109,6 +103,8 @@ enum {
#if LJ_TARGET_X64
PEOBJ_SECT_PDATA,
PEOBJ_SECT_XDATA,
+#elif LJ_TARGET_X86
+ PEOBJ_SECT_SXDATA,
#endif
PEOBJ_SECT_RDATA_Z,
PEOBJ_NSECTIONS
@@ -208,6 +204,13 @@ void emit_peobj(BuildCtx *ctx)
sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE;
/* Flags: 40 = read, 30 = align4, 40 = initialized data. */
pesect[PEOBJ_SECT_XDATA].flags = 0x40300040;
+#elif LJ_TARGET_X86
+ memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1);
+ pesect[PEOBJ_SECT_SXDATA].ofs = sofs;
+ sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4);
+ pesect[PEOBJ_SECT_SXDATA].relocofs = sofs;
+ /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */
+ pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240;
#endif
memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1);
@@ -232,7 +235,7 @@ void emit_peobj(BuildCtx *ctx)
nrsym = ctx->nrelocsym;
pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym;
#if LJ_TARGET_X64
- pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */
+ pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */
#endif
/* Write PE object header and all sections. */
@@ -242,15 +245,8 @@ void emit_peobj(BuildCtx *ctx)
/* Write .text section. */
host_endian.u = 1;
if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) {
-#if LJ_TARGET_PPC
- uint32_t *p = (uint32_t *)ctx->code;
- int n = (int)(ctx->codesz >> 2);
- for (i = 0; i < n; i++, p++)
- *p = lj_bswap(*p); /* Byteswap .text section. */
-#else
fprintf(stderr, "Error: different byte order for host and target\n");
exit(1);
-#endif
}
owrite(ctx, ctx->code, ctx->codesz);
for (i = 0; i < ctx->nreloc; i++) {
@@ -312,6 +308,19 @@ void emit_peobj(BuildCtx *ctx)
reloc.type = PEOBJ_RELOC_ADDR32NB;
owrite(ctx, &reloc, PEOBJ_RELOC_SIZE);
}
+#elif LJ_TARGET_X86
+ /* Write .sxdata section. */
+ for (i = 0; i < nrsym; i++) {
+ if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) {
+ uint32_t symidx = 1+2+i;
+ owrite(ctx, &symidx, 4);
+ break;
+ }
+ }
+ if (i == nrsym) {
+ fprintf(stderr, "Error: extern lj_err_unwind_win not used\n");
+ exit(1);
+ }
#endif
/* Write .rdata$Z section. */
@@ -333,8 +342,10 @@ void emit_peobj(BuildCtx *ctx)
#if LJ_TARGET_X64
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA);
emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA);
- emit_peobj_sym(ctx, "lj_err_unwind_win64", 0,
+ emit_peobj_sym(ctx, "lj_err_unwind_win", 0,
PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN);
+#elif LJ_TARGET_X86
+ emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA);
#endif
emit_peobj_sym(ctx, ctx->beginsym, 0,
diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua
new file mode 100644
index 00000000..ba18812c
--- /dev/null
+++ b/src/host/genlibbc.lua
@@ -0,0 +1,225 @@
+----------------------------------------------------------------------------
+-- Lua script to dump the bytecode of the library functions written in Lua.
+-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT.
+----------------------------------------------------------------------------
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+
+local ffi = require("ffi")
+local bit = require("bit")
+local vmdef = require("jit.vmdef")
+local bcnames = vmdef.bcnames
+
+local format = string.format
+
+local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1)
+
+local function usage(arg)
+ io.stderr:write("Usage: ", arg and arg[0] or "genlibbc",
+ " [-o buildvm_libbc.h] lib_*.c\n")
+ os.exit(1)
+end
+
+local function parse_arg(arg)
+ local outfile = "-"
+ if not (arg and arg[1]) then
+ usage(arg)
+ end
+ if arg[1] == "-o" then
+ outfile = arg[2]
+ if not outfile then usage(arg) end
+ table.remove(arg, 1)
+ table.remove(arg, 1)
+ end
+ return outfile
+end
+
+local function read_files(names)
+ local src = ""
+ for _,name in ipairs(names) do
+ local fp = assert(io.open(name))
+ src = src .. fp:read("*a")
+ fp:close()
+ end
+ return src
+end
+
+local function transform_lua(code)
+ local fixup = {}
+ local n = -30000
+ code = string.gsub(code, "CHECK_(%w*)%((.-)%)", function(tp, var)
+ n = n + 1
+ fixup[n] = { "CHECK", tp }
+ return format("%s=%d", var, n)
+ end)
+ code = string.gsub(code, "PAIRS%((.-)%)", function(var)
+ fixup.PAIRS = true
+ return format("nil, %s, 0x4dp80", var)
+ end)
+ return "return "..code, fixup
+end
+
+local function read_uleb128(p)
+ local v = p[0]; p = p + 1
+ if v >= 128 then
+ local sh = 7; v = v - 128
+ repeat
+ local r = p[0]
+ v = v + bit.lshift(bit.band(r, 127), sh)
+ sh = sh + 7
+ p = p + 1
+ until r < 128
+ end
+ return p, v
+end
+
+-- ORDER LJ_T
+local name2itype = {
+ str = 5, func = 9, tab = 12, int = 14, num = 15
+}
+
+local BC, BCN = {}, {}
+for i=0,#bcnames/6-1 do
+ local name = bcnames:sub(i*6+1, i*6+6):gsub(" ", "")
+ BC[name] = i
+ BCN[i] = name
+end
+local xop, xra = isbe and 3 or 0, isbe and 2 or 1
+local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3
+
+local function fixup_dump(dump, fixup)
+ local buf = ffi.new("uint8_t[?]", #dump+1, dump)
+ local p = buf+5
+ local n, sizebc
+ p, n = read_uleb128(p)
+ local start = p
+ p = p + 4
+ p = read_uleb128(p)
+ p = read_uleb128(p)
+ p, sizebc = read_uleb128(p)
+ local startbc = tonumber(p - start)
+ local rawtab = {}
+ for i=0,sizebc-1 do
+ local op = p[xop]
+ if op == BC.KSHORT then
+ local rd = p[xrc] + 256*p[xrb]
+ rd = bit.arshift(bit.lshift(rd, 16), 16)
+ local f = fixup[rd]
+ if f then
+ if f[1] == "CHECK" then
+ local tp = f[2]
+ if tp == "tab" then rawtab[p[xra]] = true end
+ p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE
+ p[xrb] = 0
+ p[xrc] = name2itype[tp]
+ else
+ error("unhandled fixup type: "..f[1])
+ end
+ end
+ elseif op == BC.TGETV then
+ if rawtab[p[xrb]] then
+ p[xop] = BC.TGETR
+ end
+ elseif op == BC.TSETV then
+ if rawtab[p[xrb]] then
+ p[xop] = BC.TSETR
+ end
+ elseif op == BC.ITERC then
+ if fixup.PAIRS then
+ p[xop] = BC.ITERN
+ end
+ end
+ p = p + 4
+ end
+ local ndump = ffi.string(start, n)
+ -- Fixup hi-part of 0x4dp80 to LJ_KEYINDEX.
+ ndump = ndump:gsub("\x80\x80\xcd\xaa\x04", "\xff\xff\xf9\xff\x0f")
+ return { dump = ndump, startbc = startbc, sizebc = sizebc }
+end
+
+local function find_defs(src)
+ local defs = {}
+ for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do
+ local env = {}
+ local tcode, fixup = transform_lua(code)
+ local func = assert(load(tcode, "", nil, env))()
+ defs[name] = fixup_dump(string.dump(func, true), fixup)
+ defs[#defs+1] = name
+ end
+ return defs
+end
+
+local function gen_header(defs)
+ local t = {}
+ local function w(x) t[#t+1] = x end
+ w("/* This is a generated file. DO NOT EDIT! */\n\n")
+ w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n")
+ local s, sb = "", ""
+ for i,name in ipairs(defs) do
+ local d = defs[name]
+ s = s .. d.dump
+ sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1)
+ .. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc)
+ .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4)
+ end
+ w("static const uint8_t libbc_code[] = {\n")
+ local n = 0
+ for i=1,#s do
+ local x = string.byte(s, i)
+ local xb = string.byte(sb, i)
+ if xb == 255 then
+ local name = BCN[x]
+ local m = #name + 4
+ if n + m > 78 then n = 0; w("\n") end
+ n = n + m
+ w("BC_"); w(name)
+ else
+ local m = x < 10 and 2 or (x < 100 and 3 or 4)
+ if xb == 0 then
+ if n + m > 78 then n = 0; w("\n") end
+ else
+ local name = defs[xb]:gsub("_", ".")
+ if n ~= 0 then w("\n") end
+ w("/* "); w(name); w(" */ ")
+ n = #name + 7
+ end
+ n = n + m
+ w(x)
+ end
+ w(",")
+ end
+ w("\n0\n};\n\n")
+ w("static const struct { const char *name; int ofs; } libbc_map[] = {\n")
+ local m = 0
+ for _,name in ipairs(defs) do
+ w('{"'); w(name); w('",'); w(m) w('},\n')
+ m = m + #defs[name].dump
+ end
+ w("{NULL,"); w(m); w("}\n};\n\n")
+ return table.concat(t)
+end
+
+local function write_file(name, data)
+ if name == "-" then
+ assert(io.write(data))
+ assert(io.flush())
+ else
+ local fp = io.open(name)
+ if fp then
+ local old = fp:read("*a")
+ fp:close()
+ if data == old then return end
+ end
+ fp = assert(io.open(name, "w"))
+ assert(fp:write(data))
+ assert(fp:close())
+ end
+end
+
+local outfile = parse_arg(arg)
+local src = read_files(arg)
+local defs = find_defs(src)
+local hdr = gen_header(defs)
+write_file(outfile, hdr)
+
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index d4c6d4a6..8d0844c0 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -41,7 +41,7 @@
-- Cache some library functions and objects.
local jit = require("jit")
-assert(jit.version_num == 20005, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
local jutil = require("jit.util")
local vmdef = require("jit.vmdef")
local bit = require("bit")
@@ -179,13 +179,12 @@ local function bcliston(outfile)
end
-- Public module functions.
-module(...)
-
-line = bcline
-dump = bcdump
-targets = bctargets
-
-on = bcliston
-off = bclistoff
-start = bcliston -- For -j command line option.
+return {
+ line = bcline,
+ dump = bcdump,
+ targets = bctargets,
+ on = bcliston,
+ off = bclistoff,
+ start = bcliston -- For -j command line option.
+}
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 086d5f88..f8ed3a1b 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -11,12 +11,16 @@
------------------------------------------------------------------------------
local jit = require("jit")
-assert(jit.version_num == 20005, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
local bit = require("bit")
-- Symbol name prefix for LuaJIT bytecode.
local LJBC_PREFIX = "luaJIT_BC_"
+local type, assert = type, assert
+local format = string.format
+local tremove, tconcat = table.remove, table.concat
+
------------------------------------------------------------------------------
local function usage()
@@ -56,6 +60,11 @@ local function savefile(name, mode)
return check(io.open(name, mode))
end
+local function set_stdout_binary(ffi)
+ ffi.cdef[[int _setmode(int fd, int mode);]]
+ ffi.C._setmode(1, 0x8000)
+end
+
------------------------------------------------------------------------------
local map_type = {
@@ -63,8 +72,18 @@ local map_type = {
}
local map_arch = {
- x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true,
- mips = true, mipsel = true,
+ x86 = { e = "le", b = 32, m = 3, p = 0x14c, },
+ x64 = { e = "le", b = 64, m = 62, p = 0x8664, },
+ arm = { e = "le", b = 32, m = 40, p = 0x1c0, },
+ arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, },
+ arm64be = { e = "be", b = 64, m = 183, },
+ ppc = { e = "be", b = 32, m = 20, },
+ mips = { e = "be", b = 32, m = 8, f = 0x50001006, },
+ mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, },
+ mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, },
+ mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
+ mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
+ mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
}
local map_os = {
@@ -73,33 +92,33 @@ local map_os = {
}
local function checkarg(str, map, err)
- str = string.lower(str)
+ str = str:lower()
local s = check(map[str], "unknown ", err)
- return s == true and str or s
+ return type(s) == "string" and s or str
end
local function detecttype(str)
- local ext = string.match(string.lower(str), "%.(%a+)$")
+ local ext = str:lower():match("%.(%a+)$")
return map_type[ext] or "raw"
end
local function checkmodname(str)
- check(string.match(str, "^[%w_.%-]+$"), "bad module name")
- return string.gsub(str, "[%.%-]", "_")
+ check(str:match("^[%w_.%-]+$"), "bad module name")
+ return str:gsub("[%.%-]", "_")
end
local function detectmodname(str)
if type(str) == "string" then
- local tail = string.match(str, "[^/\\]+$")
+ local tail = str:match("[^/\\]+$")
if tail then str = tail end
- local head = string.match(str, "^(.*)%.[^.]*$")
+ local head = str:match("^(.*)%.[^.]*$")
if head then str = head end
- str = string.match(str, "^[%w_.%-]+")
+ str = str:match("^[%w_.%-]+")
else
str = nil
end
check(str, "cannot derive module name, use -n name")
- return string.gsub(str, "[%.%-]", "_")
+ return str:gsub("[%.%-]", "_")
end
------------------------------------------------------------------------------
@@ -111,6 +130,11 @@ local function bcsave_tail(fp, output, s)
end
local function bcsave_raw(output, s)
+ if output == "-" and jit.os == "Windows" then
+ local ok, ffi = pcall(require, "ffi")
+ check(ok, "FFI library required to write binary file to stdout")
+ set_stdout_binary(ffi)
+ end
local fp = savefile(output, "wb")
bcsave_tail(fp, output, s)
end
@@ -118,19 +142,19 @@ end
local function bcsave_c(ctx, output, s)
local fp = savefile(output, "w")
if ctx.type == "c" then
- fp:write(string.format([[
+ fp:write(format([[
#ifdef __cplusplus
extern "C"
#endif
#ifdef _WIN32
__declspec(dllexport)
#endif
-const char %s%s[] = {
+const unsigned char %s%s[] = {
]], LJBC_PREFIX, ctx.modname))
else
- fp:write(string.format([[
+ fp:write(format([[
#define %s%s_SIZE %d
-static const char %s%s[] = {
+static const unsigned char %s%s[] = {
]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
end
local t, n, m = {}, 0, 0
@@ -138,13 +162,13 @@ static const char %s%s[] = {
local b = tostring(string.byte(s, i))
m = m + #b + 1
if m > 78 then
- fp:write(table.concat(t, ",", 1, n), ",\n")
+ fp:write(tconcat(t, ",", 1, n), ",\n")
n, m = 0, #b + 1
end
n = n + 1
t[n] = b
end
- bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n")
+ bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n")
end
local function bcsave_elfobj(ctx, output, s, ffi)
@@ -199,12 +223,8 @@ typedef struct {
} ELF64obj;
]]
local symname = LJBC_PREFIX..ctx.modname
- local is64, isbe = false, false
- if ctx.arch == "x64" then
- is64 = true
- elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
- isbe = true
- end
+ local ai = assert(map_arch[ctx.arch])
+ local is64, isbe = ai.b == 64, ai.e == "be"
-- Handle different host/target endianess.
local function f32(x) return x end
@@ -237,10 +257,8 @@ typedef struct {
hdr.eendian = isbe and 2 or 1
hdr.eversion = 1
hdr.type = f16(1)
- hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch])
- if ctx.arch == "mips" or ctx.arch == "mipsel" then
- hdr.flags = f32(0x50001006)
- end
+ hdr.machine = f16(ai.m)
+ hdr.flags = f32(ai.f or 0)
hdr.version = f32(1)
hdr.shofs = fofs(ffi.offsetof(o, "sect"))
hdr.ehsize = f16(ffi.sizeof(hdr))
@@ -336,12 +354,8 @@ typedef struct {
} PEobj;
]]
local symname = LJBC_PREFIX..ctx.modname
- local is64 = false
- if ctx.arch == "x86" then
- symname = "_"..symname
- elseif ctx.arch == "x64" then
- is64 = true
- end
+ local ai = assert(map_arch[ctx.arch])
+ local is64 = ai.b == 64
local symexport = " /EXPORT:"..symname..",DATA "
-- The file format is always little-endian. Swap if the host is big-endian.
@@ -355,7 +369,7 @@ typedef struct {
-- Create PE object and fill in header.
local o = ffi.new("PEobj")
local hdr = o.hdr
- hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch])
+ hdr.arch = f16(assert(ai.p))
hdr.nsects = f16(2)
hdr.symtabofs = f32(ffi.offsetof(o, "sym0"))
hdr.nsyms = f32(6)
@@ -442,18 +456,18 @@ typedef struct {
uint32_t value;
} mach_nlist;
typedef struct {
- uint32_t strx;
+ int32_t strx;
uint8_t type, sect;
uint16_t desc;
uint64_t value;
} mach_nlist_64;
typedef struct
{
- uint32_t magic, nfat_arch;
+ int32_t magic, nfat_arch;
} mach_fat_header;
typedef struct
{
- uint32_t cputype, cpusubtype, offset, size, align;
+ int32_t cputype, cpusubtype, offset, size, align;
} mach_fat_arch;
typedef struct {
struct {
@@ -477,16 +491,28 @@ typedef struct {
} mach_obj_64;
typedef struct {
mach_fat_header fat;
- mach_fat_arch fat_arch[4];
+ mach_fat_arch fat_arch[2];
struct {
mach_header hdr;
mach_segment_command seg;
mach_section sec;
mach_symtab_command sym;
- } arch[4];
+ } arch[2];
mach_nlist sym_entry;
uint8_t space[4096];
} mach_fat_obj;
+typedef struct {
+ mach_fat_header fat;
+ mach_fat_arch fat_arch[2];
+ struct {
+ mach_header_64 hdr;
+ mach_segment_command_64 seg;
+ mach_section_64 sec;
+ mach_symtab_command sym;
+ } arch[2];
+ mach_nlist_64 sym_entry;
+ uint8_t space[4096];
+} mach_fat_obj_64;
]]
local symname = '_'..LJBC_PREFIX..ctx.modname
local isfat, is64, align, mobj = false, false, 4, "mach_obj"
@@ -494,6 +520,8 @@ typedef struct {
is64, align, mobj = true, 8, "mach_obj_64"
elseif ctx.arch == "arm" then
isfat, mobj = true, "mach_fat_obj"
+ elseif ctx.arch == "arm64" then
+ is64, align, isfat, mobj = true, 8, true, "mach_fat_obj_64"
else
check(ctx.arch == "x86", "unsupported architecture for OSX")
end
@@ -503,8 +531,8 @@ typedef struct {
-- Create Mach-O object and fill in header.
local o = ffi.new(mobj)
local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align)
- local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch]
- local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch]
+ local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch]
+ local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch]
if isfat then
o.fat.magic = be32(0xcafebabe)
o.fat.nfat_arch = be32(#cpusubtype)
@@ -562,6 +590,9 @@ end
local function bcsave_obj(ctx, output, s)
local ok, ffi = pcall(require, "ffi")
check(ok, "FFI library required to write this file type")
+ if output == "-" and jit.os == "Windows" then
+ set_stdout_binary(ffi)
+ end
if ctx.os == "windows" then
return bcsave_peobj(ctx, output, s, ffi)
elseif ctx.os == "osx" then
@@ -603,16 +634,16 @@ local function docmd(...)
local n = 1
local list = false
local ctx = {
- strip = true, arch = jit.arch, os = string.lower(jit.os),
+ strip = true, arch = jit.arch, os = jit.os:lower(),
type = false, modname = false,
}
while n <= #arg do
local a = arg[n]
- if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then
- table.remove(arg, n)
+ if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
+ tremove(arg, n)
if a == "--" then break end
for m=2,#a do
- local opt = string.sub(a, m, m)
+ local opt = a:sub(m, m)
if opt == "l" then
list = true
elseif opt == "s" then
@@ -625,13 +656,13 @@ local function docmd(...)
if n ~= 1 then usage() end
arg[1] = check(loadstring(arg[1]))
elseif opt == "n" then
- ctx.modname = checkmodname(table.remove(arg, n))
+ ctx.modname = checkmodname(tremove(arg, n))
elseif opt == "t" then
- ctx.type = checkarg(table.remove(arg, n), map_type, "file type")
+ ctx.type = checkarg(tremove(arg, n), map_type, "file type")
elseif opt == "a" then
- ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture")
+ ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture")
elseif opt == "o" then
- ctx.os = checkarg(table.remove(arg, n), map_os, "OS name")
+ ctx.os = checkarg(tremove(arg, n), map_os, "OS name")
else
usage()
end
@@ -653,7 +684,7 @@ end
------------------------------------------------------------------------------
-- Public module functions.
-module(...)
-
-start = docmd -- Process -b command line option.
+return {
+ start = docmd -- Process -b command line option.
+}
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
index d572a5c3..18ab68df 100644
--- a/src/jit/dis_arm.lua
+++ b/src/jit/dis_arm.lua
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
end
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
local ctx = {}
ctx.code = code
ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
end
-- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
- create_(code, addr, out):disass()
+local function disass(code, addr, out)
+ create(code, addr, out):disass()
end
-- Return register name for RID.
-local function regname_(r)
+local function regname(r)
if r < 16 then return map_gpr[r] end
return "d"..(r-16)
end
-- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+ create = create,
+ disass = disass,
+ regname = regname
+}
diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua
new file mode 100644
index 00000000..531584a1
--- /dev/null
+++ b/src/jit/dis_arm64.lua
@@ -0,0 +1,1216 @@
+----------------------------------------------------------------------------
+-- LuaJIT ARM64 disassembler module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+--
+-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+-- Sponsored by Cisco Systems, Inc.
+----------------------------------------------------------------------------
+-- This is a helper module used by the LuaJIT machine code dumper module.
+--
+-- It disassembles most user-mode AArch64 instructions.
+-- NYI: Advanced SIMD and VFP instructions.
+------------------------------------------------------------------------------
+
+local type = type
+local sub, byte, format = string.sub, string.byte, string.format
+local match, gmatch, gsub = string.match, string.gmatch, string.gsub
+local concat = table.concat
+local bit = require("bit")
+local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex
+local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
+local ror = bit.ror
+
+------------------------------------------------------------------------------
+-- Opcode maps
+------------------------------------------------------------------------------
+
+local map_adr = { -- PC-relative addressing.
+ shift = 31, mask = 1,
+ [0] = "adrDBx", "adrpDBx"
+}
+
+local map_addsubi = { -- Add/subtract immediate.
+ shift = 29, mask = 3,
+ [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg",
+}
+
+local map_logi = { -- Logical immediate.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 22, mask = 1,
+ [0] = {
+ shift = 29, mask = 3,
+ [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
+ },
+ false -- unallocated
+ },
+ {
+ shift = 29, mask = 3,
+ [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
+ }
+}
+
+local map_movwi = { -- Move wide immediate.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 22, mask = 1,
+ [0] = {
+ shift = 29, mask = 3,
+ [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
+ }, false -- unallocated
+ },
+ {
+ shift = 29, mask = 3,
+ [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
+ },
+}
+
+local map_bitf = { -- Bitfield.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 22, mask = 1,
+ [0] = {
+ shift = 29, mask = 3,
+ [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w",
+ "bfm|bfi|bfxilDN13w",
+ "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w"
+ }
+ },
+ {
+ shift = 22, mask = 1,
+ {
+ shift = 29, mask = 3,
+ [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x",
+ "bfm|bfi|bfxilDN13x",
+ "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x"
+ }
+ }
+}
+
+local map_datai = { -- Data processing - immediate.
+ shift = 23, mask = 7,
+ [0] = map_adr, map_adr, map_addsubi, false,
+ map_logi, map_movwi, map_bitf,
+ {
+ shift = 15, mask = 0x1c0c1,
+ [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x",
+ [0x10081] = "extr|rorDNM4x"
+ }
+}
+
+local map_logsr = { -- Logical, shifted register.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 15, mask = 1,
+ [0] = {
+ shift = 29, mask = 3,
+ [0] = {
+ shift = 21, mask = 7,
+ [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
+ "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
+ },
+ {
+ shift = 21, mask = 7,
+ [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
+ "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
+ },
+ {
+ shift = 21, mask = 7,
+ [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
+ "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
+ },
+ {
+ shift = 21, mask = 7,
+ [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
+ "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
+ }
+ },
+ false -- unallocated
+ },
+ {
+ shift = 29, mask = 3,
+ [0] = {
+ shift = 21, mask = 7,
+ [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg",
+ "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg"
+ },
+ {
+ shift = 21, mask = 7,
+ [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg",
+ "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg"
+ },
+ {
+ shift = 21, mask = 7,
+ [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg",
+ "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg"
+ },
+ {
+ shift = 21, mask = 7,
+ [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg",
+ "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg"
+ }
+ }
+}
+
+local map_assh = {
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 15, mask = 1,
+ [0] = {
+ shift = 29, mask = 3,
+ [0] = {
+ shift = 22, mask = 3,
+ [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg",
+ "adds|cmnD0NMSg", "adds|cmnD0NMg"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
+ "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
+ },
+ },
+ false -- unallocated
+ },
+ {
+ shift = 29, mask = 3,
+ [0] = {
+ shift = 22, mask = 3,
+ [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg",
+ "adds|cmnD0NMg"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
+ "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
+ }
+ }
+}
+
+local map_addsubsh = { -- Add/subtract, shifted register.
+ shift = 22, mask = 3,
+ [0] = map_assh, map_assh, map_assh
+}
+
+local map_addsubex = { -- Add/subtract, extended register.
+ shift = 22, mask = 3,
+ [0] = {
+ shift = 29, mask = 3,
+ [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg",
+ }
+}
+
+local map_addsubc = { -- Add/subtract, with carry.
+ shift = 10, mask = 63,
+ [0] = {
+ shift = 29, mask = 3,
+ [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg",
+ }
+}
+
+local map_ccomp = {
+ shift = 4, mask = 1,
+ [0] = {
+ shift = 10, mask = 3,
+ [0] = { -- Conditional compare register.
+ shift = 29, mask = 3,
+ "ccmnNMVCg", false, "ccmpNMVCg",
+ },
+ [2] = { -- Conditional compare immediate.
+ shift = 29, mask = 3,
+ "ccmnN5VCg", false, "ccmpN5VCg",
+ }
+ }
+}
+
+local map_csel = { -- Conditional select.
+ shift = 11, mask = 1,
+ [0] = {
+ shift = 10, mask = 1,
+ [0] = {
+ shift = 29, mask = 3,
+ [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false,
+ },
+ {
+ shift = 29, mask = 3,
+ [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false,
+ }
+ }
+}
+
+local map_data1s = { -- Data processing, 1 source.
+ shift = 29, mask = 1,
+ [0] = {
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 10, mask = 0x7ff,
+ [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg"
+ },
+ {
+ shift = 10, mask = 0x7ff,
+ [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg"
+ }
+ }
+}
+
+local map_data2s = { -- Data processing, 2 sources.
+ shift = 29, mask = 1,
+ [0] = {
+ shift = 10, mask = 63,
+ false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg",
+ "lsrDNMg", "asrDNMg", "rorDNMg"
+ }
+}
+
+local map_data3s = { -- Data processing, 3 sources.
+ shift = 29, mask = 7,
+ [0] = {
+ shift = 21, mask = 7,
+ [0] = {
+ shift = 15, mask = 1,
+ [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g"
+ }
+ }, false, false, false,
+ {
+ shift = 15, mask = 1,
+ [0] = {
+ shift = 21, mask = 7,
+ [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false,
+ false, "umaddl|umullDxNMwA0x", "umulhDNMx"
+ },
+ {
+ shift = 21, mask = 7,
+ [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false,
+ false, "umsubl|umneglDxNMwA0x"
+ }
+ }
+}
+
+local map_datar = { -- Data processing, register.
+ shift = 28, mask = 1,
+ [0] = {
+ shift = 24, mask = 1,
+ [0] = map_logsr,
+ {
+ shift = 21, mask = 1,
+ [0] = map_addsubsh, map_addsubex
+ }
+ },
+ {
+ shift = 21, mask = 15,
+ [0] = map_addsubc, false, map_ccomp, false, map_csel, false,
+ {
+ shift = 30, mask = 1,
+ [0] = map_data2s, map_data1s
+ },
+ false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s,
+ map_data3s, map_data3s, map_data3s
+ }
+}
+
+local map_lrl = { -- Load register, literal.
+ shift = 26, mask = 1,
+ [0] = {
+ shift = 30, mask = 3,
+ [0] = "ldrDwB", "ldrDxB", "ldrswDxB"
+ },
+ {
+ shift = 30, mask = 3,
+ [0] = "ldrDsB", "ldrDdB"
+ }
+}
+
+local map_lsriind = { -- Load/store register, immediate pre/post-indexed.
+ shift = 30, mask = 3,
+ [0] = {
+ shift = 26, mask = 1,
+ [0] = {
+ shift = 22, mask = 3,
+ [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL"
+ }
+ },
+ {
+ shift = 26, mask = 1,
+ [0] = {
+ shift = 22, mask = 3,
+ [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL"
+ }
+ },
+ {
+ shift = 26, mask = 1,
+ [0] = {
+ shift = 22, mask = 3,
+ [0] = "strDwzL", "ldrDwzL", "ldrswDxzL"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "strDszL", "ldrDszL"
+ }
+ },
+ {
+ shift = 26, mask = 1,
+ [0] = {
+ shift = 22, mask = 3,
+ [0] = "strDxzL", "ldrDxzL"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "strDdzL", "ldrDdzL"
+ }
+ }
+}
+
+local map_lsriro = {
+ shift = 21, mask = 1,
+ [0] = { -- Load/store register immediate.
+ shift = 10, mask = 3,
+ [0] = { -- Unscaled immediate.
+ shift = 26, mask = 1,
+ [0] = {
+ shift = 30, mask = 3,
+ [0] = {
+ shift = 22, mask = 3,
+ [0] = "sturbDwK", "ldurbDwK"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "sturhDwK", "ldurhDwK"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "sturDwK", "ldurDwK"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "sturDxK", "ldurDxK"
+ }
+ }
+ }, map_lsriind, false, map_lsriind
+ },
+ { -- Load/store register, register offset.
+ shift = 10, mask = 3,
+ [2] = {
+ shift = 26, mask = 1,
+ [0] = {
+ shift = 30, mask = 3,
+ [0] = {
+ shift = 22, mask = 3,
+ [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "strDwO", "ldrDwO", "ldrswDxO"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "strDxO", "ldrDxO"
+ }
+ },
+ {
+ shift = 30, mask = 3,
+ [2] = {
+ shift = 22, mask = 3,
+ [0] = "strDsO", "ldrDsO"
+ },
+ [3] = {
+ shift = 22, mask = 3,
+ [0] = "strDdO", "ldrDdO"
+ }
+ }
+ }
+ }
+}
+
+local map_lsp = { -- Load/store register pair, offset.
+ shift = 22, mask = 1,
+ [0] = {
+ shift = 30, mask = 3,
+ [0] = {
+ shift = 26, mask = 1,
+ [0] = "stpDzAzwP", "stpDzAzsP",
+ },
+ {
+ shift = 26, mask = 1,
+ "stpDzAzdP"
+ },
+ {
+ shift = 26, mask = 1,
+ [0] = "stpDzAzxP"
+ }
+ },
+ {
+ shift = 30, mask = 3,
+ [0] = {
+ shift = 26, mask = 1,
+ [0] = "ldpDzAzwP", "ldpDzAzsP",
+ },
+ {
+ shift = 26, mask = 1,
+ [0] = "ldpswDAxP", "ldpDzAzdP"
+ },
+ {
+ shift = 26, mask = 1,
+ [0] = "ldpDzAzxP"
+ }
+ }
+}
+
+local map_ls = { -- Loads and stores.
+ shift = 24, mask = 0x31,
+ [0x10] = map_lrl, [0x30] = map_lsriro,
+ [0x20] = {
+ shift = 23, mask = 3,
+ map_lsp, map_lsp, map_lsp
+ },
+ [0x21] = {
+ shift = 23, mask = 3,
+ map_lsp, map_lsp, map_lsp
+ },
+ [0x31] = {
+ shift = 26, mask = 1,
+ [0] = {
+ shift = 30, mask = 3,
+ [0] = {
+ shift = 22, mask = 3,
+ [0] = "strbDwzU", "ldrbDwzU"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "strhDwzU", "ldrhDwzU"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "strDwzU", "ldrDwzU"
+ },
+ {
+ shift = 22, mask = 3,
+ [0] = "strDxzU", "ldrDxzU"
+ }
+ },
+ {
+ shift = 30, mask = 3,
+ [2] = {
+ shift = 22, mask = 3,
+ [0] = "strDszU", "ldrDszU"
+ },
+ [3] = {
+ shift = 22, mask = 3,
+ [0] = "strDdzU", "ldrDdzU"
+ }
+ }
+ },
+}
+
+local map_datafp = { -- Data processing, SIMD and FP.
+ shift = 28, mask = 7,
+ { -- 001
+ shift = 24, mask = 1,
+ [0] = {
+ shift = 21, mask = 1,
+ {
+ shift = 10, mask = 3,
+ [0] = {
+ shift = 12, mask = 1,
+ [0] = {
+ shift = 13, mask = 1,
+ [0] = {
+ shift = 14, mask = 1,
+ [0] = {
+ shift = 15, mask = 1,
+ [0] = { -- FP/int conversion.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 16, mask = 0xff,
+ [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs",
+ [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw",
+ [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs",
+ [0x26] = "fmovDwNs", [0x27] = "fmovDsNw",
+ [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs",
+ [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs",
+ [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs",
+ [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd",
+ [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw",
+ [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd",
+ [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd",
+ [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd",
+ [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd"
+ },
+ {
+ shift = 16, mask = 0xff,
+ [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs",
+ [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx",
+ [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs",
+ [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs",
+ [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs",
+ [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs",
+ [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd",
+ [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx",
+ [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd",
+ [0x66] = "fmovDxNd", [0x67] = "fmovDdNx",
+ [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd",
+ [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd",
+ [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd"
+ }
+ }
+ },
+ { -- FP data-processing, 1 source.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 22, mask = 3,
+ [0] = {
+ shift = 15, mask = 63,
+ [0] = "fmovDNf", "fabsDNf", "fnegDNf",
+ "fsqrtDNf", false, "fcvtDdNs", false, false,
+ "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
+ "frintaDNf", false, "frintxDNf", "frintiDNf",
+ },
+ {
+ shift = 15, mask = 63,
+ [0] = "fmovDNf", "fabsDNf", "fnegDNf",
+ "fsqrtDNf", "fcvtDsNd", false, false, false,
+ "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
+ "frintaDNf", false, "frintxDNf", "frintiDNf",
+ }
+ }
+ }
+ },
+ { -- FP compare.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 14, mask = 3,
+ [0] = {
+ shift = 23, mask = 1,
+ [0] = {
+ shift = 0, mask = 31,
+ [0] = "fcmpNMf", [8] = "fcmpNZf",
+ [16] = "fcmpeNMf", [24] = "fcmpeNZf",
+ }
+ }
+ }
+ }
+ },
+ { -- FP immediate.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 5, mask = 31,
+ [0] = {
+ shift = 23, mask = 1,
+ [0] = "fmovDFf"
+ }
+ }
+ }
+ },
+ { -- FP conditional compare.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 23, mask = 1,
+ [0] = {
+ shift = 4, mask = 1,
+ [0] = "fccmpNMVCf", "fccmpeNMVCf"
+ }
+ }
+ },
+ { -- FP data-processing, 2 sources.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 23, mask = 1,
+ [0] = {
+ shift = 12, mask = 15,
+ [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf",
+ "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf",
+ "fnmulDNMf"
+ }
+ }
+ },
+ { -- FP conditional select.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 23, mask = 1,
+ [0] = "fcselDNMCf"
+ }
+ }
+ }
+ },
+ { -- FP data-processing, 3 sources.
+ shift = 31, mask = 1,
+ [0] = {
+ shift = 15, mask = 1,
+ [0] = {
+ shift = 21, mask = 5,
+ [0] = "fmaddDNMAf", "fnmaddDNMAf"
+ },
+ {
+ shift = 21, mask = 5,
+ [0] = "fmsubDNMAf", "fnmsubDNMAf"
+ }
+ }
+ }
+ }
+}
+
+local map_br = { -- Branches, exception generating and system instructions.
+ shift = 29, mask = 7,
+ [0] = "bB",
+ { -- Compare & branch, immediate.
+ shift = 24, mask = 3,
+ [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw"
+ },
+ { -- Conditional branch, immediate.
+ shift = 24, mask = 3,
+ [0] = {
+ shift = 4, mask = 1,
+ [0] = {
+ shift = 0, mask = 15,
+ [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB",
+ "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB"
+ }
+ }
+ }, false, "blB",
+ { -- Compare & branch, immediate.
+ shift = 24, mask = 3,
+ [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx"
+ },
+ {
+ shift = 24, mask = 3,
+ [0] = { -- Exception generation.
+ shift = 0, mask = 0xe0001f,
+ [0x200000] = "brkW"
+ },
+ { -- System instructions.
+ shift = 0, mask = 0x3fffff,
+ [0x03201f] = "nop"
+ },
+ { -- Unconditional branch, register.
+ shift = 0, mask = 0xfffc1f,
+ [0x1f0000] = "brNx", [0x3f0000] = "blrNx",
+ [0x5f0000] = "retNx"
+ },
+ }
+}
+
+local map_init = {
+ shift = 25, mask = 15,
+ [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp,
+ map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp
+}
+
+------------------------------------------------------------------------------
+
+local map_regs = { x = {}, w = {}, d = {}, s = {} }
+
+for i=0,30 do
+ map_regs.x[i] = "x"..i
+ map_regs.w[i] = "w"..i
+ map_regs.d[i] = "d"..i
+ map_regs.s[i] = "s"..i
+end
+map_regs.x[31] = "sp"
+map_regs.w[31] = "wsp"
+map_regs.d[31] = "d31"
+map_regs.s[31] = "s31"
+
+local map_cond = {
+ [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt", "gt", "le", "al",
+}
+
+local map_shift = { [0] = "lsl", "lsr", "asr", }
+
+local map_extend = {
+ [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx",
+}
+
+------------------------------------------------------------------------------
+
+-- Output a nicely formatted line with an opcode and operands.
+local function putop(ctx, text, operands)
+ local pos = ctx.pos
+ local extra = ""
+ if ctx.rel then
+ local sym = ctx.symtab[ctx.rel]
+ if sym then
+ extra = "\t->"..sym
+ end
+ end
+ if ctx.hexdump > 0 then
+ ctx.out(format("%08x %s %-5s %s%s\n",
+ ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra))
+ else
+ ctx.out(format("%08x %-5s %s%s\n",
+ ctx.addr+pos, text, concat(operands, ", "), extra))
+ end
+ ctx.pos = pos + 4
+end
+
+-- Fallback for unknown opcodes.
+local function unknown(ctx)
+ return putop(ctx, ".long", { "0x"..tohex(ctx.op) })
+end
+
+local function match_reg(p, pat, regnum)
+ return map_regs[match(pat, p.."%w-([xwds])")][regnum]
+end
+
+local function fmt_hex32(x)
+ if x < 0 then
+ return tohex(x)
+ else
+ return format("%x", x)
+ end
+end
+
+local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 }
+
+local function decode_imm13(op)
+ local imms = band(rshift(op, 10), 63)
+ local immr = band(rshift(op, 16), 63)
+ if band(op, 0x00400000) == 0 then
+ local len = 5
+ if imms >= 56 then
+ if imms >= 60 then len = 1 else len = 2 end
+ elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end
+ local l = lshift(1, len)-1
+ local s = band(imms, l)
+ local r = band(immr, l)
+ local imm = ror(rshift(-1, 31-s), r)
+ if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end
+ imm = imm * imm13_rep[len]
+ local ix = fmt_hex32(imm)
+ if rshift(op, 31) ~= 0 then
+ return ix..tohex(imm)
+ else
+ return ix
+ end
+ else
+ local lo, hi = -1, 0
+ if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end
+ if immr ~= 0 then
+ lo, hi = ror(lo, immr), ror(hi, immr)
+ local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr))
+ lo, hi = bxor(lo, x), bxor(hi, x)
+ if immr >= 32 then lo, hi = hi, lo end
+ end
+ if hi ~= 0 then
+ return fmt_hex32(hi)..tohex(lo)
+ else
+ return fmt_hex32(lo)
+ end
+ end
+end
+
+local function parse_immpc(op, name)
+ if name == "b" or name == "bl" then
+ return arshift(lshift(op, 6), 4)
+ elseif name == "adr" or name == "adrp" then
+ local immlo = band(rshift(op, 29), 3)
+ local immhi = lshift(arshift(lshift(op, 8), 13), 2)
+ return bor(immhi, immlo)
+ elseif name == "tbz" or name == "tbnz" then
+ return lshift(arshift(lshift(op, 13), 18), 2)
+ else
+ return lshift(arshift(lshift(op, 8), 13), 2)
+ end
+end
+
+local function parse_fpimm8(op)
+ local sign = band(op, 0x100000) == 0 and 1 or -1
+ local exp = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131
+ local frac = 16+band(rshift(op, 13), 15)
+ return sign * frac * 2^exp
+end
+
+local function prefer_bfx(sf, uns, imms, immr)
+ if imms < immr or imms == 31 or imms == 63 then
+ return false
+ end
+ if immr == 0 then
+ if sf == 0 and (imms == 7 or imms == 15) then
+ return false
+ end
+ if sf ~= 0 and uns == 0 and (imms == 7 or imms == 15 or imms == 31) then
+ return false
+ end
+ end
+ return true
+end
+
+-- Disassemble a single instruction.
+local function disass_ins(ctx)
+ local pos = ctx.pos
+ local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
+ local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
+ local operands = {}
+ local suffix = ""
+ local last, name, pat
+ local map_reg
+ ctx.op = op
+ ctx.rel = nil
+ last = nil
+ local opat
+ opat = map_init[band(rshift(op, 25), 15)]
+ while type(opat) ~= "string" do
+ if not opat then return unknown(ctx) end
+ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+ end
+ name, pat = match(opat, "^([a-z0-9]*)(.*)")
+ local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
+ if altname then pat = pat2 end
+ if sub(pat, 1, 1) == "." then
+ local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
+ suffix = suffix..s2
+ pat = p2
+ end
+
+ local rt = match(pat, "[gf]")
+ if rt then
+ if rt == "g" then
+ map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w
+ else
+ map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s
+ end
+ end
+
+ local second0, immr
+
+ for p in gmatch(pat, ".") do
+ local x = nil
+ if p == "D" then
+ local regnum = band(op, 31)
+ x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+ elseif p == "N" then
+ local regnum = band(rshift(op, 5), 31)
+ x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+ elseif p == "M" then
+ local regnum = band(rshift(op, 16), 31)
+ x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+ elseif p == "A" then
+ local regnum = band(rshift(op, 10), 31)
+ x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
+ elseif p == "B" then
+ local addr = ctx.addr + pos + parse_immpc(op, name)
+ ctx.rel = addr
+ x = "0x"..tohex(addr)
+ elseif p == "T" then
+ x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31))
+ elseif p == "V" then
+ x = band(op, 15)
+ elseif p == "C" then
+ x = map_cond[band(rshift(op, 12), 15)]
+ elseif p == "c" then
+ local rn = band(rshift(op, 5), 31)
+ local rm = band(rshift(op, 16), 31)
+ local cond = band(rshift(op, 12), 15)
+ local invc = bxor(cond, 1)
+ x = map_cond[cond]
+ if altname and cond ~= 14 and cond ~= 15 then
+ local a1, a2 = match(altname, "([^|]*)|(.*)")
+ if rn == rm then
+ local n = #operands
+ operands[n] = nil
+ x = map_cond[invc]
+ if rn ~= 31 then
+ if a1 then name = a1 else name = altname end
+ else
+ operands[n-1] = nil
+ name = a2
+ end
+ end
+ end
+ elseif p == "W" then
+ x = band(rshift(op, 5), 0xffff)
+ elseif p == "Y" then
+ x = band(rshift(op, 5), 0xffff)
+ local hw = band(rshift(op, 21), 3)
+ if altname and (hw == 0 or x ~= 0) then
+ name = altname
+ end
+ elseif p == "L" then
+ local rn = map_regs.x[band(rshift(op, 5), 31)]
+ local imm9 = arshift(lshift(op, 11), 23)
+ if band(op, 0x800) ~= 0 then
+ x = "["..rn..", #"..imm9.."]!"
+ else
+ x = "["..rn.."], #"..imm9
+ end
+ elseif p == "U" then
+ local rn = map_regs.x[band(rshift(op, 5), 31)]
+ local sz = band(rshift(op, 30), 3)
+ local imm12 = lshift(arshift(lshift(op, 10), 20), sz)
+ if imm12 ~= 0 then
+ x = "["..rn..", #"..imm12.."]"
+ else
+ x = "["..rn.."]"
+ end
+ elseif p == "K" then
+ local rn = map_regs.x[band(rshift(op, 5), 31)]
+ local imm9 = arshift(lshift(op, 11), 23)
+ if imm9 ~= 0 then
+ x = "["..rn..", #"..imm9.."]"
+ else
+ x = "["..rn.."]"
+ end
+ elseif p == "O" then
+ local rn, rm = map_regs.x[band(rshift(op, 5), 31)]
+ local m = band(rshift(op, 13), 1)
+ if m == 0 then
+ rm = map_regs.w[band(rshift(op, 16), 31)]
+ else
+ rm = map_regs.x[band(rshift(op, 16), 31)]
+ end
+ x = "["..rn..", "..rm
+ local opt = band(rshift(op, 13), 7)
+ local s = band(rshift(op, 12), 1)
+ local sz = band(rshift(op, 30), 3)
+ -- extension to be applied
+ if opt == 3 then
+ if s == 0 then x = x.."]"
+ else x = x..", lsl #"..sz.."]" end
+ elseif opt == 2 or opt == 6 or opt == 7 then
+ if s == 0 then x = x..", "..map_extend[opt].."]"
+ else x = x..", "..map_extend[opt].." #"..sz.."]" end
+ else
+ x = x.."]"
+ end
+ elseif p == "P" then
+ local opcv, sh = rshift(op, 26), 2
+ if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end
+ local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
+ local rn = map_regs.x[band(rshift(op, 5), 31)]
+ local ind = band(rshift(op, 23), 3)
+ if ind == 1 then
+ x = "["..rn.."], #"..imm7
+ elseif ind == 2 then
+ if imm7 == 0 then
+ x = "["..rn.."]"
+ else
+ x = "["..rn..", #"..imm7.."]"
+ end
+ elseif ind == 3 then
+ x = "["..rn..", #"..imm7.."]!"
+ end
+ elseif p == "I" then
+ local shf = band(rshift(op, 22), 3)
+ local imm12 = band(rshift(op, 10), 0x0fff)
+ local rn, rd = band(rshift(op, 5), 31), band(op, 31)
+ if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then
+ name = altname
+ x = nil
+ elseif shf == 0 then
+ x = imm12
+ elseif shf == 1 then
+ x = imm12..", lsl #12"
+ end
+ elseif p == "i" then
+ x = "#0x"..decode_imm13(op)
+ elseif p == "1" then
+ immr = band(rshift(op, 16), 63)
+ x = immr
+ elseif p == "2" then
+ x = band(rshift(op, 10), 63)
+ if altname then
+ local a1, a2, a3, a4, a5, a6 =
+ match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)")
+ local sf = band(rshift(op, 26), 32)
+ local uns = band(rshift(op, 30), 1)
+ if prefer_bfx(sf, uns, x, immr) then
+ name = a2
+ x = x - immr + 1
+ elseif immr == 0 and x == 7 then
+ local n = #operands
+ operands[n] = nil
+ if sf ~= 0 then
+ operands[n-1] = gsub(operands[n-1], "x", "w")
+ end
+ last = operands[n-1]
+ name = a6
+ x = nil
+ elseif immr == 0 and x == 15 then
+ local n = #operands
+ operands[n] = nil
+ if sf ~= 0 then
+ operands[n-1] = gsub(operands[n-1], "x", "w")
+ end
+ last = operands[n-1]
+ name = a5
+ x = nil
+ elseif x == 31 or x == 63 then
+ if x == 31 and immr == 0 and name == "sbfm" then
+ name = a4
+ local n = #operands
+ operands[n] = nil
+ if sf ~= 0 then
+ operands[n-1] = gsub(operands[n-1], "x", "w")
+ end
+ last = operands[n-1]
+ else
+ name = a3
+ end
+ x = nil
+ elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then
+ name = a4
+ last = "#"..(sf+32 - immr)
+ operands[#operands] = last
+ x = nil
+ elseif x < immr then
+ name = a1
+ last = "#"..(sf+32 - immr)
+ operands[#operands] = last
+ x = x + 1
+ end
+ end
+ elseif p == "3" then
+ x = band(rshift(op, 10), 63)
+ if altname then
+ local a1, a2 = match(altname, "([^|]*)|(.*)")
+ if x < immr then
+ name = a1
+ local sf = band(rshift(op, 26), 32)
+ last = "#"..(sf+32 - immr)
+ operands[#operands] = last
+ x = x + 1
+ else
+ name = a2
+ x = x - immr + 1
+ end
+ end
+ elseif p == "4" then
+ x = band(rshift(op, 10), 63)
+ local rn = band(rshift(op, 5), 31)
+ local rm = band(rshift(op, 16), 31)
+ if altname and rn == rm then
+ local n = #operands
+ operands[n] = nil
+ last = operands[n-1]
+ name = altname
+ end
+ elseif p == "5" then
+ x = band(rshift(op, 16), 31)
+ elseif p == "S" then
+ x = band(rshift(op, 10), 63)
+ if x == 0 then x = nil
+ else x = map_shift[band(rshift(op, 22), 3)].." #"..x end
+ elseif p == "X" then
+ local opt = band(rshift(op, 13), 7)
+ -- Width specifier <R>.
+ if opt ~= 3 and opt ~= 7 then
+ last = map_regs.w[band(rshift(op, 16), 31)]
+ operands[#operands] = last
+ end
+ x = band(rshift(op, 10), 7)
+ -- Extension.
+ if opt == 2 + band(rshift(op, 31), 1) and
+ band(rshift(op, second0 and 5 or 0), 31) == 31 then
+ if x == 0 then x = nil
+ else x = "lsl #"..x end
+ else
+ if x == 0 then x = map_extend[band(rshift(op, 13), 7)]
+ else x = map_extend[band(rshift(op, 13), 7)].." #"..x end
+ end
+ elseif p == "R" then
+ x = band(rshift(op,21), 3)
+ if x == 0 then x = nil
+ else x = "lsl #"..x*16 end
+ elseif p == "z" then
+ local n = #operands
+ if operands[n] == "sp" then operands[n] = "xzr"
+ elseif operands[n] == "wsp" then operands[n] = "wzr"
+ end
+ elseif p == "Z" then
+ x = 0
+ elseif p == "F" then
+ x = parse_fpimm8(op)
+ elseif p == "g" or p == "f" or p == "x" or p == "w" or
+ p == "d" or p == "s" then
+ -- These are handled in D/N/M/A.
+ elseif p == "0" then
+ if last == "sp" or last == "wsp" then
+ local n = #operands
+ operands[n] = nil
+ last = operands[n-1]
+ if altname then
+ local a1, a2 = match(altname, "([^|]*)|(.*)")
+ if not a1 then
+ name = altname
+ elseif second0 then
+ name, altname = a2, a1
+ else
+ name, altname = a1, a2
+ end
+ end
+ end
+ second0 = true
+ else
+ assert(false)
+ end
+ if x then
+ last = x
+ if type(x) == "number" then x = "#"..x end
+ operands[#operands+1] = x
+ end
+ end
+
+ return putop(ctx, name..suffix, operands)
+end
+
+------------------------------------------------------------------------------
+
+-- Disassemble a block of code.
+local function disass_block(ctx, ofs, len)
+ if not ofs then ofs = 0 end
+ local stop = len and ofs+len or #ctx.code
+ ctx.pos = ofs
+ ctx.rel = nil
+ while ctx.pos < stop do disass_ins(ctx) end
+end
+
+-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
+local function create(code, addr, out)
+ local ctx = {}
+ ctx.code = code
+ ctx.addr = addr or 0
+ ctx.out = out or io.write
+ ctx.symtab = {}
+ ctx.disass = disass_block
+ ctx.hexdump = 8
+ return ctx
+end
+
+-- Simple API: disassemble code (a string) at address and output via out.
+local function disass(code, addr, out)
+ create(code, addr, out):disass()
+end
+
+-- Return register name for RID.
+local function regname(r)
+ if r < 32 then return map_regs.x[r] end
+ return map_regs.d[r-32]
+end
+
+-- Public module functions.
+return {
+ create = create,
+ disass = disass,
+ regname = regname
+}
+
diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua
new file mode 100644
index 00000000..7337f5b7
--- /dev/null
+++ b/src/jit/dis_arm64be.lua
@@ -0,0 +1,12 @@
+----------------------------------------------------------------------------
+-- LuaJIT ARM64BE disassembler wrapper module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- ARM64 instructions are always little-endian. So just forward to the
+-- common ARM64 disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+return require((string.match(..., ".*%.") or "").."dis_arm64")
+
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index ed65702a..05dc30fd 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex
local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
------------------------------------------------------------------------------
--- Primary and extended opcode maps
+-- Extended opcode maps common to all MIPS releases
------------------------------------------------------------------------------
-local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", }
local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", }
+local map_cop0 = {
+ shift = 25, mask = 1,
+ [0] = {
+ shift = 21, mask = 15,
+ [0] = "mfc0TDW", [4] = "mtc0TDW",
+ [10] = "rdpgprDT",
+ [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
+ [14] = "wrpgprDT",
+ }, {
+ shift = 0, mask = 63,
+ [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
+ [24] = "eret", [31] = "deret",
+ [32] = "wait",
+ },
+}
+
+------------------------------------------------------------------------------
+-- Primary and extended opcode maps for MIPS R1-R5
+------------------------------------------------------------------------------
+
+local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
+
local map_special = {
shift = 0, mask = 63,
[0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
@@ -34,15 +55,17 @@ local map_special = {
"jrS", "jalrD1S", "movzDST", "movnDST",
"syscallY", "breakY", false, "sync",
"mfhiD", "mthiS", "mfloD", "mtloS",
- false, false, false, false,
+ "dsllvDST", false, "dsrlvDST", "dsravDST",
"multST", "multuST", "divST", "divuST",
- false, false, false, false,
+ "dmultST", "dmultuST", "ddivST", "ddivuST",
"addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
- "andDST", "orDST", "xorDST", "nor|notDST0",
+ "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
false, false, "sltDST", "sltuDST",
- false, false, false, false,
+ "daddDST", "dadduDST", "dsubDST", "dsubuDST",
"tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
- "teqSTZ", false, "tneSTZ",
+ "teqSTZ", false, "tneSTZ", false,
+ "dsllDTA", false, "dsrlDTA", "dsraDTA",
+ "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
}
local map_special2 = {
@@ -60,11 +83,17 @@ local map_bshfl = {
[24] = "sehDT",
}
+local map_dbshfl = {
+ shift = 6, mask = 31,
+ [2] = "dsbhDT",
+ [5] = "dshdDT",
+}
+
local map_special3 = {
shift = 0, mask = 63,
- [0] = "extTSAK", [4] = "insTSAL",
- [32] = map_bshfl,
- [59] = "rdhwrTD",
+ [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
+ [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
+ [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD",
}
local map_regimm = {
@@ -79,22 +108,6 @@ local map_regimm = {
false, false, false, "synciSO",
}
-local map_cop0 = {
- shift = 25, mask = 1,
- [0] = {
- shift = 21, mask = 15,
- [0] = "mfc0TDW", [4] = "mtc0TDW",
- [10] = "rdpgprDT",
- [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
- [14] = "wrpgprDT",
- }, {
- shift = 0, mask = 63,
- [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
- [24] = "eret", [31] = "deret",
- [32] = "wait",
- },
-}
-
local map_cop1s = {
shift = 0, mask = 63,
[0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
@@ -178,8 +191,8 @@ local map_cop1bc = {
local map_cop1 = {
shift = 21, mask = 31,
- [0] = "mfc1TG", false, "cfc1TG", "mfhc1TG",
- "mtc1TG", false, "ctc1TG", "mthc1TG",
+ [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
+ "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
map_cop1bc, false, false, false,
false, false, false, false,
map_cop1s, map_cop1d, false, false,
@@ -213,16 +226,218 @@ local map_pri = {
"andiTSU", "ori|liTS0U", "xoriTSU", "luiTU",
map_cop0, map_cop1, false, map_cop1x,
"beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB",
- false, false, false, false,
- map_special2, false, false, map_special3,
+ "daddiTSI", "daddiuTSI", false, false,
+ map_special2, "jalxJ", false, map_special3,
"lbTSO", "lhTSO", "lwlTSO", "lwTSO",
"lbuTSO", "lhuTSO", "lwrTSO", false,
"sbTSO", "shTSO", "swlTSO", "swTSO",
false, false, "swrTSO", "cacheNSO",
"llTSO", "lwc1HSO", "lwc2TSO", "prefNSO",
- false, "ldc1HSO", "ldc2TSO", false,
+ false, "ldc1HSO", "ldc2TSO", "ldTSO",
"scTSO", "swc1HSO", "swc2TSO", false,
- false, "sdc1HSO", "sdc2TSO", false,
+ false, "sdc1HSO", "sdc2TSO", "sdTSO",
+}
+
+------------------------------------------------------------------------------
+-- Primary and extended opcode maps for MIPS R6
+------------------------------------------------------------------------------
+
+local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" }
+local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" }
+local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" }
+local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" }
+local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" }
+local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" }
+local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" }
+local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" }
+
+local map_special_r6 = {
+ shift = 0, mask = 63,
+ [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
+ false, map_srl, "sraDTA",
+ "sllvDTS", false, map_srlv, "sravDTS",
+ "jrS", "jalrD1S", false, false,
+ "syscallY", "breakY", false, "sync",
+ "clzDS", "cloDS", "dclzDS", "dcloDS",
+ "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST",
+ map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6,
+ map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6,
+ "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
+ "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
+ false, false, "sltDST", "sltuDST",
+ "daddDST", "dadduDST", "dsubDST", "dsubuDST",
+ "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
+ "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST",
+ "dsllDTA", false, "dsrlDTA", "dsraDTA",
+ "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
+}
+
+local map_bshfl_r6 = {
+ shift = 9, mask = 3,
+ [1] = "alignDSTa",
+ _ = {
+ shift = 6, mask = 31,
+ [0] = "bitswapDT",
+ [2] = "wsbhDT",
+ [16] = "sebDT",
+ [24] = "sehDT",
+ }
+}
+
+local map_dbshfl_r6 = {
+ shift = 9, mask = 3,
+ [1] = "dalignDSTa",
+ _ = {
+ shift = 6, mask = 31,
+ [0] = "dbitswapDT",
+ [2] = "dsbhDT",
+ [5] = "dshdDT",
+ }
+}
+
+local map_special3_r6 = {
+ shift = 0, mask = 63,
+ [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
+ [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
+ [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD",
+}
+
+local map_regimm_r6 = {
+ shift = 16, mask = 31,
+ [0] = "bltzSB", [1] = "bgezSB",
+ [6] = "dahiSI", [30] = "datiSI",
+ [23] = "sigrieI", [31] = "synciSO",
+}
+
+local map_pcrel_r6 = {
+ shift = 19, mask = 3,
+ [0] = "addiupcS2", "lwpcS2", "lwupcS2", {
+ shift = 18, mask = 1,
+ [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" }
+ }
+}
+
+local map_cop1s_r6 = {
+ shift = 0, mask = 63,
+ [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
+ "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG",
+ "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG",
+ "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG",
+ "sel.sFGH", false, false, false,
+ "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH",
+ "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG",
+ "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH",
+ false, "cvt.d.sFG", false, false,
+ "cvt.w.sFG", "cvt.l.sFG",
+}
+
+local map_cop1d_r6 = {
+ shift = 0, mask = 63,
+ [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH",
+ "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG",
+ "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG",
+ "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG",
+ "sel.dFGH", false, false, false,
+ "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH",
+ "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG",
+ "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH",
+ "cvt.s.dFG", false, false, false,
+ "cvt.w.dFG", "cvt.l.dFG",
+}
+
+local map_cop1w_r6 = {
+ shift = 0, mask = 63,
+ [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH",
+ "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH",
+ "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH",
+ "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH",
+ false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH",
+ false, false, false, false,
+ false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH",
+ false, false, false, false,
+ "cvt.s.wFG", "cvt.d.wFG",
+}
+
+local map_cop1l_r6 = {
+ shift = 0, mask = 63,
+ [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH",
+ "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH",
+ "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH",
+ "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH",
+ false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH",
+ false, false, false, false,
+ false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH",
+ false, false, false, false,
+ "cvt.s.lFG", "cvt.d.lFG",
+}
+
+local map_cop1_r6 = {
+ shift = 21, mask = 31,
+ [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
+ "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
+ false, "bc1eqzHB", false, false,
+ false, "bc1nezHB", false, false,
+ map_cop1s_r6, map_cop1d_r6, false, false,
+ map_cop1w_r6, map_cop1l_r6,
+}
+
+local function maprs_popTS(rs, rt)
+ if rt == 0 then return 0 elseif rs == 0 then return 1
+ elseif rs == rt then return 2 else return 3 end
+end
+
+local map_pop06_r6 = {
+ maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB"
+}
+local map_pop07_r6 = {
+ maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB"
+}
+local map_pop26_r6 = {
+ maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB"
+}
+local map_pop27_r6 = {
+ maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB"
+}
+
+local function maprs_popS(rs, rt)
+ if rs == 0 then return 0 else return 1 end
+end
+
+local map_pop66_r6 = {
+ maprs = maprs_popS, [0] = "jicTI", "beqzcSb"
+}
+local map_pop76_r6 = {
+ maprs = maprs_popS, [0] = "jialcTI", "bnezcSb"
+}
+
+local function maprs_popST(rs, rt)
+ if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end
+end
+
+local map_pop10_r6 = {
+ maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB"
+}
+local map_pop30_r6 = {
+ maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB"
+}
+
+local map_pri_r6 = {
+ [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ",
+ "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6,
+ map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI",
+ "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U",
+ map_cop0, map_cop1_r6, false, false,
+ false, false, map_pop26_r6, map_pop27_r6,
+ map_pop30_r6, "daddiuTSI", false, false,
+ false, "dauiTSI", false, map_special3_r6,
+ "lbTSO", "lhTSO", false, "lwTSO",
+ "lbuTSO", "lhuTSO", false, false,
+ "sbTSO", "shTSO", false, "swTSO",
+ false, false, false, false,
+ false, "lwc1HSO", "bc#", false,
+ false, "ldc1HSO", map_pop66_r6, "ldTSO",
+ false, "swc1HSO", "balc#", map_pcrel_r6,
+ false, "sdc1HSO", map_pop76_r6, "sdTSO",
}
------------------------------------------------------------------------------
@@ -279,10 +494,14 @@ local function disass_ins(ctx)
ctx.op = op
ctx.rel = nil
- local opat = map_pri[rshift(op, 26)]
+ local opat = ctx.map_pri[rshift(op, 26)]
while type(opat) ~= "string" do
if not opat then return unknown(ctx) end
- opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+ if opat.maprs then
+ opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))]
+ else
+ opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+ end
end
local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
@@ -306,6 +525,10 @@ local function disass_ins(ctx)
x = "f"..band(rshift(op, 21), 31)
elseif p == "A" then
x = band(rshift(op, 6), 31)
+ elseif p == "a" then
+ x = band(rshift(op, 6), 7)
+ elseif p == "E" then
+ x = band(rshift(op, 6), 31) + 32
elseif p == "M" then
x = band(rshift(op, 11), 31)
elseif p == "N" then
@@ -315,10 +538,18 @@ local function disass_ins(ctx)
if x == 0 then x = nil end
elseif p == "K" then
x = band(rshift(op, 11), 31) + 1
+ elseif p == "P" then
+ x = band(rshift(op, 11), 31) + 33
elseif p == "L" then
x = band(rshift(op, 11), 31) - last + 1
+ elseif p == "Q" then
+ x = band(rshift(op, 11), 31) - last + 33
elseif p == "I" then
x = arshift(lshift(op, 16), 16)
+ elseif p == "2" then
+ x = arshift(lshift(op, 13), 11)
+ elseif p == "3" then
+ x = arshift(lshift(op, 14), 11)
elseif p == "U" then
x = band(op, 0xffff)
elseif p == "O" then
@@ -328,13 +559,22 @@ local function disass_ins(ctx)
local index = map_gpr[band(rshift(op, 16), 31)]
operands[#operands] = format("%s(%s)", index, last)
elseif p == "B" then
- x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4
+ x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4
+ ctx.rel = x
+ x = format("0x%08x", x)
+ elseif p == "b" then
+ x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4
ctx.rel = x
- x = "0x"..tohex(x)
+ x = format("0x%08x", x)
+ elseif p == "#" then
+ x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4
+ ctx.rel = x
+ x = format("0x%08x", x)
elseif p == "J" then
- x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4
+ local a = ctx.addr + ctx.pos
+ x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4
ctx.rel = x
- x = "0x"..tohex(x)
+ x = format("0x%08x", x)
elseif p == "V" then
x = band(rshift(op, 8), 7)
if x == 0 then x = nil end
@@ -384,7 +624,7 @@ local function disass_block(ctx, ofs, len)
end
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
local ctx = {}
ctx.code = code
ctx.addr = addr or 0
@@ -393,36 +633,62 @@ local function create_(code, addr, out)
ctx.disass = disass_block
ctx.hexdump = 8
ctx.get = get_be
+ ctx.map_pri = map_pri
+ return ctx
+end
+
+local function create_el(code, addr, out)
+ local ctx = create(code, addr, out)
+ ctx.get = get_le
+ return ctx
+end
+
+local function create_r6(code, addr, out)
+ local ctx = create(code, addr, out)
+ ctx.map_pri = map_pri_r6
return ctx
end
-local function create_el_(code, addr, out)
- local ctx = create_(code, addr, out)
+local function create_r6_el(code, addr, out)
+ local ctx = create(code, addr, out)
ctx.get = get_le
+ ctx.map_pri = map_pri_r6
return ctx
end
-- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
- create_(code, addr, out):disass()
+local function disass(code, addr, out)
+ create(code, addr, out):disass()
+end
+
+local function disass_el(code, addr, out)
+ create_el(code, addr, out):disass()
end
-local function disass_el_(code, addr, out)
- create_el_(code, addr, out):disass()
+local function disass_r6(code, addr, out)
+ create_r6(code, addr, out):disass()
+end
+
+local function disass_r6_el(code, addr, out)
+ create_r6_el(code, addr, out):disass()
end
-- Return register name for RID.
-local function regname_(r)
+local function regname(r)
if r < 32 then return map_gpr[r] end
return "f"..(r-32)
end
-- Public module functions.
-module(...)
-
-create = create_
-create_el = create_el_
-disass = disass_
-disass_el = disass_el_
-regname = regname_
+return {
+ create = create,
+ create_el = create_el,
+ create_r6 = create_r6,
+ create_r6_el = create_r6_el,
+ disass = disass,
+ disass_el = disass_el,
+ disass_r6 = disass_r6,
+ disass_r6_el = disass_r6_el,
+ regname = regname
+}
diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua
new file mode 100644
index 00000000..1236e524
--- /dev/null
+++ b/src/jit/dis_mips64.lua
@@ -0,0 +1,17 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS64 disassembler wrapper module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the big-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+ create = dis_mips.create,
+ disass = dis_mips.disass,
+ regname = dis_mips.regname
+}
+
diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua
new file mode 100644
index 00000000..7c478d2d
--- /dev/null
+++ b/src/jit/dis_mips64el.lua
@@ -0,0 +1,17 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS64EL disassembler wrapper module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the little-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+ create = dis_mips.create_el,
+ disass = dis_mips.disass_el,
+ regname = dis_mips.regname
+}
+
diff --git a/src/jit/dis_mips64r6.lua b/src/jit/dis_mips64r6.lua
new file mode 100644
index 00000000..c5789ce4
--- /dev/null
+++ b/src/jit/dis_mips64r6.lua
@@ -0,0 +1,17 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS64R6 disassembler wrapper module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the r6 big-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+ create = dis_mips.create_r6,
+ disass = dis_mips.disass_r6,
+ regname = dis_mips.regname
+}
+
diff --git a/src/jit/dis_mips64r6el.lua b/src/jit/dis_mips64r6el.lua
new file mode 100644
index 00000000..f67f6240
--- /dev/null
+++ b/src/jit/dis_mips64r6el.lua
@@ -0,0 +1,17 @@
+----------------------------------------------------------------------------
+-- LuaJIT MIPS64R6EL disassembler wrapper module.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+-- This module just exports the r6 little-endian functions from the
+-- MIPS disassembler module. All the interesting stuff is there.
+------------------------------------------------------------------------------
+
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+ create = dis_mips.create_r6_el,
+ disass = dis_mips.disass_r6_el,
+ regname = dis_mips.regname
+}
+
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
index 4f75ca32..a4fa6c60 100644
--- a/src/jit/dis_mipsel.lua
+++ b/src/jit/dis_mipsel.lua
@@ -8,13 +8,10 @@
-- MIPS disassembler module. All the interesting stuff is there.
------------------------------------------------------------------------------
-local require = require
-
-module(...)
-
-local dis_mips = require(_PACKAGE.."dis_mips")
-
-create = dis_mips.create_el
-disass = dis_mips.disass_el
-regname = dis_mips.regname
+local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
+return {
+ create = dis_mips.create_el,
+ disass = dis_mips.disass_el,
+ regname = dis_mips.regname
+}
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
index 6d3adfe0..8f65f25a 100644
--- a/src/jit/dis_ppc.lua
+++ b/src/jit/dis_ppc.lua
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
end
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
local ctx = {}
ctx.code = code
ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
end
-- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
- create_(code, addr, out):disass()
+local function disass(code, addr, out)
+ create(code, addr, out):disass()
end
-- Return register name for RID.
-local function regname_(r)
+local function regname(r)
if r < 32 then return map_gpr[r] end
return "f"..(r-32)
end
-- Public module functions.
-module(...)
-
-create = create_
-disass = disass_
-regname = regname_
+return {
+ create = create,
+ disass = disass,
+ regname = regname
+}
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
index 5a11c2cc..d076c6ae 100644
--- a/src/jit/dis_x64.lua
+++ b/src/jit/dis_x64.lua
@@ -8,13 +8,10 @@
-- x86/x64 disassembler module. All the interesting stuff is there.
------------------------------------------------------------------------------
-local require = require
-
-module(...)
-
-local dis_x86 = require(_PACKAGE.."dis_x86")
-
-create = dis_x86.create64
-disass = dis_x86.disass64
-regname = dis_x86.regname64
+local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
+return {
+ create = dis_x86.create64,
+ disass = dis_x86.disass64,
+ regname = dis_x86.regname64
+}
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index 25f60c77..84492fff 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -15,19 +15,20 @@
-- Intel and AMD manuals. The supported instruction set is quite extensive
-- and reflects what a current generation Intel or AMD CPU implements in
-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
--- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM)
--- instructions.
+-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
+-- (VMX/SVM) instructions.
--
-- Notes:
-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
-- * No attempt at optimization has been made -- it's fast enough for my needs.
--- * The public API may change when more architectures are added.
------------------------------------------------------------------------------
local type = type
local sub, byte, format = string.sub, string.byte, string.format
local match, gmatch, gsub = string.match, string.gmatch, string.gsub
local lower, rep = string.lower, string.rep
+local bit = require("bit")
+local tohex = bit.tohex
-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
local map_opc1_32 = {
@@ -76,7 +77,7 @@ local map_opc1_32 = {
"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
--Cx
-"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi",
+"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
--Dx
"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({
[0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb",
[0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb",
[0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
- [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false,
+ [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
[0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
}, { __index = map_opc1_32 })
@@ -112,12 +113,12 @@ local map_opc2 = {
[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
--1x
-"movupsXrm|movssXrm|movupdXrm|movsdXrm",
-"movupsXmr|movssXmr|movupdXmr|movsdXmr",
+"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
+"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
"movlpsXmr||movlpdXmr",
-"unpcklpsXrm||unpcklpdXrm",
-"unpckhpsXrm||unpckhpdXrm",
+"unpcklpsXrvm||unpcklpdXrvm",
+"unpckhpsXrvm||unpckhpdXrvm",
"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
"movhpsXmr||movhpdXmr",
"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
@@ -126,7 +127,7 @@ local map_opc2 = {
"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
"movapsXrm||movapdXrm",
"movapsXmr||movapdXmr",
-"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt",
+"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
@@ -142,27 +143,27 @@ local map_opc2 = {
"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
--5x
"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
-"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm",
-"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm",
-"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm",
-"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm",
-"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm",
+"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
+"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
+"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
+"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
+"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
-"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm",
-"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm",
+"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
+"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
--6x
-"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm",
-"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm",
-"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm",
-"||punpcklqdqXrm","||punpckhqdqXrm",
+"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
+"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
+"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
+"||punpcklqdqXrvm","||punpckhqdqXrvm",
"movPrVSm","movqMrm|movdquXrm|movdqaXrm",
--7x
-"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu",
-"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu",
-"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|",
+"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu",
+"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu",
+"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
nil,nil,
-"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm",
+"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
--8x
"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
@@ -180,27 +181,27 @@ nil,nil,
"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
--Cx
"xaddBmr","xaddVmr",
-"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|",
-"pinsrwPrWmu","pextrwDrPmu",
-"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp",
+"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
+"pinsrwPrvWmu","pextrwDrPmu",
+"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
--Dx
-"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm",
-"paddqPrm","pmullwPrm",
+"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
+"paddqPrvm","pmullwPrvm",
"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
-"psubusbPrm","psubuswPrm","pminubPrm","pandPrm",
-"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm",
+"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
+"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
--Ex
-"pavgbPrm","psrawPrm","psradPrm","pavgwPrm",
-"pmulhuwPrm","pmulhwPrm",
+"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
+"pmulhuwPrvm","pmulhwPrvm",
"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
-"psubsbPrm","psubswPrm","pminswPrm","porPrm",
-"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm",
+"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
+"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
--Fx
-"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm",
-"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$",
-"psubbPrm","psubwPrm","psubdPrm","psubqPrm",
-"paddbPrm","paddwPrm","padddPrm","ud",
+"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
+"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
+"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
+"paddbPrvm","paddwPrvm","padddPrvm","ud",
}
assert(map_opc2[255] == "ud")
@@ -208,49 +209,91 @@ assert(map_opc2[255] == "ud")
local map_opc3 = {
["38"] = { -- [66] 0f 38 xx
--0x
-[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm",
-"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm",
-"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm",
-nil,nil,nil,nil,
+[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
+"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
+"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
+"||permilpsXrvm","||permilpdXrvm",nil,nil,
--1x
"||pblendvbXrma",nil,nil,nil,
-"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm",
-nil,nil,nil,nil,
+"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
+"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
"pabsbPrm","pabswPrm","pabsdPrm",nil,
--2x
"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
-"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm",
-nil,nil,nil,nil,
+"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
+"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
--3x
"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
-"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm",
-"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm",
-"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm",
+"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
+"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
+"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
--4x
-"||pmulddXrm","||phminposuwXrm",
+"||pmulddXrvm","||phminposuwXrm",nil,nil,
+nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
+--5x
+[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
+[0x5a] = "||broadcasti128XrlXm",
+--7x
+[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
+--8x
+[0x8c] = "||pmaskmovXrvVSm",
+[0x8e] = "||pmaskmovVSmXvr",
+--9x
+[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
+[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
+[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
+[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
+[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
+--Ax
+[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
+[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
+[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
+[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
+[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
+--Bx
+[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
+[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
+[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
+[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
+[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
+--Dx
+[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
+[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
--Fx
[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
+[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv",
},
["3a"] = { -- [66] 0f 3a xx
--0x
-[0x00]=nil,nil,nil,nil,nil,nil,nil,nil,
-"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu",
-"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu",
+[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
+"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
+"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
+"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
--1x
nil,nil,nil,nil,
"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
-nil,nil,nil,nil,nil,nil,nil,nil,
+"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
+nil,nil,nil,nil,
--2x
-"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil,
+"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
+--3x
+[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
--4x
-[0x40] = "||dppsXrmu",
-[0x41] = "||dppdXrmu",
-[0x42] = "||mpsadbwXrmu",
+[0x40] = "||dppsXrvmu",
+[0x41] = "||dppdXrvmu",
+[0x42] = "||mpsadbwXrvmu",
+[0x44] = "||pclmulqdqXrvmu",
+[0x46] = "||perm2i128Xrvmu",
+[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
+[0x4c] = "||pblendvbXrvmb",
--6x
[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
+[0xdf] = "||aeskeygenassistXrmu",
+--Fx
+[0xf0] = "||| rorxVrmu",
},
}
@@ -354,17 +397,19 @@ local map_regs = {
"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
+ Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
+ "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
}
local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
-- Maps for size names.
local map_sz2n = {
- B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16,
+ B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
}
local map_sz2prefix = {
B = "byte", W = "word", D = "dword",
Q = "qword",
- M = "qword", X = "xword",
+ M = "qword", X = "xword", Y = "yword",
F = "dword", G = "qword", -- No need for sizes/register names for these two.
}
@@ -387,10 +432,13 @@ local function putop(ctx, text, operands)
if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
if ctx.rex then
local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
- (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")
- if t ~= "" then text = "rex."..t.." "..text end
+ (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
+ (ctx.vexl and "l" or "")
+ if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
+ if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "")
+ elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end
ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
- ctx.rex = false
+ ctx.rex = false; ctx.vexl = false; ctx.vexv = false
end
if ctx.seg then
local text2, n = gsub(text, "%[", "["..ctx.seg..":")
@@ -405,6 +453,7 @@ local function putop(ctx, text, operands)
end
ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text))
ctx.mrm = false
+ ctx.vexv = false
ctx.start = pos
ctx.imm = nil
end
@@ -413,7 +462,7 @@ end
local function clearprefixes(ctx)
ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
- ctx.rex = false; ctx.a32 = false
+ ctx.rex = false; ctx.a32 = false; ctx.vexl = false
end
-- Fallback for incomplete opcodes at the end.
@@ -450,9 +499,9 @@ end
-- Process pattern string and generate the operands.
local function putpat(ctx, name, pat)
local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
- local code, pos, stop = ctx.code, ctx.pos, ctx.stop
+ local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
- -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz
+ -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
for p in gmatch(pat, ".") do
local x = nil
if p == "V" or p == "U" then
@@ -467,12 +516,17 @@ local function putpat(ctx, name, pat)
elseif p == "B" then
sz = "B"
regs = ctx.rex and map_regs.B64 or map_regs.B
- elseif match(p, "[WDQMXFG]") then
+ elseif match(p, "[WDQMXYFG]") then
sz = p
+ if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
regs = map_regs[sz]
elseif p == "P" then
sz = ctx.o16 and "X" or "M"; ctx.o16 = false
+ if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
regs = map_regs[sz]
+ elseif p == "H" then
+ name = name..(ctx.rexw and "d" or "s")
+ ctx.rexw = false
elseif p == "S" then
name = name..lower(sz)
elseif p == "s" then
@@ -484,6 +538,10 @@ local function putpat(ctx, name, pat)
local imm = getimm(ctx, pos, 1); if not imm then return end
x = format("0x%02x", imm)
pos = pos+1
+ elseif p == "b" then
+ local imm = getimm(ctx, pos, 1); if not imm then return end
+ x = regs[imm/16+1]
+ pos = pos+1
elseif p == "w" then
local imm = getimm(ctx, pos, 2); if not imm then return end
x = format("0x%x", imm)
@@ -532,7 +590,7 @@ local function putpat(ctx, name, pat)
local lo = imm % 0x1000000
x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
else
- x = format("0x%08x", imm)
+ x = "0x"..tohex(imm)
end
elseif p == "R" then
local r = byte(code, pos-1, pos-1)%8
@@ -616,8 +674,13 @@ local function putpat(ctx, name, pat)
else
x = "CR"..sp
end
+ elseif p == "v" then
+ if ctx.vexv then
+ x = regs[ctx.vexv+1]; ctx.vexv = false
+ end
elseif p == "y" then x = "DR"..sp
elseif p == "z" then x = "TR"..sp
+ elseif p == "l" then vexl = false
elseif p == "t" then
else
error("bad pattern `"..pat.."'")
@@ -692,7 +755,8 @@ map_act = {
B = putpat, W = putpat, D = putpat, Q = putpat,
V = putpat, U = putpat, T = putpat,
M = putpat, X = putpat, P = putpat,
- F = putpat, G = putpat,
+ F = putpat, G = putpat, Y = putpat,
+ H = putpat,
-- Collect prefixes.
[":"] = function(ctx, name, pat)
@@ -753,15 +817,68 @@ map_act = {
-- REX prefix.
rex = function(ctx, name, pat)
- if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed.
+ if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
for p in gmatch(pat, ".") do ctx["rex"..p] = true end
- ctx.rex = true
+ ctx.rex = "rex"
+ end,
+
+ -- VEX prefix.
+ vex = function(ctx, name, pat)
+ if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
+ ctx.rex = "vex"
+ local pos = ctx.pos
+ if ctx.mrm then
+ ctx.mrm = nil
+ pos = pos-1
+ end
+ local b = byte(ctx.code, pos, pos)
+ if not b then return incomplete(ctx) end
+ pos = pos+1
+ if b < 128 then ctx.rexr = true end
+ local m = 1
+ if pat == "3" then
+ m = b%32; b = (b-m)/32
+ local nb = b%2; b = (b-nb)/2
+ if nb == 0 then ctx.rexb = true end
+ local nx = b%2
+ if nx == 0 then ctx.rexx = true end
+ b = byte(ctx.code, pos, pos)
+ if not b then return incomplete(ctx) end
+ pos = pos+1
+ if b >= 128 then ctx.rexw = true end
+ end
+ ctx.pos = pos
+ local map
+ if m == 1 then map = map_opc2
+ elseif m == 2 then map = map_opc3["38"]
+ elseif m == 3 then map = map_opc3["3a"]
+ else return unknown(ctx) end
+ local p = b%4; b = (b-p)/4
+ if p == 1 then ctx.o16 = "o16"
+ elseif p == 2 then ctx.rep = "rep"
+ elseif p == 3 then ctx.rep = "repne" end
+ local l = b%2; b = (b-l)/2
+ if l ~= 0 then ctx.vexl = true end
+ ctx.vexv = (-1-b)%16
+ return dispatchmap(ctx, map)
end,
-- Special case for nop with REX prefix.
nop = function(ctx, name, pat)
return dispatch(ctx, ctx.rex and pat or "nop")
end,
+
+ -- Special case for 0F 77.
+ emms = function(ctx, name, pat)
+ if ctx.rex ~= "vex" then
+ return putop(ctx, "emms")
+ elseif ctx.vexl then
+ ctx.vexl = false
+ return putop(ctx, "zeroall")
+ else
+ return putop(ctx, "zeroupper")
+ end
+ end,
}
------------------------------------------------------------------------------
@@ -782,7 +899,7 @@ local function disass_block(ctx, ofs, len)
end
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-local function create_(code, addr, out)
+local function create(code, addr, out)
local ctx = {}
ctx.code = code
ctx.addr = (addr or 0) - 1
@@ -796,8 +913,8 @@ local function create_(code, addr, out)
return ctx
end
-local function create64_(code, addr, out)
- local ctx = create_(code, addr, out)
+local function create64(code, addr, out)
+ local ctx = create(code, addr, out)
ctx.x64 = true
ctx.map1 = map_opc1_64
ctx.aregs = map_regs.Q
@@ -805,32 +922,32 @@ local function create64_(code, addr, out)
end
-- Simple API: disassemble code (a string) at address and output via out.
-local function disass_(code, addr, out)
- create_(code, addr, out):disass()
+local function disass(code, addr, out)
+ create(code, addr, out):disass()
end
-local function disass64_(code, addr, out)
- create64_(code, addr, out):disass()
+local function disass64(code, addr, out)
+ create64(code, addr, out):disass()
end
-- Return register name for RID.
-local function regname_(r)
+local function regname(r)
if r < 8 then return map_regs.D[r+1] end
return map_regs.X[r-7]
end
-local function regname64_(r)
+local function regname64(r)
if r < 16 then return map_regs.Q[r+1] end
return map_regs.X[r-15]
end
-- Public module functions.
-module(...)
-
-create = create_
-create64 = create64_
-disass = disass_
-disass64 = disass64_
-regname = regname_
-regname64 = regname64_
+return {
+ create = create,
+ create64 = create64,
+ disass = disass,
+ disass64 = disass64,
+ regname = regname,
+ regname64 = regname64
+}
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 344fa926..18e7a4b7 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -55,7 +55,7 @@
-- Cache some library functions and objects.
local jit = require("jit")
-assert(jit.version_num == 20005, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
local jutil = require("jit.util")
local vmdef = require("jit.vmdef")
local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
local bit = require("bit")
-local band, shr = bit.band, bit.rshift
+local band, shr, tohex = bit.band, bit.rshift, bit.tohex
local sub, gsub, format = string.sub, string.gsub, string.format
local byte, rep = string.byte, string.rep
local type, tostring = type, tostring
@@ -85,12 +85,13 @@ local nexitsym = 0
local function fillsymtab_tr(tr, nexit)
local t = {}
symtabmt.__index = t
- if jit.arch == "mips" or jit.arch == "mipsel" then
+ if jit.arch:sub(1, 4) == "mips" then
t[traceexitstub(tr, 0)] = "exit"
return
end
for i=0,nexit-1 do
local addr = traceexitstub(tr, i)
+ if addr < 0 then addr = addr + 2^32 end
t[addr] = tostring(i)
end
local addr = traceexitstub(tr, nexit)
@@ -101,10 +102,15 @@ end
local function fillsymtab(tr, nexit)
local t = symtab
if nexitsym == 0 then
+ local maskaddr = jit.arch == "arm" and -2
local ircall = vmdef.ircall
for i=0,#ircall do
local addr = ircalladdr(i)
- if addr ~= 0 then t[addr] = ircall[i] end
+ if addr ~= 0 then
+ if maskaddr then addr = band(addr, maskaddr) end
+ if addr < 0 then addr = addr + 2^32 end
+ t[addr] = ircall[i]
+ end
end
end
if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -118,6 +124,7 @@ local function fillsymtab(tr, nexit)
nexit = 1000000
break
end
+ if addr < 0 then addr = addr + 2^32 end
t[addr] = tostring(i)
end
nexitsym = nexit
@@ -136,6 +143,7 @@ local function dump_mcode(tr)
local mcode, addr, loop = tracemc(tr)
if not mcode then return end
if not disass then disass = require("jit.dis_"..jit.arch) end
+ if addr < 0 then addr = addr + 2^32 end
out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
local ctx = disass.create(mcode, addr, dumpwrite)
ctx.hexdump = 0
@@ -211,8 +219,10 @@ local function colorize_text(s)
return s
end
-local function colorize_ansi(s, t)
- return format(colortype_ansi[t], s)
+local function colorize_ansi(s, t, extra)
+ local out = format(colortype_ansi[t], s)
+ if extra then out = "\027[3m"..out end
+ return out
end
local irtype_ansi = setmetatable({},
@@ -221,9 +231,10 @@ local irtype_ansi = setmetatable({},
local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
-local function colorize_html(s, t)
+local function colorize_html(s, t, extra)
s = gsub(s, "[<>&]", html_escape)
- return format('<span class="irt_%s">%s</span>', irtype_text[t], s)
+ return format('<span class="irt_%s%s">%s</span>',
+ irtype_text[t], extra and " irt_extra" or "", s)
end
local irtype_html = setmetatable({},
@@ -248,6 +259,7 @@ span.irt_tab { color: #c00000; }
span.irt_udt, span.irt_lud { color: #00c0c0; }
span.irt_num { color: #4040c0; }
span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
+span.irt_extra { font-style: italic; }
</style>
]]
@@ -263,6 +275,7 @@ local litname = {
if band(mode, 8) ~= 0 then s = s.."C" end
if band(mode, 16) ~= 0 then s = s.."R" end
if band(mode, 32) ~= 0 then s = s.."I" end
+ if band(mode, 64) ~= 0 then s = s.."K" end
t[mode] = s
return s
end}),
@@ -270,16 +283,20 @@ local litname = {
["CONV "] = setmetatable({}, { __index = function(t, mode)
local s = irtype[band(mode, 31)]
s = irtype[band(shr(mode, 5), 31)].."."..s
- if band(mode, 0x400) ~= 0 then s = s.." trunc"
- elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
+ if band(mode, 0x800) ~= 0 then s = s.." sext" end
local c = shr(mode, 12)
- if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end
+ if c == 1 then s = s.." none"
+ elseif c == 2 then s = s.." index"
+ elseif c == 3 then s = s.." check" end
t[mode] = s
return s
end}),
["FLOAD "] = vmdef.irfield,
["FREF "] = vmdef.irfield,
["FPMATH"] = vmdef.irfpm,
+ ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" },
+ ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" },
+ ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
}
local function ctlsub(c)
@@ -303,15 +320,19 @@ local function fmtfunc(func, pc)
end
end
-local function formatk(tr, idx)
+local function formatk(tr, idx, sn)
local k, t, slot = tracek(tr, idx)
local tn = type(k)
local s
if tn == "number" then
- if k == 2^52+2^51 then
+ if t < 12 then
+ s = k == 0 and "NULL" or format("[0x%08x]", k)
+ elseif band(sn or 0, 0x30000) ~= 0 then
+ s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
+ elseif k == 2^52+2^51 then
s = "bias"
else
- s = format("%+.14g", k)
+ s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
end
elseif tn == "string" then
s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@@ -329,10 +350,12 @@ local function formatk(tr, idx)
elseif t == 21 then -- int64_t
s = sub(tostring(k), 1, -3)
if sub(s, 1, 1) ~= "-" then s = "+"..s end
+ elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)
+ return "----" -- Special case for LJ_FR2 slot 1.
else
s = tostring(k) -- For primitives.
end
- s = colorize(format("%-4s", s), t)
+ s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0)
if slot then
s = format("%s @%d", s, slot)
end
@@ -347,12 +370,12 @@ local function printsnap(tr, snap)
n = n + 1
local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
if ref < 0 then
- out:write(formatk(tr, ref))
+ out:write(formatk(tr, ref, sn))
elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
else
local m, ot, op1, op2 = traceir(tr, ref)
- out:write(colorize(format("%04d", ref), band(ot, 31)))
+ out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0))
end
out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
else
@@ -545,7 +568,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
if what == "start" then
if dumpmode.H then out:write('<pre class="ljdump">\n') end
out:write("---- TRACE ", tr, " ", what)
- if otr then out:write(" ", otr, "/", oex) end
+ if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
out:write(" ", fmtfunc(func, pc), "\n")
elseif what == "stop" or what == "abort" then
out:write("---- TRACE ", tr, " ", what)
@@ -595,23 +618,26 @@ end
------------------------------------------------------------------------------
+local gpr64 = jit.arch:match("64")
+local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel"
+
-- Dump taken trace exits.
local function dump_texit(tr, ex, ngpr, nfpr, ...)
out:write("---- TRACE ", tr, " exit ", ex, "\n")
if dumpmode.X then
local regs = {...}
- if jit.arch == "x64" then
+ if gpr64 then
for i=1,ngpr do
out:write(format(" %016x", regs[i]))
if i % 4 == 0 then out:write("\n") end
end
else
for i=1,ngpr do
- out:write(format(" %08x", regs[i]))
+ out:write(" ", tohex(regs[i]))
if i % 8 == 0 then out:write("\n") end
end
end
- if jit.arch == "mips" or jit.arch == "mipsel" then
+ if fprmips32 then
for i=1,nfpr,2 do
out:write(format(" %+17.14g", regs[ngpr+i]))
if i % 8 == 7 then out:write("\n") end
@@ -692,9 +718,9 @@ local function dumpon(opt, outfile)
end
-- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+ on = dumpon,
+ off = dumpoff,
+ start = dumpon -- For -j command line option.
+}
diff --git a/src/jit/p.lua b/src/jit/p.lua
new file mode 100644
index 00000000..f225c312
--- /dev/null
+++ b/src/jit/p.lua
@@ -0,0 +1,312 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module is a simple command line interface to the built-in
+-- low-overhead profiler of LuaJIT.
+--
+-- The lower-level API of the profiler is accessible via the "jit.profile"
+-- module or the luaJIT_profile_* C API.
+--
+-- Example usage:
+--
+-- luajit -jp myapp.lua
+-- luajit -jp=s myapp.lua
+-- luajit -jp=-s myapp.lua
+-- luajit -jp=vl myapp.lua
+-- luajit -jp=G,profile.txt myapp.lua
+--
+-- The following dump features are available:
+--
+-- f Stack dump: function name, otherwise module:line. Default mode.
+-- F Stack dump: ditto, but always prepend module.
+-- l Stack dump: module:line.
+-- <number> stack dump depth (callee < caller). Default: 1.
+-- -<number> Inverse stack dump depth (caller > callee).
+-- s Split stack dump after first stack level. Implies abs(depth) >= 2.
+-- p Show full path for module names.
+-- v Show VM states. Can be combined with stack dumps, e.g. vf or fv.
+-- z Show zones. Can be combined with stack dumps, e.g. zf or fz.
+-- r Show raw sample counts. Default: show percentages.
+-- a Annotate excerpts from source code files.
+-- A Annotate complete source code files.
+-- G Produce raw output suitable for graphical tools (e.g. flame graphs).
+-- m<number> Minimum sample percentage to be shown. Default: 3.
+-- i<number> Sampling interval in milliseconds. Default: 10.
+--
+----------------------------------------------------------------------------
+
+-- Cache some library functions and objects.
+local jit = require("jit")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
+local profile = require("jit.profile")
+local vmdef = require("jit.vmdef")
+local math = math
+local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
+local sort, format = table.sort, string.format
+local stdout = io.stdout
+local zone -- Load jit.zone module on demand.
+
+-- Output file handle.
+local out
+
+------------------------------------------------------------------------------
+
+local prof_ud
+local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth
+local prof_ann, prof_count1, prof_count2, prof_samples
+
+local map_vmmode = {
+ N = "Compiled",
+ I = "Interpreted",
+ C = "C code",
+ G = "Garbage Collector",
+ J = "JIT Compiler",
+}
+
+-- Profiler callback.
+local function prof_cb(th, samples, vmmode)
+ prof_samples = prof_samples + samples
+ local key_stack, key_stack2, key_state
+ -- Collect keys for sample.
+ if prof_states then
+ if prof_states == "v" then
+ key_state = map_vmmode[vmmode] or vmmode
+ else
+ key_state = zone:get() or "(none)"
+ end
+ end
+ if prof_fmt then
+ key_stack = profile.dumpstack(th, prof_fmt, prof_depth)
+ key_stack = key_stack:gsub("%[builtin#(%d+)%]", function(x)
+ return vmdef.ffnames[tonumber(x)]
+ end)
+ if prof_split == 2 then
+ local k1, k2 = key_stack:match("(.-) [<>] (.*)")
+ if k2 then key_stack, key_stack2 = k1, k2 end
+ elseif prof_split == 3 then
+ key_stack2 = profile.dumpstack(th, "l", 1)
+ end
+ end
+ -- Order keys.
+ local k1, k2
+ if prof_split == 1 then
+ if key_state then
+ k1 = key_state
+ if key_stack then k2 = key_stack end
+ end
+ elseif key_stack then
+ k1 = key_stack
+ if key_stack2 then k2 = key_stack2 elseif key_state then k2 = key_state end
+ end
+ -- Coalesce samples in one or two levels.
+ if k1 then
+ local t1 = prof_count1
+ t1[k1] = (t1[k1] or 0) + samples
+ if k2 then
+ local t2 = prof_count2
+ local t3 = t2[k1]
+ if not t3 then t3 = {}; t2[k1] = t3 end
+ t3[k2] = (t3[k2] or 0) + samples
+ end
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Show top N list.
+local function prof_top(count1, count2, samples, indent)
+ local t, n = {}, 0
+ for k in pairs(count1) do
+ n = n + 1
+ t[n] = k
+ end
+ sort(t, function(a, b) return count1[a] > count1[b] end)
+ for i=1,n do
+ local k = t[i]
+ local v = count1[k]
+ local pct = floor(v*100/samples + 0.5)
+ if pct < prof_min then break end
+ if not prof_raw then
+ out:write(format("%s%2d%% %s\n", indent, pct, k))
+ elseif prof_raw == "r" then
+ out:write(format("%s%5d %s\n", indent, v, k))
+ else
+ out:write(format("%s %d\n", k, v))
+ end
+ if count2 then
+ local r = count2[k]
+ if r then
+ prof_top(r, nil, v, (prof_split == 3 or prof_split == 1) and " -- " or
+ (prof_depth < 0 and " -> " or " <- "))
+ end
+ end
+ end
+end
+
+-- Annotate source code
+local function prof_annotate(count1, samples)
+ local files = {}
+ local ms = 0
+ for k, v in pairs(count1) do
+ local pct = floor(v*100/samples + 0.5)
+ ms = math.max(ms, v)
+ if pct >= prof_min then
+ local file, line = k:match("^(.*):(%d+)$")
+ if not file then file = k; line = 0 end
+ local fl = files[file]
+ if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
+ line = tonumber(line)
+ fl[line] = prof_raw and v or pct
+ end
+ end
+ sort(files)
+ local fmtv, fmtn = " %3d%% | %s\n", " | %s\n"
+ if prof_raw then
+ local n = math.max(5, math.ceil(math.log10(ms)))
+ fmtv = "%"..n.."d | %s\n"
+ fmtn = (" "):rep(n).." | %s\n"
+ end
+ local ann = prof_ann
+ for _, file in ipairs(files) do
+ local f0 = file:byte()
+ if f0 == 40 or f0 == 91 then
+ out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
+ break
+ end
+ local fp, err = io.open(file)
+ if not fp then
+ out:write(format("====== ERROR: %s: %s\n", file, err))
+ break
+ end
+ out:write(format("\n====== %s ======\n", file))
+ local fl = files[file]
+ local n, show = 1, false
+ if ann ~= 0 then
+ for i=1,ann do
+ if fl[i] then show = true; out:write("@@ 1 @@\n"); break end
+ end
+ end
+ for line in fp:lines() do
+ if line:byte() == 27 then
+ out:write("[Cannot annotate bytecode file]\n")
+ break
+ end
+ local v = fl[n]
+ if ann ~= 0 then
+ local v2 = fl[n+ann]
+ if show then
+ if v2 then show = n+ann elseif v then show = n
+ elseif show+ann < n then show = false end
+ elseif v2 then
+ show = n+ann
+ out:write(format("@@ %d @@\n", n))
+ end
+ if not show then goto next end
+ end
+ if v then
+ out:write(format(fmtv, v, line))
+ else
+ out:write(format(fmtn, line))
+ end
+ ::next::
+ n = n + 1
+ end
+ fp:close()
+ end
+end
+
+------------------------------------------------------------------------------
+
+-- Finish profiling and dump result.
+local function prof_finish()
+ if prof_ud then
+ profile.stop()
+ local samples = prof_samples
+ if samples == 0 then
+ if prof_raw ~= true then out:write("[No samples collected]\n") end
+ return
+ end
+ if prof_ann then
+ prof_annotate(prof_count1, samples)
+ else
+ prof_top(prof_count1, prof_count2, samples, "")
+ end
+ prof_count1 = nil
+ prof_count2 = nil
+ prof_ud = nil
+ if out ~= stdout then out:close() end
+ end
+end
+
+-- Start profiling.
+local function prof_start(mode)
+ local interval = ""
+ mode = mode:gsub("i%d*", function(s) interval = s; return "" end)
+ prof_min = 3
+ mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end)
+ prof_depth = 1
+ mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end)
+ local m = {}
+ for c in mode:gmatch(".") do m[c] = c end
+ prof_states = m.z or m.v
+ if prof_states == "z" then zone = require("jit.zone") end
+ local scope = m.l or m.f or m.F or (prof_states and "" or "f")
+ local flags = (m.p or "")
+ prof_raw = m.r
+ if m.s then
+ prof_split = 2
+ if prof_depth == -1 or m["-"] then prof_depth = -2
+ elseif prof_depth == 1 then prof_depth = 2 end
+ elseif mode:find("[fF].*l") then
+ scope = "l"
+ prof_split = 3
+ else
+ prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0
+ end
+ prof_ann = m.A and 0 or (m.a and 3)
+ if prof_ann then
+ scope = "l"
+ prof_fmt = "pl"
+ prof_split = 0
+ prof_depth = 1
+ elseif m.G and scope ~= "" then
+ prof_fmt = flags..scope.."Z;"
+ prof_depth = -100
+ prof_raw = true
+ prof_min = 0
+ elseif scope == "" then
+ prof_fmt = false
+ else
+ local sc = prof_split == 3 and m.f or m.F or scope
+ prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ")
+ end
+ prof_count1 = {}
+ prof_count2 = {}
+ prof_samples = 0
+ profile.start(scope:lower()..interval, prof_cb)
+ prof_ud = newproxy(true)
+ getmetatable(prof_ud).__gc = prof_finish
+end
+
+------------------------------------------------------------------------------
+
+local function start(mode, outfile)
+ if not outfile then outfile = os.getenv("LUAJIT_PROFILEFILE") end
+ if outfile then
+ out = outfile == "-" and stdout or assert(io.open(outfile, "w"))
+ else
+ out = stdout
+ end
+ prof_start(mode or "f")
+end
+
+-- Public module functions.
+return {
+ start = start, -- For -j command line option.
+ stop = prof_finish
+}
+
diff --git a/src/jit/v.lua b/src/jit/v.lua
index 9624688b..ac8b19db 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -59,7 +59,7 @@
-- Cache some library functions and objects.
local jit = require("jit")
-assert(jit.version_num == 20005, "LuaJIT core/library version mismatch")
+assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
local jutil = require("jit.util")
local vmdef = require("jit.vmdef")
local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
@@ -99,7 +99,7 @@ end
local function dump_trace(what, tr, func, pc, otr, oex)
if what == "start" then
startloc = fmtfunc(func, pc)
- startex = otr and "("..otr.."/"..oex..") " or ""
+ startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or ""
else
if what == "abort" then
local loc = fmtfunc(func, pc)
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
if ltype == "interpreter" then
out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
tr, startex, startloc))
+ elseif ltype == "stitch" then
+ out:write(format("[TRACE %3s %s%s %s %s]\n",
+ tr, startex, startloc, ltype, fmtfunc(func, pc)))
elseif link == tr or link == 0 then
out:write(format("[TRACE %3s %s%s %s]\n",
tr, startex, startloc, ltype))
@@ -159,9 +162,9 @@ local function dumpon(outfile)
end
-- Public module functions.
-module(...)
-
-on = dumpon
-off = dumpoff
-start = dumpon -- For -j command line option.
+return {
+ on = dumpon,
+ off = dumpoff,
+ start = dumpon -- For -j command line option.
+}
diff --git a/src/jit/zone.lua b/src/jit/zone.lua
new file mode 100644
index 00000000..1308cb74
--- /dev/null
+++ b/src/jit/zone.lua
@@ -0,0 +1,45 @@
+----------------------------------------------------------------------------
+-- LuaJIT profiler zones.
+--
+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved.
+-- Released under the MIT license. See Copyright Notice in luajit.h
+----------------------------------------------------------------------------
+--
+-- This module implements a simple hierarchical zone model.
+--
+-- Example usage:
+--
+-- local zone = require("jit.zone")
+-- zone("AI")
+-- ...
+-- zone("A*")
+-- ...
+-- print(zone:get()) --> "A*"
+-- ...
+-- zone()
+-- ...
+-- print(zone:get()) --> "AI"
+-- ...
+-- zone()
+--
+----------------------------------------------------------------------------
+
+local remove = table.remove
+
+return setmetatable({
+ flush = function(t)
+ for i=#t,1,-1 do t[i] = nil end
+ end,
+ get = function(t)
+ return t[#t]
+ end
+}, {
+ __call = function(t, zone)
+ if zone then
+ t[#t+1] = zone
+ else
+ return (assert(remove(t), "empty zone stack"))
+ end
+ end
+})
+
diff --git a/src/lauxlib.h b/src/lauxlib.h
index fed1491b..a44f0272 100644
--- a/src/lauxlib.h
+++ b/src/lauxlib.h
@@ -15,9 +15,6 @@
#include "lua.h"
-#define luaL_getn(L,i) ((int)lua_objlen(L, i))
-#define luaL_setn(L,i,j) ((void)0) /* no op! */
-
/* extra error code for `luaL_load' */
#define LUA_ERRFILE (LUA_ERRERR+1)
@@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...);
LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def,
const char *const lst[]);
+/* pre-defined references */
+#define LUA_NOREF (-2)
+#define LUA_REFNIL (-1)
+
LUALIB_API int (luaL_ref) (lua_State *L, int t);
LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref);
@@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz,
const char *name, const char *mode);
LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
int level);
+LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup);
+LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname,
+ int sizehint);
+LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname);
+LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname);
/*
@@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg,
#define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n)))
+/* From Lua 5.2. */
+#define luaL_newlibtable(L, l) \
+ lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1)
+#define luaL_newlib(L, l) (luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0))
+
/*
** {======================================================
** Generic Buffer manipulation
@@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B);
/* }====================================================== */
-
-/* compatibility with ref system */
-
-/* pre-defined references */
-#define LUA_NOREF (-2)
-#define LUA_REFNIL (-1)
-
-#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \
- (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0))
-
-#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref))
-
-#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref))
-
-
-#define luaL_reg luaL_Reg
-
#endif
diff --git a/src/lib_aux.c b/src/lib_aux.c
index 14dd57e3..b8e56436 100644
--- a/src/lib_aux.c
+++ b/src/lib_aux.c
@@ -107,38 +107,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx,
static int libsize(const luaL_Reg *l)
{
int size = 0;
- for (; l->name; l++) size++;
+ for (; l && l->name; l++) size++;
return size;
}
+LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint)
+{
+ luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
+ lua_getfield(L, -1, modname);
+ if (!lua_istable(L, -1)) {
+ lua_pop(L, 1);
+ if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL)
+ lj_err_callerv(L, LJ_ERR_BADMODN, modname);
+ lua_pushvalue(L, -1);
+ lua_setfield(L, -3, modname); /* _LOADED[modname] = new table. */
+ }
+ lua_remove(L, -2); /* Remove _LOADED table. */
+}
+
LUALIB_API void luaL_openlib(lua_State *L, const char *libname,
const luaL_Reg *l, int nup)
{
lj_lib_checkfpu(L);
if (libname) {
- int size = libsize(l);
- /* check whether lib already exists */
- luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16);
- lua_getfield(L, -1, libname); /* get _LOADED[libname] */
- if (!lua_istable(L, -1)) { /* not found? */
- lua_pop(L, 1); /* remove previous result */
- /* try global variable (and create one if it does not exist) */
- if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL)
- lj_err_callerv(L, LJ_ERR_BADMODN, libname);
- lua_pushvalue(L, -1);
- lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */
- }
- lua_remove(L, -2); /* remove _LOADED table */
- lua_insert(L, -(nup+1)); /* move library table to below upvalues */
+ luaL_pushmodule(L, libname, libsize(l));
+ lua_insert(L, -(nup + 1)); /* Move module table below upvalues. */
}
- for (; l->name; l++) {
- int i;
- for (i = 0; i < nup; i++) /* copy upvalues to the top */
- lua_pushvalue(L, -nup);
- lua_pushcclosure(L, l->func, nup);
- lua_setfield(L, -(nup+2), l->name);
- }
- lua_pop(L, nup); /* remove upvalues */
+ if (l)
+ luaL_setfuncs(L, l, nup);
+ else
+ lua_pop(L, nup); /* Remove upvalues. */
}
LUALIB_API void luaL_register(lua_State *L, const char *libname,
@@ -147,6 +145,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname,
luaL_openlib(L, libname, l, 0);
}
+LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup)
+{
+ luaL_checkstack(L, nup, "too many upvalues");
+ for (; l->name; l++) {
+ int i;
+ for (i = 0; i < nup; i++) /* Copy upvalues to the top. */
+ lua_pushvalue(L, -nup);
+ lua_pushcclosure(L, l->func, nup);
+ lua_setfield(L, -(nup + 2), l->name);
+ }
+ lua_pop(L, nup); /* Remove upvalues. */
+}
+
LUALIB_API const char *luaL_gsub(lua_State *L, const char *s,
const char *p, const char *r)
{
@@ -207,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B)
LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l)
{
- while (l--)
- luaL_addchar(B, *s++);
+ if (l <= bufffree(B)) {
+ memcpy(B->p, s, l);
+ B->p += l;
+ } else {
+ emptybuffer(B);
+ lua_pushlstring(B->L, s, l);
+ B->lvl++;
+ adjuststack(B);
+ }
}
LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s)
@@ -302,7 +320,7 @@ static int panic(lua_State *L)
#ifdef LUAJIT_USE_SYSMALLOC
-#if LJ_64 && !defined(LUAJIT_USE_VALGRIND)
+#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND)
#error "Must use builtin allocator for 64 bit target"
#endif
@@ -327,23 +345,19 @@ LUALIB_API lua_State *luaL_newstate(void)
#else
-#include "lj_alloc.h"
-
LUALIB_API lua_State *luaL_newstate(void)
{
lua_State *L;
- void *ud = lj_alloc_create();
- if (ud == NULL) return NULL;
-#if LJ_64
- L = lj_state_newstate(lj_alloc_f, ud);
+#if LJ_64 && !LJ_GC64
+ L = lj_state_newstate(LJ_ALLOCF_INTERNAL, NULL);
#else
- L = lua_newstate(lj_alloc_f, ud);
+ L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL);
#endif
if (L) G(L)->panic = panic;
return L;
}
-#if LJ_64
+#if LJ_64 && !LJ_GC64
LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
{
UNUSED(f); UNUSED(ud);
diff --git a/src/lib_base.c b/src/lib_base.c
index 6c96e8d5..98ec67c7 100644
--- a/src/lib_base.c
+++ b/src/lib_base.c
@@ -19,10 +19,12 @@
#include "lj_gc.h"
#include "lj_err.h"
#include "lj_debug.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_meta.h"
#include "lj_state.h"
+#include "lj_frame.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_cconv.h"
@@ -32,6 +34,7 @@
#include "lj_dispatch.h"
#include "lj_char.h"
#include "lj_strscan.h"
+#include "lj_strfmt.h"
#include "lj_lib.h"
/* -- Base library: checks ------------------------------------------------ */
@@ -40,13 +43,13 @@
LJLIB_ASM(assert) LJLIB_REC(.)
{
- GCstr *s;
lj_lib_checkany(L, 1);
- s = lj_lib_optstr(L, 2);
- if (s)
- lj_err_callermsg(L, strdata(s));
- else
+ if (L->top == L->base+1)
lj_err_caller(L, LJ_ERR_ASSERT);
+ else if (tvisstr(L->base+1) || tvisnumber(L->base+1))
+ lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2)));
+ else
+ lj_err_run(L);
return FFH_UNREACHABLE;
}
@@ -73,9 +76,10 @@ LJLIB_ASM_(type) LJLIB_REC(.)
/* This solves a circular dependency problem -- change FF_next_N as needed. */
LJ_STATIC_ASSERT((int)FF_next == FF_next_N);
-LJLIB_ASM(next)
+LJLIB_ASM(next) LJLIB_REC(.)
{
lj_lib_checktab(L, 1);
+ lj_err_msg(L, LJ_ERR_NEXTIDX);
return FFH_UNREACHABLE;
}
@@ -86,10 +90,11 @@ static int ffh_pairs(lua_State *L, MMS mm)
cTValue *mo = lj_meta_lookup(L, o, mm);
if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) {
L->top = o+1; /* Only keep one argument. */
- copyTV(L, L->base-1, mo); /* Replace callable. */
+ copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
return FFH_TAILCALL;
} else {
if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE);
+ if (LJ_FR2) { copyTV(L, o-1, o); o--; }
setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1)));
if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0);
return FFH_RES(3);
@@ -100,7 +105,7 @@ static int ffh_pairs(lua_State *L, MMS mm)
#endif
LJLIB_PUSH(lastcl)
-LJLIB_ASM(pairs)
+LJLIB_ASM(pairs) LJLIB_REC(xpairs 0)
{
return ffh_pairs(L, MM_pairs);
}
@@ -113,7 +118,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.)
}
LJLIB_PUSH(lastcl)
-LJLIB_ASM(ipairs) LJLIB_REC(.)
+LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1)
{
return ffh_pairs(L, MM_ipairs);
}
@@ -131,11 +136,11 @@ LJLIB_ASM(setmetatable) LJLIB_REC(.)
lj_err_caller(L, LJ_ERR_PROTMT);
setgcref(t->metatable, obj2gco(mt));
if (mt) { lj_gc_objbarriert(L, t, mt); }
- settabV(L, L->base-1, t);
+ settabV(L, L->base-1-LJ_FR2, t);
return FFH_RES(1);
}
-LJLIB_CF(getfenv)
+LJLIB_CF(getfenv) LJLIB_REC(.)
{
GCfunc *fn;
cTValue *o = L->base;
@@ -144,6 +149,7 @@ LJLIB_CF(getfenv)
o = lj_debug_frame(L, level, &level);
if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
+ if (LJ_FR2) o--;
}
fn = &gcval(o)->fn;
settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env));
@@ -165,6 +171,7 @@ LJLIB_CF(setfenv)
o = lj_debug_frame(L, level, &level);
if (o == NULL)
lj_err_arg(L, 1, LJ_ERR_INVLVL);
+ if (LJ_FR2) o--;
}
fn = &gcval(o)->fn;
if (!isluafunc(fn))
@@ -259,7 +266,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
if (base == 10) {
TValue *o = lj_lib_checkany(L, 1);
if (lj_strscan_numberobj(o)) {
- copyTV(L, L->base-1, o);
+ copyTV(L, L->base-1-LJ_FR2, o);
return FFH_RES(1);
}
#if LJ_HASFFI
@@ -272,11 +279,11 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) {
int32_t i;
lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0);
- setintV(L->base-1, i);
+ setintV(L->base-1-LJ_FR2, i);
return FFH_RES(1);
}
lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE),
- (uint8_t *)&(L->base-1)->n, o, 0);
+ (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0);
return FFH_RES(1);
}
}
@@ -284,53 +291,46 @@ LJLIB_ASM(tonumber) LJLIB_REC(.)
} else {
const char *p = strdata(lj_lib_checkstr(L, 1));
char *ep;
+ unsigned int neg = 0;
unsigned long ul;
if (base < 2 || base > 36)
lj_err_arg(L, 2, LJ_ERR_BASERNG);
- ul = strtoul(p, &ep, base);
- if (p != ep) {
- while (lj_char_isspace((unsigned char)(*ep))) ep++;
- if (*ep == '\0') {
- if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u))
- setintV(L->base-1, (int32_t)ul);
- else
- setnumV(L->base-1, (lua_Number)ul);
- return FFH_RES(1);
+ while (lj_char_isspace((unsigned char)(*p))) p++;
+ if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; }
+ if (lj_char_isalnum((unsigned char)(*p))) {
+ ul = strtoul(p, &ep, base);
+ if (p != ep) {
+ while (lj_char_isspace((unsigned char)(*ep))) ep++;
+ if (*ep == '\0') {
+ if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) {
+ if (neg) ul = (unsigned long)-(long)ul;
+ setintV(L->base-1-LJ_FR2, (int32_t)ul);
+ } else {
+ lua_Number n = (lua_Number)ul;
+ if (neg) n = -n;
+ setnumV(L->base-1-LJ_FR2, n);
+ }
+ return FFH_RES(1);
+ }
}
}
}
- setnilV(L->base-1);
+ setnilV(L->base-1-LJ_FR2);
return FFH_RES(1);
}
-LJLIB_PUSH("nil")
-LJLIB_PUSH("false")
-LJLIB_PUSH("true")
LJLIB_ASM(tostring) LJLIB_REC(.)
{
TValue *o = lj_lib_checkany(L, 1);
cTValue *mo;
L->top = o+1; /* Only keep one argument. */
if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
- copyTV(L, L->base-1, mo); /* Replace callable. */
+ copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */
return FFH_TAILCALL;
- } else {
- GCstr *s;
- if (tvisnumber(o)) {
- s = lj_str_fromnumber(L, o);
- } else if (tvispri(o)) {
- s = strV(lj_lib_upvalue(L, -(int32_t)itype(o)));
- } else {
- if (tvisfunc(o) && isffunc(funcV(o)))
- lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid);
- else
- lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1));
- /* Note: lua_pushfstring calls the GC which may invalidate o. */
- s = strV(L->top-1);
- }
- setstrV(L, L->base-1, s);
- return FFH_RES(1);
}
+ lj_gc_check(L);
+ setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base));
+ return FFH_RES(1);
}
/* -- Base library: throw and catch errors -------------------------------- */
@@ -359,7 +359,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.)
static int load_aux(lua_State *L, int status, int envarg)
{
- if (status == 0) {
+ if (status == LUA_OK) {
if (tvistab(L->base+envarg-1)) {
GCfunc *fn = funcV(L->top-1);
GCtab *t = tabV(L->base+envarg-1);
@@ -408,10 +408,22 @@ LJLIB_CF(load)
GCstr *name = lj_lib_optstr(L, 2);
GCstr *mode = lj_lib_optstr(L, 3);
int status;
- if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) {
- GCstr *s = lj_lib_checkstr(L, 1);
+ if (L->base < L->top &&
+ (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) {
+ const char *s;
+ MSize len;
+ if (tvisbuf(L->base)) {
+ SBufExt *sbx = bufV(L->base);
+ s = sbx->r;
+ len = sbufxlen(sbx);
+ if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. */
+ } else {
+ GCstr *str = lj_lib_checkstr(L, 1);
+ s = strdata(str);
+ len = str->len;
+ }
lua_settop(L, 4); /* Ensure env arg exists. */
- status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s),
+ status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s,
mode ? strdata(mode) : NULL);
} else {
lj_lib_checkfunc(L, 1);
@@ -432,7 +444,7 @@ LJLIB_CF(dofile)
GCstr *fname = lj_lib_optstr(L, 1);
setnilV(L->top);
L->top = L->base+1;
- if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0)
+ if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK)
lua_error(L);
lua_call(L, 0, LUA_MULTRET);
return (int)(L->top - L->base) - 1;
@@ -442,20 +454,20 @@ LJLIB_CF(dofile)
LJLIB_CF(gcinfo)
{
- setintV(L->top++, (G(L)->gc.total >> 10));
+ setintV(L->top++, (int32_t)(G(L)->gc.total >> 10));
return 1;
}
LJLIB_CF(collectgarbage)
{
int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */
- "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul");
+ "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning");
int32_t data = lj_lib_optint(L, 2, 0);
if (opt == LUA_GCCOUNT) {
setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0);
} else {
int res = lua_gc(L, opt, data);
- if (opt == LUA_GCSTEP)
+ if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING)
setboolV(L->top, res);
else
setintV(L->top, res);
@@ -507,23 +519,14 @@ LJLIB_CF(print)
tv = L->top-1;
}
shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) &&
- !gcrefu(basemt_it(G(L), LJ_TNUMX));
+ !gcrefu(basemt_it(G(L), LJ_TNUMX));
for (i = 0; i < nargs; i++) {
+ cTValue *o = &L->base[i];
const char *str;
size_t size;
- cTValue *o = &L->base[i];
- if (shortcut && tvisstr(o)) {
- str = strVdata(o);
- size = strV(o)->len;
- } else if (shortcut && tvisint(o)) {
- char buf[LJ_STR_INTBUF];
- char *p = lj_str_bufint(buf, intV(o));
- size = (size_t)(buf+LJ_STR_INTBUF-p);
- str = p;
- } else if (shortcut && tvisnum(o)) {
- char buf[LJ_STR_NUMBUF];
- size = lj_str_bufnum(buf, o);
- str = buf;
+ MSize len;
+ if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) {
+ size = len;
} else {
copyTV(L, L->top+1, o);
copyTV(L, L->top, L->top-1);
@@ -560,8 +563,8 @@ LJLIB_CF(coroutine_status)
co = threadV(L->base);
if (co == L) s = "running";
else if (co->status == LUA_YIELD) s = "suspended";
- else if (co->status != 0) s = "dead";
- else if (co->base > tvref(co->stack)+1) s = "normal";
+ else if (co->status != LUA_OK) s = "dead";
+ else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal";
else if (co->top == co->base) s = "dead";
else s = "suspended";
lua_pushstring(L, s);
@@ -581,6 +584,12 @@ LJLIB_CF(coroutine_running)
#endif
}
+LJLIB_CF(coroutine_isyieldable)
+{
+ setboolV(L->top++, cframe_canyield(L->cframe));
+ return 1;
+}
+
LJLIB_CF(coroutine_create)
{
lua_State *L1;
@@ -600,11 +609,11 @@ LJLIB_ASM(coroutine_yield)
static int ffh_resume(lua_State *L, lua_State *co, int wrap)
{
if (co->cframe != NULL || co->status > LUA_YIELD ||
- (co->status == 0 && co->top == co->base)) {
+ (co->status == LUA_OK && co->top == co->base)) {
ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD;
if (wrap) lj_err_caller(L, em);
- setboolV(L->base-1, 0);
- setstrV(L, L->base, lj_err_str(L, em));
+ setboolV(L->base-1-LJ_FR2, 0);
+ setstrV(L, L->base-LJ_FR2, lj_err_str(L, em));
return FFH_RES(2);
}
lj_state_growstack(co, (MSize)(L->top - L->base));
@@ -645,9 +654,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn);
LJLIB_CF(coroutine_wrap)
{
+ GCfunc *fn;
lj_cf_coroutine_create(L);
- lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
- setpc_wrap_aux(L, funcV(L->top-1));
+ fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1);
+ setpc_wrap_aux(L, fn);
return 1;
}
diff --git a/src/lib_bit.c b/src/lib_bit.c
index 9e75eef3..38c0f578 100644
--- a/src/lib_bit.c
+++ b/src/lib_bit.c
@@ -12,26 +12,99 @@
#include "lj_obj.h"
#include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
+#include "lj_strscan.h"
+#include "lj_strfmt.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#include "lj_carith.h"
+#endif
+#include "lj_ff.h"
#include "lj_lib.h"
/* ------------------------------------------------------------------------ */
#define LJLIB_MODULE_bit
-LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT)
+#if LJ_HASFFI
+static int bit_result64(lua_State *L, CTypeID id, uint64_t x)
{
+ GCcdata *cd = lj_cdata_new_(L, id, 8);
+ *(uint64_t *)cdataptr(cd) = x;
+ setcdataV(L, L->base-1-LJ_FR2, cd);
+ return FFH_RES(1);
+}
+#else
+static int32_t bit_checkbit(lua_State *L, int narg)
+{
+ TValue *o = L->base + narg-1;
+ if (!(o < L->top && lj_strscan_numberobj(o)))
+ lj_err_argt(L, narg, LUA_TNUMBER);
+ if (LJ_LIKELY(tvisint(o))) {
+ return intV(o);
+ } else {
+ int32_t i = lj_num2bit(numV(o));
+ if (LJ_DUALNUM) setintV(o, i);
+ return i;
+ }
+}
+#endif
+
+LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit)
+{
+#if LJ_HASFFI
+ CTypeID id = 0;
+ setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id));
+ return FFH_RES(1);
+#else
+ lj_lib_checknumber(L, 1);
+ return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
+{
+#if LJ_HASFFI
+ CTypeID id = 0;
+ uint64_t x = lj_carith_check64(L, 1, &id);
+ return id ? bit_result64(L, id, ~x) : FFH_RETRY;
+#else
lj_lib_checknumber(L, 1);
return FFH_RETRY;
+#endif
+}
+
+LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
+{
+#if LJ_HASFFI
+ CTypeID id = 0;
+ uint64_t x = lj_carith_check64(L, 1, &id);
+ return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY;
+#else
+ lj_lib_checknumber(L, 1);
+ return FFH_RETRY;
+#endif
}
-LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT)
-LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP)
LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL)
{
+#if LJ_HASFFI
+ CTypeID id = 0, id2 = 0;
+ uint64_t x = lj_carith_check64(L, 1, &id);
+ int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2);
+ if (id) {
+ x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift);
+ return bit_result64(L, id, x);
+ }
+ if (id2) setintV(L->base+1, sh);
+ return FFH_RETRY;
+#else
lj_lib_checknumber(L, 1);
- lj_lib_checkbit(L, 2);
+ bit_checkbit(L, 2);
return FFH_RETRY;
+#endif
}
LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR)
LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR)
@@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR)
LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND)
{
+#if LJ_HASFFI
+ CTypeID id = 0;
+ TValue *o = L->base, *top = L->top;
+ int i = 0;
+ do { lj_carith_check64(L, ++i, &id); } while (++o < top);
+ if (id) {
+ CTState *cts = ctype_cts(L);
+ CType *ct = ctype_get(cts, id);
+ int op = curr_func(L)->c.ffid - (int)FF_bit_bor;
+ uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0;
+ o = L->base;
+ do {
+ lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0);
+ if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x;
+ } while (++o < top);
+ return bit_result64(L, id, y);
+ }
+ return FFH_RETRY;
+#else
int i = 0;
do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top);
return FFH_RETRY;
+#endif
}
LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR)
LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR)
/* ------------------------------------------------------------------------ */
-LJLIB_CF(bit_tohex)
+LJLIB_CF(bit_tohex) LJLIB_REC(.)
{
- uint32_t b = (uint32_t)lj_lib_checkbit(L, 1);
- int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2);
- const char *hexdigits = "0123456789abcdef";
- char buf[8];
- if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; }
- if (n > 8) n = 8;
- for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; }
- lua_pushlstring(L, buf, (size_t)n);
+#if LJ_HASFFI
+ CTypeID id = 0, id2 = 0;
+ uint64_t b = lj_carith_check64(L, 1, &id);
+ int32_t n = L->base+1>=L->top ? (id ? 16 : 8) :
+ (int32_t)lj_carith_check64(L, 2, &id2);
+#else
+ uint32_t b = (uint32_t)bit_checkbit(L, 1);
+ int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2);
+#endif
+ SBuf *sb = lj_buf_tmp_(L);
+ SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
+ if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+ sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
+#if LJ_HASFFI
+ if (n < 16) b &= ((uint64_t)1 << 4*n)-1;
+#else
+ if (n < 8) b &= (1u << 4*n)-1;
+#endif
+ sb = lj_strfmt_putfxint(sb, sf, b);
+ setstrV(L, L->top-1, lj_buf_str(L, sb));
+ lj_gc_check(L);
return 1;
}
diff --git a/src/lib_buffer.c b/src/lib_buffer.c
new file mode 100644
index 00000000..aad8e7eb
--- /dev/null
+++ b/src/lib_buffer.c
@@ -0,0 +1,360 @@
+/*
+** Buffer library.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lib_buffer_c
+#define LUA_LIB
+
+#include "lua.h"
+#include "lauxlib.h"
+#include "lualib.h"
+
+#include "lj_obj.h"
+
+#if LJ_HASBUFFER
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_udata.h"
+#include "lj_meta.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#include "lj_cconv.h"
+#endif
+#include "lj_strfmt.h"
+#include "lj_serialize.h"
+#include "lj_lib.h"
+
+/* -- Helper functions ---------------------------------------------------- */
+
+/* Check that the first argument is a string buffer. */
+static SBufExt *buffer_tobuf(lua_State *L)
+{
+ if (!(L->base < L->top && tvisbuf(L->base)))
+ lj_err_argtype(L, 1, "buffer");
+ return bufV(L->base);
+}
+
+/* Ditto, but for writers. */
+static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ setsbufXL_(sbx, L);
+ return sbx;
+}
+
+#define buffer_toudata(sbx) ((GCudata *)(sbx)-1)
+
+/* -- Buffer methods ------------------------------------------------------ */
+
+#define LJLIB_MODULE_buffer_method
+
+LJLIB_CF(buffer_method_free)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ lj_bufx_free(L, sbx);
+ L->top = L->base+1; /* Chain buffer object. */
+ return 1;
+}
+
+LJLIB_CF(buffer_method_reset) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ lj_bufx_reset(sbx);
+ L->top = L->base+1; /* Chain buffer object. */
+ return 1;
+}
+
+LJLIB_CF(buffer_method_skip) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
+ MSize len = sbufxlen(sbx);
+ if (n < len) {
+ sbx->r += n;
+ } else if (sbufiscow(sbx)) {
+ sbx->r = sbx->w;
+ } else {
+ sbx->r = sbx->w = sbx->b;
+ }
+ L->top = L->base+1; /* Chain buffer object. */
+ return 1;
+}
+
+LJLIB_CF(buffer_method_set) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ GCobj *ref;
+ const char *p;
+ MSize len;
+#if LJ_HASFFI
+ if (tviscdata(L->base+1)) {
+ CTState *cts = ctype_cts(L);
+ lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
+ L->base+1, CCF_ARG(2));
+ len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
+ } else
+#endif
+ {
+ GCstr *str = lj_lib_checkstrx(L, 2);
+ p = strdata(str);
+ len = str->len;
+ }
+ lj_bufx_free(L, sbx);
+ lj_bufx_set_cow(L, sbx, p, len);
+ ref = gcV(L->base+1);
+ setgcref(sbx->cowref, ref);
+ lj_gc_objbarrier(L, buffer_toudata(sbx), ref);
+ L->top = L->base+1; /* Chain buffer object. */
+ return 1;
+}
+
+LJLIB_CF(buffer_method_put) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobufw(L);
+ ptrdiff_t arg, narg = L->top - L->base;
+ for (arg = 1; arg < narg; arg++) {
+ cTValue *o = &L->base[arg], *mo = NULL;
+ retry:
+ if (tvisstr(o)) {
+ lj_buf_putstr((SBuf *)sbx, strV(o));
+ } else if (tvisint(o)) {
+ lj_strfmt_putint((SBuf *)sbx, intV(o));
+ } else if (tvisnum(o)) {
+ lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o));
+ } else if (tvisbuf(o)) {
+ SBufExt *sbx2 = bufV(o);
+ if (sbx2 == sbx) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
+ lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2));
+ } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+ /* Call __tostring metamethod inline. */
+ copyTV(L, L->top++, mo);
+ copyTV(L, L->top++, o);
+ lua_call(L, 1, 1);
+ o = &L->base[arg]; /* The stack may have been reallocated. */
+ copyTV(L, &L->base[arg], L->top-1);
+ L->top = L->base + narg;
+ goto retry; /* Retry with the result. */
+ } else {
+ lj_err_argtype(L, arg+1, "string/number/__tostring");
+ }
+ /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */
+ }
+ L->top = L->base+1; /* Chain buffer object. */
+ lj_gc_check(L);
+ return 1;
+}
+
+LJLIB_CF(buffer_method_putf) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobufw(L);
+ lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2);
+ L->top = L->base+1; /* Chain buffer object. */
+ lj_gc_check(L);
+ return 1;
+}
+
+LJLIB_CF(buffer_method_get) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ ptrdiff_t arg, narg = L->top - L->base;
+ if (narg == 1) {
+ narg++;
+ setnilV(L->top++); /* get() is the same as get(nil). */
+ }
+ for (arg = 1; arg < narg; arg++) {
+ TValue *o = &L->base[arg];
+ MSize n = tvisnil(o) ? LJ_MAX_BUF :
+ (MSize) lj_lib_checkintrange(L, arg+1, 0, LJ_MAX_BUF);
+ MSize len = sbufxlen(sbx);
+ if (n > len) n = len;
+ setstrV(L, o, lj_str_new(L, sbx->r, n));
+ sbx->r += n;
+ }
+ if (sbx->r == sbx->w && !sbufiscow(sbx)) sbx->r = sbx->w = sbx->b;
+ lj_gc_check(L);
+ return narg-1;
+}
+
+#if LJ_HASFFI
+LJLIB_CF(buffer_method_putcdata) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobufw(L);
+ const char *p;
+ MSize len;
+ if (tviscdata(L->base+1)) {
+ CTState *cts = ctype_cts(L);
+ lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p,
+ L->base+1, CCF_ARG(2));
+ } else {
+ lj_err_argtype(L, 2, "cdata");
+ }
+ len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF);
+ lj_buf_putmem((SBuf *)sbx, p, len);
+ L->top = L->base+1; /* Chain buffer object. */
+ return 1;
+}
+
+LJLIB_CF(buffer_method_reserve) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobufw(L);
+ MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
+ GCcdata *cd;
+ lj_buf_more((SBuf *)sbx, sz);
+ ctype_loadffi(L);
+ cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
+ *(void **)cdataptr(cd) = sbx->w;
+ setcdataV(L, L->top++, cd);
+ setintV(L->top++, sbufleft(sbx));
+ return 2;
+}
+
+LJLIB_CF(buffer_method_commit) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF);
+ if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG);
+ sbx->w += len;
+ L->top = L->base+1; /* Chain buffer object. */
+ return 1;
+}
+
+LJLIB_CF(buffer_method_ref) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ GCcdata *cd;
+ ctype_loadffi(L);
+ cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR);
+ *(void **)cdataptr(cd) = sbx->r;
+ setcdataV(L, L->top++, cd);
+ setintV(L->top++, sbufxlen(sbx));
+ return 2;
+}
+#endif
+
+LJLIB_CF(buffer_method_encode) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobufw(L);
+ cTValue *o = lj_lib_checkany(L, 2);
+ lj_serialize_put(sbx, o);
+ lj_gc_check(L);
+ L->top = L->base+1; /* Chain buffer object. */
+ return 1;
+}
+
+LJLIB_CF(buffer_method_decode) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobufw(L);
+ setnilV(L->top++);
+ sbx->r = lj_serialize_get(sbx, L->top-1);
+ lj_gc_check(L);
+ return 1;
+}
+
+LJLIB_CF(buffer_method___gc)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ lj_bufx_free(L, sbx);
+ return 0;
+}
+
+LJLIB_CF(buffer_method___tostring) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx)));
+ lj_gc_check(L);
+ return 1;
+}
+
+LJLIB_CF(buffer_method___len) LJLIB_REC(.)
+{
+ SBufExt *sbx = buffer_tobuf(L);
+ setintV(L->top-1, (int32_t)sbufxlen(sbx));
+ return 1;
+}
+
+LJLIB_PUSH("buffer") LJLIB_SET(__metatable)
+LJLIB_PUSH(top-1) LJLIB_SET(__index)
+
+/* -- Buffer library functions -------------------------------------------- */
+
+#define LJLIB_MODULE_buffer
+
+LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
+
+LJLIB_CF(buffer_new)
+{
+ MSize sz = 0;
+ int targ = 1;
+ GCtab *env, *dict_str = NULL, *dict_mt = NULL;
+ GCudata *ud;
+ SBufExt *sbx;
+ if (L->base < L->top && !tvistab(L->base)) {
+ targ = 2;
+ if (!tvisnil(L->base))
+ sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF);
+ }
+ if (L->base+targ-1 < L->top) {
+ GCtab *options = lj_lib_checktab(L, targ);
+ cTValue *opt_dict, *opt_mt;
+ opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
+ if (opt_dict && tvistab(opt_dict)) {
+ dict_str = tabV(opt_dict);
+ lj_serialize_dict_prep_str(L, dict_str);
+ }
+ opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable"));
+ if (opt_mt && tvistab(opt_mt)) {
+ dict_mt = tabV(opt_mt);
+ lj_serialize_dict_prep_mt(L, dict_mt);
+ }
+ }
+ env = tabref(curr_func(L)->c.env);
+ ud = lj_udata_new(L, sizeof(SBufExt), env);
+ ud->udtype = UDTYPE_BUFFER;
+ /* NOBARRIER: The GCudata is new (marked white). */
+ setgcref(ud->metatable, obj2gco(env));
+ setudataV(L, L->top++, ud);
+ sbx = (SBufExt *)uddata(ud);
+ lj_bufx_init(L, sbx);
+ setgcref(sbx->dict_str, obj2gco(dict_str));
+ setgcref(sbx->dict_mt, obj2gco(dict_mt));
+ if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);
+ lj_gc_check(L);
+ return 1;
+}
+
+LJLIB_CF(buffer_encode) LJLIB_REC(.)
+{
+ cTValue *o = lj_lib_checkany(L, 1);
+ setstrV(L, L->top++, lj_serialize_encode(L, o));
+ lj_gc_check(L);
+ return 1;
+}
+
+LJLIB_CF(buffer_decode) LJLIB_REC(.)
+{
+ GCstr *str = lj_lib_checkstrx(L, 1);
+ setnilV(L->top++);
+ lj_serialize_decode(L, L->top-1, str);
+ lj_gc_check(L);
+ return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#include "lj_libdef.h"
+
+int luaopen_string_buffer(lua_State *L)
+{
+ LJ_LIB_REG(L, NULL, buffer_method);
+ lua_getfield(L, -1, "__tostring");
+ lua_setfield(L, -2, "tostring");
+ LJ_LIB_REG(L, NULL, buffer);
+ return 1;
+}
+
+#endif
diff --git a/src/lib_debug.c b/src/lib_debug.c
index e7d8d24a..3af7a353 100644
--- a/src/lib_debug.c
+++ b/src/lib_debug.c
@@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry)
return 1;
}
-LJLIB_CF(debug_getmetatable)
+LJLIB_CF(debug_getmetatable) LJLIB_REC(.)
{
lj_lib_checkany(L, 1);
if (!lua_getmetatable(L, 1)) {
@@ -231,8 +231,8 @@ LJLIB_CF(debug_upvalueid)
int32_t n = lj_lib_checkint(L, 2) - 1;
if ((uint32_t)n >= fn->l.nupvalues)
lj_err_arg(L, 2, LJ_ERR_IDXRNG);
- setlightudV(L->top-1, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
- (void *)&fn->c.upvalue[n]);
+ lua_pushlightuserdata(L, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
+ (void *)&fn->c.upvalue[n]);
return 1;
}
@@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue)
/* ------------------------------------------------------------------------ */
-static const char KEY_HOOK = 'h';
+#define KEY_HOOK (U64x(80000000,00000000)|'h')
static void hookf(lua_State *L, lua_Debug *ar)
{
static const char *const hooknames[] =
{"call", "return", "line", "count", "tail return"};
- lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+ (L->top++)->u64 = KEY_HOOK;
lua_rawget(L, LUA_REGISTRYINDEX);
if (lua_isfunction(L, -1)) {
lua_pushstring(L, hooknames[(int)ar->event]);
@@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook)
count = luaL_optint(L, arg+3, 0);
func = hookf; mask = makemask(smask, count);
}
- lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+ (L->top++)->u64 = KEY_HOOK;
lua_pushvalue(L, arg+1);
lua_rawset(L, LUA_REGISTRYINDEX);
lua_sethook(L, func, mask, count);
@@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook)
if (hook != NULL && hook != hookf) { /* external hook? */
lua_pushliteral(L, "external hook");
} else {
- lua_pushlightuserdata(L, (void *)&KEY_HOOK);
+ (L->top++)->u64 = KEY_HOOK;
lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */
}
lua_pushstring(L, unmakemask(mask, buff));
diff --git a/src/lib_ffi.c b/src/lib_ffi.c
index 654e71a2..2295cf15 100644
--- a/src/lib_ffi.c
+++ b/src/lib_ffi.c
@@ -29,6 +29,7 @@
#include "lj_ccall.h"
#include "lj_ccallback.h"
#include "lj_clib.h"
+#include "lj_strfmt.h"
#include "lj_ff.h"
#include "lj_lib.h"
@@ -137,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm)
}
}
copyTV(L, base, L->top);
- tv = L->top-1;
+ tv = L->top-1-LJ_FR2;
}
return lj_meta_tailcall(L, tv);
}
@@ -318,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring)
}
}
}
- lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
+ lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p);
checkgc:
lj_gc_check(L);
return 1;
@@ -504,10 +505,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.)
}
if (sz == CTSIZE_INVALID)
lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE);
- if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
- cd = lj_cdata_new(cts, id, sz);
- else
- cd = lj_cdata_newv(cts, id, sz, ctype_align(info));
+ cd = lj_cdata_newx(cts, id, sz, info);
setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */
lj_cconv_ct_init(cts, ct, sz, cdataptr(cd),
o, (MSize)(L->top - o)); /* Initialize cdata. */
@@ -558,6 +556,32 @@ LJLIB_CF(ffi_typeof) LJLIB_REC(.)
return 1;
}
+/* Internal and unsupported API. */
+LJLIB_CF(ffi_typeinfo)
+{
+ CTState *cts = ctype_cts(L);
+ CTypeID id = (CTypeID)ffi_checkint(L, 1);
+ if (id > 0 && id < cts->top) {
+ CType *ct = ctype_get(cts, id);
+ GCtab *t;
+ lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */
+ t = tabV(L->top-1);
+ setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info);
+ if (ct->size != CTSIZE_INVALID)
+ setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size);
+ if (ct->sib)
+ setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib);
+ if (gcref(ct->name)) {
+ GCstr *s = gco2str(gcref(ct->name));
+ if (isdead(G(L), obj2gco(s))) flipwhite(obj2gco(s));
+ setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s);
+ }
+ lj_gc_check(L);
+ return 1;
+ }
+ return 0;
+}
+
LJLIB_CF(ffi_istype) LJLIB_REC(.)
{
CTState *cts = ctype_cts(L);
@@ -697,44 +721,47 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.)
return 0;
}
-#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
-
/* Test ABI string. */
LJLIB_CF(ffi_abi) LJLIB_REC(.)
{
GCstr *s = lj_lib_checkstr(L, 1);
- int b = 0;
- switch (s->hash) {
+ int b = lj_cparse_case(s,
#if LJ_64
- case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */
+ "\00564bit"
#else
- case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */
+ "\00532bit"
#endif
#if LJ_ARCH_HASFPU
- case H_(e33ee463,e33ee463): b = 1; break; /* fpu */
+ "\003fpu"
#endif
#if LJ_ABI_SOFTFP
- case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */
+ "\006softfp"
#else
- case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */
+ "\006hardfp"
#endif
#if LJ_ABI_EABI
- case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */
+ "\004eabi"
#endif
#if LJ_ABI_WIN
- case H_(4ab624a8,4ab624a8): b = 1; break; /* win */
+ "\003win"
#endif
- case H_(3af93066,1f001464): b = 1; break; /* le/be */
- default:
- break;
- }
+#if LJ_TARGET_UWP
+ "\003uwp"
+#endif
+#if LJ_LE
+ "\002le"
+#else
+ "\002be"
+#endif
+#if LJ_GC64
+ "\004gc64"
+#endif
+ ) >= 0;
setboolV(L->top-1, b);
setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */
return 1;
}
-#undef H_
-
LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */
LJLIB_CF(ffi_metatype)
@@ -768,19 +795,11 @@ LJLIB_CF(ffi_gc) LJLIB_REC(.)
GCcdata *cd = ffi_checkcdata(L, 1);
TValue *fin = lj_lib_checkany(L, 2);
CTState *cts = ctype_cts(L);
- GCtab *t = cts->finalizer;
CType *ct = ctype_raw(cts, cd->ctypeid);
if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) ||
ctype_isrefarray(ct->info)))
lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE);
- if (gcref(t->metatable)) { /* Update finalizer table, if still enabled. */
- copyTV(L, lj_tab_set(L, t, L->base), fin);
- lj_gc_anybarriert(L, t);
- if (!tvisnil(fin))
- cd->marked |= LJ_GC_CDATA_FIN;
- else
- cd->marked &= ~LJ_GC_CDATA_FIN;
- }
+ lj_cdata_setfin(L, cd, gcval(fin), itype(fin));
L->top = L->base+1; /* Pass through the cdata object. */
return 1;
}
diff --git a/src/lib_io.c b/src/lib_io.c
index d5786e5d..c22faa24 100644
--- a/src/lib_io.c
+++ b/src/lib_io.c
@@ -19,8 +19,10 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_state.h"
+#include "lj_strfmt.h"
#include "lj_ff.h"
#include "lj_lib.h"
@@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode)
IOFileUD *iof = io_file_new(L);
iof->fp = fopen(fname, mode);
if (iof->fp == NULL)
- luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno)));
+ luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno)));
return iof;
}
@@ -97,11 +99,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
int stat = -1;
#if LJ_TARGET_POSIX
stat = pclose(iof->fp);
-#elif LJ_TARGET_WINDOWS
+#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP
stat = _pclose(iof->fp);
-#else
- lua_assert(0);
- return 0;
#endif
#if LJ_52
iof->fp = NULL;
@@ -110,7 +109,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof)
ok = (stat != -1);
#endif
} else {
- lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF);
+ lj_assertL((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF,
+ "close of unknown FILE* type");
setnilV(L->top++);
lua_pushliteral(L, "cannot close standard file");
return 2;
@@ -145,7 +145,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop)
MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0;
char *buf;
for (;;) {
- buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+ buf = lj_buf_tmp(L, m);
if (fgets(buf+n, m-n, fp) == NULL) break;
n += (MSize)strlen(buf+n);
ok |= n;
@@ -161,7 +161,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
{
MSize m, n;
for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) {
- char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+ char *buf = lj_buf_tmp(L, m);
n += (MSize)fread(buf+n, 1, m-n, fp);
if (n != m) {
setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
@@ -174,7 +174,7 @@ static void io_file_readall(lua_State *L, FILE *fp)
static int io_file_readlen(lua_State *L, FILE *fp, MSize m)
{
if (m) {
- char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m);
+ char *buf = lj_buf_tmp(L, m);
MSize n = (MSize)fread(buf, 1, m, fp);
setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n));
lj_gc_check(L);
@@ -202,13 +202,12 @@ static int io_file_read(lua_State *L, IOFileUD *iof, int start)
for (n = start; nargs-- && ok; n++) {
if (tvisstr(L->base+n)) {
const char *p = strVdata(L->base+n);
- if (p[0] != '*')
- lj_err_arg(L, n+1, LJ_ERR_INVOPT);
- if (p[1] == 'n')
+ if (p[0] == '*') p++;
+ if (p[0] == 'n')
ok = io_file_readnum(L, fp);
- else if ((p[1] & ~0x20) == 'L')
- ok = io_file_readline(L, fp, (p[1] == 'l'));
- else if (p[1] == 'a')
+ else if ((p[0] & ~0x20) == 'L')
+ ok = io_file_readline(L, fp, (p[0] == 'l'));
+ else if (p[0] == 'a')
io_file_readall(L, fp);
else
lj_err_arg(L, n+1, LJ_ERR_INVFMT);
@@ -232,19 +231,11 @@ static int io_file_write(lua_State *L, IOFileUD *iof, int start)
cTValue *tv;
int status = 1;
for (tv = L->base+start; tv < L->top; tv++) {
- if (tvisstr(tv)) {
- MSize len = strV(tv)->len;
- status = status && (fwrite(strVdata(tv), 1, len, fp) == len);
- } else if (tvisint(tv)) {
- char buf[LJ_STR_INTBUF];
- char *p = lj_str_bufint(buf, intV(tv));
- size_t len = (size_t)(buf+LJ_STR_INTBUF-p);
- status = status && (fwrite(p, 1, len, fp) == len);
- } else if (tvisnum(tv)) {
- status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0);
- } else {
+ MSize len;
+ const char *p = lj_strfmt_wstrnum(L, tv, &len);
+ if (!p)
lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING);
- }
+ status = status && (fwrite(p, 1, len, fp) == len);
}
if (LJ_52 && status) {
L->top = L->base+1;
@@ -319,6 +310,14 @@ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0)
return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL);
}
+#if LJ_32 && defined(__ANDROID__) && __ANDROID_API__ < 24
+/* The Android NDK is such an unmatched marvel of engineering. */
+extern int fseeko32(FILE *, long int, int) __asm__("fseeko");
+extern long int ftello32(FILE *) __asm__("ftello");
+#define fseeko(fp, pos, whence) (fseeko32((fp), (pos), (whence)))
+#define ftello(fp) (ftello32((fp)))
+#endif
+
LJLIB_CF(io_method_seek)
{
FILE *fp = io_tofile(L)->fp;
@@ -419,7 +418,7 @@ LJLIB_CF(io_open)
LJLIB_CF(io_popen)
{
-#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS
+#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP)
const char *fname = strdata(lj_lib_checkstr(L, 1));
GCstr *s = lj_lib_optstr(L, 2);
const char *mode = s ? strdata(s) : "r";
@@ -440,7 +439,7 @@ LJLIB_CF(io_popen)
LJLIB_CF(io_tmpfile)
{
IOFileUD *iof = io_file_new(L);
-#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX
iof->fp = NULL; errno = ENOSYS;
#else
iof->fp = tmpfile();
diff --git a/src/lib_jit.c b/src/lib_jit.c
index 83ee0984..2867d420 100644
--- a/src/lib_jit.c
+++ b/src/lib_jit.c
@@ -10,13 +10,17 @@
#include "lauxlib.h"
#include "lualib.h"
-#include "lj_arch.h"
#include "lj_obj.h"
+#include "lj_gc.h"
#include "lj_err.h"
#include "lj_debug.h"
#include "lj_str.h"
#include "lj_tab.h"
+#include "lj_state.h"
#include "lj_bc.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
#if LJ_HASJIT
#include "lj_ir.h"
#include "lj_jit.h"
@@ -24,6 +28,7 @@
#include "lj_iropt.h"
#include "lj_target.h"
#endif
+#include "lj_trace.h"
#include "lj_dispatch.h"
#include "lj_vm.h"
#include "lj_vmevent.h"
@@ -99,8 +104,8 @@ LJLIB_CF(jit_status)
jit_State *J = L2J(L);
L->top = L->base;
setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
- flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING);
- flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING);
+ flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING);
+ flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING);
return (int)(L->top - L->base);
#else
setboolV(L->top++, 0);
@@ -108,6 +113,13 @@ LJLIB_CF(jit_status)
#endif
}
+LJLIB_CF(jit_security)
+{
+ int idx = lj_lib_checkopt(L, 1, -1, LJ_SECURITY_MODESTRING);
+ setintV(L->top++, ((LJ_SECURITY_MODE >> (2*idx)) & 3));
+ return 1;
+}
+
LJLIB_CF(jit_attach)
{
#ifdef LUAJIT_DISABLE_VMEVENT
@@ -222,7 +234,7 @@ LJLIB_CF(jit_util_funcbc)
if (pc < pt->sizebc) {
BCIns ins = proto_bc(pt)[pc];
BCOp op = bc_op(ins);
- lua_assert(op < BC__MAX);
+ lj_assertL(op < BC__MAX, "bad bytecode op %d", op);
setintV(L->top, ins);
setintV(L->top+1, lj_bc_mode[op]);
L->top += 2;
@@ -280,7 +292,7 @@ static GCtrace *jit_checktrace(lua_State *L)
/* Names of link types. ORDER LJ_TRLINK */
static const char *const jit_trlinkname[] = {
"none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion",
- "interpreter", "return"
+ "interpreter", "return", "stitch"
};
/* local info = jit.util.traceinfo(tr) */
@@ -333,6 +345,9 @@ LJLIB_CF(jit_util_tracek)
slot = ir->op2;
ir = &T->ir[ir->op1];
}
+#if LJ_HASFFI
+ if (ir->o == IR_KINT64) ctype_loadffi(L);
+#endif
lj_ir_kvalue(L, L->top-2, ir);
setintV(L->top-1, (int32_t)irt_type(ir->t));
if (slot == -1)
@@ -417,6 +432,12 @@ LJLIB_CF(jit_util_ircalladdr)
#include "lj_libdef.h"
+static int luaopen_jit_util(lua_State *L)
+{
+ LJ_LIB_REG(L, NULL, jit_util);
+ return 1;
+}
+
/* -- jit.opt module ------------------------------------------------------ */
#if LJ_HASJIT
@@ -453,7 +474,7 @@ static int jitopt_flag(jit_State *J, const char *str)
str += str[2] == '-' ? 3 : 2;
set = 0;
}
- for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) {
+ for (opt = JIT_F_OPT; ; opt <<= 1) {
size_t len = *(const uint8_t *)lst;
if (len == 0)
break;
@@ -473,7 +494,7 @@ static int jitopt_param(jit_State *J, const char *str)
int i;
for (i = 0; i < JIT_P__MAX; i++) {
size_t len = *(const uint8_t *)lst;
- lua_assert(len != 0);
+ lj_assertJ(len != 0, "bad JIT_P_STRING");
if (strncmp(str, lst+1, len) == 0 && str[len] == '=') {
int32_t n = 0;
const char *p = &str[len+1];
@@ -514,6 +535,104 @@ LJLIB_CF(jit_opt_start)
#endif
+/* -- jit.profile module -------------------------------------------------- */
+
+#if LJ_HASPROFILE
+
+#define LJLIB_MODULE_jit_profile
+
+/* Not loaded by default, use: local profile = require("jit.profile") */
+
+#define KEY_PROFILE_THREAD (U64x(80000000,00000000)|'t')
+#define KEY_PROFILE_FUNC (U64x(80000000,00000000)|'f')
+
+static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
+ int vmstate)
+{
+ TValue key;
+ cTValue *tv;
+ key.u64 = KEY_PROFILE_FUNC;
+ tv = lj_tab_get(L, tabV(registry(L)), &key);
+ if (tvisfunc(tv)) {
+ char vmst = (char)vmstate;
+ int status;
+ setfuncV(L2, L2->top++, funcV(tv));
+ setthreadV(L2, L2->top++, L);
+ setintV(L2->top++, samples);
+ setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
+ status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */
+ if (status) {
+ if (G(L2)->panic) G(L2)->panic(L2);
+ exit(EXIT_FAILURE);
+ }
+ lj_trace_abort(G(L2));
+ }
+}
+
+/* profile.start(mode, cb) */
+LJLIB_CF(jit_profile_start)
+{
+ GCtab *registry = tabV(registry(L));
+ GCstr *mode = lj_lib_optstr(L, 1);
+ GCfunc *func = lj_lib_checkfunc(L, 2);
+ lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */
+ TValue key;
+ /* Anchor thread and function in registry. */
+ key.u64 = KEY_PROFILE_THREAD;
+ setthreadV(L, lj_tab_set(L, registry, &key), L2);
+ key.u64 = KEY_PROFILE_FUNC;
+ setfuncV(L, lj_tab_set(L, registry, &key), func);
+ lj_gc_anybarriert(L, registry);
+ luaJIT_profile_start(L, mode ? strdata(mode) : "",
+ (luaJIT_profile_callback)jit_profile_callback, L2);
+ return 0;
+}
+
+/* profile.stop() */
+LJLIB_CF(jit_profile_stop)
+{
+ GCtab *registry;
+ TValue key;
+ luaJIT_profile_stop(L);
+ registry = tabV(registry(L));
+ key.u64 = KEY_PROFILE_THREAD;
+ setnilV(lj_tab_set(L, registry, &key));
+ key.u64 = KEY_PROFILE_FUNC;
+ setnilV(lj_tab_set(L, registry, &key));
+ lj_gc_anybarriert(L, registry);
+ return 0;
+}
+
+/* dump = profile.dumpstack([thread,] fmt, depth) */
+LJLIB_CF(jit_profile_dumpstack)
+{
+ lua_State *L2 = L;
+ int arg = 0;
+ size_t len;
+ int depth;
+ GCstr *fmt;
+ const char *p;
+ if (L->top > L->base && tvisthread(L->base)) {
+ L2 = threadV(L->base);
+ arg = 1;
+ }
+ fmt = lj_lib_checkstr(L, arg+1);
+ depth = lj_lib_checkint(L, arg+2);
+ p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len);
+ lua_pushlstring(L, p, len);
+ return 1;
+}
+
+#include "lj_libdef.h"
+
+static int luaopen_jit_profile(lua_State *L)
+{
+ LJ_LIB_REG(L, NULL, jit_profile);
+ return 1;
+}
+
+#endif
+
/* -- JIT compiler initialization ----------------------------------------- */
#if LJ_HASJIT
@@ -524,66 +643,41 @@ JIT_PARAMDEF(JIT_PARAMINIT)
#undef JIT_PARAMINIT
0
};
-#endif
#if LJ_TARGET_ARM && LJ_TARGET_LINUX
#include <sys/utsname.h>
#endif
-/* Arch-dependent CPU detection. */
-static uint32_t jit_cpudetect(lua_State *L)
+/* Arch-dependent CPU feature detection. */
+static uint32_t jit_cpudetect(void)
{
uint32_t flags = 0;
#if LJ_TARGET_X86ORX64
+
uint32_t vendor[4];
uint32_t features[4];
if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
-#if !LJ_HASJIT
-#define JIT_F_CMOV 1
-#define JIT_F_SSE2 2
-#endif
- flags |= ((features[3] >> 15)&1) * JIT_F_CMOV;
- flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
-#if LJ_HASJIT
flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
- if (vendor[2] == 0x6c65746e) { /* Intel. */
- if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */
- flags |= JIT_F_P4; /* Currently unused. */
- else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
- flags |= JIT_F_LEA_AGU;
- } else if (vendor[2] == 0x444d4163) { /* AMD. */
- uint32_t fam = (features[0] & 0x0ff00f00);
- if (fam == 0x00000f00) /* K8. */
- flags |= JIT_F_SPLIT_XMM;
- if (fam >= 0x00000f00) /* K8, K10. */
- flags |= JIT_F_PREFER_IMUL;
+ if (vendor[0] >= 7) {
+ uint32_t xfeatures[4];
+ lj_vm_cpuid(7, xfeatures);
+ flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
}
-#endif
}
- /* Check for required instruction set support on x86 (unnecessary on x64). */
-#if LJ_TARGET_X86
-#if !defined(LUAJIT_CPU_NOCMOV)
- if (!(flags & JIT_F_CMOV))
- luaL_error(L, "CPU not supported");
-#endif
-#if defined(LUAJIT_CPU_SSE2)
- if (!(flags & JIT_F_SSE2))
- luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)");
-#endif
-#endif
+ /* Don't bother checking for SSE2 -- the VM will crash before getting here. */
+
#elif LJ_TARGET_ARM
-#if LJ_HASJIT
+
int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
#if LJ_TARGET_LINUX
if (ver < 70) { /* Runtime ARM CPU detection. */
struct utsname ut;
uname(&ut);
if (strncmp(ut.machine, "armv", 4) == 0) {
- if (ut.machine[4] >= '7')
- ver = 70;
- else if (ut.machine[4] == '6')
- ver = 60;
+ if (ut.machine[4] >= '8') ver = 80;
+ else if (ut.machine[4] == '7') ver = 70;
+ else if (ut.machine[4] == '6') ver = 60;
}
}
#endif
@@ -591,74 +685,77 @@ static uint32_t jit_cpudetect(lua_State *L)
ver >= 61 ? JIT_F_ARMV6T2_ :
ver >= 60 ? JIT_F_ARMV6_ : 0;
flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
-#endif
+
+#elif LJ_TARGET_ARM64
+
+ /* No optional CPU features to detect (for now). */
+
#elif LJ_TARGET_PPC
-#if LJ_HASJIT
+
#if LJ_ARCH_SQRT
flags |= JIT_F_SQRT;
#endif
#if LJ_ARCH_ROUND
flags |= JIT_F_ROUND;
#endif
-#endif
-#elif LJ_TARGET_PPCSPE
- /* Nothing to do. */
+
#elif LJ_TARGET_MIPS
-#if LJ_HASJIT
+
/* Compile-time MIPS CPU detection. */
#if LJ_ARCH_VERSION >= 20
- flags |= JIT_F_MIPS32R2;
+ flags |= JIT_F_MIPSXXR2;
#endif
/* Runtime MIPS CPU detection. */
#if defined(__GNUC__)
- if (!(flags & JIT_F_MIPS32R2)) {
+ if (!(flags & JIT_F_MIPSXXR2)) {
int x;
+#ifdef __mips16
+ x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */
+#else
/* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */
__asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2");
- if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */
- }
#endif
+ if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
+ }
#endif
+
#else
#error "Missing CPU detection for this architecture"
#endif
- UNUSED(L);
return flags;
}
/* Initialize JIT compiler. */
static void jit_init(lua_State *L)
{
- uint32_t flags = jit_cpudetect(L);
-#if LJ_HASJIT
jit_State *J = L2J(L);
-#if LJ_TARGET_X86
- /* Silently turn off the JIT compiler on CPUs without SSE2. */
- if ((flags & JIT_F_SSE2))
-#endif
- J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
+ J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
memcpy(J->param, jit_param_default, sizeof(J->param));
lj_dispatch_update(G(L));
-#else
- UNUSED(flags);
-#endif
}
+#endif
LUALIB_API int luaopen_jit(lua_State *L)
{
+#if LJ_HASJIT
+ jit_init(L);
+#endif
lua_pushliteral(L, LJ_OS_NAME);
lua_pushliteral(L, LJ_ARCH_NAME);
lua_pushinteger(L, LUAJIT_VERSION_NUM);
lua_pushliteral(L, LUAJIT_VERSION);
LJ_LIB_REG(L, LUA_JITLIBNAME, jit);
+#if LJ_HASPROFILE
+ lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile,
+ tabref(L->env));
+#endif
#ifndef LUAJIT_DISABLE_JITUTIL
- LJ_LIB_REG(L, "jit.util", jit_util);
+ lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env));
#endif
#if LJ_HASJIT
LJ_LIB_REG(L, "jit.opt", jit_opt);
#endif
L->top -= 2;
- jit_init(L);
return 1;
}
diff --git a/src/lib_math.c b/src/lib_math.c
index 56fb091b..b677bbcd 100644
--- a/src/lib_math.c
+++ b/src/lib_math.c
@@ -15,6 +15,7 @@
#include "lj_obj.h"
#include "lj_lib.h"
#include "lj_vm.h"
+#include "lj_prng.h"
/* ------------------------------------------------------------------------ */
@@ -33,25 +34,19 @@ LJLIB_ASM(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT)
lj_lib_checknum(L, 1);
return FFH_RETRY;
}
-LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10)
-LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP)
-LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN)
-LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS)
-LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN)
-LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin)
-LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos)
-LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan)
-LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh)
-LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh)
-LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh)
+LJLIB_ASM_(math_log10) LJLIB_REC(math_call IRCALL_log10)
+LJLIB_ASM_(math_exp) LJLIB_REC(math_call IRCALL_exp)
+LJLIB_ASM_(math_sin) LJLIB_REC(math_call IRCALL_sin)
+LJLIB_ASM_(math_cos) LJLIB_REC(math_call IRCALL_cos)
+LJLIB_ASM_(math_tan) LJLIB_REC(math_call IRCALL_tan)
+LJLIB_ASM_(math_asin) LJLIB_REC(math_call IRCALL_asin)
+LJLIB_ASM_(math_acos) LJLIB_REC(math_call IRCALL_acos)
+LJLIB_ASM_(math_atan) LJLIB_REC(math_call IRCALL_atan)
+LJLIB_ASM_(math_sinh) LJLIB_REC(math_call IRCALL_sinh)
+LJLIB_ASM_(math_cosh) LJLIB_REC(math_call IRCALL_cosh)
+LJLIB_ASM_(math_tanh) LJLIB_REC(math_call IRCALL_tanh)
LJLIB_ASM_(math_frexp)
-LJLIB_ASM_(math_modf) LJLIB_REC(.)
-
-LJLIB_PUSH(57.29577951308232)
-LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad)
-
-LJLIB_PUSH(0.017453292519943295)
-LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad)
+LJLIB_ASM_(math_modf)
LJLIB_ASM(math_log) LJLIB_REC(math_log)
{
@@ -63,12 +58,15 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log)
#else
x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y);
#endif
- setnumV(L->base-1, x*y); /* Do NOT join the expression to x / y. */
+ setnumV(L->base-1-LJ_FR2, x*y); /* Do NOT join the expression to x / y. */
return FFH_RES(1);
}
return FFH_RETRY;
}
+LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */
+LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */
+
LJLIB_ASM(math_atan2) LJLIB_REC(.)
{
lj_lib_checknum(L, 1);
@@ -108,34 +106,11 @@ LJLIB_PUSH(1e310) LJLIB_SET(huge)
** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
*/
-/* PRNG state. */
-struct RandomState {
- uint64_t gen[4]; /* State of the 4 LFSR generators. */
- int valid; /* State is valid. */
-};
-
/* Union needed for bit-pattern conversion between uint64_t and double. */
typedef union { uint64_t u64; double d; } U64double;
-/* Update generator i and compute a running xor of all states. */
-#define TW223_GEN(i, k, q, s) \
- z = rs->gen[i]; \
- z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
- r ^= z; rs->gen[i] = z;
-
-/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */
-LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs)
-{
- uint64_t z, r = 0;
- TW223_GEN(0, 63, 31, 18)
- TW223_GEN(1, 58, 19, 28)
- TW223_GEN(2, 55, 24, 7)
- TW223_GEN(3, 47, 21, 8)
- return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
-}
-
-/* PRNG initialization function. */
-static void random_init(RandomState *rs, double d)
+/* PRNG seeding function. */
+static void random_seed(PRNGState *rs, double d)
{
uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */
int i;
@@ -144,24 +119,22 @@ static void random_init(RandomState *rs, double d)
uint32_t m = 1u << (r&255);
r >>= 8;
u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354;
- if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */
- rs->gen[i] = u.u64;
+ if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of u[i] are non-zero. */
+ rs->u[i] = u.u64;
}
- rs->valid = 1;
for (i = 0; i < 10; i++)
- lj_math_random_step(rs);
+ (void)lj_prng_u64(rs);
}
/* PRNG extract function. */
-LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
+LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
LJLIB_CF(math_random) LJLIB_REC(.)
{
int n = (int)(L->top - L->base);
- RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+ PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
U64double u;
double d;
- if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0);
- u.u64 = lj_math_random_step(rs);
+ u.u64 = lj_prng_u64d(rs);
d = u.d - 1.0;
if (n > 0) {
#if LJ_DUALNUM
@@ -206,11 +179,11 @@ LJLIB_CF(math_random) LJLIB_REC(.)
}
/* PRNG seed function. */
-LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */
+LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */
LJLIB_CF(math_randomseed)
{
- RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
- random_init(rs, lj_lib_checknum(L, 1));
+ PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1))));
+ random_seed(rs, lj_lib_checknum(L, 1));
return 0;
}
@@ -220,14 +193,9 @@ LJLIB_CF(math_randomseed)
LUALIB_API int luaopen_math(lua_State *L)
{
- RandomState *rs;
- rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState));
- rs->valid = 0; /* Use lazy initialization to save some time on startup. */
+ PRNGState *rs = (PRNGState *)lua_newuserdata(L, sizeof(PRNGState));
+ lj_prng_seed_fixed(rs);
LJ_LIB_REG(L, LUA_MATHLIBNAME, math);
-#if defined(LUA_COMPAT_MOD) && !LJ_52
- lua_getfield(L, -1, "fmod");
- lua_setfield(L, -2, "mod");
-#endif
return 1;
}
diff --git a/src/lib_os.c b/src/lib_os.c
index 7ad7dfaf..6bcd0147 100644
--- a/src/lib_os.c
+++ b/src/lib_os.c
@@ -17,7 +17,10 @@
#include "lualib.h"
#include "lj_obj.h"
+#include "lj_gc.h"
#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
#include "lj_lib.h"
#if LJ_TARGET_POSIX
@@ -73,7 +76,7 @@ LJLIB_CF(os_rename)
LJLIB_CF(os_tmpname)
{
-#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA
+#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX
lj_err_caller(L, LJ_ERR_OSUNIQF);
return 0;
#else
@@ -188,7 +191,7 @@ LJLIB_CF(os_date)
#endif
}
if (stm == NULL) { /* Invalid date? */
- setnilV(L->top-1);
+ setnilV(L->top++);
} else if (strcmp(s, "*t") == 0) {
lua_createtable(L, 0, 9); /* 9 = number of fields */
setfield(L, "sec", stm->tm_sec);
@@ -200,23 +203,25 @@ LJLIB_CF(os_date)
setfield(L, "wday", stm->tm_wday+1);
setfield(L, "yday", stm->tm_yday+1);
setboolfield(L, "isdst", stm->tm_isdst);
- } else {
- char cc[3];
- luaL_Buffer b;
- cc[0] = '%'; cc[2] = '\0';
- luaL_buffinit(L, &b);
- for (; *s; s++) {
- if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */
- luaL_addchar(&b, *s);
- } else {
- size_t reslen;
- char buff[200]; /* Should be big enough for any conversion result. */
- cc[1] = *(++s);
- reslen = strftime(buff, sizeof(buff), cc, stm);
- luaL_addlstring(&b, buff, reslen);
+ } else if (*s) {
+ SBuf *sb = &G(L)->tmpbuf;
+ MSize sz = 0, retry = 4;
+ const char *q;
+ for (q = s; *q; q++)
+ sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */
+ setsbufL(sb, L);
+ while (retry--) { /* Limit growth for invalid format or empty result. */
+ char *buf = lj_buf_need(sb, sz);
+ size_t len = strftime(buf, sbufsz(sb), s, stm);
+ if (len) {
+ setstrV(L, L->top++, lj_str_new(L, buf, len));
+ lj_gc_check(L);
+ break;
}
+ sz += (sz|1);
}
- luaL_pushresult(&b);
+ } else {
+ setstrV(L, L->top++, &G(L)->strempty);
}
return 1;
}
diff --git a/src/lib_package.c b/src/lib_package.c
index d2ef474f..63a91211 100644
--- a/src/lib_package.c
+++ b/src/lib_package.c
@@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym)
BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
#endif
+#if LJ_TARGET_UWP
+void *LJ_WIN_LOADLIBA(const char *path)
+{
+ DWORD err = GetLastError();
+ wchar_t wpath[256];
+ HANDLE lib = NULL;
+ if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) {
+ lib = LoadPackagedLibrary(wpath, 0);
+ }
+ SetLastError(err);
+ return lib;
+}
+#endif
+
#undef setprogdir
static void setprogdir(lua_State *L)
@@ -96,9 +110,17 @@ static void setprogdir(lua_State *L)
static void pusherror(lua_State *L)
{
DWORD error = GetLastError();
+#if LJ_TARGET_XBOXONE
+ wchar_t wbuffer[128];
+ char buffer[128*2];
+ if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) &&
+ WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL))
+#else
char buffer[128];
if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM,
NULL, error, 0, buffer, sizeof(buffer), NULL))
+#endif
lua_pushstring(L, buffer);
else
lua_pushfstring(L, "system error %d\n", error);
@@ -111,7 +133,7 @@ static void ll_unloadlib(void *lib)
static void *ll_load(lua_State *L, const char *path, int gl)
{
- HINSTANCE lib = LoadLibraryA(path);
+ HINSTANCE lib = LJ_WIN_LOADLIBA(path);
if (lib == NULL) pusherror(L);
UNUSED(gl);
return lib;
@@ -124,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym)
return f;
}
+#if LJ_TARGET_UWP
+EXTERN_C IMAGE_DOS_HEADER __ImageBase;
+#endif
+
static const char *ll_bcsym(void *lib, const char *sym)
{
if (lib) {
return (const char *)GetProcAddress((HINSTANCE)lib, sym);
} else {
+#if LJ_TARGET_UWP
+ return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym);
+#else
HINSTANCE h = GetModuleHandleA(NULL);
const char *p = (const char *)GetProcAddress(h, sym);
if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
(const char *)ll_bcsym, &h))
p = (const char *)GetProcAddress(h, sym);
return p;
+#endif
}
}
@@ -185,8 +215,7 @@ static void **ll_register(lua_State *L, const char *path)
lua_pop(L, 1);
plib = (void **)lua_newuserdata(L, sizeof(void *));
*plib = NULL;
- luaL_getmetatable(L, "_LOADLIB");
- lua_setmetatable(L, -2);
+ luaL_setmetatable(L, "_LOADLIB");
lua_pushfstring(L, "LOADLIB: %s", path);
lua_pushvalue(L, -2);
lua_settable(L, LUA_REGISTRYINDEX);
@@ -396,8 +425,7 @@ static int lj_cf_package_loader_preload(lua_State *L)
/* ------------------------------------------------------------------------ */
-static const int sentinel_ = 0;
-#define sentinel ((void *)&sentinel_)
+#define KEY_SENTINEL (U64x(80000000,00000000)|'s')
static int lj_cf_package_require(lua_State *L)
{
@@ -407,7 +435,7 @@ static int lj_cf_package_require(lua_State *L)
lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
lua_getfield(L, 2, name);
if (lua_toboolean(L, -1)) { /* is it there? */
- if (lua_touserdata(L, -1) == sentinel) /* check loops */
+ if ((L->top-1)->u64 == KEY_SENTINEL) /* check loops */
luaL_error(L, "loop or previous error loading module " LUA_QS, name);
return 1; /* package is already loaded */
}
@@ -430,14 +458,14 @@ static int lj_cf_package_require(lua_State *L)
else
lua_pop(L, 1);
}
- lua_pushlightuserdata(L, sentinel);
+ (L->top++)->u64 = KEY_SENTINEL;
lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */
lua_pushstring(L, name); /* pass name as argument to module */
lua_call(L, 1, 1); /* run loaded module */
if (!lua_isnil(L, -1)) /* non-nil return? */
lua_setfield(L, 2, name); /* _LOADED[name] = returned value */
lua_getfield(L, 2, name);
- if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */
+ if ((L->top-1)->u64 == KEY_SENTINEL) { /* module did not set a value? */
lua_pushboolean(L, 1); /* use true as result */
lua_pushvalue(L, -1); /* extra copy to be returned */
lua_setfield(L, 2, name); /* _LOADED[name] = true */
@@ -487,29 +515,19 @@ static void modinit(lua_State *L, const char *modname)
static int lj_cf_package_module(lua_State *L)
{
const char *modname = luaL_checkstring(L, 1);
- int loaded = lua_gettop(L) + 1; /* index of _LOADED table */
- lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED");
- lua_getfield(L, loaded, modname); /* get _LOADED[modname] */
- if (!lua_istable(L, -1)) { /* not found? */
- lua_pop(L, 1); /* remove previous result */
- /* try global variable (and create one if it does not exist) */
- if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL)
- lj_err_callerv(L, LJ_ERR_BADMODN, modname);
- lua_pushvalue(L, -1);
- lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */
- }
- /* check whether table already has a _NAME field */
+ int lastarg = (int)(L->top - L->base);
+ luaL_pushmodule(L, modname, 1);
lua_getfield(L, -1, "_NAME");
- if (!lua_isnil(L, -1)) { /* is table an initialized module? */
+ if (!lua_isnil(L, -1)) { /* Module already initialized? */
lua_pop(L, 1);
- } else { /* no; initialize it */
+ } else {
lua_pop(L, 1);
modinit(L, modname);
}
lua_pushvalue(L, -1);
setfenv(L);
- dooptions(L, loaded - 1);
- return 0;
+ dooptions(L, lastarg);
+ return LJ_52;
}
static int lj_cf_package_seeall(lua_State *L)
@@ -580,13 +598,16 @@ LUALIB_API int luaopen_package(lua_State *L)
lj_lib_pushcf(L, lj_cf_package_unloadlib, 1);
lua_setfield(L, -2, "__gc");
luaL_register(L, LUA_LOADLIBNAME, package_lib);
- lua_pushvalue(L, -1);
- lua_replace(L, LUA_ENVIRONINDEX);
+ lua_copy(L, -1, LUA_ENVIRONINDEX);
lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0);
for (i = 0; package_loaders[i] != NULL; i++) {
lj_lib_pushcf(L, package_loaders[i], 1);
lua_rawseti(L, -2, i+1);
}
+#if LJ_52
+ lua_pushvalue(L, -1);
+ lua_setfield(L, -3, "searchers");
+#endif
lua_setfield(L, -2, "loaders");
lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
noenv = lua_toboolean(L, -1);
diff --git a/src/lib_string.c b/src/lib_string.c
index 60bb8088..79aeddfc 100644
--- a/src/lib_string.c
+++ b/src/lib_string.c
@@ -6,8 +6,6 @@
** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
*/
-#include <stdio.h>
-
#define lib_string_c
#define LUA_LIB
@@ -18,6 +16,7 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_meta.h"
@@ -25,17 +24,19 @@
#include "lj_ff.h"
#include "lj_bcdump.h"
#include "lj_char.h"
+#include "lj_strfmt.h"
#include "lj_lib.h"
/* ------------------------------------------------------------------------ */
#define LJLIB_MODULE_string
-LJLIB_ASM(string_len) LJLIB_REC(.)
-{
- lj_lib_checkstr(L, 1);
- return FFH_RETRY;
-}
+LJLIB_LUA(string_len) /*
+ function(s)
+ CHECK_str(s)
+ return #s
+ end
+*/
LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
{
@@ -57,21 +58,21 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0)
lj_state_checkstack(L, (MSize)n);
p = (const unsigned char *)strdata(s) + start;
for (i = 0; i < n; i++)
- setintV(L->base + i-1, p[i]);
+ setintV(L->base + i-1-LJ_FR2, p[i]);
return FFH_RES(n);
}
-LJLIB_ASM(string_char)
+LJLIB_ASM(string_char) LJLIB_REC(.)
{
int i, nargs = (int)(L->top - L->base);
- char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs);
+ char *buf = lj_buf_tmp(L, (MSize)nargs);
for (i = 1; i <= nargs; i++) {
int32_t k = lj_lib_checkint(L, i);
if (!checku8(k))
lj_err_arg(L, i, LJ_ERR_BADVAL);
buf[i-1] = (char)k;
}
- setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs));
+ setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs));
return FFH_RES(1);
}
@@ -83,68 +84,38 @@ LJLIB_ASM(string_sub) LJLIB_REC(string_range 1)
return FFH_RETRY;
}
-LJLIB_ASM(string_rep)
+LJLIB_CF(string_rep) LJLIB_REC(.)
{
GCstr *s = lj_lib_checkstr(L, 1);
- int32_t k = lj_lib_checkint(L, 2);
+ int32_t rep = lj_lib_checkint(L, 2);
GCstr *sep = lj_lib_optstr(L, 3);
- int32_t len = (int32_t)s->len;
- global_State *g = G(L);
- int64_t tlen;
- const char *src;
- char *buf;
- if (k <= 0) {
- empty:
- setstrV(L, L->base-1, &g->strempty);
- return FFH_RES(1);
- }
- if (sep) {
- tlen = (int64_t)len + sep->len;
- if (tlen > LJ_MAX_STR)
- lj_err_caller(L, LJ_ERR_STROV);
- tlen *= k;
- if (tlen > LJ_MAX_STR)
- lj_err_caller(L, LJ_ERR_STROV);
- } else {
- tlen = (int64_t)k * len;
- if (tlen > LJ_MAX_STR)
- lj_err_caller(L, LJ_ERR_STROV);
- }
- if (tlen == 0) goto empty;
- buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen);
- src = strdata(s);
- if (sep) {
- tlen -= sep->len; /* Ignore trailing separator. */
- if (k > 1) { /* Paste one string and one separator. */
- int32_t i;
- i = 0; while (i < len) *buf++ = src[i++];
- src = strdata(sep); len = sep->len;
- i = 0; while (i < len) *buf++ = src[i++];
- src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */
- }
+ SBuf *sb = lj_buf_tmp_(L);
+ if (sep && rep > 1) {
+ GCstr *s2 = lj_buf_cat2str(L, sep, s);
+ lj_buf_reset(sb);
+ lj_buf_putstr(sb, s);
+ s = s2;
+ rep--;
}
- do {
- int32_t i = 0;
- do { *buf++ = src[i++]; } while (i < len);
- } while (--k > 0);
- setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen));
- return FFH_RES(1);
+ sb = lj_buf_putstr_rep(sb, s, rep);
+ setstrV(L, L->top-1, lj_buf_str(L, sb));
+ lj_gc_check(L);
+ return 1;
}
-LJLIB_ASM(string_reverse)
+LJLIB_ASM(string_reverse) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse)
{
- GCstr *s = lj_lib_checkstr(L, 1);
- lj_str_needbuf(L, &G(L)->tmpbuf, s->len);
+ lj_lib_checkstr(L, 1);
return FFH_RETRY;
}
-LJLIB_ASM_(string_lower)
-LJLIB_ASM_(string_upper)
+LJLIB_ASM_(string_lower) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower)
+LJLIB_ASM_(string_upper) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper)
/* ------------------------------------------------------------------------ */
-static int writer_buf(lua_State *L, const void *p, size_t size, void *b)
+static int writer_buf(lua_State *L, const void *p, size_t size, void *sb)
{
- luaL_addlstring((luaL_Buffer *)b, (const char *)p, size);
+ lj_buf_putmem((SBuf *)sb, p, (MSize)size);
UNUSED(L);
return 0;
}
@@ -153,19 +124,19 @@ LJLIB_CF(string_dump)
{
GCfunc *fn = lj_lib_checkfunc(L, 1);
int strip = L->base+1 < L->top && tvistruecond(L->base+1);
- luaL_Buffer b;
+ SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */
L->top = L->base+1;
- luaL_buffinit(L, &b);
- if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip))
+ if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip))
lj_err_caller(L, LJ_ERR_STRDUMP);
- luaL_pushresult(&b);
+ setstrV(L, L->top-1, lj_buf_str(L, sb));
+ lj_gc_check(L);
return 1;
}
/* ------------------------------------------------------------------------ */
/* macro to `unsign' a character */
-#define uchar(c) ((unsigned char)(c))
+#define uchar(c) ((unsigned char)(c))
#define CAP_UNFINISHED (-1)
#define CAP_POSITION (-2)
@@ -183,7 +154,6 @@ typedef struct MatchState {
} MatchState;
#define L_ESC '%'
-#define SPECIALS "^$*+?.([%-"
static int check_capture(MatchState *ms, int l)
{
@@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p)
return s;
}
-static const char *lmemfind(const char *s1, size_t l1,
- const char *s2, size_t l2)
-{
- if (l2 == 0) {
- return s1; /* empty strings are everywhere */
- } else if (l2 > l1) {
- return NULL; /* avoids a negative `l1' */
- } else {
- const char *init; /* to search for a `*s2' inside `s1' */
- l2--; /* 1st char will be checked by `memchr' */
- l1 = l1-l2; /* `s2' cannot be found after that */
- while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) {
- init++; /* 1st char is already checked */
- if (memcmp(init, s2+1, l2) == 0) {
- return init-1;
- } else { /* correct `l1' and `s1' to try again */
- l1 -= (size_t)(init-s1);
- s1 = init;
- }
- }
- return NULL; /* not found */
- }
-}
-
static void push_onecapture(MatchState *ms, int i, const char *s, const char *e)
{
if (i >= ms->level) {
@@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e)
return nlevels; /* number of strings pushed */
}
-static ptrdiff_t posrelat(ptrdiff_t pos, size_t len)
-{
- /* relative string position: negative means back from end */
- if (pos < 0) pos += (ptrdiff_t)len + 1;
- return (pos >= 0) ? pos : 0;
-}
-
static int str_find_aux(lua_State *L, int find)
{
- size_t l1, l2;
- const char *s = luaL_checklstring(L, 1, &l1);
- const char *p = luaL_checklstring(L, 2, &l2);
- ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1;
- if (init < 0) {
- init = 0;
- } else if ((size_t)(init) > l1) {
+ GCstr *s = lj_lib_checkstr(L, 1);
+ GCstr *p = lj_lib_checkstr(L, 2);
+ int32_t start = lj_lib_optint(L, 3, 1);
+ MSize st;
+ if (start < 0) start += (int32_t)s->len; else start--;
+ if (start < 0) start = 0;
+ st = (MSize)start;
+ if (st > s->len) {
#if LJ_52
setnilV(L->top-1);
return 1;
#else
- init = (ptrdiff_t)l1;
+ st = s->len;
#endif
}
- if (find && (lua_toboolean(L, 4) || /* explicit request? */
- strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */
- /* do a plain search */
- const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2);
- if (s2) {
- lua_pushinteger(L, s2-s+1);
- lua_pushinteger(L, s2-s+(ptrdiff_t)l2);
+ if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) ||
+ !lj_str_haspattern(p))) { /* Search for fixed string. */
+ const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len);
+ if (q) {
+ setintV(L->top-2, (int32_t)(q-strdata(s)) + 1);
+ setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len);
return 2;
}
- } else {
+ } else { /* Search for pattern. */
MatchState ms;
- int anchor = (*p == '^') ? (p++, 1) : 0;
- const char *s1=s+init;
+ const char *pstr = strdata(p);
+ const char *sstr = strdata(s) + st;
+ int anchor = 0;
+ if (*pstr == '^') { pstr++; anchor = 1; }
ms.L = L;
- ms.src_init = s;
- ms.src_end = s+l1;
- do {
- const char *res;
+ ms.src_init = strdata(s);
+ ms.src_end = strdata(s) + s->len;
+ do { /* Loop through string and try to match the pattern. */
+ const char *q;
ms.level = ms.depth = 0;
- if ((res=match(&ms, s1, p)) != NULL) {
+ q = match(&ms, sstr, pstr);
+ if (q) {
if (find) {
- lua_pushinteger(L, s1-s+1); /* start */
- lua_pushinteger(L, res-s); /* end */
- return push_captures(&ms, NULL, 0) + 2;
+ setintV(L->top++, (int32_t)(sstr-(strdata(s)-1)));
+ setintV(L->top++, (int32_t)(q-strdata(s)));
+ return push_captures(&ms, NULL, NULL) + 2;
} else {
- return push_captures(&ms, s1, res);
+ return push_captures(&ms, sstr, q);
}
}
- } while (s1++ < ms.src_end && !anchor);
+ } while (sstr++ < ms.src_end && !anchor);
}
- lua_pushnil(L); /* not found */
+ setnilV(L->top-1); /* Not found. */
return 1;
}
-LJLIB_CF(string_find)
+LJLIB_CF(string_find) LJLIB_REC(.)
{
return str_find_aux(L, 1);
}
@@ -698,222 +640,16 @@ LJLIB_CF(string_gsub)
/* ------------------------------------------------------------------------ */
-/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */
-#define MAX_FMTITEM 512
-/* valid flags in a format specification */
-#define FMT_FLAGS "-+ #0"
-/*
-** maximum size of each format specification (such as '%-099.99d')
-** (+10 accounts for %99.99x plus margin of error)
-*/
-#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10)
-
-static void addquoted(lua_State *L, luaL_Buffer *b, int arg)
-{
- GCstr *str = lj_lib_checkstr(L, arg);
- int32_t len = (int32_t)str->len;
- const char *s = strdata(str);
- luaL_addchar(b, '"');
- while (len--) {
- uint32_t c = uchar(*s);
- if (c == '"' || c == '\\' || c == '\n') {
- luaL_addchar(b, '\\');
- } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
- uint32_t d;
- luaL_addchar(b, '\\');
- if (c >= 100 || lj_char_isdigit(uchar(s[1]))) {
- luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100;
- goto tens;
- } else if (c >= 10) {
- tens:
- d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d);
- }
- c += '0';
- }
- luaL_addchar(b, c);
- s++;
- }
- luaL_addchar(b, '"');
-}
-
-static const char *scanformat(lua_State *L, const char *strfrmt, char *form)
-{
- const char *p = strfrmt;
- while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */
- if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS))
- lj_err_caller(L, LJ_ERR_STRFMTR);
- if (lj_char_isdigit(uchar(*p))) p++; /* skip width */
- if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
- if (*p == '.') {
- p++;
- if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */
- if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */
- }
- if (lj_char_isdigit(uchar(*p)))
- lj_err_caller(L, LJ_ERR_STRFMTW);
- *(form++) = '%';
- strncpy(form, strfrmt, (size_t)(p - strfrmt + 1));
- form += p - strfrmt + 1;
- *form = '\0';
- return p;
-}
-
-static void addintlen(char *form)
-{
- size_t l = strlen(form);
- char spec = form[l - 1];
- strcpy(form + l - 1, LUA_INTFRMLEN);
- form[l + sizeof(LUA_INTFRMLEN) - 2] = spec;
- form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0';
-}
-
-static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg)
-{
- if (sizeof(LUA_INTFRM_T) == 4) {
- return (LUA_INTFRM_T)lj_lib_checkbit(L, arg);
- } else {
- cTValue *o;
- lj_lib_checknumber(L, arg);
- o = L->base+arg-1;
- if (tvisint(o))
- return (LUA_INTFRM_T)intV(o);
- else
- return (LUA_INTFRM_T)numV(o);
- }
-}
-
-static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg)
+LJLIB_CF(string_format) LJLIB_REC(.)
{
- if (sizeof(LUA_INTFRM_T) == 4) {
- return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg);
- } else {
- cTValue *o;
- lj_lib_checknumber(L, arg);
- o = L->base+arg-1;
- if (tvisint(o))
- return (unsigned LUA_INTFRM_T)intV(o);
- else if ((int32_t)o->u32.hi < 0)
- return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o);
- else
- return (unsigned LUA_INTFRM_T)numV(o);
- }
-}
-
-static GCstr *meta_tostring(lua_State *L, int arg)
-{
- TValue *o = L->base+arg-1;
- cTValue *mo;
- lua_assert(o < L->top); /* Caller already checks for existence. */
- if (LJ_LIKELY(tvisstr(o)))
- return strV(o);
- if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
- copyTV(L, L->top++, mo);
- copyTV(L, L->top++, o);
- lua_call(L, 1, 1);
- L->top--;
- if (tvisstr(L->top))
- return strV(L->top);
- o = L->base+arg-1;
- copyTV(L, o, L->top);
- }
- if (tvisnumber(o)) {
- return lj_str_fromnumber(L, o);
- } else if (tvisnil(o)) {
- return lj_str_newlit(L, "nil");
- } else if (tvisfalse(o)) {
- return lj_str_newlit(L, "false");
- } else if (tvistrue(o)) {
- return lj_str_newlit(L, "true");
- } else {
- if (tvisfunc(o) && isffunc(funcV(o)))
- lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid);
- else
- lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg));
- L->top--;
- return strV(L->top);
- }
-}
-
-LJLIB_CF(string_format)
-{
- int arg = 1, top = (int)(L->top - L->base);
- GCstr *fmt = lj_lib_checkstr(L, arg);
- const char *strfrmt = strdata(fmt);
- const char *strfrmt_end = strfrmt + fmt->len;
- luaL_Buffer b;
- luaL_buffinit(L, &b);
- while (strfrmt < strfrmt_end) {
- if (*strfrmt != L_ESC) {
- luaL_addchar(&b, *strfrmt++);
- } else if (*++strfrmt == L_ESC) {
- luaL_addchar(&b, *strfrmt++); /* %% */
- } else { /* format item */
- char form[MAX_FMTSPEC]; /* to store the format (`%...') */
- char buff[MAX_FMTITEM]; /* to store the formatted item */
- int n = 0;
- if (++arg > top)
- luaL_argerror(L, arg, lj_obj_typename[0]);
- strfrmt = scanformat(L, strfrmt, form);
- switch (*strfrmt++) {
- case 'c':
- n = sprintf(buff, form, lj_lib_checkint(L, arg));
- break;
- case 'd': case 'i':
- addintlen(form);
- n = sprintf(buff, form, num2intfrm(L, arg));
- break;
- case 'o': case 'u': case 'x': case 'X':
- addintlen(form);
- n = sprintf(buff, form, num2uintfrm(L, arg));
- break;
- case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': {
- TValue tv;
- tv.n = lj_lib_checknum(L, arg);
- if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) {
- /* Canonicalize output of non-finite values. */
- char *p, nbuf[LJ_STR_NUMBUF];
- size_t len = lj_str_bufnum(nbuf, &tv);
- if (strfrmt[-1] < 'a') {
- nbuf[len-3] = nbuf[len-3] - 0x20;
- nbuf[len-2] = nbuf[len-2] - 0x20;
- nbuf[len-1] = nbuf[len-1] - 0x20;
- }
- nbuf[len] = '\0';
- for (p = form; *p < 'A' && *p != '.'; p++) ;
- *p++ = 's'; *p = '\0';
- n = sprintf(buff, form, nbuf);
- break;
- }
- n = sprintf(buff, form, (double)tv.n);
- break;
- }
- case 'q':
- addquoted(L, &b, arg);
- continue;
- case 'p':
- lj_str_pushf(L, "%p", lua_topointer(L, arg));
- luaL_addvalue(&b);
- continue;
- case 's': {
- GCstr *str = meta_tostring(L, arg);
- if (!strchr(form, '.') && str->len >= 100) {
- /* no precision and string is too long to be formatted;
- keep original string */
- setstrV(L, L->top++, str);
- luaL_addvalue(&b);
- continue;
- }
- n = sprintf(buff, form, strdata(str));
- break;
- }
- default:
- lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1));
- break;
- }
- luaL_addlstring(&b, buff, n);
- }
- }
- luaL_pushresult(&b);
+ int retry = 0;
+ SBuf *sb;
+ do {
+ sb = lj_buf_tmp_(L);
+ retry = lj_strfmt_putarg(L, sb, 1, -retry);
+ } while (retry > 0);
+ setstrV(L, L->top-1, lj_buf_str(L, sb));
+ lj_gc_check(L);
return 1;
}
@@ -926,16 +662,15 @@ LUALIB_API int luaopen_string(lua_State *L)
GCtab *mt;
global_State *g;
LJ_LIB_REG(L, LUA_STRLIBNAME, string);
-#if defined(LUA_COMPAT_GFIND) && !LJ_52
- lua_getfield(L, -1, "gmatch");
- lua_setfield(L, -2, "gfind");
-#endif
mt = lj_tab_new(L, 0, 1);
/* NOBARRIER: basemt is a GC root. */
g = G(L);
setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt));
settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1));
mt->nomm = (uint8_t)(~(1u<<MM_index));
+#if LJ_HASBUFFER
+ lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1));
+#endif
return 1;
}
diff --git a/src/lib_table.c b/src/lib_table.c
index dc89116f..a723326a 100644
--- a/src/lib_table.c
+++ b/src/lib_table.c
@@ -16,57 +16,43 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_tab.h"
+#include "lj_ff.h"
#include "lj_lib.h"
/* ------------------------------------------------------------------------ */
#define LJLIB_MODULE_table
-LJLIB_CF(table_foreachi)
-{
- GCtab *t = lj_lib_checktab(L, 1);
- GCfunc *func = lj_lib_checkfunc(L, 2);
- MSize i, n = lj_tab_len(t);
- for (i = 1; i <= n; i++) {
- cTValue *val;
- setfuncV(L, L->top, func);
- setintV(L->top+1, i);
- val = lj_tab_getint(t, (int32_t)i);
- if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); }
- L->top += 3;
- lua_call(L, 2, 1);
- if (!tvisnil(L->top-1))
- return 1;
- L->top--;
- }
- return 0;
-}
+LJLIB_LUA(table_foreachi) /*
+ function(t, f)
+ CHECK_tab(t)
+ CHECK_func(f)
+ for i=1,#t do
+ local r = f(i, t[i])
+ if r ~= nil then return r end
+ end
+ end
+*/
-LJLIB_CF(table_foreach)
-{
- GCtab *t = lj_lib_checktab(L, 1);
- GCfunc *func = lj_lib_checkfunc(L, 2);
- L->top = L->base+3;
- setnilV(L->top-1);
- while (lj_tab_next(L, t, L->top-1)) {
- copyTV(L, L->top+2, L->top);
- copyTV(L, L->top+1, L->top-1);
- setfuncV(L, L->top, func);
- L->top += 3;
- lua_call(L, 2, 1);
- if (!tvisnil(L->top-1))
- return 1;
- L->top--;
- }
- return 0;
-}
+LJLIB_LUA(table_foreach) /*
+ function(t, f)
+ CHECK_tab(t)
+ CHECK_func(f)
+ for k, v in PAIRS(t) do
+ local r = f(k, v)
+ if r ~= nil then return r end
+ end
+ end
+*/
-LJLIB_ASM(table_getn) LJLIB_REC(.)
-{
- lj_lib_checktab(L, 1);
- return FFH_UNREACHABLE;
-}
+LJLIB_LUA(table_getn) /*
+ function(t)
+ CHECK_tab(t)
+ return #t
+ end
+*/
LJLIB_CF(table_maxn)
{
@@ -119,52 +105,67 @@ LJLIB_CF(table_insert) LJLIB_REC(.)
return 0;
}
-LJLIB_CF(table_remove) LJLIB_REC(.)
-{
- GCtab *t = lj_lib_checktab(L, 1);
- int32_t e = (int32_t)lj_tab_len(t);
- int32_t pos = lj_lib_optint(L, 2, e);
- if (!(1 <= pos && pos <= e)) /* Nothing to remove? */
- return 0;
- lua_rawgeti(L, 1, pos); /* Get previous value. */
- /* NOBARRIER: This just moves existing elements around. */
- for (; pos < e; pos++) {
- cTValue *src = lj_tab_getint(t, pos+1);
- TValue *dst = lj_tab_setint(L, t, pos);
- if (src) {
- copyTV(L, dst, src);
- } else {
- setnilV(dst);
- }
- }
- setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */
- return 1; /* Return previous value. */
-}
+LJLIB_LUA(table_remove) /*
+ function(t, pos)
+ CHECK_tab(t)
+ local len = #t
+ if pos == nil then
+ if len ~= 0 then
+ local old = t[len]
+ t[len] = nil
+ return old
+ end
+ else
+ CHECK_int(pos)
+ if pos >= 1 and pos <= len then
+ local old = t[pos]
+ for i=pos+1,len do
+ t[i-1] = t[i]
+ end
+ t[len] = nil
+ return old
+ end
+ end
+ end
+*/
+
+LJLIB_LUA(table_move) /*
+ function(a1, f, e, t, a2)
+ CHECK_tab(a1)
+ CHECK_int(f)
+ CHECK_int(e)
+ CHECK_int(t)
+ if a2 == nil then a2 = a1 end
+ CHECK_tab(a2)
+ if e >= f then
+ local d = t - f
+ if t > e or t <= f or a2 ~= a1 then
+ for i=f,e do a2[i+d] = a1[i] end
+ else
+ for i=e,f,-1 do a2[i+d] = a1[i] end
+ end
+ end
+ return a2
+ end
+*/
-LJLIB_CF(table_concat)
+LJLIB_CF(table_concat) LJLIB_REC(.)
{
- luaL_Buffer b;
GCtab *t = lj_lib_checktab(L, 1);
GCstr *sep = lj_lib_optstr(L, 2);
- MSize seplen = sep ? sep->len : 0;
int32_t i = lj_lib_optint(L, 3, 1);
int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ?
lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t);
- luaL_buffinit(L, &b);
- if (i <= e) {
- for (;;) {
- cTValue *o;
- lua_rawgeti(L, 1, i);
- o = L->top-1;
- if (!(tvisstr(o) || tvisnumber(o)))
- lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i);
- luaL_addvalue(&b);
- if (i++ == e) break;
- if (seplen)
- luaL_addlstring(&b, strdata(sep), seplen);
- }
+ SBuf *sb = lj_buf_tmp_(L);
+ SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e);
+ if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */
+ int32_t idx = (int32_t)(intptr_t)sb->w;
+ cTValue *o = lj_tab_getint(t, idx);
+ lj_err_callerv(L, LJ_ERR_TABCAT,
+ lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx);
}
- luaL_pushresult(&b);
+ setstrV(L, L->top-1, lj_buf_str(L, sbx));
+ lj_gc_check(L);
return 1;
}
@@ -284,6 +285,30 @@ LJLIB_CF(table_pack)
}
#endif
+LJLIB_NOREG LJLIB_CF(table_new) LJLIB_REC(.)
+{
+ int32_t a = lj_lib_checkint(L, 1);
+ int32_t h = lj_lib_checkint(L, 2);
+ lua_createtable(L, a, h);
+ return 1;
+}
+
+LJLIB_NOREG LJLIB_CF(table_clear) LJLIB_REC(.)
+{
+ lj_tab_clear(lj_lib_checktab(L, 1));
+ return 0;
+}
+
+static int luaopen_table_new(lua_State *L)
+{
+ return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new");
+}
+
+static int luaopen_table_clear(lua_State *L)
+{
+ return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear");
+}
+
/* ------------------------------------------------------------------------ */
#include "lj_libdef.h"
@@ -295,6 +320,8 @@ LUALIB_API int luaopen_table(lua_State *L)
lua_getglobal(L, "unpack");
lua_setfield(L, -2, "unpack");
#endif
+ lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1));
+ lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1));
return 1;
}
diff --git a/src/lj.supp b/src/lj.supp
deleted file mode 100644
index 217f7c89..00000000
--- a/src/lj.supp
+++ /dev/null
@@ -1,41 +0,0 @@
-# Valgrind suppression file for LuaJIT 2.0.
-{
- Optimized string compare
- Memcheck:Addr4
- fun:lj_str_cmp
-}
-{
- Optimized string compare
- Memcheck:Addr1
- fun:lj_str_cmp
-}
-{
- Optimized string compare
- Memcheck:Addr4
- fun:lj_str_new
-}
-{
- Optimized string compare
- Memcheck:Addr1
- fun:lj_str_new
-}
-{
- Optimized string compare
- Memcheck:Cond
- fun:lj_str_new
-}
-{
- Optimized string compare
- Memcheck:Addr4
- fun:str_fastcmp
-}
-{
- Optimized string compare
- Memcheck:Addr1
- fun:str_fastcmp
-}
-{
- Optimized string compare
- Memcheck:Cond
- fun:str_fastcmp
-}
diff --git a/src/lj_alloc.c b/src/lj_alloc.c
index 9adaa0e5..20e60493 100644
--- a/src/lj_alloc.c
+++ b/src/lj_alloc.c
@@ -31,6 +31,7 @@
#include "lj_def.h"
#include "lj_arch.h"
#include "lj_alloc.h"
+#include "lj_prng.h"
#ifndef LUAJIT_USE_SYSMALLOC
@@ -72,15 +73,58 @@
#define IS_DIRECT_BIT (SIZE_T_ONE)
+
+/* Determine system-specific block allocation method. */
#if LJ_TARGET_WINDOWS
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
+#define LJ_ALLOC_VIRTUALALLOC 1
+
+#if LJ_64 && !LJ_GC64
+#define LJ_ALLOC_NTAVM 1
+#endif
+
+#else
+
+#include <errno.h>
+/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
+#include <sys/mman.h>
+
+#define LJ_ALLOC_MMAP 1
+
#if LJ_64
+#define LJ_ALLOC_MMAP_PROBE 1
+
+#if LJ_GC64
+#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */
+#elif LJ_TARGET_X64 && LJ_HASJIT
+/* Due to limitations in the x64 compiler backend. */
+#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */
+#else
+#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */
+#endif
+
+#endif
+
+#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
+#define LJ_ALLOC_MMAP32 1
+#endif
+
+#if LJ_TARGET_LINUX
+#define LJ_ALLOC_MREMAP 1
+#endif
+
+#endif
+
+
+#if LJ_ALLOC_VIRTUALALLOC
+
+#if LJ_ALLOC_NTAVM
/* Undocumented, but hey, that's what we all love so much about Windows. */
-typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits,
+typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits,
size_t *size, ULONG alloctype, ULONG prot);
static PNTAVM ntavm;
@@ -89,14 +133,15 @@ static PNTAVM ntavm;
*/
#define NTAVM_ZEROBITS 1
-static void INIT_MMAP(void)
+static void init_mmap(void)
{
ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
"NtAllocateVirtualMemory");
}
+#define INIT_MMAP() init_mmap()
/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+static void *mmap_plain(size_t size)
{
DWORD olderr = GetLastError();
void *ptr = NULL;
@@ -107,7 +152,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size)
}
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
-static LJ_AINLINE void *DIRECT_MMAP(size_t size)
+static void *direct_mmap(size_t size)
{
DWORD olderr = GetLastError();
void *ptr = NULL;
@@ -119,31 +164,32 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size)
#else
-#define INIT_MMAP() ((void)0)
-
/* Win32 MMAP via VirtualAlloc */
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+static void *mmap_plain(size_t size)
{
DWORD olderr = GetLastError();
- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
SetLastError(olderr);
return ptr ? ptr : MFAIL;
}
/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
-static LJ_AINLINE void *DIRECT_MMAP(size_t size)
+static void *direct_mmap(size_t size)
{
DWORD olderr = GetLastError();
- void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
- PAGE_READWRITE);
+ void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
+ PAGE_READWRITE);
SetLastError(olderr);
return ptr ? ptr : MFAIL;
}
#endif
+#define CALL_MMAP(prng, size) mmap_plain(size)
+#define DIRECT_MMAP(prng, size) direct_mmap(size)
+
/* This function supports releasing coalesed segments */
-static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
+static int CALL_MUNMAP(void *ptr, size_t size)
{
DWORD olderr = GetLastError();
MEMORY_BASIC_INFORMATION minfo;
@@ -163,10 +209,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
return 0;
}
-#else
-
-#include <errno.h>
-#include <sys/mman.h>
+#elif LJ_ALLOC_MMAP
#define MMAP_PROT (PROT_READ|PROT_WRITE)
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
@@ -174,105 +217,134 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
#endif
#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS)
-#if LJ_64
-/* 64 bit mode needs special support for allocating memory in the lower 2GB. */
+#if LJ_ALLOC_MMAP_PROBE
-#if defined(MAP_32BIT)
-
-#if defined(__sun__)
-#define MMAP_REGION_START ((uintptr_t)0x1000)
+#ifdef MAP_TRYFIXED
+#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED)
#else
-/* Actually this only gives us max. 1GB in current Linux kernels. */
-#define MMAP_REGION_START ((uintptr_t)0)
+#define MMAP_FLAGS_PROBE MMAP_FLAGS
#endif
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+#define LJ_ALLOC_MMAP_PROBE_MAX 30
+#define LJ_ALLOC_MMAP_PROBE_LINEAR 5
+
+#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000)
+
+static void *mmap_probe(PRNGState *rs, size_t size)
{
+ /* Hint for next allocation. Doesn't need to be thread-safe. */
+ static uintptr_t hint_addr = 0;
int olderr = errno;
- void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
+ int retry;
+ for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
+ void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
+ uintptr_t addr = (uintptr_t)p;
+ if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER &&
+ ((addr + size) >> LJ_ALLOC_MBITS) == 0) {
+ /* We got a suitable address. Bump the hint address. */
+ hint_addr = addr + size;
+ errno = olderr;
+ return p;
+ }
+ if (p != MFAIL) {
+ munmap(p, size);
+ } else if (errno == ENOMEM) {
+ return MFAIL;
+ }
+ if (hint_addr) {
+ /* First, try linear probing. */
+ if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
+ hint_addr += 0x1000000;
+ if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
+ hint_addr = 0;
+ continue;
+ } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
+ /* Next, try a no-hint probe to get back an ASLR address. */
+ hint_addr = 0;
+ continue;
+ }
+ }
+ /* Finally, try pseudo-random probing. */
+ do {
+ hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<<LJ_ALLOC_MBITS)-LJ_PAGESIZE);
+ } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
+ }
errno = olderr;
- return ptr;
+ return MFAIL;
}
-#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN
+#endif
+
+#if LJ_ALLOC_MMAP32
-/* OSX and FreeBSD mmap() use a naive first-fit linear search.
-** That's perfect for us. Except that -pagezero_size must be set for OSX,
-** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs
-** to be reduced to 250MB on FreeBSD.
-*/
-#if LJ_TARGET_OSX || defined(__DragonFly__)
-#define MMAP_REGION_START ((uintptr_t)0x10000)
-#elif LJ_TARGET_PS4
-#define MMAP_REGION_START ((uintptr_t)0x4000)
+#if LJ_TARGET_SOLARIS
+#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000)
#else
-#define MMAP_REGION_START ((uintptr_t)0x10000000)
+#define LJ_ALLOC_MMAP32_START ((uintptr_t)0)
#endif
-#define MMAP_REGION_END ((uintptr_t)0x80000000)
-#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
-#include <sys/resource.h>
+#if LJ_ALLOC_MMAP_PROBE
+static void *mmap_map32(PRNGState *rs, size_t size)
+#else
+static void *mmap_map32(size_t size)
#endif
-
-static LJ_AINLINE void *CALL_MMAP(size_t size)
{
- int olderr = errno;
- /* Hint for next allocation. Doesn't need to be thread-safe. */
- static uintptr_t alloc_hint = MMAP_REGION_START;
- int retry = 0;
-#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4
- static int rlimit_modified = 0;
- if (LJ_UNLIKELY(rlimit_modified == 0)) {
- struct rlimit rlim;
- rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START;
- setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */
- rlimit_modified = 1;
- }
+#if LJ_ALLOC_MMAP_PROBE
+ static int fallback = 0;
+ if (fallback)
+ return mmap_probe(rs, size);
#endif
- for (;;) {
- void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
- if ((uintptr_t)p >= MMAP_REGION_START &&
- (uintptr_t)p + size < MMAP_REGION_END) {
- alloc_hint = (uintptr_t)p + size;
- errno = olderr;
- return p;
+ {
+ int olderr = errno;
+ void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
+ errno = olderr;
+ /* This only allows 1GB on Linux. So fallback to probing to get 2GB. */
+#if LJ_ALLOC_MMAP_PROBE
+ if (ptr == MFAIL) {
+ fallback = 1;
+ return mmap_probe(rs, size);
}
- if (p != CMFAIL) munmap(p, size);
-#if defined(__sun__) || defined(__DragonFly__)
- alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */
- if (alloc_hint + size < MMAP_REGION_END) continue;
#endif
- if (retry) break;
- retry = 1;
- alloc_hint = MMAP_REGION_START;
+ return ptr;
}
- errno = olderr;
- return CMFAIL;
}
-#else
-
-#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS"
-
#endif
+#if LJ_ALLOC_MMAP32
+#if LJ_ALLOC_MMAP_PROBE
+#define CALL_MMAP(prng, size) mmap_map32(prng, size)
#else
-
-/* 32 bit mode is easy. */
-static LJ_AINLINE void *CALL_MMAP(size_t size)
+#define CALL_MMAP(prng, size) mmap_map32(size)
+#endif
+#elif LJ_ALLOC_MMAP_PROBE
+#define CALL_MMAP(prng, size) mmap_probe(prng, size)
+#else
+static void *mmap_plain(size_t size)
{
int olderr = errno;
void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
errno = olderr;
return ptr;
}
-
+#define CALL_MMAP(prng, size) mmap_plain(size)
#endif
-#define INIT_MMAP() ((void)0)
-#define DIRECT_MMAP(s) CALL_MMAP(s)
+#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 && !LJ_TARGET_PS5
+
+#include <sys/resource.h>
-static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
+static void init_mmap(void)
+{
+ struct rlimit rlim;
+ rlim.rlim_cur = rlim.rlim_max = 0x10000;
+ setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */
+}
+#define INIT_MMAP() init_mmap()
+
+#endif
+
+static int CALL_MUNMAP(void *ptr, size_t size)
{
int olderr = errno;
int ret = munmap(ptr, size);
@@ -280,10 +352,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size)
return ret;
}
-#if LJ_TARGET_LINUX
+#if LJ_ALLOC_MREMAP
/* Need to define _GNU_SOURCE to get the mremap prototype. */
-static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
- int flags)
+static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
{
int olderr = errno;
ptr = mremap(ptr, osz, nsz, flags);
@@ -294,7 +365,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
#define CALL_MREMAP_NOMOVE 0
#define CALL_MREMAP_MAYMOVE 1
-#if LJ_64
+#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
#define CALL_MREMAP_MV CALL_MREMAP_NOMOVE
#else
#define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE
@@ -303,6 +374,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz,
#endif
+
+#ifndef INIT_MMAP
+#define INIT_MMAP() ((void)0)
+#endif
+
+#ifndef DIRECT_MMAP
+#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s)
+#endif
+
#ifndef CALL_MREMAP
#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
#endif
@@ -459,6 +539,7 @@ struct malloc_state {
mchunkptr smallbins[(NSMALLBINS+1)*2];
tbinptr treebins[NTREEBINS];
msegment seg;
+ PRNGState *prng;
};
typedef struct malloc_state *mstate;
@@ -516,7 +597,7 @@ static int has_segment_link(mstate m, msegmentptr ss)
noncontiguous segments are added.
*/
#define TOP_FOOT_SIZE\
- (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
+ (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)
/* ---------------------------- Indexing Bins ---------------------------- */
@@ -741,11 +822,11 @@ static int has_segment_link(mstate m, msegmentptr ss)
/* ----------------------- Direct-mmapping chunks ----------------------- */
-static void *direct_alloc(size_t nb)
+static void *direct_alloc(mstate m, size_t nb)
{
size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */
- char *mm = (char *)(DIRECT_MMAP(mmsize));
+ char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize));
if (mm != CMFAIL) {
size_t offset = align_offset(chunk2mem(mm));
size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
@@ -757,6 +838,7 @@ static void *direct_alloc(size_t nb)
return chunk2mem(p);
}
}
+ UNUSED(m);
return NULL;
}
@@ -905,7 +987,7 @@ static void *alloc_sys(mstate m, size_t nb)
/* Directly map large chunks */
if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
- void *mem = direct_alloc(nb);
+ void *mem = direct_alloc(m, nb);
if (mem != 0)
return mem;
}
@@ -914,7 +996,7 @@ static void *alloc_sys(mstate m, size_t nb)
size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
size_t rsize = granularity_align(req);
if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
- char *mp = (char *)(CALL_MMAP(rsize));
+ char *mp = (char *)(CALL_MMAP(m->prng, rsize));
if (mp != CMFAIL) {
tbase = mp;
tsize = rsize;
@@ -1141,12 +1223,13 @@ static void *tmalloc_small(mstate m, size_t nb)
/* ----------------------------------------------------------------------- */
-void *lj_alloc_create(void)
+void *lj_alloc_create(PRNGState *rs)
{
size_t tsize = DEFAULT_GRANULARITY;
char *tbase;
INIT_MMAP();
- tbase = (char *)(CALL_MMAP(tsize));
+ UNUSED(rs);
+ tbase = (char *)(CALL_MMAP(rs, tsize));
if (tbase != CMFAIL) {
size_t msize = pad_request(sizeof(struct malloc_state));
mchunkptr mn;
@@ -1165,6 +1248,12 @@ void *lj_alloc_create(void)
return NULL;
}
+void lj_alloc_setprng(void *msp, PRNGState *rs)
+{
+ mstate ms = (mstate)msp;
+ ms->prng = rs;
+}
+
void lj_alloc_destroy(void *msp)
{
mstate ms = (mstate)msp;
diff --git a/src/lj_alloc.h b/src/lj_alloc.h
index f87a7cf3..669f50b7 100644
--- a/src/lj_alloc.h
+++ b/src/lj_alloc.h
@@ -9,7 +9,8 @@
#include "lj_def.h"
#ifndef LUAJIT_USE_SYSMALLOC
-LJ_FUNC void *lj_alloc_create(void);
+LJ_FUNC void *lj_alloc_create(PRNGState *rs);
+LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs);
LJ_FUNC void lj_alloc_destroy(void *msp);
LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize);
#endif
diff --git a/src/lj_api.c b/src/lj_api.c
index 04a41792..d869ebf8 100644
--- a/src/lj_api.c
+++ b/src/lj_api.c
@@ -24,11 +24,12 @@
#include "lj_trace.h"
#include "lj_vm.h"
#include "lj_strscan.h"
+#include "lj_strfmt.h"
/* -- Common helper functions --------------------------------------------- */
-#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base))
-#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L))
+#define lj_checkapi_slot(idx) \
+ lj_checkapi((idx) <= (L->top - L->base), "stack slot %d out of range", (idx))
static TValue *index2adr(lua_State *L, int idx)
{
@@ -36,7 +37,8 @@ static TValue *index2adr(lua_State *L, int idx)
TValue *o = L->base + (idx - 1);
return o < L->top ? o : niltv(L);
} else if (idx > LUA_REGISTRYINDEX) {
- api_check(L, idx != 0 && -idx <= L->top - L->base);
+ lj_checkapi(idx != 0 && -idx <= L->top - L->base,
+ "bad stack slot %d", idx);
return L->top + idx;
} else if (idx == LUA_GLOBALSINDEX) {
TValue *o = &G(L)->tmptv;
@@ -46,7 +48,8 @@ static TValue *index2adr(lua_State *L, int idx)
return registry(L);
} else {
GCfunc *fn = curr_func(L);
- api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn));
+ lj_checkapi(fn->c.gct == ~LJ_TFUNC && !isluafunc(fn),
+ "calling frame is not a C function");
if (idx == LUA_ENVIRONINDEX) {
TValue *o = &G(L)->tmptv;
settabV(L, o, tabref(fn->c.env));
@@ -58,13 +61,27 @@ static TValue *index2adr(lua_State *L, int idx)
}
}
-static TValue *stkindex2adr(lua_State *L, int idx)
+static LJ_AINLINE TValue *index2adr_check(lua_State *L, int idx)
+{
+ TValue *o = index2adr(L, idx);
+ lj_checkapi(o != niltv(L), "invalid stack slot %d", idx);
+ return o;
+}
+
+static TValue *index2adr_stack(lua_State *L, int idx)
{
if (idx > 0) {
TValue *o = L->base + (idx - 1);
+ if (o < L->top) {
+ return o;
+ } else {
+ lj_checkapi(0, "invalid stack slot %d", idx);
+ return niltv(L);
+ }
return o < L->top ? o : niltv(L);
} else {
- api_check(L, idx != 0 && -idx <= L->top - L->base);
+ lj_checkapi(idx != 0 && -idx <= L->top - L->base,
+ "invalid stack slot %d", idx);
return L->top + idx;
}
}
@@ -98,17 +115,24 @@ LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg)
lj_err_callerv(L, LJ_ERR_STKOVM, msg);
}
-LUA_API void lua_xmove(lua_State *from, lua_State *to, int n)
+LUA_API void lua_xmove(lua_State *L, lua_State *to, int n)
{
TValue *f, *t;
- if (from == to) return;
- api_checknelems(from, n);
- api_check(from, G(from) == G(to));
+ if (L == to) return;
+ lj_checkapi_slot(n);
+ lj_checkapi(G(L) == G(to), "move across global states");
lj_state_checkstack(to, (MSize)n);
- f = from->top;
+ f = L->top;
t = to->top = to->top + n;
while (--n >= 0) copyTV(to, --t, --f);
- from->top = f;
+ L->top = f;
+}
+
+LUA_API const lua_Number *lua_version(lua_State *L)
+{
+ static const lua_Number version = LUA_VERSION_NUM;
+ UNUSED(L);
+ return &version;
}
/* -- Stack manipulation -------------------------------------------------- */
@@ -121,7 +145,7 @@ LUA_API int lua_gettop(lua_State *L)
LUA_API void lua_settop(lua_State *L, int idx)
{
if (idx >= 0) {
- api_check(L, idx <= tvref(L->maxstack) - L->base);
+ lj_checkapi(idx <= tvref(L->maxstack) - L->base, "bad stack slot %d", idx);
if (L->base + idx > L->top) {
if (L->base + idx >= tvref(L->maxstack))
lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base));
@@ -130,51 +154,58 @@ LUA_API void lua_settop(lua_State *L, int idx)
L->top = L->base + idx;
}
} else {
- api_check(L, -(idx+1) <= (L->top - L->base));
+ lj_checkapi(-(idx+1) <= (L->top - L->base), "bad stack slot %d", idx);
L->top += idx+1; /* Shrinks top (idx < 0). */
}
}
LUA_API void lua_remove(lua_State *L, int idx)
{
- TValue *p = stkindex2adr(L, idx);
- api_checkvalidindex(L, p);
+ TValue *p = index2adr_stack(L, idx);
while (++p < L->top) copyTV(L, p-1, p);
L->top--;
}
LUA_API void lua_insert(lua_State *L, int idx)
{
- TValue *q, *p = stkindex2adr(L, idx);
- api_checkvalidindex(L, p);
+ TValue *q, *p = index2adr_stack(L, idx);
for (q = L->top; q > p; q--) copyTV(L, q, q-1);
copyTV(L, p, L->top);
}
-LUA_API void lua_replace(lua_State *L, int idx)
+static void copy_slot(lua_State *L, TValue *f, int idx)
{
- api_checknelems(L, 1);
if (idx == LUA_GLOBALSINDEX) {
- api_check(L, tvistab(L->top-1));
+ lj_checkapi(tvistab(f), "stack slot %d is not a table", idx);
/* NOBARRIER: A thread (i.e. L) is never black. */
- setgcref(L->env, obj2gco(tabV(L->top-1)));
+ setgcref(L->env, obj2gco(tabV(f)));
} else if (idx == LUA_ENVIRONINDEX) {
GCfunc *fn = curr_func(L);
if (fn->c.gct != ~LJ_TFUNC)
lj_err_msg(L, LJ_ERR_NOENV);
- api_check(L, tvistab(L->top-1));
- setgcref(fn->c.env, obj2gco(tabV(L->top-1)));
- lj_gc_barrier(L, fn, L->top-1);
+ lj_checkapi(tvistab(f), "stack slot %d is not a table", idx);
+ setgcref(fn->c.env, obj2gco(tabV(f)));
+ lj_gc_barrier(L, fn, f);
} else {
- TValue *o = index2adr(L, idx);
- api_checkvalidindex(L, o);
- copyTV(L, o, L->top-1);
+ TValue *o = index2adr_check(L, idx);
+ copyTV(L, o, f);
if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */
- lj_gc_barrier(L, curr_func(L), L->top-1);
+ lj_gc_barrier(L, curr_func(L), f);
}
+}
+
+LUA_API void lua_replace(lua_State *L, int idx)
+{
+ lj_checkapi_slot(1);
+ copy_slot(L, L->top - 1, idx);
L->top--;
}
+LUA_API void lua_copy(lua_State *L, int fromidx, int toidx)
+{
+ copy_slot(L, index2adr(L, fromidx), toidx);
+}
+
LUA_API void lua_pushvalue(lua_State *L, int idx)
{
copyTV(L, L->top, index2adr(L, idx));
@@ -188,7 +219,7 @@ LUA_API int lua_type(lua_State *L, int idx)
cTValue *o = index2adr(L, idx);
if (tvisnumber(o)) {
return LUA_TNUMBER;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
} else if (tvislightud(o)) {
return LUA_TLIGHTUSERDATA;
#endif
@@ -201,7 +232,7 @@ LUA_API int lua_type(lua_State *L, int idx)
#else
int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u);
#endif
- lua_assert(tt != LUA_TNIL || tvisnil(o));
+ lj_assertL(tt != LUA_TNIL || tvisnil(o), "bad tag conversion");
return tt;
}
}
@@ -268,7 +299,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
return 0;
} else if (tvispri(o1)) {
return o1 != niltv(L) && o2 != niltv(L);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
} else if (tvislightud(o1)) {
return o1->u64 == o2->u64;
#endif
@@ -283,8 +314,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2)
} else {
L->top = base+2;
lj_vm_call(L, base, 1+1);
- L->top -= 2;
- return tvistruecond(L->top+1);
+ L->top -= 2+LJ_FR2;
+ return tvistruecond(L->top+1+LJ_FR2);
}
}
}
@@ -306,8 +337,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2)
} else {
L->top = base+2;
lj_vm_call(L, base, 1+1);
- L->top -= 2;
- return tvistruecond(L->top+1);
+ L->top -= 2+LJ_FR2;
+ return tvistruecond(L->top+1+LJ_FR2);
}
}
}
@@ -324,6 +355,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx)
return 0;
}
+LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok)
+{
+ cTValue *o = index2adr(L, idx);
+ TValue tmp;
+ if (LJ_LIKELY(tvisnumber(o))) {
+ if (ok) *ok = 1;
+ return numberVnum(o);
+ } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) {
+ if (ok) *ok = 1;
+ return numV(&tmp);
+ } else {
+ if (ok) *ok = 0;
+ return 0;
+ }
+}
+
LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx)
{
cTValue *o = index2adr(L, idx);
@@ -361,9 +408,38 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp)))
return 0;
if (tvisint(&tmp))
- return (lua_Integer)intV(&tmp);
+ return intV(&tmp);
+ n = numV(&tmp);
+ }
+#if LJ_64
+ return (lua_Integer)n;
+#else
+ return lj_num2int(n);
+#endif
+}
+
+LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
+{
+ cTValue *o = index2adr(L, idx);
+ TValue tmp;
+ lua_Number n;
+ if (LJ_LIKELY(tvisint(o))) {
+ if (ok) *ok = 1;
+ return intV(o);
+ } else if (LJ_LIKELY(tvisnum(o))) {
+ n = numV(o);
+ } else {
+ if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) {
+ if (ok) *ok = 0;
+ return 0;
+ }
+ if (tvisint(&tmp)) {
+ if (ok) *ok = 1;
+ return intV(&tmp);
+ }
n = numV(&tmp);
}
+ if (ok) *ok = 1;
#if LJ_64
return (lua_Integer)n;
#else
@@ -434,7 +510,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len)
} else if (tvisnumber(o)) {
lj_gc_check(L);
o = index2adr(L, idx); /* GC may move the stack. */
- s = lj_str_fromnumber(L, o);
+ s = lj_strfmt_number(L, o);
setstrV(L, o, s);
} else {
if (len != NULL) *len = 0;
@@ -453,7 +529,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len)
} else if (tvisnumber(o)) {
lj_gc_check(L);
o = index2adr(L, idx); /* GC may move the stack. */
- s = lj_str_fromnumber(L, o);
+ s = lj_strfmt_number(L, o);
setstrV(L, o, s);
} else {
lj_err_argt(L, idx, LUA_TSTRING);
@@ -475,7 +551,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx,
} else if (tvisnumber(o)) {
lj_gc_check(L);
o = index2adr(L, idx); /* GC may move the stack. */
- s = lj_str_fromnumber(L, o);
+ s = lj_strfmt_number(L, o);
setstrV(L, o, s);
} else {
lj_err_argt(L, idx, LUA_TSTRING);
@@ -507,7 +583,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx)
} else if (tvisudata(o)) {
return udataV(o)->len;
} else if (tvisnumber(o)) {
- GCstr *s = lj_str_fromnumber(L, o);
+ GCstr *s = lj_strfmt_number(L, o);
setstrV(L, o, s);
return s->len;
} else {
@@ -532,7 +608,7 @@ LUA_API void *lua_touserdata(lua_State *L, int idx)
if (tvisudata(o))
return uddata(udataV(o));
else if (tvislightud(o))
- return lightudV(o);
+ return lightudV(G(L), o);
else
return NULL;
}
@@ -545,17 +621,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx)
LUA_API const void *lua_topointer(lua_State *L, int idx)
{
- cTValue *o = index2adr(L, idx);
- if (tvisudata(o))
- return uddata(udataV(o));
- else if (tvislightud(o))
- return lightudV(o);
- else if (tviscdata(o))
- return cdataptr(cdataV(o));
- else if (tvisgcv(o))
- return gcV(o);
- else
- return NULL;
+ return lj_obj_ptr(G(L), index2adr(L, idx));
}
/* -- Stack setters (object creation) ------------------------------------- */
@@ -606,7 +672,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt,
va_list argp)
{
lj_gc_check(L);
- return lj_str_pushvf(L, fmt, argp);
+ return lj_strfmt_pushvf(L, fmt, argp);
}
LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
@@ -615,7 +681,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...)
va_list argp;
lj_gc_check(L);
va_start(argp, fmt);
- ret = lj_str_pushvf(L, fmt, argp);
+ ret = lj_strfmt_pushvf(L, fmt, argp);
va_end(argp);
return ret;
}
@@ -624,14 +690,14 @@ LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n)
{
GCfunc *fn;
lj_gc_check(L);
- api_checknelems(L, n);
+ lj_checkapi_slot(n);
fn = lj_func_newC(L, (MSize)n, getcurrenv(L));
fn->c.f = f;
L->top -= n;
while (n--)
copyTV(L, &fn->c.upvalue[n], L->top+n);
setfuncV(L, L->top, fn);
- lua_assert(iswhite(obj2gco(fn)));
+ lj_assertL(iswhite(obj2gco(fn)), "new GC object is not white");
incr_top(L);
}
@@ -643,16 +709,17 @@ LUA_API void lua_pushboolean(lua_State *L, int b)
LUA_API void lua_pushlightuserdata(lua_State *L, void *p)
{
- setlightudV(L->top, checklightudptr(L, p));
+#if LJ_64
+ p = lj_lightud_intern(L, p);
+#endif
+ setrawlightudV(L->top, p);
incr_top(L);
}
LUA_API void lua_createtable(lua_State *L, int narray, int nrec)
{
- GCtab *t;
lj_gc_check(L);
- t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec));
- settabV(L, L->top, t);
+ settabV(L, L->top, lj_tab_new_ah(L, narray, nrec));
incr_top(L);
}
@@ -703,7 +770,7 @@ LUA_API void *lua_newuserdata(lua_State *L, size_t size)
LUA_API void lua_concat(lua_State *L, int n)
{
- api_checknelems(L, n);
+ lj_checkapi_slot(n);
if (n >= 2) {
n--;
do {
@@ -715,8 +782,8 @@ LUA_API void lua_concat(lua_State *L, int n)
n -= (int)(L->top - top);
L->top = top+2;
lj_vm_call(L, top, 1+1);
- L->top--;
- copyTV(L, L->top-1, L->top);
+ L->top -= 1+LJ_FR2;
+ copyTV(L, L->top-1, L->top+LJ_FR2);
} while (--n > 0);
} else if (n == 0) { /* Push empty string. */
setstrV(L, L->top, &G(L)->strempty);
@@ -729,30 +796,28 @@ LUA_API void lua_concat(lua_State *L, int n)
LUA_API void lua_gettable(lua_State *L, int idx)
{
- cTValue *v, *t = index2adr(L, idx);
- api_checkvalidindex(L, t);
- v = lj_meta_tget(L, t, L->top-1);
+ cTValue *t = index2adr_check(L, idx);
+ cTValue *v = lj_meta_tget(L, t, L->top-1);
if (v == NULL) {
L->top += 2;
lj_vm_call(L, L->top-2, 1+1);
- L->top -= 2;
- v = L->top+1;
+ L->top -= 2+LJ_FR2;
+ v = L->top+1+LJ_FR2;
}
copyTV(L, L->top-1, v);
}
LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
{
- cTValue *v, *t = index2adr(L, idx);
+ cTValue *v, *t = index2adr_check(L, idx);
TValue key;
- api_checkvalidindex(L, t);
setstrV(L, &key, lj_str_newz(L, k));
v = lj_meta_tget(L, t, &key);
if (v == NULL) {
L->top += 2;
lj_vm_call(L, L->top-2, 1+1);
- L->top -= 2;
- v = L->top+1;
+ L->top -= 2+LJ_FR2;
+ v = L->top+1+LJ_FR2;
}
copyTV(L, L->top, v);
incr_top(L);
@@ -761,14 +826,14 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k)
LUA_API void lua_rawget(lua_State *L, int idx)
{
cTValue *t = index2adr(L, idx);
- api_check(L, tvistab(t));
+ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1));
}
LUA_API void lua_rawgeti(lua_State *L, int idx, int n)
{
cTValue *v, *t = index2adr(L, idx);
- api_check(L, tvistab(t));
+ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
v = lj_tab_getint(tabV(t), n);
if (v) {
copyTV(L, L->top, v);
@@ -810,8 +875,7 @@ LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field)
LUA_API void lua_getfenv(lua_State *L, int idx)
{
- cTValue *o = index2adr(L, idx);
- api_checkvalidindex(L, o);
+ cTValue *o = index2adr_check(L, idx);
if (tvisfunc(o)) {
settabV(L, L->top, tabref(funcV(o)->c.env));
} else if (tvisudata(o)) {
@@ -828,12 +892,14 @@ LUA_API int lua_next(lua_State *L, int idx)
{
cTValue *t = index2adr(L, idx);
int more;
- api_check(L, tvistab(t));
- more = lj_tab_next(L, tabV(t), L->top-1);
- if (more) {
+ lj_checkapi(tvistab(t), "stack slot %d is not a table", idx);
+ more = lj_tab_next(tabV(t), L->top-1, L->top-1);
+ if (more > 0) {
incr_top(L); /* Return new key and value slot. */
- } else { /* End of traversal. */
+ } else if (!more) { /* End of traversal. */
L->top--; /* Remove key slot. */
+ } else {
+ lj_err_msg(L, LJ_ERR_NEXTIDX);
}
return more;
}
@@ -854,7 +920,7 @@ LUA_API void *lua_upvalueid(lua_State *L, int idx, int n)
{
GCfunc *fn = funcV(index2adr(L, idx));
n--;
- api_check(L, (uint32_t)n < fn->l.nupvalues);
+ lj_checkapi((uint32_t)n < fn->l.nupvalues, "bad upvalue %d", n);
return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) :
(void *)&fn->c.upvalue[n];
}
@@ -864,13 +930,15 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2)
GCfunc *fn1 = funcV(index2adr(L, idx1));
GCfunc *fn2 = funcV(index2adr(L, idx2));
n1--; n2--;
- api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues);
- api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues);
+ lj_checkapi(isluafunc(fn1), "stack slot %d is not a Lua function", idx1);
+ lj_checkapi(isluafunc(fn2), "stack slot %d is not a Lua function", idx2);
+ lj_checkapi((uint32_t)n1 < fn1->l.nupvalues, "bad upvalue %d", n1+1);
+ lj_checkapi((uint32_t)n2 < fn2->l.nupvalues, "bad upvalue %d", n2+1);
setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]);
lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1]));
}
-LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
+LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname)
{
cTValue *o = index2adr(L, idx);
if (tvisudata(o)) {
@@ -879,8 +947,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable))
return uddata(ud);
}
- lj_err_argtype(L, idx, tname);
- return NULL; /* unreachable */
+ return NULL; /* value is not a userdata with a metatable */
+}
+
+LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
+{
+ void *p = luaL_testudata(L, idx, tname);
+ if (!p) lj_err_argtype(L, idx, tname);
+ return p;
}
/* -- Object setters ------------------------------------------------------ */
@@ -888,19 +962,19 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname)
LUA_API void lua_settable(lua_State *L, int idx)
{
TValue *o;
- cTValue *t = index2adr(L, idx);
- api_checknelems(L, 2);
- api_checkvalidindex(L, t);
+ cTValue *t = index2adr_check(L, idx);
+ lj_checkapi_slot(2);
o = lj_meta_tset(L, t, L->top-2);
if (o) {
/* NOBARRIER: lj_meta_tset ensures the table is not black. */
- copyTV(L, o, L->top-1);
L->top -= 2;
+ copyTV(L, o, L->top+1);
} else {
- L->top += 3;
- copyTV(L, L->top-1, L->top-6);
- lj_vm_call(L, L->top-3, 0+1);
- L->top -= 3;
+ TValue *base = L->top;
+ copyTV(L, base+2, base-3-2*LJ_FR2);
+ L->top = base+3;
+ lj_vm_call(L, base, 0+1);
+ L->top -= 3+LJ_FR2;
}
}
@@ -908,20 +982,19 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k)
{
TValue *o;
TValue key;
- cTValue *t = index2adr(L, idx);
- api_checknelems(L, 1);
- api_checkvalidindex(L, t);
+ cTValue *t = index2adr_check(L, idx);
+ lj_checkapi_slot(1);
setstrV(L, &key, lj_str_newz(L, k));
o = lj_meta_tset(L, t, &key);
if (o) {
- L->top--;
/* NOBARRIER: lj_meta_tset ensures the table is not black. */
- copyTV(L, o, L->top);
+ copyTV(L, o, --L->top);
} else {
- L->top += 3;
- copyTV(L, L->top-1, L->top-6);
- lj_vm_call(L, L->top-3, 0+1);
- L->top -= 2;
+ TValue *base = L->top;
+ copyTV(L, base+2, base-3-2*LJ_FR2);
+ L->top = base+3;
+ lj_vm_call(L, base, 0+1);
+ L->top -= 2+LJ_FR2;
}
}
@@ -929,7 +1002,7 @@ LUA_API void lua_rawset(lua_State *L, int idx)
{
GCtab *t = tabV(index2adr(L, idx));
TValue *dst, *key;
- api_checknelems(L, 2);
+ lj_checkapi_slot(2);
key = L->top-2;
dst = lj_tab_set(L, t, key);
copyTV(L, dst, key+1);
@@ -941,7 +1014,7 @@ LUA_API void lua_rawseti(lua_State *L, int idx, int n)
{
GCtab *t = tabV(index2adr(L, idx));
TValue *dst, *src;
- api_checknelems(L, 1);
+ lj_checkapi_slot(1);
dst = lj_tab_setint(L, t, n);
src = L->top-1;
copyTV(L, dst, src);
@@ -953,13 +1026,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
{
global_State *g;
GCtab *mt;
- cTValue *o = index2adr(L, idx);
- api_checknelems(L, 1);
- api_checkvalidindex(L, o);
+ cTValue *o = index2adr_check(L, idx);
+ lj_checkapi_slot(1);
if (tvisnil(L->top-1)) {
mt = NULL;
} else {
- api_check(L, tvistab(L->top-1));
+ lj_checkapi(tvistab(L->top-1), "top stack slot is not a table");
mt = tabV(L->top-1);
}
g = G(L);
@@ -988,13 +1060,18 @@ LUA_API int lua_setmetatable(lua_State *L, int idx)
return 1;
}
+LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname)
+{
+ lua_getfield(L, LUA_REGISTRYINDEX, tname);
+ lua_setmetatable(L, -2);
+}
+
LUA_API int lua_setfenv(lua_State *L, int idx)
{
- cTValue *o = index2adr(L, idx);
+ cTValue *o = index2adr_check(L, idx);
GCtab *t;
- api_checknelems(L, 1);
- api_checkvalidindex(L, o);
- api_check(L, tvistab(L->top-1));
+ lj_checkapi_slot(1);
+ lj_checkapi(tvistab(L->top-1), "top stack slot is not a table");
t = tabV(L->top-1);
if (tvisfunc(o)) {
setgcref(funcV(o)->c.env, obj2gco(t));
@@ -1017,7 +1094,7 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
TValue *val;
GCobj *o;
const char *name;
- api_checknelems(L, 1);
+ lj_checkapi_slot(1);
name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o);
if (name) {
L->top--;
@@ -1029,11 +1106,25 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n)
/* -- Calls --------------------------------------------------------------- */
+#if LJ_FR2
+static TValue *api_call_base(lua_State *L, int nargs)
+{
+ TValue *o = L->top, *base = o - nargs;
+ L->top = o+1;
+ for (; o > base; o--) copyTV(L, o, o-1);
+ setnilV(o);
+ return o+1;
+}
+#else
+#define api_call_base(L, nargs) (L->top - (nargs))
+#endif
+
LUA_API void lua_call(lua_State *L, int nargs, int nresults)
{
- api_check(L, L->status == 0 || L->status == LUA_ERRERR);
- api_checknelems(L, nargs+1);
- lj_vm_call(L, L->top - nargs, nresults+1);
+ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
+ "thread called in wrong state %d", L->status);
+ lj_checkapi_slot(nargs+1);
+ lj_vm_call(L, api_call_base(L, nargs), nresults+1);
}
LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
@@ -1042,16 +1133,16 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
uint8_t oldh = hook_save(g);
ptrdiff_t ef;
int status;
- api_check(L, L->status == 0 || L->status == LUA_ERRERR);
- api_checknelems(L, nargs+1);
+ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
+ "thread called in wrong state %d", L->status);
+ lj_checkapi_slot(nargs+1);
if (errfunc == 0) {
ef = 0;
} else {
- cTValue *o = stkindex2adr(L, errfunc);
- api_checkvalidindex(L, o);
+ cTValue *o = index2adr_stack(L, errfunc);
ef = savestack(L, o);
}
- status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef);
+ status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef);
if (status) hook_restore(g, oldh);
return status;
}
@@ -1059,12 +1150,17 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc)
static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud)
{
GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L));
+ TValue *top = L->top;
fn->c.f = func;
- setfuncV(L, L->top, fn);
- setlightudV(L->top+1, checklightudptr(L, ud));
+ setfuncV(L, top++, fn);
+ if (LJ_FR2) setnilV(top++);
+#if LJ_64
+ ud = lj_lightud_intern(L, ud);
+#endif
+ setrawlightudV(top++, ud);
cframe_nres(L->cframe) = 1+0; /* Zero results. */
- L->top += 2;
- return L->top-1; /* Now call the newly allocated C function. */
+ L->top = top;
+ return top-1; /* Now call the newly allocated C function. */
}
LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
@@ -1072,7 +1168,8 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
global_State *g = G(L);
uint8_t oldh = hook_save(g);
int status;
- api_check(L, L->status == 0 || L->status == LUA_ERRERR);
+ lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR,
+ "thread called in wrong state %d", L->status);
status = lj_vm_cpcall(L, func, ud, cpcall);
if (status) hook_restore(g, oldh);
return status;
@@ -1081,10 +1178,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud)
LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
{
if (luaL_getmetafield(L, idx, field)) {
- TValue *base = L->top--;
- copyTV(L, base, index2adr(L, idx));
- L->top = base+1;
- lj_vm_call(L, base, 1+1);
+ TValue *top = L->top--;
+ if (LJ_FR2) setnilV(top++);
+ copyTV(L, top++, index2adr(L, idx));
+ L->top = top;
+ lj_vm_call(L, top-1, 1+1);
return 1;
}
return 0;
@@ -1092,6 +1190,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field)
/* -- Coroutine yield and resume ------------------------------------------ */
+LUA_API int lua_isyieldable(lua_State *L)
+{
+ return cframe_canyield(L->cframe);
+}
+
LUA_API int lua_yield(lua_State *L, int nresults)
{
void *cf = L->cframe;
@@ -1111,13 +1214,16 @@ LUA_API int lua_yield(lua_State *L, int nresults)
} else { /* Yield from hook: add a pseudo-frame. */
TValue *top = L->top;
hook_leave(g);
- top->u64 = cframe_multres(cf);
- setcont(top+1, lj_cont_hook);
- setframe_pc(top+1, cframe_pc(cf)-1);
- setframe_gc(top+2, obj2gco(L));
- setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT);
- L->top = L->base = top+3;
-#if LJ_TARGET_X64
+ (top++)->u64 = cframe_multres(cf);
+ setcont(top, lj_cont_hook);
+ if (LJ_FR2) top++;
+ setframe_pc(top, cframe_pc(cf)-1);
+ top++;
+ setframe_gc(top, obj2gco(L), LJ_TTHREAD);
+ if (LJ_FR2) top++;
+ setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT);
+ L->top = L->base = top+1;
+#if ((defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND) || LJ_TARGET_WINDOWS
lj_err_throw(L, LUA_YIELD);
#else
L->cframe = NULL;
@@ -1133,7 +1239,9 @@ LUA_API int lua_yield(lua_State *L, int nresults)
LUA_API int lua_resume(lua_State *L, int nargs)
{
if (L->cframe == NULL && L->status <= LUA_YIELD)
- return lj_vm_resume(L, L->top - nargs, 0, 0);
+ return lj_vm_resume(L,
+ L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs,
+ 0, 0);
L->top = L->base;
setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP));
incr_top(L);
@@ -1163,7 +1271,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
res = (int)(g->gc.total & 0x3ff);
break;
case LUA_GCSTEP: {
- MSize a = (MSize)data << 10;
+ GCSize a = (GCSize)data << 10;
g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0;
while (g->gc.total >= g->gc.threshold)
if (lj_gc_step(L) > 0) {
@@ -1180,6 +1288,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data)
res = (int)(g->gc.stepmul);
g->gc.stepmul = (MSize)data;
break;
+ case LUA_GCISRUNNING:
+ res = (g->gc.threshold != LJ_MAX_MEM);
+ break;
default:
res = -1; /* Invalid option. */
}
diff --git a/src/lj_arch.h b/src/lj_arch.h
index db46f886..882c99cb 100644
--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -8,6 +8,8 @@
#include "lua.h"
+/* -- Target definitions -------------------------------------------------- */
+
/* Target endianess. */
#define LUAJIT_LE 0
#define LUAJIT_BE 1
@@ -19,12 +21,16 @@
#define LUAJIT_ARCH_x64 2
#define LUAJIT_ARCH_ARM 3
#define LUAJIT_ARCH_arm 3
-#define LUAJIT_ARCH_PPC 4
-#define LUAJIT_ARCH_ppc 4
-#define LUAJIT_ARCH_PPCSPE 5
-#define LUAJIT_ARCH_ppcspe 5
+#define LUAJIT_ARCH_ARM64 4
+#define LUAJIT_ARCH_arm64 4
+#define LUAJIT_ARCH_PPC 5
+#define LUAJIT_ARCH_ppc 5
#define LUAJIT_ARCH_MIPS 6
#define LUAJIT_ARCH_mips 6
+#define LUAJIT_ARCH_MIPS32 6
+#define LUAJIT_ARCH_mips32 6
+#define LUAJIT_ARCH_MIPS64 7
+#define LUAJIT_ARCH_mips64 7
/* Target OS. */
#define LUAJIT_OS_OTHER 0
@@ -34,6 +40,14 @@
#define LUAJIT_OS_BSD 4
#define LUAJIT_OS_POSIX 5
+/* Number mode. */
+#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
+#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
+#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
+#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */
+
+/* -- Target detection ---------------------------------------------------- */
+
/* Select native target if no target defined. */
#ifndef LUAJIT_TARGET
@@ -43,14 +57,14 @@
#define LUAJIT_TARGET LUAJIT_ARCH_X64
#elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM)
#define LUAJIT_TARGET LUAJIT_ARCH_ARM
+#elif defined(__aarch64__)
+#define LUAJIT_TARGET LUAJIT_ARCH_ARM64
#elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC)
-#ifdef __NO_FPRS__
-#define LUAJIT_TARGET LUAJIT_ARCH_PPCSPE
-#else
#define LUAJIT_TARGET LUAJIT_ARCH_PPC
-#endif
+#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64)
+#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64
#elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS)
-#define LUAJIT_TARGET LUAJIT_ARCH_MIPS
+#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32
#else
#error "No support for this architecture (yet)"
#endif
@@ -65,12 +79,16 @@
#elif defined(__linux__)
#define LUAJIT_OS LUAJIT_OS_LINUX
#elif defined(__MACH__) && defined(__APPLE__)
+#include "TargetConditionals.h"
#define LUAJIT_OS LUAJIT_OS_OSX
#elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \
defined(__NetBSD__) || defined(__OpenBSD__) || \
- defined(__DragonFly__)) && !defined(__ORBIS__)
+ defined(__DragonFly__)) && !defined(__ORBIS__) && !defined(__PROSPERO__)
#define LUAJIT_OS LUAJIT_OS_BSD
#elif (defined(__sun__) && defined(__svr4__))
+#define LJ_TARGET_SOLARIS 1
+#define LUAJIT_OS LUAJIT_OS_POSIX
+#elif defined(__HAIKU__)
#define LUAJIT_OS LUAJIT_OS_POSIX
#elif defined(__CYGWIN__)
#define LJ_TARGET_CYGWIN 1
@@ -99,10 +117,16 @@
#define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS)
#define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX)
#define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX)
-#define LJ_TARGET_IOS (LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM)
+#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD)
#define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS)
#define LJ_TARGET_DLOPEN LJ_TARGET_POSIX
+#if TARGET_OS_IPHONE
+#define LJ_TARGET_IOS 1
+#else
+#define LJ_TARGET_IOS 0
+#endif
+
#ifdef __CELLOS_LV2__
#define LJ_TARGET_PS3 1
#define LJ_TARGET_CONSOLE 1
@@ -115,6 +139,13 @@
#define NULL ((void*)0)
#endif
+#ifdef __PROSPERO__
+#define LJ_TARGET_PS5 1
+#define LJ_TARGET_CONSOLE 1
+#undef NULL
+#define NULL ((void*)0)
+#endif
+
#ifdef __psp2__
#define LJ_TARGET_PSVITA 1
#define LJ_TARGET_CONSOLE 1
@@ -125,10 +156,27 @@
#define LJ_TARGET_CONSOLE 1
#endif
-#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */
-#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */
-#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */
-#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */
+#ifdef _DURANGO
+#define LJ_TARGET_XBOXONE 1
+#define LJ_TARGET_CONSOLE 1
+#define LJ_TARGET_GC64 1
+#endif
+
+#ifdef __NX__
+#define LJ_TARGET_NX 1
+#define LJ_TARGET_CONSOLE 1
+#undef NULL
+#define NULL ((void*)0)
+#endif
+
+#ifdef _UWP
+#define LJ_TARGET_UWP 1
+#if LUAJIT_TARGET == LUAJIT_ARCH_X64
+#define LJ_TARGET_GC64 1
+#endif
+#endif
+
+/* -- Arch-specific settings ---------------------------------------------- */
/* Set target architecture properties. */
#if LUAJIT_TARGET == LUAJIT_ARCH_X86
@@ -136,14 +184,10 @@
#define LJ_ARCH_NAME "x86"
#define LJ_ARCH_BITS 32
#define LJ_ARCH_ENDIAN LUAJIT_LE
-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
-#define LJ_ABI_WIN 1
-#else
-#define LJ_ABI_WIN 0
-#endif
#define LJ_TARGET_X86 1
#define LJ_TARGET_X86ORX64 1
#define LJ_TARGET_EHRETREG 0
+#define LJ_TARGET_EHRAREG 8
#define LJ_TARGET_MASKSHIFT 1
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNALIGNED 1
@@ -154,19 +198,20 @@
#define LJ_ARCH_NAME "x64"
#define LJ_ARCH_BITS 64
#define LJ_ARCH_ENDIAN LUAJIT_LE
-#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
-#define LJ_ABI_WIN 1
-#else
-#define LJ_ABI_WIN 0
-#endif
#define LJ_TARGET_X64 1
#define LJ_TARGET_X86ORX64 1
#define LJ_TARGET_EHRETREG 0
+#define LJ_TARGET_EHRAREG 16
#define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */
#define LJ_TARGET_MASKSHIFT 1
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNALIGNED 1
#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL
+#ifndef LUAJIT_DISABLE_GC64
+#define LJ_TARGET_GC64 1
+#elif LJ_TARGET_OSX
+#error "macOS requires GC64 -- don't disable it"
+#endif
#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM
@@ -182,40 +227,105 @@
#define LJ_ABI_EABI 1
#define LJ_TARGET_ARM 1
#define LJ_TARGET_EHRETREG 0
+#define LJ_TARGET_EHRAREG 14
#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
#define LJ_TARGET_MASKSHIFT 0
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__
+#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
#define LJ_ARCH_VERSION 80
-#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
+#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
#define LJ_ARCH_VERSION 70
#elif __ARM_ARCH_6T2__
#define LJ_ARCH_VERSION 61
-#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
+#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
#define LJ_ARCH_VERSION 60
#else
#define LJ_ARCH_VERSION 50
#endif
+#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64
+
+#define LJ_ARCH_BITS 64
+#if defined(__AARCH64EB__)
+#define LJ_ARCH_NAME "arm64be"
+#define LJ_ARCH_ENDIAN LUAJIT_BE
+#else
+#define LJ_ARCH_NAME "arm64"
+#define LJ_ARCH_ENDIAN LUAJIT_LE
+#endif
+#define LJ_TARGET_ARM64 1
+#define LJ_TARGET_EHRETREG 0
+#define LJ_TARGET_EHRAREG 30
+#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */
+#define LJ_TARGET_MASKSHIFT 1
+#define LJ_TARGET_MASKROT 1
+#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
+#define LJ_TARGET_GC64 1
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+
+#define LJ_ARCH_VERSION 80
+
#elif LUAJIT_TARGET == LUAJIT_ARCH_PPC
-#define LJ_ARCH_NAME "ppc"
+#ifndef LJ_ARCH_ENDIAN
+#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
+#define LJ_ARCH_ENDIAN LUAJIT_LE
+#else
+#define LJ_ARCH_ENDIAN LUAJIT_BE
+#endif
+#endif
+
#if _LP64
#define LJ_ARCH_BITS 64
+#if LJ_ARCH_ENDIAN == LUAJIT_LE
+#define LJ_ARCH_NAME "ppc64le"
+#else
+#define LJ_ARCH_NAME "ppc64"
+#endif
#else
#define LJ_ARCH_BITS 32
+#define LJ_ARCH_NAME "ppc"
+
+#if !defined(LJ_ARCH_HASFPU)
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#define LJ_ARCH_HASFPU 0
+#else
+#define LJ_ARCH_HASFPU 1
#endif
-#define LJ_ARCH_ENDIAN LUAJIT_BE
+#endif
+
+#if !defined(LJ_ABI_SOFTFP)
+#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
+#define LJ_ABI_SOFTFP 1
+#else
+#define LJ_ABI_SOFTFP 0
+#endif
+#endif
+#endif
+
+#if LJ_ABI_SOFTFP
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
+#else
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
+#endif
+
#define LJ_TARGET_PPC 1
#define LJ_TARGET_EHRETREG 3
+#define LJ_TARGET_EHRAREG 65
#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
#define LJ_TARGET_MASKSHIFT 0
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE
+
+#if LJ_TARGET_CONSOLE
+#define LJ_ARCH_PPC32ON64 1
+#define LJ_ARCH_NOFFI 1
+#elif LJ_ARCH_BITS == 64
+#error "No support for PPC64"
+#endif
#if _ARCH_PWR7
#define LJ_ARCH_VERSION 70
@@ -230,10 +340,6 @@
#else
#define LJ_ARCH_VERSION 0
#endif
-#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE
-#define LJ_ARCH_PPC64 1
-#define LJ_ARCH_NOFFI 1
-#endif
#if _ARCH_PPCSQ
#define LJ_ARCH_SQRT 1
#endif
@@ -247,44 +353,80 @@
#define LJ_ARCH_XENON 1
#endif
-#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE
-
-#define LJ_ARCH_NAME "ppcspe"
-#define LJ_ARCH_BITS 32
-#define LJ_ARCH_ENDIAN LUAJIT_BE
-#ifndef LJ_ABI_SOFTFP
-#define LJ_ABI_SOFTFP 1
-#endif
-#define LJ_ABI_EABI 1
-#define LJ_TARGET_PPCSPE 1
-#define LJ_TARGET_EHRETREG 3
-#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */
-#define LJ_TARGET_MASKSHIFT 0
-#define LJ_TARGET_MASKROT 1
-#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */
-#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE
-#define LJ_ARCH_NOFFI 1 /* NYI: comparisons, calls. */
-#define LJ_ARCH_NOJIT 1
-
-#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS
+#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL)
+#if __mips_isa_rev >= 6
+#define LJ_TARGET_MIPSR6 1
+#define LJ_TARGET_UNALIGNED 1
+#endif
+#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
+#if LJ_TARGET_MIPSR6
+#define LJ_ARCH_NAME "mips32r6el"
+#else
#define LJ_ARCH_NAME "mipsel"
+#endif
+#else
+#if LJ_TARGET_MIPSR6
+#define LJ_ARCH_NAME "mips64r6el"
+#else
+#define LJ_ARCH_NAME "mips64el"
+#endif
+#endif
#define LJ_ARCH_ENDIAN LUAJIT_LE
#else
+#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
+#if LJ_TARGET_MIPSR6
+#define LJ_ARCH_NAME "mips32r6"
+#else
#define LJ_ARCH_NAME "mips"
+#endif
+#else
+#if LJ_TARGET_MIPSR6
+#define LJ_ARCH_NAME "mips64r6"
+#else
+#define LJ_ARCH_NAME "mips64"
+#endif
+#endif
#define LJ_ARCH_ENDIAN LUAJIT_BE
#endif
+
+#if !defined(LJ_ARCH_HASFPU)
+#ifdef __mips_soft_float
+#define LJ_ARCH_HASFPU 0
+#else
+#define LJ_ARCH_HASFPU 1
+#endif
+#endif
+
+#if !defined(LJ_ABI_SOFTFP)
+#ifdef __mips_soft_float
+#define LJ_ABI_SOFTFP 1
+#else
+#define LJ_ABI_SOFTFP 0
+#endif
+#endif
+
+#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32
#define LJ_ARCH_BITS 32
+#define LJ_TARGET_MIPS32 1
+#else
+#define LJ_ARCH_BITS 64
+#define LJ_TARGET_MIPS64 1
+#define LJ_TARGET_GC64 1
+#endif
#define LJ_TARGET_MIPS 1
#define LJ_TARGET_EHRETREG 4
+#define LJ_TARGET_EHRAREG 31
#define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */
#define LJ_TARGET_MASKSHIFT 1
#define LJ_TARGET_MASKROT 1
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
-#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE
+#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
-#if _MIPS_ARCH_MIPS32R2
+#if LJ_TARGET_MIPSR6
+#define LJ_ARCH_VERSION 60
+#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2
#define LJ_ARCH_VERSION 20
#else
#define LJ_ARCH_VERSION 10
@@ -294,9 +436,7 @@
#error "No target architecture defined"
#endif
-#ifndef LJ_PAGESIZE
-#define LJ_PAGESIZE 4096
-#endif
+/* -- Checks for requirements --------------------------------------------- */
/* Check for minimum required compiler versions. */
#if defined(__GNUC__)
@@ -312,6 +452,16 @@
#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2)
#error "Need at least GCC 4.2 or newer"
#endif
+#elif LJ_TARGET_ARM64
+#if __clang__
+#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__)
+#error "Need at least Clang 3.5 or newer"
+#endif
+#else
+#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8)
+#error "Need at least GCC 4.8 or newer"
+#endif
+#endif
#elif !LJ_TARGET_PS3
#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3)
#error "Need at least GCC 4.3 or newer"
@@ -335,26 +485,35 @@
#if !(__ARM_EABI__ || LJ_TARGET_IOS)
#error "Only ARM EABI or iOS 3.0+ ABI is supported"
#endif
-#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE
-#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE)
-#error "No support for PowerPC CPUs without double-precision FPU"
+#elif LJ_TARGET_ARM64
+#if defined(_ILP32)
+#error "No support for ILP32 model on ARM64"
#endif
+#elif LJ_TARGET_PPC
#if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN))
-#error "No support for little-endian PowerPC"
+#error "No support for little-endian PPC32"
+#endif
+#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT)
+#error "No support for PPC/e500 anymore (use LuaJIT 2.0)"
#endif
-#if defined(_LP64)
-#error "No support for PowerPC 64 bit mode"
+#elif LJ_TARGET_MIPS32
+#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32))
+#error "Only o32 ABI supported for MIPS32"
#endif
-#elif LJ_TARGET_MIPS
-#if defined(__mips_soft_float)
-#error "No support for MIPS CPUs without FPU"
+#if LJ_TARGET_MIPSR6
+/* Not that useful, since most available r6 CPUs are 64 bit. */
+#error "No support for MIPS32R6"
#endif
-#if defined(_LP64)
-#error "No support for MIPS64"
+#elif LJ_TARGET_MIPS64
+#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64))
+/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */
+#error "Only n64 ABI supported for MIPS64"
#endif
#endif
#endif
+/* -- Derived defines ----------------------------------------------------- */
+
/* Enable or disable the dual-number mode for the VM. */
#if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \
(LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1)
@@ -376,6 +535,20 @@
#endif
#endif
+/* 64 bit GC references. */
+#if LJ_TARGET_GC64
+#define LJ_GC64 1
+#else
+#define LJ_GC64 0
+#endif
+
+/* 2-slot frame info. */
+#if LJ_GC64
+#define LJ_FR2 1
+#else
+#define LJ_FR2 0
+#endif
+
/* Disable or enable the JIT compiler. */
#if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT)
#define LJ_HASJIT 0
@@ -390,6 +563,28 @@
#define LJ_HASFFI 1
#endif
+/* Disable or enable the string buffer extension. */
+#if defined(LUAJIT_DISABLE_BUFFER)
+#define LJ_HASBUFFER 0
+#else
+#define LJ_HASBUFFER 1
+#endif
+
+#if defined(LUAJIT_DISABLE_PROFILE)
+#define LJ_HASPROFILE 0
+#elif LJ_TARGET_POSIX
+#define LJ_HASPROFILE 1
+#define LJ_PROFILE_SIGPROF 1
+#elif LJ_TARGET_PS3
+#define LJ_HASPROFILE 1
+#define LJ_PROFILE_PTHREAD 1
+#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360
+#define LJ_HASPROFILE 1
+#define LJ_PROFILE_WTHREAD 1
+#else
+#define LJ_HASPROFILE 0
+#endif
+
#ifndef LJ_ARCH_HASFPU
#define LJ_ARCH_HASFPU 1
#endif
@@ -397,6 +592,7 @@
#define LJ_ABI_SOFTFP 0
#endif
#define LJ_SOFTFP (!LJ_ARCH_HASFPU)
+#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32)
#if LJ_ARCH_ENDIAN == LUAJIT_BE
#define LJ_LE 0
@@ -422,26 +618,52 @@
#define LJ_TARGET_UNALIGNED 0
#endif
-/* Various workarounds for embedded operating systems. */
-#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360
-#define LUAJIT_NO_LOG2
+#ifndef LJ_PAGESIZE
+#define LJ_PAGESIZE 4096
#endif
-#if defined(__symbian__)
-#define LUAJIT_NO_EXP2
+
+/* Various workarounds for embedded operating systems or weak C runtimes. */
+#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS
+#define LUAJIT_NO_LOG2
#endif
#if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0)
#define LJ_NO_SYSTEM 1
#endif
-#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__
-/* NYI: no support for compact unwind specification, yet. */
-#define LUAJIT_NO_UNWIND 1
+#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN
+#define LJ_ABI_WIN 1
+#else
+#define LJ_ABI_WIN 0
+#endif
+
+#if LJ_TARGET_WINDOWS
+#if LJ_TARGET_UWP
+#define LJ_WIN_VALLOC VirtualAllocFromApp
+#define LJ_WIN_VPROTECT VirtualProtectFromApp
+extern void *LJ_WIN_LOADLIBA(const char *path);
+#else
+#define LJ_WIN_VALLOC VirtualAlloc
+#define LJ_WIN_VPROTECT VirtualProtect
+#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0)
+#endif
#endif
-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4
+#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5
#define LJ_NO_UNWIND 1
#endif
+#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__))))
+#define LJ_UNWIND_EXT 1
+#else
+#define LJ_UNWIND_EXT 0
+#endif
+
+#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86)
+#define LJ_UNWIND_JIT 1
+#else
+#define LJ_UNWIND_JIT 0
+#endif
+
/* Compatibility with Lua 5.1 vs. 5.2. */
#ifdef LUAJIT_ENABLE_LUA52COMPAT
#define LJ_52 1
@@ -449,4 +671,46 @@
#define LJ_52 0
#endif
+/* -- VM security --------------------------------------------------------- */
+
+/* Don't make any changes here. Instead build with:
+** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value"
+**
+** Important note to distro maintainers: DO NOT change the defaults for a
+** regular distro build -- neither upwards, nor downwards!
+** These build-time configurable security flags are intended for embedders
+** who may have specific needs wrt. security vs. performance.
+*/
+
+/* Security defaults. */
+#ifndef LUAJIT_SECURITY_PRNG
+/* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */
+#define LUAJIT_SECURITY_PRNG 1
+#endif
+
+#ifndef LUAJIT_SECURITY_STRHASH
+/* String hash: 0 = sparse only, 1 = sparse + dense. */
+#define LUAJIT_SECURITY_STRHASH 1
+#endif
+
+#ifndef LUAJIT_SECURITY_STRID
+/* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */
+#define LUAJIT_SECURITY_STRID 1
+#endif
+
+#ifndef LUAJIT_SECURITY_MCODE
+/* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. */
+#define LUAJIT_SECURITY_MCODE 1
+#endif
+
+#define LJ_SECURITY_MODE \
+ ( 0u \
+ | ((LUAJIT_SECURITY_PRNG & 3) << 0) \
+ | ((LUAJIT_SECURITY_STRHASH & 3) << 2) \
+ | ((LUAJIT_SECURITY_STRID & 3) << 4) \
+ | ((LUAJIT_SECURITY_MCODE & 3) << 6) \
+ )
+#define LJ_SECURITY_MODESTRING \
+ "\004prng\007strhash\005strid\005mcode"
+
#endif
diff --git a/src/lj_asm.c b/src/lj_asm.c
index 9ff9215f..6f5e0c45 100644
--- a/src/lj_asm.c
+++ b/src/lj_asm.c
@@ -11,6 +11,7 @@
#if LJ_HASJIT
#include "lj_gc.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_frame.h"
@@ -71,6 +72,7 @@ typedef struct ASMState {
IRRef snaprename; /* Rename highwater mark for snapshot check. */
SnapNo snapno; /* Current snapshot number. */
SnapNo loopsnapno; /* Loop snapshot number. */
+ int snapalloc; /* Current snapshot needs allocation. */
BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */
IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */
@@ -85,18 +87,25 @@ typedef struct ASMState {
MCode *mcbot; /* Bottom of reserved MCode. */
MCode *mctop; /* Top of generated MCode. */
+ MCode *mctoporig; /* Original top of generated MCode. */
MCode *mcloop; /* Pointer to loop MCode (or NULL). */
MCode *invmcp; /* Points to invertible loop branch (or NULL). */
MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */
MCode *realign; /* Realign loop if not NULL. */
#ifdef RID_NUM_KREF
- int32_t krefk[RID_NUM_KREF];
+ intptr_t krefk[RID_NUM_KREF];
#endif
IRRef1 phireg[RID_MAX]; /* PHI register references. */
uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */
} ASMState;
+#ifdef LUA_USE_ASSERT
+#define lj_assertA(c, ...) lj_assertG_(J2G(as->J), (c), __VA_ARGS__)
+#else
+#define lj_assertA(c, ...) ((void)as)
+#endif
+
#define IR(ref) (&as->ir[(ref)])
#define ASMREF_TMP1 REF_TRUE /* Temp. register. */
@@ -128,9 +137,8 @@ static LJ_AINLINE void checkmclim(ASMState *as)
#ifdef LUA_USE_ASSERT
if (as->mcp + MCLIM_REDZONE < as->mcp_prev) {
IRIns *ir = IR(as->curins+1);
- fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp,
- as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
- lua_assert(0);
+ lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp,
+ as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS);
}
#endif
if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as);
@@ -144,7 +152,7 @@ static LJ_AINLINE void checkmclim(ASMState *as)
#define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref)))
#define ra_krefk(as, ref) (as->krefk[(ref)])
-static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k)
+static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k)
{
IRRef ref = (IRRef)(r - RID_MIN_KREF);
as->krefk[ref] = k;
@@ -171,6 +179,8 @@ IRFLDEF(FLOFS)
#include "lj_emit_x86.h"
#elif LJ_TARGET_ARM
#include "lj_emit_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_emit_arm64.h"
#elif LJ_TARGET_PPC
#include "lj_emit_ppc.h"
#elif LJ_TARGET_MIPS
@@ -179,6 +189,12 @@ IRFLDEF(FLOFS)
#error "Missing instruction emitter for target CPU"
#endif
+/* Generic load/store of register from/to stack slot. */
+#define emit_spload(as, ir, r, ofs) \
+ emit_loadofs(as, ir, (r), RID_SP, (ofs))
+#define emit_spstore(as, ir, r, ofs) \
+ emit_storeofs(as, ir, (r), RID_SP, (ofs))
+
/* -- Register allocator debugging ---------------------------------------- */
/* #define LUAJIT_DEBUG_RA */
@@ -236,7 +252,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
*p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q;
} else {
*p++ = '?';
- lua_assert(0);
+ lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt);
}
} else if (e[1] == 'f' || e[1] == 'i') {
IRRef ref;
@@ -254,7 +270,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
} else if (e[1] == 'x') {
p += sprintf(p, "%08x", va_arg(argp, int32_t));
} else {
- lua_assert(0);
+ lj_assertA(0, "bad debug format code");
}
fmt = e+2;
}
@@ -313,37 +329,51 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
Reg r;
if (ra_iskref(ref)) {
r = ra_krefreg(ref);
- lua_assert(!rset_test(as->freeset, r));
+ lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r);
ra_free(as, r);
ra_modified(as, r);
+#if LJ_64
+ emit_loadu64(as, r, ra_krefk(as, ref));
+#else
emit_loadi(as, r, ra_krefk(as, ref));
+#endif
return r;
}
ir = IR(ref);
r = ir->r;
- lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+ lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref);
+ lj_assertA(!ra_hasspill(ir->s),
+ "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s);
ra_free(as, r);
ra_modified(as, r);
ir->r = RID_INIT; /* Do not keep any hint. */
RA_DBGX((as, "remat $i $r", ir, r));
-#if !LJ_SOFTFP
+#if !LJ_SOFTFP32
if (ir->o == IR_KNUM) {
- emit_loadn(as, r, ir_knum(ir));
+ emit_loadk64(as, r, ir);
} else
#endif
if (emit_canremat(REF_BASE) && ir->o == IR_BASE) {
ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
emit_getgl(as, r, jit_base);
} else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) {
- lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */
- emit_getgl(as, r, jit_L);
+ /* REF_NIL stores ASMREF_L register. */
+ lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L");
+ emit_getgl(as, r, cur_L);
#if LJ_64
} else if (ir->o == IR_KINT64) {
emit_loadu64(as, r, ir_kint64(ir)->u64);
+#if LJ_GC64
+ } else if (ir->o == IR_KGC) {
+ emit_loadu64(as, r, (uintptr_t)ir_kgc(ir));
+ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+ emit_loadu64(as, r, (uintptr_t)ir_kptr(ir));
+#endif
#endif
} else {
- lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
- ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+ lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
+ ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
+ "rematk of bad IR op %d", ir->o);
emit_loadi(as, r, ir->i);
}
return r;
@@ -353,7 +383,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref)
static int32_t ra_spill(ASMState *as, IRIns *ir)
{
int32_t slot = ir->s;
- lua_assert(ir >= as->ir + REF_TRUE);
+ lj_assertA(ir >= as->ir + REF_TRUE,
+ "spill of K%03d", REF_BIAS - (int)(ir - as->ir));
if (!ra_hasspill(slot)) {
if (irt_is64(ir->t)) {
slot = as->evenspill;
@@ -378,7 +409,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref)
{
IRIns *ir = IR(ref);
Reg r = ir->r;
- lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
+ lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1);
+ lj_assertA(!ra_hasspill(ir->s),
+ "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s);
ra_free(as, r);
ra_modified(as, r);
ir->r = RID_INIT;
@@ -394,7 +427,7 @@ static Reg ra_restore(ASMState *as, IRRef ref)
IRIns *ir = IR(ref);
int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */
Reg r = ir->r;
- lua_assert(ra_hasreg(r));
+ lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS);
ra_sethint(ir->r, r); /* Keep hint. */
ra_free(as, r);
if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */
@@ -423,14 +456,15 @@ static Reg ra_evict(ASMState *as, RegSet allow)
{
IRRef ref;
RegCost cost = ~(RegCost)0;
- lua_assert(allow != RSET_EMPTY);
+ lj_assertA(allow != RSET_EMPTY, "evict from empty set");
if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) {
GPRDEF(MINCOST)
} else {
FPRDEF(MINCOST)
}
ref = regcost_ref(cost);
- lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins));
+ lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins),
+ "evict of out-of-range IR %04d", ref - REF_BIAS);
/* Preferably pick any weak ref instead of a non-weak, non-const ref. */
if (!irref_isk(ref) && (as->weakset & allow)) {
IRIns *ir = IR(ref);
@@ -512,7 +546,7 @@ static void ra_evictk(ASMState *as)
#ifdef RID_NUM_KREF
/* Allocate a register for a constant. */
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow)
{
/* First try to find a register which already holds the same constant. */
RegSet pick, work = ~as->freeset & RSET_GPR;
@@ -521,9 +555,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
IRRef ref;
r = rset_pickbot(work);
ref = regcost_ref(as->cost[r]);
+#if LJ_64
+ if (ref < ASMREF_L) {
+ if (ra_iskref(ref)) {
+ if (k == ra_krefk(as, ref))
+ return r;
+ } else {
+ IRIns *ir = IR(ref);
+ if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) ||
+#if LJ_GC64
+ (ir->o == IR_KINT && k == ir->i) ||
+ (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) ||
+ ((ir->o == IR_KPTR || ir->o == IR_KKPTR) &&
+ k == (intptr_t)ir_kptr(ir))
+#else
+ (ir->o != IR_KINT64 && k == ir->i)
+#endif
+ )
+ return r;
+ }
+ }
+#else
if (ref < ASMREF_L &&
k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i))
return r;
+#endif
rset_clear(work, r);
}
pick = as->freeset & allow;
@@ -543,7 +599,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow)
}
/* Allocate a specific register for a constant. */
-static void ra_allockreg(ASMState *as, int32_t k, Reg r)
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r)
{
Reg kr = ra_allock(as, k, RID2RSET(r));
if (kr != r) {
@@ -566,7 +622,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow)
IRIns *ir = IR(ref);
RegSet pick = as->freeset & allow;
Reg r;
- lua_assert(ra_noreg(ir->r));
+ lj_assertA(ra_noreg(ir->r),
+ "IR %04d already has reg %d", ref - REF_BIAS, ir->r);
if (pick) {
/* First check register hint from propagation or PHI. */
if (ra_hashint(ir->r)) {
@@ -613,15 +670,27 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow)
return r;
}
+/* Add a register rename to the IR. */
+static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno)
+{
+ IRRef ren;
+ lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno);
+ ren = tref_ref(lj_ir_emit(as->J));
+ as->J->cur.ir[ren].r = (uint8_t)down;
+ as->J->cur.ir[ren].s = SPS_NONE;
+}
+
/* Rename register allocation and emit move. */
static void ra_rename(ASMState *as, Reg down, Reg up)
{
- IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]);
+ IRRef ref = regcost_ref(as->cost[up] = as->cost[down]);
IRIns *ir = IR(ref);
ir->r = (uint8_t)up;
as->cost[down] = 0;
- lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR));
- lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up));
+ lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR),
+ "rename between GPR/FPR %d and %d", down, up);
+ lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down);
+ lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up);
ra_free(as, down); /* 'down' is free ... */
ra_modified(as, down);
rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */
@@ -629,11 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up)
RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up));
emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */
if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */
- lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno);
- ren = tref_ref(lj_ir_emit(as->J));
- as->ir = as->T->ir; /* The IR may have been reallocated. */
- IR(ren)->r = (uint8_t)down;
- IR(ren)->s = SPS_NONE;
+ /*
+ ** The rename is effective at the subsequent (already emitted) exit
+ ** branch. This is for the current snapshot (as->snapno). Except if we
+ ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1),
+ ** then it belongs to the next snapshot.
+ ** See also the discussion at asm_snap_checkrename().
+ */
+ ra_addrename(as, down, ref, as->snapno + as->snapalloc);
}
}
@@ -666,7 +738,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r)
{
Reg dest = ra_dest(as, ir, RID2RSET(r));
if (dest != r) {
- lua_assert(rset_test(as->freeset, r));
+ lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r);
ra_modified(as, r);
emit_movrr(as, ir, dest, r);
}
@@ -683,20 +755,25 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref)
if (ra_noreg(left)) {
if (irref_isk(lref)) {
if (ir->o == IR_KNUM) {
- cTValue *tv = ir_knum(ir);
/* FP remat needs a load except for +0. Still better than eviction. */
- if (tvispzero(tv) || !(as->freeset & RSET_FPR)) {
- emit_loadn(as, dest, tv);
+ if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) {
+ emit_loadk64(as, dest, ir);
return;
}
#if LJ_64
} else if (ir->o == IR_KINT64) {
- emit_loadu64(as, dest, ir_kint64(ir)->u64);
+ emit_loadk64(as, dest, ir);
+ return;
+#if LJ_GC64
+ } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+ emit_loadk64(as, dest, ir);
return;
#endif
- } else {
- lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
- ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL);
+#endif
+ } else if (ir->o != IR_KPRI) {
+ lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC ||
+ ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL,
+ "K%03d has bad IR op %d", REF_BIAS - lref, ir->o);
emit_loadi(as, dest, ir->i);
return;
}
@@ -741,11 +818,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref)
}
#endif
-#if !LJ_64
/* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */
static void ra_destpair(ASMState *as, IRIns *ir)
{
Reg destlo = ir->r, desthi = (ir+1)->r;
+ IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir;
/* First spill unrelated refs blocking the destination registers. */
if (!rset_test(as->freeset, RID_RETLO) &&
destlo != RID_RETLO && desthi != RID_RETLO)
@@ -769,29 +846,29 @@ static void ra_destpair(ASMState *as, IRIns *ir)
/* Check for conflicts and shuffle the registers as needed. */
if (destlo == RID_RETHI) {
if (desthi == RID_RETLO) {
-#if LJ_TARGET_X86
+#if LJ_TARGET_X86ORX64
*--as->mcp = XI_XCHGa + RID_RETHI;
+ if (LJ_64 && irt_is64(irx->t)) *--as->mcp = 0x48;
#else
- emit_movrr(as, ir, RID_RETHI, RID_TMP);
- emit_movrr(as, ir, RID_RETLO, RID_RETHI);
- emit_movrr(as, ir, RID_TMP, RID_RETLO);
+ emit_movrr(as, irx, RID_RETHI, RID_TMP);
+ emit_movrr(as, irx, RID_RETLO, RID_RETHI);
+ emit_movrr(as, irx, RID_TMP, RID_RETLO);
#endif
} else {
- emit_movrr(as, ir, RID_RETHI, RID_RETLO);
- if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
+ emit_movrr(as, irx, RID_RETHI, RID_RETLO);
+ if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
}
} else if (desthi == RID_RETLO) {
- emit_movrr(as, ir, RID_RETLO, RID_RETHI);
- if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+ emit_movrr(as, irx, RID_RETLO, RID_RETHI);
+ if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
} else {
- if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI);
- if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO);
+ if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI);
+ if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO);
}
/* Restore spill slots (if any). */
if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI);
if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO);
}
-#endif
/* -- Snapshot handling --------------------------------------------------- */
@@ -841,11 +918,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
#endif
{ /* Allocate stored values for TNEW, TDUP and CNEW. */
IRIns *irs;
- lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW);
+ lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW,
+ "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o);
for (irs = IR(as->snapref-1); irs > ir; irs--)
if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) {
- lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
- irs->o == IR_FSTORE || irs->o == IR_XSTORE);
+ lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+ irs->o == IR_FSTORE || irs->o == IR_XSTORE,
+ "sunk store IR %04d has bad op %d",
+ (int)(irs - as->ir) - REF_BIAS, irs->o);
asm_snap_alloc1(as, irs->op2);
if (LJ_32 && (irs+1)->o == IR_HIOP)
asm_snap_alloc1(as, (irs+1)->op2);
@@ -881,9 +961,9 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref)
}
/* Allocate refs escaping to a snapshot. */
-static void asm_snap_alloc(ASMState *as)
+static void asm_snap_alloc(ASMState *as, int snapno)
{
- SnapShot *snap = &as->T->snap[as->snapno];
+ SnapShot *snap = &as->T->snap[snapno];
SnapEntry *map = &as->T->snapmap[snap->mapofs];
MSize n, nent = snap->nent;
as->snapfilt1 = as->snapfilt2 = 0;
@@ -893,7 +973,9 @@ static void asm_snap_alloc(ASMState *as)
if (!irref_isk(ref)) {
asm_snap_alloc1(as, ref);
if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) {
- lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP);
+ lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP,
+ "snap %d[%d] points to bad SOFTFP IR %04d",
+ snapno, n, ref - REF_BIAS);
asm_snap_alloc1(as, ref+1);
}
}
@@ -919,67 +1001,55 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren)
return 0; /* Not found. */
}
-/* Prepare snapshot for next guard instruction. */
+/* Prepare snapshot for next guard or throwing instruction. */
static void asm_snap_prep(ASMState *as)
{
- if (as->curins < as->snapref) {
- do {
- if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */
- as->snapno--;
- as->snapref = as->T->snap[as->snapno].ref;
- } while (as->curins < as->snapref);
- asm_snap_alloc(as);
+ if (as->snapalloc) {
+ /* Alloc on first invocation for each snapshot. */
+ as->snapalloc = 0;
+ asm_snap_alloc(as, as->snapno);
as->snaprename = as->T->nins;
} else {
- /* Process any renames above the highwater mark. */
+ /* Check any renames above the highwater mark. */
for (; as->snaprename < as->T->nins; as->snaprename++) {
- IRIns *ir = IR(as->snaprename);
+ IRIns *ir = &as->T->ir[as->snaprename];
if (asm_snap_checkrename(as, ir->op1))
ir->op2 = REF_BIAS-1; /* Kill rename. */
}
}
}
-/* -- Miscellaneous helpers ----------------------------------------------- */
-
-/* Collect arguments from CALL* and CARG instructions. */
-static void asm_collectargs(ASMState *as, IRIns *ir,
- const CCallInfo *ci, IRRef *args)
+/* Move to previous snapshot when we cross the current snapshot ref. */
+static void asm_snap_prev(ASMState *as)
{
- uint32_t n = CCI_NARGS(ci);
- lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */
- if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
- while (n-- > 1) {
- ir = IR(ir->op1);
- lua_assert(ir->o == IR_CARG);
- args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+ if (as->curins < as->snapref) {
+ uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp);
+ if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV);
+ do {
+ if (as->snapno == 0) return;
+ as->snapno--;
+ as->snapref = as->T->snap[as->snapno].ref;
+ as->T->snap[as->snapno].mcofs = (uint16_t)ofs; /* Remember mcode ofs. */
+  } while (as->curins < as->snapref);  /* May have no ins in between. */
+ as->snapalloc = 1;
}
- args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
- lua_assert(IR(ir->op1)->o != IR_CARG);
}
-/* Reconstruct CCallInfo flags for CALLX*. */
-static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+/* Fixup snapshot mcode offsets. */
+static void asm_snap_fixup_mcofs(ASMState *as)
{
- uint32_t nargs = 0;
- if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
- IRIns *ira = IR(ir->op1);
- nargs++;
- while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+ uint32_t sz = (uint32_t)(as->mctoporig - as->mcp);
+ SnapShot *snap = as->T->snap;
+ SnapNo i;
+ for (i = as->T->nsnap-1; i > 0; i--) {
+ /* Compute offset from mcode start and store in correct snapshot. */
+ snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs);
}
-#if LJ_HASFFI
- if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
- CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
- CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
- nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
-#if LJ_TARGET_X86
- nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
-#endif
- }
-#endif
- return (nargs | (ir->t.irt << CCI_OTSHIFT));
+ snap[0].mcofs = 0;
}
+/* -- Miscellaneous helpers ----------------------------------------------- */
+
/* Calculate stack adjustment. */
static int32_t asm_stack_adjust(ASMState *as)
{
@@ -989,21 +1059,26 @@ static int32_t asm_stack_adjust(ASMState *as)
}
/* Must match with hash*() in lj_tab.c. */
-static uint32_t ir_khash(IRIns *ir)
+static uint32_t ir_khash(ASMState *as, IRIns *ir)
{
uint32_t lo, hi;
+ UNUSED(as);
if (irt_isstr(ir->t)) {
- return ir_kstr(ir)->hash;
+ return ir_kstr(ir)->sid;
} else if (irt_isnum(ir->t)) {
lo = ir_knum(ir)->u32.lo;
hi = ir_knum(ir)->u32.hi << 1;
} else if (irt_ispri(ir->t)) {
- lua_assert(!irt_isnil(ir->t));
+ lj_assertA(!irt_isnil(ir->t), "hash of nil key");
return irt_type(ir->t)-IRT_FALSE;
} else {
- lua_assert(irt_isgcv(ir->t));
+ lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t));
lo = u32ptr(ir_kgc(ir));
+#if LJ_GC64
+ hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15);
+#else
hi = lo + HASH_BIAS;
+#endif
}
return hashrot(lo, hi);
}
@@ -1017,6 +1092,7 @@ static void asm_snew(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
IRRef args[3];
+ asm_snap_prep(as);
args[0] = ASMREF_L; /* lua_State *L */
args[1] = ir->op1; /* const char *str */
args[2] = ir->op2; /* size_t len */
@@ -1029,6 +1105,7 @@ static void asm_tnew(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
IRRef args[2];
+ asm_snap_prep(as);
args[0] = ASMREF_L; /* lua_State *L */
args[1] = ASMREF_TMP1; /* uint32_t ahsize */
as->gcsteps++;
@@ -1041,6 +1118,7 @@ static void asm_tdup(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
IRRef args[2];
+ asm_snap_prep(as);
args[0] = ASMREF_L; /* lua_State *L */
args[1] = ir->op1; /* const GCtab *kt */
as->gcsteps++;
@@ -1064,6 +1142,260 @@ static void asm_gcstep(ASMState *as, IRIns *ir)
as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */
}
+/* -- Buffer operations --------------------------------------------------- */
+
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode);
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb);
+#endif
+
+static void asm_bufhdr(ASMState *as, IRIns *ir)
+{
+ Reg sb = ra_dest(as, ir, RSET_GPR);
+ switch (ir->op2) {
+ case IRBUFHDR_RESET: {
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+ IRIns irbp;
+ irbp.ot = IRT(0, IRT_PTR); /* Buffer data pointer type. */
+ emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w));
+ emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b));
+ break;
+ }
+ case IRBUFHDR_APPEND: {
+ /* Rematerialize const buffer pointer instead of likely spill. */
+ IRIns *irp = IR(ir->op1);
+ if (!(ra_hasreg(irp->r) || irp == ir-1 ||
+ (irp == ir-2 && !ra_used(ir-1)))) {
+ while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET))
+ irp = IR(irp->op1);
+ if (irref_isk(irp->op1)) {
+ ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR));
+ ir = irp;
+ }
+ }
+ break;
+ }
+#if LJ_HASBUFFER
+ case IRBUFHDR_WRITE:
+ asm_bufhdr_write(as, sb);
+ break;
+#endif
+ default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break;
+ }
+#if LJ_TARGET_X86ORX64
+ ra_left(as, sb, ir->op1);
+#else
+ ra_leftov(as, sb, ir->op1);
+#endif
+}
+
+static void asm_bufput(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr];
+ IRRef args[3];
+ IRIns *irs;
+ int kchar = -129;
+ args[0] = ir->op1; /* SBuf * */
+ args[1] = ir->op2; /* GCstr * */
+ irs = IR(ir->op2);
+ lj_assertA(irt_isstr(irs->t),
+ "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS);
+ if (irs->o == IR_KGC) {
+ GCstr *s = ir_kstr(irs);
+ if (s->len == 1) { /* Optimize put of single-char string constant. */
+ kchar = (int8_t)strdata(s)[0]; /* Signed! */
+ args[1] = ASMREF_TMP1; /* int, truncated to char */
+ ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+ }
+ } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) {
+ if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */
+ if (irs->op2 == IRTOSTR_NUM) {
+ args[1] = ASMREF_TMP1; /* TValue * */
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum];
+ } else {
+ lj_assertA(irt_isinteger(IR(irs->op1)->t),
+ "TOSTR of non-numeric IR %04d", irs->op1);
+ args[1] = irs->op1; /* int */
+ if (irs->op2 == IRTOSTR_INT)
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint];
+ else
+ ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar];
+ }
+ } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */
+ args[1] = irs->op1; /* const void * */
+ args[2] = irs->op2; /* MSize */
+ ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem];
+ }
+ }
+ asm_setupresult(as, ir, ci); /* SBuf * */
+ asm_gencall(as, ci, args);
+ if (args[1] == ASMREF_TMP1) {
+ Reg tmp = ra_releasetmp(as, ASMREF_TMP1);
+ if (kchar == -129)
+ asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1);
+ else
+ ra_allockreg(as, kchar, tmp);
+ }
+}
+
+static void asm_bufstr(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr];
+ IRRef args[1];
+ args[0] = ir->op1; /* SBuf *sb */
+ as->gcsteps++;
+ asm_setupresult(as, ir, ci); /* GCstr * */
+ asm_gencall(as, ci, args);
+}
+
+/* -- Type conversions ---------------------------------------------------- */
+
+static void asm_tostr(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci;
+ IRRef args[2];
+ asm_snap_prep(as);
+ args[0] = ASMREF_L;
+ as->gcsteps++;
+ if (ir->op2 == IRTOSTR_NUM) {
+ args[1] = ASMREF_TMP1; /* cTValue * */
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num];
+ } else {
+ args[1] = ir->op1; /* int32_t k */
+ if (ir->op2 == IRTOSTR_INT)
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int];
+ else
+ ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char];
+ }
+ asm_setupresult(as, ir, ci); /* GCstr * */
+ asm_gencall(as, ci, args);
+ if (ir->op2 == IRTOSTR_NUM)
+ asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1);
+}
+
+#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+ IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
+ IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
+ IRCallID id;
+ IRRef args[2];
+ lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
+ "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
+ args[LJ_BE] = (ir-1)->op1;
+ args[LJ_LE] = ir->op1;
+ if (st == IRT_NUM || st == IRT_FLOAT) {
+ id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
+ ir--;
+ } else {
+ id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
+ }
+ {
+#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
+ CCallInfo cim = lj_ir_callinfo[id], *ci = &cim;
+ cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
+#else
+ const CCallInfo *ci = &lj_ir_callinfo[id];
+#endif
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+ }
+}
+#endif
+
+/* -- Memory references --------------------------------------------------- */
+
+static void asm_newref(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
+ IRRef args[3];
+ if (ir->r == RID_SINK)
+ return;
+ asm_snap_prep(as);
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* GCtab *t */
+ args[2] = ASMREF_TMP1; /* cTValue *key */
+ asm_setupresult(as, ir, ci); /* TValue * */
+ asm_gencall(as, ci, args);
+ asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1);
+}
+
+static void asm_tmpref(ASMState *as, IRIns *ir)
+{
+ Reg r = ra_dest(as, ir, RSET_GPR);
+ asm_tvptr(as, r, ir->op1, ir->op2);
+}
+
+static void asm_lref(ASMState *as, IRIns *ir)
+{
+ Reg r = ra_dest(as, ir, RSET_GPR);
+#if LJ_TARGET_X86ORX64
+ ra_left(as, r, ASMREF_L);
+#else
+ ra_leftov(as, r, ASMREF_L);
+#endif
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Collect arguments from CALL* and CARG instructions. */
+static void asm_collectargs(ASMState *as, IRIns *ir,
+ const CCallInfo *ci, IRRef *args)
+{
+ uint32_t n = CCI_XNARGS(ci);
+ /* Account for split args. */
+ lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n);
+ if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
+ while (n-- > 1) {
+ ir = IR(ir->op1);
+ lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree");
+ args[n] = ir->op2 == REF_NIL ? 0 : ir->op2;
+ }
+ args[0] = ir->op1 == REF_NIL ? 0 : ir->op1;
+ lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree");
+}
+
+/* Reconstruct CCallInfo flags for CALLX*. */
+static uint32_t asm_callx_flags(ASMState *as, IRIns *ir)
+{
+ uint32_t nargs = 0;
+ if (ir->op1 != REF_NIL) { /* Count number of arguments first. */
+ IRIns *ira = IR(ir->op1);
+ nargs++;
+ while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); }
+ }
+#if LJ_HASFFI
+ if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */
+ CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i;
+ CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id);
+ nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0);
+#if LJ_TARGET_X86
+ nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT);
+#endif
+ }
+#endif
+ return (nargs | (ir->t.irt << CCI_OTSHIFT));
+}
+
+static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[id];
+ IRRef args[2];
+ args[0] = ir->op1;
+ args[1] = ir->op2;
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+}
+
+static void asm_call(ASMState *as, IRIns *ir)
+{
+ IRRef args[CCI_NARGS_MAX];
+ const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
+ asm_collectargs(as, ir, ci, args);
+ asm_setupresult(as, ir, ci);
+ asm_gencall(as, ci, args);
+}
+
/* -- PHI and loop handling ----------------------------------------------- */
/* Break a PHI cycle by renaming to a free register (evict if needed). */
@@ -1249,12 +1581,7 @@ static void asm_phi_fixup(ASMState *as)
irt_clearmark(ir->t);
/* Left PHI gained a spill slot before the loop? */
if (ra_hasspill(ir->s)) {
- IRRef ren;
- lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno);
- ren = tref_ref(lj_ir_emit(as->J));
- as->ir = as->T->ir; /* The IR may have been reallocated. */
- IR(ren)->r = (uint8_t)r;
- IR(ren)->s = SPS_NONE;
+ ra_addrename(as, r, lref, as->loopsnapno);
}
}
rset_clear(work, r);
@@ -1329,6 +1656,8 @@ static void asm_loop(ASMState *as)
#include "lj_asm_x86.h"
#elif LJ_TARGET_ARM
#include "lj_asm_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_asm_arm64.h"
#elif LJ_TARGET_PPC
#include "lj_asm_ppc.h"
#elif LJ_TARGET_MIPS
@@ -1337,6 +1666,200 @@ static void asm_loop(ASMState *as)
#error "Missing assembler for target CPU"
#endif
+/* -- Common instruction helpers ------------------------------------------ */
+
+#if !LJ_SOFTFP32
+#if !LJ_TARGET_X86ORX64
+#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp)
+#endif
+
+static void asm_pow(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+ if (!irt_isnum(ir->t))
+ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
+ IRCALL_lj_carith_powu64);
+ else
+#endif
+ asm_callid(as, ir, IRCALL_pow);
+}
+
+static void asm_div(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+ if (!irt_isnum(ir->t))
+ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
+ IRCALL_lj_carith_divu64);
+ else
+#endif
+ asm_fpdiv(as, ir);
+}
+#endif
+
+static void asm_mod(ASMState *as, IRIns *ir)
+{
+#if LJ_64 && LJ_HASFFI
+ if (!irt_isint(ir->t))
+ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
+ IRCALL_lj_carith_modu64);
+ else
+#endif
+ asm_callid(as, ir, IRCALL_lj_vm_modi);
+}
+
+static void asm_fuseequal(ASMState *as, IRIns *ir)
+{
+ /* Fuse HREF + EQ/NE. */
+ if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
+ as->curins--;
+ asm_href(as, ir-1, (IROp)ir->o);
+ } else {
+ asm_equal(as, ir);
+ }
+}
+
+static void asm_alen(ASMState *as, IRIns *ir)
+{
+ asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len :
+ IRCALL_lj_tab_len_hint);
+}
+
+/* -- Instruction dispatch ------------------------------------------------ */
+
+/* Assemble a single instruction. */
+static void asm_ir(ASMState *as, IRIns *ir)
+{
+ switch ((IROp)ir->o) {
+ /* Miscellaneous ops. */
+ case IR_LOOP: asm_loop(as); break;
+ case IR_NOP: case IR_XBAR:
+ lj_assertA(!ra_used(ir),
+ "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS);
+ break;
+ case IR_USE:
+ ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
+ case IR_PHI: asm_phi(as, ir); break;
+ case IR_HIOP: asm_hiop(as, ir); break;
+ case IR_GCSTEP: asm_gcstep(as, ir); break;
+ case IR_PROF: asm_prof(as, ir); break;
+
+ /* Guarded assertions. */
+ case IR_LT: case IR_GE: case IR_LE: case IR_GT:
+ case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
+ case IR_ABC:
+ asm_comp(as, ir);
+ break;
+ case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break;
+
+ case IR_RETF: asm_retf(as, ir); break;
+
+ /* Bit ops. */
+ case IR_BNOT: asm_bnot(as, ir); break;
+ case IR_BSWAP: asm_bswap(as, ir); break;
+ case IR_BAND: asm_band(as, ir); break;
+ case IR_BOR: asm_bor(as, ir); break;
+ case IR_BXOR: asm_bxor(as, ir); break;
+ case IR_BSHL: asm_bshl(as, ir); break;
+ case IR_BSHR: asm_bshr(as, ir); break;
+ case IR_BSAR: asm_bsar(as, ir); break;
+ case IR_BROL: asm_brol(as, ir); break;
+ case IR_BROR: asm_bror(as, ir); break;
+
+ /* Arithmetic ops. */
+ case IR_ADD: asm_add(as, ir); break;
+ case IR_SUB: asm_sub(as, ir); break;
+ case IR_MUL: asm_mul(as, ir); break;
+ case IR_MOD: asm_mod(as, ir); break;
+ case IR_NEG: asm_neg(as, ir); break;
+#if LJ_SOFTFP32
+ case IR_DIV: case IR_POW: case IR_ABS:
+ case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
+ /* Unused for LJ_SOFTFP32. */
+ lj_assertA(0, "IR %04d with unused op %d",
+ (int)(ir - as->ir) - REF_BIAS, ir->o);
+ break;
+#else
+ case IR_DIV: asm_div(as, ir); break;
+ case IR_POW: asm_pow(as, ir); break;
+ case IR_ABS: asm_abs(as, ir); break;
+ case IR_LDEXP: asm_ldexp(as, ir); break;
+ case IR_FPMATH: asm_fpmath(as, ir); break;
+ case IR_TOBIT: asm_tobit(as, ir); break;
+#endif
+ case IR_MIN: asm_min(as, ir); break;
+ case IR_MAX: asm_max(as, ir); break;
+
+ /* Overflow-checking arithmetic ops. */
+ case IR_ADDOV: asm_addov(as, ir); break;
+ case IR_SUBOV: asm_subov(as, ir); break;
+ case IR_MULOV: asm_mulov(as, ir); break;
+
+ /* Memory references. */
+ case IR_AREF: asm_aref(as, ir); break;
+ case IR_HREF: asm_href(as, ir, 0); break;
+ case IR_HREFK: asm_hrefk(as, ir); break;
+ case IR_NEWREF: asm_newref(as, ir); break;
+ case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
+ case IR_FREF: asm_fref(as, ir); break;
+ case IR_TMPREF: asm_tmpref(as, ir); break;
+ case IR_STRREF: asm_strref(as, ir); break;
+ case IR_LREF: asm_lref(as, ir); break;
+
+ /* Loads and stores. */
+ case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+ asm_ahuvload(as, ir);
+ break;
+ case IR_FLOAD: asm_fload(as, ir); break;
+ case IR_XLOAD: asm_xload(as, ir); break;
+ case IR_SLOAD: asm_sload(as, ir); break;
+ case IR_ALEN: asm_alen(as, ir); break;
+
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
+ case IR_FSTORE: asm_fstore(as, ir); break;
+ case IR_XSTORE: asm_xstore(as, ir); break;
+
+ /* Allocations. */
+ case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
+ case IR_TNEW: asm_tnew(as, ir); break;
+ case IR_TDUP: asm_tdup(as, ir); break;
+ case IR_CNEW: case IR_CNEWI:
+#if LJ_HASFFI
+ asm_cnew(as, ir);
+#else
+ lj_assertA(0, "IR %04d with unused op %d",
+ (int)(ir - as->ir) - REF_BIAS, ir->o);
+#endif
+ break;
+
+ /* Buffer operations. */
+ case IR_BUFHDR: asm_bufhdr(as, ir); break;
+ case IR_BUFPUT: asm_bufput(as, ir); break;
+ case IR_BUFSTR: asm_bufstr(as, ir); break;
+
+ /* Write barriers. */
+ case IR_TBAR: asm_tbar(as, ir); break;
+ case IR_OBAR: asm_obar(as, ir); break;
+
+ /* Type conversions. */
+ case IR_CONV: asm_conv(as, ir); break;
+ case IR_TOSTR: asm_tostr(as, ir); break;
+ case IR_STRTO: asm_strto(as, ir); break;
+
+ /* Calls. */
+ case IR_CALLA:
+ as->gcsteps++;
+ /* fallthrough */
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
+ case IR_CALLXS: asm_callx(as, ir); break;
+ case IR_CARG: break;
+
+ default:
+ setintV(&as->J->errinfo, ir->o);
+ lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
+ break;
+ }
+}
+
/* -- Head of trace ------------------------------------------------------- */
/* Head of a root trace. */
@@ -1373,8 +1896,7 @@ static void asm_head_side(ASMState *as)
if (as->snapno && as->topslot > as->parent->topslot) {
/* Force snap #0 alloc to prevent register overwrite in stack check. */
- as->snapno = 0;
- asm_snap_alloc(as);
+ asm_snap_alloc(as, 0);
}
allow = asm_head_side_base(as, irp, allow);
@@ -1382,8 +1904,10 @@ static void asm_head_side(ASMState *as)
for (i = as->stopins; i > REF_BASE; i--) {
IRIns *ir = IR(i);
RegSP rs;
- lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
- (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL);
+ lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) ||
+ (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL,
+ "IR %04d has bad parent op %d",
+ (int)(ir - as->ir) - REF_BIAS, ir->o);
rs = as->parentmap[i - REF_FIRST];
if (ra_hasreg(ir->r)) {
rset_clear(allow, ir->r);
@@ -1535,7 +2059,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe)
SnapEntry sn = map[n-1];
if ((sn & SNAP_FRAME)) {
*gotframe = 1;
- return snap_slot(sn);
+ return snap_slot(sn) - LJ_FR2;
}
}
return 0;
@@ -1555,19 +2079,23 @@ static void asm_tail_link(ASMState *as)
if (as->T->link == 0) {
/* Setup fixed registers for exit to interpreter. */
- const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]);
+ const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]);
int32_t mres;
if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */
BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins;
if (bc_isret(bc_op(*retpc)))
pc = retpc;
}
+#if LJ_GC64
+ emit_loadu64(as, RID_LPC, u64ptr(pc));
+#else
ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH);
ra_allockreg(as, i32ptr(pc), RID_LPC);
- mres = (int32_t)(snap->nslots - baseslot);
+#endif
+ mres = (int32_t)(snap->nslots - baseslot - LJ_FR2);
switch (bc_op(*pc)) {
case BC_CALLM: case BC_CALLMT:
- mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break;
+ mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break;
case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break;
case BC_TSETM: mres -= (int32_t)bc_a(*pc); break;
default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break;
@@ -1579,6 +2107,11 @@ static void asm_tail_link(ASMState *as)
}
emit_addptr(as, RID_BASE, 8*(int32_t)baseslot);
+ if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */
+ setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal));
+ IR(as->J->ktrace)->o = IR_KGC;
+ }
+
/* Sync the interpreter state with the on-trace state. */
asm_stack_restore(as, snap);
@@ -1602,22 +2135,32 @@ static void asm_setup_regsp(ASMState *as)
#endif
ra_setup(as);
+#if LJ_TARGET_ARM64
+ ra_setkref(as, RID_GL, (intptr_t)J2G(as->J));
+#endif
/* Clear reg/sp for constants. */
- for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++)
+ for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) {
ir->prev = REGSP_INIT;
+ if (irt_is64(ir->t) && ir->o != IR_KNULL) {
+#if LJ_GC64
+ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+ ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */
+#else
+ /* Make life easier for backends by putting address of constant in i. */
+ ir->i = (int32_t)(intptr_t)(ir+1);
+#endif
+ ir++;
+ }
+ }
/* REF_BASE is used for implicit references to the BASE register. */
lastir->prev = REGSP_HINT(RID_BASE);
- ir = IR(nins-1);
- if (ir->o == IR_RENAME) {
- do { ir--; nins--; } while (ir->o == IR_RENAME);
- T->nins = nins; /* Remove any renames left over from ASM restart. */
- }
as->snaprename = nins;
as->snapref = nins;
as->snapno = T->nsnap;
+ as->snapalloc = 0;
as->stopins = REF_BASE;
as->orignins = nins;
@@ -1627,7 +2170,7 @@ static void asm_setup_regsp(ASMState *as)
ir = IR(REF_FIRST);
if (as->parent) {
uint16_t *p;
- lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir);
+ lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir);
if (lastir - ir > LJ_MAX_JSLOTS)
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
as->stopins = (IRRef)((lastir-1) - as->ir);
@@ -1666,6 +2209,10 @@ static void asm_setup_regsp(ASMState *as)
ir->prev = (uint16_t)REGSP_HINT((rload & 15));
rload = lj_ror(rload, 4);
continue;
+ case IR_TMPREF:
+ if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4)
+ as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */
+ break;
#endif
case IR_CALLXS: {
CCallInfo ci;
@@ -1675,7 +2222,17 @@ static void asm_setup_regsp(ASMState *as)
as->modset |= RSET_SCRATCH;
continue;
}
- case IR_CALLN: case IR_CALLL: case IR_CALLS: {
+ case IR_CALLL:
+ /* lj_vm_next needs two TValues on the stack. */
+#if LJ_TARGET_X64 && LJ_ABI_WIN
+ if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4)
+ as->evenspill = SPS_FIRST + 4;
+#else
+ if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4)
+ as->evenspill = 4;
+#endif
+ /* fallthrough */
+ case IR_CALLN: case IR_CALLA: case IR_CALLS: {
const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
ir->prev = asm_setup_call_slots(as, ir, ci);
if (inloop)
@@ -1683,7 +2240,6 @@ static void asm_setup_regsp(ASMState *as)
(RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
continue;
}
-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
case IR_HIOP:
switch ((ir-1)->o) {
#if LJ_SOFTFP && LJ_TARGET_ARM
@@ -1694,15 +2250,15 @@ static void asm_setup_regsp(ASMState *as)
}
break;
#endif
-#if !LJ_SOFTFP && LJ_NEED_FP64
+#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI
case IR_CONV:
if (irt_isfp((ir-1)->t)) {
ir->prev = REGSP_HINT(RID_FPRET);
continue;
}
- /* fallthrough */
#endif
- case IR_CALLN: case IR_CALLXS:
+ /* fallthrough */
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
#if LJ_SOFTFP
case IR_MIN: case IR_MAX:
#endif
@@ -1713,18 +2269,29 @@ static void asm_setup_regsp(ASMState *as)
break;
}
break;
-#endif
#if LJ_SOFTFP
case IR_MIN: case IR_MAX:
if ((ir+1)->o != IR_HIOP) break;
#endif
/* fallthrough */
/* C calls evict all scratch regs and return results in RID_RET. */
- case IR_SNEW: case IR_XSNEW: case IR_NEWREF:
+ case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT:
if (REGARG_NUMGPR < 3 && as->evenspill < 3)
as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */
+#if LJ_TARGET_X86 && LJ_HASFFI
+ if (0) {
+ case IR_CNEW:
+ if (ir->op2 != REF_NIL && as->evenspill < 4)
+ as->evenspill = 4; /* lj_cdata_newv needs 4 args. */
+ }
/* fallthrough */
- case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR:
+#else
+ /* fallthrough */
+ case IR_CNEW:
+#endif
+ /* fallthrough */
+ case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR:
+ case IR_BUFSTR:
ir->prev = REGSP_HINT(RID_RET);
if (inloop)
as->modset = RSET_SCRATCH;
@@ -1733,58 +2300,73 @@ static void asm_setup_regsp(ASMState *as)
if (inloop)
as->modset = RSET_SCRATCH;
break;
-#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP
- case IR_ATAN2: case IR_LDEXP:
+#if !LJ_SOFTFP
+#if !LJ_TARGET_X86ORX64
+ case IR_LDEXP:
#endif
+#endif
+ /* fallthrough */
case IR_POW:
if (!LJ_SOFTFP && irt_isnum(ir->t)) {
-#if LJ_TARGET_X86ORX64
- ir->prev = REGSP_HINT(RID_XMM0);
if (inloop)
- as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
+ as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+ if (irt_isnum(IR(ir->op2)->t)) {
+ if (as->evenspill < 4) /* Leave room to call pow(). */
+ as->evenspill = 4;
+ }
+ break;
#else
ir->prev = REGSP_HINT(RID_FPRET);
- if (inloop)
- as->modset |= RSET_SCRATCH;
-#endif
continue;
+#endif
}
/* fallthrough */ /* for integer POW */
case IR_DIV: case IR_MOD:
- if (!irt_isnum(ir->t)) {
+ if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) {
ir->prev = REGSP_HINT(RID_RET);
if (inloop)
as->modset |= (RSET_SCRATCH & RSET_GPR);
continue;
}
break;
- case IR_FPMATH:
-#if LJ_TARGET_X86ORX64
- if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */
- ir->prev = REGSP_HINT(RID_XMM0);
-#if !LJ_64
- if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */
- as->evenspill = 4;
-#endif
- if (inloop)
- as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
- continue;
- } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) {
- ir->prev = REGSP_HINT(RID_XMM0);
+#if LJ_64 && LJ_SOFTFP
+ case IR_ADD: case IR_SUB: case IR_MUL:
+ if (irt_isnum(ir->t)) {
+ ir->prev = REGSP_HINT(RID_RET);
if (inloop)
- as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+ as->modset |= (RSET_SCRATCH & RSET_GPR);
continue;
}
break;
-#else
- ir->prev = REGSP_HINT(RID_FPRET);
+#endif
+ case IR_FPMATH:
+#if LJ_TARGET_X86ORX64
+ if (ir->op2 <= IRFPM_TRUNC) {
+ if (!(as->flags & JIT_F_SSE4_1)) {
+ ir->prev = REGSP_HINT(RID_XMM0);
+ if (inloop)
+ as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX);
+ continue;
+ }
+ break;
+ }
+#endif
if (inloop)
as->modset |= RSET_SCRATCH;
+#if LJ_TARGET_X86
+ break;
+#else
+ ir->prev = REGSP_HINT(RID_FPRET);
continue;
#endif
#if LJ_TARGET_X86ORX64
/* Non-constant shift counts need to be in RID_ECX on x86/x64. */
- case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+ case IR_BSHL: case IR_BSHR: case IR_BSAR:
+ if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */
+ break;
+ /* fallthrough */
+ case IR_BROL: case IR_BROR:
if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) {
IR(ir->op2)->r = REGSP_HINT(RID_ECX);
if (inloop)
@@ -1828,16 +2410,26 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
{
ASMState as_;
ASMState *as = &as_;
- MCode *origtop;
+
+ /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */
+ {
+ IRRef nins = T->nins;
+ IRIns *ir = &T->ir[nins-1];
+ if (ir->o == IR_NOP || ir->o == IR_RENAME) {
+ do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME);
+ T->nins = nins;
+ }
+ }
/* Ensure an initialized instruction beyond the last one for HIOP checks. */
- J->cur.nins = lj_ir_nextins(J);
- lj_ir_nop(&J->cur.ir[J->cur.nins]);
+ /* This also allows one RENAME to be added without reallocating curfinal. */
+ as->orignins = lj_ir_nextins(J);
+ lj_ir_nop(&J->cur.ir[as->orignins]);
/* Setup initial state. Copy some fields to reduce indirections. */
as->J = J;
as->T = T;
- as->ir = T->ir;
+ J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */
as->flags = J->flags;
as->loopref = J->loopref;
as->realign = NULL;
@@ -1845,17 +2437,46 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
as->parent = J->parent ? traceref(J, J->parent) : NULL;
/* Reserve MCode memory. */
- as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot);
+ as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot);
as->mcp = as->mctop;
as->mclim = as->mcbot + MCLIM_REDZONE;
asm_setup_target(as);
- do {
+ /*
+ ** This is a loop, because the MCode may have to be (re-)assembled
+ ** multiple times:
+ **
+ ** 1. as->realign is set (and the assembly aborted), if the arch-specific
+ ** backend wants the MCode to be aligned differently.
+ **
+ ** This is currently only the case on x86/x64, where small loops get
+ ** an aligned loop body plus a short branch. Not much effort is wasted,
+ ** because the abort happens very quickly and only once.
+ **
+ ** 2. The IR is immovable, since the MCode embeds pointers to various
+ ** constants inside the IR. But RENAMEs may need to be added to the IR
+ ** during assembly, which might grow and reallocate the IR. We check
+ ** at the end if the IR (in J->cur.ir) has actually grown, resize the
+ ** copy (in J->curfinal.ir) and try again.
+ **
+ ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have
+ ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to
+ ** always have one spare slot in the IR (see above), which means we
+ ** have to redo the assembly for only ~2% of all traces.
+ **
+ ** Very, very rarely, this needs to be done repeatedly, since the
+ ** location of constants inside the IR (actually, reachability from
+ ** a global pointer) may affect register allocation and thus the
+ ** number of RENAMEs.
+ */
+ for (;;) {
as->mcp = as->mctop;
#ifdef LUA_USE_ASSERT
as->mcp_prev = as->mcp;
#endif
- as->curins = T->nins;
+ as->ir = J->curfinal->ir; /* Use the copied IR. */
+ as->curins = J->cur.nins = as->orignins;
+
RA_DBG_START();
RA_DBGX((as, "===== STOP ====="));
@@ -1874,7 +2495,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
/* Assemble a trace in linear backwards order. */
for (as->curins--; as->curins > as->stopins; as->curins--) {
IRIns *ir = IR(as->curins);
- lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */
+ /* 64 bit types handled by SPLIT for 32 bit archs. */
+ lj_assertA(!(LJ_32 && irt_isint64(ir->t)),
+ "IR %04d has unsplit 64 bit type",
+ (int)(ir - as->ir) - REF_BIAS);
+ asm_snap_prev(as);
if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE))
continue; /* Dead-code elimination can be soooo easy. */
if (irt_isguard(ir->t))
@@ -1883,22 +2508,43 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
checkmclim(as);
asm_ir(as, ir);
}
- } while (as->realign); /* Retry in case the MCode needs to be realigned. */
- /* Emit head of trace. */
- RA_DBG_REF();
- checkmclim(as);
- if (as->gcsteps > 0) {
- as->curins = as->T->snap[0].ref;
- asm_snap_prep(as); /* The GC check is a guard. */
- asm_gc_check(as);
+ if (as->realign && J->curfinal->nins >= T->nins)
+ continue; /* Retry in case only the MCode needs to be realigned. */
+
+ /* Emit head of trace. */
+ RA_DBG_REF();
+ checkmclim(as);
+ if (as->gcsteps > 0) {
+ as->curins = as->T->snap[0].ref;
+ asm_snap_prep(as); /* The GC check is a guard. */
+ asm_gc_check(as);
+ as->curins = as->stopins;
+ }
+ ra_evictk(as);
+ if (as->parent)
+ asm_head_side(as);
+ else
+ asm_head_root(as);
+ asm_phi_fixup(as);
+
+ if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */
+ lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth");
+ memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins,
+ (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */
+ T->nins = J->curfinal->nins;
+ /* Fill mcofs of any unprocessed snapshots. */
+ as->curins = REF_FIRST;
+ asm_snap_prev(as);
+ break; /* Done. */
+ }
+
+ /* Otherwise try again with a bigger IR. */
+ lj_trace_free(J2G(J), J->curfinal);
+ J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */
+ J->curfinal = lj_trace_alloc(J->L, T);
+ as->realign = NULL;
}
- ra_evictk(as);
- if (as->parent)
- asm_head_side(as);
- else
- asm_head_root(as);
- asm_phi_fixup(as);
RA_DBGX((as, "===== START ===="));
RA_DBG_FLUSH();
@@ -1908,10 +2554,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T)
/* Set trace entry point before fixing up tail to allow link to self. */
T->mcode = as->mcp;
T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0;
- if (!as->loopref)
+ if (as->loopref)
+ asm_loop_tail_fixup(as);
+ else
asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */
T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp);
- lj_mcode_sync(T->mcode, origtop);
+ asm_snap_fixup_mcofs(as);
+#if LJ_TARGET_MCODE_FIXUP
+ asm_mcode_fixup(T->mcode, T->szmcode);
+#endif
+ lj_mcode_sync(T->mcode, as->mctoporig);
}
#undef IR
diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h
index 262fa59e..326330f4 100644
--- a/src/lj_asm_arm.h
+++ b/src/lj_asm_arm.h
@@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow)
}
}
}
- lua_assert(rset_test(RSET_GPREVEN, r));
+ lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r);
ra_modified(as, r);
ra_modified(as, r+1);
RA_DBGX((as, "scratchpair $r $r", r, r+1));
@@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
*ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */
return ra_allock(as, (ofs & ~255), allow);
}
+ } else if (ir->o == IR_TMPREF) {
+ *ofsp = 0;
+ return RID_SP;
}
}
*ofsp = 0;
@@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref,
return;
}
} else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) {
- lua_assert(ofs == 0);
+ lj_assertA(ofs == 0, "bad usage");
ofs = (int32_t)sizeof(GCstr);
if (irref_isk(ir->op2)) {
ofs += IR(ir->op2)->i;
@@ -338,7 +341,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air)
/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
- uint32_t n, nargs = CCI_NARGS(ci);
+ uint32_t n, nargs = CCI_XNARGS(ci);
int32_t ofs = 0;
#if LJ_SOFTFP
Reg gpr = REGARG_FIRSTGPR;
@@ -389,9 +392,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u;
if (gpr <= REGARG_LASTGPR) {
- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
+ lj_assertA(rset_test(as->freeset, gpr),
+ "reg %d not free", gpr); /* Must have been evicted. */
if (irt_isnum(ir->t)) {
- lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */
+ lj_assertA(rset_test(as->freeset, gpr+1),
+ "reg %d not free", gpr+1); /* Ditto. */
emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15));
gpr += 2;
} else {
@@ -408,7 +413,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
#endif
{
if (gpr <= REGARG_LASTGPR) {
- lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
+ lj_assertA(rset_test(as->freeset, gpr),
+ "reg %d not free", gpr); /* Must have been evicted. */
if (ref) ra_leftov(as, gpr, ref);
gpr++;
} else {
@@ -433,7 +439,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
ra_evictset(as, drop); /* Evictions must be performed first. */
if (ra_used(ir)) {
- lua_assert(!irt_ispri(ir->t));
+ lj_assertA(!irt_ispri(ir->t), "PRI dest");
if (!LJ_SOFTFP && irt_isfp(ir->t)) {
if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) {
Reg dest = (ra_dest(as, ir, RSET_FPR) & 15);
@@ -453,15 +459,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
UNUSED(ci);
}
-static void asm_call(ASMState *as, IRIns *ir)
-{
- IRRef args[CCI_NARGS_MAX];
- const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
- asm_collectargs(as, ir, ci, args);
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
-}
-
static void asm_callx(ASMState *as, IRIns *ir)
{
IRRef args[CCI_NARGS_MAX*2];
@@ -490,7 +487,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
{
Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
void *pc = ir_kptr(IR(ir->op2));
- int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -504,6 +501,30 @@ static void asm_retf(ASMState *as, IRIns *ir)
emit_lso(as, ARMI_LDR, RID_TMP, base, -4);
}
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+ IRIns irgc;
+ int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L);
+ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
+ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+ if ((as->flags & JIT_F_ARMV6T2)) {
+ emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp);
+ } else {
+ emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp);
+ emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp);
+ }
+ emit_lso(as, ARMI_LDR, RID_TMP,
+ ra_allock(as, (addr & ~4095),
+ rset_exclude(rset_exclude(RSET_GPR, sb), tmp)),
+ (addr & 4095));
+ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
/* -- Type conversions ---------------------------------------------------- */
#if !LJ_SOFTFP
@@ -539,13 +560,17 @@ static void asm_conv(ASMState *as, IRIns *ir)
#endif
IRRef lref = ir->op1;
/* 64 bit integer conversions are handled by SPLIT. */
- lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64));
+ lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64),
+ "IR %04d has unsplit 64 bit type",
+ (int)(ir - as->ir) - REF_BIAS);
#if LJ_SOFTFP
/* FP conversions are handled by SPLIT. */
- lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
+ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
+ "IR %04d has FP type",
+ (int)(ir - as->ir) - REF_BIAS);
/* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
#else
- lua_assert(irt_type(ir->t) != st);
+ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
if (irt_isfp(ir->t)) {
Reg dest = ra_dest(as, ir, RSET_FPR);
if (stfp) { /* FP to FP conversion. */
@@ -562,7 +587,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
} else if (stfp) { /* FP to integer conversion. */
if (irt_isguard(ir->t)) {
/* Checked conversions are only supported from number to int. */
- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+ "bad type for checked CONV");
asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
} else {
Reg left = ra_alloc1(as, lref, RSET_FPR);
@@ -581,7 +607,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg dest = ra_dest(as, ir, RSET_GPR);
if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
Reg left = ra_alloc1(as, lref, RSET_GPR);
- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
if ((as->flags & JIT_F_ARMV6)) {
ARMIns ai = st == IRT_I8 ? ARMI_SXTB :
st == IRT_U8 ? ARMI_UXTB :
@@ -601,31 +627,6 @@ static void asm_conv(ASMState *as, IRIns *ir)
}
}
-#if !LJ_SOFTFP && LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
- IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
- IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
- IRCallID id;
- CCallInfo ci;
- IRRef args[2];
- args[0] = (ir-1)->op1;
- args[1] = ir->op1;
- if (st == IRT_NUM || st == IRT_FLOAT) {
- id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
- ir--;
- } else {
- id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
- }
- ci = lj_ir_callinfo[id];
-#if !LJ_ABI_SOFTFP
- ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
-#endif
- asm_setupresult(as, ir, &ci);
- asm_gencall(as, &ci, args);
-}
-#endif
-
static void asm_strto(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
@@ -689,60 +690,61 @@ static void asm_strto(ASMState *as, IRIns *ir)
emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR);
}
+/* -- Memory references --------------------------------------------------- */
+
/* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
{
- IRIns *ir = IR(ref);
- if (irt_isnum(ir->t)) {
- if (irref_isk(ref)) {
- /* Use the number constant itself as a TValue. */
- ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
- } else {
+ if ((mode & IRTMPREF_IN1)) {
+ IRIns *ir = IR(ref);
+ if (irt_isnum(ir->t)) {
+ if ((mode & IRTMPREF_OUT1)) {
#if LJ_SOFTFP
- lua_assert(0);
+ lj_assertA(irref_isk(ref), "unsplit FP op");
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ emit_lso(as, ARMI_STR,
+ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+ RID_SP, 0);
+ emit_lso(as, ARMI_STR,
+ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+ RID_SP, 4);
#else
- /* Otherwise force a spill and use the spill slot. */
- emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0);
#endif
+ } else if (irref_isk(ref)) {
+ /* Use the number constant itself as a TValue. */
+ ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+ } else {
+#if LJ_SOFTFP
+ lj_assertA(0, "unsplit FP op");
+#else
+ /* Otherwise force a spill and use the spill slot. */
+ emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
+#endif
+ }
+ } else {
+ /* Otherwise use [sp] and [sp+4] to hold the TValue.
+ ** This assumes the following call has max. 4 args.
+ */
+ Reg type;
+ emit_dm(as, ARMI_MOV, dest, RID_SP);
+ if (!irt_ispri(ir->t)) {
+ Reg src = ra_alloc1(as, ref, RSET_GPR);
+ emit_lso(as, ARMI_STR, src, RID_SP, 0);
+ }
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+ type = ra_alloc1(as, ref+1, RSET_GPR);
+ else
+ type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+ emit_lso(as, ARMI_STR, type, RID_SP, 4);
}
} else {
- /* Otherwise use [sp] and [sp+4] to hold the TValue. */
- RegSet allow = rset_exclude(RSET_GPR, dest);
- Reg type;
emit_dm(as, ARMI_MOV, dest, RID_SP);
- if (!irt_ispri(ir->t)) {
- Reg src = ra_alloc1(as, ref, allow);
- emit_lso(as, ARMI_STR, src, RID_SP, 0);
- }
- if ((ir+1)->o == IR_HIOP)
- type = ra_alloc1(as, ref+1, allow);
- else
- type = ra_allock(as, irt_toitype(ir->t), allow);
- emit_lso(as, ARMI_STR, type, RID_SP, 4);
}
}
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
- IRRef args[2];
- args[0] = ASMREF_L;
- as->gcsteps++;
- if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
- args[1] = ASMREF_TMP1; /* const lua_Number * */
- asm_setupresult(as, ir, ci); /* GCstr * */
- asm_gencall(as, ci, args);
- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
- } else {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
- args[1] = ir->op1; /* int32_t k */
- asm_setupresult(as, ir, ci); /* GCstr * */
- asm_gencall(as, ci, args);
- }
-}
-
-/* -- Memory references --------------------------------------------------- */
-
static void asm_aref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -864,16 +866,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
*l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu);
/* Load main position relative to tab->node into dest. */
- khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+ khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1;
if (khash == 0) {
emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
} else {
emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp);
emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp);
- if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */
+ if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. */
emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP);
emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node));
- emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash));
+ emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid));
emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask));
} else if (irref_isk(refkey)) {
emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash,
@@ -920,7 +922,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
Reg key = RID_NONE, type = RID_TMP, idx = node;
RegSet allow = rset_exclude(RSET_GPR, node);
- lua_assert(ofs % sizeof(Node) == 0);
+ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
if (ofs > 4095) {
idx = dest;
rset_clear(allow, dest);
@@ -960,20 +962,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR);
}
-static void asm_newref(ASMState *as, IRIns *ir)
-{
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
- IRRef args[3];
- if (ir->r == RID_SINK)
- return;
- args[0] = ASMREF_L; /* lua_State *L */
- args[1] = ir->op1; /* GCtab *t */
- args[2] = ASMREF_TMP1; /* cTValue *key */
- asm_setupresult(as, ir, ci); /* TValue * */
- asm_gencall(as, ci, args);
- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1001,7 +989,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
static void asm_fref(ASMState *as, IRIns *ir)
{
UNUSED(as); UNUSED(ir);
- lua_assert(!ra_used(ir));
+ lj_assertA(!ra_used(ir), "unfused FREF");
}
static void asm_strref(ASMState *as, IRIns *ir)
@@ -1038,25 +1026,27 @@ static void asm_strref(ASMState *as, IRIns *ir)
/* -- Loads and stores ---------------------------------------------------- */
-static ARMIns asm_fxloadins(IRIns *ir)
+static ARMIns asm_fxloadins(ASMState *as, IRIns *ir)
{
+ UNUSED(as);
switch (irt_type(ir->t)) {
case IRT_I8: return ARMI_LDRSB;
case IRT_U8: return ARMI_LDRB;
case IRT_I16: return ARMI_LDRSH;
case IRT_U16: return ARMI_LDRH;
- case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D;
+ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D;
case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */
default: return ARMI_LDR;
}
}
-static ARMIns asm_fxstoreins(IRIns *ir)
+static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir)
{
+ UNUSED(as);
switch (irt_type(ir->t)) {
case IRT_I8: case IRT_U8: return ARMI_STRB;
case IRT_I16: case IRT_U16: return ARMI_STRH;
- case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D;
+ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D;
case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */
default: return ARMI_STR;
}
@@ -1065,17 +1055,23 @@ static ARMIns asm_fxstoreins(IRIns *ir)
static void asm_fload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
- ARMIns ai = asm_fxloadins(ir);
+ ARMIns ai = asm_fxloadins(as, ir);
+ Reg idx;
int32_t ofs;
- if (ir->op2 == IRFL_TAB_ARRAY) {
- ofs = asm_fuseabase(as, ir->op1);
- if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
- emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
- return;
+ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+ idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR);
+ ofs = 0;
+ } else {
+ idx = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->op2 == IRFL_TAB_ARRAY) {
+ ofs = asm_fuseabase(as, ir->op1);
+ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
+ emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx);
+ return;
+ }
}
+ ofs = field_ofs[ir->op2];
}
- ofs = field_ofs[ir->op2];
if ((ai & 0x04000000))
emit_lso(as, ai, dest, idx, ofs);
else
@@ -1089,7 +1085,7 @@ static void asm_fstore(ASMState *as, IRIns *ir)
IRIns *irf = IR(ir->op1);
Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
int32_t ofs = field_ofs[irf->op2];
- ARMIns ai = asm_fxstoreins(ir);
+ ARMIns ai = asm_fxstoreins(as, ir);
if ((ai & 0x04000000))
emit_lso(as, ai, src, idx, ofs);
else
@@ -1101,20 +1097,22 @@ static void asm_xload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir,
(!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
+ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
+ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
}
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
{
if (ir->r != RID_SINK) {
Reg src = ra_alloc1(as, ir->op2,
(!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
rset_exclude(RSET_GPR, src), ofs);
}
}
+#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
+
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
@@ -1127,13 +1125,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
rset_clear(allow, type);
}
if (ra_used(ir)) {
- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
- irt_isint(ir->t) || irt_isaddr(ir->t));
+ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+ irt_isint(ir->t) || irt_isaddr(ir->t),
+ "bad load type %d", irt_type(ir->t));
dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
rset_clear(allow, dest);
}
idx = asm_fuseahuref(as, ir->op1, &ofs, allow,
(!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096);
+ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
if (!hiop || type == RID_NONE) {
rset_clear(allow, idx);
if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 &&
@@ -1194,10 +1194,13 @@ static void asm_sload(ASMState *as, IRIns *ir)
IRType t = hiop ? IRT_NUM : irt_type(ir->t);
Reg dest = RID_NONE, type = RID_NONE, base;
RegSet allow = RSET_GPR;
- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
- lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+ "bad parent SLOAD"); /* Handled by asm_head_side(). */
+ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+ "inconsistent SLOAD variant");
#if LJ_SOFTFP
- lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */
+ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
+ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
if (hiop && ra_used(ir+1)) {
type = ra_dest(as, ir+1, allow);
rset_clear(allow, type);
@@ -1213,8 +1216,9 @@ static void asm_sload(ASMState *as, IRIns *ir)
Reg tmp = RID_NONE;
if ((ir->op2 & IRSLOAD_CONVERT))
tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR);
- lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
- irt_isint(ir->t) || irt_isaddr(ir->t));
+ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+ irt_isint(ir->t) || irt_isaddr(ir->t),
+ "bad SLOAD type %d", irt_type(ir->t));
dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow);
rset_clear(allow, dest);
base = ra_alloc1(as, REF_BASE, allow);
@@ -1246,7 +1250,12 @@ dotypecheck:
}
}
asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE);
- emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);
+ if ((ir->op2 & IRSLOAD_KEYINDEX)) {
+ emit_n(as, ARMI_CMN|ARMI_K12|1, type);
+ emit_dn(as, ARMI_EOR^emit_isk12(ARMI_EOR, ~LJ_KEYINDEX), type, type);
+ } else {
+ emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type);
+ }
}
if (ra_hasreg(dest)) {
#if !LJ_SOFTFP
@@ -1272,19 +1281,17 @@ dotypecheck:
static void asm_cnew(ASMState *as, IRIns *ir)
{
CTState *cts = ctype_ctsG(J2G(as->J));
- CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
- CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
- lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+ CTypeID id = (CTypeID)IR(ir->op1)->i;
+ CTSize sz;
+ CTInfo info = lj_ctype_info(cts, id, &sz);
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
- IRRef args[2];
+ IRRef args[4];
RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
RegSet drop = RSET_SCRATCH;
- lua_assert(sz != CTSIZE_INVALID);
+ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+ "bad CNEW/CNEWI operands");
- args[0] = ASMREF_L; /* lua_State *L */
- args[1] = ASMREF_TMP1; /* MSize size */
as->gcsteps++;
-
if (ra_hasreg(ir->r))
rset_clear(drop, ir->r); /* Dest reg handled below. */
ra_evictset(as, drop);
@@ -1294,10 +1301,10 @@ static void asm_cnew(ASMState *as, IRIns *ir)
/* Initialize immutable cdata object. */
if (ir->o == IR_CNEWI) {
int32_t ofs = sizeof(GCcdata);
- lua_assert(sz == 4 || sz == 8);
+ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
if (sz == 8) {
ofs += 4; ir++;
- lua_assert(ir->o == IR_HIOP);
+ lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI");
}
for (;;) {
Reg r = ra_alloc1(as, ir->op2, allow);
@@ -1306,22 +1313,32 @@ static void asm_cnew(ASMState *as, IRIns *ir)
if (ofs == sizeof(GCcdata)) break;
ofs -= 4; ir--;
}
+ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* CTypeID id */
+ args[2] = ir->op2; /* CTSize sz */
+ args[3] = ASMREF_TMP1; /* CTSize align */
+ asm_gencall(as, ci, args);
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+ return;
}
+
/* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
{
- uint32_t k = emit_isk12(ARMI_MOV, ctypeid);
- Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow);
+ uint32_t k = emit_isk12(ARMI_MOV, id);
+ Reg r = k ? RID_R1 : ra_allock(as, id, allow);
emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP);
if (k) emit_d(as, ARMI_MOV^k, RID_R1);
}
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ASMREF_TMP1; /* MSize size */
asm_gencall(as, ci, args);
ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
ra_releasetmp(as, ASMREF_TMP1));
}
-#else
-#define asm_cnew(as, ir) ((void)0)
#endif
/* -- Write barriers ------------------------------------------------------ */
@@ -1353,7 +1370,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
MCLabel l_end;
Reg obj, val, tmp;
/* No need for other object barriers (yet). */
- lua_assert(IR(ir->op1)->o == IR_UREFC);
+ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
ra_evictset(as, RSET_SCRATCH);
l_end = emit_label(as);
args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1392,23 +1409,36 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai)
emit_dm(as, ai, (dest & 15), (left & 15));
}
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
- IRIns *irp = IR(ir->op1);
- if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
- IRIns *irpp = IR(irp->op1);
- if (irpp == ir-2 && irpp->o == IR_FPMATH &&
- irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
- IRRef args[2];
- args[0] = irpp->op1;
- args[1] = irp->op2;
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
- return 1;
- }
- }
- return 0;
+static void asm_callround(ASMState *as, IRIns *ir, int id)
+{
+ /* The modified regs must match with the *.dasc implementation. */
+ RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
+ RID2RSET(RID_R3)|RID2RSET(RID_R12);
+ RegSet of;
+ Reg dest, src;
+ ra_evictset(as, drop);
+ dest = ra_dest(as, ir, RSET_FPR);
+ emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
+ emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
+ id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
+ (void *)lj_vm_trunc_sf);
+ /* Workaround to protect argument GPRs from being used for remat. */
+ of = as->freeset;
+ as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
+ as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
+ src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
+ as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
+ emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
+}
+
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+ if (ir->op2 <= IRFPM_TRUNC)
+ asm_callround(as, ir, ir->op2);
+ else if (ir->op2 == IRFPM_SQRT)
+ asm_fpunary(as, ir, ARMI_VSQRT_D);
+ else
+ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
}
#endif
@@ -1474,19 +1504,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai)
asm_intop(as, ir, asm_drop_cmp0(as, ai));
}
-static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
-{
- ai = asm_drop_cmp0(as, ai);
- if (ir->op2 == 0) {
- Reg dest = ra_dest(as, ir, RSET_GPR);
- uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
- emit_d(as, ai^m, dest);
- } else {
- /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
- asm_intop(as, ir, ai);
- }
-}
-
static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1552,6 +1569,15 @@ static void asm_mul(ASMState *as, IRIns *ir)
asm_intmul(as, ir);
}
+#define asm_addov(as, ir) asm_add(as, ir)
+#define asm_subov(as, ir) asm_sub(as, ir)
+#define asm_mulov(as, ir) asm_mul(as, ir)
+
+#if !LJ_SOFTFP
+#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D)
+#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D)
+#endif
+
static void asm_neg(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
@@ -1563,41 +1589,22 @@ static void asm_neg(ASMState *as, IRIns *ir)
asm_intneg(as, ir, ARMI_RSB);
}
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
+static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai)
{
- const CCallInfo *ci = &lj_ir_callinfo[id];
- IRRef args[2];
- args[0] = ir->op1;
- args[1] = ir->op2;
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
+ ai = asm_drop_cmp0(as, ai);
+ if (ir->op2 == 0) {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+ emit_d(as, ai^m, dest);
+ } else {
+ /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */
+ asm_intop(as, ir, ai);
+ }
}
-#if !LJ_SOFTFP
-static void asm_callround(ASMState *as, IRIns *ir, int id)
-{
- /* The modified regs must match with the *.dasc implementation. */
- RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)|
- RID2RSET(RID_R3)|RID2RSET(RID_R12);
- RegSet of;
- Reg dest, src;
- ra_evictset(as, drop);
- dest = ra_dest(as, ir, RSET_FPR);
- emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15));
- emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf :
- id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf :
- (void *)lj_vm_trunc_sf);
- /* Workaround to protect argument GPRs from being used for remat. */
- of = as->freeset;
- as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1);
- as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L);
- src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */
- as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1));
- emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15));
-}
-#endif
+#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN)
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -1614,6 +1621,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
}
}
+#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND)
+#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR)
+#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR)
+
static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
{
if (irref_isk(ir->op2)) { /* Constant shifts. */
@@ -1631,6 +1642,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh)
}
}
+#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR)
+#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR)
+#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
+
static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
{
uint32_t kcmp = 0, kmov = 0;
@@ -1704,6 +1721,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc)
asm_intmin_max(as, ir, cc);
}
+#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL)
+#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE)
+
/* -- Comparisons --------------------------------------------------------- */
/* Map of comparisons to flags. ORDER IR. */
@@ -1777,7 +1797,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir)
Reg left;
uint32_t m;
int cmpprev0 = 0;
- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
+ "bad comparison data type %d", irt_type(ir->t));
if (asm_swapops(as, lref, rref)) {
Reg tmp = lref; lref = rref; rref = tmp;
if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
@@ -1819,6 +1840,18 @@ notst:
as->flagmcp = as->mcp; /* Allow elimination of the compare. */
}
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+ if (irt_isnum(ir->t))
+ asm_fpcomp(as, ir);
+ else
+#endif
+ asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir) asm_comp(as, ir)
+
#if LJ_HASFFI
/* 64 bit integer comparisons. */
static void asm_int64comp(ASMState *as, IRIns *ir)
@@ -1857,15 +1890,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir)
}
#endif
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
-#if LJ_HASFFI || LJ_SOFTFP
/* HIOP is marked as a store because it needs its own DCE logic. */
int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_HASFFI || LJ_SOFTFP
if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */
as->curins--; /* Always skip the loword comparison. */
#if LJ_SOFTFP
@@ -1882,7 +1915,7 @@ static void asm_hiop(ASMState *as, IRIns *ir)
} else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
as->curins--; /* Always skip the loword min/max. */
if (uselo || usehi)
- asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO);
+ asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE);
return;
#elif LJ_HASFFI
} else if ((ir-1)->o == IR_CONV) {
@@ -1893,9 +1926,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
#endif
} else if ((ir-1)->o == IR_XSTORE) {
if ((ir-1)->r != RID_SINK)
- asm_xstore(as, ir, 4);
+ asm_xstore_(as, ir, 4);
return;
}
+#endif
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
#if LJ_HASFFI
@@ -1914,6 +1948,9 @@ static void asm_hiop(ASMState *as, IRIns *ir)
asm_intneg(as, ir, ARMI_RSC);
asm_intneg(as, ir-1, ARMI_RSB|ARMI_S);
break;
+ case IR_CNEWI:
+ /* Nothing to do here. Handled by lo op itself. */
+ break;
#endif
#if LJ_SOFTFP
case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
@@ -1921,24 +1958,26 @@ static void asm_hiop(ASMState *as, IRIns *ir)
if (!uselo)
ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
break;
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
+ /* Nothing to do here. Handled by lo op itself. */
+ break;
#endif
- case IR_CALLN:
- case IR_CALLS:
- case IR_CALLXS:
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
if (!uselo)
ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
break;
-#if LJ_SOFTFP
- case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
-#endif
- case IR_CNEWI:
- /* Nothing to do here. Handled by lo op itself. */
- break;
- default: lua_assert(0); break;
+ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
}
-#else
- UNUSED(as); UNUSED(ir); lua_assert(0);
-#endif
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+ UNUSED(ir);
+ asm_guardcc(as, CC_NE);
+ emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP);
+ emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
}
/* -- Stack handling ------------------------------------------------------ */
@@ -1952,7 +1991,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
if (irp) {
if (!ra_hasspill(irp->s)) {
pbase = irp->r;
- lua_assert(ra_hasreg(pbase));
+ lj_assertA(ra_hasreg(pbase), "base reg lost");
} else if (allow) {
pbase = rset_pickbot(allow);
} else {
@@ -1964,13 +2003,13 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
}
emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno));
k = emit_isk12(0, (int32_t)(8*topslot));
- lua_assert(k);
+ lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
emit_n(as, ARMI_CMP^k, RID_TMP);
emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase);
emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP,
(int32_t)offsetof(lua_State, maxstack));
if (irp) { /* Must not spill arbitrary registers in head of side trace. */
- int32_t i = i32ptr(&J2G(as->J)->jit_L);
+ int32_t i = i32ptr(&J2G(as->J)->cur_L);
if (ra_hasspill(irp->s))
emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s));
emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095));
@@ -1978,7 +2017,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */
emit_loadi(as, RID_TMP, (i & ~4095));
} else {
- emit_getgl(as, RID_TMP, jit_L);
+ emit_getgl(as, RID_TMP, cur_L);
}
}
@@ -2001,7 +2040,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
#if LJ_SOFTFP
RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
Reg tmp;
- lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */
+ /* LJ_SOFTFP: must be a number constant. */
+ lj_assertA(irref_isk(ref), "unsplit FP op");
tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo,
rset_exclude(RSET_GPREVEN, RID_BASE));
emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs);
@@ -2015,7 +2055,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
} else {
RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE);
Reg type;
- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
+ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
+ "restore of IR type %d", irt_type(ir->t));
if (!irt_ispri(ir->t)) {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE));
emit_lso(as, ARMI_STR, src, RID_BASE, ofs);
@@ -2028,6 +2069,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
} else if ((sn & SNAP_SOFTFPNUM)) {
type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE));
#endif
+ } else if ((sn & SNAP_KEYINDEX)) {
+ type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd);
} else {
type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd);
}
@@ -2035,7 +2078,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
}
checkmclim(as);
}
- lua_assert(map + nent == flinks);
+ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
}
/* -- GC handling --------------------------------------------------------- */
@@ -2089,15 +2132,21 @@ static void asm_loop_fixup(ASMState *as)
}
}
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+ UNUSED(as); /* Nothing to do. */
+}
+
/* -- Head of trace ------------------------------------------------------- */
-/* Reload L register from g->jit_L. */
+/* Reload L register from g->cur_L. */
static void asm_head_lreg(ASMState *as)
{
IRIns *ir = IR(ASMREF_L);
if (ra_used(ir)) {
Reg r = ra_dest(as, ir, RSET_GPR);
- emit_getgl(as, r, jit_L);
+ emit_getgl(as, r, cur_L);
ra_evictk(as);
}
}
@@ -2125,7 +2174,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
rset_clear(allow, ra_dest(as, ir, allow));
} else {
Reg r = irp->r;
- lua_assert(ra_hasreg(r));
+ lj_assertA(ra_hasreg(r), "base reg lost");
rset_clear(allow, r);
if (r != ir->r && !rset_test(as->freeset, r))
ra_restore(as, regcost_ref(as->cost[r]));
@@ -2147,7 +2196,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
} else {
/* Patch stack adjustment. */
uint32_t k = emit_isk12(ARMI_ADD, spadj);
- lua_assert(k);
+ lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP);
}
/* Patch exit branch. */
@@ -2168,143 +2217,13 @@ static void asm_tail_prep(ASMState *as)
*p = 0; /* Prevent load/store merging. */
}
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
- switch ((IROp)ir->o) {
- /* Miscellaneous ops. */
- case IR_LOOP: asm_loop(as); break;
- case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
- case IR_USE:
- ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
- case IR_PHI: asm_phi(as, ir); break;
- case IR_HIOP: asm_hiop(as, ir); break;
- case IR_GCSTEP: asm_gcstep(as, ir); break;
-
- /* Guarded assertions. */
- case IR_EQ: case IR_NE:
- if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
- as->curins--;
- asm_href(as, ir-1, (IROp)ir->o);
- break;
- }
- /* fallthrough */
- case IR_LT: case IR_GE: case IR_LE: case IR_GT:
- case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
- case IR_ABC:
-#if !LJ_SOFTFP
- if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; }
-#endif
- asm_intcomp(as, ir);
- break;
-
- case IR_RETF: asm_retf(as, ir); break;
-
- /* Bit ops. */
- case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break;
- case IR_BSWAP: asm_bitswap(as, ir); break;
-
- case IR_BAND: asm_bitop(as, ir, ARMI_AND); break;
- case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break;
- case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break;
-
- case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break;
- case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break;
- case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break;
- case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break;
- case IR_BROL: lua_assert(0); break;
-
- /* Arithmetic ops. */
- case IR_ADD: case IR_ADDOV: asm_add(as, ir); break;
- case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break;
- case IR_MUL: case IR_MULOV: asm_mul(as, ir); break;
- case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
- case IR_NEG: asm_neg(as, ir); break;
-
-#if LJ_SOFTFP
- case IR_DIV: case IR_POW: case IR_ABS:
- case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT:
- lua_assert(0); /* Unused for LJ_SOFTFP. */
- break;
-#else
- case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break;
- case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
- case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break;
- case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
- case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
- case IR_FPMATH:
- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
- break;
- if (ir->op2 <= IRFPM_TRUNC)
- asm_callround(as, ir, ir->op2);
- else if (ir->op2 == IRFPM_SQRT)
- asm_fpunary(as, ir, ARMI_VSQRT_D);
- else
- asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
- break;
- case IR_TOBIT: asm_tobit(as, ir); break;
-#endif
-
- case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break;
- case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break;
-
- /* Memory references. */
- case IR_AREF: asm_aref(as, ir); break;
- case IR_HREF: asm_href(as, ir, 0); break;
- case IR_HREFK: asm_hrefk(as, ir); break;
- case IR_NEWREF: asm_newref(as, ir); break;
- case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
- case IR_FREF: asm_fref(as, ir); break;
- case IR_STRREF: asm_strref(as, ir); break;
-
- /* Loads and stores. */
- case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
- asm_ahuvload(as, ir);
- break;
- case IR_FLOAD: asm_fload(as, ir); break;
- case IR_XLOAD: asm_xload(as, ir); break;
- case IR_SLOAD: asm_sload(as, ir); break;
-
- case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
- case IR_FSTORE: asm_fstore(as, ir); break;
- case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
- /* Allocations. */
- case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
- case IR_TNEW: asm_tnew(as, ir); break;
- case IR_TDUP: asm_tdup(as, ir); break;
- case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
- /* Write barriers. */
- case IR_TBAR: asm_tbar(as, ir); break;
- case IR_OBAR: asm_obar(as, ir); break;
-
- /* Type conversions. */
- case IR_CONV: asm_conv(as, ir); break;
- case IR_TOSTR: asm_tostr(as, ir); break;
- case IR_STRTO: asm_strto(as, ir); break;
-
- /* Calls. */
- case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
- case IR_CALLXS: asm_callx(as, ir); break;
- case IR_CARG: break;
-
- default:
- setintV(&as->J->errinfo, ir->o);
- lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
- break;
- }
-}
-
/* -- Trace setup --------------------------------------------------------- */
/* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
IRRef args[CCI_NARGS_MAX*2];
- uint32_t i, nargs = (int)CCI_NARGS(ci);
+ uint32_t i, nargs = CCI_XNARGS(ci);
int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0;
asm_collectargs(as, ir, ci, args);
for (i = 0; i < nargs; i++) {
@@ -2360,7 +2279,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
if (!cstart) cstart = p;
}
}
- lua_assert(cstart != NULL);
+ lj_assertJ(cstart != NULL, "exit stub %d not found", exitno);
lj_mcode_sync(cstart, cend);
lj_mcode_patch(J, mcarea, 1);
}
diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h
new file mode 100644
index 00000000..1f44d023
--- /dev/null
+++ b/src/lj_asm_arm64.h
@@ -0,0 +1,2070 @@
+/*
+** ARM64 IR assembler (SSA IR -> machine code).
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+**
+** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+** Sponsored by Cisco Systems, Inc.
+*/
+
+/* -- Register allocator extensions --------------------------------------- */
+
+/* Allocate a register with a hint. */
+static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
+{
+ Reg r = IR(ref)->r;
+ if (ra_noreg(r)) {
+ if (!ra_hashint(r) && !iscrossref(as, ref))
+ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */
+ r = ra_allocref(as, ref, allow);
+ }
+ ra_noweak(as, r);
+ return r;
+}
+
+/* Allocate two source registers for three-operand instructions. */
+static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
+{
+ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+ Reg left = irl->r, right = irr->r;
+ if (ra_hasreg(left)) {
+ ra_noweak(as, left);
+ if (ra_noreg(right))
+ right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
+ else
+ ra_noweak(as, right);
+ } else if (ra_hasreg(right)) {
+ ra_noweak(as, right);
+ left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
+ } else if (ra_hashint(right)) {
+ right = ra_allocref(as, ir->op2, allow);
+ left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
+ } else {
+ left = ra_allocref(as, ir->op1, allow);
+ right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
+ }
+ return left | (right << 8);
+}
+
+/* -- Guard handling ------------------------------------------------------ */
+
+/* Setup all needed exit stubs. */
+static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
+{
+ ExitNo i;
+ MCode *mxp = as->mctop;
+ if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
+ asm_mclimit(as);
+ /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
+ for (i = nexits-1; (int32_t)i >= 0; i--)
+ *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i));
+ *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno));
+ mxp--;
+ *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp)));
+ *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP));
+ as->mctop = mxp;
+}
+
+static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
+{
+ /* Keep this in-sync with exitstub_trace_addr(). */
+ return as->mctop + exitno + 3;
+}
+
+/* Emit conditional branch to exit for guard. */
+static void asm_guardcc(ASMState *as, A64CC cc)
+{
+ MCode *target = asm_exitstub_addr(as, as->snapno);
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+ *p = A64I_B | A64F_S26(target-p);
+ emit_cond_branch(as, cc^1, p-1);
+ return;
+ }
+ emit_cond_branch(as, cc, target);
+}
+
+/* Emit test and branch instruction to exit for guard. */
+static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
+{
+ MCode *target = asm_exitstub_addr(as, as->snapno);
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+ *p = A64I_B | A64F_S26(target-p);
+ emit_tnb(as, ai^0x01000000u, r, bit, p-1);
+ return;
+ }
+ emit_tnb(as, ai, r, bit, target);
+}
+
+/* Emit compare and branch instruction to exit for guard. */
+static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
+{
+ MCode *target = asm_exitstub_addr(as, as->snapno);
+ MCode *p = as->mcp;
+ if (LJ_UNLIKELY(p == as->invmcp)) {
+ as->loopinv = 1;
+ *p = A64I_B | A64F_S26(target-p);
+ emit_cnb(as, ai^0x01000000u, r, p-1);
+ return;
+ }
+ emit_cnb(as, ai, r, target);
+}
+
+/* -- Operand fusion ------------------------------------------------------ */
+
+/* Limit linear search to this distance. Avoids O(n^2) behavior. */
+#define CONFLICT_SEARCH_LIM 31
+
+static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
+{
+ if (irref_isk(ref)) {
+ IRIns *ir = IR(ref);
+ if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
+ *k = ir->i;
+ return 1;
+ } else if (checki32((int64_t)ir_k64(ir)->u64)) {
+ *k = (int32_t)ir_k64(ir)->u64;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/* Check if there's no conflicting instruction between curins and ref. */
+static int noconflict(ASMState *as, IRRef ref, IROp conflict)
+{
+ IRIns *ir = as->ir;
+ IRRef i = as->curins;
+ if (i > ref + CONFLICT_SEARCH_LIM)
+ return 0; /* Give up, ref is too far away. */
+ while (--i > ref)
+ if (ir[i].o == conflict)
+ return 0; /* Conflict found. */
+ return 1; /* Ok, no conflict. */
+}
+
+/* Fuse the array base of colocated arrays. */
+static int32_t asm_fuseabase(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
+ !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
+ return (int32_t)sizeof(GCtab);
+ return 0;
+}
+
+#define FUSE_REG 0x40000000
+
+/* Fuse array/hash/upvalue reference into register+offset operand. */
+static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
+ A64Ins ins)
+{
+ IRIns *ir = IR(ref);
+ if (ra_noreg(ir->r)) {
+ if (ir->o == IR_AREF) {
+ if (mayfuse(as, ref)) {
+ if (irref_isk(ir->op2)) {
+ IRRef tab = IR(ir->op1)->op1;
+ int32_t ofs = asm_fuseabase(as, tab);
+ IRRef refa = ofs ? tab : ir->op1;
+ ofs += 8*IR(ir->op2)->i;
+ if (emit_checkofs(ins, ofs)) {
+ *ofsp = ofs;
+ return ra_alloc1(as, refa, allow);
+ }
+ } else {
+ Reg base = ra_alloc1(as, ir->op1, allow);
+ *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base));
+ return base;
+ }
+ }
+ } else if (ir->o == IR_HREFK) {
+ if (mayfuse(as, ref)) {
+ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
+ if (emit_checkofs(ins, ofs)) {
+ *ofsp = ofs;
+ return ra_alloc1(as, ir->op1, allow);
+ }
+ }
+ } else if (ir->o == IR_UREFC) {
+ if (irref_isk(ir->op1)) {
+ GCfunc *fn = ir_kfunc(IR(ir->op1));
+ GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
+ int64_t ofs = glofs(as, &uv->tv);
+ if (emit_checkofs(ins, ofs)) {
+ *ofsp = (int32_t)ofs;
+ return RID_GL;
+ }
+ }
+ } else if (ir->o == IR_TMPREF) {
+ *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv);
+ return RID_GL;
+ }
+ }
+ *ofsp = 0;
+ return ra_alloc1(as, ref, allow);
+}
+
+/* Fuse m operand into arithmetic/logic instructions. */
+static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
+{
+ IRIns *ir = IR(ref);
+ if (ra_hasreg(ir->r)) {
+ ra_noweak(as, ir->r);
+ return A64F_M(ir->r);
+ } else if (irref_isk(ref)) {
+ uint32_t m;
+ int64_t k = get_k64val(as, ref);
+ if ((ai & 0x1f000000) == 0x0a000000)
+ m = emit_isk13(k, irt_is64(ir->t));
+ else
+ m = emit_isk12(k);
+ if (m)
+ return m;
+ } else if (mayfuse(as, ref)) {
+ if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
+ (ir->o == IR_ADD && ir->op1 == ir->op2)) {
+ A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
+ ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
+ int shift = ir->o == IR_ADD ? 1 :
+ (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
+ IRIns *irl = IR(ir->op1);
+ if (sh == A64SH_LSL &&
+ irl->o == IR_CONV &&
+ irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
+ shift <= 4 &&
+ canfuse(as, irl)) {
+ Reg m = ra_alloc1(as, irl->op1, allow);
+ return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
+ } else {
+ Reg m = ra_alloc1(as, ir->op1, allow);
+ return A64F_M(m) | A64F_SH(sh, shift);
+ }
+ } else if (ir->o == IR_CONV &&
+ ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
+ Reg m = ra_alloc1(as, ir->op1, allow);
+ return A64F_M(m) | A64F_EX(A64EX_SXTW);
+ }
+ }
+ return A64F_M(ra_allocref(as, ref, allow));
+}
+
+/* Fuse XLOAD/XSTORE reference into load/store operand.
+** Emits the load/store ai for register rd, addressing ref. Folds an IR_ADD
+** or IR_STRREF address computation into the addressing mode when possible
+** (immediate offset, register+register, shifted or sign-extended register);
+** otherwise falls back to a plain base+offset form.
+** NOTE: code is emitted backwards (--as->mcp), so emit_* calls appear in
+** reverse execution order.
+*/
+static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
+                         RegSet allow)
+{
+  IRIns *ir = IR(ref);
+  Reg base;
+  int32_t ofs = 0;
+  if (ra_noreg(ir->r) && canfuse(as, ir)) {
+    if (ir->o == IR_ADD) {
+      if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) {
+        ref = ir->op1;  /* Constant offset fits: use base+imm form below. */
+      } else {
+        Reg rn, rm;
+        IRRef lref = ir->op1, rref = ir->op2;
+        IRIns *irl = IR(lref);
+        if (mayfuse(as, irl->op1)) {
+          unsigned int shift = 4;  /* Sentinel: no fusable shift found. */
+          if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
+            shift = (IR(irl->op2)->i & 63);
+          } else if (irl->o == IR_ADD && irl->op1 == irl->op2) {
+            shift = 1;  /* x+x == x<<1. */
+          }
+          /* Bits 31:30 of ai hold the access-size field; a scaled register
+          ** offset is only valid if the shift matches log2(access size).
+          */
+          if ((ai >> 30) == shift) {
+            lref = irl->op1;
+            irl = IR(lref);
+            ai |= A64I_LS_SH;  /* Enable the scaled (shifted) offset. */
+          }
+        }
+        /* Fold a 32->64 bit sign extension of the index into the mode. */
+        if (irl->o == IR_CONV &&
+            irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
+            canfuse(as, irl)) {
+          lref = irl->op1;
+          ai |= A64I_LS_SXTWx;
+        } else {
+          ai |= A64I_LS_LSLx;
+        }
+        rm = ra_alloc1(as, lref, allow);
+        rn = ra_alloc1(as, rref, rset_exclude(allow, rm));
+        emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm);
+        return;
+      }
+    } else if (ir->o == IR_STRREF) {
+      /* String data starts at a fixed offset past the GCstr header. */
+      if (asm_isk32(as, ir->op2, &ofs)) {
+        ref = ir->op1;
+      } else if (asm_isk32(as, ir->op1, &ofs)) {
+        ref = ir->op2;
+      } else {
+        /* Neither operand is a constant: compute the address into rd first,
+        ** then load/store with the header offset.
+        */
+        Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2;
+        Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1;
+        Reg rn = ra_alloc1(as, refv, allow);
+        IRIns *irr = IR(refk);
+        uint32_t m;
+        /* Fold an immediately preceding, otherwise unused ADD of a constant
+        ** into the offset, if the combined offset still encodes.
+        */
+        if (irr+1 == ir && !ra_used(irr) &&
+            irr->o == IR_ADD && irref_isk(irr->op2)) {
+          ofs = sizeof(GCstr) + IR(irr->op2)->i;
+          if (emit_checkofs(ai, ofs)) {
+            Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
+            m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
+            goto skipopm;
+          }
+        }
+        m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn));
+        ofs = sizeof(GCstr);
+      skipopm:
+        emit_lso(as, ai, rd, rd, ofs);
+        emit_dn(as, A64I_ADDx^m, rd, rn);
+        return;
+      }
+      ofs += sizeof(GCstr);
+      if (!emit_checkofs(ai, ofs)) {
+        /* Offset too large to encode: materialize it in a register. */
+        Reg rn = ra_alloc1(as, ref, allow);
+        Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
+        emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm);
+        return;
+      }
+    }
+  }
+  base = ra_alloc1(as, ref, allow);
+  emit_lso(as, ai, (rd & 31), base, ofs);
+}
+
+/* Fuse FP multiply-add/sub.
+** If one operand of ir is an unallocated, fusable IR_MUL, emit a single
+** fused multiply-add instead of MUL+ADD: ai is the opcode for a MUL on
+** op1, air the mirrored opcode for a MUL on op2. Returns 1 on success,
+** 0 if no fusion was possible (caller emits the plain operation).
+*/
+static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
+{
+  IRRef lref = ir->op1, rref = ir->op2;
+  IRIns *irm;
+  /* The comma expressions bind irm and, in the second arm, swap the addend
+  ** ref to the left operand and select the mirrored opcode before the
+  ** final register check.
+  */
+  if (lref != rref &&
+      ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
+        ra_noreg(irm->r)) ||
+       (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
+        (rref = lref, ai = air, ra_noreg(irm->r))))) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
+    Reg left = ra_alloc2(as, irm,
+                         rset_exclude(rset_exclude(RSET_FPR, dest), add));
+    Reg right = (left >> 8); left &= 255;  /* ra_alloc2 packs both regs. */
+    emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
+    return 1;
+  }
+  return 0;
+}
+
+/* Fuse BAND + BSHL/BSHR into UBFM.
+** Matches (x >> s) & m or (x << s) & m where m is a contiguous low-bit
+** mask, and emits a single unsigned bitfield move. Returns 1 on success.
+*/
+static int asm_fuseandshift(ASMState *as, IRIns *ir)
+{
+  IRIns *irl = IR(ir->op1);
+  lj_assertA(ir->o == IR_BAND, "bad usage");
+  if (canfuse(as, irl) && irref_isk(ir->op2)) {
+    uint64_t mask = get_k64val(as, ir->op2);
+    if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
+      int32_t shmask = irt_is64(irl->t) ? 63 : 31;
+      int32_t shift = (IR(irl->op2)->i & shmask);
+      int32_t imms = shift;
+      if (irl->o == IR_BSHL) {
+        /* For a left shift, the post-shift mask corresponds to a pre-shift
+        ** mask; UBFM encodes LSL #s as immr = (-s) mod regsize, imms = 0+.
+        */
+        mask >>= shift;
+        shift = (shmask-shift+1) & shmask;
+        imms = 0;
+      }
+      if (mask && !((mask+1) & mask)) {  /* Contiguous 1-bits at the bottom. */
+        Reg dest = ra_dest(as, ir, RSET_GPR);
+        Reg left = ra_alloc1(as, irl->op1, RSET_GPR);
+        A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
+        /* imms selects the highest source bit kept by the mask. */
+        imms += 63 - emit_clz64(mask);
+        if (imms > shmask) imms = shmask;
+        emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/* Fuse BOR(BSHL, BSHR) into EXTR/ROR.
+** Matches (a << l) | (b >> r) with l+r == register width and emits a
+** single EXTR (which is ROR when both sources are the same register).
+** Returns 1 on success, 0 if the pattern doesn't match.
+*/
+static int asm_fuseorshift(ASMState *as, IRIns *ir)
+{
+  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
+  lj_assertA(ir->o == IR_BOR, "bad usage");
+  if (canfuse(as, irl) && canfuse(as, irr) &&
+      ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
+       (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
+    if (irref_isk(irl->op2) && irref_isk(irr->op2)) {
+      IRRef lref = irl->op1, rref = irr->op1;
+      uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i;
+      if (irl->o == IR_BSHR) {  /* BSHR needs to be the right operand. */
+        uint32_t tmp2;
+        IRRef tmp1 = lref; lref = rref; rref = tmp1;
+        tmp2 = lshift; lshift = rshift; rshift = tmp2;
+      }
+      /* Only a full funnel (shift amounts summing to the width) is EXTR. */
+      if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) {
+        A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw;
+        Reg dest = ra_dest(as, ir, RSET_GPR);
+        Reg left = ra_alloc1(as, lref, RSET_GPR);
+        Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
+        emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right);
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+/* -- Calls --------------------------------------------------------------- */
+
+/* Generate a call to a C function.
+** Emits the call itself first (code is emitted backwards, so it executes
+** last), then places each argument: FP args into successive FP argument
+** registers, others into GP argument registers, with overflow args stored
+** to the stack in 8-byte slots. A NULL ci->func means the caller already
+** emitted an indirect call (see asm_callx).
+*/
+static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
+{
+  uint32_t n, nargs = CCI_XNARGS(ci);
+  int32_t ofs = 0;
+  Reg gpr, fpr = REGARG_FIRSTFPR;
+  if ((void *)ci->func)
+    emit_call(as, (void *)ci->func);
+  /* Reset the allocation cost of all GP argument registers, so they can
+  ** be picked up cheaply for the arguments below.
+  */
+  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
+    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
+  gpr = REGARG_FIRSTGPR;
+  for (n = 0; n < nargs; n++) {  /* Setup args. */
+    IRRef ref = args[n];
+    IRIns *ir = IR(ref);
+    if (ref) {
+      if (irt_isfp(ir->t)) {
+        if (fpr <= REGARG_LASTFPR) {
+          lj_assertA(rset_test(as->freeset, fpr),
+                     "reg %d not free", fpr);  /* Must have been evicted. */
+          ra_leftov(as, fpr, ref);
+          fpr++;
+        } else {  /* Out of FP argument registers: pass on the stack. */
+          Reg r = ra_alloc1(as, ref, RSET_FPR);
+          emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
+          ofs += 8;
+        }
+      } else {
+        if (gpr <= REGARG_LASTGPR) {
+          lj_assertA(rset_test(as->freeset, gpr),
+                     "reg %d not free", gpr);  /* Must have been evicted. */
+          ra_leftov(as, gpr, ref);
+          gpr++;
+        } else {  /* Out of GP argument registers: pass on the stack. */
+          Reg r = ra_alloc1(as, ref, RSET_GPR);
+          emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
+          ofs += 8;
+        }
+      }
+    }
+  }
+}
+
+/* Setup result reg/sp for call. Evict scratch regs.
+** Evicts all caller-saved registers except the destination(s), then binds
+** the call result: FP results from RID_FPRET (or moved from RID_RET for
+** CCI_CASTU64 calls that return an FP value in a GPR), 128-bit results as
+** a register pair (HIOP), and everything else from RID_RET.
+*/
+static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+  RegSet drop = RSET_SCRATCH;
+  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+  if (ra_hasreg(ir->r))
+    rset_clear(drop, ir->r);  /* Dest reg handled below. */
+  if (hiop && ra_hasreg((ir+1)->r))
+    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
+  ra_evictset(as, drop);  /* Evictions must be performed first. */
+  if (ra_used(ir)) {
+    lj_assertA(!irt_ispri(ir->t), "PRI dest");
+    if (irt_isfp(ir->t)) {
+      if (ci->flags & CCI_CASTU64) {
+        /* FP result delivered in a GPR: move RID_RET into the FP dest. */
+        Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
+        emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
+                dest, RID_RET);
+      } else {
+        ra_destreg(as, ir, RID_FPRET);
+      }
+    } else if (hiop) {
+      ra_destpair(as, ir);
+    } else {
+      ra_destreg(as, ir, RID_RET);
+    }
+  }
+  UNUSED(ci);
+}
+
+/* Assemble an IR_CALLXS: a call through an arbitrary function pointer.
+** Constant addresses become a direct call via asm_gencall; otherwise a BLR
+** through a register outside the argument-register range is emitted and
+** ci.func is set to NULL so asm_gencall skips emitting its own call.
+*/
+static void asm_callx(ASMState *as, IRIns *ir)
+{
+  IRRef args[CCI_NARGS_MAX*2];
+  CCallInfo ci;
+  IRRef func;
+  IRIns *irf;
+  ci.flags = asm_callx_flags(as, ir);
+  asm_collectargs(as, ir, &ci, args);
+  asm_setupresult(as, ir, &ci);
+  func = ir->op2; irf = IR(func);
+  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
+  if (irref_isk(func)) {  /* Call to constant address. */
+    ci.func = (ASMFunction)(ir_k64(irf)->u64);
+  } else {  /* Need a non-argument register for indirect calls. */
+    Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
+    emit_n(as, A64I_BLR, freg);
+    ci.func = (ASMFunction)(void *)0;  /* Tell asm_gencall: call emitted. */
+  }
+  asm_gencall(as, &ci, args);
+}
+
+/* -- Returns ------------------------------------------------------------- */
+
+/* Return to lower frame. Guard that it goes to the right spot.
+** Execution order (emission is backwards): load the frame link from
+** base[-1], compare it against the expected return pc and guard, lower
+** BASE by delta slots, publish it to g->jit_base and force-spill it so
+** the stack slot stays in sync.
+*/
+static void asm_retf(ASMState *as, IRIns *ir)
+{
+  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+  void *pc = ir_kptr(IR(ir->op2));
+  /* Slots to drop: frame link (+FR2 pad) plus the call base from the
+  ** CALL instruction preceding the return pc.
+  */
+  int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
+  as->topslot -= (BCReg)delta;
+  if ((int32_t)as->topslot < 0) as->topslot = 0;
+  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
+  /* Need to force a spill on REF_BASE now to update the stack slot. */
+  emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
+  emit_setgl(as, base, jit_base);
+  emit_addptr(as, base, -8*delta);
+  asm_guardcc(as, CC_NE);
+  emit_nm(as, A64I_CMPx, RID_TMP,
+          ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
+  emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
+}
+
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+/* Write the current lua_State back into sb->L, preserving the flag bits.
+** Execution order (emission is backwards): load the old sb->L into tmp,
+** load g->cur_L into RID_TMP, bitfield-insert the low flag bits (up to
+** lj_fls(SBUF_MASK_FLAG)) from tmp into RID_TMP, then store RID_TMP to
+** sb->L.
+*/
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+  IRIns irgc;
+  irgc.ot = IRT(0, IRT_PGC);  /* GC type. */
+  emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+  emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp);
+  emit_getgl(as, RID_TMP, cur_L);
+  emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
+/* -- Type conversions ---------------------------------------------------- */
+
+/* Checked conversion of a double in left to an int32 in dest.
+** Converts to int, converts back to double and compares with the original;
+** guards (CC_NE) if the round-trip differs, i.e. the double was not an
+** exact int32.
+*/
+static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
+{
+  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  asm_guardcc(as, CC_NE);
+  emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
+  emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);
+  emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31));
+}
+
+/* Assemble IR_TOBIT: convert a double to its int32 bit pattern.
+** Adds op2 to op1 and moves the low 32 bits of the FP result to a GPR.
+** NOTE(review): presumably op2 holds the usual 2^52+2^51 biasing constant
+** so the mantissa's low word ends up holding the integer — the constant
+** itself is set up by the IR, not here.
+*/
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_FPR;
+  Reg left = ra_alloc1(as, ir->op1, allow);
+  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
+  Reg tmp = ra_scratch(as, rset_clear(allow, right));
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));
+  emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
+}
+
+/* Assemble IR_CONV: all type conversions between FP and integer types.
+** Dispatches on destination type (FP / integer) and source type encoded
+** in op2: FP<->FP, int->FP, FP->int (checked or unchecked), small-int
+** widening, and 32<->64 bit integer conversions.
+*/
+static void asm_conv(ASMState *as, IRIns *ir)
+{
+  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+  int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
+  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+  IRRef lref = ir->op1;
+  lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+  if (irt_isfp(ir->t)) {
+    Reg dest = ra_dest(as, ir, RSET_FPR);
+    if (stfp) {  /* FP to FP conversion. */
+      emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
+              (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
+    } else {  /* Integer to FP conversion. */
+      Reg left = ra_alloc1(as, lref, RSET_GPR);
+      /* (IRT_IS64 >> st) & 1 tests whether the source is a 64 bit type. */
+      A64Ins ai = irt_isfloat(ir->t) ?
+        (((IRT_IS64 >> st) & 1) ?
+         (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
+         (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
+        (((IRT_IS64 >> st) & 1) ?
+         (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
+         (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
+      emit_dn(as, ai, (dest & 31), left);
+    }
+  } else if (stfp) {  /* FP to integer conversion. */
+    if (irt_isguard(ir->t)) {
+      /* Checked conversions are only supported from number to int. */
+      lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+                 "bad type for checked CONV");
+      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
+    } else {
+      Reg left = ra_alloc1(as, lref, RSET_FPR);
+      Reg dest = ra_dest(as, ir, RSET_GPR);
+      /* Select by dest width, source FP width and signedness. */
+      A64Ins ai = irt_is64(ir->t) ?
+        (st == IRT_NUM ?
+         (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
+         (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
+        (st == IRT_NUM ?
+         (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
+         (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
+      emit_dn(as, ai, dest, (left & 31));
+    }
+  } else if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_alloc1(as, lref, RSET_GPR);
+    A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
+                st == IRT_U8 ? A64I_UXTBw :
+                st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
+    lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
+    emit_dn(as, ai, dest, left);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    if (irt_is64(ir->t)) {
+      if (st64 || !(ir->op2 & IRCONV_SEXT)) {
+        /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
+        ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+      } else {  /* 32 to 64 bit sign extension. */
+        Reg left = ra_alloc1(as, lref, RSET_GPR);
+        emit_dn(as, A64I_SXTW, dest, left);
+      }
+    } else {
+      if (st64 && !(ir->op2 & IRCONV_NONE)) {
+        /* This is either a 32 bit reg/reg mov which zeroes the hiword
+        ** or a load of the loword from a 64 bit address.
+        */
+        Reg left = ra_alloc1(as, lref, RSET_GPR);
+        emit_dm(as, A64I_MOVw, dest, left);
+      } else {  /* 32/32 bit no-op (cast). */
+        ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
+      }
+    }
+  }
+}
+
+/* Assemble IR_STRTO: convert a string to a number via lj_strscan_num.
+** The callee writes the result TValue through a pointer: either directly
+** into the IR's spill slot (if it has one) or into sp[0], from where it
+** is loaded into the destination FPR. Guards (CBZ on the return value)
+** if the scan fails.
+*/
+static void asm_strto(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
+  IRRef args[2];
+  Reg dest = 0, tmp;
+  int destused = ra_used(ir);
+  int32_t ofs = 0;
+  ra_evictset(as, RSET_SCRATCH);
+  if (destused) {
+    if (ra_hasspill(ir->s)) {
+      /* Let the callee store straight into the spill slot; if the result
+      ** is also register-resident, reload it from there.
+      */
+      ofs = sps_scale(ir->s);
+      destused = 0;
+      if (ra_hasreg(ir->r)) {
+        ra_free(as, ir->r);
+        ra_modified(as, ir->r);
+        emit_spload(as, ir, ir->r, ofs);
+      }
+    } else {
+      dest = ra_dest(as, ir, RSET_FPR);
+    }
+  }
+  if (destused)  /* No spill slot: load the result from sp[0]. */
+    emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
+  asm_guardcnb(as, A64I_CBZ, RID_RET);  /* Guard on conversion failure. */
+  args[0] = ir->op1;  /* GCstr *str */
+  args[1] = ASMREF_TMP1;  /* TValue *n */
+  asm_gencall(as, ci, args);
+  tmp = ra_releasetmp(as, ASMREF_TMP1);
+  emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);  /* TMP1 = sp+ofs. */
+}
+
+/* -- Memory references --------------------------------------------------- */
+
+/* Store tagged value for ref at base+ofs.
+** Builds the 64 bit tagged representation in RID_TMP and stores it:
+** constants are materialized whole; integers get the type tag added to
+** the zero-extended value; GC values get the type shifted into the top
+** 17 bits (tag << 47) added to the pointer.
+*/
+static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
+{
+  RegSet allow = rset_exclude(RSET_GPR, base);
+  IRIns *ir = IR(ref);
+  lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
+             "store of IR type %d", irt_type(ir->t));
+  if (irref_isk(ref)) {
+    TValue k;
+    lj_ir_kvalue(as->J->L, &k, ir);
+    emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs);
+  } else {
+    Reg src = ra_alloc1(as, ref, allow);
+    rset_clear(allow, src);
+    if (irt_isinteger(ir->t)) {
+      /* tagged = type + zero-extended 32 bit value. */
+      Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
+      emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
+      emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
+    } else {
+      /* tagged = value + (type << 47). */
+      Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+      emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
+      emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
+    }
+  }
+}
+
+/* Get pointer to TValue.
+** Leaves a pointer to a TValue holding ref in dest. For an input-only
+** number constant the constant itself serves as the TValue; otherwise the
+** value is stored into g->tmptv and dest points there.
+*/
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
+{
+  if ((mode & IRTMPREF_IN1)) {
+    IRIns *ir = IR(ref);
+    if (irt_isnum(ir->t)) {
+      if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
+        /* Use the number constant itself as a TValue. */
+        ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
+        return;
+      }
+      emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0);
+    } else {
+      asm_tvstore64(as, dest, 0, ref);
+    }
+  }
+  /* g->tmptv holds the TValue(s). */
+  emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL);
+}
+
+/* Assemble IR_AREF: address of array slot t->array + 8*index.
+** For a constant index that (with a possible colocated-array base offset)
+** fits a 12 bit immediate, a single ADD is emitted; otherwise the index
+** is scaled into the address via ADD with UXTW #3.
+*/
+static void asm_aref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx, base;
+  if (irref_isk(ir->op2)) {
+    IRRef tab = IR(ir->op1)->op1;
+    int32_t ofs = asm_fuseabase(as, tab);
+    IRRef refa = ofs ? tab : ir->op1;
+    uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
+    if (k) {
+      base = ra_alloc1(as, refa, RSET_GPR);
+      emit_dn(as, A64I_ADDx^k, dest, base);
+      return;
+    }
+  }
+  base = ra_alloc1(as, ir->op1, RSET_GPR);
+  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
+  emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);
+}
+
+/* Inlined hash lookup. Specialized for key type and for const keys.
+** The equivalent C code is:
+**   Node *n = hashkey(t, key);
+**   do {
+**     if (lj_obj_equal(&n->key, key)) return &n->val;
+**   } while ((n = nextnode(n)));
+**   return niltv(L);
+**
+** Code is emitted backwards, so the sections below appear in reverse
+** execution order: hash computation last in the source executes first.
+** merge is IR_EQ/IR_NE when the following guard was merged into the
+** lookup, turning key match/mismatch into a trace exit.
+*/
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
+{
+  RegSet allow = RSET_GPR;
+  int destused = ra_used(ir);
+  Reg dest = ra_dest(as, ir, allow);
+  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
+  Reg key = 0, tmp = RID_TMP;
+  Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE;
+  IRRef refkey = ir->op2;
+  IRIns *irkey = IR(refkey);
+  int isk = irref_isk(ir->op2);
+  IRType1 kt = irkey->t;
+  uint32_t k = 0;
+  uint32_t khash;
+  MCLabel l_end, l_loop, l_next;
+  rset_clear(allow, tab);
+
+  /* Allocate the key register (FPR for non-const number keys). */
+  if (!isk) {
+    key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
+    rset_clear(allow, key);
+    if (!irt_isstr(kt)) {
+      tmp = ra_scratch(as, allow);
+      rset_clear(allow, tmp);
+    }
+  } else if (irt_isnum(kt)) {
+    int64_t val = (int64_t)ir_knum(irkey)->u64;
+    if (!(k = emit_isk12(val))) {
+      key = ra_allock(as, val, allow);
+      rset_clear(allow, key);
+    }
+  } else if (!irt_ispri(kt)) {
+    if (!(k = emit_isk12(irkey->i))) {
+      key = ra_alloc1(as, refkey, allow);
+      rset_clear(allow, key);
+    }
+  }
+
+  /* Allocate constants early. */
+  if (irt_isnum(kt)) {
+    if (!isk) {
+      tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
+      ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
+      rset_clear(allow, tisnum);
+    }
+  } else if (irt_isaddr(kt)) {
+    if (isk) {
+      /* Pre-build the full tagged key constant for the compare. */
+      int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
+      scr = ra_allock(as, kk, allow);
+    } else {
+      scr = ra_scratch(as, allow);
+    }
+    rset_clear(allow, scr);
+  } else {
+    lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
+    type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow);
+    scr = ra_scratch(as, rset_clear(allow, type));
+    rset_clear(allow, scr);
+  }
+
+  /* Key not found in chain: jump to exit (if merged) or load niltv. */
+  l_end = emit_label(as);
+  as->invmcp = NULL;
+  if (merge == IR_NE)
+    asm_guardcc(as, CC_AL);
+  else if (destused)
+    emit_loada(as, dest, niltvg(J2G(as->J)));
+
+  /* Follow hash chain until the end: dest = dest->next, loop while != 0. */
+  l_loop = --as->mcp;
+  emit_n(as, A64I_CMPx^A64I_K12^0, dest);
+  emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
+  l_next = emit_label(as);
+
+  /* Type and value comparison. */
+  if (merge == IR_EQ)
+    asm_guardcc(as, CC_EQ);
+  else
+    emit_cond_branch(as, CC_EQ, l_end);
+
+  if (irt_isnum(kt)) {
+    if (isk) {
+      /* Assumes -0.0 is already canonicalized to +0.0. */
+      if (k)
+        emit_n(as, A64I_CMPx^k, tmp);
+      else
+        emit_nm(as, A64I_CMPx, key, tmp);
+      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+    } else {
+      /* First check the tag is below LJ_TISNUM (i.e. a number), then
+      ** compare the FP values.
+      */
+      emit_nm(as, A64I_FCMPd, key, ftmp);
+      emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
+      emit_cond_branch(as, CC_LO, l_next);
+      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
+      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
+    }
+  } else if (irt_isaddr(kt)) {
+    if (isk) {
+      emit_nm(as, A64I_CMPx, scr, tmp);
+      emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
+    } else {
+      emit_nm(as, A64I_CMPx, tmp, scr);
+      emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
+    }
+  } else {
+    emit_nm(as, A64I_CMPx, scr, type);
+    emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
+  }
+
+  /* Patch the loop branch now that its target (the compare) is known. */
+  *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
+  if (!isk && irt_isaddr(kt)) {
+    /* Build the tagged key in tmp for the in-loop compare above. */
+    type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
+    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
+    rset_clear(allow, type);
+  }
+  /* Load main position relative to tab->node into dest. */
+  khash = isk ? ir_khash(as, irkey) : 1;
+  if (khash == 0) {
+    emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
+  } else {
+    /* dest = node + (hash & hmask) * sizeof(Node), via *3 then *8. */
+    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
+    emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
+    emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
+    if (isk) {
+      Reg tmphash = ra_allock(as, khash, allow);
+      emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
+      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
+    } else if (irt_isstr(kt)) {
+      /* Fetch of str->sid is cheaper than ra_allock. */
+      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
+      emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid));
+      emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
+    } else {  /* Must match with hash*() in lj_tab.c. */
+      emit_dnm(as, A64I_ANDw, dest, dest, tmp);
+      emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
+      emit_dnm(as, A64I_SUBw, dest, dest, tmp);
+      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
+      emit_dnm(as, A64I_EORw, dest, dest, tmp);
+      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
+      emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
+      emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
+      emit_dnm(as, A64I_EORw, tmp, tmp, dest);
+      if (irt_isnum(kt)) {
+        /* Mix hi and lo words of the double's bit pattern. */
+        emit_dnm(as, A64I_ADDw, dest, dest, dest);
+        emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
+        emit_dm(as, A64I_MOVw, tmp, dest);
+        emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
+      } else {
+        checkmclim(as);
+        /* Mix the GC pointer's words with the type tag. */
+        emit_dm(as, A64I_MOVw, tmp, key);
+        emit_dnm(as, A64I_EORw, dest, dest,
+                 ra_allock(as, irt_toitype(kt) << 15, allow));
+        emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
+        emit_dm(as, A64I_MOVx, dest, key);
+      }
+    }
+  }
+}
+
+/* Assemble IR_HREFK: hash slot lookup with a constant key at a constant
+** slot. Loads the key from node[slot] and guards (CC_NE) that it equals
+** the expected tagged key constant. dest (if used) receives &node[slot];
+** for offsets too large to encode, the address is computed first and
+** reused as the load base.
+*/
+static void asm_hrefk(ASMState *as, IRIns *ir)
+{
+  IRIns *kslot = IR(ir->op2);
+  IRIns *irkey = IR(kslot->op1);
+  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
+  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
+  int bigofs = !emit_checkofs(A64I_LDRx, ofs);
+  Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
+  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
+  Reg key, idx = node;
+  RegSet allow = rset_exclude(RSET_GPR, node);
+  uint64_t k;
+  lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
+  if (bigofs) {
+    /* Compute dest = node+ofs up front and load relative to dest. */
+    idx = dest;
+    rset_clear(allow, dest);
+    kofs = (int32_t)offsetof(Node, key);
+  } else if (ra_hasreg(dest)) {
+    emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
+  }
+  asm_guardcc(as, CC_NE);
+  /* Build the expected 64 bit tagged key constant. */
+  if (irt_ispri(irkey->t)) {
+    k = ~((int64_t)~irt_toitype(irkey->t) << 47);
+  } else if (irt_isnum(irkey->t)) {
+    k = ir_knum(irkey)->u64;
+  } else {
+    k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
+  }
+  key = ra_scratch(as, allow);
+  emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
+  emit_lso(as, A64I_LDRx, key, idx, kofs);
+  if (bigofs)
+    emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
+}
+
+/* Assemble IR_UREF*: address of an upvalue.
+** For a constant function the upvalue's value pointer is loaded directly.
+** Otherwise the GCupval is loaded from the function's uvptr array; for
+** IR_UREFC the upvalue must be closed (guard CC_NE on uv->closed != 1)
+** and dest points at the embedded TValue, while the open variant simply
+** loads uv->v.
+*/
+static void asm_uref(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  if (irref_isk(ir->op1)) {
+    GCfunc *fn = ir_kfunc(IR(ir->op1));
+    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
+    emit_lsptr(as, A64I_LDRx, dest, v);
+  } else {
+    Reg uv = ra_scratch(as, RSET_GPR);
+    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->o == IR_UREFC) {
+      asm_guardcc(as, CC_NE);
+      emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
+      emit_opk(as, A64I_ADDx, dest, uv,
+               (int32_t)offsetof(GCupval, tv), RSET_GPR);
+      emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
+    } else {
+      emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
+    }
+    /* uv = func->uvptr[op2>>8]. */
+    emit_lso(as, A64I_LDRx, uv, func,
+             (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
+  }
+}
+
+/* IR_FREF is always fused into its user (FLOAD/FSTORE): nothing to emit. */
+static void asm_fref(ASMState *as, IRIns *ir)
+{
+  UNUSED(as); UNUSED(ir);
+  lj_assertA(!ra_used(ir), "unfused FREF");
+}
+
+/* Assemble IR_STRREF: address of a string byte, str + sizeof(GCstr) + idx.
+** A constant index that fits (with the header offset) in a 12 bit
+** immediate becomes a single ADD; otherwise two ADDs are emitted.
+*/
+static void asm_strref(ASMState *as, IRIns *ir)
+{
+  RegSet allow = RSET_GPR;
+  Reg dest = ra_dest(as, ir, allow);
+  Reg base = ra_alloc1(as, ir->op1, allow);
+  IRIns *irr = IR(ir->op2);
+  int32_t ofs = sizeof(GCstr);  /* String data follows the header. */
+  uint32_t m;
+  rset_clear(allow, base);
+  if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
+    emit_dn(as, A64I_ADDx^m, dest, base);
+  } else {
+    emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
+    emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
+  }
+}
+
+/* -- Loads and stores ---------------------------------------------------- */
+
+/* Map the IR type of ir to the matching A64 load instruction.
+** Sub-word signed types get the sign-extending load variant (A64I_LS_S).
+*/
+static A64Ins asm_fxloadins(IRIns *ir)
+{
+  IRType t = irt_type(ir->t);
+  if (t == IRT_I8) return A64I_LDRB ^ A64I_LS_S;  /* Sign-extending byte. */
+  if (t == IRT_U8) return A64I_LDRB;
+  if (t == IRT_I16) return A64I_LDRH ^ A64I_LS_S;  /* Sign-ext. halfword. */
+  if (t == IRT_U16) return A64I_LDRH;
+  if (t == IRT_NUM) return A64I_LDRd;
+  if (t == IRT_FLOAT) return A64I_LDRs;
+  /* Integers and pointers: pick width from the IR type. */
+  return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw;
+}
+
+/* Map the IR type of ir to the matching A64 store instruction.
+** Stores need no sign distinction, only the access width.
+*/
+static A64Ins asm_fxstoreins(IRIns *ir)
+{
+  IRType t = irt_type(ir->t);
+  if (t == IRT_I8 || t == IRT_U8) return A64I_STRB;
+  if (t == IRT_I16 || t == IRT_U16) return A64I_STRH;
+  if (t == IRT_NUM) return A64I_STRd;
+  if (t == IRT_FLOAT) return A64I_STRs;
+  return irt_is64(ir->t) ? A64I_STRx : A64I_STRw;
+}
+
+/* Assemble IR_FLOAD: load an object field.
+** op1 == REF_NIL loads from the GG_State relative to RID_GL; the special
+** field IRFL_TAB_ARRAY of a colocated table turns into a plain ADD of the
+** colocation offset; all other fields use the field_ofs table.
+*/
+static void asm_fload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, RSET_GPR);
+  Reg idx;
+  A64Ins ai = asm_fxloadins(ir);
+  int32_t ofs;
+  if (ir->op1 == REF_NIL) {  /* FLOAD from GG_State with offset. */
+    idx = RID_GL;
+    ofs = (ir->op2 << 2) - GG_OFS(g);
+  } else {
+    idx = ra_alloc1(as, ir->op1, RSET_GPR);
+    if (ir->op2 == IRFL_TAB_ARRAY) {
+      ofs = asm_fuseabase(as, ir->op1);
+      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
+        emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
+        return;
+      }
+    }
+    ofs = field_ofs[ir->op2];
+  }
+  emit_lso(as, ai, (dest & 31), idx, ofs);
+}
+
+/* Assemble IR_FSTORE: store to an object field through a (fused) FREF.
+** A sunk store (ir->r == RID_SINK) emits nothing.
+*/
+static void asm_fstore(ASMState *as, IRIns *ir)
+{
+  if (ir->r != RID_SINK) {
+    Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
+    IRIns *irf = IR(ir->op1);  /* The fused FREF supplies object+field. */
+    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
+    int32_t ofs = field_ofs[irf->op2];
+    emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs);
+  }
+}
+
+/* Assemble IR_XLOAD: typed load from an arbitrary address (FFI).
+** The address computation is fused into the load where possible.
+*/
+static void asm_xload(ASMState *as, IRIns *ir)
+{
+  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+  lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
+  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
+}
+
+/* Assemble IR_XSTORE: typed store to an arbitrary address (FFI).
+** Sunk stores emit nothing; the address computation is fused when possible.
+*/
+static void asm_xstore(ASMState *as, IRIns *ir)
+{
+  if (ir->r != RID_SINK) {
+    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+                 rset_exclude(RSET_GPR, src));
+  }
+}
+
+/* Assemble IR_ALOAD/IR_HLOAD/IR_ULOAD/IR_VLOAD: load a TValue and check
+** its type. The raw 64 bit tagged value is loaded into tmp; the type
+** check always runs, even if the result is unused. Numbers are checked
+** by comparing the tag (bits 63..47, via LSR #32 against LJ_TISNUM<<15)
+** and moved into an FPR; addresses are untagged by masking with
+** LJ_GCVMASK. ofs may carry the FUSE_REG flag, selecting a register-
+** offset addressing form.
+*/
+static void asm_ahuvload(ASMState *as, IRIns *ir)
+{
+  Reg idx, tmp, type;
+  int32_t ofs = 0;
+  RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
+  lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+             irt_isint(ir->t),
+             "bad load type %d", irt_type(ir->t));
+  if (ra_used(ir)) {
+    Reg dest = ra_dest(as, ir, allow);
+    tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
+    if (irt_isaddr(ir->t)) {
+      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
+    } else if (irt_isnum(ir->t)) {
+      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
+    } else if (irt_isint(ir->t)) {
+      emit_dm(as, A64I_MOVw, dest, dest);  /* Zero hiword of the value. */
+    }
+  } else {
+    tmp = ra_scratch(as, gpr);
+  }
+  type = ra_scratch(as, rset_clear(gpr, tmp));
+  idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
+  if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
+  /* Always do the type check, even if the load result is unused. */
+  asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
+  if (irt_type(ir->t) >= IRT_NUM) {
+    lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
+               "bad load type %d", irt_type(ir->t));
+    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+            ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
+  } else if (irt_isaddr(ir->t)) {
+    /* Extract the tag with an arithmetic shift, then compare via CMN. */
+    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
+    emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
+  } else if (irt_isnil(ir->t)) {
+    emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
+  } else {
+    emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+            ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp);
+  }
+  if (ofs & FUSE_REG)
+    emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
+  else
+    emit_lso(as, A64I_LDRx, tmp, idx, ofs);
+}
+
+/* Assemble IR_ASTORE/IR_HSTORE/IR_USTORE: store a TValue.
+** Sunk stores emit nothing. Numbers are stored directly from an FPR.
+** Otherwise the tagged 64 bit value is built in RID_TMP (type tag added
+** to the value, or a constant for primitive types) and stored. ofs may
+** carry FUSE_REG, selecting a register-offset addressing form.
+*/
+static void asm_ahustore(ASMState *as, IRIns *ir)
+{
+  if (ir->r != RID_SINK) {
+    RegSet allow = RSET_GPR;
+    Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
+    int32_t ofs = 0;
+    if (irt_isnum(ir->t)) {
+      src = ra_alloc1(as, ir->op2, RSET_FPR);
+      idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
+      if (ofs & FUSE_REG)
+        emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31));
+      else
+        emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
+    } else {
+      if (!irt_ispri(ir->t)) {
+        src = ra_alloc1(as, ir->op2, allow);
+        rset_clear(allow, src);
+        if (irt_isinteger(ir->t))
+          type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
+        else
+          type = ra_allock(as, irt_toitype(ir->t), allow);
+      } else {
+        /* Primitive: the whole tagged value is a constant. */
+        tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);
+      }
+      idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
+                           A64I_STRx);
+      if (ofs & FUSE_REG)
+        emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
+      else
+        emit_lso(as, A64I_STRx, tmp, idx, ofs);
+      if (ra_hasreg(src)) {
+        if (irt_isinteger(ir->t)) {
+          /* tagged = type + zero-extended 32 bit value. */
+          emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);
+        } else {
+          /* tagged = value + (type << 47). */
+          emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);
+        }
+      }
+    }
+  }
+}
+
+/* Assemble IR_SLOAD: load a stack slot relative to BASE, with optional
+** type check (IRSLOAD_TYPECHECK) and optional int<->num conversion
+** (IRSLOAD_CONVERT). Parent slots are handled by asm_head_side() instead.
+** The slot offset is 8*(op1-2) from the trace BASE register.
+*/
+static void asm_sload(ASMState *as, IRIns *ir)
+{
+  int32_t ofs = 8*((int32_t)ir->op1-2);
+  IRType1 t = ir->t;
+  Reg dest = RID_NONE, base;
+  RegSet allow = RSET_GPR;
+  lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+             "bad parent SLOAD");  /* Handled by asm_head_side(). */
+  lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+             "inconsistent SLOAD variant");
+  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
+    /* Guarded num->int conversion: do a checked conversion, then fall
+    ** through to a regular number type check on the slot.
+    */
+    dest = ra_scratch(as, RSET_FPR);
+    asm_tointg(as, ir, dest);
+    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
+  } else if (ra_used(ir)) {
+    Reg tmp = RID_NONE;
+    if ((ir->op2 & IRSLOAD_CONVERT))
+      tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
+    lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t),
+               "bad SLOAD type %d", irt_type(t));
+    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
+    base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
+    if (irt_isaddr(t)) {
+      /* Untag the GC pointer. */
+      emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
+    } else if ((ir->op2 & IRSLOAD_CONVERT)) {
+      if (irt_isint(t)) {
+        emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
+        /* If value is already loaded for type check, move it to FPR. */
+        if ((ir->op2 & IRSLOAD_TYPECHECK))
+          emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
+        else
+          dest = tmp;
+        t.irt = IRT_NUM;  /* Check for original type. */
+      } else {
+        emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
+        dest = tmp;
+        t.irt = IRT_INT;  /* Check for original type. */
+      }
+    } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
+      emit_dm(as, A64I_MOVw, dest, dest);  /* Zero hiword of the value. */
+    }
+    goto dotypecheck;
+  }
+  base = ra_alloc1(as, REF_BASE, allow);
+dotypecheck:
+  rset_clear(allow, base);
+  if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+    Reg tmp;
+    if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
+      tmp = dest;  /* Reuse the GPR destination for the raw tagged value. */
+    } else {
+      tmp = ra_scratch(as, allow);
+      rset_clear(allow, tmp);
+    }
+    if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
+      emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
+    /* Need type check, even if the load result is unused. */
+    asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
+    if (irt_type(t) >= IRT_NUM) {
+      lj_assertA(irt_isinteger(t) || irt_isnum(t),
+                 "bad SLOAD type %d", irt_type(t));
+      /* Compare the tag (bits 63..47) via LSR #32. */
+      emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
+              ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : (LJ_TISNUM << 15), allow), tmp);
+    } else if (irt_isnil(t)) {
+      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
+    } else if (irt_ispri(t)) {
+      emit_nm(as, A64I_CMPx,
+              ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp);
+    } else {
+      /* Extract the tag with an arithmetic shift, then compare via CMN. */
+      Reg type = ra_scratch(as, allow);
+      emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
+      emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
+    }
+    emit_lso(as, A64I_LDRx, tmp, base, ofs);
+    return;
+  }
+  if (ra_hasreg(dest)) {
+    emit_lso(as, irt_isnum(t) ? A64I_LDRd :
+             (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
+             ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
+  }
+}
+
+/* -- Allocations --------------------------------------------------------- */
+
+#if LJ_HASFFI
+/* Assemble IR_CNEW/IR_CNEWI: allocate a cdata object.
+** Fixed-size objects call lj_mem_newgco and initialize gct/ctypeid
+** inline; IR_CNEWI additionally stores the 4/8 byte immutable payload.
+** Variable-size cdata (op2 != REF_NIL) is delegated entirely to
+** lj_cdata_newv. Code is emitted backwards: the call setup appears last.
+*/
+static void asm_cnew(ASMState *as, IRIns *ir)
+{
+  CTState *cts = ctype_ctsG(J2G(as->J));
+  CTypeID id = (CTypeID)IR(ir->op1)->i;
+  CTSize sz;
+  CTInfo info = lj_ctype_info(cts, id, &sz);
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
+  IRRef args[4];
+  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+  lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+             "bad CNEW/CNEWI operands");
+
+  as->gcsteps++;
+  asm_setupresult(as, ir, ci);  /* GCcdata * */
+  /* Initialize immutable cdata object. */
+  if (ir->o == IR_CNEWI) {
+    int32_t ofs = sizeof(GCcdata);
+    Reg r = ra_alloc1(as, ir->op2, allow);
+    lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+    emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
+  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
+    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+    args[0] = ASMREF_L;     /* lua_State *L */
+    args[1] = ir->op1;      /* CTypeID id   */
+    args[2] = ir->op2;      /* CTSize sz    */
+    args[3] = ASMREF_TMP1;  /* CTSize align */
+    asm_gencall(as, ci, args);
+    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+    return;
+  }
+
+  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
+  {
+    /* Small ctype ids are materialized with MOVZ into RID_X1. */
+    Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
+    emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
+    emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
+    emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
+    if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
+  }
+  args[0] = ASMREF_L;     /* lua_State *L */
+  args[1] = ASMREF_TMP1;  /* MSize size   */
+  asm_gencall(as, ci, args);
+  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
+               ra_releasetmp(as, ASMREF_TMP1));
+}
+#endif
+
+/* -- Write barriers ------------------------------------------------------ */
+
+/* Table write barrier: if the table is black, clear its black bit and */
+/* link it into g->gc.grayagain. Execution order is the reverse of the */
+/* emit_* calls below (code is emitted backwards). */
+static void asm_tbar(ASMState *as, IRIns *ir)
+{
+ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
+ Reg mark = RID_TMP;
+ MCLabel l_end = emit_label(as);
+ emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
+ emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+ emit_setgl(as, tab, gc.grayagain);
+ emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
+ emit_getgl(as, link, gc.grayagain);
+ emit_cond_branch(as, CC_EQ, l_end); /* Table not black? Skip barrier. */
+ emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
+ emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
+}
+
+/* Upvalue write barrier: call lj_gc_barrieruv() only when a white value */
+/* is stored into a black closed upvalue. */
+static void asm_obar(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
+ IRRef args[2];
+ MCLabel l_end;
+ RegSet allow = RSET_GPR;
+ Reg obj, val, tmp;
+ /* No need for other object barriers (yet). */
+ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ir->op1; /* TValue *tv */
+ asm_gencall(as, ci, args);
+ emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
+ obj = IR(ir->op1)->r;
+ tmp = ra_scratch(as, rset_exclude(allow, obj));
+ emit_cond_branch(as, CC_EQ, l_end); /* Upvalue not black? Skip. */
+ emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
+ emit_cond_branch(as, CC_EQ, l_end); /* Stored value not white? Skip. */
+ emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
+ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
+ /* obj points at the upvalue's embedded TValue; rebase to marked byte. */
+ emit_lso(as, A64I_LDRB, tmp, obj,
+ (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
+ emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
+}
+
+/* -- Arithmetic and logic operations ------------------------------------- */
+
+/* Two-operand FP arithmetic. ra_alloc2() packs the right reg in bits 8+. */
+static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
+{
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+ right = (left >> 8); left &= 255;
+ emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31)); /* &31: reg field. */
+}
+
+/* One-operand FP op (neg, abs, sqrt, rounding). */
+static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
+{
+ Reg dest = ra_dest(as, ir, RSET_FPR);
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
+ emit_dn(as, ai, (dest & 31), (left & 31));
+}
+
+/* FP math ops: sqrt and floor/ceil/trunc map to single instructions, */
+/* everything else falls back to a C helper call. */
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+ IRFPMathOp fpm = (IRFPMathOp)ir->op2;
+ if (fpm == IRFPM_SQRT) {
+ asm_fpunary(as, ir, A64I_FSQRTd);
+ } else if (fpm <= IRFPM_TRUNC) { /* Relies on IRFPM_* enum order. */
+ asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
+ fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
+ } else {
+ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); /* Indexed by fpm. */
+ }
+}
+
+/* Check if operands of a commutative op should be swapped, so that an */
+/* operand which can be fused into the instruction (shift, 2*x ADD or */
+/* sign-extending CONV) ends up on the right-hand side. */
+static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
+{
+ IRIns *ir;
+ if (irref_isk(rref))
+ return 0; /* Don't swap constants to the left. */
+ if (irref_isk(lref))
+ return 1; /* But swap constants to the right. */
+ ir = IR(rref);
+ if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
+ (ir->o == IR_ADD && ir->op1 == ir->op2) ||
+ (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
+ return 0; /* Don't swap fusable operands to the left. */
+ ir = IR(lref);
+ if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
+ (ir->o == IR_ADD && ir->op1 == ir->op2) ||
+ (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
+ return 1; /* But swap fusable operands to the right. */
+ return 0; /* Otherwise don't swap. */
+}
+
+/* Generic integer op with a fusable right operand. */
+static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
+{
+ IRRef lref = ir->op1, rref = ir->op2;
+ Reg left, dest = ra_dest(as, ir, RSET_GPR);
+ uint32_t m;
+ /* Subtraction is not commutative; never swap its operands. */
+ if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {
+ IRRef tmp = lref; lref = rref; rref = tmp;
+ }
+ left = ra_hintalloc(as, lref, dest, RSET_GPR);
+ if (irt_is64(ir->t)) ai |= A64I_X;
+ m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
+ if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */
+ asm_guardcc(as, CC_VS); /* Exit on overflow; needs the S flag set. */
+ ai |= A64I_S;
+ }
+ emit_dn(as, ai^m, dest, left);
+}
+
+/* Integer op which may absorb a preceding cmp r, #0 by setting its S */
+/* flag. as->flagmcp == as->mcp means the last-emitted instruction is */
+/* that compare; bumping as->mcp drops it (code is emitted backwards). */
+static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
+{
+ if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */
+ as->flagmcp = NULL;
+ as->mcp++;
+ ai |= A64I_S;
+ }
+ asm_intop(as, ir, ai);
+}
+
+/* Integer negate. */
+static void asm_intneg(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+ emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left);
+}
+
+/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
+static void asm_intmul(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+ if (irt_isguard(ir->t)) { /* IR_MULOV */
+ /* smull, then exit unless the 64-bit product sign-extends from 32 bit: */
+ /* compare (dest >> 32) with (dest asr #31). */
+ asm_guardcc(as, CC_NE);
+ emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */
+ emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
+ emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
+ emit_dnm(as, A64I_SMULL, dest, right, left);
+ } else {
+ emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
+ }
+}
+
+/* Add: fuse into fmadd where possible, else fadd / integer add. */
+static void asm_add(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t)) {
+ if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
+ asm_fparith(as, ir, A64I_FADDd);
+ return;
+ }
+ asm_intop_s(as, ir, A64I_ADDw);
+}
+
+/* Subtract: fuse into fnmsub/fmsub where possible, else fsub/integer sub. */
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t)) {
+ if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd))
+ asm_fparith(as, ir, A64I_FSUBd);
+ return;
+ }
+ asm_intop_s(as, ir, A64I_SUBw);
+}
+
+/* Multiply. */
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t)) {
+ asm_fparith(as, ir, A64I_FMULd);
+ return;
+ }
+ asm_intmul(as, ir);
+}
+
+/* Overflow-checked variants share the handlers (guard via irt_isguard). */
+#define asm_addov(as, ir) asm_add(as, ir)
+#define asm_subov(as, ir) asm_sub(as, ir)
+#define asm_mulov(as, ir) asm_mul(as, ir)
+
+#define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd)
+#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS)
+
+/* Negate (FP or integer). */
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t)) {
+ asm_fpunary(as, ir, A64I_FNEGd);
+ return;
+ }
+ asm_intneg(as, ir);
+}
+
+/* Bitwise AND; may fuse with shifts or absorb a cmp r, #0 via ands. */
+static void asm_band(ASMState *as, IRIns *ir)
+{
+ A64Ins ai = A64I_ANDw;
+ if (asm_fuseandshift(as, ir))
+ return;
+ if (as->flagmcp == as->mcp) {
+ /* Try to drop cmp r, #0. */
+ as->flagmcp = NULL;
+ as->mcp++;
+ ai = A64I_ANDSw;
+ }
+ asm_intop(as, ir, ai);
+}
+
+/* OR/XOR; fuses an operand-side BNOT into orn/eon via the A64I_ON bit. */
+static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai)
+{
+ IRRef lref = ir->op1, rref = ir->op2;
+ IRIns *irl = IR(lref), *irr = IR(rref);
+ if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) ||
+ (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) {
+ Reg left, dest = ra_dest(as, ir, RSET_GPR);
+ uint32_t m;
+ if (irl->o == IR_BNOT) { /* Move the BNOT operand to the right. */
+ IRRef tmp = lref; lref = rref; rref = tmp;
+ }
+ left = ra_alloc1(as, lref, RSET_GPR);
+ ai |= A64I_ON;
+ if (irt_is64(ir->t)) ai |= A64I_X;
+ m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left));
+ emit_dn(as, ai^m, dest, left);
+ } else {
+ asm_intop(as, ir, ai);
+ }
+}
+
+/* Bitwise OR; try shift/rotate fusion first. */
+static void asm_bor(ASMState *as, IRIns *ir)
+{
+ if (asm_fuseorshift(as, ir))
+ return;
+ asm_borbxor(as, ir, A64I_ORRw);
+}
+
+#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw)
+
+/* Bitwise NOT via mvn with a fusable operand. */
+static void asm_bnot(ASMState *as, IRIns *ir)
+{
+ A64Ins ai = A64I_MVNw;
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
+ if (irt_is64(ir->t)) ai |= A64I_X;
+ emit_d(as, ai^m, dest);
+}
+
+/* Byte swap via rev. */
+static void asm_bswap(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+ emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left);
+}
+
+/* Shifts and rotates. Constant LSL/LSR/ASR use the ubfm/sbfm bitfield */
+/* encodings, constant ROR uses extr with both sources equal. Variable */
+/* shifts use the register forms. */
+static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
+{
+ int32_t shmask = irt_is64(ir->t) ? 63 : 31;
+ if (irref_isk(ir->op2)) { /* Constant shifts. */
+ Reg left, dest = ra_dest(as, ir, RSET_GPR);
+ int32_t shift = (IR(ir->op2)->i & shmask);
+ IRIns *irl = IR(ir->op1);
+ if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; /* Select 64 bit form. */
+
+ /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
+ if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) {
+ if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
+ int32_t shift2 = (IR(irl->op2)->i & shmask);
+ shift = ((shift - shift2) & shmask);
+ shmask -= shift2;
+ ir = irl; /* Consume the inner shift, too. */
+ }
+ }
+
+ left = ra_alloc1(as, ir->op1, RSET_GPR);
+ switch (sh) {
+ case A64SH_LSL:
+ /* lsl #n is the alias ubfm with immr = -n mod size, imms = size-1-n. */
+ emit_dn(as, ai | A64F_IMMS(shmask-shift) |
+ A64F_IMMR((shmask-shift+1)&shmask), dest, left);
+ break;
+ case A64SH_LSR: case A64SH_ASR:
+ emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);
+ break;
+ case A64SH_ROR:
+ emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left); /* extr rd,rn,rn. */
+ break;
+ }
+ } else { /* Variable-length shifts. */
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+ emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right);
+ }
+}
+
+#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
+#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
+#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
+
+/* Integer min/max: cmp, then csel on cc. */
+static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+ Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+ emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right);
+ emit_nm(as, A64I_CMPw, left, right);
+}
+
+/* FP min/max: fcmp, then fcsel. Note the swapped fcsel operands: dest */
+/* gets right if fcc holds, else left. */
+static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
+{
+ Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
+ Reg right, left = ra_alloc2(as, ir, RSET_FPR);
+ right = ((left >> 8) & 31); left &= 31;
+ emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left);
+ emit_nm(as, A64I_FCMPd, left, right);
+}
+
+/* Dispatch MIN/MAX on operand type. */
+static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc)
+{
+ if (irt_isnum(ir->t))
+ asm_fpmin_max(as, ir, fcc);
+ else
+ asm_intmin_max(as, ir, cc);
+}
+
+#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL)
+#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE)
+
+/* -- Comparisons --------------------------------------------------------- */
+
+/* Map of comparisons to flags. ORDER IR. */
+/* Low nibble: condition to *exit* on for the integer compare (i.e. the */
+/* inverted condition). High nibble: exit condition for the FP compare. */
+/* Ops marked 'x' compare with swapped operands in the FP case, which is */
+/* what makes the NZCV result of an unordered fcmp come out right. */
+static const uint8_t asm_compmap[IR_ABC+1] = {
+ /* op FP swp int cc FP cc */
+ /* LT */ CC_GE + (CC_HS << 4),
+ /* GE x */ CC_LT + (CC_HI << 4),
+ /* LE */ CC_GT + (CC_HI << 4),
+ /* GT x */ CC_LE + (CC_HS << 4),
+ /* ULT x */ CC_HS + (CC_LS << 4),
+ /* UGE */ CC_LO + (CC_LO << 4),
+ /* ULE x */ CC_HI + (CC_LO << 4),
+ /* UGT */ CC_LS + (CC_LS << 4),
+ /* EQ */ CC_NE + (CC_NE << 4),
+ /* NE */ CC_EQ + (CC_EQ << 4),
+ /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */
+};
+
+/* FP comparisons. */
+static void asm_fpcomp(ASMState *as, IRIns *ir)
+{
+ Reg left, right;
+ A64Ins ai;
+ /* Bit trick: derives the 'FP swp' column of asm_compmap from ir->o. */
+ int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
+ /* Compare against a +0.0 constant with the fcmp #0.0 form. */
+ if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
+ left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31);
+ right = 0;
+ ai = A64I_FCMPZd;
+ } else {
+ left = ra_alloc2(as, ir, RSET_FPR);
+ if (swp) {
+ right = (left & 31); left = ((left >> 8) & 31);
+ } else {
+ right = ((left >> 8) & 31); left &= 31;
+ }
+ ai = A64I_FCMPd;
+ }
+ asm_guardcc(as, (asm_compmap[ir->o] >> 4)); /* Exit on the FP cc nibble. */
+ emit_nm(as, ai, left, right);
+}
+
+/* Integer comparisons. */
+static void asm_intcomp(ASMState *as, IRIns *ir)
+{
+ A64CC oldcc, cc = (asm_compmap[ir->o] & 15);
+ A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw;
+ IRRef lref = ir->op1, rref = ir->op2;
+ Reg left;
+ uint32_t m;
+ int cmpprev0 = 0;
+ lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) ||
+ irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t),
+ "bad comparison data type %d", irt_type(ir->t));
+ if (asm_swapops(as, lref, rref)) { /* Mirror the condition, too. */
+ IRRef tmp = lref; lref = rref; rref = tmp;
+ if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */
+ else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */
+ }
+ oldcc = cc; /* Keep the pre-rewrite cc for the flagmcp check below. */
+ if (irref_isk(rref) && get_k64val(as, rref) == 0) { /* Compare with zero? */
+ IRIns *irl = IR(lref);
+ if (cc == CC_GE) cc = CC_PL;
+ else if (cc == CC_LT) cc = CC_MI;
+ else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */
+ cmpprev0 = (irl+1 == ir); /* Left operand is the directly preceding ins? */
+ /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
+ if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
+ IRRef blref = irl->op1, brref = irl->op2;
+ uint32_t m2 = 0;
+ Reg bleft;
+ if (asm_swapops(as, blref, brref)) {
+ Reg tmp = blref; blref = brref; brref = tmp;
+ }
+ if (irref_isk(brref)) {
+ uint64_t k = get_k64val(as, brref);
+ /* Single-bit mask with EQ/NE: use a test-bit branch. */
+ if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
+ asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
+ ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
+ return;
+ }
+ m2 = emit_isk13(k, irt_is64(irl->t)); /* Try logical immediate. */
+ }
+ bleft = ra_alloc1(as, blref, RSET_GPR);
+ ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
+ if (!m2)
+ m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
+ asm_guardcc(as, cc);
+ emit_n(as, ai^m2, bleft);
+ return;
+ }
+ if (cc == CC_EQ || cc == CC_NE) {
+ /* Combine cmp-bcc into cbz/cbnz. */
+ ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ;
+ if (irt_is64(ir->t)) ai |= A64I_X;
+ asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR));
+ return;
+ }
+ }
+nocombine:
+ left = ra_alloc1(as, lref, RSET_GPR);
+ m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
+ asm_guardcc(as, cc);
+ emit_n(as, ai^m, left);
+ /* Signed comparison with zero and referencing previous ins? */
+ if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE))
+ as->flagmcp = as->mcp; /* Allow elimination of the compare. */
+}
+
+/* Dispatch comparison on operand type. */
+static void asm_comp(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t))
+ asm_fpcomp(as, ir);
+ else
+ asm_intcomp(as, ir);
+}
+
+#define asm_equal(as, ir) asm_comp(as, ir)
+
+/* -- Split register ops -------------------------------------------------- */
+
+/* Hiword op of a split 64/64 bit op. Previous op is the loword op. */
+/* On this 64 bit target only the CALL* results are expected here. */
+static void asm_hiop(ASMState *as, IRIns *ir)
+{
+ /* HIOP is marked as a store because it needs its own DCE logic. */
+ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
+ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+ if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
+ switch ((ir-1)->o) {
+ case IR_CALLN:
+ case IR_CALLL:
+ case IR_CALLS:
+ case IR_CALLXS:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+ break;
+ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+ }
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+/* Profiler check: exit the trace if HOOK_PROFILE is set in g->hookmask. */
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+ uint32_t k = emit_isk13(HOOK_PROFILE, 0);
+ lj_assertA(k != 0, "HOOK_PROFILE does not fit in K13");
+ UNUSED(ir);
+ asm_guardcc(as, CC_NE);
+ emit_n(as, A64I_TSTw^k, RID_TMP);
+ emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
+}
+
+/* -- Stack handling ------------------------------------------------------ */
+
+/* Check Lua stack size for overflow. Use exit handler as fallback. */
+static void asm_stack_check(ASMState *as, BCReg topslot,
+ IRIns *irp, RegSet allow, ExitNo exitno)
+{
+ Reg pbase;
+ uint32_t k;
+ if (irp) { /* Side trace: BASE may be in a reg, a spill slot, or neither. */
+ if (!ra_hasspill(irp->s)) {
+ pbase = irp->r;
+ lj_assertA(ra_hasreg(pbase), "base reg lost");
+ } else if (allow) {
+ pbase = rset_pickbot(allow);
+ } else { /* No free reg: borrow RID_RET, saved/restored on the stack. */
+ pbase = RID_RET;
+ emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */
+ }
+ } else {
+ pbase = RID_BASE;
+ }
+ /* Exit if L->maxstack - BASE <= 8*topslot (reverse emission order). */
+ emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
+ k = emit_isk12((8*topslot));
+ lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot);
+ emit_n(as, A64I_CMPx^k, RID_TMP);
+ emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
+ emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
+ (int32_t)offsetof(lua_State, maxstack));
+ if (irp) { /* Must not spill arbitrary registers in head of side trace. */
+ if (ra_hasspill(irp->s))
+ emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
+ emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
+ if (ra_hasspill(irp->s) && !allow)
+ emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */
+ } else {
+ emit_getgl(as, RID_TMP, cur_L);
+ }
+}
+
+/* Restore Lua stack from on-trace state. */
+static void asm_stack_restore(ASMState *as, SnapShot *snap)
+{
+ SnapEntry *map = &as->T->snapmap[snap->mapofs];
+#ifdef LUA_USE_ASSERT
+ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
+#endif
+ MSize n, nent = snap->nent;
+ /* Store the value of all modified slots to the Lua stack. */
+ for (n = 0; n < nent; n++) {
+ SnapEntry sn = map[n];
+ BCReg s = snap_slot(sn);
+ int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
+ IRRef ref = snap_ref(sn);
+ IRIns *ir = IR(ref);
+ if ((sn & SNAP_NORESTORE))
+ continue;
+ if ((sn & SNAP_KEYINDEX)) {
+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+ Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) :
+ ra_alloc1(as, ref, allow);
+ rset_clear(allow, r);
+ /* Store the key value and the LJ_KEYINDEX tag as two 32 bit halves. */
+ emit_lso(as, A64I_STRw, r, RID_BASE, ofs);
+ emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4);
+ } else if (irt_isnum(ir->t)) {
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
+ } else {
+ asm_tvstore64(as, RID_BASE, ofs, ref);
+ }
+ checkmclim(as);
+ }
+ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
+}
+
+/* -- GC handling --------------------------------------------------------- */
+
+/* Marker to prevent patching the GC check exit. */
+/* This is orr tmp, tmp, tmp -- effectively a nop with a unique encoding. */
+#define ARM64_NOPATCH_GC_CHECK \
+ (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP))
+
+/* Check GC threshold and do one or more GC steps. */
+static void asm_gc_check(ASMState *as)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
+ IRRef args[2];
+ MCLabel l_end;
+ Reg tmp2;
+ ra_evictset(as, RSET_SCRATCH);
+ l_end = emit_label(as);
+ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
+ asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */
+ /* Marker recognized by lj_asm_patchexit() to leave this exit alone. */
+ *--as->mcp = ARM64_NOPATCH_GC_CHECK;
+ args[0] = ASMREF_TMP1; /* global_State *g */
+ args[1] = ASMREF_TMP2; /* MSize steps */
+ asm_gencall(as, ci, args);
+ emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL);
+ tmp2 = ra_releasetmp(as, ASMREF_TMP2);
+ emit_loadi(as, tmp2, as->gcsteps);
+ /* Jump around GC step if GC total < GC threshold. */
+ emit_cond_branch(as, CC_LS, l_end);
+ emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
+ emit_getgl(as, tmp2, gc.threshold);
+ emit_getgl(as, RID_TMP, gc.total);
+ as->gcsteps = 0;
+ checkmclim(as);
+}
+
+/* -- Loop handling ------------------------------------------------------- */
+
+/* Fixup the loop branch. */
+static void asm_loop_fixup(ASMState *as)
+{
+ MCode *p = as->mctop;
+ MCode *target = as->mcp;
+ if (as->loopinv) { /* Inverted loop branch? */
+ /* tbz/tbnz has a 14 bit offset field, bcc/cbz/cbnz a 19 bit one. */
+ uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu;
+ ptrdiff_t delta = target - (p - 2);
+ /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
+ p[-2] |= ((uint32_t)delta & mask) << 5;
+ } else {
+ ptrdiff_t delta = target - (p - 1);
+ p[-1] = A64I_B | A64F_S26(delta);
+ }
+}
+
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+ UNUSED(as); /* Nothing to do. */
+}
+
+/* -- Head of trace ------------------------------------------------------- */
+
+/* Reload L register from g->cur_L. */
+static void asm_head_lreg(ASMState *as)
+{
+ IRIns *ir = IR(ASMREF_L);
+ if (ra_used(ir)) {
+ Reg r = ra_dest(as, ir, RSET_GPR);
+ emit_getgl(as, r, cur_L);
+ ra_evictk(as); /* Don't keep constants across the trace head. */
+ }
+}
+
+/* Coalesce BASE register for a root trace. */
+static void asm_head_root_base(ASMState *as)
+{
+ IRIns *ir;
+ asm_head_lreg(as);
+ ir = IR(REF_BASE);
+ /* Spill BASE if its register is modified on-trace or the IR is marked. */
+ if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+ ra_spill(as, ir);
+ ra_destreg(as, ir, RID_BASE);
+}
+
+/* Coalesce BASE register for a side trace. */
+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
+{
+ IRIns *ir;
+ asm_head_lreg(as);
+ ir = IR(REF_BASE);
+ if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
+ ra_spill(as, ir);
+ if (ra_hasspill(irp->s)) {
+ rset_clear(allow, ra_dest(as, ir, allow));
+ } else {
+ Reg r = irp->r; /* Keep BASE in the same reg as in the parent trace. */
+ lj_assertA(ra_hasreg(r), "base reg lost");
+ rset_clear(allow, r);
+ if (r != ir->r && !rset_test(as->freeset, r))
+ ra_restore(as, regcost_ref(as->cost[r]));
+ ra_destreg(as, ir, r);
+ }
+ return allow;
+}
+
+/* -- Tail of trace ------------------------------------------------------- */
+
+/* Fixup the tail code. */
+static void asm_tail_fixup(ASMState *as, TraceNo lnk)
+{
+ MCode *p = as->mctop;
+ MCode *target;
+ /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
+ int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
+ if (spadj == 0) { /* No adjustment needed. */
+ *--p = A64I_LE(A64I_NOP);
+ as->mctop = p;
+ } else {
+ /* Patch stack adjustment. */
+ uint32_t k = emit_isk12(spadj);
+ lj_assertA(k, "stack adjustment %d does not fit in K12", spadj);
+ p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
+ }
+ /* Patch exit branch. The branch instruction itself sits at p-1. */
+ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
+ p[-1] = A64I_B | A64F_S26((target-p)+1);
+}
+
+/* Prepare tail of code. */
+static void asm_tail_prep(ASMState *as)
+{
+ MCode *p = as->mctop - 1; /* Leave room for exit branch. */
+ if (as->loopref) {
+ as->invmcp = as->mcp = p; /* Looping trace: branch may be inverted. */
+ } else {
+ as->mcp = p-1; /* Leave room for stack pointer adjustment. */
+ as->invmcp = NULL;
+ }
+ *p = 0; /* Prevent load/store merging. */
+}
+
+/* -- Trace setup --------------------------------------------------------- */
+
+/* Ensure there are enough stack slots for call arguments. */
+static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
+{
+ IRRef args[CCI_NARGS_MAX*2];
+ uint32_t i, nargs = CCI_XNARGS(ci);
+ int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+ asm_collectargs(as, ir, ci, args);
+ for (i = 0; i < nargs; i++) {
+ if (args[i] && irt_isfp(IR(args[i])->t)) {
+ if (nfpr > 0) nfpr--; else nslots += 2; /* 2 slots per stack FP arg. */
+ } else {
+ if (ngpr > 0) ngpr--; else nslots += 2; /* 2 slots per stack GPR arg. */
+ }
+ }
+ if (nslots > as->evenspill) /* Leave room for args in stack slots. */
+ as->evenspill = nslots;
+ return REGSP_HINT(RID_RET);
+}
+
+/* Ensure enough exit stubs are allocated before assembling the trace. */
+static void asm_setup_target(ASMState *as)
+{
+ /* May need extra exit for asm_stack_check on side traces. */
+ asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
+}
+
+#if LJ_BE
+/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
+static void asm_mcode_fixup(MCode *mcode, MSize size)
+{
+ MCode *pe = (MCode *)((char *)mcode + size);
+ while (mcode < pe) {
+ MCode ins = *mcode;
+ *mcode++ = lj_bswap(ins);
+ }
+}
+#define LJ_TARGET_MCODE_FIXUP 1
+#endif
+
+/* -- Trace patching ------------------------------------------------------ */
+
+/* Patch exit jumps of existing machine code to a new target. */
+void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
+{
+ MCode *p = T->mcode;
+ MCode *pe = (MCode *)((char *)p + T->szmcode);
+ MCode *cstart = NULL;
+ MCode *mcarea = lj_mcode_patch(J, p, 0);
+ MCode *px = exitstub_trace_addr(T, exitno);
+ int patchlong = 1;
+ /* Note: this assumes a trace exit is only ever patched once. */
+ for (; p < pe; p++) {
+ /* Look for exitstub branch, replace with branch to target. */
+ ptrdiff_t delta = target - p;
+ MCode ins = A64I_LE(*p);
+ if ((ins & 0xff000000u) == 0x54000000u &&
+ ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
+ /* Patch bcc, if within range. */
+ if (A64F_S_OK(delta, 19)) { /* b.cc has a 19 bit offset field. */
+ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
+ if (!cstart) cstart = p;
+ }
+ } else if ((ins & 0xfc000000u) == 0x14000000u &&
+ ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
+ /* Patch b. */
+ lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range");
+ *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta));
+ if (!cstart) cstart = p;
+ } else if ((ins & 0x7e000000u) == 0x34000000u &&
+ ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
+ /* Patch cbz/cbnz, if within range. */
+ if (p[-1] == ARM64_NOPATCH_GC_CHECK) { /* Marked GC check exit? */
+ patchlong = 0; /* Keep the exit stub branch intact, too. */
+ } else if (A64F_S_OK(delta, 19)) {
+ *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta));
+ if (!cstart) cstart = p;
+ }
+ } else if ((ins & 0x7e000000u) == 0x36000000u &&
+ ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
+ /* Patch tbz/tbnz, if within range. */
+ if (A64F_S_OK(delta, 14)) { /* tbz/tbnz only has a 14 bit offset. */
+ *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta));
+ if (!cstart) cstart = p;
+ }
+ }
+ }
+ /* Always patch long-range branch in exit stub itself. Except, if we can't. */
+ if (patchlong) {
+ ptrdiff_t delta = target - px;
+ lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range");
+ *px = A64I_B | A64F_S26(delta);
+ if (!cstart) cstart = px;
+ }
+ if (cstart) lj_mcode_sync(cstart, px+1);
+ lj_mcode_patch(J, mcarea, 1);
+}
+
diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h
index c0e491a6..1686b40f 100644
--- a/src/lj_asm_mips.h
+++ b/src/lj_asm_mips.h
@@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
{
Reg r = IR(ref)->r;
if (ra_noreg(r)) {
- if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0)
+ if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0)
return RID_ZERO;
r = ra_allocref(as, ref, allow);
} else {
@@ -64,17 +64,29 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
/* Setup spare long-range jump slots per mcarea. */
static void asm_sparejump_setup(ASMState *as)
{
- MCode *mxp = as->mcbot;
- if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) {
- lua_assert(MIPSI_NOP == 0);
+ MCode *mxp = as->mctop;
+ if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) {
+ mxp -= MIPS_SPAREJUMP*2;
+ lj_assertA(MIPSI_NOP == 0, "bad NOP");
memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode));
- mxp += MIPS_SPAREJUMP*2;
- lua_assert(mxp < as->mctop);
- lj_mcode_sync(as->mcbot, mxp);
- lj_mcode_commitbot(as->J, mxp);
- as->mcbot = mxp;
- as->mclim = as->mcbot + MCLIM_REDZONE;
+ as->mctop = mxp;
+ }
+}
+
+static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump)
+{
+ MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size);
+ int slot = MIPS_SPAREJUMP;
+ while (slot--) {
+ mxp -= 2;
+ if (*mxp == tjump) {
+ return mxp;
+ } else if (*mxp == MIPSI_NOP) {
+ *mxp = tjump;
+ return mxp;
+ }
}
+ return NULL;
}
/* Setup exit stub after the end of each trace. */
@@ -84,7 +96,8 @@ static void asm_exitstub_setup(ASMState *as)
/* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
*--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
*--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
- lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0);
+ lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0,
+ "branch target out of range");
*--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
as->mctop = mxp;
}
@@ -101,7 +114,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
as->invmcp = NULL;
as->loopinv = 1;
as->mcp = p+1;
+#if !LJ_TARGET_MIPSR6
mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */
+#else
+ mi = mi ^ ((mi>>28) == 1 ? 0x04000000u :
+ (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */
+#endif
target = p; /* Patch target later in asm_loop_fixup. */
}
emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
@@ -165,9 +183,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
} else if (ir->o == IR_UREFC) {
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
- int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
- int32_t jgl = (intptr_t)J2G(as->J);
- if ((uint32_t)(ofs-jgl) < 65536) {
+ intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv;
+ intptr_t jgl = (intptr_t)J2G(as->J);
+ if ((uintptr_t)(ofs-jgl) < 65536) {
*ofsp = ofs-jgl-32768;
return RID_JGL;
} else {
@@ -175,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
return ra_allock(as, ofs-(int16_t)ofs, allow);
}
}
+ } else if (ir->o == IR_TMPREF) {
+ *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
+ return RID_JGL;
}
}
*ofsp = 0;
@@ -189,20 +210,21 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
Reg base;
if (ra_noreg(ir->r) && canfuse(as, ir)) {
if (ir->o == IR_ADD) {
- int32_t ofs2;
- if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
+ intptr_t ofs2;
+ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2),
+ checki16(ofs2))) {
ref = ir->op1;
- ofs = ofs2;
+ ofs = (int32_t)ofs2;
}
} else if (ir->o == IR_STRREF) {
- int32_t ofs2 = 65536;
- lua_assert(ofs == 0);
+ intptr_t ofs2 = 65536;
+ lj_assertA(ofs == 0, "bad usage");
ofs = (int32_t)sizeof(GCstr);
if (irref_isk(ir->op2)) {
- ofs2 = ofs + IR(ir->op2)->i;
+ ofs2 = ofs + get_kval(as, ir->op2);
ref = ir->op1;
} else if (irref_isk(ir->op1)) {
- ofs2 = ofs + IR(ir->op1)->i;
+ ofs2 = ofs + get_kval(as, ir->op1);
ref = ir->op2;
}
if (!checki16(ofs2)) {
@@ -210,7 +232,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
Reg right, left = ra_alloc2(as, ir, allow);
right = (left >> 8); left &= 255;
emit_hsi(as, mi, rt, RID_TMP, ofs);
- emit_dst(as, MIPSI_ADDU, RID_TMP, left, right);
+ emit_dst(as, MIPSI_AADDU, RID_TMP, left, right);
return;
}
ofs = ofs2;
@@ -225,29 +247,43 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
- uint32_t n, nargs = CCI_NARGS(ci);
- int32_t ofs = 16;
+ uint32_t n, nargs = CCI_XNARGS(ci);
+ int32_t ofs = LJ_32 ? 16 : 0;
+#if LJ_SOFTFP
+ Reg gpr = REGARG_FIRSTGPR;
+#else
Reg gpr, fpr = REGARG_FIRSTFPR;
+#endif
if ((void *)ci->func)
- emit_call(as, (void *)ci->func);
+ emit_call(as, (void *)ci->func, 1);
+#if !LJ_SOFTFP
for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
as->cost[gpr] = REGCOST(~0u, ASMREF_L);
gpr = REGARG_FIRSTGPR;
+#endif
for (n = 0; n < nargs; n++) { /* Setup args. */
IRRef ref = args[n];
if (ref) {
IRIns *ir = IR(ref);
+#if !LJ_SOFTFP
if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR &&
!(ci->flags & CCI_VARARG)) {
- lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
+ lj_assertA(rset_test(as->freeset, fpr),
+ "reg %d not free", fpr); /* Already evicted. */
ra_leftov(as, fpr, ref);
- fpr += 2;
- gpr += irt_isnum(ir->t) ? 2 : 1;
- } else {
+ fpr += LJ_32 ? 2 : 1;
+ gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1;
+ } else
+#endif
+ {
+#if LJ_32 && !LJ_SOFTFP
fpr = REGARG_LASTFPR+1;
- if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
+#endif
+ if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1;
if (gpr <= REGARG_LASTGPR) {
- lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
+ lj_assertA(rset_test(as->freeset, gpr),
+ "reg %d not free", gpr); /* Already evicted. */
+#if !LJ_SOFTFP
if (irt_isfp(ir->t)) {
RegSet of = as->freeset;
Reg r;
@@ -256,31 +292,56 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
r = ra_alloc1(as, ref, RSET_FPR);
as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
if (irt_isnum(ir->t)) {
+#if LJ_32
emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
- lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */
+ lj_assertA(rset_test(as->freeset, gpr+1),
+ "reg %d not free", gpr+1); /* Already evicted. */
gpr += 2;
+#else
+ emit_tg(as, MIPSI_DMFC1, gpr, r);
+ gpr++; fpr++;
+#endif
} else if (irt_isfloat(ir->t)) {
emit_tg(as, MIPSI_MFC1, gpr, r);
gpr++;
+#if LJ_64
+ fpr++;
+#endif
}
- } else {
+ } else
+#endif
+ {
ra_leftov(as, gpr, ref);
gpr++;
+#if LJ_64 && !LJ_SOFTFP
+ fpr++;
+#endif
}
} else {
- Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+ Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+#if LJ_32
if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;
emit_spstore(as, ir, r, ofs);
ofs += irt_isnum(ir->t) ? 8 : 4;
+#else
+ emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0));
+ ofs += 8;
+#endif
}
}
} else {
+#if !LJ_SOFTFP
fpr = REGARG_LASTFPR+1;
- if (gpr <= REGARG_LASTGPR)
+#endif
+ if (gpr <= REGARG_LASTGPR) {
gpr++;
- else
- ofs += 4;
+#if LJ_64 && !LJ_SOFTFP
+ fpr++;
+#endif
+ } else {
+ ofs += LJ_32 ? 4 : 8;
+ }
}
checkmclim(as);
}
@@ -291,28 +352,38 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
RegSet drop = RSET_SCRATCH;
int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+#if !LJ_SOFTFP
if ((ci->flags & CCI_NOFPRCLOBBER))
drop &= ~RSET_FPR;
+#endif
if (ra_hasreg(ir->r))
rset_clear(drop, ir->r); /* Dest reg handled below. */
if (hiop && ra_hasreg((ir+1)->r))
rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
ra_evictset(as, drop); /* Evictions must be performed first. */
if (ra_used(ir)) {
- lua_assert(!irt_ispri(ir->t));
- if (irt_isfp(ir->t)) {
+ lj_assertA(!irt_ispri(ir->t), "PRI dest");
+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
if ((ci->flags & CCI_CASTU64)) {
int32_t ofs = sps_scale(ir->s);
Reg dest = ir->r;
if (ra_hasreg(dest)) {
ra_free(as, dest);
ra_modified(as, dest);
+#if LJ_32
emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1);
emit_tg(as, MIPSI_MTC1, RID_RETLO, dest);
+#else
+ emit_tg(as, MIPSI_DMTC1, RID_RET, dest);
+#endif
}
if (ofs) {
+#if LJ_32
emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0));
emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4));
+#else
+ emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs);
+#endif
}
} else {
ra_destreg(as, ir, RID_FPRET);
@@ -325,15 +396,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
}
}
-static void asm_call(ASMState *as, IRIns *ir)
-{
- IRRef args[CCI_NARGS_MAX];
- const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
- asm_collectargs(as, ir, ci, args);
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
-}
-
static void asm_callx(ASMState *as, IRIns *ir)
{
IRRef args[CCI_NARGS_MAX*2];
@@ -346,7 +408,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
func = ir->op2; irf = IR(func);
if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
if (irref_isk(func)) { /* Call to constant address. */
- ci.func = (ASMFunction)(void *)(irf->i);
+ ci.func = (ASMFunction)(void *)get_kval(as, func);
} else { /* Need specific register for indirect calls. */
Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
MCode *p = as->mcp;
@@ -361,27 +423,23 @@ static void asm_callx(ASMState *as, IRIns *ir)
asm_gencall(as, &ci, args);
}
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
- const CCallInfo *ci = &lj_ir_callinfo[id];
- IRRef args[2];
- args[0] = ir->op1;
- args[1] = ir->op2;
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
-}
-
+#if !LJ_SOFTFP
static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
{
/* The modified regs must match with the *.dasc implementation. */
RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
- RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR);
+ RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR)
+#if LJ_TARGET_MIPSR6
+ |RID2RSET(RID_F21)
+#endif
+ ;
if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
ra_evictset(as, drop);
ra_destreg(as, ir, RID_FPRET);
- emit_call(as, (void *)lj_ir_callinfo[id].func);
+ emit_call(as, (void *)lj_ir_callinfo[id].func, 0);
ra_leftov(as, REGARG_FIRSTFPR, ir->op1);
}
+#endif
/* -- Returns ------------------------------------------------------------- */
@@ -390,25 +448,52 @@ static void asm_retf(ASMState *as, IRIns *ir)
{
Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
void *pc = ir_kptr(IR(ir->op2));
- int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
emit_setgl(as, base, jit_base);
emit_addptr(as, base, -8*delta);
asm_guard(as, MIPSI_BNE, RID_TMP,
- ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
- emit_tsi(as, MIPSI_LW, RID_TMP, base, -8);
+ ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)));
+ emit_tsi(as, MIPSI_AL, RID_TMP, base, -8);
}
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+ IRIns irgc;
+ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
+ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+ if ((as->flags & JIT_F_MIPSXXR2)) {
+ emit_tsml(as, LJ_64 ? MIPSI_DINS : MIPSI_INS, RID_TMP, tmp,
+ lj_fls(SBUF_MASK_FLAG), 0);
+ } else {
+ emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
+ emit_tsi(as, MIPSI_ANDI, tmp, tmp, SBUF_MASK_FLAG);
+ }
+ emit_getgl(as, RID_TMP, cur_L);
+ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
/* -- Type conversions ---------------------------------------------------- */
+#if !LJ_SOFTFP
static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
Reg dest = ra_dest(as, ir, RSET_GPR);
+#if !LJ_TARGET_MIPSR6
asm_guard(as, MIPSI_BC1F, 0, 0);
emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
+#else
+ asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31));
+ emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left);
+#endif
emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
emit_tg(as, MIPSI_MFC1, dest, tmp);
emit_fg(as, MIPSI_CVT_W_D, tmp, left);
@@ -424,15 +509,57 @@ static void asm_tobit(ASMState *as, IRIns *ir)
emit_tg(as, MIPSI_MFC1, dest, tmp);
emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
}
+#elif LJ_64 /* && LJ_SOFTFP */
+static void asm_tointg(ASMState *as, IRIns *ir, Reg r)
+{
+ /* The modified regs must match with the *.dasc implementation. */
+ RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)|
+ RID2RSET(RID_R1)|RID2RSET(RID_R12);
+ if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);
+ ra_evictset(as, drop);
+ /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */
+ ra_destreg(as, ir, RID_RET);
+ asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO);
+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0);
+ if (r == RID_NONE)
+ ra_leftov(as, REGARG_FIRSTGPR, ir->op1);
+ else if (r != REGARG_FIRSTGPR)
+ emit_move(as, REGARG_FIRSTGPR, r);
+}
+
+static void asm_tobit(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ emit_dta(as, MIPSI_SLL, dest, dest, 0);
+ asm_callid(as, ir, IRCALL_lj_vm_tobit);
+}
+#endif
static void asm_conv(ASMState *as, IRIns *ir)
{
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if !LJ_SOFTFP32
int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+#endif
+#if LJ_64
+ int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
+#endif
IRRef lref = ir->op1;
- lua_assert(irt_type(ir->t) != st);
- lua_assert(!(irt_isint64(ir->t) ||
- (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
+#if LJ_32
+ /* 64 bit integer conversions are handled by SPLIT. */
+ lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)),
+ "IR %04d has unsplit 64 bit type",
+ (int)(ir - as->ir) - REF_BIAS);
+#endif
+#if LJ_SOFTFP32
+ /* FP conversions are handled by SPLIT. */
+ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
+ "IR %04d has FP type",
+ (int)(ir - as->ir) - REF_BIAS);
+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+#else
+ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+#if !LJ_SOFTFP
if (irt_isfp(ir->t)) {
Reg dest = ra_dest(as, ir, RSET_FPR);
if (stfp) { /* FP to FP conversion. */
@@ -448,27 +575,56 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp);
emit_fg(as, MIPSI_CVT_D_W, dest, dest);
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
- (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
- RSET_GPR);
+ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);
emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
+#if LJ_64
+ } else if(st == IRT_U64) { /* U64 to FP conversion. */
+ /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
+ MCLabel l_end = emit_label(as);
+ if (irt_isfloat(ir->t)) {
+ emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp);
+ emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63],
+ rset_exclude(RSET_GPR, left));
+ emit_fg(as, MIPSI_CVT_S_L, dest, dest);
+ } else {
+ emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp);
+ emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63],
+ rset_exclude(RSET_GPR, left));
+ emit_fg(as, MIPSI_CVT_D_L, dest, dest);
+ }
+ emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end);
+ emit_tg(as, MIPSI_DMTC1, RID_TMP, dest);
+ emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0);
+#endif
} else { /* Integer to FP conversion. */
Reg left = ra_alloc1(as, lref, RSET_GPR);
+#if LJ_32
emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
dest, dest);
emit_tg(as, MIPSI_MTC1, left, dest);
+#else
+ MIPSIns mi = irt_isfloat(ir->t) ?
+ (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) :
+ (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W);
+ emit_fg(as, mi, dest, dest);
+ emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest);
+#endif
}
} else if (stfp) { /* FP to integer conversion. */
if (irt_isguard(ir->t)) {
/* Checked conversions are only supported from number to int. */
- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+ "bad type for checked CONV");
asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
} else {
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, lref, RSET_FPR);
Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
- if (irt_isu32(ir->t)) {
+ if (irt_isu32(ir->t)) { /* FP to U32 conversion. */
/* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
@@ -479,25 +635,112 @@ static void asm_conv(ASMState *as, IRIns *ir)
tmp, left, tmp);
if (st == IRT_FLOAT)
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
- (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
- RSET_GPR);
+ (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
else
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
- (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
- RSET_GPR);
+ (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
+#if LJ_64
+ } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
+ MCLabel l_end;
+ emit_tg(as, MIPSI_DMFC1, dest, tmp);
+ l_end = emit_label(as);
+ /* For inputs >= 2^63 add -2^64 and convert again. */
+ if (st == IRT_NUM) {
+ emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp);
+ emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp);
+ emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
+ (void *)&as->J->k64[LJ_K64_M2P64],
+ rset_exclude(RSET_GPR, dest));
+ emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */
+#if !LJ_TARGET_MIPSR6
+ emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
+ emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp);
+#else
+ emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
+ emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp);
+#endif
+ emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
+ (void *)&as->J->k64[LJ_K64_2P63],
+ rset_exclude(RSET_GPR, dest));
+ } else {
+ emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp);
+ emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp);
+ emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
+ (void *)&as->J->k32[LJ_K32_M2P64],
+ rset_exclude(RSET_GPR, dest));
+ emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */
+#if !LJ_TARGET_MIPSR6
+ emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
+ emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp);
+#else
+ emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end);
+ emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp);
+#endif
+ emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
+ (void *)&as->J->k32[LJ_K32_2P63],
+ rset_exclude(RSET_GPR, dest));
+ }
+#endif
} else {
+#if LJ_32
emit_tg(as, MIPSI_MFC1, dest, tmp);
emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
tmp, left);
+#else
+ MIPSIns mi = irt_is64(ir->t) ?
+ (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) :
+ (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S);
+ emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left);
+ emit_fg(as, mi, left, left);
+#endif
}
}
- } else {
+ } else
+#else
+ if (irt_isfp(ir->t)) {
+#if LJ_64 && LJ_HASFFI
+ if (stfp) { /* FP to FP conversion. */
+ asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d :
+ IRCALL_softfp_d2f);
+ } else { /* Integer to FP conversion. */
+ IRCallID cid = ((IRT_IS64 >> st) & 1) ?
+ (irt_isnum(ir->t) ?
+ (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) :
+ (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) :
+ (irt_isnum(ir->t) ?
+ (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) :
+ (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f));
+ asm_callid(as, ir, cid);
+ }
+#else
+ asm_callid(as, ir, IRCALL_softfp_i2d);
+#endif
+ } else if (stfp) { /* FP to integer conversion. */
+ if (irt_isguard(ir->t)) {
+ /* Checked conversions are only supported from number to int. */
+ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+ "bad type for checked CONV");
+ asm_tointg(as, ir, RID_NONE);
+ } else {
+ IRCallID cid = irt_is64(ir->t) ?
+ ((st == IRT_NUM) ?
+ (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) :
+ (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) :
+ ((st == IRT_NUM) ?
+ (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
+ (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
+ asm_callid(as, ir, cid);
+ }
+ } else
+#endif
+#endif
+ {
Reg dest = ra_dest(as, ir, RSET_GPR);
if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
if ((ir->op2 & IRCONV_SEXT)) {
- if ((as->flags & JIT_F_MIPS32R2)) {
+ if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
} else {
uint32_t shift = st == IRT_I8 ? 24 : 16;
@@ -509,94 +752,171 @@ static void asm_conv(ASMState *as, IRIns *ir)
(int32_t)(st == IRT_U8 ? 0xff : 0xffff));
}
} else { /* 32/64 bit integer conversions. */
+#if LJ_32
/* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */
+#else
+ if (irt_is64(ir->t)) {
+ if (st64) {
+ /* 64/64 bit no-op (cast)*/
+ ra_leftov(as, dest, lref);
+ } else {
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
+ if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */
+ emit_dta(as, MIPSI_SLL, dest, left, 0);
+ } else { /* 32 to 64 bit zero extension. */
+ emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0);
+ }
+ }
+ } else {
+ if (st64 && !(ir->op2 & IRCONV_NONE)) {
+ /* This is either a 32 bit reg/reg mov which zeroes the hiword
+ ** or a load of the loword from a 64 bit address.
+ */
+ Reg left = ra_alloc1(as, lref, RSET_GPR);
+ emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0);
+ } else { /* 32/32 bit no-op (cast). */
+ /* Do nothing, but may need to move regs. */
+ ra_leftov(as, dest, lref);
+ }
+ }
+#endif
}
}
}
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
- IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
- IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
- IRCallID id;
- const CCallInfo *ci;
- IRRef args[2];
- args[LJ_BE?0:1] = ir->op1;
- args[LJ_BE?1:0] = (ir-1)->op1;
- if (st == IRT_NUM || st == IRT_FLOAT) {
- id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
- ir--;
- } else {
- id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
- }
- ci = &lj_ir_callinfo[id];
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
-}
-#endif
-
static void asm_strto(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2];
+ int32_t ofs = 0;
+#if LJ_SOFTFP32
+ ra_evictset(as, RSET_SCRATCH);
+ if (ra_used(ir)) {
+ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
+ (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
+ int i;
+ for (i = 0; i < 2; i++) {
+ Reg r = (ir+i)->r;
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ ra_modified(as, r);
+ emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
+ }
+ }
+ ofs = sps_scale(ir->s & ~1);
+ } else {
+ Reg rhi = ra_dest(as, ir+1, RSET_GPR);
+ Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
+ emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4));
+ emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0));
+ }
+ }
+#else
RegSet drop = RSET_SCRATCH;
if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
ra_evictset(as, drop);
+ ofs = sps_scale(ir->s);
+#endif
asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */
args[0] = ir->op1; /* GCstr *str */
args[1] = ASMREF_TMP1; /* TValue *n */
asm_gencall(as, ci, args);
/* Store the result to the spill slot or temp slots. */
- emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1),
- RID_SP, sps_scale(ir->s));
+ emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1),
+ RID_SP, ofs);
}
-/* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+/* -- Memory references --------------------------------------------------- */
+
+#if LJ_64
+/* Store tagged value for ref at base+ofs. */
+static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
{
+ RegSet allow = rset_exclude(RSET_GPR, base);
IRIns *ir = IR(ref);
- if (irt_isnum(ir->t)) {
- if (irref_isk(ref)) /* Use the number constant itself as a TValue. */
- ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
- else /* Otherwise force a spill and use the spill slot. */
- emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir));
+ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
+ "store of IR type %d", irt_type(ir->t));
+ if (irref_isk(ref)) {
+ TValue k;
+ lj_ir_kvalue(as->J->L, &k, ir);
+ emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs);
} else {
- /* Otherwise use g->tmptv to hold the TValue. */
- RegSet allow = rset_exclude(RSET_GPR, dest);
- Reg type;
- emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
- if (!irt_ispri(ir->t)) {
- Reg src = ra_alloc1(as, ref, allow);
- emit_setgl(as, src, tmptv.gcr);
+ Reg src = ra_alloc1(as, ref, allow);
+ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47,
+ rset_exclude(allow, src));
+ emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs);
+ if (irt_isinteger(ir->t)) {
+ emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type);
+ emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0);
+ } else {
+ emit_dst(as, MIPSI_DADDU, RID_TMP, src, type);
}
- type = ra_allock(as, irt_toitype(ir->t), allow);
- emit_setgl(as, type, tmptv.it);
}
}
+#endif
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
{
- IRRef args[2];
- args[0] = ASMREF_L;
- as->gcsteps++;
- if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
- args[1] = ASMREF_TMP1; /* const lua_Number * */
- asm_setupresult(as, ir, ci); /* GCstr * */
- asm_gencall(as, ci, args);
- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
+ int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
+ if ((mode & IRTMPREF_IN1)) {
+ IRIns *ir = IR(ref);
+ if (irt_isnum(ir->t)) {
+ if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+ emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
+#if LJ_64
+ emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64);
+#else
+ lj_assertA(irref_isk(ref), "unsplit FP op");
+ emit_setgl(as,
+ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+ tmptv.u32.lo);
+ emit_setgl(as,
+ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+ tmptv.u32.hi);
+#endif
+#else
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
+ emit_tsi(as, MIPSI_SDC1, (src & 31), RID_JGL, tmpofs);
+#endif
+ } else if (irref_isk(ref)) {
+ /* Use the number constant itself as a TValue. */
+ ra_allockreg(as, igcptr(ir_knum(ir)), dest);
+ } else {
+#if LJ_SOFTFP32
+ lj_assertA(0, "unsplit FP op");
+#else
+ /* Otherwise force a spill and use the spill slot. */
+ emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir));
+#endif
+ }
+ } else {
+ /* Otherwise use g->tmptv to hold the TValue. */
+#if LJ_32
+ Reg type;
+ emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs);
+ if (!irt_ispri(ir->t)) {
+ Reg src = ra_alloc1(as, ref, RSET_GPR);
+ emit_setgl(as, src, tmptv.gcr);
+ }
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+ type = ra_alloc1(as, ref+1, RSET_GPR);
+ else
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR);
+ emit_setgl(as, type, tmptv.it);
+#else
+ asm_tvstore64(as, dest, 0, ref);
+ emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs);
+#endif
+ }
} else {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
- args[1] = ir->op1; /* int32_t k */
- asm_setupresult(as, ir, ci); /* GCstr * */
- asm_gencall(as, ci, args);
+ emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs);
}
}
-/* -- Memory references --------------------------------------------------- */
-
static void asm_aref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -608,14 +928,18 @@ static void asm_aref(ASMState *as, IRIns *ir)
ofs += 8*IR(ir->op2)->i;
if (checki16(ofs)) {
base = ra_alloc1(as, refa, RSET_GPR);
- emit_tsi(as, MIPSI_ADDIU, dest, base, ofs);
+ emit_tsi(as, MIPSI_AADDIU, dest, base, ofs);
return;
}
}
base = ra_alloc1(as, ir->op1, RSET_GPR);
idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
- emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base);
+#if !LJ_TARGET_MIPSR6
+ emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base);
emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
+#else
+ emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base);
+#endif
}
/* Inlined hash lookup. Specialized for key type and for const keys.
@@ -626,21 +950,25 @@ static void asm_aref(ASMState *as, IRIns *ir)
** } while ((n = nextnode(n)));
** return niltv(L);
*/
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
RegSet allow = RSET_GPR;
int destused = ra_used(ir);
Reg dest = ra_dest(as, ir, allow);
Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
+#if LJ_64
+ Reg cmp64 = RID_NONE;
+#endif
IRRef refkey = ir->op2;
IRIns *irkey = IR(refkey);
+ int isk = irref_isk(refkey);
IRType1 kt = irkey->t;
uint32_t khash;
MCLabel l_end, l_loop, l_next;
rset_clear(allow, tab);
- if (irt_isnum(kt)) {
+ if (!LJ_SOFTFP && irt_isnum(kt)) {
key = ra_alloc1(as, refkey, RSET_FPR);
tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
} else {
@@ -648,31 +976,76 @@ static void asm_href(ASMState *as, IRIns *ir)
key = ra_alloc1(as, refkey, allow);
rset_clear(allow, key);
}
- type = ra_allock(as, irt_toitype(irkey->t), allow);
- rset_clear(allow, type);
+#if LJ_32
+ if (LJ_SOFTFP && irkey[1].o == IR_HIOP) {
+ if (ra_hasreg((irkey+1)->r)) {
+ type = tmpnum = (irkey+1)->r;
+ tmp1 = ra_scratch(as, allow);
+ rset_clear(allow, tmp1);
+ ra_noweak(as, tmpnum);
+ } else {
+ type = tmpnum = ra_allocref(as, refkey+1, allow);
+ }
+ rset_clear(allow, tmpnum);
+ } else {
+ type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
+ rset_clear(allow, type);
+ }
+#endif
}
tmp2 = ra_scratch(as, allow);
rset_clear(allow, tmp2);
+#if LJ_64
+ if (LJ_SOFTFP || !irt_isnum(kt)) {
+ /* Allocate cmp64 register used for 64-bit comparisons */
+ if (LJ_SOFTFP && irt_isnum(kt)) {
+ cmp64 = key;
+ } else if (!isk && irt_isaddr(kt)) {
+ cmp64 = tmp2;
+ } else {
+ int64_t k;
+ if (isk && irt_isaddr(kt)) {
+ k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64;
+ } else {
+ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
+ k = ~((int64_t)~irt_toitype(kt) << 47);
+ }
+ cmp64 = ra_allock(as, k, allow);
+ rset_clear(allow, cmp64);
+ }
+ }
+#endif
- /* Key not found in chain: load niltv. */
+ /* Key not found in chain: jump to exit (if merged) or load niltv. */
l_end = emit_label(as);
- if (destused)
+ as->invmcp = NULL;
+ if (merge == IR_NE)
+ asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO);
+ else if (destused)
emit_loada(as, dest, niltvg(J2G(as->J)));
- else
- *--as->mcp = MIPSI_NOP;
/* Follow hash chain until the end. */
emit_move(as, dest, tmp1);
l_loop = --as->mcp;
- emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next));
+ emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next));
l_next = emit_label(as);
/* Type and value comparison. */
- if (irt_isnum(kt)) {
+ if (merge == IR_EQ) { /* Must match asm_guard(). */
+ emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);
+ l_end = asm_exitstub_addr(as);
+ }
+ if (!LJ_SOFTFP && irt_isnum(kt)) {
+#if !LJ_TARGET_MIPSR6
emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
- emit_tg(as, MIPSI_MFC1, tmp1, key+1);
+#else
+ emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end);
+ emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key);
+#endif
+ *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */
emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);
+#if LJ_32
emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
} else {
if (irt_ispri(kt)) {
@@ -685,36 +1058,52 @@ static void asm_href(ASMState *as, IRIns *ir)
}
emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
*l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
+#else
+ emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15);
+ emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum);
+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
+ } else {
+ emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end);
+ emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64));
+ }
+ *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);
+ if (!isk && irt_isaddr(kt)) {
+ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow);
+ emit_dst(as, MIPSI_DADDU, tmp2, key, type);
+ rset_clear(allow, type);
+ }
+#endif
/* Load main position relative to tab->node into dest. */
- khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+ khash = isk ? ir_khash(as, irkey) : 1;
if (khash == 0) {
- emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
+ emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
} else {
Reg tmphash = tmp1;
- if (irref_isk(refkey))
+ if (isk)
tmphash = ra_allock(as, khash, allow);
- emit_dst(as, MIPSI_ADDU, dest, dest, tmp1);
- lua_assert(sizeof(Node) == 24);
+ emit_dst(as, MIPSI_AADDU, dest, dest, tmp1);
+ lj_assertA(sizeof(Node) == 24, "bad Node size");
emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash);
- emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
+ emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node));
emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
- if (irref_isk(refkey)) {
+ if (isk) {
/* Nothing to do. */
} else if (irt_isstr(kt)) {
- emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash));
+ emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid));
} else { /* Must match with hash*() in lj_tab.c. */
emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2);
emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31);
emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2);
emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31);
emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
- if (irt_isnum(kt)) {
+#if LJ_32
+ if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
- if ((as->flags & JIT_F_MIPS32R2)) {
+ if ((as->flags & JIT_F_MIPSXXR2)) {
emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
} else {
emit_dst(as, MIPSI_OR, dest, dest, tmp1);
@@ -722,13 +1111,35 @@ static void asm_href(ASMState *as, IRIns *ir)
emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31);
}
emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
+#if LJ_SOFTFP
+ emit_ds(as, MIPSI_MOVE, tmp1, type);
+ emit_ds(as, MIPSI_MOVE, tmp2, key);
+#else
emit_tg(as, MIPSI_MFC1, tmp2, key);
emit_tg(as, MIPSI_MFC1, tmp1, key+1);
+#endif
} else {
emit_dst(as, MIPSI_XOR, tmp2, key, tmp1);
emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31);
emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow));
}
+#else
+ emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
+ emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
+ if (irt_isnum(kt)) {
+ emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
+ emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0);
+ emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0);
+#if !LJ_SOFTFP
+ emit_tg(as, MIPSI_DMFC1, tmp1, key);
+#endif
+ } else {
+ checkmclim(as);
+ emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0);
+ emit_dta(as, MIPSI_SLL, tmp2, key, 0);
+ emit_dst(as, MIPSI_DADDU, tmp1, key, type);
+ }
+#endif
}
}
}
@@ -741,17 +1152,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
int32_t kofs = ofs + (int32_t)offsetof(Node, key);
Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
- Reg key = RID_NONE, type = RID_TMP, idx = node;
RegSet allow = rset_exclude(RSET_GPR, node);
+ Reg idx = node;
+#if LJ_32
+ Reg key = RID_NONE, type = RID_TMP;
int32_t lo, hi;
- lua_assert(ofs % sizeof(Node) == 0);
+#else
+ Reg key = ra_scratch(as, allow);
+ int64_t k;
+#endif
+ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
if (ofs > 32736) {
idx = dest;
rset_clear(allow, dest);
kofs = (int32_t)offsetof(Node, key);
} else if (ra_hasreg(dest)) {
- emit_tsi(as, MIPSI_ADDIU, dest, node, ofs);
+ emit_tsi(as, MIPSI_AADDIU, dest, node, ofs);
}
+#if LJ_32
if (!irt_ispri(irkey->t)) {
key = ra_scratch(as, allow);
rset_clear(allow, key);
@@ -770,22 +1188,20 @@ nolo:
asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO);
if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0));
emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
- if (ofs > 32736)
- emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
-}
-
-static void asm_newref(ASMState *as, IRIns *ir)
-{
- if (ir->r != RID_SINK) {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
- IRRef args[3];
- args[0] = ASMREF_L; /* lua_State *L */
- args[1] = ir->op1; /* GCtab *t */
- args[2] = ASMREF_TMP1; /* cTValue *key */
- asm_setupresult(as, ir, ci); /* TValue * */
- asm_gencall(as, ci, args);
- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
+#else
+ if (irt_ispri(irkey->t)) {
+ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
+ k = ~((int64_t)~irt_toitype(irkey->t) << 47);
+ } else if (irt_isnum(irkey->t)) {
+ k = (int64_t)ir_knum(irkey)->u64;
+ } else {
+ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey);
}
+ asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow));
+ emit_tsi(as, MIPSI_LD, key, idx, kofs);
+#endif
+ if (ofs > 32736)
+ emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow));
}
static void asm_uref(ASMState *as, IRIns *ir)
@@ -794,30 +1210,31 @@ static void asm_uref(ASMState *as, IRIns *ir)
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
- emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR);
+ emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
- emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
+ emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
} else {
- emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v));
+ emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v));
}
- emit_tsi(as, MIPSI_LW, uv, func,
- (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+ emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) +
+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}
static void asm_fref(ASMState *as, IRIns *ir)
{
UNUSED(as); UNUSED(ir);
- lua_assert(!ra_used(ir));
+ lj_assertA(!ra_used(ir), "unfused FREF");
}
static void asm_strref(ASMState *as, IRIns *ir)
{
+#if LJ_32
Reg dest = ra_dest(as, ir, RSET_GPR);
IRRef ref = ir->op2, refk = ir->op1;
int32_t ofs = (int32_t)sizeof(GCstr);
@@ -849,49 +1266,79 @@ static void asm_strref(ASMState *as, IRIns *ir)
else
emit_dst(as, MIPSI_ADDU, dest, r,
ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
+#else
+ RegSet allow = RSET_GPR;
+ Reg dest = ra_dest(as, ir, allow);
+ Reg base = ra_alloc1(as, ir->op1, allow);
+ IRIns *irr = IR(ir->op2);
+ int32_t ofs = sizeof(GCstr);
+ rset_clear(allow, base);
+ if (irref_isk(ir->op2) && checki16(ofs + irr->i)) {
+ emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i);
+ } else {
+ emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs);
+ emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow));
+ }
+#endif
}
/* -- Loads and stores ---------------------------------------------------- */
-static MIPSIns asm_fxloadins(IRIns *ir)
+static MIPSIns asm_fxloadins(ASMState *as, IRIns *ir)
{
+ UNUSED(as);
switch (irt_type(ir->t)) {
case IRT_I8: return MIPSI_LB;
case IRT_U8: return MIPSI_LBU;
case IRT_I16: return MIPSI_LH;
case IRT_U16: return MIPSI_LHU;
- case IRT_NUM: return MIPSI_LDC1;
- case IRT_FLOAT: return MIPSI_LWC1;
- default: return MIPSI_LW;
+ case IRT_NUM:
+ lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
+ if (!LJ_SOFTFP) return MIPSI_LDC1;
+ /* fallthrough */
+ case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1;
+ /* fallthrough */
+ default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW;
}
}
-static MIPSIns asm_fxstoreins(IRIns *ir)
+static MIPSIns asm_fxstoreins(ASMState *as, IRIns *ir)
{
+ UNUSED(as);
switch (irt_type(ir->t)) {
case IRT_I8: case IRT_U8: return MIPSI_SB;
case IRT_I16: case IRT_U16: return MIPSI_SH;
- case IRT_NUM: return MIPSI_SDC1;
- case IRT_FLOAT: return MIPSI_SWC1;
- default: return MIPSI_SW;
+ case IRT_NUM:
+ lj_assertA(!LJ_SOFTFP32, "unsplit FP op");
+ if (!LJ_SOFTFP) return MIPSI_SDC1;
+ /* fallthrough */
+ case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1;
+ /* fallthrough */
+ default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW;
}
}
static void asm_fload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
- MIPSIns mi = asm_fxloadins(ir);
+ MIPSIns mi = asm_fxloadins(as, ir);
+ Reg idx;
int32_t ofs;
- if (ir->op2 == IRFL_TAB_ARRAY) {
- ofs = asm_fuseabase(as, ir->op1);
- if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
- emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs);
- return;
+ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+ idx = RID_JGL;
+ ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
+ } else {
+ idx = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->op2 == IRFL_TAB_ARRAY) {
+ ofs = asm_fuseabase(as, ir->op1);
+ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
+ emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs);
+ return;
+ }
}
+ ofs = field_ofs[ir->op2];
}
- ofs = field_ofs[ir->op2];
- lua_assert(!irt_isfp(ir->t));
+ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD");
emit_tsi(as, mi, dest, idx, ofs);
}
@@ -902,51 +1349,90 @@ static void asm_fstore(ASMState *as, IRIns *ir)
IRIns *irf = IR(ir->op1);
Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
int32_t ofs = field_ofs[irf->op2];
- MIPSIns mi = asm_fxstoreins(ir);
- lua_assert(!irt_isfp(ir->t));
+ MIPSIns mi = asm_fxstoreins(as, ir);
+ lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE");
emit_tsi(as, mi, src, idx, ofs);
}
}
static void asm_xload(ASMState *as, IRIns *ir)
{
- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
+ Reg dest = ra_dest(as, ir,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+ lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED),
+ "unaligned XLOAD");
+ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
}
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
{
if (ir->r != RID_SINK) {
- Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+ Reg src = ra_alloc1z(as, ir->op2,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
rset_exclude(RSET_GPR, src), ofs);
}
}
+#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
+
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
- IRType1 t = ir->t;
+ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
Reg dest = RID_NONE, type = RID_TMP, idx;
RegSet allow = RSET_GPR;
int32_t ofs = 0;
+ IRType1 t = ir->t;
+ if (hiop) {
+ t.irt = IRT_NUM;
+ if (ra_used(ir+1)) {
+ type = ra_dest(as, ir+1, allow);
+ rset_clear(allow, type);
+ }
+ }
if (ra_used(ir)) {
- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+ lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
+ irt_isint(ir->t) || irt_isaddr(ir->t),
+ "bad load type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
rset_clear(allow, dest);
+#if LJ_64
+ if (irt_isaddr(t))
+ emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0);
+ else if (irt_isint(t))
+ emit_dta(as, MIPSI_SLL, dest, dest, 0);
+#endif
}
idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+ if (ir->o == IR_VLOAD) ofs += 8 * ir->op2;
rset_clear(allow, idx);
if (irt_isnum(t)) {
- asm_guard(as, MIPSI_BEQ, type, RID_ZERO);
- emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM);
- if (ra_hasreg(dest))
- emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
+ asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
+ emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
} else {
- asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow));
- if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
+ asm_guard(as, MIPSI_BNE, type,
+ ra_allock(as, (int32_t)irt_toitype(t), allow));
+ }
+#if LJ_32
+ if (ra_hasreg(dest)) {
+ if (!LJ_SOFTFP && irt_isnum(t))
+ emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
+ else
+ emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
}
emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4));
+#else
+ if (ra_hasreg(dest)) {
+ if (!LJ_SOFTFP && irt_isnum(t)) {
+ emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
+ dest = type;
+ }
+ } else {
+ dest = type;
+ }
+ emit_dta(as, MIPSI_DSRA32, type, dest, 15);
+ emit_tsi(as, MIPSI_LD, dest, idx, ofs);
+#endif
}
static void asm_ahustore(ASMState *as, IRIns *ir)
@@ -956,81 +1442,184 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
int32_t ofs = 0;
if (ir->r == RID_SINK)
return;
- if (irt_isnum(ir->t)) {
- src = ra_alloc1(as, ir->op2, RSET_FPR);
+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
+ src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+ emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs);
} else {
+#if LJ_32
if (!irt_ispri(ir->t)) {
src = ra_alloc1(as, ir->op2, allow);
rset_clear(allow, src);
}
- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+ type = ra_alloc1(as, (ir+1)->op2, allow);
+ else
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
rset_clear(allow, type);
- }
- idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
- if (irt_isnum(ir->t)) {
- emit_hsi(as, MIPSI_SDC1, src, idx, ofs);
- } else {
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
if (ra_hasreg(src))
emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0));
emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4));
+#else
+ Reg tmp = RID_TMP;
+ if (irt_ispri(ir->t)) {
+ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
+ rset_clear(allow, tmp);
+ } else {
+ src = ra_alloc1(as, ir->op2, allow);
+ rset_clear(allow, src);
+ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
+ rset_clear(allow, type);
+ }
+ idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+ emit_tsi(as, MIPSI_SD, tmp, idx, ofs);
+ if (ra_hasreg(src)) {
+ if (irt_isinteger(ir->t)) {
+ emit_dst(as, MIPSI_DADDU, tmp, tmp, type);
+ emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0);
+ } else {
+ emit_dst(as, MIPSI_DADDU, tmp, src, type);
+ }
+ }
+#endif
}
}
static void asm_sload(ASMState *as, IRIns *ir)
{
- int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
- IRType1 t = ir->t;
Reg dest = RID_NONE, type = RID_NONE, base;
RegSet allow = RSET_GPR;
- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
- lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+ IRType1 t = ir->t;
+#if LJ_32
+ int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
+ int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP);
+ if (hiop)
+ t.irt = IRT_NUM;
+#else
+ int32_t ofs = 8*((int32_t)ir->op1-2);
+#endif
+ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+ "bad parent SLOAD"); /* Handled by asm_head_side(). */
+ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+ "inconsistent SLOAD variant");
+#if LJ_SOFTFP32
+ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
+ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
+ if (hiop && ra_used(ir+1)) {
+ type = ra_dest(as, ir+1, allow);
+ rset_clear(allow, type);
+ }
+#else
if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
- dest = ra_scratch(as, RSET_FPR);
+ dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR);
asm_tointg(as, ir, dest);
t.irt = IRT_NUM; /* Continue with a regular number type check. */
- } else if (ra_used(ir)) {
- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+ } else
+#endif
+ if (ra_used(ir)) {
+ lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) ||
+ irt_isint(ir->t) || irt_isaddr(ir->t),
+ "bad SLOAD type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
rset_clear(allow, dest);
base = ra_alloc1(as, REF_BASE, allow);
rset_clear(allow, base);
- if ((ir->op2 & IRSLOAD_CONVERT)) {
+ if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) {
if (irt_isint(t)) {
- Reg tmp = ra_scratch(as, RSET_FPR);
+ Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR);
+#if LJ_SOFTFP
+ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
+ ra_destreg(as, ir, RID_RET);
+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0);
+ if (tmp != REGARG_FIRSTGPR)
+ emit_move(as, REGARG_FIRSTGPR, tmp);
+#else
emit_tg(as, MIPSI_MFC1, dest, tmp);
- emit_fg(as, MIPSI_CVT_W_D, tmp, tmp);
+ emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp);
+#endif
dest = tmp;
t.irt = IRT_NUM; /* Check for original type. */
} else {
Reg tmp = ra_scratch(as, RSET_GPR);
+#if LJ_SOFTFP
+ ra_evictset(as, rset_exclude(RSET_SCRATCH, dest));
+ ra_destreg(as, ir, RID_RET);
+ emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0);
+ emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0);
+#else
emit_fg(as, MIPSI_CVT_D_W, dest, dest);
emit_tg(as, MIPSI_MTC1, tmp, dest);
+#endif
dest = tmp;
t.irt = IRT_INT; /* Check for original type. */
}
}
+#if LJ_64
+ else if (irt_isaddr(t)) {
+ /* Clear type from pointers. */
+ emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0);
+ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
+ /* Sign-extend integers. */
+ emit_dta(as, MIPSI_SLL, dest, dest, 0);
+ }
+#endif
goto dotypecheck;
}
base = ra_alloc1(as, REF_BASE, allow);
rset_clear(allow, base);
dotypecheck:
- if (irt_isnum(t)) {
- if ((ir->op2 & IRSLOAD_TYPECHECK)) {
- asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
- emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
+#if LJ_32
+ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+ if (ra_noreg(type))
type = RID_TMP;
+ if (irt_isnum(t)) {
+ asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
+ emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM);
+ } else {
+ Reg ktype = ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : irt_toitype(t), allow);
+ asm_guard(as, MIPSI_BNE, type, ktype);
}
- if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
- } else {
- if ((ir->op2 & IRSLOAD_TYPECHECK)) {
- Reg ktype = ra_allock(as, irt_toitype(t), allow);
- asm_guard(as, MIPSI_BNE, RID_TMP, ktype);
- type = RID_TMP;
+ }
+ if (ra_hasreg(dest)) {
+ if (!LJ_SOFTFP && irt_isnum(t))
+ emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
+ else
+ emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
+ }
+ if (ra_hasreg(type))
+ emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
+#else
+ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+ type = dest < RID_MAX_GPR ? dest : RID_TMP;
+ if (irt_ispri(t)) {
+ asm_guard(as, MIPSI_BNE, type,
+ ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow));
+ } else if ((ir->op2 & IRSLOAD_KEYINDEX)) {
+ asm_guard(as, MIPSI_BNE, RID_TMP,
+ ra_allock(as, (int32_t)LJ_KEYINDEX, allow));
+ emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 0);
+ } else {
+ if (irt_isnum(t)) {
+ asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
+ emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
+ if (!LJ_SOFTFP && ra_hasreg(dest))
+ emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
+ } else {
+ asm_guard(as, MIPSI_BNE, RID_TMP,
+ ra_allock(as, (int32_t)irt_toitype(t), allow));
+ }
+ emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15);
}
- if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
+ emit_tsi(as, MIPSI_LD, type, base, ofs);
+ } else if (ra_hasreg(dest)) {
+ if (!LJ_SOFTFP && irt_isnum(t))
+ emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
+ else
+ emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base,
+ ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0));
}
- if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
+#endif
}
/* -- Allocations --------------------------------------------------------- */
@@ -1039,19 +1628,16 @@ dotypecheck:
static void asm_cnew(ASMState *as, IRIns *ir)
{
CTState *cts = ctype_ctsG(J2G(as->J));
- CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
- CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
- lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+ CTypeID id = (CTypeID)IR(ir->op1)->i;
+ CTSize sz;
+ CTInfo info = lj_ctype_info(cts, id, &sz);
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
- IRRef args[2];
- RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+ IRRef args[4];
RegSet drop = RSET_SCRATCH;
- lua_assert(sz != CTSIZE_INVALID);
+ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+ "bad CNEW/CNEWI operands");
- args[0] = ASMREF_L; /* lua_State *L */
- args[1] = ASMREF_TMP1; /* MSize size */
as->gcsteps++;
-
if (ra_hasreg(ir->r))
rset_clear(drop, ir->r); /* Dest reg handled below. */
ra_evictset(as, drop);
@@ -1060,11 +1646,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
/* Initialize immutable cdata object. */
if (ir->o == IR_CNEWI) {
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+#if LJ_32
int32_t ofs = sizeof(GCcdata);
- lua_assert(sz == 4 || sz == 8);
if (sz == 8) {
ofs += 4;
- lua_assert((ir+1)->o == IR_HIOP);
+ lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
if (LJ_LE) ir++;
}
for (;;) {
@@ -1074,18 +1661,33 @@ static void asm_cnew(ASMState *as, IRIns *ir)
if (ofs == sizeof(GCcdata)) break;
ofs -= 4; if (LJ_BE) ir++; else ir--;
}
+#else
+ emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow),
+ RID_RET, sizeof(GCcdata));
+#endif
+ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* CTypeID id */
+ args[2] = ir->op2; /* CTSize sz */
+ args[3] = ASMREF_TMP1; /* CTSize align */
+ asm_gencall(as, ci, args);
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+ return;
}
+
/* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
- emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */
+ emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ASMREF_TMP1; /* MSize size */
asm_gencall(as, ci, args);
ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
ra_releasetmp(as, ASMREF_TMP1));
}
-#else
-#define asm_cnew(as, ir) ((void)0)
#endif
/* -- Write barriers ------------------------------------------------------ */
@@ -1096,7 +1698,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
Reg link = RID_TMP;
MCLabel l_end = emit_label(as);
- emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist));
+ emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist));
emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
emit_setgl(as, tab, gc.grayagain);
emit_getgl(as, link, gc.grayagain);
@@ -1113,13 +1715,13 @@ static void asm_obar(ASMState *as, IRIns *ir)
MCLabel l_end;
Reg obj, val, tmp;
/* No need for other object barriers (yet). */
- lua_assert(IR(ir->op1)->o == IR_UREFC);
+ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
ra_evictset(as, RSET_SCRATCH);
l_end = emit_label(as);
args[0] = ASMREF_TMP1; /* global_State *g */
args[1] = ir->op1; /* TValue *tv */
asm_gencall(as, ci, args);
- emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
+ emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
obj = IR(ir->op1)->r;
tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);
@@ -1134,6 +1736,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
/* -- Arithmetic and logic operations ------------------------------------- */
+#if !LJ_SOFTFP
static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi)
{
Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1148,83 +1751,147 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
emit_fg(as, mi, dest, left);
}
+#endif
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
- IRIns *irp = IR(ir->op1);
- if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
- IRIns *irpp = IR(irp->op1);
- if (irpp == ir-2 && irpp->o == IR_FPMATH &&
- irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
- IRRef args[2];
- args[0] = irpp->op1;
- args[1] = irp->op2;
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
- return 1;
- }
- }
- return 0;
+#if !LJ_SOFTFP32
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+ if (ir->op2 <= IRFPM_TRUNC)
+ asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
+ else if (ir->op2 == IRFPM_SQRT)
+ asm_fpunary(as, ir, MIPSI_SQRT_D);
+ else
+#endif
+ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
}
+#endif
+
+#if !LJ_SOFTFP
+#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D)
+#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D)
+#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D)
+#elif LJ_64 /* && LJ_SOFTFP */
+#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add)
+#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub)
+#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul)
+#endif
static void asm_add(ASMState *as, IRIns *ir)
{
- if (irt_isnum(ir->t)) {
- asm_fparith(as, ir, MIPSI_ADD_D);
- } else {
+ IRType1 t = ir->t;
+#if !LJ_SOFTFP32
+ if (irt_isnum(t)) {
+ asm_fpadd(as, ir);
+ } else
+#endif
+ {
+ /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
if (irref_isk(ir->op2)) {
- int32_t k = IR(ir->op2)->i;
+ intptr_t k = get_kval(as, ir->op2);
if (checki16(k)) {
- emit_tsi(as, MIPSI_ADDIU, dest, left, k);
+ emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest,
+ left, k);
return;
}
}
right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
- emit_dst(as, MIPSI_ADDU, dest, left, right);
+ emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest,
+ left, right);
}
}
static void asm_sub(ASMState *as, IRIns *ir)
{
+#if !LJ_SOFTFP32
if (irt_isnum(ir->t)) {
- asm_fparith(as, ir, MIPSI_SUB_D);
- } else {
+ asm_fpsub(as, ir);
+ } else
+#endif
+ {
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg right, left = ra_alloc2(as, ir, RSET_GPR);
right = (left >> 8); left &= 255;
- emit_dst(as, MIPSI_SUBU, dest, left, right);
+ emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest,
+ left, right);
}
}
static void asm_mul(ASMState *as, IRIns *ir)
{
+#if !LJ_SOFTFP32
if (irt_isnum(ir->t)) {
- asm_fparith(as, ir, MIPSI_MUL_D);
- } else {
+ asm_fpmul(as, ir);
+ } else
+#endif
+ {
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg right, left = ra_alloc2(as, ir, RSET_GPR);
right = (left >> 8); left &= 255;
- emit_dst(as, MIPSI_MUL, dest, left, right);
+ if (LJ_64 && irt_is64(ir->t)) {
+#if !LJ_TARGET_MIPSR6
+ emit_dst(as, MIPSI_MFLO, dest, 0, 0);
+ emit_dst(as, MIPSI_DMULT, 0, left, right);
+#else
+ emit_dst(as, MIPSI_DMUL, dest, left, right);
+#endif
+ } else {
+ emit_dst(as, MIPSI_MUL, dest, left, right);
+ }
}
}
+#if !LJ_SOFTFP32
+static void asm_fpdiv(ASMState *as, IRIns *ir)
+{
+#if !LJ_SOFTFP
+ asm_fparith(as, ir, MIPSI_DIV_D);
+#else
+ asm_callid(as, ir, IRCALL_softfp_div);
+#endif
+}
+#endif
+
static void asm_neg(ASMState *as, IRIns *ir)
{
+#if !LJ_SOFTFP
if (irt_isnum(ir->t)) {
asm_fpunary(as, ir, MIPSI_NEG_D);
- } else {
+ } else
+#elif LJ_64 /* && LJ_SOFTFP */
+ if (irt_isnum(ir->t)) {
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+ emit_dst(as, MIPSI_XOR, dest, left,
+ ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest)));
+ } else
+#endif
+ {
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
- emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
+ emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest,
+ RID_ZERO, left);
}
}
+#if !LJ_SOFTFP
+#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D)
+#elif LJ_64 /* && LJ_SOFTFP */
+static void asm_abs(ASMState *as, IRIns *ir)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
+ emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0);
+}
+#endif
+
static void asm_arithov(ASMState *as, IRIns *ir)
{
+ /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */
Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
+ lj_assertA(!irt_is64(ir->t), "bad usage");
if (irref_isk(ir->op2)) {
int k = IR(ir->op2)->i;
if (ir->o == IR_SUBOV) k = -k;
@@ -1255,16 +1922,29 @@ static void asm_arithov(ASMState *as, IRIns *ir)
emit_move(as, RID_TMP, dest == left ? left : right);
}
+#define asm_addov(as, ir) asm_arithov(as, ir)
+#define asm_subov(as, ir) asm_arithov(as, ir)
+
static void asm_mulov(ASMState *as, IRIns *ir)
{
-#if LJ_DUALNUM
-#error "NYI: MULOV"
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
+ right = (left >> 8); left &= 255;
+ tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
+ right), dest));
+ asm_guard(as, MIPSI_BNE, RID_TMP, tmp);
+ emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31);
+#if !LJ_TARGET_MIPSR6
+ emit_dst(as, MIPSI_MFHI, tmp, 0, 0);
+ emit_dst(as, MIPSI_MFLO, dest, 0, 0);
+ emit_dst(as, MIPSI_MULT, 0, left, right);
#else
- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. */
+ emit_dst(as, MIPSI_MUL, dest, left, right);
+ emit_dst(as, MIPSI_MUH, tmp, left, right);
#endif
}
-#if LJ_HASFFI
+#if LJ_32 && LJ_HASFFI
static void asm_add64(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1348,7 +2028,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
}
#endif
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
{
Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
IRIns *irl = IR(ir->op1);
@@ -1362,11 +2042,12 @@ static void asm_bitnot(ASMState *as, IRIns *ir)
emit_dst(as, MIPSI_NOR, dest, left, right);
}
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
- if ((as->flags & JIT_F_MIPS32R2)) {
+#if LJ_32
+ if ((as->flags & JIT_F_MIPSXXR2)) {
emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
} else {
@@ -1381,6 +2062,15 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
emit_dta(as, MIPSI_SRL, tmp, left, 24);
emit_dta(as, MIPSI_SLL, RID_TMP, left, 24);
}
+#else
+ if (irt_is64(ir->t)) {
+ emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP);
+ emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left);
+ } else {
+ emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
+ emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
+ }
+#endif
}
static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
@@ -1388,7 +2078,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
if (irref_isk(ir->op2)) {
- int32_t k = IR(ir->op2)->i;
+ intptr_t k = get_kval(as, ir->op2);
if (checku16(k)) {
emit_tsi(as, mik, dest, left, k);
return;
@@ -1398,22 +2088,34 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
emit_dst(as, mi, dest, left, right);
}
+#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI)
+#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI)
+#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI)
+
static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op2)) { /* Constant shifts. */
- uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
- emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift);
+ uint32_t shift = (uint32_t)IR(ir->op2)->i;
+ if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D;
+ emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR),
+ (shift & 31));
} else {
Reg right, left = ra_alloc2(as, ir, RSET_GPR);
right = (left >> 8); left &= 255;
+ if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV;
emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */
}
}
-static void asm_bitror(ASMState *as, IRIns *ir)
+#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA)
+#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL")
+
+static void asm_bror(ASMState *as, IRIns *ir)
{
- if ((as->flags & JIT_F_MIPS32R2)) {
+ if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
} else {
Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1432,55 +2134,182 @@ static void asm_bitror(ASMState *as, IRIns *ir)
}
}
+#if LJ_SOFTFP
+static void asm_sfpmin_max(ASMState *as, IRIns *ir)
+{
+ CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax];
+#if LJ_64
+ IRRef args[2];
+ args[0] = ir->op1;
+ args[1] = ir->op2;
+#else
+ IRRef args[4];
+ args[0^LJ_BE] = ir->op1;
+ args[1^LJ_BE] = (ir+1)->op1;
+ args[2^LJ_BE] = ir->op2;
+ args[3^LJ_BE] = (ir+1)->op2;
+#endif
+ asm_setupresult(as, ir, &ci);
+ emit_call(as, (void *)ci.func, 0);
+ ci.func = NULL;
+ asm_gencall(as, &ci, args);
+}
+#endif
+
static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
{
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
+#if LJ_SOFTFP
+ asm_sfpmin_max(as, ir);
+#else
Reg dest = ra_dest(as, ir, RSET_FPR);
Reg right, left = ra_alloc2(as, ir, RSET_FPR);
right = (left >> 8); left &= 255;
+#if !LJ_TARGET_MIPSR6
if (dest == left) {
- emit_fg(as, MIPSI_MOVT_D, dest, right);
+ emit_fg(as, MIPSI_MOVF_D, dest, right);
} else {
- emit_fg(as, MIPSI_MOVF_D, dest, left);
+ emit_fg(as, MIPSI_MOVT_D, dest, left);
if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
}
- emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
+ emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? right : left, ismax ? left : right);
+#else
+ emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right);
+#endif
+#endif
} else {
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg right, left = ra_alloc2(as, ir, RSET_GPR);
right = (left >> 8); left &= 255;
- if (dest == left) {
- emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
+ if (left == right) {
+ if (dest != left) emit_move(as, dest, left);
} else {
- emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
- if (dest != right) emit_move(as, dest, right);
+#if !LJ_TARGET_MIPSR6
+ if (dest == left) {
+ emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);
+ } else {
+ emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);
+ if (dest != right) emit_move(as, dest, right);
+ }
+#else
+ emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
+ if (dest != right) {
+ emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP);
+ emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP);
+ } else {
+ emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP);
+ emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP);
+ }
+#endif
+ emit_dst(as, MIPSI_SLT, RID_TMP,
+ ismax ? left : right, ismax ? right : left);
}
- emit_dst(as, MIPSI_SLT, RID_TMP,
- ismax ? left : right, ismax ? right : left);
}
}
+#define asm_min(as, ir) asm_min_max(as, ir, 0)
+#define asm_max(as, ir) asm_min_max(as, ir, 1)
+
/* -- Comparisons --------------------------------------------------------- */
+#if LJ_SOFTFP
+/* SFP comparisons. */
+static void asm_sfpcomp(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+ RegSet drop = RSET_SCRATCH;
+ Reg r;
+#if LJ_64
+ IRRef args[2];
+ args[0] = ir->op1;
+ args[1] = ir->op2;
+#else
+ IRRef args[4];
+ args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1;
+ args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2;
+#endif
+
+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) {
+ if (!rset_test(as->freeset, r) &&
+ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
+ rset_clear(drop, r);
+ }
+ ra_evictset(as, drop);
+
+ asm_setupresult(as, ir, ci);
+
+ switch ((IROp)ir->o) {
+ case IR_LT:
+ asm_guard(as, MIPSI_BGEZ, RID_RET, 0);
+ break;
+ case IR_ULT:
+ asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
+ emit_loadi(as, RID_TMP, 1);
+ asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);
+ break;
+ case IR_GE:
+ asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
+ emit_loadi(as, RID_TMP, 2);
+ asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
+ break;
+ case IR_LE:
+ asm_guard(as, MIPSI_BGTZ, RID_RET, 0);
+ break;
+ case IR_GT:
+ asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
+ emit_loadi(as, RID_TMP, 2);
+ asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
+ break;
+ case IR_UGE:
+ asm_guard(as, MIPSI_BLTZ, RID_RET, 0);
+ break;
+ case IR_ULE:
+ asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP);
+ emit_loadi(as, RID_TMP, 1);
+ break;
+ case IR_UGT: case IR_ABC:
+ asm_guard(as, MIPSI_BLEZ, RID_RET, 0);
+ break;
+ case IR_EQ: case IR_NE:
+ asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO);
+ default:
+ break;
+ }
+ asm_gencall(as, ci, args);
+}
+#endif
+
static void asm_comp(ASMState *as, IRIns *ir)
{
/* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */
IROp op = ir->o;
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
+#if LJ_SOFTFP
+ asm_sfpcomp(as, ir);
+#else
+#if !LJ_TARGET_MIPSR6
Reg right, left = ra_alloc2(as, ir, RSET_FPR);
right = (left >> 8); left &= 255;
asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
+#else
+ Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR);
+ right = (left >> 8); left &= 255;
+ tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
+ asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
+ emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right);
+#endif
+#endif
} else {
Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
if (op == IR_ABC) op = IR_UGT;
- if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) {
+ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) {
MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
asm_guard(as, mi, left, 0);
} else {
if (irref_isk(ir->op2)) {
- int32_t k = IR(ir->op2)->i;
+ intptr_t k = get_kval(as, ir->op2);
if ((op&2)) k++;
if (checki16(k)) {
asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
@@ -1497,19 +2326,28 @@ static void asm_comp(ASMState *as, IRIns *ir)
}
}
-static void asm_compeq(ASMState *as, IRIns *ir)
+static void asm_equal(ASMState *as, IRIns *ir)
{
- Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
+ Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ?
+ RSET_FPR : RSET_GPR);
right = (left >> 8); left &= 255;
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP32 && irt_isnum(ir->t)) {
+#if LJ_SOFTFP
+ asm_sfpcomp(as, ir);
+#elif !LJ_TARGET_MIPSR6
asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
+#else
+ Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right));
+ asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31));
+ emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right);
+#endif
} else {
asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
}
}
-#if LJ_HASFFI
+#if LJ_32 && LJ_HASFFI
/* 64 bit integer comparisons. */
static void asm_comp64(ASMState *as, IRIns *ir)
{
@@ -1546,54 +2384,99 @@ static void asm_comp64eq(ASMState *as, IRIns *ir)
}
#endif
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
-#if LJ_HASFFI
/* HIOP is marked as a store because it needs its own DCE logic. */
int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP)
if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
as->curins--; /* Always skip the CONV. */
+#if LJ_HASFFI && !LJ_SOFTFP
if (usehi || uselo)
asm_conv64(as, ir);
return;
+#endif
} else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */
as->curins--; /* Always skip the loword comparison. */
+#if LJ_SOFTFP
+ if (!irt_isint(ir->t)) {
+ asm_sfpcomp(as, ir-1);
+ return;
+ }
+#endif
+#if LJ_HASFFI
asm_comp64(as, ir);
+#endif
return;
} else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
as->curins--; /* Always skip the loword comparison. */
+#if LJ_SOFTFP
+ if (!irt_isint(ir->t)) {
+ asm_sfpcomp(as, ir-1);
+ return;
+ }
+#endif
+#if LJ_HASFFI
asm_comp64eq(as, ir);
+#endif
+ return;
+#if LJ_SOFTFP
+ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
+ as->curins--; /* Always skip the loword min/max. */
+ if (uselo || usehi)
+ asm_sfpmin_max(as, ir-1);
return;
+#endif
} else if ((ir-1)->o == IR_XSTORE) {
as->curins--; /* Handle both stores here. */
if ((ir-1)->r != RID_SINK) {
- asm_xstore(as, ir, LJ_LE ? 4 : 0);
- asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
+ asm_xstore_(as, ir, LJ_LE ? 4 : 0);
+ asm_xstore_(as, ir-1, LJ_LE ? 0 : 4);
}
return;
}
+#endif
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
+#if LJ_32 && LJ_HASFFI
case IR_ADD: as->curins--; asm_add64(as, ir); break;
case IR_SUB: as->curins--; asm_sub64(as, ir); break;
case IR_NEG: as->curins--; asm_neg64(as, ir); break;
- case IR_CALLN:
- case IR_CALLXS:
+ case IR_CNEWI:
+ /* Nothing to do here. Handled by lo op itself. */
+ break;
+#endif
+#if LJ_32 && LJ_SOFTFP
+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+ case IR_STRTO:
if (!uselo)
- ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
break;
- case IR_CNEWI:
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
/* Nothing to do here. Handled by lo op itself. */
break;
- default: lua_assert(0); break;
- }
-#else
- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */
#endif
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+ break;
+ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+ }
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+ UNUSED(ir);
+ asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
+ emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE);
+ emit_lsglptr(as, MIPSI_LBU, RID_TMP,
+ (int32_t)offsetof(global_State, hookmask));
}
/* -- Stack handling ------------------------------------------------------ */
@@ -1606,47 +2489,70 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
ExitNo oldsnap = as->snapno;
rset_clear(allow, pbase);
+#if LJ_32
tmp = allow ? rset_pickbot(allow) :
(pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
+#else
+ tmp = allow ? rset_pickbot(allow) : RID_RET;
+#endif
as->snapno = exitno;
asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
as->snapno = oldsnap;
if (allow == RSET_EMPTY) /* Restore temp. register. */
- emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0);
+ emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0);
else
ra_modified(as, tmp);
emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
- emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase);
- emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
+ emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase);
+ emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack));
if (pbase == RID_TMP)
emit_getgl(as, RID_TMP, jit_base);
- emit_getgl(as, tmp, jit_L);
+ emit_getgl(as, tmp, cur_L);
if (allow == RSET_EMPTY) /* Spill temp. register. */
- emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
+ emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0);
}
/* Restore Lua stack from on-trace state. */
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
SnapEntry *map = &as->T->snapmap[snap->mapofs];
- SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
+#if LJ_32 || defined(LUA_USE_ASSERT)
+ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
+#endif
MSize n, nent = snap->nent;
/* Store the value of all modified slots to the Lua stack. */
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
BCReg s = snap_slot(sn);
- int32_t ofs = 8*((int32_t)s-1);
+ int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
IRRef ref = snap_ref(sn);
IRIns *ir = IR(ref);
if ((sn & SNAP_NORESTORE))
continue;
if (irt_isnum(ir->t)) {
+#if LJ_SOFTFP32
+ Reg tmp;
+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+ /* LJ_SOFTFP: must be a number constant. */
+ lj_assertA(irref_isk(ref), "unsplit FP op");
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
+ emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
+ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
+ emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
+#elif LJ_SOFTFP /* && LJ_64 */
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+ emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs);
+#else
Reg src = ra_alloc1(as, ref, RSET_FPR);
emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
+#endif
} else {
- Reg type;
+#if LJ_32
RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
+ Reg type;
+ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
+ "restore of IR type %d", irt_type(ir->t));
if (!irt_ispri(ir->t)) {
Reg src = ra_alloc1(as, ref, allow);
rset_clear(allow, src);
@@ -1655,14 +2561,38 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
if ((sn & (SNAP_CONT|SNAP_FRAME))) {
if (s == 0) continue; /* Do not overwrite link to previous frame. */
type = ra_allock(as, (int32_t)(*flinks--), allow);
+#if LJ_SOFTFP
+ } else if ((sn & SNAP_SOFTFPNUM)) {
+ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
+#endif
+ } else if ((sn & SNAP_KEYINDEX)) {
+ type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
} else {
type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
}
emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
+#else
+ if ((sn & SNAP_KEYINDEX)) {
+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+ int64_t kki = (int64_t)LJ_KEYINDEX << 32;
+ if (irref_isk(ref)) {
+ emit_tsi(as, MIPSI_SD,
+ ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow),
+ RID_BASE, ofs);
+ } else {
+ Reg src = ra_alloc1(as, ref, allow);
+ Reg rki = ra_allock(as, kki, rset_exclude(allow, src));
+ emit_tsi(as, MIPSI_SD, RID_TMP, RID_BASE, ofs);
+ emit_dst(as, MIPSI_DADDU, RID_TMP, src, rki);
+ }
+ } else {
+ asm_tvstore64(as, RID_BASE, ofs, ref);
+ }
+#endif
}
checkmclim(as);
}
- lua_assert(map + nent == flinks);
+ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
}
/* -- GC handling --------------------------------------------------------- */
@@ -1686,7 +2616,7 @@ static void asm_gc_check(ASMState *as)
args[1] = ASMREF_TMP2; /* MSize steps */
asm_gencall(as, ci, args);
l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */
- emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
+ emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
tmp = ra_releasetmp(as, ASMREF_TMP2);
emit_loadi(as, tmp, as->gcsteps);
/* Jump around GC step if GC total < GC threshold. */
@@ -1714,6 +2644,12 @@ static void asm_loop_fixup(ASMState *as)
}
}
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+ if (as->loopinv) as->mctop--;
+}
+
/* -- Head of trace ------------------------------------------------------- */
/* Coalesce BASE register for a root trace. */
@@ -1721,7 +2657,6 @@ static void asm_head_root_base(ASMState *as)
{
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
- if (as->loopinv) as->mctop--;
if (ra_hasreg(r)) {
ra_free(as, r);
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
@@ -1736,7 +2671,6 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
{
IRIns *ir = IR(REF_BASE);
Reg r = ir->r;
- if (as->loopinv) as->mctop--;
if (ra_hasreg(r)) {
ra_free(as, r);
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
@@ -1761,7 +2695,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
int32_t spadj = as->T->spadjust;
MCode *p = as->mctop-1;
- *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
+ *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
}
@@ -1772,139 +2706,26 @@ static void asm_tail_prep(ASMState *as)
as->invmcp = as->loopref ? as->mcp : NULL;
}
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
- switch ((IROp)ir->o) {
- /* Miscellaneous ops. */
- case IR_LOOP: asm_loop(as); break;
- case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
- case IR_USE:
- ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
- case IR_PHI: asm_phi(as, ir); break;
- case IR_HIOP: asm_hiop(as, ir); break;
- case IR_GCSTEP: asm_gcstep(as, ir); break;
-
- /* Guarded assertions. */
- case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
- case IR_LT: case IR_GE: case IR_LE: case IR_GT:
- case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
- case IR_ABC:
- asm_comp(as, ir);
- break;
-
- case IR_RETF: asm_retf(as, ir); break;
-
- /* Bit ops. */
- case IR_BNOT: asm_bitnot(as, ir); break;
- case IR_BSWAP: asm_bitswap(as, ir); break;
-
- case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
- case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
- case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;
-
- case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
- case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
- case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
- case IR_BROL: lua_assert(0); break;
- case IR_BROR: asm_bitror(as, ir); break;
-
- /* Arithmetic ops. */
- case IR_ADD: asm_add(as, ir); break;
- case IR_SUB: asm_sub(as, ir); break;
- case IR_MUL: asm_mul(as, ir); break;
- case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
- case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
- case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
- case IR_NEG: asm_neg(as, ir); break;
-
- case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
- case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
- case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
- case IR_MIN: asm_min_max(as, ir, 0); break;
- case IR_MAX: asm_min_max(as, ir, 1); break;
- case IR_FPMATH:
- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
- break;
- if (ir->op2 <= IRFPM_TRUNC)
- asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
- else if (ir->op2 == IRFPM_SQRT)
- asm_fpunary(as, ir, MIPSI_SQRT_D);
- else
- asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
- break;
-
- /* Overflow-checking arithmetic ops. */
- case IR_ADDOV: asm_arithov(as, ir); break;
- case IR_SUBOV: asm_arithov(as, ir); break;
- case IR_MULOV: asm_mulov(as, ir); break;
-
- /* Memory references. */
- case IR_AREF: asm_aref(as, ir); break;
- case IR_HREF: asm_href(as, ir); break;
- case IR_HREFK: asm_hrefk(as, ir); break;
- case IR_NEWREF: asm_newref(as, ir); break;
- case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
- case IR_FREF: asm_fref(as, ir); break;
- case IR_STRREF: asm_strref(as, ir); break;
-
- /* Loads and stores. */
- case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
- asm_ahuvload(as, ir);
- break;
- case IR_FLOAD: asm_fload(as, ir); break;
- case IR_XLOAD: asm_xload(as, ir); break;
- case IR_SLOAD: asm_sload(as, ir); break;
-
- case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
- case IR_FSTORE: asm_fstore(as, ir); break;
- case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
- /* Allocations. */
- case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
- case IR_TNEW: asm_tnew(as, ir); break;
- case IR_TDUP: asm_tdup(as, ir); break;
- case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
- /* Write barriers. */
- case IR_TBAR: asm_tbar(as, ir); break;
- case IR_OBAR: asm_obar(as, ir); break;
-
- /* Type conversions. */
- case IR_CONV: asm_conv(as, ir); break;
- case IR_TOBIT: asm_tobit(as, ir); break;
- case IR_TOSTR: asm_tostr(as, ir); break;
- case IR_STRTO: asm_strto(as, ir); break;
-
- /* Calls. */
- case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
- case IR_CALLXS: asm_callx(as, ir); break;
- case IR_CARG: break;
-
- default:
- setintV(&as->J->errinfo, ir->o);
- lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
- break;
- }
-}
-
/* -- Trace setup --------------------------------------------------------- */
/* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
IRRef args[CCI_NARGS_MAX*2];
- uint32_t i, nargs = (int)CCI_NARGS(ci);
+ uint32_t i, nargs = CCI_XNARGS(ci);
+#if LJ_32
int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
+#else
+ int nslots = 0, ngpr = REGARG_NUMGPR;
+#endif
asm_collectargs(as, ir, ci, args);
for (i = 0; i < nargs; i++) {
- if (args[i] && irt_isfp(IR(args[i])->t) &&
+#if LJ_32
+ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) &&
nfpr > 0 && !(ci->flags & CCI_VARARG)) {
nfpr--;
ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1;
- } else if (args[i] && irt_isnum(IR(args[i])->t)) {
+ } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) {
nfpr = 0;
ngpr = ngpr & ~1;
if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1;
@@ -1912,6 +2733,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
nfpr = 0;
if (ngpr > 0) ngpr--; else nslots++;
}
+#else
+ if (ngpr > 0) ngpr--; else nslots += 2;
+#endif
}
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots;
@@ -1942,35 +2766,35 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
if (((p[-1] ^ (px-p)) & 0xffffu) == 0 &&
((p[-1] & 0xf0000000u) == MIPSI_BEQ ||
(p[-1] & 0xfc1e0000u) == MIPSI_BLTZ ||
- (p[-1] & 0xffe00000u) == MIPSI_BC1F) &&
- p[-2] != MIPS_NOPATCH_GC_CHECK) {
+#if !LJ_TARGET_MIPSR6
+ (p[-1] & 0xffe00000u) == MIPSI_BC1F
+#else
+ (p[-1] & 0xff600000u) == MIPSI_BC1EQZ
+#endif
+ ) && p[-2] != MIPS_NOPATCH_GC_CHECK) {
ptrdiff_t delta = target - p;
if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */
patchbranch:
p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
*p = MIPSI_NOP; /* Replace the load of the exit number. */
- cstop = p;
+ cstop = p+1;
if (!cstart) cstart = p-1;
} else { /* Branch out of range. Use spare jump slot in mcarea. */
- int i;
- for (i = (int)(sizeof(MCLink)/sizeof(MCode));
- i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2);
- i += 2) {
- if (mcarea[i] == tjump) {
- delta = mcarea+i - p;
- goto patchbranch;
- } else if (mcarea[i] == MIPSI_NOP) {
- mcarea[i] = tjump;
- cstart = mcarea+i;
- delta = mcarea+i - p;
+ MCode *mcjump = asm_sparejump_use(mcarea, tjump);
+ if (mcjump) {
+ lj_mcode_sync(mcjump, mcjump+1);
+ delta = mcjump - p;
+ if (((delta + 0x8000) >> 16) == 0) {
goto patchbranch;
+ } else {
+ lj_assertJ(0, "spare jump out of range: -Osizemcode too big");
}
}
/* Ignore jump slot overflow. Child trace is simply not attached. */
}
} else if (p+1 == pe) {
/* Patch NOP after code for inverted loop branch. Use of J is ok. */
- lua_assert(p[1] == MIPSI_NOP);
+ lj_assertJ(p[1] == MIPSI_NOP, "expected NOP");
p[1] = tjump;
*p = MIPSI_NOP; /* Replace the load of the exit number. */
cstop = p+2;
diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h
index 5fd35d2e..546b8e5d 100644
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
return ra_allock(as, ofs-(int16_t)ofs, allow);
}
}
+ } else if (ir->o == IR_TMPREF) {
+ *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768);
+ return RID_JGL;
}
}
*ofsp = 0;
@@ -181,7 +184,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
return;
}
} else if (ir->o == IR_STRREF) {
- lua_assert(ofs == 0);
+ lj_assertA(ofs == 0, "bad usage");
ofs = (int32_t)sizeof(GCstr);
if (irref_isk(ir->op2)) {
ofs += IR(ir->op2)->i;
@@ -226,6 +229,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref,
emit_tab(as, pi, rt, left, right);
}
+#if !LJ_SOFTFP
/* Fuse to multiply-add/sub instruction. */
static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
{
@@ -245,24 +249,30 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
}
return 0;
}
+#endif
/* -- Calls --------------------------------------------------------------- */
/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
- uint32_t n, nargs = CCI_NARGS(ci);
+ uint32_t n, nargs = CCI_XNARGS(ci);
int32_t ofs = 8;
- Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
+ Reg gpr = REGARG_FIRSTGPR;
+#if !LJ_SOFTFP
+ Reg fpr = REGARG_FIRSTFPR;
+#endif
if ((void *)ci->func)
emit_call(as, (void *)ci->func);
for (n = 0; n < nargs; n++) { /* Setup args. */
IRRef ref = args[n];
if (ref) {
IRIns *ir = IR(ref);
+#if !LJ_SOFTFP
if (irt_isfp(ir->t)) {
if (fpr <= REGARG_LASTFPR) {
- lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */
+ lj_assertA(rset_test(as->freeset, fpr),
+ "reg %d not free", fpr); /* Already evicted. */
ra_leftov(as, fpr, ref);
fpr++;
} else {
@@ -271,9 +281,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
emit_spstore(as, ir, r, ofs);
ofs += irt_isnum(ir->t) ? 8 : 4;
}
- } else {
+ } else
+#endif
+ {
if (gpr <= REGARG_LASTGPR) {
- lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */
+ lj_assertA(rset_test(as->freeset, gpr),
+ "reg %d not free", gpr); /* Already evicted. */
ra_leftov(as, gpr, ref);
gpr++;
} else {
@@ -290,8 +303,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
}
checkmclim(as);
}
+#if !LJ_SOFTFP
if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */
emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
+#endif
}
/* Setup result reg/sp for call. Evict scratch regs. */
@@ -299,16 +314,18 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
RegSet drop = RSET_SCRATCH;
int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+#if !LJ_SOFTFP
if ((ci->flags & CCI_NOFPRCLOBBER))
drop &= ~RSET_FPR;
+#endif
if (ra_hasreg(ir->r))
rset_clear(drop, ir->r); /* Dest reg handled below. */
if (hiop && ra_hasreg((ir+1)->r))
rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */
ra_evictset(as, drop); /* Evictions must be performed first. */
if (ra_used(ir)) {
- lua_assert(!irt_ispri(ir->t));
- if (irt_isfp(ir->t)) {
+ lj_assertA(!irt_ispri(ir->t), "PRI dest");
+ if (!LJ_SOFTFP && irt_isfp(ir->t)) {
if ((ci->flags & CCI_CASTU64)) {
/* Use spill slot or temp slots. */
int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
@@ -331,15 +348,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
}
}
-static void asm_call(ASMState *as, IRIns *ir)
-{
- IRRef args[CCI_NARGS_MAX];
- const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
- asm_collectargs(as, ir, ci, args);
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
-}
-
static void asm_callx(ASMState *as, IRIns *ir)
{
IRRef args[CCI_NARGS_MAX*2];
@@ -352,7 +360,7 @@ static void asm_callx(ASMState *as, IRIns *ir)
func = ir->op2; irf = IR(func);
if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
if (irref_isk(func)) { /* Call to constant address. */
- ci.func = (ASMFunction)(void *)(irf->i);
+ ci.func = (ASMFunction)(void *)(intptr_t)(irf->i);
} else { /* Need a non-argument register for indirect calls. */
RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1);
Reg freg = ra_alloc1(as, func, allow);
@@ -363,16 +371,6 @@ static void asm_callx(ASMState *as, IRIns *ir)
asm_gencall(as, &ci, args);
}
-static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
-{
- const CCallInfo *ci = &lj_ir_callinfo[id];
- IRRef args[2];
- args[0] = ir->op1;
- args[1] = ir->op2;
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
-}
-
/* -- Returns ------------------------------------------------------------- */
/* Return to lower frame. Guard that it goes to the right spot. */
@@ -380,7 +378,7 @@ static void asm_retf(ASMState *as, IRIns *ir)
{
Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
void *pc = ir_kptr(IR(ir->op2));
- int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
@@ -392,8 +390,24 @@ static void asm_retf(ASMState *as, IRIns *ir)
emit_tai(as, PPCI_LWZ, RID_TMP, base, -8);
}
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+ IRIns irgc;
+ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
+ emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L));
+ emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31);
+ emit_getgl(as, RID_TMP, cur_L);
+ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+}
+#endif
+
/* -- Type conversions ---------------------------------------------------- */
+#if !LJ_SOFTFP
static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
RegSet allow = RSET_FPR;
@@ -410,8 +424,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000);
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
emit_lsptr(as, PPCI_LFS, (fbias & 31),
- (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
- RSET_GPR);
+ (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR);
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
emit_fb(as, PPCI_FCTIWZ, tmp, left);
}
@@ -427,15 +440,27 @@ static void asm_tobit(ASMState *as, IRIns *ir)
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
emit_fab(as, PPCI_FADD, tmp, left, right);
}
+#endif
static void asm_conv(ASMState *as, IRIns *ir)
{
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if !LJ_SOFTFP
int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+#endif
IRRef lref = ir->op1;
- lua_assert(irt_type(ir->t) != st);
- lua_assert(!(irt_isint64(ir->t) ||
- (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
+ /* 64 bit integer conversions are handled by SPLIT. */
+ lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)),
+ "IR %04d has unsplit 64 bit type",
+ (int)(ir - as->ir) - REF_BIAS);
+#if LJ_SOFTFP
+ /* FP conversions are handled by SPLIT. */
+ lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT),
+ "IR %04d has FP type",
+ (int)(ir - as->ir) - REF_BIAS);
+ /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+#else
+ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
if (irt_isfp(ir->t)) {
Reg dest = ra_dest(as, ir, RSET_FPR);
if (stfp) { /* FP to FP conversion. */
@@ -450,13 +475,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg left = ra_alloc1(as, lref, allow);
Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left));
Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
- const float *kbias;
if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest);
emit_fab(as, PPCI_FSUB, dest, dest, fbias);
emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
- kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000));
- if (st == IRT_U32) kbias++;
- emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias,
+ emit_lsptr(as, PPCI_LFS, (fbias & 31),
+ &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31],
rset_clear(allow, hibias));
emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP,
RID_SP, SPOFS_TMPLO);
@@ -466,7 +489,8 @@ static void asm_conv(ASMState *as, IRIns *ir)
} else if (stfp) { /* FP to integer conversion. */
if (irt_isguard(ir->t)) {
/* Checked conversions are only supported from number to int. */
- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+ "bad type for checked CONV");
asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
} else {
Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -489,19 +513,20 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
emit_fab(as, PPCI_FSUB, tmp, left, tmp);
emit_lsptr(as, PPCI_LFS, (tmp & 31),
- (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)),
- RSET_GPR);
+ (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
} else {
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
emit_fb(as, PPCI_FCTIWZ, tmp, left);
}
}
- } else {
+ } else
+#endif
+ {
Reg dest = ra_dest(as, ir, RSET_GPR);
if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
if ((ir->op2 & IRCONV_SEXT))
emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left);
else
@@ -513,90 +538,102 @@ static void asm_conv(ASMState *as, IRIns *ir)
}
}
-#if LJ_HASFFI
-static void asm_conv64(ASMState *as, IRIns *ir)
-{
- IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
- IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
- IRCallID id;
- const CCallInfo *ci;
- IRRef args[2];
- args[0] = ir->op1;
- args[1] = (ir-1)->op1;
- if (st == IRT_NUM || st == IRT_FLOAT) {
- id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
- ir--;
- } else {
- id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
- }
- ci = &lj_ir_callinfo[id];
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
-}
-#endif
-
static void asm_strto(ASMState *as, IRIns *ir)
{
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
IRRef args[2];
- int32_t ofs;
+ int32_t ofs = SPOFS_TMP;
+#if LJ_SOFTFP
+ ra_evictset(as, RSET_SCRATCH);
+ if (ra_used(ir)) {
+ if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
+ (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
+ int i;
+ for (i = 0; i < 2; i++) {
+ Reg r = (ir+i)->r;
+ if (ra_hasreg(r)) {
+ ra_free(as, r);
+ ra_modified(as, r);
+ emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
+ }
+ }
+ ofs = sps_scale(ir->s & ~1);
+ } else {
+ Reg rhi = ra_dest(as, ir+1, RSET_GPR);
+ Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
+ emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
+ emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
+ }
+ }
+#else
RegSet drop = RSET_SCRATCH;
if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */
ra_evictset(as, drop);
+ if (ir->s) ofs = sps_scale(ir->s);
+#endif
asm_guardcc(as, CC_EQ);
emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */
args[0] = ir->op1; /* GCstr *str */
args[1] = ASMREF_TMP1; /* TValue *n */
asm_gencall(as, ci, args);
/* Store the result to the spill slot or temp slots. */
- ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
}
+/* -- Memory references --------------------------------------------------- */
+
/* Get pointer to TValue. */
-static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
{
- IRIns *ir = IR(ref);
- if (irt_isnum(ir->t)) {
- if (irref_isk(ref)) /* Use the number constant itself as a TValue. */
- ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
- else /* Otherwise force a spill and use the spill slot. */
- emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
- } else {
- /* Otherwise use g->tmptv to hold the TValue. */
- RegSet allow = rset_exclude(RSET_GPR, dest);
- Reg type;
- emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
- if (!irt_ispri(ir->t)) {
- Reg src = ra_alloc1(as, ref, allow);
- emit_setgl(as, src, tmptv.gcr);
+ int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768);
+ if ((mode & IRTMPREF_IN1)) {
+ IRIns *ir = IR(ref);
+ if (irt_isnum(ir->t)) {
+ if ((mode & IRTMPREF_OUT1)) {
+#if LJ_SOFTFP
+ lj_assertA(irref_isk(ref), "unsplit FP op");
+ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+ emit_setgl(as,
+ ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR),
+ tmptv.u32.lo);
+ emit_setgl(as,
+ ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR),
+ tmptv.u32.hi);
+#else
+ Reg src = ra_alloc1(as, ref, RSET_FPR);
+ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+ emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs);
+#endif
+ } else if (irref_isk(ref)) {
+ /* Use the number constant itself as a TValue. */
+ ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
+ } else {
+#if LJ_SOFTFP
+ lj_assertA(0, "unsplit FP op");
+#else
+ /* Otherwise force a spill and use the spill slot. */
+ emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir));
+#endif
+ }
+ } else {
+ /* Otherwise use g->tmptv to hold the TValue. */
+ Reg type;
+ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
+ if (!irt_ispri(ir->t)) {
+ Reg src = ra_alloc1(as, ref, RSET_GPR);
+ emit_setgl(as, src, tmptv.gcr);
+ }
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t))
+ type = ra_alloc1(as, ref+1, RSET_GPR);
+ else
+ type = ra_allock(as, irt_toitype(ir->t), RSET_GPR);
+ emit_setgl(as, type, tmptv.it);
}
- type = ra_allock(as, irt_toitype(ir->t), allow);
- emit_setgl(as, type, tmptv.it);
- }
-}
-
-static void asm_tostr(ASMState *as, IRIns *ir)
-{
- IRRef args[2];
- args[0] = ASMREF_L;
- as->gcsteps++;
- if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
- args[1] = ASMREF_TMP1; /* const lua_Number * */
- asm_setupresult(as, ir, ci); /* GCstr * */
- asm_gencall(as, ci, args);
- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
} else {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
- args[1] = ir->op1; /* int32_t k */
- asm_setupresult(as, ir, ci); /* GCstr * */
- asm_gencall(as, ci, args);
+ emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs);
}
}
-/* -- Memory references --------------------------------------------------- */
-
static void asm_aref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -636,11 +673,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
Reg tisnum = RID_NONE, tmpnum = RID_NONE;
IRRef refkey = ir->op2;
IRIns *irkey = IR(refkey);
+ int isk = irref_isk(refkey);
IRType1 kt = irkey->t;
uint32_t khash;
MCLabel l_end, l_loop, l_next;
rset_clear(allow, tab);
+#if LJ_SOFTFP
+ if (!isk) {
+ key = ra_alloc1(as, refkey, allow);
+ rset_clear(allow, key);
+ if (irkey[1].o == IR_HIOP) {
+ if (ra_hasreg((irkey+1)->r)) {
+ tmpnum = (irkey+1)->r;
+ ra_noweak(as, tmpnum);
+ } else {
+ tmpnum = ra_allocref(as, refkey+1, allow);
+ }
+ rset_clear(allow, tmpnum);
+ }
+ }
+#else
if (irt_isnum(kt)) {
key = ra_alloc1(as, refkey, RSET_FPR);
tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
@@ -650,6 +703,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
key = ra_alloc1(as, refkey, allow);
rset_clear(allow, key);
}
+#endif
tmp2 = ra_scratch(as, allow);
rset_clear(allow, tmp2);
@@ -672,7 +726,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
asm_guardcc(as, CC_EQ);
else
emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
- if (irt_isnum(kt)) {
+ if (!LJ_SOFTFP && irt_isnum(kt)) {
emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
emit_condbranch(as, PPCI_BC, CC_GE, l_next);
emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
@@ -682,7 +736,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
emit_ab(as, PPCI_CMPW, tmp2, key);
emit_condbranch(as, PPCI_BC, CC_NE, l_next);
}
- emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
+ if (LJ_SOFTFP && ra_hasreg(tmpnum))
+ emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
+ else
+ emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
if (!irt_ispri(kt))
emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
}
@@ -691,35 +748,41 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
(((char *)as->mcp-(char *)l_loop) & 0xffffu);
/* Load main position relative to tab->node into dest. */
- khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+ khash = isk ? ir_khash(as, irkey) : 1;
if (khash == 0) {
emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
} else {
Reg tmphash = tmp1;
- if (irref_isk(refkey))
+ if (isk)
tmphash = ra_allock(as, khash, allow);
emit_tab(as, PPCI_ADD, dest, dest, tmp1);
emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
- if (irref_isk(refkey)) {
+ if (isk) {
/* Nothing to do. */
} else if (irt_isstr(kt)) {
- emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
+ emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid));
} else { /* Must match with hash*() in lj_tab.c. */
emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1);
emit_rotlwi(as, tmp2, tmp2, HASH_ROT3);
emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
- if (irt_isnum(kt)) {
+ if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
+#if LJ_SOFTFP
+ emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
+ emit_rotlwi(as, dest, tmp1, HASH_ROT1);
+ emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
+#else
int32_t ofs = ra_spill(as, irkey);
emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
emit_rotlwi(as, dest, tmp1, HASH_ROT1);
emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
+#endif
} else {
emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
emit_rotlwi(as, dest, tmp1, HASH_ROT1);
@@ -740,7 +803,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
Reg key = RID_NONE, type = RID_TMP, idx = node;
RegSet allow = rset_exclude(RSET_GPR, node);
- lua_assert(ofs % sizeof(Node) == 0);
+ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
if (ofs > 32736) {
idx = dest;
rset_clear(allow, dest);
@@ -773,20 +836,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
}
}
-static void asm_newref(ASMState *as, IRIns *ir)
-{
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
- IRRef args[3];
- if (ir->r == RID_SINK)
- return;
- args[0] = ASMREF_L; /* lua_State *L */
- args[1] = ir->op1; /* GCtab *t */
- args[2] = ASMREF_TMP1; /* cTValue *key */
- asm_setupresult(as, ir, ci); /* TValue * */
- asm_gencall(as, ci, args);
- asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
-}
-
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -813,7 +862,7 @@ static void asm_uref(ASMState *as, IRIns *ir)
static void asm_fref(ASMState *as, IRIns *ir)
{
UNUSED(as); UNUSED(ir);
- lua_assert(!ra_used(ir));
+ lj_assertA(!ra_used(ir), "unfused FREF");
}
static void asm_strref(ASMState *as, IRIns *ir)
@@ -853,26 +902,28 @@ static void asm_strref(ASMState *as, IRIns *ir)
/* -- Loads and stores ---------------------------------------------------- */
-static PPCIns asm_fxloadins(IRIns *ir)
+static PPCIns asm_fxloadins(ASMState *as, IRIns *ir)
{
+ UNUSED(as);
switch (irt_type(ir->t)) {
case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */
case IRT_U8: return PPCI_LBZ;
case IRT_I16: return PPCI_LHA;
case IRT_U16: return PPCI_LHZ;
- case IRT_NUM: return PPCI_LFD;
- case IRT_FLOAT: return PPCI_LFS;
+ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD;
+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
default: return PPCI_LWZ;
}
}
-static PPCIns asm_fxstoreins(IRIns *ir)
+static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir)
{
+ UNUSED(as);
switch (irt_type(ir->t)) {
case IRT_I8: case IRT_U8: return PPCI_STB;
case IRT_I16: case IRT_U16: return PPCI_STH;
- case IRT_NUM: return PPCI_STFD;
- case IRT_FLOAT: return PPCI_STFS;
+ case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD;
+ case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
default: return PPCI_STW;
}
}
@@ -880,18 +931,24 @@ static PPCIns asm_fxstoreins(IRIns *ir)
static void asm_fload(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
- PPCIns pi = asm_fxloadins(ir);
+ PPCIns pi = asm_fxloadins(as, ir);
+ Reg idx;
int32_t ofs;
- if (ir->op2 == IRFL_TAB_ARRAY) {
- ofs = asm_fuseabase(as, ir->op1);
- if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
- emit_tai(as, PPCI_ADDI, dest, idx, ofs);
- return;
+ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+ idx = RID_JGL;
+ ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
+ } else {
+ idx = ra_alloc1(as, ir->op1, RSET_GPR);
+ if (ir->op2 == IRFL_TAB_ARRAY) {
+ ofs = asm_fuseabase(as, ir->op1);
+ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */
+ emit_tai(as, PPCI_ADDI, dest, idx, ofs);
+ return;
+ }
}
+ ofs = field_ofs[ir->op2];
}
- ofs = field_ofs[ir->op2];
- lua_assert(!irt_isi8(ir->t));
+ lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8");
emit_tai(as, pi, dest, idx, ofs);
}
@@ -902,21 +959,22 @@ static void asm_fstore(ASMState *as, IRIns *ir)
IRIns *irf = IR(ir->op1);
Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
int32_t ofs = field_ofs[irf->op2];
- PPCIns pi = asm_fxstoreins(ir);
+ PPCIns pi = asm_fxstoreins(as, ir);
emit_tai(as, pi, src, idx, ofs);
}
}
static void asm_xload(ASMState *as, IRIns *ir)
{
- Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
+ Reg dest = ra_dest(as, ir,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+ lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
if (irt_isi8(ir->t))
emit_as(as, PPCI_EXTSB, dest, dest);
- asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
+ asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
}
-static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
+static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
{
IRIns *irb;
if (ir->r == RID_SINK)
@@ -927,36 +985,54 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
} else {
- Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
- asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
+ Reg src = ra_alloc1(as, ir->op2,
+ (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
+ asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
rset_exclude(RSET_GPR, src), ofs);
}
}
+#define asm_xstore(as, ir) asm_xstore_(as, ir, 0)
+
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
IRType1 t = ir->t;
Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
RegSet allow = RSET_GPR;
int32_t ofs = AHUREF_LSX;
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
+ t.irt = IRT_NUM;
+ if (ra_used(ir+1)) {
+ type = ra_dest(as, ir+1, allow);
+ rset_clear(allow, type);
+ }
+ ofs = 0;
+ }
if (ra_used(ir)) {
- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
- if (!irt_isnum(t)) ofs = 0;
- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+ lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+ irt_isint(ir->t) || irt_isaddr(ir->t),
+ "bad load type %d", irt_type(ir->t));
+ if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
rset_clear(allow, dest);
}
idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
+ if (ir->o == IR_VLOAD) {
+ ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 :
+ ir->op2 ? 8 * ir->op2 : AHUREF_LSX;
+ }
if (irt_isnum(t)) {
Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
asm_guardcc(as, CC_GE);
emit_ab(as, PPCI_CMPLW, type, tisnum);
if (ra_hasreg(dest)) {
- if (ofs == AHUREF_LSX) {
+ if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
(idx&255)), (idx>>8)));
emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
} else {
- emit_fai(as, PPCI_LFD, dest, idx, ofs);
+ emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
+ ofs+4*LJ_SOFTFP);
}
}
} else {
@@ -979,7 +1055,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
int32_t ofs = AHUREF_LSX;
if (ir->r == RID_SINK)
return;
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
src = ra_alloc1(as, ir->op2, RSET_FPR);
} else {
if (!irt_ispri(ir->t)) {
@@ -987,11 +1063,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
rset_clear(allow, src);
ofs = 0;
}
- type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+ if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+ type = ra_alloc1(as, (ir+1)->op2, allow);
+ else
+ type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
rset_clear(allow, type);
}
idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
if (ofs == AHUREF_LSX) {
emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
emit_slwi(as, RID_TMP, (idx>>8), 3);
@@ -1016,21 +1095,39 @@ static void asm_sload(ASMState *as, IRIns *ir)
IRType1 t = ir->t;
Reg dest = RID_NONE, type = RID_NONE, base;
RegSet allow = RSET_GPR;
- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
- lua_assert(LJ_DUALNUM ||
- !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+ int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
+ if (hiop)
+ t.irt = IRT_NUM;
+ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+ "bad parent SLOAD"); /* Handled by asm_head_side(). */
+ lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+ "inconsistent SLOAD variant");
+ lj_assertA(LJ_DUALNUM ||
+ !irt_isint(t) ||
+ (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
+ "bad SLOAD type");
+#if LJ_SOFTFP
+ lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
+ "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */
+ if (hiop && ra_used(ir+1)) {
+ type = ra_dest(as, ir+1, allow);
+ rset_clear(allow, type);
+ }
+#else
if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
dest = ra_scratch(as, RSET_FPR);
asm_tointg(as, ir, dest);
t.irt = IRT_NUM; /* Continue with a regular number type check. */
- } else if (ra_used(ir)) {
- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
- dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+ } else
+#endif
+ if (ra_used(ir)) {
+ lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
+ "bad SLOAD type %d", irt_type(ir->t));
+ dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
rset_clear(allow, dest);
base = ra_alloc1(as, REF_BASE, allow);
rset_clear(allow, base);
- if ((ir->op2 & IRSLOAD_CONVERT)) {
+ if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
if (irt_isint(t)) {
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
dest = ra_scratch(as, RSET_FPR);
@@ -1044,7 +1141,7 @@ static void asm_sload(ASMState *as, IRIns *ir)
emit_fab(as, PPCI_FSUB, dest, dest, fbias);
emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
emit_lsptr(as, PPCI_LFS, (fbias & 31),
- (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)),
+ (void *)&as->J->k32[LJ_K32_2P52_2P31],
rset_clear(allow, hibias));
emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
@@ -1062,14 +1159,22 @@ dotypecheck:
if ((ir->op2 & IRSLOAD_TYPECHECK)) {
Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
asm_guardcc(as, CC_GE);
- emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
+#if !LJ_SOFTFP
type = RID_TMP;
+#endif
+ emit_ab(as, PPCI_CMPLW, type, tisnum);
}
- if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
+ if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
+ base, ofs-(LJ_SOFTFP?0:4));
} else {
if ((ir->op2 & IRSLOAD_TYPECHECK)) {
asm_guardcc(as, CC_NE);
- emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
+ if ((ir->op2 & IRSLOAD_KEYINDEX)) {
+ emit_ai(as, PPCI_CMPWI, RID_TMP, (LJ_KEYINDEX & 0xffff));
+ emit_asi(as, PPCI_XORIS, RID_TMP, RID_TMP, (LJ_KEYINDEX >> 16));
+ } else {
+ emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
+ }
type = RID_TMP;
}
if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs);
@@ -1083,19 +1188,16 @@ dotypecheck:
static void asm_cnew(ASMState *as, IRIns *ir)
{
CTState *cts = ctype_ctsG(J2G(as->J));
- CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
- CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
- lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+ CTypeID id = (CTypeID)IR(ir->op1)->i;
+ CTSize sz;
+ CTInfo info = lj_ctype_info(cts, id, &sz);
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
- IRRef args[2];
- RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
+ IRRef args[4];
RegSet drop = RSET_SCRATCH;
- lua_assert(sz != CTSIZE_INVALID);
+ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+ "bad CNEW/CNEWI operands");
- args[0] = ASMREF_L; /* lua_State *L */
- args[1] = ASMREF_TMP1; /* MSize size */
as->gcsteps++;
-
if (ra_hasreg(ir->r))
rset_clear(drop, ir->r); /* Dest reg handled below. */
ra_evictset(as, drop);
@@ -1104,11 +1206,12 @@ static void asm_cnew(ASMState *as, IRIns *ir)
/* Initialize immutable cdata object. */
if (ir->o == IR_CNEWI) {
+ RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
int32_t ofs = sizeof(GCcdata);
- lua_assert(sz == 4 || sz == 8);
+ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
if (sz == 8) {
ofs += 4;
- lua_assert((ir+1)->o == IR_HIOP);
+ lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
}
for (;;) {
Reg r = ra_alloc1(as, ir->op2, allow);
@@ -1117,18 +1220,28 @@ static void asm_cnew(ASMState *as, IRIns *ir)
if (ofs == sizeof(GCcdata)) break;
ofs -= 4; ir++;
}
+ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* CTypeID id */
+ args[2] = ir->op2; /* CTSize sz */
+ args[3] = ASMREF_TMP1; /* CTSize align */
+ asm_gencall(as, ci, args);
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+ return;
}
+
/* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
- emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */
+ emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ASMREF_TMP1; /* MSize size */
asm_gencall(as, ci, args);
ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
ra_releasetmp(as, ASMREF_TMP1));
}
-#else
-#define asm_cnew(as, ir) ((void)0)
#endif
/* -- Write barriers ------------------------------------------------------ */
@@ -1142,7 +1255,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
emit_setgl(as, tab, gc.grayagain);
- lua_assert(LJ_GC_BLACK == 0x04);
+ lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK");
emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */
emit_getgl(as, link, gc.grayagain);
emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
@@ -1157,7 +1270,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
MCLabel l_end;
Reg obj, val, tmp;
/* No need for other object barriers (yet). */
- lua_assert(IR(ir->op1)->o == IR_UREFC);
+ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
ra_evictset(as, RSET_SCRATCH);
l_end = emit_label(as);
args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1178,6 +1291,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
/* -- Arithmetic and logic operations ------------------------------------- */
+#if !LJ_SOFTFP
static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
{
Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1196,31 +1310,24 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi)
emit_fb(as, pi, dest, left);
}
-static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
-{
- IRIns *irp = IR(ir->op1);
- if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
- IRIns *irpp = IR(irp->op1);
- if (irpp == ir-2 && irpp->o == IR_FPMATH &&
- irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
- IRRef args[2];
- args[0] = irpp->op1;
- args[1] = irp->op2;
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
- return 1;
- }
- }
- return 0;
+static void asm_fpmath(ASMState *as, IRIns *ir)
+{
+ if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
+ asm_fpunary(as, ir, PPCI_FSQRT);
+ else
+ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
}
+#endif
static void asm_add(ASMState *as, IRIns *ir)
{
+#if !LJ_SOFTFP
if (irt_isnum(ir->t)) {
if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
asm_fparith(as, ir, PPCI_FADD);
- } else {
+ } else
+#endif
+ {
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
PPCIns pi;
@@ -1259,10 +1366,13 @@ static void asm_add(ASMState *as, IRIns *ir)
static void asm_sub(ASMState *as, IRIns *ir)
{
+#if !LJ_SOFTFP
if (irt_isnum(ir->t)) {
if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
asm_fparith(as, ir, PPCI_FSUB);
- } else {
+ } else
+#endif
+ {
PPCIns pi = PPCI_SUBF;
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left, right;
@@ -1288,9 +1398,12 @@ static void asm_sub(ASMState *as, IRIns *ir)
static void asm_mul(ASMState *as, IRIns *ir)
{
+#if !LJ_SOFTFP
if (irt_isnum(ir->t)) {
asm_fparith(as, ir, PPCI_FMUL);
- } else {
+ } else
+#endif
+ {
PPCIns pi = PPCI_MULLW;
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
@@ -1312,11 +1425,16 @@ static void asm_mul(ASMState *as, IRIns *ir)
}
}
+#define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV)
+
static void asm_neg(ASMState *as, IRIns *ir)
{
+#if !LJ_SOFTFP
if (irt_isnum(ir->t)) {
asm_fpunary(as, ir, PPCI_FNEG);
- } else {
+ } else
+#endif
+ {
Reg dest, left;
PPCIns pi = PPCI_NEG;
if (as->flagmcp == as->mcp) {
@@ -1330,6 +1448,8 @@ static void asm_neg(ASMState *as, IRIns *ir)
}
}
+#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS)
+
static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
{
Reg dest, left, right;
@@ -1345,6 +1465,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
emit_tab(as, pi|PPCF_DOT, dest, left, right);
}
+#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO)
+#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO)
+#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO)
+
#if LJ_HASFFI
static void asm_add64(ASMState *as, IRIns *ir)
{
@@ -1424,7 +1548,7 @@ static void asm_neg64(ASMState *as, IRIns *ir)
}
#endif
-static void asm_bitnot(ASMState *as, IRIns *ir)
+static void asm_bnot(ASMState *as, IRIns *ir)
{
Reg dest, left, right;
PPCIns pi = PPCI_NOR;
@@ -1451,7 +1575,7 @@ nofuse:
emit_asb(as, pi, dest, left, right);
}
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_bswap(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
IRIns *irx;
@@ -1472,32 +1596,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
}
}
-static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
-{
- Reg dest = ra_dest(as, ir, RSET_GPR);
- Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
- if (irref_isk(ir->op2)) {
- int32_t k = IR(ir->op2)->i;
- Reg tmp = left;
- if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
- if (!checku16(k)) {
- emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
- if ((k & 0xffff) == 0) return;
- }
- emit_asi(as, pik, dest, left, k);
- return;
- }
- }
- /* May fail due to spills/restores above, but simplifies the logic. */
- if (as->flagmcp == as->mcp) {
- as->flagmcp = NULL;
- as->mcp++;
- pi |= PPCF_DOT;
- }
- right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
- emit_asb(as, pi, dest, left, right);
-}
-
/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
{
@@ -1528,7 +1626,7 @@ nofuse:
*--as->mcp = pi | PPCF_T(left);
}
-static void asm_bitand(ASMState *as, IRIns *ir)
+static void asm_band(ASMState *as, IRIns *ir)
{
Reg dest, left, right;
IRRef lref = ir->op1;
@@ -1583,6 +1681,35 @@ static void asm_bitand(ASMState *as, IRIns *ir)
emit_asb(as, PPCI_AND ^ dot, dest, left, right);
}
+static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
+{
+ Reg dest = ra_dest(as, ir, RSET_GPR);
+ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+ if (irref_isk(ir->op2)) {
+ int32_t k = IR(ir->op2)->i;
+ Reg tmp = left;
+ if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
+ if (!checku16(k)) {
+ emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
+ if ((k & 0xffff) == 0) return;
+ }
+ emit_asi(as, pik, dest, left, k);
+ return;
+ }
+ }
+ /* May fail due to spills/restores above, but simplifies the logic. */
+ if (as->flagmcp == as->mcp) {
+ as->flagmcp = NULL;
+ as->mcp++;
+ pi |= PPCF_DOT;
+ }
+ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+ emit_asb(as, pi, dest, left, right);
+}
+
+#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
+#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
+
static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
{
Reg dest, left;
@@ -1608,9 +1735,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
}
}
+#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
+#define asm_brol(as, ir) \
+ asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
+ PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
+#define asm_bror(as, ir) lj_assertA(0, "unexpected BROR")
+
+#if LJ_SOFTFP
+static void asm_sfpmin_max(ASMState *as, IRIns *ir)
+{
+ CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
+ IRRef args[4];
+ MCLabel l_right, l_end;
+ Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
+ Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
+ Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
+ PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
+ righthi = (lefthi >> 8); lefthi &= 255;
+ rightlo = (leftlo >> 8); leftlo &= 255;
+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
+ l_end = emit_label(as);
+ if (desthi != righthi) emit_mr(as, desthi, righthi);
+ if (destlo != rightlo) emit_mr(as, destlo, rightlo);
+ l_right = emit_label(as);
+ if (l_end != l_right) emit_jmp(as, l_end);
+ if (desthi != lefthi) emit_mr(as, desthi, lefthi);
+ if (destlo != leftlo) emit_mr(as, destlo, leftlo);
+ if (l_right == as->mcp+1) {
+ cond ^= 4; l_right = l_end; ++as->mcp;
+ }
+ emit_condbranch(as, PPCI_BC, cond, l_right);
+ ra_evictset(as, RSET_SCRATCH);
+ emit_cmpi(as, RID_RET, 1);
+ asm_gencall(as, &ci, args);
+}
+#endif
+
static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
{
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
Reg dest = ra_dest(as, ir, RSET_FPR);
Reg tmp = dest;
Reg right, left = ra_alloc2(as, ir, RSET_FPR);
@@ -1618,9 +1784,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
if (tmp == left || tmp == right)
tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
dest), left), right));
- emit_facb(as, PPCI_FSEL, dest, tmp,
- ismax ? left : right, ismax ? right : left);
- emit_fab(as, PPCI_FSUB, tmp, left, right);
+ emit_facb(as, PPCI_FSEL, dest, tmp, left, right);
+ emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left);
} else {
Reg dest = ra_dest(as, ir, RSET_GPR);
Reg tmp1 = RID_TMP, tmp2 = dest;
@@ -1638,6 +1803,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
}
}
+#define asm_min(as, ir) asm_min_max(as, ir, 0)
+#define asm_max(as, ir) asm_min_max(as, ir, 1)
+
/* -- Comparisons --------------------------------------------------------- */
#define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */
@@ -1695,7 +1863,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
static void asm_comp(ASMState *as, IRIns *ir)
{
PPCCC cc = asm_compmap[ir->o];
- if (irt_isnum(ir->t)) {
+ if (!LJ_SOFTFP && irt_isnum(ir->t)) {
Reg right, left = ra_alloc2(as, ir, RSET_FPR);
right = (left >> 8); left &= 255;
asm_guardcc(as, (cc >> 4));
@@ -1714,6 +1882,46 @@ static void asm_comp(ASMState *as, IRIns *ir)
}
}
+#define asm_equal(as, ir) asm_comp(as, ir)
+
+#if LJ_SOFTFP
+/* SFP comparisons. */
+static void asm_sfpcomp(ASMState *as, IRIns *ir)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+ RegSet drop = RSET_SCRATCH;
+ Reg r;
+ IRRef args[4];
+ args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
+ args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
+
+ for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
+ if (!rset_test(as->freeset, r) &&
+ regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
+ rset_clear(drop, r);
+ }
+ ra_evictset(as, drop);
+ asm_setupresult(as, ir, ci);
+ switch ((IROp)ir->o) {
+ case IR_ULT:
+ asm_guardcc(as, CC_EQ);
+ emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+ case IR_ULE:
+ asm_guardcc(as, CC_EQ);
+ emit_ai(as, PPCI_CMPWI, RID_RET, 1);
+ break;
+ case IR_GE: case IR_GT:
+ asm_guardcc(as, CC_EQ);
+ emit_ai(as, PPCI_CMPWI, RID_RET, 2);
+ default:
+ asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
+ emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+ break;
+ }
+ asm_gencall(as, ci, args);
+}
+#endif
+
#if LJ_HASFFI
/* 64 bit integer comparisons. */
static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1738,50 +1946,87 @@ static void asm_comp64(ASMState *as, IRIns *ir)
}
#endif
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 bit op. Previous op is be the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
-#if LJ_HASFFI
/* HIOP is marked as a store because it needs its own DCE logic. */
int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_HASFFI || LJ_SOFTFP
if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
as->curins--; /* Always skip the CONV. */
+#if LJ_HASFFI && !LJ_SOFTFP
if (usehi || uselo)
asm_conv64(as, ir);
return;
+#endif
} else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
as->curins--; /* Always skip the loword comparison. */
+#if LJ_SOFTFP
+ if (!irt_isint(ir->t)) {
+ asm_sfpcomp(as, ir-1);
+ return;
+ }
+#endif
+#if LJ_HASFFI
asm_comp64(as, ir);
+#endif
return;
+#if LJ_SOFTFP
+ } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
+ as->curins--; /* Always skip the loword min/max. */
+ if (uselo || usehi)
+ asm_sfpmin_max(as, ir-1);
+ return;
+#endif
} else if ((ir-1)->o == IR_XSTORE) {
as->curins--; /* Handle both stores here. */
if ((ir-1)->r != RID_SINK) {
- asm_xstore(as, ir, 0);
- asm_xstore(as, ir-1, 4);
+ asm_xstore_(as, ir, 0);
+ asm_xstore_(as, ir-1, 4);
}
return;
}
+#endif
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
+#if LJ_HASFFI
case IR_ADD: as->curins--; asm_add64(as, ir); break;
case IR_SUB: as->curins--; asm_sub64(as, ir); break;
case IR_NEG: as->curins--; asm_neg64(as, ir); break;
- case IR_CALLN:
- case IR_CALLXS:
+ case IR_CNEWI:
+ /* Nothing to do here. Handled by lo op itself. */
+ break;
+#endif
+#if LJ_SOFTFP
+ case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+ case IR_STRTO:
if (!uselo)
- ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+ ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */
break;
- case IR_CNEWI:
+ case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
/* Nothing to do here. Handled by lo op itself. */
break;
- default: lua_assert(0); break;
- }
-#else
- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */
#endif
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+ break;
+ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+ }
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+ UNUSED(ir);
+ asm_guardcc(as, CC_NE);
+ emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
+ emit_lsglptr(as, PPCI_LBZ, RID_TMP,
+ (int32_t)offsetof(global_State, hookmask));
}
/* -- Stack handling ------------------------------------------------------ */
@@ -1805,7 +2050,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
if (pbase == RID_TMP)
emit_getgl(as, RID_TMP, jit_base);
- emit_getgl(as, tmp, jit_L);
+ emit_getgl(as, tmp, cur_L);
if (allow == RSET_EMPTY) /* Spill temp. register. */
emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
}
@@ -1826,12 +2071,25 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
if ((sn & SNAP_NORESTORE))
continue;
if (irt_isnum(ir->t)) {
+#if LJ_SOFTFP
+ Reg tmp;
+ RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+ /* LJ_SOFTFP: must be a number constant. */
+ lj_assertA(irref_isk(ref), "unsplit FP op");
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
+ if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
+ tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
+ emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
+#else
Reg src = ra_alloc1(as, ref, RSET_FPR);
emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
+#endif
} else {
Reg type;
RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
+ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
+ "restore of IR type %d", irt_type(ir->t));
if (!irt_ispri(ir->t)) {
Reg src = ra_alloc1(as, ref, allow);
rset_clear(allow, src);
@@ -1840,6 +2098,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
if ((sn & (SNAP_CONT|SNAP_FRAME))) {
if (s == 0) continue; /* Do not overwrite link to previous frame. */
type = ra_allock(as, (int32_t)(*flinks--), allow);
+#if LJ_SOFTFP
+ } else if ((sn & SNAP_SOFTFPNUM)) {
+ type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
+#endif
+ } else if ((sn & SNAP_KEYINDEX)) {
+ type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
} else {
type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
}
@@ -1847,7 +2111,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap)
}
checkmclim(as);
}
- lua_assert(map + nent == flinks);
+ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
}
/* -- GC handling --------------------------------------------------------- */
@@ -1898,6 +2162,12 @@ static void asm_loop_fixup(ASMState *as)
}
}
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+ UNUSED(as); /* Nothing to do. */
+}
+
/* -- Head of trace ------------------------------------------------------- */
/* Coalesce BASE register for a root trace. */
@@ -1949,7 +2219,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
as->mctop = p;
} else {
/* Patch stack adjustment. */
- lua_assert(checki16(CFRAME_SIZE+spadj));
+ lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
}
@@ -1970,147 +2240,25 @@ static void asm_tail_prep(ASMState *as)
}
}
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
- switch ((IROp)ir->o) {
- /* Miscellaneous ops. */
- case IR_LOOP: asm_loop(as); break;
- case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
- case IR_USE:
- ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
- case IR_PHI: asm_phi(as, ir); break;
- case IR_HIOP: asm_hiop(as, ir); break;
- case IR_GCSTEP: asm_gcstep(as, ir); break;
-
- /* Guarded assertions. */
- case IR_EQ: case IR_NE:
- if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) {
- as->curins--;
- asm_href(as, ir-1, (IROp)ir->o);
- break;
- }
- /* fallthrough */
- case IR_LT: case IR_GE: case IR_LE: case IR_GT:
- case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
- case IR_ABC:
- asm_comp(as, ir);
- break;
-
- case IR_RETF: asm_retf(as, ir); break;
-
- /* Bit ops. */
- case IR_BNOT: asm_bitnot(as, ir); break;
- case IR_BSWAP: asm_bitswap(as, ir); break;
-
- case IR_BAND: asm_bitand(as, ir); break;
- case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break;
- case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break;
-
- case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break;
- case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break;
- case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break;
- case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31),
- PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break;
- case IR_BROR: lua_assert(0); break;
-
- /* Arithmetic ops. */
- case IR_ADD: asm_add(as, ir); break;
- case IR_SUB: asm_sub(as, ir); break;
- case IR_MUL: asm_mul(as, ir); break;
- case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break;
- case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
- case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
- case IR_NEG: asm_neg(as, ir); break;
-
- case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break;
- case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
- case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
- case IR_MIN: asm_min_max(as, ir, 0); break;
- case IR_MAX: asm_min_max(as, ir, 1); break;
- case IR_FPMATH:
- if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
- break;
- if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT))
- asm_fpunary(as, ir, PPCI_FSQRT);
- else
- asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
- break;
-
- /* Overflow-checking arithmetic ops. */
- case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break;
- case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break;
- case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break;
-
- /* Memory references. */
- case IR_AREF: asm_aref(as, ir); break;
- case IR_HREF: asm_href(as, ir, 0); break;
- case IR_HREFK: asm_hrefk(as, ir); break;
- case IR_NEWREF: asm_newref(as, ir); break;
- case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
- case IR_FREF: asm_fref(as, ir); break;
- case IR_STRREF: asm_strref(as, ir); break;
-
- /* Loads and stores. */
- case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
- asm_ahuvload(as, ir);
- break;
- case IR_FLOAD: asm_fload(as, ir); break;
- case IR_XLOAD: asm_xload(as, ir); break;
- case IR_SLOAD: asm_sload(as, ir); break;
-
- case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
- case IR_FSTORE: asm_fstore(as, ir); break;
- case IR_XSTORE: asm_xstore(as, ir, 0); break;
-
- /* Allocations. */
- case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
- case IR_TNEW: asm_tnew(as, ir); break;
- case IR_TDUP: asm_tdup(as, ir); break;
- case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
- /* Write barriers. */
- case IR_TBAR: asm_tbar(as, ir); break;
- case IR_OBAR: asm_obar(as, ir); break;
-
- /* Type conversions. */
- case IR_CONV: asm_conv(as, ir); break;
- case IR_TOBIT: asm_tobit(as, ir); break;
- case IR_TOSTR: asm_tostr(as, ir); break;
- case IR_STRTO: asm_strto(as, ir); break;
-
- /* Calls. */
- case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
- case IR_CALLXS: asm_callx(as, ir); break;
- case IR_CARG: break;
-
- default:
- setintV(&as->J->errinfo, ir->o);
- lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
- break;
- }
-}
-
/* -- Trace setup --------------------------------------------------------- */
/* Ensure there are enough stack slots for call arguments. */
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
IRRef args[CCI_NARGS_MAX*2];
- uint32_t i, nargs = (int)CCI_NARGS(ci);
+ uint32_t i, nargs = CCI_XNARGS(ci);
int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
asm_collectargs(as, ir, ci, args);
for (i = 0; i < nargs; i++)
- if (args[i] && irt_isfp(IR(args[i])->t)) {
+ if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
} else {
if (ngpr > 0) ngpr--; else nslots++;
}
if (nslots > as->evenspill) /* Leave room for args in stack slots. */
as->evenspill = nslots;
- return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
+ return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
+ REGSP_HINT(RID_RET);
}
static void asm_setup_target(ASMState *as)
@@ -2150,7 +2298,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
} else if ((ins & 0xfc000000u) == PPCI_B &&
((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
ptrdiff_t delta = (char *)target - (char *)p;
- lua_assert(((delta + 0x02000000) >> 26) == 0);
+ lj_assertJ(((delta + 0x02000000) >> 26) == 0,
+ "branch target out of range");
*p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
if (!cstart) cstart = p;
}
@@ -2158,7 +2307,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
/* Always patch long-range branch in exit stub itself. Except, if we can't. */
if (patchlong) {
ptrdiff_t delta = (char *)target - (char *)px - clearso;
- lua_assert(((delta + 0x02000000) >> 26) == 0);
+ lj_assertJ(((delta + 0x02000000) >> 26) == 0,
+ "branch target out of range");
*px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
}
if (!cstart) cstart = px;
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index 8b529086..2bf9d939 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -21,15 +21,17 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group)
}
/* Push the high byte of the exitno for each exit stub group. */
*mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8);
+#if !LJ_GC64
/* Store DISPATCH at original stack slot 0. Account for the two push ops. */
*mxp++ = XI_MOVmi;
*mxp++ = MODRM(XM_OFS8, 0, RID_ESP);
*mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
*mxp++ = 2*sizeof(void *);
*(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4;
+#endif
/* Jump to exit handler which fills in the ExitState. */
*mxp++ = XI_JMP; mxp += 4;
- *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler);
+ *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler);
/* Commit the code for this group (even if assembly fails later on). */
lj_mcode_commitbot(as->J, mxp);
as->mcbot = mxp;
@@ -58,14 +60,18 @@ static void asm_guardcc(ASMState *as, int cc)
MCode *p = as->mcp;
if (LJ_UNLIKELY(p == as->invmcp)) {
as->loopinv = 1;
- *(int32_t *)(p+1) = jmprel(p+5, target);
+ *(int32_t *)(p+1) = jmprel(as->J, p+5, target);
target = p;
cc ^= 1;
if (as->realign) {
+ if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
+ as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */
emit_sjcc(as, cc, target);
return;
}
}
+ if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP))
+ as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. */
emit_jcc(as, cc, target);
}
@@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
{
if (irref_isk(ref)) {
IRIns *ir = IR(ref);
+#if LJ_GC64
+ if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
+ *k = ir->i;
+ return 1;
+ } else if (checki32((int64_t)ir_k64(ir)->u64)) {
+ *k = (int32_t)ir_k64(ir)->u64;
+ return 1;
+ }
+#else
if (ir->o != IR_KINT64) {
*k = ir->i;
return 1;
@@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
*k = (int32_t)ir_kint64(ir)->u64;
return 1;
}
+#endif
}
return 0;
}
@@ -115,7 +131,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
as->mrm.ofs = 0;
if (irb->o == IR_FLOAD) {
IRIns *ira = IR(irb->op1);
- lua_assert(irb->op2 == IRFL_TAB_ARRAY);
+ lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
/* We can avoid the FLOAD of t->array for colocated arrays. */
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
@@ -134,7 +150,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow)
{
IRIns *irx;
- lua_assert(ir->o == IR_AREF);
+ lj_assertA(ir->o == IR_AREF, "expected AREF");
as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow);
irx = IR(ir->op2);
if (irref_isk(ir->op2)) {
@@ -185,14 +201,32 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
+#if LJ_GC64
+ int64_t ofs = dispofs(as, &uv->tv);
+ if (checki32(ofs) && checki32(ofs+4)) {
+ as->mrm.ofs = (int32_t)ofs;
+ as->mrm.base = RID_DISPATCH;
+ as->mrm.idx = RID_NONE;
+ return;
+ }
+#else
as->mrm.ofs = ptr2addr(&uv->tv);
as->mrm.base = as->mrm.idx = RID_NONE;
return;
+#endif
}
break;
+ case IR_TMPREF:
+#if LJ_GC64
+ as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv);
+ as->mrm.base = RID_DISPATCH;
+ as->mrm.idx = RID_NONE;
+#else
+ as->mrm.ofs = igcptr(&J2G(as->J)->tmptv);
+ as->mrm.base = as->mrm.idx = RID_NONE;
+#endif
+ return;
default:
- lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO ||
- ir->o == IR_KKPTR);
break;
}
}
@@ -204,26 +238,53 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow)
/* Fuse FLOAD/FREF reference into memory operand. */
static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow)
{
- lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF);
- as->mrm.ofs = field_ofs[ir->op2];
+ lj_assertA(ir->o == IR_FLOAD || ir->o == IR_FREF,
+ "bad IR op %d", ir->o);
as->mrm.idx = RID_NONE;
+ if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */
+#if LJ_GC64
+ as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch);
+ as->mrm.base = RID_DISPATCH;
+#else
+ as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J));
+ as->mrm.base = RID_NONE;
+#endif
+ return;
+ }
+ as->mrm.ofs = field_ofs[ir->op2];
if (irref_isk(ir->op1)) {
- as->mrm.ofs += IR(ir->op1)->i;
+ IRIns *op1 = IR(ir->op1);
+#if LJ_GC64
+ if (ir->op1 == REF_NIL) {
+ as->mrm.ofs -= GG_OFS(dispatch);
+ as->mrm.base = RID_DISPATCH;
+ return;
+ } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) {
+ intptr_t ofs = dispofs(as, ir_kptr(op1));
+ if (checki32(as->mrm.ofs + ofs)) {
+ as->mrm.ofs += (int32_t)ofs;
+ as->mrm.base = RID_DISPATCH;
+ return;
+ }
+ }
+#else
+ as->mrm.ofs += op1->i;
as->mrm.base = RID_NONE;
- } else {
- as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
+ return;
+#endif
}
+ as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow);
}
/* Fuse string reference into memory operand. */
static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
{
IRIns *irr;
- lua_assert(ir->o == IR_STRREF);
+ lj_assertA(ir->o == IR_STRREF, "bad IR op %d", ir->o);
as->mrm.base = as->mrm.idx = RID_NONE;
as->mrm.scale = XM_SCALE1;
as->mrm.ofs = sizeof(GCstr);
- if (irref_isk(ir->op1)) {
+ if (!LJ_GC64 && irref_isk(ir->op1)) {
as->mrm.ofs += IR(ir->op1)->i;
} else {
Reg r = ra_alloc1(as, ir->op1, allow);
@@ -255,10 +316,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
IRIns *ir = IR(ref);
as->mrm.idx = RID_NONE;
if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+#if LJ_GC64
+ intptr_t ofs = dispofs(as, ir_kptr(ir));
+ if (checki32(ofs)) {
+ as->mrm.ofs = (int32_t)ofs;
+ as->mrm.base = RID_DISPATCH;
+ return;
+ }
+ } if (0) {
+#else
as->mrm.ofs = ir->i;
as->mrm.base = RID_NONE;
} else if (ir->o == IR_STRREF) {
asm_fusestrref(as, ir, allow);
+#endif
} else {
as->mrm.ofs = 0;
if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) {
@@ -301,7 +372,47 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow)
}
}
-/* Fuse load into memory operand. */
+/* Fuse load of 64 bit IR constant into memory operand. */
+static Reg asm_fuseloadk64(ASMState *as, IRIns *ir)
+{
+ const uint64_t *k = &ir_k64(ir)->u64;
+ if (!LJ_GC64 || checki32((intptr_t)k)) {
+ as->mrm.ofs = ptr2addr(k);
+ as->mrm.base = RID_NONE;
+#if LJ_GC64
+ } else if (checki32(dispofs(as, k))) {
+ as->mrm.ofs = (int32_t)dispofs(as, k);
+ as->mrm.base = RID_DISPATCH;
+ } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) &&
+ checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) {
+ as->mrm.ofs = (int32_t)mcpofs(as, k);
+ as->mrm.base = RID_RIP;
+ } else { /* Intern 64 bit constant at bottom of mcode. */
+ if (ir->i) {
+ lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
+ "bad interned 64 bit constant");
+ } else {
+ while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
+ *(uint64_t*)as->mcbot = *k;
+ ir->i = (int32_t)(as->mctop - as->mcbot);
+ as->mcbot += 8;
+ as->mclim = as->mcbot + MCLIM_REDZONE;
+ lj_mcode_commitbot(as->J, as->mcbot);
+ }
+ as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i);
+ as->mrm.base = RID_RIP;
+#endif
+ }
+ as->mrm.idx = RID_NONE;
+ return RID_MRM;
+}
+
+/* Fuse load into memory operand.
+**
+** Important caveat: this may emit RIP-relative loads! So don't place any
+** code emitters between this function and the use of its result.
+** The only permitted exception is asm_guardcc().
+*/
static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
{
IRIns *ir = IR(ref);
@@ -319,27 +430,36 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
}
if (ir->o == IR_KNUM) {
RegSet avail = as->freeset & ~as->modset & RSET_FPR;
- lua_assert(allow != RSET_EMPTY);
- if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
- as->mrm.ofs = ptr2addr(ir_knum(ir));
- as->mrm.base = as->mrm.idx = RID_NONE;
- return RID_MRM;
- }
+ lj_assertA(allow != RSET_EMPTY, "no register allowed");
+ if (!(avail & (avail-1))) /* Fuse if less than two regs available. */
+ return asm_fuseloadk64(as, ir);
} else if (ref == REF_BASE || ir->o == IR_KINT64) {
RegSet avail = as->freeset & ~as->modset & RSET_GPR;
- lua_assert(allow != RSET_EMPTY);
+ lj_assertA(allow != RSET_EMPTY, "no register allowed");
if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
- as->mrm.ofs = ptr2addr(ref == REF_BASE ? (void *)&J2G(as->J)->jit_base : (void *)ir_kint64(ir));
- as->mrm.base = as->mrm.idx = RID_NONE;
- return RID_MRM;
+ if (ref == REF_BASE) {
+#if LJ_GC64
+ as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base);
+ as->mrm.base = RID_DISPATCH;
+#else
+ as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base);
+ as->mrm.base = RID_NONE;
+#endif
+ as->mrm.idx = RID_NONE;
+ return RID_MRM;
+ } else {
+ return asm_fuseloadk64(as, ir);
+ }
}
} else if (mayfuse(as, ref)) {
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
if (ir->o == IR_SLOAD) {
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
- noconflict(as, ref, IR_RETF, 0)) {
+ noconflict(as, ref, IR_RETF, 0) &&
+ !(LJ_GC64 && irt_isaddr(ir->t))) {
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
- as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0);
+ as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
+ (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
as->mrm.idx = RID_NONE;
return RID_MRM;
}
@@ -351,7 +471,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
return RID_MRM;
}
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
- if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) {
+ if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
+ !(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
return RID_MRM;
}
@@ -364,11 +485,17 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
asm_fusexref(as, ir->op1, xallow);
return RID_MRM;
}
- } else if (ir->o == IR_VLOAD) {
+ } else if (ir->o == IR_VLOAD && IR(ir->op1)->o == IR_AREF &&
+ !(LJ_GC64 && irt_isaddr(ir->t))) {
asm_fuseahuref(as, ir->op1, xallow);
+ as->mrm.ofs += 8 * ir->op2;
return RID_MRM;
}
}
+ if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) {
+ asm_fusefref(as, ir, RSET_EMPTY);
+ return RID_MRM;
+ }
if (!(as->freeset & allow) && !emit_canremat(ref) &&
(allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref)))
goto fusespill;
@@ -392,7 +519,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64)
/* Count the required number of stack slots for a call. */
static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
{
- uint32_t i, nargs = CCI_NARGS(ci);
+ uint32_t i, nargs = CCI_XNARGS(ci);
int nslots = 0;
#if LJ_64
if (LJ_ABI_WIN) {
@@ -425,7 +552,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args)
/* Generate a call to a C function. */
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
- uint32_t n, nargs = CCI_NARGS(ci);
+ uint32_t n, nargs = CCI_XNARGS(ci);
int32_t ofs = STACKARG_OFS;
#if LJ_64
uint32_t gprs = REGARG_GPRS;
@@ -485,13 +612,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
if (r) { /* Argument is in a register. */
if (r < RID_MAX_GPR && ref < ASMREF_TMP1) {
#if LJ_64
- if (ir->o == IR_KINT64)
- emit_loadu64(as, r, ir_kint64(ir)->u64);
+ if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64)
+ emit_loadu64(as, r, ir_k64(ir)->u64);
else
#endif
emit_loadi(as, r, ir->i);
} else {
- lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
+ /* Must have been evicted. */
+ lj_assertA(rset_test(as->freeset, r), "reg %d not free", r);
if (ra_hasreg(ir->r)) {
ra_noweak(as, ir->r);
emit_movrr(as, ir, r, ir->r);
@@ -500,7 +628,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
}
}
} else if (irt_isfp(ir->t)) { /* FP argument is on stack. */
- lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */
+ lj_assertA(!(irt_isfloat(ir->t) && irref_isk(ref)),
+ "unexpected float constant");
if (LJ_32 && (ofs & 4) && irref_isk(ref)) {
/* Split stores for unaligned FP consts. */
emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
@@ -531,7 +660,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
RegSet drop = RSET_SCRATCH;
- int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+ int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
if ((ci->flags & CCI_NOFPRCLOBBER))
drop &= ~RSET_FPR;
if (ra_hasreg(ir->r))
@@ -560,7 +689,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
if (ra_hasreg(dest)) {
ra_free(as, dest);
ra_modified(as, dest);
- emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
+ emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS,
dest, RID_ESP, ofs);
}
if ((ci->flags & CCI_CASTU64)) {
@@ -571,12 +700,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
}
#endif
-#if LJ_32
} else if (hiop) {
ra_destpair(as, ir);
-#endif
} else {
- lua_assert(!irt_ispri(ir->t));
+ lj_assertA(!irt_ispri(ir->t), "PRI dest");
ra_destreg(as, ir, RID_RET);
}
} else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) {
@@ -584,15 +711,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
}
}
-static void asm_call(ASMState *as, IRIns *ir)
-{
- IRRef args[CCI_NARGS_MAX];
- const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
- asm_collectargs(as, ir, ci, args);
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
-}
-
/* Return a constant function pointer or NULL for indirect calls. */
static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func)
{
@@ -651,16 +769,39 @@ static void asm_callx(ASMState *as, IRIns *ir)
static void asm_retf(ASMState *as, IRIns *ir)
{
Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
+#if LJ_FR2
+ Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base));
+#endif
void *pc = ir_kptr(IR(ir->op2));
- int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
+ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
as->topslot -= (BCReg)delta;
if ((int32_t)as->topslot < 0) as->topslot = 0;
irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */
emit_setgl(as, base, jit_base);
emit_addptr(as, base, -8*delta);
asm_guardcc(as, CC_NE);
+#if LJ_FR2
+ emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8);
+ emit_loadu64(as, rpc, u64ptr(pc));
+#else
emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc));
+#endif
+}
+
+/* -- Buffer operations --------------------------------------------------- */
+
+#if LJ_HASBUFFER
+static void asm_bufhdr_write(ASMState *as, Reg sb)
+{
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb));
+ IRIns irgc;
+ irgc.ot = IRT(0, IRT_PGC); /* GC type. */
+ emit_storeofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
+ emit_opgl(as, XO_ARITH(XOg_OR), tmp|REX_GC64, cur_L);
+ emit_gri(as, XG_ARITHi(XOg_AND), tmp, SBUF_MASK_FLAG);
+ emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L));
}
+#endif
/* -- Type conversions ---------------------------------------------------- */
@@ -672,8 +813,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
asm_guardcc(as, CC_NE);
emit_rr(as, XO_UCOMISD, left, tmp);
emit_rr(as, XO_CVTSI2SD, tmp, dest);
- if (!(as->flags & JIT_F_SPLIT_XMM))
- emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
+ emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */
emit_rr(as, XO_CVTTSD2SI, dest, left);
/* Can't fuse since left is needed twice. */
}
@@ -684,8 +824,9 @@ static void asm_tobit(ASMState *as, IRIns *ir)
Reg tmp = ra_noreg(IR(ir->op1)->r) ?
ra_alloc1(as, ir->op1, RSET_FPR) :
ra_scratch(as, RSET_FPR);
- Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
+ Reg right;
emit_rr(as, XO_MOVDto, tmp, dest);
+ right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp));
emit_mrm(as, XO_ADDSD, tmp, right);
ra_left(as, tmp, ir->op1);
}
@@ -696,8 +837,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64));
int stfp = (st == IRT_NUM || st == IRT_FLOAT);
IRRef lref = ir->op1;
- lua_assert(irt_type(ir->t) != st);
- lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */
+ lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV");
+ lj_assertA(!(LJ_32 && (irt_isint64(ir->t) || st64)),
+ "IR %04d has unsplit 64 bit type",
+ (int)(ir - as->ir) - REF_BIAS);
if (irt_isfp(ir->t)) {
Reg dest = ra_dest(as, ir, RSET_FPR);
if (stfp) { /* FP to FP conversion. */
@@ -706,13 +849,13 @@ static void asm_conv(ASMState *as, IRIns *ir)
if (left == dest) return; /* Avoid the XO_XORPS. */
} else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */
/* number = (2^52+2^51 .. u32) - (2^52+2^51) */
- cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000));
+ cTValue *k = &as->J->k64[LJ_K64_TOBIT];
Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
if (irt_isfloat(ir->t))
emit_rr(as, XO_CVTSD2SS, dest, dest);
emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */
emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */
- emit_loadn(as, bias, k);
+ emit_rma(as, XO_MOVSD, bias, k);
emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR));
return;
} else { /* Integer to FP conversion. */
@@ -721,7 +864,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
asm_fuseloadm(as, lref, RSET_GPR, st64);
if (LJ_64 && st == IRT_U64) {
MCLabel l_end = emit_label(as);
- const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000));
+ cTValue *k = &as->J->k64[LJ_K64_2P64];
emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */
emit_sjcc(as, CC_NS, l_end);
emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */
@@ -729,18 +872,16 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS,
dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left);
}
- if (!(as->flags & JIT_F_SPLIT_XMM))
- emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
+ emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */
} else if (stfp) { /* FP to integer conversion. */
if (irt_isguard(ir->t)) {
/* Checked conversions are only supported from number to int. */
- lua_assert(irt_isint(ir->t) && st == IRT_NUM);
+ lj_assertA(irt_isint(ir->t) && st == IRT_NUM,
+ "bad type for checked CONV");
asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
} else {
Reg dest = ra_dest(as, ir, RSET_GPR);
- x86Op op = st == IRT_NUM ?
- ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) :
- ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI);
+ x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) {
/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */
/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */
@@ -751,30 +892,27 @@ static void asm_conv(ASMState *as, IRIns *ir)
emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
emit_rr(as, op, dest|REX_64, tmp);
if (st == IRT_NUM)
- emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J,
- LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000)));
+ emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]);
else
- emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J,
- LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000)));
+ emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]);
emit_sjcc(as, CC_NS, l_end);
emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */
emit_rr(as, op, dest|REX_64, tmp);
ra_left(as, tmp, lref);
} else {
- Reg left = asm_fuseload(as, lref, RSET_FPR);
if (LJ_64 && irt_isu32(ir->t))
emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */
emit_mrm(as, op,
dest|((LJ_64 &&
(irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
- left);
+ asm_fuseload(as, lref, RSET_FPR));
}
}
} else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
Reg left, dest = ra_dest(as, ir, RSET_GPR);
RegSet allow = RSET_GPR;
x86Op op;
- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT");
if (st == IRT_I8) {
op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX;
} else if (st == IRT_U8) {
@@ -808,7 +946,7 @@ static void asm_conv(ASMState *as, IRIns *ir)
}
} else {
Reg dest = ra_dest(as, ir, RSET_GPR);
- if (st64) {
+ if (st64 && !(ir->op2 & IRCONV_NONE)) {
Reg left = asm_fuseload(as, lref, RSET_GPR);
/* This is either a 32 bit reg/reg mov which zeroes the hiword
** or a load of the loword from a 64 bit address.
@@ -834,20 +972,18 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir)
if (ra_hasreg(dest)) {
ra_free(as, dest);
ra_modified(as, dest);
- emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS,
- dest, RID_ESP, ofs);
+ emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs);
}
emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd,
irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs);
if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) {
/* For inputs in [2^63,2^64-1] add 2^64 to compensate. */
MCLabel l_end = emit_label(as);
- emit_rma(as, XO_FADDq, XOg_FADDq,
- lj_ir_k64_find(as->J, U64x(43f00000,00000000)));
+ emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]);
emit_sjcc(as, CC_NS, l_end);
emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */
} else {
- lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64);
+ lj_assertA(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64, "bad type for CONV");
}
emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0);
/* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */
@@ -861,9 +997,8 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
Reg lo, hi;
- lua_assert(st == IRT_NUM || st == IRT_FLOAT);
- lua_assert(dt == IRT_I64 || dt == IRT_U64);
- lua_assert(((ir-1)->op2 & IRCONV_TRUNC));
+ lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV");
+ lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV");
hi = ra_dest(as, ir, RSET_GPR);
lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi));
if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0);
@@ -884,8 +1019,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
else
emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
- emit_rma(as, XO_FADDq, XOg_FADDq,
- lj_ir_k64_find(as->J, U64x(c3f00000,00000000)));
+ emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]);
emit_sjcc(as, CC_NS, l_pop);
emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */
}
@@ -906,6 +1040,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
st == IRT_NUM ? XOg_FLDq: XOg_FLDd,
asm_fuseload(as, ir->op1, RSET_EMPTY));
}
+
+static void asm_conv64(ASMState *as, IRIns *ir)
+{
+ if (irt_isfp(ir->t))
+ asm_conv_fp_int64(as, ir);
+ else
+ asm_conv_int64_fp(as, ir);
+}
#endif
static void asm_strto(ASMState *as, IRIns *ir)
@@ -927,54 +1069,61 @@ static void asm_strto(ASMState *as, IRIns *ir)
RID_ESP, sps_scale(ir->s));
}
-static void asm_tostr(ASMState *as, IRIns *ir)
+/* -- Memory references --------------------------------------------------- */
+
+/* Get pointer to TValue. */
+static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode)
{
- IRIns *irl = IR(ir->op1);
- IRRef args[2];
- args[0] = ASMREF_L;
- as->gcsteps++;
- if (irt_isnum(irl->t)) {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
- args[1] = ASMREF_TMP1; /* const lua_Number * */
- asm_setupresult(as, ir, ci); /* GCstr * */
- asm_gencall(as, ci, args);
- emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64,
- RID_ESP, ra_spill(as, irl));
- } else {
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
- args[1] = ir->op1; /* int32_t k */
- asm_setupresult(as, ir, ci); /* GCstr * */
- asm_gencall(as, ci, args);
+ if ((mode & IRTMPREF_IN1)) {
+ IRIns *ir = IR(ref);
+ if (irt_isnum(ir->t)) {
+ if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) {
+ /* Use the number constant itself as a TValue. */
+ emit_loada(as, dest, ir_knum(ir));
+ return;
+ }
+ emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0);
+ } else {
+#if LJ_GC64
+ if (irref_isk(ref)) {
+ TValue k;
+ lj_ir_kvalue(as->J->L, &k, ir);
+ emit_movmroi(as, dest, 4, k.u32.hi);
+ emit_movmroi(as, dest, 0, k.u32.lo);
+ } else {
+ /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+ if (irt_is64(ir->t)) {
+ emit_u32(as, irt_toitype(ir->t) << 15);
+ emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4);
+ } else {
+ emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15));
+ }
+ emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+ }
+#else
+ if (!irref_isk(ref)) {
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest));
+ emit_movtomro(as, REX_64IR(ir, src), dest, 0);
+ } else if (!irt_ispri(ir->t)) {
+ emit_movmroi(as, dest, 0, ir->i);
+ }
+ if (!(LJ_64 && irt_islightud(ir->t)))
+ emit_movmroi(as, dest, 4, irt_toitype(ir->t));
+#endif
+ }
}
+ emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the TValue(s). */
}
-/* -- Memory references --------------------------------------------------- */
-
static void asm_aref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
asm_fusearef(as, ir, RSET_GPR);
if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0))
- emit_mrm(as, XO_LEA, dest, RID_MRM);
+ emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
else if (as->mrm.base != dest)
- emit_rr(as, XO_MOV, dest, as->mrm.base);
-}
-
-/* Merge NE(HREF, niltv) check. */
-static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
-{
- /* Assumes nothing else generates NE of HREF. */
- if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins &&
- ra_hasreg(ir->r)) {
- MCode *p = as->mcp;
- p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6;
- /* Ensure no loop branch inversion happened. */
- if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) {
- as->mcp = p; /* Kill cmp reg, imm32 + jz exit. */
- return p + *(int32_t *)(p-4); /* Return exit address. */
- }
- }
- return NULL;
+ emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base);
}
/* Inlined hash lookup. Specialized for key type and for const keys.
@@ -985,10 +1134,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir)
** } while ((n = nextnode(n)));
** return niltv(L);
*/
-static void asm_href(ASMState *as, IRIns *ir)
+static void asm_href(ASMState *as, IRIns *ir, IROp merge)
{
- MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */
RegSet allow = RSET_GPR;
+ int destused = ra_used(ir);
Reg dest = ra_dest(as, ir, allow);
Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
Reg key = RID_NONE, tmp = RID_NONE;
@@ -1001,28 +1150,26 @@ static void asm_href(ASMState *as, IRIns *ir)
if (!isk) {
rset_clear(allow, tab);
key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
- if (!irt_isstr(kt))
+ if (LJ_GC64 || !irt_isstr(kt))
tmp = ra_scratch(as, rset_exclude(allow, key));
}
- /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */
+ /* Key not found in chain: jump to exit (if merged) or load niltv. */
l_end = emit_label(as);
- if (nilexit && ir[1].o == IR_NE) {
- emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */
- nilexit = NULL;
- } else {
+ if (merge == IR_NE)
+ asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */
+ else if (destused)
emit_loada(as, dest, niltvg(J2G(as->J)));
- }
/* Follow hash chain until the end. */
l_loop = emit_sjcc_label(as, CC_NZ);
- emit_rr(as, XO_TEST, dest, dest);
- emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next));
+ emit_rr(as, XO_TEST, dest|REX_GC64, dest);
+ emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next));
l_next = emit_label(as);
/* Type and value comparison. */
- if (nilexit)
- emit_jcc(as, CC_E, nilexit);
+ if (merge == IR_EQ)
+ asm_guardcc(as, CC_E);
else
emit_sjcc(as, CC_E, l_end);
if (irt_isnum(kt)) {
@@ -1038,7 +1185,7 @@ static void asm_href(ASMState *as, IRIns *ir)
emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n));
emit_sjcc(as, CC_AE, l_next);
/* The type check avoids NaN penalties and complaints from Valgrind. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
emit_u32(as, LJ_TISNUM);
emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
#else
@@ -1046,13 +1193,31 @@ static void asm_href(ASMState *as, IRIns *ir)
emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
#endif
}
-#if LJ_64
+#if LJ_64 && !LJ_GC64
} else if (irt_islightud(kt)) {
emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64));
#endif
+#if LJ_GC64
+ } else if (irt_isaddr(kt)) {
+ if (isk) {
+ TValue k;
+ k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo),
+ k.u32.lo);
+ emit_sjcc(as, CC_NE, l_next);
+ emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi),
+ k.u32.hi);
+ } else {
+ emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64));
+ }
+ } else {
+ lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type");
+ emit_u32(as, (irt_toitype(kt)<<15)|0x7fff);
+ emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it));
+#else
} else {
if (!irt_ispri(kt)) {
- lua_assert(irt_isaddr(kt));
+ lj_assertA(irt_isaddr(kt), "bad HREF key type");
if (isk)
emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr),
ptr2addr(ir_kgc(irkey)));
@@ -1060,31 +1225,33 @@ static void asm_href(ASMState *as, IRIns *ir)
emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr));
emit_sjcc(as, CC_NE, l_next);
}
- lua_assert(!irt_isnil(kt));
+ lj_assertA(!irt_isnil(kt), "bad HREF key type");
emit_i8(as, irt_toitype(kt));
emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it));
+#endif
}
emit_sfixup(as, l_loop);
checkmclim(as);
+#if LJ_GC64
+ if (!isk && irt_isaddr(kt)) {
+ emit_rr(as, XO_OR, tmp|REX_64, key);
+ emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47);
+ }
+#endif
/* Load main position relative to tab->node into dest. */
- khash = isk ? ir_khash(irkey) : 1;
+ khash = isk ? ir_khash(as, irkey) : 1;
if (khash == 0) {
- emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node));
+ emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
} else {
- emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node));
- if ((as->flags & JIT_F_PREFER_IMUL)) {
- emit_i8(as, sizeof(Node));
- emit_rr(as, XO_IMULi8, dest, dest);
- } else {
- emit_shifti(as, XOg_SHL, dest, 3);
- emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
- }
+ emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
+ emit_shifti(as, XOg_SHL, dest, 3);
+ emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
if (isk) {
emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
} else if (irt_isstr(kt)) {
- emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash));
+ emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, sid));
emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
} else { /* Must match with hashrot() in lj_tab.c. */
emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask));
@@ -1107,7 +1274,19 @@ static void asm_href(ASMState *as, IRIns *ir)
#endif
} else {
emit_rr(as, XO_MOV, tmp, key);
+#if LJ_GC64
+ checkmclim(as);
+ emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15);
+ if ((as->flags & JIT_F_BMI2)) {
+ emit_i8(as, 32);
+ emit_mrm(as, XV_RORX|VEX_64, dest, key);
+ } else {
+ emit_shifti(as, XOg_SHR|REX_64, dest, 32);
+ emit_rr(as, XO_MOV, dest|REX_64, key|REX_64);
+ }
+#else
emit_rmro(as, XO_LEA, dest, key, HASH_BIAS);
+#endif
}
}
}
@@ -1123,15 +1302,15 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
#if !LJ_64
MCLabel l_exit;
#endif
- lua_assert(ofs % sizeof(Node) == 0);
+ lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
if (ra_hasreg(dest)) {
if (ofs != 0) {
- if (dest == node && !(as->flags & JIT_F_LEA_AGU))
- emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs);
+ if (dest == node)
+ emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
else
- emit_rmro(as, XO_LEA, dest, node, ofs);
+ emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
} else if (dest != node) {
- emit_rr(as, XO_MOV, dest, node);
+ emit_rr(as, XO_MOV, dest|REX_GC64, node);
}
}
asm_guardcc(as, CC_NE);
@@ -1140,16 +1319,28 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node));
emit_rmro(as, XO_CMP, key|REX_64, node,
ofs + (int32_t)offsetof(Node, key.u64));
- lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t));
+ lj_assertA(irt_isnum(irkey->t) || irt_isgcv(irkey->t),
+ "bad HREFK key type");
/* Assumes -0.0 is already canonicalized to +0.0. */
emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 :
+#if LJ_GC64
+ ((uint64_t)irt_toitype(irkey->t) << 47) |
+ (uint64_t)ir_kgc(irkey));
+#else
((uint64_t)irt_toitype(irkey->t) << 32) |
(uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey)));
+#endif
} else {
- lua_assert(!irt_isnil(irkey->t));
+ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
+#if LJ_GC64
+ emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff);
+ emit_rmro(as, XO_ARITHi, XOg_CMP, node,
+ ofs + (int32_t)offsetof(Node, key.it));
+#else
emit_i8(as, irt_toitype(irkey->t));
emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
ofs + (int32_t)offsetof(Node, key.it));
+#endif
}
#else
l_exit = emit_label(as);
@@ -1164,13 +1355,13 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
(int32_t)ir_knum(irkey)->u32.hi);
} else {
if (!irt_ispri(irkey->t)) {
- lua_assert(irt_isgcv(irkey->t));
+ lj_assertA(irt_isgcv(irkey->t), "bad HREFK key type");
emit_gmroi(as, XG_ARITHi(XOg_CMP), node,
ofs + (int32_t)offsetof(Node, key.gcr),
ptr2addr(ir_kgc(irkey)));
emit_sjcc(as, CC_NE, l_exit);
}
- lua_assert(!irt_isnil(irkey->t));
+ lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type");
emit_i8(as, irt_toitype(irkey->t));
emit_rmro(as, XO_ARITHi8, XOg_CMP, node,
ofs + (int32_t)offsetof(Node, key.it));
@@ -1178,61 +1369,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
#endif
}
-static void asm_newref(ASMState *as, IRIns *ir)
-{
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
- IRRef args[3];
- IRIns *irkey;
- Reg tmp;
- if (ir->r == RID_SINK)
- return;
- args[0] = ASMREF_L; /* lua_State *L */
- args[1] = ir->op1; /* GCtab *t */
- args[2] = ASMREF_TMP1; /* cTValue *key */
- asm_setupresult(as, ir, ci); /* TValue * */
- asm_gencall(as, ci, args);
- tmp = ra_releasetmp(as, ASMREF_TMP1);
- irkey = IR(ir->op2);
- if (irt_isnum(irkey->t)) {
- /* For numbers use the constant itself or a spill slot as a TValue. */
- if (irref_isk(ir->op2))
- emit_loada(as, tmp, ir_knum(irkey));
- else
- emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey));
- } else {
- /* Otherwise use g->tmptv to hold the TValue. */
- if (!irref_isk(ir->op2)) {
- Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
- emit_movtomro(as, REX_64IR(irkey, src), tmp, 0);
- } else if (!irt_ispri(irkey->t)) {
- emit_movmroi(as, tmp, 0, irkey->i);
- }
- if (!(LJ_64 && irt_islightud(irkey->t)))
- emit_movmroi(as, tmp, 4, irt_toitype(irkey->t));
- emit_loada(as, tmp, &J2G(as->J)->tmptv);
- }
-}
-
static void asm_uref(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
if (irref_isk(ir->op1)) {
GCfunc *fn = ir_kfunc(IR(ir->op1));
MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
- emit_rma(as, XO_MOV, dest, v);
+ emit_rma(as, XO_MOV, dest|REX_GC64, v);
} else {
Reg uv = ra_scratch(as, RSET_GPR);
Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
if (ir->o == IR_UREFC) {
- emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv));
+ emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv));
asm_guardcc(as, CC_NE);
emit_i8(as, 1);
emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed));
} else {
- emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v));
+ emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v));
}
- emit_rmro(as, XO_MOV, uv, func,
- (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
+ emit_rmro(as, XO_MOV, uv|REX_GC64, func,
+ (int32_t)offsetof(GCfuncL, uvptr) +
+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8));
}
}
@@ -1250,9 +1407,9 @@ static void asm_strref(ASMState *as, IRIns *ir)
if (as->mrm.base == RID_NONE)
emit_loadi(as, dest, as->mrm.ofs);
else if (as->mrm.base == dest && as->mrm.idx == RID_NONE)
- emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs);
+ emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs);
else
- emit_mrm(as, XO_LEA, dest, RID_MRM);
+ emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM);
}
/* -- Loads and stores ---------------------------------------------------- */
@@ -1271,19 +1428,23 @@ static void asm_fxload(ASMState *as, IRIns *ir)
case IRT_U8: xo = XO_MOVZXb; break;
case IRT_I16: xo = XO_MOVSXw; break;
case IRT_U16: xo = XO_MOVZXw; break;
- case IRT_NUM: xo = XMM_MOVRM(as); break;
+ case IRT_NUM: xo = XO_MOVSD; break;
case IRT_FLOAT: xo = XO_MOVSS; break;
default:
if (LJ_64 && irt_is64(ir->t))
dest |= REX_64;
else
- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
+ "unsplit 64 bit load");
xo = XO_MOV;
break;
}
emit_mrm(as, xo, dest, RID_MRM);
}
+#define asm_fload(as, ir) asm_fxload(as, ir)
+#define asm_xload(as, ir) asm_fxload(as, ir)
+
static void asm_fxstore(ASMState *as, IRIns *ir)
{
RegSet allow = RSET_GPR;
@@ -1318,14 +1479,17 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
case IRT_I16: case IRT_U16: xo = XO_MOVtow; break;
case IRT_NUM: xo = XO_MOVSDto; break;
case IRT_FLOAT: xo = XO_MOVSSto; break;
-#if LJ_64
- case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */
+#if LJ_64 && !LJ_GC64
+ case IRT_LIGHTUD:
+ /* NYI: mask 64 bit lightuserdata. */
+ lj_assertA(0, "store of lightuserdata");
#endif
default:
if (LJ_64 && irt_is64(ir->t))
src |= REX_64;
else
- lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t));
+ lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t),
+ "unsplit 64 bit store");
xo = XO_MOVto;
break;
}
@@ -1339,15 +1503,18 @@ static void asm_fxstore(ASMState *as, IRIns *ir)
emit_i8(as, k);
emit_mrm(as, XO_MOVmib, 0, RID_MRM);
} else {
- lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
- irt_isaddr(ir->t));
+ lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) ||
+ irt_isaddr(ir->t), "bad store type");
emit_i32(as, k);
emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM);
}
}
}
-#if LJ_64
+#define asm_fstore(as, ir) asm_fxstore(as, ir)
+#define asm_xstore(as, ir) asm_fxstore(as, ir)
+
+#if LJ_64 && !LJ_GC64
static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
{
if (ra_used(ir) || typecheck) {
@@ -1369,13 +1536,18 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck)
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
- lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
- (LJ_DUALNUM && irt_isint(ir->t)));
-#if LJ_64
+#if LJ_GC64
+ Reg tmp = RID_NONE;
+#endif
+ lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+ (LJ_DUALNUM && irt_isint(ir->t)),
+ "bad load type %d", irt_type(ir->t));
+#if LJ_64 && !LJ_GC64
if (irt_islightud(ir->t)) {
Reg dest = asm_load_lightud64(as, ir, 1);
if (ra_hasreg(dest)) {
asm_fuseahuref(as, ir->op1, RSET_GPR);
+ if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
}
return;
@@ -1385,20 +1557,67 @@ static void asm_ahuvload(ASMState *as, IRIns *ir)
RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
Reg dest = ra_dest(as, ir, allow);
asm_fuseahuref(as, ir->op1, RSET_GPR);
- emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM);
+ if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
+#if LJ_GC64
+ if (irt_isaddr(ir->t)) {
+ emit_shifti(as, XOg_SHR|REX_64, dest, 17);
+ asm_guardcc(as, CC_NE);
+ emit_i8(as, irt_toitype(ir->t));
+ emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
+ emit_i8(as, XI_O16);
+ if ((as->flags & JIT_F_BMI2)) {
+ emit_i8(as, 47);
+ emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM);
+ } else {
+ emit_shifti(as, XOg_ROR|REX_64, dest, 47);
+ emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM);
+ }
+ return;
+ } else
+#endif
+ emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM);
} else {
- asm_fuseahuref(as, ir->op1, RSET_GPR);
+ RegSet gpr = RSET_GPR;
+#if LJ_GC64
+ if (irt_isaddr(ir->t)) {
+ tmp = ra_scratch(as, RSET_GPR);
+ gpr = rset_exclude(gpr, tmp);
+ }
+#endif
+ asm_fuseahuref(as, ir->op1, gpr);
+ if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2;
}
/* Always do the type check, even if the load result is unused. */
as->mrm.ofs += 4;
asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE);
if (LJ_64 && irt_type(ir->t) >= IRT_NUM) {
- lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
+ lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t),
+ "bad load type %d", irt_type(ir->t));
+#if LJ_GC64
+ emit_u32(as, LJ_TISNUM << 15);
+#else
emit_u32(as, LJ_TISNUM);
+#endif
+ emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
+#if LJ_GC64
+ } else if (irt_isaddr(ir->t)) {
+ as->mrm.ofs -= 4;
+ emit_i8(as, irt_toitype(ir->t));
+ emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp);
+ emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+ emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM);
+ } else if (irt_isnil(ir->t)) {
+ as->mrm.ofs -= 4;
+ emit_i8(as, -1);
+ emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM);
+ } else {
+ emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff);
emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM);
+#else
} else {
emit_i8(as, irt_toitype(ir->t));
emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM);
+#endif
}
}
@@ -1410,12 +1629,28 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
Reg src = ra_alloc1(as, ir->op2, RSET_FPR);
asm_fuseahuref(as, ir->op1, RSET_GPR);
emit_mrm(as, XO_MOVSDto, src, RID_MRM);
-#if LJ_64
+#if LJ_64 && !LJ_GC64
} else if (irt_islightud(ir->t)) {
Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src));
emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
#endif
+#if LJ_GC64
+ } else if (irref_isk(ir->op2)) {
+ TValue k;
+ lj_ir_kvalue(as->J->L, &k, IR(ir->op2));
+ asm_fuseahuref(as, ir->op1, RSET_GPR);
+ if (tvisnil(&k)) {
+ emit_i32(as, -1);
+ emit_mrm(as, XO_MOVmi, REX_64, RID_MRM);
+ } else {
+ emit_u32(as, k.u32.lo);
+ emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+ as->mrm.ofs += 4;
+ emit_u32(as, k.u32.hi);
+ emit_mrm(as, XO_MOVmi, 0, RID_MRM);
+ }
+#endif
} else {
IRIns *irr = IR(ir->op2);
RegSet allow = RSET_GPR;
@@ -1426,34 +1661,56 @@ static void asm_ahustore(ASMState *as, IRIns *ir)
}
asm_fuseahuref(as, ir->op1, allow);
if (ra_hasreg(src)) {
+#if LJ_GC64
+ if (!(LJ_DUALNUM && irt_isinteger(ir->t))) {
+ /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+ as->mrm.ofs += 4;
+ emit_u32(as, irt_toitype(ir->t) << 15);
+ emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM);
+ as->mrm.ofs -= 4;
+ emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM);
+ return;
+ }
+#endif
emit_mrm(as, XO_MOVto, src, RID_MRM);
} else if (!irt_ispri(irr->t)) {
- lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)));
+ lj_assertA(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)),
+ "bad store type");
emit_i32(as, irr->i);
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
}
as->mrm.ofs += 4;
+#if LJ_GC64
+ lj_assertA(LJ_DUALNUM && irt_isinteger(ir->t), "bad store type");
+ emit_i32(as, LJ_TNUMX << 15);
+#else
emit_i32(as, (int32_t)irt_toitype(ir->t));
+#endif
emit_mrm(as, XO_MOVmi, 0, RID_MRM);
}
}
static void asm_sload(ASMState *as, IRIns *ir)
{
- int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
+ int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
+ (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
IRType1 t = ir->t;
Reg base;
- lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */
- lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
- lua_assert(LJ_DUALNUM ||
- !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+ lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
+ "bad parent SLOAD"); /* Handled by asm_head_side(). */
+ lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK),
+ "inconsistent SLOAD variant");
+ lj_assertA(LJ_DUALNUM ||
+ !irt_isint(t) ||
+ (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
+ "bad SLOAD type");
if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
Reg left = ra_scratch(as, RSET_FPR);
asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */
base = ra_alloc1(as, REF_BASE, RSET_GPR);
- emit_rmro(as, XMM_MOVRM(as), left, base, ofs);
+ emit_rmro(as, XO_MOVSD, left, base, ofs);
t.irt = IRT_NUM; /* Continue with a regular number type check. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
} else if (irt_islightud(t)) {
Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK));
if (ra_hasreg(dest)) {
@@ -1466,14 +1723,43 @@ static void asm_sload(ASMState *as, IRIns *ir)
RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR;
Reg dest = ra_dest(as, ir, allow);
base = ra_alloc1(as, REF_BASE, RSET_GPR);
- lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
+ lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
+ "bad SLOAD type %d", irt_type(t));
if ((ir->op2 & IRSLOAD_CONVERT)) {
t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */
- emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs);
- } else if (irt_isnum(t)) {
- emit_rmro(as, XMM_MOVRM(as), dest, base, ofs);
+ emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs);
} else {
- emit_rmro(as, XO_MOV, dest, base, ofs);
+#if LJ_GC64
+ if (irt_isaddr(t)) {
+ /* LJ_GC64 type check + tag removal without BMI2 and with BMI2:
+ **
+ ** mov r64, [addr] rorx r64, [addr], 47
+ ** ror r64, 47
+ ** cmp r16, itype cmp r16, itype
+ ** jne ->exit jne ->exit
+ ** shr r64, 16 shr r64, 16
+ */
+ emit_shifti(as, XOg_SHR|REX_64, dest, 17);
+ if ((ir->op2 & IRSLOAD_TYPECHECK)) {
+ asm_guardcc(as, CC_NE);
+ emit_i8(as, irt_toitype(t));
+ emit_rr(as, XO_ARITHi8, XOg_CMP, dest);
+ emit_i8(as, XI_O16);
+ }
+ if ((as->flags & JIT_F_BMI2)) {
+ emit_i8(as, 47);
+ emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs);
+ } else {
+ if ((ir->op2 & IRSLOAD_TYPECHECK))
+ emit_shifti(as, XOg_ROR|REX_64, dest, 47);
+ else
+ emit_shifti(as, XOg_SHL|REX_64, dest, 17);
+ emit_rmro(as, XO_MOV, dest|REX_64, base, ofs);
+ }
+ return;
+ } else
+#endif
+ emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs);
}
} else {
if (!(ir->op2 & IRSLOAD_TYPECHECK))
@@ -1483,13 +1769,42 @@ static void asm_sload(ASMState *as, IRIns *ir)
if ((ir->op2 & IRSLOAD_TYPECHECK)) {
/* Need type check, even if the load result is unused. */
asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE);
- if (LJ_64 && irt_type(t) >= IRT_NUM) {
- lua_assert(irt_isinteger(t) || irt_isnum(t));
- emit_u32(as, LJ_TISNUM);
+ if ((LJ_64 && irt_type(t) >= IRT_NUM) || (ir->op2 & IRSLOAD_KEYINDEX)) {
+ lj_assertA(irt_isinteger(t) || irt_isnum(t),
+ "bad SLOAD type %d", irt_type(t));
+ emit_u32(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX :
+ LJ_GC64 ? (LJ_TISNUM << 15) : LJ_TISNUM);
emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
+#if LJ_GC64
+ } else if (irt_isnil(t)) {
+ /* LJ_GC64 type check for nil:
+ **
+ ** cmp qword [addr], -1
+ ** jne ->exit
+ */
+ emit_i8(as, -1);
+ emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs);
+ } else if (irt_ispri(t)) {
+ emit_u32(as, (irt_toitype(t) << 15) | 0x7fff);
+ emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4);
+ } else {
+ /* LJ_GC64 type check only:
+ **
+ ** mov r64, [addr]
+ ** sar r64, 47
+ ** cmp r32, itype
+ ** jne ->exit
+ */
+ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base));
+ emit_i8(as, irt_toitype(t));
+ emit_rr(as, XO_ARITHi8, XOg_CMP, tmp);
+ emit_shifti(as, XOg_SAR|REX_64, tmp, 47);
+ emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs);
+#else
} else {
emit_i8(as, irt_toitype(t));
emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4);
+#endif
}
}
}
@@ -1500,15 +1815,14 @@ static void asm_sload(ASMState *as, IRIns *ir)
static void asm_cnew(ASMState *as, IRIns *ir)
{
CTState *cts = ctype_ctsG(J2G(as->J));
- CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
- CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
- lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
+ CTypeID id = (CTypeID)IR(ir->op1)->i;
+ CTSize sz;
+ CTInfo info = lj_ctype_info(cts, id, &sz);
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
- IRRef args[2];
- lua_assert(sz != CTSIZE_INVALID);
+ IRRef args[4];
+ lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
+ "bad CNEW/CNEWI operands");
- args[0] = ASMREF_L; /* lua_State *L */
- args[1] = ASMREF_TMP1; /* MSize size */
as->gcsteps++;
asm_setupresult(as, ir, ci); /* GCcdata * */
@@ -1519,8 +1833,9 @@ static void asm_cnew(ASMState *as, IRIns *ir)
Reg r64 = sz == 8 ? REX_64 : 0;
if (irref_isk(ir->op2)) {
IRIns *irk = IR(ir->op2);
- uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 :
- (uint64_t)(uint32_t)irk->i;
+ uint64_t k = (irk->o == IR_KINT64 ||
+ (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ?
+ ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i;
if (sz == 4 || checki32((int64_t)k)) {
emit_i32(as, (int32_t)k);
emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata));
@@ -1536,7 +1851,7 @@ static void asm_cnew(ASMState *as, IRIns *ir)
int32_t ofs = sizeof(GCcdata);
if (sz == 8) {
ofs += 4; ir++;
- lua_assert(ir->o == IR_HIOP);
+ lj_assertA(ir->o == IR_HIOP, "missing CNEWI HIOP");
}
do {
if (irref_isk(ir->op2)) {
@@ -1550,21 +1865,30 @@ static void asm_cnew(ASMState *as, IRIns *ir)
ofs -= 4; ir--;
} while (1);
#endif
- lua_assert(sz == 4 || sz == 8);
+ lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
+ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */
+ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ir->op1; /* CTypeID id */
+ args[2] = ir->op2; /* CTSize sz */
+ args[3] = ASMREF_TMP1; /* CTSize align */
+ asm_gencall(as, ci, args);
+ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
+ return;
}
/* Combine initialization of marked, gct and ctypeid. */
emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked));
emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX,
- (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16)));
+ (int32_t)((~LJ_TCDATA<<8)+(id<<16)));
emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES);
emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite);
+ args[0] = ASMREF_L; /* lua_State *L */
+ args[1] = ASMREF_TMP1; /* MSize size */
asm_gencall(as, ci, args);
emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata)));
}
-#else
-#define asm_cnew(as, ir) ((void)0)
#endif
/* -- Write barriers ------------------------------------------------------ */
@@ -1574,7 +1898,7 @@ static void asm_tbar(ASMState *as, IRIns *ir)
Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab));
MCLabel l_end = emit_label(as);
- emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist));
+ emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist));
emit_setgl(as, tab, gc.grayagain);
emit_getgl(as, tmp, gc.grayagain);
emit_i8(as, ~LJ_GC_BLACK);
@@ -1591,7 +1915,7 @@ static void asm_obar(ASMState *as, IRIns *ir)
MCLabel l_end;
Reg obj;
/* No need for other object barriers (yet). */
- lua_assert(IR(ir->op1)->o == IR_UREFC);
+ lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
ra_evictset(as, RSET_SCRATCH);
l_end = emit_label(as);
args[0] = ASMREF_TMP1; /* global_State *g */
@@ -1637,36 +1961,9 @@ static void asm_x87load(ASMState *as, IRRef ref)
}
}
-/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */
-static int fpmjoin_pow(ASMState *as, IRIns *ir)
-{
- IRIns *irp = IR(ir->op1);
- if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
- IRIns *irpp = IR(irp->op1);
- if (irpp == ir-2 && irpp->o == IR_FPMATH &&
- irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
- /* The modified regs must match with the *.dasc implementation. */
- RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX);
- IRIns *irx;
- if (ra_hasreg(ir->r))
- rset_clear(drop, ir->r); /* Dest reg handled below. */
- ra_evictset(as, drop);
- ra_destreg(as, ir, RID_XMM0);
- emit_call(as, lj_vm_pow_sse);
- irx = IR(irpp->op1);
- if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1)
- irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */
- ra_left(as, RID_XMM0, irpp->op1);
- ra_left(as, RID_XMM1, irp->op2);
- return 1;
- }
- }
- return 0;
-}
-
static void asm_fpmath(ASMState *as, IRIns *ir)
{
- IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER;
+ IRFPMathOp fpm = (IRFPMathOp)ir->op2;
if (fpm == IRFPM_SQRT) {
Reg dest = ra_dest(as, ir, RSET_FPR);
Reg left = asm_fuseload(as, ir->op1, RSET_FPR);
@@ -1697,93 +1994,32 @@ static void asm_fpmath(ASMState *as, IRIns *ir)
fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse);
ra_left(as, RID_XMM0, ir->op1);
}
- } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) {
- /* Rejoined to pow(). */
- } else { /* Handle x87 ops. */
- int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
- Reg dest = ir->r;
- if (ra_hasreg(dest)) {
- ra_free(as, dest);
- ra_modified(as, dest);
- emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
- }
- emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
- switch (fpm) { /* st0 = lj_vm_*(st0) */
- case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break;
- case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break;
- case IRFPM_SIN: emit_x87op(as, XI_FSIN); break;
- case IRFPM_COS: emit_x87op(as, XI_FCOS); break;
- case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break;
- case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10:
- /* Note: the use of fyl2xp1 would be pointless here. When computing
- ** log(1.0+eps) the precision is already lost after 1.0 is added.
- ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense.
- */
- emit_x87op(as, XI_FYL2X); break;
- case IRFPM_OTHER:
- switch (ir->o) {
- case IR_ATAN2:
- emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break;
- case IR_LDEXP:
- emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break;
- default: lua_assert(0); break;
- }
- break;
- default: lua_assert(0); break;
- }
- asm_x87load(as, ir->op1);
- switch (fpm) {
- case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break;
- case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break;
- case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break;
- case IRFPM_OTHER:
- if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2);
- break;
- default: break;
- }
+ } else {
+ asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
}
}
-static void asm_fppowi(ASMState *as, IRIns *ir)
-{
- /* The modified regs must match with the *.dasc implementation. */
- RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX);
- if (ra_hasreg(ir->r))
- rset_clear(drop, ir->r); /* Dest reg handled below. */
- ra_evictset(as, drop);
- ra_destreg(as, ir, RID_XMM0);
- emit_call(as, lj_vm_powi_sse);
- ra_left(as, RID_XMM0, ir->op1);
- ra_left(as, RID_EAX, ir->op2);
-}
-
-#if LJ_64 && LJ_HASFFI
-static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id)
-{
- const CCallInfo *ci = &lj_ir_callinfo[id];
- IRRef args[2];
- args[0] = ir->op1;
- args[1] = ir->op2;
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
-}
-#endif
-
-static void asm_intmod(ASMState *as, IRIns *ir)
+static void asm_ldexp(ASMState *as, IRIns *ir)
{
- const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi];
- IRRef args[2];
- args[0] = ir->op1;
- args[1] = ir->op2;
- asm_setupresult(as, ir, ci);
- asm_gencall(as, ci, args);
+ int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */
+ Reg dest = ir->r;
+ if (ra_hasreg(dest)) {
+ ra_free(as, dest);
+ ra_modified(as, dest);
+ emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs);
+ }
+ emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
+ emit_x87op(as, XI_FPOP1);
+ emit_x87op(as, XI_FSCALE);
+ asm_x87load(as, ir->op1);
+ asm_x87load(as, ir->op2);
}
static int asm_swapops(ASMState *as, IRIns *ir)
{
IRIns *irl = IR(ir->op1);
IRIns *irr = IR(ir->op2);
- lua_assert(ra_noreg(irr->r));
+ lj_assertA(ra_noreg(irr->r), "bad usage");
if (!irm_iscomm(lj_ir_mode[ir->o]))
return 0; /* Can't swap non-commutative operations. */
if (irref_isk(ir->op2))
@@ -1955,11 +2191,28 @@ static void asm_add(ASMState *as, IRIns *ir)
{
if (irt_isnum(ir->t))
asm_fparith(as, ir, XO_ADDSD);
- else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp ||
- irt_is64(ir->t) || !asm_lea(as, ir))
+ else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir))
asm_intarith(as, ir, XOg_ADD);
}
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t))
+ asm_fparith(as, ir, XO_SUBSD);
+ else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
+ asm_intarith(as, ir, XOg_SUB);
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t))
+ asm_fparith(as, ir, XO_MULSD);
+ else
+ asm_intarith(as, ir, XOg_X_IMUL);
+}
+
+#define asm_fpdiv(as, ir) asm_fparith(as, ir, XO_DIVSD)
+
static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
@@ -1967,7 +2220,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg)
ra_left(as, dest, ir->op1);
}
-static void asm_min_max(ASMState *as, IRIns *ir, int cc)
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t))
+ asm_fparith(as, ir, XO_XORPS);
+ else
+ asm_neg_not(as, ir, XOg_NEG);
+}
+
+#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS)
+
+static void asm_intmin_max(ASMState *as, IRIns *ir, int cc)
{
Reg right, dest = ra_dest(as, ir, RSET_GPR);
IRRef lref = ir->op1, rref = ir->op2;
@@ -1978,7 +2241,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc)
ra_left(as, dest, lref);
}
-static void asm_bitswap(ASMState *as, IRIns *ir)
+static void asm_min(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t))
+ asm_fparith(as, ir, XO_MINSD);
+ else
+ asm_intmin_max(as, ir, CC_G);
+}
+
+static void asm_max(ASMState *as, IRIns *ir)
+{
+ if (irt_isnum(ir->t))
+ asm_fparith(as, ir, XO_MAXSD);
+ else
+ asm_intmin_max(as, ir, CC_L);
+}
+
+/* Note: don't use LEA for overflow-checking arithmetic! */
+#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD)
+#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB)
+#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL)
+
+#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT)
+
+static void asm_bswap(ASMState *as, IRIns *ir)
{
Reg dest = ra_dest(as, ir, RSET_GPR);
as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24),
@@ -1986,7 +2272,11 @@ static void asm_bitswap(ASMState *as, IRIns *ir)
ra_left(as, dest, ir->op1);
}
-static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
+#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND)
+#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR)
+#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR)
+
+static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv)
{
IRRef rref = ir->op2;
IRIns *irr = IR(rref);
@@ -1995,17 +2285,33 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
int shift;
dest = ra_dest(as, ir, RSET_GPR);
shift = irr->i & (irt_is64(ir->t) ? 63 : 31);
+ if (!xv && shift && (as->flags & JIT_F_BMI2)) {
+ Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t));
+ if (left != dest) { /* BMI2 rotate right by constant. */
+ emit_i8(as, xs == XOg_ROL ? -shift : shift);
+ emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left);
+ return;
+ }
+ }
switch (shift) {
case 0: break;
case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break;
default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break;
}
+ } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */
+ Reg left, right;
+ dest = ra_dest(as, ir, RSET_GPR);
+ right = ra_alloc1(as, rref, RSET_GPR);
+ left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right),
+ irt_is64(ir->t));
+ emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left);
+ return;
} else { /* Variable shifts implicitly use register cl (i.e. ecx). */
Reg right;
dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX));
if (dest == RID_ECX) {
dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX));
- emit_rr(as, XO_MOV, RID_ECX, dest);
+ emit_rr(as, XO_MOV, REX_64IR(ir, RID_ECX), dest);
}
right = irr->r;
if (ra_noreg(right))
@@ -2025,6 +2331,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs)
*/
}
+#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX)
+#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX)
+#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX)
+#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0)
+#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0)
+
/* -- Comparisons --------------------------------------------------------- */
/* Virtual flags for unordered FP comparisons. */
@@ -2051,8 +2363,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = {
};
/* FP and integer comparisons. */
-static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
+static void asm_comp(ASMState *as, IRIns *ir)
{
+ uint32_t cc = asm_compmap[ir->o];
if (irt_isnum(ir->t)) {
IRRef lref = ir->op1;
IRRef rref = ir->op2;
@@ -2073,7 +2386,6 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */
}
left = ra_alloc1(as, lref, RSET_FPR);
- right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
l_around = emit_label(as);
asm_guardcc(as, cc >> 4);
if (cc & VCC_P) { /* Extra CC_P branch required? */
@@ -2090,14 +2402,16 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
emit_jcc(as, CC_P, as->mcp);
}
}
+ right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left));
emit_mrm(as, XO_UCOMISD, left, right);
} else {
IRRef lref = ir->op1, rref = ir->op2;
IROp leftop = (IROp)(IR(lref)->o);
Reg r64 = REX_64IR(ir, 0);
int32_t imm = 0;
- lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
- irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
+ lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) ||
+ irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t),
+ "bad comparison data type %d", irt_type(ir->t));
/* Swap constants (only for ABC) and fusable loads to the right. */
if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */
@@ -2179,7 +2493,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
/* Use test r,r instead of cmp r,0. */
x86Op xo = XO_TEST;
if (irt_isu8(ir->t)) {
- lua_assert(ir->o == IR_EQ || ir->o == IR_NE);
+ lj_assertA(ir->o == IR_EQ || ir->o == IR_NE, "bad usage");
xo = XO_TESTb;
if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) {
if (LJ_64) {
@@ -2207,6 +2521,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc)
}
}
+#define asm_equal(as, ir) asm_comp(as, ir)
+
#if LJ_32 && LJ_HASFFI
/* 64 bit integer comparisons in 32 bit mode. */
static void asm_comp_int64(ASMState *as, IRIns *ir)
@@ -2279,23 +2595,19 @@ static void asm_comp_int64(ASMState *as, IRIns *ir)
}
#endif
-/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
+/* -- Split register ops -------------------------------------------------- */
-/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
+/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
-#if LJ_32 && LJ_HASFFI
/* HIOP is marked as a store because it needs its own DCE logic. */
int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */
if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
+#if LJ_32 && LJ_HASFFI
if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */
- if (usehi || uselo) {
- if (irt_isfp(ir->t))
- asm_conv_fp_int64(as, ir);
- else
- asm_conv_int64_fp(as, ir);
- }
as->curins--; /* Always skip the CONV. */
+ if (usehi || uselo)
+ asm_conv64(as, ir);
return;
} else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */
asm_comp_int64(as, ir);
@@ -2305,8 +2617,10 @@ static void asm_hiop(ASMState *as, IRIns *ir)
asm_fxstore(as, ir);
return;
}
+#endif
if (!usehi) return; /* Skip unused hiword op for all remaining ops. */
switch ((ir-1)->o) {
+#if LJ_32 && LJ_HASFFI
case IR_ADD:
as->flagmcp = NULL;
as->curins--;
@@ -2329,19 +2643,26 @@ static void asm_hiop(ASMState *as, IRIns *ir)
asm_neg_not(as, ir-1, XOg_NEG);
break;
}
- case IR_CALLN:
- case IR_CALLXS:
- if (!uselo)
- ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
- break;
case IR_CNEWI:
/* Nothing to do here. Handled by CNEWI itself. */
break;
- default: lua_assert(0); break;
- }
-#else
- UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */
#endif
+ case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
+ if (!uselo)
+ ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */
+ break;
+ default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
+ }
+}
+
+/* -- Profiling ----------------------------------------------------------- */
+
+static void asm_prof(ASMState *as, IRIns *ir)
+{
+ UNUSED(ir);
+ asm_guardcc(as, CC_NE);
+ emit_i8(as, HOOK_PROFILE);
+ emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask);
}
/* -- Stack handling ------------------------------------------------------ */
@@ -2358,14 +2679,19 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0);
else
ra_modified(as, r);
- emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot));
+ emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot));
if (ra_hasreg(pbase) && pbase != r)
- emit_rr(as, XO_ARITH(XOg_SUB), r, pbase);
+ emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase);
else
+#if LJ_GC64
+ emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH,
+ (int32_t)dispofs(as, &J2G(as->J)->jit_base));
+#else
emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE,
ptr2addr(&J2G(as->J)->jit_base));
- emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack));
- emit_getgl(as, r, jit_L);
+#endif
+ emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack));
+ emit_getgl(as, r, cur_L);
if (allow == RSET_EMPTY) /* Spill temp. register. */
emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0);
}
@@ -2374,40 +2700,79 @@ static void asm_stack_check(ASMState *as, BCReg topslot,
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
SnapEntry *map = &as->T->snapmap[snap->mapofs];
- SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
+#if !LJ_FR2 || defined(LUA_USE_ASSERT)
+ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
+#endif
MSize n, nent = snap->nent;
/* Store the value of all modified slots to the Lua stack. */
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
BCReg s = snap_slot(sn);
- int32_t ofs = 8*((int32_t)s-1);
+ int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
IRRef ref = snap_ref(sn);
IRIns *ir = IR(ref);
if ((sn & SNAP_NORESTORE))
continue;
- if (irt_isnum(ir->t)) {
+ if ((sn & SNAP_KEYINDEX)) {
+ emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX);
+ if (irref_isk(ref)) {
+ emit_movmroi(as, RID_BASE, ofs, ir->i);
+ } else {
+ Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+ emit_movtomro(as, src, RID_BASE, ofs);
+ }
+ } else if (irt_isnum(ir->t)) {
Reg src = ra_alloc1(as, ref, RSET_FPR);
emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs);
} else {
- lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
- (LJ_DUALNUM && irt_isinteger(ir->t)));
+ lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) ||
+ (LJ_DUALNUM && irt_isinteger(ir->t)),
+ "restore of IR type %d", irt_type(ir->t));
if (!irref_isk(ref)) {
Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE));
+#if LJ_GC64
+ if (irt_is64(ir->t)) {
+ /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */
+ emit_u32(as, irt_toitype(ir->t) << 15);
+ emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4);
+ } else if (LJ_DUALNUM && irt_isinteger(ir->t)) {
+ emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15);
+ } else {
+ emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff);
+ }
+#endif
emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs);
+#if LJ_GC64
+ } else {
+ TValue k;
+ lj_ir_kvalue(as->J->L, &k, ir);
+ if (tvisnil(&k)) {
+ emit_i32(as, -1);
+ emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs);
+ } else {
+ emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi);
+ emit_movmroi(as, RID_BASE, ofs, k.u32.lo);
+ }
+#else
} else if (!irt_ispri(ir->t)) {
emit_movmroi(as, RID_BASE, ofs, ir->i);
+#endif
}
if ((sn & (SNAP_CONT|SNAP_FRAME))) {
+#if !LJ_FR2
if (s != 0) /* Do not overwrite link to previous frame. */
emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--));
+#endif
+#if !LJ_GC64
} else {
if (!(LJ_64 && irt_islightud(ir->t)))
emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t));
+#endif
}
}
checkmclim(as);
}
- lua_assert(map + nent == flinks);
+ lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
}
/* -- GC handling --------------------------------------------------------- */
@@ -2428,11 +2793,15 @@ static void asm_gc_check(ASMState *as)
args[1] = ASMREF_TMP2; /* MSize steps */
asm_gencall(as, ci, args);
tmp = ra_releasetmp(as, ASMREF_TMP1);
+#if LJ_GC64
+ emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G);
+#else
emit_loada(as, tmp, J2G(as->J));
+#endif
emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps);
/* Jump around GC step if GC total < GC threshold. */
emit_sjcc(as, CC_B, l_end);
- emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold);
+ emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold);
emit_getgl(as, tmp, gc.total);
as->gcsteps = 0;
checkmclim(as);
@@ -2447,16 +2816,16 @@ static void asm_loop_fixup(ASMState *as)
MCode *target = as->mcp;
if (as->realign) { /* Realigned loops use short jumps. */
as->realign = NULL; /* Stop another retry. */
- lua_assert(((intptr_t)target & 15) == 0);
+ lj_assertA(((intptr_t)target & 15) == 0, "loop realign failed");
if (as->loopinv) { /* Inverted loop branch? */
p -= 5;
p[0] = XI_JMP;
- lua_assert(target - p >= -128);
+ lj_assertA(target - p >= -128, "loop realign failed");
p[-1] = (MCode)(target - p); /* Patch sjcc. */
if (as->loopinv == 2)
p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */
} else {
- lua_assert(target - p >= -128);
+ lj_assertA(target - p >= -128, "loop realign failed");
p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */
p[-2] = XI_JMPs;
}
@@ -2485,6 +2854,12 @@ static void asm_loop_fixup(ASMState *as)
}
}
+/* Fixup the tail of the loop. */
+static void asm_loop_tail_fixup(ASMState *as)
+{
+ UNUSED(as); /* Nothing to do. */
+}
+
/* -- Head of trace ------------------------------------------------------- */
/* Coalesce BASE register for a root trace. */
@@ -2497,7 +2872,7 @@ static void asm_head_root_base(ASMState *as)
if (rset_test(as->modset, r) || irt_ismarked(ir->t))
ir->r = RID_INIT; /* No inheritance for modified BASE register. */
if (r != RID_BASE)
- emit_rr(as, XO_MOV, r, RID_BASE);
+ emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE);
}
}
@@ -2513,8 +2888,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
if (irp->r == r) {
rset_clear(allow, r); /* Mark same BASE register as coalesced. */
} else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
+ /* Move from coalesced parent reg. */
rset_clear(allow, irp->r);
- emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */
+ emit_rr(as, XO_MOV, r|REX_GC64, irp->r);
} else {
emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */
}
@@ -2532,7 +2908,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
MCode *target, *q;
int32_t spadj = as->T->spadjust;
if (spadj == 0) {
- p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
+ p -= LJ_64 ? 7 : 6;
} else {
MCode *p1;
/* Patch stack adjustment. */
@@ -2544,24 +2920,15 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
p1 = p-9;
*(int32_t *)p1 = spadj;
}
- if ((as->flags & JIT_F_LEA_AGU)) {
#if LJ_64
- p1[-4] = 0x48;
+ p1[-3] = 0x48;
#endif
- p1[-3] = (MCode)XI_LEA;
- p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
- p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
- } else {
-#if LJ_64
- p1[-3] = 0x48;
-#endif
- p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
- p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
- }
+ p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
+ p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
}
/* Patch exit branch. */
target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
- *(int32_t *)(p-4) = jmprel(p, target);
+ *(int32_t *)(p-4) = jmprel(as->J, p, target);
p[-5] = XI_JMP;
/* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */
for (q = as->mctop-1; q >= p; q--)
@@ -2588,168 +2955,11 @@ static void asm_tail_prep(ASMState *as)
as->invmcp = as->mcp = p;
} else {
/* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
- as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0));
+ as->mcp = p - (LJ_64 ? 7 : 6);
as->invmcp = NULL;
}
}
-/* -- Instruction dispatch ------------------------------------------------ */
-
-/* Assemble a single instruction. */
-static void asm_ir(ASMState *as, IRIns *ir)
-{
- switch ((IROp)ir->o) {
- /* Miscellaneous ops. */
- case IR_LOOP: asm_loop(as); break;
- case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
- case IR_USE:
- ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
- case IR_PHI: asm_phi(as, ir); break;
- case IR_HIOP: asm_hiop(as, ir); break;
- case IR_GCSTEP: asm_gcstep(as, ir); break;
-
- /* Guarded assertions. */
- case IR_LT: case IR_GE: case IR_LE: case IR_GT:
- case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
- case IR_EQ: case IR_NE: case IR_ABC:
- asm_comp(as, ir, asm_compmap[ir->o]);
- break;
-
- case IR_RETF: asm_retf(as, ir); break;
-
- /* Bit ops. */
- case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break;
- case IR_BSWAP: asm_bitswap(as, ir); break;
-
- case IR_BAND: asm_intarith(as, ir, XOg_AND); break;
- case IR_BOR: asm_intarith(as, ir, XOg_OR); break;
- case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break;
-
- case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break;
- case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break;
- case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break;
- case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break;
- case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break;
-
- /* Arithmetic ops. */
- case IR_ADD: asm_add(as, ir); break;
- case IR_SUB:
- if (irt_isnum(ir->t))
- asm_fparith(as, ir, XO_SUBSD);
- else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */
- asm_intarith(as, ir, XOg_SUB);
- break;
- case IR_MUL:
- if (irt_isnum(ir->t))
- asm_fparith(as, ir, XO_MULSD);
- else
- asm_intarith(as, ir, XOg_X_IMUL);
- break;
- case IR_DIV:
-#if LJ_64 && LJ_HASFFI
- if (!irt_isnum(ir->t))
- asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
- IRCALL_lj_carith_divu64);
- else
-#endif
- asm_fparith(as, ir, XO_DIVSD);
- break;
- case IR_MOD:
-#if LJ_64 && LJ_HASFFI
- if (!irt_isint(ir->t))
- asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
- IRCALL_lj_carith_modu64);
- else
-#endif
- asm_intmod(as, ir);
- break;
-
- case IR_NEG:
- if (irt_isnum(ir->t))
- asm_fparith(as, ir, XO_XORPS);
- else
- asm_neg_not(as, ir, XOg_NEG);
- break;
- case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break;
-
- case IR_MIN:
- if (irt_isnum(ir->t))
- asm_fparith(as, ir, XO_MINSD);
- else
- asm_min_max(as, ir, CC_G);
- break;
- case IR_MAX:
- if (irt_isnum(ir->t))
- asm_fparith(as, ir, XO_MAXSD);
- else
- asm_min_max(as, ir, CC_L);
- break;
-
- case IR_FPMATH: case IR_ATAN2: case IR_LDEXP:
- asm_fpmath(as, ir);
- break;
- case IR_POW:
-#if LJ_64 && LJ_HASFFI
- if (!irt_isnum(ir->t))
- asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
- IRCALL_lj_carith_powu64);
- else
-#endif
- asm_fppowi(as, ir);
- break;
-
- /* Overflow-checking arithmetic ops. Note: don't use LEA here! */
- case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break;
- case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break;
- case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break;
-
- /* Memory references. */
- case IR_AREF: asm_aref(as, ir); break;
- case IR_HREF: asm_href(as, ir); break;
- case IR_HREFK: asm_hrefk(as, ir); break;
- case IR_NEWREF: asm_newref(as, ir); break;
- case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
- case IR_FREF: asm_fref(as, ir); break;
- case IR_STRREF: asm_strref(as, ir); break;
-
- /* Loads and stores. */
- case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
- asm_ahuvload(as, ir);
- break;
- case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
- case IR_SLOAD: asm_sload(as, ir); break;
-
- case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
- case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break;
-
- /* Allocations. */
- case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
- case IR_TNEW: asm_tnew(as, ir); break;
- case IR_TDUP: asm_tdup(as, ir); break;
- case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;
-
- /* Write barriers. */
- case IR_TBAR: asm_tbar(as, ir); break;
- case IR_OBAR: asm_obar(as, ir); break;
-
- /* Type conversions. */
- case IR_TOBIT: asm_tobit(as, ir); break;
- case IR_CONV: asm_conv(as, ir); break;
- case IR_TOSTR: asm_tostr(as, ir); break;
- case IR_STRTO: asm_strto(as, ir); break;
-
- /* Calls. */
- case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
- case IR_CALLXS: asm_callx(as, ir); break;
- case IR_CARG: break;
-
- default:
- setintV(&as->J->errinfo, ir->o);
- lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
- break;
- }
-}
-
/* -- Trace setup --------------------------------------------------------- */
/* Ensure there are enough stack slots for call arguments. */
@@ -2772,6 +2982,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
static void asm_setup_target(ASMState *as)
{
asm_exitstub_setup(as, as->T->nsnap);
+ as->mrm.base = 0;
}
/* -- Trace patching ------------------------------------------------------ */
@@ -2885,18 +3096,24 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
MCode *px = exitstub_addr(J, exitno) - 6;
MCode *pe = p+len-6;
MCode *pgc = NULL;
- uint32_t stateaddr = u32ptr(&J2G(J)->vmstate);
+#if LJ_GC64
+ uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch));
+#else
+ uint32_t statei = u32ptr(&J2G(J)->vmstate);
+#endif
if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
- *(int32_t *)(p+len-4) = jmprel(p+len, target);
+ *(int32_t *)(p+len-4) = jmprel(J, p+len, target);
/* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
- for (; p < pe; p += asm_x86_inslen(p))
- if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi)
+ for (; p < pe; p += asm_x86_inslen(p)) {
+ intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64;
+ if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi)
break;
- lua_assert(p < pe);
+ }
+ lj_assertJ(p < pe, "instruction length decoder failed");
for (; p < pe; p += asm_x86_inslen(p)) {
if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px &&
p != pgc) {
- *(int32_t *)(p+2) = jmprel(p+6, target);
+ *(int32_t *)(p+2) = jmprel(J, p+6, target);
} else if (*p == XI_CALL &&
(void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) {
pgc = p+7; /* Do not patch GC check exit. */
diff --git a/src/lj_assert.c b/src/lj_assert.c
new file mode 100644
index 00000000..4b713b2b
--- /dev/null
+++ b/src/lj_assert.c
@@ -0,0 +1,28 @@
+/*
+** Internal assertions.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_assert_c
+#define LUA_CORE
+
+#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
+
+#include <stdio.h>
+
+#include "lj_obj.h"
+
+void lj_assert_fail(global_State *g, const char *file, int line,
+ const char *func, const char *fmt, ...)
+{
+ va_list argp;
+ va_start(argp, fmt);
+ fprintf(stderr, "LuaJIT ASSERT %s:%d: %s: ", file, line, func);
+ vfprintf(stderr, fmt, argp);
+ fputc('\n', stderr);
+ va_end(argp);
+ UNUSED(g); /* May be NULL. TODO: optionally dump state. */
+ abort();
+}
+
+#endif
diff --git a/src/lj_bc.h b/src/lj_bc.h
index 22c43caa..02356e5b 100644
--- a/src/lj_bc.h
+++ b/src/lj_bc.h
@@ -89,6 +89,8 @@
_(ISFC, dst, ___, var, ___) \
_(IST, ___, ___, var, ___) \
_(ISF, ___, ___, var, ___) \
+ _(ISTYPE, var, ___, lit, ___) \
+ _(ISNUM, var, ___, lit, ___) \
\
/* Unary ops. */ \
_(MOV, dst, ___, var, ___) \
@@ -143,10 +145,12 @@
_(TGETV, dst, var, var, index) \
_(TGETS, dst, var, str, index) \
_(TGETB, dst, var, lit, index) \
+ _(TGETR, dst, var, var, index) \
_(TSETV, var, var, var, newindex) \
_(TSETS, var, var, str, newindex) \
_(TSETB, var, var, lit, newindex) \
_(TSETM, base, ___, num, newindex) \
+ _(TSETR, var, var, var, newindex) \
\
/* Calls and vararg handling. T = tail call. */ \
_(CALLM, base, lit, lit, call) \
diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h
index 8ca62f80..69da16e9 100644
--- a/src/lj_bcdump.h
+++ b/src/lj_bcdump.h
@@ -36,14 +36,15 @@
/* If you perform *any* kind of private modifications to the bytecode itself
** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher.
*/
-#define BCDUMP_VERSION 1
+#define BCDUMP_VERSION 2
/* Compatibility flags. */
#define BCDUMP_F_BE 0x01
#define BCDUMP_F_STRIP 0x02
#define BCDUMP_F_FFI 0x04
+#define BCDUMP_F_FR2 0x08
-#define BCDUMP_F_KNOWN (BCDUMP_F_FFI*2-1)
+#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1)
/* Type codes for the GC constants of a prototype. Plus length for strings. */
enum {
@@ -61,6 +62,7 @@ enum {
LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer,
void *data, int strip);
+LJ_FUNC GCproto *lj_bcread_proto(LexState *ls);
LJ_FUNC GCproto *lj_bcread(LexState *ls);
#endif
diff --git a/src/lj_bcread.c b/src/lj_bcread.c
index 4a925f1c..2ce05707 100644
--- a/src/lj_bcread.c
+++ b/src/lj_bcread.c
@@ -9,6 +9,7 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_bc.h"
@@ -20,6 +21,7 @@
#include "lj_lex.h"
#include "lj_bcdump.h"
#include "lj_state.h"
+#include "lj_strfmt.h"
/* Reuse some lexer fields for our own purposes. */
#define bcread_flags(ls) ls->level
@@ -38,85 +40,74 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em)
const char *name = ls->chunkarg;
if (*name == BCDUMP_HEAD1) name = "(binary)";
else if (*name == '@' || *name == '=') name++;
- lj_str_pushf(L, "%s: %s", name, err2msg(em));
+ lj_strfmt_pushf(L, "%s: %s", name, err2msg(em));
lj_err_throw(L, LUA_ERRSYNTAX);
}
-/* Resize input buffer. */
-static void bcread_resize(LexState *ls, MSize len)
-{
- if (ls->sb.sz < len) {
- MSize sz = ls->sb.sz * 2;
- while (len > sz) sz = sz * 2;
- lj_str_resizebuf(ls->L, &ls->sb, sz);
- /* Caveat: this may change ls->sb.buf which may affect ls->p. */
- }
-}
-
-/* Refill buffer if needed. */
+/* Refill buffer. */
static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need)
{
- lua_assert(len != 0);
- if (len > LJ_MAX_MEM || ls->current < 0)
+ lj_assertLS(len != 0, "empty refill");
+ if (len > LJ_MAX_BUF || ls->c < 0)
bcread_error(ls, LJ_ERR_BCBAD);
do {
const char *buf;
- size_t size;
- if (ls->n) { /* Copy remainder to buffer. */
- if (ls->sb.n) { /* Move down in buffer. */
- lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n);
- if (ls->n != ls->sb.n)
- memmove(ls->sb.buf, ls->p, ls->n);
+ size_t sz;
+ char *p = ls->sb.b;
+ MSize n = (MSize)(ls->pe - ls->p);
+ if (n) { /* Copy remainder to buffer. */
+ if (sbuflen(&ls->sb)) { /* Move down in buffer. */
+ lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer");
+ if (ls->p != p) memmove(p, ls->p, n);
} else { /* Copy from buffer provided by reader. */
- bcread_resize(ls, len);
- memcpy(ls->sb.buf, ls->p, ls->n);
+ p = lj_buf_need(&ls->sb, len);
+ memcpy(p, ls->p, n);
}
- ls->p = ls->sb.buf;
+ ls->p = p;
+ ls->pe = p + n;
}
- ls->sb.n = ls->n;
- buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */
- if (buf == NULL || size == 0) { /* EOF? */
+ ls->sb.w = p + n;
+ buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */
+ if (buf == NULL || sz == 0) { /* EOF? */
if (need) bcread_error(ls, LJ_ERR_BCBAD);
- ls->current = -1; /* Only bad if we get called again. */
+ ls->c = -1; /* Only bad if we get called again. */
break;
}
- if (size >= LJ_MAX_MEM - ls->sb.n) lj_err_mem(ls->L);
- if (ls->sb.n) { /* Append to buffer. */
- MSize n = ls->sb.n + (MSize)size;
- bcread_resize(ls, n < len ? len : n);
- memcpy(ls->sb.buf + ls->sb.n, buf, size);
- ls->n = ls->sb.n = n;
- ls->p = ls->sb.buf;
+ if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L);
+ if (n) { /* Append to buffer. */
+ n += (MSize)sz;
+ p = lj_buf_need(&ls->sb, n < len ? len : n);
+ memcpy(ls->sb.w, buf, sz);
+ ls->sb.w = p + n;
+ ls->p = p;
+ ls->pe = p + n;
} else { /* Return buffer provided by reader. */
- ls->n = (MSize)size;
ls->p = buf;
+ ls->pe = buf + sz;
}
- } while (ls->n < len);
+ } while ((MSize)(ls->pe - ls->p) < len);
}
/* Need a certain number of bytes. */
static LJ_AINLINE void bcread_need(LexState *ls, MSize len)
{
- if (LJ_UNLIKELY(ls->n < len))
+ if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
bcread_fill(ls, len, 1);
}
/* Want to read up to a certain number of bytes, but may need less. */
static LJ_AINLINE void bcread_want(LexState *ls, MSize len)
{
- if (LJ_UNLIKELY(ls->n < len))
+ if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len))
bcread_fill(ls, len, 0);
}
-#define bcread_dec(ls) check_exp(ls->n > 0, ls->n--)
-#define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len))
-
/* Return memory block from buffer. */
-static uint8_t *bcread_mem(LexState *ls, MSize len)
+static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len)
{
uint8_t *p = (uint8_t *)ls->p;
- bcread_consume(ls, len);
- ls->p = (char *)p + len;
+ ls->p += len;
+ lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
return p;
}
@@ -129,25 +120,15 @@ static void bcread_block(LexState *ls, void *q, MSize len)
/* Read byte from buffer. */
static LJ_AINLINE uint32_t bcread_byte(LexState *ls)
{
- bcread_dec(ls);
+ lj_assertLS(ls->p < ls->pe, "buffer read overflow");
return (uint32_t)(uint8_t)*ls->p++;
}
/* Read ULEB128 value from buffer. */
-static uint32_t bcread_uleb128(LexState *ls)
+static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls)
{
- const uint8_t *p = (const uint8_t *)ls->p;
- uint32_t v = *p++;
- if (LJ_UNLIKELY(v >= 0x80)) {
- int sh = 0;
- v &= 0x7f;
- do {
- v |= ((*p & 0x7f) << (sh += 7));
- bcread_dec(ls);
- } while (*p++ >= 0x80);
- }
- bcread_dec(ls);
- ls->p = (char *)p;
+ uint32_t v = lj_buf_ruleb128(&ls->p);
+ lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
return v;
}
@@ -161,11 +142,10 @@ static uint32_t bcread_uleb128_33(LexState *ls)
v &= 0x3f;
do {
v |= ((*p & 0x7f) << (sh += 7));
- bcread_dec(ls);
} while (*p++ >= 0x80);
}
- bcread_dec(ls);
ls->p = (char *)p;
+ lj_assertLS(ls->p <= ls->pe, "buffer read overflow");
return v;
}
@@ -212,8 +192,8 @@ static void bcread_ktabk(LexState *ls, TValue *o)
o->u32.lo = bcread_uleb128(ls);
o->u32.hi = bcread_uleb128(ls);
} else {
- lua_assert(tp <= BCDUMP_KTAB_TRUE);
- setitype(o, ~tp);
+ lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp);
+ setpriV(o, ~tp);
}
}
@@ -234,7 +214,7 @@ static GCtab *bcread_ktab(LexState *ls)
for (i = 0; i < nhash; i++) {
TValue key;
bcread_ktabk(ls, &key);
- lua_assert(!tvisnil(&key));
+ lj_assertLS(!tvisnil(&key), "nil key");
bcread_ktabk(ls, lj_tab_set(ls->L, t, &key));
}
}
@@ -271,7 +251,7 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc)
#endif
} else {
lua_State *L = ls->L;
- lua_assert(tp == BCDUMP_KGC_CHILD);
+ lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp);
if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */
bcread_error(ls, LJ_ERR_BCBAD);
L->top--;
@@ -327,25 +307,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv)
}
/* Read a prototype. */
-static GCproto *bcread_proto(LexState *ls)
+GCproto *lj_bcread_proto(LexState *ls)
{
GCproto *pt;
MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept;
MSize ofsk, ofsuv, ofsdbg;
MSize sizedbg = 0;
BCLine firstline = 0, numline = 0;
- MSize len, startn;
-
- /* Read length. */
- if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */
- ls->n--; ls->p++;
- return NULL;
- }
- bcread_want(ls, 5);
- len = bcread_uleb128(ls);
- if (!len) return NULL; /* EOF */
- bcread_need(ls, len);
- startn = ls->n;
/* Read prototype header. */
flags = bcread_byte(ls);
@@ -414,9 +382,6 @@ static GCproto *bcread_proto(LexState *ls)
setmref(pt->uvinfo, NULL);
setmref(pt->varinfo, NULL);
}
-
- if (len != startn - ls->n)
- bcread_error(ls, LJ_ERR_BCBAD);
return pt;
}
@@ -430,14 +395,11 @@ static int bcread_header(LexState *ls)
bcread_byte(ls) != BCDUMP_VERSION) return 0;
bcread_flags(ls) = flags = bcread_uleb128(ls);
if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0;
+ if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0;
if ((flags & BCDUMP_F_FFI)) {
#if LJ_HASFFI
lua_State *L = ls->L;
- if (!ctype_ctsG(G(L))) {
- ptrdiff_t oldtop = savestack(L, L->top);
- luaopen_ffi(L); /* Load FFI library on-demand. */
- L->top = restorestack(L, oldtop);
- }
+ ctype_loadffi(L);
#else
return 0;
#endif
@@ -456,19 +418,33 @@ static int bcread_header(LexState *ls)
GCproto *lj_bcread(LexState *ls)
{
lua_State *L = ls->L;
- lua_assert(ls->current == BCDUMP_HEAD1);
+ lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header");
bcread_savetop(L, ls, L->top);
- lj_str_resetbuf(&ls->sb);
+ lj_buf_reset(&ls->sb);
/* Check for a valid bytecode dump header. */
if (!bcread_header(ls))
bcread_error(ls, LJ_ERR_BCFMT);
for (;;) { /* Process all prototypes in the bytecode dump. */
- GCproto *pt = bcread_proto(ls);
- if (!pt) break;
+ GCproto *pt;
+ MSize len;
+ const char *startp;
+ /* Read length. */
+ if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */
+ ls->p++;
+ break;
+ }
+ bcread_want(ls, 5);
+ len = bcread_uleb128(ls);
+ if (!len) break; /* EOF */
+ bcread_need(ls, len);
+ startp = ls->p;
+ pt = lj_bcread_proto(ls);
+ if (ls->p != startp + len)
+ bcread_error(ls, LJ_ERR_BCBAD);
setprotoV(L, L->top, pt);
incr_top(L);
}
- if ((ls->n && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
+ if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls))
bcread_error(ls, LJ_ERR_BCBAD);
/* Pop off last prototype. */
L->top--;
diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c
index d836497e..2c70ff47 100644
--- a/src/lj_bcwrite.c
+++ b/src/lj_bcwrite.c
@@ -8,7 +8,7 @@
#include "lj_obj.h"
#include "lj_gc.h"
-#include "lj_str.h"
+#include "lj_buf.h"
#include "lj_bc.h"
#if LJ_HASFFI
#include "lj_ctype.h"
@@ -17,99 +17,67 @@
#include "lj_dispatch.h"
#include "lj_jit.h"
#endif
+#include "lj_strfmt.h"
#include "lj_bcdump.h"
#include "lj_vm.h"
/* Context for bytecode writer. */
typedef struct BCWriteCtx {
SBuf sb; /* Output buffer. */
- lua_State *L; /* Lua state. */
GCproto *pt; /* Root prototype. */
lua_Writer wfunc; /* Writer callback. */
void *wdata; /* Writer callback data. */
int strip; /* Strip debug info. */
int status; /* Status from writer callback. */
+#ifdef LUA_USE_ASSERT
+ global_State *g;
+#endif
} BCWriteCtx;
-/* -- Output buffer handling ---------------------------------------------- */
-
-/* Resize buffer if needed. */
-static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len)
-{
- MSize sz = ctx->sb.sz * 2;
- while (ctx->sb.n + len > sz) sz = sz * 2;
- lj_str_resizebuf(ctx->L, &ctx->sb, sz);
-}
-
-/* Need a certain amount of buffer space. */
-static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len)
-{
- if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz))
- bcwrite_resize(ctx, len);
-}
-
-/* Add memory block to buffer. */
-static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len)
-{
- uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n);
- MSize i;
- ctx->sb.n += len;
- for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i];
-}
-
-/* Add byte to buffer. */
-static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b)
-{
- ctx->sb.buf[ctx->sb.n++] = b;
-}
-
-/* Add ULEB128 value to buffer. */
-static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v)
-{
- MSize n = ctx->sb.n;
- uint8_t *p = (uint8_t *)ctx->sb.buf;
- for (; v >= 0x80; v >>= 7)
- p[n++] = (uint8_t)((v & 0x7f) | 0x80);
- p[n++] = (uint8_t)v;
- ctx->sb.n = n;
-}
+#ifdef LUA_USE_ASSERT
+#define lj_assertBCW(c, ...) lj_assertG_(ctx->g, (c), __VA_ARGS__)
+#else
+#define lj_assertBCW(c, ...) ((void)ctx)
+#endif
/* -- Bytecode writer ----------------------------------------------------- */
/* Write a single constant key/value of a template table. */
static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
{
- bcwrite_need(ctx, 1+10);
+ char *p = lj_buf_more(&ctx->sb, 1+10);
if (tvisstr(o)) {
const GCstr *str = strV(o);
MSize len = str->len;
- bcwrite_need(ctx, 5+len);
- bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len);
- bcwrite_block(ctx, strdata(str), len);
+ p = lj_buf_more(&ctx->sb, 5+len);
+ p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len);
+ p = lj_buf_wmem(p, strdata(str), len);
} else if (tvisint(o)) {
- bcwrite_byte(ctx, BCDUMP_KTAB_INT);
- bcwrite_uleb128(ctx, intV(o));
+ *p++ = BCDUMP_KTAB_INT;
+ p = lj_strfmt_wuleb128(p, intV(o));
} else if (tvisnum(o)) {
if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
lua_Number num = numV(o);
int32_t k = lj_num2int(num);
if (num == (lua_Number)k) { /* -0 is never a constant. */
- bcwrite_byte(ctx, BCDUMP_KTAB_INT);
- bcwrite_uleb128(ctx, k);
+ *p++ = BCDUMP_KTAB_INT;
+ p = lj_strfmt_wuleb128(p, k);
+ ctx->sb.w = p;
return;
}
}
- bcwrite_byte(ctx, BCDUMP_KTAB_NUM);
- bcwrite_uleb128(ctx, o->u32.lo);
- bcwrite_uleb128(ctx, o->u32.hi);
+ *p++ = BCDUMP_KTAB_NUM;
+ p = lj_strfmt_wuleb128(p, o->u32.lo);
+ p = lj_strfmt_wuleb128(p, o->u32.hi);
} else {
- lua_assert(tvispri(o));
- bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o));
+ lj_assertBCW(tvispri(o), "unhandled type %d", itype(o));
+ *p++ = BCDUMP_KTAB_NIL+~itype(o);
}
+ ctx->sb.w = p;
}
/* Write a template table. */
-static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
+static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t)
{
MSize narray = 0, nhash = 0;
if (t->asize > 0) { /* Determine max. length of array part. */
@@ -127,8 +95,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t)
nhash += !tvisnil(&node[i].val);
}
/* Write number of array slots and hash slots. */
- bcwrite_uleb128(ctx, narray);
- bcwrite_uleb128(ctx, nhash);
+ p = lj_strfmt_wuleb128(p, narray);
+ p = lj_strfmt_wuleb128(p, nhash);
+ ctx->sb.w = p;
if (narray) { /* Write array entries (may contain nil). */
MSize i;
TValue *o = tvref(t->array);
@@ -155,12 +124,13 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
for (i = 0; i < sizekgc; i++, kr++) {
GCobj *o = gcref(*kr);
MSize tp, need = 1;
+ char *p;
/* Determine constant type and needed size. */
if (o->gch.gct == ~LJ_TSTR) {
tp = BCDUMP_KGC_STR + gco2str(o)->len;
need = 5+gco2str(o)->len;
} else if (o->gch.gct == ~LJ_TPROTO) {
- lua_assert((pt->flags & PROTO_CHILD));
+ lj_assertBCW((pt->flags & PROTO_CHILD), "prototype has unexpected child");
tp = BCDUMP_KGC_CHILD;
#if LJ_HASFFI
} else if (o->gch.gct == ~LJ_TCDATA) {
@@ -171,34 +141,38 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt)
} else if (id == CTID_UINT64) {
tp = BCDUMP_KGC_U64;
} else {
- lua_assert(id == CTID_COMPLEX_DOUBLE);
+ lj_assertBCW(id == CTID_COMPLEX_DOUBLE,
+ "bad cdata constant CTID %d", id);
tp = BCDUMP_KGC_COMPLEX;
}
#endif
} else {
- lua_assert(o->gch.gct == ~LJ_TTAB);
+ lj_assertBCW(o->gch.gct == ~LJ_TTAB,
+ "bad constant GC type %d", o->gch.gct);
tp = BCDUMP_KGC_TAB;
need = 1+2*5;
}
/* Write constant type. */
- bcwrite_need(ctx, need);
- bcwrite_uleb128(ctx, tp);
+ p = lj_buf_more(&ctx->sb, need);
+ p = lj_strfmt_wuleb128(p, tp);
/* Write constant data (if any). */
if (tp >= BCDUMP_KGC_STR) {
- bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len);
+ p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len);
} else if (tp == BCDUMP_KGC_TAB) {
- bcwrite_ktab(ctx, gco2tab(o));
+ bcwrite_ktab(ctx, p, gco2tab(o));
+ continue;
#if LJ_HASFFI
} else if (tp != BCDUMP_KGC_CHILD) {
- cTValue *p = (TValue *)cdataptr(gco2cd(o));
- bcwrite_uleb128(ctx, p[0].u32.lo);
- bcwrite_uleb128(ctx, p[0].u32.hi);
+ cTValue *q = (TValue *)cdataptr(gco2cd(o));
+ p = lj_strfmt_wuleb128(p, q[0].u32.lo);
+ p = lj_strfmt_wuleb128(p, q[0].u32.hi);
if (tp == BCDUMP_KGC_COMPLEX) {
- bcwrite_uleb128(ctx, p[1].u32.lo);
- bcwrite_uleb128(ctx, p[1].u32.hi);
+ p = lj_strfmt_wuleb128(p, q[1].u32.lo);
+ p = lj_strfmt_wuleb128(p, q[1].u32.hi);
}
#endif
}
+ ctx->sb.w = p;
}
}
@@ -207,7 +181,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
{
MSize i, sizekn = pt->sizekn;
cTValue *o = mref(pt->k, TValue);
- bcwrite_need(ctx, 10*sizekn);
+ char *p = lj_buf_more(&ctx->sb, 10*sizekn);
for (i = 0; i < sizekn; i++, o++) {
int32_t k;
if (tvisint(o)) {
@@ -220,55 +194,55 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
k = lj_num2int(num);
if (num == (lua_Number)k) { /* -0 is never a constant. */
save_int:
- bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u));
- if (k < 0) {
- char *p = &ctx->sb.buf[ctx->sb.n-1];
- *p = (*p & 7) | ((k>>27) & 0x18);
- }
+ p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
+ if (k < 0)
+ p[-1] = (p[-1] & 7) | ((k>>27) & 0x18);
continue;
}
}
- bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
- if (o->u32.lo >= 0x80000000u) {
- char *p = &ctx->sb.buf[ctx->sb.n-1];
- *p = (*p & 7) | ((o->u32.lo>>27) & 0x18);
- }
- bcwrite_uleb128(ctx, o->u32.hi);
+ p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u)));
+ if (o->u32.lo >= 0x80000000u)
+ p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18);
+ p = lj_strfmt_wuleb128(p, o->u32.hi);
}
}
+ ctx->sb.w = p;
}
/* Write bytecode instructions. */
-static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt)
+static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt)
{
MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */
#if LJ_HASJIT
- uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n];
+ uint8_t *q = (uint8_t *)p;
#endif
- bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
+ p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns));
+ UNUSED(ctx);
#if LJ_HASJIT
/* Unpatch modified bytecode containing ILOOP/JLOOP etc. */
if ((pt->flags & PROTO_ILOOP) || pt->trace) {
- jit_State *J = L2J(ctx->L);
+ jit_State *J = L2J(sbufL(&ctx->sb));
MSize i;
- for (i = 0; i < nbc; i++, p += sizeof(BCIns)) {
- BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)];
+ for (i = 0; i < nbc; i++, q += sizeof(BCIns)) {
+ BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)];
if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP ||
op == BC_JFORI) {
- p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
+ q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL);
} else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
- BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8);
- memcpy(p, &traceref(J, rd)->startins, 4);
+ BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8);
+ memcpy(q, &traceref(J, rd)->startins, 4);
}
}
}
#endif
+ return p;
}
/* Write prototype. */
static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
{
MSize sizedbg = 0;
+ char *p;
/* Recursively write children of prototype. */
if ((pt->flags & PROTO_CHILD)) {
@@ -282,31 +256,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
}
/* Start writing the prototype info to a buffer. */
- lj_str_resetbuf(&ctx->sb);
- ctx->sb.n = 5; /* Leave room for final size. */
- bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
+ p = lj_buf_need(&ctx->sb,
+ 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2);
+ p += 5; /* Leave room for final size. */
/* Write prototype header. */
- bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI)));
- bcwrite_byte(ctx, pt->numparams);
- bcwrite_byte(ctx, pt->framesize);
- bcwrite_byte(ctx, pt->sizeuv);
- bcwrite_uleb128(ctx, pt->sizekgc);
- bcwrite_uleb128(ctx, pt->sizekn);
- bcwrite_uleb128(ctx, pt->sizebc-1);
+ *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI));
+ *p++ = pt->numparams;
+ *p++ = pt->framesize;
+ *p++ = pt->sizeuv;
+ p = lj_strfmt_wuleb128(p, pt->sizekgc);
+ p = lj_strfmt_wuleb128(p, pt->sizekn);
+ p = lj_strfmt_wuleb128(p, pt->sizebc-1);
if (!ctx->strip) {
if (proto_lineinfo(pt))
sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt);
- bcwrite_uleb128(ctx, sizedbg);
+ p = lj_strfmt_wuleb128(p, sizedbg);
if (sizedbg) {
- bcwrite_uleb128(ctx, pt->firstline);
- bcwrite_uleb128(ctx, pt->numline);
+ p = lj_strfmt_wuleb128(p, pt->firstline);
+ p = lj_strfmt_wuleb128(p, pt->numline);
}
}
/* Write bytecode instructions and upvalue refs. */
- bcwrite_bytecode(ctx, pt);
- bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2);
+ p = bcwrite_bytecode(ctx, p, pt);
+ p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2);
+ ctx->sb.w = p;
/* Write constants. */
bcwrite_kgc(ctx, pt);
@@ -314,18 +289,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt)
/* Write debug info, if not stripped. */
if (sizedbg) {
- bcwrite_need(ctx, sizedbg);
- bcwrite_block(ctx, proto_lineinfo(pt), sizedbg);
+ p = lj_buf_more(&ctx->sb, sizedbg);
+ p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg);
+ ctx->sb.w = p;
}
/* Pass buffer to writer function. */
if (ctx->status == 0) {
- MSize n = ctx->sb.n - 5;
+ MSize n = sbuflen(&ctx->sb) - 5;
MSize nn = (lj_fls(n)+8)*9 >> 6;
- ctx->sb.n = 5 - nn;
- bcwrite_uleb128(ctx, n); /* Fill in final size. */
- lua_assert(ctx->sb.n == 5);
- ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata);
+ char *q = ctx->sb.b + (5 - nn);
+ p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */
+ lj_assertBCW(p == ctx->sb.b + 5, "bad ULEB128 write");
+ ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata);
}
}
@@ -335,20 +311,21 @@ static void bcwrite_header(BCWriteCtx *ctx)
GCstr *chunkname = proto_chunkname(ctx->pt);
const char *name = strdata(chunkname);
MSize len = chunkname->len;
- lj_str_resetbuf(&ctx->sb);
- bcwrite_need(ctx, 5+5+len);
- bcwrite_byte(ctx, BCDUMP_HEAD1);
- bcwrite_byte(ctx, BCDUMP_HEAD2);
- bcwrite_byte(ctx, BCDUMP_HEAD3);
- bcwrite_byte(ctx, BCDUMP_VERSION);
- bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) +
- (LJ_BE ? BCDUMP_F_BE : 0) +
- ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0));
+ char *p = lj_buf_need(&ctx->sb, 5+5+len);
+ *p++ = BCDUMP_HEAD1;
+ *p++ = BCDUMP_HEAD2;
+ *p++ = BCDUMP_HEAD3;
+ *p++ = BCDUMP_VERSION;
+ *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) +
+ LJ_BE*BCDUMP_F_BE +
+ ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) +
+ LJ_FR2*BCDUMP_F_FR2;
if (!ctx->strip) {
- bcwrite_uleb128(ctx, len);
- bcwrite_block(ctx, name, len);
+ p = lj_strfmt_wuleb128(p, len);
+ p = lj_buf_wmem(p, name, len);
}
- ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata);
+ ctx->status = ctx->wfunc(sbufL(&ctx->sb), ctx->sb.b,
+ (MSize)(p - ctx->sb.b), ctx->wdata);
}
/* Write footer of bytecode dump. */
@@ -356,7 +333,7 @@ static void bcwrite_footer(BCWriteCtx *ctx)
{
if (ctx->status == 0) {
uint8_t zero = 0;
- ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata);
+ ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata);
}
}
@@ -364,8 +341,8 @@ static void bcwrite_footer(BCWriteCtx *ctx)
static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud)
{
BCWriteCtx *ctx = (BCWriteCtx *)ud;
- UNUSED(dummy);
- lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */
+ UNUSED(L); UNUSED(dummy);
+ lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */
bcwrite_header(ctx);
bcwrite_proto(ctx, ctx->pt);
bcwrite_footer(ctx);
@@ -378,16 +355,18 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data,
{
BCWriteCtx ctx;
int status;
- ctx.L = L;
ctx.pt = pt;
ctx.wfunc = writer;
ctx.wdata = data;
ctx.strip = strip;
ctx.status = 0;
- lj_str_initbuf(&ctx.sb);
+#ifdef LUA_USE_ASSERT
+ ctx.g = G(L);
+#endif
+ lj_buf_init(L, &ctx.sb);
status = lj_vm_cpcall(L, NULL, &ctx, cpwriter);
if (status == 0) status = ctx.status;
- lj_str_freebuf(G(ctx.L), &ctx.sb);
+ lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb);
return status;
}
diff --git a/src/lj_buf.c b/src/lj_buf.c
new file mode 100644
index 00000000..cf268af2
--- /dev/null
+++ b/src/lj_buf.c
@@ -0,0 +1,305 @@
+/*
+** Buffer handling.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_buf_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_strfmt.h"
+
+/* -- Buffer management --------------------------------------------------- */
+
+static void buf_grow(SBuf *sb, MSize sz)
+{
+ MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz;
+ char *b;
+ GCSize flag;
+ if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF;
+ while (nsz < sz) nsz += nsz;
+ flag = sbufflag(sb);
+ if ((flag & SBUF_FLAG_COW)) { /* Copy-on-write semantics. */
+ lj_assertG_(G(sbufL(sb)), sb->w == sb->e, "bad SBuf COW");
+ b = (char *)lj_mem_new(sbufL(sb), nsz);
+ setsbufflag(sb, flag & ~(GCSize)SBUF_FLAG_COW);
+ setgcrefnull(sbufX(sb)->cowref);
+ memcpy(b, sb->b, osz);
+ } else {
+ b = (char *)lj_mem_realloc(sbufL(sb), sb->b, osz, nsz);
+ }
+ if ((flag & SBUF_FLAG_EXT)) {
+ sbufX(sb)->r = sbufX(sb)->r - sb->b + b; /* Adjust read pointer, too. */
+ }
+ /* Adjust buffer pointers. */
+ sb->b = b;
+ sb->w = b + len;
+ sb->e = b + nsz;
+ if ((flag & SBUF_FLAG_BORROW)) { /* Adjust borrowed buffer pointers. */
+ SBuf *bsb = mref(sbufX(sb)->bsb, SBuf);
+ bsb->b = b;
+ bsb->w = b + len;
+ bsb->e = b + nsz;
+ }
+}
+
+LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz)
+{
+ lj_assertG_(G(sbufL(sb)), sz > sbufsz(sb), "SBuf overflow");
+ if (LJ_UNLIKELY(sz > LJ_MAX_BUF))
+ lj_err_mem(sbufL(sb));
+ buf_grow(sb, sz);
+ return sb->b;
+}
+
+LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz)
+{
+ if (sbufisext(sb)) {
+ SBufExt *sbx = (SBufExt *)sb;
+ MSize len = sbufxlen(sbx);
+ if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
+ lj_err_mem(sbufL(sbx));
+ if (len + sz > sbufsz(sbx)) { /* Must grow. */
+ buf_grow((SBuf *)sbx, len + sz);
+ } else if (sbufiscow(sb) || sbufxslack(sbx) < (sbufsz(sbx) >> 3)) {
+ /* Also grow to avoid excessive compactions, if slack < size/8. */
+ buf_grow((SBuf *)sbx, sbuflen(sbx) + sz); /* Not sbufxlen! */
+ return sbx->w;
+ }
+ if (sbx->r != sbx->b) { /* Compact by moving down. */
+ memmove(sbx->b, sbx->r, len);
+ sbx->r = sbx->b;
+ sbx->w = sbx->b + len;
+ lj_assertG_(G(sbufL(sbx)), len + sz <= sbufsz(sbx), "bad SBuf compact");
+ }
+ } else {
+ MSize len = sbuflen(sb);
+ lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow");
+ if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF))
+ lj_err_mem(sbufL(sb));
+ buf_grow(sb, len + sz);
+ }
+ return sb->w;
+}
+
+void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb)
+{
+ char *b = sb->b;
+ MSize osz = (MSize)(sb->e - b);
+ if (osz > 2*LJ_MIN_SBUF) {
+ MSize n = (MSize)(sb->w - b);
+ b = lj_mem_realloc(L, b, osz, (osz >> 1));
+ sb->b = b;
+ sb->w = b + n;
+ sb->e = b + (osz >> 1);
+ }
+ lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt");
+}
+
+char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz)
+{
+ SBuf *sb = &G(L)->tmpbuf;
+ setsbufL(sb, L);
+ return lj_buf_need(sb, sz);
+}
+
+#if LJ_HASBUFFER && LJ_HASJIT
+void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref)
+{
+ lua_State *L = sbufL(sbx);
+ lj_bufx_free(L, sbx);
+ lj_bufx_set_cow(L, sbx, p, len);
+ setgcref(sbx->cowref, ref);
+ lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref);
+}
+
+#if LJ_HASFFI
+MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz)
+{
+ lj_buf_more((SBuf *)sbx, sz);
+ return sbufleft(sbx);
+}
+#endif
+#endif
+
+/* -- Low-level buffer put operations ------------------------------------- */
+
+SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len)
+{
+ char *w = lj_buf_more(sb, len);
+ w = lj_buf_wmem(w, q, len);
+ sb->w = w;
+ return sb;
+}
+
+#if LJ_HASJIT || LJ_HASFFI
+static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c)
+{
+ char *w = lj_buf_more2(sb, 1);
+ *w++ = (char)c;
+ sb->w = w;
+ return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c)
+{
+ char *w = sb->w;
+ if (LJ_LIKELY(w < sb->e)) {
+ *w++ = (char)c;
+ sb->w = w;
+ return sb;
+ }
+ return lj_buf_putchar2(sb, c);
+}
+#endif
+
+SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s)
+{
+ MSize len = s->len;
+ char *w = lj_buf_more(sb, len);
+ w = lj_buf_wmem(w, strdata(s), len);
+ sb->w = w;
+ return sb;
+}
+
+/* -- High-level buffer put operations ------------------------------------ */
+
+SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s)
+{
+ MSize len = s->len;
+ char *w = lj_buf_more(sb, len), *e = w+len;
+ const char *q = strdata(s)+len-1;
+ while (w < e)
+ *w++ = *q--;
+ sb->w = w;
+ return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s)
+{
+ MSize len = s->len;
+ char *w = lj_buf_more(sb, len), *e = w+len;
+ const char *q = strdata(s);
+ for (; w < e; w++, q++) {
+ uint32_t c = *(unsigned char *)q;
+#if LJ_TARGET_PPC
+ *w = c + ((c >= 'A' && c <= 'Z') << 5);
+#else
+ if (c >= 'A' && c <= 'Z') c += 0x20;
+ *w = c;
+#endif
+ }
+ sb->w = w;
+ return sb;
+}
+
+SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s)
+{
+ MSize len = s->len;
+ char *w = lj_buf_more(sb, len), *e = w+len;
+ const char *q = strdata(s);
+ for (; w < e; w++, q++) {
+ uint32_t c = *(unsigned char *)q;
+#if LJ_TARGET_PPC
+ *w = c - ((c >= 'a' && c <= 'z') << 5);
+#else
+ if (c >= 'a' && c <= 'z') c -= 0x20;
+ *w = c;
+#endif
+ }
+ sb->w = w;
+ return sb;
+}
+
+SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep)
+{
+ MSize len = s->len;
+ if (rep > 0 && len) {
+ uint64_t tlen = (uint64_t)rep * len;
+ char *w;
+ if (LJ_UNLIKELY(tlen > LJ_MAX_STR))
+ lj_err_mem(sbufL(sb));
+ w = lj_buf_more(sb, (MSize)tlen);
+ if (len == 1) { /* Optimize a common case. */
+ uint32_t c = strdata(s)[0];
+ do { *w++ = c; } while (--rep > 0);
+ } else {
+ const char *e = strdata(s) + len;
+ do {
+ const char *q = strdata(s);
+ do { *w++ = *q++; } while (q < e);
+ } while (--rep > 0);
+ }
+ sb->w = w;
+ }
+ return sb;
+}
+
+SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e)
+{
+ MSize seplen = sep ? sep->len : 0;
+ if (i <= e) {
+ for (;;) {
+ cTValue *o = lj_tab_getint(t, i);
+ char *w;
+ if (!o) {
+ badtype: /* Error: bad element type. */
+ sb->w = (char *)(intptr_t)i; /* Store failing index. */
+ return NULL;
+ } else if (tvisstr(o)) {
+ MSize len = strV(o)->len;
+ w = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len);
+ } else if (tvisint(o)) {
+ w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o));
+ } else if (tvisnum(o)) {
+ w = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen);
+ } else {
+ goto badtype;
+ }
+ if (i++ == e) {
+ sb->w = w;
+ break;
+ }
+ if (seplen) w = lj_buf_wmem(w, strdata(sep), seplen);
+ sb->w = w;
+ }
+ }
+ return sb;
+}
+
+/* -- Miscellaneous buffer operations ------------------------------------- */
+
+GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb)
+{
+ return lj_str_new(sbufL(sb), sb->b, sbuflen(sb));
+}
+
+/* Concatenate two strings. */
+GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2)
+{
+ MSize len1 = s1->len, len2 = s2->len;
+ char *buf = lj_buf_tmp(L, len1 + len2);
+ memcpy(buf, strdata(s1), len1);
+ memcpy(buf+len1, strdata(s2), len2);
+ return lj_str_new(L, buf, len1 + len2);
+}
+
+/* Read ULEB128 from buffer. */
+uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp)
+{
+ const uint8_t *w = (const uint8_t *)*pp;
+ uint32_t v = *w++;
+ if (LJ_UNLIKELY(v >= 0x80)) {
+ int sh = 0;
+ v &= 0x7f;
+ do { v |= ((*w & 0x7f) << (sh += 7)); } while (*w++ >= 0x80);
+ }
+ *pp = (const char *)w;
+ return v;
+}
+
diff --git a/src/lj_buf.h b/src/lj_buf.h
new file mode 100644
index 00000000..76114201
--- /dev/null
+++ b/src/lj_buf.h
@@ -0,0 +1,198 @@
+/*
+** Buffer handling.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_BUF_H
+#define _LJ_BUF_H
+
+#include "lj_obj.h"
+#include "lj_gc.h"
+#include "lj_str.h"
+
+/* Resizable string buffers. */
+
+/* The SBuf struct definition is in lj_obj.h:
+** char *w; Write pointer.
+** char *e; End pointer.
+** char *b; Base pointer.
+** MRef L; lua_State, used for buffer resizing. Extension bits in 3 LSB.
+*/
+
+/* Extended string buffer. */
+typedef struct SBufExt {
+ SBufHeader;
+ union {
+ GCRef cowref; /* Copy-on-write object reference. */
+ MRef bsb; /* Borrowed string buffer. */
+ };
+ char *r; /* Read pointer. */
+ GCRef dict_str; /* Serialization string dictionary table. */
+ GCRef dict_mt; /* Serialization metatable dictionary table. */
+ int depth; /* Remaining recursion depth. */
+} SBufExt;
+
+#define sbufsz(sb) ((MSize)((sb)->e - (sb)->b))
+#define sbuflen(sb) ((MSize)((sb)->w - (sb)->b))
+#define sbufleft(sb) ((MSize)((sb)->e - (sb)->w))
+#define sbufxlen(sbx) ((MSize)((sbx)->w - (sbx)->r))
+#define sbufxslack(sbx) ((MSize)((sbx)->r - (sbx)->b))
+
+#define SBUF_MASK_FLAG (7)
+#define SBUF_MASK_L (~(GCSize)SBUF_MASK_FLAG)
+#define SBUF_FLAG_EXT 1 /* Extended string buffer. */
+#define SBUF_FLAG_COW 2 /* Copy-on-write buffer. */
+#define SBUF_FLAG_BORROW 4 /* Borrowed string buffer. */
+
+#define sbufL(sb) \
+ ((lua_State *)(void *)(uintptr_t)(mrefu((sb)->L) & SBUF_MASK_L))
+#define setsbufL(sb, l) (setmref((sb)->L, (l)))
+#define setsbufXL(sb, l, flag) \
+ (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) + (flag)))
+#define setsbufXL_(sb, l) \
+ (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) | (mrefu((sb)->L) & SBUF_MASK_FLAG)))
+
+#define sbufflag(sb) (mrefu((sb)->L))
+#define sbufisext(sb) (sbufflag((sb)) & SBUF_FLAG_EXT)
+#define sbufiscow(sb) (sbufflag((sb)) & SBUF_FLAG_COW)
+#define sbufisborrow(sb) (sbufflag((sb)) & SBUF_FLAG_BORROW)
+#define sbufiscoworborrow(sb) (sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW))
+#define sbufX(sb) \
+ (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb))
+#define setsbufflag(sb, flag) (setmrefu((sb)->L, (flag)))
+
+#define tvisbuf(o) \
+ (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER)
+#define bufV(o) check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o))))
+
+/* Buffer management */
+LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz);
+LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz);
+LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb);
+LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz);
+
+static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb)
+{
+ setsbufL(sb, L);
+ sb->w = sb->e = sb->b = NULL;
+}
+
+static LJ_AINLINE void lj_buf_reset(SBuf *sb)
+{
+ sb->w = sb->b;
+}
+
+static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L)
+{
+ SBuf *sb = &G(L)->tmpbuf;
+ setsbufL(sb, L);
+ lj_buf_reset(sb);
+ return sb;
+}
+
+static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb)
+{
+ lj_assertG(!sbufisext(sb), "bad free of SBufExt");
+ lj_mem_free(g, sb->b, sbufsz(sb));
+}
+
+static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz)
+{
+ if (LJ_UNLIKELY(sz > sbufsz(sb)))
+ return lj_buf_need2(sb, sz);
+ return sb->b;
+}
+
+static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz)
+{
+ if (LJ_UNLIKELY(sz > sbufleft(sb)))
+ return lj_buf_more2(sb, sz);
+ return sb->w;
+}
+
+/* Extended buffer management */
+static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx)
+{
+ memset(sbx, 0, sizeof(SBufExt));
+ setsbufXL(sbx, L, SBUF_FLAG_EXT);
+}
+
+static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb)
+{
+ setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW);
+ setmref(sbx->bsb, sb);
+ sbx->r = sbx->w = sbx->b = sb->b;
+ sbx->e = sb->e;
+}
+
+static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx,
+ const char *p, MSize len)
+{
+ setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW);
+ sbx->r = sbx->b = (char *)p;
+ sbx->w = sbx->e = (char *)p + len;
+}
+
+static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx)
+{
+ if (sbufiscow(sbx)) {
+ setmrefu(sbx->L, (mrefu(sbx->L) & ~(GCSize)SBUF_FLAG_COW));
+ setgcrefnull(sbx->cowref);
+ sbx->b = sbx->e = NULL;
+ }
+ sbx->r = sbx->w = sbx->b;
+}
+
+static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx)
+{
+ if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx));
+ setsbufXL(sbx, L, SBUF_FLAG_EXT);
+ setgcrefnull(sbx->cowref);
+ sbx->r = sbx->w = sbx->b = sbx->e = NULL;
+}
+
+#if LJ_HASBUFFER && LJ_HASJIT
+LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o);
+#if LJ_HASFFI
+LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz);
+#endif
+#endif
+
+/* Low-level buffer put operations */
+LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len);
+#if LJ_HASJIT || LJ_HASFFI
+LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c);
+#endif
+LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s);
+
+static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len)
+{
+ return (char *)memcpy(p, q, len) + len;
+}
+
+static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c)
+{
+ char *w = lj_buf_more(sb, 1);
+ *w++ = (char)c;
+ sb->w = w;
+}
+
+/* High-level buffer put operations */
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s);
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s);
+LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s);
+LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep);
+LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep,
+ int32_t i, int32_t e);
+
+/* Miscellaneous buffer operations */
+LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb);
+LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2);
+LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp);
+
+static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb)
+{
+ return lj_str_new(L, sb->b, sbuflen(sb));
+}
+
+#endif
diff --git a/src/lj_carith.c b/src/lj_carith.c
index 462dbae4..1a2a058f 100644
--- a/src/lj_carith.c
+++ b/src/lj_carith.c
@@ -11,10 +11,12 @@
#include "lj_err.h"
#include "lj_tab.h"
#include "lj_meta.h"
+#include "lj_ir.h"
#include "lj_ctype.h"
#include "lj_cconv.h"
#include "lj_cdata.h"
#include "lj_carith.h"
+#include "lj_strscan.h"
/* -- C data arithmetic --------------------------------------------------- */
@@ -120,7 +122,7 @@ static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2));
return 1;
} else {
- lua_assert(mm == MM_le);
+ lj_assertL(mm == MM_le, "bad metamethod %d", mm);
setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2));
return 1;
}
@@ -206,7 +208,9 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm)
*up = lj_carith_powu64(u0, u1);
break;
case MM_unm: *up = (uint64_t)-(int64_t)u0; break;
- default: lua_assert(0); break;
+ default:
+ lj_assertL(0, "bad metamethod %d", mm);
+ break;
}
lj_gc_check(L);
return 1;
@@ -272,6 +276,81 @@ int lj_carith_op(lua_State *L, MMS mm)
return lj_carith_meta(L, cts, &ca, mm);
}
+/* -- 64 bit bit operations helpers --------------------------------------- */
+
+#if LJ_64
+#define B64DEF(name) \
+ static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh)
+#else
+/* Not inlined on 32 bit archs, since some of these are quite lengthy. */
+#define B64DEF(name) \
+ uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh)
+#endif
+
+B64DEF(shl64) { return x << (sh&63); }
+B64DEF(shr64) { return x >> (sh&63); }
+B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); }
+B64DEF(rol64) { return lj_rol(x, (sh&63)); }
+B64DEF(ror64) { return lj_ror(x, (sh&63)); }
+
+#undef B64DEF
+
+uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op)
+{
+ switch (op) {
+ case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break;
+ case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break;
+ case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break;
+ case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break;
+ case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break;
+ default:
+ lj_assertX(0, "bad shift op %d", op);
+ break;
+ }
+ return x;
+}
+
+/* Equivalent to lj_lib_checkbit(), but handles cdata. */
+uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id)
+{
+ TValue *o = L->base + narg-1;
+ if (o >= L->top) {
+ err:
+ lj_err_argt(L, narg, LUA_TNUMBER);
+ } else if (LJ_LIKELY(tvisnumber(o))) {
+ /* Handled below. */
+ } else if (tviscdata(o)) {
+ CTState *cts = ctype_cts(L);
+ uint8_t *sp = (uint8_t *)cdataptr(cdataV(o));
+ CTypeID sid = cdataV(o)->ctypeid;
+ CType *s = ctype_get(cts, sid);
+ uint64_t x;
+ if (ctype_isref(s->info)) {
+ sp = *(void **)sp;
+ sid = ctype_cid(s->info);
+ }
+ s = ctype_raw(cts, sid);
+ if (ctype_isenum(s->info)) s = ctype_child(cts, s);
+ if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
+ CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8)
+ *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
+ else if (!*id)
+ *id = CTID_INT64; /* Use int64_t, unless already set. */
+ lj_cconv_ct_ct(cts, ctype_get(cts, *id), s,
+ (uint8_t *)&x, sp, CCF_ARG(narg));
+ return x;
+ } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) {
+ goto err;
+ }
+ if (LJ_LIKELY(tvisint(o))) {
+ return (uint32_t)intV(o);
+ } else {
+ int32_t i = lj_num2bit(numV(o));
+ if (LJ_DUALNUM) setintV(o, i);
+ return (uint32_t)i;
+ }
+}
+
/* -- 64 bit integer arithmetic helpers ----------------------------------- */
#if LJ_32 && LJ_HASJIT
diff --git a/src/lj_carith.h b/src/lj_carith.h
index 269c60ea..9d6b1dc9 100644
--- a/src/lj_carith.h
+++ b/src/lj_carith.h
@@ -12,6 +12,16 @@
LJ_FUNC int lj_carith_op(lua_State *L, MMS mm);
+#if LJ_32
+LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh);
+LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh);
+#endif
+LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op);
+LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id);
+
#if LJ_32 && LJ_HASJIT
LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k);
#endif
diff --git a/src/lj_ccall.c b/src/lj_ccall.c
index 4a859c73..25f54dee 100644
--- a/src/lj_ccall.c
+++ b/src/lj_ccall.c
@@ -9,7 +9,6 @@
#include "lj_gc.h"
#include "lj_err.h"
-#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ctype.h"
#include "lj_cconv.h"
@@ -291,56 +290,85 @@
#define CCALL_HANDLE_RET \
if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0];
-#elif LJ_TARGET_PPC
-/* -- PPC calling conventions --------------------------------------------- */
+#elif LJ_TARGET_ARM64
+/* -- ARM64 calling conventions ------------------------------------------- */
#define CCALL_HANDLE_STRUCTRET \
- cc->retref = 1; /* Return all structs by reference. */ \
- cc->gpr[ngpr++] = (GPRArg)dp;
+ cc->retref = !ccall_classify_struct(cts, ctr); \
+ if (cc->retref) cc->retp = dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+ unsigned int cl = ccall_classify_struct(cts, ctr); \
+ if ((cl & 4)) { /* Combine float HFA from separate registers. */ \
+ CTSize i = (cl >> 8) - 1; \
+ do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \
+ } else { \
+ if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \
+ memcpy(dp, sp, ctr->size); \
+ }
#define CCALL_HANDLE_COMPLEXRET \
- /* Complex values are returned in 2 or 4 GPRs. */ \
+ /* Complex values are returned in one or two FPRs. */ \
cc->retref = 0;
#define CCALL_HANDLE_COMPLEXRET2 \
- memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */
+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
+ ((float *)dp)[0] = cc->fpr[0].f; \
+ ((float *)dp)[1] = cc->fpr[1].f; \
+ } else { /* Copy complex double from FPRs. */ \
+ ((double *)dp)[0] = cc->fpr[0].d; \
+ ((double *)dp)[1] = cc->fpr[1].d; \
+ }
#define CCALL_HANDLE_STRUCTARG \
- rp = cdataptr(lj_cdata_new(cts, did, sz)); \
- sz = CTSIZE_PTR; /* Pass all structs by reference. */
+ unsigned int cl = ccall_classify_struct(cts, d); \
+ if (cl == 0) { /* Pass struct by reference. */ \
+ rp = cdataptr(lj_cdata_new(cts, did, sz)); \
+ sz = CTSIZE_PTR; \
+ } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \
+ isfp = (cl & 4) ? 2 : 1; \
+ } /* else: Pass struct in GPRs or on stack. */
#define CCALL_HANDLE_COMPLEXARG \
- /* Pass complex by value in 2 or 4 GPRs. */
+ /* Pass complex by value in separate (!) FPRs or on stack. */ \
+ isfp = sz == 2*sizeof(float) ? 2 : 1;
#define CCALL_HANDLE_REGARG \
- if (isfp) { /* Try to pass argument in FPRs. */ \
- if (nfpr + 1 <= CCALL_NARG_FPR) { \
+ if (LJ_TARGET_OSX && isva) { \
+ /* IOS: All variadic arguments are on the stack. */ \
+ } else if (isfp) { /* Try to pass argument in FPRs. */ \
+ int n2 = ctype_isvector(d->info) ? 1 : \
+ isfp == 1 ? n : (d->size >> (4-isfp)); \
+ if (nfpr + n2 <= CCALL_NARG_FPR) { \
dp = &cc->fpr[nfpr]; \
- nfpr += 1; \
- d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
+ nfpr += n2; \
goto done; \
+ } else { \
+ nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
+ if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
} \
} else { /* Try to pass argument in GPRs. */ \
- if (n > 1) { \
- lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \
- if (ctype_isinteger(d->info)) \
- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
- else if (ngpr + n > maxgpr) \
- ngpr = maxgpr; /* Prevent reordering. */ \
- } \
+ if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
if (ngpr + n <= maxgpr) { \
dp = &cc->gpr[ngpr]; \
ngpr += n; \
goto done; \
+ } else { \
+ ngpr = maxgpr; /* Prevent reordering. */ \
+ if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \
} \
}
+#if LJ_BE
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
- ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
+ sp = (uint8_t *)&cc->fpr[0].f;
+#endif
-#elif LJ_TARGET_PPCSPE
-/* -- PPC/SPE calling conventions ----------------------------------------- */
+
+#elif LJ_TARGET_PPC
+/* -- PPC calling conventions --------------------------------------------- */
#define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \
@@ -360,12 +388,13 @@
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in 2 or 4 GPRs. */
-/* PPC/SPE has a softfp ABI. */
-#define CCALL_HANDLE_REGARG \
- if (n > 1) { /* Doesn't fit in a single GPR? */ \
- lua_assert(n == 2 || n == 4); /* int64_t, double or complex (float). */ \
- if (n == 2) \
- ngpr = (ngpr + 1u) & ~1u; /* Only align 64 bit value to regpair. */ \
+#define CCALL_HANDLE_GPR \
+ /* Try to pass argument in GPRs. */ \
+ if (n > 1) { \
+ /* int64_t or complex (float). */ \
+ lj_assertL(n == 2 || n == 4, "bad GPR size %d", n); \
+ if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
else if (ngpr + n > maxgpr) \
ngpr = maxgpr; /* Prevent reordering. */ \
} \
@@ -373,10 +402,32 @@
dp = &cc->gpr[ngpr]; \
ngpr += n; \
goto done; \
+ } \
+
+#if LJ_ABI_SOFTFP
+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
+#else
+#define CCALL_HANDLE_REGARG \
+ if (isfp) { /* Try to pass argument in FPRs. */ \
+ if (nfpr + 1 <= CCALL_NARG_FPR) { \
+ dp = &cc->fpr[nfpr]; \
+ nfpr += 1; \
+ d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
+ goto done; \
+ } \
+ } else { \
+ CCALL_HANDLE_GPR \
}
+#endif
-#elif LJ_TARGET_MIPS
-/* -- MIPS calling conventions -------------------------------------------- */
+#if !LJ_ABI_SOFTFP
+#define CCALL_HANDLE_RET \
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */
+#endif
+
+#elif LJ_TARGET_MIPS32
+/* -- MIPS o32 calling conventions ---------------------------------------- */
#define CCALL_HANDLE_STRUCTRET \
cc->retref = 1; /* Return all structs by reference. */ \
@@ -386,6 +437,18 @@
/* Complex values are returned in 1 or 2 FPRs. */ \
cc->retref = 0;
+#if LJ_ABI_SOFTFP
+#define CCALL_HANDLE_COMPLEXRET2 \
+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
+ ((intptr_t *)dp)[0] = cc->gpr[0]; \
+ ((intptr_t *)dp)[1] = cc->gpr[1]; \
+ } else { /* Copy complex double from GPRs. */ \
+ ((intptr_t *)dp)[0] = cc->gpr[0]; \
+ ((intptr_t *)dp)[1] = cc->gpr[1]; \
+ ((intptr_t *)dp)[2] = cc->gpr[2]; \
+ ((intptr_t *)dp)[3] = cc->gpr[3]; \
+ }
+#else
#define CCALL_HANDLE_COMPLEXRET2 \
if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
((float *)dp)[0] = cc->fpr[0].f; \
@@ -394,6 +457,7 @@
((double *)dp)[0] = cc->fpr[0].d; \
((double *)dp)[1] = cc->fpr[1].d; \
}
+#endif
#define CCALL_HANDLE_STRUCTARG \
/* Pass all structs by value in registers and/or on the stack. */
@@ -401,6 +465,22 @@
#define CCALL_HANDLE_COMPLEXARG \
/* Pass complex by value in 2 or 4 GPRs. */
+#define CCALL_HANDLE_GPR \
+ if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
+ if (ngpr < maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+ if (ngpr + n > maxgpr) { \
+ nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
+ if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
+ ngpr = maxgpr; \
+ } else { \
+ ngpr += n; \
+ } \
+ goto done; \
+ }
+
+#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */
#define CCALL_HANDLE_REGARG \
if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \
/* Try to pass argument in FPRs. */ \
@@ -409,25 +489,91 @@
goto done; \
} else { /* Try to pass argument in GPRs. */ \
nfpr = CCALL_NARG_FPR; \
- if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \
- ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
- if (ngpr < maxgpr) { \
- dp = &cc->gpr[ngpr]; \
- if (ngpr + n > maxgpr) { \
- nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
- if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
- ngpr = maxgpr; \
- } else { \
- ngpr += n; \
- } \
- goto done; \
- } \
+ CCALL_HANDLE_GPR \
+ }
+#else /* MIPS32 soft-float */
+#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR
+#endif
+
+#if !LJ_ABI_SOFTFP
+/* On MIPS64 soft-float, position of float return values is endian-dependant. */
+#define CCALL_HANDLE_RET \
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ sp = (uint8_t *)&cc->fpr[0].f;
+#endif
+
+#elif LJ_TARGET_MIPS64
+/* -- MIPS n64 calling conventions ---------------------------------------- */
+
+#define CCALL_HANDLE_STRUCTRET \
+ cc->retref = !(sz <= 16); \
+ if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp;
+
+#define CCALL_HANDLE_STRUCTRET2 \
+ ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct));
+
+#define CCALL_HANDLE_COMPLEXRET \
+ /* Complex values are returned in 1 or 2 FPRs. */ \
+ cc->retref = 0;
+
+#if LJ_ABI_SOFTFP /* MIPS64 soft-float */
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \
+ ((intptr_t *)dp)[0] = cc->gpr[0]; \
+ } else { /* Copy complex double from GPRs. */ \
+ ((intptr_t *)dp)[0] = cc->gpr[0]; \
+ ((intptr_t *)dp)[1] = cc->gpr[1]; \
+ }
+
+#define CCALL_HANDLE_COMPLEXARG \
+ /* Pass complex by value in 2 or 4 GPRs. */
+
+/* Position of soft-float 'float' return value depends on endianess. */
+#define CCALL_HANDLE_RET \
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4);
+
+#else /* MIPS64 hard-float */
+
+#define CCALL_HANDLE_COMPLEXRET2 \
+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \
+ ((float *)dp)[0] = cc->fpr[0].f; \
+ ((float *)dp)[1] = cc->fpr[1].f; \
+ } else { /* Copy complex double from FPRs. */ \
+ ((double *)dp)[0] = cc->fpr[0].d; \
+ ((double *)dp)[1] = cc->fpr[1].d; \
+ }
+
+#define CCALL_HANDLE_COMPLEXARG \
+ if (sz == 2*sizeof(float)) { \
+ isfp = 2; \
+ if (ngpr < maxgpr) \
+ sz *= 2; \
}
#define CCALL_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
sp = (uint8_t *)&cc->fpr[0].f;
+#endif
+
+#define CCALL_HANDLE_STRUCTARG \
+ /* Pass all structs by value in registers and/or on the stack. */
+
+#define CCALL_HANDLE_REGARG \
+ if (ngpr < maxgpr) { \
+ dp = &cc->gpr[ngpr]; \
+ if (ngpr + n > maxgpr) { \
+ nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \
+ if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \
+ ngpr = maxgpr; \
+ } else { \
+ ngpr += n; \
+ } \
+ goto done; \
+ }
+
#else
#error "Missing calling convention definitions for this architecture"
#endif
@@ -497,7 +643,8 @@ static void ccall_classify_ct(CTState *cts, CType *ct, int *rcl, CTSize ofs)
ccall_classify_struct(cts, ct, rcl, ofs);
} else {
int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT;
- lua_assert(ctype_hassize(ct->info));
+ lj_assertCTS(ctype_hassize(ct->info),
+ "classify ctype %08x without size", ct->info);
if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */
rcl[(ofs >= 8)] |= cl;
}
@@ -522,12 +669,13 @@ static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs)
}
/* Try to split up a small struct into registers. */
-static int ccall_struct_reg(CCallState *cc, GPRArg *dp, int *rcl)
+static int ccall_struct_reg(CCallState *cc, CTState *cts, GPRArg *dp, int *rcl)
{
MSize ngpr = cc->ngpr, nfpr = cc->nfpr;
uint32_t i;
+ UNUSED(cts);
for (i = 0; i < 2; i++) {
- lua_assert(!(rcl[i] & CCALL_RCL_MEM));
+ lj_assertCTS(!(rcl[i] & CCALL_RCL_MEM), "pass mem struct in reg");
if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */
if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */
cc->gpr[ngpr++] = dp[i];
@@ -548,7 +696,8 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl,
dp[0] = dp[1] = 0;
/* Convert to temp. struct. */
lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg));
- if (ccall_struct_reg(cc, dp, rcl)) { /* Register overflow? Pass on stack. */
+ if (ccall_struct_reg(cc, cts, dp, rcl)) {
+ /* Register overflow? Pass on stack. */
MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1;
if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */
cc->nsp = nsp + n;
@@ -621,6 +770,125 @@ noth: /* Not a homogeneous float/double aggregate. */
#endif
+/* -- ARM64 ABI struct classification ------------------------------------- */
+
+#if LJ_TARGET_ARM64
+
+/* Classify a struct based on its fields. */
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct)
+{
+ CTSize sz = ct->size;
+ unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION);
+ while (ct->sib) {
+ CType *sct;
+ ct = ctype_get(cts, ct->sib);
+ if (ctype_isfield(ct->info)) {
+ sct = ctype_rawchild(cts, ct);
+ if (ctype_isfp(sct->info)) {
+ r |= sct->size;
+ if (!isu) n++; else if (n == 0) n = 1;
+ } else if (ctype_iscomplex(sct->info)) {
+ r |= (sct->size >> 1);
+ if (!isu) n += 2; else if (n < 2) n = 2;
+ } else if (ctype_isstruct(sct->info)) {
+ goto substruct;
+ } else {
+ goto noth;
+ }
+ } else if (ctype_isbitfield(ct->info)) {
+ goto noth;
+ } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+ sct = ctype_rawchild(cts, ct);
+ substruct:
+ if (sct->size > 0) {
+ unsigned int s = ccall_classify_struct(cts, sct);
+ if (s <= 1) goto noth;
+ r |= (s & 255);
+ if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8);
+ }
+ }
+ }
+ if ((r == 4 || r == 8) && n <= 4)
+ return r + (n << 8);
+noth: /* Not a homogeneous float/double aggregate. */
+ return (sz <= 16); /* Return structs of size <= 16 in GPRs. */
+}
+
+#endif
+
+/* -- MIPS64 ABI struct classification ---------------------------- */
+
+#if LJ_TARGET_MIPS64
+
+#define FTYPE_FLOAT 1
+#define FTYPE_DOUBLE 2
+
+/* Classify FP fields (max. 2) and their types. */
+static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf)
+{
+ int n = 0, ft = 0;
+ if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION))
+ goto noth;
+ while (ct->sib) {
+ CType *sct;
+ ct = ctype_get(cts, ct->sib);
+ if (n == 2) {
+ goto noth;
+ } else if (ctype_isfield(ct->info)) {
+ sct = ctype_rawchild(cts, ct);
+ if (ctype_isfp(sct->info)) {
+ ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n;
+ n++;
+ } else {
+ goto noth;
+ }
+ } else if (ctype_isbitfield(ct->info) ||
+ ctype_isxattrib(ct->info, CTA_SUBTYPE)) {
+ goto noth;
+ }
+ }
+ if (n <= 2)
+ return ft;
+noth: /* Not a homogeneous float/double aggregate. */
+ return 0; /* Struct is in GPRs. */
+}
+
+static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp,
+ int ft)
+{
+ if (LJ_ABI_SOFTFP ? ft :
+ ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) {
+ int i, ofs = 0;
+ for (i = 0; ft != 0; i++, ft >>= 2) {
+ if ((ft & 3) == FTYPE_FLOAT) {
+#if LJ_ABI_SOFTFP
+ /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */
+ memcpy((uint8_t *)dp + ofs,
+ (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4);
+#else
+ *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f;
+#endif
+ ofs += 4;
+ } else {
+ ofs = (ofs + 7) & ~7; /* 64 bit alignment. */
+#if LJ_ABI_SOFTFP
+ *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i];
+#else
+ *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d;
+#endif
+ ofs += 8;
+ }
+ }
+ } else {
+#if !LJ_ABI_SOFTFP
+ if (ft) sp = (uint8_t *)&cc->fpr[0];
+#endif
+ memcpy(dp, sp, ctr->size);
+ }
+}
+
+#endif
+
/* -- Common C call handling ---------------------------------------------- */
/* Infer the destination CTypeID for a vararg argument. */
@@ -726,7 +994,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
if (fid) { /* Get argument type from field. */
CType *ctf = ctype_get(cts, fid);
fid = ctf->sib;
- lua_assert(ctype_isfield(ctf->info));
+ lj_assertL(ctype_isfield(ctf->info), "field expected");
did = ctype_cid(ctf->info);
} else {
if (!(ct->info & CTF_VARARG))
@@ -788,6 +1056,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
*(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
+#if LJ_TARGET_ARM64 && LJ_BE
+ if (isfp && d->size == sizeof(float))
+ ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */
+#endif
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)
+#if LJ_TARGET_MIPS64
+ || (isfp && nsp == 0)
+#endif
+ ) && d->size <= 4) {
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */
+ }
+#endif
#if LJ_TARGET_X64 && LJ_ABI_WIN
if (isva) { /* Windows/x64 mirrors varargs in both register sets. */
if (nfpr == ngpr)
@@ -803,13 +1084,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct,
cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */
cc->fpr[nfpr-2].d[1] = 0;
}
+#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP)
+ if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) {
+ /* Split float HFA or complex float into separate registers. */
+ CTSize i = (sz >> 2) - 1;
+ do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--);
+ }
#else
UNUSED(isfp);
#endif
}
if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */
-#if LJ_TARGET_X64 || LJ_TARGET_PPC
+#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP)
cc->nfpr = nfpr; /* Required for vararg functions. */
#endif
cc->nsp = nsp;
@@ -844,7 +1131,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_COMPLEXRET2
return 1; /* One GC step. */
}
- if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR)
+ if (LJ_BE && ctr->size < CTSIZE_PTR &&
+ (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info)))
sp += (CTSIZE_PTR - ctr->size);
#if CCALL_NUM_FPR
if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info))
@@ -854,7 +1142,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct,
CCALL_HANDLE_RET
#endif
/* No reference types end up here, so there's no need for the CTypeID. */
- lua_assert(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)));
+ lj_assertL(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)),
+ "unexpected reference ctype");
return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp);
}
@@ -878,7 +1167,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd)
lj_vm_ffi_call(&cc);
if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */
TValue tv;
- setlightudV(&tv, (void *)cc.func);
+ tv.u64 = ((uintptr_t)(void *)cc.func >> 2) | U64x(800000000, 00000000);
setboolV(lj_tab_set(L, cts->miscmap, &tv), 1);
}
ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */
diff --git a/src/lj_ccall.h b/src/lj_ccall.h
index b46483f1..0b3c5244 100644
--- a/src/lj_ccall.h
+++ b/src/lj_ccall.h
@@ -68,35 +68,56 @@ typedef union FPRArg {
float f[2];
} FPRArg;
-#elif LJ_TARGET_PPC
+#elif LJ_TARGET_ARM64
#define CCALL_NARG_GPR 8
+#define CCALL_NRET_GPR 2
#define CCALL_NARG_FPR 8
+#define CCALL_NRET_FPR 4
+#define CCALL_SPS_FREE 0
+
+typedef intptr_t GPRArg;
+typedef union FPRArg {
+ double d;
+ struct { LJ_ENDIAN_LOHI(float f; , float g;) };
+ struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) };
+} FPRArg;
+
+#elif LJ_TARGET_PPC
+
+#define CCALL_NARG_GPR 8
+#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8)
#define CCALL_NRET_GPR 4 /* For complex double. */
-#define CCALL_NRET_FPR 1
+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1)
#define CCALL_SPS_EXTRA 4
#define CCALL_SPS_FREE 0
typedef intptr_t GPRArg;
typedef double FPRArg;
-#elif LJ_TARGET_PPCSPE
+#elif LJ_TARGET_MIPS32
-#define CCALL_NARG_GPR 8
-#define CCALL_NARG_FPR 0
-#define CCALL_NRET_GPR 4 /* For softfp complex double. */
-#define CCALL_NRET_FPR 0
-#define CCALL_SPS_FREE 0 /* NYI */
+#define CCALL_NARG_GPR 4
+#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2)
+#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2)
+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
+#define CCALL_SPS_EXTRA 7
+#define CCALL_SPS_FREE 1
typedef intptr_t GPRArg;
+typedef union FPRArg {
+ double d;
+ struct { LJ_ENDIAN_LOHI(float f; , float g;) };
+} FPRArg;
-#elif LJ_TARGET_MIPS
+#elif LJ_TARGET_MIPS64
-#define CCALL_NARG_GPR 4
-#define CCALL_NARG_FPR 2
+/* FP args are positional and overlay the GPR array. */
+#define CCALL_NARG_GPR 8
+#define CCALL_NARG_FPR 0
#define CCALL_NRET_GPR 2
-#define CCALL_NRET_FPR 2
-#define CCALL_SPS_EXTRA 7
+#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2)
+#define CCALL_SPS_EXTRA 3
#define CCALL_SPS_FREE 1
typedef intptr_t GPRArg;
@@ -145,6 +166,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState {
uint8_t nfpr; /* Number of arguments in FPRs. */
#elif LJ_TARGET_X86
uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */
+#elif LJ_TARGET_ARM64
+ void *retp; /* Aggregate return pointer in x8. */
#elif LJ_TARGET_PPC
uint8_t nfpr; /* Number of arguments in FPRs. */
#endif
diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c
index 5a6785c6..43e44305 100644
--- a/src/lj_ccallback.c
+++ b/src/lj_ccallback.c
@@ -27,7 +27,7 @@
#if LJ_OS_NOJIT
-/* Disabled callback support. */
+/* Callbacks disabled. */
#define CALLBACK_SLOT2OFS(slot) (0*(slot))
#define CALLBACK_OFS2SLOT(ofs) (0*(ofs))
#define CALLBACK_MAX_SLOT 0
@@ -35,7 +35,7 @@
#elif LJ_TARGET_X86ORX64
#define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0)
-#define CALLBACK_MCODE_GROUP (-2+1+2+5+(LJ_64 ? 6 : 5))
+#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5))
#define CALLBACK_SLOT2OFS(slot) \
(CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot))
@@ -54,23 +54,22 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#elif LJ_TARGET_ARM
#define CALLBACK_MCODE_HEAD 32
-#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+
+#elif LJ_TARGET_ARM64
+
+#define CALLBACK_MCODE_HEAD 32
#elif LJ_TARGET_PPC
#define CALLBACK_MCODE_HEAD 24
-#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
-#elif LJ_TARGET_MIPS
+#elif LJ_TARGET_MIPS32
-#define CALLBACK_MCODE_HEAD 24
-#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
-#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
-#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+#define CALLBACK_MCODE_HEAD 20
+
+#elif LJ_TARGET_MIPS64
+
+#define CALLBACK_MCODE_HEAD 52
#else
@@ -81,6 +80,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#endif
+#ifndef CALLBACK_SLOT2OFS
+#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot))
+#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8)
+#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
+#endif
+
/* Convert callback slot number to callback function pointer. */
static void *callback_slot2ptr(CTState *cts, MSize slot)
{
@@ -102,9 +107,9 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p)
/* Initialize machine code for callback function pointers. */
#if LJ_OS_NOJIT
/* Disabled callback support. */
-#define callback_mcode_init(g, p) UNUSED(p)
+#define callback_mcode_init(g, p) (p)
#elif LJ_TARGET_X86ORX64
-static void callback_mcode_init(global_State *g, uint8_t *page)
+static void *callback_mcode_init(global_State *g, uint8_t *page)
{
uint8_t *p = page;
uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback;
@@ -119,8 +124,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
/* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */
*p++ = XI_PUSH + RID_EBP;
*p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8);
+#if LJ_GC64
+ *p++ = 0x48; *p++ = XI_MOVri | RID_EBP;
+ *(uint64_t *)p = (uint64_t)(g); p += 8;
+#else
*p++ = XI_MOVri | RID_EBP;
*(int32_t *)p = i32ptr(g); p += 4;
+#endif
#if LJ_64
/* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */
*p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP;
@@ -133,10 +143,10 @@ static void callback_mcode_init(global_State *g, uint8_t *page)
*p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2);
}
}
- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
+ return p;
}
#elif LJ_TARGET_ARM
-static void callback_mcode_init(global_State *g, uint32_t *page)
+static void *callback_mcode_init(global_State *g, uint32_t *page)
{
uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback;
@@ -155,10 +165,30 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
*p = ARMI_B | ((page-p-2) & 0x00ffffffu);
p++;
}
- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
+ return p;
+}
+#elif LJ_TARGET_ARM64
+static void *callback_mcode_init(global_State *g, uint32_t *page)
+{
+ uint32_t *p = page;
+ void *target = (void *)lj_vm_ffi_callback;
+ MSize slot;
+ *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4));
+ *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5));
+ *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11));
+ *p++ = A64I_LE(A64I_NOP);
+ ((void **)p)[0] = target;
+ ((void **)p)[1] = g;
+ p += 4;
+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
+ *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
+ *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
+ p++;
+ }
+ return p;
}
#elif LJ_TARGET_PPC
-static void callback_mcode_init(global_State *g, uint32_t *page)
+static void *callback_mcode_init(global_State *g, uint32_t *page)
{
uint32_t *p = page;
void *target = (void *)lj_vm_ffi_callback;
@@ -174,30 +204,43 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
*p = PPCI_B | (((page-p) & 0x00ffffffu) << 2);
p++;
}
- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
+ return p;
}
#elif LJ_TARGET_MIPS
-static void callback_mcode_init(global_State *g, uint32_t *page)
+static void *callback_mcode_init(global_State *g, uint32_t *page)
{
uint32_t *p = page;
- void *target = (void *)lj_vm_ffi_callback;
+ uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback;
+ uintptr_t ug = (uintptr_t)(void *)g;
MSize slot;
- *p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0;
- *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16);
- *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16);
- *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff);
+#if LJ_TARGET_MIPS32
+ *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16);
+ *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16);
+#else
+ *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48);
+ *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48);
+ *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff);
+ *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff);
+ *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
+ *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
+ *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff);
+ *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff);
+ *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16);
+ *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16);
+#endif
+ *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff);
*p++ = MIPSI_JR | MIPSF_S(RID_R3);
- *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff);
+ *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff);
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
*p = MIPSI_B | ((page-p-1) & 0x0000ffffu);
p++;
*p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot;
}
- lua_assert(p - page <= CALLBACK_MCODE_SIZE);
+ return p;
}
#else
/* Missing support for this architecture. */
-#define callback_mcode_init(g, p) UNUSED(p)
+#define callback_mcode_init(g, p) (p)
#endif
/* -- Machine code management --------------------------------------------- */
@@ -213,6 +256,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
+#ifdef PROT_MPROTECT
+#define CCPROT_CREATE (PROT_MPROTECT(PROT_EXEC))
+#else
+#define CCPROT_CREATE 0
+#endif
#endif
@@ -220,15 +268,15 @@ static void callback_mcode_init(global_State *g, uint32_t *page)
static void callback_mcode_new(CTState *cts)
{
size_t sz = (size_t)CALLBACK_MCODE_SIZE;
- void *p;
+ void *p, *pe;
if (CALLBACK_MAX_SLOT == 0)
lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
#if LJ_TARGET_WINDOWS
- p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+ p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
if (!p)
lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
#elif LJ_TARGET_POSIX
- p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS,
+ p = mmap(NULL, sz, (PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0);
if (p == MAP_FAILED)
lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV);
@@ -237,12 +285,15 @@ static void callback_mcode_new(CTState *cts)
p = lj_mem_new(cts->L, sz);
#endif
cts->cb.mcode = p;
- callback_mcode_init(cts->g, p);
+ pe = callback_mcode_init(cts->g, p);
+ UNUSED(pe);
+ lj_assertCTS((size_t)((char *)pe - (char *)p) <= sz,
+ "miscalculated CALLBACK_MAX_SLOT");
lj_mcode_sync(p, (char *)p + sz);
#if LJ_TARGET_WINDOWS
{
DWORD oprot;
- VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot);
+ LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot);
}
#elif LJ_TARGET_POSIX
mprotect(p, sz, (PROT_READ|PROT_EXEC));
@@ -351,33 +402,78 @@ void lj_ccallback_mcode_free(CTState *cts)
goto done; \
} CALLBACK_HANDLE_REGARG_FP2
-#elif LJ_TARGET_PPC
+#elif LJ_TARGET_ARM64
#define CALLBACK_HANDLE_REGARG \
if (isfp) { \
- if (nfpr + 1 <= CCALL_NARG_FPR) { \
- sp = &cts->cb.fpr[nfpr++]; \
- cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
+ if (nfpr + n <= CCALL_NARG_FPR) { \
+ sp = &cts->cb.fpr[nfpr]; \
+ nfpr += n; \
goto done; \
+ } else { \
+ nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \
} \
- } else { /* Try to pass argument in GPRs. */ \
- if (n > 1) { \
- lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \
- ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
- } \
+ } else { \
+ if (!LJ_TARGET_OSX && n > 1) \
+ ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
if (ngpr + n <= maxgpr) { \
sp = &cts->cb.gpr[ngpr]; \
ngpr += n; \
goto done; \
+ } else { \
+ ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \
+ } \
+ }
+
+#elif LJ_TARGET_PPC
+
+#define CALLBACK_HANDLE_GPR \
+ if (n > 1) { \
+ lj_assertCTS(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \
+ ctype_isinteger(cta->info)) && n == 2, /* int64_t. */ \
+ "bad GPR type"); \
+ ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \
+ } \
+ if (ngpr + n <= maxgpr) { \
+ sp = &cts->cb.gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ }
+
+#if LJ_ABI_SOFTFP
+#define CALLBACK_HANDLE_REGARG \
+ CALLBACK_HANDLE_GPR \
+ UNUSED(isfp);
+#else
+#define CALLBACK_HANDLE_REGARG \
+ if (isfp) { \
+ if (nfpr + 1 <= CCALL_NARG_FPR) { \
+ sp = &cts->cb.fpr[nfpr++]; \
+ cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \
+ goto done; \
} \
+ } else { /* Try to pass argument in GPRs. */ \
+ CALLBACK_HANDLE_GPR \
}
+#endif
+#if !LJ_ABI_SOFTFP
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
*(double *)dp = *(float *)dp; /* FPRs always hold doubles. */
+#endif
-#elif LJ_TARGET_MIPS
+#elif LJ_TARGET_MIPS32
+#define CALLBACK_HANDLE_GPR \
+ if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
+ if (ngpr + n <= maxgpr) { \
+ sp = &cts->cb.gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ }
+
+#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */
#define CALLBACK_HANDLE_REGARG \
if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \
sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \
@@ -385,13 +481,36 @@ void lj_ccallback_mcode_free(CTState *cts)
goto done; \
} else { /* Try to pass argument in GPRs. */ \
nfpr = CCALL_NARG_FPR; \
- if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \
- if (ngpr + n <= maxgpr) { \
- sp = &cts->cb.gpr[ngpr]; \
- ngpr += n; \
- goto done; \
- } \
+ CALLBACK_HANDLE_GPR \
+ }
+#else /* MIPS32 soft-float */
+#define CALLBACK_HANDLE_REGARG \
+ CALLBACK_HANDLE_GPR \
+ UNUSED(isfp);
+#endif
+
+#define CALLBACK_HANDLE_RET \
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
+ ((float *)dp)[1] = *(float *)dp;
+
+#elif LJ_TARGET_MIPS64
+
+#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */
+#define CALLBACK_HANDLE_REGARG \
+ if (ngpr + n <= maxgpr) { \
+ sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
}
+#else /* MIPS64 soft-float */
+#define CALLBACK_HANDLE_REGARG \
+ if (ngpr + n <= maxgpr) { \
+ UNUSED(isfp); \
+ sp = (void*) &cts->cb.gpr[ngpr]; \
+ ngpr += n; \
+ goto done; \
+ }
+#endif
#define CALLBACK_HANDLE_RET \
if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \
@@ -411,6 +530,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
int gcsteps = 0;
CType *ct;
GCfunc *fn;
+ int fntp;
MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR;
#if CCALL_NARG_FPR
MSize nfpr = 0;
@@ -421,18 +541,27 @@ static void callback_conv_args(CTState *cts, lua_State *L)
if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) {
ct = ctype_get(cts, id);
- rid = ctype_cid(ct->info);
+ rid = ctype_cid(ct->info); /* Return type. x86: +(spadj<<16). */
fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot));
+ fntp = LJ_TFUNC;
} else { /* Must set up frame first, before throwing the error. */
ct = NULL;
rid = 0;
fn = (GCfunc *)L;
+ fntp = LJ_TTHREAD;
+ }
+ /* Continuation returns from callback. */
+ if (LJ_FR2) {
+ (o++)->u64 = LJ_CONT_FFI_CALLBACK;
+ (o++)->u64 = rid;
+ } else {
+ o->u32.lo = LJ_CONT_FFI_CALLBACK;
+ o->u32.hi = rid;
+ o++;
}
- o->u32.lo = LJ_CONT_FFI_CALLBACK; /* Continuation returns from callback. */
- o->u32.hi = rid; /* Return type. x86: +(spadj<<16). */
- o++;
- setframe_gc(o, obj2gco(fn));
- setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT);
+ setframe_gc(o, obj2gco(fn), fntp);
+ if (LJ_FR2) o++;
+ setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT);
L->top = L->base = ++o;
if (!ct)
lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK);
@@ -459,7 +588,7 @@ static void callback_conv_args(CTState *cts, lua_State *L)
CTSize sz;
int isfp;
MSize n;
- lua_assert(ctype_isfield(ctf->info));
+ lj_assertCTS(ctype_isfield(ctf->info), "field expected");
cta = ctype_rawchild(cts, ctf);
isfp = ctype_isfp(cta->info);
sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1);
@@ -474,7 +603,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
nsp += n;
done:
- if (LJ_BE && cta->size < CTSIZE_PTR)
+ if (LJ_BE && cta->size < CTSIZE_PTR
+#if LJ_TARGET_MIPS64
+ && !(isfp && nsp)
+#endif
+ )
sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size);
gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp);
}
@@ -483,9 +616,14 @@ static void callback_conv_args(CTState *cts, lua_State *L)
L->top = o;
#if LJ_TARGET_X86
/* Store stack adjustment for returns from non-cdecl callbacks. */
- if (ctype_cconv(ct->info) != CTCC_CDECL)
+ if (ctype_cconv(ct->info) != CTCC_CDECL) {
+#if LJ_FR2
+ (L->base-3)->u64 |= (nsp << (16+2));
+#else
(L->base-2)->u32.hi |= (nsp << (16+2));
#endif
+ }
+#endif
while (gcsteps-- > 0)
lj_gc_check(L);
}
@@ -493,7 +631,11 @@ static void callback_conv_args(CTState *cts, lua_State *L)
/* Convert Lua object to callback result. */
static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
{
+#if LJ_FR2
+ CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64);
+#else
CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi);
+#endif
#if LJ_TARGET_X86
cts->cb.gpr[2] = 0;
#endif
@@ -503,6 +645,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
if (ctype_isfp(ctr->info))
dp = (uint8_t *)&cts->cb.fpr[0];
#endif
+#if LJ_TARGET_ARM64 && LJ_BE
+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float))
+ dp = (uint8_t *)&cts->cb.fpr[0].f[1];
+#endif
lj_cconv_ct_tv(cts, ctr, dp, o, 0);
#ifdef CALLBACK_HANDLE_RET
CALLBACK_HANDLE_RET
@@ -516,6 +662,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o)
*(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp :
(int32_t)*(int16_t *)dp;
}
+#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE)
+ /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */
+ if (ctr->size <= 4 &&
+ (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info)))
+ *(int64_t *)dp = (int64_t)*(int32_t *)dp;
+#endif
#if LJ_TARGET_X86
if (ctype_isfp(ctr->info))
cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2;
@@ -528,8 +680,8 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf)
{
lua_State *L = cts->L;
global_State *g = cts->g;
- lua_assert(L != NULL);
- if (gcref(g->jit_L)) {
+ lj_assertG(L != NULL, "uninitialized cts->L in callback");
+ if (tvref(g->jit_base)) {
setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK));
if (g->panic) g->panic(L);
exit(EXIT_FAILURE);
@@ -562,9 +714,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o)
}
callback_conv_result(cts, L, o);
/* Finally drop C frame and continuation frame. */
- L->cframe = cframe_prev(L->cframe);
- L->top -= 2;
+ L->top -= 2+2*LJ_FR2;
L->base = obase;
+ L->cframe = cframe_prev(L->cframe);
cts->cb.slot = 0; /* Blacklist C function that called the callback. */
}
@@ -613,7 +765,7 @@ static CType *callback_checkfunc(CTState *cts, CType *ct)
CType *ctf = ctype_get(cts, fid);
if (!ctype_isattrib(ctf->info)) {
CType *cta;
- lua_assert(ctype_isfield(ctf->info));
+ lj_assertCTS(ctype_isfield(ctf->info), "field expected");
cta = ctype_rawchild(cts, ctf);
if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) ||
(ctype_isnum(cta->info) && cta->size <= 8)) ||
diff --git a/src/lj_cconv.c b/src/lj_cconv.c
index 8556952f..3bbfd3f1 100644
--- a/src/lj_cconv.c
+++ b/src/lj_cconv.c
@@ -8,6 +8,7 @@
#if LJ_HASFFI
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_tab.h"
#include "lj_ctype.h"
#include "lj_cdata.h"
@@ -122,19 +123,25 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
CTInfo dinfo = d->info, sinfo = s->info;
void *tmpptr;
- lua_assert(!ctype_isenum(dinfo) && !ctype_isenum(sinfo));
- lua_assert(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo));
+ lj_assertCTS(!ctype_isenum(dinfo) && !ctype_isenum(sinfo),
+ "unresolved enum");
+ lj_assertCTS(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo),
+ "unstripped attribute");
if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT)
goto err_conv;
/* Some basic sanity checks. */
- lua_assert(!ctype_isnum(dinfo) || dsize > 0);
- lua_assert(!ctype_isnum(sinfo) || ssize > 0);
- lua_assert(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4);
- lua_assert(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4);
- lua_assert(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize);
- lua_assert(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize);
+ lj_assertCTS(!ctype_isnum(dinfo) || dsize > 0, "bad size for number type");
+ lj_assertCTS(!ctype_isnum(sinfo) || ssize > 0, "bad size for number type");
+ lj_assertCTS(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4,
+ "bad size for bool type");
+ lj_assertCTS(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4,
+ "bad size for bool type");
+ lj_assertCTS(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize,
+ "bad size for integer type");
+ lj_assertCTS(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize,
+ "bad size for integer type");
switch (cconv_idx2(dinfo, sinfo)) {
/* Destination is a bool. */
@@ -357,7 +364,7 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s)
goto err_conv; /* Must be exact same type. */
copyval: /* Copy value. */
- lua_assert(dsize == ssize);
+ lj_assertCTS(dsize == ssize, "value copy with different sizes");
memcpy(dp, sp, dsize);
break;
@@ -389,7 +396,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid,
lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s,
(uint8_t *)&o->n, sp, 0);
/* Numbers are NOT canonicalized here! Beware of uninitialized data. */
- lua_assert(tvisnum(o));
+ lj_assertCTS(tvisnum(o), "non-canonical NaN passed");
}
} else {
uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0);
@@ -406,7 +413,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid,
CTSize sz;
copyval: /* Copy value. */
sz = s->size;
- lua_assert(sz != CTSIZE_INVALID);
+ lj_assertCTS(sz != CTSIZE_INVALID, "value copy with invalid size");
/* Attributes are stripped, qualifiers are kept (but mostly ignored). */
cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz);
setcdataV(cts->L, o, cd);
@@ -421,19 +428,22 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp)
CTInfo info = s->info;
CTSize pos, bsz;
uint32_t val;
- lua_assert(ctype_isbitfield(info));
+ lj_assertCTS(ctype_isbitfield(info), "bitfield expected");
/* NYI: packed bitfields may cause misaligned reads. */
switch (ctype_bitcsz(info)) {
case 4: val = *(uint32_t *)sp; break;
case 2: val = *(uint16_t *)sp; break;
case 1: val = *(uint8_t *)sp; break;
- default: lua_assert(0); val = 0; break;
+ default:
+ lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info));
+ val = 0;
+ break;
}
/* Check if a packed bitfield crosses a container boundary. */
pos = ctype_bitpos(info);
bsz = ctype_bitbsz(info);
- lua_assert(pos < 8*ctype_bitcsz(info));
- lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info));
+ lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position");
+ lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size");
if (pos + bsz > 8*ctype_bitcsz(info))
lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT);
if (!(info & CTF_BOOL)) {
@@ -448,8 +458,10 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp)
setintV(o, (int32_t)val);
}
} else {
- lua_assert(bsz == 1);
- setboolV(o, (val >> pos) & 1);
+ uint32_t b = (val >> pos) & 1;
+ lj_assertCTS(bsz == 1, "bad bool bitfield size");
+ setboolV(o, b);
+ setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */
}
return 0; /* No GC step needed. */
}
@@ -551,7 +563,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
sid = cdataV(o)->ctypeid;
s = ctype_get(cts, sid);
if (ctype_isref(s->info)) { /* Resolve reference for value. */
- lua_assert(s->size == CTSIZE_PTR);
+ lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized");
sp = *(void **)sp;
sid = ctype_cid(s->info);
}
@@ -571,7 +583,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
CType *cct = lj_ctype_getfield(cts, d, str, &ofs);
if (!cct || !ctype_isconstval(cct->info))
goto err_conv;
- lua_assert(d->size == 4);
+ lj_assertCTS(d->size == 4, "only 32 bit enum supported"); /* NYI */
sp = (uint8_t *)&cct->size;
sid = ctype_cid(cct->info);
} else if (ctype_isrefarray(d->info)) { /* Copy string to array. */
@@ -610,8 +622,10 @@ void lj_cconv_ct_tv(CTState *cts, CType *d,
tmpptr = uddata(ud);
if (ud->udtype == UDTYPE_IO_FILE)
tmpptr = *(void **)tmpptr;
+ else if (ud->udtype == UDTYPE_BUFFER)
+ tmpptr = ((SBufExt *)tmpptr)->r;
} else if (tvislightud(o)) {
- tmpptr = lightudV(o);
+ tmpptr = lightudV(cts->g, o);
} else if (tvisfunc(o)) {
void *p = lj_ccallback_new(cts, d, funcV(o));
if (p) {
@@ -635,10 +649,10 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
CTInfo info = d->info;
CTSize pos, bsz;
uint32_t val, mask;
- lua_assert(ctype_isbitfield(info));
+ lj_assertCTS(ctype_isbitfield(info), "bitfield expected");
if ((info & CTF_BOOL)) {
uint8_t tmpbool;
- lua_assert(ctype_bitbsz(info) == 1);
+ lj_assertCTS(ctype_bitbsz(info) == 1, "bad bool bitfield size");
lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0);
val = tmpbool;
} else {
@@ -647,8 +661,8 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
}
pos = ctype_bitpos(info);
bsz = ctype_bitbsz(info);
- lua_assert(pos < 8*ctype_bitcsz(info));
- lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info));
+ lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position");
+ lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size");
/* Check if a packed bitfield crosses a container boundary. */
if (pos + bsz > 8*ctype_bitcsz(info))
lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT);
@@ -659,7 +673,9 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o)
case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break;
case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break;
case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break;
- default: lua_assert(0); break;
+ default:
+ lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info));
+ break;
}
}
diff --git a/src/lj_cconv.h b/src/lj_cconv.h
index 2d1cb273..45b0ca1e 100644
--- a/src/lj_cconv.h
+++ b/src/lj_cconv.h
@@ -27,13 +27,14 @@ enum {
static LJ_AINLINE uint32_t cconv_idx(CTInfo info)
{
uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */
- lua_assert(ctype_type(info) <= CT_MAYCONVERT);
+ lj_assertX(ctype_type(info) <= CT_MAYCONVERT,
+ "cannot convert ctype %08x", info);
#if LJ_64
idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u);
#else
idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u);
#endif
- lua_assert(idx < 8);
+ lj_assertX(idx < 8, "cannot convert ctype %08x", info);
return idx;
}
diff --git a/src/lj_cdata.c b/src/lj_cdata.c
index 425e6bcf..01a74f5d 100644
--- a/src/lj_cdata.c
+++ b/src/lj_cdata.c
@@ -9,7 +9,6 @@
#include "lj_gc.h"
#include "lj_err.h"
-#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ctype.h"
#include "lj_cconv.h"
@@ -27,20 +26,20 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id)
}
/* Allocate variable-sized or specially aligned C data object. */
-GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
+GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align)
{
global_State *g;
MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) +
(align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0);
- char *p = lj_mem_newt(cts->L, extra + sz, char);
+ char *p = lj_mem_newt(L, extra + sz, char);
uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata);
uintptr_t almask = (1u << align) - 1u;
GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata));
- lua_assert((char *)cd - p < 65536);
+ lj_assertL((char *)cd - p < 65536, "excessive cdata alignment");
cdatav(cd)->offset = (uint16_t)((char *)cd - p);
cdatav(cd)->extra = extra;
cdatav(cd)->len = sz;
- g = cts->g;
+ g = G(L);
setgcrefr(cd->nextgc, g->gc.root);
setgcref(g->gc.root, obj2gco(cd));
newwhite(g, obj2gco(cd));
@@ -50,6 +49,15 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align)
return cd;
}
+/* Allocate arbitrary C data object. */
+GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info)
+{
+ if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN)
+ return lj_cdata_new(cts, id, sz);
+ else
+ return lj_cdata_newv(cts->L, id, sz, ctype_align(info));
+}
+
/* Free a C data object. */
void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
{
@@ -68,29 +76,30 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd)
} else if (LJ_LIKELY(!cdataisv(cd))) {
CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid);
CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR;
- lua_assert(ctype_hassize(ct->info) || ctype_isfunc(ct->info) ||
- ctype_isextern(ct->info));
+ lj_assertG(ctype_hassize(ct->info) || ctype_isfunc(ct->info) ||
+ ctype_isextern(ct->info), "free of ctype without a size");
lj_mem_free(g, cd, sizeof(GCcdata) + sz);
} else {
lj_mem_free(g, memcdatav(cd), sizecdatav(cd));
}
}
-TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd)
+void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it)
{
- global_State *g = G(L);
- GCtab *t = ctype_ctsG(g)->finalizer;
+ GCtab *t = ctype_ctsG(G(L))->finalizer;
if (gcref(t->metatable)) {
/* Add cdata to finalizer table, if still enabled. */
TValue *tv, tmp;
setcdataV(L, &tmp, cd);
lj_gc_anybarriert(L, t);
tv = lj_tab_set(L, t, &tmp);
- cd->marked |= LJ_GC_CDATA_FIN;
- return tv;
- } else {
- /* Otherwise return dummy TValue. */
- return &g->tmptv;
+ if (it == LJ_TNIL) {
+ setnilV(tv);
+ cd->marked &= ~LJ_GC_CDATA_FIN;
+ } else {
+ setgcV(L, tv, obj, it);
+ cd->marked |= LJ_GC_CDATA_FIN;
+ }
}
}
@@ -106,7 +115,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp,
/* Resolve reference for cdata object. */
if (ctype_isref(ct->info)) {
- lua_assert(ct->size == CTSIZE_PTR);
+ lj_assertCTS(ct->size == CTSIZE_PTR, "ref is not pointer-sized");
p = *(uint8_t **)p;
ct = ctype_child(cts, ct);
}
@@ -117,13 +126,19 @@ collect_attrib:
if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size;
ct = ctype_child(cts, ct);
}
- lua_assert(!ctype_isref(ct->info)); /* Interning rejects refs to refs. */
+ /* Interning rejects refs to refs. */
+ lj_assertCTS(!ctype_isref(ct->info), "bad ref of ref");
if (tvisint(key)) {
idx = (ptrdiff_t)intV(key);
goto integer_key;
} else if (tvisnum(key)) { /* Numeric key. */
- idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key));
+#ifdef _MSC_VER
+ /* Workaround for MSVC bug. */
+ volatile
+#endif
+ lua_Number n = numV(key);
+ idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
integer_key:
if (ctype_ispointer(ct->info)) {
CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */
@@ -198,7 +213,8 @@ collect_attrib:
static void cdata_getconst(CTState *cts, TValue *o, CType *ct)
{
CType *ctt = ctype_child(cts, ct);
- lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4);
+ lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4,
+ "only 32 bit const supported"); /* NYI */
/* Constants are already zero-extended/sign-extended to 32 bits. */
if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0)
setnumV(o, (lua_Number)(uint32_t)ct->size);
@@ -219,13 +235,14 @@ int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp)
}
/* Get child type of pointer/array/field. */
- lua_assert(ctype_ispointer(s->info) || ctype_isfield(s->info));
+ lj_assertCTS(ctype_ispointer(s->info) || ctype_isfield(s->info),
+ "pointer or field expected");
sid = ctype_cid(s->info);
s = ctype_get(cts, sid);
/* Resolve reference for field. */
if (ctype_isref(s->info)) {
- lua_assert(s->size == CTSIZE_PTR);
+ lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized");
sp = *(uint8_t **)sp;
sid = ctype_cid(s->info);
s = ctype_get(cts, sid);
@@ -252,12 +269,13 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual)
}
/* Get child type of pointer/array/field. */
- lua_assert(ctype_ispointer(d->info) || ctype_isfield(d->info));
+ lj_assertCTS(ctype_ispointer(d->info) || ctype_isfield(d->info),
+ "pointer or field expected");
d = ctype_child(cts, d);
/* Resolve reference for field. */
if (ctype_isref(d->info)) {
- lua_assert(d->size == CTSIZE_PTR);
+ lj_assertCTS(d->size == CTSIZE_PTR, "ref is not pointer-sized");
dp = *(uint8_t **)dp;
d = ctype_child(cts, d);
}
@@ -272,7 +290,8 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual)
d = ctype_child(cts, d);
}
- lua_assert(ctype_hassize(d->info) && !ctype_isvoid(d->info));
+ lj_assertCTS(ctype_hassize(d->info), "store to ctype without size");
+ lj_assertCTS(!ctype_isvoid(d->info), "store to void type");
if (((d->info|qual) & CTF_CONST)) {
err_const:
diff --git a/src/lj_cdata.h b/src/lj_cdata.h
index 2a82a9d8..de52e8aa 100644
--- a/src/lj_cdata.h
+++ b/src/lj_cdata.h
@@ -18,7 +18,7 @@ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz)
if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */
return ((void *)(uintptr_t)*(uint32_t *)p);
} else {
- lua_assert(sz == CTSIZE_PTR);
+ lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz);
return *(void **)p;
}
}
@@ -29,7 +29,7 @@ static LJ_AINLINE void cdata_setptr(void *p, CTSize sz, const void *v)
if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */
*(uint32_t *)p = (uint32_t)(uintptr_t)v;
} else {
- lua_assert(sz == CTSIZE_PTR);
+ lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz);
*(void **)p = (void *)v;
}
}
@@ -40,7 +40,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new(CTState *cts, CTypeID id, CTSize sz)
GCcdata *cd;
#ifdef LUA_USE_ASSERT
CType *ct = ctype_raw(cts, id);
- lua_assert((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz);
+ lj_assertCTS((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz,
+ "inconsistent size of fixed-size cdata alloc");
#endif
cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz);
cd->gct = ~LJ_TCDATA;
@@ -58,11 +59,14 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz)
}
LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id);
-LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz,
+LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz,
CTSize align);
+LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz,
+ CTInfo info);
LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd);
-LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd);
+LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj,
+ uint32_t it);
LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key,
uint8_t **pp, CTInfo *qual);
diff --git a/src/lj_clib.c b/src/lj_clib.c
index ab2db33a..f0ef6edd 100644
--- a/src/lj_clib.c
+++ b/src/lj_clib.c
@@ -16,6 +16,7 @@
#include "lj_cconv.h"
#include "lj_cdata.h"
#include "lj_clib.h"
+#include "lj_strfmt.h"
/* -- OS-specific functions ----------------------------------------------- */
@@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name)
#endif
) {
if (!strchr(name, '.')) {
- name = lj_str_pushf(L, CLIB_SOEXT, name);
+ name = lj_strfmt_pushf(L, CLIB_SOEXT, name);
L->top--;
#if LJ_TARGET_CYGWIN
} else {
@@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name)
}
if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] &&
name[2] == CLIB_SOPREFIX[2])) {
- name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name);
+ name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name);
L->top--;
}
}
@@ -158,11 +159,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*);
/* Default libraries. */
enum {
CLIB_HANDLE_EXE,
+#if !LJ_TARGET_UWP
CLIB_HANDLE_DLL,
CLIB_HANDLE_CRT,
CLIB_HANDLE_KERNEL32,
CLIB_HANDLE_USER32,
CLIB_HANDLE_GDI32,
+#endif
CLIB_HANDLE_MAX
};
@@ -172,11 +175,19 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
const char *name)
{
DWORD err = GetLastError();
+#if LJ_TARGET_XBOXONE
+ wchar_t wbuf[128];
+ char buf[128*2];
+ if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
+ NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) ||
+ !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL))
+#else
char buf[128];
if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM,
NULL, err, 0, buf, sizeof(buf), NULL))
+#endif
buf[0] = '\0';
- lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf));
+ lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf));
}
static int clib_needext(const char *s)
@@ -191,7 +202,7 @@ static int clib_needext(const char *s)
static const char *clib_extname(lua_State *L, const char *name)
{
if (clib_needext(name)) {
- name = lj_str_pushf(L, "%s.dll", name);
+ name = lj_strfmt_pushf(L, "%s.dll", name);
L->top--;
}
return name;
@@ -200,7 +211,7 @@ static const char *clib_extname(lua_State *L, const char *name)
static void *clib_loadlib(lua_State *L, const char *name, int global)
{
DWORD oldwerr = GetLastError();
- void *h = (void *)LoadLibraryA(clib_extname(L, name));
+ void *h = LJ_WIN_LOADLIBA(clib_extname(L, name));
if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name);
SetLastError(oldwerr);
UNUSED(global);
@@ -210,6 +221,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global)
static void clib_unloadlib(CLibrary *cl)
{
if (cl->handle == CLIB_DEFHANDLE) {
+#if !LJ_TARGET_UWP
MSize i;
for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) {
void *h = clib_def_handle[i];
@@ -218,11 +230,16 @@ static void clib_unloadlib(CLibrary *cl)
FreeLibrary((HINSTANCE)h);
}
}
+#endif
} else if (cl->handle) {
FreeLibrary((HINSTANCE)cl->handle);
}
}
+#if LJ_TARGET_UWP
+EXTERN_C IMAGE_DOS_HEADER __ImageBase;
+#endif
+
static void *clib_getsym(CLibrary *cl, const char *name)
{
void *p = NULL;
@@ -231,6 +248,9 @@ static void *clib_getsym(CLibrary *cl, const char *name)
for (i = 0; i < CLIB_HANDLE_MAX; i++) {
HINSTANCE h = (HINSTANCE)clib_def_handle[i];
if (!(void *)h) { /* Resolve default library handles (once). */
+#if LJ_TARGET_UWP
+ h = (HINSTANCE)&__ImageBase;
+#else
switch (i) {
case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break;
case CLIB_HANDLE_DLL:
@@ -241,11 +261,12 @@ static void *clib_getsym(CLibrary *cl, const char *name)
GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
(const char *)&_fmode, &h);
break;
- case CLIB_HANDLE_KERNEL32: h = LoadLibraryA("kernel32.dll"); break;
- case CLIB_HANDLE_USER32: h = LoadLibraryA("user32.dll"); break;
- case CLIB_HANDLE_GDI32: h = LoadLibraryA("gdi32.dll"); break;
+ case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break;
+ case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break;
+ case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break;
}
if (!h) continue;
+#endif
clib_def_handle[i] = (void *)h;
}
p = (void *)GetProcAddress(h, name);
@@ -264,7 +285,7 @@ static void *clib_getsym(CLibrary *cl, const char *name)
LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt,
const char *name)
{
- lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS"));
+ lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS"));
}
static void *clib_loadlib(lua_State *L, const char *name, int global)
@@ -329,7 +350,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name));
if (ctype_isconstval(ct->info)) {
CType *ctt = ctype_child(cts, ct);
- lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4);
+ lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4,
+ "only 32 bit const supported"); /* NYI */
if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0)
setnumV(tv, (lua_Number)(uint32_t)ct->size);
else
@@ -341,14 +363,15 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name)
#endif
void *p = clib_getsym(cl, sym);
GCcdata *cd;
- lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info));
+ lj_assertCTS(ctype_isfunc(ct->info) || ctype_isextern(ct->info),
+ "unexpected ctype %08x in clib", ct->info);
#if LJ_TARGET_X86 && LJ_ABI_WIN
/* Retry with decorated name for fastcall/stdcall functions. */
if (!p && ctype_isfunc(ct->info)) {
CTInfo cconv = ctype_cconv(ct->info);
if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) {
CTSize sz = clib_func_argsize(cts, ct);
- const char *symd = lj_str_pushf(L,
+ const char *symd = lj_strfmt_pushf(L,
cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d",
sym, sz);
L->top--;
diff --git a/src/lj_cparse.c b/src/lj_cparse.c
index df85d23b..7fd83997 100644
--- a/src/lj_cparse.c
+++ b/src/lj_cparse.c
@@ -9,13 +9,14 @@
#include "lj_gc.h"
#include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
#include "lj_ctype.h"
#include "lj_cparse.h"
#include "lj_frame.h"
#include "lj_vm.h"
#include "lj_char.h"
#include "lj_strscan.h"
+#include "lj_strfmt.h"
/*
** Important note: this is NOT a validating C parser! This is a minimal
@@ -27,6 +28,30 @@
** If in doubt, please check the input against your favorite C compiler.
*/
+#ifdef LUA_USE_ASSERT
+#define lj_assertCP(c, ...) (lj_assertG_(G(cp->L), (c), __VA_ARGS__))
+#else
+#define lj_assertCP(c, ...) ((void)cp)
+#endif
+
+/* -- Miscellaneous ------------------------------------------------------- */
+
+/* Match string against a C literal. */
+#define cp_str_is(str, k) \
+ ((str)->len == sizeof(k)-1 && !memcmp(strdata(str), k, sizeof(k)-1))
+
+/* Check string against a linear list of matches. */
+int lj_cparse_case(GCstr *str, const char *match)
+{
+ MSize len;
+ int n;
+ for (n = 0; (len = (MSize)*match++); n++, match += len) {
+ if (str->len == len && !memcmp(match, strdata(str), len))
+ return n;
+ }
+ return -1;
+}
+
/* -- C lexer ------------------------------------------------------------- */
/* C lexer token names. */
@@ -42,13 +67,13 @@ LJ_NORET static void cp_err(CPState *cp, ErrMsg em);
static const char *cp_tok2str(CPState *cp, CPToken tok)
{
- lua_assert(tok < CTOK_FIRSTDECL);
+ lj_assertCP(tok < CTOK_FIRSTDECL, "bad CPToken %d", tok);
if (tok > CTOK_OFS)
return ctoknames[tok-CTOK_OFS-1];
else if (!lj_char_iscntrl(tok))
- return lj_str_pushf(cp->L, "%c", tok);
+ return lj_strfmt_pushf(cp->L, "%c", tok);
else
- return lj_str_pushf(cp->L, "char(%d)", tok);
+ return lj_strfmt_pushf(cp->L, "char(%d)", tok);
}
/* End-of-line? */
@@ -85,24 +110,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp)
return cp_get(cp);
}
-/* Grow save buffer. */
-static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c)
-{
- MSize newsize;
- if (cp->sb.sz >= CPARSE_MAX_BUF/2)
- cp_err(cp, LJ_ERR_XELEM);
- newsize = cp->sb.sz * 2;
- lj_str_resizebuf(cp->L, &cp->sb, newsize);
- cp->sb.buf[cp->sb.n++] = (char)c;
-}
-
/* Save character in buffer. */
static LJ_AINLINE void cp_save(CPState *cp, CPChar c)
{
-  if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz))
-    cp_save_grow(cp, c);
-  else
-    cp->sb.buf[cp->sb.n++] = (char)c;
+  /* NOTE(review): replaces the removed cp_save_grow slow path above;
+  ** presumably lj_buf_putb grows the buffer internally -- confirm in lj_buf.h.
+  */
+  lj_buf_putb(&cp->sb, c);
}
/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
@@ -122,20 +133,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...)
tokstr = NULL;
} else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING ||
tok >= CTOK_FIRSTDECL) {
- if (cp->sb.n == 0) cp_save(cp, '$');
+ if (cp->sb.w == cp->sb.b) cp_save(cp, '$');
cp_save(cp, '\0');
- tokstr = cp->sb.buf;
+ tokstr = cp->sb.b;
} else {
tokstr = cp_tok2str(cp, tok);
}
L = cp->L;
va_start(argp, em);
- msg = lj_str_pushvf(L, err2msg(em), argp);
+ msg = lj_strfmt_pushvf(L, err2msg(em), argp);
va_end(argp);
if (tokstr)
- msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
+ msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr);
if (cp->linenumber > 1)
- msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber);
+ msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber);
lj_err_callermsg(L, msg);
}
@@ -164,7 +175,8 @@ static CPToken cp_number(CPState *cp)
TValue o;
do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
cp_save(cp, '\0');
- fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C);
+ fmt = lj_strscan_scan((const uint8_t *)(cp->sb.b), sbuflen(&cp->sb)-1,
+ &o, STRSCAN_OPT_C);
if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32;
else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32;
else if (!(cp->mode & CPARSE_MODE_SKIP))
@@ -177,7 +189,7 @@ static CPToken cp_number(CPState *cp)
static CPToken cp_ident(CPState *cp)
{
do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp)));
- cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n);
+ cp->str = lj_buf_str(cp->L, &cp->sb);
cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask);
if (ctype_type(cp->ct->info) == CT_KW)
return ctype_cid(cp->ct->info);
@@ -263,11 +275,11 @@ static CPToken cp_string(CPState *cp)
}
cp_get(cp);
if (delim == '"') {
- cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n);
+ cp->str = lj_buf_str(cp->L, &cp->sb);
return CTOK_STRING;
} else {
- if (cp->sb.n != 1) cp_err_token(cp, '\'');
- cp->val.i32 = (int32_t)(char)cp->sb.buf[0];
+ if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\'');
+ cp->val.i32 = (int32_t)(char)*cp->sb.b;
cp->val.id = CTID_INT32;
return CTOK_INTEGER;
}
@@ -296,7 +308,7 @@ static void cp_comment_cpp(CPState *cp)
/* Lexical scanner for C. Only a minimal subset is implemented. */
static CPToken cp_next_(CPState *cp)
{
- lj_str_resetbuf(&cp->sb);
+ lj_buf_reset(&cp->sb);
for (;;) {
if (lj_char_isident(cp->c))
return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp);
@@ -385,9 +397,8 @@ static void cp_init(CPState *cp)
cp->depth = 0;
cp->curpack = 0;
cp->packstack[0] = 255;
- lj_str_initbuf(&cp->sb);
- lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF);
- lua_assert(cp->p != NULL);
+ lj_buf_init(cp->L, &cp->sb);
+ lj_assertCP(cp->p != NULL, "uninitialized cp->p");
cp_get(cp); /* Read-ahead first char. */
cp->tok = 0;
cp->tmask = CPNS_DEFAULT;
@@ -398,7 +409,7 @@ static void cp_init(CPState *cp)
static void cp_cleanup(CPState *cp)
{
global_State *g = G(cp->L);
- lj_str_freebuf(g, &cp->sb);
+ lj_buf_free(g, &cp->sb);
}
/* Check and consume optional token. */
@@ -848,12 +859,13 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
/* The cid is already part of info for copies of pointers/functions. */
idx = ct->next;
if (ctype_istypedef(info)) {
- lua_assert(id == 0);
+ lj_assertCP(id == 0, "typedef not at toplevel");
id = ctype_cid(info);
/* Always refetch info/size, since struct/enum may have been completed. */
cinfo = ctype_get(cp->cts, id)->info;
csize = ctype_get(cp->cts, id)->size;
- lua_assert(ctype_isstruct(cinfo) || ctype_isenum(cinfo));
+ lj_assertCP(ctype_isstruct(cinfo) || ctype_isenum(cinfo),
+ "typedef of bad type");
} else if (ctype_isfunc(info)) { /* Intern function. */
CType *fct;
CTypeID fid;
@@ -886,7 +898,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
/* Inherit csize/cinfo from original type. */
} else {
if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */
- lua_assert(id == 0);
+ lj_assertCP(id == 0, "number not at toplevel");
if (!(info & CTF_BOOL)) {
CTSize msize = ctype_msizeP(decl->attr);
CTSize vsize = ctype_vsizeP(decl->attr);
@@ -941,7 +953,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN);
info |= (cinfo & CTF_QUAL); /* Inherit qual. */
} else {
- lua_assert(ctype_isvoid(info));
+ lj_assertCP(ctype_isvoid(info), "bad ctype %08x", info);
}
csize = size;
cinfo = info+id;
@@ -953,8 +965,6 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl)
/* -- C declaration parser ------------------------------------------------ */
-#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be)
-
/* Reset declaration state to declaration specifier. */
static void cp_decl_reset(CPDecl *decl)
{
@@ -1031,7 +1041,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl)
if (cp->tok == CTOK_STRING) {
GCstr *str = cp->str;
while (cp_next(cp) == CTOK_STRING) {
- lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
+ lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str));
cp->L->top--;
str = strV(cp->L->top);
}
@@ -1083,44 +1093,57 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl)
if (cp->tok == CTOK_IDENT) {
GCstr *attrstr = cp->str;
cp_next(cp);
- switch (attrstr->hash) {
- case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */
+ switch (lj_cparse_case(attrstr,
+ "\007aligned" "\013__aligned__"
+ "\006packed" "\012__packed__"
+ "\004mode" "\010__mode__"
+ "\013vector_size" "\017__vector_size__"
+#if LJ_TARGET_X86
+ "\007regparm" "\013__regparm__"
+ "\005cdecl" "\011__cdecl__"
+ "\010thiscall" "\014__thiscall__"
+ "\010fastcall" "\014__fastcall__"
+ "\007stdcall" "\013__stdcall__"
+ "\012sseregparm" "\016__sseregparm__"
+#endif
+ )) {
+ case 0: case 1: /* aligned */
cp_decl_align(cp, decl);
break;
- case H_(42eb47de,f0ede26c): case H_(29f48a09,cf383e0c): /* packed */
+ case 2: case 3: /* packed */
decl->attr |= CTFP_PACKED;
break;
- case H_(0a84eef6,8dfab04c): case H_(995cf92c,d5696591): /* mode */
+ case 4: case 5: /* mode */
cp_decl_mode(cp, decl);
break;
- case H_(0ab31997,2d5213fa): case H_(bf875611,200e9990): /* vector_size */
+ case 6: case 7: /* vector_size */
{
CTSize vsize = cp_decl_sizeattr(cp);
if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize));
}
break;
#if LJ_TARGET_X86
- case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */
+ case 8: case 9: /* regparm */
CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp));
decl->fattr |= CTFP_CCONV;
break;
- case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */
+ case 10: case 11: /* cdecl */
CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL);
decl->fattr |= CTFP_CCONV;
break;
- case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */
+ case 12: case 13: /* thiscall */
CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL);
decl->fattr |= CTFP_CCONV;
break;
- case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */
+ case 14: case 15: /* fastcall */
CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL);
decl->fattr |= CTFP_CCONV;
break;
- case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */
+ case 16: case 17: /* stdcall */
CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL);
decl->fattr |= CTFP_CCONV;
break;
- case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */
+ case 18: case 19: /* sseregparm */
decl->fattr |= CTF_SSEREGPARM;
decl->fattr |= CTFP_CCONV;
break;
@@ -1152,16 +1175,13 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl)
while (cp->tok == CTOK_IDENT) {
GCstr *attrstr = cp->str;
cp_next(cp);
- switch (attrstr->hash) {
- case H_(bc2395fa,98f267f8): /* align */
+ if (cp_str_is(attrstr, "align")) {
cp_decl_align(cp, decl);
- break;
- default: /* Ignore all other attributes. */
+ } else { /* Ignore all other attributes. */
if (cp_opt(cp, '(')) {
while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp);
cp_check(cp, ')');
}
- break;
}
}
cp_check(cp, ')');
@@ -1572,7 +1592,7 @@ end_decl:
cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC);
sz = sizeof(int);
}
- lua_assert(sz != 0);
+ lj_assertCP(sz != 0, "basic ctype with zero size");
info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */
info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */
cp_push(decl, info, sz);
@@ -1741,17 +1761,16 @@ static CTypeID cp_decl_abstract(CPState *cp)
static void cp_pragma(CPState *cp, BCLine pragmaline)
{
cp_next(cp);
- if (cp->tok == CTOK_IDENT &&
- cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */
+ if (cp->tok == CTOK_IDENT && cp_str_is(cp->str, "pack")) {
cp_next(cp);
cp_check(cp, '(');
if (cp->tok == CTOK_IDENT) {
- if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */
+ if (cp_str_is(cp->str, "push")) {
if (cp->curpack < CPARSE_MAX_PACKSTACK) {
cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack];
cp->curpack++;
}
- } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */
+ } else if (cp_str_is(cp->str, "pop")) {
if (cp->curpack > 0) cp->curpack--;
} else {
cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
@@ -1773,6 +1792,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline)
}
}
+/* Handle line number. */
+static void cp_line(CPState *cp, BCLine hashline)
+{
+  /* Line number parsed from the '#' directive (cp->val set by the lexer). */
+  BCLine newline = cp->val.u32;
+  /* TODO: Handle file name and include it in error messages. */
+  /* Consume any remaining tokens on the directive's own line. */
+  while (cp->tok != CTOK_EOF && cp->linenumber == hashline)
+    cp_next(cp);
+  /* Resynchronize the lexer's line counter with the directive. */
+  cp->linenumber = newline;
+}
+
/* Parse multiple C declarations of types or extern identifiers. */
static void cp_decl_multi(CPState *cp)
{
@@ -1785,12 +1814,21 @@ static void cp_decl_multi(CPState *cp)
continue;
}
if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */
- BCLine pragmaline = cp->linenumber;
- if (!(cp_next(cp) == CTOK_IDENT &&
- cp->str->hash == H_(f5e6b4f8,1d509107))) /* pragma */
+ BCLine hashline = cp->linenumber;
+ CPToken tok = cp_next(cp);
+ if (tok == CTOK_INTEGER) {
+ cp_line(cp, hashline);
+ continue;
+ } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "line")) {
+ if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok);
+ cp_line(cp, hashline);
+ continue;
+ } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "pragma")) {
+ cp_pragma(cp, hashline);
+ continue;
+ } else {
cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL);
- cp_pragma(cp, pragmaline);
- continue;
+ }
}
scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC);
if ((cp->tok == ';' || cp->tok == CTOK_EOF) &&
@@ -1814,7 +1852,7 @@ static void cp_decl_multi(CPState *cp)
/* Treat both static and extern function declarations as extern. */
ct = ctype_get(cp->cts, ctypeid);
/* We always get new anonymous functions (typedefs are copied). */
- lua_assert(gcref(ct->name) == NULL);
+ lj_assertCP(gcref(ct->name) == NULL, "unexpected named function");
id = ctypeid; /* Just name it. */
} else if ((scl & CDF_STATIC)) { /* Accept static constants. */
id = cp_decl_constinit(cp, &ct, ctypeid);
@@ -1856,8 +1894,6 @@ static void cp_decl_single(CPState *cp)
if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF);
}
-#undef H_
-
/* ------------------------------------------------------------------------ */
/* Protected callback for C parser. */
@@ -1873,7 +1909,7 @@ static TValue *cpcparser(lua_State *L, lua_CFunction dummy, void *ud)
cp_decl_single(cp);
if (cp->param && cp->param != cp->L->top)
cp_err(cp, LJ_ERR_FFI_NUMPARAM);
- lua_assert(cp->depth == 0);
+ lj_assertCP(cp->depth == 0, "unbalanced cparser declaration depth");
return NULL;
}
diff --git a/src/lj_cparse.h b/src/lj_cparse.h
index df884497..c0f61edc 100644
--- a/src/lj_cparse.h
+++ b/src/lj_cparse.h
@@ -60,6 +60,8 @@ typedef struct CPState {
LJ_FUNC int lj_cparse(CPState *cp);
+LJ_FUNC int lj_cparse_case(GCstr *str, const char *match);
+
#endif
#endif
diff --git a/src/lj_crecord.c b/src/lj_crecord.c
index 3f3552a6..bc21d859 100644
--- a/src/lj_crecord.c
+++ b/src/lj_crecord.c
@@ -11,13 +11,13 @@
#if LJ_HASJIT && LJ_HASFFI
#include "lj_err.h"
-#include "lj_str.h"
#include "lj_tab.h"
#include "lj_frame.h"
#include "lj_ctype.h"
#include "lj_cdata.h"
#include "lj_cparse.h"
#include "lj_cconv.h"
+#include "lj_carith.h"
#include "lj_clib.h"
#include "lj_ccall.h"
#include "lj_ff.h"
@@ -31,6 +31,7 @@
#include "lj_snap.h"
#include "lj_crecord.h"
#include "lj_dispatch.h"
+#include "lj_strfmt.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
@@ -60,7 +61,8 @@ static GCcdata *argv2cdata(jit_State *J, TRef tr, cTValue *o)
static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr)
{
CTypeID id;
- lua_assert(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID);
+ lj_assertJ(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID,
+ "expected CTypeID cdata");
id = *(CTypeID *)cdataptr(cd);
tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT);
emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id));
@@ -211,7 +213,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp,
ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0);
ml[i].trofs = trofs;
i++;
- rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1;
+ rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1;
if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */
rwin = 0;
for ( ; j < i; j++) {
@@ -236,13 +238,14 @@ static void crec_copy(jit_State *J, TRef trdst, TRef trsrc, TRef trlen,
if (len > CREC_COPY_MAXLEN) goto fallback;
if (ct) {
CTState *cts = ctype_ctsG(J2G(J));
- lua_assert(ctype_isarray(ct->info) || ctype_isstruct(ct->info));
+ lj_assertJ(ctype_isarray(ct->info) || ctype_isstruct(ct->info),
+ "copy of non-aggregate");
if (ctype_isarray(ct->info)) {
CType *cct = ctype_rawchild(cts, ct);
tp = crec_ct2irt(cts, cct);
if (tp == IRT_CDATA) goto rawcopy;
step = lj_ir_type_size[tp];
- lua_assert((len & (step-1)) == 0);
+ lj_assertJ((len & (step-1)) == 0, "copy of fractional size");
} else if ((ct->info & CTF_UNION)) {
step = (1u << ctype_align(ct->info));
goto rawcopy;
@@ -441,7 +444,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
/* fallthrough */
case CCX(I, F):
if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
- sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY);
+ sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY);
goto xstore;
case CCX(I, P):
case CCX(I, A):
@@ -521,7 +524,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
if (st == IRT_CDATA) goto err_nyi;
/* The signed conversion is cheaper. x64 really has 47 bit pointers. */
sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32,
- st, IRCONV_TRUNC|IRCONV_ANY);
+ st, IRCONV_ANY);
goto xstore;
/* Destination is an array. */
@@ -613,10 +616,12 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
sp = lj_ir_kptr(J, NULL);
} else if (tref_isudata(sp)) {
GCudata *ud = udataV(sval);
- if (ud->udtype == UDTYPE_IO_FILE) {
+ if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) {
TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE);
- emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE));
- sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE);
+ emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype));
+ sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp,
+ ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE :
+ IRFL_SBUF_R);
} else {
sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata)));
}
@@ -628,7 +633,8 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
/* Specialize to the name of the enum constant. */
emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str));
if (cct && ctype_isconstval(cct->info)) {
- lua_assert(ctype_child(cts, cct)->size == 4);
+ lj_assertJ(ctype_child(cts, cct)->size == 4,
+ "only 32 bit const supported"); /* NYI */
svisnz = (void *)(intptr_t)(ofs != 0);
sp = lj_ir_kint(J, (int32_t)ofs);
sid = ctype_cid(cct->info);
@@ -640,12 +646,22 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval)
sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr)));
sid = CTID_A_CCHAR;
}
- } else { /* NYI: tref_istab(sp), tref_islightud(sp). */
+ } else if (tref_islightud(sp)) {
+#if LJ_64
+ lj_trace_err(J, LJ_TRERR_NYICONV);
+#endif
+ } else { /* NYI: tref_istab(sp). */
IRType t;
sid = argv2cdata(J, sp, sval)->ctypeid;
s = ctype_raw(cts, sid);
svisnz = cdataptr(cdataV(sval));
- t = crec_ct2irt(cts, s);
+ if (ctype_isfunc(s->info)) {
+ sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR);
+ s = ctype_get(cts, sid);
+ t = IRT_PTR;
+ } else {
+ t = crec_ct2irt(cts, s);
+ }
if (ctype_isptr(s->info)) {
sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR);
if (ctype_isref(s->info)) {
@@ -700,6 +716,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz)
return tr;
}
+/* Tailcall to function. */
+static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv)
+{
+  TRef kfunc = lj_ir_kfunc(J, funcV(tv));
+#if LJ_FR2
+  /* Two-slot frame layout: function ref and frame marker in separate slots. */
+  J->base[-2] = kfunc;
+  J->base[-1] = TREF_FRAME;
+#else
+  /* One-slot frame layout: function ref and frame marker share a slot. */
+  J->base[-1] = kfunc | TREF_FRAME;
+#endif
+  rd->nres = -1;  /* Pending tailcall. */
+}
+
/* Record ctype __index/__newindex metamethods. */
static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
RecordFFData *rd)
@@ -709,8 +738,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
if (!tv)
lj_trace_err(J, LJ_TRERR_BADTYPE);
if (tvisfunc(tv)) {
- J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
- rd->nres = -1; /* Pending tailcall. */
+ crec_tailcall(J, rd, tv);
} else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) {
/* Specialize to result of __index lookup. */
cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]);
@@ -727,6 +755,48 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct,
}
}
+/* Record bitfield load/store. */
+static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info)
+{
+  /* Derive the IR load type from the container size and signedness. */
+  IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0);
+  TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0);
+  CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz;
+  lj_assertJ(t <= IRT_U32, "only 32 bit bitfields supported");  /* NYI */
+  if (rd->data == 0) {  /* __index metamethod. */
+    if ((info & CTF_BOOL)) {
+      /* Isolate the single bit at position pos. */
+      tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos))));
+      /* Assume not equal to zero. Fixup and emit pending guard later. */
+      lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0));
+      J->postproc = LJ_POST_FIXGUARD;
+      tr = TREF_TRUE;
+    } else if (!(info & CTF_UNSIGNED)) {
+      /* Signed field: shift the field to the top, then arithmetic-shift
+      ** back down to sign-extend the field value.
+      */
+      tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos));
+      tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift));
+    } else {
+      lj_assertJ(bsz < 32, "unexpected full bitfield index");
+      /* Unsigned field: shift down and mask off the field bits. */
+      tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos));
+      tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1)));
+      /* We can omit the U32 to NUM conversion, since bsz < 32. */
+    }
+    J->base[0] = tr;
+  } else {  /* __newindex metamethod. */
+    CTState *cts = ctype_ctsG(J2G(J));
+    CType *ct = ctype_get(cts,
+      (info & CTF_BOOL) ? CTID_BOOL :
+      (info & CTF_UNSIGNED) ? CTID_UINT32 : CTID_INT32);
+    int32_t mask = (int32_t)(((1u << bsz)-1) << pos);
+    /* Convert the RHS to the field's base type and shift it into place. */
+    TRef sp = crec_ct_tv(J, ct, 0, J->base[2], &rd->argv[2]);
+    sp = emitir(IRTI(IR_BSHL), sp, lj_ir_kint(J, pos));
+    /* Use of the target type avoids forwarding conversions. */
+    sp = emitir(IRT(IR_BAND, t), sp, lj_ir_kint(J, mask));
+    /* Read-modify-write: clear the field bits, merge the new value, store. */
+    tr = emitir(IRT(IR_BAND, t), tr, lj_ir_kint(J, (int32_t)~mask));
+    tr = emitir(IRT(IR_BOR, t), tr, sp);
+    emitir(IRT(IR_XSTORE, t), ptr, tr);
+    rd->nres = 0;
+    J->needsnap = 1;
+  }
+}
+
void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd)
{
TRef idx, ptr = J->base[0];
@@ -801,6 +871,7 @@ again:
CType *fct;
fct = lj_ctype_getfield(cts, ct, name, &fofs);
if (fct) {
+ ofs += (ptrdiff_t)fofs;
/* Always specialize to the field name. */
emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name));
if (ctype_isconstval(fct->info)) {
@@ -812,12 +883,14 @@ again:
J->base[0] = lj_ir_kint(J, (int32_t)fct->size);
return; /* Interpreter will throw for newindex. */
} else if (ctype_isbitfield(fct->info)) {
- lj_trace_err(J, LJ_TRERR_NYICONV);
+ if (ofs)
+ ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs));
+ crec_index_bf(J, rd, ptr, fct->info);
+ return;
} else {
- lua_assert(ctype_isfield(fct->info));
+ lj_assertJ(ctype_isfield(fct->info), "field expected");
sid = ctype_cid(fct->info);
}
- ofs += (ptrdiff_t)fofs;
}
} else if (ctype_iscomplex(ct->info)) {
if (name->len == 2 &&
@@ -867,21 +940,17 @@ again:
}
/* Record setting a finalizer. */
-static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin)
+static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin)
{
- TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd);
- TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4));
- if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; }
- if (tvisfunc(fin)) {
- emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin)));
- emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC));
- } else if (tviscdata(fin)) {
- emitir(IRT(IR_XSTORE, IRT_P32), trlo,
- lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA));
- emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA));
+ if (tvisgcv(fin)) {
+ if (!trfin) trfin = lj_ir_kptr(J, gcval(fin));
+ } else if (tvisnil(fin)) {
+ trfin = lj_ir_kptr(J, NULL);
} else {
lj_trace_err(J, LJ_TRERR_BADTYPE);
}
+ lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd,
+ trfin, lj_ir_kint(J, (int32_t)itype(fin)));
J->needsnap = 1;
}
@@ -892,10 +961,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
CTSize sz;
CTInfo info = lj_ctype_info(cts, id, &sz);
CType *d = ctype_raw(cts, id);
- TRef trid;
- if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN)
- lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */
- trid = lj_ir_kint(J, id);
+ TRef trcd, trid = lj_ir_kint(J, id);
+ cTValue *fin;
/* Use special instruction to box pointer or 32/64 bit integer. */
if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) {
TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) :
@@ -903,11 +970,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
sz == 4 ? lj_ir_kint(J, 0) :
(lj_needsplit(J), lj_ir_kint64(J, 0));
J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp);
+ return;
} else {
- TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL);
- cTValue *fin;
- J->base[0] = trcd;
- if (J->base[1] && !J->base[2] &&
+ TRef trsz = TREF_NIL;
+ if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */
+ CTSize sz0, sz1;
+ if (!J->base[1] || J->base[2])
+ lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. */
+ trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0,
+ J->base[1], &rd->argv[1]);
+ sz0 = lj_ctype_vlsize(cts, d, 0);
+ sz1 = lj_ctype_vlsize(cts, d, 1);
+ trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0)));
+ trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0));
+ J->base[1] = 0; /* Simplify logic below. */
+ } else if (ctype_align(info) > CT_MEMALIGN) {
+ trsz = lj_ir_kint(J, sz);
+ }
+ trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz);
+ if (sz > 128 || (info & CTF_VLA)) {
+ TRef dp;
+ CTSize align;
+ special: /* Only handle bulk zero-fill for large/VLA/VLS types. */
+ if (J->base[1])
+ lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */
+ dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata)));
+ if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz);
+ align = ctype_align(info);
+ if (align < CT_MEMALIGN) align = CT_MEMALIGN;
+ crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align));
+ } else if (J->base[1] && !J->base[2] &&
!lj_cconv_multi_init(cts, d, &rd->argv[1])) {
goto single_init;
} else if (ctype_isarray(d->info)) {
@@ -918,8 +1010,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
TValue *sval = &tv;
MSize i;
tv.u64 = 0;
- if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)))
- lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */
+ if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) ||
+ esize * CREC_FILL_MAXUNROLL < sz)
+ goto special;
for (i = 1, ofs = 0; ofs < sz; ofs += esize) {
TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd,
lj_ir_kintp(J, ofs + sizeof(GCcdata)));
@@ -933,8 +1026,26 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
crec_ct_tv(J, dc, dp, sp, sval);
}
} else if (ctype_isstruct(d->info)) {
- CTypeID fid = d->sib;
+ CTypeID fid;
MSize i = 1;
+ if (!J->base[1]) { /* Handle zero-fill of struct-of-NYI. */
+ fid = d->sib;
+ while (fid) {
+ CType *df = ctype_get(cts, fid);
+ fid = df->sib;
+ if (ctype_isfield(df->info)) {
+ CType *dc;
+ if (!gcref(df->name)) continue; /* Ignore unnamed fields. */
+ dc = ctype_rawchild(cts, df); /* Field type. */
+ if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) ||
+ ctype_isenum(dc->info)))
+ goto special;
+ } else if (!ctype_isconstval(df->info)) {
+ goto special;
+ }
+ }
+ }
+ fid = d->sib;
while (fid) {
CType *df = ctype_get(cts, fid);
fid = df->sib;
@@ -981,11 +1092,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id)
crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv);
}
}
- /* Handle __gc metamethod. */
- fin = lj_ctype_meta(cts, id, MM_gc);
- if (fin)
- crec_finalizer(J, trcd, fin);
}
+ J->base[0] = trcd;
+ /* Handle __gc metamethod. */
+ fin = lj_ctype_meta(cts, id, MM_gc);
+ if (fin)
+ crec_finalizer(J, trcd, 0, fin);
}
/* Record argument conversions. */
@@ -1026,7 +1138,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
if (fid) { /* Get argument type from field. */
CType *ctf = ctype_get(cts, fid);
fid = ctf->sib;
- lua_assert(ctype_isfield(ctf->info));
+ lj_assertJ(ctype_isfield(ctf->info), "field expected");
did = ctype_cid(ctf->info);
} else {
if (!(ct->info & CTF_VARARG))
@@ -1045,7 +1157,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd,
else
tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT);
}
- } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) {
+ } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) {
lj_needsplit(J);
}
#if LJ_TARGET_X86
@@ -1091,20 +1203,20 @@ static void crec_snap_caller(jit_State *J)
lua_State *L = J->L;
TValue *base = L->base, *top = L->top;
const BCIns *pc = J->pc;
- TRef ftr = J->base[-1];
+ TRef ftr = J->base[-1-LJ_FR2];
ptrdiff_t delta;
if (!frame_islua(base-1) || J->framedepth <= 0)
lj_trace_err(J, LJ_TRERR_NYICALL);
- J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]);
+ J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]);
L->top = base; L->base = base - delta;
- J->base[-1] = TREF_FALSE;
+ J->base[-1-LJ_FR2] = TREF_FALSE;
J->base -= delta; J->baseslot -= (BCReg)delta;
- J->maxslot = (BCReg)delta; J->framedepth--;
+ J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--;
lj_snap_add(J);
L->base = base; L->top = top;
J->framedepth++; J->maxslot = 1;
J->base += delta; J->baseslot += (BCReg)delta;
- J->base[-1] = ftr; J->pc = pc;
+ J->base[-1-LJ_FR2] = ftr; J->pc = pc;
}
/* Record function call. */
@@ -1124,8 +1236,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd)
TRef tr;
TValue tv;
/* Check for blacklisted C functions that might call a callback. */
- setlightudV(&tv,
- cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4));
+ tv.u64 = ((uintptr_t)cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4) >> 2) | U64x(800000000, 00000000);
if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv)))
lj_trace_err(J, LJ_TRERR_BLACKL);
if (ctype_isvoid(ctr->info)) {
@@ -1196,8 +1307,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd)
tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm);
if (tv) {
if (tvisfunc(tv)) {
- J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
- rd->nres = -1; /* Pending tailcall. */
+ crec_tailcall(J, rd, tv);
return;
}
} else if (mm == MM_new) {
@@ -1238,7 +1348,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm)
for (i = 0; i < 2; i++) {
IRType st = tref_type(sp[i]);
if (st == IRT_NUM || st == IRT_FLOAT)
- sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY);
+ sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY);
else if (!(st == IRT_I64 || st == IRT_U64))
sp[i] = emitconv(sp[i], dt, IRT_INT,
(s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT);
@@ -1307,15 +1417,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm)
CTypeID id;
#if LJ_64
if (t == IRT_NUM || t == IRT_FLOAT)
- tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY);
+ tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY);
else if (!(t == IRT_I64 || t == IRT_U64))
tr = emitconv(tr, IRT_INTP, IRT_INT,
((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT);
#else
if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) {
tr = emitconv(tr, IRT_INTP, t,
- (t == IRT_NUM || t == IRT_FLOAT) ?
- IRCONV_TRUNC|IRCONV_ANY : 0);
+ (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0);
}
#endif
tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz));
@@ -1347,8 +1456,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts,
}
if (tv) {
if (tvisfunc(tv)) {
- J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME;
- rd->nres = -1; /* Pending tailcall. */
+ crec_tailcall(J, rd, tv);
return 0;
} /* NYI: non-function metamethods. */
} else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */
@@ -1460,8 +1568,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd)
!irt_isguard(J->guardemit)) {
const BCIns *pc = frame_contpc(J->L->base-1) - 1;
if (bc_op(*pc) <= BC_ISNEP) {
- setframe_pc(&J2G(J)->tmptv, pc);
- J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1);
+ J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc;
J->postproc = LJ_POST_FIXCOMP;
}
}
@@ -1650,7 +1757,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd)
void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd)
{
argv2cdata(J, J->base[0], &rd->argv[0]);
- crec_finalizer(J, J->base[0], &rd->argv[1]);
+ if (!J->base[1])
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]);
+}
+
+/* -- 64 bit bit.* library functions -------------------------------------- */
+
+/* Determine bit operation type from argument type. */
+static CTypeID crec_bit64_type(CTState *cts, cTValue *tv)
+{
+ if (tviscdata(tv)) {
+ CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid);
+ if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct);
+ if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) ==
+ CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8)
+ return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */
+ return CTID_INT64; /* Otherwise use int64_t. */
+ }
+ return 0; /* Use regular 32 bit ops. */
+}
+
+void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd)
+{
+ CTState *cts = ctype_ctsG(J2G(J));
+ TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
+ J->base[0], &rd->argv[0]);
+ if (!tref_isinteger(tr))
+ tr = emitconv(tr, IRT_INT, tref_type(tr), 0);
+ J->base[0] = tr;
+}
+
+int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd)
+{
+ CTState *cts = ctype_ctsG(J2G(J));
+ CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
+ if (id) {
+ TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+ tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0);
+ J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
+ return 1;
+ }
+ return 0;
+}
+
+int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd)
+{
+ CTState *cts = ctype_ctsG(J2G(J));
+ CTypeID id = 0;
+ MSize i;
+ for (i = 0; J->base[i] != 0; i++) {
+ CTypeID aid = crec_bit64_type(cts, &rd->argv[i]);
+ if (id < aid) id = aid; /* Determine highest type rank of all arguments. */
+ }
+ if (id) {
+ CType *ct = ctype_get(cts, id);
+ uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64);
+ TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]);
+ for (i = 1; J->base[i] != 0; i++) {
+ TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]);
+ tr = emitir(ot, tr, tr2);
+ }
+ J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
+ return 1;
+ }
+ return 0;
+}
+
+int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
+{
+ CTState *cts = ctype_ctsG(J2G(J));
+ CTypeID id;
+ TRef tsh = 0;
+ if (J->base[0] && tref_iscdata(J->base[1])) {
+ tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0,
+ J->base[1], &rd->argv[1]);
+ if (!tref_isinteger(tsh))
+ tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
+ J->base[1] = tsh;
+ }
+ id = crec_bit64_type(cts, &rd->argv[0]);
+ if (id) {
+ TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+ uint32_t op = rd->data;
+ if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
+ if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
+ !tref_isk(tsh))
+ tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63));
+#ifdef LJ_TARGET_UNIFYROT
+ if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
+ op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
+ tsh = emitir(IRTI(IR_NEG), tsh, tsh);
+ }
+#endif
+ tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
+ J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);
+ return 1;
+ }
+ return 0;
+}
+
+TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr)
+{
+ CTState *cts = ctype_ctsG(J2G(J));
+ CTypeID id = crec_bit64_type(cts, &rd->argv[0]);
+ TRef tr, trsf = J->base[1];
+ SFormat sf = (STRFMT_UINT|STRFMT_T_HEX);
+ int32_t n;
+ if (trsf) {
+ CTypeID id2 = 0;
+ n = (int32_t)lj_carith_check64(J->L, 2, &id2);
+ if (id2)
+ trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]);
+ else
+ trsf = lj_opt_narrow_tobit(J, trsf);
+ emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */
+ } else {
+ n = id ? 16 : 8;
+ }
+ if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; }
+ sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC);
+ if (id) {
+ tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]);
+ if (n < 16)
+ tr = emitir(IRT(IR_BAND, IRT_U64), tr,
+ lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1));
+ } else {
+ tr = lj_opt_narrow_tobit(J, J->base[0]);
+ if (n < 8)
+ tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1)));
+ tr = emitconv(tr, IRT_U64, IRT_INT, 0); /* No sign-extension. */
+ lj_needsplit(J);
+ }
+ return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr);
}
/* -- Miscellaneous library functions ------------------------------------- */
@@ -1674,6 +1913,30 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd)
}
}
+TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o)
+{
+ CTypeID id = argv2cdata(J, tr, o)->ctypeid;
+ if (!(id == CTID_INT64 || id == CTID_UINT64))
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ lj_needsplit(J);
+ return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? IRT_I64 : IRT_U64), tr,
+ IRFL_CDATA_INT64);
+}
+
+#if LJ_HASBUFFER
+TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o)
+{
+ CTState *cts = ctype_ctsG(J2G(J));
+ if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE);
+ return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o);
+}
+
+TRef lj_crecord_topuint8(jit_State *J, TRef tr)
+{
+ return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr);
+}
+#endif
+
#undef IR
#undef emitir
#undef emitconv
diff --git a/src/lj_crecord.h b/src/lj_crecord.h
index 513ded7b..2c8cf05c 100644
--- a/src/lj_crecord.h
+++ b/src/lj_crecord.h
@@ -25,7 +25,19 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd);
LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd);
LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd);
LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd);
+
+LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd);
+LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd);
+LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr);
+
LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd);
+LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o);
+#if LJ_HASBUFFER
+LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o);
+LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr);
+#endif
#endif
#endif
diff --git a/src/lj_ctype.c b/src/lj_ctype.c
index 7ef00521..204be034 100644
--- a/src/lj_ctype.c
+++ b/src/lj_ctype.c
@@ -11,8 +11,10 @@
#include "lj_err.h"
#include "lj_str.h"
#include "lj_tab.h"
+#include "lj_strfmt.h"
#include "lj_ctype.h"
#include "lj_ccallback.h"
+#include "lj_buf.h"
/* -- C type definitions -------------------------------------------------- */
@@ -37,6 +39,8 @@
_("uint64_t", UINT64) \
_("intptr_t", INT_PSZ) \
_("uintptr_t", UINT_PSZ) \
+ /* From POSIX. */ \
+ _("ssize_t", INT_PSZ) \
/* End of typedef list. */
/* Keywords (only the ones we actually care for). */
@@ -149,7 +153,7 @@ CTypeID lj_ctype_new(CTState *cts, CType **ctp)
{
CTypeID id = cts->top;
CType *ct;
- lua_assert(cts->L);
+ lj_assertCTS(cts->L, "uninitialized cts->L");
if (LJ_UNLIKELY(id >= cts->sizetab)) {
if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV);
#ifdef LUAJIT_CTYPE_CHECK_ANCHOR
@@ -178,7 +182,7 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size)
{
uint32_t h = ct_hashtype(info, size);
CTypeID id = cts->hash[h];
- lua_assert(cts->L);
+ lj_assertCTS(cts->L, "uninitialized cts->L");
while (id) {
CType *ct = ctype_get(cts, id);
if (ct->info == info && ct->size == size)
@@ -294,9 +298,9 @@ CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem)
}
ct = ctype_raw(cts, arrid);
}
- lua_assert(ctype_isvlarray(ct->info)); /* Must be a VLA. */
+ lj_assertCTS(ctype_isvlarray(ct->info), "VLA expected");
ct = ctype_rawchild(cts, ct); /* Get array element. */
- lua_assert(ctype_hassize(ct->info));
+ lj_assertCTS(ctype_hassize(ct->info), "bad VLA without size");
/* Calculate actual size of VLA and check for overflow. */
xsz += (uint64_t)ct->size * nelem;
return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID;
@@ -319,7 +323,8 @@ CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp)
} else {
if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN);
qual |= (info & ~(CTF_ALIGN|CTMASK_CID));
- lua_assert(ctype_hassize(info) || ctype_isfunc(info));
+ lj_assertCTS(ctype_hassize(info) || ctype_isfunc(info),
+ "ctype without size");
*szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size;
break;
}
@@ -532,7 +537,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id)
ctype_appc(ctr, ')');
break;
default:
- lua_assert(0);
+ lj_assertG_(ctr->cts->g, 0, "bad ctype %08x", info);
break;
}
ct = ctype_get(ctr->cts, ctype_cid(info));
@@ -576,19 +581,18 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned)
/* Convert complex to string with 'i' or 'I' suffix. */
GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size)
{
- char buf[2*LJ_STR_NUMBUF+2+1];
+ SBuf *sb = lj_buf_tmp_(L);
TValue re, im;
- size_t len;
if (size == 2*sizeof(double)) {
re.n = *(double *)sp; im.n = ((double *)sp)[1];
} else {
re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1];
}
- len = lj_str_bufnum(buf, &re);
- if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+';
- len += lj_str_bufnum(buf+len, &im);
- buf[len] = buf[len-1] >= 'a' ? 'I' : 'i';
- return lj_str_new(L, buf, len+1);
+ lj_strfmt_putfnum(sb, STRFMT_G14, re.n);
+ if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+');
+ lj_strfmt_putfnum(sb, STRFMT_G14, im.n);
+ lj_buf_putchar(sb, sb->w[-1] >= 'a' ? 'I' : 'i');
+ return lj_buf_str(L, sb);
}
/* -- C type state -------------------------------------------------------- */
diff --git a/src/lj_ctype.h b/src/lj_ctype.h
index 4979a7ac..3dbcdbfb 100644
--- a/src/lj_ctype.h
+++ b/src/lj_ctype.h
@@ -260,10 +260,16 @@ typedef struct CTState {
#define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */
+#ifdef LUA_USE_ASSERT
+#define lj_assertCTS(c, ...) (lj_assertG_(cts->g, (c), __VA_ARGS__))
+#else
+#define lj_assertCTS(c, ...) ((void)cts)
+#endif
+
/* -- Predefined types ---------------------------------------------------- */
/* Target-dependent types. */
-#if LJ_TARGET_PPC || LJ_TARGET_PPCSPE
+#if LJ_TARGET_PPC
#define CTTYDEFP(_) \
_(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2))
#else
@@ -292,6 +298,7 @@ typedef struct CTState {
_(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \
_(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \
_(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \
+ _(P_UINT8, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_UINT8) \
_(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \
_(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \
CTTYDEFP(_) \
@@ -383,6 +390,16 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L)
return cts;
}
+/* Load FFI library on-demand. */
+#define ctype_loadffi(L) \
+ do { \
+ if (!ctype_ctsG(G(L))) { \
+ ptrdiff_t oldtop = (char *)L->top - mref(L->stack, char); \
+ luaopen_ffi(L); \
+ L->top = (TValue *)(mref(L->stack, char) + oldtop); \
+ } \
+ } while (0)
+
/* Save and restore state of C type table. */
#define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts)
#define LJ_CTYPE_RESTORE(cts) \
@@ -392,7 +409,8 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L)
/* Check C type ID for validity when assertions are enabled. */
static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id)
{
- lua_assert(id > 0 && id < cts->top); UNUSED(cts);
+ UNUSED(cts);
+ lj_assertCTS(id > 0 && id < cts->top, "bad CTID %d", id);
return id;
}
@@ -408,8 +426,9 @@ static LJ_AINLINE CType *ctype_get(CTState *cts, CTypeID id)
/* Get child C type. */
static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct)
{
- lua_assert(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) ||
- ctype_isbitfield(ct->info))); /* These don't have children. */
+ lj_assertCTS(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) ||
+ ctype_isbitfield(ct->info)),
+ "ctype %08x has no children", ct->info);
return ctype_get(cts, ctype_cid(ct->info));
}
diff --git a/src/lj_debug.c b/src/lj_debug.c
index 65ec26f0..112f5358 100644
--- a/src/lj_debug.c
+++ b/src/lj_debug.c
@@ -9,12 +9,12 @@
#include "lj_obj.h"
#include "lj_err.h"
#include "lj_debug.h"
-#include "lj_str.h"
+#include "lj_buf.h"
#include "lj_tab.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_bc.h"
-#include "lj_vm.h"
+#include "lj_strfmt.h"
#if LJ_HASJIT
#include "lj_jit.h"
#endif
@@ -24,11 +24,11 @@
/* Get frame corresponding to a level. */
cTValue *lj_debug_frame(lua_State *L, int level, int *size)
{
- cTValue *frame, *nextframe, *bot = tvref(L->stack);
+ cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2;
/* Traverse frames backwards. */
for (nextframe = frame = L->base-1; frame > bot; ) {
if (frame_gc(frame) == obj2gco(L))
- level++; /* Skip dummy frames. See lj_meta_call(). */
+ level++; /* Skip dummy frames. See lj_err_optype_call(). */
if (level-- == 0) {
*size = (int)(nextframe - frame);
return frame; /* Level found. */
@@ -55,7 +55,8 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
const BCIns *ins;
GCproto *pt;
BCPos pos;
- lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD);
+ lj_assertL(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD,
+ "function or frame expected");
if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */
return NO_BCPOS;
} else if (nextframe == NULL) { /* Lua function on top. */
@@ -87,8 +88,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
if (frame_islua(f)) {
f = frame_prevl(f);
} else {
- if (frame_isc(f) || (LJ_HASFFI && frame_iscont(f) &&
- (f-1)->u32.lo == LJ_CONT_FFI_CALLBACK))
+ if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f)))
cf = cframe_raw(cframe_prev(cf));
f = frame_prevd(f);
}
@@ -102,7 +102,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe)
#if LJ_HASJIT
if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */
GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins));
- lua_assert(bc_isret(bc_op(ins[-1])));
+ lj_assertL(bc_isret(bc_op(ins[-1])), "return bytecode expected");
pos = proto_bcpos(pt, mref(T->startpc, const BCIns));
}
#endif
@@ -135,7 +135,7 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
BCPos pc = debug_framepc(L, fn, nextframe);
if (pc != NO_BCPOS) {
GCproto *pt = funcproto(fn);
- lua_assert(pc <= pt->sizebc);
+ lj_assertL(pc <= pt->sizebc, "PC out of range");
return lj_debug_line(pt, pc);
}
return -1;
@@ -143,38 +143,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe)
/* -- Variable names ------------------------------------------------------ */
-/* Read ULEB128 value. */
-static uint32_t debug_read_uleb128(const uint8_t **pp)
-{
- const uint8_t *p = *pp;
- uint32_t v = *p++;
- if (LJ_UNLIKELY(v >= 0x80)) {
- int sh = 0;
- v &= 0x7f;
- do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80);
- }
- *pp = p;
- return v;
-}
-
/* Get name of a local variable from slot number and PC. */
static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot)
{
- const uint8_t *p = proto_varinfo(pt);
+ const char *p = (const char *)proto_varinfo(pt);
if (p) {
BCPos lastpc = 0;
for (;;) {
- const char *name = (const char *)p;
- uint32_t vn = *p++;
+ const char *name = p;
+ uint32_t vn = *(const uint8_t *)p;
BCPos startpc, endpc;
if (vn < VARNAME__MAX) {
if (vn == VARNAME_END) break; /* End of varinfo. */
} else {
- while (*p++) ; /* Skip over variable name string. */
+ do { p++; } while (*(const uint8_t *)p); /* Skip over variable name. */
}
- lastpc = startpc = lastpc + debug_read_uleb128(&p);
+ p++;
+ lastpc = startpc = lastpc + lj_buf_ruleb128(&p);
if (startpc > pc) break;
- endpc = startpc + debug_read_uleb128(&p);
+ endpc = startpc + lj_buf_ruleb128(&p);
if (pc < endpc && slot-- == 0) {
if (vn < VARNAME__MAX) {
#define VARNAMESTR(name, str) str "\0"
@@ -199,7 +186,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
TValue *nextframe = size ? frame + size : NULL;
GCfunc *fn = frame_func(frame);
BCPos pc = debug_framepc(L, fn, nextframe);
- if (!nextframe) nextframe = L->top;
+ if (!nextframe) nextframe = L->top+LJ_FR2;
if ((int)slot1 < 0) { /* Negative slot number is for varargs. */
if (pc != NO_BCPOS) {
GCproto *pt = funcproto(fn);
@@ -209,7 +196,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
nextframe = frame;
frame = frame_prevd(frame);
}
- if (frame + slot1 < nextframe) {
+ if (frame + slot1+LJ_FR2 < nextframe) {
*name = "(*vararg)";
return frame+slot1;
}
@@ -220,7 +207,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
if (pc != NO_BCPOS &&
(*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL)
;
- else if (slot1 > 0 && frame + slot1 < nextframe)
+ else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe)
*name = "(*temporary)";
return frame+slot1;
}
@@ -229,7 +216,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar,
const char *lj_debug_uvname(GCproto *pt, uint32_t idx)
{
const uint8_t *p = proto_uvinfo(pt);
- lua_assert(idx < pt->sizeuv);
+ lj_assertX(idx < pt->sizeuv, "bad upvalue index");
if (!p) return "";
if (idx) while (*p++ || --idx) ;
return (const char *)p;
@@ -286,7 +273,7 @@ restart:
*name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins))));
if (ip > proto_bc(pt)) {
BCIns insp = ip[-1];
- if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 &&
+ if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 &&
bc_d(insp) == bc_b(ins))
return "method";
}
@@ -303,12 +290,12 @@ restart:
}
/* Deduce function name from caller of a frame. */
-const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
+const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name)
{
- TValue *pframe;
+ cTValue *pframe;
GCfunc *fn;
BCPos pc;
- if (frame <= tvref(L->stack))
+ if (frame <= tvref(L->stack)+LJ_FR2)
return NULL;
if (frame_isvarg(frame))
frame = frame_prevd(frame);
@@ -334,7 +321,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name)
/* -- Source code locations ----------------------------------------------- */
/* Generate shortened source name. */
-void lj_debug_shortname(char *out, GCstr *str)
+void lj_debug_shortname(char *out, GCstr *str, BCLine line)
{
const char *src = strdata(str);
if (*src == '=') {
@@ -348,11 +335,11 @@ void lj_debug_shortname(char *out, GCstr *str)
*out++ = '.'; *out++ = '.'; *out++ = '.';
}
strcpy(out, src);
- } else { /* Output [string "string"]. */
+ } else { /* Output [string "string"] or [builtin:name]. */
size_t len; /* Length, up to first control char. */
for (len = 0; len < LUA_IDSIZE-12; len++)
if (((const unsigned char *)src)[len] < ' ') break;
- strcpy(out, "[string \""); out += 9;
+ strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9;
if (src[len] != '\0') { /* Must truncate? */
if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15;
strncpy(out, src, len); out += len;
@@ -360,7 +347,7 @@ void lj_debug_shortname(char *out, GCstr *str)
} else {
strcpy(out, src); out += len;
}
- strcpy(out, "\"]");
+ strcpy(out, line == ~(BCLine)0 ? "]" : "\"]");
}
}
@@ -373,14 +360,15 @@ void lj_debug_addloc(lua_State *L, const char *msg,
if (isluafunc(fn)) {
BCLine line = debug_frameline(L, fn, nextframe);
if (line >= 0) {
+ GCproto *pt = funcproto(fn);
char buf[LUA_IDSIZE];
- lj_debug_shortname(buf, proto_chunkname(funcproto(fn)));
- lj_str_pushf(L, "%s:%d: %s", buf, line, msg);
+ lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline);
+ lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg);
return;
}
}
}
- lj_str_pushf(L, "%s", msg);
+ lj_strfmt_pushf(L, "%s", msg);
}
/* Push location string for a bytecode position to Lua stack. */
@@ -390,20 +378,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc)
const char *s = strdata(name);
MSize i, len = name->len;
BCLine line = lj_debug_line(pt, pc);
- if (*s == '@') {
+ if (pt->firstline == ~(BCLine)0) {
+ lj_strfmt_pushf(L, "builtin:%s", s);
+ } else if (*s == '@') {
s++; len--;
for (i = len; i > 0; i--)
if (s[i] == '/' || s[i] == '\\') {
s += i+1;
break;
}
- lj_str_pushf(L, "%s:%d", s, line);
+ lj_strfmt_pushf(L, "%s:%d", s, line);
} else if (len > 40) {
- lj_str_pushf(L, "%p:%d", pt, line);
+ lj_strfmt_pushf(L, "%p:%d", pt, line);
} else if (*s == '=') {
- lj_str_pushf(L, "%s:%d", s+1, line);
+ lj_strfmt_pushf(L, "%s:%d", s+1, line);
} else {
- lj_str_pushf(L, "\"%s\":%d", s, line);
+ lj_strfmt_pushf(L, "\"%s\":%d", s, line);
}
}
@@ -451,13 +441,14 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
} else {
uint32_t offset = (uint32_t)ar->i_ci & 0xffff;
uint32_t size = (uint32_t)ar->i_ci >> 16;
- lua_assert(offset != 0);
+ lj_assertL(offset != 0, "bad frame offset");
frame = tvref(L->stack) + offset;
if (size) nextframe = frame + size;
- lua_assert(frame <= tvref(L->maxstack) &&
- (!nextframe || nextframe <= tvref(L->maxstack)));
+ lj_assertL(frame <= tvref(L->maxstack) &&
+ (!nextframe || nextframe <= tvref(L->maxstack)),
+ "broken frame chain");
fn = frame_func(frame);
- lua_assert(fn->c.gct == ~LJ_TFUNC);
+ lj_assertL(fn->c.gct == ~LJ_TFUNC, "bad frame function");
}
for (; *what; what++) {
if (*what == 'S') {
@@ -466,7 +457,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext)
BCLine firstline = pt->firstline;
GCstr *name = proto_chunkname(pt);
ar->source = strdata(name);
- lj_debug_shortname(ar->short_src, name);
+ lj_debug_shortname(ar->short_src, name, pt->firstline);
ar->linedefined = (int)firstline;
ar->lastlinedefined = (int)(firstline + pt->numline);
ar->what = (firstline || !pt->numline) ? "Lua" : "main";
@@ -556,6 +547,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar)
}
}
+#if LJ_HASPROFILE
+/* Put the chunkname into a buffer. */
+static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip)
+{
+ GCstr *name = proto_chunkname(pt);
+ const char *p = strdata(name);
+ if (pt->firstline == ~(BCLine)0) {
+ lj_buf_putmem(sb, "[builtin:", 9);
+ lj_buf_putstr(sb, name);
+ lj_buf_putb(sb, ']');
+ return 0;
+ }
+ if (*p == '=' || *p == '@') {
+ MSize len = name->len-1;
+ p++;
+ if (pathstrip) {
+ int i;
+ for (i = len-1; i >= 0; i--)
+ if (p[i] == '/' || p[i] == '\\') {
+ len -= i+1;
+ p = p+i+1;
+ break;
+ }
+ }
+ lj_buf_putmem(sb, p, len);
+ } else {
+ lj_buf_putmem(sb, "[string]", 8);
+ }
+ return 1;
+}
+
+/* Put a compact stack dump into a buffer. */
+void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth)
+{
+ int level = 0, dir = 1, pathstrip = 1;
+ MSize lastlen = 0;
+ if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */
+ while (level != depth) { /* Loop through all frame. */
+ int size;
+ cTValue *frame = lj_debug_frame(L, level, &size);
+ if (frame) {
+ cTValue *nextframe = size ? frame+size : NULL;
+ GCfunc *fn = frame_func(frame);
+ const uint8_t *p = (const uint8_t *)fmt;
+ int c;
+ while ((c = *p++)) {
+ switch (c) {
+ case 'p': /* Preserve full path. */
+ pathstrip = 0;
+ break;
+ case 'F': case 'f': { /* Dump function name. */
+ const char *name;
+ const char *what = lj_debug_funcname(L, frame, &name);
+ if (what) {
+ if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */
+ GCproto *pt = funcproto(fn);
+ if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */
+ debug_putchunkname(sb, pt, pathstrip);
+ lj_buf_putb(sb, ':');
+ }
+ }
+ lj_buf_putmem(sb, name, (MSize)strlen(name));
+ break;
+ } /* else: can't derive a name, dump module:line. */
+ }
+ /* fallthrough */
+ case 'l': /* Dump module:line. */
+ if (isluafunc(fn)) {
+ GCproto *pt = funcproto(fn);
+ if (debug_putchunkname(sb, pt, pathstrip)) {
+ /* Regular Lua function. */
+ BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) :
+ pt->firstline;
+ lj_buf_putb(sb, ':');
+ lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline);
+ }
+ } else if (isffunc(fn)) { /* Dump numbered builtins. */
+ lj_buf_putmem(sb, "[builtin#", 9);
+ lj_strfmt_putint(sb, fn->c.ffid);
+ lj_buf_putb(sb, ']');
+ } else { /* Dump C function address. */
+ lj_buf_putb(sb, '@');
+ lj_strfmt_putptr(sb, fn->c.f);
+ }
+ break;
+ case 'Z': /* Zap trailing separator. */
+ lastlen = sbuflen(sb);
+ break;
+ default:
+ lj_buf_putb(sb, c);
+ break;
+ }
+ }
+ } else if (dir == 1) {
+ break;
+ } else {
+ level -= size; /* Reverse frame order: quickly skip missing level. */
+ }
+ level += dir;
+ }
+ if (lastlen)
+ sb->w = sb->b + lastlen; /* Zap trailing separator. */
+}
+#endif
+
/* Number of frames for the leading and trailing part of a traceback. */
#define TRACEBACK_LEVELS1 12
#define TRACEBACK_LEVELS2 10
diff --git a/src/lj_debug.h b/src/lj_debug.h
index 15cdee3c..28127ae9 100644
--- a/src/lj_debug.h
+++ b/src/lj_debug.h
@@ -33,14 +33,18 @@ LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp,
GCobj **op);
LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc,
BCReg slot, const char **name);
-LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame,
+LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame,
const char **name);
-LJ_FUNC void lj_debug_shortname(char *out, GCstr *str);
+LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line);
LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg,
cTValue *frame, cTValue *nextframe);
LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc);
LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar,
int ext);
+#if LJ_HASPROFILE
+LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt,
+ int depth);
+#endif
/* Fixed internal variable names. */
#define VARNAMEDEF(_) \
diff --git a/src/lj_def.h b/src/lj_def.h
index d09ebb10..b61297aa 100644
--- a/src/lj_def.h
+++ b/src/lj_def.h
@@ -46,10 +46,14 @@ typedef unsigned int uintptr_t;
#include <stdlib.h>
/* Various VM limits. */
-#define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */
+#define LJ_MAX_MEM32 0x7fffff00 /* Max. 32 bit memory allocation. */
+#define LJ_MAX_MEM64 ((uint64_t)1<<47) /* Max. 64 bit memory allocation. */
+/* Max. total memory allocation. */
+#define LJ_MAX_MEM (LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32)
#define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */
-#define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */
-#define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */
+#define LJ_MAX_STR LJ_MAX_MEM32 /* Max. string length. */
+#define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */
+#define LJ_MAX_UDATA LJ_MAX_MEM32 /* Max. userdata length. */
#define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */
#define LJ_MAX_HBITS 26 /* Max. hash bits. */
@@ -57,7 +61,7 @@ typedef unsigned int uintptr_t;
#define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */
#define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */
-#define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */
+#define LJ_MAX_LINE LJ_MAX_MEM32 /* Max. source code line number. */
#define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */
#define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */
#define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */
@@ -65,7 +69,7 @@ typedef unsigned int uintptr_t;
#define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */
#define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */
-#define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */
+#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */
#define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */
@@ -76,7 +80,6 @@ typedef unsigned int uintptr_t;
#define LJ_MIN_SBUF 32 /* Min. string buffer length. */
#define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */
#define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */
-#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */
/* JIT compiler limits. */
#define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */
@@ -91,6 +94,9 @@ typedef unsigned int uintptr_t;
#define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo)
#define i32ptr(p) ((int32_t)(intptr_t)(void *)(p))
#define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p))
+#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p))
+#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p))
+#define igcptr(p) (LJ_GC64 ? i64ptr(p) : i32ptr(p))
#define checki8(x) ((x) == (int32_t)(int8_t)(x))
#define checku8(x) ((x) == (int32_t)(uint8_t)(x))
@@ -98,7 +104,10 @@ typedef unsigned int uintptr_t;
#define checku16(x) ((x) == (int32_t)(uint16_t)(x))
#define checki32(x) ((x) == (int32_t)(x))
#define checku32(x) ((x) == (uint32_t)(x))
+#define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0)
#define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x))
+#define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0)
+#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1)
/* Every half-decent C compiler transforms this into a rotate instruction. */
#define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
@@ -111,7 +120,7 @@ typedef uintptr_t BloomFilter;
#define bloomset(b, x) ((b) |= bloombit((x)))
#define bloomtest(b, x) ((b) & bloombit((x)))
-#if defined(__GNUC__) || defined(__psp2__)
+#if defined(__GNUC__) || defined(__clang__) || defined(__psp2__)
#define LJ_NORET __attribute__((noreturn))
#define LJ_ALIGN(n) __attribute__((aligned(n)))
@@ -173,7 +182,7 @@ static LJ_AINLINE uint64_t lj_bswap64(uint64_t x)
{
return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32));
}
-#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
+#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __clang__
static LJ_AINLINE uint32_t lj_bswap(uint32_t x)
{
return (uint32_t)__builtin_bswap32((int32_t)x);
@@ -329,14 +338,28 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v)
#define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET
#define LJ_ASMF_NORET LJ_ASMF LJ_NORET
-/* Runtime assertions. */
-#ifdef lua_assert
-#define check_exp(c, e) (lua_assert(c), (e))
-#define api_check(l, e) lua_assert(e)
+/* Internal assertions. */
+#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
+#define lj_assert_check(g, c, ...) \
+ ((c) ? (void)0 : \
+ (lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__), 0))
+#define lj_checkapi(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__)
#else
-#define lua_assert(c) ((void)0)
+#define lj_checkapi(c, ...) ((void)L)
+#endif
+
+#ifdef LUA_USE_ASSERT
+#define lj_assertG_(g, c, ...) lj_assert_check((g), (c), __VA_ARGS__)
+#define lj_assertG(c, ...) lj_assert_check(g, (c), __VA_ARGS__)
+#define lj_assertL(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__)
+#define lj_assertX(c, ...) lj_assert_check(NULL, (c), __VA_ARGS__)
+#define check_exp(c, e) (lj_assertX((c), #c), (e))
+#else
+#define lj_assertG_(g, c, ...) ((void)0)
+#define lj_assertG(c, ...) ((void)g)
+#define lj_assertL(c, ...) ((void)L)
+#define lj_assertX(c, ...) ((void)0)
#define check_exp(c, e) (e)
-#define api_check luai_apicheck
#endif
/* Static assertions. */
@@ -350,4 +373,9 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v)
extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1])
#endif
+/* PRNG state. Need this here, details in lj_prng.h. */
+typedef struct PRNGState {
+ uint64_t u[4];
+} PRNGState;
+
#endif
diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c
index 54c86038..ded382aa 100644
--- a/src/lj_dispatch.c
+++ b/src/lj_dispatch.c
@@ -8,6 +8,7 @@
#include "lj_obj.h"
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_func.h"
#include "lj_str.h"
#include "lj_tab.h"
@@ -17,6 +18,7 @@
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_ff.h"
+#include "lj_strfmt.h"
#if LJ_HASJIT
#include "lj_jit.h"
#endif
@@ -25,6 +27,9 @@
#endif
#include "lj_trace.h"
#include "lj_dispatch.h"
+#if LJ_HASPROFILE
+#include "lj_profile.h"
+#endif
#include "lj_vm.h"
#include "luajit.h"
@@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC);
#include <math.h>
LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L,
lua_State *co);
+#if !LJ_HASJIT
+#define lj_dispatch_stitch lj_dispatch_ins
+#endif
+#if !LJ_HASPROFILE
+#define lj_dispatch_profile lj_dispatch_ins
+#endif
#define GOTFUNC(name) (ASMFunction)name,
static const ASMFunction dispatch_got[] = {
@@ -57,6 +68,8 @@ void lj_dispatch_init(GG_State *GG)
/* The JIT engine is off by default. luaopen_jit() turns it on. */
disp[BC_FORL] = disp[BC_IFORL];
disp[BC_ITERL] = disp[BC_IITERL];
+ /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */
+ disp[BC_ITERN] = &lj_vm_IITERN;
disp[BC_LOOP] = disp[BC_ILOOP];
disp[BC_FUNCF] = disp[BC_IFUNCF];
disp[BC_FUNCV] = disp[BC_IFUNCV];
@@ -64,7 +77,7 @@ void lj_dispatch_init(GG_State *GG)
for (i = 0; i < GG_NUM_ASMFF; i++)
GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0);
#if LJ_TARGET_MIPS
- memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4);
+ memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *));
#endif
}
@@ -82,11 +95,12 @@ void lj_dispatch_init_hotcount(global_State *g)
#endif
/* Internal dispatch mode bits. */
-#define DISPMODE_JIT 0x01 /* JIT compiler on. */
-#define DISPMODE_REC 0x02 /* Recording active. */
+#define DISPMODE_CALL 0x01 /* Override call dispatch. */
+#define DISPMODE_RET 0x02 /* Override return dispatch. */
#define DISPMODE_INS 0x04 /* Override instruction dispatch. */
-#define DISPMODE_CALL 0x08 /* Override call dispatch. */
-#define DISPMODE_RET 0x10 /* Override return dispatch. */
+#define DISPMODE_JIT 0x10 /* JIT compiler on. */
+#define DISPMODE_REC 0x20 /* Recording active. */
+#define DISPMODE_PROF 0x40 /* Profiling active. */
/* Update dispatch table depending on various flags. */
void lj_dispatch_update(global_State *g)
@@ -98,24 +112,29 @@ void lj_dispatch_update(global_State *g)
mode |= G2J(g)->state != LJ_TRACE_IDLE ?
(DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0;
#endif
+#if LJ_HASPROFILE
+ mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0;
+#endif
mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0;
mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0;
mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0;
if (oldmode != mode) { /* Mode changed? */
ASMFunction *disp = G2GG(g)->dispatch;
- ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv;
+ ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv;
g->dispatchmode = mode;
/* Hotcount if JIT is on, but not while recording. */
if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) {
f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]);
f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]);
+ f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]);
f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]);
f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]);
f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]);
} else { /* Otherwise use the non-hotcounting instructions. */
f_forl = disp[GG_LEN_DDISP+BC_IFORL];
f_iterl = disp[GG_LEN_DDISP+BC_IITERL];
+ f_itern = &lj_vm_IITERN;
f_loop = disp[GG_LEN_DDISP+BC_ILOOP];
f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]);
f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]);
@@ -123,12 +142,13 @@ void lj_dispatch_update(global_State *g)
/* Init static counting instruction dispatch first (may be copied below). */
disp[GG_LEN_DDISP+BC_FORL] = f_forl;
disp[GG_LEN_DDISP+BC_ITERL] = f_iterl;
+ disp[GG_LEN_DDISP+BC_ITERN] = f_itern;
disp[GG_LEN_DDISP+BC_LOOP] = f_loop;
/* Set dynamic instruction dispatch. */
- if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) {
+ if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) {
/* Need to update the whole table. */
- if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { /* No ins dispatch? */
+ if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */
/* Copy static dispatch table to dynamic dispatch table. */
memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction));
/* Overwrite with dynamic return dispatch. */
@@ -140,15 +160,17 @@ void lj_dispatch_update(global_State *g)
}
} else {
/* The recording dispatch also checks for hooks. */
- ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
+ ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook :
+ (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook;
uint32_t i;
for (i = 0; i < GG_LEN_SDISP; i++)
disp[i] = f;
}
- } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) {
+ } else if (!(mode & DISPMODE_INS)) {
/* Otherwise set dynamic counting ins. */
disp[BC_FORL] = f_forl;
disp[BC_ITERL] = f_iterl;
+ disp[BC_ITERN] = f_itern;
disp[BC_LOOP] = f_loop;
/* Set dynamic return dispatch. */
if ((mode & DISPMODE_RET)) {
@@ -236,22 +258,15 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
} else {
if (!(mode & LUAJIT_MODE_ON))
G2J(g)->flags &= ~(uint32_t)JIT_F_ON;
-#if LJ_TARGET_X86ORX64
- else if ((G2J(g)->flags & JIT_F_SSE2))
- G2J(g)->flags |= (uint32_t)JIT_F_ON;
- else
- return 0; /* Don't turn on JIT compiler without SSE2 support. */
-#else
else
G2J(g)->flags |= (uint32_t)JIT_F_ON;
-#endif
lj_dispatch_update(g);
}
break;
case LUAJIT_MODE_FUNC:
case LUAJIT_MODE_ALLFUNC:
case LUAJIT_MODE_ALLSUBFUNC: {
- cTValue *tv = idx == 0 ? frame_prev(L->base-1) :
+ cTValue *tv = idx == 0 ? frame_prev(L->base-1)-LJ_FR2 :
idx > 0 ? L->base + (idx-1) : L->top + idx;
GCproto *pt;
if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn))
@@ -286,7 +301,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
if (idx != 0) {
cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx;
if (tvislightud(tv))
- g->wrapf = (lua_CFunction)lightudV(tv);
+ g->wrapf = (lua_CFunction)lightudV(g, tv);
else
return 0; /* Failed. */
} else {
@@ -352,10 +367,19 @@ static void callhook(lua_State *L, int event, BCLine line)
/* Top frame, nextframe = NULL. */
ar.i_ci = (int)((L->base-1) - tvref(L->stack));
lj_state_checkstack(L, 1+LUA_MINSTACK);
+#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
+ lj_profile_hook_enter(g);
+#else
hook_enter(g);
+#endif
hookf(L, &ar);
- lua_assert(hook_active(g));
+ lj_assertG(hook_active(g), "active hook flag removed");
+ setgcref(g->cur_L, obj2gco(L));
+#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF
+ lj_profile_hook_leave(g);
+#else
hook_leave(g);
+#endif
}
}
@@ -368,7 +392,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres)
if (bc_op(ins) == BC_UCLO)
ins = pc[bc_j(ins)];
switch (bc_op(ins)) {
- case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1;
+ case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2;
case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1;
case BC_TSETM: return bc_a(ins) + nres-1;
default: return pt->framesize;
@@ -397,7 +421,8 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc)
#endif
J->L = L;
lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */
- lua_assert(L->top - L->base == delta);
+ lj_assertG(L->top - L->base == delta,
+ "unbalanced stack after tracing of instruction");
}
}
#endif
@@ -457,7 +482,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
#endif
pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1);
lj_trace_hot(J, pc);
- lua_assert(L->top - L->base == delta);
+ lj_assertG(L->top - L->base == delta,
+ "unbalanced stack after hot call");
goto out;
} else if (J->state != LJ_TRACE_IDLE &&
!(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
@@ -466,7 +492,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc)
#endif
/* Record the FUNC* bytecodes, too. */
lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */
- lua_assert(L->top - L->base == delta);
+ lj_assertG(L->top - L->base == delta,
+ "unbalanced stack after hot instruction");
}
#endif
if ((g->hookmask & LUA_MASKCALL)) {
@@ -492,3 +519,41 @@ out:
return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */
}
+#if LJ_HASJIT
+/* Stitch a new trace. */
+void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc)
+{
+ ERRNO_SAVE
+ lua_State *L = J->L;
+ void *cf = cframe_raw(L->cframe);
+ const BCIns *oldpc = cframe_pc(cf);
+ setcframe_pc(cf, pc);
+ /* Before dispatch, have to bias PC by 1. */
+ L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf));
+ lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */
+ setcframe_pc(cf, oldpc);
+ ERRNO_RESTORE
+}
+#endif
+
+#if LJ_HASPROFILE
+/* Profile dispatch. */
+void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc)
+{
+ ERRNO_SAVE
+ GCfunc *fn = curr_func(L);
+ GCproto *pt = funcproto(fn);
+ void *cf = cframe_raw(L->cframe);
+ const BCIns *oldpc = cframe_pc(cf);
+ global_State *g;
+ setcframe_pc(cf, pc);
+ L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf));
+ lj_profile_interpreter(L);
+ setcframe_pc(cf, oldpc);
+ g = G(L);
+ setgcref(g->cur_L, obj2gco(L));
+ setvmstate(g, INTERP);
+ ERRNO_RESTORE
+}
+#endif
+
diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h
index cb4cbf8e..52762eea 100644
--- a/src/lj_dispatch.h
+++ b/src/lj_dispatch.h
@@ -14,8 +14,24 @@
#if LJ_TARGET_MIPS
/* Need our own global offset table for the dreaded MIPS calling conventions. */
+
+#ifndef _LJ_VM_H
+LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b);
+#endif
+
+#if LJ_SOFTFP
+#ifndef _LJ_IRCALL_H
+extern double __adddf3(double a, double b);
+extern double __subdf3(double a, double b);
+extern double __muldf3(double a, double b);
+extern double __divdf3(double a, double b);
+#endif
+#define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3)
+#else
+#define SFGOTDEF(_)
+#endif
#if LJ_HASJIT
-#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot)
+#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot)
#else
#define JITGOTDEF(_)
#endif
@@ -28,16 +44,19 @@
#define GOTDEF(_) \
_(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \
_(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \
- _(pow) _(fmod) _(ldexp) \
- _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) _(lj_err_run) \
+ _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \
+ _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \
+ _(lj_dispatch_profile) _(lj_err_throw) \
_(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \
_(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \
_(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \
- _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \
- _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \
- _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \
- _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
- JITGOTDEF(_) FFIGOTDEF(_)
+ _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \
+ _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \
+ _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \
+ _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \
+ _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \
+ _(lj_buf_putstr_upper) _(lj_buf_tostr) \
+ JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_)
enum {
#define GOTENUM(name) LJ_GOT_##name,
@@ -60,7 +79,7 @@ typedef uint16_t HotCount;
#define HOTCOUNT_CALL 1
/* This solves a circular dependency problem -- bump as needed. Sigh. */
-#define GG_NUM_ASMFF 62
+#define GG_NUM_ASMFF 57
#define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF)
#define GG_LEN_SDISP BC_FUNCF
@@ -70,7 +89,7 @@ typedef uint16_t HotCount;
typedef struct GG_State {
lua_State L; /* Main thread. */
global_State g; /* Global state. */
-#if LJ_TARGET_ARM
+#if LJ_TARGET_ARM && !LJ_TARGET_NX
/* Make g reachable via K12 encoded DISPATCH-relative addressing. */
uint8_t align1[(16-sizeof(global_State))&15];
#endif
@@ -80,7 +99,7 @@ typedef struct GG_State {
#if LJ_HASJIT
jit_State J; /* JIT state. */
HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */
-#if LJ_TARGET_ARM
+#if LJ_TARGET_ARM && !LJ_TARGET_NX
/* Ditto for J. */
uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15];
#endif
@@ -96,6 +115,7 @@ typedef struct GG_State {
#define J2G(J) (&J2GG(J)->g)
#define G2J(gl) (&G2GG(gl)->J)
#define L2J(L) (&L2GG(L)->J)
+#define GG_G2J (GG_OFS(J) - GG_OFS(g))
#define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g))
#define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch))
#define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch))
@@ -117,7 +137,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g);
/* Instruction dispatch callback for hooks or when recording. */
LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc);
LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc);
-LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc);
+#if LJ_HASJIT
+LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc);
+#endif
+#if LJ_HASPROFILE
+LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc);
+#endif
#if LJ_HASFFI && !defined(_BUILDVM_H)
/* Save/restore errno and GetLastError() around hooks, exits and recording. */
diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h
index 2db07ef6..cfb174f4 100644
--- a/src/lj_emit_arm.h
+++ b/src/lj_emit_arm.h
@@ -81,7 +81,8 @@ static void emit_m(ASMState *as, ARMIns ai, Reg rm)
static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
{
- lua_assert(ofs >= -255 && ofs <= 255);
+ lj_assertA(ofs >= -255 && ofs <= 255,
+ "load/store offset %d out of range", ofs);
if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
*--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) |
((ofs & 0xf0) << 4) | (ofs & 0x0f);
@@ -89,7 +90,8 @@ static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
{
- lua_assert(ofs >= -4095 && ofs <= 4095);
+ lj_assertA(ofs >= -4095 && ofs <= 4095,
+ "load/store offset %d out of range", ofs);
/* Combine LDR/STR pairs to LDRD/STRD. */
if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) &&
(ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn &&
@@ -106,7 +108,8 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
#if !LJ_SOFTFP
static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs)
{
- lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0);
+ lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0,
+ "load/store offset %d out of range", ofs);
if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U;
*--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2);
}
@@ -124,7 +127,7 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
while (work) {
Reg r = rset_picktop(work);
IRRef ref = regcost_ref(as->cost[r]);
- lua_assert(r != d);
+ lj_assertA(r != d, "dest reg not free");
if (emit_canremat(ref)) {
int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
uint32_t k = emit_isk12(ARMI_ADD, delta);
@@ -142,13 +145,13 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i)
}
/* Try to find a two step delta relative to another constant. */
-static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
+static int emit_kdelta2(ASMState *as, Reg rd, int32_t i)
{
RegSet work = ~as->freeset & RSET_GPR;
while (work) {
Reg r = rset_picktop(work);
IRRef ref = regcost_ref(as->cost[r]);
- lua_assert(r != d);
+ lj_assertA(r != rd, "dest reg %d not free", rd);
if (emit_canremat(ref)) {
int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i;
if (other) {
@@ -159,8 +162,8 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
k2 = emit_isk12(0, delta & (255 << sh));
k = emit_isk12(0, delta & ~(255 << sh));
if (k) {
- emit_dn(as, ARMI_ADD^k2^inv, d, d);
- emit_dn(as, ARMI_ADD^k^inv, d, r);
+ emit_dn(as, ARMI_ADD^k2^inv, rd, rd);
+ emit_dn(as, ARMI_ADD^k^inv, rd, r);
return 1;
}
}
@@ -171,23 +174,24 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i)
}
/* Load a 32 bit constant into a GPR. */
-static void emit_loadi(ASMState *as, Reg r, int32_t i)
+static void emit_loadi(ASMState *as, Reg rd, int32_t i)
{
uint32_t k = emit_isk12(ARMI_MOV, i);
- lua_assert(rset_test(as->freeset, r) || r == RID_TMP);
+ lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP,
+ "dest reg %d not free", rd);
if (k) {
/* Standard K12 constant. */
- emit_d(as, ARMI_MOV^k, r);
+ emit_d(as, ARMI_MOV^k, rd);
} else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) {
/* 16 bit loword constant for ARMv6T2. */
- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r);
- } else if (emit_kdelta1(as, r, i)) {
+ emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
+ } else if (emit_kdelta1(as, rd, i)) {
/* One step delta relative to another constant. */
} else if ((as->flags & JIT_F_ARMV6T2)) {
/* 32 bit hiword/loword constant for ARMv6T2. */
- emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r);
- emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r);
- } else if (emit_kdelta2(as, r, i)) {
+ emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd);
+ emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd);
+ } else if (emit_kdelta2(as, rd, i)) {
/* Two step delta relative to another constant. */
} else {
/* Otherwise construct the constant with up to 4 instructions. */
@@ -197,17 +201,17 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
int32_t m = i & (255 << sh);
i &= ~(255 << sh);
if (i == 0) {
- emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r);
+ emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd);
break;
}
- emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r);
+ emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd);
}
}
}
-#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
+#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr)))
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow);
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
/* Get/set from constant pointer. */
static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
@@ -219,8 +223,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p)
#if !LJ_SOFTFP
/* Load a number constant into an FPR. */
-static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
+ cTValue *tv = ir_knum(ir);
int32_t i;
if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) {
uint32_t hi = tv->u32.hi;
@@ -260,7 +265,7 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target)
{
MCode *p = as->mcp;
ptrdiff_t delta = (target - p) - 1;
- lua_assert(((delta + 0x00800000) >> 24) == 0);
+ lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range");
*--p = ai | ((uint32_t)delta & 0x00ffffffu);
as->mcp = p;
}
@@ -288,7 +293,7 @@ static void emit_call(ASMState *as, void *target)
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
#if LJ_SOFTFP
- lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
+ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
#else
if (dst >= RID_MAX_GPR) {
emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,
@@ -308,30 +313,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
emit_dm(as, ARMI_MOV, dst, src);
}
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
#if LJ_SOFTFP
- lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
+ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
#else
if (r >= RID_MAX_GPR)
- emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs);
+ emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs);
else
#endif
- emit_lso(as, ARMI_LDR, r, RID_SP, ofs);
+ emit_lso(as, ARMI_LDR, r, base, ofs);
}
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
#if LJ_SOFTFP
- lua_assert(!irt_isnum(ir->t)); UNUSED(ir);
+ lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
#else
if (r >= RID_MAX_GPR)
- emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs);
+ emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs);
else
#endif
- emit_lso(as, ARMI_STR, r, RID_SP, ofs);
+ emit_lso(as, ARMI_STR, r, base, ofs);
}
/* Emit an arithmetic/logic operation with a constant operand. */
diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h
new file mode 100644
index 00000000..c4b4c147
--- /dev/null
+++ b/src/lj_emit_arm64.h
@@ -0,0 +1,424 @@
+/*
+** ARM64 instruction emitter.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+**
+** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+** Sponsored by Cisco Systems, Inc.
+*/
+
+/* -- Constant encoding --------------------------------------------------- */
+
+static uint64_t get_k64val(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ if (ir->o == IR_KINT64) {
+ return ir_kint64(ir)->u64;
+ } else if (ir->o == IR_KGC) {
+ return (uint64_t)ir_kgc(ir);
+ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+ return (uint64_t)ir_kptr(ir);
+ } else {
+ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
+ "bad 64 bit const IR op %d", ir->o);
+ return ir->i; /* Sign-extended. */
+ }
+}
+
+/* Encode constant in K12 format for data processing instructions. */
+static uint32_t emit_isk12(int64_t n)
+{
+ uint64_t k = (n < 0) ? -n : n;
+ uint32_t m = (n < 0) ? 0x40000000 : 0;
+ if (k < 0x1000) {
+ return A64I_K12|m|A64F_U12(k);
+ } else if ((k & 0xfff000) == k) {
+ return A64I_K12|m|0x400000|A64F_U12(k>>12);
+ }
+ return 0;
+}
+
+#define emit_clz64(n) __builtin_clzll(n)
+#define emit_ctz64(n) __builtin_ctzll(n)
+
+/* Encode constant in K13 format for logical data processing instructions. */
+static uint32_t emit_isk13(uint64_t n, int is64)
+{
+ int inv = 0, w = 128, lz, tz;
+ if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */
+ if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */
+ do { /* Find the repeat width. */
+ if (is64 && (uint32_t)(n^(n>>32))) break;
+ n = (uint32_t)n;
+ if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */
+ w = 32; if ((n^(n>>16)) & 0xffff) break;
+ n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break;
+ n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break;
+ n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break;
+ n = n & 0x3; w = 2;
+ } while (0);
+ lz = emit_clz64(n);
+ tz = emit_ctz64(n);
+ if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */
+ if (inv)
+ return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10);
+ else
+ return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10);
+}
+
+static uint32_t emit_isfpk64(uint64_t n)
+{
+ uint64_t etop9 = ((n >> 54) & 0x1ff);
+ if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) {
+ return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80));
+ }
+ return ~0u;
+}
+
+/* -- Emit basic instructions --------------------------------------------- */
+
+static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra)
+{
+ *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra);
+}
+
+static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
+{
+ *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);
+}
+
+static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm)
+{
+ *--as->mcp = ai | A64F_D(rd) | A64F_M(rm);
+}
+
+static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
+{
+ *--as->mcp = ai | A64F_D(rd) | A64F_N(rn);
+}
+
+static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm)
+{
+ *--as->mcp = ai | A64F_N(rn) | A64F_M(rm);
+}
+
+static void emit_d(ASMState *as, A64Ins ai, Reg rd)
+{
+ *--as->mcp = ai | A64F_D(rd);
+}
+
+static void emit_n(ASMState *as, A64Ins ai, Reg rn)
+{
+ *--as->mcp = ai | A64F_N(rn);
+}
+
+static int emit_checkofs(A64Ins ai, int64_t ofs)
+{
+ int scale = (ai >> 30) & 3;
+ if (ofs < 0 || (ofs & ((1<<scale)-1))) {
+ return (ofs >= -256 && ofs <= 255) ? -1 : 0;
+ } else {
+ return (ofs < (4096<<scale)) ? 1 : 0;
+ }
+}
+
+static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
+{
+ int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3;
+ lj_assertA(ot, "load/store offset %d out of range", ofs);
+ /* Combine LDR/STR pairs to LDP/STP. */
+ if ((sc == 2 || sc == 3) &&
+ (!(ai & 0x400000) || rd != rn) &&
+ as->mcp != as->mcloop) {
+ uint32_t prev = *as->mcp & ~A64F_D(31);
+ int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
+ A64Ins aip;
+ if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) ||
+ prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) {
+ aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
+ } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) ||
+ prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) {
+ aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
+ ofsm = ofs;
+ } else {
+ goto nopair;
+ }
+ if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
+ *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
+ (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
+ return;
+ }
+ }
+nopair:
+ if (ot == 1)
+ *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc);
+ else
+ *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff);
+}
+
+/* -- Emit loads/stores --------------------------------------------------- */
+
+/* Prefer rematerialization of BASE/L from global_State over spills. */
+#define emit_canremat(ref) ((ref) <= ASMREF_L)
+
+/* Try to find an N-step delta relative to other consts with N < lim. */
+static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
+{
+ RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL);
+ if (lim <= 1) return 0; /* Can't beat that. */
+ while (work) {
+ Reg r = rset_picktop(work);
+ IRRef ref = regcost_ref(as->cost[r]);
+ lj_assertA(r != rd, "dest reg %d not free", rd);
+ if (ref < REF_TRUE) {
+ uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
+ get_k64val(as, ref);
+ int64_t delta = (int64_t)(k - kx);
+ if (delta == 0) {
+ emit_dm(as, A64I_MOVx, rd, r);
+ return 1;
+ } else {
+ uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta);
+ if (k12) {
+ emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
+ return 1;
+ }
+ /* Do other ops or multi-step deltas pay off? Probably not.
+ ** E.g. XOR rarely helps with pointer consts.
+ */
+ }
+ }
+ rset_clear(work, r);
+ }
+ return 0; /* Failed. */
+}
+
+static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
+{
+ int i, zeros = 0, ones = 0, neg;
+ if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */
+ /* Count homogeneous 16 bit fragments. */
+ for (i = 0; i < 4; i++) {
+ uint64_t frag = (u64 >> i*16) & 0xffff;
+ zeros += (frag == 0);
+ ones += (frag == 0xffff);
+ }
+ neg = ones > zeros; /* Use MOVN if it pays off. */
+ if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */
+ uint32_t k13 = emit_isk13(u64, is64);
+ if (k13) {
+ emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
+ return;
+ }
+ }
+ if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
+ int shift = 0, lshift = 0;
+ uint64_t n64 = neg ? ~u64 : u64;
+ if (n64 != 0) {
+ /* Find first/last fragment to be filled. */
+ shift = (63-emit_clz64(n64)) & ~15;
+ lshift = emit_ctz64(n64) & ~15;
+ }
+ /* MOVK requires the original value (u64). */
+ while (shift > lshift) {
+ uint32_t u16 = (u64 >> shift) & 0xffff;
+ /* Skip fragments that are correctly filled by MOVN/MOVZ. */
+ if (u16 != (neg ? 0xffff : 0))
+ emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
+ shift -= 16;
+ }
+ /* But MOVN needs an inverted value (n64). */
+ emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
+ A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
+ }
+}
+
+/* Load a 32 bit constant into a GPR. */
+#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0)
+
+/* Load a 64 bit constant into a GPR. */
+#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X)
+
+#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr))
+
+#define glofs(as, k) \
+ ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
+#define mcpofs(as, k) \
+ ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
+#define checkmcpofs(as, k) \
+ (A64F_S_OK(mcpofs(as, k)>>2, 19))
+
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
+
+/* Get/set from constant pointer. */
+static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
+{
+ /* First, check if ip + offset is in range. */
+ if ((ai & 0x00400000) && checkmcpofs(as, p)) {
+ emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
+ } else {
+ Reg base = RID_GL; /* Next, try GL + offset. */
+ int64_t ofs = glofs(as, p);
+ if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */
+ int64_t i64 = i64ptr(p);
+ base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
+ ofs = i64 & 0x7fffull;
+ }
+ emit_lso(as, ai, r, base, ofs);
+ }
+}
+
+/* Load 64 bit IR constant into register. */
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+{
+ const uint64_t *k = &ir_k64(ir)->u64;
+ int64_t ofs;
+ if (r >= RID_MAX_GPR) {
+ uint32_t fpk = emit_isfpk64(*k);
+ if (fpk != ~0u) {
+ emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31));
+ return;
+ }
+ }
+ ofs = glofs(as, k);
+ if (emit_checkofs(A64I_LDRx, ofs)) {
+ emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx,
+ (r & 31), RID_GL, ofs);
+ } else {
+ if (r >= RID_MAX_GPR) {
+ emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP);
+ r = RID_TMP;
+ }
+ if (checkmcpofs(as, k))
+ emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r);
+ else
+ emit_loadu64(as, r, *k);
+ }
+}
+
+/* Get/set global_State fields. */
+#define emit_getgl(as, r, field) \
+ emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field)
+#define emit_setgl(as, r, field) \
+ emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field)
+
+/* Trace number is determined from pc of exit instruction. */
+#define emit_setvmstate(as, i) UNUSED(i)
+
+/* -- Emit control-flow instructions -------------------------------------- */
+
+/* Label for internal jumps. */
+typedef MCode *MCLabel;
+
+/* Return label pointing to current PC. */
+#define emit_label(as) ((as)->mcp)
+
+static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target)
+{
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = target - p;
+ lj_assertA(A64F_S_OK(delta, 19), "branch target out of range");
+ *p = A64I_BCC | A64F_S19(delta) | cond;
+}
+
+static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
+{
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = target - p;
+ lj_assertA(A64F_S_OK(delta, 26), "branch target out of range");
+ *p = ai | A64F_S26(delta);
+}
+
+static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target)
+{
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = target - p;
+ lj_assertA(bit < 63, "bit number out of range");
+ lj_assertA(A64F_S_OK(delta, 14), "branch target out of range");
+ if (bit > 31) ai |= A64I_X;
+ *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r;
+}
+
+static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
+{
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = target - p;
+ lj_assertA(A64F_S_OK(delta, 19), "branch target out of range");
+ *p = ai | A64F_S19(delta) | r;
+}
+
+#define emit_jmp(as, target) emit_branch(as, A64I_B, (target))
+
+static void emit_call(ASMState *as, void *target)
+{
+ MCode *p = --as->mcp;
+ ptrdiff_t delta = (char *)target - (char *)p;
+ if (A64F_S_OK(delta>>2, 26)) {
+ *p = A64I_BL | A64F_S26(delta>>2);
+ } else { /* Target out of range: need indirect call. But don't use R0-R7. */
+ Reg r = ra_allock(as, i64ptr(target),
+ RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
+ *p = A64I_BLR | A64F_N(r);
+ }
+}
+
+/* -- Emit generic operations --------------------------------------------- */
+
+/* Generic move between two regs. */
+static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
+{
+ if (dst >= RID_MAX_GPR) {
+ emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S,
+ (dst & 31), (src & 31));
+ return;
+ }
+ if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */
+ MCode ins = *as->mcp, swp = (src^dst);
+ if ((ins & 0xbf800000) == 0xb9000000) {
+ if (!((ins ^ (dst << 5)) & 0x000003e0))
+ *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */
+ if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f))
+ *as->mcp = ins ^ swp; /* Swap D in store. */
+ }
+ }
+ emit_dm(as, A64I_MOVx, dst, src);
+}
+
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+ if (r >= RID_MAX_GPR)
+ emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs);
+ else
+ emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs);
+}
+
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+ if (r >= RID_MAX_GPR)
+ emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs);
+ else
+ emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs);
+}
+
+/* Emit an arithmetic operation with a constant operand. */
+static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src,
+ int32_t i, RegSet allow)
+{
+ uint32_t k = emit_isk12(i);
+ if (k)
+ emit_dn(as, ai^k, dest, src);
+ else
+ emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
+}
+
+/* Add offset to pointer. */
+static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
+{
+ if (ofs)
+ emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r,
+ ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r));
+}
+
+#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
+
diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h
index 366cf7ab..0cea5479 100644
--- a/src/lj_emit_mips.h
+++ b/src/lj_emit_mips.h
@@ -3,6 +3,32 @@
** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
*/
+#if LJ_64
+static intptr_t get_k64val(ASMState *as, IRRef ref)
+{
+ IRIns *ir = IR(ref);
+ if (ir->o == IR_KINT64) {
+ return (intptr_t)ir_kint64(ir)->u64;
+ } else if (ir->o == IR_KGC) {
+ return (intptr_t)ir_kgc(ir);
+ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
+ return (intptr_t)ir_kptr(ir);
+ } else if (LJ_SOFTFP && ir->o == IR_KNUM) {
+ return (intptr_t)ir_knum(ir)->u64;
+ } else {
+ lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL,
+ "bad 64 bit const IR op %d", ir->o);
+ return ir->i; /* Sign-extended. */
+ }
+}
+#endif
+
+#if LJ_64
+#define get_kval(as, ref) get_k64val(as, ref)
+#else
+#define get_kval(as, ref) (IR((ref))->i)
+#endif
+
/* -- Emit basic instructions --------------------------------------------- */
static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt)
@@ -35,7 +61,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh)
static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
{
- if ((as->flags & JIT_F_MIPS32R2)) {
+ if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) {
emit_dta(as, MIPSI_ROTR, dest, src, shift);
} else {
emit_dst(as, MIPSI_OR, dest, dest, tmp);
@@ -44,23 +70,32 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift)
}
}
+#if LJ_64 || LJ_HASBUFFER
+static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb,
+ uint32_t lsb)
+{
+ *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb);
+}
+#endif
+
/* -- Emit loads/stores --------------------------------------------------- */
/* Prefer rematerialization of BASE/L from global_State over spills. */
#define emit_canremat(ref) ((ref) <= REF_BASE)
/* Try to find a one step delta relative to another constant. */
-static int emit_kdelta1(ASMState *as, Reg t, int32_t i)
+static int emit_kdelta1(ASMState *as, Reg rd, intptr_t i)
{
RegSet work = ~as->freeset & RSET_GPR;
while (work) {
Reg r = rset_picktop(work);
IRRef ref = regcost_ref(as->cost[r]);
- lua_assert(r != t);
+ lj_assertA(r != rd, "dest reg %d not free", rd);
if (ref < ASMREF_L) {
- int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
+ intptr_t delta = (intptr_t)((uintptr_t)i -
+ (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(as, ref)));
if (checki16(delta)) {
- emit_tsi(as, MIPSI_ADDIU, t, r, delta);
+ emit_tsi(as, MIPSI_AADDIU, rd, r, delta);
return 1;
}
}
@@ -76,8 +111,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
emit_ti(as, MIPSI_LI, r, i);
} else {
if ((i & 0xffff)) {
- int32_t jgl = i32ptr(J2G(as->J));
- if ((uint32_t)(i-jgl) < 65536) {
+ intptr_t jgl = (intptr_t)(void *)J2G(as->J);
+ if ((uintptr_t)(i-jgl) < 65536) {
emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768);
return;
} else if (emit_kdelta1(as, r, i)) {
@@ -92,16 +127,49 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
}
}
+#if LJ_64
+/* Load a 64 bit constant into a GPR. */
+static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
+{
+ if (checki32((int64_t)u64)) {
+ emit_loadi(as, r, (int32_t)u64);
+ } else {
+ uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J);
+ if (delta < 65536) {
+ emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768));
+ } else if (emit_kdelta1(as, r, (intptr_t)u64)) {
+ return;
+ } else {
+ /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */
+ if ((u64 & 0xffff)) {
+ emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff);
+ }
+ if (((u64 >> 16) & 0xffff)) {
+ emit_dta(as, MIPSI_DSLL, r, r, 16);
+ emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff);
+ emit_dta(as, MIPSI_DSLL, r, r, 16);
+ } else {
+ emit_dta(as, MIPSI_DSLL32, r, r, 0);
+ }
+ emit_loadi(as, r, (int32_t)(u64 >> 32));
+ }
+ /* TODO: There are probably more optimization opportunities. */
+ }
+}
+
+#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr)))
+#else
#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
+#endif
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow);
-static void ra_allockreg(ASMState *as, int32_t k, Reg r);
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
+static void ra_allockreg(ASMState *as, intptr_t k, Reg r);
/* Get/set from constant pointer. */
static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
{
- int32_t jgl = i32ptr(J2G(as->J));
- int32_t i = i32ptr(p);
+ intptr_t jgl = (intptr_t)(J2G(as->J));
+ intptr_t i = (intptr_t)(p);
Reg base;
if ((uint32_t)(i-jgl) < 65536) {
i = i-jgl-32768;
@@ -112,8 +180,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow)
emit_tsi(as, mi, r, base, i);
}
-#define emit_loadn(as, r, tv) \
- emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR)
+#if LJ_64
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+{
+ const uint64_t *k = &ir_k64(ir)->u64;
+ Reg r64 = r;
+ if (rset_test(RSET_FPR, r)) {
+ r64 = RID_TMP;
+ emit_tg(as, MIPSI_DMTC1, r64, r);
+ }
+ if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536)
+ emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0);
+ else
+ emit_loadu64(as, r64, *k);
+}
+#else
+#define emit_loadk64(as, r, ir) \
+ emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
+#endif
/* Get/set global_State fields. */
static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
@@ -122,9 +206,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs)
}
#define emit_getgl(as, r, field) \
- emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field))
+ emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field))
#define emit_setgl(as, r, field) \
- emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field))
+ emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field))
/* Trace number is determined from per-trace exit stubs. */
#define emit_setvmstate(as, i) UNUSED(i)
@@ -141,7 +225,7 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target)
{
MCode *p = as->mcp;
ptrdiff_t delta = target - p;
- lua_assert(((delta + 0x8000) >> 16) == 0);
+ lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range");
*--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu);
as->mcp = p;
}
@@ -152,16 +236,31 @@ static void emit_jmp(ASMState *as, MCode *target)
emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target));
}
-static void emit_call(ASMState *as, void *target)
+static void emit_call(ASMState *as, void *target, int needcfa)
{
MCode *p = as->mcp;
- *--p = MIPSI_NOP;
- if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0)
+#if LJ_TARGET_MIPSR6
+ ptrdiff_t delta = (char *)target - (char *)p;
+ if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */
+ *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu);
+ as->mcp = p;
+ return;
+ }
+#endif
+ *--p = MIPSI_NOP; /* Delay slot. */
+ if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) {
+#if !LJ_TARGET_MIPSR6
+ *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) |
+ (((uintptr_t)target >>2) & 0x03ffffffu);
+#else
*--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu);
- else /* Target out of range: need indirect call. */
+#endif
+ } else { /* Target out of range: need indirect call. */
*--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR);
+ needcfa = 1;
+ }
as->mcp = p;
- ra_allockreg(as, i32ptr(target), RID_CFUNCADDR);
+ if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR);
}
/* -- Emit generic operations --------------------------------------------- */
@@ -178,32 +277,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src);
}
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
if (r < RID_MAX_GPR)
- emit_tsi(as, MIPSI_LW, r, RID_SP, ofs);
+ emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs);
else
emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1,
- (r & 31), RID_SP, ofs);
+ (r & 31), base, ofs);
}
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
if (r < RID_MAX_GPR)
- emit_tsi(as, MIPSI_SW, r, RID_SP, ofs);
+ emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs);
else
emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1,
- (r&31), RID_SP, ofs);
+ (r&31), base, ofs);
}
/* Add offset to pointer. */
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
if (ofs) {
- lua_assert(checki16(ofs));
- emit_tsi(as, MIPSI_ADDIU, r, r, ofs);
+ lj_assertA(checki16(ofs), "offset %d out of range", ofs);
+ emit_tsi(as, MIPSI_AADDIU, r, r, ofs);
}
}
diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h
index 6d0ea185..86760e78 100644
--- a/src/lj_emit_ppc.h
+++ b/src/lj_emit_ppc.h
@@ -41,13 +41,13 @@ static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs,
static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n)
{
- lua_assert(n >= 0 && n < 32);
+ lj_assertA(n >= 0 && n < 32, "shift out of range");
emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n);
}
static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
{
- lua_assert(n >= 0 && n < 32);
+ lj_assertA(n >= 0 && n < 32, "shift out of range");
emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31);
}
@@ -57,17 +57,17 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n)
#define emit_canremat(ref) ((ref) <= REF_BASE)
/* Try to find a one step delta relative to another constant. */
-static int emit_kdelta1(ASMState *as, Reg t, int32_t i)
+static int emit_kdelta1(ASMState *as, Reg rd, int32_t i)
{
RegSet work = ~as->freeset & RSET_GPR;
while (work) {
Reg r = rset_picktop(work);
IRRef ref = regcost_ref(as->cost[r]);
- lua_assert(r != t);
+ lj_assertA(r != rd, "dest reg %d not free", rd);
if (ref < ASMREF_L) {
int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i);
if (checki16(delta)) {
- emit_tai(as, PPCI_ADDI, t, r, delta);
+ emit_tai(as, PPCI_ADDI, rd, r, delta);
return 1;
}
}
@@ -98,7 +98,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr)))
-static Reg ra_allock(ASMState *as, int32_t k, RegSet allow);
+static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
/* Get/set from constant pointer. */
static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
@@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow)
emit_tai(as, pi, r, base, i);
}
-#define emit_loadn(as, r, tv) \
- emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR)
+#define emit_loadk64(as, r, ir) \
+ emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR)
/* Get/set global_State fields. */
static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs)
@@ -144,7 +144,7 @@ static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target)
{
MCode *p = --as->mcp;
ptrdiff_t delta = (char *)target - (char *)p;
- lua_assert(((delta + 0x8000) >> 16) == 0);
+ lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range");
pi ^= (delta & 0x8000) * (PPCF_Y/0x8000);
*p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu);
}
@@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
emit_fb(as, PPCI_FMR, dst, src);
}
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
if (r < RID_MAX_GPR)
- emit_tai(as, PPCI_LWZ, r, RID_SP, ofs);
+ emit_tai(as, PPCI_LWZ, r, base, ofs);
else
- emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs);
+ emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs);
}
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
if (r < RID_MAX_GPR)
- emit_tai(as, PPCI_STW, r, RID_SP, ofs);
+ emit_tai(as, PPCI_STW, r, base, ofs);
else
- emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs);
+ emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs);
}
/* Emit a compare (for equality) with a constant operand. */
diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h
index d8b4b8a0..3d3beda3 100644
--- a/src/lj_emit_x86.h
+++ b/src/lj_emit_x86.h
@@ -13,10 +13,17 @@
if (rex != 0x40) *--(p) = rex; }
#define FORCE_REX 0x200
#define REX_64 (FORCE_REX|0x080000)
+#define VEX_64 0x800000
#else
#define REXRB(p, rr, rb) ((void)0)
#define FORCE_REX 0
#define REX_64 0
+#define VEX_64 0
+#endif
+#if LJ_GC64
+#define REX_GC64 REX_64
+#else
+#define REX_GC64 0
#endif
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
@@ -31,7 +38,14 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
MCode *p, int delta)
{
int n = (int8_t)xo;
-#if defined(__GNUC__)
+ if (n == -60) { /* VEX-encoded instruction */
+#if LJ_64
+ xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13;
+#endif
+ *(uint32_t *)(p+delta-5) = (uint32_t)xo;
+ return p+delta-5;
+ }
+#if defined(__GNUC__) || defined(__clang__)
if (__builtin_constant_p(xo) && n == -2)
p[delta-2] = (MCode)(xo >> 24);
else if (__builtin_constant_p(xo) && n == -3)
@@ -78,33 +92,24 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2)
/* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */
static int32_t ptr2addr(const void *p)
{
- lua_assert((uintptr_t)p < (uintptr_t)0x80000000);
+ lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range");
return i32ptr(p);
}
#else
#define ptr2addr(p) (i32ptr((p)))
#endif
-/* op r, [addr] */
-static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
-{
- MCode *p = as->mcp;
- *(int32_t *)(p-4) = ptr2addr(addr);
-#if LJ_64
- p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
- as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
-#else
- as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
-#endif
-}
-
/* op r, [base+ofs] */
static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs)
{
MCode *p = as->mcp;
x86Mode mode;
if (ra_hasreg(rb)) {
- if (ofs == 0 && (rb&7) != RID_EBP) {
+ if (LJ_GC64 && rb == RID_RIP) {
+ mode = XM_OFS0;
+ p -= 4;
+ *(int32_t *)p = ofs;
+ } else if (ofs == 0 && (rb&7) != RID_EBP) {
mode = XM_OFS0;
} else if (checki8(ofs)) {
*--p = (MCode)ofs;
@@ -202,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb)
*--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
rb = RID_ESP;
#endif
+ } else if (LJ_GC64 && rb == RID_RIP) {
+ lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index");
+ mode = XM_OFS0;
+ p -= 4;
+ *(int32_t *)p = as->mrm.ofs;
} else {
if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) {
mode = XM_OFS0;
@@ -241,10 +251,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i)
/* -- Emit loads/stores --------------------------------------------------- */
-/* Instruction selection for XMM moves. */
-#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS)
-#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD)
-
/* mov [base+ofs], i */
static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
{
@@ -259,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i)
/* Get/set global_State fields. */
#define emit_opgl(as, xo, r, field) \
emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field)
-#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field)
-#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field)
+#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field)
+#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field)
#define emit_setvmstate(as, i) \
(emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate))
@@ -285,9 +291,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i)
}
}
+#if LJ_GC64
+#define dispofs(as, k) \
+ ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch))
+#define mcpofs(as, k) \
+ ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp))
+#define mctopofs(as, k) \
+ ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop))
+/* mov r, addr */
+#define emit_loada(as, r, addr) \
+ emit_loadu64(as, (r), (uintptr_t)(addr))
+#else
/* mov r, addr */
#define emit_loada(as, r, addr) \
emit_loadi(as, (r), ptr2addr((addr)))
+#endif
#if LJ_64
/* mov r, imm64 or shorter 32 bit extended load. */
@@ -299,6 +317,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
MCode *p = as->mcp;
*(int32_t *)(p-4) = (int32_t)u64;
as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4);
+#if LJ_GC64
+ } else if (checki32(dispofs(as, u64))) {
+ emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64));
+ } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) {
+ /* Since as->realign assumes the code size doesn't change, check
+ ** RIP-relative addressing reachability for both as->mcp and as->mctop.
+ */
+ emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64));
+#endif
} else { /* Full-size 64 bit load. */
MCode *p = as->mcp;
*(uint64_t *)(p-8) = u64;
@@ -310,13 +337,90 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64)
}
#endif
-/* movsd r, [&tv->n] / xorps r, r */
-static void emit_loadn(ASMState *as, Reg r, cTValue *tv)
+/* op r, [addr] */
+static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr)
{
- if (tvispzero(tv)) /* Use xor only for +0. */
- emit_rr(as, XO_XORPS, r, r);
- else
- emit_rma(as, XMM_MOVRM(as), r, &tv->n);
+#if LJ_GC64
+ if (checki32(dispofs(as, addr))) {
+ emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr));
+ } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) {
+ emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr));
+ } else if (!checki32((intptr_t)addr)) {
+ Reg ra = (rr & 15);
+ if (xo != XO_MOV) {
+ /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */
+ uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch;
+ uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0;
+ ra = RID_DISPATCH;
+ if (checku32(dispaddr)) {
+ emit_loadi(as, ra, (int32_t)dispaddr);
+ } else { /* Full-size 64 bit load. */
+ MCode *p = as->mcp;
+ *(uint64_t *)(p-8) = dispaddr;
+ p[-9] = (MCode)(XI_MOVri+(ra&7));
+ p[-10] = 0x48 + ((ra>>3)&1);
+ p -= 10;
+ as->mcp = p;
+ }
+ if (xo == XO_GROUP3b) emit_i8(as, i8);
+ }
+ emit_rmro(as, xo, rr, ra, 0);
+ emit_loadu64(as, ra, (uintptr_t)addr);
+ } else
+#endif
+ {
+ MCode *p = as->mcp;
+ *(int32_t *)(p-4) = ptr2addr(addr);
+#if LJ_64
+ p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP);
+ as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5);
+#else
+ as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4);
+#endif
+ }
+}
+
+/* Load 64 bit IR constant into register. */
+static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
+{
+ Reg r64;
+ x86Op xo;
+ const uint64_t *k = &ir_k64(ir)->u64;
+ if (rset_test(RSET_FPR, r)) {
+ r64 = r;
+ xo = XO_MOVSD;
+ } else {
+ r64 = r | REX_64;
+ xo = XO_MOV;
+ }
+ if (*k == 0) {
+ emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r);
+#if LJ_GC64
+ } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) ||
+ (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) {
+ emit_rma(as, xo, r64, k);
+ } else {
+ if (ir->i) {
+ lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i),
+ "bad interned 64 bit constant");
+ } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) {
+ emit_loadu64(as, r, *k);
+ return;
+ } else {
+ /* If all else fails, add the FP constant at the MCode area bottom. */
+ while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3;
+ *(uint64_t *)as->mcbot = *k;
+ ir->i = (int32_t)(as->mctop - as->mcbot);
+ as->mcbot += 8;
+ as->mclim = as->mcbot + MCLIM_REDZONE;
+ lj_mcode_commitbot(as->J, as->mcbot);
+ }
+ emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i));
+#else
+ } else {
+ emit_rma(as, xo, r64, k);
+#endif
+ }
}
/* -- Emit control-flow instructions -------------------------------------- */
@@ -330,7 +434,7 @@ static void emit_sjmp(ASMState *as, MCLabel target)
{
MCode *p = as->mcp;
ptrdiff_t delta = target - p;
- lua_assert(delta == (int8_t)delta);
+ lj_assertA(delta == (int8_t)delta, "short jump target out of range");
p[-1] = (MCode)(int8_t)delta;
p[-2] = XI_JMPs;
as->mcp = p - 2;
@@ -342,7 +446,7 @@ static void emit_sjcc(ASMState *as, int cc, MCLabel target)
{
MCode *p = as->mcp;
ptrdiff_t delta = target - p;
- lua_assert(delta == (int8_t)delta);
+ lj_assertA(delta == (int8_t)delta, "short jump target out of range");
p[-1] = (MCode)(int8_t)delta;
p[-2] = (MCode)(XI_JCCs+(cc&15));
as->mcp = p - 2;
@@ -368,10 +472,11 @@ static void emit_sfixup(ASMState *as, MCLabel source)
#define emit_label(as) ((as)->mcp)
/* Compute relative 32 bit offset for jump and call instructions. */
-static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target)
+static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target)
{
ptrdiff_t delta = target - p;
- lua_assert(delta == (int32_t)delta);
+ UNUSED(J);
+ lj_assertJ(delta == (int32_t)delta, "jump target out of range");
return (int32_t)delta;
}
@@ -379,7 +484,7 @@ static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target)
static void emit_jcc(ASMState *as, int cc, MCode *target)
{
MCode *p = as->mcp;
- *(int32_t *)(p-4) = jmprel(p, target);
+ *(int32_t *)(p-4) = jmprel(as->J, p, target);
p[-5] = (MCode)(XI_JCCn+(cc&15));
p[-6] = 0x0f;
as->mcp = p - 6;
@@ -389,7 +494,7 @@ static void emit_jcc(ASMState *as, int cc, MCode *target)
static void emit_jmp(ASMState *as, MCode *target)
{
MCode *p = as->mcp;
- *(int32_t *)(p-4) = jmprel(p, target);
+ *(int32_t *)(p-4) = jmprel(as->J, p, target);
p[-5] = XI_JMP;
as->mcp = p - 5;
}
@@ -406,7 +511,7 @@ static void emit_call_(ASMState *as, MCode *target)
return;
}
#endif
- *(int32_t *)(p-4) = jmprel(p, target);
+ *(int32_t *)(p-4) = jmprel(as->J, p, target);
p[-5] = XI_CALL;
as->mcp = p - 5;
}
@@ -418,8 +523,10 @@ static void emit_call_(ASMState *as, MCode *target)
/* Use 64 bit operations to handle 64 bit IR types. */
#if LJ_64
#define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0))
+#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0))
#else
#define REX_64IR(ir, r) (r)
+#define VEX_64IR(ir, r) (r)
#endif
/* Generic move between two regs. */
@@ -429,35 +536,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
if (dst < RID_MAX_GPR)
emit_rr(as, XO_MOV, REX_64IR(ir, dst), src);
else
- emit_rr(as, XMM_MOVRR(as), dst, src);
+ emit_rr(as, XO_MOVAPS, dst, src);
}
-/* Generic load of register from stack slot. */
-static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
if (r < RID_MAX_GPR)
- emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs);
+ emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs);
else
- emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs);
+ emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs);
}
-/* Generic store of register to stack slot. */
-static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs)
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
{
if (r < RID_MAX_GPR)
- emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs);
+ emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs);
else
- emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs);
+ emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs);
}
/* Add offset to pointer. */
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
{
if (ofs) {
- if ((as->flags & JIT_F_LEA_AGU))
- emit_rmro(as, XO_LEA, r, r, ofs);
- else
- emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs);
+ emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
}
}
diff --git a/src/lj_err.c b/src/lj_err.c
index b677b655..563c7706 100644
--- a/src/lj_err.c
+++ b/src/lj_err.c
@@ -16,6 +16,7 @@
#include "lj_ff.h"
#include "lj_trace.h"
#include "lj_vm.h"
+#include "lj_strfmt.h"
/*
** LuaJIT can either use internal or external frame unwinding:
@@ -28,12 +29,18 @@
** Pros and Cons:
**
** - EXT requires unwind tables for *all* functions on the C stack between
-** the pcall/catch and the error/throw. This is the default on x64,
-** but needs to be manually enabled on x86/PPC for non-C++ code.
+** the pcall/catch and the error/throw. C modules used by Lua code can
+** throw errors, so these need to have unwind tables, too. Transitively
+** this applies to all system libraries used by C modules -- at least
+** when they have callbacks which may throw an error.
**
-** - INT is faster when actually throwing errors (but this happens rarely).
+** - INT is faster when actually throwing errors, but this happens rarely.
** Setting up error handlers is zero-cost in any case.
**
+** - INT needs to save *all* callee-saved registers when entering the
+** interpreter. EXT only needs to save those actually used inside the
+** interpreter. JIT-compiled code may need to save some more.
+**
** - EXT provides full interoperability with C++ exceptions. You can throw
** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames.
** C++ destructors are called as needed. C++ exceptions caught by pcall
@@ -45,27 +52,38 @@
** the wrapper function feature. Lua errors thrown through C++ frames
** cannot be caught by C++ code and C++ destructors are not run.
**
-** EXT is the default on x64 systems, INT is the default on all other systems.
+** - EXT can handle errors from internal helper functions that are called
+** from JIT-compiled code (except for Windows/x86 and 32 bit ARM).
+** INT has no choice but to call the panic handler, if this happens.
+** Note: this is mainly relevant for out-of-memory errors.
+**
+** EXT is the default on all systems where the toolchain produces unwind
+** tables by default (*). This is hard-coded and/or detected in src/Makefile.
+** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL
+**
+** INT is the default on all other systems.
+**
+** EXT can be manually enabled for toolchains that are able to produce
+** conforming unwind tables:
+** "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL"
+** As explained above, *all* C code used directly or indirectly by LuaJIT
+** must be compiled with -funwind-tables (or -fexceptions). C++ code must
+** *not* be compiled with -fno-exceptions.
+**
+** If you're unsure whether error handling inside the VM works correctly,
+** try running this and check whether it prints "OK":
**
-** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack
-** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled
-** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set
-** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules
-** and all C libraries that have callbacks which may be used to call back
-** into Lua. C++ code must *not* be compiled with -fno-exceptions.
+** luajit -e "print(select(2, load('OK')):match('OK'))"
**
-** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH.
-** EXT is mandatory on WIN64 since the calling convention has an abundance
-** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15).
-** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4).
+** (*) Originally, toolchains only generated unwind tables for C++ code. For
+** interoperability reasons, this can be manually enabled for plain C code,
+** too (with -funwind-tables). With the introduction of the x64 architecture,
+** the corresponding POSIX and Windows ABIs mandated unwind tables for all
+** code. Over the following years most desktop and server platforms have
+** enabled unwind tables by default on all architectures. OTOH mobile and
+** embedded platforms do not consistently mandate unwind tables.
*/
-#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND
-#define LJ_UNWIND_EXT 1
-#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS
-#define LJ_UNWIND_EXT 1
-#endif
-
/* -- Error messages ------------------------------------------------------ */
/* Error message strings. */
@@ -98,14 +116,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
TValue *top = restorestack(L, -nres);
if (frame < top) { /* Frame reached? */
if (errcode) {
- L->cframe = cframe_prev(cf);
L->base = frame+1;
+ L->cframe = cframe_prev(cf);
unwindstack(L, top);
}
return cf;
}
}
- if (frame <= tvref(L->stack))
+ if (frame <= tvref(L->stack)+LJ_FR2)
break;
switch (frame_typep(frame)) {
case FRAME_LUA: /* Lua frame. */
@@ -113,14 +131,12 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
frame = frame_prevl(frame);
break;
case FRAME_C: /* C frame. */
-#if LJ_HASFFI
unwind_c:
-#endif
#if LJ_UNWIND_EXT
if (errcode) {
- L->cframe = cframe_prev(cf);
L->base = frame_prevd(frame) + 1;
- unwindstack(L, frame);
+ L->cframe = cframe_prev(cf);
+ unwindstack(L, frame - LJ_FR2);
} else if (cf != stopcf) {
cf = cframe_prev(cf);
frame = frame_prevd(frame);
@@ -143,16 +159,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
return cf;
}
if (errcode) {
- L->cframe = cframe_prev(cf);
L->base = frame_prevd(frame) + 1;
- unwindstack(L, frame);
+ L->cframe = cframe_prev(cf);
+ unwindstack(L, frame - LJ_FR2);
}
return cf;
case FRAME_CONT: /* Continuation frame. */
-#if LJ_HASFFI
- if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
+ if (frame_iscont_fficb(frame))
goto unwind_c;
-#endif
/* fallthrough */
case FRAME_VARG: /* Vararg frame. */
frame = frame_prevd(frame);
@@ -166,8 +180,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
}
if (frame_typep(frame) == FRAME_PCALL)
hook_leave(G(L));
- L->cframe = cf;
L->base = frame_prevd(frame) + 1;
+ L->cframe = cf;
unwindstack(L, L->base);
}
return (void *)((intptr_t)cf | CFRAME_UNWIND_FF);
@@ -175,8 +189,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
}
/* No C frame. */
if (errcode) {
+ L->base = tvref(L->stack)+1+LJ_FR2;
L->cframe = NULL;
- L->base = tvref(L->stack)+1;
unwindstack(L, L->base);
if (G(L)->panic)
G(L)->panic(L);
@@ -187,33 +201,206 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode)
/* -- External frame unwinding -------------------------------------------- */
-#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN
+#if LJ_ABI_WIN
/*
-** We have to use our own definitions instead of the mandatory (!) unwind.h,
-** since various OS, distros and compilers mess up the header installation.
+** Someone in Redmond owes me several days of my life. A lot of this is
+** undocumented or just plain wrong on MSDN. Some of it can be gathered
+** from 3rd party docs or must be found by trial-and-error. They really
+** don't want you to write your own language-specific exception handler
+** or to interact gracefully with MSVC. :-(
+**
+** Apparently MSVC doesn't call C++ destructors for foreign exceptions
+** unless you compile your C++ code with /EHa. Unfortunately this means
+** catch (...) also catches things like access violations. The use of
+** _set_se_translator doesn't really help, because it requires /EHa, too.
*/
-typedef struct _Unwind_Exception
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#if LJ_TARGET_X86
+typedef void *UndocumentedDispatcherContext; /* Unused on x86. */
+#else
+/* Taken from: http://www.nynaeve.net/?p=99 */
+typedef struct UndocumentedDispatcherContext {
+ ULONG64 ControlPc;
+ ULONG64 ImageBase;
+ PRUNTIME_FUNCTION FunctionEntry;
+ ULONG64 EstablisherFrame;
+ ULONG64 TargetIp;
+ PCONTEXT ContextRecord;
+ void (*LanguageHandler)(void);
+ PVOID HandlerData;
+ PUNWIND_HISTORY_TABLE HistoryTable;
+ ULONG ScopeIndex;
+ ULONG Fill0;
+} UndocumentedDispatcherContext;
+#endif
+
+/* Another wild guess. */
+extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
+
+#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT)
+/* Workaround for broken MinGW64 declaration. */
+VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
+#define RtlUnwindEx RtlUnwindEx_FIXED
+#endif
+
+#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363)
+#define LJ_GCC_EXCODE ((DWORD)0x20474343)
+
+#define LJ_EXCODE ((DWORD)0xe24c4a00)
+#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c))
+#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff)
+#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))
+
+/* Windows exception handler for interpreter frame. */
+LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec,
+ void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
{
- uint64_t exclass;
- void (*excleanup)(int, struct _Unwind_Exception *);
- uintptr_t p1, p2;
-} __attribute__((__aligned__)) _Unwind_Exception;
+#if LJ_TARGET_X86
+ void *cf = (char *)f - CFRAME_OFS_SEH;
+#else
+ void *cf = f;
+#endif
+ lua_State *L = cframe_L(cf);
+ int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
+ LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
+ if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */
+ /* Unwind internal frames. */
+ err_unwind(L, cf, errcode);
+ } else {
+ void *cf2 = err_unwind(L, cf, 0);
+ if (cf2) { /* We catch it, so start unwinding the upper frames. */
+ if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
+ rec->ExceptionCode == LJ_GCC_EXCODE) {
+#if !LJ_TARGET_CYGWIN
+ __DestructExceptionObject(rec, 1);
+#endif
+ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+ } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
+ /* Don't catch access violations etc. */
+ return 1; /* ExceptionContinueSearch */
+ }
+#if LJ_TARGET_X86
+ UNUSED(ctx);
+ UNUSED(dispatch);
+ /* Call all handlers for all lower C frames (including ourselves) again
+ ** with EH_UNWINDING set. Then call the specified function, passing cf
+ ** and errcode.
+ */
+ lj_vm_rtlunwind(cf, (void *)rec,
+ (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+ (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode);
+ /* lj_vm_rtlunwind does not return. */
+#else
+ /* Unwind the stack and call all handlers for all lower C frames
+ ** (including ourselves) again with EH_UNWINDING set. Then set
+ ** stack pointer = cf, result = errcode and jump to the specified target.
+ */
+ RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
+ lj_vm_unwind_ff_eh :
+ lj_vm_unwind_c_eh),
+ rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
+ /* RtlUnwindEx should never return. */
+#endif
+ }
+ }
+ return 1; /* ExceptionContinueSearch */
+}
+
+#if LJ_UNWIND_JIT
+
+#if LJ_TARGET_X64
+#define CONTEXT_REG_PC Rip
+#elif LJ_TARGET_ARM64
+#define CONTEXT_REG_PC Pc
+#else
+#error "NYI: Windows arch-specific unwinder for JIT-compiled code"
+#endif
+
+/* Windows unwinder for JIT-compiled code. */
+static void err_unwind_win_jit(global_State *g, int errcode)
+{
+ CONTEXT ctx;
+ UNWIND_HISTORY_TABLE hist;
+
+ memset(&hist, 0, sizeof(hist));
+ RtlCaptureContext(&ctx);
+ while (1) {
+ uintptr_t frame, base, addr = ctx.CONTEXT_REG_PC;
+ void *hdata;
+ PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist);
+ if (!func) { /* Found frame without .pdata: must be JIT-compiled code. */
+ ExitNo exitno;
+ uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
+ if (stub) { /* Jump to side exit to unwind the trace. */
+ ctx.CONTEXT_REG_PC = stub;
+ G2J(g)->exitcode = errcode;
+ RtlRestoreContext(&ctx, NULL); /* Does not return. */
+ }
+ break;
+ }
+ RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func,
+ &ctx, &hdata, &frame, NULL);
+ if (!addr) break;
+ }
+ /* Unwinding failed, if we end up here. */
+}
+#endif
+
+/* Raise Windows exception. */
+static void err_raise_ext(global_State *g, int errcode)
+{
+#if LJ_UNWIND_JIT
+ if (tvref(g->jit_base)) {
+ err_unwind_win_jit(g, errcode);
+ return; /* Unwinding failed. */
+ }
+#elif LJ_HASJIT
+ /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */
+ setmref(g->jit_base, NULL);
+#endif
+ UNUSED(g);
+ RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
+}
+
+#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__))
+
+/*
+** We have to use our own definitions instead of the mandatory (!) unwind.h,
+** since various OS, distros and compilers mess up the header installation.
+*/
typedef struct _Unwind_Context _Unwind_Context;
#define _URC_OK 0
+#define _URC_FATAL_PHASE2_ERROR 2
#define _URC_FATAL_PHASE1_ERROR 3
#define _URC_HANDLER_FOUND 6
#define _URC_INSTALL_CONTEXT 7
#define _URC_CONTINUE_UNWIND 8
#define _URC_FAILURE 9
+#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */
+#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c))
+#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff)
+#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff))
+
#if !LJ_TARGET_ARM
+typedef struct _Unwind_Exception
+{
+ uint64_t exclass;
+ void (*excleanup)(int, struct _Unwind_Exception *);
+ uintptr_t p1, p2;
+} __attribute__((__aligned__)) _Unwind_Exception;
+#define UNWIND_EXCEPTION_TYPE _Unwind_Exception
+
extern uintptr_t _Unwind_GetCFA(_Unwind_Context *);
extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t);
+extern uintptr_t _Unwind_GetIP(_Unwind_Context *);
extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t);
extern void _Unwind_DeleteException(_Unwind_Exception *);
extern int _Unwind_RaiseException(_Unwind_Exception *);
@@ -223,11 +410,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *);
#define _UA_HANDLER_FRAME 4
#define _UA_FORCE_UNWIND 8
-#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */
-#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c))
-#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff)
-#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff))
-
/* DWARF2 personality handler referenced from interpreter .eh_frame. */
LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
@@ -236,7 +418,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
lua_State *L;
if (version != 1)
return _URC_FATAL_PHASE1_ERROR;
- UNUSED(uexclass);
cf = (void *)_Unwind_GetCFA(ctx);
L = cframe_L(cf);
if ((actions & _UA_SEARCH_PHASE)) {
@@ -284,27 +465,162 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions,
** it on non-x64 because the interpreter restores all callee-saved regs.
*/
lj_err_throw(L, errcode);
+#if LJ_TARGET_X64
+#error "Broken build system -- only use the provided Makefiles!"
+#endif
#endif
}
return _URC_CONTINUE_UNWIND;
}
-#if LJ_UNWIND_EXT
-static __thread _Unwind_Exception static_uex;
+#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
+struct dwarf_eh_bases { void *tbase, *dbase, *func; };
+extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases);
-/* Raise DWARF2 exception. */
-static void err_raise_ext(int errcode)
+/* Verify that external error handling actually has a chance to work. */
+void lj_err_verify(void)
{
- static_uex.exclass = LJ_UEXCLASS_MAKE(errcode);
- static_uex.excleanup = NULL;
- _Unwind_RaiseException(&static_uex);
+#if !LJ_TARGET_OSX
+ /* Check disabled on MacOS due to brilliant software engineering at Apple. */
+ struct dwarf_eh_bases ehb;
+ lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables");
+#endif
+ /* Check disabled, because of broken Fedora/ARM64. See #722.
+ lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables");
+ */
}
#endif
+#if LJ_UNWIND_JIT
+/* DWARF2 personality handler for JIT-compiled code. */
+static int err_unwind_jit(int version, int actions,
+ uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx)
+{
+ /* NYI: FFI C++ exception interoperability. */
+ if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass))
+ return _URC_FATAL_PHASE1_ERROR;
+ if ((actions & _UA_SEARCH_PHASE)) {
+ return _URC_HANDLER_FOUND;
+ }
+ if ((actions & _UA_CLEANUP_PHASE)) {
+ global_State *g = *(global_State **)(uex+1);
+ ExitNo exitno;
+ uintptr_t addr = _Unwind_GetIP(ctx); /* Return address _after_ call. */
+ uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno);
+ lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame");
+ if (stub) { /* Jump to side exit to unwind the trace. */
+ G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass);
+#ifdef LJ_TARGET_MIPS
+ _Unwind_SetGR(ctx, 4, stub);
+ _Unwind_SetGR(ctx, 5, exitno);
+ _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub);
+#else
+ _Unwind_SetIP(ctx, stub);
+#endif
+ return _URC_INSTALL_CONTEXT;
+ }
+ return _URC_FATAL_PHASE2_ERROR;
+ }
+ return _URC_FATAL_PHASE1_ERROR;
+}
+
+/* DWARF2 template frame info for JIT-compiled code.
+**
+** After copying the template to the start of the mcode segment,
+** the frame handler function and the code size is patched.
+** The frame handler always installs a new context to jump to the exit,
+** so don't bother to add any unwind opcodes.
+*/
+static const uint8_t err_frame_jit_template[] = {
+#if LJ_BE
+ 0,0,0,
+#endif
+ LJ_64 ? 0x1c : 0x14, /* CIE length. */
+#if LJ_LE
+ 0,0,0,
+#endif
+ 0,0,0,0, 1, 'z','P','R',0, /* CIE mark, CIE version, augmentation. */
+ 1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG, /* Code/data align, RA. */
+#if LJ_64
+ 10, 0, 0,0,0,0,0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */
+ 0,0,0,0,0, /* Alignment. */
+#else
+ 6, 0, 0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */
+ 0, /* Alignment. */
+#endif
+#if LJ_BE
+ 0,0,0,
+#endif
+ LJ_64 ? 0x14 : 0x10, /* FDE length. */
+ 0,0,0,
+ LJ_64 ? 0x24 : 0x1c, /* CIE offset. */
+ 0,0,0,
+ LJ_64 ? 0x14 : 0x10, /* Code offset. After Final FDE. */
+#if LJ_LE
+ 0,0,0,
+#endif
+ 0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */
+#if LJ_64
+ 0,0,0,0, /* Alignment. */
+#endif
+ 0,0,0,0 /* Final FDE. */
+};
+
+#define ERR_FRAME_JIT_OFS_HANDLER 0x12
+#define ERR_FRAME_JIT_OFS_FDE (LJ_64 ? 0x20 : 0x18)
+#define ERR_FRAME_JIT_OFS_CODE_SIZE (LJ_64 ? 0x2c : 0x24)
+#if LJ_TARGET_OSX
+#define ERR_FRAME_JIT_OFS_REGISTER ERR_FRAME_JIT_OFS_FDE
#else
+#define ERR_FRAME_JIT_OFS_REGISTER 0
+#endif
+
+extern void __register_frame(const void *);
+extern void __deregister_frame(const void *);
+
+uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info)
+{
+ void **handler;
+ memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template));
+ handler = (void *)err_unwind_jit;
+ memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler));
+ *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) =
+ (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base));
+ __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
+#ifdef LUA_USE_ASSERT
+ {
+ struct dwarf_eh_bases ehb;
+ lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb),
+ "bad JIT unwind table registration");
+ }
+#endif
+ return info + sizeof(err_frame_jit_template);
+}
+
+void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info)
+{
+ UNUSED(base); UNUSED(sz);
+ __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER);
+}
+#endif
+
+#else /* LJ_TARGET_ARM */
+
+#define _US_VIRTUAL_UNWIND_FRAME 0
+#define _US_UNWIND_FRAME_STARTING 1
+#define _US_ACTION_MASK 3
+#define _US_FORCE_UNWIND 8
+
+typedef struct _Unwind_Control_Block _Unwind_Control_Block;
+#define UNWIND_EXCEPTION_TYPE _Unwind_Control_Block
+
+struct _Unwind_Control_Block {
+ uint64_t exclass;
+ uint32_t misc[20];
+};
-extern void _Unwind_DeleteException(void *);
-extern int __gnu_unwind_frame (void *, _Unwind_Context *);
+extern int _Unwind_RaiseException(_Unwind_Control_Block *);
+extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *);
extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *);
extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *);
@@ -320,126 +636,98 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v)
_Unwind_VRS_Set(ctx, 0, r, 0, &v);
}
-#define _US_VIRTUAL_UNWIND_FRAME 0
-#define _US_UNWIND_FRAME_STARTING 1
-#define _US_ACTION_MASK 3
-#define _US_FORCE_UNWIND 8
+extern void lj_vm_unwind_ext(void);
/* ARM unwinder personality handler referenced from interpreter .ARM.extab. */
-LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx)
+LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb,
+ _Unwind_Context *ctx)
{
void *cf = (void *)_Unwind_GetGR(ctx, 13);
lua_State *L = cframe_L(cf);
- if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) {
- setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+ int errcode;
+
+ switch ((state & _US_ACTION_MASK)) {
+ case _US_VIRTUAL_UNWIND_FRAME:
+ if ((state & _US_FORCE_UNWIND)) break;
return _URC_HANDLER_FOUND;
- }
- if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) {
- _Unwind_DeleteException(ucb);
- _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw);
- _Unwind_SetGR(ctx, 0, (uint32_t)L);
- _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN);
+ case _US_UNWIND_FRAME_STARTING:
+ if (LJ_UEXCLASS_CHECK(ucb->exclass)) {
+ errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass);
+ } else {
+ errcode = LUA_ERRRUN;
+ setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
+ }
+ cf = err_unwind(L, cf, errcode);
+ if ((state & _US_FORCE_UNWIND) || cf == NULL) break;
+ _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext);
+ _Unwind_SetGR(ctx, 0, (uint32_t)ucb);
+ _Unwind_SetGR(ctx, 1, (uint32_t)errcode);
+ _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ?
+ (uint32_t)lj_vm_unwind_ff_eh :
+ (uint32_t)lj_vm_unwind_c_eh);
return _URC_INSTALL_CONTEXT;
+ default:
+ return _URC_FAILURE;
}
if (__gnu_unwind_frame(ucb, ctx) != _URC_OK)
return _URC_FAILURE;
+#ifdef LUA_USE_ASSERT
+ /* We should never get here unless this is a forced unwind aka backtrace. */
+ if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) {
+ _Unwind_SetGR(ctx, 0, 0xff33aa88);
+ }
+#endif
return _URC_CONTINUE_UNWIND;
}
-#endif
+#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT)
+typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *);
+extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *);
+
+static int err_verify_bt(_Unwind_Context *ctx, int *got)
+{
+ if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; }
+ else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); }
+ return _URC_OK;
+}
-#elif LJ_TARGET_X64 && LJ_ABI_WIN
+/* Verify that external error handling actually has a chance to work. */
+void lj_err_verify(void)
+{
+ int got = 0;
+ _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got);
+ lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables");
+}
+#endif
/*
-** Someone in Redmond owes me several days of my life. A lot of this is
-** undocumented or just plain wrong on MSDN. Some of it can be gathered
-** from 3rd party docs or must be found by trial-and-error. They really
-** don't want you to write your own language-specific exception handler
-** or to interact gracefully with MSVC. :-(
+** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM.
**
-** Apparently MSVC doesn't call C++ destructors for foreign exceptions
-** unless you compile your C++ code with /EHa. Unfortunately this means
-** catch (...) also catches things like access violations. The use of
-** _set_se_translator doesn't really help, because it requires /EHa, too.
+** The quirky ARM unwind API doesn't have __register_frame().
+** A potential workaround might involve _Unwind_Backtrace.
+** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway,
+** since they are built without unwind tables by default.
*/
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
+#endif /* LJ_TARGET_ARM */
-/* Taken from: http://www.nynaeve.net/?p=99 */
-typedef struct UndocumentedDispatcherContext {
- ULONG64 ControlPc;
- ULONG64 ImageBase;
- PRUNTIME_FUNCTION FunctionEntry;
- ULONG64 EstablisherFrame;
- ULONG64 TargetIp;
- PCONTEXT ContextRecord;
- void (*LanguageHandler)(void);
- PVOID HandlerData;
- PUNWIND_HISTORY_TABLE HistoryTable;
- ULONG ScopeIndex;
- ULONG Fill0;
-} UndocumentedDispatcherContext;
-
-/* Another wild guess. */
-extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow);
-
-#ifdef MINGW_SDK_INIT
-/* Workaround for broken MinGW64 declaration. */
-VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx");
-#define RtlUnwindEx RtlUnwindEx_FIXED
-#endif
-
-#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363)
-#define LJ_GCC_EXCODE ((DWORD)0x20474343)
-#define LJ_EXCODE ((DWORD)0xe24c4a00)
-#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c))
-#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff)
-#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff))
+#if LJ_UNWIND_EXT
+static __thread struct {
+ UNWIND_EXCEPTION_TYPE ex;
+ global_State *g;
+} static_uex;
-/* Win64 exception handler for interpreter frame. */
-LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec,
- void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch)
+/* Raise external exception. */
+static void err_raise_ext(global_State *g, int errcode)
{
- lua_State *L = cframe_L(cf);
- int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ?
- LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN;
- if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */
- /* Unwind internal frames. */
- err_unwind(L, cf, errcode);
- } else {
- void *cf2 = err_unwind(L, cf, 0);
- if (cf2) { /* We catch it, so start unwinding the upper frames. */
- if (rec->ExceptionCode == LJ_MSVC_EXCODE ||
- rec->ExceptionCode == LJ_GCC_EXCODE) {
-#if LJ_TARGET_WINDOWS
- __DestructExceptionObject(rec, 1);
-#endif
- setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP));
- } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) {
- /* Don't catch access violations etc. */
- return ExceptionContinueSearch;
- }
- /* Unwind the stack and call all handlers for all lower C frames
- ** (including ourselves) again with EH_UNWINDING set. Then set
- ** rsp = cf, rax = errcode and jump to the specified target.
- */
- RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ?
- lj_vm_unwind_ff_eh :
- lj_vm_unwind_c_eh),
- rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable);
- /* RtlUnwindEx should never return. */
- }
- }
- return ExceptionContinueSearch;
+ memset(&static_uex, 0, sizeof(static_uex));
+ static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode);
+ static_uex.g = g;
+ _Unwind_RaiseException(&static_uex.ex);
}
-/* Raise Windows exception. */
-static void err_raise_ext(int errcode)
-{
- RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL);
-}
+#endif
#endif
@@ -450,22 +738,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode)
{
global_State *g = G(L);
lj_trace_abort(g);
- setgcrefnull(g->jit_L);
- L->status = 0;
+ L->status = LUA_OK;
#if LJ_UNWIND_EXT
- err_raise_ext(errcode);
+ err_raise_ext(g, errcode);
/*
** A return from this function signals a corrupt C stack that cannot be
** unwound. We have no choice but to call the panic function and exit.
**
** Usually this is caused by a C function without unwind information.
- ** This should never happen on x64, but may happen if you've manually
- ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every*
- ** non-C++ file with -funwind-tables.
+ ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL
+ ** and forgot to recompile *every* non-C++ file with -funwind-tables.
*/
if (G(L)->panic)
G(L)->panic(L);
#else
+#if LJ_HASJIT
+ setmref(g->jit_base, NULL);
+#endif
{
void *cf = err_unwind(L, NULL, errcode);
if (cframe_unwind_ff(cf))
@@ -495,7 +784,7 @@ LJ_NOINLINE void lj_err_mem(lua_State *L)
/* Find error function for runtime errors. Requires an extra stack traversal. */
static ptrdiff_t finderrfunc(lua_State *L)
{
- cTValue *frame = L->base-1, *bot = tvref(L->stack);
+ cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2;
void *cf = L->cframe;
while (frame > bot && cf) {
while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */
@@ -519,10 +808,8 @@ static ptrdiff_t finderrfunc(lua_State *L)
frame = frame_prevd(frame);
break;
case FRAME_CONT:
-#if LJ_HASFFI
- if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK)
+ if (frame_iscont_fficb(frame))
cf = cframe_prev(cf);
-#endif
frame = frame_prevd(frame);
break;
case FRAME_CP:
@@ -534,11 +821,11 @@ static ptrdiff_t finderrfunc(lua_State *L)
break;
case FRAME_PCALL:
case FRAME_PCALLH:
- if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */
- return savestack(L, frame-1); /* Point to xpcall's errorfunc. */
+ if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall)
+ return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */
return 0;
default:
- lua_assert(0);
+ lj_assertL(0, "bad frame type");
return 0;
}
}
@@ -548,7 +835,7 @@ static ptrdiff_t finderrfunc(lua_State *L)
/* Runtime error. */
LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
{
- ptrdiff_t ef = finderrfunc(L);
+ ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L);
if (ef) {
TValue *errfunc = restorestack(L, ef);
TValue *top = L->top;
@@ -558,14 +845,25 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L)
lj_err_throw(L, LUA_ERRERR);
}
L->status = LUA_ERRERR;
- copyTV(L, top, top-1);
+ copyTV(L, top+LJ_FR2, top-1);
copyTV(L, top-1, errfunc);
+ if (LJ_FR2) setnilV(top++);
L->top = top+1;
lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */
}
lj_err_throw(L, LUA_ERRRUN);
}
+#if LJ_HASJIT
+LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode)
+{
+ if (errcode == LUA_ERRRUN)
+ lj_err_run(L);
+ else
+ lj_err_throw(L, errcode);
+}
+#endif
+
/* Formatted runtime error message. */
LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
{
@@ -573,7 +871,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...)
va_list argp;
va_start(argp, em);
if (curr_funcisL(L)) L->top = curr_topL(L);
- msg = lj_str_pushvf(L, err2msg(em), argp);
+ msg = lj_strfmt_pushvf(L, err2msg(em), argp);
va_end(argp);
lj_debug_addloc(L, msg, L->base-1, NULL);
lj_err_run(L);
@@ -591,11 +889,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
{
char buff[LUA_IDSIZE];
const char *msg;
- lj_debug_shortname(buff, src);
- msg = lj_str_pushvf(L, err2msg(em), argp);
- msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg);
+ lj_debug_shortname(buff, src, line);
+ msg = lj_strfmt_pushvf(L, err2msg(em), argp);
+ msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg);
if (tok)
- lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
+ lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok);
lj_err_throw(L, LUA_ERRSYNTAX);
}
@@ -634,8 +932,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
const BCIns *pc = cframe_Lpc(L);
if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) {
const char *tname = lj_typename(o);
+ setframe_gc(o, obj2gco(L), LJ_TTHREAD);
+ if (LJ_FR2) o++;
setframe_pc(o, pc);
- setframe_gc(o, obj2gco(L));
L->top = L->base = o+1;
err_msgv(L, LJ_ERR_BADCALL, tname);
}
@@ -645,28 +944,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o)
/* Error in context of caller. */
LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg)
{
- TValue *frame = L->base-1;
- TValue *pframe = NULL;
- if (frame_islua(frame)) {
- pframe = frame_prevl(frame);
- } else if (frame_iscont(frame)) {
+ TValue *frame = NULL, *pframe = NULL;
+ if (!(LJ_HASJIT && tvref(G(L)->jit_base))) {
+ frame = L->base-1;
+ if (frame_islua(frame)) {
+ pframe = frame_prevl(frame);
+ } else if (frame_iscont(frame)) {
+ if (frame_iscont_fficb(frame)) {
+ pframe = frame;
+ frame = NULL;
+ } else {
+ pframe = frame_prevd(frame);
#if LJ_HASFFI
- if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) {
- pframe = frame;
- frame = NULL;
- } else
+ /* Remove frame for FFI metamethods. */
+ if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
+ frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
+ L->base = pframe+1;
+ L->top = frame;
+ setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
+ }
#endif
- {
- pframe = frame_prevd(frame);
-#if LJ_HASFFI
- /* Remove frame for FFI metamethods. */
- if (frame_func(frame)->c.ffid >= FF_ffi_meta___index &&
- frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) {
- L->base = pframe+1;
- L->top = frame;
- setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame));
}
-#endif
}
}
lj_debug_addloc(L, msg, pframe, frame);
@@ -679,7 +977,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...)
const char *msg;
va_list argp;
va_start(argp, em);
- msg = lj_str_pushvf(L, err2msg(em), argp);
+ msg = lj_strfmt_pushvf(L, err2msg(em), argp);
va_end(argp);
lj_err_callermsg(L, msg);
}
@@ -699,9 +997,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg,
if (narg < 0 && narg > LUA_REGISTRYINDEX)
narg = (int)(L->top - L->base) + narg + 1;
if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */
- msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
+ msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg);
else
- msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
+ msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg);
lj_err_callermsg(L, msg);
}
@@ -711,7 +1009,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...)
const char *msg;
va_list argp;
va_start(argp, em);
- msg = lj_str_pushvf(L, err2msg(em), argp);
+ msg = lj_strfmt_pushvf(L, err2msg(em), argp);
va_end(argp);
err_argmsg(L, narg, msg);
}
@@ -741,7 +1039,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname)
TValue *o = narg < 0 ? L->top + narg : L->base + narg-1;
tname = o < L->top ? lj_typename(o) : lj_obj_typename[0];
}
- msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
+ msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname);
err_argmsg(L, narg, msg);
}
@@ -791,7 +1089,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...)
const char *msg;
va_list argp;
va_start(argp, fmt);
- msg = lj_str_pushvf(L, fmt, argp);
+ msg = lj_strfmt_pushvf(L, fmt, argp);
va_end(argp);
lj_err_callermsg(L, msg);
return 0; /* unreachable */
diff --git a/src/lj_err.h b/src/lj_err.h
index 59253b58..bd4de9ae 100644
--- a/src/lj_err.h
+++ b/src/lj_err.h
@@ -23,7 +23,10 @@ LJ_DATA const char *lj_err_allmsg;
LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em);
LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode);
LJ_FUNC_NORET void lj_err_mem(lua_State *L);
-LJ_FUNCA_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
+LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L);
+#if LJ_HASJIT
+LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode);
+#endif
LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em);
LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok,
BCLine line, ErrMsg em, va_list argp);
@@ -38,4 +41,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...);
LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname);
LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt);
+#if LJ_UNWIND_JIT && !LJ_ABI_WIN
+LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info);
+LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info);
+#else
+#define lj_err_register_mcode(base, sz, info) (info)
+#define lj_err_deregister_mcode(base, sz, info) UNUSED(base)
+#endif
+
+#if LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT)
+LJ_FUNC void lj_err_verify(void);
+#else
+#define lj_err_verify() ((void)0)
+#endif
+
#endif
diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h
index 2c26a4f1..2e5c776a 100644
--- a/src/lj_errmsg.h
+++ b/src/lj_errmsg.h
@@ -67,6 +67,7 @@ ERRDEF(PROTMT, "cannot change a protected metatable")
ERRDEF(UNPACK, "too many results to unpack")
ERRDEF(RDRSTR, "reader function must return a string")
ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print"))
+ERRDEF(NUMRNG, "number out of range")
ERRDEF(IDXRNG, "index out of range")
ERRDEF(BASERNG, "base out of range")
ERRDEF(LVLRNG, "level out of range")
@@ -96,18 +97,12 @@ ERRDEF(STRPATX, "pattern too complex")
ERRDEF(STRCAPI, "invalid capture index")
ERRDEF(STRCAPN, "too many captures")
ERRDEF(STRCAPU, "unfinished capture")
-ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format"))
-ERRDEF(STRFMTR, "invalid format (repeated flags)")
-ERRDEF(STRFMTW, "invalid format (width or precision too long)")
+ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format"))
ERRDEF(STRGSRV, "invalid replacement value (a %s)")
ERRDEF(BADMODN, "name conflict for module " LUA_QS)
#if LJ_HASJIT
ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?")
-#if LJ_TARGET_X86ORX64
-ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2")
-#else
ERRDEF(NOJIT, "JIT compiler disabled")
-#endif
#elif defined(LJ_ARCH_NOJIT)
ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)")
#else
@@ -118,7 +113,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS)
/* Lexer/parser errors. */
ERRDEF(XMODE, "attempt to load chunk with wrong mode")
ERRDEF(XNEAR, "%s near " LUA_QS)
-ERRDEF(XELEM, "lexical element too long")
ERRDEF(XLINES, "chunk has too many lines")
ERRDEF(XLEVELS, "chunk has too many syntax levels")
ERRDEF(XNUMBER, "malformed number")
@@ -186,6 +180,19 @@ ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields")
ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)")
#endif
+#if LJ_HASBUFFER
+/* String buffer errors. */
+ERRDEF(BUFFER_SELF, "cannot put buffer into itself")
+ERRDEF(BUFFER_BADOPT, "bad options table")
+ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS)
+ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x")
+ERRDEF(BUFFER_BADDICTX, "cannot deserialize dictionary index %d")
+ERRDEF(BUFFER_DEPTH, "too deep to serialize")
+ERRDEF(BUFFER_DUPKEY, "duplicate table key")
+ERRDEF(BUFFER_EOB, "unexpected end of buffer")
+ERRDEF(BUFFER_LEFTOV, "left-over data in buffer")
+#endif
+
#undef ERRDEF
/* Detecting unused error messages:
diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c
index f833bc16..60c1d84f 100644
--- a/src/lj_ffrecord.c
+++ b/src/lj_ffrecord.c
@@ -11,6 +11,7 @@
#if LJ_HASJIT
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_frame.h"
@@ -27,6 +28,8 @@
#include "lj_dispatch.h"
#include "lj_vm.h"
#include "lj_strscan.h"
+#include "lj_strfmt.h"
+#include "lj_serialize.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
@@ -79,10 +82,7 @@ static GCstr *argv2str(jit_State *J, TValue *o)
GCstr *s;
if (!tvisnumber(o))
lj_trace_err(J, LJ_TRERR_BADTYPE);
- if (tvisint(o))
- s = lj_str_fromint(J->L, intV(o));
- else
- s = lj_str_fromnum(J->L, &o->n);
+ s = lj_strfmt_number(J->L, o);
setstrV(J->L, o, s);
return s;
}
@@ -98,27 +98,102 @@ static ptrdiff_t results_wanted(jit_State *J)
return -1;
}
-/* Throw error for unsupported variant of fast function. */
-LJ_NORET static void recff_nyiu(jit_State *J)
+/* Trace stitching: add continuation below frame to start a new trace. */
+static void recff_stitch(jit_State *J)
{
- setfuncV(J->L, &J->errinfo, J->fn);
- lj_trace_err_info(J, LJ_TRERR_NYIFFU);
+ ASMFunction cont = lj_cont_stitch;
+ lua_State *L = J->L;
+ TValue *base = L->base;
+ BCReg nslot = J->maxslot + 1 + LJ_FR2;
+ TValue *nframe = base + 1 + LJ_FR2;
+ const BCIns *pc = frame_pc(base-1);
+ TValue *pframe = frame_prevl(base-1);
+
+ /* Check for this now. Throwing in lj_record_stop messes up the stack. */
+ if (J->cur.nsnap >= (MSize)J->param[JIT_P_maxsnap])
+ lj_trace_err(J, LJ_TRERR_SNAPOV);
+
+ /* Move func + args up in Lua stack and insert continuation. */
+ memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot);
+ setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT);
+ setcont(base-LJ_FR2, cont);
+ setframe_pc(base, pc);
+ setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */
+ L->base += 2 + LJ_FR2;
+ L->top += 2 + LJ_FR2;
+
+ /* Ditto for the IR. */
+ memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot);
+#if LJ_FR2
+ J->base[2] = TREF_FRAME;
+ J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
+ J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT;
+#else
+ J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
+#endif
+ J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J)));
+ J->base += 2 + LJ_FR2;
+ J->baseslot += 2 + LJ_FR2;
+ J->framedepth++;
+
+ lj_record_stop(J, LJ_TRLINK_STITCH, 0);
+
+ /* Undo Lua stack changes. */
+ memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot);
+ setframe_pc(base-1, pc);
+ L->base -= 2 + LJ_FR2;
+ L->top -= 2 + LJ_FR2;
}
-/* Fallback handler for all fast functions that are not recorded (yet). */
+/* Fallback handler for fast functions that are not recorded (yet). */
static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd)
{
- setfuncV(J->L, &J->errinfo, J->fn);
- lj_trace_err_info(J, LJ_TRERR_NYIFF);
- UNUSED(rd);
+ if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) {
+ lj_trace_err_info(J, LJ_TRERR_TRACEUV);
+ } else {
+ /* Can only stitch from Lua call. */
+ if (J->framedepth && frame_islua(J->L->base-1)) {
+ BCOp op = bc_op(*frame_pc(J->L->base-1));
+ /* Stitched trace cannot start with *M op with variable # of args. */
+ if (!(op == BC_CALLM || op == BC_CALLMT ||
+ op == BC_RETM || op == BC_TSETM)) {
+ switch (J->fn->c.ffid) {
+ case FF_error:
+ case FF_debug_sethook:
+ case FF_jit_flush:
+ break; /* Don't stitch across special builtins. */
+ default:
+ recff_stitch(J); /* Use trace stitching. */
+ rd->nres = -1;
+ return;
+ }
+ }
+ }
+ /* Otherwise stop trace and return to interpreter. */
+ lj_record_stop(J, LJ_TRLINK_RETURN, 0);
+ rd->nres = -1;
+ }
}
-/* C functions can have arbitrary side-effects and are not recorded (yet). */
-static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd)
+/* Fallback handler for unsupported variants of fast functions. */
+#define recff_nyiu recff_nyi
+
+/* Must stop the trace for classic C functions with arbitrary side-effects. */
+#define recff_c recff_nyi
+
+/* Emit BUFHDR for the global temporary buffer. */
+static TRef recff_bufhdr(jit_State *J)
{
- setfuncV(J->L, &J->errinfo, J->fn);
- lj_trace_err_info(J, LJ_TRERR_NYICF);
- UNUSED(rd);
+ return emitir(IRT(IR_BUFHDR, IRT_PGC),
+ lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
+}
+
+/* Emit TMPREF. */
+static TRef recff_tmpref(jit_State *J, TRef tr, int mode)
+{
+ if (!LJ_DUALNUM && tref_isinteger(tr))
+ tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
+ return emitir(IRT(IR_TMPREF, IRT_PGC), tr, mode);
}
/* -- Base library fast functions ----------------------------------------- */
@@ -135,7 +210,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd)
uint32_t t;
if (tvisnumber(&rd->argv[0]))
t = ~LJ_TNUMX;
- else if (LJ_64 && tvislightud(&rd->argv[0]))
+ else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0]))
t = ~LJ_TLIGHTUD;
else
t = ~itype(&rd->argv[0]);
@@ -167,7 +242,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd)
ix.tab = tr;
copyTV(J->L, &ix.tabv, &rd->argv[0]);
lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */
- fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META);
+ fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META);
mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt;
emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref);
if (!tref_isnil(mt))
@@ -220,7 +295,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd)
if (tref_isstr(tr))
J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
else if (tref_istab(tr))
- J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr);
+ J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL);
/* else: Interpreter will throw. */
UNUSED(rd);
}
@@ -233,9 +308,9 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv)
if (strV(tv)->len == 1) {
emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv)));
} else {
- TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0));
+ TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY);
- emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#'));
+ emitir(IRTGI(IR_EQ), trchar, lj_ir_kint(J, '#'));
}
return 0;
} else { /* select(n, ...) */
@@ -263,7 +338,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd)
J->base[i] = J->base[start+i];
} /* else: Interpreter will throw. */
} else {
- recff_nyiu(J);
+ recff_nyiu(J, rd);
+ return;
}
} /* else: Interpreter will throw. */
}
@@ -274,14 +350,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd)
TRef base = J->base[1];
if (tr && !tref_isnil(base)) {
base = lj_opt_narrow_toint(J, base);
- if (!tref_isk(base) || IR(tref_ref(base))->i != 10)
- recff_nyiu(J);
+ if (!tref_isk(base) || IR(tref_ref(base))->i != 10) {
+ recff_nyiu(J, rd);
+ return;
+ }
}
if (tref_isnumber_str(tr)) {
if (tref_isstr(tr)) {
TValue tmp;
- if (!lj_strscan_num(strV(&rd->argv[0]), &tmp))
- recff_nyiu(J); /* Would need an inverted STRTO for this case. */
+ if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) {
+ recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */
+ return;
+ }
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
}
#if LJ_HASFFI
@@ -313,10 +393,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm)
int errcode;
TValue argv0;
/* Temporarily insert metamethod below object. */
- J->base[1] = J->base[0];
+ J->base[1+LJ_FR2] = J->base[0];
J->base[0] = ix.mobj;
copyTV(J->L, &argv0, &rd->argv[0]);
- copyTV(J->L, &rd->argv[1], &rd->argv[0]);
+ copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]);
copyTV(J->L, &rd->argv[0], &ix.mobjv);
/* Need to protect lj_record_tailcall because it may throw. */
errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp);
@@ -336,13 +416,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd)
if (tref_isstr(tr)) {
/* Ignore __tostring in the string base metatable. */
/* Pass on result in J->base[0]. */
- } else if (!recff_metacall(J, rd, MM_tostring)) {
+ } else if (tr && !recff_metacall(J, rd, MM_tostring)) {
if (tref_isnumber(tr)) {
- J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
+ J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr,
+ tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
} else if (tref_ispri(tr)) {
- J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)]));
+ J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0]));
} else {
- recff_nyiu(J);
+ recff_nyiu(J, rd);
+ return;
}
}
}
@@ -364,15 +446,15 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd)
} /* else: Interpreter will throw. */
}
-static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd)
+static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd)
{
TRef tr = J->base[0];
if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) &&
- recff_metacall(J, rd, MM_ipairs))) {
+ recff_metacall(J, rd, MM_pairs + rd->data))) {
if (tref_istab(tr)) {
J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0]));
J->base[1] = tr;
- J->base[2] = lj_ir_kint(J, 0);
+ J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL;
rd->nres = 3;
} /* else: Interpreter will throw. */
}
@@ -381,8 +463,13 @@ static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd)
static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd)
{
if (J->maxslot >= 1) {
+#if LJ_FR2
+ /* Shift function arguments up. */
+ memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot);
+#endif
lj_record_call(J, 0, J->maxslot - 1);
rd->nres = -1; /* Pending call. */
+ J->needsnap = 1; /* Start catching on-trace errors. */
} /* else: Interpreter will throw. */
}
@@ -406,6 +493,10 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
copyTV(J->L, &argv1, &rd->argv[1]);
copyTV(J->L, &rd->argv[0], &argv1);
copyTV(J->L, &rd->argv[1], &argv0);
+#if LJ_FR2
+ /* Shift function arguments up. */
+ memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1));
+#endif
/* Need to protect lj_record_call because it may throw. */
errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp);
/* Always undo Lua stack swap to avoid confusing the interpreter. */
@@ -414,15 +505,62 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd)
if (errcode)
lj_err_throw(J->L, errcode); /* Propagate errors. */
rd->nres = -1; /* Pending call. */
+ J->needsnap = 1; /* Start catching on-trace errors. */
} /* else: Interpreter will throw. */
}
+static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd)
+{
+ TRef tr = J->base[0];
+ /* Only support getfenv(0) for now. */
+ if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) {
+ TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0);
+ J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV);
+ return;
+ }
+ recff_nyiu(J, rd);
+}
+
+static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd)
+{
+#if LJ_BE
+ /* YAGNI: Disabled on big-endian due to issues with lj_vm_next,
+ ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair.
+ */
+ recff_nyi(J, rd);
+#else
+ TRef tab = J->base[0];
+ if (tref_istab(tab)) {
+ RecordIndex ix;
+ cTValue *keyv;
+ ix.tab = tab;
+ if (tref_isnil(J->base[1])) { /* Shortcut for start of traversal. */
+ ix.key = lj_ir_kint(J, 0);
+ keyv = niltvg(J2G(J));
+ } else {
+ TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1);
+ ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp);
+ keyv = &rd->argv[1];
+ }
+ copyTV(J->L, &ix.tabv, &rd->argv[0]);
+ ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv);
+ /* Omit the value, if not used by the caller. */
+ ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) &&
+ bc_b(frame_pc(J->L->base-1)[-1])-1 < 2);
+ ix.mobj = 0; /* We don't need the next index. */
+ rd->nres = lj_record_next(J, &ix);
+ J->base[0] = ix.key;
+ J->base[1] = ix.val;
+ } /* else: Interpreter will throw. */
+#endif
+}
+
/* -- Math library fast functions ----------------------------------------- */
static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd)
{
TRef tr = lj_ir_tonum(J, J->base[0]);
- J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J));
+ J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS));
UNUSED(rd);
}
@@ -475,7 +613,7 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd)
{
TRef tr = lj_ir_tonum(J, J->base[0]);
TRef tr2 = lj_ir_tonum(J, J->base[1]);
- J->base[0] = emitir(IRTN(IR_ATAN2), tr, tr2);
+ J->base[0] = lj_ir_call(J, IRCALL_atan2, tr, tr2);
UNUSED(rd);
}
@@ -492,55 +630,16 @@ static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd)
UNUSED(rd);
}
-/* Record math.asin, math.acos, math.atan. */
-static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd)
-{
- TRef y = lj_ir_tonum(J, J->base[0]);
- TRef x = lj_ir_knum_one(J);
- uint32_t ffid = rd->data;
- if (ffid != FF_math_atan) {
- TRef tmp = emitir(IRTN(IR_MUL), y, y);
- tmp = emitir(IRTN(IR_SUB), x, tmp);
- tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT);
- if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; }
- }
- J->base[0] = emitir(IRTN(IR_ATAN2), y, x);
-}
-
-static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd)
+static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd)
{
TRef tr = lj_ir_tonum(J, J->base[0]);
J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data);
}
-static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd)
-{
- TRef tr = J->base[0];
- if (tref_isinteger(tr)) {
- J->base[0] = tr;
- J->base[1] = lj_ir_kint(J, 0);
- } else {
- TRef trt;
- tr = lj_ir_tonum(J, tr);
- trt = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC);
- J->base[0] = trt;
- J->base[1] = emitir(IRTN(IR_SUB), tr, trt);
- }
- rd->nres = 2;
-}
-
-static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd)
-{
- TRef tr = lj_ir_tonum(J, J->base[0]);
- TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0]));
- J->base[0] = emitir(IRTN(IR_MUL), tr, trm);
- UNUSED(rd);
-}
-
static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd)
{
- J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1],
- &rd->argv[0], &rd->argv[1]);
+ J->base[0] = lj_opt_narrow_arith(J, J->base[0], J->base[1],
+ &rd->argv[0], &rd->argv[1], IR_POW);
UNUSED(rd);
}
@@ -567,7 +666,7 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
GCudata *ud = udataV(&J->fn->c.upvalue[0]);
TRef tr, one;
lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */
- tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud)));
+ tr = lj_ir_call(J, IRCALL_lj_prng_u64d, lj_ir_kptr(J, uddata(ud)));
one = lj_ir_knum_one(J);
tr = emitir(IRTN(IR_SUB), tr, one);
if (J->base[0]) {
@@ -591,48 +690,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd)
/* -- Bit library fast functions ------------------------------------------ */
-/* Record unary bit.tobit, bit.bnot, bit.bswap. */
+/* Record bit.tobit. */
+static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd)
+{
+ TRef tr = J->base[0];
+#if LJ_HASFFI
+ if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; }
+#endif
+ J->base[0] = lj_opt_narrow_tobit(J, tr);
+ UNUSED(rd);
+}
+
+/* Record unary bit.bnot, bit.bswap. */
static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd)
{
- TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
- J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0);
+#if LJ_HASFFI
+ if (recff_bit64_unary(J, rd))
+ return;
+#endif
+ J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0);
}
/* Record N-ary bit.band, bit.bor, bit.bxor. */
static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd)
{
- TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
- uint32_t op = rd->data;
- BCReg i;
- for (i = 1; J->base[i] != 0; i++)
- tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i]));
- J->base[0] = tr;
+#if LJ_HASFFI
+ if (recff_bit64_nary(J, rd))
+ return;
+#endif
+ {
+ TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
+ uint32_t ot = IRTI(rd->data);
+ BCReg i;
+ for (i = 1; J->base[i] != 0; i++)
+ tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i]));
+ J->base[0] = tr;
+ }
}
/* Record bit shifts. */
static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd)
{
- TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
- TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
- IROp op = (IROp)rd->data;
- if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
- !tref_isk(tsh))
- tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
+#if LJ_HASFFI
+ if (recff_bit64_shift(J, rd))
+ return;
+#endif
+ {
+ TRef tr = lj_opt_narrow_tobit(J, J->base[0]);
+ TRef tsh = lj_opt_narrow_tobit(J, J->base[1]);
+ IROp op = (IROp)rd->data;
+ if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
+ !tref_isk(tsh))
+ tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31));
#ifdef LJ_TARGET_UNIFYROT
- if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
- op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
- tsh = emitir(IRTI(IR_NEG), tsh, tsh);
+ if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
+ op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
+ tsh = emitir(IRTI(IR_NEG), tsh, tsh);
+ }
+#endif
+ J->base[0] = emitir(IRTI(op), tr, tsh);
}
+}
+
+static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd)
+{
+#if LJ_HASFFI
+ TRef hdr = recff_bufhdr(J);
+ TRef tr = recff_bit64_tohex(J, rd, hdr);
+ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+#else
+ recff_nyiu(J, rd); /* Don't bother working around this NYI. */
#endif
- J->base[0] = emitir(IRTI(op), tr, tsh);
}
/* -- String library fast functions --------------------------------------- */
-static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd)
+/* Specialize to relative starting position for string. */
+static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr,
+ TRef trlen, TRef tr0)
{
- J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN);
- UNUSED(rd);
+ int32_t start = *st;
+ if (start < 0) {
+ emitir(IRTGI(IR_LT), tr, tr0);
+ tr = emitir(IRTI(IR_ADD), trlen, tr);
+ start = start + (int32_t)s->len;
+ emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0);
+ if (start < 0) {
+ tr = tr0;
+ start = 0;
+ }
+ } else if (start == 0) {
+ emitir(IRTGI(IR_EQ), tr, tr0);
+ tr = tr0;
+ } else {
+ tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1));
+ emitir(IRTGI(IR_GE), tr, tr0);
+ start--;
+ }
+ *st = start;
+ return tr;
}
/* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */
@@ -679,39 +835,21 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
} else if ((MSize)end <= str->len) {
emitir(IRTGI(IR_ULE), trend, trlen);
} else {
- emitir(IRTGI(IR_GT), trend, trlen);
+ emitir(IRTGI(IR_UGT), trend, trlen);
end = (int32_t)str->len;
trend = trlen;
}
- if (start < 0) {
- emitir(IRTGI(IR_LT), trstart, tr0);
- trstart = emitir(IRTI(IR_ADD), trlen, trstart);
- start = start+(int32_t)str->len;
- emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0);
- if (start < 0) {
- trstart = tr0;
- start = 0;
- }
- } else {
- if (start == 0) {
- emitir(IRTGI(IR_EQ), trstart, tr0);
- trstart = tr0;
- } else {
- trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1));
- emitir(IRTGI(IR_GE), trstart, tr0);
- start--;
- }
- }
+ trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
if (rd->data) { /* Return string.sub result. */
if (end - start >= 0) {
/* Also handle empty range here, to avoid extra traces. */
TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart);
emitir(IRTGI(IR_GE), trslen, tr0);
- trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart);
+ trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen);
} else { /* Range underflow: return empty string. */
emitir(IRTGI(IR_LT), trend, trstart);
- J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0));
+ J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
}
} else { /* Return string.byte result(s). */
ptrdiff_t i, len = end - start;
@@ -723,7 +861,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
rd->nres = len;
for (i = 0; i < len; i++) {
TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i));
- tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp);
+ tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp);
J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY);
}
} else { /* Empty range or range underflow: return no results. */
@@ -733,48 +871,535 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd)
}
}
-/* -- Table library fast functions ---------------------------------------- */
-
-static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd)
+static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd)
{
- if (tref_istab(J->base[0]))
- J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]);
- /* else: Interpreter will throw. */
+ TRef k255 = lj_ir_kint(J, 255);
+ BCReg i;
+ for (i = 0; J->base[i] != 0; i++) { /* Convert char values to strings. */
+ TRef tr = lj_opt_narrow_toint(J, J->base[i]);
+ emitir(IRTGI(IR_ULE), tr, k255);
+ J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR);
+ }
+ if (i > 1) { /* Concatenate the strings, if there's more than one. */
+ TRef hdr = recff_bufhdr(J), tr = hdr;
+ for (i = 0; J->base[i] != 0; i++)
+ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]);
+ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+ } else if (i == 0) {
+ J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty);
+ }
UNUSED(rd);
}
-static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd)
+static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd)
{
- TRef tab = J->base[0];
- rd->nres = 0;
- if (tref_istab(tab)) {
- if (tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */
- TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab);
- GCtab *t = tabV(&rd->argv[0]);
- MSize len = lj_tab_len(t);
- emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0));
- if (len) {
- RecordIndex ix;
- ix.tab = tab;
- ix.key = trlen;
- settabV(J->L, &ix.tabv, t);
- setintV(&ix.keyv, len);
- ix.idxchain = 0;
- if (results_wanted(J) != 0) { /* Specialize load only if needed. */
- ix.val = 0;
- J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */
- rd->nres = 1;
- /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */
+ TRef str = lj_ir_tostr(J, J->base[0]);
+ TRef rep = lj_opt_narrow_toint(J, J->base[1]);
+ TRef hdr, tr, str2 = 0;
+ if (!tref_isnil(J->base[2])) {
+ TRef sep = lj_ir_tostr(J, J->base[2]);
+ int32_t vrep = argv2int(J, &rd->argv[1]);
+ emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1));
+ if (vrep > 1) {
+ TRef hdr2 = recff_bufhdr(J);
+ TRef tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), hdr2, sep);
+ tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr2, str);
+ str2 = emitir(IRTG(IR_BUFSTR, IRT_STR), tr2, hdr2);
+ }
+ }
+ tr = hdr = recff_bufhdr(J);
+ if (str2) {
+ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, str);
+ str = str2;
+ rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1));
+ }
+ tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep);
+ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+}
+
+static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd)
+{
+ TRef str = lj_ir_tostr(J, J->base[0]);
+ TRef hdr = recff_bufhdr(J);
+ TRef tr = lj_ir_call(J, rd->data, hdr, str);
+ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+}
+
+static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd)
+{
+ TRef trstr = lj_ir_tostr(J, J->base[0]);
+ TRef trpat = lj_ir_tostr(J, J->base[1]);
+ TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN);
+ TRef tr0 = lj_ir_kint(J, 0);
+ TRef trstart;
+ GCstr *str = argv2str(J, &rd->argv[0]);
+ GCstr *pat = argv2str(J, &rd->argv[1]);
+ int32_t start;
+ J->needsnap = 1;
+ if (tref_isnil(J->base[2])) {
+ trstart = lj_ir_kint(J, 1);
+ start = 1;
+ } else {
+ trstart = lj_opt_narrow_toint(J, J->base[2]);
+ start = argv2int(J, &rd->argv[2]);
+ }
+ trstart = recff_string_start(J, str, &start, trstart, trlen, tr0);
+ if ((MSize)start <= str->len) {
+ emitir(IRTGI(IR_ULE), trstart, trlen);
+ } else {
+ emitir(IRTGI(IR_UGT), trstart, trlen);
+#if LJ_52
+ J->base[0] = TREF_NIL;
+ return;
+#else
+ trstart = trlen;
+ start = str->len;
+#endif
+ }
+ /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */
+ if ((J->base[2] && tref_istruecond(J->base[3])) ||
+ (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)),
+ !lj_str_haspattern(pat))) { /* Search for fixed string. */
+ TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart);
+ TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0);
+ TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart);
+ TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN);
+ TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen);
+ TRef trp0 = lj_ir_kkptr(J, NULL);
+ if (lj_str_find(strdata(str)+(MSize)start, strdata(pat),
+ str->len-(MSize)start, pat->len)) {
+ TRef pos;
+ emitir(IRTG(IR_NE, IRT_PGC), tr, trp0);
+ /* Recompute offset. trsptr may not point into trstr after folding. */
+ pos = emitir(IRTI(IR_ADD), emitir(IRTI(IR_SUB), tr, trsptr), trstart);
+ J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1));
+ J->base[1] = emitir(IRTI(IR_ADD), pos, trplen);
+ rd->nres = 2;
+ } else {
+ emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0);
+ J->base[0] = TREF_NIL;
+ }
+ } else { /* Search for pattern. */
+ recff_nyiu(J, rd);
+ return;
+ }
+}
+
+static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx)
+{
+ ptrdiff_t arg = sbufx;
+ TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]);
+ GCstr *fmt = argv2str(J, &rd->argv[arg]);
+ FormatState fs;
+ SFormat sf;
+ /* Specialize to the format string. */
+ emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt));
+ lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+ while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */
+ TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg];
+ TRef trsf = lj_ir_kint(J, (int32_t)sf);
+ IRCallID id;
+ switch (STRFMT_TYPE(sf)) {
+ case STRFMT_LIT:
+ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
+ lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len)));
+ break;
+ case STRFMT_INT:
+ id = IRCALL_lj_strfmt_putfnum_int;
+ handle_int:
+ if (!tref_isinteger(tra)) {
+#if LJ_HASFFI
+ if (tref_iscdata(tra)) {
+ tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]);
+ tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
+ break;
}
- ix.val = TREF_NIL;
- lj_record_idx(J, &ix); /* Remove value. */
+#endif
+ goto handle_num;
+ }
+ if (sf == STRFMT_INT) { /* Shortcut for plain %d. */
+ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
+ emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT));
+ } else {
+#if LJ_HASFFI
+ tra = emitir(IRT(IR_CONV, IRT_U64), tra,
+ (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT));
+ tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra);
+ lj_needsplit(J);
+#else
+ recff_nyiu(J, rd); /* Don't bother working around this NYI. */
+ return;
+#endif
}
- } else { /* Complex case: remove in the middle. */
- recff_nyiu(J);
+ break;
+ case STRFMT_UINT:
+ id = IRCALL_lj_strfmt_putfnum_uint;
+ goto handle_int;
+ case STRFMT_NUM:
+ id = IRCALL_lj_strfmt_putfnum;
+ handle_num:
+ tra = lj_ir_tonum(J, tra);
+ tr = lj_ir_call(J, id, tr, trsf, tra);
+ if (LJ_SOFTFP32) lj_needsplit(J);
+ break;
+ case STRFMT_STR:
+ if (!tref_isstr(tra)) {
+ recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */
+ /* NYI: also buffers. */
+ return;
+ }
+ if (sf == STRFMT_STR) /* Shortcut for plain %s. */
+ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, tra);
+ else if ((sf & STRFMT_T_QUOTED))
+ tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra);
+ else
+ tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra);
+ break;
+ case STRFMT_CHAR:
+ tra = lj_opt_narrow_toint(J, tra);
+ if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */
+ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr,
+ emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR));
+ else
+ tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra);
+ break;
+ case STRFMT_PTR: /* NYI */
+ case STRFMT_ERR:
+ default:
+ recff_nyiu(J, rd);
+ return;
+ }
+ }
+ if (sbufx) {
+ emitir(IRT(IR_USE, IRT_NIL), tr, 0);
+ } else {
+ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+ }
+}
+
+static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd)
+{
+ recff_format(J, rd, recff_bufhdr(J), 0);
+}
+
+/* -- Buffer library fast functions --------------------------------------- */
+
+#if LJ_HASBUFFER
+
+static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud)
+{
+ return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L);
+}
+
+static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val)
+{
+ TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L);
+ emitir(IRT(IR_FSTORE, IRT_PGC), fref, val);
+}
+
+static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl)
+{
+ return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl);
+}
+
+static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val)
+{
+ TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl);
+ emitir(IRT(IR_FSTORE, IRT_PTR), fref, val);
+}
+
+static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw)
+{
+ TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr);
+ if (LJ_64)
+ len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
+ return len;
+}
+
+/* Emit typecheck for string buffer. */
+static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, int arg)
+{
+ TRef trtype, ud = J->base[arg];
+ if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE);
+ trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE);
+ emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER));
+ J->needsnap = 1;
+ return ud;
+}
+
+/* Emit BUFHDR for write to extended string buffer. */
+static TRef recff_sbufx_write(jit_State *J, TRef ud)
+{
+ TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata)));
+ return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE);
+}
+
+/* Check for integer in range for the buffer API. */
+static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, int arg)
+{
+ TRef tr = J->base[arg];
+ TRef trlim = lj_ir_kint(J, LJ_MAX_BUF);
+ if (tref_isinteger(tr)) {
+ emitir(IRTGI(IR_ULE), tr, trlim);
+ } else if (tref_isnum(tr)) {
+ tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
+ emitir(IRTGI(IR_ULE), tr, trlim);
+#if LJ_HASFFI
+ } else if (tref_iscdata(tr)) {
+ tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]);
+ emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF));
+ tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE);
+#else
+ UNUSED(rd);
+#endif
+ } else {
+ lj_trace_err(J, LJ_TRERR_BADTYPE);
+ }
+ return tr;
+}
+
+static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ SBufExt *sbx = bufV(&rd->argv[0]);
+ int iscow = (int)sbufiscow(sbx);
+ TRef trl = recff_sbufx_get_L(J, ud);
+ TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW));
+ TRef zero = lj_ir_kint(J, 0);
+ emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero);
+ if (iscow) {
+ trl = emitir(IRT(IR_BXOR, IRT_IGC), trl,
+ LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) :
+ lj_ir_kint(J, SBUF_FLAG_COW));
+ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero);
+ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero);
+ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero);
+ recff_sbufx_set_L(J, ud, trl);
+ emitir(IRT(IR_FSTORE, IRT_PGC),
+ emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero);
+ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero);
+ } else {
+ TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B);
+ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb);
+ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb);
+ }
+}
+
+static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+ TRef len = recff_sbufx_len(J, trr, trw);
+ TRef trn = recff_sbufx_checkint(J, rd, 1);
+ len = emitir(IRTI(IR_MIN), len, trn);
+ trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
+ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
+}
+
+static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trbuf = recff_sbufx_write(J, ud);
+ TRef tr = J->base[1];
+ if (tref_isstr(tr)) {
+ TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0));
+ TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN);
+ lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
+#if LJ_HASFFI
+ } else if (tref_iscdata(tr)) {
+ TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]);
+ TRef len = recff_sbufx_checkint(J, rd, 2);
+ lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr);
+#endif
+ } /* else: Interpreter will throw. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trbuf = recff_sbufx_write(J, ud);
+ TRef tr;
+ ptrdiff_t arg;
+ if (!J->base[1]) return;
+ for (arg = 1; (tr = J->base[arg]); arg++) {
+ if (tref_isudata(tr)) {
+ TRef ud2 = recff_sbufx_check(J, rd, arg);
+ emitir(IRTG(IR_NE, IRT_PGC), ud, ud2);
+ }
+ }
+ for (arg = 1; (tr = J->base[arg]); arg++) {
+ if (tref_isstr(tr)) {
+ trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr);
+ } else if (tref_isnumber(tr)) {
+ trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf,
+ emitir(IRT(IR_TOSTR, IRT_STR), tr,
+ tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT));
+ } else if (tref_isudata(tr)) {
+ TRef trr = recff_sbufx_get_ptr(J, tr, IRFL_SBUF_R);
+ TRef trw = recff_sbufx_get_ptr(J, tr, IRFL_SBUF_W);
+ TRef len = recff_sbufx_len(J, trr, trw);
+ trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len);
+ } else {
+ recff_nyiu(J, rd);
+ }
+ }
+ emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);
+}
+
+static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trbuf = recff_sbufx_write(J, ud);
+ recff_format(J, rd, trbuf, 1);
+}
+
+static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+ TRef tr;
+ ptrdiff_t arg;
+ if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; }
+ for (arg = 0; (tr = J->base[arg+1]); arg++) {
+ if (!tref_isnil(tr)) {
+ J->base[arg+1] = recff_sbufx_checkint(J, rd, arg+1);
}
+ }
+ for (arg = 0; (tr = J->base[arg+1]); arg++) {
+ TRef len = recff_sbufx_len(J, trr, trw);
+ if (tref_isnil(tr)) {
+ J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
+ trr = trw;
+ } else {
+ TRef tru;
+ len = emitir(IRTI(IR_MIN), len, tr);
+ tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len);
+ J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len);
+ trr = tru; /* Doing the ADD before the SNEW generates better code. */
+ }
+ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
+ }
+ rd->nres = arg;
+}
+
+static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+ J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw));
+}
+
+static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+ J->base[0] = recff_sbufx_len(J, trr, trw);
+}
+
+#if LJ_HASFFI
+static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trbuf = recff_sbufx_write(J, ud);
+ TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]);
+ TRef len = recff_sbufx_checkint(J, rd, 2);
+ trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len);
+ emitir(IRT(IR_USE, IRT_NIL), trbuf, 0);
+}
+
+static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trbuf = recff_sbufx_write(J, ud);
+ TRef trsz = recff_sbufx_checkint(J, rd, 1);
+ J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz);
+ J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W));
+ rd->nres = 2;
+}
+
+static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef len = recff_sbufx_checkint(J, rd, 1);
+ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+ TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E);
+ TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw);
+ if (LJ_64)
+ left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE);
+ emitir(IRTGI(IR_ULE), len, left);
+ trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len);
+ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw);
+}
+
+static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R);
+ TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W);
+ J->base[0] = lj_crecord_topuint8(J, trr);
+ J->base[1] = recff_sbufx_len(J, trr, trw);
+ rd->nres = 2;
+}
+#endif
+
+static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trbuf = recff_sbufx_write(J, ud);
+ TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1);
+ lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp);
+ /* No IR_USE needed, since the call is a store. */
+}
+
+static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd)
+{
+ TRef ud = recff_sbufx_check(J, rd, 0);
+ TRef trbuf = recff_sbufx_write(J, ud);
+ TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1);
+ TRef trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp);
+ IRType t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0]));
+ /* No IR_USE needed, since the call is a store. */
+ J->base[0] = lj_record_vload(J, tmp, 0, t);
+ /* The sbx->r store must be after the VLOAD type check, in case it fails. */
+ recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr);
+}
+
+static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd)
+{
+ TRef tmp = recff_tmpref(J, J->base[0], IRTMPREF_IN1);
+ J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp);
+ /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. */
+ emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0);
+ UNUSED(rd);
+}
+
+static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd)
+{
+ if (tvisstr(&rd->argv[0])) {
+ GCstr *str = strV(&rd->argv[0]);
+ SBufExt sbx;
+ IRType t;
+ TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1);
+ TRef tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]);
+ /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM.
+ ** That's why IRCALL_lj_serialize_decode needs a fake INT result.
+ */
+ emitir(IRT(IR_USE, IRT_NIL), tr, 0);
+ memset(&sbx, 0, sizeof(SBufExt));
+ lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len);
+ t = (IRType)lj_serialize_peektype(&sbx);
+ J->base[0] = lj_record_vload(J, tmp, 0, t);
} /* else: Interpreter will throw. */
}
+#endif
+
+/* -- Table library fast functions ---------------------------------------- */
+
static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
{
RecordIndex ix;
@@ -783,7 +1408,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
rd->nres = 0;
if (tref_istab(ix.tab) && ix.val) {
if (!J->base[2]) { /* Simple push: t[#t+1] = v */
- TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab);
+ TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL);
GCtab *t = tabV(&rd->argv[0]);
ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1));
settabV(J->L, &ix.tabv, t);
@@ -791,11 +1416,49 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd)
ix.idxchain = 0;
lj_record_idx(J, &ix); /* Set new value. */
} else { /* Complex case: insert in the middle. */
- recff_nyiu(J);
+ recff_nyiu(J, rd);
+ return;
}
} /* else: Interpreter will throw. */
}
+static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd)
+{
+ TRef tab = J->base[0];
+ if (tref_istab(tab)) {
+ TRef sep = !tref_isnil(J->base[1]) ?
+ lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR);
+ TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ?
+ lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1);
+ TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ?
+ lj_opt_narrow_toint(J, J->base[3]) :
+ emitir(IRTI(IR_ALEN), tab, TREF_NIL);
+ TRef hdr = recff_bufhdr(J);
+ TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre);
+ emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL));
+ J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+ } /* else: Interpreter will throw. */
+ UNUSED(rd);
+}
+
+static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd)
+{
+ TRef tra = lj_opt_narrow_toint(J, J->base[0]);
+ TRef trh = lj_opt_narrow_toint(J, J->base[1]);
+ J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh);
+ UNUSED(rd);
+}
+
+static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd)
+{
+ TRef tr = J->base[0];
+ if (tref_istab(tr)) {
+ rd->nres = 0;
+ lj_ir_call(J, IRCALL_lj_tab_clear, tr);
+ J->needsnap = 1;
+ } /* else: Interpreter will throw. */
+}
+
/* -- I/O library fast functions ------------------------------------------ */
/* Get FILE* for I/O function. Any I/O error aborts recording, so there's
@@ -805,8 +1468,7 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id)
{
TRef tr, ud, fp;
if (id) { /* io.func() */
- tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]);
- ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0);
+ ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id]));
} else { /* fp:method() */
ud = J->base[0];
if (!tref_isudata(ud))
@@ -828,10 +1490,13 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd)
ptrdiff_t i = rd->data == 0 ? 1 : 0;
for (; J->base[i]; i++) {
TRef str = lj_ir_tostr(J, J->base[i]);
- TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero);
+ TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero);
TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN);
if (tref_isk(len) && IR(tref_ref(len))->i == 1) {
- TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
+ IRIns *irs = IR(tref_ref(str));
+ TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ?
+ irs->op1 :
+ emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY);
tr = lj_ir_call(J, IRCALL_fputc, tr, fp);
if (results_wanted(J) != 0) /* Check result only if not ignored. */
emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1));
@@ -853,6 +1518,28 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd)
J->base[0] = TREF_TRUE;
}
+/* -- Debug library fast functions ---------------------------------------- */
+
+static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd)
+{
+ GCtab *mt;
+ TRef mtref;
+ TRef tr = J->base[0];
+ if (tref_istab(tr)) {
+ mt = tabref(tabV(&rd->argv[0])->metatable);
+ mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_TAB_META);
+ } else if (tref_isudata(tr)) {
+ mt = tabref(udataV(&rd->argv[0])->metatable);
+ mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_UDATA_META);
+ } else {
+ mt = tabref(basemt_obj(J2G(J), &rd->argv[0]));
+ J->base[0] = mt ? lj_ir_ktab(J, mt) : TREF_NIL;
+ return;
+ }
+ emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
+ J->base[0] = mt ? mtref : TREF_NIL;
+}
+
/* -- Record calls to fast functions -------------------------------------- */
#include "lj_recdef.h"
diff --git a/src/lj_frame.h b/src/lj_frame.h
index 33bd8e3e..aa1dc11a 100644
--- a/src/lj_frame.h
+++ b/src/lj_frame.h
@@ -11,7 +11,16 @@
/* -- Lua stack frame ----------------------------------------------------- */
-/* Frame type markers in callee function slot (callee base-1). */
+/* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned:
+**
+** PC 00 Lua frame
+** delta 001 C frame
+** delta 010 Continuation frame
+** delta 011 Lua vararg frame
+** delta 101 cpcall() frame
+** delta 110 ff pcall() frame
+** delta 111 ff pcall() frame with active hook
+*/
enum {
FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG,
FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH
@@ -21,9 +30,47 @@ enum {
#define FRAME_TYPEP (FRAME_TYPE|FRAME_P)
/* Macros to access and modify Lua frames. */
+#if LJ_FR2
+/* Two-slot frame info, required for 64 bit PC/GCRef:
+**
+** base-2 base-1 | base base+1 ...
+** [func PC/delta/ft] | [slots ...]
+** ^-- frame | ^-- base ^-- top
+**
+** Continuation frames:
+**
+** base-4 base-3 base-2 base-1 | base base+1 ...
+** [cont PC ] [func PC/delta/ft] | [slots ...]
+** ^-- frame | ^-- base ^-- top
+*/
+#define frame_gc(f) (gcval((f)-1))
+#define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz)
+#define frame_pc(f) ((const BCIns *)frame_ftsz(f))
+#define setframe_gc(f, p, tp) (setgcVraw((f), (p), (tp)))
+#define setframe_ftsz(f, sz) ((f)->ftsz = (sz))
+#define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc))
+#else
+/* One-slot frame info, sufficient for 32 bit PC/GCRef:
+**
+** base-1 | base base+1 ...
+** lo hi |
+** [func | PC/delta/ft] | [slots ...]
+** ^-- frame | ^-- base ^-- top
+**
+** Continuation frames:
+**
+** base-2 base-1 | base base+1 ...
+** lo hi lo hi |
+** [cont | PC] [func | PC/delta/ft] | [slots ...]
+** ^-- frame | ^-- base ^-- top
+*/
#define frame_gc(f) (gcref((f)->fr.func))
-#define frame_func(f) (&frame_gc(f)->fn)
-#define frame_ftsz(f) ((f)->fr.tp.ftsz)
+#define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz)
+#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns))
+#define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp))
+#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz))
+#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
+#endif
#define frame_type(f) (frame_ftsz(f) & FRAME_TYPE)
#define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP)
@@ -33,33 +80,53 @@ enum {
#define frame_isvarg(f) (frame_typep(f) == FRAME_VARG)
#define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL)
-#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns))
+#define frame_func(f) (&frame_gc(f)->fn)
+#define frame_delta(f) (frame_ftsz(f) >> 3)
+#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP)
+
+enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
+
+#if LJ_FR2
+#define frame_contpc(f) (frame_pc((f)-2))
+#define frame_contv(f) (((f)-3)->u64)
+#else
#define frame_contpc(f) (frame_pc((f)-1))
-#if LJ_64
+#define frame_contv(f) (((f)-1)->u32.lo)
+#endif
+#if LJ_FR2
+#define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64)
+#elif LJ_64
#define frame_contf(f) \
((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \
(intptr_t)(int32_t)((f)-1)->u32.lo))
#else
#define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void))
#endif
-#define frame_delta(f) (frame_ftsz(f) >> 3)
-#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP)
+#define frame_iscont_fficb(f) \
+ (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK)
-#define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1])))
+#define frame_prevl(f) ((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1])))
#define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f)))
#define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f))
/* Note: this macro does not skip over FRAME_VARG. */
-#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc)))
-#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (sz))
-#define setframe_gc(f, p) (setgcref((f)->fr.func, (p)))
-
/* -- C stack frame ------------------------------------------------------- */
/* Macros to access and modify the C stack frame chain. */
/* These definitions must match with the arch-specific *.dasc files. */
#if LJ_TARGET_X86
+#if LJ_ABI_WIN
+#define CFRAME_OFS_ERRF (19*4)
+#define CFRAME_OFS_NRES (18*4)
+#define CFRAME_OFS_PREV (17*4)
+#define CFRAME_OFS_L (16*4)
+#define CFRAME_OFS_SEH (9*4)
+#define CFRAME_OFS_PC (6*4)
+#define CFRAME_OFS_MULTRES (5*4)
+#define CFRAME_SIZE (16*4)
+#define CFRAME_SHIFT_MULTRES 0
+#else
#define CFRAME_OFS_ERRF (15*4)
#define CFRAME_OFS_NRES (14*4)
#define CFRAME_OFS_PREV (13*4)
@@ -68,24 +135,41 @@ enum {
#define CFRAME_OFS_MULTRES (5*4)
#define CFRAME_SIZE (12*4)
#define CFRAME_SHIFT_MULTRES 0
+#endif
#elif LJ_TARGET_X64
#if LJ_ABI_WIN
#define CFRAME_OFS_PREV (13*8)
+#if LJ_GC64
+#define CFRAME_OFS_PC (12*8)
+#define CFRAME_OFS_L (11*8)
+#define CFRAME_OFS_ERRF (21*4)
+#define CFRAME_OFS_NRES (20*4)
+#define CFRAME_OFS_MULTRES (8*4)
+#else
#define CFRAME_OFS_PC (25*4)
#define CFRAME_OFS_L (24*4)
#define CFRAME_OFS_ERRF (23*4)
#define CFRAME_OFS_NRES (22*4)
#define CFRAME_OFS_MULTRES (21*4)
+#endif
#define CFRAME_SIZE (10*8)
#define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8)
#define CFRAME_SHIFT_MULTRES 0
#else
#define CFRAME_OFS_PREV (4*8)
+#if LJ_GC64
+#define CFRAME_OFS_PC (3*8)
+#define CFRAME_OFS_L (2*8)
+#define CFRAME_OFS_ERRF (3*4)
+#define CFRAME_OFS_NRES (2*4)
+#define CFRAME_OFS_MULTRES (0*4)
+#else
#define CFRAME_OFS_PC (7*4)
#define CFRAME_OFS_L (6*4)
#define CFRAME_OFS_ERRF (5*4)
#define CFRAME_OFS_NRES (4*4)
#define CFRAME_OFS_MULTRES (1*4)
+#endif
#if LJ_NO_UNWIND
#define CFRAME_SIZE (12*8)
#else
@@ -107,6 +191,15 @@ enum {
#define CFRAME_SIZE 64
#endif
#define CFRAME_SHIFT_MULTRES 3
+#elif LJ_TARGET_ARM64
+#define CFRAME_OFS_ERRF 36
+#define CFRAME_OFS_NRES 40
+#define CFRAME_OFS_PREV 0
+#define CFRAME_OFS_L 16
+#define CFRAME_OFS_PC 8
+#define CFRAME_OFS_MULTRES 32
+#define CFRAME_SIZE 208
+#define CFRAME_SHIFT_MULTRES 3
#elif LJ_TARGET_PPC
#if LJ_TARGET_XBOX360
#define CFRAME_OFS_ERRF 424
@@ -117,7 +210,7 @@ enum {
#define CFRAME_OFS_MULTRES 408
#define CFRAME_SIZE 384
#define CFRAME_SHIFT_MULTRES 3
-#elif LJ_ARCH_PPC64
+#elif LJ_ARCH_PPC32ON64
#define CFRAME_OFS_ERRF 472
#define CFRAME_OFS_NRES 468
#define CFRAME_OFS_PREV 448
@@ -133,26 +226,43 @@ enum {
#define CFRAME_OFS_L 36
#define CFRAME_OFS_PC 32
#define CFRAME_OFS_MULTRES 28
-#define CFRAME_SIZE 272
+#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128)
#define CFRAME_SHIFT_MULTRES 3
#endif
-#elif LJ_TARGET_PPCSPE
-#define CFRAME_OFS_ERRF 28
-#define CFRAME_OFS_NRES 24
-#define CFRAME_OFS_PREV 20
-#define CFRAME_OFS_L 16
-#define CFRAME_OFS_PC 12
-#define CFRAME_OFS_MULTRES 8
-#define CFRAME_SIZE 184
-#define CFRAME_SHIFT_MULTRES 3
-#elif LJ_TARGET_MIPS
+#elif LJ_TARGET_MIPS32
+#if LJ_ARCH_HASFPU
#define CFRAME_OFS_ERRF 124
#define CFRAME_OFS_NRES 120
#define CFRAME_OFS_PREV 116
#define CFRAME_OFS_L 112
+#define CFRAME_SIZE 112
+#else
+#define CFRAME_OFS_ERRF 76
+#define CFRAME_OFS_NRES 72
+#define CFRAME_OFS_PREV 68
+#define CFRAME_OFS_L 64
+#define CFRAME_SIZE 64
+#endif
#define CFRAME_OFS_PC 20
#define CFRAME_OFS_MULTRES 16
-#define CFRAME_SIZE 112
+#define CFRAME_SHIFT_MULTRES 3
+#elif LJ_TARGET_MIPS64
+#if LJ_ARCH_HASFPU
+#define CFRAME_OFS_ERRF 188
+#define CFRAME_OFS_NRES 184
+#define CFRAME_OFS_PREV 176
+#define CFRAME_OFS_L 168
+#define CFRAME_OFS_PC 160
+#define CFRAME_SIZE 192
+#else
+#define CFRAME_OFS_ERRF 124
+#define CFRAME_OFS_NRES 120
+#define CFRAME_OFS_PREV 112
+#define CFRAME_OFS_L 104
+#define CFRAME_OFS_PC 96
+#define CFRAME_SIZE 128
+#endif
+#define CFRAME_OFS_MULTRES 0
#define CFRAME_SHIFT_MULTRES 3
#else
#error "Missing CFRAME_* definitions for this architecture"
diff --git a/src/lj_func.c b/src/lj_func.c
index 5df652d8..9795a771 100644
--- a/src/lj_func.c
+++ b/src/lj_func.c
@@ -24,9 +24,11 @@ void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt)
/* -- Upvalues ------------------------------------------------------------ */
-static void unlinkuv(GCupval *uv)
+static void unlinkuv(global_State *g, GCupval *uv)
{
- lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
+ UNUSED(g);
+ lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
+ "broken upvalue chain");
setgcrefr(uvnext(uv)->prev, uv->prev);
setgcrefr(uvprev(uv)->next, uv->next);
}
@@ -40,7 +42,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot)
GCupval *uv;
/* Search the sorted list of open upvalues. */
while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) {
- lua_assert(!p->closed && uvval(p) != &p->tv);
+ lj_assertG(!p->closed && uvval(p) != &p->tv, "closed upvalue in chain");
if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */
if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */
flipwhite(obj2gco(p));
@@ -61,7 +63,8 @@ static GCupval *func_finduv(lua_State *L, TValue *slot)
setgcrefr(uv->next, g->uvhead.next);
setgcref(uvnext(uv)->prev, obj2gco(uv));
setgcref(g->uvhead.next, obj2gco(uv));
- lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
+ lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
+ "broken upvalue chain");
return uv;
}
@@ -84,12 +87,13 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level)
while (gcref(L->openupval) != NULL &&
uvval((uv = gco2uv(gcref(L->openupval)))) >= level) {
GCobj *o = obj2gco(uv);
- lua_assert(!isblack(o) && !uv->closed && uvval(uv) != &uv->tv);
+ lj_assertG(!isblack(o), "bad black upvalue");
+ lj_assertG(!uv->closed && uvval(uv) != &uv->tv, "closed upvalue in chain");
setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */
if (isdead(g, o)) {
lj_func_freeuv(g, uv);
} else {
- unlinkuv(uv);
+ unlinkuv(g, uv);
lj_gc_closeuv(g, uv);
}
}
@@ -98,7 +102,7 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level)
void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv)
{
if (!uv->closed)
- unlinkuv(uv);
+ unlinkuv(g, uv);
lj_mem_freet(g, uv);
}
diff --git a/src/lj_gc.c b/src/lj_gc.c
index 899b4e02..2fc52ec1 100644
--- a/src/lj_gc.c
+++ b/src/lj_gc.c
@@ -12,6 +12,7 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_func.h"
@@ -24,6 +25,7 @@
#include "lj_cdata.h"
#endif
#include "lj_trace.h"
+#include "lj_dispatch.h"
#include "lj_vm.h"
#define GCSTEPSIZE 1024u
@@ -40,7 +42,8 @@
/* Mark a TValue (if needed). */
#define gc_marktv(g, tv) \
- { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \
+ { lj_assertG(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct), \
+ "TValue and GC type mismatch"); \
if (tviswhite(tv)) gc_mark(g, gcV(tv)); }
/* Mark a GCobj (if needed). */
@@ -54,21 +57,32 @@
static void gc_mark(global_State *g, GCobj *o)
{
int gct = o->gch.gct;
- lua_assert(iswhite(o) && !isdead(g, o));
+ lj_assertG(iswhite(o), "mark of non-white object");
+ lj_assertG(!isdead(g, o), "mark of dead object");
white2gray(o);
if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) {
GCtab *mt = tabref(gco2ud(o)->metatable);
gray2black(o); /* Userdata are never gray. */
if (mt) gc_markobj(g, mt);
gc_markobj(g, tabref(gco2ud(o)->env));
+ if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) {
+ SBufExt *sbx = (SBufExt *)uddata(gco2ud(o));
+ if (sbufiscow(sbx) && gcref(sbx->cowref))
+ gc_markobj(g, gcref(sbx->cowref));
+ if (gcref(sbx->dict_str))
+ gc_markobj(g, gcref(sbx->dict_str));
+ if (gcref(sbx->dict_mt))
+ gc_markobj(g, gcref(sbx->dict_mt));
+ }
} else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) {
GCupval *uv = gco2uv(o);
gc_marktv(g, uvval(uv));
if (uv->closed)
gray2black(o); /* Closed upvalues are never gray. */
} else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) {
- lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
- gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO);
+ lj_assertG(gct == ~LJ_TFUNC || gct == ~LJ_TTAB ||
+ gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE,
+ "bad GC type %d", gct);
setgcrefr(o->gch.gclist, g->gc.gray);
setgcref(g->gc.gray, o);
}
@@ -101,7 +115,8 @@ static void gc_mark_uv(global_State *g)
{
GCupval *uv;
for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) {
- lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv);
+ lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv,
+ "broken upvalue chain");
if (isgray(obj2gco(uv)))
gc_marktv(g, uvval(uv));
}
@@ -196,7 +211,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t)
for (i = 0; i <= hmask; i++) {
Node *n = &node[i];
if (!tvisnil(&n->val)) { /* Mark non-empty slot. */
- lua_assert(!tvisnil(&n->key));
+ lj_assertG(!tvisnil(&n->key), "mark of nil key in non-empty slot");
if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key);
if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val);
}
@@ -211,7 +226,8 @@ static void gc_traverse_func(global_State *g, GCfunc *fn)
gc_markobj(g, tabref(fn->c.env));
if (isluafunc(fn)) {
uint32_t i;
- lua_assert(fn->l.nupvalues <= funcproto(fn)->sizeuv);
+ lj_assertG(fn->l.nupvalues <= funcproto(fn)->sizeuv,
+ "function upvalues out of range");
gc_markobj(g, funcproto(fn));
for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */
gc_markobj(g, &gcref(fn->l.uvptr[i])->uv);
@@ -227,7 +243,7 @@ static void gc_traverse_func(global_State *g, GCfunc *fn)
static void gc_marktrace(global_State *g, TraceNo traceno)
{
GCobj *o = obj2gco(traceref(G2J(g), traceno));
- lua_assert(traceno != G2J(g)->cur.traceno);
+ lj_assertG(traceno != G2J(g)->cur.traceno, "active trace escaped");
if (iswhite(o)) {
white2gray(o);
setgcrefr(o->gch.gclist, g->gc.gray);
@@ -244,6 +260,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T)
IRIns *ir = &T->ir[ref];
if (ir->o == IR_KGC)
gc_markobj(g, ir_kgc(ir));
+ if (irt_is64(ir->t) && ir->o != IR_KNULL)
+ ref++;
}
if (T->link) gc_marktrace(g, T->link);
if (T->nextroot) gc_marktrace(g, T->nextroot);
@@ -274,12 +292,12 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
{
TValue *frame, *top = th->top-1, *bot = tvref(th->stack);
/* Note: extra vararg frame not skipped, marks function twice (harmless). */
- for (frame = th->base-1; frame > bot; frame = frame_prev(frame)) {
+ for (frame = th->base-1; frame > bot+LJ_FR2; frame = frame_prev(frame)) {
GCfunc *fn = frame_func(frame);
TValue *ftop = frame;
if (isluafunc(fn)) ftop += funcproto(fn)->framesize;
if (ftop > top) top = ftop;
- gc_markobj(g, fn); /* Need to mark hidden function (or L). */
+ if (!LJ_FR2) gc_markobj(g, fn); /* Need to mark hidden function (or L). */
}
top++; /* Correct bias of -1 (frame == base-1). */
if (top > tvref(th->maxstack)) top = tvref(th->maxstack);
@@ -290,7 +308,7 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th)
static void gc_traverse_thread(global_State *g, lua_State *th)
{
TValue *o, *top = th->top;
- for (o = tvref(th->stack)+1; o < top; o++)
+ for (o = tvref(th->stack)+1+LJ_FR2; o < top; o++)
gc_marktv(g, o);
if (g->gc.state == GCSatomic) {
top = tvref(th->stack) + th->stacksize;
@@ -306,7 +324,7 @@ static size_t propagatemark(global_State *g)
{
GCobj *o = gcref(g->gc.gray);
int gct = o->gch.gct;
- lua_assert(isgray(o));
+ lj_assertG(isgray(o), "propagation of non-gray object");
gray2black(o);
setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */
if (LJ_LIKELY(gct == ~LJ_TTAB)) {
@@ -338,7 +356,7 @@ static size_t propagatemark(global_State *g)
return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) +
T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry);
#else
- lua_assert(0);
+ lj_assertG(0, "bad GC type %d", gct);
return 0;
#endif
}
@@ -355,15 +373,6 @@ static size_t gc_propagate_gray(global_State *g)
/* -- Sweep phase --------------------------------------------------------- */
-/* Try to shrink some common data structures. */
-static void gc_shrink(global_State *g, lua_State *L)
-{
- if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1)
- lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */
- if (g->tmpbuf.sz > LJ_MIN_SBUF*2)
- lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */
-}
-
/* Type of GC free functions. */
typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o);
@@ -389,7 +398,7 @@ static const GCFreeFunc gc_freefunc[] = {
};
/* Full sweep of a GC list. */
-#define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM)
+#define gc_fullsweep(g, p) gc_sweep(g, (p), ~(uint32_t)0)
/* Partial sweep of a GC list. */
static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
@@ -401,11 +410,13 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */
gc_fullsweep(g, &gco2th(o)->openupval);
if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
- lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED));
+ lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
+ "sweep of undead object");
makewhite(g, o); /* Value is alive, change to the current white. */
p = &o->gch.nextgc;
} else { /* Otherwise value is dead, free it. */
- lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED);
+ lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
+ "sweep of unlive object");
setgcrefr(*p, o->gch.nextgc);
if (o == gcref(g->gc.root))
setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */
@@ -415,6 +426,32 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim)
return p;
}
+/* Sweep one string interning table chain. Preserves hashalg bit. */
+static void gc_sweepstr(global_State *g, GCRef *chain)
+{
+ /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */
+ int ow = otherwhite(g);
+ uintptr_t u = gcrefu(*chain);
+ GCRef q;
+ GCRef *p = &q;
+ GCobj *o;
+ setgcrefp(q, (u & ~(uintptr_t)1));
+ while ((o = gcref(*p)) != NULL) {
+ if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */
+ lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
+ "sweep of undead string");
+ makewhite(g, o); /* String is alive, change to the current white. */
+ p = &o->gch.nextgc;
+ } else { /* Otherwise string is dead, free it. */
+ lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
+ "sweep of unlive string");
+ setgcrefr(*p, o->gch.nextgc);
+ lj_str_free(g, gco2str(o));
+ }
+ }
+ setgcrefp(*chain, (gcrefu(q) | (u & 1)));
+}
+
/* Check whether we can clear a key or a value slot from a table. */
static int gc_mayclear(cTValue *o, int val)
{
@@ -432,11 +469,12 @@ static int gc_mayclear(cTValue *o, int val)
}
/* Clear collected entries from weak tables. */
-static void gc_clearweak(GCobj *o)
+static void gc_clearweak(global_State *g, GCobj *o)
{
+ UNUSED(g);
while (o) {
GCtab *t = gco2tab(o);
- lua_assert((t->marked & LJ_GC_WEAK));
+ lj_assertG((t->marked & LJ_GC_WEAK), "clear of non-weak table");
if ((t->marked & LJ_GC_WEAKVAL)) {
MSize i, asize = t->asize;
for (i = 0; i < asize; i++) {
@@ -467,18 +505,21 @@ static void gc_call_finalizer(global_State *g, lua_State *L,
{
/* Save and restore lots of state around the __gc callback. */
uint8_t oldh = hook_save(g);
- MSize oldt = g->gc.threshold;
+ GCSize oldt = g->gc.threshold;
int errcode;
TValue *top;
lj_trace_abort(g);
- top = L->top;
- L->top = top+2;
hook_entergc(g); /* Disable hooks and new traces during __gc. */
+ if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */
- copyTV(L, top, mo);
- setgcV(L, top+1, o, ~o->gch.gct);
- errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|o| -> | */
+ top = L->top;
+ copyTV(L, top++, mo);
+ if (LJ_FR2) setnilV(top++);
+ setgcV(L, top, o, ~o->gch.gct);
+ L->top = top+1;
+ errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */
hook_restore(g, oldh);
+ if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g);
g->gc.threshold = oldt; /* Restore GC threshold. */
if (errcode)
lj_err_throw(L, errcode); /* Propagate errors. */
@@ -490,7 +531,7 @@ static void gc_finalize(lua_State *L)
global_State *g = G(L);
GCobj *o = gcnext(gcref(g->gc.mmudata));
cTValue *mo;
- lua_assert(gcref(g->jit_L) == NULL); /* Must not be called on trace. */
+ lj_assertG(tvref(g->jit_base) == NULL, "finalizer called on trace");
/* Unchain from list of userdata to be finalized. */
if (o == gcref(g->gc.mmudata))
setgcrefnull(g->gc.mmudata);
@@ -565,9 +606,9 @@ void lj_gc_freeall(global_State *g)
/* Free everything, except super-fixed objects (the main thread). */
g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED;
gc_fullsweep(g, &g->gc.root);
- strmask = g->strmask;
+ strmask = g->str.mask;
for (i = 0; i <= strmask; i++) /* Free all string hash chains. */
- gc_fullsweep(g, &g->strhash[i]);
+ gc_sweepstr(g, &g->str.tab[i]);
}
/* -- Collector ----------------------------------------------------------- */
@@ -582,7 +623,7 @@ static void atomic(global_State *g, lua_State *L)
setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */
setgcrefnull(g->gc.weak);
- lua_assert(!iswhite(obj2gco(mainthread(g))));
+ lj_assertG(!iswhite(obj2gco(mainthread(g))), "main thread turned white");
gc_markobj(g, L); /* Mark running thread. */
gc_traverse_curtrace(g); /* Traverse current trace. */
gc_mark_gcroot(g); /* Mark GC roots (again). */
@@ -597,13 +638,15 @@ static void atomic(global_State *g, lua_State *L)
udsize += gc_propagate_gray(g); /* And propagate the marks. */
/* All marking done, clear weak tables. */
- gc_clearweak(gcref(g->gc.weak));
+ gc_clearweak(g, gcref(g->gc.weak));
+
+ lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */
/* Prepare for sweep phase. */
g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */
g->strempty.marked = g->gc.currentwhite;
setmref(g->gc.sweep, &g->gc.root);
- g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */
+ g->gc.estimate = g->gc.total - (GCSize)udsize; /* Initial estimate. */
}
/* GC state machine. Returns a cost estimate for each step performed. */
@@ -620,28 +663,29 @@ static size_t gc_onestep(lua_State *L)
g->gc.state = GCSatomic; /* End of mark phase. */
return 0;
case GCSatomic:
- if (gcref(g->jit_L)) /* Don't run atomic phase on trace. */
+ if (tvref(g->jit_base)) /* Don't run atomic phase on trace. */
return LJ_MAX_MEM;
atomic(g, L);
g->gc.state = GCSsweepstring; /* Start of sweep phase. */
g->gc.sweepstr = 0;
return 0;
case GCSsweepstring: {
- MSize old = g->gc.total;
- gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */
- if (g->gc.sweepstr > g->strmask)
+ GCSize old = g->gc.total;
+ gc_sweepstr(g, &g->str.tab[g->gc.sweepstr++]); /* Sweep one chain. */
+ if (g->gc.sweepstr > g->str.mask)
g->gc.state = GCSsweep; /* All string hash chains sweeped. */
- lua_assert(old >= g->gc.total);
+ lj_assertG(old >= g->gc.total, "sweep increased memory");
g->gc.estimate -= old - g->gc.total;
return GCSWEEPCOST;
}
case GCSsweep: {
- MSize old = g->gc.total;
+ GCSize old = g->gc.total;
setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX));
- lua_assert(old >= g->gc.total);
+ lj_assertG(old >= g->gc.total, "sweep increased memory");
g->gc.estimate -= old - g->gc.total;
if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) {
- gc_shrink(g, L);
+ if (g->str.num <= (g->str.mask >> 2) && g->str.mask > LJ_MIN_STRTAB*2-1)
+ lj_str_resize(L, g->str.mask >> 1); /* Shrink string table. */
if (gcref(g->gc.mmudata)) { /* Need any finalizations? */
g->gc.state = GCSfinalize;
#if LJ_HASFFI
@@ -656,9 +700,12 @@ static size_t gc_onestep(lua_State *L)
}
case GCSfinalize:
if (gcref(g->gc.mmudata) != NULL) {
- if (gcref(g->jit_L)) /* Don't call finalizers on trace. */
+ GCSize old = g->gc.total;
+ if (tvref(g->jit_base)) /* Don't call finalizers on trace. */
return LJ_MAX_MEM;
gc_finalize(L); /* Finalize one userdata object. */
+ if (old >= g->gc.total && g->gc.estimate > old - g->gc.total)
+ g->gc.estimate -= old - g->gc.total;
if (g->gc.estimate > GCFINALIZECOST)
g->gc.estimate -= GCFINALIZECOST;
return GCFINALIZECOST;
@@ -670,7 +717,7 @@ static size_t gc_onestep(lua_State *L)
g->gc.debt = 0;
return 0;
default:
- lua_assert(0);
+ lj_assertG(0, "bad GC state");
return 0;
}
}
@@ -679,7 +726,7 @@ static size_t gc_onestep(lua_State *L)
int LJ_FASTCALL lj_gc_step(lua_State *L)
{
global_State *g = G(L);
- MSize lim;
+ GCSize lim;
int32_t ostate = g->vmstate;
setvmstate(g, GC);
lim = (GCSTEPSIZE/100) * g->gc.stepmul;
@@ -688,13 +735,13 @@ int LJ_FASTCALL lj_gc_step(lua_State *L)
if (g->gc.total > g->gc.threshold)
g->gc.debt += g->gc.total - g->gc.threshold;
do {
- lim -= (MSize)gc_onestep(L);
+ lim -= (GCSize)gc_onestep(L);
if (g->gc.state == GCSpause) {
g->gc.threshold = (g->gc.estimate/100) * g->gc.pause;
g->vmstate = ostate;
return 1; /* Finished a GC cycle. */
}
- } while ((int32_t)lim > 0);
+ } while (sizeof(lim) == 8 ? ((int64_t)lim > 0) : ((int32_t)lim > 0));
if (g->gc.debt < GCSTEPSIZE) {
g->gc.threshold = g->gc.total + GCSTEPSIZE;
g->vmstate = ostate;
@@ -718,8 +765,8 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L)
/* Perform multiple GC steps. Called from JIT-compiled code. */
int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps)
{
- lua_State *L = gco2th(gcref(g->jit_L));
- L->base = mref(G(L)->jit_base, TValue);
+ lua_State *L = gco2th(gcref(g->cur_L));
+ L->base = tvref(G(L)->jit_base);
L->top = curr_topL(L);
while (steps-- > 0 && lj_gc_step(L) == 0)
;
@@ -744,7 +791,8 @@ void lj_gc_fullgc(lua_State *L)
}
while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep)
gc_onestep(L); /* Finish sweep. */
- lua_assert(g->gc.state == GCSfinalize || g->gc.state == GCSpause);
+ lj_assertG(g->gc.state == GCSfinalize || g->gc.state == GCSpause,
+ "bad GC state");
/* Now perform a full GC. */
g->gc.state = GCSpause;
do { gc_onestep(L); } while (g->gc.state != GCSpause);
@@ -757,9 +805,11 @@ void lj_gc_fullgc(lua_State *L)
/* Move the GC propagation frontier forward. */
void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v)
{
- lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o));
- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
- lua_assert(o->gch.gct != ~LJ_TTAB);
+ lj_assertG(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o),
+ "bad object states for forward barrier");
+ lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
+ "bad GC state");
+ lj_assertG(o->gch.gct != ~LJ_TTAB, "barrier object is not a table");
/* Preserve invariant during propagation. Otherwise it doesn't matter. */
if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic)
gc_mark(g, v); /* Move frontier forward. */
@@ -796,7 +846,8 @@ void lj_gc_closeuv(global_State *g, GCupval *uv)
lj_gc_barrierf(g, o, gcV(&uv->tv));
} else {
makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */
- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
+ lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
+ "bad GC state");
}
}
}
@@ -813,27 +864,29 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno)
/* -- Allocator ----------------------------------------------------------- */
/* Call pluggable memory allocator to allocate or resize a fragment. */
-void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz)
+void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz)
{
global_State *g = G(L);
- lua_assert((osz == 0) == (p == NULL));
+ lj_assertG((osz == 0) == (p == NULL), "realloc API violation");
p = g->allocf(g->allocd, p, osz, nsz);
if (p == NULL && nsz > 0)
lj_err_mem(L);
- lua_assert((nsz == 0) == (p == NULL));
- lua_assert(checkptr32(p));
+ lj_assertG((nsz == 0) == (p == NULL), "allocf API violation");
+ lj_assertG(checkptrGC(p),
+ "allocated memory address %p outside required range", p);
g->gc.total = (g->gc.total - osz) + nsz;
return p;
}
/* Allocate new GC object and link it to the root set. */
-void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size)
+void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size)
{
global_State *g = G(L);
GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size);
if (o == NULL)
lj_err_mem(L);
- lua_assert(checkptr32(o));
+ lj_assertG(checkptrGC(o),
+ "allocated memory address %p outside required range", o);
g->gc.total += size;
setgcrefr(o->gch.nextgc, g->gc.root);
setgcref(g->gc.root, o);
diff --git a/src/lj_gc.h b/src/lj_gc.h
index c211e072..0df7dee6 100644
--- a/src/lj_gc.h
+++ b/src/lj_gc.h
@@ -81,8 +81,10 @@ LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno);
static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
{
GCobj *o = obj2gco(t);
- lua_assert(isblack(o) && !isdead(g, o));
- lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause);
+ lj_assertG(isblack(o) && !isdead(g, o),
+ "bad object states for backward barrier");
+ lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause,
+ "bad GC state");
black2gray(o);
setgcrefr(t->gclist, g->gc.grayagain);
setgcref(g->gc.grayagain, o);
@@ -107,8 +109,8 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t)
lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); }
/* Allocator. */
-LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz);
-LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size);
+LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz);
+LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size);
LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
MSize *szp, MSize lim, MSize esz);
@@ -116,13 +118,13 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p,
static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize)
{
- g->gc.total -= (MSize)osize;
+ g->gc.total -= (GCSize)osize;
g->allocf(g->allocd, p, osize, 0);
}
-#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t))))
+#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (GCSize)((n)*sizeof(t))))
#define lj_mem_reallocvec(L, p, on, n, t) \
- ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t))))
+ ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (GCSize)((n)*sizeof(t))))
#define lj_mem_growvec(L, p, n, m, t) \
((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t)))
#define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t))
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index e4b68375..c50d0d4c 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -14,6 +14,8 @@
#include "lj_err.h"
#include "lj_debug.h"
#include "lj_frame.h"
+#include "lj_buf.h"
+#include "lj_strfmt.h"
#include "lj_jit.h"
#include "lj_dispatch.h"
@@ -294,6 +296,9 @@ enum {
#elif LJ_TARGET_ARM
DW_REG_SP = 13,
DW_REG_RA = 14,
+#elif LJ_TARGET_ARM64
+ DW_REG_SP = 31,
+ DW_REG_RA = 30,
#elif LJ_TARGET_PPC
DW_REG_SP = 1,
DW_REG_RA = 65,
@@ -358,7 +363,7 @@ static const ELFheader elfhdr_template = {
.eosabi = 12,
#elif defined(__DragonFly__)
.eosabi = 0,
-#elif (defined(__sun__) && defined(__svr4__))
+#elif LJ_TARGET_SOLARIS
.eosabi = 6,
#else
.eosabi = 0,
@@ -372,6 +377,8 @@ static const ELFheader elfhdr_template = {
.machine = 62,
#elif LJ_TARGET_ARM
.machine = 40,
+#elif LJ_TARGET_ARM64
+ .machine = 183,
#elif LJ_TARGET_PPC
.machine = 20,
#elif LJ_TARGET_MIPS
@@ -428,16 +435,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n)
*ctx->p++ = '0' + n;
}
-/* Add a ULEB128 value. */
-static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v)
-{
- uint8_t *p = ctx->p;
- for (; v >= 0x80; v >>= 7)
- *p++ = (uint8_t)((v & 0x7f) | 0x80);
- *p++ = (uint8_t)v;
- ctx->p = p;
-}
-
/* Add a SLEB128 value. */
static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
{
@@ -454,7 +451,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v)
#define DU16(x) (*(uint16_t *)p = (x), p += 2)
#define DU32(x) (*(uint32_t *)p = (x), p += 4)
#define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t))
-#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p)
+#define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x)))
#define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p)
#define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p)
#define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop
@@ -564,13 +561,20 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
DB(DW_CFA_offset|DW_REG_15); DUV(4);
DB(DW_CFA_offset|DW_REG_14); DUV(5);
/* Extra registers saved for JIT-compiled code. */
- DB(DW_CFA_offset|DW_REG_13); DUV(9);
- DB(DW_CFA_offset|DW_REG_12); DUV(10);
+ DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9);
+ DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10);
#elif LJ_TARGET_ARM
{
int i;
for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); }
}
+#elif LJ_TARGET_ARM64
+ {
+ int i;
+ DB(DW_CFA_offset|31); DUV(2);
+ for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); }
+ for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); }
+ }
#elif LJ_TARGET_PPC
{
int i;
@@ -720,13 +724,27 @@ static void gdbjit_buildobj(GDBJITctx *ctx)
SECTALIGN(ctx->p, sizeof(uintptr_t));
gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe);
ctx->objsize = (size_t)((char *)ctx->p - (char *)obj);
- lua_assert(ctx->objsize < sizeof(GDBJITobj));
+ lj_assertX(ctx->objsize < sizeof(GDBJITobj), "GDBJITobj overflow");
}
#undef SECTALIGN
/* -- Interface to GDB JIT API -------------------------------------------- */
+static int gdbjit_lock;
+
+static void gdbjit_lock_acquire()
+{
+ while (__sync_lock_test_and_set(&gdbjit_lock, 1)) {
+ /* Just spin; futexes or pthreads aren't worth the portability cost. */
+ }
+}
+
+static void gdbjit_lock_release()
+{
+ __sync_lock_release(&gdbjit_lock);
+}
+
/* Add new entry to GDB JIT symbol chain. */
static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
{
@@ -738,6 +756,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
ctx->T->gdbjit_entry = (void *)eo;
/* Link new entry to chain and register it. */
eo->entry.prev_entry = NULL;
+ gdbjit_lock_acquire();
eo->entry.next_entry = __jit_debug_descriptor.first_entry;
if (eo->entry.next_entry)
eo->entry.next_entry->prev_entry = &eo->entry;
@@ -747,6 +766,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx)
__jit_debug_descriptor.relevant_entry = &eo->entry;
__jit_debug_descriptor.action_flag = GDBJIT_REGISTER;
__jit_debug_register_code();
+ gdbjit_lock_release();
}
/* Add debug info for newly compiled trace and notify GDB. */
@@ -762,7 +782,8 @@ void lj_gdbjit_addtrace(jit_State *J, GCtrace *T)
ctx.spadjp = CFRAME_SIZE_JIT +
(MSize)(parent ? traceref(J, parent)->spadjust : 0);
ctx.spadj = CFRAME_SIZE_JIT + T->spadjust;
- lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc);
+ lj_assertJ(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc,
+ "start PC out of range");
ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
ctx.filename = proto_chunknamestr(pt);
if (*ctx.filename == '@' || *ctx.filename == '=')
@@ -778,6 +799,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
{
GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry;
if (eo) {
+ gdbjit_lock_acquire();
if (eo->entry.prev_entry)
eo->entry.prev_entry->next_entry = eo->entry.next_entry;
else
@@ -787,6 +809,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T)
__jit_debug_descriptor.relevant_entry = &eo->entry;
__jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER;
__jit_debug_register_code();
+ gdbjit_lock_release();
lj_mem_free(J2G(J), eo, eo->sz);
}
}
diff --git a/src/lj_ir.c b/src/lj_ir.c
index b2846680..65901510 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -15,6 +15,7 @@
#if LJ_HASJIT
#include "lj_gc.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
@@ -29,14 +30,16 @@
#endif
#include "lj_vm.h"
#include "lj_strscan.h"
-#include "lj_lib.h"
+#include "lj_serialize.h"
+#include "lj_strfmt.h"
+#include "lj_prng.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
#define fins (&J->fold.ins)
/* Pass IR on to next optimization in chain (FOLD). */
-#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
+#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
/* -- IR tables ----------------------------------------------------------- */
@@ -88,8 +91,9 @@ static void lj_ir_growbot(jit_State *J)
{
IRIns *baseir = J->irbuf + J->irbotlim;
MSize szins = J->irtoplim - J->irbotlim;
- lua_assert(szins != 0);
- lua_assert(J->cur.nk == J->irbotlim);
+ lj_assertJ(szins != 0, "zero IR size");
+ lj_assertJ(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim,
+ "unexpected IR growth");
if (J->cur.nins + (szins >> 1) < J->irtoplim) {
/* More than half of the buffer is free on top: shift up by a quarter. */
MSize ofs = szins >> 2;
@@ -143,6 +147,17 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...)
return emitir(CCI_OPTYPE(ci), tr, id);
}
+/* Load field of type t from GG_State + offset. Must be 32 bit aligned. */
+TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
+{
+ lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset");
+ ofs >>= 2;
+ lj_assertJ(ofs >= IRFL__MAX && ofs <= 0x3ff,
+ "GG_State field offset breaks 10 bit FOLD key limit");
+ lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs);
+ return lj_opt_fold(J);
+}
+
/* -- Interning of constants ---------------------------------------------- */
/*
@@ -163,6 +178,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J)
return ref;
}
+/* Get ref of next 64 bit IR constant and optionally grow IR.
+** Note: this may invalidate all IRIns *!
+*/
+static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
+{
+ IRRef ref = J->cur.nk - 2;
+ lj_assertJ(J->state != LJ_TRACE_ASM, "bad JIT state");
+ if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J);
+ J->cur.nk = ref;
+ return ref;
+}
+
+#if LJ_GC64
+#define ir_nextkgc ir_nextk64
+#else
+#define ir_nextkgc ir_nextk
+#endif
+
/* Intern int32_t constant. */
TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k)
{
@@ -182,79 +215,21 @@ found:
return TREF(ref, IRT_INT);
}
-/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the
-** 64 bit constant. The constants themselves are stored in a chained array
-** and shared across traces.
-**
-** Rationale for choosing this data structure:
-** - The address of the constants is embedded in the generated machine code
-** and must never move. A resizable array or hash table wouldn't work.
-** - Most apps need very few non-32 bit integer constants (less than a dozen).
-** - Linear search is hard to beat in terms of speed and low complexity.
-*/
-typedef struct K64Array {
- MRef next; /* Pointer to next list. */
- MSize numk; /* Number of used elements in this array. */
- TValue k[LJ_MIN_K64SZ]; /* Array of constants. */
-} K64Array;
-
-/* Free all chained arrays. */
-void lj_ir_k64_freeall(jit_State *J)
-{
- K64Array *k;
- for (k = mref(J->k64, K64Array); k; ) {
- K64Array *next = mref(k->next, K64Array);
- lj_mem_free(J2G(J), k, sizeof(K64Array));
- k = next;
- }
-}
-
-/* Find 64 bit constant in chained array or add it. */
-cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64)
-{
- K64Array *k, *kp = NULL;
- TValue *ntv;
- MSize idx;
- /* Search for the constant in the whole chain of arrays. */
- for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) {
- kp = k; /* Remember previous element in list. */
- for (idx = 0; idx < k->numk; idx++) { /* Search one array. */
- TValue *tv = &k->k[idx];
- if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */
- return tv;
- }
- }
- /* Constant was not found, need to add it. */
- if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */
- K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array);
- setmref(kn->next, NULL);
- kn->numk = 0;
- if (kp)
- setmref(kp->next, kn); /* Chain to the end of the list. */
- else
- setmref(J->k64, kn); /* Link first array. */
- kp = kn;
- }
- ntv = &kp->k[kp->numk++]; /* Add to current array. */
- ntv->u64 = u64;
- return ntv;
-}
-
-/* Intern 64 bit constant, given by its address. */
-TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv)
+/* Intern 64 bit constant, given by its 64 bit pattern. */
+TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64)
{
IRIns *ir, *cir = J->cur.ir;
IRRef ref;
IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64;
for (ref = J->chain[op]; ref; ref = cir[ref].prev)
- if (ir_k64(&cir[ref]) == tv)
+ if (ir_k64(&cir[ref])->u64 == u64)
goto found;
- ref = ir_nextk(J);
+ ref = ir_nextk64(J);
ir = IR(ref);
- lua_assert(checkptr32(tv));
- setmref(ir->ptr, tv);
+ ir[1].tv.u64 = u64;
ir->t.irt = t;
ir->o = op;
+ ir->op12 = 0;
ir->prev = J->chain[op];
J->chain[op] = (IRRef1)ref;
found:
@@ -264,13 +239,13 @@ found:
/* Intern FP constant, given by its 64 bit pattern. */
TRef lj_ir_knum_u64(jit_State *J, uint64_t u64)
{
- return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64));
+ return lj_ir_k64(J, IR_KNUM, u64);
}
/* Intern 64 bit integer constant. */
TRef lj_ir_kint64(jit_State *J, uint64_t u64)
{
- return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64));
+ return lj_ir_k64(J, IR_KINT64, u64);
}
/* Check whether a number is int and return it. -0 is NOT considered an int. */
@@ -305,14 +280,15 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t)
{
IRIns *ir, *cir = J->cur.ir;
IRRef ref;
- lua_assert(!isdead(J2G(J), o));
+ lj_assertJ(!isdead(J2G(J), o), "interning of dead GC object");
for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev)
if (ir_kgc(&cir[ref]) == o)
goto found;
- ref = ir_nextk(J);
+ ref = ir_nextkgc(J);
ir = IR(ref);
/* NOBARRIER: Current trace is a GC root. */
- setgcref(ir->gcr, o);
+ ir->op12 = 0;
+ setgcref(ir[LJ_GC64].gcr, o);
ir->t.irt = (uint8_t)t;
ir->o = IR_KGC;
ir->prev = J->chain[IR_KGC];
@@ -321,24 +297,44 @@ found:
return TREF(ref, t);
}
-/* Intern 32 bit pointer constant. */
+/* Allocate GCtrace constant placeholder (no interning). */
+TRef lj_ir_ktrace(jit_State *J)
+{
+ IRRef ref = ir_nextkgc(J);
+ IRIns *ir = IR(ref);
+ lj_assertJ(irt_toitype_(IRT_P64) == LJ_TTRACE, "mismatched type mapping");
+ ir->t.irt = IRT_P64;
+ ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */
+ ir->op12 = 0;
+ ir->prev = 0;
+ return TREF(ref, IRT_P64);
+}
+
+/* Intern pointer constant. */
TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr)
{
IRIns *ir, *cir = J->cur.ir;
IRRef ref;
- lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr);
+#if LJ_64 && !LJ_GC64
+ lj_assertJ((void *)(uintptr_t)u32ptr(ptr) == ptr, "out-of-range GC pointer");
+#endif
for (ref = J->chain[op]; ref; ref = cir[ref].prev)
- if (mref(cir[ref].ptr, void) == ptr)
+ if (ir_kptr(&cir[ref]) == ptr)
goto found;
+#if LJ_GC64
+ ref = ir_nextk64(J);
+#else
ref = ir_nextk(J);
+#endif
ir = IR(ref);
- setmref(ir->ptr, ptr);
- ir->t.irt = IRT_P32;
+ ir->op12 = 0;
+ setmref(ir[LJ_GC64].ptr, ptr);
+ ir->t.irt = IRT_PGC;
ir->o = op;
ir->prev = J->chain[op];
J->chain[op] = (IRRef1)ref;
found:
- return TREF(ref, IRT_P32);
+ return TREF(ref, IRT_PGC);
}
/* Intern typed NULL constant. */
@@ -367,7 +363,8 @@ TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot)
IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot);
IRRef ref;
/* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */
- lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot);
+ lj_assertJ(tref_isk(key) && slot == (IRRef)(IRRef1)slot,
+ "out-of-range key/slot");
for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev)
if (cir[ref].op12 == op12)
goto found;
@@ -388,14 +385,15 @@ found:
void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
{
UNUSED(L);
- lua_assert(ir->o != IR_KSLOT); /* Common mistake. */
+ lj_assertL(ir->o != IR_KSLOT, "unexpected KSLOT"); /* Common mistake. */
switch (ir->o) {
- case IR_KPRI: setitype(tv, irt_toitype(ir->t)); break;
+ case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break;
case IR_KINT: setintV(tv, ir->i); break;
case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break;
- case IR_KPTR: case IR_KKPTR: case IR_KNULL:
- setlightudV(tv, mref(ir->ptr, void));
+ case IR_KPTR: case IR_KKPTR:
+ setnumV(tv, (lua_Number)(uintptr_t)ir_kptr(ir));
break;
+ case IR_KNULL: setintV(tv, 0); break;
case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break;
#if LJ_HASFFI
case IR_KINT64: {
@@ -405,7 +403,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir)
break;
}
#endif
- default: lua_assert(0); break;
+ default: lj_assertL(0, "bad IR constant op %d", ir->o); break;
}
}
@@ -443,7 +441,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr)
if (!tref_isstr(tr)) {
if (!tref_isnumber(tr))
lj_trace_err(J, LJ_TRERR_BADTYPE);
- tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0);
+ tr = emitir(IRT(IR_TOSTR, IRT_STR), tr,
+ tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT);
}
return tr;
}
@@ -464,7 +463,7 @@ int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op)
case IR_UGE: return !(a < b);
case IR_ULE: return !(a > b);
case IR_UGT: return !(a <= b);
- default: lua_assert(0); return 0;
+ default: lj_assertX(0, "bad IR op %d", op); return 0;
}
}
@@ -477,7 +476,7 @@ int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op)
case IR_GE: return (res >= 0);
case IR_LE: return (res <= 0);
case IR_GT: return (res > 0);
- default: lua_assert(0); return 0;
+ default: lj_assertX(0, "bad IR op %d", op); return 0;
}
}
diff --git a/src/lj_ir.h b/src/lj_ir.h
index da73a4b7..ed492e93 100644
--- a/src/lj_ir.h
+++ b/src/lj_ir.h
@@ -40,6 +40,7 @@
_(USE, S , ref, ___) \
_(PHI, S , ref, ref) \
_(RENAME, S , ref, lit) \
+ _(PROF, S , ___, ___) \
\
/* Constants. */ \
_(KPRI, N , ___, ___) \
@@ -74,7 +75,6 @@
_(NEG, N , ref, ref) \
\
_(ABS, N , ref, ref) \
- _(ATAN2, N , ref, ref) \
_(LDEXP, N , ref, ref) \
_(MIN, C , ref, ref) \
_(MAX, C , ref, ref) \
@@ -95,7 +95,9 @@
_(UREFO, LW, ref, lit) \
_(UREFC, LW, ref, lit) \
_(FREF, R , ref, lit) \
+ _(TMPREF, S , ref, lit) \
_(STRREF, N , ref, ref) \
+ _(LREF, L , ___, ___) \
\
/* Loads and Stores. These must be in the same order. */ \
_(ALOAD, L , ref, ___) \
@@ -104,7 +106,8 @@
_(FLOAD, L , ref, lit) \
_(XLOAD, L , ref, lit) \
_(SLOAD, L , lit, lit) \
- _(VLOAD, L , ref, ___) \
+ _(VLOAD, L , ref, lit) \
+ _(ALEN, L , ref, ref) \
\
_(ASTORE, S , ref, ref) \
_(HSTORE, S , ref, ref) \
@@ -120,6 +123,11 @@
_(CNEW, AW, ref, ref) \
_(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \
\
+ /* Buffer operations. */ \
+ _(BUFHDR, L , ref, lit) \
+ _(BUFPUT, LW, ref, ref) \
+ _(BUFSTR, AW, ref, ref) \
+ \
/* Barriers. */ \
_(TBAR, S , ref, ___) \
_(OBAR, S , ref, ref) \
@@ -128,12 +136,13 @@
/* Type conversions. */ \
_(CONV, N , ref, lit) \
_(TOBIT, N , ref, ref) \
- _(TOSTR, N , ref, ___) \
+ _(TOSTR, N , ref, lit) \
_(STRTO, N , ref, ___) \
\
/* Calls. */ \
- _(CALLN, N , ref, lit) \
- _(CALLL, L , ref, lit) \
+ _(CALLN, NW, ref, lit) \
+ _(CALLA, AW, ref, lit) \
+ _(CALLL, LW, ref, lit) \
_(CALLS, S , ref, lit) \
_(CALLXS, S , ref, ref) \
_(CARG, N , ref, ref) \
@@ -170,8 +179,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE);
/* FPMATH sub-functions. ORDER FPM. */
#define IRFPMDEF(_) \
_(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \
- _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \
- _(SIN) _(COS) _(TAN) \
+ _(SQRT) _(LOG) _(LOG2) \
_(OTHER)
typedef enum {
@@ -186,6 +194,8 @@ IRFPMDEF(FPMENUM)
_(STR_LEN, offsetof(GCstr, len)) \
_(FUNC_ENV, offsetof(GCfunc, l.env)) \
_(FUNC_PC, offsetof(GCfunc, l.pc)) \
+ _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \
+ _(THREAD_ENV, offsetof(lua_State, env)) \
_(TAB_META, offsetof(GCtab, metatable)) \
_(TAB_ARRAY, offsetof(GCtab, array)) \
_(TAB_NODE, offsetof(GCtab, node)) \
@@ -195,9 +205,15 @@ IRFPMDEF(FPMENUM)
_(UDATA_META, offsetof(GCudata, metatable)) \
_(UDATA_UDTYPE, offsetof(GCudata, udtype)) \
_(UDATA_FILE, sizeof(GCudata)) \
+ _(SBUF_W, sizeof(GCudata) + offsetof(SBufExt, w)) \
+ _(SBUF_E, sizeof(GCudata) + offsetof(SBufExt, e)) \
+ _(SBUF_B, sizeof(GCudata) + offsetof(SBufExt, b)) \
+ _(SBUF_L, sizeof(GCudata) + offsetof(SBufExt, L)) \
+ _(SBUF_REF, sizeof(GCudata) + offsetof(SBufExt, cowref)) \
+ _(SBUF_R, sizeof(GCudata) + offsetof(SBufExt, r)) \
_(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \
_(CDATA_PTR, sizeof(GCcdata)) \
- _(CDATA_INT, sizeof(GCcdata)) \
+ _(CDATA_INT, sizeof(GCcdata)) \
_(CDATA_INT64, sizeof(GCcdata)) \
_(CDATA_INT64_4, sizeof(GCcdata) + 4)
@@ -208,18 +224,29 @@ IRFLDEF(FLENUM)
IRFL__MAX
} IRFieldID;
+/* TMPREF mode bits, stored in op2. */
+#define IRTMPREF_IN1 0x01 /* First input value. */
+#define IRTMPREF_OUT1 0x02 /* First output value. */
+#define IRTMPREF_OUT2 0x04 /* Second output value. */
+
/* SLOAD mode bits, stored in op2. */
#define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */
-#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */
+#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */
#define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */
#define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */
#define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */
#define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */
+#define IRSLOAD_KEYINDEX 0x40 /* Table traversal key index. */
-/* XLOAD mode, stored in op2. */
-#define IRXLOAD_READONLY 1 /* Load from read-only data. */
-#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */
-#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */
+/* XLOAD mode bits, stored in op2. */
+#define IRXLOAD_READONLY 0x01 /* Load from read-only data. */
+#define IRXLOAD_VOLATILE 0x02 /* Load from volatile data. */
+#define IRXLOAD_UNALIGNED 0x04 /* Unaligned load. */
+
+/* BUFHDR mode, stored in op2. */
+#define IRBUFHDR_RESET 0 /* Reset buffer. */
+#define IRBUFHDR_APPEND 1 /* Append to buffer. */
+#define IRBUFHDR_WRITE 2 /* Write to string buffer. */
/* CONV mode, stored in op2. */
#define IRCONV_SRCMASK 0x001f /* Source IRType. */
@@ -227,7 +254,6 @@ IRFLDEF(FLENUM)
#define IRCONV_DSH 5
#define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT)
#define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM)
-#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */
#define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */
#define IRCONV_MODEMASK 0x0fff
#define IRCONV_CONVMASK 0xf000
@@ -237,6 +263,12 @@ IRFLDEF(FLENUM)
#define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */
#define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */
#define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */
+#define IRCONV_NONE IRCONV_ANY /* INT|*64 no conv, but change type. */
+
+/* TOSTR mode, stored in op2. */
+#define IRTOSTR_INT 0 /* Convert integer to string. */
+#define IRTOSTR_NUM 1 /* Convert number to string. */
+#define IRTOSTR_CHAR 2 /* Convert char value to string. */
/* -- IR operands --------------------------------------------------------- */
@@ -276,7 +308,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
/* -- IR instruction types ------------------------------------------------ */
-/* Map of itypes to non-negative numbers. ORDER LJ_T.
+#define IRTSIZE_PGC (LJ_GC64 ? 8 : 4)
+
+/* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T.
** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for
** IRT_P32 and IRT_P64, which never escape the IR.
** The various integers are only used in the IR and can only escape to
@@ -284,12 +318,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1];
** contiguous and next to IRT_NUM (see the typerange macros below).
*/
#define IRTDEF(_) \
- _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \
- _(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \
- _(TAB, 4) _(UDATA, 4) \
+ _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \
+ _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \
+ _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \
+ _(UDATA, IRTSIZE_PGC) \
_(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \
_(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \
- _(SOFTFP, 4) /* There is room for 9 more types. */
+ _(SOFTFP, 4) /* There is room for 8 more types. */
/* IR result type and flags (8 bit). */
typedef enum {
@@ -300,6 +335,8 @@ IRTDEF(IRTENUM)
/* Native pointer type and the corresponding integer type. */
IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32,
+ IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32,
+ IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT,
IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT,
IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32,
@@ -353,7 +390,14 @@ typedef struct IRType1 { uint8_t irt; } IRType1;
#define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA))
#define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64))
-#if LJ_64
+#if LJ_GC64
+/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */
+#define IRT_IS64 \
+ ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\
+ (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\
+ (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\
+ (1u<<IRT_NIL))
+#elif LJ_64
#define IRT_IS64 \
((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD))
#else
@@ -374,7 +418,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
return IRT_INT;
else if (tvisnum(tv))
return IRT_NUM;
-#if LJ_64
+#if LJ_64 && !LJ_GC64
else if (tvislightud(tv))
return IRT_LIGHTUD;
#endif
@@ -384,11 +428,12 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv)
static LJ_AINLINE uint32_t irt_toitype_(IRType t)
{
- lua_assert(!LJ_64 || t != IRT_LIGHTUD);
+ lj_assertX(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD,
+ "no plain type tag for lightuserdata");
if (LJ_DUALNUM && t > IRT_NUM) {
return LJ_TISNUM;
} else {
- lua_assert(t <= IRT_NUM);
+ lj_assertX(t <= IRT_NUM, "no plain type tag for IR type %d", t);
return ~(uint32_t)t;
}
}
@@ -451,6 +496,7 @@ typedef uint32_t TRef;
#define TREF_REFMASK 0x0000ffff
#define TREF_FRAME 0x00010000
#define TREF_CONT 0x00020000
+#define TREF_KEYINDEX 0x00100000
#define TREF(ref, t) ((TRef)((ref) + ((t)<<24)))
@@ -464,6 +510,7 @@ typedef uint32_t TRef;
#define tref_isnil(tr) (tref_istype((tr), IRT_NIL))
#define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE))
#define tref_istrue(tr) (tref_istype((tr), IRT_TRUE))
+#define tref_islightud(tr) (tref_istype((tr), IRT_LIGHTUD))
#define tref_isstr(tr) (tref_istype((tr), IRT_STR))
#define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC))
#define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA))
@@ -496,7 +543,9 @@ typedef uint32_t TRef;
** +-------+-------+---+---+---+---+
** | op1 | op2 | t | o | r | s |
** +-------+-------+---+---+---+---+
-** | op12/i/gco | ot | prev | (alternative fields in union)
+** | op12/i/gco32 | ot | prev | (alternative fields in union)
+** +-------+-------+---+---+---+---+
+** | TValue/gco64 | (2nd IR slot for 64 bit constants)
** +---------------+-------+-------+
** 32 16 16
**
@@ -524,21 +573,27 @@ typedef union IRIns {
)
};
int32_t i; /* 32 bit signed integer literal (overlaps op12). */
- GCRef gcr; /* GCobj constant (overlaps op12). */
- MRef ptr; /* Pointer constant (overlaps op12). */
+ GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */
+ MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */
+ TValue tv; /* TValue constant (overlaps entire slot). */
} IRIns;
-#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr))
+#define ir_isk64(ir) \
+ ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \
+ (LJ_GC64 && \
+ ((ir)->o == IR_KGC || (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR)))
+
+#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr))
#define ir_kstr(ir) (gco2str(ir_kgc((ir))))
#define ir_ktab(ir) (gco2tab(ir_kgc((ir))))
#define ir_kfunc(ir) (gco2func(ir_kgc((ir))))
#define ir_kcdata(ir) (gco2cd(ir_kgc((ir))))
-#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue))
-#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
-#define ir_k64(ir) \
- check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue))
+#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv)
+#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv)
+#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv)
#define ir_kptr(ir) \
- check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void))
+ check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \
+ mref((ir)[LJ_GC64].ptr, void))
/* A store or any other op with a non-weak guard has a side-effect. */
static LJ_AINLINE int ir_sideeff(IRIns *ir)
diff --git a/src/lj_ircall.h b/src/lj_ircall.h
index 9ddfb156..67fb58ae 100644
--- a/src/lj_ircall.h
+++ b/src/lj_ircall.h
@@ -16,22 +16,26 @@ typedef struct CCallInfo {
uint32_t flags; /* Number of arguments and flags. */
} CCallInfo;
-#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */
+#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* # of args. */
#define CCI_NARGS_MAX 32 /* Max. # of args. */
#define CCI_OTSHIFT 16
#define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */
+#define CCI_TYPE(ci) (((ci)->flags>>CCI_OTSHIFT) & IRT_TYPE)
#define CCI_OPSHIFT 24
#define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */
#define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT)
+#define CCI_CALL_A (IR_CALLA << CCI_OPSHIFT)
#define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT)
#define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT)
#define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL)
+#define CCI_CALL_FA (CCI_CALL_A|CCI_CC_FASTCALL)
#define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL)
#define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL)
/* C call info flags. */
+#define CCI_T (IRT_GUARD << CCI_OTSHIFT) /* May throw. */
#define CCI_L 0x0100 /* Implicit L arg. */
#define CCI_CASTU64 0x0200 /* Cast u64 result to number. */
#define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */
@@ -45,6 +49,17 @@ typedef struct CCallInfo {
#define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */
#define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */
+/* Extra args for SOFTFP, SPLIT 64 bit. */
+#define CCI_XARGS_SHIFT 14
+#define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3)
+#define CCI_XA (1u << CCI_XARGS_SHIFT)
+
+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
+#define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci)))
+#else
+#define CCI_XNARGS(ci) CCI_NARGS((ci))
+#endif
+
/* Helpers for conditional function definitions. */
#define IRCALLCOND_ANY(x) x
@@ -66,6 +81,18 @@ typedef struct CCallInfo {
#define IRCALLCOND_SOFTFP_FFI(x) NULL
#endif
+#if LJ_SOFTFP && LJ_TARGET_MIPS
+#define IRCALLCOND_SOFTFP_MIPS(x) x
+#else
+#define IRCALLCOND_SOFTFP_MIPS(x) NULL
+#endif
+
+#if LJ_SOFTFP && LJ_TARGET_MIPS64
+#define IRCALLCOND_SOFTFP_MIPS64(x) x
+#else
+#define IRCALLCOND_SOFTFP_MIPS64(x) NULL
+#endif
+
#define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS)
#if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64)
@@ -86,93 +113,158 @@ typedef struct CCallInfo {
#define IRCALLCOND_FFI32(x) NULL
#endif
+#if LJ_HASBUFFER
+#define IRCALLCOND_BUFFER(x) x
+#else
+#define IRCALLCOND_BUFFER(x) NULL
+#endif
+
+#if LJ_HASBUFFER && LJ_HASFFI
+#define IRCALLCOND_BUFFFI(x) x
+#else
+#define IRCALLCOND_BUFFFI(x) NULL
+#endif
+
#if LJ_SOFTFP
-#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */
+#define XA_FP CCI_XA
+#define XA2_FP (CCI_XA+CCI_XA)
#else
-#define ARG1_FP 1
+#define XA_FP 0
+#define XA2_FP 0
+#endif
+
+#if LJ_SOFTFP32
+#define XA_FP32 CCI_XA
+#define XA2_FP32 (CCI_XA+CCI_XA)
+#else
+#define XA_FP32 0
+#define XA2_FP32 0
#endif
#if LJ_32
-#define ARG2_64 4 /* Treat as 4 32 bit arguments. */
+#define XA_64 CCI_XA
+#define XA2_64 (CCI_XA+CCI_XA)
#else
-#define ARG2_64 2
+#define XA_64 0
+#define XA2_64 0
#endif
/* Function definitions for CALL* instructions. */
#define IRCALLDEF(_) \
_(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \
- _(ANY, lj_str_new, 3, S, STR, CCI_L) \
+ _(ANY, lj_str_find, 4, N, PGC, 0) \
+ _(ANY, lj_str_new, 3, S, STR, CCI_L|CCI_T) \
_(ANY, lj_strscan_num, 2, FN, INT, 0) \
- _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \
- _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \
- _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \
- _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \
- _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \
+ _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L|CCI_T) \
+ _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L|CCI_T) \
+ _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L|CCI_T) \
+ _(ANY, lj_strfmt_putint, 2, FL, PGC, CCI_T) \
+ _(ANY, lj_strfmt_putnum, 2, FL, PGC, CCI_T) \
+ _(ANY, lj_strfmt_putquoted, 2, FL, PGC, CCI_T) \
+ _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64|CCI_T) \
+ _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP|CCI_T) \
+ _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP|CCI_T) \
+ _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP|CCI_T) \
+ _(ANY, lj_strfmt_putfstr, 3, L, PGC, CCI_T) \
+ _(ANY, lj_strfmt_putfchar, 3, L, PGC, CCI_T) \
+ _(ANY, lj_buf_putmem, 3, S, PGC, CCI_T) \
+ _(ANY, lj_buf_putstr, 2, FL, PGC, CCI_T) \
+ _(ANY, lj_buf_putchar, 2, FL, PGC, CCI_T) \
+ _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, CCI_T) \
+ _(ANY, lj_buf_putstr_lower, 2, FL, PGC, CCI_T) \
+ _(ANY, lj_buf_putstr_upper, 2, FL, PGC, CCI_T) \
+ _(ANY, lj_buf_putstr_rep, 3, L, PGC, CCI_T) \
+ _(ANY, lj_buf_puttab, 5, L, PGC, CCI_T) \
+ _(BUFFER, lj_bufx_set, 4, S, NIL, 0) \
+ _(BUFFFI, lj_bufx_more, 2, FS, INT, CCI_T) \
+ _(BUFFER, lj_serialize_put, 2, FS, PGC, CCI_T) \
+ _(BUFFER, lj_serialize_get, 2, FS, PTR, CCI_T) \
+ _(BUFFER, lj_serialize_encode, 2, FA, STR, CCI_L|CCI_T) \
+ _(BUFFER, lj_serialize_decode, 3, A, INT, CCI_L|CCI_T) \
+ _(ANY, lj_buf_tostr, 1, FL, STR, CCI_T) \
+ _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L|CCI_T) \
+ _(ANY, lj_tab_new1, 2, FA, TAB, CCI_L|CCI_T) \
+ _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \
+ _(ANY, lj_tab_clear, 1, FS, NIL, 0) \
+ _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \
+ _(ANY, lj_tab_keyindex, 2, FL, INT, 0) \
+ _(ANY, lj_vm_next, 2, FL, PTR, 0) \
_(ANY, lj_tab_len, 1, FL, INT, 0) \
+ _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \
_(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \
_(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \
- _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \
- _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \
+ _(ANY, lj_mem_newgco, 2, FA, PGC, CCI_L|CCI_T) \
+ _(ANY, lj_prng_u64d, 1, FS, NUM, CCI_CASTU64) \
_(ANY, lj_vm_modi, 2, FN, INT, 0) \
- _(ANY, sinh, ARG1_FP, N, NUM, 0) \
- _(ANY, cosh, ARG1_FP, N, NUM, 0) \
- _(ANY, tanh, ARG1_FP, N, NUM, 0) \
- _(ANY, fputc, 2, S, INT, 0) \
- _(ANY, fwrite, 4, S, INT, 0) \
- _(ANY, fflush, 1, S, INT, 0) \
+ _(ANY, log10, 1, N, NUM, XA_FP) \
+ _(ANY, exp, 1, N, NUM, XA_FP) \
+ _(ANY, sin, 1, N, NUM, XA_FP) \
+ _(ANY, cos, 1, N, NUM, XA_FP) \
+ _(ANY, tan, 1, N, NUM, XA_FP) \
+ _(ANY, asin, 1, N, NUM, XA_FP) \
+ _(ANY, acos, 1, N, NUM, XA_FP) \
+ _(ANY, atan, 1, N, NUM, XA_FP) \
+ _(ANY, sinh, 1, N, NUM, XA_FP) \
+ _(ANY, cosh, 1, N, NUM, XA_FP) \
+ _(ANY, tanh, 1, N, NUM, XA_FP) \
+ _(ANY, fputc, 2, S, INT, 0) \
+ _(ANY, fwrite, 4, S, INT, 0) \
+ _(ANY, fflush, 1, S, INT, 0) \
/* ORDER FPM */ \
- _(FPMATH, lj_vm_floor, ARG1_FP, N, NUM, 0) \
- _(FPMATH, lj_vm_ceil, ARG1_FP, N, NUM, 0) \
- _(FPMATH, lj_vm_trunc, ARG1_FP, N, NUM, 0) \
- _(FPMATH, sqrt, ARG1_FP, N, NUM, 0) \
- _(FPMATH, exp, ARG1_FP, N, NUM, 0) \
- _(FPMATH, lj_vm_exp2, ARG1_FP, N, NUM, 0) \
- _(FPMATH, log, ARG1_FP, N, NUM, 0) \
- _(FPMATH, lj_vm_log2, ARG1_FP, N, NUM, 0) \
- _(FPMATH, log10, ARG1_FP, N, NUM, 0) \
- _(FPMATH, sin, ARG1_FP, N, NUM, 0) \
- _(FPMATH, cos, ARG1_FP, N, NUM, 0) \
- _(FPMATH, tan, ARG1_FP, N, NUM, 0) \
- _(FPMATH, lj_vm_powi, ARG1_FP+1, N, NUM, 0) \
- _(FPMATH, pow, ARG1_FP*2, N, NUM, 0) \
- _(FPMATH, atan2, ARG1_FP*2, N, NUM, 0) \
- _(FPMATH, ldexp, ARG1_FP+1, N, NUM, 0) \
- _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \
- _(SOFTFP, softfp_add, 4, N, NUM, 0) \
- _(SOFTFP, softfp_sub, 4, N, NUM, 0) \
- _(SOFTFP, softfp_mul, 4, N, NUM, 0) \
- _(SOFTFP, softfp_div, 4, N, NUM, 0) \
- _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \
+ _(FPMATH, lj_vm_floor, 1, N, NUM, XA_FP) \
+ _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \
+ _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \
+ _(FPMATH, sqrt, 1, N, NUM, XA_FP) \
+ _(ANY, log, 1, N, NUM, XA_FP) \
+ _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \
+ _(ANY, pow, 2, N, NUM, XA2_FP) \
+ _(ANY, atan2, 2, N, NUM, XA2_FP) \
+ _(ANY, ldexp, 2, N, NUM, XA_FP) \
+ _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \
+ _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \
+ _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \
+ _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \
+ _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \
+ _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \
_(SOFTFP, softfp_i2d, 1, N, NUM, 0) \
- _(SOFTFP, softfp_d2i, 2, N, INT, 0) \
+ _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \
+ _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \
+ _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \
+ _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
_(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
_(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
- _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \
- _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \
+ _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
+ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
_(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
_(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
_(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
_(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
- _(FP64_FFI, fp64_l2d, 2, N, NUM, 0) \
- _(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \
- _(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \
- _(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \
- _(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \
- _(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \
+ _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
+ _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
+ _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
+ _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
+ _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \
+ _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
_(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
_(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
- _(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \
- _(FFI, lj_carith_divu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \
- _(FFI, lj_carith_modi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \
- _(FFI, lj_carith_modu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \
- _(FFI, lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \
- _(FFI, lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \
- _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \
- _(FFI, strlen, 1, L, INTP, 0) \
- _(FFI, memcpy, 3, S, PTR, 0) \
- _(FFI, memset, 3, S, PTR, 0) \
- _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \
- _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER)
+ _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
+ _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
+ _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
+ _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
+ _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
+ _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
+ _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \
+ _(FFI, lj_cdata_setfin, 4, S, NIL, CCI_L) \
+ _(FFI, strlen, 1, L, INTP, 0) \
+ _(FFI, memcpy, 3, S, PTR, 0) \
+ _(FFI, memset, 3, S, PTR, 0) \
+ _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \
+ _(FFI32, lj_carith_mul64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
+ _(FFI32, lj_carith_shl64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
+ _(FFI32, lj_carith_shr64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
+ _(FFI32, lj_carith_sar64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
+ _(FFI32, lj_carith_rol64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
+ _(FFI32, lj_carith_ror64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \
\
/* End of list. */
@@ -220,6 +312,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
#define fp64_f2l __aeabi_f2lz
#define fp64_f2ul __aeabi_f2ulz
#endif
+#elif LJ_TARGET_MIPS || LJ_TARGET_PPC
+#define softfp_add __adddf3
+#define softfp_sub __subdf3
+#define softfp_mul __muldf3
+#define softfp_div __divdf3
+#define softfp_cmp __ledf2
+#define softfp_i2d __floatsidf
+#define softfp_d2i __fixdfsi
+#define softfp_ui2d __floatunsidf
+#define softfp_f2d __extendsfdf2
+#define softfp_d2ui __fixunsdfsi
+#define softfp_d2f __truncdfsf2
+#define softfp_i2f __floatsisf
+#define softfp_ui2f __floatunsisf
+#define softfp_f2i __fixsfsi
+#define softfp_f2ui __fixunssfsi
#else
#error "Missing soft-float definitions for target architecture"
#endif
@@ -240,10 +348,14 @@ extern float softfp_ui2f(uint32_t a);
extern int32_t softfp_f2i(float a);
extern uint32_t softfp_f2ui(float a);
#endif
+#if LJ_TARGET_MIPS
+extern double lj_vm_sfmin(double a, double b);
+extern double lj_vm_sfmax(double a, double b);
+#endif
#endif
#if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP)
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
#define fp64_l2d __floatdidf
#define fp64_ul2d __floatundidf
#define fp64_l2f __floatdisf
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index e89d796f..d239f173 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -36,11 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
return ref;
}
+LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs);
+
/* Interning of constants. */
LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k);
-LJ_FUNC void lj_ir_k64_freeall(jit_State *J);
-LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv);
-LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64);
+LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64);
LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64);
LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n);
LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64);
@@ -48,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t);
LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr);
LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t);
LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot);
+LJ_FUNC TRef lj_ir_ktrace(jit_State *J);
#if LJ_64
#define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k))
@@ -74,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n)
#define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000))
/* Special 128 bit SIMD constants. */
-#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS))
-#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG))
+#define lj_ir_ksimd(J, idx) \
+ lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J))
/* Access to constants. */
LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir);
@@ -119,10 +120,11 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J);
-LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J);
+LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J);
LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J);
LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J);
LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim);
+LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim);
LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref);
/* Dead-store elimination. */
@@ -143,13 +145,12 @@ LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
TValue *vb, TValue *vc, IROp op);
LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc);
LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
-LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc);
LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase);
/* Optimization passes. */
LJ_FUNC void lj_opt_dce(jit_State *J);
LJ_FUNC int lj_opt_loop(jit_State *J);
-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
LJ_FUNC void lj_opt_split(jit_State *J);
#else
#define lj_opt_split(J) UNUSED(J)
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 02850935..32b3861a 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -7,73 +7,88 @@
#define _LJ_JIT_H
#include "lj_obj.h"
+#if LJ_HASJIT
#include "lj_ir.h"
-/* JIT engine flags. */
+/* -- JIT engine flags ---------------------------------------------------- */
+
+/* General JIT engine flags. 4 bits. */
#define JIT_F_ON 0x00000001
-/* CPU-specific JIT engine flags. */
+/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */
+#define JIT_F_CPU 0x00000010
+
#if LJ_TARGET_X86ORX64
-#define JIT_F_CMOV 0x00000010
-#define JIT_F_SSE2 0x00000020
-#define JIT_F_SSE3 0x00000040
-#define JIT_F_SSE4_1 0x00000080
-#define JIT_F_P4 0x00000100
-#define JIT_F_PREFER_IMUL 0x00000200
-#define JIT_F_SPLIT_XMM 0x00000400
-#define JIT_F_LEA_AGU 0x00000800
-
-/* Names for the CPU-specific flags. Must match the order above. */
-#define JIT_F_CPU_FIRST JIT_F_CMOV
-#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM"
+
+#define JIT_F_SSE3 (JIT_F_CPU << 0)
+#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
+#define JIT_F_BMI2 (JIT_F_CPU << 2)
+
+
+#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
+
#elif LJ_TARGET_ARM
-#define JIT_F_ARMV6_ 0x00000010
-#define JIT_F_ARMV6T2_ 0x00000020
-#define JIT_F_ARMV7 0x00000040
-#define JIT_F_VFPV2 0x00000080
-#define JIT_F_VFPV3 0x00000100
-
-#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7)
-#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7)
+
+#define JIT_F_ARMV6_ (JIT_F_CPU << 0)
+#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1)
+#define JIT_F_ARMV7 (JIT_F_CPU << 2)
+#define JIT_F_ARMV8 (JIT_F_CPU << 3)
+#define JIT_F_VFPV2 (JIT_F_CPU << 4)
+#define JIT_F_VFPV3 (JIT_F_CPU << 5)
+
+#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
+#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8)
#define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3)
-/* Names for the CPU-specific flags. Must match the order above. */
-#define JIT_F_CPU_FIRST JIT_F_ARMV6_
-#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3"
+#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3"
+
#elif LJ_TARGET_PPC
-#define JIT_F_SQRT 0x00000010
-#define JIT_F_ROUND 0x00000020
-/* Names for the CPU-specific flags. Must match the order above. */
-#define JIT_F_CPU_FIRST JIT_F_SQRT
+#define JIT_F_SQRT (JIT_F_CPU << 0)
+#define JIT_F_ROUND (JIT_F_CPU << 1)
+
#define JIT_F_CPUSTRING "\4SQRT\5ROUND"
+
#elif LJ_TARGET_MIPS
-#define JIT_F_MIPS32R2 0x00000010
-/* Names for the CPU-specific flags. Must match the order above. */
-#define JIT_F_CPU_FIRST JIT_F_MIPS32R2
+#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0)
+
+#if LJ_TARGET_MIPS32
+#if LJ_TARGET_MIPSR6
+#define JIT_F_CPUSTRING "\010MIPS32R6"
+#else
#define JIT_F_CPUSTRING "\010MIPS32R2"
+#endif
+#else
+#if LJ_TARGET_MIPSR6
+#define JIT_F_CPUSTRING "\010MIPS64R6"
#else
-#define JIT_F_CPU_FIRST 0
+#define JIT_F_CPUSTRING "\010MIPS64R2"
+#endif
+#endif
+
+#else
+
#define JIT_F_CPUSTRING ""
+
#endif
-/* Optimization flags. */
+/* Optimization flags. 12 bits. */
+#define JIT_F_OPT 0x00010000
#define JIT_F_OPT_MASK 0x0fff0000
-#define JIT_F_OPT_FOLD 0x00010000
-#define JIT_F_OPT_CSE 0x00020000
-#define JIT_F_OPT_DCE 0x00040000
-#define JIT_F_OPT_FWD 0x00080000
-#define JIT_F_OPT_DSE 0x00100000
-#define JIT_F_OPT_NARROW 0x00200000
-#define JIT_F_OPT_LOOP 0x00400000
-#define JIT_F_OPT_ABC 0x00800000
-#define JIT_F_OPT_SINK 0x01000000
-#define JIT_F_OPT_FUSE 0x02000000
+#define JIT_F_OPT_FOLD (JIT_F_OPT << 0)
+#define JIT_F_OPT_CSE (JIT_F_OPT << 1)
+#define JIT_F_OPT_DCE (JIT_F_OPT << 2)
+#define JIT_F_OPT_FWD (JIT_F_OPT << 3)
+#define JIT_F_OPT_DSE (JIT_F_OPT << 4)
+#define JIT_F_OPT_NARROW (JIT_F_OPT << 5)
+#define JIT_F_OPT_LOOP (JIT_F_OPT << 6)
+#define JIT_F_OPT_ABC (JIT_F_OPT << 7)
+#define JIT_F_OPT_SINK (JIT_F_OPT << 8)
+#define JIT_F_OPT_FUSE (JIT_F_OPT << 9)
/* Optimizations names for -O. Must match the order above. */
-#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD
#define JIT_F_OPTSTRING \
"\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"
@@ -85,6 +100,8 @@
JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
#define JIT_F_OPT_DEFAULT JIT_F_OPT_3
+/* -- JIT engine parameters ----------------------------------------------- */
+
#if LJ_TARGET_WINDOWS || LJ_64
/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
#define JIT_P_sizemcode_DEFAULT 64
@@ -100,6 +117,7 @@
_(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \
_(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \
_(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \
+ _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \
\
_(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \
_(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \
@@ -126,11 +144,14 @@ JIT_PARAMDEF(JIT_PARAMENUM)
#define JIT_PARAMSTR(len, name, value) #len #name
#define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR)
+/* -- JIT engine data structures ------------------------------------------ */
+
/* Trace compiler state. */
typedef enum {
LJ_TRACE_IDLE, /* Trace compiler idle. */
LJ_TRACE_ACTIVE = 0x10,
LJ_TRACE_RECORD, /* Bytecode recording active. */
+ LJ_TRACE_RECORD_1ST, /* Record 1st instruction, too. */
LJ_TRACE_START, /* New trace started. */
LJ_TRACE_END, /* End of trace. */
LJ_TRACE_ASM, /* Assemble trace. */
@@ -165,6 +186,7 @@ typedef struct MCLink {
typedef struct SnapShot {
uint32_t mapofs; /* Offset into snapshot map. */
IRRef1 ref; /* First IR ref for this snapshot. */
+ uint16_t mcofs; /* Offset into machine code in MCode units. */
uint8_t nslots; /* Number of valid slots. */
uint8_t topslot; /* Maximum frame extent. */
uint8_t nent; /* Number of compressed entries. */
@@ -180,20 +202,35 @@ typedef uint32_t SnapEntry;
#define SNAP_CONT 0x020000 /* Continuation slot. */
#define SNAP_NORESTORE 0x040000 /* No need to restore slot. */
#define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */
+#define SNAP_KEYINDEX 0x100000 /* Traversal key index. */
LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME);
LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);
+LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX);
#define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref))
#define SNAP_TR(slot, tr) \
- (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
+ (((SnapEntry)(slot) << 24) + \
+ ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK)))
+#if !LJ_FR2
#define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc))
+#endif
#define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz))
#define snap_ref(sn) ((sn) & 0xffff)
#define snap_slot(sn) ((BCReg)((sn) >> 24))
#define snap_isframe(sn) ((sn) & SNAP_FRAME)
-#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn))
#define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
+static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn)
+{
+#if LJ_FR2
+ uint64_t pcbase;
+ memcpy(&pcbase, sn, sizeof(uint64_t));
+ return (const BCIns *)(pcbase >> 8);
+#else
+ return (const BCIns *)(uintptr_t)*sn;
+#endif
+}
+
/* Snapshot and exit numbers. */
typedef uint32_t SnapNo;
typedef uint32_t ExitNo;
@@ -211,7 +248,8 @@ typedef enum {
LJ_TRLINK_UPREC, /* Up-recursion. */
LJ_TRLINK_DOWNREC, /* Down-recursion. */
LJ_TRLINK_INTERP, /* Fallback to interpreter. */
- LJ_TRLINK_RETURN /* Return to interpreter. */
+ LJ_TRLINK_RETURN, /* Return to interpreter. */
+ LJ_TRLINK_STITCH /* Trace stitching. */
} TraceLink;
/* Trace object. */
@@ -219,6 +257,9 @@ typedef struct GCtrace {
GCHeader;
uint16_t nsnap; /* Number of snapshots. */
IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */
+#if LJ_GC64
+ uint32_t unused_gc64;
+#endif
GCRef gclist;
IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */
IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */
@@ -294,6 +335,16 @@ typedef struct ScEvEntry {
uint8_t dir; /* Direction. 1: +, 0: -. */
} ScEvEntry;
+/* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */
+typedef struct RBCHashEntry {
+ MRef pc; /* Bytecode PC. */
+ GCRef pt; /* Prototype. */
+ IRRef ref; /* IR reference. */
+} RBCHashEntry;
+
+/* Number of slots in the reverse bytecode hash table. Must be a power of 2. */
+#define RBCHASH_SLOTS 8
+
/* 128 bit SIMD constants. */
enum {
LJ_KSIMD_ABS,
@@ -301,12 +352,53 @@ enum {
LJ_KSIMD__MAX
};
+enum {
+#if LJ_TARGET_X86ORX64
+ LJ_K64_TOBIT, /* 2^52 + 2^51 */
+ LJ_K64_2P64, /* 2^64 */
+ LJ_K64_M2P64, /* -2^64 */
+#if LJ_32
+ LJ_K64_M2P64_31, /* -2^64 or -2^31 */
+#else
+ LJ_K64_M2P64_31 = LJ_K64_M2P64,
+#endif
+#endif
+#if LJ_TARGET_MIPS
+ LJ_K64_2P31, /* 2^31 */
+#if LJ_64
+ LJ_K64_2P63, /* 2^63 */
+ LJ_K64_M2P64, /* -2^64 */
+#endif
+#endif
+ LJ_K64__MAX,
+};
+#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS)
+
+enum {
+#if LJ_TARGET_X86ORX64
+ LJ_K32_M2P64_31, /* -2^64 or -2^31 */
+#endif
+#if LJ_TARGET_PPC
+ LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
+ LJ_K32_2P52, /* 2^52 */
+#endif
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS
+ LJ_K32_2P31, /* 2^31 */
+#endif
+#if LJ_TARGET_MIPS64
+ LJ_K32_2P63, /* 2^63 */
+ LJ_K32_M2P64, /* -2^64 */
+#endif
+ LJ_K32__MAX
+};
+#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS)
+
/* Get 16 byte aligned pointer to SIMD constant. */
#define LJ_KSIMD(J, n) \
((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
/* Set/reset flag to activate the SPLIT pass for the current trace. */
-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
#define lj_needsplit(J) (J->needsplit = 1)
#define lj_resetsplit(J) (J->needsplit = 0)
#else
@@ -317,13 +409,14 @@ enum {
/* Fold state is used to fold instructions on-the-fly. */
typedef struct FoldState {
IRIns ins; /* Currently emitted instruction. */
- IRIns left; /* Instruction referenced by left operand. */
- IRIns right; /* Instruction referenced by right operand. */
+ IRIns left[2]; /* Instruction referenced by left operand. */
+ IRIns right[2]; /* Instruction referenced by right operand. */
} FoldState;
/* JIT compiler state. */
typedef struct jit_State {
GCtrace cur; /* Current trace. */
+ GCtrace *curfinal; /* Final address of current trace (set during asm). */
lua_State *L; /* Current Lua state. */
const BCIns *pc; /* Current PC. */
@@ -353,8 +446,13 @@ typedef struct jit_State {
int32_t framedepth; /* Current frame depth. */
int32_t retdepth; /* Return frame depth (count of RETF). */
- MRef k64; /* Pointer to chained array of 64 bit constants. */
+#if LJ_K32__USED
+ uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */
+#endif
TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */
+#if LJ_K64__USED
+ TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */
+#endif
IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */
IRRef irtoplim; /* Upper limit of instuction buffer (biased). */
@@ -367,13 +465,15 @@ typedef struct jit_State {
MSize sizesnapmap; /* Size of temp. snapshot map buffer. */
PostProc postproc; /* Required post-processing after execution. */
-#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
- int needsplit; /* Need SPLIT pass. */
+#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)
+ uint8_t needsplit; /* Need SPLIT pass. */
#endif
+ uint8_t retryrec; /* Retry recording. */
GCRef *trace; /* Array of traces. */
TraceNo freetrace; /* Start of scan for next free trace. */
MSize sizetrace; /* Size of trace array. */
+ IRRef1 ktrace; /* Reference to KGC with GCtrace. */
IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */
TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */
@@ -384,7 +484,10 @@ typedef struct jit_State {
HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */
uint32_t penaltyslot; /* Round-robin index into penalty slots. */
- uint32_t prngstate; /* PRNG state. */
+
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+ RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */
+#endif
BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */
uint32_t bpropslot; /* Round-robin index into bpropcache slots. */
@@ -394,6 +497,7 @@ typedef struct jit_State {
const BCIns *startpc; /* Bytecode PC of starting instruction. */
TraceNo parent; /* Parent of current side trace (0 for root traces). */
ExitNo exitno; /* Exit number in parent of current side trace. */
+ int exitcode; /* Exit code from unwound trace. */
BCIns *patchpc; /* PC for pending re-patch. */
BCIns patchins; /* Instruction for pending re-patch. */
@@ -406,14 +510,19 @@ typedef struct jit_State {
size_t szallmcarea; /* Total size of all allocated mcode areas. */
TValue errinfo; /* Additional info element for trace errors. */
+
+#if LJ_HASPROFILE
+ GCproto *prev_pt; /* Previous prototype. */
+ BCLine prev_line; /* Previous line. */
+ int prof_mode; /* Profiling mode: 0, 'f', 'l'. */
+#endif
} jit_State;
-/* Trivial PRNG e.g. used for penalty randomization. */
-static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits)
-{
- /* Yes, this LCG is very weak, but that doesn't matter for our use case. */
- J->prngstate = J->prngstate * 1103515245 + 12345;
- return J->prngstate >> (32-bits);
-}
+#ifdef LUA_USE_ASSERT
+#define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__)
+#else
+#define lj_assertJ(c, ...) ((void)J)
+#endif
+#endif
#endif
diff --git a/src/lj_lex.c b/src/lj_lex.c
index a74b4d6d..463a87ce 100644
--- a/src/lj_lex.c
+++ b/src/lj_lex.c
@@ -12,6 +12,7 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_str.h"
#if LJ_HASFFI
#include "lj_tab.h"
@@ -24,6 +25,7 @@
#include "lj_parse.h"
#include "lj_char.h"
#include "lj_strscan.h"
+#include "lj_strfmt.h"
/* Lua lexer token names. */
static const char *const tokennames[] = {
@@ -37,54 +39,54 @@ TKDEF(TKSTR1, TKSTR2)
/* -- Buffer handling ----------------------------------------------------- */
-#define char2int(c) ((int)(uint8_t)(c))
-#define next(ls) \
- (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls))
-#define save_and_next(ls) (save(ls, ls->current), next(ls))
-#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r')
-#define END_OF_STREAM (-1)
+#define LEX_EOF (-1)
+#define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r')
-static int fillbuf(LexState *ls)
+/* Get more input from reader. */
+static LJ_NOINLINE LexChar lex_more(LexState *ls)
{
size_t sz;
- const char *buf = ls->rfunc(ls->L, ls->rdata, &sz);
- if (buf == NULL || sz == 0) return END_OF_STREAM;
- if (sz >= LJ_MAX_MEM) {
+ const char *p = ls->rfunc(ls->L, ls->rdata, &sz);
+ if (p == NULL || sz == 0) return LEX_EOF;
+ if (sz >= LJ_MAX_BUF) {
if (sz != ~(size_t)0) lj_err_mem(ls->L);
+ sz = ~(uintptr_t)0 - (uintptr_t)p;
+ if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1;
ls->endmark = 1;
}
- ls->n = (MSize)sz - 1;
- ls->p = buf;
- return char2int(*(ls->p++));
+ ls->pe = p + sz;
+ ls->p = p + 1;
+ return (LexChar)(uint8_t)p[0];
}
-static LJ_NOINLINE void save_grow(LexState *ls, int c)
+/* Get next character. */
+static LJ_AINLINE LexChar lex_next(LexState *ls)
{
- MSize newsize;
- if (ls->sb.sz >= LJ_MAX_STR/2)
- lj_lex_error(ls, 0, LJ_ERR_XELEM);
- newsize = ls->sb.sz * 2;
- lj_str_resizebuf(ls->L, &ls->sb, newsize);
- ls->sb.buf[ls->sb.n++] = (char)c;
+ return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls));
}
-static LJ_AINLINE void save(LexState *ls, int c)
+/* Save character. */
+static LJ_AINLINE void lex_save(LexState *ls, LexChar c)
{
- if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz))
- save_grow(ls, c);
- else
- ls->sb.buf[ls->sb.n++] = (char)c;
+ lj_buf_putb(&ls->sb, c);
+}
+
+/* Save previous character and get next character. */
+static LJ_AINLINE LexChar lex_savenext(LexState *ls)
+{
+ lex_save(ls, ls->c);
+ return lex_next(ls);
}
-static void inclinenumber(LexState *ls)
+/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */
+static void lex_newline(LexState *ls)
{
- int old = ls->current;
- lua_assert(currIsNewline(ls));
- next(ls); /* skip `\n' or `\r' */
- if (currIsNewline(ls) && ls->current != old)
- next(ls); /* skip `\n\r' or `\r\n' */
+ LexChar old = ls->c;
+ lj_assertLS(lex_iseol(ls), "bad usage");
+ lex_next(ls); /* Skip "\n" or "\r". */
+ if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */
if (++ls->linenumber >= LJ_MAX_LINE)
- lj_lex_error(ls, ls->token, LJ_ERR_XLINES);
+ lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
}
/* -- Scanner for terminals ----------------------------------------------- */
@@ -93,19 +95,17 @@ static void inclinenumber(LexState *ls)
static void lex_number(LexState *ls, TValue *tv)
{
StrScanFmt fmt;
- int c, xp = 'e';
- lua_assert(lj_char_isdigit(ls->current));
- if ((c = ls->current) == '0') {
- save_and_next(ls);
- if ((ls->current | 0x20) == 'x') xp = 'p';
- }
- while (lj_char_isident(ls->current) || ls->current == '.' ||
- ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) {
- c = ls->current;
- save_and_next(ls);
+ LexChar c, xp = 'e';
+ lj_assertLS(lj_char_isdigit(ls->c), "bad usage");
+ if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x')
+ xp = 'p';
+ while (lj_char_isident(ls->c) || ls->c == '.' ||
+ ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) {
+ c = ls->c;
+ lex_savenext(ls);
}
- save(ls, '\0');
- fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv,
+ lex_save(ls, '\0');
+ fmt = lj_strscan_scan((const uint8_t *)ls->sb.b, sbuflen(&ls->sb)-1, tv,
(LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
(LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
if (LJ_DUALNUM && fmt == STRSCAN_INT) {
@@ -116,12 +116,9 @@ static void lex_number(LexState *ls, TValue *tv)
} else if (fmt != STRSCAN_ERROR) {
lua_State *L = ls->L;
GCcdata *cd;
- lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG);
- if (!ctype_ctsG(G(L))) {
- ptrdiff_t oldtop = savestack(L, L->top);
- luaopen_ffi(L); /* Load FFI library on-demand. */
- L->top = restorestack(L, oldtop);
- }
+ lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG,
+ "unexpected number format %d", fmt);
+ ctype_loadffi(L);
if (fmt == STRSCAN_IMAG) {
cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
((double *)cdataptr(cd))[0] = 0;
@@ -133,65 +130,66 @@ static void lex_number(LexState *ls, TValue *tv)
lj_parse_keepcdata(ls, tv, cd);
#endif
} else {
- lua_assert(fmt == STRSCAN_ERROR);
+ lj_assertLS(fmt == STRSCAN_ERROR,
+ "unexpected number format %d", fmt);
lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
}
}
-static int skip_sep(LexState *ls)
+/* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */
+static int lex_skipeq(LexState *ls)
{
int count = 0;
- int s = ls->current;
- lua_assert(s == '[' || s == ']');
- save_and_next(ls);
- while (ls->current == '=' && count < 0x20000000) {
- save_and_next(ls);
+ LexChar s = ls->c;
+ lj_assertLS(s == '[' || s == ']', "bad usage");
+ while (lex_savenext(ls) == '=' && count < 0x20000000)
count++;
- }
- return (ls->current == s) ? count : (-count) - 1;
+ return (ls->c == s) ? count : (-count) - 1;
}
-static void read_long_string(LexState *ls, TValue *tv, int sep)
+/* Parse a long string or long comment (tv set to NULL). */
+static void lex_longstring(LexState *ls, TValue *tv, int sep)
{
- save_and_next(ls); /* skip 2nd `[' */
- if (currIsNewline(ls)) /* string starts with a newline? */
- inclinenumber(ls); /* skip it */
+ lex_savenext(ls); /* Skip second '['. */
+ if (lex_iseol(ls)) /* Skip initial newline. */
+ lex_newline(ls);
for (;;) {
- switch (ls->current) {
- case END_OF_STREAM:
+ switch (ls->c) {
+ case LEX_EOF:
lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
break;
case ']':
- if (skip_sep(ls) == sep) {
- save_and_next(ls); /* skip 2nd `]' */
+ if (lex_skipeq(ls) == sep) {
+ lex_savenext(ls); /* Skip second ']'. */
goto endloop;
}
break;
case '\n':
case '\r':
- save(ls, '\n');
- inclinenumber(ls);
- if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */
+ lex_save(ls, '\n');
+ lex_newline(ls);
+ if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */
break;
default:
- if (tv) save_and_next(ls);
- else next(ls);
+ lex_savenext(ls);
break;
}
} endloop:
if (tv) {
- GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep),
- ls->sb.n - 2*(2 + (MSize)sep));
+ GCstr *str = lj_parse_keepstr(ls, ls->sb.b + (2 + (MSize)sep),
+ sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
setstrV(ls->L, tv, str);
}
}
-static void read_string(LexState *ls, int delim, TValue *tv)
+/* Parse a string. */
+static void lex_string(LexState *ls, TValue *tv)
{
- save_and_next(ls);
- while (ls->current != delim) {
- switch (ls->current) {
- case END_OF_STREAM:
+ LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */
+ lex_savenext(ls);
+ while (ls->c != delim) {
+ switch (ls->c) {
+ case LEX_EOF:
lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
continue;
case '\n':
@@ -199,7 +197,7 @@ static void read_string(LexState *ls, int delim, TValue *tv)
lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
continue;
case '\\': {
- int c = next(ls); /* Skip the '\\'. */
+ LexChar c = lex_next(ls); /* Skip the '\\'. */
switch (c) {
case 'a': c = '\a'; break;
case 'b': c = '\b'; break;
@@ -209,111 +207,139 @@ static void read_string(LexState *ls, int delim, TValue *tv)
case 't': c = '\t'; break;
case 'v': c = '\v'; break;
case 'x': /* Hexadecimal escape '\xXX'. */
- c = (next(ls) & 15u) << 4;
- if (!lj_char_isdigit(ls->current)) {
- if (!lj_char_isxdigit(ls->current)) goto err_xesc;
+ c = (lex_next(ls) & 15u) << 4;
+ if (!lj_char_isdigit(ls->c)) {
+ if (!lj_char_isxdigit(ls->c)) goto err_xesc;
c += 9 << 4;
}
- c += (next(ls) & 15u);
- if (!lj_char_isdigit(ls->current)) {
- if (!lj_char_isxdigit(ls->current)) goto err_xesc;
+ c += (lex_next(ls) & 15u);
+ if (!lj_char_isdigit(ls->c)) {
+ if (!lj_char_isxdigit(ls->c)) goto err_xesc;
c += 9;
}
break;
+ case 'u': /* Unicode escape '\u{XX...}'. */
+ if (lex_next(ls) != '{') goto err_xesc;
+ lex_next(ls);
+ c = 0;
+ do {
+ c = (c << 4) | (ls->c & 15u);
+ if (!lj_char_isdigit(ls->c)) {
+ if (!lj_char_isxdigit(ls->c)) goto err_xesc;
+ c += 9;
+ }
+ if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */
+ } while (lex_next(ls) != '}');
+ if (c < 0x800) {
+ if (c < 0x80) break;
+ lex_save(ls, 0xc0 | (c >> 6));
+ } else {
+ if (c >= 0x10000) {
+ lex_save(ls, 0xf0 | (c >> 18));
+ lex_save(ls, 0x80 | ((c >> 12) & 0x3f));
+ } else {
+ if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */
+ lex_save(ls, 0xe0 | (c >> 12));
+ }
+ lex_save(ls, 0x80 | ((c >> 6) & 0x3f));
+ }
+ c = 0x80 | (c & 0x3f);
+ break;
case 'z': /* Skip whitespace. */
- next(ls);
- while (lj_char_isspace(ls->current))
- if (currIsNewline(ls)) inclinenumber(ls); else next(ls);
+ lex_next(ls);
+ while (lj_char_isspace(ls->c))
+ if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls);
continue;
- case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue;
+ case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue;
case '\\': case '\"': case '\'': break;
- case END_OF_STREAM: continue;
+ case LEX_EOF: continue;
default:
if (!lj_char_isdigit(c))
goto err_xesc;
c -= '0'; /* Decimal escape '\ddd'. */
- if (lj_char_isdigit(next(ls))) {
- c = c*10 + (ls->current - '0');
- if (lj_char_isdigit(next(ls))) {
- c = c*10 + (ls->current - '0');
+ if (lj_char_isdigit(lex_next(ls))) {
+ c = c*10 + (ls->c - '0');
+ if (lj_char_isdigit(lex_next(ls))) {
+ c = c*10 + (ls->c - '0');
if (c > 255) {
err_xesc:
lj_lex_error(ls, TK_string, LJ_ERR_XESC);
}
- next(ls);
+ lex_next(ls);
}
}
- save(ls, c);
+ lex_save(ls, c);
continue;
}
- save(ls, c);
- next(ls);
+ lex_save(ls, c);
+ lex_next(ls);
continue;
}
default:
- save_and_next(ls);
+ lex_savenext(ls);
break;
}
}
- save_and_next(ls); /* skip delimiter */
- setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));
+ lex_savenext(ls); /* Skip trailing delimiter. */
+ setstrV(ls->L, tv,
+ lj_parse_keepstr(ls, ls->sb.b+1, sbuflen(&ls->sb)-2));
}
/* -- Main lexical scanner ------------------------------------------------ */
-static int llex(LexState *ls, TValue *tv)
+/* Get next lexical token. */
+static LexToken lex_scan(LexState *ls, TValue *tv)
{
- lj_str_resetbuf(&ls->sb);
+ lj_buf_reset(&ls->sb);
for (;;) {
- if (lj_char_isident(ls->current)) {
+ if (lj_char_isident(ls->c)) {
GCstr *s;
- if (lj_char_isdigit(ls->current)) { /* Numeric literal. */
+ if (lj_char_isdigit(ls->c)) { /* Numeric literal. */
lex_number(ls, tv);
return TK_number;
}
/* Identifier or reserved word. */
do {
- save_and_next(ls);
- } while (lj_char_isident(ls->current));
- s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
+ lex_savenext(ls);
+ } while (lj_char_isident(ls->c));
+ s = lj_parse_keepstr(ls, ls->sb.b, sbuflen(&ls->sb));
setstrV(ls->L, tv, s);
if (s->reserved > 0) /* Reserved word? */
return TK_OFS + s->reserved;
return TK_name;
}
- switch (ls->current) {
+ switch (ls->c) {
case '\n':
case '\r':
- inclinenumber(ls);
+ lex_newline(ls);
continue;
case ' ':
case '\t':
case '\v':
case '\f':
- next(ls);
+ lex_next(ls);
continue;
case '-':
- next(ls);
- if (ls->current != '-') return '-';
- /* else is a comment */
- next(ls);
- if (ls->current == '[') {
- int sep = skip_sep(ls);
- lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */
+ lex_next(ls);
+ if (ls->c != '-') return '-';
+ lex_next(ls);
+ if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */
+ int sep = lex_skipeq(ls);
+ lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */
if (sep >= 0) {
- read_long_string(ls, NULL, sep); /* long comment */
- lj_str_resetbuf(&ls->sb);
+ lex_longstring(ls, NULL, sep);
+ lj_buf_reset(&ls->sb);
continue;
}
}
- /* else short comment */
- while (!currIsNewline(ls) && ls->current != END_OF_STREAM)
- next(ls);
+ /* Short comment "--.*\n". */
+ while (!lex_iseol(ls) && ls->c != LEX_EOF)
+ lex_next(ls);
continue;
case '[': {
- int sep = skip_sep(ls);
+ int sep = lex_skipeq(ls);
if (sep >= 0) {
- read_long_string(ls, tv, sep);
+ lex_longstring(ls, tv, sep);
return TK_string;
} else if (sep == -1) {
return '[';
@@ -323,44 +349,43 @@ static int llex(LexState *ls, TValue *tv)
}
}
case '=':
- next(ls);
- if (ls->current != '=') return '='; else { next(ls); return TK_eq; }
+ lex_next(ls);
+ if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; }
case '<':
- next(ls);
- if (ls->current != '=') return '<'; else { next(ls); return TK_le; }
+ lex_next(ls);
+ if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; }
case '>':
- next(ls);
- if (ls->current != '=') return '>'; else { next(ls); return TK_ge; }
+ lex_next(ls);
+ if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; }
case '~':
- next(ls);
- if (ls->current != '=') return '~'; else { next(ls); return TK_ne; }
+ lex_next(ls);
+ if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; }
case ':':
- next(ls);
- if (ls->current != ':') return ':'; else { next(ls); return TK_label; }
+ lex_next(ls);
+ if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; }
case '"':
case '\'':
- read_string(ls, ls->current, tv);
+ lex_string(ls, tv);
return TK_string;
case '.':
- save_and_next(ls);
- if (ls->current == '.') {
- next(ls);
- if (ls->current == '.') {
- next(ls);
+ if (lex_savenext(ls) == '.') {
+ lex_next(ls);
+ if (ls->c == '.') {
+ lex_next(ls);
return TK_dots; /* ... */
}
return TK_concat; /* .. */
- } else if (!lj_char_isdigit(ls->current)) {
+ } else if (!lj_char_isdigit(ls->c)) {
return '.';
} else {
lex_number(ls, tv);
return TK_number;
}
- case END_OF_STREAM:
+ case LEX_EOF:
return TK_eof;
default: {
- int c = ls->current;
- next(ls);
+ LexChar c = ls->c;
+ lex_next(ls);
return c; /* Single-char tokens (+ - / ...). */
}
}
@@ -375,36 +400,33 @@ int lj_lex_setup(lua_State *L, LexState *ls)
int header = 0;
ls->L = L;
ls->fs = NULL;
- ls->n = 0;
- ls->p = NULL;
+ ls->pe = ls->p = NULL;
ls->vstack = NULL;
ls->sizevstack = 0;
ls->vtop = 0;
ls->bcstack = NULL;
ls->sizebcstack = 0;
- ls->token = 0;
+ ls->tok = 0;
ls->lookahead = TK_eof; /* No look-ahead token. */
ls->linenumber = 1;
ls->lastline = 1;
ls->endmark = 0;
- lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF);
- next(ls); /* Read-ahead first char. */
- if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb &&
- char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
- ls->n -= 2;
+ lex_next(ls); /* Read-ahead first char. */
+ if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb &&
+ (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */
ls->p += 2;
- next(ls);
+ lex_next(ls);
header = 1;
}
- if (ls->current == '#') { /* Skip POSIX #! header line. */
+ if (ls->c == '#') { /* Skip POSIX #! header line. */
do {
- next(ls);
- if (ls->current == END_OF_STREAM) return 0;
- } while (!currIsNewline(ls));
- inclinenumber(ls);
+ lex_next(ls);
+ if (ls->c == LEX_EOF) return 0;
+ } while (!lex_iseol(ls));
+ lex_newline(ls);
header = 1;
}
- if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */
+ if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */
if (header) {
/*
** Loading bytecode with an extra header is disabled for security
@@ -426,55 +448,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls)
global_State *g = G(L);
lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine);
lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo);
- lj_str_freebuf(g, &ls->sb);
+ lj_buf_free(g, &ls->sb);
}
+/* Return next lexical token. */
void lj_lex_next(LexState *ls)
{
ls->lastline = ls->linenumber;
if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */
- ls->token = llex(ls, &ls->tokenval); /* Get next token. */
+ ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */
} else { /* Otherwise return lookahead token. */
- ls->token = ls->lookahead;
+ ls->tok = ls->lookahead;
ls->lookahead = TK_eof;
- ls->tokenval = ls->lookaheadval;
+ ls->tokval = ls->lookaheadval;
}
}
+/* Look ahead for the next token. */
LexToken lj_lex_lookahead(LexState *ls)
{
- lua_assert(ls->lookahead == TK_eof);
- ls->lookahead = llex(ls, &ls->lookaheadval);
+ lj_assertLS(ls->lookahead == TK_eof, "double lookahead");
+ ls->lookahead = lex_scan(ls, &ls->lookaheadval);
return ls->lookahead;
}
-const char *lj_lex_token2str(LexState *ls, LexToken token)
+/* Convert token to string. */
+const char *lj_lex_token2str(LexState *ls, LexToken tok)
{
- if (token > TK_OFS)
- return tokennames[token-TK_OFS-1];
- else if (!lj_char_iscntrl(token))
- return lj_str_pushf(ls->L, "%c", token);
+ if (tok > TK_OFS)
+ return tokennames[tok-TK_OFS-1];
+ else if (!lj_char_iscntrl(tok))
+ return lj_strfmt_pushf(ls->L, "%c", tok);
else
- return lj_str_pushf(ls->L, "char(%d)", token);
+ return lj_strfmt_pushf(ls->L, "char(%d)", tok);
}
-void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...)
+/* Lexer error. */
+void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...)
{
- const char *tok;
+ const char *tokstr;
va_list argp;
- if (token == 0) {
- tok = NULL;
- } else if (token == TK_name || token == TK_string || token == TK_number) {
- save(ls, '\0');
- tok = ls->sb.buf;
+ if (tok == 0) {
+ tokstr = NULL;
+ } else if (tok == TK_name || tok == TK_string || tok == TK_number) {
+ lex_save(ls, '\0');
+ tokstr = ls->sb.b;
} else {
- tok = lj_lex_token2str(ls, token);
+ tokstr = lj_lex_token2str(ls, tok);
}
va_start(argp, em);
- lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp);
+ lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp);
va_end(argp);
}
+/* Initialize strings for reserved words. */
void lj_lex_init(lua_State *L)
{
uint32_t i;
diff --git a/src/lj_lex.h b/src/lj_lex.h
index a284af19..cb5b5769 100644
--- a/src/lj_lex.h
+++ b/src/lj_lex.h
@@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2)
TK_RESERVED = TK_while - TK_OFS
};
-typedef int LexToken;
+typedef int LexChar; /* Lexical character. Unsigned ext. from char. */
+typedef int LexToken; /* Lexical token. */
/* Combined bytecode ins/line. Only used during bytecode generation. */
typedef struct BCInsLine {
@@ -51,13 +52,13 @@ typedef struct VarInfo {
typedef struct LexState {
struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */
struct lua_State *L; /* Lua state. */
- TValue tokenval; /* Current token value. */
+ TValue tokval; /* Current token value. */
TValue lookaheadval; /* Lookahead token value. */
- int current; /* Current character (charint). */
- LexToken token; /* Current token. */
- LexToken lookahead; /* Lookahead token. */
- MSize n; /* Bytes left in input buffer. */
const char *p; /* Current position in input buffer. */
+ const char *pe; /* End of input buffer. */
+ LexChar c; /* Current character. */
+ LexToken tok; /* Current token. */
+ LexToken lookahead; /* Lookahead token. */
SBuf sb; /* String buffer for tokens. */
lua_Reader rfunc; /* Reader callback. */
void *rdata; /* Reader callback data. */
@@ -79,8 +80,14 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls);
LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls);
LJ_FUNC void lj_lex_next(LexState *ls);
LJ_FUNC LexToken lj_lex_lookahead(LexState *ls);
-LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token);
-LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...);
+LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok);
+LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...);
LJ_FUNC void lj_lex_init(lua_State *L);
+#ifdef LUA_USE_ASSERT
+#define lj_assertLS(c, ...) (lj_assertG_(G(ls->L), (c), __VA_ARGS__))
+#else
+#define lj_assertLS(c, ...) ((void)ls)
+#endif
+
#endif
diff --git a/src/lj_lib.c b/src/lj_lib.c
index 5796766a..82a9e256 100644
--- a/src/lj_lib.c
+++ b/src/lj_lib.c
@@ -16,8 +16,14 @@
#include "lj_func.h"
#include "lj_bc.h"
#include "lj_dispatch.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
#include "lj_vm.h"
#include "lj_strscan.h"
+#include "lj_strfmt.h"
+#include "lj_lex.h"
+#include "lj_bcdump.h"
#include "lj_lib.h"
/* -- Library initialization ---------------------------------------------- */
@@ -43,6 +49,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize)
return tabV(L->top-1);
}
+static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab)
+{
+ int len = *p++;
+ GCstr *name = lj_str_new(L, (const char *)p, len);
+ LexState ls;
+ GCproto *pt;
+ GCfunc *fn;
+ memset(&ls, 0, sizeof(ls));
+ ls.L = L;
+ ls.p = (const char *)(p+len);
+ ls.pe = (const char *)~(uintptr_t)0;
+ ls.c = -1;
+ ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE));
+ ls.chunkname = name;
+ pt = lj_bcread_proto(&ls);
+ pt->firstline = ~(BCLine)0;
+ fn = lj_func_newL_empty(L, pt, tabref(L->env));
+ /* NOBARRIER: See below for common barrier. */
+ setfuncV(L, lj_tab_setstr(L, tab, name), fn);
+ return (const uint8_t *)ls.p;
+}
+
void lj_lib_register(lua_State *L, const char *libname,
const uint8_t *p, const lua_CFunction *cf)
{
@@ -87,6 +115,9 @@ void lj_lib_register(lua_State *L, const char *libname,
ofn = fn;
} else {
switch (tag | len) {
+ case LIBINIT_LUA:
+ p = lib_read_lfunc(L, p, tab);
+ break;
case LIBINIT_SET:
L->top -= 2;
if (tvisstr(L->top+1) && strV(L->top+1)->len == 0)
@@ -120,6 +151,37 @@ void lj_lib_register(lua_State *L, const char *libname,
}
}
+/* Push internal function on the stack. */
+GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n)
+{
+ GCfunc *fn;
+ lua_pushcclosure(L, f, n);
+ fn = funcV(L->top-1);
+ fn->c.ffid = (uint8_t)id;
+ setmref(fn->c.pc, &G(L)->bc_cfunc_int);
+ return fn;
+}
+
+void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, GCtab *env)
+{
+ luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4);
+ lua_pushcfunction(L, f);
+ /* NOBARRIER: The function is new (marked white). */
+ setgcref(funcV(L->top-1)->c.env, obj2gco(env));
+ lua_setfield(L, -2, name);
+ L->top--;
+}
+
+int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, const char *name)
+{
+ GCfunc *fn = lj_lib_pushcf(L, cf, id);
+ GCtab *t = tabref(curr_func(L)->c.env); /* Reference to parent table. */
+ setfuncV(L, lj_tab_setstr(L, t, lj_str_newz(L, name)), fn);
+ lj_gc_anybarriert(L, t);
+ setfuncV(L, L->top++, fn);
+ return 1;
+}
+
/* -- Type checks --------------------------------------------------------- */
TValue *lj_lib_checkany(lua_State *L, int narg)
@@ -137,7 +199,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg)
if (LJ_LIKELY(tvisstr(o))) {
return strV(o);
} else if (tvisnumber(o)) {
- GCstr *s = lj_str_fromnumber(L, o);
+ GCstr *s = lj_strfmt_number(L, o);
setstrV(L, o, s);
return s;
}
@@ -196,20 +258,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def)
return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def;
}
-int32_t lj_lib_checkbit(lua_State *L, int narg)
-{
- TValue *o = L->base + narg-1;
- if (!(o < L->top && lj_strscan_numberobj(o)))
- lj_err_argt(L, narg, LUA_TNUMBER);
- if (LJ_LIKELY(tvisint(o))) {
- return intV(o);
- } else {
- int32_t i = lj_num2bit(numV(o));
- if (LJ_DUALNUM) setintV(o, i);
- return i;
- }
-}
-
GCfunc *lj_lib_checkfunc(lua_State *L, int narg)
{
TValue *o = L->base + narg-1;
@@ -256,3 +304,56 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst)
return def;
}
+/* -- Strict type checks -------------------------------------------------- */
+
+/* The following type checks do not coerce between strings and numbers.
+** And they handle plain int64_t/uint64_t FFI numbers, too.
+*/
+
+#if LJ_HASBUFFER
+GCstr *lj_lib_checkstrx(lua_State *L, int narg)
+{
+ TValue *o = L->base + narg-1;
+ if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING);
+ return strV(o);
+}
+
+int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b)
+{
+ TValue *o = L->base + narg-1;
+ lj_assertL(b >= 0, "expected range must be non-negative");
+ if (o < L->top) {
+ if (LJ_LIKELY(tvisint(o))) {
+ int32_t i = intV(o);
+ if (i >= a && i <= b) return i;
+ } else if (LJ_LIKELY(tvisnum(o))) {
+ /* For performance reasons, this doesn't check for integerness or
+ ** integer overflow. Overflow detection still works, since all FPUs
+ ** return either MININT or MAXINT, which is then out of range.
+ */
+ int32_t i = (int32_t)numV(o);
+ if (i >= a && i <= b) return i;
+#if LJ_HASFFI
+ } else if (tviscdata(o)) {
+ GCcdata *cd = cdataV(o);
+ if (cd->ctypeid == CTID_INT64) {
+ int64_t i = *(int64_t *)cdataptr(cd);
+ if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i;
+ } else if (cd->ctypeid == CTID_UINT64) {
+ uint64_t i = *(uint64_t *)cdataptr(cd);
+ if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i;
+ } else {
+ goto badtype;
+ }
+#endif
+ } else {
+ goto badtype;
+ }
+ lj_err_arg(L, narg, LJ_ERR_NUMRNG);
+ }
+badtype:
+ lj_err_argt(L, narg, LUA_TNUMBER);
+ return 0; /* unreachable */
+}
+#endif
+
diff --git a/src/lj_lib.h b/src/lj_lib.h
index 55529ad8..a18f52bf 100644
--- a/src/lj_lib.h
+++ b/src/lj_lib.h
@@ -41,15 +41,28 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg);
LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg);
LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg);
LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def);
-LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg);
LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg);
LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg);
LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg);
LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
+#if LJ_HASBUFFER
+LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg);
+LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg,
+ int32_t a, int32_t b);
+#endif
+
/* Avoid including lj_frame.h. */
+#if LJ_GC64
+#define lj_lib_upvalue(L, n) \
+ (&gcval(L->base-2)->fn.c.upvalue[(n)-1])
+#elif LJ_FR2
+#define lj_lib_upvalue(L, n) \
+ (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1])
+#else
#define lj_lib_upvalue(L, n) \
(&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1])
+#endif
#if LJ_TARGET_WINDOWS
#define lj_lib_checkfpu(L) \
@@ -60,23 +73,14 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst);
#define lj_lib_checkfpu(L) UNUSED(L)
#endif
-/* Push internal function on the stack. */
-static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
- int id, int n)
-{
- GCfunc *fn;
- lua_pushcclosure(L, f, n);
- fn = funcV(L->top-1);
- fn->c.ffid = (uint8_t)id;
- setmref(fn->c.pc, &G(L)->bc_cfunc_int);
-}
-
+LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n);
#define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0))
/* Library function declarations. Scanned by buildvm. */
#define LJLIB_CF(name) static int lj_cf_##name(lua_State *L)
#define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L)
#define LJLIB_ASM_(name)
+#define LJLIB_LUA(name)
#define LJLIB_SET(name)
#define LJLIB_PUSH(arg)
#define LJLIB_REC(handler)
@@ -88,6 +92,10 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f,
LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
const uint8_t *init, const lua_CFunction *cf);
+LJ_FUNC void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f,
+ GCtab *env);
+LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id,
+ const char *name);
/* Library init data tags. */
#define LIBINIT_LENMASK 0x3f
@@ -96,7 +104,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
#define LIBINIT_ASM 0x40
#define LIBINIT_ASM_ 0x80
#define LIBINIT_STRING 0xc0
-#define LIBINIT_MAXSTR 0x39
+#define LIBINIT_MAXSTR 0x38
+#define LIBINIT_LUA 0xf9
#define LIBINIT_SET 0xfa
#define LIBINIT_NUMBER 0xfb
#define LIBINIT_COPY 0xfc
@@ -104,9 +113,4 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname,
#define LIBINIT_FFID 0xfe
#define LIBINIT_END 0xff
-/* Exported library functions. */
-
-typedef struct RandomState RandomState;
-LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs);
-
#endif
diff --git a/src/lj_load.c b/src/lj_load.c
index dbd36ac7..0aab4884 100644
--- a/src/lj_load.c
+++ b/src/lj_load.c
@@ -15,7 +15,7 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
#include "lj_func.h"
#include "lj_frame.h"
#include "lj_vm.h"
@@ -54,7 +54,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data,
ls.rdata = data;
ls.chunkarg = chunkname ? chunkname : "?";
ls.mode = mode;
- lj_str_initbuf(&ls.sb);
+ lj_buf_init(L, &ls.sb);
status = lj_vm_cpcall(L, NULL, &ls, cpparser);
lj_lex_cleanup(L, &ls);
lj_gc_check(L);
@@ -159,7 +159,7 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s)
LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data)
{
cTValue *o = L->top-1;
- api_check(L, L->top > L->base);
+ lj_checkapi(L->top > L->base, "top slot empty");
if (tvisfunc(o) && isluafunc(funcV(o)))
return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0);
else
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 0a3b3fa3..163aada4 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -14,6 +14,7 @@
#include "lj_mcode.h"
#include "lj_trace.h"
#include "lj_dispatch.h"
+#include "lj_prng.h"
#endif
#if LJ_HASJIT || LJ_HASFFI
#include "lj_vm.h"
@@ -44,7 +45,7 @@ void lj_mcode_sync(void *start, void *end)
sys_icache_invalidate(start, (char *)end-(char *)start);
#elif LJ_TARGET_PPC
lj_vm_cachesync(start, end);
-#elif defined(__GNUC__)
+#elif defined(__GNUC__) || defined(__clang__)
__clear_cache(start, end);
#else
#error "Missing builtin to flush instruction cache"
@@ -66,8 +67,8 @@ void lj_mcode_sync(void *start, void *end)
static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot)
{
- void *p = VirtualAlloc((void *)hint, sz,
- MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
+ void *p = LJ_WIN_VALLOC((void *)hint, sz,
+ MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot);
if (!p && !hint)
lj_trace_err(J, LJ_TRERR_MCODEAL);
return p;
@@ -82,7 +83,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz)
static int mcode_setprot(void *p, size_t sz, DWORD prot)
{
DWORD oprot;
- return !VirtualProtect(p, sz, prot, &oprot);
+ return !LJ_WIN_VPROTECT(p, sz, prot, &oprot);
}
#elif LJ_TARGET_POSIX
@@ -96,10 +97,15 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot)
#define MCPROT_RW (PROT_READ|PROT_WRITE)
#define MCPROT_RX (PROT_READ|PROT_EXEC)
#define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC)
+#ifdef PROT_MPROTECT
+#define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX))
+#else
+#define MCPROT_CREATE 0
+#endif
static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
{
- void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (p == MAP_FAILED) {
if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL);
p = NULL;
@@ -118,52 +124,34 @@ static int mcode_setprot(void *p, size_t sz, int prot)
return mprotect(p, sz, prot);
}
-#elif LJ_64
-
-#error "Missing OS support for explicit placement of executable memory"
-
#else
-/* Fallback allocator. This will fail if memory is not executable by default. */
-#define LUAJIT_UNPROTECT_MCODE
-#define MCPROT_RW 0
-#define MCPROT_RX 0
-#define MCPROT_RWX 0
-
-static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot)
-{
- UNUSED(hint); UNUSED(prot);
- return lj_mem_new(J->L, sz);
-}
-
-static void mcode_free(jit_State *J, void *p, size_t sz)
-{
- lj_mem_free(J2G(J), p, sz);
-}
+#error "Missing OS support for explicit placement of executable memory"
#endif
/* -- MCode area protection ----------------------------------------------- */
-/* Define this ONLY if page protection twiddling becomes a bottleneck. */
-#ifdef LUAJIT_UNPROTECT_MCODE
+#if LUAJIT_SECURITY_MCODE == 0
-/* It's generally considered to be a potential security risk to have
+/* Define this ONLY if page protection twiddling becomes a bottleneck.
+**
+** It's generally considered to be a potential security risk to have
** pages with simultaneous write *and* execute access in a process.
**
** Do not even think about using this mode for server processes or
-** apps handling untrusted external data (such as a browser).
+** apps handling untrusted external data.
**
** The security risk is not in LuaJIT itself -- but if an adversary finds
-** any *other* flaw in your C application logic, then any RWX memory page
-** simplifies writing an exploit considerably.
+** any *other* flaw in your C application logic, then any RWX memory pages
+** simplify writing an exploit considerably.
*/
#define MCPROT_GEN MCPROT_RWX
#define MCPROT_RUN MCPROT_RWX
static void mcode_protect(jit_State *J, int prot)
{
- UNUSED(J); UNUSED(prot);
+ UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot);
}
#else
@@ -222,8 +210,8 @@ static void *mcode_alloc(jit_State *J, size_t sz)
*/
#if LJ_TARGET_MIPS
/* Use the middle of the 256MB-aligned region. */
- uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) +
- 0x08000000u;
+ uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler &
+ ~(uintptr_t)0x0fffffffu) + 0x08000000u;
#else
uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff;
#endif
@@ -243,7 +231,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
}
/* Next try probing 64K-aligned pseudo-random addresses. */
do {
- hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16;
+ hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000);
} while (!(hint + sz < range+range));
hint = target + hint - range;
}
@@ -256,7 +244,7 @@ static void *mcode_alloc(jit_State *J, size_t sz)
/* All memory addresses are reachable by relative jumps. */
static void *mcode_alloc(jit_State *J, size_t sz)
{
-#ifdef __OpenBSD__
+#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP
/* Allow better executable memory allocation for OpenBSD W^X mode. */
void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN);
if (p && mcode_setprot(p, sz, MCPROT_GEN)) {
@@ -287,6 +275,7 @@ static void mcode_allocarea(jit_State *J)
((MCLink *)J->mcarea)->next = oldarea;
((MCLink *)J->mcarea)->size = sz;
J->szallmcarea += sz;
+ J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot);
}
/* Free all MCode areas. */
@@ -297,7 +286,9 @@ void lj_mcode_free(jit_State *J)
J->szallmcarea = 0;
while (mc) {
MCode *next = ((MCLink *)mc)->next;
- mcode_free(J, mc, ((MCLink *)mc)->size);
+ size_t sz = ((MCLink *)mc)->size;
+ lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink));
+ mcode_free(J, mc, sz);
mc = next;
}
}
@@ -332,35 +323,36 @@ void lj_mcode_abort(jit_State *J)
/* Set/reset protection to allow patching of MCode areas. */
MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish)
{
-#ifdef LUAJIT_UNPROTECT_MCODE
- UNUSED(J); UNUSED(ptr); UNUSED(finish);
- return NULL;
-#else
if (finish) {
+#if LUAJIT_SECURITY_MCODE
if (J->mcarea == ptr)
mcode_protect(J, MCPROT_RUN);
else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN)))
mcode_protfail(J);
+#endif
return NULL;
} else {
MCode *mc = J->mcarea;
/* Try current area first to use the protection cache. */
if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) {
+#if LUAJIT_SECURITY_MCODE
mcode_protect(J, MCPROT_GEN);
+#endif
return mc;
}
/* Otherwise search through the list of MCode areas. */
for (;;) {
mc = ((MCLink *)mc)->next;
- lua_assert(mc != NULL);
+ lj_assertJ(mc != NULL, "broken MCode area chain");
if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) {
+#if LUAJIT_SECURITY_MCODE
if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN)))
mcode_protfail(J);
+#endif
return mc;
}
}
}
-#endif
}
/* Limit of MCode reservation reached. */
diff --git a/src/lj_meta.c b/src/lj_meta.c
index 1d4d2234..5324c666 100644
--- a/src/lj_meta.c
+++ b/src/lj_meta.c
@@ -12,6 +12,7 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_meta.h"
@@ -19,6 +20,8 @@
#include "lj_bc.h"
#include "lj_vm.h"
#include "lj_strscan.h"
+#include "lj_strfmt.h"
+#include "lj_lib.h"
/* -- Metamethod handling ------------------------------------------------- */
@@ -44,7 +47,7 @@ void lj_meta_init(lua_State *L)
cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name)
{
cTValue *mo = lj_tab_getstr(mt, name);
- lua_assert(mm <= MM_FAST);
+ lj_assertX(mm <= MM_FAST, "bad metamethod %d", mm);
if (!mo || tvisnil(mo)) { /* No metamethod? */
mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */
return NULL;
@@ -77,12 +80,16 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv)
TValue *base = L->base;
TValue *top = L->top;
const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */
- copyTV(L, base-1, tv); /* Replace frame with new object. */
- top->u32.lo = LJ_CONT_TAILCALL;
- setframe_pc(top, pc);
- setframe_gc(top+1, obj2gco(L)); /* Dummy frame object. */
- setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT);
- L->base = L->top = top+2;
+ copyTV(L, base-1-LJ_FR2, tv); /* Replace frame with new object. */
+ if (LJ_FR2)
+ (top++)->u64 = LJ_CONT_TAILCALL;
+ else
+ top->u32.lo = LJ_CONT_TAILCALL;
+ setframe_pc(top++, pc);
+ setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */
+ if (LJ_FR2) top++;
+ setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT);
+ L->base = L->top = top+1;
/*
** before: [old_mo|PC] [... ...]
** ^base ^top
@@ -113,11 +120,13 @@ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo,
*/
TValue *top = L->top;
if (curr_funcisL(L)) top = curr_topL(L);
- setcont(top, cont); /* Assembler VM stores PC in upper word. */
- copyTV(L, top+1, mo); /* Store metamethod and two arguments. */
- copyTV(L, top+2, a);
- copyTV(L, top+3, b);
- return top+2; /* Return new base. */
+ setcont(top++, cont); /* Assembler VM stores PC in upper word or FR2. */
+ if (LJ_FR2) setnilV(top++);
+ copyTV(L, top++, mo); /* Store metamethod and two arguments. */
+ if (LJ_FR2) setnilV(top++);
+ copyTV(L, top, a);
+ copyTV(L, top+1, b);
+ return top; /* Return new base. */
}
/* -- C helpers for some instructions, called from assembler VM ----------- */
@@ -225,27 +234,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc,
}
}
-/* In-place coercion of a number to a string. */
-static LJ_AINLINE int tostring(lua_State *L, TValue *o)
-{
- if (tvisstr(o)) {
- return 1;
- } else if (tvisnumber(o)) {
- setstrV(L, o, lj_str_fromnumber(L, o));
- return 1;
- } else {
- return 0;
- }
-}
-
/* Helper for CAT. Coercion, iterative concat, __concat metamethod. */
TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
{
int fromc = 0;
if (left < 0) { left = -left; fromc = 1; }
do {
- int n = 1;
- if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) {
+ if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) ||
+ !(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) {
cTValue *mo = lj_meta_lookup(L, top-1, MM_concat);
if (tvisnil(mo)) {
mo = lj_meta_lookup(L, top, MM_concat);
@@ -266,13 +262,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
** after mm: [...][CAT stack ...] <--push-- [result]
** next step: [...][CAT stack .............]
*/
- copyTV(L, top+2, top); /* Careful with the order of stack copies! */
- copyTV(L, top+1, top-1);
- copyTV(L, top, mo);
+ copyTV(L, top+2*LJ_FR2+2, top); /* Carefully ordered stack copies! */
+ copyTV(L, top+2*LJ_FR2+1, top-1);
+ copyTV(L, top+LJ_FR2, mo);
setcont(top-1, lj_cont_cat);
+ if (LJ_FR2) { setnilV(top); setnilV(top+2); top += 2; }
return top+1; /* Trigger metamethod call. */
- } else if (strV(top)->len == 0) { /* Shortcut. */
- (void)tostring(L, top-1);
} else {
/* Pick as many strings as possible from the top and concatenate them:
**
@@ -281,27 +276,33 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left)
** concat: [...][CAT stack ...] [result]
** next step: [...][CAT stack ............]
*/
- MSize tlen = strV(top)->len;
- char *buffer;
- int i;
- for (n = 1; n <= left && tostring(L, top-n); n++) {
- MSize len = strV(top-n)->len;
- if (len >= LJ_MAX_STR - tlen)
- lj_err_msg(L, LJ_ERR_STROV);
- tlen += len;
- }
- buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen);
- n--;
- tlen = 0;
- for (i = n; i >= 0; i--) {
- MSize len = strV(top-i)->len;
- memcpy(buffer + tlen, strVdata(top-i), len);
- tlen += len;
+ TValue *e, *o = top;
+ uint64_t tlen = tvisstr(o) ? strV(o)->len :
+ tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
+ SBuf *sb;
+ do {
+ o--; tlen += tvisstr(o) ? strV(o)->len :
+ tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM;
+ } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1)));
+ if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV);
+ sb = lj_buf_tmp_(L);
+ lj_buf_more(sb, (MSize)tlen);
+ for (e = top, top = o; o <= e; o++) {
+ if (tvisstr(o)) {
+ GCstr *s = strV(o);
+ MSize len = s->len;
+ lj_buf_putmem(sb, strdata(s), len);
+ } else if (tvisbuf(o)) {
+ SBufExt *sbx = bufV(o);
+ lj_buf_putmem(sb, sbx->r, sbufxlen(sbx));
+ } else if (tvisint(o)) {
+ lj_strfmt_putint(sb, intV(o));
+ } else {
+ lj_strfmt_putfnum(sb, STRFMT_G14, numV(o));
+ }
}
- setstrV(L, top-n, lj_str_new(L, buffer, tlen));
+ setstrV(L, top, lj_buf_str(L, sb));
}
- left -= n;
- top -= n;
} while (left >= 1);
if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) {
if (!fromc) L->top = curr_topL(L);
@@ -338,12 +339,14 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne)
return (TValue *)(intptr_t)ne;
}
top = curr_top(L);
- setcont(top, ne ? lj_cont_condf : lj_cont_condt);
- copyTV(L, top+1, mo);
+ setcont(top++, ne ? lj_cont_condf : lj_cont_condt);
+ if (LJ_FR2) setnilV(top++);
+ copyTV(L, top++, mo);
+ if (LJ_FR2) setnilV(top++);
it = ~(uint32_t)o1->gch.gct;
- setgcV(L, top+2, o1, it);
- setgcV(L, top+3, o2, it);
- return top+2; /* Trigger metamethod call. */
+ setgcV(L, top, o1, it);
+ setgcV(L, top+1, o2, it);
+ return top; /* Trigger metamethod call. */
}
return (TValue *)(intptr_t)ne;
}
@@ -365,8 +368,8 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins)
} else if (op == BC_ISEQN) {
o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)];
} else {
- lua_assert(op == BC_ISEQP);
- setitype(&tv, ~bc_d(ins));
+ lj_assertL(op == BC_ISEQP, "bad bytecode op %d", op);
+ setpriV(&tv, ~bc_d(ins));
o2 = &tv;
}
mo = lj_meta_lookup(L, o1mm, MM_eq);
@@ -423,6 +426,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op)
}
}
+/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */
+void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp)
+{
+ L->top = curr_topL(L);
+ ra++; tp--;
+ lj_assertL(LJ_DUALNUM || tp != ~LJ_TNUMX, "bad type for ISTYPE");
+ if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra);
+ else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra);
+ else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra);
+ else lj_err_argtype(L, ra, lj_obj_itypename[tp]);
+}
+
/* Helper for calls. __call metamethod. */
void lj_meta_call(lua_State *L, TValue *func, TValue *top)
{
@@ -430,7 +445,8 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top)
TValue *p;
if (!tvisfunc(mo))
lj_err_optype_call(L, func);
- for (p = top; p > func; p--) copyTV(L, p, p-1);
+ for (p = top; p > func+2*LJ_FR2; p--) copyTV(L, p, p-1);
+ if (LJ_FR2) copyTV(L, func+2, func);
copyTV(L, func, mo);
}
diff --git a/src/lj_meta.h b/src/lj_meta.h
index 9c36aea5..3a6eaac2 100644
--- a/src/lj_meta.h
+++ b/src/lj_meta.h
@@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o);
LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne);
LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins);
LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op);
+LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp);
LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top);
LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o);
diff --git a/src/lj_obj.c b/src/lj_obj.c
index 528b3a58..65cbe1a1 100644
--- a/src/lj_obj.c
+++ b/src/lj_obj.c
@@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */
};
/* Compare two objects without calling metamethods. */
-int lj_obj_equal(cTValue *o1, cTValue *o2)
+int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2)
{
if (itype(o1) == itype(o2)) {
if (tvispri(o1))
@@ -33,3 +33,19 @@ int lj_obj_equal(cTValue *o1, cTValue *o2)
return numberVnum(o1) == numberVnum(o2);
}
+/* Return pointer to object or its object data. */
+const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o)
+{
+ UNUSED(g);
+ if (tvisudata(o))
+ return uddata(udataV(o));
+ else if (tvislightud(o))
+ return lightudV(g, o);
+ else if (LJ_HASFFI && tviscdata(o))
+ return cdataptr(cdataV(o));
+ else if (tvisgcv(o))
+ return gcV(o);
+ else
+ return NULL;
+}
+
diff --git a/src/lj_obj.h b/src/lj_obj.h
index ea8fe870..67e41181 100644
--- a/src/lj_obj.h
+++ b/src/lj_obj.h
@@ -13,44 +13,81 @@
#include "lj_def.h"
#include "lj_arch.h"
-/* -- Memory references (32 bit address space) ---------------------------- */
+/* -- Memory references --------------------------------------------------- */
-/* Memory size. */
+/* Memory and GC object sizes. */
typedef uint32_t MSize;
+#if LJ_GC64
+typedef uint64_t GCSize;
+#else
+typedef uint32_t GCSize;
+#endif
/* Memory reference */
typedef struct MRef {
+#if LJ_GC64
+ uint64_t ptr64; /* True 64 bit pointer. */
+#else
uint32_t ptr32; /* Pseudo 32 bit pointer. */
+#endif
} MRef;
+#if LJ_GC64
+#define mref(r, t) ((t *)(void *)(r).ptr64)
+#define mrefu(r) ((r).ptr64)
+
+#define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p))
+#define setmrefu(r, u) ((r).ptr64 = (uint64_t)(u))
+#define setmrefr(r, v) ((r).ptr64 = (v).ptr64)
+#else
#define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32)
+#define mrefu(r) ((r).ptr32)
#define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p))
+#define setmrefu(r, u) ((r).ptr32 = (uint32_t)(u))
#define setmrefr(r, v) ((r).ptr32 = (v).ptr32)
+#endif
-/* -- GC object references (32 bit address space) ------------------------- */
+/* -- GC object references ------------------------------------------------ */
/* GCobj reference */
typedef struct GCRef {
+#if LJ_GC64
+ uint64_t gcptr64; /* True 64 bit pointer. */
+#else
uint32_t gcptr32; /* Pseudo 32 bit pointer. */
+#endif
} GCRef;
/* Common GC header for all collectable objects. */
#define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct
/* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */
+#if LJ_GC64
+#define gcref(r) ((GCobj *)(r).gcptr64)
+#define gcrefp(r, t) ((t *)(void *)(r).gcptr64)
+#define gcrefu(r) ((r).gcptr64)
+#define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64)
+
+#define setgcref(r, gc) ((r).gcptr64 = (uint64_t)&(gc)->gch)
+#define setgcreft(r, gc, it) \
+ (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47)
+#define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p))
+#define setgcrefnull(r) ((r).gcptr64 = 0)
+#define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64)
+#else
#define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32)
#define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32)
#define gcrefu(r) ((r).gcptr32)
-#define gcrefi(r) ((int32_t)(r).gcptr32)
#define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32)
-#define gcnext(gc) (gcref((gc)->gch.nextgc))
#define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch)
-#define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i))
#define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p))
#define setgcrefnull(r) ((r).gcptr32 = 0)
#define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32)
+#endif
+
+#define gcnext(gc) (gcref((gc)->gch.nextgc))
/* IMPORTANT NOTE:
**
@@ -119,11 +156,10 @@ typedef int32_t BCLine; /* Bytecode line number. */
/* Internal assembler functions. Never call these directly from C. */
typedef void (*ASMFunction)(void);
-/* Resizable string buffer. Need this here, details in lj_str.h. */
+/* Resizable string buffer. Need this here, details in lj_buf.h. */
+#define SBufHeader char *w, *e, *b; MRef L
typedef struct SBuf {
- char *buf; /* String buffer base. */
- MSize n; /* String buffer length. */
- MSize sz; /* String buffer size. */
+ SBufHeader;
} SBuf;
/* -- Tags and values ----------------------------------------------------- */
@@ -131,13 +167,23 @@ typedef struct SBuf {
/* Frame link. */
typedef union {
int32_t ftsz; /* Frame type and size of previous frame. */
- MRef pcr; /* Overlaps PC for Lua frames. */
+ MRef pcr; /* Or PC for Lua frames. */
} FrameLink;
/* Tagged value. */
typedef LJ_ALIGN(8) union TValue {
uint64_t u64; /* 64 bit pattern overlaps number. */
lua_Number n; /* Number object overlaps split tag/value object. */
+#if LJ_GC64
+ GCRef gcr; /* GCobj reference with tag. */
+ int64_t it64;
+ struct {
+ LJ_ENDIAN_LOHI(
+ int32_t i; /* Integer value. */
+ , uint32_t it; /* Internal object tag. Must overlap MSW of number. */
+ )
+ };
+#else
struct {
LJ_ENDIAN_LOHI(
union {
@@ -147,12 +193,17 @@ typedef LJ_ALIGN(8) union TValue {
, uint32_t it; /* Internal object tag. Must overlap MSW of number. */
)
};
+#endif
+#if LJ_FR2
+ int64_t ftsz; /* Frame type and size of previous frame, or PC. */
+#else
struct {
LJ_ENDIAN_LOHI(
GCRef func; /* Function for next frame (or dummy L). */
, FrameLink tp; /* Link to previous frame. */
)
} fr;
+#endif
struct {
LJ_ENDIAN_LOHI(
uint32_t lo; /* Lower 32 bits of number. */
@@ -172,6 +223,8 @@ typedef const TValue cTValue;
/* Internal object tags.
**
+** Format for 32 bit GC references (!LJ_GC64):
+**
** Internal tags overlap the MSW of a number object (must be a double).
** Interpreted as a double these are special NaNs. The FPU only generates
** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available
@@ -181,11 +234,24 @@ typedef const TValue cTValue;
** ---MSW---.---LSW---
** primitive types | itype | |
** lightuserdata | itype | void * | (32 bit platforms)
-** lightuserdata |ffff| void * | (64 bit platforms, 47 bit pointers)
+** lightuserdata |ffff|seg| ofs | (64 bit platforms)
** GC objects | itype | GCRef |
** int (LJ_DUALNUM)| itype | int |
** number -------double------
**
+** Format for 64 bit GC references (LJ_GC64):
+**
+** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next
+** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer,
+** a zero-extended 32 bit integer or all bits set to 1 for primitive types.
+**
+** ------MSW------.------LSW------
+** primitive types |1..1|itype|1..................1|
+** GC objects |1..1|itype|-------GCRef--------|
+** lightuserdata |1..1|itype|seg|------ofs-------|
+** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------|
+** number ------------double-------------
+**
** ORDER LJ_T
** Primitive types nil/false/true must be first, lightuserdata next.
** GC objects are at the end, table/userdata must be lowest.
@@ -208,7 +274,7 @@ typedef const TValue cTValue;
#define LJ_TNUMX (~13u)
/* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
#define LJ_TISNUM 0xfffeffffu
#else
#define LJ_TISNUM LJ_TNUMX
@@ -218,14 +284,31 @@ typedef const TValue cTValue;
#define LJ_TISGCV (LJ_TSTR+1)
#define LJ_TISTABUD LJ_TTAB
+/* Type marker for slot holding a traversal index. Must be lightuserdata. */
+#define LJ_KEYINDEX 0xfffe7fffu
+
+#if LJ_GC64
+#define LJ_GCVMASK (((uint64_t)1 << 47) - 1)
+#endif
+
+#if LJ_64
+/* To stay within 47 bits, lightuserdata is segmented. */
+#define LJ_LIGHTUD_BITS_SEG 8
+#define LJ_LIGHTUD_BITS_LO (47 - LJ_LIGHTUD_BITS_SEG)
+#endif
+
/* -- String object ------------------------------------------------------- */
+typedef uint32_t StrHash; /* String hash value. */
+typedef uint32_t StrID; /* String ID. */
+
/* String object header. String payload follows. */
typedef struct GCstr {
GCHeader;
uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */
- uint8_t unused;
- MSize hash; /* Hash of string. */
+ uint8_t hashalg; /* Hash algorithm. */
+ StrID sid; /* Interned string ID. */
+ StrHash hash; /* Hash of string. */
MSize len; /* Size of string. */
} GCstr;
@@ -233,7 +316,6 @@ typedef struct GCstr {
#define strdata(s) ((const char *)((s)+1))
#define strdatawr(s) ((char *)((s)+1))
#define strVdata(o) strdata(strV(o))
-#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1)
/* -- Userdata object ----------------------------------------------------- */
@@ -253,6 +335,7 @@ enum {
UDTYPE_USERDATA, /* Regular userdata. */
UDTYPE_IO_FILE, /* I/O library FILE. */
UDTYPE_FFI_CLIB, /* FFI C library namespace. */
+ UDTYPE_BUFFER, /* String buffer. */
UDTYPE__MAX
};
@@ -291,6 +374,9 @@ typedef struct GCproto {
uint8_t numparams; /* Number of parameters. */
uint8_t framesize; /* Fixed frame size. */
MSize sizebc; /* Number of bytecode instructions. */
+#if LJ_GC64
+ uint32_t unused_gc64;
+#endif
GCRef gclist;
MRef k; /* Split constant array (points to the middle). */
MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */
@@ -402,7 +488,9 @@ typedef struct Node {
TValue val; /* Value object. Must be first field. */
TValue key; /* Key object. */
MRef next; /* Hash chain. */
+#if !LJ_GC64
MRef freetop; /* Top of free elements (stored in t->node[0]). */
+#endif
} Node;
LJ_STATIC_ASSERT(offsetof(Node, val) == 0);
@@ -417,12 +505,22 @@ typedef struct GCtab {
MRef node; /* Hash part. */
uint32_t asize; /* Size of array part (keys [0, asize-1]). */
uint32_t hmask; /* Hash part mask (size of hash part - 1). */
+#if LJ_GC64
+ MRef freetop; /* Top of free elements. */
+#endif
} GCtab;
#define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab))
#define tabref(r) ((GCtab *)gcref((r)))
#define noderef(r) (mref((r), Node))
#define nextnode(n) (mref((n)->next, Node))
+#if LJ_GC64
+#define getfreetop(t, n) (noderef((t)->freetop))
+#define setfreetop(t, n, v) (setmref((t)->freetop, (v)))
+#else
+#define getfreetop(t, n) (noderef((n)->freetop))
+#define setfreetop(t, n, v) (setmref((n)->freetop, (v)))
+#endif
/* -- State objects ------------------------------------------------------- */
@@ -488,13 +586,18 @@ typedef enum {
#define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)])
#define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)]))
+/* Garbage collector state. */
typedef struct GCState {
- MSize total; /* Memory currently allocated. */
- MSize threshold; /* Memory threshold. */
+ GCSize total; /* Memory currently allocated. */
+ GCSize threshold; /* Memory threshold. */
uint8_t currentwhite; /* Current white color. */
uint8_t state; /* GC state. */
uint8_t nocdatafin; /* No cdata finalizer called. */
- uint8_t unused2;
+#if LJ_64
+ uint8_t lightudnum; /* Number of lightuserdata segments - 1. */
+#else
+ uint8_t unused1;
+#endif
MSize sweepstr; /* Sweep position in string table. */
GCRef root; /* List of all collectable objects. */
MRef sweep; /* Sweep position in root list. */
@@ -502,42 +605,57 @@ typedef struct GCState {
GCRef grayagain; /* List of objects for atomic traversal. */
GCRef weak; /* List of weak tables (to be cleared). */
GCRef mmudata; /* List of userdata (to be finalized). */
+ GCSize debt; /* Debt (how much GC is behind schedule). */
+ GCSize estimate; /* Estimate of memory actually in use. */
MSize stepmul; /* Incremental GC step granularity. */
- MSize debt; /* Debt (how much GC is behind schedule). */
- MSize estimate; /* Estimate of memory actually in use. */
MSize pause; /* Pause between successive GC cycles. */
+#if LJ_64
+ MRef lightudseg; /* Upper bits of lightuserdata segments. */
+#endif
} GCState;
+/* String interning state. */
+typedef struct StrInternState {
+ GCRef *tab; /* String hash table anchors. */
+ MSize mask; /* String hash mask (size of hash table - 1). */
+ MSize num; /* Number of strings in hash table. */
+ StrID id; /* Next string ID. */
+ uint8_t idreseed; /* String ID reseed counter. */
+ uint8_t second; /* String interning table uses secondary hashing. */
+ uint8_t unused1;
+ uint8_t unused2;
+ LJ_ALIGN(8) uint64_t seed; /* Random string seed. */
+} StrInternState;
+
/* Global state, shared by all threads of a Lua universe. */
typedef struct global_State {
- GCRef *strhash; /* String hash table (hash chain anchors). */
- MSize strmask; /* String hash mask (size of hash table - 1). */
- MSize strnum; /* Number of strings in hash table. */
lua_Alloc allocf; /* Memory allocator. */
void *allocd; /* Memory allocator data. */
GCState gc; /* Garbage collector. */
- SBuf tmpbuf; /* Temporary buffer for string concatenation. */
- Node nilnode; /* Fallback 1-element hash part (nil key and value). */
GCstr strempty; /* Empty string. */
uint8_t stremptyz; /* Zero terminator of empty string. */
uint8_t hookmask; /* Hook mask. */
uint8_t dispatchmode; /* Dispatch mode. */
uint8_t vmevmask; /* VM event mask. */
+ StrInternState str; /* String interning. */
+ volatile int32_t vmstate; /* VM state or current JIT code trace number. */
GCRef mainthref; /* Link to main thread. */
- TValue registrytv; /* Anchor for registry. */
+ SBuf tmpbuf; /* Temporary string buffer. */
TValue tmptv, tmptv2; /* Temporary TValues. */
+ Node nilnode; /* Fallback 1-element hash part (nil key and value). */
+ TValue registrytv; /* Anchor for registry. */
GCupval uvhead; /* Head of double-linked list of all open upvalues. */
int32_t hookcount; /* Instruction hook countdown. */
int32_t hookcstart; /* Start count for instruction hook counter. */
lua_Hook hookf; /* Hook function. */
lua_CFunction wrapf; /* Wrapper for C function calls. */
lua_CFunction panic; /* Called as a last resort for errors. */
- volatile int32_t vmstate; /* VM state or current JIT code trace number. */
BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */
BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */
- GCRef jit_L; /* Current JIT code lua_State or NULL. */
- MRef jit_base; /* Current JIT code L->base. */
+ GCRef cur_L; /* Currently executing lua_State. */
+ MRef jit_base; /* Current JIT code L->base or NULL. */
MRef ctype_state; /* Pointer to C type state. */
+ PRNGState prng; /* Global PRNG state. */
GCRef gcroot[GCROOT_MAX]; /* GC roots. */
} global_State;
@@ -553,9 +671,11 @@ typedef struct global_State {
#define HOOK_ACTIVE_SHIFT 4
#define HOOK_VMEVENT 0x20
#define HOOK_GC 0x40
+#define HOOK_PROFILE 0x80
#define hook_active(g) ((g)->hookmask & HOOK_ACTIVE)
#define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE)
-#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC))
+#define hook_entergc(g) \
+ ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE)
#define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT))
#define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE)
#define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK)
@@ -583,12 +703,23 @@ struct lua_State {
#define registry(L) (&G(L)->registrytv)
/* Macros to access the currently executing (Lua) function. */
+#if LJ_GC64
+#define curr_func(L) (&gcval(L->base-2)->fn)
+#elif LJ_FR2
+#define curr_func(L) (&gcref((L->base-2)->gcr)->fn)
+#else
#define curr_func(L) (&gcref((L->base-1)->fr.func)->fn)
+#endif
#define curr_funcisL(L) (isluafunc(curr_func(L)))
#define curr_proto(L) (funcproto(curr_func(L)))
#define curr_topL(L) (L->base + curr_proto(L)->framesize)
#define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top)
+#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
+LJ_FUNC_NORET void lj_assert_fail(global_State *g, const char *file, int line,
+ const char *func, const char *fmt, ...);
+#endif
+
/* -- GC object definition and conversions -------------------------------- */
/* GC header for generic access to common fields of GC objects. */
@@ -642,17 +773,18 @@ typedef union GCobj {
/* -- TValue getters/setters ---------------------------------------------- */
-#ifdef LUA_USE_ASSERT
-#include "lj_gc.h"
-#endif
-
/* Macros to test types. */
+#if LJ_GC64
+#define itype(o) ((uint32_t)((o)->it64 >> 47))
+#define tvisnil(o) ((o)->it64 == -1)
+#else
#define itype(o) ((o)->it)
#define tvisnil(o) (itype(o) == LJ_TNIL)
+#endif
#define tvisfalse(o) (itype(o) == LJ_TFALSE)
#define tvistrue(o) (itype(o) == LJ_TTRUE)
#define tvisbool(o) (tvisfalse(o) || tvistrue(o))
-#if LJ_64
+#if LJ_64 && !LJ_GC64
#define tvislightud(o) (((int32_t)itype(o) >> 15) == -2)
#else
#define tvislightud(o) (itype(o) == LJ_TLIGHTUD)
@@ -686,7 +818,7 @@ typedef union GCobj {
#define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64)
/* Macros to convert type ids. */
-#if LJ_64
+#if LJ_64 && !LJ_GC64
#define itypemap(o) \
(tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o))
#else
@@ -694,13 +826,31 @@ typedef union GCobj {
#endif
/* Macros to get tagged values. */
+#if LJ_GC64
+#define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK))
+#else
#define gcval(o) (gcref((o)->gcr))
-#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it))
+#endif
+#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o)))
#if LJ_64
-#define lightudV(o) \
- check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff)))
+#define lightudseg(u) \
+ (((u) >> LJ_LIGHTUD_BITS_LO) & ((1 << LJ_LIGHTUD_BITS_SEG)-1))
+#define lightudlo(u) \
+ ((u) & (((uint64_t)1 << LJ_LIGHTUD_BITS_LO) - 1))
+#define lightudup(p) \
+ ((uint32_t)(((p) >> LJ_LIGHTUD_BITS_LO) << (LJ_LIGHTUD_BITS_LO-32)))
+static LJ_AINLINE void *lightudV(global_State *g, cTValue *o)
+{
+ uint64_t u = o->u64;
+ uint64_t seg = lightudseg(u);
+ uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
+ lj_assertG(tvislightud(o), "lightuserdata expected");
+ if (seg == (1 << LJ_LIGHTUD_BITS_SEG)-1) return NULL;
+ lj_assertG(seg <= g->gc.lightudnum, "bad lightuserdata segment %d", seg);
+ return (void *)(((uint64_t)segmap[seg] << 32) | lightudlo(u));
+}
#else
-#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void))
+#define lightudV(g, o) check_exp(tvislightud(o), gcrefp((o)->gcr, void))
#endif
#define gcV(o) check_exp(tvisgcv(o), gcval(o))
#define strV(o) check_exp(tvisstr(o), &gcval(o)->str)
@@ -714,40 +864,70 @@ typedef union GCobj {
#define intV(o) check_exp(tvisint(o), (int32_t)(o)->i)
/* Macros to set tagged values. */
+#if LJ_GC64
+#define setitype(o, i) ((o)->it = ((i) << 15))
+#define setnilV(o) ((o)->it64 = -1)
+#define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47))
+#define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47))
+#else
#define setitype(o, i) ((o)->it = (i))
#define setnilV(o) ((o)->it = LJ_TNIL)
#define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x))
+#define setpriV(o, i) (setitype((o), (i)))
+#endif
-static LJ_AINLINE void setlightudV(TValue *o, void *p)
+static LJ_AINLINE void setrawlightudV(TValue *o, void *p)
{
-#if LJ_64
+#if LJ_GC64
+ o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47);
+#elif LJ_64
o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48);
#else
setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD);
#endif
}
-#if LJ_64
-#define checklightudptr(L, p) \
- (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p))
+#if LJ_FR2 || LJ_32
+#define contptr(f) ((void *)(f))
+#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f))
+#else
+#define contptr(f) \
+ ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin))
#define setcont(o, f) \
((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin)
-#else
-#define checklightudptr(L, p) (p)
-#define setcont(o, f) setlightudV((o), (void *)(f))
#endif
-#define tvchecklive(L, o) \
- UNUSED(L), lua_assert(!tvisgcv(o) || \
- ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o))))
+static LJ_AINLINE void checklivetv(lua_State *L, TValue *o, const char *msg)
+{
+ UNUSED(L); UNUSED(o); UNUSED(msg);
+#if LUA_USE_ASSERT
+ if (tvisgcv(o)) {
+ lj_assertL(~itype(o) == gcval(o)->gch.gct,
+ "mismatch of TValue type %d vs GC type %d",
+ ~itype(o), gcval(o)->gch.gct);
+ /* Copy of isdead check from lj_gc.h to avoid circular include. */
+ lj_assertL(!(gcval(o)->gch.marked & (G(L)->gc.currentwhite ^ 3) & 3), msg);
+ }
+#endif
+}
+
+static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype)
+{
+#if LJ_GC64
+ setgcreft(o->gcr, v, itype);
+#else
+ setgcref(o->gcr, v); setitype(o, itype);
+#endif
+}
-static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t itype)
+static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it)
{
- setgcref(o->gcr, v); setitype(o, itype); tvchecklive(L, o);
+ setgcVraw(o, v, it);
+ checklivetv(L, o, "store to dead GC object");
}
#define define_setV(name, type, tag) \
-static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \
+static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \
{ \
setgcV(L, o, obj2gco(v), tag); \
}
@@ -790,13 +970,17 @@ static LJ_AINLINE void setint64V(TValue *o, int64_t i)
/* Copy tagged values. */
static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2)
{
- *o1 = *o2; tvchecklive(L, o1);
+ *o1 = *o2;
+ checklivetv(L, o1, "copy of dead GC object");
}
/* -- Number to integer conversion ---------------------------------------- */
#if LJ_SOFTFP
LJ_ASMF int32_t lj_vm_tobit(double x);
+#if LJ_TARGET_MIPS64
+LJ_ASMF int32_t lj_vm_tointg(double x);
+#endif
#endif
static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
@@ -810,11 +994,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
#endif
}
-#if LJ_TARGET_X86 && !defined(__SSE2__)
-#define lj_num2int(n) lj_num2bit((n))
-#else
#define lj_num2int(n) ((int32_t)(n))
-#endif
/*
** This must match the JIT backend behavior. In particular for archs
@@ -859,6 +1039,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1];
#define lj_typename(o) (lj_obj_itypename[itypemap(o)])
/* Compare two objects without calling metamethods. */
-LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2);
+LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2);
+LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o);
#endif
diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c
index e9a6532a..7ef09a1f 100644
--- a/src/lj_opt_fold.c
+++ b/src/lj_opt_fold.c
@@ -14,18 +14,21 @@
#if LJ_HASJIT
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
+#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
-#endif
#include "lj_carith.h"
+#endif
#include "lj_vm.h"
#include "lj_strscan.h"
+#include "lj_strfmt.h"
/* Here's a short description how the FOLD engine processes instructions:
**
@@ -133,8 +136,8 @@
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
#define fins (&J->fold.ins)
-#define fleft (&J->fold.left)
-#define fright (&J->fold.right)
+#define fleft (J->fold.left)
+#define fright (J->fold.right)
#define knumleft (ir_knum(fleft)->n)
#define knumright (ir_knum(fright)->n)
@@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
-** And the GC is only driven forward if there is at least one allocation.
+** And the GC is only driven forward if there's at least one allocation.
*/
#define gcstep_barrier(J, ref) \
((ref) < J->chain[IR_LOOP] && \
(J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
- J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
+ J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
+ J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
/* -- Constant folding for FP numbers ------------------------------------- */
@@ -169,9 +173,6 @@ LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
-LJFOLD(NEG KNUM KNUM)
-LJFOLD(ABS KNUM KNUM)
-LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
@@ -183,6 +184,15 @@ LJFOLDF(kfold_numarith)
return lj_ir_knum(J, y);
}
+LJFOLD(NEG KNUM FLOAD)
+LJFOLD(ABS KNUM FLOAD)
+LJFOLDF(kfold_numabsneg)
+{
+ lua_Number a = knumleft;
+ lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
+ return lj_ir_knum(J, y);
+}
+
LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
@@ -202,13 +212,34 @@ LJFOLDF(kfold_fpmath)
return lj_ir_knum(J, y);
}
-LJFOLD(POW KNUM KINT)
+LJFOLD(CALLN KNUM any)
+LJFOLDF(kfold_fpcall1)
+{
+ const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
+ if (CCI_TYPE(ci) == IRT_NUM) {
+ double y = ((double (*)(double))ci->func)(knumleft);
+ return lj_ir_knum(J, y);
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(CALLN CARG IRCALL_atan2)
+LJFOLDF(kfold_fpcall2)
+{
+ if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
+ const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
+ double a = ir_knum(IR(fleft->op1))->n;
+ double b = ir_knum(IR(fleft->op2))->n;
+ double y = ((double (*)(double, double))ci->func)(a, b);
+ return lj_ir_knum(J, y);
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(POW KNUM KNUM)
LJFOLDF(kfold_numpow)
{
- lua_Number a = knumleft;
- lua_Number b = (lua_Number)fright->i;
- lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
- return lj_ir_knum(J, y);
+ return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD));
}
/* Must not use kfold_kref for numbers (could be NaN). */
@@ -247,7 +278,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
- default: lua_assert(0); break;
+ default: lj_assertX(0, "bad IR op %d", op); break;
}
return k1;
}
@@ -319,7 +350,7 @@ LJFOLDF(kfold_intcomp)
case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
case IR_ABC:
case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
- default: lua_assert(0); return FAILFOLD;
+ default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
}
}
@@ -333,21 +364,29 @@ LJFOLDF(kfold_intcomp0)
/* -- Constant folding for 64 bit integers -------------------------------- */
-static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
+static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
+ IROp op)
{
+ UNUSED(J);
+#if LJ_HASFFI
switch (op) {
-#if LJ_64 || LJ_HASFFI
case IR_ADD: k1 += k2; break;
case IR_SUB: k1 -= k2; break;
-#endif
-#if LJ_HASFFI
case IR_MUL: k1 *= k2; break;
case IR_BAND: k1 &= k2; break;
case IR_BOR: k1 |= k2; break;
case IR_BXOR: k1 ^= k2; break;
-#endif
- default: UNUSED(k2); lua_assert(0); break;
+ case IR_BSHL: k1 <<= (k2 & 63); break;
+ case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break;
+ case IR_BSAR: k1 >>= (k2 & 63); break;
+ case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break;
+ case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break;
+ default: lj_assertJ(0, "bad IR op %d", op); break;
}
+#else
+ UNUSED(k2); UNUSED(op);
+ lj_assertJ(0, "FFI IR op without FFI");
+#endif
return k1;
}
@@ -359,7 +398,7 @@ LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
- return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
+ return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
ir_k64(fright)->u64, (IROp)fins->o));
}
@@ -381,7 +420,7 @@ LJFOLDF(kfold_int64arith2)
}
return INT64FOLD(k1);
#else
- UNUSED(J); lua_assert(0); return FAILFOLD;
+ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
@@ -392,22 +431,12 @@ LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
-#if LJ_HASFFI || LJ_64
+#if LJ_HASFFI
uint64_t k = ir_k64(fleft)->u64;
int32_t sh = (fright->i & 63);
- switch ((IROp)fins->o) {
- case IR_BSHL: k <<= sh; break;
-#if LJ_HASFFI
- case IR_BSHR: k >>= sh; break;
- case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
- case IR_BROL: k = lj_rol(k, sh); break;
- case IR_BROR: k = lj_ror(k, sh); break;
-#endif
- default: lua_assert(0); break;
- }
- return INT64FOLD(k);
+ return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
#else
- UNUSED(J); lua_assert(0); return FAILFOLD;
+ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
@@ -417,7 +446,7 @@ LJFOLDF(kfold_bnot64)
#if LJ_HASFFI
return INT64FOLD(~ir_k64(fleft)->u64);
#else
- UNUSED(J); lua_assert(0); return FAILFOLD;
+ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
@@ -427,7 +456,7 @@ LJFOLDF(kfold_bswap64)
#if LJ_HASFFI
return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
- UNUSED(J); lua_assert(0); return FAILFOLD;
+ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
@@ -452,10 +481,10 @@ LJFOLDF(kfold_int64comp)
case IR_UGE: return CONDFOLD(a >= b);
case IR_ULE: return CONDFOLD(a <= b);
case IR_UGT: return CONDFOLD(a > b);
- default: lua_assert(0); return FAILFOLD;
+ default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
}
#else
- UNUSED(J); lua_assert(0); return FAILFOLD;
+ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
@@ -467,7 +496,7 @@ LJFOLDF(kfold_int64comp0)
return DROPFOLD;
return NEXTFOLD;
#else
- UNUSED(J); lua_assert(0); return FAILFOLD;
+ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
@@ -481,6 +510,7 @@ LJFOLDF(kfold_snew_kptr)
}
LJFOLD(SNEW any KINT)
+LJFOLD(XSNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
if (fright->i == 0)
@@ -492,7 +522,7 @@ LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
GCstr *str = ir_kstr(fleft);
- lua_assert((MSize)fright->i <= str->len);
+ lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}
@@ -510,7 +540,7 @@ LJFOLDF(kfold_strref_snew)
PHIBARRIER(ir);
fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
fins->op1 = str;
- fins->ot = IRT(IR_STRREF, IRT_P32);
+ fins->ot = IRT(IR_STRREF, IRT_PGC);
return RETRYFOLD;
}
}
@@ -528,6 +558,211 @@ LJFOLDF(kfold_strcmp)
return NEXTFOLD;
}
+/* -- Constant folding and forwarding for buffers ------------------------- */
+
+/*
+** Buffer ops perform stores, but their effect is limited to the buffer
+** itself. Also, buffer ops are chained: a use of an op implies a use of
+** all other ops up the chain. Conversely, if an op is unused, all ops
+** up the chain can go unused. This largely eliminates the need to treat
+** them as stores.
+**
+** Alas, treating them as normal (IRM_N) ops doesn't work, because they
+** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
+** or if FOLD is disabled.
+**
+** The compromise is to declare them as loads, emit them like stores and
+** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
+** fragments left over from CSE are eliminated by DCE.
+**
+** The string buffer methods emit a USE instead of a BUFSTR to keep the
+** chain alive.
+*/
+
+LJFOLD(BUFHDR any any)
+LJFOLDF(bufhdr_merge)
+{
+ return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
+}
+
+LJFOLD(BUFPUT any BUFSTR)
+LJFOLDF(bufput_bufstr)
+{
+ if ((J->flags & JIT_F_OPT_FWD)) {
+ IRRef hdr = fright->op2;
+    /* New buffer, no other buffer op in between and same buffer? */
+ if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
+ fleft->prev == hdr &&
+ fleft->op1 == IR(hdr)->op1 &&
+ !(irt_isphi(fright->t) && IR(hdr)->prev) &&
+ (!LJ_HASBUFFER || J->chain[IR_CALLA] < hdr)) {
+ IRRef ref = fins->op1;
+ IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */
+ IR(ref)->op1 = fright->op1;
+ return ref;
+ }
+ /* Replay puts to global temporary buffer. */
+ if (IR(hdr)->op2 == IRBUFHDR_RESET && !irt_isphi(fright->t)) {
+ IRIns *ir = IR(fright->op1);
+ /* For now only handle single string.reverse .lower .upper .rep. */
+ if (ir->o == IR_CALLL &&
+ ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
+ ir->op2 <= IRCALL_lj_buf_putstr_rep) {
+ IRIns *carg1 = IR(ir->op1);
+ if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
+ IRIns *carg2 = IR(carg1->op1);
+ if (carg2->op1 == hdr) {
+ return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
+ }
+ } else if (carg1->op1 == hdr) {
+ return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
+ }
+ }
+ }
+ }
+ return EMITFOLD; /* Always emit, CSE later. */
+}
+
+LJFOLD(BUFPUT any any)
+LJFOLDF(bufput_kgc)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
+ GCstr *s2 = ir_kstr(fright);
+ if (s2->len == 0) { /* Empty string? */
+ return LEFTFOLD;
+ } else {
+ if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
+ !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */
+ GCstr *s1 = ir_kstr(IR(fleft->op2));
+ IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
+	/* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */
+ IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */
+ return fins->op1;
+ }
+ }
+ }
+ return EMITFOLD; /* Always emit, CSE later. */
+}
+
+LJFOLD(BUFSTR any any)
+LJFOLDF(bufstr_kfold_cse)
+{
+ lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
+ fleft->o == IR_CALLL,
+ "bad buffer constructor IR op %d", fleft->o);
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
+ if (fleft->o == IR_BUFHDR) { /* No put operations? */
+ if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */
+ return lj_ir_kstr(J, &J2G(J)->strempty);
+ fins->op1 = fleft->op1;
+ fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */
+ return CSEFOLD;
+ } else if (fleft->o == IR_BUFPUT) {
+ IRIns *irb = IR(fleft->op1);
+ if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
+ return fleft->op2; /* Shortcut for a single put operation. */
+ }
+ }
+ /* Try to CSE the whole chain. */
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
+ IRRef ref = J->chain[IR_BUFSTR];
+ while (ref) {
+ IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
+ while (ira->o == irb->o && ira->op2 == irb->op2) {
+ lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
+ ira->o == IR_CALLL || ira->o == IR_CARG,
+ "bad buffer constructor IR op %d", ira->o);
+ if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
+ return ref; /* CSE succeeded. */
+ if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
+ break;
+ ira = IR(ira->op1);
+ irb = IR(irb->op1);
+ }
+ ref = irs->prev;
+ }
+ }
+ return EMITFOLD; /* No CSE possible. */
+}
+
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
+LJFOLDF(bufput_kfold_op)
+{
+ if (irref_isk(fleft->op2)) {
+ const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
+ SBuf *sb = lj_buf_tmp_(J->L);
+ sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
+ ir_kstr(IR(fleft->op2)));
+ fins->o = IR_BUFPUT;
+ fins->op1 = fleft->op1;
+ fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
+ return RETRYFOLD;
+ }
+ return EMITFOLD; /* Always emit, CSE later. */
+}
+
+LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
+LJFOLDF(bufput_kfold_rep)
+{
+ if (irref_isk(fleft->op2)) {
+ IRIns *irc = IR(fleft->op1);
+ if (irref_isk(irc->op2)) {
+ SBuf *sb = lj_buf_tmp_(J->L);
+ sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
+ fins->o = IR_BUFPUT;
+ fins->op1 = irc->op1;
+ fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
+ return RETRYFOLD;
+ }
+ }
+ return EMITFOLD; /* Always emit, CSE later. */
+}
+
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
+LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
+LJFOLDF(bufput_kfold_fmt)
+{
+ IRIns *irc = IR(fleft->op1);
+ lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
+ if (irref_isk(fleft->op2)) {
+ SFormat sf = (SFormat)IR(irc->op2)->i;
+ IRIns *ira = IR(fleft->op2);
+ SBuf *sb = lj_buf_tmp_(J->L);
+ switch (fins->op2) {
+ case IRCALL_lj_strfmt_putfxint:
+ sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
+ break;
+ case IRCALL_lj_strfmt_putfstr:
+ sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
+ break;
+ case IRCALL_lj_strfmt_putfchar:
+ sb = lj_strfmt_putfchar(sb, sf, ira->i);
+ break;
+ case IRCALL_lj_strfmt_putfnum_int:
+ case IRCALL_lj_strfmt_putfnum_uint:
+ case IRCALL_lj_strfmt_putfnum:
+ default: {
+ const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
+ sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
+ ir_knum(ira)->n);
+ break;
+ }
+ }
+ fins->o = IR_BUFPUT;
+ fins->op1 = irc->op1;
+ fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
+ return RETRYFOLD;
+ }
+ return EMITFOLD; /* Always emit, CSE later. */
+}
+
/* -- Constant folding of pointer arithmetic ------------------------------ */
LJFOLD(ADD KGC KINT)
@@ -648,21 +883,17 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
lua_Number n = knumleft;
- if (!(fins->op2 & IRCONV_TRUNC)) {
- int32_t k = lj_num2int(n);
- if (irt_isguard(fins->t) && n != (lua_Number)k) {
- /* We're about to create a guard which always fails, like CONV +1.5.
- ** Some pathological loops cause this during LICM, e.g.:
- ** local x,k,t = 0,1.5,{1,[1.5]=2}
- ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
- ** assert(x == 300)
- */
- return FAILFOLD;
- }
- return INTFOLD(k);
- } else {
- return INTFOLD((int32_t)n);
+ int32_t k = lj_num2int(n);
+ if (irt_isguard(fins->t) && n != (lua_Number)k) {
+ /* We're about to create a guard which always fails, like CONV +1.5.
+ ** Some pathological loops cause this during LICM, e.g.:
+ ** local x,k,t = 0,1.5,{1,[1.5]=2}
+ ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
+ ** assert(x == 300)
+ */
+ return FAILFOLD;
}
+ return INTFOLD(k);
}
LJFOLD(CONV KNUM IRCONV_U32_NUM)
@@ -690,16 +921,18 @@ LJFOLDF(kfold_conv_knum_u64_num)
return INT64FOLD(lj_num2u64(knumleft));
}
-LJFOLD(TOSTR KNUM)
+LJFOLD(TOSTR KNUM any)
LJFOLDF(kfold_tostr_knum)
{
- return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
+ return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
}
-LJFOLD(TOSTR KINT)
+LJFOLD(TOSTR KINT any)
LJFOLDF(kfold_tostr_kint)
{
- return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
+ return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
+ lj_strfmt_int(J->L, fleft->i) :
+ lj_strfmt_char(J->L, fleft->i));
}
LJFOLD(STRTO KGC)
@@ -747,13 +980,13 @@ LJFOLDF(shortcut_round)
return NEXTFOLD;
}
-LJFOLD(ABS ABS KNUM)
+LJFOLD(ABS ABS FLOAD)
LJFOLDF(shortcut_left)
{
return LEFTFOLD; /* f(g(x)) ==> g(x) */
}
-LJFOLD(ABS NEG KNUM)
+LJFOLD(ABS NEG FLOAD)
LJFOLDF(shortcut_dropleft)
{
PHIBARRIER(fleft);
@@ -833,8 +1066,10 @@ LJFOLDF(simplify_nummuldiv_k)
if (n == 1.0) { /* x o 1 ==> x */
return LEFTFOLD;
} else if (n == -1.0) { /* x o -1 ==> -x */
+ IRRef op1 = fins->op1;
+ fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */
+ fins->op1 = op1;
fins->o = IR_NEG;
- fins->op2 = (IRRef1)lj_ir_knum_neg(J);
return RETRYFOLD;
} else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
fins->o = IR_ADD;
@@ -874,52 +1109,17 @@ LJFOLDF(simplify_nummuldiv_negneg)
return RETRYFOLD;
}
-LJFOLD(POW any KINT)
-LJFOLDF(simplify_numpow_xk)
+LJFOLD(POW any KNUM)
+LJFOLDF(simplify_numpow_k)
{
- int32_t k = fright->i;
- TRef ref = fins->op1;
- if (k == 0) /* x ^ 0 ==> 1 */
+ if (knumright == 0.0) /* x ^ 0 ==> 1 */
return lj_ir_knum_one(J); /* Result must be a number, not an int. */
- if (k == 1) /* x ^ 1 ==> x */
+ else if (knumright == 1.0) /* x ^ 1 ==> x */
return LEFTFOLD;
- if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */
+ else if (knumright == 2.0) /* x ^ 2 ==> x * x */
+ return emitir(IRTN(IR_MUL), fins->op1, fins->op1);
+ else
return NEXTFOLD;
- if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */
- ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
- k = -k;
- }
- /* Unroll x^k for 1 <= k <= 65536. */
- for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */
- ref = emitir(IRTN(IR_MUL), ref, ref);
- if ((k >>= 1) != 0) { /* Handle trailing bits. */
- TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
- for (; k != 1; k >>= 1) {
- if (k & 1)
- ref = emitir(IRTN(IR_MUL), ref, tmp);
- tmp = emitir(IRTN(IR_MUL), tmp, tmp);
- }
- ref = emitir(IRTN(IR_MUL), ref, tmp);
- }
- return ref;
-}
-
-LJFOLD(POW KNUM any)
-LJFOLDF(simplify_numpow_kx)
-{
- lua_Number n = knumleft;
- if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
- fins->o = IR_CONV;
-#if LJ_TARGET_X86ORX64
- fins->op1 = fins->op2;
- fins->op2 = IRCONV_NUM_INT;
- fins->op2 = (IRRef1)lj_opt_fold(J);
-#endif
- fins->op1 = (IRRef1)lj_ir_knum_one(J);
- fins->o = IR_LDEXP;
- return RETRYFOLD;
- }
- return NEXTFOLD;
}
/* -- Simplify conversions ------------------------------------------------ */
@@ -1004,10 +1204,10 @@ LJFOLDF(simplify_tobit_conv)
{
/* Fold even across PHI to avoid expensive num->int conversions in loop. */
if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
- lua_assert(irt_isnum(fleft->t));
+ lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
return fleft->op1;
} else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
- lua_assert(irt_isnum(fleft->t));
+ lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
fins->o = IR_CONV;
fins->op1 = fleft->op1;
fins->op2 = (IRT_INT<<5)|IRT_U32;
@@ -1047,7 +1247,7 @@ LJFOLDF(simplify_conv_sext)
/* Use scalar evolution analysis results to strength-reduce sign-extension. */
if (ref == J->scev.idx) {
IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
- lua_assert(irt_isint(J->scev.t));
+ lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
ok_reduce:
#if LJ_TARGET_X64
@@ -1078,6 +1278,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
+#if LJ_64
+ UNUSED(J);
+ return NEXTFOLD;
+#else
IROp op = (IROp)fleft->o;
IRType t = irt_type(fins->t);
IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
@@ -1088,6 +1292,7 @@ LJFOLDF(simplify_conv_narrow)
fins->op1 = op1;
fins->op2 = op2;
return RETRYFOLD;
+#endif
}
/* Special CSE rule for CONV. */
@@ -1123,7 +1328,8 @@ LJFOLDF(narrow_convert)
/* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
if (J->chain[IR_LOOP])
return NEXTFOLD;
- lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
+ lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT,
+ "unexpected CONV TOBIT");
return lj_opt_narrow_convert(J);
}
@@ -1201,7 +1407,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
** But this is mainly intended for simple address arithmetic.
** Also it's easier for the backend to optimize the original multiplies.
*/
- if (k == 1) { /* i * 1 ==> i */
+ if (k == 0) { /* i * 0 ==> 0 */
+ return RIGHTFOLD;
+ } else if (k == 1) { /* i * 1 ==> i */
return LEFTFOLD;
} else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
fins->o = IR_BSHL;
@@ -1214,9 +1422,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k)
LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
- if (fright->i == 0) /* i * 0 ==> 0 */
- return INTFOLD(0);
- else if (fright->i > 0)
+ if (fright->i >= 0)
return simplify_intmul_k(J, fright->i);
return NEXTFOLD;
}
@@ -1224,21 +1430,20 @@ LJFOLDF(simplify_intmul_k32)
LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
- if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */
- return INT64FOLD(0);
-#if LJ_64
- /* NYI: SPLIT for BSHL and 32 bit backend support. */
- else if (ir_kint64(fright)->u64 < 0x80000000u)
+#if LJ_HASFFI
+ if (ir_kint64(fright)->u64 < 0x80000000u)
return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
-#endif
return NEXTFOLD;
+#else
+ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+#endif
}
LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
int32_t k = fright->i;
- lua_assert(k != 0);
+ lj_assertJ(k != 0, "integer mod 0");
if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */
fins->o = IR_BAND;
fins->op2 = lj_ir_kint(J, k-1);
@@ -1487,6 +1692,15 @@ LJFOLDF(simplify_shiftk_andk)
fins->op2 = (IRRef1)lj_ir_kint(J, k);
fins->ot = IRTI(IR_BAND);
return RETRYFOLD;
+ } else if (irk->o == IR_KINT64) {
+ uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
+ (IROp)fins->o);
+ IROpT ot = fleft->ot;
+ fins->op1 = fleft->op1;
+ fins->op1 = (IRRef1)lj_opt_fold(J);
+ fins->op2 = (IRRef1)lj_ir_kint64(J, k);
+ fins->ot = ot;
+ return RETRYFOLD;
}
return NEXTFOLD;
}
@@ -1502,6 +1716,47 @@ LJFOLDF(simplify_andk_shiftk)
return NEXTFOLD;
}
+LJFOLD(BAND BOR KINT)
+LJFOLD(BOR BAND KINT)
+LJFOLDF(simplify_andor_k)
+{
+ IRIns *irk = IR(fleft->op2);
+ PHIBARRIER(fleft);
+ if (irk->o == IR_KINT) {
+ int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
+ /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
+ /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
+ if (k == (fins->o == IR_BAND ? 0 : -1)) {
+ fins->op1 = fleft->op1;
+ return RETRYFOLD;
+ }
+ }
+ return NEXTFOLD;
+}
+
+LJFOLD(BAND BOR KINT64)
+LJFOLD(BOR BAND KINT64)
+LJFOLDF(simplify_andor_k64)
+{
+#if LJ_HASFFI
+ IRIns *irk = IR(fleft->op2);
+ PHIBARRIER(fleft);
+ if (irk->o == IR_KINT64) {
+ uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
+ (IROp)fins->o);
+ /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
+ /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
+ if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
+ fins->op1 = fleft->op1;
+ return RETRYFOLD;
+ }
+ }
+ return NEXTFOLD;
+#else
+ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
+#endif
+}
+
/* -- Reassociation ------------------------------------------------------- */
LJFOLD(ADD ADD KINT)
@@ -1531,11 +1786,11 @@ LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
-#if LJ_HASFFI || LJ_64
+#if LJ_HASFFI
IRIns *irk = IR(fleft->op2);
if (irk->o == IR_KINT64) {
- uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
- ir_k64(fright)->u64, (IROp)fins->o);
+ uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
+ (IROp)fins->o);
PHIBARRIER(fleft);
fins->op1 = fleft->op1;
fins->op2 = (IRRef1)lj_ir_kint64(J, k);
@@ -1543,12 +1798,10 @@ LJFOLDF(reassoc_intarith_k64)
}
return NEXTFOLD;
#else
- UNUSED(J); lua_assert(0); return FAILFOLD;
+ UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
-LJFOLD(MIN MIN any)
-LJFOLD(MAX MAX any)
LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
@@ -1558,6 +1811,15 @@ LJFOLDF(reassoc_dup)
return NEXTFOLD;
}
+LJFOLD(MIN MIN any)
+LJFOLD(MAX MAX any)
+LJFOLDF(reassoc_dup_minmax)
+{
+ if (fins->op2 == fleft->op2)
+ return LEFTFOLD; /* (a o b) o b ==> a o b */
+ return NEXTFOLD;
+}
+
LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
@@ -1596,23 +1858,12 @@ LJFOLDF(reassoc_shift)
return NEXTFOLD;
}
-LJFOLD(MIN MIN KNUM)
-LJFOLD(MAX MAX KNUM)
LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
IRIns *irk = IR(fleft->op2);
- if (irk->o == IR_KNUM) {
- lua_Number a = ir_knum(irk)->n;
- lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
- if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
- return LEFTFOLD;
- PHIBARRIER(fleft);
- fins->op1 = fleft->op1;
- fins->op2 = (IRRef1)lj_ir_knum(J, y);
- return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
- } else if (irk->o == IR_KINT) {
+ if (irk->o == IR_KINT) {
int32_t a = irk->i;
int32_t y = kfold_intop(a, fright->i, fins->o);
if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
@@ -1625,24 +1876,6 @@ LJFOLDF(reassoc_minmax_k)
return NEXTFOLD;
}
-LJFOLD(MIN MAX any)
-LJFOLD(MAX MIN any)
-LJFOLDF(reassoc_minmax_left)
-{
- if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
- return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
- return NEXTFOLD;
-}
-
-LJFOLD(MIN any MAX)
-LJFOLD(MAX any MIN)
-LJFOLDF(reassoc_minmax_right)
-{
- if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
- return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
- return NEXTFOLD;
-}
-
/* -- Array bounds check elimination -------------------------------------- */
/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
@@ -1769,8 +2002,6 @@ LJFOLDF(comm_comp)
LJFOLD(BAND any any)
LJFOLD(BOR any any)
-LJFOLD(MIN any any)
-LJFOLD(MAX any any)
LJFOLDF(comm_dup)
{
if (fins->op1 == fins->op2) /* x o x ==> x */
@@ -1778,6 +2009,15 @@ LJFOLDF(comm_dup)
return fold_comm_swap(J);
}
+LJFOLD(MIN any any)
+LJFOLD(MAX any any)
+LJFOLDF(comm_dup_minmax)
+{
+ if (fins->op1 == fins->op2) /* x o x ==> x */
+ return LEFTFOLD;
+ return NEXTFOLD;
+}
+
LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
@@ -1814,7 +2054,7 @@ LJFOLDF(merge_eqne_snew_kgc)
{
GCstr *kstr = ir_kstr(fright);
int32_t len = (int32_t)kstr->len;
- lua_assert(irt_isstr(fins->t));
+ lj_assertJ(irt_isstr(fins->t), "bad equality IR type");
#if LJ_TARGET_UNALIGNED
#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
@@ -1878,7 +2118,7 @@ LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
UNUSED(J);
- lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
+ lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
return TREF_NIL;
}
@@ -1888,8 +2128,8 @@ LJFOLDX(lj_opt_fwd_hload)
LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)
-LJFOLD(CALLL any IRCALL_lj_tab_len)
-LJFOLDX(lj_opt_fwd_tab_len)
+LJFOLD(ALEN any any)
+LJFOLDX(lj_opt_fwd_alen)
/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for UREFO across a GC step (see below).
@@ -1950,6 +2190,7 @@ LJFOLDF(fwd_href_tdup)
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
+** LREF is constant (per trace) since coroutine switches are not inlined.
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
@@ -2013,6 +2254,35 @@ LJFOLDF(fload_str_len_snew)
return NEXTFOLD;
}
+LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
+LJFOLDF(fload_str_len_tostr)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
+ return INTFOLD(1);
+ return NEXTFOLD;
+}
+
+LJFOLD(FLOAD any IRFL_SBUF_W)
+LJFOLD(FLOAD any IRFL_SBUF_E)
+LJFOLD(FLOAD any IRFL_SBUF_B)
+LJFOLD(FLOAD any IRFL_SBUF_L)
+LJFOLD(FLOAD any IRFL_SBUF_REF)
+LJFOLD(FLOAD any IRFL_SBUF_R)
+LJFOLDF(fload_sbuf)
+{
+ TRef tr = lj_opt_fwd_fload(J);
+ return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
+}
+
+/* The fast function ID of function objects is immutable. */
+LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
+LJFOLDF(fload_func_ffid_kgc)
+{
+ if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
+ return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
+ return NEXTFOLD;
+}
+
/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
@@ -2059,6 +2329,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew)
}
LJFOLD(FLOAD any IRFL_STR_LEN)
+LJFOLD(FLOAD any IRFL_FUNC_ENV)
+LJFOLD(FLOAD any IRFL_THREAD_ENV)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
@@ -2078,7 +2350,7 @@ LJFOLDF(fwd_sload)
TRef tr = lj_opt_cse(J);
return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
} else {
- lua_assert(J->slot[fins->op1] != 0);
+ lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
return J->slot[fins->op1];
}
}
@@ -2135,6 +2407,17 @@ LJFOLDF(barrier_tnew_tdup)
return DROPFOLD;
}
+/* -- Profiling ----------------------------------------------------------- */
+
+LJFOLD(PROF any any)
+LJFOLDF(prof)
+{
+ IRRef ref = J->chain[IR_PROF];
+ if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */
+ return ref;
+ return EMITFOLD;
+}
+
/* -- Stores and allocations ---------------------------------------------- */
/* Stores and allocations cannot be folded or passed on to CSE in general.
@@ -2157,8 +2440,10 @@ LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)
LJFOLD(NEWREF any any) /* Treated like a store. */
-LJFOLD(CALLS any any)
+LJFOLD(TMPREF any any)
+LJFOLD(CALLA any any)
LJFOLD(CALLL any any) /* Safeguard fallback. */
+LJFOLD(CALLS any any)
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any) /* Modifies BASE. */
@@ -2191,8 +2476,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
IRRef ref;
if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
- lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
- JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
+ lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
+ JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
+ "bad JIT_F_OPT_DEFAULT");
/* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
return lj_opt_cse(J);
@@ -2217,10 +2503,14 @@ retry:
if (fins->op1 >= J->cur.nk) {
key += (uint32_t)IR(fins->op1)->o << 10;
*fleft = *IR(fins->op1);
+ if (fins->op1 < REF_TRUE)
+ fleft[1] = IR(fins->op1)[1];
}
if (fins->op2 >= J->cur.nk) {
key += (uint32_t)IR(fins->op2)->o;
*fright = *IR(fins->op2);
+ if (fins->op2 < REF_TRUE)
+ fright[1] = IR(fins->op2)[1];
} else {
key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
}
@@ -2250,7 +2540,7 @@ retry:
return lj_ir_kint(J, fins->i);
if (ref == FAILFOLD)
lj_trace_err(J, LJ_TRERR_GFAIL);
- lua_assert(ref == DROPFOLD);
+ lj_assertJ(ref == DROPFOLD, "bad fold result");
return REF_DROP;
}
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index 466f88de..ee3ee049 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -11,7 +11,7 @@
#if LJ_HASJIT
#include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
@@ -225,6 +225,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
/* Setup new snapshot. */
snap->mapofs = (uint32_t)nmapofs;
snap->ref = (IRRef1)J->cur.nins;
+ snap->mcofs = 0;
snap->nslots = nslots;
snap->topslot = osnap->topslot;
snap->count = 0;
@@ -254,9 +255,16 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap,
J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap);
}
+typedef struct LoopState {
+ jit_State *J;
+ IRRef1 *subst;
+ MSize sizesubst;
+} LoopState;
+
/* Unroll loop. */
-static void loop_unroll(jit_State *J)
+static void loop_unroll(LoopState *lps)
{
+ jit_State *J = lps->J;
IRRef1 phi[LJ_MAX_PHI];
uint32_t nphi = 0;
IRRef1 *subst;
@@ -265,13 +273,13 @@ static void loop_unroll(jit_State *J)
SnapEntry *loopmap, *psentinel;
IRRef ins, invar;
- /* Use temp buffer for substitution table.
+ /* Allocate substitution table.
** Only non-constant refs in [REF_BIAS,invar) are valid indexes.
- ** Caveat: don't call into the VM or run the GC or the buffer may be gone.
*/
invar = J->cur.nins;
- subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf,
- (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS;
+ lps->sizesubst = invar - REF_BIAS;
+ lps->subst = lj_mem_newvec(J->L, lps->sizesubst, IRRef1);
+ subst = lps->subst - REF_BIAS;
subst[REF_BASE] = REF_BASE;
/* LOOP separates the pre-roll from the loop body. */
@@ -292,7 +300,8 @@ static void loop_unroll(jit_State *J)
loopmap = &J->cur.snapmap[loopsnap->mapofs];
/* The PC of snapshot #0 and the loop snapshot must match. */
psentinel = &loopmap[loopsnap->nent];
- lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]);
+ lj_assertJ(*psentinel == J->cur.snapmap[J->cur.snap[0].nent],
+ "mismatched PC for loop snapshot");
*psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */
/* Start substitution with snapshot #1 (#0 is empty for root traces). */
@@ -345,10 +354,12 @@ static void loop_unroll(jit_State *J)
irr = IR(ref);
goto phiconv;
}
- } else if (ref != REF_DROP && irr->o == IR_CONV &&
- ref > invar && irr->op1 < invar) {
- /* May need an extra PHI for a CONV. */
- ref = irr->op1;
+ } else if (ref != REF_DROP && ref > invar &&
+ ((irr->o == IR_CONV && irr->op1 < invar) ||
+ (irr->o == IR_ALEN && irr->op2 < invar &&
+ irr->op2 != REF_NIL))) {
+ /* May need an extra PHI for a CONV or ALEN hint. */
+ ref = irr->o == IR_CONV ? irr->op1 : irr->op2;
irr = IR(ref);
phiconv:
if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) {
@@ -363,7 +374,7 @@ static void loop_unroll(jit_State *J)
}
if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */
J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs;
- lua_assert(J->cur.nsnapmap <= J->sizesnapmap);
+ lj_assertJ(J->cur.nsnapmap <= J->sizesnapmap, "bad snapshot map index");
*psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */
loop_emit_phi(J, subst, phi, nphi, onsnap);
@@ -396,7 +407,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap)
static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud)
{
UNUSED(L); UNUSED(dummy);
- loop_unroll((jit_State *)ud);
+ loop_unroll((LoopState *)ud);
return NULL;
}
@@ -406,7 +417,13 @@ int lj_opt_loop(jit_State *J)
IRRef nins = J->cur.nins;
SnapNo nsnap = J->cur.nsnap;
MSize nsnapmap = J->cur.nsnapmap;
- int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt);
+ LoopState lps;
+ int errcode;
+ lps.J = J;
+ lps.subst = NULL;
+ lps.sizesubst = 0;
+ errcode = lj_vm_cpcall(J->L, NULL, &lps, cploop_opt);
+ lj_mem_freevec(J2G(J), lps.subst, lps.sizesubst, IRRef1);
if (LJ_UNLIKELY(errcode)) {
lua_State *L = J->L;
if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */
diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c
index feec6bb7..09de2f05 100644
--- a/src/lj_opt_mem.c
+++ b/src/lj_opt_mem.c
@@ -17,12 +17,14 @@
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
+#include "lj_ircall.h"
+#include "lj_dispatch.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
#define fins (&J->fold.ins)
-#define fleft (&J->fold.left)
-#define fright (&J->fold.right)
+#define fleft (J->fold.left)
+#define fright (J->fold.right)
/*
** Caveat #1: return value is not always a TRef -- only use with tref_ref().
@@ -55,8 +57,8 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb)
{
IRIns *taba = IR(ta), *tabb = IR(tb);
int newa, newb;
- lua_assert(ta != tb);
- lua_assert(irt_istab(taba->t) && irt_istab(tabb->t));
+ lj_assertJ(ta != tb, "bad usage");
+ lj_assertJ(irt_istab(taba->t) && irt_istab(tabb->t), "bad usage");
/* Disambiguate new allocations. */
newa = (taba->o == IR_TNEW || taba->o == IR_TDUP);
newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP);
@@ -70,6 +72,34 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb)
return aa_escape(J, taba, tabb);
}
+/* Check whether there's no aliasing table.clear. */
+static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta)
+{
+ IRRef ref = J->chain[IR_CALLS];
+ while (ref > lim) {
+ IRIns *calls = IR(ref);
+ if (calls->op2 == IRCALL_lj_tab_clear &&
+ (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO))
+ return 0; /* Conflict. */
+ ref = calls->prev;
+ }
+ return 1; /* No conflict. Can safely FOLD/CSE. */
+}
+
+/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */
+int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
+{
+ IRRef ta = fins->op1;
+ IRRef ref = J->chain[IR_NEWREF];
+ while (ref > lim) {
+ IRIns *newref = IR(ref);
+ if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO)
+ return 0; /* Conflict. */
+ ref = newref->prev;
+ }
+ return fwd_aa_tab_clear(J, lim, ta);
+}
+
/* Alias analysis for array and hash access using key-based disambiguation. */
static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
{
@@ -98,7 +128,7 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
/* Disambiguate array references based on index arithmetic. */
int32_t ofsa = 0, ofsb = 0;
IRRef basea = ka, baseb = kb;
- lua_assert(refb->o == IR_AREF);
+ lj_assertJ(refb->o == IR_AREF, "expected AREF");
/* Gather base and offset from t[base] or t[base+-ofs]. */
if (keya->o == IR_ADD && irref_isk(keya->op2)) {
basea = keya->op1;
@@ -116,8 +146,9 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb)
return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */
} else {
/* Disambiguate hash references based on the type of their keys. */
- lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
- (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF));
+ lj_assertJ((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) &&
+ (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF),
+ "bad xREF IR op %d or %d", refa->o, refb->o);
if (!irt_sametype(keya->t, keyb->t))
return ALIAS_NO; /* Different key types. */
}
@@ -151,7 +182,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr;
IRRef tab = ir->op1;
ir = IR(tab);
- if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) {
+ if ((ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) &&
+ fwd_aa_tab_clear(J, tab, tab)) {
/* A NEWREF with a number key may end up pointing to the array part.
** But it's referenced from HSTORE and not found in the ASTORE chain.
** For now simply consider this a conflict without forwarding anything.
@@ -191,7 +223,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref)
if (key->o == IR_KSLOT) key = IR(key->op1);
lj_ir_kvalue(J->L, &keyv, key);
tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv);
- lua_assert(itype2irt(tv) == irt_type(fins->t));
+ lj_assertJ(itype2irt(tv) == irt_type(fins->t),
+ "mismatched type in constant table");
if (irt_isnum(fins->t))
return lj_ir_knum_u64(J, tv->u64);
else if (LJ_DUALNUM && irt_isint(fins->t))
@@ -265,7 +298,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J)
while (ref > tab) {
IRIns *newref = IR(ref);
if (tab == newref->op1) {
- if (fright->op1 == newref->op2)
+ if (fright->op1 == newref->op2 && fwd_aa_tab_clear(J, ref, tab))
return ref; /* Forward from NEWREF. */
else
goto docse;
@@ -275,7 +308,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J)
ref = newref->prev;
}
/* No conflicting NEWREF: key location unchanged for HREFK of TDUP. */
- if (IR(tab)->o == IR_TDUP)
+ if (IR(tab)->o == IR_TDUP && fwd_aa_tab_clear(J, tab, tab))
fins->t.irt &= ~IRT_GUARD; /* Drop HREFK guard. */
docse:
return CSEFOLD;
@@ -309,20 +342,6 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J)
return 1; /* No conflict. Can fold to niltv. */
}
-/* Check whether there's no aliasing NEWREF for the left operand. */
-int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim)
-{
- IRRef ta = fins->op1;
- IRRef ref = J->chain[IR_NEWREF];
- while (ref > lim) {
- IRIns *newref = IR(ref);
- if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO)
- return 0; /* Conflict. */
- ref = newref->prev;
- }
- return 1; /* No conflict. Can safely FOLD/CSE. */
-}
-
/* ASTORE/HSTORE elimination. */
TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
{
@@ -346,9 +365,12 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J)
/* Different value: try to eliminate the redundant store. */
if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
IRIns *ir;
- /* Check for any intervening guards (includes conflicting loads). */
+ /* Check for any intervening guards (includes conflicting loads).
+ ** Note that lj_tab_keyindex and lj_vm_next don't need guards,
+ ** since they are followed by at least one guarded VLOAD.
+ */
for (ir = IR(J->cur.nins-1); ir > store; ir--)
- if (irt_isguard(ir->t) || ir->o == IR_CALLL)
+ if (irt_isguard(ir->t) || ir->o == IR_ALEN)
goto doemit; /* No elimination possible. */
/* Remove redundant store from chain and replace with NOP. */
*refp = store->prev;
@@ -363,6 +385,67 @@ doemit:
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
}
+/* ALEN forwarding. */
+TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J)
+{
+ IRRef tab = fins->op1; /* Table reference. */
+ IRRef lim = tab; /* Search limit. */
+ IRRef ref;
+
+ /* Search for conflicting HSTORE with numeric key. */
+ ref = J->chain[IR_HSTORE];
+ while (ref > lim) {
+ IRIns *store = IR(ref);
+ IRIns *href = IR(store->op1);
+ IRIns *key = IR(href->op2);
+ if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
+ lim = ref; /* Conflicting store found, limits search for ALEN. */
+ break;
+ }
+ ref = store->prev;
+ }
+
+ /* Try to find a matching ALEN. */
+ ref = J->chain[IR_ALEN];
+ while (ref > lim) {
+ /* CSE for ALEN only depends on the table, not the hint. */
+ if (IR(ref)->op1 == tab) {
+ IRRef sref;
+
+ /* Search for aliasing table.clear. */
+ if (!fwd_aa_tab_clear(J, ref, tab))
+ break;
+
+ /* Search for hint-forwarding or conflicting store. */
+ sref = J->chain[IR_ASTORE];
+ while (sref > ref) {
+ IRIns *store = IR(sref);
+ IRIns *aref = IR(store->op1);
+ IRIns *fref = IR(aref->op1);
+ if (tab == fref->op1) { /* ASTORE to the same table. */
+ /* Detect t[#t+1] = x idiom for push. */
+ IRIns *idx = IR(aref->op2);
+ if (!irt_isnil(store->t) &&
+ idx->o == IR_ADD && idx->op1 == ref &&
+ IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) {
+ /* Note: this requires an extra PHI check in loop unroll. */
+ fins->op2 = aref->op2; /* Set ALEN hint. */
+ }
+ goto doemit; /* Conflicting store, possibly giving a hint. */
+ } else if (aa_table(J, tab, fref->op1) != ALIAS_NO) {
+ goto doemit; /* Conflicting store. */
+ }
+ sref = store->prev;
+ }
+
+ return ref; /* Plain ALEN forwarding. */
+ }
+ ref = IR(ref)->prev;
+ }
+doemit:
+ return EMITFOLD;
+}
+
/* -- ULOAD forwarding ---------------------------------------------------- */
/* The current alias analysis for upvalues is very simplistic. It only
@@ -412,7 +495,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J)
cselim:
/* Try to find a matching load. Below the conflicting store, if any. */
-
ref = J->chain[IR_ULOAD];
while (ref > lim) {
IRIns *ir = IR(ref);
@@ -542,8 +624,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J)
goto doemit;
break; /* Otherwise continue searching. */
case ALIAS_MUST:
- if (store->op2 == val) /* Same value: drop the new store. */
- return DROPFOLD;
+ if (store->op2 == val &&
+ !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R))
+ return DROPFOLD; /* Same value: drop the new store. */
/* Different value: try to eliminate the redundant store. */
if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */
IRIns *ir;
@@ -564,6 +647,29 @@ doemit:
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
}
+/* Check whether there's no aliasing buffer op between IRFL_SBUF_*. */
+int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim)
+{
+ IRRef ref;
+ if (J->chain[IR_BUFPUT] > lim)
+ return 0; /* Conflict. */
+ ref = J->chain[IR_CALLS];
+ while (ref > lim) {
+ IRIns *ir = IR(ref);
+ if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
+ return 0; /* Conflict. */
+ ref = ir->prev;
+ }
+ ref = J->chain[IR_CALLL];
+ while (ref > lim) {
+ IRIns *ir = IR(ref);
+ if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr)
+ return 0; /* Conflict. */
+ ref = ir->prev;
+ }
+ return 1; /* No conflict. Can safely FOLD/CSE. */
+}
+
/* -- XLOAD forwarding and XSTORE elimination ----------------------------- */
/* Find cdata allocation for a reference (if any). */
@@ -815,35 +921,6 @@ doemit:
return EMITFOLD; /* Otherwise we have a conflict or simply no match. */
}
-/* -- Forwarding of lj_tab_len -------------------------------------------- */
-
-/* This is rather simplistic right now, but better than nothing. */
-TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J)
-{
- IRRef tab = fins->op1; /* Table reference. */
- IRRef lim = tab; /* Search limit. */
- IRRef ref;
-
- /* Any ASTORE is a conflict and limits the search. */
- if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE];
-
- /* Search for conflicting HSTORE with numeric key. */
- ref = J->chain[IR_HSTORE];
- while (ref > lim) {
- IRIns *store = IR(ref);
- IRIns *href = IR(store->op1);
- IRIns *key = IR(href->op2);
- if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) {
- lim = ref; /* Conflicting store found, limits search for TLEN. */
- break;
- }
- ref = store->prev;
- }
-
- /* Try to find a matching load. Below the conflicting store, if any. */
- return lj_opt_cselim(J, lim);
-}
-
/* -- ASTORE/HSTORE previous type analysis -------------------------------- */
/* Check whether the previous value for a table store is non-nil.
diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c
index 34fe6c39..586f1bc7 100644
--- a/src/lj_opt_narrow.c
+++ b/src/lj_opt_narrow.c
@@ -372,17 +372,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
} else if (op == NARROW_CONV) {
*sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */
} else if (op == NARROW_SEXT) {
- lua_assert(sp >= nc->stack+1);
+ lj_assertJ(sp >= nc->stack+1, "stack underflow");
sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1],
(IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
} else if (op == NARROW_INT) {
- lua_assert(next < last);
+ lj_assertJ(next < last, "missing arg to NARROW_INT");
*sp++ = nc->t == IRT_I64 ?
lj_ir_kint64(J, (int64_t)(int32_t)*next++) :
lj_ir_kint(J, *next++);
} else { /* Regular IROpT. Pops two operands and pushes one result. */
IRRef mode = nc->mode;
- lua_assert(sp >= nc->stack+2);
+ lj_assertJ(sp >= nc->stack+2, "stack underflow");
sp--;
/* Omit some overflow checks for array indexing. See comments above. */
if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) {
@@ -398,7 +398,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc)
narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode);
}
}
- lua_assert(sp == nc->stack+1);
+ lj_assertJ(sp == nc->stack+1, "stack misalignment");
return nc->stack[0];
}
@@ -452,7 +452,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode)
TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr)
{
IRIns *ir;
- lua_assert(tref_isnumber(tr));
+ lj_assertJ(tref_isnumber(tr), "expected number type");
if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX);
/* Omit some overflow checks for array indexing. See comments above. */
@@ -499,7 +499,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr)
/* Narrow C array index (overflow undefined). */
TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
{
- lua_assert(tref_isnumber(tr));
+ lj_assertJ(tref_isnumber(tr), "expected number type");
if (tref_isnum(tr))
return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY);
/* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
@@ -551,11 +551,16 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
{
rc = conv_str_tonum(J, rc, vc);
if (tref_isinteger(rc)) {
- if ((uint32_t)numberVint(vc) != 0x80000000u)
- return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc);
+ uint32_t k = (uint32_t)numberVint(vc);
+ if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) {
+ TRef zero = lj_ir_kint(J, 0);
+ if (!LJ_DUALNUM)
+ emitir(IRTGI(IR_NE), rc, zero);
+ return emitir(IRTGI(IR_SUBOV), zero, rc);
+ }
rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
}
- return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J));
+ return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG));
}
/* Narrowing of modulo operator. */
@@ -579,44 +584,6 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
return emitir(IRTN(IR_SUB), rb, tmp);
}
-/* Narrowing of power operator or math.pow. */
-TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
-{
- rb = conv_str_tonum(J, rb, vb);
- rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */
- rc = conv_str_tonum(J, rc, vc);
- /* Narrowing must be unconditional to preserve (-x)^i semantics. */
- if (tvisint(vc) || numisint(numV(vc))) {
- int checkrange = 0;
- /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
- if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
- int32_t k = numberVint(vc);
- if (!(k >= -65536 && k <= 65536)) goto split_pow;
- checkrange = 1;
- }
- if (!tref_isinteger(rc)) {
- /* Guarded conversion to integer! */
- rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
- }
- if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
- TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
- emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
- }
- return emitir(IRTN(IR_POW), rb, rc);
- }
-split_pow:
- /* FOLD covers most cases, but some are easier to do here. */
- if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
- return rb; /* 1 ^ x ==> 1 */
- rc = lj_ir_tonum(J, rc);
- if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5)
- return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */
- /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */
- rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2);
- rc = emitir(IRTN(IR_MUL), rb, rc);
- return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2);
-}
-
/* -- Predictive narrowing of induction variables ------------------------- */
/* Narrow a single runtime value. */
@@ -630,9 +597,10 @@ static int narrow_forl(jit_State *J, cTValue *o)
/* Narrow the FORL index type by looking at the runtime values. */
IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv)
{
- lua_assert(tvisnumber(&tv[FORL_IDX]) &&
+ lj_assertJ(tvisnumber(&tv[FORL_IDX]) &&
tvisnumber(&tv[FORL_STOP]) &&
- tvisnumber(&tv[FORL_STEP]));
+ tvisnumber(&tv[FORL_STEP]),
+ "expected number types");
/* Narrow only if the runtime values of start/stop/step are all integers. */
if (narrow_forl(J, &tv[FORL_IDX]) &&
narrow_forl(J, &tv[FORL_STOP]) &&
diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c
index 784d9a1a..4b9008be 100644
--- a/src/lj_opt_sink.c
+++ b/src/lj_opt_sink.c
@@ -86,8 +86,7 @@ static void sink_mark_ins(jit_State *J)
switch (ir->o) {
case IR_BASE:
return; /* Finished. */
- case IR_CALLL: /* IRCALL_lj_tab_len */
- case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR:
+ case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN:
irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */
break;
case IR_FLOAD:
@@ -173,8 +172,8 @@ static void sink_remark_phi(jit_State *J)
/* Sweep instructions and tag sunken allocations and stores. */
static void sink_sweep_ins(jit_State *J)
{
- IRIns *ir, *irfirst = IR(J->cur.nk);
- for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) {
+ IRIns *ir, *irbase = IR(REF_BASE);
+ for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) {
switch (ir->o) {
case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: {
IRIns *ira = sink_checkalloc(J, ir);
@@ -224,6 +223,13 @@ static void sink_sweep_ins(jit_State *J)
break;
}
}
+ for (ir = IR(J->cur.nk); ir < irbase; ir++) {
+ irt_clearmark(ir->t);
+ ir->prev = REGSP_INIT;
+ /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */
+ if (irt_is64(ir->t) && ir->o != IR_KNULL)
+ ir++;
+ }
}
/* Allocation sinking and store sinking.
diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c
index 190b6ba4..506b9814 100644
--- a/src/lj_opt_split.c
+++ b/src/lj_opt_split.c
@@ -8,14 +8,15 @@
#include "lj_obj.h"
-#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
+#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))
#include "lj_err.h"
-#include "lj_str.h"
+#include "lj_buf.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
+#include "lj_dispatch.h"
#include "lj_vm.h"
/* SPLIT pass:
@@ -139,6 +140,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
+#endif
/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -155,7 +157,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
-#endif
/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
@@ -192,9 +193,121 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
nref = ir->op1;
if (ofs == 0) return nref;
}
- return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
+ return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
}
+#if LJ_HASFFI
+static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
+ IRIns *oir, IRIns *nir, IRIns *ir)
+{
+ IROp op = ir->o;
+ IRRef kref = nir->op2;
+ if (irref_isk(kref)) { /* Optimize constant shifts. */
+ int32_t k = (IR(kref)->i & 63);
+ IRRef lo = nir->op1, hi = hisubst[ir->op1];
+ if (op == IR_BROL || op == IR_BROR) {
+ if (op == IR_BROR) k = (-k & 63);
+ if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
+ if (k == 0) {
+ passthrough:
+ J->cur.nins--;
+ ir->prev = lo;
+ return hi;
+ } else {
+ TRef k1, k2;
+ IRRef t1, t2, t3, t4;
+ J->cur.nins--;
+ k1 = lj_ir_kint(J, k);
+ k2 = lj_ir_kint(J, (-k & 31));
+ t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
+ t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
+ t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
+ t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
+ ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
+ return split_emit(J, IRTI(IR_BOR), t2, t3);
+ }
+ } else if (k == 0) {
+ goto passthrough;
+ } else if (k < 32) {
+ if (op == IR_BSHL) {
+ IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
+ IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
+ return split_emit(J, IRTI(IR_BOR), t1, t2);
+ } else {
+ IRRef t1 = ir->prev, t2;
+ lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
+ nir->o = IR_BSHR;
+ t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
+ ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
+ return split_emit(J, IRTI(op), hi, kref);
+ }
+ } else {
+ if (op == IR_BSHL) {
+ if (k == 32)
+ J->cur.nins--;
+ else
+ lo = ir->prev;
+ ir->prev = lj_ir_kint(J, 0);
+ return lo;
+ } else {
+ lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
+ if (k == 32) {
+ J->cur.nins--;
+ ir->prev = hi;
+ } else {
+ nir->op1 = hi;
+ }
+ if (op == IR_BSHR)
+ return lj_ir_kint(J, 0);
+ else
+ return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
+ }
+ }
+ }
+ return split_call_li(J, hisubst, oir, ir,
+ op - IR_BSHL + IRCALL_lj_carith_shl64);
+}
+
+static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
+ IRIns *nir, IRIns *ir)
+{
+ IROp op = ir->o;
+ IRRef hi, kref = nir->op2;
+ if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */
+ int32_t k = IR(kref)->i;
+ if (k == 0 || k == -1) {
+ if (op == IR_BAND) k = ~k;
+ if (k == 0) {
+ J->cur.nins--;
+ ir->prev = nir->op1;
+ } else if (op == IR_BXOR) {
+ nir->o = IR_BNOT;
+ nir->op2 = 0;
+ } else {
+ J->cur.nins--;
+ ir->prev = kref;
+ }
+ }
+ }
+ hi = hisubst[ir->op1];
+ kref = hisubst[ir->op2];
+ if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */
+ int32_t k = IR(kref)->i;
+ if (k == 0 || k == -1) {
+ if (op == IR_BAND) k = ~k;
+ if (k == 0) {
+ return hi;
+ } else if (op == IR_BXOR) {
+ return split_emit(J, IRTI(IR_BNOT), hi, 0);
+ } else {
+ return kref;
+ }
+ }
+ }
+ return split_emit(J, IRTI(op), hi, kref);
+}
+#endif
+
/* Substitute references of a snapshot. */
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
@@ -214,7 +327,7 @@ static void split_ir(jit_State *J)
IRRef nins = J->cur.nins, nk = J->cur.nk;
MSize irlen = nins - nk;
MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
- IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need);
+ IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
IRRef1 *hisubst;
IRRef ref, snref;
SnapShot *snap;
@@ -241,6 +354,8 @@ static void split_ir(jit_State *J)
ir->prev = ref; /* Identity substitution for loword. */
hisubst[ref] = 0;
}
+ if (irt_is64(ir->t) && ir->o != IR_KNULL)
+ ref++;
}
/* Process old IR instructions. */
@@ -285,35 +400,11 @@ static void split_ir(jit_State *J)
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
break;
case IR_POW:
- hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
+ hi = split_call_ll(J, hisubst, oir, ir, IRCALL_pow);
break;
case IR_FPMATH:
- /* Try to rejoin pow from EXP2, MUL and LOG2. */
- if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
- IRIns *irp = IR(nir->op1);
- if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
- IRIns *irm4 = IR(irp->op1);
- IRIns *irm3 = IR(irm4->op1);
- IRIns *irm12 = IR(irm3->op1);
- IRIns *irl1 = IR(irm12->op1);
- if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
- irl1->op2 == IRCALL_lj_vm_log2) {
- IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */
- IRRef arg3 = irm3->op2, arg4 = irm4->op2;
- J->cur.nins--;
- tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
- tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
- ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
- hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
- break;
- }
- }
- }
hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
break;
- case IR_ATAN2:
- hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
- break;
case IR_LDEXP:
hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
break;
@@ -321,7 +412,8 @@ static void split_ir(jit_State *J)
nir->o = IR_CONV; /* Pass through loword. */
nir->op2 = (IRT_INT << 5) | IRT_INT;
hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
- hisubst[ir->op1], hisubst[ir->op2]);
+ hisubst[ir->op1],
+ lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
break;
case IR_SLOAD:
if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */
@@ -336,15 +428,24 @@ static void split_ir(jit_State *J)
case IR_STRTO:
hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
break;
+ case IR_FLOAD:
+ lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
+ hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
+ nir->op2 += LJ_BE*4;
+ break;
case IR_XLOAD: {
IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */
J->cur.nins--;
hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */
+#if LJ_BE
+ hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
+ inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
+#endif
nref = lj_ir_nextins(J);
nir = IR(nref);
- *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */
- hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
+ *nir = inslo; /* Re-emit lo XLOAD. */
#if LJ_LE
+ hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
ir->prev = nref;
#else
ir->prev = hi; hi = nref;
@@ -364,8 +465,9 @@ static void split_ir(jit_State *J)
break;
}
#endif
- lua_assert(st == IRT_INT ||
- (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
+ lj_assertJ(st == IRT_INT ||
+ (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)),
+ "bad source type for CONV");
nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
@@ -395,7 +497,8 @@ static void split_ir(jit_State *J)
hi = nir->op2;
break;
default:
- lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
+ lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX,
+ "bad IR op %d", ir->o);
hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
hisubst[ir->op1], hisubst[ir->op2]);
break;
@@ -438,8 +541,21 @@ static void split_ir(jit_State *J)
irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
IRCALL_lj_carith_powu64);
break;
+ case IR_BNOT:
+ hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
+ break;
+ case IR_BSWAP:
+ ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
+ hi = nref;
+ break;
+ case IR_BAND: case IR_BOR: case IR_BXOR:
+ hi = split_bitop(J, hisubst, nir, ir);
+ break;
+ case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
+ hi = split_bitshift(J, hisubst, oir, nir, ir);
+ break;
case IR_FLOAD:
- lua_assert(ir->op2 == IRFL_CDATA_INT64);
+ lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
ir->prev = hi; hi = nref;
@@ -505,7 +621,7 @@ static void split_ir(jit_State *J)
hi = nir->op2;
break;
default:
- lua_assert(ir->o <= IR_NE); /* Comparisons. */
+ lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o); /* Comparisons. */
split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
break;
}
@@ -529,7 +645,7 @@ static void split_ir(jit_State *J)
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
- } else if (ir->o == IR_TOSTR) {
+ } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
if (hisubst[ir->op1]) {
if (irref_isk(ir->op1))
nir->op1 = ir->op1;
@@ -583,7 +699,7 @@ static void split_ir(jit_State *J)
#if LJ_SOFTFP
if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
if (irt_isguard(ir->t)) {
- lua_assert(st == IRT_NUM && irt_isint(ir->t));
+ lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
J->cur.nins--;
ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
} else {
@@ -714,7 +830,7 @@ void lj_opt_split(jit_State *J)
if (!J->needsplit)
J->needsplit = split_needsplit(J);
#else
- lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */
+ lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
#endif
if (J->needsplit) {
int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
diff --git a/src/lj_parse.c b/src/lj_parse.c
index 5a8bcff9..9ddf60ed 100644
--- a/src/lj_parse.c
+++ b/src/lj_parse.c
@@ -13,6 +13,7 @@
#include "lj_gc.h"
#include "lj_err.h"
#include "lj_debug.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_func.h"
@@ -21,6 +22,7 @@
#if LJ_HASFFI
#include "lj_ctype.h"
#endif
+#include "lj_strfmt.h"
#include "lj_lex.h"
#include "lj_parse.h"
#include "lj_vm.h"
@@ -161,16 +163,22 @@ LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD);
LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD);
LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD);
+#ifdef LUA_USE_ASSERT
+#define lj_assertFS(c, ...) (lj_assertG_(G(fs->L), (c), __VA_ARGS__))
+#else
+#define lj_assertFS(c, ...) ((void)fs)
+#endif
+
/* -- Error handling ------------------------------------------------------ */
LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em)
{
- lj_lex_error(ls, ls->token, em);
+ lj_lex_error(ls, ls->tok, em);
}
-LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token)
+LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok)
{
- lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token));
+ lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok));
}
LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what)
@@ -198,7 +206,7 @@ static BCReg const_num(FuncState *fs, ExpDesc *e)
{
lua_State *L = fs->L;
TValue *o;
- lua_assert(expr_isnumk(e));
+ lj_assertFS(expr_isnumk(e), "bad usage");
o = lj_tab_set(L, fs->kt, &e->u.nval);
if (tvhaskslot(o))
return tvkslot(o);
@@ -223,7 +231,7 @@ static BCReg const_gc(FuncState *fs, GCobj *gc, uint32_t itype)
/* Add a string constant. */
static BCReg const_str(FuncState *fs, ExpDesc *e)
{
- lua_assert(expr_isstrk(e) || e->k == VGLOBAL);
+ lj_assertFS(expr_isstrk(e) || e->k == VGLOBAL, "bad usage");
return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR);
}
@@ -311,7 +319,7 @@ static void jmp_patchins(FuncState *fs, BCPos pc, BCPos dest)
{
BCIns *jmp = &fs->bcbase[pc].ins;
BCPos offset = dest-(pc+1)+BCBIAS_J;
- lua_assert(dest != NO_JMP);
+ lj_assertFS(dest != NO_JMP, "uninitialized jump target");
if (offset > BCMAX_D)
err_syntax(fs->ls, LJ_ERR_XJUMP);
setbc_d(jmp, offset);
@@ -360,7 +368,7 @@ static void jmp_patch(FuncState *fs, BCPos list, BCPos target)
if (target == fs->pc) {
jmp_tohere(fs, list);
} else {
- lua_assert(target < fs->pc);
+ lj_assertFS(target < fs->pc, "bad jump target");
jmp_patchval(fs, list, target, NO_REG, target);
}
}
@@ -390,7 +398,7 @@ static void bcreg_free(FuncState *fs, BCReg reg)
{
if (reg >= fs->nactvar) {
fs->freereg--;
- lua_assert(reg == fs->freereg);
+ lj_assertFS(reg == fs->freereg, "bad regfree");
}
}
@@ -540,7 +548,7 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg)
} else if (e->k <= VKTRUE) {
ins = BCINS_AD(BC_KPRI, reg, const_pri(e));
} else {
- lua_assert(e->k == VVOID || e->k == VJMP);
+ lj_assertFS(e->k == VVOID || e->k == VJMP, "bad expr type %d", e->k);
return;
}
bcemit_INS(fs, ins);
@@ -635,7 +643,7 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e)
ins = BCINS_AD(BC_GSET, ra, const_str(fs, var));
} else {
BCReg ra, rc;
- lua_assert(var->k == VINDEXED);
+ lj_assertFS(var->k == VINDEXED, "bad expr type %d", var->k);
ra = expr_toanyreg(fs, e);
rc = var->u.s.aux;
if ((int32_t)rc < 0) {
@@ -643,10 +651,12 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e)
} else if (rc > BCMAX_C) {
ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1));
} else {
+#ifdef LUA_USE_ASSERT
/* Free late alloced key reg to avoid assert on free of value reg. */
/* This can only happen when called from expr_table(). */
- lua_assert(e->k != VNONRELOC || ra < fs->nactvar ||
- rc < ra || (bcreg_free(fs, rc),1));
+ if (e->k == VNONRELOC && ra >= fs->nactvar && rc >= ra)
+ bcreg_free(fs, rc);
+#endif
ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc);
}
}
@@ -660,16 +670,16 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key)
BCReg idx, func, obj = expr_toanyreg(fs, e);
expr_free(fs, e);
func = fs->freereg;
- bcemit_AD(fs, BC_MOV, func+1, obj); /* Copy object to first argument. */
- lua_assert(expr_isstrk(key));
+ bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */
+ lj_assertFS(expr_isstrk(key), "bad usage");
idx = const_str(fs, key);
if (idx <= BCMAX_C) {
- bcreg_reserve(fs, 2);
+ bcreg_reserve(fs, 2+LJ_FR2);
bcemit_ABC(fs, BC_TGETS, func, obj, idx);
} else {
- bcreg_reserve(fs, 3);
- bcemit_AD(fs, BC_KSTR, func+2, idx);
- bcemit_ABC(fs, BC_TGETV, func, obj, func+2);
+ bcreg_reserve(fs, 3+LJ_FR2);
+ bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx);
+ bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2);
fs->freereg--;
}
e->u.s.info = func;
@@ -801,7 +811,8 @@ static void bcemit_arith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2)
else
rc = expr_toanyreg(fs, e2);
/* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */
- lua_assert(expr_isnumk(e1) || e1->k == VNONRELOC);
+ lj_assertFS(expr_isnumk(e1) || e1->k == VNONRELOC,
+ "bad expr type %d", e1->k);
expr_toval(fs, e1);
/* Avoid two consts to satisfy bytecode constraints. */
if (expr_isnumk(e1) && !expr_isnumk(e2) &&
@@ -889,19 +900,20 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
if (op <= OPR_POW) {
bcemit_arith(fs, op, e1, e2);
} else if (op == OPR_AND) {
- lua_assert(e1->t == NO_JMP); /* List must be closed. */
+ lj_assertFS(e1->t == NO_JMP, "jump list not closed");
expr_discharge(fs, e2);
jmp_append(fs, &e2->f, e1->f);
*e1 = *e2;
} else if (op == OPR_OR) {
- lua_assert(e1->f == NO_JMP); /* List must be closed. */
+ lj_assertFS(e1->f == NO_JMP, "jump list not closed");
expr_discharge(fs, e2);
jmp_append(fs, &e2->t, e1->t);
*e1 = *e2;
} else if (op == OPR_CONCAT) {
expr_toval(fs, e2);
if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) {
- lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1);
+ lj_assertFS(e1->u.s.info == bc_b(*bcptr(fs, e2))-1,
+ "bad CAT stack layout");
expr_free(fs, e1);
setbc_b(bcptr(fs, e2), e1->u.s.info);
e1->u.s.info = e2->u.s.info;
@@ -913,8 +925,9 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2)
}
e1->k = VRELOCABLE;
} else {
- lua_assert(op == OPR_NE || op == OPR_EQ ||
- op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT);
+ lj_assertFS(op == OPR_NE || op == OPR_EQ ||
+ op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT,
+ "bad binop %d", op);
bcemit_comp(fs, op, e1, e2);
}
}
@@ -943,10 +956,10 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
e->u.s.info = fs->freereg-1;
e->k = VNONRELOC;
} else {
- lua_assert(e->k == VNONRELOC);
+ lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k);
}
} else {
- lua_assert(op == BC_UNM || op == BC_LEN);
+ lj_assertFS(op == BC_UNM || op == BC_LEN, "bad unop %d", op);
if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */
#if LJ_HASFFI
if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */
@@ -986,7 +999,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e)
/* Check and consume optional token. */
static int lex_opt(LexState *ls, LexToken tok)
{
- if (ls->token == tok) {
+ if (ls->tok == tok) {
lj_lex_next(ls);
return 1;
}
@@ -996,7 +1009,7 @@ static int lex_opt(LexState *ls, LexToken tok)
/* Check and consume token. */
static void lex_check(LexState *ls, LexToken tok)
{
- if (ls->token != tok)
+ if (ls->tok != tok)
err_token(ls, tok);
lj_lex_next(ls);
}
@@ -1010,7 +1023,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
} else {
const char *swhat = lj_lex_token2str(ls, what);
const char *swho = lj_lex_token2str(ls, who);
- lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line);
+ lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line);
}
}
}
@@ -1019,9 +1032,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line)
static GCstr *lex_str(LexState *ls)
{
GCstr *s;
- if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto))
+ if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto))
err_token(ls, TK_name);
- s = strV(&ls->tokenval);
+ s = strV(&ls->tokval);
lj_lex_next(ls);
return s;
}
@@ -1041,8 +1054,9 @@ static void var_new(LexState *ls, BCReg n, GCstr *name)
lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK);
lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo);
}
- lua_assert((uintptr_t)name < VARNAME__MAX ||
- lj_tab_getstr(fs->kt, name) != NULL);
+ lj_assertFS((uintptr_t)name < VARNAME__MAX ||
+ lj_tab_getstr(fs->kt, name) != NULL,
+ "unanchored variable name");
/* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */
setgcref(ls->vstack[vtop].name, obj2gco(name));
fs->varmap[fs->nactvar+n] = (uint16_t)vtop;
@@ -1097,7 +1111,7 @@ static MSize var_lookup_uv(FuncState *fs, MSize vidx, ExpDesc *e)
return i; /* Already exists. */
/* Otherwise create a new one. */
checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues");
- lua_assert(e->k == VLOCAL || e->k == VUPVAL);
+ lj_assertFS(e->k == VLOCAL || e->k == VUPVAL, "bad expr type %d", e->k);
fs->uvmap[n] = (uint16_t)vidx;
fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info);
fs->nuv = n+1;
@@ -1148,7 +1162,8 @@ static MSize gola_new(LexState *ls, GCstr *name, uint8_t info, BCPos pc)
lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK);
lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo);
}
- lua_assert(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL);
+ lj_assertFS(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL,
+ "unanchored label name");
/* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */
setgcref(ls->vstack[vtop].name, obj2gco(name));
ls->vstack[vtop].startpc = pc;
@@ -1178,8 +1193,9 @@ static void gola_close(LexState *ls, VarInfo *vg)
FuncState *fs = ls->fs;
BCPos pc = vg->startpc;
BCIns *ip = &fs->bcbase[pc].ins;
- lua_assert(gola_isgoto(vg));
- lua_assert(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO);
+ lj_assertFS(gola_isgoto(vg), "expected goto");
+ lj_assertFS(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO,
+ "bad bytecode op %d", bc_op(*ip));
setbc_a(ip, vg->slot);
if (bc_op(*ip) == BC_JMP) {
BCPos next = jmp_next(fs, pc);
@@ -1198,9 +1214,9 @@ static void gola_resolve(LexState *ls, FuncScope *bl, MSize idx)
if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) {
if (vg->slot < vl->slot) {
GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name);
- lua_assert((uintptr_t)name >= VARNAME__MAX);
+ lj_assertLS((uintptr_t)name >= VARNAME__MAX, "expected goto name");
ls->linenumber = ls->fs->bcbase[vg->startpc].line;
- lua_assert(strref(vg->name) != NAME_BREAK);
+ lj_assertLS(strref(vg->name) != NAME_BREAK, "unexpected break");
lj_lex_error(ls, 0, LJ_ERR_XGSCOPE,
strdata(strref(vg->name)), strdata(name));
}
@@ -1264,7 +1280,7 @@ static void fscope_begin(FuncState *fs, FuncScope *bl, int flags)
bl->vstart = fs->ls->vtop;
bl->prev = fs->bl;
fs->bl = bl;
- lua_assert(fs->freereg == fs->nactvar);
+ lj_assertFS(fs->freereg == fs->nactvar, "bad regalloc");
}
/* End a scope. */
@@ -1275,7 +1291,7 @@ static void fscope_end(FuncState *fs)
fs->bl = bl->prev;
var_remove(ls, bl->nactvar);
fs->freereg = fs->nactvar;
- lua_assert(bl->nactvar == fs->nactvar);
+ lj_assertFS(bl->nactvar == fs->nactvar, "bad regalloc");
if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL)
bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0);
if ((bl->flags & FSCOPE_BREAK)) {
@@ -1362,13 +1378,13 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr)
Node *n = &node[i];
if (tvhaskslot(&n->val)) {
ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val);
- lua_assert(!tvisint(&n->key));
+ lj_assertFS(!tvisint(&n->key), "unexpected integer key");
if (tvisnum(&n->key)) {
TValue *tv = &((TValue *)kptr)[kidx];
if (LJ_DUALNUM) {
lua_Number nn = numV(&n->key);
int32_t k = lj_num2int(nn);
- lua_assert(!tvismzero(&n->key));
+ lj_assertFS(!tvismzero(&n->key), "unexpected -0 key");
if ((lua_Number)k == nn)
setintV(tv, k);
else
@@ -1416,98 +1432,66 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt,
uint8_t *li = (uint8_t *)lineinfo;
do {
BCLine delta = base[i].line - first;
- lua_assert(delta >= 0 && delta < 256);
+ lj_assertFS(delta >= 0 && delta < 256, "bad line delta");
li[i] = (uint8_t)delta;
} while (++i < n);
} else if (LJ_LIKELY(numline < 65536)) {
uint16_t *li = (uint16_t *)lineinfo;
do {
BCLine delta = base[i].line - first;
- lua_assert(delta >= 0 && delta < 65536);
+ lj_assertFS(delta >= 0 && delta < 65536, "bad line delta");
li[i] = (uint16_t)delta;
} while (++i < n);
} else {
uint32_t *li = (uint32_t *)lineinfo;
do {
BCLine delta = base[i].line - first;
- lua_assert(delta >= 0);
+ lj_assertFS(delta >= 0, "bad line delta");
li[i] = (uint32_t)delta;
} while (++i < n);
}
}
-/* Resize buffer if needed. */
-static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len)
-{
- MSize sz = ls->sb.sz * 2;
- while (ls->sb.n + len > sz) sz = sz * 2;
- lj_str_resizebuf(ls->L, &ls->sb, sz);
-}
-
-static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len)
-{
- if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz))
- fs_buf_resize(ls, len);
-}
-
-/* Add string to buffer. */
-static void fs_buf_str(LexState *ls, const char *str, MSize len)
-{
- char *p = ls->sb.buf + ls->sb.n;
- MSize i;
- ls->sb.n += len;
- for (i = 0; i < len; i++) p[i] = str[i];
-}
-
-/* Add ULEB128 value to buffer. */
-static void fs_buf_uleb128(LexState *ls, uint32_t v)
-{
- MSize n = ls->sb.n;
- uint8_t *p = (uint8_t *)ls->sb.buf;
- for (; v >= 0x80; v >>= 7)
- p[n++] = (uint8_t)((v & 0x7f) | 0x80);
- p[n++] = (uint8_t)v;
- ls->sb.n = n;
-}
-
/* Prepare variable info for prototype. */
static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar)
{
VarInfo *vs =ls->vstack, *ve;
MSize i, n;
BCPos lastpc;
- lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */
+ lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */
/* Store upvalue names. */
for (i = 0, n = fs->nuv; i < n; i++) {
GCstr *s = strref(vs[fs->uvmap[i]].name);
MSize len = s->len+1;
- fs_buf_need(ls, len);
- fs_buf_str(ls, strdata(s), len);
+ char *p = lj_buf_more(&ls->sb, len);
+ p = lj_buf_wmem(p, strdata(s), len);
+ ls->sb.w = p;
}
- *ofsvar = ls->sb.n;
+ *ofsvar = sbuflen(&ls->sb);
lastpc = 0;
/* Store local variable names and compressed ranges. */
for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) {
if (!gola_isgotolabel(vs)) {
GCstr *s = strref(vs->name);
BCPos startpc;
+ char *p;
if ((uintptr_t)s < VARNAME__MAX) {
- fs_buf_need(ls, 1 + 2*5);
- ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s;
+ p = lj_buf_more(&ls->sb, 1 + 2*5);
+ *p++ = (char)(uintptr_t)s;
} else {
MSize len = s->len+1;
- fs_buf_need(ls, len + 2*5);
- fs_buf_str(ls, strdata(s), len);
+ p = lj_buf_more(&ls->sb, len + 2*5);
+ p = lj_buf_wmem(p, strdata(s), len);
}
startpc = vs->startpc;
- fs_buf_uleb128(ls, startpc-lastpc);
- fs_buf_uleb128(ls, vs->endpc-startpc);
+ p = lj_strfmt_wuleb128(p, startpc-lastpc);
+ p = lj_strfmt_wuleb128(p, vs->endpc-startpc);
+ ls->sb.w = p;
lastpc = startpc;
}
}
- fs_buf_need(ls, 1);
- ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */
- return ls->sb.n;
+ lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */
+ return sbuflen(&ls->sb);
}
/* Fixup variable info for prototype. */
@@ -1515,7 +1499,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar)
{
setmref(pt->uvinfo, p);
setmref(pt->varinfo, (char *)p + ofsvar);
- memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */
+ memcpy(p, ls->sb.b, sbuflen(&ls->sb)); /* Copy from temp. buffer. */
}
#else
@@ -1552,7 +1536,7 @@ static void fs_fixup_ret(FuncState *fs)
}
fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */
fscope_end(fs);
- lua_assert(fs->bl == NULL);
+ lj_assertFS(fs->bl == NULL, "bad scope nesting");
/* May need to fixup returns encoded before first function was created. */
if (fs->flags & PROTO_FIXUP_RETURN) {
BCPos pc;
@@ -1624,7 +1608,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line)
L->top--; /* Pop table of constants. */
ls->vtop = fs->vbase; /* Reset variable stack. */
ls->fs = fs->prev;
- lua_assert(ls->fs != NULL || ls->token == TK_eof);
+ lj_assertL(ls->fs != NULL || ls->tok == TK_eof, "bad parser state");
return pt;
}
@@ -1718,15 +1702,15 @@ static void expr_bracket(LexState *ls, ExpDesc *v)
}
/* Get value of constant expression. */
-static void expr_kvalue(TValue *v, ExpDesc *e)
+static void expr_kvalue(FuncState *fs, TValue *v, ExpDesc *e)
{
+ UNUSED(fs);
if (e->k <= VKTRUE) {
- setitype(v, ~(uint32_t)e->k);
+ setpriV(v, ~(uint32_t)e->k);
} else if (e->k == VKSTR) {
- setgcref(v->gcr, obj2gco(e->u.sval));
- setitype(v, LJ_TSTR);
+ setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR);
} else {
- lua_assert(tvisnumber(expr_numtv(e)));
+ lj_assertFS(tvisnumber(expr_numtv(e)), "bad number constant");
*v = *expr_numtv(e);
}
}
@@ -1746,15 +1730,15 @@ static void expr_table(LexState *ls, ExpDesc *e)
bcreg_reserve(fs, 1);
freg++;
lex_check(ls, '{');
- while (ls->token != '}') {
+ while (ls->tok != '}') {
ExpDesc key, val;
vcall = 0;
- if (ls->token == '[') {
+ if (ls->tok == '[') {
expr_bracket(ls, &key); /* Already calls expr_toval. */
if (!expr_isk(&key)) expr_index(fs, e, &key);
if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++;
lex_check(ls, '=');
- } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) &&
+ } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) &&
lj_lex_lookahead(ls) == '=') {
expr_str(ls, &key);
lex_check(ls, '=');
@@ -1776,11 +1760,11 @@ static void expr_table(LexState *ls, ExpDesc *e)
fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx);
}
vcall = 0;
- expr_kvalue(&k, &key);
+ expr_kvalue(fs, &k, &key);
v = lj_tab_set(fs->L, t, &k);
lj_gc_anybarriert(fs->L, t);
if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */
- expr_kvalue(v, &val);
+ expr_kvalue(fs, v, &val);
} else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */
settabV(fs->L, v, t); /* Preserve key with table itself as value. */
fixt = 1; /* Fix this later, after all resizes. */
@@ -1799,8 +1783,9 @@ static void expr_table(LexState *ls, ExpDesc *e)
if (vcall) {
BCInsLine *ilp = &fs->bcbase[fs->pc-1];
ExpDesc en;
- lua_assert(bc_a(ilp->ins) == freg &&
- bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB));
+ lj_assertFS(bc_a(ilp->ins) == freg &&
+ bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB),
+ "bad CALL code generation");
expr_init(&en, VKNUM, 0);
en.u.nval.u32.lo = narr-1;
en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. */
@@ -1830,7 +1815,7 @@ static void expr_table(LexState *ls, ExpDesc *e)
for (i = 0; i <= hmask; i++) {
Node *n = &node[i];
if (tvistab(&n->val)) {
- lua_assert(tabV(&n->val) == t);
+ lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table");
setnilV(&n->val); /* Turn value into nil. */
}
}
@@ -1847,11 +1832,11 @@ static BCReg parse_params(LexState *ls, int needself)
lex_check(ls, '(');
if (needself)
var_new_lit(ls, nparams++, "self");
- if (ls->token != ')') {
+ if (ls->tok != ')') {
do {
- if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) {
+ if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
var_new(ls, nparams++, lex_str(ls));
- } else if (ls->token == TK_dots) {
+ } else if (ls->tok == TK_dots) {
lj_lex_next(ls);
fs->flags |= PROTO_VARARG;
break;
@@ -1861,7 +1846,7 @@ static BCReg parse_params(LexState *ls, int needself)
} while (lex_opt(ls, ','));
}
var_add(ls, nparams);
- lua_assert(fs->nactvar == nparams);
+ lj_assertFS(fs->nactvar == nparams, "bad regalloc");
bcreg_reserve(fs, nparams);
lex_check(ls, ')');
return nparams;
@@ -1885,7 +1870,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line)
fs.bclim = pfs->bclim - pfs->pc;
bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */
parse_chunk(ls);
- if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line);
+ if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line);
pt = fs_finish(ls, (ls->lastline = ls->linenumber));
pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */
pfs->bclim = (BCPos)(ls->sizebcstack - oldbase);
@@ -1924,13 +1909,13 @@ static void parse_args(LexState *ls, ExpDesc *e)
BCIns ins;
BCReg base;
BCLine line = ls->linenumber;
- if (ls->token == '(') {
+ if (ls->tok == '(') {
#if !LJ_52
if (line != ls->lastline)
err_syntax(ls, LJ_ERR_XAMBIG);
#endif
lj_lex_next(ls);
- if (ls->token == ')') { /* f(). */
+ if (ls->tok == ')') { /* f(). */
args.k = VVOID;
} else {
expr_list(ls, &args);
@@ -1938,24 +1923,24 @@ static void parse_args(LexState *ls, ExpDesc *e)
setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */
}
lex_match(ls, ')', '(', line);
- } else if (ls->token == '{') {
+ } else if (ls->tok == '{') {
expr_table(ls, &args);
- } else if (ls->token == TK_string) {
+ } else if (ls->tok == TK_string) {
expr_init(&args, VKSTR, 0);
- args.u.sval = strV(&ls->tokenval);
+ args.u.sval = strV(&ls->tokval);
lj_lex_next(ls);
} else {
err_syntax(ls, LJ_ERR_XFUNARG);
return; /* Silence compiler. */
}
- lua_assert(e->k == VNONRELOC);
+ lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k);
base = e->u.s.info; /* Base register for call. */
if (args.k == VCALL) {
- ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1);
+ ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2);
} else {
if (args.k != VVOID)
expr_tonextreg(fs, &args);
- ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base);
+ ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2);
}
expr_init(e, VCALL, bcemit_INS(fs, ins));
e->u.s.aux = base;
@@ -1968,33 +1953,34 @@ static void expr_primary(LexState *ls, ExpDesc *v)
{
FuncState *fs = ls->fs;
/* Parse prefix expression. */
- if (ls->token == '(') {
+ if (ls->tok == '(') {
BCLine line = ls->linenumber;
lj_lex_next(ls);
expr(ls, v);
lex_match(ls, ')', '(', line);
expr_discharge(ls->fs, v);
- } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) {
+ } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) {
var_lookup(ls, v);
} else {
err_syntax(ls, LJ_ERR_XSYMBOL);
}
for (;;) { /* Parse multiple expression suffixes. */
- if (ls->token == '.') {
+ if (ls->tok == '.') {
expr_field(ls, v);
- } else if (ls->token == '[') {
+ } else if (ls->tok == '[') {
ExpDesc key;
expr_toanyreg(fs, v);
expr_bracket(ls, &key);
expr_index(fs, v, &key);
- } else if (ls->token == ':') {
+ } else if (ls->tok == ':') {
ExpDesc key;
lj_lex_next(ls);
expr_str(ls, &key);
bcemit_method(fs, v, &key);
parse_args(ls, v);
- } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') {
+ } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') {
expr_tonextreg(fs, v);
+ if (LJ_FR2) bcreg_reserve(fs, 1);
parse_args(ls, v);
} else {
break;
@@ -2005,14 +1991,14 @@ static void expr_primary(LexState *ls, ExpDesc *v)
/* Parse simple expression. */
static void expr_simple(LexState *ls, ExpDesc *v)
{
- switch (ls->token) {
+ switch (ls->tok) {
case TK_number:
- expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0);
- copyTV(ls->L, &v->u.nval, &ls->tokenval);
+ expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0);
+ copyTV(ls->L, &v->u.nval, &ls->tokval);
break;
case TK_string:
expr_init(v, VKSTR, 0);
- v->u.sval = strV(&ls->tokenval);
+ v->u.sval = strV(&ls->tokval);
break;
case TK_nil:
expr_init(v, VKNIL, 0);
@@ -2100,11 +2086,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit);
static void expr_unop(LexState *ls, ExpDesc *v)
{
BCOp op;
- if (ls->token == TK_not) {
+ if (ls->tok == TK_not) {
op = BC_NOT;
- } else if (ls->token == '-') {
+ } else if (ls->tok == '-') {
op = BC_UNM;
- } else if (ls->token == '#') {
+ } else if (ls->tok == '#') {
op = BC_LEN;
} else {
expr_simple(ls, v);
@@ -2121,7 +2107,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit)
BinOpr op;
synlevel_begin(ls);
expr_unop(ls, v);
- op = token2binop(ls->token);
+ op = token2binop(ls->tok);
while (op != OPR_NOBINOPR && priority[op].left > limit) {
ExpDesc v2;
BinOpr nextop;
@@ -2310,9 +2296,9 @@ static void parse_func(LexState *ls, BCLine line)
lj_lex_next(ls); /* Skip 'function'. */
/* Parse function name. */
var_lookup(ls, &v);
- while (ls->token == '.') /* Multiple dot-separated fields. */
+ while (ls->tok == '.') /* Multiple dot-separated fields. */
expr_field(ls, &v);
- if (ls->token == ':') { /* Optional colon to signify method call. */
+ if (ls->tok == ':') { /* Optional colon to signify method call. */
needself = 1;
expr_field(ls, &v);
}
@@ -2325,9 +2311,9 @@ static void parse_func(LexState *ls, BCLine line)
/* -- Control transfer statements ----------------------------------------- */
/* Check for end of block. */
-static int endofblock(LexToken token)
+static int parse_isend(LexToken tok)
{
- switch (token) {
+ switch (tok) {
case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof:
return 1;
default:
@@ -2342,7 +2328,7 @@ static void parse_return(LexState *ls)
FuncState *fs = ls->fs;
lj_lex_next(ls); /* Skip 'return'. */
fs->flags |= PROTO_HAS_RETURN;
- if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */
+ if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */
ins = BCINS_AD(BC_RET0, 0, 1);
} else { /* Return with one or more values. */
ExpDesc e; /* Receives the _last_ expression in the list. */
@@ -2408,18 +2394,18 @@ static void parse_label(LexState *ls)
lex_check(ls, TK_label);
/* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */
for (;;) {
- if (ls->token == TK_label) {
+ if (ls->tok == TK_label) {
synlevel_begin(ls);
parse_label(ls);
synlevel_end(ls);
- } else if (LJ_52 && ls->token == ';') {
+ } else if (LJ_52 && ls->tok == ';') {
lj_lex_next(ls);
} else {
break;
}
}
/* Trailing label is considered to be outside of scope. */
- if (endofblock(ls->token) && ls->token != TK_until)
+ if (parse_isend(ls->tok) && ls->tok != TK_until)
ls->vstack[idx].slot = fs->bl->nactvar;
gola_resolve(ls, fs->bl, idx);
}
@@ -2575,7 +2561,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname)
lex_check(ls, TK_in);
line = ls->linenumber;
assign_adjust(ls, 3, expr_list(ls, &e), &e);
- bcreg_bump(fs, 3); /* The iterator needs another 3 slots (func + 2 args). */
+ /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */
+ bcreg_bump(fs, 3+LJ_FR2);
isnext = (nvars <= 5 && predict_next(ls, fs, exprpc));
var_add(ls, 3); /* Hidden control variables. */
lex_check(ls, TK_do);
@@ -2603,9 +2590,9 @@ static void parse_for(LexState *ls, BCLine line)
fscope_begin(fs, &bl, FSCOPE_LOOP);
lj_lex_next(ls); /* Skip 'for'. */
varname = lex_str(ls); /* Get first variable name. */
- if (ls->token == '=')
+ if (ls->tok == '=')
parse_for_num(ls, varname, line);
- else if (ls->token == ',' || ls->token == TK_in)
+ else if (ls->tok == ',' || ls->tok == TK_in)
parse_for_iter(ls, varname);
else
err_syntax(ls, LJ_ERR_XFOR);
@@ -2631,12 +2618,12 @@ static void parse_if(LexState *ls, BCLine line)
BCPos flist;
BCPos escapelist = NO_JMP;
flist = parse_then(ls);
- while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */
+ while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */
jmp_append(fs, &escapelist, bcemit_jmp(fs));
jmp_tohere(fs, flist);
flist = parse_then(ls);
}
- if (ls->token == TK_else) { /* Parse optional 'else' block. */
+ if (ls->tok == TK_else) { /* Parse optional 'else' block. */
jmp_append(fs, &escapelist, bcemit_jmp(fs));
jmp_tohere(fs, flist);
lj_lex_next(ls); /* Skip 'else'. */
@@ -2654,7 +2641,7 @@ static void parse_if(LexState *ls, BCLine line)
static int parse_stmt(LexState *ls)
{
BCLine line = ls->linenumber;
- switch (ls->token) {
+ switch (ls->tok) {
case TK_if:
parse_if(ls, line);
break;
@@ -2713,11 +2700,12 @@ static void parse_chunk(LexState *ls)
{
int islast = 0;
synlevel_begin(ls);
- while (!islast && !endofblock(ls->token)) {
+ while (!islast && !parse_isend(ls->tok)) {
islast = parse_stmt(ls);
lex_opt(ls, ';');
- lua_assert(ls->fs->framesize >= ls->fs->freereg &&
- ls->fs->freereg >= ls->fs->nactvar);
+ lj_assertLS(ls->fs->framesize >= ls->fs->freereg &&
+ ls->fs->freereg >= ls->fs->nactvar,
+ "bad regalloc");
ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */
}
synlevel_end(ls);
@@ -2748,13 +2736,12 @@ GCproto *lj_parse(LexState *ls)
bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */
lj_lex_next(ls); /* Read-ahead first token. */
parse_chunk(ls);
- if (ls->token != TK_eof)
+ if (ls->tok != TK_eof)
err_token(ls, TK_eof);
pt = fs_finish(ls, ls->linenumber);
L->top--; /* Drop chunkname. */
- lua_assert(fs.prev == NULL);
- lua_assert(ls->fs == NULL);
- lua_assert(pt->sizeuv == 0);
+ lj_assertL(fs.prev == NULL && ls->fs == NULL, "mismatched frame nesting");
+ lj_assertL(pt->sizeuv == 0, "toplevel proto has upvalues");
return pt;
}
diff --git a/src/lj_prng.c b/src/lj_prng.c
new file mode 100644
index 00000000..9e57505e
--- /dev/null
+++ b/src/lj_prng.c
@@ -0,0 +1,259 @@
+/*
+** Pseudo-random number generation.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_prng_c
+#define LUA_CORE
+
+/* To get the syscall prototype. */
+#if defined(__linux__) && !defined(_GNU_SOURCE)
+#define _GNU_SOURCE
+#endif
+
+#include "lj_def.h"
+#include "lj_arch.h"
+#include "lj_prng.h"
+
+/* -- PRNG step function -------------------------------------------------- */
+
+/* This implements a Tausworthe PRNG with period 2^223. Based on:
+** Tables of maximally-equidistributed combined LFSR generators,
+** Pierre L'Ecuyer, 1991, table 3, 1st entry.
+** Full-period ME-CF generator with L=64, J=4, k=223, N1=49.
+**
+** Important note: This PRNG is NOT suitable for cryptographic use!
+**
+** But it works fine for math.random(), which has an API that's not
+** suitable for cryptography, anyway.
+**
+** When used as a securely seeded global PRNG, it substantially raises
+** the difficulty for various attacks on the VM.
+*/
+
+/* Update generator i and compute a running xor of all states. */
+#define TW223_GEN(rs, z, r, i, k, q, s) \
+ z = rs->u[i]; \
+ z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \
+ r ^= z; rs->u[i] = z;
+
+#define TW223_STEP(rs, z, r) \
+ TW223_GEN(rs, z, r, 0, 63, 31, 18) \
+ TW223_GEN(rs, z, r, 1, 58, 19, 28) \
+ TW223_GEN(rs, z, r, 2, 55, 24, 7) \
+ TW223_GEN(rs, z, r, 3, 47, 21, 8)
+
+/* PRNG step function with uint64_t result. */
+LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs)
+{
+ uint64_t z, r = 0;
+ TW223_STEP(rs, z, r)
+ return r;
+}
+
+/* PRNG step function with double in uint64_t result. */
+LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs)
+{
+ uint64_t z, r = 0;
+ TW223_STEP(rs, z, r)
+ /* Returns a double bit pattern in the range 1.0 <= d < 2.0. */
+ return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000);
+}
+
+/* Condition seed: ensure k[i] MSB of u[i] are non-zero. */
+static LJ_AINLINE void lj_prng_condition(PRNGState *rs)
+{
+ if (rs->u[0] < (1u << 1)) rs->u[0] += (1u << 1);
+ if (rs->u[1] < (1u << 6)) rs->u[1] += (1u << 6);
+ if (rs->u[2] < (1u << 9)) rs->u[2] += (1u << 9);
+ if (rs->u[3] < (1u << 17)) rs->u[3] += (1u << 17);
+}
+
+/* -- PRNG seeding from OS ------------------------------------------------ */
+
+#if LUAJIT_SECURITY_PRNG == 0
+
+/* Nothing to define. */
+
+#elif LJ_TARGET_XBOX360
+
+extern int XNetRandom(void *buf, unsigned int len);
+
+#elif LJ_TARGET_PS3
+
+extern int sys_get_random_number(void *buf, uint64_t len);
+
+#elif LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA
+
+extern int sceRandomGetRandomNumber(void *buf, size_t len);
+
+#elif LJ_TARGET_NX
+
+#include <unistd.h>
+
+#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#if LJ_TARGET_UWP || LJ_TARGET_XBOXONE
+/* Must use BCryptGenRandom. */
+#include <bcrypt.h>
+#pragma comment(lib, "bcrypt.lib")
+#else
+/* If you wonder about this mess, then search online for RtlGenRandom. */
+typedef BOOLEAN (WINAPI *PRGR)(void *buf, ULONG len);
+static PRGR libfunc_rgr;
+#endif
+
+#elif LJ_TARGET_POSIX
+
+#if LJ_TARGET_LINUX
+/* Avoid a dependency on glibc 2.25+ and use the getrandom syscall instead. */
+#include <sys/syscall.h>
+#else
+
+#if LJ_TARGET_OSX && !LJ_TARGET_IOS
+/*
+** In their infinite wisdom Apple decided to disallow getentropy() in the
+** iOS App Store. Even though the call is common to all BSD-ish OS, it's
+** recommended by Apple in their own security-related docs, and, to top
+** off the foolery, /dev/urandom is handled by the same kernel code,
+** yet accessing it is actually permitted (but less efficient).
+*/
+#include <Availability.h>
+#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200
+#define LJ_TARGET_HAS_GETENTROPY 1
+#endif
+#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN
+#define LJ_TARGET_HAS_GETENTROPY 1
+#endif
+
+#if LJ_TARGET_HAS_GETENTROPY
+extern int getentropy(void *buf, size_t len)
+#ifdef __ELF__
+ __attribute__((weak))
+#endif
+;
+#endif
+
+#endif
+
+/* For the /dev/urandom fallback. */
+#include <fcntl.h>
+#include <unistd.h>
+
+#endif
+
+#if LUAJIT_SECURITY_PRNG == 0
+
+/* If you really don't care about security, then define
+** LUAJIT_SECURITY_PRNG=0. This yields a predictable seed
+** and provides NO SECURITY against various attacks on the VM.
+**
+** BTW: This is NOT the way to get predictable table iteration,
+** predictable trace generation, predictable bytecode generation, etc.
+*/
+int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
+{
+ lj_prng_seed_fixed(rs); /* The fixed seed is already conditioned. */
+ return 1;
+}
+
+#else
+
+/* Securely seed PRNG from system entropy. Returns 0 on failure. */
+int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs)
+{
+#if LJ_TARGET_XBOX360
+
+ if (XNetRandom(rs->u, (unsigned int)sizeof(rs->u)) == 0)
+ goto ok;
+
+#elif LJ_TARGET_PS3
+
+ if (sys_get_random_number(rs->u, sizeof(rs->u)) == 0)
+ goto ok;
+
+#elif LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA
+
+ if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0)
+ goto ok;
+
+#elif LJ_TARGET_NX
+
+ if (getentropy(rs->u, sizeof(rs->u)) == 0)
+ goto ok;
+
+#elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE
+
+ if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u),
+ BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0)
+ goto ok;
+
+#elif LJ_TARGET_WINDOWS
+
+ /* Keep the library loaded in case multiple VMs are started. */
+ if (!libfunc_rgr) {
+ HMODULE lib = LJ_WIN_LOADLIBA("advapi32.dll");
+ if (!lib) return 0;
+ libfunc_rgr = (PRGR)GetProcAddress(lib, "SystemFunction036");
+ if (!libfunc_rgr) return 0;
+ }
+ if (libfunc_rgr(rs->u, (ULONG)sizeof(rs->u)))
+ goto ok;
+
+#elif LJ_TARGET_POSIX
+
+#if LJ_TARGET_LINUX && defined(SYS_getrandom)
+
+ if (syscall(SYS_getrandom, rs->u, sizeof(rs->u), 0) == (long)sizeof(rs->u))
+ goto ok;
+
+#elif LJ_TARGET_HAS_GETENTROPY
+
+#ifdef __ELF__
+ if (&getentropy && getentropy(rs->u, sizeof(rs->u)) == 0)
+ goto ok;
+#else
+ if (getentropy(rs->u, sizeof(rs->u)) == 0)
+ goto ok;
+#endif
+
+#endif
+
+  /* Fallback to /dev/urandom. This may fail if the device is
+  ** nonexistent or inaccessible in a chroot or container, or if the
+  ** process or the OS ran out of file descriptors.
+  */
+ {
+ int fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC);
+ if (fd != -1) {
+ ssize_t n = read(fd, rs->u, sizeof(rs->u));
+ (void)close(fd);
+ if (n == (ssize_t)sizeof(rs->u))
+ goto ok;
+ }
+ }
+
+#else
+
+ /* Add an elif above for your OS with a secure PRNG seed.
+ ** Note that fiddling around with rand(), getpid(), time() or coercing
+ ** ASLR to yield a few bits of randomness is not helpful.
+ ** If you don't want any security, then don't pretend you have any
+ ** and simply define LUAJIT_SECURITY_PRNG=0 for the build.
+ */
+#error "Missing secure PRNG seed for this OS"
+
+#endif
+ return 0; /* Fail. */
+
+ok:
+ lj_prng_condition(rs);
+ (void)lj_prng_u64(rs);
+ return 1; /* Success. */
+}
+
+#endif
+
diff --git a/src/lj_prng.h b/src/lj_prng.h
new file mode 100644
index 00000000..bdc958ab
--- /dev/null
+++ b/src/lj_prng.h
@@ -0,0 +1,24 @@
+/*
+** Pseudo-random number generation.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_PRNG_H
+#define _LJ_PRNG_H
+
+#include "lj_def.h"
+
+LJ_FUNC int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs);
+LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs);
+LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs);
+
+/* This is just the precomputed result of lib_math.c:random_seed(rs, 0.0). */
+static LJ_AINLINE void lj_prng_seed_fixed(PRNGState *rs)
+{
+ rs->u[0] = U64x(a0d27757,0a345b8c);
+ rs->u[1] = U64x(764a296c,5d4aa64f);
+ rs->u[2] = U64x(51220704,070adeaa);
+ rs->u[3] = U64x(2a2717b5,a7b7b927);
+}
+
+#endif
diff --git a/src/lj_profile.c b/src/lj_profile.c
new file mode 100644
index 00000000..f0af91cb
--- /dev/null
+++ b/src/lj_profile.c
@@ -0,0 +1,367 @@
+/*
+** Low-overhead profiling.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_profile_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASPROFILE
+
+#include "lj_buf.h"
+#include "lj_frame.h"
+#include "lj_debug.h"
+#include "lj_dispatch.h"
+#if LJ_HASJIT
+#include "lj_jit.h"
+#include "lj_trace.h"
+#endif
+#include "lj_profile.h"
+
+#include "luajit.h"
+
+#if LJ_PROFILE_SIGPROF
+
+#include <sys/time.h>
+#include <signal.h>
+#define profile_lock(ps) UNUSED(ps)
+#define profile_unlock(ps) UNUSED(ps)
+
+#elif LJ_PROFILE_PTHREAD
+
+#include <pthread.h>
+#include <time.h>
+#if LJ_TARGET_PS3
+#include <sys/timer.h>
+#endif
+#define profile_lock(ps) pthread_mutex_lock(&ps->lock)
+#define profile_unlock(ps) pthread_mutex_unlock(&ps->lock)
+
+#elif LJ_PROFILE_WTHREAD
+
+#define WIN32_LEAN_AND_MEAN
+#if LJ_TARGET_XBOX360
+#include <xtl.h>
+#include <xbox.h>
+#else
+#include <windows.h>
+#endif
+typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int);
+#define profile_lock(ps) EnterCriticalSection(&ps->lock)
+#define profile_unlock(ps) LeaveCriticalSection(&ps->lock)
+
+#endif
+
+/* Profiler state. */
+typedef struct ProfileState {
+ global_State *g; /* VM state that started the profiler. */
+ luaJIT_profile_callback cb; /* Profiler callback. */
+ void *data; /* Profiler callback data. */
+ SBuf sb; /* String buffer for stack dumps. */
+ int interval; /* Sample interval in milliseconds. */
+ int samples; /* Number of samples for next callback. */
+ int vmstate; /* VM state when profile timer triggered. */
+#if LJ_PROFILE_SIGPROF
+ struct sigaction oldsa; /* Previous SIGPROF state. */
+#elif LJ_PROFILE_PTHREAD
+ pthread_mutex_t lock; /* g->hookmask update lock. */
+ pthread_t thread; /* Timer thread. */
+ int abort; /* Abort timer thread. */
+#elif LJ_PROFILE_WTHREAD
+#if LJ_TARGET_WINDOWS
+ HINSTANCE wmm; /* WinMM library handle. */
+ WMM_TPFUNC wmm_tbp; /* WinMM timeBeginPeriod function. */
+ WMM_TPFUNC wmm_tep; /* WinMM timeEndPeriod function. */
+#endif
+ CRITICAL_SECTION lock; /* g->hookmask update lock. */
+ HANDLE thread; /* Timer thread. */
+ int abort; /* Abort timer thread. */
+#endif
+} ProfileState;
+
+/* Sadly, we have to use a static profiler state.
+**
+** The SIGPROF variant needs a static pointer to the global state, anyway.
+** And it would be hard to extend for multiple threads. You can still use
+** multiple VMs in multiple threads, but only profile one at a time.
+*/
+static ProfileState profile_state;
+
+/* Default sample interval in milliseconds. */
+#define LJ_PROFILE_INTERVAL_DEFAULT 10
+
+/* -- Profiler/hook interaction ------------------------------------------- */
+
+#if !LJ_PROFILE_SIGPROF
+void LJ_FASTCALL lj_profile_hook_enter(global_State *g)
+{
+ ProfileState *ps = &profile_state;
+ if (ps->g) {
+ profile_lock(ps);
+ hook_enter(g);
+ profile_unlock(ps);
+ } else {
+ hook_enter(g);
+ }
+}
+
+void LJ_FASTCALL lj_profile_hook_leave(global_State *g)
+{
+ ProfileState *ps = &profile_state;
+ if (ps->g) {
+ profile_lock(ps);
+ hook_leave(g);
+ profile_unlock(ps);
+ } else {
+ hook_leave(g);
+ }
+}
+#endif
+
+/* -- Profile callbacks --------------------------------------------------- */
+
+/* Callback from profile hook (HOOK_PROFILE already cleared). */
+void LJ_FASTCALL lj_profile_interpreter(lua_State *L)
+{
+ ProfileState *ps = &profile_state;
+ global_State *g = G(L);
+ uint8_t mask;
+ profile_lock(ps);
+ mask = (g->hookmask & ~HOOK_PROFILE);
+ if (!(mask & HOOK_VMEVENT)) {
+ int samples = ps->samples;
+ ps->samples = 0;
+ g->hookmask = HOOK_VMEVENT;
+ lj_dispatch_update(g);
+ profile_unlock(ps);
+ ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */
+ profile_lock(ps);
+ mask |= (g->hookmask & HOOK_PROFILE);
+ }
+ g->hookmask = mask;
+ lj_dispatch_update(g);
+ profile_unlock(ps);
+}
+
+/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */
+static void profile_trigger(ProfileState *ps)
+{
+ global_State *g = ps->g;
+ uint8_t mask;
+ profile_lock(ps);
+ ps->samples++; /* Always increment number of samples. */
+ mask = g->hookmask;
+ if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */
+ int st = g->vmstate;
+ ps->vmstate = st >= 0 ? 'N' :
+ st == ~LJ_VMST_INTERP ? 'I' :
+ st == ~LJ_VMST_C ? 'C' :
+ st == ~LJ_VMST_GC ? 'G' : 'J';
+ g->hookmask = (mask | HOOK_PROFILE);
+ lj_dispatch_update(g);
+ }
+ profile_unlock(ps);
+}
+
+/* -- OS-specific profile timer handling ---------------------------------- */
+
+#if LJ_PROFILE_SIGPROF
+
+/* SIGPROF handler. */
+static void profile_signal(int sig)
+{
+ UNUSED(sig);
+ profile_trigger(&profile_state);
+}
+
+/* Start profiling timer. */
+static void profile_timer_start(ProfileState *ps)
+{
+ int interval = ps->interval;
+ struct itimerval tm;
+ struct sigaction sa;
+ tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
+ tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
+ setitimer(ITIMER_PROF, &tm, NULL);
+ sa.sa_flags = SA_RESTART;
+ sa.sa_handler = profile_signal;
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGPROF, &sa, &ps->oldsa);
+}
+
+/* Stop profiling timer. */
+static void profile_timer_stop(ProfileState *ps)
+{
+ struct itimerval tm;
+ tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
+ tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
+ setitimer(ITIMER_PROF, &tm, NULL);
+ sigaction(SIGPROF, &ps->oldsa, NULL);
+}
+
+#elif LJ_PROFILE_PTHREAD
+
+/* POSIX timer thread. */
+static void *profile_thread(ProfileState *ps)
+{
+ int interval = ps->interval;
+#if !LJ_TARGET_PS3
+ struct timespec ts;
+ ts.tv_sec = interval / 1000;
+ ts.tv_nsec = (interval % 1000) * 1000000;
+#endif
+ while (1) {
+#if LJ_TARGET_PS3
+ sys_timer_usleep(interval * 1000);
+#else
+ nanosleep(&ts, NULL);
+#endif
+ if (ps->abort) break;
+ profile_trigger(ps);
+ }
+ return NULL;
+}
+
+/* Start profiling timer thread. */
+static void profile_timer_start(ProfileState *ps)
+{
+ pthread_mutex_init(&ps->lock, 0);
+ ps->abort = 0;
+ pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps);
+}
+
+/* Stop profiling timer thread. */
+static void profile_timer_stop(ProfileState *ps)
+{
+ ps->abort = 1;
+ pthread_join(ps->thread, NULL);
+ pthread_mutex_destroy(&ps->lock);
+}
+
+#elif LJ_PROFILE_WTHREAD
+
+/* Windows timer thread. */
+static DWORD WINAPI profile_thread(void *psx)
+{
+ ProfileState *ps = (ProfileState *)psx;
+ int interval = ps->interval;
+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
+ ps->wmm_tbp(interval);
+#endif
+ while (1) {
+ Sleep(interval);
+ if (ps->abort) break;
+ profile_trigger(ps);
+ }
+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
+ ps->wmm_tep(interval);
+#endif
+ return 0;
+}
+
+/* Start profiling timer thread. */
+static void profile_timer_start(ProfileState *ps)
+{
+#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP
+ if (!ps->wmm) { /* Load WinMM library on-demand. */
+ ps->wmm = LJ_WIN_LOADLIBA("winmm.dll");
+ if (ps->wmm) {
+ ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod");
+ ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod");
+ if (!ps->wmm_tbp || !ps->wmm_tep) {
+ ps->wmm = NULL;
+ return;
+ }
+ }
+ }
+#endif
+ InitializeCriticalSection(&ps->lock);
+ ps->abort = 0;
+ ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL);
+}
+
+/* Stop profiling timer thread. */
+static void profile_timer_stop(ProfileState *ps)
+{
+ ps->abort = 1;
+ WaitForSingleObject(ps->thread, INFINITE);
+ DeleteCriticalSection(&ps->lock);
+}
+
+#endif
+
+/* -- Public profiling API ------------------------------------------------ */
+
+/* Start profiling. */
+LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
+ luaJIT_profile_callback cb, void *data)
+{
+ ProfileState *ps = &profile_state;
+ int interval = LJ_PROFILE_INTERVAL_DEFAULT;
+ while (*mode) {
+ int m = *mode++;
+ switch (m) {
+ case 'i':
+ interval = 0;
+ while (*mode >= '0' && *mode <= '9')
+ interval = interval * 10 + (*mode++ - '0');
+ if (interval <= 0) interval = 1;
+ break;
+#if LJ_HASJIT
+ case 'l': case 'f':
+ L2J(L)->prof_mode = m;
+ lj_trace_flushall(L);
+ break;
+#endif
+ default: /* Ignore unknown mode chars. */
+ break;
+ }
+ }
+ if (ps->g) {
+ luaJIT_profile_stop(L);
+ if (ps->g) return; /* Profiler in use by another VM. */
+ }
+ ps->g = G(L);
+ ps->interval = interval;
+ ps->cb = cb;
+ ps->data = data;
+ ps->samples = 0;
+ lj_buf_init(L, &ps->sb);
+ profile_timer_start(ps);
+}
+
+/* Stop profiling. */
+LUA_API void luaJIT_profile_stop(lua_State *L)
+{
+ ProfileState *ps = &profile_state;
+ global_State *g = ps->g;
+ if (G(L) == g) { /* Only stop profiler if started by this VM. */
+ profile_timer_stop(ps);
+ g->hookmask &= ~HOOK_PROFILE;
+ lj_dispatch_update(g);
+#if LJ_HASJIT
+ G2J(g)->prof_mode = 0;
+ lj_trace_flushall(L);
+#endif
+ lj_buf_free(g, &ps->sb);
+ ps->sb.w = ps->sb.e = NULL;
+ ps->g = NULL;
+ }
+}
+
+/* Return a compact stack dump. */
+LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+ int depth, size_t *len)
+{
+ ProfileState *ps = &profile_state;
+ SBuf *sb = &ps->sb;
+ setsbufL(sb, L);
+ lj_buf_reset(sb);
+ lj_debug_dumpstack(L, sb, fmt, depth);
+ *len = (size_t)sbuflen(sb);
+ return sb->b;
+}
+
+#endif
diff --git a/src/lj_profile.h b/src/lj_profile.h
new file mode 100644
index 00000000..3969f8e8
--- /dev/null
+++ b/src/lj_profile.h
@@ -0,0 +1,21 @@
+/*
+** Low-overhead profiling.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_PROFILE_H
+#define _LJ_PROFILE_H
+
+#include "lj_obj.h"
+
+#if LJ_HASPROFILE
+
+LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L);
+#if !LJ_PROFILE_SIGPROF
+LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g);
+LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g);
+#endif
+
+#endif
+
+#endif
diff --git a/src/lj_record.c b/src/lj_record.c
index f7552db0..92bdbfc9 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -20,6 +20,9 @@
#endif
#include "lj_bc.h"
#include "lj_ff.h"
+#if LJ_HASPROFILE
+#include "lj_debug.h"
+#endif
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
@@ -30,6 +33,7 @@
#include "lj_snap.h"
#include "lj_dispatch.h"
#include "lj_vm.h"
+#include "lj_prng.h"
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
@@ -47,31 +51,52 @@
static void rec_check_ir(jit_State *J)
{
IRRef i, nins = J->cur.nins, nk = J->cur.nk;
- lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536);
- for (i = nins-1; i >= nk; i--) {
+ lj_assertJ(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536,
+ "inconsistent IR layout");
+ for (i = nk; i < nins; i++) {
IRIns *ir = IR(i);
uint32_t mode = lj_ir_mode[ir->o];
IRRef op1 = ir->op1;
IRRef op2 = ir->op2;
+ const char *err = NULL;
switch (irm_op1(mode)) {
- case IRMnone: lua_assert(op1 == 0); break;
- case IRMref: lua_assert(op1 >= nk);
- lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
+ case IRMnone:
+ if (op1 != 0) err = "IRMnone op1 used";
+ break;
+ case IRMref:
+ if (op1 < nk || (i >= REF_BIAS ? op1 >= i : op1 <= i))
+ err = "IRMref op1 out of range";
+ break;
case IRMlit: break;
- case IRMcst: lua_assert(i < REF_BIAS); continue;
+ case IRMcst:
+ if (i >= REF_BIAS) { err = "constant in IR range"; break; }
+ if (irt_is64(ir->t) && ir->o != IR_KNULL)
+ i++;
+ continue;
}
switch (irm_op2(mode)) {
- case IRMnone: lua_assert(op2 == 0); break;
- case IRMref: lua_assert(op2 >= nk);
- lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break;
+ case IRMnone:
+ if (op2) err = "IRMnone op2 used";
+ break;
+ case IRMref:
+ if (op2 < nk || (i >= REF_BIAS ? op2 >= i : op2 <= i))
+ err = "IRMref op2 out of range";
+ break;
case IRMlit: break;
- case IRMcst: lua_assert(0); break;
+ case IRMcst: err = "IRMcst op2"; break;
}
- if (ir->prev) {
- lua_assert(ir->prev >= nk);
- lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i);
- lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o);
+ if (!err && ir->prev) {
+ if (ir->prev < nk || (i >= REF_BIAS ? ir->prev >= i : ir->prev <= i))
+ err = "chain out of range";
+ else if (ir->o != IR_NOP && IR(ir->prev)->o != ir->o)
+ err = "chain to different op";
}
+ lj_assertJ(!err, "bad IR %04d op %d(%04d,%04d): %s",
+ i-REF_BIAS,
+ ir->o,
+ irm_op1(mode) == IRMref ? op1-REF_BIAS : op1,
+ irm_op2(mode) == IRMref ? op2-REF_BIAS : op2,
+ err);
}
}
@@ -81,48 +106,79 @@ static void rec_check_slots(jit_State *J)
BCReg s, nslots = J->baseslot + J->maxslot;
int32_t depth = 0;
cTValue *base = J->L->base - J->baseslot;
- lua_assert(J->baseslot >= 1);
- lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME));
- lua_assert(nslots <= LJ_MAX_JSLOTS);
+ lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot");
+ lj_assertJ(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME),
+ "baseslot does not point to frame");
+ lj_assertJ(nslots <= LJ_MAX_JSLOTS, "slot overflow");
for (s = 0; s < nslots; s++) {
TRef tr = J->slot[s];
if (tr) {
cTValue *tv = &base[s];
IRRef ref = tref_ref(tr);
- IRIns *ir;
- lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
- ir = IR(ref);
- lua_assert(irt_t(ir->t) == tref_t(tr));
+ IRIns *ir = NULL; /* Silence compiler. */
+ if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) {
+ lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins,
+ "slot %d ref %04d out of range", s, ref - REF_BIAS);
+ ir = IR(ref);
+ lj_assertJ(irt_t(ir->t) == tref_t(tr), "slot %d IR type mismatch", s);
+ }
if (s == 0) {
- lua_assert(tref_isfunc(tr));
+ lj_assertJ(tref_isfunc(tr), "frame slot 0 is not a function");
+#if LJ_FR2
+ } else if (s == 1) {
+ lj_assertJ((tr & ~TREF_FRAME) == 0, "bad frame slot 1");
+#endif
} else if ((tr & TREF_FRAME)) {
GCfunc *fn = gco2func(frame_gc(tv));
BCReg delta = (BCReg)(tv - frame_prev(tv));
- lua_assert(tref_isfunc(tr));
- if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir));
- lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta));
+#if LJ_FR2
+ lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64,
+ "frame slot %d PC mismatch", s);
+ tr = J->slot[s-1];
+ ir = IR(tref_ref(tr));
+#endif
+ lj_assertJ(tref_isfunc(tr),
+ "frame slot %d is not a function", s-LJ_FR2);
+ lj_assertJ(!tref_isk(tr) || fn == ir_kfunc(ir),
+ "frame slot %d function mismatch", s-LJ_FR2);
+ lj_assertJ(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME)
+ : (s == delta + LJ_FR2),
+ "frame slot %d broken chain", s-LJ_FR2);
depth++;
} else if ((tr & TREF_CONT)) {
- lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void));
- lua_assert((J->slot[s+1] & TREF_FRAME));
+#if LJ_FR2
+ lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64,
+ "cont slot %d continuation mismatch", s);
+#else
+ lj_assertJ(ir_kptr(ir) == gcrefp(tv->gcr, void),
+ "cont slot %d continuation mismatch", s);
+#endif
+ lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME),
+ "cont slot %d not followed by frame", s);
depth++;
+ } else if ((tr & TREF_KEYINDEX)) {
+ lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d",
+ s, tref_type(tr));
} else {
- if (tvisnumber(tv))
- lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */
- else
- lua_assert(itype2irt(tv) == tref_type(tr));
+ /* Number repr. may differ, but other types must be the same. */
+ lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) :
+ itype2irt(tv) == tref_type(tr),
+ "slot %d type mismatch: stack type %d vs IR type %d",
+ s, itypemap(tv), tref_type(tr));
if (tref_isk(tr)) { /* Compare constants. */
TValue tvk;
lj_ir_kvalue(J->L, &tvk, ir);
- if (!(tvisnum(&tvk) && tvisnan(&tvk)))
- lua_assert(lj_obj_equal(tv, &tvk));
- else
- lua_assert(tvisnum(tv) && tvisnan(tv));
+ lj_assertJ((tvisnum(&tvk) && tvisnan(&tvk)) ?
+ (tvisnum(tv) && tvisnan(tv)) :
+ lj_obj_equal(tv, &tvk),
+ "slot %d const mismatch: stack %016llx vs IR %016llx",
+ s, tv->u64, tvk.u64);
}
}
}
}
- lua_assert(J->framedepth == depth);
+ lj_assertJ(J->framedepth == depth,
+ "frame depth mismatch %d vs %d", J->framedepth, depth);
}
#endif
@@ -156,10 +212,11 @@ static TRef sload(jit_State *J, int32_t slot)
/* Get TRef for current function. */
static TRef getcurrf(jit_State *J)
{
- if (J->base[-1])
- return J->base[-1];
- lua_assert(J->baseslot == 1);
- return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY);
+ if (J->base[-1-LJ_FR2])
+ return J->base[-1-LJ_FR2];
+ /* Non-base frame functions ought to be loaded already. */
+ lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot");
+ return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY);
}
/* Compare for raw object equality.
@@ -205,6 +262,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o)
return 0; /* Can't represent lightuserdata (pointless). */
}
+/* Emit a VLOAD with the correct type. */
+TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t)
+{
+ TRef tr = emitir(IRTG(IR_VLOAD, t), ref, idx);
+ if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
+ return tr;
+}
+
/* -- Record loop ops ----------------------------------------------------- */
/* Loop event. */
@@ -221,17 +286,21 @@ static void canonicalize_slots(jit_State *J)
if (LJ_DUALNUM) return;
for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
TRef tr = J->slot[s];
- if (tref_isinteger(tr)) {
+ if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) {
IRIns *ir = IR(tref_ref(tr));
- if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY)))
+ if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY))))
J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
}
}
}
/* Stop recording. */
-static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
+void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
{
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+ if (J->retryrec)
+ lj_trace_err(J, LJ_TRERR_RETRY);
+#endif
lj_trace_end(J);
J->cur.linktype = (uint8_t)linktype;
J->cur.link = (uint16_t)lnk;
@@ -399,7 +468,8 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev,
TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode);
TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode);
int tc, dir = rec_for_direction(&tv[FORL_STEP]);
- lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
+ lj_assertJ(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI,
+ "bad bytecode %d instead of FORI/JFORI", bc_op(*fori));
scev->t.irt = t;
scev->dir = dir;
scev->stop = tref_ref(stop);
@@ -455,7 +525,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
IRT_NUM;
for (i = FORL_IDX; i <= FORL_STEP; i++) {
if (!tr[i]) sload(J, ra+i);
- lua_assert(tref_isnumber_str(tr[i]));
+ lj_assertJ(tref_isnumber_str(tr[i]), "bad FORI argument type");
if (tref_isstr(tr[i]))
tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0);
if (t == IRT_INT) {
@@ -499,8 +569,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
{
BCReg ra = bc_a(iterins);
- lua_assert(J->base[ra] != 0);
- if (!tref_isnil(J->base[ra])) { /* Looping back? */
+ if (!tref_isnil(getslot(J, ra))) { /* Looping back? */
J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */
J->maxslot = ra-1+bc_b(J->pc[-1]);
J->pc += bc_j(iterins)+1;
@@ -538,12 +607,13 @@ static int innerloopleft(jit_State *J, const BCIns *pc)
/* Handle the case when an interpreted loop op is hit. */
static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
{
- if (J->parent == 0) {
+ if (J->parent == 0 && J->exitno == 0) {
if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
+ if (bc_op(J->cur.startins) == BC_ITERN) return; /* See rec_itern(). */
/* Same loop? */
if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */
lj_trace_err(J, LJ_TRERR_LLEAVE);
- rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */
+ lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
} else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */
/* It's usually better to abort here and wait until the inner loop
** is traced. But if the inner loop repeatedly didn't loop back,
@@ -568,18 +638,136 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
/* Handle the case when an already compiled loop op is hit. */
static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
{
- if (J->parent == 0) { /* Root trace hit an inner loop. */
+ if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */
/* Better let the inner loop spawn a side trace back here. */
lj_trace_err(J, LJ_TRERR_LINNER);
} else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */
J->instunroll = 0; /* Cannot continue across a compiled loop op. */
if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
- rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */
+ lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */
else
- rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */
+ lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */
} /* Side trace continues across a loop that's left or not entered. */
}
+/* Record ITERN. */
+static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb)
+{
+#if LJ_BE
+ /* YAGNI: Disabled on big-endian due to issues with lj_vm_next,
+ ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair.
+ */
+ UNUSED(ra); UNUSED(rb);
+ setintV(&J->errinfo, (int32_t)BC_ITERN);
+ lj_trace_err_info(J, LJ_TRERR_NYIBC);
+#else
+ RecordIndex ix;
+ /* Since ITERN is recorded at the start, we need our own loop detection. */
+ if (J->pc == J->startpc &&
+ J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) {
+ IRRef ref = REF_FIRST + LJ_HASPROFILE;
+#ifdef LUAJIT_ENABLE_CHECKHOOK
+ ref += 3;
+#endif
+ if (J->cur.nins > ref ||
+ (LJ_HASPROFILE && J->cur.nins == ref && J->cur.ir[ref-1].o != IR_PROF)) {
+ J->instunroll = 0; /* Cannot continue unrolling across an ITERN. */
+ lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */
+ return LOOPEV_ENTER;
+ }
+ }
+ J->maxslot = ra;
+ lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */
+ ix.tab = getslot(J, ra-2);
+ ix.key = J->base[ra-1] ? J->base[ra-1] :
+ sloadt(J, (int32_t)(ra-1), IRT_GUARD|IRT_INT,
+ IRSLOAD_TYPECHECK|IRSLOAD_KEYINDEX);
+ copyTV(J->L, &ix.tabv, &J->L->base[ra-2]);
+ copyTV(J->L, &ix.keyv, &J->L->base[ra-1]);
+ ix.idxchain = (rb < 3); /* Omit value type check, if unused. */
+ ix.mobj = 1; /* We need the next index, too. */
+ J->maxslot = ra + lj_record_next(J, &ix);
+ J->needsnap = 1;
+ if (!tref_isnil(ix.key)) { /* Looping back? */
+ J->base[ra-1] = ix.mobj | TREF_KEYINDEX; /* Control var has next index. */
+ J->base[ra] = ix.key;
+ J->base[ra+1] = ix.val;
+ J->pc += bc_j(J->pc[1])+2;
+ return LOOPEV_ENTER;
+ } else {
+ J->maxslot = ra-3;
+ J->pc += 2;
+ return LOOPEV_LEAVE;
+ }
+#endif
+}
+
+/* Record ISNEXT. */
+static void rec_isnext(jit_State *J, BCReg ra)
+{
+ cTValue *b = &J->L->base[ra-3];
+ if (tvisfunc(b) && funcV(b)->c.ffid == FF_next &&
+ tvistab(b+1) && tvisnil(b+2)) {
+ /* These checks are folded away for a compiled pairs(). */
+ TRef func = getslot(J, ra-3);
+ TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID);
+ emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next));
+ (void)getslot(J, ra-2); /* Type check for table. */
+ (void)getslot(J, ra-1); /* Type check for nil key. */
+ J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX;
+ J->maxslot = ra;
+ } else { /* Abort trace. Interpreter will despecialize bytecode. */
+ lj_trace_err(J, LJ_TRERR_RECERR);
+ }
+}
+
+/* -- Record profiler hook checks ----------------------------------------- */
+
+#if LJ_HASPROFILE
+
+/* Need to insert profiler hook check? */
+static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc)
+{
+ GCproto *ppt;
+ lj_assertJ(J->prof_mode == 'f' || J->prof_mode == 'l',
+ "bad profiler mode %c", J->prof_mode);
+ if (!pt)
+ return 0;
+ ppt = J->prev_pt;
+ J->prev_pt = pt;
+ if (pt != ppt && ppt) {
+ J->prev_line = -1;
+ return 1;
+ }
+ if (J->prof_mode == 'l') {
+ BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc));
+ BCLine pline = J->prev_line;
+ J->prev_line = line;
+ if (pline != line)
+ return 1;
+ }
+ return 0;
+}
+
+static void rec_profile_ins(jit_State *J, const BCIns *pc)
+{
+ if (J->prof_mode && rec_profile_need(J, J->pt, pc)) {
+ emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
+ lj_snap_add(J);
+ }
+}
+
+static void rec_profile_ret(jit_State *J)
+{
+ if (J->prof_mode == 'f') {
+ emitir(IRTG(IR_PROF, IRT_NIL), 0, 0);
+ J->prev_pt = NULL;
+ lj_snap_add(J);
+ }
+}
+
+#endif
+
/* -- Record calls and returns -------------------------------------------- */
/* Specialize to the runtime value of the called function or its prototype. */
@@ -590,11 +778,26 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
GCproto *pt = funcproto(fn);
/* Too many closures created? Probably not a monomorphic function. */
if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */
- TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC);
- emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt)));
+ TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC);
+ emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt)));
(void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */
return tr;
}
+ } else {
+ /* Don't specialize to non-monomorphic builtins. */
+ switch (fn->c.ffid) {
+ case FF_coroutine_wrap_aux:
+ case FF_string_gmatch_aux:
+ /* NYI: io_file_iter doesn't have an ffid, yet. */
+ { /* Specialize to the ffid. */
+ TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID);
+ emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid));
+ }
+ return tr;
+ default:
+ /* NYI: don't specialize to non-monomorphic C functions. */
+ break;
+ }
}
/* Otherwise specialize to the function (closure) value itself. */
kfunc = lj_ir_kfunc(J, fn);
@@ -607,21 +810,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
{
RecordIndex ix;
TValue *functv = &J->L->base[func];
- TRef *fbase = &J->base[func];
+ TRef kfunc, *fbase = &J->base[func];
ptrdiff_t i;
- for (i = 0; i <= nargs; i++)
- (void)getslot(J, func+i); /* Ensure func and all args have a reference. */
+ (void)getslot(J, func); /* Ensure func has a reference. */
+ for (i = 1; i <= nargs; i++)
+ (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */
if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */
ix.tab = fbase[0];
copyTV(J->L, &ix.tabv, functv);
if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
lj_trace_err(J, LJ_TRERR_NOMM);
- for (i = ++nargs; i > 0; i--) /* Shift arguments up. */
- fbase[i] = fbase[i-1];
+ for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */
+ fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1];
+#if LJ_FR2
+ fbase[2] = fbase[0];
+#endif
fbase[0] = ix.mobj; /* Replace function. */
functv = &ix.mobjv;
}
- fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]);
+ kfunc = rec_call_specialize(J, funcV(functv), fbase[0]);
+#if LJ_FR2
+ fbase[0] = kfunc;
+ fbase[1] = TREF_FRAME;
+#else
+ fbase[0] = kfunc | TREF_FRAME;
+#endif
J->maxslot = (BCReg)nargs;
}
@@ -631,8 +844,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
rec_call_setup(J, func, nargs);
/* Bump frame. */
J->framedepth++;
- J->base += func+1;
- J->baseslot += func+1;
+ J->base += func+1+LJ_FR2;
+ J->baseslot += func+1+LJ_FR2;
if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
lj_trace_err(J, LJ_TRERR_STACKOV);
}
@@ -650,7 +863,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
func += cbase;
}
/* Move func + args down. */
- memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1));
+ if (LJ_FR2 && J->baseslot == 2)
+ J->base[func+1] = TREF_FRAME;
+ memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2));
/* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
/* Tailcalls can form a loop, so count towards the loop unroll limit. */
if (++J->tailcalled > J->loopunroll)
@@ -680,6 +895,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt)
return 0;
}
+static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot);
+
/* Record return. */
void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
{
@@ -691,30 +908,32 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
BCReg cbase = (BCReg)frame_delta(frame);
if (--J->framedepth <= 0)
lj_trace_err(J, LJ_TRERR_NYIRETL);
- lua_assert(J->baseslot > 1);
+ lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return");
gotresults++;
rbase += cbase;
J->baseslot -= (BCReg)cbase;
J->base -= cbase;
J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */
frame = frame_prevd(frame);
+ J->needsnap = 1; /* Stop catching on-trace errors. */
}
/* Return to lower frame via interpreter for unhandled cases. */
if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
(!frame_islua(frame) ||
- (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) {
+ (J->parent == 0 && J->exitno == 0 &&
+ !bc_isret(bc_op(J->cur.startins))))) {
/* NYI: specialize to frame type and return directly, not via RET*. */
for (i = 0; i < (ptrdiff_t)rbase; i++)
J->base[i] = 0; /* Purge dead slots. */
J->maxslot = rbase + (BCReg)gotresults;
- rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */
+ lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */
return;
}
if (frame_isvarg(frame)) {
BCReg cbase = (BCReg)frame_delta(frame);
if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */
lj_trace_err(J, LJ_TRERR_NYIRETL);
- lua_assert(J->baseslot > 1);
+ lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return");
rbase += cbase;
J->baseslot -= (BCReg)cbase;
J->base -= cbase;
@@ -724,27 +943,28 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
BCIns callins = *(frame_pc(frame)-1);
ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
BCReg cbase = bc_a(callins);
- GCproto *pt = funcproto(frame_func(frame - (cbase+1)));
+ GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2)));
if ((pt->flags & PROTO_NOJIT))
lj_trace_err(J, LJ_TRERR_CJITOFF);
if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
if (check_downrec_unroll(J, pt)) {
J->maxslot = (BCReg)(rbase + gotresults);
lj_snap_purge(J);
- rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */
+ lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */
return;
}
lj_snap_add(J);
}
for (i = 0; i < nresults; i++) /* Adjust results. */
- J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
+ J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
J->maxslot = cbase+(BCReg)nresults;
if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */
J->framedepth--;
- lua_assert(J->baseslot > cbase+1);
- J->baseslot -= cbase+1;
- J->base -= cbase+1;
- } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
+ lj_assertJ(J->baseslot > cbase+1+LJ_FR2, "bad baseslot for return");
+ J->baseslot -= cbase+1+LJ_FR2;
+ J->base -= cbase+1+LJ_FR2;
+ } else if (J->parent == 0 && J->exitno == 0 &&
+ !bc_isret(bc_op(J->cur.startins))) {
/* Return to lower frame would leave the loop in a root trace. */
lj_trace_err(J, LJ_TRERR_LLEAVE);
} else if (J->needsnap) { /* Tailcalled to ff with side-effects. */
@@ -752,13 +972,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
} else { /* Return to lower frame. Guard for the target we return to. */
TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
- emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc);
+ emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc);
J->retdepth++;
J->needsnap = 1;
- lua_assert(J->baseslot == 1);
+ lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return");
/* Shift result slots up and clear the slots of the new frame below. */
- memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults);
- memset(J->base-1, 0, sizeof(TRef)*(cbase+1));
+ memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults);
+ memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2));
}
} else if (frame_iscont(frame)) { /* Return to continuation frame. */
ASMFunction cont = frame_contf(frame);
@@ -767,24 +987,52 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
lj_trace_err(J, LJ_TRERR_NYIRETL);
J->baseslot -= (BCReg)cbase;
J->base -= cbase;
- J->maxslot = cbase-2;
+ J->maxslot = cbase-(2<<LJ_FR2);
if (cont == lj_cont_ra) {
/* Copy result to destination slot. */
BCReg dst = bc_a(*(frame_contpc(frame)-1));
J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
- if (dst >= J->maxslot) J->maxslot = dst+1;
+ if (dst >= J->maxslot) {
+ J->maxslot = dst+1;
+ }
} else if (cont == lj_cont_nop) {
/* Nothing to do here. */
} else if (cont == lj_cont_cat) {
- lua_assert(0);
+ BCReg bslot = bc_b(*(frame_contpc(frame)-1));
+ TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL;
+ if (bslot != J->maxslot) { /* Concatenate the remainder. */
+ TValue *b = J->L->base, save; /* Simulate lower frame and result. */
+ /* Can't handle MM_concat + CALLT + fast func side-effects. */
+ if (J->postproc != LJ_POST_NONE)
+ lj_trace_err(J, LJ_TRERR_NYIRETL);
+ J->base[J->maxslot] = tr;
+ copyTV(J->L, &save, b-(2<<LJ_FR2));
+ if (gotresults)
+ copyTV(J->L, b-(2<<LJ_FR2), b+rbase);
+ else
+ setnilV(b-(2<<LJ_FR2));
+ J->L->base = b - cbase;
+ tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2));
+ b = J->L->base + cbase; /* Undo. */
+ J->L->base = b;
+ copyTV(J->L, b-(2<<LJ_FR2), &save);
+ }
+ if (tr) { /* Store final result. */
+ BCReg dst = bc_a(*(frame_contpc(frame)-1));
+ J->base[dst] = tr;
+ if (dst >= J->maxslot) {
+ J->maxslot = dst+1;
+ }
+ } /* Otherwise continue with another __concat call. */
} else {
/* Result type already specialized. */
- lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
+ lj_assertJ(cont == lj_cont_condf || cont == lj_cont_condt,
+ "bad continuation type");
}
} else {
lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */
}
- lua_assert(J->baseslot >= 1);
+ lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot for return");
}
/* -- Metamethod handling ------------------------------------------------- */
@@ -792,19 +1040,17 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
/* Prepare to record call to metamethod. */
static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
{
- BCReg s, top = curr_proto(J->L)->framesize;
- TRef trcont;
- setcont(&J->L->base[top], cont);
-#if LJ_64
- trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin));
+ BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize;
+#if LJ_FR2
+ J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont)));
+ J->base[top+1] = TREF_CONT;
#else
- trcont = lj_ir_kptr(J, (void *)cont);
+ J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT;
#endif
- J->base[top] = trcont | TREF_CONT;
J->framedepth++;
for (s = J->maxslot; s < top; s++)
J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */
- return top+1;
+ return top+1+LJ_FR2;
}
/* Record metamethod lookup. */
@@ -823,7 +1069,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
cTValue *mo;
if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) {
/* Specialize to the C library namespace object. */
- emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
+ emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
} else {
/* Specialize to the type of userdata. */
TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE);
@@ -852,7 +1098,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
}
/* The cdata metatable is treated as immutable. */
if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
- ix->mt = mix.tab = lj_ir_ktab(J, mt);
+ ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB,
+ GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)]));
goto nocheck;
}
ix->mt = mt ? mix.tab : TREF_NIL;
@@ -879,12 +1126,12 @@ nocheck:
static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
{
/* Set up metamethod call first to save ix->tab and ix->tabv. */
- BCReg func = rec_mm_prep(J, lj_cont_ra);
+ BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra);
TRef *base = J->base + func;
TValue *basev = J->L->base + func;
- base[1] = ix->tab; base[2] = ix->key;
- copyTV(J->L, basev+1, &ix->tabv);
- copyTV(J->L, basev+2, &ix->keyv);
+ base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key;
+ copyTV(J->L, basev+1+LJ_FR2, &ix->tabv);
+ copyTV(J->L, basev+2+LJ_FR2, &ix->keyv);
if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */
if (mm != MM_unm) {
ix->tab = ix->key;
@@ -896,6 +1143,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
}
ok:
base[0] = ix->mobj;
+#if LJ_FR2
+ base[1] = 0;
+#endif
copyTV(J->L, basev+0, &ix->mobjv);
lj_record_call(J, func, 2);
return 0; /* No result yet. */
@@ -912,6 +1162,8 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
TRef *base = J->base + func;
TValue *basev = J->L->base + func;
base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
+ base += LJ_FR2;
+ basev += LJ_FR2;
base[1] = tr; copyTV(J->L, basev+1, tv);
#if LJ_52
base[2] = tr; copyTV(J->L, basev+2, tv);
@@ -921,7 +1173,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
lj_record_call(J, func, 2);
} else {
if (LJ_52 && tref_istab(tr))
- return lj_ir_call(J, IRCALL_lj_tab_len, tr);
+ return emitir(IRTI(IR_ALEN), tr, TREF_NIL);
lj_trace_err(J, LJ_TRERR_NOMM);
}
return 0; /* No result yet. */
@@ -931,10 +1183,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
{
BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
- TRef *base = J->base + func;
- TValue *tv = J->L->base + func;
- base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
- copyTV(J->L, tv+0, &ix->mobjv);
+ TRef *base = J->base + func + LJ_FR2;
+ TValue *tv = J->L->base + func + LJ_FR2;
+ base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
+ copyTV(J->L, tv-LJ_FR2, &ix->mobjv);
copyTV(J->L, tv+1, &ix->valv);
copyTV(J->L, tv+2, &ix->keyv);
lj_record_call(J, func, 2);
@@ -1030,7 +1282,7 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
ix->tab = ix->val;
copyTV(J->L, &ix->tabv, &ix->valv);
} else {
- lua_assert(tref_iscdata(ix->key));
+ lj_assertJ(tref_iscdata(ix->key), "cdata expected");
ix->tab = ix->key;
copyTV(J->L, &ix->tabv, &ix->keyv);
}
@@ -1041,6 +1293,72 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
/* -- Indexed access ------------------------------------------------------ */
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+/* Bump table allocations in bytecode when they grow during recording. */
+static void rec_idx_bump(jit_State *J, RecordIndex *ix)
+{
+ RBCHashEntry *rbc = &J->rbchash[(ix->tab & (RBCHASH_SLOTS-1))];
+ if (tref_ref(ix->tab) == rbc->ref) {
+ const BCIns *pc = mref(rbc->pc, const BCIns);
+ GCtab *tb = tabV(&ix->tabv);
+ uint32_t nhbits;
+ IRIns *ir;
+ if (!tvisnil(&ix->keyv))
+ (void)lj_tab_set(J->L, tb, &ix->keyv); /* Grow table right now. */
+ nhbits = tb->hmask > 0 ? lj_fls(tb->hmask)+1 : 0;
+ ir = IR(tref_ref(ix->tab));
+ if (ir->o == IR_TNEW) {
+ uint32_t ah = bc_d(*pc);
+ uint32_t asize = ah & 0x7ff, hbits = ah >> 11;
+ if (nhbits > hbits) hbits = nhbits;
+ if (tb->asize > asize) {
+ asize = tb->asize <= 0x7ff ? tb->asize : 0x7ff;
+ }
+ if ((asize | (hbits<<11)) != ah) { /* Has the size changed? */
+ /* Patch bytecode, but continue recording (for more patching). */
+ setbc_d(pc, (asize | (hbits<<11)));
+ /* Patching TNEW operands is only safe if the trace is aborted. */
+ ir->op1 = asize; ir->op2 = hbits;
+ J->retryrec = 1; /* Abort the trace at the end of recording. */
+ }
+ } else if (ir->o == IR_TDUP) {
+ GCtab *tpl = gco2tab(proto_kgc(&gcref(rbc->pt)->pt, ~(ptrdiff_t)bc_d(*pc)));
+ /* Grow template table, but preserve keys with nil values. */
+ if ((tb->asize > tpl->asize && (1u << nhbits)-1 == tpl->hmask) ||
+ (tb->asize == tpl->asize && (1u << nhbits)-1 > tpl->hmask)) {
+ Node *node = noderef(tpl->node);
+ uint32_t i, hmask = tpl->hmask, asize;
+ TValue *array;
+ for (i = 0; i <= hmask; i++) {
+ if (!tvisnil(&node[i].key) && tvisnil(&node[i].val))
+ settabV(J->L, &node[i].val, tpl);
+ }
+ if (!tvisnil(&ix->keyv) && tref_isk(ix->key)) {
+ TValue *o = lj_tab_set(J->L, tpl, &ix->keyv);
+ if (tvisnil(o)) settabV(J->L, o, tpl);
+ }
+ lj_tab_resize(J->L, tpl, tb->asize, nhbits);
+ node = noderef(tpl->node);
+ hmask = tpl->hmask;
+ for (i = 0; i <= hmask; i++) {
+ /* This is safe, since template tables only hold immutable values. */
+ if (tvistab(&node[i].val))
+ setnilV(&node[i].val);
+ }
+ /* The shape of the table may have changed. Clean up array part, too. */
+ asize = tpl->asize;
+ array = tvref(tpl->array);
+ for (i = 0; i < asize; i++) {
+ if (tvistab(&array[i]))
+ setnilV(&array[i]);
+ }
+ J->retryrec = 1; /* Abort the trace at the end of recording. */
+ }
+ }
+ }
+}
+#endif
+
/* Record bounds-check. */
static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
{
@@ -1061,7 +1379,8 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
/* Got scalar evolution analysis results for this reference? */
if (ref == J->scev.idx) {
int32_t stop;
- lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD);
+ lj_assertJ(irt_isint(J->scev.t) && ir->o == IR_SLOAD,
+ "only int SCEV supported");
stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]);
/* Runtime value for stop of loop is within bounds? */
if ((uint64_t)stop + ofs < (uint64_t)asize) {
@@ -1080,11 +1399,14 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
}
/* Record indexed key lookup. */
-static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
+static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
+ IRType1 *rbguard)
{
TRef key;
GCtab *t = tabV(&ix->tabv);
ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */
+ *rbref = 0;
+ rbguard->irt = 0;
/* Integer keys are looked up in the array part first. */
key = ix->key;
@@ -1098,8 +1420,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
if ((MSize)k < t->asize) { /* Currently an array key? */
TRef arrayref;
rec_idx_abc(J, asizeref, ikey, t->asize);
- arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY);
- return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey);
+ arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY);
+ return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey);
} else { /* Currently not in array (may be an array extension)? */
emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */
if (k == 0 && tref_isk(key))
@@ -1131,19 +1453,21 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
if (tref_isk(key)) {
/* Optimize lookup of constant hash keys. */
- MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
- if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) &&
- hslot <= 65535*(MSize)sizeof(Node)) {
- TRef node, kslot;
- TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
+ GCSize hslot = (GCSize)((char *)ix->oldv-(char *)&noderef(t->node)[0].val);
+ if (hslot <= t->hmask*(GCSize)sizeof(Node) &&
+ hslot <= 65535*(GCSize)sizeof(Node)) {
+ TRef node, kslot, hm;
+ *rbref = J->cur.nins; /* Mark possible rollback point. */
+ *rbguard = J->guardemit;
+ hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
- node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE);
- kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
- return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot);
+ node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE);
+ kslot = lj_ir_kslot(J, key, (IRRef)(hslot / sizeof(Node)));
+ return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot);
}
}
/* Fall back to a regular hash lookup. */
- return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key);
+ return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key);
}
/* Determine whether a key is NOT one of the fast metamethod names. */
@@ -1168,20 +1492,22 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
{
TRef xref;
IROp xrefop, loadop;
+ IRRef rbref;
+ IRType1 rbguard;
cTValue *oldv;
while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
/* Never call raw lj_record_idx() on non-table. */
- lua_assert(ix->idxchain != 0);
+ lj_assertJ(ix->idxchain != 0, "bad usage");
if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index))
lj_trace_err(J, LJ_TRERR_NOMM);
handlemm:
if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */
BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
- TRef *base = J->base + func;
- TValue *tv = J->L->base + func;
- base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
- setfuncV(J->L, tv+0, funcV(&ix->mobjv));
+ TRef *base = J->base + func + LJ_FR2;
+ TValue *tv = J->L->base + func + LJ_FR2;
+ base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
+ setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv));
copyTV(J->L, tv+1, &ix->tabv);
copyTV(J->L, tv+2, &ix->keyv);
if (ix->val) {
@@ -1194,6 +1520,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
return 0; /* No result yet. */
}
}
+#if LJ_HASBUFFER
+ /* The index table of buffer objects is treated as immutable. */
+ if (ix->mt == TREF_NIL && !ix->val &&
+ tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER &&
+ tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) {
+ cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv));
+ TRef tr = lj_record_constify(J, val);
+ if (tr) return tr; /* Specialize to the value, i.e. a method. */
+ }
+#endif
/* Otherwise retry lookup with metaobject. */
ix->tab = ix->mobj;
copyTV(J->L, &ix->tabv, &ix->mobjv);
@@ -1213,7 +1549,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
}
/* Record the key lookup. */
- xref = rec_idx_key(J, ix);
+ xref = rec_idx_key(J, ix, &rbref, &rbguard);
xrefop = IR(tref_ref(xref))->o;
loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
/* The lj_meta_tset() inconsistency is gone, but better play safe. */
@@ -1223,11 +1559,15 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
IRType t = itype2irt(oldv);
TRef res;
if (oldv == niltvg(J2G(J))) {
- emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
+ emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
res = TREF_NIL;
} else {
res = emitir(IRTG(loadop, t), xref, 0);
}
+ if (tref_ref(res) < rbref) { /* HREFK + load forwarded? */
+ lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
+ J->guardemit = rbguard;
+ }
if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
goto handlemm;
if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */
@@ -1235,6 +1575,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
} else { /* Indexed store. */
GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val);
+ if (tref_ref(xref) < rbref) { /* HREFK forwarded? */
+ lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */
+ J->guardemit = rbguard;
+ }
if (tvisnil(oldv)) { /* Previous value was nil? */
/* Need to duplicate the hasmm check for the early guards. */
int hasmm = 0;
@@ -1245,24 +1589,28 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
if (hasmm)
emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */
else if (xrefop == IR_HREF)
- emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32),
+ emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC),
xref, lj_ir_kkptr(J, niltvg(J2G(J))));
if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) {
- lua_assert(hasmm);
+ lj_assertJ(hasmm, "inconsistent metamethod handling");
goto handlemm;
}
- lua_assert(!hasmm);
+ lj_assertJ(!hasmm, "inconsistent metamethod handling");
if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */
TRef key = ix->key;
if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */
key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
- xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key);
+ xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key);
keybarrier = 0; /* NEWREF already takes care of the key barrier. */
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+ if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */
+ rec_idx_bump(J, ix);
+#endif
}
} else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
/* Cannot derive that the previous value was non-nil, must do checks. */
if (xrefop == IR_HREF) /* Guard against store to niltv. */
- emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
+ emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
if (ix->idxchain) { /* Metamethod lookup required? */
/* A check for NULL metatable is cheaper (hoistable) than a load. */
if (!mt) {
@@ -1284,7 +1632,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
/* Invalidate neg. metamethod cache for stores with certain string keys. */
if (!nommstr(J, ix->key)) {
- TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM);
+ TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM);
emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
}
J->needsnap = 1;
@@ -1292,6 +1640,72 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix)
}
}
+/* Determine result type of table traversal. */
+static IRType rec_next_types(GCtab *t, uint32_t idx)
+{
+ for (; idx < t->asize; idx++) {
+ cTValue *a = arrayslot(t, idx);
+ if (LJ_LIKELY(!tvisnil(a)))
+ return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8);
+ }
+ idx -= t->asize;
+ for (; idx <= t->hmask; idx++) {
+ Node *n = &noderef(t->node)[idx];
+ if (!tvisnil(&n->val))
+ return itype2irt(&n->key) + (itype2irt(&n->val) << 8);
+ }
+ return IRT_NIL + (IRT_NIL << 8);
+}
+
+/* Record a table traversal step aka next(). */
+int lj_record_next(jit_State *J, RecordIndex *ix)
+{
+ IRType t, tkey, tval;
+ TRef trvk;
+ t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo);
+ tkey = (t & 0xff); tval = (t >> 8);
+ trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key);
+ if (ix->mobj || tkey == IRT_NIL) {
+ TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk);
+ /* Always check for invalid key from next() for nil result. */
+ if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1));
+ ix->mobj = idx;
+ }
+ ix->key = lj_record_vload(J, trvk, 1, tkey);
+ if (tkey == IRT_NIL || ix->idxchain) { /* Omit value type check. */
+ ix->val = TREF_NIL;
+ return 1;
+ } else { /* Need value. */
+ ix->val = lj_record_vload(J, trvk, 0, tval);
+ return 2;
+ }
+}
+
+static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i)
+{
+ RecordIndex ix;
+ cTValue *basev = J->L->base;
+ GCtab *t = tabV(&basev[ra-1]);
+ settabV(J->L, &ix.tabv, t);
+ ix.tab = getslot(J, ra-1);
+ ix.idxchain = 0;
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+ if ((J->flags & JIT_F_OPT_SINK)) {
+ if (t->asize < i+rn-ra)
+ lj_tab_reasize(J->L, t, i+rn-ra);
+ setnilV(&ix.keyv);
+ rec_idx_bump(J, &ix);
+ }
+#endif
+ for (; ra < rn; i++, ra++) {
+ setintV(&ix.keyv, i);
+ ix.key = lj_ir_kint(J, i);
+ copyTV(J->L, &ix.valv, &basev[ra]);
+ ix.val = getslot(J, ra);
+ lj_record_idx(J, &ix);
+ }
+}
+
/* -- Upvalue access ------------------------------------------------------ */
/* Check whether upvalue is immutable and ok to constify. */
@@ -1328,13 +1742,17 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
int needbarrier = 0;
if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */
TRef tr, kfunc;
- lua_assert(val == 0);
+ lj_assertJ(val == 0, "bad usage");
if (!tref_isk(fn)) { /* Late specialization of current function. */
if (J->pt->flags >= PROTO_CLC_POLY)
goto noconstify;
kfunc = lj_ir_kfunc(J, J->fn);
emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc);
- J->base[-1] = TREF_FRAME | kfunc;
+#if LJ_FR2
+ J->base[-2] = kfunc;
+#else
+ J->base[-1] = kfunc | TREF_FRAME;
+#endif
fn = kfunc;
}
tr = lj_record_constify(J, uvval(uvp));
@@ -1345,16 +1763,16 @@ noconstify:
/* Note: this effectively limits LJ_MAX_UPVAL to 127. */
uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
if (!uvp->closed) {
- uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv));
+ uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv));
/* In current stack? */
if (uvval(uvp) >= tvref(J->L->stack) &&
uvval(uvp) < tvref(J->L->maxstack)) {
int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
if (slot >= 0) { /* Aliases an SSA slot? */
- emitir(IRTG(IR_EQ, IRT_P32),
+ emitir(IRTG(IR_EQ, IRT_PGC),
REF_BASE,
- emitir(IRT(IR_ADD, IRT_P32), uref,
- lj_ir_kint(J, (slot - 1) * -8)));
+ emitir(IRT(IR_ADD, IRT_PGC), uref,
+ lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8)));
slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */
if (val == 0) {
return getslot(J, slot);
@@ -1365,12 +1783,12 @@ noconstify:
}
}
}
- emitir(IRTG(IR_UGT, IRT_P32),
- emitir(IRT(IR_SUB, IRT_P32), uref, REF_BASE),
+ emitir(IRTG(IR_UGT, IRT_PGC),
+ emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE),
lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
} else {
needbarrier = 1;
- uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv));
+ uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv));
}
if (val == 0) { /* Upvalue load */
IRType t = itype2irt(uvval(uvp));
@@ -1409,16 +1827,16 @@ static void check_call_unroll(jit_State *J, TraceNo lnk)
if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
J->pc++;
if (J->framedepth + J->retdepth == 0)
- rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */
+ lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */
else
- rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */
+ lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */
}
} else {
if (count > J->param[JIT_P_callunroll]) {
if (lnk) { /* Possible tail- or up-recursion. */
lj_trace_flush(J, lnk); /* Flush trace that only returns. */
/* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */
- hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4));
+ hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u);
}
lj_trace_err(J, LJ_TRERR_CUNROLL);
}
@@ -1445,11 +1863,14 @@ static void rec_func_setup(jit_State *J)
static void rec_func_vararg(jit_State *J)
{
GCproto *pt = J->pt;
- BCReg s, fixargs, vframe = J->maxslot+1;
- lua_assert((pt->flags & PROTO_VARARG));
+ BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2;
+ lj_assertJ((pt->flags & PROTO_VARARG), "FUNCV in non-vararg function");
if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
lj_trace_err(J, LJ_TRERR_STACKOV);
- J->base[vframe-1] = J->base[-1]; /* Copy function up. */
+ J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */
+#if LJ_FR2
+ J->base[vframe-1] = TREF_FRAME;
+#endif
/* Copy fixarg slots up and set their original slots to nil. */
fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
for (s = 0; s < fixargs; s++) {
@@ -1485,9 +1906,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk)
}
J->instunroll = 0; /* Cannot continue across a compiled function. */
if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
- rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */
+ lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */
else
- rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */
+ lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */
}
/* -- Vararg handling ----------------------------------------------------- */
@@ -1511,8 +1932,10 @@ static int select_detect(jit_State *J)
static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
{
int32_t numparams = J->pt->numparams;
- ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1;
- lua_assert(frame_isvarg(J->L->base-1));
+ ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2;
+ lj_assertJ(frame_isvarg(J->L->base-1), "VARG in non-vararg frame");
+ if (LJ_FR2 && dst > J->maxslot)
+ J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */
if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */
ptrdiff_t i;
if (nvararg < 0) nvararg = 0;
@@ -1523,10 +1946,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
J->maxslot = dst + (BCReg)nresults;
}
for (i = 0; i < nresults; i++)
- J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL;
+ J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL;
} else { /* Unknown number of varargs passed to trace. */
- TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME);
- int32_t frofs = 8*(1+numparams)+FRAME_VARG;
+ TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME);
+ int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG;
if (nresults >= 0) { /* Known fixed number of results. */
ptrdiff_t i;
if (nvararg > 0) {
@@ -1535,16 +1958,13 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
if (nvararg >= nresults)
emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults));
else
- emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1)));
- vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
- vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
+ emitir(IRTGI(IR_EQ), fr,
+ lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1)));
+ vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
+ vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8*(1+LJ_FR2)));
for (i = 0; i < nload; i++) {
- IRType t = itype2irt(&J->L->base[i-1-nvararg]);
- TRef aref = emitir(IRT(IR_AREF, IRT_P32),
- vbase, lj_ir_kint(J, (int32_t)i));
- TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
- if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
- J->base[dst+i] = tr;
+ IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]);
+ J->base[dst+i] = lj_record_vload(J, vbase, i, t);
}
} else {
emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs));
@@ -1586,15 +2006,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
}
if (idx != 0 && idx <= nvararg) {
IRType t;
- TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
- vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
- t = itype2irt(&J->L->base[idx-2-nvararg]);
- aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx);
- tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
- if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */
+ TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr);
+ vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase,
+ lj_ir_kint(J, frofs-(8<<LJ_FR2)));
+ t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]);
+ aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx);
+ tr = lj_record_vload(J, aref, 0, t);
}
- J->base[dst-2] = tr;
- J->maxslot = dst-1;
+ J->base[dst-2-LJ_FR2] = tr;
+ J->maxslot = dst-1-LJ_FR2;
J->bcskip = 2; /* Skip CALLM + select. */
} else {
nyivarg:
@@ -1612,8 +2032,63 @@ static TRef rec_tnew(jit_State *J, uint32_t ah)
{
uint32_t asize = ah & 0x7ff;
uint32_t hbits = ah >> 11;
+ TRef tr;
if (asize == 0x7ff) asize = 0x801;
- return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
+ tr = emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+ J->rbchash[(tr & (RBCHASH_SLOTS-1))].ref = tref_ref(tr);
+ setmref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pc, J->pc);
+ setgcref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt));
+#endif
+ return tr;
+}
+
+/* -- Concatenation ------------------------------------------------------- */
+
+static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot)
+{
+ TRef *top = &J->base[topslot];
+ TValue savetv[5+LJ_FR2];
+ BCReg s;
+ RecordIndex ix;
+ lj_assertJ(baseslot < topslot, "bad CAT arg");
+ for (s = baseslot; s <= topslot; s++)
+ (void)getslot(J, s); /* Ensure all arguments have a reference. */
+ if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) {
+ TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot];
+ /* First convert numbers to strings. */
+ for (trp = top; trp >= base; trp--) {
+ if (tref_isnumber(*trp))
+ *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp,
+ tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT);
+ else if (!tref_isstr(*trp))
+ break;
+ }
+ xbase = ++trp;
+ tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC),
+ lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET);
+ do {
+ tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++);
+ } while (trp <= top);
+ tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr);
+ J->maxslot = (BCReg)(xbase - J->base);
+ if (xbase == base) return tr; /* Return simple concatenation result. */
+ /* Pass partial result. */
+ topslot = J->maxslot--;
+ *xbase = tr;
+ top = xbase;
+ setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */
+ } else {
+ J->maxslot = topslot-1;
+ copyTV(J->L, &ix.keyv, &J->L->base[topslot]);
+ }
+ copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]);
+ ix.tab = top[-1];
+ ix.key = top[0];
+ memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */
+ rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */
+ memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */
+ return 0; /* No result yet. */
}
/* -- Record bytecode ops ------------------------------------------------- */
@@ -1634,7 +2109,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0);
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
/* Set PC to opposite target to avoid re-recording the comp. in side trace. */
+#if LJ_FR2
+ SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent];
+ uint64_t pcbase;
+ memcpy(&pcbase, flink, sizeof(uint64_t));
+ pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8);
+ memcpy(flink, &pcbase, sizeof(uint64_t));
+#else
J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
+#endif
J->needsnap = 1;
if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
lj_snap_shrink(J); /* Shrink last snapshot if possible. */
@@ -1654,7 +2137,7 @@ void lj_record_ins(jit_State *J)
if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) {
switch (J->postproc) {
case LJ_POST_FIXCOMP: /* Fixup comparison. */
- pc = frame_pc(&J2G(J)->tmptv);
+ pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64;
rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1)));
/* fallthrough */
case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */
@@ -1692,7 +2175,7 @@ void lj_record_ins(jit_State *J)
if (bc_op(*J->pc) >= BC__MAX)
return;
break;
- default: lua_assert(0); break;
+ default: lj_assertJ(0, "bad post-processing mode"); break;
}
J->postproc = LJ_POST_NONE;
}
@@ -1700,7 +2183,7 @@ void lj_record_ins(jit_State *J)
/* Need snapshot before recording next bytecode (e.g. after a store). */
if (J->needsnap) {
J->needsnap = 0;
- lj_snap_purge(J);
+ if (J->pt) lj_snap_purge(J);
lj_snap_add(J);
J->mergesnap = 1;
}
@@ -1722,6 +2205,10 @@ void lj_record_ins(jit_State *J)
rec_check_ir(J);
#endif
+#if LJ_HASPROFILE
+ rec_profile_ins(J, pc);
+#endif
+
/* Keep a copy of the runtime values of var/num/str operands. */
#define rav (&ix.valv)
#define rbv (&ix.tabv)
@@ -1748,9 +2235,10 @@ void lj_record_ins(jit_State *J)
switch (bcmode_c(op)) {
case BCMvar:
copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
- case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
+ case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
+ tv->u32.hi == LJ_KEYINDEX ? (lj_ir_kint(J, 0) | TREF_KEYINDEX) :
lj_ir_knumint(J, numV(tv)); } break;
case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc));
setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break;
@@ -1843,6 +2331,18 @@ void lj_record_ins(jit_State *J)
J->maxslot = bc_a(pc[1]); /* Shrink used slots. */
break;
+ case BC_ISTYPE: case BC_ISNUM:
+ /* These coercions need to correspond with lj_meta_istype(). */
+ if (LJ_DUALNUM && rc == ~LJ_TNUMX+1)
+ ra = lj_opt_narrow_toint(J, ra);
+ else if (rc == ~LJ_TNUMX+2)
+ ra = lj_ir_tonum(J, ra);
+ else if (rc == ~LJ_TSTR+1)
+ ra = lj_ir_tostr(J, ra);
+ /* else: type specialization suffices. */
+ J->base[bc_a(ins)] = ra;
+ break;
+
/* -- Unary ops --------------------------------------------------------- */
case BC_NOT:
@@ -1854,7 +2354,7 @@ void lj_record_ins(jit_State *J)
if (tref_isstr(rc))
rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
else if (!LJ_52 && tref_istab(rc))
- rc = lj_ir_call(J, IRCALL_lj_tab_len, rc);
+ rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL);
else
rc = rec_mm_len(J, rc, rcv);
break;
@@ -1901,16 +2401,28 @@ void lj_record_ins(jit_State *J)
case BC_POW:
if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
- rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv);
+ rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv, IR_POW);
else
rc = rec_mm_arith(J, &ix, MM_pow);
break;
+ /* -- Miscellaneous ops ------------------------------------------------- */
+
+ case BC_CAT:
+ rc = rec_cat(J, rb, rc);
+ break;
+
/* -- Constant and move ops --------------------------------------------- */
case BC_MOV:
/* Clear gap of method call to avoid resurrecting previous refs. */
- if (ra > J->maxslot) J->base[ra-1] = 0;
+ if (ra > J->maxslot) {
+#if LJ_FR2
+ memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef));
+#else
+ J->base[ra-1] = 0;
+#endif
+ }
break;
case BC_KSTR: case BC_KNUM: case BC_KPRI:
break;
@@ -1918,6 +2430,8 @@ void lj_record_ins(jit_State *J)
rc = lj_ir_kint(J, (int32_t)(int16_t)rc);
break;
case BC_KNIL:
+ if (LJ_FR2 && ra > J->maxslot)
+ J->base[ra-1] = 0;
while (ra <= rc)
J->base[ra++] = TREF_NIL;
if (rc >= J->maxslot) J->maxslot = rc+1;
@@ -1954,6 +2468,14 @@ void lj_record_ins(jit_State *J)
ix.idxchain = LJ_MAX_IDXCHAIN;
rc = lj_record_idx(J, &ix);
break;
+ case BC_TGETR: case BC_TSETR:
+ ix.idxchain = 0;
+ rc = lj_record_idx(J, &ix);
+ break;
+
+ case BC_TSETM:
+ rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo);
+ break;
case BC_TNEW:
rc = rec_tnew(J, rc);
@@ -1961,33 +2483,38 @@ void lj_record_ins(jit_State *J)
case BC_TDUP:
rc = emitir(IRTG(IR_TDUP, IRT_TAB),
lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0);
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+ J->rbchash[(rc & (RBCHASH_SLOTS-1))].ref = tref_ref(rc);
+ setmref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pc, pc);
+ setgcref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt));
+#endif
break;
/* -- Calls and vararg handling ----------------------------------------- */
case BC_ITERC:
J->base[ra] = getslot(J, ra-3);
- J->base[ra+1] = getslot(J, ra-2);
- J->base[ra+2] = getslot(J, ra-1);
+ J->base[ra+1+LJ_FR2] = getslot(J, ra-2);
+ J->base[ra+2+LJ_FR2] = getslot(J, ra-1);
{ /* Do the actual copy now because lj_record_call needs the values. */
TValue *b = &J->L->base[ra];
copyTV(J->L, b, b-3);
- copyTV(J->L, b+1, b-2);
- copyTV(J->L, b+2, b-1);
+ copyTV(J->L, b+1+LJ_FR2, b-2);
+ copyTV(J->L, b+2+LJ_FR2, b-1);
}
lj_record_call(J, ra, (ptrdiff_t)rc-1);
break;
/* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */
case BC_CALLM:
- rc = (BCReg)(J->L->top - J->L->base) - ra;
+ rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
/* fallthrough */
case BC_CALL:
lj_record_call(J, ra, (ptrdiff_t)rc-1);
break;
case BC_CALLMT:
- rc = (BCReg)(J->L->top - J->L->base) - ra;
+ rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2;
/* fallthrough */
case BC_CALLT:
lj_record_tailcall(J, ra, (ptrdiff_t)rc-1);
@@ -2004,6 +2531,9 @@ void lj_record_ins(jit_State *J)
rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
/* fallthrough */
case BC_RET: case BC_RET0: case BC_RET1:
+#if LJ_HASPROFILE
+ rec_profile_ret(J);
+#endif
lj_record_ret(J, ra, (ptrdiff_t)rc-1);
break;
@@ -2014,9 +2544,10 @@ void lj_record_ins(jit_State *J)
J->loopref = J->cur.nins;
break;
case BC_JFORI:
- lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
+ lj_assertJ(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL,
+ "JFORI does not point to JFORL");
if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */
- rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
+ lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
/* Continue tracing if the loop is not entered. */
break;
@@ -2026,6 +2557,9 @@ void lj_record_ins(jit_State *J)
case BC_ITERL:
rec_loop_interp(J, pc, rec_iterl(J, *pc));
break;
+ case BC_ITERN:
+ rec_loop_interp(J, pc, rec_itern(J, ra, rb));
+ break;
case BC_LOOP:
rec_loop_interp(J, pc, rec_loop(J, ra, 1));
break;
@@ -2054,6 +2588,10 @@ void lj_record_ins(jit_State *J)
J->maxslot = ra; /* Shrink used slots. */
break;
+ case BC_ISNEXT:
+ rec_isnext(J, ra);
+ break;
+
/* -- Function headers -------------------------------------------------- */
case BC_FUNCF:
@@ -2068,7 +2606,8 @@ void lj_record_ins(jit_State *J)
rec_func_lua(J);
break;
case BC_JFUNCV:
- lua_assert(0); /* Cannot happen. No hotcall counting for varag funcs. */
+ /* Cannot happen. No hotcall counting for varag funcs. */
+ lj_assertJ(0, "unsupported vararg hotcall");
break;
case BC_FUNCC:
@@ -2082,12 +2621,8 @@ void lj_record_ins(jit_State *J)
break;
}
/* fallthrough */
- case BC_ITERN:
- case BC_ISNEXT:
- case BC_CAT:
case BC_UCLO:
case BC_FNEW:
- case BC_TSETM:
setintV(&J->errinfo, (int32_t)op);
lj_trace_err_info(J, LJ_TRERR_NYIBC);
break;
@@ -2096,15 +2631,21 @@ void lj_record_ins(jit_State *J)
/* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
if (bcmode_a(op) == BCMdst && rc) {
J->base[ra] = rc;
- if (ra >= J->maxslot) J->maxslot = ra+1;
+ if (ra >= J->maxslot) {
+#if LJ_FR2
+ if (ra > J->maxslot) J->base[ra-1] = 0;
+#endif
+ J->maxslot = ra+1;
+ }
}
#undef rav
#undef rbv
#undef rcv
- /* Limit the number of recorded IR instructions. */
- if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord])
+ /* Limit the number of recorded IR instructions and constants. */
+ if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] ||
+ J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst])
lj_trace_err(J, LJ_TRERR_TRACEOV);
}
@@ -2124,13 +2665,22 @@ static const BCIns *rec_setup_root(jit_State *J)
J->bc_min = pc;
break;
case BC_ITERL:
- lua_assert(bc_op(pc[-1]) == BC_ITERC);
+ if (bc_op(pc[-1]) == BC_JLOOP)
+ lj_trace_err(J, LJ_TRERR_LINNER);
+ lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL");
J->maxslot = ra + bc_b(pc[-1]) - 1;
J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
pc += 1+bc_j(ins);
- lua_assert(bc_op(pc[-1]) == BC_JMP);
+ lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1");
J->bc_min = pc;
break;
+ case BC_ITERN:
+ lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN");
+ J->maxslot = ra;
+ J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns);
+ J->bc_min = pc+2 + bc_j(pc[1]);
+ J->state = LJ_TRACE_RECORD_1ST; /* Record the first ITERN, too. */
+ break;
case BC_LOOP:
/* Only check BC range for real loops, but not for "repeat until true". */
pcj = pc + bc_j(ins);
@@ -2153,8 +2703,14 @@ static const BCIns *rec_setup_root(jit_State *J)
J->maxslot = J->pt->numparams;
pc++;
break;
+ case BC_CALLM:
+ case BC_CALL:
+ case BC_ITERC:
+ /* No bytecode range check for stitched traces. */
+ pc++;
+ break;
default:
- lua_assert(0);
+ lj_assertJ(0, "bad root trace start bytecode %d", bc_op(ins));
break;
}
return pc;
@@ -2168,11 +2724,14 @@ void lj_record_setup(jit_State *J)
/* Initialize state related to current trace. */
memset(J->slot, 0, sizeof(J->slot));
memset(J->chain, 0, sizeof(J->chain));
+#ifdef LUAJIT_ENABLE_TABLE_BUMP
+ memset(J->rbchash, 0, sizeof(J->rbchash));
+#endif
memset(J->bpropcache, 0, sizeof(J->bpropcache));
J->scev.idx = REF_NIL;
setmref(J->scev.pc, NULL);
- J->baseslot = 1; /* Invoking function is at base[-1]. */
+ J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */
J->base = J->slot + J->baseslot;
J->maxslot = 0;
J->framedepth = 0;
@@ -2187,7 +2746,7 @@ void lj_record_setup(jit_State *J)
J->bc_extent = ~(MSize)0;
/* Emit instructions for fixed references. Also triggers initial IR alloc. */
- emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno);
+ emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno);
for (i = 0; i <= 2; i++) {
IRIns *ir = IR(REF_NIL-i);
ir->i = 0;
@@ -2218,10 +2777,15 @@ void lj_record_setup(jit_State *J)
}
lj_snap_replay(J, T);
sidecheck:
- if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
- T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
- J->param[JIT_P_tryside]) {
- rec_stop(J, LJ_TRLINK_INTERP, 0);
+ if ((traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
+ T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
+ J->param[JIT_P_tryside])) {
+ if (bc_op(*J->pc) == BC_JLOOP) {
+ BCIns startins = traceref(J, bc_d(*J->pc))->startins;
+ if (bc_op(startins) == BC_ITERN)
+ rec_itern(J, bc_a(startins), bc_b(startins));
+ }
+ lj_record_stop(J, LJ_TRLINK_INTERP, 0);
}
} else { /* Root trace. */
J->cur.root = 0;
@@ -2229,13 +2793,20 @@ void lj_record_setup(jit_State *J)
J->pc = rec_setup_root(J);
/* Note: the loop instruction itself is recorded at the end and not
** at the start! So snapshot #0 needs to point to the *next* instruction.
+ ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST.
*/
lj_snap_add(J);
if (bc_op(J->cur.startins) == BC_FORL)
rec_for_loop(J, J->pc-1, &J->scev, 1);
+ else if (bc_op(J->cur.startins) == BC_ITERC)
+ J->startpc = NULL;
if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
lj_trace_err(J, LJ_TRERR_STACKOV);
}
+#if LJ_HASPROFILE
+ J->prev_pt = NULL;
+ J->prev_line = -1;
+#endif
#ifdef LUAJIT_ENABLE_CHECKHOOK
/* Regularly check for instruction/line hooks from compiled code and
** exit to the interpreter if the hooks are set.
diff --git a/src/lj_record.h b/src/lj_record.h
index 4b180fc2..ab2f4c8d 100644
--- a/src/lj_record.h
+++ b/src/lj_record.h
@@ -28,7 +28,9 @@ typedef struct RecordIndex {
LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b,
cTValue *av, cTValue *bv);
+LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk);
LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o);
+LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t);
LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs);
LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs);
@@ -36,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults);
LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm);
LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix);
+LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix);
LJ_FUNC void lj_record_ins(jit_State *J);
LJ_FUNC void lj_record_setup(jit_State *J);
diff --git a/src/lj_serialize.c b/src/lj_serialize.c
new file mode 100644
index 00000000..f7e51828
--- /dev/null
+++ b/src/lj_serialize.c
@@ -0,0 +1,539 @@
+/*
+** Object de/serialization.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#define lj_serialize_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+
+#if LJ_HASBUFFER
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_tab.h"
+#include "lj_udata.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#include "lj_cdata.h"
+#endif
+#if LJ_HASJIT
+#include "lj_ir.h"
+#endif
+#include "lj_serialize.h"
+
+/* Tags for internal serialization format. */
+enum {
+ SER_TAG_NIL, /* 0x00 */
+ SER_TAG_FALSE,
+ SER_TAG_TRUE,
+ SER_TAG_NULL,
+ SER_TAG_LIGHTUD32,
+ SER_TAG_LIGHTUD64,
+ SER_TAG_INT,
+ SER_TAG_NUM,
+ SER_TAG_TAB, /* 0x08 */
+ SER_TAG_DICT_MT = SER_TAG_TAB+6,
+ SER_TAG_DICT_STR,
+ SER_TAG_INT64, /* 0x10 */
+ SER_TAG_UINT64,
+ SER_TAG_COMPLEX,
+ SER_TAG_0x13,
+ SER_TAG_0x14,
+ SER_TAG_0x15,
+ SER_TAG_0x16,
+ SER_TAG_0x17,
+ SER_TAG_0x18, /* 0x18 */
+ SER_TAG_0x19,
+ SER_TAG_0x1a,
+ SER_TAG_0x1b,
+ SER_TAG_0x1c,
+ SER_TAG_0x1d,
+ SER_TAG_0x1e,
+ SER_TAG_0x1f,
+ SER_TAG_STR, /* 0x20 + str->len */
+};
+LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0);
+
+/* -- Helper functions ---------------------------------------------------- */
+
+static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz)
+{
+ if (LJ_UNLIKELY(sz > (MSize)(sbx->e - w))) {
+ sbx->w = w;
+ w = lj_buf_more2((SBuf *)sbx, sz);
+ }
+ return w;
+}
+
+/* Write U124 to buffer. */
+static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v)
+{
+ if (v < 0x1fe0) {
+ v -= 0xe0;
+ *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v;
+ } else {
+ *w++ = (char)0xff;
+#if LJ_BE
+ v = lj_bswap(v);
+#endif
+ memcpy(w, &v, 4); w += 4;
+ }
+ return w;
+}
+
+static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v)
+{
+ if (LJ_LIKELY(v < 0xe0)) {
+ *w++ = (char)v;
+ return w;
+ } else {
+ return serialize_wu124_(w, v);
+ }
+}
+
+static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv)
+{
+ uint32_t v = *pv;
+ if (v != 0xff) {
+ if (r >= w) return NULL;
+ v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++;
+ } else {
+ if (r + 4 > w) return NULL;
+ v = lj_getu32(r); r += 4;
+#if LJ_BE
+ v = lj_bswap(v);
+#endif
+ }
+ *pv = v;
+ return r;
+}
+
+static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
+{
+ if (LJ_LIKELY(r < w)) {
+ uint32_t v = *(uint8_t *)r; r++;
+ *pv = v;
+ if (LJ_UNLIKELY(v >= 0xe0)) {
+ r = serialize_ru124_(r, w, pv);
+ }
+ return r;
+ }
+ return NULL;
+}
+
+/* Prepare string dictionary for use (once). */
+void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict)
+{
+ if (!dict->hmask) { /* No hash part means not prepared, yet. */
+ MSize i, len = lj_tab_len(dict);
+ if (!len) return;
+ lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
+ for (i = 1; i <= len && i < dict->asize; i++) {
+ cTValue *o = arrayslot(dict, i);
+ if (tvisstr(o)) {
+ if (!lj_tab_getstr(dict, strV(o))) { /* Ignore dups. */
+ lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
+ }
+ } else if (!tvisfalse(o)) {
+ lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
+ }
+ }
+ }
+}
+
+/* Prepare metatable dictionary for use (once). */
+void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict)
+{
+ if (!dict->hmask) { /* No hash part means not prepared, yet. */
+ MSize i, len = lj_tab_len(dict);
+ if (!len) return;
+ lj_tab_resize(L, dict, dict->asize, hsize2hbits(len));
+ for (i = 1; i <= len && i < dict->asize; i++) {
+ cTValue *o = arrayslot(dict, i);
+ if (tvistab(o)) {
+ if (tvisnil(lj_tab_get(L, dict, o))) { /* Ignore dups. */
+ lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1);
+ }
+ } else if (!tvisfalse(o)) {
+ lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
+ }
+ }
+ }
+}
+
+/* -- Internal serializer ------------------------------------------------- */
+
+/* Put serialized object into buffer. */
+static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
+{
+ if (LJ_LIKELY(tvisstr(o))) {
+ const GCstr *str = strV(o);
+ MSize len = str->len;
+ w = serialize_more(w, sbx, 5+len);
+ w = serialize_wu124(w, SER_TAG_STR + len);
+ w = lj_buf_wmem(w, strdata(str), len);
+ } else if (tvisint(o)) {
+ uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o);
+ w = serialize_more(w, sbx, 1+4);
+ *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4;
+ } else if (tvisnum(o)) {
+ uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64;
+ w = serialize_more(w, sbx, 1+sizeof(lua_Number));
+ *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8;
+ } else if (tvispri(o)) {
+ w = serialize_more(w, sbx, 1);
+ *w++ = (char)(SER_TAG_NIL + ~itype(o));
+ } else if (tvistab(o)) {
+ const GCtab *t = tabV(o);
+ uint32_t narray = 0, nhash = 0, one = 2;
+ if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
+ sbx->depth--;
+ if (t->asize > 0) { /* Determine max. length of array part. */
+ ptrdiff_t i;
+ TValue *array = tvref(t->array);
+ for (i = (ptrdiff_t)t->asize-1; i >= 0; i--)
+ if (!tvisnil(&array[i]))
+ break;
+ narray = (uint32_t)(i+1);
+ if (narray && tvisnil(&array[0])) one = 4;
+ }
+ if (t->hmask > 0) { /* Count number of used hash slots. */
+ uint32_t i, hmask = t->hmask;
+ Node *node = noderef(t->node);
+ for (i = 0; i <= hmask; i++)
+ nhash += !tvisnil(&node[i].val);
+ }
+ /* Write metatable index. */
+ if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) {
+ TValue mto;
+ Node *n;
+ settabV(sbufL(sbx), &mto, tabref(t->metatable));
+ n = hashgcref(tabref(sbx->dict_mt), mto.gcr);
+ do {
+ if (n->key.u64 == mto.u64) {
+ uint32_t idx = n->val.u32.lo;
+ w = serialize_more(w, sbx, 1+5);
+ *w++ = SER_TAG_DICT_MT;
+ w = serialize_wu124(w, idx);
+ break;
+ }
+ } while ((n = nextnode(n)));
+ }
+ /* Write number of array slots and hash slots. */
+ w = serialize_more(w, sbx, 1+2*5);
+ *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0));
+ if (narray) w = serialize_wu124(w, narray);
+ if (nhash) w = serialize_wu124(w, nhash);
+ if (narray) { /* Write array entries. */
+ cTValue *oa = tvref(t->array) + (one >> 2);
+ cTValue *oe = tvref(t->array) + narray;
+ while (oa < oe) w = serialize_put(w, sbx, oa++);
+ }
+ if (nhash) { /* Write hash entries. */
+ const Node *node = noderef(t->node) + t->hmask;
+ GCtab *dict_str = tabref(sbx->dict_str);
+ if (LJ_UNLIKELY(dict_str)) {
+ for (;; node--)
+ if (!tvisnil(&node->val)) {
+ if (LJ_LIKELY(tvisstr(&node->key))) {
+ /* Inlined lj_tab_getstr is 30% faster. */
+ const GCstr *str = strV(&node->key);
+ Node *n = hashstr(dict_str, str);
+ do {
+ if (tvisstr(&n->key) && strV(&n->key) == str) {
+ uint32_t idx = n->val.u32.lo;
+ w = serialize_more(w, sbx, 1+5);
+ *w++ = SER_TAG_DICT_STR;
+ w = serialize_wu124(w, idx);
+ break;
+ }
+ n = nextnode(n);
+ if (!n) {
+ MSize len = str->len;
+ w = serialize_more(w, sbx, 5+len);
+ w = serialize_wu124(w, SER_TAG_STR + len);
+ w = lj_buf_wmem(w, strdata(str), len);
+ break;
+ }
+ } while (1);
+ } else {
+ w = serialize_put(w, sbx, &node->key);
+ }
+ w = serialize_put(w, sbx, &node->val);
+ if (--nhash == 0) break;
+ }
+ } else {
+ for (;; node--)
+ if (!tvisnil(&node->val)) {
+ w = serialize_put(w, sbx, &node->key);
+ w = serialize_put(w, sbx, &node->val);
+ if (--nhash == 0) break;
+ }
+ }
+ }
+ sbx->depth++;
+#if LJ_HASFFI
+ } else if (tviscdata(o)) {
+ CTState *cts = ctype_cts(sbufL(sbx));
+ CType *s = ctype_raw(cts, cdataV(o)->ctypeid);
+ uint8_t *sp = cdataptr(cdataV(o));
+ if (ctype_isinteger(s->info) && s->size == 8) {
+ w = serialize_more(w, sbx, 1+8);
+ *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64;
+#if LJ_BE
+ { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); }
+#else
+ memcpy(w, sp, 8);
+#endif
+ w += 8;
+ } else if (ctype_iscomplex(s->info) && s->size == 16) {
+ w = serialize_more(w, sbx, 1+16);
+ *w++ = SER_TAG_COMPLEX;
+#if LJ_BE
+ { /* Only swap the doubles. The re/im order stays the same. */
+ uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8);
+ u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8);
+ }
+#else
+ memcpy(w, sp, 16);
+#endif
+ w += 16;
+ } else {
+ goto badenc; /* NYI other cdata */
+ }
+#endif
+ } else if (tvislightud(o)) {
+ uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o);
+ w = serialize_more(w, sbx, 1+sizeof(ud));
+ if (ud == 0) {
+ *w++ = SER_TAG_NULL;
+ } else if (LJ_32 || checku32(ud)) {
+#if LJ_BE && LJ_64
+ ud = lj_bswap64(ud);
+#elif LJ_BE
+ ud = lj_bswap(ud);
+#endif
+ *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4;
+#if LJ_64
+ } else {
+#if LJ_BE
+ ud = lj_bswap64(ud);
+#endif
+ *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8;
+#endif
+ }
+ } else {
+ /* NYI userdata */
+#if LJ_HASFFI
+ badenc:
+#endif
+ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o));
+ }
+ return w;
+}
+
+/* Get serialized object from buffer. */
+static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
+{
+ char *w = sbx->w;
+ uint32_t tp;
+ r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
+ if (LJ_LIKELY(tp >= SER_TAG_STR)) {
+ uint32_t len = tp - SER_TAG_STR;
+ if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob;
+ setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len));
+ r += len;
+ } else if (tp == SER_TAG_INT) {
+ if (LJ_UNLIKELY(r + 4 > w)) goto eob;
+ setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)));
+ r += 4;
+ } else if (tp == SER_TAG_NUM) {
+ if (LJ_UNLIKELY(r + 8 > w)) goto eob;
+ memcpy(o, r, 8); r += 8;
+#if LJ_BE
+ o->u64 = lj_bswap64(o->u64);
+#endif
+ if (!tvisnum(o)) setnanV(o); /* Fix non-canonical NaNs. */
+ } else if (tp <= SER_TAG_TRUE) {
+ setpriV(o, ~tp);
+ } else if (tp == SER_TAG_DICT_STR) {
+ GCtab *dict_str;
+ uint32_t idx;
+ r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
+ idx++;
+ dict_str = tabref(sbx->dict_str);
+ if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx)))
+ copyTV(sbufL(sbx), o, arrayslot(dict_str, idx));
+ else
+ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
+ } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) {
+ uint32_t narray = 0, nhash = 0;
+ GCtab *t, *mt = NULL;
+ if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH);
+ sbx->depth--;
+ if (tp == SER_TAG_DICT_MT) {
+ GCtab *dict_mt;
+ uint32_t idx;
+ r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob;
+ idx++;
+ dict_mt = tabref(sbx->dict_mt);
+ if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx)))
+ mt = tabV(arrayslot(dict_mt, idx));
+ else
+ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
+ r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob;
+ if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag;
+ }
+ if (tp >= SER_TAG_TAB+2) {
+ r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob;
+ }
+ if ((tp & 1)) {
+ r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob;
+ }
+ t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash));
+ /* NOBARRIER: The table is new (marked white). */
+ setgcref(t->metatable, obj2gco(mt));
+ settabV(sbufL(sbx), o, t);
+ if (narray) {
+ TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4);
+ TValue *oe = tvref(t->array) + narray;
+ while (oa < oe) r = serialize_get(r, sbx, oa++);
+ }
+ if (nhash) {
+ do {
+ TValue k, *v;
+ r = serialize_get(r, sbx, &k);
+ v = lj_tab_set(sbufL(sbx), t, &k);
+ if (LJ_UNLIKELY(!tvisnil(v)))
+ lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY);
+ r = serialize_get(r, sbx, v);
+ } while (--nhash);
+ }
+ sbx->depth++;
+#if LJ_HASFFI
+ } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) {
+ uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8;
+ GCcdata *cd;
+ if (LJ_UNLIKELY(r + sz > w)) goto eob;
+ if (LJ_UNLIKELY(!ctype_ctsG(G(sbufL(sbx))))) goto badtag;
+ cd = lj_cdata_new_(sbufL(sbx),
+ tp == SER_TAG_INT64 ? CTID_INT64 :
+ tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE,
+ sz);
+ memcpy(cdataptr(cd), r, sz); r += sz;
+#if LJ_BE
+ *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd));
+ if (sz == 16)
+ ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]);
+#endif
+ if (sz == 16) { /* Fix non-canonical NaNs. */
+ TValue *cdo = (TValue *)cdataptr(cd);
+ if (!tvisnum(&cdo[0])) setnanV(&cdo[0]);
+ if (!tvisnum(&cdo[1])) setnanV(&cdo[1]);
+ }
+ setcdataV(sbufL(sbx), o, cd);
+#endif
+ } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) {
+ uintptr_t ud = 0;
+ if (tp == SER_TAG_LIGHTUD32) {
+ if (LJ_UNLIKELY(r + 4 > w)) goto eob;
+ ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r));
+ r += 4;
+ }
+#if LJ_64
+ else if (tp == SER_TAG_LIGHTUD64) {
+ if (LJ_UNLIKELY(r + 8 > w)) goto eob;
+ memcpy(&ud, r, 8); r += 8;
+#if LJ_BE
+ ud = lj_bswap64(ud);
+#endif
+ }
+ setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud));
+#else
+ setrawlightudV(o, (void *)ud);
+#endif
+ } else {
+badtag:
+ lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp);
+ }
+ return r;
+eob:
+ lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB);
+ return NULL;
+}
+
+/* -- External serialization API ------------------------------------------ */
+
+/* Encode to buffer. */
+SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o)
+{
+ sbx->depth = LJ_SERIALIZE_DEPTH;
+ sbx->w = serialize_put(sbx->w, sbx, o);
+ return sbx;
+}
+
+/* Decode from buffer. */
+char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o)
+{
+ sbx->depth = LJ_SERIALIZE_DEPTH;
+ return serialize_get(sbx->r, sbx, o);
+}
+
+/* Stand-alone encoding, borrowing from global temporary buffer. */
+GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o)
+{
+ SBufExt sbx;
+ char *w;
+ memset(&sbx, 0, sizeof(SBufExt));
+ lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf);
+ sbx.depth = LJ_SERIALIZE_DEPTH;
+ w = serialize_put(sbx.w, &sbx, o);
+ return lj_str_new(L, sbx.b, (size_t)(w - sbx.b));
+}
+
+/* Stand-alone decoding, copy-on-write from string. */
+void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str)
+{
+ SBufExt sbx;
+ char *r;
+ memset(&sbx, 0, sizeof(SBufExt));
+ lj_bufx_set_cow(L, &sbx, strdata(str), str->len);
+ /* No need to set sbx.cowref here. */
+ sbx.depth = LJ_SERIALIZE_DEPTH;
+ r = serialize_get(sbx.r, &sbx, o);
+ if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV);
+}
+
+#if LJ_HASJIT
+/* Peek into buffer to find the result IRType for specialization purposes. */
+LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx)
+{
+ uint32_t tp;
+ if (serialize_ru124(sbx->r, sbx->w, &tp)) {
+ /* This must match the handling of all tags in the decoder above. */
+ switch (tp) {
+ case SER_TAG_NIL: return IRT_NIL;
+ case SER_TAG_FALSE: return IRT_FALSE;
+ case SER_TAG_TRUE: return IRT_TRUE;
+ case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64:
+ return IRT_LIGHTUD;
+ case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM;
+ case SER_TAG_NUM: return IRT_NUM;
+ case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2:
+ case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5:
+ case SER_TAG_DICT_MT:
+ return IRT_TAB;
+ case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX:
+ return IRT_CDATA;
+ case SER_TAG_DICT_STR:
+ default:
+ return IRT_STR;
+ }
+ }
+ return IRT_NIL; /* Will fail on actual decode. */
+}
+#endif
+
+#endif
diff --git a/src/lj_serialize.h b/src/lj_serialize.h
new file mode 100644
index 00000000..d3f4275a
--- /dev/null
+++ b/src/lj_serialize.h
@@ -0,0 +1,28 @@
+/*
+** Object de/serialization.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_SERIALIZE_H
+#define _LJ_SERIALIZE_H
+
+#include "lj_obj.h"
+#include "lj_buf.h"
+
+#if LJ_HASBUFFER
+
+#define LJ_SERIALIZE_DEPTH 100 /* Default depth. */
+
+LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict);
+LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict);
+LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o);
+LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);
+LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o);
+LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str);
+#if LJ_HASJIT
+LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx);
+#endif
+
+#endif
+
+#endif
diff --git a/src/lj_snap.c b/src/lj_snap.c
index e2da4b3e..4140fdb7 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -68,20 +68,37 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
for (s = 0; s < nslots; s++) {
TRef tr = J->slot[s];
IRRef ref = tref_ref(tr);
+#if LJ_FR2
+ if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
+ if ((tr & TREF_FRAME))
+ map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
+ continue;
+ }
+ if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
+ cTValue *base = J->L->base - J->baseslot;
+ tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
+ ref = tref_ref(tr);
+ }
+#endif
if (ref) {
SnapEntry sn = SNAP_TR(s, tr);
IRIns *ir = &J->cur.ir[ref];
- if (!(sn & (SNAP_CONT|SNAP_FRAME)) &&
+ if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
- /* No need to snapshot unmodified non-inherited slots. */
- if (!(ir->op2 & IRSLOAD_INHERIT))
+ /*
+ ** No need to snapshot unmodified non-inherited slots.
+ ** But always snapshot the function below a frame in LJ_FR2 mode.
+ */
+ if (!(ir->op2 & IRSLOAD_INHERIT) &&
+ (!LJ_FR2 || s == 0 || s+1 == nslots ||
+ !(J->slot[s+1] & (TREF_CONT|TREF_FRAME))))
continue;
/* No need to restore readonly slots and unmodified non-parent slots. */
if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
(ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
sn |= SNAP_NORESTORE;
}
- if (LJ_SOFTFP && irt_isnum(ir->t))
+ if (LJ_SOFTFP32 && irt_isnum(ir->t))
sn |= SNAP_SOFTFPNUM;
map[n++] = sn;
}
@@ -90,35 +107,54 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
}
/* Add frame links at the end of the snapshot. */
-static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map)
+static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
{
cTValue *frame = J->L->base - 1;
- cTValue *lim = J->L->base - J->baseslot;
- cTValue *ftop = frame + funcproto(frame_func(frame))->framesize;
+ cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
+ GCfunc *fn = frame_func(frame);
+ cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
+#if LJ_FR2
+ uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
+ lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot");
+ memcpy(map, &pcbase, sizeof(uint64_t));
+#else
MSize f = 0;
map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
- lua_assert(!J->pt ||
+#endif
+ lj_assertJ(!J->pt ||
(J->pc >= proto_bc(J->pt) &&
- J->pc < proto_bc(J->pt) + J->pt->sizebc));
+ J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC");
while (frame > lim) { /* Backwards traversal of all frames above base. */
if (frame_islua(frame)) {
+#if !LJ_FR2
map[f++] = SNAP_MKPC(frame_pc(frame));
+#endif
frame = frame_prevl(frame);
} else if (frame_iscont(frame)) {
+#if !LJ_FR2
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
map[f++] = SNAP_MKPC(frame_contpc(frame));
+#endif
frame = frame_prevd(frame);
} else {
- lua_assert(!frame_isc(frame));
+ lj_assertJ(!frame_isc(frame), "broken frame chain");
+#if !LJ_FR2
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
+#endif
frame = frame_prevd(frame);
continue;
}
if (frame + funcproto(frame_func(frame))->framesize > ftop)
ftop = frame + funcproto(frame_func(frame))->framesize;
}
- lua_assert(f == (MSize)(1 + J->framedepth));
- return (BCReg)(ftop - lim);
+ *topslot = (uint8_t)(ftop - lim);
+#if LJ_FR2
+ lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def");
+ return 2;
+#else
+ lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size");
+ return f;
+#endif
}
/* Take a snapshot of the current stack. */
@@ -128,16 +164,17 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
MSize nent;
SnapEntry *p;
/* Conservative estimate. */
- lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1);
+ lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
p = &J->cur.snapmap[nsnapmap];
nent = snapshot_slots(J, p, nslots);
- snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent);
+ snap->nent = (uint8_t)nent;
+ nent += snapshot_framelinks(J, p + nent, &snap->topslot);
snap->mapofs = (uint32_t)nsnapmap;
snap->ref = (IRRef1)J->cur.nins;
- snap->nent = (uint8_t)nent;
+ snap->mcofs = 0;
snap->nslots = (uint8_t)nslots;
snap->count = 0;
- J->cur.nsnapmap = (uint32_t)(nsnapmap + nent + 1 + J->framedepth);
+ J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
}
/* Add or merge a snapshot. */
@@ -146,8 +183,8 @@ void lj_snap_add(jit_State *J)
MSize nsnap = J->cur.nsnap;
MSize nsnapmap = J->cur.nsnapmap;
/* Merge if no ins. inbetween or if requested and no guard inbetween. */
- if (J->mergesnap ? !irt_isguard(J->guardemit) :
- (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
+ if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
+ (J->mergesnap && !irt_isguard(J->guardemit))) {
if (nsnap == 1) { /* But preserve snap #0 PC. */
emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
goto nomerge;
@@ -194,7 +231,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
#define DEF_SLOT(s) udf[(s)] *= 3
/* Scan through following bytecode and check for uses/defs. */
- lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
+ lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
+ "snapshot PC out of range");
for (;;) {
BCIns ins = *pc++;
BCOp op = bc_op(ins);
@@ -205,7 +243,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
switch (bcmode_c(op)) {
case BCMvar: USE_SLOT(bc_c(ins)); break;
case BCMrbase:
- lua_assert(op == BC_CAT);
+ lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op);
for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
for (; s < maxslot; s++) DEF_SLOT(s);
break;
@@ -245,7 +283,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
case BCMbase:
if (op >= BC_CALLM && op <= BC_ITERN) {
BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
- maxslot : (bc_a(ins) + bc_c(ins));
+ maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
+ if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
for (; s < top; s++) USE_SLOT(s);
for (; s < maxslot; s++) DEF_SLOT(s);
@@ -263,7 +302,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf,
break;
default: break;
}
- lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
+ lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
+ "use/def analysis PC out of range");
}
#undef USE_SLOT
@@ -321,8 +361,8 @@ void lj_snap_shrink(jit_State *J)
MSize n, m, nlim, nent = snap->nent;
uint8_t udf[SNAP_USEDEF_SLOTS];
BCReg maxslot = J->maxslot;
- BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot);
BCReg baseslot = J->baseslot;
+ BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
if (minslot < maxslot) snap_useuv(J->pt, udf);
maxslot += baseslot;
minslot += baseslot;
@@ -365,25 +405,26 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
}
/* Copy RegSP from parent snapshot to the parent links of the IR. */
-IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
+IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir)
{
SnapShot *snap = &T->snap[snapno];
SnapEntry *map = &T->snapmap[snap->mapofs];
BloomFilter rfilt = snap_renamefilter(T, snapno);
MSize n = 0;
IRRef ref = 0;
+ UNUSED(J);
for ( ; ; ir++) {
uint32_t rs;
if (ir->o == IR_SLOAD) {
if (!(ir->op2 & IRSLOAD_PARENT)) break;
for ( ; ; n++) {
- lua_assert(n < snap->nent);
+ lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1);
if (snap_slot(map[n]) == ir->op1) {
ref = snap_ref(map[n++]);
break;
}
}
- } else if (LJ_SOFTFP && ir->o == IR_HIOP) {
+ } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
ref++;
} else if (ir->o == IR_PVAL) {
ref = ir->op1 + REF_BIAS;
@@ -394,7 +435,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
if (bloomtest(rfilt, ref))
rs = snap_renameref(T, snapno, ref, rs);
ir->prev = (uint16_t)rs;
- lua_assert(regsp_used(rs));
+ lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS);
}
return ir;
}
@@ -409,10 +450,10 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir)
case IR_KPRI: return TREF_PRI(irt_type(ir->t));
case IR_KINT: return lj_ir_kint(J, ir->i);
case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
- case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir));
- case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir));
+ case IR_KNUM: case IR_KINT64:
+ return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */
- default: lua_assert(0); return TREF_NIL; break;
+ default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL;
}
}
@@ -422,7 +463,7 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
MSize j;
for (j = 0; j < nmax; j++)
if (snap_ref(map[j]) == ref)
- return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
+ return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME);
return 0;
}
@@ -483,21 +524,27 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
goto setslot;
bloomset(seen, ref);
if (irref_isk(ref)) {
- tr = snap_replay_const(J, ir);
+ /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
+ if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
+ tr = 0;
+ else
+ tr = snap_replay_const(J, ir);
} else if (!regsp_used(ir->prev)) {
pass23 = 1;
- lua_assert(s != 0);
+ lj_assertJ(s != 0, "unused slot 0 in snapshot");
tr = s;
} else {
IRType t = irt_type(ir->t);
uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
+ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
+ if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX;
tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
}
setslot:
- J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */
- J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
+ /* Same as TREF_* flags. */
+ J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME));
+ J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
if ((sn & SNAP_FRAME))
J->baseslot = s+1;
}
@@ -512,8 +559,9 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
if (regsp_reg(ir->r) == RID_SUNK) {
if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
pass23 = 1;
- lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
- ir->o == IR_CNEW || ir->o == IR_CNEWI);
+ lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
+ ir->o == IR_CNEW || ir->o == IR_CNEWI,
+ "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
if (LJ_HASFFI && ir->o == IR_CNEWI) {
@@ -525,13 +573,14 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
- else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
+ else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
irs+1 < irlast && (irs+1)->o == IR_HIOP)
snap_pref(J, T, map, nent, seen, (irs+1)->op2);
}
}
} else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
- lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
+ lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
+ "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
}
}
@@ -581,20 +630,21 @@ void lj_snap_replay(jit_State *J, GCtrace *T)
val = snap_pref(J, T, map, nent, seen, irs->op2);
if (val == 0) {
IRIns *irc = &T->ir[irs->op2];
- lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
+ lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT,
+ "sunk store for parent IR %04d with bad op %d",
+ refp - REF_BIAS, irc->o);
val = snap_pref(J, T, map, nent, seen, irc->op1);
val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
- } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) &&
+ } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
irs+1 < irlast && (irs+1)->o == IR_HIOP) {
IRType t = IRT_I64;
- if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP)
+ if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
t = IRT_NUM;
lj_needsplit(J);
if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
uint64_t k = (uint32_t)T->ir[irs->op2].i +
((uint64_t)T->ir[(irs+1)->op2].i << 32);
- val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM,
- lj_ir_k64_find(J, k));
+ val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
} else {
val = emitir_raw(IRT(IR_HIOP, t), val,
snap_pref(J, T, map, nent, seen, (irs+1)->op2));
@@ -632,7 +682,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
IRType1 t = ir->t;
RegSP rs = ir->prev;
if (irref_isk(ref)) { /* Restore constant slot. */
- lj_ir_kvalue(J->L, o, ir);
+ if (ir->o == IR_KPTR) {
+ o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir);
+ } else {
+ lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL),
+ "restore of const from IR %04d with bad op %d",
+ ref - REF_BIAS, ir->o);
+ lj_ir_kvalue(J->L, o, ir);
+ }
return;
}
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
@@ -641,22 +698,24 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
int32_t *sps = &ex->spill[regsp_spill(rs)];
if (irt_isinteger(t)) {
setintV(o, *sps);
-#if !LJ_SOFTFP
+#if !LJ_SOFTFP32
} else if (irt_isnum(t)) {
o->u64 = *(uint64_t *)sps;
#endif
- } else if (LJ_64 && irt_islightud(t)) {
+#if LJ_64 && !LJ_GC64
+ } else if (irt_islightud(t)) {
/* 64 bit lightuserdata which may escape already has the tag bits. */
o->u64 = *(uint64_t *)sps;
+#endif
} else {
- lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
- setgcrefi(o->gcr, *sps);
- setitype(o, irt_toitype(t));
+ lj_assertJ(!irt_ispri(t), "PRI ref with spill slot");
+ setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
}
} else { /* Restore from register. */
Reg r = regsp_reg(rs);
if (ra_noreg(r)) {
- lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
+ lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
+ "restore from IR %04d has no reg", ref - REF_BIAS);
snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
return;
@@ -665,21 +724,26 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
#if !LJ_SOFTFP
} else if (irt_isnum(t)) {
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
+#elif LJ_64 /* && LJ_SOFTFP */
+ } else if (irt_isnum(t)) {
+ o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
- } else if (LJ_64 && irt_islightud(t)) {
- /* 64 bit lightuserdata which may escape already has the tag bits. */
+#if LJ_64 && !LJ_GC64
+ } else if (irt_is64(t)) {
+ /* 64 bit values that already have the tag bits. */
o->u64 = ex->gpr[r-RID_MIN_GPR];
+#endif
+ } else if (irt_ispri(t)) {
+ setpriV(o, irt_toitype(t));
} else {
- if (!irt_ispri(t))
- setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
- setitype(o, irt_toitype(t));
+ setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
}
}
}
#if LJ_HASFFI
/* Restore raw data from the trace exit state. */
-static void snap_restoredata(GCtrace *T, ExitState *ex,
+static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex,
SnapNo snapno, BloomFilter rfilt,
IRRef ref, void *dst, CTSize sz)
{
@@ -687,9 +751,10 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
RegSP rs = ir->prev;
int32_t *src;
uint64_t tmp;
+ UNUSED(J);
if (irref_isk(ref)) {
- if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
- src = mref(ir->ptr, int32_t);
+ if (ir_isk64(ir)) {
+ src = (int32_t *)&ir[1];
} else if (sz == 8) {
tmp = (uint64_t)(uint32_t)ir->i;
src = (int32_t *)&tmp;
@@ -709,8 +774,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
Reg r = regsp_reg(rs);
if (ra_noreg(r)) {
/* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
- lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
- snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
+ lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
+ "restore from IR %04d has no reg", ref - REF_BIAS);
+ snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4);
*(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
return;
}
@@ -726,11 +792,13 @@ static void snap_restoredata(GCtrace *T, ExitState *ex,
#else
if (LJ_BE && sz == 4) src++;
#endif
- }
+ } else
#endif
+ if (LJ_64 && LJ_BE && sz == 4) src++;
}
}
- lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
+ lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8,
+ "restore from IR %04d with bad size %d", ref - REF_BIAS, sz);
if (sz == 4) *(int32_t *)dst = *src;
else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
@@ -743,24 +811,27 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
SnapNo snapno, BloomFilter rfilt,
IRIns *ir, TValue *o)
{
- lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
- ir->o == IR_CNEW || ir->o == IR_CNEWI);
+ lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
+ ir->o == IR_CNEW || ir->o == IR_CNEWI,
+ "sunk allocation with bad op %d", ir->o);
#if LJ_HASFFI
if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
CTState *cts = ctype_cts(J->L);
CTypeID id = (CTypeID)T->ir[ir->op1].i;
- CTSize sz = lj_ctype_size(cts, id);
- GCcdata *cd = lj_cdata_new(cts, id, sz);
+ CTSize sz;
+ CTInfo info = lj_ctype_info(cts, id, &sz);
+ GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
setcdataV(J->L, o, cd);
if (ir->o == IR_CNEWI) {
uint8_t *p = (uint8_t *)cdataptr(cd);
- lua_assert(sz == 4 || sz == 8);
+ lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz);
if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
- snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
+ snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2,
+ LJ_LE ? p+4 : p, 4);
if (LJ_BE) p += 4;
sz = 4;
}
- snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
+ snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz);
} else {
IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
for (irs = ir+1; irs < irlast; irs++)
@@ -768,8 +839,11 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
IRIns *iro = &T->ir[T->ir[irs->op1].op2];
uint8_t *p = (uint8_t *)cd;
CTSize szs;
- lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
- lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
+ lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o);
+ lj_assertJ(T->ir[irs->op1].o == IR_ADD,
+ "sunk store with bad add op %d", T->ir[irs->op1].o);
+ lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64,
+ "sunk store with bad const offset op %d", iro->o);
if (irt_is64(irs->t)) szs = 8;
else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
@@ -778,14 +852,16 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
p += (int64_t)ir_k64(iro)->u64;
else
p += iro->i;
- lua_assert(p >= (uint8_t *)cdataptr(cd) &&
- p + szs <= (uint8_t *)cdataptr(cd) + sz);
+ lj_assertJ(p >= (uint8_t *)cdataptr(cd) &&
+ p + szs <= (uint8_t *)cdataptr(cd) + sz,
+ "sunk store with offset out of range");
if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
- lua_assert(szs == 4);
- snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
+ lj_assertJ(szs == 4, "sunk store with bad size %d", szs);
+ snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2,
+ LJ_LE ? p+4 : p, 4);
if (LJ_BE) p += 4;
}
- snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
+ snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs);
}
}
} else
@@ -800,10 +876,12 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
IRIns *irk = &T->ir[irs->op1];
TValue tmp, *val;
- lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
- irs->o == IR_FSTORE);
+ lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
+ irs->o == IR_FSTORE,
+ "sunk store with bad op %d", irs->o);
if (irk->o == IR_FREF) {
- lua_assert(irk->op2 == IRFL_TAB_META);
+ lj_assertJ(irk->op2 == IRFL_TAB_META,
+ "sunk store with bad field %d", irk->op2);
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
/* NOBARRIER: The table is new (marked white). */
setgcref(t->metatable, obj2gco(tabV(&tmp)));
@@ -814,7 +892,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
val = lj_tab_set(J->L, t, &tmp);
/* NOBARRIER: The table is new (marked white). */
snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
- if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
+ if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
val->u32.hi = tmp.u32.lo;
}
@@ -832,11 +910,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
SnapShot *snap = &T->snap[snapno];
MSize n, nent = snap->nent;
SnapEntry *map = &T->snapmap[snap->mapofs];
- SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1];
- int32_t ftsz0;
+#if !LJ_FR2 || defined(LUA_USE_ASSERT)
+ SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
+#endif
+#if !LJ_FR2
+ ptrdiff_t ftsz0;
+#endif
TValue *frame;
BloomFilter rfilt = snap_renamefilter(T, snapno);
- const BCIns *pc = snap_pc(map[nent]);
+ const BCIns *pc = snap_pc(&map[nent]);
lua_State *L = J->L;
/* Set interpreter PC to the next PC to get correct error messages. */
@@ -849,8 +931,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
}
/* Fill stack slots with data from the registers and spill slots. */
- frame = L->base-1;
+ frame = L->base-1-LJ_FR2;
+#if !LJ_FR2
ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */
+#endif
for (n = 0; n < nent; n++) {
SnapEntry sn = map[n];
if (!(sn & SNAP_NORESTORE)) {
@@ -869,18 +953,27 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr)
continue;
}
snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
- if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
+ if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
TValue tmp;
snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
o->u32.hi = tmp.u32.lo;
+#if !LJ_FR2
} else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
/* Overwrite tag with frame link. */
- o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0;
+ setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
L->base = o+1;
+#endif
+ } else if ((sn & SNAP_KEYINDEX)) {
+ /* A IRT_INT key index slot is restored as a number. Undo this. */
+ o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o)));
+ o->u32.hi = LJ_KEYINDEX;
}
}
}
- lua_assert(map + nent == flinks);
+#if LJ_FR2
+ L->base += (map[nent+LJ_BE] & 0xff);
+#endif
+ lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot");
/* Compute current stack top. */
switch (bc_op(*pc)) {
diff --git a/src/lj_snap.h b/src/lj_snap.h
index 03cf9038..b7dabed8 100644
--- a/src/lj_snap.h
+++ b/src/lj_snap.h
@@ -13,7 +13,8 @@
LJ_FUNC void lj_snap_add(jit_State *J);
LJ_FUNC void lj_snap_purge(jit_State *J);
LJ_FUNC void lj_snap_shrink(jit_State *J);
-LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir);
+LJ_FUNC IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno,
+ IRIns *ir);
LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T);
LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
diff --git a/src/lj_state.c b/src/lj_state.c
index 1e2cfde9..0b9c46ba 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -12,6 +12,7 @@
#include "lj_obj.h"
#include "lj_gc.h"
#include "lj_err.h"
+#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_func.h"
@@ -24,8 +25,10 @@
#include "lj_trace.h"
#include "lj_dispatch.h"
#include "lj_vm.h"
+#include "lj_prng.h"
#include "lj_lex.h"
#include "lj_alloc.h"
+#include "luajit.h"
/* -- Stack handling ------------------------------------------------------ */
@@ -47,6 +50,7 @@
** one extra slot if mobj is not a function. Only lj_meta_tset needs 5
** slots above top, but then mobj is always a function. So we can get by
** with 5 extra slots.
+** LJ_FR2: We need 2 more slots for the frame PC and the continuation PC.
*/
/* Resize stack slots and adjust pointers in state. */
@@ -57,9 +61,10 @@ static void resizestack(lua_State *L, MSize n)
MSize oldsize = L->stacksize;
MSize realsize = n + 1 + LJ_STACK_EXTRA;
GCobj *up;
- lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1);
+ lj_assertL((MSize)(tvref(L->maxstack)-oldst) == L->stacksize-LJ_STACK_EXTRA-1,
+ "inconsistent stack size");
st = (TValue *)lj_mem_realloc(L, tvref(L->stack),
- (MSize)(L->stacksize*sizeof(TValue)),
+ (MSize)(oldsize*sizeof(TValue)),
(MSize)(realsize*sizeof(TValue)));
setmref(L->stack, st);
delta = (char *)st - (char *)oldst;
@@ -67,12 +72,12 @@ static void resizestack(lua_State *L, MSize n)
while (oldsize < realsize) /* Clear new slots. */
setnilV(st + oldsize++);
L->stacksize = realsize;
+ if ((size_t)(mref(G(L)->jit_base, char) - (char *)oldst) < oldsize)
+ setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
L->base = (TValue *)((char *)L->base + delta);
L->top = (TValue *)((char *)L->top + delta);
for (up = gcref(L->openupval); up != NULL; up = gcnext(up))
setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta));
- if (obj2gco(L) == gcref(G(L)->jit_L))
- setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta);
}
/* Relimit stack after error, in case the limit was overdrawn. */
@@ -89,7 +94,8 @@ void lj_state_shrinkstack(lua_State *L, MSize used)
return; /* Avoid stack shrinking while handling stack overflow. */
if (4*used < L->stacksize &&
2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize &&
- obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */
+ /* Don't shrink stack of live trace. */
+ (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L)))
resizestack(L, L->stacksize >> 1);
}
@@ -125,8 +131,9 @@ static void stack_init(lua_State *L1, lua_State *L)
L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA;
stend = st + L1->stacksize;
setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1);
- L1->base = L1->top = st+1;
- setthreadV(L1, st, L1); /* Needed for curr_funcisL() on empty stack. */
+ setthreadV(L1, st++, L1); /* Needed for curr_funcisL() on empty stack. */
+ if (LJ_FR2) setnilV(st++);
+ L1->base = L1->top = st;
while (st < stend) /* Clear new slots. */
setnilV(st++);
}
@@ -143,12 +150,13 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
/* NOBARRIER: State initialization, all objects are white. */
setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL)));
settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY));
- lj_str_resize(L, LJ_MIN_STRTAB-1);
+ lj_str_init(L);
lj_meta_init(L);
lj_lex_init(L);
fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */
g->gc.threshold = 4*g->gc.total;
lj_trace_initstate(g);
+ lj_err_verify();
return NULL;
}
@@ -157,16 +165,25 @@ static void close_state(lua_State *L)
global_State *g = G(L);
lj_func_closeuv(L, tvref(L->stack));
lj_gc_freeall(g);
- lua_assert(gcref(g->gc.root) == obj2gco(L));
- lua_assert(g->strnum == 0);
+ lj_assertG(gcref(g->gc.root) == obj2gco(L),
+ "main thread is not first GC object");
+ lj_assertG(g->str.num == 0, "leaked %d strings", g->str.num);
lj_trace_freestate(g);
#if LJ_HASFFI
lj_ctype_freestate(g);
#endif
- lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
- lj_str_freebuf(g, &g->tmpbuf);
+ lj_str_freetab(g);
+ lj_buf_free(g, &g->tmpbuf);
lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
- lua_assert(g->gc.total == sizeof(GG_State));
+#if LJ_64
+ if (mref(g->gc.lightudseg, uint32_t)) {
+ MSize segnum = g->gc.lightudnum ? (2 << lj_fls(g->gc.lightudnum)) : 2;
+ lj_mem_freevec(g, mref(g->gc.lightudseg, uint32_t), segnum, uint32_t);
+ }
+#endif
+ lj_assertG(g->gc.total == sizeof(GG_State),
+ "memory leak of %lld bytes",
+ (long long)(g->gc.total - sizeof(GG_State)));
#ifndef LUAJIT_USE_SYSMALLOC
if (g->allocf == lj_alloc_f)
lj_alloc_destroy(g->allocd);
@@ -175,17 +192,34 @@ static void close_state(lua_State *L)
g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0);
}
-#if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
-lua_State *lj_state_newstate(lua_Alloc f, void *ud)
+#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
+lua_State *lj_state_newstate(lua_Alloc allocf, void *allocd)
#else
-LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
+LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd)
#endif
{
- GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State));
- lua_State *L = &GG->L;
- global_State *g = &GG->g;
- if (GG == NULL || !checkptr32(GG)) return NULL;
+ PRNGState prng;
+ GG_State *GG;
+ lua_State *L;
+ global_State *g;
+ /* We need the PRNG for the memory allocator, so initialize this first. */
+ if (!lj_prng_seed_secure(&prng)) {
+ lj_assertX(0, "secure PRNG seeding failed");
+ /* Can only return NULL here, so this errors with "not enough memory". */
+ return NULL;
+ }
+#ifndef LUAJIT_USE_SYSMALLOC
+ if (allocf == LJ_ALLOCF_INTERNAL) {
+ allocd = lj_alloc_create(&prng);
+ if (!allocd) return NULL;
+ allocf = lj_alloc_f;
+ }
+#endif
+ GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State));
+ if (GG == NULL || !checkptrGC(GG)) return NULL;
memset(GG, 0, sizeof(GG_State));
+ L = &GG->L;
+ g = &GG->g;
L->gct = ~LJ_TTHREAD;
L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */
L->dummy_ffid = FF_C;
@@ -193,17 +227,25 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED;
g->strempty.marked = LJ_GC_WHITE0;
g->strempty.gct = ~LJ_TSTR;
- g->allocf = f;
- g->allocd = ud;
+ g->allocf = allocf;
+ g->allocd = allocd;
+ g->prng = prng;
+#ifndef LUAJIT_USE_SYSMALLOC
+ if (allocf == lj_alloc_f) {
+ lj_alloc_setprng(allocd, &g->prng);
+ }
+#endif
setgcref(g->mainthref, obj2gco(L));
setgcref(g->uvhead.prev, obj2gco(&g->uvhead));
setgcref(g->uvhead.next, obj2gco(&g->uvhead));
- g->strmask = ~(MSize)0;
+ g->str.mask = ~(MSize)0;
setnilV(registry(L));
setnilV(&g->nilnode.val);
setnilV(&g->nilnode.key);
+#if !LJ_GC64
setmref(g->nilnode.freetop, &g->nilnode);
- lj_str_initbuf(&g->tmpbuf);
+#endif
+ lj_buf_init(NULL, &g->tmpbuf);
g->gc.state = GCSpause;
setgcref(g->gc.root, obj2gco(L));
setmref(g->gc.sweep, &g->gc.root);
@@ -217,7 +259,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
close_state(L);
return NULL;
}
- L->status = 0;
+ L->status = LUA_OK;
return L;
}
@@ -236,6 +278,10 @@ LUA_API void lua_close(lua_State *L)
global_State *g = G(L);
int i;
L = mainthread(g); /* Only the main thread can be closed. */
+#if LJ_HASPROFILE
+ luaJIT_profile_stop(L);
+#endif
+ setgcrefnull(g->cur_L);
lj_func_closeuv(L, tvref(L->stack));
lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */
#if LJ_HASJIT
@@ -245,10 +291,10 @@ LUA_API void lua_close(lua_State *L)
#endif
for (i = 0;;) {
hook_enter(g);
- L->status = 0;
+ L->status = LUA_OK;
+ L->base = L->top = tvref(L->stack) + 1 + LJ_FR2;
L->cframe = NULL;
- L->base = L->top = tvref(L->stack) + 1;
- if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) {
+ if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == LUA_OK) {
if (++i >= 10) break;
lj_gc_separateudata(g, 1); /* Separate udata again. */
if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */
@@ -263,7 +309,7 @@ lua_State *lj_state_new(lua_State *L)
lua_State *L1 = lj_mem_newobj(L, lua_State);
L1->gct = ~LJ_TTHREAD;
L1->dummy_ffid = FF_C;
- L1->status = 0;
+ L1->status = LUA_OK;
L1->stacksize = 0;
setmref(L1->stack, NULL);
L1->cframe = NULL;
@@ -272,15 +318,17 @@ lua_State *lj_state_new(lua_State *L)
setmrefr(L1->glref, L->glref);
setgcrefr(L1->env, L->env);
stack_init(L1, L); /* init stack */
- lua_assert(iswhite(obj2gco(L1)));
+ lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white");
return L1;
}
void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L)
{
- lua_assert(L != mainthread(g));
+ lj_assertG(L != mainthread(g), "free of main thread");
+ if (obj2gco(L) == gcref(g->cur_L))
+ setgcrefnull(g->cur_L);
lj_func_closeuv(L, tvref(L->stack));
- lua_assert(gcref(L->openupval) == NULL);
+ lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues");
lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
lj_mem_freet(g, L);
}
diff --git a/src/lj_state.h b/src/lj_state.h
index 48c4d700..d22b7a6f 100644
--- a/src/lj_state.h
+++ b/src/lj_state.h
@@ -28,8 +28,10 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need)
LJ_FUNC lua_State *lj_state_new(lua_State *L);
LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L);
-#if LJ_64
+#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC))
LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud);
#endif
+#define LJ_ALLOCF_INTERNAL ((lua_Alloc)(void *)(uintptr_t)(1237<<4))
+
#endif
diff --git a/src/lj_str.c b/src/lj_str.c
index 60912aed..a5282da6 100644
--- a/src/lj_str.c
+++ b/src/lj_str.c
@@ -1,13 +1,8 @@
/*
** String handling.
** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
-**
-** Portions taken verbatim or adapted from the Lua interpreter.
-** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
*/
-#include <stdio.h>
-
#define lj_str_c
#define LUA_CORE
@@ -15,10 +10,10 @@
#include "lj_gc.h"
#include "lj_err.h"
#include "lj_str.h"
-#include "lj_state.h"
#include "lj_char.h"
+#include "lj_prng.h"
-/* -- String interning ---------------------------------------------------- */
+/* -- String helpers ------------------------------------------------------ */
/* Ordered compare of strings. Assumes string data is 4-byte aligned. */
int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
@@ -43,297 +38,333 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
return (int32_t)(a->len - b->len);
}
-/* Fast string data comparison. Caveat: unaligned access to 1st string! */
-static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
+/* Find fixed string p inside string s. */
+const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
{
- MSize i = 0;
- lua_assert(len > 0);
- lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
- do { /* Note: innocuous access up to end of string + 3. */
- uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
- if (v) {
- i -= len;
-#if LJ_LE
- return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
-#else
- return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
-#endif
+ if (plen <= slen) {
+ if (plen == 0) {
+ return s;
+ } else {
+ int c = *(const uint8_t *)p++;
+ plen--; slen -= plen;
+ while (slen) {
+ const char *q = (const char *)memchr(s, c, slen);
+ if (!q) break;
+ if (memcmp(q+1, p, plen) == 0) return q;
+ q++; slen -= (MSize)(q-s); s = q;
+ }
}
- i += 4;
- } while (i < len);
- return 0;
+ }
+ return NULL;
}
-/* Resize the string hash table (grow and shrink). */
-void lj_str_resize(lua_State *L, MSize newmask)
+/* Check whether a string has a pattern matching character. */
+int lj_str_haspattern(GCstr *s)
{
- global_State *g = G(L);
- GCRef *newhash;
- MSize i;
- if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
- return; /* No resizing during GC traversal or if already too big. */
- newhash = lj_mem_newvec(L, newmask+1, GCRef);
- memset(newhash, 0, (newmask+1)*sizeof(GCRef));
- for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */
- GCobj *p = gcref(g->strhash[i]);
- while (p) { /* Follow each hash chain and reinsert all strings. */
- MSize h = gco2str(p)->hash & newmask;
- GCobj *next = gcnext(p);
- /* NOBARRIER: The string table is a GC root. */
- setgcrefr(p->gch.nextgc, newhash[h]);
- setgcref(newhash[h], p);
- p = next;
- }
+ const char *p = strdata(s), *q = p + s->len;
+ while (p < q) {
+ int c = *(const uint8_t *)p++;
+ if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
+ return 1; /* Found a pattern matching char. */
}
- lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
- g->strmask = newmask;
- g->strhash = newhash;
+ return 0; /* No pattern matching chars found. */
}
-/* Intern a string and return string object. */
-GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
+/* -- String hashing ------------------------------------------------------ */
+
+/* Keyed sparse ARX string hash. Constant time. */
+static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
{
- global_State *g;
- GCstr *s;
- GCobj *o;
- MSize len = (MSize)lenx;
- MSize a, b, h = len;
- if (lenx >= LJ_MAX_STR)
- lj_err_msg(L, LJ_ERR_STROV);
- g = G(L);
- /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
+ /* Constants taken from lookup3 hash by Bob Jenkins. */
+ StrHash a, b, h = len ^ (StrHash)seed;
if (len >= 4) { /* Caveat: unaligned access! */
a = lj_getu32(str);
h ^= lj_getu32(str+len-4);
b = lj_getu32(str+(len>>1)-2);
h ^= b; h -= lj_rol(b, 14);
b += lj_getu32(str+(len>>2)-1);
- } else if (len > 0) {
+ } else {
a = *(const uint8_t *)str;
h ^= *(const uint8_t *)(str+len-1);
b = *(const uint8_t *)(str+(len>>1));
h ^= b; h -= lj_rol(b, 14);
- } else {
- return &g->strempty;
}
a ^= h; a -= lj_rol(h, 11);
b ^= a; b -= lj_rol(a, 25);
h ^= b; h -= lj_rol(b, 16);
- /* Check if the string has already been interned. */
- o = gcref(g->strhash[h & g->strmask]);
- if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
- while (o != NULL) {
- GCstr *sx = gco2str(o);
- if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
- /* Resurrect if dead. Can only happen with fixstring() (keywords). */
- if (isdead(g, o)) flipwhite(o);
- return sx; /* Return existing string. */
- }
- o = gcnext(o);
- }
- } else { /* Slow path: end of string is too close to a page boundary. */
- while (o != NULL) {
- GCstr *sx = gco2str(o);
- if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
- /* Resurrect if dead. Can only happen with fixstring() (keywords). */
- if (isdead(g, o)) flipwhite(o);
- return sx; /* Return existing string. */
- }
- o = gcnext(o);
- }
- }
- /* Nope, create a new string. */
- s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
- newwhite(g, s);
- s->gct = ~LJ_TSTR;
- s->len = len;
- s->hash = h;
- s->reserved = 0;
- memcpy(strdatawr(s), str, len);
- strdatawr(s)[len] = '\0'; /* Zero-terminate string. */
- /* Add it to string hash table. */
- h &= g->strmask;
- s->nextgc = g->strhash[h];
- /* NOBARRIER: The string table is a GC root. */
- setgcref(g->strhash[h], obj2gco(s));
- if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */
- lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */
- return s; /* Return newly interned string. */
+ return h;
}
-void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
+#if LUAJIT_SECURITY_STRHASH
+/* Keyed dense ARX string hash. Linear time. */
+static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
+ const char *str, MSize len)
{
- g->strnum--;
- lj_mem_free(g, s, sizestring(s));
+ StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4));
+ if (len > 12) {
+ StrHash a = (StrHash)seed;
+ const char *pe = str+len-12, *p = pe, *q = str;
+ do {
+ a += lj_getu32(p);
+ b += lj_getu32(p+4);
+ h += lj_getu32(p+8);
+ p = q; q += 12;
+ h ^= b; h -= lj_rol(b, 14);
+ a ^= h; a -= lj_rol(h, 11);
+ b ^= a; b -= lj_rol(a, 25);
+ } while (p < pe);
+ h ^= b; h -= lj_rol(b, 16);
+ a ^= h; a -= lj_rol(h, 4);
+ b ^= a; b -= lj_rol(a, 14);
+ }
+ return b;
}
+#endif
-/* -- Type conversions ---------------------------------------------------- */
+/* -- String interning ---------------------------------------------------- */
-/* Print number to buffer. Canonicalizes non-finite values. */
-size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
-{
- if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */
- lua_Number n = o->n;
-#if __BIONIC__
- if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
-#endif
- return (size_t)lua_number2str(s, n);
- } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
- s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
- } else if ((o->u32.hi & 0x80000000) == 0) {
- s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
- } else {
- s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
- }
-}
+#define LJ_STR_MAXCOLL 32
-/* Print integer to buffer. Returns pointer to start. */
-char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
+/* Resize the string interning hash table (grow and shrink). */
+void lj_str_resize(lua_State *L, MSize newmask)
{
- uint32_t u = (uint32_t)(k < 0 ? -k : k);
- p += 1+10;
- do { *--p = (char)('0' + u % 10); } while (u /= 10);
- if (k < 0) *--p = '-';
- return p;
-}
+ global_State *g = G(L);
+ GCRef *newtab, *oldtab = g->str.tab;
+ MSize i;
-/* Convert number to string. */
-GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
-{
- char buf[LJ_STR_NUMBUF];
- size_t len = lj_str_bufnum(buf, (TValue *)np);
- return lj_str_new(L, buf, len);
-}
+ /* No resizing during GC traversal or if already too big. */
+ if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
+ return;
-/* Convert integer to string. */
-GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
-{
- char s[1+10];
- char *p = lj_str_bufint(s, k);
- return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
-}
+ newtab = lj_mem_newvec(L, newmask+1, GCRef);
+ memset(newtab, 0, (newmask+1)*sizeof(GCRef));
-GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
-{
- return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n);
-}
+#if LUAJIT_SECURITY_STRHASH
+ /* Check which chains need secondary hashes. */
+ if (g->str.second) {
+ int newsecond = 0;
+ /* Compute primary chain lengths. */
+ for (i = g->str.mask; i != ~(MSize)0; i--) {
+ GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
+ while (o) {
+ GCstr *s = gco2str(o);
+ MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) :
+ s->hash;
+ hash &= newmask;
+ setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1);
+ o = gcnext(o);
+ }
+ }
+ /* Mark secondary chains. */
+ for (i = newmask; i != ~(MSize)0; i--) {
+ int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL;
+ newsecond |= secondary;
+ setgcrefp(newtab[i], secondary);
+ }
+ g->str.second = newsecond;
+ }
+#endif
-/* -- String formatting --------------------------------------------------- */
+ /* Reinsert all strings from the old table into the new table. */
+ for (i = g->str.mask; i != ~(MSize)0; i--) {
+ GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1);
+ while (o) {
+ GCobj *next = gcnext(o);
+ GCstr *s = gco2str(o);
+ MSize hash = s->hash;
+#if LUAJIT_SECURITY_STRHASH
+ uintptr_t u;
+ if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */
+ hash &= newmask;
+ u = gcrefu(newtab[hash]);
+ if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */
+ s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len);
+ s->hashalg = 1;
+ hash &= newmask;
+ u = gcrefu(newtab[hash]);
+ }
+ } else { /* String hashed with secondary hash. */
+ MSize shash = hash_sparse(g->str.seed, strdata(s), s->len);
+ u = gcrefu(newtab[shash & newmask]);
+ if (u & 1) {
+ hash &= newmask;
+ u = gcrefu(newtab[hash]);
+ } else { /* Revert string back to primary hash. */
+ s->hash = shash;
+ s->hashalg = 0;
+ hash = (shash & newmask);
+ }
+ }
+ /* NOBARRIER: The string table is a GC root. */
+ setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
+ setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1)));
+#else
+ hash &= newmask;
+ /* NOBARRIER: The string table is a GC root. */
+ setgcrefr(o->gch.nextgc, newtab[hash]);
+ setgcref(newtab[hash], o);
+#endif
+ o = next;
+ }
+ }
+
+ /* Free old table and replace with new table. */
+ lj_str_freetab(g);
+ g->str.tab = newtab;
+ g->str.mask = newmask;
+}
-static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
+#if LUAJIT_SECURITY_STRHASH
+/* Rehash and rechain all strings in a chain. */
+static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc,
+ const char *str, MSize len)
{
- char *p;
- MSize i;
- if (sb->n + len > sb->sz) {
- MSize sz = sb->sz * 2;
- while (sb->n + len > sz) sz = sz * 2;
- lj_str_resizebuf(L, sb, sz);
+ global_State *g = G(L);
+ int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */
+ GCRef *strtab = g->str.tab;
+ MSize strmask = g->str.mask;
+ GCobj *o = gcref(strtab[hashc & strmask]);
+ setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1));
+ g->str.second = 1;
+ while (o) {
+ uintptr_t u;
+ GCobj *next = gcnext(o);
+ GCstr *s = gco2str(o);
+ StrHash hash;
+ if (ow) { /* Must sweep while rechaining. */
+ if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */
+ lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED),
+ "sweep of undead string");
+ makewhite(g, o);
+ } else { /* Free dead string. */
+ lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED,
+ "sweep of unlive string");
+ lj_str_free(g, s);
+ o = next;
+ continue;
+ }
+ }
+ hash = s->hash;
+ if (!s->hashalg) { /* Rehash with secondary hash. */
+ hash = hash_dense(g->str.seed, hash, strdata(s), s->len);
+ s->hash = hash;
+ s->hashalg = 1;
+ }
+ /* Rechain. */
+ hash &= strmask;
+ u = gcrefu(strtab[hash]);
+ setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1));
+ setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1)));
+ o = next;
}
- p = sb->buf + sb->n;
- sb->n += len;
- for (i = 0; i < len; i++) p[i] = str[i];
+ /* Try to insert the pending string again. */
+ return lj_str_new(L, str, len);
}
+#endif
+
+/* Reseed String ID from PRNG after random interval < 2^bits. */
+#if LUAJIT_SECURITY_STRID == 1
+#define STRID_RESEED_INTERVAL 8
+#elif LUAJIT_SECURITY_STRID == 2
+#define STRID_RESEED_INTERVAL 4
+#elif LUAJIT_SECURITY_STRID >= 3
+#define STRID_RESEED_INTERVAL 0
+#endif
-static void addchar(lua_State *L, SBuf *sb, int c)
+/* Allocate a new string and add to string interning table. */
+static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len,
+ StrHash hash, int hashalg)
{
- if (sb->n + 1 > sb->sz) {
- MSize sz = sb->sz * 2;
- lj_str_resizebuf(L, sb, sz);
+ GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr);
+ global_State *g = G(L);
+ uintptr_t u;
+ newwhite(g, s);
+ s->gct = ~LJ_TSTR;
+ s->len = len;
+ s->hash = hash;
+#ifndef STRID_RESEED_INTERVAL
+ s->sid = g->str.id++;
+#elif STRID_RESEED_INTERVAL
+ if (!g->str.idreseed--) {
+ uint64_t r = lj_prng_u64(&g->prng);
+ g->str.id = (StrID)r;
+ g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL));
}
- sb->buf[sb->n++] = (char)c;
+ s->sid = g->str.id++;
+#else
+ s->sid = (StrID)lj_prng_u64(&g->prng);
+#endif
+ s->reserved = 0;
+ s->hashalg = (uint8_t)hashalg;
+ /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */
+ *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0;
+ memcpy(strdatawr(s), str, len);
+ /* Add to string hash table. */
+ hash &= g->str.mask;
+ u = gcrefu(g->str.tab[hash]);
+ setgcrefp(s->nextgc, (u & ~(uintptr_t)1));
+ /* NOBARRIER: The string table is a GC root. */
+ setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1)));
+ if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */
+ lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */
+ return s; /* Return newly interned string. */
}
-/* Push formatted message as a string object to Lua stack. va_list variant. */
-const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
+/* Intern a string and return string object. */
+GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
{
- SBuf *sb = &G(L)->tmpbuf;
- lj_str_needbuf(L, sb, (MSize)strlen(fmt));
- lj_str_resetbuf(sb);
- for (;;) {
- const char *e = strchr(fmt, '%');
- if (e == NULL) break;
- addstr(L, sb, fmt, (MSize)(e-fmt));
- /* This function only handles %s, %c, %d, %f and %p formats. */
- switch (e[1]) {
- case 's': {
- const char *s = va_arg(argp, char *);
- if (s == NULL) s = "(null)";
- addstr(L, sb, s, (MSize)strlen(s));
- break;
- }
- case 'c':
- addchar(L, sb, va_arg(argp, int));
- break;
- case 'd': {
- char buf[LJ_STR_INTBUF];
- char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
- addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
- break;
- }
- case 'f': {
- char buf[LJ_STR_NUMBUF];
- TValue tv;
- MSize len;
- tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
- len = (MSize)lj_str_bufnum(buf, &tv);
- addstr(L, sb, buf, len);
- break;
- }
- case 'p': {
-#define FMTP_CHARS (2*sizeof(ptrdiff_t))
- char buf[2+FMTP_CHARS];
- ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
- ptrdiff_t i, lasti = 2+FMTP_CHARS;
- if (p == 0) {
- addstr(L, sb, "NULL", 4);
- break;
- }
-#if LJ_64
- /* Shorten output for 64 bit pointers. */
- lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
+ global_State *g = G(L);
+ if (lenx-1 < LJ_MAX_STR-1) {
+ MSize len = (MSize)lenx;
+ StrHash hash = hash_sparse(g->str.seed, str, len);
+ MSize coll = 0;
+ int hashalg = 0;
+ /* Check if the string has already been interned. */
+ GCobj *o = gcref(g->str.tab[hash & g->str.mask]);
+#if LUAJIT_SECURITY_STRHASH
+ if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */
+ hashalg = 1;
+ hash = hash_dense(g->str.seed, hash, str, len);
+ o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1);
+ }
#endif
- buf[0] = '0';
- buf[1] = 'x';
- for (i = lasti-1; i >= 2; i--, p >>= 4)
- buf[i] = "0123456789abcdef"[(p & 15)];
- addstr(L, sb, buf, (MSize)lasti);
- break;
+ while (o != NULL) {
+ GCstr *sx = gco2str(o);
+ if (sx->hash == hash && sx->len == len) {
+ if (memcmp(str, strdata(sx), len) == 0) {
+ if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */
+ return sx; /* Return existing string. */
+ }
+ coll++;
}
- case '%':
- addchar(L, sb, '%');
- break;
- default:
- addchar(L, sb, '%');
- addchar(L, sb, e[1]);
- break;
+ coll++;
+ o = gcnext(o);
+ }
+#if LUAJIT_SECURITY_STRHASH
+ /* Rehash chain if there are too many collisions. */
+ if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) {
+ return lj_str_rehash_chain(L, hash, str, len);
}
- fmt = e+2;
+#endif
+ /* Otherwise allocate a new string. */
+ return lj_str_alloc(L, str, len, hash, hashalg);
+ } else {
+ if (lenx)
+ lj_err_msg(L, LJ_ERR_STROV);
+ return &g->strempty;
}
- addstr(L, sb, fmt, (MSize)strlen(fmt));
- setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
- incr_top(L);
- return strVdata(L->top - 1);
}
-/* Push formatted message as a string object to Lua stack. Vararg variant. */
-const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
+void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
{
- const char *msg;
- va_list argp;
- va_start(argp, fmt);
- msg = lj_str_pushvf(L, fmt, argp);
- va_end(argp);
- return msg;
+ g->str.num--;
+ lj_mem_free(g, s, lj_str_size(s->len));
}
-/* -- Buffer handling ----------------------------------------------------- */
-
-char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
+void LJ_FASTCALL lj_str_init(lua_State *L)
{
- if (sz > sb->sz) {
- if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
- lj_str_resizebuf(L, sb, sz);
- }
- return sb->buf;
+ global_State *g = G(L);
+ g->str.seed = lj_prng_u64(&g->prng);
+ lj_str_resize(L, LJ_MIN_STRTAB-1);
}
diff --git a/src/lj_str.h b/src/lj_str.h
index e304f72f..28edb5a5 100644
--- a/src/lj_str.h
+++ b/src/lj_str.h
@@ -10,41 +10,22 @@
#include "lj_obj.h"
-/* String interning. */
+/* String helpers. */
LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b);
+LJ_FUNC const char *lj_str_find(const char *s, const char *f,
+ MSize slen, MSize flen);
+LJ_FUNC int lj_str_haspattern(GCstr *s);
+
+/* String interning. */
LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask);
LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len);
LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s);
+LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L);
+#define lj_str_freetab(g) \
+ (lj_mem_freevec(g, g->str.tab, g->str.mask+1, GCRef))
#define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s)))
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
-
-/* Type conversions. */
-LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o);
-LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k);
-LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np);
-LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k);
-LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o);
-
-#define LJ_STR_INTBUF (1+10)
-#define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR
-
-/* String formatting. */
-LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp);
-LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
-#if defined(__GNUC__)
- __attribute__ ((format (printf, 2, 3)))
-#endif
- ;
-
-/* Resizable string buffers. Struct definition in lj_obj.h. */
-LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz);
-
-#define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0)
-#define lj_str_resetbuf(sb) ((sb)->n = 0)
-#define lj_str_resizebuf(L, sb, size) \
- ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \
- (sb)->sz = (size))
-#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz)
+#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3))
#endif
diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c
new file mode 100644
index 00000000..5c808290
--- /dev/null
+++ b/src/lj_strfmt.c
@@ -0,0 +1,606 @@
+/*
+** String formatting.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#include <stdio.h>
+
+#define lj_strfmt_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_err.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_meta.h"
+#include "lj_state.h"
+#include "lj_char.h"
+#include "lj_strfmt.h"
+#if LJ_HASFFI
+#include "lj_ctype.h"
+#endif
+#include "lj_lib.h"
+
+/* -- Format parser ------------------------------------------------------- */
+
+static const uint8_t strfmt_map[('x'-'A')+1] = {
+ STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0,
+ 0,0,0,0,0,0,
+ STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0,
+ 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X
+};
+
+SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs)
+{
+ const uint8_t *p = fs->p, *e = fs->e;
+ fs->str = (const char *)p;
+ for (; p < e; p++) {
+ if (*p == '%') { /* Escape char? */
+ if (p[1] == '%') { /* '%%'? */
+ fs->p = ++p+1;
+ goto retlit;
+ } else {
+ SFormat sf = 0;
+ uint32_t c;
+ if (p != (const uint8_t *)fs->str)
+ break;
+ for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) {
+ /* Parse flags. */
+ if (*p == '-') sf |= STRFMT_F_LEFT;
+ else if (*p == '+') sf |= STRFMT_F_PLUS;
+ else if (*p == '0') sf |= STRFMT_F_ZERO;
+ else if (*p == ' ') sf |= STRFMT_F_SPACE;
+ else if (*p == '#') sf |= STRFMT_F_ALT;
+ else break;
+ }
+ if ((uint32_t)*p - '0' < 10) { /* Parse width. */
+ uint32_t width = (uint32_t)*p++ - '0';
+ if ((uint32_t)*p - '0' < 10)
+ width = (uint32_t)*p++ - '0' + width*10;
+ sf |= (width << STRFMT_SH_WIDTH);
+ }
+ if (*p == '.') { /* Parse precision. */
+ uint32_t prec = 0;
+ p++;
+ if ((uint32_t)*p - '0' < 10) {
+ prec = (uint32_t)*p++ - '0';
+ if ((uint32_t)*p - '0' < 10)
+ prec = (uint32_t)*p++ - '0' + prec*10;
+ }
+ sf |= ((prec+1) << STRFMT_SH_PREC);
+ }
+ /* Parse conversion. */
+ c = (uint32_t)*p - 'A';
+ if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) {
+ uint32_t sx = strfmt_map[c];
+ if (sx) {
+ fs->p = p+1;
+ return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER));
+ }
+ }
+ /* Return error location. */
+ if (*p >= 32) p++;
+ fs->len = (MSize)(p - (const uint8_t *)fs->str);
+ fs->p = fs->e;
+ return STRFMT_ERR;
+ }
+ }
+ }
+ fs->p = p;
+retlit:
+ fs->len = (MSize)(p - (const uint8_t *)fs->str);
+ return fs->len ? STRFMT_LIT : STRFMT_EOF;
+}
+
+/* -- Raw conversions ----------------------------------------------------- */
+
+#define WINT_R(x, sh, sc) \
+ { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
+
+/* Write integer to buffer. */
+char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k)
+{
+ uint32_t u = (uint32_t)k;
+ if (k < 0) { u = (uint32_t)-k; *p++ = '-'; }
+ if (u < 10000) {
+ if (u < 10) goto dig1;
+ if (u < 100) goto dig2;
+ if (u < 1000) goto dig3;
+ } else {
+ uint32_t v = u / 10000; u -= v * 10000;
+ if (v < 10000) {
+ if (v < 10) goto dig5;
+ if (v < 100) goto dig6;
+ if (v < 1000) goto dig7;
+ } else {
+ uint32_t w = v / 10000; v -= w * 10000;
+ if (w >= 10) WINT_R(w, 10, 10)
+ *p++ = (char)('0'+w);
+ }
+ WINT_R(v, 23, 1000)
+ dig7: WINT_R(v, 12, 100)
+ dig6: WINT_R(v, 10, 10)
+ dig5: *p++ = (char)('0'+v);
+ }
+ WINT_R(u, 23, 1000)
+ dig3: WINT_R(u, 12, 100)
+ dig2: WINT_R(u, 10, 10)
+ dig1: *p++ = (char)('0'+u);
+ return p;
+}
+#undef WINT_R
+
+/* Write pointer to buffer. */
+char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v)
+{
+ ptrdiff_t x = (ptrdiff_t)v;
+ MSize i, n = STRFMT_MAXBUF_PTR;
+ if (x == 0) {
+ *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L';
+ return p;
+ }
+#if LJ_64
+ /* Shorten output for 64 bit pointers. */
+ n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0);
+#endif
+ p[0] = '0';
+ p[1] = 'x';
+ for (i = n-1; i >= 2; i--, x >>= 4)
+ p[i] = "0123456789abcdef"[(x & 15)];
+ return p+n;
+}
+
+/* Write ULEB128 to buffer. */
+char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v)
+{
+ for (; v >= 0x80; v >>= 7)
+ *p++ = (char)((v & 0x7f) | 0x80);
+ *p++ = (char)v;
+ return p;
+}
+
+/* Return string or write number to tmp buffer and return pointer to start. */
+const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp)
+{
+ SBuf *sb;
+ if (tvisstr(o)) {
+ *lenp = strV(o)->len;
+ return strVdata(o);
+ } else if (tvisbuf(o)) {
+ SBufExt *sbx = bufV(o);
+ *lenp = sbufxlen(sbx);
+ return sbx->r;
+ } else if (tvisint(o)) {
+ sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o));
+ } else if (tvisnum(o)) {
+ sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n);
+ } else {
+ return NULL;
+ }
+ *lenp = sbuflen(sb);
+ return sb->b;
+}
+
+/* -- Unformatted conversions to buffer ----------------------------------- */
+
+/* Add integer to buffer. */
+SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k)
+{
+ sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k);
+ return sb;
+}
+
+#if LJ_HASJIT
+/* Add number to buffer. */
+SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o)
+{
+ return lj_strfmt_putfnum(sb, STRFMT_G14, o->n);
+}
+#endif
+
+SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v)
+{
+ sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v);
+ return sb;
+}
+
+/* Add quoted string to buffer. */
+static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len)
+{
+ lj_buf_putb(sb, '"');
+ while (len--) {
+ uint32_t c = (uint32_t)(uint8_t)*s++;
+ char *w = lj_buf_more(sb, 4);
+ if (c == '"' || c == '\\' || c == '\n') {
+ *w++ = '\\';
+ } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */
+ uint32_t d;
+ *w++ = '\\';
+ if (c >= 100 || lj_char_isdigit((uint8_t)*s)) {
+ *w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100;
+ goto tens;
+ } else if (c >= 10) {
+ tens:
+ d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d);
+ }
+ c += '0';
+ }
+ *w++ = (char)c;
+ sb->w = w;
+ }
+ lj_buf_putb(sb, '"');
+ return sb;
+}
+
+#if LJ_HASJIT
+SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str)
+{
+ return strfmt_putquotedlen(sb, strdata(str), str->len);
+}
+#endif
+
+/* -- Formatted conversions to buffer ------------------------------------- */
+
+/* Add formatted char to buffer. */
+SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c)
+{
+ MSize width = STRFMT_WIDTH(sf);
+ char *w = lj_buf_more(sb, width > 1 ? width : 1);
+ if ((sf & STRFMT_F_LEFT)) *w++ = (char)c;
+ while (width-- > 1) *w++ = ' ';
+ if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c;
+ sb->w = w;
+ return sb;
+}
+
+/* Add formatted string to buffer. */
+static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len)
+{
+ MSize width = STRFMT_WIDTH(sf);
+ char *w;
+ if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf);
+ w = lj_buf_more(sb, width > len ? width : len);
+ if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
+ while (width-- > len) *w++ = ' ';
+ if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len);
+ sb->w = w;
+ return sb;
+}
+
+#if LJ_HASJIT
+SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str)
+{
+ return strfmt_putfstrlen(sb, sf, strdata(str), str->len);
+}
+#endif
+
+/* Add formatted signed/unsigned integer to buffer. */
+SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
+{
+ char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w;
+#ifdef LUA_USE_ASSERT
+ char *ws;
+#endif
+ MSize prefix = 0, len, prec, pprec, width, need;
+
+ /* Figure out signed prefixes. */
+ if (STRFMT_TYPE(sf) == STRFMT_INT) {
+ if ((int64_t)k < 0) {
+ k = (uint64_t)-(int64_t)k;
+ prefix = 256 + '-';
+ } else if ((sf & STRFMT_F_PLUS)) {
+ prefix = 256 + '+';
+ } else if ((sf & STRFMT_F_SPACE)) {
+ prefix = 256 + ' ';
+ }
+ }
+
+ /* Convert number and store to fixed-size buffer in reverse order. */
+ prec = STRFMT_PREC(sf);
+ if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO;
+ if (k == 0) { /* Special-case zero argument. */
+ if (prec != 0 ||
+ (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT))
+ *--q = '0';
+ } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */
+ uint32_t k2;
+ while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; }
+ k2 = (uint32_t)k;
+ do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2);
+ } else if ((sf & STRFMT_T_HEX)) { /* Hex. */
+ const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" :
+ "0123456789abcdef";
+ do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k);
+ if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x');
+ } else { /* Octal. */
+ do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k);
+ if ((sf & STRFMT_F_ALT)) *--q = '0';
+ }
+
+ /* Calculate sizes. */
+ len = (MSize)(buf + sizeof(buf) - q);
+ if ((int32_t)len >= (int32_t)prec) prec = len;
+ width = STRFMT_WIDTH(sf);
+ pprec = prec + (prefix >> 8);
+ need = width > pprec ? width : pprec;
+ w = lj_buf_more(sb, need);
+#ifdef LUA_USE_ASSERT
+ ws = w;
+#endif
+
+ /* Format number with leading/trailing whitespace and zeros. */
+ if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0)
+ while (width-- > pprec) *w++ = ' ';
+ if (prefix) {
+ if ((char)prefix >= 'X') *w++ = '0';
+ *w++ = (char)prefix;
+ }
+ if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO)
+ while (width-- > pprec) *w++ = '0';
+ while (prec-- > len) *w++ = '0';
+ while (q < buf + sizeof(buf)) *w++ = *q++; /* Add number itself. */
+ if ((sf & STRFMT_F_LEFT))
+ while (width-- > pprec) *w++ = ' ';
+
+ lj_assertX(need == (MSize)(w - ws), "miscalculated format size");
+ sb->w = w;
+ return sb;
+}
+
+/* Add number formatted as signed integer to buffer. */
+SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
+{
+ int64_t k = (int64_t)n;
+ if (checki32(k) && sf == STRFMT_INT)
+ return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
+ else
+ return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
+}
+
+/* Add number formatted as unsigned integer to buffer. */
+SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
+{
+ int64_t k;
+ if (n >= 9223372036854775808.0)
+ k = (int64_t)(n - 18446744073709551616.0);
+ else
+ k = (int64_t)n;
+ return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
+}
+
+/* Format stack arguments to buffer. */
+int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry)
+{
+ int narg = (int)(L->top - L->base);
+ GCstr *fmt = lj_lib_checkstr(L, arg);
+ FormatState fs;
+ SFormat sf;
+ lj_strfmt_init(&fs, strdata(fmt), fmt->len);
+ while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+ if (sf == STRFMT_LIT) {
+ lj_buf_putmem(sb, fs.str, fs.len);
+ } else if (sf == STRFMT_ERR) {
+ lj_err_callerv(L, LJ_ERR_STRFMT,
+ strdata(lj_str_new(L, fs.str, fs.len)));
+ } else {
+ TValue *o = &L->base[arg++];
+ if (arg > narg)
+ lj_err_arg(L, arg, LJ_ERR_NOVAL);
+ switch (STRFMT_TYPE(sf)) {
+ case STRFMT_INT:
+ if (tvisint(o)) {
+ int32_t k = intV(o);
+ if (sf == STRFMT_INT)
+ lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */
+ else
+ lj_strfmt_putfxint(sb, sf, k);
+ break;
+ }
+#if LJ_HASFFI
+ if (tviscdata(o)) {
+ GCcdata *cd = cdataV(o);
+ if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
+ lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
+ break;
+ }
+ }
+#endif
+ lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg));
+ break;
+ case STRFMT_UINT:
+ if (tvisint(o)) {
+ lj_strfmt_putfxint(sb, sf, intV(o));
+ break;
+ }
+#if LJ_HASFFI
+ if (tviscdata(o)) {
+ GCcdata *cd = cdataV(o);
+ if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) {
+ lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd));
+ break;
+ }
+ }
+#endif
+ lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg));
+ break;
+ case STRFMT_NUM:
+ lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg));
+ break;
+ case STRFMT_STR: {
+ MSize len;
+ const char *s;
+ cTValue *mo;
+ if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 &&
+ !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) {
+ /* Call __tostring metamethod once. */
+ copyTV(L, L->top++, mo);
+ copyTV(L, L->top++, o);
+ lua_call(L, 1, 1);
+ o = &L->base[arg-1]; /* Stack may have been reallocated. */
+ copyTV(L, o, --L->top); /* Replace inline for retry. */
+ if (retry < 2) { /* Global buffer may have been overwritten. */
+ retry = 1;
+ break;
+ }
+ }
+ if (LJ_LIKELY(tvisstr(o))) {
+ len = strV(o)->len;
+ s = strVdata(o);
+#if LJ_HASBUFFER
+ } else if (tvisbuf(o)) {
+ SBufExt *sbx = bufV(o);
+ if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF);
+ len = sbufxlen(sbx);
+ s = sbx->r;
+#endif
+ } else {
+ GCstr *str = lj_strfmt_obj(L, o);
+ len = str->len;
+ s = strdata(str);
+ }
+ if ((sf & STRFMT_T_QUOTED))
+ strfmt_putquotedlen(sb, s, len); /* No formatting. */
+ else
+ strfmt_putfstrlen(sb, sf, s, len);
+ break;
+ }
+ case STRFMT_CHAR:
+ lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg));
+ break;
+ case STRFMT_PTR: /* No formatting. */
+ lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o));
+ break;
+ default:
+ lj_assertL(0, "bad string format type");
+ break;
+ }
+ }
+ }
+ return retry;
+}
+
+/* -- Conversions to strings ---------------------------------------------- */
+
+/* Convert integer to string. */
+GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k)
+{
+ char buf[STRFMT_MAXBUF_INT];
+ MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf);
+ return lj_str_new(L, buf, len);
+}
+
+/* Convert integer or number to string. */
+GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o)
+{
+ return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o);
+}
+
+#if LJ_HASJIT
+/* Convert char value to string. */
+GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c)
+{
+ char buf[1];
+ buf[0] = c;
+ return lj_str_new(L, buf, 1);
+}
+#endif
+
+/* Raw conversion of object to string. */
+GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o)
+{
+ if (tvisstr(o)) {
+ return strV(o);
+ } else if (tvisnumber(o)) {
+ return lj_strfmt_number(L, o);
+ } else if (tvisnil(o)) {
+ return lj_str_newlit(L, "nil");
+ } else if (tvisfalse(o)) {
+ return lj_str_newlit(L, "false");
+ } else if (tvistrue(o)) {
+ return lj_str_newlit(L, "true");
+ } else {
+ char buf[8+2+2+16], *p = buf;
+ p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o)));
+ *p++ = ':'; *p++ = ' ';
+ if (tvisfunc(o) && isffunc(funcV(o))) {
+ p = lj_buf_wmem(p, "builtin#", 8);
+ p = lj_strfmt_wint(p, funcV(o)->c.ffid);
+ } else {
+ p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o));
+ }
+ return lj_str_new(L, buf, (size_t)(p - buf));
+ }
+}
+
+/* -- Internal string formatting ------------------------------------------ */
+
+/*
+** These functions are only used for lua_pushfstring(), lua_pushvfstring()
+** and for internal string formatting (e.g. error messages). Caveat: unlike
+** string.format(), only a limited subset of formats and flags are supported!
+**
+** LuaJIT has support for a couple more formats than Lua 5.1/5.2:
+** - %d %u %o %x with full formatting, 32 bit integers only.
+** - %f and other FP formats are really %.14g.
+** - %s %c %p without formatting.
+*/
+
+/* Push formatted message as a string object to Lua stack. va_list variant. */
+const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp)
+{
+ SBuf *sb = lj_buf_tmp_(L);
+ FormatState fs;
+ SFormat sf;
+ GCstr *str;
+ lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt));
+ while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) {
+ switch (STRFMT_TYPE(sf)) {
+ case STRFMT_LIT:
+ lj_buf_putmem(sb, fs.str, fs.len);
+ break;
+ case STRFMT_INT:
+ lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t));
+ break;
+ case STRFMT_UINT:
+ lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t));
+ break;
+ case STRFMT_NUM:
+ lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number));
+ break;
+ case STRFMT_STR: {
+ const char *s = va_arg(argp, char *);
+ if (s == NULL) s = "(null)";
+ lj_buf_putmem(sb, s, (MSize)strlen(s));
+ break;
+ }
+ case STRFMT_CHAR:
+ lj_buf_putb(sb, va_arg(argp, int));
+ break;
+ case STRFMT_PTR:
+ lj_strfmt_putptr(sb, va_arg(argp, void *));
+ break;
+ case STRFMT_ERR:
+ default:
+ lj_buf_putb(sb, '?');
+ lj_assertL(0, "bad string format near offset %d", fs.len);
+ break;
+ }
+ }
+ str = lj_buf_str(L, sb);
+ setstrV(L, L->top, str);
+ incr_top(L);
+ return strdata(str);
+}
+
+/* Push formatted message as a string object to Lua stack. Vararg variant. */
+const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
+{
+ const char *msg;
+ va_list argp;
+ va_start(argp, fmt);
+ msg = lj_strfmt_pushvf(L, fmt, argp);
+ va_end(argp);
+ return msg;
+}
+
diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h
new file mode 100644
index 00000000..a4529604
--- /dev/null
+++ b/src/lj_strfmt.h
@@ -0,0 +1,131 @@
+/*
+** String formatting.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_STRFMT_H
+#define _LJ_STRFMT_H
+
+#include "lj_obj.h"
+
+typedef uint32_t SFormat; /* Format indicator. */
+
+/* Format parser state. */
+typedef struct FormatState {
+ const uint8_t *p; /* Current format string pointer. */
+ const uint8_t *e; /* End of format string. */
+ const char *str; /* Returned literal string. */
+ MSize len; /* Size of literal string. */
+} FormatState;
+
+/* Format types (max. 16). */
+typedef enum FormatType {
+ STRFMT_EOF, STRFMT_ERR, STRFMT_LIT,
+ STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR
+} FormatType;
+
+/* Format subtypes (bits are reused). */
+#define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */
+#define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */
+#define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */
+#define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */
+#define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */
+#define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */
+#define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */
+
+/* Format flags. */
+#define STRFMT_F_LEFT 0x0100
+#define STRFMT_F_PLUS 0x0200
+#define STRFMT_F_ZERO 0x0400
+#define STRFMT_F_SPACE 0x0800
+#define STRFMT_F_ALT 0x1000
+#define STRFMT_F_UPPER 0x2000
+
+/* Format indicator fields. */
+#define STRFMT_SH_WIDTH 16
+#define STRFMT_SH_PREC 24
+
+#define STRFMT_TYPE(sf) ((FormatType)((sf) & 15))
+#define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u)
+#define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u)
+#define STRFMT_FP(sf) (((sf) >> 4) & 3)
+
+/* Formats for conversion characters. */
+#define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A)
+#define STRFMT_C (STRFMT_CHAR)
+#define STRFMT_D (STRFMT_INT)
+#define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E)
+#define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F)
+#define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G)
+#define STRFMT_I STRFMT_D
+#define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT)
+#define STRFMT_P (STRFMT_PTR)
+#define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED)
+#define STRFMT_S (STRFMT_STR)
+#define STRFMT_U (STRFMT_UINT)
+#define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX)
+#define STRFMT_G14 (STRFMT_G | ((14+1) << STRFMT_SH_PREC))
+
+/* Maximum buffer sizes for conversions. */
+#define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */
+#define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */
+#define STRFMT_MAXBUF_NUM 32 /* Must correspond with STRFMT_G14. */
+#define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */
+
+/* Format parser. */
+LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs);
+
+static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len)
+{
+ fs->p = (const uint8_t *)p;
+ fs->e = (const uint8_t *)p + len;
+ /* Must be NUL-terminated. May have NULs inside, too. */
+ lj_assertX(*fs->e == 0, "format not NUL-terminated");
+}
+
+/* Raw conversions. */
+LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k);
+LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v);
+LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v);
+LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp);
+
+/* Unformatted conversions to buffer. */
+LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k);
+#if LJ_HASJIT
+LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o);
+#endif
+LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v);
+#if LJ_HASJIT
+LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str);
+#endif
+
+/* Formatted conversions to buffer. */
+LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k);
+LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n);
+LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n);
+LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n);
+LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c);
+#if LJ_HASJIT
+LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str);
+#endif
+LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry);
+
+/* Conversions to strings. */
+LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k);
+LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o);
+LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o);
+#if LJ_HASJIT
+LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c);
+#endif
+LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o);
+
+/* Internal string formatting. */
+LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt,
+ va_list argp);
+LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...)
+#if defined(__GNUC__) || defined(__clang__)
+ __attribute__ ((format (printf, 2, 3)))
+#endif
+ ;
+
+#endif
diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c
new file mode 100644
index 00000000..3c60695c
--- /dev/null
+++ b/src/lj_strfmt_num.c
@@ -0,0 +1,592 @@
+/*
+** String formatting for floating-point numbers.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+** Contributed by Peter Cawley.
+*/
+
+#include <stdio.h>
+
+#define lj_strfmt_num_c
+#define LUA_CORE
+
+#include "lj_obj.h"
+#include "lj_buf.h"
+#include "lj_str.h"
+#include "lj_strfmt.h"
+
+/* -- Precomputed tables -------------------------------------------------- */
+
+/* Rescale factors to push the exponent of a number towards zero. */
+#define RESCALE_EXPONENTS(P, N) \
+ P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \
+ P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \
+ N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \
+ N(251), N(270), N(289)
+
+#define ONE_E_P(X) 1e+0 ## X
+#define ONE_E_N(X) 1e-0 ## X
+static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) };
+static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) };
+#undef ONE_E_N
+#undef ONE_E_P
+
+/*
+** For p in range -70 through 57, this table encodes pairs (m, e) such that
+** 4*2^p <= (uint8_t)m*10^e, and is the smallest value for which this holds.
+*/
+static const int8_t four_ulp_m_e[] = {
+ 34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19,
+ -82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16,
+ 45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14,
+ 114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3,
+ -11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7,
+ 3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103,
+ -5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3,
+ 16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2,
+ 32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4,
+ 33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34,
+ 7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10,
+ 69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13,
+ 36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15,
+ 19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17
+};
+
+/* min(2^32-1, 10^e-1) for e in range 0 through 10 */
+static uint32_t ndigits_dec_threshold[] = {
+ 0, 9U, 99U, 999U, 9999U, 99999U, 999999U,
+ 9999999U, 99999999U, 999999999U, 0xffffffffU
+};
+
+/* -- Helper functions ---------------------------------------------------- */
+
+/* Compute the number of digits in the decimal representation of x. */
+static MSize ndigits_dec(uint32_t x)
+{
+ MSize t = ((lj_fls(x | 1) * 77) >> 8) + 1; /* 2^8/77 is roughly log2(10) */
+ return t + (x > ndigits_dec_threshold[t]);
+}
+
+#define WINT_R(x, sh, sc) \
+ { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); }
+
+/* Write 9-digit unsigned integer to buffer. */
+static char *lj_strfmt_wuint9(char *p, uint32_t u)
+{
+ uint32_t v = u / 10000, w;
+ u -= v * 10000;
+ w = v / 10000;
+ v -= w * 10000;
+ *p++ = (char)('0'+w);
+ WINT_R(v, 23, 1000)
+ WINT_R(v, 12, 100)
+ WINT_R(v, 10, 10)
+ *p++ = (char)('0'+v);
+ WINT_R(u, 23, 1000)
+ WINT_R(u, 12, 100)
+ WINT_R(u, 10, 10)
+ *p++ = (char)('0'+u);
+ return p;
+}
+#undef WINT_R
+
+/* -- Extended precision arithmetic --------------------------------------- */
+
+/*
+** The "nd" format is a fixed-precision decimal representation for numbers. It
+** consists of up to 64 uint32_t values, with each uint32_t storing a value
+** in the range [0, 1e9). A number in "nd" format consists of three variables:
+**
+** uint32_t nd[64];
+** uint32_t ndlo;
+** uint32_t ndhi;
+**
+** The integral part of the number is stored in nd[0 ... ndhi], the value of
+** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of
+** the number is zero, ndlo is zero. Otherwise, the fractional part is stored
+** in nd[ndlo ... 63], the value of which is taken to be
+** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}.
+**
+** If the array part had 128 elements rather than 64, then every double would
+** have an exact representation in "nd" format. With 64 elements, all integral
+** doubles have an exact representation, and all non-integral doubles have
+** enough digits to make both %.99e and %.99f do the right thing.
+*/
+
+#if LJ_64
+#define ND_MUL2K_MAX_SHIFT 29
+#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000))
+#else
+#define ND_MUL2K_MAX_SHIFT 11
+#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125)
+#endif
+
+/* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). */
+static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k,
+ uint32_t carry_in, SFormat sf)
+{
+ uint32_t i, ndlo = 0, start = 1;
+ /* Performance hacks. */
+ if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) {
+ start = ndhi - (STRFMT_PREC(sf) + 17) / 8;
+ }
+ /* Real logic. */
+ while (k >= ND_MUL2K_MAX_SHIFT) {
+ for (i = ndlo; i <= ndhi; i++) {
+ uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in;
+ carry_in = ND_MUL2K_DIV1E9(val);
+ nd[i] = (uint32_t)val - carry_in * 1000000000;
+ }
+ if (carry_in) {
+ nd[++ndhi] = carry_in; carry_in = 0;
+ if (start++ == ndlo) ++ndlo;
+ }
+ k -= ND_MUL2K_MAX_SHIFT;
+ }
+ if (k) {
+ for (i = ndlo; i <= ndhi; i++) {
+ uint64_t val = ((uint64_t)nd[i] << k) | carry_in;
+ carry_in = ND_MUL2K_DIV1E9(val);
+ nd[i] = (uint32_t)val - carry_in * 1000000000;
+ }
+ if (carry_in) nd[++ndhi] = carry_in;
+ }
+ return ndhi;
+}
+
+/* Divide nd by 2^k (ndlo is assumed to be zero). */
+static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf)
+{
+ uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0;
+ /* Performance hacks. */
+ if (!ndhi) {
+ if (!nd[0]) {
+ return 0;
+ } else {
+ uint32_t s = lj_ffs(nd[0]);
+ if (s >= k) { nd[0] >>= k; return 0; }
+ nd[0] >>= s; k -= s;
+ }
+ }
+ if (k > 18) {
+ if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) {
+ stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9;
+ } else {
+ int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k;
+ int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114);
+ stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9;
+ stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8;
+ }
+ }
+ /* Real logic. */
+ while (k >= 9) {
+ uint32_t i = ndhi, carry = 0;
+ for (;;) {
+ uint32_t val = nd[i];
+ nd[i] = (val >> 9) + carry;
+ carry = (val & 0x1ff) * 1953125;
+ if (i == ndlo) break;
+ i = (i - 1) & 0x3f;
+ }
+ if (ndlo != stop1 && ndlo != stop2) {
+ if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
+ if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
+ } else if (!nd[ndhi]) {
+ if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; }
+ else return ndlo;
+ }
+ k -= 9;
+ }
+ if (k) {
+ uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0;
+ for (;;) {
+ uint32_t val = nd[i];
+ nd[i] = (val >> k) + carry;
+ carry = (val & mask) * mul;
+ if (i == ndlo) break;
+ i = (i - 1) & 0x3f;
+ }
+ if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; }
+ }
+ return ndlo;
+}
+
+/* Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9). */
+static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e)
+{
+ uint32_t i, carry;
+ if (e >= 0) {
+ i = (uint32_t)e/9;
+ carry = m * (ndigits_dec_threshold[e - (int32_t)i*9] + 1);
+ } else {
+ int32_t f = (e-8)/9;
+ i = (uint32_t)(64 + f);
+ carry = m * (ndigits_dec_threshold[e - f*9] + 1);
+ }
+ for (;;) {
+ uint32_t val = nd[i] + carry;
+ if (LJ_UNLIKELY(val >= 1000000000)) {
+ val -= 1000000000;
+ nd[i] = val;
+ if (LJ_UNLIKELY(i == ndhi)) {
+ ndhi = (ndhi + 1) & 0x3f;
+ nd[ndhi] = 1;
+ break;
+ }
+ carry = 1;
+ i = (i + 1) & 0x3f;
+ } else {
+ nd[i] = val;
+ break;
+ }
+ }
+ return ndhi;
+}
+
+/* Test whether two "nd" values are equal in their most significant digits. */
+static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen,
+ MSize prec)
+{
+ char nd9[9], ref9[9];
+ if (hilen <= prec) {
+ if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0;
+ prec -= hilen; ref--; ndhi = (ndhi - 1) & 0x3f;
+ if (prec >= 9) {
+ if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0;
+ prec -= 9; ref--; ndhi = (ndhi - 1) & 0x3f;
+ }
+ } else {
+ prec -= hilen - 9;
+ }
+ lj_assertX(prec < 9, "bad precision %d", prec);
+ lj_strfmt_wuint9(nd9, nd[ndhi]);
+ lj_strfmt_wuint9(ref9, *ref);
+ return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5');
+}
+
+/* -- Formatted conversions to buffer ------------------------------------- */
+
+/* Write formatted floating-point number to either sb or p. */
+static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p)
+{
+ MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len;
+ TValue t;
+ t.n = n;
+ if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) {
+ /* Handle non-finite values uniformly for %a, %e, %f, %g. */
+ int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0;
+ if (((t.u32.hi & 0x000fffff) | t.u32.lo) != 0) {
+ ch ^= ('n' << 16) | ('a' << 8) | 'n';
+ if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+ } else {
+ ch ^= ('i' << 16) | ('n' << 8) | 'f';
+ if ((t.u32.hi & 0x80000000)) prefix = '-';
+ else if ((sf & STRFMT_F_PLUS)) prefix = '+';
+ else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+ }
+ len = 3 + (prefix != 0);
+ if (!p) p = lj_buf_more(sb, width > len ? width : len);
+ if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
+ if (prefix) *p++ = prefix;
+ *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch;
+ } else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) {
+ /* %a */
+ const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEFPX"
+ : "0123456789abcdefpx";
+ int32_t e = (t.u32.hi >> 20) & 0x7ff;
+ char prefix = 0, eprefix = '+';
+ if (t.u32.hi & 0x80000000) prefix = '-';
+ else if ((sf & STRFMT_F_PLUS)) prefix = '+';
+ else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+ t.u32.hi &= 0xfffff;
+ if (e) {
+ t.u32.hi |= 0x100000;
+ e -= 1023;
+ } else if (t.u32.lo | t.u32.hi) {
+ /* Non-zero denormal - normalise it. */
+ uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo);
+ e = -1022 - shift;
+ t.u64 <<= shift;
+ }
+ /* abs(n) == t.u64 * 2^(e - 52) */
+ /* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */
+ if ((int32_t)prec < 0) {
+ /* Default precision: use smallest precision giving exact result. */
+ prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi|0x100000)/4;
+ } else if (prec < 13) {
+ /* Precision is sufficiently low as to maybe require rounding. */
+ t.u64 += (((uint64_t)1) << (51 - prec*4));
+ }
+ if (e < 0) {
+ eprefix = '-';
+ e = -e;
+ }
+ len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0)
+ + ((prec | (sf & STRFMT_F_ALT)) != 0);
+ if (!p) p = lj_buf_more(sb, width > len ? width : len);
+ if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
+ while (width-- > len) *p++ = ' ';
+ }
+ if (prefix) *p++ = prefix;
+ *p++ = '0';
+ *p++ = hexdig[17]; /* x or X */
+ if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
+ while (width-- > len) *p++ = '0';
+ }
+ *p++ = '0' + (t.u32.hi >> 20); /* Usually '1', sometimes '0' or '2'. */
+ if ((prec | (sf & STRFMT_F_ALT))) {
+ /* Emit fractional part. */
+ char *q = p + 1 + prec;
+ *p = '.';
+ if (prec < 13) t.u64 >>= (52 - prec*4);
+ else while (prec > 13) p[prec--] = '0';
+ while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; }
+ p = q;
+ }
+ *p++ = hexdig[16]; /* p or P */
+ *p++ = eprefix; /* + or - */
+ p = lj_strfmt_wint(p, e);
+ } else {
+ /* %e or %f or %g - begin by converting n to "nd" format. */
+ uint32_t nd[64];
+ uint32_t ndhi = 0, ndlo, i;
+ int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0;
+ char prefix = 0, *q;
+ if (t.u32.hi & 0x80000000) prefix = '-';
+ else if ((sf & STRFMT_F_PLUS)) prefix = '+';
+ else if ((sf & STRFMT_F_SPACE)) prefix = ' ';
+ prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */
+ if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) {
+ /* %g - decrement precision if non-zero (to make it like %e). */
+ prec--;
+ prec ^= (uint32_t)((int32_t)prec >> 31);
+ }
+ if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) {
+ /* Precision is sufficiently low that rescaling will probably work. */
+ if ((ndebias = rescale_e[e >> 6])) {
+ t.n = n * rescale_n[e >> 6];
+ if (LJ_UNLIKELY(!e)) t.n *= 1e10, ndebias -= 10;
+ t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */
+ nd[0] = 0x100000 | (t.u32.hi & 0xfffff);
+ e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29);
+ goto load_t_lo; rescale_failed:
+ t.n = n;
+ e = (t.u32.hi >> 20) & 0x7ff;
+ ndebias = ndhi = 0;
+ }
+ }
+ nd[0] = t.u32.hi & 0xfffff;
+ if (e == 0) e++; else nd[0] |= 0x100000;
+ e -= 1043;
+ if (t.u32.lo) {
+ e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo:
+#if ND_MUL2K_MAX_SHIFT >= 29
+ nd[0] = (nd[0] << 3) | (t.u32.lo >> 29);
+ ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf);
+#elif ND_MUL2K_MAX_SHIFT >= 11
+ ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf);
+ ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf);
+ ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo << 1) & 0x7ff, sf);
+#else
+#error "ND_MUL2K_MAX_SHIFT too small"
+#endif
+ }
+ if (e >= 0) {
+ ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf);
+ ndlo = 0;
+ } else {
+ ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf);
+ if (ndhi && !nd[ndhi]) ndhi--;
+ }
+ /* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */
+ if ((sf & STRFMT_T_FP_E)) {
+ /* %e or %g - assume %e and start by calculating nd's exponent (nde). */
+ char eprefix = '+';
+ int32_t nde = -1;
+ MSize hilen;
+ if (ndlo && !nd[ndhi]) {
+ ndhi = 64; do {} while (!nd[--ndhi]);
+ nde -= 64 * 9;
+ }
+ hilen = ndigits_dec(nd[ndhi]);
+ nde += ndhi * 9 + hilen;
+ if (ndebias) {
+ /*
+ ** Rescaling was performed, but this introduced some error, and might
+ ** have pushed us across a rounding boundary. We check whether this
+ ** error affected the result by introducing even more error (2ulp in
+ ** either direction), and seeing whether a rounding boundary was
+ ** crossed. Having already converted the -2ulp case, we save off its
+ ** most significant digits, convert the +2ulp case, and compare them.
+ */
+ int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29)
+ + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12));
+ const int8_t *m_e = four_ulp_m_e + eidx * 2;
+ lj_assertG_(G(sbufL(sb)), 0 <= eidx && eidx < 128, "bad eidx %d", eidx);
+ nd[33] = nd[ndhi];
+ nd[32] = nd[(ndhi - 1) & 0x3f];
+ nd[31] = nd[(ndhi - 2) & 0x3f];
+ nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]);
+ if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) {
+ goto rescale_failed;
+ }
+ }
+ if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) {
+ /* Precision is sufficiently low as to maybe require rounding. */
+ ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1);
+ nde += (hilen != ndigits_dec(nd[ndhi]));
+ }
+ nde += ndebias;
+ if ((sf & STRFMT_T_FP_F)) {
+ /* %g */
+ if ((int32_t)prec >= nde && nde >= -4) {
+ if (nde < 0) ndhi = 0;
+ prec -= nde;
+ goto g_format_like_f;
+ } else if (!(sf & STRFMT_F_ALT) && prec && width > 5) {
+ /* Decrease precision in order to strip trailing zeroes. */
+ char tail[9];
+ uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9;
+ if (prec >= maxprec) prec = maxprec;
+ else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f;
+ i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10;
+ lj_strfmt_wuint9(tail, nd[ndlo]);
+ while (prec && tail[--i] == '0') {
+ prec--;
+ if (!i) {
+ if (ndlo == ndhi) { prec = 0; break; }
+ lj_strfmt_wuint9(tail, nd[++ndlo]);
+ i = 9;
+ }
+ }
+ }
+ }
+ if (nde < 0) {
+ /* Make nde non-negative. */
+ eprefix = '-';
+ nde = -nde;
+ }
+ len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10)
+ + ((prec | (sf & STRFMT_F_ALT)) != 0);
+ if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5);
+ if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
+ while (width-- > len) *p++ = ' ';
+ }
+ if (prefix) *p++ = prefix;
+ if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
+ while (width-- > len) *p++ = '0';
+ }
+ q = lj_strfmt_wint(p + 1, nd[ndhi]);
+ p[0] = p[1]; /* Put leading digit in the correct place. */
+ if ((prec | (sf & STRFMT_F_ALT))) {
+ /* Emit fractional part. */
+ p[1] = '.'; p += 2;
+ prec -= (MSize)(q - p); p = q; /* Account for digits already emitted. */
+ /* Then emit chunks of 9 digits (this may emit 8 digits too many). */
+ for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) {
+ i = (i - 1) & 0x3f;
+ p = lj_strfmt_wuint9(p, nd[i]);
+ }
+ if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) {
+ /* %g (and not %#g) - strip trailing zeroes. */
+ p += (int32_t)prec & ((int32_t)prec >> 31);
+ while (p[-1] == '0') p--;
+ if (p[-1] == '.') p--;
+ } else {
+ /* %e (or %#g) - emit trailing zeroes. */
+ while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
+ p += (int32_t)prec;
+ }
+ } else {
+ p++;
+ }
+ *p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e';
+ *p++ = eprefix; /* + or - */
+ if (nde < 10) *p++ = '0'; /* Always at least two digits of exponent. */
+ p = lj_strfmt_wint(p, nde);
+ } else {
+ /* %f (or, shortly, %g in %f style) */
+ if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) {
+ /* Precision is sufficiently low as to maybe require rounding. */
+ ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1);
+ }
+ g_format_like_f:
+ if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) {
+ /* Decrease precision in order to strip trailing zeroes. */
+ if (ndlo) {
+ /* nd has a fractional part; we need to look at its digits. */
+ char tail[9];
+ uint32_t maxprec = (64 - ndlo) * 9;
+ if (prec >= maxprec) prec = maxprec;
+ else ndlo = 64 - (prec + 8) / 9;
+ i = prec - ((63 - ndlo) * 9);
+ lj_strfmt_wuint9(tail, nd[ndlo]);
+ while (prec && tail[--i] == '0') {
+ prec--;
+ if (!i) {
+ if (ndlo == 63) { prec = 0; break; }
+ lj_strfmt_wuint9(tail, nd[++ndlo]);
+ i = 9;
+ }
+ }
+ } else {
+ /* nd has no fractional part, so precision goes straight to zero. */
+ prec = 0;
+ }
+ }
+ len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0)
+ + ((prec | (sf & STRFMT_F_ALT)) != 0);
+ if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8);
+ if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) {
+ while (width-- > len) *p++ = ' ';
+ }
+ if (prefix) *p++ = prefix;
+ if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) {
+ while (width-- > len) *p++ = '0';
+ }
+ /* Emit integer part. */
+ p = lj_strfmt_wint(p, nd[ndhi]);
+ i = ndhi;
+ while (i) p = lj_strfmt_wuint9(p, nd[--i]);
+ if ((prec | (sf & STRFMT_F_ALT))) {
+ /* Emit fractional part. */
+ *p++ = '.';
+ /* Emit chunks of 9 digits (this may emit 8 digits too many). */
+ while ((int32_t)prec > 0 && i != ndlo) {
+ i = (i - 1) & 0x3f;
+ p = lj_strfmt_wuint9(p, nd[i]);
+ prec -= 9;
+ }
+ if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) {
+ /* %g (and not %#g) - strip trailing zeroes. */
+ p += (int32_t)prec & ((int32_t)prec >> 31);
+ while (p[-1] == '0') p--;
+ if (p[-1] == '.') p--;
+ } else {
+ /* %f (or %#g) - emit trailing zeroes. */
+ while ((int32_t)prec > 0) { *p++ = '0'; prec--; }
+ p += (int32_t)prec;
+ }
+ }
+ }
+ }
+ if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' ';
+ return p;
+}
+
+/* Add formatted floating-point number to buffer. */
+SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n)
+{
+ sb->w = lj_strfmt_wfnum(sb, sf, n, NULL);
+ return sb;
+}
+
+/* -- Conversions to strings ---------------------------------------------- */
+
+/* Convert number to string. */
+GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o)
+{
+ char buf[STRFMT_MAXBUF_NUM];
+ MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf);
+ return lj_str_new(L, buf, len);
+}
+
diff --git a/src/lj_strscan.c b/src/lj_strscan.c
index 914cfb7a..1d1c1c74 100644
--- a/src/lj_strscan.c
+++ b/src/lj_strscan.c
@@ -80,7 +80,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg)
/* Avoid double rounding for denormals. */
if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) {
/* NYI: all of this generates way too much code on 32 bit CPUs. */
-#if defined(__GNUC__) && LJ_64
+#if (defined(__GNUC__) || defined(__clang__)) && LJ_64
int32_t b = (int32_t)(__builtin_clzll(x)^63);
#else
int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) :
@@ -94,7 +94,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg)
}
/* Convert to double using a signed int64_t conversion, then rescale. */
- lua_assert((int64_t)x >= 0);
+ lj_assertX((int64_t)x >= 0, "bad double conversion");
n = (double)(int64_t)x;
if (neg) n = -n;
if (ex2) n = ldexp(n, ex2);
@@ -142,7 +142,7 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o,
break;
}
- /* Reduce range then convert to double. */
+ /* Reduce range, then convert to double. */
if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
strscan_double(x, o, ex2, neg);
return fmt;
@@ -264,7 +264,7 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
uint32_t hi = 0, lo = (uint32_t)(xip-xi);
int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1);
- lua_assert(lo > 0 && (ex10 & 1) == 0);
+ lj_assertX(lo > 0 && (ex10 & 1) == 0, "bad lo %d ex10 %d", lo, ex10);
/* Handle simple overflow/underflow. */
if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; }
@@ -328,10 +328,55 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o,
return fmt;
}
+/* Parse binary number. */
+static StrScanFmt strscan_bin(const uint8_t *p, TValue *o,
+ StrScanFmt fmt, uint32_t opt,
+ int32_t ex2, int32_t neg, uint32_t dig)
+{
+ uint64_t x = 0;
+ uint32_t i;
+
+ if (ex2 || dig > 64) return STRSCAN_ERROR;
+
+ /* Scan binary digits. */
+ for (i = dig; i; i--, p++) {
+ if ((*p & ~1) != '0') return STRSCAN_ERROR;
+ x = (x << 1) | (*p & 1);
+ }
+
+ /* Format-specific handling. */
+ switch (fmt) {
+ case STRSCAN_INT:
+ if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) {
+ o->i = neg ? -(int32_t)x : (int32_t)x;
+ return STRSCAN_INT; /* Fast path for 32 bit integers. */
+ }
+ if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; }
+ /* fallthrough */
+ case STRSCAN_U32:
+ if (dig > 32) return STRSCAN_ERROR;
+ o->i = neg ? -(int32_t)x : (int32_t)x;
+ return STRSCAN_U32;
+ case STRSCAN_I64:
+ case STRSCAN_U64:
+ o->u64 = neg ? (uint64_t)-(int64_t)x : x;
+ return fmt;
+ default:
+ break;
+ }
+
+ /* Reduce range, then convert to double. */
+ if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; }
+ strscan_double(x, o, ex2, neg);
+ return fmt;
+}
+
/* Scan string containing a number. Returns format. Returns value in o. */
-StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
+StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
+ uint32_t opt)
{
int32_t neg = 0;
+ const uint8_t *pe = p + len;
/* Remove leading space, parse sign and non-numbers. */
if (LJ_UNLIKELY(!lj_char_isdigit(*p))) {
@@ -349,7 +394,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
p += 3;
}
while (lj_char_isspace(*p)) p++;
- if (*p) return STRSCAN_ERROR;
+ if (*p || p < pe) return STRSCAN_ERROR;
o->u64 = tmp.u64;
return STRSCAN_NUM;
}
@@ -366,8 +411,12 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
/* Determine base and skip leading zeros. */
if (LJ_UNLIKELY(*p <= '0')) {
- if (*p == '0' && casecmp(p[1], 'x'))
- base = 16, cmask = LJ_CHAR_XDIGIT, p += 2;
+ if (*p == '0') {
+ if (casecmp(p[1], 'x'))
+ base = 16, cmask = LJ_CHAR_XDIGIT, p += 2;
+ else if (casecmp(p[1], 'b'))
+ base = 2, cmask = LJ_CHAR_DIGIT, p += 2;
+ }
for ( ; ; p++) {
if (*p == '0') {
hasdig = 1;
@@ -396,6 +445,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
/* Handle decimal point. */
if (dp) {
+ if (base == 2) return STRSCAN_ERROR;
fmt = STRSCAN_NUM;
if (dig) {
ex = (int32_t)(dp-(p-1)); dp = p-1;
@@ -406,7 +456,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
}
/* Parse exponent. */
- if (casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) {
+ if (base >= 10 && casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) {
uint32_t xx;
int negx = 0;
fmt = STRSCAN_NUM; p++;
@@ -445,6 +495,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
while (lj_char_isspace(*p)) p++;
if (*p) return STRSCAN_ERROR;
}
+ if (p < pe) return STRSCAN_ERROR;
/* Fast path for decimal 32 bit integers. */
if (fmt == STRSCAN_INT && base == 10 &&
@@ -466,6 +517,8 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
return strscan_oct(sp, o, fmt, neg, dig);
if (base == 16)
fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig);
+ else if (base == 2)
+ fmt = strscan_bin(sp, o, fmt, opt, ex, neg, dig);
else
fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
@@ -481,18 +534,19 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt)
int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o)
{
- StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
+ StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
STRSCAN_OPT_TONUM);
- lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM);
+ lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM, "bad scan format");
return (fmt != STRSCAN_ERROR);
}
#if LJ_DUALNUM
int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o)
{
- StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o,
+ StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o,
STRSCAN_OPT_TOINT);
- lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT);
+ lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT,
+ "bad scan format");
if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM);
return (fmt != STRSCAN_ERROR);
}
diff --git a/src/lj_strscan.h b/src/lj_strscan.h
index 61ddcb45..8ed31542 100644
--- a/src/lj_strscan.h
+++ b/src/lj_strscan.h
@@ -22,7 +22,8 @@ typedef enum {
STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64,
} StrScanFmt;
-LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt);
+LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
+ uint32_t opt);
LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o);
#if LJ_DUALNUM
LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o);
diff --git a/src/lj_tab.c b/src/lj_tab.c
index c5b6bcbf..c3609b38 100644
--- a/src/lj_tab.c
+++ b/src/lj_tab.c
@@ -16,25 +16,10 @@
/* -- Object hashing ------------------------------------------------------ */
-/* Hash values are masked with the table hash mask and used as an index. */
-static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
-{
- Node *n = noderef(t->node);
- return &n[hash & t->hmask];
-}
-
-/* String hashes are precomputed when they are interned. */
-#define hashstr(t, s) hashmask(t, (s)->hash)
-
-#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
-#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
-#define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS)
-#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
-
/* Hash an arbitrary key and return its anchor position in the hash table. */
static Node *hashkey(const GCtab *t, cTValue *key)
{
- lua_assert(!tvisint(key));
+ lj_assertX(!tvisint(key), "attempt to hash integer");
if (tvisstr(key))
return hashstr(t, strV(key));
else if (tvisnum(key))
@@ -53,13 +38,13 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits)
{
uint32_t hsize;
Node *node;
- lua_assert(hbits != 0);
+ lj_assertL(hbits != 0, "zero hash size");
if (hbits > LJ_MAX_HBITS)
lj_err_msg(L, LJ_ERR_TABOV);
hsize = 1u << hbits;
node = lj_mem_newvec(L, hsize, Node);
- setmref(node->freetop, &node[hsize]);
setmref(t->node, node);
+ setfreetop(t, node, &node[hsize]);
t->hmask = hsize-1;
}
@@ -74,7 +59,7 @@ static LJ_AINLINE void clearhpart(GCtab *t)
{
uint32_t i, hmask = t->hmask;
Node *node = noderef(t->node);
- lua_assert(t->hmask != 0);
+ lj_assertX(t->hmask != 0, "empty hash part");
for (i = 0; i <= hmask; i++) {
Node *n = &node[i];
setmref(n->next, NULL);
@@ -98,7 +83,8 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
GCtab *t;
/* First try to colocate the array part. */
if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) {
- lua_assert((sizeof(GCtab) & 7) == 0);
+ Node *nilnode;
+ lj_assertL((sizeof(GCtab) & 7) == 0, "bad GCtab size");
t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize));
t->gct = ~LJ_TTAB;
t->nomm = (uint8_t)~0;
@@ -107,8 +93,13 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
setgcrefnull(t->metatable);
t->asize = asize;
t->hmask = 0;
- setmref(t->node, &G(L)->nilnode);
+ nilnode = &G(L)->nilnode;
+ setmref(t->node, nilnode);
+#if LJ_GC64
+ setmref(t->freetop, nilnode);
+#endif
} else { /* Otherwise separately allocate the array part. */
+ Node *nilnode;
t = lj_mem_newobj(L, GCtab);
t->gct = ~LJ_TTAB;
t->nomm = (uint8_t)~0;
@@ -117,7 +108,11 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits)
setgcrefnull(t->metatable);
t->asize = 0; /* In case the array allocation fails. */
t->hmask = 0;
- setmref(t->node, &G(L)->nilnode);
+ nilnode = &G(L)->nilnode;
+ setmref(t->node, nilnode);
+#if LJ_GC64
+ setmref(t->freetop, nilnode);
+#endif
if (asize > 0) {
if (asize > LJ_MAX_ASIZE)
lj_err_msg(L, LJ_ERR_TABOV);
@@ -149,6 +144,12 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits)
return t;
}
+/* The API of this function conforms to lua_createtable(). */
+GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h)
+{
+ return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h));
+}
+
#if LJ_HASJIT
GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize)
{
@@ -165,7 +166,8 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
GCtab *t;
uint32_t asize, hmask;
t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0);
- lua_assert(kt->asize == t->asize && kt->hmask == t->hmask);
+ lj_assertL(kt->asize == t->asize && kt->hmask == t->hmask,
+ "mismatched size of table and template");
t->nomm = 0; /* Keys with metamethod names may be present. */
asize = kt->asize;
if (asize > 0) {
@@ -185,7 +187,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
Node *node = noderef(t->node);
Node *knode = noderef(kt->node);
ptrdiff_t d = (char *)node - (char *)knode;
- setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d));
+ setfreetop(t, node, (Node *)((char *)getfreetop(kt, knode) + d));
for (i = 0; i <= hmask; i++) {
Node *kn = &knode[i];
Node *n = &node[i];
@@ -198,6 +200,17 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt)
return t;
}
+/* Clear a table. */
+void LJ_FASTCALL lj_tab_clear(GCtab *t)
+{
+ clearapart(t);
+ if (t->hmask > 0) {
+ Node *node = noderef(t->node);
+ setfreetop(t, node, &node[t->hmask+1]);
+ clearhpart(t);
+ }
+}
+
/* Free a table. */
void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
{
@@ -214,7 +227,7 @@ void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t)
/* -- Table resizing ------------------------------------------------------ */
/* Resize a table to fit the new array/hash part sizes. */
-static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
+void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
{
Node *oldnode = noderef(t->node);
uint32_t oldasize = t->asize;
@@ -247,6 +260,9 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
} else {
global_State *g = G(L);
setmref(t->node, &g->nilnode);
+#if LJ_GC64
+ setmref(t->freetop, &g->nilnode);
+#endif
t->hmask = 0;
}
if (asize < oldasize) { /* Array part shrinks? */
@@ -276,7 +292,7 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits)
static uint32_t countint(cTValue *key, uint32_t *bins)
{
- lua_assert(!tvisint(key));
+ lj_assertX(!tvisint(key), "bad integer key");
if (tvisnum(key)) {
lua_Number nk = numV(key);
int32_t k = lj_num2int(nk);
@@ -348,7 +364,7 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek)
asize += countint(ek, bins);
na = bestasize(bins, &asize);
total -= na;
- resizetab(L, t, asize, hsize2hbits(total));
+ lj_tab_resize(L, t, asize, hsize2hbits(total));
}
#if LJ_HASFFI
@@ -360,7 +376,7 @@ void lj_tab_rehash(lua_State *L, GCtab *t)
void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize)
{
- resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);
+ lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0);
}
/* -- Table getters ------------------------------------------------------- */
@@ -378,7 +394,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key)
return NULL;
}
-cTValue *lj_tab_getstr(GCtab *t, GCstr *key)
+cTValue *lj_tab_getstr(GCtab *t, const GCstr *key)
{
Node *n = hashstr(t, key);
do {
@@ -428,16 +444,17 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
Node *n = hashkey(t, key);
if (!tvisnil(&n->val) || t->hmask == 0) {
Node *nodebase = noderef(t->node);
- Node *collide, *freenode = noderef(nodebase->freetop);
- lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1);
+ Node *collide, *freenode = getfreetop(t, nodebase);
+ lj_assertL(freenode >= nodebase && freenode <= nodebase+t->hmask+1,
+ "bad freenode");
do {
if (freenode == nodebase) { /* No free node found? */
rehashtab(L, t, key); /* Rehash table. */
return lj_tab_set(L, t, key); /* Retry key insertion. */
}
} while (!tvisnil(&(--freenode)->key));
- setmref(nodebase->freetop, freenode);
- lua_assert(freenode != &G(L)->nilnode);
+ setfreetop(t, nodebase, freenode);
+ lj_assertL(freenode != &G(L)->nilnode, "store to fallback hash");
collide = hashkey(t, &n->key);
if (collide != n) { /* Colliding node not the main node? */
while (noderef(collide->next) != n) /* Find predecessor. */
@@ -493,7 +510,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key)
if (LJ_UNLIKELY(tvismzero(&n->key)))
n->key.u64 = 0;
lj_gc_anybarriert(L, t);
- lua_assert(tvisnil(&n->val));
+ lj_assertL(tvisnil(&n->val), "new hash slot is not empty");
return &n->val;
}
@@ -510,7 +527,7 @@ TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key)
return lj_tab_newkey(L, t, &k);
}
-TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key)
+TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key)
{
TValue k;
Node *n = hashstr(t, key);
@@ -551,103 +568,126 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key)
/* -- Table traversal ----------------------------------------------------- */
-/* Get the traversal index of a key. */
-static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key)
+/* Table traversal indexes:
+**
+** Array key index: [0 .. t->asize-1]
+** Hash key index: [t->asize .. t->asize+t->hmask]
+** Invalid key: ~0
+*/
+
+/* Get the successor traversal index of a key. */
+uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key)
{
TValue tmp;
if (tvisint(key)) {
int32_t k = intV(key);
if ((uint32_t)k < t->asize)
- return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */
+ return (uint32_t)k + 1;
setnumV(&tmp, (lua_Number)k);
key = &tmp;
} else if (tvisnum(key)) {
lua_Number nk = numV(key);
int32_t k = lj_num2int(nk);
if ((uint32_t)k < t->asize && nk == (lua_Number)k)
- return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */
+ return (uint32_t)k + 1;
}
if (!tvisnil(key)) {
Node *n = hashkey(t, key);
do {
if (lj_obj_equal(&n->key, key))
- return t->asize + (uint32_t)(n - noderef(t->node));
- /* Hash key indexes: [t->asize..t->asize+t->nmask] */
+ return t->asize + (uint32_t)((n+1) - noderef(t->node));
} while ((n = nextnode(n)));
- if (key->u32.hi == 0xfffe7fff) /* ITERN was despecialized while running. */
- return key->u32.lo - 1;
- lj_err_msg(L, LJ_ERR_NEXTIDX);
- return 0; /* unreachable */
+ if (key->u32.hi == LJ_KEYINDEX) /* Despecialized ITERN while running. */
+ return key->u32.lo;
+ return ~0u; /* Invalid key to next. */
}
- return ~0u; /* A nil key starts the traversal. */
+ return 0; /* A nil key starts the traversal. */
}
-/* Advance to the next step in a table traversal. */
-int lj_tab_next(lua_State *L, GCtab *t, TValue *key)
+/* Get the next key/value pair of a table traversal. */
+int lj_tab_next(GCtab *t, cTValue *key, TValue *o)
{
- uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */
- for (i++; i < t->asize; i++) /* First traverse the array keys. */
- if (!tvisnil(arrayslot(t, i))) {
- setintV(key, i);
- copyTV(L, key+1, arrayslot(t, i));
+ uint32_t idx = lj_tab_keyindex(t, key); /* Find successor index of key. */
+ /* First traverse the array part. */
+ for (; idx < t->asize; idx++) {
+ cTValue *a = arrayslot(t, idx);
+ if (LJ_LIKELY(!tvisnil(a))) {
+ setintV(o, idx);
+ o[1] = *a;
return 1;
}
- for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. */
- Node *n = &noderef(t->node)[i];
+ }
+ idx -= t->asize;
+ /* Then traverse the hash part. */
+ for (; idx <= t->hmask; idx++) {
+ Node *n = &noderef(t->node)[idx];
if (!tvisnil(&n->val)) {
- copyTV(L, key, &n->key);
- copyTV(L, key+1, &n->val);
+ o[0] = n->key;
+ o[1] = n->val;
return 1;
}
}
- return 0; /* End of traversal. */
+ return (int32_t)idx < 0 ? -1 : 0; /* Invalid key or end of traversal. */
}
/* -- Table length calculation -------------------------------------------- */
-static MSize unbound_search(GCtab *t, MSize j)
+/* Compute table length. Slow path with mixed array/hash lookups. */
+LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi)
{
cTValue *tv;
- MSize i = j; /* i is zero or a present index */
- j++;
- /* find `i' and `j' such that i is present and j is not */
- while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) {
- i = j;
- j *= 2;
- if (j > (MSize)(INT_MAX-2)) { /* overflow? */
- /* table was built with bad purposes: resort to linear search */
- i = 1;
- while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++;
- return i - 1;
+ size_t lo = hi;
+ hi++;
+ /* Widening search for an upper bound. */
+ while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) {
+ lo = hi;
+ hi += hi;
+ if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */
+ lo = 1;
+ while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++;
+ return (MSize)(lo - 1);
}
}
- /* now do a binary search between them */
- while (j - i > 1) {
- MSize m = (i+j)/2;
- cTValue *tvb = lj_tab_getint(t, (int32_t)m);
- if (tvb && !tvisnil(tvb)) i = m; else j = m;
+ /* Binary search to find a non-nil to nil transition. */
+ while (hi - lo > 1) {
+ size_t mid = (lo+hi) >> 1;
+ cTValue *tvb = lj_tab_getint(t, (int32_t)mid);
+ if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid;
}
- return i;
+ return (MSize)lo;
}
-/*
-** Try to find a boundary in table `t'. A `boundary' is an integer index
-** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil).
-*/
+/* Compute table length. Fast path. */
MSize LJ_FASTCALL lj_tab_len(GCtab *t)
{
- MSize j = (MSize)t->asize;
- if (j > 1 && tvisnil(arrayslot(t, j-1))) {
- MSize i = 1;
- while (j - i > 1) {
- MSize m = (i+j)/2;
- if (tvisnil(arrayslot(t, m-1))) j = m; else i = m;
+ size_t hi = (size_t)t->asize;
+ if (hi) hi--;
+ /* In a growing array the last array element is very likely nil. */
+ if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) {
+ /* Binary search to find a non-nil to nil transition in the array. */
+ size_t lo = 0;
+ while (hi - lo > 1) {
+ size_t mid = (lo+hi) >> 1;
+ if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid;
}
- return i-1;
+ return (MSize)lo;
}
- if (j) j--;
- if (t->hmask <= 0)
- return j;
- return unbound_search(t, j);
+ /* Without a hash part, there's an implicit nil after the last element. */
+ return t->hmask ? tab_len_slow(t, hi) : (MSize)hi;
}
+#if LJ_HASJIT
+/* Verify hinted table length or compute it. */
+MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint)
+{
+ size_t asize = (size_t)t->asize;
+ cTValue *tv = arrayslot(t, hint);
+ if (LJ_LIKELY(hint+1 < asize)) {
+ if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint;
+ } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) {
+ return (MSize)hint;
+ }
+ return lj_tab_len(t);
+}
+#endif
+
diff --git a/src/lj_tab.h b/src/lj_tab.h
index 4a106873..2a3f76bf 100644
--- a/src/lj_tab.h
+++ b/src/lj_tab.h
@@ -31,30 +31,52 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi)
return hi;
}
+/* Hash values are masked with the table hash mask and used as an index. */
+static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
+{
+ Node *n = noderef(t->node);
+ return &n[hash & t->hmask];
+}
+
+/* String IDs are generated when a string is interned. */
+#define hashstr(t, s) hashmask(t, (s)->sid)
+
+#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
+#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
+#if LJ_GC64
+#define hashgcref(t, r) \
+ hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
+#else
+#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
+#endif
+
#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
+LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h);
#if LJ_HASJIT
LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize);
#endif
LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt);
+LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t);
LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t);
#if LJ_HASFFI
LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t);
#endif
+LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits);
LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
/* Caveat: all getters except lj_tab_get() can return NULL! */
LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key);
-LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key);
+LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key);
LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
/* Caveat: all setters require a write barrier for the stored value. */
LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
-LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
-LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
+LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
+LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key);
LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
#define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize)
@@ -64,7 +86,11 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
#define lj_tab_setint(L, t, key) \
(inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key)))
-LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key);
+LJ_FUNC uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key);
+LJ_FUNCA int lj_tab_next(GCtab *t, cTValue *key, TValue *o);
LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t);
+#if LJ_HASJIT
+LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint);
+#endif
#endif
diff --git a/src/lj_target.h b/src/lj_target.h
index 75eb965f..19716928 100644
--- a/src/lj_target.h
+++ b/src/lj_target.h
@@ -55,7 +55,7 @@ typedef uint32_t RegSP;
/* Bitset for registers. 32 registers suffice for most architectures.
** Note that one set holds bits for both GPRs and FPRs.
*/
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
typedef uint64_t RegSet;
#else
typedef uint32_t RegSet;
@@ -69,7 +69,7 @@ typedef uint32_t RegSet;
#define rset_set(rs, r) (rs |= RID2RSET(r))
#define rset_clear(rs, r) (rs &= ~RID2RSET(r))
#define rset_exclude(rs, r) (rs & ~RID2RSET(r))
-#if LJ_TARGET_PPC || LJ_TARGET_MIPS
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64
#define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63))
#define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs))
#else
@@ -138,6 +138,8 @@ typedef uint32_t RegCost;
#include "lj_target_x86.h"
#elif LJ_TARGET_ARM
#include "lj_target_arm.h"
+#elif LJ_TARGET_ARM64
+#include "lj_target_arm64.h"
#elif LJ_TARGET_PPC
#include "lj_target_ppc.h"
#elif LJ_TARGET_MIPS
@@ -150,7 +152,8 @@ typedef uint32_t RegCost;
/* Return the address of an exit stub. */
static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno)
{
- lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL);
+ lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL,
+ "exit stub group for exit %d uninitialized", exitno);
return (char *)group[exitno / EXITSTUBS_PER_GROUP] +
EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP);
}
diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h
index 76a30710..48f487a5 100644
--- a/src/lj_target_arm.h
+++ b/src/lj_target_arm.h
@@ -211,6 +211,7 @@ typedef enum ARMIns {
/* ARMv6T2 */
ARMI_MOVW = 0xe3000000,
ARMI_MOVT = 0xe3400000,
+ ARMI_BFI = 0xe7c00010,
/* VFP */
ARMI_VMOV_D = 0xeeb00b40,
@@ -243,10 +244,6 @@ typedef enum ARMIns {
ARMI_VCVT_S32_F64 = 0xeebd0bc0,
ARMI_VCVT_U32_F32 = 0xeebc0ac0,
ARMI_VCVT_U32_F64 = 0xeebc0bc0,
- ARMI_VCVTR_S32_F32 = 0xeebd0a40,
- ARMI_VCVTR_S32_F64 = 0xeebd0b40,
- ARMI_VCVTR_U32_F32 = 0xeebc0a40,
- ARMI_VCVTR_U32_F64 = 0xeebc0b40,
ARMI_VCVT_F32_S32 = 0xeeb80ac0,
ARMI_VCVT_F64_S32 = 0xeeb80bc0,
ARMI_VCVT_F32_U32 = 0xeeb80a40,
diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h
new file mode 100644
index 00000000..d45af2e4
--- /dev/null
+++ b/src/lj_target_arm64.h
@@ -0,0 +1,336 @@
+/*
+** Definitions for ARM64 CPUs.
+** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+*/
+
+#ifndef _LJ_TARGET_ARM64_H
+#define _LJ_TARGET_ARM64_H
+
+/* -- Registers IDs ------------------------------------------------------- */
+
+#define GPRDEF(_) \
+ _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \
+ _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \
+ _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \
+ _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP)
+#define FPRDEF(_) \
+ _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \
+ _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \
+ _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \
+ _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31)
+#define VRIDDEF(_)
+
+#define RIDENUM(name) RID_##name,
+
+enum {
+ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */
+ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
+ RID_MAX,
+ RID_TMP = RID_LR,
+ RID_ZERO = RID_SP,
+
+ /* Calling conventions. */
+ RID_RET = RID_X0,
+ RID_RETLO = RID_X0,
+ RID_RETHI = RID_X1,
+ RID_FPRET = RID_D0,
+
+ /* These definitions must match with the *.dasc file(s): */
+ RID_BASE = RID_X19, /* Interpreter BASE. */
+ RID_LPC = RID_X21, /* Interpreter PC. */
+ RID_GL = RID_X22, /* Interpreter GL. */
+ RID_LREG = RID_X23, /* Interpreter L. */
+
+ /* Register ranges [min, max) and number of registers. */
+ RID_MIN_GPR = RID_X0,
+ RID_MAX_GPR = RID_SP+1,
+ RID_MIN_FPR = RID_MAX_GPR,
+ RID_MAX_FPR = RID_D31+1,
+ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
+ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR
+};
+
+#define RID_NUM_KREF RID_NUM_GPR
+#define RID_MIN_KREF RID_X0
+
+/* -- Register sets ------------------------------------------------------- */
+
+/* Make use of all registers, except for x18, fp, lr and sp. */
+#define RSET_FIXED \
+ (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\
+ RID2RSET(RID_GL))
+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
+#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
+#define RSET_ALL (RSET_GPR|RSET_FPR)
+#define RSET_INIT RSET_ALL
+
+/* lr is an implicit scratch register. */
+#define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1))
+#define RSET_SCRATCH_FPR \
+ (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1))
+#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
+#define REGARG_FIRSTGPR RID_X0
+#define REGARG_LASTGPR RID_X7
+#define REGARG_NUMGPR 8
+#define REGARG_FIRSTFPR RID_D0
+#define REGARG_LASTFPR RID_D7
+#define REGARG_NUMFPR 8
+
+/* -- Spill slots --------------------------------------------------------- */
+
+/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs.
+**
+** SPS_FIXED: Available fixed spill slots in interpreter frame.
+** This definition must match with the vm_arm64.dasc file.
+** Pre-allocate some slots to avoid sp adjust in every root trace.
+**
+** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots.
+*/
+#define SPS_FIXED 4
+#define SPS_FIRST 2
+
+#define SPOFS_TMP 0
+
+#define sps_scale(slot) (4 * (int32_t)(slot))
+#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3)
+
+/* -- Exit state ---------------------------------------------------------- */
+
+/* This definition must match with the *.dasc file(s). */
+typedef struct {
+ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
+ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
+ int32_t spill[256]; /* Spill slots. */
+} ExitState;
+
+/* Highest exit + 1 indicates stack check. */
+#define EXITSTATE_CHECKEXIT 1
+
+/* Return the address of a per-trace exit stub. */
+static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno)
+{
+ while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */
+ return p + 3 + exitno;
+}
+/* Avoid dependence on lj_jit.h if only including lj_target.h. */
+#define exitstub_trace_addr(T, exitno) \
+ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno))
+
+/* -- Instructions -------------------------------------------------------- */
+
+/* ARM64 instructions are always little-endian. Swap for ARM64BE. */
+#if LJ_BE
+#define A64I_LE(x) (lj_bswap(x))
+#else
+#define A64I_LE(x) (x)
+#endif
+
+/* Instruction fields. */
+#define A64F_D(r) (r)
+#define A64F_N(r) ((r) << 5)
+#define A64F_A(r) ((r) << 10)
+#define A64F_M(r) ((r) << 16)
+#define A64F_IMMS(x) ((x) << 10)
+#define A64F_IMMR(x) ((x) << 16)
+#define A64F_U16(x) ((x) << 5)
+#define A64F_U12(x) ((x) << 10)
+#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu))
+#define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5)
+#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5)
+#define A64F_S9(x) ((x) << 12)
+#define A64F_BIT(x) ((x) << 19)
+#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10))
+#define A64F_EX(ex) (A64I_EX | ((ex) << 13))
+#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10))
+#define A64F_FP8(x) ((x) << 13)
+#define A64F_CC(cc) ((cc) << 12)
+#define A64F_LSL16(x) (((x) / 16) << 21)
+#define A64F_BSH(sh) ((sh) << 10)
+
+/* Check for valid field range. */
+#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0)
+
+typedef enum A64Ins {
+ A64I_S = 0x20000000,
+ A64I_X = 0x80000000,
+ A64I_EX = 0x00200000,
+ A64I_ON = 0x00200000,
+ A64I_K12 = 0x1a000000,
+ A64I_K13 = 0x18000000,
+ A64I_LS_U = 0x01000000,
+ A64I_LS_S = 0x00800000,
+ A64I_LS_R = 0x01200800,
+ A64I_LS_SH = 0x00001000,
+ A64I_LS_UXTWx = 0x00004000,
+ A64I_LS_SXTWx = 0x0000c000,
+ A64I_LS_SXTXx = 0x0000e000,
+ A64I_LS_LSLx = 0x00006000,
+
+ A64I_ADDw = 0x0b000000,
+ A64I_ADDx = 0x8b000000,
+ A64I_ADDSw = 0x2b000000,
+ A64I_ADDSx = 0xab000000,
+ A64I_NEGw = 0x4b0003e0,
+ A64I_NEGx = 0xcb0003e0,
+ A64I_SUBw = 0x4b000000,
+ A64I_SUBx = 0xcb000000,
+ A64I_SUBSw = 0x6b000000,
+ A64I_SUBSx = 0xeb000000,
+
+ A64I_MULw = 0x1b007c00,
+ A64I_MULx = 0x9b007c00,
+ A64I_SMULL = 0x9b207c00,
+
+ A64I_ANDw = 0x0a000000,
+ A64I_ANDx = 0x8a000000,
+ A64I_ANDSw = 0x6a000000,
+ A64I_ANDSx = 0xea000000,
+ A64I_EORw = 0x4a000000,
+ A64I_EORx = 0xca000000,
+ A64I_ORRw = 0x2a000000,
+ A64I_ORRx = 0xaa000000,
+ A64I_TSTw = 0x6a00001f,
+ A64I_TSTx = 0xea00001f,
+
+ A64I_CMPw = 0x6b00001f,
+ A64I_CMPx = 0xeb00001f,
+ A64I_CMNw = 0x2b00001f,
+ A64I_CMNx = 0xab00001f,
+ A64I_CCMPw = 0x7a400000,
+ A64I_CCMPx = 0xfa400000,
+ A64I_CSELw = 0x1a800000,
+ A64I_CSELx = 0x9a800000,
+
+ A64I_ASRw = 0x13007c00,
+ A64I_ASRx = 0x9340fc00,
+ A64I_LSLx = 0xd3400000,
+ A64I_LSRx = 0xd340fc00,
+ A64I_SHRw = 0x1ac02000,
+ A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */
+ A64I_REVw = 0x5ac00800,
+ A64I_REVx = 0xdac00c00,
+
+ A64I_EXTRw = 0x13800000,
+ A64I_EXTRx = 0x93c00000,
+ A64I_BFMw = 0x33000000,
+ A64I_BFMx = 0xb3400000,
+ A64I_SBFMw = 0x13000000,
+ A64I_SBFMx = 0x93400000,
+ A64I_SXTBw = 0x13001c00,
+ A64I_SXTHw = 0x13003c00,
+ A64I_SXTW = 0x93407c00,
+ A64I_UBFMw = 0x53000000,
+ A64I_UBFMx = 0xd3400000,
+ A64I_UXTBw = 0x53001c00,
+ A64I_UXTHw = 0x53003c00,
+
+ A64I_MOVw = 0x2a0003e0,
+ A64I_MOVx = 0xaa0003e0,
+ A64I_MVNw = 0x2a2003e0,
+ A64I_MVNx = 0xaa2003e0,
+ A64I_MOVKw = 0x72800000,
+ A64I_MOVKx = 0xf2800000,
+ A64I_MOVZw = 0x52800000,
+ A64I_MOVZx = 0xd2800000,
+ A64I_MOVNw = 0x12800000,
+ A64I_MOVNx = 0x92800000,
+
+ A64I_LDRB = 0x39400000,
+ A64I_LDRH = 0x79400000,
+ A64I_LDRw = 0xb9400000,
+ A64I_LDRx = 0xf9400000,
+ A64I_LDRLw = 0x18000000,
+ A64I_LDRLx = 0x58000000,
+ A64I_STRB = 0x39000000,
+ A64I_STRH = 0x79000000,
+ A64I_STRw = 0xb9000000,
+ A64I_STRx = 0xf9000000,
+ A64I_STPw = 0x29000000,
+ A64I_STPx = 0xa9000000,
+ A64I_LDPw = 0x29400000,
+ A64I_LDPx = 0xa9400000,
+
+ A64I_B = 0x14000000,
+ A64I_BCC = 0x54000000,
+ A64I_BL = 0x94000000,
+ A64I_BR = 0xd61f0000,
+ A64I_BLR = 0xd63f0000,
+ A64I_TBZ = 0x36000000,
+ A64I_TBNZ = 0x37000000,
+ A64I_CBZ = 0x34000000,
+ A64I_CBNZ = 0x35000000,
+
+ A64I_NOP = 0xd503201f,
+
+ /* FP */
+ A64I_FADDd = 0x1e602800,
+ A64I_FSUBd = 0x1e603800,
+ A64I_FMADDd = 0x1f400000,
+ A64I_FMSUBd = 0x1f408000,
+ A64I_FNMADDd = 0x1f600000,
+ A64I_FNMSUBd = 0x1f608000,
+ A64I_FMULd = 0x1e600800,
+ A64I_FDIVd = 0x1e601800,
+ A64I_FNEGd = 0x1e614000,
+ A64I_FABS = 0x1e60c000,
+ A64I_FSQRTd = 0x1e61c000,
+ A64I_LDRs = 0xbd400000,
+ A64I_LDRd = 0xfd400000,
+ A64I_STRs = 0xbd000000,
+ A64I_STRd = 0xfd000000,
+ A64I_LDPs = 0x2d400000,
+ A64I_LDPd = 0x6d400000,
+ A64I_STPs = 0x2d000000,
+ A64I_STPd = 0x6d000000,
+ A64I_FCMPd = 0x1e602000,
+ A64I_FCMPZd = 0x1e602008,
+ A64I_FCSELd = 0x1e600c00,
+ A64I_FRINTMd = 0x1e654000,
+ A64I_FRINTPd = 0x1e64c000,
+ A64I_FRINTZd = 0x1e65c000,
+
+ A64I_FCVT_F32_F64 = 0x1e624000,
+ A64I_FCVT_F64_F32 = 0x1e22c000,
+ A64I_FCVT_F32_S32 = 0x1e220000,
+ A64I_FCVT_F64_S32 = 0x1e620000,
+ A64I_FCVT_F32_U32 = 0x1e230000,
+ A64I_FCVT_F64_U32 = 0x1e630000,
+ A64I_FCVT_F32_S64 = 0x9e220000,
+ A64I_FCVT_F64_S64 = 0x9e620000,
+ A64I_FCVT_F32_U64 = 0x9e230000,
+ A64I_FCVT_F64_U64 = 0x9e630000,
+ A64I_FCVT_S32_F64 = 0x1e780000,
+ A64I_FCVT_S32_F32 = 0x1e380000,
+ A64I_FCVT_U32_F64 = 0x1e790000,
+ A64I_FCVT_U32_F32 = 0x1e390000,
+ A64I_FCVT_S64_F64 = 0x9e780000,
+ A64I_FCVT_S64_F32 = 0x9e380000,
+ A64I_FCVT_U64_F64 = 0x9e790000,
+ A64I_FCVT_U64_F32 = 0x9e390000,
+
+ A64I_FMOV_S = 0x1e204000,
+ A64I_FMOV_D = 0x1e604000,
+ A64I_FMOV_R_S = 0x1e260000,
+ A64I_FMOV_S_R = 0x1e270000,
+ A64I_FMOV_R_D = 0x9e660000,
+ A64I_FMOV_D_R = 0x9e670000,
+ A64I_FMOV_DI = 0x1e601000,
+} A64Ins;
+
+typedef enum A64Shift {
+ A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR
+} A64Shift;
+
+typedef enum A64Extend {
+ A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX,
+ A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX,
+} A64Extend;
+
+/* ARM condition codes. */
+typedef enum A64CC {
+ CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC,
+ CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL,
+ CC_HS = CC_CS, CC_LO = CC_CC
+} A64CC;
+
+#endif
diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h
index ec935494..da72d61a 100644
--- a/src/lj_target_mips.h
+++ b/src/lj_target_mips.h
@@ -13,11 +13,15 @@
_(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \
_(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \
_(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA)
+#if LJ_SOFTFP
+#define FPRDEF(_)
+#else
#define FPRDEF(_) \
_(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \
_(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \
_(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \
_(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31)
+#endif
#define VRIDDEF(_)
#define RIDENUM(name) RID_##name,
@@ -39,7 +43,11 @@ enum {
RID_RETHI = RID_R2,
RID_RETLO = RID_R3,
#endif
+#if LJ_SOFTFP
+ RID_FPRET = RID_R2,
+#else
RID_FPRET = RID_F0,
+#endif
RID_CFUNCADDR = RID_R25,
/* These definitions must match with the *.dasc file(s): */
@@ -52,8 +60,12 @@ enum {
/* Register ranges [min, max) and number of registers. */
RID_MIN_GPR = RID_R0,
RID_MAX_GPR = RID_RA+1,
- RID_MIN_FPR = RID_F0,
+ RID_MIN_FPR = RID_MAX_GPR,
+#if LJ_SOFTFP
+ RID_MAX_FPR = RID_MIN_FPR,
+#else
RID_MAX_FPR = RID_F31+1,
+#endif
RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR,
RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */
};
@@ -68,28 +80,60 @@ enum {
(RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\
RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP))
#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED)
+#if LJ_SOFTFP
+#define RSET_FPR 0
+#else
+#if LJ_32
#define RSET_FPR \
(RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\
RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30))
-#define RSET_ALL (RSET_GPR|RSET_FPR)
-#define RSET_INIT RSET_ALL
+#else
+#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)
+#endif
+#endif
+#define RSET_ALL (RSET_GPR|RSET_FPR)
+#define RSET_INIT RSET_ALL
#define RSET_SCRATCH_GPR \
(RSET_RANGE(RID_R1, RID_R15+1)|\
RID2RSET(RID_R24)|RID2RSET(RID_R25))
+#if LJ_SOFTFP
+#define RSET_SCRATCH_FPR 0
+#else
+#if LJ_32
#define RSET_SCRATCH_FPR \
(RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\
RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\
RID2RSET(RID_F16)|RID2RSET(RID_F18))
+#else
+#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24)
+#endif
+#endif
#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR)
#define REGARG_FIRSTGPR RID_R4
+#if LJ_32
#define REGARG_LASTGPR RID_R7
#define REGARG_NUMGPR 4
+#else
+#define REGARG_LASTGPR RID_R11
+#define REGARG_NUMGPR 8
+#endif
+#if LJ_ABI_SOFTFP
+#define REGARG_FIRSTFPR 0
+#define REGARG_LASTFPR 0
+#define REGARG_NUMFPR 0
+#else
#define REGARG_FIRSTFPR RID_F12
+#if LJ_32
#define REGARG_LASTFPR RID_F14
#define REGARG_NUMFPR 2
+#else
+#define REGARG_LASTFPR RID_F19
+#define REGARG_NUMFPR 8
+#endif
+#endif
/* -- Spill slots --------------------------------------------------------- */
@@ -100,7 +144,11 @@ enum {
**
** SPS_FIRST: First spill slot for general use.
*/
+#if LJ_32
#define SPS_FIXED 5
+#else
+#define SPS_FIXED 4
+#endif
#define SPS_FIRST 4
#define SPOFS_TMP 0
@@ -112,8 +160,10 @@ enum {
/* This definition must match with the *.dasc file(s). */
typedef struct {
+#if !LJ_SOFTFP
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
- int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
+#endif
+ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
int32_t spill[256]; /* Spill slots. */
} ExitState;
@@ -142,52 +192,87 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p)
#define MIPSF_F(r) ((r) << 6)
#define MIPSF_A(n) ((n) << 6)
#define MIPSF_M(n) ((n) << 11)
+#define MIPSF_L(n) ((n) << 6)
typedef enum MIPSIns {
+ MIPSI_D = 0x38,
+ MIPSI_DV = 0x10,
+ MIPSI_D32 = 0x3c,
/* Integer instructions. */
- MIPSI_MOVE = 0x00000021,
+ MIPSI_MOVE = 0x00000025,
MIPSI_NOP = 0x00000000,
MIPSI_LI = 0x24000000,
MIPSI_LU = 0x34000000,
MIPSI_LUI = 0x3c000000,
- MIPSI_ADDIU = 0x24000000,
+ MIPSI_AND = 0x00000024,
MIPSI_ANDI = 0x30000000,
+ MIPSI_OR = 0x00000025,
MIPSI_ORI = 0x34000000,
+ MIPSI_XOR = 0x00000026,
MIPSI_XORI = 0x38000000,
+ MIPSI_NOR = 0x00000027,
+
+ MIPSI_SLT = 0x0000002a,
+ MIPSI_SLTU = 0x0000002b,
MIPSI_SLTI = 0x28000000,
MIPSI_SLTIU = 0x2c000000,
MIPSI_ADDU = 0x00000021,
+ MIPSI_ADDIU = 0x24000000,
+ MIPSI_SUB = 0x00000022,
MIPSI_SUBU = 0x00000023,
+
+#if !LJ_TARGET_MIPSR6
MIPSI_MUL = 0x70000002,
- MIPSI_AND = 0x00000024,
- MIPSI_OR = 0x00000025,
- MIPSI_XOR = 0x00000026,
- MIPSI_NOR = 0x00000027,
- MIPSI_SLT = 0x0000002a,
- MIPSI_SLTU = 0x0000002b,
+ MIPSI_DIV = 0x0000001a,
+ MIPSI_DIVU = 0x0000001b,
+
MIPSI_MOVZ = 0x0000000a,
MIPSI_MOVN = 0x0000000b,
+ MIPSI_MFHI = 0x00000010,
+ MIPSI_MFLO = 0x00000012,
+ MIPSI_MULT = 0x00000018,
+#else
+ MIPSI_MUL = 0x00000098,
+ MIPSI_MUH = 0x000000d8,
+ MIPSI_DIV = 0x0000009a,
+ MIPSI_DIVU = 0x0000009b,
+
+ MIPSI_SELEQZ = 0x00000035,
+ MIPSI_SELNEZ = 0x00000037,
+#endif
MIPSI_SLL = 0x00000000,
MIPSI_SRL = 0x00000002,
MIPSI_SRA = 0x00000003,
- MIPSI_ROTR = 0x00200002, /* MIPS32R2 */
+ MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */
+ MIPSI_DROTR = 0x0020003a,
+ MIPSI_DROTR32 = 0x0020003e,
MIPSI_SLLV = 0x00000004,
MIPSI_SRLV = 0x00000006,
MIPSI_SRAV = 0x00000007,
- MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */
+ MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */
+ MIPSI_DROTRV = 0x00000056,
+
+ MIPSI_INS = 0x7c000004, /* MIPSXXR2 */
- MIPSI_SEB = 0x7c000420, /* MIPS32R2 */
- MIPSI_SEH = 0x7c000620, /* MIPS32R2 */
- MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */
+ MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */
+ MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */
+ MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */
+ MIPSI_DSBH = 0x7c0000a4,
MIPSI_B = 0x10000000,
MIPSI_J = 0x08000000,
MIPSI_JAL = 0x0c000000,
+#if !LJ_TARGET_MIPSR6
+ MIPSI_JALX = 0x74000000,
MIPSI_JR = 0x00000008,
+#else
+ MIPSI_JR = 0x00000009,
+ MIPSI_BALC = 0xe8000000,
+#endif
MIPSI_JALR = 0x0000f809,
MIPSI_BEQ = 0x10000000,
@@ -199,7 +284,9 @@ typedef enum MIPSIns {
/* Load/store instructions. */
MIPSI_LW = 0x8c000000,
+ MIPSI_LD = 0xdc000000,
MIPSI_SW = 0xac000000,
+ MIPSI_SD = 0xfc000000,
MIPSI_LB = 0x80000000,
MIPSI_SB = 0xa0000000,
MIPSI_LH = 0x84000000,
@@ -211,11 +298,69 @@ typedef enum MIPSIns {
MIPSI_LDC1 = 0xd4000000,
MIPSI_SDC1 = 0xf4000000,
+ /* MIPS64 instructions. */
+ MIPSI_DADD = 0x0000002c,
+ MIPSI_DADDU = 0x0000002d,
+ MIPSI_DADDIU = 0x64000000,
+ MIPSI_DSUB = 0x0000002e,
+ MIPSI_DSUBU = 0x0000002f,
+#if !LJ_TARGET_MIPSR6
+ MIPSI_DDIV = 0x0000001e,
+ MIPSI_DDIVU = 0x0000001f,
+ MIPSI_DMULT = 0x0000001c,
+ MIPSI_DMULTU = 0x0000001d,
+#else
+ MIPSI_DDIV = 0x0000009e,
+ MIPSI_DMOD = 0x000000de,
+ MIPSI_DDIVU = 0x0000009f,
+ MIPSI_DMODU = 0x000000df,
+ MIPSI_DMUL = 0x0000009c,
+ MIPSI_DMUH = 0x000000dc,
+#endif
+
+ MIPSI_DSLL = 0x00000038,
+ MIPSI_DSRL = 0x0000003a,
+ MIPSI_DSLLV = 0x00000014,
+ MIPSI_DSRLV = 0x00000016,
+ MIPSI_DSRA = 0x0000003b,
+ MIPSI_DSRAV = 0x00000017,
+ MIPSI_DSRA32 = 0x0000003f,
+ MIPSI_DSLL32 = 0x0000003c,
+ MIPSI_DSRL32 = 0x0000003e,
+ MIPSI_DSHD = 0x7c000164,
+
+ MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU,
+ MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU,
+ MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU,
+ MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD,
+ MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD,
+#if LJ_TARGET_MIPSR6
+ MIPSI_LSA = 0x00000005,
+ MIPSI_DLSA = 0x00000015,
+ MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA,
+#endif
+
+ /* Extract/insert instructions. */
+ MIPSI_DEXTM = 0x7c000001,
+ MIPSI_DEXTU = 0x7c000002,
+ MIPSI_DEXT = 0x7c000003,
+ MIPSI_DINSM = 0x7c000005,
+ MIPSI_DINSU = 0x7c000006,
+ MIPSI_DINS = 0x7c000007,
+
+ MIPSI_FLOOR_D = 0x4620000b,
+
/* FP instructions. */
MIPSI_MOV_S = 0x46000006,
MIPSI_MOV_D = 0x46200006,
+#if !LJ_TARGET_MIPSR6
MIPSI_MOVT_D = 0x46210011,
MIPSI_MOVF_D = 0x46200011,
+#else
+ MIPSI_MIN_D = 0x4620001C,
+ MIPSI_MAX_D = 0x4620001E,
+ MIPSI_SEL_D = 0x46200010,
+#endif
MIPSI_ABS_D = 0x46200005,
MIPSI_NEG_D = 0x46200007,
@@ -235,23 +380,37 @@ typedef enum MIPSIns {
MIPSI_CVT_W_D = 0x46200024,
MIPSI_CVT_S_W = 0x46800020,
MIPSI_CVT_D_W = 0x46800021,
+ MIPSI_CVT_S_L = 0x46a00020,
+ MIPSI_CVT_D_L = 0x46a00021,
MIPSI_TRUNC_W_S = 0x4600000d,
MIPSI_TRUNC_W_D = 0x4620000d,
+ MIPSI_TRUNC_L_S = 0x46000009,
+ MIPSI_TRUNC_L_D = 0x46200009,
MIPSI_FLOOR_W_S = 0x4600000f,
MIPSI_FLOOR_W_D = 0x4620000f,
MIPSI_MFC1 = 0x44000000,
MIPSI_MTC1 = 0x44800000,
+ MIPSI_DMTC1 = 0x44a00000,
+ MIPSI_DMFC1 = 0x44200000,
+#if !LJ_TARGET_MIPSR6
MIPSI_BC1F = 0x45000000,
MIPSI_BC1T = 0x45010000,
-
MIPSI_C_EQ_D = 0x46200032,
+ MIPSI_C_OLT_S = 0x46000034,
MIPSI_C_OLT_D = 0x46200034,
MIPSI_C_ULT_D = 0x46200035,
MIPSI_C_OLE_D = 0x46200036,
MIPSI_C_ULE_D = 0x46200037,
+#else
+ MIPSI_BC1EQZ = 0x45200000,
+ MIPSI_BC1NEZ = 0x45a00000,
+ MIPSI_CMP_EQ_D = 0x46a00002,
+ MIPSI_CMP_LT_S = 0x46800004,
+ MIPSI_CMP_LT_D = 0x46a00004,
+#endif
} MIPSIns;
diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h
index b4f600eb..bc9802a4 100644
--- a/src/lj_target_ppc.h
+++ b/src/lj_target_ppc.h
@@ -104,7 +104,7 @@ enum {
/* This definition must match with the *.dasc file(s). */
typedef struct {
lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */
- int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
+ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */
int32_t spill[256]; /* Spill slots. */
} ExitState;
diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h
index 69aec37c..69cb8ca5 100644
--- a/src/lj_target_x86.h
+++ b/src/lj_target_x86.h
@@ -22,7 +22,7 @@
_(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7)
#endif
#define VRIDDEF(_) \
- _(MRM)
+ _(MRM) _(RIP)
#define RIDENUM(name) RID_##name,
@@ -31,15 +31,16 @@ enum {
FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */
RID_MAX,
RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */
+ RID_RIP = RID_MAX+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */
/* Calling conventions. */
+ RID_SP = RID_ESP,
RID_RET = RID_EAX,
#if LJ_64
RID_FPRET = RID_XMM0,
-#else
+#endif
RID_RETLO = RID_EAX,
RID_RETHI = RID_EDX,
-#endif
/* These definitions must match with the *.dasc file(s): */
RID_BASE = RID_EDX, /* Interpreter BASE. */
@@ -62,8 +63,10 @@ enum {
/* -- Register sets ------------------------------------------------------- */
-/* Make use of all registers, except the stack pointer. */
-#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP))
+/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */
+#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \
+ - RID2RSET(RID_ESP) \
+ - LJ_GC64*RID2RSET(RID_DISPATCH))
#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR))
#define RSET_ALL (RSET_GPR|RSET_FPR)
#define RSET_INIT RSET_ALL
@@ -131,7 +134,11 @@ enum {
#define SPS_FIXED (4*2)
#define SPS_FIRST (4*2) /* Don't use callee register save area. */
#else
+#if LJ_GC64
+#define SPS_FIXED 2
+#else
#define SPS_FIXED 4
+#endif
#define SPS_FIRST 2
#endif
#else
@@ -157,6 +164,8 @@ typedef struct {
#define EXITSTUB_SPACING (2+2)
#define EXITSTUBS_PER_GROUP 32
+#define EXITTRACE_VMSTATE 1 /* g->vmstate has traceno on exit. */
+
/* -- x86 ModRM operand encoding ------------------------------------------ */
typedef enum {
@@ -184,12 +193,18 @@ typedef struct {
#define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24)))
#define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24)))
+#define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24)))
+#define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24)))
+#define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24)))
+#define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24)))
+
/* This list of x86 opcodes is not intended to be complete. Opcodes are only
** included when needed. Take a look at DynASM or jit.dis_x86 to see the
** whole mess.
*/
typedef enum {
/* Fixed length opcodes. XI_* prefix. */
+ XI_O16 = 0x66,
XI_NOP = 0x90,
XI_XCHGa = 0x90,
XI_CALL = 0xe8,
@@ -207,26 +222,28 @@ typedef enum {
XI_PUSHi8 = 0x6a,
XI_TESTb = 0x84,
XI_TEST = 0x85,
+ XI_INT3 = 0xcc,
XI_MOVmi = 0xc7,
XI_GROUP5 = 0xff,
/* Note: little-endian byte-order! */
XI_FLDZ = 0xeed9,
XI_FLD1 = 0xe8d9,
- XI_FLDLG2 = 0xecd9,
- XI_FLDLN2 = 0xedd9,
XI_FDUP = 0xc0d9, /* Really fld st0. */
XI_FPOP = 0xd8dd, /* Really fstp st0. */
XI_FPOP1 = 0xd9dd, /* Really fstp st1. */
XI_FRNDINT = 0xfcd9,
- XI_FSIN = 0xfed9,
- XI_FCOS = 0xffd9,
- XI_FPTAN = 0xf2d9,
- XI_FPATAN = 0xf3d9,
XI_FSCALE = 0xfdd9,
XI_FYL2X = 0xf1d9,
+ /* VEX-encoded instructions. XV_* prefix. */
+ XV_RORX = XV_f20f3a(f0),
+ XV_SARX = XV_f30f38(f7),
+ XV_SHLX = XV_660f38(f7),
+ XV_SHRX = XV_f20f38(f7),
+
/* Variable-length opcodes. XO_* prefix. */
+ XO_OR = XO_(0b),
XO_MOV = XO_(8b),
XO_MOVto = XO_(89),
XO_MOVtow = XO_66(89),
@@ -277,10 +294,8 @@ typedef enum {
XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */
XO_UCOMISD = XO_660f(2e),
XO_CVTSI2SD = XO_f20f(2a),
- XO_CVTSD2SI = XO_f20f(2d),
XO_CVTTSD2SI= XO_f20f(2c),
XO_CVTSI2SS = XO_f30f(2a),
- XO_CVTSS2SI = XO_f30f(2d),
XO_CVTTSS2SI= XO_f30f(2c),
XO_CVTSS2SD = XO_f30f(5a),
XO_CVTSD2SS = XO_f20f(5a),
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 89c3c5ed..c2329394 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -30,6 +30,7 @@
#include "lj_vm.h"
#include "lj_vmevent.h"
#include "lj_target.h"
+#include "lj_prng.h"
/* -- Error handling ------------------------------------------------------ */
@@ -104,7 +105,8 @@ static void perftools_addtrace(GCtrace *T)
name++;
else
name = "(string)";
- lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc);
+ lj_assertX(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc,
+ "trace PC out of range");
lineno = lj_debug_line(pt, proto_bcpos(pt, startpc));
if (!fp) {
char fname[40];
@@ -117,15 +119,26 @@ static void perftools_addtrace(GCtrace *T)
}
#endif
-/* Allocate space for copy of trace. */
-static GCtrace *trace_save_alloc(jit_State *J)
+/* Allocate space for copy of T. */
+GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T)
{
size_t sztr = ((sizeof(GCtrace)+7)&~7);
- size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns);
+ size_t szins = (T->nins-T->nk)*sizeof(IRIns);
size_t sz = sztr + szins +
- J->cur.nsnap*sizeof(SnapShot) +
- J->cur.nsnapmap*sizeof(SnapEntry);
- return lj_mem_newt(J->L, (MSize)sz, GCtrace);
+ T->nsnap*sizeof(SnapShot) +
+ T->nsnapmap*sizeof(SnapEntry);
+ GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace);
+ char *p = (char *)T2 + sztr;
+ T2->gct = ~LJ_TTRACE;
+ T2->marked = 0;
+ T2->traceno = 0;
+ T2->ir = (IRIns *)p - T->nk;
+ T2->nins = T->nins;
+ T2->nk = T->nk;
+ T2->nsnap = T->nsnap;
+ T2->nsnapmap = T->nsnapmap;
+ memcpy(p, T->ir + T->nk, szins);
+ return T2;
}
/* Save current trace by copying and compacting it. */
@@ -139,12 +152,12 @@ static void trace_save(jit_State *J, GCtrace *T)
setgcrefp(J2G(J)->gc.root, T);
newwhite(J2G(J), T);
T->gct = ~LJ_TTRACE;
- T->ir = (IRIns *)p - J->cur.nk;
- memcpy(p, J->cur.ir+J->cur.nk, szins);
+ T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */
p += szins;
TRACE_APPENDVEC(snap, nsnap, SnapShot)
TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry)
J->cur.traceno = 0;
+ J->curfinal = NULL;
setgcrefp(J->trace[T->traceno], T);
lj_gc_barriertrace(J2G(J), T->traceno);
lj_gdbjit_addtrace(J, T);
@@ -172,7 +185,7 @@ void lj_trace_reenableproto(GCproto *pt)
{
if ((pt->flags & PROTO_ILOOP)) {
BCIns *bc = proto_bc(pt);
- BCPos i, sizebc = pt->sizebc;;
+ BCPos i, sizebc = pt->sizebc;
pt->flags &= ~PROTO_ILOOP;
if (bc_op(bc[0]) == BC_IFUNCF)
setbc_op(&bc[0], BC_FUNCF);
@@ -194,27 +207,28 @@ static void trace_unpatch(jit_State *J, GCtrace *T)
return; /* No need to unpatch branches in parent traces (yet). */
switch (bc_op(*pc)) {
case BC_JFORL:
- lua_assert(traceref(J, bc_d(*pc)) == T);
+ lj_assertJ(traceref(J, bc_d(*pc)) == T, "JFORL references other trace");
*pc = T->startins;
pc += bc_j(T->startins);
- lua_assert(bc_op(*pc) == BC_JFORI);
+ lj_assertJ(bc_op(*pc) == BC_JFORI, "FORL does not point to JFORI");
setbc_op(pc, BC_FORI);
break;
case BC_JITERL:
case BC_JLOOP:
- lua_assert(op == BC_ITERL || op == BC_LOOP || bc_isret(op));
+ lj_assertJ(op == BC_ITERL || op == BC_ITERN || op == BC_LOOP ||
+ bc_isret(op), "bad original bytecode %d", op);
*pc = T->startins;
break;
case BC_JMP:
- lua_assert(op == BC_ITERL);
+ lj_assertJ(op == BC_ITERL, "bad original bytecode %d", op);
pc += bc_j(*pc)+2;
if (bc_op(*pc) == BC_JITERL) {
- lua_assert(traceref(J, bc_d(*pc)) == T);
+ lj_assertJ(traceref(J, bc_d(*pc)) == T, "JITERL references other trace");
*pc = T->startins;
}
break;
case BC_JFUNCF:
- lua_assert(op == BC_FUNCF);
+ lj_assertJ(op == BC_FUNCF, "bad original bytecode %d", op);
*pc = T->startins;
break;
default: /* Already unpatched. */
@@ -226,7 +240,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T)
static void trace_flushroot(jit_State *J, GCtrace *T)
{
GCproto *pt = &gcref(T->startpt)->pt;
- lua_assert(T->root == 0 && pt != NULL);
+ lj_assertJ(T->root == 0, "not a root trace");
+ lj_assertJ(pt != NULL, "trace has no prototype");
/* First unpatch any modified bytecode. */
trace_unpatch(J, T);
/* Unlink root trace from chain anchored in prototype. */
@@ -274,7 +289,7 @@ int lj_trace_flushall(lua_State *L)
if (T->root == 0)
trace_flushroot(J, T);
lj_gdbjit_deltrace(J, T);
- T->traceno = 0;
+ T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */
setgcrefnull(J->trace[i]);
}
}
@@ -296,13 +311,42 @@ void lj_trace_initstate(global_State *g)
{
jit_State *J = G2J(g);
TValue *tv;
- /* Initialize SIMD constants. */
+
+ /* Initialize aligned SIMD constants. */
tv = LJ_KSIMD(J, LJ_KSIMD_ABS);
tv[0].u64 = U64x(7fffffff,ffffffff);
tv[1].u64 = U64x(7fffffff,ffffffff);
tv = LJ_KSIMD(J, LJ_KSIMD_NEG);
tv[0].u64 = U64x(80000000,00000000);
tv[1].u64 = U64x(80000000,00000000);
+
+ /* Initialize 32/64 bit constants. */
+#if LJ_TARGET_X86ORX64
+ J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
+#if LJ_32
+ J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
+#endif
+ J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
+ J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
+#endif
+#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
+ J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
+#endif
+#if LJ_TARGET_PPC
+ J->k32[LJ_K32_2P52_2P31] = 0x59800004;
+ J->k32[LJ_K32_2P52] = 0x59800000;
+#endif
+#if LJ_TARGET_PPC || LJ_TARGET_MIPS
+ J->k32[LJ_K32_2P31] = 0x4f000000;
+#endif
+#if LJ_TARGET_MIPS
+ J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
+#if LJ_64
+ J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
+ J->k32[LJ_K32_2P63] = 0x5f000000;
+ J->k32[LJ_K32_M2P64] = 0xdf800000;
+#endif
+#endif
}
/* Free everything associated with the JIT compiler state. */
@@ -313,11 +357,11 @@ void lj_trace_freestate(global_State *g)
{ /* This assumes all traces have already been freed. */
ptrdiff_t i;
for (i = 1; i < (ptrdiff_t)J->sizetrace; i++)
- lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL);
+ lj_assertG(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL,
+ "trace still allocated");
}
#endif
lj_mcode_free(J);
- lj_ir_k64_freeall(J);
lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
@@ -329,8 +373,13 @@ void lj_trace_freestate(global_State *g)
/* Blacklist a bytecode instruction. */
static void blacklist_pc(GCproto *pt, BCIns *pc)
{
- setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP);
- pt->flags |= PROTO_ILOOP;
+ if (bc_op(*pc) == BC_ITERN) {
+ setbc_op(pc, BC_ITERC);
+ setbc_op(pc+1+bc_j(pc[1]), BC_JMP);
+ } else {
+ setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP);
+ pt->flags |= PROTO_ILOOP;
+ }
}
/* Penalize a bytecode instruction. */
@@ -341,7 +390,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e)
if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */
/* First try to bump its hotcount several times. */
val = ((uint32_t)J->penalty[i].val << 1) +
- LJ_PRNG_BITS(J, PENALTY_RNDBITS);
+ (lj_prng_u64(&J2G(J)->prng) & ((1u<<PENALTY_RNDBITS)-1));
if (val > PENALTY_MAX) {
blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */
return;
@@ -367,10 +416,11 @@ static void trace_start(jit_State *J)
TraceNo traceno;
if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */
- if (J->parent == 0) {
+ if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) {
/* Lazy bytecode patching to disable hotcount events. */
- lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
- bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF);
+ lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL ||
+ bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF,
+ "bad hot bytecode %d", bc_op(*J->pc));
setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP);
J->pt->flags |= PROTO_ILOOP;
}
@@ -381,7 +431,8 @@ static void trace_start(jit_State *J)
/* Get a new trace number. */
traceno = trace_findfree(J);
if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */
- lua_assert((J2G(J)->hookmask & HOOK_GC) == 0);
+ lj_assertJ((J2G(J)->hookmask & HOOK_GC) == 0,
+ "recorder called from GC hook");
lj_trace_flushall(J->L);
J->state = LJ_TRACE_IDLE; /* Silently ignored. */
return;
@@ -401,6 +452,8 @@ static void trace_start(jit_State *J)
J->guardemit.irt = 0;
J->postproc = LJ_POST_NONE;
lj_resetsplit(J);
+ J->retryrec = 0;
+ J->ktrace = 0;
setgcref(J->cur.startpt, obj2gco(J->pt));
L = J->L;
@@ -412,6 +465,12 @@ static void trace_start(jit_State *J)
if (J->parent) {
setintV(L->top++, J->parent);
setintV(L->top++, J->exitno);
+ } else {
+ BCOp op = bc_op(*J->pc);
+ if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) {
+ setintV(L->top++, J->exitno); /* Parent of stitched trace. */
+ setintV(L->top++, -1);
+ }
}
);
lj_record_setup(J);
@@ -424,7 +483,7 @@ static void trace_stop(jit_State *J)
BCOp op = bc_op(J->cur.startins);
GCproto *pt = &gcref(J->cur.startpt)->pt;
TraceNo traceno = J->cur.traceno;
- GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */
+ GCtrace *T = J->curfinal;
lua_State *L;
switch (op) {
@@ -442,6 +501,7 @@ static void trace_stop(jit_State *J)
J->cur.nextroot = pt->trace;
pt->trace = (TraceNo1)traceno;
break;
+ case BC_ITERN:
case BC_RET:
case BC_RET0:
case BC_RET1:
@@ -449,7 +509,7 @@ static void trace_stop(jit_State *J)
goto addroot;
case BC_JMP:
/* Patch exit branch in parent to side trace entry. */
- lua_assert(J->parent != 0 && J->cur.root != 0);
+ lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side trace");
lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode);
/* Avoid compiling a side trace twice (stack resizing uses parent exit). */
{
@@ -465,8 +525,14 @@ static void trace_stop(jit_State *J)
root->nextside = (TraceNo1)traceno;
}
break;
+ case BC_CALLM:
+ case BC_CALL:
+ case BC_ITERC:
+ /* Trace stitching: patch link of previous trace. */
+ traceref(J, J->exitno)->link = traceno;
+ break;
default:
- lua_assert(0);
+ lj_assertJ(0, "bad stop bytecode %d", op);
break;
}
@@ -479,6 +545,7 @@ static void trace_stop(jit_State *J)
lj_vmevent_send(L, TRACE,
setstrV(L, L->top++, lj_str_newlit(L, "stop"));
setintV(L->top++, traceno);
+ setfuncV(L, L->top++, J->fn);
);
}
@@ -486,8 +553,8 @@ static void trace_stop(jit_State *J)
static int trace_downrec(jit_State *J)
{
/* Restart recording at the return instruction. */
- lua_assert(J->pt != NULL);
- lua_assert(bc_isret(bc_op(*J->pc)));
+ lj_assertJ(J->pt != NULL, "no active prototype");
+ lj_assertJ(bc_isret(bc_op(*J->pc)), "not at a return bytecode");
if (bc_op(*J->pc) == BC_RETM)
return 0; /* NYI: down-recursion with RETM. */
J->parent = 0;
@@ -506,6 +573,10 @@ static int trace_abort(jit_State *J)
J->postproc = LJ_POST_NONE;
lj_mcode_abort(J);
+ if (J->curfinal) {
+ lj_trace_free(J2G(J), J->curfinal);
+ J->curfinal = NULL;
+ }
if (tvisnumber(L->top-1))
e = (TraceError)numberVint(L->top-1);
if (e == LJ_TRERR_MCODELM) {
@@ -514,8 +585,17 @@ static int trace_abort(jit_State *J)
return 1; /* Retry ASM with new MCode area. */
}
/* Penalize or blacklist starting bytecode instruction. */
- if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins)))
- penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e);
+ if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
+ if (J->exitno == 0) {
+ BCIns *startpc = mref(J->cur.startpc, BCIns);
+ if (e == LJ_TRERR_RETRY)
+ hotcount_set(J2GG(J), startpc+1, 1); /* Immediate retry. */
+ else
+ penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e);
+ } else {
+ traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */
+ }
+ }
/* Is there anything to abort? */
traceno = J->cur.traceno;
@@ -581,8 +661,13 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud)
J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */
trace_start(J);
lj_dispatch_update(J2G(J));
- break;
+ if (J->state != LJ_TRACE_RECORD_1ST)
+ break;
+ /* fallthrough */
+ case LJ_TRACE_RECORD_1ST:
+ J->state = LJ_TRACE_RECORD;
+ /* fallthrough */
case LJ_TRACE_RECORD:
trace_pendpatch(J, 0);
setvmstate(J2G(J), RECORD);
@@ -688,15 +773,30 @@ static void trace_hotside(jit_State *J, const BCIns *pc)
{
SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno];
if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) &&
+ isluafunc(curr_func(J->L)) &&
snap->count != SNAPCOUNT_DONE &&
++snap->count >= J->param[JIT_P_hotexit]) {
- lua_assert(J->state == LJ_TRACE_IDLE);
+ lj_assertJ(J->state == LJ_TRACE_IDLE, "hot side exit while recording");
/* J->parent is non-zero for a side trace. */
J->state = LJ_TRACE_START;
lj_trace_ins(J, pc);
}
}
+/* Stitch a new trace to the previous trace. */
+void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc)
+{
+ /* Only start a new trace if not recording or inside __gc call or vmevent. */
+ if (J->state == LJ_TRACE_IDLE &&
+ !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) {
+ J->parent = 0; /* Have to treat it like a root trace. */
+ /* J->exitno is set to the invoking trace. */
+ J->state = LJ_TRACE_START;
+ lj_trace_ins(J, pc);
+ }
+}
+
+
/* Tiny struct to pass data to protected call. */
typedef struct ExitDataCP {
jit_State *J;
@@ -740,7 +840,7 @@ static void trace_exit_regs(lua_State *L, ExitState *ex)
}
#endif
-#ifdef EXITSTATE_PCREG
+#if defined(EXITSTATE_PCREG) || (LJ_UNWIND_JIT && !EXITTRACE_VMSTATE)
/* Determine trace number from pc of exit instruction. */
static TraceNo trace_exit_find(jit_State *J, MCode *pc)
{
@@ -750,7 +850,7 @@ static TraceNo trace_exit_find(jit_State *J, MCode *pc)
if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode))
return traceno;
}
- lua_assert(0);
+ lj_assertJ(0, "bad exit pc");
return 0;
}
#endif
@@ -762,40 +862,55 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
lua_State *L = J->L;
ExitState *ex = (ExitState *)exptr;
ExitDataCP exd;
- int errcode;
+ int errcode, exitcode = J->exitcode;
+ TValue exiterr;
const BCIns *pc;
void *cf;
GCtrace *T;
+
+ setnilV(&exiterr);
+ if (exitcode) { /* Trace unwound with error code. */
+ J->exitcode = 0;
+ copyTV(L, &exiterr, L->top-1);
+ }
+
#ifdef EXITSTATE_PCREG
J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]);
#endif
T = traceref(J, J->parent); UNUSED(T);
#ifdef EXITSTATE_CHECKEXIT
if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */
- lua_assert(T->root != 0);
+ lj_assertJ(T->root != 0, "stack check in root trace");
J->exitno = T->ir[REF_BASE].op2;
J->parent = T->ir[REF_BASE].op1;
T = traceref(J, J->parent);
}
#endif
- lua_assert(T != NULL && J->exitno < T->nsnap);
+ lj_assertJ(T != NULL && J->exitno < T->nsnap, "bad trace or exit number");
exd.J = J;
exd.exptr = exptr;
errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp);
if (errcode)
return -errcode; /* Return negated error code. */
- lj_vmevent_send(L, TEXIT,
- lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
- setintV(L->top++, J->parent);
- setintV(L->top++, J->exitno);
- trace_exit_regs(L, ex);
- );
+ if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. */
+
+ if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)))
+ lj_vmevent_send(L, TEXIT,
+ lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK);
+ setintV(L->top++, J->parent);
+ setintV(L->top++, J->exitno);
+ trace_exit_regs(L, ex);
+ );
pc = exd.pc;
cf = cframe_raw(L->cframe);
setcframe_pc(cf, pc);
- if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
+ if (exitcode) {
+ return -exitcode;
+ } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) {
+ /* Just exit to interpreter. */
+ } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) {
if (!(G(L)->hookmask & HOOK_GC))
lj_gc_step(L); /* Exited because of GC: drive GC forward. */
} else {
@@ -803,13 +918,14 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
}
if (bc_op(*pc) == BC_JLOOP) {
BCIns *retpc = &traceref(J, bc_d(*pc))->startins;
- if (bc_isret(bc_op(*retpc))) {
+ int isret = bc_isret(bc_op(*retpc));
+ if (isret || bc_op(*retpc) == BC_ITERN) {
if (J->state == LJ_TRACE_RECORD) {
J->patchins = *pc;
J->patchpc = (BCIns *)pc;
*J->patchpc = *retpc;
J->bcskip = 1;
- } else {
+ } else if (isret) {
pc = retpc;
setcframe_pc(cf, pc);
}
@@ -819,7 +935,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
ERRNO_RESTORE
switch (bc_op(*pc)) {
case BC_CALLM: case BC_CALLMT:
- return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc));
+ return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) - LJ_FR2);
case BC_RETM:
return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc));
case BC_TSETM:
@@ -831,4 +947,41 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr)
}
}
+#if LJ_UNWIND_JIT
+/* Given an mcode address determine trace exit address for unwinding. */
+uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep)
+{
+#if EXITTRACE_VMSTATE
+ TraceNo traceno = J2G(J)->vmstate;
+#else
+ TraceNo traceno = trace_exit_find(J, (MCode *)addr);
+#endif
+ GCtrace *T = traceref(J, traceno);
+ if (T
+#if EXITTRACE_VMSTATE
+ && addr >= (uintptr_t)T->mcode && addr < (uintptr_t)T->mcode + T->szmcode
+#endif
+ ) {
+ SnapShot *snap = T->snap;
+ SnapNo lo = 0, exitno = T->nsnap;
+ uintptr_t ofs = (uintptr_t)((MCode *)addr - T->mcode); /* MCode units! */
+ /* Rightmost binary search for mcode offset to determine exit number. */
+ do {
+ SnapNo mid = (lo+exitno) >> 1;
+ if (ofs < snap[mid].mcofs) exitno = mid; else lo = mid + 1;
+ } while (lo < exitno);
+ exitno--;
+ *ep = exitno;
+#ifdef EXITSTUBS_PER_GROUP
+ return (uintptr_t)exitstub_addr(J, exitno);
+#else
+ return (uintptr_t)exitstub_trace_addr(T, exitno);
+#endif
+ }
+ /* Cannot correlate addr with trace/exit. This will be fatal. */
+ lj_assertJ(0, "bad exit pc");
+ return 0;
+}
+#endif
+
#endif
diff --git a/src/lj_trace.h b/src/lj_trace.h
index 0fc03672..3d7f76f0 100644
--- a/src/lj_trace.h
+++ b/src/lj_trace.h
@@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e);
LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e);
/* Trace management. */
+LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T);
LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T);
LJ_FUNC void lj_trace_reenableproto(GCproto *pt);
LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt);
@@ -34,7 +35,11 @@ LJ_FUNC void lj_trace_freestate(global_State *g);
/* Event handling. */
LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc);
LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc);
+LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc);
LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr);
+#if LJ_UNWIND_EXT
+LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep);
+#endif
/* Signal asynchronous abort of trace or end of trace. */
#define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE)
diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h
index a4e5ae64..8ed8ac82 100644
--- a/src/lj_traceerr.h
+++ b/src/lj_traceerr.h
@@ -7,10 +7,12 @@
/* Recording. */
TREDEF(RECERR, "error thrown or hook called during recording")
+TREDEF(TRACEUV, "trace too short")
TREDEF(TRACEOV, "trace too long")
TREDEF(STACKOV, "trace too deep")
TREDEF(SNAPOV, "too many snapshots")
TREDEF(BLACKL, "blacklisted")
+TREDEF(RETRY, "retry recording")
TREDEF(NYIBC, "NYI: bytecode %d")
/* Recording loop ops. */
@@ -23,8 +25,6 @@ TREDEF(BADTYPE, "bad argument type")
TREDEF(CJITOFF, "JIT compilation disabled for function")
TREDEF(CUNROLL, "call unroll limit reached")
TREDEF(DOWNREC, "down-recursion, restarting")
-TREDEF(NYICF, "NYI: C function %s")
-TREDEF(NYIFF, "NYI: FastFunc %s")
TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s")
TREDEF(NYIRETL, "NYI: return to lower frame")
diff --git a/src/lj_udata.c b/src/lj_udata.c
index 7dada848..ee4a145d 100644
--- a/src/lj_udata.c
+++ b/src/lj_udata.c
@@ -8,6 +8,7 @@
#include "lj_obj.h"
#include "lj_gc.h"
+#include "lj_err.h"
#include "lj_udata.h"
GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env)
@@ -32,3 +33,30 @@ void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud)
lj_mem_free(g, ud, sizeudata(ud));
}
+#if LJ_64
+void *lj_lightud_intern(lua_State *L, void *p)
+{
+ global_State *g = G(L);
+ uint64_t u = (uint64_t)p;
+ uint32_t up = lightudup(u);
+ uint32_t *segmap = mref(g->gc.lightudseg, uint32_t);
+ MSize segnum = g->gc.lightudnum;
+ if (segmap) {
+ MSize seg;
+ for (seg = 0; seg <= segnum; seg++)
+ if (segmap[seg] == up) /* Fast path. */
+ return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
+ segnum++;
+ /* Leave last segment unused to avoid clash with ITERN key. */
+ if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)-1) lj_err_msg(L, LJ_ERR_BADLU);
+ }
+ if (!((segnum-1) & segnum) && segnum != 1) {
+ lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t);
+ setmref(g->gc.lightudseg, segmap);
+ }
+ g->gc.lightudnum = segnum;
+ segmap[segnum] = up;
+ return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u));
+}
+#endif
+
diff --git a/src/lj_udata.h b/src/lj_udata.h
index acd136a7..503c9e30 100644
--- a/src/lj_udata.h
+++ b/src/lj_udata.h
@@ -10,5 +10,8 @@
LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env);
LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud);
+#if LJ_64
+LJ_FUNC void * LJ_FASTCALL lj_lightud_intern(lua_State *L, void *p);
+#endif
#endif
diff --git a/src/lj_vm.h b/src/lj_vm.h
index b66f5b85..c66db004 100644
--- a/src/lj_vm.h
+++ b/src/lj_vm.h
@@ -17,11 +17,18 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud,
LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef);
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode);
LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe);
+#if LJ_ABI_WIN && LJ_TARGET_X86
+LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec,
+ void *unwinder, int errcode);
+#endif
LJ_ASMF void lj_vm_unwind_c_eh(void);
LJ_ASMF void lj_vm_unwind_ff_eh(void);
#if LJ_TARGET_X86ORX64
LJ_ASMF void lj_vm_unwind_rethrow(void);
#endif
+#if LJ_TARGET_MIPS
+LJ_ASMF void lj_vm_unwind_stub(void);
+#endif
/* Miscellaneous functions. */
#if LJ_TARGET_X86ORX64
@@ -43,13 +50,15 @@ LJ_ASMF void lj_vm_record(void);
LJ_ASMF void lj_vm_inshook(void);
LJ_ASMF void lj_vm_rethook(void);
LJ_ASMF void lj_vm_callhook(void);
+LJ_ASMF void lj_vm_profhook(void);
+LJ_ASMF void lj_vm_IITERN(void);
/* Trace exit handling. */
LJ_ASMF void lj_vm_exit_handler(void);
LJ_ASMF void lj_vm_exit_interp(void);
/* Internal math helper functions. */
-#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC
+#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP)
#define lj_vm_floor floor
#define lj_vm_ceil ceil
#else
@@ -60,23 +69,22 @@ LJ_ASMF double lj_vm_floor_sf(double);
LJ_ASMF double lj_vm_ceil_sf(double);
#endif
#endif
-#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64
+#ifdef LUAJIT_NO_LOG2
LJ_ASMF double lj_vm_log2(double);
#else
#define lj_vm_log2 log2
#endif
+#if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS)
+LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
+#endif
#if LJ_HASJIT
#if LJ_TARGET_X86ORX64
LJ_ASMF void lj_vm_floor_sse(void);
LJ_ASMF void lj_vm_ceil_sse(void);
LJ_ASMF void lj_vm_trunc_sse(void);
-LJ_ASMF void lj_vm_exp_x87(void);
-LJ_ASMF void lj_vm_exp2_x87(void);
-LJ_ASMF void lj_vm_pow_sse(void);
-LJ_ASMF void lj_vm_powi_sse(void);
-#else
-#if LJ_TARGET_PPC
+#endif
+#if LJ_TARGET_PPC || LJ_TARGET_ARM64
#define lj_vm_trunc trunc
#else
LJ_ASMF double lj_vm_trunc(double);
@@ -84,17 +92,10 @@ LJ_ASMF double lj_vm_trunc(double);
LJ_ASMF double lj_vm_trunc_sf(double);
#endif
#endif
-LJ_ASMF double lj_vm_powi(double, int32_t);
-#ifdef LUAJIT_NO_EXP2
-LJ_ASMF double lj_vm_exp2(double);
-#else
-#define lj_vm_exp2 exp2
-#endif
-#endif
-LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
#if LJ_HASFFI
LJ_ASMF int lj_vm_errno(void);
#endif
+LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx);
#endif
/* Continuations for metamethods. */
@@ -104,8 +105,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */
LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */
LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */
LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */
-
-enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */
+LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */
/* Start of the ASM code. */
LJ_ASMF char lj_vm_asm_begin[];
diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c
index 704af254..c8491d82 100644
--- a/src/lj_vmevent.c
+++ b/src/lj_vmevent.c
@@ -27,6 +27,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev)
if (tv && tvisfunc(tv)) {
lj_state_checkstack(L, LUA_MINSTACK);
setfuncV(L, L->top++, funcV(tv));
+ if (LJ_FR2) setnilV(L->top++);
return savestack(L, L->top);
}
}
diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c
index ff41ba28..b6cc60ba 100644
--- a/src/lj_vmmath.c
+++ b/src/lj_vmmath.c
@@ -13,16 +13,29 @@
#include "lj_ir.h"
#include "lj_vm.h"
-/* -- Helper functions for generated machine code ------------------------- */
+/* -- Wrapper functions --------------------------------------------------- */
-#if LJ_TARGET_X86ORX64
-/* Wrapper functions to avoid linker issues on OSX. */
-LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); }
-LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); }
-LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); }
+#if LJ_TARGET_X86 && __ELF__ && __PIC__
+/* Wrapper functions to deal with the ELF/x86 PIC disaster. */
+LJ_FUNCA double lj_wrap_log(double x) { return log(x); }
+LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); }
+LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); }
+LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); }
+LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); }
+LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); }
+LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); }
+LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); }
+LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); }
+LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); }
+LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); }
+LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); }
+LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); }
+LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); }
+LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); }
#endif
-#if !LJ_TARGET_X86ORX64
+/* -- Helper functions ---------------------------------------------------- */
+
double lj_vm_foldarith(double x, double y, int op)
{
switch (op) {
@@ -35,37 +48,22 @@ double lj_vm_foldarith(double x, double y, int op)
case IR_NEG - IR_ADD: return -x; break;
case IR_ABS - IR_ADD: return fabs(x); break;
#if LJ_HASJIT
- case IR_ATAN2 - IR_ADD: return atan2(x, y); break;
case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break;
- case IR_MIN - IR_ADD: return x > y ? y : x; break;
- case IR_MAX - IR_ADD: return x < y ? y : x; break;
+ case IR_MIN - IR_ADD: return x < y ? x : y; break;
+ case IR_MAX - IR_ADD: return x > y ? x : y; break;
#endif
default: return x;
}
}
-#endif
-#if LJ_HASJIT
-
-#ifdef LUAJIT_NO_LOG2
-double lj_vm_log2(double a)
-{
- return log(a) * 1.4426950408889634074;
-}
-#endif
-
-#ifdef LUAJIT_NO_EXP2
-double lj_vm_exp2(double a)
-{
- return exp(a * 0.6931471805599453);
-}
-#endif
+/* -- Helper functions for generated machine code ------------------------- */
-#if !(LJ_TARGET_ARM || LJ_TARGET_PPC)
+#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS
int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
{
uint32_t y, ua, ub;
- lua_assert(b != 0); /* This must be checked before using this function. */
+ /* This must be checked before using this function. */
+ lj_assertX(b != 0, "modulo with zero divisor");
ua = a < 0 ? (uint32_t)-a : (uint32_t)a;
ub = b < 0 ? (uint32_t)-b : (uint32_t)b;
y = ua % ub;
@@ -75,38 +73,14 @@ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b)
}
#endif
-#if !LJ_TARGET_X86ORX64
-/* Unsigned x^k. */
-static double lj_vm_powui(double x, uint32_t k)
-{
- double y;
- lua_assert(k != 0);
- for (; (k & 1) == 0; k >>= 1) x *= x;
- y = x;
- if ((k >>= 1) != 0) {
- for (;;) {
- x *= x;
- if (k == 1) break;
- if (k & 1) y *= x;
- k >>= 1;
- }
- y *= x;
- }
- return y;
-}
+#if LJ_HASJIT
-/* Signed x^k. */
-double lj_vm_powi(double x, int32_t k)
+#ifdef LUAJIT_NO_LOG2
+double lj_vm_log2(double a)
{
- if (k > 1)
- return lj_vm_powui(x, (uint32_t)k);
- else if (k == 1)
- return x;
- else if (k == 0)
- return 1.0;
- else
- return 1.0 / lj_vm_powui(x, (uint32_t)-k);
+ return log(a) * 1.4426950408889634074;
}
+#endif
/* Computes fpm(x) for extended math functions. */
double lj_vm_foldfpm(double x, int fpm)
@@ -116,19 +90,12 @@ double lj_vm_foldfpm(double x, int fpm)
case IRFPM_CEIL: return lj_vm_ceil(x);
case IRFPM_TRUNC: return lj_vm_trunc(x);
case IRFPM_SQRT: return sqrt(x);
- case IRFPM_EXP: return exp(x);
- case IRFPM_EXP2: return lj_vm_exp2(x);
case IRFPM_LOG: return log(x);
case IRFPM_LOG2: return lj_vm_log2(x);
- case IRFPM_LOG10: return log10(x);
- case IRFPM_SIN: return sin(x);
- case IRFPM_COS: return cos(x);
- case IRFPM_TAN: return tan(x);
- default: lua_assert(0);
+ default: lj_assertX(0, "bad fpm %d", fpm);
}
return 0;
}
-#endif
#if LJ_HASFFI
int lj_vm_errno(void)
diff --git a/src/ljamalg.c b/src/ljamalg.c
index 92f070da..cae8356c 100644
--- a/src/ljamalg.c
+++ b/src/ljamalg.c
@@ -3,16 +3,6 @@
** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
*/
-/*
-+--------------------------------------------------------------------------+
-| WARNING: Compiling the amalgamation needs a lot of virtual memory |
-| (around 300 MB with GCC 4.x)! If you don't have enough physical memory |
-| your machine will start swapping to disk and the compile will not finish |
-| within a reasonable amount of time. |
-| So either compile on a bigger machine or use the non-amalgamated build. |
-+--------------------------------------------------------------------------+
-*/
-
#define ljamalg_c
#define LUA_CORE
@@ -28,23 +18,30 @@
#include "lua.h"
#include "lauxlib.h"
+#include "lj_assert.c"
#include "lj_gc.c"
#include "lj_err.c"
#include "lj_char.c"
#include "lj_bc.c"
#include "lj_obj.c"
+#include "lj_buf.c"
#include "lj_str.c"
#include "lj_tab.c"
#include "lj_func.c"
#include "lj_udata.c"
#include "lj_meta.c"
#include "lj_debug.c"
+#include "lj_prng.c"
#include "lj_state.c"
#include "lj_dispatch.c"
#include "lj_vmevent.c"
#include "lj_vmmath.c"
#include "lj_strscan.c"
+#include "lj_strfmt.c"
+#include "lj_strfmt_num.c"
+#include "lj_serialize.c"
#include "lj_api.c"
+#include "lj_profile.c"
#include "lj_lex.c"
#include "lj_parse.c"
#include "lj_bcread.c"
@@ -89,5 +86,6 @@
#include "lib_bit.c"
#include "lib_jit.c"
#include "lib_ffi.c"
+#include "lib_buffer.c"
#include "lib_init.c"
diff --git a/src/lua.h b/src/lua.h
index 2bd683c2..6d1634d1 100644
--- a/src/lua.h
+++ b/src/lua.h
@@ -39,7 +39,8 @@
#define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i))
-/* thread status; 0 is OK */
+/* thread status */
+#define LUA_OK 0
#define LUA_YIELD 1
#define LUA_ERRRUN 2
#define LUA_ERRSYNTAX 3
@@ -226,6 +227,7 @@ LUA_API int (lua_status) (lua_State *L);
#define LUA_GCSTEP 5
#define LUA_GCSETPAUSE 6
#define LUA_GCSETSTEPMUL 7
+#define LUA_GCISRUNNING 9
LUA_API int (lua_gc) (lua_State *L, int what, int data);
@@ -346,6 +348,13 @@ LUA_API void *lua_upvalueid (lua_State *L, int idx, int n);
LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2);
LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt,
const char *chunkname, const char *mode);
+LUA_API const lua_Number *lua_version (lua_State *L);
+LUA_API void lua_copy (lua_State *L, int fromidx, int toidx);
+LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum);
+LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum);
+
+/* From Lua 5.3. */
+LUA_API int lua_isyieldable (lua_State *L);
struct lua_Debug {
diff --git a/src/luaconf.h b/src/luaconf.h
index 9d587e9d..e8790c1d 100644
--- a/src/luaconf.h
+++ b/src/luaconf.h
@@ -37,7 +37,7 @@
#endif
#define LUA_LROOT "/usr/local"
#define LUA_LUADIR "/lua/5.1/"
-#define LUA_LJDIR "/luajit-2.0.5/"
+#define LUA_LJDIR "/luajit-2.1.0-beta3/"
#ifdef LUA_ROOT
#define LUA_JROOT LUA_ROOT
@@ -79,7 +79,7 @@
#define LUA_IGMARK "-"
#define LUA_PATH_CONFIG \
LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \
- LUA_EXECDIR "\n" LUA_IGMARK
+ LUA_EXECDIR "\n" LUA_IGMARK "\n"
/* Quoting in error messages. */
#define LUA_QL(x) "'" x "'"
@@ -92,10 +92,6 @@
#define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */
#define LUA_MAXCAPTURES 32 /* Max. pattern captures. */
-/* Compatibility with older library function names. */
-#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */
-#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */
-
/* Configuration for the frontend (the luajit executable). */
#if defined(luajit_c)
#define LUA_PROGNAME "luajit" /* Fallback frontend name. */
@@ -140,7 +136,7 @@
#define LUALIB_API LUA_API
-/* Support for internal assertions. */
+/* Compatibility support for assertions. */
#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK)
#include <assert.h>
#endif
diff --git a/src/luajit.c b/src/luajit.c
index cf4982a6..6dd64026 100644
--- a/src/luajit.c
+++ b/src/luajit.c
@@ -62,8 +62,9 @@ static void laction(int i)
static void print_usage(void)
{
- fprintf(stderr,
- "usage: %s [options]... [script [args]...].\n"
+ fputs("usage: ", stderr);
+ fputs(progname, stderr);
+ fputs(" [options]... [script [args]...].\n"
"Available options are:\n"
" -e chunk Execute string " LUA_QL("chunk") ".\n"
" -l name Require library " LUA_QL("name") ".\n"
@@ -74,16 +75,14 @@ static void print_usage(void)
" -v Show version information.\n"
" -E Ignore environment variables.\n"
" -- Stop handling options.\n"
- " - Execute stdin and stop handling options.\n"
- ,
- progname);
+ " - Execute stdin and stop handling options.\n", stderr);
fflush(stderr);
}
static void l_message(const char *msg)
{
- if (progname) fprintf(stderr, "%s: ", progname);
- fprintf(stderr, "%s\n", msg);
+ if (progname) { fputs(progname, stderr); fputc(':', stderr); fputc(' ', stderr); }
+ fputs(msg, stderr); fputc('\n', stderr);
fflush(stderr);
}
@@ -126,7 +125,7 @@ static int docall(lua_State *L, int narg, int clear)
#endif
lua_remove(L, base); /* remove traceback function */
/* force a complete garbage collection in case of errors */
- if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0);
+ if (status != LUA_OK) lua_gc(L, LUA_GCCOLLECT, 0);
return status;
}
@@ -155,22 +154,15 @@ static void print_jit_status(lua_State *L)
lua_settop(L, 0); /* clear stack */
}
-static int getargs(lua_State *L, char **argv, int n)
+static void createargtable(lua_State *L, char **argv, int argc, int argf)
{
- int narg;
int i;
- int argc = 0;
- while (argv[argc]) argc++; /* count total number of arguments */
- narg = argc - (n + 1); /* number of arguments to the script */
- luaL_checkstack(L, narg + 3, "too many arguments to script");
- for (i = n+1; i < argc; i++)
- lua_pushstring(L, argv[i]);
- lua_createtable(L, narg, n + 1);
+ lua_createtable(L, argc - argf, argf);
for (i = 0; i < argc; i++) {
lua_pushstring(L, argv[i]);
- lua_rawseti(L, -2, i - n);
+ lua_rawseti(L, -2, i - argf);
}
- return narg;
+ lua_setglobal(L, "arg");
}
static int dofile(lua_State *L, const char *name)
@@ -259,9 +251,9 @@ static void dotty(lua_State *L)
const char *oldprogname = progname;
progname = NULL;
while ((status = loadline(L)) != -1) {
- if (status == 0) status = docall(L, 0, 0);
+ if (status == LUA_OK) status = docall(L, 0, 0);
report(L, status);
- if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */
+ if (status == LUA_OK && lua_gettop(L) > 0) { /* any result to print? */
lua_getglobal(L, "print");
lua_insert(L, 1);
if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0)
@@ -275,21 +267,30 @@ static void dotty(lua_State *L)
progname = oldprogname;
}
-static int handle_script(lua_State *L, char **argv, int n)
+static int handle_script(lua_State *L, char **argx)
{
int status;
- const char *fname;
- int narg = getargs(L, argv, n); /* collect arguments */
- lua_setglobal(L, "arg");
- fname = argv[n];
- if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0)
+ const char *fname = argx[0];
+ if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0)
fname = NULL; /* stdin */
status = luaL_loadfile(L, fname);
- lua_insert(L, -(narg+1));
- if (status == 0)
+ if (status == LUA_OK) {
+ /* Fetch args from arg table. LUA_INIT or -e might have changed them. */
+ int narg = 0;
+ lua_getglobal(L, "arg");
+ if (lua_istable(L, -1)) {
+ do {
+ narg++;
+ lua_rawgeti(L, -narg, narg);
+ } while (!lua_isnil(L, -1));
+ lua_pop(L, 1);
+ lua_remove(L, -narg);
+ narg--;
+ } else {
+ lua_pop(L, 1);
+ }
status = docall(L, narg, 0);
- else
- lua_pop(L, narg);
+ }
return report(L, status);
}
@@ -385,7 +386,8 @@ static int dobytecode(lua_State *L, char **argv)
}
for (argv++; *argv != NULL; narg++, argv++)
lua_pushstring(L, *argv);
- return report(L, lua_pcall(L, narg, 0, 0));
+ report(L, lua_pcall(L, narg, 0, 0));
+ return -1;
}
/* check that argument has no extra characters at the end */
@@ -406,7 +408,7 @@ static int collectargs(char **argv, int *flags)
switch (argv[i][1]) { /* Check option. */
case '-':
notail(argv[i]);
- return (argv[i+1] != NULL ? i+1 : 0);
+ return i+1;
case '\0':
return i;
case 'i':
@@ -432,23 +434,23 @@ static int collectargs(char **argv, int *flags)
case 'b': /* LuaJIT extension */
if (*flags) return -1;
*flags |= FLAGS_EXEC;
- return 0;
+ return i+1;
case 'E':
*flags |= FLAGS_NOENV;
break;
default: return -1; /* invalid option */
}
}
- return 0;
+ return i;
}
-static int runargs(lua_State *L, char **argv, int n)
+static int runargs(lua_State *L, char **argv, int argn)
{
int i;
- for (i = 1; i < n; i++) {
+ for (i = 1; i < argn; i++) {
if (argv[i] == NULL) continue;
lua_assert(argv[i][0] == '-');
- switch (argv[i][1]) { /* option */
+ switch (argv[i][1]) {
case 'e': {
const char *chunk = argv[i] + 2;
if (*chunk == '\0') chunk = argv[++i];
@@ -462,10 +464,10 @@ static int runargs(lua_State *L, char **argv, int n)
if (*filename == '\0') filename = argv[++i];
lua_assert(filename != NULL);
if (dolibrary(L, filename))
- return 1; /* stop if file fails */
+ return 1;
break;
}
- case 'j': { /* LuaJIT extension */
+ case 'j': { /* LuaJIT extension. */
const char *cmd = argv[i] + 2;
if (*cmd == '\0') cmd = argv[++i];
lua_assert(cmd != NULL);
@@ -473,16 +475,16 @@ static int runargs(lua_State *L, char **argv, int n)
return 1;
break;
}
- case 'O': /* LuaJIT extension */
+ case 'O': /* LuaJIT extension. */
if (dojitopt(L, argv[i] + 2))
return 1;
break;
- case 'b': /* LuaJIT extension */
+ case 'b': /* LuaJIT extension. */
return dobytecode(L, argv+i);
default: break;
}
}
- return 0;
+ return LUA_OK;
}
static int handle_luainit(lua_State *L)
@@ -493,7 +495,7 @@ static int handle_luainit(lua_State *L)
const char *init = getenv(LUA_INIT);
#endif
if (init == NULL)
- return 0; /* status OK */
+ return LUA_OK;
else if (init[0] == '@')
return dofile(L, init+1);
else
@@ -510,44 +512,55 @@ static int pmain(lua_State *L)
{
struct Smain *s = &smain;
char **argv = s->argv;
- int script;
+ int argn;
int flags = 0;
globalL = L;
- LUAJIT_VERSION_SYM(); /* linker-enforced version check */
- script = collectargs(argv, &flags);
- if (script < 0) { /* invalid args? */
+ LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */
+
+ argn = collectargs(argv, &flags);
+ if (argn < 0) { /* Invalid args? */
print_usage();
s->status = 1;
return 0;
}
+
if ((flags & FLAGS_NOENV)) {
lua_pushboolean(L, 1);
lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV");
}
- lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */
- luaL_openlibs(L); /* open libraries */
+
+ /* Stop collector during library initialization. */
+ lua_gc(L, LUA_GCSTOP, 0);
+ luaL_openlibs(L);
lua_gc(L, LUA_GCRESTART, -1);
+
+ createargtable(L, argv, s->argc, argn);
+
if (!(flags & FLAGS_NOENV)) {
s->status = handle_luainit(L);
- if (s->status != 0) return 0;
+ if (s->status != LUA_OK) return 0;
}
+
if ((flags & FLAGS_VERSION)) print_version();
- s->status = runargs(L, argv, (script > 0) ? script : s->argc);
- if (s->status != 0) return 0;
- if (script) {
- s->status = handle_script(L, argv, script);
- if (s->status != 0) return 0;
+
+ s->status = runargs(L, argv, argn);
+ if (s->status != LUA_OK) return 0;
+
+ if (s->argc > argn) {
+ s->status = handle_script(L, argv + argn);
+ if (s->status != LUA_OK) return 0;
}
+
if ((flags & FLAGS_INTERACTIVE)) {
print_jit_status(L);
dotty(L);
- } else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) {
+ } else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) {
if (lua_stdin_is_tty()) {
print_version();
print_jit_status(L);
dotty(L);
} else {
- dofile(L, NULL); /* executes stdin as a file */
+ dofile(L, NULL); /* Executes stdin as a file. */
}
}
return 0;
@@ -558,7 +571,7 @@ int main(int argc, char **argv)
int status;
lua_State *L;
if (!argv[0]) argv = empty_argv; else if (argv[0][0]) progname = argv[0];
- L = lua_open(); /* create state */
+ L = lua_open();
if (L == NULL) {
l_message("cannot create state: not enough memory");
return EXIT_FAILURE;
@@ -568,6 +581,6 @@ int main(int argc, char **argv)
status = lua_cpcall(L, pmain, NULL);
report(L, status);
lua_close(L);
- return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS;
+ return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/src/luajit.h b/src/luajit.h
index 8b666f63..31f1eb1f 100644
--- a/src/luajit.h
+++ b/src/luajit.h
@@ -30,9 +30,9 @@
#include "lua.h"
-#define LUAJIT_VERSION "LuaJIT 2.0.5"
-#define LUAJIT_VERSION_NUM 20005 /* Version 2.0.5 = 02.00.05. */
-#define LUAJIT_VERSION_SYM luaJIT_version_2_0_5
+#define LUAJIT_VERSION "LuaJIT 2.1.0-beta3"
+#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */
+#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3
#define LUAJIT_COPYRIGHT "Copyright (C) 2005-2022 Mike Pall"
#define LUAJIT_URL "https://luajit.org/"
@@ -64,6 +64,15 @@ enum {
/* Control the JIT engine. */
LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode);
+/* Low-overhead profiling API. */
+typedef void (*luaJIT_profile_callback)(void *data, lua_State *L,
+ int samples, int vmstate);
+LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
+ luaJIT_profile_callback cb, void *data);
+LUA_API void luaJIT_profile_stop(lua_State *L);
+LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt,
+ int depth, size_t *len);
+
/* Enforce (dynamic) linker error for version mismatches. Call from main. */
LUA_API void LUAJIT_VERSION_SYM(void);
diff --git a/src/lualib.h b/src/lualib.h
index 4a2f8692..87748456 100644
--- a/src/lualib.h
+++ b/src/lualib.h
@@ -33,6 +33,7 @@ LUALIB_API int luaopen_debug(lua_State *L);
LUALIB_API int luaopen_bit(lua_State *L);
LUALIB_API int luaopen_jit(lua_State *L);
LUALIB_API int luaopen_ffi(lua_State *L);
+LUALIB_API int luaopen_string_buffer(lua_State *L);
LUALIB_API void luaL_openlibs(lua_State *L);
diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat
index 045965f8..d323d8d4 100644
--- a/src/msvcbuild.bat
+++ b/src/msvcbuild.bat
@@ -5,6 +5,7 @@
@rem Then cd to this directory and run this script. Use the following
@rem options (in order), if needed. The default is a dynamic release build.
@rem
+@rem nogc64 disable LJ_GC64 mode for x64
@rem debug emit debug symbols
@rem amalg amalgamated build
@rem static static linkage
@@ -20,10 +21,11 @@
@set LJLIB=lib /nologo /nodefaultlib
@set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
+@set DASC=vm_x64.dasc
@set LJDLLNAME=lua51.dll
@set LJLIBNAME=lua51.lib
@set BUILDTYPE=release
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD
@@ -36,10 +38,17 @@ if exist minilua.exe.manifest^
@set LJARCH=x64
@minilua
@if errorlevel 8 goto :X64
+@set DASC=vm_x86.dasc
@set DASMFLAGS=-D WIN -D JIT -D FFI
@set LJARCH=x86
+@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
:X64
-minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
+@if "%1" neq "nogc64" goto :GC64
+@shift
+@set DASC=vm_x86.dasc
+@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
+:GC64
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
@if errorlevel 1 goto :BAD
%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
@@ -68,6 +77,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
@shift
@set BUILDTYPE=debug
@set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS%
+@set LJLINK=%LJLINK% /opt:ref /opt:icf /incremental:no
:NODEBUG
@set LJLINK=%LJLINK% /%BUILDTYPE%
@if "%1"=="amalg" goto :AMALGDLL
diff --git a/src/nxbuild.bat b/src/nxbuild.bat
new file mode 100644
index 00000000..c4a21f05
--- /dev/null
+++ b/src/nxbuild.bat
@@ -0,0 +1,159 @@
+@rem Script to build LuaJIT with NintendoSDK + NX Addon.
+@rem Donated to the public domain by Swyter.
+@rem
+@rem To run this script you must open a "Native Tools Command Prompt for VS".
+@rem
+@rem Either the x86 version for NX32, or x64 for the NX64 target.
+@rem This is because the pointer size of the LuaJIT host tools (buildvm.exe)
+@rem must match the cross-compiled target (32 or 64 bits).
+@rem
+@rem Then cd to this directory and run this script.
+@rem
+@rem Recommended invocation:
+@rem
+@rem nxbuild # release build, amalgamated
+@rem nxbuild debug # debug build, amalgamated
+@rem
+@rem Additional command-line options (not generally recommended):
+@rem
+@rem noamalg # (after debug) non-amalgamated build
+
+@if not defined INCLUDE goto :FAIL
+@if not defined NINTENDO_SDK_ROOT goto :FAIL
+@if not defined PLATFORM goto :FAIL
+
+@if "%platform%" == "x86" goto :DO_NX32
+@if "%platform%" == "x64" goto :DO_NX64
+
+@echo Error: Current host platform is %platform%!
+@echo.
+@goto :FAIL
+
+@setlocal
+
+:DO_NX32
+@set DASC=vm_arm.dasc
+@set DASMFLAGS= -D HFABI -D FPU
+@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM
+@set HOST_PTR_SIZE=4
+goto :BEGIN
+
+:DO_NX64
+@set DASC=vm_arm64.dasc
+@set DASMFLAGS= -D ENDIAN_LE
+@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
+@set HOST_PTR_SIZE=8
+
+:BEGIN
+@rem ---- Host compiler ----
+@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /wo4146 /wo4244 /D_CRT_SECURE_NO_DEPRECATE
+@set LJLINK=link /nologo
+@set LJMT=mt /nologo
+@set DASMDIR=..\dynasm
+@set DASM=%DASMDIR%\dynasm.lua
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+
+%LJCOMPILE% host\minilua.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:minilua.exe minilua.obj
+@if errorlevel 1 goto :BAD
+if exist minilua.exe.manifest^
+ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+
+@rem Check that we have the right 32/64 bit host compiler to generate the right virtual machine files.
+@minilua
+@if "%ERRORLEVEL%" == "%HOST_PTR_SIZE%" goto :PASSED_PTR_CHECK
+
+@echo The pointer size of the host in bytes (%HOST_PTR_SIZE%) does not match the expected value (%errorlevel%).
+@echo Check that the script is being ran under the correct x86/x64 VS prompt.
+@goto :BAD
+
+:PASSED_PTR_CHECK
+@set DASMFLAGS=%DASMFLAGS% %DASMTARGET% -D LJ_TARGET_NX -D LUAJIT_OS=LUAJIT_OS_OTHER -D LUAJIT_DISABLE_JIT -D LUAJIT_DISABLE_FFI
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
+@if errorlevel 1 goto :BAD
+%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% -D LJ_TARGET_NX -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI host\buildvm*.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:buildvm.exe buildvm*.obj
+@if errorlevel 1 goto :BAD
+if exist buildvm.exe.manifest^
+ %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+
+buildvm -m elfasm -o lj_vm.s
+@if errorlevel 1 goto :BAD
+buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m libdef -o lj_libdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m recdef -o lj_recdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+@if errorlevel 1 goto :BAD
+
+@rem ---- Cross compiler ----
+@if "%platform%" neq "x64" goto :NX32_CROSSBUILD
+@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\clang" -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
+@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-ar" rc
+@set TARGETLIB_SUFFIX=nx64
+
+%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-as -o lj_vm.o lj_vm.s
+goto :DEBUGCHECK
+
+:NX32_CROSSBUILD
+@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\clang" -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
+@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-ar" rc
+@set TARGETLIB_SUFFIX=nx32
+
+%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-as -o lj_vm.o lj_vm.s
+:DEBUGCHECK
+
+@if "%1" neq "debug" goto :NODEBUG
+@shift
+@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_DEBUG -g -O0
+@set TARGETLIB=libluajitD_%TARGETLIB_SUFFIX%.a
+goto :BUILD
+:NODEBUG
+@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_RELEASE -O3
+@set TARGETLIB=libluajit_%TARGETLIB_SUFFIX%.a
+:BUILD
+del %TARGETLIB%
+@if "%1" neq "noamalg" goto :AMALG
+for %%f in (lj_*.c lib_*.c) do (
+ %LJCOMPILE% %%f
+ @if errorlevel 1 goto :BAD
+)
+
+%LJLIB% %TARGETLIB% lj_*.o lib_*.o
+@if errorlevel 1 goto :BAD
+@goto :NOAMALG
+:AMALG
+%LJCOMPILE% ljamalg.c
+@if errorlevel 1 goto :BAD
+%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o
+@if errorlevel 1 goto :BAD
+:NOAMALG
+
+@del *.o *.obj *.manifest minilua.exe buildvm.exe
+@echo.
+@echo === Successfully built LuaJIT for Nintendo Switch (%TARGETLIB_SUFFIX%) ===
+
+@goto :END
+:BAD
+@echo.
+@echo *******************************************************
+@echo *** Build FAILED -- Please check the error messages ***
+@echo *******************************************************
+@goto :END
+:FAIL
+@echo To run this script you must open a "Native Tools Command Prompt for VS".
+@echo.
+@echo Either the x86 version for NX32, or x64 for the NX64 target.
+@echo This is because the pointer size of the LuaJIT host tools (buildvm.exe)
+@echo must match the cross-compiled target (32 or 64 bits).
+@echo.
+@echo Keep in mind that NintendoSDK + NX Addon must be installed, too.
+:END
diff --git a/src/ps4build.bat b/src/ps4build.bat
index 337a44fa..fdd09d81 100644
--- a/src/ps4build.bat
+++ b/src/ps4build.bat
@@ -2,7 +2,19 @@
@rem Donated to the public domain.
@rem
@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
+@rem or "VS2015 x64 Native Tools Command Prompt".
+@rem
@rem Then cd to this directory and run this script.
+@rem
+@rem Recommended invocation:
+@rem
+@rem ps4build release build, amalgamated, 64-bit GC
+@rem ps4build debug debug build, amalgamated, 64-bit GC
+@rem
+@rem Additional command-line options (not generally recommended):
+@rem
+@rem gc32 (before debug) 32-bit GC
+@rem noamalg (after debug) non-amalgamated build
@if not defined INCLUDE goto :FAIL
@if not defined SCE_ORBIS_SDK_DIR goto :FAIL
@@ -14,7 +26,15 @@
@set LJMT=mt /nologo
@set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+@set GC64=
+@set DASC=vm_x64.dasc
+
+@if "%1" neq "gc32" goto :NOGC32
+@shift
+@set GC64=-DLUAJIT_DISABLE_GC64
+@set DASC=vm_x86.dasc
+:NOGC32
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD
@@ -28,10 +48,10 @@ if exist minilua.exe.manifest^
@if not errorlevel 8 goto :FAIL
@set DASMFLAGS=-D P64 -D NO_UNWIND
-minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
@if errorlevel 1 goto :BAD
-%LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
+%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
@if errorlevel 1 goto :BAD
%LJLINK% /out:buildvm.exe buildvm*.obj
@if errorlevel 1 goto :BAD
@@ -54,7 +74,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
@if errorlevel 1 goto :BAD
@rem ---- Cross compiler ----
-@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI
+@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
@set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus
@set INCLUDE=""
@@ -63,14 +83,14 @@ orbis-as -o lj_vm.o lj_vm.s
@if "%1" neq "debug" goto :NODEBUG
@shift
@set LJCOMPILE=%LJCOMPILE% -g -O0
-@set TARGETLIB=libluajitD.a
+@set TARGETLIB=libluajitD_ps4.a
goto :BUILD
:NODEBUG
@set LJCOMPILE=%LJCOMPILE% -O2
-@set TARGETLIB=libluajit.a
+@set TARGETLIB=libluajit_ps4.a
:BUILD
del %TARGETLIB%
-@if "%1"=="amalg" goto :AMALG
+@if "%1" neq "noamalg" goto :AMALG
for %%f in (lj_*.c lib_*.c) do (
%LJCOMPILE% %%f
@if errorlevel 1 goto :BAD
diff --git a/src/ps5build.bat b/src/ps5build.bat
new file mode 100644
index 00000000..0b1ebd5b
--- /dev/null
+++ b/src/ps5build.bat
@@ -0,0 +1,123 @@
+@rem Script to build LuaJIT with the PS5 SDK.
+@rem Donated to the public domain.
+@rem
+@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
+@rem or "VS20xx x64 Native Tools Command Prompt".
+@rem
+@rem Then cd to this directory and run this script.
+@rem
+@rem Recommended invocation:
+@rem
+@rem ps5build release build, amalgamated, 64-bit GC
+@rem ps5build debug debug build, amalgamated, 64-bit GC
+@rem
+@rem Additional command-line options (not generally recommended):
+@rem
+@rem gc32 (before debug) 32-bit GC
+@rem noamalg (after debug) non-amalgamated build
+
+@if not defined INCLUDE goto :FAIL
+@if not defined SCE_PROSPERO_SDK_DIR goto :FAIL
+
+@setlocal
+@rem ---- Host compiler ----
+@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
+@set LJLINK=link /nologo
+@set LJMT=mt /nologo
+@set DASMDIR=..\dynasm
+@set DASM=%DASMDIR%\dynasm.lua
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+@set GC64=
+@set DASC=vm_x64.dasc
+
+@if "%1" neq "gc32" goto :NOGC32
+@shift
+@set GC64=-DLUAJIT_DISABLE_GC64
+@set DASC=vm_x86.dasc
+:NOGC32
+
+%LJCOMPILE% host\minilua.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:minilua.exe minilua.obj
+@if errorlevel 1 goto :BAD
+if exist minilua.exe.manifest^
+ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+
+@rem Check for 64 bit host compiler.
+@minilua
+@if not errorlevel 8 goto :FAIL
+
+@set DASMFLAGS=-D P64 -D NO_UNWIND
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
+@if errorlevel 1 goto :BAD
+
+%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:buildvm.exe buildvm*.obj
+@if errorlevel 1 goto :BAD
+if exist buildvm.exe.manifest^
+ %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+
+buildvm -m elfasm -o lj_vm.s
+@if errorlevel 1 goto :BAD
+buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m libdef -o lj_libdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m recdef -o lj_recdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+@if errorlevel 1 goto :BAD
+
+@rem ---- Cross compiler ----
+@set LJCOMPILE="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
+@set LJLIB="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-llvm-ar" rcus
+@set INCLUDE=""
+
+%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-llvm-as -o lj_vm.o lj_vm.s
+
+@if "%1" neq "debug" goto :NODEBUG
+@shift
+@set LJCOMPILE=%LJCOMPILE% -g -O0
+@set TARGETLIB=libluajitD_ps5.a
+goto :BUILD
+:NODEBUG
+@set LJCOMPILE=%LJCOMPILE% -O2
+@set TARGETLIB=libluajit_ps5.a
+:BUILD
+del %TARGETLIB%
+@if "%1" neq "noamalg" goto :AMALG
+for %%f in (lj_*.c lib_*.c) do (
+ %LJCOMPILE% %%f
+ @if errorlevel 1 goto :BAD
+)
+
+%LJLIB% %TARGETLIB% lj_*.o lib_*.o
+@if errorlevel 1 goto :BAD
+@goto :NOAMALG
+:AMALG
+%LJCOMPILE% ljamalg.c
+@if errorlevel 1 goto :BAD
+%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o
+@if errorlevel 1 goto :BAD
+:NOAMALG
+
+@del *.o *.obj *.manifest minilua.exe buildvm.exe
+@echo.
+@echo === Successfully built LuaJIT for PS5 ===
+
+@goto :END
+:BAD
+@echo.
+@echo *******************************************************
+@echo *** Build FAILED -- Please check the error messages ***
+@echo *******************************************************
+@goto :END
+:FAIL
+@echo To run this script you must open a "Visual Studio .NET Command Prompt"
+@echo (64 bit host compiler). The PS5 Prospero SDK must be installed, too.
+:END
diff --git a/src/psvitabuild.bat b/src/psvitabuild.bat
index 3991dc65..2980e157 100644
--- a/src/psvitabuild.bat
+++ b/src/psvitabuild.bat
@@ -14,7 +14,7 @@
@set LJMT=mt /nologo
@set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc
index 4a13c68b..770c1602 100644
--- a/src/vm_arm.dasc
+++ b/src/vm_arm.dasc
@@ -99,6 +99,7 @@
|.type NODE, Node
|.type NARGS8, int
|.type TRACE, GCtrace
+|.type SBUF, SBuf
|
|//-----------------------------------------------------------------------
|
@@ -372,6 +373,17 @@ static void build_subroutines(BuildCtx *ctx)
| st_vmstate CARG2
| b ->vm_returnc
|
+ |->vm_unwind_ext: // Complete external unwind.
+#if !LJ_NO_UNWIND
+ | push {r0, r1, r2, lr}
+ | bl extern _Unwind_Complete
+ | ldr r0, [sp]
+ | bl extern _Unwind_DeleteException
+ | pop {r0, r1, r2, lr}
+ | mov r0, r1
+ | bx r2
+#endif
+ |
|//-----------------------------------------------------------------------
|//-- Grow stack for calls -----------------------------------------------
|//-----------------------------------------------------------------------
@@ -418,13 +430,14 @@ static void build_subroutines(BuildCtx *ctx)
| add CARG2, sp, #CFRAME_RESUME
| ldrb CARG1, L->status
| str CARG3, SAVE_ERRF
- | str CARG2, L->cframe
+ | str L, SAVE_PC // Any value outside of bytecode is ok.
| str CARG3, SAVE_CFRAME
| cmp CARG1, #0
- | str L, SAVE_PC // Any value outside of bytecode is ok.
+ | str CARG2, L->cframe
| beq >3
|
| // Resume after yield (like a return).
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
| mov RA, BASE
| ldr BASE, L->base
| ldr CARG1, L->top
@@ -458,14 +471,15 @@ static void build_subroutines(BuildCtx *ctx)
| str CARG3, SAVE_NRES
| mov L, CARG1
| str CARG1, SAVE_L
- | mov BASE, CARG2
- | str sp, L->cframe // Add our C frame to cframe chain.
| ldr DISPATCH, L->glref // Setup pointer to dispatch table.
+ | mov BASE, CARG2
| str CARG1, SAVE_PC // Any value outside of bytecode is ok.
| str RC, SAVE_CFRAME
| add DISPATCH, DISPATCH, #GG_G2DISP
+ | str sp, L->cframe // Add our C frame to cframe chain.
|
|3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
| ldr RB, L->base // RB = old base (for vmeta_call).
| ldr CARG1, L->top
| mov MASKR8, #255
@@ -491,20 +505,21 @@ static void build_subroutines(BuildCtx *ctx)
| mov L, CARG1
| ldr RA, L:CARG1->stack
| str CARG1, SAVE_L
+ | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
| ldr RB, L->top
| str CARG1, SAVE_PC // Any value outside of bytecode is ok.
| ldr RC, L->cframe
+ | add DISPATCH, DISPATCH, #GG_G2DISP
| sub RA, RA, RB // Compute -savestack(L, L->top).
- | str sp, L->cframe // Add our C frame to cframe chain.
| mov RB, #0
| str RA, SAVE_NRES // Neg. delta means cframe w/o frame.
| str RB, SAVE_ERRF // No error function.
| str RC, SAVE_CFRAME
+ | str sp, L->cframe // Add our C frame to cframe chain.
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
| blx CARG4 // (lua_State *L, lua_CFunction func, void *ud)
- | ldr DISPATCH, L->glref // Setup pointer to dispatch table.
| movs BASE, CRET1
- | mov PC, #FRAME_CP
- | add DISPATCH, DISPATCH, #GG_G2DISP
+ | mov PC, #FRAME_CP
| bne <3 // Else continue with the call.
| b ->vm_leave_cp // No base? Just remove C frame.
|
@@ -614,6 +629,16 @@ static void build_subroutines(BuildCtx *ctx)
| ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
| b ->vm_call_dispatch_f
|
+ |->vmeta_tgetr:
+ | .IOS mov RC, BASE
+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // Returns cTValue * or NULL.
+ | .IOS mov BASE, RC
+ | cmp CRET1, #0
+ | ldrdne CARG12, [CRET1]
+ | mvneq CARG2, #~LJ_TNIL
+ | b ->BC_TGETR_Z
+ |
|//-----------------------------------------------------------------------
|
|->vmeta_tsets1:
@@ -671,6 +696,16 @@ static void build_subroutines(BuildCtx *ctx)
| ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
| b ->vm_call_dispatch_f
|
+ |->vmeta_tsetr:
+ | str BASE, L->base
+ | .IOS mov RC, BASE
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // Returns TValue *.
+ | .IOS mov BASE, RC
+ | b ->BC_TSETR_Z
+ |
|//-- Comparison metamethods ---------------------------------------------
|
|->vmeta_comp:
@@ -735,6 +770,17 @@ static void build_subroutines(BuildCtx *ctx)
| b <3
|.endif
|
+ |->vmeta_istype:
+ | sub PC, PC, #4
+ | str BASE, L->base
+ | mov CARG1, L
+ | lsr CARG2, RA, #3
+ | mov CARG3, RC
+ | str PC, SAVE_PC
+ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ | .IOS ldr BASE, L->base
+ | b ->cont_nop
+ |
|//-- Arithmetic metamethods ---------------------------------------------
|
|->vmeta_arith_vn:
@@ -966,9 +1012,9 @@ static void build_subroutines(BuildCtx *ctx)
| cmp TAB:RB, #0
| beq ->fff_restv
| ldr CARG3, TAB:RB->hmask
- | ldr CARG4, STR:RC->hash
+ | ldr CARG4, STR:RC->sid
| ldr NODE:INS, TAB:RB->node
- | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask
+ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
| add CARG3, CARG3, CARG3, lsl #1
| add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
|3: // Rearranged logic, because we expect _not_ to find the key.
@@ -1052,7 +1098,7 @@ static void build_subroutines(BuildCtx *ctx)
| ffgccheck
| mov CARG1, L
| mov CARG2, BASE
- | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o)
+ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
| // Returns GCstr *.
| ldr BASE, L->base
| mvn CARG2, #~LJ_TSTR
@@ -1065,24 +1111,18 @@ static void build_subroutines(BuildCtx *ctx)
| checktab CARG2, ->fff_fallback
| strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil.
| ldr PC, [BASE, FRAME_PC]
- | mov CARG2, CARG1
- | str BASE, L->base // Add frame since C call can throw.
- | mov CARG1, L
- | str BASE, L->top // Dummy frame length is ok.
- | add CARG3, BASE, #8
- | str PC, SAVE_PC
- | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
- | // Returns 0 at end of traversal.
+ | add CARG2, BASE, #8
+ | sub CARG3, BASE, #8
+ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ | // Returns 1=found, 0=end, -1=error.
| .IOS ldr BASE, L->base
| cmp CRET1, #0
- | mvneq CRET2, #~LJ_TNIL
- | beq ->fff_restv // End of traversal: return nil.
- | ldrd CARG12, [BASE, #8] // Copy key and value to results.
- | ldrd CARG34, [BASE, #16]
- | mov RC, #(2+1)*8
- | strd CARG12, [BASE, #-8]
- | strd CARG34, [BASE]
- | b ->fff_res
+ | mov RC, #(2+1)*8
+ | bgt ->fff_res // Found key/value.
+ | bmi ->fff_fallback // Invalid key.
+ | // End of traversal: return nil.
+ | mvn CRET2, #~LJ_TNIL
+ | b ->fff_restv
|
|.ffunc_1 pairs
| checktab CARG2, ->fff_fallback
@@ -1230,9 +1270,10 @@ static void build_subroutines(BuildCtx *ctx)
| ldr CARG3, L:RA->base
| mv_vmstate CARG2, INTERP
| ldr CARG4, L:RA->top
- | st_vmstate CARG2
| cmp CRET1, #LUA_YIELD
| ldr BASE, L->base
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
+ | st_vmstate CARG2
| bhi >8
| subs RC, CARG4, CARG3
| ldr CARG1, L->maxstack
@@ -1500,19 +1541,6 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern2 atan2
| math_extern2 fmod
|
- |->ff_math_deg:
- |.if FPU
- | .ffunc_d math_rad
- | vldr d1, CFUNC:CARG3->upvalue[0]
- | vmul.f64 d0, d0, d1
- | b ->fff_resd
- |.else
- | .ffunc_n math_rad
- | ldrd CARG34, CFUNC:CARG3->upvalue[0]
- | bl extern __aeabi_dmul
- | b ->fff_restv
- |.endif
- |
|.if HFABI
| .ffunc math_ldexp
| ldr CARG4, [BASE, #4]
@@ -1682,17 +1710,11 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
|.endmacro
|
- | math_minmax math_min, gt, hi
- | math_minmax math_max, lt, lo
+ | math_minmax math_min, gt, pl
+ | math_minmax math_max, lt, le
|
|//-- String library -----------------------------------------------------
|
- |.ffunc_1 string_len
- | checkstr CARG2, ->fff_fallback
- | ldr CARG1, STR:CARG1->len
- | mvn CARG2, #~LJ_TISNUM
- | b ->fff_restv
- |
|.ffunc string_byte // Only handle the 1-arg case here.
| ldrd CARG12, [BASE]
| ldr PC, [BASE, FRAME_PC]
@@ -1725,6 +1747,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov CARG1, L
| str PC, SAVE_PC
| bl extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:
| // Returns GCstr *.
| ldr BASE, L->base
| mvn CARG2, #~LJ_TSTR
@@ -1768,91 +1791,28 @@ static void build_subroutines(BuildCtx *ctx)
| mvn CARG2, #~LJ_TSTR
| b ->fff_restv
|
- |.ffunc string_rep // Only handle the 1-char case inline.
- | ffgccheck
- | ldrd CARG12, [BASE]
- | ldrd CARG34, [BASE, #8]
- | cmp NARGS8:RC, #16
- | bne ->fff_fallback // Exactly 2 arguments
- | checktp CARG2, LJ_TSTR
- | checktpeq CARG4, LJ_TISNUM
- | bne ->fff_fallback
- | subs CARG4, CARG3, #1
- | ldr CARG2, STR:CARG1->len
- | blt ->fff_emptystr // Count <= 0?
- | cmp CARG2, #1
- | blo ->fff_emptystr // Zero-length string?
- | bne ->fff_fallback // Fallback for > 1-char strings.
- | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
- | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
- | ldr CARG1, STR:CARG1[1]
- | cmp RB, CARG3
- | blo ->fff_fallback
- |1: // Fill buffer with char.
- | strb CARG1, [CARG2, CARG4]
- | subs CARG4, CARG4, #1
- | bge <1
- | b ->fff_newstr
- |
- |.ffunc string_reverse
- | ffgccheck
- | ldrd CARG12, [BASE]
- | cmp NARGS8:RC, #8
- | blo ->fff_fallback
- | checkstr CARG2, ->fff_fallback
- | ldr CARG3, STR:CARG1->len
- | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
- | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
- | mov CARG4, CARG3
- | add CARG1, STR:CARG1, #sizeof(GCstr)
- | cmp RB, CARG3
- | blo ->fff_fallback
- |1: // Reverse string copy.
- | ldrb RB, [CARG1], #1
- | subs CARG4, CARG4, #1
- | blt ->fff_newstr
- | strb RB, [CARG2, CARG4]
- | b <1
- |
- |.macro ffstring_case, name, lo
- | .ffunc name
+ |.macro ffstring_op, name
+ | .ffunc string_ .. name
| ffgccheck
- | ldrd CARG12, [BASE]
+ | ldr CARG3, [BASE, #4]
| cmp NARGS8:RC, #8
+ | ldr STR:CARG2, [BASE]
| blo ->fff_fallback
- | checkstr CARG2, ->fff_fallback
- | ldr CARG3, STR:CARG1->len
- | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)]
- | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)]
- | mov CARG4, #0
- | add CARG1, STR:CARG1, #sizeof(GCstr)
- | cmp RB, CARG3
- | blo ->fff_fallback
- |1: // ASCII case conversion.
- | ldrb RB, [CARG1, CARG4]
- | cmp CARG4, CARG3
- | bhs ->fff_newstr
- | sub RC, RB, #lo
- | cmp RC, #26
- | eorlo RB, RB, #0x20
- | strb RB, [CARG2, CARG4]
- | add CARG4, CARG4, #1
- | b <1
+ | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf)
+ | checkstr CARG3, ->fff_fallback
+ | ldr CARG4, SBUF:CARG1->b
+ | str BASE, L->base
+ | str PC, SAVE_PC
+ | str L, SBUF:CARG1->L
+ | str CARG4, SBUF:CARG1->w
+ | bl extern lj_buf_putstr_ .. name
+ | bl extern lj_buf_tostr
+ | b ->fff_resstr
|.endmacro
|
- |ffstring_case string_lower, 65
- |ffstring_case string_upper, 97
- |
- |//-- Table library ------------------------------------------------------
- |
- |.ffunc_1 table_getn
- | checktab CARG2, ->fff_fallback
- | .IOS mov RA, BASE
- | bl extern lj_tab_len // (GCtab *t)
- | // Returns uint32_t (but less than 2^31).
- | .IOS mov BASE, RA
- | mvn CARG2, #~LJ_TISNUM
- | b ->fff_restv
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
|
|//-- Bit library --------------------------------------------------------
|
@@ -2127,6 +2087,66 @@ static void build_subroutines(BuildCtx *ctx)
| ldr INS, [PC, #-4]
| bx CRET1
|
+ |->cont_stitch: // Trace stitching.
+ |.if JIT
+ | // RA = resultptr, CARG4 = meta base
+ | ldr RB, SAVE_MULTRES
+ | ldr INS, [PC, #-4]
+ | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace.
+ | subs RB, RB, #8
+ | decode_RA8 RC, INS // Call base.
+ | beq >2
+ |1: // Move results down.
+ | ldrd CARG12, [RA]
+ | add RA, RA, #8
+ | subs RB, RB, #8
+ | strd CARG12, [BASE, RC]
+ | add RC, RC, #8
+ | bne <1
+ |2:
+ | decode_RA8 RA, INS
+ | decode_RB8 RB, INS
+ | add RA, RA, RB
+ |3:
+ | cmp RA, RC
+ | mvn CARG2, #~LJ_TNIL
+ | bhi >9 // More results wanted?
+ |
+ | ldrh RA, TRACE:CARG3->traceno
+ | ldrh RC, TRACE:CARG3->link
+ | cmp RC, RA
+ | beq ->cont_nop // Blacklisted.
+ | cmp RC, #0
+ | bne =>BC_JLOOP // Jump to stitched trace.
+ |
+ | // Stitch a new trace to the previous trace.
+ | str RA, [DISPATCH, #DISPATCH_J(exitno)]
+ | str L, [DISPATCH, #DISPATCH_J(L)]
+ | str BASE, L->base
+ | sub CARG1, DISPATCH, #-GG_DISP2J
+ | mov CARG2, PC
+ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
+ | ldr BASE, L->base
+ | b ->cont_nop
+ |
+ |9: // Fill up results with nil.
+ | strd CARG12, [BASE, RC]
+ | add RC, RC, #8
+ | b <3
+ |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | mov CARG1, L
+ | str BASE, L->base
+ | mov CARG2, PC
+ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | ldr BASE, L->base
+ | sub PC, PC, #4
+ | b ->cont_nop
+#endif
+ |
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
@@ -2151,14 +2171,14 @@ static void build_subroutines(BuildCtx *ctx)
| add CARG1, CARG1, CARG2, asr #6
| ldr CARG2, [lr, #4] // Load exit stub group offset.
| sub CARG1, CARG1, lr
- | ldr L, [DISPATCH, #DISPATCH_GL(jit_L)]
+ | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)]
| add CARG1, CARG2, CARG1, lsr #2 // Compute exit number.
| ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
| str CARG1, [DISPATCH, #DISPATCH_J(exitno)]
| mov CARG4, #0
- | str L, [DISPATCH, #DISPATCH_J(L)]
| str BASE, L->base
- | str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)]
+ | str L, [DISPATCH, #DISPATCH_J(L)]
+ | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)]
| sub CARG1, DISPATCH, #-GG_DISP2J
| mov CARG2, sp
| bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
@@ -2177,13 +2197,14 @@ static void build_subroutines(BuildCtx *ctx)
| ldr L, SAVE_L
|1:
| cmp CARG1, #0
- | blt >3 // Check for error from exit.
+ | blt >9 // Check for error from exit.
| lsl RC, CARG1, #3
| ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
| str RC, SAVE_MULTRES
| mov CARG3, #0
+ | str BASE, L->base
| ldr CARG2, LFUNC:CARG2->field_pc
- | str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)]
+ | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)]
| mv_vmstate CARG4, INTERP
| ldr KBASE, [CARG2, #PC2PROTO(k)]
| // Modified copy of ins_next which handles function header dispatch, too.
@@ -2192,17 +2213,35 @@ static void build_subroutines(BuildCtx *ctx)
| ldr INS, [PC], #4
| lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8.
| st_vmstate CARG4
+ | cmp OP, #BC_FUNCC+2 // Fast function?
+ | bhs >4
+ |2:
| cmp OP, #BC_FUNCF // Function header?
| ldr OP, [DISPATCH, OP, lsl #2]
| decode_RA8 RA, INS
| lsrlo RC, INS, #16 // No: Decode operands A*8 and D.
| subhs RC, RC, #8
| addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8
+ | ldrhs CARG3, [BASE, FRAME_FUNC]
| bx OP
|
- |3: // Rethrow error from the right C frame.
+ |4: // Check frame below fast function.
+ | ldr CARG1, [BASE, FRAME_PC]
+ | ands CARG2, CARG1, #FRAME_TYPE
+ | bne <2 // Trace stitching continuation?
+ | // Otherwise set KBASE for Lua function below fast function.
+ | ldr CARG3, [CARG1, #-4]
+ | decode_RA8 CARG1, CARG3
+ | sub CARG2, BASE, CARG1
+ | ldr LFUNC:CARG3, [CARG2, #-16]
+ | ldr CARG3, LFUNC:CARG3->field_pc
+ | ldr KBASE, [CARG3, #PC2PROTO(k)]
+ | b <2
+ |
+ |9: // Rethrow error from the right C frame.
+ | rsb CARG2, CARG1, #0
| mov CARG1, L
- | bl extern lj_err_run // (lua_State *L)
+ | bl extern lj_err_trace // (lua_State *L, int errcode)
|.endif
|
|//-----------------------------------------------------------------------
@@ -2385,6 +2424,64 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
+ |.define NEXT_TAB, TAB:CARG1
+ |.define NEXT_RES, CARG1
+ |.define NEXT_IDX, CARG2
+ |.define NEXT_TMP0, CARG3
+ |.define NEXT_TMP1, CARG4
+ |.define NEXT_LIM, r12
+ |.define NEXT_RES_PTR, sp
+ |.define NEXT_RES_VAL, [sp]
+ |.define NEXT_RES_KEY_I, [sp, #8]
+ |.define NEXT_RES_KEY_IT, [sp, #12]
+ |
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+ |// Next idx returned in CRET2.
+ |->vm_next:
+ |.if JIT
+ | ldr NEXT_TMP0, NEXT_TAB->array
+ | ldr NEXT_LIM, NEXT_TAB->asize
+ | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3
+ |1: // Traverse array part.
+ | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM
+ | bhs >5
+ | ldr NEXT_TMP1, [NEXT_TMP0, #4]
+ | str NEXT_IDX, NEXT_RES_KEY_I
+ | add NEXT_TMP0, NEXT_TMP0, #8
+ | add NEXT_IDX, NEXT_IDX, #1
+ | checktp NEXT_TMP1, LJ_TNIL
+ | beq <1 // Skip holes in array part.
+ | ldr NEXT_TMP0, [NEXT_TMP0, #-8]
+ | mov NEXT_RES, NEXT_RES_PTR
+ | strd NEXT_TMP0, NEXT_RES_VAL // Stores NEXT_TMP1, too.
+ | mvn NEXT_TMP0, #~LJ_TISNUM
+ | str NEXT_TMP0, NEXT_RES_KEY_IT
+ | bx lr
+ |
+ |5: // Traverse hash part.
+ | ldr NEXT_TMP0, NEXT_TAB->hmask
+ | ldr NODE:NEXT_RES, NEXT_TAB->node
+ | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1
+ | add NEXT_LIM, NEXT_LIM, NEXT_TMP0
+ | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3
+ |6:
+ | cmp NEXT_IDX, NEXT_LIM
+ | bhi >9
+ | ldr NEXT_TMP1, NODE:NEXT_RES->val.it
+ | checktp NEXT_TMP1, LJ_TNIL
+ | add NEXT_IDX, NEXT_IDX, #1
+ | bxne lr
+ | // Skip holes in hash part.
+ | add NEXT_RES, NEXT_RES, #sizeof(Node)
+ | b <6
+ |
+ |9: // End of iteration. Set the key to nil (not the value).
+ | mvn NEXT_TMP0, #0
+ | mov NEXT_RES, NEXT_RES_PTR
+ | str NEXT_TMP0, NEXT_RES_KEY_IT
+ | bx lr
+ |.endif
+ |
|//-----------------------------------------------------------------------
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
@@ -2832,6 +2929,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_next
break;
+ case BC_ISTYPE:
+ | // RA = src*8, RC = -type
+ | ldrd CARG12, [BASE, RA]
+ | ins_next1
+ | cmn CARG2, RC
+ | ins_next2
+ | bne ->vmeta_istype
+ | ins_next3
+ break;
+ case BC_ISNUM:
+ | // RA = src*8, RC = -(TISNUM-1)
+ | ldrd CARG12, [BASE, RA]
+ | ins_next1
+ | checktp CARG2, LJ_TISNUM
+ | ins_next2
+ | bhs ->vmeta_istype
+ | ins_next3
+ break;
+
/* -- Unary ops --------------------------------------------------------- */
case BC_MOV:
@@ -3436,10 +3552,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|->BC_TGETS_Z:
| // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
| ldr CARG3, TAB:CARG1->hmask
- | ldr CARG4, STR:RC->hash
+ | ldr CARG4, STR:RC->sid
| ldr NODE:INS, TAB:CARG1->node
| mov TAB:RB, TAB:CARG1
- | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask
+ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
| add CARG3, CARG3, CARG3, lsl #1
| add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
|1:
@@ -3502,6 +3618,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bne <1 // 'no __index' flag set: done.
| b ->vmeta_tgetb
break;
+ case BC_TGETR:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | ldr TAB:CARG1, [BASE, RB]
+ | ldr CARG2, [BASE, RC]
+ | ldr CARG4, TAB:CARG1->array
+ | ldr CARG3, TAB:CARG1->asize
+ | add CARG4, CARG4, CARG2, lsl #3
+ | cmp CARG2, CARG3 // In array part?
+ | bhs ->vmeta_tgetr
+ | ldrd CARG12, [CARG4]
+ |->BC_TGETR_Z:
+ | ins_next1
+ | ins_next2
+ | strd CARG12, [BASE, RA]
+ | ins_next3
+ break;
case BC_TSETV:
| decode_RB8 RB, INS
@@ -3565,10 +3699,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|->BC_TSETS_Z:
| // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8
| ldr CARG3, TAB:CARG1->hmask
- | ldr CARG4, STR:RC->hash
+ | ldr CARG4, STR:RC->sid
| ldr NODE:INS, TAB:CARG1->node
| mov TAB:RB, TAB:CARG1
- | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask
+ | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask
| add CARG3, CARG3, CARG3, lsl #1
| mov CARG4, #0
| add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8
@@ -3672,6 +3806,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| barrierback TAB:CARG1, INS, CARG3
| b <2
break;
+ case BC_TSETR:
+ | decode_RB8 RB, INS
+ | decode_RC8 RC, INS
+ | // RA = src*8, RB = table*8, RC = key*8
+ | ldr TAB:CARG2, [BASE, RB]
+ | ldr CARG3, [BASE, RC]
+ | ldrb INS, TAB:CARG2->marked
+ | ldr CARG1, TAB:CARG2->array
+ | ldr CARG4, TAB:CARG2->asize
+ | tst INS, #LJ_GC_BLACK // isblack(table)
+ | add CARG1, CARG1, CARG3, lsl #3
+ | bne >7
+ |2:
+ | cmp CARG3, CARG4 // In array part?
+ | bhs ->vmeta_tsetr
+ |->BC_TSETR_Z:
+ | ldrd CARG34, [BASE, RA]
+ | ins_next1
+ | ins_next2
+ | strd CARG34, [CARG1]
+ | ins_next3
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, INS, RB
+ | b <2
+ break;
case BC_TSETM:
| // RA = base*8 (table at base-1), RC = num_const (start index)
@@ -3812,10 +3972,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_ITERN:
- | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
|.if JIT
- | // NYI: add hotloop, record BC_ITERN.
+ | hotloop
|.endif
+ |->vm_IITERN:
+ | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1))
| add RA, BASE, RA
| ldr TAB:RB, [RA, #-16]
| ldr CARG1, [RA, #-8] // Get index from control var.
@@ -3881,7 +4042,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_next1
| ins_next2
| mov CARG1, #0
- | mvn CARG2, #0x00018000
+ | mvn CARG2, #~LJ_KEYINDEX
| strd CARG1, [RA, #-8] // Initialize control var.
|1:
| ins_next3
@@ -3890,9 +4051,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov OP, #BC_ITERC
| strb CARG1, [PC, #-4]
| sub PC, RC, #0x20000
+ |.if JIT
+ | ldrb CARG1, [PC]
+ | cmp CARG1, #BC_ITERN
+ | bne >6
+ |.endif
| strb OP, [PC] // Subsumes ins_next1.
| ins_next2
| b <1
+ |.if JIT
+ |6: // Unpatch JLOOP.
+ | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)]
+ | ldrh CARG2, [PC, #2]
+ | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2]
+ | // Subsumes ins_next1 and ins_next2.
+ | ldr INS, TRACE:CARG1->startins
+ | bfi INS, OP, #0, #8
+ | str INS, [PC], #4
+ | b <1
+ |.endif
break;
case BC_VARG:
@@ -4269,7 +4446,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| st_vmstate CARG2
| ldr RA, TRACE:RC->mcode
| str BASE, [DISPATCH, #DISPATCH_GL(jit_base)]
- | str L, [DISPATCH, #DISPATCH_GL(jit_L)]
+ | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)]
| bx RA
|.endif
break;
@@ -4387,6 +4564,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ldr BASE, L->base
| mv_vmstate CARG3, INTERP
| ldr CRET2, L->top
+ | str L, [DISPATCH, #DISPATCH_GL(cur_L)]
| lsl RC, CRET1, #3
| st_vmstate CARG3
| ldr PC, [BASE, FRAME_PC]
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc
new file mode 100644
index 00000000..3448d0d2
--- /dev/null
+++ b/src/vm_arm64.dasc
@@ -0,0 +1,4158 @@
+|// Low-level VM code for ARM64 CPUs.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+|
+|.arch arm64
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|// Note: The ragged indentation of the instructions is intentional.
+|// The starting columns indicate data dependencies.
+|
+|//-----------------------------------------------------------------------
+|
+|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance:
+|//
+|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr
+|// x18 is reserved on most platforms. Don't use it, save it or restore it.
+|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp,
+|// depending on the instruction.
+|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp
+|//
+|// x0-x7/v0-v7 hold parameters and results.
+|
+|// Fixed register assignments for the interpreter.
+|
+|// The following must be C callee-save.
+|.define BASE, x19 // Base of current Lua stack frame.
+|.define KBASE, x20 // Constants of current Lua function.
+|.define PC, x21 // Next PC.
+|.define GLREG, x22 // Global state.
+|.define LREG, x23 // Register holding lua_State (also in SAVE_L).
+|.define TISNUM, x24 // Constant LJ_TISNUM << 47.
+|.define TISNUMhi, x25 // Constant LJ_TISNUM << 15.
+|.define TISNIL, x26 // Constant -1LL.
+|.define fp, x29 // Yes, we have to maintain a frame pointer.
+|
+|.define ST_INTERP, w26 // Constant -1.
+|
+|// The following temporaries are not saved across C calls, except for RA/RC.
+|.define RA, x27
+|.define RC, x28
+|.define RB, x17
+|.define RAw, w27
+|.define RCw, w28
+|.define RBw, w17
+|.define INS, x16
+|.define INSw, w16
+|.define ITYPE, x15
+|.define TMP0, x8
+|.define TMP1, x9
+|.define TMP2, x10
+|.define TMP3, x11
+|.define TMP0w, w8
+|.define TMP1w, w9
+|.define TMP2w, w10
+|.define TMP3w, w11
+|
+|// Calling conventions. Also used as temporaries.
+|.define CARG1, x0
+|.define CARG2, x1
+|.define CARG3, x2
+|.define CARG4, x3
+|.define CARG5, x4
+|.define CARG1w, w0
+|.define CARG2w, w1
+|.define CARG3w, w2
+|.define CARG4w, w3
+|.define CARG5w, w4
+|
+|.define FARG1, d0
+|.define FARG2, d1
+|
+|.define CRET1, x0
+|.define CRET1w, w0
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|
+|.define CFRAME_SPACE, 208
+|//----- 16 byte aligned, <-- sp entering interpreter
+|.define SAVE_FP_LR_, 192
+|.define SAVE_GPR_, 112 // 112+10*8: 64 bit GPR saves
+|.define SAVE_FPR_, 48 // 48+8*8: 64 bit FPR saves
+|// Unused [sp, #44] // 32 bit values
+|.define SAVE_NRES, [sp, #40]
+|.define SAVE_ERRF, [sp, #36]
+|.define SAVE_MULTRES, [sp, #32]
+|.define TMPD, [sp, #24] // 64 bit values
+|.define SAVE_L, [sp, #16]
+|.define SAVE_PC, [sp, #8]
+|.define SAVE_CFRAME, [sp, #0]
+|//----- 16 byte aligned, <-- sp while in interpreter.
+|
+|.define TMPDofs, #24
+|
+|.macro save_, gpr1, gpr2, fpr1, fpr2
+| stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
+| stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
+|.endmacro
+|.macro rest_, gpr1, gpr2, fpr1, fpr2
+| ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8]
+| ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8]
+|.endmacro
+|
+|.macro saveregs
+| sub sp, sp, # CFRAME_SPACE
+| stp fp, lr, [sp, # SAVE_FP_LR_]
+| add fp, sp, # SAVE_FP_LR_
+| stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
+| save_ 21, 22, 8, 9
+| save_ 23, 24, 10, 11
+| save_ 25, 26, 12, 13
+| save_ 27, 28, 14, 15
+|.endmacro
+|.macro restoreregs
+| ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8]
+| rest_ 21, 22, 8, 9
+| rest_ 23, 24, 10, 11
+| rest_ 25, 26, 12, 13
+| rest_ 27, 28, 14, 15
+| ldp fp, lr, [sp, # SAVE_FP_LR_]
+| add sp, sp, # CFRAME_SPACE
+|.endmacro
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L, lua_State, LREG
+|.type GL, global_State, GLREG
+|.type TVALUE, TValue
+|.type GCOBJ, GCobj
+|.type STR, GCstr
+|.type TAB, GCtab
+|.type LFUNC, GCfuncL
+|.type CFUNC, GCfuncC
+|.type PROTO, GCproto
+|.type UPVAL, GCupval
+|.type NODE, Node
+|.type NARGS8, int
+|.type TRACE, GCtrace
+|.type SBUF, SBuf
+|
+|//-----------------------------------------------------------------------
+|
+|// Trap for not-yet-implemented parts.
+|.macro NYI; brk; .endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Access to frame relative to BASE.
+|.define FRAME_FUNC, #-16
+|.define FRAME_PC, #-8
+|
+|// Endian-specific defines.
+|.if ENDIAN_LE
+|.define LO, 0
+|.define OFS_RD, 2
+|.define OFS_RB, 3
+|.define OFS_RA, 1
+|.define OFS_OP, 0
+|.else
+|.define LO, 4
+|.define OFS_RD, 0
+|.define OFS_RB, 0
+|.define OFS_RA, 2
+|.define OFS_OP, 3
+|.endif
+|
+|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro
+|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro
+|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro
+|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro
+|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro
+|
+|// Instruction decode+dispatch.
+|.macro ins_NEXT
+| ldr INSw, [PC], #4
+| add TMP1, GL, INS, uxtb #3
+| decode_RA RA, INS
+| ldr TMP0, [TMP1, #GG_G2DISP]
+| decode_RD RC, INS
+| br TMP0
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+| .define ins_next, ins_NEXT
+| .define ins_next_, ins_NEXT
+|.else
+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+| // Affects only certain kinds of benchmarks (and only with -j off).
+| .macro ins_next
+| b ->ins_next
+| .endmacro
+| .macro ins_next_
+| ->ins_next:
+| ins_NEXT
+| .endmacro
+|.endif
+|
+|// Call decode and dispatch.
+|.macro ins_callt
+| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+| ldr PC, LFUNC:CARG3->pc
+| ldr INSw, [PC], #4
+| add TMP1, GL, INS, uxtb #3
+| decode_RA RA, INS
+| ldr TMP0, [TMP1, #GG_G2DISP]
+| add RA, BASE, RA, lsl #3
+| br TMP0
+|.endmacro
+|
+|.macro ins_call
+| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
+| str PC, [BASE, FRAME_PC]
+| ins_callt
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to check the TValue type and extract the GCobj. Branch on failure.
+|.macro checktp, reg, tp, target
+| asr ITYPE, reg, #47
+| cmn ITYPE, #-tp
+| and reg, reg, #LJ_GCVMASK
+| bne target
+|.endmacro
+|.macro checktp, dst, reg, tp, target
+| asr ITYPE, reg, #47
+| cmn ITYPE, #-tp
+| and dst, reg, #LJ_GCVMASK
+| bne target
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
+|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
+|.macro checkint, reg, target
+| cmp TISNUMhi, reg, lsr #32
+| bne target
+|.endmacro
+|.macro checknum, reg, target
+| cmp TISNUMhi, reg, lsr #32
+| bls target
+|.endmacro
+|.macro checknumber, reg, target
+| cmp TISNUMhi, reg, lsr #32
+| blo target
+|.endmacro
+|
+|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro
+|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro
+|
+#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field))
+|
+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|.macro hotcheck, delta
+| lsr CARG1, PC, #1
+| and CARG1, CARG1, #126
+| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT
+| ldrh CARG2w, [GL, CARG1]
+| subs CARG2, CARG2, #delta
+| strh CARG2w, [GL, CARG1]
+|.endmacro
+|
+|.macro hotloop
+| hotcheck HOTCOUNT_LOOP
+| blo ->vm_hotloop
+|.endmacro
+|
+|.macro hotcall
+| hotcheck HOTCOUNT_CALL
+| blo ->vm_hotcall
+|.endmacro
+|
+|// Set current VM state.
+|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro
+|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro
+|
+|// Move table write barrier back. Overwrites mark and tmp.
+|.macro barrierback, tab, mark, tmp
+| ldr tmp, GL->gc.grayagain
+| and mark, mark, #~LJ_GC_BLACK // black2gray(tab)
+| str tab, GL->gc.grayagain
+| strb mark, tab->marked
+| str tmp, tab->gclist
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+#if !LJ_DUALNUM
+#error "Only dual-number mode supported for ARM64 target"
+#endif
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+ |.code_sub
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Return handling ----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+ | // See vm_return. Also: RB = previous base.
+ | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0?
+ |
+ | // Return from pcall or xpcall fast func.
+ | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame.
+ | mov_true TMP0
+ | mov BASE, RB
+ | // Prepending may overwrite the pcall frame, so do it at the end.
+ | str TMP0, [RA, #-8]! // Prepend true to results.
+ |
+ |->vm_returnc:
+ | adds RC, RC, #8 // RC = (nresults+1)*8.
+ | mov CRET1, #LUA_YIELD
+ | beq ->vm_unwind_c_eh
+ | str RCw, SAVE_MULTRES
+ | ands CARG1, PC, #FRAME_TYPE
+ | beq ->BC_RET_Z // Handle regular return to Lua.
+ |
+ |->vm_return:
+ | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return
+ | // CARG1 = PC & FRAME_TYPE
+ | and RB, PC, #~FRAME_TYPEP
+ | cmp CARG1, #FRAME_C
+ | sub RB, BASE, RB // RB = previous base.
+ | bne ->vm_returnp
+ |
+ | str RB, L->base
+ | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1.
+ | mv_vmstate TMP0w, C
+ | sub BASE, BASE, #16
+ | subs TMP2, RC, #8
+ | st_vmstate TMP0w
+ | beq >2
+ |1:
+ | subs TMP2, TMP2, #8
+ | ldr TMP0, [RA], #8
+ | str TMP0, [BASE], #8
+ | bne <1
+ |2:
+ | cmp RC, CARG2, lsl #3 // More/less results wanted?
+ | bne >6
+ |3:
+ | str BASE, L->top // Store new top.
+ |
+ |->vm_leave_cp:
+ | ldr RC, SAVE_CFRAME // Restore previous C frame.
+ | mov CRET1, #0 // Ok return status for vm_pcall.
+ | str RC, L->cframe
+ |
+ |->vm_leave_unw:
+ | restoreregs
+ | ret
+ |
+ |6:
+ | bgt >7 // Less results wanted?
+ | // More results wanted. Check stack size and fill up results with nil.
+ | ldr CARG3, L->maxstack
+ | cmp BASE, CARG3
+ | bhs >8
+ | str TISNIL, [BASE], #8
+ | add RC, RC, #8
+ | b <2
+ |
+ |7: // Less results wanted.
+ | cbz CARG2, <3 // LUA_MULTRET+1 case?
+ | sub CARG1, RC, CARG2, lsl #3
+ | sub BASE, BASE, CARG1 // Shrink top.
+ | b <3
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+ | // This can happen if:
+ | // - A C function grows the stack (a lot).
+ | // - The GC shrinks the stack in between.
+ | // - A return back from a lua_call() with (high) nresults adjustment.
+ | str BASE, L->top // Save current top held in BASE (yes).
+ | mov CARG1, L
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldr BASE, L->top // Need the (realloced) L->top in BASE.
+ | ldrsw CARG2, SAVE_NRES
+ | b <2
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
+ | // (void *cframe, int errcode)
+ | mov sp, CARG1
+ | mov CRET1, CARG2
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | ldr L, SAVE_L
+ | mv_vmstate TMP0w, C
+ | ldr GL, L->glref
+ | st_vmstate TMP0w
+ | b ->vm_leave_unw
+ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ | // (void *cframe)
+ | and sp, CARG1, #CFRAME_RAWMASK
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
+ | ldr L, SAVE_L
+ | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+ | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | movn TISNIL, #0
+ | mov RC, #16 // 2 results: false + error message.
+ | ldr BASE, L->base
+ | ldr GL, L->glref // Setup pointer to global state.
+ | mov_false TMP0
+ | sub RA, BASE, #8 // Results start at BASE-8.
+ | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame.
+ | str TMP0, [BASE, #-8] // Prepend false to error message.
+ | st_vmstate ST_INTERP
+ | b ->vm_returnc
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
+ | // CARG1 = L
+ | mov CARG2, #LUA_MINSTACK
+ | b >2
+ |
+ |->vm_growstack_l: // Grow stack for Lua function.
+ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
+ | add RC, BASE, RC
+ | sub RA, RA, BASE
+ | mov CARG1, L
+ | stp BASE, RC, L->base
+ | add PC, PC, #4 // Must point after first instruction.
+ | lsr CARG2, RA, #3
+ |2:
+ | // L->base = new base, L->top = top
+ | str PC, SAVE_PC
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldp BASE, RC, L->base
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | sub NARGS8:RC, RC, BASE
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+ | ins_callt // Just retry the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+ | saveregs
+ | mov L, CARG1
+ | ldr GL, L->glref // Setup pointer to global state.
+ | mov BASE, CARG2
+ | str L, SAVE_L
+ | mov PC, #FRAME_CP
+ | str wzr, SAVE_NRES
+ | add TMP0, sp, #CFRAME_RESUME
+ | ldrb TMP1w, L->status
+ | str wzr, SAVE_ERRF
+ | str L, SAVE_PC // Any value outside of bytecode is ok.
+ | str xzr, SAVE_CFRAME
+ | str TMP0, L->cframe
+ | cbz TMP1w, >3
+ |
+ | // Resume after yield (like a return).
+ | str L, GL->cur_L
+ | mov RA, BASE
+ | ldp BASE, CARG1, L->base
+ | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+ | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | ldr PC, [BASE, FRAME_PC]
+ | strb wzr, L->status
+ | movn TISNIL, #0
+ | sub RC, CARG1, BASE
+ | ands CARG1, PC, #FRAME_TYPE
+ | add RC, RC, #8
+ | st_vmstate ST_INTERP
+ | str RCw, SAVE_MULTRES
+ | beq ->BC_RET_Z
+ | b ->vm_return
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+ | saveregs
+ | mov PC, #FRAME_CP
+ | str CARG4w, SAVE_ERRF
+ | b >1
+ |
+ |->vm_call: // Setup C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1)
+ | saveregs
+ | mov PC, #FRAME_C
+ |
+ |1: // Entry point for vm_pcall above (PC = ftype).
+ | ldr RC, L:CARG1->cframe
+ | str CARG3w, SAVE_NRES
+ | mov L, CARG1
+ | str CARG1, SAVE_L
+ | ldr GL, L->glref // Setup pointer to global state.
+ | mov BASE, CARG2
+ | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ | add TMP0, sp, #0
+ | str RC, SAVE_CFRAME
+ | str TMP0, L->cframe // Add our C frame to cframe chain.
+ |
+ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+ | str L, GL->cur_L
+ | ldp RB, CARG1, L->base // RB = old base (for vmeta_call).
+ | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+ | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | add PC, PC, BASE
+ | movn TISNIL, #0
+ | sub PC, PC, RB // PC = frame delta + frame type
+ | sub NARGS8:RC, CARG1, BASE
+ | st_vmstate ST_INTERP
+ |
+ |->vm_call_dispatch:
+ | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC
+ | ldr CARG3, [BASE, FRAME_FUNC]
+ | checkfunc CARG3, ->vmeta_call
+ |
+ |->vm_call_dispatch_f:
+ | ins_call
+ | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+ | saveregs
+ | mov L, CARG1
+ | ldr RA, L:CARG1->stack
+ | str CARG1, SAVE_L
+ | ldr GL, L->glref // Setup pointer to global state.
+ | ldr RB, L->top
+ | str CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ | ldr RC, L->cframe
+ | sub RA, RA, RB // Compute -savestack(L, L->top).
+ | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | str wzr, SAVE_ERRF // No error function.
+ | add TMP0, sp, #0
+ | str RC, SAVE_CFRAME
+ | str TMP0, L->cframe // Add our C frame to cframe chain.
+ | str L, GL->cur_L
+ | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
+ | mov BASE, CRET1
+ | mov PC, #FRAME_CP
+ | cbnz BASE, <3 // Else continue with the call.
+ | b ->vm_leave_cp // No base? Just remove C frame.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+ | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8
+ | ldr LFUNC:CARG3, [RB, FRAME_FUNC]
+ | ldr CARG1, [BASE, #-32] // Get continuation.
+ | mov CARG4, BASE
+ | mov BASE, RB // Restore caller BASE.
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ |.if FFI
+ | cmp CARG1, #1
+ |.endif
+ | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC].
+ | add TMP0, RA, RC
+ | str TISNIL, [TMP0, #-8] // Ensure one valid arg.
+ |.if FFI
+ | bls >1
+ |.endif
+ | ldr CARG3, LFUNC:CARG3->pc
+ | ldr KBASE, [CARG3, #PC2PROTO(k)]
+ | // BASE = base, RA = resultptr, CARG4 = meta base
+ | br CARG1
+ |
+ |.if FFI
+ |1:
+ | beq ->cont_ffi_callback // cont = 1: return from FFI callback.
+ | // cont = 0: tailcall from C function.
+ | sub CARG4, CARG4, #32
+ | sub RC, CARG4, BASE
+ | b ->vm_call_tail
+ |.endif
+ |
+ |->cont_cat: // RA = resultptr, CARG4 = meta base
+ | ldr INSw, [PC, #-4]
+ | sub CARG2, CARG4, #32
+ | ldr TMP0, [RA]
+ | str BASE, L->base
+ | decode_RB RB, INS
+ | decode_RA RA, INS
+ | add TMP1, BASE, RB, lsl #3
+ | subs TMP1, CARG2, TMP1
+ | beq >1
+ | str TMP0, [CARG2]
+ | lsr CARG3, TMP1, #3
+ | b ->BC_CAT_Z
+ |
+ |1:
+ | str TMP0, [BASE, RA, lsl #3]
+ | b ->cont_nop
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets1:
+ | movn CARG4, #~LJ_TSTR
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG4, STR:RC, CARG4, lsl #47
+ | b >2
+ |
+ |->vmeta_tgets:
+ | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48
+ | str CARG2, GL->tmptv
+ | add CARG2, GL, #offsetof(global_State, tmptv)
+ |2:
+ | add CARG3, sp, TMPDofs
+ | str CARG4, TMPD
+ | b >1
+ |
+ |->vmeta_tgetb: // RB = table, RC = index
+ | add RC, RC, TISNUM
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG3, sp, TMPDofs
+ | str RC, TMPD
+ | b >1
+ |
+ |->vmeta_tgetv: // RB = table, RC = key
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG3, BASE, RC, lsl #3
+ |1:
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | cbz CRET1, >3
+ | ldr TMP0, [CRET1]
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |3: // Call __index metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | sub TMP1, BASE, #FRAME_CONT
+ | ldr BASE, L->top
+ | mov NARGS8:RC, #16 // 2 args for func(t, k).
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
+ | str PC, [BASE, #-24] // [cont|PC]
+ | sub PC, BASE, TMP1
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | b ->vm_call_dispatch_f
+ |
+ |->vmeta_tgetr:
+ | sxtw CARG2, TMP1w
+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // Returns cTValue * or NULL.
+ | mov TMP0, TISNIL
+ | cbz CRET1, ->BC_TGETR_Z
+ | ldr TMP0, [CRET1]
+ | b ->BC_TGETR_Z
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets1:
+ | movn CARG4, #~LJ_TSTR
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG4, STR:RC, CARG4, lsl #47
+ | b >2
+ |
+ |->vmeta_tsets:
+ | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48
+ | str CARG2, GL->tmptv
+ | add CARG2, GL, #offsetof(global_State, tmptv)
+ |2:
+ | add CARG3, sp, TMPDofs
+ | str CARG4, TMPD
+ | b >1
+ |
+ |->vmeta_tsetb: // RB = table, RC = index
+ | add RC, RC, TISNUM
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG3, sp, TMPDofs
+ | str RC, TMPD
+ | b >1
+ |
+ |->vmeta_tsetv:
+ | add CARG2, BASE, RB, lsl #3
+ | add CARG3, BASE, RC, lsl #3
+ |1:
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | cbz CRET1, >3
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | str TMP0, [CRET1]
+ | ins_next
+ |
+ |3: // Call __newindex metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+ | sub TMP1, BASE, #FRAME_CONT
+ | ldr BASE, L->top
+ | mov NARGS8:RC, #24 // 3 args for func(t, k, v).
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
+ | str TMP0, [BASE, #16] // Copy value to third argument.
+ | str PC, [BASE, #-24] // [cont|PC]
+ | sub PC, BASE, TMP1
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | b ->vm_call_dispatch_f
+ |
+ |->vmeta_tsetr:
+ | sxtw CARG3, TMP1w
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // Returns TValue *.
+ | b ->BC_TSETR_Z
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+ |->vmeta_comp:
+ | add CARG2, BASE, RA, lsl #3
+ | sub PC, PC, #4
+ | add CARG3, BASE, RC, lsl #3
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | uxtb CARG4w, INSw
+ | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+ | // Returns 0/1 or TValue * (metamethod).
+ |3:
+ | cmp CRET1, #1
+ | bhi ->vmeta_binop
+ |4:
+ | ldrh RBw, [PC, # OFS_RD]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | sub RB, RB, #0x20000
+ | csel PC, PC, RB, lo
+ |->cont_nop:
+ | ins_next
+ |
+ |->cont_ra: // RA = resultptr
+ | ldr INSw, [PC, #-4]
+ | ldr TMP0, [RA]
+ | decode_RA TMP1, INS
+ | str TMP0, [BASE, TMP1, lsl #3]
+ | b ->cont_nop
+ |
+ |->cont_condt: // RA = resultptr
+ | ldr TMP0, [RA]
+ | mov_true TMP1
+ | cmp TMP1, TMP0 // Branch if result is true.
+ | b <4
+ |
+ |->cont_condf: // RA = resultptr
+ | ldr TMP0, [RA]
+ | mov_false TMP1
+ | cmp TMP0, TMP1 // Branch if result is false.
+ | b <4
+ |
+ |->vmeta_equal:
+ | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
+ | and TAB:CARG3, CARG3, #LJ_GCVMASK
+ | sub PC, PC, #4
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
+ | // Returns 0/1 or TValue * (metamethod).
+ | b <3
+ |
+ |->vmeta_equal_cd:
+ |.if FFI
+ | sub PC, PC, #4
+ | str BASE, L->base
+ | mov CARG1, L
+ | mov CARG2, INS
+ | str PC, SAVE_PC
+ | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op)
+ | // Returns 0/1 or TValue * (metamethod).
+ | b <3
+ |.endif
+ |
+ |->vmeta_istype:
+ | sub PC, PC, #4
+ | str BASE, L->base
+ | mov CARG1, L
+ | mov CARG2, RA
+ | mov CARG3, RC
+ | str PC, SAVE_PC
+ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ | b ->cont_nop
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |->vmeta_arith_vn:
+ | add CARG3, BASE, RB, lsl #3
+ | add CARG4, KBASE, RC, lsl #3
+ | b >1
+ |
+ |->vmeta_arith_nv:
+ | add CARG4, BASE, RB, lsl #3
+ | add CARG3, KBASE, RC, lsl #3
+ | b >1
+ |
+ |->vmeta_unm:
+ | add CARG3, BASE, RC, lsl #3
+ | mov CARG4, CARG3
+ | b >1
+ |
+ |->vmeta_arith_vv:
+ | add CARG3, BASE, RB, lsl #3
+ | add CARG4, BASE, RC, lsl #3
+ |1:
+ | uxtb CARG5w, INSw
+ | add CARG2, BASE, RA, lsl #3
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | cbz CRET1, ->cont_nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
+ | sub TMP1, CRET1, BASE
+ | str PC, [CRET1, #-24] // [cont|PC]
+ | add PC, TMP1, #FRAME_CONT
+ | mov BASE, CRET1
+ | mov NARGS8:RC, #16 // 2 args for func(o1, o2).
+ | b ->vm_call_dispatch
+ |
+ |->vmeta_len:
+ | add CARG2, BASE, RC, lsl #3
+#if LJ_52
+ | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types).
+#endif
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_len // (lua_State *L, TValue *o)
+ | // Returns NULL (retry) or TValue * (metamethod base).
+#if LJ_52
+ | cbnz CRET1, ->vmeta_binop // Binop call for compatibility.
+ | mov TAB:CARG1, TAB:RC
+ | b ->BC_LEN_Z
+#else
+ | b ->vmeta_binop // Binop call for compatibility.
+#endif
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call: // Resolve and call __call metamethod.
+ | // RB = old base, BASE = new base, RC = nargs*8
+ | mov CARG1, L
+ | str RB, L->base // This is the callers base!
+ | sub CARG2, BASE, #16
+ | str PC, SAVE_PC
+ | add CARG3, BASE, NARGS8:RC
+ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here.
+ | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | ins_call
+ |
+ |->vmeta_callt: // Resolve __call for BC_CALLT.
+ | // BASE = old base, RA = new base, RC = nargs*8
+ | mov CARG1, L
+ | str BASE, L->base
+ | sub CARG2, RA, #16
+ | str PC, SAVE_PC
+ | add CARG3, RA, NARGS8:RC
+ | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | ldr TMP1, [RA, FRAME_FUNC] // Guaranteed to be a function here.
+ | ldr PC, [BASE, FRAME_PC]
+ | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now.
+ | and LFUNC:CARG3, TMP1, #LJ_GCVMASK
+ | b ->BC_CALLT2_Z
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+ | mov CARG1, L
+ | str BASE, L->base
+ | mov CARG2, RA
+ | str PC, SAVE_PC
+ | bl extern lj_meta_for // (lua_State *L, TValue *base)
+ | ldr INSw, [PC, #-4]
+ |.if JIT
+ | uxtb TMP0w, INSw
+ |.endif
+ | decode_RA RA, INS
+ | decode_RD RC, INS
+ |.if JIT
+ | cmp TMP0, #BC_JFORI
+ | beq =>BC_JFORI
+ |.endif
+ | b =>BC_FORI
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro .ffunc, name
+ |->ff_ .. name:
+ |.endmacro
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
+ | ldr CARG1, [BASE]
+ | cmp NARGS8:RC, #8
+ | blo ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
+ | ldp CARG1, CARG2, [BASE]
+ | cmp NARGS8:RC, #16
+ | blo ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_n, name
+ | .ffunc name
+ | ldr CARG1, [BASE]
+ | cmp NARGS8:RC, #8
+ | ldr FARG1, [BASE]
+ | blo ->fff_fallback
+ | checknum CARG1, ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name
+ | .ffunc name
+ | ldp CARG1, CARG2, [BASE]
+ | cmp NARGS8:RC, #16
+ | ldp FARG1, FARG2, [BASE]
+ | blo ->fff_fallback
+ | checknum CARG1, ->fff_fallback
+ | checknum CARG2, ->fff_fallback
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2.
+ |.macro ffgccheck
+ | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total.
+ | cmp CARG1, CARG2
+ | blt >1
+ | bl ->fff_gcstep
+ |1:
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |.ffunc_1 assert
+ | ldr PC, [BASE, FRAME_PC]
+ | mov_false TMP1
+ | cmp CARG1, TMP1
+ | bhs ->fff_fallback
+ | str CARG1, [BASE, #-16]
+ | sub RB, BASE, #8
+ | subs RA, NARGS8:RC, #8
+ | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8.
+ | cbz RA, ->fff_res // Done if exactly 1 argument.
+ |1:
+ | ldr CARG1, [RB, #16]
+ | sub RA, RA, #8
+ | str CARG1, [RB], #8
+ | cbnz RA, <1
+ | b ->fff_res
+ |
+ |.ffunc_1 type
+ | mov TMP0, #~LJ_TISNUM
+ | asr ITYPE, CARG1, #47
+ | cmn ITYPE, #~LJ_TISNUM
+ | csinv TMP1, TMP0, ITYPE, lo
+ | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8
+ | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3]
+ | b ->fff_restv
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+ | asr ITYPE, CARG1, #47
+ | cmn ITYPE, #-LJ_TTAB
+ | ccmn ITYPE, #-LJ_TUDATA, #4, ne
+ | and TAB:CARG1, CARG1, #LJ_GCVMASK
+ | bne >6
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | ldr TAB:RB, TAB:CARG1->metatable
+ |2:
+ | mov CARG1, TISNIL
+ | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable]
+ | cbz TAB:RB, ->fff_restv
+ | ldr TMP1w, TAB:RB->hmask
+ | ldr TMP2w, STR:RC->sid
+ | ldr NODE:CARG3, TAB:RB->node
+ | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
+ | add TMP1, TMP1, TMP1, lsl #1
+ | movn CARG4, #~LJ_TSTR
+ | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
+ | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+ | ldp CARG1, TMP0, NODE:CARG3->val
+ | ldr NODE:CARG3, NODE:CARG3->next
+ | cmp TMP0, CARG4
+ | beq >5
+ | cbnz NODE:CARG3, <3
+ |4:
+ | mov CARG1, RB // Use metatable as default result.
+ | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48
+ | b ->fff_restv
+ |5:
+ | cmp TMP0, TISNIL
+ | bne ->fff_restv
+ | b <4
+ |
+ |6:
+ | movn TMP0, #~LJ_TISNUM
+ | cmp ITYPE, TMP0
+ | csel ITYPE, ITYPE, TMP0, hs
+ | sub TMP1, GL, ITYPE, lsl #3
+ | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8]
+ | b <2
+ |
+ |.ffunc_2 setmetatable
+ | // Fast path: no mt for table yet and not clearing the mt.
+ | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
+ | ldr TAB:TMP0, TAB:TMP1->metatable
+ | asr ITYPE, CARG2, #47
+ | ldrb TMP2w, TAB:TMP1->marked
+ | cmn ITYPE, #-LJ_TTAB
+ | and TAB:CARG2, CARG2, #LJ_GCVMASK
+ | ccmp TAB:TMP0, #0, #0, eq
+ | bne ->fff_fallback
+ | str TAB:CARG2, TAB:TMP1->metatable
+ | tbz TMP2w, #2, ->fff_restv // isblack(table)
+ | barrierback TAB:TMP1, TMP2w, TMP0
+ | b ->fff_restv
+ |
+ |.ffunc rawget
+ | ldr CARG2, [BASE]
+ | cmp NARGS8:RC, #16
+ | blo ->fff_fallback
+ | checktab CARG2, ->fff_fallback
+ | mov CARG1, L
+ | add CARG3, BASE, #8
+ | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // Returns cTValue *.
+ | ldr CARG1, [CRET1]
+ | b ->fff_restv
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+ | // Only handles the number case inline (without a base argument).
+ | ldr CARG1, [BASE]
+ | cmp NARGS8:RC, #8
+ | bne ->fff_fallback
+ | checknumber CARG1, ->fff_fallback
+ | b ->fff_restv
+ |
+ |.ffunc_1 tostring
+ | // Only handles the string or number case inline.
+ | asr ITYPE, CARG1, #47
+ | cmn ITYPE, #-LJ_TSTR
+ | // A __tostring method in the string base metatable is ignored.
+ | beq ->fff_restv
+ | // Handle numbers inline, unless a number base metatable is present.
+ | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM]
+ | str BASE, L->base
+ | cmn ITYPE, #-LJ_TISNUM
+ | ccmp TMP1, #0, #0, ls
+ | str PC, SAVE_PC // Redundant (but a defined value).
+ | bne ->fff_fallback
+ | ffgccheck
+ | mov CARG1, L
+ | mov CARG2, BASE
+ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
+ | // Returns GCstr *.
+ | movn TMP1, #~LJ_TSTR
+ | ldr BASE, L->base
+ | add CARG1, CARG1, TMP1, lsl #47
+ | b ->fff_restv
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+ | checktp CARG1, LJ_TTAB, ->fff_fallback
+ | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil.
+ | ldr PC, [BASE, FRAME_PC]
+ | add CARG2, BASE, #8
+ | sub CARG3, BASE, #16
+ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ | // Returns 1=found, 0=end, -1=error.
+ | mov RC, #(2+1)*8
+ | tbnz CRET1w, #31, ->fff_fallback // Invalid key.
+ | cbnz CRET1, ->fff_res // Found key/value.
+ | // End of traversal: return nil.
+ | str TISNIL, [BASE, #-16]
+ | b ->fff_res1
+ |
+ |.ffunc_1 pairs
+ | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
+#if LJ_52
+ | ldr TAB:CARG2, TAB:TMP1->metatable
+#endif
+ | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
+ | ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+ | cbnz TAB:CARG2, ->fff_fallback
+#endif
+ | mov RC, #(3+1)*8
+ | stp CARG1, TISNIL, [BASE, #-8]
+ | str CFUNC:CARG4, [BASE, #-16]
+ | b ->fff_res
+ |
+ |.ffunc_2 ipairs_aux
+ | checktab CARG1, ->fff_fallback
+ | checkint CARG2, ->fff_fallback
+ | ldr TMP1w, TAB:CARG1->asize
+ | ldr CARG3, TAB:CARG1->array
+ | ldr TMP0w, TAB:CARG1->hmask
+ | add CARG2w, CARG2w, #1
+ | cmp CARG2w, TMP1w
+ | ldr PC, [BASE, FRAME_PC]
+ | add TMP2, CARG2, TISNUM
+ | mov RC, #(0+1)*8
+ | str TMP2, [BASE, #-16]
+ | bhs >2 // Not in array part?
+ | ldr TMP0, [CARG3, CARG2, lsl #3]
+ |1:
+ | mov TMP1, #(2+1)*8
+ | cmp TMP0, TISNIL
+ | str TMP0, [BASE, #-8]
+ | csel RC, RC, TMP1, eq
+ | b ->fff_res
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | cbz TMP0w, ->fff_res
+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // Returns cTValue * or NULL.
+ | cbz CRET1, ->fff_res
+ | ldr TMP0, [CRET1]
+ | b <1
+ |
+ |.ffunc_1 ipairs
+ | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback
+#if LJ_52
+ | ldr TAB:CARG2, TAB:TMP1->metatable
+#endif
+ | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0]
+ | ldr PC, [BASE, FRAME_PC]
+#if LJ_52
+ | cbnz TAB:CARG2, ->fff_fallback
+#endif
+ | mov RC, #(3+1)*8
+ | stp CARG1, TISNUM, [BASE, #-8]
+ | str CFUNC:CARG4, [BASE, #-16]
+ | b ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc pcall
+ | cmp NARGS8:RC, #8
+ | ldrb TMP0w, GL->hookmask
+ | blo ->fff_fallback
+ | sub NARGS8:RC, NARGS8:RC, #8
+ | mov RB, BASE
+ | add BASE, BASE, #16
+ | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
+ | add PC, TMP0, #16+FRAME_PCALL
+ | beq ->vm_call_dispatch
+ |1:
+ | add TMP2, BASE, NARGS8:RC
+ |2:
+ | ldr TMP0, [TMP2, #-16]
+ | str TMP0, [TMP2, #-8]!
+ | cmp TMP2, BASE
+ | bne <2
+ | b ->vm_call_dispatch
+ |
+ |.ffunc xpcall
+ | ldp CARG1, CARG2, [BASE]
+ | ldrb TMP0w, GL->hookmask
+ | subs NARGS8:TMP1, NARGS8:RC, #16
+ | blo ->fff_fallback
+ | mov RB, BASE
+ | asr ITYPE, CARG2, #47
+ | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1
+ | cmn ITYPE, #-LJ_TFUNC
+ | add PC, TMP0, #24+FRAME_PCALL
+ | bne ->fff_fallback // Traceback must be a function.
+ | mov NARGS8:RC, NARGS8:TMP1
+ | add BASE, BASE, #24
+ | stp CARG2, CARG1, [RB] // Swap function and traceback.
+ | cbz NARGS8:RC, ->vm_call_dispatch
+ | b <1
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |.macro coroutine_resume_wrap, resume
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | checktp CARG1, LJ_TTHREAD, ->fff_fallback
+ |.else
+ |.ffunc coroutine_wrap_aux
+ | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr
+ | and L:CARG1, CARG1, #LJ_GCVMASK
+ |.endif
+ | ldr PC, [BASE, FRAME_PC]
+ | str BASE, L->base
+ | ldp RB, CARG2, L:CARG1->base
+ | ldrb TMP1w, L:CARG1->status
+ | add TMP0, CARG2, TMP1
+ | str PC, SAVE_PC
+ | cmp TMP0, RB
+ | beq ->fff_fallback
+ | cmp TMP1, #LUA_YIELD
+ | add TMP0, CARG2, #8
+ | csel CARG2, CARG2, TMP0, hs
+ | ldr CARG4, L:CARG1->maxstack
+ | add CARG3, CARG2, NARGS8:RC
+ | ldr RB, L:CARG1->cframe
+ | ccmp CARG3, CARG4, #2, ls
+ | ccmp RB, #0, #2, ls
+ | bhi ->fff_fallback
+ |.if resume
+ | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC.
+ | add BASE, BASE, #8
+ | sub NARGS8:RC, NARGS8:RC, #8
+ |.endif
+ | str CARG3, L:CARG1->top
+ | str BASE, L->top
+ | cbz NARGS8:RC, >3
+ |2: // Move args to coroutine.
+ | ldr TMP0, [BASE, RB]
+ | cmp RB, NARGS8:RC
+ | str TMP0, [CARG2, RB]
+ | add RB, RB, #8
+ | bne <2
+ |3:
+ | mov CARG3, #0
+ | mov L:RA, L:CARG1
+ | mov CARG4, #0
+ | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+ | // Returns thread status.
+ |4:
+ | ldp CARG3, CARG4, L:RA->base
+ | cmp CRET1, #LUA_YIELD
+ | ldr BASE, L->base
+ | str L, GL->cur_L
+ | st_vmstate ST_INTERP
+ | bhi >8
+ | sub RC, CARG4, CARG3
+ | ldr CARG1, L->maxstack
+ | add CARG2, BASE, RC
+ | cbz RC, >6 // No results?
+ | cmp CARG2, CARG1
+ | mov RB, #0
+ | bhi >9 // Need to grow stack?
+ |
+ | sub CARG4, RC, #8
+ | str CARG3, L:RA->top // Clear coroutine stack.
+ |5: // Move results from coroutine.
+ | ldr TMP0, [CARG3, RB]
+ | cmp RB, CARG4
+ | str TMP0, [BASE, RB]
+ | add RB, RB, #8
+ | bne <5
+ |6:
+ |.if resume
+ | mov_true TMP1
+ | add RC, RC, #16
+ |7:
+ | str TMP1, [BASE, #-8] // Prepend true/false to results.
+ | sub RA, BASE, #8
+ |.else
+ | mov RA, BASE
+ | add RC, RC, #8
+ |.endif
+ | ands CARG1, PC, #FRAME_TYPE
+ | str PC, SAVE_PC
+ | str RCw, SAVE_MULTRES
+ | beq ->BC_RET_Z
+ | b ->vm_return
+ |
+ |8: // Coroutine returned with error (at co->top-1).
+ |.if resume
+ | ldr TMP0, [CARG4, #-8]!
+ | mov_false TMP1
+ | mov RC, #(2+1)*8
+ | str CARG4, L:RA->top // Remove error from coroutine stack.
+ | str TMP0, [BASE] // Copy error message.
+ | b <7
+ |.else
+ | mov CARG1, L
+ | mov CARG2, L:RA
+ | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
+ | // Never returns.
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+ | mov CARG1, L
+ | lsr CARG2, RC, #3
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | mov CRET1, #0
+ | b <4
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+ | ldr TMP0, L->cframe
+ | add TMP1, BASE, NARGS8:RC
+ | mov CRET1, #LUA_YIELD
+ | stp BASE, TMP1, L->base
+ | tbz TMP0, #0, ->fff_fallback
+ | str xzr, L->cframe
+ | strb CRET1w, L->status
+ | b ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+ |.macro math_round, func, round
+ | .ffunc math_ .. func
+ | ldr CARG1, [BASE]
+ | cmp NARGS8:RC, #8
+ | ldr d0, [BASE]
+ | blo ->fff_fallback
+ | cmp TISNUMhi, CARG1, lsr #32
+ | beq ->fff_restv
+ | blo ->fff_fallback
+ | round d0, d0
+ | b ->fff_resn
+ |.endmacro
+ |
+ | math_round floor, frintm
+ | math_round ceil, frintp
+ |
+ |.ffunc_1 math_abs
+ | checknumber CARG1, ->fff_fallback
+ | and CARG1, CARG1, #U64x(7fffffff,ffffffff)
+ | bne ->fff_restv
+ | eor CARG2w, CARG1w, CARG1w, asr #31
+ | movz CARG3, #0x41e0, lsl #48 // 2^31.
+ | subs CARG1w, CARG2w, CARG1w, asr #31
+ | add CARG1, CARG1, TISNUM
+ | csel CARG1, CARG1, CARG3, pl
+ | // Fallthrough.
+ |
+ |->fff_restv:
+ | // CARG1 = TValue result.
+ | ldr PC, [BASE, FRAME_PC]
+ | str CARG1, [BASE, #-16]
+ |->fff_res1:
+ | // PC = return.
+ | mov RC, #(1+1)*8
+ |->fff_res:
+ | // RC = (nresults+1)*8, PC = return.
+ | ands CARG1, PC, #FRAME_TYPE
+ | str RCw, SAVE_MULTRES
+ | sub RA, BASE, #16
+ | bne ->vm_return
+ | ldr INSw, [PC, #-4]
+ | decode_RB RB, INS
+ |5:
+ | cmp RC, RB, lsl #3 // More results expected?
+ | blo >6
+ | decode_RA TMP1, INS
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+ | sub BASE, RA, TMP1, lsl #3
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | add TMP1, RA, RC
+ | add RC, RC, #8
+ | str TISNIL, [TMP1, #-8]
+ | b <5
+ |
+ |.macro math_extern, func
+ | .ffunc_n math_ .. func
+ | bl extern func
+ | b ->fff_resn
+ |.endmacro
+ |
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
+ | bl extern func
+ | b ->fff_resn
+ |.endmacro
+ |
+ |.ffunc_n math_sqrt
+ | fsqrt d0, d0
+ |->fff_resn:
+ | ldr PC, [BASE, FRAME_PC]
+ | str d0, [BASE, #-16]
+ | b ->fff_res1
+ |
+ |.ffunc math_log
+ | ldr CARG1, [BASE]
+ | cmp NARGS8:RC, #8
+ | ldr FARG1, [BASE]
+ | bne ->fff_fallback // Need exactly 1 argument.
+ | checknum CARG1, ->fff_fallback
+ | bl extern log
+ | b ->fff_resn
+ |
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
+ | math_extern sinh
+ | math_extern cosh
+ | math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
+ |
+ |.ffunc_2 math_ldexp
+ | ldr FARG1, [BASE]
+ | checknum CARG1, ->fff_fallback
+ | checkint CARG2, ->fff_fallback
+ | sxtw CARG1, CARG2w
+ | bl extern ldexp // (double x, int exp)
+ | b ->fff_resn
+ |
+ |.ffunc_n math_frexp
+ | add CARG1, sp, TMPDofs
+ | bl extern frexp
+ | ldr CARG2w, TMPD
+ | ldr PC, [BASE, FRAME_PC]
+ | str d0, [BASE, #-16]
+ | mov RC, #(2+1)*8
+ | add CARG2, CARG2, TISNUM
+ | str CARG2, [BASE, #-8]
+ | b ->fff_res
+ |
+ |.ffunc_n math_modf
+ | sub CARG1, BASE, #16
+ | ldr PC, [BASE, FRAME_PC]
+ | bl extern modf
+ | mov RC, #(2+1)*8
+ | str d0, [BASE, #-8]
+ | b ->fff_res
+ |
+ |.macro math_minmax, name, cond, fcond
+ | .ffunc_1 name
+ | add RB, BASE, RC
+ | add RA, BASE, #8
+ | checkint CARG1, >4
+ |1: // Handle integers.
+ | ldr CARG2, [RA]
+ | cmp RA, RB
+ | bhs ->fff_restv
+ | checkint CARG2, >3
+ | cmp CARG1w, CARG2w
+ | add RA, RA, #8
+ | csel CARG1, CARG2, CARG1, cond
+ | b <1
+ |3: // Convert intermediate result to number and continue below.
+ | scvtf d0, CARG1w
+ | blo ->fff_fallback
+ | ldr d1, [RA]
+ | b >6
+ |
+ |4:
+ | ldr d0, [BASE]
+ | blo ->fff_fallback
+ |5: // Handle numbers.
+ | ldr CARG2, [RA]
+ | ldr d1, [RA]
+ | cmp RA, RB
+ | bhs ->fff_resn
+ | checknum CARG2, >7
+ |6:
+ | fcmp d0, d1
+ | add RA, RA, #8
+ | fcsel d0, d1, d0, fcond
+ | b <5
+ |7: // Convert integer to number and continue above.
+ | scvtf d1, CARG2w
+ | blo ->fff_fallback
+ | b <6
+ |.endmacro
+ |
+ | math_minmax math_min, gt, pl
+ | math_minmax math_max, lt, le
+ |
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+ | ldp PC, CARG1, [BASE, FRAME_PC]
+ | cmp NARGS8:RC, #8
+ | asr ITYPE, CARG1, #47
+ | ccmn ITYPE, #-LJ_TSTR, #0, eq
+ | and STR:CARG1, CARG1, #LJ_GCVMASK
+ | bne ->fff_fallback
+ | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end).
+ | ldr CARG3w, STR:CARG1->len
+ | add TMP0, TMP0, TISNUM
+ | str TMP0, [BASE, #-16]
+ | mov RC, #(0+1)*8
+ | cbz CARG3, ->fff_res
+ | b ->fff_res1
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
+ | ldp PC, CARG1, [BASE, FRAME_PC]
+ | cmp CARG1w, #255
+ | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument.
+ | bne ->fff_fallback
+ | checkint CARG1, ->fff_fallback
+ | mov CARG3, #1
+ | // Point to the char inside the integer in the stack slot.
+ |.if ENDIAN_LE
+ | mov CARG2, BASE
+ |.else
+ | add CARG2, BASE, #7
+ |.endif
+ |->fff_newstr:
+ | // CARG2 = str, CARG3 = len.
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:
+ | // Returns GCstr *.
+ | ldr BASE, L->base
+ | movn TMP1, #~LJ_TSTR
+ | add CARG1, CARG1, TMP1, lsl #47
+ | b ->fff_restv
+ |
+ |.ffunc string_sub
+ | ffgccheck
+ | ldr CARG1, [BASE]
+ | ldr CARG3, [BASE, #16]
+ | cmp NARGS8:RC, #16
+ | movn RB, #0
+ | beq >1
+ | blo ->fff_fallback
+ | checkint CARG3, ->fff_fallback
+ | sxtw RB, CARG3w
+ |1:
+ | ldr CARG2, [BASE, #8]
+ | checkstr CARG1, ->fff_fallback
+ | ldr TMP1w, STR:CARG1->len
+ | checkint CARG2, ->fff_fallback
+ | sxtw CARG2, CARG2w
+ | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end
+ | add TMP2, RB, TMP1
+ | cmp RB, #0
+ | add TMP0, CARG2, TMP1
+ | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1
+ | cmp CARG2, #0
+ | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1
+ | cmp RB, #0
+ | csel RB, RB, xzr, ge // if (end < 0) end = 0
+ | cmp CARG2, #1
+ | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1
+ | cmp RB, TMP1
+ | csel RB, RB, TMP1, le // if (end > len) end = len
+ | add CARG1, STR:CARG1, #sizeof(GCstr)-1
+ | subs CARG3, RB, CARG2 // len = end - start
+ | add CARG2, CARG1, CARG2
+ | add CARG3, CARG3, #1 // len += 1
+ | bge ->fff_newstr
+ | add STR:CARG1, GL, #offsetof(global_State, strempty)
+ | movn TMP1, #~LJ_TSTR
+ | add CARG1, CARG1, TMP1, lsl #47
+ | b ->fff_restv
+ |
+ |.macro ffstring_op, name
+ | .ffunc string_ .. name
+ | ffgccheck
+ | ldr CARG2, [BASE]
+ | cmp NARGS8:RC, #8
+ | asr ITYPE, CARG2, #47
+ | ccmn ITYPE, #-LJ_TSTR, #0, hs
+ | and STR:CARG2, CARG2, #LJ_GCVMASK
+ | bne ->fff_fallback
+ | ldr TMP0, GL->tmpbuf.b
+ | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf)
+ | str BASE, L->base
+ | str PC, SAVE_PC
+ | str L, GL->tmpbuf.L
+ | str TMP0, GL->tmpbuf.w
+ | bl extern lj_buf_putstr_ .. name
+ | bl extern lj_buf_tostr
+ | b ->fff_resstr
+ |.endmacro
+ |
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
+ |
+ |//-- Bit library --------------------------------------------------------
+ |
+ |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3
+ |->vm_tobit_fb:
+ | bls ->fff_fallback
+ | add CARG2, CARG1, CARG1
+ | mov CARG3, #1076
+ | sub CARG3, CARG3, CARG2, lsr #53
+ | cmp CARG3, #53
+ | bhi >1
+ | and CARG2, CARG2, #U64x(001fffff,ffffffff)
+ | orr CARG2, CARG2, #U64x(00200000,00000000)
+ | cmp CARG1, #0
+ | lsr CARG2, CARG2, CARG3
+ | cneg CARG1w, CARG2w, mi
+ | br lr
+ |1:
+ | mov CARG1w, #0
+ | br lr
+ |
+ |.macro .ffunc_bit, name
+ | .ffunc_1 bit_..name
+ | adr lr, >1
+ | checkint CARG1, ->vm_tobit_fb
+ |1:
+ |.endmacro
+ |
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name
+ | mov RA, #8
+ | mov TMP0w, CARG1w
+ | adr lr, >2
+ |1:
+ | ldr CARG1, [BASE, RA]
+ | cmp RA, NARGS8:RC
+ | add RA, RA, #8
+ | bge >9
+ | checkint CARG1, ->vm_tobit_fb
+ |2:
+ | ins TMP0w, TMP0w, CARG1w
+ | b <1
+ |.endmacro
+ |
+ |.ffunc_bit_op band, and
+ |.ffunc_bit_op bor, orr
+ |.ffunc_bit_op bxor, eor
+ |
+ |.ffunc_bit tobit
+ | mov TMP0w, CARG1w
+ |9: // Label reused by .ffunc_bit_op users.
+ | add CARG1, TMP0, TISNUM
+ | b ->fff_restv
+ |
+ |.ffunc_bit bswap
+ | rev TMP0w, CARG1w
+ | add CARG1, TMP0, TISNUM
+ | b ->fff_restv
+ |
+ |.ffunc_bit bnot
+ | mvn TMP0w, CARG1w
+ | add CARG1, TMP0, TISNUM
+ | b ->fff_restv
+ |
+ |.macro .ffunc_bit_sh, name, ins, shmod
+ | .ffunc bit_..name
+ | ldp TMP0, CARG1, [BASE]
+ | cmp NARGS8:RC, #16
+ | blo ->fff_fallback
+ | adr lr, >1
+ | checkint CARG1, ->vm_tobit_fb
+ |1:
+ |.if shmod == 0
+ | mov TMP1, CARG1
+ |.else
+ | neg TMP1, CARG1
+ |.endif
+ | mov CARG1, TMP0
+ | adr lr, >2
+ | checkint CARG1, ->vm_tobit_fb
+ |2:
+ | ins TMP0w, CARG1w, TMP1w
+ | add CARG1, TMP0, TISNUM
+ | b ->fff_restv
+ |.endmacro
+ |
+ |.ffunc_bit_sh lshift, lsl, 0
+ |.ffunc_bit_sh rshift, lsr, 0
+ |.ffunc_bit_sh arshift, asr, 0
+ |.ffunc_bit_sh rol, ror, 1
+ |.ffunc_bit_sh ror, ror, 0
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback: // Call fast function fallback handler.
+ | // BASE = new base, RC = nargs*8
+ | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC.
+ | ldr TMP2, L->maxstack
+ | add TMP1, BASE, NARGS8:RC
+ | stp BASE, TMP1, L->base
+ | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | add TMP1, TMP1, #8*LUA_MINSTACK
+ | ldr CARG3, CFUNC:CARG3->f
+ | str PC, SAVE_PC // Redundant (but a defined value).
+ | cmp TMP1, TMP2
+ | mov CARG1, L
+ | bhi >5 // Need to grow stack.
+ | blr CARG3 // (lua_State *L)
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+ | ldr BASE, L->base
+ | cmp CRET1w, #0
+ | lsl RC, CRET1, #3
+ | sub RA, BASE, #16
+ | bgt ->fff_res // Returned nresults+1?
+ |1: // Returned 0 or -1: retry fast path.
+ | ldr CARG1, L->top
+ | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
+ | sub NARGS8:RC, CARG1, BASE
+ | bne ->vm_call_tail // Returned -1?
+ | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+ | ands TMP0, PC, #FRAME_TYPE
+ | and TMP1, PC, #~FRAME_TYPEP
+ | bne >3
+ | ldrb RAw, [PC, #-4+OFS_RA]
+ | lsl RA, RA, #3
+ | add TMP1, RA, #16
+ |3:
+ | sub RB, BASE, TMP1
+ | b ->vm_call_dispatch // Resolve again for tailcall.
+ |
+ |5: // Grow stack for fallback handler.
+ | mov CARG2, #LUA_MINSTACK
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldr BASE, L->base
+ | cmp CARG1, CARG1 // Set zero-flag to force retry.
+ | b <1
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RC = nargs*8
+ | add CARG2, BASE, NARGS8:RC // Calculate L->top.
+ | mov RA, lr
+ | stp BASE, CARG2, L->base
+ | str PC, SAVE_PC // Redundant (but a defined value).
+ | mov CARG1, L
+ | bl extern lj_gc_step // (lua_State *L)
+ | ldp BASE, CARG2, L->base
+ | ldr CFUNC:CARG3, [BASE, FRAME_FUNC]
+ | mov lr, RA // Help return address predictor.
+ | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8.
+ | and CFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+ |.if JIT
+ | ldrb CARG1w, GL->hookmask
+ | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent.
+ | bne >5
+ | // Decrement the hookcount for consistency, but always do the call.
+ | ldr CARG2w, GL->hookcount
+ | tst CARG1, #HOOK_ACTIVE
+ | bne >1
+ | sub CARG2w, CARG2w, #1
+ | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT
+ | beq >1
+ | str CARG2w, GL->hookcount
+ | b >1
+ |.endif
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+ | ldrb TMP2w, GL->hookmask
+ | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active?
+ |5: // Re-dispatch to static ins.
+ | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
+ | br TMP0
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+ | ldrb TMP2w, GL->hookmask
+ | ldr TMP3w, GL->hookcount
+ | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active?
+ | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT
+ | beq <5
+ | sub TMP3w, TMP3w, #1
+ | str TMP3w, GL->hookcount
+ | cbz TMP3w, >1
+ | tbz TMP2w, #LUA_HOOKLINE, <5
+ |1:
+ | mov CARG1, L
+ | str BASE, L->base
+ | mov CARG2, PC
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+ | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
+ |3:
+ | ldr BASE, L->base
+ |4: // Re-dispatch to static ins.
+ | ldr INSw, [PC, #-4]
+ | add TMP1, GL, INS, uxtb #3
+ | decode_RA RA, INS
+ | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC]
+ | decode_RD RC, INS
+ | br TMP0
+ |
+ |->cont_hook: // Continue from hook yield.
+ | ldr CARG1, [CARG4, #-40]
+ | add PC, PC, #4
+ | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins.
+ | b <4
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+ |.if JIT
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L).
+ | add CARG1, GL, #GG_G2DISP+GG_DISP2J
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | str PC, SAVE_PC
+ | ldr CARG3, LFUNC:CARG3->pc
+ | mov CARG2, PC
+ | str L, [GL, #GL_J(L)]
+ | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)]
+ | str BASE, L->base
+ | add CARG3, BASE, CARG3, lsl #3
+ | str CARG3, L->top
+ | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc)
+ | b <3
+ |.endif
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+ | mov CARG2, PC
+ |.if JIT
+ | b >1
+ |.endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+ |.if JIT
+ | orr CARG2, PC, #1
+ |1:
+ |.endif
+ | add TMP1, BASE, NARGS8:RC
+ | str PC, SAVE_PC
+ | mov CARG1, L
+ | sub RA, RA, BASE
+ | stp BASE, TMP1, L->base
+ | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
+ | // Returns ASMFunction.
+ | ldp BASE, TMP1, L->base
+ | str xzr, SAVE_PC // Invalidate for subsequent line hook.
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | add RA, BASE, RA
+ | sub NARGS8:RC, TMP1, BASE
+ | ldr INSw, [PC, #-4]
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | br CRET1
+ |
+ |->cont_stitch: // Trace stitching.
+ |.if JIT
+ | // RA = resultptr, CARG4 = meta base
+ | ldr RBw, SAVE_MULTRES
+ | ldr INSw, [PC, #-4]
+ | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
+ | subs RB, RB, #8
+ | decode_RA RC, INS // Call base.
+ | and CARG3, CARG3, #LJ_GCVMASK
+ | beq >2
+ |1: // Move results down.
+ | ldr CARG1, [RA]
+ | add RA, RA, #8
+ | subs RB, RB, #8
+ | str CARG1, [BASE, RC, lsl #3]
+ | add RC, RC, #1
+ | bne <1
+ |2:
+ | decode_RA RA, INS
+ | decode_RB RB, INS
+ | add RA, RA, RB
+ |3:
+ | cmp RA, RC
+ | bhi >9 // More results wanted?
+ |
+ | ldrh RAw, TRACE:CARG3->traceno
+ | ldrh RCw, TRACE:CARG3->link
+ | cmp RCw, RAw
+ | beq ->cont_nop // Blacklisted.
+ | cmp RCw, #0
+ | bne =>BC_JLOOP // Jump to stitched trace.
+ |
+ | // Stitch a new trace to the previous trace.
+ | mov CARG1, #GL_J(exitno)
+ | str RAw, [GL, CARG1]
+ | mov CARG1, #GL_J(L)
+ | str L, [GL, CARG1]
+ | str BASE, L->base
+ | add CARG1, GL, #GG_G2J
+ | mov CARG2, PC
+ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
+ | ldr BASE, L->base
+ | b ->cont_nop
+ |
+ |9: // Fill up results with nil.
+ | str TISNIL, [BASE, RC, lsl #3]
+ | add RC, RC, #1
+ | b <3
+ |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | mov CARG1, L
+ | str BASE, L->base
+ | mov CARG2, PC
+ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | ldr BASE, L->base
+ | sub PC, PC, #4
+ | b ->cont_nop
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro savex_, a, b
+ | stp d..a, d..b, [sp, #a*8]
+ | stp x..a, x..b, [sp, #32*8+a*8]
+ |.endmacro
+ |
+ |->vm_exit_handler:
+ |.if JIT
+ | sub sp, sp, #(64*8)
+ | savex_, 0, 1
+ | savex_, 2, 3
+ | savex_, 4, 5
+ | savex_, 6, 7
+ | savex_, 8, 9
+ | savex_, 10, 11
+ | savex_, 12, 13
+ | savex_, 14, 15
+ | savex_, 16, 17
+ | savex_, 18, 19
+ | savex_, 20, 21
+ | savex_, 22, 23
+ | savex_, 24, 25
+ | savex_, 26, 27
+ | savex_, 28, 29
+ | stp d30, d31, [sp, #30*8]
+ | ldr CARG1, [sp, #64*8] // Load original value of lr.
+ | add CARG3, sp, #64*8 // Recompute original value of sp.
+ | mv_vmstate CARG4w, EXIT
+ | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP.
+ | sub CARG1, CARG1, lr
+ | ldr L, GL->cur_L
+ | lsr CARG1, CARG1, #2
+ | ldr BASE, GL->jit_base
+ | sub CARG1, CARG1, #2
+ | ldr CARG2w, [lr] // Load trace number.
+ | st_vmstate CARG4w
+ |.if ENDIAN_BE
+ | rev32 CARG2, CARG2
+ |.endif
+ | str BASE, L->base
+ | ubfx CARG2w, CARG2w, #5, #16
+ | str CARG1w, [GL, #GL_J(exitno)]
+ | str CARG2w, [GL, #GL_J(parent)]
+ | str L, [GL, #GL_J(L)]
+ | str xzr, GL->jit_base
+ | add CARG1, GL, #GG_G2J
+ | mov CARG2, sp
+ | bl extern lj_trace_exit // (jit_State *J, ExitState *ex)
+ | // Returns MULTRES (unscaled) or negated error code.
+ | ldr CARG2, L->cframe
+ | ldr BASE, L->base
+ | and sp, CARG2, #CFRAME_RAWMASK
+ | ldr PC, SAVE_PC // Get SAVE_PC.
+ | str L, SAVE_L // Set SAVE_L (on-trace resume/yield).
+ | b >1
+ |.endif
+ |
+ |->vm_exit_interp:
+ | // CARG1 = MULTRES or negated error code, BASE, PC and GL set.
+ |.if JIT
+ | ldr L, SAVE_L
+ |1:
+ | cmp CARG1w, #0
+ | blt >9 // Check for error from exit.
+ | lsl RC, CARG1, #3
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+ | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | movn TISNIL, #0
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | str RCw, SAVE_MULTRES
+ | str BASE, L->base
+ | ldr CARG2, LFUNC:CARG2->pc
+ | str xzr, GL->jit_base
+ | mv_vmstate CARG4w, INTERP
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
+ | // Modified copy of ins_next which handles function header dispatch, too.
+ | ldrb RBw, [PC, # OFS_OP]
+ | ldr INSw, [PC], #4
+ | st_vmstate CARG4w
+ | cmp RBw, #BC_FUNCC+2 // Fast function?
+ | add TMP1, GL, INS, uxtb #3
+ | bhs >4
+ |2:
+ | cmp RBw, #BC_FUNCF // Function header?
+ | add TMP0, GL, RB, uxtb #3
+ | ldr RB, [TMP0, #GG_G2DISP]
+ | decode_RA RA, INS
+ | lsr TMP0, INS, #16
+ | csel RC, TMP0, RC, lo
+ | blo >5
+ | ldr CARG3, [BASE, FRAME_FUNC]
+ | sub RC, RC, #8
+ | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ |5:
+ | br RB
+ |
+ |4: // Check frame below fast function.
+ | ldr CARG1, [BASE, FRAME_PC]
+ | ands CARG2, CARG1, #FRAME_TYPE
+ | bne <2 // Trace stitching continuation?
+ | // Otherwise set KBASE for Lua function below fast function.
+ | ldr CARG3w, [CARG1, #-4]
+ | decode_RA CARG1, CARG3
+ | sub CARG2, BASE, CARG1, lsl #3
+ | ldr LFUNC:CARG3, [CARG2, #-32]
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | ldr CARG3, LFUNC:CARG3->pc
+ | ldr KBASE, [CARG3, #PC2PROTO(k)]
+ | b <2
+ |
+ |9: // Rethrow error from the right C frame.
+ | neg CARG2w, CARG1w
+ | mov CARG1, L
+ | bl extern lj_err_trace // (lua_State *L, int errcode)
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ | // int lj_vm_modi(int dividend, int divisor);
+ |->vm_modi:
+ | eor CARG4w, CARG1w, CARG2w
+ | cmp CARG4w, #0
+ | eor CARG3w, CARG1w, CARG1w, asr #31
+ | eor CARG4w, CARG2w, CARG2w, asr #31
+ | sub CARG3w, CARG3w, CARG1w, asr #31
+ | sub CARG4w, CARG4w, CARG2w, asr #31
+ | udiv CARG1w, CARG3w, CARG4w
+ | msub CARG1w, CARG1w, CARG4w, CARG3w
+ | ccmp CARG1w, #0, #4, mi
+ | sub CARG3w, CARG1w, CARG4w
+ | csel CARG1w, CARG1w, CARG3w, eq
+ | eor CARG3w, CARG1w, CARG2w
+ | cmp CARG3w, #0
+ | cneg CARG1w, CARG1w, mi
+ | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.define NEXT_TAB, TAB:CARG1
+ |.define NEXT_RES, CARG1
+ |.define NEXT_IDX, CARG2w
+ |.define NEXT_LIM, CARG3w
+ |.define NEXT_TMP0, TMP0
+ |.define NEXT_TMP0w, TMP0w
+ |.define NEXT_TMP1, TMP1
+ |.define NEXT_TMP1w, TMP1w
+ |.define NEXT_RES_PTR, sp
+ |.define NEXT_RES_VAL, [sp]
+ |.define NEXT_RES_KEY, [sp, #8]
+ |
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+ |// Next idx returned in CRET2w.
+ |->vm_next:
+ |.if JIT
+ | ldr NEXT_LIM, NEXT_TAB->asize
+ | ldr NEXT_TMP1, NEXT_TAB->array
+ |1: // Traverse array part.
+ | subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM
+ | bhs >5 // Index points after array part?
+ | ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3]
+ | cmn NEXT_TMP0, #-LJ_TNIL
+ | cinc NEXT_IDX, NEXT_IDX, eq
+ | beq <1 // Skip holes in array part.
+ | str NEXT_TMP0, NEXT_RES_VAL
+ | movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY
+ | add NEXT_IDX, NEXT_IDX, #1
+ | mov NEXT_RES, NEXT_RES_PTR
+ |4:
+ | ret
+ |
+ |5: // Traverse hash part.
+ | ldr NEXT_TMP1w, NEXT_TAB->hmask
+ | ldr NODE:NEXT_RES, NEXT_TAB->node
+ | add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1
+ | add NEXT_LIM, NEXT_LIM, NEXT_TMP1w
+ | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3
+ |6:
+ | cmp NEXT_IDX, NEXT_LIM
+ | bhi >9
+ | ldr NEXT_TMP0, NODE:NEXT_RES->val
+ | cmn NEXT_TMP0, #-LJ_TNIL
+ | add NEXT_IDX, NEXT_IDX, #1
+ | bne <4
+ | // Skip holes in hash part.
+ | add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node)
+ | b <6
+ |
+ |9: // End of iteration. Set the key to nil (not the value).
+ | movn NEXT_TMP0, #0
+ | str NEXT_TMP0, NEXT_RES_KEY
+ | mov NEXT_RES, NEXT_RES_PTR
+ | ret
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Handler for callback functions.
+ |// Saveregs already performed. Callback slot number in [sp], g in r12.
+ |->vm_ffi_callback:
+ |.if FFI
+ |.type CTSTATE, CTState, PC
+ | saveregs
+ | ldr CTSTATE, GL:x10->ctype_state
+ | mov GL, x10
+ | add x10, sp, # CFRAME_SPACE
+ | str w9, CTSTATE->cb.slot
+ | stp x0, x1, CTSTATE->cb.gpr[0]
+ | stp d0, d1, CTSTATE->cb.fpr[0]
+ | stp x2, x3, CTSTATE->cb.gpr[2]
+ | stp d2, d3, CTSTATE->cb.fpr[2]
+ | stp x4, x5, CTSTATE->cb.gpr[4]
+ | stp d4, d5, CTSTATE->cb.fpr[4]
+ | stp x6, x7, CTSTATE->cb.gpr[6]
+ | stp d6, d7, CTSTATE->cb.fpr[6]
+ | str x10, CTSTATE->cb.stack
+ | mov CARG1, CTSTATE
+ | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok.
+ | mov CARG2, sp
+ | bl extern lj_ccallback_enter // (CTState *cts, void *cf)
+ | // Returns lua_State *.
+ | ldp BASE, RC, L:CRET1->base
+ | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48
+ | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16
+ | movn TISNIL, #0
+ | mov L, CRET1
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | sub RC, RC, BASE
+ | st_vmstate ST_INTERP
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | ins_callt
+ |.endif
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+ |.if FFI
+ | ldr CTSTATE, GL->ctype_state
+ | stp BASE, CARG4, L->base
+ | str L, CTSTATE->L
+ | mov CARG1, CTSTATE
+ | mov CARG2, RA
+ | bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
+ | ldp x0, x1, CTSTATE->cb.gpr[0]
+ | ldp d0, d1, CTSTATE->cb.fpr[0]
+ | b ->vm_leave_unw
+ |.endif
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+ | .type CCSTATE, CCallState, x19
+ | stp x20, CCSTATE, [sp, #-32]!
+ | stp fp, lr, [sp, #16]
+ | add fp, sp, #16
+ | mov CCSTATE, x0
+ | ldr TMP0w, CCSTATE:x0->spadj
+ | ldrb TMP1w, CCSTATE->nsp
+ | add TMP2, CCSTATE, #offsetof(CCallState, stack)
+ | subs TMP1, TMP1, #1
+ | ldr TMP3, CCSTATE->func
+ | sub sp, sp, TMP0
+ | bmi >2
+ |1: // Copy stack slots
+ | ldr TMP0, [TMP2, TMP1, lsl #3]
+ | str TMP0, [sp, TMP1, lsl #3]
+ | subs TMP1, TMP1, #1
+ | bpl <1
+ |2:
+ | ldp x0, x1, CCSTATE->gpr[0]
+ | ldp d0, d1, CCSTATE->fpr[0]
+ | ldp x2, x3, CCSTATE->gpr[2]
+ | ldp d2, d3, CCSTATE->fpr[2]
+ | ldp x4, x5, CCSTATE->gpr[4]
+ | ldp d4, d5, CCSTATE->fpr[4]
+ | ldp x6, x7, CCSTATE->gpr[6]
+ | ldp d6, d7, CCSTATE->fpr[6]
+ | ldr x8, CCSTATE->retp
+ | blr TMP3
+ | sub sp, fp, #16
+ | stp x0, x1, CCSTATE->gpr[0]
+ | stp d0, d1, CCSTATE->fpr[0]
+ | stp d2, d3, CCSTATE->fpr[2]
+ | ldp fp, lr, [sp, #16]
+ | ldp x20, CCSTATE, [sp], #32
+ | ret
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+ int vk = 0;
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1, RC = src2, JMP with RC = target
+ | ldr CARG1, [BASE, RA, lsl #3]
+ | ldrh RBw, [PC, # OFS_RD]
+ | ldr CARG2, [BASE, RC, lsl #3]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | sub RB, RB, #0x20000
+ | checkint CARG1, >3
+ | checkint CARG2, >4
+ | cmp CARG1w, CARG2w
+ if (op == BC_ISLT) {
+ | csel PC, RB, PC, lt
+ } else if (op == BC_ISGE) {
+ | csel PC, RB, PC, ge
+ } else if (op == BC_ISLE) {
+ | csel PC, RB, PC, le
+ } else {
+ | csel PC, RB, PC, gt
+ }
+ |1:
+ | ins_next
+ |
+ |3: // RA not int.
+ | ldr FARG1, [BASE, RA, lsl #3]
+ | blo ->vmeta_comp
+ | ldr FARG2, [BASE, RC, lsl #3]
+ | cmp TISNUMhi, CARG2, lsr #32
+ | bhi >5
+ | bne ->vmeta_comp
+ | // RA number, RC int.
+ | scvtf FARG2, CARG2w
+ | b >5
+ |
+ |4: // RA int, RC not int
+ | ldr FARG2, [BASE, RC, lsl #3]
+ | blo ->vmeta_comp
+ | // RA int, RC number.
+ | scvtf FARG1, CARG1w
+ |
+ |5: // RA number, RC number
+ | fcmp FARG1, FARG2
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+ if (op == BC_ISLT) {
+ | csel PC, RB, PC, lo
+ } else if (op == BC_ISGE) {
+ | csel PC, RB, PC, hs
+ } else if (op == BC_ISLE) {
+ | csel PC, RB, PC, ls
+ } else {
+ | csel PC, RB, PC, hi
+ }
+ | b <1
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | // RA = src1, RC = src2, JMP with RC = target
+ | ldr CARG1, [BASE, RA, lsl #3]
+ | add RC, BASE, RC, lsl #3
+ | ldrh RBw, [PC, # OFS_RD]
+ | ldr CARG3, [RC]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | sub RB, RB, #0x20000
+ | asr ITYPE, CARG3, #47
+ | cmn ITYPE, #-LJ_TISNUM
+ if (vk) {
+ | bls ->BC_ISEQN_Z
+ } else {
+ | bls ->BC_ISNEN_Z
+ }
+ | // RC is not a number.
+ | asr TMP0, CARG1, #47
+ |.if FFI
+ | // Check if RC or RA is a cdata.
+ | cmn ITYPE, #-LJ_TCDATA
+ | ccmn TMP0, #-LJ_TCDATA, #4, ne
+ | beq ->vmeta_equal_cd
+ |.endif
+ | cmp CARG1, CARG3
+ | bne >2
+ | // Tag and value are equal.
+ if (vk) {
+ |->BC_ISEQV_Z:
+ | mov PC, RB // Perform branch.
+ }
+ |1:
+ | ins_next
+ |
+ |2: // Check if the tags are the same and it's a table or userdata.
+ | cmp ITYPE, TMP0
+ | ccmn ITYPE, #-LJ_TISTABUD, #2, eq
+ if (vk) {
+ | bhi <1
+ } else {
+ | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction.
+ }
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | and TAB:CARG2, CARG1, #LJ_GCVMASK
+ | ldr TAB:TMP2, TAB:CARG2->metatable
+ if (vk) {
+ | cbz TAB:TMP2, <1 // No metatable?
+ | ldrb TMP1w, TAB:TMP2->nomm
+ | mov CARG4, #0 // ne = 0
+ | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done.
+ } else {
+ | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable?
+ | ldrb TMP1w, TAB:TMP2->nomm
+ | mov CARG4, #1 // ne = 1.
+ | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done.
+ }
+ | b ->vmeta_equal
+ break;
+
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | // RA = src, RC = str_const (~), JMP with RC = target
+ | ldr CARG1, [BASE, RA, lsl #3]
+ | mvn RC, RC
+ | ldrh RBw, [PC, # OFS_RD]
+ | ldr CARG2, [KBASE, RC, lsl #3]
+ | add PC, PC, #4
+ | movn TMP0, #~LJ_TSTR
+ |.if FFI
+ | asr ITYPE, CARG1, #47
+ |.endif
+ | add RB, PC, RB, lsl #2
+ | add CARG2, CARG2, TMP0, lsl #47
+ | sub RB, RB, #0x20000
+ |.if FFI
+ | cmn ITYPE, #-LJ_TCDATA
+ | beq ->vmeta_equal_cd
+ |.endif
+ | cmp CARG1, CARG2
+ if (vk) {
+ | csel PC, RB, PC, eq
+ } else {
+ | csel PC, RB, PC, ne
+ }
+ | ins_next
+ break;
+
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | // RA = src, RC = num_const (~), JMP with RC = target
+ | ldr CARG1, [BASE, RA, lsl #3]
+ | add RC, KBASE, RC, lsl #3
+ | ldrh RBw, [PC, # OFS_RD]
+ | ldr CARG3, [RC]
+ | add PC, PC, #4
+ | add RB, PC, RB, lsl #2
+ | sub RB, RB, #0x20000
+ if (vk) {
+ |->BC_ISEQN_Z:
+ } else {
+ |->BC_ISNEN_Z:
+ }
+ | checkint CARG1, >4
+ | checkint CARG3, >6
+ | cmp CARG1w, CARG3w
+ |1:
+ if (vk) {
+ | csel PC, RB, PC, eq
+ |2:
+ } else {
+ |2:
+ | csel PC, RB, PC, ne
+ }
+ |3:
+ | ins_next
+ |
+ |4: // RA not int.
+ |.if FFI
+ | blo >7
+ |.else
+ | blo <2
+ |.endif
+ | ldr FARG1, [BASE, RA, lsl #3]
+ | ldr FARG2, [RC]
+ | cmp TISNUMhi, CARG3, lsr #32
+ | bne >5
+ | // RA number, RC int.
+ | scvtf FARG2, CARG3w
+ |5:
+ | // RA number, RC number.
+ | fcmp FARG1, FARG2
+ | b <1
+ |
+ |6: // RA int, RC number
+ | ldr FARG2, [RC]
+ | scvtf FARG1, CARG1w
+ | fcmp FARG1, FARG2
+ | b <1
+ |
+ |.if FFI
+ |7:
+ | asr ITYPE, CARG1, #47
+ | cmn ITYPE, #-LJ_TCDATA
+ | bne <2
+ | b ->vmeta_equal_cd
+ |.endif
+ break;
+
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | // RA = src, RC = primitive_type (~), JMP with RC = target
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | ldrh RBw, [PC, # OFS_RD]
+ | add PC, PC, #4
+ | add RC, RC, #1
+ | add RB, PC, RB, lsl #2
+ |.if FFI
+ | asr ITYPE, TMP0, #47
+ | cmn ITYPE, #-LJ_TCDATA
+ | beq ->vmeta_equal_cd
+ | cmn RC, ITYPE
+ |.else
+ | cmn RC, TMP0, asr #47
+ |.endif
+ | sub RB, RB, #0x20000
+ if (vk) {
+ | csel PC, RB, PC, eq
+ } else {
+ | csel PC, RB, PC, ne
+ }
+ | ins_next
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | // RA = dst or unused, RC = src, JMP with RC = target
+ | ldrh RBw, [PC, # OFS_RD]
+ | ldr TMP0, [BASE, RC, lsl #3]
+ | add PC, PC, #4
+ | mov_false TMP1
+ | add RB, PC, RB, lsl #2
+ | cmp TMP0, TMP1
+ | sub RB, RB, #0x20000
+ if (op == BC_ISTC || op == BC_IST) {
+ if (op == BC_ISTC) {
+ | csel RA, RA, RC, lo
+ }
+ | csel PC, RB, PC, lo
+ } else {
+ if (op == BC_ISFC) {
+ | csel RA, RA, RC, hs
+ }
+ | csel PC, RB, PC, hs
+ }
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | str TMP0, [BASE, RA, lsl #3]
+ }
+ | ins_next
+ break;
+
+ case BC_ISTYPE:
+ | // RA = src, RC = -type
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | cmn RC, TMP0, asr #47
+ | bne ->vmeta_istype
+ | ins_next
+ break;
+ case BC_ISNUM:
+ | // RA = src, RC = -(TISNUM-1)
+ | ldr TMP0, [BASE, RA]
+ | checknum TMP0, ->vmeta_istype
+ | ins_next
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_MOV:
+ | // RA = dst, RC = src
+ | ldr TMP0, [BASE, RC, lsl #3]
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_NOT:
+ | // RA = dst, RC = src
+ | ldr TMP0, [BASE, RC, lsl #3]
+ | mov_false TMP1
+ | mov_true TMP2
+ | cmp TMP0, TMP1
+ | csel TMP0, TMP1, TMP2, lo
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_UNM:
+ | // RA = dst, RC = src
+ | ldr TMP0, [BASE, RC, lsl #3]
+ | asr ITYPE, TMP0, #47
+ | cmn ITYPE, #-LJ_TISNUM
+ | bhi ->vmeta_unm
+ | eor TMP0, TMP0, #U64x(80000000,00000000)
+ | bne >5
+ | negs TMP0w, TMP0w
+ | movz CARG3, #0x41e0, lsl #48 // 2^31.
+ | add TMP0, TMP0, TISNUM
+ | csel TMP0, TMP0, CARG3, vc
+ |5:
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_LEN:
+ | // RA = dst, RC = src
+ | ldr CARG1, [BASE, RC, lsl #3]
+ | asr ITYPE, CARG1, #47
+ | cmn ITYPE, #-LJ_TSTR
+ | and CARG1, CARG1, #LJ_GCVMASK
+ | bne >2
+ | ldr CARG1w, STR:CARG1->len
+ |1:
+ | add CARG1, CARG1, TISNUM
+ | str CARG1, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |2:
+ | cmn ITYPE, #-LJ_TTAB
+ | bne ->vmeta_len
+#if LJ_52
+ | ldr TAB:CARG2, TAB:CARG1->metatable
+ | cbnz TAB:CARG2, >9
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | bl extern lj_tab_len // (GCtab *t)
+ | // Returns uint32_t (but less than 2^31).
+ | b <1
+ |
+#if LJ_52
+ |9:
+ | ldrb TMP1w, TAB:CARG2->nomm
+ | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done.
+ | b ->vmeta_len
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |.macro ins_arithcheck_int, target
+ | checkint CARG1, target
+ | checkint CARG2, target
+ |.endmacro
+ |
+ |.macro ins_arithcheck_num, target
+ | checknum CARG1, target
+ | checknum CARG2, target
+ |.endmacro
+ |
+ |.macro ins_arithcheck_nzdiv, target
+ | cbz CARG2w, target
+ |.endmacro
+ |
+ |.macro ins_arithhead
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||if (vk == 1) {
+ | and RC, RC, #255
+ | decode_RB RB, INS
+ ||} else {
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithload, reg1, reg2
+ | // RA = dst, RB = src1, RC = src2 | num_const
+ ||switch (vk) {
+ ||case 0:
+ | ldr reg1, [BASE, RB, lsl #3]
+ | ldr reg2, [KBASE, RC, lsl #3]
+ || break;
+ ||case 1:
+ | ldr reg1, [KBASE, RC, lsl #3]
+ | ldr reg2, [BASE, RB, lsl #3]
+ || break;
+ ||default:
+ | ldr reg1, [BASE, RB, lsl #3]
+ | ldr reg2, [BASE, RC, lsl #3]
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithfallback, ins
+ ||switch (vk) {
+ ||case 0:
+ | ins ->vmeta_arith_vn
+ || break;
+ ||case 1:
+ | ins ->vmeta_arith_nv
+ || break;
+ ||default:
+ | ins ->vmeta_arith_vv
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arithmod, res, reg1, reg2
+ | fdiv d2, reg1, reg2
+ | frintm d2, d2
+ | fmsub res, d2, reg2, reg1
+ |.endmacro
+ |
+ |.macro ins_arithdn, intins, fpins
+ | ins_arithhead
+ | ins_arithload CARG1, CARG2
+ | ins_arithcheck_int >5
+ |.if "intins" == "smull"
+ | smull CARG1, CARG1w, CARG2w
+ | cmp CARG1, CARG1, sxtw
+ | mov CARG1w, CARG1w
+ | ins_arithfallback bne
+ |.elif "intins" == "ins_arithmodi"
+ | ins_arithfallback ins_arithcheck_nzdiv
+ | bl ->vm_modi
+ |.else
+ | intins CARG1w, CARG1w, CARG2w
+ | ins_arithfallback bvs
+ |.endif
+ | add CARG1, CARG1, TISNUM
+ | str CARG1, [BASE, RA, lsl #3]
+ |4:
+ | ins_next
+ |
+ |5: // FP variant.
+ | ins_arithload FARG1, FARG2
+ | ins_arithfallback ins_arithcheck_num
+ | fpins FARG1, FARG1, FARG2
+ | str FARG1, [BASE, RA, lsl #3]
+ | b <4
+ |.endmacro
+ |
+ |.macro ins_arithfp, fpins
+ | ins_arithhead
+ | ins_arithload CARG1, CARG2
+ | ins_arithload FARG1, FARG2
+ | ins_arithfallback ins_arithcheck_num
+ |.if "fpins" == "fpow"
+ | bl extern pow
+ |.else
+ | fpins FARG1, FARG1, FARG2
+ |.endif
+ | str FARG1, [BASE, RA, lsl #3]
+ | ins_next
+ |.endmacro
+
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+ | ins_arithdn adds, fadd
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+ | ins_arithdn subs, fsub
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | ins_arithdn smull, fmul
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arithfp fdiv
+ break;
+ case BC_MODVN: case BC_MODNV: case BC_MODVV:
+ | ins_arithdn ins_arithmodi, ins_arithmod
+ break;
+ case BC_POW:
+ | // NYI: (partial) integer arithmetic.
+ | ins_arithfp fpow
+ break;
+
+ case BC_CAT:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = src_start, RC = src_end
+ | str BASE, L->base
+ | sub CARG3, RC, RB
+ | add CARG2, BASE, RC, lsl #3
+ |->BC_CAT_Z:
+ | // RA = dst, CARG2 = top-1, CARG3 = left
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | ldrb RBw, [PC, #-4+OFS_RB]
+ | ldr BASE, L->base
+ | cbnz CRET1, ->vmeta_binop
+ | ldr TMP0, [BASE, RB, lsl #3]
+ | str TMP0, [BASE, RA, lsl #3] // Copy result to RA.
+ | ins_next
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR:
+ | // RA = dst, RC = str_const (~)
+ | mvn RC, RC
+ | ldr TMP0, [KBASE, RC, lsl #3]
+ | movn TMP1, #~LJ_TSTR
+ | add TMP0, TMP0, TMP1, lsl #47
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_KCDATA:
+ |.if FFI
+ | // RA = dst, RC = cdata_const (~)
+ | mvn RC, RC
+ | ldr TMP0, [KBASE, RC, lsl #3]
+ | movn TMP1, #~LJ_TCDATA
+ | add TMP0, TMP0, TMP1, lsl #47
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ |.endif
+ break;
+ case BC_KSHORT:
+ | // RA = dst, RC = int16_literal
+ | sxth RCw, RCw
+ | add TMP0, RC, TISNUM
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_KNUM:
+ | // RA = dst, RC = num_const
+ | ldr TMP0, [KBASE, RC, lsl #3]
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_KPRI:
+ | // RA = dst, RC = primitive_type (~)
+ | mvn TMP0, RC, lsl #47
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_KNIL:
+ | // RA = base, RC = end
+ | add RA, BASE, RA, lsl #3
+ | add RC, BASE, RC, lsl #3
+ | str TISNIL, [RA], #8
+ |1:
+ | cmp RA, RC
+ | str TISNIL, [RA], #8
+ | blt <1
+ | ins_next_
+ break;
+
+ /* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ | // RA = dst, RC = uvnum
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | add RC, RC, #offsetof(GCfuncL, uvptr)/8
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3]
+ | ldr CARG2, UPVAL:CARG2->v
+ | ldr TMP0, [CARG2]
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+ case BC_USETV:
+ | // RA = uvnum, RC = src
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | add RA, RA, #offsetof(GCfuncL, uvptr)/8
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
+ | ldr CARG3, [BASE, RC, lsl #3]
+ | ldr CARG2, UPVAL:CARG1->v
+ | ldrb TMP2w, UPVAL:CARG1->marked
+ | ldrb TMP0w, UPVAL:CARG1->closed
+ | asr ITYPE, CARG3, #47
+ | str CARG3, [CARG2]
+ | add ITYPE, ITYPE, #-LJ_TISGCV
+ | tst TMP2w, #LJ_GC_BLACK // isblack(uv)
+ | ccmp TMP0w, #0, #4, ne // && uv->closed
+ | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v)
+ | bhi >2
+ |1:
+ | ins_next
+ |
+ |2: // Check if new value is white.
+ | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK
+ | ldrb TMP1w, GCOBJ:CARG3->gch.marked
+ | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
+ | beq <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | mov CARG1, GL
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | b <1
+ break;
+ case BC_USETS:
+ | // RA = uvnum, RC = str_const (~)
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | add RA, RA, #offsetof(GCfuncL, uvptr)/8
+ | mvn RC, RC
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3]
+ | ldr STR:CARG3, [KBASE, RC, lsl #3]
+ | movn TMP0, #~LJ_TSTR
+ | ldr CARG2, UPVAL:CARG1->v
+ | ldrb TMP2w, UPVAL:CARG1->marked
+ | add TMP0, STR:CARG3, TMP0, lsl #47
+ | ldrb TMP1w, STR:CARG3->marked
+ | str TMP0, [CARG2]
+ | tbnz TMP2w, #2, >2 // isblack(uv)
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+ | ldrb TMP0w, UPVAL:CARG1->closed
+ | tst TMP1w, #LJ_GC_WHITES // iswhite(str)
+ | ccmp TMP0w, #0, #4, ne
+ | beq <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | mov CARG1, GL
+ | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | b <1
+ break;
+ case BC_USETN:
+ | // RA = uvnum, RC = num_const
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | add RA, RA, #offsetof(GCfuncL, uvptr)/8
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
+ | ldr TMP0, [KBASE, RC, lsl #3]
+ | ldr CARG2, UPVAL:CARG2->v
+ | str TMP0, [CARG2]
+ | ins_next
+ break;
+ case BC_USETP:
+ | // RA = uvnum, RC = primitive_type (~)
+ | ldr LFUNC:CARG2, [BASE, FRAME_FUNC]
+ | add RA, RA, #offsetof(GCfuncL, uvptr)/8
+ | and LFUNC:CARG2, CARG2, #LJ_GCVMASK
+ | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3]
+ | mvn TMP0, RC, lsl #47
+ | ldr CARG2, UPVAL:CARG2->v
+ | str TMP0, [CARG2]
+ | ins_next
+ break;
+
+ case BC_UCLO:
+ | // RA = level, RC = target
+ | ldr CARG3, L->openupval
+ | add RC, PC, RC, lsl #2
+ | str BASE, L->base
+ | sub PC, RC, #0x20000
+ | cbz CARG3, >1
+ | mov CARG1, L
+ | add CARG2, BASE, RA, lsl #3
+ | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
+ | ldr BASE, L->base
+ |1:
+ | ins_next
+ break;
+
+ case BC_FNEW:
+ | // RA = dst, RC = proto_const (~) (holding function prototype)
+ | mvn RC, RC
+ | str BASE, L->base
+ | ldr LFUNC:CARG3, [BASE, FRAME_FUNC]
+ | str PC, SAVE_PC
+ | ldr CARG2, [KBASE, RC, lsl #3]
+ | mov CARG1, L
+ | and LFUNC:CARG3, CARG3, #LJ_GCVMASK
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | bl extern lj_func_newL_gc
+ | // Returns GCfuncL *.
+ | ldr BASE, L->base
+ | movn TMP0, #~LJ_TFUNC
+ | add CRET1, CRET1, TMP0, lsl #47
+ | str CRET1, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+
+ /* -- Table ops --------------------------------------------------------- */
+
+ case BC_TNEW:
+ case BC_TDUP:
+ | // RA = dst, RC = (hbits|asize) | tab_const (~)
+ | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total.
+ | str BASE, L->base
+ | str PC, SAVE_PC
+ | mov CARG1, L
+ | cmp CARG3, CARG4
+ | bhs >5
+ |1:
+ if (op == BC_TNEW) {
+ | and CARG2, RC, #0x7ff
+ | lsr CARG3, RC, #11
+ | cmp CARG2, #0x7ff
+ | mov TMP0, #0x801
+ | csel CARG2, CARG2, TMP0, ne
+ | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
+ | // Returns GCtab *.
+ } else {
+ | mvn RC, RC
+ | ldr CARG2, [KBASE, RC, lsl #3]
+ | bl extern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Returns GCtab *.
+ }
+ | ldr BASE, L->base
+ | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48
+ | str CRET1, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |5:
+ | bl extern lj_gc_step_fixtop // (lua_State *L)
+ | mov CARG1, L
+ | b <1
+ break;
+
+ case BC_GGET:
+ | // RA = dst, RC = str_const (~)
+ case BC_GSET:
+ | // RA = src, RC = str_const (~)
+ | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
+ | mvn RC, RC
+ | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+ | ldr TAB:CARG2, LFUNC:CARG1->env
+ | ldr STR:RC, [KBASE, RC, lsl #3]
+ if (op == BC_GGET) {
+ | b ->BC_TGETS_Z
+ } else {
+ | b ->BC_TSETS_Z
+ }
+ break;
+
+ case BC_TGETV:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = table, RC = key
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | ldr TMP1, [BASE, RC, lsl #3]
+ | checktab CARG2, ->vmeta_tgetv
+ | checkint TMP1, >9 // Integer key?
+ | ldr CARG3, TAB:CARG2->array
+ | ldr CARG1w, TAB:CARG2->asize
+ | add CARG3, CARG3, TMP1, uxtw #3
+ | cmp TMP1w, CARG1w // In array part?
+ | bhs ->vmeta_tgetv
+ | ldr TMP0, [CARG3]
+ | cmp TMP0, TISNIL
+ | beq >5
+ |1:
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <1 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
+ | b ->vmeta_tgetv
+ |
+ |9:
+ | asr ITYPE, TMP1, #47
+ | cmn ITYPE, #-LJ_TSTR // String key?
+ | bne ->vmeta_tgetv
+ | and STR:RC, TMP1, #LJ_GCVMASK
+ | b ->BC_TGETS_Z
+ break;
+ case BC_TGETS:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = table, RC = str_const (~)
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | mvn RC, RC
+ | ldr STR:RC, [KBASE, RC, lsl #3]
+ | checktab CARG2, ->vmeta_tgets1
+ |->BC_TGETS_Z:
+ | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst
+ | ldr TMP1w, TAB:CARG2->hmask
+ | ldr TMP2w, STR:RC->sid
+ | ldr NODE:CARG3, TAB:CARG2->node
+ | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
+ | add TMP1, TMP1, TMP1, lsl #1
+ | movn CARG4, #~LJ_TSTR
+ | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
+ | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
+ |1:
+ | ldp TMP0, CARG1, NODE:CARG3->val
+ | ldr NODE:CARG3, NODE:CARG3->next
+ | cmp CARG1, CARG4
+ | bne >4
+ | cmp TMP0, TISNIL
+ | beq >5
+ |3:
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |4: // Follow hash chain.
+ | cbnz NODE:CARG3, <1
+ | // End of hash chain: key not found, nil result.
+ | mov TMP0, TISNIL
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <3 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done.
+ | b ->vmeta_tgets
+ break;
+ case BC_TGETB:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = table, RC = index
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | checktab CARG2, ->vmeta_tgetb
+ | ldr CARG3, TAB:CARG2->array
+ | ldr CARG1w, TAB:CARG2->asize
+ | add CARG3, CARG3, RC, lsl #3
+ | cmp RCw, CARG1w // In array part?
+ | bhs ->vmeta_tgetb
+ | ldr TMP0, [CARG3]
+ | cmp TMP0, TISNIL
+ | beq >5
+ |1:
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ |
+ |5: // Check for __index if table value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <1 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done.
+ | b ->vmeta_tgetb
+ break;
+ case BC_TGETR:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = table, RC = key
+ | ldr CARG1, [BASE, RB, lsl #3]
+ | ldr TMP1, [BASE, RC, lsl #3]
+ | and TAB:CARG1, CARG1, #LJ_GCVMASK
+ | ldr CARG3, TAB:CARG1->array
+ | ldr TMP2w, TAB:CARG1->asize
+ | add CARG3, CARG3, TMP1w, uxtw #3
+ | cmp TMP1w, TMP2w // In array part?
+ | bhs ->vmeta_tgetr
+ | ldr TMP0, [CARG3]
+ |->BC_TGETR_Z:
+ | str TMP0, [BASE, RA, lsl #3]
+ | ins_next
+ break;
+
+ case BC_TSETV:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = src, RB = table, RC = key
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | ldr TMP1, [BASE, RC, lsl #3]
+ | checktab CARG2, ->vmeta_tsetv
+ | checkint TMP1, >9 // Integer key?
+ | ldr CARG3, TAB:CARG2->array
+ | ldr CARG1w, TAB:CARG2->asize
+ | add CARG3, CARG3, TMP1, uxtw #3
+ | cmp TMP1w, CARG1w // In array part?
+ | bhs ->vmeta_tsetv
+ | ldr TMP1, [CARG3]
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | ldrb TMP2w, TAB:CARG2->marked
+ | cmp TMP1, TISNIL // Previous value is nil?
+ | beq >5
+ |1:
+ | str TMP0, [CARG3]
+ | tbnz TMP2w, #2, >7 // isblack(table)
+ |2:
+ | ins_next
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <1 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsetv
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP2w, TMP1
+ | b <2
+ |
+ |9:
+ | asr ITYPE, TMP1, #47
+ | cmn ITYPE, #-LJ_TSTR // String key?
+ | bne ->vmeta_tsetv
+ | and STR:RC, TMP1, #LJ_GCVMASK
+ | b ->BC_TSETS_Z
+ break;
+ case BC_TSETS:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = dst, RB = table, RC = str_const (~)
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | mvn RC, RC
+ | ldr STR:RC, [KBASE, RC, lsl #3]
+ | checktab CARG2, ->vmeta_tsets1
+ |->BC_TSETS_Z:
+ | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src
+ | ldr TMP1w, TAB:CARG2->hmask
+ | ldr TMP2w, STR:RC->sid
+ | ldr NODE:CARG3, TAB:CARG2->node
+ | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask
+ | add TMP1, TMP1, TMP1, lsl #1
+ | movn CARG4, #~LJ_TSTR
+ | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8
+ | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for.
+ | strb wzr, TAB:CARG2->nomm // Clear metamethod cache.
+ |1:
+ | ldp TMP1, CARG1, NODE:CARG3->val
+ | ldr NODE:TMP3, NODE:CARG3->next
+ | ldrb TMP2w, TAB:CARG2->marked
+ | cmp CARG1, CARG4
+ | bne >5
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | cmp TMP1, TISNIL // Previous value is nil?
+ | beq >4
+ |2:
+ | str TMP0, NODE:CARG3->val
+ | tbnz TMP2w, #2, >7 // isblack(table)
+ |3:
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <2 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsets
+ |
+ |5: // Follow hash chain.
+ | mov NODE:CARG3, NODE:TMP3
+ | cbnz NODE:TMP3, <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, >6 // No metatable: continue.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | // 'no __newindex' flag NOT set: check.
+ | tbz TMP1w, #MM_newindex, ->vmeta_tsets
+ |6:
+ | movn TMP1, #~LJ_TSTR
+ | str PC, SAVE_PC
+ | add TMP0, STR:RC, TMP1, lsl #47
+ | str BASE, L->base
+ | mov CARG1, L
+ | str TMP0, TMPD
+ | add CARG3, sp, TMPDofs
+ | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Returns TValue *.
+ | ldr BASE, L->base
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | str TMP0, [CRET1]
+ | b <3 // No 2nd write barrier needed.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP2w, TMP1
+ | b <3
+ break;
+ case BC_TSETB:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = src, RB = table, RC = index
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | checktab CARG2, ->vmeta_tsetb
+ | ldr CARG3, TAB:CARG2->array
+ | ldr CARG1w, TAB:CARG2->asize
+ | add CARG3, CARG3, RC, lsl #3
+ | cmp RCw, CARG1w // In array part?
+ | bhs ->vmeta_tsetb
+ | ldr TMP1, [CARG3]
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | ldrb TMP2w, TAB:CARG2->marked
+ | cmp TMP1, TISNIL // Previous value is nil?
+ | beq >5
+ |1:
+ | str TMP0, [CARG3]
+ | tbnz TMP2w, #2, >7 // isblack(table)
+ |2:
+ | ins_next
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ldr TAB:CARG1, TAB:CARG2->metatable
+ | cbz TAB:CARG1, <1 // No metatable: done.
+ | ldrb TMP1w, TAB:CARG1->nomm
+ | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done.
+ | b ->vmeta_tsetb
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP2w, TMP1
+ | b <2
+ break;
+ case BC_TSETR:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = src, RB = table, RC = key
+ | ldr CARG2, [BASE, RB, lsl #3]
+ | ldr TMP1, [BASE, RC, lsl #3]
+ | and TAB:CARG2, CARG2, #LJ_GCVMASK
+ | ldr CARG1, TAB:CARG2->array
+ | ldrb TMP2w, TAB:CARG2->marked
+ | ldr CARG4w, TAB:CARG2->asize
+ | add CARG1, CARG1, TMP1, uxtw #3
+ | tbnz TMP2w, #2, >7 // isblack(table)
+ |2:
+ | cmp TMP1w, CARG4w // In array part?
+ | bhs ->vmeta_tsetr
+ |->BC_TSETR_Z:
+ | ldr TMP0, [BASE, RA, lsl #3]
+ | str TMP0, [CARG1]
+ | ins_next
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP2w, TMP0
+ | b <2
+ break;
+
+ case BC_TSETM:
+ | // RA = base (table at base-1), RC = num_const (start index)
+ | add RA, BASE, RA, lsl #3
+ |1:
+ | ldr RBw, SAVE_MULTRES
+ | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table.
+ | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word.
+ | sub RB, RB, #8
+ | cbz RB, >4 // Nothing to copy?
+ | and TAB:CARG2, CARG2, #LJ_GCVMASK
+ | ldr CARG1w, TAB:CARG2->asize
+ | add CARG3w, TMP1w, RBw, lsr #3
+ | ldr CARG4, TAB:CARG2->array
+ | cmp CARG3, CARG1
+ | add RB, RA, RB
+ | bhi >5
+ | add TMP1, CARG4, TMP1w, uxtw #3
+ | ldrb TMP2w, TAB:CARG2->marked
+ |3: // Copy result slots to table.
+ | ldr TMP0, [RA], #8
+ | str TMP0, [TMP1], #8
+ | cmp RA, RB
+ | blo <3
+ | tbnz TMP2w, #2, >7 // isblack(table)
+ |4:
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | str BASE, L->base
+ | mov CARG1, L
+ | str PC, SAVE_PC
+ | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ | // Must not reallocate the stack.
+ | b <1
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP2w, TMP1
+ | b <4
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALLM:
+ | // RA = base, (RB = nresults+1,) RC = extra_nargs
+ | ldr TMP0w, SAVE_MULTRES
+ | decode_RC8RD NARGS8:RC, RC
+ | add NARGS8:RC, NARGS8:RC, TMP0
+ | b ->BC_CALL_Z
+ break;
+ case BC_CALL:
+ | decode_RC8RD NARGS8:RC, RC
+ | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8
+ |->BC_CALL_Z:
+ | mov RB, BASE // Save old BASE for vmeta_call.
+ | add BASE, BASE, RA, lsl #3
+ | ldr CARG3, [BASE]
+ | sub NARGS8:RC, NARGS8:RC, #8
+ | add BASE, BASE, #16
+ | checkfunc CARG3, ->vmeta_call
+ | ins_call
+ break;
+
+ case BC_CALLMT:
+ | // RA = base, (RB = 0,) RC = extra_nargs
+ | ldr TMP0w, SAVE_MULTRES
+ | add NARGS8:RC, TMP0, RC, lsl #3
+ | b ->BC_CALLT1_Z
+ break;
+ case BC_CALLT:
+ | lsl NARGS8:RC, RC, #3
+ | // RA = base, (RB = 0,) RC = (nargs+1)*8
+ |->BC_CALLT1_Z:
+ | add RA, BASE, RA, lsl #3
+ | ldr TMP1, [RA]
+ | sub NARGS8:RC, NARGS8:RC, #8
+ | add RA, RA, #16
+ | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt
+ | ldr PC, [BASE, FRAME_PC]
+ |->BC_CALLT2_Z:
+ | mov RB, #0
+ | ldrb TMP2w, LFUNC:CARG3->ffid
+ | tst PC, #FRAME_TYPE
+ | bne >7
+ |1:
+ | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC.
+ | cbz NARGS8:RC, >3
+ |2:
+ | ldr TMP0, [RA, RB]
+ | add TMP1, RB, #8
+ | cmp TMP1, NARGS8:RC
+ | str TMP0, [BASE, RB]
+ | mov RB, TMP1
+ | bne <2
+ |3:
+ | cmp TMP2, #1 // (> FF_C) Calling a fast function?
+ | bhi >5
+ |4:
+ | ins_callt
+ |
+ |5: // Tailcall to a fast function with a Lua frame below.
+ | ldrb RAw, [PC, #-4+OFS_RA]
+ | sub CARG1, BASE, RA, lsl #3
+ | ldr LFUNC:CARG1, [CARG1, #-32]
+ | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+ | ldr CARG1, LFUNC:CARG1->pc
+ | ldr KBASE, [CARG1, #PC2PROTO(k)]
+ | b <4
+ |
+ |7: // Tailcall from a vararg function.
+ | eor PC, PC, #FRAME_VARG
+ | tst PC, #FRAME_TYPEP // Vararg frame below?
+ | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
+ | bne <1
+ | sub BASE, BASE, PC
+ | ldr PC, [BASE, FRAME_PC]
+ | tst PC, #FRAME_TYPE
+ | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below.
+ | b <1
+ break;
+
+ case BC_ITERC:
+ | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ | add RA, BASE, RA, lsl #3
+ | ldr CARG3, [RA, #-24]
+ | mov RB, BASE // Save old BASE for vmeta_call.
+ | ldp CARG1, CARG2, [RA, #-16]
+ | add BASE, RA, #16
+ | mov NARGS8:RC, #16 // Iterators get 2 arguments.
+ | str CARG3, [RA] // Copy callable.
+ | stp CARG1, CARG2, [RA, #16] // Copy state and control var.
+ | checkfunc CARG3, ->vmeta_call
+ | ins_call
+ break;
+
+ case BC_ITERN:
+ |.if JIT
+ | hotloop
+ |.endif
+ |->vm_IITERN:
+ | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ | add RA, BASE, RA, lsl #3
+ | ldr TAB:RB, [RA, #-16]
+ | ldrh TMP3w, [PC, # OFS_RD]
+ | ldr CARG1w, [RA, #-8+LO] // Get index from control var.
+ | add PC, PC, #4
+ | add TMP3, PC, TMP3, lsl #2
+ | and TAB:RB, RB, #LJ_GCVMASK
+ | sub TMP3, TMP3, #0x20000
+ | ldr TMP1w, TAB:RB->asize
+ | ldr CARG2, TAB:RB->array
+ |1: // Traverse array part.
+ | subs RC, CARG1, TMP1
+ | add CARG3, CARG2, CARG1, lsl #3
+ | bhs >5 // Index points after array part?
+ | ldr TMP0, [CARG3]
+ | cmp TMP0, TISNIL
+ | cinc CARG1, CARG1, eq // Skip holes in array part.
+ | beq <1
+ | add CARG1, CARG1, TISNUM
+ | stp CARG1, TMP0, [RA]
+ | add CARG1, CARG1, #1
+ |3:
+ | str CARG1w, [RA, #-8+LO] // Update control var.
+ | mov PC, TMP3
+ |4:
+ | ins_next
+ |
+ |5: // Traverse hash part.
+ | ldr TMP2w, TAB:RB->hmask
+ | ldr NODE:RB, TAB:RB->node
+ |6:
+ | add CARG1, RC, RC, lsl #1
+ | cmp RC, TMP2 // End of iteration? Branch to ITERN+1.
+ | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8
+ | bhi <4
+ | ldp TMP0, CARG1, NODE:CARG3->val
+ | cmp TMP0, TISNIL
+ | add RC, RC, #1
+ | beq <6 // Skip holes in hash part.
+ | stp CARG1, TMP0, [RA]
+ | add CARG1, RC, TMP1
+ | b <3
+ break;
+
+ case BC_ISNEXT:
+ | // RA = base, RC = target (points to ITERN)
+ | add RA, BASE, RA, lsl #3
+ | ldr CFUNC:CARG1, [RA, #-24]
+ | add RC, PC, RC, lsl #2
+ | ldp TAB:CARG3, CARG4, [RA, #-16]
+ | sub RC, RC, #0x20000
+ | checkfunc CFUNC:CARG1, >5
+ | asr TMP0, TAB:CARG3, #47
+ | ldrb TMP1w, CFUNC:CARG1->ffid
+ | cmn TMP0, #-LJ_TTAB
+ | ccmp CARG4, TISNIL, #0, eq
+ | ccmp TMP1w, #FF_next_N, #0, eq
+ | bne >5
+ | mov TMP0w, #0xfffe7fff // LJ_KEYINDEX
+ | lsl TMP0, TMP0, #32
+ | str TMP0, [RA, #-8] // Initialize control var.
+ |1:
+ | mov PC, RC
+ | ins_next
+ |
+ |5: // Despecialize bytecode if any of the checks fail.
+ |.if JIT
+ | ldrb TMP2w, [RC, # OFS_OP]
+ |.endif
+ | mov TMP0, #BC_JMP
+ | mov TMP1, #BC_ITERC
+ | strb TMP0w, [PC, #-4+OFS_OP]
+ |.if JIT
+ | cmp TMP2w, #BC_ITERN
+ | bne >6
+ |.endif
+ | strb TMP1w, [RC, # OFS_OP]
+ | b <1
+ |.if JIT
+ |6: // Unpatch JLOOP.
+ | ldr RA, [GL, #GL_J(trace)]
+ | ldrh TMP2w, [RC, # OFS_RD]
+ | ldr TRACE:RA, [RA, TMP2, lsl #3]
+ | ldr TMP2w, TRACE:RA->startins
+ | bfxil TMP2w, TMP1w, #0, #8
+ | str TMP2w, [RC]
+ | b <1
+ |.endif
+ break;
+
+ case BC_VARG:
+ | decode_RB RB, INS
+ | and RC, RC, #255
+ | // RA = base, RB = (nresults+1), RC = numparams
+ | ldr TMP1, [BASE, FRAME_PC]
+ | add RC, BASE, RC, lsl #3
+ | add RA, BASE, RA, lsl #3
+ | add RC, RC, #FRAME_VARG
+ | add TMP2, RA, RB, lsl #3
+ | sub RC, RC, TMP1 // RC = vbase
+ | // Note: RC may now be even _above_ BASE if nargs was < numparams.
+ | sub TMP3, BASE, #16 // TMP3 = vtop
+ | cbz RB, >5
+ | sub TMP2, TMP2, #16
+ |1: // Copy vararg slots to destination slots.
+ | cmp RC, TMP3
+ | ldr TMP0, [RC], #8
+ | csel TMP0, TMP0, TISNIL, lo
+ | cmp RA, TMP2
+ | str TMP0, [RA], #8
+ | blo <1
+ |2:
+ | ins_next
+ |
+ |5: // Copy all varargs.
+ | ldr TMP0, L->maxstack
+ | subs TMP2, TMP3, RC
+ | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8
+ | add RB, RB, #8
+ | add TMP1, RA, TMP2
+ | str RBw, SAVE_MULTRES
+ | ble <2 // Nothing to copy.
+ | cmp TMP1, TMP0
+ | bhi >7
+ |6:
+ | ldr TMP0, [RC], #8
+ | str TMP0, [RA], #8
+ | cmp RC, TMP3
+ | blo <6
+ | b <2
+ |
+ |7: // Grow stack for varargs.
+ | lsr CARG2, TMP2, #3
+ | stp BASE, RA, L->base
+ | mov CARG1, L
+ | sub RC, RC, BASE // Need delta, because BASE may change.
+ | str PC, SAVE_PC
+ | bl extern lj_state_growstack // (lua_State *L, int n)
+ | ldp BASE, RA, L->base
+ | add RC, BASE, RC
+ | sub TMP3, BASE, #16
+ | b <6
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM:
+ | // RA = results, RC = extra results
+ | ldr TMP0w, SAVE_MULTRES
+ | ldr PC, [BASE, FRAME_PC]
+ | add RA, BASE, RA, lsl #3
+ | add RC, TMP0, RC, lsl #3
+ | b ->BC_RETM_Z
+ break;
+
+ case BC_RET:
+ | // RA = results, RC = nresults+1
+ | ldr PC, [BASE, FRAME_PC]
+ | lsl RC, RC, #3
+ | add RA, BASE, RA, lsl #3
+ |->BC_RETM_Z:
+ | str RCw, SAVE_MULTRES
+ |1:
+ | ands CARG1, PC, #FRAME_TYPE
+ | eor CARG2, PC, #FRAME_VARG
+ | bne ->BC_RETV2_Z
+ |
+ |->BC_RET_Z:
+ | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return
+ | ldr INSw, [PC, #-4]
+ | subs TMP1, RC, #8
+ | sub CARG3, BASE, #16
+ | beq >3
+ |2:
+ | ldr TMP0, [RA], #8
+ | add BASE, BASE, #8
+ | sub TMP1, TMP1, #8
+ | str TMP0, [BASE, #-24]
+ | cbnz TMP1, <2
+ |3:
+ | decode_RA RA, INS
+ | sub CARG4, CARG3, RA, lsl #3
+ | decode_RB RB, INS
+ | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC]
+ |5:
+ | cmp RC, RB, lsl #3 // More results expected?
+ | blo >6
+ | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+ | mov BASE, CARG4
+ | ldr CARG2, LFUNC:CARG1->pc
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | add BASE, BASE, #8
+ | add RC, RC, #8
+ | str TISNIL, [BASE, #-24]
+ | b <5
+ |
+ |->BC_RETV1_Z: // Non-standard return case.
+ | add RA, BASE, RA, lsl #3
+ |->BC_RETV2_Z:
+ | tst CARG2, #FRAME_TYPEP
+ | bne ->vm_return
+ | // Return from vararg function: relocate BASE down.
+ | sub BASE, BASE, CARG2
+ | ldr PC, [BASE, FRAME_PC]
+ | b <1
+ break;
+
+ case BC_RET0: case BC_RET1:
+ | // RA = results, RC = nresults+1
+ | ldr PC, [BASE, FRAME_PC]
+ | lsl RC, RC, #3
+ | str RCw, SAVE_MULTRES
+ | ands CARG1, PC, #FRAME_TYPE
+ | eor CARG2, PC, #FRAME_VARG
+ | bne ->BC_RETV1_Z
+ | ldr INSw, [PC, #-4]
+ if (op == BC_RET1) {
+ | ldr TMP0, [BASE, RA, lsl #3]
+ }
+ | sub CARG4, BASE, #16
+ | decode_RA RA, INS
+ | sub BASE, CARG4, RA, lsl #3
+ if (op == BC_RET1) {
+ | str TMP0, [CARG4], #8
+ }
+ | decode_RB RB, INS
+ | ldr LFUNC:CARG1, [BASE, FRAME_FUNC]
+ |5:
+ | cmp RC, RB, lsl #3
+ | blo >6
+ | and LFUNC:CARG1, CARG1, #LJ_GCVMASK
+ | ldr CARG2, LFUNC:CARG1->pc
+ | ldr KBASE, [CARG2, #PC2PROTO(k)]
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | add RC, RC, #8
+ | str TISNIL, [CARG4], #8
+ | b <5
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4]
+ |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12]
+ |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20]
+ |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28]
+
+ case BC_FORL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IFORL follows.
+ break;
+
+ case BC_JFORI:
+ case BC_JFORL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_FORI:
+ case BC_IFORL:
+ | // RA = base, RC = target (after end of loop or start of loop)
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | add RA, BASE, RA, lsl #3
+ | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP
+ | ldr CARG3, FOR_STEP // CARG3 = STEP
+ if (op != BC_JFORL) {
+ | add RC, PC, RC, lsl #2
+ | sub RC, RC, #0x20000
+ }
+ | checkint CARG1, >5
+ if (!vk) {
+ | checkint CARG2, ->vmeta_for
+ | checkint CARG3, ->vmeta_for
+ | tbnz CARG3w, #31, >4
+ | cmp CARG1w, CARG2w
+ } else {
+ | adds CARG1w, CARG1w, CARG3w
+ | bvs >2
+ | add TMP0, CARG1, TISNUM
+ | tbnz CARG3w, #31, >4
+ | cmp CARG1w, CARG2w
+ }
+ |1:
+ if (op == BC_FORI) {
+ | csel PC, RC, PC, gt
+ } else if (op == BC_JFORI) {
+ | mov PC, RC
+ | ldrh RCw, [RC, #-4+OFS_RD]
+ } else if (op == BC_IFORL) {
+ | csel PC, RC, PC, le
+ }
+ if (vk) {
+ | str TMP0, FOR_IDX
+ | str TMP0, FOR_EXT
+ } else {
+ | str CARG1, FOR_EXT
+ }
+ if (op == BC_JFORI || op == BC_JFORL) {
+ | ble =>BC_JLOOP
+ }
+ |2:
+ | ins_next
+ |
+ |4: // Invert check for negative step.
+ | cmp CARG2w, CARG1w
+ | b <1
+ |
+ |5: // FP loop.
+ | ldp d0, d1, FOR_IDX
+ | blo ->vmeta_for
+ if (!vk) {
+ | checknum CARG2, ->vmeta_for
+ | checknum CARG3, ->vmeta_for
+ | str d0, FOR_EXT
+ } else {
+ | ldr d2, FOR_STEP
+ | fadd d0, d0, d2
+ }
+ | tbnz CARG3, #63, >7
+ | fcmp d0, d1
+ |6:
+ if (vk) {
+ | str d0, FOR_IDX
+ | str d0, FOR_EXT
+ }
+ if (op == BC_FORI) {
+ | csel PC, RC, PC, hi
+ } else if (op == BC_JFORI) {
+ | ldrh RCw, [RC, #-4+OFS_RD]
+ | bls =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ | csel PC, RC, PC, ls
+ } else {
+ | bls =>BC_JLOOP
+ }
+ | b <2
+ |
+ |7: // Invert check for negative step.
+ | fcmp d1, d0
+ | b <6
+ break;
+
+ case BC_ITERL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IITERL follows.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | // RA = base, RC = target
+ | ldr CARG1, [BASE, RA, lsl #3]
+ | add TMP1, BASE, RA, lsl #3
+ | cmp CARG1, TISNIL
+ | beq >1 // Stop if iterator returned nil.
+ if (op == BC_JITERL) {
+ | str CARG1, [TMP1, #-8]
+ | b =>BC_JLOOP
+ } else {
+ | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch.
+ | sub PC, TMP0, #0x20000
+ | str CARG1, [TMP1, #-8]
+ }
+ |1:
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | // RA = base, RC = target (loop extent)
+ | // Note: RA/RC is only used by trace recorder to determine scope/extent
+ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_ILOOP follows.
+ break;
+
+ case BC_ILOOP:
+ | // RA = base, RC = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+ |.if JIT
+ | // RA = base (ignored), RC = traceno
+ | ldr CARG1, [GL, #GL_J(trace)]
+ | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0.
+ | ldr TRACE:RC, [CARG1, RC, lsl #3]
+ | st_vmstate CARG2w
+ | ldr RA, TRACE:RC->mcode
+ | str BASE, GL->jit_base
+ | str L, GL->tmpbuf.L
+ | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace.
+ | br RA
+ |.endif
+ break;
+
+ case BC_JMP:
+ | // RA = base (only used by trace recorder), RC = target
+ | add RC, PC, RC, lsl #2
+ | sub PC, RC, #0x20000
+ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+ case BC_FUNCF:
+ |.if JIT
+ | hotcall
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
+ break;
+
+ case BC_JFUNCF:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IFUNCF:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
+ | ldr CARG1, L->maxstack
+ | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
+ | ldr KBASE, [PC, #-4+PC2PROTO(k)]
+ | cmp RA, CARG1
+ | bhi ->vm_growstack_l
+ |2:
+ | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters.
+ | blo >3
+ if (op == BC_JFUNCF) {
+ | decode_RD RC, INS
+ | b =>BC_JLOOP
+ } else {
+ | ins_next
+ }
+ |
+ |3: // Clear missing parameters.
+ | str TISNIL, [BASE, NARGS8:RC]
+ | add NARGS8:RC, NARGS8:RC, #8
+ | b <2
+ break;
+
+ case BC_JFUNCV:
+#if !LJ_HASJIT
+ break;
+#endif
+ | NYI // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8
+ | ldr CARG1, L->maxstack
+ | movn TMP0, #~LJ_TFUNC
+ | add TMP2, BASE, RC
+ | add LFUNC:CARG3, CARG3, TMP0, lsl #47
+ | add RA, RA, RC
+ | add TMP0, RC, #16+FRAME_VARG
+ | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC.
+ | ldr KBASE, [PC, #-4+PC2PROTO(k)]
+ | cmp RA, CARG1
+ | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG.
+ | bhs ->vm_growstack_l
+ | sub RC, TMP2, #16
+ | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)]
+ | mov RA, BASE
+ | mov BASE, TMP2
+ | cbz TMP1, >2
+ |1:
+ | cmp RA, RC // Less args than parameters?
+ | bhs >3
+ | ldr TMP0, [RA]
+ | sub TMP1, TMP1, #1
+ | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC).
+ | str TMP0, [TMP2], #8
+ | cbnz TMP1, <1
+ |2:
+ | ins_next
+ |
+ |3:
+ | sub TMP1, TMP1, #1
+ | str TISNIL, [TMP2], #8
+ | cbz TMP1, <2
+ | b <3
+ break;
+
+ case BC_FUNCC:
+ case BC_FUNCCW:
+ | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8
+ if (op == BC_FUNCC) {
+ | ldr CARG4, CFUNC:CARG3->f
+ } else {
+ | ldr CARG4, GL->wrapf
+ }
+ | add CARG2, RA, NARGS8:RC
+ | ldr CARG1, L->maxstack
+ | add RC, BASE, NARGS8:RC
+ | cmp CARG2, CARG1
+ | stp BASE, RC, L->base
+ if (op == BC_FUNCCW) {
+ | ldr CARG2, CFUNC:CARG3->f
+ }
+ | mv_vmstate TMP0w, C
+ | mov CARG1, L
+ | bhi ->vm_growstack_c // Need to grow stack.
+ | st_vmstate TMP0w
+ | blr CARG4 // (lua_State *L [, lua_CFunction f])
+ | // Returns nresults.
+ | ldp BASE, TMP1, L->base
+ | str L, GL->cur_L
+ | sbfiz RC, CRET1, #3, #32
+ | st_vmstate ST_INTERP
+ | ldr PC, [BASE, FRAME_PC]
+ | sub RA, TMP1, RC // RA = L->top - nresults*8
+ | b ->vm_returnc
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+ default:
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+ exit(2);
+ break;
+ }
+}
+
+static int build_backend(BuildCtx *ctx)
+{
+ int op;
+
+ dasm_growpc(Dst, BC__MAX);
+
+ build_subroutines(ctx);
+
+ |.code_op
+ for (op = 0; op < BC__MAX; op++)
+ build_ins(ctx, (BCOp)op, op);
+
+ return BC__MAX;
+}
+
+/* Emit pseudo frame-info for all assembler functions. */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
+ int i;
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe0:\n"
+ "\t.long .LECIE0-.LSCIE0\n"
+ ".LSCIE0:\n"
+ "\t.long 0xffffffff\n"
+ "\t.byte 0x1\n"
+ "\t.string \"\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -8\n"
+ "\t.byte 30\n" /* Return address is in lr. */
+ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
+ "\t.align 3\n"
+ ".LECIE0:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE0:\n"
+ "\t.long .LEFDE0-.LASFDE0\n"
+ ".LASFDE0:\n"
+ "\t.long .Lframe0\n"
+ "\t.quad .Lbegin\n"
+ "\t.quad %d\n"
+ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
+ "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
+ fcofs);
+ for (i = 19; i <= 28; i++) /* offset x19-x28 */
+ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
+ for (i = 8; i <= 15; i++) /* offset d8-d15 */
+ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
+ 64+i, i+(3+(28-19+1)-8));
+ fprintf(ctx->fp,
+ "\t.align 3\n"
+ ".LEFDE0:\n\n");
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".LSFDE1:\n"
+ "\t.long .LEFDE1-.LASFDE1\n"
+ ".LASFDE1:\n"
+ "\t.long .Lframe0\n"
+ "\t.quad lj_vm_ffi_call\n"
+ "\t.quad %d\n"
+ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
+ "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
+ "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
+ "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
+ "\t.align 3\n"
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#if !LJ_NO_UNWIND
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe1:\n"
+ "\t.long .LECIE1-.LSCIE1\n"
+ ".LSCIE1:\n"
+ "\t.long 0\n"
+ "\t.byte 0x1\n"
+ "\t.string \"zPR\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -8\n"
+ "\t.byte 30\n" /* Return address is in lr. */
+ "\t.uleb128 6\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.long lj_err_unwind_dwarf-.\n"
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
+ "\t.align 3\n"
+ ".LECIE1:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE2:\n"
+ "\t.long .LEFDE2-.LASFDE2\n"
+ ".LASFDE2:\n"
+ "\t.long .LASFDE2-.Lframe1\n"
+ "\t.long .Lbegin-.\n"
+ "\t.long %d\n"
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
+ "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
+ fcofs);
+ for (i = 19; i <= 28; i++) /* offset x19-x28 */
+ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
+ for (i = 8; i <= 15; i++) /* offset d8-d15 */
+ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
+ 64+i, i+(3+(28-19+1)-8));
+ fprintf(ctx->fp,
+ "\t.align 3\n"
+ ".LEFDE2:\n\n");
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".Lframe2:\n"
+ "\t.long .LECIE2-.LSCIE2\n"
+ ".LSCIE2:\n"
+ "\t.long 0\n"
+ "\t.byte 0x1\n"
+ "\t.string \"zR\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -8\n"
+ "\t.byte 30\n" /* Return address is in lr. */
+ "\t.uleb128 1\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
+ "\t.align 3\n"
+ ".LECIE2:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE3:\n"
+ "\t.long .LEFDE3-.LASFDE3\n"
+ ".LASFDE3:\n"
+ "\t.long .LASFDE3-.Lframe2\n"
+ "\t.long lj_vm_ffi_call-.\n"
+ "\t.long %d\n"
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
+ "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
+ "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
+ "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
+ "\t.align 3\n"
+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#endif
+ break;
+#if !LJ_NO_UNWIND
+ case BUILD_machasm: {
+#if LJ_HASFFI
+ int fcsize = 0;
+#endif
+ int j;
+ fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
+ fprintf(ctx->fp,
+ "EH_frame1:\n"
+ "\t.set L$set$x,LECIEX-LSCIEX\n"
+ "\t.long L$set$x\n"
+ "LSCIEX:\n"
+ "\t.long 0\n"
+ "\t.byte 0x1\n"
+ "\t.ascii \"zPR\\0\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -8\n"
+ "\t.byte 30\n" /* Return address is in lr. */
+ "\t.uleb128 6\n" /* augmentation length */
+ "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
+ "\t.long _lj_err_unwind_dwarf@GOT-.\n"
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
+ "\t.align 3\n"
+ "LECIEX:\n\n");
+ for (j = 0; j < ctx->nsym; j++) {
+ const char *name = ctx->sym[j].name;
+ int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs;
+ if (size == 0) continue;
+#if LJ_HASFFI
+ if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
+#endif
+ fprintf(ctx->fp,
+ "LSFDE%d:\n"
+ "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
+ "\t.long L$set$%d\n"
+ "LASFDE%d:\n"
+ "\t.long LASFDE%d-EH_frame1\n"
+ "\t.long %s-.\n"
+ "\t.long %d\n"
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
+ "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */
+ j, j, j, j, j, j, j, name, size);
+ for (i = 19; i <= 28; i++) /* offset x19-x28 */
+ fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19));
+ for (i = 8; i <= 15; i++) /* offset d8-d15 */
+ fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n",
+ 64+i, i+(3+(28-19+1)-8));
+ fprintf(ctx->fp,
+ "\t.align 3\n"
+ "LEFDE%d:\n\n", j);
+ }
+#if LJ_HASFFI
+ if (fcsize) {
+ fprintf(ctx->fp,
+ "EH_frame2:\n"
+ "\t.set L$set$y,LECIEY-LSCIEY\n"
+ "\t.long L$set$y\n"
+ "LSCIEY:\n"
+ "\t.long 0\n"
+ "\t.byte 0x1\n"
+ "\t.ascii \"zR\\0\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -8\n"
+ "\t.byte 30\n" /* Return address is in lr. */
+ "\t.uleb128 1\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */
+ "\t.align 3\n"
+ "LECIEY:\n\n");
+ fprintf(ctx->fp,
+ "LSFDEY:\n"
+ "\t.set L$set$yy,LEFDEY-LASFDEY\n"
+ "\t.long L$set$yy\n"
+ "LASFDEY:\n"
+ "\t.long LASFDEY-EH_frame2\n"
+ "\t.long _lj_vm_ffi_call-.\n"
+ "\t.long %d\n"
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */
+ "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */
+ "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */
+ "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */
+ "\t.align 3\n"
+ "LEFDEY:\n\n", fcsize);
+ }
+#endif
+ fprintf(ctx->fp, ".subsections_via_symbols\n");
+ }
+ break;
+#endif
+ default:
+ break;
+ }
+}
+
diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc
index 866b8e3d..34645bf1 100644
--- a/src/vm_mips.dasc
+++ b/src/vm_mips.dasc
@@ -1,6 +1,9 @@
|// Low-level VM code for MIPS CPUs.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+|//
+|// MIPS soft-float support contributed by Djordje Kovacevic and
+|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc.
|
|.arch mips
|.section code_op, code_sub
@@ -18,6 +21,12 @@
|// Fixed register assignments for the interpreter.
|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
|
+|.macro .FPU, a, b
+|.if FPU
+| a, b
+|.endif
+|.endmacro
+|
|// The following must be C callee-save (but BASE is often refetched).
|.define BASE, r16 // Base of current Lua stack frame.
|.define KBASE, r17 // Constants of current Lua function.
@@ -25,13 +34,15 @@
|.define DISPATCH, r19 // Opcode dispatch table.
|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
-|// NYI: r22 currently unused.
|
|.define JGL, r30 // On-trace: global_State + 32768.
|
|// Constants for type-comparisons, stores and conversions. C callee-save.
+|.define TISNUM, r22
|.define TISNIL, r30
+|.if FPU
|.define TOBIT, f30 // 2^52 + 2^51.
+|.endif
|
|// The following temporaries are not saved across C calls, except for RA.
|.define RA, r23 // Callee-save.
@@ -46,7 +57,7 @@
|.define TMP2, r14
|.define TMP3, r15
|
-|// Calling conventions.
+|// MIPS o32 calling convention.
|.define CFUNCADDR, r25
|.define CARG1, r4
|.define CARG2, r5
@@ -56,13 +67,33 @@
|.define CRET1, r2
|.define CRET2, r3
|
+|.if ENDIAN_LE
+|.define SFRETLO, CRET1
+|.define SFRETHI, CRET2
+|.define SFARG1LO, CARG1
+|.define SFARG1HI, CARG2
+|.define SFARG2LO, CARG3
+|.define SFARG2HI, CARG4
+|.else
+|.define SFRETLO, CRET2
+|.define SFRETHI, CRET1
+|.define SFARG1LO, CARG2
+|.define SFARG1HI, CARG1
+|.define SFARG2LO, CARG4
+|.define SFARG2HI, CARG3
+|.endif
+|
+|.if FPU
|.define FARG1, f12
|.define FARG2, f14
|
|.define FRET1, f0
|.define FRET2, f2
+|.endif
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.if FPU // MIPS32 hard-float.
+|
|.define CFRAME_SPACE, 112 // Delta for sp.
|
|.define SAVE_ERRF, 124(sp) // 32 bit C frame info.
@@ -72,6 +103,20 @@
|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
|.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves.
|.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves.
+|
+|.else // MIPS32 soft-float
+|
+|.define CFRAME_SPACE, 64 // Delta for sp.
+|
+|.define SAVE_ERRF, 76(sp) // 32 bit C frame info.
+|.define SAVE_NRES, 72(sp)
+|.define SAVE_CFRAME, 68(sp)
+|.define SAVE_L, 64(sp)
+|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter.
+|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves.
+|
+|.endif
+|
|.define SAVE_PC, 20(sp)
|.define ARG5, 16(sp)
|.define CSAVE_4, 12(sp)
@@ -83,43 +128,45 @@
|.define ARG5_OFS, 16
|.define SAVE_MULTRES, ARG5
|
+|//-----------------------------------------------------------------------
+|
|.macro saveregs
| addiu sp, sp, -CFRAME_SPACE
| sw ra, SAVE_GPR_+9*4(sp)
| sw r30, SAVE_GPR_+8*4(sp)
-| sdc1 f30, SAVE_FPR_+5*8(sp)
+| .FPU sdc1 f30, SAVE_FPR_+5*8(sp)
| sw r23, SAVE_GPR_+7*4(sp)
| sw r22, SAVE_GPR_+6*4(sp)
-| sdc1 f28, SAVE_FPR_+4*8(sp)
+| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
| sw r21, SAVE_GPR_+5*4(sp)
| sw r20, SAVE_GPR_+4*4(sp)
-| sdc1 f26, SAVE_FPR_+3*8(sp)
+| .FPU sdc1 f26, SAVE_FPR_+3*8(sp)
| sw r19, SAVE_GPR_+3*4(sp)
| sw r18, SAVE_GPR_+2*4(sp)
-| sdc1 f24, SAVE_FPR_+2*8(sp)
+| .FPU sdc1 f24, SAVE_FPR_+2*8(sp)
| sw r17, SAVE_GPR_+1*4(sp)
| sw r16, SAVE_GPR_+0*4(sp)
-| sdc1 f22, SAVE_FPR_+1*8(sp)
-| sdc1 f20, SAVE_FPR_+0*8(sp)
+| .FPU sdc1 f22, SAVE_FPR_+1*8(sp)
+| .FPU sdc1 f20, SAVE_FPR_+0*8(sp)
|.endmacro
|
|.macro restoreregs_ret
| lw ra, SAVE_GPR_+9*4(sp)
| lw r30, SAVE_GPR_+8*4(sp)
-| ldc1 f30, SAVE_FPR_+5*8(sp)
+| .FPU ldc1 f30, SAVE_FPR_+5*8(sp)
| lw r23, SAVE_GPR_+7*4(sp)
| lw r22, SAVE_GPR_+6*4(sp)
-| ldc1 f28, SAVE_FPR_+4*8(sp)
+| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
| lw r21, SAVE_GPR_+5*4(sp)
| lw r20, SAVE_GPR_+4*4(sp)
-| ldc1 f26, SAVE_FPR_+3*8(sp)
+| .FPU ldc1 f26, SAVE_FPR_+3*8(sp)
| lw r19, SAVE_GPR_+3*4(sp)
| lw r18, SAVE_GPR_+2*4(sp)
-| ldc1 f24, SAVE_FPR_+2*8(sp)
+| .FPU ldc1 f24, SAVE_FPR_+2*8(sp)
| lw r17, SAVE_GPR_+1*4(sp)
| lw r16, SAVE_GPR_+0*4(sp)
-| ldc1 f22, SAVE_FPR_+1*8(sp)
-| ldc1 f20, SAVE_FPR_+0*8(sp)
+| .FPU ldc1 f22, SAVE_FPR_+1*8(sp)
+| .FPU ldc1 f20, SAVE_FPR_+0*8(sp)
| jr ra
| addiu sp, sp, CFRAME_SPACE
|.endmacro
@@ -138,11 +185,12 @@
|.type NODE, Node
|.type NARGS8, int
|.type TRACE, GCtrace
+|.type SBUF, SBuf
|
|//-----------------------------------------------------------------------
|
|// Trap for not-yet-implemented parts.
-|.macro NYI; .long 0xf0f0f0f0; .endmacro
+|.macro NYI; .long 0xec1cf0f0; .endmacro
|
|// Macros to mark delay slots.
|.macro ., a; a; .endmacro
@@ -152,13 +200,23 @@
|//-----------------------------------------------------------------------
|
|// Endian-specific defines.
-|.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8)
-|.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4)
-|.define HI, LJ_ENDIAN_SELECT(4,0)
-|.define LO, LJ_ENDIAN_SELECT(0,4)
-|.define OFS_RD, LJ_ENDIAN_SELECT(2,0)
-|.define OFS_RA, LJ_ENDIAN_SELECT(1,2)
-|.define OFS_OP, LJ_ENDIAN_SELECT(0,3)
+|.if ENDIAN_LE
+|.define FRAME_PC, -4
+|.define FRAME_FUNC, -8
+|.define HI, 4
+|.define LO, 0
+|.define OFS_RD, 2
+|.define OFS_RA, 1
+|.define OFS_OP, 0
+|.else
+|.define FRAME_PC, -8
+|.define FRAME_FUNC, -4
+|.define HI, 0
+|.define LO, 4
+|.define OFS_RD, 0
+|.define OFS_RA, 2
+|.define OFS_OP, 3
+|.endif
|
|// Instruction decode.
|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
@@ -353,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx)
|. sll TMP2, TMP2, 3
|1:
| addiu TMP1, TMP1, -8
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| addiu RA, RA, 8
- | sdc1 f0, 0(BASE)
+ | sw SFRETHI, HI(BASE)
+ | sw SFRETLO, LO(BASE)
| bnez TMP1, <1
|. addiu BASE, BASE, 8
|
@@ -424,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx)
| and sp, CARG1, AT
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| lw L, SAVE_L
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
| li TISNIL, LJ_TNIL
| lw BASE, L->base
| lw DISPATCH, L->glref // Setup pointer to dispatch table.
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| li TMP1, LJ_TFALSE
| li_vmstate INTERP
| lw PC, FRAME_PC(BASE) // Fetch PC of previous frame.
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| addiu RA, BASE, -8 // Results start at BASE-8.
| addiu DISPATCH, DISPATCH, GG_G2DISP
| sw TMP1, HI(RA) // Prepend false to error message.
@@ -440,6 +501,10 @@ static void build_subroutines(BuildCtx *ctx)
| b ->vm_returnc
|. li RD, 16 // 2 results: false + error message.
|
+ |->vm_unwind_stub: // Jump to exit stub from unwinder.
+ | jr CARG1
+ |. move ra, CARG2
+ |
|//-----------------------------------------------------------------------
|//-- Grow stack for calls -----------------------------------------------
|//-----------------------------------------------------------------------
@@ -486,21 +551,23 @@ static void build_subroutines(BuildCtx *ctx)
| addiu DISPATCH, DISPATCH, GG_G2DISP
| sw r0, SAVE_NRES
| sw r0, SAVE_ERRF
- | sw TMP0, L->cframe
+ | sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
| sw r0, SAVE_CFRAME
| beqz TMP1, >3
- |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ |. sw TMP0, L->cframe
|
| // Resume after yield (like a return).
+ | sw L, DISPATCH_GL(cur_L)(DISPATCH)
| move RA, BASE
| lw BASE, L->base
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lw TMP1, L->top
| lw PC, FRAME_PC(BASE)
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| subu RD, TMP1, BASE
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| sb r0, L->status
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| li_vmstate INTERP
| addiu RD, RD, 8
| st_vmstate
@@ -525,25 +592,27 @@ static void build_subroutines(BuildCtx *ctx)
|
|1: // Entry point for vm_pcall above (PC = ftype).
| lw TMP1, L:CARG1->cframe
- | sw CARG3, SAVE_NRES
| move L, CARG1
- | sw CARG1, SAVE_L
- | move BASE, CARG2
- | sw sp, L->cframe // Add our C frame to cframe chain.
+ | sw CARG3, SAVE_NRES
| lw DISPATCH, L->glref // Setup pointer to dispatch table.
+ | sw CARG1, SAVE_L
+ | move BASE, CARG2
+ | addiu DISPATCH, DISPATCH, GG_G2DISP
| sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
| sw TMP1, SAVE_CFRAME
- | addiu DISPATCH, DISPATCH, GG_G2DISP
+ | sw sp, L->cframe // Add our C frame to cframe chain.
|
|3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+ | sw L, DISPATCH_GL(cur_L)(DISPATCH)
| lw TMP2, L->base // TMP2 = old base (used in vmeta_call).
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lw TMP1, L->top
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| addu PC, PC, BASE
| subu NARGS8:RC, TMP1, BASE
| subu PC, PC, TMP2 // PC = frame delta + frame type
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| li_vmstate INTERP
| li TISNIL, LJ_TNIL
| st_vmstate
@@ -566,20 +635,21 @@ static void build_subroutines(BuildCtx *ctx)
| lw TMP0, L:CARG1->stack
| sw CARG1, SAVE_L
| lw TMP1, L->top
+ | lw DISPATCH, L->glref // Setup pointer to dispatch table.
| sw CARG1, SAVE_PC // Any value outside of bytecode is ok.
| subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
| lw TMP1, L->cframe
- | sw sp, L->cframe // Add our C frame to cframe chain.
+ | addiu DISPATCH, DISPATCH, GG_G2DISP
| sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
| sw r0, SAVE_ERRF // No error function.
- | move CFUNCADDR, CARG4
+ | sw TMP1, SAVE_CFRAME
+ | sw sp, L->cframe // Add our C frame to cframe chain.
+ | sw L, DISPATCH_GL(cur_L)(DISPATCH)
| jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
- |. sw TMP1, SAVE_CFRAME
+ |. move CFUNCADDR, CARG4
| move BASE, CRET1
- | lw DISPATCH, L->glref // Setup pointer to dispatch table.
- | li PC, FRAME_CP
| bnez CRET1, <3 // Else continue with the call.
- |. addiu DISPATCH, DISPATCH, GG_G2DISP
+ |. li PC, FRAME_CP
| b ->vm_leave_cp // No base? Just remove C frame.
|. nop
|
@@ -624,7 +694,8 @@ static void build_subroutines(BuildCtx *ctx)
|->cont_cat: // RA = resultptr, RB = meta base
| lw INS, -4(PC)
| addiu CARG2, RB, -16
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| decode_RB8a MULTRES, INS
| decode_RA8a RA, INS
| decode_RB8b MULTRES
@@ -632,11 +703,13 @@ static void build_subroutines(BuildCtx *ctx)
| addu TMP1, BASE, MULTRES
| sw BASE, L->base
| subu CARG3, CARG2, TMP1
+ | sw SFRETHI, HI(CARG2)
| bne TMP1, CARG2, ->BC_CAT_Z
- |. sdc1 f0, 0(CARG2)
+ |. sw SFRETLO, LO(CARG2)
| addu RA, BASE, RA
+ | sw SFRETHI, HI(RA)
| b ->cont_nop
- |. sdc1 f0, 0(RA)
+ |. sw SFRETLO, LO(RA)
|
|//-- Table indexing metamethods -----------------------------------------
|
@@ -659,10 +732,9 @@ static void build_subroutines(BuildCtx *ctx)
|. sw TMP1, HI(CARG3)
|
|->vmeta_tgetb: // TMP0 = index
- | mtc1 TMP0, f0
- | cvt.d.w f0, f0
| addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
- | sdc1 f0, 0(CARG3)
+ | sw TMP0, LO(CARG3)
+ | sw TISNUM, HI(CARG3)
|
|->vmeta_tgetv:
|1:
@@ -674,9 +746,11 @@ static void build_subroutines(BuildCtx *ctx)
| // Returns TValue * (finished) or NULL (metamethod).
| beqz CRET1, >3
|. addiu TMP1, BASE, -FRAME_CONT
- | ldc1 f0, 0(CRET1)
+ | lw SFARG1HI, HI(CRET1)
+ | lw SFARG2HI, LO(CRET1)
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFARG1HI, HI(RA)
+ | sw SFARG2HI, LO(RA)
| ins_next2
|
|3: // Call __index metamethod.
@@ -688,6 +762,17 @@ static void build_subroutines(BuildCtx *ctx)
| b ->vm_call_dispatch_f
|. li NARGS8:RC, 16 // 2 args for func(t, k).
|
+ |->vmeta_tgetr:
+ | load_got lj_tab_getinth
+ | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
+ |. nop
+ | // Returns cTValue * or NULL.
+ | beqz CRET1, ->BC_TGETR_Z
+ |. move SFARG2HI, TISNIL
+ | lw SFARG2HI, HI(CRET1)
+ | b ->BC_TGETR_Z
+ |. lw SFARG2LO, LO(CRET1)
+ |
|//-----------------------------------------------------------------------
|
|->vmeta_tsets1:
@@ -709,10 +794,9 @@ static void build_subroutines(BuildCtx *ctx)
|. sw TMP1, HI(CARG3)
|
|->vmeta_tsetb: // TMP0 = index
- | mtc1 TMP0, f0
- | cvt.d.w f0, f0
| addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
- | sdc1 f0, 0(CARG3)
+ | sw TMP0, LO(CARG3)
+ | sw TISNUM, HI(CARG3)
|
|->vmeta_tsetv:
|1:
@@ -722,11 +806,13 @@ static void build_subroutines(BuildCtx *ctx)
| call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
|. move CARG1, L
| // Returns TValue * (finished) or NULL (metamethod).
+ | lw SFARG1HI, HI(RA)
| beqz CRET1, >3
- |. ldc1 f0, 0(RA)
+ |. lw SFARG1LO, LO(RA)
| // NOBARRIER: lj_meta_tset ensures the table is not black.
| ins_next1
- | sdc1 f0, 0(CRET1)
+ | sw SFARG1HI, HI(CRET1)
+ | sw SFARG1LO, LO(CRET1)
| ins_next2
|
|3: // Call __newindex metamethod.
@@ -736,14 +822,27 @@ static void build_subroutines(BuildCtx *ctx)
| sw PC, -16+HI(BASE) // [cont|PC]
| subu PC, BASE, TMP1
| lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | sdc1 f0, 16(BASE) // Copy value to third argument.
+ | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument.
+ | sw SFARG1LO, 16+LO(BASE)
| b ->vm_call_dispatch_f
|. li NARGS8:RC, 24 // 3 args for func(t, k, v)
|
+ |->vmeta_tsetr:
+ | load_got lj_tab_setinth
+ | sw BASE, L->base
+ | sw PC, SAVE_PC
+ | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ |. move CARG1, L
+ | // Returns TValue *.
+ | b ->BC_TSETR_Z
+ |. nop
+ |
|//-- Comparison metamethods ---------------------------------------------
|
|->vmeta_comp:
- | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT.
+ | // RA/RD point to o1/o2.
+ | move CARG2, RA
+ | move CARG3, RD
| load_got lj_meta_comp
| addiu PC, PC, -4
| sw BASE, L->base
@@ -769,11 +868,13 @@ static void build_subroutines(BuildCtx *ctx)
|
|->cont_ra: // RA = resultptr
| lbu TMP1, -4+OFS_RA(PC)
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| sll TMP1, TMP1, 3
| addu TMP1, BASE, TMP1
+ | sw SFRETHI, HI(TMP1)
| b ->cont_nop
- |. sdc1 f0, 0(TMP1)
+ |. sw SFRETLO, LO(TMP1)
|
|->cont_condt: // RA = resultptr
| lw TMP0, HI(RA)
@@ -788,8 +889,11 @@ static void build_subroutines(BuildCtx *ctx)
|. addiu TMP2, AT, -1 // Branch if result is false.
|
|->vmeta_equal:
- | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
+ | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1.
| load_got lj_meta_equal
+ | move CARG2, SFARG1LO
+ | move CARG3, SFARG2LO
+ | move CARG4, TMP0
| addiu PC, PC, -4
| sw BASE, L->base
| sw PC, SAVE_PC
@@ -813,17 +917,31 @@ static void build_subroutines(BuildCtx *ctx)
|. nop
|.endif
|
+ |->vmeta_istype:
+ | load_got lj_meta_istype
+ | addiu PC, PC, -4
+ | sw BASE, L->base
+ | srl CARG2, RA, 3
+ | srl CARG3, RD, 3
+ | sw PC, SAVE_PC
+ | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ |. move CARG1, L
+ | b ->cont_nop
+ |. nop
+ |
|//-- Arithmetic metamethods ---------------------------------------------
|
|->vmeta_unm:
- | move CARG4, CARG3
+ | move RC, RB
|
|->vmeta_arith:
| load_got lj_meta_arith
| decode_OP1 TMP0, INS
| sw BASE, L->base
- | sw PC, SAVE_PC
| move CARG2, RA
+ | sw PC, SAVE_PC
+ | move CARG3, RB
+ | move CARG4, RC
| sw TMP0, ARG5
| call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
|. move CARG1, L
@@ -931,40 +1049,52 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro .ffunc_1, name
|->ff_ .. name:
+ | lw SFARG1HI, HI(BASE)
| beqz NARGS8:RC, ->fff_fallback
- |. lw CARG3, HI(BASE)
- | lw CARG1, LO(BASE)
+ |. lw SFARG1LO, LO(BASE)
|.endmacro
|
|.macro .ffunc_2, name
|->ff_ .. name:
| sltiu AT, NARGS8:RC, 16
- | lw CARG3, HI(BASE)
+ | lw SFARG1HI, HI(BASE)
| bnez AT, ->fff_fallback
- |. lw CARG4, 8+HI(BASE)
- | lw CARG1, LO(BASE)
- | lw CARG2, 8+LO(BASE)
+ |. lw SFARG2HI, 8+HI(BASE)
+ | lw SFARG1LO, LO(BASE)
+ | lw SFARG2LO, 8+LO(BASE)
|.endmacro
|
|.macro .ffunc_n, name // Caveat: has delay slot!
|->ff_ .. name:
- | lw CARG3, HI(BASE)
+ | lw SFARG1HI, HI(BASE)
+ |.if FPU
+ | ldc1 FARG1, 0(BASE)
+ |.else
+ | lw SFARG1LO, LO(BASE)
+ |.endif
| beqz NARGS8:RC, ->fff_fallback
- |. ldc1 FARG1, 0(BASE)
- | sltiu AT, CARG3, LJ_TISNUM
+ |. sltiu AT, SFARG1HI, LJ_TISNUM
| beqz AT, ->fff_fallback
|.endmacro
|
|.macro .ffunc_nn, name // Caveat: has delay slot!
|->ff_ .. name:
| sltiu AT, NARGS8:RC, 16
- | lw CARG3, HI(BASE)
+ | lw SFARG1HI, HI(BASE)
| bnez AT, ->fff_fallback
- |. lw CARG4, 8+HI(BASE)
- | ldc1 FARG1, 0(BASE)
- | ldc1 FARG2, 8(BASE)
- | sltiu TMP0, CARG3, LJ_TISNUM
- | sltiu TMP1, CARG4, LJ_TISNUM
+ |. lw SFARG2HI, 8+HI(BASE)
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
+ |.if FPU
+ | ldc1 FARG1, 0(BASE)
+ |.else
+ | lw SFARG1LO, LO(BASE)
+ |.endif
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
+ |.if FPU
+ | ldc1 FARG2, 8(BASE)
+ |.else
+ | lw SFARG2LO, 8+LO(BASE)
+ |.endif
| and TMP0, TMP0, TMP1
| beqz TMP0, ->fff_fallback
|.endmacro
@@ -980,53 +1110,55 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Base library: checks -----------------------------------------------
|
|.ffunc_1 assert
- | sltiu AT, CARG3, LJ_TISTRUECOND
+ | sltiu AT, SFARG1HI, LJ_TISTRUECOND
| beqz AT, ->fff_fallback
|. addiu RA, BASE, -8
| lw PC, FRAME_PC(BASE)
| addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
| addu TMP2, RA, NARGS8:RC
- | sw CARG3, HI(RA)
+ | sw SFARG1HI, HI(RA)
| addiu TMP1, BASE, 8
| beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
- |. sw CARG1, LO(RA)
+ |. sw SFARG1LO, LO(RA)
|1:
- | ldc1 f0, 0(TMP1)
- | sdc1 f0, -8(TMP1)
+ | lw SFRETHI, HI(TMP1)
+ | lw SFRETLO, LO(TMP1)
+ | sw SFRETHI, -8+HI(TMP1)
+ | sw SFRETLO, -8+LO(TMP1)
| bne TMP1, TMP2, <1
|. addiu TMP1, TMP1, 8
| b ->fff_res
|. nop
|
|.ffunc type
- | lw CARG3, HI(BASE)
- | li TMP1, LJ_TISNUM
+ | lw SFARG1HI, HI(BASE)
| beqz NARGS8:RC, ->fff_fallback
- |. sltiu TMP0, CARG3, LJ_TISNUM
- | movz TMP1, CARG3, TMP0
- | not TMP1, TMP1
+ |. sltiu TMP0, SFARG1HI, LJ_TISNUM
+ | movn SFARG1HI, TISNUM, TMP0
+ | not TMP1, SFARG1HI
| sll TMP1, TMP1, 3
| addu TMP1, CFUNC:RB, TMP1
- | b ->fff_resn
- |. ldc1 FRET1, CFUNC:TMP1->upvalue
+ | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi
+ | b ->fff_restv
+ |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo
|
|//-- Base library: getters and setters ---------------------------------
|
|.ffunc_1 getmetatable
| li AT, LJ_TTAB
- | bne CARG3, AT, >6
+ | bne SFARG1HI, AT, >6
|. li AT, LJ_TUDATA
|1: // Field metatable must be at same offset for GCtab and GCudata!
- | lw TAB:CARG1, TAB:CARG1->metatable
+ | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable
|2:
| lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
- | beqz TAB:CARG1, ->fff_restv
- |. li CARG3, LJ_TNIL
- | lw TMP0, TAB:CARG1->hmask
- | li CARG3, LJ_TTAB // Use metatable as default result.
- | lw TMP1, STR:RC->hash
- | lw NODE:TMP2, TAB:CARG1->node
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
+ | beqz TAB:SFARG1LO, ->fff_restv
+ |. li SFARG1HI, LJ_TNIL
+ | lw TMP0, TAB:SFARG1LO->hmask
+ | li SFARG1HI, LJ_TTAB // Use metatable as default result.
+ | lw TMP1, STR:RC->sid
+ | lw NODE:TMP2, TAB:SFARG1LO->node
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
| sll TMP0, TMP1, 5
| sll TMP1, TMP1, 3
| subu TMP1, TMP0, TMP1
@@ -1037,7 +1169,7 @@ static void build_subroutines(BuildCtx *ctx)
| lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
| lw NODE:TMP3, NODE:TMP2->next
| bne CARG4, AT, >4
- |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2)
+ |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2)
| beq TMP0, STR:RC, >5
|. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2)
|4:
@@ -1046,36 +1178,35 @@ static void build_subroutines(BuildCtx *ctx)
| b <3
|. nop
|5:
- | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value.
+ | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value.
|. nop
- | move CARG3, CARG2 // Return value of mt.__metatable.
+ | move SFARG1HI, CARG3 // Return value of mt.__metatable.
| b ->fff_restv
- |. move CARG1, TMP1
+ |. move SFARG1LO, TMP1
|
|6:
- | beq CARG3, AT, <1
- |. sltiu TMP0, CARG3, LJ_TISNUM
- | li TMP1, LJ_TISNUM
- | movz TMP1, CARG3, TMP0
- | not TMP1, TMP1
+ | beq SFARG1HI, AT, <1
+ |. sltu AT, TISNUM, SFARG1HI
+ | movz SFARG1HI, TISNUM, AT
+ | not TMP1, SFARG1HI
| sll TMP1, TMP1, 2
| addu TMP1, DISPATCH, TMP1
| b <2
- |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
+ |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1)
|
|.ffunc_2 setmetatable
| // Fast path: no mt for table yet and not clearing the mt.
| li AT, LJ_TTAB
- | bne CARG3, AT, ->fff_fallback
- |. addiu CARG4, CARG4, -LJ_TTAB
- | lw TAB:TMP1, TAB:CARG1->metatable
- | lbu TMP3, TAB:CARG1->marked
- | or AT, CARG4, TAB:TMP1
+ | bne SFARG1HI, AT, ->fff_fallback
+ |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB
+ | lw TAB:TMP1, TAB:SFARG1LO->metatable
+ | lbu TMP3, TAB:SFARG1LO->marked
+ | or AT, SFARG2HI, TAB:TMP1
| bnez AT, ->fff_fallback
|. andi AT, TMP3, LJ_GC_BLACK // isblack(table)
| beqz AT, ->fff_restv
- |. sw TAB:CARG2, TAB:CARG1->metatable
- | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv
+ |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable
+ | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv
|
|.ffunc rawget
| lw CARG4, HI(BASE)
@@ -1089,90 +1220,89 @@ static void build_subroutines(BuildCtx *ctx)
| call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
|. move CARG1, L
| // Returns cTValue *.
- | b ->fff_resn
- |. ldc1 FRET1, 0(CRET1)
+ | lw SFARG1HI, HI(CRET1)
+ | b ->fff_restv
+ |. lw SFARG1LO, LO(CRET1)
|
|//-- Base library: conversions ------------------------------------------
|
|.ffunc tonumber
| // Only handles the number case inline (without a base argument).
| lw CARG1, HI(BASE)
- | xori AT, NARGS8:RC, 8
- | sltiu CARG1, CARG1, LJ_TISNUM
- | movn CARG1, r0, AT
- | beqz CARG1, ->fff_fallback // Exactly one number argument.
- |. ldc1 FRET1, 0(BASE)
- | b ->fff_resn
- |. nop
+ | xori AT, NARGS8:RC, 8 // Exactly one number argument.
+ | sltu TMP0, TISNUM, CARG1
+ | or AT, AT, TMP0
+ | bnez AT, ->fff_fallback
+ |. lw SFARG1HI, HI(BASE)
+ | b ->fff_restv
+ |. lw SFARG1LO, LO(BASE)
|
|.ffunc_1 tostring
| // Only handles the string or number case inline.
| li AT, LJ_TSTR
| // A __tostring method in the string base metatable is ignored.
- | beq CARG3, AT, ->fff_restv // String key?
+ | beq SFARG1HI, AT, ->fff_restv // String key?
| // Handle numbers inline, unless a number base metatable is present.
|. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
- | sltiu TMP0, CARG3, LJ_TISNUM
- | sltiu TMP1, TMP1, 1
- | and TMP0, TMP0, TMP1
- | beqz TMP0, ->fff_fallback
+ | sltu TMP0, TISNUM, SFARG1HI
+ | or TMP0, TMP0, TMP1
+ | bnez TMP0, ->fff_fallback
|. sw BASE, L->base // Add frame since C call can throw.
| ffgccheck
|. sw PC, SAVE_PC // Redundant (but a defined value).
- | load_got lj_str_fromnum
+ | load_got lj_strfmt_number
| move CARG1, L
- | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np)
+ | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
|. move CARG2, BASE
| // Returns GCstr *.
- | li CARG3, LJ_TSTR
+ | li SFARG1HI, LJ_TSTR
| b ->fff_restv
- |. move CARG1, CRET1
+ |. move SFARG1LO, CRET1
|
|//-- Base library: iterators -------------------------------------------
|
|.ffunc next
- | lw CARG1, HI(BASE)
- | lw TAB:CARG2, LO(BASE)
+ | lw CARG2, HI(BASE)
+ | lw TAB:CARG1, LO(BASE)
| beqz NARGS8:RC, ->fff_fallback
|. addu TMP2, BASE, NARGS8:RC
| li AT, LJ_TTAB
| sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil.
- | bne CARG1, AT, ->fff_fallback
+ | bne CARG2, AT, ->fff_fallback
|. lw PC, FRAME_PC(BASE)
| load_got lj_tab_next
- | sw BASE, L->base // Add frame since C call can throw.
- | sw BASE, L->top // Dummy frame length is ok.
- | addiu CARG3, BASE, 8
- | sw PC, SAVE_PC
- | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
- |. move CARG1, L
- | // Returns 0 at end of traversal.
+ | addiu CARG2, BASE, 8
+ | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ |. addiu CARG3, BASE, -8
+ | // Returns 1=found, 0=end, -1=error.
+ | addiu RA, BASE, -8
+ | bgtz CRET1, ->fff_res // Found key/value.
+ |. li RD, (2+1)*8
| beqz CRET1, ->fff_restv // End of traversal: return nil.
- |. li CARG3, LJ_TNIL
- | ldc1 f0, 8(BASE) // Copy key and value to results.
- | addiu RA, BASE, -8
- | ldc1 f2, 16(BASE)
- | li RD, (2+1)*8
- | sdc1 f0, 0(RA)
- | b ->fff_res
- |. sdc1 f2, 8(RA)
+ |. li SFARG1HI, LJ_TNIL
+ | lw CFUNC:RB, FRAME_FUNC(BASE)
+ | b ->fff_fallback // Invalid key.
+ |. li RC, 2*8
|
|.ffunc_1 pairs
| li AT, LJ_TTAB
- | bne CARG3, AT, ->fff_fallback
+ | bne SFARG1HI, AT, ->fff_fallback
|. lw PC, FRAME_PC(BASE)
#if LJ_52
- | lw TAB:TMP2, TAB:CARG1->metatable
- | ldc1 f0, CFUNC:RB->upvalue[0]
+ | lw TAB:TMP2, TAB:SFARG1LO->metatable
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
| bnez TAB:TMP2, ->fff_fallback
#else
- | ldc1 f0, CFUNC:RB->upvalue[0]
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
#endif
|. addiu RA, BASE, -8
| sw TISNIL, 8+HI(BASE)
- | li RD, (3+1)*8
+ | sw TMP0, HI(RA)
+ | sw TMP1, LO(RA)
| b ->fff_res
- |. sdc1 f0, 0(RA)
+ |. li RD, (3+1)*8
|
|.ffunc ipairs_aux
| sltiu AT, NARGS8:RC, 16
@@ -1180,35 +1310,32 @@ static void build_subroutines(BuildCtx *ctx)
| lw TAB:CARG1, LO(BASE)
| lw CARG4, 8+HI(BASE)
| bnez AT, ->fff_fallback
- |. ldc1 FARG2, 8(BASE)
- | addiu CARG3, CARG3, -LJ_TTAB
- | sltiu AT, CARG4, LJ_TISNUM
- | li TMP0, 1
- | movn AT, r0, CARG3
- | mtc1 TMP0, FARG1
- | beqz AT, ->fff_fallback
+ |. addiu CARG3, CARG3, -LJ_TTAB
+ | xor CARG4, CARG4, TISNUM
+ | and AT, CARG3, CARG4
+ | bnez AT, ->fff_fallback
|. lw PC, FRAME_PC(BASE)
- | cvt.w.d FRET1, FARG2
- | cvt.d.w FARG1, FARG1
+ | lw TMP2, 8+LO(BASE)
| lw TMP0, TAB:CARG1->asize
| lw TMP1, TAB:CARG1->array
- | mfc1 TMP2, FRET1
- | addiu RA, BASE, -8
- | add.d FARG2, FARG2, FARG1
| addiu TMP2, TMP2, 1
+ | sw TISNUM, -8+HI(BASE)
| sltu AT, TMP2, TMP0
+ | sw TMP2, -8+LO(BASE)
+ | beqz AT, >2 // Not in array part?
+ |. addiu RA, BASE, -8
| sll TMP3, TMP2, 3
| addu TMP3, TMP1, TMP3
- | beqz AT, >2 // Not in array part?
- |. sdc1 FARG2, 0(RA)
- | lw TMP2, HI(TMP3)
- | ldc1 f0, 0(TMP3)
+ | lw TMP1, HI(TMP3)
+ | lw TMP2, LO(TMP3)
|1:
- | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results.
+ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
|. li RD, (0+1)*8
- | li RD, (2+1)*8
+ | sw TMP1, 8+HI(RA)
+ | sw TMP2, 8+LO(RA)
| b ->fff_res
- |. sdc1 f0, 8(RA)
+ |. li RD, (2+1)*8
+ |
|2: // Check for empty hash part first. Otherwise call C function.
| lw TMP0, TAB:CARG1->hmask
| load_got lj_tab_getinth
@@ -1219,27 +1346,30 @@ static void build_subroutines(BuildCtx *ctx)
| // Returns cTValue * or NULL.
| beqz CRET1, ->fff_res
|. li RD, (0+1)*8
- | lw TMP2, HI(CRET1)
+ | lw TMP1, HI(CRET1)
| b <1
- |. ldc1 f0, 0(CRET1)
+ |. lw TMP2, LO(CRET1)
|
|.ffunc_1 ipairs
| li AT, LJ_TTAB
- | bne CARG3, AT, ->fff_fallback
+ | bne SFARG1HI, AT, ->fff_fallback
|. lw PC, FRAME_PC(BASE)
#if LJ_52
- | lw TAB:TMP2, TAB:CARG1->metatable
- | ldc1 f0, CFUNC:RB->upvalue[0]
+ | lw TAB:TMP2, TAB:SFARG1LO->metatable
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
| bnez TAB:TMP2, ->fff_fallback
#else
- | ldc1 f0, CFUNC:RB->upvalue[0]
+ | lw TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lw TMP1, CFUNC:RB->upvalue[0].u32.lo
#endif
|. addiu RA, BASE, -8
- | sw r0, 8+HI(BASE)
+ | sw TISNUM, 8+HI(BASE)
| sw r0, 8+LO(BASE)
- | li RD, (3+1)*8
+ | sw TMP0, HI(RA)
+ | sw TMP1, LO(RA)
| b ->fff_res
- |. sdc1 f0, 0(RA)
+ |. li RD, (3+1)*8
|
|//-- Base library: catch errors ----------------------------------------
|
@@ -1259,8 +1389,9 @@ static void build_subroutines(BuildCtx *ctx)
| sltiu AT, NARGS8:RC, 16
| lw CARG4, 8+HI(BASE)
| bnez AT, ->fff_fallback
- |. ldc1 FARG2, 8(BASE)
- | ldc1 FARG1, 0(BASE)
+ |. lw CARG3, 8+LO(BASE)
+ | lw CARG1, LO(BASE)
+ | lw CARG2, HI(BASE)
| lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
| li AT, LJ_TFUNC
| move TMP2, BASE
@@ -1268,9 +1399,11 @@ static void build_subroutines(BuildCtx *ctx)
| addiu BASE, BASE, 16
| // Remember active hook before pcall.
| srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
- | sdc1 FARG2, 0(TMP2) // Swap function and traceback.
+ | sw CARG3, LO(TMP2) // Swap function and traceback.
+ | sw CARG4, HI(TMP2)
| andi TMP3, TMP3, 1
- | sdc1 FARG1, 8(TMP2)
+ | sw CARG1, 8+LO(TMP2)
+ | sw CARG2, 8+HI(TMP2)
| addiu PC, TMP3, 16+FRAME_PCALL
| b ->vm_call_dispatch
|. addiu NARGS8:RC, NARGS8:RC, -16
@@ -1279,7 +1412,10 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro coroutine_resume_wrap, resume
|.if resume
- |.ffunc_1 coroutine_resume
+ |.ffunc coroutine_resume
+ | lw CARG3, HI(BASE)
+ | beqz NARGS8:RC, ->fff_fallback
+ |. lw CARG1, LO(BASE)
| li AT, LJ_TTHREAD
| bne CARG3, AT, ->fff_fallback
|.else
@@ -1314,11 +1450,13 @@ static void build_subroutines(BuildCtx *ctx)
| move CARG3, CARG2
| sw BASE, L->top
|2: // Move args to coroutine.
- | ldc1 f0, 0(BASE)
+ | lw SFRETHI, HI(BASE)
+ | lw SFRETLO, LO(BASE)
| sltu AT, BASE, TMP1
| beqz AT, >3
|. addiu BASE, BASE, 8
- | sdc1 f0, 0(CARG3)
+ | sw SFRETHI, HI(CARG3)
+ | sw SFRETLO, LO(CARG3)
| b <2
|. addiu CARG3, CARG3, 8
|3:
@@ -1331,6 +1469,7 @@ static void build_subroutines(BuildCtx *ctx)
| lw TMP3, L:RA->top
| li_vmstate INTERP
| lw BASE, L->base
+ | sw L, DISPATCH_GL(cur_L)(DISPATCH)
| st_vmstate
| beqz AT, >8
|. subu RD, TMP3, TMP2
@@ -1343,10 +1482,12 @@ static void build_subroutines(BuildCtx *ctx)
| sw TMP2, L:RA->top // Clear coroutine stack.
| move TMP1, BASE
|5: // Move results from coroutine.
- | ldc1 f0, 0(TMP2)
+ | lw SFRETHI, HI(TMP2)
+ | lw SFRETLO, LO(TMP2)
| addiu TMP2, TMP2, 8
| sltu AT, TMP2, TMP3
- | sdc1 f0, 0(TMP1)
+ | sw SFRETHI, HI(TMP1)
+ | sw SFRETLO, LO(TMP1)
| bnez AT, <5
|. addiu TMP1, TMP1, 8
|6:
@@ -1371,12 +1512,14 @@ static void build_subroutines(BuildCtx *ctx)
|.if resume
| addiu TMP3, TMP3, -8
| li TMP1, LJ_TFALSE
- | ldc1 f0, 0(TMP3)
+ | lw SFRETHI, HI(TMP3)
+ | lw SFRETLO, LO(TMP3)
| sw TMP3, L:RA->top // Remove error from coroutine stack.
| li RD, (2+1)*8
| sw TMP1, -8+HI(BASE) // Prepend false to results.
| addiu RA, BASE, -8
- | sdc1 f0, 0(BASE) // Copy error message.
+ | sw SFRETHI, HI(BASE) // Copy error message.
+ | sw SFRETLO, LO(BASE)
| b <7
|. andi TMP0, PC, FRAME_TYPE
|.else
@@ -1412,20 +1555,29 @@ static void build_subroutines(BuildCtx *ctx)
|
|//-- Math library -------------------------------------------------------
|
- |.ffunc_n math_abs
- |. abs.d FRET1, FARG1
- |->fff_resn:
- | lw PC, FRAME_PC(BASE)
- | addiu RA, BASE, -8
- | b ->fff_res1
- |. sdc1 FRET1, -8(BASE)
+ |.ffunc_1 math_abs
+ | bne SFARG1HI, TISNUM, >1
+ |. sra TMP0, SFARG1LO, 31
+ | xor TMP1, SFARG1LO, TMP0
+ | subu SFARG1LO, TMP1, TMP0
+ | bgez SFARG1LO, ->fff_restv
+ |. nop
+ | lui SFARG1HI, 0x41e0 // 2^31 as a double.
+ | b ->fff_restv
+ |. li SFARG1LO, 0
+ |1:
+ | sltiu AT, SFARG1HI, LJ_TISNUM
+ | beqz AT, ->fff_fallback
+ |. sll SFARG1HI, SFARG1HI, 1
+ | srl SFARG1HI, SFARG1HI, 1
+ |// fallthrough
|
|->fff_restv:
- | // CARG3/CARG1 = TValue result.
+ | // SFARG1LO/SFARG1HI = TValue result.
| lw PC, FRAME_PC(BASE)
- | sw CARG3, -8+HI(BASE)
+ | sw SFARG1HI, -8+HI(BASE)
| addiu RA, BASE, -8
- | sw CARG1, -8+LO(BASE)
+ | sw SFARG1LO, -8+LO(BASE)
|->fff_res1:
| // RA = results, PC = return.
| li RD, (1+1)*8
@@ -1454,15 +1606,19 @@ static void build_subroutines(BuildCtx *ctx)
|. sw TISNIL, -8+HI(TMP1)
|
|.macro math_extern, func
- |->ff_math_ .. func:
- | lw CARG3, HI(BASE)
+ | .ffunc math_ .. func
+ | lw SFARG1HI, HI(BASE)
| beqz NARGS8:RC, ->fff_fallback
|. load_got func
- | sltiu AT, CARG3, LJ_TISNUM
+ | sltiu AT, SFARG1HI, LJ_TISNUM
| beqz AT, ->fff_fallback
- |. nop
- | call_extern
+ |.if FPU
|. ldc1 FARG1, 0(BASE)
+ |.else
+ |. lw SFARG1LO, LO(BASE)
+ |.endif
+ | call_extern
+ |. nop
| b ->fff_resn
|. nop
|.endmacro
@@ -1476,10 +1632,22 @@ static void build_subroutines(BuildCtx *ctx)
|. nop
|.endmacro
|
+ |// TODO: Return integer type if result is integer (own sf implementation).
|.macro math_round, func
- | .ffunc_n math_ .. func
- |. nop
+ |->ff_math_ .. func:
+ | lw SFARG1HI, HI(BASE)
+ | beqz NARGS8:RC, ->fff_fallback
+ |. lw SFARG1LO, LO(BASE)
+ | beq SFARG1HI, TISNUM, ->fff_restv
+ |. sltu AT, SFARG1HI, TISNUM
+ | beqz AT, ->fff_fallback
+ |.if FPU
+ |. ldc1 FARG1, 0(BASE)
| bal ->vm_ .. func
+ |.else
+ |. load_got func
+ | call_extern
+ |.endif
|. nop
| b ->fff_resn
|. nop
@@ -1489,15 +1657,19 @@ static void build_subroutines(BuildCtx *ctx)
| math_round ceil
|
|.ffunc math_log
- | lw CARG3, HI(BASE)
| li AT, 8
| bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
- |. load_got log
- | sltiu AT, CARG3, LJ_TISNUM
+ |. lw SFARG1HI, HI(BASE)
+ | sltiu AT, SFARG1HI, LJ_TISNUM
| beqz AT, ->fff_fallback
- |. nop
+ |. load_got log
+ |.if FPU
| call_extern
|. ldc1 FARG1, 0(BASE)
+ |.else
+ | call_extern
+ |. lw SFARG1LO, LO(BASE)
+ |.endif
| b ->fff_resn
|. nop
|
@@ -1516,23 +1688,43 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern2 atan2
| math_extern2 fmod
|
+ |.if FPU
|.ffunc_n math_sqrt
|. sqrt.d FRET1, FARG1
- | b ->fff_resn
- |. nop
+ |// fallthrough to ->fff_resn
+ |.else
+ | math_extern sqrt
+ |.endif
+ |
+ |->fff_resn:
+ | lw PC, FRAME_PC(BASE)
+ | addiu RA, BASE, -8
+ |.if FPU
+ | b ->fff_res1
+ |. sdc1 FRET1, -8(BASE)
+ |.else
+ | sw SFRETHI, -8+HI(BASE)
+ | b ->fff_res1
+ |. sw SFRETLO, -8+LO(BASE)
+ |.endif
|
- |->ff_math_deg:
- |.ffunc_n math_rad
- |. ldc1 FARG2, CFUNC:RB->upvalue[0]
- | b ->fff_resn
- |. mul.d FRET1, FARG1, FARG2
|
- |.ffunc_nn math_ldexp
- | cvt.w.d FARG2, FARG2
+ |.ffunc math_ldexp
+ | sltiu AT, NARGS8:RC, 16
+ | lw SFARG1HI, HI(BASE)
+ | bnez AT, ->fff_fallback
+ |. lw CARG4, 8+HI(BASE)
+ | bne CARG4, TISNUM, ->fff_fallback
| load_got ldexp
- | mfc1 CARG3, FARG2
+ |. sltu AT, SFARG1HI, TISNUM
+ | beqz AT, ->fff_fallback
+ |.if FPU
+ |. ldc1 FARG1, 0(BASE)
+ |.else
+ |. lw SFARG1LO, LO(BASE)
+ |.endif
| call_extern
- |. nop
+ |. lw CARG3, 8+LO(BASE)
| b ->fff_resn
|. nop
|
@@ -1543,10 +1735,17 @@ static void build_subroutines(BuildCtx *ctx)
|. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
| lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
| addiu RA, BASE, -8
+ |.if FPU
| mtc1 TMP1, FARG2
| sdc1 FRET1, 0(RA)
| cvt.d.w FARG2, FARG2
| sdc1 FARG2, 8(RA)
+ |.else
+ | sw SFRETLO, LO(RA)
+ | sw SFRETHI, HI(RA)
+ | sw TMP1, 8+LO(RA)
+ | sw TISNUM, 8+HI(RA)
+ |.endif
| b ->fff_res
|. li RD, (2+1)*8
|
@@ -1556,49 +1755,109 @@ static void build_subroutines(BuildCtx *ctx)
| call_extern
|. addiu CARG3, BASE, -8
| addiu RA, BASE, -8
+ |.if FPU
| sdc1 FRET1, 0(BASE)
+ |.else
+ | sw SFRETLO, LO(BASE)
+ | sw SFRETHI, HI(BASE)
+ |.endif
| b ->fff_res
|. li RD, (2+1)*8
|
- |.macro math_minmax, name, ismax
- |->ff_ .. name:
- | lw CARG3, HI(BASE)
- | beqz NARGS8:RC, ->fff_fallback
- |. ldc1 FRET1, 0(BASE)
- | sltiu AT, CARG3, LJ_TISNUM
+ |.macro math_minmax, name, intins, ismax
+ | .ffunc_1 name
+ | addu TMP3, BASE, NARGS8:RC
+ | bne SFARG1HI, TISNUM, >5
+ |. addiu TMP2, BASE, 8
+ |1: // Handle integers.
+ |. lw SFARG2HI, HI(TMP2)
+ | beq TMP2, TMP3, ->fff_restv
+ |. lw SFARG2LO, LO(TMP2)
+ | bne SFARG2HI, TISNUM, >3
+ |. slt AT, SFARG1LO, SFARG2LO
+ | intins SFARG1LO, SFARG2LO, AT
+ | b <1
+ |. addiu TMP2, TMP2, 8
+ |
+ |3: // Convert intermediate result to number and continue with number loop.
+ | sltiu AT, SFARG2HI, LJ_TISNUM
| beqz AT, ->fff_fallback
- |. addu TMP2, BASE, NARGS8:RC
- | addiu TMP1, BASE, 8
- | beq TMP1, TMP2, ->fff_resn
- |1:
- |. lw CARG3, HI(TMP1)
- | ldc1 FARG1, 0(TMP1)
- | addiu TMP1, TMP1, 8
- | sltiu AT, CARG3, LJ_TISNUM
+ |.if FPU
+ |. mtc1 SFARG1LO, FRET1
+ | cvt.d.w FRET1, FRET1
+ | b >7
+ |. ldc1 FARG1, 0(TMP2)
+ |.else
+ |. nop
+ | bal ->vm_sfi2d_1
+ |. nop
+ | b >7
+ |. nop
+ |.endif
+ |
+ |5:
+ |. sltiu AT, SFARG1HI, LJ_TISNUM
| beqz AT, ->fff_fallback
+ |.if FPU
+ |. ldc1 FRET1, 0(BASE)
+ |.endif
+ |
+ |6: // Handle numbers.
+ |. lw SFARG2HI, HI(TMP2)
+ |.if FPU
+ | beq TMP2, TMP3, ->fff_resn
+ |.else
+ | beq TMP2, TMP3, ->fff_restv
+ |.endif
+ |. sltiu AT, SFARG2HI, LJ_TISNUM
+ | beqz AT, >8
+ |.if FPU
+ |. ldc1 FARG1, 0(TMP2)
+ |.else
+ |. lw SFARG2LO, LO(TMP2)
+ |.endif
+ |7:
+ |.if FPU
|.if ismax
- |. c.olt.d FARG1, FRET1
+ | c.olt.d FARG1, FRET1
|.else
- |. c.olt.d FRET1, FARG1
+ | c.olt.d FRET1, FARG1
+ |.endif
+ | movf.d FRET1, FARG1
+ |.else
+ |.if ismax
+ | bal ->vm_sfcmpogt
+ |.else
+ | bal ->vm_sfcmpolt
|.endif
- | bne TMP1, TMP2, <1
- |. movf.d FRET1, FARG1
- | b ->fff_resn
|. nop
+ | movz SFARG1LO, SFARG2LO, CRET1
+ | movz SFARG1HI, SFARG2HI, CRET1
+ |.endif
+ | b <6
+ |. addiu TMP2, TMP2, 8
+ |
+ |8: // Convert integer to number and continue with number loop.
+ | bne SFARG2HI, TISNUM, ->fff_fallback
+ |.if FPU
+ |. lwc1 FARG1, LO(TMP2)
+ | b <7
+ |. cvt.d.w FARG1, FARG1
+ |.else
+ |. nop
+ | bal ->vm_sfi2d_2
+ |. nop
+ | b <7
+ |. nop
+ |.endif
+ |
|.endmacro
|
- | math_minmax math_min, 0
- | math_minmax math_max, 1
+ | math_minmax math_min, movz, 0
+ | math_minmax math_max, movn, 1
|
|//-- String library -----------------------------------------------------
|
- |.ffunc_1 string_len
- | li AT, LJ_TSTR
- | bne CARG3, AT, ->fff_fallback
- |. nop
- | b ->fff_resi
- |. lw CRET1, STR:CARG1->len
- |
|.ffunc string_byte // Only handle the 1-arg case here.
| lw CARG3, HI(BASE)
| lw STR:CARG1, LO(BASE)
@@ -1608,33 +1867,31 @@ static void build_subroutines(BuildCtx *ctx)
| bnez AT, ->fff_fallback // Need exactly 1 string argument.
|. nop
| lw TMP0, STR:CARG1->len
- | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
| addiu RA, BASE, -8
+ | lw PC, FRAME_PC(BASE)
| sltu RD, r0, TMP0
- | mtc1 TMP1, f0
+ | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
| addiu RD, RD, 1
- | cvt.d.w f0, f0
- | lw PC, FRAME_PC(BASE)
| sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
+ | sw TISNUM, HI(RA)
| b ->fff_res
- |. sdc1 f0, 0(RA)
+ |. sw TMP1, LO(RA)
|
|.ffunc string_char // Only handle the 1-arg case here.
| ffgccheck
|. nop
| lw CARG3, HI(BASE)
- | ldc1 FARG1, 0(BASE)
- | li AT, 8
- | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
- |. sltiu AT, CARG3, LJ_TISNUM
- | beqz AT, ->fff_fallback
+ | lw CARG1, LO(BASE)
+ | li TMP1, 255
+ | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
+ | xor TMP0, CARG3, TISNUM // Integer.
+ | sltu TMP1, TMP1, CARG1 // !(255 < n).
+ | or AT, AT, TMP0
+ | or AT, AT, TMP1
+ | bnez AT, ->fff_fallback
|. li CARG3, 1
- | cvt.w.d FARG1, FARG1
| addiu CARG2, sp, ARG5_OFS
- | sltiu AT, TMP0, 256
- | mfc1 TMP0, FARG1
- | beqz AT, ->fff_fallback
- |. sw TMP0, ARG5
+ | sb CARG1, ARG5
|->fff_newstr:
| load_got lj_str_new
| sw BASE, L->base
@@ -1643,35 +1900,30 @@ static void build_subroutines(BuildCtx *ctx)
|. move CARG1, L
| // Returns GCstr *.
| lw BASE, L->base
- | move CARG1, CRET1
+ |->fff_resstr:
+ | move SFARG1LO, CRET1
| b ->fff_restv
- |. li CARG3, LJ_TSTR
+ |. li SFARG1HI, LJ_TSTR
|
|.ffunc string_sub
| ffgccheck
|. nop
| addiu AT, NARGS8:RC, -16
| lw CARG3, 16+HI(BASE)
- | ldc1 f0, 16(BASE)
| lw TMP0, HI(BASE)
| lw STR:CARG1, LO(BASE)
| bltz AT, ->fff_fallback
- | lw CARG2, 8+HI(BASE)
- | ldc1 f2, 8(BASE)
+ |. lw CARG2, 8+HI(BASE)
| beqz AT, >1
|. li CARG4, -1
- | cvt.w.d f0, f0
- | sltiu AT, CARG3, LJ_TISNUM
- | beqz AT, ->fff_fallback
- |. mfc1 CARG4, f0
+ | bne CARG3, TISNUM, ->fff_fallback
+ |. lw CARG4, 16+LO(BASE)
|1:
- | sltiu AT, CARG2, LJ_TISNUM
- | beqz AT, ->fff_fallback
+ | bne CARG2, TISNUM, ->fff_fallback
|. li AT, LJ_TSTR
- | cvt.w.d f2, f2
| bne TMP0, AT, ->fff_fallback
- |. lw CARG2, STR:CARG1->len
- | mfc1 CARG3, f2
+ |. lw CARG3, 8+LO(BASE)
+ | lw CARG2, STR:CARG1->len
| // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
| slt AT, CARG4, r0
| addiu TMP0, CARG2, 1
@@ -1693,139 +1945,130 @@ static void build_subroutines(BuildCtx *ctx)
| bgez CARG3, ->fff_newstr
|. addiu CARG3, CARG3, 1 // len++
|->fff_emptystr: // Return empty string.
- | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
+ | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty)
| b ->fff_restv
- |. li CARG3, LJ_TSTR
+ |. li SFARG1HI, LJ_TSTR
|
- |.ffunc string_rep // Only handle the 1-char case inline.
- | ffgccheck
- |. nop
- | lw TMP0, HI(BASE)
- | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments.
- | lw CARG4, 8+HI(BASE)
- | lw STR:CARG1, LO(BASE)
- | addiu TMP0, TMP0, -LJ_TSTR
- | ldc1 f0, 8(BASE)
- | or AT, AT, TMP0
- | bnez AT, ->fff_fallback
- |. sltiu AT, CARG4, LJ_TISNUM
- | cvt.w.d f0, f0
- | beqz AT, ->fff_fallback
- |. lw TMP0, STR:CARG1->len
- | mfc1 CARG3, f0
- | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
- | li AT, 1
- | blez CARG3, ->fff_emptystr // Count <= 0?
- |. sltu AT, AT, TMP0
- | beqz TMP0, ->fff_emptystr // Zero length string?
- |. sltu TMP0, TMP1, CARG3
- | or AT, AT, TMP0
- | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
- | bnez AT, ->fff_fallback // Fallback for > 1-char strings.
- |. lbu TMP0, STR:CARG1[1]
- | addu TMP2, CARG2, CARG3
- |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
- | addiu TMP2, TMP2, -1
- | sltu AT, CARG2, TMP2
- | bnez AT, <1
- |. sb TMP0, 0(TMP2)
- | b ->fff_newstr
- |. nop
- |
- |.ffunc string_reverse
+ |.macro ffstring_op, name
+ | .ffunc string_ .. name
| ffgccheck
|. nop
| lw CARG3, HI(BASE)
- | lw STR:CARG1, LO(BASE)
+ | lw STR:CARG2, LO(BASE)
| beqz NARGS8:RC, ->fff_fallback
|. li AT, LJ_TSTR
| bne CARG3, AT, ->fff_fallback
- |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
- | lw CARG3, STR:CARG1->len
- | addiu CARG1, STR:CARG1, #STR
- | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
- | sltu AT, TMP1, CARG3
- | bnez AT, ->fff_fallback
- |. addu TMP3, CARG1, CARG3
- | addu CARG4, CARG2, CARG3
- |1: // Reverse string copy.
- | lbu TMP1, 0(CARG1)
- | sltu AT, CARG1, TMP3
- | beqz AT, ->fff_newstr
- |. addiu CARG1, CARG1, 1
- | addiu CARG4, CARG4, -1
- | b <1
- | sb TMP1, 0(CARG4)
- |
- |.macro ffstring_case, name, lo
- | .ffunc name
- | ffgccheck
- |. nop
- | lw CARG3, HI(BASE)
- | lw STR:CARG1, LO(BASE)
- | beqz NARGS8:RC, ->fff_fallback
- |. li AT, LJ_TSTR
- | bne CARG3, AT, ->fff_fallback
- |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
- | lw CARG3, STR:CARG1->len
- | addiu CARG1, STR:CARG1, #STR
- | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
- | sltu AT, TMP1, CARG3
- | bnez AT, ->fff_fallback
- |. addu TMP3, CARG1, CARG3
- | move CARG4, CARG2
- |1: // ASCII case conversion.
- | lbu TMP1, 0(CARG1)
- | sltu AT, CARG1, TMP3
- | beqz AT, ->fff_newstr
- |. addiu TMP0, TMP1, -lo
- | xori TMP2, TMP1, 0x20
- | sltiu AT, TMP0, 26
- | movn TMP1, TMP2, AT
- | addiu CARG1, CARG1, 1
- | sb TMP1, 0(CARG4)
- | b <1
- |. addiu CARG4, CARG4, 1
+ |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
+ | load_got lj_buf_putstr_ .. name
+ | lw TMP0, SBUF:CARG1->b
+ | sw L, SBUF:CARG1->L
+ | sw BASE, L->base
+ | sw TMP0, SBUF:CARG1->w
+ | call_intern extern lj_buf_putstr_ .. name
+ |. sw PC, SAVE_PC
+ | load_got lj_buf_tostr
+ | call_intern lj_buf_tostr
+ |. move SBUF:CARG1, SBUF:CRET1
+ | b ->fff_resstr
+ |. lw BASE, L->base
|.endmacro
|
- |ffstring_case string_lower, 65
- |ffstring_case string_upper, 97
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
|
- |//-- Table library ------------------------------------------------------
+ |//-- Bit library --------------------------------------------------------
|
- |.ffunc_1 table_getn
- | li AT, LJ_TTAB
- | bne CARG3, AT, ->fff_fallback
- |. load_got lj_tab_len
- | call_intern lj_tab_len // (GCtab *t)
- |. nop
- | // Returns uint32_t (but less than 2^31).
- | b ->fff_resi
+ |->vm_tobit_fb:
+ | beqz TMP1, ->fff_fallback
+ |.if FPU
+ |. ldc1 FARG1, 0(BASE)
+ | add.d FARG1, FARG1, TOBIT
+ | jr ra
+ |. mfc1 CRET1, FARG1
+ |.else
+ |// FP number to bit conversion for soft-float.
+ |->vm_tobit:
+ | sll TMP0, SFARG1HI, 1
+ | lui AT, 0x0020
+ | addu TMP0, TMP0, AT
+ | slt AT, TMP0, r0
+ | movz SFARG1LO, r0, AT
+ | beqz AT, >2
+ |. li TMP1, 0x3e0
+ | not TMP1, TMP1
+ | sra TMP0, TMP0, 21
+ | subu TMP0, TMP1, TMP0
+ | slt AT, TMP0, r0
+ | bnez AT, >1
+ |. sll TMP1, SFARG1HI, 11
+ | lui AT, 0x8000
+ | or TMP1, TMP1, AT
+ | srl AT, SFARG1LO, 21
+ | or TMP1, TMP1, AT
+ | slt AT, SFARG1HI, r0
+ | beqz AT, >2
+ |. srlv SFARG1LO, TMP1, TMP0
+ | subu SFARG1LO, r0, SFARG1LO
+ |2:
+ | jr ra
+ |. move CRET1, SFARG1LO
+ |1:
+ | addiu TMP0, TMP0, 21
+ | srlv TMP1, SFARG1LO, TMP0
+ | li AT, 20
+ | subu TMP0, AT, TMP0
+ | sll SFARG1LO, SFARG1HI, 12
+ | sllv AT, SFARG1LO, TMP0
+ | or SFARG1LO, TMP1, AT
+ | slt AT, SFARG1HI, r0
+ | beqz AT, <2
|. nop
- |
- |//-- Bit library --------------------------------------------------------
+ | jr ra
+ |. subu CRET1, r0, SFARG1LO
+ |.endif
|
|.macro .ffunc_bit, name
- | .ffunc_n bit_..name
- |. add.d FARG1, FARG1, TOBIT
- | mfc1 CRET1, FARG1
+ | .ffunc_1 bit_..name
+ | beq SFARG1HI, TISNUM, >6
+ |. move CRET1, SFARG1LO
+ | bal ->vm_tobit_fb
+ |. sltu TMP1, SFARG1HI, TISNUM
+ |6:
|.endmacro
|
|.macro .ffunc_bit_op, name, ins
| .ffunc_bit name
- | addiu TMP1, BASE, 8
- | addu TMP2, BASE, NARGS8:RC
+ | addiu TMP2, BASE, 8
+ | addu TMP3, BASE, NARGS8:RC
|1:
- | lw CARG4, HI(TMP1)
- | beq TMP1, TMP2, ->fff_resi
- |. ldc1 FARG1, 0(TMP1)
- | sltiu AT, CARG4, LJ_TISNUM
- | beqz AT, ->fff_fallback
- | add.d FARG1, FARG1, TOBIT
- | mfc1 CARG2, FARG1
- | ins CRET1, CRET1, CARG2
+ | lw SFARG1HI, HI(TMP2)
+ | beq TMP2, TMP3, ->fff_resi
+ |. lw SFARG1LO, LO(TMP2)
+ |.if FPU
+ | bne SFARG1HI, TISNUM, >2
+ |. addiu TMP2, TMP2, 8
| b <1
- |. addiu TMP1, TMP1, 8
+ |. ins CRET1, CRET1, SFARG1LO
+ |2:
+ | ldc1 FARG1, -8(TMP2)
+ | sltu TMP1, SFARG1HI, TISNUM
+ | beqz TMP1, ->fff_fallback
+ |. add.d FARG1, FARG1, TOBIT
+ | mfc1 SFARG1LO, FARG1
+ | b <1
+ |. ins CRET1, CRET1, SFARG1LO
+ |.else
+ | beq SFARG1HI, TISNUM, >2
+ |. move CRET2, CRET1
+ | bal ->vm_tobit_fb
+ |. sltu TMP1, SFARG1HI, TISNUM
+ | move SFARG1LO, CRET2
+ |2:
+ | ins CRET1, CRET1, SFARG1LO
+ | b <1
+ |. addiu TMP2, TMP2, 8
+ |.endif
|.endmacro
|
|.ffunc_bit_op band, and
@@ -1849,24 +2092,28 @@ static void build_subroutines(BuildCtx *ctx)
|. not CRET1, CRET1
|
|.macro .ffunc_bit_sh, name, ins, shmod
- | .ffunc_nn bit_..name
- |. add.d FARG1, FARG1, TOBIT
- | add.d FARG2, FARG2, TOBIT
- | mfc1 CARG1, FARG1
- | mfc1 CARG2, FARG2
+ | .ffunc_2 bit_..name
+ | beq SFARG1HI, TISNUM, >1
+ |. nop
+ | bal ->vm_tobit_fb
+ |. sltu TMP1, SFARG1HI, TISNUM
+ | move SFARG1LO, CRET1
+ |1:
+ | bne SFARG2HI, TISNUM, ->fff_fallback
+ |. nop
|.if shmod == 1
| li AT, 32
- | subu TMP0, AT, CARG2
- | sllv CARG2, CARG1, CARG2
- | srlv CARG1, CARG1, TMP0
+ | subu TMP0, AT, SFARG2LO
+ | sllv SFARG2LO, SFARG1LO, SFARG2LO
+ | srlv SFARG1LO, SFARG1LO, TMP0
|.elif shmod == 2
| li AT, 32
- | subu TMP0, AT, CARG2
- | srlv CARG2, CARG1, CARG2
- | sllv CARG1, CARG1, TMP0
+ | subu TMP0, AT, SFARG2LO
+ | srlv SFARG2LO, SFARG1LO, SFARG2LO
+ | sllv SFARG1LO, SFARG1LO, TMP0
|.endif
| b ->fff_resi
- |. ins CRET1, CARG1, CARG2
+ |. ins CRET1, SFARG1LO, SFARG2LO
|.endmacro
|
|.ffunc_bit_sh lshift, sllv, 0
@@ -1878,9 +2125,11 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc_bit tobit
|->fff_resi:
- | mtc1 CRET1, FRET1
- | b ->fff_resn
- |. cvt.d.w FRET1, FRET1
+ | lw PC, FRAME_PC(BASE)
+ | addiu RA, BASE, -8
+ | sw TISNUM, -8+HI(BASE)
+ | b ->fff_res1
+ |. sw CRET1, -8+LO(BASE)
|
|//-----------------------------------------------------------------------
|
@@ -2067,19 +2316,96 @@ static void build_subroutines(BuildCtx *ctx)
| jr CRET1
|. lw INS, -4(PC)
|
+ |->cont_stitch: // Trace stitching.
+ |.if JIT
+ | // RA = resultptr, RB = meta base
+ | lw INS, -4(PC)
+ | lw TMP2, -24+LO(RB) // Save previous trace.
+ | decode_RA8a RC, INS
+ | addiu AT, MULTRES, -8
+ | decode_RA8b RC
+ | beqz AT, >2
+ |. addu RC, BASE, RC // Call base.
+ |1: // Move results down.
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
+ | addiu AT, AT, -8
+ | addiu RA, RA, 8
+ | sw SFRETHI, HI(RC)
+ | sw SFRETLO, LO(RC)
+ | bnez AT, <1
+ |. addiu RC, RC, 8
+ |2:
+ | decode_RA8a RA, INS
+ | decode_RB8a RB, INS
+ | decode_RA8b RA
+ | decode_RB8b RB
+ | addu RA, RA, RB
+ | addu RA, BASE, RA
+ |3:
+ | sltu AT, RC, RA
+ | bnez AT, >9 // More results wanted?
+ |. nop
+ |
+ | lhu TMP3, TRACE:TMP2->traceno
+ | lhu RD, TRACE:TMP2->link
+ | beq RD, TMP3, ->cont_nop // Blacklisted.
+ |. load_got lj_dispatch_stitch
+ | bnez RD, =>BC_JLOOP // Jump to stitched trace.
+ |. sll RD, RD, 3
+ |
+ | // Stitch a new trace to the previous trace.
+ | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
+ | sw L, DISPATCH_J(L)(DISPATCH)
+ | sw BASE, L->base
+ | addiu CARG1, DISPATCH, GG_DISP2J
+ | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
+ |. move CARG2, PC
+ | b ->cont_nop
+ |. lw BASE, L->base
+ |
+ |9:
+ | sw TISNIL, HI(RC)
+ | b <3
+ |. addiu RC, RC, 8
+ |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | load_got lj_dispatch_profile
+ | sw MULTRES, SAVE_MULTRES
+ | move CARG2, PC
+ | sw BASE, L->base
+ | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
+ |. move CARG1, L
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | addiu PC, PC, -4
+ | b ->cont_nop
+ |. lw BASE, L->base
+#endif
+ |
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
|.macro savex_, a, b
+ |.if FPU
| sdc1 f..a, 16+a*8(sp)
| sw r..a, 16+32*8+a*4(sp)
| sw r..b, 16+32*8+b*4(sp)
+ |.else
+ | sw r..a, 16+a*4(sp)
+ | sw r..b, 16+b*4(sp)
+ |.endif
|.endmacro
|
|->vm_exit_handler:
|.if JIT
+ |.if FPU
| addiu sp, sp, -(16+32*8+32*4)
+ |.else
+ | addiu sp, sp, -(16+32*4)
+ |.endif
| savex_ 0, 1
| savex_ 2, 3
| savex_ 4, 5
@@ -2094,25 +2420,34 @@ static void build_subroutines(BuildCtx *ctx)
| savex_ 22, 23
| savex_ 24, 25
| savex_ 26, 27
+ |.if FPU
| sdc1 f28, 16+28*8(sp)
- | sw r28, 16+32*8+28*4(sp)
| sdc1 f30, 16+30*8(sp)
+ | sw r28, 16+32*8+28*4(sp)
| sw r30, 16+32*8+30*4(sp)
| sw r0, 16+32*8+31*4(sp) // Clear RID_TMP.
+ | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
+ | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP
+ |.else
+ | sw r28, 16+28*4(sp)
+ | sw r30, 16+30*4(sp)
+ | sw r0, 16+31*4(sp) // Clear RID_TMP.
+ | addiu TMP2, sp, 16+32*4 // Recompute original value of sp.
+ | sw TMP2, 16+29*4(sp) // Store sp in RID_SP
+ |.endif
| li_vmstate EXIT
- | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp.
| addiu DISPATCH, JGL, -GG_DISP2G-32768
| lw TMP1, 0(TMP2) // Load exit number.
| st_vmstate
- | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP.
- | lw L, DISPATCH_GL(jit_L)(DISPATCH)
- | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
+ | lw L, DISPATCH_GL(cur_L)(DISPATCH)
+ | lw BASE, DISPATCH_GL(jit_base)(DISPATCH)
| load_got lj_trace_exit
| sw L, DISPATCH_J(L)(DISPATCH)
| sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
+ | sw BASE, L->base
| sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
| addiu CARG1, DISPATCH, GG_DISP2J
- | sw BASE, L->base
+ | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
| call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
|. addiu CARG2, sp, 16
| // Returns MULTRES (unscaled) or negated error code.
@@ -2128,19 +2463,21 @@ static void build_subroutines(BuildCtx *ctx)
|.if JIT
| // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
| lw L, SAVE_L
- | addiu DISPATCH, JGL, -GG_DISP2G-32768
+ | addiu DISPATCH, JGL, -GG_DISP2G-32768
+ | sw BASE, L->base
|1:
- | bltz CRET1, >3 // Check for error from exit.
- |. lw LFUNC:TMP1, FRAME_FUNC(BASE)
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | bltz CRET1, >9 // Check for error from exit.
+ |. lw LFUNC:RB, FRAME_FUNC(BASE)
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| sll MULTRES, CRET1, 3
| li TISNIL, LJ_TNIL
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
| sw MULTRES, SAVE_MULTRES
- | mtc1 TMP3, TOBIT
- | lw TMP1, LFUNC:TMP1->pc
- | sw r0, DISPATCH_GL(jit_L)(DISPATCH)
+ | .FPU mtc1 TMP3, TOBIT
+ | lw TMP1, LFUNC:RB->pc
+ | sw r0, DISPATCH_GL(jit_base)(DISPATCH)
| lw KBASE, PC2PROTO(k)(TMP1)
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| // Modified copy of ins_next which handles function header dispatch, too.
| lw INS, 0(PC)
| addiu PC, PC, 4
@@ -2148,7 +2485,7 @@ static void build_subroutines(BuildCtx *ctx)
| sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
| decode_OP4a TMP1, INS
| decode_OP4b TMP1
- | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header?
+ | sltiu TMP2, TMP1, BC_FUNCF*4
| addu TMP0, DISPATCH, TMP1
| decode_RD8a RD, INS
| lw AT, 0(TMP0)
@@ -2158,13 +2495,30 @@ static void build_subroutines(BuildCtx *ctx)
| jr AT
|. decode_RD8b RD
|2:
+ | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function?
+ | bnez TMP2, >3
+ |. lw TMP1, FRAME_PC(BASE)
+ | // Check frame below fast function.
+ | andi TMP0, TMP1, FRAME_TYPE
+ | bnez TMP0, >3 // Trace stitching continuation?
+ |. nop
+ | // Otherwise set KBASE for Lua function below fast function.
+ | lw TMP2, -4(TMP1)
+ | decode_RA8a TMP0, TMP2
+ | decode_RA8b TMP0
+ | subu TMP1, BASE, TMP0
+ | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1)
+ | lw TMP1, LFUNC:TMP2->pc
+ | lw KBASE, PC2PROTO(k)(TMP1)
+ |3:
| addiu RC, MULTRES, -8
| jr AT
|. addu RA, RA, BASE
|
- |3: // Rethrow error from the right C frame.
- | load_got lj_err_run
- | call_intern lj_err_run // (lua_State *L)
+ |9: // Rethrow error from the right C frame.
+ | load_got lj_err_trace
+ | sub CARG2, r0, CRET1
+ | call_intern lj_err_trace // (lua_State *L, int errcode)
|. move CARG1, L
|.endif
|
@@ -2172,8 +2526,9 @@ static void build_subroutines(BuildCtx *ctx)
|//-- Math helper functions ----------------------------------------------
|//-----------------------------------------------------------------------
|
+ |// Hard-float round to integer.
|// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
- |.macro vm_round, func
+ |.macro vm_round_hf, func
| lui TMP0, 0x4330 // Hiword of 2^52 (double).
| mtc1 r0, f4
| mtc1 TMP0, f5
@@ -2215,6 +2570,12 @@ static void build_subroutines(BuildCtx *ctx)
|. mov.d FRET1, FARG1
|.endmacro
|
+ |.macro vm_round, func
+ |.if FPU
+ | vm_round_hf, func
+ |.endif
+ |.endmacro
+ |
|->vm_floor:
| vm_round floor
|->vm_ceil:
@@ -2224,10 +2585,286 @@ static void build_subroutines(BuildCtx *ctx)
| vm_round trunc
|.endif
|
+ |// Soft-float integer to number conversion.
+ |.macro sfi2d, AHI, ALO
+ |.if not FPU
+ | beqz ALO, >9 // Handle zero first.
+ |. sra TMP0, ALO, 31
+ | xor TMP1, ALO, TMP0
+ | subu TMP1, TMP1, TMP0 // Absolute value in TMP1.
+ | clz AHI, TMP1
+ | andi TMP0, TMP0, 0x800 // Mask sign bit.
+ | li AT, 0x3ff+31-1
+ | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1.
+ | subu AHI, AT, AHI // Exponent - 1 in AHI.
+ | sll ALO, TMP1, 21
+ | or AHI, AHI, TMP0 // Sign | Exponent.
+ | srl TMP1, TMP1, 11
+ | sll AHI, AHI, 20 // Align left.
+ | jr ra
+ |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent.
+ |9:
+ | jr ra
+ |. li AHI, 0
+ |.endif
+ |.endmacro
+ |
+ |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfi2d_1:
+ | sfi2d SFARG1HI, SFARG1LO
+ |
+ |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfi2d_2:
+ | sfi2d SFARG2HI, SFARG2LO
+ |
+ |// Soft-float comparison. Equivalent to c.eq.d.
+ |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfcmpeq:
+ |.if not FPU
+ | sll AT, SFARG1HI, 1
+ | sll TMP0, SFARG2HI, 1
+ | or CRET1, SFARG1LO, SFARG2LO
+ | or TMP1, AT, TMP0
+ | or TMP1, TMP1, CRET1
+ | beqz TMP1, >8 // Both args +-0: return 1.
+ |. sltu CRET1, r0, SFARG1LO
+ | lui TMP1, 0xffe0
+ | addu AT, AT, CRET1
+ | sltu CRET1, r0, SFARG2LO
+ | sltu AT, TMP1, AT
+ | addu TMP0, TMP0, CRET1
+ | sltu TMP0, TMP1, TMP0
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0;
+ |. xor TMP0, SFARG1HI, SFARG2HI
+ | xor TMP1, SFARG1LO, SFARG2LO
+ | or AT, TMP0, TMP1
+ | jr ra
+ |. sltiu CRET1, AT, 1 // Same values: return 1.
+ |8:
+ | jr ra
+ |. li CRET1, 1
+ |9:
+ | jr ra
+ |. li CRET1, 0
+ |.endif
+ |
+ |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
+ |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
+ |->vm_sfcmpult:
+ |.if not FPU
+ | b >1
+ |. li CRET2, 1
+ |.endif
+ |
+ |->vm_sfcmpolt:
+ |.if not FPU
+ | li CRET2, 0
+ |1:
+ | sll AT, SFARG1HI, 1
+ | sll TMP0, SFARG2HI, 1
+ | or CRET1, SFARG1LO, SFARG2LO
+ | or TMP1, AT, TMP0
+ | or TMP1, TMP1, CRET1
+ | beqz TMP1, >8 // Both args +-0: return 0.
+ |. sltu CRET1, r0, SFARG1LO
+ | lui TMP1, 0xffe0
+ | addu AT, AT, CRET1
+ | sltu CRET1, r0, SFARG2LO
+ | sltu AT, TMP1, AT
+ | addu TMP0, TMP0, CRET1
+ | sltu TMP0, TMP1, TMP0
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
+ |. and AT, SFARG1HI, SFARG2HI
+ | bltz AT, >5 // Both args negative?
+ |. nop
+ | beq SFARG1HI, SFARG2HI, >8
+ |. sltu CRET1, SFARG1LO, SFARG2LO
+ | jr ra
+ |. slt CRET1, SFARG1HI, SFARG2HI
+ |5: // Swap conditions if both operands are negative.
+ | beq SFARG1HI, SFARG2HI, >8
+ |. sltu CRET1, SFARG2LO, SFARG1LO
+ | jr ra
+ |. slt CRET1, SFARG2HI, SFARG1HI
+ |8:
+ | jr ra
+ |. nop
+ |9:
+ | jr ra
+ |. move CRET1, CRET2
+ |.endif
+ |
+ |->vm_sfcmpogt:
+ |.if not FPU
+ | sll AT, SFARG2HI, 1
+ | sll TMP0, SFARG1HI, 1
+ | or CRET1, SFARG2LO, SFARG1LO
+ | or TMP1, AT, TMP0
+ | or TMP1, TMP1, CRET1
+ | beqz TMP1, >8 // Both args +-0: return 0.
+ |. sltu CRET1, r0, SFARG2LO
+ | lui TMP1, 0xffe0
+ | addu AT, AT, CRET1
+ | sltu CRET1, r0, SFARG1LO
+ | sltu AT, TMP1, AT
+ | addu TMP0, TMP0, CRET1
+ | sltu TMP0, TMP1, TMP0
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
+ |. and AT, SFARG2HI, SFARG1HI
+ | bltz AT, >5 // Both args negative?
+ |. nop
+ | beq SFARG2HI, SFARG1HI, >8
+ |. sltu CRET1, SFARG2LO, SFARG1LO
+ | jr ra
+ |. slt CRET1, SFARG2HI, SFARG1HI
+ |5: // Swap conditions if both operands are negative.
+ | beq SFARG2HI, SFARG1HI, >8
+ |. sltu CRET1, SFARG1LO, SFARG2LO
+ | jr ra
+ |. slt CRET1, SFARG1HI, SFARG2HI
+ |8:
+ | jr ra
+ |. nop
+ |9:
+ | jr ra
+ |. li CRET1, 0
+ |.endif
+ |
+ |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
+ |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfcmpolex:
+ |.if not FPU
+ | sll AT, SFARG1HI, 1
+ | sll TMP0, SFARG2HI, 1
+ | or CRET1, SFARG1LO, SFARG2LO
+ | or TMP1, AT, TMP0
+ | or TMP1, TMP1, CRET1
+ | beqz TMP1, >8 // Both args +-0: return 1.
+ |. sltu CRET1, r0, SFARG1LO
+ | lui TMP1, 0xffe0
+ | addu AT, AT, CRET1
+ | sltu CRET1, r0, SFARG2LO
+ | sltu AT, TMP1, AT
+ | addu TMP0, TMP0, CRET1
+ | sltu TMP0, TMP1, TMP0
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0;
+ |. and AT, SFARG1HI, SFARG2HI
+ | xor AT, AT, TMP3
+ | bltz AT, >5 // Both args negative?
+ |. nop
+ | beq SFARG1HI, SFARG2HI, >6
+ |. sltu CRET1, SFARG2LO, SFARG1LO
+ | jr ra
+ |. slt CRET1, SFARG2HI, SFARG1HI
+ |5: // Swap conditions if both operands are negative.
+ | beq SFARG1HI, SFARG2HI, >6
+ |. sltu CRET1, SFARG1LO, SFARG2LO
+ | slt CRET1, SFARG1HI, SFARG2HI
+ |6:
+ | jr ra
+ |. nop
+ |8:
+ | jr ra
+ |. li CRET1, 1
+ |9:
+ | jr ra
+ |. li CRET1, 0
+ |.endif
+ |
+ |.macro sfmin_max, name, fpcall
+ |->vm_sf .. name:
+ |.if JIT and not FPU
+ | move TMP2, ra
+ | bal ->fpcall
+ |. nop
+ | move TMP0, CRET1
+ | move SFRETHI, SFARG1HI
+ | move SFRETLO, SFARG1LO
+ | move ra, TMP2
+ | movz SFRETHI, SFARG2HI, TMP0
+ | jr ra
+ |. movz SFRETLO, SFARG2LO, TMP0
+ |.endif
+ |.endmacro
+ |
+ | sfmin_max min, vm_sfcmpolt
+ | sfmin_max max, vm_sfcmpogt
+ |
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
|//-----------------------------------------------------------------------
|
+ |.define NEXT_TAB, TAB:CARG1
+ |.define NEXT_IDX, CARG2
+ |.define NEXT_ASIZE, CARG3
+ |.define NEXT_NIL, CARG4
+ |.define NEXT_TMP0, r12
+ |.define NEXT_TMP1, r13
+ |.define NEXT_TMP2, r14
+ |.define NEXT_RES_VK, CRET1
+ |.define NEXT_RES_IDX, CRET2
+ |.define NEXT_RES_PTR, sp
+ |.define NEXT_RES_VAL_I, 0(sp)
+ |.define NEXT_RES_VAL_IT, 4(sp)
+ |.define NEXT_RES_KEY_I, 8(sp)
+ |.define NEXT_RES_KEY_IT, 12(sp)
+ |
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+ |// Next idx returned in CRET2.
+ |->vm_next:
+ |.if JIT and ENDIAN_LE
+ | lw NEXT_ASIZE, NEXT_TAB->asize
+ | lw NEXT_TMP0, NEXT_TAB->array
+ | li NEXT_NIL, LJ_TNIL
+ |1: // Traverse array part.
+ | sltu AT, NEXT_IDX, NEXT_ASIZE
+ | sll NEXT_TMP1, NEXT_IDX, 3
+ | beqz AT, >5
+ |. addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
+ | lw NEXT_TMP2, 4(NEXT_TMP1)
+ | sw NEXT_IDX, NEXT_RES_KEY_I
+ | beq NEXT_TMP2, NEXT_NIL, <1
+ |. addiu NEXT_IDX, NEXT_IDX, 1
+ | lw NEXT_TMP0, 0(NEXT_TMP1)
+ | li AT, LJ_TISNUM
+ | sw NEXT_TMP2, NEXT_RES_VAL_IT
+ | sw AT, NEXT_RES_KEY_IT
+ | sw NEXT_TMP0, NEXT_RES_VAL_I
+ | move NEXT_RES_VK, NEXT_RES_PTR
+ | jr ra
+ |. move NEXT_RES_IDX, NEXT_IDX
+ |
+ |5: // Traverse hash part.
+ | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
+ | lw NODE:NEXT_RES_VK, NEXT_TAB->node
+ | sll NEXT_TMP2, NEXT_RES_IDX, 5
+ | lw NEXT_TMP0, NEXT_TAB->hmask
+ | sll AT, NEXT_RES_IDX, 3
+ | subu AT, NEXT_TMP2, AT
+ | addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT
+ |6:
+ | sltu AT, NEXT_TMP0, NEXT_RES_IDX
+ | bnez AT, >8
+ |. nop
+ | lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it
+ | bne NEXT_TMP2, NEXT_NIL, >9
+ |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1
+ | // Skip holes in hash part.
+ | b <6
+ |. addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
+ |
+ |8: // End of iteration. Set the key to nil (not the value).
+ | sw NEXT_NIL, NEXT_RES_KEY_IT
+ | move NEXT_RES_VK, NEXT_RES_PTR
+ |9:
+ | jr ra
+ |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
+ |.endif
+ |
|//-----------------------------------------------------------------------
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
@@ -2243,10 +2880,10 @@ static void build_subroutines(BuildCtx *ctx)
| sw r1, CTSTATE->cb.slot
| sw CARG1, CTSTATE->cb.gpr[0]
| sw CARG2, CTSTATE->cb.gpr[1]
- | sdc1 FARG1, CTSTATE->cb.fpr[0]
+ | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
| sw CARG3, CTSTATE->cb.gpr[2]
| sw CARG4, CTSTATE->cb.gpr[3]
- | sdc1 FARG2, CTSTATE->cb.fpr[1]
+ | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
| addiu TMP0, sp, CFRAME_SPACE+16
| sw TMP0, CTSTATE->cb.stack
| sw r0, SAVE_PC // Any value outside of bytecode is ok.
@@ -2256,15 +2893,16 @@ static void build_subroutines(BuildCtx *ctx)
| // Returns lua_State *.
| lw BASE, L:CRET1->base
| lw RC, L:CRET1->top
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
| move L, CRET1
- | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lw LFUNC:RB, FRAME_FUNC(BASE)
- | mtc1 TMP3, TOBIT
+ | .FPU mtc1 TMP3, TOBIT
| li_vmstate INTERP
| li TISNIL, LJ_TNIL
| subu RC, RC, BASE
| st_vmstate
- | cvt.d.s TOBIT, TOBIT
+ | .FPU cvt.d.s TOBIT, TOBIT
| ins_callt
|.endif
|
@@ -2278,11 +2916,11 @@ static void build_subroutines(BuildCtx *ctx)
| move CARG2, RA
| call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
|. move CARG1, CTSTATE
+ | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
| lw CRET1, CTSTATE->cb.gpr[0]
- | ldc1 FRET1, CTSTATE->cb.fpr[0]
- | lw CRET2, CTSTATE->cb.gpr[1]
+ | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
| b ->vm_leave_unw
- |. ldc1 FRET2, CTSTATE->cb.fpr[1]
+ |. lw CRET2, CTSTATE->cb.gpr[1]
|.endif
|
|->vm_ffi_call: // Call C function via FFI.
@@ -2314,8 +2952,8 @@ static void build_subroutines(BuildCtx *ctx)
| lw CARG2, CCSTATE->gpr[1]
| lw CARG3, CCSTATE->gpr[2]
| lw CARG4, CCSTATE->gpr[3]
- | ldc1 FARG1, CCSTATE->fpr[0]
- | ldc1 FARG2, CCSTATE->fpr[1]
+ | .FPU ldc1 FARG1, CCSTATE->fpr[0]
+ | .FPU ldc1 FARG2, CCSTATE->fpr[1]
| jalr CFUNCADDR
|. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
| lw CCSTATE:TMP1, -12(r16)
@@ -2323,8 +2961,13 @@ static void build_subroutines(BuildCtx *ctx)
| lw ra, -4(r16)
| sw CRET1, CCSTATE:TMP1->gpr[0]
| sw CRET2, CCSTATE:TMP1->gpr[1]
+ |.if FPU
| sdc1 FRET1, CCSTATE:TMP1->fpr[0]
| sdc1 FRET2, CCSTATE:TMP1->fpr[1]
+ |.else
+ | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part.
+ | sw CARG2, CCSTATE:TMP1->gpr[3]
+ |.endif
| move sp, r16
| jr ra
|. move r16, TMP2
@@ -2348,82 +2991,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1*8, RD = src2*8, JMP with RD = target
- | addu CARG2, BASE, RA
- | addu CARG3, BASE, RD
- | lw TMP0, HI(CARG2)
- | lw TMP1, HI(CARG3)
- | ldc1 f0, 0(CARG2)
- | ldc1 f2, 0(CARG3)
- | sltiu TMP0, TMP0, LJ_TISNUM
- | sltiu TMP1, TMP1, LJ_TISNUM
+ |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp
+ | addu RA, BASE, RA
+ | addu RD, BASE, RD
+ | lw RAHI, HI(RA)
+ | lw RDHI, HI(RD)
| lhu TMP2, OFS_RD(PC)
- | and TMP0, TMP0, TMP1
| addiu PC, PC, 4
- | beqz TMP0, ->vmeta_comp
- |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
- | decode_RD4b TMP2
- | addu TMP2, TMP2, TMP1
- if (op == BC_ISLT || op == BC_ISGE) {
- | c.olt.d f0, f2
- } else {
- | c.ole.d f0, f2
- }
- if (op == BC_ISLT || op == BC_ISLE) {
- | movf TMP2, r0
- } else {
- | movt TMP2, r0
- }
- | addu PC, PC, TMP2
+ | bne RAHI, TISNUM, >2
+ |. lw RALO, LO(RA)
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | lw RDLO, LO(RD)
+ | bne RDHI, TISNUM, >5
+ |. decode_RD4b TMP2
+ | slt AT, SFARG1LO, SFARG2LO
+ | addu TMP2, TMP2, TMP3
+ | movop TMP2, r0, AT
|1:
+ | addu PC, PC, TMP2
| ins_next
+ |
+ |2: // RA is not an integer.
+ | sltiu AT, RAHI, LJ_TISNUM
+ | beqz AT, ->vmeta_comp
+ |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | sltiu AT, RDHI, LJ_TISNUM
+ |.if FPU
+ | ldc1 FRA, 0(RA)
+ | ldc1 FRD, 0(RD)
+ |.else
+ | lw RDLO, LO(RD)
+ |.endif
+ | beqz AT, >4
+ |. decode_RD4b TMP2
+ |3: // RA and RD are both numbers.
+ |.if FPU
+ | fcomp f20, f22
+ | addu TMP2, TMP2, TMP3
+ | b <1
+ |. fmovop TMP2, r0
+ |.else
+ | bal sfcomp
+ |. addu TMP2, TMP2, TMP3
+ | b <1
+ |. movop TMP2, r0, CRET1
+ |.endif
+ |
+ |4: // RA is a number, RD is not a number.
+ | bne RDHI, TISNUM, ->vmeta_comp
+ | // RA is a number, RD is an integer. Convert RD to a number.
+ |.if FPU
+ |. lwc1 FRD, LO(RD)
+ | b <3
+ |. cvt.d.w FRD, FRD
+ |.else
+ |. nop
+ |.if "RDHI" == "SFARG1HI"
+ | bal ->vm_sfi2d_1
+ |.else
+ | bal ->vm_sfi2d_2
+ |.endif
+ |. nop
+ | b <3
+ |. nop
+ |.endif
+ |
+ |5: // RA is an integer, RD is not an integer
+ | sltiu AT, RDHI, LJ_TISNUM
+ | beqz AT, ->vmeta_comp
+ | // RA is an integer, RD is a number. Convert RA to a number.
+ |.if FPU
+ |. mtc1 RALO, FRA
+ | ldc1 FRD, 0(RD)
+ | b <3
+ | cvt.d.w FRA, FRA
+ |.else
+ |. nop
+ |.if "RAHI" == "SFARG1HI"
+ | bal ->vm_sfi2d_1
+ |.else
+ | bal ->vm_sfi2d_2
+ |.endif
+ |. nop
+ | b <3
+ |. nop
+ |.endif
+ |.endmacro
+ |
+ if (op == BC_ISLT) {
+ | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt
+ } else if (op == BC_ISGE) {
+ | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt
+ } else if (op == BC_ISLE) {
+ | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult
+ } else {
+ | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult
+ }
break;
case BC_ISEQV: case BC_ISNEV:
vk = op == BC_ISEQV;
| // RA = src1*8, RD = src2*8, JMP with RD = target
| addu RA, BASE, RA
- | addiu PC, PC, 4
- | lw TMP0, HI(RA)
- | ldc1 f0, 0(RA)
+ | addiu PC, PC, 4
| addu RD, BASE, RD
+ | lw SFARG1HI, HI(RA)
| lhu TMP2, -4+OFS_RD(PC)
- | lw TMP1, HI(RD)
- | ldc1 f2, 0(RD)
+ | lw SFARG2HI, HI(RD)
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
- | sltiu AT, TMP0, LJ_TISNUM
- | sltiu CARG1, TMP1, LJ_TISNUM
- | decode_RD4b TMP2
- | and AT, AT, CARG1
- | beqz AT, >5
- |. addu TMP2, TMP2, TMP3
- | c.eq.d f0, f2
+ | sltu AT, TISNUM, SFARG1HI
+ | sltu TMP0, TISNUM, SFARG2HI
+ | or AT, AT, TMP0
if (vk) {
- | movf TMP2, r0
+ | beqz AT, ->BC_ISEQN_Z
} else {
- | movt TMP2, r0
+ | beqz AT, ->BC_ISNEN_Z
}
- |1:
- | addu PC, PC, TMP2
- | ins_next
- |5: // Either or both types are not numbers.
- | lw CARG2, LO(RA)
- | lw CARG3, LO(RD)
+ |. decode_RD4b TMP2
+ | // Either or both types are not numbers.
+ | lw SFARG1LO, LO(RA)
+ | lw SFARG2LO, LO(RD)
+ | addu TMP2, TMP2, TMP3
|.if FFI
| li TMP3, LJ_TCDATA
- | beq TMP0, TMP3, ->vmeta_equal_cd
+ | beq SFARG1HI, TMP3, ->vmeta_equal_cd
|.endif
- |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive?
+ |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive?
|.if FFI
- | beq TMP1, TMP3, ->vmeta_equal_cd
+ | beq SFARG2HI, TMP3, ->vmeta_equal_cd
|.endif
- |. xor TMP3, CARG2, CARG3 // Same tv?
- | xor TMP1, TMP1, TMP0 // Same type?
- | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata?
+ |. xor TMP3, SFARG1LO, SFARG2LO // Same tv?
+ | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type?
+ | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata?
| movz TMP3, r0, AT // Ignore tv if primitive.
- | movn CARG1, r0, TMP1 // Tab/ud and same type?
- | or AT, TMP1, TMP3 // Same type && (pri||same tv).
- | movz CARG1, r0, AT
- | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv.
+ | movn TMP0, r0, SFARG2HI // Tab/ud and same type?
+ | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv).
+ | movz TMP0, r0, AT
+ | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv.
if (vk) {
|. movn TMP2, r0, AT
} else {
@@ -2431,15 +3135,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
| // Different tables or userdatas. Need to check __eq metamethod.
| // Field metatable must be at same offset for GCtab and GCudata!
- | lw TAB:TMP1, TAB:CARG2->metatable
- | beqz TAB:TMP1, <1 // No metatable?
+ | lw TAB:TMP1, TAB:SFARG1LO->metatable
+ | beqz TAB:TMP1, >1 // No metatable?
|. nop
| lbu TMP1, TAB:TMP1->nomm
| andi TMP1, TMP1, 1<<MM_eq
- | bnez TMP1, <1 // Or 'no __eq' flag set?
+ | bnez TMP1, >1 // Or 'no __eq' flag set?
|. nop
| b ->vmeta_equal // Handle __eq metamethod.
- |. li CARG4, 1-vk // ne = 0 or 1.
+ |. li TMP0, 1-vk // ne = 0 or 1.
+ |1:
+ | addu PC, PC, TMP2
+ | ins_next
break;
case BC_ISEQS: case BC_ISNES:
@@ -2476,38 +3183,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = op == BC_ISEQN;
| // RA = src*8, RD = num_const*8, JMP with RD = target
| addu RA, BASE, RA
- | addiu PC, PC, 4
- | lw TMP0, HI(RA)
- | ldc1 f0, 0(RA)
- | addu RD, KBASE, RD
- | lhu TMP2, -4+OFS_RD(PC)
- | ldc1 f2, 0(RD)
+ | addu RD, KBASE, RD
+ | lw SFARG1HI, HI(RA)
+ | lw SFARG2HI, HI(RD)
+ | lhu TMP2, OFS_RD(PC)
+ | addiu PC, PC, 4
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
- | sltiu AT, TMP0, LJ_TISNUM
| decode_RD4b TMP2
- |.if FFI
- | beqz AT, >5
- |.else
- | beqz AT, >1
- |.endif
- |. addu TMP2, TMP2, TMP3
- | c.eq.d f0, f2
if (vk) {
- | movf TMP2, r0
- | addu PC, PC, TMP2
+ |->BC_ISEQN_Z:
+ } else {
+ |->BC_ISNEN_Z:
+ }
+ | bne SFARG1HI, TISNUM, >3
+ |. lw SFARG1LO, LO(RA)
+ | lw SFARG2LO, LO(RD)
+ | addu TMP2, TMP2, TMP3
+ | bne SFARG2HI, TISNUM, >6
+ |. xor AT, SFARG1LO, SFARG2LO
+ if (vk) {
+ | movn TMP2, r0, AT
|1:
+ | addu PC, PC, TMP2
+ |2:
} else {
- | movt TMP2, r0
+ | movz TMP2, r0, AT
|1:
+ |2:
| addu PC, PC, TMP2
}
| ins_next
+ |
+ |3: // RA is not an integer.
+ | sltiu AT, SFARG1HI, LJ_TISNUM
|.if FFI
- |5:
- | li AT, LJ_TCDATA
- | beq TMP0, AT, ->vmeta_equal_cd
+ | beqz AT, >8
+ |.else
+ | beqz AT, <2
+ |.endif
+ |. addu TMP2, TMP2, TMP3
+ | sltiu AT, SFARG2HI, LJ_TISNUM
+ |.if FPU
+ | ldc1 f20, 0(RA)
+ | ldc1 f22, 0(RD)
+ |.endif
+ | beqz AT, >5
+ |. lw SFARG2LO, LO(RD)
+ |4: // RA and RD are both numbers.
+ |.if FPU
+ | c.eq.d f20, f22
+ | b <1
+ if (vk) {
+ |. movf TMP2, r0
+ } else {
+ |. movt TMP2, r0
+ }
+ |.else
+ | bal ->vm_sfcmpeq
|. nop
| b <1
+ if (vk) {
+ |. movz TMP2, r0, CRET1
+ } else {
+ |. movn TMP2, r0, CRET1
+ }
+ |.endif
+ |
+ |5: // RA is a number, RD is not a number.
+ |.if FFI
+ | bne SFARG2HI, TISNUM, >9
+ |.else
+ | bne SFARG2HI, TISNUM, <2
+ |.endif
+ | // RA is a number, RD is an integer. Convert RD to a number.
+ |.if FPU
+ |. lwc1 f22, LO(RD)
+ | b <4
+ |. cvt.d.w f22, f22
+ |.else
+ |. nop
+ | bal ->vm_sfi2d_2
+ |. nop
+ | b <4
+ |. nop
+ |.endif
+ |
+ |6: // RA is an integer, RD is not an integer
+ | sltiu AT, SFARG2HI, LJ_TISNUM
+ |.if FFI
+ | beqz AT, >9
+ |.else
+ | beqz AT, <2
+ |.endif
+ | // RA is an integer, RD is a number. Convert RA to a number.
+ |.if FPU
+ |. mtc1 SFARG1LO, f20
+ | ldc1 f22, 0(RD)
+ | b <4
+ | cvt.d.w f20, f20
+ |.else
+ |. nop
+ | bal ->vm_sfi2d_1
+ |. nop
+ | b <4
+ |. nop
+ |.endif
+ |
+ |.if FFI
+ |8:
+ | li AT, LJ_TCDATA
+ | bne SFARG1HI, AT, <2
+ |. nop
+ | b ->vmeta_equal_cd
+ |. nop
+ |9:
+ | li AT, LJ_TCDATA
+ | bne SFARG2HI, AT, <2
+ |. nop
+ | b ->vmeta_equal_cd
|. nop
|.endif
break;
@@ -2559,7 +3352,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu PC, PC, TMP2
} else {
| sltiu TMP0, TMP0, LJ_TISTRUECOND
- | ldc1 f0, 0(RD)
+ | lw SFRETHI, HI(RD)
+ | lw SFRETLO, LO(RD)
if (op == BC_ISTC) {
| beqz TMP0, >1
} else {
@@ -2569,22 +3363,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| decode_RD4b TMP2
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
| addu TMP2, TMP2, TMP3
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| addu PC, PC, TMP2
|1:
}
| ins_next
break;
+ case BC_ISTYPE:
+ | // RA = src*8, RD = -type*8
+ | addu TMP2, BASE, RA
+ | srl TMP1, RD, 3
+ | lw TMP0, HI(TMP2)
+ | ins_next1
+ | addu AT, TMP0, TMP1
+ | bnez AT, ->vmeta_istype
+ |. ins_next2
+ break;
+ case BC_ISNUM:
+ | // RA = src*8, RD = -(TISNUM-1)*8
+ | addu TMP2, BASE, RA
+ | lw TMP0, HI(TMP2)
+ | ins_next1
+ | sltiu AT, TMP0, LJ_TISNUM
+ | beqz AT, ->vmeta_istype
+ |. ins_next2
+ break;
+
/* -- Unary ops --------------------------------------------------------- */
case BC_MOV:
| // RA = dst*8, RD = src*8
| addu RD, BASE, RD
- | addu RA, BASE, RA
- | ldc1 f0, 0(RD)
+ | addu RA, BASE, RA
+ | lw SFRETHI, HI(RD)
+ | lw SFRETLO, LO(RD)
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
break;
case BC_NOT:
@@ -2601,16 +3418,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_UNM:
| // RA = dst*8, RD = src*8
- | addu CARG3, BASE, RD
+ | addu RB, BASE, RD
+ | lw SFARG1HI, HI(RB)
| addu RA, BASE, RA
- | lw TMP0, HI(CARG3)
- | ldc1 f0, 0(CARG3)
- | sltiu AT, TMP0, LJ_TISNUM
- | beqz AT, ->vmeta_unm
- |. neg.d f0, f0
+ | bne SFARG1HI, TISNUM, >2
+ |. lw SFARG1LO, LO(RB)
+ | lui TMP1, 0x8000
+ | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
+ |. negu SFARG1LO, SFARG1LO
+ |1:
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFARG1HI, HI(RA)
+ | sw SFARG1LO, LO(RA)
| ins_next2
+ |2:
+ | sltiu AT, SFARG1HI, LJ_TISNUM
+ | beqz AT, ->vmeta_unm
+ |. lui TMP1, 0x8000
+ | b <1
+ |. xor SFARG1HI, SFARG1HI, TMP1
break;
case BC_LEN:
| // RA = dst*8, RD = src*8
@@ -2621,12 +3447,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li AT, LJ_TSTR
| bne TMP0, AT, >2
|. li AT, LJ_TTAB
- | lw CRET1, STR:CARG1->len
+ | lw CRET1, STR:CARG1->len
|1:
- | mtc1 CRET1, f0
- | cvt.d.w f0, f0
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw TISNUM, HI(RA)
+ | sw CRET1, LO(RA)
| ins_next2
|2:
| bne TMP0, AT, ->vmeta_len
@@ -2657,104 +3482,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
/* -- Binary ops -------------------------------------------------------- */
- |.macro ins_arithpre
+ |.macro fpmod, a, b, c
+ | bal ->vm_floor // floor(b/c)
+ |. div.d FARG1, b, c
+ | mul.d a, FRET1, c
+ | sub.d a, b, a // b - floor(b/c)*c
+ |.endmacro
+
+ |.macro sfpmod
+ | addiu sp, sp, -16
+ |
+ | load_got __divdf3
+ | sw SFARG1HI, HI(sp)
+ | sw SFARG1LO, LO(sp)
+ | sw SFARG2HI, 8+HI(sp)
+ | call_extern
+ |. sw SFARG2LO, 8+LO(sp)
+ |
+ | load_got floor
+ | move SFARG1HI, SFRETHI
+ | call_extern
+ |. move SFARG1LO, SFRETLO
+ |
+ | load_got __muldf3
+ | move SFARG1HI, SFRETHI
+ | move SFARG1LO, SFRETLO
+ | lw SFARG2HI, 8+HI(sp)
+ | call_extern
+ |. lw SFARG2LO, 8+LO(sp)
+ |
+ | load_got __subdf3
+ | lw SFARG1HI, HI(sp)
+ | lw SFARG1LO, LO(sp)
+ | move SFARG2HI, SFRETHI
+ | call_extern
+ |. move SFARG2LO, SFRETLO
+ |
+ | addiu sp, sp, 16
+ |.endmacro
+
+ |.macro ins_arithpre, label
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- | decode_RB8a RB, INS
- | decode_RB8b RB
- | decode_RDtoRC8 RC, RD
| // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
||switch (vk) {
||case 0:
- | addu CARG3, BASE, RB
- | addu CARG4, KBASE, RC
- | lw TMP1, HI(CARG3)
- | ldc1 f20, 0(CARG3)
- | ldc1 f22, 0(CARG4)
- | sltiu AT, TMP1, LJ_TISNUM
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | // RA = dst*8, RB = src1*8, RC = num_const*8
+ | addu RB, BASE, RB
+ |.if "label" ~= "none"
+ | b label
+ |.endif
+ |. addu RC, KBASE, RC
|| break;
||case 1:
- | addu CARG4, BASE, RB
- | addu CARG3, KBASE, RC
- | lw TMP1, HI(CARG4)
- | ldc1 f22, 0(CARG4)
- | ldc1 f20, 0(CARG3)
- | sltiu AT, TMP1, LJ_TISNUM
+ | decode_RB8a RC, INS
+ | decode_RB8b RC
+ | decode_RDtoRC8 RB, RD
+ | // RA = dst*8, RB = num_const*8, RC = src1*8
+ | addu RC, BASE, RC
+ |.if "label" ~= "none"
+ | b label
+ |.endif
+ |. addu RB, KBASE, RB
|| break;
||default:
- | addu CARG3, BASE, RB
- | addu CARG4, BASE, RC
- | lw TMP1, HI(CARG3)
- | lw TMP2, HI(CARG4)
- | ldc1 f20, 0(CARG3)
- | ldc1 f22, 0(CARG4)
- | sltiu AT, TMP1, LJ_TISNUM
- | sltiu TMP0, TMP2, LJ_TISNUM
- | and AT, AT, TMP0
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | // RA = dst*8, RB = src1*8, RC = src2*8
+ | addu RB, BASE, RB
+ |.if "label" ~= "none"
+ | b label
+ |.endif
+ |. addu RC, BASE, RC
|| break;
||}
- | beqz AT, ->vmeta_arith
- |. addu RA, BASE, RA
|.endmacro
|
- |.macro fpmod, a, b, c
- |->BC_MODVN_Z:
- | bal ->vm_floor // floor(b/c)
- |. div.d FARG1, b, c
- | mul.d a, FRET1, c
- | sub.d a, b, a // b - floor(b/c)*c
- |.endmacro
+ |.macro ins_arith, intins, fpins, fpcall, label
+ | ins_arithpre none
|
- |.macro ins_arith, ins
- | ins_arithpre
- |.if "ins" == "fpmod_"
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- |. nop
+ |.if "label" ~= "none"
+ |label:
+ |.endif
+ |
+ | lw SFARG1HI, HI(RB)
+ | lw SFARG2HI, HI(RC)
+ |
+ |.if "intins" ~= "div"
+ |
+ | // Check for two integers.
+ | lw SFARG1LO, LO(RB)
+ | bne SFARG1HI, TISNUM, >5
+ |. lw SFARG2LO, LO(RC)
+ | bne SFARG2HI, TISNUM, >5
+ |
+ |.if "intins" == "addu"
+ |. intins CRET1, SFARG1LO, SFARG2LO
+ | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow.
+ | xor TMP2, CRET1, SFARG2LO
+ | and TMP1, TMP1, TMP2
+ | bltz TMP1, ->vmeta_arith
+ |. addu RA, BASE, RA
+ |.elif "intins" == "subu"
+ |. intins CRET1, SFARG1LO, SFARG2LO
+ | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow.
+ | xor TMP2, SFARG1LO, SFARG2LO
+ | and TMP1, TMP1, TMP2
+ | bltz TMP1, ->vmeta_arith
+ |. addu RA, BASE, RA
+ |.elif "intins" == "mult"
+ |. intins SFARG1LO, SFARG2LO
+ | mflo CRET1
+ | mfhi TMP2
+ | sra TMP1, CRET1, 31
+ | bne TMP1, TMP2, ->vmeta_arith
+ |. addu RA, BASE, RA
|.else
- | ins f0, f20, f22
+ |. load_got lj_vm_modi
+ | beqz SFARG2LO, ->vmeta_arith
+ |. addu RA, BASE, RA
+ |.if ENDIAN_BE
+ | move CARG1, SFARG1LO
+ |.endif
+ | call_extern
+ |. move CARG2, SFARG2LO
+ |.endif
+ |
+ | ins_next1
+ | sw TISNUM, HI(RA)
+ | sw CRET1, LO(RA)
+ |3:
+ | ins_next2
+ |
+ |.elif not FPU
+ |
+ | lw SFARG1LO, LO(RB)
+ | lw SFARG2LO, LO(RC)
+ |
+ |.endif
+ |
+ |5: // Check for two numbers.
+ | .FPU ldc1 f20, 0(RB)
+ | sltiu AT, SFARG1HI, LJ_TISNUM
+ | sltiu TMP0, SFARG2HI, LJ_TISNUM
+ | .FPU ldc1 f22, 0(RC)
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_arith
+ |. addu RA, BASE, RA
+ |
+ |.if FPU
+ | fpins FRET1, f20, f22
+ |.elif "fpcall" == "sfpmod"
+ | sfpmod
+ |.else
+ | load_got fpcall
+ | call_extern
+ |. nop
+ |.endif
+ |
| ins_next1
- | sdc1 f0, 0(RA)
+ |.if not FPU
+ | sw SFRETHI, HI(RA)
+ |.endif
+ |.if "intins" ~= "div"
+ | b <3
+ |.endif
+ |.if FPU
+ |. sdc1 FRET1, 0(RA)
+ |.else
+ |. sw SFRETLO, LO(RA)
+ |.endif
+ |.if "intins" == "div"
| ins_next2
|.endif
+ |
|.endmacro
case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
- | ins_arith add.d
+ | ins_arith addu, add.d, __adddf3, none
break;
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
- | ins_arith sub.d
+ | ins_arith subu, sub.d, __subdf3, none
break;
case BC_MULVN: case BC_MULNV: case BC_MULVV:
- | ins_arith mul.d
+ | ins_arith mult, mul.d, __muldf3, none
+ break;
+ case BC_DIVVN:
+ | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | ins_arith div.d
+ case BC_DIVNV: case BC_DIVVV:
+ | ins_arithpre ->BC_DIVVN_Z
break;
case BC_MODVN:
- | ins_arith fpmod
+ | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
break;
case BC_MODNV: case BC_MODVV:
- | ins_arith fpmod_
+ | ins_arithpre ->BC_MODVN_Z
break;
case BC_POW:
- | decode_RB8a RB, INS
- | decode_RB8b RB
- | decode_RDtoRC8 RC, RD
- | addu CARG3, BASE, RB
- | addu CARG4, BASE, RC
- | lw TMP1, HI(CARG3)
- | lw TMP2, HI(CARG4)
- | ldc1 FARG1, 0(CARG3)
- | ldc1 FARG2, 0(CARG4)
- | sltiu AT, TMP1, LJ_TISNUM
- | sltiu TMP0, TMP2, LJ_TISNUM
+ | ins_arithpre none
+ | lw SFARG1HI, HI(RB)
+ | lw SFARG2HI, HI(RC)
+ | sltiu AT, SFARG1HI, LJ_TISNUM
+ | sltiu TMP0, SFARG2HI, LJ_TISNUM
| and AT, AT, TMP0
| load_got pow
| beqz AT, ->vmeta_arith
|. addu RA, BASE, RA
+ |.if FPU
+ | ldc1 FARG1, 0(RB)
+ | ldc1 FARG2, 0(RC)
+ |.else
+ | lw SFARG1LO, LO(RB)
+ | lw SFARG2LO, LO(RC)
+ |.endif
| call_extern
|. nop
| ins_next1
+ |.if FPU
| sdc1 FRET1, 0(RA)
+ |.else
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
+ |.endif
| ins_next2
break;
@@ -2777,10 +3730,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bnez CRET1, ->vmeta_binop
|. lw BASE, L->base
| addu RB, BASE, MULTRES
- | ldc1 f0, 0(RB)
+ | lw SFRETHI, HI(RB)
+ | lw SFRETLO, LO(RB)
| addu RA, BASE, RA
| ins_next1
- | sdc1 f0, 0(RA) // Copy result from RB to RA.
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
break;
@@ -2815,20 +3770,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_KSHORT:
| // RA = dst*8, RD = int16_literal*8
| sra RD, INS, 16
- | mtc1 RD, f0
| addu RA, BASE, RA
- | cvt.d.w f0, f0
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw TISNUM, HI(RA)
+ | sw RD, LO(RA)
| ins_next2
break;
case BC_KNUM:
| // RA = dst*8, RD = num_const*8
| addu RD, KBASE, RD
| addu RA, BASE, RA
- | ldc1 f0, 0(RD)
+ | lw SFRETHI, HI(RD)
+ | lw SFRETLO, LO(RD)
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
break;
case BC_KPRI:
@@ -2864,9 +3820,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw UPVAL:RB, LFUNC:RD->uvptr
| ins_next1
| lw TMP1, UPVAL:RB->v
- | ldc1 f0, 0(TMP1)
+ | lw SFRETHI, HI(TMP1)
+ | lw SFRETLO, LO(TMP1)
| addu RA, BASE, RA
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
break;
case BC_USETV:
@@ -2875,26 +3833,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| srl RA, RA, 1
| addu RD, BASE, RD
| addu RA, RA, LFUNC:RB
- | ldc1 f0, 0(RD)
| lw UPVAL:RB, LFUNC:RA->uvptr
+ | lw SFRETHI, HI(RD)
+ | lw SFRETLO, LO(RD)
| lbu TMP3, UPVAL:RB->marked
| lw CARG2, UPVAL:RB->v
| andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
| lbu TMP0, UPVAL:RB->closed
- | lw TMP2, HI(RD)
- | sdc1 f0, 0(CARG2)
+ | sw SFRETHI, HI(CARG2)
+ | sw SFRETLO, LO(CARG2)
| li AT, LJ_GC_BLACK|1
| or TMP3, TMP3, TMP0
| beq TMP3, AT, >2 // Upvalue is closed and black?
- |. addiu TMP2, TMP2, -(LJ_TNUMX+1)
+ |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1)
|1:
| ins_next
|
|2: // Check if new value is collectable.
| sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
| beqz AT, <1 // tvisgcv(v)
- |. lw TMP1, LO(RD)
- | lbu TMP3, GCOBJ:TMP1->gch.marked
+ |. nop
+ | lbu TMP3, GCOBJ:SFRETLO->gch.marked
| andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
| beqz TMP3, <1
|. load_got lj_gc_barrieruv
@@ -2942,11 +3901,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| srl RA, RA, 1
| addu RD, KBASE, RD
| addu RA, RA, LFUNC:RB
- | ldc1 f0, 0(RD)
- | lw UPVAL:RB, LFUNC:RA->uvptr
+ | lw UPVAL:RB, LFUNC:RA->uvptr
+ | lw SFRETHI, HI(RD)
+ | lw SFRETLO, LO(RD)
+ | lw TMP1, UPVAL:RB->v
| ins_next1
- | lw TMP1, UPVAL:RB->v
- | sdc1 f0, 0(TMP1)
+ | sw SFRETHI, HI(TMP1)
+ | sw SFRETLO, LO(TMP1)
| ins_next2
break;
case BC_USETP:
@@ -2956,10 +3917,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| srl TMP0, RD, 3
| addu RA, RA, LFUNC:RB
| not TMP0, TMP0
- | lw UPVAL:RB, LFUNC:RA->uvptr
+ | lw UPVAL:RB, LFUNC:RA->uvptr
| ins_next1
- | lw TMP1, UPVAL:RB->v
- | sw TMP0, HI(TMP1)
+ | lw TMP1, UPVAL:RB->v
+ | sw TMP0, HI(TMP1)
| ins_next2
break;
@@ -2995,8 +3956,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li TMP0, LJ_TFUNC
| ins_next1
| addu RA, BASE, RA
- | sw TMP0, HI(RA)
| sw LFUNC:CRET1, LO(RA)
+ | sw TMP0, HI(RA)
| ins_next2
break;
@@ -3077,31 +4038,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw TMP2, HI(CARG3)
| lw TAB:RB, LO(CARG2)
| li AT, LJ_TTAB
- | ldc1 f0, 0(CARG3)
| bne TMP1, AT, ->vmeta_tgetv
|. addu RA, BASE, RA
- | sltiu AT, TMP2, LJ_TISNUM
- | beqz AT, >5
- |. li AT, LJ_TSTR
- |
- | // Convert number key to integer, check for integerness and range.
- | cvt.w.d f2, f0
- | lw TMP0, TAB:RB->asize
- | mfc1 TMP2, f2
- | cvt.d.w f4, f2
+ | bne TMP2, TISNUM, >5
+ |. lw RC, LO(CARG3)
+ | lw TMP0, TAB:RB->asize
| lw TMP1, TAB:RB->array
- | c.eq.d f0, f4
- | sltu AT, TMP2, TMP0
- | movf AT, r0
- | sll TMP2, TMP2, 3
+ | sltu AT, RC, TMP0
+ | sll TMP2, RC, 3
| beqz AT, ->vmeta_tgetv // Integer key and in array part?
|. addu TMP2, TMP1, TMP2
- | lw TMP0, HI(TMP2)
- | beq TMP0, TISNIL, >2
- |. ldc1 f0, 0(TMP2)
+ | lw SFRETHI, HI(TMP2)
+ | beq SFRETHI, TISNIL, >2
+ |. lw SFRETLO, LO(TMP2)
|1:
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
|
|2: // Check for __index if table value is nil.
@@ -3116,8 +4069,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|. nop
|
|5:
+ | li AT, LJ_TSTR
| bne TMP2, AT, ->vmeta_tgetv
- |. lw STR:RC, LO(CARG3)
+ |. nop
| b ->BC_TGETS_Z // String key?
|. nop
break;
@@ -3138,9 +4092,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|->BC_TGETS_Z:
| // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
| lw TMP0, TAB:RB->hmask
- | lw TMP1, STR:RC->hash
+ | lw TMP1, STR:RC->sid
| lw NODE:TMP2, TAB:RB->node
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
| sll TMP0, TMP1, 5
| sll TMP1, TMP1, 3
| subu TMP1, TMP0, TMP1
@@ -3149,18 +4103,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
| lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
| lw NODE:TMP1, NODE:TMP2->next
- | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2)
+ | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2)
| addiu CARG1, CARG1, -LJ_TSTR
| xor TMP0, TMP0, STR:RC
| or AT, CARG1, TMP0
| bnez AT, >4
|. lw TAB:TMP3, TAB:RB->metatable
- | beq CARG2, TISNIL, >5 // Key found, but nil value?
- |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2)
+ | beq SFRETHI, TISNIL, >5 // Key found, but nil value?
+ |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2)
|3:
| ins_next1
- | sw CARG2, HI(RA)
- | sw CARG1, LO(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
|
|4: // Follow hash chain.
@@ -3170,7 +4124,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|5: // Check for __index if table value is nil.
| beqz TAB:TMP3, <3 // No metatable: done.
- |. li CARG2, LJ_TNIL
+ |. li SFRETHI, LJ_TNIL
| lbu TMP0, TAB:TMP3->nomm
| andi TMP0, TMP0, 1<<MM_index
| bnez TMP0, <3 // 'no __index' flag set: done.
@@ -3195,12 +4149,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sltu AT, TMP0, TMP1
| beqz AT, ->vmeta_tgetb
|. addu RC, TMP2, RC
- | lw TMP1, HI(RC)
- | beq TMP1, TISNIL, >5
- |. ldc1 f0, 0(RC)
+ | lw SFRETHI, HI(RC)
+ | beq SFRETHI, TISNIL, >5
+ |. lw SFRETLO, LO(RC)
|1:
| ins_next1
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| ins_next2
|
|5: // Check for __index if table value is nil.
@@ -3211,9 +4166,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| andi TMP1, TMP1, 1<<MM_index
| bnez TMP1, <1 // 'no __index' flag set: done.
|. nop
- | b ->vmeta_tgetb // Caveat: preserve TMP0!
+ | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
|. nop
break;
+ case BC_TGETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | addu RB, BASE, RB
+ | addu RC, BASE, RC
+ | lw TAB:CARG1, LO(RB)
+ | lw CARG2, LO(RC)
+ | addu RA, BASE, RA
+ | lw TMP0, TAB:CARG1->asize
+ | lw TMP1, TAB:CARG1->array
+ | sltu AT, CARG2, TMP0
+ | sll TMP2, CARG2, 3
+ | beqz AT, ->vmeta_tgetr // In array part?
+ |. addu CRET1, TMP1, TMP2
+ | lw SFARG2HI, HI(CRET1)
+ | lw SFARG2LO, LO(CRET1)
+ |->BC_TGETR_Z:
+ | ins_next1
+ | sw SFARG2HI, HI(RA)
+ | sw SFARG2LO, LO(RA)
+ | ins_next2
+ break;
case BC_TSETV:
| // RA = src*8, RB = table*8, RC = key*8
@@ -3226,33 +4205,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw TMP2, HI(CARG3)
| lw TAB:RB, LO(CARG2)
| li AT, LJ_TTAB
- | ldc1 f0, 0(CARG3)
| bne TMP1, AT, ->vmeta_tsetv
|. addu RA, BASE, RA
- | sltiu AT, TMP2, LJ_TISNUM
- | beqz AT, >5
- |. li AT, LJ_TSTR
- |
- | // Convert number key to integer, check for integerness and range.
- | cvt.w.d f2, f0
- | lw TMP0, TAB:RB->asize
- | mfc1 TMP2, f2
- | cvt.d.w f4, f2
+ | bne TMP2, TISNUM, >5
+ |. lw RC, LO(CARG3)
+ | lw TMP0, TAB:RB->asize
| lw TMP1, TAB:RB->array
- | c.eq.d f0, f4
- | sltu AT, TMP2, TMP0
- | movf AT, r0
- | sll TMP2, TMP2, 3
+ | sltu AT, RC, TMP0
+ | sll TMP2, RC, 3
| beqz AT, ->vmeta_tsetv // Integer key and in array part?
|. addu TMP1, TMP1, TMP2
- | lbu TMP3, TAB:RB->marked
| lw TMP0, HI(TMP1)
+ | lbu TMP3, TAB:RB->marked
+ | lw SFRETHI, HI(RA)
| beq TMP0, TISNIL, >3
- |. ldc1 f0, 0(RA)
+ |. lw SFRETLO, LO(RA)
|1:
- | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
- | bnez AT, >7
- |. sdc1 f0, 0(TMP1)
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | sw SFRETHI, HI(TMP1)
+ | bnez AT, >7
+ |. sw SFRETLO, LO(TMP1)
|2:
| ins_next
|
@@ -3268,8 +4240,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|. nop
|
|5:
+ | li AT, LJ_TSTR
| bne TMP2, AT, ->vmeta_tsetv
- |. lw STR:RC, LO(CARG3)
+ |. nop
| b ->BC_TSETS_Z // String key?
|. nop
|
@@ -3293,15 +4266,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|->BC_TSETS_Z:
| // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
| lw TMP0, TAB:RB->hmask
- | lw TMP1, STR:RC->hash
+ | lw TMP1, STR:RC->sid
| lw NODE:TMP2, TAB:RB->node
| sb r0, TAB:RB->nomm // Clear metamethod cache.
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
| sll TMP0, TMP1, 5
| sll TMP1, TMP1, 3
| subu TMP1, TMP0, TMP1
| addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
+ |.if FPU
| ldc1 f20, 0(RA)
+ |.else
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
+ |.endif
|1:
| lw CARG1, offsetof(Node, key)+HI(NODE:TMP2)
| lw TMP0, offsetof(Node, key)+LO(NODE:TMP2)
@@ -3315,8 +4293,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|. lw TAB:TMP0, TAB:RB->metatable
|2:
| andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| bnez AT, >7
|. sdc1 f20, NODE:TMP2->val
+ |.else
+ | sw SFRETHI, NODE:TMP2->val.u32.hi
+ | bnez AT, >7
+ |. sw SFRETLO, NODE:TMP2->val.u32.lo
+ |.endif
|3:
| ins_next
|
@@ -3354,8 +4338,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|. move CARG1, L
| // Returns TValue *.
| lw BASE, L->base
+ |.if FPU
| b <3 // No 2nd write barrier needed.
|. sdc1 f20, 0(CRET1)
+ |.else
+ | lw SFARG1HI, HI(RA)
+ | lw SFARG1LO, LO(RA)
+ | sw SFARG1HI, HI(CRET1)
+ | b <3 // No 2nd write barrier needed.
+ |. sw SFARG1LO, LO(CRET1)
+ |.endif
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:RB, TMP3, TMP0, <3
@@ -3380,11 +4372,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw TMP1, HI(RC)
| lbu TMP3, TAB:RB->marked
| beq TMP1, TISNIL, >5
- |. ldc1 f0, 0(RA)
|1:
+ |. lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | sw SFRETHI, HI(RC)
| bnez AT, >7
- |. sdc1 f0, 0(RC)
+ |. sw SFRETLO, LO(RC)
|2:
| ins_next
|
@@ -3396,12 +4390,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| andi TMP1, TMP1, 1<<MM_newindex
| bnez TMP1, <1 // 'no __newindex' flag set: done.
|. nop
- | b ->vmeta_tsetb // Caveat: preserve TMP0!
+ | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
|. nop
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
| barrierback TAB:RB, TMP3, TMP0, <2
break;
+ case BC_TSETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | addu CARG1, BASE, RB
+ | addu CARG3, BASE, RC
+ | lw TAB:CARG2, LO(CARG1)
+ | lw CARG3, LO(CARG3)
+ | lbu TMP3, TAB:CARG2->marked
+ | lw TMP0, TAB:CARG2->asize
+ | lw TMP1, TAB:CARG2->array
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | bnez AT, >7
+ |. addu RA, BASE, RA
+ |2:
+ | sltu AT, CARG3, TMP0
+ | sll TMP2, CARG3, 3
+ | beqz AT, ->vmeta_tsetr // In array part?
+ |. addu CRET1, TMP1, TMP2
+ |->BC_TSETR_Z:
+ | lw SFARG1HI, HI(RA)
+ | lw SFARG1LO, LO(RA)
+ | ins_next1
+ | sw SFARG1HI, HI(CRET1)
+ | sw SFARG1LO, LO(CRET1)
+ | ins_next2
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP3, CRET1, <2
+ break;
case BC_TSETM:
| // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -3424,10 +4449,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addu TMP1, TMP1, CARG1
| andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
|3: // Copy result slots to table.
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| addiu RA, RA, 8
| sltu AT, RA, TMP2
- | sdc1 f0, 0(TMP1)
+ | sw SFRETHI, HI(TMP1)
+ | sw SFRETLO, LO(TMP1)
| bnez AT, <3
|. addiu TMP1, TMP1, 8
| bnez TMP0, >7
@@ -3502,10 +4529,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beqz NARGS8:RC, >3
|. move TMP3, NARGS8:RC
|2:
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| addiu RA, RA, 8
| addiu TMP3, TMP3, -8
- | sdc1 f0, 0(TMP2)
+ | sw SFRETHI, HI(TMP2)
+ | sw SFRETLO, LO(TMP2)
| bnez TMP3, <2
|. addiu TMP2, TMP2, 8
|3:
@@ -3542,12 +4571,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li AT, LJ_TFUNC
| lw TMP1, -24+HI(BASE)
| lw LFUNC:RB, -24+LO(BASE)
- | ldc1 f2, -8(BASE)
- | ldc1 f0, -16(BASE)
+ | lw SFARG1HI, -16+HI(BASE)
+ | lw SFARG1LO, -16+LO(BASE)
+ | lw SFARG2HI, -8+HI(BASE)
+ | lw SFARG2LO, -8+LO(BASE)
| sw TMP1, HI(BASE) // Copy callable.
| sw LFUNC:RB, LO(BASE)
- | sdc1 f2, 16(BASE) // Copy control var.
- | sdc1 f0, 8(BASE) // Copy state.
+ | sw SFARG1HI, 8+HI(BASE) // Copy state.
+ | sw SFARG1LO, 8+LO(BASE)
+ | sw SFARG2HI, 16+HI(BASE) // Copy control var.
+ | sw SFARG2LO, 16+LO(BASE)
| addiu BASE, BASE, 8
| bne TMP1, AT, ->vmeta_call
|. li NARGS8:RC, 16 // Iterators get 2 arguments.
@@ -3555,10 +4588,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_ITERN:
- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
- |.if JIT
- | // NYI: add hotloop, record BC_ITERN.
+ |.if JIT and ENDIAN_LE
+ | hotloop
|.endif
+ |->vm_IITERN:
+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
| addu RA, BASE, RA
| lw TAB:RB, -16+LO(RA)
| lw RC, -8+LO(RA) // Get index from control var.
@@ -3570,20 +4604,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beqz AT, >5 // Index points after array part?
|. sll TMP3, RC, 3
| addu TMP3, TMP1, TMP3
- | lw TMP2, HI(TMP3)
- | ldc1 f0, 0(TMP3)
- | mtc1 RC, f2
+ | lw SFARG1HI, HI(TMP3)
+ | lw SFARG1LO, LO(TMP3)
| lhu RD, -4+OFS_RD(PC)
- | beq TMP2, TISNIL, <1 // Skip holes in array part.
+ | sw TISNUM, HI(RA)
+ | sw RC, LO(RA)
+ | beq SFARG1HI, TISNIL, <1 // Skip holes in array part.
|. addiu RC, RC, 1
- | cvt.d.w f2, f2
+ | sw SFARG1HI, 8+HI(RA)
+ | sw SFARG1LO, 8+LO(RA)
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
- | sdc1 f0, 8(RA)
| decode_RD4b RD
| addu RD, RD, TMP3
| sw RC, -8+LO(RA) // Update control var.
| addu PC, PC, RD
- | sdc1 f2, 0(RA)
|3:
| ins_next
|
@@ -3598,18 +4632,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sll RB, RC, 3
| subu TMP3, TMP3, RB
| addu NODE:TMP3, TMP3, TMP2
- | lw RB, HI(NODE:TMP3)
- | ldc1 f0, 0(NODE:TMP3)
+ | lw SFARG1HI, NODE:TMP3->val.u32.hi
+ | lw SFARG1LO, NODE:TMP3->val.u32.lo
| lhu RD, -4+OFS_RD(PC)
- | beq RB, TISNIL, <6 // Skip holes in hash part.
+ | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part.
|. addiu RC, RC, 1
- | ldc1 f2, NODE:TMP3->key
+ | lw SFARG2HI, NODE:TMP3->key.u32.hi
+ | lw SFARG2LO, NODE:TMP3->key.u32.lo
| lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
- | sdc1 f0, 8(RA)
+ | sw SFARG1HI, 8+HI(RA)
+ | sw SFARG1LO, 8+LO(RA)
| addu RC, RC, TMP0
| decode_RD4b RD
| addu RD, RD, TMP3
- | sdc1 f2, 0(RA)
+ | sw SFARG2HI, HI(RA)
+ | sw SFARG2LO, LO(RA)
| addu PC, PC, RD
| b <3
|. sw RC, -8+LO(RA) // Update control var.
@@ -3634,9 +4671,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addiu CARG2, CARG2, -FF_next_N
| or CARG2, CARG2, CARG3
| bnez CARG2, >5
- |. lui TMP1, 0xfffe
+ |. lui TMP1, (LJ_KEYINDEX >> 16)
| addu PC, TMP0, TMP2
- | ori TMP1, TMP1, 0x7fff
+ | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
| sw r0, -8+LO(RA) // Initialize control var.
| sw TMP1, -8+HI(RA)
|1:
@@ -3645,9 +4682,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li TMP3, BC_JMP
| li TMP1, BC_ITERC
| sb TMP3, -4+OFS_OP(PC)
- | addu PC, TMP0, TMP2
+ | addu PC, TMP0, TMP2
+ |.if JIT
+ | lb TMP0, OFS_OP(PC)
+ | li AT, BC_ITERN
+ | bne TMP0, AT, >6
+ |. lhu TMP2, OFS_RD(PC)
+ |.endif
| b <1
|. sb TMP1, OFS_OP(PC)
+ |.if JIT
+ |6: // Unpatch JLOOP.
+ | lw TMP0, DISPATCH_J(trace)(DISPATCH)
+ | sll TMP2, TMP2, 2
+ | addu TMP0, TMP0, TMP2
+ | lw TRACE:TMP2, 0(TMP0)
+ | lw TMP0, TRACE:TMP2->startins
+ | li AT, -256
+ | and TMP0, TMP0, AT
+ | or TMP0, TMP0, TMP1
+ | b <1
+ |. sw TMP0, 0(PC)
+ |.endif
break;
case BC_VARG:
@@ -3689,9 +4745,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bnez AT, >7
|. addiu MULTRES, TMP1, 8
|6:
- | ldc1 f0, 0(RC)
+ | lw SFRETHI, HI(RC)
+ | lw SFRETLO, LO(RC)
| addiu RC, RC, 8
- | sdc1 f0, 0(RA)
+ | sw SFRETHI, HI(RA)
+ | sw SFRETLO, LO(RA)
| sltu AT, RC, TMP3
| bnez AT, <6 // More vararg slots?
|. addiu RA, RA, 8
@@ -3747,10 +4805,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beqz RC, >3
|. subu BASE, TMP2, TMP0
|2:
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
| addiu RA, RA, 8
| addiu RC, RC, -8
- | sdc1 f0, 0(TMP2)
+ | sw SFRETHI, HI(TMP2)
+ | sw SFRETLO, LO(TMP2)
| bnez RC, <2
|. addiu TMP2, TMP2, 8
|3:
@@ -3791,14 +4851,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lw INS, -4(PC)
| addiu TMP2, BASE, -8
if (op == BC_RET1) {
- | ldc1 f0, 0(RA)
+ | lw SFRETHI, HI(RA)
+ | lw SFRETLO, LO(RA)
}
| decode_RB8a RB, INS
| decode_RA8a RA, INS
| decode_RB8b RB
| decode_RA8b RA
if (op == BC_RET1) {
- | sdc1 f0, 0(TMP2)
+ | sw SFRETHI, HI(TMP2)
+ | sw SFRETLO, LO(TMP2)
}
| subu BASE, TMP2, RA
|5:
@@ -3840,69 +4902,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA = base*8, RD = target (after end of loop or start of loop)
vk = (op == BC_IFORL || op == BC_JFORL);
| addu RA, BASE, RA
- if (vk) {
- | ldc1 f0, FORL_IDX*8(RA)
- | ldc1 f4, FORL_STEP*8(RA)
- | ldc1 f2, FORL_STOP*8(RA)
- | lw TMP3, FORL_STEP*8+HI(RA)
- | add.d f0, f0, f4
- | sdc1 f0, FORL_IDX*8(RA)
- } else {
- | lw TMP1, FORL_IDX*8+HI(RA)
- | lw TMP3, FORL_STEP*8+HI(RA)
- | lw TMP2, FORL_STOP*8+HI(RA)
- | sltiu TMP1, TMP1, LJ_TISNUM
- | sltiu TMP0, TMP3, LJ_TISNUM
- | sltiu TMP2, TMP2, LJ_TISNUM
- | and TMP1, TMP1, TMP0
- | and TMP1, TMP1, TMP2
- | ldc1 f0, FORL_IDX*8(RA)
- | beqz TMP1, ->vmeta_for
- |. ldc1 f2, FORL_STOP*8(RA)
- }
+ | lw SFARG1HI, FORL_IDX*8+HI(RA)
+ | lw SFARG1LO, FORL_IDX*8+LO(RA)
if (op != BC_JFORL) {
| srl RD, RD, 1
- | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535)
+ | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
+ | addu TMP2, RD, TMP2
+ }
+ if (!vk) {
+ | lw SFARG2HI, FORL_STOP*8+HI(RA)
+ | lw SFARG2LO, FORL_STOP*8+LO(RA)
+ | bne SFARG1HI, TISNUM, >5
+ |. lw SFRETHI, FORL_STEP*8+HI(RA)
+ | xor AT, SFARG2HI, TISNUM
+ | lw SFRETLO, FORL_STEP*8+LO(RA)
+ | xor TMP0, SFRETHI, TISNUM
+ | or AT, AT, TMP0
+ | bnez AT, ->vmeta_for
+ |. slt AT, SFRETLO, r0
+ | slt CRET1, SFARG2LO, SFARG1LO
+ | slt TMP1, SFARG1LO, SFARG2LO
+ | movn CRET1, TMP1, AT
+ } else {
+ | bne SFARG1HI, TISNUM, >5
+ |. lw SFARG2LO, FORL_STEP*8+LO(RA)
+ | lw SFRETLO, FORL_STOP*8+LO(RA)
+ | move TMP3, SFARG1LO
+ | addu SFARG1LO, SFARG1LO, SFARG2LO
+ | xor TMP0, SFARG1LO, TMP3
+ | xor TMP1, SFARG1LO, SFARG2LO
+ | and TMP0, TMP0, TMP1
+ | slt TMP1, SFARG1LO, SFRETLO
+ | slt CRET1, SFRETLO, SFARG1LO
+ | slt AT, SFARG2LO, r0
+ | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
+ | movn CRET1, TMP1, AT
+ | or CRET1, CRET1, TMP0
+ }
+ |1:
+ if (op == BC_FORI) {
+ | movz TMP2, r0, CRET1
+ | addu PC, PC, TMP2
+ } else if (op == BC_JFORI) {
+ | addu PC, PC, TMP2
+ | lhu RD, -4+OFS_RD(PC)
+ } else if (op == BC_IFORL) {
+ | movn TMP2, r0, CRET1
+ | addu PC, PC, TMP2
+ }
+ if (vk) {
+ | sw SFARG1HI, FORL_IDX*8+HI(RA)
+ | sw SFARG1LO, FORL_IDX*8+LO(RA)
}
- | c.le.d 0, f0, f2
- | c.le.d 1, f2, f0
- | sdc1 f0, FORL_EXT*8(RA)
+ | ins_next1
+ | sw SFARG1HI, FORL_EXT*8+HI(RA)
+ | sw SFARG1LO, FORL_EXT*8+LO(RA)
+ |2:
if (op == BC_JFORI) {
- | li TMP1, 1
- | li TMP2, 1
- | addu TMP0, RD, TMP0
- | slt TMP3, TMP3, r0
- | movf TMP1, r0, 0
- | addu PC, PC, TMP0
- | movf TMP2, r0, 1
- | lhu RD, -4+OFS_RD(PC)
- | movn TMP1, TMP2, TMP3
- | bnez TMP1, =>BC_JLOOP
+ | beqz CRET1, =>BC_JLOOP
|. decode_RD8b RD
} else if (op == BC_JFORL) {
- | li TMP1, 1
- | li TMP2, 1
- | slt TMP3, TMP3, r0
- | movf TMP1, r0, 0
- | movf TMP2, r0, 1
- | movn TMP1, TMP2, TMP3
- | bnez TMP1, =>BC_JLOOP
+ | beqz CRET1, =>BC_JLOOP
+ }
+ | ins_next2
+ |
+ |5: // FP loop.
+ |.if FPU
+ if (!vk) {
+ | ldc1 f0, FORL_IDX*8(RA)
+ | ldc1 f2, FORL_STOP*8(RA)
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
+ | sltiu AT, SFRETHI, LJ_TISNUM
+ | and TMP0, TMP0, TMP1
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_for
+ |. slt TMP3, SFRETHI, r0
+ | c.ole.d 0, f0, f2
+ | c.ole.d 1, f2, f0
+ | li CRET1, 1
+ | movt CRET1, r0, 0
+ | movt AT, r0, 1
+ | b <1
+ |. movn CRET1, AT, TMP3
+ } else {
+ | ldc1 f0, FORL_IDX*8(RA)
+ | ldc1 f4, FORL_STEP*8(RA)
+ | ldc1 f2, FORL_STOP*8(RA)
+ | lw SFARG2HI, FORL_STEP*8+HI(RA)
+ | add.d f0, f0, f4
+ | c.ole.d 0, f0, f2
+ | c.ole.d 1, f2, f0
+ | slt TMP3, SFARG2HI, r0
+ | li CRET1, 1
+ | li AT, 1
+ | movt CRET1, r0, 0
+ | movt AT, r0, 1
+ | movn CRET1, AT, TMP3
+ if (op == BC_IFORL) {
+ | movn TMP2, r0, CRET1
+ | addu PC, PC, TMP2
+ }
+ | sdc1 f0, FORL_IDX*8(RA)
+ | ins_next1
+ | b <2
+ |. sdc1 f0, FORL_EXT*8(RA)
+ }
+ |.else
+ if (!vk) {
+ | sltiu TMP0, SFARG1HI, LJ_TISNUM
+ | sltiu TMP1, SFARG2HI, LJ_TISNUM
+ | sltiu AT, SFRETHI, LJ_TISNUM
+ | and TMP0, TMP0, TMP1
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_for
+ |. nop
+ | bal ->vm_sfcmpolex
+ |. move TMP3, SFRETHI
+ | b <1
|. nop
} else {
- | addu TMP1, RD, TMP0
- | slt TMP3, TMP3, r0
- | move TMP2, TMP1
- if (op == BC_FORI) {
- | movt TMP1, r0, 0
- | movt TMP2, r0, 1
+ | lw SFARG2HI, FORL_STEP*8+HI(RA)
+ | load_got __adddf3
+ | call_extern
+ |. sw TMP2, ARG5
+ | lw SFARG2HI, FORL_STOP*8+HI(RA)
+ | lw SFARG2LO, FORL_STOP*8+LO(RA)
+ | move SFARG1HI, SFRETHI
+ | move SFARG1LO, SFRETLO
+ | bal ->vm_sfcmpolex
+ |. lw TMP3, FORL_STEP*8+HI(RA)
+ if ( op == BC_JFORL ) {
+ | lhu RD, -4+OFS_RD(PC)
+ | lw TMP2, ARG5
+ | b <1
+ |. decode_RD8b RD
} else {
- | movf TMP1, r0, 0
- | movf TMP2, r0, 1
+ | b <1
+ |. lw TMP2, ARG5
}
- | movn TMP1, TMP2, TMP3
- | addu PC, PC, TMP1
}
- | ins_next
+ |.endif
break;
case BC_ITERL:
@@ -3961,8 +5101,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| sw AT, DISPATCH_GL(vmstate)(DISPATCH)
| lw TRACE:TMP2, 0(TMP1)
| sw BASE, DISPATCH_GL(jit_base)(DISPATCH)
- | sw L, DISPATCH_GL(jit_L)(DISPATCH)
| lw TMP2, TRACE:TMP2->mcode
+ | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
| jr TMP2
|. addiu JGL, DISPATCH, GG_DISP2G+32768
|.endif
@@ -4088,6 +5228,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li_vmstate INTERP
| lw PC, FRAME_PC(BASE) // Fetch PC of caller.
| subu RA, TMP1, RD // RA = L->top - nresults*8
+ | sw L, DISPATCH_GL(cur_L)(DISPATCH)
| b ->vm_returnc
|. st_vmstate
break;
@@ -4150,8 +5291,10 @@ static void emit_asm_debug(BuildCtx *ctx)
fcofs, CFRAME_SIZE);
for (i = 23; i >= 16; i--)
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
+#if !LJ_SOFTFP
for (i = 30; i >= 20; i -= 2)
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
+#endif
fprintf(ctx->fp,
"\t.align 2\n"
".LEFDE0:\n\n");
@@ -4203,8 +5346,10 @@ static void emit_asm_debug(BuildCtx *ctx)
fcofs, CFRAME_SIZE);
for (i = 23; i >= 16; i--)
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i);
+#if !LJ_SOFTFP
for (i = 30; i >= 20; i -= 2)
fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i);
+#endif
fprintf(ctx->fp,
"\t.align 2\n"
".LEFDE2:\n\n");
diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc
new file mode 100644
index 00000000..651bc42e
--- /dev/null
+++ b/src/vm_mips64.dasc
@@ -0,0 +1,5538 @@
+|// Low-level VM code for MIPS64 CPUs.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+|//
+|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
+|// Sponsored by Cisco Systems, Inc.
+|
+|.arch mips64
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|// Note: The ragged indentation of the instructions is intentional.
+|// The starting columns indicate data dependencies.
+|
+|//-----------------------------------------------------------------------
+|
+|// Fixed register assignments for the interpreter.
+|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra
+|
+|.macro .FPU, a, b
+|.if FPU
+| a, b
+|.endif
+|.endmacro
+|
+|// The following must be C callee-save (but BASE is often refetched).
+|.define BASE, r16 // Base of current Lua stack frame.
+|.define KBASE, r17 // Constants of current Lua function.
+|.define PC, r18 // Next PC.
+|.define DISPATCH, r19 // Opcode dispatch table.
+|.define LREG, r20 // Register holding lua_State (also in SAVE_L).
+|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8.
+|
+|.define JGL, r30 // On-trace: global_State + 32768.
+|
+|// Constants for type-comparisons, stores and conversions. C callee-save.
+|.define TISNIL, r30
+|.define TISNUM, r22
+|.if FPU
+|.define TOBIT, f30 // 2^52 + 2^51.
+|.endif
+|
+|// The following temporaries are not saved across C calls, except for RA.
+|.define RA, r23 // Callee-save.
+|.define RB, r8
+|.define RC, r9
+|.define RD, r10
+|.define INS, r11
+|
+|.define AT, r1 // Assembler temporary.
+|.define TMP0, r12
+|.define TMP1, r13
+|.define TMP2, r14
+|.define TMP3, r15
+|
+|// MIPS n64 calling convention.
+|.define CFUNCADDR, r25
+|.define CARG1, r4
+|.define CARG2, r5
+|.define CARG3, r6
+|.define CARG4, r7
+|.define CARG5, r8
+|.define CARG6, r9
+|.define CARG7, r10
+|.define CARG8, r11
+|
+|.define CRET1, r2
+|.define CRET2, r3
+|
+|.if FPU
+|.define FARG1, f12
+|.define FARG2, f13
+|.define FARG3, f14
+|.define FARG4, f15
+|.define FARG5, f16
+|.define FARG6, f17
+|.define FARG7, f18
+|.define FARG8, f19
+|
+|.define FRET1, f0
+|.define FRET2, f2
+|
+|.define FTMP0, f20
+|.define FTMP1, f21
+|.define FTMP2, f22
+|.endif
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|.if FPU // MIPS64 hard-float.
+|
+|.define CFRAME_SPACE, 192 // Delta for sp.
+|
+|//----- 16 byte aligned, <-- sp entering interpreter
+|.define SAVE_ERRF, 188(sp) // 32 bit values.
+|.define SAVE_NRES, 184(sp)
+|.define SAVE_CFRAME, 176(sp) // 64 bit values.
+|.define SAVE_L, 168(sp)
+|.define SAVE_PC, 160(sp)
+|//----- 16 byte aligned
+|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves.
+|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves.
+|
+|.else // MIPS64 soft-float
+|
+|.define CFRAME_SPACE, 128 // Delta for sp.
+|
+|//----- 16 byte aligned, <-- sp entering interpreter
+|.define SAVE_ERRF, 124(sp) // 32 bit values.
+|.define SAVE_NRES, 120(sp)
+|.define SAVE_CFRAME, 112(sp) // 64 bit values.
+|.define SAVE_L, 104(sp)
+|.define SAVE_PC, 96(sp)
+|//----- 16 byte aligned
+|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves.
+|
+|.endif
+|
+|.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code.
+|.define TMPD, 0(sp)
+|//----- 16 byte aligned
+|
+|.define TMPD_OFS, 0
+|
+|.define SAVE_MULTRES, TMPD
+|
+|//-----------------------------------------------------------------------
+|
+|.macro saveregs
+| daddiu sp, sp, -CFRAME_SPACE
+| sd ra, SAVE_GPR_+9*8(sp)
+| sd r30, SAVE_GPR_+8*8(sp)
+| .FPU sdc1 f31, SAVE_FPR_+7*8(sp)
+| sd r23, SAVE_GPR_+7*8(sp)
+| .FPU sdc1 f30, SAVE_FPR_+6*8(sp)
+| sd r22, SAVE_GPR_+6*8(sp)
+| .FPU sdc1 f29, SAVE_FPR_+5*8(sp)
+| sd r21, SAVE_GPR_+5*8(sp)
+| .FPU sdc1 f28, SAVE_FPR_+4*8(sp)
+| sd r20, SAVE_GPR_+4*8(sp)
+| .FPU sdc1 f27, SAVE_FPR_+3*8(sp)
+| sd r19, SAVE_GPR_+3*8(sp)
+| .FPU sdc1 f26, SAVE_FPR_+2*8(sp)
+| sd r18, SAVE_GPR_+2*8(sp)
+| .FPU sdc1 f25, SAVE_FPR_+1*8(sp)
+| sd r17, SAVE_GPR_+1*8(sp)
+| .FPU sdc1 f24, SAVE_FPR_+0*8(sp)
+| sd r16, SAVE_GPR_+0*8(sp)
+|.endmacro
+|
+|.macro restoreregs_ret
+| ld ra, SAVE_GPR_+9*8(sp)
+| ld r30, SAVE_GPR_+8*8(sp)
+| ld r23, SAVE_GPR_+7*8(sp)
+| .FPU ldc1 f31, SAVE_FPR_+7*8(sp)
+| ld r22, SAVE_GPR_+6*8(sp)
+| .FPU ldc1 f30, SAVE_FPR_+6*8(sp)
+| ld r21, SAVE_GPR_+5*8(sp)
+| .FPU ldc1 f29, SAVE_FPR_+5*8(sp)
+| ld r20, SAVE_GPR_+4*8(sp)
+| .FPU ldc1 f28, SAVE_FPR_+4*8(sp)
+| ld r19, SAVE_GPR_+3*8(sp)
+| .FPU ldc1 f27, SAVE_FPR_+3*8(sp)
+| ld r18, SAVE_GPR_+2*8(sp)
+| .FPU ldc1 f26, SAVE_FPR_+2*8(sp)
+| ld r17, SAVE_GPR_+1*8(sp)
+| .FPU ldc1 f25, SAVE_FPR_+1*8(sp)
+| ld r16, SAVE_GPR_+0*8(sp)
+| .FPU ldc1 f24, SAVE_FPR_+0*8(sp)
+| jr ra
+| daddiu sp, sp, CFRAME_SPACE
+|.endmacro
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L, lua_State, LREG
+|.type GL, global_State
+|.type TVALUE, TValue
+|.type GCOBJ, GCobj
+|.type STR, GCstr
+|.type TAB, GCtab
+|.type LFUNC, GCfuncL
+|.type CFUNC, GCfuncC
+|.type PROTO, GCproto
+|.type UPVAL, GCupval
+|.type NODE, Node
+|.type NARGS8, int
+|.type TRACE, GCtrace
+|.type SBUF, SBuf
+|
+|//-----------------------------------------------------------------------
+|
+|// Trap for not-yet-implemented parts.
+|.macro NYI; .long 0xec1cf0f0; .endmacro
+|
+|// Macros to mark delay slots.
+|.macro ., a; a; .endmacro
+|.macro ., a,b; a,b; .endmacro
+|.macro ., a,b,c; a,b,c; .endmacro
+|.macro ., a,b,c,d; a,b,c,d; .endmacro
+|
+|.define FRAME_PC, -8
+|.define FRAME_FUNC, -16
+|
+|//-----------------------------------------------------------------------
+|
+|// Endian-specific defines.
+|.if ENDIAN_LE
+|.define HI, 4
+|.define LO, 0
+|.define OFS_RD, 2
+|.define OFS_RA, 1
+|.define OFS_OP, 0
+|.else
+|.define HI, 0
+|.define LO, 4
+|.define OFS_RD, 0
+|.define OFS_RA, 2
+|.define OFS_OP, 3
+|.endif
+|
+|// Instruction decode.
+|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro
+|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro
+|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro
+|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro
+|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro
+|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro
+|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro
+|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro
+|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro
+|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro
+|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro
+|// Operand-decode helpers: results are pre-scaled by 8 (the stack-slot size).
+|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro
+|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro
+|
+|// Instruction fetch.
+|.macro ins_NEXT1
+|  lw INS, 0(PC)
+|   daddiu PC, PC, 4
+|.endmacro
+|// Instruction decode+dispatch.
+|.macro ins_NEXT2
+|  decode_OP8a TMP1, INS
+|  decode_OP8b TMP1
+|  daddu TMP0, DISPATCH, TMP1
+|   decode_RD8a RD, INS
+|  ld AT, 0(TMP0)
+|   decode_RA8a RA, INS
+|   decode_RD8b RD
+|  jr AT
+|   decode_RA8b RA
+|.endmacro
+|.macro ins_NEXT
+|  ins_NEXT1
+|  ins_NEXT2
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+|  // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+|  .define ins_next, ins_NEXT
+|  .define ins_next_, ins_NEXT
+|  .define ins_next1, ins_NEXT1
+|  .define ins_next2, ins_NEXT2
+|.else
+|  // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+|  // Affects only certain kinds of benchmarks (and only with -j off).
+|  .macro ins_next
+|    b ->ins_next
+|  .endmacro
+|  .macro ins_next1
+|  .endmacro
+|  .macro ins_next2
+|    b ->ins_next
+|  .endmacro
+|  .macro ins_next_
+|  ->ins_next:
+|    ins_NEXT
+|  .endmacro
+|.endif
+|
+|// Call decode and dispatch.
+|.macro ins_callt
+|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+|  ld PC, LFUNC:RB->pc
+|  lw INS, 0(PC)
+|   daddiu PC, PC, 4
+|  decode_OP8a TMP1, INS
+|   decode_RA8a RA, INS
+|  decode_OP8b TMP1
+|   decode_RA8b RA
+|  daddu TMP0, DISPATCH, TMP1
+|  ld TMP0, 0(TMP0)
+|  jr TMP0
+|   daddu RA, RA, BASE
+|.endmacro
+|
+|.macro ins_call
+|  // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
+|  sd PC, FRAME_PC(BASE)
+|  ins_callt
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Add biased branch offset to PC: PC += (RD >> 1) - BCBIAS_J*4.
+|// RD holds the 8x pre-scaled jump operand; clobbers TMP0 and AT.
+|.macro branch_RD
+|  srl TMP0, RD, 1
+|  lui AT, (-(BCBIAS_J*4 >> 16) & 65535)
+|  addu TMP0, TMP0, AT
+|  daddu PC, PC, TMP0
+|.endmacro
+|
+|// Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field)	(GG_DISP2G + (int)offsetof(global_State, field))
+#define DISPATCH_J(field)	(GG_DISP2J + (int)offsetof(jit_State, field))
+#define GG_DISP2GOT		(GG_OFS(got) - GG_OFS(dispatch))
+#define DISPATCH_GOT(name)	(GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name)
+|
+#define PC2PROTO(field)  ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|// Load the address of an internal C function from the per-VM GOT
+|// (addressed relative to DISPATCH) into CFUNCADDR.
+|.macro load_got, func
+|  ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH)
+|.endmacro
+|// Much faster. Sadly, there's no easy way to force the required code layout.
+|// .macro call_intern, func; bal extern func; .endmacro
+|.macro call_intern, func; jalr CFUNCADDR; .endmacro
+|.macro call_extern; jalr CFUNCADDR; .endmacro
+|.macro jmp_extern; jr CFUNCADDR; .endmacro
+|
+|// Decrement the hot counter for this PC and branch to target on underflow.
+|// The counter slot is ((PC >> 1) & 126) into the hotcount table at
+|// GG_DISP2HOT; the updated count is stored back in the delay slot.
+|// Clobbers TMP1 and TMP2.
+|.macro hotcheck, delta, target
+|  dsrl TMP1, PC, 1
+|  andi TMP1, TMP1, 126
+|  daddu TMP1, TMP1, DISPATCH
+|  lhu TMP2, GG_DISP2HOT(TMP1)
+|  addiu TMP2, TMP2, -delta
+|  bltz TMP2, target
+|.  sh TMP2, GG_DISP2HOT(TMP1)
+|.endmacro
+|
+|.macro hotloop
+|  hotcheck HOTCOUNT_LOOP, ->vm_hotloop
+|.endmacro
+|
+|.macro hotcall
+|  hotcheck HOTCOUNT_CALL, ->vm_hotcall
+|.endmacro
+|
+|// Set current VM state. Uses TMP0.
+|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
+|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
+|
+|// Move table write barrier back. Overwrites mark and tmp.
+|// Links tab into gc.grayagain, turns it gray again and branches to target
+|// with the old list head stored to tab->gclist in the delay slot.
+|.macro barrierback, tab, mark, tmp, target
+|  ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
+|   andi mark, mark, ~LJ_GC_BLACK & 255		// black2gray(tab)
+|  sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
+|   sb mark, tab->marked
+|  b target
+|.  sd tmp, tab->gclist
+|.endmacro
+|
+|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg.
+|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro
+|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro
+|
+|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst.
+|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro
+|
+|// Extract (negative) type tag.
+|.macro gettp, dst, src; dsra dst, src, 47; .endmacro
+|
+|// Macros to check the TValue type and extract the GCobj. Branch on failure.
+|.macro checktp, reg, tp, target
+|  gettp AT, reg
+|  daddiu AT, AT, tp
+|  bnez AT, target
+|.  cleartp reg
+|.endmacro
+|.macro checktp, dst, reg, tp, target
+|  gettp AT, reg
+|  daddiu AT, AT, tp
+|  bnez AT, target
+|.  cleartp dst, reg
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro
+|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro
+|.macro checkint, reg, target	// Caveat: has delay slot!
+|  gettp AT, reg
+|  bne AT, TISNUM, target
+|.endmacro
+|.macro checknum, reg, target	// Caveat: has delay slot!
+|  gettp AT, reg
+|  sltiu AT, AT, LJ_TISNUM
+|  beqz AT, target
+|.endmacro
+|
+|// Synthesize the canonical tagged 'false'/'true' TValue constants in reg
+|// (shifted-and-complemented immediates; no memory access).
+|.macro mov_false, reg
+|  lu reg, 0x8000
+|  dsll reg, reg, 32
+|  not reg, reg
+|.endmacro
+|.macro mov_true, reg
+|  li reg, 0x0001
+|  dsll reg, reg, 48
+|  not reg, reg
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+ |.code_sub
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Return handling ----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+ | // See vm_return. Also: TMP2 = previous base.
+ | andi AT, PC, FRAME_P
+ | beqz AT, ->cont_dispatch
+ |
+ | // Return from pcall or xpcall fast func.
+ |. mov_true TMP1
+ | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
+ | move BASE, TMP2 // Restore caller base.
+ | // Prepending may overwrite the pcall frame, so do it at the end.
+ | sd TMP1, -8(RA) // Prepend true to results.
+ | daddiu RA, RA, -8
+ |
+ |->vm_returnc:
+ | addiu RD, RD, 8 // RD = (nresults+1)*8.
+ | andi TMP0, PC, FRAME_TYPE
+ | beqz RD, ->vm_unwind_c_eh
+ |. li CRET1, LUA_YIELD
+ | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua.
+ |. move MULTRES, RD
+ |
+ |->vm_return:
+ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
+ | // TMP0 = PC & FRAME_TYPE
+ | li TMP2, -8
+ | xori AT, TMP0, FRAME_C
+ | and TMP2, PC, TMP2
+ | bnez AT, ->vm_returnp
+ | dsubu TMP2, BASE, TMP2 // TMP2 = previous base.
+ |
+ | addiu TMP1, RD, -8
+ | sd TMP2, L->base
+ | li_vmstate C
+ | lw TMP2, SAVE_NRES
+ | daddiu BASE, BASE, -16
+ | st_vmstate
+ | beqz TMP1, >2
+ |. sll TMP2, TMP2, 3
+ |1:
+ | addiu TMP1, TMP1, -8
+ | ld CRET1, 0(RA)
+ | daddiu RA, RA, 8
+ | sd CRET1, 0(BASE)
+ | bnez TMP1, <1
+ |. daddiu BASE, BASE, 8
+ |
+ |2:
+ | bne TMP2, RD, >6
+ |3:
+ |. sd BASE, L->top // Store new top.
+ |
+ |->vm_leave_cp:
+ | ld TMP0, SAVE_CFRAME // Restore previous C frame.
+ | move CRET1, r0 // Ok return status for vm_pcall.
+ | sd TMP0, L->cframe
+ |
+ |->vm_leave_unw:
+ | restoreregs_ret
+ |
+ |6:
+ | ld TMP1, L->maxstack
+ | slt AT, TMP2, RD
+ | bnez AT, >7 // Less results wanted?
+ | // More results wanted. Check stack size and fill up results with nil.
+ |. slt AT, BASE, TMP1
+ | beqz AT, >8
+ |. nop
+ | sd TISNIL, 0(BASE)
+ | addiu RD, RD, 8
+ | b <2
+ |. daddiu BASE, BASE, 8
+ |
+ |7: // Less results wanted.
+ | subu TMP0, RD, TMP2
+ | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it.
+ |.if MIPSR6
+ | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case?
+ | seleqz BASE, BASE, TMP2
+ | b <3
+ |. or BASE, BASE, TMP0
+ |.else
+ | b <3
+ |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case?
+ |.endif
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+ | // This can happen if:
+ | // - A C function grows the stack (a lot).
+ | // - The GC shrinks the stack in between.
+ | // - A return back from a lua_call() with (high) nresults adjustment.
+ | load_got lj_state_growstack
+ | move MULTRES, RD
+ | srl CARG2, TMP2, 3
+ | call_intern lj_state_growstack // (lua_State *L, int n)
+ |. move CARG1, L
+ | lw TMP2, SAVE_NRES
+ | ld BASE, L->top // Need the (realloced) L->top in BASE.
+ | move RD, MULTRES
+ | b <2
+ |. sll TMP2, TMP2, 3
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
+ | // (void *cframe, int errcode)
+ | move sp, CARG1
+ | move CRET1, CARG2
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | ld L, SAVE_L
+ | li TMP0, ~LJ_VMST_C
+ | ld GL:TMP1, L->glref
+ | b ->vm_leave_unw
+ |. sw TMP0, GL:TMP1->vmstate
+ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ | // (void *cframe)
+ | li AT, -4
+ | and sp, CARG1, AT
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
+ | ld L, SAVE_L
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | li TISNIL, LJ_TNIL
+ | li TISNUM, LJ_TISNUM
+ | ld BASE, L->base
+ | ld DISPATCH, L->glref // Setup pointer to dispatch table.
+ | .FPU mtc1 TMP3, TOBIT
+ | mov_false TMP1
+ | li_vmstate INTERP
+ | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame.
+ | .FPU cvt.d.s TOBIT, TOBIT
+ | daddiu RA, BASE, -8 // Results start at BASE-8.
+ | daddiu DISPATCH, DISPATCH, GG_G2DISP
+ | sd TMP1, 0(RA) // Prepend false to error message.
+ | st_vmstate
+ | b ->vm_returnc
+ |. li RD, 16 // 2 results: false + error message.
+ |
+ |->vm_unwind_stub: // Jump to exit stub from unwinder.
+ | jr CARG1
+ |. move ra, CARG2
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
+ | b >2
+ |. li CARG2, LUA_MINSTACK
+ |
+ |->vm_growstack_l: // Grow stack for Lua function.
+ | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
+ | daddu RC, BASE, RC
+ | dsubu RA, RA, BASE
+ | sd BASE, L->base
+ | daddiu PC, PC, 4 // Must point after first instruction.
+ | sd RC, L->top
+ | srl CARG2, RA, 3
+ |2:
+ | // L->base = new base, L->top = top
+ | load_got lj_state_growstack
+ | sd PC, SAVE_PC
+ | call_intern lj_state_growstack // (lua_State *L, int n)
+ |. move CARG1, L
+ | ld BASE, L->base
+ | ld RC, L->top
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
+ | dsubu RC, RC, BASE
+ | cleartp LFUNC:RB
+ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
+ | ins_callt // Just retry the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+ | saveregs
+ | move L, CARG1
+ | ld DISPATCH, L->glref // Setup pointer to dispatch table.
+ | move BASE, CARG2
+ | lbu TMP1, L->status
+ | sd L, SAVE_L
+ | li PC, FRAME_CP
+ | daddiu TMP0, sp, CFRAME_RESUME
+ | daddiu DISPATCH, DISPATCH, GG_G2DISP
+ | sw r0, SAVE_NRES
+ | sw r0, SAVE_ERRF
+ | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ | sd r0, SAVE_CFRAME
+ | beqz TMP1, >3
+ |. sd TMP0, L->cframe
+ |
+ | // Resume after yield (like a return).
+ | sd L, DISPATCH_GL(cur_L)(DISPATCH)
+ | move RA, BASE
+ | ld BASE, L->base
+ | ld TMP1, L->top
+ | ld PC, FRAME_PC(BASE)
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | dsubu RD, TMP1, BASE
+ | .FPU mtc1 TMP3, TOBIT
+ | sb r0, L->status
+ | .FPU cvt.d.s TOBIT, TOBIT
+ | li_vmstate INTERP
+ | daddiu RD, RD, 8
+ | st_vmstate
+ | move MULTRES, RD
+ | andi TMP0, PC, FRAME_TYPE
+ | li TISNIL, LJ_TNIL
+ | beqz TMP0, ->BC_RET_Z
+ |. li TISNUM, LJ_TISNUM
+ | b ->vm_return
+ |. nop
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+ | saveregs
+ | sw CARG4, SAVE_ERRF
+ | b >1
+ |. li PC, FRAME_CP
+ |
+ |->vm_call: // Setup C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1)
+ | saveregs
+ | li PC, FRAME_C
+ |
+ |1: // Entry point for vm_pcall above (PC = ftype).
+ | ld TMP1, L:CARG1->cframe
+ | move L, CARG1
+ | sw CARG3, SAVE_NRES
+ | ld DISPATCH, L->glref // Setup pointer to dispatch table.
+ | sd CARG1, SAVE_L
+ | move BASE, CARG2
+ | daddiu DISPATCH, DISPATCH, GG_G2DISP
+ | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ | sd TMP1, SAVE_CFRAME
+ | sd sp, L->cframe // Add our C frame to cframe chain.
+ |
+ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+ | sd L, DISPATCH_GL(cur_L)(DISPATCH)
+ | ld TMP2, L->base // TMP2 = old base (used in vmeta_call).
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | ld TMP1, L->top
+ | .FPU mtc1 TMP3, TOBIT
+ | daddu PC, PC, BASE
+ | dsubu NARGS8:RC, TMP1, BASE
+ | li TISNUM, LJ_TISNUM
+ | dsubu PC, PC, TMP2 // PC = frame delta + frame type
+ | .FPU cvt.d.s TOBIT, TOBIT
+ | li_vmstate INTERP
+ | li TISNIL, LJ_TNIL
+ | st_vmstate
+ |
+ |->vm_call_dispatch:
+ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
+ | checkfunc LFUNC:RB, ->vmeta_call
+ |
+ |->vm_call_dispatch_f:
+ | ins_call
+ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+ | saveregs
+ | move L, CARG1
+ | ld TMP0, L:CARG1->stack
+ | sd CARG1, SAVE_L
+ | ld TMP1, L->top
+ | ld DISPATCH, L->glref // Setup pointer to dispatch table.
+ | sd CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
+ | ld TMP1, L->cframe
+ | daddiu DISPATCH, DISPATCH, GG_G2DISP
+ | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
+ | sw r0, SAVE_ERRF // No error function.
+ | sd TMP1, SAVE_CFRAME
+ | sd sp, L->cframe // Add our C frame to cframe chain.
+ | sd L, DISPATCH_GL(cur_L)(DISPATCH)
+ | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud)
+ |. move CFUNCADDR, CARG4
+ | move BASE, CRET1
+ | bnez CRET1, <3 // Else continue with the call.
+ |. li PC, FRAME_CP
+ | b ->vm_leave_cp // No base? Just remove C frame.
+ |. nop
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
+ |// stack, so BASE doesn't need to be reloaded across these calls.
+ |
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+ | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
+ | ld TMP0, -32(BASE) // Continuation.
+ | move RB, BASE
+ | move BASE, TMP2 // Restore caller BASE.
+ | ld LFUNC:TMP1, FRAME_FUNC(TMP2)
+ |.if FFI
+ | sltiu AT, TMP0, 2
+ |.endif
+ | ld PC, -24(RB) // Restore PC from [cont|PC].
+ | cleartp LFUNC:TMP1
+ | daddu TMP2, RA, RD
+ |.if FFI
+ | bnez AT, >1
+ |.endif
+ |. sd TISNIL, -8(TMP2) // Ensure one valid arg.
+ | ld TMP1, LFUNC:TMP1->pc
+ | // BASE = base, RA = resultptr, RB = meta base
+ | jr TMP0 // Jump to continuation.
+ |. ld KBASE, PC2PROTO(k)(TMP1)
+ |
+ |.if FFI
+ |1:
+ | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback.
+ | // cont = 0: tailcall from C function.
+ |. daddiu TMP1, RB, -32
+ | b ->vm_call_tail
+ |. dsubu RC, TMP1, BASE
+ |.endif
+ |
+ |->cont_cat: // RA = resultptr, RB = meta base
+ | lw INS, -4(PC)
+ | daddiu CARG2, RB, -32
+ | ld CRET1, 0(RA)
+ | decode_RB8a MULTRES, INS
+ | decode_RA8a RA, INS
+ | decode_RB8b MULTRES
+ | decode_RA8b RA
+ | daddu TMP1, BASE, MULTRES
+ | sd BASE, L->base
+ | dsubu CARG3, CARG2, TMP1
+ | bne TMP1, CARG2, ->BC_CAT_Z
+ |. sd CRET1, 0(CARG2)
+ | daddu RA, BASE, RA
+ | b ->cont_nop
+ |. sd CRET1, 0(RA)
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |->vmeta_tgets1:
+ | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
+ | li TMP0, LJ_TSTR
+ | settp STR:RC, TMP0
+ | b >1
+ |. sd STR:RC, 0(CARG3)
+ |
+ |->vmeta_tgets:
+ | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
+ | li TMP0, LJ_TTAB
+ | li TMP1, LJ_TSTR
+ | settp TAB:RB, TMP0
+ | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2)
+ | sd TAB:RB, 0(CARG2)
+ | settp STR:RC, TMP1
+ | b >1
+ |. sd STR:RC, 0(CARG3)
+ |
+ |->vmeta_tgetb: // TMP0 = index
+ | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
+ | settp TMP0, TISNUM
+ | sd TMP0, 0(CARG3)
+ |
+ |->vmeta_tgetv:
+ |1:
+ | load_got lj_meta_tget
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
+ |. move CARG1, L
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | beqz CRET1, >3
+ |. daddiu TMP1, BASE, -FRAME_CONT
+ | ld CARG1, 0(CRET1)
+ | ins_next1
+ | sd CARG1, 0(RA)
+ | ins_next2
+ |
+ |3: // Call __index metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | ld BASE, L->top
+ | sd PC, -24(BASE) // [cont|PC]
+ | dsubu PC, BASE, TMP1
+ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
+ | cleartp LFUNC:RB
+ | b ->vm_call_dispatch_f
+ |. li NARGS8:RC, 16 // 2 args for func(t, k).
+ |
+ |->vmeta_tgetr:
+ | load_got lj_tab_getinth
+ | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
+ |. nop
+ | // Returns cTValue * or NULL.
+ | beqz CRET1, ->BC_TGETR_Z
+ |. move CARG2, TISNIL
+ | b ->BC_TGETR_Z
+ |. ld CARG2, 0(CRET1)
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->vmeta_tsets1:
+ | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
+ | li TMP0, LJ_TSTR
+ | settp STR:RC, TMP0
+ | b >1
+ |. sd STR:RC, 0(CARG3)
+ |
+ |->vmeta_tsets:
+ | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
+ | li TMP0, LJ_TTAB
+ | li TMP1, LJ_TSTR
+ | settp TAB:RB, TMP0
+ | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2)
+ | sd TAB:RB, 0(CARG2)
+ | settp STR:RC, TMP1
+ | b >1
+ |. sd STR:RC, 0(CARG3)
+ |
+ |->vmeta_tsetb: // TMP0 = index
+ | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
+ | settp TMP0, TISNUM
+ | sd TMP0, 0(CARG3)
+ |
+ |->vmeta_tsetv:
+ |1:
+ | load_got lj_meta_tset
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+ |. move CARG1, L
+ | // Returns TValue * (finished) or NULL (metamethod).
+ | beqz CRET1, >3
+ |. ld CARG1, 0(RA)
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | ins_next1
+ | sd CARG1, 0(CRET1)
+ | ins_next2
+ |
+ |3: // Call __newindex metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+ | daddiu TMP1, BASE, -FRAME_CONT
+ | ld BASE, L->top
+ | sd PC, -24(BASE) // [cont|PC]
+ | dsubu PC, BASE, TMP1
+ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
+ | cleartp LFUNC:RB
+ | sd CARG1, 16(BASE) // Copy value to third argument.
+ | b ->vm_call_dispatch_f
+ |. li NARGS8:RC, 24 // 3 args for func(t, k, v)
+ |
+ |->vmeta_tsetr:
+ | load_got lj_tab_setinth
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ |. move CARG1, L
+ | // Returns TValue *.
+ | b ->BC_TSETR_Z
+ |. nop
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+ |->vmeta_comp:
+ | // RA/RD point to o1/o2.
+ | move CARG2, RA
+ | move CARG3, RD
+ | load_got lj_meta_comp
+ | daddiu PC, PC, -4
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | decode_OP1 CARG4, INS
+ | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+ |. move CARG1, L
+ | // Returns 0/1 or TValue * (metamethod).
+ |3:
+ | sltiu AT, CRET1, 2
+ | beqz AT, ->vmeta_binop
+ | negu TMP2, CRET1
+ |4:
+ | lhu RD, OFS_RD(PC)
+ | daddiu PC, PC, 4
+ | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535)
+ | sll RD, RD, 2
+ | addu RD, RD, TMP1
+ | and RD, RD, TMP2
+ | daddu PC, PC, RD
+ |->cont_nop:
+ | ins_next
+ |
+ |->cont_ra: // RA = resultptr
+ | lbu TMP1, -4+OFS_RA(PC)
+ | ld CRET1, 0(RA)
+ | sll TMP1, TMP1, 3
+ | daddu TMP1, BASE, TMP1
+ | b ->cont_nop
+ |. sd CRET1, 0(TMP1)
+ |
+ |->cont_condt: // RA = resultptr
+ | ld TMP0, 0(RA)
+ | gettp TMP0, TMP0
+ | sltiu AT, TMP0, LJ_TISTRUECOND
+ | b <4
+ |. negu TMP2, AT // Branch if result is true.
+ |
+ |->cont_condf: // RA = resultptr
+ | ld TMP0, 0(RA)
+ | gettp TMP0, TMP0
+ | sltiu AT, TMP0, LJ_TISTRUECOND
+ | b <4
+ |. addiu TMP2, AT, -1 // Branch if result is false.
+ |
+ |->vmeta_equal:
+ | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1.
+ | load_got lj_meta_equal
+ | cleartp LFUNC:CARG3, CARG2
+ | cleartp LFUNC:CARG2, CARG1
+ | move CARG4, TMP0
+ | daddiu PC, PC, -4
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
+ |. move CARG1, L
+ | // Returns 0/1 or TValue * (metamethod).
+ | b <3
+ |. nop
+ |
+ |->vmeta_equal_cd:
+ |.if FFI
+ | load_got lj_meta_equal_cd
+ | move CARG2, INS
+ | daddiu PC, PC, -4
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op)
+ |. move CARG1, L
+ | // Returns 0/1 or TValue * (metamethod).
+ | b <3
+ |. nop
+ |.endif
+ |
+ |->vmeta_istype:
+ | load_got lj_meta_istype
+ | daddiu PC, PC, -4
+ | sd BASE, L->base
+ | srl CARG2, RA, 3
+ | srl CARG3, RD, 3
+ | sd PC, SAVE_PC
+ | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ |. move CARG1, L
+ | b ->cont_nop
+ |. nop
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |->vmeta_unm:
+ | move RC, RB
+ |
+ |->vmeta_arith:
+ | load_got lj_meta_arith
+ | sd BASE, L->base
+ | move CARG2, RA
+ | sd PC, SAVE_PC
+ | move CARG3, RB
+ | move CARG4, RC
+ | decode_OP1 CARG5, INS // CARG5 == RB.
+ | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+ |. move CARG1, L
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | beqz CRET1, ->cont_nop
+ |. nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
+ | dsubu TMP1, CRET1, BASE
+ | sd PC, -24(CRET1) // [cont|PC]
+ | move TMP2, BASE
+ | daddiu PC, TMP1, FRAME_CONT
+ | move BASE, CRET1
+ | b ->vm_call_dispatch
+ |. li NARGS8:RC, 16 // 2 args for func(o1, o2).
+ |
+ |->vmeta_len:
+ | // CARG2 already set by BC_LEN.
+#if LJ_52
+ | move MULTRES, CARG1
+#endif
+ | load_got lj_meta_len
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | call_intern lj_meta_len // (lua_State *L, TValue *o)
+ |. move CARG1, L
+ | // Returns NULL (retry) or TValue * (metamethod base).
+#if LJ_52
+ | bnez CRET1, ->vmeta_binop // Binop call for compatibility.
+ |. nop
+ | b ->BC_LEN_Z
+ |. move CARG1, MULTRES
+#else
+ | b ->vmeta_binop // Binop call for compatibility.
+ |. nop
+#endif
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call: // Resolve and call __call metamethod.
+ | // TMP2 = old base, BASE = new base, RC = nargs*8
+ | load_got lj_meta_call
+ | sd TMP2, L->base // This is the callers base!
+ | daddiu CARG2, BASE, -16
+ | sd PC, SAVE_PC
+ | daddu CARG3, BASE, RC
+ | move MULTRES, NARGS8:RC
+ | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ |. move CARG1, L
+ | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
+ | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now.
+ | cleartp LFUNC:RB
+ | ins_call
+ |
+ |->vmeta_callt: // Resolve __call for BC_CALLT.
+ | // BASE = old base, RA = new base, RC = nargs*8
+ | load_got lj_meta_call
+ | sd BASE, L->base
+ | daddiu CARG2, RA, -16
+ | sd PC, SAVE_PC
+ | daddu CARG3, RA, RC
+ | move MULTRES, NARGS8:RC
+ | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ |. move CARG1, L
+ | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
+ | ld TMP1, FRAME_PC(BASE)
+ | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now.
+ | b ->BC_CALLT_Z
+ |. cleartp LFUNC:CARG3, RB
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+ | load_got lj_meta_for
+ | sd BASE, L->base
+ | move CARG2, RA
+ | sd PC, SAVE_PC
+ | move MULTRES, INS
+ | call_intern lj_meta_for // (lua_State *L, TValue *base)
+ |. move CARG1, L
+ |.if JIT
+ | decode_OP1 TMP0, MULTRES
+ | li AT, BC_JFORI
+ |.endif
+ | decode_RA8a RA, MULTRES
+ | decode_RD8a RD, MULTRES
+ | decode_RA8b RA
+ |.if JIT
+ | beq TMP0, AT, =>BC_JFORI
+ |. decode_RD8b RD
+ | b =>BC_FORI
+ |. nop
+ |.else
+ | b =>BC_FORI
+ |. decode_RD8b RD
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro .ffunc, name
+ |->ff_ .. name:
+ |.endmacro
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
+ | beqz NARGS8:RC, ->fff_fallback
+ |. ld CARG1, 0(BASE)
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
+ | sltiu AT, NARGS8:RC, 16
+ | ld CARG1, 0(BASE)
+ | bnez AT, ->fff_fallback
+ |. ld CARG2, 8(BASE)
+ |.endmacro
+ |
+ |.macro .ffunc_n, name // Caveat: has delay slot!
+ |->ff_ .. name:
+ | ld CARG1, 0(BASE)
+ | beqz NARGS8:RC, ->fff_fallback
+ | // Either ldc1 or the 1st instruction of checknum is in the delay slot.
+ | .FPU ldc1 FARG1, 0(BASE)
+ | checknum CARG1, ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name // Caveat: has delay slot!
+ |->ff_ .. name:
+ | ld CARG1, 0(BASE)
+ | sltiu AT, NARGS8:RC, 16
+ | ld CARG2, 8(BASE)
+ | bnez AT, ->fff_fallback
+ |. gettp TMP0, CARG1
+ | gettp TMP1, CARG2
+ | sltiu TMP0, TMP0, LJ_TISNUM
+ | sltiu TMP1, TMP1, LJ_TISNUM
+ | .FPU ldc1 FARG1, 0(BASE)
+ | and TMP0, TMP0, TMP1
+ | .FPU ldc1 FARG2, 8(BASE)
+ | beqz TMP0, ->fff_fallback
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot!
+ |// MIPSR6: no delay slot, but a forbidden slot.
+ |.macro ffgccheck
+ | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
+ | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
+ | dsubu AT, TMP0, TMP1
+ |.if MIPSR6
+ | bgezalc AT, ->fff_gcstep
+ |.else
+ | bgezal AT, ->fff_gcstep
+ |.endif
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |.ffunc_1 assert
+ | gettp AT, CARG1
+ | sltiu AT, AT, LJ_TISTRUECOND
+ | beqz AT, ->fff_fallback
+ |. daddiu RA, BASE, -16
+ | ld PC, FRAME_PC(BASE)
+ | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
+ | daddu TMP2, RA, RD
+ | daddiu TMP1, BASE, 8
+ | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument.
+ |. sd CARG1, 0(RA)
+ |1:
+ | ld CRET1, 0(TMP1)
+ | sd CRET1, -16(TMP1)
+ | bne TMP1, TMP2, <1
+ |. daddiu TMP1, TMP1, 8
+ | b ->fff_res
+ |. nop
+ |
+ |.ffunc_1 type
+ | gettp TMP0, CARG1
+ | sltu TMP1, TISNUM, TMP0
+ | not TMP2, TMP0
+ | li TMP3, ~LJ_TISNUM
+ |.if MIPSR6
+ | selnez TMP2, TMP2, TMP1
+ | seleqz TMP3, TMP3, TMP1
+ | or TMP2, TMP2, TMP3
+ |.else
+ | movz TMP2, TMP3, TMP1
+ |.endif
+ | dsll TMP2, TMP2, 3
+ | daddu TMP2, CFUNC:RB, TMP2
+ | b ->fff_restv
+ |. ld CARG1, CFUNC:TMP2->upvalue
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+ | gettp TMP2, CARG1
+ | daddiu TMP0, TMP2, -LJ_TTAB
+ | daddiu TMP1, TMP2, -LJ_TUDATA
+ |.if MIPSR6
+ | selnez TMP0, TMP1, TMP0
+ |.else
+ | movn TMP0, TMP1, TMP0
+ |.endif
+ | bnez TMP0, >6
+ |. cleartp TAB:CARG1
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | ld TAB:RB, TAB:CARG1->metatable
+ |2:
+ | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
+ | beqz TAB:RB, ->fff_restv
+ |. li CARG1, LJ_TNIL
+ | lw TMP0, TAB:RB->hmask
+ | lw TMP1, STR:RC->sid
+ | ld NODE:TMP2, TAB:RB->node
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | dsll TMP0, TMP1, 5
+ | dsll TMP1, TMP1, 3
+ | dsubu TMP1, TMP0, TMP1
+ | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
+ | li CARG4, LJ_TSTR
+ | settp STR:RC, CARG4 // Tagged key to look for.
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+ | ld TMP0, NODE:TMP2->key
+ | ld CARG1, NODE:TMP2->val
+ | ld NODE:TMP2, NODE:TMP2->next
+ | beq RC, TMP0, >5
+ |. li AT, LJ_TTAB
+ | bnez NODE:TMP2, <3
+ |. nop
+ |4:
+ | move CARG1, RB
+ | b ->fff_restv // Not found, keep default result.
+ |. settp CARG1, AT
+ |5:
+ | bne CARG1, TISNIL, ->fff_restv
+ |. nop
+ | b <4 // Ditto for nil value.
+ |. nop
+ |
+ |6:
+ | sltiu AT, TMP2, LJ_TISNUM
+ |.if MIPSR6
+ | selnez TMP0, TISNUM, AT
+ | seleqz AT, TMP2, AT
+ | or TMP2, TMP0, AT
+ |.else
+ | movn TMP2, TISNUM, AT
+ |.endif
+ | dsll TMP2, TMP2, 3
+ | dsubu TMP0, DISPATCH, TMP2
+ | b <2
+ |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0)
+ |
+ |.ffunc_2 setmetatable
+ | // Fast path: no mt for table yet and not clearing the mt.
+ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+ | gettp TMP3, CARG2
+ | ld TAB:TMP0, TAB:TMP1->metatable
+ | lbu TMP2, TAB:TMP1->marked
+ | daddiu AT, TMP3, -LJ_TTAB
+ | cleartp TAB:CARG2
+ | or AT, AT, TAB:TMP0
+ | bnez AT, ->fff_fallback
+ |. andi AT, TMP2, LJ_GC_BLACK // isblack(table)
+ | beqz AT, ->fff_restv
+ |. sd TAB:CARG2, TAB:TMP1->metatable
+ | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv
+ |
+ |.ffunc rawget
+ | ld CARG2, 0(BASE)
+ | sltiu AT, NARGS8:RC, 16
+ | load_got lj_tab_get
+ | gettp TMP0, CARG2
+ | cleartp CARG2
+ | daddiu TMP0, TMP0, -LJ_TTAB
+ | or AT, AT, TMP0
+ | bnez AT, ->fff_fallback
+ |. daddiu CARG3, BASE, 8
+ | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ |. move CARG1, L
+ | b ->fff_restv
+ |. ld CARG1, 0(CRET1)
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+ | // Only handles the number case inline (without a base argument).
+ | ld CARG1, 0(BASE)
+ | xori AT, NARGS8:RC, 8 // Exactly one number argument.
+ | gettp TMP1, CARG1
+ | sltu TMP0, TISNUM, TMP1
+ | or AT, AT, TMP0
+ | bnez AT, ->fff_fallback
+ |. nop
+ | b ->fff_restv
+ |. nop
+ |
+ |.ffunc_1 tostring
+ | // Only handles the string or number case inline.
+ | gettp TMP0, CARG1
+ | daddiu AT, TMP0, -LJ_TSTR
+ | // A __tostring method in the string base metatable is ignored.
+ | beqz AT, ->fff_restv // String key?
+ | // Handle numbers inline, unless a number base metatable is present.
+ |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
+ | sltu TMP0, TISNUM, TMP0
+ | or TMP0, TMP0, TMP1
+ | bnez TMP0, ->fff_fallback
+ |. sd BASE, L->base // Add frame since C call can throw.
+ |.if MIPSR6
+ | sd PC, SAVE_PC // Redundant (but a defined value).
+ | ffgccheck
+ |.else
+ | ffgccheck
+ |. sd PC, SAVE_PC // Redundant (but a defined value).
+ |.endif
+ | load_got lj_strfmt_number
+ | move CARG1, L
+ | call_intern lj_strfmt_number // (lua_State *L, cTValue *o)
+ |. move CARG2, BASE
+ | // Returns GCstr *.
+ | li AT, LJ_TSTR
+ | settp CRET1, AT
+ | b ->fff_restv
+ |. move CARG1, CRET1
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |.ffunc_1 next
+ | checktp CARG1, -LJ_TTAB, ->fff_fallback
+ | daddu TMP2, BASE, NARGS8:RC
+ | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil.
+ | load_got lj_tab_next
+ | ld PC, FRAME_PC(BASE)
+ | daddiu CARG2, BASE, 8
+ | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ |. daddiu CARG3, BASE, -16
+ | // Returns 1=found, 0=end, -1=error.
+ | daddiu RA, BASE, -16
+ | bgtz CRET1, ->fff_res // Found key/value.
+ |. li RD, (2+1)*8
+ | beqz CRET1, ->fff_restv // End of traversal: return nil.
+ |. move CARG1, TISNIL
+ | ld CFUNC:RB, FRAME_FUNC(BASE)
+ | cleartp CFUNC:RB
+ | b ->fff_fallback // Invalid key.
+ |. li RC, 2*8
+ |
+ |.ffunc_1 pairs
+ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+ | ld PC, FRAME_PC(BASE)
+#if LJ_52
+ | ld TAB:TMP2, TAB:TMP1->metatable
+ | ld TMP0, CFUNC:RB->upvalue[0]
+ | bnez TAB:TMP2, ->fff_fallback
+#else
+ | ld TMP0, CFUNC:RB->upvalue[0]
+#endif
+ |. daddiu RA, BASE, -16
+ | sd TISNIL, 0(BASE)
+ | sd CARG1, -8(BASE)
+ | sd TMP0, 0(RA)
+ | b ->fff_res
+ |. li RD, (3+1)*8
+ |
+ |.ffunc_2 ipairs_aux
+ | checktab CARG1, ->fff_fallback
+ | checkint CARG2, ->fff_fallback
+ |. lw TMP0, TAB:CARG1->asize
+ | ld TMP1, TAB:CARG1->array
+ | ld PC, FRAME_PC(BASE)
+ | sextw TMP2, CARG2
+ | addiu TMP2, TMP2, 1
+ | sltu AT, TMP2, TMP0
+ | daddiu RA, BASE, -16
+ | zextw TMP0, TMP2
+ | settp TMP0, TISNUM
+ | beqz AT, >2 // Not in array part?
+ |. sd TMP0, 0(RA)
+ | dsll TMP3, TMP2, 3
+ | daddu TMP3, TMP1, TMP3
+ | ld TMP1, 0(TMP3)
+ |1:
+ | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results.
+ |. li RD, (0+1)*8
+ | sd TMP1, -8(BASE)
+ | b ->fff_res
+ |. li RD, (2+1)*8
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | lw TMP0, TAB:CARG1->hmask
+ | load_got lj_tab_getinth
+ | beqz TMP0, ->fff_res
+ |. li RD, (0+1)*8
+ | call_intern lj_tab_getinth // (GCtab *t, int32_t key)
+ |. move CARG2, TMP2
+ | // Returns cTValue * or NULL.
+ | beqz CRET1, ->fff_res
+ |. li RD, (0+1)*8
+ | b <1
+ |. ld TMP1, 0(CRET1)
+ |
+ |.ffunc_1 ipairs
+ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
+ | ld PC, FRAME_PC(BASE)
+#if LJ_52
+ | ld TAB:TMP2, TAB:TMP1->metatable
+ | ld CFUNC:TMP0, CFUNC:RB->upvalue[0]
+ | bnez TAB:TMP2, ->fff_fallback
+#else
+ | ld TMP0, CFUNC:RB->upvalue[0]
+#endif
+ | daddiu RA, BASE, -16
+ | dsll AT, TISNUM, 47
+ | sd CARG1, -8(BASE)
+ | sd AT, 0(BASE)
+ | sd CFUNC:TMP0, 0(RA)
+ | b ->fff_res
+ |. li RD, (3+1)*8
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |.ffunc pcall
+ | daddiu NARGS8:RC, NARGS8:RC, -8
+ | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
+ | bltz NARGS8:RC, ->fff_fallback
+ |. move TMP2, BASE
+ | daddiu BASE, BASE, 16
+ | // Remember active hook before pcall.
+ | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT
+ | andi TMP3, TMP3, 1
+ | daddiu PC, TMP3, 16+FRAME_PCALL
+ | beqz NARGS8:RC, ->vm_call_dispatch
+ |1:
+ |. daddu TMP0, BASE, NARGS8:RC
+ |2:
+ | ld TMP1, -16(TMP0)
+ | sd TMP1, -8(TMP0)
+ | daddiu TMP0, TMP0, -8
+ | bne TMP0, BASE, <2
+ |. nop
+ | b ->vm_call_dispatch
+ |. nop
+ |
+ |.ffunc xpcall
+ | daddiu NARGS8:TMP0, NARGS8:RC, -16
+ | ld CARG1, 0(BASE)
+ | ld CARG2, 8(BASE)
+ | bltz NARGS8:TMP0, ->fff_fallback // Need at least 2 arguments.
+ |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
+ | gettp AT, CARG2
+ | daddiu AT, AT, -LJ_TFUNC
+ | bnez AT, ->fff_fallback // Traceback must be a function.
+ |. move TMP2, BASE
+ | move NARGS8:RC, NARGS8:TMP0
+ | daddiu BASE, BASE, 24
+ | // Remember active hook before pcall.
+ | srl TMP3, TMP1, HOOK_ACTIVE_SHIFT // Fix: hookmask was loaded into TMP1; TMP3 is undefined here.
+ | sd CARG2, 0(TMP2) // Swap function and traceback.
+ | andi TMP3, TMP3, 1 // TMP3 = hook-active bit (0 or 1).
+ | sd CARG1, 8(TMP2)
+ | beqz NARGS8:RC, ->vm_call_dispatch
+ |. daddiu PC, TMP3, 24+FRAME_PCALL // Frame link: delta 24, +1 selects hook variant.
+ | b <1 // Reuse pcall's arg-shift loop.
+ |. nop
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |.macro coroutine_resume_wrap, resume
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback
+ |.else
+ |.ffunc coroutine_wrap_aux
+ | ld L:CARG1, CFUNC:RB->upvalue[0].gcr
+ | cleartp L:CARG1
+ |.endif
+ | lbu TMP0, L:CARG1->status
+ | ld TMP1, L:CARG1->cframe
+ | ld CARG2, L:CARG1->top
+ | ld TMP2, L:CARG1->base
+ | addiu AT, TMP0, -LUA_YIELD
+ | daddu CARG3, CARG2, TMP0
+ | daddiu TMP3, CARG2, 8
+ |.if MIPSR6
+ | seleqz CARG2, CARG2, AT
+ | selnez TMP3, TMP3, AT
+ | bgtz AT, ->fff_fallback // st > LUA_YIELD?
+ |. or CARG2, TMP3, CARG2
+ |.else
+ | bgtz AT, ->fff_fallback // st > LUA_YIELD?
+ |. movn CARG2, TMP3, AT
+ |.endif
+ | xor TMP2, TMP2, CARG3
+ | bnez TMP1, ->fff_fallback // cframe != 0?
+ |. or AT, TMP2, TMP0
+ | ld TMP0, L:CARG1->maxstack
+ | beqz AT, ->fff_fallback // base == top && st == 0?
+ |. ld PC, FRAME_PC(BASE)
+ | daddu TMP2, CARG2, NARGS8:RC
+ | sltu AT, TMP0, TMP2
+ | bnez AT, ->fff_fallback // Stack overflow?
+ |. sd PC, SAVE_PC
+ | sd BASE, L->base
+ |1:
+ |.if resume
+ | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC.
+ | daddiu NARGS8:RC, NARGS8:RC, -8
+ | daddiu TMP2, TMP2, -8
+ |.endif
+ | sd TMP2, L:CARG1->top
+ | daddu TMP1, BASE, NARGS8:RC
+ | move CARG3, CARG2
+ | sd BASE, L->top
+ |2: // Move args to coroutine.
+ | ld CRET1, 0(BASE)
+ | sltu AT, BASE, TMP1
+ | beqz AT, >3
+ |. daddiu BASE, BASE, 8
+ | sd CRET1, 0(CARG3)
+ | b <2
+ |. daddiu CARG3, CARG3, 8
+ |3:
+ | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+ |. move L:RA, L:CARG1
+ | // Returns thread status.
+ |4:
+ | ld TMP2, L:RA->base
+ | sltiu AT, CRET1, LUA_YIELD+1
+ | ld TMP3, L:RA->top
+ | li_vmstate INTERP
+ | ld BASE, L->base
+ | sd L, DISPATCH_GL(cur_L)(DISPATCH)
+ | st_vmstate
+ | beqz AT, >8
+ |. dsubu RD, TMP3, TMP2
+ | ld TMP0, L->maxstack
+ | beqz RD, >6 // No results?
+ |. daddu TMP1, BASE, RD
+ | sltu AT, TMP0, TMP1
+ | bnez AT, >9 // Need to grow stack?
+ |. daddu TMP3, TMP2, RD
+ | sd TMP2, L:RA->top // Clear coroutine stack.
+ | move TMP1, BASE
+ |5: // Move results from coroutine.
+ | ld CRET1, 0(TMP2)
+ | daddiu TMP2, TMP2, 8
+ | sltu AT, TMP2, TMP3
+ | sd CRET1, 0(TMP1)
+ | bnez AT, <5
+ |. daddiu TMP1, TMP1, 8
+ |6:
+ | andi TMP0, PC, FRAME_TYPE
+ |.if resume
+ | mov_true TMP1
+ | daddiu RA, BASE, -8
+ | sd TMP1, -8(BASE) // Prepend true to results.
+ | daddiu RD, RD, 16
+ |.else
+ | move RA, BASE
+ | daddiu RD, RD, 8
+ |.endif
+ |7:
+ | sd PC, SAVE_PC
+ | beqz TMP0, ->BC_RET_Z
+ |. move MULTRES, RD
+ | b ->vm_return
+ |. nop
+ |
+ |8: // Coroutine returned with error (at co->top-1).
+ |.if resume
+ | daddiu TMP3, TMP3, -8
+ | mov_false TMP1
+ | ld CRET1, 0(TMP3)
+ | sd TMP3, L:RA->top // Remove error from coroutine stack.
+ | li RD, (2+1)*8
+ | sd TMP1, -8(BASE) // Prepend false to results.
+ | daddiu RA, BASE, -8
+ | sd CRET1, 0(BASE) // Copy error message.
+ | b <7
+ |. andi TMP0, PC, FRAME_TYPE
+ |.else
+ | load_got lj_ffh_coroutine_wrap_err
+ | move CARG2, L:RA
+ | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
+ |. move CARG1, L
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+ | load_got lj_state_growstack
+ | srl CARG2, RD, 3
+ | call_intern lj_state_growstack // (lua_State *L, int n)
+ |. move CARG1, L
+ | b <4
+ |. li CRET1, 0
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |.ffunc coroutine_yield
+ | ld TMP0, L->cframe
+ | daddu TMP1, BASE, NARGS8:RC
+ | sd BASE, L->base
+ | andi TMP0, TMP0, CFRAME_RESUME // Was this coroutine entered via resume?
+ | sd TMP1, L->top // Save args as the yield results.
+ | beqz TMP0, ->fff_fallback // Not resumable from here: fallback (error).
+ |. li CRET1, LUA_YIELD
+ | sd r0, L->cframe // Clear C frame link.
+ | b ->vm_leave_unw // Unwind back to the resume caller.
+ |. sb CRET1, L->status // Mark coroutine as yielded.
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+ |.ffunc_1 math_abs
+ | gettp CARG2, CARG1
+ | daddiu AT, CARG2, -LJ_TISNUM
+ | bnez AT, >1 // Not an integer?
+ |. sextw TMP1, CARG1
+ | sra TMP0, TMP1, 31 // Extract sign.
+ | xor TMP1, TMP1, TMP0 // Branchless abs: (x ^ (x>>31)) - (x>>31).
+ | dsubu CARG1, TMP1, TMP0
+ | dsll TMP3, CARG1, 32
+ | bgez TMP3, ->fff_restv // No overflow (not abs(INT_MIN))?
+ |. settp CARG1, TISNUM
+ | li CARG1, 0x41e0 // 2^31 as a double.
+ | b ->fff_restv // Overflow case: return 2^31 as a number.
+ |. dsll CARG1, CARG1, 48
+ |1:
+ | sltiu AT, CARG2, LJ_TISNUM
+ | beqz AT, ->fff_fallback // Fallback if not a number.
+ |. dextm CARG1, CARG1, 0, 30 // Keep low 63 bits: clears the sign bit (abs).
+ |// fallthrough
+ |
+ |->fff_restv:
+ | // CARG1 = TValue result.
+ | ld PC, FRAME_PC(BASE)
+ | daddiu RA, BASE, -16
+ | sd CARG1, -16(BASE) // Store single result below the frame.
+ |->fff_res1:
+ | // RA = results, PC = return.
+ | li RD, (1+1)*8
+ |->fff_res:
+ | // RA = results, RD = (nresults+1)*8, PC = return.
+ | andi TMP0, PC, FRAME_TYPE
+ | bnez TMP0, ->vm_return // Non-Lua frame: generic return.
+ |. move MULTRES, RD
+ | lw INS, -4(PC) // Load the calling instruction.
+ | decode_RB8a RB, INS
+ | decode_RB8b RB // RB = (wanted results+1)*8 from the call.
+ |5:
+ | sltu AT, RD, RB
+ | bnez AT, >6 // More results expected?
+ |. decode_RA8a TMP0, INS
+ | decode_RA8b TMP0
+ | ins_next1
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+ | dsubu BASE, RA, TMP0
+ | ins_next2
+ |
+ |6: // Fill up results with nil.
+ | daddu TMP1, RA, RD
+ | daddiu RD, RD, 8
+ | b <5
+ |. sd TISNIL, -8(TMP1)
+ |
+ |.macro math_extern, func
+ | .ffunc_n math_ .. func
+ | load_got func
+ | call_extern
+ |. nop
+ | b ->fff_resn
+ |. nop
+ |.endmacro
+ |
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
+ |. load_got func
+ | call_extern
+ |. nop
+ | b ->fff_resn
+ |. nop
+ |.endmacro
+ |
+ |// TODO: Return integer type if result is integer (own sf implementation).
+ |.macro math_round, func
+ |->ff_math_ .. func:
+ | ld CARG1, 0(BASE)
+ | beqz NARGS8:RC, ->fff_fallback
+ |. gettp TMP0, CARG1
+ | beq TMP0, TISNUM, ->fff_restv
+ |. sltu AT, TMP0, TISNUM
+ | beqz AT, ->fff_fallback
+ |.if FPU
+ |. ldc1 FARG1, 0(BASE)
+ | bal ->vm_ .. func
+ |. nop
+ |.else
+ |. load_got func
+ | call_extern
+ |. nop
+ |.endif
+ | b ->fff_resn
+ |. nop
+ |.endmacro
+ |
+ | math_round floor
+ | math_round ceil
+ |
+ |.ffunc math_log
+ | li AT, 8
+ | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument.
+ |. ld CARG1, 0(BASE)
+ | checknum CARG1, ->fff_fallback
+ |. load_got log
+ |.if FPU
+ | call_extern
+ |. ldc1 FARG1, 0(BASE)
+ |.else
+ | call_extern
+ |. nop
+ |.endif
+ | b ->fff_resn
+ |. nop
+ |
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
+ | math_extern sinh
+ | math_extern cosh
+ | math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
+ |
+ |.if FPU
+ |.ffunc_n math_sqrt
+ |. sqrt.d FRET1, FARG1
+ |// fallthrough to ->fff_resn
+ |.else
+ | math_extern sqrt
+ |.endif
+ |
+ |->fff_resn:
+ | ld PC, FRAME_PC(BASE)
+ | daddiu RA, BASE, -16
+ | b ->fff_res1
+ |.if FPU
+ |. sdc1 FRET1, 0(RA)
+ |.else
+ |. sd CRET1, 0(RA)
+ |.endif
+ |
+ |
+ |.ffunc_2 math_ldexp
+ | checknum CARG1, ->fff_fallback
+ | checkint CARG2, ->fff_fallback
+ |. load_got ldexp
+ | .FPU ldc1 FARG1, 0(BASE)
+ | call_extern
+ |. lw CARG2, 8+LO(BASE)
+ | b ->fff_resn
+ |. nop
+ |
+ |.ffunc_n math_frexp
+ | load_got frexp
+ | ld PC, FRAME_PC(BASE)
+ | call_extern
+ |. daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv)
+ | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH)
+ | daddiu RA, BASE, -16
+ |.if FPU
+ | mtc1 TMP1, FARG2
+ | sdc1 FRET1, 0(RA)
+ | cvt.d.w FARG2, FARG2
+ | sdc1 FARG2, 8(RA)
+ |.else
+ | sd CRET1, 0(RA)
+ | zextw TMP1, TMP1
+ | settp TMP1, TISNUM
+ | sd TMP1, 8(RA)
+ |.endif
+ | b ->fff_res
+ |. li RD, (2+1)*8
+ |
+ |.ffunc_n math_modf
+ | load_got modf
+ | ld PC, FRAME_PC(BASE)
+ | call_extern
+ |. daddiu CARG2, BASE, -16
+ | daddiu RA, BASE, -16
+ |.if FPU
+ | sdc1 FRET1, -8(BASE)
+ |.else
+ | sd CRET1, -8(BASE)
+ |.endif
+ | b ->fff_res
+ |. li RD, (2+1)*8
+ |
+ |.macro math_minmax, name, intins, intinsc, fpins
+ | .ffunc_1 name
+ | daddu TMP3, BASE, NARGS8:RC
+ | checkint CARG1, >5
+ |. daddiu TMP2, BASE, 8
+ |1: // Handle integers.
+ | beq TMP2, TMP3, ->fff_restv
+ |. ld CARG2, 0(TMP2)
+ | checkint CARG2, >3
+ |. sextw CARG1, CARG1
+ | lw CARG2, LO(TMP2)
+ |. slt AT, CARG1, CARG2
+ |.if MIPSR6
+ | intins TMP1, CARG2, AT
+ | intinsc CARG1, CARG1, AT
+ | or CARG1, CARG1, TMP1
+ |.else
+ | intins CARG1, CARG2, AT
+ |.endif
+ | daddiu TMP2, TMP2, 8
+ | zextw CARG1, CARG1
+ | b <1
+ |. settp CARG1, TISNUM
+ |
+ |3: // Convert intermediate result to number and continue with number loop.
+ | checknum CARG2, ->fff_fallback
+ |.if FPU
+ |. mtc1 CARG1, FRET1
+ | cvt.d.w FRET1, FRET1
+ | b >7
+ |. ldc1 FARG1, 0(TMP2)
+ |.else
+ |. nop
+ | bal ->vm_sfi2d_1
+ |. nop
+ | b >7
+ |. nop
+ |.endif
+ |
+ |5:
+ | .FPU ldc1 FRET1, 0(BASE)
+ | checknum CARG1, ->fff_fallback
+ |6: // Handle numbers.
+ |. ld CARG2, 0(TMP2)
+ | beq TMP2, TMP3, ->fff_resn
+ |.if FPU
+ | ldc1 FARG1, 0(TMP2)
+ |.else
+ | move CRET1, CARG1
+ |.endif
+ | checknum CARG2, >8
+ |. nop
+ |7:
+ |.if FPU
+ |.if MIPSR6
+ | fpins FRET1, FRET1, FARG1
+ |.else
+ |.if fpins // ismax
+ | c.olt.d FARG1, FRET1
+ |.else
+ | c.olt.d FRET1, FARG1
+ |.endif
+ | movf.d FRET1, FARG1
+ |.endif
+ |.else
+ |.if fpins // ismax
+ | bal ->vm_sfcmpogt
+ |.else
+ | bal ->vm_sfcmpolt
+ |.endif
+ |. nop
+ |.if MIPSR6
+ | seleqz AT, CARG2, CRET1
+ | selnez CARG1, CARG1, CRET1
+ | or CARG1, CARG1, AT
+ |.else
+ | movz CARG1, CARG2, CRET1
+ |.endif
+ |.endif
+ | b <6
+ |. daddiu TMP2, TMP2, 8
+ |
+ |8: // Convert integer to number and continue with number loop.
+ | checkint CARG2, ->fff_fallback
+ |.if FPU
+ |. lwc1 FARG1, LO(TMP2)
+ | b <7
+ |. cvt.d.w FARG1, FARG1
+ |.else
+ |. lw CARG2, LO(TMP2)
+ | bal ->vm_sfi2d_2
+ |. nop
+ | b <7
+ |. nop
+ |.endif
+ |
+ |.endmacro
+ |
+ |.if MIPSR6
+ | math_minmax math_min, seleqz, selnez, min.d
+ | math_minmax math_max, selnez, seleqz, max.d
+ |.else
+ | math_minmax math_min, movz, _, 0
+ | math_minmax math_max, movn, _, 1
+ |.endif
+ |
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+ | ld CARG1, 0(BASE)
+ | gettp TMP0, CARG1
+ | xori AT, NARGS8:RC, 8 // AT != 0 if nargs != 1.
+ | daddiu TMP0, TMP0, -LJ_TSTR // TMP0 != 0 if not a string.
+ | or AT, AT, TMP0
+ | bnez AT, ->fff_fallback // Need exactly 1 string argument.
+ |. cleartp STR:CARG1
+ | lw TMP0, STR:CARG1->len
+ | daddiu RA, BASE, -16
+ | ld PC, FRAME_PC(BASE)
+ | sltu RD, r0, TMP0 // RD = (len != 0): 0 results for "".
+ | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
+ | addiu RD, RD, 1
+ | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8
+ | settp TMP1, TISNUM // Box first byte as an integer.
+ | b ->fff_res
+ |. sd TMP1, 0(RA)
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
+ |.if not MIPSR6
+ |. nop
+ |.endif
+ | ld CARG1, 0(BASE)
+ | gettp TMP0, CARG1
+ | xori AT, NARGS8:RC, 8 // Exactly 1 argument.
+ | daddiu TMP0, TMP0, -LJ_TISNUM // Integer.
+ | li TMP1, 255
+ | sextw CARG1, CARG1
+ | or AT, AT, TMP0
+ | sltu TMP1, TMP1, CARG1 // !(255 < n).
+ | or AT, AT, TMP1
+ | bnez AT, ->fff_fallback
+ |. li CARG3, 1
+ | daddiu CARG2, sp, TMPD_OFS
+ | sb CARG1, TMPD
+ |->fff_newstr:
+ | load_got lj_str_new
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | call_intern lj_str_new // (lua_State *L, char *str, size_t l)
+ |. move CARG1, L
+ | // Returns GCstr *.
+ | ld BASE, L->base
+ |->fff_resstr:
+ | li AT, LJ_TSTR
+ | settp CRET1, AT
+ | b ->fff_restv
+ |. move CARG1, CRET1
+ |
+ |.ffunc string_sub
+ | ffgccheck
+ |.if not MIPSR6
+ |. nop
+ |.endif
+ | addiu AT, NARGS8:RC, -16
+ | ld TMP0, 0(BASE)
+ | bltz AT, ->fff_fallback
+ |. gettp TMP3, TMP0
+ | cleartp STR:CARG1, TMP0
+ | ld CARG2, 8(BASE)
+ | beqz AT, >1
+ |. li CARG4, -1
+ | ld CARG3, 16(BASE)
+ | checkint CARG3, ->fff_fallback
+ |. sextw CARG4, CARG3
+ |1:
+ | checkint CARG2, ->fff_fallback
+ |. li AT, LJ_TSTR
+ | bne TMP3, AT, ->fff_fallback
+ |. sextw CARG3, CARG2
+ | lw CARG2, STR:CARG1->len
+ | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end
+ | slt AT, CARG4, r0
+ | addiu TMP0, CARG2, 1
+ | addu TMP1, CARG4, TMP0
+ | slt TMP3, CARG3, r0
+ |.if MIPSR6
+ | seleqz CARG4, CARG4, AT
+ | selnez TMP1, TMP1, AT
+ | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1
+ |.else
+ | movn CARG4, TMP1, AT // if (end < 0) end += len+1
+ |.endif
+ | addu TMP1, CARG3, TMP0
+ |.if MIPSR6
+ | selnez TMP1, TMP1, TMP3
+ | seleqz CARG3, CARG3, TMP3
+ | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1
+ | li TMP2, 1
+ | slt AT, CARG4, r0
+ | slt TMP3, r0, CARG3
+ | seleqz CARG4, CARG4, AT // if (end < 0) end = 0
+ | selnez CARG3, CARG3, TMP3
+ | seleqz TMP2, TMP2, TMP3
+ | or CARG3, TMP2, CARG3 // if (start < 1) start = 1
+ | slt AT, CARG2, CARG4
+ | seleqz CARG4, CARG4, AT
+ | selnez CARG2, CARG2, AT
+ | or CARG4, CARG2, CARG4 // if (end > len) end = len
+ |.else
+ | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1
+ | li TMP2, 1
+ | slt AT, CARG4, r0
+ | slt TMP3, r0, CARG3
+ | movn CARG4, r0, AT // if (end < 0) end = 0
+ | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1
+ | slt AT, CARG2, CARG4
+ | movn CARG4, CARG2, AT // if (end > len) end = len
+ |.endif
+ | daddu CARG2, STR:CARG1, CARG3
+ | subu CARG3, CARG4, CARG3 // len = end - start
+ | daddiu CARG2, CARG2, sizeof(GCstr)-1
+ | bgez CARG3, ->fff_newstr
+ |. addiu CARG3, CARG3, 1 // len++
+ |->fff_emptystr: // Return empty string.
+ | li AT, LJ_TSTR
+ | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty)
+ | b ->fff_restv
+ |. settp CARG1, AT
+ |
+ |.macro ffstring_op, name
+ | .ffunc string_ .. name
+ | ffgccheck
+ |. nop
+ | beqz NARGS8:RC, ->fff_fallback
+ |. ld CARG2, 0(BASE)
+ | checkstr STR:CARG2, ->fff_fallback
+ | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf)
+ | load_got lj_buf_putstr_ .. name
+ | ld TMP0, SBUF:CARG1->b
+ | sd L, SBUF:CARG1->L
+ | sd BASE, L->base
+ | sd TMP0, SBUF:CARG1->w
+ | call_intern extern lj_buf_putstr_ .. name
+ |. sd PC, SAVE_PC
+ | load_got lj_buf_tostr
+ | call_intern lj_buf_tostr
+ |. move SBUF:CARG1, SBUF:CRET1
+ | b ->fff_resstr
+ |. ld BASE, L->base
+ |.endmacro
+ |
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
+ |
+ |//-- Bit library --------------------------------------------------------
+ |
+ |->vm_tobit_fb:
+ | beqz TMP1, ->fff_fallback // TMP1 = 0 if arg is not a number (set by caller).
+ |.if FPU
+ |. ldc1 FARG1, 0(BASE)
+ | add.d FARG1, FARG1, TOBIT // Add 2^52+2^51: integer part lands in the low word.
+ | mfc1 CRET1, FARG1
+ | jr ra
+ |. zextw CRET1, CRET1 // Truncate to 32 bits.
+ |.else
+ |// FP number to bit conversion for soft-float.
+ |->vm_tobit:
+ | dsll TMP0, CARG1, 1 // Strip sign bit.
+ | li CARG3, 1076
+ | dsrl AT, TMP0, 53 // AT = biased exponent.
+ | dsubu CARG3, CARG3, AT // Shift count = 1076 - exponent.
+ | sltiu AT, CARG3, 54
+ | beqz AT, >1 // Shift count out of range: result is 0.
+ |. dextm TMP0, TMP0, 0, 20 // Keep the 53 mantissa bits.
+ | dinsu TMP0, AT, 21, 21 // Add the implicit leading 1 bit (AT == 1 here).
+ | slt AT, CARG1, r0 // AT = sign of the input.
+ | dsrlv CRET1, TMP0, CARG3 // Shift integer part into place.
+ | dsubu TMP0, r0, CRET1
+ |.if MIPSR6
+ | selnez TMP0, TMP0, AT
+ | seleqz CRET1, CRET1, AT
+ | or CRET1, CRET1, TMP0 // Negate result if input was negative.
+ |.else
+ | movn CRET1, TMP0, AT // Negate result if input was negative.
+ |.endif
+ | jr ra
+ |. zextw CRET1, CRET1 // Truncate to 32 bits.
+ |1:
+ | jr ra
+ |. move CRET1, r0
+ |
+ |// FP number to int conversion with a check for soft-float.
+ |// Modifies CARG1, CRET1, CRET2, TMP0, AT.
+ |->vm_tointg:
+ |.if JIT
+ | dsll CRET2, CARG1, 1
+ | beqz CRET2, >2
+ |. li TMP0, 1076
+ | dsrl AT, CRET2, 53
+ | dsubu TMP0, TMP0, AT
+ | sltiu AT, TMP0, 54
+ | beqz AT, >1
+ |. dextm CRET2, CRET2, 0, 20
+ | dinsu CRET2, AT, 21, 21
+ | slt AT, CARG1, r0
+ | dsrlv CRET1, CRET2, TMP0
+ | dsubu CARG1, r0, CRET1
+ |.if MIPSR6
+ | seleqz CRET1, CRET1, AT
+ | selnez CARG1, CARG1, AT
+ | or CRET1, CRET1, CARG1
+ |.else
+ | movn CRET1, CARG1, AT
+ |.endif
+ | li CARG1, 64
+ | subu TMP0, CARG1, TMP0
+ | dsllv CRET2, CRET2, TMP0 // Integer check.
+ | sextw AT, CRET1
+ | xor AT, CRET1, AT // Range check.
+ |.if MIPSR6
+ | seleqz AT, AT, CRET2
+ | selnez CRET2, CRET2, CRET2
+ | jr ra
+ |. or CRET2, AT, CRET2
+ |.else
+ | jr ra
+ |. movz CRET2, AT, CRET2
+ |.endif
+ |1:
+ | jr ra
+ |. li CRET2, 1
+ |2:
+ | jr ra
+ |. move CRET1, r0
+ |.endif
+ |.endif
+ |
+ |.macro .ffunc_bit, name
+ | .ffunc_1 bit_..name
+ | gettp TMP0, CARG1
+ | beq TMP0, TISNUM, >6
+ |. zextw CRET1, CARG1
+ | bal ->vm_tobit_fb
+ |. sltiu TMP1, TMP0, LJ_TISNUM
+ |6:
+ |.endmacro
+ |
+ |.macro .ffunc_bit_op, name, bins
+ | .ffunc_bit name
+ | daddiu TMP2, BASE, 8
+ | daddu TMP3, BASE, NARGS8:RC
+ |1:
+ | beq TMP2, TMP3, ->fff_resi
+ |. ld CARG1, 0(TMP2)
+ | gettp TMP0, CARG1
+ |.if FPU
+ | bne TMP0, TISNUM, >2
+ |. daddiu TMP2, TMP2, 8
+ | zextw CARG1, CARG1
+ | b <1
+ |. bins CRET1, CRET1, CARG1
+ |2:
+ | ldc1 FARG1, -8(TMP2)
+ | sltiu AT, TMP0, LJ_TISNUM
+ | beqz AT, ->fff_fallback
+ |. add.d FARG1, FARG1, TOBIT
+ | mfc1 CARG1, FARG1
+ | zextw CARG1, CARG1
+ | b <1
+ |. bins CRET1, CRET1, CARG1
+ |.else
+ | beq TMP0, TISNUM, >2
+ |. move CRET2, CRET1
+ | bal ->vm_tobit_fb
+ |. sltiu TMP1, TMP0, LJ_TISNUM
+ | move CARG1, CRET2
+ |2:
+ | zextw CARG1, CARG1
+ | bins CRET1, CRET1, CARG1
+ | b <1
+ |. daddiu TMP2, TMP2, 8
+ |.endif
+ |.endmacro
+ |
+ |.ffunc_bit_op band, and
+ |.ffunc_bit_op bor, or
+ |.ffunc_bit_op bxor, xor
+ |
+ |.ffunc_bit bswap // 32 bit byte swap.
+ | dsrl TMP0, CRET1, 8
+ | dsrl TMP1, CRET1, 24 // Byte 3 -> bits 0-7.
+ | andi TMP2, TMP0, 0xff00 // Byte 2 -> bits 8-15.
+ | dins TMP1, CRET1, 24, 31 // Byte 0 -> bits 24-31.
+ | dins TMP2, TMP0, 16, 23 // Byte 1 -> bits 16-23.
+ | b ->fff_resi
+ |. or CRET1, TMP1, TMP2 // Combine swapped halves.
+ |
+ |.ffunc_bit bnot // Bitwise not, truncated to 32 bits.
+ | not CRET1, CRET1
+ | b ->fff_resi
+ |. zextw CRET1, CRET1 // Truncate to 32 bits.
+ |
+ |.macro .ffunc_bit_sh, name, shins, shmod // Shift/rotate fast functions.
+ | .ffunc_2 bit_..name
+ | gettp TMP0, CARG1
+ | beq TMP0, TISNUM, >1 // First arg already an integer?
+ |. nop
+ | bal ->vm_tobit_fb // Else convert number to bits (or fallback).
+ |. sltiu TMP1, TMP0, LJ_TISNUM
+ | move CARG1, CRET1
+ |1:
+ | gettp TMP0, CARG2
+ | bne TMP0, TISNUM, ->fff_fallback // Shift count must be an integer.
+ |. zextw CARG2, CARG2
+ | sextw CARG1, CARG1
+ |.if shmod == 1
+ | negu CARG2, CARG2 // rol: rotate left = rotate right by negated count.
+ |.endif
+ | shins CRET1, CARG1, CARG2
+ | b ->fff_resi
+ |. zextw CRET1, CRET1 // Truncate result to 32 bits.
+ |.endmacro
+ |
+ |.ffunc_bit_sh lshift, sllv, 0
+ |.ffunc_bit_sh rshift, srlv, 0
+ |.ffunc_bit_sh arshift, srav, 0
+ |.ffunc_bit_sh rol, rotrv, 1
+ |.ffunc_bit_sh ror, rotrv, 0
+ |
+ |.ffunc_bit tobit // Conversion alone is the whole fast function.
+ |->fff_resi: // Return integer result in CRET1.
+ | ld PC, FRAME_PC(BASE)
+ | daddiu RA, BASE, -16
+ | settp CRET1, TISNUM // Box as integer TValue.
+ | b ->fff_res1
+ |. sd CRET1, -16(BASE)
+ |
+ |//-----------------------------------------------------------------------
+ |->fff_fallback: // Call fast function fallback handler.
+ | // BASE = new base, RB = CFUNC, RC = nargs*8
+ | ld TMP3, CFUNC:RB->f
+ | daddu TMP1, BASE, NARGS8:RC
+ | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC.
+ | daddiu TMP0, TMP1, 8*LUA_MINSTACK
+ | ld TMP2, L->maxstack
+ | sd PC, SAVE_PC // Redundant (but a defined value).
+ | sltu AT, TMP2, TMP0
+ | sd BASE, L->base
+ | sd TMP1, L->top
+ | bnez AT, >5 // Need to grow stack.
+ |. move CFUNCADDR, TMP3
+ | jalr TMP3 // (lua_State *L)
+ |. move CARG1, L
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+ | ld BASE, L->base
+ | sll RD, CRET1, 3
+ | bgtz CRET1, ->fff_res // Returned nresults+1?
+ |. daddiu RA, BASE, -16
+ |1: // Returned 0 or -1: retry fast path.
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
+ | ld TMP0, L->top
+ | cleartp LFUNC:RB
+ | bnez CRET1, ->vm_call_tail // Returned -1?
+ |. dsubu NARGS8:RC, TMP0, BASE
+ | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+ | andi TMP0, PC, FRAME_TYPE
+ | li AT, -4
+ | bnez TMP0, >3
+ |. and TMP1, PC, AT
+ | lbu TMP1, OFS_RA(PC)
+ | sll TMP1, TMP1, 3
+ | addiu TMP1, TMP1, 16
+ |3:
+ | b ->vm_call_dispatch // Resolve again for tailcall.
+ |. dsubu TMP2, BASE, TMP1
+ |
+ |5: // Grow stack for fallback handler.
+ | load_got lj_state_growstack
+ | li CARG2, LUA_MINSTACK
+ | call_intern lj_state_growstack // (lua_State *L, int n)
+ |. move CARG1, L
+ | ld BASE, L->base
+ | b <1
+ |. li CRET1, 0 // Force retry.
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RC = nargs*8
+ | move MULTRES, ra
+ | load_got lj_gc_step
+ | sd BASE, L->base
+ | daddu TMP0, BASE, NARGS8:RC
+ | sd PC, SAVE_PC // Redundant (but a defined value).
+ | sd TMP0, L->top
+ | call_intern lj_gc_step // (lua_State *L)
+ |. move CARG1, L
+ | ld BASE, L->base
+ | move ra, MULTRES
+ | ld TMP0, L->top
+ | ld CFUNC:RB, FRAME_FUNC(BASE)
+ | cleartp CFUNC:RB
+ | jr ra
+ |. dsubu NARGS8:RC, TMP0, BASE
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+ |.if JIT
+ | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
+ | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent.
+ | bnez AT, >5
+ | // Decrement the hookcount for consistency, but always do the call.
+ |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
+ | andi AT, TMP3, HOOK_ACTIVE
+ | bnez AT, >1
+ |. addiu TMP2, TMP2, -1
+ | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
+ | beqz AT, >1
+ |. nop
+ | b >1
+ |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
+ |.endif
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+ | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
+ | andi AT, TMP3, HOOK_ACTIVE // Hook already active?
+ | beqz AT, >1
+ |5: // Re-dispatch to static ins.
+ |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4.
+ | jr AT
+ |. nop
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+ | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
+ | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
+ | andi AT, TMP3, HOOK_ACTIVE // Hook already active?
+ | bnez AT, <5
+ |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT
+ | beqz AT, <5
+ |. addiu TMP2, TMP2, -1
+ | beqz TMP2, >1
+ |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
+ | andi AT, TMP3, LUA_MASKLINE
+ | beqz AT, <5
+ |1:
+ |. load_got lj_dispatch_ins
+ | sw MULTRES, SAVE_MULTRES
+ | move CARG2, PC
+ | sd BASE, L->base
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+ | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
+ |. move CARG1, L
+ |3:
+ | ld BASE, L->base
+ |4: // Re-dispatch to static ins.
+ | lw INS, -4(PC)
+ | decode_OP8a TMP1, INS
+ | decode_OP8b TMP1
+ | daddu TMP0, DISPATCH, TMP1
+ | decode_RD8a RD, INS
+ | ld AT, GG_DISP2STATIC(TMP0)
+ | decode_RA8a RA, INS
+ | decode_RD8b RD
+ | jr AT
+ | decode_RA8b RA
+ |
+ |->cont_hook: // Continue from hook yield.
+ | daddiu PC, PC, 4
+ | b <4
+ |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins.
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+ |.if JIT
+ | ld LFUNC:TMP1, FRAME_FUNC(BASE)
+ | daddiu CARG1, DISPATCH, GG_DISP2J
+ | cleartp LFUNC:TMP1
+ | sd PC, SAVE_PC
+ | ld TMP1, LFUNC:TMP1->pc
+ | move CARG2, PC
+ | sd L, DISPATCH_J(L)(DISPATCH)
+ | lbu TMP1, PC2PROTO(framesize)(TMP1)
+ | load_got lj_trace_hot
+ | sd BASE, L->base
+ | dsll TMP1, TMP1, 3
+ | daddu TMP1, BASE, TMP1
+ | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc)
+ |. sd TMP1, L->top
+ | b <3
+ |. nop
+ |.endif
+ |
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+ |.if JIT
+ | b >1
+ |.endif
+ |. move CARG2, PC
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+ |.if JIT
+ | ori CARG2, PC, 1
+ |1:
+ |.endif
+ | load_got lj_dispatch_call
+ | daddu TMP0, BASE, RC
+ | sd PC, SAVE_PC
+ | sd BASE, L->base
+ | dsubu RA, RA, BASE
+ | sd TMP0, L->top
+ | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc)
+ |. move CARG1, L
+ | // Returns ASMFunction.
+ | ld BASE, L->base
+ | ld TMP0, L->top
+ | sd r0, SAVE_PC // Invalidate for subsequent line hook.
+ | dsubu NARGS8:RC, TMP0, BASE
+ | daddu RA, BASE, RA
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
+ | cleartp LFUNC:RB
+ | jr CRET1
+ |. lw INS, -4(PC)
+ |
+ |->cont_stitch: // Trace stitching.
+ |.if JIT
+ | // RA = resultptr, RB = meta base
+ | lw INS, -4(PC)
+ | ld TRACE:TMP2, -40(RB) // Save previous trace.
+ | decode_RA8a RC, INS
+ | daddiu AT, MULTRES, -8
+ | cleartp TRACE:TMP2
+ | decode_RA8b RC
+ | beqz AT, >2
+ |. daddu RC, BASE, RC // Call base.
+ |1: // Move results down.
+ | ld CARG1, 0(RA)
+ | daddiu AT, AT, -8
+ | daddiu RA, RA, 8
+ | sd CARG1, 0(RC)
+ | bnez AT, <1
+ |. daddiu RC, RC, 8
+ |2:
+ | decode_RA8a RA, INS
+ | decode_RB8a RB, INS
+ | decode_RA8b RA
+ | decode_RB8b RB
+ | daddu RA, RA, RB
+ | daddu RA, BASE, RA
+ |3:
+ | sltu AT, RC, RA
+ | bnez AT, >9 // More results wanted?
+ |. nop
+ |
+ | lhu TMP3, TRACE:TMP2->traceno
+ | lhu RD, TRACE:TMP2->link
+ | beq RD, TMP3, ->cont_nop // Blacklisted.
+ |. load_got lj_dispatch_stitch
+ | bnez RD, =>BC_JLOOP // Jump to stitched trace.
+ |. sll RD, RD, 3
+ |
+ | // Stitch a new trace to the previous trace.
+ | sw TMP3, DISPATCH_J(exitno)(DISPATCH)
+ | sd L, DISPATCH_J(L)(DISPATCH)
+ | sd BASE, L->base
+ | daddiu CARG1, DISPATCH, GG_DISP2J
+ | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
+ |. move CARG2, PC
+ | b ->cont_nop
+ |. ld BASE, L->base
+ |
+ |9:
+ | sd TISNIL, 0(RC)
+ | b <3
+ |. daddiu RC, RC, 8
+ |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | load_got lj_dispatch_profile
+ | sw MULTRES, SAVE_MULTRES
+ | move CARG2, PC
+ | sd BASE, L->base
+ | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
+ |. move CARG1, L
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | daddiu PC, PC, -4
+ | b ->cont_nop
+ |. ld BASE, L->base
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.macro savex_, a, b
+ |.if FPU
+ | sdc1 f..a, a*8(sp)
+ | sdc1 f..b, b*8(sp)
+ | sd r..a, 32*8+a*8(sp)
+ | sd r..b, 32*8+b*8(sp)
+ |.else
+ | sd r..a, a*8(sp)
+ | sd r..b, b*8(sp)
+ |.endif
+ |.endmacro
+ |
+ |->vm_exit_handler:
+ |.if JIT
+ |.if FPU
+ | daddiu sp, sp, -(32*8+32*8)
+ |.else
+ | daddiu sp, sp, -(32*8)
+ |.endif
+ | savex_ 0, 1
+ | savex_ 2, 3
+ | savex_ 4, 5
+ | savex_ 6, 7
+ | savex_ 8, 9
+ | savex_ 10, 11
+ | savex_ 12, 13
+ | savex_ 14, 15
+ | savex_ 16, 17
+ | savex_ 18, 19
+ | savex_ 20, 21
+ | savex_ 22, 23
+ | savex_ 24, 25
+ | savex_ 26, 27
+ | savex_ 28, 30
+ |.if FPU
+ | sdc1 f29, 29*8(sp)
+ | sdc1 f31, 31*8(sp)
+ | sd r0, 32*8+31*8(sp) // Clear RID_TMP.
+ | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp.
+ | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP
+ |.else
+ | sd r0, 31*8(sp) // Clear RID_TMP.
+ | daddiu TMP2, sp, 32*8 // Recompute original value of sp.
+ | sd TMP2, 29*8(sp) // Store sp in RID_SP
+ |.endif
+ | li_vmstate EXIT
+ | daddiu DISPATCH, JGL, -GG_DISP2G-32768
+ | lw TMP1, 0(TMP2) // Load exit number.
+ | st_vmstate
+ | ld L, DISPATCH_GL(cur_L)(DISPATCH)
+ | ld BASE, DISPATCH_GL(jit_base)(DISPATCH)
+ | load_got lj_trace_exit
+ | sd L, DISPATCH_J(L)(DISPATCH)
+ | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number.
+ | sd BASE, L->base
+ | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number.
+ | daddiu CARG1, DISPATCH, GG_DISP2J
+ | sd r0, DISPATCH_GL(jit_base)(DISPATCH)
+ | call_intern lj_trace_exit // (jit_State *J, ExitState *ex)
+ |. move CARG2, sp
+ | // Returns MULTRES (unscaled) or negated error code.
+ | ld TMP1, L->cframe
+ | li AT, -4
+ | ld BASE, L->base
+ | and sp, TMP1, AT
+ | ld PC, SAVE_PC // Get SAVE_PC.
+ | b >1
+ |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield).
+ |.endif
+ |->vm_exit_interp:
+ |.if JIT
+ | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set.
+ | ld L, SAVE_L
+ | daddiu DISPATCH, JGL, -GG_DISP2G-32768
+ | sd BASE, L->base
+ |1:
+ | bltz CRET1, >9 // Check for error from exit.
+ |. ld LFUNC:RB, FRAME_FUNC(BASE)
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | dsll MULTRES, CRET1, 3
+ | cleartp LFUNC:RB
+ | sw MULTRES, SAVE_MULTRES
+ | li TISNIL, LJ_TNIL
+ | li TISNUM, LJ_TISNUM // Setup type comparison constants.
+ | .FPU mtc1 TMP3, TOBIT
+ | ld TMP1, LFUNC:RB->pc
+ | sd r0, DISPATCH_GL(jit_base)(DISPATCH)
+ | ld KBASE, PC2PROTO(k)(TMP1)
+ | .FPU cvt.d.s TOBIT, TOBIT
+ | // Modified copy of ins_next which handles function header dispatch, too.
+ | lw INS, 0(PC)
+ | daddiu PC, PC, 4
+ | // Assumes TISNIL == ~LJ_VMST_INTERP == -1
+ | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH)
+ | decode_OP8a TMP1, INS
+ | decode_OP8b TMP1
+ | sltiu TMP2, TMP1, BC_FUNCF*8
+ | daddu TMP0, DISPATCH, TMP1
+ | decode_RD8a RD, INS
+ | ld AT, 0(TMP0)
+ | decode_RA8a RA, INS
+ | beqz TMP2, >2
+ |. decode_RA8b RA
+ | jr AT
+ |. decode_RD8b RD
+ |2:
+ | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function?
+ | bnez TMP2, >3
+ |. ld TMP1, FRAME_PC(BASE)
+ | // Check frame below fast function.
+ | andi TMP0, TMP1, FRAME_TYPE
+ | bnez TMP0, >3 // Trace stitching continuation?
+ |. nop
+ | // Otherwise set KBASE for Lua function below fast function.
+ | lw TMP2, -4(TMP1)
+ | decode_RA8a TMP0, TMP2
+ | decode_RA8b TMP0
+ | dsubu TMP1, BASE, TMP0
+ | ld LFUNC:TMP2, -32(TMP1)
+ | cleartp LFUNC:TMP2
+ | ld TMP1, LFUNC:TMP2->pc
+ | ld KBASE, PC2PROTO(k)(TMP1)
+ |3:
+ | daddiu RC, MULTRES, -8
+ | jr AT
+ |. daddu RA, RA, BASE
+ |
+ |9: // Rethrow error from the right C frame.
+ | load_got lj_err_trace
+ | sub CARG2, r0, CRET1
+ | call_intern lj_err_trace // (lua_State *L, int errcode)
+ |. move CARG1, L
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Hard-float round to integer.
+ |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1.
+ |// MIPSR6: Modifies FTMP1, too.
+ |.macro vm_round_hf, func
+ | lui TMP0, 0x4330 // Hiword of 2^52 (double).
+ | dsll TMP0, TMP0, 32
+ | dmtc1 TMP0, f4
+ | abs.d FRET2, FARG1 // |x|
+ | dmfc1 AT, FARG1
+ |.if MIPSR6
+ | cmp.lt.d FTMP1, FRET2, f4
+ | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
+ | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52.
+ |.else
+ | c.olt.d 0, FRET2, f4
+ | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
+ | bc1f 0, >1 // Truncate only if |x| < 2^52.
+ |.endif
+ |. sub.d FRET1, FRET1, f4
+ | slt AT, AT, r0
+ |.if "func" == "ceil"
+ | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0.
+ |.else
+ | lui TMP0, 0x3ff0 // Hiword of +1 (double).
+ |.endif
+ |.if "func" == "trunc"
+ | dsll TMP0, TMP0, 32
+ | dmtc1 TMP0, f4
+ |.if MIPSR6
+ | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result?
+ | sub.d FRET2, FRET1, f4
+ | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1.
+ | dmtc1 AT, FRET1
+ | neg.d FRET2, FTMP1
+ | jr ra
+ |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in.
+ |.else
+ | c.olt.d 0, FRET2, FRET1 // |x| < result?
+ | sub.d FRET2, FRET1, f4
+ | movt.d FRET1, FRET2, 0 // If yes, subtract +1.
+ | neg.d FRET2, FRET1
+ | jr ra
+ |. movn.d FRET1, FRET2, AT // Merge sign bit back in.
+ |.endif
+ |.else
+ | neg.d FRET2, FRET1
+ | dsll TMP0, TMP0, 32
+ | dmtc1 TMP0, f4
+ |.if MIPSR6
+ | dmtc1 AT, FTMP1
+ | sel.d FTMP1, FRET1, FRET2
+ |.if "func" == "ceil"
+ | cmp.lt.d FRET1, FTMP1, FARG1 // x > result?
+ |.else
+ | cmp.lt.d FRET1, FARG1, FTMP1 // x < result?
+ |.endif
+ | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1.
+ | jr ra
+ |. sel.d FRET1, FTMP1, FRET2
+ |.else
+ | movn.d FRET1, FRET2, AT // Merge sign bit back in.
+ |.if "func" == "ceil"
+ | c.olt.d 0, FRET1, FARG1 // x > result?
+ |.else
+ | c.olt.d 0, FARG1, FRET1 // x < result?
+ |.endif
+ | sub.d FRET2, FRET1, f4 // If yes, subtract +-1.
+ | jr ra
+ |. movt.d FRET1, FRET2, 0
+ |.endif
+ |.endif
+ |1:
+ | jr ra
+ |. mov.d FRET1, FARG1
+ |.endmacro
+ |
+ |.macro vm_round, func
+ |.if FPU
+ | vm_round_hf, func
+ |.endif
+ |.endmacro
+ |
+ |->vm_floor:
+ | vm_round floor
+ |->vm_ceil:
+ | vm_round ceil
+ |->vm_trunc:
+ |.if JIT
+ | vm_round trunc
+ |.endif
+ |
+ |// Soft-float integer to number conversion.
+ |.macro sfi2d, ARG
+ |.if not FPU
+ | beqz ARG, >9 // Handle zero first.
+ |. sra TMP0, ARG, 31
+ | xor TMP1, ARG, TMP0
+ | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1.
+ | dclz ARG, TMP1
+ | addiu ARG, ARG, -11
+ | li AT, 0x3ff+63-11-1
+ | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1.
+ | subu ARG, AT, ARG // Exponent - 1.
+ | ins ARG, TMP0, 11, 11 // Sign | Exponent.
+ | dsll ARG, ARG, 52 // Align left.
+ | jr ra
+ |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent.
+ |9:
+ | jr ra
+ |. nop
+ |.endif
+ |.endmacro
+ |
+ |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfi2d_1:
+ | sfi2d CARG1
+ |
+ |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfi2d_2:
+ | sfi2d CARG2
+ |
+ |// Soft-float comparison. Equivalent to c.eq.d.
+ |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfcmpeq:
+ |.if not FPU
+ | dsll AT, CARG1, 1
+ | dsll TMP0, CARG2, 1
+ | or TMP1, AT, TMP0
+ | beqz TMP1, >8 // Both args +-0: return 1.
+ |. lui TMP1, 0xffe0
+ | dsll TMP1, TMP1, 32
+ | sltu AT, TMP1, AT
+ | sltu TMP0, TMP1, TMP0
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0;
+ |. xor AT, CARG1, CARG2
+ | jr ra
+ |. sltiu CRET1, AT, 1 // Same values: return 1.
+ |8:
+ | jr ra
+ |. li CRET1, 1
+ |9:
+ | jr ra
+ |. li CRET1, 0
+ |.endif
+ |
+ |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d.
+ |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2.
+ |->vm_sfcmpult:
+ |.if not FPU
+ | b >1
+ |. li CRET2, 1
+ |.endif
+ |
+ |->vm_sfcmpolt:
+ |.if not FPU
+ | li CRET2, 0
+ |1:
+ | dsll AT, CARG1, 1
+ | dsll TMP0, CARG2, 1
+ | or TMP1, AT, TMP0
+ | beqz TMP1, >8 // Both args +-0: return 0.
+ |. lui TMP1, 0xffe0
+ | dsll TMP1, TMP1, 32
+ | sltu AT, TMP1, AT
+ | sltu TMP0, TMP1, TMP0
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
+ |. and AT, CARG1, CARG2
+ | bltz AT, >5 // Both args negative?
+ |. nop
+ | jr ra
+ |. slt CRET1, CARG1, CARG2
+ |5: // Swap conditions if both operands are negative.
+ | jr ra
+ |. slt CRET1, CARG2, CARG1
+ |8:
+ | jr ra
+ |. li CRET1, 0
+ |9:
+ | jr ra
+ |. move CRET1, CRET2
+ |.endif
+ |
+ |->vm_sfcmpogt:
+ |.if not FPU
+ | dsll AT, CARG2, 1
+ | dsll TMP0, CARG1, 1
+ | or TMP1, AT, TMP0
+ | beqz TMP1, >8 // Both args +-0: return 0.
+ |. lui TMP1, 0xffe0
+ | dsll TMP1, TMP1, 32
+ | sltu AT, TMP1, AT
+ | sltu TMP0, TMP1, TMP0
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0 or 1;
+ |. and AT, CARG2, CARG1
+ | bltz AT, >5 // Both args negative?
+ |. nop
+ | jr ra
+ |. slt CRET1, CARG2, CARG1
+ |5: // Swap conditions if both operands are negative.
+ | jr ra
+ |. slt CRET1, CARG1, CARG2
+ |8:
+ | jr ra
+ |. li CRET1, 0
+ |9:
+ | jr ra
+ |. li CRET1, 0
+ |.endif
+ |
+ |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a.
+ |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1.
+ |->vm_sfcmpolex:
+ |.if not FPU
+ | dsll AT, CARG1, 1
+ | dsll TMP0, CARG2, 1
+ | or TMP1, AT, TMP0
+ | beqz TMP1, >8 // Both args +-0: return 1.
+ |. lui TMP1, 0xffe0
+ | dsll TMP1, TMP1, 32
+ | sltu AT, TMP1, AT
+ | sltu TMP0, TMP1, TMP0
+ | or TMP1, AT, TMP0
+ | bnez TMP1, >9 // Either arg is NaN: return 0;
+ |. and AT, CARG1, CARG2
+ | xor AT, AT, TMP3
+ | bltz AT, >5 // Both args negative?
+ |. nop
+ | jr ra
+ |. slt CRET1, CARG2, CARG1
+ |5: // Swap conditions if both operands are negative.
+ | jr ra
+ |. slt CRET1, CARG1, CARG2
+ |8:
+ | jr ra
+ |. li CRET1, 1
+ |9:
+ | jr ra
+ |. li CRET1, 0
+ |.endif
+ |
+ |.macro sfmin_max, name, fpcall
+ |->vm_sf .. name:
+ |.if JIT and not FPU
+ | move TMP2, ra
+ | bal ->fpcall
+ |. nop
+ | move ra, TMP2
+ | move TMP0, CRET1
+ | move CRET1, CARG1
+ |.if MIPSR6
+ | selnez CRET1, CRET1, TMP0
+ | seleqz TMP0, CARG2, TMP0
+ | jr ra
+ |. or CRET1, CRET1, TMP0
+ |.else
+ | jr ra
+ |. movz CRET1, CARG2, TMP0
+ |.endif
+ |.endif
+ |.endmacro
+ |
+ | sfmin_max min, vm_sfcmpolt
+ | sfmin_max max, vm_sfcmpogt
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |.define NEXT_TAB, TAB:CARG1
+ |.define NEXT_IDX, CARG2
+ |.define NEXT_ASIZE, CARG3
+ |.define NEXT_NIL, CARG4
+ |.define NEXT_TMP0, r12
+ |.define NEXT_TMP1, r13
+ |.define NEXT_TMP2, r14
+ |.define NEXT_RES_VK, CRET1
+ |.define NEXT_RES_IDX, CRET2
+ |.define NEXT_RES_PTR, sp
+ |.define NEXT_RES_VAL, 0(sp)
+ |.define NEXT_RES_KEY, 8(sp)
+ |
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+ |// Next idx returned in CRET2.
+ |->vm_next:
+ |.if JIT and ENDIAN_LE
+ | lw NEXT_ASIZE, NEXT_TAB->asize
+ | ld NEXT_TMP0, NEXT_TAB->array
+ | li NEXT_NIL, LJ_TNIL
+ |1: // Traverse array part.
+ | sltu AT, NEXT_IDX, NEXT_ASIZE
+ | sll NEXT_TMP1, NEXT_IDX, 3
+ | beqz AT, >5
+ |. daddu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1
+ | li AT, LJ_TISNUM
+ | ld NEXT_TMP2, 0(NEXT_TMP1)
+ | dsll AT, AT, 47
+ | or NEXT_TMP1, NEXT_IDX, AT
+ | beq NEXT_TMP2, NEXT_NIL, <1
+ |. addiu NEXT_IDX, NEXT_IDX, 1
+ | sd NEXT_TMP2, NEXT_RES_VAL
+ | sd NEXT_TMP1, NEXT_RES_KEY
+ | move NEXT_RES_VK, NEXT_RES_PTR
+ | jr ra
+ |. move NEXT_RES_IDX, NEXT_IDX
+ |
+ |5: // Traverse hash part.
+ | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE
+ | ld NODE:NEXT_RES_VK, NEXT_TAB->node
+ | sll NEXT_TMP2, NEXT_RES_IDX, 5
+ | lw NEXT_TMP0, NEXT_TAB->hmask
+ | sll AT, NEXT_RES_IDX, 3
+ | subu AT, NEXT_TMP2, AT
+ | daddu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT
+ |6:
+ | sltu AT, NEXT_TMP0, NEXT_RES_IDX
+ | bnez AT, >8
+ |. nop
+ | ld NEXT_TMP2, NODE:NEXT_RES_VK->val
+ | bne NEXT_TMP2, NEXT_NIL, >9
+ |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1
+ | // Skip holes in hash part.
+ | b <6
+ |. daddiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node)
+ |
+ |8: // End of iteration. Set the key to nil (not the value).
+ | sd NEXT_NIL, NEXT_RES_KEY
+ | move NEXT_RES_VK, NEXT_RES_PTR
+ |9:
+ | jr ra
+ |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Handler for callback functions. Callback slot number in r1, g in r2.
+ |->vm_ffi_callback:
+ |.if FFI
+ |.type CTSTATE, CTState, PC
+ | saveregs
+ | ld CTSTATE, GL:r2->ctype_state
+ | daddiu DISPATCH, r2, GG_G2DISP
+ | load_got lj_ccallback_enter
+ | sw r1, CTSTATE->cb.slot
+ | sd CARG1, CTSTATE->cb.gpr[0]
+ | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0]
+ | sd CARG2, CTSTATE->cb.gpr[1]
+ | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1]
+ | sd CARG3, CTSTATE->cb.gpr[2]
+ | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2]
+ | sd CARG4, CTSTATE->cb.gpr[3]
+ | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3]
+ | sd CARG5, CTSTATE->cb.gpr[4]
+ | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4]
+ | sd CARG6, CTSTATE->cb.gpr[5]
+ | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5]
+ | sd CARG7, CTSTATE->cb.gpr[6]
+ | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6]
+ | sd CARG8, CTSTATE->cb.gpr[7]
+ | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7]
+ | daddiu TMP0, sp, CFRAME_SPACE
+ | sd TMP0, CTSTATE->cb.stack
+ | sd r0, SAVE_PC // Any value outside of bytecode is ok.
+ | move CARG2, sp
+ | call_intern lj_ccallback_enter // (CTState *cts, void *cf)
+ |. move CARG1, CTSTATE
+ | // Returns lua_State *.
+ | ld BASE, L:CRET1->base
+ | ld RC, L:CRET1->top
+ | move L, CRET1
+ | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
+ | .FPU mtc1 TMP3, TOBIT
+ | li TISNIL, LJ_TNIL
+ | li TISNUM, LJ_TISNUM
+ | li_vmstate INTERP
+ | subu RC, RC, BASE
+ | cleartp LFUNC:RB
+ | st_vmstate
+ | .FPU cvt.d.s TOBIT, TOBIT
+ | ins_callt
+ |.endif
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+ |.if FFI
+ | load_got lj_ccallback_leave
+ | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH)
+ | sd BASE, L->base
+ | sd RB, L->top
+ | sd L, CTSTATE->L
+ | move CARG2, RA
+ | call_intern lj_ccallback_leave // (CTState *cts, TValue *o)
+ |. move CARG1, CTSTATE
+ | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0]
+ | ld CRET1, CTSTATE->cb.gpr[0]
+ | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1]
+ | b ->vm_leave_unw
+ |. ld CRET2, CTSTATE->cb.gpr[1]
+ |.endif
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+ | .type CCSTATE, CCallState, CARG1
+ | lw TMP1, CCSTATE->spadj
+ | lbu CARG2, CCSTATE->nsp
+ | move TMP2, sp
+ | dsubu sp, sp, TMP1
+ | sd ra, -8(TMP2)
+ | sll CARG2, CARG2, 3
+ | sd r16, -16(TMP2)
+ | sd CCSTATE, -24(TMP2)
+ | move r16, TMP2
+ | daddiu TMP1, CCSTATE, offsetof(CCallState, stack)
+ | move TMP2, sp
+ | beqz CARG2, >2
+ |. daddu TMP3, TMP1, CARG2
+ |1:
+ | ld TMP0, 0(TMP1)
+ | daddiu TMP1, TMP1, 8
+ | sltu AT, TMP1, TMP3
+ | sd TMP0, 0(TMP2)
+ | bnez AT, <1
+ |. daddiu TMP2, TMP2, 8
+ |2:
+ | ld CFUNCADDR, CCSTATE->func
+ | .FPU ldc1 FARG1, CCSTATE->gpr[0]
+ | ld CARG2, CCSTATE->gpr[1]
+ | .FPU ldc1 FARG2, CCSTATE->gpr[1]
+ | ld CARG3, CCSTATE->gpr[2]
+ | .FPU ldc1 FARG3, CCSTATE->gpr[2]
+ | ld CARG4, CCSTATE->gpr[3]
+ | .FPU ldc1 FARG4, CCSTATE->gpr[3]
+ | ld CARG5, CCSTATE->gpr[4]
+ | .FPU ldc1 FARG5, CCSTATE->gpr[4]
+ | ld CARG6, CCSTATE->gpr[5]
+ | .FPU ldc1 FARG6, CCSTATE->gpr[5]
+ | ld CARG7, CCSTATE->gpr[6]
+ | .FPU ldc1 FARG7, CCSTATE->gpr[6]
+ | ld CARG8, CCSTATE->gpr[7]
+ | .FPU ldc1 FARG8, CCSTATE->gpr[7]
+ | jalr CFUNCADDR
+ |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1.
+ | ld CCSTATE:TMP1, -24(r16)
+ | ld TMP2, -16(r16)
+ | ld ra, -8(r16)
+ | sd CRET1, CCSTATE:TMP1->gpr[0]
+ | sd CRET2, CCSTATE:TMP1->gpr[1]
+ |.if FPU
+ | sdc1 FRET1, CCSTATE:TMP1->fpr[0]
+ | sdc1 FRET2, CCSTATE:TMP1->fpr[1]
+ |.else
+ | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float.
+ |.endif
+ | move sp, r16
+ | jr ra
+ |. move r16, TMP2
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+ int vk = 0;
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1*8, RD = src2*8, JMP with RD = target
+ |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp
+ | daddu RA, BASE, RA
+ | daddu RD, BASE, RD
+ | ld ARGRA, 0(RA)
+ | ld ARGRD, 0(RD)
+ | lhu TMP2, OFS_RD(PC)
+ | gettp CARG3, ARGRA
+ | gettp CARG4, ARGRD
+ | bne CARG3, TISNUM, >2
+ |. daddiu PC, PC, 4
+ | bne CARG4, TISNUM, >5
+ |. decode_RD4b TMP2
+ | sextw ARGRA, ARGRA
+ | sextw ARGRD, ARGRD
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | slt AT, CARG1, CARG2
+ | addu TMP2, TMP2, TMP3
+ |.if MIPSR6
+ | movop TMP2, TMP2, AT
+ |.else
+ | movop TMP2, r0, AT
+ |.endif
+ |1:
+ | daddu PC, PC, TMP2
+ | ins_next
+ |
+ |2: // RA is not an integer.
+ | sltiu AT, CARG3, LJ_TISNUM
+ | beqz AT, ->vmeta_comp
+ |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | sltiu AT, CARG4, LJ_TISNUM
+ | beqz AT, >4
+ |. decode_RD4b TMP2
+ |.if FPU
+ | ldc1 FRA, 0(RA)
+ | ldc1 FRD, 0(RD)
+ |.endif
+ |3: // RA and RD are both numbers.
+ |.if FPU
+ |.if MIPSR6
+ | fcomp FTMP0, FTMP0, FTMP2
+ | addu TMP2, TMP2, TMP3
+ | mfc1 TMP3, FTMP0
+ | b <1
+ |. fmovop TMP2, TMP2, TMP3
+ |.else
+ | fcomp FTMP0, FTMP2
+ | addu TMP2, TMP2, TMP3
+ | b <1
+ |. fmovop TMP2, r0
+ |.endif
+ |.else
+ | bal sfcomp
+ |. addu TMP2, TMP2, TMP3
+ | b <1
+ |.if MIPSR6
+ |. movop TMP2, TMP2, CRET1
+ |.else
+ |. movop TMP2, r0, CRET1
+ |.endif
+ |.endif
+ |
+ |4: // RA is a number, RD is not a number.
+ | bne CARG4, TISNUM, ->vmeta_comp
+ | // RA is a number, RD is an integer. Convert RD to a number.
+ |.if FPU
+ |. lwc1 FRD, LO(RD)
+ | ldc1 FRA, 0(RA)
+ | b <3
+ |. cvt.d.w FRD, FRD
+ |.else
+ |.if "ARGRD" == "CARG1"
+ |. sextw CARG1, CARG1
+ | bal ->vm_sfi2d_1
+ |. nop
+ |.else
+ |. sextw CARG2, CARG2
+ | bal ->vm_sfi2d_2
+ |. nop
+ |.endif
+ | b <3
+ |. nop
+ |.endif
+ |
+ |5: // RA is an integer, RD is not an integer
+ | sltiu AT, CARG4, LJ_TISNUM
+ | beqz AT, ->vmeta_comp
+ |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | // RA is an integer, RD is a number. Convert RA to a number.
+ |.if FPU
+ | lwc1 FRA, LO(RA)
+ | ldc1 FRD, 0(RD)
+ | b <3
+ | cvt.d.w FRA, FRA
+ |.else
+ |.if "ARGRA" == "CARG1"
+ | bal ->vm_sfi2d_1
+ |. sextw CARG1, CARG1
+ |.else
+ | bal ->vm_sfi2d_2
+ |. sextw CARG2, CARG2
+ |.endif
+ | b <3
+ |. nop
+ |.endif
+ |.endmacro
+ |
+ |.if MIPSR6
+ if (op == BC_ISLT) {
+ | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt
+ } else if (op == BC_ISGE) {
+ | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt
+ } else if (op == BC_ISLE) {
+ | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult
+ } else {
+ | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult
+ }
+ |.else
+ if (op == BC_ISLT) {
+ | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt
+ } else if (op == BC_ISGE) {
+ | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt
+ } else if (op == BC_ISLE) {
+ | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult
+ } else {
+ | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult
+ }
+ |.endif
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | // RA = src1*8, RD = src2*8, JMP with RD = target
+ | daddu RA, BASE, RA
+ | daddiu PC, PC, 4
+ | daddu RD, BASE, RD
+ | ld CARG1, 0(RA)
+ | lhu TMP2, -4+OFS_RD(PC)
+ | ld CARG2, 0(RD)
+ | gettp CARG3, CARG1
+ | gettp CARG4, CARG2
+ | sltu AT, TISNUM, CARG3
+ | sltu TMP1, TISNUM, CARG4
+ | or AT, AT, TMP1
+ if (vk) {
+ | beqz AT, ->BC_ISEQN_Z
+ } else {
+ | beqz AT, ->BC_ISNEN_Z
+ }
+ | // Either or both types are not numbers.
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ |.if FFI
+ |. li AT, LJ_TCDATA
+ | beq CARG3, AT, ->vmeta_equal_cd
+ |.endif
+ | decode_RD4b TMP2
+ |.if FFI
+ | beq CARG4, AT, ->vmeta_equal_cd
+ |. nop
+ |.endif
+ | bne CARG1, CARG2, >2
+ |. addu TMP2, TMP2, TMP3
+ | // Tag and value are equal.
+ if (vk) {
+ |->BC_ISEQV_Z:
+ | daddu PC, PC, TMP2
+ }
+ |1:
+ | ins_next
+ |
+ |2: // Check if the tags are the same and it's a table or userdata.
+ | xor AT, CARG3, CARG4 // Same type?
+ | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata?
+ |.if MIPSR6
+ | seleqz TMP0, TMP0, AT
+ |.else
+ | movn TMP0, r0, AT
+ |.endif
+ if (vk) {
+ | beqz TMP0, <1
+ } else {
+ | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction.
+ }
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ |. cleartp TAB:TMP1, CARG1
+ | ld TAB:TMP3, TAB:TMP1->metatable
+ if (vk) {
+ | beqz TAB:TMP3, <1 // No metatable?
+ |. nop
+ | lbu TMP3, TAB:TMP3->nomm
+ | andi TMP3, TMP3, 1<<MM_eq
+ | bnez TMP3, >1 // Or 'no __eq' flag set?
+ } else {
+ | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable?
+ |. nop
+ | lbu TMP3, TAB:TMP3->nomm
+ | andi TMP3, TMP3, 1<<MM_eq
+ | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set?
+ }
+ |. nop
+ | b ->vmeta_equal // Handle __eq metamethod.
+ |. li TMP0, 1-vk // ne = 0 or 1.
+ break;
+
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
+ | daddu RA, BASE, RA
+ | daddiu PC, PC, 4
+ | ld CARG1, 0(RA)
+ | dsubu RD, KBASE, RD
+ | lhu TMP2, -4+OFS_RD(PC)
+ | ld CARG2, -8(RD) // KBASE-8-str_const*8
+ |.if FFI
+ | gettp TMP0, CARG1
+ | li AT, LJ_TCDATA
+ |.endif
+ | li TMP1, LJ_TSTR
+ | decode_RD4b TMP2
+ |.if FFI
+ | beq TMP0, AT, ->vmeta_equal_cd
+ |.endif
+ |. settp CARG2, TMP1
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | xor TMP1, CARG1, CARG2
+ | addu TMP2, TMP2, TMP3
+ |.if MIPSR6
+ if (vk) {
+ | seleqz TMP2, TMP2, TMP1
+ } else {
+ | selnez TMP2, TMP2, TMP1
+ }
+ |.else
+ if (vk) {
+ | movn TMP2, r0, TMP1
+ } else {
+ | movz TMP2, r0, TMP1
+ }
+ |.endif
+ | daddu PC, PC, TMP2
+ | ins_next
+ break;
+
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | // RA = src*8, RD = num_const*8, JMP with RD = target
+ | daddu RA, BASE, RA
+ | daddu RD, KBASE, RD
+ | ld CARG1, 0(RA)
+ | ld CARG2, 0(RD)
+ | lhu TMP2, OFS_RD(PC)
+ | gettp CARG3, CARG1
+ | gettp CARG4, CARG2
+ | daddiu PC, PC, 4
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ if (vk) {
+ |->BC_ISEQN_Z:
+ } else {
+ |->BC_ISNEN_Z:
+ }
+ | bne CARG3, TISNUM, >3
+ |. decode_RD4b TMP2
+ | bne CARG4, TISNUM, >6
+ |. addu TMP2, TMP2, TMP3
+ | xor AT, CARG1, CARG2
+ |.if MIPSR6
+ if (vk) {
+ | seleqz TMP2, TMP2, AT
+ |1:
+ | daddu PC, PC, TMP2
+ |2:
+ } else {
+ | selnez TMP2, TMP2, AT
+ |1:
+ |2:
+ | daddu PC, PC, TMP2
+ }
+ |.else
+ if (vk) {
+ | movn TMP2, r0, AT
+ |1:
+ | daddu PC, PC, TMP2
+ |2:
+ } else {
+ | movz TMP2, r0, AT
+ |1:
+ |2:
+ | daddu PC, PC, TMP2
+ }
+ |.endif
+ | ins_next
+ |
+ |3: // RA is not an integer.
+ | sltu AT, CARG3, TISNUM
+ |.if FFI
+ | beqz AT, >8
+ |.else
+ | beqz AT, <2
+ |.endif
+ |. addu TMP2, TMP2, TMP3
+ | sltu AT, CARG4, TISNUM
+ |.if FPU
+ | ldc1 FTMP0, 0(RA)
+ | ldc1 FTMP2, 0(RD)
+ |.endif
+ | beqz AT, >5
+ |. nop
+ |4: // RA and RD are both numbers.
+ |.if FPU
+ |.if MIPSR6
+ | cmp.eq.d FTMP0, FTMP0, FTMP2
+ | dmfc1 TMP1, FTMP0
+ | b <1
+ if (vk) {
+ |. selnez TMP2, TMP2, TMP1
+ } else {
+ |. seleqz TMP2, TMP2, TMP1
+ }
+ |.else
+ | c.eq.d FTMP0, FTMP2
+ | b <1
+ if (vk) {
+ |. movf TMP2, r0
+ } else {
+ |. movt TMP2, r0
+ }
+ |.endif
+ |.else
+ | bal ->vm_sfcmpeq
+ |. nop
+ | b <1
+ |.if MIPSR6
+ if (vk) {
+ |. selnez TMP2, TMP2, CRET1
+ } else {
+ |. seleqz TMP2, TMP2, CRET1
+ }
+ |.else
+ if (vk) {
+ |. movz TMP2, r0, CRET1
+ } else {
+ |. movn TMP2, r0, CRET1
+ }
+ |.endif
+ |.endif
+ |
+ |5: // RA is a number, RD is not a number.
+ |.if FFI
+ | bne CARG4, TISNUM, >9
+ |.else
+ | bne CARG4, TISNUM, <2
+ |.endif
+ | // RA is a number, RD is an integer. Convert RD to a number.
+ |.if FPU
+ |. lwc1 FTMP2, LO(RD)
+ | b <4
+ |. cvt.d.w FTMP2, FTMP2
+ |.else
+ |. sextw CARG2, CARG2
+ | bal ->vm_sfi2d_2
+ |. nop
+ | b <4
+ |. nop
+ |.endif
+ |
+ |6: // RA is an integer, RD is not an integer
+ | sltu AT, CARG4, TISNUM
+ |.if FFI
+ | beqz AT, >9
+ |.else
+ | beqz AT, <2
+ |.endif
+ | // RA is an integer, RD is a number. Convert RA to a number.
+ |.if FPU
+ |. lwc1 FTMP0, LO(RA)
+ | ldc1 FTMP2, 0(RD)
+ | b <4
+ | cvt.d.w FTMP0, FTMP0
+ |.else
+ |. sextw CARG1, CARG1
+ | bal ->vm_sfi2d_1
+ |. nop
+ | b <4
+ |. nop
+ |.endif
+ |
+ |.if FFI
+ |8:
+ | li AT, LJ_TCDATA
+ | bne CARG3, AT, <2
+ |. nop
+ | b ->vmeta_equal_cd
+ |. nop
+ |9:
+ | li AT, LJ_TCDATA
+ | bne CARG4, AT, <2
+ |. nop
+ | b ->vmeta_equal_cd
+ |. nop
+ |.endif
+ break;
+
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
+ | daddu RA, BASE, RA
+ | srl TMP1, RD, 3
+ | ld TMP0, 0(RA)
+ | lhu TMP2, OFS_RD(PC)
+ | not TMP1, TMP1
+ | gettp TMP0, TMP0
+ | daddiu PC, PC, 4
+ |.if FFI
+ | li AT, LJ_TCDATA
+ | beq TMP0, AT, ->vmeta_equal_cd
+ |.endif
+ |. xor TMP0, TMP0, TMP1
+ | decode_RD4b TMP2
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | addu TMP2, TMP2, TMP3
+ |.if MIPSR6
+ if (vk) {
+ | seleqz TMP2, TMP2, TMP0
+ } else {
+ | selnez TMP2, TMP2, TMP0
+ }
+ |.else
+ if (vk) {
+ | movn TMP2, r0, TMP0
+ } else {
+ | movz TMP2, r0, TMP0
+ }
+ |.endif
+ | daddu PC, PC, TMP2
+ | ins_next
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
+ | daddu RD, BASE, RD
+ | lhu TMP2, OFS_RD(PC)
+ | ld TMP0, 0(RD)
+ | daddiu PC, PC, 4
+ | gettp TMP0, TMP0
+ | sltiu TMP0, TMP0, LJ_TISTRUECOND
+ if (op == BC_IST || op == BC_ISF) {
+ | decode_RD4b TMP2
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | addu TMP2, TMP2, TMP3
+ |.if MIPSR6
+ if (op == BC_IST) {
+ | selnez TMP2, TMP2, TMP0;
+ } else {
+ | seleqz TMP2, TMP2, TMP0;
+ }
+ |.else
+ if (op == BC_IST) {
+ | movz TMP2, r0, TMP0
+ } else {
+ | movn TMP2, r0, TMP0
+ }
+ |.endif
+ | daddu PC, PC, TMP2
+ } else {
+ | ld CRET1, 0(RD)
+ if (op == BC_ISTC) {
+ | beqz TMP0, >1
+ } else {
+ | bnez TMP0, >1
+ }
+ |. daddu RA, BASE, RA
+ | decode_RD4b TMP2
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | addu TMP2, TMP2, TMP3
+ | sd CRET1, 0(RA)
+ | daddu PC, PC, TMP2
+ |1:
+ }
+ | ins_next
+ break;
+
+ case BC_ISTYPE:
+ | // RA = src*8, RD = -type*8
+ | daddu TMP2, BASE, RA
+ | srl TMP1, RD, 3
+ | ld TMP0, 0(TMP2)
+ | ins_next1
+ | gettp TMP0, TMP0
+ | daddu AT, TMP0, TMP1
+ | bnez AT, ->vmeta_istype
+ |. ins_next2
+ break;
+ case BC_ISNUM:
+ | // RA = src*8, RD = -(TISNUM-1)*8
+ | daddu TMP2, BASE, RA
+ | ld TMP0, 0(TMP2)
+ | ins_next1
+ | checknum TMP0, ->vmeta_istype
+ |. ins_next2
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_MOV:
+ | // RA = dst*8, RD = src*8
+ | daddu RD, BASE, RD
+ | daddu RA, BASE, RA
+ | ld CRET1, 0(RD)
+ | ins_next1
+ | sd CRET1, 0(RA)
+ | ins_next2
+ break;
+ case BC_NOT:
+ | // RA = dst*8, RD = src*8
+ | daddu RD, BASE, RD
+ | daddu RA, BASE, RA
+ | ld TMP0, 0(RD)
+ | li AT, LJ_TTRUE
+ | gettp TMP0, TMP0
+ | sltu TMP0, AT, TMP0
+ | addiu TMP0, TMP0, 1
+ | dsll TMP0, TMP0, 47
+ | not TMP0, TMP0
+ | ins_next1
+ | sd TMP0, 0(RA)
+ | ins_next2
+ break;
+ case BC_UNM:
+ | // RA = dst*8, RD = src*8
+ | daddu RB, BASE, RD
+ | ld CARG1, 0(RB)
+ | daddu RA, BASE, RA
+ | gettp CARG3, CARG1
+ | bne CARG3, TISNUM, >2
+ |. lui TMP1, 0x8000
+ | sextw CARG1, CARG1
+ | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31.
+ |. negu CARG1, CARG1
+ | zextw CARG1, CARG1
+ | settp CARG1, TISNUM
+ |1:
+ | ins_next1
+ | sd CARG1, 0(RA)
+ | ins_next2
+ |2:
+ | sltiu AT, CARG3, LJ_TISNUM
+ | beqz AT, ->vmeta_unm
+ |. dsll TMP1, TMP1, 32
+ | b <1
+ |. xor CARG1, CARG1, TMP1
+ break;
+ case BC_LEN:
+ | // RA = dst*8, RD = src*8
+ | daddu CARG2, BASE, RD
+ | daddu RA, BASE, RA
+ | ld TMP0, 0(CARG2)
+ | gettp TMP1, TMP0
+ | daddiu AT, TMP1, -LJ_TSTR
+ | bnez AT, >2
+ |. cleartp STR:CARG1, TMP0
+ | lw CRET1, STR:CARG1->len
+ |1:
+ | settp CRET1, TISNUM
+ | ins_next1
+ | sd CRET1, 0(RA)
+ | ins_next2
+ |2:
+ | daddiu AT, TMP1, -LJ_TTAB
+ | bnez AT, ->vmeta_len
+ |. nop
+#if LJ_52
+ | ld TAB:TMP2, TAB:CARG1->metatable
+ | bnez TAB:TMP2, >9
+ |. nop
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | load_got lj_tab_len
+ | call_intern lj_tab_len // (GCtab *t)
+ |. nop
+ | // Returns uint32_t (but less than 2^31).
+ | b <1
+ |. nop
+#if LJ_52
+ |9:
+ | lbu TMP0, TAB:TMP2->nomm
+ | andi TMP0, TMP0, 1<<MM_len
+ | bnez TMP0, <3 // 'no __len' flag set: done.
+ |. nop
+ | b ->vmeta_len
+ |. nop
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ |.macro fpmod, a, b, c
+ | bal ->vm_floor // floor(b/c)
+ |. div.d FARG1, b, c
+ | mul.d a, FRET1, c
+ | sub.d a, b, a // b - floor(b/c)*c
+ |.endmacro
+
+ |.macro sfpmod
+ | daddiu sp, sp, -16
+ |
+ | load_got __divdf3
+ | sd CARG1, 0(sp)
+ | call_extern
+ |. sd CARG2, 8(sp)
+ |
+ | load_got floor
+ | call_extern
+ |. move CARG1, CRET1
+ |
+ | load_got __muldf3
+ | move CARG1, CRET1
+ | call_extern
+ |. ld CARG2, 8(sp)
+ |
+ | load_got __subdf3
+ | ld CARG1, 0(sp)
+ | call_extern
+ |. move CARG2, CRET1
+ |
+ | daddiu sp, sp, 16
+ |.endmacro
+
+ |.macro ins_arithpre, label
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
+ ||switch (vk) {
+ ||case 0:
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | // RA = dst*8, RB = src1*8, RC = num_const*8
+ | daddu RB, BASE, RB
+ |.if "label" ~= "none"
+ | b label
+ |.endif
+ |. daddu RC, KBASE, RC
+ || break;
+ ||case 1:
+ | decode_RB8a RC, INS
+ | decode_RB8b RC
+ | decode_RDtoRC8 RB, RD
+ | // RA = dst*8, RB = num_const*8, RC = src1*8
+ | daddu RC, BASE, RC
+ |.if "label" ~= "none"
+ | b label
+ |.endif
+ |. daddu RB, KBASE, RB
+ || break;
+ ||default:
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | // RA = dst*8, RB = src1*8, RC = src2*8
+ | daddu RB, BASE, RB
+ |.if "label" ~= "none"
+ | b label
+ |.endif
+ |. daddu RC, BASE, RC
+ || break;
+ ||}
+ |.endmacro
+ |
+ |.macro ins_arith, intins, fpins, fpcall, label
+ | ins_arithpre none
+ |
+ |.if "label" ~= "none"
+ |label:
+ |.endif
+ |
+ |// Used in 5.
+ | ld CARG1, 0(RB)
+ | ld CARG2, 0(RC)
+ | gettp TMP0, CARG1
+ | gettp TMP1, CARG2
+ |
+ |.if "intins" ~= "div"
+ |
+ | // Check for two integers.
+ | sextw CARG3, CARG1
+ | bne TMP0, TISNUM, >5
+ |. sextw CARG4, CARG2
+ | bne TMP1, TISNUM, >5
+ |
+ |.if "intins" == "addu"
+ |. intins CRET1, CARG3, CARG4
+ | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow.
+ | xor TMP2, CRET1, CARG4
+ | and TMP1, TMP1, TMP2
+ | bltz TMP1, ->vmeta_arith
+ |. daddu RA, BASE, RA
+ |.elif "intins" == "subu"
+ |. intins CRET1, CARG3, CARG4
+ | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow.
+ | xor TMP2, CARG3, CARG4
+ | and TMP1, TMP1, TMP2
+ | bltz TMP1, ->vmeta_arith
+ |. daddu RA, BASE, RA
+ |.elif "intins" == "mult"
+ |.if MIPSR6
+ |. nop
+ | mul CRET1, CARG3, CARG4
+ | muh TMP2, CARG3, CARG4
+ |.else
+ |. intins CARG3, CARG4
+ | mflo CRET1
+ | mfhi TMP2
+ |.endif
+ | sra TMP1, CRET1, 31
+ | bne TMP1, TMP2, ->vmeta_arith
+ |. daddu RA, BASE, RA
+ |.else
+ |. load_got lj_vm_modi
+ | beqz CARG4, ->vmeta_arith
+ |. daddu RA, BASE, RA
+ | move CARG1, CARG3
+ | call_extern
+ |. move CARG2, CARG4
+ |.endif
+ |
+ | zextw CRET1, CRET1
+ | settp CRET1, TISNUM
+ | ins_next1
+ | sd CRET1, 0(RA)
+ |3:
+ | ins_next2
+ |
+ |.endif
+ |
+ |5: // Check for two numbers.
+ | .FPU ldc1 FTMP0, 0(RB)
+ | sltu AT, TMP0, TISNUM
+ | sltu TMP0, TMP1, TISNUM
+ | .FPU ldc1 FTMP2, 0(RC)
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_arith
+ |. daddu RA, BASE, RA
+ |
+ |.if FPU
+ | fpins FRET1, FTMP0, FTMP2
+ |.elif "fpcall" == "sfpmod"
+ | sfpmod
+ |.else
+ | load_got fpcall
+ | call_extern
+ |. nop
+ |.endif
+ |
+ | ins_next1
+ |.if "intins" ~= "div"
+ | b <3
+ |.endif
+ |.if FPU
+ |. sdc1 FRET1, 0(RA)
+ |.else
+ |. sd CRET1, 0(RA)
+ |.endif
+ |.if "intins" == "div"
+ | ins_next2
+ |.endif
+ |
+ |.endmacro
+
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+ | ins_arith addu, add.d, __adddf3, none
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+ | ins_arith subu, sub.d, __subdf3, none
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | ins_arith mult, mul.d, __muldf3, none
+ break;
+ case BC_DIVVN:
+ | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z
+ break;
+ case BC_DIVNV: case BC_DIVVV:
+ | ins_arithpre ->BC_DIVVN_Z
+ break;
+ case BC_MODVN:
+ | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z
+ break;
+ case BC_MODNV: case BC_MODVV:
+ | ins_arithpre ->BC_MODVN_Z
+ break;
+ case BC_POW:
+ | ins_arithpre none
+ | ld CARG1, 0(RB)
+ | ld CARG2, 0(RC)
+ | gettp TMP0, CARG1
+ | gettp TMP1, CARG2
+ | sltiu TMP0, TMP0, LJ_TISNUM
+ | sltiu TMP1, TMP1, LJ_TISNUM
+ | and AT, TMP0, TMP1
+ | load_got pow
+ | beqz AT, ->vmeta_arith
+ |. daddu RA, BASE, RA
+ |.if FPU
+ | ldc1 FARG1, 0(RB)
+ | ldc1 FARG2, 0(RC)
+ |.endif
+ | call_extern
+ |. nop
+ | ins_next1
+ |.if FPU
+ | sdc1 FRET1, 0(RA)
+ |.else
+ | sd CRET1, 0(RA)
+ |.endif
+ | ins_next2
+ break;
+
+ case BC_CAT:
+ | // RA = dst*8, RB = src_start*8, RC = src_end*8
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | dsubu CARG3, RC, RB
+ | sd BASE, L->base
+ | daddu CARG2, BASE, RC
+ | move MULTRES, RB
+ |->BC_CAT_Z:
+ | load_got lj_meta_cat
+ | srl CARG3, CARG3, 3
+ | sd PC, SAVE_PC
+ | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ |. move CARG1, L
+ | // Returns NULL (finished) or TValue * (metamethod).
+ | bnez CRET1, ->vmeta_binop
+ |. ld BASE, L->base
+ | daddu RB, BASE, MULTRES
+ | ld CRET1, 0(RB)
+ | daddu RA, BASE, RA
+ | ins_next1
+ | sd CRET1, 0(RA)
+ | ins_next2
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR:
+ | // RA = dst*8, RD = str_const*8 (~)
+ | dsubu TMP1, KBASE, RD
+ | ins_next1
+ | li TMP2, LJ_TSTR
+ | ld TMP0, -8(TMP1) // KBASE-8-str_const*8
+ | daddu RA, BASE, RA
+ | settp TMP0, TMP2
+ | sd TMP0, 0(RA)
+ | ins_next2
+ break;
+ case BC_KCDATA:
+ |.if FFI
+ | // RA = dst*8, RD = cdata_const*8 (~)
+ | dsubu TMP1, KBASE, RD
+ | ins_next1
+ | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8
+ | li TMP2, LJ_TCDATA
+ | daddu RA, BASE, RA
+ | settp TMP0, TMP2
+ | sd TMP0, 0(RA)
+ | ins_next2
+ |.endif
+ break;
+ case BC_KSHORT:
+ | // RA = dst*8, RD = int16_literal*8
+ | sra RD, INS, 16
+ | daddu RA, BASE, RA
+ | zextw RD, RD
+ | ins_next1
+ | settp RD, TISNUM
+ | sd RD, 0(RA)
+ | ins_next2
+ break;
+ case BC_KNUM:
+ | // RA = dst*8, RD = num_const*8
+ | daddu RD, KBASE, RD
+ | daddu RA, BASE, RA
+ | ld CRET1, 0(RD)
+ | ins_next1
+ | sd CRET1, 0(RA)
+ | ins_next2
+ break;
+ case BC_KPRI:
+ | // RA = dst*8, RD = primitive_type*8 (~)
+ | daddu RA, BASE, RA
+ | dsll TMP0, RD, 44
+ | not TMP0, TMP0
+ | ins_next1
+ | sd TMP0, 0(RA)
+ | ins_next2
+ break;
+ case BC_KNIL:
+ | // RA = base*8, RD = end*8
+ | daddu RA, BASE, RA
+ | sd TISNIL, 0(RA)
+ | daddiu RA, RA, 8
+ | daddu RD, BASE, RD
+ |1:
+ | sd TISNIL, 0(RA)
+ | slt AT, RA, RD
+ | bnez AT, <1
+ |. daddiu RA, RA, 8
+ | ins_next_
+ break;
+
+ /* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ | // RA = dst*8, RD = uvnum*8
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
+ | daddu RA, BASE, RA
+ | cleartp LFUNC:RB
+ | daddu RD, RD, LFUNC:RB
+ | ld UPVAL:RB, LFUNC:RD->uvptr
+ | ins_next1
+ | ld TMP1, UPVAL:RB->v
+ | ld CRET1, 0(TMP1)
+ | sd CRET1, 0(RA)
+ | ins_next2
+ break;
+ case BC_USETV:
+ | // RA = uvnum*8, RD = src*8
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
+ | daddu RD, BASE, RD
+ | cleartp LFUNC:RB
+ | daddu RA, RA, LFUNC:RB
+ | ld UPVAL:RB, LFUNC:RA->uvptr
+ | ld CRET1, 0(RD)
+ | lbu TMP3, UPVAL:RB->marked
+ | ld CARG2, UPVAL:RB->v
+ | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
+ | lbu TMP0, UPVAL:RB->closed
+ | gettp TMP2, CRET1
+ | sd CRET1, 0(CARG2)
+ | li AT, LJ_GC_BLACK|1
+ | or TMP3, TMP3, TMP0
+ | beq TMP3, AT, >2 // Upvalue is closed and black?
+ |. daddiu TMP2, TMP2, -(LJ_TNUMX+1)
+ |1:
+ | ins_next
+ |
+ |2: // Check if new value is collectable.
+ | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1)
+ | beqz AT, <1 // tvisgcv(v)
+ |. cleartp GCOBJ:CRET1, CRET1
+ | lbu TMP3, GCOBJ:CRET1->gch.marked
+ | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
+ | beqz TMP3, <1
+ |. load_got lj_gc_barrieruv
+ | // Crossed a write barrier. Move the barrier forward.
+ | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ |. daddiu CARG1, DISPATCH, GG_DISP2G
+ | b <1
+ |. nop
+ break;
+ case BC_USETS:
+ | // RA = uvnum*8, RD = str_const*8 (~)
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
+ | dsubu TMP1, KBASE, RD
+ | cleartp LFUNC:RB
+ | daddu RA, RA, LFUNC:RB
+ | ld UPVAL:RB, LFUNC:RA->uvptr
+ | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8
+ | lbu TMP2, UPVAL:RB->marked
+ | ld CARG2, UPVAL:RB->v
+ | lbu TMP3, STR:TMP1->marked
+ | andi AT, TMP2, LJ_GC_BLACK // isblack(uv)
+ | lbu TMP2, UPVAL:RB->closed
+ | li TMP0, LJ_TSTR
+ | settp TMP1, TMP0
+ | bnez AT, >2
+ |. sd TMP1, 0(CARG2)
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+ | beqz TMP2, <1
+ |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str)
+ | beqz AT, <1
+ |. load_got lj_gc_barrieruv
+ | // Crossed a write barrier. Move the barrier forward.
+ | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ |. daddiu CARG1, DISPATCH, GG_DISP2G
+ | b <1
+ |. nop
+ break;
+ case BC_USETN:
+ | // RA = uvnum*8, RD = num_const*8
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
+ | daddu RD, KBASE, RD
+ | cleartp LFUNC:RB
+ | daddu RA, RA, LFUNC:RB
+ | ld UPVAL:RB, LFUNC:RA->uvptr
+ | ld CRET1, 0(RD)
+ | ld TMP1, UPVAL:RB->v
+ | ins_next1
+ | sd CRET1, 0(TMP1)
+ | ins_next2
+ break;
+ case BC_USETP:
+ | // RA = uvnum*8, RD = primitive_type*8 (~)
+ | ld LFUNC:RB, FRAME_FUNC(BASE)
+ | dsll TMP0, RD, 44
+ | cleartp LFUNC:RB
+ | daddu RA, RA, LFUNC:RB
+ | not TMP0, TMP0
+ | ld UPVAL:RB, LFUNC:RA->uvptr
+ | ins_next1
+ | ld TMP1, UPVAL:RB->v
+ | sd TMP0, 0(TMP1)
+ | ins_next2
+ break;
+
+ case BC_UCLO:
+ | // RA = level*8, RD = target
+ | ld TMP2, L->openupval
+ | branch_RD // Do this first since RD is not saved.
+ | load_got lj_func_closeuv
+ | sd BASE, L->base
+ | beqz TMP2, >1
+ |. move CARG1, L
+ | call_intern lj_func_closeuv // (lua_State *L, TValue *level)
+ |. daddu CARG2, BASE, RA
+ | ld BASE, L->base
+ |1:
+ | ins_next
+ break;
+
+ case BC_FNEW:
+ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
+ | load_got lj_func_newL_gc
+ | dsubu TMP1, KBASE, RD
+ | ld CARG3, FRAME_FUNC(BASE)
+ | ld CARG2, -8(TMP1) // KBASE-8-proto_const*8
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | cleartp CARG3
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | call_intern lj_func_newL_gc
+ |. move CARG1, L
+ | // Returns GCfuncL *.
+ | li TMP0, LJ_TFUNC
+ | ld BASE, L->base
+ | ins_next1
+ | settp CRET1, TMP0
+ | daddu RA, BASE, RA
+ | sd CRET1, 0(RA)
+ | ins_next2
+ break;
+
+ /* -- Table ops --------------------------------------------------------- */
+
+ case BC_TNEW:
+ case BC_TDUP:
+ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
+ | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH)
+ | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | sltu AT, TMP0, TMP1
+ | beqz AT, >5
+ |1:
+ if (op == BC_TNEW) {
+ | load_got lj_tab_new
+ | srl CARG2, RD, 3
+ | andi CARG2, CARG2, 0x7ff
+ | li TMP0, 0x801
+ | addiu AT, CARG2, -0x7ff
+ | srl CARG3, RD, 14
+ |.if MIPSR6
+ | seleqz TMP0, TMP0, AT
+ | selnez CARG2, CARG2, AT
+ | or CARG2, CARG2, TMP0
+ |.else
+ | movz CARG2, TMP0, AT
+ |.endif
+ | // (lua_State *L, int32_t asize, uint32_t hbits)
+ | call_intern lj_tab_new
+ |. move CARG1, L
+ | // Returns Table *.
+ } else {
+ | load_got lj_tab_dup
+ | dsubu TMP1, KBASE, RD
+ | move CARG1, L
+ | call_intern lj_tab_dup // (lua_State *L, Table *kt)
+ |. ld CARG2, -8(TMP1) // KBASE-8-tab_const*8
+ | // Returns Table *.
+ }
+ | li TMP0, LJ_TTAB
+ | ld BASE, L->base
+ | ins_next1
+ | daddu RA, BASE, RA
+ | settp CRET1, TMP0
+ | sd CRET1, 0(RA)
+ | ins_next2
+ |5:
+ | load_got lj_gc_step_fixtop
+ | move MULTRES, RD
+ | call_intern lj_gc_step_fixtop // (lua_State *L)
+ |. move CARG1, L
+ | b <1
+ |. move RD, MULTRES
+ break;
+
+ case BC_GGET:
+ | // RA = dst*8, RD = str_const*8 (~)
+ case BC_GSET:
+ | // RA = src*8, RD = str_const*8 (~)
+ | ld LFUNC:TMP2, FRAME_FUNC(BASE)
+ | dsubu TMP1, KBASE, RD
+ | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8
+ | cleartp LFUNC:TMP2
+ | ld TAB:RB, LFUNC:TMP2->env
+ if (op == BC_GGET) {
+ | b ->BC_TGETS_Z
+ } else {
+ | b ->BC_TSETS_Z
+ }
+ |. daddu RA, BASE, RA
+ break;
+
+ case BC_TGETV:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | daddu CARG2, BASE, RB
+ | daddu CARG3, BASE, RC
+ | ld TAB:RB, 0(CARG2)
+ | ld TMP2, 0(CARG3)
+ | daddu RA, BASE, RA
+ | checktab TAB:RB, ->vmeta_tgetv
+ | gettp TMP3, TMP2
+ | bne TMP3, TISNUM, >5 // Integer key?
+ |. lw TMP0, TAB:RB->asize
+ | sextw TMP2, TMP2
+ | ld TMP1, TAB:RB->array
+ | sltu AT, TMP2, TMP0
+ | sll TMP2, TMP2, 3
+ | beqz AT, ->vmeta_tgetv // Integer key and in array part?
+ |. daddu TMP2, TMP1, TMP2
+ | ld AT, 0(TMP2)
+ | beq AT, TISNIL, >2
+ |. ld CRET1, 0(TMP2)
+ |1:
+ | ins_next1
+ | sd CRET1, 0(RA)
+ | ins_next2
+ |
+ |2: // Check for __index if table value is nil.
+ | ld TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ |. nop
+ | lbu TMP0, TAB:TMP2->nomm
+ | andi TMP0, TMP0, 1<<MM_index
+ | bnez TMP0, <1 // 'no __index' flag set: done.
+ |. nop
+ | b ->vmeta_tgetv
+ |. nop
+ |
+ |5:
+ | li AT, LJ_TSTR
+ | bne TMP3, AT, ->vmeta_tgetv
+ |. cleartp RC, TMP2
+ | b ->BC_TGETS_Z // String key?
+ |. nop
+ break;
+ case BC_TGETS:
+ | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RC8a RC, INS
+ | daddu CARG2, BASE, RB
+ | decode_RC8b RC
+ | ld TAB:RB, 0(CARG2)
+ | dsubu CARG3, KBASE, RC
+ | daddu RA, BASE, RA
+ | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8
+ | checktab TAB:RB, ->vmeta_tgets1
+ |->BC_TGETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
+ | lw TMP0, TAB:RB->hmask
+ | lw TMP1, STR:RC->sid
+ | ld NODE:TMP2, TAB:RB->node
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | sll TMP0, TMP1, 5
+ | sll TMP1, TMP1, 3
+ | subu TMP1, TMP0, TMP1
+ | li TMP3, LJ_TSTR
+ | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
+ | settp STR:RC, TMP3 // Tagged key to look for.
+ |1:
+ | ld CARG1, NODE:TMP2->key
+ | ld CRET1, NODE:TMP2->val
+ | ld NODE:TMP1, NODE:TMP2->next
+ | bne CARG1, RC, >4
+ |. ld TAB:TMP3, TAB:RB->metatable
+ | beq CRET1, TISNIL, >5 // Key found, but nil value?
+ |. nop
+ |3:
+ | ins_next1
+ | sd CRET1, 0(RA)
+ | ins_next2
+ |
+ |4: // Follow hash chain.
+ | bnez NODE:TMP1, <1
+ |. move NODE:TMP2, NODE:TMP1
+ | // End of hash chain: key not found, nil result.
+ |
+ |5: // Check for __index if table value is nil.
+ | beqz TAB:TMP3, <3 // No metatable: done.
+ |. move CRET1, TISNIL
+ | lbu TMP0, TAB:TMP3->nomm
+ | andi TMP0, TMP0, 1<<MM_index
+ | bnez TMP0, <3 // 'no __index' flag set: done.
+ |. nop
+ | b ->vmeta_tgets
+ |. nop
+ break;
+ case BC_TGETB:
+ | // RA = dst*8, RB = table*8, RC = index*8
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | daddu CARG2, BASE, RB
+ | decode_RDtoRC8 RC, RD
+ | ld TAB:RB, 0(CARG2)
+ | daddu RA, BASE, RA
+ | srl TMP0, RC, 3
+ | checktab TAB:RB, ->vmeta_tgetb
+ | lw TMP1, TAB:RB->asize
+ | ld TMP2, TAB:RB->array
+ | sltu AT, TMP0, TMP1
+ | beqz AT, ->vmeta_tgetb
+ |. daddu RC, TMP2, RC
+ | ld AT, 0(RC)
+ | beq AT, TISNIL, >5
+ |. ld CRET1, 0(RC)
+ |1:
+ | ins_next1
+ | sd CRET1, 0(RA)
+ | ins_next2
+ |
+ |5: // Check for __index if table value is nil.
+ | ld TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ |. nop
+ | lbu TMP1, TAB:TMP2->nomm
+ | andi TMP1, TMP1, 1<<MM_index
+ | bnez TMP1, <1 // 'no __index' flag set: done.
+ |. nop
+ | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2!
+ |. nop
+ break;
+ case BC_TGETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | daddu RB, BASE, RB
+ | daddu RC, BASE, RC
+ | ld TAB:CARG1, 0(RB)
+ | lw CARG2, LO(RC)
+ | daddu RA, BASE, RA
+ | cleartp TAB:CARG1
+ | lw TMP0, TAB:CARG1->asize
+ | ld TMP1, TAB:CARG1->array
+ | sltu AT, CARG2, TMP0
+ | sll TMP2, CARG2, 3
+ | beqz AT, ->vmeta_tgetr // In array part?
+ |. daddu CRET1, TMP1, TMP2
+ | ld CARG2, 0(CRET1)
+ |->BC_TGETR_Z:
+ | ins_next1
+ | sd CARG2, 0(RA)
+ | ins_next2
+ break;
+
+ case BC_TSETV:
+ | // RA = src*8, RB = table*8, RC = key*8
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | daddu CARG2, BASE, RB
+ | daddu CARG3, BASE, RC
+ | ld RB, 0(CARG2)
+ | ld TMP2, 0(CARG3)
+ | daddu RA, BASE, RA
+ | checktab RB, ->vmeta_tsetv
+ | checkint TMP2, >5
+ |. sextw RC, TMP2
+ | lw TMP0, TAB:RB->asize
+ | ld TMP1, TAB:RB->array
+ | sltu AT, RC, TMP0
+ | sll TMP2, RC, 3
+ | beqz AT, ->vmeta_tsetv // Integer key and in array part?
+ |. daddu TMP1, TMP1, TMP2
+ | ld TMP0, 0(TMP1)
+ | lbu TMP3, TAB:RB->marked
+ | beq TMP0, TISNIL, >3
+ |. ld CRET1, 0(RA)
+ |1:
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | bnez AT, >7
+ |. sd CRET1, 0(TMP1)
+ |2:
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | ld TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ |. nop
+ | lbu TMP2, TAB:TMP2->nomm
+ | andi TMP2, TMP2, 1<<MM_newindex
+ | bnez TMP2, <1 // 'no __newindex' flag set: done.
+ |. nop
+ | b ->vmeta_tsetv
+ |. nop
+ |
+ |5:
+ | gettp AT, TMP2
+ | daddiu AT, AT, -LJ_TSTR
+ | bnez AT, ->vmeta_tsetv
+ |. nop
+ | b ->BC_TSETS_Z // String key?
+ |. cleartp STR:RC, TMP2
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMP3, TMP0, <2
+ break;
+ case BC_TSETS:
+ | // RA = src*8, RB = table*8, RC = str_const*8 (~)
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | daddu CARG2, BASE, RB
+ | decode_RC8a RC, INS
+ | ld TAB:RB, 0(CARG2)
+ | decode_RC8b RC
+ | dsubu CARG3, KBASE, RC
+ | ld RC, -8(CARG3) // KBASE-8-str_const*8
+ | daddu RA, BASE, RA
+ | cleartp STR:RC
+ | checktab TAB:RB, ->vmeta_tsets1
+ |->BC_TSETS_Z:
+ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8
+ | lw TMP0, TAB:RB->hmask
+ | lw TMP1, STR:RC->sid
+ | ld NODE:TMP2, TAB:RB->node
+ | sb r0, TAB:RB->nomm // Clear metamethod cache.
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ | sll TMP0, TMP1, 5
+ | sll TMP1, TMP1, 3
+ | subu TMP1, TMP0, TMP1
+ | li TMP3, LJ_TSTR
+ | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
+ | settp STR:RC, TMP3 // Tagged key to look for.
+ |.if FPU
+ | ldc1 FTMP0, 0(RA)
+ |.else
+ | ld CRET1, 0(RA)
+ |.endif
+ |1:
+ | ld TMP0, NODE:TMP2->key
+ | ld CARG2, NODE:TMP2->val
+ | ld NODE:TMP1, NODE:TMP2->next
+ | bne TMP0, RC, >5
+ |. lbu TMP3, TAB:RB->marked
+ | beq CARG2, TISNIL, >4 // Key found, but nil value?
+ |. ld TAB:TMP0, TAB:RB->metatable
+ |2:
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | bnez AT, >7
+ |.if FPU
+ |. sdc1 FTMP0, NODE:TMP2->val
+ |.else
+ |. sd CRET1, NODE:TMP2->val
+ |.endif
+ |3:
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | beqz TAB:TMP0, <2 // No metatable: done.
+ |. nop
+ | lbu TMP0, TAB:TMP0->nomm
+ | andi TMP0, TMP0, 1<<MM_newindex
+ | bnez TMP0, <2 // 'no __newindex' flag set: done.
+ |. nop
+ | b ->vmeta_tsets
+ |. nop
+ |
+ |5: // Follow hash chain.
+ | bnez NODE:TMP1, <1
+ |. move NODE:TMP2, NODE:TMP1
+ | // End of hash chain: key not found, add a new one
+ |
+ | // But check for __newindex first.
+ | ld TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, >6 // No metatable: continue.
+ |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv)
+ | lbu TMP0, TAB:TMP2->nomm
+ | andi TMP0, TMP0, 1<<MM_newindex
+ | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | load_got lj_tab_newkey
+ | sd RC, 0(CARG3)
+ | sd BASE, L->base
+ | move CARG2, TAB:RB
+ | sd PC, SAVE_PC
+ | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ |. move CARG1, L
+ | // Returns TValue *.
+ | ld BASE, L->base
+ |.if FPU
+ | b <3 // No 2nd write barrier needed.
+ |. sdc1 FTMP0, 0(CRET1)
+ |.else
+ | ld CARG1, 0(RA)
+ | b <3 // No 2nd write barrier needed.
+ |. sd CARG1, 0(CRET1)
+ |.endif
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMP3, TMP0, <3
+ break;
+ case BC_TSETB:
+ | // RA = src*8, RB = table*8, RC = index*8
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | daddu CARG2, BASE, RB
+ | decode_RDtoRC8 RC, RD
+ | ld TAB:RB, 0(CARG2)
+ | daddu RA, BASE, RA
+ | srl TMP0, RC, 3
+ | checktab RB, ->vmeta_tsetb
+ | lw TMP1, TAB:RB->asize
+ | ld TMP2, TAB:RB->array
+ | sltu AT, TMP0, TMP1
+ | beqz AT, ->vmeta_tsetb
+ |. daddu RC, TMP2, RC
+ | ld TMP1, 0(RC)
+ | lbu TMP3, TAB:RB->marked
+ | beq TMP1, TISNIL, >5
+ |1:
+ |. ld CRET1, 0(RA)
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | bnez AT, >7
+ |. sd CRET1, 0(RC)
+ |2:
+ | ins_next
+ |
+ |5: // Check for __newindex if previous value is nil.
+ | ld TAB:TMP2, TAB:RB->metatable
+ | beqz TAB:TMP2, <1 // No metatable: done.
+ |. nop
+ | lbu TMP1, TAB:TMP2->nomm
+ | andi TMP1, TMP1, 1<<MM_newindex
+ | bnez TMP1, <1 // 'no __newindex' flag set: done.
+ |. nop
+ | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2!
+ |. nop
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMP3, TMP0, <2
+ break;
+ case BC_TSETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | decode_RB8a RB, INS
+ | decode_RB8b RB
+ | decode_RDtoRC8 RC, RD
+ | daddu CARG1, BASE, RB
+ | daddu CARG3, BASE, RC
+ | ld TAB:CARG2, 0(CARG1)
+ | lw CARG3, LO(CARG3)
+ | cleartp TAB:CARG2
+ | lbu TMP3, TAB:CARG2->marked
+ | lw TMP0, TAB:CARG2->asize
+ | ld TMP1, TAB:CARG2->array
+ | andi AT, TMP3, LJ_GC_BLACK // isblack(table)
+ | bnez AT, >7
+ |. daddu RA, BASE, RA
+ |2:
+ | sltu AT, CARG3, TMP0
+ | sll TMP2, CARG3, 3
+ | beqz AT, ->vmeta_tsetr // In array part?
+ |. daddu CRET1, TMP1, TMP2
+ |->BC_TSETR_Z:
+ | ld CARG1, 0(RA)
+ | ins_next1
+ | sd CARG1, 0(CRET1)
+ | ins_next2
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP3, CRET1, <2
+ break;
+
+ case BC_TSETM:
+ | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
+ | daddu RA, BASE, RA
+ |1:
+ | daddu TMP3, KBASE, RD
+ | ld TAB:CARG2, -8(RA) // Guaranteed to be a table.
+ | addiu TMP0, MULTRES, -8
+ | lw TMP3, LO(TMP3) // Integer constant is in lo-word.
+ | beqz TMP0, >4 // Nothing to copy?
+ |. srl CARG3, TMP0, 3
+ | cleartp CARG2
+ | addu CARG3, CARG3, TMP3
+ | lw TMP2, TAB:CARG2->asize
+ | sll TMP1, TMP3, 3
+ | lbu TMP3, TAB:CARG2->marked
+ | ld CARG1, TAB:CARG2->array
+ | sltu AT, TMP2, CARG3
+ | bnez AT, >5
+ |. daddu TMP2, RA, TMP0
+ | daddu TMP1, TMP1, CARG1
+ | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table)
+ |3: // Copy result slots to table.
+ | ld CRET1, 0(RA)
+ | daddiu RA, RA, 8
+ | sltu AT, RA, TMP2
+ | sd CRET1, 0(TMP1)
+ | bnez AT, <3
+ |. daddiu TMP1, TMP1, 8
+ | bnez TMP0, >7
+ |. nop
+ |4:
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | load_got lj_tab_reasize
+ | sd BASE, L->base
+ | sd PC, SAVE_PC
+ | move BASE, RD
+ | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ |. move CARG1, L
+ | // Must not reallocate the stack.
+ | move RD, BASE
+ | b <1
+ |. ld BASE, L->base // Reload BASE for lack of a saved register.
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP3, TMP0, <4
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALLM:
+ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
+ | decode_RDtoRC8 NARGS8:RC, RD
+ | b ->BC_CALL_Z
+ |. addu NARGS8:RC, NARGS8:RC, MULTRES
+ break;
+ case BC_CALL:
+ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
+ | decode_RDtoRC8 NARGS8:RC, RD
+ |->BC_CALL_Z:
+ | move TMP2, BASE
+ | daddu BASE, BASE, RA
+ | ld LFUNC:RB, 0(BASE)
+ | daddiu BASE, BASE, 16
+ | addiu NARGS8:RC, NARGS8:RC, -8
+ | checkfunc RB, ->vmeta_call
+ | ins_call
+ break;
+
+ case BC_CALLMT:
+ | // RA = base*8, (RB = 0,) RC = extra_nargs*8
+ | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD.
+ | // Fall through. Assumes BC_CALLT follows.
+ break;
+ case BC_CALLT:
+ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
+ | daddu RA, BASE, RA
+ | ld RB, 0(RA)
+ | move NARGS8:RC, RD
+ | ld TMP1, FRAME_PC(BASE)
+ | daddiu RA, RA, 16
+ | addiu NARGS8:RC, NARGS8:RC, -8
+ | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt
+ |->BC_CALLT_Z:
+ | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'.
+ | lbu TMP3, LFUNC:CARG3->ffid
+ | bnez TMP0, >7
+ |. xori TMP2, TMP1, FRAME_VARG
+ |1:
+ | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
+ | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function?
+ | move TMP2, BASE
+ | move RB, CARG3
+ | beqz NARGS8:RC, >3
+ |. move TMP3, NARGS8:RC
+ |2:
+ | ld CRET1, 0(RA)
+ | daddiu RA, RA, 8
+ | addiu TMP3, TMP3, -8
+ | sd CRET1, 0(TMP2)
+ | bnez TMP3, <2
+ |. daddiu TMP2, TMP2, 8
+ |3:
+ | or TMP0, TMP0, AT
+ | beqz TMP0, >5
+ |. nop
+ |4:
+ | ins_callt
+ |
+ |5: // Tailcall to a fast function with a Lua frame below.
+ | lw INS, -4(TMP1)
+ | decode_RA8a RA, INS
+ | decode_RA8b RA
+ | dsubu TMP1, BASE, RA
+ | ld TMP1, -32(TMP1)
+ | cleartp LFUNC:TMP1
+ | ld TMP1, LFUNC:TMP1->pc
+ | b <4
+ |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
+ |
+ |7: // Tailcall from a vararg function.
+ | andi AT, TMP2, FRAME_TYPEP
+ | bnez AT, <1 // Vararg frame below?
+ |. dsubu TMP2, BASE, TMP2 // Relocate BASE down.
+ | move BASE, TMP2
+ | ld TMP1, FRAME_PC(TMP2)
+ | b <1
+ |. andi TMP0, TMP1, FRAME_TYPE
+ break;
+
+ case BC_ITERC:
+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
+ | move TMP2, BASE // Save old BASE for vmeta_call.
+ | daddu BASE, BASE, RA
+ | ld RB, -24(BASE)
+ | ld CARG1, -16(BASE)
+ | ld CARG2, -8(BASE)
+ | li NARGS8:RC, 16 // Iterators get 2 arguments.
+ | sd RB, 0(BASE) // Copy callable.
+ | sd CARG1, 16(BASE) // Copy state.
+ | sd CARG2, 24(BASE) // Copy control var.
+ | daddiu BASE, BASE, 16
+ | checkfunc RB, ->vmeta_call
+ | ins_call
+ break;
+
+ case BC_ITERN:
+ |.if JIT and ENDIAN_LE
+ | hotloop
+ |.endif
+ |->vm_IITERN:
+ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
+ | daddu RA, BASE, RA
+ | ld TAB:RB, -16(RA)
+ | lw RC, -8+LO(RA) // Get index from control var.
+ | cleartp TAB:RB
+ | daddiu PC, PC, 4
+ | lw TMP0, TAB:RB->asize
+ | ld TMP1, TAB:RB->array
+ | dsll CARG3, TISNUM, 47
+ |1: // Traverse array part.
+ | sltu AT, RC, TMP0
+ | beqz AT, >5 // Index points after array part?
+ |. sll TMP3, RC, 3
+ | daddu TMP3, TMP1, TMP3
+ | ld CARG1, 0(TMP3)
+ | lhu RD, -4+OFS_RD(PC)
+ | or TMP2, RC, CARG3
+ | beq CARG1, TISNIL, <1 // Skip holes in array part.
+ |. addiu RC, RC, 1
+ | sd TMP2, 0(RA)
+ | sd CARG1, 8(RA)
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | decode_RD4b RD
+ | daddu RD, RD, TMP3
+ | sw RC, -8+LO(RA) // Update control var.
+ | daddu PC, PC, RD
+ |3:
+ | ins_next
+ |
+ |5: // Traverse hash part.
+ | lw TMP1, TAB:RB->hmask
+ | subu RC, RC, TMP0
+ | ld TMP2, TAB:RB->node
+ |6:
+ | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1.
+ | bnez AT, <3
+ |. sll TMP3, RC, 5
+ | sll RB, RC, 3
+ | subu TMP3, TMP3, RB
+ | daddu NODE:TMP3, TMP3, TMP2
+ | ld CARG1, 0(NODE:TMP3)
+ | lhu RD, -4+OFS_RD(PC)
+ | beq CARG1, TISNIL, <6 // Skip holes in hash part.
+ |. addiu RC, RC, 1
+ | ld CARG2, NODE:TMP3->key
+ | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535)
+ | sd CARG1, 8(RA)
+ | addu RC, RC, TMP0
+ | decode_RD4b RD
+ | addu RD, RD, TMP3
+ | sd CARG2, 0(RA)
+ | daddu PC, PC, RD
+ | b <3
+ |. sw RC, -8+LO(RA) // Update control var.
+ break;
+
+ case BC_ISNEXT:
+ | // RA = base*8, RD = target (points to ITERN)
+ | daddu RA, BASE, RA
+ | srl TMP0, RD, 1
+ | ld CFUNC:CARG1, -24(RA)
+ | daddu TMP0, PC, TMP0
+ | ld CARG2, -16(RA)
+ | ld CARG3, -8(RA)
+ | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
+ | checkfunc CFUNC:CARG1, >5
+ | gettp CARG2, CARG2
+ | daddiu CARG2, CARG2, -LJ_TTAB
+ | lbu TMP1, CFUNC:CARG1->ffid
+ | daddiu CARG3, CARG3, -LJ_TNIL
+ | or AT, CARG2, CARG3
+ | daddiu TMP1, TMP1, -FF_next_N
+ | or AT, AT, TMP1
+ | bnez AT, >5
+ |. lui TMP1, (LJ_KEYINDEX >> 16)
+ | daddu PC, TMP0, TMP2
+ | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
+ | dsll TMP1, TMP1, 32
+ | sd TMP1, -8(RA)
+ |1:
+ | ins_next
+ |5: // Despecialize bytecode if any of the checks fail.
+ | li TMP3, BC_JMP
+ | li TMP1, BC_ITERC
+ | sb TMP3, -4+OFS_OP(PC)
+ | daddu PC, TMP0, TMP2
+ |.if JIT
+ | lb TMP0, OFS_OP(PC)
+ | li AT, BC_ITERN
+ | bne TMP0, AT, >6
+ |. lhu TMP2, OFS_RD(PC)
+ |.endif
+ | b <1
+ |. sb TMP1, OFS_OP(PC)
+ |.if JIT
+ |6: // Unpatch JLOOP.
+ | ld TMP0, DISPATCH_J(trace)(DISPATCH)
+ | sll TMP2, TMP2, 3
+ | daddu TMP0, TMP0, TMP2
+ | ld TRACE:TMP2, 0(TMP0)
+ | lw TMP0, TRACE:TMP2->startins
+ | li AT, -256
+ | and TMP0, TMP0, AT
+ | or TMP0, TMP0, TMP1
+ | b <1
+ |. sw TMP0, 0(PC)
+ |.endif
+ break;
+
+ case BC_VARG:
+ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
+ | ld TMP0, FRAME_PC(BASE)
+ | decode_RDtoRC8 RC, RD
+ | decode_RB8a RB, INS
+ | daddu RC, BASE, RC
+ | decode_RB8b RB
+ | daddu RA, BASE, RA
+ | daddiu RC, RC, FRAME_VARG
+ | daddu TMP2, RA, RB
+ | daddiu TMP3, BASE, -16 // TMP3 = vtop
+ | dsubu RC, RC, TMP0 // RC = vbase
+ | // Note: RC may now be even _above_ BASE if nargs was < numparams.
+ | beqz RB, >5 // Copy all varargs?
+ |. dsubu TMP1, TMP3, RC
+ | daddiu TMP2, TMP2, -16
+ |1: // Copy vararg slots to destination slots.
+ | ld CARG1, 0(RC)
+ | sltu AT, RC, TMP3
+ | daddiu RC, RC, 8
+ |.if MIPSR6
+ | selnez CARG1, CARG1, AT
+ | seleqz AT, TISNIL, AT
+ | or CARG1, CARG1, AT
+ |.else
+ | movz CARG1, TISNIL, AT
+ |.endif
+ | sd CARG1, 0(RA)
+ | sltu AT, RA, TMP2
+ | bnez AT, <1
+ |. daddiu RA, RA, 8
+ |3:
+ | ins_next
+ |
+ |5: // Copy all varargs.
+ | ld TMP0, L->maxstack
+ | blez TMP1, <3 // No vararg slots?
+ |. li MULTRES, 8 // MULTRES = (0+1)*8
+ | daddu TMP2, RA, TMP1
+ | sltu AT, TMP0, TMP2
+ | bnez AT, >7
+ |. daddiu MULTRES, TMP1, 8
+ |6:
+ | ld CRET1, 0(RC)
+ | daddiu RC, RC, 8
+ | sd CRET1, 0(RA)
+ | sltu AT, RC, TMP3
+ | bnez AT, <6 // More vararg slots?
+ |. daddiu RA, RA, 8
+ | b <3
+ |. nop
+ |
+ |7: // Grow stack for varargs.
+ | load_got lj_state_growstack
+ | sd RA, L->top
+ | dsubu RA, RA, BASE
+ | sd BASE, L->base
+ | dsubu BASE, RC, BASE // Need delta, because BASE may change.
+ | sd PC, SAVE_PC
+ | srl CARG2, TMP1, 3
+ | call_intern lj_state_growstack // (lua_State *L, int n)
+ |. move CARG1, L
+ | move RC, BASE
+ | ld BASE, L->base
+ | daddu RA, BASE, RA
+ | daddu RC, BASE, RC
+ | b <6
+ |. daddiu TMP3, BASE, -16
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM:
+ | // RA = results*8, RD = extra_nresults*8
+ | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8.
+ | // Fall through. Assumes BC_RET follows.
+ break;
+
+ case BC_RET:
+ | // RA = results*8, RD = (nresults+1)*8
+ | ld PC, FRAME_PC(BASE)
+ | daddu RA, BASE, RA
+ | move MULTRES, RD
+ |1:
+ | andi TMP0, PC, FRAME_TYPE
+ | bnez TMP0, ->BC_RETV_Z
+ |. xori TMP1, PC, FRAME_VARG
+ |
+ |->BC_RET_Z:
+ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
+ | lw INS, -4(PC)
+ | daddiu TMP2, BASE, -16
+ | daddiu RC, RD, -8
+ | decode_RA8a TMP0, INS
+ | decode_RB8a RB, INS
+ | decode_RA8b TMP0
+ | decode_RB8b RB
+ | daddu TMP3, TMP2, RB
+ | beqz RC, >3
+ |. dsubu BASE, TMP2, TMP0
+ |2:
+ | ld CRET1, 0(RA)
+ | daddiu RA, RA, 8
+ | daddiu RC, RC, -8
+ | sd CRET1, 0(TMP2)
+ | bnez RC, <2
+ |. daddiu TMP2, TMP2, 8
+ |3:
+ | daddiu TMP3, TMP3, -8
+ |5:
+ | sltu AT, TMP2, TMP3
+ | bnez AT, >6
+ |. ld LFUNC:TMP1, FRAME_FUNC(BASE)
+ | ins_next1
+ | cleartp LFUNC:TMP1
+ | ld TMP1, LFUNC:TMP1->pc
+ | ld KBASE, PC2PROTO(k)(TMP1)
+ | ins_next2
+ |
+ |6: // Fill up results with nil.
+ | sd TISNIL, 0(TMP2)
+ | b <5
+ |. daddiu TMP2, TMP2, 8
+ |
+ |->BC_RETV_Z: // Non-standard return case.
+ | andi TMP2, TMP1, FRAME_TYPEP
+ | bnez TMP2, ->vm_return
+ |. nop
+ | // Return from vararg function: relocate BASE down.
+ | dsubu BASE, BASE, TMP1
+ | b <1
+ |. ld PC, FRAME_PC(BASE)
+ break;
+
+ case BC_RET0: case BC_RET1:
+ | // RA = results*8, RD = (nresults+1)*8
+ | ld PC, FRAME_PC(BASE)
+ | daddu RA, BASE, RA
+ | move MULTRES, RD
+ | andi TMP0, PC, FRAME_TYPE
+ | bnez TMP0, ->BC_RETV_Z
+ |. xori TMP1, PC, FRAME_VARG
+ | lw INS, -4(PC)
+ | daddiu TMP2, BASE, -16
+ if (op == BC_RET1) {
+ | ld CRET1, 0(RA)
+ }
+ | decode_RB8a RB, INS
+ | decode_RA8a RA, INS
+ | decode_RB8b RB
+ | decode_RA8b RA
+ | dsubu BASE, TMP2, RA
+ if (op == BC_RET1) {
+ | sd CRET1, 0(TMP2)
+ }
+ |5:
+ | sltu AT, RD, RB
+ | bnez AT, >6
+ |. ld TMP1, FRAME_FUNC(BASE)
+ | ins_next1
+ | cleartp LFUNC:TMP1
+ | ld TMP1, LFUNC:TMP1->pc
+ | ld KBASE, PC2PROTO(k)(TMP1)
+ | ins_next2
+ |
+ |6: // Fill up results with nil.
+ | daddiu TMP2, TMP2, 8
+ | daddiu RD, RD, 8
+ | b <5
+ if (op == BC_RET1) {
+ |. sd TISNIL, 0(TMP2)
+ } else {
+ |. sd TISNIL, -8(TMP2)
+ }
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ case BC_FORL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IFORL follows.
+ break;
+
+ case BC_JFORI:
+ case BC_JFORL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_FORI:
+ case BC_IFORL:
+ | // RA = base*8, RD = target (after end of loop or start of loop)
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | daddu RA, BASE, RA
+ | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type
+ | gettp CARG3, CARG1
+ if (op != BC_JFORL) {
+ | srl RD, RD, 1
+ | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535)
+ | daddu TMP2, RD, TMP2
+ }
+ if (!vk) {
+ | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type
+ | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type
+ | gettp CARG4, CARG2
+ | bne CARG3, TISNUM, >5
+ |. gettp CRET2, CRET1
+ | bne CARG4, TISNUM, ->vmeta_for
+ |. sextw CARG3, CARG1
+ | bne CRET2, TISNUM, ->vmeta_for
+ |. sextw CARG2, CARG2
+ | dext AT, CRET1, 31, 0
+ | slt CRET1, CARG2, CARG3
+ | slt TMP1, CARG3, CARG2
+ |.if MIPSR6
+ | selnez TMP1, TMP1, AT
+ | seleqz CRET1, CRET1, AT
+ | or CRET1, CRET1, TMP1
+ |.else
+ | movn CRET1, TMP1, AT
+ |.endif
+ } else {
+ | bne CARG3, TISNUM, >5
+ |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type
+ | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type
+ | sextw TMP3, CARG1
+ | sextw CARG2, CARG2
+ | sextw CRET1, CRET1
+ | addu CARG1, TMP3, CARG2
+ | xor TMP0, CARG1, TMP3
+ | xor TMP1, CARG1, CARG2
+ | and TMP0, TMP0, TMP1
+ | slt TMP1, CARG1, CRET1
+ | slt CRET1, CRET1, CARG1
+ | slt AT, CARG2, r0
+ | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow.
+ |.if MIPSR6
+ | selnez TMP1, TMP1, AT
+ | seleqz CRET1, CRET1, AT
+ | or CRET1, CRET1, TMP1
+ |.else
+ | movn CRET1, TMP1, AT
+ |.endif
+ | or CRET1, CRET1, TMP0
+ | zextw CARG1, CARG1
+ | settp CARG1, TISNUM
+ }
+ |1:
+ if (op == BC_FORI) {
+ |.if MIPSR6
+ | selnez TMP2, TMP2, CRET1
+ |.else
+ | movz TMP2, r0, CRET1
+ |.endif
+ | daddu PC, PC, TMP2
+ } else if (op == BC_JFORI) {
+ | daddu PC, PC, TMP2
+ | lhu RD, -4+OFS_RD(PC)
+ } else if (op == BC_IFORL) {
+ |.if MIPSR6
+ | seleqz TMP2, TMP2, CRET1
+ |.else
+ | movn TMP2, r0, CRET1
+ |.endif
+ | daddu PC, PC, TMP2
+ }
+ if (vk) {
+ | sd CARG1, FORL_IDX*8(RA)
+ }
+ | ins_next1
+ | sd CARG1, FORL_EXT*8(RA)
+ |2:
+ if (op == BC_JFORI) {
+ | beqz CRET1, =>BC_JLOOP
+ |. decode_RD8b RD
+ } else if (op == BC_JFORL) {
+ | beqz CRET1, =>BC_JLOOP
+ }
+ | ins_next2
+ |
+ |5: // FP loop.
+ |.if FPU
+ if (!vk) {
+ | ldc1 f0, FORL_IDX*8(RA)
+ | ldc1 f2, FORL_STOP*8(RA)
+ | sltiu TMP0, CARG3, LJ_TISNUM
+ | sltiu TMP1, CARG4, LJ_TISNUM
+ | sltiu AT, CRET2, LJ_TISNUM
+ | ld TMP3, FORL_STEP*8(RA)
+ | and TMP0, TMP0, TMP1
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_for
+ |. slt TMP3, TMP3, r0
+ |.if MIPSR6
+ | dmtc1 TMP3, FTMP2
+ | cmp.lt.d FTMP0, f0, f2
+ | cmp.lt.d FTMP1, f2, f0
+ | sel.d FTMP2, FTMP1, FTMP0
+ | b <1
+ |. dmfc1 CRET1, FTMP2
+ |.else
+ | c.ole.d 0, f0, f2
+ | c.ole.d 1, f2, f0
+ | li CRET1, 1
+ | movt CRET1, r0, 0
+ | movt AT, r0, 1
+ | b <1
+ |. movn CRET1, AT, TMP3
+ |.endif
+ } else {
+ | ldc1 f0, FORL_IDX*8(RA)
+ | ldc1 f4, FORL_STEP*8(RA)
+ | ldc1 f2, FORL_STOP*8(RA)
+ | ld TMP3, FORL_STEP*8(RA)
+ | add.d f0, f0, f4
+ |.if MIPSR6
+ | slt TMP3, TMP3, r0
+ | dmtc1 TMP3, FTMP2
+ | cmp.lt.d FTMP0, f0, f2
+ | cmp.lt.d FTMP1, f2, f0
+ | sel.d FTMP2, FTMP1, FTMP0
+ | dmfc1 CRET1, FTMP2
+ if (op == BC_IFORL) {
+ | seleqz TMP2, TMP2, CRET1
+ | daddu PC, PC, TMP2
+ }
+ |.else
+ | c.ole.d 0, f0, f2
+ | c.ole.d 1, f2, f0
+ | slt TMP3, TMP3, r0
+ | li CRET1, 1
+ | li AT, 1
+ | movt CRET1, r0, 0
+ | movt AT, r0, 1
+ | movn CRET1, AT, TMP3
+ if (op == BC_IFORL) {
+ | movn TMP2, r0, CRET1
+ | daddu PC, PC, TMP2
+ }
+ |.endif
+ | sdc1 f0, FORL_IDX*8(RA)
+ | ins_next1
+ | b <2
+ |. sdc1 f0, FORL_EXT*8(RA)
+ }
+ |.else
+ if (!vk) {
+ | sltiu TMP0, CARG3, LJ_TISNUM
+ | sltiu TMP1, CARG4, LJ_TISNUM
+ | sltiu AT, CRET2, LJ_TISNUM
+ | and TMP0, TMP0, TMP1
+ | and AT, AT, TMP0
+ | beqz AT, ->vmeta_for
+ |. nop
+ | bal ->vm_sfcmpolex
+ |. lw TMP3, FORL_STEP*8+HI(RA)
+ | b <1
+ |. nop
+ } else {
+ | load_got __adddf3
+ | call_extern
+ |. sw TMP2, TMPD
+ | ld CARG2, FORL_STOP*8(RA)
+ | move CARG1, CRET1
+ if ( op == BC_JFORL ) {
+ | lhu RD, -4+OFS_RD(PC)
+ | decode_RD8b RD
+ }
+ | bal ->vm_sfcmpolex
+ |. lw TMP3, FORL_STEP*8+HI(RA)
+ | b <1
+ |. lw TMP2, TMPD
+ }
+ |.endif
+ break;
+
+ case BC_ITERL:
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_IITERL follows.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | // RA = base*8, RD = target
+ | daddu RA, BASE, RA
+ | ld TMP1, 0(RA)
+ | beq TMP1, TISNIL, >1 // Stop if iterator returned nil.
+ |. nop
+ if (op == BC_JITERL) {
+ | b =>BC_JLOOP
+ |. sd TMP1, -8(RA)
+ } else {
+ | branch_RD // Otherwise save control var + branch.
+ | sd TMP1, -8(RA)
+ }
+ |1:
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | // RA = base*8, RD = target (loop extent)
+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
+ | // This opcode does NOT jump, its only purpose is to detect a hot loop.
+ |.if JIT
+ | hotloop
+ |.endif
+ | // Fall through. Assumes BC_ILOOP follows.
+ break;
+
+ case BC_ILOOP:
+ | // RA = base*8, RD = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+ |.if JIT
+ | // RA = base*8 (ignored), RD = traceno*8
+ | ld TMP1, DISPATCH_J(trace)(DISPATCH)
+ | li AT, 0
+ | daddu TMP1, TMP1, RD
+ | // Traces on MIPS don't store the trace number, so use 0.
+ | sd AT, DISPATCH_GL(vmstate)(DISPATCH)
+ | ld TRACE:TMP2, 0(TMP1)
+ | sd BASE, DISPATCH_GL(jit_base)(DISPATCH)
+ | ld TMP2, TRACE:TMP2->mcode
+ | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
+ | jr TMP2
+ |. daddiu JGL, DISPATCH, GG_DISP2G+32768
+ |.endif
+ break;
+
+ case BC_JMP:
+ | // RA = base*8 (only used by trace recorder), RD = target
+ | branch_RD
+ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+ case BC_FUNCF:
+ |.if JIT
+ | hotcall
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
+ break;
+
+ case BC_JFUNCF:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IFUNCF:
+ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
+ | ld TMP2, L->maxstack
+ | lbu TMP1, -4+PC2PROTO(numparams)(PC)
+ | ld KBASE, -4+PC2PROTO(k)(PC)
+ | sltu AT, TMP2, RA
+ | bnez AT, ->vm_growstack_l
+ |. sll TMP1, TMP1, 3
+ if (op != BC_JFUNCF) {
+ | ins_next1
+ }
+ |2:
+ | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters.
+ | bnez AT, >3
+ |. daddu AT, BASE, NARGS8:RC
+ if (op == BC_JFUNCF) {
+ | decode_RD8a RD, INS
+ | b =>BC_JLOOP
+ |. decode_RD8b RD
+ } else {
+ | ins_next2
+ }
+ |
+ |3: // Clear missing parameters.
+ | sd TISNIL, 0(AT)
+ | b <2
+ |. addiu NARGS8:RC, NARGS8:RC, 8
+ break;
+
+ case BC_JFUNCV:
+#if !LJ_HASJIT
+ break;
+#endif
+ | NYI // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
+ | li TMP0, LJ_TFUNC
+ | daddu TMP1, BASE, RC
+ | ld TMP2, L->maxstack
+ | settp LFUNC:RB, TMP0
+ | daddu TMP0, RA, RC
+ | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC.
+ | daddiu TMP3, RC, 16+FRAME_VARG
+ | sltu AT, TMP0, TMP2
+ | ld KBASE, -4+PC2PROTO(k)(PC)
+ | beqz AT, ->vm_growstack_l
+ |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG.
+ | lbu TMP2, -4+PC2PROTO(numparams)(PC)
+ | move RA, BASE
+ | move RC, TMP1
+ | ins_next1
+ | beqz TMP2, >3
+ |. daddiu BASE, TMP1, 16
+ |1:
+ | ld TMP0, 0(RA)
+ | sltu AT, RA, RC // Less args than parameters?
+ | move CARG1, TMP0
+ |.if MIPSR6
+ | selnez TMP0, TMP0, AT
+ | seleqz TMP3, TISNIL, AT
+ | or TMP0, TMP0, TMP3
+ | seleqz TMP3, CARG1, AT
+ | selnez CARG1, TISNIL, AT
+ | or CARG1, CARG1, TMP3
+ |.else
+ | movz TMP0, TISNIL, AT // Clear missing parameters.
+ | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC).
+ |.endif
+ | addiu TMP2, TMP2, -1
+ | sd TMP0, 16(TMP1)
+ | daddiu TMP1, TMP1, 8
+ | sd CARG1, 0(RA)
+ | bnez TMP2, <1
+ |. daddiu RA, RA, 8
+ |3:
+ | ins_next2
+ break;
+
+ case BC_FUNCC:
+ case BC_FUNCCW:
+ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
+ if (op == BC_FUNCC) {
+ | ld CFUNCADDR, CFUNC:RB->f
+ } else {
+ | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH)
+ }
+ | daddu TMP1, RA, NARGS8:RC
+ | ld TMP2, L->maxstack
+ | daddu RC, BASE, NARGS8:RC
+ | sd BASE, L->base
+ | sltu AT, TMP2, TMP1
+ | sd RC, L->top
+ | li_vmstate C
+ if (op == BC_FUNCCW) {
+ | ld CARG2, CFUNC:RB->f
+ }
+ | bnez AT, ->vm_growstack_c // Need to grow stack.
+ |. move CARG1, L
+ | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f])
+ |. st_vmstate
+ | // Returns nresults.
+ | ld BASE, L->base
+ | sll RD, CRET1, 3
+ | ld TMP1, L->top
+ | li_vmstate INTERP
+ | ld PC, FRAME_PC(BASE) // Fetch PC of caller.
+ | dsubu RA, TMP1, RD // RA = L->top - nresults*8
+ | sd L, DISPATCH_GL(cur_L)(DISPATCH)
+ | b ->vm_returnc
+ |. st_vmstate
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+ default:
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+ exit(2);
+ break;
+ }
+}
+
+static int build_backend(BuildCtx *ctx) /* Emit the whole interpreter; returns the number of dasm PC labels used. */
+{
+ int op;
+
+ dasm_growpc(Dst, BC__MAX); /* Reserve one dynamic dasm label per bytecode opcode. */
+
+ build_subroutines(ctx); /* Helper subroutines and fast functions come first. */
+
+ |.code_op
+ for (op = 0; op < BC__MAX; op++)
+ build_ins(ctx, (BCOp)op, op); /* Emit one handler per opcode into the .code_op section. */
+
+ return BC__MAX;
+}
+
+/* Emit pseudo frame-info for all assembler functions. */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Code offset of lj_vm_ffi_call. */
+ int i;
+ switch (ctx->mode) {
+ case BUILD_elfasm: /* Only ELF assembler output gets .debug_frame records here. */
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+ fprintf(ctx->fp, /* CIE: version 1, code align 1, data align -4, return address column 31 (ra). */
+ ".Lframe0:\n"
+ "\t.4byte .LECIE0-.LSCIE0\n"
+ ".LSCIE0:\n"
+ "\t.4byte 0xffffffff\n" /* CIE id marker for .debug_frame. */
+ "\t.byte 0x1\n"
+ "\t.string \"\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -4\n"
+ "\t.byte 31\n"
+ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" /* DW_CFA_def_cfa: r29 (sp), offset 0. */
+ "\t.align 2\n"
+ ".LECIE0:\n\n");
+ fprintf(ctx->fp, /* FDE covering the interpreter code from .Lbegin up to lj_vm_ffi_call. */
+ ".LSFDE0:\n"
+ "\t.4byte .LEFDE0-.LASFDE0\n"
+ ".LASFDE0:\n"
+ "\t.4byte .Lframe0\n"
+ "\t.8byte .Lbegin\n"
+ "\t.8byte %d\n"
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* DW_CFA_def_cfa_offset: CFRAME_SIZE. */
+ "\t.byte 0x9f\n\t.sleb128 2*5\n" /* Save slot for r31 (ra). */
+ "\t.byte 0x9e\n\t.sleb128 2*6\n", /* Save slot for r30. */
+ fcofs, CFRAME_SIZE);
+ for (i = 23; i >= 16; i--) /* Callee-save GPRs r16..r23. */
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i));
+#if !LJ_SOFTFP
+ for (i = 31; i >= 24; i--) /* Callee-save FPRs f24..f31 (DWARF columns 32+24..32+31). */
+ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i));
+#endif
+ fprintf(ctx->fp,
+ "\t.align 2\n"
+ ".LEFDE0:\n\n");
+#if LJ_HASFFI
+ fprintf(ctx->fp, /* Separate FDE for lj_vm_ffi_call. NOTE(review): address/length emitted as .4byte here but .8byte in the FDE above -- confirm intended on 64 bit. */
+ ".LSFDE1:\n"
+ "\t.4byte .LEFDE1-.LASFDE1\n"
+ ".LASFDE1:\n"
+ "\t.4byte .Lframe0\n"
+ "\t.4byte lj_vm_ffi_call\n"
+ "\t.4byte %d\n"
+ "\t.byte 0x9f\n\t.uleb128 2*1\n" /* Save slot for r31 (ra). */
+ "\t.byte 0x90\n\t.uleb128 2*2\n" /* Save slot for r16. */
+ "\t.byte 0xd\n\t.uleb128 0x10\n" /* DW_CFA_def_cfa_register: r16. */
+ "\t.align 2\n"
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#if !LJ_NO_UNWIND
+ /* NYI */
+#endif
+ break;
+ default: /* Other output modes (coff, machasm, ...) emit no frame info. */
+ break;
+ }
+}
+
diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc
index 48d0ed0f..3cad37d2 100644
--- a/src/vm_ppc.dasc
+++ b/src/vm_ppc.dasc
@@ -1,4 +1,4 @@
-|// Low-level VM code for PowerPC CPUs.
+|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode.
|// Bytecode interpreter, fast functions and helper functions.
|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
|
@@ -18,7 +18,6 @@
|// DynASM defines used by the PPC port:
|//
|// P64 64 bit pointers (only for GPR64 testing).
-|// Note: a full PPC64 _LP64 port is not planned.
|// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3).
|// Affects reg saves, stack layout, carry/overflow/dot flags etc.
|// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360).
@@ -103,6 +102,18 @@
|// Fixed register assignments for the interpreter.
|// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA)
|
+|.macro .FPU, a, b
+|.if FPU
+| a, b
+|.endif
+|.endmacro
+|
+|.macro .FPU, a, b, c
+|.if FPU
+| a, b, c
+|.endif
+|.endmacro
+|
|// The following must be C callee-save (but BASE is often refetched).
|.define BASE, r14 // Base of current Lua stack frame.
|.define KBASE, r15 // Constants of current Lua function.
@@ -116,8 +127,10 @@
|.define TISNUM, r22
|.define TISNIL, r23
|.define ZERO, r24
+|.if FPU
|.define TOBIT, f30 // 2^52 + 2^51.
|.define TONUM, f31 // 2^52 + 2^51 + 2^31.
+|.endif
|
|// The following temporaries are not saved across C calls, except for RA.
|.define RA, r20 // Callee-save.
@@ -133,6 +146,7 @@
|
|// Saved temporaries.
|.define SAVE0, r21
+|.define SAVE1, r25
|
|// Calling conventions.
|.define CARG1, r3
@@ -141,8 +155,10 @@
|.define CARG4, r6 // Overlaps TMP3.
|.define CARG5, r7 // Overlaps INS.
|
+|.if FPU
|.define FARG1, f1
|.define FARG2, f2
+|.endif
|
|.define CRET1, r3
|.define CRET2, r4
@@ -213,10 +229,16 @@
|.endif
|.else
|
+|.if FPU
|.define SAVE_LR, 276(sp)
|.define CFRAME_SPACE, 272 // Delta for sp.
|// Back chain for sp: 272(sp) <-- sp entering interpreter
|.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves.
+|.else
+|.define SAVE_LR, 132(sp)
+|.define CFRAME_SPACE, 128 // Delta for sp.
+|// Back chain for sp: 128(sp) <-- sp entering interpreter
+|.endif
|.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves.
|.define SAVE_CR, 52(sp) // 32 bit CR save.
|.define SAVE_ERRF, 48(sp) // 32 bit C frame info.
@@ -226,16 +248,25 @@
|.define SAVE_PC, 32(sp)
|.define SAVE_MULTRES, 28(sp)
|.define UNUSED1, 24(sp)
+|.if FPU
|.define TMPD_LO, 20(sp)
|.define TMPD_HI, 16(sp)
|.define TONUM_LO, 12(sp)
|.define TONUM_HI, 8(sp)
+|.else
+|.define SFSAVE_4, 20(sp)
+|.define SFSAVE_3, 16(sp)
+|.define SFSAVE_2, 12(sp)
+|.define SFSAVE_1, 8(sp)
+|.endif
|// Next frame lr: 4(sp)
|// Back chain for sp: 0(sp) <-- sp while in interpreter
|
+|.if FPU
|.define TMPD_BLO, 23(sp)
|.define TMPD, TMPD_HI
|.define TONUM_D, TONUM_HI
+|.endif
|
|.endif
|
@@ -245,7 +276,7 @@
|.else
| stw r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
-| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
+| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|.macro rest_, reg
|.if GPR64
@@ -253,7 +284,7 @@
|.else
| lwz r..reg, SAVE_GPR_+(reg-14)*4(sp)
|.endif
-| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
+| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp)
|.endmacro
|
|.macro saveregs
@@ -316,19 +347,14 @@
|.type NODE, Node
|.type NARGS8, int
|.type TRACE, GCtrace
+|.type SBUF, SBuf
|
|//-----------------------------------------------------------------------
|
-|// These basic macros should really be part of DynASM.
-|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
-|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
-|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
-|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
-|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
-|
|// Trap for not-yet-implemented parts.
|.macro NYI; tw 4, sp, sp; .endmacro
|
+|.if FPU
|// int/FP conversions.
|.macro tonum_i, freg, reg
| xoris reg, reg, 0x8000
@@ -352,6 +378,7 @@
|.macro toint, reg, freg
| toint reg, freg, freg
|.endmacro
+|.endif
|
|//-----------------------------------------------------------------------
|
@@ -539,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx)
| beq >2
|1:
| addic. TMP1, TMP1, -8
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ |.endif
| addi RA, RA, 8
+ |.if FPU
| stfd f0, 0(BASE)
+ |.else
+ | stw CARG1, 0(BASE)
+ | stw CARG2, 4(BASE)
+ |.endif
| addi BASE, BASE, 8
| bney <1
|
@@ -619,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx)
| .toc ld TOCREG, SAVE_TOC
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp BASE, L->base
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| lwz DISPATCH, L->glref // Setup pointer to dispatch table.
| li ZERO, 0
- | stw TMP3, TMPD
+ | .FPU stw TMP3, TMPD
| li TMP1, LJ_TFALSE
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
| li TISNIL, LJ_TNIL
| li_vmstate INTERP
- | lfs TOBIT, TMPD
+ | .FPU lfs TOBIT, TMPD
| lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
| la RA, -8(BASE) // Results start at BASE-8.
- | stw TMP3, TMPD
+ | .FPU stw TMP3, TMPD
| addi DISPATCH, DISPATCH, GG_G2DISP
| stw TMP1, 0(RA) // Prepend false to error message.
| li RD, 16 // 2 results: false + error message.
| st_vmstate
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| b ->vm_returnc
|
|//-----------------------------------------------------------------------
@@ -684,33 +721,34 @@ static void build_subroutines(BuildCtx *ctx)
| stw CARG3, SAVE_NRES
| cmplwi TMP1, 0
| stw CARG3, SAVE_ERRF
- | stp TMP0, L->cframe
| stp CARG3, SAVE_CFRAME
| stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
+ | stp TMP0, L->cframe
| beq >3
|
| // Resume after yield (like a return).
+ | stw L, DISPATCH_GL(cur_L)(DISPATCH)
| mr RA, BASE
| lp BASE, L->base
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp TMP1, L->top
| lwz PC, FRAME_PC(BASE)
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| stb CARG3, L->status
- | stw TMP3, TMPD
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | lfs TOBIT, TMPD
+ | .FPU stw TMP3, TMPD
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU lfs TOBIT, TMPD
| sub RD, TMP1, BASE
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| addi RD, RD, 8
- | stw TMP0, TONUM_HI
+ | .FPU stw TMP0, TONUM_HI
| li_vmstate INTERP
| li ZERO, 0
| st_vmstate
| andix. TMP0, PC, FRAME_TYPE
| mr MULTRES, RD
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| li TISNIL, LJ_TNIL
| beq ->BC_RET_Z
| b ->vm_return
@@ -729,33 +767,34 @@ static void build_subroutines(BuildCtx *ctx)
|
|1: // Entry point for vm_pcall above (PC = ftype).
| lp TMP1, L:CARG1->cframe
- | stw CARG3, SAVE_NRES
| mr L, CARG1
- | stw CARG1, SAVE_L
- | mr BASE, CARG2
- | stp sp, L->cframe // Add our C frame to cframe chain.
+ | stw CARG3, SAVE_NRES
| lwz DISPATCH, L->glref // Setup pointer to dispatch table.
+ | stw CARG1, SAVE_L
+ | mr BASE, CARG2
+ | addi DISPATCH, DISPATCH, GG_G2DISP
| stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
| stp TMP1, SAVE_CFRAME
- | addi DISPATCH, DISPATCH, GG_G2DISP
+ | stp sp, L->cframe // Add our C frame to cframe chain.
|
|3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
+ | stw L, DISPATCH_GL(cur_L)(DISPATCH)
| lp TMP2, L->base // TMP2 = old base (used in vmeta_call).
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp TMP1, L->top
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| add PC, PC, BASE
- | stw TMP3, TMPD
+ | .FPU stw TMP3, TMPD
| li ZERO, 0
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | lfs TOBIT, TMPD
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU lfs TOBIT, TMPD
| sub PC, PC, TMP2 // PC = frame delta + frame type
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| sub NARGS8:RC, TMP1, BASE
- | stw TMP0, TONUM_HI
+ | .FPU stw TMP0, TONUM_HI
| li_vmstate INTERP
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| li TISNIL, LJ_TNIL
| st_vmstate
|
@@ -776,15 +815,18 @@ static void build_subroutines(BuildCtx *ctx)
| lwz TMP0, L:CARG1->stack
| stw CARG1, SAVE_L
| lp TMP1, L->top
+ | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
| stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
| sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
| lp TMP1, L->cframe
- | stp sp, L->cframe // Add our C frame to cframe chain.
+ | addi DISPATCH, DISPATCH, GG_G2DISP
| .toc lp CARG4, 0(CARG4)
| li TMP2, 0
| stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
| stw TMP2, SAVE_ERRF // No error function.
| stp TMP1, SAVE_CFRAME
+ | stp sp, L->cframe // Add our C frame to cframe chain.
+ | stw L, DISPATCH_GL(cur_L)(DISPATCH)
| mtctr CARG4
| bctrl // (lua_State *L, lua_CFunction func, void *ud)
|.if PPE
@@ -793,9 +835,7 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| mr. BASE, CRET1
|.endif
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | li PC, FRAME_CP
- | addi DISPATCH, DISPATCH, GG_G2DISP
+ | li PC, FRAME_CP
| bne <3 // Else continue with the call.
| b ->vm_leave_cp // No base? Just remove C frame.
|
@@ -842,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx)
| lwz INS, -4(PC)
| subi CARG2, RB, 16
| decode_RB8 SAVE0, INS
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz TMP2, 0(RA)
+ | lwz TMP3, 4(RA)
+ |.endif
| add TMP1, BASE, SAVE0
| stp BASE, L->base
| cmplw TMP1, CARG2
| sub CARG3, CARG2, TMP1
| decode_RA8 RA, INS
+ |.if FPU
| stfd f0, 0(CARG2)
+ |.else
+ | stw TMP2, 0(CARG2)
+ | stw TMP3, 4(CARG2)
+ |.endif
| bney ->BC_CAT_Z
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux TMP2, RA, BASE
+ | stw TMP3, 4(RA)
+ |.endif
| b ->cont_nop
|
|//-- Table indexing metamethods -----------------------------------------
@@ -903,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx)
| // Returns TValue * (finished) or NULL (metamethod).
| cmplwi CRET1, 0
| beq >3
+ |.if FPU
| lfd f0, 0(CRET1)
+ |.else
+ | lwz TMP0, 0(CRET1)
+ | lwz TMP1, 4(CRET1)
+ |.endif
| ins_next1
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
|
|3: // Call __index metamethod.
@@ -918,6 +983,22 @@ static void build_subroutines(BuildCtx *ctx)
| li NARGS8:RC, 16 // 2 args for func(t, k).
| b ->vm_call_dispatch_f
|
+ |->vmeta_tgetr:
+ | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // Returns cTValue * or NULL.
+ | cmplwi CRET1, 0
+ | beq >1
+ |.if FPU
+ | lfd f14, 0(CRET1)
+ |.else
+ | lwz SAVE0, 0(CRET1)
+ | lwz SAVE1, 4(CRET1)
+ |.endif
+ | b ->BC_TGETR_Z
+ |1:
+ | stwx TISNIL, BASE, RA
+ | b ->cont_nop
+ |
|//-----------------------------------------------------------------------
|
|->vmeta_tsets1:
@@ -967,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx)
| bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
| // Returns TValue * (finished) or NULL (metamethod).
| cmplwi CRET1, 0
+ |.if FPU
| lfdx f0, BASE, RA
+ |.else
+ | lwzux TMP2, RA, BASE
+ | lwz TMP3, 4(RA)
+ |.endif
| beq >3
| // NOBARRIER: lj_meta_tset ensures the table is not black.
| ins_next1
+ |.if FPU
| stfd f0, 0(CRET1)
+ |.else
+ | stw TMP2, 0(CRET1)
+ | stw TMP3, 4(CRET1)
+ |.endif
| ins_next2
|
|3: // Call __newindex metamethod.
@@ -982,9 +1073,28 @@ static void build_subroutines(BuildCtx *ctx)
| add PC, TMP1, BASE
| lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
| li NARGS8:RC, 24 // 3 args for func(t, k, v)
+ |.if FPU
| stfd f0, 16(BASE) // Copy value to third argument.
+ |.else
+ | stw TMP2, 16(BASE)
+ | stw TMP3, 20(BASE)
+ |.endif
| b ->vm_call_dispatch_f
|
+ |->vmeta_tsetr:
+ | stp BASE, L->base
+ | mr CARG1, L
+ | stw PC, SAVE_PC
+ | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // Returns TValue *.
+ |.if FPU
+ | stfd f14, 0(CRET1)
+ |.else
+ | stw SAVE0, 0(CRET1)
+ | stw SAVE1, 4(CRET1)
+ |.endif
+ | b ->cont_nop
+ |
|//-- Comparison metamethods ---------------------------------------------
|
|->vmeta_comp:
@@ -1021,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx)
|
|->cont_ra: // RA = resultptr
| lwz INS, -4(PC)
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ |.endif
| decode_RA8 TMP1, INS
+ |.if FPU
| stfdx f0, BASE, TMP1
+ |.else
+ | stwux CARG1, TMP1, BASE
+ | stw CARG2, 4(TMP1)
+ |.endif
| b ->cont_nop
|
|->cont_condt: // RA = resultptr
@@ -1063,6 +1183,16 @@ static void build_subroutines(BuildCtx *ctx)
| b <3
|.endif
|
+ |->vmeta_istype:
+ | subi PC, PC, 4
+ | stp BASE, L->base
+ | srwi CARG2, RA, 3
+ | mr CARG1, L
+ | srwi CARG3, RD, 3
+ | stw PC, SAVE_PC
+ | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ | b ->cont_nop
+ |
|//-- Arithmetic metamethods ---------------------------------------------
|
|->vmeta_arith_nv:
@@ -1219,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx)
|.macro .ffunc_n, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
+ | lwz CARG1, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ |.endif
| blt ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
+ | checknum CARG1; bge ->fff_fallback
|.endmacro
|
|.macro .ffunc_nn, name
|->ff_ .. name:
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
+ | lwz CARG1, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
- | lwz CARG4, 8(BASE)
+ | lwz CARG3, 8(BASE)
| lfd FARG2, 8(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ | lwz CARG3, 8(BASE)
+ | lwz CARG4, 12(BASE)
+ |.endif
| blt ->fff_fallback
+ | checknum CARG1; bge ->fff_fallback
| checknum CARG3; bge ->fff_fallback
- | checknum CARG4; bge ->fff_fallback
|.endmacro
|
|// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
@@ -1255,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx)
| bge cr1, ->fff_fallback
| stw CARG3, 0(RA)
| addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
+ | addi TMP1, BASE, 8
+ | add TMP2, RA, NARGS8:RC
| stw CARG1, 4(RA)
| beq ->fff_res // Done if exactly 1 argument.
- | li TMP1, 8
- | subi RC, RC, 8
|1:
- | cmplw TMP1, RC
- | lfdx f0, BASE, TMP1
- | stfdx f0, RA, TMP1
+ | cmplw TMP1, TMP2
+ |.if FPU
+ | lfd f0, 0(TMP1)
+ | stfd f0, 0(TMP1)
+ |.else
+ | lwz CARG1, 0(TMP1)
+ | lwz CARG2, 4(TMP1)
+ | stw CARG1, -8(TMP1)
+ | stw CARG2, -4(TMP1)
+ |.endif
| addi TMP1, TMP1, 8
| bney <1
| b ->fff_res
@@ -1277,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx)
| orc TMP1, TMP2, TMP0
| addi TMP1, TMP1, ~LJ_TISNUM+1
| slwi TMP1, TMP1, 3
+ |.if FPU
| la TMP2, CFUNC:RB->upvalue
| lfdx FARG1, TMP2, TMP1
+ |.else
+ | add TMP1, CFUNC:RB, TMP1
+ | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi
+ | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo
+ |.endif
| b ->fff_resn
|
|//-- Base library: getters and setters ---------------------------------
@@ -1294,9 +1447,9 @@ static void build_subroutines(BuildCtx *ctx)
| beq ->fff_restv
| lwz TMP0, TAB:CARG1->hmask
| li CARG3, LJ_TTAB // Use metatable as default result.
- | lwz TMP1, STR:RC->hash
+ | lwz TMP1, STR:RC->sid
| lwz NODE:TMP2, TAB:CARG1->node
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
| slwi TMP0, TMP1, 5
| slwi TMP1, TMP1, 3
| sub TMP1, TMP0, TMP1
@@ -1356,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx)
| mr CARG1, L
| bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
| // Returns cTValue *.
+ |.if FPU
| lfd FARG1, 0(CRET1)
+ |.else
+ | lwz CARG2, 4(CRET1)
+ | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1.
+ |.endif
| b ->fff_resn
|
|//-- Base library: conversions ------------------------------------------
@@ -1365,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx)
| // Only handles the number case inline (without a base argument).
| cmplwi NARGS8:RC, 8
| lwz CARG1, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ |.endif
| bne ->fff_fallback // Exactly one argument.
| checknum CARG1; bgt ->fff_fallback
| b ->fff_resn
@@ -1387,9 +1549,9 @@ static void build_subroutines(BuildCtx *ctx)
| mr CARG1, L
| mr CARG2, BASE
|.if DUALNUM
- | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o)
+ | bl extern lj_strfmt_number // (lua_State *L, cTValue *o)
|.else
- | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np)
+ | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np)
|.endif
| // Returns GCstr *.
| li CARG3, LJ_TSTR
@@ -1397,32 +1559,24 @@ static void build_subroutines(BuildCtx *ctx)
|
|//-- Base library: iterators -------------------------------------------
|
- |.ffunc next
- | cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
- | lwz TAB:CARG2, 4(BASE)
- | blt ->fff_fallback
+ |.ffunc_1 next
| stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
- | checktab CARG1
+ | checktab CARG3
| lwz PC, FRAME_PC(BASE)
| bne ->fff_fallback
- | stp BASE, L->base // Add frame since C call can throw.
- | mr CARG1, L
- | stp BASE, L->top // Dummy frame length is ok.
- | la CARG3, 8(BASE)
- | stw PC, SAVE_PC
- | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
- | // Returns 0 at end of traversal.
- | cmplwi CRET1, 0
- | li CARG3, LJ_TNIL
- | beq ->fff_restv // End of traversal: return nil.
- | lfd f0, 8(BASE) // Copy key and value to results.
+ | la CARG2, 8(BASE)
+ | la CARG3, -8(BASE)
+ | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ | // Returns 1=found, 0=end, -1=error.
+ | cmpwi CRET1, 0
| la RA, -8(BASE)
- | lfd f1, 16(BASE)
- | stfd f0, 0(RA)
| li RD, (2+1)*8
- | stfd f1, 8(RA)
- | b ->fff_res
+ | bgt ->fff_res // Found key/value.
+ | li CARG3, LJ_TNIL
+ | beq ->fff_restv // End of traversal: return nil.
+ | lwz CFUNC:RB, FRAME_FUNC(BASE)
+ | li NARGS8:RC, 2*8
+ | b ->fff_fallback // Invalid key.
|
|.ffunc_1 pairs
| checktab CARG3
@@ -1430,17 +1584,32 @@ static void build_subroutines(BuildCtx *ctx)
| bne ->fff_fallback
#if LJ_52
| lwz TAB:TMP2, TAB:CARG1->metatable
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| cmplwi TAB:TMP2, 0
| la RA, -8(BASE)
| bne ->fff_fallback
#else
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| la RA, -8(BASE)
#endif
| stw TISNIL, 8(BASE)
| li RD, (3+1)*8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw TMP0, 0(RA)
+ | stw TMP1, 4(RA)
+ |.endif
| b ->fff_res
|
|.ffunc ipairs_aux
@@ -1486,14 +1655,24 @@ static void build_subroutines(BuildCtx *ctx)
| stfd FARG2, 0(RA)
|.endif
| ble >2 // Not in array part?
+ |.if FPU
| lwzx TMP2, TMP1, TMP3
| lfdx f0, TMP1, TMP3
+ |.else
+ | lwzux TMP2, TMP1, TMP3
+ | lwz TMP3, 4(TMP1)
+ |.endif
|1:
| checknil TMP2
| li RD, (0+1)*8
| beq ->fff_res // End of iteration, return 0 results.
| li RD, (2+1)*8
+ |.if FPU
| stfd f0, 8(RA)
+ |.else
+ | stw TMP2, 8(RA)
+ | stw TMP3, 12(RA)
+ |.endif
| b ->fff_res
|2: // Check for empty hash part first. Otherwise call C function.
| lwz TMP0, TAB:CARG1->hmask
@@ -1507,7 +1686,11 @@ static void build_subroutines(BuildCtx *ctx)
| li RD, (0+1)*8
| beq ->fff_res
| lwz TMP2, 0(CRET1)
+ |.if FPU
| lfd f0, 0(CRET1)
+ |.else
+ | lwz TMP3, 4(CRET1)
+ |.endif
| b <1
|
|.ffunc_1 ipairs
@@ -1516,12 +1699,22 @@ static void build_subroutines(BuildCtx *ctx)
| bne ->fff_fallback
#if LJ_52
| lwz TAB:TMP2, TAB:CARG1->metatable
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| cmplwi TAB:TMP2, 0
| la RA, -8(BASE)
| bne ->fff_fallback
#else
+ |.if FPU
| lfd f0, CFUNC:RB->upvalue[0]
+ |.else
+ | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi
+ | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo
+ |.endif
| la RA, -8(BASE)
#endif
|.if DUALNUM
@@ -1531,7 +1724,12 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| stw ZERO, 12(BASE)
| li RD, (3+1)*8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw TMP0, 0(RA)
+ | stw TMP1, 4(RA)
+ |.endif
| b ->fff_res
|
|//-- Base library: catch errors ----------------------------------------
@@ -1550,19 +1748,32 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc xpcall
| cmplwi NARGS8:RC, 16
- | lwz CARG4, 8(BASE)
+ | lwz CARG3, 8(BASE)
+ |.if FPU
| lfd FARG2, 8(BASE)
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ | lwz CARG4, 12(BASE)
+ |.endif
| blt ->fff_fallback
| lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH)
| mr TMP2, BASE
- | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function.
+ | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function.
| la BASE, 16(BASE)
| // Remember active hook before pcall.
| rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31
+ |.if FPU
| stfd FARG2, 0(TMP2) // Swap function and traceback.
- | subi NARGS8:RC, NARGS8:RC, 16
| stfd FARG1, 8(TMP2)
+ |.else
+ | stw CARG3, 0(TMP2)
+ | stw CARG4, 4(TMP2)
+ | stw CARG1, 8(TMP2)
+ | stw CARG2, 12(TMP2)
+ |.endif
+ | subi NARGS8:RC, NARGS8:RC, 16
| addi PC, TMP1, 16+FRAME_PCALL
| b ->vm_call_dispatch
|
@@ -1605,9 +1816,21 @@ static void build_subroutines(BuildCtx *ctx)
| stp BASE, L->top
|2: // Move args to coroutine.
| cmpw TMP1, NARGS8:RC
+ |.if FPU
| lfdx f0, BASE, TMP1
+ |.else
+ | add CARG3, BASE, TMP1
+ | lwz TMP2, 0(CARG3)
+ | lwz TMP3, 4(CARG3)
+ |.endif
| beq >3
+ |.if FPU
| stfdx f0, CARG2, TMP1
+ |.else
+ | add CARG3, CARG2, TMP1
+ | stw TMP2, 0(CARG3)
+ | stw TMP3, 4(CARG3)
+ |.endif
| addi TMP1, TMP1, 8
| b <2
|3:
@@ -1622,6 +1845,7 @@ static void build_subroutines(BuildCtx *ctx)
| lp TMP3, L:SAVE0->top
| li_vmstate INTERP
| lp BASE, L->base
+ | stw L, DISPATCH_GL(cur_L)(DISPATCH)
| st_vmstate
| bgt >8
| sub RD, TMP3, TMP2
@@ -1637,8 +1861,17 @@ static void build_subroutines(BuildCtx *ctx)
| stp TMP2, L:SAVE0->top // Clear coroutine stack.
|5: // Move results from coroutine.
| cmplw TMP1, TMP3
+ |.if FPU
| lfdx f0, TMP2, TMP1
| stfdx f0, BASE, TMP1
+ |.else
+ | add CARG3, TMP2, TMP1
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ | add CARG3, BASE, TMP1
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| addi TMP1, TMP1, 8
| bne <5
|6:
@@ -1663,12 +1896,22 @@ static void build_subroutines(BuildCtx *ctx)
| andix. TMP0, PC, FRAME_TYPE
| la TMP3, -8(TMP3)
| li TMP1, LJ_TFALSE
+ |.if FPU
| lfd f0, 0(TMP3)
+ |.else
+ | lwz CARG1, 0(TMP3)
+ | lwz CARG2, 4(TMP3)
+ |.endif
| stp TMP3, L:SAVE0->top // Remove error from coroutine stack.
| li RD, (2+1)*8
| stw TMP1, -8(BASE) // Prepend false to results.
| la RA, -8(BASE)
+ |.if FPU
| stfd f0, 0(BASE) // Copy error message.
+ |.else
+ | stw CARG1, 0(BASE) // Copy error message.
+ | stw CARG2, 4(BASE)
+ |.endif
| b <7
|.else
| mr CARG1, L
@@ -1847,7 +2090,12 @@ static void build_subroutines(BuildCtx *ctx)
| lus CARG1, 0x8000 // -(2^31).
| beqy ->fff_resi
|5:
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ |.endif
| blex func
| b ->fff_resn
|.endmacro
@@ -1871,10 +2119,14 @@ static void build_subroutines(BuildCtx *ctx)
|
|.ffunc math_log
| cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
- | lfd FARG1, 0(BASE)
+ | lwz CARG1, 0(BASE)
| bne ->fff_fallback // Need exactly 1 argument.
- | checknum CARG3; bge ->fff_fallback
+ | checknum CARG1; bge ->fff_fallback
+ |.if FPU
+ | lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG2, 4(BASE)
+ |.endif
| blex log
| b ->fff_resn
|
@@ -1893,26 +2145,27 @@ static void build_subroutines(BuildCtx *ctx)
| math_extern2 atan2
| math_extern2 fmod
|
- |->ff_math_deg:
- |.ffunc_n math_rad
- | lfd FARG2, CFUNC:RB->upvalue[0]
- | fmul FARG1, FARG1, FARG2
- | b ->fff_resn
- |
|.if DUALNUM
|.ffunc math_ldexp
| cmplwi NARGS8:RC, 16
- | lwz CARG3, 0(BASE)
+ | lwz TMP0, 0(BASE)
+ |.if FPU
| lfd FARG1, 0(BASE)
- | lwz CARG4, 8(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ |.endif
+ | lwz TMP1, 8(BASE)
|.if GPR64
| lwz CARG2, 12(BASE)
- |.else
+ |.elif FPU
| lwz CARG1, 12(BASE)
+ |.else
+ | lwz CARG3, 12(BASE)
|.endif
| blt ->fff_fallback
- | checknum CARG3; bge ->fff_fallback
- | checknum CARG4; bne ->fff_fallback
+ | checknum TMP0; bge ->fff_fallback
+ | checknum TMP1; bne ->fff_fallback
|.else
|.ffunc_nn math_ldexp
|.if GPR64
@@ -1927,8 +2180,10 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc_n math_frexp
|.if GPR64
| la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- |.else
+ |.elif FPU
| la CARG1, DISPATCH_GL(tmptv)(DISPATCH)
+ |.else
+ | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
|.endif
| lwz PC, FRAME_PC(BASE)
| blex frexp
@@ -1937,7 +2192,12 @@ static void build_subroutines(BuildCtx *ctx)
|.if not DUALNUM
| tonum_i FARG2, TMP1
|.endif
+ |.if FPU
| stfd FARG1, 0(RA)
+ |.else
+ | stw CRET1, 0(RA)
+ | stw CRET2, 4(RA)
+ |.endif
| li RD, (2+1)*8
|.if DUALNUM
| stw TISNUM, 8(RA)
@@ -1950,13 +2210,20 @@ static void build_subroutines(BuildCtx *ctx)
|.ffunc_n math_modf
|.if GPR64
| la CARG2, -8(BASE)
- |.else
+ |.elif FPU
| la CARG1, -8(BASE)
+ |.else
+ | la CARG3, -8(BASE)
|.endif
| lwz PC, FRAME_PC(BASE)
| blex modf
| la RA, -8(BASE)
+ |.if FPU
| stfd FARG1, 0(BASE)
+ |.else
+ | stw CRET1, 0(BASE)
+ | stw CRET2, 4(BASE)
+ |.endif
| li RD, (2+1)*8
| b ->fff_res
|
@@ -1964,13 +2231,13 @@ static void build_subroutines(BuildCtx *ctx)
|.if DUALNUM
| .ffunc_1 name
| checknum CARG3
- | addi TMP1, BASE, 8
- | add TMP2, BASE, NARGS8:RC
+ | addi SAVE0, BASE, 8
+ | add SAVE1, BASE, NARGS8:RC
| bne >4
|1: // Handle integers.
- | lwz CARG4, 0(TMP1)
- | cmplw cr1, TMP1, TMP2
- | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 0(SAVE0)
+ | cmplw cr1, SAVE0, SAVE1
+ | lwz CARG2, 4(SAVE0)
| bge cr1, ->fff_resi
| checknum CARG4
| xoris TMP0, CARG1, 0x8000
@@ -1987,36 +2254,76 @@ static void build_subroutines(BuildCtx *ctx)
|.if GPR64
| rldicl CARG1, CARG1, 0, 32
|.endif
- | addi TMP1, TMP1, 8
+ | addi SAVE0, SAVE0, 8
| b <1
|3:
| bge ->fff_fallback
| // Convert intermediate result to number and continue below.
+ |.if FPU
| tonum_i FARG1, CARG1
- | lfd FARG2, 0(TMP1)
+ | lfd FARG2, 0(SAVE0)
+ |.else
+ | mr CARG2, CARG1
+ | bl ->vm_sfi2d_1
+ | lwz CARG3, 0(SAVE0)
+ | lwz CARG4, 4(SAVE0)
+ |.endif
| b >6
|4:
+ |.if FPU
| lfd FARG1, 0(BASE)
+ |.else
+ | lwz CARG1, 0(BASE)
+ | lwz CARG2, 4(BASE)
+ |.endif
| bge ->fff_fallback
|5: // Handle numbers.
- | lwz CARG4, 0(TMP1)
- | cmplw cr1, TMP1, TMP2
- | lfd FARG2, 0(TMP1)
+ | lwz CARG3, 0(SAVE0)
+ | cmplw cr1, SAVE0, SAVE1
+ |.if FPU
+ | lfd FARG2, 0(SAVE0)
+ |.else
+ | lwz CARG4, 4(SAVE0)
+ |.endif
| bge cr1, ->fff_resn
- | checknum CARG4; bge >7
+ | checknum CARG3; bge >7
|6:
- | fsub f0, FARG1, FARG2
- | addi TMP1, TMP1, 8
+ | addi SAVE0, SAVE0, 8
+ |.if FPU
|.if ismax
+ | fsub f0, FARG1, FARG2
+ |.else
+ | fsub f0, FARG2, FARG1
+ |.endif
| fsel FARG1, f0, FARG1, FARG2
|.else
- | fsel FARG1, f0, FARG2, FARG1
+ | stw CARG1, SFSAVE_1
+ | stw CARG2, SFSAVE_2
+ | stw CARG3, SFSAVE_3
+ | stw CARG4, SFSAVE_4
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ |.if ismax
+ | blt >8
+ |.else
+ | bge >8
+ |.endif
+ | lwz CARG1, SFSAVE_1
+ | lwz CARG2, SFSAVE_2
+ | b <5
+ |8:
+ | lwz CARG1, SFSAVE_3
+ | lwz CARG2, SFSAVE_4
|.endif
| b <5
|7: // Convert integer to number and continue above.
- | lwz CARG2, 4(TMP1)
+ | lwz CARG3, 4(SAVE0)
| bne ->fff_fallback
- | tonum_i FARG2, CARG2
+ |.if FPU
+ | tonum_i FARG2, CARG3
+ |.else
+ | bl ->vm_sfi2d_2
+ |.endif
| b <6
|.else
| .ffunc_n name
@@ -2028,13 +2335,13 @@ static void build_subroutines(BuildCtx *ctx)
| checknum CARG2
| bge cr1, ->fff_resn
| bge ->fff_fallback
- | fsub f0, FARG1, FARG2
- | addi TMP1, TMP1, 8
|.if ismax
- | fsel FARG1, f0, FARG1, FARG2
+ | fsub f0, FARG1, FARG2
|.else
- | fsel FARG1, f0, FARG2, FARG1
+ | fsub f0, FARG2, FARG1
|.endif
+ | addi TMP1, TMP1, 8
+ | fsel FARG1, f0, FARG1, FARG2
| b <1
|.endif
|.endmacro
@@ -2044,11 +2351,6 @@ static void build_subroutines(BuildCtx *ctx)
|
|//-- String library -----------------------------------------------------
|
- |.ffunc_1 string_len
- | checkstr CARG3; bne ->fff_fallback
- | lwz CRET1, STR:CARG1->len
- | b ->fff_resi
- |
|.ffunc string_byte // Only handle the 1-arg case here.
| cmplwi NARGS8:RC, 8
| lwz CARG3, 0(BASE)
@@ -2103,6 +2405,7 @@ static void build_subroutines(BuildCtx *ctx)
| stp BASE, L->base
| stw PC, SAVE_PC
| bl extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:
| // Returns GCstr *.
| lp BASE, L->base
| li CARG3, LJ_TSTR
@@ -2180,114 +2483,29 @@ static void build_subroutines(BuildCtx *ctx)
| addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
| b <3
|
- |.ffunc string_rep // Only handle the 1-char case inline.
- | ffgccheck
- | cmplwi NARGS8:RC, 16
- | lwz TMP0, 0(BASE)
- | lwz STR:CARG1, 4(BASE)
- | lwz CARG4, 8(BASE)
- |.if DUALNUM
- | lwz CARG3, 12(BASE)
- |.else
- | lfd FARG2, 8(BASE)
- |.endif
- | bne ->fff_fallback // Exactly 2 arguments.
- | checkstr TMP0; bne ->fff_fallback
- |.if DUALNUM
- | checknum CARG4; bne ->fff_fallback
- |.else
- | checknum CARG4; bge ->fff_fallback
- | toint CARG3, FARG2
- |.endif
- | lwz TMP0, STR:CARG1->len
- | cmpwi CARG3, 0
- | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
- | ble >2 // Count <= 0? (or non-int)
- | cmplwi TMP0, 1
- | subi TMP2, CARG3, 1
- | blt >2 // Zero length string?
- | cmplw cr1, TMP1, CARG3
- | bne ->fff_fallback // Fallback for > 1-char strings.
- | lbz TMP0, STR:CARG1[1]
- | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
- | blt cr1, ->fff_fallback
- |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
- | cmplwi TMP2, 0
- | stbx TMP0, CARG2, TMP2
- | subi TMP2, TMP2, 1
- | bne <1
- | b ->fff_newstr
- |2: // Return empty string.
- | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH)
- | li CARG3, LJ_TSTR
- | b ->fff_restv
- |
- |.ffunc string_reverse
+ |.macro ffstring_op, name
+ | .ffunc string_ .. name
| ffgccheck
| cmplwi NARGS8:RC, 8
| lwz CARG3, 0(BASE)
- | lwz STR:CARG1, 4(BASE)
+ | lwz STR:CARG2, 4(BASE)
| blt ->fff_fallback
| checkstr CARG3
- | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
+ | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH)
| bne ->fff_fallback
- | lwz CARG3, STR:CARG1->len
- | la CARG1, #STR(STR:CARG1)
- | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
- | li TMP2, 0
- | cmplw TMP1, CARG3
- | subi TMP3, CARG3, 1
- | blt ->fff_fallback
- |1: // Reverse string copy.
- | cmpwi TMP3, 0
- | lbzx TMP1, CARG1, TMP2
- | blty ->fff_newstr
- | stbx TMP1, CARG2, TMP3
- | subi TMP3, TMP3, 1
- | addi TMP2, TMP2, 1
- | b <1
- |
- |.macro ffstring_case, name, lo
- | .ffunc name
- | ffgccheck
- | cmplwi NARGS8:RC, 8
- | lwz CARG3, 0(BASE)
- | lwz STR:CARG1, 4(BASE)
- | blt ->fff_fallback
- | checkstr CARG3
- | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
- | bne ->fff_fallback
- | lwz CARG3, STR:CARG1->len
- | la CARG1, #STR(STR:CARG1)
- | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
- | cmplw TMP1, CARG3
- | li TMP2, 0
- | blt ->fff_fallback
- |1: // ASCII case conversion.
- | cmplw TMP2, CARG3
- | lbzx TMP1, CARG1, TMP2
- | bgey ->fff_newstr
- | subi TMP0, TMP1, lo
- | xori TMP3, TMP1, 0x20
- | addic TMP0, TMP0, -26
- | subfe TMP3, TMP3, TMP3
- | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20.
- | xor TMP1, TMP1, TMP3
- | stbx TMP1, CARG2, TMP2
- | addi TMP2, TMP2, 1
- | b <1
+ | lwz TMP0, SBUF:CARG1->b
+ | stw L, SBUF:CARG1->L
+ | stp BASE, L->base
+ | stw PC, SAVE_PC
+ | stw TMP0, SBUF:CARG1->w
+ | bl extern lj_buf_putstr_ .. name
+ | bl extern lj_buf_tostr
+ | b ->fff_resstr
|.endmacro
|
- |ffstring_case string_lower, 65
- |ffstring_case string_upper, 97
- |
- |//-- Table library ------------------------------------------------------
- |
- |.ffunc_1 table_getn
- | checktab CARG3; bne ->fff_fallback
- | bl extern lj_tab_len // (GCtab *t)
- | // Returns uint32_t (but less than 2^31).
- | b ->fff_resi
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
|
|//-- Bit library --------------------------------------------------------
|
@@ -2305,28 +2523,37 @@ static void build_subroutines(BuildCtx *ctx)
|
|.macro .ffunc_bit_op, name, ins
| .ffunc_bit name
- | addi TMP1, BASE, 8
- | add TMP2, BASE, NARGS8:RC
+ | addi SAVE0, BASE, 8
+ | add SAVE1, BASE, NARGS8:RC
|1:
- | lwz CARG4, 0(TMP1)
- | cmplw cr1, TMP1, TMP2
+ | lwz CARG4, 0(SAVE0)
+ | cmplw cr1, SAVE0, SAVE1
|.if DUALNUM
- | lwz CARG2, 4(TMP1)
+ | lwz CARG2, 4(SAVE0)
|.else
- | lfd FARG1, 0(TMP1)
+ | lfd FARG1, 0(SAVE0)
|.endif
| bgey cr1, ->fff_resi
| checknum CARG4
|.if DUALNUM
+ |.if FPU
| bnel ->fff_bitop_fb
|.else
+ | beq >3
+ | stw CARG1, SFSAVE_1
+ | bl ->fff_bitop_fb
+ | mr CARG2, CARG1
+ | lwz CARG1, SFSAVE_1
+ |3:
+ |.endif
+ |.else
| fadd FARG1, FARG1, TOBIT
| bge ->fff_fallback
| stfd FARG1, TMPD
| lwz CARG2, TMPD_LO
|.endif
| ins CARG1, CARG1, CARG2
- | addi TMP1, TMP1, 8
+ | addi SAVE0, SAVE0, 8
| b <1
|.endmacro
|
@@ -2348,7 +2575,14 @@ static void build_subroutines(BuildCtx *ctx)
|.macro .ffunc_bit_sh, name, ins, shmod
|.if DUALNUM
| .ffunc_2 bit_..name
+ |.if FPU
| checknum CARG3; bnel ->fff_tobit_fb
+ |.else
+ | checknum CARG3; beq >1
+ | bl ->fff_tobit_fb
+ | lwz CARG2, 12(BASE) // Conversion polluted CARG2.
+ |1:
+ |.endif
| // Note: no inline conversion from number for 2nd argument!
| checknum CARG4; bne ->fff_fallback
|.else
@@ -2385,27 +2619,77 @@ static void build_subroutines(BuildCtx *ctx)
|->fff_resn:
| lwz PC, FRAME_PC(BASE)
| la RA, -8(BASE)
+ |.if FPU
| stfd FARG1, -8(BASE)
+ |.else
+ | stw CARG1, -8(BASE)
+ | stw CARG2, -4(BASE)
+ |.endif
| b ->fff_res1
|
|// Fallback FP number to bit conversion.
|->fff_tobit_fb:
|.if DUALNUM
+ |.if FPU
| lfd FARG1, 0(BASE)
| bgt ->fff_fallback
| fadd FARG1, FARG1, TOBIT
| stfd FARG1, TMPD
| lwz CARG1, TMPD_LO
| blr
+ |.else
+ | bgt ->fff_fallback
+ | mr CARG2, CARG1
+ | mr CARG1, CARG3
+ |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2.
+ |->vm_tobit:
+ | slwi TMP2, CARG1, 1
+ | addis TMP2, TMP2, 0x0020
+ | cmpwi TMP2, 0
+ | bge >2
+ | li TMP1, 0x3e0
+ | srawi TMP2, TMP2, 21
+ | not TMP1, TMP1
+ | sub. TMP2, TMP1, TMP2
+ | cmpwi cr7, CARG1, 0
+ | blt >1
+ | slwi TMP1, CARG1, 11
+ | srwi TMP0, CARG2, 21
+ | oris TMP1, TMP1, 0x8000
+ | or TMP1, TMP1, TMP0
+ | srw CARG1, TMP1, TMP2
+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
+ | neg CARG1, CARG1
+ | blr
+ |1:
+ | addi TMP2, TMP2, 21
+ | srw TMP1, CARG2, TMP2
+ | slwi CARG2, CARG1, 12
+ | subfic TMP2, TMP2, 20
+ | slw TMP0, CARG2, TMP2
+ | or CARG1, TMP1, TMP0
+ | bclr 4, 28 // Return if cr7[lt] == 0, no hint.
+ | neg CARG1, CARG1
+ | blr
+ |2:
+ | li CARG1, 0
+ | blr
+ |.endif
|.endif
|->fff_bitop_fb:
|.if DUALNUM
- | lfd FARG1, 0(TMP1)
+ |.if FPU
+ | lfd FARG1, 0(SAVE0)
| bgt ->fff_fallback
| fadd FARG1, FARG1, TOBIT
| stfd FARG1, TMPD
| lwz CARG2, TMPD_LO
| blr
+ |.else
+ | bgt ->fff_fallback
+ | mr CARG1, CARG4
+ | b ->vm_tobit
+ |.endif
|.endif
|
|//-----------------------------------------------------------------------
@@ -2589,15 +2873,88 @@ static void build_subroutines(BuildCtx *ctx)
| mtctr CRET1
| bctr
|
+ |->cont_stitch: // Trace stitching.
+ |.if JIT
+ | // RA = resultptr, RB = meta base
+ | lwz INS, -4(PC)
+ | lwz TRACE:TMP2, -20(RB) // Save previous trace.
+ | addic. TMP1, MULTRES, -8
+ | decode_RA8 RC, INS // Call base.
+ | beq >2
+ |1: // Move results down.
+ |.if FPU
+ | lfd f0, 0(RA)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ |.endif
+ | addic. TMP1, TMP1, -8
+ | addi RA, RA, 8
+ |.if FPU
+ | stfdx f0, BASE, RC
+ |.else
+ | add CARG3, BASE, RC
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
+ | addi RC, RC, 8
+ | bne <1
+ |2:
+ | decode_RA8 RA, INS
+ | decode_RB8 RB, INS
+ | add RA, RA, RB
+ |3:
+ | cmplw RA, RC
+ | bgt >9 // More results wanted?
+ |
+ | lhz TMP3, TRACE:TMP2->traceno
+ | lhz RD, TRACE:TMP2->link
+ | cmpw RD, TMP3
+ | cmpwi cr1, RD, 0
+ | beq ->cont_nop // Blacklisted.
+ | slwi RD, RD, 3
+ | bne cr1, =>BC_JLOOP // Jump to stitched trace.
+ |
+ | // Stitch a new trace to the previous trace.
+ | stw TMP3, DISPATCH_J(exitno)(DISPATCH)
+ | stp L, DISPATCH_J(L)(DISPATCH)
+ | stp BASE, L->base
+ | addi CARG1, DISPATCH, GG_DISP2J
+ | mr CARG2, PC
+ | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
+ | lp BASE, L->base
+ | b ->cont_nop
+ |
+ |9:
+ | stwx TISNIL, BASE, RC
+ | addi RC, RC, 8
+ | b <3
+ |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | mr CARG1, L
+ | stw MULTRES, SAVE_MULTRES
+ | mr CARG2, PC
+ | stp BASE, L->base
+ | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | lp BASE, L->base
+ | subi PC, PC, 4
+ | b ->cont_nop
+#endif
+ |
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
|
|.macro savex_, a, b, c, d
+ |.if FPU
| stfd f..a, 16+a*8(sp)
| stfd f..b, 16+b*8(sp)
| stfd f..c, 16+c*8(sp)
| stfd f..d, 16+d*8(sp)
+ |.endif
|.endmacro
|
|->vm_exit_handler:
@@ -2623,16 +2980,16 @@ static void build_subroutines(BuildCtx *ctx)
| savex_ 20,21,22,23
| lhz CARG4, 2(CARG3) // Load trace number.
| savex_ 24,25,26,27
- | lwz L, DISPATCH_GL(jit_L)(DISPATCH)
+ | lwz L, DISPATCH_GL(cur_L)(DISPATCH)
| savex_ 28,29,30,31
| sub CARG3, TMP0, CARG3 // Compute exit number.
| lp BASE, DISPATCH_GL(jit_base)(DISPATCH)
| srwi CARG3, CARG3, 2
- | stw L, DISPATCH_J(L)(DISPATCH)
+ | stp L, DISPATCH_J(L)(DISPATCH)
| subi CARG3, CARG3, 2
- | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH)
- | stw CARG4, DISPATCH_J(parent)(DISPATCH)
| stp BASE, L->base
+ | stw CARG4, DISPATCH_J(parent)(DISPATCH)
+ | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH)
| addi CARG1, DISPATCH, GG_DISP2J
| stw CARG3, DISPATCH_J(exitno)(DISPATCH)
| addi CARG2, sp, 16
@@ -2656,28 +3013,29 @@ static void build_subroutines(BuildCtx *ctx)
| // CARG1 = MULTRES or negated error code, BASE, PC and JGL set.
| lwz L, SAVE_L
| addi DISPATCH, JGL, -GG_DISP2G-32768
+ | stp BASE, L->base
|1:
| cmpwi CARG1, 0
- | blt >3 // Check for error from exit.
- | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
+ | blt >9 // Check for error from exit.
+ | lwz LFUNC:RB, FRAME_FUNC(BASE)
| slwi MULTRES, CARG1, 3
| li TMP2, 0
| stw MULTRES, SAVE_MULTRES
- | lwz TMP1, LFUNC:TMP1->pc
- | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH)
+ | lwz TMP1, LFUNC:RB->pc
+ | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH)
| lwz KBASE, PC2PROTO(k)(TMP1)
| // Setup type comparison constants.
| li TISNUM, LJ_TISNUM
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
- | stw TMP3, TMPD
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU stw TMP3, TMPD
| li ZERO, 0
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | lfs TOBIT, TMPD
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU lfs TOBIT, TMPD
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| li TISNIL, LJ_TNIL
- | stw TMP0, TONUM_HI
- | lfs TONUM, TMPD
+ | .FPU stw TMP0, TONUM_HI
+ | .FPU lfs TONUM, TMPD
| // Modified copy of ins_next which handles function header dispatch, too.
| lwz INS, 0(PC)
| addi PC, PC, 4
@@ -2694,20 +3052,63 @@ static void build_subroutines(BuildCtx *ctx)
| decode_RC8 RC, INS
| bctr
|2:
+ | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function?
+ | blt >3
+ | // Check frame below fast function.
+ | lwz TMP1, FRAME_PC(BASE)
+ | andix. TMP0, TMP1, FRAME_TYPE
+ | bney >3 // Trace stitching continuation?
+ | // Otherwise set KBASE for Lua function below fast function.
+ | lwz TMP2, -4(TMP1)
+ | decode_RA8 TMP0, TMP2
+ | sub TMP1, BASE, TMP0
+ | lwz LFUNC:TMP2, -12(TMP1)
+ | lwz TMP1, LFUNC:TMP2->pc
+ | lwz KBASE, PC2PROTO(k)(TMP1)
+ |3:
| subi RC, MULTRES, 8
| add RA, RA, BASE
| bctr
|
- |3: // Rethrow error from the right C frame.
+ |9: // Rethrow error from the right C frame.
+ | neg CARG2, CARG1
| mr CARG1, L
- | bl extern lj_err_run // (lua_State *L)
+ | bl extern lj_err_trace // (lua_State *L, int errcode)
|.endif
|
|//-----------------------------------------------------------------------
|//-- Math helper functions ----------------------------------------------
|//-----------------------------------------------------------------------
|
- |// NYI: Use internal implementations of floor, ceil, trunc.
+ |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp.
+ |
+ |.macro sfi2d, AHI, ALO
+ |.if not FPU
+ | mr. AHI, ALO
+ | bclr 12, 2 // Handle zero first.
+ | srawi TMP0, ALO, 31
+ | xor TMP1, ALO, TMP0
+ | sub TMP1, TMP1, TMP0 // Absolute value in TMP1.
+ | cntlzw AHI, TMP1
+ | andix. TMP0, TMP0, 0x800 // Mask sign bit.
+ | slw TMP1, TMP1, AHI // Align mantissa left with leading 1.
+ | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI.
+ | slwi ALO, TMP1, 21
+ | or AHI, AHI, TMP0 // Sign | Exponent.
+ | srwi TMP1, TMP1, 11
+ | slwi AHI, AHI, 20 // Align left.
+ | add AHI, AHI, TMP1 // Add mantissa, increment exponent.
+ | blr
+ |.endif
+ |.endmacro
+ |
+ |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1.
+ |->vm_sfi2d_1:
+ | sfi2d CARG1, CARG2
+ |
+ |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1.
+ |->vm_sfi2d_2:
+ | sfi2d CARG3, CARG4
|
|->vm_modi:
| divwo. TMP0, CARG1, CARG2
@@ -2762,6 +3163,11 @@ static void build_subroutines(BuildCtx *ctx)
| blr
|.endif
|
+ |->vm_next:
+ |.if JIT
+ | NYI // On big-endian.
+ |.endif
+ |
|//-----------------------------------------------------------------------
|//-- FFI helper functions -----------------------------------------------
|//-----------------------------------------------------------------------
@@ -2775,21 +3181,21 @@ static void build_subroutines(BuildCtx *ctx)
| addi DISPATCH, r12, GG_G2DISP
| stw r11, CTSTATE->cb.slot
| stw r3, CTSTATE->cb.gpr[0]
- | stfd f1, CTSTATE->cb.fpr[0]
+ | .FPU stfd f1, CTSTATE->cb.fpr[0]
| stw r4, CTSTATE->cb.gpr[1]
- | stfd f2, CTSTATE->cb.fpr[1]
+ | .FPU stfd f2, CTSTATE->cb.fpr[1]
| stw r5, CTSTATE->cb.gpr[2]
- | stfd f3, CTSTATE->cb.fpr[2]
+ | .FPU stfd f3, CTSTATE->cb.fpr[2]
| stw r6, CTSTATE->cb.gpr[3]
- | stfd f4, CTSTATE->cb.fpr[3]
+ | .FPU stfd f4, CTSTATE->cb.fpr[3]
| stw r7, CTSTATE->cb.gpr[4]
- | stfd f5, CTSTATE->cb.fpr[4]
+ | .FPU stfd f5, CTSTATE->cb.fpr[4]
| stw r8, CTSTATE->cb.gpr[5]
- | stfd f6, CTSTATE->cb.fpr[5]
+ | .FPU stfd f6, CTSTATE->cb.fpr[5]
| stw r9, CTSTATE->cb.gpr[6]
- | stfd f7, CTSTATE->cb.fpr[6]
+ | .FPU stfd f7, CTSTATE->cb.fpr[6]
| stw r10, CTSTATE->cb.gpr[7]
- | stfd f8, CTSTATE->cb.fpr[7]
+ | .FPU stfd f8, CTSTATE->cb.fpr[7]
| addi TMP0, sp, CFRAME_SPACE+8
| stw TMP0, CTSTATE->cb.stack
| mr CARG1, CTSTATE
@@ -2800,21 +3206,21 @@ static void build_subroutines(BuildCtx *ctx)
| lp BASE, L:CRET1->base
| li TISNUM, LJ_TISNUM // Setup type comparison constants.
| lp RC, L:CRET1->top
- | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
+ | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float).
| li ZERO, 0
| mr L, CRET1
- | stw TMP3, TMPD
- | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
+ | .FPU stw TMP3, TMPD
+ | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double)
| lwz LFUNC:RB, FRAME_FUNC(BASE)
- | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
- | stw TMP0, TONUM_HI
+ | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float).
+ | .FPU stw TMP0, TONUM_HI
| li TISNIL, LJ_TNIL
| li_vmstate INTERP
- | lfs TOBIT, TMPD
- | stw TMP3, TMPD
+ | .FPU lfs TOBIT, TMPD
+ | .FPU stw TMP3, TMPD
| sub RC, RC, BASE
| st_vmstate
- | lfs TONUM, TMPD
+ | .FPU lfs TONUM, TMPD
| ins_callt
|.endif
|
@@ -2828,7 +3234,7 @@ static void build_subroutines(BuildCtx *ctx)
| mr CARG2, RA
| bl extern lj_ccallback_leave // (CTState *cts, TValue *o)
| lwz CRET1, CTSTATE->cb.gpr[0]
- | lfd FARG1, CTSTATE->cb.fpr[0]
+ | .FPU lfd FARG1, CTSTATE->cb.fpr[0]
| lwz CRET2, CTSTATE->cb.gpr[1]
| b ->vm_leave_unw
|.endif
@@ -2862,14 +3268,14 @@ static void build_subroutines(BuildCtx *ctx)
| bge <1
|2:
| bney cr1, >3
- | lfd f1, CCSTATE->fpr[0]
- | lfd f2, CCSTATE->fpr[1]
- | lfd f3, CCSTATE->fpr[2]
- | lfd f4, CCSTATE->fpr[3]
- | lfd f5, CCSTATE->fpr[4]
- | lfd f6, CCSTATE->fpr[5]
- | lfd f7, CCSTATE->fpr[6]
- | lfd f8, CCSTATE->fpr[7]
+ | .FPU lfd f1, CCSTATE->fpr[0]
+ | .FPU lfd f2, CCSTATE->fpr[1]
+ | .FPU lfd f3, CCSTATE->fpr[2]
+ | .FPU lfd f4, CCSTATE->fpr[3]
+ | .FPU lfd f5, CCSTATE->fpr[4]
+ | .FPU lfd f6, CCSTATE->fpr[5]
+ | .FPU lfd f7, CCSTATE->fpr[6]
+ | .FPU lfd f8, CCSTATE->fpr[7]
|3:
| lp TMP0, CCSTATE->func
| lwz CARG2, CCSTATE->gpr[1]
@@ -2886,7 +3292,7 @@ static void build_subroutines(BuildCtx *ctx)
| lwz TMP2, -4(r14)
| lwz TMP0, 4(r14)
| stw CARG1, CCSTATE:TMP1->gpr[0]
- | stfd FARG1, CCSTATE:TMP1->fpr[0]
+ | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0]
| stw CARG2, CCSTATE:TMP1->gpr[1]
| mtlr TMP0
| stw CARG3, CCSTATE:TMP1->gpr[2]
@@ -2915,19 +3321,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
| // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzux CARG1, RA, BASE
| addi PC, PC, 4
| lwz CARG2, 4(RA)
- | lwzux TMP1, RD, BASE
+ | lwzux CARG3, RD, BASE
| lwz TMP2, -4(PC)
- | checknum cr0, TMP0
- | lwz CARG3, 4(RD)
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RD)
| decode_RD4 TMP2, TMP2
- | checknum cr1, TMP1
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | checknum cr1, CARG3
+ | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16)
| bne cr0, >7
| bne cr1, >8
- | cmpw CARG2, CARG3
+ | cmpw CARG2, CARG4
if (op == BC_ISLT) {
| bge >2
} else if (op == BC_ISGE) {
@@ -2938,28 +3344,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ble >2
}
|1:
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|2:
| ins_next
|
|7: // RA is not an integer.
| bgt cr0, ->vmeta_comp
| // RA is a number.
- | lfd f0, 0(RA)
+ | .FPU lfd f0, 0(RA)
| bgt cr1, ->vmeta_comp
| blt cr1, >4
| // RA is a number, RD is an integer.
- | tonum_i f1, CARG3
+ |.if FPU
+ | tonum_i f1, CARG4
+ |.else
+ | bl ->vm_sfi2d_2
+ |.endif
| b >5
|
|8: // RA is an integer, RD is not an integer.
| bgt cr1, ->vmeta_comp
| // RA is an integer, RD is a number.
+ |.if FPU
| tonum_i f0, CARG2
+ |.else
+ | bl ->vm_sfi2d_1
+ |.endif
|4:
- | lfd f1, 0(RD)
+ | .FPU lfd f1, 0(RD)
|5:
+ |.if FPU
| fcmpu cr0, f0, f1
+ |.else
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ |.endif
if (op == BC_ISLT) {
| bge <2
} else if (op == BC_ISGE) {
@@ -3007,42 +3426,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = op == BC_ISEQV;
| // RA = src1*8, RD = src2*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzux CARG1, RA, BASE
| addi PC, PC, 4
| lwz CARG2, 4(RA)
- | lwzux TMP1, RD, BASE
- | checknum cr0, TMP0
- | lwz TMP2, -4(PC)
- | checknum cr1, TMP1
- | decode_RD4 TMP2, TMP2
- | lwz CARG3, 4(RD)
+ | lwzux CARG3, RD, BASE
+ | checknum cr0, CARG1
+ | lwz SAVE0, -4(PC)
+ | checknum cr1, CARG3
+ | decode_RD4 SAVE0, SAVE0
+ | lwz CARG4, 4(RD)
| cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
if (vk) {
| ble cr7, ->BC_ISEQN_Z
} else {
| ble cr7, ->BC_ISNEN_Z
}
|.else
- | lwzux TMP0, RA, BASE
- | lwz TMP2, 0(PC)
+ | lwzux CARG1, RA, BASE
+ | lwz SAVE0, 0(PC)
| lfd f0, 0(RA)
| addi PC, PC, 4
- | lwzux TMP1, RD, BASE
- | checknum cr0, TMP0
- | decode_RD4 TMP2, TMP2
+ | lwzux CARG3, RD, BASE
+ | checknum cr0, CARG1
+ | decode_RD4 SAVE0, SAVE0
| lfd f1, 0(RD)
- | checknum cr1, TMP1
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | checknum cr1, CARG3
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
| bge cr0, >5
| bge cr1, >5
| fcmpu cr0, f0, f1
if (vk) {
| bne >1
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
} else {
| beq >1
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
}
|1:
| ins_next
@@ -3050,36 +3469,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|5: // Either or both types are not numbers.
|.if not DUALNUM
| lwz CARG2, 4(RA)
- | lwz CARG3, 4(RD)
+ | lwz CARG4, 4(RD)
|.endif
|.if FFI
- | cmpwi cr7, TMP0, LJ_TCDATA
- | cmpwi cr5, TMP1, LJ_TCDATA
+ | cmpwi cr7, CARG1, LJ_TCDATA
+ | cmpwi cr5, CARG3, LJ_TCDATA
|.endif
- | not TMP3, TMP0
- | cmplw TMP0, TMP1
- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
+ | not TMP2, CARG1
+ | cmplw CARG1, CARG3
+ | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive?
|.if FFI
| cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq
|.endif
- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
+ | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata?
|.if FFI
| beq cr7, ->vmeta_equal_cd
|.endif
- | cmplw cr5, CARG2, CARG3
+ | cmplw cr5, CARG2, CARG4
| crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive.
| crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type.
| crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv.
- | mr SAVE0, PC
+ | mr SAVE1, PC
| cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2.
| cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2.
if (vk) {
| bne cr0, >6
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|6:
} else {
| beq cr0, >6
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|6:
}
|.if DUALNUM
@@ -3094,6 +3513,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
| // Different tables or userdatas. Need to check __eq metamethod.
| // Field metatable must be at same offset for GCtab and GCudata!
+ | mr CARG3, CARG4
| lwz TAB:TMP2, TAB:CARG2->metatable
| li CARG4, 1-vk // ne = 0 or 1.
| cmplwi TAB:TMP2, 0
@@ -3101,7 +3521,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lbz TMP2, TAB:TMP2->nomm
| andix. TMP2, TMP2, 1<<MM_eq
| bne <1 // Or 'no __eq' flag set?
- | mr PC, SAVE0 // Restore old PC.
+ | mr PC, SAVE1 // Restore old PC.
| b ->vmeta_equal // Handle __eq metamethod.
break;
@@ -3142,16 +3562,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
vk = op == BC_ISEQN;
| // RA = src*8, RD = num_const*8, JMP with RD = target
|.if DUALNUM
- | lwzux TMP0, RA, BASE
+ | lwzux CARG1, RA, BASE
| addi PC, PC, 4
| lwz CARG2, 4(RA)
- | lwzux TMP1, RD, KBASE
- | checknum cr0, TMP0
- | lwz TMP2, -4(PC)
- | checknum cr1, TMP1
- | decode_RD4 TMP2, TMP2
- | lwz CARG3, 4(RD)
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | lwzux CARG3, RD, KBASE
+ | checknum cr0, CARG1
+ | lwz SAVE0, -4(PC)
+ | checknum cr1, CARG3
+ | decode_RD4 SAVE0, SAVE0
+ | lwz CARG4, 4(RD)
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
if (vk) {
|->BC_ISEQN_Z:
} else {
@@ -3159,7 +3579,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
| bne cr0, >7
| bne cr1, >8
- | cmpw CARG2, CARG3
+ | cmpw CARG2, CARG4
|4:
|.else
if (vk) {
@@ -3167,20 +3587,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
} else {
|->BC_ISNEN_Z: // Dummy label.
}
- | lwzx TMP0, BASE, RA
+ | lwzx CARG1, BASE, RA
| addi PC, PC, 4
| lfdx f0, BASE, RA
- | lwz TMP2, -4(PC)
+ | lwz SAVE0, -4(PC)
| lfdx f1, KBASE, RD
- | decode_RD4 TMP2, TMP2
- | checknum TMP0
- | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16)
+ | decode_RD4 SAVE0, SAVE0
+ | checknum CARG1
+ | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16)
| bge >3
| fcmpu cr0, f0, f1
|.endif
if (vk) {
| bne >1
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|1:
|.if not FFI
|3:
@@ -3191,13 +3611,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.if not FFI
|3:
|.endif
- | add PC, PC, TMP2
+ | add PC, PC, SAVE0
|2:
}
| ins_next
|.if FFI
|3:
- | cmpwi TMP0, LJ_TCDATA
+ | cmpwi CARG1, LJ_TCDATA
| beq ->vmeta_equal_cd
| b <1
|.endif
@@ -3205,18 +3625,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|7: // RA is not an integer.
| bge cr0, <3
| // RA is a number.
- | lfd f0, 0(RA)
+ | .FPU lfd f0, 0(RA)
| blt cr1, >1
| // RA is a number, RD is an integer.
- | tonum_i f1, CARG3
+ |.if FPU
+ | tonum_i f1, CARG4
+ |.else
+ | bl ->vm_sfi2d_2
+ |.endif
| b >2
|
|8: // RA is an integer, RD is a number.
+ |.if FPU
| tonum_i f0, CARG2
+ |.else
+ | bl ->vm_sfi2d_1
+ |.endif
|1:
- | lfd f1, 0(RD)
+ | .FPU lfd f1, 0(RD)
|2:
+ |.if FPU
| fcmpu cr0, f0, f1
+ |.else
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ |.endif
| b <4
|.endif
break;
@@ -3271,7 +3704,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add PC, PC, TMP2
} else {
| li TMP1, LJ_TFALSE
+ |.if FPU
| lfdx f0, BASE, RD
+ |.else
+ | lwzux CARG1, RD, BASE
+ | lwz CARG2, 4(RD)
+ |.endif
| cmplw TMP0, TMP1
if (op == BC_ISTC) {
| bge >1
@@ -3280,20 +3718,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
}
| addis PC, PC, -(BCBIAS_J*4 >> 16)
| decode_RD4 TMP2, INS
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux CARG1, RA, BASE
+ | stw CARG2, 4(RA)
+ |.endif
| add PC, PC, TMP2
|1:
}
| ins_next
break;
+ case BC_ISTYPE:
+ | // RA = src*8, RD = -type*8
+ | lwzx TMP0, BASE, RA
+ | srwi TMP1, RD, 3
+ | ins_next1
+ |.if not PPE and not GPR64
+ | add. TMP0, TMP0, TMP1
+ |.else
+ | neg TMP1, TMP1
+ | cmpw TMP0, TMP1
+ |.endif
+ | bne ->vmeta_istype
+ | ins_next2
+ break;
+ case BC_ISNUM:
+ | // RA = src*8, RD = -(TISNUM-1)*8
+ | lwzx TMP0, BASE, RA
+ | ins_next1
+ | checknum TMP0
+ | bge ->vmeta_istype
+ | ins_next2
+ break;
+
/* -- Unary ops --------------------------------------------------------- */
case BC_MOV:
| // RA = dst*8, RD = src*8
| ins_next1
+ |.if FPU
| lfdx f0, BASE, RD
| stfdx f0, BASE, RA
+ |.else
+ | lwzux TMP0, RD, BASE
+ | lwz TMP1, 4(RD)
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
break;
case BC_NOT:
@@ -3395,44 +3868,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
- | lwzx TMP1, BASE, RB
+ | lwzx CARG1, BASE, RB
| .if DUALNUM
- | lwzx TMP2, KBASE, RC
+ | lwzx CARG3, KBASE, RC
| .endif
+ | .if FPU
| lfdx f14, BASE, RB
| lfdx f15, KBASE, RC
+ | .else
+ | add TMP1, BASE, RB
+ | add TMP2, KBASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ | .endif
| .if DUALNUM
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vn
| .else
- | checknum TMP1; bge ->vmeta_arith_vn
+ | checknum CARG1; bge ->vmeta_arith_vn
| .endif
|| break;
||case 1:
- | lwzx TMP1, BASE, RB
+ | lwzx CARG1, BASE, RB
| .if DUALNUM
- | lwzx TMP2, KBASE, RC
+ | lwzx CARG3, KBASE, RC
| .endif
+ | .if FPU
| lfdx f15, BASE, RB
| lfdx f14, KBASE, RC
+ | .else
+ | add TMP1, BASE, RB
+ | add TMP2, KBASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ | .endif
| .if DUALNUM
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_nv
| .else
- | checknum TMP1; bge ->vmeta_arith_nv
+ | checknum CARG1; bge ->vmeta_arith_nv
| .endif
|| break;
||default:
- | lwzx TMP1, BASE, RB
- | lwzx TMP2, BASE, RC
+ | lwzx CARG1, BASE, RB
+ | lwzx CARG3, BASE, RC
+ | .if FPU
| lfdx f14, BASE, RB
| lfdx f15, BASE, RC
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ | .else
+ | add TMP1, BASE, RB
+ | add TMP2, BASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ | .endif
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vv
|| break;
@@ -3466,48 +3960,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| fsub a, b, a // b - floor(b/c)*c
|.endmacro
|
+ |.macro sfpmod
+ |->BC_MODVN_Z:
+ | stw CARG1, SFSAVE_1
+ | stw CARG2, SFSAVE_2
+ | mr SAVE0, CARG3
+ | mr SAVE1, CARG4
+ | blex __divdf3
+ | blex floor
+ | mr CARG3, SAVE0
+ | mr CARG4, SAVE1
+ | blex __muldf3
+ | mr CARG3, CRET1
+ | mr CARG4, CRET2
+ | lwz CARG1, SFSAVE_1
+ | lwz CARG2, SFSAVE_2
+ | blex __subdf3
+ |.endmacro
+ |
|.macro ins_arithfp, fpins
| ins_arithpre
|.if "fpins" == "fpmod_"
| b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- |.else
+ |.elif FPU
| fpins f0, f14, f15
| ins_next1
| stfdx f0, BASE, RA
| ins_next2
+ |.else
+ | blex __divdf3 // Only soft-float div uses this macro.
+ | ins_next1
+ | stwux CRET1, RA, BASE
+ | stw CRET2, 4(RA)
+ | ins_next2
|.endif
|.endmacro
|
- |.macro ins_arithdn, intins, fpins
+ |.macro ins_arithdn, intins, fpins, fpcall
| // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
||case 0:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, KBASE
- | lwz CARG1, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG2, 4(RC)
+ | lwzux CARG1, RB, BASE
+ | lwzux CARG3, RC, KBASE
+ | lwz CARG2, 4(RB)
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RC)
+ | checknum cr1, CARG3
|| break;
||case 1:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, KBASE
- | lwz CARG2, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG1, 4(RC)
+ | lwzux CARG3, RB, BASE
+ | lwzux CARG1, RC, KBASE
+ | lwz CARG4, 4(RB)
+ | checknum cr0, CARG3
+ | lwz CARG2, 4(RC)
+ | checknum cr1, CARG1
|| break;
||default:
- | lwzux TMP1, RB, BASE
- | lwzux TMP2, RC, BASE
- | lwz CARG1, 4(RB)
- | checknum cr0, TMP1
- | lwz CARG2, 4(RC)
+ | lwzux CARG1, RB, BASE
+ | lwzux CARG3, RC, BASE
+ | lwz CARG2, 4(RB)
+ | checknum cr0, CARG1
+ | lwz CARG4, 4(RC)
+ | checknum cr1, CARG3
|| break;
||}
- | checknum cr1, TMP2
| bne >5
| bne cr1, >5
- | intins CARG1, CARG1, CARG2
+ |.if "intins" == "intmod"
+ | mr CARG1, CARG2
+ | mr CARG2, CARG4
+ |.endif
+ | intins CARG1, CARG2, CARG4
| bso >4
|1:
| ins_next1
@@ -3519,29 +4043,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| checkov TMP0, <1 // Ignore unrelated overflow.
| ins_arithfallback b
|5: // FP variant.
+ |.if FPU
||if (vk == 1) {
| lfd f15, 0(RB)
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| lfd f14, 0(RC)
||} else {
| lfd f14, 0(RB)
- | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| lfd f15, 0(RC)
||}
+ |.endif
+ | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| ins_arithfallback bge
|.if "fpins" == "fpmod_"
| b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
|.else
+ |.if FPU
| fpins f0, f14, f15
- | ins_next1
| stfdx f0, BASE, RA
+ |.else
+ |.if "fpcall" == "sfpmod"
+ | sfpmod
+ |.else
+ | blex fpcall
+ |.endif
+ | stwux CRET1, RA, BASE
+ | stw CRET2, 4(RA)
+ |.endif
+ | ins_next1
| b <2
|.endif
|.endmacro
|
- |.macro ins_arith, intins, fpins
+ |.macro ins_arith, intins, fpins, fpcall
|.if DUALNUM
- | ins_arithdn intins, fpins
+ | ins_arithdn intins, fpins, fpcall
|.else
| ins_arithfp fpins
|.endif
@@ -3556,9 +4091,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addo. TMP0, TMP0, TMP1
| add y, a, b
|.endmacro
- | ins_arith addo32., fadd
+ | ins_arith addo32., fadd, __adddf3
|.else
- | ins_arith addo., fadd
+ | ins_arith addo., fadd, __adddf3
|.endif
break;
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
@@ -3570,36 +4105,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| subo. TMP0, TMP0, TMP1
| sub y, a, b
|.endmacro
- | ins_arith subo32., fsub
+ | ins_arith subo32., fsub, __subdf3
|.else
- | ins_arith subo., fsub
+ | ins_arith subo., fsub, __subdf3
|.endif
break;
case BC_MULVN: case BC_MULNV: case BC_MULVV:
- | ins_arith mullwo., fmul
+ | ins_arith mullwo., fmul, __muldf3
break;
case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
| ins_arithfp fdiv
break;
case BC_MODVN:
- | ins_arith intmod, fpmod
+ | ins_arith intmod, fpmod, sfpmod
break;
case BC_MODNV: case BC_MODVV:
- | ins_arith intmod, fpmod_
+ | ins_arith intmod, fpmod_, sfpmod
break;
case BC_POW:
| // NYI: (partial) integer arithmetic.
- | lwzx TMP1, BASE, RB
+ | lwzx CARG1, BASE, RB
+ | lwzx CARG3, BASE, RC
+ |.if FPU
| lfdx FARG1, BASE, RB
- | lwzx TMP2, BASE, RC
| lfdx FARG2, BASE, RC
- | checknum cr0, TMP1
- | checknum cr1, TMP2
+ |.else
+ | add TMP1, BASE, RB
+ | add TMP2, BASE, RC
+ | lwz CARG2, 4(TMP1)
+ | lwz CARG4, 4(TMP2)
+ |.endif
+ | checknum cr0, CARG1
+ | checknum cr1, CARG3
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
| bge ->vmeta_arith_vv
| blex pow
| ins_next1
+ |.if FPU
| stfdx FARG1, BASE, RA
+ |.else
+ | stwux CARG1, RA, BASE
+ | stw CARG2, 4(RA)
+ |.endif
| ins_next2
break;
@@ -3619,8 +4166,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lp BASE, L->base
| bne ->vmeta_binop
| ins_next1
+ |.if FPU
| lfdx f0, BASE, SAVE0 // Copy result from RB to RA.
| stfdx f0, BASE, RA
+ |.else
+ | lwzux TMP0, SAVE0, BASE
+ | lwz TMP1, 4(SAVE0)
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
break;
@@ -3683,8 +4237,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_KNUM:
| // RA = dst*8, RD = num_const*8
| ins_next1
+ |.if FPU
| lfdx f0, KBASE, RD
| stfdx f0, BASE, RA
+ |.else
+ | lwzux TMP0, RD, KBASE
+ | lwz TMP1, 4(RD)
+ | stwux TMP0, RA, BASE
+ | stw TMP1, 4(RA)
+ |.endif
| ins_next2
break;
case BC_KPRI:
@@ -3717,8 +4278,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwzx UPVAL:RB, LFUNC:RB, RD
| ins_next1
| lwz TMP1, UPVAL:RB->v
+ |.if FPU
| lfd f0, 0(TMP1)
| stfdx f0, BASE, RA
+ |.else
+ | lwz TMP2, 0(TMP1)
+ | lwz TMP3, 4(TMP1)
+ | stwux TMP2, RA, BASE
+ | stw TMP3, 4(RA)
+ |.endif
| ins_next2
break;
case BC_USETV:
@@ -3726,14 +4294,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| srwi RA, RA, 1
| addi RA, RA, offsetof(GCfuncL, uvptr)
+ |.if FPU
| lfdux f0, RD, BASE
+ |.else
+ | lwzux CARG1, RD, BASE
+ | lwz CARG3, 4(RD)
+ |.endif
| lwzx UPVAL:RB, LFUNC:RB, RA
| lbz TMP3, UPVAL:RB->marked
| lwz CARG2, UPVAL:RB->v
| andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
| lbz TMP0, UPVAL:RB->closed
| lwz TMP2, 0(RD)
+ |.if FPU
| stfd f0, 0(CARG2)
+ |.else
+ | stw CARG1, 0(CARG2)
+ | stw CARG3, 4(CARG2)
+ |.endif
| cmplwi cr1, TMP0, 0
| lwz TMP1, 4(RD)
| cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -3789,11 +4367,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz LFUNC:RB, FRAME_FUNC(BASE)
| srwi RA, RA, 1
| addi RA, RA, offsetof(GCfuncL, uvptr)
+ |.if FPU
| lfdx f0, KBASE, RD
+ |.else
+ | lwzux TMP2, RD, KBASE
+ | lwz TMP3, 4(RD)
+ |.endif
| lwzx UPVAL:RB, LFUNC:RB, RA
| ins_next1
| lwz TMP1, UPVAL:RB->v
+ |.if FPU
| stfd f0, 0(TMP1)
+ |.else
+ | stw TMP2, 0(TMP1)
+ | stw TMP3, 4(TMP1)
+ |.endif
| ins_next2
break;
case BC_USETP:
@@ -3941,11 +4529,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
| ble ->vmeta_tgetv // Integer key and in array part?
| lwzx TMP0, TMP1, TMP2
+ |.if FPU
| lfdx f14, TMP1, TMP2
+ |.else
+ | lwzux SAVE0, TMP1, TMP2
+ | lwz SAVE1, 4(TMP1)
+ |.endif
| checknil TMP0; beq >2
|1:
| ins_next1
+ |.if FPU
| stfdx f14, BASE, RA
+ |.else
+ | stwux SAVE0, RA, BASE
+ | stw SAVE1, 4(RA)
+ |.endif
| ins_next2
|
|2: // Check for __index if table value is nil.
@@ -3976,9 +4574,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|->BC_TGETS_Z:
| // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
| lwz TMP0, TAB:RB->hmask
- | lwz TMP1, STR:RC->hash
+ | lwz TMP1, STR:RC->sid
| lwz NODE:TMP2, TAB:RB->node
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
| slwi TMP0, TMP1, 5
| slwi TMP1, TMP1, 3
| sub TMP1, TMP0, TMP1
@@ -4021,12 +4619,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz TMP1, TAB:RB->asize
| lwz TMP2, TAB:RB->array
| cmplw TMP0, TMP1; bge ->vmeta_tgetb
+ |.if FPU
| lwzx TMP1, TMP2, RC
| lfdx f0, TMP2, RC
+ |.else
+ | lwzux TMP1, TMP2, RC
+ | lwz TMP3, 4(TMP2)
+ |.endif
| checknil TMP1; beq >5
|1:
| ins_next1
+ |.if FPU
| stfdx f0, BASE, RA
+ |.else
+ | stwux TMP1, RA, BASE
+ | stw TMP3, 4(RA)
+ |.endif
| ins_next2
|
|5: // Check for __index if table value is nil.
@@ -4038,6 +4646,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bne <1 // 'no __index' flag set: done.
| b ->vmeta_tgetb // Caveat: preserve TMP0!
break;
+ case BC_TGETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | add RB, BASE, RB
+ | lwz TAB:CARG1, 4(RB)
+ |.if DUALNUM
+ | add RC, BASE, RC
+ | lwz TMP0, TAB:CARG1->asize
+ | lwz CARG2, 4(RC)
+ | lwz TMP1, TAB:CARG1->array
+ |.else
+ | lfdx f0, BASE, RC
+ | lwz TMP0, TAB:CARG1->asize
+ | toint CARG2, f0
+ | lwz TMP1, TAB:CARG1->array
+ |.endif
+ | cmplw TMP0, CARG2
+ | slwi TMP2, CARG2, 3
+ | ble ->vmeta_tgetr // In array part?
+ |.if FPU
+ | lfdx f14, TMP1, TMP2
+ |.else
+ | lwzux SAVE0, TMP2, TMP1
+ | lwz SAVE1, 4(TMP2)
+ |.endif
+ |->BC_TGETR_Z:
+ | ins_next1
+ |.if FPU
+ | stfdx f14, BASE, RA
+ |.else
+ | stwux SAVE0, RA, BASE
+ | stw SAVE1, 4(RA)
+ |.endif
+ | ins_next2
+ break;
case BC_TSETV:
| // RA = src*8, RB = table*8, RC = key*8
@@ -4076,11 +4718,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ble ->vmeta_tsetv // Integer key and in array part?
| lwzx TMP2, TMP1, TMP0
| lbz TMP3, TAB:RB->marked
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | add SAVE1, BASE, RA
+ | lwz SAVE0, 0(SAVE1)
+ | lwz SAVE1, 4(SAVE1)
+ |.endif
| checknil TMP2; beq >3
|1:
| andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| stfdx f14, TMP1, TMP0
+ |.else
+ | stwux SAVE0, TMP1, TMP0
+ | stw SAVE1, 4(TMP1)
+ |.endif
| bne >7
|2:
| ins_next
@@ -4117,11 +4770,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|->BC_TSETS_Z:
| // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
| lwz TMP0, TAB:RB->hmask
- | lwz TMP1, STR:RC->hash
+ | lwz TMP1, STR:RC->sid
| lwz NODE:TMP2, TAB:RB->node
| stb ZERO, TAB:RB->nomm // Clear metamethod cache.
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
+ | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | add CARG2, BASE, RA
+ | lwz SAVE0, 0(CARG2)
+ | lwz SAVE1, 4(CARG2)
+ |.endif
| slwi TMP0, TMP1, 5
| slwi TMP1, TMP1, 3
| sub TMP1, TMP0, TMP1
@@ -4137,7 +4796,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| checknil CARG2; beq >4 // Key found, but nil value?
|2:
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| stfd f14, NODE:TMP2->val
+ |.else
+ | stw SAVE0, NODE:TMP2->val.u32.hi
+ | stw SAVE1, NODE:TMP2->val.u32.lo
+ |.endif
| bne >7
|3:
| ins_next
@@ -4176,7 +4840,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
| // Returns TValue *.
| lp BASE, L->base
+ |.if FPU
| stfd f14, 0(CRET1)
+ |.else
+ | stw SAVE0, 0(CRET1)
+ | stw SAVE1, 4(CRET1)
+ |.endif
| b <3 // No 2nd write barrier needed.
|
|7: // Possible table write barrier for the value. Skip valiswhite check.
@@ -4193,13 +4862,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lwz TMP2, TAB:RB->array
| lbz TMP3, TAB:RB->marked
| cmplw TMP0, TMP1
+ |.if FPU
| lfdx f14, BASE, RA
+ |.else
+ | add CARG2, BASE, RA
+ | lwz SAVE0, 0(CARG2)
+ | lwz SAVE1, 4(CARG2)
+ |.endif
| bge ->vmeta_tsetb
| lwzx TMP1, TMP2, RC
| checknil TMP1; beq >5
|1:
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
+ |.if FPU
| stfdx f14, TMP2, RC
+ |.else
+ | stwux SAVE0, RC, TMP2
+ | stw SAVE1, 4(RC)
+ |.endif
| bne >7
|2:
| ins_next
@@ -4217,6 +4897,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| barrierback TAB:RB, TMP3, TMP0
| b <2
break;
+ case BC_TSETR:
+ | // RA = dst*8, RB = table*8, RC = key*8
+ | add RB, BASE, RB
+ | lwz TAB:CARG2, 4(RB)
+ |.if DUALNUM
+ | add RC, BASE, RC
+ | lbz TMP3, TAB:CARG2->marked
+ | lwz TMP0, TAB:CARG2->asize
+ | lwz CARG3, 4(RC)
+ | lwz TMP1, TAB:CARG2->array
+ |.else
+ | lfdx f0, BASE, RC
+ | lbz TMP3, TAB:CARG2->marked
+ | lwz TMP0, TAB:CARG2->asize
+ | toint CARG3, f0
+ | lwz TMP1, TAB:CARG2->array
+ |.endif
+ | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
+ | bne >7
+ |2:
+ | cmplw TMP0, CARG3
+ | slwi TMP2, CARG3, 3
+ |.if FPU
+ | lfdx f14, BASE, RA
+ |.else
+ | lwzux SAVE0, RA, BASE
+ | lwz SAVE1, 4(RA)
+ |.endif
+ | ble ->vmeta_tsetr // In array part?
+ | ins_next1
+ |.if FPU
+ | stfdx f14, TMP1, TMP2
+ |.else
+ | stwux SAVE0, TMP1, TMP2
+ | stw SAVE1, 4(TMP1)
+ |.endif
+ | ins_next2
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:CARG2, TMP3, TMP2
+ | b <2
+ break;
+
case BC_TSETM:
| // RA = base*8 (table at base-1), RD = num_const*8 (start index)
@@ -4239,10 +4962,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add TMP1, TMP1, TMP0
| andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
|3: // Copy result slots to table.
+ |.if FPU
| lfd f0, 0(RA)
+ |.else
+ | lwz SAVE0, 0(RA)
+ | lwz SAVE1, 4(RA)
+ |.endif
| addi RA, RA, 8
| cmpw cr1, RA, TMP2
+ |.if FPU
| stfd f0, 0(TMP1)
+ |.else
+ | stw SAVE0, 0(TMP1)
+ | stw SAVE1, 4(TMP1)
+ |.endif
| addi TMP1, TMP1, 8
| blt cr1, <3
| bne >7
@@ -4309,9 +5042,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| beq cr1, >3
|2:
| addi TMP3, TMP2, 8
+ |.if FPU
| lfdx f0, RA, TMP2
+ |.else
+ | add CARG3, RA, TMP2
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| cmplw cr1, TMP3, NARGS8:RC
+ |.if FPU
| stfdx f0, BASE, TMP2
+ |.else
+ | stwux CARG1, TMP2, BASE
+ | stw CARG2, 4(TMP2)
+ |.endif
| mr TMP2, TMP3
| bne cr1, <2
|3:
@@ -4344,14 +5088,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| add BASE, BASE, RA
| lwz TMP1, -24(BASE)
| lwz LFUNC:RB, -20(BASE)
+ |.if FPU
| lfd f1, -8(BASE)
| lfd f0, -16(BASE)
+ |.else
+ | lwz CARG1, -8(BASE)
+ | lwz CARG2, -4(BASE)
+ | lwz CARG3, -16(BASE)
+ | lwz CARG4, -12(BASE)
+ |.endif
| stw TMP1, 0(BASE) // Copy callable.
| stw LFUNC:RB, 4(BASE)
| checkfunc TMP1
- | stfd f1, 16(BASE) // Copy control var.
| li NARGS8:RC, 16 // Iterators get 2 arguments.
+ |.if FPU
+ | stfd f1, 16(BASE) // Copy control var.
| stfdu f0, 8(BASE) // Copy state.
+ |.else
+ | stw CARG1, 16(BASE) // Copy control var.
+ | stw CARG2, 20(BASE)
+ | stwu CARG3, 8(BASE) // Copy state.
+ | stw CARG4, 4(BASE)
+ |.endif
| bne ->vmeta_call
| ins_call
break;
@@ -4359,8 +5117,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_ITERN:
| // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
|.if JIT
- | // NYI: add hotloop, record BC_ITERN.
+ | // NYI on big-endian
|.endif
+ |->vm_IITERN:
| add RA, BASE, RA
| lwz TAB:RB, -12(RA)
| lwz RC, -4(RA) // Get index from control var.
@@ -4372,7 +5131,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| slwi TMP3, RC, 3
| bge >5 // Index points after array part?
| lwzx TMP2, TMP1, TMP3
+ |.if FPU
| lfdx f0, TMP1, TMP3
+ |.else
+ | lwzux CARG1, TMP3, TMP1
+ | lwz CARG2, 4(TMP3)
+ |.endif
| checknil TMP2
| lwz INS, -4(PC)
| beq >4
@@ -4384,7 +5148,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endif
| addi RC, RC, 1
| addis TMP3, PC, -(BCBIAS_J*4 >> 16)
+ |.if FPU
| stfd f0, 8(RA)
+ |.else
+ | stw CARG1, 8(RA)
+ | stw CARG2, 12(RA)
+ |.endif
| decode_RD4 TMP1, INS
| stw RC, -4(RA) // Update control var.
| add PC, TMP1, TMP3
@@ -4409,17 +5178,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| slwi RB, RC, 3
| sub TMP3, TMP3, RB
| lwzx RB, TMP2, TMP3
+ |.if FPU
| lfdx f0, TMP2, TMP3
+ |.else
+ | add CARG3, TMP2, TMP3
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| add NODE:TMP3, TMP2, TMP3
| checknil RB
| lwz INS, -4(PC)
| beq >7
+ |.if FPU
| lfd f1, NODE:TMP3->key
+ |.else
+ | lwz CARG3, NODE:TMP3->key.u32.hi
+ | lwz CARG4, NODE:TMP3->key.u32.lo
+ |.endif
| addis TMP2, PC, -(BCBIAS_J*4 >> 16)
+ |.if FPU
| stfd f0, 8(RA)
+ |.else
+ | stw CARG1, 8(RA)
+ | stw CARG2, 12(RA)
+ |.endif
| add RC, RC, TMP0
| decode_RD4 TMP1, INS
+ |.if FPU
| stfd f1, 0(RA)
+ |.else
+ | stw CARG3, 0(RA)
+ | stw CARG4, 4(RA)
+ |.endif
| addi RC, RC, 1
| add PC, TMP1, TMP2
| stw RC, -4(RA) // Update control var.
@@ -4448,8 +5238,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
| add TMP3, PC, TMP0
| bne cr0, >5
- | lus TMP1, 0xfffe
- | ori TMP1, TMP1, 0x7fff
+ | lus TMP1, (LJ_KEYINDEX >> 16)
+ | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff)
| stw ZERO, -4(RA) // Initialize control var.
| stw TMP1, -8(RA)
| addis PC, TMP3, -(BCBIAS_J*4 >> 16)
@@ -4460,6 +5250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li TMP1, BC_ITERC
| stb TMP0, -1(PC)
| addis PC, TMP3, -(BCBIAS_J*4 >> 16)
+ | // NYI on big-endian: unpatch JLOOP.
| stb TMP1, 3(PC)
| b <1
break;
@@ -4485,9 +5276,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| subi TMP2, TMP2, 16
| ble >2 // No vararg slots?
|1: // Copy vararg slots to destination slots.
+ |.if FPU
| lfd f0, 0(RC)
+ |.else
+ | lwz CARG1, 0(RC)
+ | lwz CARG2, 4(RC)
+ |.endif
| addi RC, RC, 8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw CARG1, 0(RA)
+ | stw CARG2, 4(RA)
+ |.endif
| cmplw RA, TMP2
| cmplw cr1, RC, TMP3
| bge >3 // All destination slots filled?
@@ -4510,9 +5311,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| addi MULTRES, TMP1, 8
| bgt >7
|6:
+ |.if FPU
| lfd f0, 0(RC)
+ |.else
+ | lwz CARG1, 0(RC)
+ | lwz CARG2, 4(RC)
+ |.endif
| addi RC, RC, 8
+ |.if FPU
| stfd f0, 0(RA)
+ |.else
+ | stw CARG1, 0(RA)
+ | stw CARG2, 4(RA)
+ |.endif
| cmplw RC, TMP3
| addi RA, RA, 8
| blt <6 // More vararg slots?
@@ -4563,14 +5374,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| li TMP1, 0
|2:
| addi TMP3, TMP1, 8
+ |.if FPU
| lfdx f0, RA, TMP1
+ |.else
+ | add CARG3, RA, TMP1
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| cmpw TMP3, RC
+ |.if FPU
| stfdx f0, TMP2, TMP1
+ |.else
+ | add CARG3, TMP2, TMP1
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| beq >3
| addi TMP1, TMP3, 8
+ |.if FPU
| lfdx f1, RA, TMP3
+ |.else
+ | add CARG3, RA, TMP3
+ | lwz CARG1, 0(CARG3)
+ | lwz CARG2, 4(CARG3)
+ |.endif
| cmpw TMP1, RC
+ |.if FPU
| stfdx f1, TMP2, TMP3
+ |.else
+ | add CARG3, TMP2, TMP3
+ | stw CARG1, 0(CARG3)
+ | stw CARG2, 4(CARG3)
+ |.endif
| bne <2
|3:
|5:
@@ -4612,8 +5447,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| subi TMP2, BASE, 8
| decode_RB8 RB, INS
if (op == BC_RET1) {
+ |.if FPU
| lfd f0, 0(RA)
| stfd f0, 0(TMP2)
+ |.else
+ | lwz CARG1, 0(RA)
+ | lwz CARG2, 4(RA)
+ | stw CARG1, 0(TMP2)
+ | stw CARG2, 4(TMP2)
+ |.endif
}
|5:
| cmplw RB, RD
@@ -4674,11 +5516,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|4:
| stw CARG1, FORL_IDX*8+4(RA)
} else {
- | lwz TMP3, FORL_STEP*8(RA)
+ | lwz SAVE0, FORL_STEP*8(RA)
| lwz CARG3, FORL_STEP*8+4(RA)
| lwz TMP2, FORL_STOP*8(RA)
| lwz CARG2, FORL_STOP*8+4(RA)
- | cmplw cr7, TMP3, TISNUM
+ | cmplw cr7, SAVE0, TISNUM
| cmplw cr1, TMP2, TISNUM
| crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
| crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
@@ -4721,41 +5563,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
if (vk) {
|.if DUALNUM
|9: // FP loop.
+ |.if FPU
| lfd f1, FORL_IDX*8(RA)
|.else
+ | lwz CARG1, FORL_IDX*8(RA)
+ | lwz CARG2, FORL_IDX*8+4(RA)
+ |.endif
+ |.else
| lfdux f1, RA, BASE
|.endif
+ |.if FPU
| lfd f3, FORL_STEP*8(RA)
| lfd f2, FORL_STOP*8(RA)
- | lwz TMP3, FORL_STEP*8(RA)
| fadd f1, f1, f3
| stfd f1, FORL_IDX*8(RA)
+ |.else
+ | lwz CARG3, FORL_STEP*8(RA)
+ | lwz CARG4, FORL_STEP*8+4(RA)
+ | mr SAVE1, RD
+ | blex __adddf3
+ | mr RD, SAVE1
+ | stw CRET1, FORL_IDX*8(RA)
+ | stw CRET2, FORL_IDX*8+4(RA)
+ | lwz CARG3, FORL_STOP*8(RA)
+ | lwz CARG4, FORL_STOP*8+4(RA)
+ |.endif
+ | lwz SAVE0, FORL_STEP*8(RA)
} else {
|.if DUALNUM
|9: // FP loop.
|.else
| lwzux TMP1, RA, BASE
- | lwz TMP3, FORL_STEP*8(RA)
+ | lwz SAVE0, FORL_STEP*8(RA)
| lwz TMP2, FORL_STOP*8(RA)
| cmplw cr0, TMP1, TISNUM
- | cmplw cr7, TMP3, TISNUM
+ | cmplw cr7, SAVE0, TISNUM
| cmplw cr1, TMP2, TISNUM
|.endif
+ |.if FPU
| lfd f1, FORL_IDX*8(RA)
+ |.else
+ | lwz CARG1, FORL_IDX*8(RA)
+ | lwz CARG2, FORL_IDX*8+4(RA)
+ |.endif
| crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
| crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
+ |.if FPU
| lfd f2, FORL_STOP*8(RA)
+ |.else
+ | lwz CARG3, FORL_STOP*8(RA)
+ | lwz CARG4, FORL_STOP*8+4(RA)
+ |.endif
| bge ->vmeta_for
}
- | cmpwi cr6, TMP3, 0
+ | cmpwi cr6, SAVE0, 0
if (op != BC_JFORL) {
| srwi RD, RD, 1
}
+ |.if FPU
| stfd f1, FORL_EXT*8(RA)
+ |.else
+ | stw CARG1, FORL_EXT*8(RA)
+ | stw CARG2, FORL_EXT*8+4(RA)
+ |.endif
if (op != BC_JFORL) {
| add RD, PC, RD
}
+ |.if FPU
| fcmpu cr0, f1, f2
+ |.else
+ | mr SAVE1, RD
+ | blex __ledf2
+ | cmpwi CRET1, 0
+ | mr RD, SAVE1
+ |.endif
if (op == BC_JFORI) {
| addis PC, RD, -(BCBIAS_J*4 >> 16)
}
@@ -4858,8 +5739,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lp TMP2, TRACE:TMP2->mcode
| stw BASE, DISPATCH_GL(jit_base)(DISPATCH)
| mtctr TMP2
- | stw L, DISPATCH_GL(jit_L)(DISPATCH)
| addi JGL, DISPATCH, GG_DISP2G+32768
+ | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH)
| bctr
|.endif
break;
@@ -4994,6 +5875,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| lp TMP1, L->top
| li_vmstate INTERP
| lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
+ | stw L, DISPATCH_GL(cur_L)(DISPATCH)
| sub RA, TMP1, RD // RA = L->top - nresults*8
| st_vmstate
| b ->vm_returnc
diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc
deleted file mode 100644
index 1d8f70f0..00000000
--- a/src/vm_ppcspe.dasc
+++ /dev/null
@@ -1,3691 +0,0 @@
-|// Low-level VM code for PowerPC/e500 CPUs.
-|// Bytecode interpreter, fast functions and helper functions.
-|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
-|
-|.arch ppc
-|.section code_op, code_sub
-|
-|.actionlist build_actionlist
-|.globals GLOB_
-|.globalnames globnames
-|.externnames extnames
-|
-|// Note: The ragged indentation of the instructions is intentional.
-|// The starting columns indicate data dependencies.
-|
-|//-----------------------------------------------------------------------
-|
-|// Fixed register assignments for the interpreter.
-|// Don't use: r1 = sp, r2 and r13 = reserved and/or small data area ptr
-|
-|// The following must be C callee-save (but BASE is often refetched).
-|.define BASE, r14 // Base of current Lua stack frame.
-|.define KBASE, r15 // Constants of current Lua function.
-|.define PC, r16 // Next PC.
-|.define DISPATCH, r17 // Opcode dispatch table.
-|.define LREG, r18 // Register holding lua_State (also in SAVE_L).
-|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8.
-|
-|// Constants for vectorized type-comparisons (hi+low GPR). C callee-save.
-|.define TISNUM, r22
-|.define TISSTR, r23
-|.define TISTAB, r24
-|.define TISFUNC, r25
-|.define TISNIL, r26
-|.define TOBIT, r27
-|.define ZERO, TOBIT // Zero in lo word.
-|
-|// The following temporaries are not saved across C calls, except for RA.
-|.define RA, r20 // Callee-save.
-|.define RB, r10
-|.define RC, r11
-|.define RD, r12
-|.define INS, r7 // Overlaps CARG5.
-|
-|.define TMP0, r0
-|.define TMP1, r8
-|.define TMP2, r9
-|.define TMP3, r6 // Overlaps CARG4.
-|
-|// Saved temporaries.
-|.define SAVE0, r21
-|
-|// Calling conventions.
-|.define CARG1, r3
-|.define CARG2, r4
-|.define CARG3, r5
-|.define CARG4, r6 // Overlaps TMP3.
-|.define CARG5, r7 // Overlaps INS.
-|
-|.define CRET1, r3
-|.define CRET2, r4
-|
-|// Stack layout while in interpreter. Must match with lj_frame.h.
-|.define SAVE_LR, 188(sp)
-|.define CFRAME_SPACE, 184 // Delta for sp.
-|// Back chain for sp: 184(sp) <-- sp entering interpreter
-|.define SAVE_r31, 176(sp) // 64 bit register saves.
-|.define SAVE_r30, 168(sp)
-|.define SAVE_r29, 160(sp)
-|.define SAVE_r28, 152(sp)
-|.define SAVE_r27, 144(sp)
-|.define SAVE_r26, 136(sp)
-|.define SAVE_r25, 128(sp)
-|.define SAVE_r24, 120(sp)
-|.define SAVE_r23, 112(sp)
-|.define SAVE_r22, 104(sp)
-|.define SAVE_r21, 96(sp)
-|.define SAVE_r20, 88(sp)
-|.define SAVE_r19, 80(sp)
-|.define SAVE_r18, 72(sp)
-|.define SAVE_r17, 64(sp)
-|.define SAVE_r16, 56(sp)
-|.define SAVE_r15, 48(sp)
-|.define SAVE_r14, 40(sp)
-|.define SAVE_CR, 36(sp)
-|.define UNUSED1, 32(sp)
-|.define SAVE_ERRF, 28(sp) // 32 bit C frame info.
-|.define SAVE_NRES, 24(sp)
-|.define SAVE_CFRAME, 20(sp)
-|.define SAVE_L, 16(sp)
-|.define SAVE_PC, 12(sp)
-|.define SAVE_MULTRES, 8(sp)
-|// Next frame lr: 4(sp)
-|// Back chain for sp: 0(sp) <-- sp while in interpreter
-|
-|.macro save_, reg; evstdd reg, SAVE_..reg; .endmacro
-|.macro rest_, reg; evldd reg, SAVE_..reg; .endmacro
-|
-|.macro saveregs
-| stwu sp, -CFRAME_SPACE(sp)
-| save_ r14; save_ r15; save_ r16; save_ r17; save_ r18; save_ r19
-| mflr r0; mfcr r12
-| save_ r20; save_ r21; save_ r22; save_ r23; save_ r24; save_ r25
-| stw r0, SAVE_LR; stw r12, SAVE_CR
-| save_ r26; save_ r27; save_ r28; save_ r29; save_ r30; save_ r31
-|.endmacro
-|
-|.macro restoreregs
-| lwz r0, SAVE_LR; lwz r12, SAVE_CR
-| rest_ r14; rest_ r15; rest_ r16; rest_ r17; rest_ r18; rest_ r19
-| mtlr r0; mtcrf 0x38, r12
-| rest_ r20; rest_ r21; rest_ r22; rest_ r23; rest_ r24; rest_ r25
-| rest_ r26; rest_ r27; rest_ r28; rest_ r29; rest_ r30; rest_ r31
-| addi sp, sp, CFRAME_SPACE
-|.endmacro
-|
-|// Type definitions. Some of these are only used for documentation.
-|.type L, lua_State, LREG
-|.type GL, global_State
-|.type TVALUE, TValue
-|.type GCOBJ, GCobj
-|.type STR, GCstr
-|.type TAB, GCtab
-|.type LFUNC, GCfuncL
-|.type CFUNC, GCfuncC
-|.type PROTO, GCproto
-|.type UPVAL, GCupval
-|.type NODE, Node
-|.type NARGS8, int
-|.type TRACE, GCtrace
-|
-|//-----------------------------------------------------------------------
-|
-|// These basic macros should really be part of DynASM.
-|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro
-|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro
-|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro
-|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro
-|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro
-|
-|// Trap for not-yet-implemented parts.
-|.macro NYI; tw 4, sp, sp; .endmacro
-|
-|//-----------------------------------------------------------------------
-|
-|// Access to frame relative to BASE.
-|.define FRAME_PC, -8
-|.define FRAME_FUNC, -4
-|
-|// Instruction decode.
-|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro
-|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro
-|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro
-|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro
-|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro
-|
-|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro
-|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro
-|
-|// Instruction fetch.
-|.macro ins_NEXT1
-| lwz INS, 0(PC)
-| addi PC, PC, 4
-|.endmacro
-|// Instruction decode+dispatch.
-|.macro ins_NEXT2
-| decode_OP4 TMP1, INS
-| decode_RB8 RB, INS
-| decode_RD8 RD, INS
-| lwzx TMP0, DISPATCH, TMP1
-| decode_RA8 RA, INS
-| decode_RC8 RC, INS
-| mtctr TMP0
-| bctr
-|.endmacro
-|.macro ins_NEXT
-| ins_NEXT1
-| ins_NEXT2
-|.endmacro
-|
-|// Instruction footer.
-|.if 1
-| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
-| .define ins_next, ins_NEXT
-| .define ins_next_, ins_NEXT
-| .define ins_next1, ins_NEXT1
-| .define ins_next2, ins_NEXT2
-|.else
-| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
-| // Affects only certain kinds of benchmarks (and only with -j off).
-| .macro ins_next
-| b ->ins_next
-| .endmacro
-| .macro ins_next1
-| .endmacro
-| .macro ins_next2
-| b ->ins_next
-| .endmacro
-| .macro ins_next_
-| ->ins_next:
-| ins_NEXT
-| .endmacro
-|.endif
-|
-|// Call decode and dispatch.
-|.macro ins_callt
-| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
-| lwz PC, LFUNC:RB->pc
-| lwz INS, 0(PC)
-| addi PC, PC, 4
-| decode_OP4 TMP1, INS
-| decode_RA8 RA, INS
-| lwzx TMP0, DISPATCH, TMP1
-| add RA, RA, BASE
-| mtctr TMP0
-| bctr
-|.endmacro
-|
-|.macro ins_call
-| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC
-| stw PC, FRAME_PC(BASE)
-| ins_callt
-|.endmacro
-|
-|//-----------------------------------------------------------------------
-|
-|// Macros to test operand types.
-|.macro checknum, reg; evcmpltu reg, TISNUM; .endmacro
-|.macro checkstr, reg; evcmpeq reg, TISSTR; .endmacro
-|.macro checktab, reg; evcmpeq reg, TISTAB; .endmacro
-|.macro checkfunc, reg; evcmpeq reg, TISFUNC; .endmacro
-|.macro checknil, reg; evcmpeq reg, TISNIL; .endmacro
-|.macro checkok, label; blt label; .endmacro
-|.macro checkfail, label; bge label; .endmacro
-|.macro checkanyfail, label; bns label; .endmacro
-|.macro checkallok, label; bso label; .endmacro
-|
-|.macro branch_RD
-| srwi TMP0, RD, 1
-| add PC, PC, TMP0
-| addis PC, PC, -(BCBIAS_J*4 >> 16)
-|.endmacro
-|
-|// Assumes DISPATCH is relative to GL.
-#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
-#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
-|
-#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
-|
-|.macro hotloop
-| NYI
-|.endmacro
-|
-|.macro hotcall
-| NYI
-|.endmacro
-|
-|// Set current VM state. Uses TMP0.
-|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro
-|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro
-|
-|// Move table write barrier back. Overwrites mark and tmp.
-|.macro barrierback, tab, mark, tmp
-| lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH)
-| // Assumes LJ_GC_BLACK is 0x04.
-| rlwinm mark, mark, 0, 30, 28 // black2gray(tab)
-| stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH)
-| stb mark, tab->marked
-| stw tmp, tab->gclist
-|.endmacro
-|
-|//-----------------------------------------------------------------------
-
-/* Generate subroutines used by opcodes and other parts of the VM. */
-/* The .code_sub section should be last to help static branch prediction. */
-static void build_subroutines(BuildCtx *ctx)
-{
- |.code_sub
- |
- |//-----------------------------------------------------------------------
- |//-- Return handling ----------------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_returnp:
- | // See vm_return. Also: TMP2 = previous base.
- | andi. TMP0, PC, FRAME_P
- | evsplati TMP1, LJ_TTRUE
- | beq ->cont_dispatch
- |
- | // Return from pcall or xpcall fast func.
- | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame.
- | mr BASE, TMP2 // Restore caller base.
- | // Prepending may overwrite the pcall frame, so do it at the end.
- | stwu TMP1, FRAME_PC(RA) // Prepend true to results.
- |
- |->vm_returnc:
- | addi RD, RD, 8 // RD = (nresults+1)*8.
- | andi. TMP0, PC, FRAME_TYPE
- | cmpwi cr1, RD, 0
- | li CRET1, LUA_YIELD
- | beq cr1, ->vm_unwind_c_eh
- | mr MULTRES, RD
- | beq ->BC_RET_Z // Handle regular return to Lua.
- |
- |->vm_return:
- | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return
- | // TMP0 = PC & FRAME_TYPE
- | cmpwi TMP0, FRAME_C
- | rlwinm TMP2, PC, 0, 0, 28
- | li_vmstate C
- | sub TMP2, BASE, TMP2 // TMP2 = previous base.
- | bne ->vm_returnp
- |
- | addic. TMP1, RD, -8
- | stw TMP2, L->base
- | lwz TMP2, SAVE_NRES
- | subi BASE, BASE, 8
- | st_vmstate
- | slwi TMP2, TMP2, 3
- | beq >2
- |1:
- | addic. TMP1, TMP1, -8
- | evldd TMP0, 0(RA)
- | addi RA, RA, 8
- | evstdd TMP0, 0(BASE)
- | addi BASE, BASE, 8
- | bne <1
- |
- |2:
- | cmpw TMP2, RD // More/less results wanted?
- | bne >6
- |3:
- | stw BASE, L->top // Store new top.
- |
- |->vm_leave_cp:
- | lwz TMP0, SAVE_CFRAME // Restore previous C frame.
- | li CRET1, 0 // Ok return status for vm_pcall.
- | stw TMP0, L->cframe
- |
- |->vm_leave_unw:
- | restoreregs
- | blr
- |
- |6:
- | ble >7 // Less results wanted?
- | // More results wanted. Check stack size and fill up results with nil.
- | lwz TMP1, L->maxstack
- | cmplw BASE, TMP1
- | bge >8
- | evstdd TISNIL, 0(BASE)
- | addi RD, RD, 8
- | addi BASE, BASE, 8
- | b <2
- |
- |7: // Less results wanted.
- | sub TMP0, RD, TMP2
- | cmpwi TMP2, 0 // LUA_MULTRET+1 case?
- | sub TMP0, BASE, TMP0 // Subtract the difference.
- | iseleq BASE, BASE, TMP0 // Either keep top or shrink it.
- | b <3
- |
- |8: // Corner case: need to grow stack for filling up results.
- | // This can happen if:
- | // - A C function grows the stack (a lot).
- | // - The GC shrinks the stack in between.
- | // - A return back from a lua_call() with (high) nresults adjustment.
- | stw BASE, L->top // Save current top held in BASE (yes).
- | mr SAVE0, RD
- | mr CARG2, TMP2
- | mr CARG1, L
- | bl extern lj_state_growstack // (lua_State *L, int n)
- | lwz TMP2, SAVE_NRES
- | mr RD, SAVE0
- | slwi TMP2, TMP2, 3
- | lwz BASE, L->top // Need the (realloced) L->top in BASE.
- | b <2
- |
- |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
- | // (void *cframe, int errcode)
- | mr sp, CARG1
- | mr CRET1, CARG2
- |->vm_unwind_c_eh: // Landing pad for external unwinder.
- | lwz L, SAVE_L
- | li TMP0, ~LJ_VMST_C
- | lwz GL:TMP1, L->glref
- | stw TMP0, GL:TMP1->vmstate
- | b ->vm_leave_unw
- |
- |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
- | // (void *cframe)
- | rlwinm sp, CARG1, 0, 0, 29
- |->vm_unwind_ff_eh: // Landing pad for external unwinder.
- | lwz L, SAVE_L
- | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
- | evsplati TISFUNC, LJ_TFUNC
- | lus TOBIT, 0x4338
- | evsplati TISTAB, LJ_TTAB
- | li TMP0, 0
- | lwz BASE, L->base
- | evmergelo TOBIT, TOBIT, TMP0
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | evsplati TISSTR, LJ_TSTR
- | li TMP1, LJ_TFALSE
- | evsplati TISNIL, LJ_TNIL
- | li_vmstate INTERP
- | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame.
- | la RA, -8(BASE) // Results start at BASE-8.
- | addi DISPATCH, DISPATCH, GG_G2DISP
- | stw TMP1, 0(RA) // Prepend false to error message.
- | li RD, 16 // 2 results: false + error message.
- | st_vmstate
- | b ->vm_returnc
- |
- |//-----------------------------------------------------------------------
- |//-- Grow stack for calls -----------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_growstack_c: // Grow stack for C function.
- | li CARG2, LUA_MINSTACK
- | b >2
- |
- |->vm_growstack_l: // Grow stack for Lua function.
- | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC
- | add RC, BASE, RC
- | sub RA, RA, BASE
- | stw BASE, L->base
- | addi PC, PC, 4 // Must point after first instruction.
- | stw RC, L->top
- | srwi CARG2, RA, 3
- |2:
- | // L->base = new base, L->top = top
- | stw PC, SAVE_PC
- | mr CARG1, L
- | bl extern lj_state_growstack // (lua_State *L, int n)
- | lwz BASE, L->base
- | lwz RC, L->top
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | sub RC, RC, BASE
- | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC
- | ins_callt // Just retry the call.
- |
- |//-----------------------------------------------------------------------
- |//-- Entry points into the assembler VM ---------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_resume: // Setup C frame and resume thread.
- | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
- | saveregs
- | mr L, CARG1
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | mr BASE, CARG2
- | lbz TMP1, L->status
- | stw L, SAVE_L
- | li PC, FRAME_CP
- | addi TMP0, sp, CFRAME_RESUME
- | addi DISPATCH, DISPATCH, GG_G2DISP
- | stw CARG3, SAVE_NRES
- | cmplwi TMP1, 0
- | stw CARG3, SAVE_ERRF
- | stw TMP0, L->cframe
- | stw CARG3, SAVE_CFRAME
- | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
- | beq >3
- |
- | // Resume after yield (like a return).
- | mr RA, BASE
- | lwz BASE, L->base
- | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
- | lwz TMP1, L->top
- | evsplati TISFUNC, LJ_TFUNC
- | lus TOBIT, 0x4338
- | evsplati TISTAB, LJ_TTAB
- | lwz PC, FRAME_PC(BASE)
- | li TMP2, 0
- | evsplati TISSTR, LJ_TSTR
- | sub RD, TMP1, BASE
- | evmergelo TOBIT, TOBIT, TMP2
- | stb CARG3, L->status
- | andi. TMP0, PC, FRAME_TYPE
- | li_vmstate INTERP
- | addi RD, RD, 8
- | evsplati TISNIL, LJ_TNIL
- | mr MULTRES, RD
- | st_vmstate
- | beq ->BC_RET_Z
- | b ->vm_return
- |
- |->vm_pcall: // Setup protected C frame and enter VM.
- | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
- | saveregs
- | li PC, FRAME_CP
- | stw CARG4, SAVE_ERRF
- | b >1
- |
- |->vm_call: // Setup C frame and enter VM.
- | // (lua_State *L, TValue *base, int nres1)
- | saveregs
- | li PC, FRAME_C
- |
- |1: // Entry point for vm_pcall above (PC = ftype).
- | lwz TMP1, L:CARG1->cframe
- | stw CARG3, SAVE_NRES
- | mr L, CARG1
- | stw CARG1, SAVE_L
- | mr BASE, CARG2
- | stw sp, L->cframe // Add our C frame to cframe chain.
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
- | stw TMP1, SAVE_CFRAME
- | addi DISPATCH, DISPATCH, GG_G2DISP
- |
- |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype).
- | lwz TMP2, L->base // TMP2 = old base (used in vmeta_call).
- | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants.
- | lwz TMP1, L->top
- | evsplati TISFUNC, LJ_TFUNC
- | add PC, PC, BASE
- | evsplati TISTAB, LJ_TTAB
- | lus TOBIT, 0x4338
- | li TMP0, 0
- | sub PC, PC, TMP2 // PC = frame delta + frame type
- | evsplati TISSTR, LJ_TSTR
- | sub NARGS8:RC, TMP1, BASE
- | evmergelo TOBIT, TOBIT, TMP0
- | li_vmstate INTERP
- | evsplati TISNIL, LJ_TNIL
- | st_vmstate
- |
- |->vm_call_dispatch:
- | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC
- | li TMP0, -8
- | evlddx LFUNC:RB, BASE, TMP0
- | checkfunc LFUNC:RB
- | checkfail ->vmeta_call
- |
- |->vm_call_dispatch_f:
- | ins_call
- | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC
- |
- |->vm_cpcall: // Setup protected C frame, call C.
- | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
- | saveregs
- | mr L, CARG1
- | lwz TMP0, L:CARG1->stack
- | stw CARG1, SAVE_L
- | lwz TMP1, L->top
- | stw CARG1, SAVE_PC // Any value outside of bytecode is ok.
- | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top).
- | lwz TMP1, L->cframe
- | stw sp, L->cframe // Add our C frame to cframe chain.
- | li TMP2, 0
- | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame.
- | stw TMP2, SAVE_ERRF // No error function.
- | stw TMP1, SAVE_CFRAME
- | mtctr CARG4
- | bctrl // (lua_State *L, lua_CFunction func, void *ud)
- | mr. BASE, CRET1
- | lwz DISPATCH, L->glref // Setup pointer to dispatch table.
- | li PC, FRAME_CP
- | addi DISPATCH, DISPATCH, GG_G2DISP
- | bne <3 // Else continue with the call.
- | b ->vm_leave_cp // No base? Just remove C frame.
- |
- |//-----------------------------------------------------------------------
- |//-- Metamethod handling ------------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the
- |// stack, so BASE doesn't need to be reloaded across these calls.
- |
- |//-- Continuation dispatch ----------------------------------------------
- |
- |->cont_dispatch:
- | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8
- | lwz TMP0, -12(BASE) // Continuation.
- | mr RB, BASE
- | mr BASE, TMP2 // Restore caller BASE.
- | lwz LFUNC:TMP1, FRAME_FUNC(TMP2)
- | cmplwi TMP0, 0
- | lwz PC, -16(RB) // Restore PC from [cont|PC].
- | beq >1
- | subi TMP2, RD, 8
- | lwz TMP1, LFUNC:TMP1->pc
- | evstddx TISNIL, RA, TMP2 // Ensure one valid arg.
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | // BASE = base, RA = resultptr, RB = meta base
- | mtctr TMP0
- | bctr // Jump to continuation.
- |
- |1: // Tail call from C function.
- | subi TMP1, RB, 16
- | sub RC, TMP1, BASE
- | b ->vm_call_tail
- |
- |->cont_cat: // RA = resultptr, RB = meta base
- | lwz INS, -4(PC)
- | subi CARG2, RB, 16
- | decode_RB8 SAVE0, INS
- | evldd TMP0, 0(RA)
- | add TMP1, BASE, SAVE0
- | stw BASE, L->base
- | cmplw TMP1, CARG2
- | sub CARG3, CARG2, TMP1
- | decode_RA8 RA, INS
- | evstdd TMP0, 0(CARG2)
- | bne ->BC_CAT_Z
- | evstddx TMP0, BASE, RA
- | b ->cont_nop
- |
- |//-- Table indexing metamethods -----------------------------------------
- |
- |->vmeta_tgets1:
- | evmergelo STR:RC, TISSTR, STR:RC
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | decode_RB8 RB, INS
- | evstdd STR:RC, 0(CARG3)
- | add CARG2, BASE, RB
- | b >1
- |
- |->vmeta_tgets:
- | evmergelo TAB:RB, TISTAB, TAB:RB
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- | evmergelo STR:RC, TISSTR, STR:RC
- | evstdd TAB:RB, 0(CARG2)
- | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
- | evstdd STR:RC, 0(CARG3)
- | b >1
- |
- |->vmeta_tgetb: // TMP0 = index
- | efdcfsi TMP0, TMP0
- | decode_RB8 RB, INS
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | add CARG2, BASE, RB
- | evstdd TMP0, 0(CARG3)
- | b >1
- |
- |->vmeta_tgetv:
- | decode_RB8 RB, INS
- | decode_RC8 RC, INS
- | add CARG2, BASE, RB
- | add CARG3, BASE, RC
- |1:
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
- | // Returns TValue * (finished) or NULL (metamethod).
- | cmplwi CRET1, 0
- | beq >3
- | evldd TMP0, 0(CRET1)
- | evstddx TMP0, BASE, RA
- | ins_next
- |
- |3: // Call __index metamethod.
- | // BASE = base, L->top = new base, stack = cont/func/t/k
- | subfic TMP1, BASE, FRAME_CONT
- | lwz BASE, L->top
- | stw PC, -16(BASE) // [cont|PC]
- | add PC, TMP1, BASE
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 16 // 2 args for func(t, k).
- | b ->vm_call_dispatch_f
- |
- |//-----------------------------------------------------------------------
- |
- |->vmeta_tsets1:
- | evmergelo STR:RC, TISSTR, STR:RC
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | decode_RB8 RB, INS
- | evstdd STR:RC, 0(CARG3)
- | add CARG2, BASE, RB
- | b >1
- |
- |->vmeta_tsets:
- | evmergelo TAB:RB, TISTAB, TAB:RB
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- | evmergelo STR:RC, TISSTR, STR:RC
- | evstdd TAB:RB, 0(CARG2)
- | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH)
- | evstdd STR:RC, 0(CARG3)
- | b >1
- |
- |->vmeta_tsetb: // TMP0 = index
- | efdcfsi TMP0, TMP0
- | decode_RB8 RB, INS
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | add CARG2, BASE, RB
- | evstdd TMP0, 0(CARG3)
- | b >1
- |
- |->vmeta_tsetv:
- | decode_RB8 RB, INS
- | decode_RC8 RC, INS
- | add CARG2, BASE, RB
- | add CARG3, BASE, RC
- |1:
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
- | // Returns TValue * (finished) or NULL (metamethod).
- | cmplwi CRET1, 0
- | evlddx TMP0, BASE, RA
- | beq >3
- | // NOBARRIER: lj_meta_tset ensures the table is not black.
- | evstdd TMP0, 0(CRET1)
- | ins_next
- |
- |3: // Call __newindex metamethod.
- | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
- | subfic TMP1, BASE, FRAME_CONT
- | lwz BASE, L->top
- | stw PC, -16(BASE) // [cont|PC]
- | add PC, TMP1, BASE
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | li NARGS8:RC, 24 // 3 args for func(t, k, v)
- | evstdd TMP0, 16(BASE) // Copy value to third argument.
- | b ->vm_call_dispatch_f
- |
- |//-- Comparison metamethods ---------------------------------------------
- |
- |->vmeta_comp:
- | mr CARG1, L
- | subi PC, PC, 4
- | add CARG2, BASE, RA
- | stw PC, SAVE_PC
- | add CARG3, BASE, RD
- | stw BASE, L->base
- | decode_OP1 CARG4, INS
- | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
- | // Returns 0/1 or TValue * (metamethod).
- |3:
- | cmplwi CRET1, 1
- | bgt ->vmeta_binop
- |4:
- | lwz INS, 0(PC)
- | addi PC, PC, 4
- | decode_RD4 TMP2, INS
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | add TMP2, TMP2, TMP3
- | isellt PC, PC, TMP2
- |->cont_nop:
- | ins_next
- |
- |->cont_ra: // RA = resultptr
- | lwz INS, -4(PC)
- | evldd TMP0, 0(RA)
- | decode_RA8 TMP1, INS
- | evstddx TMP0, BASE, TMP1
- | b ->cont_nop
- |
- |->cont_condt: // RA = resultptr
- | lwz TMP0, 0(RA)
- | li TMP1, LJ_TTRUE
- | cmplw TMP1, TMP0 // Branch if result is true.
- | b <4
- |
- |->cont_condf: // RA = resultptr
- | lwz TMP0, 0(RA)
- | li TMP1, LJ_TFALSE
- | cmplw TMP0, TMP1 // Branch if result is false.
- | b <4
- |
- |->vmeta_equal:
- | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV.
- | subi PC, PC, 4
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
- | // Returns 0/1 or TValue * (metamethod).
- | b <3
- |
- |//-- Arithmetic metamethods ---------------------------------------------
- |
- |->vmeta_arith_vn:
- | add CARG3, BASE, RB
- | add CARG4, KBASE, RC
- | b >1
- |
- |->vmeta_arith_nv:
- | add CARG3, KBASE, RC
- | add CARG4, BASE, RB
- | b >1
- |
- |->vmeta_unm:
- | add CARG3, BASE, RD
- | mr CARG4, CARG3
- | b >1
- |
- |->vmeta_arith_vv:
- | add CARG3, BASE, RB
- | add CARG4, BASE, RC
- |1:
- | add CARG2, BASE, RA
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS.
- | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
- | // Returns NULL (finished) or TValue * (metamethod).
- | cmplwi CRET1, 0
- | beq ->cont_nop
- |
- | // Call metamethod for binary op.
- |->vmeta_binop:
- | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2
- | sub TMP1, CRET1, BASE
- | stw PC, -16(CRET1) // [cont|PC]
- | mr TMP2, BASE
- | addi PC, TMP1, FRAME_CONT
- | mr BASE, CRET1
- | li NARGS8:RC, 16 // 2 args for func(o1, o2).
- | b ->vm_call_dispatch
- |
- |->vmeta_len:
-#if LJ_52
- | mr SAVE0, CARG1
-#endif
- | add CARG2, BASE, RD
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | bl extern lj_meta_len // (lua_State *L, TValue *o)
- | // Returns NULL (retry) or TValue * (metamethod base).
-#if LJ_52
- | cmplwi CRET1, 0
- | bne ->vmeta_binop // Binop call for compatibility.
- | mr CARG1, SAVE0
- | b ->BC_LEN_Z
-#else
- | b ->vmeta_binop // Binop call for compatibility.
-#endif
- |
- |//-- Call metamethod ----------------------------------------------------
- |
- |->vmeta_call: // Resolve and call __call metamethod.
- | // TMP2 = old base, BASE = new base, RC = nargs*8
- | mr CARG1, L
- | stw TMP2, L->base // This is the callers base!
- | subi CARG2, BASE, 8
- | stw PC, SAVE_PC
- | add CARG3, BASE, RC
- | mr SAVE0, NARGS8:RC
- | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
- | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here.
- | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
- | ins_call
- |
- |->vmeta_callt: // Resolve __call for BC_CALLT.
- | // BASE = old base, RA = new base, RC = nargs*8
- | mr CARG1, L
- | stw BASE, L->base
- | subi CARG2, RA, 8
- | stw PC, SAVE_PC
- | add CARG3, RA, RC
- | mr SAVE0, NARGS8:RC
- | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
- | lwz TMP1, FRAME_PC(BASE)
- | addi NARGS8:RC, SAVE0, 8 // Got one more argument now.
- | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here.
- | b ->BC_CALLT_Z
- |
- |//-- Argument coercion for 'for' statement ------------------------------
- |
- |->vmeta_for:
- | mr CARG1, L
- | stw BASE, L->base
- | mr CARG2, RA
- | stw PC, SAVE_PC
- | mr SAVE0, INS
- | bl extern lj_meta_for // (lua_State *L, TValue *base)
- |.if JIT
- | decode_OP1 TMP0, SAVE0
- |.endif
- | decode_RA8 RA, SAVE0
- |.if JIT
- | cmpwi TMP0, BC_JFORI
- |.endif
- | decode_RD8 RD, SAVE0
- |.if JIT
- | beq =>BC_JFORI
- |.endif
- | b =>BC_FORI
- |
- |//-----------------------------------------------------------------------
- |//-- Fast functions -----------------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |.macro .ffunc, name
- |->ff_ .. name:
- |.endmacro
- |
- |.macro .ffunc_1, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 8
- | evldd CARG1, 0(BASE)
- | blt ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_2, name
- |->ff_ .. name:
- | cmplwi NARGS8:RC, 16
- | evldd CARG1, 0(BASE)
- | evldd CARG2, 8(BASE)
- | blt ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_n, name
- | .ffunc_1 name
- | checknum CARG1
- | checkfail ->fff_fallback
- |.endmacro
- |
- |.macro .ffunc_nn, name
- | .ffunc_2 name
- | evmergehi TMP0, CARG1, CARG2
- | checknum TMP0
- | checkanyfail ->fff_fallback
- |.endmacro
- |
- |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1.
- |.macro ffgccheck
- | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
- | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
- | cmplw TMP0, TMP1
- | bgel ->fff_gcstep
- |.endmacro
- |
- |//-- Base library: checks -----------------------------------------------
- |
- |.ffunc assert
- | cmplwi NARGS8:RC, 8
- | evldd TMP0, 0(BASE)
- | blt ->fff_fallback
- | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
- | la RA, -8(BASE)
- | evcmpltu cr1, TMP0, TMP1
- | lwz PC, FRAME_PC(BASE)
- | bge cr1, ->fff_fallback
- | evstdd TMP0, 0(RA)
- | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8.
- | beq ->fff_res // Done if exactly 1 argument.
- | li TMP1, 8
- | subi RC, RC, 8
- |1:
- | cmplw TMP1, RC
- | evlddx TMP0, BASE, TMP1
- | evstddx TMP0, RA, TMP1
- | addi TMP1, TMP1, 8
- | bne <1
- | b ->fff_res
- |
- |.ffunc type
- | cmplwi NARGS8:RC, 8
- | lwz CARG1, 0(BASE)
- | blt ->fff_fallback
- | li TMP2, ~LJ_TNUMX
- | cmplw CARG1, TISNUM
- | not TMP1, CARG1
- | isellt TMP1, TMP2, TMP1
- | slwi TMP1, TMP1, 3
- | la TMP2, CFUNC:RB->upvalue
- | evlddx STR:CRET1, TMP2, TMP1
- | b ->fff_restv
- |
- |//-- Base library: getters and setters ---------------------------------
- |
- |.ffunc_1 getmetatable
- | checktab CARG1
- | evmergehi TMP1, CARG1, CARG1
- | checkfail >6
- |1: // Field metatable must be at same offset for GCtab and GCudata!
- | lwz TAB:RB, TAB:CARG1->metatable
- |2:
- | evmr CRET1, TISNIL
- | cmplwi TAB:RB, 0
- | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH)
- | beq ->fff_restv
- | lwz TMP0, TAB:RB->hmask
- | evmergelo CRET1, TISTAB, TAB:RB // Use metatable as default result.
- | lwz TMP1, STR:RC->hash
- | lwz NODE:TMP2, TAB:RB->node
- | evmergelo STR:RC, TISSTR, STR:RC
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
- | slwi TMP0, TMP1, 5
- | slwi TMP1, TMP1, 3
- | sub TMP1, TMP0, TMP1
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |3: // Rearranged logic, because we expect _not_ to find the key.
- | evldd TMP0, NODE:TMP2->key
- | evldd TMP1, NODE:TMP2->val
- | evcmpeq TMP0, STR:RC
- | lwz NODE:TMP2, NODE:TMP2->next
- | checkallok >5
- | cmplwi NODE:TMP2, 0
- | beq ->fff_restv // Not found, keep default result.
- | b <3
- |5:
- | checknil TMP1
- | checkok ->fff_restv // Ditto for nil value.
- | evmr CRET1, TMP1 // Return value of mt.__metatable.
- | b ->fff_restv
- |
- |6:
- | cmpwi TMP1, LJ_TUDATA
- | not TMP1, TMP1
- | beq <1
- | checknum CARG1
- | slwi TMP1, TMP1, 2
- | li TMP2, 4*~LJ_TNUMX
- | isellt TMP1, TMP2, TMP1
- | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH)
- | lwzx TAB:RB, TMP2, TMP1
- | b <2
- |
- |.ffunc_2 setmetatable
- | // Fast path: no mt for table yet and not clearing the mt.
- | evmergehi TMP0, TAB:CARG1, TAB:CARG2
- | checktab TMP0
- | checkanyfail ->fff_fallback
- | lwz TAB:TMP1, TAB:CARG1->metatable
- | cmplwi TAB:TMP1, 0
- | lbz TMP3, TAB:CARG1->marked
- | bne ->fff_fallback
- | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- | stw TAB:CARG2, TAB:CARG1->metatable
- | beq ->fff_restv
- | barrierback TAB:CARG1, TMP3, TMP0
- | b ->fff_restv
- |
- |.ffunc rawget
- | cmplwi NARGS8:RC, 16
- | evldd CARG2, 0(BASE)
- | blt ->fff_fallback
- | checktab CARG2
- | la CARG3, 8(BASE)
- | checkfail ->fff_fallback
- | mr CARG1, L
- | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
- | // Returns cTValue *.
- | evldd CRET1, 0(CRET1)
- | b ->fff_restv
- |
- |//-- Base library: conversions ------------------------------------------
- |
- |.ffunc tonumber
- | // Only handles the number case inline (without a base argument).
- | cmplwi NARGS8:RC, 8
- | evldd CARG1, 0(BASE)
- | bne ->fff_fallback // Exactly one argument.
- | checknum CARG1
- | checkok ->fff_restv
- | b ->fff_fallback
- |
- |.ffunc_1 tostring
- | // Only handles the string or number case inline.
- | checkstr CARG1
- | // A __tostring method in the string base metatable is ignored.
- | checkok ->fff_restv // String key?
- | // Handle numbers inline, unless a number base metatable is present.
- | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH)
- | checknum CARG1
- | cmplwi cr1, TMP0, 0
- | stw BASE, L->base // Add frame since C call can throw.
- | crand 4*cr0+eq, 4*cr0+lt, 4*cr1+eq
- | stw PC, SAVE_PC // Redundant (but a defined value).
- | bne ->fff_fallback
- | ffgccheck
- | mr CARG1, L
- | mr CARG2, BASE
- | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np)
- | // Returns GCstr *.
- | evmergelo STR:CRET1, TISSTR, STR:CRET1
- | b ->fff_restv
- |
- |//-- Base library: iterators -------------------------------------------
- |
- |.ffunc next
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE)
- | blt ->fff_fallback
- | evstddx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil.
- | checktab TAB:CARG2
- | lwz PC, FRAME_PC(BASE)
- | checkfail ->fff_fallback
- | stw BASE, L->base // Add frame since C call can throw.
- | mr CARG1, L
- | stw BASE, L->top // Dummy frame length is ok.
- | la CARG3, 8(BASE)
- | stw PC, SAVE_PC
- | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
- | // Returns 0 at end of traversal.
- | cmplwi CRET1, 0
- | evmr CRET1, TISNIL
- | beq ->fff_restv // End of traversal: return nil.
- | evldd TMP0, 8(BASE) // Copy key and value to results.
- | la RA, -8(BASE)
- | evldd TMP1, 16(BASE)
- | evstdd TMP0, 0(RA)
- | li RD, (2+1)*8
- | evstdd TMP1, 8(RA)
- | b ->fff_res
- |
- |.ffunc_1 pairs
- | checktab TAB:CARG1
- | lwz PC, FRAME_PC(BASE)
- | checkfail ->fff_fallback
-#if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
- | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
- | cmplwi TAB:TMP2, 0
- | la RA, -8(BASE)
- | bne ->fff_fallback
-#else
- | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
- | la RA, -8(BASE)
-#endif
- | evstdd TISNIL, 8(BASE)
- | li RD, (3+1)*8
- | evstdd CFUNC:TMP0, 0(RA)
- | b ->fff_res
- |
- |.ffunc_2 ipairs_aux
- | checktab TAB:CARG1
- | lwz PC, FRAME_PC(BASE)
- | checkfail ->fff_fallback
- | checknum CARG2
- | lus TMP3, 0x3ff0
- | checkfail ->fff_fallback
- | efdctsi TMP2, CARG2
- | lwz TMP0, TAB:CARG1->asize
- | evmergelo TMP3, TMP3, ZERO
- | lwz TMP1, TAB:CARG1->array
- | efdadd CARG2, CARG2, TMP3
- | addi TMP2, TMP2, 1
- | la RA, -8(BASE)
- | cmplw TMP0, TMP2
- | slwi TMP3, TMP2, 3
- | evstdd CARG2, 0(RA)
- | ble >2 // Not in array part?
- | evlddx TMP1, TMP1, TMP3
- |1:
- | checknil TMP1
- | li RD, (0+1)*8
- | checkok ->fff_res // End of iteration, return 0 results.
- | li RD, (2+1)*8
- | evstdd TMP1, 8(RA)
- | b ->fff_res
- |2: // Check for empty hash part first. Otherwise call C function.
- | lwz TMP0, TAB:CARG1->hmask
- | cmplwi TMP0, 0
- | li RD, (0+1)*8
- | beq ->fff_res
- | mr CARG2, TMP2
- | bl extern lj_tab_getinth // (GCtab *t, int32_t key)
- | // Returns cTValue * or NULL.
- | cmplwi CRET1, 0
- | li RD, (0+1)*8
- | beq ->fff_res
- | evldd TMP1, 0(CRET1)
- | b <1
- |
- |.ffunc_1 ipairs
- | checktab TAB:CARG1
- | lwz PC, FRAME_PC(BASE)
- | checkfail ->fff_fallback
-#if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
- | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
- | cmplwi TAB:TMP2, 0
- | la RA, -8(BASE)
- | bne ->fff_fallback
-#else
- | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0]
- | la RA, -8(BASE)
-#endif
- | evsplati TMP1, 0
- | li RD, (3+1)*8
- | evstdd TMP1, 8(BASE)
- | evstdd CFUNC:TMP0, 0(RA)
- | b ->fff_res
- |
- |//-- Base library: catch errors ----------------------------------------
- |
- |.ffunc pcall
- | cmplwi NARGS8:RC, 8
- | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
- | blt ->fff_fallback
- | mr TMP2, BASE
- | la BASE, 8(BASE)
- | // Remember active hook before pcall.
- | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31
- | subi NARGS8:RC, NARGS8:RC, 8
- | addi PC, TMP3, 8+FRAME_PCALL
- | b ->vm_call_dispatch
- |
- |.ffunc_2 xpcall
- | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
- | mr TMP2, BASE
- | checkfunc CARG2 // Traceback must be a function.
- | checkfail ->fff_fallback
- | la BASE, 16(BASE)
- | // Remember active hook before pcall.
- | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31
- | evstdd CARG2, 0(TMP2) // Swap function and traceback.
- | subi NARGS8:RC, NARGS8:RC, 16
- | evstdd CARG1, 8(TMP2)
- | addi PC, TMP3, 16+FRAME_PCALL
- | b ->vm_call_dispatch
- |
- |//-- Coroutine library --------------------------------------------------
- |
- |.macro coroutine_resume_wrap, resume
- |.if resume
- |.ffunc_1 coroutine_resume
- | evmergehi TMP0, L:CARG1, L:CARG1
- |.else
- |.ffunc coroutine_wrap_aux
- | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr
- |.endif
- |.if resume
- | cmpwi TMP0, LJ_TTHREAD
- | bne ->fff_fallback
- |.endif
- | lbz TMP0, L:CARG1->status
- | lwz TMP1, L:CARG1->cframe
- | lwz CARG2, L:CARG1->top
- | cmplwi cr0, TMP0, LUA_YIELD
- | lwz TMP2, L:CARG1->base
- | cmplwi cr1, TMP1, 0
- | lwz TMP0, L:CARG1->maxstack
- | cmplw cr7, CARG2, TMP2
- | lwz PC, FRAME_PC(BASE)
- | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0
- | add TMP2, CARG2, NARGS8:RC
- | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD
- | cmplw cr1, TMP2, TMP0
- | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt
- | stw PC, SAVE_PC
- | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov
- | stw BASE, L->base
- | blt cr6, ->fff_fallback
- |1:
- |.if resume
- | addi BASE, BASE, 8 // Keep resumed thread in stack for GC.
- | subi NARGS8:RC, NARGS8:RC, 8
- | subi TMP2, TMP2, 8
- |.endif
- | stw TMP2, L:CARG1->top
- | li TMP1, 0
- | stw BASE, L->top
- |2: // Move args to coroutine.
- | cmpw TMP1, NARGS8:RC
- | evlddx TMP0, BASE, TMP1
- | beq >3
- | evstddx TMP0, CARG2, TMP1
- | addi TMP1, TMP1, 8
- | b <2
- |3:
- | li CARG3, 0
- | mr L:SAVE0, L:CARG1
- | li CARG4, 0
- | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0)
- | // Returns thread status.
- |4:
- | lwz TMP2, L:SAVE0->base
- | cmplwi CRET1, LUA_YIELD
- | lwz TMP3, L:SAVE0->top
- | li_vmstate INTERP
- | lwz BASE, L->base
- | st_vmstate
- | bgt >8
- | sub RD, TMP3, TMP2
- | lwz TMP0, L->maxstack
- | cmplwi RD, 0
- | add TMP1, BASE, RD
- | beq >6 // No results?
- | cmplw TMP1, TMP0
- | li TMP1, 0
- | bgt >9 // Need to grow stack?
- |
- | subi TMP3, RD, 8
- | stw TMP2, L:SAVE0->top // Clear coroutine stack.
- |5: // Move results from coroutine.
- | cmplw TMP1, TMP3
- | evlddx TMP0, TMP2, TMP1
- | evstddx TMP0, BASE, TMP1
- | addi TMP1, TMP1, 8
- | bne <5
- |6:
- | andi. TMP0, PC, FRAME_TYPE
- |.if resume
- | li TMP1, LJ_TTRUE
- | la RA, -8(BASE)
- | stw TMP1, -8(BASE) // Prepend true to results.
- | addi RD, RD, 16
- |.else
- | mr RA, BASE
- | addi RD, RD, 8
- |.endif
- |7:
- | stw PC, SAVE_PC
- | mr MULTRES, RD
- | beq ->BC_RET_Z
- | b ->vm_return
- |
- |8: // Coroutine returned with error (at co->top-1).
- |.if resume
- | andi. TMP0, PC, FRAME_TYPE
- | la TMP3, -8(TMP3)
- | li TMP1, LJ_TFALSE
- | evldd TMP0, 0(TMP3)
- | stw TMP3, L:SAVE0->top // Remove error from coroutine stack.
- | li RD, (2+1)*8
- | stw TMP1, -8(BASE) // Prepend false to results.
- | la RA, -8(BASE)
- | evstdd TMP0, 0(BASE) // Copy error message.
- | b <7
- |.else
- | mr CARG1, L
- | mr CARG2, L:SAVE0
- | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
- |.endif
- |
- |9: // Handle stack expansion on return from yield.
- | mr CARG1, L
- | srwi CARG2, RD, 3
- | bl extern lj_state_growstack // (lua_State *L, int n)
- | li CRET1, 0
- | b <4
- |.endmacro
- |
- | coroutine_resume_wrap 1 // coroutine.resume
- | coroutine_resume_wrap 0 // coroutine.wrap
- |
- |.ffunc coroutine_yield
- | lwz TMP0, L->cframe
- | add TMP1, BASE, NARGS8:RC
- | stw BASE, L->base
- | andi. TMP0, TMP0, CFRAME_RESUME
- | stw TMP1, L->top
- | li CRET1, LUA_YIELD
- | beq ->fff_fallback
- | stw ZERO, L->cframe
- | stb CRET1, L->status
- | b ->vm_leave_unw
- |
- |//-- Math library -------------------------------------------------------
- |
- |.ffunc_n math_abs
- | efdabs CRET1, CARG1
- | // Fallthrough.
- |
- |->fff_restv:
- | // CRET1 = TValue result.
- | lwz PC, FRAME_PC(BASE)
- | la RA, -8(BASE)
- | evstdd CRET1, 0(RA)
- |->fff_res1:
- | // RA = results, PC = return.
- | li RD, (1+1)*8
- |->fff_res:
- | // RA = results, RD = (nresults+1)*8, PC = return.
- | andi. TMP0, PC, FRAME_TYPE
- | mr MULTRES, RD
- | bne ->vm_return
- | lwz INS, -4(PC)
- | decode_RB8 RB, INS
- |5:
- | cmplw RB, RD // More results expected?
- | decode_RA8 TMP0, INS
- | bgt >6
- | ins_next1
- | // Adjust BASE. KBASE is assumed to be set for the calling frame.
- | sub BASE, RA, TMP0
- | ins_next2
- |
- |6: // Fill up results with nil.
- | subi TMP1, RD, 8
- | addi RD, RD, 8
- | evstddx TISNIL, RA, TMP1
- | b <5
- |
- |.macro math_extern, func
- | .ffunc math_ .. func
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE)
- | blt ->fff_fallback
- | checknum CARG2
- | evmergehi CARG1, CARG2, CARG2
- | checkfail ->fff_fallback
- | bl extern func@plt
- | evmergelo CRET1, CRET1, CRET2
- | b ->fff_restv
- |.endmacro
- |
- |.macro math_extern2, func
- | .ffunc math_ .. func
- | cmplwi NARGS8:RC, 16
- | evldd CARG2, 0(BASE)
- | evldd CARG4, 8(BASE)
- | blt ->fff_fallback
- | evmergehi CARG1, CARG4, CARG2
- | checknum CARG1
- | evmergehi CARG3, CARG4, CARG4
- | checkanyfail ->fff_fallback
- | bl extern func@plt
- | evmergelo CRET1, CRET1, CRET2
- | b ->fff_restv
- |.endmacro
- |
- |.macro math_round, func
- | .ffunc math_ .. func
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE)
- | blt ->fff_fallback
- | checknum CARG2
- | evmergehi CARG1, CARG2, CARG2
- | checkfail ->fff_fallback
- | lwz PC, FRAME_PC(BASE)
- | bl ->vm_..func.._hilo;
- | la RA, -8(BASE)
- | evstdd CRET2, 0(RA)
- | b ->fff_res1
- |.endmacro
- |
- | math_round floor
- | math_round ceil
- |
- | math_extern sqrt
- |
- |.ffunc math_log
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE)
- | bne ->fff_fallback // Need exactly 1 argument.
- | checknum CARG2
- | evmergehi CARG1, CARG2, CARG2
- | checkfail ->fff_fallback
- | bl extern log@plt
- | evmergelo CRET1, CRET1, CRET2
- | b ->fff_restv
- |
- | math_extern log10
- | math_extern exp
- | math_extern sin
- | math_extern cos
- | math_extern tan
- | math_extern asin
- | math_extern acos
- | math_extern atan
- | math_extern sinh
- | math_extern cosh
- | math_extern tanh
- | math_extern2 pow
- | math_extern2 atan2
- | math_extern2 fmod
- |
- |->ff_math_deg:
- |.ffunc_n math_rad
- | evldd CARG2, CFUNC:RB->upvalue[0]
- | efdmul CRET1, CARG1, CARG2
- | b ->fff_restv
- |
- |.ffunc math_ldexp
- | cmplwi NARGS8:RC, 16
- | evldd CARG2, 0(BASE)
- | evldd CARG4, 8(BASE)
- | blt ->fff_fallback
- | evmergehi CARG1, CARG4, CARG2
- | checknum CARG1
- | checkanyfail ->fff_fallback
- | efdctsi CARG3, CARG4
- | bl extern ldexp@plt
- | evmergelo CRET1, CRET1, CRET2
- | b ->fff_restv
- |
- |.ffunc math_frexp
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE)
- | blt ->fff_fallback
- | checknum CARG2
- | evmergehi CARG1, CARG2, CARG2
- | checkfail ->fff_fallback
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | lwz PC, FRAME_PC(BASE)
- | bl extern frexp@plt
- | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH)
- | evmergelo CRET1, CRET1, CRET2
- | efdcfsi CRET2, TMP1
- | la RA, -8(BASE)
- | evstdd CRET1, 0(RA)
- | li RD, (2+1)*8
- | evstdd CRET2, 8(RA)
- | b ->fff_res
- |
- |.ffunc math_modf
- | cmplwi NARGS8:RC, 8
- | evldd CARG2, 0(BASE)
- | blt ->fff_fallback
- | checknum CARG2
- | evmergehi CARG1, CARG2, CARG2
- | checkfail ->fff_fallback
- | la CARG3, -8(BASE)
- | lwz PC, FRAME_PC(BASE)
- | bl extern modf@plt
- | evmergelo CRET1, CRET1, CRET2
- | la RA, -8(BASE)
- | evstdd CRET1, 0(BASE)
- | li RD, (2+1)*8
- | b ->fff_res
- |
- |.macro math_minmax, name, cmpop
- | .ffunc_1 name
- | checknum CARG1
- | li TMP1, 8
- | checkfail ->fff_fallback
- |1:
- | evlddx CARG2, BASE, TMP1
- | cmplw cr1, TMP1, NARGS8:RC
- | checknum CARG2
- | bge cr1, ->fff_restv // Ok, since CRET1 = CARG1.
- | checkfail ->fff_fallback
- | cmpop CARG2, CARG1
- | addi TMP1, TMP1, 8
- | crmove 4*cr0+lt, 4*cr0+gt
- | evsel CARG1, CARG2, CARG1
- | b <1
- |.endmacro
- |
- | math_minmax math_min, efdtstlt
- | math_minmax math_max, efdtstgt
- |
- |//-- String library -----------------------------------------------------
- |
- |.ffunc_1 string_len
- | checkstr STR:CARG1
- | checkfail ->fff_fallback
- | lwz TMP0, STR:CARG1->len
- | efdcfsi CRET1, TMP0
- | b ->fff_restv
- |
- |.ffunc string_byte // Only handle the 1-arg case here.
- | cmplwi NARGS8:RC, 8
- | evldd STR:CARG1, 0(BASE)
- | bne ->fff_fallback // Need exactly 1 argument.
- | checkstr STR:CARG1
- | la RA, -8(BASE)
- | checkfail ->fff_fallback
- | lwz TMP0, STR:CARG1->len
- | li RD, (0+1)*8
- | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end).
- | li TMP2, (1+1)*8
- | cmplwi TMP0, 0
- | lwz PC, FRAME_PC(BASE)
- | efdcfsi CRET1, TMP1
- | iseleq RD, RD, TMP2
- | evstdd CRET1, 0(RA)
- | b ->fff_res
- |
- |.ffunc string_char // Only handle the 1-arg case here.
- | ffgccheck
- | cmplwi NARGS8:RC, 8
- | evldd CARG1, 0(BASE)
- | bne ->fff_fallback // Exactly 1 argument.
- | checknum CARG1
- | la CARG2, DISPATCH_GL(tmptv)(DISPATCH)
- | checkfail ->fff_fallback
- | efdctsiz TMP0, CARG1
- | li CARG3, 1
- | cmplwi TMP0, 255
- | stb TMP0, 0(CARG2)
- | bgt ->fff_fallback
- |->fff_newstr:
- | mr CARG1, L
- | stw BASE, L->base
- | stw PC, SAVE_PC
- | bl extern lj_str_new // (lua_State *L, char *str, size_t l)
- | // Returns GCstr *.
- | lwz BASE, L->base
- | evmergelo STR:CRET1, TISSTR, STR:CRET1
- | b ->fff_restv
- |
- |.ffunc string_sub
- | ffgccheck
- | cmplwi NARGS8:RC, 16
- | evldd CARG3, 16(BASE)
- | evldd STR:CARG1, 0(BASE)
- | blt ->fff_fallback
- | evldd CARG2, 8(BASE)
- | li TMP2, -1
- | beq >1
- | checknum CARG3
- | checkfail ->fff_fallback
- | efdctsiz TMP2, CARG3
- |1:
- | checknum CARG2
- | checkfail ->fff_fallback
- | checkstr STR:CARG1
- | efdctsiz TMP1, CARG2
- | checkfail ->fff_fallback
- | lwz TMP0, STR:CARG1->len
- | cmplw TMP0, TMP2 // len < end? (unsigned compare)
- | add TMP3, TMP2, TMP0
- | blt >5
- |2:
- | cmpwi TMP1, 0 // start <= 0?
- | add TMP3, TMP1, TMP0
- | ble >7
- |3:
- | sub. CARG3, TMP2, TMP1
- | addi CARG2, STR:CARG1, #STR-1
- | addi CARG3, CARG3, 1
- | add CARG2, CARG2, TMP1
- | isellt CARG3, r0, CARG3
- | b ->fff_newstr
- |
- |5: // Negative end or overflow.
- | cmpw TMP0, TMP2
- | addi TMP3, TMP3, 1
- | iselgt TMP2, TMP3, TMP0 // end = end > len ? len : end+len+1
- | b <2
- |
- |7: // Negative start or underflow.
- | cmpwi cr1, TMP3, 0
- | iseleq TMP1, r0, TMP3
- | isel TMP1, r0, TMP1, 4*cr1+lt
- | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0)
- | b <3
- |
- |.ffunc string_rep // Only handle the 1-char case inline.
- | ffgccheck
- | cmplwi NARGS8:RC, 16
- | evldd CARG1, 0(BASE)
- | evldd CARG2, 8(BASE)
- | bne ->fff_fallback // Exactly 2 arguments.
- | checknum CARG2
- | checkfail ->fff_fallback
- | checkstr STR:CARG1
- | efdctsiz CARG3, CARG2
- | checkfail ->fff_fallback
- | lwz TMP0, STR:CARG1->len
- | cmpwi CARG3, 0
- | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
- | ble >2 // Count <= 0? (or non-int)
- | cmplwi TMP0, 1
- | subi TMP2, CARG3, 1
- | blt >2 // Zero length string?
- | cmplw cr1, TMP1, CARG3
- | bne ->fff_fallback // Fallback for > 1-char strings.
- | lbz TMP0, STR:CARG1[1]
- | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
- | blt cr1, ->fff_fallback
- |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
- | cmplwi TMP2, 0
- | stbx TMP0, CARG2, TMP2
- | subi TMP2, TMP2, 1
- | bne <1
- | b ->fff_newstr
- |2: // Return empty string.
- | la STR:CRET1, DISPATCH_GL(strempty)(DISPATCH)
- | evmergelo CRET1, TISSTR, STR:CRET1
- | b ->fff_restv
- |
- |.ffunc string_reverse
- | ffgccheck
- | cmplwi NARGS8:RC, 8
- | evldd CARG1, 0(BASE)
- | blt ->fff_fallback
- | checkstr STR:CARG1
- | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
- | checkfail ->fff_fallback
- | lwz CARG3, STR:CARG1->len
- | la CARG1, #STR(STR:CARG1)
- | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
- | li TMP2, 0
- | cmplw TMP1, CARG3
- | subi TMP3, CARG3, 1
- | blt ->fff_fallback
- |1: // Reverse string copy.
- | cmpwi TMP3, 0
- | lbzx TMP1, CARG1, TMP2
- | blt ->fff_newstr
- | stbx TMP1, CARG2, TMP3
- | subi TMP3, TMP3, 1
- | addi TMP2, TMP2, 1
- | b <1
- |
- |.macro ffstring_case, name, lo
- | .ffunc name
- | ffgccheck
- | cmplwi NARGS8:RC, 8
- | evldd CARG1, 0(BASE)
- | blt ->fff_fallback
- | checkstr STR:CARG1
- | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH)
- | checkfail ->fff_fallback
- | lwz CARG3, STR:CARG1->len
- | la CARG1, #STR(STR:CARG1)
- | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH)
- | cmplw TMP1, CARG3
- | li TMP2, 0
- | blt ->fff_fallback
- |1: // ASCII case conversion.
- | cmplw TMP2, CARG3
- | lbzx TMP1, CARG1, TMP2
- | bge ->fff_newstr
- | subi TMP0, TMP1, lo
- | xori TMP3, TMP1, 0x20
- | cmplwi TMP0, 26
- | isellt TMP1, TMP3, TMP1
- | stbx TMP1, CARG2, TMP2
- | addi TMP2, TMP2, 1
- | b <1
- |.endmacro
- |
- |ffstring_case string_lower, 65
- |ffstring_case string_upper, 97
- |
- |//-- Table library ------------------------------------------------------
- |
- |.ffunc_1 table_getn
- | checktab CARG1
- | checkfail ->fff_fallback
- | bl extern lj_tab_len // (GCtab *t)
- | // Returns uint32_t (but less than 2^31).
- | efdcfsi CRET1, CRET1
- | b ->fff_restv
- |
- |//-- Bit library --------------------------------------------------------
- |
- |.macro .ffunc_bit, name
- | .ffunc_n bit_..name
- | efdadd CARG1, CARG1, TOBIT
- |.endmacro
- |
- |.ffunc_bit tobit
- |->fff_resbit:
- | efdcfsi CRET1, CARG1
- | b ->fff_restv
- |
- |.macro .ffunc_bit_op, name, ins
- | .ffunc_bit name
- | li TMP1, 8
- |1:
- | evlddx CARG2, BASE, TMP1
- | cmplw cr1, TMP1, NARGS8:RC
- | checknum CARG2
- | bge cr1, ->fff_resbit
- | checkfail ->fff_fallback
- | efdadd CARG2, CARG2, TOBIT
- | ins CARG1, CARG1, CARG2
- | addi TMP1, TMP1, 8
- | b <1
- |.endmacro
- |
- |.ffunc_bit_op band, and
- |.ffunc_bit_op bor, or
- |.ffunc_bit_op bxor, xor
- |
- |.ffunc_bit bswap
- | rotlwi TMP0, CARG1, 8
- | rlwimi TMP0, CARG1, 24, 0, 7
- | rlwimi TMP0, CARG1, 24, 16, 23
- | efdcfsi CRET1, TMP0
- | b ->fff_restv
- |
- |.ffunc_bit bnot
- | not TMP0, CARG1
- | efdcfsi CRET1, TMP0
- | b ->fff_restv
- |
- |.macro .ffunc_bit_sh, name, ins, shmod
- | .ffunc_nn bit_..name
- | efdadd CARG2, CARG2, TOBIT
- | efdadd CARG1, CARG1, TOBIT
- |.if shmod == 1
- | rlwinm CARG2, CARG2, 0, 27, 31
- |.elif shmod == 2
- | neg CARG2, CARG2
- |.endif
- | ins TMP0, CARG1, CARG2
- | efdcfsi CRET1, TMP0
- | b ->fff_restv
- |.endmacro
- |
- |.ffunc_bit_sh lshift, slw, 1
- |.ffunc_bit_sh rshift, srw, 1
- |.ffunc_bit_sh arshift, sraw, 1
- |.ffunc_bit_sh rol, rotlw, 0
- |.ffunc_bit_sh ror, rotlw, 2
- |
- |//-----------------------------------------------------------------------
- |
- |->fff_fallback: // Call fast function fallback handler.
- | // BASE = new base, RB = CFUNC, RC = nargs*8
- | lwz TMP3, CFUNC:RB->f
- | add TMP1, BASE, NARGS8:RC
- | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC.
- | addi TMP0, TMP1, 8*LUA_MINSTACK
- | lwz TMP2, L->maxstack
- | stw PC, SAVE_PC // Redundant (but a defined value).
- | cmplw TMP0, TMP2
- | stw BASE, L->base
- | stw TMP1, L->top
- | mr CARG1, L
- | bgt >5 // Need to grow stack.
- | mtctr TMP3
- | bctrl // (lua_State *L)
- | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
- | lwz BASE, L->base
- | cmpwi CRET1, 0
- | slwi RD, CRET1, 3
- | la RA, -8(BASE)
- | bgt ->fff_res // Returned nresults+1?
- |1: // Returned 0 or -1: retry fast path.
- | lwz TMP0, L->top
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | sub NARGS8:RC, TMP0, BASE
- | bne ->vm_call_tail // Returned -1?
- | ins_callt // Returned 0: retry fast path.
- |
- |// Reconstruct previous base for vmeta_call during tailcall.
- |->vm_call_tail:
- | andi. TMP0, PC, FRAME_TYPE
- | rlwinm TMP1, PC, 0, 0, 28
- | bne >3
- | lwz INS, -4(PC)
- | decode_RA8 TMP1, INS
- | addi TMP1, TMP1, 8
- |3:
- | sub TMP2, BASE, TMP1
- | b ->vm_call_dispatch // Resolve again for tailcall.
- |
- |5: // Grow stack for fallback handler.
- | li CARG2, LUA_MINSTACK
- | bl extern lj_state_growstack // (lua_State *L, int n)
- | lwz BASE, L->base
- | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry.
- | b <1
- |
- |->fff_gcstep: // Call GC step function.
- | // BASE = new base, RC = nargs*8
- | mflr SAVE0
- | stw BASE, L->base
- | add TMP0, BASE, NARGS8:RC
- | stw PC, SAVE_PC // Redundant (but a defined value).
- | stw TMP0, L->top
- | mr CARG1, L
- | bl extern lj_gc_step // (lua_State *L)
- | lwz BASE, L->base
- | mtlr SAVE0
- | lwz TMP0, L->top
- | sub NARGS8:RC, TMP0, BASE
- | lwz CFUNC:RB, FRAME_FUNC(BASE)
- | blr
- |
- |//-----------------------------------------------------------------------
- |//-- Special dispatch targets -------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_record: // Dispatch target for recording phase.
- |.if JIT
- | NYI
- |.endif
- |
- |->vm_rethook: // Dispatch target for return hooks.
- | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
- | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active?
- | beq >1
- |5: // Re-dispatch to static ins.
- | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OP4 TMP1, INS.
- | lwzx TMP0, DISPATCH, TMP1
- | mtctr TMP0
- | bctr
- |
- |->vm_inshook: // Dispatch target for instr/line hooks.
- | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH)
- | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH)
- | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active?
- | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0
- | bne <5
- |
- | cmpwi cr1, TMP0, 0
- | addic. TMP2, TMP2, -1
- | beq cr1, <5
- | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH)
- | beq >1
- | bge cr1, <5
- |1:
- | mr CARG1, L
- | stw MULTRES, SAVE_MULTRES
- | mr CARG2, PC
- | stw BASE, L->base
- | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
- | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
- |3:
- | lwz BASE, L->base
- |4: // Re-dispatch to static ins.
- | lwz INS, -4(PC)
- | decode_OP4 TMP1, INS
- | decode_RB8 RB, INS
- | addi TMP1, TMP1, GG_DISP2STATIC
- | decode_RD8 RD, INS
- | lwzx TMP0, DISPATCH, TMP1
- | decode_RA8 RA, INS
- | decode_RC8 RC, INS
- | mtctr TMP0
- | bctr
- |
- |->cont_hook: // Continue from hook yield.
- | addi PC, PC, 4
- | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins.
- | b <4
- |
- |->vm_hotloop: // Hot loop counter underflow.
- |.if JIT
- | NYI
- |.endif
- |
- |->vm_callhook: // Dispatch target for call hooks.
- | mr CARG2, PC
- |.if JIT
- | b >1
- |.endif
- |
- |->vm_hotcall: // Hot call counter underflow.
- |.if JIT
- | ori CARG2, PC, 1
- |1:
- |.endif
- | add TMP0, BASE, RC
- | stw PC, SAVE_PC
- | mr CARG1, L
- | stw BASE, L->base
- | sub RA, RA, BASE
- | stw TMP0, L->top
- | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
- | // Returns ASMFunction.
- | lwz BASE, L->base
- | lwz TMP0, L->top
- | stw ZERO, SAVE_PC // Invalidate for subsequent line hook.
- | sub NARGS8:RC, TMP0, BASE
- | add RA, BASE, RA
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | mtctr CRET1
- | bctr
- |
- |//-----------------------------------------------------------------------
- |//-- Trace exit handler -------------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_exit_handler:
- |.if JIT
- | NYI
- |.endif
- |->vm_exit_interp:
- |.if JIT
- | NYI
- |.endif
- |
- |//-----------------------------------------------------------------------
- |//-- Math helper functions ----------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |// FP value rounding. Called by math.floor/math.ceil fast functions
- |// and from JIT code.
- |//
- |// This can be inlined if the CPU has the frin/friz/frip/frim instructions.
- |// The alternative hard-float approaches have a deep dependency chain.
- |// The resulting latency is at least 3x-7x the double-precision FP latency
- |// (e500v2: 6cy, e600: 5cy, Cell: 10cy) or around 20-70 cycles.
- |//
- |// The soft-float approach is tedious, but much faster (e500v2: ~11cy/~6cy).
- |// However it relies on a fast way to transfer the FP value to GPRs
- |// (e500v2: 0cy for lo-word, 1cy for hi-word).
- |//
- |.macro vm_round, name, mode
- | // Used temporaries: TMP0, TMP1, TMP2, TMP3.
- |->name.._efd: // Input: CARG2, output: CRET2
- | evmergehi CARG1, CARG2, CARG2
- |->name.._hilo:
- | // Input: CARG1 (hi), CARG2 (hi, lo), output: CRET2
- | rlwinm TMP2, CARG1, 12, 21, 31
- | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023
- | li TMP1, -1
- | cmplwi cr1, TMP2, 51 // 0 <= exp <= 51?
- | subfic TMP0, TMP2, 52
- | bgt cr1, >1
- | lus TMP3, 0xfff0
- | slw TMP0, TMP1, TMP0 // lomask = -1 << (52-exp)
- | sraw TMP1, TMP3, TMP2 // himask = (int32_t)0xfff00000 >> exp
- |.if mode == 2 // trunc(x):
- | evmergelo TMP0, TMP1, TMP0
- | evand CRET2, CARG2, TMP0 // hi &= himask, lo &= lomask
- |.else
- | andc TMP2, CARG2, TMP0
- | andc TMP3, CARG1, TMP1
- | or TMP2, TMP2, TMP3 // ztest = (hi&~himask) | (lo&~lomask)
- | srawi TMP3, CARG1, 31 // signmask = (int32_t)hi >> 31
- |.if mode == 0 // floor(x):
- | and. TMP2, TMP2, TMP3 // iszero = ((ztest & signmask) == 0)
- |.else // ceil(x):
- | andc. TMP2, TMP2, TMP3 // iszero = ((ztest & ~signmask) == 0)
- |.endif
- | and CARG2, CARG2, TMP0 // lo &= lomask
- | and CARG1, CARG1, TMP1 // hi &= himask
- | subc TMP0, CARG2, TMP0
- | iseleq TMP0, CARG2, TMP0 // lo = iszero ? lo : lo-lomask
- | sube TMP1, CARG1, TMP1
- | iseleq TMP1, CARG1, TMP1 // hi = iszero ? hi : hi-himask+carry
- | evmergelo CRET2, TMP1, TMP0
- |.endif
- | blr
- |1:
- | bgtlr // Already done if >=2^52, +-inf or nan.
- |.if mode == 2 // trunc(x):
- | rlwinm TMP1, CARG1, 0, 0, 0 // hi = sign(x)
- | li TMP0, 0
- | evmergelo CRET2, TMP1, TMP0
- |.else
- | rlwinm TMP2, CARG1, 0, 1, 31
- | srawi TMP0, CARG1, 31 // signmask = (int32_t)hi >> 31
- | or TMP2, TMP2, CARG2 // ztest = abs(hi) | lo
- | lus TMP1, 0x3ff0
- |.if mode == 0 // floor(x):
- | and. TMP2, TMP2, TMP0 // iszero = ((ztest & signmask) == 0)
- |.else // ceil(x):
- | andc. TMP2, TMP2, TMP0 // iszero = ((ztest & ~signmask) == 0)
- |.endif
- | li TMP0, 0
- | iseleq TMP1, r0, TMP1
- | rlwimi CARG1, TMP1, 0, 1, 31 // hi = sign(x) | (iszero ? 0.0 : 1.0)
- | evmergelo CRET2, CARG1, TMP0
- |.endif
- | blr
- |.endmacro
- |
- |->vm_floor:
- | mflr CARG3
- | evmergelo CARG2, CARG1, CARG2
- | bl ->vm_floor_hilo
- | mtlr CARG3
- | evmergehi CRET1, CRET2, CRET2
- | blr
- |
- | vm_round vm_floor, 0
- | vm_round vm_ceil, 1
- |.if JIT
- | vm_round vm_trunc, 2
- |.else
- |->vm_trunc_efd:
- |->vm_trunc_hilo:
- |.endif
- |
- |//-----------------------------------------------------------------------
- |//-- Miscellaneous functions --------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |//-----------------------------------------------------------------------
- |//-- FFI helper functions -----------------------------------------------
- |//-----------------------------------------------------------------------
- |
- |->vm_ffi_call:
- |.if FFI
- | NYI
- |.endif
- |
- |//-----------------------------------------------------------------------
-}
-
-/* Generate the code for a single instruction. */
-static void build_ins(BuildCtx *ctx, BCOp op, int defop)
-{
- int vk = 0;
- |=>defop:
-
- switch (op) {
-
- /* -- Comparison ops ---------------------------------------------------- */
-
- /* Remember: all ops branch for a true comparison, fall through otherwise. */
-
- case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- | evlddx TMP0, BASE, RA
- | addi PC, PC, 4
- | evlddx TMP1, BASE, RD
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | lwz TMP2, -4(PC)
- | evmergehi RB, TMP0, TMP1
- | decode_RD4 TMP2, TMP2
- | checknum RB
- | add TMP2, TMP2, TMP3
- | checkanyfail ->vmeta_comp
- | efdcmplt TMP0, TMP1
- if (op == BC_ISLE || op == BC_ISGT) {
- | efdcmpeq cr1, TMP0, TMP1
- | cror 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
- }
- if (op == BC_ISLT || op == BC_ISLE) {
- | iselgt PC, TMP2, PC
- } else {
- | iselgt PC, PC, TMP2
- }
- | ins_next
- break;
-
- case BC_ISEQV: case BC_ISNEV:
- vk = op == BC_ISEQV;
- | // RA = src1*8, RD = src2*8, JMP with RD = target
- | evlddx CARG2, BASE, RA
- | addi PC, PC, 4
- | evlddx CARG3, BASE, RD
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | lwz TMP2, -4(PC)
- | evmergehi RB, CARG2, CARG3
- | decode_RD4 TMP2, TMP2
- | checknum RB
- | add TMP2, TMP2, TMP3
- | checkanyfail >5
- | efdcmpeq CARG2, CARG3
- if (vk) {
- | iselgt PC, TMP2, PC
- } else {
- | iselgt PC, PC, TMP2
- }
- |1:
- | ins_next
- |
- |5: // Either or both types are not numbers.
- | evcmpeq CARG2, CARG3
- | not TMP3, RB
- | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive?
- | crorc 4*cr7+lt, 4*cr0+so, 4*cr0+lt // 1: Same tv or different type.
- | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata?
- | crandc 4*cr7+gt, 4*cr0+lt, 4*cr1+gt // 2: Same type and primitive.
- | mr SAVE0, PC
- if (vk) {
- | isel PC, TMP2, PC, 4*cr7+gt
- } else {
- | isel TMP2, PC, TMP2, 4*cr7+gt
- }
- | cror 4*cr7+lt, 4*cr7+lt, 4*cr7+gt // 1 or 2.
- if (vk) {
- | isel PC, TMP2, PC, 4*cr0+so
- } else {
- | isel PC, PC, TMP2, 4*cr0+so
- }
- | blt cr7, <1 // Done if 1 or 2.
- | blt cr6, <1 // Done if not tab/ud.
- |
- | // Different tables or userdatas. Need to check __eq metamethod.
- | // Field metatable must be at same offset for GCtab and GCudata!
- | lwz TAB:TMP2, TAB:CARG2->metatable
- | li CARG4, 1-vk // ne = 0 or 1.
- | cmplwi TAB:TMP2, 0
- | beq <1 // No metatable?
- | lbz TMP2, TAB:TMP2->nomm
- | andi. TMP2, TMP2, 1<<MM_eq
- | bne <1 // Or 'no __eq' flag set?
- | mr PC, SAVE0 // Restore old PC.
- | b ->vmeta_equal // Handle __eq metamethod.
- break;
-
- case BC_ISEQS: case BC_ISNES:
- vk = op == BC_ISEQS;
- | // RA = src*8, RD = str_const*8 (~), JMP with RD = target
- | evlddx TMP0, BASE, RA
- | srwi RD, RD, 1
- | lwz INS, 0(PC)
- | subfic RD, RD, -4
- | addi PC, PC, 4
- | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | decode_RD4 TMP2, INS
- | evmergelo STR:TMP1, TISSTR, STR:TMP1
- | add TMP2, TMP2, TMP3
- | evcmpeq TMP0, STR:TMP1
- if (vk) {
- | isel PC, TMP2, PC, 4*cr0+so
- } else {
- | isel PC, PC, TMP2, 4*cr0+so
- }
- | ins_next
- break;
-
- case BC_ISEQN: case BC_ISNEN:
- vk = op == BC_ISEQN;
- | // RA = src*8, RD = num_const*8, JMP with RD = target
- | evlddx TMP0, BASE, RA
- | addi PC, PC, 4
- | evlddx TMP1, KBASE, RD
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | lwz INS, -4(PC)
- | checknum TMP0
- | checkfail >5
- | efdcmpeq TMP0, TMP1
- |1:
- | decode_RD4 TMP2, INS
- | add TMP2, TMP2, TMP3
- if (vk) {
- | iselgt PC, TMP2, PC
- |5:
- } else {
- | iselgt PC, PC, TMP2
- }
- |3:
- | ins_next
- if (!vk) {
- |5:
- | decode_RD4 TMP2, INS
- | add PC, TMP2, TMP3
- | b <3
- }
- break;
-
- case BC_ISEQP: case BC_ISNEP:
- vk = op == BC_ISEQP;
- | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target
- | lwzx TMP0, BASE, RA
- | srwi TMP1, RD, 3
- | lwz INS, 0(PC)
- | addi PC, PC, 4
- | not TMP1, TMP1
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | cmplw TMP0, TMP1
- | decode_RD4 TMP2, INS
- | add TMP2, TMP2, TMP3
- if (vk) {
- | iseleq PC, TMP2, PC
- } else {
- | iseleq PC, PC, TMP2
- }
- | ins_next
- break;
-
- /* -- Unary test and copy ops ------------------------------------------- */
-
- case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
- | // RA = dst*8 or unused, RD = src*8, JMP with RD = target
- | evlddx TMP0, BASE, RD
- | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE.
- | lwz INS, 0(PC)
- | evcmpltu TMP0, TMP1
- | addi PC, PC, 4
- if (op == BC_IST || op == BC_ISF) {
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | decode_RD4 TMP2, INS
- | add TMP2, TMP2, TMP3
- if (op == BC_IST) {
- | isellt PC, TMP2, PC
- } else {
- | isellt PC, PC, TMP2
- }
- } else {
- if (op == BC_ISTC) {
- | checkfail >1
- } else {
- | checkok >1
- }
- | addis PC, PC, -(BCBIAS_J*4 >> 16)
- | decode_RD4 TMP2, INS
- | evstddx TMP0, BASE, RA
- | add PC, PC, TMP2
- |1:
- }
- | ins_next
- break;
-
- /* -- Unary ops --------------------------------------------------------- */
-
- case BC_MOV:
- | // RA = dst*8, RD = src*8
- | ins_next1
- | evlddx TMP0, BASE, RD
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_NOT:
- | // RA = dst*8, RD = src*8
- | ins_next1
- | lwzx TMP0, BASE, RD
- | subfic TMP1, TMP0, LJ_TTRUE
- | adde TMP0, TMP0, TMP1
- | stwx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_UNM:
- | // RA = dst*8, RD = src*8
- | evlddx TMP0, BASE, RD
- | checknum TMP0
- | checkfail ->vmeta_unm
- | efdneg TMP0, TMP0
- | ins_next1
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_LEN:
- | // RA = dst*8, RD = src*8
- | evlddx CARG1, BASE, RD
- | checkstr CARG1
- | checkfail >2
- | lwz CRET1, STR:CARG1->len
- |1:
- | ins_next1
- | efdcfsi TMP0, CRET1
- | evstddx TMP0, BASE, RA
- | ins_next2
- |2:
- | checktab CARG1
- | checkfail ->vmeta_len
-#if LJ_52
- | lwz TAB:TMP2, TAB:CARG1->metatable
- | cmplwi TAB:TMP2, 0
- | bne >9
- |3:
-#endif
- |->BC_LEN_Z:
- | bl extern lj_tab_len // (GCtab *t)
- | // Returns uint32_t (but less than 2^31).
- | b <1
-#if LJ_52
- |9:
- | lbz TMP0, TAB:TMP2->nomm
- | andi. TMP0, TMP0, 1<<MM_len
- | bne <3 // 'no __len' flag set: done.
- | b ->vmeta_len
-#endif
- break;
-
- /* -- Binary ops -------------------------------------------------------- */
-
- |.macro ins_arithpre, t0, t1
- | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8
- ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
- ||switch (vk) {
- ||case 0:
- | evlddx t0, BASE, RB
- | checknum t0
- | evlddx t1, KBASE, RC
- | checkfail ->vmeta_arith_vn
- || break;
- ||case 1:
- | evlddx t1, BASE, RB
- | checknum t1
- | evlddx t0, KBASE, RC
- | checkfail ->vmeta_arith_nv
- || break;
- ||default:
- | evlddx t0, BASE, RB
- | evlddx t1, BASE, RC
- | evmergehi TMP2, t0, t1
- | checknum TMP2
- | checkanyfail ->vmeta_arith_vv
- || break;
- ||}
- |.endmacro
- |
- |.macro ins_arith, ins
- | ins_arithpre TMP0, TMP1
- | ins_next1
- | ins TMP0, TMP0, TMP1
- | evstddx TMP0, BASE, RA
- | ins_next2
- |.endmacro
-
- case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
- | ins_arith efdadd
- break;
- case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
- | ins_arith efdsub
- break;
- case BC_MULVN: case BC_MULNV: case BC_MULVV:
- | ins_arith efdmul
- break;
- case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | ins_arith efddiv
- break;
- case BC_MODVN:
- | ins_arithpre RD, SAVE0
- |->BC_MODVN_Z:
- | efddiv CARG2, RD, SAVE0
- | bl ->vm_floor_efd // floor(b/c)
- | efdmul TMP0, CRET2, SAVE0
- | ins_next1
- | efdsub TMP0, RD, TMP0 // b - floor(b/c)*c
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_MODNV: case BC_MODVV:
- | ins_arithpre RD, SAVE0
- | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
- break;
- case BC_POW:
- | evlddx CARG2, BASE, RB
- | evlddx CARG4, BASE, RC
- | evmergehi CARG1, CARG4, CARG2
- | checknum CARG1
- | evmergehi CARG3, CARG4, CARG4
- | checkanyfail ->vmeta_arith_vv
- | bl extern pow@plt
- | evmergelo CRET2, CRET1, CRET2
- | evstddx CRET2, BASE, RA
- | ins_next
- break;
-
- case BC_CAT:
- | // RA = dst*8, RB = src_start*8, RC = src_end*8
- | sub CARG3, RC, RB
- | stw BASE, L->base
- | add CARG2, BASE, RC
- | mr SAVE0, RB
- |->BC_CAT_Z:
- | stw PC, SAVE_PC
- | mr CARG1, L
- | srwi CARG3, CARG3, 3
- | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left)
- | // Returns NULL (finished) or TValue * (metamethod).
- | cmplwi CRET1, 0
- | lwz BASE, L->base
- | bne ->vmeta_binop
- | evlddx TMP0, BASE, SAVE0 // Copy result from RB to RA.
- | evstddx TMP0, BASE, RA
- | ins_next
- break;
-
- /* -- Constant ops ------------------------------------------------------ */
-
- case BC_KSTR:
- | // RA = dst*8, RD = str_const*8 (~)
- | ins_next1
- | srwi TMP1, RD, 1
- | subfic TMP1, TMP1, -4
- | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4
- | evmergelo TMP0, TISSTR, TMP0
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_KCDATA:
- |.if FFI
- | // RA = dst*8, RD = cdata_const*8 (~)
- | ins_next1
- | srwi TMP1, RD, 1
- | subfic TMP1, TMP1, -4
- | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4
- | li TMP2, LJ_TCDATA
- | evmergelo TMP0, TMP2, TMP0
- | evstddx TMP0, BASE, RA
- | ins_next2
- |.endif
- break;
- case BC_KSHORT:
- | // RA = dst*8, RD = int16_literal*8
- | srwi TMP1, RD, 3
- | extsh TMP1, TMP1
- | ins_next1
- | efdcfsi TMP0, TMP1
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_KNUM:
- | // RA = dst*8, RD = num_const*8
- | evlddx TMP0, KBASE, RD
- | ins_next1
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_KPRI:
- | // RA = dst*8, RD = primitive_type*8 (~)
- | srwi TMP1, RD, 3
- | not TMP0, TMP1
- | ins_next1
- | stwx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_KNIL:
- | // RA = base*8, RD = end*8
- | evstddx TISNIL, BASE, RA
- | addi RA, RA, 8
- |1:
- | evstddx TISNIL, BASE, RA
- | cmpw RA, RD
- | addi RA, RA, 8
- | blt <1
- | ins_next_
- break;
-
- /* -- Upvalue and function ops ------------------------------------------ */
-
- case BC_UGET:
- | // RA = dst*8, RD = uvnum*8
- | ins_next1
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RD, RD, 1
- | addi RD, RD, offsetof(GCfuncL, uvptr)
- | lwzx UPVAL:RB, LFUNC:RB, RD
- | lwz TMP1, UPVAL:RB->v
- | evldd TMP0, 0(TMP1)
- | evstddx TMP0, BASE, RA
- | ins_next2
- break;
- case BC_USETV:
- | // RA = uvnum*8, RD = src*8
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
- | evlddx TMP1, BASE, RD
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | lbz TMP3, UPVAL:RB->marked
- | lwz CARG2, UPVAL:RB->v
- | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
- | lbz TMP0, UPVAL:RB->closed
- | evmergehi TMP2, TMP1, TMP1
- | evstdd TMP1, 0(CARG2)
- | cmplwi cr1, TMP0, 0
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
- | subi TMP2, TMP2, (LJ_TISNUM+1)
- | bne >2 // Upvalue is closed and black?
- |1:
- | ins_next
- |
- |2: // Check if new value is collectable.
- | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1)
- | bge <1 // tvisgcv(v)
- | lbz TMP3, GCOBJ:TMP1->gch.marked
- | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(v)
- | la CARG1, GG_DISP2G(DISPATCH)
- | // Crossed a write barrier. Move the barrier forward.
- | beq <1
- | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
- | b <1
- break;
- case BC_USETS:
- | // RA = uvnum*8, RD = str_const*8 (~)
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi TMP1, RD, 1
- | srwi RA, RA, 1
- | subfic TMP1, TMP1, -4
- | addi RA, RA, offsetof(GCfuncL, uvptr)
- | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | evmergelo STR:TMP1, TISSTR, STR:TMP1
- | lbz TMP3, UPVAL:RB->marked
- | lwz CARG2, UPVAL:RB->v
- | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv)
- | lbz TMP3, STR:TMP1->marked
- | lbz TMP2, UPVAL:RB->closed
- | evstdd STR:TMP1, 0(CARG2)
- | bne >2
- |1:
- | ins_next
- |
- |2: // Check if string is white and ensure upvalue is closed.
- | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(str)
- | cmplwi cr1, TMP2, 0
- | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq
- | la CARG1, GG_DISP2G(DISPATCH)
- | // Crossed a write barrier. Move the barrier forward.
- | beq <1
- | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv)
- | b <1
- break;
- case BC_USETN:
- | // RA = uvnum*8, RD = num_const*8
- | ins_next1
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
- | evlddx TMP0, KBASE, RD
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | lwz TMP1, UPVAL:RB->v
- | evstdd TMP0, 0(TMP1)
- | ins_next2
- break;
- case BC_USETP:
- | // RA = uvnum*8, RD = primitive_type*8 (~)
- | ins_next1
- | lwz LFUNC:RB, FRAME_FUNC(BASE)
- | srwi RA, RA, 1
- | addi RA, RA, offsetof(GCfuncL, uvptr)
- | srwi TMP0, RD, 3
- | lwzx UPVAL:RB, LFUNC:RB, RA
- | not TMP0, TMP0
- | lwz TMP1, UPVAL:RB->v
- | stw TMP0, 0(TMP1)
- | ins_next2
- break;
-
- case BC_UCLO:
- | // RA = level*8, RD = target
- | lwz TMP1, L->openupval
- | branch_RD // Do this first since RD is not saved.
- | stw BASE, L->base
- | cmplwi TMP1, 0
- | mr CARG1, L
- | beq >1
- | add CARG2, BASE, RA
- | bl extern lj_func_closeuv // (lua_State *L, TValue *level)
- | lwz BASE, L->base
- |1:
- | ins_next
- break;
-
- case BC_FNEW:
- | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype)
- | srwi TMP1, RD, 1
- | stw BASE, L->base
- | subfic TMP1, TMP1, -4
- | stw PC, SAVE_PC
- | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4
- | mr CARG1, L
- | lwz CARG3, FRAME_FUNC(BASE)
- | // (lua_State *L, GCproto *pt, GCfuncL *parent)
- | bl extern lj_func_newL_gc
- | // Returns GCfuncL *.
- | lwz BASE, L->base
- | evmergelo LFUNC:CRET1, TISFUNC, LFUNC:CRET1
- | evstddx LFUNC:CRET1, BASE, RA
- | ins_next
- break;
-
- /* -- Table ops --------------------------------------------------------- */
-
- case BC_TNEW:
- case BC_TDUP:
- | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~)
- | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH)
- | mr CARG1, L
- | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH)
- | stw BASE, L->base
- | cmplw TMP0, TMP1
- | stw PC, SAVE_PC
- | bge >5
- |1:
- if (op == BC_TNEW) {
- | rlwinm CARG2, RD, 29, 21, 31
- | rlwinm CARG3, RD, 18, 27, 31
- | cmpwi CARG2, 0x7ff
- | li TMP1, 0x801
- | iseleq CARG2, TMP1, CARG2
- | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
- | // Returns Table *.
- } else {
- | srwi TMP1, RD, 1
- | subfic TMP1, TMP1, -4
- | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4
- | bl extern lj_tab_dup // (lua_State *L, Table *kt)
- | // Returns Table *.
- }
- | lwz BASE, L->base
- | evmergelo TAB:CRET1, TISTAB, TAB:CRET1
- | evstddx TAB:CRET1, BASE, RA
- | ins_next
- |5:
- | mr SAVE0, RD
- | bl extern lj_gc_step_fixtop // (lua_State *L)
- | mr RD, SAVE0
- | mr CARG1, L
- | b <1
- break;
-
- case BC_GGET:
- | // RA = dst*8, RD = str_const*8 (~)
- case BC_GSET:
- | // RA = src*8, RD = str_const*8 (~)
- | lwz LFUNC:TMP2, FRAME_FUNC(BASE)
- | srwi TMP1, RD, 1
- | lwz TAB:RB, LFUNC:TMP2->env
- | subfic TMP1, TMP1, -4
- | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
- if (op == BC_GGET) {
- | b ->BC_TGETS_Z
- } else {
- | b ->BC_TSETS_Z
- }
- break;
-
- case BC_TGETV:
- | // RA = dst*8, RB = table*8, RC = key*8
- | evlddx TAB:RB, BASE, RB
- | evlddx RC, BASE, RC
- | checktab TAB:RB
- | checkfail ->vmeta_tgetv
- | checknum RC
- | checkfail >5
- | // Convert number key to integer
- | efdctsi TMP2, RC
- | lwz TMP0, TAB:RB->asize
- | efdcfsi TMP1, TMP2
- | cmplw cr0, TMP0, TMP2
- | efdcmpeq cr1, RC, TMP1
- | lwz TMP1, TAB:RB->array
- | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
- | slwi TMP2, TMP2, 3
- | ble ->vmeta_tgetv // Integer key and in array part?
- | evlddx TMP1, TMP1, TMP2
- | checknil TMP1
- | checkok >2
- |1:
- | evstddx TMP1, BASE, RA
- | ins_next
- |
- |2: // Check for __index if table value is nil.
- | lwz TAB:TMP2, TAB:RB->metatable
- | cmplwi TAB:TMP2, 0
- | beq <1 // No metatable: done.
- | lbz TMP0, TAB:TMP2->nomm
- | andi. TMP0, TMP0, 1<<MM_index
- | bne <1 // 'no __index' flag set: done.
- | b ->vmeta_tgetv
- |
- |5:
- | checkstr STR:RC // String key?
- | checkok ->BC_TGETS_Z
- | b ->vmeta_tgetv
- break;
- case BC_TGETS:
- | // RA = dst*8, RB = table*8, RC = str_const*8 (~)
- | evlddx TAB:RB, BASE, RB
- | srwi TMP1, RC, 1
- | checktab TAB:RB
- | subfic TMP1, TMP1, -4
- | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
- | checkfail ->vmeta_tgets1
- |->BC_TGETS_Z:
- | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8
- | lwz TMP0, TAB:RB->hmask
- | lwz TMP1, STR:RC->hash
- | lwz NODE:TMP2, TAB:RB->node
- | evmergelo STR:RC, TISSTR, STR:RC
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
- | slwi TMP0, TMP1, 5
- | slwi TMP1, TMP1, 3
- | sub TMP1, TMP0, TMP1
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |1:
- | evldd TMP0, NODE:TMP2->key
- | evldd TMP1, NODE:TMP2->val
- | evcmpeq TMP0, STR:RC
- | checkanyfail >4
- | checknil TMP1
- | checkok >5 // Key found, but nil value?
- |3:
- | evstddx TMP1, BASE, RA
- | ins_next
- |
- |4: // Follow hash chain.
- | lwz NODE:TMP2, NODE:TMP2->next
- | cmplwi NODE:TMP2, 0
- | bne <1
- | // End of hash chain: key not found, nil result.
- | evmr TMP1, TISNIL
- |
- |5: // Check for __index if table value is nil.
- | lwz TAB:TMP2, TAB:RB->metatable
- | cmplwi TAB:TMP2, 0
- | beq <3 // No metatable: done.
- | lbz TMP0, TAB:TMP2->nomm
- | andi. TMP0, TMP0, 1<<MM_index
- | bne <3 // 'no __index' flag set: done.
- | b ->vmeta_tgets
- break;
- case BC_TGETB:
- | // RA = dst*8, RB = table*8, RC = index*8
- | evlddx TAB:RB, BASE, RB
- | srwi TMP0, RC, 3
- | checktab TAB:RB
- | checkfail ->vmeta_tgetb
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
- | cmplw TMP0, TMP1
- | bge ->vmeta_tgetb
- | evlddx TMP1, TMP2, RC
- | checknil TMP1
- | checkok >5
- |1:
- | ins_next1
- | evstddx TMP1, BASE, RA
- | ins_next2
- |
- |5: // Check for __index if table value is nil.
- | lwz TAB:TMP2, TAB:RB->metatable
- | cmplwi TAB:TMP2, 0
- | beq <1 // No metatable: done.
- | lbz TMP2, TAB:TMP2->nomm
- | andi. TMP2, TMP2, 1<<MM_index
- | bne <1 // 'no __index' flag set: done.
- | b ->vmeta_tgetb // Caveat: preserve TMP0!
- break;
-
- case BC_TSETV:
- | // RA = src*8, RB = table*8, RC = key*8
- | evlddx TAB:RB, BASE, RB
- | evlddx RC, BASE, RC
- | checktab TAB:RB
- | checkfail ->vmeta_tsetv
- | checknum RC
- | checkfail >5
- | // Convert number key to integer
- | efdctsi TMP2, RC
- | evlddx SAVE0, BASE, RA
- | lwz TMP0, TAB:RB->asize
- | efdcfsi TMP1, TMP2
- | cmplw cr0, TMP0, TMP2
- | efdcmpeq cr1, RC, TMP1
- | lwz TMP1, TAB:RB->array
- | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt
- | slwi TMP0, TMP2, 3
- | ble ->vmeta_tsetv // Integer key and in array part?
- | lbz TMP3, TAB:RB->marked
- | evlddx TMP2, TMP1, TMP0
- | checknil TMP2
- | checkok >3
- |1:
- | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table)
- | evstddx SAVE0, TMP1, TMP0
- | bne >7
- |2:
- | ins_next
- |
- |3: // Check for __newindex if previous value is nil.
- | lwz TAB:TMP2, TAB:RB->metatable
- | cmplwi TAB:TMP2, 0
- | beq <1 // No metatable: done.
- | lbz TMP2, TAB:TMP2->nomm
- | andi. TMP2, TMP2, 1<<MM_newindex
- | bne <1 // 'no __newindex' flag set: done.
- | b ->vmeta_tsetv
- |
- |5:
- | checkstr STR:RC // String key?
- | checkok ->BC_TSETS_Z
- | b ->vmeta_tsetv
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
- | barrierback TAB:RB, TMP3, TMP0
- | b <2
- break;
- case BC_TSETS:
- | // RA = src*8, RB = table*8, RC = str_const*8 (~)
- | evlddx TAB:RB, BASE, RB
- | srwi TMP1, RC, 1
- | checktab TAB:RB
- | subfic TMP1, TMP1, -4
- | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4
- | checkfail ->vmeta_tsets1
- |->BC_TSETS_Z:
- | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8
- | lwz TMP0, TAB:RB->hmask
- | lwz TMP1, STR:RC->hash
- | lwz NODE:TMP2, TAB:RB->node
- | evmergelo STR:RC, TISSTR, STR:RC
- | stb ZERO, TAB:RB->nomm // Clear metamethod cache.
- | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask
- | evlddx SAVE0, BASE, RA
- | slwi TMP0, TMP1, 5
- | slwi TMP1, TMP1, 3
- | sub TMP1, TMP0, TMP1
- | lbz TMP3, TAB:RB->marked
- | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8)
- |1:
- | evldd TMP0, NODE:TMP2->key
- | evldd TMP1, NODE:TMP2->val
- | evcmpeq TMP0, STR:RC
- | checkanyfail >5
- | checknil TMP1
- | checkok >4 // Key found, but nil value?
- |2:
- | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- | evstdd SAVE0, NODE:TMP2->val
- | bne >7
- |3:
- | ins_next
- |
- |4: // Check for __newindex if previous value is nil.
- | lwz TAB:TMP1, TAB:RB->metatable
- | cmplwi TAB:TMP1, 0
- | beq <2 // No metatable: done.
- | lbz TMP0, TAB:TMP1->nomm
- | andi. TMP0, TMP0, 1<<MM_newindex
- | bne <2 // 'no __newindex' flag set: done.
- | b ->vmeta_tsets
- |
- |5: // Follow hash chain.
- | lwz NODE:TMP2, NODE:TMP2->next
- | cmplwi NODE:TMP2, 0
- | bne <1
- | // End of hash chain: key not found, add a new one.
- |
- | // But check for __newindex first.
- | lwz TAB:TMP1, TAB:RB->metatable
- | la CARG3, DISPATCH_GL(tmptv)(DISPATCH)
- | stw PC, SAVE_PC
- | mr CARG1, L
- | cmplwi TAB:TMP1, 0
- | stw BASE, L->base
- | beq >6 // No metatable: continue.
- | lbz TMP0, TAB:TMP1->nomm
- | andi. TMP0, TMP0, 1<<MM_newindex
- | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check.
- |6:
- | mr CARG2, TAB:RB
- | evstdd STR:RC, 0(CARG3)
- | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
- | // Returns TValue *.
- | lwz BASE, L->base
- | evstdd SAVE0, 0(CRET1)
- | b <3 // No 2nd write barrier needed.
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
- | barrierback TAB:RB, TMP3, TMP0
- | b <3
- break;
- case BC_TSETB:
- | // RA = src*8, RB = table*8, RC = index*8
- | evlddx TAB:RB, BASE, RB
- | srwi TMP0, RC, 3
- | checktab TAB:RB
- | checkfail ->vmeta_tsetb
- | lwz TMP1, TAB:RB->asize
- | lwz TMP2, TAB:RB->array
- | lbz TMP3, TAB:RB->marked
- | cmplw TMP0, TMP1
- | evlddx SAVE0, BASE, RA
- | bge ->vmeta_tsetb
- | evlddx TMP1, TMP2, RC
- | checknil TMP1
- | checkok >5
- |1:
- | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- | evstddx SAVE0, TMP2, RC
- | bne >7
- |2:
- | ins_next
- |
- |5: // Check for __newindex if previous value is nil.
- | lwz TAB:TMP1, TAB:RB->metatable
- | cmplwi TAB:TMP1, 0
- | beq <1 // No metatable: done.
- | lbz TMP1, TAB:TMP1->nomm
- | andi. TMP1, TMP1, 1<<MM_newindex
- | bne <1 // 'no __newindex' flag set: done.
- | b ->vmeta_tsetb // Caveat: preserve TMP0!
- |
- |7: // Possible table write barrier for the value. Skip valiswhite check.
- | barrierback TAB:RB, TMP3, TMP0
- | b <2
- break;
-
- case BC_TSETM:
- | // RA = base*8 (table at base-1), RD = num_const*8 (start index)
- | add RA, BASE, RA
- |1:
- | add TMP3, KBASE, RD
- | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table.
- | addic. TMP0, MULTRES, -8
- | lwz TMP3, 4(TMP3) // Integer constant is in lo-word.
- | srwi CARG3, TMP0, 3
- | beq >4 // Nothing to copy?
- | add CARG3, CARG3, TMP3
- | lwz TMP2, TAB:CARG2->asize
- | slwi TMP1, TMP3, 3
- | lbz TMP3, TAB:CARG2->marked
- | cmplw CARG3, TMP2
- | add TMP2, RA, TMP0
- | lwz TMP0, TAB:CARG2->array
- | bgt >5
- | add TMP1, TMP1, TMP0
- | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table)
- |3: // Copy result slots to table.
- | evldd TMP0, 0(RA)
- | addi RA, RA, 8
- | cmpw cr1, RA, TMP2
- | evstdd TMP0, 0(TMP1)
- | addi TMP1, TMP1, 8
- | blt cr1, <3
- | bne >7
- |4:
- | ins_next
- |
- |5: // Need to resize array part.
- | stw BASE, L->base
- | mr CARG1, L
- | stw PC, SAVE_PC
- | mr SAVE0, RD
- | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
- | // Must not reallocate the stack.
- | mr RD, SAVE0
- | b <1
- |
- |7: // Possible table write barrier for any value. Skip valiswhite check.
- | barrierback TAB:CARG2, TMP3, TMP0
- | b <4
- break;
-
- /* -- Calls and vararg handling ----------------------------------------- */
-
- case BC_CALLM:
- | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8
- | add NARGS8:RC, NARGS8:RC, MULTRES
- | // Fall through. Assumes BC_CALL follows.
- break;
- case BC_CALL:
- | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8
- | evlddx LFUNC:RB, BASE, RA
- | mr TMP2, BASE
- | add BASE, BASE, RA
- | subi NARGS8:RC, NARGS8:RC, 8
- | checkfunc LFUNC:RB
- | addi BASE, BASE, 8
- | checkfail ->vmeta_call
- | ins_call
- break;
-
- case BC_CALLMT:
- | // RA = base*8, (RB = 0,) RC = extra_nargs*8
- | add NARGS8:RC, NARGS8:RC, MULTRES
- | // Fall through. Assumes BC_CALLT follows.
- break;
- case BC_CALLT:
- | // RA = base*8, (RB = 0,) RC = (nargs+1)*8
- | evlddx LFUNC:RB, BASE, RA
- | add RA, BASE, RA
- | lwz TMP1, FRAME_PC(BASE)
- | subi NARGS8:RC, NARGS8:RC, 8
- | checkfunc LFUNC:RB
- | addi RA, RA, 8
- | checkfail ->vmeta_callt
- |->BC_CALLT_Z:
- | andi. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand.
- | lbz TMP3, LFUNC:RB->ffid
- | xori TMP2, TMP1, FRAME_VARG
- | cmplwi cr1, NARGS8:RC, 0
- | bne >7
- |1:
- | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC.
- | li TMP2, 0
- | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function?
- | beq cr1, >3
- |2:
- | addi TMP3, TMP2, 8
- | evlddx TMP0, RA, TMP2
- | cmplw cr1, TMP3, NARGS8:RC
- | evstddx TMP0, BASE, TMP2
- | mr TMP2, TMP3
- | bne cr1, <2
- |3:
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt
- | beq >5
- |4:
- | ins_callt
- |
- |5: // Tailcall to a fast function with a Lua frame below.
- | lwz INS, -4(TMP1)
- | decode_RA8 RA, INS
- | sub TMP1, BASE, RA
- | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1)
- | lwz TMP1, LFUNC:TMP1->pc
- | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE.
- | b <4
- |
- |7: // Tailcall from a vararg function.
- | andi. TMP0, TMP2, FRAME_TYPEP
- | bne <1 // Vararg frame below?
- | sub BASE, BASE, TMP2 // Relocate BASE down.
- | lwz TMP1, FRAME_PC(BASE)
- | andi. TMP0, TMP1, FRAME_TYPE
- | b <1
- break;
-
- case BC_ITERC:
- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8))
- | subi RA, RA, 24 // evldd doesn't support neg. offsets.
- | mr TMP2, BASE
- | evlddx LFUNC:RB, BASE, RA
- | add BASE, BASE, RA
- | evldd TMP0, 8(BASE)
- | evldd TMP1, 16(BASE)
- | evstdd LFUNC:RB, 24(BASE) // Copy callable.
- | checkfunc LFUNC:RB
- | evstdd TMP0, 32(BASE) // Copy state.
- | li NARGS8:RC, 16 // Iterators get 2 arguments.
- | evstdd TMP1, 40(BASE) // Copy control var.
- | addi BASE, BASE, 32
- | checkfail ->vmeta_call
- | ins_call
- break;
-
- case BC_ITERN:
- | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
- |.if JIT
- | // NYI: add hotloop, record BC_ITERN.
- |.endif
- | add RA, BASE, RA
- | lwz TAB:RB, -12(RA)
- | lwz RC, -4(RA) // Get index from control var.
- | lwz TMP0, TAB:RB->asize
- | lwz TMP1, TAB:RB->array
- | addi PC, PC, 4
- |1: // Traverse array part.
- | cmplw RC, TMP0
- | slwi TMP3, RC, 3
- | bge >5 // Index points after array part?
- | evlddx TMP2, TMP1, TMP3
- | checknil TMP2
- | lwz INS, -4(PC)
- | checkok >4
- | efdcfsi TMP0, RC
- | addi RC, RC, 1
- | addis TMP3, PC, -(BCBIAS_J*4 >> 16)
- | evstdd TMP2, 8(RA)
- | decode_RD4 TMP1, INS
- | stw RC, -4(RA) // Update control var.
- | add PC, TMP1, TMP3
- | evstdd TMP0, 0(RA)
- |3:
- | ins_next
- |
- |4: // Skip holes in array part.
- | addi RC, RC, 1
- | b <1
- |
- |5: // Traverse hash part.
- | lwz TMP1, TAB:RB->hmask
- | sub RC, RC, TMP0
- | lwz TMP2, TAB:RB->node
- |6:
- | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1.
- | slwi TMP3, RC, 5
- | bgt <3
- | slwi RB, RC, 3
- | sub TMP3, TMP3, RB
- | evlddx RB, TMP2, TMP3
- | add NODE:TMP3, TMP2, TMP3
- | checknil RB
- | lwz INS, -4(PC)
- | checkok >7
- | evldd TMP3, NODE:TMP3->key
- | addis TMP2, PC, -(BCBIAS_J*4 >> 16)
- | evstdd RB, 8(RA)
- | add RC, RC, TMP0
- | decode_RD4 TMP1, INS
- | evstdd TMP3, 0(RA)
- | addi RC, RC, 1
- | add PC, TMP1, TMP2
- | stw RC, -4(RA) // Update control var.
- | b <3
- |
- |7: // Skip holes in hash part.
- | addi RC, RC, 1
- | b <6
- break;
-
- case BC_ISNEXT:
- | // RA = base*8, RD = target (points to ITERN)
- | add RA, BASE, RA
- | li TMP2, -24
- | evlddx CFUNC:TMP1, RA, TMP2
- | lwz TMP2, -16(RA)
- | lwz TMP3, -8(RA)
- | evmergehi TMP0, CFUNC:TMP1, CFUNC:TMP1
- | cmpwi cr0, TMP2, LJ_TTAB
- | cmpwi cr1, TMP0, LJ_TFUNC
- | cmpwi cr6, TMP3, LJ_TNIL
- | bne cr1, >5
- | lbz TMP1, CFUNC:TMP1->ffid
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq
- | cmpwi cr7, TMP1, FF_next_N
- | srwi TMP0, RD, 1
- | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq
- | add TMP3, PC, TMP0
- | bne cr0, >5
- | lus TMP1, 0xfffe
- | ori TMP1, TMP1, 0x7fff
- | stw ZERO, -4(RA) // Initialize control var.
- | stw TMP1, -8(RA)
- | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
- |1:
- | ins_next
- |5: // Despecialize bytecode if any of the checks fail.
- | li TMP0, BC_JMP
- | li TMP1, BC_ITERC
- | stb TMP0, -1(PC)
- | addis PC, TMP3, -(BCBIAS_J*4 >> 16)
- | stb TMP1, 3(PC)
- | b <1
- break;
-
- case BC_VARG:
- | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8
- | lwz TMP0, FRAME_PC(BASE)
- | add RC, BASE, RC
- | add RA, BASE, RA
- | addi RC, RC, FRAME_VARG
- | add TMP2, RA, RB
- | subi TMP3, BASE, 8 // TMP3 = vtop
- | sub RC, RC, TMP0 // RC = vbase
- | // Note: RC may now be even _above_ BASE if nargs was < numparams.
- | cmplwi cr1, RB, 0
- | sub. TMP1, TMP3, RC
- | beq cr1, >5 // Copy all varargs?
- | subi TMP2, TMP2, 16
- | ble >2 // No vararg slots?
- |1: // Copy vararg slots to destination slots.
- | evldd TMP0, 0(RC)
- | addi RC, RC, 8
- | evstdd TMP0, 0(RA)
- | cmplw RA, TMP2
- | cmplw cr1, RC, TMP3
- | bge >3 // All destination slots filled?
- | addi RA, RA, 8
- | blt cr1, <1 // More vararg slots?
- |2: // Fill up remainder with nil.
- | evstdd TISNIL, 0(RA)
- | cmplw RA, TMP2
- | addi RA, RA, 8
- | blt <2
- |3:
- | ins_next
- |
- |5: // Copy all varargs.
- | lwz TMP0, L->maxstack
- | li MULTRES, 8 // MULTRES = (0+1)*8
- | ble <3 // No vararg slots?
- | add TMP2, RA, TMP1
- | cmplw TMP2, TMP0
- | addi MULTRES, TMP1, 8
- | bgt >7
- |6:
- | evldd TMP0, 0(RC)
- | addi RC, RC, 8
- | evstdd TMP0, 0(RA)
- | cmplw RC, TMP3
- | addi RA, RA, 8
- | blt <6 // More vararg slots?
- | b <3
- |
- |7: // Grow stack for varargs.
- | mr CARG1, L
- | stw RA, L->top
- | sub SAVE0, RC, BASE // Need delta, because BASE may change.
- | stw BASE, L->base
- | sub RA, RA, BASE
- | stw PC, SAVE_PC
- | srwi CARG2, TMP1, 3
- | bl extern lj_state_growstack // (lua_State *L, int n)
- | lwz BASE, L->base
- | add RA, BASE, RA
- | add RC, BASE, SAVE0
- | subi TMP3, BASE, 8
- | b <6
- break;
-
- /* -- Returns ----------------------------------------------------------- */
-
- case BC_RETM:
- | // RA = results*8, RD = extra_nresults*8
- | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8.
- | // Fall through. Assumes BC_RET follows.
- break;
-
- case BC_RET:
- | // RA = results*8, RD = (nresults+1)*8
- | lwz PC, FRAME_PC(BASE)
- | add RA, BASE, RA
- | mr MULTRES, RD
- |1:
- | andi. TMP0, PC, FRAME_TYPE
- | xori TMP1, PC, FRAME_VARG
- | bne ->BC_RETV_Z
- |
- |->BC_RET_Z:
- | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return
- | lwz INS, -4(PC)
- | cmpwi RD, 8
- | subi TMP2, BASE, 8
- | subi RC, RD, 8
- | decode_RB8 RB, INS
- | beq >3
- | li TMP1, 0
- |2:
- | addi TMP3, TMP1, 8
- | evlddx TMP0, RA, TMP1
- | cmpw TMP3, RC
- | evstddx TMP0, TMP2, TMP1
- | beq >3
- | addi TMP1, TMP3, 8
- | evlddx TMP0, RA, TMP3
- | cmpw TMP1, RC
- | evstddx TMP0, TMP2, TMP3
- | bne <2
- |3:
- |5:
- | cmplw RB, RD
- | decode_RA8 RA, INS
- | bgt >6
- | sub BASE, TMP2, RA
- | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
- | ins_next1
- | lwz TMP1, LFUNC:TMP1->pc
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | ins_next2
- |
- |6: // Fill up results with nil.
- | subi TMP1, RD, 8
- | addi RD, RD, 8
- | evstddx TISNIL, TMP2, TMP1
- | b <5
- |
- |->BC_RETV_Z: // Non-standard return case.
- | andi. TMP2, TMP1, FRAME_TYPEP
- | bne ->vm_return
- | // Return from vararg function: relocate BASE down.
- | sub BASE, BASE, TMP1
- | lwz PC, FRAME_PC(BASE)
- | b <1
- break;
-
- case BC_RET0: case BC_RET1:
- | // RA = results*8, RD = (nresults+1)*8
- | lwz PC, FRAME_PC(BASE)
- | add RA, BASE, RA
- | mr MULTRES, RD
- | andi. TMP0, PC, FRAME_TYPE
- | xori TMP1, PC, FRAME_VARG
- | bne ->BC_RETV_Z
- |
- | lwz INS, -4(PC)
- | subi TMP2, BASE, 8
- | decode_RB8 RB, INS
- if (op == BC_RET1) {
- | evldd TMP0, 0(RA)
- | evstdd TMP0, 0(TMP2)
- }
- |5:
- | cmplw RB, RD
- | decode_RA8 RA, INS
- | bgt >6
- | sub BASE, TMP2, RA
- | lwz LFUNC:TMP1, FRAME_FUNC(BASE)
- | ins_next1
- | lwz TMP1, LFUNC:TMP1->pc
- | lwz KBASE, PC2PROTO(k)(TMP1)
- | ins_next2
- |
- |6: // Fill up results with nil.
- | subi TMP1, RD, 8
- | addi RD, RD, 8
- | evstddx TISNIL, TMP2, TMP1
- | b <5
- break;
-
- /* -- Loops and branches ------------------------------------------------ */
-
- case BC_FORL:
- |.if JIT
- | hotloop
- |.endif
- | // Fall through. Assumes BC_IFORL follows.
- break;
-
- case BC_JFORI:
- case BC_JFORL:
-#if !LJ_HASJIT
- break;
-#endif
- case BC_FORI:
- case BC_IFORL:
- | // RA = base*8, RD = target (after end of loop or start of loop)
- vk = (op == BC_IFORL || op == BC_JFORL);
- | add RA, BASE, RA
- | evldd TMP1, FORL_IDX*8(RA)
- | evldd TMP3, FORL_STEP*8(RA)
- | evldd TMP2, FORL_STOP*8(RA)
- if (!vk) {
- | evcmpgtu cr0, TMP1, TISNUM
- | evcmpgtu cr7, TMP3, TISNUM
- | evcmpgtu cr1, TMP2, TISNUM
- | cror 4*cr0+lt, 4*cr0+lt, 4*cr7+lt
- | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt
- | blt ->vmeta_for
- }
- if (vk) {
- | efdadd TMP1, TMP1, TMP3
- | evstdd TMP1, FORL_IDX*8(RA)
- }
- | evcmpgts TMP3, TISNIL
- | evstdd TMP1, FORL_EXT*8(RA)
- | bge >2
- | efdcmpgt TMP1, TMP2
- |1:
- if (op != BC_JFORL) {
- | srwi RD, RD, 1
- | add RD, PC, RD
- if (op == BC_JFORI) {
- | addis PC, RD, -(BCBIAS_J*4 >> 16)
- } else {
- | addis RD, RD, -(BCBIAS_J*4 >> 16)
- }
- }
- if (op == BC_FORI) {
- | iselgt PC, RD, PC
- } else if (op == BC_IFORL) {
- | iselgt PC, PC, RD
- } else {
- | ble =>BC_JLOOP
- }
- | ins_next
- |2:
- | efdcmpgt TMP2, TMP1
- | b <1
- break;
-
- case BC_ITERL:
- |.if JIT
- | hotloop
- |.endif
- | // Fall through. Assumes BC_IITERL follows.
- break;
-
- case BC_JITERL:
-#if !LJ_HASJIT
- break;
-#endif
- case BC_IITERL:
- | // RA = base*8, RD = target
- | evlddx TMP1, BASE, RA
- | subi RA, RA, 8
- | checknil TMP1
- | checkok >1 // Stop if iterator returned nil.
- if (op == BC_JITERL) {
- | NYI
- } else {
- | branch_RD // Otherwise save control var + branch.
- | evstddx TMP1, BASE, RA
- }
- |1:
- | ins_next
- break;
-
- case BC_LOOP:
- | // RA = base*8, RD = target (loop extent)
- | // Note: RA/RD is only used by trace recorder to determine scope/extent
- | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
- |.if JIT
- | hotloop
- |.endif
- | // Fall through. Assumes BC_ILOOP follows.
- break;
-
- case BC_ILOOP:
- | // RA = base*8, RD = target (loop extent)
- | ins_next
- break;
-
- case BC_JLOOP:
- |.if JIT
- | NYI
- |.endif
- break;
-
- case BC_JMP:
- | // RA = base*8 (only used by trace recorder), RD = target
- | branch_RD
- | ins_next
- break;
-
- /* -- Function headers -------------------------------------------------- */
-
- case BC_FUNCF:
- |.if JIT
- | hotcall
- |.endif
- case BC_FUNCV: /* NYI: compiled vararg functions. */
- | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow.
- break;
-
- case BC_JFUNCF:
-#if !LJ_HASJIT
- break;
-#endif
- case BC_IFUNCF:
- | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
- | lwz TMP2, L->maxstack
- | lbz TMP1, -4+PC2PROTO(numparams)(PC)
- | lwz KBASE, -4+PC2PROTO(k)(PC)
- | cmplw RA, TMP2
- | slwi TMP1, TMP1, 3
- | bgt ->vm_growstack_l
- | ins_next1
- |2:
- | cmplw NARGS8:RC, TMP1 // Check for missing parameters.
- | ble >3
- if (op == BC_JFUNCF) {
- | NYI
- } else {
- | ins_next2
- }
- |
- |3: // Clear missing parameters.
- | evstddx TISNIL, BASE, NARGS8:RC
- | addi NARGS8:RC, NARGS8:RC, 8
- | b <2
- break;
-
- case BC_JFUNCV:
-#if !LJ_HASJIT
- break;
-#endif
- | NYI // NYI: compiled vararg functions
- break; /* NYI: compiled vararg functions. */
-
- case BC_IFUNCV:
- | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8
- | lwz TMP2, L->maxstack
- | add TMP1, BASE, RC
- | add TMP0, RA, RC
- | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC.
- | addi TMP3, RC, 8+FRAME_VARG
- | lwz KBASE, -4+PC2PROTO(k)(PC)
- | cmplw TMP0, TMP2
- | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG.
- | bge ->vm_growstack_l
- | lbz TMP2, -4+PC2PROTO(numparams)(PC)
- | mr RA, BASE
- | mr RC, TMP1
- | ins_next1
- | cmpwi TMP2, 0
- | addi BASE, TMP1, 8
- | beq >3
- |1:
- | cmplw RA, RC // Less args than parameters?
- | evldd TMP0, 0(RA)
- | bge >4
- | evstdd TISNIL, 0(RA) // Clear old fixarg slot (help the GC).
- | addi RA, RA, 8
- |2:
- | addic. TMP2, TMP2, -1
- | evstdd TMP0, 8(TMP1)
- | addi TMP1, TMP1, 8
- | bne <1
- |3:
- | ins_next2
- |
- |4: // Clear missing parameters.
- | evmr TMP0, TISNIL
- | b <2
- break;
-
- case BC_FUNCC:
- case BC_FUNCCW:
- | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8
- if (op == BC_FUNCC) {
- | lwz TMP3, CFUNC:RB->f
- } else {
- | lwz TMP3, DISPATCH_GL(wrapf)(DISPATCH)
- }
- | add TMP1, RA, NARGS8:RC
- | lwz TMP2, L->maxstack
- | add RC, BASE, NARGS8:RC
- | stw BASE, L->base
- | cmplw TMP1, TMP2
- | stw RC, L->top
- | li_vmstate C
- | mtctr TMP3
- if (op == BC_FUNCCW) {
- | lwz CARG2, CFUNC:RB->f
- }
- | mr CARG1, L
- | bgt ->vm_growstack_c // Need to grow stack.
- | st_vmstate
- | bctrl // (lua_State *L [, lua_CFunction f])
- | // Returns nresults.
- | lwz TMP1, L->top
- | slwi RD, CRET1, 3
- | lwz BASE, L->base
- | li_vmstate INTERP
- | lwz PC, FRAME_PC(BASE) // Fetch PC of caller.
- | sub RA, TMP1, RD // RA = L->top - nresults*8
- | st_vmstate
- | b ->vm_returnc
- break;
-
- /* ---------------------------------------------------------------------- */
-
- default:
- fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
- exit(2);
- break;
- }
-}
-
-static int build_backend(BuildCtx *ctx)
-{
- int op;
-
- dasm_growpc(Dst, BC__MAX);
-
- build_subroutines(ctx);
-
- |.code_op
- for (op = 0; op < BC__MAX; op++)
- build_ins(ctx, (BCOp)op, op);
-
- return BC__MAX;
-}
-
-/* Emit pseudo frame-info for all assembler functions. */
-static void emit_asm_debug(BuildCtx *ctx)
-{
- int i;
- switch (ctx->mode) {
- case BUILD_elfasm:
- fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
- fprintf(ctx->fp,
- ".Lframe0:\n"
- "\t.long .LECIE0-.LSCIE0\n"
- ".LSCIE0:\n"
- "\t.long 0xffffffff\n"
- "\t.byte 0x1\n"
- "\t.string \"\"\n"
- "\t.uleb128 0x1\n"
- "\t.sleb128 -4\n"
- "\t.byte 65\n"
- "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
- "\t.align 2\n"
- ".LECIE0:\n\n");
- fprintf(ctx->fp,
- ".LSFDE0:\n"
- "\t.long .LEFDE0-.LASFDE0\n"
- ".LASFDE0:\n"
- "\t.long .Lframe0\n"
- "\t.long .Lbegin\n"
- "\t.long %d\n"
- "\t.byte 0xe\n\t.uleb128 %d\n"
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
- "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n",
- (int)ctx->codesz, CFRAME_SIZE);
- for (i = 14; i <= 31; i++)
- fprintf(ctx->fp,
- "\t.byte %d\n\t.uleb128 %d\n"
- "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n",
- 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
- fprintf(ctx->fp,
- "\t.align 2\n"
- ".LEFDE0:\n\n");
- fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
- fprintf(ctx->fp,
- ".Lframe1:\n"
- "\t.long .LECIE1-.LSCIE1\n"
- ".LSCIE1:\n"
- "\t.long 0\n"
- "\t.byte 0x1\n"
- "\t.string \"zPR\"\n"
- "\t.uleb128 0x1\n"
- "\t.sleb128 -4\n"
- "\t.byte 65\n"
- "\t.uleb128 6\n" /* augmentation length */
- "\t.byte 0x1b\n" /* pcrel|sdata4 */
- "\t.long lj_err_unwind_dwarf-.\n"
- "\t.byte 0x1b\n" /* pcrel|sdata4 */
- "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n"
- "\t.align 2\n"
- ".LECIE1:\n\n");
- fprintf(ctx->fp,
- ".LSFDE1:\n"
- "\t.long .LEFDE1-.LASFDE1\n"
- ".LASFDE1:\n"
- "\t.long .LASFDE1-.Lframe1\n"
- "\t.long .Lbegin-.\n"
- "\t.long %d\n"
- "\t.uleb128 0\n" /* augmentation length */
- "\t.byte 0xe\n\t.uleb128 %d\n"
- "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n"
- "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n",
- (int)ctx->codesz, CFRAME_SIZE);
- for (i = 14; i <= 31; i++)
- fprintf(ctx->fp,
- "\t.byte %d\n\t.uleb128 %d\n"
- "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n",
- 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i));
- fprintf(ctx->fp,
- "\t.align 2\n"
- ".LEFDE1:\n\n");
- break;
- default:
- break;
- }
-}
-
diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc
new file mode 100644
index 00000000..03d96557
--- /dev/null
+++ b/src/vm_x64.dasc
@@ -0,0 +1,4935 @@
+|// Low-level VM code for x64 CPUs in LJ_GC64 mode.
+|// Bytecode interpreter, fast functions and helper functions.
+|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h
+|
+|.arch x64
+|.section code_op, code_sub
+|
+|.actionlist build_actionlist
+|.globals GLOB_
+|.globalnames globnames
+|.externnames extnames
+|
+|//-----------------------------------------------------------------------
+|
+|.if WIN
+|.define X64WIN, 1 // Windows/x64 calling conventions.
+|.endif
+|
+|// Fixed register assignments for the interpreter.
+|// This is very fragile and has many dependencies. Caveat emptor.
+|.define BASE, rdx // Not C callee-save, refetched anyway.
+|.if X64WIN
+|.define KBASE, rdi // Must be C callee-save.
+|.define PC, rsi // Must be C callee-save.
+|.define DISPATCH, rbx // Must be C callee-save.
+|.define KBASEd, edi
+|.define PCd, esi
+|.define DISPATCHd, ebx
+|.else
+|.define KBASE, r15 // Must be C callee-save.
+|.define PC, rbx // Must be C callee-save.
+|.define DISPATCH, r14 // Must be C callee-save.
+|.define KBASEd, r15d
+|.define PCd, ebx
+|.define DISPATCHd, r14d
+|.endif
+|
+|.define RA, rcx
+|.define RAd, ecx
+|.define RAH, ch
+|.define RAL, cl
+|.define RB, rbp // Must be rbp (C callee-save).
+|.define RBd, ebp
+|.define RC, rax // Must be rax.
+|.define RCd, eax
+|.define RCW, ax
+|.define RCH, ah
+|.define RCL, al
+|.define OP, RBd
+|.define RD, RC
+|.define RDd, RCd
+|.define RDW, RCW
+|.define RDL, RCL
+|.define TMPR, r10
+|.define TMPRd, r10d
+|.define ITYPE, r11
+|.define ITYPEd, r11d
+|
+|.if X64WIN
+|.define CARG1, rcx // x64/WIN64 C call arguments.
+|.define CARG2, rdx
+|.define CARG3, r8
+|.define CARG4, r9
+|.define CARG1d, ecx
+|.define CARG2d, edx
+|.define CARG3d, r8d
+|.define CARG4d, r9d
+|.else
+|.define CARG1, rdi // x64/POSIX C call arguments.
+|.define CARG2, rsi
+|.define CARG3, rdx
+|.define CARG4, rcx
+|.define CARG5, r8
+|.define CARG6, r9
+|.define CARG1d, edi
+|.define CARG2d, esi
+|.define CARG3d, edx
+|.define CARG4d, ecx
+|.define CARG5d, r8d
+|.define CARG6d, r9d
+|.endif
+|
+|// Type definitions. Some of these are only used for documentation.
+|.type L, lua_State
+|.type GL, global_State
+|.type TVALUE, TValue
+|.type GCOBJ, GCobj
+|.type STR, GCstr
+|.type TAB, GCtab
+|.type LFUNC, GCfuncL
+|.type CFUNC, GCfuncC
+|.type PROTO, GCproto
+|.type UPVAL, GCupval
+|.type NODE, Node
+|.type NARGS, int
+|.type TRACE, GCtrace
+|.type SBUF, SBuf
+|
+|// Stack layout while in interpreter. Must match with lj_frame.h.
+|//-----------------------------------------------------------------------
+|.if X64WIN // x64/Windows stack layout
+|
+|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
+|.macro saveregs_
+| push rdi; push rsi; push rbx
+| sub rsp, CFRAME_SPACE
+|.endmacro
+|.macro saveregs
+| push rbp; saveregs_
+|.endmacro
+|.macro restoreregs
+| add rsp, CFRAME_SPACE
+| pop rbx; pop rsi; pop rdi; pop rbp
+|.endmacro
+|
+|.define SAVE_CFRAME, aword [rsp+aword*13]
+|.define SAVE_PC, aword [rsp+aword*12]
+|.define SAVE_L, aword [rsp+aword*11]
+|.define SAVE_ERRF, dword [rsp+dword*21]
+|.define SAVE_NRES, dword [rsp+dword*20]
+|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
+|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
+|.define SAVE_R4, aword [rsp+aword*8]
+|.define SAVE_R3, aword [rsp+aword*7]
+|.define SAVE_R2, aword [rsp+aword*6]
+|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
+|.define ARG5, aword [rsp+aword*4]
+|.define CSAVE_4, aword [rsp+aword*3]
+|.define CSAVE_3, aword [rsp+aword*2]
+|.define CSAVE_2, aword [rsp+aword*1]
+|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
+|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
+|
+|.define ARG5d, dword [rsp+dword*8]
+|.define TMP1, ARG5 // TMP1 overlaps ARG5
+|.define TMP1d, ARG5d
+|.define TMP1hi, dword [rsp+dword*9]
+|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
+|
+|//-----------------------------------------------------------------------
+|.else // x64/POSIX stack layout
+|
+|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
+|.macro saveregs_
+| push rbx; push r15; push r14
+|.if NO_UNWIND
+| push r13; push r12
+|.endif
+| sub rsp, CFRAME_SPACE
+|.endmacro
+|.macro saveregs
+| push rbp; saveregs_
+|.endmacro
+|.macro restoreregs
+| add rsp, CFRAME_SPACE
+|.if NO_UNWIND
+| pop r12; pop r13
+|.endif
+| pop r14; pop r15; pop rbx; pop rbp
+|.endmacro
+|
+|//----- 16 byte aligned,
+|.if NO_UNWIND
+|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
+|.define SAVE_R4, aword [rsp+aword*10]
+|.define SAVE_R3, aword [rsp+aword*9]
+|.define SAVE_R2, aword [rsp+aword*8]
+|.define SAVE_R1, aword [rsp+aword*7]
+|.define SAVE_RU2, aword [rsp+aword*6]
+|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
+|.else
+|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
+|.define SAVE_R4, aword [rsp+aword*8]
+|.define SAVE_R3, aword [rsp+aword*7]
+|.define SAVE_R2, aword [rsp+aword*6]
+|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
+|.endif
+|.define SAVE_CFRAME, aword [rsp+aword*4]
+|.define SAVE_PC, aword [rsp+aword*3]
+|.define SAVE_L, aword [rsp+aword*2]
+|.define SAVE_ERRF, dword [rsp+dword*3]
+|.define SAVE_NRES, dword [rsp+dword*2]
+|.define TMP1, aword [rsp] //<-- rsp while in interpreter.
+|//----- 16 byte aligned
+|
+|.define TMP1d, dword [rsp]
+|.define TMP1hi, dword [rsp+dword*1]
+|.define MULTRES, TMP1d // MULTRES overlaps TMP1d.
+|
+|.endif
+|
+|//-----------------------------------------------------------------------
+|
+|// Instruction headers.
+|.macro ins_A; .endmacro
+|.macro ins_AD; .endmacro
+|.macro ins_AJ; .endmacro
+|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro
+|.macro ins_AB_; movzx RBd, RCH; .endmacro
+|.macro ins_A_C; movzx RCd, RCL; .endmacro
+|.macro ins_AND; not RD; .endmacro
+|
+|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
+|.macro ins_NEXT
+| mov RCd, [PC]
+| movzx RAd, RCH
+| movzx OP, RCL
+| add PC, 4
+| shr RCd, 16
+| jmp aword [DISPATCH+OP*8]
+|.endmacro
+|
+|// Instruction footer.
+|.if 1
+| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
+| .define ins_next, ins_NEXT
+| .define ins_next_, ins_NEXT
+|.else
+| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
+| // Affects only certain kinds of benchmarks (and only with -j off).
+| // Around 10%-30% slower on Core2, a lot more slower on P4.
+| .macro ins_next
+| jmp ->ins_next
+| .endmacro
+| .macro ins_next_
+| ->ins_next:
+| ins_NEXT
+| .endmacro
+|.endif
+|
+|// Call decode and dispatch.
+|.macro ins_callt
+| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC
+| mov PC, LFUNC:RB->pc
+| mov RAd, [PC]
+| movzx OP, RAL
+| movzx RAd, RAH
+| add PC, 4
+| jmp aword [DISPATCH+OP*8]
+|.endmacro
+|
+|.macro ins_call
+| // BASE = new base, RB = LFUNC, RD = nargs+1
+| mov [BASE-8], PC
+| ins_callt
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+|
+|// Macros to clear or set tags.
+|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro
+|.macro settp, reg, tp
+| mov64 ITYPE, ((uint64_t)tp<<47)
+| or reg, ITYPE
+|.endmacro
+|.macro settp, dst, reg, tp
+| mov64 dst, ((uint64_t)tp<<47)
+| or dst, reg
+|.endmacro
+|.macro setint, reg
+| settp reg, LJ_TISNUM
+|.endmacro
+|.macro setint, dst, reg
+| settp dst, reg, LJ_TISNUM
+|.endmacro
+|
+|// Macros to test operand types.
+|.macro checktp_nc, reg, tp, target
+| mov ITYPE, reg
+| sar ITYPE, 47
+| cmp ITYPEd, tp
+| jne target
+|.endmacro
+|.macro checktp, reg, tp, target
+| mov ITYPE, reg
+| cleartp reg
+| sar ITYPE, 47
+| cmp ITYPEd, tp
+| jne target
+|.endmacro
+|.macro checktptp, src, tp, target
+| mov ITYPE, src
+| sar ITYPE, 47
+| cmp ITYPEd, tp
+| jne target
+|.endmacro
+|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro
+|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro
+|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro
+|
+|.macro checknumx, reg, target, jump
+| mov ITYPE, reg
+| sar ITYPE, 47
+| cmp ITYPEd, LJ_TISNUM
+| jump target
+|.endmacro
+|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro
+|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro
+|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro
+|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro
+|.macro checknumber, src, target; checknumx src, target, ja; .endmacro
+|
+|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro
+|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro
+|
+|// These operands must be used with movzx.
+|.define PC_OP, byte [PC-4]
+|.define PC_RA, byte [PC-3]
+|.define PC_RB, byte [PC-1]
+|.define PC_RC, byte [PC-2]
+|.define PC_RD, word [PC-2]
+|
+|.macro branchPC, reg
+| lea PC, [PC+reg*4-BCBIAS_J*4]
+|.endmacro
+|
+|// Assumes DISPATCH is relative to GL.
+#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
+#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
+|
+#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
+|
+|// Decrement hashed hotcount and trigger trace recorder if zero.
+|.macro hotloop, reg
+| mov reg, PCd
+| shr reg, 1
+| and reg, HOTCOUNT_PCMASK
+| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP
+| jb ->vm_hotloop
+|.endmacro
+|
+|.macro hotcall, reg
+| mov reg, PCd
+| shr reg, 1
+| and reg, HOTCOUNT_PCMASK
+| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL
+| jb ->vm_hotcall
+|.endmacro
+|
+|// Set current VM state.
+|.macro set_vmstate, st
+| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
+|.endmacro
+|
+|.macro fpop1; fstp st1; .endmacro
+|
+|// Synthesize SSE FP constants.
+|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
+| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp
+|.endmacro
+|
+|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
+| mov64 tmp, U64x(val,00000000); movd reg, tmp
+|.endmacro
+|
+|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
+| sseconst_hi reg, tmp, 80000000
+|.endmacro
+|.macro sseconst_1, reg, tmp // Synthesize 1.0.
+| sseconst_hi reg, tmp, 3ff00000
+|.endmacro
+|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
+| sseconst_hi reg, tmp, 43300000
+|.endmacro
+|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
+| sseconst_hi reg, tmp, 43380000
+|.endmacro
+|
+|// Move table write barrier back. Overwrites reg.
+|.macro barrierback, tab, reg
+| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
+| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)]
+| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab
+| mov tab->gclist, reg
+|.endmacro
+|
+|//-----------------------------------------------------------------------
+
+/* Generate subroutines used by opcodes and other parts of the VM. */
+/* The .code_sub section should be last to help static branch prediction. */
+static void build_subroutines(BuildCtx *ctx)
+{
+ |.code_sub
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Return handling ----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_returnp:
+ | test PCd, FRAME_P
+ | jz ->cont_dispatch
+ |
+ | // Return from pcall or xpcall fast func.
+ | and PC, -8
+ | sub BASE, PC // Restore caller base.
+ | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
+ | mov PC, [BASE-8] // Fetch PC of previous frame.
+ | // Prepending may overwrite the pcall frame, so do it at the end.
+ | mov_true ITYPE
+ | mov aword [BASE+RA], ITYPE // Prepend true to results.
+ |
+ |->vm_returnc:
+ | add RDd, 1 // RD = nresults+1
+ | jz ->vm_unwind_yield
+ | mov MULTRES, RDd
+ | test PC, FRAME_TYPE
+ | jz ->BC_RET_Z // Handle regular return to Lua.
+ |
+ |->vm_return:
+ | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
+ | xor PC, FRAME_C
+ | test PCd, FRAME_TYPE
+ | jnz ->vm_returnp
+ |
+ | // Return to C.
+ | set_vmstate C
+ | and PC, -8
+ | sub PC, BASE
+ | neg PC // Previous base = BASE - delta.
+ |
+ | sub RDd, 1
+ | jz >2
+ |1: // Move results down.
+ | mov RB, [BASE+RA]
+ | mov [BASE-16], RB
+ | add BASE, 8
+ | sub RDd, 1
+ | jnz <1
+ |2:
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, PC
+ |3:
+ | mov RDd, MULTRES
+ | mov RAd, SAVE_NRES // RA = wanted nresults+1
+ |4:
+ | cmp RAd, RDd
+ | jne >6 // More/less results wanted?
+ |5:
+ | sub BASE, 16
+ | mov L:RB->top, BASE
+ |
+ |->vm_leave_cp:
+ | mov RA, SAVE_CFRAME // Restore previous C frame.
+ | mov L:RB->cframe, RA
+ | xor eax, eax // Ok return status for vm_pcall.
+ |
+ |->vm_leave_unw:
+ | restoreregs
+ | ret
+ |
+ |6:
+ | jb >7 // Less results wanted?
+ | // More results wanted. Check stack size and fill up results with nil.
+ | cmp BASE, L:RB->maxstack
+ | ja >8
+ | mov aword [BASE-16], LJ_TNIL
+ | add BASE, 8
+ | add RDd, 1
+ | jmp <4
+ |
+ |7: // Less results wanted.
+ | test RAd, RAd
+ | jz <5 // But check for LUA_MULTRET+1.
+ | sub RA, RD // Negative result!
+ | lea BASE, [BASE+RA*8] // Correct top.
+ | jmp <5
+ |
+ |8: // Corner case: need to grow stack for filling up results.
+ | // This can happen if:
+ | // - A C function grows the stack (a lot).
+ | // - The GC shrinks the stack in between.
+ | // - A return back from a lua_call() with (high) nresults adjustment.
+ | mov L:RB->top, BASE // Save current top held in BASE (yes).
+ | mov MULTRES, RDd // Need to fill only remainder with nil.
+ | mov CARG2d, RAd
+ | mov CARG1, L:RB
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
+ | jmp <3
+ |
+ |->vm_unwind_yield:
+ | mov al, LUA_YIELD
+ | jmp ->vm_unwind_c_eh
+ |
+ |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
+ | // (void *cframe, int errcode)
+ | mov eax, CARG2d // Error return status for vm_pcall.
+ | mov rsp, CARG1
+ |->vm_unwind_c_eh: // Landing pad for external unwinder.
+ | mov L:RB, SAVE_L
+ | mov GL:RB, L:RB->glref
+ | mov dword GL:RB->vmstate, ~LJ_VMST_C
+ | jmp ->vm_leave_unw
+ |
+ |->vm_unwind_rethrow:
+ |.if not X64WIN
+ | mov CARG1, SAVE_L
+ | mov CARG2d, eax
+ | restoreregs
+ | jmp extern lj_err_throw // (lua_State *L, int errcode)
+ |.endif
+ |
+ |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
+ | // (void *cframe)
+ | and CARG1, CFRAME_RAWMASK
+ | mov rsp, CARG1
+ |->vm_unwind_ff_eh: // Landing pad for external unwinder.
+ | mov L:RB, SAVE_L
+ | mov RDd, 1+1 // Really 1+2 results, incr. later.
+ | mov BASE, L:RB->base
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | add DISPATCH, GG_G2DISP
+ | mov PC, [BASE-8] // Fetch PC of previous frame.
+ | mov_false RA
+ | mov RB, [BASE]
+ | mov [BASE-16], RA // Prepend false to error message.
+ | mov [BASE-8], RB
+ | mov RA, -16 // Results start at BASE+RA = BASE-16.
+ | set_vmstate INTERP
+ | jmp ->vm_returnc // Increments RD/MULTRES and returns.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Grow stack for calls -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_growstack_c: // Grow stack for C function.
+ | mov CARG2d, LUA_MINSTACK
+ | jmp >2
+ |
+ |->vm_growstack_v: // Grow stack for vararg Lua function.
+ | sub RD, 16 // LJ_FR2
+ | jmp >1
+ |
+ |->vm_growstack_f: // Grow stack for fixarg Lua function.
+ | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
+ | lea RD, [BASE+NARGS:RD*8-8]
+ |1:
+ | movzx RAd, byte [PC-4+PC2PROTO(framesize)]
+ | add PC, 4 // Must point after first instruction.
+ | mov L:RB->base, BASE
+ | mov L:RB->top, RD
+ | mov SAVE_PC, PC
+ | mov CARG2, RA
+ |2:
+ | // RB = L, L->base = new base, L->top = top
+ | mov CARG1, L:RB
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov BASE, L:RB->base
+ | mov RD, L:RB->top
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | sub RD, BASE
+ | shr RDd, 3
+ | add NARGS:RDd, 1
+ | // BASE = new base, RB = LFUNC, RD = nargs+1
+ | ins_callt // Just retry the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Entry points into the assembler VM ---------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_resume: // Setup C frame and resume thread.
+ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
+ | saveregs
+ | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
+ | mov SAVE_L, CARG1
+ | mov RA, CARG2
+ | mov PCd, FRAME_CP
+ | xor RDd, RDd
+ | lea KBASE, [esp+CFRAME_RESUME]
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | add DISPATCH, GG_G2DISP
+ | mov SAVE_PC, RD // Any value outside of bytecode is ok.
+ | mov SAVE_CFRAME, RD
+ | mov SAVE_NRES, RDd
+ | mov SAVE_ERRF, RDd
+ | mov L:RB->cframe, KBASE
+ |// RDd was zeroed above, so RDL (its low byte) serves as the zero status
+ |// value for both the compare here and the store after a yield below.
+ | cmp byte L:RB->status, RDL
+ | je >2 // Initial resume (like a call).
+ |
+ | // Resume after yield (like a return).
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+ | set_vmstate INTERP
+ | mov byte L:RB->status, RDL
+ | mov BASE, L:RB->base
+ | mov RD, L:RB->top
+ | sub RD, RA
+ | shr RDd, 3
+ | add RDd, 1 // RD = nresults+1
+ | sub RA, BASE // RA = resultofs
+ | mov PC, [BASE-8]
+ | mov MULTRES, RDd
+ | test PCd, FRAME_TYPE
+ | jz ->BC_RET_Z
+ | jmp ->vm_return
+ |
+ |->vm_pcall: // Setup protected C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
+ | saveregs
+ | mov PCd, FRAME_CP
+ | mov SAVE_ERRF, CARG4d
+ | jmp >1
+ |
+ |->vm_call: // Setup C frame and enter VM.
+ | // (lua_State *L, TValue *base, int nres1)
+ | saveregs
+ | mov PCd, FRAME_C
+ |
+ |1: // Entry point for vm_pcall above (PC = ftype).
+ | mov SAVE_NRES, CARG3d
+ | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
+ | mov SAVE_L, CARG1
+ | mov RA, CARG2
+ |
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
+ | mov SAVE_CFRAME, KBASE
+ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
+ | add DISPATCH, GG_G2DISP
+ | mov L:RB->cframe, rsp
+ |
+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+ | set_vmstate INTERP
+ | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
+ | add PC, RA
+ | sub PC, BASE // PC = frame delta + frame type
+ |
+ | mov RD, L:RB->top
+ | sub RD, RA
+ | shr NARGS:RDd, 3
+ | add NARGS:RDd, 1 // RD = nargs+1
+ |
+ |->vm_call_dispatch:
+ | mov LFUNC:RB, [RA-16]
+ | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE.
+ |
+ |->vm_call_dispatch_f:
+ | mov BASE, RA
+ | ins_call
+ | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
+ |
+ |->vm_cpcall: // Setup protected C frame, call C.
+ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
+ | saveregs
+ | mov L:RB, CARG1 // Caveat: CARG1 may be RA.
+ | mov SAVE_L, CARG1
+ | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
+ |
+ | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
+ | sub KBASE, L:RB->top
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
+ | mov SAVE_ERRF, 0 // No error function.
+ | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame.
+ | add DISPATCH, GG_G2DISP
+ | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
+ |
+ | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
+ | mov SAVE_CFRAME, KBASE
+ | mov L:RB->cframe, rsp
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+ |
+ |// The cp callback decides whether to enter the VM: it returns either a
+ |// new base pointer (continue at label 2 like a regular call) or NULL.
+ | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
+ | // TValue * (new base) or NULL returned in eax (RC).
+ | test RC, RC
+ | jz ->vm_leave_cp // No base? Just remove C frame.
+ | mov RA, RC
+ | mov PCd, FRAME_CP
+ | jmp <2 // Else continue with the call.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Metamethod handling ------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |//-- Continuation dispatch ----------------------------------------------
+ |
+ |->cont_dispatch:
+ | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
+ | add RA, BASE
+ | and PC, -8
+ | mov RB, BASE
+ | sub BASE, PC // Restore caller BASE.
+ | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg.
+ | mov RC, RA // ... in [RC]
+ | mov PC, [RB-24] // Restore PC from [cont|PC].
+ |// The continuation slot holds either a code address to jump to, or a
+ |// small integer tag: 0 = tail call from C function, 1 = FFI callback
+ |// return (see the FFI branch below).
+ | mov RA, qword [RB-32] // May be negative on WIN64 with debug.
+ |.if FFI
+ | cmp RA, 1
+ | jbe >1
+ |.endif
+ | mov LFUNC:KBASE, [BASE-16]
+ | cleartp LFUNC:KBASE
+ | mov KBASE, LFUNC:KBASE->pc
+ | mov KBASE, [KBASE+PC2PROTO(k)]
+ | // BASE = base, RC = result, RB = meta base
+ | jmp RA // Jump to continuation.
+ |
+ |.if FFI
+ |1:
+ | je ->cont_ffi_callback // cont = 1: return from FFI callback.
+ | // cont = 0: Tail call from C function.
+ | sub RB, BASE
+ | shr RBd, 3
+ | lea RDd, [RBd-3]
+ | jmp ->vm_call_tail
+ |.endif
+ |
+ |->cont_cat: // BASE = base, RC = result, RB = mbase
+ | movzx RAd, PC_RB
+ | sub RB, 32
+ | lea RA, [BASE+RA*8]
+ | sub RA, RB
+ | je ->cont_ra
+ | neg RA
+ | shr RAd, 3
+ |// Both branches do the same work; only the statement order differs to
+ |// respect which argument registers alias BASE/RA per calling convention.
+ |.if X64WIN
+ | mov CARG3d, RAd
+ | mov L:CARG1, SAVE_L
+ | mov L:CARG1->base, BASE
+ | mov RC, [RC]
+ | mov [RB], RC
+ | mov CARG2, RB
+ |.else
+ | mov L:CARG1, SAVE_L
+ | mov L:CARG1->base, BASE
+ | mov CARG3d, RAd
+ | mov RA, [RC]
+ | mov [RB], RA
+ | mov CARG2, RB
+ |.endif
+ | jmp ->BC_CAT_Z
+ |
+ |//-- Table indexing metamethods -----------------------------------------
+ |
+ |// Table-load metamethod entries. The three entry points normalize their
+ |// operands into RB (TValue *t) and RC (TValue *k), then merge at label 2
+ |// for the common lj_meta_tget call.
+ |->vmeta_tgets:
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
+ | mov TMP1, STR:RC
+ | lea RC, TMP1
+ | cmp PC_OP, BC_GGET
+ | jne >1
+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
+ | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
+ | mov [RB], TAB:RA
+ | jmp >2
+ |
+ |->vmeta_tgetb:
+ | movzx RCd, PC_RC
+ |.if DUALNUM
+ | setint RC
+ | mov TMP1, RC
+ |.else
+ | cvtsi2sd xmm0, RCd
+ | movsd TMP1, xmm0
+ |.endif
+ | lea RC, TMP1
+ | jmp >1
+ |
+ |->vmeta_tgetv:
+ | movzx RCd, PC_RC // Reload TValue *k from RC.
+ | lea RC, [BASE+RC*8]
+ |1:
+ | movzx RBd, PC_RB // Reload TValue *t from RB.
+ | lea RB, [BASE+RB*8]
+ |2:
+ | mov L:CARG1, SAVE_L
+ | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
+ | mov CARG2, RB
+ | mov CARG3, RC
+ | mov L:RB, L:CARG1
+ | mov SAVE_PC, PC
+ | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ | test RC, RC
+ | jz >3
+ |->cont_ra: // BASE = base, RC = result
+ | movzx RAd, PC_RA
+ | mov RB, [RC]
+ | mov [BASE+RA*8], RB
+ | ins_next
+ |
+ |3: // Call __index metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k
+ | mov RA, L:RB->top
+ | mov [RA-24], PC // [cont|PC]
+ | lea PC, [RA+FRAME_CONT]
+ | sub PC, BASE
+ | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
+ | mov NARGS:RDd, 2+1 // 2 args for func(t, k).
+ | cleartp LFUNC:RB
+ | jmp ->vm_call_dispatch_f
+ |
+ |->vmeta_tgetr:
+ | mov CARG1, TAB:RB
+ | mov RB, BASE // Save BASE.
+ | mov CARG2d, RCd // Caveat: CARG2 == BASE
+ | call extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in eax (RC).
+ | movzx RAd, PC_RA
+ | mov BASE, RB // Restore BASE.
+ | test RC, RC
+ | jnz ->BC_TGETR_Z
+ | mov ITYPE, LJ_TNIL
+ | jmp ->BC_TGETR2_Z
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |// Table-store metamethod entries. Structured like the tget* group above:
+ |// normalize t into RB and k into RC, merge at label 2 for lj_meta_tset.
+ |->vmeta_tsets:
+ | settp STR:RC, LJ_TSTR // STR:RC = GCstr *
+ | mov TMP1, STR:RC
+ | lea RC, TMP1
+ | cmp PC_OP, BC_GSET
+ | jne >1
+ | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab *
+ | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
+ | mov [RB], TAB:RA
+ | jmp >2
+ |
+ |->vmeta_tsetb:
+ | movzx RCd, PC_RC
+ |.if DUALNUM
+ | setint RC
+ | mov TMP1, RC
+ |.else
+ | cvtsi2sd xmm0, RCd
+ | movsd TMP1, xmm0
+ |.endif
+ | lea RC, TMP1
+ | jmp >1
+ |
+ |->vmeta_tsetv:
+ | movzx RCd, PC_RC // Reload TValue *k from RC.
+ | lea RC, [BASE+RC*8]
+ |1:
+ | movzx RBd, PC_RB // Reload TValue *t from RB.
+ | lea RB, [BASE+RB*8]
+ |2:
+ | mov L:CARG1, SAVE_L
+ | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
+ | mov CARG2, RB
+ | mov CARG3, RC
+ | mov L:RB, L:CARG1
+ | mov SAVE_PC, PC
+ | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
+ | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ | test RC, RC
+ | jz >3
+ | // NOBARRIER: lj_meta_tset ensures the table is not black.
+ | movzx RAd, PC_RA
+ | mov RB, [BASE+RA*8]
+ | mov [RC], RB
+ |->cont_nop: // BASE = base, (RC = result)
+ | ins_next
+ |
+ |3: // Call __newindex metamethod.
+ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
+ | mov RA, L:RB->top
+ | mov [RA-24], PC // [cont|PC]
+ | movzx RCd, PC_RA
+ | // Copy value to third argument.
+ | mov RB, [BASE+RC*8]
+ | mov [RA+16], RB
+ | lea PC, [RA+FRAME_CONT]
+ | sub PC, BASE
+ | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here.
+ | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v).
+ | cleartp LFUNC:RB
+ | jmp ->vm_call_dispatch_f
+ |
+ |->vmeta_tsetr:
+ |.if X64WIN
+ | mov L:CARG1, SAVE_L
+ | mov CARG3d, RCd
+ | mov L:CARG1->base, BASE
+ | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE.
+ |.else
+ | mov L:CARG1, SAVE_L
+ | mov CARG2, TAB:RB
+ | mov L:CARG1->base, BASE
+ | mov RB, BASE // Save BASE.
+ | mov CARG3d, RCd // Caveat: CARG3 == BASE.
+ |.endif
+ | mov SAVE_PC, PC
+ | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // TValue * returned in eax (RC).
+ | movzx RAd, PC_RA
+ | mov BASE, RB // Restore BASE.
+ | jmp ->BC_TSETR_Z
+ |
+ |//-- Comparison metamethods ---------------------------------------------
+ |
+ |->vmeta_comp:
+ | movzx RDd, PC_RD
+ | movzx RAd, PC_RA
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE.
+ |.if X64WIN
+ | lea CARG3, [BASE+RD*8]
+ | lea CARG2, [BASE+RA*8]
+ |.else
+ | lea CARG2, [BASE+RA*8]
+ | lea CARG3, [BASE+RD*8]
+ |.endif
+ | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA.
+ | movzx CARG4d, PC_OP
+ | mov SAVE_PC, PC
+ | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
+ |// Shared tri-state decode: RC > 1 means a metamethod base pointer
+ |// (dispatch via vmeta_binop); RC == 1 takes the branch; RC == 0 falls
+ |// through. vmeta_equal/vmeta_equal_cd reuse label 3, cont_condt/f reuse
+ |// labels 4-6.
+ |3:
+ | mov BASE, L:RB->base
+ | cmp RC, 1
+ | ja ->vmeta_binop
+ |4:
+ | lea PC, [PC+4]
+ | jb >6
+ |5:
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |6:
+ | ins_next
+ |
+ |->cont_condt: // BASE = base, RC = result
+ | add PC, 4
+ | mov ITYPE, [RC]
+ | sar ITYPE, 47
+ | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true.
+ | jb <5
+ | jmp <6
+ |
+ |->cont_condf: // BASE = base, RC = result
+ | mov ITYPE, [RC]
+ | sar ITYPE, 47
+ | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false.
+ | jmp <4
+ |
+ |->vmeta_equal:
+ | cleartp TAB:RD
+ | sub PC, 4
+ |.if X64WIN
+ | mov CARG3, RD
+ | mov CARG4d, RBd
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
+ | mov CARG2, RA
+ | mov CARG1, L:RB // Caveat: CARG1 == RA.
+ |.else
+ | mov CARG2, RA
+ | mov CARG4d, RBd // Caveat: CARG4 == RA.
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Caveat: CARG3 == BASE.
+ | mov CARG3, RD
+ | mov CARG1, L:RB
+ |.endif
+ | mov SAVE_PC, PC
+ | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
+ | jmp <3
+ |
+ |->vmeta_equal_cd:
+ |.if FFI
+ | sub PC, 4
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov CARG1, L:RB
+ | mov CARG2d, dword [PC-4]
+ | mov SAVE_PC, PC
+ | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins)
+ | // 0/1 or TValue * (metamethod) returned in eax (RC).
+ | jmp <3
+ |.endif
+ |
+ |->vmeta_istype:
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
+ | mov CARG2d, RAd
+ | mov CARG3d, RDd
+ | mov L:CARG1, L:RB
+ | mov SAVE_PC, PC
+ | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ | mov BASE, L:RB->base
+ | jmp <6
+ |
+ |//-- Arithmetic metamethods ---------------------------------------------
+ |
+ |// Arithmetic metamethod entries. The *_vno/_nvo/_vvo variants first
+ |// reload RB/RC operand indices from the instruction (DUALNUM builds),
+ |// then each variant computes RB = TValue *o1 and RC = TValue *o2
+ |// (vn: o2 in constants, nv: swapped, vv: both on stack) before merging
+ |// at label 2 for the lj_meta_arith call.
+ |->vmeta_arith_vno:
+ |.if DUALNUM
+ | movzx RBd, PC_RB
+ | movzx RCd, PC_RC
+ |.endif
+ |->vmeta_arith_vn:
+ | lea RC, [KBASE+RC*8]
+ | jmp >1
+ |
+ |->vmeta_arith_nvo:
+ |.if DUALNUM
+ | movzx RBd, PC_RB
+ | movzx RCd, PC_RC
+ |.endif
+ |->vmeta_arith_nv:
+ | lea TMPR, [KBASE+RC*8]
+ | lea RC, [BASE+RB*8]
+ | mov RB, TMPR
+ | jmp >2
+ |
+ |->vmeta_unm:
+ | lea RC, [BASE+RD*8]
+ | mov RB, RC
+ | jmp >2
+ |
+ |->vmeta_arith_vvo:
+ |.if DUALNUM
+ | movzx RBd, PC_RB
+ | movzx RCd, PC_RC
+ |.endif
+ |->vmeta_arith_vv:
+ | lea RC, [BASE+RC*8]
+ |1:
+ | lea RB, [BASE+RB*8]
+ |2:
+ | lea RA, [BASE+RA*8]
+ |.if X64WIN
+ | mov CARG3, RB
+ | mov CARG4, RC
+ | movzx RCd, PC_OP
+ | mov ARG5d, RCd
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Caveat: CARG2 == BASE.
+ | mov CARG2, RA
+ | mov CARG1, L:RB // Caveat: CARG1 == RA.
+ |.else
+ | movzx CARG5d, PC_OP
+ | mov CARG2, RA
+ | mov CARG4, RC // Caveat: CARG4 == RA.
+ | mov L:CARG1, SAVE_L
+ | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE.
+ | mov CARG3, RB
+ | mov L:RB, L:CARG1
+ |.endif
+ | mov SAVE_PC, PC
+ | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ | test RC, RC
+ | jz ->cont_nop
+ |
+ | // Call metamethod for binary op.
+ |->vmeta_binop:
+ | // BASE = base, RC = new base, stack = cont/func/o1/o2
+ | mov RA, RC
+ | sub RC, BASE
+ | mov [RA-24], PC // [cont|PC]
+ | lea PC, [RC+FRAME_CONT]
+ | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2).
+ | jmp ->vm_call_dispatch
+ |
+ |->vmeta_len:
+ | movzx RDd, PC_RD
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE
+ | mov L:CARG1, L:RB
+ | mov SAVE_PC, PC
+ | call extern lj_meta_len // (lua_State *L, TValue *o)
+ | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ |// LJ_52 is a plain C preprocessor conditional (no leading '|'): only the
+ |// 5.2-compat build retries BC_LEN after a NULL return.
+#if LJ_52
+ | test RC, RC
+ | jne ->vmeta_binop // Binop call for compatibility.
+ | movzx RDd, PC_RD
+ | mov TAB:CARG1, [BASE+RD*8]
+ | cleartp TAB:CARG1
+ | jmp ->BC_LEN_Z
+#else
+ | jmp ->vmeta_binop // Binop call for compatibility.
+#endif
+ |
+ |//-- Call metamethod ----------------------------------------------------
+ |
+ |->vmeta_call_ra:
+ | lea RA, [BASE+RA*8+16]
+ |->vmeta_call: // Resolve and call __call metamethod.
+ | // BASE = old base, RA = new base, RC = nargs+1, PC = return
+ | mov TMP1d, NARGS:RDd // Save RA, RC for us.
+ | mov RB, RA
+ |.if X64WIN
+ | mov L:TMPR, SAVE_L
+ | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE.
+ | lea CARG2, [RA-16]
+ | lea CARG3, [RA+NARGS:RD*8-8]
+ | mov CARG1, L:TMPR // Caveat: CARG1 is RA.
+ |.else
+ | mov L:CARG1, SAVE_L
+ | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE.
+ | lea CARG2, [RA-16]
+ | lea CARG3, [RA+NARGS:RD*8-8]
+ |.endif
+ | mov SAVE_PC, PC
+ | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
+ | mov RA, RB
+ | mov L:RB, SAVE_L
+ | mov BASE, L:RB->base
+ | mov NARGS:RDd, TMP1d
+ | mov LFUNC:RB, [RA-16]
+ | add NARGS:RDd, 1
+ | // This is fragile. L->base must not move, KBASE must always be defined.
+ | cmp KBASE, BASE // Continue with CALLT if flag set.
+ | je ->BC_CALLT_Z
+ | cleartp LFUNC:RB
+ | mov BASE, RA
+ | ins_call // Otherwise call resolved metamethod.
+ |
+ |//-- Argument coercion for 'for' statement ------------------------------
+ |
+ |->vmeta_for:
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov CARG2, RA // Caveat: CARG2 == BASE
+ | mov L:CARG1, L:RB // Caveat: CARG1 == RA
+ | mov SAVE_PC, PC
+ | call extern lj_meta_for // (lua_State *L, TValue *base)
+ | mov BASE, L:RB->base
+ |// Re-decode the previous bytecode instruction and re-dispatch through
+ |// the static dispatch table, bypassing hooks/JIT counting.
+ | mov RCd, [PC-4]
+ | movzx RAd, RCH
+ | movzx OP, RCL
+ | shr RCd, 16
+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Fast functions -----------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Fast-function prologue macros. .ffunc defines the entry label only;
+ |// .ffunc_1/_2 additionally enforce a minimum argument count (falling
+ |// back to the C implementation otherwise). The ffunc_n/_nn variants
+ |// also type-check and load the leading number argument(s) into xmm0/1.
+ |.macro .ffunc, name
+ |->ff_ .. name:
+ |.endmacro
+ |
+ |.macro .ffunc_1, name
+ |->ff_ .. name:
+ | cmp NARGS:RDd, 1+1; jb ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_2, name
+ |->ff_ .. name:
+ | cmp NARGS:RDd, 2+1; jb ->fff_fallback
+ |.endmacro
+ |
+ |.macro .ffunc_n, name, op
+ | .ffunc_1 name
+ | checknumtp [BASE], ->fff_fallback
+ | op xmm0, qword [BASE]
+ |.endmacro
+ |
+ |// Overload resolved by argument count: the 1-arg form defaults op=movsd.
+ |.macro .ffunc_n, name
+ | .ffunc_n name, movsd
+ |.endmacro
+ |
+ |.macro .ffunc_nn, name
+ | .ffunc_2 name
+ | checknumtp [BASE], ->fff_fallback
+ | checknumtp [BASE+8], ->fff_fallback
+ | movsd xmm0, qword [BASE]
+ | movsd xmm1, qword [BASE+8]
+ |.endmacro
+ |
+ |// Inlined GC threshold check. Caveat: uses label 1.
+ |.macro ffgccheck
+ | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
+ | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
+ | jb >1
+ | call ->fff_gcstep
+ |1:
+ |.endmacro
+ |
+ |//-- Base library: checks -----------------------------------------------
+ |
+ |// assert(v, ...): falls back (to raise the error) unless v is truthy,
+ |// then shifts all arguments down two slots and returns them unchanged.
+ |.ffunc_1 assert
+ | mov ITYPE, [BASE]
+ | mov RB, ITYPE
+ | sar ITYPE, 47
+ | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback
+ | mov PC, [BASE-8]
+ | mov MULTRES, RDd
+ | mov RB, [BASE]
+ | mov [BASE-16], RB
+ | sub RDd, 2
+ | jz >2
+ | mov RA, BASE
+ |1:
+ | add RA, 8
+ | mov RB, [RA]
+ | mov [RA-16], RB
+ | sub RDd, 1
+ | jnz <1
+ |2:
+ | mov RDd, MULTRES
+ | jmp ->fff_res_
+ |
+ |// type(v): maps the value's tag to an index into the closure upvalues,
+ |// which hold the pre-interned type-name strings.
+ |.ffunc_1 type
+ | mov RC, [BASE]
+ | sar RC, 47
+ | mov RBd, LJ_TISNUM
+ | cmp RCd, RBd
+ | cmovb RCd, RBd
+ | not RCd
+ |2:
+ | mov CFUNC:RB, [BASE-16]
+ | cleartp CFUNC:RB
+ | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
+ | mov PC, [BASE-8]
+ | settp STR:RC, LJ_TSTR
+ | mov [BASE-16], STR:RC
+ | jmp ->fff_res1
+ |
+ |//-- Base library: getters and setters ---------------------------------
+ |
+ |.ffunc_1 getmetatable
+ | mov TAB:RB, [BASE]
+ | mov PC, [BASE-8]
+ | checktab TAB:RB, >6
+ |1: // Field metatable must be at same offset for GCtab and GCudata!
+ | mov TAB:RB, TAB:RB->metatable
+ |2:
+ | test TAB:RB, TAB:RB
+ | mov aword [BASE-16], LJ_TNIL
+ | jz ->fff_res1
+ | settp TAB:RC, TAB:RB, LJ_TTAB
+ | mov [BASE-16], TAB:RC // Store metatable as default result.
+ |// Inlined hash lookup of the "__metatable" key in the metatable, to
+ |// honor a protective mt.__metatable without calling into C.
+ | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)]
+ | mov RAd, TAB:RB->hmask
+ | and RAd, STR:RC->sid
+ | settp STR:RC, LJ_TSTR
+ | imul RAd, #NODE
+ | add NODE:RA, TAB:RB->node
+ |3: // Rearranged logic, because we expect _not_ to find the key.
+ | cmp NODE:RA->key, STR:RC
+ | je >5
+ |4:
+ | mov NODE:RA, NODE:RA->next
+ | test NODE:RA, NODE:RA
+ | jnz <3
+ | jmp ->fff_res1 // Not found, keep default result.
+ |5:
+ | mov RB, NODE:RA->val
+ | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
+ | mov [BASE-16], RB // Return value of mt.__metatable.
+ | jmp ->fff_res1
+ |
+ |6:
+ | cmp ITYPEd, LJ_TUDATA; je <1
+ | cmp ITYPEd, LJ_TISNUM; ja >7
+ | mov ITYPEd, LJ_TISNUM
+ |7:
+ | not ITYPEd
+ | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])]
+ | jmp <2
+ |
+ |.ffunc_2 setmetatable
+ | mov TAB:RB, [BASE]
+ | mov TAB:TMPR, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+ | // Fast path: no mt for table yet and not clearing the mt.
+ | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
+ | mov TAB:RA, [BASE+8]
+ | checktab TAB:RA, ->fff_fallback
+ | mov TAB:RB->metatable, TAB:RA
+ | mov PC, [BASE-8]
+ | mov [BASE-16], TAB:TMPR // Return original table.
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jz >1
+ | // Possible write barrier. Table is black, but skip iswhite(mt) check.
+ | barrierback TAB:RB, RC
+ |1:
+ | jmp ->fff_res1
+ |
+ |.ffunc_2 rawget
+ |.if X64WIN
+ | mov TAB:RA, [BASE]
+ | checktab TAB:RA, ->fff_fallback
+ | mov RB, BASE // Save BASE.
+ | lea CARG3, [BASE+8]
+ | mov CARG2, TAB:RA // Caveat: CARG2 == BASE.
+ | mov CARG1, SAVE_L
+ |.else
+ | mov TAB:CARG2, [BASE]
+ | checktab TAB:CARG2, ->fff_fallback
+ | mov RB, BASE // Save BASE.
+ | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE.
+ | mov CARG1, SAVE_L
+ |.endif
+ | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
+ | // cTValue * returned in eax (RD).
+ | mov BASE, RB // Restore BASE.
+ | // Copy table slot.
+ | mov RB, [RD]
+ | mov PC, [BASE-8]
+ | mov [BASE-16], RB
+ | jmp ->fff_res1
+ |
+ |//-- Base library: conversions ------------------------------------------
+ |
+ |.ffunc tonumber
+ | // Only handles the number case inline (without a base argument).
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
+ | mov RB, [BASE]
+ | checknumber RB, ->fff_fallback
+ | mov PC, [BASE-8]
+ | mov [BASE-16], RB
+ | jmp ->fff_res1
+ |
+ |.ffunc_1 tostring
+ | // Only handles the string or number case inline.
+ | mov PC, [BASE-8]
+ | mov STR:RB, [BASE]
+ | checktp_nc STR:RB, LJ_TSTR, >3
+ | // A __tostring method in the string base metatable is ignored.
+ |2:
+ | mov [BASE-16], STR:RB
+ | jmp ->fff_res1
+ |3: // Handle numbers inline, unless a number base metatable is present.
+ | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1
+ | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
+ | jne ->fff_fallback
+ | ffgccheck // Caveat: uses label 1.
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Add frame since C call can throw.
+ | mov SAVE_PC, PC // Redundant (but a defined value).
+ |.if not X64WIN
+ | mov CARG2, BASE // Otherwise: CARG2 == BASE
+ |.endif
+ | mov L:CARG1, L:RB
+ |.if DUALNUM
+ | call extern lj_strfmt_number // (lua_State *L, cTValue *o)
+ |.else
+ | call extern lj_strfmt_num // (lua_State *L, lua_Number *np)
+ |.endif
+ | // GCstr returned in eax (RD).
+ | mov BASE, L:RB->base
+ | settp STR:RB, RD, LJ_TSTR
+ | jmp <2
+ |
+ |//-- Base library: iterators -------------------------------------------
+ |
+ |// next(t [,k]): the .ffunc_1 argument-count compare also sets ZF when
+ |// exactly one arg was passed, so 'je >2' below fills in the missing key.
+ |.ffunc_1 next
+ | je >2 // Missing 2nd arg?
+ |1:
+ | mov CARG1, [BASE]
+ | mov PC, [BASE-8]
+ | checktab CARG1, ->fff_fallback
+ | mov RB, BASE // Save BASE.
+ |.if X64WIN
+ | lea CARG3, [BASE-16]
+ | lea CARG2, [BASE+8] // Caveat: CARG2 == BASE.
+ |.else
+ | lea CARG2, [BASE+8]
+ | lea CARG3, [BASE-16] // Caveat: CARG3 == BASE.
+ |.endif
+ | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ | // 1=found, 0=end, -1=error returned in eax (RD).
+ | mov BASE, RB // Restore BASE.
+ | test RDd, RDd; jg ->fff_res2 // Found key/value.
+ | js ->fff_fallback_2 // Invalid key.
+ | // End of traversal: return nil.
+ | mov aword [BASE-16], LJ_TNIL
+ | jmp ->fff_res1
+ |2: // Set missing 2nd arg to nil.
+ | mov aword [BASE+8], LJ_TNIL
+ | jmp <1
+ |
+ |// pairs(t): returns the iterator from upvalue[0], the table, and nil.
+ |.ffunc_1 pairs
+ | mov TAB:RB, [BASE]
+ | mov TMPR, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+#if LJ_52
+ | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
+#endif
+ | mov CFUNC:RD, [BASE-16]
+ | cleartp CFUNC:RD
+ | mov CFUNC:RD, CFUNC:RD->upvalue[0]
+ | settp CFUNC:RD, LJ_TFUNC
+ | mov PC, [BASE-8]
+ | mov [BASE-16], CFUNC:RD
+ | mov [BASE-8], TMPR
+ | mov aword [BASE], LJ_TNIL
+ | mov RDd, 1+3
+ | jmp ->fff_res
+ |
+ |// ipairs iterator: increments the integer key and fetches t[k+1],
+ |// first from the array part, else via lj_tab_getinth.
+ |.ffunc_2 ipairs_aux
+ | mov TAB:RB, [BASE]
+ | checktab TAB:RB, ->fff_fallback
+ |.if DUALNUM
+ | mov RA, [BASE+8]
+ | checkint RA, ->fff_fallback
+ |.else
+ | checknumtp [BASE+8], ->fff_fallback
+ | movsd xmm0, qword [BASE+8]
+ |.endif
+ | mov PC, [BASE-8]
+ |.if DUALNUM
+ | add RAd, 1
+ | setint ITYPE, RA
+ | mov [BASE-16], ITYPE
+ |.else
+ | sseconst_1 xmm1, TMPR
+ | addsd xmm0, xmm1
+ | cvttsd2si RAd, xmm0
+ | movsd qword [BASE-16], xmm0
+ |.endif
+ | cmp RAd, TAB:RB->asize; jae >2 // Not in array part?
+ | mov RD, TAB:RB->array
+ | lea RD, [RD+RA*8]
+ |1:
+ | cmp aword [RD], LJ_TNIL; je ->fff_res0
+ | // Copy array slot.
+ | mov RB, [RD]
+ | mov [BASE-8], RB
+ |->fff_res2:
+ | mov RDd, 1+2
+ | jmp ->fff_res
+ |2: // Check for empty hash part first. Otherwise call C function.
+ | cmp dword TAB:RB->hmask, 0; je ->fff_res0
+ |.if X64WIN
+ | mov TMPR, BASE
+ | mov CARG2d, RAd
+ | mov CARG1, TAB:RB
+ | mov RB, TMPR
+ |.else
+ | mov CARG1, TAB:RB
+ | mov RB, BASE // Save BASE.
+ | mov CARG2d, RAd // Caveat: CARG2 == BASE
+ |.endif
+ | call extern lj_tab_getinth // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in eax (RD).
+ | mov BASE, RB
+ | test RD, RD
+ | jnz <1
+ |->fff_res0:
+ | mov RDd, 1+0
+ | jmp ->fff_res
+ |
+ |// ipairs(t): returns the aux iterator, the table, and the number 0.
+ |.ffunc_1 ipairs
+ | mov TAB:RB, [BASE]
+ | mov TMPR, TAB:RB
+ | checktab TAB:RB, ->fff_fallback
+#if LJ_52
+ | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback
+#endif
+ | mov CFUNC:RD, [BASE-16]
+ | cleartp CFUNC:RD
+ | mov CFUNC:RD, CFUNC:RD->upvalue[0]
+ | settp CFUNC:RD, LJ_TFUNC
+ | mov PC, [BASE-8]
+ | mov [BASE-16], CFUNC:RD
+ | mov [BASE-8], TMPR
+ |.if DUALNUM
+ | mov64 RD, ((uint64_t)LJ_TISNUM<<47)
+ | mov [BASE], RD
+ |.else
+ | mov qword [BASE], 0
+ |.endif
+ | mov RDd, 1+3
+ | jmp ->fff_res
+ |
+ |//-- Base library: catch errors ----------------------------------------
+ |
+ |// pcall(f, ...): shift args up over a new PCALL frame and dispatch.
+ |// The low PC bit records whether a hook was active at entry, so the
+ |// error handler can restore the hook state correctly.
+ |.ffunc_1 pcall
+ | lea RA, [BASE+16]
+ | sub NARGS:RDd, 1
+ | mov PCd, 16+FRAME_PCALL
+ |1:
+ | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+ | shr RB, HOOK_ACTIVE_SHIFT
+ | and RB, 1
+ | add PC, RB // Remember active hook before pcall.
+ | // Note: this does a (harmless) copy of the function to the PC slot, too.
+ | mov KBASE, RD
+ |2:
+ | mov RB, [RA+KBASE*8-24]
+ | mov [RA+KBASE*8-16], RB
+ | sub KBASE, 1
+ | ja <2
+ | jmp ->vm_call_dispatch
+ |
+ |// xpcall(f, h, ...): like pcall, but swaps f and the handler first so
+ |// the traceback function sits below the frame, then shares label 1.
+ |.ffunc_2 xpcall
+ | mov LFUNC:RA, [BASE+8]
+ | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback
+ | mov LFUNC:RB, [BASE] // Swap function and traceback.
+ | mov [BASE], LFUNC:RA
+ | mov [BASE+8], LFUNC:RB
+ | lea RA, [BASE+24]
+ | sub NARGS:RDd, 2
+ | mov PCd, 24+FRAME_PCALL
+ | jmp <1
+ |
+ |//-- Coroutine library --------------------------------------------------
+ |
+ |// Shared implementation for coroutine.resume (resume=1, thread passed as
+ |// first arg, results prefixed with true/false) and the coroutine.wrap
+ |// closure (resume=0, thread taken from upvalue[0], errors re-thrown).
+ |.macro coroutine_resume_wrap, resume
+ |.if resume
+ |.ffunc_1 coroutine_resume
+ | mov L:RB, [BASE]
+ | cleartp L:RB
+ |.else
+ |.ffunc coroutine_wrap_aux
+ | mov CFUNC:RB, [BASE-16]
+ | cleartp CFUNC:RB
+ | mov L:RB, CFUNC:RB->upvalue[0].gcr
+ | cleartp L:RB
+ |.endif
+ | mov PC, [BASE-8]
+ | mov SAVE_PC, PC
+ | mov TMP1, L:RB
+ |.if resume
+ | checktptp [BASE], LJ_TTHREAD, ->fff_fallback
+ |.endif
+ | cmp aword L:RB->cframe, 0; jne ->fff_fallback
+ | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
+ | mov RA, L:RB->top
+ | je >1 // Status != LUA_YIELD (i.e. 0)?
+ | cmp RA, L:RB->base // Check for presence of initial func.
+ | je ->fff_fallback
+ | mov PC, [RA-8] // Move initial function up.
+ | mov [RA], PC
+ | add RA, 8
+ |1:
+ |.if resume
+ | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
+ |.else
+ | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
+ |.endif
+ | cmp PC, L:RB->maxstack; ja ->fff_fallback
+ | mov L:RB->top, PC
+ |
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ |.if resume
+ | add BASE, 8 // Keep resumed thread in stack for GC.
+ |.endif
+ | mov L:RB->top, BASE
+ |.if resume
+ | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
+ |.else
+ | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
+ |.endif
+ | sub RB, PC // Relative to PC.
+ |
+ | cmp PC, RA
+ | je >3
+ |2: // Move args to coroutine.
+ | mov RC, [PC+RB]
+ | mov [PC-8], RC
+ | sub PC, 8
+ | cmp PC, RA
+ | jne <2
+ |3:
+ | mov CARG2, RA
+ | mov CARG1, TMP1
+ | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
+ |
+ |// After vm_resume: eax holds the coroutine status; L:PC now aliases the
+ |// resumed coroutine's lua_State (reloaded from TMP1).
+ | mov L:RB, SAVE_L
+ | mov L:PC, TMP1
+ | mov BASE, L:RB->base
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+ | set_vmstate INTERP
+ |
+ | cmp eax, LUA_YIELD
+ | ja >8
+ |4:
+ | mov RA, L:PC->base
+ | mov KBASE, L:PC->top
+ | mov L:PC->top, RA // Clear coroutine stack.
+ | mov PC, KBASE
+ | sub PC, RA
+ | je >6 // No results?
+ | lea RD, [BASE+PC]
+ | shr PCd, 3
+ | cmp RD, L:RB->maxstack
+ | ja >9 // Need to grow stack?
+ |
+ | mov RB, BASE
+ | sub RB, RA
+ |5: // Move results from coroutine.
+ | mov RD, [RA]
+ | mov [RA+RB], RD
+ | add RA, 8
+ | cmp RA, KBASE
+ | jne <5
+ |6:
+ |.if resume
+ | lea RDd, [PCd+2] // nresults+1 = 1 + true + results.
+ | mov_true ITYPE // Prepend true to results.
+ | mov [BASE-8], ITYPE
+ |.else
+ | lea RDd, [PCd+1] // nresults+1 = 1 + results.
+ |.endif
+ |7:
+ | mov PC, SAVE_PC
+ | mov MULTRES, RDd
+ |.if resume
+ | mov RA, -8
+ |.else
+ | xor RAd, RAd
+ |.endif
+ | test PCd, FRAME_TYPE
+ | jz ->BC_RET_Z
+ | jmp ->vm_return
+ |
+ |8: // Coroutine returned with error (at co->top-1).
+ |.if resume
+ | mov_false ITYPE // Prepend false to results.
+ | mov [BASE-8], ITYPE
+ | mov RA, L:PC->top
+ | sub RA, 8
+ | mov L:PC->top, RA // Clear error from coroutine stack.
+ | // Copy error message.
+ | mov RD, [RA]
+ | mov [BASE], RD
+ | mov RDd, 1+2 // nresults+1 = 1 + false + error.
+ | jmp <7
+ |.else
+ | mov CARG2, L:PC
+ | mov CARG1, L:RB
+ | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
+ | // Error function does not return.
+ |.endif
+ |
+ |9: // Handle stack expansion on return from yield.
+ | mov L:RA, TMP1
+ | mov L:RA->top, KBASE // Undo coroutine stack clearing.
+ | mov CARG2, PC
+ | mov CARG1, L:RB
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov L:PC, TMP1
+ | mov BASE, L:RB->base
+ | jmp <4 // Retry the stack move.
+ |.endmacro
+ |
+ | coroutine_resume_wrap 1 // coroutine.resume
+ | coroutine_resume_wrap 0 // coroutine.wrap
+ |
+ |// coroutine.yield: only legal when resumed through vm_resume (checked
+ |// via the CFRAME_RESUME bit in cframe); unwinds back to the C frame.
+ |.ffunc coroutine_yield
+ | mov L:RB, SAVE_L
+ | test aword L:RB->cframe, CFRAME_RESUME
+ | jz ->fff_fallback
+ | mov L:RB->base, BASE
+ | lea RD, [BASE+NARGS:RD*8-8]
+ | mov L:RB->top, RD
+ | xor RDd, RDd
+ | mov aword L:RB->cframe, RD
+ | mov al, LUA_YIELD
+ | mov byte L:RB->status, al
+ | jmp ->vm_leave_unw
+ |
+ |//-- Math library -------------------------------------------------------
+ |
+ |// math.abs: integer fast path (DUALNUM) with INT_MIN overflowing to the
+ |// number 2^31; the generic path clears the sign bit by shl/shr of the
+ |// 64-bit value.
+ | .ffunc_1 math_abs
+ | mov RB, [BASE]
+ |.if DUALNUM
+ | checkint RB, >3
+ | cmp RBd, 0; jns ->fff_resi
+ | neg RBd; js >2
+ |->fff_resbit:
+ |->fff_resi:
+ | setint RB
+ |->fff_resRB:
+ | mov PC, [BASE-8]
+ | mov [BASE-16], RB
+ | jmp ->fff_res1
+ |2:
+ | mov64 RB, U64x(41e00000,00000000) // 2^31.
+ | jmp ->fff_resRB
+ |3:
+ | ja ->fff_fallback
+ |.else
+ | checknum RB, ->fff_fallback
+ |.endif
+ | shl RB, 1
+ | shr RB, 1
+ | mov PC, [BASE-8]
+ | mov [BASE-16], RB
+ | jmp ->fff_res1
+ |
+ |.ffunc_n math_sqrt, sqrtsd
+ |->fff_resxmm0:
+ | mov PC, [BASE-8]
+ | movsd qword [BASE-16], xmm0
+ | // fallthrough
+ |
+ |// Common fast-function return paths: store nresults+1 in RD/MULTRES,
+ |// then either adjust BASE for a standard frame or go through vm_return.
+ |->fff_res1:
+ | mov RDd, 1+1
+ |->fff_res:
+ | mov MULTRES, RDd
+ |->fff_res_:
+ | test PCd, FRAME_TYPE
+ | jnz >7
+ |5:
+ | cmp PC_RB, RDL // More results expected?
+ | ja >6
+ | // Adjust BASE. KBASE is assumed to be set for the calling frame.
+ | movzx RAd, PC_RA
+ | neg RA
+ | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ | mov aword [BASE+RD*8-24], LJ_TNIL
+ | add RD, 1
+ | jmp <5
+ |
+ |7: // Non-standard return case.
+ | mov RA, -16 // Results start at BASE+RA = BASE-16.
+ | jmp ->vm_return
+ |
+ |// math.floor/ceil via the vm_floor/vm_ceil SSE helpers; DUALNUM builds
+ |// return an integer when the result fits exactly in int32.
+ |.macro math_round, func
+ | .ffunc math_ .. func
+ |.if DUALNUM
+ | mov RB, [BASE]
+ | checknumx RB, ->fff_resRB, je
+ | ja ->fff_fallback
+ |.else
+ | checknumtp [BASE], ->fff_fallback
+ |.endif
+ | movsd xmm0, qword [BASE]
+ | call ->vm_ .. func .. _sse
+ |.if DUALNUM
+ | cvttsd2si RBd, xmm0
+ | cmp RBd, 0x80000000
+ | jne ->fff_resi
+ | cvtsi2sd xmm1, RBd
+ | ucomisd xmm0, xmm1
+ | jp ->fff_resxmm0
+ | je ->fff_resi
+ |.endif
+ | jmp ->fff_resxmm0
+ |.endmacro
+ |
+ | math_round floor
+ | math_round ceil
+ |
+ |.ffunc math_log
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument.
+ | checknumtp [BASE], ->fff_fallback
+ | movsd xmm0, qword [BASE]
+ | mov RB, BASE
+ | call extern log
+ | mov BASE, RB
+ | jmp ->fff_resxmm0
+ |
+ |// Forward 1-arg/2-arg math functions to the C library of the same name.
+ |.macro math_extern, func
+ | .ffunc_n math_ .. func
+ | mov RB, BASE
+ | call extern func
+ | mov BASE, RB
+ | jmp ->fff_resxmm0
+ |.endmacro
+ |
+ |.macro math_extern2, func
+ | .ffunc_nn math_ .. func
+ | mov RB, BASE
+ | call extern func
+ | mov BASE, RB
+ | jmp ->fff_resxmm0
+ |.endmacro
+ |
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
+ | math_extern sinh
+ | math_extern cosh
+ | math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
+ |
+ |// math.ldexp via the x87 fscale instruction (no SSE equivalent).
+ |.ffunc_2 math_ldexp
+ | checknumtp [BASE], ->fff_fallback
+ | checknumtp [BASE+8], ->fff_fallback
+ | fld qword [BASE+8]
+ | fld qword [BASE]
+ | fscale
+ | fpop1
+ | mov PC, [BASE-8]
+ | fstp qword [BASE-16]
+ | jmp ->fff_res1
+ |
+ |.ffunc_n math_frexp
+ | mov RB, BASE
+ |.if X64WIN
+ | lea CARG2, TMP1 // Caveat: CARG2 == BASE
+ |.else
+ | lea CARG1, TMP1
+ |.endif
+ | call extern frexp
+ | mov BASE, RB
+ | mov RBd, TMP1d
+ | mov PC, [BASE-8]
+ | movsd qword [BASE-16], xmm0
+ |.if DUALNUM
+ | setint RB
+ | mov [BASE-8], RB
+ |.else
+ | cvtsi2sd xmm1, RBd
+ | movsd qword [BASE-8], xmm1
+ |.endif
+ | mov RDd, 1+2
+ | jmp ->fff_res
+ |
+ |.ffunc_n math_modf
+ | mov RB, BASE
+ |.if X64WIN
+ | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE
+ |.else
+ | lea CARG1, [BASE-16]
+ |.endif
+ | call extern modf
+ | mov BASE, RB
+ | mov PC, [BASE-8]
+ | movsd qword [BASE-8], xmm0
+ | mov RDd, 1+2
+ | jmp ->fff_res
+ |
+ |// math.min/max over all arguments; integer loop (DUALNUM) with cmov,
+ |// falling through to an SSE min/max loop once a non-integer is seen.
+ |.macro math_minmax, name, cmovop, sseop
+ | .ffunc_1 name
+ | mov RAd, 2
+ |.if DUALNUM
+ | mov RB, [BASE]
+ | checkint RB, >4
+ |1: // Handle integers.
+ | cmp RAd, RDd; jae ->fff_resRB
+ | mov TMPR, [BASE+RA*8-8]
+ | checkint TMPR, >3
+ | cmp RBd, TMPRd
+ | cmovop RB, TMPR
+ | add RAd, 1
+ | jmp <1
+ |3:
+ | ja ->fff_fallback
+ | // Convert intermediate result to number and continue below.
+ | cvtsi2sd xmm0, RBd
+ | jmp >6
+ |4:
+ | ja ->fff_fallback
+ |.else
+ | checknumtp [BASE], ->fff_fallback
+ |.endif
+ |
+ | movsd xmm0, qword [BASE]
+ |5: // Handle numbers or integers.
+ | cmp RAd, RDd; jae ->fff_resxmm0
+ |.if DUALNUM
+ | mov RB, [BASE+RA*8-8]
+ | checknumx RB, >6, jb
+ | ja ->fff_fallback
+ | cvtsi2sd xmm1, RBd
+ | jmp >7
+ |.else
+ | checknumtp [BASE+RA*8-8], ->fff_fallback
+ |.endif
+ |6:
+ | movsd xmm1, qword [BASE+RA*8-8]
+ |7:
+ | sseop xmm0, xmm1
+ | add RAd, 1
+ | jmp <5
+ |.endmacro
+ |
+ | math_minmax math_min, cmovg, minsd
+ | math_minmax math_max, cmovl, maxsd
+ |
+ |//-- String library -----------------------------------------------------
+ |
+ |.ffunc string_byte // Only handle the 1-arg case here.
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback
+ | mov STR:RB, [BASE]
+ | checkstr STR:RB, ->fff_fallback
+ | mov PC, [BASE-8]
+ | cmp dword STR:RB->len, 1
+ | jb ->fff_res0 // Return no results for empty string.
+ | movzx RBd, byte STR:RB[1]
+ |.if DUALNUM
+ | jmp ->fff_resi
+ |.else
+ | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0
+ |.endif
+ |
+ |.ffunc string_char // Only handle the 1-arg case here.
+ | ffgccheck
+ | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
+ |.if DUALNUM
+ | mov RB, [BASE]
+ | checkint RB, ->fff_fallback
+ |.else
+ | checknumtp [BASE], ->fff_fallback
+ | cvttsd2si RBd, qword [BASE]
+ |.endif
+ | cmp RBd, 255; ja ->fff_fallback
+ | mov TMP1d, RBd
+ | mov TMPRd, 1
+ | lea RD, TMP1 // Points to stack. Little-endian.
+ |->fff_newstr:
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov CARG3d, TMPRd // Zero-extended to size_t.
+ | mov CARG2, RD
+ | mov CARG1, L:RB
+ | mov SAVE_PC, PC
+ | call extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:
+ | // GCstr * returned in eax (RD).
+ | mov BASE, L:RB->base
+ | mov PC, [BASE-8]
+ | settp STR:RD, LJ_TSTR
+ | mov [BASE-16], STR:RD
+ | jmp ->fff_res1
+ |
+ |.ffunc string_sub
+ | ffgccheck
+ | mov TMPRd, -1
+ | cmp NARGS:RDd, 1+2; jb ->fff_fallback
+ | jna >1
+ |.if DUALNUM
+ | mov TMPR, [BASE+16]
+ | checkint TMPR, ->fff_fallback
+ |.else
+ | checknumtp [BASE+16], ->fff_fallback
+ | cvttsd2si TMPRd, qword [BASE+16]
+ |.endif
+ |1:
+ | mov STR:RB, [BASE]
+ | checkstr STR:RB, ->fff_fallback
+ |.if DUALNUM
+ | mov ITYPE, [BASE+8]
+ | mov RAd, ITYPEd // Must clear hiword for lea below.
+ | sar ITYPE, 47
+ | cmp ITYPEd, LJ_TISNUM
+ | jne ->fff_fallback
+ |.else
+ | checknumtp [BASE+8], ->fff_fallback
+ | cvttsd2si RAd, qword [BASE+8]
+ |.endif
+ | mov RCd, STR:RB->len
+ | cmp RCd, TMPRd // len < end? (unsigned compare)
+ | jb >5
+ |2:
+ | test RAd, RAd // start <= 0?
+ | jle >7
+ |3:
+ | sub TMPRd, RAd // start > end?
+ | jl ->fff_emptystr
+ | lea RD, [STR:RB+RAd+#STR-1]
+ | add TMPRd, 1
+ |4:
+ | jmp ->fff_newstr
+ |
+ |5: // Negative end or overflow.
+ | jl >6
+ | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1)
+ | jmp <2
+ |6: // Overflow.
+ | mov TMPRd, RCd // end = len
+ | jmp <2
+ |
+ |7: // Negative start or underflow.
+ | je >8
+ | add RAd, RCd // start = start+(len+1)
+ | add RAd, 1
+ | jg <3 // start > 0?
+ |8: // Underflow.
+ | mov RAd, 1 // start = 1
+ | jmp <3
+ |
+ |->fff_emptystr: // Range underflow.
+ | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok.
+ | jmp <4
+ |
+ |.macro ffstring_op, name
+ | .ffunc_1 string_ .. name
+ | ffgccheck
+ |.if X64WIN
+ | mov STR:TMPR, [BASE]
+ | checkstr STR:TMPR, ->fff_fallback
+ |.else
+ | mov STR:CARG2, [BASE]
+ | checkstr STR:CARG2, ->fff_fallback
+ |.endif
+ | mov L:RB, SAVE_L
+ | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
+ | mov L:RB->base, BASE
+ |.if X64WIN
+ | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE
+ |.endif
+ | mov RC, SBUF:CARG1->b
+ | mov SBUF:CARG1->L, L:RB
+ | mov SBUF:CARG1->w, RC
+ | mov SAVE_PC, PC
+ | call extern lj_buf_putstr_ .. name
+ | mov CARG1, rax
+ | call extern lj_buf_tostr
+ | jmp ->fff_resstr
+ |.endmacro
+ |
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
+ |
+ |//-- Bit library --------------------------------------------------------
+ |
+ |.macro .ffunc_bit, name, kind, fdef
+ | fdef name
+ |.if kind == 2
+ | sseconst_tobit xmm1, RB
+ |.endif
+ |.if DUALNUM
+ | mov RB, [BASE]
+ | checkint RB, >1
+ |.if kind > 0
+ | jmp >2
+ |.else
+ | jmp ->fff_resbit
+ |.endif
+ |1:
+ | ja ->fff_fallback
+ | movd xmm0, RB
+ |.else
+ | checknumtp [BASE], ->fff_fallback
+ | movsd xmm0, qword [BASE]
+ |.endif
+ |.if kind < 2
+ | sseconst_tobit xmm1, RB
+ |.endif
+ | addsd xmm0, xmm1
+ | movd RBd, xmm0
+ |2:
+ |.endmacro
+ |
+ |.macro .ffunc_bit, name, kind
+ | .ffunc_bit name, kind, .ffunc_1
+ |.endmacro
+ |
+ |.ffunc_bit bit_tobit, 0
+ | jmp ->fff_resbit
+ |
+ |.macro .ffunc_bit_op, name, ins
+ | .ffunc_bit name, 2
+ | mov TMPRd, NARGS:RDd // Save for fallback.
+ | lea RD, [BASE+NARGS:RD*8-16]
+ |1:
+ | cmp RD, BASE
+ | jbe ->fff_resbit
+ |.if DUALNUM
+ | mov RA, [RD]
+ | checkint RA, >2
+ | ins RBd, RAd
+ | sub RD, 8
+ | jmp <1
+ |2:
+ | ja ->fff_fallback_bit_op
+ | movd xmm0, RA
+ |.else
+ | checknumtp [RD], ->fff_fallback_bit_op
+ | movsd xmm0, qword [RD]
+ |.endif
+ | addsd xmm0, xmm1
+ | movd RAd, xmm0
+ | ins RBd, RAd
+ | sub RD, 8
+ | jmp <1
+ |.endmacro
+ |
+ |.ffunc_bit_op bit_band, and
+ |.ffunc_bit_op bit_bor, or
+ |.ffunc_bit_op bit_bxor, xor
+ |
+ |.ffunc_bit bit_bswap, 1
+ | bswap RBd
+ | jmp ->fff_resbit
+ |
+ |.ffunc_bit bit_bnot, 1
+ | not RBd
+ |.if DUALNUM
+ | jmp ->fff_resbit
+ |.else
+ |->fff_resbit:
+ | cvtsi2sd xmm0, RBd
+ | jmp ->fff_resxmm0
+ |.endif
+ |
+ |->fff_fallback_bit_op:
+ | mov NARGS:RDd, TMPRd // Restore for fallback
+ | jmp ->fff_fallback
+ |
+ |.macro .ffunc_bit_sh, name, ins
+ |.if DUALNUM
+ | .ffunc_bit name, 1, .ffunc_2
+ | // Note: no inline conversion from number for 2nd argument!
+ | mov RA, [BASE+8]
+ | checkint RA, ->fff_fallback
+ |.else
+ | .ffunc_nn name
+ | sseconst_tobit xmm2, RB
+ | addsd xmm0, xmm2
+ | addsd xmm1, xmm2
+ | movd RBd, xmm0
+ | movd RAd, xmm1
+ |.endif
+ | ins RBd, cl // Assumes RA is ecx.
+ | jmp ->fff_resbit
+ |.endmacro
+ |
+ |.ffunc_bit_sh bit_lshift, shl
+ |.ffunc_bit_sh bit_rshift, shr
+ |.ffunc_bit_sh bit_arshift, sar
+ |.ffunc_bit_sh bit_rol, rol
+ |.ffunc_bit_sh bit_ror, ror
+ |
+ |//-----------------------------------------------------------------------
+ |
+ |->fff_fallback_2:
+ | mov NARGS:RDd, 1+2 // Other args are ignored, anyway.
+ | jmp ->fff_fallback
+ |->fff_fallback_1:
+ | mov NARGS:RDd, 1+1 // Other args are ignored, anyway.
+ |->fff_fallback: // Call fast function fallback handler.
+ | // BASE = new base, RD = nargs+1
+ | mov L:RB, SAVE_L
+ | mov PC, [BASE-8] // Fallback may overwrite PC.
+ | mov SAVE_PC, PC // Redundant (but a defined value).
+ | mov L:RB->base, BASE
+ | lea RD, [BASE+NARGS:RD*8-8]
+ | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
+ | mov L:RB->top, RD
+ | mov CFUNC:RD, [BASE-16]
+ | cleartp CFUNC:RD
+ | cmp RA, L:RB->maxstack
+ | ja >5 // Need to grow stack.
+ | mov CARG1, L:RB
+ | call aword CFUNC:RD->f // (lua_State *L)
+ | mov BASE, L:RB->base
+ | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
+ | test RDd, RDd; jg ->fff_res // Returned nresults+1?
+ |1:
+ | mov RA, L:RB->top
+ | sub RA, BASE
+ | shr RAd, 3
+ | test RDd, RDd
+ | lea NARGS:RDd, [RAd+1]
+ | mov LFUNC:RB, [BASE-16]
+ | jne ->vm_call_tail // Returned -1?
+ | cleartp LFUNC:RB
+ | ins_callt // Returned 0: retry fast path.
+ |
+ |// Reconstruct previous base for vmeta_call during tailcall.
+ |->vm_call_tail:
+ | mov RA, BASE
+ | test PCd, FRAME_TYPE
+ | jnz >3
+ | movzx RBd, PC_RA
+ | neg RB
+ | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8
+ | jmp ->vm_call_dispatch // Resolve again for tailcall.
+ |3:
+ | mov RB, PC
+ | and RB, -8
+ | sub BASE, RB
+ | jmp ->vm_call_dispatch // Resolve again for tailcall.
+ |
+ |5: // Grow stack for fallback handler.
+ | mov CARG2d, LUA_MINSTACK
+ | mov CARG1, L:RB
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov BASE, L:RB->base
+ | xor RDd, RDd // Simulate a return 0.
+ | jmp <1 // Dumb retry (goes through ff first).
+ |
+ |->fff_gcstep: // Call GC step function.
+ | // BASE = new base, RD = nargs+1
+ | pop RB // Must keep stack at same level.
+ | mov TMP1, RB // Save return address
+ | mov L:RB, SAVE_L
+ | mov SAVE_PC, PC // Redundant (but a defined value).
+ | mov L:RB->base, BASE
+ | lea RD, [BASE+NARGS:RD*8-8]
+ | mov CARG1, L:RB
+ | mov L:RB->top, RD
+ | call extern lj_gc_step // (lua_State *L)
+ | mov BASE, L:RB->base
+ | mov RD, L:RB->top
+ | sub RD, BASE
+ | shr RDd, 3
+ | add NARGS:RDd, 1
+ | mov RB, TMP1
+ | push RB // Restore return address.
+ | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Special dispatch targets -------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->vm_record: // Dispatch target for recording phase.
+ |.if JIT
+ | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+ | test RDL, HOOK_VMEVENT // No recording while in vmevent.
+ | jnz >5
+ | // Decrement the hookcount for consistency, but always do the call.
+ | test RDL, HOOK_ACTIVE
+ | jnz >1
+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
+ | jz >1
+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
+ | jmp >1
+ |.endif
+ |
+ |->vm_rethook: // Dispatch target for return hooks.
+ | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+ | test RDL, HOOK_ACTIVE // Hook already active?
+ | jnz >5
+ | jmp >1
+ |
+ |->vm_inshook: // Dispatch target for instr/line hooks.
+ | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)]
+ | test RDL, HOOK_ACTIVE // Hook already active?
+ | jnz >5
+ |
+ | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
+ | jz >5
+ | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
+ | jz >1
+ | test RDL, LUA_MASKLINE
+ | jz >5
+ |1:
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov CARG2, PC // Caveat: CARG2 == BASE
+ | mov CARG1, L:RB
+ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
+ | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc)
+ |3:
+ | mov BASE, L:RB->base
+ |4:
+ | movzx RAd, PC_RA
+ |5:
+ | movzx OP, PC_OP
+ | movzx RDd, PC_RD
+ | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
+ |
+ |->cont_hook: // Continue from hook yield.
+ | add PC, 4
+ | mov RA, [RB-40]
+ | mov MULTRES, RAd // Restore MULTRES for *M ins.
+ | jmp <4
+ |
+ |->vm_hotloop: // Hot loop counter underflow.
+ |.if JIT
+ | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L).
+ | cleartp LFUNC:RB
+ | mov RB, LFUNC:RB->pc
+ | movzx RDd, byte [RB+PC2PROTO(framesize)]
+ | lea RD, [BASE+RD*8]
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov L:RB->top, RD
+ | mov CARG2, PC
+ | lea CARG1, [DISPATCH+GG_DISP2J]
+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
+ | mov SAVE_PC, PC
+ | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
+ | jmp <3
+ |.endif
+ |
+ |->vm_callhook: // Dispatch target for call hooks.
+ | mov SAVE_PC, PC
+ |.if JIT
+ | jmp >1
+ |.endif
+ |
+ |->vm_hotcall: // Hot call counter underflow.
+ |.if JIT
+ | mov SAVE_PC, PC
+ | or PC, 1 // Marker for hot call.
+ |1:
+ |.endif
+ | lea RD, [BASE+NARGS:RD*8-8]
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov L:RB->top, RD
+ | mov CARG2, PC
+ | mov CARG1, L:RB
+ | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc)
+ | // ASMFunction returned in eax/rax (RD).
+ | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
+ |.if JIT
+ | and PC, -2
+ |.endif
+ | mov BASE, L:RB->base
+ | mov RA, RD
+ | mov RD, L:RB->top
+ | sub RD, BASE
+ | mov RB, RA
+ | movzx RAd, PC_RA
+ | shr RDd, 3
+ | add NARGS:RDd, 1
+ | jmp RB
+ |
+ |->cont_stitch: // Trace stitching.
+ |.if JIT
+ | // BASE = base, RC = result, RB = mbase
+ | mov TRACE:ITYPE, [RB-40] // Save previous trace.
+ | cleartp TRACE:ITYPE
+ | mov TMPRd, MULTRES
+ | movzx RAd, PC_RA
+ | lea RA, [BASE+RA*8] // Call base.
+ | sub TMPRd, 1
+ | jz >2
+ |1: // Move results down.
+ | mov RB, [RC]
+ | mov [RA], RB
+ | add RC, 8
+ | add RA, 8
+ | sub TMPRd, 1
+ | jnz <1
+ |2:
+ | movzx RCd, PC_RA
+ | movzx RBd, PC_RB
+ | add RC, RB
+ | lea RC, [BASE+RC*8-8]
+ |3:
+ | cmp RC, RA
+ | ja >9 // More results wanted?
+ |
+ | test TRACE:ITYPE, TRACE:ITYPE
+ | jz ->cont_nop
+ | movzx RBd, word TRACE:ITYPE->traceno
+ | movzx RDd, word TRACE:ITYPE->link
+ | cmp RDd, RBd
+ | je ->cont_nop // Blacklisted.
+ | test RDd, RDd
+ | jne =>BC_JLOOP // Jump to stitched trace.
+ |
+ | // Stitch a new trace to the previous trace.
+ | mov [DISPATCH+DISPATCH_J(exitno)], RB
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov CARG2, PC
+ | lea CARG1, [DISPATCH+GG_DISP2J]
+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
+ | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc)
+ | mov BASE, L:RB->base
+ | jmp ->cont_nop
+ |
+ |9: // Fill up results with nil.
+ | mov aword [RA], LJ_TNIL
+ | add RA, 8
+ | jmp <3
+ |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov CARG2, PC // Caveat: CARG2 == BASE
+ | mov CARG1, L:RB
+ | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc)
+ | mov BASE, L:RB->base
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | sub PC, 4
+ | jmp ->cont_nop
+#endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Trace exit handler -------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Called from an exit stub with the exit number on the stack.
+ |// The 16 bit exit number is stored with two (sign-extended) push imm8.
+ |->vm_exit_handler:
+ |.if JIT
+ | push r13; push r12
+ | push r11; push r10; push r9; push r8
+ | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp
+ | push rbx; push rdx; push rcx; push rax
+ | movzx RCd, byte [rbp-8] // Reconstruct exit number.
+ | mov RCH, byte [rbp-16]
+ | mov [rbp-8], r15; mov [rbp-16], r14
+ | // DISPATCH is preserved on-trace in LJ_GC64 mode.
+ | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
+ | set_vmstate EXIT
+ | mov [DISPATCH+DISPATCH_J(exitno)], RCd
+ | mov [DISPATCH+DISPATCH_J(parent)], RAd
+ |.if X64WIN
+ | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
+ |.else
+ | sub rsp, 16*8 // Room for SSE regs.
+ |.endif
+ | add rbp, -128
+ | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
+ | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
+ | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
+ | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
+ | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
+ | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
+ | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
+ | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
+ | // Caveat: RB is rbp.
+ | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
+ | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RB
+ | mov L:RB->base, BASE
+ |.if X64WIN
+ | lea CARG2, [rsp+4*8]
+ |.else
+ | mov CARG2, rsp
+ |.endif
+ | lea CARG1, [DISPATCH+GG_DISP2J]
+ | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
+ | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
+ | // MULTRES or negated error code returned in eax (RD).
+ | mov RA, L:RB->cframe
+ | and RA, CFRAME_RAWMASK
+ | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
+ | mov BASE, L:RB->base
+ | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC.
+ | jmp >1
+ |.endif
+ |->vm_exit_interp:
+ | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
+ |.if JIT
+ | // Restore additional callee-save registers only used in compiled code.
+ |.if X64WIN
+ | lea RA, [rsp+10*16+4*8]
+ |1:
+ | movdqa xmm15, [RA-10*16]
+ | movdqa xmm14, [RA-9*16]
+ | movdqa xmm13, [RA-8*16]
+ | movdqa xmm12, [RA-7*16]
+ | movdqa xmm11, [RA-6*16]
+ | movdqa xmm10, [RA-5*16]
+ | movdqa xmm9, [RA-4*16]
+ | movdqa xmm8, [RA-3*16]
+ | movdqa xmm7, [RA-2*16]
+ | mov rsp, RA // Reposition stack to C frame.
+ | movdqa xmm6, [RA-1*16]
+ | mov r15, CSAVE_1
+ | mov r14, CSAVE_2
+ | mov r13, CSAVE_3
+ | mov r12, CSAVE_4
+ |.else
+ | lea RA, [rsp+16]
+ |1:
+ | mov r13, [RA-8]
+ | mov r12, [RA]
+ | mov rsp, RA // Reposition stack to C frame.
+ |.endif
+ | test RDd, RDd; js >9 // Check for error from exit.
+ | mov L:RB, SAVE_L
+ | mov MULTRES, RDd
+ | mov LFUNC:KBASE, [BASE-16]
+ | cleartp LFUNC:KBASE
+ | mov KBASE, LFUNC:KBASE->pc
+ | mov KBASE, [KBASE+PC2PROTO(k)]
+ | mov L:RB->base, BASE
+ | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0
+ | set_vmstate INTERP
+ | // Modified copy of ins_next which handles function header dispatch, too.
+ | mov RCd, [PC]
+ | movzx RAd, RCH
+ | movzx OP, RCL
+ | add PC, 4
+ | shr RCd, 16
+ | cmp OP, BC_FUNCF // Function header?
+ | jb >3
+ | cmp OP, BC_FUNCC+2 // Fast function?
+ | jae >4
+ |2:
+ | mov RCd, MULTRES // RC/RD holds nres+1.
+ |3:
+ | jmp aword [DISPATCH+OP*8]
+ |
+ |4: // Check frame below fast function.
+ | mov RC, [BASE-8]
+ | test RCd, FRAME_TYPE
+ | jnz <2 // Trace stitching continuation?
+ | // Otherwise set KBASE for Lua function below fast function.
+ | movzx RCd, byte [RC-3]
+ | neg RC
+ | mov LFUNC:KBASE, [BASE+RC*8-32]
+ | cleartp LFUNC:KBASE
+ | mov KBASE, LFUNC:KBASE->pc
+ | mov KBASE, [KBASE+PC2PROTO(k)]
+ | jmp <2
+ |
+ |9: // Rethrow error from the right C frame.
+ | mov CARG2d, RDd
+ | mov CARG1, L:RB
+ | neg CARG2d
+ | call extern lj_err_trace // (lua_State *L, int errcode)
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Math helper functions ----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// FP value rounding. Called by math.floor/math.ceil fast functions
+ |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
+ |.macro vm_round, name, mode, cond
+ |->name:
+ |->name .. _sse:
+ | sseconst_abs xmm2, RD
+ | sseconst_2p52 xmm3, RD
+ | movaps xmm1, xmm0
+ | andpd xmm1, xmm2 // |x|
+ | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
+ | jbe >1
+ | andnpd xmm2, xmm0 // Isolate sign bit.
+ |.if mode == 2 // trunc(x)?
+ | movaps xmm0, xmm1
+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
+ | subsd xmm1, xmm3
+ | sseconst_1 xmm3, RD
+ | cmpsd xmm0, xmm1, 1 // |x| < result?
+ | andpd xmm0, xmm3
+ | subsd xmm1, xmm0 // If yes, subtract -1.
+ | orpd xmm1, xmm2 // Merge sign bit back in.
+ |.else
+ | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
+ | subsd xmm1, xmm3
+ | orpd xmm1, xmm2 // Merge sign bit back in.
+ | sseconst_1 xmm3, RD
+ | .if mode == 1 // ceil(x)?
+ | cmpsd xmm0, xmm1, 6 // x > result?
+ | andpd xmm0, xmm3
+ | addsd xmm1, xmm0 // If yes, add 1.
+ | orpd xmm1, xmm2 // Merge sign bit back in (again).
+ | .else // floor(x)?
+ | cmpsd xmm0, xmm1, 1 // x < result?
+ | andpd xmm0, xmm3
+ | subsd xmm1, xmm0 // If yes, subtract 1.
+ | .endif
+ |.endif
+ | movaps xmm0, xmm1
+ |1:
+ | ret
+ |.endmacro
+ |
+ | vm_round vm_floor, 0, 1
+ | vm_round vm_ceil, 1, JIT
+ | vm_round vm_trunc, 2, JIT
+ |
+ |// FP modulo x%y. Called by BC_MOD* and vm_arith.
+ |->vm_mod:
+ |// Args in xmm0/xmm1, return value in xmm0.
+ |// Caveat: xmm0-xmm5 and RC (eax) modified!
+ | movaps xmm5, xmm0
+ | divsd xmm0, xmm1
+ | sseconst_abs xmm2, RD
+ | sseconst_2p52 xmm3, RD
+ | movaps xmm4, xmm0
+ | andpd xmm4, xmm2 // |x/y|
+ | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
+ | jbe >1
+ | andnpd xmm2, xmm0 // Isolate sign bit.
+ | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
+ | subsd xmm4, xmm3
+ | orpd xmm4, xmm2 // Merge sign bit back in.
+ | sseconst_1 xmm2, RD
+ | cmpsd xmm0, xmm4, 1 // x/y < result?
+ | andpd xmm0, xmm2
+ | subsd xmm4, xmm0 // If yes, subtract 1.0.
+ | movaps xmm0, xmm5
+ | mulsd xmm1, xmm4
+ | subsd xmm0, xmm1
+ | ret
+ |1:
+ | mulsd xmm1, xmm0
+ | movaps xmm0, xmm5
+ | subsd xmm0, xmm1
+ | ret
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Miscellaneous functions --------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
+ |->vm_cpuid:
+ | mov eax, CARG1d
+ | .if X64WIN; push rsi; mov rsi, CARG2; .endif
+ | push rbx
+ | xor ecx, ecx
+ | cpuid
+ | mov [rsi], eax
+ | mov [rsi+4], ebx
+ | mov [rsi+8], ecx
+ | mov [rsi+12], edx
+ | pop rbx
+ | .if X64WIN; pop rsi; .endif
+ | ret
+ |
+ |.define NEXT_TAB, TAB:CARG1
+ |.define NEXT_IDX, CARG2d
+ |.define NEXT_IDXa, CARG2
+ |.define NEXT_PTR, RC
+ |.define NEXT_PTRd, RCd
+ |.define NEXT_TMP, CARG3
+ |.define NEXT_ASIZE, CARG4d
+ |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
+ |.if X64WIN
+ |.define NEXT_RES_PTR, [rsp+aword*5]
+ |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
+ |.else
+ |.define NEXT_RES_PTR, [rsp+aword*1]
+ |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
+ |.endif
+ |
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+ |// Next idx returned in edx.
+ |->vm_next:
+ |.if JIT
+ | mov NEXT_ASIZE, NEXT_TAB->asize
+ |1: // Traverse array part.
+ | cmp NEXT_IDX, NEXT_ASIZE; jae >5
+ | mov NEXT_TMP, NEXT_TAB->array
+ | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8]
+ | cmp NEXT_TMP, LJ_TNIL; je >2
+ | lea NEXT_PTR, NEXT_RES_PTR
+ | mov qword [NEXT_PTR], NEXT_TMP
+ |.if DUALNUM
+ | setint NEXT_TMP, NEXT_IDXa
+ | mov qword [NEXT_PTR+qword*1], NEXT_TMP
+ |.else
+ | cvtsi2sd xmm0, NEXT_IDX
+ | movsd qword [NEXT_PTR+qword*1], xmm0
+ |.endif
+ | NEXT_RES_IDX 1
+ | ret
+ |2: // Skip holes in array part.
+ | add NEXT_IDX, 1
+ | jmp <1
+ |
+ |5: // Traverse hash part.
+ | sub NEXT_IDX, NEXT_ASIZE
+ |6:
+ | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9
+ | imul NEXT_PTRd, NEXT_IDX, #NODE
+ | add NODE:NEXT_PTR, NEXT_TAB->node
+ | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7
+ | NEXT_RES_IDXL NEXT_ASIZE+1
+ | ret
+ |7: // Skip holes in hash part.
+ | add NEXT_IDX, 1
+ | jmp <6
+ |
+ |9: // End of iteration. Set the key to nil (not the value).
+ | NEXT_RES_IDX NEXT_ASIZE
+ | lea NEXT_PTR, NEXT_RES_PTR
+ | mov qword [NEXT_PTR+qword*1], LJ_TNIL
+ | ret
+ |.endif
+ |
+ |//-----------------------------------------------------------------------
+ |//-- Assertions ---------------------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |->assert_bad_for_arg_type:
+#ifdef LUA_USE_ASSERT
+ | int3
+#endif
+ | int3
+ |
+ |//-----------------------------------------------------------------------
+ |//-- FFI helper functions -----------------------------------------------
+ |//-----------------------------------------------------------------------
+ |
+ |// Handler for callback functions. Callback slot number in ah/al.
+ |->vm_ffi_callback:
+ |.if FFI
+ |.type CTSTATE, CTState, PC
+ | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
+ | lea DISPATCH, [ebp+GG_G2DISP]
+ | mov CTSTATE, GL:ebp->ctype_state
+ | movzx eax, ax
+ | mov CTSTATE->cb.slot, eax
+ | mov CTSTATE->cb.gpr[0], CARG1
+ | mov CTSTATE->cb.gpr[1], CARG2
+ | mov CTSTATE->cb.gpr[2], CARG3
+ | mov CTSTATE->cb.gpr[3], CARG4
+ | movsd qword CTSTATE->cb.fpr[0], xmm0
+ | movsd qword CTSTATE->cb.fpr[1], xmm1
+ | movsd qword CTSTATE->cb.fpr[2], xmm2
+ | movsd qword CTSTATE->cb.fpr[3], xmm3
+ |.if X64WIN
+ | lea rax, [rsp+CFRAME_SIZE+4*8]
+ |.else
+ | lea rax, [rsp+CFRAME_SIZE]
+ | mov CTSTATE->cb.gpr[4], CARG5
+ | mov CTSTATE->cb.gpr[5], CARG6
+ | movsd qword CTSTATE->cb.fpr[4], xmm4
+ | movsd qword CTSTATE->cb.fpr[5], xmm5
+ | movsd qword CTSTATE->cb.fpr[6], xmm6
+ | movsd qword CTSTATE->cb.fpr[7], xmm7
+ |.endif
+ | mov CTSTATE->cb.stack, rax
+ | mov CARG2, rsp
+ | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
+ | mov CARG1, CTSTATE
+ | call extern lj_ccallback_enter // (CTState *cts, void *cf)
+ | // lua_State * returned in eax (RD).
+ | set_vmstate INTERP
+ | mov BASE, L:RD->base
+ | mov RD, L:RD->top
+ | sub RD, BASE
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | shr RD, 3
+ | add RD, 1
+ | ins_callt
+ |.endif
+ |
+ |->cont_ffi_callback: // Return from FFI callback.
+ |.if FFI
+ | mov L:RA, SAVE_L
+ | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
+ | mov aword CTSTATE->L, L:RA
+ | mov L:RA->base, BASE
+ | mov L:RA->top, RB
+ | mov CARG1, CTSTATE
+ | mov CARG2, RC
+ | call extern lj_ccallback_leave // (CTState *cts, TValue *o)
+ | mov rax, CTSTATE->cb.gpr[0]
+ | movsd xmm0, qword CTSTATE->cb.fpr[0]
+ | jmp ->vm_leave_unw
+ |.endif
+ |
+ |->vm_ffi_call: // Call C function via FFI.
+ | // Caveat: needs special frame unwinding, see below.
+ |.if FFI
+ | .type CCSTATE, CCallState, rbx
+ | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1
+ |
+ | // Readjust stack.
+ | mov eax, CCSTATE->spadj
+ | sub rsp, rax
+ |
+ | // Copy stack slots.
+ | movzx ecx, byte CCSTATE->nsp
+ | sub ecx, 1
+ | js >2
+ |1:
+ | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
+ | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
+ | sub ecx, 1
+ | jns <1
+ |2:
+ |
+ | movzx eax, byte CCSTATE->nfpr
+ | mov CARG1, CCSTATE->gpr[0]
+ | mov CARG2, CCSTATE->gpr[1]
+ | mov CARG3, CCSTATE->gpr[2]
+ | mov CARG4, CCSTATE->gpr[3]
+ |.if not X64WIN
+ | mov CARG5, CCSTATE->gpr[4]
+ | mov CARG6, CCSTATE->gpr[5]
+ |.endif
+ | test eax, eax; jz >5
+ | movaps xmm0, CCSTATE->fpr[0]
+ | movaps xmm1, CCSTATE->fpr[1]
+ | movaps xmm2, CCSTATE->fpr[2]
+ | movaps xmm3, CCSTATE->fpr[3]
+ |.if not X64WIN
+ | cmp eax, 4; jbe >5
+ | movaps xmm4, CCSTATE->fpr[4]
+ | movaps xmm5, CCSTATE->fpr[5]
+ | movaps xmm6, CCSTATE->fpr[6]
+ | movaps xmm7, CCSTATE->fpr[7]
+ |.endif
+ |5:
+ |
+ | call aword CCSTATE->func
+ |
+ | mov CCSTATE->gpr[0], rax
+ | movaps CCSTATE->fpr[0], xmm0
+ |.if not X64WIN
+ | mov CCSTATE->gpr[1], rdx
+ | movaps CCSTATE->fpr[1], xmm1
+ |.endif
+ |
+ | mov rbx, [rbp-8]; leave; ret
+ |.endif
+ |// Note: vm_ffi_call must be the last function in this object file!
+ |
+ |//-----------------------------------------------------------------------
+}
+
+/* Generate the code for a single instruction. */
+static void build_ins(BuildCtx *ctx, BCOp op, int defop)
+{
+ int vk = 0;
+ |// Note: aligning all instructions does not pay off.
+ |=>defop:
+
+ switch (op) {
+
+ /* -- Comparison ops ---------------------------------------------------- */
+
+ /* Remember: all ops branch for a true comparison, fall through otherwise. */
+
+ |.macro jmp_comp, lt, ge, le, gt, target
+ ||switch (op) {
+ ||case BC_ISLT:
+ | lt target
+ ||break;
+ ||case BC_ISGE:
+ | ge target
+ ||break;
+ ||case BC_ISLE:
+ | le target
+ ||break;
+ ||case BC_ISGT:
+ | gt target
+ ||break;
+ ||default: break; /* Shut up GCC. */
+ ||}
+ |.endmacro
+
+ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
+ | // RA = src1, RD = src2, JMP with RD = target
+ | ins_AD
+ | mov ITYPE, [BASE+RA*8]
+ | mov RB, [BASE+RD*8]
+ | mov RA, ITYPE
+ | mov RD, RB
+ | sar ITYPE, 47
+ | sar RB, 47
+ |.if DUALNUM
+ | cmp ITYPEd, LJ_TISNUM; jne >7
+ | cmp RBd, LJ_TISNUM; jne >8
+ | add PC, 4
+ | cmp RAd, RDd
+ | jmp_comp jge, jl, jg, jle, >9
+ |6:
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RA is not an integer.
+ | ja ->vmeta_comp
+ | // RA is a number.
+ | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp
+ | // RA is a number, RD is an integer.
+ | cvtsi2sd xmm0, RDd
+ | jmp >2
+ |
+ |8: // RA is an integer, RD is not an integer.
+ | ja ->vmeta_comp
+ | // RA is an integer, RD is a number.
+ | cvtsi2sd xmm1, RAd
+ | movd xmm0, RD
+ | jmp >3
+ |.else
+ | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp
+ | cmp RBd, LJ_TISNUM; jae ->vmeta_comp
+ |.endif
+ |1:
+ | movd xmm0, RD
+ |2:
+ | movd xmm1, RA
+ |3:
+ | add PC, 4
+ | ucomisd xmm0, xmm1
+ | // Unordered: all of ZF CF PF set, ordered: PF clear.
+ | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
+ |.if DUALNUM
+ | jmp_comp jbe, ja, jb, jae, <9
+ | jmp <6
+ |.else
+ | jmp_comp jbe, ja, jb, jae, >1
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |1:
+ | ins_next
+ |.endif
+ break;
+
+ case BC_ISEQV: case BC_ISNEV:
+ vk = op == BC_ISEQV;
+ | ins_AD // RA = src1, RD = src2, JMP with RD = target
+ | mov RB, [BASE+RD*8]
+ | mov ITYPE, [BASE+RA*8]
+ | add PC, 4
+ | mov RD, RB
+ | mov RA, ITYPE
+ | sar RB, 47
+ | sar ITYPE, 47
+ |.if DUALNUM
+ | cmp RBd, LJ_TISNUM; jne >7
+ | cmp ITYPEd, LJ_TISNUM; jne >8
+ | cmp RDd, RAd
+ if (vk) {
+ | jne >9
+ } else {
+ | je >9
+ }
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RD is not an integer.
+ | ja >5
+ | // RD is a number.
+ | movd xmm1, RD
+ | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5
+ | // RD is a number, RA is an integer.
+ | cvtsi2sd xmm0, RAd
+ | jmp >2
+ |
+ |8: // RD is an integer, RA is not an integer.
+ | ja >5
+ | // RD is an integer, RA is a number.
+ | cvtsi2sd xmm1, RDd
+ | jmp >1
+ |
+ |.else
+ | cmp RBd, LJ_TISNUM; jae >5
+ | cmp ITYPEd, LJ_TISNUM; jae >5
+ | movd xmm1, RD
+ |.endif
+ |1:
+ | movd xmm0, RA
+ |2:
+ | ucomisd xmm0, xmm1
+ |4:
+ iseqne_fp:
+ if (vk) {
+ | jp >2 // Unordered means not equal.
+ | jne >2
+ } else {
+ | jp >2 // Unordered means not equal.
+ | je >1
+ }
+ iseqne_end:
+ if (vk) {
+ |1: // EQ: Branch to the target.
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |2: // NE: Fallthrough to next instruction.
+ |.if not FFI
+ |3:
+ |.endif
+ } else {
+ |.if not FFI
+ |3:
+ |.endif
+ |2: // NE: Branch to the target.
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |1: // EQ: Fallthrough to next instruction.
+ }
+ if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
+ op == BC_ISEQN || op == BC_ISNEN)) {
+ | jmp <9
+ } else {
+ | ins_next
+ }
+ |
+ if (op == BC_ISEQV || op == BC_ISNEV) {
+ |5: // Either or both types are not numbers.
+ |.if FFI
+ | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
+ | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd
+ |.endif
+ | cmp RA, RD
+ | je <1 // Same GCobjs or pvalues?
+ | cmp RBd, ITYPEd
+ | jne <2 // Not the same type?
+ | cmp RBd, LJ_TISTABUD
+ | ja <2 // Different objects and not table/ud?
+ |
+ | // Different tables or userdatas. Need to check __eq metamethod.
+ | // Field metatable must be at same offset for GCtab and GCudata!
+ | cleartp TAB:RA
+ | mov TAB:RB, TAB:RA->metatable
+ | test TAB:RB, TAB:RB
+ | jz <2 // No metatable?
+ | test byte TAB:RB->nomm, 1<<MM_eq
+ | jnz <2 // Or 'no __eq' flag set?
+ if (vk) {
+ | xor RBd, RBd // ne = 0
+ } else {
+ | mov RBd, 1 // ne = 1
+ }
+ | jmp ->vmeta_equal // Handle __eq metamethod.
+ } else {
+ |.if FFI
+ |3:
+ | cmp ITYPEd, LJ_TCDATA
+ if (LJ_DUALNUM && vk) {
+ | jne <9
+ } else {
+ | jne <2
+ }
+ | jmp ->vmeta_equal_cd
+ |.endif
+ }
+ break;
+ case BC_ISEQS: case BC_ISNES:
+ vk = op == BC_ISEQS;
+ | ins_AND // RA = src, RD = str const, JMP with RD = target
+ | mov RB, [BASE+RA*8]
+ | add PC, 4
+ | checkstr RB, >3
+ | cmp RB, [KBASE+RD*8]
+ iseqne_test:
+ if (vk) {
+ | jne >2
+ } else {
+ | je >1
+ }
+ goto iseqne_end;
+ case BC_ISEQN: case BC_ISNEN:
+ vk = op == BC_ISEQN;
+ | ins_AD // RA = src, RD = num const, JMP with RD = target
+ | mov RB, [BASE+RA*8]
+ | add PC, 4
+ |.if DUALNUM
+ | checkint RB, >7
+ | mov RD, [KBASE+RD*8]
+ | checkint RD, >8
+ | cmp RBd, RDd
+ if (vk) {
+ | jne >9
+ } else {
+ | je >9
+ }
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |9:
+ | ins_next
+ |
+ |7: // RA is not an integer.
+ | ja >3
+ | // RA is a number.
+ | mov RD, [KBASE+RD*8]
+ | checkint RD, >1
+ | // RA is a number, RD is an integer.
+ | cvtsi2sd xmm0, RDd
+ | jmp >2
+ |
+ |8: // RA is an integer, RD is a number.
+ | cvtsi2sd xmm0, RBd
+ | movd xmm1, RD
+ | ucomisd xmm0, xmm1
+ | jmp >4
+ |1:
+ | movd xmm0, RD
+ |.else
+ | checknum RB, >3
+ |1:
+ | movsd xmm0, qword [KBASE+RD*8]
+ |.endif
+ |2:
+ | ucomisd xmm0, qword [BASE+RA*8]
+ |4:
+ goto iseqne_fp;
+ case BC_ISEQP: case BC_ISNEP:
+ vk = op == BC_ISEQP;
+ | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
+ | mov RB, [BASE+RA*8]
+ | sar RB, 47
+ | add PC, 4
+ | cmp RBd, RDd
+ if (!LJ_HASFFI) goto iseqne_test;
+ if (vk) {
+ | jne >3
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |2:
+ | ins_next
+ |3:
+ | cmp RBd, LJ_TCDATA; jne <2
+ | jmp ->vmeta_equal_cd
+ } else {
+ | je >2
+ | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |2:
+ | ins_next
+ }
+ break;
+
+ /* -- Unary test and copy ops ------------------------------------------- */
+
+ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
+ | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
+ | mov ITYPE, [BASE+RD*8]
+ | add PC, 4
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | mov RB, ITYPE
+ }
+ | sar ITYPE, 47
+ | cmp ITYPEd, LJ_TISTRUECOND
+ if (op == BC_IST || op == BC_ISTC) {
+ | jae >1
+ } else {
+ | jb >1
+ }
+ if (op == BC_ISTC || op == BC_ISFC) {
+ | mov [BASE+RA*8], RB
+ }
+ | movzx RDd, PC_RD
+ | branchPC RD
+ |1: // Fallthrough to the next instruction.
+ | ins_next
+ break;
+
+ case BC_ISTYPE:
+ | ins_AD // RA = src, RD = -type
+ | mov RB, [BASE+RA*8]
+ | sar RB, 47
+ | add RBd, RDd
+ | jne ->vmeta_istype
+ | ins_next
+ break;
+ case BC_ISNUM:
+ | ins_AD // RA = src, RD = -(TISNUM-1)
+ | checknumtp [BASE+RA*8], ->vmeta_istype
+ | ins_next
+ break;
+
+ /* -- Unary ops --------------------------------------------------------- */
+
+ case BC_MOV:
+ | ins_AD // RA = dst, RD = src
+ | mov RB, [BASE+RD*8]
+ | mov [BASE+RA*8], RB
+ | ins_next_
+ break;
+ case BC_NOT:
+ | ins_AD // RA = dst, RD = src
+ | mov RB, [BASE+RD*8]
+ | sar RB, 47
+ | mov RCd, 2
+ | cmp RB, LJ_TISTRUECOND
+ | sbb RCd, 0
+ | shl RC, 47
+ | not RC
+ | mov [BASE+RA*8], RC
+ | ins_next
+ break;
+ case BC_UNM:
+ | ins_AD // RA = dst, RD = src
+ | mov RB, [BASE+RD*8]
+ |.if DUALNUM
+ | checkint RB, >5
+ | neg RBd
+ | jo >4
+ | setint RB
+ |9:
+ | mov [BASE+RA*8], RB
+ | ins_next
+ |4:
+ | mov64 RB, U64x(41e00000,00000000) // 2^31.
+ | jmp <9
+ |5:
+ | ja ->vmeta_unm
+ |.else
+ | checknum RB, ->vmeta_unm
+ |.endif
+ | mov64 RD, U64x(80000000,00000000)
+ | xor RB, RD
+ |.if DUALNUM
+ | jmp <9
+ |.else
+ | mov [BASE+RA*8], RB
+ | ins_next
+ |.endif
+ break;
+ case BC_LEN:
+ | ins_AD // RA = dst, RD = src
+ | mov RD, [BASE+RD*8]
+ | checkstr RD, >2
+ |.if DUALNUM
+ | mov RDd, dword STR:RD->len
+ |1:
+ | setint RD
+ | mov [BASE+RA*8], RD
+ |.else
+ | xorps xmm0, xmm0
+ | cvtsi2sd xmm0, dword STR:RD->len
+ |1:
+ | movsd qword [BASE+RA*8], xmm0
+ |.endif
+ | ins_next
+ |2:
+ | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len
+ | mov TAB:CARG1, TAB:RD
+#if LJ_52
+ | mov TAB:RB, TAB:RD->metatable
+ | cmp TAB:RB, 0
+ | jnz >9
+ |3:
+#endif
+ |->BC_LEN_Z:
+ | mov RB, BASE // Save BASE.
+ | call extern lj_tab_len // (GCtab *t)
+ | // Length of table returned in eax (RD).
+ |.if DUALNUM
+ | // Nothing to do.
+ |.else
+ | cvtsi2sd xmm0, RDd
+ |.endif
+ | mov BASE, RB // Restore BASE.
+ | movzx RAd, PC_RA
+ | jmp <1
+#if LJ_52
+ |9: // Check for __len.
+ | test byte TAB:RB->nomm, 1<<MM_len
+ | jnz <3
+ | jmp ->vmeta_len // 'no __len' flag NOT set: check.
+#endif
+ break;
+
+ /* -- Binary ops -------------------------------------------------------- */
+
+ | // Operand setup for binary arithmetic ops. vk classifies the operand
+ | // sources from the opcode: 0 = VN (slot op number const), 1 = NV
+ | // (number const op slot), otherwise VV (slot op slot). Each non-number
+ | // operand exits to the matching ->vmeta_arith_* fallback. On the FP
+ | // path the left operand ends up in xmm0 and sseins is applied with the
+ | // right operand as the memory source.
+ |.macro ins_arithpre, sseins, ssereg
+ | ins_ABC
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+ | checknumtp [BASE+RB*8], ->vmeta_arith_vn
+ | .if DUALNUM
+ | checknumtp [KBASE+RC*8], ->vmeta_arith_vn // NOTE: constant may hold an integer in dual-number mode.
+ | .endif
+ | movsd xmm0, qword [BASE+RB*8]
+ | sseins ssereg, qword [KBASE+RC*8]
+ || break;
+ ||case 1:
+ | checknumtp [BASE+RB*8], ->vmeta_arith_nv
+ | .if DUALNUM
+ | checknumtp [KBASE+RC*8], ->vmeta_arith_nv
+ | .endif
+ | movsd xmm0, qword [KBASE+RC*8] // Constant is the LEFT operand here.
+ | sseins ssereg, qword [BASE+RB*8]
+ || break;
+ ||default:
+ | checknumtp [BASE+RB*8], ->vmeta_arith_vv
+ | checknumtp [BASE+RC*8], ->vmeta_arith_vv
+ | movsd xmm0, qword [BASE+RB*8]
+ | sseins ssereg, qword [BASE+RC*8]
+ || break;
+ ||}
+ |.endmacro
+ |
+ | // Integer (dual-number) arithmetic: load both operands, guard that both
+ | // are integers, apply intins and bail out to the ->vmeta_arith_*o
+ | // fallback on a type mismatch OR on signed overflow (jo). The result
+ | // register depends on vk: for NV (vk == 1) intins targeted RCd, so RC
+ | // is retagged and stored; otherwise RB is. Includes ins_next.
+ |.macro ins_arithdn, intins
+ | ins_ABC
+ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
+ ||switch (vk) {
+ ||case 0:
+ | mov RB, [BASE+RB*8]
+ | mov RC, [KBASE+RC*8]
+ | checkint RB, ->vmeta_arith_vno
+ | checkint RC, ->vmeta_arith_vno
+ | intins RBd, RCd; jo ->vmeta_arith_vno // Overflow? Redo as FP/metamethod.
+ || break;
+ ||case 1:
+ | mov RB, [BASE+RB*8]
+ | mov RC, [KBASE+RC*8]
+ | checkint RB, ->vmeta_arith_nvo
+ | checkint RC, ->vmeta_arith_nvo
+ | intins RCd, RBd; jo ->vmeta_arith_nvo // Constant op slot: destination is RCd.
+ || break;
+ ||default:
+ | mov RB, [BASE+RB*8]
+ | mov RC, [BASE+RC*8]
+ | checkint RB, ->vmeta_arith_vvo
+ | checkint RC, ->vmeta_arith_vvo
+ | intins RBd, RCd; jo ->vmeta_arith_vvo
+ || break;
+ ||}
+ ||if (vk == 1) {
+ | setint RC
+ | mov [BASE+RA*8], RC
+ ||} else {
+ | setint RB
+ | mov [BASE+RA*8], RB
+ ||}
+ | ins_next
+ |.endmacro
+ |
+ | // Store the FP result from xmm0 into the destination slot.
+ |.macro ins_arithpost
+ | movsd qword [BASE+RA*8], xmm0
+ |.endmacro
+ |
+ | // FP-only variant: setup, store, dispatch.
+ |.macro ins_arith, sseins
+ | ins_arithpre sseins, xmm0
+ | ins_arithpost
+ | ins_next
+ |.endmacro
+ |
+ | // Two-arg overload: in dual-number builds try the integer fast path
+ | // first (it falls back via ->vmeta_arith_*o); otherwise go straight
+ | // to the FP variant above.
+ |.macro ins_arith, intins, sseins
+ |.if DUALNUM
+ | ins_arithdn intins
+ |.else
+ | ins_arith, sseins
+ |.endif
+ |.endmacro
+
+ | // RA = dst, RB = src1 or num const, RC = src2 or num const
+ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
+ | ins_arith add, addsd
+ break;
+ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
+ | ins_arith sub, subsd
+ break;
+ case BC_MULVN: case BC_MULNV: case BC_MULVV:
+ | ins_arith imul, mulsd
+ break;
+ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
+ | ins_arith divsd
+ break;
+ case BC_MODVN:
+ | ins_arithpre movsd, xmm1
+ |->BC_MODVN_Z:
+ | call ->vm_mod
+ | ins_arithpost
+ | ins_next
+ break;
+ case BC_MODNV: case BC_MODVV:
+ | ins_arithpre movsd, xmm1
+ | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
+ break;
+ case BC_POW:
+ | ins_arithpre movsd, xmm1
+ | mov RB, BASE
+ | call extern pow
+ | movzx RAd, PC_RA
+ | mov BASE, RB
+ | ins_arithpost
+ | ins_next
+ break;
+
+ case BC_CAT:
+ | ins_ABC // RA = dst, RB = src_start, RC = src_end
+ | mov L:CARG1, SAVE_L
+ | mov L:CARG1->base, BASE
+ | lea CARG2, [BASE+RC*8]
+ | mov CARG3d, RCd
+ | sub CARG3d, RBd
+ |->BC_CAT_Z:
+ | mov L:RB, L:CARG1
+ | mov SAVE_PC, PC
+ | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
+ | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
+ | mov BASE, L:RB->base
+ | test RC, RC
+ | jnz ->vmeta_binop
+ | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB].
+ | movzx RAd, PC_RA
+ | mov RC, [BASE+RB*8]
+ | mov [BASE+RA*8], RC
+ | ins_next
+ break;
+
+ /* -- Constant ops ------------------------------------------------------ */
+
+ case BC_KSTR:
+ | ins_AND // RA = dst, RD = str const (~)
+ | mov RD, [KBASE+RD*8]
+ | settp RD, LJ_TSTR
+ | mov [BASE+RA*8], RD
+ | ins_next
+ break;
+ case BC_KCDATA:
+ |.if FFI
+ | ins_AND // RA = dst, RD = cdata const (~)
+ | mov RD, [KBASE+RD*8]
+ | settp RD, LJ_TCDATA
+ | mov [BASE+RA*8], RD
+ | ins_next
+ |.endif
+ break;
+ case BC_KSHORT:
+ | ins_AD // RA = dst, RD = signed int16 literal
+ |.if DUALNUM
+ | movsx RDd, RDW
+ | setint RD
+ | mov [BASE+RA*8], RD
+ |.else
+ | movsx RDd, RDW // Sign-extend literal.
+ | cvtsi2sd xmm0, RDd
+ | movsd qword [BASE+RA*8], xmm0
+ |.endif
+ | ins_next
+ break;
+ case BC_KNUM:
+ | ins_AD // RA = dst, RD = num const
+ | movsd xmm0, qword [KBASE+RD*8]
+ | movsd qword [BASE+RA*8], xmm0
+ | ins_next
+ break;
+ case BC_KPRI:
+ | ins_AD // RA = dst, RD = primitive type (~)
+ | shl RD, 47
+ | not RD
+ | mov [BASE+RA*8], RD
+ | ins_next
+ break;
+ case BC_KNIL:
+ | ins_AD // RA = dst_start, RD = dst_end
+ | lea RA, [BASE+RA*8+8]
+ | lea RD, [BASE+RD*8]
+ | mov RB, LJ_TNIL
+ | mov [RA-8], RB // Sets minimum 2 slots.
+ |1:
+ | mov [RA], RB
+ | add RA, 8
+ | cmp RA, RD
+ | jbe <1
+ | ins_next
+ break;
+
+ /* -- Upvalue and function ops ------------------------------------------ */
+
+ case BC_UGET:
+ | ins_AD // RA = dst, RD = upvalue #
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)]
+ | mov RB, UPVAL:RB->v
+ | mov RD, [RB]
+ | mov [BASE+RA*8], RD
+ | ins_next
+ break;
+ case BC_USETV:
+#define TV2MARKOFS \
+ ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
+ | ins_AD // RA = upvalue #, RD = src
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+ | cmp byte UPVAL:RB->closed, 0
+ | mov RB, UPVAL:RB->v
+ | mov RA, [BASE+RD*8]
+ | mov [RB], RA
+ | jz >1
+ | // Check barrier for closed upvalue.
+ | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
+ | jnz >2
+ |1:
+ | ins_next
+ |
+ |2: // Upvalue is black. Check if new value is collectable and white.
+ | mov RD, RA
+ | sar RD, 47
+ | sub RDd, LJ_TISGCV
+ | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
+ | jbe <1
+ | cleartp GCOBJ:RA
+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
+ | jz <1
+ | // Crossed a write barrier. Move the barrier forward.
+ |.if not X64WIN
+ | mov CARG2, RB
+ | mov RB, BASE // Save BASE.
+ |.else
+ | xchg CARG2, RB // Save BASE (CARG2 == BASE).
+ |.endif
+ | lea GL:CARG1, [DISPATCH+GG_DISP2G]
+ | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | mov BASE, RB // Restore BASE.
+ | jmp <1
+ break;
+#undef TV2MARKOFS
+ case BC_USETS:
+ | ins_AND // RA = upvalue #, RD = str const (~)
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+ | mov STR:RA, [KBASE+RD*8]
+ | mov RD, UPVAL:RB->v
+ | settp STR:ITYPE, STR:RA, LJ_TSTR
+ | mov [RD], STR:ITYPE
+ | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
+ | jnz >2
+ |1:
+ | ins_next
+ |
+ |2: // Check if string is white and ensure upvalue is closed.
+ | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
+ | jz <1
+ | cmp byte UPVAL:RB->closed, 0
+ | jz <1
+ | // Crossed a write barrier. Move the barrier forward.
+ | mov RB, BASE // Save BASE (CARG2 == BASE).
+ | mov CARG2, RD
+ | lea GL:CARG1, [DISPATCH+GG_DISP2G]
+ | call extern lj_gc_barrieruv // (global_State *g, TValue *tv)
+ | mov BASE, RB // Restore BASE.
+ | jmp <1
+ break;
+ case BC_USETN:
+ | ins_AD // RA = upvalue #, RD = num const
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | movsd xmm0, qword [KBASE+RD*8]
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+ | mov RA, UPVAL:RB->v
+ | movsd qword [RA], xmm0
+ | ins_next
+ break;
+ case BC_USETP:
+ | ins_AD // RA = upvalue #, RD = primitive type (~)
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)]
+ | shl RD, 47
+ | not RD
+ | mov RA, UPVAL:RB->v
+ | mov [RA], RD
+ | ins_next
+ break;
+ case BC_UCLO:
+ | ins_AD // RA = level, RD = target
+ | branchPC RD // Do this first to free RD.
+ | mov L:RB, SAVE_L
+ | cmp aword L:RB->openupval, 0
+ | je >1
+ | mov L:RB->base, BASE
+ | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE
+ | mov L:CARG1, L:RB // Caveat: CARG1 == RA
+ | call extern lj_func_closeuv // (lua_State *L, TValue *level)
+ | mov BASE, L:RB->base
+ |1:
+ | ins_next
+ break;
+
+ case BC_FNEW:
+ | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE.
+ | mov CARG3, [BASE-16]
+ | cleartp CARG3
+ | mov CARG2, [KBASE+RD*8] // Fetch GCproto *.
+ | mov CARG1, L:RB
+ | mov SAVE_PC, PC
+ | // (lua_State *L, GCproto *pt, GCfuncL *parent)
+ | call extern lj_func_newL_gc
+ | // GCfuncL * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | movzx RAd, PC_RA
+ | settp LFUNC:RC, LJ_TFUNC
+ | mov [BASE+RA*8], LFUNC:RC
+ | ins_next
+ break;
+
+ /* -- Table ops --------------------------------------------------------- */
+
+ case BC_TNEW:
+ | ins_AD // RA = dst, RD = hbits|asize
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
+ | mov SAVE_PC, PC
+ | jae >5
+ |1:
+ | mov CARG3d, RDd
+ | and RDd, 0x7ff
+ | shr CARG3d, 11
+ | cmp RDd, 0x7ff
+ | je >3
+ |2:
+ | mov L:CARG1, L:RB
+ | mov CARG2d, RDd
+ | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
+ | // Table * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | movzx RAd, PC_RA
+ | settp TAB:RC, LJ_TTAB
+ | mov [BASE+RA*8], TAB:RC
+ | ins_next
+ |3: // Turn 0x7ff into 0x801.
+ | mov RDd, 0x801
+ | jmp <2
+ |5:
+ | mov L:CARG1, L:RB
+ | call extern lj_gc_step_fixtop // (lua_State *L)
+ | movzx RDd, PC_RD
+ | jmp <1
+ break;
+ case BC_TDUP:
+ | ins_AND // RA = dst, RD = table const (~) (holding template table)
+ | mov L:RB, SAVE_L
+ | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
+ | mov SAVE_PC, PC
+ | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
+ | mov L:RB->base, BASE
+ | jae >3
+ |2:
+ | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE
+ | mov L:CARG1, L:RB // Caveat: CARG1 == RA
+ | call extern lj_tab_dup // (lua_State *L, Table *kt)
+ | // Table * returned in eax (RC).
+ | mov BASE, L:RB->base
+ | movzx RAd, PC_RA
+ | settp TAB:RC, LJ_TTAB
+ | mov [BASE+RA*8], TAB:RC
+ | ins_next
+ |3:
+ | mov L:CARG1, L:RB
+ | call extern lj_gc_step_fixtop // (lua_State *L)
+ | movzx RDd, PC_RD // Need to reload RD.
+ | not RD
+ | jmp <2
+ break;
+
+ case BC_GGET:
+ | ins_AND // RA = dst, RD = str const (~)
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov TAB:RB, LFUNC:RB->env
+ | mov STR:RC, [KBASE+RD*8]
+ | jmp ->BC_TGETS_Z
+ break;
+ case BC_GSET:
+ | ins_AND // RA = src, RD = str const (~)
+ | mov LFUNC:RB, [BASE-16]
+ | cleartp LFUNC:RB
+ | mov TAB:RB, LFUNC:RB->env
+ | mov STR:RC, [KBASE+RD*8]
+ | jmp ->BC_TSETS_Z
+ break;
+
+ case BC_TGETV:
+ | ins_ABC // RA = dst, RB = table, RC = key
+ | mov TAB:RB, [BASE+RB*8]
+ | mov RC, [BASE+RC*8]
+ | checktab TAB:RB, ->vmeta_tgetv
+ |
+ | // Integer key?
+ |.if DUALNUM
+ | checkint RC, >5
+ |.else
+ | // Convert number to int and back and compare.
+ | checknum RC, >5
+ | movd xmm0, RC
+ | cvttsd2si RCd, xmm0
+ | cvtsi2sd xmm1, RCd
+ | ucomisd xmm0, xmm1
+ | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
+ |.endif
+ | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
+ | jae ->vmeta_tgetv // Not in array part? Use fallback.
+ | shl RCd, 3
+ | add RC, TAB:RB->array
+ | // Get array slot.
+ | mov ITYPE, [RC]
+ | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath.
+ | je >2
+ |1:
+ | mov [BASE+RA*8], ITYPE
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | mov TAB:TMPR, TAB:RB->metatable
+ | test TAB:TMPR, TAB:TMPR
+ | jz <1
+ | test byte TAB:TMPR->nomm, 1<<MM_index
+ | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
+ | jmp <1
+ |
+ |5: // String key?
+ | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv
+ | cleartp STR:RC
+ | jmp ->BC_TGETS_Z
+ break;
+ case BC_TGETS:
+ | ins_ABC // RA = dst, RB = table, RC = str const (~)
+ | mov TAB:RB, [BASE+RB*8]
+ | not RC
+ | mov STR:RC, [KBASE+RC*8]
+ | checktab TAB:RB, ->vmeta_tgets
+ |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *
+ | mov TMPRd, TAB:RB->hmask
+ | and TMPRd, STR:RC->sid
+ | imul TMPRd, #NODE
+ | add NODE:TMPR, TAB:RB->node
+ | settp ITYPE, STR:RC, LJ_TSTR
+ |1:
+ | cmp NODE:TMPR->key, ITYPE
+ | jne >4
+ | // Get node value.
+ | mov ITYPE, NODE:TMPR->val
+ | cmp ITYPE, LJ_TNIL
+ | je >5 // Key found, but nil value?
+ |2:
+ | mov [BASE+RA*8], ITYPE
+ | ins_next
+ |
+ |4: // Follow hash chain.
+ | mov NODE:TMPR, NODE:TMPR->next
+ | test NODE:TMPR, NODE:TMPR
+ | jnz <1
+ | // End of hash chain: key not found, nil result.
+ | mov ITYPE, LJ_TNIL
+ |
+ |5: // Check for __index if table value is nil.
+ | mov TAB:TMPR, TAB:RB->metatable
+ | test TAB:TMPR, TAB:TMPR
+ | jz <2 // No metatable: done.
+ | test byte TAB:TMPR->nomm, 1<<MM_index
+ | jnz <2 // 'no __index' flag set: done.
+ | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
+ break;
+ case BC_TGETB:
+ | ins_ABC // RA = dst, RB = table, RC = byte literal
+ | mov TAB:RB, [BASE+RB*8]
+ | checktab TAB:RB, ->vmeta_tgetb
+ | cmp RCd, TAB:RB->asize
+ | jae ->vmeta_tgetb
+ | shl RCd, 3
+ | add RC, TAB:RB->array
+ | // Get array slot.
+ | mov ITYPE, [RC]
+ | cmp ITYPE, LJ_TNIL
+ | je >2
+ |1:
+ | mov [BASE+RA*8], ITYPE
+ | ins_next
+ |
+ |2: // Check for __index if table value is nil.
+ | mov TAB:TMPR, TAB:RB->metatable
+ | test TAB:TMPR, TAB:TMPR
+ | jz <1
+ | test byte TAB:TMPR->nomm, 1<<MM_index
+ | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
+ | jmp <1
+ break;
+ case BC_TGETR:
+ | ins_ABC // RA = dst, RB = table, RC = key
+ | mov TAB:RB, [BASE+RB*8]
+ | cleartp TAB:RB
+ |.if DUALNUM
+ | mov RCd, dword [BASE+RC*8]
+ |.else
+ | cvttsd2si RCd, qword [BASE+RC*8]
+ |.endif
+ | cmp RCd, TAB:RB->asize
+ | jae ->vmeta_tgetr // Not in array part? Use fallback.
+ | shl RCd, 3
+ | add RC, TAB:RB->array
+ | // Get array slot.
+ |->BC_TGETR_Z:
+ | mov ITYPE, [RC]
+ |->BC_TGETR2_Z:
+ | mov [BASE+RA*8], ITYPE
+ | ins_next
+ break;
+
+ case BC_TSETV:
+ | ins_ABC // RA = src, RB = table, RC = key
+ | mov TAB:RB, [BASE+RB*8]
+ | mov RC, [BASE+RC*8]
+ | checktab TAB:RB, ->vmeta_tsetv
+ |
+ | // Integer key?
+ |.if DUALNUM
+ | checkint RC, >5
+ |.else
+ | // Convert number to int and back and compare.
+ | checknum RC, >5
+ | movd xmm0, RC
+ | cvttsd2si RCd, xmm0
+ | cvtsi2sd xmm1, RCd
+ | ucomisd xmm0, xmm1
+ | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
+ |.endif
+ | cmp RCd, TAB:RB->asize // Takes care of unordered, too.
+ | jae ->vmeta_tsetv
+ | shl RCd, 3
+ | add RC, TAB:RB->array
+ | cmp aword [RC], LJ_TNIL
+ | je >3 // Previous value is nil?
+ |1:
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2: // Set array slot.
+ | mov RB, [BASE+RA*8]
+ | mov [RC], RB
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | mov TAB:TMPR, TAB:RB->metatable
+ | test TAB:TMPR, TAB:TMPR
+ | jz <1
+ | test byte TAB:TMPR->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
+ | jmp <1
+ |
+ |5: // String key?
+ | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv
+ | cleartp STR:RC
+ | jmp ->BC_TSETS_Z
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMPR
+ | jmp <2
+ break;
+ case BC_TSETS:
+ | ins_ABC // RA = src, RB = table, RC = str const (~)
+ | mov TAB:RB, [BASE+RB*8]
+ | not RC
+ | mov STR:RC, [KBASE+RC*8]
+ | checktab TAB:RB, ->vmeta_tsets
+ |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *
+ | mov TMPRd, TAB:RB->hmask
+ | and TMPRd, STR:RC->sid
+ | imul TMPRd, #NODE
+ | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
+ | add NODE:TMPR, TAB:RB->node
+ | settp ITYPE, STR:RC, LJ_TSTR
+ |1:
+ | cmp NODE:TMPR->key, ITYPE
+ | jne >5
+ | // Ok, key found. Assumes: offsetof(Node, val) == 0
+ | cmp aword [TMPR], LJ_TNIL
+ | je >4 // Previous value is nil?
+ |2:
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |3: // Set node value.
+ | mov ITYPE, [BASE+RA*8]
+ | mov [TMPR], ITYPE
+ | ins_next
+ |
+ |4: // Check for __newindex if previous value is nil.
+ | mov TAB:ITYPE, TAB:RB->metatable
+ | test TAB:ITYPE, TAB:ITYPE
+ | jz <2
+ | test byte TAB:ITYPE->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ | jmp <2
+ |
+ |5: // Follow hash chain.
+ | mov NODE:TMPR, NODE:TMPR->next
+ | test NODE:TMPR, NODE:TMPR
+ | jnz <1
+ | // End of hash chain: key not found, add a new one.
+ |
+ | // But check for __newindex first.
+ | mov TAB:TMPR, TAB:RB->metatable
+ | test TAB:TMPR, TAB:TMPR
+ | jz >6 // No metatable: continue.
+ | test byte TAB:TMPR->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
+ |6:
+ | mov TMP1, ITYPE
+ | mov L:CARG1, SAVE_L
+ | mov L:CARG1->base, BASE
+ | lea CARG3, TMP1
+ | mov CARG2, TAB:RB
+ | mov SAVE_PC, PC
+ | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
+ | // Handles write barrier for the new key. TValue * returned in eax (RC).
+ | mov L:CARG1, SAVE_L
+ | mov BASE, L:CARG1->base
+ | mov TMPR, rax
+ | movzx RAd, PC_RA
+ | jmp <2 // Must check write barrier for value.
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, ITYPE
+ | jmp <3
+ break;
+ case BC_TSETB:
+ | ins_ABC // RA = src, RB = table, RC = byte literal
+ | mov TAB:RB, [BASE+RB*8]
+ | checktab TAB:RB, ->vmeta_tsetb
+ | cmp RCd, TAB:RB->asize
+ | jae ->vmeta_tsetb
+ | shl RCd, 3
+ | add RC, TAB:RB->array
+ | cmp aword [RC], LJ_TNIL
+ | je >3 // Previous value is nil?
+ |1:
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2: // Set array slot.
+ | mov ITYPE, [BASE+RA*8]
+ | mov [RC], ITYPE
+ | ins_next
+ |
+ |3: // Check for __newindex if previous value is nil.
+ | mov TAB:TMPR, TAB:RB->metatable
+ | test TAB:TMPR, TAB:TMPR
+ | jz <1
+ | test byte TAB:TMPR->nomm, 1<<MM_newindex
+ | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
+ | jmp <1
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMPR
+ | jmp <2
+ break;
+ case BC_TSETR:
+ | ins_ABC // RA = src, RB = table, RC = key
+ | mov TAB:RB, [BASE+RB*8]
+ | cleartp TAB:RB
+ |.if DUALNUM
+ | mov RC, [BASE+RC*8]
+ |.else
+ | cvttsd2si RCd, qword [BASE+RC*8]
+ |.endif
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2:
+ | cmp RCd, TAB:RB->asize
+ | jae ->vmeta_tsetr
+ | shl RCd, 3
+ | add RC, TAB:RB->array
+ | // Set array slot.
+ |->BC_TSETR_Z:
+ | mov ITYPE, [BASE+RA*8]
+ | mov [RC], ITYPE
+ | ins_next
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, TMPR
+ | jmp <2
+ break;
+
+ case BC_TSETM:
+ | ins_AD // RA = base (table at base-1), RD = num const (start index)
+ |1:
+ | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word.
+ | lea RA, [BASE+RA*8]
+ | mov TAB:RB, [RA-8] // Guaranteed to be a table.
+ | cleartp TAB:RB
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2:
+ | mov RDd, MULTRES
+ | sub RDd, 1
+ | jz >4 // Nothing to copy?
+ | add RDd, TMPRd // Compute needed size.
+ | cmp RDd, TAB:RB->asize
+ | ja >5 // Doesn't fit into array part?
+ | sub RDd, TMPRd
+ | shl TMPRd, 3
+ | add TMPR, TAB:RB->array
+ |3: // Copy result slots to table.
+ | mov RB, [RA]
+ | add RA, 8
+ | mov [TMPR], RB
+ | add TMPR, 8
+ | sub RDd, 1
+ | jnz <3
+ |4:
+ | ins_next
+ |
+ |5: // Need to resize array part.
+ | mov L:CARG1, SAVE_L
+ | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE.
+ | mov CARG2, TAB:RB
+ | mov CARG3d, RDd
+ | mov L:RB, L:CARG1
+ | mov SAVE_PC, PC
+ | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
+ | mov BASE, L:RB->base
+ | movzx RAd, PC_RA // Restore RA.
+ | movzx RDd, PC_RD // Restore RD.
+ | jmp <1 // Retry.
+ |
+ |7: // Possible table write barrier for any value. Skip valiswhite check.
+ | barrierback TAB:RB, RD
+ | jmp <2
+ break;
+
+ /* -- Calls and vararg handling ----------------------------------------- */
+
+ case BC_CALL: case BC_CALLM:
+ | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
+ if (op == BC_CALLM) {
+ | add NARGS:RDd, MULTRES
+ }
+ | mov LFUNC:RB, [BASE+RA*8]
+ | checkfunc LFUNC:RB, ->vmeta_call_ra
+ | lea BASE, [BASE+RA*8+16]
+ | ins_call
+ break;
+
+ case BC_CALLMT:
+ | ins_AD // RA = base, RD = extra_nargs
+ | add NARGS:RDd, MULTRES
+ | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
+ break;
+ case BC_CALLT:
+ | ins_AD // RA = base, RD = nargs+1
+ | lea RA, [BASE+RA*8+16]
+ | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
+ | mov LFUNC:RB, [RA-16]
+ | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call
+ |->BC_CALLT_Z:
+ | mov PC, [BASE-8]
+ | test PCd, FRAME_TYPE
+ | jnz >7
+ |1:
+ | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below.
+ | mov MULTRES, NARGS:RDd
+ | sub NARGS:RDd, 1
+ | jz >3
+ |2: // Move args down.
+ | mov RB, [RA]
+ | add RA, 8
+ | mov [KBASE], RB
+ | add KBASE, 8
+ | sub NARGS:RDd, 1
+ | jnz <2
+ |
+ | mov LFUNC:RB, [BASE-16]
+ |3:
+ | cleartp LFUNC:RB
+ | mov NARGS:RDd, MULTRES
+ | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
+ | ja >5
+ |4:
+ | ins_callt
+ |
+ |5: // Tailcall to a fast function.
+ | test PCd, FRAME_TYPE // Lua frame below?
+ | jnz <4
+ | movzx RAd, PC_RA
+ | neg RA
+ | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE.
+ | cleartp LFUNC:KBASE
+ | mov KBASE, LFUNC:KBASE->pc
+ | mov KBASE, [KBASE+PC2PROTO(k)]
+ | jmp <4
+ |
+ |7: // Tailcall from a vararg function.
+ | sub PC, FRAME_VARG
+ | test PCd, FRAME_TYPEP
+ | jnz >8 // Vararg frame below?
+ | sub BASE, PC // Need to relocate BASE/KBASE down.
+ | mov KBASE, BASE
+ | mov PC, [BASE-8]
+ | jmp <1
+ |8:
+ | add PCd, FRAME_VARG
+ | jmp <1
+ break;
+
+ case BC_ITERC:
+ | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
+ | lea RA, [BASE+RA*8+16] // fb = base+2
+ | mov RB, [RA-32] // Copy state. fb[0] = fb[-4].
+ | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3].
+ | mov [RA], RB
+ | mov [RA+8], RC
+ | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5]
+ | mov [RA-16], LFUNC:RB
+ | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call.
+ | checkfunc LFUNC:RB, ->vmeta_call
+ | mov BASE, RA
+ | ins_call
+ break;
+
+ case BC_ITERN:
+ |.if JIT
+ | hotloop RBd
+ |.endif
+ |->vm_IITERN:
+ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
+ | mov TAB:RB, [BASE+RA*8-16]
+ | cleartp TAB:RB
+ | mov RCd, [BASE+RA*8-8] // Get index from control var.
+ | mov TMPRd, TAB:RB->asize
+ | add PC, 4
+ | mov ITYPE, TAB:RB->array
+ |1: // Traverse array part.
+ | cmp RCd, TMPRd; jae >5 // Index points after array part?
+ | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4
+ |.if not DUALNUM
+ | cvtsi2sd xmm0, RCd
+ |.endif
+ | // Copy array slot to returned value.
+ | mov RB, [ITYPE+RC*8]
+ | mov [BASE+RA*8+8], RB
+ | // Return array index as a numeric key.
+ |.if DUALNUM
+ | setint ITYPE, RC
+ | mov [BASE+RA*8], ITYPE
+ |.else
+ | movsd qword [BASE+RA*8], xmm0
+ |.endif
+ | add RCd, 1
+ | mov [BASE+RA*8-8], RCd // Update control var.
+ |2:
+ | movzx RDd, PC_RD // Get target from ITERL.
+ | branchPC RD
+ |3:
+ | ins_next
+ |
+ |4: // Skip holes in array part.
+ | add RCd, 1
+ | jmp <1
+ |
+ |5: // Traverse hash part.
+ | sub RCd, TMPRd
+ |6:
+ | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
+ | imul ITYPEd, RCd, #NODE
+ | add NODE:ITYPE, TAB:RB->node
+ | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7
+ | lea TMPRd, [RCd+TMPRd+1]
+ | // Copy key and value from hash slot.
+ | mov RB, NODE:ITYPE->key
+ | mov RC, NODE:ITYPE->val
+ | mov [BASE+RA*8], RB
+ | mov [BASE+RA*8+8], RC
+ | mov [BASE+RA*8-8], TMPRd
+ | jmp <2
+ |
+ |7: // Skip holes in hash part.
+ | add RCd, 1
+ | jmp <6
+ break;
+
+ case BC_ISNEXT:
+ | ins_AD // RA = base, RD = target (points to ITERN)
+ | mov CFUNC:RB, [BASE+RA*8-24]
+ | checkfunc CFUNC:RB, >5
+ | checktptp [BASE+RA*8-16], LJ_TTAB, >5
+ | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5
+ | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
+ | branchPC RD
+ | mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32)
+ | mov [BASE+RA*8-8], TMPR // Initialize control var.
+ |1:
+ | ins_next
+ |5: // Despecialize bytecode if any of the checks fail.
+ | mov PC_OP, BC_JMP
+ | branchPC RD
+ |.if JIT
+ | cmp byte [PC], BC_ITERN
+ | jne >6
+ |.endif
+ | mov byte [PC], BC_ITERC
+ | jmp <1
+ |.if JIT
+ |6: // Unpatch JLOOP.
+ | mov RA, [DISPATCH+DISPATCH_J(trace)]
+ | movzx RCd, word [PC+2]
+ | mov TRACE:RA, [RA+RC*8]
+ | mov eax, TRACE:RA->startins
+ | mov al, BC_ITERC
+ | mov dword [PC], eax
+ | jmp <1
+ |.endif
+ break;
+
+ case BC_VARG:
+ | ins_ABC // RA = base, RB = nresults+1, RC = numparams
+ | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)]
+ | lea RA, [BASE+RA*8]
+ | sub TMPR, [BASE-8]
+ | // Note: TMPR may now be even _above_ BASE if nargs was < numparams.
+ | test RB, RB
+ | jz >5 // Copy all varargs?
+ | lea RB, [RA+RB*8-8]
+ | cmp TMPR, BASE // No vararg slots?
+ | jnb >2
+ |1: // Copy vararg slots to destination slots.
+ | mov RC, [TMPR-16]
+ | add TMPR, 8
+ | mov [RA], RC
+ | add RA, 8
+ | cmp RA, RB // All destination slots filled?
+ | jnb >3
+ | cmp TMPR, BASE // No more vararg slots?
+ | jb <1
+ |2: // Fill up remainder with nil.
+ | mov aword [RA], LJ_TNIL
+ | add RA, 8
+ | cmp RA, RB
+ | jb <2
+ |3:
+ | ins_next
+ |
+ |5: // Copy all varargs.
+ | mov MULTRES, 1 // MULTRES = 0+1
+ | mov RC, BASE
+ | sub RC, TMPR
+ | jbe <3 // No vararg slots?
+ | mov RBd, RCd
+ | shr RBd, 3
+ | add RBd, 1
+ | mov MULTRES, RBd // MULTRES = #varargs+1
+ | mov L:RB, SAVE_L
+ | add RC, RA
+ | cmp RC, L:RB->maxstack
+ | ja >7 // Need to grow stack?
+ |6: // Copy all vararg slots.
+ | mov RC, [TMPR-16]
+ | add TMPR, 8
+ | mov [RA], RC
+ | add RA, 8
+ | cmp TMPR, BASE // No more vararg slots?
+ | jb <6
+ | jmp <3
+ |
+ |7: // Grow stack for varargs.
+ | mov L:RB->base, BASE
+ | mov L:RB->top, RA
+ | mov SAVE_PC, PC
+ | sub TMPR, BASE // Need delta, because BASE may change.
+ | mov TMP1hi, TMPRd
+ | mov CARG2d, MULTRES
+ | sub CARG2d, 1
+ | mov CARG1, L:RB
+ | call extern lj_state_growstack // (lua_State *L, int n)
+ | mov BASE, L:RB->base
+ | movsxd TMPR, TMP1hi
+ | mov RA, L:RB->top
+ | add TMPR, BASE
+ | jmp <6
+ break;
+
+ /* -- Returns ----------------------------------------------------------- */
+
+ case BC_RETM:
+ | ins_AD // RA = results, RD = extra_nresults
+ | add RDd, MULTRES // MULTRES >=1, so RD >=1.
+ | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
+ break;
+
+ case BC_RET: case BC_RET0: case BC_RET1:
+ | ins_AD // RA = results, RD = nresults+1
+ if (op != BC_RET0) {
+ | shl RAd, 3
+ }
+ |1:
+ | mov PC, [BASE-8]
+ | mov MULTRES, RDd // Save nresults+1.
+ | test PCd, FRAME_TYPE // Check frame type marker.
+ | jnz >7 // Not returning to a fixarg Lua func?
+ switch (op) {
+ case BC_RET:
+ |->BC_RET_Z:
+ | mov KBASE, BASE // Use KBASE for result move.
+ | sub RDd, 1
+ | jz >3
+ |2: // Move results down.
+ | mov RB, [KBASE+RA]
+ | mov [KBASE-16], RB
+ | add KBASE, 8
+ | sub RDd, 1
+ | jnz <2
+ |3:
+ | mov RDd, MULTRES // Note: MULTRES may be >255.
+ | movzx RBd, PC_RB // So cannot compare with RDL!
+ |5:
+ | cmp RBd, RDd // More results expected?
+ | ja >6
+ break;
+ case BC_RET1:
+ | mov RB, [BASE+RA]
+ | mov [BASE-16], RB
+ /* fallthrough */
+ case BC_RET0:
+ |5:
+ | cmp PC_RB, RDL // More results expected?
+ | ja >6
+ default:
+ break;
+ }
+ | movzx RAd, PC_RA
+ | neg RA
+ | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8
+ | mov LFUNC:KBASE, [BASE-16]
+ | cleartp LFUNC:KBASE
+ | mov KBASE, LFUNC:KBASE->pc
+ | mov KBASE, [KBASE+PC2PROTO(k)]
+ | ins_next
+ |
+ |6: // Fill up results with nil.
+ if (op == BC_RET) {
+ | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base.
+ | add KBASE, 8
+ } else {
+ | mov aword [BASE+RD*8-24], LJ_TNIL
+ }
+ | add RD, 1
+ | jmp <5
+ |
+ |7: // Non-standard return case.
+ | lea RB, [PC-FRAME_VARG]
+ | test RBd, FRAME_TYPEP
+ | jnz ->vm_return
+ | // Return from vararg function: relocate BASE down and RA up.
+ | sub BASE, RB
+ if (op != BC_RET0) {
+ | add RA, RB
+ }
+ | jmp <1
+ break;
+
+ /* -- Loops and branches ------------------------------------------------ */
+
+ | // Numeric for-loop state: four consecutive stack slots addressed via
+ | // RA after 'lea RA, [BASE+RA*8]' in the BC_FORI/BC_IFORL handlers.
+ |.define FOR_IDX, [RA]      // Internal loop index (updated each iteration).
+ |.define FOR_STOP, [RA+8]   // Loop limit compared against the index.
+ |.define FOR_STEP, [RA+16]  // Step; its sign selects the comparison direction.
+ |.define FOR_EXT, [RA+24]   // Copy of the index written back each iteration (presumably the slot the loop body sees — verify against bytecode docs).
+
+ case BC_FORL:
+ |.if JIT
+ | hotloop RBd
+ |.endif
+ | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
+ break;
+
+ case BC_JFORI:
+ case BC_JFORL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_FORI:
+ case BC_IFORL:
+ vk = (op == BC_IFORL || op == BC_JFORL);
+ | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
+ | lea RA, [BASE+RA*8]
+ if (LJ_DUALNUM) {
+ | mov RB, FOR_IDX
+ | checkint RB, >9
+ | mov TMPR, FOR_STOP
+ if (!vk) {
+ | checkint TMPR, ->vmeta_for
+ | mov ITYPE, FOR_STEP
+ | test ITYPEd, ITYPEd; js >5
+ | sar ITYPE, 47;
+ | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
+ } else {
+#ifdef LUA_USE_ASSERT
+ | checkinttp FOR_STOP, ->assert_bad_for_arg_type
+ | checkinttp FOR_STEP, ->assert_bad_for_arg_type
+#endif
+ | mov ITYPE, FOR_STEP
+ | test ITYPEd, ITYPEd; js >5
+ | add RBd, ITYPEd; jo >1
+ | setint RB
+ | mov FOR_IDX, RB
+ }
+ | cmp RBd, TMPRd
+ | mov FOR_EXT, RB
+ if (op == BC_FORI) {
+ | jle >7
+ |1:
+ |6:
+ | branchPC RD
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | movzx RDd, PC_RD
+ | jle =>BC_JLOOP
+ |1:
+ |6:
+ } else if (op == BC_IFORL) {
+ | jg >7
+ |6:
+ | branchPC RD
+ |1:
+ } else {
+ | jle =>BC_JLOOP
+ |1:
+ |6:
+ }
+ |7:
+ | ins_next
+ |
+ |5: // Invert check for negative step.
+ if (!vk) {
+ | sar ITYPE, 47;
+ | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for
+ } else {
+ | add RBd, ITYPEd; jo <1
+ | setint RB
+ | mov FOR_IDX, RB
+ }
+ | cmp RBd, TMPRd
+ | mov FOR_EXT, RB
+ if (op == BC_FORI) {
+ | jge <7
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | movzx RDd, PC_RD
+ | jge =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ | jl <7
+ } else {
+ | jge =>BC_JLOOP
+ }
+ | jmp <6
+ |9: // Fallback to FP variant.
+ if (!vk) {
+ | jae ->vmeta_for
+ }
+ } else if (!vk) {
+ | checknumtp FOR_IDX, ->vmeta_for
+ }
+ if (!vk) {
+ | checknumtp FOR_STOP, ->vmeta_for
+ } else {
+#ifdef LUA_USE_ASSERT
+ | checknumtp FOR_STOP, ->assert_bad_for_arg_type
+ | checknumtp FOR_STEP, ->assert_bad_for_arg_type
+#endif
+ }
+ | mov RB, FOR_STEP
+ if (!vk) {
+ | checknum RB, ->vmeta_for
+ }
+ | movsd xmm0, qword FOR_IDX
+ | movsd xmm1, qword FOR_STOP
+ if (vk) {
+ | addsd xmm0, qword FOR_STEP
+ | movsd qword FOR_IDX, xmm0
+ | test RB, RB; js >3
+ } else {
+ | jl >3
+ }
+ | ucomisd xmm1, xmm0
+ |1:
+ | movsd qword FOR_EXT, xmm0
+ if (op == BC_FORI) {
+ |.if DUALNUM
+ | jnb <7
+ |.else
+ | jnb >2
+ | branchPC RD
+ |.endif
+ } else if (op == BC_JFORI) {
+ | branchPC RD
+ | movzx RDd, PC_RD
+ | jnb =>BC_JLOOP
+ } else if (op == BC_IFORL) {
+ |.if DUALNUM
+ | jb <7
+ |.else
+ | jb >2
+ | branchPC RD
+ |.endif
+ } else {
+ | jnb =>BC_JLOOP
+ }
+ |.if DUALNUM
+ | jmp <6
+ |.else
+ |2:
+ | ins_next
+ |.endif
+ |
+ |3: // Invert comparison if step is negative.
+ | ucomisd xmm0, xmm1
+ | jmp <1
+ break;
+
+ case BC_ITERL:
+ |.if JIT
+ | hotloop RBd
+ |.endif
+ | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
+ break;
+
+ case BC_JITERL:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IITERL:
+ | ins_AJ // RA = base, RD = target
+ | lea RA, [BASE+RA*8]
+ | mov RB, [RA]
+ | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
+ if (op == BC_JITERL) {
+ | mov [RA-8], RB
+ | jmp =>BC_JLOOP
+ } else {
+ | branchPC RD // Otherwise save control var + branch.
+ | mov [RA-8], RB
+ }
+ |1:
+ | ins_next
+ break;
+
+ case BC_LOOP:
+ | ins_A // RA = base, RD = target (loop extent)
+ | // Note: RA/RD is only used by trace recorder to determine scope/extent
+ | // This opcode does NOT jump, it's only purpose is to detect a hot loop.
+ |.if JIT
+ | hotloop RBd
+ |.endif
+ | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
+ break;
+
+ case BC_ILOOP:
+ | ins_A // RA = base, RD = target (loop extent)
+ | ins_next
+ break;
+
+ case BC_JLOOP:
+ |.if JIT
+ | ins_AD // RA = base (ignored), RD = traceno
+ | mov RA, [DISPATCH+DISPATCH_J(trace)]
+ | mov TRACE:RD, [RA+RD*8]
+ | mov RD, TRACE:RD->mcode
+ | mov L:RB, SAVE_L
+ | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
+ | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
+ | // Save additional callee-save registers only used in compiled code.
+ |.if X64WIN
+ | mov CSAVE_4, r12
+ | mov CSAVE_3, r13
+ | mov CSAVE_2, r14
+ | mov CSAVE_1, r15
+ | mov RA, rsp
+ | sub rsp, 10*16+4*8
+ | movdqa [RA-1*16], xmm6
+ | movdqa [RA-2*16], xmm7
+ | movdqa [RA-3*16], xmm8
+ | movdqa [RA-4*16], xmm9
+ | movdqa [RA-5*16], xmm10
+ | movdqa [RA-6*16], xmm11
+ | movdqa [RA-7*16], xmm12
+ | movdqa [RA-8*16], xmm13
+ | movdqa [RA-9*16], xmm14
+ | movdqa [RA-10*16], xmm15
+ |.else
+ | sub rsp, 16
+ | mov [rsp+16], r12
+ | mov [rsp+8], r13
+ |.endif
+ | jmp RD
+ |.endif
+ break;
+
+ case BC_JMP:
+ | ins_AJ // RA = unused, RD = target
+ | branchPC RD
+ | ins_next
+ break;
+
+ /* -- Function headers -------------------------------------------------- */
+
+ /*
+ ** Reminder: A function may be called with func/args above L->maxstack,
+ ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
+ ** too. This means all FUNC* ops (including fast functions) must check
+ ** for stack overflow _before_ adding more slots!
+ */
+
+ case BC_FUNCF:
+ |.if JIT
+ | hotcall RBd
+ |.endif
+ case BC_FUNCV: /* NYI: compiled vararg functions. */
+ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
+ break;
+
+ case BC_JFUNCF:
+#if !LJ_HASJIT
+ break;
+#endif
+ case BC_IFUNCF:
+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
+ | mov KBASE, [PC-4+PC2PROTO(k)]
+ | mov L:RB, SAVE_L
+ | lea RA, [BASE+RA*8] // Top of frame.
+ | cmp RA, L:RB->maxstack
+ | ja ->vm_growstack_f
+ | movzx RAd, byte [PC-4+PC2PROTO(numparams)]
+ | cmp NARGS:RDd, RAd // Check for missing parameters.
+ | jbe >3
+ |2:
+ if (op == BC_JFUNCF) {
+ | movzx RDd, PC_RD
+ | jmp =>BC_JLOOP
+ } else {
+ | ins_next
+ }
+ |
+ |3: // Clear missing parameters.
+ | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL
+ | add NARGS:RDd, 1
+ | cmp NARGS:RDd, RAd
+ | jbe <3
+ | jmp <2
+ break;
+
+ case BC_JFUNCV:
+#if !LJ_HASJIT
+ break;
+#endif
+ | int3 // NYI: compiled vararg functions
+ break; /* NYI: compiled vararg functions. */
+
+ case BC_IFUNCV:
+ | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
+ | lea RBd, [NARGS:RD*8+FRAME_VARG+8]
+ | lea RD, [BASE+NARGS:RD*8+8]
+ | mov LFUNC:KBASE, [BASE-16]
+ | mov [RD-8], RB // Store delta + FRAME_VARG.
+ | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC.
+ | mov L:RB, SAVE_L
+ | lea RA, [RD+RA*8]
+ | cmp RA, L:RB->maxstack
+ | ja ->vm_growstack_v // Need to grow stack.
+ | mov RA, BASE
+ | mov BASE, RD
+ | movzx RBd, byte [PC-4+PC2PROTO(numparams)]
+ | test RBd, RBd
+ | jz >2
+ | add RA, 8
+ |1: // Copy fixarg slots up to new frame.
+ | add RA, 8
+ | cmp RA, BASE
+ | jnb >3 // Less args than parameters?
+ | mov KBASE, [RA-16]
+ | mov [RD], KBASE
+ | add RD, 8
+ | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC).
+ | sub RBd, 1
+ | jnz <1
+ |2:
+ if (op == BC_JFUNCV) {
+ | movzx RDd, PC_RD
+ | jmp =>BC_JLOOP
+ } else {
+ | mov KBASE, [PC-4+PC2PROTO(k)]
+ | ins_next
+ }
+ |
+ |3: // Clear missing parameters.
+ | mov aword [RD], LJ_TNIL
+ | add RD, 8
+ | sub RBd, 1
+ | jnz <3
+ | jmp <2
+ break;
+
+ case BC_FUNCC:
+ case BC_FUNCCW:
+ | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
+ | mov CFUNC:RB, [BASE-16]
+ | cleartp CFUNC:RB
+ | mov KBASE, CFUNC:RB->f
+ | mov L:RB, SAVE_L
+ | lea RD, [BASE+NARGS:RD*8-8]
+ | mov L:RB->base, BASE
+ | lea RA, [RD+8*LUA_MINSTACK]
+ | cmp RA, L:RB->maxstack
+ | mov L:RB->top, RD
+ if (op == BC_FUNCC) {
+ | mov CARG1, L:RB // Caveat: CARG1 may be RA.
+ } else {
+ | mov CARG2, KBASE
+ | mov CARG1, L:RB // Caveat: CARG1 may be RA.
+ }
+ | ja ->vm_growstack_c // Need to grow stack.
+ | set_vmstate C
+ if (op == BC_FUNCC) {
+ | call KBASE // (lua_State *L)
+ } else {
+ | // (lua_State *L, lua_CFunction f)
+ | call aword [DISPATCH+DISPATCH_GL(wrapf)]
+ }
+ | // nresults returned in eax (RD).
+ | mov BASE, L:RB->base
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+ | set_vmstate INTERP
+ | lea RA, [BASE+RD*8]
+ | neg RA
+ | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
+ | mov PC, [BASE-8] // Fetch PC of caller.
+ | jmp ->vm_returnc
+ break;
+
+ /* ---------------------------------------------------------------------- */
+
+ default:
+ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
+ exit(2);
+ break;
+ }
+}
+
+static int build_backend(BuildCtx *ctx)
+{
+ int op;
+ dasm_growpc(Dst, BC__MAX);
+ build_subroutines(ctx);
+ |.code_op
+ for (op = 0; op < BC__MAX; op++)
+ build_ins(ctx, (BCOp)op, op);
+ return BC__MAX;
+}
+
+/* Emit pseudo frame-info for all assembler functions. */
+static void emit_asm_debug(BuildCtx *ctx)
+{
+ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
+ switch (ctx->mode) {
+ case BUILD_elfasm:
+ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
+ fprintf(ctx->fp,
+ ".Lframe0:\n"
+ "\t.long .LECIE0-.LSCIE0\n"
+ ".LSCIE0:\n"
+ "\t.long 0xffffffff\n"
+ "\t.byte 0x1\n"
+ "\t.string \"\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -8\n"
+ "\t.byte 0x10\n"
+ "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
+ "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
+ "\t.align 8\n"
+ ".LECIE0:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE0:\n"
+ "\t.long .LEFDE0-.LASFDE0\n"
+ ".LASFDE0:\n"
+ "\t.long .Lframe0\n"
+ "\t.quad .Lbegin\n"
+ "\t.quad %d\n"
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
+ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
+ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
+#if LJ_NO_UNWIND
+ "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
+ "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
+#endif
+ "\t.align 8\n"
+ ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".LSFDE1:\n"
+ "\t.long .LEFDE1-.LASFDE1\n"
+ ".LASFDE1:\n"
+ "\t.long .Lframe0\n"
+ "\t.quad lj_vm_ffi_call\n"
+ "\t.quad %d\n"
+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
+ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
+ "\t.align 8\n"
+ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#if !LJ_NO_UNWIND
+#if LJ_TARGET_SOLARIS
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
+#else
+ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
+#endif
+ fprintf(ctx->fp,
+ ".Lframe1:\n"
+ "\t.long .LECIE1-.LSCIE1\n"
+ ".LSCIE1:\n"
+ "\t.long 0\n"
+ "\t.byte 0x1\n"
+ "\t.string \"zPR\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -8\n"
+ "\t.byte 0x10\n"
+ "\t.uleb128 6\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.long lj_err_unwind_dwarf-.\n"
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
+ "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
+ "\t.align 8\n"
+ ".LECIE1:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE2:\n"
+ "\t.long .LEFDE2-.LASFDE2\n"
+ ".LASFDE2:\n"
+ "\t.long .LASFDE2-.Lframe1\n"
+ "\t.long .Lbegin-.\n"
+ "\t.long %d\n"
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
+ "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
+ "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
+ "\t.align 8\n"
+ ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
+#if LJ_HASFFI
+ fprintf(ctx->fp,
+ ".Lframe2:\n"
+ "\t.long .LECIE2-.LSCIE2\n"
+ ".LSCIE2:\n"
+ "\t.long 0\n"
+ "\t.byte 0x1\n"
+ "\t.string \"zR\"\n"
+ "\t.uleb128 0x1\n"
+ "\t.sleb128 -8\n"
+ "\t.byte 0x10\n"
+ "\t.uleb128 1\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n"
+ "\t.byte 0x80+0x10\n\t.uleb128 0x1\n"
+ "\t.align 8\n"
+ ".LECIE2:\n\n");
+ fprintf(ctx->fp,
+ ".LSFDE3:\n"
+ "\t.long .LEFDE3-.LASFDE3\n"
+ ".LASFDE3:\n"
+ "\t.long .LASFDE3-.Lframe2\n"
+ "\t.long lj_vm_ffi_call-.\n"
+ "\t.long %d\n"
+ "\t.uleb128 0\n" /* augmentation length */
+ "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
+ "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
+ "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
+ "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
+ "\t.align 8\n"
+ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
+#endif
+#endif
+ break;
+#if !LJ_NO_UNWIND
+ /* Mental note: never let Apple design an assembler.
+ ** Or a linker. Or a plastic case. But I digress.
+ */
+ case BUILD_machasm: {
+#if LJ_HASFFI
+ int fcsize = 0;
+#endif
+ int i;
+ fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
+ fprintf(ctx->fp,
+ "EH_frame1:\n"
+ "\t.set L$set$x,LECIEX-LSCIEX\n"
+ "\t.long L$set$x\n"
+ "LSCIEX:\n"
+ "\t.long 0\n"
+ "\t.byte 0x1\n"
+ "\t.ascii \"zPR\\0\"\n"
+ "\t.byte 0x1\n"
+ "\t.byte 128-8\n"
+ "\t.byte 0x10\n"
+ "\t.byte 6\n" /* augmentation length */
+ "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
+ "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
+ "\t.byte 0x80+0x10\n\t.byte 0x1\n"
+ "\t.align 3\n"
+ "LECIEX:\n\n");
+ for (i = 0; i < ctx->nsym; i++) {
+ const char *name = ctx->sym[i].name;
+ int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
+ if (size == 0) continue;
+#if LJ_HASFFI
+ if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
+#endif
+ fprintf(ctx->fp,
+ "%s.eh:\n"
+ "LSFDE%d:\n"
+ "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
+ "\t.long L$set$%d\n"
+ "LASFDE%d:\n"
+ "\t.long LASFDE%d-EH_frame1\n"
+ "\t.long %s-.\n"
+ "\t.long %d\n"
+ "\t.byte 0\n" /* augmentation length */
+ "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
+ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
+ "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
+ "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
+ "\t.align 3\n"
+ "LEFDE%d:\n\n",
+ name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
+ }
+#if LJ_HASFFI
+ if (fcsize) {
+ fprintf(ctx->fp,
+ "EH_frame2:\n"
+ "\t.set L$set$y,LECIEY-LSCIEY\n"
+ "\t.long L$set$y\n"
+ "LSCIEY:\n"
+ "\t.long 0\n"
+ "\t.byte 0x1\n"
+ "\t.ascii \"zR\\0\"\n"
+ "\t.byte 0x1\n"
+ "\t.byte 128-8\n"
+ "\t.byte 0x10\n"
+ "\t.byte 1\n" /* augmentation length */
+ "\t.byte 0x1b\n" /* pcrel|sdata4 */
+ "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n"
+ "\t.byte 0x80+0x10\n\t.byte 0x1\n"
+ "\t.align 3\n"
+ "LECIEY:\n\n");
+ fprintf(ctx->fp,
+ "_lj_vm_ffi_call.eh:\n"
+ "LSFDEY:\n"
+ "\t.set L$set$yy,LEFDEY-LASFDEY\n"
+ "\t.long L$set$yy\n"
+ "LASFDEY:\n"
+ "\t.long LASFDEY-EH_frame2\n"
+ "\t.long _lj_vm_ffi_call-.\n"
+ "\t.long %d\n"
+ "\t.byte 0\n" /* augmentation length */
+ "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
+ "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
+ "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
+ "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
+ "\t.align 3\n"
+ "LEFDEY:\n\n", fcsize);
+ }
+#endif
+ fprintf(ctx->fp, ".subsections_via_symbols\n");
+ }
+ break;
+#endif
+ default: /* Difficult for other modes. */
+ break;
+ }
+}
+
diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc
index 7db5e710..18ca87b5 100644
--- a/src/vm_x86.dasc
+++ b/src/vm_x86.dasc
@@ -18,7 +18,6 @@
|
|.if P64
|.define X64, 1
-|.define SSE, 1
|.if WIN
|.define X64WIN, 1
|.endif
@@ -116,24 +115,74 @@
|.type NODE, Node
|.type NARGS, int
|.type TRACE, GCtrace
+|.type SBUF, SBuf
|
|// Stack layout while in interpreter. Must match with lj_frame.h.
|//-----------------------------------------------------------------------
|.if not X64 // x86 stack layout.
|
-|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
+|.if WIN
+|
+|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
|.macro saveregs_
| push edi; push esi; push ebx
+| push extern lj_err_unwind_win
+| fs; push dword [0]
+| fs; mov [0], esp
| sub esp, CFRAME_SPACE
|.endmacro
-|.macro saveregs
-| push ebp; saveregs_
+|.macro restoreregs
+| add esp, CFRAME_SPACE
+| fs; pop dword [0]
+| pop edi // Short for esp += 4.
+| pop ebx; pop esi; pop edi; pop ebp
+|.endmacro
+|
+|.else
+|
+|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
+|.macro saveregs_
+| push edi; push esi; push ebx
+| sub esp, CFRAME_SPACE
|.endmacro
|.macro restoreregs
| add esp, CFRAME_SPACE
| pop ebx; pop esi; pop edi; pop ebp
|.endmacro
|
+|.endif
+|
+|.macro saveregs
+| push ebp; saveregs_
+|.endmacro
+|
+|.if WIN
+|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
+|.define SAVE_NRES, aword [esp+aword*18]
+|.define SAVE_CFRAME, aword [esp+aword*17]
+|.define SAVE_L, aword [esp+aword*16]
+|//----- 16 byte aligned, ^^^ arguments from C caller
+|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
+|.define SAVE_R4, aword [esp+aword*14]
+|.define SAVE_R3, aword [esp+aword*13]
+|.define SAVE_R2, aword [esp+aword*12]
+|//----- 16 byte aligned
+|.define SAVE_R1, aword [esp+aword*11]
+|.define SEH_FUNC, aword [esp+aword*10]
+|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
+|.define UNUSED2, aword [esp+aword*8]
+|//----- 16 byte aligned
+|.define UNUSED1, aword [esp+aword*7]
+|.define SAVE_PC, aword [esp+aword*6]
+|.define TMP2, aword [esp+aword*5]
+|.define TMP1, aword [esp+aword*4]
+|//----- 16 byte aligned
+|.define ARG4, aword [esp+aword*3]
+|.define ARG3, aword [esp+aword*2]
+|.define ARG2, aword [esp+aword*1]
+|.define ARG1, aword [esp] //<-- esp while in interpreter.
+|//----- 16 byte aligned, ^^^ arguments for C callee
+|.else
|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
|.define SAVE_NRES, aword [esp+aword*14]
|.define SAVE_CFRAME, aword [esp+aword*13]
@@ -154,6 +203,7 @@
|.define ARG2, aword [esp+aword*1]
|.define ARG1, aword [esp] //<-- esp while in interpreter.
|//----- 16 byte aligned, ^^^ arguments for C callee
+|.endif
|
|// FPARGx overlaps ARGx and ARG(x+1) on x86.
|.define FPARG3, qword [esp+qword*1]
@@ -389,7 +439,6 @@
| fpop
|.endmacro
|
-|.macro fdup; fld st0; .endmacro
|.macro fpop1; fstp st1; .endmacro
|
|// Synthesize SSE FP constants.
@@ -552,6 +601,10 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| mov eax, FCARG2 // Error return status for vm_pcall.
| mov esp, FCARG1
+ |.if WIN
+ | lea FCARG1, SEH_NEXT
+ | fs; mov [0], FCARG1
+ |.endif
|.endif
|->vm_unwind_c_eh: // Landing pad for external unwinder.
| mov L:RB, SAVE_L
@@ -575,6 +628,10 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| and FCARG1, CFRAME_RAWMASK
| mov esp, FCARG1
+ |.if WIN
+ | lea FCARG1, SEH_NEXT
+ | fs; mov [0], FCARG1
+ |.endif
|.endif
|->vm_unwind_ff_eh: // Landing pad for external unwinder.
| mov L:RB, SAVE_L
@@ -588,6 +645,19 @@ static void build_subroutines(BuildCtx *ctx)
| set_vmstate INTERP
| jmp ->vm_returnc // Increments RD/MULTRES and returns.
|
+ |.if WIN and not X64
+ |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
+ | // (void *cframe, void *excptrec, void *unwinder, int errcode)
+ | mov [esp], FCARG1 // Return value for RtlUnwind.
+ | push FCARG2 // Exception record for RtlUnwind.
+ | push 0 // Ignored by RtlUnwind.
+ | push dword [FCARG1+CFRAME_OFS_SEH]
+ | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
+ | mov FCARG1, eax
+ | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
+ | ret // Jump to unwinder.
+ |.endif
+ |
|//-----------------------------------------------------------------------
|//-- Grow stack for calls -----------------------------------------------
|//-----------------------------------------------------------------------
@@ -643,17 +713,18 @@ static void build_subroutines(BuildCtx *ctx)
| lea KBASEa, [esp+CFRAME_RESUME]
| mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
| add DISPATCH, GG_G2DISP
- | mov L:RB->cframe, KBASEa
| mov SAVE_PC, RD // Any value outside of bytecode is ok.
| mov SAVE_CFRAME, RDa
|.if X64
| mov SAVE_NRES, RD
| mov SAVE_ERRF, RD
|.endif
+ | mov L:RB->cframe, KBASEa
| cmp byte L:RB->status, RDL
- | je >3 // Initial resume (like a call).
+ | je >2 // Initial resume (like a call).
|
| // Resume after yield (like a return).
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
| set_vmstate INTERP
| mov byte L:RB->status, RDL
| mov BASE, L:RB->base
@@ -693,20 +764,19 @@ static void build_subroutines(BuildCtx *ctx)
| mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
|.endif
|
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
| mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
| mov SAVE_CFRAME, KBASEa
| mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
+ | add DISPATCH, GG_G2DISP
|.if X64
| mov L:RB->cframe, rsp
|.else
| mov L:RB->cframe, esp
|.endif
|
- |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype).
- | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
- | add DISPATCH, GG_G2DISP
- |
- |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
+ |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
| set_vmstate INTERP
| mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
| add PC, RA
@@ -744,14 +814,17 @@ static void build_subroutines(BuildCtx *ctx)
|
| mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
| sub KBASE, L:RB->top
+ | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
| mov SAVE_ERRF, 0 // No error function.
| mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
+ | add DISPATCH, GG_G2DISP
| // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
|
|.if X64
| mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
| mov SAVE_CFRAME, KBASEa
| mov L:RB->cframe, rsp
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
|
| call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
|.else
@@ -762,6 +835,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
| mov SAVE_CFRAME, KBASE
| mov L:RB->cframe, esp
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
|
| call BASE // (lua_State *L, lua_CFunction func, void *ud)
|.endif
@@ -869,13 +943,9 @@ static void build_subroutines(BuildCtx *ctx)
|.if DUALNUM
| mov TMP2, LJ_TISNUM
| mov TMP1, RC
- |.elif SSE
+ |.else
| cvtsi2sd xmm0, RC
| movsd TMPQ, xmm0
- |.else
- | mov ARG4, RC
- | fild ARG4
- | fstp TMPQ
|.endif
| lea RCa, TMPQ // Store temp. TValue in TMPQ.
| jmp >1
@@ -929,6 +999,19 @@ static void build_subroutines(BuildCtx *ctx)
| mov NARGS:RD, 2+1 // 2 args for func(t, k).
| jmp ->vm_call_dispatch_f
|
+ |->vmeta_tgetr:
+ | mov FCARG1, TAB:RB
+ | mov RB, BASE // Save BASE.
+ | mov FCARG2, RC // Caveat: FCARG2 == BASE
+ | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
+ | // cTValue * or NULL returned in eax (RC).
+ | movzx RA, PC_RA
+ | mov BASE, RB // Restore BASE.
+ | test RC, RC
+ | jnz ->BC_TGETR_Z
+ | mov dword [BASE+RA*8+4], LJ_TNIL
+ | jmp ->BC_TGETR2_Z
+ |
|//-----------------------------------------------------------------------
|
|->vmeta_tsets:
@@ -948,13 +1031,9 @@ static void build_subroutines(BuildCtx *ctx)
|.if DUALNUM
| mov TMP2, LJ_TISNUM
| mov TMP1, RC
- |.elif SSE
+ |.else
| cvtsi2sd xmm0, RC
| movsd TMPQ, xmm0
- |.else
- | mov ARG4, RC
- | fild ARG4
- | fstp TMPQ
|.endif
| lea RCa, TMPQ // Store temp. TValue in TMPQ.
| jmp >1
@@ -1020,6 +1099,33 @@ static void build_subroutines(BuildCtx *ctx)
| mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
| jmp ->vm_call_dispatch_f
|
+ |->vmeta_tsetr:
+ |.if X64WIN
+ | mov L:CARG1d, SAVE_L
+ | mov CARG3d, RC
+ | mov L:CARG1d->base, BASE
+ | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE.
+ |.elif X64
+ | mov L:CARG1d, SAVE_L
+ | mov CARG2d, TAB:RB
+ | mov L:CARG1d->base, BASE
+ | mov RB, BASE // Save BASE.
+ | mov CARG3d, RC // Caveat: CARG3d == BASE.
+ |.else
+ | mov L:RA, SAVE_L
+ | mov ARG2, TAB:RB
+ | mov RB, BASE // Save BASE.
+ | mov ARG3, RC
+ | mov ARG1, L:RA
+ | mov L:RA->base, BASE
+ |.endif
+ | mov SAVE_PC, PC
+ | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
+ | // TValue * returned in eax (RC).
+ | movzx RA, PC_RA
+ | mov BASE, RB // Restore BASE.
+ | jmp ->BC_TSETR_Z
+ |
|//-- Comparison metamethods ---------------------------------------------
|
|->vmeta_comp:
@@ -1114,6 +1220,26 @@ static void build_subroutines(BuildCtx *ctx)
| jmp <3
|.endif
|
+ |->vmeta_istype:
+ |.if X64
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
+ | mov CARG2d, RA
+ | movzx CARG3d, PC_RD
+ | mov L:CARG1d, L:RB
+ |.else
+ | movzx RD, PC_RD
+ | mov ARG2, RA
+ | mov L:RB, SAVE_L
+ | mov ARG3, RD
+ | mov ARG1, L:RB
+ | mov L:RB->base, BASE
+ |.endif
+ | mov SAVE_PC, PC
+ | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
+ | mov BASE, L:RB->base
+ | jmp <6
+ |
|//-- Arithmetic metamethods ---------------------------------------------
|
|->vmeta_arith_vno:
@@ -1290,19 +1416,6 @@ static void build_subroutines(BuildCtx *ctx)
| cmp NARGS:RD, 2+1; jb ->fff_fallback
|.endmacro
|
- |.macro .ffunc_n, name
- | .ffunc_1 name
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- | fld qword [BASE]
- |.endmacro
- |
- |.macro .ffunc_n, name, op
- | .ffunc_1 name
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- | op
- | fld qword [BASE]
- |.endmacro
- |
|.macro .ffunc_nsse, name, op
| .ffunc_1 name
| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1313,14 +1426,6 @@ static void build_subroutines(BuildCtx *ctx)
| .ffunc_nsse name, movsd
|.endmacro
|
- |.macro .ffunc_nn, name
- | .ffunc_2 name
- | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
- | fld qword [BASE]
- | fld qword [BASE+8]
- |.endmacro
- |
|.macro .ffunc_nnsse, name
| .ffunc_2 name
| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
@@ -1418,7 +1523,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
| mov [BASE-8], TAB:RB
| mov RA, TAB:RB->hmask
- | and RA, STR:RC->hash
+ | and RA, STR:RC->sid
| imul RA, #NODE
| add NODE:RA, TAB:RB->node
|3: // Rearranged logic, because we expect _not_ to find the key.
@@ -1526,11 +1631,7 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| jae ->fff_fallback
|.endif
- |.if SSE
| movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
- |.else
- | fld qword [BASE]; jmp ->fff_resn
- |.endif
|
|.ffunc_1 tostring
| // Only handles the string or number case inline.
@@ -1555,9 +1656,9 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| mov L:FCARG1, L:RB
|.if DUALNUM
- | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o)
+ | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
|.else
- | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np)
+ | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
|.endif
| // GCstr returned in eax (RD).
| mov BASE, L:RB->base
@@ -1569,55 +1670,35 @@ static void build_subroutines(BuildCtx *ctx)
| je >2 // Missing 2nd arg?
|1:
| cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
- | mov L:RB, SAVE_L
- | mov L:RB->base, BASE // Add frame since C call can throw.
- | mov L:RB->top, BASE // Dummy frame length is ok.
| mov PC, [BASE-4]
+ | mov RB, BASE // Save BASE.
|.if X64WIN
- | lea CARG3d, [BASE+8]
- | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
- | mov CARG1d, L:RB
+ | mov CARG1d, [BASE]
+ | lea CARG3d, [BASE-8]
+ | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE.
|.elif X64
- | mov CARG2d, [BASE]
- | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
- | mov CARG1d, L:RB
+ | mov CARG1d, [BASE]
+ | lea CARG2d, [BASE+8]
+ | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE.
|.else
| mov TAB:RD, [BASE]
- | mov ARG2, TAB:RD
- | mov ARG1, L:RB
+ | mov ARG1, TAB:RD
| add BASE, 8
+ | mov ARG2, BASE
+ | sub BASE, 8+8
| mov ARG3, BASE
|.endif
- | mov SAVE_PC, PC // Needed for ITERN fallback.
- | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
- | // Flag returned in eax (RD).
- | mov BASE, L:RB->base
- | test RD, RD; jz >3 // End of traversal?
- | // Copy key and value to results.
- |.if X64
- | mov RBa, [BASE+8]
- | mov RDa, [BASE+16]
- | mov [BASE-8], RBa
- | mov [BASE], RDa
- |.else
- | mov RB, [BASE+8]
- | mov RD, [BASE+12]
- | mov [BASE-8], RB
- | mov [BASE-4], RD
- | mov RB, [BASE+16]
- | mov RD, [BASE+20]
- | mov [BASE], RB
- | mov [BASE+4], RD
- |.endif
- |->fff_res2:
- | mov RD, 1+2
- | jmp ->fff_res
+ | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
+ | // 1=found, 0=end, -1=error returned in eax (RD).
+ | mov BASE, RB // Restore BASE.
+ | test RD, RD; jg ->fff_res2 // Found key/value.
+ | js ->fff_fallback_2 // Invalid key.
+ | // End of traversal: return nil.
+ | mov dword [BASE-4], LJ_TNIL
+ | jmp ->fff_res1
|2: // Set missing 2nd arg to nil.
| mov dword [BASE+12], LJ_TNIL
| jmp <1
- |3: // End of traversal: return nil.
- | mov dword [BASE-4], LJ_TNIL
- | jmp ->fff_res1
|
|.ffunc_1 pairs
| mov TAB:RB, [BASE]
@@ -1648,19 +1729,12 @@ static void build_subroutines(BuildCtx *ctx)
| add RD, 1
| mov dword [BASE-4], LJ_TISNUM
| mov dword [BASE-8], RD
- |.elif SSE
+ |.else
| movsd xmm0, qword [BASE+8]
| sseconst_1 xmm1, RBa
| addsd xmm0, xmm1
- | cvtsd2si RD, xmm0
+ | cvttsd2si RD, xmm0
| movsd qword [BASE-8], xmm0
- |.else
- | fld qword [BASE+8]
- | fld1
- | faddp st1
- | fist ARG1
- | fstp qword [BASE-8]
- | mov RD, ARG1
|.endif
| mov TAB:RB, [BASE]
| cmp RD, TAB:RB->asize; jae >2 // Not in array part?
@@ -1678,7 +1752,9 @@ static void build_subroutines(BuildCtx *ctx)
| mov [BASE], RB
| mov [BASE+4], RD
|.endif
- | jmp ->fff_res2
+ |->fff_res2:
+ | mov RD, 1+2
+ | jmp ->fff_res
|2: // Check for empty hash part first. Otherwise call C function.
| cmp dword TAB:RB->hmask, 0; je ->fff_res0
| mov FCARG1, TAB:RB
@@ -1707,12 +1783,9 @@ static void build_subroutines(BuildCtx *ctx)
|.if DUALNUM
| mov dword [BASE+12], LJ_TISNUM
| mov dword [BASE+8], 0
- |.elif SSE
+ |.else
| xorps xmm0, xmm0
| movsd qword [BASE+8], xmm0
- |.else
- | fldz
- | fstp qword [BASE+8]
|.endif
| mov RD, 1+3
| jmp ->fff_res
@@ -1819,7 +1892,6 @@ static void build_subroutines(BuildCtx *ctx)
| mov ARG3, RA
|.endif
| call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
- | set_vmstate INTERP
|
| mov L:RB, SAVE_L
|.if X64
@@ -1828,6 +1900,9 @@ static void build_subroutines(BuildCtx *ctx)
| mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
|.endif
| mov BASE, L:RB->base
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+ | set_vmstate INTERP
+ |
| cmp eax, LUA_YIELD
| ja >8
|4:
@@ -1942,12 +2017,10 @@ static void build_subroutines(BuildCtx *ctx)
|->fff_resi: // Dummy.
|.endif
|
- |.if SSE
|->fff_resn:
| mov PC, [BASE-4]
| fstp qword [BASE-8]
| jmp ->fff_res1
- |.endif
|
| .ffunc_1 math_abs
|.if DUALNUM
@@ -1971,8 +2044,6 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
|.endif
- |
- |.if SSE
| movsd xmm0, qword [BASE]
| sseconst_abs xmm1, RDa
| andps xmm0, xmm1
@@ -1980,15 +2051,6 @@ static void build_subroutines(BuildCtx *ctx)
| mov PC, [BASE-4]
| movsd qword [BASE-8], xmm0
| // fallthrough
- |.else
- | fld qword [BASE]
- | fabs
- | // fallthrough
- |->fff_resxmm0: // Dummy.
- |->fff_resn:
- | mov PC, [BASE-4]
- | fstp qword [BASE-8]
- |.endif
|
|->fff_res1:
| mov RD, 1+1
@@ -2015,6 +2077,12 @@ static void build_subroutines(BuildCtx *ctx)
| mov RAa, -8 // Results start at BASE+RA = BASE-8.
| jmp ->vm_return
|
+ |.if X64
+ |.define fff_resfp, fff_resxmm0
+ |.else
+ |.define fff_resfp, fff_resn
+ |.endif
+ |
|.macro math_round, func
| .ffunc math_ .. func
|.if DUALNUM
@@ -2025,107 +2093,75 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
|.endif
- |.if SSE
| movsd xmm0, qword [BASE]
- | call ->vm_ .. func
- | .if DUALNUM
- | cvtsd2si RB, xmm0
- | cmp RB, 0x80000000
- | jne ->fff_resi
- | cvtsi2sd xmm1, RB
- | ucomisd xmm0, xmm1
- | jp ->fff_resxmm0
- | je ->fff_resi
- | .endif
- | jmp ->fff_resxmm0
- |.else
- | fld qword [BASE]
- | call ->vm_ .. func
- | .if DUALNUM
- | fist ARG1
- | mov RB, ARG1
- | cmp RB, 0x80000000; jne >2
- | fdup
- | fild ARG1
- | fcomparepp
- | jp ->fff_resn
- | jne ->fff_resn
- |2:
- | fpop
- | jmp ->fff_resi
- | .else
- | jmp ->fff_resn
- | .endif
+ | call ->vm_ .. func .. _sse
+ |.if DUALNUM
+ | cvttsd2si RB, xmm0
+ | cmp RB, 0x80000000
+ | jne ->fff_resi
+ | cvtsi2sd xmm1, RB
+ | ucomisd xmm0, xmm1
+ | jp ->fff_resxmm0
+ | je ->fff_resi
|.endif
+ | jmp ->fff_resxmm0
|.endmacro
|
| math_round floor
| math_round ceil
|
- |.if SSE
|.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
- |.else
- |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
- |.endif
|
|.ffunc math_log
| cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
| cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
- | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn
- |
- |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
- |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn
- |
- |.ffunc_n math_sin; fsin; jmp ->fff_resn
- |.ffunc_n math_cos; fcos; jmp ->fff_resn
- |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn
- |
- |.ffunc_n math_asin
- | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
- | jmp ->fff_resn
- |.ffunc_n math_acos
- | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
- | jmp ->fff_resn
- |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
+ | movsd xmm0, qword [BASE]
+ |.if not X64
+ | movsd FPARG1, xmm0
+ |.endif
+ | mov RB, BASE
+ | call extern log
+ | mov BASE, RB
+ | jmp ->fff_resfp
|
|.macro math_extern, func
- |.if SSE
| .ffunc_nsse math_ .. func
- | .if not X64
- | movsd FPARG1, xmm0
- | .endif
- |.else
- | .ffunc_n math_ .. func
- | fstp FPARG1
+ |.if not X64
+ | movsd FPARG1, xmm0
|.endif
| mov RB, BASE
- | call extern lj_vm_ .. func
+ | call extern func
| mov BASE, RB
- | .if X64
- | jmp ->fff_resxmm0
- | .else
- | jmp ->fff_resn
- | .endif
+ | jmp ->fff_resfp
|.endmacro
|
+ |.macro math_extern2, func
+ | .ffunc_nnsse math_ .. func
+ |.if not X64
+ | movsd FPARG1, xmm0
+ | movsd FPARG3, xmm1
+ |.endif
+ | mov RB, BASE
+ | call extern func
+ | mov BASE, RB
+ | jmp ->fff_resfp
+ |.endmacro
+ |
+ | math_extern log10
+ | math_extern exp
+ | math_extern sin
+ | math_extern cos
+ | math_extern tan
+ | math_extern asin
+ | math_extern acos
+ | math_extern atan
| math_extern sinh
| math_extern cosh
| math_extern tanh
+ | math_extern2 pow
+ | math_extern2 atan2
+ | math_extern2 fmod
|
- |->ff_math_deg:
- |.if SSE
- |.ffunc_nsse math_rad
- | mov CFUNC:RB, [BASE-8]
- | mulsd xmm0, qword CFUNC:RB->upvalue[0]
- | jmp ->fff_resxmm0
- |.else
- |.ffunc_n math_rad
- | mov CFUNC:RB, [BASE-8]
- | fmul qword CFUNC:RB->upvalue[0]
- | jmp ->fff_resn
- |.endif
- |
- |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
|.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
|
|.ffunc_1 math_frexp
@@ -2140,65 +2176,34 @@ static void build_subroutines(BuildCtx *ctx)
| cmp RB, 0x00200000; jb >4
|1:
| shr RB, 21; sub RB, RC // Extract and unbias exponent.
- |.if SSE
| cvtsi2sd xmm0, RB
- |.else
- | mov TMP1, RB; fild TMP1
- |.endif
| mov RB, [BASE-4]
| and RB, 0x800fffff // Mask off exponent.
| or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
| mov [BASE-4], RB
|2:
- |.if SSE
| movsd qword [BASE], xmm0
- |.else
- | fstp qword [BASE]
- |.endif
| mov RD, 1+2
| jmp ->fff_res
|3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
- |.if SSE
| xorps xmm0, xmm0; jmp <2
- |.else
- | fldz; jmp <2
- |.endif
|4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
- |.if SSE
| movsd xmm0, qword [BASE]
| sseconst_hi xmm1, RBa, 43500000 // 2^54.
| mulsd xmm0, xmm1
| movsd qword [BASE-8], xmm0
- |.else
- | fld qword [BASE]
- | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
- | fstp qword [BASE-8]
- |.endif
| mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
|
- |.if SSE
|.ffunc_nsse math_modf
- |.else
- |.ffunc_n math_modf
- |.endif
| mov RB, [BASE+4]
| mov PC, [BASE-4]
| shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
- |.if SSE
| movaps xmm4, xmm0
- | call ->vm_trunc
+ | call ->vm_trunc_sse
| subsd xmm4, xmm0
|1:
| movsd qword [BASE-8], xmm0
| movsd qword [BASE], xmm4
- |.else
- | fdup
- | call ->vm_trunc
- | fsub st1, st0
- |1:
- | fstp qword [BASE-8]
- | fstp qword [BASE]
- |.endif
| mov RC, [BASE-4]; mov RB, [BASE+4]
| xor RC, RB; js >3 // Need to adjust sign?
|2:
@@ -2208,25 +2213,10 @@ static void build_subroutines(BuildCtx *ctx)
| xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
| jmp <2
|4:
- |.if SSE
| xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
- |.else
- | fldz; fxch; jmp <1 // Return +-Inf and +-0.
- |.endif
|
- |.ffunc_nnr math_fmod
- |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1
- | fpop1
- | jmp ->fff_resn
- |
- |.if SSE
- |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
- |.else
- |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
- |.endif
- |
- |.macro math_minmax, name, cmovop, fcmovop, sseop
- | .ffunc name
+ |.macro math_minmax, name, cmovop, sseop
+ | .ffunc_1 name
| mov RA, 2
| cmp dword [BASE+4], LJ_TISNUM
|.if DUALNUM
@@ -2242,12 +2232,7 @@ static void build_subroutines(BuildCtx *ctx)
|3:
| ja ->fff_fallback
| // Convert intermediate result to number and continue below.
- |.if SSE
| cvtsi2sd xmm0, RB
- |.else
- | mov TMP1, RB
- | fild TMP1
- |.endif
| jmp >6
|4:
| ja ->fff_fallback
@@ -2255,7 +2240,6 @@ static void build_subroutines(BuildCtx *ctx)
| jae ->fff_fallback
|.endif
|
- |.if SSE
| movsd xmm0, qword [BASE]
|5: // Handle numbers or integers.
| cmp RA, RD; jae ->fff_resxmm0
@@ -2274,48 +2258,13 @@ static void build_subroutines(BuildCtx *ctx)
| sseop xmm0, xmm1
| add RA, 1
| jmp <5
- |.else
- | fld qword [BASE]
- |5: // Handle numbers or integers.
- | cmp RA, RD; jae ->fff_resn
- | cmp dword [BASE+RA*8-4], LJ_TISNUM
- |.if DUALNUM
- | jb >6
- | ja >9
- | fild dword [BASE+RA*8-8]
- | jmp >7
- |.else
- | jae >9
- |.endif
- |6:
- | fld qword [BASE+RA*8-8]
- |7:
- | fucomi st1; fcmovop st1; fpop1
- | add RA, 1
- | jmp <5
- |.endif
|.endmacro
|
- | math_minmax math_min, cmovg, fcmovnbe, minsd
- | math_minmax math_max, cmovl, fcmovbe, maxsd
- |.if not SSE
- |9:
- | fpop; jmp ->fff_fallback
- |.endif
+ | math_minmax math_min, cmovg, minsd
+ | math_minmax math_max, cmovl, maxsd
|
|//-- String library -----------------------------------------------------
|
- |.ffunc_1 string_len
- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
- | mov STR:RB, [BASE]
- |.if DUALNUM
- | mov RB, dword STR:RB->len; jmp ->fff_resi
- |.elif SSE
- | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
- |.else
- | fild dword STR:RB->len; jmp ->fff_resn
- |.endif
- |
|.ffunc string_byte // Only handle the 1-arg case here.
| cmp NARGS:RD, 1+1; jne ->fff_fallback
| cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2326,10 +2275,8 @@ static void build_subroutines(BuildCtx *ctx)
| movzx RB, byte STR:RB[1]
|.if DUALNUM
| jmp ->fff_resi
- |.elif SSE
- | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
|.else
- | mov TMP1, RB; fild TMP1; jmp ->fff_resn
+ | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
|.endif
|
|.ffunc string_char // Only handle the 1-arg case here.
@@ -2341,16 +2288,11 @@ static void build_subroutines(BuildCtx *ctx)
| mov RB, dword [BASE]
| cmp RB, 255; ja ->fff_fallback
| mov TMP2, RB
- |.elif SSE
+ |.else
| jae ->fff_fallback
| cvttsd2si RB, qword [BASE]
| cmp RB, 255; ja ->fff_fallback
| mov TMP2, RB
- |.else
- | jae ->fff_fallback
- | fld qword [BASE]
- | fistp TMP2
- | cmp TMP2, 255; ja ->fff_fallback
|.endif
|.if X64
| mov TMP3, 1
@@ -2371,6 +2313,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif
| mov SAVE_PC, PC
| call extern lj_str_new // (lua_State *L, char *str, size_t l)
+ |->fff_resstr:
| // GCstr * returned in eax (RD).
| mov BASE, L:RB->base
| mov PC, [BASE-4]
@@ -2388,14 +2331,10 @@ static void build_subroutines(BuildCtx *ctx)
| jne ->fff_fallback
| mov RB, dword [BASE+16]
| mov TMP2, RB
- |.elif SSE
+ |.else
| jae ->fff_fallback
| cvttsd2si RB, qword [BASE+16]
| mov TMP2, RB
- |.else
- | jae ->fff_fallback
- | fld qword [BASE+16]
- | fistp TMP2
|.endif
|1:
| cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
@@ -2410,12 +2349,8 @@ static void build_subroutines(BuildCtx *ctx)
| mov RB, STR:RB->len
|.if DUALNUM
| mov RA, dword [BASE+8]
- |.elif SSE
- | cvttsd2si RA, qword [BASE+8]
|.else
- | fld qword [BASE+8]
- | fistp ARG3
- | mov RA, ARG3
+ | cvttsd2si RA, qword [BASE+8]
|.endif
| mov RC, TMP2
| cmp RB, RC // len < end? (unsigned compare)
@@ -2459,136 +2394,34 @@ static void build_subroutines(BuildCtx *ctx)
| xor RC, RC // Zero length. Any ptr in RB is ok.
| jmp <4
|
- |.ffunc string_rep // Only handle the 1-char case inline.
+ |.macro ffstring_op, name
+ | .ffunc_1 string_ .. name
| ffgccheck
- | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments.
| cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
- | cmp dword [BASE+12], LJ_TISNUM
- | mov STR:RB, [BASE]
- |.if DUALNUM
- | jne ->fff_fallback
- | mov RC, dword [BASE+8]
- |.elif SSE
- | jae ->fff_fallback
- | cvttsd2si RC, qword [BASE+8]
- |.else
- | jae ->fff_fallback
- | fld qword [BASE+8]
- | fistp TMP2
- | mov RC, TMP2
- |.endif
- | test RC, RC
- | jle ->fff_emptystr // Count <= 0? (or non-int)
- | cmp dword STR:RB->len, 1
- | jb ->fff_emptystr // Zero length string?
- | jne ->fff_fallback_2 // Fallback for > 1-char strings.
- | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2
- | movzx RA, byte STR:RB[1]
- | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
- |.if X64
- | mov TMP3, RC
- |.else
- | mov ARG3, RC
- |.endif
- |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
- | mov [RB], RAL
- | add RB, 1
- | sub RC, 1
- | jnz <1
- | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
- | jmp ->fff_newstr
- |
- |.ffunc_1 string_reverse
- | ffgccheck
- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
- | mov STR:RB, [BASE]
- | mov RC, STR:RB->len
- | test RC, RC
- | jz ->fff_emptystr // Zero length string?
- | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
- | add RB, #STR
- | mov TMP2, PC // Need another temp register.
- |.if X64
- | mov TMP3, RC
- |.else
- | mov ARG3, RC
- |.endif
- | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
- |1:
- | movzx RA, byte [RB]
- | add RB, 1
- | sub RC, 1
- | mov [PC+RC], RAL
- | jnz <1
- | mov RD, PC
- | mov PC, TMP2
- | jmp ->fff_newstr
- |
- |.macro ffstring_case, name, lo, hi
- | .ffunc_1 name
- | ffgccheck
- | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
- | mov STR:RB, [BASE]
- | mov RC, STR:RB->len
- | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
- | add RB, #STR
- | mov TMP2, PC // Need another temp register.
- |.if X64
- | mov TMP3, RC
- |.else
- | mov ARG3, RC
- |.endif
- | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
- | jmp >3
- |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
- | movzx RA, byte [RB+RC]
- | cmp RA, lo
- | jb >2
- | cmp RA, hi
- | ja >2
- | xor RA, 0x20
- |2:
- | mov [PC+RC], RAL
- |3:
- | sub RC, 1
- | jns <1
- | mov RD, PC
- | mov PC, TMP2
- | jmp ->fff_newstr
+ | mov L:RB, SAVE_L
+ | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)]
+ | mov L:RB->base, BASE
+ | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
+ | mov RCa, SBUF:FCARG1->b
+ | mov SBUF:FCARG1->L, L:RB
+ | mov SBUF:FCARG1->w, RCa
+ | mov SAVE_PC, PC
+ | call extern lj_buf_putstr_ .. name .. @8
+ | mov FCARG1, eax
+ | call extern lj_buf_tostr@4
+ | jmp ->fff_resstr
|.endmacro
|
- |ffstring_case string_lower, 0x41, 0x5a
- |ffstring_case string_upper, 0x61, 0x7a
- |
- |//-- Table library ------------------------------------------------------
- |
- |.ffunc_1 table_getn
- | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
- | mov RB, BASE // Save BASE.
- | mov TAB:FCARG1, [BASE]
- | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
- | // Length of table returned in eax (RD).
- | mov BASE, RB // Restore BASE.
- |.if DUALNUM
- | mov RB, RD; jmp ->fff_resi
- |.elif SSE
- | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
- |.else
- | mov ARG1, RD; fild ARG1; jmp ->fff_resn
- |.endif
+ |ffstring_op reverse
+ |ffstring_op lower
+ |ffstring_op upper
|
|//-- Bit library --------------------------------------------------------
|
- |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
- |
|.macro .ffunc_bit, name, kind, fdef
| fdef name
|.if kind == 2
- |.if SSE
| sseconst_tobit xmm1, RBa
- |.else
- | mov TMP1, TOBIT_BIAS
- |.endif
|.endif
| cmp dword [BASE+4], LJ_TISNUM
|.if DUALNUM
@@ -2604,24 +2437,12 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| jae ->fff_fallback
|.endif
- |.if SSE
| movsd xmm0, qword [BASE]
|.if kind < 2
| sseconst_tobit xmm1, RBa
|.endif
| addsd xmm0, xmm1
| movd RB, xmm0
- |.else
- | fld qword [BASE]
- |.if kind < 2
- | mov TMP1, TOBIT_BIAS
- |.endif
- | fadd TMP1
- | fstp FPARG1
- |.if kind > 0
- | mov RB, ARG1
- |.endif
- |.endif
|2:
|.endmacro
|
@@ -2630,15 +2451,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endmacro
|
|.ffunc_bit bit_tobit, 0
- |.if DUALNUM or SSE
- |.if not SSE
- | mov RB, ARG1
- |.endif
| jmp ->fff_resbit
- |.else
- | fild ARG1
- | jmp ->fff_resn
- |.endif
|
|.macro .ffunc_bit_op, name, ins
| .ffunc_bit name, 2
@@ -2658,17 +2471,10 @@ static void build_subroutines(BuildCtx *ctx)
|.else
| jae ->fff_fallback_bit_op
|.endif
- |.if SSE
| movsd xmm0, qword [RD]
| addsd xmm0, xmm1
| movd RA, xmm0
| ins RB, RA
- |.else
- | fld qword [RD]
- | fadd TMP1
- | fstp FPARG1
- | ins RB, ARG1
- |.endif
| sub RD, 8
| jmp <1
|.endmacro
@@ -2685,15 +2491,10 @@ static void build_subroutines(BuildCtx *ctx)
| not RB
|.if DUALNUM
| jmp ->fff_resbit
- |.elif SSE
+ |.else
|->fff_resbit:
| cvtsi2sd xmm0, RB
| jmp ->fff_resxmm0
- |.else
- |->fff_resbit:
- | mov ARG1, RB
- | fild ARG1
- | jmp ->fff_resn
|.endif
|
|->fff_fallback_bit_op:
@@ -2706,22 +2507,13 @@ static void build_subroutines(BuildCtx *ctx)
| // Note: no inline conversion from number for 2nd argument!
| cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
| mov RA, dword [BASE+8]
- |.elif SSE
+ |.else
| .ffunc_nnsse name
| sseconst_tobit xmm2, RBa
| addsd xmm0, xmm2
| addsd xmm1, xmm2
| movd RB, xmm0
| movd RA, xmm1
- |.else
- | .ffunc_nn name
- | mov TMP1, TOBIT_BIAS
- | fadd TMP1
- | fstp FPARG3
- | fadd TMP1
- | fstp FPARG1
- | mov RA, ARG3
- | mov RB, ARG1
|.endif
| ins RB, cl // Assumes RA is ecx.
| jmp ->fff_resbit
@@ -2855,7 +2647,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov FCARG2, PC // Caveat: FCARG2 == BASE
| mov FCARG1, L:RB
| // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
- | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc)
+ | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
|3:
| mov BASE, L:RB->base
|4:
@@ -2926,6 +2718,79 @@ static void build_subroutines(BuildCtx *ctx)
| add NARGS:RD, 1
| jmp RBa
|
+ |->cont_stitch: // Trace stitching.
+ |.if JIT
+ | // BASE = base, RC = result, RB = mbase
+ | mov TRACE:RA, [RB-24] // Save previous trace.
+ | mov TMP1, TRACE:RA
+ | mov TMP3, DISPATCH // Need one more register.
+ | mov DISPATCH, MULTRES
+ | movzx RA, PC_RA
+ | lea RA, [BASE+RA*8] // Call base.
+ | sub DISPATCH, 1
+ | jz >2
+ |1: // Move results down.
+ |.if X64
+ | mov RBa, [RC]
+ | mov [RA], RBa
+ |.else
+ | mov RB, [RC]
+ | mov [RA], RB
+ | mov RB, [RC+4]
+ | mov [RA+4], RB
+ |.endif
+ | add RC, 8
+ | add RA, 8
+ | sub DISPATCH, 1
+ | jnz <1
+ |2:
+ | movzx RC, PC_RA
+ | movzx RB, PC_RB
+ | add RC, RB
+ | lea RC, [BASE+RC*8-8]
+ |3:
+ | cmp RC, RA
+ | ja >9 // More results wanted?
+ |
+ | mov DISPATCH, TMP3
+ | mov TRACE:RD, TMP1 // Get previous trace.
+ | movzx RB, word TRACE:RD->traceno
+ | movzx RD, word TRACE:RD->link
+ | cmp RD, RB
+ | je ->cont_nop // Blacklisted.
+ | test RD, RD
+ | jne =>BC_JLOOP // Jump to stitched trace.
+ |
+ | // Stitch a new trace to the previous trace.
+ | mov [DISPATCH+DISPATCH_J(exitno)], RB
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov FCARG2, PC
+ | lea FCARG1, [DISPATCH+GG_DISP2J]
+ | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
+ | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
+ | mov BASE, L:RB->base
+ | jmp ->cont_nop
+ |
+ |9: // Fill up results with nil.
+ | mov dword [RA+4], LJ_TNIL
+ | add RA, 8
+ | jmp <3
+ |.endif
+ |
+ |->vm_profhook: // Dispatch target for profiler hook.
+#if LJ_HASPROFILE
+ | mov L:RB, SAVE_L
+ | mov L:RB->base, BASE
+ | mov FCARG2, PC // Caveat: FCARG2 == BASE
+ | mov FCARG1, L:RB
+ | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
+ | mov BASE, L:RB->base
+ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
+ | sub PC, 4
+ | jmp ->cont_nop
+#endif
+ |
|//-----------------------------------------------------------------------
|//-- Trace exit handler -------------------------------------------------
|//-----------------------------------------------------------------------
@@ -2978,10 +2843,9 @@ static void build_subroutines(BuildCtx *ctx)
| movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
|.endif
| // Caveat: RB is ebp.
- | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
+ | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
| mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
| mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
- | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
| mov L:RB->base, BASE
|.if X64WIN
| lea CARG2, [rsp+4*8]
@@ -2991,6 +2855,7 @@ static void build_subroutines(BuildCtx *ctx)
| lea FCARG2, [esp+16]
|.endif
| lea FCARG1, [DISPATCH+GG_DISP2J]
+ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
| call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
| // MULTRES or negated error code returned in eax (RD).
| mov RAa, L:RB->cframe
@@ -3037,12 +2902,14 @@ static void build_subroutines(BuildCtx *ctx)
| mov r13, TMPa
| mov r12, TMPQ
|.endif
- | test RD, RD; js >3 // Check for error from exit.
+ | test RD, RD; js >9 // Check for error from exit.
+ | mov L:RB, SAVE_L
| mov MULTRES, RD
| mov LFUNC:KBASE, [BASE-8]
| mov KBASE, LFUNC:KBASE->pc
| mov KBASE, [KBASE+PC2PROTO(k)]
- | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
+ | mov L:RB->base, BASE
+ | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
| set_vmstate INTERP
| // Modified copy of ins_next which handles function header dispatch, too.
| mov RC, [PC]
@@ -3051,18 +2918,35 @@ static void build_subroutines(BuildCtx *ctx)
| add PC, 4
| shr RC, 16
| cmp OP, BC_FUNCF // Function header?
- | jb >2
- | mov RC, MULTRES // RC/RD holds nres+1.
+ | jb >3
+ | cmp OP, BC_FUNCC+2 // Fast function?
+ | jae >4
|2:
+ | mov RC, MULTRES // RC/RD holds nres+1.
+ |3:
|.if X64
| jmp aword [DISPATCH+OP*8]
|.else
| jmp aword [DISPATCH+OP*4]
|.endif
|
- |3: // Rethrow error from the right C frame.
+ |4: // Check frame below fast function.
+ | mov RC, [BASE-4]
+ | test RC, FRAME_TYPE
+ | jnz <2 // Trace stitching continuation?
+ | // Otherwise set KBASE for Lua function below fast function.
+ | movzx RC, byte [RC-3]
+ | not RCa
+ | mov LFUNC:KBASE, [BASE+RC*8-8]
+ | mov KBASE, LFUNC:KBASE->pc
+ | mov KBASE, [KBASE+PC2PROTO(k)]
+ | jmp <2
+ |
+ |9: // Rethrow error from the right C frame.
+ | mov FCARG2, RD
| mov FCARG1, L:RB
- | call extern lj_err_run@4 // (lua_State *L)
+ | neg FCARG2
+ | call extern lj_err_trace@8 // (lua_State *L, int errcode)
|.endif
|
|//-----------------------------------------------------------------------
@@ -3070,27 +2954,18 @@ static void build_subroutines(BuildCtx *ctx)
|//-----------------------------------------------------------------------
|
|// FP value rounding. Called by math.floor/math.ceil fast functions
- |// and from JIT code.
- |
- |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified.
- |.macro vm_round_x87, mode1, mode2
- | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
- | mov [esp+8], eax
- | mov ax, mode1
- | or ax, [esp+4]
- |.if mode2 ~= 0xffff
- | and ax, mode2
- |.endif
- | mov [esp+6], ax
- | fldcw word [esp+6]
- | frndint
- | fldcw word [esp+4]
- | mov eax, [esp+8]
+ |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
+ |.macro vm_round, name, mode, cond
+ |->name:
+ |.if not X64 and cond
+ | movsd xmm0, qword [esp+4]
+ | call ->name .. _sse
+ | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
+ | fld qword [esp+4]
| ret
- |.endmacro
+ |.endif
|
- |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
- |.macro vm_round_sse, mode
+ |->name .. _sse:
| sseconst_abs xmm2, RDa
| sseconst_2p52 xmm3, RDa
| movaps xmm1, xmm0
@@ -3128,22 +3003,12 @@ static void build_subroutines(BuildCtx *ctx)
| ret
|.endmacro
|
- |.macro vm_round, name, ssemode, mode1, mode2
- |->name:
- |.if not SSE
- | vm_round_x87 mode1, mode2
- |.endif
- |->name .. _sse:
- | vm_round_sse ssemode
- |.endmacro
- |
- | vm_round vm_floor, 0, 0x0400, 0xf7ff
- | vm_round vm_ceil, 1, 0x0800, 0xfbff
- | vm_round vm_trunc, 2, 0x0c00, 0xffff
+ | vm_round vm_floor, 0, 1
+ | vm_round vm_ceil, 1, JIT
+ | vm_round vm_trunc, 2, JIT
|
|// FP modulo x%y. Called by BC_MOD* and vm_arith.
|->vm_mod:
- |.if SSE
|// Args in xmm0/xmm1, return value in xmm0.
|// Caveat: xmm0-xmm5 and RC (eax) modified!
| movaps xmm5, xmm0
@@ -3171,488 +3036,6 @@ static void build_subroutines(BuildCtx *ctx)
| movaps xmm0, xmm5
| subsd xmm0, xmm1
| ret
- |.else
- |// Args/ret on x87 stack (y on top). No xmm registers modified.
- |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
- | fld st1
- | fdiv st1
- | fnstcw word [esp+4]
- | mov ax, 0x0400
- | or ax, [esp+4]
- | and ax, 0xf7ff
- | mov [esp+6], ax
- | fldcw word [esp+6]
- | frndint
- | fldcw word [esp+4]
- | fmulp st1
- | fsubp st1
- | ret
- |.endif
- |
- |// FP log2(x). Called by math.log(x, base).
- |->vm_log2:
- |.if X64WIN
- | movsd qword [rsp+8], xmm0 // Use scratch area.
- | fld1
- | fld qword [rsp+8]
- | fyl2x
- | fstp qword [rsp+8]
- | movsd xmm0, qword [rsp+8]
- |.elif X64
- | movsd qword [rsp-8], xmm0 // Use red zone.
- | fld1
- | fld qword [rsp-8]
- | fyl2x
- | fstp qword [rsp-8]
- | movsd xmm0, qword [rsp-8]
- |.else
- | fld1
- | fld qword [esp+4]
- | fyl2x
- |.endif
- | ret
- |
- |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
- |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
- |// Caveat: needs 3 slots on x87 stack!
- |->vm_exp_x87:
- | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
- |->vm_exp2_x87:
- | .if X64WIN
- | .define expscratch, dword [rsp+8] // Use scratch area.
- | .elif X64
- | .define expscratch, dword [rsp-8] // Use red zone.
- | .else
- | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
- | .endif
- | fst expscratch // Caveat: overwrites ARG1.
- | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
- | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
- |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
- | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
- | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
- |1:
- | ret
- |2:
- | fpop; fldz; ret
- |
- |// Generic power function x^y. Called by BC_POW, math.pow fast function,
- |// and vm_arith.
- |// Args/ret on x87 stack (y on top). RC (eax) modified.
- |// Caveat: needs 3 slots on x87 stack!
- |->vm_pow:
- |.if not SSE
- | fist dword [esp+4] // Store/reload int before comparison.
- | fild dword [esp+4] // Integral exponent used in vm_powi.
- | fucomip st1
- | jnz >8 // Branch for FP exponents.
- | jp >9 // Branch for NaN exponent.
- | fpop // Pop y and fallthrough to vm_powi.
- |
- |// FP/int power function x^i. Arg1/ret on x87 stack.
- |// Arg2 (int) on C stack. RC (eax) modified.
- |// Caveat: needs 2 slots on x87 stack!
- | mov eax, [esp+4]
- | cmp eax, 1; jle >6 // i<=1?
- | // Now 1 < (unsigned)i <= 0x80000000.
- |1: // Handle leading zeros.
- | test eax, 1; jnz >2
- | fmul st0
- | shr eax, 1
- | jmp <1
- |2:
- | shr eax, 1; jz >5
- | fdup
- |3: // Handle trailing bits.
- | fmul st0
- | shr eax, 1; jz >4
- | jnc <3
- | fmul st1, st0
- | jmp <3
- |4:
- | fmulp st1
- |5:
- | ret
- |6:
- | je <5 // x^1 ==> x
- | jb >7
- | fld1; fdivrp st1
- | neg eax
- | cmp eax, 1; je <5 // x^-1 ==> 1/x
- | jmp <1 // x^-i ==> (1/x)^i
- |7:
- | fpop; fld1 // x^0 ==> 1
- | ret
- |
- |8: // FP/FP power function x^y.
- | fst dword [esp+4]
- | fxch
- | fst dword [esp+8]
- | mov eax, [esp+4]; shl eax, 1
- | cmp eax, 0xff000000; je >2 // x^+-Inf?
- | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
- | cmp eax, 0xff000000; je >4 // +-Inf^y?
- | fyl2x
- | jmp ->vm_exp2raw
- |
- |9: // Handle x^NaN.
- | fld1
- | fucomip st2
- | je >1 // 1^NaN ==> 1
- | fxch // x^NaN ==> NaN
- |1:
- | fpop
- | ret
- |
- |2: // Handle x^+-Inf.
- | fabs
- | fld1
- | fucomip st1
- | je >3 // +-1^+-Inf ==> 1
- | fpop; fabs; fldz; mov eax, 0; setc al
- | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
- | fxch
- |3:
- | fpop1; fabs
- | ret
- |
- |4: // Handle +-0^y or +-Inf^y.
- | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
- | fpop; fpop
- | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
- | fldz // y < 0, +-Inf^y ==> 0
- | ret
- |5:
- | mov dword [esp+4], 0x7f800000 // Return +Inf.
- | fld dword [esp+4]
- | ret
- |.endif
- |
- |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
- |// Needs 16 byte scratch area for x86. Also called from JIT code.
- |->vm_pow_sse:
- | cvtsd2si eax, xmm1
- | cvtsi2sd xmm2, eax
- | ucomisd xmm1, xmm2
- | jnz >8 // Branch for FP exponents.
- | jp >9 // Branch for NaN exponent.
- | // Fallthrough to vm_powi_sse.
- |
- |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
- |->vm_powi_sse:
- | cmp eax, 1; jle >6 // i<=1?
- | // Now 1 < (unsigned)i <= 0x80000000.
- |1: // Handle leading zeros.
- | test eax, 1; jnz >2
- | mulsd xmm0, xmm0
- | shr eax, 1
- | jmp <1
- |2:
- | shr eax, 1; jz >5
- | movaps xmm1, xmm0
- |3: // Handle trailing bits.
- | mulsd xmm0, xmm0
- | shr eax, 1; jz >4
- | jnc <3
- | mulsd xmm1, xmm0
- | jmp <3
- |4:
- | mulsd xmm0, xmm1
- |5:
- | ret
- |6:
- | je <5 // x^1 ==> x
- | jb >7 // x^0 ==> 1
- | neg eax
- | call <1
- | sseconst_1 xmm1, RDa
- | divsd xmm1, xmm0
- | movaps xmm0, xmm1
- | ret
- |7:
- | sseconst_1 xmm0, RDa
- | ret
- |
- |8: // FP/FP power function x^y.
- |.if X64
- | movd rax, xmm1; shl rax, 1
- | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
- | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
- | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
- | .if X64WIN
- | movsd qword [rsp+16], xmm1 // Use scratch area.
- | movsd qword [rsp+8], xmm0
- | fld qword [rsp+16]
- | fld qword [rsp+8]
- | .else
- | movsd qword [rsp-16], xmm1 // Use red zone.
- | movsd qword [rsp-8], xmm0
- | fld qword [rsp-16]
- | fld qword [rsp-8]
- | .endif
- |.else
- | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
- | movsd qword [esp+4], xmm0
- | cmp dword [esp+12], 0; jne >1
- | mov eax, [esp+16]; shl eax, 1
- | cmp eax, 0xffe00000; je >2 // x^+-Inf?
- |1:
- | cmp dword [esp+4], 0; jne >1
- | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
- | cmp eax, 0xffe00000; je >5 // +-Inf^y?
- |1:
- | fld qword [esp+12]
- | fld qword [esp+4]
- |.endif
- | fyl2x // y*log2(x)
- | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
- | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
- |.if X64WIN
- | fstp qword [rsp+8] // Use scratch area.
- | movsd xmm0, qword [rsp+8]
- |.elif X64
- | fstp qword [rsp-8] // Use red zone.
- | movsd xmm0, qword [rsp-8]
- |.else
- | fstp qword [esp+4] // Needs 8 byte scratch area.
- | movsd xmm0, qword [esp+4]
- |.endif
- | ret
- |
- |9: // Handle x^NaN.
- | sseconst_1 xmm2, RDa
- | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
- | movaps xmm0, xmm1 // x^NaN ==> NaN
- |1:
- | ret
- |
- |2: // Handle x^+-Inf.
- | sseconst_abs xmm2, RDa
- | andpd xmm0, xmm2 // |x|
- | sseconst_1 xmm2, RDa
- | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
- | movmskpd eax, xmm1
- | xorps xmm0, xmm0
- | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
- |3:
- | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
- | ret
- |
- |4: // Handle +-0^y.
- | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
- | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
- | ret
- |
- |5: // Handle +-Inf^y.
- | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
- | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
- | ret
- |
- |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
- |// Computes fpm(x) for extended math functions. ORDER FPM.
- |->vm_foldfpm:
- |.if JIT
- |.if X64
- | .if X64WIN
- | .define fpmop, CARG2d
- | .else
- | .define fpmop, CARG1d
- | .endif
- | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
- | cmp fpmop, 3; jb ->vm_trunc; ja >2
- | sqrtsd xmm0, xmm0; ret
- |2:
- | .if X64WIN
- | movsd qword [rsp+8], xmm0 // Use scratch area.
- | fld qword [rsp+8]
- | .else
- | movsd qword [rsp-8], xmm0 // Use red zone.
- | fld qword [rsp-8]
- | .endif
- | cmp fpmop, 5; ja >2
- | .if X64WIN; pop rax; .endif
- | je >1
- | call ->vm_exp_x87
- | .if X64WIN; push rax; .endif
- | jmp >7
- |1:
- | call ->vm_exp2_x87
- | .if X64WIN; push rax; .endif
- | jmp >7
- |2: ; cmp fpmop, 7; je >1; ja >2
- | fldln2; fxch; fyl2x; jmp >7
- |1: ; fld1; fxch; fyl2x; jmp >7
- |2: ; cmp fpmop, 9; je >1; ja >2
- | fldlg2; fxch; fyl2x; jmp >7
- |1: ; fsin; jmp >7
- |2: ; cmp fpmop, 11; je >1; ja >9
- | fcos; jmp >7
- |1: ; fptan; fpop
- |7:
- | .if X64WIN
- | fstp qword [rsp+8] // Use scratch area.
- | movsd xmm0, qword [rsp+8]
- | .else
- | fstp qword [rsp-8] // Use red zone.
- | movsd xmm0, qword [rsp-8]
- | .endif
- | ret
- |.else // x86 calling convention.
- | .define fpmop, eax
- |.if SSE
- | mov fpmop, [esp+12]
- | movsd xmm0, qword [esp+4]
- | cmp fpmop, 1; je >1; ja >2
- | call ->vm_floor; jmp >7
- |1: ; call ->vm_ceil; jmp >7
- |2: ; cmp fpmop, 3; je >1; ja >2
- | call ->vm_trunc; jmp >7
- |1:
- | sqrtsd xmm0, xmm0
- |7:
- | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
- | fld qword [esp+4]
- | ret
- |2: ; fld qword [esp+4]
- | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
- |2: ; cmp fpmop, 7; je >1; ja >2
- | fldln2; fxch; fyl2x; ret
- |1: ; fld1; fxch; fyl2x; ret
- |2: ; cmp fpmop, 9; je >1; ja >2
- | fldlg2; fxch; fyl2x; ret
- |1: ; fsin; ret
- |2: ; cmp fpmop, 11; je >1; ja >9
- | fcos; ret
- |1: ; fptan; fpop; ret
- |.else
- | mov fpmop, [esp+12]
- | fld qword [esp+4]
- | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil
- | cmp fpmop, 3; jb ->vm_trunc; ja >2
- | fsqrt; ret
- |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87
- | cmp fpmop, 7; je >1; ja >2
- | fldln2; fxch; fyl2x; ret
- |1: ; fld1; fxch; fyl2x; ret
- |2: ; cmp fpmop, 9; je >1; ja >2
- | fldlg2; fxch; fyl2x; ret
- |1: ; fsin; ret
- |2: ; cmp fpmop, 11; je >1; ja >9
- | fcos; ret
- |1: ; fptan; fpop; ret
- |.endif
- |.endif
- |9: ; int3 // Bad fpm.
- |.endif
- |
- |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
- |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
- |// and basic math functions. ORDER ARITH
- |->vm_foldarith:
- |.if X64
- |
- | .if X64WIN
- | .define foldop, CARG3d
- | .else
- | .define foldop, CARG1d
- | .endif
- | cmp foldop, 1; je >1; ja >2
- | addsd xmm0, xmm1; ret
- |1: ; subsd xmm0, xmm1; ret
- |2: ; cmp foldop, 3; je >1; ja >2
- | mulsd xmm0, xmm1; ret
- |1: ; divsd xmm0, xmm1; ret
- |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow
- | cmp foldop, 7; je >1; ja >2
- | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret
- |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret
- |2: ; cmp foldop, 9; ja >2
- |.if X64WIN
- | movsd qword [rsp+8], xmm0 // Use scratch area.
- | movsd qword [rsp+16], xmm1
- | fld qword [rsp+8]
- | fld qword [rsp+16]
- |.else
- | movsd qword [rsp-8], xmm0 // Use red zone.
- | movsd qword [rsp-16], xmm1
- | fld qword [rsp-8]
- | fld qword [rsp-16]
- |.endif
- | je >1
- | fpatan
- |7:
- |.if X64WIN
- | fstp qword [rsp+8] // Use scratch area.
- | movsd xmm0, qword [rsp+8]
- |.else
- | fstp qword [rsp-8] // Use red zone.
- | movsd xmm0, qword [rsp-8]
- |.endif
- | ret
- |1: ; fxch; fscale; fpop1; jmp <7
- |2: ; cmp foldop, 11; je >1; ja >9
- | minsd xmm0, xmm1; ret
- |1: ; maxsd xmm0, xmm1; ret
- |9: ; int3 // Bad op.
- |
- |.elif SSE // x86 calling convention with SSE ops.
- |
- | .define foldop, eax
- | mov foldop, [esp+20]
- | movsd xmm0, qword [esp+4]
- | movsd xmm1, qword [esp+12]
- | cmp foldop, 1; je >1; ja >2
- | addsd xmm0, xmm1
- |7:
- | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
- | fld qword [esp+4]
- | ret
- |1: ; subsd xmm0, xmm1; jmp <7
- |2: ; cmp foldop, 3; je >1; ja >2
- | mulsd xmm0, xmm1; jmp <7
- |1: ; divsd xmm0, xmm1; jmp <7
- |2: ; cmp foldop, 5
- | je >1; ja >2
- | call ->vm_mod; jmp <7
- |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
- |2: ; cmp foldop, 7; je >1; ja >2
- | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7
- |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7
- |2: ; cmp foldop, 9; ja >2
- | fld qword [esp+4] // Reload from stack
- | fld qword [esp+12]
- | je >1
- | fpatan; ret
- |1: ; fxch; fscale; fpop1; ret
- |2: ; cmp foldop, 11; je >1; ja >9
- | minsd xmm0, xmm1; jmp <7
- |1: ; maxsd xmm0, xmm1; jmp <7
- |9: ; int3 // Bad op.
- |
- |.else // x86 calling convention with x87 ops.
- |
- | mov eax, [esp+20]
- | fld qword [esp+4]
- | fld qword [esp+12]
- | cmp eax, 1; je >1; ja >2
- | faddp st1; ret
- |1: ; fsubp st1; ret
- |2: ; cmp eax, 3; je >1; ja >2
- | fmulp st1; ret
- |1: ; fdivp st1; ret
- |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow
- | cmp eax, 7; je >1; ja >2
- | fpop; fchs; ret
- |1: ; fpop; fabs; ret
- |2: ; cmp eax, 9; je >1; ja >2
- | fpatan; ret
- |1: ; fxch; fscale; fpop1; ret
- |2: ; cmp eax, 11; je >1; ja >9
- | fucomi st1; fcmovnbe st1; fpop1; ret
- |1: ; fucomi st1; fcmovbe st1; fpop1; ret
- |9: ; int3 // Bad op.
- |
- |.endif
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions --------------------------------------------
@@ -3664,6 +3047,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov eax, CARG1d
| .if X64WIN; push rsi; mov rsi, CARG2; .endif
| push rbx
+ | xor ecx, ecx
| cpuid
| mov [rsi], eax
| mov [rsi+4], ebx
@@ -3687,6 +3071,7 @@ static void build_subroutines(BuildCtx *ctx)
| mov eax, [esp+4] // Argument 1 is function number.
| push edi
| push ebx
+ | xor ecx, ecx
| cpuid
| mov edi, [esp+16] // Argument 2 is result area.
| mov [edi], eax
@@ -3699,6 +3084,86 @@ static void build_subroutines(BuildCtx *ctx)
| ret
|.endif
|
+ |.define NEXT_TAB, TAB:FCARG1
+ |.define NEXT_IDX, FCARG2
+ |.define NEXT_PTR, RCa
+ |.define NEXT_PTRd, RC
+ |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro
+ |.if X64
+ |.define NEXT_TMP, CARG3d
+ |.define NEXT_TMPq, CARG3
+ |.define NEXT_ASIZE, CARG4d
+ |.macro NEXT_ENTER; .endmacro
+ |.macro NEXT_LEAVE; ret; .endmacro
+ |.if X64WIN
+ |.define NEXT_RES_PTR, [rsp+aword*5]
+ |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
+ |.else
+ |.define NEXT_RES_PTR, [rsp+aword*1]
+ |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro
+ |.endif
+ |.else
+ |.define NEXT_ASIZE, esi
+ |.define NEXT_TMP, edi
+ |.macro NEXT_ENTER; push esi; push edi; .endmacro
+ |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro
+ |.define NEXT_RES_PTR, [esp+dword*3]
+ |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro
+ |.endif
+ |
+ |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
+ |// Next idx returned in edx.
+ |->vm_next:
+ |.if JIT
+ | NEXT_ENTER
+ | mov NEXT_ASIZE, NEXT_TAB->asize
+ |1: // Traverse array part.
+ | cmp NEXT_IDX, NEXT_ASIZE; jae >5
+ | mov NEXT_TMP, NEXT_TAB->array
+ | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2
+ | lea NEXT_PTR, NEXT_RES_PTR
+ |.if X64
+ | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8]
+ | mov qword [NEXT_PTR], NEXT_TMPq
+ |.else
+ | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4]
+ | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8]
+ | mov dword [NEXT_PTR+4], NEXT_ASIZE
+ | mov dword [NEXT_PTR], NEXT_TMP
+ |.endif
+ |.if DUALNUM
+ | mov dword [NEXT_PTR+dword*3], LJ_TISNUM
+ | mov dword [NEXT_PTR+dword*2], NEXT_IDX
+ |.else
+ | cvtsi2sd xmm0, NEXT_IDX
+ | movsd qword [NEXT_PTR+dword*2], xmm0
+ |.endif
+ | NEXT_RES_IDX 1
+ | NEXT_LEAVE
+ |2: // Skip holes in array part.
+ | add NEXT_IDX, 1
+ | jmp <1
+ |
+ |5: // Traverse hash part.
+ | sub NEXT_IDX, NEXT_ASIZE
+ |6:
+ | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9
+ | imul NEXT_PTRd, NEXT_IDX, #NODE
+ | add NODE:NEXT_PTRd, dword NEXT_TAB->node
+ | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7
+ | NEXT_RES_IDXL NEXT_ASIZE+1
+ | NEXT_LEAVE
+ |7: // Skip holes in hash part.
+ | add NEXT_IDX, 1
+ | jmp <6
+ |
+ |9: // End of iteration. Set the key to nil (not the value).
+ | NEXT_RES_IDX NEXT_ASIZE
+ | lea NEXT_PTR, NEXT_RES_PTR
+ | mov dword [NEXT_PTR+dword*3], LJ_TNIL
+ | NEXT_LEAVE
+ |.endif
+ |
|//-----------------------------------------------------------------------
|//-- Assertions ---------------------------------------------------------
|//-----------------------------------------------------------------------
@@ -3964,19 +3429,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA is a number.
| cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
| // RA is a number, RD is an integer.
- |.if SSE
| cvtsi2sd xmm0, dword [BASE+RD*8]
| jmp >2
- |.else
- | fld qword [BASE+RA*8]
- | fild dword [BASE+RD*8]
- | jmp >3
- |.endif
|
|8: // RA is an integer, RD is not an integer.
| ja ->vmeta_comp
| // RA is an integer, RD is a number.
- |.if SSE
| cvtsi2sd xmm1, dword [BASE+RA*8]
| movsd xmm0, qword [BASE+RD*8]
| add PC, 4
@@ -3984,29 +3442,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| jmp_comp jbe, ja, jb, jae, <9
| jmp <6
|.else
- | fild dword [BASE+RA*8]
- | jmp >2
- |.endif
- |.else
| checknum RA, ->vmeta_comp
| checknum RD, ->vmeta_comp
|.endif
- |.if SSE
|1:
| movsd xmm0, qword [BASE+RD*8]
|2:
| add PC, 4
| ucomisd xmm0, qword [BASE+RA*8]
|3:
- |.else
- |1:
- | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
- |2:
- | fld qword [BASE+RD*8]
- |3:
- | add PC, 4
- | fcomparepp
- |.endif
| // Unordered: all of ZF CF PF set, ordered: PF clear.
| // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
|.if DUALNUM
@@ -4046,43 +3490,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RD is a number.
| cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
| // RD is a number, RA is an integer.
- |.if SSE
| cvtsi2sd xmm0, dword [BASE+RA*8]
- |.else
- | fild dword [BASE+RA*8]
- |.endif
| jmp >2
|
|8: // RD is an integer, RA is not an integer.
| ja >5
| // RD is an integer, RA is a number.
- |.if SSE
| cvtsi2sd xmm0, dword [BASE+RD*8]
| ucomisd xmm0, qword [BASE+RA*8]
- |.else
- | fild dword [BASE+RD*8]
- | fld qword [BASE+RA*8]
- |.endif
| jmp >4
|
|.else
| cmp RB, LJ_TISNUM; jae >5
| checknum RA, >5
|.endif
- |.if SSE
|1:
| movsd xmm0, qword [BASE+RA*8]
|2:
| ucomisd xmm0, qword [BASE+RD*8]
|4:
- |.else
- |1:
- | fld qword [BASE+RA*8]
- |2:
- | fld qword [BASE+RD*8]
- |4:
- | fcomparepp
- |.endif
iseqne_fp:
if (vk) {
| jp >2 // Unordered means not equal.
@@ -4205,39 +3631,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // RA is a number.
| cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
| // RA is a number, RD is an integer.
- |.if SSE
| cvtsi2sd xmm0, dword [KBASE+RD*8]
- |.else
- | fild dword [KBASE+RD*8]
- |.endif
| jmp >2
|
|8: // RA is an integer, RD is a number.
- |.if SSE
| cvtsi2sd xmm0, dword [BASE+RA*8]
| ucomisd xmm0, qword [KBASE+RD*8]
- |.else
- | fild dword [BASE+RA*8]
- | fld qword [KBASE+RD*8]
- |.endif
| jmp >4
|.else
| cmp RB, LJ_TISNUM; jae >3
|.endif
- |.if SSE
|1:
| movsd xmm0, qword [KBASE+RD*8]
|2:
| ucomisd xmm0, qword [BASE+RA*8]
|4:
- |.else
- |1:
- | fld qword [KBASE+RD*8]
- |2:
- | fld qword [BASE+RA*8]
- |4:
- | fcomparepp
- |.endif
goto iseqne_fp;
case BC_ISEQP: case BC_ISNEP:
vk = op == BC_ISEQP;
@@ -4288,6 +3696,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_next
break;
+ case BC_ISTYPE:
+ | ins_AD // RA = src, RD = -type
+ | add RD, [BASE+RA*8+4]
+ | jne ->vmeta_istype
+ | ins_next
+ break;
+ case BC_ISNUM:
+ | ins_AD // RA = src, RD = -(TISNUM-1)
+ | checknum RA, ->vmeta_istype
+ | ins_next
+ break;
+
/* -- Unary ops --------------------------------------------------------- */
case BC_MOV:
@@ -4331,16 +3751,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.else
| checknum RD, ->vmeta_unm
|.endif
- |.if SSE
| movsd xmm0, qword [BASE+RD*8]
| sseconst_sign xmm1, RDa
| xorps xmm0, xmm1
| movsd qword [BASE+RA*8], xmm0
- |.else
- | fld qword [BASE+RD*8]
- | fchs
- | fstp qword [BASE+RA*8]
- |.endif
|.if DUALNUM
| jmp <9
|.else
@@ -4356,15 +3770,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|1:
| mov dword [BASE+RA*8+4], LJ_TISNUM
| mov dword [BASE+RA*8], RD
- |.elif SSE
+ |.else
| xorps xmm0, xmm0
| cvtsi2sd xmm0, dword STR:RD->len
|1:
| movsd qword [BASE+RA*8], xmm0
- |.else
- | fild dword STR:RD->len
- |1:
- | fstp qword [BASE+RA*8]
|.endif
| ins_next
|2:
@@ -4382,11 +3792,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // Length of table returned in eax (RD).
|.if DUALNUM
| // Nothing to do.
- |.elif SSE
- | cvtsi2sd xmm0, RD
|.else
- | mov ARG1, RD
- | fild ARG1
+ | cvtsi2sd xmm0, RD
|.endif
| mov BASE, RB // Restore BASE.
| movzx RA, PC_RA
@@ -4401,7 +3808,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
/* -- Binary ops -------------------------------------------------------- */
- |.macro ins_arithpre, x87ins, sseins, ssereg
+ |.macro ins_arithpre, sseins, ssereg
| ins_ABC
||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
||switch (vk) {
@@ -4410,37 +3817,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| .if DUALNUM
| cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
| .endif
- | .if SSE
- | movsd xmm0, qword [BASE+RB*8]
- | sseins ssereg, qword [KBASE+RC*8]
- | .else
- | fld qword [BASE+RB*8]
- | x87ins qword [KBASE+RC*8]
- | .endif
+ | movsd xmm0, qword [BASE+RB*8]
+ | sseins ssereg, qword [KBASE+RC*8]
|| break;
||case 1:
| checknum RB, ->vmeta_arith_nv
| .if DUALNUM
| cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
| .endif
- | .if SSE
- | movsd xmm0, qword [KBASE+RC*8]
- | sseins ssereg, qword [BASE+RB*8]
- | .else
- | fld qword [KBASE+RC*8]
- | x87ins qword [BASE+RB*8]
- | .endif
+ | movsd xmm0, qword [KBASE+RC*8]
+ | sseins ssereg, qword [BASE+RB*8]
|| break;
||default:
| checknum RB, ->vmeta_arith_vv
| checknum RC, ->vmeta_arith_vv
- | .if SSE
- | movsd xmm0, qword [BASE+RB*8]
- | sseins ssereg, qword [BASE+RC*8]
- | .else
- | fld qword [BASE+RB*8]
- | x87ins qword [BASE+RC*8]
- | .endif
+ | movsd xmm0, qword [BASE+RB*8]
+ | sseins ssereg, qword [BASE+RC*8]
|| break;
||}
|.endmacro
@@ -4478,55 +3870,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.endmacro
|
|.macro ins_arithpost
- |.if SSE
| movsd qword [BASE+RA*8], xmm0
- |.else
- | fstp qword [BASE+RA*8]
- |.endif
|.endmacro
|
- |.macro ins_arith, x87ins, sseins
- | ins_arithpre x87ins, sseins, xmm0
+ |.macro ins_arith, sseins
+ | ins_arithpre sseins, xmm0
| ins_arithpost
| ins_next
|.endmacro
|
- |.macro ins_arith, intins, x87ins, sseins
+ |.macro ins_arith, intins, sseins
|.if DUALNUM
| ins_arithdn intins
|.else
- | ins_arith, x87ins, sseins
+ | ins_arith, sseins
|.endif
|.endmacro
| // RA = dst, RB = src1 or num const, RC = src2 or num const
case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
- | ins_arith add, fadd, addsd
+ | ins_arith add, addsd
break;
case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
- | ins_arith sub, fsub, subsd
+ | ins_arith sub, subsd
break;
case BC_MULVN: case BC_MULNV: case BC_MULVV:
- | ins_arith imul, fmul, mulsd
+ | ins_arith imul, mulsd
break;
case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
- | ins_arith fdiv, divsd
+ | ins_arith divsd
break;
case BC_MODVN:
- | ins_arithpre fld, movsd, xmm1
+ | ins_arithpre movsd, xmm1
|->BC_MODVN_Z:
| call ->vm_mod
| ins_arithpost
| ins_next
break;
case BC_MODNV: case BC_MODVV:
- | ins_arithpre fld, movsd, xmm1
+ | ins_arithpre movsd, xmm1
| jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
break;
case BC_POW:
- | ins_arithpre fld, movsd, xmm1
- | call ->vm_pow
+ | ins_arithpre movsd, xmm1
+ | mov RB, BASE
+ |.if not X64
+ | movsd FPARG1, xmm0
+ | movsd FPARG3, xmm1
+ |.endif
+ | call extern pow
+ | movzx RA, PC_RA
+ | mov BASE, RB
+ |.if X64
| ins_arithpost
+ |.else
+ | fstp qword [BASE+RA*8]
+ |.endif
| ins_next
break;
@@ -4594,25 +3993,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| movsx RD, RDW
| mov dword [BASE+RA*8+4], LJ_TISNUM
| mov dword [BASE+RA*8], RD
- |.elif SSE
+ |.else
| movsx RD, RDW // Sign-extend literal.
| cvtsi2sd xmm0, RD
| movsd qword [BASE+RA*8], xmm0
- |.else
- | fild PC_RD // Refetch signed RD from instruction.
- | fstp qword [BASE+RA*8]
|.endif
| ins_next
break;
case BC_KNUM:
| ins_AD // RA = dst, RD = num const
- |.if SSE
| movsd xmm0, qword [KBASE+RD*8]
| movsd qword [BASE+RA*8], xmm0
- |.else
- | fld qword [KBASE+RD*8]
- | fstp qword [BASE+RA*8]
- |.endif
| ins_next
break;
case BC_KPRI:
@@ -4719,18 +4110,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
case BC_USETN:
| ins_AD // RA = upvalue #, RD = num const
| mov LFUNC:RB, [BASE-8]
- |.if SSE
| movsd xmm0, qword [KBASE+RD*8]
- |.else
- | fld qword [KBASE+RD*8]
- |.endif
| mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
| mov RA, UPVAL:RB->v
- |.if SSE
| movsd qword [RA], xmm0
- |.else
- | fstp qword [RA]
- |.endif
| ins_next
break;
case BC_USETP:
@@ -4884,18 +4267,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.else
| // Convert number to int and back and compare.
| checknum RC, >5
- |.if SSE
| movsd xmm0, qword [BASE+RC*8]
- | cvtsd2si RC, xmm0
+ | cvttsd2si RC, xmm0
| cvtsi2sd xmm1, RC
| ucomisd xmm0, xmm1
- |.else
- | fld qword [BASE+RC*8]
- | fist ARG1
- | fild ARG1
- | fcomparepp
- | mov RC, ARG1
- |.endif
| jne ->vmeta_tgetv // Generic numeric key? Use fallback.
|.endif
| cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -4941,7 +4316,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov TAB:RB, [BASE+RB*8]
|->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
| mov RA, TAB:RB->hmask
- | and RA, STR:RC->hash
+ | and RA, STR:RC->sid
| imul RA, #NODE
| add NODE:RA, TAB:RB->node
|1:
@@ -5019,6 +4394,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov dword [BASE+RA*8+4], LJ_TNIL
| jmp <1
break;
+ case BC_TGETR:
+ | ins_ABC // RA = dst, RB = table, RC = key
+ | mov TAB:RB, [BASE+RB*8]
+ |.if DUALNUM
+ | mov RC, dword [BASE+RC*8]
+ |.else
+ | cvttsd2si RC, qword [BASE+RC*8]
+ |.endif
+ | cmp RC, TAB:RB->asize
+ | jae ->vmeta_tgetr // Not in array part? Use fallback.
+ | shl RC, 3
+ | add RC, TAB:RB->array
+ | // Get array slot.
+ |->BC_TGETR_Z:
+ |.if X64
+ | mov RBa, [RC]
+ | mov [BASE+RA*8], RBa
+ |.else
+ | mov RB, [RC]
+ | mov RC, [RC+4]
+ | mov [BASE+RA*8], RB
+ | mov [BASE+RA*8+4], RC
+ |.endif
+ |->BC_TGETR2_Z:
+ | ins_next
+ break;
case BC_TSETV:
| ins_ABC // RA = src, RB = table, RC = key
@@ -5032,18 +4433,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.else
| // Convert number to int and back and compare.
| checknum RC, >5
- |.if SSE
| movsd xmm0, qword [BASE+RC*8]
- | cvtsd2si RC, xmm0
+ | cvttsd2si RC, xmm0
| cvtsi2sd xmm1, RC
| ucomisd xmm0, xmm1
- |.else
- | fld qword [BASE+RC*8]
- | fist ARG1
- | fild ARG1
- | fcomparepp
- | mov RC, ARG1
- |.endif
| jne ->vmeta_tsetv // Generic numeric key? Use fallback.
|.endif
| cmp RC, TAB:RB->asize // Takes care of unordered, too.
@@ -5094,7 +4487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov TAB:RB, [BASE+RB*8]
|->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
| mov RA, TAB:RB->hmask
- | and RA, STR:RC->hash
+ | and RA, STR:RC->sid
| imul RA, #NODE
| mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
| add NODE:RA, TAB:RB->node
@@ -5213,6 +4606,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| movzx RA, PC_RA // Restore RA.
| jmp <2
break;
+ case BC_TSETR:
+ | ins_ABC // RA = src, RB = table, RC = key
+ | mov TAB:RB, [BASE+RB*8]
+ |.if DUALNUM
+ | mov RC, dword [BASE+RC*8]
+ |.else
+ | cvttsd2si RC, qword [BASE+RC*8]
+ |.endif
+ | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
+ | jnz >7
+ |2:
+ | cmp RC, TAB:RB->asize
+ | jae ->vmeta_tsetr
+ | shl RC, 3
+ | add RC, TAB:RB->array
+ | // Set array slot.
+ |->BC_TSETR_Z:
+ |.if X64
+ | mov RBa, [BASE+RA*8]
+ | mov [RC], RBa
+ |.else
+ | mov RB, [BASE+RA*8+4]
+ | mov RA, [BASE+RA*8]
+ | mov [RC+4], RB
+ | mov [RC], RA
+ |.endif
+ | ins_next
+ |
+ |7: // Possible table write barrier for the value. Skip valiswhite check.
+ | barrierback TAB:RB, RA
+ | movzx RA, PC_RA // Restore RA.
+ | jmp <2
+ break;
case BC_TSETM:
| ins_AD // RA = base (table at base-1), RD = num const (start index)
@@ -5389,10 +4815,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
break;
case BC_ITERN:
- | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
|.if JIT
- | // NYI: add hotloop, record BC_ITERN.
+ | hotloop RB
|.endif
+ |->vm_IITERN:
+ | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
| mov TMP1, KBASE // Need two more free registers.
| mov TMP2, DISPATCH
| mov TAB:RB, [BASE+RA*8-16]
@@ -5406,10 +4833,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|.if DUALNUM
| mov dword [BASE+RA*8+4], LJ_TISNUM
| mov dword [BASE+RA*8], RC
- |.elif SSE
- | cvtsi2sd xmm0, RC
|.else
- | fild dword [BASE+RA*8-8]
+ | cvtsi2sd xmm0, RC
|.endif
| // Copy array slot to returned value.
|.if X64
@@ -5425,10 +4850,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // Return array index as a numeric key.
|.if DUALNUM
| // See above.
- |.elif SSE
- | movsd qword [BASE+RA*8], xmm0
|.else
- | fstp qword [BASE+RA*8]
+ | movsd qword [BASE+RA*8], xmm0
|.endif
| mov [BASE+RA*8-8], RC // Update control var.
|2:
@@ -5441,9 +4864,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|
|4: // Skip holes in array part.
| add RC, 1
- |.if not (DUALNUM or SSE)
- | mov [BASE+RA*8-8], RC
- |.endif
| jmp <1
|
|5: // Traverse hash part.
@@ -5487,14 +4907,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
| branchPC RD
| mov dword [BASE+RA*8-8], 0 // Initialize control var.
- | mov dword [BASE+RA*8-4], 0xfffe7fff
+ | mov dword [BASE+RA*8-4], LJ_KEYINDEX
|1:
| ins_next
|5: // Despecialize bytecode if any of the checks fail.
| mov PC_OP, BC_JMP
| branchPC RD
+ |.if JIT
+ | cmp byte [PC], BC_ITERN
+ | jne >6
+ |.endif
| mov byte [PC], BC_ITERC
| jmp <1
+ |.if JIT
+ |6: // Unpatch JLOOP.
+ | mov RA, [DISPATCH+DISPATCH_J(trace)]
+ | movzx RC, word [PC+2]
+ | mov TRACE:RA, [RA+RC*4]
+ | mov eax, TRACE:RA->startins
+ | mov al, BC_ITERC
+ | mov dword [PC], eax
+ | jmp <1
+ |.endif
break;
case BC_VARG:
@@ -5777,7 +5211,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
if (!vk) {
| cmp RB, LJ_TISNUM; jae ->vmeta_for
}
- |.if SSE
| movsd xmm0, qword FOR_IDX
| movsd xmm1, qword FOR_STOP
if (vk) {
@@ -5790,22 +5223,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ucomisd xmm1, xmm0
|1:
| movsd qword FOR_EXT, xmm0
- |.else
- | fld qword FOR_STOP
- | fld qword FOR_IDX
- if (vk) {
- | fadd qword FOR_STEP // nidx = idx + step
- | fst qword FOR_IDX
- | fst qword FOR_EXT
- | test RB, RB; js >1
- } else {
- | fst qword FOR_EXT
- | jl >1
- }
- | fxch // Swap lim/(n)idx if step non-negative.
- |1:
- | fcomparepp
- |.endif
if (op == BC_FORI) {
|.if DUALNUM
| jnb <7
@@ -5833,11 +5250,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|2:
| ins_next
|.endif
- |.if SSE
+ |
|3: // Invert comparison if step is negative.
| ucomisd xmm0, xmm1
| jmp <1
- |.endif
break;
case BC_ITERL:
@@ -5875,7 +5291,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ins_A // RA = base, RD = target (loop extent)
| // Note: RA/RD is only used by trace recorder to determine scope/extent
| // This opcode does NOT jump, it's only purpose is to detect a hot loop.
- |.if JIT
+ |.if JIT
| hotloop RB
|.endif
| // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
@@ -5894,7 +5310,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| mov RDa, TRACE:RD->mcode
| mov L:RB, SAVE_L
| mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
- | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
+ | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
| // Save additional callee-save registers only used in compiled code.
|.if X64WIN
| mov TMPQ, r12
@@ -6061,9 +5477,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| // (lua_State *L, lua_CFunction f)
| call aword [DISPATCH+DISPATCH_GL(wrapf)]
}
- | set_vmstate INTERP
| // nresults returned in eax (RD).
| mov BASE, L:RB->base
+ | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
+ | set_vmstate INTERP
| lea RA, [BASE+RD*8]
| neg RA
| add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
@@ -6176,7 +5593,7 @@ static void emit_asm_debug(BuildCtx *ctx)
".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
#endif
#if !LJ_NO_UNWIND
-#if (defined(__sun__) && defined(__svr4__))
+#if LJ_TARGET_SOLARIS
#if LJ_64
fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
#else
@@ -6383,15 +5800,21 @@ static void emit_asm_debug(BuildCtx *ctx)
"LEFDEY:\n\n", fcsize);
}
#endif
-#if LJ_64
- fprintf(ctx->fp, "\t.subsections_via_symbols\n");
-#else
+#if !LJ_64
fprintf(ctx->fp,
"\t.non_lazy_symbol_pointer\n"
"L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
".indirect_symbol _lj_err_unwind_dwarf\n"
- ".long 0\n");
+ ".long 0\n\n");
+ fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
+ {
+ const char *const *xn;
+ for (xn = ctx->extnames; *xn; xn++)
+ if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
+ fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
+ }
#endif
+ fprintf(ctx->fp, ".subsections_via_symbols\n");
}
break;
#endif
diff --git a/src/xb1build.bat b/src/xb1build.bat
new file mode 100644
index 00000000..2eb68171
--- /dev/null
+++ b/src/xb1build.bat
@@ -0,0 +1,101 @@
+@rem Script to build LuaJIT with the Xbox One SDK.
+@rem Donated to the public domain.
+@rem
+@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
+@rem Then cd to this directory and run this script.
+
+@if not defined INCLUDE goto :FAIL
+@if not defined DurangoXDK goto :FAIL
+
+@setlocal
+@echo ---- Host compiler ----
+@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
+@set LJLINK=link /nologo
+@set LJMT=mt /nologo
+@set DASMDIR=..\dynasm
+@set DASM=%DASMDIR%\dynasm.lua
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+
+%LJCOMPILE% host\minilua.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:minilua.exe minilua.obj
+@if errorlevel 1 goto :BAD
+if exist minilua.exe.manifest^
+ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+
+@rem Error out for 64 bit host compiler
+@minilua
+@if not errorlevel 8 goto :FAIL
+
+@set DASMFLAGS=-D WIN -D FFI -D P64
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc
+@if errorlevel 1 goto :BAD
+
+%LJCOMPILE% /I "." /I %DASMDIR% /D_DURANGO host\buildvm*.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:buildvm.exe buildvm*.obj
+@if errorlevel 1 goto :BAD
+if exist buildvm.exe.manifest^
+ %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+
+buildvm -m peobj -o lj_vm.obj
+@if errorlevel 1 goto :BAD
+buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m libdef -o lj_libdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m recdef -o lj_recdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+@if errorlevel 1 goto :BAD
+
+@echo ---- Cross compiler ----
+
+@set CWD=%cd%
+@call "%DurangoXDK%\xdk\DurangoVars.cmd" XDK
+@cd /D "%CWD%"
+@shift
+
+@set LJCOMPILE="cl" /nologo /c /W3 /GF /Gm- /GR- /GS- /Gy /openmp- /D_CRT_SECURE_NO_DEPRECATE /D_LIB /D_UNICODE /D_DURANGO
+@set LJLIB="lib" /nologo
+
+@if "%1"=="debug" (
+ @shift
+ @set LJCOMPILE=%LJCOMPILE% /Zi /MDd /Od
+ @set LJLINK=%LJLINK% /debug
+) else (
+ @set LJCOMPILE=%LJCOMPILE% /MD /O2 /DNDEBUG
+)
+
+@if "%1"=="amalg" goto :AMALG
+%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
+@if errorlevel 1 goto :BAD
+%LJLIB% /OUT:luajit.lib lj_*.obj lib_*.obj
+@if errorlevel 1 goto :BAD
+@goto :NOAMALG
+:AMALG
+%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c
+@if errorlevel 1 goto :BAD
+%LJLIB% /OUT:luajit.lib ljamalg.obj lj_vm.obj
+@if errorlevel 1 goto :BAD
+:NOAMALG
+
+@del *.obj *.manifest minilua.exe buildvm.exe
+@echo.
+@echo === Successfully built LuaJIT for Xbox One ===
+
+@goto :END
+:BAD
+@echo.
+@echo *******************************************************
+@echo *** Build FAILED -- Please check the error messages ***
+@echo *******************************************************
+@goto :END
+:FAIL
+@echo To run this script you must open a "Visual Studio .NET Command Prompt"
+@echo (64 bit host compiler). The Xbox One SDK must be installed, too.
+:END
diff --git a/src/xedkbuild.bat b/src/xedkbuild.bat
index 240ec878..37322d03 100644
--- a/src/xedkbuild.bat
+++ b/src/xedkbuild.bat
@@ -14,7 +14,7 @@
@set LJMT=mt /nologo
@set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD