diff options
205 files changed, 47073 insertions, 13722 deletions
@@ -14,9 +14,10 @@ ############################################################################## MAJVER= 2 -MINVER= 0 -RELVER= 5 -VERSION= $(MAJVER).$(MINVER).$(RELVER) +MINVER= 1 +RELVER= 0 +PREREL= -beta3 +VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL) ABIVER= 5.1 ############################################################################## @@ -88,8 +89,10 @@ FILE_SO= libluajit.so FILE_MAN= luajit.1 FILE_PC= luajit.pc FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h -FILES_JITLIB= bc.lua v.lua dump.lua dis_x86.lua dis_x64.lua dis_arm.lua \ - dis_ppc.lua dis_mips.lua dis_mipsel.lua bcsave.lua vmdef.lua +FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ + dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ + dis_mips64.lua dis_mips64el.lua vmdef.lua ifeq (,$(findstring Windows,$(OS))) HOST_SYS:= $(shell uname -s) @@ -119,7 +122,7 @@ install: $(INSTALL_DEP) $(MKDIR) $(INSTALL_DIRS) cd src && $(INSTALL_X) $(FILE_T) $(INSTALL_T) cd src && test -f $(FILE_A) && $(INSTALL_F) $(FILE_A) $(INSTALL_STATIC) || : - $(RM) $(INSTALL_TSYM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) + $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) cd src && test -f $(FILE_SO) && \ $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ @@ -131,12 +134,18 @@ install: $(INSTALL_DEP) $(RM) $(FILE_PC).tmp cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) - $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM) @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" + @echo "" + @echo "Note: the development releases deliberately do NOT install a symlink for luajit" + @echo "You can do this now by running this command (with sudo):" + @echo "" + @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)" + @echo "" + uninstall: @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" - $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) + $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) for file in $(FILES_JITLIB); do \ $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ done @@ -1,5 +1,5 @@ -README for LuaJIT 2.0.5 ------------------------ +README for LuaJIT 2.1.0-beta3 +----------------------------- LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. diff --git a/doc/contact.html b/doc/contact.html index 5a5a8691..19f7d294 100644 --- a/doc/contact.html +++ b/doc/contact.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Contact</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -37,9 +37,13 @@ <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> diff --git a/doc/ext_buffer.html b/doc/ext_buffer.html new file mode 100644 index 00000000..2a82aa97 --- /dev/null +++ b/doc/ext_buffer.html @@ -0,0 +1,695 @@ +<!DOCTYPE html> +<html> +<head> +<title>String Buffer Library</title> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2022"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +<style type="text/css"> +.lib { + vertical-align: middle; + margin-left: 5px; + padding: 0 5px; + font-size: 60%; + border-radius: 5px; + background: #c5d5ff; + color: #000; +} +</style> +</head> +<body> +<div id="site"> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>String Buffer Library</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a class="current" href="ext_buffer.html">String Buffers</a> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +The string buffer library allows <b>high-performance manipulation of +string-like data</b>. +</p> +<p> +Unlike Lua strings, which are constants, string buffers are +<b>mutable</b> sequences of 8-bit (binary-transparent) characters. Data +can be stored, formatted and encoded into a string buffer and later +converted, extracted or decoded. +</p> +<p> +The convenient string buffer API simplifies common string manipulation +tasks, that would otherwise require creating many intermediate strings. +String buffers improve performance by eliminating redundant memory +copies, object creation, string interning and garbage collection +overhead. In conjunction with the FFI library, they allow zero-copy +operations. +</p> +<p> +The string buffer library also includes a high-performance +<a href="serialize">serializer</a> for Lua objects. +</p> + +<h2 id="wip" style="color:#ff0000">Work in Progress</h2> +<p> +<b style="color:#ff0000">This library is a work in progress. More +functionality will be added soon.</b> +</p> + +<h2 id="use">Using the String Buffer Library</h2> +<p> +The string buffer library is built into LuaJIT by default, but it's not +loaded by default. Add this to the start of every Lua file that needs +one of its functions: +</p> +<pre class="code"> +local buffer = require("string.buffer") +</pre> +<p> +The convention for the syntax shown on this page is that <tt>buffer</tt> +refers to the buffer library and <tt>buf</tt> refers to an individual +buffer object. +</p> +<p> +Please note the difference between a Lua function call, e.g. +<tt>buffer.new()</tt> (with a dot) and a Lua method call, e.g. +<tt>buf:reset()</tt> (with a colon). +</p> + +<h3 id="buffer_object">Buffer Objects</h3> +<p> +A buffer object is a garbage-collected Lua object. After creation with +<tt>buffer.new()</tt>, it can (and should) be reused for many operations. +When the last reference to a buffer object is gone, it will eventually +be freed by the garbage collector, along with the allocated buffer +space. +</p> +<p> +Buffers operate like a FIFO (first-in first-out) data structure. Data +can be appended (written) to the end of the buffer and consumed (read) +from the front of the buffer. These operations may be freely mixed. +</p> +<p> +The buffer space that holds the characters is managed automatically +— it grows as needed and already consumed space is recycled. Use +<tt>buffer.new(size)</tt> and <tt>buf:free()</tt>, if you need more +control. +</p> +<p> +The maximum size of a single buffer is the same as the maximum size of a +Lua string, which is slightly below two gigabytes. For huge data sizes, +neither strings nor buffers are the right data structure — use the +FFI library to directly map memory or files up to the virtual memory +limit of your OS. +</p> + +<h3 id="buffer_overview">Buffer Method Overview</h3> +<ul> +<li> +The <tt>buf:put*()</tt>-like methods append (write) characters to the +end of the buffer. +</li> +<li> +The <tt>buf:get*()</tt>-like methods consume (read) characters from the +front of the buffer. +</li> +<li> +Other methods, like <tt>buf:tostring()</tt> only read the buffer +contents, but don't change the buffer. +</li> +<li> +The <tt>buf:set()</tt> method allows zero-copy consumption of a string +or an FFI cdata object as a buffer. +</li> +<li> +The FFI-specific methods allow zero-copy read/write-style operations or +modifying the buffer contents in-place. Please check the +<a href="#ffi_caveats">FFI caveats</a> below, too. +</li> +<li> +Methods that don't need to return anything specific, return the buffer +object itself as a convenience. This allows method chaining, e.g.: +<tt>buf:reset():encode(obj)</tt> or <tt>buf:skip(len):get()</tt> +</li> +</ul> + +<h2 id="create">Buffer Creation and Management</h2> + +<h3 id="buffer_new"><tt>local buf = buffer.new([size [,options]])<br> +local buf = buffer.new([options])</tt></h3> +<p> +Creates a new buffer object. +</p> +<p> +The optional <tt>size</tt> argument ensures a minimum initial buffer +size. This is strictly an optimization when the required buffer size is +known beforehand. The buffer space will grow as needed, in any case. +</p> +<p> +The optional table <tt>options</tt> sets various +<a href="#serialize_options">serialization options</a>. +</p> + +<h3 id="buffer_reset"><tt>buf = buf:reset()</tt></h3> +<p> +Reset (empty) the buffer. The allocated buffer space is not freed and +may be reused. +</p> + +<h3 id="buffer_free"><tt>buf = buf:free()</tt></h3> +<p> +The buffer space of the buffer object is freed. The object itself +remains intact, empty and may be reused. +</p> +<p> +Note: you normally don't need to use this method. The garbage collector +automatically frees the buffer space, when the buffer object is +collected. Use this method, if you need to free the associated memory +immediately. +</p> + +<h2 id="write">Buffer Writers</h2> + +<h3 id="buffer_put"><tt>buf = buf:put([str|num|obj] [,…])</tt></h3> +<p> +Appends a string <tt>str</tt>, a number <tt>num</tt> or any object +<tt>obj</tt> with a <tt>__tostring</tt> metamethod to the buffer. +Multiple arguments are appended in the given order. +</p> +<p> +Appending a buffer to a buffer is possible and short-circuited +internally. But it still involves a copy. Better combine the buffer +writes to use a single buffer. +</p> + +<h3 id="buffer_putf"><tt>buf = buf:putf(format, …)</tt></h3> +<p> +Appends the formatted arguments to the buffer. The <tt>format</tt> +string supports the same options as <tt>string.format()</tt>. +</p> + +<h3 id="buffer_putcdata"><tt>buf = buf:putcdata(cdata, len)</tt><span class="lib">FFI</span></h3> +<p> +Appends the given <tt>len</tt> number of bytes from the memory pointed +to by the FFI <tt>cdata</tt> object to the buffer. The object needs to +be convertible to a (constant) pointer. +</p> + +<h3 id="buffer_set"><tt>buf = buf:set(str)<br> +buf = buf:set(cdata, len)</tt><span class="lib">FFI</span></h3> +<p> +This method allows zero-copy consumption of a string or an FFI cdata +object as a buffer. It stores a reference to the passed string +<tt>str</tt> or the FFI <tt>cdata</tt> object in the buffer. Any buffer +space originally allocated is freed. This is <i>not</i> an append +operation, unlike the <tt>buf:put*()</tt> methods. +</p> +<p> +After calling this method, the buffer behaves as if +<tt>buf:free():put(str)</tt> or <tt>buf:free():put(cdata, len)</tt> +had been called. However, the data is only referenced and not copied, as +long as the buffer is only consumed. +</p> +<p> +In case the buffer is written to later on, the referenced data is copied +and the object reference is removed (copy-on-write semantics). +</p> +<p> +The stored reference is an anchor for the garbage collector and keeps the +originally passed string or FFI cdata object alive. +</p> + +<h3 id="buffer_reserve"><tt>ptr, len = buf:reserve(size)</tt><span class="lib">FFI</span><br> +<tt>buf = buf:commit(used)</tt><span class="lib">FFI</span></h3> +<p> +The <tt>reserve</tt> method reserves at least <tt>size</tt> bytes of +write space in the buffer. It returns an <tt>uint8_t *</tt> FFI +cdata pointer <tt>ptr</tt> that points to this space. +</p> +<p> +The available length in bytes is returned in <tt>len</tt>. This is at +least <tt>size</tt> bytes, but may be more to facilitate efficient +buffer growth. You can either make use of the additional space or ignore +<tt>len</tt> and only use <tt>size</tt> bytes. +</p> +<p> +The <tt>commit</tt> method appends the <tt>used</tt> bytes of the +previously returned write space to the buffer data. +</p> +<p> +This pair of methods allows zero-copy use of C read-style APIs: +</p> +<pre class="code"> +local MIN_SIZE = 65536 +repeat + local ptr, len = buf:reserve(MIN_SIZE) + local n = C.read(fd, ptr, len) + if n == 0 then break end -- EOF. + if n < 0 then error("read error") end + buf:commit(n) +until false +</pre> +<p> +The reserved write space is <i>not</i> initialized. At least the +<tt>used</tt> bytes <b>must</b> be written to before calling the +<tt>commit</tt> method. There's no need to call the <tt>commit</tt> +method, if nothing is added to the buffer (e.g. on error). +</p> + +<h2 id="read">Buffer Readers</h2> + +<h3 id="buffer_length"><tt>len = #buf</tt></h3> +<p> +Returns the current length of the buffer data in bytes. +</p> + +<h3 id="buffer_concat"><tt>res = str|num|buf .. str|num|buf […]</tt></h3> +<p> +The Lua concatenation operator <tt>..</tt> also accepts buffers, just +like strings or numbers. It always returns a string and not a buffer. +</p> +<p> +Note that although this is supported for convenience, this thwarts one +of the main reasons to use buffers, which is to avoid string +allocations. Rewrite it with <tt>buf:put()</tt> and <tt>buf:get()</tt>. +</p> +<p> +Mixing this with unrelated objects that have a <tt>__concat</tt> +metamethod may not work, since these probably only expect strings. +</p> + +<h3 id="buffer_skip"><tt>buf = buf:skip(len)</tt></h3> +<p> +Skips (consumes) <tt>len</tt> bytes from the buffer up to the current +length of the buffer data. +</p> + +<h3 id="buffer_get"><tt>str, … = buf:get([len|nil] [,…])</tt></h3> +<p> +Consumes the buffer data and returns one or more strings. If called +without arguments, the whole buffer data is consumed. If called with a +number, up to <tt>len</tt> bytes are consumed. A <tt>nil</tt> argument +consumes the remaining buffer space (this only makes sense as the last +argument). Multiple arguments consume the buffer data in the given +order. +</p> +<p> +Note: a zero length or no remaining buffer data returns an empty string +and not <tt>nil</tt>. +</p> + +<h3 id="buffer_tostring"><tt>str = buf:tostring()<br> +str = tostring(buf)</tt></h3> +<p> +Creates a string from the buffer data, but doesn't consume it. The +buffer remains unchanged. +</p> +<p> +Buffer objects also define a <tt>__tostring</tt> metamethod. This means +buffers can be passed to the global <tt>tostring()</tt> function and +many other functions that accept this in place of strings. The important +internal uses in functions like <tt>io.write()</tt> are short-circuited +to avoid the creation of an intermediate string object. +</p> + +<h3 id="buffer_ref"><tt>ptr, len = buf:ref()</tt><span class="lib">FFI</span></h3> +<p> +Returns an <tt>uint8_t *</tt> FFI cdata pointer <tt>ptr</tt> that +points to the buffer data. The length of the buffer data in bytes is +returned in <tt>len</tt>. +</p> +<p> +The returned pointer can be directly passed to C functions that expect a +buffer and a length. You can also do bytewise reads +(<tt>local x = ptr[i]</tt>) or writes +(<tt>ptr[i] = 0x40</tt>) of the buffer data. +</p> +<p> +In conjunction with the <tt>skip</tt> method, this allows zero-copy use +of C write-style APIs: +</p> +<pre class="code"> +repeat + local ptr, len = buf:ref() + if len == 0 then break end + local n = C.write(fd, ptr, len) + if n < 0 then error("write error") end + buf:skip(n) +until n >= len +</pre> +<p> +Unlike Lua strings, buffer data is <i>not</i> implicitly +zero-terminated. It's not safe to pass <tt>ptr</tt> to C functions that +expect zero-terminated strings. If you're not using <tt>len</tt>, then +you're doing something wrong. +</p> + +<h2 id="serialize">Serialization of Lua Objects</h2> +<p> +The following functions and methods allow <b>high-speed serialization</b> +(encoding) of a Lua object into a string and decoding it back to a Lua +object. This allows convenient storage and transport of <b>structured +data</b>. +</p> +<p> +The encoded data is in an <a href="#serialize_format">internal binary +format</a>. The data can be stored in files, binary-transparent +databases or transmitted to other LuaJIT instances across threads, +processes or networks. +</p> +<p> +Encoding speed can reach up to 1 Gigabyte/second on a modern desktop- or +server-class system, even when serializing many small objects. Decoding +speed is mostly constrained by object creation cost. +</p> +<p> +The serializer handles most Lua types, common FFI number types and +nested structures. Functions, thread objects, other FFI cdata and full +userdata cannot be serialized (yet). +</p> +<p> +The encoder serializes nested structures as trees. Multiple references +to a single object will be stored separately and create distinct objects +after decoding. Circular references cause an error. +</p> + +<h3 id="serialize_methods">Serialization Functions and Methods</h3> + +<h3 id="buffer_encode"><tt>str = buffer.encode(obj)<br> +buf = buf:encode(obj)</tt></h3> +<p> +Serializes (encodes) the Lua object <tt>obj</tt>. The stand-alone +function returns a string <tt>str</tt>. The buffer method appends the +encoding to the buffer. +</p> +<p> +<tt>obj</tt> can be any of the supported Lua types — it doesn't +need to be a Lua table. +</p> +<p> +This function may throw an error when attempting to serialize +unsupported object types, circular references or deeply nested tables. +</p> + +<h3 id="buffer_decode"><tt>obj = buffer.decode(str)<br> +obj = buf:decode()</tt></h3> +<p> +The stand-alone function deserializes (decodes) the string +<tt>str</tt>, the buffer method deserializes one object from the +buffer. Both return a Lua object <tt>obj</tt>. +</p> +<p> +The returned object may be any of the supported Lua types — +even <tt>nil</tt>. +</p> +<p> +This function may throw an error when fed with malformed or incomplete +encoded data. The stand-alone function throws when there's left-over +data after decoding a single top-level object. The buffer method leaves +any left-over data in the buffer. +</p> +<p> +Attempting to deserialize an FFI type will throw an error, if the FFI +library is not built-in or has not been loaded, yet. +</p> + +<h3 id="serialize_options">Serialization Options</h3> +<p> +The <tt>options</tt> table passed to <tt>buffer.new()</tt> may contain +the following members (all optional): +</p> +<ul> +<li> +<tt>dict</tt> is a Lua table holding a <b>dictionary of strings</b> that +commonly occur as table keys of objects you are serializing. These keys +are compactly encoded as indexes during serialization. A well-chosen +dictionary saves space and improves serialization performance. +</li> +<li> +<tt>metatable</tt> is a Lua table holding a <b>dictionary of metatables</b> +for the table objects you are serializing. +</li> +</ul> +<p> +<tt>dict</tt> needs to be an array of strings and <tt>metatable</tt> needs +to be an array of tables. Both starting at index 1 and without holes (no +<tt>nil</tt> in between). The tables are anchored in the buffer object and +internally modified into a two-way index (don't do this yourself, just pass +a plain array). The tables must not be modified after they have been passed +to <tt>buffer.new()</tt>. +</p> +<p> +The <tt>dict</tt> and <tt>metatable</tt> tables used by the encoder and +decoder must be the same. Put the most common entries at the front. Extend +at the end to ensure backwards-compatibility — older encodings can +then still be read. You may also set some indexes to <tt>false</tt> to +explicitly drop backwards-compatibility. Old encodings that use these +indexes will throw an error when decoded. +</p> +<p> +Metatables that are not found in the <tt>metatable</tt> dictionary are +ignored when encoding. Decoding returns a table with a <tt>nil</tt> +metatable. +</p> +<p> +Note: parsing and preparation of the options table is somewhat +expensive. Create a buffer object only once and recycle it for multiple +uses. Avoid mixing encoder and decoder buffers, since the +<tt>buf:set()</tt> method frees the already allocated buffer space: +</p> +<pre class="code"> +local options = { + dict = { "commonly", "used", "string", "keys" }, +} +local buf_enc = buffer.new(options) +local buf_dec = buffer.new(options) + +local function encode(obj) + return buf_enc:reset():encode(obj):get() +end + +local function decode(str) + return buf_dec:set(str):decode() +end +</pre> + +<h3 id="serialize_stream">Streaming Serialization</h3> +<p> +In some contexts, it's desirable to do piecewise serialization of large +datasets, also known as <i>streaming</i>. +</p> +<p> +This serialization format can be safely concatenated and supports streaming. +Multiple encodings can simply be appended to a buffer and later decoded +individually: +</p> +<pre class="code"> +local buf = buffer.new() +buf:encode(obj1) +buf:encode(obj2) +local copy1 = buf:decode() +local copy2 = buf:decode() +</pre> +<p> +Here's how to iterate over a stream: +</p> +<pre class="code"> +while #buf ~= 0 do + local obj = buf:decode() + -- Do something with obj. +end +</pre> +<p> +Since the serialization format doesn't prepend a length to its encoding, +network applications may need to transmit the length, too. +</p> + +<h3 id="serialize_format">Serialization Format Specification</h3> +<p> +This serialization format is designed for <b>internal use</b> by LuaJIT +applications. Serialized data is upwards-compatible and portable across +all supported LuaJIT platforms. +</p> +<p> +It's an <b>8-bit binary format</b> and not human-readable. It uses e.g. +embedded zeroes and stores embedded Lua string objects unmodified, which +are 8-bit-clean, too. Encoded data can be safely concatenated for +streaming and later decoded one top-level object at a time. +</p> +<p> +The encoding is reasonably compact, but tuned for maximum performance, +not for minimum space usage. It compresses well with any of the common +byte-oriented data compression algorithms. +</p> +<p> +Although documented here for reference, this format is explicitly +<b>not</b> intended to be a 'public standard' for structured data +interchange across computer languages (like JSON or MessagePack). Please +do not use it as such. +</p> +<p> +The specification is given below as a context-free grammar with a +top-level <tt>object</tt> as the starting point. Alternatives are +separated by the <tt>|</tt> symbol and <tt>*</tt> indicates repeats. +Grouping is implicit or indicated by <tt>{…}</tt>. Terminals are +either plain hex numbers, encoded as bytes, or have a <tt>.format</tt> +suffix. +</p> +<pre> +object → nil | false | true + | null | lightud32 | lightud64 + | int | num | tab | tab_mt + | int64 | uint64 | complex + | string + +nil → 0x00 +false → 0x01 +true → 0x02 + +null → 0x03 // NULL lightuserdata +lightud32 → 0x04 data.I // 32 bit lightuserdata +lightud64 → 0x05 data.L // 64 bit lightuserdata + +int → 0x06 int.I // int32_t +num → 0x07 double.L + +tab → 0x08 // Empty table + | 0x09 h.U h*{object object} // Key/value hash + | 0x0a a.U a*object // 0-based array + | 0x0b a.U a*object h.U h*{object object} // Mixed + | 0x0c a.U (a-1)*object // 1-based array + | 0x0d a.U (a-1)*object h.U h*{object object} // Mixed +tab_mt → 0x0e (index-1).U tab // Metatable dict entry + +int64 → 0x10 int.L // FFI int64_t +uint64 → 0x11 uint.L // FFI uint64_t +complex → 0x12 re.L im.L // FFI complex + +string → (0x20+len).U len*char.B + | 0x0f (index-1).U // String dict entry + +.B = 8 bit +.I = 32 bit little-endian +.L = 64 bit little-endian +.U = prefix-encoded 32 bit unsigned number n: + 0x00..0xdf → n.B + 0xe0..0x1fdf → (0xe0|(((n-0xe0)>>8)&0x1f)).B ((n-0xe0)&0xff).B + 0x1fe0.. → 0xff n.I +</pre> + +<h2 id="error">Error handling</h2> +<p> +Many of the buffer methods can throw an error. Out-of-memory or usage +errors are best caught with an outer wrapper for larger parts of code. +There's not much one can do after that, anyway. +</p> +<p> +OTOH, you may want to catch some errors individually. Buffer methods need +to receive the buffer object as the first argument. The Lua colon-syntax +<tt>obj:method()</tt> does that implicitly. But to wrap a method with +<tt>pcall()</tt>, the arguments need to be passed like this: +</p> +<pre class="code"> +local ok, err = pcall(buf.encode, buf, obj) +if not ok then + -- Handle error in err. +end +</pre> + +<h2 id="ffi_caveats">FFI caveats</h2> +<p> +The string buffer library has been designed to work well together with +the FFI library. But due to the low-level nature of the FFI library, +some care needs to be taken: +</p> +<p> +First, please remember that FFI pointers are zero-indexed. The space +returned by <tt>buf:reserve()</tt> and <tt>buf:ref()</tt> starts at the +returned pointer and ends before <tt>len</tt> bytes after that. +</p> +<p> +I.e. the first valid index is <tt>ptr[0]</tt> and the last valid index +is <tt>ptr[len-1]</tt>. If the returned length is zero, there's no valid +index at all. The returned pointer may even be <tt>NULL</tt>. +</p> +<p> +The space pointed to by the returned pointer is only valid as long as +the buffer is not modified in any way (neither append, nor consume, nor +reset, etc.). The pointer is also not a GC anchor for the buffer object +itself. +</p> +<p> +Buffer data is only guaranteed to be byte-aligned. Casting the returned +pointer to a data type with higher alignment may cause unaligned +accesses. It depends on the CPU architecture whether this is allowed or +not (it's always OK on x86/x64 and mostly OK on other modern +architectures). +</p> +<p> +FFI pointers or references do not count as GC anchors for an underlying +object. E.g. an <tt>array</tt> allocated with <tt>ffi.new()</tt> is +anchored by <tt>buf:set(array, len)</tt>, but not by +<tt>buf:set(array+offset, len)</tt>. The addition of the offset +creates a new pointer, even when the offset is zero. In this case, you +need to make sure there's still a reference to the original array as +long as its contents are in use by the buffer. +</p> +<p> +Even though each LuaJIT VM instance is single-threaded (but you can +create multiple VMs), FFI data structures can be accessed concurrently. +Be careful when reading/writing FFI cdata from/to buffers to avoid +concurrent accesses or modifications. In particular, the memory +referenced by <tt>buf:set(cdata, len)</tt> must not be modified +while buffer readers are working on it. Shared, but read-only memory +mappings of files are OK, but only if the file does not change. +</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2022 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/doc/ext_c_api.html b/doc/ext_c_api.html index 6b5e06d3..151d20b5 100644 --- a/doc/ext_c_api.html +++ b/doc/ext_c_api.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Lua/C API Extensions</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -37,9 +37,13 @@ <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a class="current" href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> diff --git a/doc/ext_ffi.html b/doc/ext_ffi.html index 82ee9425..5f1e2d7c 100644 --- a/doc/ext_ffi.html +++ b/doc/ext_ffi.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>FFI Library</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -37,9 +37,13 @@ <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> diff --git a/doc/ext_ffi_api.html b/doc/ext_ffi_api.html index 2e0840df..89ddb0d9 100644 --- a/doc/ext_ffi_api.html +++ b/doc/ext_ffi_api.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>ffi.* API Functions</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -42,9 +42,13 @@ td.abiparam { font-weight: bold; width: 6em; } <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> @@ -458,6 +462,10 @@ otherwise. The following parameters are currently defined: <td class="abiparam">eabi</td><td class="abidesc">EABI variant of the standard ABI</td></tr> <tr class="odd"> <td class="abiparam">win</td><td class="abidesc">Windows variant of the standard ABI</td></tr> +<tr class="even"> +<td class="abiparam">uwp</td><td class="abidesc">Universal Windows Platform</td></tr> +<tr class="odd"> +<td class="abiparam">gc64</td><td class="abidesc">64 bit GC references</td></tr> </table> <h3 id="ffi_os"><tt>ffi.os</tt></h3> @@ -534,8 +542,8 @@ corresponding ctype. The parser for Lua source code treats numeric literals with the suffixes <tt>LL</tt> or <tt>ULL</tt> as signed or unsigned 64 bit integers. Case doesn't matter, but uppercase is recommended for -readability. It handles both decimal (<tt>42LL</tt>) and hexadecimal -(<tt>0x2aLL</tt>) literals. +readability. It handles decimal (<tt>42LL</tt>), hexadecimal +(<tt>0x2aLL</tt>) and binary (<tt>0b101010LL</tt>) literals. </p> <p> The imaginary part of complex numbers can be specified by suffixing diff --git a/doc/ext_ffi_semantics.html b/doc/ext_ffi_semantics.html index e3491696..603f9950 100644 --- a/doc/ext_ffi_semantics.html +++ b/doc/ext_ffi_semantics.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>FFI Semantics</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -42,9 +42,13 @@ td.convop { font-style: italic; width: 40%; } <a class="current" href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> @@ -175,6 +179,8 @@ a <tt>typedef</tt>, except re-declarations will be ignored): <tt>uint16_t</tt>, <tt>uint32_t</tt>, <tt>uint64_t</tt>, <tt>intptr_t</tt>, <tt>uintptr_t</tt>.</li> +<li>From <tt><unistd.h></tt> (POSIX): <tt>ssize_t</tt>.</li> + </ul> <p> You're encouraged to use these types in preference to @@ -722,6 +728,22 @@ You'll have to explicitly convert a 64 bit integer to a Lua number (e.g. for regular floating-point calculations) with <tt>tonumber()</tt>. But note this may incur a precision loss.</li> +<li><b>64 bit bitwise operations</b>: the rules for 64 bit +arithmetic operators apply analogously.<br> + +Unlike the other <tt>bit.*</tt> operations, <tt>bit.tobit()</tt> +converts a cdata number via <tt>int64_t</tt> to <tt>int32_t</tt> and +returns a Lua number.<br> + +For <tt>bit.band()</tt>, <tt>bit.bor()</tt> and <tt>bit.bxor()</tt>, the +conversion to <tt>int64_t</tt> or <tt>uint64_t</tt> applies to +<em>all</em> arguments, if <em>any</em> argument is a cdata number.<br> + +For all other operations, only the first argument is used to determine +the output type. This implies that a cdata number as a shift count for +shifts and rotates is accepted, but that alone does <em>not</em> cause +a cdata number output. + </ul> <h3 id="cdata_comp">Comparisons of cdata objects</h3> @@ -1193,14 +1215,12 @@ The following operations are currently not compiled and may exhibit suboptimal performance, especially when used in inner loops: </p> <ul> -<li>Bitfield accesses and initializations.</li> <li>Vector operations.</li> <li>Table initializers.</li> <li>Initialization of nested <tt>struct</tt>/<tt>union</tt> types.</li> -<li>Allocations of variable-length arrays or structs.</li> -<li>Allocations of C types with a size > 128 bytes or an -alignment > 8 bytes.</li> -<li>Conversions from lightuserdata to <tt>void *</tt>.</li> +<li>Non-default initialization of VLA/VLS or large C types +(> 128 bytes or > 16 array elements).</li> +<li>Bitfield initializations.</li> <li>Pointer differences for element sizes that are not a power of two.</li> <li>Calls to C functions with aggregates passed or returned by @@ -1216,7 +1236,6 @@ value.</li> Other missing features: </p> <ul> -<li>Bit operations for 64 bit types.</li> <li>Arithmetic for <tt>complex</tt> numbers.</li> <li>Passing structs by value to vararg C functions.</li> <li><a href="extensions.html#exceptions">C++ exception interoperability</a> diff --git a/doc/ext_ffi_tutorial.html b/doc/ext_ffi_tutorial.html index 18b78d8b..ff0c3a9a 100644 --- a/doc/ext_ffi_tutorial.html +++ b/doc/ext_ffi_tutorial.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>FFI Tutorial</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -44,9 +44,13 @@ td.idiomlua b { font-weight: normal; color: #2142bf; } <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> diff --git a/doc/ext_jit.html b/doc/ext_jit.html index 7778e618..3ff5c05e 100644 --- a/doc/ext_jit.html +++ b/doc/ext_jit.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>jit.* Library</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -37,9 +37,13 @@ <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a class="current" href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> @@ -143,7 +147,7 @@ Contains the target OS name: <h3 id="jit_arch"><tt>jit.arch</tt></h3> <p> Contains the target architecture name: -"x86", "x64", "arm", "ppc", "ppcspe", or "mips". +"x86", "x64", "arm", "arm64", "arm64be", "ppc", "mips", "mipsel", "mips64", "mips64el", "mips64r6", "mips64r6el". </p> <h2 id="jit_opt"><tt>jit.opt.*</tt> — JIT compiler optimization control</h2> diff --git a/doc/ext_profiler.html b/doc/ext_profiler.html new file mode 100644 index 00000000..d6e26efd --- /dev/null +++ b/doc/ext_profiler.html @@ -0,0 +1,359 @@ +<!DOCTYPE html> +<html> +<head> +<title>Profiler</title> +<meta charset="utf-8"> +<meta name="Copyright" content="Copyright (C) 2005-2022"> +<meta name="Language" content="en"> +<link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> +<link rel="stylesheet" type="text/css" href="bluequad-print.css" media="print"> +</head> +<body> +<div id="site"> +<a href="https://luajit.org"><span>Lua<span id="logo">JIT</span></span></a> +</div> +<div id="head"> +<h1>Profiler</h1> +</div> +<div id="nav"> +<ul><li> +<a href="luajit.html">LuaJIT</a> +<ul><li> +<a href="https://luajit.org/download.html">Download <span class="ext">»</span></a> +</li><li> +<a href="install.html">Installation</a> +</li><li> +<a href="running.html">Running</a> +</li></ul> +</li><li> +<a href="extensions.html">Extensions</a> +<ul><li> +<a href="ext_ffi.html">FFI Library</a> +<ul><li> +<a href="ext_ffi_tutorial.html">FFI Tutorial</a> +</li><li> +<a href="ext_ffi_api.html">ffi.* API</a> +</li><li> +<a href="ext_ffi_semantics.html">FFI Semantics</a> +</li></ul> +</li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> +<a href="ext_jit.html">jit.* Library</a> +</li><li> +<a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a class="current" href="ext_profiler.html">Profiler</a> +</li></ul> +</li><li> +<a href="status.html">Status</a> +</li><li> +<a href="faq.html">FAQ</a> +</li><li> +<a href="https://luajit.org/list.html">Mailing List <span class="ext">»</span></a> +</li></ul> +</div> +<div id="main"> +<p> +LuaJIT has an integrated statistical profiler with very low overhead. It +allows sampling the currently executing stack and other parameters in +regular intervals. +</p> +<p> +The integrated profiler can be accessed from three levels: +</p> +<ul> +<li>The <a href="#hl_profiler">bundled high-level profiler</a>, invoked by the +<a href="#j_p"><tt>-jp</tt></a> command line option.</li> +<li>A <a href="#ll_lua_api">low-level Lua API</a> to control the profiler.</li> +<li>A <a href="#ll_c_api">low-level C API</a> to control the profiler.</li> +</ul> + +<h2 id="hl_profiler">High-Level Profiler</h2> +<p> +The bundled high-level profiler offers basic profiling functionality. It +generates simple textual summaries or source code annotations. It can be +accessed with the <a href="#j_p"><tt>-jp</tt></a> command line option +or from Lua code by loading the underlying <tt>jit.p</tt> module. +</p> +<p> +To cut to the chase — run this to get a CPU usage profile by +function name: +</p> +<pre class="code"> +luajit -jp myapp.lua +</pre> +<p> +It's <em>not</em> a stated goal of the bundled profiler to add every +possible option or to cater for special profiling needs. The low-level +profiler APIs are documented below. They may be used by third-party +authors to implement advanced functionality, e.g. IDE integration or +graphical profilers. +</p> +<p> +Note: Sampling works for both interpreted and JIT-compiled code. The +results for JIT-compiled code may sometimes be surprising. LuaJIT +heavily optimizes and inlines Lua code — there's no simple +one-to-one correspondence between source code lines and the sampled +machine code. +</p> + +<h3 id="j_p"><tt>-jp=[options[,output]]</tt></h3> +<p> +The <tt>-jp</tt> command line option starts the high-level profiler. +When the application run by the command line terminates, the profiler +stops and writes the results to <tt>stdout</tt> or to the specified +<tt>output</tt> file. +</p> +<p> +The <tt>options</tt> argument specifies how the profiling is to be +performed: +</p> +<ul> +<li><tt>f</tt> — Stack dump: function name, otherwise module:line. +This is the default mode.</li> +<li><tt>F</tt> — Stack dump: ditto, but dump module:name.</li> +<li><tt>l</tt> — Stack dump: module:line.</li> +<li><tt><number></tt> — stack dump depth (callee ← +caller). Default: 1.</li> +<li><tt>-<number></tt> — Inverse stack dump depth (caller +→ callee).</li> +<li><tt>s</tt> — Split stack dump after first stack level. Implies +depth ≥ 2 or depth ≤ -2.</li> +<li><tt>p</tt> — Show full path for module names.</li> +<li><tt>v</tt> — Show VM states.</li> +<li><tt>z</tt> — Show <a href="#jit_zone">zones</a>.</li> +<li><tt>r</tt> — Show raw sample counts. Default: show percentages.</li> +<li><tt>a</tt> — Annotate excerpts from source code files.</li> +<li><tt>A</tt> — Annotate complete source code files.</li> +<li><tt>G</tt> — Produce raw output suitable for graphical tools.</li> +<li><tt>m<number></tt> — Minimum sample percentage to be shown. +Default: 3%.</li> +<li><tt>i<number></tt> — Sampling interval in milliseconds. +Default: 10ms.<br> +Note: The actual sampling precision is OS-dependent.</li> +</ul> +<p> +The default output for <tt>-jp</tt> is a list of the most CPU consuming +spots in the application. Increasing the stack dump depth with (say) +<tt>-jp=2</tt> may help to point out the main callers or callees of +hotspots. But sample aggregation is still flat per unique stack dump. +</p> +<p> +To get a two-level view (split view) of callers/callees, use +<tt>-jp=s</tt> or <tt>-jp=-s</tt>. The percentages shown for the second +level are relative to the first level. +</p> +<p> +To see how much time is spent in each line relative to a function, use +<tt>-jp=fl</tt>. +</p> +<p> +To see how much time is spent in different VM states or +<a href="#jit_zone">zones</a>, use <tt>-jp=v</tt> or <tt>-jp=z</tt>. +</p> +<p> +Combinations of <tt>v/z</tt> with <tt>f/F/l</tt> produce two-level +views, e.g. <tt>-jp=vf</tt> or <tt>-jp=fv</tt>. This shows the time +spent in a VM state or zone vs. hotspots. This can be used to answer +questions like "Which time-consuming functions are only interpreted?" or +"What's the garbage collector overhead for a specific function?". +</p> +<p> +Multiple options can be combined — but not all combinations make +sense, see above. E.g. <tt>-jp=3si4m1</tt> samples three stack levels +deep in 4ms intervals and shows a split view of the CPU consuming +functions and their callers with a 1% threshold. +</p> +<p> +Source code annotations produced by <tt>-jp=a</tt> or <tt>-jp=A</tt> are +always flat and at the line level. Obviously, the source code files need +to be readable by the profiler script. +</p> +<p> +The high-level profiler can also be started and stopped from Lua code with: +</p> +<pre class="code"> +require("jit.p").start(options, output) +... +require("jit.p").stop() +</pre> + +<h3 id="jit_zone"><tt>jit.zone</tt> — Zones</h3> +<p> +Zones can be used to provide information about different parts of an +application to the high-level profiler. E.g. a game could make use of an +<tt>"AI"</tt> zone, a <tt>"PHYS"</tt> zone, etc. Zones are hierarchical, +organized as a stack. +</p> +<p> +The <tt>jit.zone</tt> module needs to be loaded explicitly: +</p> +<pre class="code"> +local zone = require("jit.zone") +</pre> +<ul> +<li><tt>zone("name")</tt> pushes a named zone to the zone stack.</li> +<li><tt>zone()</tt> pops the current zone from the zone stack and +returns its name.</li> +<li><tt>zone:get()</tt> returns the current zone name or <tt>nil</tt>.</li> +<li><tt>zone:flush()</tt> flushes the zone stack.</li> +</ul> +<p> +To show the time spent in each zone use <tt>-jp=z</tt>. To show the time +spent relative to hotspots use e.g. <tt>-jp=zf</tt> or <tt>-jp=fz</tt>. +</p> + +<h2 id="ll_lua_api">Low-level Lua API</h2> +<p> +The <tt>jit.profile</tt> module gives access to the low-level API of the +profiler from Lua code. This module needs to be loaded explicitly: +<pre class="code"> +local profile = require("jit.profile") +</pre> +<p> +This module can be used to implement your own higher-level profiler. +A typical profiling run starts the profiler, captures stack dumps in +the profiler callback, adds them to a hash table to aggregate the number +of samples, stops the profiler and then analyzes all captured +stack dumps. Other parameters can be sampled in the profiler callback, +too. But it's important not to spend too much time in the callback, +since this may skew the statistics. +</p> + +<h3 id="profile_start"><tt>profile.start(mode, cb)</tt> +— Start profiler</h3> +<p> +This function starts the profiler. The <tt>mode</tt> argument is a +string holding options: +</p> +<ul> +<li><tt>f</tt> — Profile with precision down to the function level.</li> +<li><tt>l</tt> — Profile with precision down to the line level.</li> +<li><tt>i<number></tt> — Sampling interval in milliseconds (default +10ms).</br> +Note: The actual sampling precision is OS-dependent. +</li> +</ul> +<p> +The <tt>cb</tt> argument is a callback function which is called with +three arguments: <tt>(thread, samples, vmstate)</tt>. The callback is +called on a separate coroutine, the <tt>thread</tt> argument is the +state that holds the stack to sample for profiling. Note: do +<em>not</em> modify the stack of that state or call functions on it. +</p> +<p> +<tt>samples</tt> gives the number of accumulated samples since the last +callback (usually 1). +</p> +<p> +<tt>vmstate</tt> holds the VM state at the time the profiling timer +triggered. This may or may not correspond to the state of the VM when +the profiling callback is called. The state is either <tt>'N'</tt> +native (compiled) code, <tt>'I'</tt> interpreted code, <tt>'C'</tt> +C code, <tt>'G'</tt> the garbage collector, or <tt>'J'</tt> the JIT +compiler. +</p> + +<h3 id="profile_stop"><tt>profile.stop()</tt> +— Stop profiler</h3> +<p> +This function stops the profiler. +</p> + +<h3 id="profile_dump"><tt>dump = profile.dumpstack([thread,] fmt, depth)</tt> +— Dump stack </h3> +<p> +This function allows taking stack dumps in an efficient manner. It +returns a string with a stack dump for the <tt>thread</tt> (coroutine), +formatted according to the <tt>fmt</tt> argument: +</p> +<ul> +<li><tt>p</tt> — Preserve the full path for module names. Otherwise, +only the file name is used.</li> +<li><tt>f</tt> — Dump the function name if it can be derived. Otherwise, +use module:line.</li> +<li><tt>F</tt> — Ditto, but dump module:name.</li> +<li><tt>l</tt> — Dump module:line.</li> +<li><tt>Z</tt> — Zap the following characters for the last dumped +frame.</li> +<li>All other characters are added verbatim to the output string.</li> +</ul> +<p> +The <tt>depth</tt> argument gives the number of frames to dump, starting +at the topmost frame of the thread. A negative number dumps the frames in +inverse order. +</p> +<p> +The first example prints a list of the current module names and line +numbers of up to 10 frames in separate lines. The second example prints +semicolon-separated function names for all frames (up to 100) in inverse +order: +</p> +<pre class="code"> +print(profile.dumpstack(thread, "l\n", 10)) +print(profile.dumpstack(thread, "lZ;", -100)) +</pre> + +<h2 id="ll_c_api">Low-level C API</h2> +<p> +The profiler can be controlled directly from C code, e.g. for +use by IDEs. The declarations are in <tt>"luajit.h"</tt> (see +<a href="ext_c_api.html">Lua/C API</a> extensions). +</p> + +<h3 id="luaJIT_profile_start"><tt>luaJIT_profile_start(L, mode, cb, data)</tt> +— Start profiler</h3> +<p> +This function starts the profiler. <a href="#profile_start">See +above</a> for a description of the <tt>mode</tt> argument. +</p> +<p> +The <tt>cb</tt> argument is a callback function with the following +declaration: +</p> +<pre class="code"> +typedef void (*luaJIT_profile_callback)(void *data, lua_State *L, + int samples, int vmstate); +</pre> +<p> +<tt>data</tt> is available for use by the callback. <tt>L</tt> is the +state that holds the stack to sample for profiling. Note: do +<em>not</em> modify this stack or call functions on this stack — +use a separate coroutine for this purpose. <a href="#profile_start">See +above</a> for a description of <tt>samples</tt> and <tt>vmstate</tt>. +</p> + +<h3 id="luaJIT_profile_stop"><tt>luaJIT_profile_stop(L)</tt> +— Stop profiler</h3> +<p> +This function stops the profiler. +</p> + +<h3 id="luaJIT_profile_dumpstack"><tt>p = luaJIT_profile_dumpstack(L, fmt, depth, len)</tt> +— Dump stack </h3> +<p> +This function allows taking stack dumps in an efficient manner. +<a href="#profile_dump">See above</a> for a description of <tt>fmt</tt> +and <tt>depth</tt>. +</p> +<p> +This function returns a <tt>const char *</tt> pointing to a +private string buffer of the profiler. The <tt>int *len</tt> +argument returns the length of the output string. The buffer is +overwritten on the next call and deallocated when the profiler stops. +You either need to consume the content immediately or copy it for later +use. +</p> +<br class="flush"> +</div> +<div id="foot"> +<hr class="hide"> +Copyright © 2005-2022 +<span class="noprint"> +· +<a href="contact.html">Contact</a> +</span> +</div> +</body> +</html> diff --git a/doc/extensions.html b/doc/extensions.html index 19ec9907..f006a6db 100644 --- a/doc/extensions.html +++ b/doc/extensions.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Extensions</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -54,9 +54,13 @@ td.excinterop { <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> @@ -105,6 +109,9 @@ bit.lshift bit.rshift bit.arshift bit.rol bit.ror bit.bswap This module is a LuaJIT built-in — you don't need to download or install Lua BitOp. The Lua BitOp site has full documentation for all <a href="https://bitop.luajit.org/api.html"><span class="ext">»</span> Lua BitOp API functions</a>. +The FFI adds support for +<a href="ext_ffi_semantics.html#cdata_arith">64 bit bitwise operations</a>, +using the same API functions. </p> <p> Please make sure to <tt>require</tt> the module before using any of @@ -138,6 +145,11 @@ LuaJIT adds some <a href="ext_c_api.html">extra functions to the Lua/C API</a>. </p> +<h3 id="profiler">Profiler</h3> +<p> +LuaJIT has an <a href="ext_profiler.html">integrated profiler</a>. +</p> + <h2 id="library">Enhanced Standard Library Functions</h2> <h3 id="xpcall"><tt>xpcall(f, err [,args...])</tt> passes arguments</h3> @@ -165,7 +177,7 @@ in <tt>"-inf"</tt>. <h3 id="tonumber"><tt>tonumber()</tt> etc. use builtin string to number conversion</h3> <p> All string-to-number conversions consistently convert integer and -floating-point inputs in decimal and hexadecimal on all platforms. +floating-point inputs in decimal, hexadecimal and binary on all platforms. <tt>strtod()</tt> is <em>not</em> used anymore, which avoids numerous problems with poor C library implementations. The builtin conversion function provides full precision according to the IEEE-754 standard, it @@ -189,6 +201,36 @@ for dot releases (x.y.0 → x.y.1), but may change with major or minor releases (2.0 → 2.1) or between any beta release. Foreign bytecode (e.g. from Lua 5.1) is incompatible and cannot be loaded. </p> +<p> +Note: <tt>LJ_GC64</tt> mode requires a different frame layout, which implies +a different, incompatible bytecode format for all 64 bit ports. This may be +rectified in the future. +</p> + +<h3 id="table_new"><tt>table.new(narray, nhash)</tt> allocates a pre-sized table</h3> +<p> +An extra library function <tt>table.new()</tt> can be made available via +<tt>require("table.new")</tt>. This creates a pre-sized table, just like +the C API equivalent <tt>lua_createtable()</tt>. This is useful for big +tables if the final table size is known and automatic table resizing is +too expensive. +</p> + +<h3 id="table_clear"><tt>table.clear(tab)</tt> clears a table</h3> +<p> +An extra library function <tt>table.clear()</tt> can be made available +via <tt>require("table.clear")</tt>. This clears all keys and values +from a table, but preserves the allocated array/hash sizes. This is +useful when a table, which is linked from multiple places, needs to be +cleared and/or when recycling a table for use by the same context. This +avoids managing backlinks, saves an allocation and the overhead of +incremental array/hash part growth. +</p> +<p> +Please note, this function is meant for very specific situations. In most +cases it's better to replace the (usually single) link with a new table +and let the GC do its work. +</p> <h3 id="math_random">Enhanced PRNG for <tt>math.random()</tt></h3> <p> @@ -267,6 +309,26 @@ indexes for varargs.</li> <li><tt>debug.getupvalue()</tt> and <tt>debug.setupvalue()</tt> handle C functions.</li> <li><tt>debug.upvalueid()</tt> and <tt>debug.upvaluejoin()</tt>.</li> +<li>Lua/C API extensions: +<tt>lua_version()</tt> +<tt>lua_upvalueid()</tt> +<tt>lua_upvaluejoin()</tt> +<tt>lua_loadx()</tt> +<tt>lua_copy()</tt> +<tt>lua_tonumberx()</tt> +<tt>lua_tointegerx()</tt> +<tt>luaL_fileresult()</tt> +<tt>luaL_execresult()</tt> +<tt>luaL_loadfilex()</tt> +<tt>luaL_loadbufferx()</tt> +<tt>luaL_traceback()</tt> +<tt>luaL_setfuncs()</tt> +<tt>luaL_pushmodule()</tt> +<tt>luaL_newlibtable()</tt> +<tt>luaL_newlib()</tt> +<tt>luaL_testudata()</tt> +<tt>luaL_setmetatable()</tt> +</li> <li>Command line option <tt>-E</tt>.</li> <li>Command line checks <tt>__tostring</tt> for errors.</li> </ul> @@ -292,6 +354,8 @@ exit status.</li> <li><tt>debug.setmetatable()</tt> returns object.</li> <li><tt>debug.getuservalue()</tt> and <tt>debug.setuservalue()</tt>.</li> <li>Remove <tt>math.mod()</tt>, <tt>string.gfind()</tt>.</li> +<li><tt>package.searchers</tt>.</li> +<li><tt>module()</tt> returns the module table.</li> </ul> <p> Note: this provides only partial compatibility with Lua 5.2 at the @@ -300,6 +364,21 @@ Lua 5.1, which prevents implementing features that would otherwise break the Lua/C API and ABI (e.g. <tt>_ENV</tt>). </p> +<h2 id="lua53">Extensions from Lua 5.3</h2> +<p> +LuaJIT supports some extensions from Lua 5.3: +<ul> +<li>Unicode escape <tt>'\u{XX...}'</tt> embeds the UTF-8 encoding in string literals.</li> +<li>The argument table <tt>arg</tt> can be read (and modified) by <tt>LUA_INIT</tt> and <tt>-e</tt> chunks.</li> +<li><tt>io.read()</tt> and <tt>file:read()</tt> accept formats with or without a leading <tt>*</tt>.</li> +<li><tt>assert()</tt> accepts any type of error object.</li> +<li><tt>table.move(a1, f, e, t [,a2])</tt>.</li> +<li><tt>coroutine.isyieldable()</tt>.</li> +<li>Lua/C API extensions: +<tt>lua_isyieldable()</tt> +</li> +</ul> + <h2 id="exceptions">C++ Exception Interoperability</h2> <p> LuaJIT has built-in support for interoperating with C++ exceptions. @@ -313,26 +392,21 @@ the toolchain used to compile LuaJIT: <td class="excinterop">Interoperability</td> </tr> <tr class="odd separate"> -<td class="excplatform">POSIX/x64, DWARF2 unwinding</td> -<td class="exccompiler">GCC 4.3+</td> +<td class="excplatform">External frame unwinding</td> +<td class="exccompiler">GCC, Clang, MSVC</td> <td class="excinterop"><b style="color: #00a000;">Full</b></td> </tr> <tr class="even"> -<td class="excplatform">Other platforms, DWARF2 unwinding</td> -<td class="exccompiler">GCC</td> +<td class="excplatform">Internal frame unwinding + DWARF2</td> +<td class="exccompiler">GCC, Clang</td> <td class="excinterop"><b style="color: #c06000;">Limited</b></td> </tr> <tr class="odd"> -<td class="excplatform">Windows/x64</td> -<td class="exccompiler">MSVC</td> -<td class="excinterop"><b style="color: #00a000;">Full</b></td> +<td class="excplatform">Windows 64 bit</td> +<td class="exccompiler">non-MSVC</td> +<td class="excinterop"><b style="color: #c06000;">Limited</b></td> </tr> <tr class="even"> -<td class="excplatform">Windows/x86</td> -<td class="exccompiler">Any</td> -<td class="excinterop"><b style="color: #a00000;">No</b></td> -</tr> -<tr class="odd"> <td class="excplatform">Other platforms</td> <td class="exccompiler">Other compilers</td> <td class="excinterop"><b style="color: #a00000;">No</b></td> @@ -383,14 +457,6 @@ C++ destructors.</li> <li>Lua errors <b>cannot</b> be caught on the C++ side.</li> <li>Throwing Lua errors across C++ frames will <b>not</b> call C++ destructors.</li> -<li>Additionally, on Windows/x86 with SEH-based C++ exceptions: -it's <b>not</b> safe to throw a Lua error across any frames containing -a C++ function with any try/catch construct or using variables with -(implicit) destructors. This also applies to any functions which may be -inlined in such a function. It doesn't matter whether <tt>lua_error()</tt> -is called inside or outside of a try/catch or whether any object actually -needs to be destroyed: the SEH chain is corrupted and this will eventually -lead to the termination of the process.</li> </ul> <br class="flush"> </div> diff --git a/doc/faq.html b/doc/faq.html index 5427a5c8..c07fd248 100644 --- a/doc/faq.html +++ b/doc/faq.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Frequently Asked Questions (FAQ)</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -40,9 +40,13 @@ dd { margin-left: 1.5em; } <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> diff --git a/doc/install.html b/doc/install.html index 8bf05d99..d78dda3e 100644 --- a/doc/install.html +++ b/doc/install.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Installation</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -60,9 +60,13 @@ td.compatx { <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> @@ -116,7 +120,7 @@ LuaJIT currently builds out-of-the box on most systems: <td class="compatx">v2.0 –</td> </tr> <tr class="even"> -<td class="compatname"><a href="#osx">macOS (OSX)</a></td> +<td class="compatname"><a href="#posix">macOS (OSX)</a></td> <td class="compatver">10.4</td> <td class="compatx"> </td> <td class="compatx">v2.1 –</td> @@ -302,6 +306,13 @@ MSVC (Visual Studio).</li> Please read the instructions given in these files, before changing any settings. </p> +<p> +All LuaJIT 64 bit ports use 64 bit GC objects by default (<tt>LJ_GC64</tt>). +For x64, you can select the old 32-on-64 bit mode by adding +<tt>XCFLAGS=-DLUAJIT_DISABLE_GC64</tt> to the make command. +Please check the note about the +<a href="extensions.html#string_dump">bytecode format</a> differences, too. +</p> <h2 id="posix">POSIX Systems (Linux, macOS, *BSD etc.)</h2> <h3>Prerequisites</h3> @@ -347,9 +358,12 @@ You can add an extra prefix to the search paths by appending the make PREFIX=/home/myself/lj2 </pre> <p> -Please use the LuaJIT 2.1 branch to compile for -<b id="osx">macOS (OSX)</b>. +Note for macOS: you <b>must</b> set the <tt>MACOSX_DEPLOYMENT_TARGET</tt> +environment variable to a value supported by your toolchain: </p> +<pre class="code"> +MACOSX_DEPLOYMENT_TARGET=XX.YY make +</pre> <h3>Installing LuaJIT</h3> <p> The top-level Makefile installs LuaJIT by default under @@ -433,25 +447,36 @@ directory where <tt>luajit.exe</tt> is installed <h2 id="cross">Cross-compiling LuaJIT</h2> <p> +First, let's clear up some terminology: +</p> +<ul> +<li>Host: This is your development system, usually based on a x64 or x86 CPU.</li> +<li>Target: This is the target system you want LuaJIT to run on, e.g. Android/ARM.</li> +<li>Toolchain: This comprises a C compiler, linker, assembler and a matching C library.</li> +<li>Host (or system) toolchain: This is the toolchain used to build native binaries for your host system.</li> +<li>Cross-compile toolchain: This is the toolchain used to build binaries for the target system. They can only be run on the target system.</li> +</ul> +<p> The GNU Makefile-based build system allows cross-compiling on any host -for any supported target, as long as both architectures have the same -pointer size. If you want to cross-compile to any 32 bit target on an -x64 OS, you need to install the multilib development package (e.g. -<tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part -(<tt>HOST_CC="gcc -m32"</tt>). +for any supported target: </p> +<ul> +<li>Yes, you need a toolchain for both your host <em>and</em> your target!</li> +<li>Both host and target architectures must have the same pointer size.</li> +<li>E.g. if you want to cross-compile to a 32 bit target on a 64 bit host, you need to install the multilib development package (e.g. <tt>libc6-dev-i386</tt> on Debian/Ubuntu) and build a 32 bit host part (<tt>HOST_CC="gcc -m32"</tt>).</li> +<li>64 bit targets always require compilation on a 64 bit host.</li> +</ul> <p> You need to specify <tt>TARGET_SYS</tt> whenever the host OS and the -target OS differ, or you'll get assembler or linker errors. E.g. if -you're compiling on a Windows or macOS host for embedded Linux or Android, -you need to add <tt>TARGET_SYS=Linux</tt> to the examples below. For a -minimal target OS, you may need to disable the built-in allocator in -<tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>. Don't forget to -specify the same <tt>TARGET_SYS</tt> for the install step, too. +target OS differ, or you'll get assembler or linker errors: </p> +<ul> +<li>E.g. if you're compiling on a Windows or macOS host for embedded Linux or Android, you need to add <tt>TARGET_SYS=Linux</tt> to the examples below.</li> +<li>For a minimal target OS, you may need to disable the built-in allocator in <tt>src/Makefile</tt> and use <tt>TARGET_SYS=Other</tt>.</li> +<li>Don't forget to specify the same <tt>TARGET_SYS</tt> for the install step, too.</li> +</ul> <p> -The examples below only show some popular targets — please check -the comments in <tt>src/Makefile</tt> for more details. +Here are some examples where host and target have the same CPU: </p> <pre class="code"> # Cross-compile to a 32 bit binary on a multilib x64 OS @@ -469,34 +494,44 @@ use the canonical toolchain triplets for Linux. </p> <p> Since there's often no easy way to detect CPU features at runtime, it's -important to compile with the proper CPU or architecture settings. You -can specify these when building the toolchain yourself. Or add -<tt>-mcpu=...</tt> or <tt>-march=...</tt> to <tt>TARGET_CFLAGS</tt>. For -ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, -too. Otherwise, LuaJIT may not run at the full performance of your target -CPU. +important to compile with the proper CPU or architecture settings: +</o> +<ul> +<li>The best way to get consistent results is to specify the correct settings when building the toolchain yourself.</li> +<li>For a pre-built, generic toolchain add <tt>-mcpu=...</tt> or <tt>-march=...</tt> and other necessary flags to <tt>TARGET_CFLAGS</tt>.</li> +<li>For ARM it's important to have the correct <tt>-mfloat-abi=...</tt> setting, too. Otherwise LuaJIT may not run at the full performance of your target CPU.</li> +<li>For MIPS it's important to select a supported ABI (o32 on MIPS32, n64 on MIPS64) and consistently compile your project either with hard-float or soft-float compiler settings.</li> +</ul> +<p> +Here are some examples for targets with a different CPU than the host: </p> <pre class="code"> # ARM soft-float make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ TARGET_CFLAGS="-mfloat-abi=soft" -# ARM soft-float ABI with VFP (example for Cortex-A8) +# ARM soft-float ABI with VFP (example for Cortex-A9) make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabi- \ - TARGET_CFLAGS="-mcpu=cortex-a8 -mfloat-abi=softfp" + TARGET_CFLAGS="-mcpu=cortex-a9 -mfloat-abi=softfp" -# ARM hard-float ABI with VFP (armhf, requires recent toolchain) +# ARM hard-float ABI with VFP (armhf, most modern toolchains) make HOST_CC="gcc -m32" CROSS=arm-linux-gnueabihf- +# ARM64 +make CROSS=aarch64-linux- + # PPC make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- -# PPC/e500v2 (fast interpreter only) -make HOST_CC="gcc -m32" CROSS=powerpc-e500v2-linux-gnuspe- -# MIPS big-endian +# MIPS32 big-endian make HOST_CC="gcc -m32" CROSS=mips-linux- -# MIPS little-endian +# MIPS32 little-endian make HOST_CC="gcc -m32" CROSS=mipsel-linux- + +# MIPS64 big-endian +make CROSS=mips-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" +# MIPS64 little-endian +make CROSS=mipsel-linux- TARGET_CFLAGS="-mips64r2 -mabi=64" </pre> <p> You can cross-compile for <b id="android">Android</b> using the <a href="https://developer.android.com/ndk/"><span class="ext">»</span> Android NDK</a>. @@ -504,8 +539,17 @@ Please adapt the environment variables to match the install locations and the desired target platform. E.g. Android 4.1 corresponds to ABI level 16. </p> <pre class="code"> -# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB) +# Android/ARM64, aarch64, Android 5.0+ (L) +NDKDIR=/opt/android/ndk +NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin +NDKCROSS=$NDKBIN/aarch64-linux-android- +NDKCC=$NDKBIN/aarch64-linux-android21-clang +make CROSS=$NDKCROSS \ + STATIC_CC=$NDKCC DYNAMIC_CC="$NDKCC -fPIC" \ + TARGET_LD=$NDKCC TARGET_AR="$NDKBIN/llvm-ar rcus" \ + TARGET_STRIP=$NDKBIN/llvm-strip +# Android/ARM, armeabi-v7a (ARMv7 VFP), Android 4.1+ (JB) NDKDIR=/opt/android/ndk NDKBIN=$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin NDKCROSS=$NDKBIN/arm-linux-androideabi- @@ -516,9 +560,23 @@ make HOST_CC="gcc -m32" CROSS=$NDKCROSS \ TARGET_STRIP=$NDKBIN/llvm-strip </pre> <p> -Please use the LuaJIT 2.1 branch to compile for -<b id="ios">iOS</b> (iPhone/iPad). +You can cross-compile for <b id="ios">iOS 3.0+</b> (iPhone/iPad) using the <a href="https://developer.apple.com/ios/"><span class="ext">»</span> iOS SDK</a>: +</p> +<p style="font-size: 8pt;"> +Note: <b>the JIT compiler is disabled for iOS</b>, because regular iOS Apps +are not allowed to generate code at runtime. You'll only get the performance +of the LuaJIT interpreter on iOS. This is still faster than plain Lua, but +much slower than the JIT compiler. Please complain to Apple, not me. +Or use Android. :-p </p> +<pre class="code"> +# iOS/ARM64 +ISDKP=$(xcrun --sdk iphoneos --show-sdk-path) +ICC=$(xcrun --sdk iphoneos --find clang) +ISDKF="-arch arm64 -isysroot $ISDKP" +make DEFAULT_CC=clang CROSS="$(dirname $ICC)/" \ + TARGET_FLAGS="$ISDKF" TARGET_SYS=iOS +</pre> <h3 id="consoles">Cross-compiling for consoles</h3> <p> @@ -562,15 +620,35 @@ unpacked the sources and run the build command given in the table: <td class="compatx"><tt>ps4build</tt></td> </tr> <tr class="even"> +<td class="compatname"><b id="ps5">PS5</b></td> +<td class="compatbits">64</td> +<td class="compatx"><tt>ps5build</tt></td> +</tr> +<tr class="odd"> <td class="compatname"><b id="psvita">PS Vita</b></td> <td class="compatbits">32</td> <td class="compatx"><tt>psvitabuild</tt></td> </tr> -<tr class="odd"> +<tr class="even"> <td class="compatname"><b id="xbox360">Xbox 360</b></td> <td class="compatbits">32</td> <td class="compatx"><tt>xedkbuild</tt></td> </tr> +<tr class="odd"> +<td class="compatname"><b id="xboxone">Xbox One</b></td> +<td class="compatbits">64</td> +<td class="compatx"><tt>xb1build</tt></td> +</tr> +<tr class="even"> +<td class="compatname"><b id="nx32">Nintendo Switch NX32</b></td> +<td class="compatbits">32</td> +<td class="compatx"><tt>nxbuild</tt></td> +</tr> +<tr class="odd"> +<td class="compatname"><b id="nx64">Nintendo Switch NX64</b></td> +<td class="compatbits">64</td> +<td class="compatx"><tt>nxbuild</tt></td> +</tr> </table> <p> Please check out the comments in the corresponding <tt>*.bat</tt> diff --git a/doc/luajit.html b/doc/luajit.html index ecb13fa8..e3a5478d 100644 --- a/doc/luajit.html +++ b/doc/luajit.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>LuaJIT</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -122,9 +122,13 @@ table.feature small { <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> @@ -156,13 +160,13 @@ LuaJIT is Copyright © 2005-2022 Mike Pall, released under the <tr><td><span style="font-size:90%;">Embedded</span></td><td>Android</td><td>iOS</td></tr> </table> <table class="feature os os3"> -<tr><td>PS3</td><td>PS4</td><td>PS Vita</td><td>Xbox 360</td></tr> +<tr><td>PS3</td><td>PS4<br>PS5</td><td>PS Vita</td><td>Xbox 360</td><td>Xbox One</td><td>Nintendo<br>Switch</td></tr> </table> <table class="feature compiler"> -<tr><td>GCC</td><td>CLANG<br>LLVM</td><td>MSVC</td></tr> +<tr><td>GCC</td><td>Clang<br>LLVM</td><td>MSVC</td></tr> </table> <table class="feature cpu"> -<tr><td>x86</td><td>x64</td><td>ARM</td><td>PPC</td><td>e500</td><td>MIPS</td></tr> +<tr><td>x86<br>x64</td><td>ARM<br>ARM64</td><td>PPC</td><td>MIPS32<br>MIPS64</td></tr> </table> <table class="feature fcompat"> <tr><td>Lua 5.1<br>API+ABI</td><td>+ JIT</td><td>+ BitOp</td><td>+ FFI</td><td>Drop-in<br>DLL/.so</td></tr> diff --git a/doc/running.html b/doc/running.html index 5689249f..9979d223 100644 --- a/doc/running.html +++ b/doc/running.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Running LuaJIT</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -59,9 +59,13 @@ td.param_default { <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a href="status.html">Status</a> @@ -105,6 +109,7 @@ are accepted: <li><tt>-t type</tt> — Set output file type (default: auto-detect from output name).</li> <li><tt>-a arch</tt> — Override architecture for object files (default: native).</li> <li><tt>-o os</tt> — Override OS for object files (default: native).</li> +<li><tt>-F name</tt> — Override filename (default: input filename).</li> <li><tt>-e chunk</tt> — Use chunk string as input.</li> <li><tt>-</tt> (a single minus sign) — Use stdin as input and/or stdout as output.</li> </ul> @@ -170,6 +175,7 @@ Here are the available LuaJIT control commands: <li id="j_flush"><tt>-jflush</tt> — Flushes the whole cache of compiled code.</li> <li id="j_v"><tt>-jv</tt> — Shows verbose information about the progress of the JIT compiler.</li> <li id="j_dump"><tt>-jdump</tt> — Dumps the code and structures used in various compiler stages.</li> +<li id="j_p"><tt>-jp</tt> — Start the <a href="ext_profiler.html">integrated profiler</a>.</li> </ul> <p> The <tt>-jv</tt> and <tt>-jdump</tt> commands are extension modules diff --git a/doc/status.html b/doc/status.html index 62a0d40a..efb1e064 100644 --- a/doc/status.html +++ b/doc/status.html @@ -1,8 +1,8 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> +<!DOCTYPE html> <html> <head> <title>Status</title> -<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +<meta charset="utf-8"> <meta name="Copyright" content="Copyright (C) 2005-2022"> <meta name="Language" content="en"> <link rel="stylesheet" type="text/css" href="bluequad.css" media="screen"> @@ -40,9 +40,13 @@ ul li { padding-bottom: 0.3em; } <a href="ext_ffi_semantics.html">FFI Semantics</a> </li></ul> </li><li> +<a href="ext_buffer.html">String Buffers</a> +</li><li> <a href="ext_jit.html">jit.* Library</a> </li><li> <a href="ext_c_api.html">Lua/C API</a> +</li><li> +<a href="ext_profiler.html">Profiler</a> </li></ul> </li><li> <a class="current" href="status.html">Status</a> @@ -54,7 +58,7 @@ ul li { padding-bottom: 0.3em; } </div> <div id="main"> <p> -This documentation is for LuaJIT 2.0.5. Please check the <tt>doc</tt> +This documentation is for LuaJIT 2.1.0-beta3. Please check the <tt>doc</tt> directory in each git branch for the version-specific documentation. </p> <p> @@ -86,12 +90,6 @@ The Lua <b>debug API</b> is missing a couple of features (return hooks for non-Lua functions) and shows slightly different behavior in LuaJIT (no per-coroutine hooks, no tail call counting). </li> -<li> -Currently, some <b>out-of-memory</b> errors from <b>on-trace code</b> are not -handled correctly. The error may fall through an on-trace -<tt>pcall</tt> or it may be passed on to the function set with -<tt>lua_atpanic</tt> on x64. -</li> </ul> <br class="flush"> </div> diff --git a/dynasm/dasm_arm.h b/dynasm/dasm_arm.h index f7f3d0db..fbfebee0 100644 --- a/dynasm/dasm_arm.h +++ b/dynasm/dasm_arm.h @@ -294,7 +294,7 @@ int dasm_link(Dst_DECL, size_t *szp) { /* Handle globals not defined in this translation unit. */ int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { int n = D->lglabels[idx]; /* Undefined label: Collapse rel chain and replace with marker (< 0). */ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } @@ -371,7 +371,10 @@ int dasm_encode(Dst_DECL, void *buffer) ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xe1a00000; break; case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp - 4); + goto patchrel; + } /* fallthrough */ case DASM_REL_PC: CK(n >= 0, UNDEF_PC); diff --git a/dynasm/dasm_arm.lua b/dynasm/dasm_arm.lua index 5dedaea1..3b4db86e 100644 --- a/dynasm/dasm_arm.lua +++ b/dynasm/dasm_arm.lua @@ -9,9 +9,9 @@ local _info = { arch = "arm", description = "DynASM ARM module", - version = "1.3.0", - vernum = 10300, - release = "2011-05-05", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", license = "MIT", } diff --git a/dynasm/dasm_arm64.h b/dynasm/dasm_arm64.h new file mode 100644 index 00000000..47c9c37d --- /dev/null +++ b/dynasm/dasm_arm64.h @@ -0,0 +1,563 @@ +/* +** DynASM ARM64 encoding engine. +** Copyright (C) 2005-2022 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + +#include <stddef.h> +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> + +#define DASM_ARCH "arm64" + +#ifndef DASM_EXTERN +#define DASM_EXTERN(a,b,c,d) 0 +#endif + +/* Action definitions. */ +enum { + DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, + /* The following actions need a buffer position. */ + DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, + /* The following actions also have an argument. */ + DASM_REL_PC, DASM_LABEL_PC, DASM_REL_A, + DASM_IMM, DASM_IMM6, DASM_IMM12, DASM_IMM13W, DASM_IMM13X, DASM_IMML, + DASM_IMMV, DASM_VREG, + DASM__MAX +}; + +/* Maximum number of section buffer positions for a single dasm_put() call. */ +#define DASM_MAXSECPOS 25 + +/* DynASM encoder status codes. Action list offset or number are or'ed in. */ +#define DASM_S_OK 0x00000000 +#define DASM_S_NOMEM 0x01000000 +#define DASM_S_PHASE 0x02000000 +#define DASM_S_MATCH_SEC 0x03000000 +#define DASM_S_RANGE_I 0x11000000 +#define DASM_S_RANGE_SEC 0x12000000 +#define DASM_S_RANGE_LG 0x13000000 +#define DASM_S_RANGE_PC 0x14000000 +#define DASM_S_RANGE_REL 0x15000000 +#define DASM_S_RANGE_VREG 0x16000000 +#define DASM_S_UNDEF_LG 0x21000000 +#define DASM_S_UNDEF_PC 0x22000000 + +/* Macros to convert positions (8 bit section + 24 bit index). */ +#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) +#define DASM_POS2BIAS(pos) ((pos)&0xff000000) +#define DASM_SEC2POS(sec) ((sec)<<24) +#define DASM_POS2SEC(pos) ((pos)>>24) +#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) + +/* Action list type. */ +typedef const unsigned int *dasm_ActList; + +/* Per-section structure. */ +typedef struct dasm_Section { + int *rbuf; /* Biased buffer pointer (negative section bias). */ + int *buf; /* True buffer pointer. */ + size_t bsize; /* Buffer size in bytes. */ + int pos; /* Biased buffer position. */ + int epos; /* End of biased buffer position - max single put. */ + int ofs; /* Byte offset into section. */ +} dasm_Section; + +/* Core structure holding the DynASM encoding state. */ +struct dasm_State { + size_t psize; /* Allocated size of this structure. */ + dasm_ActList actionlist; /* Current actionlist pointer. */ + int *lglabels; /* Local/global chain/pos ptrs. */ + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; + void **globals; /* Array of globals (bias -10). */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ + int status; /* Status code. */ + dasm_Section sections[1]; /* All sections. Alloc-extended. */ +}; + +/* The size of the core structure depends on the max. number of sections. */ +#define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) + + +/* Initialize DynASM state. */ +void dasm_init(Dst_DECL, int maxsection) +{ + dasm_State *D; + size_t psz = 0; + int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; + D->psize = psz; + D->lglabels = NULL; + D->lgsize = 0; + D->pclabels = NULL; + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; + for (i = 0; i < maxsection; i++) { + D->sections[i].buf = NULL; /* Need this for pass3. */ + D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); + D->sections[i].bsize = 0; + D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ + } +} + +/* Free DynASM state. */ +void dasm_free(Dst_DECL) +{ + dasm_State *D = Dst_REF; + int i; + for (i = 0; i < D->maxsection; i++) + if (D->sections[i].buf) + DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); + if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); + if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); + DASM_M_FREE(Dst, D, D->psize); +} + +/* Setup global label array. Must be called before dasm_setup(). */ +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; + D->globals = gl - 10; /* Negative bias to compensate for locals. */ + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + +/* Grow PC label array. Can be called after dasm_setup(), too. */ +void dasm_growpc(Dst_DECL, unsigned int maxpc) +{ + dasm_State *D = Dst_REF; + size_t osz = D->pcsize; + DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); + memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); +} + +/* Setup encoder. */ +void dasm_setup(Dst_DECL, const void *actionlist) +{ + dasm_State *D = Dst_REF; + int i; + D->actionlist = (dasm_ActList)actionlist; + D->status = DASM_S_OK; + D->section = &D->sections[0]; + memset((void *)D->lglabels, 0, D->lgsize); + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); + D->sections[i].ofs = 0; + } +} + + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) { \ + D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) +#define CKPL(kind, st) \ + do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ + D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) +#else +#define CK(x, st) ((void)0) +#define CKPL(kind, st) ((void)0) +#endif + +static int dasm_imm12(unsigned int n) +{ + if ((n >> 12) == 0) + return n; + else if ((n & 0xff000fff) == 0) + return (n >> 12) | 0x1000; + else + return -1; +} + +static int dasm_ffs(unsigned long long x) +{ + int n = -1; + while (x) { x >>= 1; n++; } + return n; +} + +static int dasm_imm13(int lo, int hi) +{ + int inv = 0, w = 64, s = 0xfff, xa, xb; + unsigned long long n = (((unsigned long long)hi) << 32) | (unsigned int)lo; + unsigned long long m = 1ULL, a, b, c; + if (n & 1) { n = ~n; inv = 1; } + a = n & (unsigned long long)-(long long)n; + b = (n+a)&(unsigned long long)-(long long)(n+a); + c = (n+a-b)&(unsigned long long)-(long long)(n+a-b); + xa = dasm_ffs(a); xb = dasm_ffs(b); + if (c) { + w = dasm_ffs(c) - xa; + if (w == 32) m = 0x0000000100000001UL; + else if (w == 16) m = 0x0001000100010001UL; + else if (w == 8) m = 0x0101010101010101UL; + else if (w == 4) m = 0x1111111111111111UL; + else if (w == 2) m = 0x5555555555555555UL; + else return -1; + s = (-2*w & 0x3f) - 1; + } else if (!a) { + return -1; + } else if (xb == -1) { + xb = 64; + } + if ((b-a) * m != n) return -1; + if (inv) { + return ((w - xb) << 6) | (s+w+xa-xb); + } else { + return ((w - xa) << 6) | (s+xb-xa); + } + return -1; +} + +/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ +void dasm_put(Dst_DECL, int start, ...) +{ + va_list ap; + dasm_State *D = Dst_REF; + dasm_ActList p = D->actionlist + start; + dasm_Section *sec = D->section; + int pos = sec->pos, ofs = sec->ofs; + int *b; + + if (pos >= sec->epos) { + DASM_M_GROW(Dst, int, sec->buf, sec->bsize, + sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); + sec->rbuf = sec->buf - DASM_POS2BIAS(pos); + sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); + } + + b = sec->rbuf; + b[pos++] = start; + + va_start(ap, start); + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + if (action >= DASM__MAX) { + ofs += 4; + } else { + int *pl, n = action >= DASM_REL_PC ? va_arg(ap, int) : 0; + switch (action) { + case DASM_STOP: goto stop; + case DASM_SECTION: + n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); + D->section = &D->sections[n]; goto stop; + case DASM_ESC: p++; ofs += 4; break; + case DASM_REL_EXT: if ((ins & 0x8000)) ofs += 8; break; + case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; + case DASM_REL_LG: + n = (ins & 2047) - 10; pl = D->lglabels + n; + /* Bkwd rel or global. */ + if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } + pl += 10; n = *pl; + if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ + goto linkrel; + case DASM_REL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putrel: + n = *pl; + if (n < 0) { /* Label exists. Get label pos and store it. */ + b[pos] = -n; + } else { + linkrel: + b[pos] = n; /* Else link to rel chain, anchored at label. */ + *pl = pos; + } + pos++; + if ((ins & 0x8000)) ofs += 8; + break; + case DASM_REL_A: + b[pos++] = n; + b[pos++] = va_arg(ap, int); + break; + case DASM_LABEL_LG: + pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; + case DASM_LABEL_PC: + pl = D->pclabels + n; CKPL(pc, PC); + putlabel: + n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; + } + *pl = -pos; /* Label exists now. */ + b[pos++] = ofs; /* Store pass1 offset estimate. */ + break; + case DASM_IMM: + CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); + n >>= ((ins>>10)&31); +#ifdef DASM_CHECKS + if ((ins & 0x8000)) + CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); + else + CK((n>>((ins>>5)&31)) == 0, RANGE_I); +#endif + b[pos++] = n; + break; + case DASM_IMM6: + CK((n >> 6) == 0, RANGE_I); + b[pos++] = n; + break; + case DASM_IMM12: + CK(dasm_imm12((unsigned int)n) != -1, RANGE_I); + b[pos++] = n; + break; + case DASM_IMM13W: + CK(dasm_imm13(n, n) != -1, RANGE_I); + b[pos++] = n; + break; + case DASM_IMM13X: { + int m = va_arg(ap, int); + CK(dasm_imm13(n, m) != -1, RANGE_I); + b[pos++] = n; + b[pos++] = m; + break; + } + case DASM_IMML: { +#ifdef DASM_CHECKS + int scale = (ins & 3); + CK((!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) || + (unsigned int)(n+256) < 512, RANGE_I); +#endif + b[pos++] = n; + break; + } + case DASM_IMMV: + ofs += 4; + b[pos++] = n; + break; + case DASM_VREG: + CK(n < 32, RANGE_VREG); + b[pos++] = n; + break; + } + } + } +stop: + va_end(ap); + sec->pos = pos; + sec->ofs = ofs; +} +#undef CK + +/* Pass 2: Link sections, shrink aligns, fix label offsets. */ +int dasm_link(Dst_DECL, size_t *szp) +{ + dasm_State *D = Dst_REF; + int secnum; + int ofs = 0; + +#ifdef DASM_CHECKS + *szp = 0; + if (D->status != DASM_S_OK) return D->status; + { + int pc; + for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) + if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; + } +#endif + + { /* Handle globals not defined in this translation unit. */ + int idx; + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { + int n = D->lglabels[idx]; + /* Undefined label: Collapse rel chain and replace with marker (< 0). */ + while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } + } + } + + /* Combine all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->rbuf; + int pos = DASM_SEC2POS(secnum); + int lastpos = sec->pos; + + while (pos != lastpos) { + dasm_ActList p = D->actionlist + b[pos++]; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: p++; break; + case DASM_REL_EXT: break; + case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; + case DASM_REL_LG: case DASM_REL_PC: pos++; break; + case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; + case DASM_IMM: case DASM_IMM6: case DASM_IMM12: case DASM_IMM13W: + case DASM_IMML: case DASM_IMMV: case DASM_VREG: pos++; break; + case DASM_IMM13X: case DASM_REL_A: pos += 2; break; + } + } + stop: (void)0; + } + ofs += sec->ofs; /* Next section starts right after current section. */ + } + + D->codesize = ofs; /* Total size of all code sections */ + *szp = ofs; + return DASM_S_OK; +} + +#ifdef DASM_CHECKS +#define CK(x, st) \ + do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0) +#else +#define CK(x, st) ((void)0) +#endif + +/* Pass 3: Encode sections. */ +int dasm_encode(Dst_DECL, void *buffer) +{ + dasm_State *D = Dst_REF; + char *base = (char *)buffer; + unsigned int *cp = (unsigned int *)buffer; + int secnum; + + /* Encode all code sections. No support for data sections (yet). */ + for (secnum = 0; secnum < D->maxsection; secnum++) { + dasm_Section *sec = D->sections + secnum; + int *b = sec->buf; + int *endb = sec->rbuf + sec->pos; + + while (b != endb) { + dasm_ActList p = D->actionlist + *b++; + while (1) { + unsigned int ins = *p++; + unsigned int action = (ins >> 16); + int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; + switch (action) { + case DASM_STOP: case DASM_SECTION: goto stop; + case DASM_ESC: *cp++ = *p++; break; + case DASM_REL_EXT: + n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins&2047), !(ins&2048)); + goto patchrel; + case DASM_ALIGN: + ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0xd503201f; + break; + case DASM_REL_LG: + if (n < 0) { + ptrdiff_t na = (ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4; + n = (int)na; + CK((ptrdiff_t)n == na, RANGE_REL); + goto patchrel; + } + /* fallthrough */ + case DASM_REL_PC: + CK(n >= 0, UNDEF_PC); + n = *DASM_POS2PTR(D, n) - (int)((char *)cp - base) + 4; + patchrel: + if (!(ins & 0xf800)) { /* B, BL */ + CK((n & 3) == 0 && ((n+0x08000000) >> 28) == 0, RANGE_REL); + cp[-1] |= ((n >> 2) & 0x03ffffff); + } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ + CK((n & 3) == 0 && ((n+0x00100000) >> 21) == 0, RANGE_REL); + cp[-1] |= ((n << 3) & 0x00ffffe0); + } else if ((ins & 0x3000) == 0x2000) { /* ADR */ + CK(((n+0x00100000) >> 21) == 0, RANGE_REL); + cp[-1] |= ((n << 3) & 0x00ffffe0) | ((n & 3) << 29); + } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ + cp[-1] |= ((n >> 9) & 0x00ffffe0) | (((n >> 12) & 3) << 29); + } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ + CK((n & 3) == 0 && ((n+0x00008000) >> 16) == 0, RANGE_REL); + cp[-1] |= ((n << 3) & 0x0007ffe0); + } else if ((ins & 0x8000)) { /* absolute */ + cp[0] = (unsigned int)((ptrdiff_t)cp - 4 + n); + cp[1] = (unsigned int)(((ptrdiff_t)cp - 4 + n) >> 32); + cp += 2; + } + break; + case DASM_REL_A: { + ptrdiff_t na = (((ptrdiff_t)(*b++) << 32) | (unsigned int)n); + if ((ins & 0x3000) == 0x3000) { /* ADRP */ + ins &= ~0x1000; + na = (na >> 12) - (((ptrdiff_t)cp - 4) >> 12); + } else { + na = na - (ptrdiff_t)cp + 4; + } + n = (int)na; + CK((ptrdiff_t)n == na, RANGE_REL); + goto patchrel; + } + case DASM_LABEL_LG: + ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: + cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); + break; + case DASM_IMM6: + cp[-1] |= ((n&31) << 19) | ((n&32) << 26); + break; + case DASM_IMM12: + cp[-1] |= (dasm_imm12((unsigned int)n) << 10); + break; + case DASM_IMM13W: + cp[-1] |= (dasm_imm13(n, n) << 10); + break; + case DASM_IMM13X: + cp[-1] |= (dasm_imm13(n, *b++) << 10); + break; + case DASM_IMML: { + int scale = (ins & 3); + cp[-1] |= (!(n & ((1<<scale)-1)) && (unsigned int)(n>>scale) < 4096) ? + ((n << (10-scale)) | 0x01000000) : ((n & 511) << 12); + break; + } + case DASM_IMMV: + *cp++ = n; + break; + case DASM_VREG: + cp[-1] |= (n & 0x1f) << (ins & 0x1f); + break; + default: *cp++ = ins; break; + } + } + stop: (void)0; + } + } + + if (base + D->codesize != (char *)cp) /* Check for phase errors. */ + return DASM_S_PHASE; + return DASM_S_OK; +} +#undef CK + +/* Get PC label offset. */ +int dasm_getpclabel(Dst_DECL, unsigned int pc) +{ + dasm_State *D = Dst_REF; + if (pc*sizeof(int) < D->pcsize) { + int pos = D->pclabels[pc]; + if (pos < 0) return *DASM_POS2PTR(D, -pos); + if (pos > 0) return -1; /* Undefined. */ + } + return -2; /* Unused or out of range. */ +} + +#ifdef DASM_CHECKS +/* Optional sanity checker to call between isolated encoding steps. */ +int dasm_checkstep(Dst_DECL, int secmatch) +{ + dasm_State *D = Dst_REF; + if (D->status == DASM_S_OK) { + int i; + for (i = 1; i <= 9; i++) { + if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } + D->lglabels[i] = 0; + } + } + if (D->status == DASM_S_OK && secmatch >= 0 && + D->section != &D->sections[secmatch]) + D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); + return D->status; +} +#endif + diff --git a/dynasm/dasm_arm64.lua b/dynasm/dasm_arm64.lua new file mode 100644 index 00000000..1f581ba0 --- /dev/null +++ b/dynasm/dasm_arm64.lua @@ -0,0 +1,1219 @@ +------------------------------------------------------------------------------ +-- DynASM ARM64 module. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + +-- Module information: +local _info = { + arch = "arm", + description = "DynASM ARM64 module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", + author = "Mike Pall", + license = "MIT", +} + +-- Exported glue functions for the arch-specific module. +local _M = { _info = _info } + +-- Cache library functions. +local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs +local assert, setmetatable, rawget = assert, setmetatable, rawget +local _s = string +local format, byte, char = _s.format, _s.byte, _s.char +local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub +local concat, sort, insert = table.concat, table.sort, table.insert +local bit = bit or require("bit") +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit + +-- Inherited tables and callbacks. +local g_opt, g_arch +local wline, werror, wfatal, wwarn + +-- Action name list. +-- CHECK: Keep this in sync with the C code! +local action_names = { + "STOP", "SECTION", "ESC", "REL_EXT", + "ALIGN", "REL_LG", "LABEL_LG", + "REL_PC", "LABEL_PC", "REL_A", + "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV", + "VREG", +} + +-- Maximum number of section buffer positions for dasm_put(). +-- CHECK: Keep this in sync with the C code! +local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. + +-- Action name -> action number. +local map_action = {} +for n,name in ipairs(action_names) do + map_action[name] = n-1 +end + +-- Action list buffer. +local actlist = {} + +-- Argument list for next dasm_put(). Start with offset 0 into action list. +local actargs = { 0 } + +-- Current number of section buffer positions for dasm_put(). +local secpos = 1 + +------------------------------------------------------------------------------ + +-- Dump action names and numbers. +local function dumpactions(out) + out:write("DynASM encoding engine action codes:\n") + for n,name in ipairs(action_names) do + local num = map_action[name] + out:write(format(" %-10s %02X %d\n", name, num, num)) + end + out:write("\n") +end + +-- Write action list buffer as a huge static C array. +local function writeactions(out, name) + local nn = #actlist + if nn == 0 then nn = 1; actlist[0] = map_action.STOP end + out:write("static const unsigned int ", name, "[", nn, "] = {\n") + for i = 1,nn-1 do + assert(out:write("0x", tohex(actlist[i]), ",\n")) + end + assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) +end + +------------------------------------------------------------------------------ + +-- Add word to action list. +local function wputxw(n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + actlist[#actlist+1] = n +end + +-- Add action to list with optional arg. Advance buffer pos, too. +local function waction(action, val, a, num) + local w = assert(map_action[action], "bad action name `"..action.."'") + wputxw(w * 0x10000 + (val or 0)) + if a then actargs[#actargs+1] = a end + if a or num then secpos = secpos + (num or 1) end +end + +-- Flush action list (intervening C code or buffer pos overflow). +local function wflush(term) + if #actlist == actargs[1] then return end -- Nothing to flush. + if not term then waction("STOP") end -- Terminate action list. + wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) + actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). + secpos = 1 -- The actionlist offset occupies a buffer position, too. +end + +-- Put escaped word. +local function wputw(n) + if n <= 0x000fffff then waction("ESC") end + wputxw(n) +end + +-- Reserve position for word. +local function wpos() + local pos = #actlist+1 + actlist[pos] = "" + return pos +end + +-- Store word to reserved position. +local function wputpos(pos, n) + assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") + if n <= 0x000fffff then + insert(actlist, pos+1, n) + n = map_action.ESC * 0x10000 + end + actlist[pos] = n +end + +------------------------------------------------------------------------------ + +-- Global label name -> global label number. With auto assignment on 1st use. +local next_global = 20 +local map_global = setmetatable({}, { __index = function(t, name) + if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end + local n = next_global + if n > 2047 then werror("too many global labels") end + next_global = n + 1 + t[name] = n + return n +end}) + +-- Dump global labels. +local function dumpglobals(out, lvl) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("Global labels:\n") + for i=20,next_global-1 do + out:write(format(" %s\n", t[i])) + end + out:write("\n") +end + +-- Write global label enum. +local function writeglobals(out, prefix) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("enum {\n") + for i=20,next_global-1 do + out:write(" ", prefix, t[i], ",\n") + end + out:write(" ", prefix, "_MAX\n};\n") +end + +-- Write global label names. +local function writeglobalnames(out, name) + local t = {} + for name, n in pairs(map_global) do t[n] = name end + out:write("static const char *const ", name, "[] = {\n") + for i=20,next_global-1 do + out:write(" \"", t[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Extern label name -> extern label number. With auto assignment on 1st use. +local next_extern = 0 +local map_extern_ = {} +local map_extern = setmetatable({}, { __index = function(t, name) + -- No restrictions on the name for now. + local n = next_extern + if n > 2047 then werror("too many extern labels") end + next_extern = n + 1 + t[name] = n + map_extern_[n] = name + return n +end}) + +-- Dump extern labels. +local function dumpexterns(out, lvl) + out:write("Extern labels:\n") + for i=0,next_extern-1 do + out:write(format(" %s\n", map_extern_[i])) + end + out:write("\n") +end + +-- Write extern label names. +local function writeexternnames(out, name) + out:write("static const char *const ", name, "[] = {\n") + for i=0,next_extern-1 do + out:write(" \"", map_extern_[i], "\",\n") + end + out:write(" (const char *)0\n};\n") +end + +------------------------------------------------------------------------------ + +-- Arch-specific maps. + +-- Ext. register name -> int. name. +local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", } + +-- Int. register name -> ext. name. +local map_reg_rev = { ["@x31"] = "xzr", ["@w31"] = "wzr", x30 = "lr", } + +local map_type = {} -- Type name -> { ctype, reg } +local ctypenum = 0 -- Type number (for Dt... macros). + +-- Reverse defines for registers. +function _M.revdef(s) + return map_reg_rev[s] or s +end + +local map_shift = { lsl = 0, lsr = 1, asr = 2, } + +local map_extend = { + uxtb = 0, uxth = 1, uxtw = 2, uxtx = 3, + sxtb = 4, sxth = 5, sxtw = 6, sxtx = 7, +} + +local map_cond = { + eq = 0, ne = 1, cs = 2, cc = 3, mi = 4, pl = 5, vs = 6, vc = 7, + hi = 8, ls = 9, ge = 10, lt = 11, gt = 12, le = 13, al = 14, + hs = 2, lo = 3, +} + +------------------------------------------------------------------------------ + +local parse_reg_type + +local function parse_reg(expr, shift, no_vreg) + if not expr then werror("expected register name") end + local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$") + if not tname then + tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$") + end + local tp = map_type[tname or expr] + if tp then + local reg = ovreg or tp.reg + if not reg then + werror("type `"..(tname or expr).."' needs a register override") + end + expr = reg + end + local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$") + if r then + r = tonumber(r) + if r <= 30 or (r == 31 and ok31 ~= "" or (rt ~= "w" and rt ~= "x")) then + if not parse_reg_type then + parse_reg_type = rt + elseif parse_reg_type ~= rt then + werror("register size mismatch") + end + return shl(r, shift), tp + end + end + local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$") + if vreg then + if not parse_reg_type then + parse_reg_type = vrt + elseif parse_reg_type ~= vrt then + werror("register size mismatch") + end + if not no_vreg then waction("VREG", shift, vreg) end + return 0 + end + werror("bad register name `"..expr.."'") +end + +local function parse_reg_base(expr) + if expr == "sp" then return 0x3e0 end + local base, tp = parse_reg(expr, 5) + if parse_reg_type ~= "x" then werror("bad register type") end + parse_reg_type = false + return base, tp +end + +local parse_ctx = {} + +local loadenv = setfenv and function(s) + local code = loadstring(s, "") + if code then setfenv(code, parse_ctx) end + return code +end or function(s) + return load(s, "", nil, parse_ctx) +end + +-- Try to parse simple arithmetic, too, since some basic ops are aliases. +local function parse_number(n) + local x = tonumber(n) + if x then return x end + local code = loadenv("return "..n) + if code then + local ok, y = pcall(code) + if ok and type(y) == "number" then return y end + end + return nil +end + +local function parse_imm(imm, bits, shift, scale, signed) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n then + if signed then + local s = sar(m, bits-1) + if s == 0 then return shl(m, shift) + elseif s == -1 then return shl(m + shl(1, bits), shift) end + else + if sar(m, bits) == 0 then return shl(m, shift) end + end + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + return 0 + end +end + +local function parse_imm12(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + if shr(n, 12) == 0 then + return shl(n, 10) + elseif band(n, 0xff000fff) == 0 then + return shr(n, 2) + 0x00400000 + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM12", 0, imm) + return 0 + end +end + +local function parse_imm13(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + local r64 = parse_reg_type == "x" + if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then + local inv = false + if band(n, 1) == 1 then n = bit.bnot(n); inv = true end + local t = {} + for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end + local b = table.concat(t) + b = b..(r64 and (inv and "1" or "0"):rep(32) or b) + local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)") + if p0 then + local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a + if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then + local s = band(-2*w, 0x3f) - 1 + if w == 64 then s = s + 0x1000 end + if inv then + return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10) + else + return shl(w-#p0, 16) + shl(s+#p1, 10) + end + end + end + werror("out of range immediate `"..imm.."'") + elseif r64 then + waction("IMM13X", 0, format("(unsigned int)(%s)", imm)) + actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm) + return 0 + else + waction("IMM13W", 0, imm) + return 0 + end +end + +local function parse_imm6(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + if n >= 0 and n <= 63 then + return shl(band(n, 0x1f), 19) + (n >= 32 and 0x80000000 or 0) + end + werror("out of range immediate `"..imm.."'") + else + waction("IMM6", 0, imm) + return 0 + end +end + +local function parse_imm_load(imm, scale) + local n = parse_number(imm) + if n then + local m = sar(n, scale) + if shl(m, scale) == n and m >= 0 and m < 0x1000 then + return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset. + elseif n >= -256 and n < 256 then + return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset. + end + werror("out of range immediate `"..imm.."'") + else + waction("IMML", scale, imm) + return 0 + end +end + +local function parse_fpimm(imm) + imm = match(imm, "^#(.*)$") + if not imm then werror("expected immediate operand") end + local n = parse_number(imm) + if n then + local m, e = math.frexp(n) + local s, e2 = 0, band(e-2, 7) + if m < 0 then m = -m; s = 0x00100000 end + m = m*32-16 + if m % 1 == 0 and m >= 0 and m <= 15 and sar(shl(e2, 29), 29)+2 == e then + return s + shl(e2, 17) + shl(m, 13) + end + werror("out of range immediate `"..imm.."'") + else + werror("NYI fpimm action") + end +end + +local function parse_shift(expr) + local s, s2 = match(expr, "^(%S+)%s*(.*)$") + s = map_shift[s] + if not s then werror("expected shift operand") end + return parse_imm(s2, 6, 10, 0, false) + shl(s, 22) +end + +local function parse_lslx16(expr) + local n = match(expr, "^lsl%s*#(%d+)$") + n = tonumber(n) + if not n then werror("expected shift operand") end + if band(n, parse_reg_type == "x" and 0xffffffcf or 0xffffffef) ~= 0 then + werror("bad shift amount") + end + return shl(n, 17) +end + +local function parse_extend(expr) + local s, s2 = match(expr, "^(%S+)%s*(.*)$") + if s == "lsl" then + s = parse_reg_type == "x" and 3 or 2 + else + s = map_extend[s] + end + if not s then werror("expected extend operand") end + return (s2 == "" and 0 or parse_imm(s2, 3, 10, 0, false)) + shl(s, 13) +end + +local function parse_cond(expr, inv) + local c = map_cond[expr] + if not c then werror("expected condition operand") end + return shl(bit.bxor(c, inv), 12) +end + +local function parse_load(params, nparams, n, op) + if params[n+2] then werror("too many operands") end + local scale = shr(op, 30) + local pn, p2 = params[n], params[n+1] + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + if not p1 then + if not p2 then + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local base, tp = parse_reg_base(reg) + if tp then + waction("IMML", scale, format(tp.ctypefmt, tailr)) + return op + base + end + end + end + werror("expected address operand") + end + if p2 then + if wb == "!" then werror("bad use of '!'") end + op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400 + elseif wb == "!" then + local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") + if not p1a then werror("bad use of '!'") end + op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00 + else + local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$") + op = op + parse_reg_base(p1a) + if p2a ~= "" then + local imm = match(p2a, "^,%s*#(.*)$") + if imm then + op = op + parse_imm_load(imm, scale) + else + local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$") + op = op + parse_reg(p2b, 16) + 0x00200800 + if parse_reg_type ~= "x" and parse_reg_type ~= "w" then + werror("bad index register type") + end + if p3b == "" then + if parse_reg_type ~= "x" then werror("bad index register type") end + op = op + 0x6000 + else + if p3s == "" or p3s == "#0" then + elseif p3s == "#"..scale then + op = op + 0x1000 + else + werror("bad scale") + end + if parse_reg_type == "x" then + if p3b == "lsl" and p3s ~= "" then op = op + 0x6000 + elseif p3b == "sxtx" then op = op + 0xe000 + else + werror("bad extend/shift specifier") + end + else + if p3b == "uxtw" then op = op + 0x4000 + elseif p3b == "sxtw" then op = op + 0xc000 + else + werror("bad extend/shift specifier") + end + end + end + end + else + if wb == "!" then werror("bad use of '!'") end + op = op + 0x01000000 + end + end + return op +end + +local function parse_load_pair(params, nparams, n, op) + if params[n+2] then werror("too many operands") end + local pn, p2 = params[n], params[n+1] + local scale = shr(op, 30) == 0 and 2 or 3 + local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$") + if not p1 then + if not p2 then + local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$") + if reg and tailr ~= "" then + local base, tp = parse_reg_base(reg) + if tp then + waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr)) + return op + base + 0x01000000 + end + end + end + werror("expected address operand") + end + if p2 then + if wb == "!" then werror("bad use of '!'") end + op = op + 0x00800000 + else + local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$") + if p1a then p1, p2 = p1a, p2a else p2 = "#0" end + op = op + (wb == "!" and 0x01800000 or 0x01000000) + end + return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true) +end + +local function parse_label(label, def) + local prefix = label:sub(1, 2) + -- =>label (pc label reference) + if prefix == "=>" then + return "PC", 0, label:sub(3) + end + -- ->name (global label reference) + if prefix == "->" then + return "LG", map_global[label:sub(3)] + end + if def then + -- [1-9] (local label definition) + if match(label, "^[1-9]$") then + return "LG", 10+tonumber(label) + end + else + -- [<>][1-9] (local label reference) + local dir, lnum = match(label, "^([<>])([1-9])$") + if dir then -- Fwd: 1-9, Bkwd: 11-19. + return "LG", lnum + (dir == ">" and 0 or 10) + end + -- extern label (extern label reference) + local extname = match(label, "^extern%s+(%S+)$") + if extname then + return "EXT", map_extern[extname] + end + -- &expr (pointer) + if label:sub(1, 1) == "&" then + return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2)) + end + end +end + +local function branch_type(op) + if band(op, 0x7c000000) == 0x14000000 then return 0 -- B, BL + elseif shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or + band(op, 0x3b000000) == 0x18000000 then + return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal + elseif band(op, 0x7e000000) == 0x36000000 then return 0x1000 -- TBZ, TBNZ + elseif band(op, 0x9f000000) == 0x10000000 then return 0x2000 -- ADR + elseif band(op, 0x9f000000) == band(0x90000000) then return 0x3000 -- ADRP + else + assert(false, "unknown branch type") + end +end + +------------------------------------------------------------------------------ + +local map_op, op_template + +local function op_alias(opname, f) + return function(params, nparams) + if not params then return "-> "..opname:sub(1, -3) end + f(params, nparams) + op_template(params, map_op[opname], nparams) + end +end + +local function alias_bfx(p) + p[4] = "#("..p[3]:sub(2)..")+("..p[4]:sub(2)..")-1" +end + +local function alias_bfiz(p) + parse_reg(p[1], 0, true) + if parse_reg_type == "w" then + p[3] = "#(32-("..p[3]:sub(2).."))%32" + p[4] = "#("..p[4]:sub(2)..")-1" + else + p[3] = "#(64-("..p[3]:sub(2).."))%64" + p[4] = "#("..p[4]:sub(2)..")-1" + end +end + +local alias_lslimm = op_alias("ubfm_4", function(p) + parse_reg(p[1], 0, true) + local sh = p[3]:sub(2) + if parse_reg_type == "w" then + p[3] = "#(32-("..sh.."))%32" + p[4] = "#31-("..sh..")" + else + p[3] = "#(64-("..sh.."))%64" + p[4] = "#63-("..sh..")" + end +end) + +-- Template strings for ARM instructions. +map_op = { + -- Basic data processing instructions. + add_3 = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx", + add_4 = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX", + adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx", + adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX", + cmn_2 = "2b00001fNMg|3100001fpNIg|ab20601fpNMx", + cmn_3 = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX", + + sub_3 = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx", + sub_4 = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX", + subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx", + subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX", + cmp_2 = "6b00001fNMg|7100001fpNIg|eb20601fpNMx", + cmp_3 = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX", + + neg_2 = "4b0003e0DMg", + neg_3 = "4b0003e0DMSg", + negs_2 = "6b0003e0DMg", + negs_3 = "6b0003e0DMSg", + + adc_3 = "1a000000DNMg", + adcs_3 = "3a000000DNMg", + sbc_3 = "5a000000DNMg", + sbcs_3 = "7a000000DNMg", + ngc_2 = "5a0003e0DMg", + ngcs_2 = "7a0003e0DMg", + + and_3 = "0a000000DNMg|12000000pDNig", + and_4 = "0a000000DNMSg", + orr_3 = "2a000000DNMg|32000000pDNig", + orr_4 = "2a000000DNMSg", + eor_3 = "4a000000DNMg|52000000pDNig", + eor_4 = "4a000000DNMSg", + ands_3 = "6a000000DNMg|72000000DNig", + ands_4 = "6a000000DNMSg", + tst_2 = "6a00001fNMg|7200001fNig", + tst_3 = "6a00001fNMSg", + + bic_3 = "0a200000DNMg", + bic_4 = "0a200000DNMSg", + orn_3 = "2a200000DNMg", + orn_4 = "2a200000DNMSg", + eon_3 = "4a200000DNMg", + eon_4 = "4a200000DNMSg", + bics_3 = "6a200000DNMg", + bics_4 = "6a200000DNMSg", + + movn_2 = "12800000DWg", + movn_3 = "12800000DWRg", + movz_2 = "52800000DWg", + movz_3 = "52800000DWRg", + movk_2 = "72800000DWg", + movk_3 = "72800000DWRg", + + -- TODO: this doesn't cover all valid immediates for mov reg, #imm. + mov_2 = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg", + mov_3 = "2a0003e0DMSg", + mvn_2 = "2a2003e0DMg", + mvn_3 = "2a2003e0DMSg", + + adr_2 = "10000000DBx", + adrp_2 = "90000000DBx", + + csel_4 = "1a800000DNMCg", + csinc_4 = "1a800400DNMCg", + csinv_4 = "5a800000DNMCg", + csneg_4 = "5a800400DNMCg", + cset_2 = "1a9f07e0Dcg", + csetm_2 = "5a9f03e0Dcg", + cinc_3 = "1a800400DNmcg", + cinv_3 = "5a800000DNmcg", + cneg_3 = "5a800400DNmcg", + + ccmn_4 = "3a400000NMVCg|3a400800N5VCg", + ccmp_4 = "7a400000NMVCg|7a400800N5VCg", + + madd_4 = "1b000000DNMAg", + msub_4 = "1b008000DNMAg", + mul_3 = "1b007c00DNMg", + mneg_3 = "1b00fc00DNMg", + + smaddl_4 = "9b200000DxNMwAx", + smsubl_4 = "9b208000DxNMwAx", + smull_3 = "9b207c00DxNMw", + smnegl_3 = "9b20fc00DxNMw", + smulh_3 = "9b407c00DNMx", + umaddl_4 = "9ba00000DxNMwAx", + umsubl_4 = "9ba08000DxNMwAx", + umull_3 = "9ba07c00DxNMw", + umnegl_3 = "9ba0fc00DxNMw", + umulh_3 = "9bc07c00DNMx", + + udiv_3 = "1ac00800DNMg", + sdiv_3 = "1ac00c00DNMg", + + -- Bit operations. + sbfm_4 = "13000000DN12w|93400000DN12x", + bfm_4 = "33000000DN12w|b3400000DN12x", + ubfm_4 = "53000000DN12w|d3400000DN12x", + extr_4 = "13800000DNM2w|93c00000DNM2x", + + sxtb_2 = "13001c00DNw|93401c00DNx", + sxth_2 = "13003c00DNw|93403c00DNx", + sxtw_2 = "93407c00DxNw", + uxtb_2 = "53001c00DNw", + uxth_2 = "53003c00DNw", + + sbfx_4 = op_alias("sbfm_4", alias_bfx), + bfxil_4 = op_alias("bfm_4", alias_bfx), + ubfx_4 = op_alias("ubfm_4", alias_bfx), + sbfiz_4 = op_alias("sbfm_4", alias_bfiz), + bfi_4 = op_alias("bfm_4", alias_bfiz), + ubfiz_4 = op_alias("ubfm_4", alias_bfiz), + + lsl_3 = function(params, nparams) + if params and params[3]:byte() == 35 then + return alias_lslimm(params, nparams) + else + return op_template(params, "1ac02000DNMg", nparams) + end + end, + lsr_3 = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x", + asr_3 = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x", + ror_3 = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x", + + clz_2 = "5ac01000DNg", + cls_2 = "5ac01400DNg", + rbit_2 = "5ac00000DNg", + rev_2 = "5ac00800DNw|dac00c00DNx", + rev16_2 = "5ac00400DNg", + rev32_2 = "dac00800DNx", + + -- Loads and stores. + ["strb_*"] = "38000000DwL", + ["ldrb_*"] = "38400000DwL", + ["ldrsb_*"] = "38c00000DwL|38800000DxL", + ["strh_*"] = "78000000DwL", + ["ldrh_*"] = "78400000DwL", + ["ldrsh_*"] = "78c00000DwL|78800000DxL", + ["str_*"] = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL", + ["ldr_*"] = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL", + ["ldrsw_*"] = "98000000DxB|b8800000DxL", + -- NOTE: ldur etc. are handled by ldr et al. + + ["stp_*"] = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP", + ["ldp_*"] = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP", + ["ldpsw_*"] = "68400000DAxP", + + -- Branches. + b_1 = "14000000B", + bl_1 = "94000000B", + blr_1 = "d63f0000Nx", + br_1 = "d61f0000Nx", + ret_0 = "d65f03c0", + ret_1 = "d65f0000Nx", + -- b.cond is added below. + cbz_2 = "34000000DBg", + cbnz_2 = "35000000DBg", + tbz_3 = "36000000DTBw|36000000DTBx", + tbnz_3 = "37000000DTBw|37000000DTBx", + + -- Miscellaneous instructions. + -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr + -- TODO: sys, sysl, ic, dc, at, tlbi + -- TODO: hint, yield, wfe, wfi, sev, sevl + -- TODO: clrex, dsb, dmb, isb + nop_0 = "d503201f", + brk_0 = "d4200000", + brk_1 = "d4200000W", + + -- Floating point instructions. + fmov_2 = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf", + fabs_2 = "1e20c000DNf", + fneg_2 = "1e214000DNf", + fsqrt_2 = "1e21c000DNf", + + fcvt_2 = "1e22c000DdNs|1e624000DsNd", + + -- TODO: half-precision and fixed-point conversions. + fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd", + fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd", + fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd", + fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd", + fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd", + fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd", + fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd", + fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd", + fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd", + fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd", + + scvtf_2 = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx", + ucvtf_2 = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx", + + frintn_2 = "1e244000DNf", + frintp_2 = "1e24c000DNf", + frintm_2 = "1e254000DNf", + frintz_2 = "1e25c000DNf", + frinta_2 = "1e264000DNf", + frintx_2 = "1e274000DNf", + frinti_2 = "1e27c000DNf", + + fadd_3 = "1e202800DNMf", + fsub_3 = "1e203800DNMf", + fmul_3 = "1e200800DNMf", + fnmul_3 = "1e208800DNMf", + fdiv_3 = "1e201800DNMf", + + fmadd_4 = "1f000000DNMAf", + fmsub_4 = "1f008000DNMAf", + fnmadd_4 = "1f200000DNMAf", + fnmsub_4 = "1f208000DNMAf", + + fmax_3 = "1e204800DNMf", + fmaxnm_3 = "1e206800DNMf", + fmin_3 = "1e205800DNMf", + fminnm_3 = "1e207800DNMf", + + fcmp_2 = "1e202000NMf|1e202008NZf", + fcmpe_2 = "1e202010NMf|1e202018NZf", + + fccmp_4 = "1e200400NMVCf", + fccmpe_4 = "1e200410NMVCf", + + fcsel_4 = "1e200c00DNMCf", + + -- TODO: crc32*, aes*, sha*, pmull + -- TODO: SIMD instructions. +} + +for cond,c in pairs(map_cond) do + map_op["b"..cond.."_1"] = tohex(0x54000000+c).."B" +end + +------------------------------------------------------------------------------ + +-- Handle opcodes defined with template strings. +local function parse_template(params, template, nparams, pos) + local op = tonumber(template:sub(1, 8), 16) + local n = 1 + local rtt = {} + + parse_reg_type = false + + -- Process each character. + for p in gmatch(template:sub(9), ".") do + local q = params[n] + if p == "D" then + op = op + parse_reg(q, 0); n = n + 1 + elseif p == "N" then + op = op + parse_reg(q, 5); n = n + 1 + elseif p == "M" then + op = op + parse_reg(q, 16); n = n + 1 + elseif p == "A" then + op = op + parse_reg(q, 10); n = n + 1 + elseif p == "m" then + op = op + parse_reg(params[n-1], 16) + + elseif p == "p" then + if q == "sp" then params[n] = "@x31" end + elseif p == "g" then + if parse_reg_type == "x" then + op = op + 0x80000000 + elseif parse_reg_type ~= "w" then + werror("bad register type") + end + parse_reg_type = false + elseif p == "f" then + if parse_reg_type == "d" then + op = op + 0x00400000 + elseif parse_reg_type ~= "s" then + werror("bad register type") + end + parse_reg_type = false + elseif p == "x" or p == "w" or p == "d" or p == "s" then + if parse_reg_type ~= p then + werror("register size mismatch") + end + parse_reg_type = false + + elseif p == "L" then + op = parse_load(params, nparams, n, op) + elseif p == "P" then + op = parse_load_pair(params, nparams, n, op) + + elseif p == "B" then + local mode, v, s = parse_label(q, false); n = n + 1 + if not mode then werror("bad label `"..q.."'") end + local m = branch_type(op) + if mode == "A" then + waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s)) + actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s) + else + waction("REL_"..mode, v+m, s, 1) + end + + elseif p == "I" then + op = op + parse_imm12(q); n = n + 1 + elseif p == "i" then + op = op + parse_imm13(q); n = n + 1 + elseif p == "W" then + op = op + parse_imm(q, 16, 5, 0, false); n = n + 1 + elseif p == "T" then + op = op + parse_imm6(q); n = n + 1 + elseif p == "1" then + op = op + parse_imm(q, 6, 16, 0, false); n = n + 1 + elseif p == "2" then + op = op + parse_imm(q, 6, 10, 0, false); n = n + 1 + elseif p == "5" then + op = op + parse_imm(q, 5, 16, 0, false); n = n + 1 + elseif p == "V" then + op = op + parse_imm(q, 4, 0, 0, false); n = n + 1 + elseif p == "F" then + op = op + parse_fpimm(q); n = n + 1 + elseif p == "Z" then + if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end + n = n + 1 + + elseif p == "S" then + op = op + parse_shift(q); n = n + 1 + elseif p == "X" then + op = op + parse_extend(q); n = n + 1 + elseif p == "R" then + op = op + parse_lslx16(q); n = n + 1 + elseif p == "C" then + op = op + parse_cond(q, 0); n = n + 1 + elseif p == "c" then + op = op + parse_cond(q, 1); n = n + 1 + + else + assert(false) + end + end + wputpos(pos, op) +end + +function op_template(params, template, nparams) + if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end + + -- Limit number of section buffer positions used by a single dasm_put(). + -- A single opcode needs a maximum of 4 positions. + if secpos+4 > maxsecpos then wflush() end + local pos = wpos() + local lpos, apos, spos = #actlist, #actargs, secpos + + local ok, err + for t in gmatch(template, "[^|]+") do + ok, err = pcall(parse_template, params, t, nparams, pos) + if ok then return end + secpos = spos + actlist[lpos+1] = nil + actlist[lpos+2] = nil + actlist[lpos+3] = nil + actlist[lpos+4] = nil + actargs[apos+1] = nil + actargs[apos+2] = nil + actargs[apos+3] = nil + actargs[apos+4] = nil + end + error(err, 0) +end + +map_op[".template__"] = op_template + +------------------------------------------------------------------------------ + +-- Pseudo-opcode to mark the position where the action list is to be emitted. +map_op[".actionlist_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeactions(out, name) end) +end + +-- Pseudo-opcode to mark the position where the global enum is to be emitted. +map_op[".globals_1"] = function(params) + if not params then return "prefix" end + local prefix = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobals(out, prefix) end) +end + +-- Pseudo-opcode to mark the position where the global names are to be emitted. +map_op[".globalnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeglobalnames(out, name) end) +end + +-- Pseudo-opcode to mark the position where the extern names are to be emitted. +map_op[".externnames_1"] = function(params) + if not params then return "cvar" end + local name = params[1] -- No syntax check. You get to keep the pieces. + wline(function(out) writeexternnames(out, name) end) +end + +------------------------------------------------------------------------------ + +-- Label pseudo-opcode (converted from trailing colon form). +map_op[".label_1"] = function(params) + if not params then return "[1-9] | ->global | =>pcexpr" end + if secpos+1 > maxsecpos then wflush() end + local mode, n, s = parse_label(params[1], true) + if not mode or mode == "EXT" then werror("bad label definition") end + waction("LABEL_"..mode, n, s, 1) +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcodes for data storage. +local function op_data(params) + if not params then return "imm..." end + local sz = params.op == ".long" and 4 or 8 + for _,p in ipairs(params) do + local imm = parse_number(p) + if imm then + local n = tobit(imm) + if n == imm or (n < 0 and n + 2^32 == imm) then + wputw(n < 0 and n + 2^32 or n) + if sz == 8 then + wputw(imm < 0 and 0xffffffff or 0) + end + elseif sz == 4 then + werror("bad immediate `"..p.."'") + else + imm = nil + end + end + if not imm then + local mode, v, s = parse_label(p, false) + if sz == 4 then + if mode then werror("label does not fit into .long") end + waction("IMMV", 0, p) + elseif mode and mode ~= "A" then + waction("REL_"..mode, v+0x8000, s, 1) + else + if mode == "A" then p = s end + waction("IMMV", 0, format("(unsigned int)(%s)", p)) + waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", p)) + end + end + if secpos+2 > maxsecpos then wflush() end + end +end +map_op[".long_*"] = op_data +map_op[".quad_*"] = op_data +map_op[".addr_*"] = op_data + +-- Alignment pseudo-opcode. +map_op[".align_1"] = function(params) + if not params then return "numpow2" end + if secpos+1 > maxsecpos then wflush() end + local align = tonumber(params[1]) + if align then + local x = align + -- Must be a power of 2 in the range (2 ... 256). + for i=1,8 do + x = x / 2 + if x == 1 then + waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. + return + end + end + end + werror("bad alignment") +end + +------------------------------------------------------------------------------ + +-- Pseudo-opcode for (primitive) type definitions (map to C types). +map_op[".type_3"] = function(params, nparams) + if not params then + return nparams == 2 and "name, ctype" or "name, ctype, reg" + end + local name, ctype, reg = params[1], params[2], params[3] + if not match(name, "^[%a_][%w_]*$") then + werror("bad type name `"..name.."'") + end + local tp = map_type[name] + if tp then + werror("duplicate type `"..name.."'") + end + -- Add #type to defines. A bit unclean to put it in map_archdef. + map_archdef["#"..name] = "sizeof("..ctype..")" + -- Add new type and emit shortcut define. + local num = ctypenum + 1 + map_type[name] = { + ctype = ctype, + ctypefmt = format("Dt%X(%%s)", num), + reg = reg, + } + wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) + ctypenum = num +end +map_op[".type_2"] = map_op[".type_3"] + +-- Dump type definitions. +local function dumptypes(out, lvl) + local t = {} + for name in pairs(map_type) do t[#t+1] = name end + sort(t) + out:write("Type definitions:\n") + for _,name in ipairs(t) do + local tp = map_type[name] + local reg = tp.reg or "" + out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) + end + out:write("\n") +end + +------------------------------------------------------------------------------ + +-- Set the current section. +function _M.section(num) + waction("SECTION", num) + wflush(true) -- SECTION is a terminal action. +end + +------------------------------------------------------------------------------ + +-- Dump architecture description. +function _M.dumparch(out) + out:write(format("DynASM %s version %s, released %s\n\n", + _info.arch, _info.version, _info.release)) + dumpactions(out) +end + +-- Dump all user defined elements. +function _M.dumpdef(out, lvl) + dumptypes(out, lvl) + dumpglobals(out, lvl) + dumpexterns(out, lvl) +end + +------------------------------------------------------------------------------ + +-- Pass callbacks from/to the DynASM core. +function _M.passcb(wl, we, wf, ww) + wline, werror, wfatal, wwarn = wl, we, wf, ww + return wflush +end + +-- Setup the arch-specific module. +function _M.setup(arch, opt) + g_arch, g_opt = arch, opt +end + +-- Merge the core maps and the arch-specific maps. +function _M.mergemaps(map_coreop, map_def) + setmetatable(map_op, { __index = map_coreop }) + setmetatable(map_def, { __index = map_archdef }) + return map_op, map_def +end + +return _M + +------------------------------------------------------------------------------ + diff --git a/dynasm/dasm_mips.h b/dynasm/dasm_mips.h index 15b4b137..3e99a005 100644 --- a/dynasm/dasm_mips.h +++ b/dynasm/dasm_mips.h @@ -21,7 +21,7 @@ enum { /* The following actions need a buffer position. */ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, /* The following actions also have an argument. */ - DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, DASM__MAX }; @@ -155,10 +155,10 @@ void dasm_setup(Dst_DECL, const void *actionlist) #ifdef DASM_CHECKS #define CK(x, st) \ do { if (!(x)) { \ - D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) + D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) #define CKPL(kind, st) \ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ - D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) + D->status = DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) #else #define CK(x, st) ((void)0) #define CKPL(kind, st) ((void)0) @@ -231,7 +231,7 @@ void dasm_put(Dst_DECL, int start, ...) *pl = -pos; /* Label exists now. */ b[pos++] = ofs; /* Store pass1 offset estimate. */ break; - case DASM_IMM: + case DASM_IMM: case DASM_IMMS: #ifdef DASM_CHECKS CK((n & ((1<<((ins>>10)&31))-1)) == 0, RANGE_I); #endif @@ -273,7 +273,7 @@ int dasm_link(Dst_DECL, size_t *szp) { /* Handle globals not defined in this translation unit. */ int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { int n = D->lglabels[idx]; /* Undefined label: Collapse rel chain and replace with marker (< 0). */ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } @@ -299,7 +299,7 @@ int dasm_link(Dst_DECL, size_t *szp) case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; case DASM_REL_LG: case DASM_REL_PC: pos++; break; case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; - case DASM_IMM: pos++; break; + case DASM_IMM: case DASM_IMMS: pos++; break; } } stop: (void)0; @@ -314,7 +314,7 @@ int dasm_link(Dst_DECL, size_t *szp) #ifdef DASM_CHECKS #define CK(x, st) \ - do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) + do { if (!(x)) return DASM_S_##st|(int)(p-D->actionlist-1); } while (0) #else #define CK(x, st) ((void)0) #endif @@ -349,25 +349,32 @@ int dasm_encode(Dst_DECL, void *buffer) ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; break; case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp); + goto patchrel; + } /* fallthrough */ case DASM_REL_PC: CK(n >= 0, UNDEF_PC); n = *DASM_POS2PTR(D, n); if (ins & 2048) - n = n - (int)((char *)cp - base); + n = (n + (int)(size_t)base) & 0x0fffffff; else - n = (n + (int)base) & 0x0fffffff; - patchrel: + n = n - (int)((char *)cp - base); + patchrel: { + unsigned int e = 16 + ((ins >> 12) & 15); CK((n & 3) == 0 && - ((n + ((ins & 2048) ? 0x00020000 : 0)) >> - ((ins & 2048) ? 18 : 28)) == 0, RANGE_REL); - cp[-1] |= ((n>>2) & ((ins & 2048) ? 0x0000ffff: 0x03ffffff)); + ((n + ((ins & 2048) ? 0 : (1<<(e+1)))) >> (e+2)) == 0, RANGE_REL); + cp[-1] |= ((n>>2) & ((1<<e)-1)); + } break; case DASM_LABEL_LG: ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); break; case DASM_LABEL_PC: break; + case DASM_IMMS: + cp[-1] |= ((n>>3) & 4); n &= 0x1f; + /* fallthrough */ case DASM_IMM: cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); break; @@ -410,7 +417,7 @@ int dasm_checkstep(Dst_DECL, int secmatch) } if (D->status == DASM_S_OK && secmatch >= 0 && D->section != &D->sections[secmatch]) - D->status = DASM_S_MATCH_SEC|(D->section-D->sections); + D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); return D->status; } #endif diff --git a/dynasm/dasm_mips.lua b/dynasm/dasm_mips.lua index dfa1f72e..6f893fe0 100644 --- a/dynasm/dasm_mips.lua +++ b/dynasm/dasm_mips.lua @@ -1,17 +1,20 @@ ------------------------------------------------------------------------------ --- DynASM MIPS module. +-- DynASM MIPS32/MIPS64 module. -- -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. ------------------------------------------------------------------------------ +local mips64 = mips64 +local mipsr6 = _map_def.MIPSR6 + -- Module information: local _info = { - arch = "mips", - description = "DynASM MIPS module", - version = "1.3.0", - vernum = 10300, - release = "2012-01-23", + arch = mips64 and "mips64" or "mips", + description = "DynASM MIPS32/MIPS64 module", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", license = "MIT", } @@ -27,7 +30,8 @@ local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char local match, gmatch = _s.match, _s.gmatch local concat, sort = table.concat, table.sort local bit = bit or require("bit") -local band, shl, sar, tohex = bit.band, bit.lshift, bit.arshift, bit.tohex +local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift +local tohex = bit.tohex -- Inherited tables and callbacks. local g_opt, g_arch @@ -38,7 +42,7 @@ local wline, werror, wfatal, wwarn local action_names = { "STOP", "SECTION", "ESC", "REL_EXT", "ALIGN", "REL_LG", "LABEL_LG", - "REL_PC", "LABEL_PC", "IMM", + "REL_PC", "LABEL_PC", "IMM", "IMMS", } -- Maximum number of section buffer positions for dasm_put(). @@ -235,7 +239,6 @@ local map_op = { bne_3 = "14000000STB", blez_2 = "18000000SB", bgtz_2 = "1c000000SB", - addi_3 = "20000000TSI", li_2 = "24000000TI", addiu_3 = "24000000TSI", slti_3 = "28000000TSI", @@ -245,70 +248,52 @@ local map_op = { ori_3 = "34000000TSU", xori_3 = "38000000TSU", lui_2 = "3c000000TU", - beqzl_2 = "50000000SB", - beql_3 = "50000000STB", - bnezl_2 = "54000000SB", - bnel_3 = "54000000STB", - blezl_2 = "58000000SB", - bgtzl_2 = "5c000000SB", + daddiu_3 = mips64 and "64000000TSI", + ldl_2 = mips64 and "68000000TO", + ldr_2 = mips64 and "6c000000TO", lb_2 = "80000000TO", lh_2 = "84000000TO", - lwl_2 = "88000000TO", lw_2 = "8c000000TO", lbu_2 = "90000000TO", lhu_2 = "94000000TO", - lwr_2 = "98000000TO", + lwu_2 = mips64 and "9c000000TO", sb_2 = "a0000000TO", sh_2 = "a4000000TO", - swl_2 = "a8000000TO", sw_2 = "ac000000TO", - swr_2 = "b8000000TO", - cache_2 = "bc000000NO", - ll_2 = "c0000000TO", lwc1_2 = "c4000000HO", - pref_2 = "cc000000NO", ldc1_2 = "d4000000HO", - sc_2 = "e0000000TO", + ld_2 = mips64 and "dc000000TO", swc1_2 = "e4000000HO", sdc1_2 = "f4000000HO", + sd_2 = mips64 and "fc000000TO", -- Opcode SPECIAL. nop_0 = "00000000", sll_3 = "00000000DTA", - movf_2 = "00000001DS", - movf_3 = "00000001DSC", - movt_2 = "00010001DS", - movt_3 = "00010001DSC", + sextw_2 = "00000000DT", srl_3 = "00000002DTA", rotr_3 = "00200002DTA", sra_3 = "00000003DTA", sllv_3 = "00000004DTS", srlv_3 = "00000006DTS", rotrv_3 = "00000046DTS", + drotrv_3 = mips64 and "00000056DTS", srav_3 = "00000007DTS", - jr_1 = "00000008S", jalr_1 = "0000f809S", jalr_2 = "00000009DS", - movz_3 = "0000000aDST", - movn_3 = "0000000bDST", syscall_0 = "0000000c", syscall_1 = "0000000cY", break_0 = "0000000d", break_1 = "0000000dY", sync_0 = "0000000f", - mfhi_1 = "00000010D", - mthi_1 = "00000011S", - mflo_1 = "00000012D", - mtlo_1 = "00000013S", - mult_2 = "00000018ST", - multu_2 = "00000019ST", - div_2 = "0000001aST", - divu_2 = "0000001bST", + dsllv_3 = mips64 and "00000014DTS", + dsrlv_3 = mips64 and "00000016DTS", + dsrav_3 = mips64 and "00000017DTS", add_3 = "00000020DST", - move_2 = "00000021DS", + move_2 = mips64 and "00000025DS" or "00000021DS", addu_3 = "00000021DST", sub_3 = "00000022DST", - negu_2 = "00000023DT", + negu_2 = mips64 and "0000002fDT" or "00000023DT", subu_3 = "00000023DST", and_3 = "00000024DST", or_3 = "00000025DST", @@ -317,6 +302,10 @@ local map_op = { nor_3 = "00000027DST", slt_3 = "0000002aDST", sltu_3 = "0000002bDST", + dadd_3 = mips64 and "0000002cDST", + daddu_3 = mips64 and "0000002dDST", + dsub_3 = mips64 and "0000002eDST", + dsubu_3 = mips64 and "0000002fDST", tge_2 = "00000030ST", tge_3 = "00000030STZ", tgeu_2 = "00000031ST", @@ -329,40 +318,36 @@ local map_op = { teq_3 = "00000034STZ", tne_2 = "00000036ST", tne_3 = "00000036STZ", + dsll_3 = mips64 and "00000038DTa", + dsrl_3 = mips64 and "0000003aDTa", + drotr_3 = mips64 and "0020003aDTa", + dsra_3 = mips64 and "0000003bDTa", + dsll32_3 = mips64 and "0000003cDTA", + dsrl32_3 = mips64 and "0000003eDTA", + drotr32_3 = mips64 and "0020003eDTA", + dsra32_3 = mips64 and "0000003fDTA", -- Opcode REGIMM. bltz_2 = "04000000SB", bgez_2 = "04010000SB", bltzl_2 = "04020000SB", bgezl_2 = "04030000SB", - tgei_2 = "04080000SI", - tgeiu_2 = "04090000SI", - tlti_2 = "040a0000SI", - tltiu_2 = "040b0000SI", - teqi_2 = "040c0000SI", - tnei_2 = "040e0000SI", - bltzal_2 = "04100000SB", bal_1 = "04110000B", - bgezal_2 = "04110000SB", - bltzall_2 = "04120000SB", - bgezall_2 = "04130000SB", synci_1 = "041f0000O", - -- Opcode SPECIAL2. - madd_2 = "70000000ST", - maddu_2 = "70000001ST", - mul_3 = "70000002DST", - msub_2 = "70000004ST", - msubu_2 = "70000005ST", - clz_2 = "70000020DS=", - clo_2 = "70000021DS=", - sdbbp_0 = "7000003f", - sdbbp_1 = "7000003fY", - -- Opcode SPECIAL3. ext_4 = "7c000000TSAM", -- Note: last arg is msbd = size-1 + dextm_4 = mips64 and "7c000001TSAM", -- Args: pos | size-1-32 + dextu_4 = mips64 and "7c000002TSAM", -- Args: pos-32 | size-1 + dext_4 = mips64 and "7c000003TSAM", -- Args: pos | size-1 + zextw_2 = mips64 and "7c00f803TS", ins_4 = "7c000004TSAM", -- Note: last arg is msb = pos+size-1 + dinsm_4 = mips64 and "7c000005TSAM", -- Args: pos | pos+size-33 + dinsu_4 = mips64 and "7c000006TSAM", -- Args: pos-32 | pos+size-33 + dins_4 = mips64 and "7c000007TSAM", -- Args: pos | pos+size-1 wsbh_2 = "7c0000a0DT", + dsbh_2 = mips64 and "7c0000a4DT", + dshd_2 = mips64 and "7c000164DT", seb_2 = "7c000420DT", seh_2 = "7c000620DT", rdhwr_2 = "7c00003bTD", @@ -370,8 +355,12 @@ local map_op = { -- Opcode COP0. mfc0_2 = "40000000TD", mfc0_3 = "40000000TDW", + dmfc0_2 = mips64 and "40200000TD", + dmfc0_3 = mips64 and "40200000TDW", mtc0_2 = "40800000TD", mtc0_3 = "40800000TDW", + dmtc0_2 = mips64 and "40a00000TD", + dmtc0_3 = mips64 and "40a00000TDW", rdpgpr_2 = "41400000DT", di_0 = "41606000", di_1 = "41606000T", @@ -388,21 +377,14 @@ local map_op = { -- Opcode COP1. mfc1_2 = "44000000TG", + dmfc1_2 = mips64 and "44200000TG", cfc1_2 = "44400000TG", mfhc1_2 = "44600000TG", mtc1_2 = "44800000TG", + dmtc1_2 = mips64 and "44a00000TG", ctc1_2 = "44c00000TG", mthc1_2 = "44e00000TG", - bc1f_1 = "45000000B", - bc1f_2 = "45000000CB", - bc1t_1 = "45010000B", - bc1t_2 = "45010000CB", - bc1fl_1 = "45020000B", - bc1fl_2 = "45020000CB", - bc1tl_1 = "45030000B", - bc1tl_2 = "45030000CB", - ["add.s_3"] = "46000000FGH", ["sub.s_3"] = "46000001FGH", ["mul.s_3"] = "46000002FGH", @@ -419,51 +401,11 @@ local map_op = { ["trunc.w.s_2"] = "4600000dFG", ["ceil.w.s_2"] = "4600000eFG", ["floor.w.s_2"] = "4600000fFG", - ["movf.s_2"] = "46000011FG", - ["movf.s_3"] = "46000011FGC", - ["movt.s_2"] = "46010011FG", - ["movt.s_3"] = "46010011FGC", - ["movz.s_3"] = "46000012FGT", - ["movn.s_3"] = "46000013FGT", ["recip.s_2"] = "46000015FG", ["rsqrt.s_2"] = "46000016FG", ["cvt.d.s_2"] = "46000021FG", ["cvt.w.s_2"] = "46000024FG", ["cvt.l.s_2"] = "46000025FG", - ["cvt.ps.s_3"] = "46000026FGH", - ["c.f.s_2"] = "46000030GH", - ["c.f.s_3"] = "46000030VGH", - ["c.un.s_2"] = "46000031GH", - ["c.un.s_3"] = "46000031VGH", - ["c.eq.s_2"] = "46000032GH", - ["c.eq.s_3"] = "46000032VGH", - ["c.ueq.s_2"] = "46000033GH", - ["c.ueq.s_3"] = "46000033VGH", - ["c.olt.s_2"] = "46000034GH", - ["c.olt.s_3"] = "46000034VGH", - ["c.ult.s_2"] = "46000035GH", - ["c.ult.s_3"] = "46000035VGH", - ["c.ole.s_2"] = "46000036GH", - ["c.ole.s_3"] = "46000036VGH", - ["c.ule.s_2"] = "46000037GH", - ["c.ule.s_3"] = "46000037VGH", - ["c.sf.s_2"] = "46000038GH", - ["c.sf.s_3"] = "46000038VGH", - ["c.ngle.s_2"] = "46000039GH", - ["c.ngle.s_3"] = "46000039VGH", - ["c.seq.s_2"] = "4600003aGH", - ["c.seq.s_3"] = "4600003aVGH", - ["c.ngl.s_2"] = "4600003bGH", - ["c.ngl.s_3"] = "4600003bVGH", - ["c.lt.s_2"] = "4600003cGH", - ["c.lt.s_3"] = "4600003cVGH", - ["c.nge.s_2"] = "4600003dGH", - ["c.nge.s_3"] = "4600003dVGH", - ["c.le.s_2"] = "4600003eGH", - ["c.le.s_3"] = "4600003eVGH", - ["c.ngt.s_2"] = "4600003fGH", - ["c.ngt.s_3"] = "4600003fVGH", - ["add.d_3"] = "46200000FGH", ["sub.d_3"] = "46200001FGH", ["mul.d_3"] = "46200002FGH", @@ -480,130 +422,410 @@ local map_op = { ["trunc.w.d_2"] = "4620000dFG", ["ceil.w.d_2"] = "4620000eFG", ["floor.w.d_2"] = "4620000fFG", - ["movf.d_2"] = "46200011FG", - ["movf.d_3"] = "46200011FGC", - ["movt.d_2"] = "46210011FG", - ["movt.d_3"] = "46210011FGC", - ["movz.d_3"] = "46200012FGT", - ["movn.d_3"] = "46200013FGT", ["recip.d_2"] = "46200015FG", ["rsqrt.d_2"] = "46200016FG", ["cvt.s.d_2"] = "46200020FG", ["cvt.w.d_2"] = "46200024FG", ["cvt.l.d_2"] = "46200025FG", - ["c.f.d_2"] = "46200030GH", - ["c.f.d_3"] = "46200030VGH", - ["c.un.d_2"] = "46200031GH", - ["c.un.d_3"] = "46200031VGH", - ["c.eq.d_2"] = "46200032GH", - ["c.eq.d_3"] = "46200032VGH", - ["c.ueq.d_2"] = "46200033GH", - ["c.ueq.d_3"] = "46200033VGH", - ["c.olt.d_2"] = "46200034GH", - ["c.olt.d_3"] = "46200034VGH", - ["c.ult.d_2"] = "46200035GH", - ["c.ult.d_3"] = "46200035VGH", - ["c.ole.d_2"] = "46200036GH", - ["c.ole.d_3"] = "46200036VGH", - ["c.ule.d_2"] = "46200037GH", - ["c.ule.d_3"] = "46200037VGH", - ["c.sf.d_2"] = "46200038GH", - ["c.sf.d_3"] = "46200038VGH", - ["c.ngle.d_2"] = "46200039GH", - ["c.ngle.d_3"] = "46200039VGH", - ["c.seq.d_2"] = "4620003aGH", - ["c.seq.d_3"] = "4620003aVGH", - ["c.ngl.d_2"] = "4620003bGH", - ["c.ngl.d_3"] = "4620003bVGH", - ["c.lt.d_2"] = "4620003cGH", - ["c.lt.d_3"] = "4620003cVGH", - ["c.nge.d_2"] = "4620003dGH", - ["c.nge.d_3"] = "4620003dVGH", - ["c.le.d_2"] = "4620003eGH", - ["c.le.d_3"] = "4620003eVGH", - ["c.ngt.d_2"] = "4620003fGH", - ["c.ngt.d_3"] = "4620003fVGH", - - ["add.ps_3"] = "46c00000FGH", - ["sub.ps_3"] = "46c00001FGH", - ["mul.ps_3"] = "46c00002FGH", - ["abs.ps_2"] = "46c00005FG", - ["mov.ps_2"] = "46c00006FG", - ["neg.ps_2"] = "46c00007FG", - ["movf.ps_2"] = "46c00011FG", - ["movf.ps_3"] = "46c00011FGC", - ["movt.ps_2"] = "46c10011FG", - ["movt.ps_3"] = "46c10011FGC", - ["movz.ps_3"] = "46c00012FGT", - ["movn.ps_3"] = "46c00013FGT", - ["cvt.s.pu_2"] = "46c00020FG", - ["cvt.s.pl_2"] = "46c00028FG", - ["pll.ps_3"] = "46c0002cFGH", - ["plu.ps_3"] = "46c0002dFGH", - ["pul.ps_3"] = "46c0002eFGH", - ["puu.ps_3"] = "46c0002fFGH", - ["c.f.ps_2"] = "46c00030GH", - ["c.f.ps_3"] = "46c00030VGH", - ["c.un.ps_2"] = "46c00031GH", - ["c.un.ps_3"] = "46c00031VGH", - ["c.eq.ps_2"] = "46c00032GH", - ["c.eq.ps_3"] = "46c00032VGH", - ["c.ueq.ps_2"] = "46c00033GH", - ["c.ueq.ps_3"] = "46c00033VGH", - ["c.olt.ps_2"] = "46c00034GH", - ["c.olt.ps_3"] = "46c00034VGH", - ["c.ult.ps_2"] = "46c00035GH", - ["c.ult.ps_3"] = "46c00035VGH", - ["c.ole.ps_2"] = "46c00036GH", - ["c.ole.ps_3"] = "46c00036VGH", - ["c.ule.ps_2"] = "46c00037GH", - ["c.ule.ps_3"] = "46c00037VGH", - ["c.sf.ps_2"] = "46c00038GH", - ["c.sf.ps_3"] = "46c00038VGH", - ["c.ngle.ps_2"] = "46c00039GH", - ["c.ngle.ps_3"] = "46c00039VGH", - ["c.seq.ps_2"] = "46c0003aGH", - ["c.seq.ps_3"] = "46c0003aVGH", - ["c.ngl.ps_2"] = "46c0003bGH", - ["c.ngl.ps_3"] = "46c0003bVGH", - ["c.lt.ps_2"] = "46c0003cGH", - ["c.lt.ps_3"] = "46c0003cVGH", - ["c.nge.ps_2"] = "46c0003dGH", - ["c.nge.ps_3"] = "46c0003dVGH", - ["c.le.ps_2"] = "46c0003eGH", - ["c.le.ps_3"] = "46c0003eVGH", - ["c.ngt.ps_2"] = "46c0003fGH", - ["c.ngt.ps_3"] = "46c0003fVGH", - ["cvt.s.w_2"] = "46800020FG", ["cvt.d.w_2"] = "46800021FG", - ["cvt.s.l_2"] = "46a00020FG", ["cvt.d.l_2"] = "46a00021FG", - - -- Opcode COP1X. - lwxc1_2 = "4c000000FX", - ldxc1_2 = "4c000001FX", - luxc1_2 = "4c000005FX", - swxc1_2 = "4c000008FX", - sdxc1_2 = "4c000009FX", - suxc1_2 = "4c00000dFX", - prefx_2 = "4c00000fMX", - ["alnv.ps_4"] = "4c00001eFGHS", - ["madd.s_4"] = "4c000020FRGH", - ["madd.d_4"] = "4c000021FRGH", - ["madd.ps_4"] = "4c000026FRGH", - ["msub.s_4"] = "4c000028FRGH", - ["msub.d_4"] = "4c000029FRGH", - ["msub.ps_4"] = "4c00002eFRGH", - ["nmadd.s_4"] = "4c000030FRGH", - ["nmadd.d_4"] = "4c000031FRGH", - ["nmadd.ps_4"] = "4c000036FRGH", - ["nmsub.s_4"] = "4c000038FRGH", - ["nmsub.d_4"] = "4c000039FRGH", - ["nmsub.ps_4"] = "4c00003eFRGH", } +if mipsr6 then -- Instructions added with MIPSR6. + + for k,v in pairs({ + + -- Add immediate to upper bits. + aui_3 = "3c000000TSI", + daui_3 = mips64 and "74000000TSI", + dahi_2 = mips64 and "04060000SI", + dati_2 = mips64 and "041e0000SI", + + -- TODO: addiupc, auipc, aluipc, lwpc, lwupc, ldpc. + + -- Compact branches. + blezalc_2 = "18000000TB", -- rt != 0. + bgezalc_2 = "18000000T=SB", -- rt != 0. + bgtzalc_2 = "1c000000TB", -- rt != 0. + bltzalc_2 = "1c000000T=SB", -- rt != 0. + + blezc_2 = "58000000TB", -- rt != 0. + bgezc_2 = "58000000T=SB", -- rt != 0. + bgec_3 = "58000000STB", -- rs != rt. + blec_3 = "58000000TSB", -- rt != rs. + + bgtzc_2 = "5c000000TB", -- rt != 0. + bltzc_2 = "5c000000T=SB", -- rt != 0. + bltc_3 = "5c000000STB", -- rs != rt. + bgtc_3 = "5c000000TSB", -- rt != rs. + + bgeuc_3 = "18000000STB", -- rs != rt. + bleuc_3 = "18000000TSB", -- rt != rs. + bltuc_3 = "1c000000STB", -- rs != rt. + bgtuc_3 = "1c000000TSB", -- rt != rs. + + beqzalc_2 = "20000000TB", -- rt != 0. + bnezalc_2 = "60000000TB", -- rt != 0. + beqc_3 = "20000000STB", -- rs < rt. + bnec_3 = "60000000STB", -- rs < rt. + bovc_3 = "20000000STB", -- rs >= rt. + bnvc_3 = "60000000STB", -- rs >= rt. + + beqzc_2 = "d8000000SK", -- rs != 0. + bnezc_2 = "f8000000SK", -- rs != 0. + jic_2 = "d8000000TI", + jialc_2 = "f8000000TI", + bc_1 = "c8000000L", + balc_1 = "e8000000L", + + -- Opcode SPECIAL. + jr_1 = "00000009S", + sdbbp_0 = "0000000e", + sdbbp_1 = "0000000eY", + lsa_4 = "00000005DSTA", + dlsa_4 = mips64 and "00000015DSTA", + seleqz_3 = "00000035DST", + selnez_3 = "00000037DST", + clz_2 = "00000050DS", + clo_2 = "00000051DS", + dclz_2 = mips64 and "00000052DS", + dclo_2 = mips64 and "00000053DS", + mul_3 = "00000098DST", + muh_3 = "000000d8DST", + mulu_3 = "00000099DST", + muhu_3 = "000000d9DST", + div_3 = "0000009aDST", + mod_3 = "000000daDST", + divu_3 = "0000009bDST", + modu_3 = "000000dbDST", + dmul_3 = mips64 and "0000009cDST", + dmuh_3 = mips64 and "000000dcDST", + dmulu_3 = mips64 and "0000009dDST", + dmuhu_3 = mips64 and "000000ddDST", + ddiv_3 = mips64 and "0000009eDST", + dmod_3 = mips64 and "000000deDST", + ddivu_3 = mips64 and "0000009fDST", + dmodu_3 = mips64 and "000000dfDST", + + -- Opcode SPECIAL3. + align_4 = "7c000220DSTA", + dalign_4 = mips64 and "7c000224DSTA", + bitswap_2 = "7c000020DT", + dbitswap_2 = mips64 and "7c000024DT", + + -- Opcode COP1. + bc1eqz_2 = "45200000HB", + bc1nez_2 = "45a00000HB", + + ["sel.s_3"] = "46000010FGH", + ["seleqz.s_3"] = "46000014FGH", + ["selnez.s_3"] = "46000017FGH", + ["maddf.s_3"] = "46000018FGH", + ["msubf.s_3"] = "46000019FGH", + ["rint.s_2"] = "4600001aFG", + ["class.s_2"] = "4600001bFG", + ["min.s_3"] = "4600001cFGH", + ["mina.s_3"] = "4600001dFGH", + ["max.s_3"] = "4600001eFGH", + ["maxa.s_3"] = "4600001fFGH", + ["cmp.af.s_3"] = "46800000FGH", + ["cmp.un.s_3"] = "46800001FGH", + ["cmp.or.s_3"] = "46800011FGH", + ["cmp.eq.s_3"] = "46800002FGH", + ["cmp.une.s_3"] = "46800012FGH", + ["cmp.ueq.s_3"] = "46800003FGH", + ["cmp.ne.s_3"] = "46800013FGH", + ["cmp.lt.s_3"] = "46800004FGH", + ["cmp.ult.s_3"] = "46800005FGH", + ["cmp.le.s_3"] = "46800006FGH", + ["cmp.ule.s_3"] = "46800007FGH", + ["cmp.saf.s_3"] = "46800008FGH", + ["cmp.sun.s_3"] = "46800009FGH", + ["cmp.sor.s_3"] = "46800019FGH", + ["cmp.seq.s_3"] = "4680000aFGH", + ["cmp.sune.s_3"] = "4680001aFGH", + ["cmp.sueq.s_3"] = "4680000bFGH", + ["cmp.sne.s_3"] = "4680001bFGH", + ["cmp.slt.s_3"] = "4680000cFGH", + ["cmp.sult.s_3"] = "4680000dFGH", + ["cmp.sle.s_3"] = "4680000eFGH", + ["cmp.sule.s_3"] = "4680000fFGH", + + ["sel.d_3"] = "46200010FGH", + ["seleqz.d_3"] = "46200014FGH", + ["selnez.d_3"] = "46200017FGH", + ["maddf.d_3"] = "46200018FGH", + ["msubf.d_3"] = "46200019FGH", + ["rint.d_2"] = "4620001aFG", + ["class.d_2"] = "4620001bFG", + ["min.d_3"] = "4620001cFGH", + ["mina.d_3"] = "4620001dFGH", + ["max.d_3"] = "4620001eFGH", + ["maxa.d_3"] = "4620001fFGH", + ["cmp.af.d_3"] = "46a00000FGH", + ["cmp.un.d_3"] = "46a00001FGH", + ["cmp.or.d_3"] = "46a00011FGH", + ["cmp.eq.d_3"] = "46a00002FGH", + ["cmp.une.d_3"] = "46a00012FGH", + ["cmp.ueq.d_3"] = "46a00003FGH", + ["cmp.ne.d_3"] = "46a00013FGH", + ["cmp.lt.d_3"] = "46a00004FGH", + ["cmp.ult.d_3"] = "46a00005FGH", + ["cmp.le.d_3"] = "46a00006FGH", + ["cmp.ule.d_3"] = "46a00007FGH", + ["cmp.saf.d_3"] = "46a00008FGH", + ["cmp.sun.d_3"] = "46a00009FGH", + ["cmp.sor.d_3"] = "46a00019FGH", + ["cmp.seq.d_3"] = "46a0000aFGH", + ["cmp.sune.d_3"] = "46a0001aFGH", + ["cmp.sueq.d_3"] = "46a0000bFGH", + ["cmp.sne.d_3"] = "46a0001bFGH", + ["cmp.slt.d_3"] = "46a0000cFGH", + ["cmp.sult.d_3"] = "46a0000dFGH", + ["cmp.sle.d_3"] = "46a0000eFGH", + ["cmp.sule.d_3"] = "46a0000fFGH", + + }) do map_op[k] = v end + +else -- Instructions removed by MIPSR6. + + for k,v in pairs({ + -- Traps, don't use. + addi_3 = "20000000TSI", + daddi_3 = mips64 and "60000000TSI", + + -- Branch on likely, don't use. + beqzl_2 = "50000000SB", + beql_3 = "50000000STB", + bnezl_2 = "54000000SB", + bnel_3 = "54000000STB", + blezl_2 = "58000000SB", + bgtzl_2 = "5c000000SB", + + lwl_2 = "88000000TO", + lwr_2 = "98000000TO", + swl_2 = "a8000000TO", + sdl_2 = mips64 and "b0000000TO", + sdr_2 = mips64 and "b1000000TO", + swr_2 = "b8000000TO", + cache_2 = "bc000000NO", + ll_2 = "c0000000TO", + pref_2 = "cc000000NO", + sc_2 = "e0000000TO", + scd_2 = mips64 and "f0000000TO", + + -- Opcode SPECIAL. + movf_2 = "00000001DS", + movf_3 = "00000001DSC", + movt_2 = "00010001DS", + movt_3 = "00010001DSC", + jr_1 = "00000008S", + movz_3 = "0000000aDST", + movn_3 = "0000000bDST", + mfhi_1 = "00000010D", + mthi_1 = "00000011S", + mflo_1 = "00000012D", + mtlo_1 = "00000013S", + mult_2 = "00000018ST", + multu_2 = "00000019ST", + div_3 = "0000001aST", + divu_3 = "0000001bST", + ddiv_3 = mips64 and "0000001eST", + ddivu_3 = mips64 and "0000001fST", + dmult_2 = mips64 and "0000001cST", + dmultu_2 = mips64 and "0000001dST", + + -- Opcode REGIMM. + tgei_2 = "04080000SI", + tgeiu_2 = "04090000SI", + tlti_2 = "040a0000SI", + tltiu_2 = "040b0000SI", + teqi_2 = "040c0000SI", + tnei_2 = "040e0000SI", + bltzal_2 = "04100000SB", + bgezal_2 = "04110000SB", + bltzall_2 = "04120000SB", + bgezall_2 = "04130000SB", + + -- Opcode SPECIAL2. + madd_2 = "70000000ST", + maddu_2 = "70000001ST", + mul_3 = "70000002DST", + msub_2 = "70000004ST", + msubu_2 = "70000005ST", + clz_2 = "70000020D=TS", + clo_2 = "70000021D=TS", + dclz_2 = mips64 and "70000024D=TS", + dclo_2 = mips64 and "70000025D=TS", + sdbbp_0 = "7000003f", + sdbbp_1 = "7000003fY", + + -- Opcode COP1. + bc1f_1 = "45000000B", + bc1f_2 = "45000000CB", + bc1t_1 = "45010000B", + bc1t_2 = "45010000CB", + bc1fl_1 = "45020000B", + bc1fl_2 = "45020000CB", + bc1tl_1 = "45030000B", + bc1tl_2 = "45030000CB", + + ["movf.s_2"] = "46000011FG", + ["movf.s_3"] = "46000011FGC", + ["movt.s_2"] = "46010011FG", + ["movt.s_3"] = "46010011FGC", + ["movz.s_3"] = "46000012FGT", + ["movn.s_3"] = "46000013FGT", + ["cvt.ps.s_3"] = "46000026FGH", + ["c.f.s_2"] = "46000030GH", + ["c.f.s_3"] = "46000030VGH", + ["c.un.s_2"] = "46000031GH", + ["c.un.s_3"] = "46000031VGH", + ["c.eq.s_2"] = "46000032GH", + ["c.eq.s_3"] = "46000032VGH", + ["c.ueq.s_2"] = "46000033GH", + ["c.ueq.s_3"] = "46000033VGH", + ["c.olt.s_2"] = "46000034GH", + ["c.olt.s_3"] = "46000034VGH", + ["c.ult.s_2"] = "46000035GH", + ["c.ult.s_3"] = "46000035VGH", + ["c.ole.s_2"] = "46000036GH", + ["c.ole.s_3"] = "46000036VGH", + ["c.ule.s_2"] = "46000037GH", + ["c.ule.s_3"] = "46000037VGH", + ["c.sf.s_2"] = "46000038GH", + ["c.sf.s_3"] = "46000038VGH", + ["c.ngle.s_2"] = "46000039GH", + ["c.ngle.s_3"] = "46000039VGH", + ["c.seq.s_2"] = "4600003aGH", + ["c.seq.s_3"] = "4600003aVGH", + ["c.ngl.s_2"] = "4600003bGH", + ["c.ngl.s_3"] = "4600003bVGH", + ["c.lt.s_2"] = "4600003cGH", + ["c.lt.s_3"] = "4600003cVGH", + ["c.nge.s_2"] = "4600003dGH", + ["c.nge.s_3"] = "4600003dVGH", + ["c.le.s_2"] = "4600003eGH", + ["c.le.s_3"] = "4600003eVGH", + ["c.ngt.s_2"] = "4600003fGH", + ["c.ngt.s_3"] = "4600003fVGH", + ["movf.d_2"] = "46200011FG", + ["movf.d_3"] = "46200011FGC", + ["movt.d_2"] = "46210011FG", + ["movt.d_3"] = "46210011FGC", + ["movz.d_3"] = "46200012FGT", + ["movn.d_3"] = "46200013FGT", + ["c.f.d_2"] = "46200030GH", + ["c.f.d_3"] = "46200030VGH", + ["c.un.d_2"] = "46200031GH", + ["c.un.d_3"] = "46200031VGH", + ["c.eq.d_2"] = "46200032GH", + ["c.eq.d_3"] = "46200032VGH", + ["c.ueq.d_2"] = "46200033GH", + ["c.ueq.d_3"] = "46200033VGH", + ["c.olt.d_2"] = "46200034GH", + ["c.olt.d_3"] = "46200034VGH", + ["c.ult.d_2"] = "46200035GH", + ["c.ult.d_3"] = "46200035VGH", + ["c.ole.d_2"] = "46200036GH", + ["c.ole.d_3"] = "46200036VGH", + ["c.ule.d_2"] = "46200037GH", + ["c.ule.d_3"] = "46200037VGH", + ["c.sf.d_2"] = "46200038GH", + ["c.sf.d_3"] = "46200038VGH", + ["c.ngle.d_2"] = "46200039GH", + ["c.ngle.d_3"] = "46200039VGH", + ["c.seq.d_2"] = "4620003aGH", + ["c.seq.d_3"] = "4620003aVGH", + ["c.ngl.d_2"] = "4620003bGH", + ["c.ngl.d_3"] = "4620003bVGH", + ["c.lt.d_2"] = "4620003cGH", + ["c.lt.d_3"] = "4620003cVGH", + ["c.nge.d_2"] = "4620003dGH", + ["c.nge.d_3"] = "4620003dVGH", + ["c.le.d_2"] = "4620003eGH", + ["c.le.d_3"] = "4620003eVGH", + ["c.ngt.d_2"] = "4620003fGH", + ["c.ngt.d_3"] = "4620003fVGH", + ["add.ps_3"] = "46c00000FGH", + ["sub.ps_3"] = "46c00001FGH", + ["mul.ps_3"] = "46c00002FGH", + ["abs.ps_2"] = "46c00005FG", + ["mov.ps_2"] = "46c00006FG", + ["neg.ps_2"] = "46c00007FG", + ["movf.ps_2"] = "46c00011FG", + ["movf.ps_3"] = "46c00011FGC", + ["movt.ps_2"] = "46c10011FG", + ["movt.ps_3"] = "46c10011FGC", + ["movz.ps_3"] = "46c00012FGT", + ["movn.ps_3"] = "46c00013FGT", + ["cvt.s.pu_2"] = "46c00020FG", + ["cvt.s.pl_2"] = "46c00028FG", + ["pll.ps_3"] = "46c0002cFGH", + ["plu.ps_3"] = "46c0002dFGH", + ["pul.ps_3"] = "46c0002eFGH", + ["puu.ps_3"] = "46c0002fFGH", + ["c.f.ps_2"] = "46c00030GH", + ["c.f.ps_3"] = "46c00030VGH", + ["c.un.ps_2"] = "46c00031GH", + ["c.un.ps_3"] = "46c00031VGH", + ["c.eq.ps_2"] = "46c00032GH", + ["c.eq.ps_3"] = "46c00032VGH", + ["c.ueq.ps_2"] = "46c00033GH", + ["c.ueq.ps_3"] = "46c00033VGH", + ["c.olt.ps_2"] = "46c00034GH", + ["c.olt.ps_3"] = "46c00034VGH", + ["c.ult.ps_2"] = "46c00035GH", + ["c.ult.ps_3"] = "46c00035VGH", + ["c.ole.ps_2"] = "46c00036GH", + ["c.ole.ps_3"] = "46c00036VGH", + ["c.ule.ps_2"] = "46c00037GH", + ["c.ule.ps_3"] = "46c00037VGH", + ["c.sf.ps_2"] = "46c00038GH", + ["c.sf.ps_3"] = "46c00038VGH", + ["c.ngle.ps_2"] = "46c00039GH", + ["c.ngle.ps_3"] = "46c00039VGH", + ["c.seq.ps_2"] = "46c0003aGH", + ["c.seq.ps_3"] = "46c0003aVGH", + ["c.ngl.ps_2"] = "46c0003bGH", + ["c.ngl.ps_3"] = "46c0003bVGH", + ["c.lt.ps_2"] = "46c0003cGH", + ["c.lt.ps_3"] = "46c0003cVGH", + ["c.nge.ps_2"] = "46c0003dGH", + ["c.nge.ps_3"] = "46c0003dVGH", + ["c.le.ps_2"] = "46c0003eGH", + ["c.le.ps_3"] = "46c0003eVGH", + ["c.ngt.ps_2"] = "46c0003fGH", + ["c.ngt.ps_3"] = "46c0003fVGH", + + -- Opcode COP1X. + lwxc1_2 = "4c000000FX", + ldxc1_2 = "4c000001FX", + luxc1_2 = "4c000005FX", + swxc1_2 = "4c000008FX", + sdxc1_2 = "4c000009FX", + suxc1_2 = "4c00000dFX", + prefx_2 = "4c00000fMX", + ["alnv.ps_4"] = "4c00001eFGHS", + ["madd.s_4"] = "4c000020FRGH", + ["madd.d_4"] = "4c000021FRGH", + ["madd.ps_4"] = "4c000026FRGH", + ["msub.s_4"] = "4c000028FRGH", + ["msub.d_4"] = "4c000029FRGH", + ["msub.ps_4"] = "4c00002eFRGH", + ["nmadd.s_4"] = "4c000030FRGH", + ["nmadd.d_4"] = "4c000031FRGH", + ["nmadd.ps_4"] = "4c000036FRGH", + ["nmsub.s_4"] = "4c000038FRGH", + ["nmsub.d_4"] = "4c000039FRGH", + ["nmsub.ps_4"] = "4c00003eFRGH", + + }) do map_op[k] = v end + +end + ------------------------------------------------------------------------------ local function parse_gpr(expr) @@ -633,7 +855,7 @@ local function parse_fpr(expr) werror("bad register name `"..expr.."'") end -local function parse_imm(imm, bits, shift, scale, signed) +local function parse_imm(imm, bits, shift, scale, signed, action) local n = tonumber(imm) if n then local m = sar(n, scale) @@ -651,7 +873,8 @@ local function parse_imm(imm, bits, shift, scale, signed) match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then werror("expected immediate operand, got register") else - waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) + waction(action or "IMM", + (signed and 32768 or 0)+shl(scale, 10)+shl(bits, 5)+shift, imm) return 0 end end @@ -756,13 +979,18 @@ map_op[".template__"] = function(params, template, nparams) op = op + parse_disp(params[n]); n = n + 1 elseif p == "X" then op = op + parse_index(params[n]); n = n + 1 - elseif p == "B" or p == "J" then + elseif p == "B" or p == "J" or p == "K" or p == "L" then local mode, m, s = parse_label(params[n], false) - if p == "B" then m = m + 2048 end + if p == "J" then m = m + 0xa800 + elseif p == "K" then m = m + 0x5000 + elseif p == "L" then m = m + 0xa000 end waction("REL_"..mode, m, s, 1) n = n + 1 elseif p == "A" then op = op + parse_imm(params[n], 5, 6, 0, false); n = n + 1 + elseif p == "a" then + local m = parse_imm(params[n], 6, 6, 0, false, "IMMS"); n = n + 1 + op = op + band(m, 0x7c0) + band(shr(m, 9), 4) elseif p == "M" then op = op + parse_imm(params[n], 5, 11, 0, false); n = n + 1 elseif p == "N" then @@ -778,7 +1006,7 @@ map_op[".template__"] = function(params, template, nparams) elseif p == "Z" then op = op + parse_imm(params[n], 10, 6, 0, false); n = n + 1 elseif p == "=" then - op = op + shl(band(op, 0xf800), 5) -- Copy D to T for clz, clo. + n = n - 1 -- Re-use previous parameter for next template char. else assert(false) end diff --git a/dynasm/dasm_mips64.lua b/dynasm/dasm_mips64.lua new file mode 100644 index 00000000..b4f8707d --- /dev/null +++ b/dynasm/dasm_mips64.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM MIPS64 module. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ +-- This module just sets 64 bit mode for the combined MIPS/MIPS64 module. +-- All the interesting stuff is there. +------------------------------------------------------------------------------ + +mips64 = true -- Using a global is an ugly, but effective solution. +return require("dasm_mips") diff --git a/dynasm/dasm_ppc.h b/dynasm/dasm_ppc.h index 4ea6729f..fdb89bce 100644 --- a/dynasm/dasm_ppc.h +++ b/dynasm/dasm_ppc.h @@ -1,5 +1,5 @@ /* -** DynASM PPC encoding engine. +** DynASM PPC/PPC64 encoding engine. ** Copyright (C) 2005-2022 Mike Pall. All rights reserved. ** Released under the MIT license. See dynasm.lua for full copyright notice. */ @@ -21,7 +21,7 @@ enum { /* The following actions need a buffer position. */ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, /* The following actions also have an argument. */ - DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, + DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMSH, DASM__MAX }; @@ -244,6 +244,10 @@ void dasm_put(Dst_DECL, int start, ...) #endif b[pos++] = n; break; + case DASM_IMMSH: + CK((n >> 6) == 0, RANGE_I); + b[pos++] = n; + break; } } } @@ -273,7 +277,7 @@ int dasm_link(Dst_DECL, size_t *szp) { /* Handle globals not defined in this translation unit. */ int idx; - for (idx = 20; idx*sizeof(int) < D->lgsize; idx++) { + for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { int n = D->lglabels[idx]; /* Undefined label: Collapse rel chain and replace with marker (< 0). */ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } @@ -299,7 +303,7 @@ int dasm_link(Dst_DECL, size_t *szp) case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; case DASM_REL_LG: case DASM_REL_PC: pos++; break; case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; - case DASM_IMM: pos++; break; + case DASM_IMM: case DASM_IMMSH: pos++; break; } } stop: (void)0; @@ -349,7 +353,10 @@ int dasm_encode(Dst_DECL, void *buffer) ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; break; case DASM_REL_LG: - CK(n >= 0, UNDEF_LG); + if (n < 0) { + n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp); + goto patchrel; + } /* fallthrough */ case DASM_REL_PC: CK(n >= 0, UNDEF_PC); @@ -367,6 +374,9 @@ int dasm_encode(Dst_DECL, void *buffer) case DASM_IMM: cp[-1] |= (n & ((1<<((ins>>5)&31))-1)) << (ins&31); break; + case DASM_IMMSH: + cp[-1] |= (ins & 1) ? ((n&31)<<11)|((n&32)>>4) : ((n&31)<<6)|(n&32); + break; default: *cp++ = ins; break; } } diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua index c05d6573..3624e882 100644 --- a/dynasm/dasm_ppc.lua +++ b/dynasm/dasm_ppc.lua @@ -1,17 +1,19 @@ ------------------------------------------------------------------------------ --- DynASM PPC module. +-- DynASM PPC/PPC64 module. -- -- Copyright (C) 2005-2022 Mike Pall. All rights reserved. -- See dynasm.lua for full copyright notice. +-- +-- Support for various extensions contributed by Caio Souza Oliveira. ------------------------------------------------------------------------------ -- Module information: local _info = { arch = "ppc", description = "DynASM PPC module", - version = "1.3.0", - vernum = 10300, - release = "2011-05-05", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", license = "MIT", } @@ -39,7 +41,7 @@ local wline, werror, wfatal, wwarn local action_names = { "STOP", "SECTION", "ESC", "REL_EXT", "ALIGN", "REL_LG", "LABEL_LG", - "REL_PC", "LABEL_PC", "IMM", + "REL_PC", "LABEL_PC", "IMM", "IMMSH" } -- Maximum number of section buffer positions for dasm_put(). @@ -228,8 +230,18 @@ local map_cond = { ------------------------------------------------------------------------------ +local map_op, op_template + +local function op_alias(opname, f) + return function(params, nparams) + if not params then return "-> "..opname:sub(1, -3) end + f(params, nparams) + op_template(params, map_op[opname], nparams) + end +end + -- Template strings for PPC instructions. -local map_op = { +map_op = { tdi_3 = "08000000ARI", twi_3 = "0c000000ARI", mulli_3 = "1c000000RRI", @@ -297,6 +309,250 @@ local map_op = { std_2 = "f8000000RD", stdu_2 = "f8000001RD", + subi_3 = op_alias("addi_3", function(p) p[3] = "-("..p[3]..")" end), + subis_3 = op_alias("addis_3", function(p) p[3] = "-("..p[3]..")" end), + subic_3 = op_alias("addic_3", function(p) p[3] = "-("..p[3]..")" end), + ["subic._3"] = op_alias("addic._3", function(p) p[3] = "-("..p[3]..")" end), + + rotlwi_3 = op_alias("rlwinm_5", function(p) + p[4] = "0"; p[5] = "31" + end), + rotrwi_3 = op_alias("rlwinm_5", function(p) + p[3] = "32-("..p[3]..")"; p[4] = "0"; p[5] = "31" + end), + rotlw_3 = op_alias("rlwnm_5", function(p) + p[4] = "0"; p[5] = "31" + end), + slwi_3 = op_alias("rlwinm_5", function(p) + p[5] = "31-("..p[3]..")"; p[4] = "0" + end), + srwi_3 = op_alias("rlwinm_5", function(p) + p[4] = p[3]; p[3] = "32-("..p[3]..")"; p[5] = "31" + end), + clrlwi_3 = op_alias("rlwinm_5", function(p) + p[4] = p[3]; p[3] = "0"; p[5] = "31" + end), + clrrwi_3 = op_alias("rlwinm_5", function(p) + p[5] = "31-("..p[3]..")"; p[3] = "0"; p[4] = "0" + end), + + -- Primary opcode 4: + mulhhwu_3 = "10000010RRR.", + machhwu_3 = "10000018RRR.", + mulhhw_3 = "10000050RRR.", + nmachhw_3 = "1000005cRRR.", + machhwsu_3 = "10000098RRR.", + machhws_3 = "100000d8RRR.", + nmachhws_3 = "100000dcRRR.", + mulchwu_3 = "10000110RRR.", + macchwu_3 = "10000118RRR.", + mulchw_3 = "10000150RRR.", + macchw_3 = "10000158RRR.", + nmacchw_3 = "1000015cRRR.", + macchwsu_3 = "10000198RRR.", + macchws_3 = "100001d8RRR.", + nmacchws_3 = "100001dcRRR.", + mullhw_3 = "10000350RRR.", + maclhw_3 = "10000358RRR.", + nmaclhw_3 = "1000035cRRR.", + maclhwsu_3 = "10000398RRR.", + maclhws_3 = "100003d8RRR.", + nmaclhws_3 = "100003dcRRR.", + machhwuo_3 = "10000418RRR.", + nmachhwo_3 = "1000045cRRR.", + machhwsuo_3 = "10000498RRR.", + machhwso_3 = "100004d8RRR.", + nmachhwso_3 = "100004dcRRR.", + macchwuo_3 = "10000518RRR.", + macchwo_3 = "10000558RRR.", + nmacchwo_3 = "1000055cRRR.", + macchwsuo_3 = "10000598RRR.", + macchwso_3 = "100005d8RRR.", + nmacchwso_3 = "100005dcRRR.", + maclhwo_3 = "10000758RRR.", + nmaclhwo_3 = "1000075cRRR.", + maclhwsuo_3 = "10000798RRR.", + maclhwso_3 = "100007d8RRR.", + nmaclhwso_3 = "100007dcRRR.", + + vaddubm_3 = "10000000VVV", + vmaxub_3 = "10000002VVV", + vrlb_3 = "10000004VVV", + vcmpequb_3 = "10000006VVV", + vmuloub_3 = "10000008VVV", + vaddfp_3 = "1000000aVVV", + vmrghb_3 = "1000000cVVV", + vpkuhum_3 = "1000000eVVV", + vmhaddshs_4 = "10000020VVVV", + vmhraddshs_4 = "10000021VVVV", + vmladduhm_4 = "10000022VVVV", + vmsumubm_4 = "10000024VVVV", + vmsummbm_4 = "10000025VVVV", + vmsumuhm_4 = "10000026VVVV", + vmsumuhs_4 = "10000027VVVV", + vmsumshm_4 = "10000028VVVV", + vmsumshs_4 = "10000029VVVV", + vsel_4 = "1000002aVVVV", + vperm_4 = "1000002bVVVV", + vsldoi_4 = "1000002cVVVP", + vpermxor_4 = "1000002dVVVV", + vmaddfp_4 = "1000002eVVVV~", + vnmsubfp_4 = "1000002fVVVV~", + vaddeuqm_4 = "1000003cVVVV", + vaddecuq_4 = "1000003dVVVV", + vsubeuqm_4 = "1000003eVVVV", + vsubecuq_4 = "1000003fVVVV", + vadduhm_3 = "10000040VVV", + vmaxuh_3 = "10000042VVV", + vrlh_3 = "10000044VVV", + vcmpequh_3 = "10000046VVV", + vmulouh_3 = "10000048VVV", + vsubfp_3 = "1000004aVVV", + vmrghh_3 = "1000004cVVV", + vpkuwum_3 = "1000004eVVV", + vadduwm_3 = "10000080VVV", + vmaxuw_3 = "10000082VVV", + vrlw_3 = "10000084VVV", + vcmpequw_3 = "10000086VVV", + vmulouw_3 = "10000088VVV", + vmuluwm_3 = "10000089VVV", + vmrghw_3 = "1000008cVVV", + vpkuhus_3 = "1000008eVVV", + vaddudm_3 = "100000c0VVV", + vmaxud_3 = "100000c2VVV", + vrld_3 = "100000c4VVV", + vcmpeqfp_3 = "100000c6VVV", + vcmpequd_3 = "100000c7VVV", + vpkuwus_3 = "100000ceVVV", + vadduqm_3 = "10000100VVV", + vmaxsb_3 = "10000102VVV", + vslb_3 = "10000104VVV", + vmulosb_3 = "10000108VVV", + vrefp_2 = "1000010aV-V", + vmrglb_3 = "1000010cVVV", + vpkshus_3 = "1000010eVVV", + vaddcuq_3 = "10000140VVV", + vmaxsh_3 = "10000142VVV", + vslh_3 = "10000144VVV", + vmulosh_3 = "10000148VVV", + vrsqrtefp_2 = "1000014aV-V", + vmrglh_3 = "1000014cVVV", + vpkswus_3 = "1000014eVVV", + vaddcuw_3 = "10000180VVV", + vmaxsw_3 = "10000182VVV", + vslw_3 = "10000184VVV", + vmulosw_3 = "10000188VVV", + vexptefp_2 = "1000018aV-V", + vmrglw_3 = "1000018cVVV", + vpkshss_3 = "1000018eVVV", + vmaxsd_3 = "100001c2VVV", + vsl_3 = "100001c4VVV", + vcmpgefp_3 = "100001c6VVV", + vlogefp_2 = "100001caV-V", + vpkswss_3 = "100001ceVVV", + vadduhs_3 = "10000240VVV", + vminuh_3 = "10000242VVV", + vsrh_3 = "10000244VVV", + vcmpgtuh_3 = "10000246VVV", + vmuleuh_3 = "10000248VVV", + vrfiz_2 = "1000024aV-V", + vsplth_3 = "1000024cVV3", + vupkhsh_2 = "1000024eV-V", + vminuw_3 = "10000282VVV", + vminud_3 = "100002c2VVV", + vcmpgtud_3 = "100002c7VVV", + vrfim_2 = "100002caV-V", + vcmpgtsb_3 = "10000306VVV", + vcfux_3 = "1000030aVVA~", + vaddshs_3 = "10000340VVV", + vminsh_3 = "10000342VVV", + vsrah_3 = "10000344VVV", + vcmpgtsh_3 = "10000346VVV", + vmulesh_3 = "10000348VVV", + vcfsx_3 = "1000034aVVA~", + vspltish_2 = "1000034cVS", + vupkhpx_2 = "1000034eV-V", + vaddsws_3 = "10000380VVV", + vminsw_3 = "10000382VVV", + vsraw_3 = "10000384VVV", + vcmpgtsw_3 = "10000386VVV", + vmulesw_3 = "10000388VVV", + vctuxs_3 = "1000038aVVA~", + vspltisw_2 = "1000038cVS", + vminsd_3 = "100003c2VVV", + vsrad_3 = "100003c4VVV", + vcmpbfp_3 = "100003c6VVV", + vcmpgtsd_3 = "100003c7VVV", + vctsxs_3 = "100003caVVA~", + vupklpx_2 = "100003ceV-V", + vsububm_3 = "10000400VVV", + ["bcdadd._4"] = "10000401VVVy.", + vavgub_3 = "10000402VVV", + vand_3 = "10000404VVV", + ["vcmpequb._3"] = "10000406VVV", + vmaxfp_3 = "1000040aVVV", + vsubuhm_3 = "10000440VVV", + ["bcdsub._4"] = "10000441VVVy.", + vavguh_3 = "10000442VVV", + vandc_3 = "10000444VVV", + ["vcmpequh._3"] = "10000446VVV", + vminfp_3 = "1000044aVVV", + vpkudum_3 = "1000044eVVV", + vsubuwm_3 = "10000480VVV", + vavguw_3 = "10000482VVV", + vor_3 = "10000484VVV", + ["vcmpequw._3"] = "10000486VVV", + vpmsumw_3 = "10000488VVV", + ["vcmpeqfp._3"] = "100004c6VVV", + ["vcmpequd._3"] = "100004c7VVV", + vpkudus_3 = "100004ceVVV", + vavgsb_3 = "10000502VVV", + vavgsh_3 = "10000542VVV", + vorc_3 = "10000544VVV", + vbpermq_3 = "1000054cVVV", + vpksdus_3 = "1000054eVVV", + vavgsw_3 = "10000582VVV", + vsld_3 = "100005c4VVV", + ["vcmpgefp._3"] = "100005c6VVV", + vpksdss_3 = "100005ceVVV", + vsububs_3 = "10000600VVV", + mfvscr_1 = "10000604V--", + vsum4ubs_3 = "10000608VVV", + vsubuhs_3 = "10000640VVV", + mtvscr_1 = "10000644--V", + ["vcmpgtuh._3"] = "10000646VVV", + vsum4shs_3 = "10000648VVV", + vupkhsw_2 = "1000064eV-V", + vsubuws_3 = "10000680VVV", + vshasigmaw_4 = "10000682VVYp", + veqv_3 = "10000684VVV", + vsum2sws_3 = "10000688VVV", + vmrgow_3 = "1000068cVVV", + vshasigmad_4 = "100006c2VVYp", + vsrd_3 = "100006c4VVV", + ["vcmpgtud._3"] = "100006c7VVV", + vupklsw_2 = "100006ceV-V", + vupkslw_2 = "100006ceV-V", + vsubsbs_3 = "10000700VVV", + vclzb_2 = "10000702V-V", + vpopcntb_2 = "10000703V-V", + ["vcmpgtsb._3"] = "10000706VVV", + vsum4sbs_3 = "10000708VVV", + vsubshs_3 = "10000740VVV", + vclzh_2 = "10000742V-V", + vpopcnth_2 = "10000743V-V", + ["vcmpgtsh._3"] = "10000746VVV", + vsubsws_3 = "10000780VVV", + vclzw_2 = "10000782V-V", + vpopcntw_2 = "10000783V-V", + ["vcmpgtsw._3"] = "10000786VVV", + vsumsws_3 = "10000788VVV", + vmrgew_3 = "1000078cVVV", + vclzd_2 = "100007c2V-V", + vpopcntd_2 = "100007c3V-V", + ["vcmpbfp._3"] = "100007c6VVV", + ["vcmpgtsd._3"] = "100007c7VVV", + -- Primary opcode 19: mcrf_2 = "4c000000XX", isync_0 = "4c00012c", @@ -316,6 +572,8 @@ local map_op = { bclrl_2 = "4c000021AA", bcctr_2 = "4c000420AA", bcctrl_2 = "4c000421AA", + bctar_2 = "4c000460AA", + bctarl_2 = "4c000461AA", blr_0 = "4e800020", blrl_0 = "4e800021", bctr_0 = "4e800420", @@ -327,6 +585,7 @@ local map_op = { cmpd_3 = "7c200000XRR", cmpd_2 = "7c200000-RR", tw_3 = "7c000008ARR", + lvsl_3 = "7c00000cVRR", subfc_3 = "7c000010RRR.", subc_3 = "7c000010RRR~.", mulhdu_3 = "7c000012RRR.", @@ -351,50 +610,68 @@ local map_op = { cmplw_2 = "7c000040-RR", cmpld_3 = "7c200040XRR", cmpld_2 = "7c200040-RR", + lvsr_3 = "7c00004cVRR", subf_3 = "7c000050RRR.", sub_3 = "7c000050RRR~.", + lbarx_3 = "7c000068RR0R", ldux_3 = "7c00006aRR0R", dcbst_2 = "7c00006c-RR", lwzux_3 = "7c00006eRR0R", cntlzd_2 = "7c000074RR~", andc_3 = "7c000078RR~R.", td_3 = "7c000088ARR", + lvewx_3 = "7c00008eVRR", mulhd_3 = "7c000092RRR.", + addg6s_3 = "7c000094RRR", mulhw_3 = "7c000096RRR.", + dlmzb_3 = "7c00009cRR~R.", ldarx_3 = "7c0000a8RR0R", dcbf_2 = "7c0000ac-RR", lbzx_3 = "7c0000aeRR0R", + lvx_3 = "7c0000ceVRR", neg_2 = "7c0000d0RR.", + lharx_3 = "7c0000e8RR0R", lbzux_3 = "7c0000eeRR0R", popcntb_2 = "7c0000f4RR~", not_2 = "7c0000f8RR~%.", nor_3 = "7c0000f8RR~R.", + stvebx_3 = "7c00010eVRR", subfe_3 = "7c000110RRR.", sube_3 = "7c000110RRR~.", adde_3 = "7c000114RRR.", stdx_3 = "7c00012aRR0R", - stwcx_3 = "7c00012cRR0R.", + ["stwcx._3"] = "7c00012dRR0R.", stwx_3 = "7c00012eRR0R", prtyw_2 = "7c000134RR~", + stvehx_3 = "7c00014eVRR", stdux_3 = "7c00016aRR0R", + ["stqcx._3"] = "7c00016dR:R0R.", stwux_3 = "7c00016eRR0R", prtyd_2 = "7c000174RR~", + stvewx_3 = "7c00018eVRR", subfze_2 = "7c000190RR.", addze_2 = "7c000194RR.", - stdcx_3 = "7c0001acRR0R.", + ["stdcx._3"] = "7c0001adRR0R.", stbx_3 = "7c0001aeRR0R", + stvx_3 = "7c0001ceVRR", subfme_2 = "7c0001d0RR.", mulld_3 = "7c0001d2RRR.", addme_2 = "7c0001d4RR.", mullw_3 = "7c0001d6RRR.", dcbtst_2 = "7c0001ec-RR", stbux_3 = "7c0001eeRR0R", + bpermd_3 = "7c0001f8RR~R", + lvepxl_3 = "7c00020eVRR", add_3 = "7c000214RRR.", + lqarx_3 = "7c000228R:R0R", dcbt_2 = "7c00022c-RR", lhzx_3 = "7c00022eRR0R", + cdtbcd_2 = "7c000234RR~", eqv_3 = "7c000238RR~R.", + lvepx_3 = "7c00024eVRR", eciwx_3 = "7c00026cRR0R", lhzux_3 = "7c00026eRR0R", + cbcdtd_2 = "7c000274RR~", xor_3 = "7c000278RR~R.", mfspefscr_1 = "7c0082a6R", mfxer_1 = "7c0102a6R", @@ -404,8 +681,12 @@ local map_op = { lhax_3 = "7c0002aeRR0R", mftb_1 = "7c0c42e6R", mftbu_1 = "7c0d42e6R", + lvxl_3 = "7c0002ceVRR", lwaux_3 = "7c0002eaRR0R", lhaux_3 = "7c0002eeRR0R", + popcntw_2 = "7c0002f4RR~", + divdeu_3 = "7c000312RRR.", + divweu_3 = "7c000316RRR.", sthx_3 = "7c00032eRR0R", orc_3 = "7c000338RR~R.", ecowx_3 = "7c00036cRR0R", @@ -420,10 +701,14 @@ local map_op = { mtctr_1 = "7c0903a6R", dcbi_2 = "7c0003ac-RR", nand_3 = "7c0003b8RR~R.", + dsn_2 = "7c0003c6-RR", + stvxl_3 = "7c0003ceVRR", divd_3 = "7c0003d2RRR.", divw_3 = "7c0003d6RRR.", + popcntd_2 = "7c0003f4RR~", cmpb_3 = "7c0003f8RR~R.", mcrxr_1 = "7c000400X", + lbdx_3 = "7c000406RRR", subfco_3 = "7c000410RRR.", subco_3 = "7c000410RRR~.", addco_3 = "7c000414RRR.", @@ -433,16 +718,20 @@ local map_op = { lfsx_3 = "7c00042eFR0R", srw_3 = "7c000430RR~R.", srd_3 = "7c000436RR~R.", + lhdx_3 = "7c000446RRR", subfo_3 = "7c000450RRR.", subo_3 = "7c000450RRR~.", lfsux_3 = "7c00046eFR0R", + lwdx_3 = "7c000486RRR", lswi_3 = "7c0004aaRR0A", sync_0 = "7c0004ac", lwsync_0 = "7c2004ac", ptesync_0 = "7c4004ac", lfdx_3 = "7c0004aeFR0R", + lddx_3 = "7c0004c6RRR", nego_2 = "7c0004d0RR.", lfdux_3 = "7c0004eeFR0R", + stbdx_3 = "7c000506RRR", subfeo_3 = "7c000510RRR.", subeo_3 = "7c000510RRR~.", addeo_3 = "7c000514RRR.", @@ -450,27 +739,42 @@ local map_op = { stswx_3 = "7c00052aRR0R", stwbrx_3 = "7c00052cRR0R", stfsx_3 = "7c00052eFR0R", + sthdx_3 = "7c000546RRR", + ["stbcx._3"] = "7c00056dRRR", stfsux_3 = "7c00056eFR0R", + stwdx_3 = "7c000586RRR", subfzeo_2 = "7c000590RR.", addzeo_2 = "7c000594RR.", stswi_3 = "7c0005aaRR0A", + ["sthcx._3"] = "7c0005adRRR", stfdx_3 = "7c0005aeFR0R", + stddx_3 = "7c0005c6RRR", subfmeo_2 = "7c0005d0RR.", mulldo_3 = "7c0005d2RRR.", addmeo_2 = "7c0005d4RR.", mullwo_3 = "7c0005d6RRR.", dcba_2 = "7c0005ec-RR", stfdux_3 = "7c0005eeFR0R", + stvepxl_3 = "7c00060eVRR", addo_3 = "7c000614RRR.", lhbrx_3 = "7c00062cRR0R", + lfdpx_3 = "7c00062eF:RR", sraw_3 = "7c000630RR~R.", srad_3 = "7c000634RR~R.", + lfddx_3 = "7c000646FRR", + stvepx_3 = "7c00064eVRR", srawi_3 = "7c000670RR~A.", sradi_3 = "7c000674RR~H.", eieio_0 = "7c0006ac", lfiwax_3 = "7c0006aeFR0R", + divdeuo_3 = "7c000712RRR.", + divweuo_3 = "7c000716RRR.", sthbrx_3 = "7c00072cRR0R", + stfdpx_3 = "7c00072eF:RR", extsh_2 = "7c000734RR~.", + stfddx_3 = "7c000746FRR", + divdeo_3 = "7c000752RRR.", + divweo_3 = "7c000756RRR.", extsb_2 = "7c000774RR~.", divduo_3 = "7c000792RRR.", divwou_3 = "7c000796RRR.", @@ -481,6 +785,40 @@ local map_op = { divwo_3 = "7c0007d6RRR.", dcbz_2 = "7c0007ec-RR", + ["tbegin._1"] = "7c00051d1", + ["tbegin._0"] = "7c00051d", + ["tend._1"] = "7c00055dY", + ["tend._0"] = "7c00055d", + ["tendall._0"] = "7e00055d", + tcheck_1 = "7c00059cX", + ["tsr._1"] = "7c0005dd1", + ["tsuspend._0"] = "7c0005dd", + ["tresume._0"] = "7c2005dd", + ["tabortwc._3"] = "7c00061dARR", + ["tabortdc._3"] = "7c00065dARR", + ["tabortwci._3"] = "7c00069dARS", + ["tabortdci._3"] = "7c0006ddARS", + ["tabort._1"] = "7c00071d-R-", + ["treclaim._1"] = "7c00075d-R", + ["trechkpt._0"] = "7c0007dd", + + lxsiwzx_3 = "7c000018QRR", + lxsiwax_3 = "7c000098QRR", + mfvsrd_2 = "7c000066-Rq", + mfvsrwz_2 = "7c0000e6-Rq", + stxsiwx_3 = "7c000118QRR", + mtvsrd_2 = "7c000166QR", + mtvsrwa_2 = "7c0001a6QR", + lxvdsx_3 = "7c000298QRR", + lxsspx_3 = "7c000418QRR", + lxsdx_3 = "7c000498QRR", + stxsspx_3 = "7c000518QRR", + stxsdx_3 = "7c000598QRR", + lxvw4x_3 = "7c000618QRR", + lxvd2x_3 = "7c000698QRR", + stxvw4x_3 = "7c000718QRR", + stxvd2x_3 = "7c000798QRR", + -- Primary opcode 30: rldicl_4 = "78000000RR~HM.", rldicr_4 = "78000004RR~HM.", @@ -489,6 +827,34 @@ local map_op = { rldcl_4 = "78000010RR~RM.", rldcr_4 = "78000012RR~RM.", + rotldi_3 = op_alias("rldicl_4", function(p) + p[4] = "0" + end), + rotrdi_3 = op_alias("rldicl_4", function(p) + p[3] = "64-("..p[3]..")"; p[4] = "0" + end), + rotld_3 = op_alias("rldcl_4", function(p) + p[4] = "0" + end), + sldi_3 = op_alias("rldicr_4", function(p) + p[4] = "63-("..p[3]..")" + end), + srdi_3 = op_alias("rldicl_4", function(p) + p[4] = p[3]; p[3] = "64-("..p[3]..")" + end), + clrldi_3 = op_alias("rldicl_4", function(p) + p[4] = p[3]; p[3] = "0" + end), + clrrdi_3 = op_alias("rldicr_4", function(p) + p[4] = "63-("..p[3]..")"; p[3] = "0" + end), + + -- Primary opcode 56: + lq_2 = "e0000000R:D", -- NYI: displacement must be divisible by 8. + + -- Primary opcode 57: + lfdp_2 = "e4000000F:D", -- NYI: displacement must be divisible by 4. + -- Primary opcode 59: fdivs_3 = "ec000024FFF.", fsubs_3 = "ec000028FFF.", @@ -501,6 +867,200 @@ local map_op = { fmadds_4 = "ec00003aFFFF~.", fnmsubs_4 = "ec00003cFFFF~.", fnmadds_4 = "ec00003eFFFF~.", + fcfids_2 = "ec00069cF-F.", + fcfidus_2 = "ec00079cF-F.", + + dadd_3 = "ec000004FFF.", + dqua_4 = "ec000006FFFZ.", + dmul_3 = "ec000044FFF.", + drrnd_4 = "ec000046FFFZ.", + dscli_3 = "ec000084FF6.", + dquai_4 = "ec000086SF~FZ.", + dscri_3 = "ec0000c4FF6.", + drintx_4 = "ec0000c61F~FZ.", + dcmpo_3 = "ec000104XFF", + dtstex_3 = "ec000144XFF", + dtstdc_3 = "ec000184XF6", + dtstdg_3 = "ec0001c4XF6", + drintn_4 = "ec0001c61F~FZ.", + dctdp_2 = "ec000204F-F.", + dctfix_2 = "ec000244F-F.", + ddedpd_3 = "ec000284ZF~F.", + dxex_2 = "ec0002c4F-F.", + dsub_3 = "ec000404FFF.", + ddiv_3 = "ec000444FFF.", + dcmpu_3 = "ec000504XFF", + dtstsf_3 = "ec000544XFF", + drsp_2 = "ec000604F-F.", + dcffix_2 = "ec000644F-F.", + denbcd_3 = "ec000684YF~F.", + diex_3 = "ec0006c4FFF.", + + -- Primary opcode 60: + xsaddsp_3 = "f0000000QQQ", + xsmaddasp_3 = "f0000008QQQ", + xxsldwi_4 = "f0000010QQQz", + xsrsqrtesp_2 = "f0000028Q-Q", + xssqrtsp_2 = "f000002cQ-Q", + xxsel_4 = "f0000030QQQQ", + xssubsp_3 = "f0000040QQQ", + xsmaddmsp_3 = "f0000048QQQ", + xxpermdi_4 = "f0000050QQQz", + xsresp_2 = "f0000068Q-Q", + xsmulsp_3 = "f0000080QQQ", + xsmsubasp_3 = "f0000088QQQ", + xxmrghw_3 = "f0000090QQQ", + xsdivsp_3 = "f00000c0QQQ", + xsmsubmsp_3 = "f00000c8QQQ", + xsadddp_3 = "f0000100QQQ", + xsmaddadp_3 = "f0000108QQQ", + xscmpudp_3 = "f0000118XQQ", + xscvdpuxws_2 = "f0000120Q-Q", + xsrdpi_2 = "f0000124Q-Q", + xsrsqrtedp_2 = "f0000128Q-Q", + xssqrtdp_2 = "f000012cQ-Q", + xssubdp_3 = "f0000140QQQ", + xsmaddmdp_3 = "f0000148QQQ", + xscmpodp_3 = "f0000158XQQ", + xscvdpsxws_2 = "f0000160Q-Q", + xsrdpiz_2 = "f0000164Q-Q", + xsredp_2 = "f0000168Q-Q", + xsmuldp_3 = "f0000180QQQ", + xsmsubadp_3 = "f0000188QQQ", + xxmrglw_3 = "f0000190QQQ", + xsrdpip_2 = "f00001a4Q-Q", + xstsqrtdp_2 = "f00001a8X-Q", + xsrdpic_2 = "f00001acQ-Q", + xsdivdp_3 = "f00001c0QQQ", + xsmsubmdp_3 = "f00001c8QQQ", + xsrdpim_2 = "f00001e4Q-Q", + xstdivdp_3 = "f00001e8XQQ", + xvaddsp_3 = "f0000200QQQ", + xvmaddasp_3 = "f0000208QQQ", + xvcmpeqsp_3 = "f0000218QQQ", + xvcvspuxws_2 = "f0000220Q-Q", + xvrspi_2 = "f0000224Q-Q", + xvrsqrtesp_2 = "f0000228Q-Q", + xvsqrtsp_2 = "f000022cQ-Q", + xvsubsp_3 = "f0000240QQQ", + xvmaddmsp_3 = "f0000248QQQ", + xvcmpgtsp_3 = "f0000258QQQ", + xvcvspsxws_2 = "f0000260Q-Q", + xvrspiz_2 = "f0000264Q-Q", + xvresp_2 = "f0000268Q-Q", + xvmulsp_3 = "f0000280QQQ", + xvmsubasp_3 = "f0000288QQQ", + xxspltw_3 = "f0000290QQg~", + xvcmpgesp_3 = "f0000298QQQ", + xvcvuxwsp_2 = "f00002a0Q-Q", + xvrspip_2 = "f00002a4Q-Q", + xvtsqrtsp_2 = "f00002a8X-Q", + xvrspic_2 = "f00002acQ-Q", + xvdivsp_3 = "f00002c0QQQ", + xvmsubmsp_3 = "f00002c8QQQ", + xvcvsxwsp_2 = "f00002e0Q-Q", + xvrspim_2 = "f00002e4Q-Q", + xvtdivsp_3 = "f00002e8XQQ", + xvadddp_3 = "f0000300QQQ", + xvmaddadp_3 = "f0000308QQQ", + xvcmpeqdp_3 = "f0000318QQQ", + xvcvdpuxws_2 = "f0000320Q-Q", + xvrdpi_2 = "f0000324Q-Q", + xvrsqrtedp_2 = "f0000328Q-Q", + xvsqrtdp_2 = "f000032cQ-Q", + xvsubdp_3 = "f0000340QQQ", + xvmaddmdp_3 = "f0000348QQQ", + xvcmpgtdp_3 = "f0000358QQQ", + xvcvdpsxws_2 = "f0000360Q-Q", + xvrdpiz_2 = "f0000364Q-Q", + xvredp_2 = "f0000368Q-Q", + xvmuldp_3 = "f0000380QQQ", + xvmsubadp_3 = "f0000388QQQ", + xvcmpgedp_3 = "f0000398QQQ", + xvcvuxwdp_2 = "f00003a0Q-Q", + xvrdpip_2 = "f00003a4Q-Q", + xvtsqrtdp_2 = "f00003a8X-Q", + xvrdpic_2 = "f00003acQ-Q", + xvdivdp_3 = "f00003c0QQQ", + xvmsubmdp_3 = "f00003c8QQQ", + xvcvsxwdp_2 = "f00003e0Q-Q", + xvrdpim_2 = "f00003e4Q-Q", + xvtdivdp_3 = "f00003e8XQQ", + xsnmaddasp_3 = "f0000408QQQ", + xxland_3 = "f0000410QQQ", + xscvdpsp_2 = "f0000424Q-Q", + xscvdpspn_2 = "f000042cQ-Q", + xsnmaddmsp_3 = "f0000448QQQ", + xxlandc_3 = "f0000450QQQ", + xsrsp_2 = "f0000464Q-Q", + xsnmsubasp_3 = "f0000488QQQ", + xxlor_3 = "f0000490QQQ", + xscvuxdsp_2 = "f00004a0Q-Q", + xsnmsubmsp_3 = "f00004c8QQQ", + xxlxor_3 = "f00004d0QQQ", + xscvsxdsp_2 = "f00004e0Q-Q", + xsmaxdp_3 = "f0000500QQQ", + xsnmaddadp_3 = "f0000508QQQ", + xxlnor_3 = "f0000510QQQ", + xscvdpuxds_2 = "f0000520Q-Q", + xscvspdp_2 = "f0000524Q-Q", + xscvspdpn_2 = "f000052cQ-Q", + xsmindp_3 = "f0000540QQQ", + xsnmaddmdp_3 = "f0000548QQQ", + xxlorc_3 = "f0000550QQQ", + xscvdpsxds_2 = "f0000560Q-Q", + xsabsdp_2 = "f0000564Q-Q", + xscpsgndp_3 = "f0000580QQQ", + xsnmsubadp_3 = "f0000588QQQ", + xxlnand_3 = "f0000590QQQ", + xscvuxddp_2 = "f00005a0Q-Q", + xsnabsdp_2 = "f00005a4Q-Q", + xsnmsubmdp_3 = "f00005c8QQQ", + xxleqv_3 = "f00005d0QQQ", + xscvsxddp_2 = "f00005e0Q-Q", + xsnegdp_2 = "f00005e4Q-Q", + xvmaxsp_3 = "f0000600QQQ", + xvnmaddasp_3 = "f0000608QQQ", + ["xvcmpeqsp._3"] = "f0000618QQQ", + xvcvspuxds_2 = "f0000620Q-Q", + xvcvdpsp_2 = "f0000624Q-Q", + xvminsp_3 = "f0000640QQQ", + xvnmaddmsp_3 = "f0000648QQQ", + ["xvcmpgtsp._3"] = "f0000658QQQ", + xvcvspsxds_2 = "f0000660Q-Q", + xvabssp_2 = "f0000664Q-Q", + xvcpsgnsp_3 = "f0000680QQQ", + xvnmsubasp_3 = "f0000688QQQ", + ["xvcmpgesp._3"] = "f0000698QQQ", + xvcvuxdsp_2 = "f00006a0Q-Q", + xvnabssp_2 = "f00006a4Q-Q", + xvnmsubmsp_3 = "f00006c8QQQ", + xvcvsxdsp_2 = "f00006e0Q-Q", + xvnegsp_2 = "f00006e4Q-Q", + xvmaxdp_3 = "f0000700QQQ", + xvnmaddadp_3 = "f0000708QQQ", + ["xvcmpeqdp._3"] = "f0000718QQQ", + xvcvdpuxds_2 = "f0000720Q-Q", + xvcvspdp_2 = "f0000724Q-Q", + xvmindp_3 = "f0000740QQQ", + xvnmaddmdp_3 = "f0000748QQQ", + ["xvcmpgtdp._3"] = "f0000758QQQ", + xvcvdpsxds_2 = "f0000760Q-Q", + xvabsdp_2 = "f0000764Q-Q", + xvcpsgndp_3 = "f0000780QQQ", + xvnmsubadp_3 = "f0000788QQQ", + ["xvcmpgedp._3"] = "f0000798QQQ", + xvcvuxddp_2 = "f00007a0Q-Q", + xvnabsdp_2 = "f00007a4Q-Q", + xvnmsubmdp_3 = "f00007c8QQQ", + xvcvsxddp_2 = "f00007e0Q-Q", + xvnegdp_2 = "f00007e4Q-Q", + + -- Primary opcode 61: + stfdp_2 = "f4000000F:D", -- NYI: displacement must be divisible by 4. + + -- Primary opcode 62: + stq_2 = "f8000002R:D", -- NYI: displacement must be divisible by 8. -- Primary opcode 63: fdiv_3 = "fc000024FFF.", @@ -526,8 +1086,12 @@ local map_op = { frsp_2 = "fc000018F-F.", fctiw_2 = "fc00001cF-F.", fctiwz_2 = "fc00001eF-F.", + ftdiv_2 = "fc000100X-F.", + fctiwu_2 = "fc00011cF-F.", + fctiwuz_2 = "fc00011eF-F.", mtfsfi_2 = "fc00010cAA", -- NYI: upshift. fnabs_2 = "fc000110F-F.", + ftsqrt_2 = "fc000140X-F.", fabs_2 = "fc000210F-F.", frin_2 = "fc000310F-F.", friz_2 = "fc000350F-F.", @@ -537,7 +1101,38 @@ local map_op = { -- NYI: mtfsf, mtfsb0, mtfsb1. fctid_2 = "fc00065cF-F.", fctidz_2 = "fc00065eF-F.", + fmrgow_3 = "fc00068cFFF", fcfid_2 = "fc00069cF-F.", + fctidu_2 = "fc00075cF-F.", + fctiduz_2 = "fc00075eF-F.", + fmrgew_3 = "fc00078cFFF", + fcfidu_2 = "fc00079cF-F.", + + daddq_3 = "fc000004F:F:F:.", + dquaq_4 = "fc000006F:F:F:Z.", + dmulq_3 = "fc000044F:F:F:.", + drrndq_4 = "fc000046F:F:F:Z.", + dscliq_3 = "fc000084F:F:6.", + dquaiq_4 = "fc000086SF:~F:Z.", + dscriq_3 = "fc0000c4F:F:6.", + drintxq_4 = "fc0000c61F:~F:Z.", + dcmpoq_3 = "fc000104XF:F:", + dtstexq_3 = "fc000144XF:F:", + dtstdcq_3 = "fc000184XF:6", + dtstdgq_3 = "fc0001c4XF:6", + drintnq_4 = "fc0001c61F:~F:Z.", + dctqpq_2 = "fc000204F:-F:.", + dctfixq_2 = "fc000244F:-F:.", + ddedpdq_3 = "fc000284ZF:~F:.", + dxexq_2 = "fc0002c4F:-F:.", + dsubq_3 = "fc000404F:F:F:.", + ddivq_3 = "fc000444F:F:F:.", + dcmpuq_3 = "fc000504XF:F:", + dtstsfq_3 = "fc000544XF:F:", + drdpq_2 = "fc000604F:-F:.", + dcffixq_2 = "fc000644F:-F:.", + denbcdq_3 = "fc000684YF:~F:.", + diexq_3 = "fc0006c4F:FF:.", -- Primary opcode 4, SPE APU extension: evaddw_3 = "10000200RRR", @@ -822,7 +1417,7 @@ local map_op = { do local t = {} for k,v in pairs(map_op) do - if sub(v, -1) == "." then + if type(v) == "string" and sub(v, -1) == "." then local v2 = sub(v, 1, 7)..char(byte(v, 8)+1)..sub(v, 9, -2) t[sub(k, 1, -3).."."..sub(k, -2)] = v2 end @@ -884,6 +1479,24 @@ local function parse_fpr(expr) werror("bad register name `"..expr.."'") end +local function parse_vr(expr) + local r = match(expr, "^v([1-3]?[0-9])$") + if r then + r = tonumber(r) + if r <= 31 then return r end + end + werror("bad register name `"..expr.."'") +end + +local function parse_vs(expr) + local r = match(expr, "^vs([1-6]?[0-9])$") + if r then + r = tonumber(r) + if r <= 63 then return r end + end + werror("bad register name `"..expr.."'") +end + local function parse_cr(expr) local r = match(expr, "^cr([0-7])$") if r then return tonumber(r) end @@ -900,8 +1513,30 @@ local function parse_cond(expr) werror("bad condition bit name `"..expr.."'") end +local parse_ctx = {} + +local loadenv = setfenv and function(s) + local code = loadstring(s, "") + if code then setfenv(code, parse_ctx) end + return code +end or function(s) + return load(s, "", nil, parse_ctx) +end + +-- Try to parse simple arithmetic, too, since some basic ops are aliases. +local function parse_number(n) + local x = tonumber(n) + if x then return x end + local code = loadenv("return "..n) + if code then + local ok, y = pcall(code) + if ok then return y end + end + return nil +end + local function parse_imm(imm, bits, shift, scale, signed) - local n = tonumber(imm) + local n = parse_number(imm) if n then local m = sar(n, scale) if shl(m, scale) == n then @@ -914,7 +1549,8 @@ local function parse_imm(imm, bits, shift, scale, signed) end end werror("out of range immediate `"..imm.."'") - elseif match(imm, "^r([1-3]?[0-9])$") or + elseif match(imm, "^[rfv]([1-3]?[0-9])$") or + match(imm, "^vs([1-6]?[0-9])$") or match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then werror("expected immediate operand, got register") else @@ -924,11 +1560,11 @@ local function parse_imm(imm, bits, shift, scale, signed) end local function parse_shiftmask(imm, isshift) - local n = tonumber(imm) + local n = parse_number(imm) if n then if shr(n, 6) == 0 then - local lsb = band(imm, 31) - local msb = imm - lsb + local lsb = band(n, 31) + local msb = n - lsb return isshift and (shl(lsb, 11)+shr(msb, 4)) or (shl(lsb, 6)+msb) end werror("out of range immediate `"..imm.."'") @@ -936,7 +1572,8 @@ local function parse_shiftmask(imm, isshift) match(imm, "^([%w_]+):(r[1-3]?[0-9])$") then werror("expected immediate operand, got register") else - werror("NYI: parameterized 64 bit shift/mask") + waction("IMMSH", isshift and 1 or 0, imm) + return 0; end end @@ -1011,7 +1648,7 @@ end ------------------------------------------------------------------------------ -- Handle opcodes defined with template strings. -map_op[".template__"] = function(params, template, nparams) +op_template = function(params, template, nparams) if not params then return sub(template, 9) end local op = tonumber(sub(template, 1, 8), 16) local n, rs = 1, 26 @@ -1027,6 +1664,15 @@ map_op[".template__"] = function(params, template, nparams) rs = rs - 5; op = op + shl(parse_gpr(params[n]), rs); n = n + 1 elseif p == "F" then rs = rs - 5; op = op + shl(parse_fpr(params[n]), rs); n = n + 1 + elseif p == "V" then + rs = rs - 5; op = op + shl(parse_vr(params[n]), rs); n = n + 1 + elseif p == "Q" then + local vs = parse_vs(params[n]); n = n + 1; rs = rs - 5 + local sh = rs == 6 and 2 or 3 + band(shr(rs, 1), 3) + op = op + shl(band(vs, 31), rs) + shr(band(vs, 32), sh) + elseif p == "q" then + local vs = parse_vs(params[n]); n = n + 1 + op = op + shl(band(vs, 31), 21) + shr(band(vs, 32), 5) elseif p == "A" then rs = rs - 5; op = op + parse_imm(params[n], 5, rs, 0, false); n = n + 1 elseif p == "S" then @@ -1047,6 +1693,26 @@ map_op[".template__"] = function(params, template, nparams) rs = rs - 5; op = op + shl(parse_cond(params[n]), rs); n = n + 1 elseif p == "X" then rs = rs - 5; op = op + shl(parse_cr(params[n]), rs+2); n = n + 1 + elseif p == "1" then + rs = rs - 5; op = op + parse_imm(params[n], 1, rs, 0, false); n = n + 1 + elseif p == "g" then + rs = rs - 5; op = op + parse_imm(params[n], 2, rs, 0, false); n = n + 1 + elseif p == "3" then + rs = rs - 5; op = op + parse_imm(params[n], 3, rs, 0, false); n = n + 1 + elseif p == "P" then + rs = rs - 5; op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1 + elseif p == "p" then + op = op + parse_imm(params[n], 4, rs, 0, false); n = n + 1 + elseif p == "6" then + rs = rs - 6; op = op + parse_imm(params[n], 6, rs, 0, false); n = n + 1 + elseif p == "Y" then + rs = rs - 5; op = op + parse_imm(params[n], 1, rs+4, 0, false); n = n + 1 + elseif p == "y" then + rs = rs - 5; op = op + parse_imm(params[n], 1, rs+3, 0, false); n = n + 1 + elseif p == "Z" then + rs = rs - 5; op = op + parse_imm(params[n], 2, rs+3, 0, false); n = n + 1 + elseif p == "z" then + rs = rs - 5; op = op + parse_imm(params[n], 2, rs+2, 0, false); n = n + 1 elseif p == "W" then op = op + parse_cr(params[n]); n = n + 1 elseif p == "G" then @@ -1071,6 +1737,8 @@ map_op[".template__"] = function(params, template, nparams) local lo = band(op, mm) local hi = band(op, shl(mm, 5)) op = op - lo - hi + shl(lo, 5) + shr(hi, 5) + elseif p == ":" then + if band(shr(op, rs), 1) ~= 0 then werror("register pair expected") end elseif p == "-" then rs = rs - 5 elseif p == "." then @@ -1082,6 +1750,8 @@ map_op[".template__"] = function(params, template, nparams) wputpos(pos, op) end +map_op[".template__"] = op_template + ------------------------------------------------------------------------------ -- Pseudo-opcode to mark the position where the action list is to be emitted. diff --git a/dynasm/dasm_proto.h b/dynasm/dasm_proto.h index a16fadcc..d798554b 100644 --- a/dynasm/dasm_proto.h +++ b/dynasm/dasm_proto.h @@ -10,8 +10,8 @@ #include <stddef.h> #include <stdarg.h> -#define DASM_IDENT "DynASM 1.3.0" -#define DASM_VERSION 10300 /* 1.3.0 */ +#define DASM_IDENT "DynASM 1.5.0" +#define DASM_VERSION 10500 /* 1.5.0 */ #ifndef Dst_DECL #define Dst_DECL dasm_State **Dst diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index c3089d91..f0327302 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h @@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...) dasm_State *D = Dst_REF; dasm_ActList p = D->actionlist + start; dasm_Section *sec = D->section; - int pos = sec->pos, ofs = sec->ofs, mrm = 4; + int pos = sec->pos, ofs = sec->ofs, mrm = -1; int *b; if (pos >= sec->epos) { @@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...) b[pos++] = n; switch (action) { case DASM_DISP: - if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } + if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } /* fallthrough */ case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; /* fallthrough */ case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ @@ -204,11 +204,17 @@ void dasm_put(Dst_DECL, int start, ...) case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; case DASM_SPACE: p++; ofs += n; break; case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ - case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); - if (*p++ == 1 && *p == DASM_DISP) mrm = n; + case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG); + if (*p < 0x40 && p[1] == DASM_DISP) mrm = n; + if (*p < 0x20 && (n&7) == 4) ofs++; + switch ((*p++ >> 3) & 3) { + case 3: n |= b[pos-3]; /* fallthrough */ + case 2: n |= b[pos-2]; /* fallthrough */ + case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; } + } continue; } - mrm = 4; + mrm = -1; } else { int *pl, n; switch (action) { @@ -233,8 +239,11 @@ void dasm_put(Dst_DECL, int start, ...) } pos++; ofs += 4; /* Maximum offset needed. */ - if (action == DASM_REL_LG || action == DASM_REL_PC) + if (action == DASM_REL_LG || action == DASM_REL_PC) { b[pos++] = ofs; /* Store pass1 offset estimate. */ + } else if (sizeof(ptrdiff_t) == 8) { + ofs += 4; + } break; case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); @@ -359,10 +368,22 @@ int dasm_link(Dst_DECL, size_t *szp) do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) #define dasmd(x) \ do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) +#define dasmq(x) \ + do { *((unsigned long long *)cp) = (unsigned long long)(x); cp+=8; } while (0) #else #define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) #define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) +#define dasmq(x) do { dasmd(x); dasmd((x)>>32); } while (0) #endif +static unsigned char *dasma_(unsigned char *cp, ptrdiff_t x) +{ + if (sizeof(ptrdiff_t) == 8) + dasmq((unsigned long long)x); + else + dasmd((unsigned int)x); + return cp; +} +#define dasma(x) (cp = dasma_(cp, (x))) /* Pass 3: Encode sections. */ int dasm_encode(Dst_DECL, void *buffer) @@ -402,7 +423,27 @@ int dasm_encode(Dst_DECL, void *buffer) case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; /* fallthrough */ case DASM_IMM_W: dasmw(n); break; - case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } + case DASM_VREG: { + int t = *p++; + unsigned char *ex = cp - (t&7); + if ((n & 8) && t < 0xa0) { + if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6); + n &= 7; + } else if (n & 0x10) { + if (*ex & 0x80) { + *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2; + } + while (++ex < cp) ex[-1] = *ex; + if (mark) mark--; + cp--; + n &= 7; + } + if (t >= 0xc0) n <<= 4; + else if (t >= 0x40) n <<= 3; + else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; } + cp[-1] ^= n; + break; + } case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; b++; n = (int)(ptrdiff_t)D->globals[-n]; /* fallthrough */ @@ -417,12 +458,13 @@ int dasm_encode(Dst_DECL, void *buffer) goto wb; } case DASM_IMM_LG: - p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } + p++; + if (n < 0) { dasma((ptrdiff_t)D->globals[-n]); break; } /* fallthrough */ case DASM_IMM_PC: { int *pb = DASM_POS2PTR(D, n); - n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); - goto wd; + dasma(*pb < 0 ? (ptrdiff_t)pb[1] : (*pb + (ptrdiff_t)base)); + break; } case DASM_LABEL_LG: { int idx = *p++; diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index 24c07f37..b442cd0d 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua @@ -11,9 +11,9 @@ local x64 = x64 local _info = { arch = x64 and "x64" or "x86", description = "DynASM x86/x64 module", - version = "1.3.0", - vernum = 10300, - release = "2011-05-05", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", license = "MIT", } @@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl local _s = string local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub -local concat, sort = table.concat, table.sort +local concat, sort, remove = table.concat, table.sort, table.remove local bit = bit or require("bit") -local band, shl, shr = bit.band, bit.lshift, bit.rshift +local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift -- Inherited tables and callbacks. local g_opt, g_arch @@ -41,7 +41,7 @@ local action_names = { -- int arg, 1 buffer pos: "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", -- action arg (1 byte), int arg, 1 buffer pos (reg/num): - "VREG", "SPACE", -- !x64: VREG support NYI. + "VREG", "SPACE", -- ptrdiff_t arg, 1 buffer pos (address): !x64 "SETLABEL", "REL_A", -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): @@ -83,6 +83,21 @@ local actargs = { 0 } -- Current number of section buffer positions for dasm_put(). local secpos = 1 +-- VREG kind encodings, pre-shifted by 5 bits. +local map_vreg = { + ["modrm.rm.m"] = 0x00, + ["modrm.rm.r"] = 0x20, + ["opcode"] = 0x20, + ["sib.base"] = 0x20, + ["sib.index"] = 0x40, + ["modrm.reg"] = 0x80, + ["vex.v"] = 0xa0, + ["imm.hi"] = 0xc0, +} + +-- Current number of VREG actions contributing to REX/VEX shrinkage. +local vreg_shrink_count = 0 + ------------------------------------------------------------------------------ -- Compute action numbers for action names. @@ -134,6 +149,21 @@ local function waction(action, a, num) if a or num then secpos = secpos + (num or 1) end end +-- Optionally add a VREG action. +local function wvreg(kind, vreg, psz, sk, defer) + if not vreg then return end + waction("VREG", vreg) + local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'") + if b < (sk or 0) then + vreg_shrink_count = vreg_shrink_count + 1 + end + if not defer then + b = b + vreg_shrink_count * 8 + vreg_shrink_count = 0 + end + wputxb(b + (psz or 0)) +end + -- Add call to embedded DynASM C code. local function wcall(func, args) wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) @@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names) local iname = format("@%s%x%s", sz, i, needrex and "R" or "") if needrex then map_reg_needrex[iname] = true end local name - if sz == "o" then name = format("xmm%d", i) + if sz == "o" or sz == "y" then name = format("%s%d", cl, i) elseif sz == "f" then name = format("st%d", i) else name = format("r%d%s", i, sz == addrsize and "" or sz) end map_archdef[name] = iname @@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}) mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) map_reg_valid_index[map_archdef.esp] = false if x64 then map_reg_valid_index[map_archdef.rsp] = false end +if x64 then map_reg_needrex[map_archdef.Rb] = true end map_archdef["Ra"] = "@"..addrsize -- FP registers (internally tword sized, but use "f" as operand size). @@ -334,21 +365,24 @@ mkrmap("f", "Rf") -- SSE registers (oword sized, but qword and dword accessible). mkrmap("o", "xmm") +-- AVX registers (yword sized, but oword, qword and dword accessible). +mkrmap("y", "ymm") + -- Operand size prefixes to codes. local map_opsize = { - byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t", - aword = addrsize, + byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y", + tword = "t", aword = addrsize, } -- Operand size code to number. local map_opsizenum = { - b = 1, w = 2, d = 4, q = 8, o = 16, t = 10, + b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10, } -- Operand size code to name. local map_opsizename = { - b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword", - f = "fpword", + b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword", + t = "tword", f = "fpword", } -- Valid index register scale factors. @@ -450,6 +484,22 @@ local function wputdarg(n) end end +-- Put signed or unsigned qword or arg. +local function wputqarg(n) + local tn = type(n) + if tn == "number" then -- This is only used for numbers from -2^31..2^32-1. + wputb(band(n, 255)) + wputb(band(shr(n, 8), 255)) + wputb(band(shr(n, 16), 255)) + wputb(shr(n, 24)) + local sign = n < 0 and 255 or 0 + wputb(sign); wputb(sign); wputb(sign); wputb(sign) + else + waction("IMM_D", format("(unsigned int)(%s)", n)) + waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n)) + end +end + -- Put operand-size dependent number or arg (defaults to dword). local function wputszarg(sz, n) if not sz or sz == "d" or sz == "q" then wputdarg(n) @@ -460,9 +510,45 @@ local function wputszarg(sz, n) end -- Put multi-byte opcode with operand-size dependent modifications. -local function wputop(sz, op, rex) +local function wputop(sz, op, rex, vex, vregr, vregxb) + local psz, sk = 0, nil + if vex then + local tail + if vex.m == 1 and band(rex, 11) == 0 then + if x64 and vregxb then + sk = map_vreg["modrm.reg"] + else + wputb(0xc5) + tail = shl(bxor(band(rex, 4), 4), 5) + psz = 3 + end + end + if not tail then + wputb(0xc4) + wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) + tail = shl(band(rex, 8), 4) + psz = 4 + end + local reg, vreg = 0, nil + if vex.v then + reg = vex.v.reg + if not reg then werror("bad vex operand") end + if reg < 0 then reg = 0; vreg = vex.v.vreg end + end + if sz == "y" or vex.l then tail = tail + 4 end + wputb(tail + shl(bxor(reg, 15), 3) + vex.p) + wvreg("vex.v", vreg) + rex = 0 + if op >= 256 then werror("bad vex opcode") end + else + if rex ~= 0 then + if not x64 then werror("bad operand size") end + elseif (vregr or vregxb) and x64 then + rex = 0x10 + sk = map_vreg["vex.v"] + end + end local r - if rex ~= 0 and not x64 then werror("bad operand size") end if sz == "w" then wputb(102) end -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end @@ -471,20 +557,20 @@ local function wputop(sz, op, rex) if rex ~= 0 then local opc3 = band(op, 0xffff00) if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then - wputb(64 + band(rex, 15)); rex = 0 + wputb(64 + band(rex, 15)); rex = 0; psz = 2 end end - wputb(shr(op, 16)); op = band(op, 0xffff) + wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1 end if op >= 256 then local b = shr(op, 8) - if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end - wputb(b) - op = band(op, 255) + if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end + wputb(b); op = band(op, 255); psz = psz + 1 end - if rex ~= 0 then wputb(64 + band(rex, 15)) end + if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end if sz == "b" then op = op - 1 end wputb(op) + return psz, sk end -- Put ModRM or SIB formatted byte. @@ -494,7 +580,7 @@ local function wputmodrm(m, s, rm, vs, vrm) end -- Put ModRM/SIB plus optional displacement. -local function wputmrmsib(t, imark, s, vsreg) +local function wputmrmsib(t, imark, s, vsreg, psz, sk) local vreg, vxreg local reg, xreg = t.reg, t.xreg if reg and reg < 0 then reg = 0; vreg = t.vreg end @@ -504,8 +590,8 @@ local function wputmrmsib(t, imark, s, vsreg) -- Register mode. if sub(t.mode, 1, 1) == "r" then wputmodrm(3, s, reg) - if vsreg then waction("VREG", vsreg); wputxb(2) end - if vreg then waction("VREG", vreg); wputxb(0) end + wvreg("modrm.reg", vsreg, psz+1, sk, vreg) + wvreg("modrm.rm.r", vreg, psz+1, sk) return end @@ -519,21 +605,22 @@ local function wputmrmsib(t, imark, s, vsreg) -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) wputmodrm(0, s, 4) if imark == "I" then waction("MARK") end - if vsreg then waction("VREG", vsreg); wputxb(2) end + wvreg("modrm.reg", vsreg, psz+1, sk, vxreg) wputmodrm(t.xsc, xreg, 5) - if vxreg then waction("VREG", vxreg); wputxb(3) end + wvreg("sib.index", vxreg, psz+2, sk) else -- Pure 32 bit displacement. if x64 and tdisp ~= "table" then wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) + wvreg("modrm.reg", vsreg, psz+1, sk) if imark == "I" then waction("MARK") end wputmodrm(0, 4, 5) else riprel = x64 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) + wvreg("modrm.reg", vsreg, psz+1, sk) if imark == "I" then waction("MARK") end end - if vsreg then waction("VREG", vsreg); wputxb(2) end end if riprel then -- Emit rip-relative displacement. if match("UWSiI", imark) then @@ -561,16 +648,16 @@ local function wputmrmsib(t, imark, s, vsreg) if xreg or band(reg, 7) == 4 then wputmodrm(m or 2, s, 4) -- ModRM. if m == nil or imark == "I" then waction("MARK") end - if vsreg then waction("VREG", vsreg); wputxb(2) end + wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg) wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. - if vxreg then waction("VREG", vxreg); wputxb(3) end - if vreg then waction("VREG", vreg); wputxb(1) end + wvreg("sib.index", vxreg, psz+2, sk, vreg) + wvreg("sib.base", vreg, psz+2, sk) else wputmodrm(m or 2, s, reg) -- ModRM. if (imark == "I" and (m == 1 or m == 2)) or (m == nil and (vsreg or vreg)) then waction("MARK") end - if vsreg then waction("VREG", vsreg); wputxb(2) end - if vreg then waction("VREG", vreg); wputxb(1) end + wvreg("modrm.reg", vsreg, psz+1, sk, vreg) + wvreg("modrm.rm.m", vreg, psz+1, sk) end -- Put displacement. @@ -592,10 +679,16 @@ local function opmodestr(op, args) end -- Convert number to valid integer or nil. -local function toint(expr) +local function toint(expr, isqword) local n = tonumber(expr) if n then - if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then + if n % 1 ~= 0 then + werror("not an integer number `"..expr.."'") + elseif isqword then + if n < -2147483648 or n > 2147483647 then + n = nil -- Handle it as an expression to avoid precision loss. + end + elseif n < -2147483648 or n > 4294967295 then werror("bad integer number `"..expr.."'") end return n @@ -678,7 +771,7 @@ local function rtexpr(expr) end -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. -local function parseoperand(param) +local function parseoperand(param, isqword) local t = {} local expr = param @@ -766,7 +859,7 @@ local function parseoperand(param) t.disp = dispexpr(tailx) else -- imm or opsize*imm - local imm = toint(expr) + local imm = toint(expr, isqword) if not imm and sub(expr, 1, 1) == "*" and t.opsize then imm = toint(sub(expr, 2)) if imm then @@ -881,9 +974,16 @@ end -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. -- The spare 3 bits are either filled with the last hex digit or -- the result from a previous "r"/"R". The opcode is restored. +-- "u" Use VEX encoding, vvvv unused. +-- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is +-- removed from the list used by future characters). +-- "w" Use VEX encoding, vvvv from 3rd operand. +-- "L" Force VEX.L -- -- All of the following characters force a flush of the opcode: -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. +-- "s" stores a 4 bit immediate from the last register operand, +-- followed by 4 zero bits. -- "S" stores a signed 8 bit immediate from the last operand. -- "U" stores an unsigned 8 bit immediate from the last operand. -- "W" stores an unsigned 16 bit immediate from the last operand. @@ -1226,46 +1326,14 @@ local map_op = { movups_2 = "rmo:0F10rM|mro:0F11Rm", orpd_2 = "rmo:660F56rM", orps_2 = "rmo:0F56rM", - packssdw_2 = "rmo:660F6BrM", - packsswb_2 = "rmo:660F63rM", - packuswb_2 = "rmo:660F67rM", - paddb_2 = "rmo:660FFCrM", - paddd_2 = "rmo:660FFErM", - paddq_2 = "rmo:660FD4rM", - paddsb_2 = "rmo:660FECrM", - paddsw_2 = "rmo:660FEDrM", - paddusb_2 = "rmo:660FDCrM", - paddusw_2 = "rmo:660FDDrM", - paddw_2 = "rmo:660FFDrM", - pand_2 = "rmo:660FDBrM", - pandn_2 = "rmo:660FDFrM", pause_0 = "F390", - pavgb_2 = "rmo:660FE0rM", - pavgw_2 = "rmo:660FE3rM", - pcmpeqb_2 = "rmo:660F74rM", - pcmpeqd_2 = "rmo:660F76rM", - pcmpeqw_2 = "rmo:660F75rM", - pcmpgtb_2 = "rmo:660F64rM", - pcmpgtd_2 = "rmo:660F66rM", - pcmpgtw_2 = "rmo:660F65rM", pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", - pmaddwd_2 = "rmo:660FF5rM", - pmaxsw_2 = "rmo:660FEErM", - pmaxub_2 = "rmo:660FDErM", - pminsw_2 = "rmo:660FEArM", - pminub_2 = "rmo:660FDArM", pmovmskb_2 = "rr/do:660FD7rM", - pmulhuw_2 = "rmo:660FE4rM", - pmulhw_2 = "rmo:660FE5rM", - pmullw_2 = "rmo:660FD5rM", - pmuludq_2 = "rmo:660FF4rM", - por_2 = "rmo:660FEBrM", prefetchnta_1 = "xb:n0F180m", prefetcht0_1 = "xb:n0F181m", prefetcht1_1 = "xb:n0F182m", prefetcht2_1 = "xb:n0F183m", - psadbw_2 = "rmo:660FF6rM", pshufd_3 = "rmio:660F70rMU", pshufhw_3 = "rmio:F30F70rMU", pshuflw_3 = "rmio:F20F70rMU", @@ -1279,23 +1347,6 @@ local map_op = { psrldq_2 = "rio:660F733mU", psrlq_2 = "rmo:660FD3rM|rio:660F732mU", psrlw_2 = "rmo:660FD1rM|rio:660F712mU", - psubb_2 = "rmo:660FF8rM", - psubd_2 = "rmo:660FFArM", - psubq_2 = "rmo:660FFBrM", - psubsb_2 = "rmo:660FE8rM", - psubsw_2 = "rmo:660FE9rM", - psubusb_2 = "rmo:660FD8rM", - psubusw_2 = "rmo:660FD9rM", - psubw_2 = "rmo:660FF9rM", - punpckhbw_2 = "rmo:660F68rM", - punpckhdq_2 = "rmo:660F6ArM", - punpckhqdq_2 = "rmo:660F6DrM", - punpckhwd_2 = "rmo:660F69rM", - punpcklbw_2 = "rmo:660F60rM", - punpckldq_2 = "rmo:660F62rM", - punpcklqdq_2 = "rmo:660F6CrM", - punpcklwd_2 = "rmo:660F61rM", - pxor_2 = "rmo:660FEFrM", rcpps_2 = "rmo:0F53rM", rcpss_2 = "rro:F30F53rM|rx/od:", rsqrtps_2 = "rmo:0F52rM", @@ -1413,6 +1464,327 @@ local map_op = { movntsd_2 = "xr/qo:nF20F2BRm", movntss_2 = "xr/do:F30F2BRm", -- popcnt is also in SSE4.2 + + -- AES-NI + aesdec_2 = "rmo:660F38DErM", + aesdeclast_2 = "rmo:660F38DFrM", + aesenc_2 = "rmo:660F38DCrM", + aesenclast_2 = "rmo:660F38DDrM", + aesimc_2 = "rmo:660F38DBrM", + aeskeygenassist_3 = "rmio:660F3ADFrMU", + pclmulqdq_3 = "rmio:660F3A44rMU", + + -- AVX FP ops + vaddsubpd_3 = "rrmoy:660FVD0rM", + vaddsubps_3 = "rrmoy:F20FVD0rM", + vandpd_3 = "rrmoy:660FV54rM", + vandps_3 = "rrmoy:0FV54rM", + vandnpd_3 = "rrmoy:660FV55rM", + vandnps_3 = "rrmoy:0FV55rM", + vblendpd_4 = "rrmioy:660F3AV0DrMU", + vblendps_4 = "rrmioy:660F3AV0CrMU", + vblendvpd_4 = "rrmroy:660F3AV4BrMs", + vblendvps_4 = "rrmroy:660F3AV4ArMs", + vbroadcastf128_2 = "rx/yo:660F38u1ArM", + vcmppd_4 = "rrmioy:660FVC2rMU", + vcmpps_4 = "rrmioy:0FVC2rMU", + vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:", + vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:", + vcomisd_2 = "rro:660Fu2FrM|rx/oq:", + vcomiss_2 = "rro:0Fu2FrM|rx/od:", + vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:", + vcvtdq2ps_2 = "rmoy:0Fu5BrM", + vcvtpd2dq_2 = "rmoy:F20FuE6rM", + vcvtpd2ps_2 = "rmoy:660Fu5ArM", + vcvtps2dq_2 = "rmoy:660Fu5BrM", + vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:", + vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:", + vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:", + vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM", + vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM", + vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:", + vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:", + vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM", + vcvttps2dq_2 = "rmoy:F30Fu5BrM", + vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:", + vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:", + vdppd_4 = "rrmio:660F3AV41rMU", + vdpps_4 = "rrmioy:660F3AV40rMU", + vextractf128_3 = "mri/oy:660F3AuL19RmU", + vextractps_3 = "mri/do:660F3Au17RmU", + vhaddpd_3 = "rrmoy:660FV7CrM", + vhaddps_3 = "rrmoy:F20FV7CrM", + vhsubpd_3 = "rrmoy:660FV7DrM", + vhsubps_3 = "rrmoy:F20FV7DrM", + vinsertf128_4 = "rrmi/yyo:660F3AV18rMU", + vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:", + vldmxcsr_1 = "xd:0FuAE2m", + vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm", + vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm", + vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm", + vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm", + vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:", + vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm", + vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:", + vmovhlps_3 = "rrro:0FV12rM", + vmovhpd_2 = "xr/qo:660Fu17Rm", + vmovhpd_3 = "rrx/ooq:660FV16rM", + vmovhps_2 = "xr/qo:0Fu17Rm", + vmovhps_3 = "rrx/ooq:0FV16rM", + vmovlhps_3 = "rrro:0FV16rM", + vmovlpd_2 = "xr/qo:660Fu13Rm", + vmovlpd_3 = "rrx/ooq:660FV12rM", + vmovlps_2 = "xr/qo:0Fu13Rm", + vmovlps_3 = "rrx/ooq:0FV12rM", + vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM", + vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM", + vmovntpd_2 = "xroy:660Fu2BRm", + vmovntps_2 = "xroy:0Fu2BRm", + vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm", + vmovsd_3 = "rrro:F20FV10rM", + vmovshdup_2 = "rmoy:F30Fu16rM", + vmovsldup_2 = "rmoy:F30Fu12rM", + vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm", + vmovss_3 = "rrro:F30FV10rM", + vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm", + vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm", + vorpd_3 = "rrmoy:660FV56rM", + vorps_3 = "rrmoy:0FV56rM", + vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU", + vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU", + vperm2f128_4 = "rrmiy:660F3AV06rMU", + vptestpd_2 = "rmoy:660F38u0FrM", + vptestps_2 = "rmoy:660F38u0ErM", + vrcpps_2 = "rmoy:0Fu53rM", + vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", + vrsqrtps_2 = "rmoy:0Fu52rM", + vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", + vroundpd_3 = "rmioy:660F3Au09rMU", + vroundps_3 = "rmioy:660F3Au08rMU", + vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", + vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", + vshufpd_4 = "rrmioy:660FVC6rMU", + vshufps_4 = "rrmioy:0FVC6rMU", + vsqrtps_2 = "rmoy:0Fu51rM", + vsqrtss_2 = "rro:F30Fu51rM|rx/od:", + vsqrtpd_2 = "rmoy:660Fu51rM", + vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:", + vstmxcsr_1 = "xd:0FuAE3m", + vucomisd_2 = "rro:660Fu2ErM|rx/oq:", + vucomiss_2 = "rro:0Fu2ErM|rx/od:", + vunpckhpd_3 = "rrmoy:660FV15rM", + vunpckhps_3 = "rrmoy:0FV15rM", + vunpcklpd_3 = "rrmoy:660FV14rM", + vunpcklps_3 = "rrmoy:0FV14rM", + vxorpd_3 = "rrmoy:660FV57rM", + vxorps_3 = "rrmoy:0FV57rM", + vzeroall_0 = "0FuL77", + vzeroupper_0 = "0Fu77", + + -- AVX2 FP ops + vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:", + vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:", + -- *vgather* (!vsib) + vpermpd_3 = "rmiy:660F3AuX01rMU", + vpermps_3 = "rrmy:660F38V16rM", + + -- AVX, AVX2 integer ops + -- In general, xmm requires AVX, ymm requires AVX2. + vaesdec_3 = "rrmo:660F38VDErM", + vaesdeclast_3 = "rrmo:660F38VDFrM", + vaesenc_3 = "rrmo:660F38VDCrM", + vaesenclast_3 = "rrmo:660F38VDDrM", + vaesimc_2 = "rmo:660F38uDBrM", + vaeskeygenassist_3 = "rmio:660F3AuDFrMU", + vlddqu_2 = "rxoy:F20FuF0rM", + vmaskmovdqu_2 = "rro:660FuF7rM", + vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm", + vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm", + vmovntdq_2 = "xroy:660FuE7Rm", + vmovntdqa_2 = "rxoy:660F38u2ArM", + vmpsadbw_4 = "rrmioy:660F3AV42rMU", + vpabsb_2 = "rmoy:660F38u1CrM", + vpabsd_2 = "rmoy:660F38u1ErM", + vpabsw_2 = "rmoy:660F38u1DrM", + vpackusdw_3 = "rrmoy:660F38V2BrM", + vpalignr_4 = "rrmioy:660F3AV0FrMU", + vpblendvb_4 = "rrmroy:660F3AV4CrMs", + vpblendw_4 = "rrmioy:660F3AV0ErMU", + vpclmulqdq_4 = "rrmio:660F3AV44rMU", + vpcmpeqq_3 = "rrmoy:660F38V29rM", + vpcmpestri_3 = "rmio:660F3Au61rMU", + vpcmpestrm_3 = "rmio:660F3Au60rMU", + vpcmpgtq_3 = "rrmoy:660F38V37rM", + vpcmpistri_3 = "rmio:660F3Au63rMU", + vpcmpistrm_3 = "rmio:660F3Au62rMU", + vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:", + vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU", + vpextrd_3 = "mri/do:660F3Au16RmU", + vpextrq_3 = "mri/qo:660F3Au16RmU", + vphaddw_3 = "rrmoy:660F38V01rM", + vphaddd_3 = "rrmoy:660F38V02rM", + vphaddsw_3 = "rrmoy:660F38V03rM", + vphminposuw_2 = "rmo:660F38u41rM", + vphsubw_3 = "rrmoy:660F38V05rM", + vphsubd_3 = "rrmoy:660F38V06rM", + vphsubsw_3 = "rrmoy:660F38V07rM", + vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:", + vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:", + vpinsrd_4 = "rrmi/ood:660F3AV22rMU", + vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU", + vpmaddubsw_3 = "rrmoy:660F38V04rM", + vpmaxsb_3 = "rrmoy:660F38V3CrM", + vpmaxsd_3 = "rrmoy:660F38V3DrM", + vpmaxuw_3 = "rrmoy:660F38V3ErM", + vpmaxud_3 = "rrmoy:660F38V3FrM", + vpminsb_3 = "rrmoy:660F38V38rM", + vpminsd_3 = "rrmoy:660F38V39rM", + vpminuw_3 = "rrmoy:660F38V3ArM", + vpminud_3 = "rrmoy:660F38V3BrM", + vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM", + vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:", + vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:", + vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:", + vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:", + vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:", + vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:", + vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:", + vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:", + vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:", + vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:", + vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:", + vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:", + vpmuldq_3 = "rrmoy:660F38V28rM", + vpmulhrsw_3 = "rrmoy:660F38V0BrM", + vpmulld_3 = "rrmoy:660F38V40rM", + vpshufb_3 = "rrmoy:660F38V00rM", + vpshufd_3 = "rmioy:660Fu70rMU", + vpshufhw_3 = "rmioy:F30Fu70rMU", + vpshuflw_3 = "rmioy:F20Fu70rMU", + vpsignb_3 = "rrmoy:660F38V08rM", + vpsignw_3 = "rrmoy:660F38V09rM", + vpsignd_3 = "rrmoy:660F38V0ArM", + vpslldq_3 = "rrioy:660Fv737mU", + vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU", + vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU", + vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU", + vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU", + vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU", + vpsrldq_3 = "rrioy:660Fv733mU", + vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU", + vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU", + vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU", + vptest_2 = "rmoy:660F38u17rM", + + -- AVX2 integer ops + vbroadcasti128_2 = "rx/yo:660F38u5ArM", + vinserti128_4 = "rrmi/yyo:660F3AV38rMU", + vextracti128_3 = "mri/oy:660F3AuL39RmU", + vpblendd_4 = "rrmioy:660F3AV02rMU", + vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:", + vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:", + vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:", + vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:", + vpermd_3 = "rrmy:660F38V36rM", + vpermq_3 = "rmiy:660F3AuX00rMU", + -- *vpgather* (!vsib) + vperm2i128_4 = "rrmiy:660F3AV46rMU", + vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm", + vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm", + vpsllvd_3 = "rrmoy:660F38V47rM", + vpsllvq_3 = "rrmoy:660F38VX47rM", + vpsravd_3 = "rrmoy:660F38V46rM", + vpsrlvd_3 = "rrmoy:660F38V45rM", + vpsrlvq_3 = "rrmoy:660F38VX45rM", + + -- Intel ADX + adcx_2 = "rmqd:660F38F6rM", + adox_2 = "rmqd:F30F38F6rM", + + -- BMI1 + andn_3 = "rrmqd:0F38VF2rM", + bextr_3 = "rmrqd:0F38wF7rM", + blsi_2 = "rmqd:0F38vF33m", + blsmsk_2 = "rmqd:0F38vF32m", + blsr_2 = "rmqd:0F38vF31m", + tzcnt_2 = "rmqdw:F30FBCrM", + + -- BMI2 + bzhi_3 = "rmrqd:0F38wF5rM", + mulx_3 = "rrmqd:F20F38VF6rM", + pdep_3 = "rrmqd:F20F38VF5rM", + pext_3 = "rrmqd:F30F38VF5rM", + rorx_3 = "rmSqd:F20F3AuF0rMS", + sarx_3 = "rmrqd:F30F38wF7rM", + shrx_3 = "rmrqd:F20F38wF7rM", + shlx_3 = "rmrqd:660F38wF7rM", + + -- FMA3 + vfmaddsub132pd_3 = "rrmoy:660F38VX96rM", + vfmaddsub132ps_3 = "rrmoy:660F38V96rM", + vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM", + vfmaddsub213ps_3 = "rrmoy:660F38VA6rM", + vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM", + vfmaddsub231ps_3 = "rrmoy:660F38VB6rM", + + vfmsubadd132pd_3 = "rrmoy:660F38VX97rM", + vfmsubadd132ps_3 = "rrmoy:660F38V97rM", + vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM", + vfmsubadd213ps_3 = "rrmoy:660F38VA7rM", + vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM", + vfmsubadd231ps_3 = "rrmoy:660F38VB7rM", + + vfmadd132pd_3 = "rrmoy:660F38VX98rM", + vfmadd132ps_3 = "rrmoy:660F38V98rM", + vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:", + vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:", + vfmadd213pd_3 = "rrmoy:660F38VXA8rM", + vfmadd213ps_3 = "rrmoy:660F38VA8rM", + vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:", + vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:", + vfmadd231pd_3 = "rrmoy:660F38VXB8rM", + vfmadd231ps_3 = "rrmoy:660F38VB8rM", + vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:", + vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:", + + vfmsub132pd_3 = "rrmoy:660F38VX9ArM", + vfmsub132ps_3 = "rrmoy:660F38V9ArM", + vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:", + vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:", + vfmsub213pd_3 = "rrmoy:660F38VXAArM", + vfmsub213ps_3 = "rrmoy:660F38VAArM", + vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:", + vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:", + vfmsub231pd_3 = "rrmoy:660F38VXBArM", + vfmsub231ps_3 = "rrmoy:660F38VBArM", + vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:", + vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:", + + vfnmadd132pd_3 = "rrmoy:660F38VX9CrM", + vfnmadd132ps_3 = "rrmoy:660F38V9CrM", + vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:", + vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:", + vfnmadd213pd_3 = "rrmoy:660F38VXACrM", + vfnmadd213ps_3 = "rrmoy:660F38VACrM", + vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:", + vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:", + vfnmadd231pd_3 = "rrmoy:660F38VXBCrM", + vfnmadd231ps_3 = "rrmoy:660F38VBCrM", + vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:", + vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:", + + vfnmsub132pd_3 = "rrmoy:660F38VX9ErM", + vfnmsub132ps_3 = "rrmoy:660F38V9ErM", + vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:", + vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:", + vfnmsub213pd_3 = "rrmoy:660F38VXAErM", + vfnmsub213ps_3 = "rrmoy:660F38VAErM", + vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:", + vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:", + vfnmsub231pd_3 = "rrmoy:660F38VXBErM", + vfnmsub231ps_3 = "rrmoy:660F38VBErM", + vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:", + vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:", } ------------------------------------------------------------------------------ @@ -1463,28 +1835,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ end --- SSE FP arithmetic ops. +-- SSE / AVX FP arithmetic ops. for name,n in pairs{ sqrt = 1, add = 8, mul = 9, sub = 12, min = 13, div = 14, max = 15 } do map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) + if n ~= 1 then + map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n) + map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n) + map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n) + map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n) + end +end + +-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf). +for name,n in pairs{ + paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4, + paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B, + packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC, + paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0, + pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76, + pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66, + pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE, + pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA, + pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5, + pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8, + psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8, + psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9, + punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A, + punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61, + punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF +} do + map_op[name.."_2"] = format("rmo:660F%02XrM", n) + map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n) end ------------------------------------------------------------------------------ +local map_vexarg = { u = false, v = 1, V = 2, w = 3 } + -- Process pattern string. local function dopattern(pat, args, sz, op, needrex) - local digit, addin + local digit, addin, vex local opcode = 0 local szov = sz local narg = 1 local rex = 0 -- Limit number of section buffer positions used by a single dasm_put(). - -- A single opcode needs a maximum of 5 positions. - if secpos+5 > maxsecpos then wflush() end + -- A single opcode needs a maximum of 6 positions. + if secpos+6 > maxsecpos then wflush() end -- Process each character. for c in gmatch(pat.."|", ".") do @@ -1498,6 +1900,8 @@ local function dopattern(pat, args, sz, op, needrex) szov = nil elseif c == "X" then -- Force REX.W. rex = 8 + elseif c == "L" then -- Force VEX.L. + vex.l = true elseif c == "r" then -- Merge 1st operand regno. into opcode. addin = args[1]; opcode = opcode + (addin.reg % 8) if narg < 2 then narg = 2 end @@ -1521,21 +1925,42 @@ local function dopattern(pat, args, sz, op, needrex) if t.xreg and t.xreg > 7 then rex = rex + 2 end if s > 7 then rex = rex + 4 end if needrex then rex = rex + 16 end - wputop(szov, opcode, rex); opcode = nil + local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg) + opcode = nil local imark = sub(pat, -1) -- Force a mark (ugly). -- Put ModRM/SIB with regno/last digit as spare. - wputmrmsib(t, imark, s, addin and addin.vreg) + wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk) addin = nil + elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix + local b = band(opcode, 255); opcode = shr(opcode, 8) + local m = 1 + if b == 0x38 then m = 2 + elseif b == 0x3a then m = 3 end + if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end + if b ~= 0x0f then + werror("expected `0F', `0F38', or `0F3A' to precede `"..c.. + "' in pattern `"..pat.."' for `"..op.."'") + end + local v = map_vexarg[c] + if v then v = remove(args, v) end + b = band(opcode, 255) + local p = 0 + if b == 0x66 then p = 1 + elseif b == 0xf3 then p = 2 + elseif b == 0xf2 then p = 3 end + if p ~= 0 then opcode = shr(opcode, 8) end + if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end + vex = { m = m, p = p, v = v } else if opcode then -- Flush opcode. if szov == "q" and rex == 0 then rex = rex + 8 end if needrex then rex = rex + 16 end if addin and addin.reg == -1 then - wputop(szov, opcode - 7, rex) - waction("VREG", addin.vreg); wputxb(0) + local psz, sk = wputop(szov, opcode - 7, rex, vex, true) + wvreg("opcode", addin.vreg, psz, sk) else if addin and addin.reg > 7 then rex = rex + 1 end - wputop(szov, opcode, rex) + wputop(szov, opcode, rex, vex) end opcode = nil end @@ -1549,7 +1974,7 @@ local function dopattern(pat, args, sz, op, needrex) local a = args[narg] narg = narg + 1 local mode, imm = a.mode, a.imm - if mode == "iJ" and not match("iIJ", c) then + if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then werror("bad operand size for label") end if c == "S" then @@ -1572,6 +1997,14 @@ local function dopattern(pat, args, sz, op, needrex) else wputlabel("REL_", imm, 2) end + elseif c == "s" then + local reg = a.reg + if reg < 0 then + wputb(0) + wvreg("imm.hi", a.vreg) + else + wputb(shl(reg, 4)) + end else werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") end @@ -1648,11 +2081,14 @@ map_op[".template__"] = function(params, template, nparams) if pat == "" then pat = lastpat else lastpat = pat end if matchtm(tm, args) then local prefix = sub(szm, 1, 1) - if prefix == "/" then -- Match both operand sizes. - if args[1].opsize == sub(szm, 2, 2) and - args[2].opsize == sub(szm, 3, 3) then - dopattern(pat, args, sz, params.op, needrex) -- Process pattern. - return + if prefix == "/" then -- Exactly match leading operand sizes. + for i = #szm,1,-1 do + if i == 1 then + dopattern(pat, args, sz, params.op, needrex) -- Process pattern. + return + elseif args[i-1].opsize ~= sub(szm, i, i) then + break + end end else -- Match common operand size. local szp = sz @@ -1717,8 +2153,8 @@ if x64 then rex = a.reg > 7 and 9 or 8 end end - wputop(sz, opcode, rex) - if vreg then waction("VREG", vreg); wputxb(0) end + local psz, sk = wputop(sz, opcode, rex, nil, vreg) + wvreg("opcode", vreg, psz, sk) waction("IMM_D", format("(unsigned int)(%s)", op64)) waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) end @@ -1730,14 +2166,16 @@ end local function op_data(params) if not params then return "imm..." end local sz = sub(params.op, 2, 2) - if sz == "a" then sz = addrsize end + if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end for _,p in ipairs(params) do - local a = parseoperand(p) + local a = parseoperand(p, sz == "q") if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then werror("bad mode or size in `"..p.."'") end if a.mode == "iJ" then wputlabel("IMM_", a.imm, 1) + elseif sz == "q" then + wputqarg(a.imm) else wputszarg(sz, a.imm) end @@ -1749,7 +2187,11 @@ map_op[".byte_*"] = op_data map_op[".sbyte_*"] = op_data map_op[".word_*"] = op_data map_op[".dword_*"] = op_data +map_op[".qword_*"] = op_data map_op[".aword_*"] = op_data +map_op[".long_*"] = op_data +map_op[".quad_*"] = op_data +map_op[".addr_*"] = op_data ------------------------------------------------------------------------------ diff --git a/dynasm/dynasm.lua b/dynasm/dynasm.lua index 2c773cb6..95251b93 100644 --- a/dynasm/dynasm.lua +++ b/dynasm/dynasm.lua @@ -10,9 +10,9 @@ local _info = { name = "DynASM", description = "A dynamic assembler for code generation engines", - version = "1.3.0", - vernum = 10300, - release = "2011-05-05", + version = "1.5.0", + vernum = 10500, + release = "2021-05-02", author = "Mike Pall", url = "https://luajit.org/dynasm.html", license = "MIT", @@ -630,6 +630,7 @@ end -- Load architecture-specific module. local function loadarch(arch) if not match(arch, "^[%w_]+$") then return "bad arch name" end + _G._map_def = map_def local ok, m_arch = pcall(require, "dasm_"..arch) if not ok then return "cannot load module: "..m_arch end g_arch = m_arch diff --git a/etc/luajit.pc b/etc/luajit.pc index 9bac3a8b..39e1e577 100644 --- a/etc/luajit.pc +++ b/etc/luajit.pc @@ -1,8 +1,8 @@ # Package information for LuaJIT to be used by pkg-config. majver=2 -minver=0 -relver=5 -version=${majver}.${minver}.${relver} +minver=1 +relver=0 +version=${majver}.${minver}.${relver}-beta3 abiver=5.1 prefix=/usr/local diff --git a/src/.gitignore b/src/.gitignore index fc94e82c..1a30573c 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -4,4 +4,4 @@ lj_ffdef.h lj_libdef.h lj_recdef.h lj_folddef.h -lj_vm.s +lj_vm.[sS] diff --git a/src/Makefile b/src/Makefile index c4d0b14d..30d64be2 100644 --- a/src/Makefile +++ b/src/Makefile @@ -11,8 +11,8 @@ ############################################################################## MAJVER= 2 -MINVER= 0 -RELVER= 5 +MINVER= 1 +RELVER= 0 ABIVER= 5.1 NODOTABIVER= 51 @@ -44,17 +44,14 @@ CCOPT= -O2 -fomit-frame-pointer # # Target-specific compiler options: # -# x86 only: it's recommended to compile at least for i686. Better yet, -# compile for an architecture that has SSE2, too (-msse -msse2). -# # x86/x64 only: For GCC 4.2 or higher and if you don't intend to distribute # the binaries to a different machine you could also use: -march=native # -CCOPT_x86= -march=i686 +CCOPT_x86= -march=i686 -msse -msse2 -mfpmath=sse CCOPT_x64= CCOPT_arm= +CCOPT_arm64= CCOPT_ppc= -CCOPT_ppcspe= CCOPT_mips= # CCDEBUG= @@ -113,6 +110,9 @@ XCFLAGS= #XCFLAGS+= -DLUAJIT_NUMMODE=1 #XCFLAGS+= -DLUAJIT_NUMMODE=2 # +# Disable LJ_GC64 mode for x64. +#XCFLAGS+= -DLUAJIT_DISABLE_GC64 +# ############################################################################## ############################################################################## @@ -124,15 +124,14 @@ XCFLAGS= # # Use the system provided memory allocator (realloc) instead of the # bundled memory allocator. This is slower, but sometimes helpful for -# debugging. This option cannot be enabled on x64, since realloc usually -# doesn't return addresses in the right address range. +# debugging. This option cannot be enabled on x64 without GC64, since +# realloc usually doesn't return addresses in the right address range. # OTOH this option is mandatory for Valgrind's memcheck tool on x64 and # the only way to get useful results from it for all other architectures. #XCFLAGS+= -DLUAJIT_USE_SYSMALLOC # # This define is required to run LuaJIT under Valgrind. The Valgrind # header files must be installed. You should enable debug information, too. -# Use --suppressions=lj.supp to avoid some false positives. #XCFLAGS+= -DLUAJIT_USE_VALGRIND # # This is the client for the GDB JIT API. GDB 7.0 or higher is required @@ -189,7 +188,8 @@ endif # make HOST_CC="gcc -m32" CROSS=i586-mingw32msvc- TARGET_SYS=Windows # make HOST_CC="gcc -m32" CROSS=powerpc-linux-gnu- -CCOPTIONS= $(CCDEBUG) $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) +ASOPTIONS= $(CCOPT) $(CCWARN) $(XCFLAGS) $(CFLAGS) +CCOPTIONS= $(CCDEBUG) $(ASOPTIONS) LDOPTIONS= $(CCDEBUG) $(LDFLAGS) HOST_CC= $(CC) @@ -229,6 +229,7 @@ TARGET_XLDFLAGS= TARGET_XLIBS= -lm TARGET_TCFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) TARGET_ACFLAGS= $(CCOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) +TARGET_ASFLAGS= $(ASOPTIONS) $(TARGET_XCFLAGS) $(TARGET_FLAGS) $(TARGET_CFLAGS) TARGET_ALDFLAGS= $(LDOPTIONS) $(TARGET_XLDFLAGS) $(TARGET_FLAGS) $(TARGET_LDFLAGS) TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SHLDFLAGS) TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) @@ -243,17 +244,29 @@ else ifneq (,$(findstring LJ_TARGET_ARM ,$(TARGET_TESTARCH))) TARGET_LJARCH= arm else +ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) + ifneq (,$(findstring __AARCH64EB__ ,$(TARGET_TESTARCH))) + TARGET_ARCH= -D__AARCH64EB__=1 + endif + TARGET_LJARCH= arm64 +else ifneq (,$(findstring LJ_TARGET_PPC ,$(TARGET_TESTARCH))) + ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) + TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_LE + else + TARGET_ARCH= -DLJ_ARCH_ENDIAN=LUAJIT_BE + endif TARGET_LJARCH= ppc else -ifneq (,$(findstring LJ_TARGET_PPCSPE ,$(TARGET_TESTARCH))) - TARGET_LJARCH= ppcspe -else ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) ifneq (,$(findstring MIPSEL ,$(TARGET_TESTARCH))) TARGET_ARCH= -D__MIPSEL__=1 endif - TARGET_LJARCH= mips + ifneq (,$(findstring LJ_TARGET_MIPS64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= mips64 + else + TARGET_LJARCH= mips + endif else $(error Unsupported target architecture) endif @@ -267,6 +280,7 @@ ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) TARGET_SYS= PS3 TARGET_ARCH+= -D__CELLOS_LV2__ TARGET_XCFLAGS+= -DLUAJIT_USE_SYSMALLOC + TARGET_XLIBS+= -lpthread endif TARGET_XCFLAGS+= $(CCOPT_$(TARGET_LJARCH)) @@ -306,20 +320,27 @@ ifeq (Darwin,$(TARGET_SYS)) $(error missing: export MACOSX_DEPLOYMENT_TARGET=XX.YY) endif TARGET_STRIP+= -x + TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC TARGET_DYNXLDOPTS= TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) - ifeq (x64,$(TARGET_LJARCH)) - TARGET_XLDFLAGS+= -pagezero_size 10000 -image_base 100000000 - TARGET_XSHLDFLAGS+= -image_base 7fff04c4a000 - endif else ifeq (iOS,$(TARGET_SYS)) TARGET_STRIP+= -x TARGET_XSHLDFLAGS= -dynamiclib -single_module -undefined dynamic_lookup -fPIC TARGET_DYNXLDOPTS= TARGET_XSHLDFLAGS+= -install_name $(TARGET_DYLIBPATH) -compatibility_version $(MAJVER).$(MINVER) -current_version $(MAJVER).$(MINVER).$(RELVER) + ifeq (arm64,$(TARGET_LJARCH)) + TARGET_XCFLAGS+= -fno-omit-frame-pointer + endif else + ifeq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) + # Find out whether the target toolchain always generates unwind tables. + TARGET_TESTUNWIND=$(shell exec 2>/dev/null; echo 'extern void b(void);int a(void){b();return 0;}' | $(TARGET_CC) -c -x c - -o tmpunwind.o && { grep -qa -e eh_frame -e __unwind_info tmpunwind.o || grep -qU -e eh_frame -e __unwind_info tmpunwind.o; } && echo E; rm -f tmpunwind.o) + ifneq (,$(findstring E,$(TARGET_TESTUNWIND))) + TARGET_XCFLAGS+= -DLUAJIT_UNWIND_EXTERNAL + endif + endif ifneq (SunOS,$(TARGET_SYS)) ifneq (PS3,$(TARGET_SYS)) TARGET_XLDFLAGS+= -Wl,-E @@ -346,7 +367,7 @@ ifneq ($(HOST_SYS),$(TARGET_SYS)) HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX else ifeq (iOS,$(TARGET_SYS)) - HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX + HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OSX -DTARGET_OS_IPHONE=1 else HOST_XCFLAGS+= -DLUAJIT_OS=LUAJIT_OS_OTHER endif @@ -379,6 +400,11 @@ DASM_XFLAGS= DASM_AFLAGS= DASM_ARCH= $(TARGET_LJARCH) +ifneq (,$(findstring LJ_LE 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D ENDIAN_LE +else + DASM_AFLAGS+= -D ENDIAN_BE +endif ifneq (,$(findstring LJ_ARCH_BITS 64,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D P64 endif @@ -411,19 +437,19 @@ DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subs ifeq (Windows,$(TARGET_SYS)) DASM_AFLAGS+= -D WIN endif -ifeq (x86,$(TARGET_LJARCH)) - ifneq (,$(findstring __SSE2__ 1,$(TARGET_TESTARCH))) - DASM_AFLAGS+= -D SSE - endif -else ifeq (x64,$(TARGET_LJARCH)) - DASM_ARCH= x86 + ifeq (,$(findstring LJ_FR2 1,$(TARGET_TESTARCH))) + DASM_ARCH= x86 + endif else ifeq (arm,$(TARGET_LJARCH)) ifeq (iOS,$(TARGET_SYS)) DASM_AFLAGS+= -D IOS endif else +ifneq (,$(findstring LJ_TARGET_MIPSR6 ,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D MIPSR6 +endif ifeq (ppc,$(TARGET_LJARCH)) ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D SQRT @@ -431,7 +457,7 @@ ifeq (ppc,$(TARGET_LJARCH)) ifneq (,$(findstring LJ_ARCH_ROUND 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D ROUND endif - ifneq (,$(findstring LJ_ARCH_PPC64 1,$(TARGET_TESTARCH))) + ifneq (,$(findstring LJ_ARCH_PPC32ON64 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D GPR64 endif ifeq (PS3,$(TARGET_SYS)) @@ -440,7 +466,6 @@ ifeq (ppc,$(TARGET_LJARCH)) endif endif endif -endif DASM_FLAGS= $(DASM_XFLAGS) $(DASM_AFLAGS) DASM_DASC= vm_$(DASM_ARCH).dasc @@ -453,19 +478,22 @@ BUILDVM_X= $(BUILDVM_T) HOST_O= $(MINILUA_O) $(BUILDVM_O) HOST_T= $(MINILUA_T) $(BUILDVM_T) -LJVM_S= lj_vm.s +LJVM_S= lj_vm.S LJVM_O= lj_vm.o LJVM_BOUT= $(LJVM_S) LJVM_MODE= elfasm LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ - lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o + lib_io.o lib_os.o lib_package.o lib_debug.o lib_jit.o lib_ffi.o \ + lib_buffer.o LJLIB_C= $(LJLIB_O:.o=.c) -LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o \ +LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ - lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ - lj_api.o lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ + lj_prng.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \ + lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_serialize.o \ + lj_api.o lj_profile.o \ + lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ lj_mcode.o lj_snap.o lj_record.o lj_crecord.o lj_ffrecord.o \ @@ -580,12 +608,15 @@ E= @echo default all: $(TARGET_T) amalg: - @grep "^[+|]" ljamalg.c $(MAKE) all "LJCORE_O=ljamalg.o" clean: $(HOST_RM) $(ALL_RM) +libbc: + ./$(LUAJIT_T) host/genlibbc.lua -o host/buildvm_libbc.h $(LJLIB_C) + $(MAKE) all + depend: @for file in $(ALL_HDRGEN); do \ test -f $$file || touch $$file; \ @@ -600,7 +631,7 @@ depend: test -s $$file || $(HOST_RM) $$file; \ done -.PHONY: default all amalg clean depend +.PHONY: default all amalg clean libbc depend ############################################################################## # Rules for generated files. @@ -610,7 +641,7 @@ $(MINILUA_T): $(MINILUA_O) $(E) "HOSTLINK $@" $(Q)$(HOST_CC) $(HOST_ALDFLAGS) -o $@ $(MINILUA_O) $(MINILUA_LIBS) $(HOST_ALIBS) -host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) lj_arch.h lua.h luaconf.h +host/buildvm_arch.h: $(DASM_DASC) $(DASM_DEP) $(DASM_DIR)/*.lua lj_arch.h lua.h luaconf.h $(E) "DYNASM $@" $(Q)$(DASM) $(DASM_FLAGS) -o $@ $(DASM_DASC) @@ -657,10 +688,10 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< -%.o: %.s +%.o: %.S $(E) "ASM $@" - $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< - $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< + $(Q)$(TARGET_DYNCC) $(TARGET_ASFLAGS) -c -o $(@:.o=_dyn.o) $< + $(Q)$(TARGET_CC) $(TARGET_ASFLAGS) -c -o $@ $< $(LUAJIT_O): $(E) "CC $@" diff --git a/src/Makefile.dep b/src/Makefile.dep index 9e14d617..1ad6701a 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -1,66 +1,79 @@ lib_aux.o: lib_aux.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ lj_arch.h lj_err.h lj_errmsg.h lj_state.h lj_trace.h lj_jit.h lj_ir.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lj_alloc.h + lj_dispatch.h lj_bc.h lj_traceerr.h lj_lib.h lib_base.o: lib_base.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h \ - lj_tab.h lj_meta.h lj_state.h lj_ctype.h lj_cconv.h lj_bc.h lj_ff.h \ - lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h lj_strscan.h \ - lj_lib.h lj_libdef.h + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h \ + lj_str.h lj_tab.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h \ + lj_cconv.h lj_ff.h lj_ffdef.h lj_dispatch.h lj_jit.h lj_ir.h lj_char.h \ + lj_strscan.h lj_strfmt.h lj_lib.h lj_libdef.h lib_bit.o: lib_bit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_lib.h lj_libdef.h + lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_strscan.h \ + lj_strfmt.h lj_ctype.h lj_cdata.h lj_cconv.h lj_carith.h lj_ff.h \ + lj_ffdef.h lj_lib.h lj_libdef.h +lib_buffer.o: lib_buffer.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ + lj_tab.h lj_udata.h lj_meta.h lj_ctype.h lj_cdata.h lj_cconv.h \ + lj_strfmt.h lj_serialize.h lj_lib.h lj_libdef.h lib_debug.o: lib_debug.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_lib.h \ lj_libdef.h lib_ffi.o: lib_ffi.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h \ lj_ctype.h lj_cparse.h lj_cdata.h lj_cconv.h lj_carith.h lj_ccall.h \ - lj_ccallback.h lj_clib.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h + lj_ccallback.h lj_clib.h lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h \ + lj_libdef.h lib_init.o: lib_init.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h lib_io.o: lib_io.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_ff.h \ - lj_ffdef.h lj_lib.h lj_libdef.h -lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_arch.h \ - lj_obj.h lj_def.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ - lj_bc.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_target.h \ - lj_target_*.h lj_dispatch.h lj_vm.h lj_vmevent.h lj_lib.h luajit.h \ - lj_libdef.h + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_state.h \ + lj_strfmt.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h +lib_jit.o: lib_jit.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h \ + lj_state.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ + lj_target.h lj_target_*.h lj_trace.h lj_dispatch.h lj_traceerr.h \ + lj_vm.h lj_vmevent.h lj_lib.h luajit.h lj_libdef.h lib_math.o: lib_math.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_libdef.h + lj_def.h lj_arch.h lj_lib.h lj_vm.h lj_prng.h lj_libdef.h lib_os.o: lib_os.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lj_libdef.h + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_lib.h \ + lj_libdef.h lib_package.o: lib_package.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ lj_def.h lj_arch.h lj_err.h lj_errmsg.h lj_lib.h lib_string.o: lib_string.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h \ - lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h lj_char.h \ - lj_lib.h lj_libdef.h + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ + lj_tab.h lj_meta.h lj_state.h lj_ff.h lj_ffdef.h lj_bcdump.h lj_lex.h \ + lj_char.h lj_strfmt.h lj_lib.h lj_libdef.h lib_table.o: lib_table.c lua.h luaconf.h lauxlib.h lualib.h lj_obj.h \ - lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_lib.h \ - lj_libdef.h -lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h + lj_def.h lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h \ + lj_tab.h lj_ff.h lj_ffdef.h lj_lib.h lj_libdef.h +lj_alloc.o: lj_alloc.c lj_def.h lua.h luaconf.h lj_arch.h lj_alloc.h \ + lj_prng.h lj_api.o: lj_api.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ lj_meta.h lj_state.h lj_bc.h lj_frame.h lj_trace.h lj_jit.h lj_ir.h \ - lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h + lj_dispatch.h lj_traceerr.h lj_vm.h lj_strscan.h lj_strfmt.h lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h lj_jit.h \ - lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h lj_traceerr.h \ - lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h lj_emit_*.h \ - lj_asm_*.h + lj_buf.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_ir.h \ + lj_jit.h lj_ircall.h lj_iropt.h lj_mcode.h lj_trace.h lj_dispatch.h \ + lj_traceerr.h lj_snap.h lj_asm.h lj_vm.h lj_target.h lj_target_*.h \ + lj_emit_*.h lj_asm_*.h +lj_assert.o: lj_assert.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ lj_bcdef.h lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_bc.h lj_ctype.h \ - lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h + lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \ + lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \ + lj_strfmt.h lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h lj_ir.h \ - lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h + lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \ + lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h +lj_buf.o: lj_buf.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ + lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_strfmt.h lj_carith.o: lj_carith.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ctype.h lj_cconv.h \ - lj_cdata.h lj_carith.h + lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_meta.h lj_ir.h lj_ctype.h \ + lj_cconv.h lj_cdata.h lj_carith.h lj_strscan.h lj_ccall.o: lj_ccall.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ - lj_cdata.h lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ + lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h \ + lj_ccall.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ lj_traceerr.h lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_state.h lj_frame.h \ @@ -68,110 +81,127 @@ lj_ccallback.o: lj_ccallback.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_target_*.h lj_mcode.h lj_jit.h lj_ir.h lj_trace.h lj_dispatch.h \ lj_traceerr.h lj_vm.h lj_cconv.o: lj_cconv.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_gc.h lj_cdata.h lj_cconv.h \ - lj_ccallback.h + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ctype.h \ + lj_cdata.h lj_cconv.h lj_ccallback.h lj_cdata.o: lj_cdata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cconv.h \ - lj_cdata.h + lj_gc.h lj_err.h lj_errmsg.h lj_tab.h lj_ctype.h lj_cconv.h lj_cdata.h lj_char.o: lj_char.c lj_char.h lj_def.h lua.h luaconf.h lj_clib.o: lj_clib.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_err.h lj_errmsg.h lj_tab.h lj_str.h lj_udata.h lj_ctype.h lj_cconv.h \ - lj_cdata.h lj_clib.h + lj_cdata.h lj_clib.h lj_strfmt.h lj_cparse.o: lj_cparse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_ctype.h lj_cparse.h lj_frame.h \ - lj_bc.h lj_vm.h lj_char.h lj_strscan.h + lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_ctype.h lj_cparse.h \ + lj_frame.h lj_bc.h lj_vm.h lj_char.h lj_strscan.h lj_strfmt.h lj_crecord.o: lj_crecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h \ - lj_gc.h lj_cdata.h lj_cparse.h lj_cconv.h lj_clib.h lj_ccall.h lj_ff.h \ - lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ + lj_err.h lj_errmsg.h lj_tab.h lj_frame.h lj_bc.h lj_ctype.h lj_gc.h \ + lj_cdata.h lj_cparse.h lj_cconv.h lj_carith.h lj_clib.h lj_ccall.h \ + lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_snap.h \ - lj_crecord.h + lj_crecord.h lj_strfmt.h lj_ctype.o: lj_ctype.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_ccallback.h + lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_strfmt.h lj_ctype.h \ + lj_ccallback.h lj_buf.h lj_debug.o: lj_debug.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_state.h lj_frame.h \ - lj_bc.h lj_vm.h lj_jit.h lj_ir.h + lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ + lj_state.h lj_frame.h lj_bc.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.o: lj_dispatch.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_func.h lj_str.h lj_tab.h lj_meta.h lj_debug.h \ - lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h lj_jit.h lj_ir.h \ - lj_ccallback.h lj_ctype.h lj_gc.h lj_trace.h lj_dispatch.h lj_traceerr.h \ - lj_vm.h luajit.h + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_func.h lj_tab.h \ + lj_meta.h lj_debug.h lj_state.h lj_frame.h lj_bc.h lj_ff.h lj_ffdef.h \ + lj_strfmt.h lj_jit.h lj_ir.h lj_ccallback.h lj_ctype.h lj_trace.h \ + lj_dispatch.h lj_traceerr.h lj_profile.h lj_vm.h luajit.h lj_err.o: lj_err.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_err.h \ lj_errmsg.h lj_debug.h lj_str.h lj_func.h lj_state.h lj_frame.h lj_bc.h \ lj_ff.h lj_ffdef.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ - lj_traceerr.h lj_vm.h + lj_traceerr.h lj_vm.h lj_strfmt.h lj_ffrecord.o: lj_ffrecord.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_frame.h lj_bc.h lj_ff.h \ - lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ - lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h lj_crecord.h \ - lj_vm.h lj_strscan.h lj_recdef.h + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_frame.h \ + lj_bc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ + lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h lj_ffrecord.h \ + lj_crecord.h lj_vm.h lj_strscan.h lj_strfmt.h lj_serialize.h lj_recdef.h lj_func.o: lj_func.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_func.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_bc.h \ lj_traceerr.h lj_vm.h lj_gc.o: lj_gc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h \ - lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h \ - lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h + lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h lj_udata.h \ + lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h \ + lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_gdbjit.o: lj_gdbjit.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_jit.h \ - lj_ir.h lj_dispatch.h + lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_frame.h lj_bc.h lj_buf.h \ + lj_str.h lj_strfmt.h lj_jit.h lj_ir.h lj_dispatch.h lj_ir.o: lj_ir.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h lj_trace.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h lj_carith.h \ - lj_vm.h lj_strscan.h lj_lib.h + lj_buf.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h lj_iropt.h \ + lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h lj_cdata.h \ + lj_carith.h lj_vm.h lj_strscan.h lj_serialize.h lj_strfmt.h lj_prng.h lj_lex.o: lj_lex.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h lualib.h \ - lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h + lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_ctype.h lj_cdata.h \ + lualib.h lj_state.h lj_lex.h lj_parse.h lj_char.h lj_strscan.h \ + lj_strfmt.h lj_lib.o: lj_lib.c lauxlib.h lua.h luaconf.h lj_obj.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_bc.h \ - lj_dispatch.h lj_jit.h lj_ir.h lj_vm.h lj_strscan.h lj_lib.h + lj_dispatch.h lj_jit.h lj_ir.h lj_ctype.h lj_vm.h lj_strscan.h \ + lj_strfmt.h lj_lex.h lj_bcdump.h lj_lib.h lj_load.o: lj_load.c lua.h luaconf.h lauxlib.h lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_func.h lj_frame.h \ - lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h + lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_func.h \ + lj_frame.h lj_bc.h lj_vm.h lj_lex.h lj_bcdump.h lj_parse.h lj_mcode.o: lj_mcode.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_jit.h lj_ir.h lj_mcode.h lj_trace.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_vm.h + lj_dispatch.h lj_bc.h lj_traceerr.h lj_prng.h lj_vm.h lj_meta.o: lj_meta.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ - lj_vm.h lj_strscan.h + lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_meta.h lj_frame.h \ + lj_bc.h lj_vm.h lj_strscan.h lj_strfmt.h lj_lib.h lj_obj.o: lj_obj.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_opt_dce.o: lj_opt_dce.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_jit.h lj_iropt.h lj_opt_fold.o: lj_opt_fold.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ - lj_bc.h lj_traceerr.h lj_ctype.h lj_gc.h lj_carith.h lj_vm.h \ - lj_strscan.h lj_folddef.h + lj_buf.h lj_gc.h lj_str.h lj_tab.h lj_ir.h lj_jit.h lj_ircall.h \ + lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_ctype.h \ + lj_carith.h lj_vm.h lj_strscan.h lj_strfmt.h lj_folddef.h lj_opt_loop.o: lj_opt_loop.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h \ - lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h lj_vm.h + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h lj_jit.h \ + lj_iropt.h lj_trace.h lj_dispatch.h lj_bc.h lj_traceerr.h lj_snap.h \ + lj_vm.h lj_opt_mem.o: lj_opt_mem.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_tab.h lj_ir.h lj_jit.h lj_iropt.h + lj_tab.h lj_ir.h lj_jit.h lj_iropt.h lj_ircall.h lj_dispatch.h lj_bc.h lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_arch.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h lj_trace.h lj_dispatch.h \ lj_traceerr.h lj_vm.h lj_strscan.h lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_str.h lj_ir.h lj_jit.h lj_ircall.h \ - lj_iropt.h lj_vm.h + lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \ + lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_tab.h lj_func.h \ - lj_state.h lj_bc.h lj_ctype.h lj_lex.h lj_parse.h lj_vm.h lj_vmevent.h + lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ + lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ + lj_vm.h lj_vmevent.h +lj_prng.o: lj_prng.c lj_def.h lua.h luaconf.h lj_arch.h lj_prng.h +lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ + lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h lj_record.o: lj_record.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_meta.h lj_frame.h lj_bc.h \ - lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_ir.h lj_jit.h lj_ircall.h \ - lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h lj_record.h \ - lj_ffrecord.h lj_snap.h lj_vm.h + lj_ctype.h lj_gc.h lj_ff.h lj_ffdef.h lj_debug.h lj_ir.h lj_jit.h \ + lj_ircall.h lj_iropt.h lj_trace.h lj_dispatch.h lj_traceerr.h \ + lj_record.h lj_ffrecord.h lj_snap.h lj_vm.h lj_prng.h +lj_serialize.o: lj_serialize.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_tab.h \ + lj_udata.h lj_ctype.h lj_cdata.h lj_ir.h lj_serialize.h lj_snap.o: lj_snap.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_tab.h lj_state.h lj_frame.h lj_bc.h lj_ir.h lj_jit.h lj_iropt.h \ lj_trace.h lj_dispatch.h lj_traceerr.h lj_snap.h lj_target.h \ lj_target_*.h lj_ctype.h lj_cdata.h lj_state.o: lj_state.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h lj_meta.h \ - lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h lj_ir.h \ - lj_dispatch.h lj_traceerr.h lj_vm.h lj_lex.h lj_alloc.h + lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_func.h \ + lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_trace.h lj_jit.h \ + lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_prng.h lj_lex.h \ + lj_alloc.h luajit.h lj_str.o: lj_str.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ - lj_err.h lj_errmsg.h lj_str.h lj_state.h lj_char.h + lj_err.h lj_errmsg.h lj_str.h lj_char.h lj_prng.h +lj_strfmt.o: lj_strfmt.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ + lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_meta.h lj_state.h \ + lj_char.h lj_strfmt.h lj_ctype.h lj_lib.h +lj_strfmt_num.o: lj_strfmt_num.c lj_obj.h lua.h luaconf.h lj_def.h \ + lj_arch.h lj_buf.h lj_gc.h lj_str.h lj_strfmt.h lj_strscan.o: lj_strscan.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_char.h lj_strscan.h lj_tab.o: lj_tab.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ @@ -180,35 +210,37 @@ lj_trace.o: lj_trace.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_str.h lj_frame.h lj_bc.h \ lj_state.h lj_ir.h lj_jit.h lj_iropt.h lj_mcode.h lj_trace.h \ lj_dispatch.h lj_traceerr.h lj_snap.h lj_gdbjit.h lj_record.h lj_asm.h \ - lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h + lj_vm.h lj_vmevent.h lj_target.h lj_target_*.h lj_prng.h lj_udata.o: lj_udata.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h lj_udata.h + lj_gc.h lj_err.h lj_errmsg.h lj_udata.h lj_vmevent.o: lj_vmevent.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_str.h lj_tab.h lj_state.h lj_dispatch.h lj_bc.h lj_jit.h lj_ir.h \ lj_vm.h lj_vmevent.h lj_vmmath.o: lj_vmmath.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_vm.h -ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_gc.c lj_obj.h lj_def.h \ - lj_arch.h lj_gc.h lj_err.h lj_errmsg.h lj_str.h lj_tab.h lj_func.h \ - lj_udata.h lj_meta.h lj_state.h lj_frame.h lj_bc.h lj_ctype.h lj_cdata.h \ - lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h lj_traceerr.h lj_vm.h lj_err.c \ - lj_debug.h lj_ff.h lj_ffdef.h lj_char.c lj_char.h lj_bc.c lj_bcdef.h \ - lj_obj.c lj_str.c lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h \ - lj_debug.c lj_state.c lj_lex.h lj_alloc.h lj_dispatch.c lj_ccallback.h \ - luajit.h lj_vmevent.c lj_vmevent.h lj_vmmath.c lj_strscan.c lj_api.c \ - lj_lex.c lualib.h lj_parse.h lj_parse.c lj_bcread.c lj_bcdump.h \ - lj_bcwrite.c lj_load.c lj_ctype.c lj_cdata.c lj_cconv.h lj_cconv.c \ - lj_ccall.c lj_ccall.h lj_ccallback.c lj_target.h lj_target_*.h \ - lj_mcode.h lj_carith.c lj_carith.h lj_clib.c lj_clib.h lj_cparse.c \ - lj_cparse.h lj_lib.c lj_lib.h lj_ir.c lj_ircall.h lj_iropt.h \ - lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c lj_opt_dce.c \ - lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c lj_mcode.c \ - lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ +ljamalg.o: ljamalg.c lua.h luaconf.h lauxlib.h lj_assert.c lj_obj.h \ + lj_def.h lj_arch.h lj_gc.c lj_gc.h lj_err.h lj_errmsg.h lj_buf.h \ + lj_str.h lj_tab.h lj_func.h lj_udata.h lj_meta.h lj_state.h lj_frame.h \ + lj_bc.h lj_ctype.h lj_cdata.h lj_trace.h lj_jit.h lj_ir.h lj_dispatch.h \ + lj_traceerr.h lj_vm.h lj_err.c lj_debug.h lj_ff.h lj_ffdef.h lj_strfmt.h \ + lj_char.c lj_char.h lj_bc.c lj_bcdef.h lj_obj.c lj_buf.c lj_str.c \ + lj_prng.h lj_tab.c lj_func.c lj_udata.c lj_meta.c lj_strscan.h lj_lib.h \ + lj_debug.c lj_prng.c lj_state.c lj_lex.h lj_alloc.h luajit.h \ + lj_dispatch.c lj_ccallback.h lj_profile.h lj_vmevent.c lj_vmevent.h \ + lj_vmmath.c lj_strscan.c lj_strfmt.c lj_strfmt_num.c lj_serialize.c \ + lj_serialize.h lj_api.c lj_profile.c lj_lex.c lualib.h lj_parse.h \ + lj_parse.c lj_bcread.c lj_bcdump.h lj_bcwrite.c lj_load.c lj_ctype.c \ + lj_cdata.c lj_cconv.h lj_cconv.c lj_ccall.c lj_ccall.h lj_ccallback.c \ + lj_target.h lj_target_*.h lj_mcode.h lj_carith.c lj_carith.h lj_clib.c \ + lj_clib.h lj_cparse.c lj_cparse.h lj_lib.c lj_ir.c lj_ircall.h \ + lj_iropt.h lj_opt_mem.c lj_opt_fold.c lj_folddef.h lj_opt_narrow.c \ + lj_opt_dce.c lj_opt_loop.c lj_snap.h lj_opt_split.c lj_opt_sink.c \ + lj_mcode.c lj_snap.c lj_record.c lj_record.h lj_ffrecord.h lj_crecord.c \ lj_crecord.h lj_ffrecord.c lj_recdef.h lj_asm.c lj_asm.h lj_emit_*.h \ lj_asm_*.h lj_trace.c lj_gdbjit.h lj_gdbjit.c lj_alloc.c lib_aux.c \ lib_base.c lj_libdef.h lib_math.c lib_string.c lib_table.c lib_io.c \ lib_os.c lib_package.c lib_debug.c lib_bit.c lib_jit.c lib_ffi.c \ - lib_init.c + lib_buffer.c lib_init.c luajit.o: luajit.c lua.h luaconf.h lauxlib.h lualib.h luajit.h lj_arch.h host/buildvm.o: host/buildvm.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_gc.h lj_obj.h lj_bc.h lj_ir.h \ @@ -220,7 +252,8 @@ host/buildvm_asm.o: host/buildvm_asm.c host/buildvm.h lj_def.h lua.h luaconf.h \ host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ - lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_lib.h lj_obj.h + lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \ + host/buildvm_libbc.h host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h host/minilua.o: host/minilua.c diff --git a/src/host/buildvm.c b/src/host/buildvm.c index a12245fd..9ee47ada 100644 --- a/src/host/buildvm.c +++ b/src/host/buildvm.c @@ -18,8 +18,10 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_bc.h" +#if LJ_HASJIT #include "lj_ir.h" #include "lj_ircall.h" +#endif #include "lj_frame.h" #include "lj_dispatch.h" #if LJ_HASFFI @@ -59,10 +61,10 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); #include "../dynasm/dasm_x86.h" #elif LJ_TARGET_ARM #include "../dynasm/dasm_arm.h" +#elif LJ_TARGET_ARM64 +#include "../dynasm/dasm_arm64.h" #elif LJ_TARGET_PPC #include "../dynasm/dasm_ppc.h" -#elif LJ_TARGET_PPCSPE -#include "../dynasm/dasm_ppc.h" #elif LJ_TARGET_MIPS #include "../dynasm/dasm_mips.h" #else @@ -110,11 +112,11 @@ static const char *sym_decorate(BuildCtx *ctx, if (p) { #if LJ_TARGET_X86ORX64 if (!LJ_64 && (ctx->mode == BUILD_coffasm || ctx->mode == BUILD_peobj)) - name[0] = '@'; + name[0] = name[1] == 'R' ? '_' : '@'; /* Just for _RtlUnwind@16. */ else *p = '\0'; -#elif (LJ_TARGET_PPC || LJ_TARGET_PPCSPE) && !LJ_TARGET_CONSOLE - /* Keep @plt. */ +#elif LJ_TARGET_PPC && !LJ_TARGET_CONSOLE + /* Keep @plt etc. */ #else *p = '\0'; #endif @@ -179,6 +181,7 @@ static int build_code(BuildCtx *ctx) ctx->nreloc = 0; ctx->globnames = globnames; + ctx->extnames = extnames; ctx->relocsym = (const char **)malloc(NRELOCSYM*sizeof(const char *)); ctx->nrelocsym = 0; for (i = 0; i < (int)NRELOCSYM; i++) relocmap[i] = -1; @@ -249,6 +252,7 @@ BCDEF(BCNAME) NULL }; +#if LJ_HASJIT const char *const ir_names[] = { #define IRNAME(name, m, m1, m2) #name, IRDEF(IRNAME) @@ -289,7 +293,9 @@ static const char *const trace_errors[] = { #include "lj_traceerr.h" NULL }; +#endif +#if LJ_HASJIT static const char *lower(char *buf, const char *s) { char *p = buf; @@ -300,6 +306,7 @@ static const char *lower(char *buf, const char *s) *p = '\0'; return buf; } +#endif /* Emit C source code for bytecode-related definitions. */ static void emit_bcdef(BuildCtx *ctx) @@ -317,23 +324,26 @@ static void emit_bcdef(BuildCtx *ctx) /* Emit VM definitions as Lua code for debug modules. */ static void emit_vmdef(BuildCtx *ctx) { +#if LJ_HASJIT char buf[80]; +#endif int i; fprintf(ctx->fp, "-- This is a generated file. DO NOT EDIT!\n\n"); - fprintf(ctx->fp, "module(...)\n\n"); + fprintf(ctx->fp, "return {\n\n"); fprintf(ctx->fp, "bcnames = \""); for (i = 0; bc_names[i]; i++) fprintf(ctx->fp, "%-6s", bc_names[i]); - fprintf(ctx->fp, "\"\n\n"); + fprintf(ctx->fp, "\",\n\n"); +#if LJ_HASJIT fprintf(ctx->fp, "irnames = \""); for (i = 0; ir_names[i]; i++) fprintf(ctx->fp, "%-6s", ir_names[i]); - fprintf(ctx->fp, "\"\n\n"); + fprintf(ctx->fp, "\",\n\n"); fprintf(ctx->fp, "irfpm = { [0]="); for (i = 0; irfpm_names[i]; i++) fprintf(ctx->fp, "\"%s\", ", lower(buf, irfpm_names[i])); - fprintf(ctx->fp, "}\n\n"); + fprintf(ctx->fp, "},\n\n"); fprintf(ctx->fp, "irfield = { [0]="); for (i = 0; irfield_names[i]; i++) { @@ -343,17 +353,18 @@ static void emit_vmdef(BuildCtx *ctx) if (p) *p = '.'; fprintf(ctx->fp, "\"%s\", ", buf); } - fprintf(ctx->fp, "}\n\n"); + fprintf(ctx->fp, "},\n\n"); fprintf(ctx->fp, "ircall = {\n[0]="); for (i = 0; ircall_names[i]; i++) fprintf(ctx->fp, "\"%s\",\n", ircall_names[i]); - fprintf(ctx->fp, "}\n\n"); + fprintf(ctx->fp, "},\n\n"); fprintf(ctx->fp, "traceerr = {\n[0]="); for (i = 0; trace_errors[i]; i++) fprintf(ctx->fp, "\"%s\",\n", trace_errors[i]); - fprintf(ctx->fp, "}\n\n"); + fprintf(ctx->fp, "},\n\n"); +#endif } /* -- Argument parsing ---------------------------------------------------- */ @@ -490,6 +501,7 @@ int main(int argc, char **argv) case BUILD_vmdef: emit_vmdef(ctx); emit_lib(ctx); + fprintf(ctx->fp, "}\n\n"); break; case BUILD_ffdef: case BUILD_libdef: diff --git a/src/host/buildvm.h b/src/host/buildvm.h index 3b3110fb..18cd8848 100644 --- a/src/host/buildvm.h +++ b/src/host/buildvm.h @@ -82,6 +82,7 @@ typedef struct BuildCtx { const char *beginsym; /* Strings generated by DynASM. */ const char *const *globnames; + const char *const *extnames; const char *dasm_ident; const char *dasm_arch; /* Relocations. */ diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c index 390abbdd..7baa011f 100644 --- a/src/host/buildvm_asm.c +++ b/src/host/buildvm_asm.c @@ -51,8 +51,8 @@ static const char *const jccnames[] = { "js", "jns", "jpe", "jpo", "jl", "jge", "jle", "jg" }; -/* Emit relocation for the incredibly stupid OSX assembler. */ -static void emit_asm_reloc_mach(BuildCtx *ctx, uint8_t *cp, int n, +/* Emit x86/x64 text relocations. */ +static void emit_asm_reloc_text(BuildCtx *ctx, uint8_t *cp, int n, const char *sym) { const char *opname = NULL; @@ -71,6 +71,20 @@ err: exit(1); } emit_asm_bytes(ctx, cp, n); + if (strncmp(sym+(*sym == '_'), LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) { + /* Various fixups for external symbols outside of our binary. */ + if (ctx->mode == BUILD_elfasm) { + if (LJ_32) + fprintf(ctx->fp, "#if __PIC__\n\t%s lj_wrap_%s\n#else\n", opname, sym); + fprintf(ctx->fp, "\t%s %s@PLT\n", opname, sym); + if (LJ_32) + fprintf(ctx->fp, "#endif\n"); + return; + } else if (LJ_32 && ctx->mode == BUILD_machasm) { + fprintf(ctx->fp, "\t%s L%s$stub\n", opname, sym); + return; + } + } fprintf(ctx->fp, "\t%s %s\n", opname, sym); } #else @@ -79,10 +93,14 @@ static void emit_asm_words(BuildCtx *ctx, uint8_t *p, int n) { int i; for (i = 0; i < n; i += 4) { + uint32_t ins = *(uint32_t *)(p+i); +#if LJ_TARGET_ARM64 && LJ_BE + ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */ +#endif if ((i & 15) == 0) - fprintf(ctx->fp, "\t.long 0x%08x", *(uint32_t *)(p+i)); + fprintf(ctx->fp, "\t.long 0x%08x", ins); else - fprintf(ctx->fp, ",0x%08x", *(uint32_t *)(p+i)); + fprintf(ctx->fp, ",0x%08x", ins); if ((i & 15) == 12) putc('\n', ctx->fp); } if ((n & 15) != 0) putc('\n', ctx->fp); @@ -107,7 +125,16 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, ins, sym); exit(1); } -#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE +#elif LJ_TARGET_ARM64 + if ((ins >> 26) == 0x25u) { + fprintf(ctx->fp, "\tbl %s\n", sym); + } else { + fprintf(stderr, + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); + exit(1); + } +#elif LJ_TARGET_PPC #if LJ_TARGET_PS3 #define TOCPREFIX "." #else @@ -228,11 +255,20 @@ void emit_asm(BuildCtx *ctx) #if LJ_TARGET_ARM && defined(__GNUC__) && !LJ_NO_UNWIND /* This should really be moved into buildvm_arm.dasc. */ +#if LJ_ARCH_HASFPU + fprintf(ctx->fp, + ".fnstart\n" + ".save {r5, r6, r7, r8, r9, r10, r11, lr}\n" + ".vsave {d8-d15}\n" + ".save {r4}\n" + ".pad #28\n"); +#else fprintf(ctx->fp, ".fnstart\n" ".save {r4, r5, r6, r7, r8, r9, r10, r11, lr}\n" ".pad #28\n"); #endif +#endif #if LJ_TARGET_MIPS fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); #endif @@ -255,8 +291,9 @@ void emit_asm(BuildCtx *ctx) BuildReloc *r = &ctx->reloc[rel]; int n = r->ofs - ofs; #if LJ_TARGET_X86ORX64 - if (ctx->mode == BUILD_machasm && r->type != 0) { - emit_asm_reloc_mach(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); + if (r->type != 0 && + (ctx->mode == BUILD_elfasm || ctx->mode == BUILD_machasm)) { + emit_asm_reloc_text(ctx, ctx->code+ofs, n, ctx->relocsym[r->sym]); } else { emit_asm_bytes(ctx, ctx->code+ofs, n); emit_asm_reloc(ctx, r->type, ctx->relocsym[r->sym]); @@ -290,10 +327,7 @@ void emit_asm(BuildCtx *ctx) #if !(LJ_TARGET_PS3 || LJ_TARGET_PSVITA) fprintf(ctx->fp, "\t.section .note.GNU-stack,\"\"," ELFASM_PX "progbits\n"); #endif -#if LJ_TARGET_PPCSPE - /* Soft-float ABI + SPE. */ - fprintf(ctx->fp, "\t.gnu_attribute 4, 2\n\t.gnu_attribute 8, 3\n"); -#elif LJ_TARGET_PPC && !LJ_TARGET_PS3 +#if LJ_TARGET_PPC && !LJ_TARGET_PS3 && !LJ_ABI_SOFTFP /* Hard-float ABI. */ fprintf(ctx->fp, "\t.gnu_attribute 4, 1\n"); #endif diff --git a/src/host/buildvm_fold.c b/src/host/buildvm_fold.c index 7f9ac058..edb55768 100644 --- a/src/host/buildvm_fold.c +++ b/src/host/buildvm_fold.c @@ -5,6 +5,7 @@ #include "buildvm.h" #include "lj_obj.h" +#if LJ_HASJIT #include "lj_ir.h" /* Context for the folding hash table generator. */ @@ -226,4 +227,10 @@ void emit_fold(BuildCtx *ctx) makehash(ctx); } +#else +void emit_fold(BuildCtx *ctx) +{ + UNUSED(ctx); +} +#endif diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c index a9829d0d..b125ea12 100644 --- a/src/host/buildvm_lib.c +++ b/src/host/buildvm_lib.c @@ -5,7 +5,9 @@ #include "buildvm.h" #include "lj_obj.h" +#include "lj_bc.h" #include "lj_lib.h" +#include "buildvm_libbc.h" /* Context for library definitions. */ static uint8_t obuf[8192]; @@ -151,6 +153,62 @@ static void libdef_func(BuildCtx *ctx, char *p, int arg) regfunc = REGFUNC_OK; } +static uint8_t *libdef_uleb128(uint8_t *p, uint32_t *vv) +{ + uint32_t v = *p++; + if (v >= 0x80) { + int sh = 0; v &= 0x7f; + do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80); + } + *vv = v; + return p; +} + +static void libdef_fixupbc(uint8_t *p) +{ + uint32_t i, sizebc; + p += 4; + p = libdef_uleb128(p, &sizebc); + p = libdef_uleb128(p, &sizebc); + p = libdef_uleb128(p, &sizebc); + for (i = 0; i < sizebc; i++, p += 4) { + uint8_t op = p[libbc_endian ? 3 : 0]; + uint8_t ra = p[libbc_endian ? 2 : 1]; + uint8_t rc = p[libbc_endian ? 1 : 2]; + uint8_t rb = p[libbc_endian ? 0 : 3]; + if (!LJ_DUALNUM && op == BC_ISTYPE && rc == ~LJ_TNUMX+1) { + op = BC_ISNUM; rc++; + } + p[LJ_ENDIAN_SELECT(0, 3)] = op; + p[LJ_ENDIAN_SELECT(1, 2)] = ra; + p[LJ_ENDIAN_SELECT(2, 1)] = rc; + p[LJ_ENDIAN_SELECT(3, 0)] = rb; + } +} + +static void libdef_lua(BuildCtx *ctx, char *p, int arg) +{ + UNUSED(arg); + if (ctx->mode == BUILD_libdef) { + int i; + for (i = 0; libbc_map[i].name != NULL; i++) { + if (!strcmp(libbc_map[i].name, p)) { + int ofs = libbc_map[i].ofs; + int len = libbc_map[i+1].ofs - ofs; + obuf[2]++; /* Bump hash table size. */ + *optr++ = LIBINIT_LUA; + libdef_name(p, 0); + memcpy(optr, libbc_code + ofs, len); + libdef_fixupbc(optr); + optr += len; + return; + } + } + fprintf(stderr, "Error: missing libbc definition for %s\n", p); + exit(1); + } +} + static uint32_t find_rec(char *name) { char *p = (char *)obuf; @@ -277,6 +335,7 @@ static const LibDefHandler libdef_handlers[] = { { "CF(", ")", libdef_func, LIBINIT_CF }, { "ASM(", ")", libdef_func, LIBINIT_ASM }, { "ASM_(", ")", libdef_func, LIBINIT_ASM_ }, + { "LUA(", ")", libdef_lua, 0 }, { "REC(", ")", libdef_rec, 0 }, { "PUSH(", ")", libdef_push, 0 }, { "SET(", ")", libdef_set, 0 }, @@ -326,6 +385,8 @@ void emit_lib(BuildCtx *ctx) ok = LJ_HASJIT; else if (!strcmp(buf, "#if LJ_HASFFI\n")) ok = LJ_HASFFI; + else if (!strcmp(buf, "#if LJ_HASBUFFER\n")) + ok = LJ_HASBUFFER; if (!ok) { int lvl = 1; while (fgets(buf, sizeof(buf), fp) != NULL) { @@ -373,7 +434,7 @@ void emit_lib(BuildCtx *ctx) "#ifndef FF_NUM_ASMFUNC\n#define FF_NUM_ASMFUNC %d\n#endif\n\n", ffasmfunc); } else if (ctx->mode == BUILD_vmdef) { - fprintf(ctx->fp, "}\n\n"); + fprintf(ctx->fp, "},\n\n"); } else if (ctx->mode == BUILD_bcdef) { int i; fprintf(ctx->fp, "\n};\n\n"); diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h new file mode 100644 index 00000000..276463b2 --- /dev/null +++ b/src/host/buildvm_libbc.h @@ -0,0 +1,81 @@ +/* This is a generated file. DO NOT EDIT! */ + +static const int libbc_endian = 0; + +static const uint8_t libbc_code[] = { +#if LJ_FR2 +/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3, +220,203,178,130,4, +/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20, +198,190,199,252,3, +/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0, +/* table.foreachi */ 0,2,10,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0, +BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0, +BC_MOV,8,5,0,BC_TGETR,9,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128, +BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0, +/* table.foreach */ 0,2,11,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI, +2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,9,5,0, +BC_MOV,10,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0, +BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15, +/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0, +/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0, +0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0, +BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0, +BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0, +BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7, +BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0, +BC_RET1,3,2,0,BC_RET0,0,1,0,0,2, +/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE, +2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE, +4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4, +128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0, +BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0, +BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0, +BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR, +11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0, +#else +/* math.deg */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,241,135,158,166,3, +220,203,178,130,4, +/* math.rad */ 0,1,2,0,0,1,2,BC_MULVN,1,0,0,BC_RET1,1,2,0,243,244,148,165,20, +198,190,199,252,3, +/* string.len */ 0,1,2,0,0,0,3,BC_ISTYPE,0,5,0,BC_LEN,1,0,0,BC_RET1,1,2,0, +/* table.foreachi */ 0,2,9,0,0,0,15,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0, +BC_KSHORT,2,1,0,BC_LEN,3,0,0,BC_KSHORT,4,1,0,BC_FORI,2,8,128,BC_MOV,6,1,0, +BC_MOV,7,5,0,BC_TGETR,8,5,0,BC_CALL,6,3,2,BC_ISEQP,6,0,0,BC_JMP,7,1,128, +BC_RET1,6,2,0,BC_FORL,2,248,127,BC_RET0,0,1,0, +/* table.foreach */ 0,2,10,0,0,1,16,BC_ISTYPE,0,12,0,BC_ISTYPE,1,9,0,BC_KPRI, +2,0,0,BC_MOV,3,0,0,BC_KNUM,4,0,0,BC_JMP,5,7,128,BC_MOV,7,1,0,BC_MOV,8,5,0, +BC_MOV,9,6,0,BC_CALL,7,3,2,BC_ISEQP,7,0,0,BC_JMP,8,1,128,BC_RET1,7,2,0, +BC_ITERN,5,3,3,BC_ITERL,5,247,127,BC_RET0,0,1,0,1,255,255,249,255,15, +/* table.getn */ 0,1,2,0,0,0,3,BC_ISTYPE,0,12,0,BC_LEN,1,0,0,BC_RET1,1,2,0, +/* table.remove */ 0,2,10,0,0,2,30,BC_ISTYPE,0,12,0,BC_LEN,2,0,0,BC_ISNEP,1,0, +0,BC_JMP,3,7,128,BC_ISEQN,2,0,0,BC_JMP,3,23,128,BC_TGETR,3,2,0,BC_KPRI,4,0,0, +BC_TSETR,4,2,0,BC_RET1,3,2,0,BC_JMP,3,18,128,BC_ISTYPE,1,14,0,BC_KSHORT,3,1,0, +BC_ISGT,3,1,0,BC_JMP,3,14,128,BC_ISGT,1,2,0,BC_JMP,3,12,128,BC_TGETR,3,1,0, +BC_ADDVN,4,1,1,BC_MOV,5,2,0,BC_KSHORT,6,1,0,BC_FORI,4,4,128,BC_SUBVN,8,1,7, +BC_TGETR,9,7,0,BC_TSETR,9,8,0,BC_FORL,4,252,127,BC_KPRI,4,0,0,BC_TSETR,4,2,0, +BC_RET1,3,2,0,BC_RET0,0,1,0,0,2, +/* table.move */ 0,5,12,0,0,0,35,BC_ISTYPE,0,12,0,BC_ISTYPE,1,14,0,BC_ISTYPE, +2,14,0,BC_ISTYPE,3,14,0,BC_ISNEP,4,0,0,BC_JMP,5,1,128,BC_MOV,4,0,0,BC_ISTYPE, +4,12,0,BC_ISGT,1,2,0,BC_JMP,5,24,128,BC_SUBVV,5,1,3,BC_ISLT,2,3,0,BC_JMP,6,4, +128,BC_ISLE,3,1,0,BC_JMP,6,2,128,BC_ISEQV,4,0,0,BC_JMP,6,9,128,BC_MOV,6,1,0, +BC_MOV,7,2,0,BC_KSHORT,8,1,0,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0, +BC_TSETR,11,10,4,BC_FORL,6,252,127,BC_JMP,6,8,128,BC_MOV,6,2,0,BC_MOV,7,1,0, +BC_KSHORT,8,255,255,BC_FORI,6,4,128,BC_ADDVV,10,5,9,BC_TGETR,11,9,0,BC_TSETR, +11,10,4,BC_FORL,6,252,127,BC_RET1,4,2,0, +#endif +0 +}; + +static const struct { const char *name; int ofs; } libbc_map[] = { +{"math_deg",0}, +{"math_rad",25}, +{"string_len",50}, +{"table_foreachi",69}, +{"table_foreach",136}, +{"table_getn",213}, +{"table_remove",232}, +{"table_move",361}, +{NULL,508} +}; + diff --git a/src/host/buildvm_peobj.c b/src/host/buildvm_peobj.c index 97c0698e..b030f234 100644 --- a/src/host/buildvm_peobj.c +++ b/src/host/buildvm_peobj.c @@ -9,7 +9,7 @@ #include "buildvm.h" #include "lj_bc.h" -#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC +#if LJ_TARGET_X86ORX64 /* Context for PE object emitter. */ static char *strtab; @@ -93,12 +93,6 @@ typedef struct PEsymaux { #define PEOBJ_RELOC_ADDR32NB 0x03 #define PEOBJ_RELOC_OFS 0 #define PEOBJ_TEXT_FLAGS 0x60500020 /* 60=r+x, 50=align16, 20=code. */ -#elif LJ_TARGET_PPC -#define PEOBJ_ARCH_TARGET 0x01f2 -#define PEOBJ_RELOC_REL32 0x06 -#define PEOBJ_RELOC_DIR32 0x02 -#define PEOBJ_RELOC_OFS (-4) -#define PEOBJ_TEXT_FLAGS 0x60400020 /* 60=r+x, 40=align8, 20=code. */ #endif /* Section numbers (0-based). */ @@ -109,6 +103,8 @@ enum { #if LJ_TARGET_X64 PEOBJ_SECT_PDATA, PEOBJ_SECT_XDATA, +#elif LJ_TARGET_X86 + PEOBJ_SECT_SXDATA, #endif PEOBJ_SECT_RDATA_Z, PEOBJ_NSECTIONS @@ -208,6 +204,13 @@ void emit_peobj(BuildCtx *ctx) sofs += (pesect[PEOBJ_SECT_XDATA].nreloc = 1) * PEOBJ_RELOC_SIZE; /* Flags: 40 = read, 30 = align4, 40 = initialized data. */ pesect[PEOBJ_SECT_XDATA].flags = 0x40300040; +#elif LJ_TARGET_X86 + memcpy(pesect[PEOBJ_SECT_SXDATA].name, ".sxdata", sizeof(".sxdata")-1); + pesect[PEOBJ_SECT_SXDATA].ofs = sofs; + sofs += (pesect[PEOBJ_SECT_SXDATA].size = 4); + pesect[PEOBJ_SECT_SXDATA].relocofs = sofs; + /* Flags: 40 = read, 30 = align4, 02 = lnk_info, 40 = initialized data. */ + pesect[PEOBJ_SECT_SXDATA].flags = 0x40300240; #endif memcpy(pesect[PEOBJ_SECT_RDATA_Z].name, ".rdata$Z", sizeof(".rdata$Z")-1); @@ -232,7 +235,7 @@ void emit_peobj(BuildCtx *ctx) nrsym = ctx->nrelocsym; pehdr.nsyms = 1+PEOBJ_NSECTIONS*2 + 1+ctx->nsym + nrsym; #if LJ_TARGET_X64 - pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win64. */ + pehdr.nsyms += 1; /* Symbol for lj_err_unwind_win. */ #endif /* Write PE object header and all sections. */ @@ -242,15 +245,8 @@ void emit_peobj(BuildCtx *ctx) /* Write .text section. */ host_endian.u = 1; if (host_endian.b != LJ_ENDIAN_SELECT(1, 0)) { -#if LJ_TARGET_PPC - uint32_t *p = (uint32_t *)ctx->code; - int n = (int)(ctx->codesz >> 2); - for (i = 0; i < n; i++, p++) - *p = lj_bswap(*p); /* Byteswap .text section. */ -#else fprintf(stderr, "Error: different byte order for host and target\n"); exit(1); -#endif } owrite(ctx, ctx->code, ctx->codesz); for (i = 0; i < ctx->nreloc; i++) { @@ -312,6 +308,19 @@ void emit_peobj(BuildCtx *ctx) reloc.type = PEOBJ_RELOC_ADDR32NB; owrite(ctx, &reloc, PEOBJ_RELOC_SIZE); } +#elif LJ_TARGET_X86 + /* Write .sxdata section. */ + for (i = 0; i < nrsym; i++) { + if (!strcmp(ctx->relocsym[i], "_lj_err_unwind_win")) { + uint32_t symidx = 1+2+i; + owrite(ctx, &symidx, 4); + break; + } + } + if (i == nrsym) { + fprintf(stderr, "Error: extern lj_err_unwind_win not used\n"); + exit(1); + } #endif /* Write .rdata$Z section. */ @@ -333,8 +342,10 @@ void emit_peobj(BuildCtx *ctx) #if LJ_TARGET_X64 emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_PDATA); emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_XDATA); - emit_peobj_sym(ctx, "lj_err_unwind_win64", 0, + emit_peobj_sym(ctx, "lj_err_unwind_win", 0, PEOBJ_SECT_UNDEF, PEOBJ_TYPE_FUNC, PEOBJ_SCL_EXTERN); +#elif LJ_TARGET_X86 + emit_peobj_sym_sect(ctx, pesect, PEOBJ_SECT_SXDATA); #endif emit_peobj_sym(ctx, ctx->beginsym, 0, diff --git a/src/host/genlibbc.lua b/src/host/genlibbc.lua new file mode 100644 index 00000000..ba18812c --- /dev/null +++ b/src/host/genlibbc.lua @@ -0,0 +1,225 @@ +---------------------------------------------------------------------------- +-- Lua script to dump the bytecode of the library functions written in Lua. +-- The resulting 'buildvm_libbc.h' is used for the build process of LuaJIT. +---------------------------------------------------------------------------- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- + +local ffi = require("ffi") +local bit = require("bit") +local vmdef = require("jit.vmdef") +local bcnames = vmdef.bcnames + +local format = string.format + +local isbe = (string.byte(string.dump(function() end), 5) % 2 == 1) + +local function usage(arg) + io.stderr:write("Usage: ", arg and arg[0] or "genlibbc", + " [-o buildvm_libbc.h] lib_*.c\n") + os.exit(1) +end + +local function parse_arg(arg) + local outfile = "-" + if not (arg and arg[1]) then + usage(arg) + end + if arg[1] == "-o" then + outfile = arg[2] + if not outfile then usage(arg) end + table.remove(arg, 1) + table.remove(arg, 1) + end + return outfile +end + +local function read_files(names) + local src = "" + for _,name in ipairs(names) do + local fp = assert(io.open(name)) + src = src .. fp:read("*a") + fp:close() + end + return src +end + +local function transform_lua(code) + local fixup = {} + local n = -30000 + code = string.gsub(code, "CHECK_(%w*)%((.-)%)", function(tp, var) + n = n + 1 + fixup[n] = { "CHECK", tp } + return format("%s=%d", var, n) + end) + code = string.gsub(code, "PAIRS%((.-)%)", function(var) + fixup.PAIRS = true + return format("nil, %s, 0x4dp80", var) + end) + return "return "..code, fixup +end + +local function read_uleb128(p) + local v = p[0]; p = p + 1 + if v >= 128 then + local sh = 7; v = v - 128 + repeat + local r = p[0] + v = v + bit.lshift(bit.band(r, 127), sh) + sh = sh + 7 + p = p + 1 + until r < 128 + end + return p, v +end + +-- ORDER LJ_T +local name2itype = { + str = 5, func = 9, tab = 12, int = 14, num = 15 +} + +local BC, BCN = {}, {} +for i=0,#bcnames/6-1 do + local name = bcnames:sub(i*6+1, i*6+6):gsub(" ", "") + BC[name] = i + BCN[i] = name +end +local xop, xra = isbe and 3 or 0, isbe and 2 or 1 +local xrc, xrb = isbe and 1 or 2, isbe and 0 or 3 + +local function fixup_dump(dump, fixup) + local buf = ffi.new("uint8_t[?]", #dump+1, dump) + local p = buf+5 + local n, sizebc + p, n = read_uleb128(p) + local start = p + p = p + 4 + p = read_uleb128(p) + p = read_uleb128(p) + p, sizebc = read_uleb128(p) + local startbc = tonumber(p - start) + local rawtab = {} + for i=0,sizebc-1 do + local op = p[xop] + if op == BC.KSHORT then + local rd = p[xrc] + 256*p[xrb] + rd = bit.arshift(bit.lshift(rd, 16), 16) + local f = fixup[rd] + if f then + if f[1] == "CHECK" then + local tp = f[2] + if tp == "tab" then rawtab[p[xra]] = true end + p[xop] = tp == "num" and BC.ISNUM or BC.ISTYPE + p[xrb] = 0 + p[xrc] = name2itype[tp] + else + error("unhandled fixup type: "..f[1]) + end + end + elseif op == BC.TGETV then + if rawtab[p[xrb]] then + p[xop] = BC.TGETR + end + elseif op == BC.TSETV then + if rawtab[p[xrb]] then + p[xop] = BC.TSETR + end + elseif op == BC.ITERC then + if fixup.PAIRS then + p[xop] = BC.ITERN + end + end + p = p + 4 + end + local ndump = ffi.string(start, n) + -- Fixup hi-part of 0x4dp80 to LJ_KEYINDEX. + ndump = ndump:gsub("\x80\x80\xcd\xaa\x04", "\xff\xff\xf9\xff\x0f") + return { dump = ndump, startbc = startbc, sizebc = sizebc } +end + +local function find_defs(src) + local defs = {} + for name, code in string.gmatch(src, "LJLIB_LUA%(([^)]*)%)%s*/%*(.-)%*/") do + local env = {} + local tcode, fixup = transform_lua(code) + local func = assert(load(tcode, "", nil, env))() + defs[name] = fixup_dump(string.dump(func, true), fixup) + defs[#defs+1] = name + end + return defs +end + +local function gen_header(defs) + local t = {} + local function w(x) t[#t+1] = x end + w("/* This is a generated file. DO NOT EDIT! */\n\n") + w("static const int libbc_endian = ") w(isbe and 1 or 0) w(";\n\n") + local s, sb = "", "" + for i,name in ipairs(defs) do + local d = defs[name] + s = s .. d.dump + sb = sb .. string.char(i) .. ("\0"):rep(d.startbc - 1) + .. (isbe and "\0\0\0\255" or "\255\0\0\0"):rep(d.sizebc) + .. ("\0"):rep(#d.dump - d.startbc - d.sizebc*4) + end + w("static const uint8_t libbc_code[] = {\n") + local n = 0 + for i=1,#s do + local x = string.byte(s, i) + local xb = string.byte(sb, i) + if xb == 255 then + local name = BCN[x] + local m = #name + 4 + if n + m > 78 then n = 0; w("\n") end + n = n + m + w("BC_"); w(name) + else + local m = x < 10 and 2 or (x < 100 and 3 or 4) + if xb == 0 then + if n + m > 78 then n = 0; w("\n") end + else + local name = defs[xb]:gsub("_", ".") + if n ~= 0 then w("\n") end + w("/* "); w(name); w(" */ ") + n = #name + 7 + end + n = n + m + w(x) + end + w(",") + end + w("\n0\n};\n\n") + w("static const struct { const char *name; int ofs; } libbc_map[] = {\n") + local m = 0 + for _,name in ipairs(defs) do + w('{"'); w(name); w('",'); w(m) w('},\n') + m = m + #defs[name].dump + end + w("{NULL,"); w(m); w("}\n};\n\n") + return table.concat(t) +end + +local function write_file(name, data) + if name == "-" then + assert(io.write(data)) + assert(io.flush()) + else + local fp = io.open(name) + if fp then + local old = fp:read("*a") + fp:close() + if data == old then return end + end + fp = assert(io.open(name, "w")) + assert(fp:write(data)) + assert(fp:close()) + end +end + +local outfile = parse_arg(arg) +local src = read_files(arg) +local defs = find_defs(src) +local hdr = gen_header(defs) +write_file(outfile, hdr) + diff --git a/src/jit/bc.lua b/src/jit/bc.lua index d4c6d4a6..8d0844c0 100644 --- a/src/jit/bc.lua +++ b/src/jit/bc.lua @@ -41,7 +41,7 @@ -- Cache some library functions and objects. local jit = require("jit") -assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") +assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") local jutil = require("jit.util") local vmdef = require("jit.vmdef") local bit = require("bit") @@ -179,13 +179,12 @@ local function bcliston(outfile) end -- Public module functions. -module(...) - -line = bcline -dump = bcdump -targets = bctargets - -on = bcliston -off = bclistoff -start = bcliston -- For -j command line option. +return { + line = bcline, + dump = bcdump, + targets = bctargets, + on = bcliston, + off = bclistoff, + start = bcliston -- For -j command line option. +} diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 086d5f88..90fe9daf 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua @@ -11,12 +11,16 @@ ------------------------------------------------------------------------------ local jit = require("jit") -assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") +assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") local bit = require("bit") -- Symbol name prefix for LuaJIT bytecode. local LJBC_PREFIX = "luaJIT_BC_" +local type, assert = type, assert +local format = string.format +local tremove, tconcat = table.remove, table.concat + ------------------------------------------------------------------------------ local function usage() @@ -29,6 +33,7 @@ Save LuaJIT bytecode: luajit -b[options] input output -t type Set output file type (default: auto-detect from output name). -a arch Override architecture for object files (default: native). -o os Override OS for object files (default: native). + -F name Override filename (default: input filename). -e chunk Use chunk string as input. -- Stop handling options. - Use stdin as input and/or stdout as output. @@ -45,10 +50,22 @@ local function check(ok, ...) os.exit(1) end -local function readfile(input) +local function readfile(ctx, input) if type(input) == "function" then return input end - if input == "-" then input = nil end - return check(loadfile(input)) + if ctx.filename then + local data + if input == "-" then + data = io.stdin:read("*a") + else + local fp = assert(io.open(input, "rb")) + data = assert(fp:read("*a")) + assert(fp:close()) + end + return check(load(data, ctx.filename)) + else + if input == "-" then input = nil end + return check(loadfile(input)) + end end local function savefile(name, mode) @@ -56,6 +73,11 @@ local function savefile(name, mode) return check(io.open(name, mode)) end +local function set_stdout_binary(ffi) + ffi.cdef[[int _setmode(int fd, int mode);]] + ffi.C._setmode(1, 0x8000) +end + ------------------------------------------------------------------------------ local map_type = { @@ -63,8 +85,18 @@ local map_type = { } local map_arch = { - x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true, - mips = true, mipsel = true, + x86 = { e = "le", b = 32, m = 3, p = 0x14c, }, + x64 = { e = "le", b = 64, m = 62, p = 0x8664, }, + arm = { e = "le", b = 32, m = 40, p = 0x1c0, }, + arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, }, + arm64be = { e = "be", b = 64, m = 183, }, + ppc = { e = "be", b = 32, m = 20, }, + mips = { e = "be", b = 32, m = 8, f = 0x50001006, }, + mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, }, + mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, }, + mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, + mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, + mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, } local map_os = { @@ -73,33 +105,33 @@ local map_os = { } local function checkarg(str, map, err) - str = string.lower(str) + str = str:lower() local s = check(map[str], "unknown ", err) - return s == true and str or s + return type(s) == "string" and s or str end local function detecttype(str) - local ext = string.match(string.lower(str), "%.(%a+)$") + local ext = str:lower():match("%.(%a+)$") return map_type[ext] or "raw" end local function checkmodname(str) - check(string.match(str, "^[%w_.%-]+$"), "bad module name") - return string.gsub(str, "[%.%-]", "_") + check(str:match("^[%w_.%-]+$"), "bad module name") + return str:gsub("[%.%-]", "_") end local function detectmodname(str) if type(str) == "string" then - local tail = string.match(str, "[^/\\]+$") + local tail = str:match("[^/\\]+$") if tail then str = tail end - local head = string.match(str, "^(.*)%.[^.]*$") + local head = str:match("^(.*)%.[^.]*$") if head then str = head end - str = string.match(str, "^[%w_.%-]+") + str = str:match("^[%w_.%-]+") else str = nil end check(str, "cannot derive module name, use -n name") - return string.gsub(str, "[%.%-]", "_") + return str:gsub("[%.%-]", "_") end ------------------------------------------------------------------------------ @@ -111,6 +143,11 @@ local function bcsave_tail(fp, output, s) end local function bcsave_raw(output, s) + if output == "-" and jit.os == "Windows" then + local ok, ffi = pcall(require, "ffi") + check(ok, "FFI library required to write binary file to stdout") + set_stdout_binary(ffi) + end local fp = savefile(output, "wb") bcsave_tail(fp, output, s) end @@ -118,19 +155,19 @@ end local function bcsave_c(ctx, output, s) local fp = savefile(output, "w") if ctx.type == "c" then - fp:write(string.format([[ + fp:write(format([[ #ifdef __cplusplus extern "C" #endif #ifdef _WIN32 __declspec(dllexport) #endif -const char %s%s[] = { +const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname)) else - fp:write(string.format([[ + fp:write(format([[ #define %s%s_SIZE %d -static const char %s%s[] = { +static const unsigned char %s%s[] = { ]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) end local t, n, m = {}, 0, 0 @@ -138,13 +175,13 @@ static const char %s%s[] = { local b = tostring(string.byte(s, i)) m = m + #b + 1 if m > 78 then - fp:write(table.concat(t, ",", 1, n), ",\n") + fp:write(tconcat(t, ",", 1, n), ",\n") n, m = 0, #b + 1 end n = n + 1 t[n] = b end - bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") + bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n") end local function bcsave_elfobj(ctx, output, s, ffi) @@ -199,12 +236,8 @@ typedef struct { } ELF64obj; ]] local symname = LJBC_PREFIX..ctx.modname - local is64, isbe = false, false - if ctx.arch == "x64" then - is64 = true - elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then - isbe = true - end + local ai = assert(map_arch[ctx.arch]) + local is64, isbe = ai.b == 64, ai.e == "be" -- Handle different host/target endianess. local function f32(x) return x end @@ -237,10 +270,8 @@ typedef struct { hdr.eendian = isbe and 2 or 1 hdr.eversion = 1 hdr.type = f16(1) - hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch]) - if ctx.arch == "mips" or ctx.arch == "mipsel" then - hdr.flags = f32(0x50001006) - end + hdr.machine = f16(ai.m) + hdr.flags = f32(ai.f or 0) hdr.version = f32(1) hdr.shofs = fofs(ffi.offsetof(o, "sect")) hdr.ehsize = f16(ffi.sizeof(hdr)) @@ -336,12 +367,8 @@ typedef struct { } PEobj; ]] local symname = LJBC_PREFIX..ctx.modname - local is64 = false - if ctx.arch == "x86" then - symname = "_"..symname - elseif ctx.arch == "x64" then - is64 = true - end + local ai = assert(map_arch[ctx.arch]) + local is64 = ai.b == 64 local symexport = " /EXPORT:"..symname..",DATA " -- The file format is always little-endian. Swap if the host is big-endian. @@ -355,7 +382,7 @@ typedef struct { -- Create PE object and fill in header. local o = ffi.new("PEobj") local hdr = o.hdr - hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) + hdr.arch = f16(assert(ai.p)) hdr.nsects = f16(2) hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) hdr.nsyms = f32(6) @@ -442,18 +469,18 @@ typedef struct { uint32_t value; } mach_nlist; typedef struct { - uint32_t strx; + int32_t strx; uint8_t type, sect; uint16_t desc; uint64_t value; } mach_nlist_64; typedef struct { - uint32_t magic, nfat_arch; + int32_t magic, nfat_arch; } mach_fat_header; typedef struct { - uint32_t cputype, cpusubtype, offset, size, align; + int32_t cputype, cpusubtype, offset, size, align; } mach_fat_arch; typedef struct { struct { @@ -477,16 +504,28 @@ typedef struct { } mach_obj_64; typedef struct { mach_fat_header fat; - mach_fat_arch fat_arch[4]; + mach_fat_arch fat_arch[2]; struct { mach_header hdr; mach_segment_command seg; mach_section sec; mach_symtab_command sym; - } arch[4]; + } arch[2]; mach_nlist sym_entry; uint8_t space[4096]; } mach_fat_obj; +typedef struct { + mach_fat_header fat; + mach_fat_arch fat_arch[2]; + struct { + mach_header_64 hdr; + mach_segment_command_64 seg; + mach_section_64 sec; + mach_symtab_command sym; + } arch[2]; + mach_nlist_64 sym_entry; + uint8_t space[4096]; +} mach_fat_obj_64; ]] local symname = '_'..LJBC_PREFIX..ctx.modname local isfat, is64, align, mobj = false, false, 4, "mach_obj" @@ -494,6 +533,8 @@ typedef struct { is64, align, mobj = true, 8, "mach_obj_64" elseif ctx.arch == "arm" then isfat, mobj = true, "mach_fat_obj" + elseif ctx.arch == "arm64" then + is64, align, isfat, mobj = true, 8, true, "mach_fat_obj_64" else check(ctx.arch == "x86", "unsupported architecture for OSX") end @@ -503,8 +544,8 @@ typedef struct { -- Create Mach-O object and fill in header. local o = ffi.new(mobj) local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) - local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch] - local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch] + local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch] + local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch] if isfat then o.fat.magic = be32(0xcafebabe) o.fat.nfat_arch = be32(#cpusubtype) @@ -562,6 +603,9 @@ end local function bcsave_obj(ctx, output, s) local ok, ffi = pcall(require, "ffi") check(ok, "FFI library required to write this file type") + if output == "-" and jit.os == "Windows" then + set_stdout_binary(ffi) + end if ctx.os == "windows" then return bcsave_peobj(ctx, output, s, ffi) elseif ctx.os == "osx" then @@ -573,13 +617,13 @@ end ------------------------------------------------------------------------------ -local function bclist(input, output) - local f = readfile(input) +local function bclist(ctx, input, output) + local f = readfile(ctx, input) require("jit.bc").dump(f, savefile(output, "w"), true) end local function bcsave(ctx, input, output) - local f = readfile(input) + local f = readfile(ctx, input) local s = string.dump(f, ctx.strip) local t = ctx.type if not t then @@ -603,16 +647,16 @@ local function docmd(...) local n = 1 local list = false local ctx = { - strip = true, arch = jit.arch, os = string.lower(jit.os), + strip = true, arch = jit.arch, os = jit.os:lower(), type = false, modname = false, } while n <= #arg do local a = arg[n] - if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then - table.remove(arg, n) + if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then + tremove(arg, n) if a == "--" then break end for m=2,#a do - local opt = string.sub(a, m, m) + local opt = a:sub(m, m) if opt == "l" then list = true elseif opt == "s" then @@ -625,13 +669,15 @@ local function docmd(...) if n ~= 1 then usage() end arg[1] = check(loadstring(arg[1])) elseif opt == "n" then - ctx.modname = checkmodname(table.remove(arg, n)) + ctx.modname = checkmodname(tremove(arg, n)) elseif opt == "t" then - ctx.type = checkarg(table.remove(arg, n), map_type, "file type") + ctx.type = checkarg(tremove(arg, n), map_type, "file type") elseif opt == "a" then - ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") + ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture") elseif opt == "o" then - ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") + ctx.os = checkarg(tremove(arg, n), map_os, "OS name") + elseif opt == "F" then + ctx.filename = "@"..tremove(arg, n) else usage() end @@ -643,7 +689,7 @@ local function docmd(...) end if list then if #arg == 0 or #arg > 2 then usage() end - bclist(arg[1], arg[2] or "-") + bclist(ctx, arg[1], arg[2] or "-") else if #arg ~= 2 then usage() end bcsave(ctx, arg[1], arg[2]) @@ -653,7 +699,7 @@ end ------------------------------------------------------------------------------ -- Public module functions. -module(...) - -start = docmd -- Process -b command line option. +return { + start = docmd -- Process -b command line option. +} diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua index d572a5c3..18ab68df 100644 --- a/src/jit/dis_arm.lua +++ b/src/jit/dis_arm.lua @@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len) end -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). -local function create_(code, addr, out) +local function create(code, addr, out) local ctx = {} ctx.code = code ctx.addr = addr or 0 @@ -670,20 +670,20 @@ local function create_(code, addr, out) end -- Simple API: disassemble code (a string) at address and output via out. -local function disass_(code, addr, out) - create_(code, addr, out):disass() +local function disass(code, addr, out) + create(code, addr, out):disass() end -- Return register name for RID. -local function regname_(r) +local function regname(r) if r < 16 then return map_gpr[r] end return "d"..(r-16) end -- Public module functions. -module(...) - -create = create_ -disass = disass_ -regname = regname_ +return { + create = create, + disass = disass, + regname = regname +} diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua new file mode 100644 index 00000000..531584a1 --- /dev/null +++ b/src/jit/dis_arm64.lua @@ -0,0 +1,1216 @@ +---------------------------------------------------------------------------- +-- LuaJIT ARM64 disassembler module. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +-- +-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +-- Sponsored by Cisco Systems, Inc. +---------------------------------------------------------------------------- +-- This is a helper module used by the LuaJIT machine code dumper module. +-- +-- It disassembles most user-mode AArch64 instructions. +-- NYI: Advanced SIMD and VFP instructions. +------------------------------------------------------------------------------ + +local type = type +local sub, byte, format = string.sub, string.byte, string.format +local match, gmatch, gsub = string.match, string.gmatch, string.gsub +local concat = table.concat +local bit = require("bit") +local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex +local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift +local ror = bit.ror + +------------------------------------------------------------------------------ +-- Opcode maps +------------------------------------------------------------------------------ + +local map_adr = { -- PC-relative addressing. + shift = 31, mask = 1, + [0] = "adrDBx", "adrpDBx" +} + +local map_addsubi = { -- Add/subtract immediate. + shift = 29, mask = 3, + [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg", +} + +local map_logi = { -- Logical immediate. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" + }, + false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig" + } +} + +local map_movwi = { -- Move wide immediate. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" + }, false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg" + }, +} + +local map_bitf = { -- Bitfield. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w", + "bfm|bfi|bfxilDN13w", + "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w" + } + }, + { + shift = 22, mask = 1, + { + shift = 29, mask = 3, + [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x", + "bfm|bfi|bfxilDN13x", + "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x" + } + } +} + +local map_datai = { -- Data processing - immediate. + shift = 23, mask = 7, + [0] = map_adr, map_adr, map_addsubi, false, + map_logi, map_movwi, map_bitf, + { + shift = 15, mask = 0x1c0c1, + [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x", + [0x10081] = "extr|rorDNM4x" + } +} + +local map_logsr = { -- Logical, shifted register. + shift = 31, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = { + shift = 21, mask = 7, + [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", + "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + }, + { + shift = 21, mask = 7, + [0] ="orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", + "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + }, + { + shift = 21, mask = 7, + [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", + "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + }, + { + shift = 21, mask = 7, + [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", + "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + } + }, + false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = { + shift = 21, mask = 7, + [0] = "andDNMSg", "bicDNMSg", "andDNMSg", "bicDNMSg", + "andDNMSg", "bicDNMSg", "andDNMg", "bicDNMg" + }, + { + shift = 21, mask = 7, + [0] = "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0MSg", "orn|mvnDN0MSg", + "orr|movDN0MSg", "orn|mvnDN0MSg", "orr|movDN0Mg", "orn|mvnDN0Mg" + }, + { + shift = 21, mask = 7, + [0] = "eorDNMSg", "eonDNMSg", "eorDNMSg", "eonDNMSg", + "eorDNMSg", "eonDNMSg", "eorDNMg", "eonDNMg" + }, + { + shift = 21, mask = 7, + [0] = "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMSg", "bicsDNMSg", + "ands|tstD0NMSg", "bicsDNMSg", "ands|tstD0NMg", "bicsDNMg" + } + } +} + +local map_assh = { + shift = 31, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" + }, + { + shift = 22, mask = 3, + [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", + "adds|cmnD0NMSg", "adds|cmnD0NMg" + }, + { + shift = 22, mask = 3, + [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" + }, + { + shift = 22, mask = 3, + [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", + "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" + }, + }, + false -- unallocated + }, + { + shift = 29, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg" + }, + { + shift = 22, mask = 3, + [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg", + "adds|cmnD0NMg" + }, + { + shift = 22, mask = 3, + [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg" + }, + { + shift = 22, mask = 3, + [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg", + "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg" + } + } +} + +local map_addsubsh = { -- Add/subtract, shifted register. + shift = 22, mask = 3, + [0] = map_assh, map_assh, map_assh +} + +local map_addsubex = { -- Add/subtract, extended register. + shift = 22, mask = 3, + [0] = { + shift = 29, mask = 3, + [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg", + } +} + +local map_addsubc = { -- Add/subtract, with carry. + shift = 10, mask = 63, + [0] = { + shift = 29, mask = 3, + [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg", + } +} + +local map_ccomp = { + shift = 4, mask = 1, + [0] = { + shift = 10, mask = 3, + [0] = { -- Conditional compare register. + shift = 29, mask = 3, + "ccmnNMVCg", false, "ccmpNMVCg", + }, + [2] = { -- Conditional compare immediate. + shift = 29, mask = 3, + "ccmnN5VCg", false, "ccmpN5VCg", + } + } +} + +local map_csel = { -- Conditional select. + shift = 11, mask = 1, + [0] = { + shift = 10, mask = 1, + [0] = { + shift = 29, mask = 3, + [0] = "cselDNMzCg", false, "csinv|cinv|csetmDNMcg", false, + }, + { + shift = 29, mask = 3, + [0] = "csinc|cinc|csetDNMcg", false, "csneg|cnegDNMcg", false, + } + } +} + +local map_data1s = { -- Data processing, 1 source. + shift = 29, mask = 1, + [0] = { + shift = 31, mask = 1, + [0] = { + shift = 10, mask = 0x7ff, + [0] = "rbitDNg", "rev16DNg", "revDNw", false, "clzDNg", "clsDNg" + }, + { + shift = 10, mask = 0x7ff, + [0] = "rbitDNg", "rev16DNg", "rev32DNx", "revDNx", "clzDNg", "clsDNg" + } + } +} + +local map_data2s = { -- Data processing, 2 sources. + shift = 29, mask = 1, + [0] = { + shift = 10, mask = 63, + false, "udivDNMg", "sdivDNMg", false, false, false, false, "lslDNMg", + "lsrDNMg", "asrDNMg", "rorDNMg" + } +} + +local map_data3s = { -- Data processing, 3 sources. + shift = 29, mask = 7, + [0] = { + shift = 21, mask = 7, + [0] = { + shift = 15, mask = 1, + [0] = "madd|mulDNMA0g", "msub|mnegDNMA0g" + } + }, false, false, false, + { + shift = 15, mask = 1, + [0] = { + shift = 21, mask = 7, + [0] = "madd|mulDNMA0g", "smaddl|smullDxNMwA0x", "smulhDNMx", false, + false, "umaddl|umullDxNMwA0x", "umulhDNMx" + }, + { + shift = 21, mask = 7, + [0] = "msub|mnegDNMA0g", "smsubl|smneglDxNMwA0x", false, false, + false, "umsubl|umneglDxNMwA0x" + } + } +} + +local map_datar = { -- Data processing, register. + shift = 28, mask = 1, + [0] = { + shift = 24, mask = 1, + [0] = map_logsr, + { + shift = 21, mask = 1, + [0] = map_addsubsh, map_addsubex + } + }, + { + shift = 21, mask = 15, + [0] = map_addsubc, false, map_ccomp, false, map_csel, false, + { + shift = 30, mask = 1, + [0] = map_data2s, map_data1s + }, + false, map_data3s, map_data3s, map_data3s, map_data3s, map_data3s, + map_data3s, map_data3s, map_data3s + } +} + +local map_lrl = { -- Load register, literal. + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = "ldrDwB", "ldrDxB", "ldrswDxB" + }, + { + shift = 30, mask = 3, + [0] = "ldrDsB", "ldrDdB" + } +} + +local map_lsriind = { -- Load/store register, immediate pre/post-indexed. + shift = 30, mask = 3, + [0] = { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL" + } + }, + { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL" + } + }, + { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strDwzL", "ldrDwzL", "ldrswDxzL" + }, + { + shift = 22, mask = 3, + [0] = "strDszL", "ldrDszL" + } + }, + { + shift = 26, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = "strDxzL", "ldrDxzL" + }, + { + shift = 22, mask = 3, + [0] = "strDdzL", "ldrDdzL" + } + } +} + +local map_lsriro = { + shift = 21, mask = 1, + [0] = { -- Load/store register immediate. + shift = 10, mask = 3, + [0] = { -- Unscaled immediate. + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "sturbDwK", "ldurbDwK" + }, + { + shift = 22, mask = 3, + [0] = "sturhDwK", "ldurhDwK" + }, + { + shift = 22, mask = 3, + [0] = "sturDwK", "ldurDwK" + }, + { + shift = 22, mask = 3, + [0] = "sturDxK", "ldurDxK" + } + } + }, map_lsriind, false, map_lsriind + }, + { -- Load/store register, register offset. + shift = 10, mask = 3, + [2] = { + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO" + }, + { + shift = 22, mask = 3, + [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO" + }, + { + shift = 22, mask = 3, + [0] = "strDwO", "ldrDwO", "ldrswDxO" + }, + { + shift = 22, mask = 3, + [0] = "strDxO", "ldrDxO" + } + }, + { + shift = 30, mask = 3, + [2] = { + shift = 22, mask = 3, + [0] = "strDsO", "ldrDsO" + }, + [3] = { + shift = 22, mask = 3, + [0] = "strDdO", "ldrDdO" + } + } + } + } +} + +local map_lsp = { -- Load/store register pair, offset. + shift = 22, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 26, mask = 1, + [0] = "stpDzAzwP", "stpDzAzsP", + }, + { + shift = 26, mask = 1, + "stpDzAzdP" + }, + { + shift = 26, mask = 1, + [0] = "stpDzAzxP" + } + }, + { + shift = 30, mask = 3, + [0] = { + shift = 26, mask = 1, + [0] = "ldpDzAzwP", "ldpDzAzsP", + }, + { + shift = 26, mask = 1, + [0] = "ldpswDAxP", "ldpDzAzdP" + }, + { + shift = 26, mask = 1, + [0] = "ldpDzAzxP" + } + } +} + +local map_ls = { -- Loads and stores. + shift = 24, mask = 0x31, + [0x10] = map_lrl, [0x30] = map_lsriro, + [0x20] = { + shift = 23, mask = 3, + map_lsp, map_lsp, map_lsp + }, + [0x21] = { + shift = 23, mask = 3, + map_lsp, map_lsp, map_lsp + }, + [0x31] = { + shift = 26, mask = 1, + [0] = { + shift = 30, mask = 3, + [0] = { + shift = 22, mask = 3, + [0] = "strbDwzU", "ldrbDwzU" + }, + { + shift = 22, mask = 3, + [0] = "strhDwzU", "ldrhDwzU" + }, + { + shift = 22, mask = 3, + [0] = "strDwzU", "ldrDwzU" + }, + { + shift = 22, mask = 3, + [0] = "strDxzU", "ldrDxzU" + } + }, + { + shift = 30, mask = 3, + [2] = { + shift = 22, mask = 3, + [0] = "strDszU", "ldrDszU" + }, + [3] = { + shift = 22, mask = 3, + [0] = "strDdzU", "ldrDdzU" + } + } + }, +} + +local map_datafp = { -- Data processing, SIMD and FP. + shift = 28, mask = 7, + { -- 001 + shift = 24, mask = 1, + [0] = { + shift = 21, mask = 1, + { + shift = 10, mask = 3, + [0] = { + shift = 12, mask = 1, + [0] = { + shift = 13, mask = 1, + [0] = { + shift = 14, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { -- FP/int conversion. + shift = 31, mask = 1, + [0] = { + shift = 16, mask = 0xff, + [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs", + [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw", + [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs", + [0x26] = "fmovDwNs", [0x27] = "fmovDsNw", + [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs", + [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs", + [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs", + [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd", + [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw", + [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd", + [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd", + [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd", + [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd" + }, + { + shift = 16, mask = 0xff, + [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs", + [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx", + [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs", + [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs", + [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs", + [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs", + [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd", + [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx", + [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd", + [0x66] = "fmovDxNd", [0x67] = "fmovDdNx", + [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd", + [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd", + [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd" + } + } + }, + { -- FP data-processing, 1 source. + shift = 31, mask = 1, + [0] = { + shift = 22, mask = 3, + [0] = { + shift = 15, mask = 63, + [0] = "fmovDNf", "fabsDNf", "fnegDNf", + "fsqrtDNf", false, "fcvtDdNs", false, false, + "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", + "frintaDNf", false, "frintxDNf", "frintiDNf", + }, + { + shift = 15, mask = 63, + [0] = "fmovDNf", "fabsDNf", "fnegDNf", + "fsqrtDNf", "fcvtDsNd", false, false, false, + "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf", + "frintaDNf", false, "frintxDNf", "frintiDNf", + } + } + } + }, + { -- FP compare. + shift = 31, mask = 1, + [0] = { + shift = 14, mask = 3, + [0] = { + shift = 23, mask = 1, + [0] = { + shift = 0, mask = 31, + [0] = "fcmpNMf", [8] = "fcmpNZf", + [16] = "fcmpeNMf", [24] = "fcmpeNZf", + } + } + } + } + }, + { -- FP immediate. + shift = 31, mask = 1, + [0] = { + shift = 5, mask = 31, + [0] = { + shift = 23, mask = 1, + [0] = "fmovDFf" + } + } + } + }, + { -- FP conditional compare. + shift = 31, mask = 1, + [0] = { + shift = 23, mask = 1, + [0] = { + shift = 4, mask = 1, + [0] = "fccmpNMVCf", "fccmpeNMVCf" + } + } + }, + { -- FP data-processing, 2 sources. + shift = 31, mask = 1, + [0] = { + shift = 23, mask = 1, + [0] = { + shift = 12, mask = 15, + [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf", + "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf", + "fnmulDNMf" + } + } + }, + { -- FP conditional select. + shift = 31, mask = 1, + [0] = { + shift = 23, mask = 1, + [0] = "fcselDNMCf" + } + } + } + }, + { -- FP data-processing, 3 sources. + shift = 31, mask = 1, + [0] = { + shift = 15, mask = 1, + [0] = { + shift = 21, mask = 5, + [0] = "fmaddDNMAf", "fnmaddDNMAf" + }, + { + shift = 21, mask = 5, + [0] = "fmsubDNMAf", "fnmsubDNMAf" + } + } + } + } +} + +local map_br = { -- Branches, exception generating and system instructions. + shift = 29, mask = 7, + [0] = "bB", + { -- Compare & branch, immediate. + shift = 24, mask = 3, + [0] = "cbzDBg", "cbnzDBg", "tbzDTBw", "tbnzDTBw" + }, + { -- Conditional branch, immediate. + shift = 24, mask = 3, + [0] = { + shift = 4, mask = 1, + [0] = { + shift = 0, mask = 15, + [0] = "beqB", "bneB", "bhsB", "bloB", "bmiB", "bplB", "bvsB", "bvcB", + "bhiB", "blsB", "bgeB", "bltB", "bgtB", "bleB", "balB" + } + } + }, false, "blB", + { -- Compare & branch, immediate. + shift = 24, mask = 3, + [0] = "cbzDBg", "cbnzDBg", "tbzDTBx", "tbnzDTBx" + }, + { + shift = 24, mask = 3, + [0] = { -- Exception generation. + shift = 0, mask = 0xe0001f, + [0x200000] = "brkW" + }, + { -- System instructions. + shift = 0, mask = 0x3fffff, + [0x03201f] = "nop" + }, + { -- Unconditional branch, register. + shift = 0, mask = 0xfffc1f, + [0x1f0000] = "brNx", [0x3f0000] = "blrNx", + [0x5f0000] = "retNx" + }, + } +} + +local map_init = { + shift = 25, mask = 15, + [0] = false, false, false, false, map_ls, map_datar, map_ls, map_datafp, + map_datai, map_datai, map_br, map_br, map_ls, map_datar, map_ls, map_datafp +} + +------------------------------------------------------------------------------ + +local map_regs = { x = {}, w = {}, d = {}, s = {} } + +for i=0,30 do + map_regs.x[i] = "x"..i + map_regs.w[i] = "w"..i + map_regs.d[i] = "d"..i + map_regs.s[i] = "s"..i +end +map_regs.x[31] = "sp" +map_regs.w[31] = "wsp" +map_regs.d[31] = "d31" +map_regs.s[31] = "s31" + +local map_cond = { + [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", + "hi", "ls", "ge", "lt", "gt", "le", "al", +} + +local map_shift = { [0] = "lsl", "lsr", "asr", } + +local map_extend = { + [0] = "uxtb", "uxth", "uxtw", "uxtx", "sxtb", "sxth", "sxtw", "sxtx", +} + +------------------------------------------------------------------------------ + +-- Output a nicely formatted line with an opcode and operands. +local function putop(ctx, text, operands) + local pos = ctx.pos + local extra = "" + if ctx.rel then + local sym = ctx.symtab[ctx.rel] + if sym then + extra = "\t->"..sym + end + end + if ctx.hexdump > 0 then + ctx.out(format("%08x %s %-5s %s%s\n", + ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) + else + ctx.out(format("%08x %-5s %s%s\n", + ctx.addr+pos, text, concat(operands, ", "), extra)) + end + ctx.pos = pos + 4 +end + +-- Fallback for unknown opcodes. +local function unknown(ctx) + return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) +end + +local function match_reg(p, pat, regnum) + return map_regs[match(pat, p.."%w-([xwds])")][regnum] +end + +local function fmt_hex32(x) + if x < 0 then + return tohex(x) + else + return format("%x", x) + end +end + +local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 } + +local function decode_imm13(op) + local imms = band(rshift(op, 10), 63) + local immr = band(rshift(op, 16), 63) + if band(op, 0x00400000) == 0 then + local len = 5 + if imms >= 56 then + if imms >= 60 then len = 1 else len = 2 end + elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end + local l = lshift(1, len)-1 + local s = band(imms, l) + local r = band(immr, l) + local imm = ror(rshift(-1, 31-s), r) + if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end + imm = imm * imm13_rep[len] + local ix = fmt_hex32(imm) + if rshift(op, 31) ~= 0 then + return ix..tohex(imm) + else + return ix + end + else + local lo, hi = -1, 0 + if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end + if immr ~= 0 then + lo, hi = ror(lo, immr), ror(hi, immr) + local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr)) + lo, hi = bxor(lo, x), bxor(hi, x) + if immr >= 32 then lo, hi = hi, lo end + end + if hi ~= 0 then + return fmt_hex32(hi)..tohex(lo) + else + return fmt_hex32(lo) + end + end +end + +local function parse_immpc(op, name) + if name == "b" or name == "bl" then + return arshift(lshift(op, 6), 4) + elseif name == "adr" or name == "adrp" then + local immlo = band(rshift(op, 29), 3) + local immhi = lshift(arshift(lshift(op, 8), 13), 2) + return bor(immhi, immlo) + elseif name == "tbz" or name == "tbnz" then + return lshift(arshift(lshift(op, 13), 18), 2) + else + return lshift(arshift(lshift(op, 8), 13), 2) + end +end + +local function parse_fpimm8(op) + local sign = band(op, 0x100000) == 0 and 1 or -1 + local exp = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131 + local frac = 16+band(rshift(op, 13), 15) + return sign * frac * 2^exp +end + +local function prefer_bfx(sf, uns, imms, immr) + if imms < immr or imms == 31 or imms == 63 then + return false + end + if immr == 0 then + if sf == 0 and (imms == 7 or imms == 15) then + return false + end + if sf ~= 0 and uns == 0 and (imms == 7 or imms == 15 or imms == 31) then + return false + end + end + return true +end + +-- Disassemble a single instruction. +local function disass_ins(ctx) + local pos = ctx.pos + local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) + local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) + local operands = {} + local suffix = "" + local last, name, pat + local map_reg + ctx.op = op + ctx.rel = nil + last = nil + local opat + opat = map_init[band(rshift(op, 25), 15)] + while type(opat) ~= "string" do + if not opat then return unknown(ctx) end + opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + end + name, pat = match(opat, "^([a-z0-9]*)(.*)") + local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") + if altname then pat = pat2 end + if sub(pat, 1, 1) == "." then + local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)") + suffix = suffix..s2 + pat = p2 + end + + local rt = match(pat, "[gf]") + if rt then + if rt == "g" then + map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w + else + map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s + end + end + + local second0, immr + + for p in gmatch(pat, ".") do + local x = nil + if p == "D" then + local regnum = band(op, 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "N" then + local regnum = band(rshift(op, 5), 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "M" then + local regnum = band(rshift(op, 16), 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "A" then + local regnum = band(rshift(op, 10), 31) + x = rt and map_reg[regnum] or match_reg(p, pat, regnum) + elseif p == "B" then + local addr = ctx.addr + pos + parse_immpc(op, name) + ctx.rel = addr + x = "0x"..tohex(addr) + elseif p == "T" then + x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31)) + elseif p == "V" then + x = band(op, 15) + elseif p == "C" then + x = map_cond[band(rshift(op, 12), 15)] + elseif p == "c" then + local rn = band(rshift(op, 5), 31) + local rm = band(rshift(op, 16), 31) + local cond = band(rshift(op, 12), 15) + local invc = bxor(cond, 1) + x = map_cond[cond] + if altname and cond ~= 14 and cond ~= 15 then + local a1, a2 = match(altname, "([^|]*)|(.*)") + if rn == rm then + local n = #operands + operands[n] = nil + x = map_cond[invc] + if rn ~= 31 then + if a1 then name = a1 else name = altname end + else + operands[n-1] = nil + name = a2 + end + end + end + elseif p == "W" then + x = band(rshift(op, 5), 0xffff) + elseif p == "Y" then + x = band(rshift(op, 5), 0xffff) + local hw = band(rshift(op, 21), 3) + if altname and (hw == 0 or x ~= 0) then + name = altname + end + elseif p == "L" then + local rn = map_regs.x[band(rshift(op, 5), 31)] + local imm9 = arshift(lshift(op, 11), 23) + if band(op, 0x800) ~= 0 then + x = "["..rn..", #"..imm9.."]!" + else + x = "["..rn.."], #"..imm9 + end + elseif p == "U" then + local rn = map_regs.x[band(rshift(op, 5), 31)] + local sz = band(rshift(op, 30), 3) + local imm12 = lshift(arshift(lshift(op, 10), 20), sz) + if imm12 ~= 0 then + x = "["..rn..", #"..imm12.."]" + else + x = "["..rn.."]" + end + elseif p == "K" then + local rn = map_regs.x[band(rshift(op, 5), 31)] + local imm9 = arshift(lshift(op, 11), 23) + if imm9 ~= 0 then + x = "["..rn..", #"..imm9.."]" + else + x = "["..rn.."]" + end + elseif p == "O" then + local rn, rm = map_regs.x[band(rshift(op, 5), 31)] + local m = band(rshift(op, 13), 1) + if m == 0 then + rm = map_regs.w[band(rshift(op, 16), 31)] + else + rm = map_regs.x[band(rshift(op, 16), 31)] + end + x = "["..rn..", "..rm + local opt = band(rshift(op, 13), 7) + local s = band(rshift(op, 12), 1) + local sz = band(rshift(op, 30), 3) + -- extension to be applied + if opt == 3 then + if s == 0 then x = x.."]" + else x = x..", lsl #"..sz.."]" end + elseif opt == 2 or opt == 6 or opt == 7 then + if s == 0 then x = x..", "..map_extend[opt].."]" + else x = x..", "..map_extend[opt].." #"..sz.."]" end + else + x = x.."]" + end + elseif p == "P" then + local opcv, sh = rshift(op, 26), 2 + if opcv >= 0x2a then sh = 4 elseif opcv >= 0x1b then sh = 3 end + local imm7 = lshift(arshift(lshift(op, 10), 25), sh) + local rn = map_regs.x[band(rshift(op, 5), 31)] + local ind = band(rshift(op, 23), 3) + if ind == 1 then + x = "["..rn.."], #"..imm7 + elseif ind == 2 then + if imm7 == 0 then + x = "["..rn.."]" + else + x = "["..rn..", #"..imm7.."]" + end + elseif ind == 3 then + x = "["..rn..", #"..imm7.."]!" + end + elseif p == "I" then + local shf = band(rshift(op, 22), 3) + local imm12 = band(rshift(op, 10), 0x0fff) + local rn, rd = band(rshift(op, 5), 31), band(op, 31) + if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then + name = altname + x = nil + elseif shf == 0 then + x = imm12 + elseif shf == 1 then + x = imm12..", lsl #12" + end + elseif p == "i" then + x = "#0x"..decode_imm13(op) + elseif p == "1" then + immr = band(rshift(op, 16), 63) + x = immr + elseif p == "2" then + x = band(rshift(op, 10), 63) + if altname then + local a1, a2, a3, a4, a5, a6 = + match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)") + local sf = band(rshift(op, 26), 32) + local uns = band(rshift(op, 30), 1) + if prefer_bfx(sf, uns, x, immr) then + name = a2 + x = x - immr + 1 + elseif immr == 0 and x == 7 then + local n = #operands + operands[n] = nil + if sf ~= 0 then + operands[n-1] = gsub(operands[n-1], "x", "w") + end + last = operands[n-1] + name = a6 + x = nil + elseif immr == 0 and x == 15 then + local n = #operands + operands[n] = nil + if sf ~= 0 then + operands[n-1] = gsub(operands[n-1], "x", "w") + end + last = operands[n-1] + name = a5 + x = nil + elseif x == 31 or x == 63 then + if x == 31 and immr == 0 and name == "sbfm" then + name = a4 + local n = #operands + operands[n] = nil + if sf ~= 0 then + operands[n-1] = gsub(operands[n-1], "x", "w") + end + last = operands[n-1] + else + name = a3 + end + x = nil + elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then + name = a4 + last = "#"..(sf+32 - immr) + operands[#operands] = last + x = nil + elseif x < immr then + name = a1 + last = "#"..(sf+32 - immr) + operands[#operands] = last + x = x + 1 + end + end + elseif p == "3" then + x = band(rshift(op, 10), 63) + if altname then + local a1, a2 = match(altname, "([^|]*)|(.*)") + if x < immr then + name = a1 + local sf = band(rshift(op, 26), 32) + last = "#"..(sf+32 - immr) + operands[#operands] = last + x = x + 1 + else + name = a2 + x = x - immr + 1 + end + end + elseif p == "4" then + x = band(rshift(op, 10), 63) + local rn = band(rshift(op, 5), 31) + local rm = band(rshift(op, 16), 31) + if altname and rn == rm then + local n = #operands + operands[n] = nil + last = operands[n-1] + name = altname + end + elseif p == "5" then + x = band(rshift(op, 16), 31) + elseif p == "S" then + x = band(rshift(op, 10), 63) + if x == 0 then x = nil + else x = map_shift[band(rshift(op, 22), 3)].." #"..x end + elseif p == "X" then + local opt = band(rshift(op, 13), 7) + -- Width specifier <R>. + if opt ~= 3 and opt ~= 7 then + last = map_regs.w[band(rshift(op, 16), 31)] + operands[#operands] = last + end + x = band(rshift(op, 10), 7) + -- Extension. + if opt == 2 + band(rshift(op, 31), 1) and + band(rshift(op, second0 and 5 or 0), 31) == 31 then + if x == 0 then x = nil + else x = "lsl #"..x end + else + if x == 0 then x = map_extend[band(rshift(op, 13), 7)] + else x = map_extend[band(rshift(op, 13), 7)].." #"..x end + end + elseif p == "R" then + x = band(rshift(op,21), 3) + if x == 0 then x = nil + else x = "lsl #"..x*16 end + elseif p == "z" then + local n = #operands + if operands[n] == "sp" then operands[n] = "xzr" + elseif operands[n] == "wsp" then operands[n] = "wzr" + end + elseif p == "Z" then + x = 0 + elseif p == "F" then + x = parse_fpimm8(op) + elseif p == "g" or p == "f" or p == "x" or p == "w" or + p == "d" or p == "s" then + -- These are handled in D/N/M/A. + elseif p == "0" then + if last == "sp" or last == "wsp" then + local n = #operands + operands[n] = nil + last = operands[n-1] + if altname then + local a1, a2 = match(altname, "([^|]*)|(.*)") + if not a1 then + name = altname + elseif second0 then + name, altname = a2, a1 + else + name, altname = a1, a2 + end + end + end + second0 = true + else + assert(false) + end + if x then + last = x + if type(x) == "number" then x = "#"..x end + operands[#operands+1] = x + end + end + + return putop(ctx, name..suffix, operands) +end + +------------------------------------------------------------------------------ + +-- Disassemble a block of code. +local function disass_block(ctx, ofs, len) + if not ofs then ofs = 0 end + local stop = len and ofs+len or #ctx.code + ctx.pos = ofs + ctx.rel = nil + while ctx.pos < stop do disass_ins(ctx) end +end + +-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). +local function create(code, addr, out) + local ctx = {} + ctx.code = code + ctx.addr = addr or 0 + ctx.out = out or io.write + ctx.symtab = {} + ctx.disass = disass_block + ctx.hexdump = 8 + return ctx +end + +-- Simple API: disassemble code (a string) at address and output via out. +local function disass(code, addr, out) + create(code, addr, out):disass() +end + +-- Return register name for RID. +local function regname(r) + if r < 32 then return map_regs.x[r] end + return map_regs.d[r-32] +end + +-- Public module functions. +return { + create = create, + disass = disass, + regname = regname +} + diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua new file mode 100644 index 00000000..7337f5b7 --- /dev/null +++ b/src/jit/dis_arm64be.lua @@ -0,0 +1,12 @@ +---------------------------------------------------------------------------- +-- LuaJIT ARM64BE disassembler wrapper module. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- ARM64 instructions are always little-endian. So just forward to the +-- common ARM64 disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +return require((string.match(..., ".*%.") or "").."dis_arm64") + diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua index ed65702a..05dc30fd 100644 --- a/src/jit/dis_mips.lua +++ b/src/jit/dis_mips.lua @@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift ------------------------------------------------------------------------------ --- Primary and extended opcode maps +-- Extended opcode maps common to all MIPS releases ------------------------------------------------------------------------------ -local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } +local map_cop0 = { + shift = 25, mask = 1, + [0] = { + shift = 21, mask = 15, + [0] = "mfc0TDW", [4] = "mtc0TDW", + [10] = "rdpgprDT", + [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, + [14] = "wrpgprDT", + }, { + shift = 0, mask = 63, + [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", + [24] = "eret", [31] = "deret", + [32] = "wait", + }, +} + +------------------------------------------------------------------------------ +-- Primary and extended opcode maps for MIPS R1-R5 +------------------------------------------------------------------------------ + +local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", } + local map_special = { shift = 0, mask = 63, [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, @@ -34,15 +55,17 @@ local map_special = { "jrS", "jalrD1S", "movzDST", "movnDST", "syscallY", "breakY", false, "sync", "mfhiD", "mthiS", "mfloD", "mtloS", - false, false, false, false, + "dsllvDST", false, "dsrlvDST", "dsravDST", "multST", "multuST", "divST", "divuST", - false, false, false, false, + "dmultST", "dmultuST", "ddivST", "ddivuST", "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", - "andDST", "orDST", "xorDST", "nor|notDST0", + "andDST", "or|moveDST0", "xorDST", "nor|notDST0", false, false, "sltDST", "sltuDST", - false, false, false, false, + "daddDST", "dadduDST", "dsubDST", "dsubuDST", "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", - "teqSTZ", false, "tneSTZ", + "teqSTZ", false, "tneSTZ", false, + "dsllDTA", false, "dsrlDTA", "dsraDTA", + "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", } local map_special2 = { @@ -60,11 +83,17 @@ local map_bshfl = { [24] = "sehDT", } +local map_dbshfl = { + shift = 6, mask = 31, + [2] = "dsbhDT", + [5] = "dshdDT", +} + local map_special3 = { shift = 0, mask = 63, - [0] = "extTSAK", [4] = "insTSAL", - [32] = map_bshfl, - [59] = "rdhwrTD", + [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", + [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", + [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD", } local map_regimm = { @@ -79,22 +108,6 @@ local map_regimm = { false, false, false, "synciSO", } -local map_cop0 = { - shift = 25, mask = 1, - [0] = { - shift = 21, mask = 15, - [0] = "mfc0TDW", [4] = "mtc0TDW", - [10] = "rdpgprDT", - [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", }, - [14] = "wrpgprDT", - }, { - shift = 0, mask = 63, - [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp", - [24] = "eret", [31] = "deret", - [32] = "wait", - }, -} - local map_cop1s = { shift = 0, mask = 63, [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", @@ -178,8 +191,8 @@ local map_cop1bc = { local map_cop1 = { shift = 21, mask = 31, - [0] = "mfc1TG", false, "cfc1TG", "mfhc1TG", - "mtc1TG", false, "ctc1TG", "mthc1TG", + [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", + "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", map_cop1bc, false, false, false, false, false, false, false, map_cop1s, map_cop1d, false, false, @@ -213,16 +226,218 @@ local map_pri = { "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", map_cop0, map_cop1, false, map_cop1x, "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", - false, false, false, false, - map_special2, false, false, map_special3, + "daddiTSI", "daddiuTSI", false, false, + map_special2, "jalxJ", false, map_special3, "lbTSO", "lhTSO", "lwlTSO", "lwTSO", "lbuTSO", "lhuTSO", "lwrTSO", false, "sbTSO", "shTSO", "swlTSO", "swTSO", false, false, "swrTSO", "cacheNSO", "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", - false, "ldc1HSO", "ldc2TSO", false, + false, "ldc1HSO", "ldc2TSO", "ldTSO", "scTSO", "swc1HSO", "swc2TSO", false, - false, "sdc1HSO", "sdc2TSO", false, + false, "sdc1HSO", "sdc2TSO", "sdTSO", +} + +------------------------------------------------------------------------------ +-- Primary and extended opcode maps for MIPS R6 +------------------------------------------------------------------------------ + +local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" } +local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" } +local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" } +local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" } +local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" } +local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" } +local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" } +local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" } + +local map_special_r6 = { + shift = 0, mask = 63, + [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, + false, map_srl, "sraDTA", + "sllvDTS", false, map_srlv, "sravDTS", + "jrS", "jalrD1S", false, false, + "syscallY", "breakY", false, "sync", + "clzDS", "cloDS", "dclzDS", "dcloDS", + "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST", + map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6, + map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6, + "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", + "andDST", "or|moveDST0", "xorDST", "nor|notDST0", + false, false, "sltDST", "sltuDST", + "daddDST", "dadduDST", "dsubDST", "dsubuDST", + "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", + "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST", + "dsllDTA", false, "dsrlDTA", "dsraDTA", + "dsll32DTA", false, "dsrl32DTA", "dsra32DTA", +} + +local map_bshfl_r6 = { + shift = 9, mask = 3, + [1] = "alignDSTa", + _ = { + shift = 6, mask = 31, + [0] = "bitswapDT", + [2] = "wsbhDT", + [16] = "sebDT", + [24] = "sehDT", + } +} + +local map_dbshfl_r6 = { + shift = 9, mask = 3, + [1] = "dalignDSTa", + _ = { + shift = 6, mask = 31, + [0] = "dbitswapDT", + [2] = "dsbhDT", + [5] = "dshdDT", + } +} + +local map_special3_r6 = { + shift = 0, mask = 63, + [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK", + [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL", + [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD", +} + +local map_regimm_r6 = { + shift = 16, mask = 31, + [0] = "bltzSB", [1] = "bgezSB", + [6] = "dahiSI", [30] = "datiSI", + [23] = "sigrieI", [31] = "synciSO", +} + +local map_pcrel_r6 = { + shift = 19, mask = 3, + [0] = "addiupcS2", "lwpcS2", "lwupcS2", { + shift = 18, mask = 1, + [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" } + } +} + +local map_cop1s_r6 = { + shift = 0, mask = 63, + [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", + "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG", + "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG", + "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG", + "sel.sFGH", false, false, false, + "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH", + "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG", + "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH", + false, "cvt.d.sFG", false, false, + "cvt.w.sFG", "cvt.l.sFG", +} + +local map_cop1d_r6 = { + shift = 0, mask = 63, + [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH", + "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG", + "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG", + "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG", + "sel.dFGH", false, false, false, + "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH", + "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG", + "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH", + "cvt.s.dFG", false, false, false, + "cvt.w.dFG", "cvt.l.dFG", +} + +local map_cop1w_r6 = { + shift = 0, mask = 63, + [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH", + "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH", + "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH", + "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH", + false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH", + false, false, false, false, + false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH", + false, false, false, false, + "cvt.s.wFG", "cvt.d.wFG", +} + +local map_cop1l_r6 = { + shift = 0, mask = 63, + [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH", + "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH", + "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH", + "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH", + false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH", + false, false, false, false, + false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH", + false, false, false, false, + "cvt.s.lFG", "cvt.d.lFG", +} + +local map_cop1_r6 = { + shift = 21, mask = 31, + [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG", + "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG", + false, "bc1eqzHB", false, false, + false, "bc1nezHB", false, false, + map_cop1s_r6, map_cop1d_r6, false, false, + map_cop1w_r6, map_cop1l_r6, +} + +local function maprs_popTS(rs, rt) + if rt == 0 then return 0 elseif rs == 0 then return 1 + elseif rs == rt then return 2 else return 3 end +end + +local map_pop06_r6 = { + maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB" +} +local map_pop07_r6 = { + maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB" +} +local map_pop26_r6 = { + maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB" +} +local map_pop27_r6 = { + maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB" +} + +local function maprs_popS(rs, rt) + if rs == 0 then return 0 else return 1 end +end + +local map_pop66_r6 = { + maprs = maprs_popS, [0] = "jicTI", "beqzcSb" +} +local map_pop76_r6 = { + maprs = maprs_popS, [0] = "jialcTI", "bnezcSb" +} + +local function maprs_popST(rs, rt) + if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end +end + +local map_pop10_r6 = { + maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB" +} +local map_pop30_r6 = { + maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB" +} + +local map_pri_r6 = { + [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ", + "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6, + map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI", + "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U", + map_cop0, map_cop1_r6, false, false, + false, false, map_pop26_r6, map_pop27_r6, + map_pop30_r6, "daddiuTSI", false, false, + false, "dauiTSI", false, map_special3_r6, + "lbTSO", "lhTSO", false, "lwTSO", + "lbuTSO", "lhuTSO", false, false, + "sbTSO", "shTSO", false, "swTSO", + false, false, false, false, + false, "lwc1HSO", "bc#", false, + false, "ldc1HSO", map_pop66_r6, "ldTSO", + false, "swc1HSO", "balc#", map_pcrel_r6, + false, "sdc1HSO", map_pop76_r6, "sdTSO", } ------------------------------------------------------------------------------ @@ -279,10 +494,14 @@ local function disass_ins(ctx) ctx.op = op ctx.rel = nil - local opat = map_pri[rshift(op, 26)] + local opat = ctx.map_pri[rshift(op, 26)] while type(opat) ~= "string" do if not opat then return unknown(ctx) end - opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + if opat.maprs then + opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))] + else + opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ + end end local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") @@ -306,6 +525,10 @@ local function disass_ins(ctx) x = "f"..band(rshift(op, 21), 31) elseif p == "A" then x = band(rshift(op, 6), 31) + elseif p == "a" then + x = band(rshift(op, 6), 7) + elseif p == "E" then + x = band(rshift(op, 6), 31) + 32 elseif p == "M" then x = band(rshift(op, 11), 31) elseif p == "N" then @@ -315,10 +538,18 @@ local function disass_ins(ctx) if x == 0 then x = nil end elseif p == "K" then x = band(rshift(op, 11), 31) + 1 + elseif p == "P" then + x = band(rshift(op, 11), 31) + 33 elseif p == "L" then x = band(rshift(op, 11), 31) - last + 1 + elseif p == "Q" then + x = band(rshift(op, 11), 31) - last + 33 elseif p == "I" then x = arshift(lshift(op, 16), 16) + elseif p == "2" then + x = arshift(lshift(op, 13), 11) + elseif p == "3" then + x = arshift(lshift(op, 14), 11) elseif p == "U" then x = band(op, 0xffff) elseif p == "O" then @@ -328,13 +559,22 @@ local function disass_ins(ctx) local index = map_gpr[band(rshift(op, 16), 31)] operands[#operands] = format("%s(%s)", index, last) elseif p == "B" then - x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 + x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4 + ctx.rel = x + x = format("0x%08x", x) + elseif p == "b" then + x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4 ctx.rel = x - x = "0x"..tohex(x) + x = format("0x%08x", x) + elseif p == "#" then + x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4 + ctx.rel = x + x = format("0x%08x", x) elseif p == "J" then - x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4 + local a = ctx.addr + ctx.pos + x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4 ctx.rel = x - x = "0x"..tohex(x) + x = format("0x%08x", x) elseif p == "V" then x = band(rshift(op, 8), 7) if x == 0 then x = nil end @@ -384,7 +624,7 @@ local function disass_block(ctx, ofs, len) end -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). -local function create_(code, addr, out) +local function create(code, addr, out) local ctx = {} ctx.code = code ctx.addr = addr or 0 @@ -393,36 +633,62 @@ local function create_(code, addr, out) ctx.disass = disass_block ctx.hexdump = 8 ctx.get = get_be + ctx.map_pri = map_pri + return ctx +end + +local function create_el(code, addr, out) + local ctx = create(code, addr, out) + ctx.get = get_le + return ctx +end + +local function create_r6(code, addr, out) + local ctx = create(code, addr, out) + ctx.map_pri = map_pri_r6 return ctx end -local function create_el_(code, addr, out) - local ctx = create_(code, addr, out) +local function create_r6_el(code, addr, out) + local ctx = create(code, addr, out) ctx.get = get_le + ctx.map_pri = map_pri_r6 return ctx end -- Simple API: disassemble code (a string) at address and output via out. -local function disass_(code, addr, out) - create_(code, addr, out):disass() +local function disass(code, addr, out) + create(code, addr, out):disass() +end + +local function disass_el(code, addr, out) + create_el(code, addr, out):disass() end -local function disass_el_(code, addr, out) - create_el_(code, addr, out):disass() +local function disass_r6(code, addr, out) + create_r6(code, addr, out):disass() +end + +local function disass_r6_el(code, addr, out) + create_r6_el(code, addr, out):disass() end -- Return register name for RID. -local function regname_(r) +local function regname(r) if r < 32 then return map_gpr[r] end return "f"..(r-32) end -- Public module functions. -module(...) - -create = create_ -create_el = create_el_ -disass = disass_ -disass_el = disass_el_ -regname = regname_ +return { + create = create, + create_el = create_el, + create_r6 = create_r6, + create_r6_el = create_r6_el, + disass = disass, + disass_el = disass_el, + disass_r6 = disass_r6, + disass_r6_el = disass_r6_el, + regname = regname +} diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua new file mode 100644 index 00000000..1236e524 --- /dev/null +++ b/src/jit/dis_mips64.lua @@ -0,0 +1,17 @@ +---------------------------------------------------------------------------- +-- LuaJIT MIPS64 disassembler wrapper module. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the big-endian functions from the +-- MIPS disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create, + disass = dis_mips.disass, + regname = dis_mips.regname +} + diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua new file mode 100644 index 00000000..7c478d2d --- /dev/null +++ b/src/jit/dis_mips64el.lua @@ -0,0 +1,17 @@ +---------------------------------------------------------------------------- +-- LuaJIT MIPS64EL disassembler wrapper module. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the little-endian functions from the +-- MIPS disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create_el, + disass = dis_mips.disass_el, + regname = dis_mips.regname +} + diff --git a/src/jit/dis_mips64r6.lua b/src/jit/dis_mips64r6.lua new file mode 100644 index 00000000..c5789ce4 --- /dev/null +++ b/src/jit/dis_mips64r6.lua @@ -0,0 +1,17 @@ +---------------------------------------------------------------------------- +-- LuaJIT MIPS64R6 disassembler wrapper module. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the r6 big-endian functions from the +-- MIPS disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create_r6, + disass = dis_mips.disass_r6, + regname = dis_mips.regname +} + diff --git a/src/jit/dis_mips64r6el.lua b/src/jit/dis_mips64r6el.lua new file mode 100644 index 00000000..f67f6240 --- /dev/null +++ b/src/jit/dis_mips64r6el.lua @@ -0,0 +1,17 @@ +---------------------------------------------------------------------------- +-- LuaJIT MIPS64R6EL disassembler wrapper module. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the r6 little-endian functions from the +-- MIPS disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create_r6_el, + disass = dis_mips.disass_r6_el, + regname = dis_mips.regname +} + diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua index 4f75ca32..a4fa6c60 100644 --- a/src/jit/dis_mipsel.lua +++ b/src/jit/dis_mipsel.lua @@ -8,13 +8,10 @@ -- MIPS disassembler module. All the interesting stuff is there. ------------------------------------------------------------------------------ -local require = require - -module(...) - -local dis_mips = require(_PACKAGE.."dis_mips") - -create = dis_mips.create_el -disass = dis_mips.disass_el -regname = dis_mips.regname +local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") +return { + create = dis_mips.create_el, + disass = dis_mips.disass_el, + regname = dis_mips.regname +} diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua index 6d3adfe0..8f65f25a 100644 --- a/src/jit/dis_ppc.lua +++ b/src/jit/dis_ppc.lua @@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len) end -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). -local function create_(code, addr, out) +local function create(code, addr, out) local ctx = {} ctx.code = code ctx.addr = addr or 0 @@ -572,20 +572,20 @@ local function create_(code, addr, out) end -- Simple API: disassemble code (a string) at address and output via out. -local function disass_(code, addr, out) - create_(code, addr, out):disass() +local function disass(code, addr, out) + create(code, addr, out):disass() end -- Return register name for RID. -local function regname_(r) +local function regname(r) if r < 32 then return map_gpr[r] end return "f"..(r-32) end -- Public module functions. -module(...) - -create = create_ -disass = disass_ -regname = regname_ +return { + create = create, + disass = disass, + regname = regname +} diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua index 5a11c2cc..d076c6ae 100644 --- a/src/jit/dis_x64.lua +++ b/src/jit/dis_x64.lua @@ -8,13 +8,10 @@ -- x86/x64 disassembler module. All the interesting stuff is there. ------------------------------------------------------------------------------ -local require = require - -module(...) - -local dis_x86 = require(_PACKAGE.."dis_x86") - -create = dis_x86.create64 -disass = dis_x86.disass64 -regname = dis_x86.regname64 +local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86") +return { + create = dis_x86.create64, + disass = dis_x86.disass64, + regname = dis_x86.regname64 +} diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index 25f60c77..84492fff 100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua @@ -15,19 +15,20 @@ -- Intel and AMD manuals. The supported instruction set is quite extensive -- and reflects what a current generation Intel or AMD CPU implements in -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, --- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) --- instructions. +-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor +-- (VMX/SVM) instructions. -- -- Notes: -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. -- * No attempt at optimization has been made -- it's fast enough for my needs. --- * The public API may change when more architectures are added. ------------------------------------------------------------------------------ local type = type local sub, byte, format = string.sub, string.byte, string.format local match, gmatch, gsub = string.match, string.gmatch, string.gsub local lower, rep = string.lower, string.rep +local bit = require("bit") +local tohex = bit.tohex -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. local map_opc1_32 = { @@ -76,7 +77,7 @@ local map_opc1_32 = { "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", --Cx -"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", +"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi", "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", --Dx "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", @@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({ [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", - [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, + [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false, [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, }, { __index = map_opc1_32 }) @@ -112,12 +113,12 @@ local map_opc2 = { [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", --1x -"movupsXrm|movssXrm|movupdXrm|movsdXrm", -"movupsXmr|movssXmr|movupdXmr|movsdXmr", +"movupsXrm|movssXrvm|movupdXrm|movsdXrvm", +"movupsXmr|movssXmvr|movupdXmr|movsdXmvr", "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", "movlpsXmr||movlpdXmr", -"unpcklpsXrm||unpcklpdXrm", -"unpckhpsXrm||unpckhpdXrm", +"unpcklpsXrvm||unpcklpdXrvm", +"unpckhpsXrvm||unpckhpdXrvm", "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", "movhpsXmr||movhpdXmr", "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", @@ -126,7 +127,7 @@ local map_opc2 = { "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, "movapsXrm||movapdXrm", "movapsXmr||movapdXmr", -"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt", +"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt", "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", @@ -142,27 +143,27 @@ local map_opc2 = { "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", --5x "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", -"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", -"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", -"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", -"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", -"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", +"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm", +"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm", +"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm", +"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm", +"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm", "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", -"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", -"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", +"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm", +"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm", --6x -"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", -"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", -"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", -"||punpcklqdqXrm","||punpckhqdqXrm", +"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm", +"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm", +"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm", +"||punpcklqdqXrvm","||punpckhqdqXrvm", "movPrVSm","movqMrm|movdquXrm|movdqaXrm", --7x -"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", -"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", -"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", +"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu", +"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu", +"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|", "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", nil,nil, -"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", +"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm", "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", --8x "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", @@ -180,27 +181,27 @@ nil,nil, "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", --Cx "xaddBmr","xaddVmr", -"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", -"pinsrwPrWmu","pextrwDrPmu", -"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", +"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|", +"pinsrwPrvWmu","pextrwDrPmu", +"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp", "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", --Dx -"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", -"paddqPrm","pmullwPrm", +"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm", +"paddqPrvm","pmullwPrvm", "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", -"psubusbPrm","psubuswPrm","pminubPrm","pandPrm", -"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", +"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm", +"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm", --Ex -"pavgbPrm","psrawPrm","psradPrm","pavgwPrm", -"pmulhuwPrm","pmulhwPrm", +"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm", +"pmulhuwPrvm","pmulhwPrvm", "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", -"psubsbPrm","psubswPrm","pminswPrm","porPrm", -"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", +"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm", +"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm", --Fx -"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", -"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", -"psubbPrm","psubwPrm","psubdPrm","psubqPrm", -"paddbPrm","paddwPrm","padddPrm","ud", +"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm", +"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$", +"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm", +"paddbPrvm","paddwPrvm","padddPrvm","ud", } assert(map_opc2[255] == "ud") @@ -208,49 +209,91 @@ assert(map_opc2[255] == "ud") local map_opc3 = { ["38"] = { -- [66] 0f 38 xx --0x -[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", -"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", -"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", -nil,nil,nil,nil, +[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm", +"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm", +"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm", +"||permilpsXrvm","||permilpdXrvm",nil,nil, --1x "||pblendvbXrma",nil,nil,nil, -"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", -nil,nil,nil,nil, +"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm", +"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil, "pabsbPrm","pabswPrm","pabsdPrm",nil, --2x "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, -"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", -nil,nil,nil,nil, +"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm", +"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr", --3x "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", -"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", -"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", -"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", +"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm", +"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm", +"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm", --4x -"||pmulddXrm","||phminposuwXrm", +"||pmulddXrvm","||phminposuwXrm",nil,nil, +nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", +--5x +[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm", +[0x5a] = "||broadcasti128XrlXm", +--7x +[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm", +--8x +[0x8c] = "||pmaskmovXrvVSm", +[0x8e] = "||pmaskmovVSmXvr", +--9x +[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm", +[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm", +[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm", +[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm", +[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm", +--Ax +[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm", +[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm", +[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm", +[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm", +[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm", +--Bx +[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm", +[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm", +[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm", +[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm", +[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm", +--Dx +[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", +[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", --Fx [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", +[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv", }, ["3a"] = { -- [66] 0f 3a xx --0x -[0x00]=nil,nil,nil,nil,nil,nil,nil,nil, -"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", -"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", +[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil, +"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil, +"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu", +"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu", --1x nil,nil,nil,nil, "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", -nil,nil,nil,nil,nil,nil,nil,nil, +"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil, +nil,nil,nil,nil, --2x -"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, +"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil, +--3x +[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru", --4x -[0x40] = "||dppsXrmu", -[0x41] = "||dppdXrmu", -[0x42] = "||mpsadbwXrmu", +[0x40] = "||dppsXrvmu", +[0x41] = "||dppdXrvmu", +[0x42] = "||mpsadbwXrvmu", +[0x44] = "||pclmulqdqXrvmu", +[0x46] = "||perm2i128Xrvmu", +[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb", +[0x4c] = "||pblendvbXrvmb", --6x [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", +[0xdf] = "||aeskeygenassistXrmu", +--Fx +[0xf0] = "||| rorxVrmu", }, } @@ -354,17 +397,19 @@ local map_regs = { "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, + Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", + "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" }, } local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } -- Maps for size names. local map_sz2n = { - B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, + B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32, } local map_sz2prefix = { B = "byte", W = "word", D = "dword", Q = "qword", - M = "qword", X = "xword", + M = "qword", X = "xword", Y = "yword", F = "dword", G = "qword", -- No need for sizes/register names for these two. } @@ -387,10 +432,13 @@ local function putop(ctx, text, operands) if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end if ctx.rex then local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. - (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "") - if t ~= "" then text = "rex."..t.." "..text end + (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "").. + (ctx.vexl and "l" or "") + if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end + if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "") + elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false - ctx.rex = false + ctx.rex = false; ctx.vexl = false; ctx.vexv = false end if ctx.seg then local text2, n = gsub(text, "%[", "["..ctx.seg..":") @@ -405,6 +453,7 @@ local function putop(ctx, text, operands) end ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) ctx.mrm = false + ctx.vexv = false ctx.start = pos ctx.imm = nil end @@ -413,7 +462,7 @@ end local function clearprefixes(ctx) ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false - ctx.rex = false; ctx.a32 = false + ctx.rex = false; ctx.a32 = false; ctx.vexl = false end -- Fallback for incomplete opcodes at the end. @@ -450,9 +499,9 @@ end -- Process pattern string and generate the operands. local function putpat(ctx, name, pat) local operands, regs, sz, mode, sp, rm, sc, rx, sdisp - local code, pos, stop = ctx.code, ctx.pos, ctx.stop + local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl - -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz + -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz for p in gmatch(pat, ".") do local x = nil if p == "V" or p == "U" then @@ -467,12 +516,17 @@ local function putpat(ctx, name, pat) elseif p == "B" then sz = "B" regs = ctx.rex and map_regs.B64 or map_regs.B - elseif match(p, "[WDQMXFG]") then + elseif match(p, "[WDQMXYFG]") then sz = p + if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end regs = map_regs[sz] elseif p == "P" then sz = ctx.o16 and "X" or "M"; ctx.o16 = false + if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end regs = map_regs[sz] + elseif p == "H" then + name = name..(ctx.rexw and "d" or "s") + ctx.rexw = false elseif p == "S" then name = name..lower(sz) elseif p == "s" then @@ -484,6 +538,10 @@ local function putpat(ctx, name, pat) local imm = getimm(ctx, pos, 1); if not imm then return end x = format("0x%02x", imm) pos = pos+1 + elseif p == "b" then + local imm = getimm(ctx, pos, 1); if not imm then return end + x = regs[imm/16+1] + pos = pos+1 elseif p == "w" then local imm = getimm(ctx, pos, 2); if not imm then return end x = format("0x%x", imm) @@ -532,7 +590,7 @@ local function putpat(ctx, name, pat) local lo = imm % 0x1000000 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) else - x = format("0x%08x", imm) + x = "0x"..tohex(imm) end elseif p == "R" then local r = byte(code, pos-1, pos-1)%8 @@ -616,8 +674,13 @@ local function putpat(ctx, name, pat) else x = "CR"..sp end + elseif p == "v" then + if ctx.vexv then + x = regs[ctx.vexv+1]; ctx.vexv = false + end elseif p == "y" then x = "DR"..sp elseif p == "z" then x = "TR"..sp + elseif p == "l" then vexl = false elseif p == "t" then else error("bad pattern `"..pat.."'") @@ -692,7 +755,8 @@ map_act = { B = putpat, W = putpat, D = putpat, Q = putpat, V = putpat, U = putpat, T = putpat, M = putpat, X = putpat, P = putpat, - F = putpat, G = putpat, + F = putpat, G = putpat, Y = putpat, + H = putpat, -- Collect prefixes. [":"] = function(ctx, name, pat) @@ -753,15 +817,68 @@ map_act = { -- REX prefix. rex = function(ctx, name, pat) - if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed. + if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed. for p in gmatch(pat, ".") do ctx["rex"..p] = true end - ctx.rex = true + ctx.rex = "rex" + end, + + -- VEX prefix. + vex = function(ctx, name, pat) + if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed. + ctx.rex = "vex" + local pos = ctx.pos + if ctx.mrm then + ctx.mrm = nil + pos = pos-1 + end + local b = byte(ctx.code, pos, pos) + if not b then return incomplete(ctx) end + pos = pos+1 + if b < 128 then ctx.rexr = true end + local m = 1 + if pat == "3" then + m = b%32; b = (b-m)/32 + local nb = b%2; b = (b-nb)/2 + if nb == 0 then ctx.rexb = true end + local nx = b%2 + if nx == 0 then ctx.rexx = true end + b = byte(ctx.code, pos, pos) + if not b then return incomplete(ctx) end + pos = pos+1 + if b >= 128 then ctx.rexw = true end + end + ctx.pos = pos + local map + if m == 1 then map = map_opc2 + elseif m == 2 then map = map_opc3["38"] + elseif m == 3 then map = map_opc3["3a"] + else return unknown(ctx) end + local p = b%4; b = (b-p)/4 + if p == 1 then ctx.o16 = "o16" + elseif p == 2 then ctx.rep = "rep" + elseif p == 3 then ctx.rep = "repne" end + local l = b%2; b = (b-l)/2 + if l ~= 0 then ctx.vexl = true end + ctx.vexv = (-1-b)%16 + return dispatchmap(ctx, map) end, -- Special case for nop with REX prefix. nop = function(ctx, name, pat) return dispatch(ctx, ctx.rex and pat or "nop") end, + + -- Special case for 0F 77. + emms = function(ctx, name, pat) + if ctx.rex ~= "vex" then + return putop(ctx, "emms") + elseif ctx.vexl then + ctx.vexl = false + return putop(ctx, "zeroall") + else + return putop(ctx, "zeroupper") + end + end, } ------------------------------------------------------------------------------ @@ -782,7 +899,7 @@ local function disass_block(ctx, ofs, len) end -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). -local function create_(code, addr, out) +local function create(code, addr, out) local ctx = {} ctx.code = code ctx.addr = (addr or 0) - 1 @@ -796,8 +913,8 @@ local function create_(code, addr, out) return ctx end -local function create64_(code, addr, out) - local ctx = create_(code, addr, out) +local function create64(code, addr, out) + local ctx = create(code, addr, out) ctx.x64 = true ctx.map1 = map_opc1_64 ctx.aregs = map_regs.Q @@ -805,32 +922,32 @@ local function create64_(code, addr, out) end -- Simple API: disassemble code (a string) at address and output via out. -local function disass_(code, addr, out) - create_(code, addr, out):disass() +local function disass(code, addr, out) + create(code, addr, out):disass() end -local function disass64_(code, addr, out) - create64_(code, addr, out):disass() +local function disass64(code, addr, out) + create64(code, addr, out):disass() end -- Return register name for RID. -local function regname_(r) +local function regname(r) if r < 8 then return map_regs.D[r+1] end return map_regs.X[r-7] end -local function regname64_(r) +local function regname64(r) if r < 16 then return map_regs.Q[r+1] end return map_regs.X[r-15] end -- Public module functions. -module(...) - -create = create_ -create64 = create64_ -disass = disass_ -disass64 = disass64_ -regname = regname_ -regname64 = regname64_ +return { + create = create, + create64 = create64, + disass = disass, + disass64 = disass64, + regname = regname, + regname64 = regname64 +} diff --git a/src/jit/dump.lua b/src/jit/dump.lua index 344fa926..18e7a4b7 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua @@ -55,7 +55,7 @@ -- Cache some library functions and objects. local jit = require("jit") -assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") +assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") local jutil = require("jit.util") local vmdef = require("jit.vmdef") local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc @@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr local bit = require("bit") -local band, shr = bit.band, bit.rshift +local band, shr, tohex = bit.band, bit.rshift, bit.tohex local sub, gsub, format = string.sub, string.gsub, string.format local byte, rep = string.byte, string.rep local type, tostring = type, tostring @@ -85,12 +85,13 @@ local nexitsym = 0 local function fillsymtab_tr(tr, nexit) local t = {} symtabmt.__index = t - if jit.arch == "mips" or jit.arch == "mipsel" then + if jit.arch:sub(1, 4) == "mips" then t[traceexitstub(tr, 0)] = "exit" return end for i=0,nexit-1 do local addr = traceexitstub(tr, i) + if addr < 0 then addr = addr + 2^32 end t[addr] = tostring(i) end local addr = traceexitstub(tr, nexit) @@ -101,10 +102,15 @@ end local function fillsymtab(tr, nexit) local t = symtab if nexitsym == 0 then + local maskaddr = jit.arch == "arm" and -2 local ircall = vmdef.ircall for i=0,#ircall do local addr = ircalladdr(i) - if addr ~= 0 then t[addr] = ircall[i] end + if addr ~= 0 then + if maskaddr then addr = band(addr, maskaddr) end + if addr < 0 then addr = addr + 2^32 end + t[addr] = ircall[i] + end end end if nexitsym == 1000000 then -- Per-trace exit stubs. @@ -118,6 +124,7 @@ local function fillsymtab(tr, nexit) nexit = 1000000 break end + if addr < 0 then addr = addr + 2^32 end t[addr] = tostring(i) end nexitsym = nexit @@ -136,6 +143,7 @@ local function dump_mcode(tr) local mcode, addr, loop = tracemc(tr) if not mcode then return end if not disass then disass = require("jit.dis_"..jit.arch) end + if addr < 0 then addr = addr + 2^32 end out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") local ctx = disass.create(mcode, addr, dumpwrite) ctx.hexdump = 0 @@ -211,8 +219,10 @@ local function colorize_text(s) return s end -local function colorize_ansi(s, t) - return format(colortype_ansi[t], s) +local function colorize_ansi(s, t, extra) + local out = format(colortype_ansi[t], s) + if extra then out = "\027[3m"..out end + return out end local irtype_ansi = setmetatable({}, @@ -221,9 +231,10 @@ local irtype_ansi = setmetatable({}, local html_escape = { ["<"] = "<", [">"] = ">", ["&"] = "&", } -local function colorize_html(s, t) +local function colorize_html(s, t, extra) s = gsub(s, "[<>&]", html_escape) - return format('<span class="irt_%s">%s</span>', irtype_text[t], s) + return format('<span class="irt_%s%s">%s</span>', + irtype_text[t], extra and " irt_extra" or "", s) end local irtype_html = setmetatable({}, @@ -248,6 +259,7 @@ span.irt_tab { color: #c00000; } span.irt_udt, span.irt_lud { color: #00c0c0; } span.irt_num { color: #4040c0; } span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } +span.irt_extra { font-style: italic; } </style> ]] @@ -263,6 +275,7 @@ local litname = { if band(mode, 8) ~= 0 then s = s.."C" end if band(mode, 16) ~= 0 then s = s.."R" end if band(mode, 32) ~= 0 then s = s.."I" end + if band(mode, 64) ~= 0 then s = s.."K" end t[mode] = s return s end}), @@ -270,16 +283,20 @@ local litname = { ["CONV "] = setmetatable({}, { __index = function(t, mode) local s = irtype[band(mode, 31)] s = irtype[band(shr(mode, 5), 31)].."."..s - if band(mode, 0x400) ~= 0 then s = s.." trunc" - elseif band(mode, 0x800) ~= 0 then s = s.." sext" end + if band(mode, 0x800) ~= 0 then s = s.." sext" end local c = shr(mode, 12) - if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end + if c == 1 then s = s.." none" + elseif c == 2 then s = s.." index" + elseif c == 3 then s = s.." check" end t[mode] = s return s end}), ["FLOAD "] = vmdef.irfield, ["FREF "] = vmdef.irfield, ["FPMATH"] = vmdef.irfpm, + ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" }, + ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" }, + ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" }, } local function ctlsub(c) @@ -303,15 +320,19 @@ local function fmtfunc(func, pc) end end -local function formatk(tr, idx) +local function formatk(tr, idx, sn) local k, t, slot = tracek(tr, idx) local tn = type(k) local s if tn == "number" then - if k == 2^52+2^51 then + if t < 12 then + s = k == 0 and "NULL" or format("[0x%08x]", k) + elseif band(sn or 0, 0x30000) ~= 0 then + s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz" + elseif k == 2^52+2^51 then s = "bias" else - s = format("%+.14g", k) + s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k) end elseif tn == "string" then s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) @@ -329,10 +350,12 @@ local function formatk(tr, idx) elseif t == 21 then -- int64_t s = sub(tostring(k), 1, -3) if sub(s, 1, 1) ~= "-" then s = "+"..s end + elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL) + return "----" -- Special case for LJ_FR2 slot 1. else s = tostring(k) -- For primitives. end - s = colorize(format("%-4s", s), t) + s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0) if slot then s = format("%s @%d", s, slot) end @@ -347,12 +370,12 @@ local function printsnap(tr, snap) n = n + 1 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS if ref < 0 then - out:write(formatk(tr, ref)) + out:write(formatk(tr, ref, sn)) elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) else local m, ot, op1, op2 = traceir(tr, ref) - out:write(colorize(format("%04d", ref), band(ot, 31))) + out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0)) end out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME else @@ -545,7 +568,7 @@ local function dump_trace(what, tr, func, pc, otr, oex) if what == "start" then if dumpmode.H then out:write('<pre class="ljdump">\n') end out:write("---- TRACE ", tr, " ", what) - if otr then out:write(" ", otr, "/", oex) end + if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end out:write(" ", fmtfunc(func, pc), "\n") elseif what == "stop" or what == "abort" then out:write("---- TRACE ", tr, " ", what) @@ -595,23 +618,26 @@ end ------------------------------------------------------------------------------ +local gpr64 = jit.arch:match("64") +local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel" + -- Dump taken trace exits. local function dump_texit(tr, ex, ngpr, nfpr, ...) out:write("---- TRACE ", tr, " exit ", ex, "\n") if dumpmode.X then local regs = {...} - if jit.arch == "x64" then + if gpr64 then for i=1,ngpr do out:write(format(" %016x", regs[i])) if i % 4 == 0 then out:write("\n") end end else for i=1,ngpr do - out:write(format(" %08x", regs[i])) + out:write(" ", tohex(regs[i])) if i % 8 == 0 then out:write("\n") end end end - if jit.arch == "mips" or jit.arch == "mipsel" then + if fprmips32 then for i=1,nfpr,2 do out:write(format(" %+17.14g", regs[ngpr+i])) if i % 8 == 7 then out:write("\n") end @@ -692,9 +718,9 @@ local function dumpon(opt, outfile) end -- Public module functions. -module(...) - -on = dumpon -off = dumpoff -start = dumpon -- For -j command line option. +return { + on = dumpon, + off = dumpoff, + start = dumpon -- For -j command line option. +} diff --git a/src/jit/p.lua b/src/jit/p.lua new file mode 100644 index 00000000..f225c312 --- /dev/null +++ b/src/jit/p.lua @@ -0,0 +1,312 @@ +---------------------------------------------------------------------------- +-- LuaJIT profiler. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- +-- This module is a simple command line interface to the built-in +-- low-overhead profiler of LuaJIT. +-- +-- The lower-level API of the profiler is accessible via the "jit.profile" +-- module or the luaJIT_profile_* C API. +-- +-- Example usage: +-- +-- luajit -jp myapp.lua +-- luajit -jp=s myapp.lua +-- luajit -jp=-s myapp.lua +-- luajit -jp=vl myapp.lua +-- luajit -jp=G,profile.txt myapp.lua +-- +-- The following dump features are available: +-- +-- f Stack dump: function name, otherwise module:line. Default mode. +-- F Stack dump: ditto, but always prepend module. +-- l Stack dump: module:line. +-- <number> stack dump depth (callee < caller). Default: 1. +-- -<number> Inverse stack dump depth (caller > callee). +-- s Split stack dump after first stack level. Implies abs(depth) >= 2. +-- p Show full path for module names. +-- v Show VM states. Can be combined with stack dumps, e.g. vf or fv. +-- z Show zones. Can be combined with stack dumps, e.g. zf or fz. +-- r Show raw sample counts. Default: show percentages. +-- a Annotate excerpts from source code files. +-- A Annotate complete source code files. +-- G Produce raw output suitable for graphical tools (e.g. flame graphs). +-- m<number> Minimum sample percentage to be shown. Default: 3. +-- i<number> Sampling interval in milliseconds. Default: 10. +-- +---------------------------------------------------------------------------- + +-- Cache some library functions and objects. +local jit = require("jit") +assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") +local profile = require("jit.profile") +local vmdef = require("jit.vmdef") +local math = math +local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor +local sort, format = table.sort, string.format +local stdout = io.stdout +local zone -- Load jit.zone module on demand. + +-- Output file handle. +local out + +------------------------------------------------------------------------------ + +local prof_ud +local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth +local prof_ann, prof_count1, prof_count2, prof_samples + +local map_vmmode = { + N = "Compiled", + I = "Interpreted", + C = "C code", + G = "Garbage Collector", + J = "JIT Compiler", +} + +-- Profiler callback. +local function prof_cb(th, samples, vmmode) + prof_samples = prof_samples + samples + local key_stack, key_stack2, key_state + -- Collect keys for sample. + if prof_states then + if prof_states == "v" then + key_state = map_vmmode[vmmode] or vmmode + else + key_state = zone:get() or "(none)" + end + end + if prof_fmt then + key_stack = profile.dumpstack(th, prof_fmt, prof_depth) + key_stack = key_stack:gsub("%[builtin#(%d+)%]", function(x) + return vmdef.ffnames[tonumber(x)] + end) + if prof_split == 2 then + local k1, k2 = key_stack:match("(.-) [<>] (.*)") + if k2 then key_stack, key_stack2 = k1, k2 end + elseif prof_split == 3 then + key_stack2 = profile.dumpstack(th, "l", 1) + end + end + -- Order keys. + local k1, k2 + if prof_split == 1 then + if key_state then + k1 = key_state + if key_stack then k2 = key_stack end + end + elseif key_stack then + k1 = key_stack + if key_stack2 then k2 = key_stack2 elseif key_state then k2 = key_state end + end + -- Coalesce samples in one or two levels. + if k1 then + local t1 = prof_count1 + t1[k1] = (t1[k1] or 0) + samples + if k2 then + local t2 = prof_count2 + local t3 = t2[k1] + if not t3 then t3 = {}; t2[k1] = t3 end + t3[k2] = (t3[k2] or 0) + samples + end + end +end + +------------------------------------------------------------------------------ + +-- Show top N list. +local function prof_top(count1, count2, samples, indent) + local t, n = {}, 0 + for k in pairs(count1) do + n = n + 1 + t[n] = k + end + sort(t, function(a, b) return count1[a] > count1[b] end) + for i=1,n do + local k = t[i] + local v = count1[k] + local pct = floor(v*100/samples + 0.5) + if pct < prof_min then break end + if not prof_raw then + out:write(format("%s%2d%% %s\n", indent, pct, k)) + elseif prof_raw == "r" then + out:write(format("%s%5d %s\n", indent, v, k)) + else + out:write(format("%s %d\n", k, v)) + end + if count2 then + local r = count2[k] + if r then + prof_top(r, nil, v, (prof_split == 3 or prof_split == 1) and " -- " or + (prof_depth < 0 and " -> " or " <- ")) + end + end + end +end + +-- Annotate source code +local function prof_annotate(count1, samples) + local files = {} + local ms = 0 + for k, v in pairs(count1) do + local pct = floor(v*100/samples + 0.5) + ms = math.max(ms, v) + if pct >= prof_min then + local file, line = k:match("^(.*):(%d+)$") + if not file then file = k; line = 0 end + local fl = files[file] + if not fl then fl = {}; files[file] = fl; files[#files+1] = file end + line = tonumber(line) + fl[line] = prof_raw and v or pct + end + end + sort(files) + local fmtv, fmtn = " %3d%% | %s\n", " | %s\n" + if prof_raw then + local n = math.max(5, math.ceil(math.log10(ms))) + fmtv = "%"..n.."d | %s\n" + fmtn = (" "):rep(n).." | %s\n" + end + local ann = prof_ann + for _, file in ipairs(files) do + local f0 = file:byte() + if f0 == 40 or f0 == 91 then + out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file)) + break + end + local fp, err = io.open(file) + if not fp then + out:write(format("====== ERROR: %s: %s\n", file, err)) + break + end + out:write(format("\n====== %s ======\n", file)) + local fl = files[file] + local n, show = 1, false + if ann ~= 0 then + for i=1,ann do + if fl[i] then show = true; out:write("@@ 1 @@\n"); break end + end + end + for line in fp:lines() do + if line:byte() == 27 then + out:write("[Cannot annotate bytecode file]\n") + break + end + local v = fl[n] + if ann ~= 0 then + local v2 = fl[n+ann] + if show then + if v2 then show = n+ann elseif v then show = n + elseif show+ann < n then show = false end + elseif v2 then + show = n+ann + out:write(format("@@ %d @@\n", n)) + end + if not show then goto next end + end + if v then + out:write(format(fmtv, v, line)) + else + out:write(format(fmtn, line)) + end + ::next:: + n = n + 1 + end + fp:close() + end +end + +------------------------------------------------------------------------------ + +-- Finish profiling and dump result. +local function prof_finish() + if prof_ud then + profile.stop() + local samples = prof_samples + if samples == 0 then + if prof_raw ~= true then out:write("[No samples collected]\n") end + return + end + if prof_ann then + prof_annotate(prof_count1, samples) + else + prof_top(prof_count1, prof_count2, samples, "") + end + prof_count1 = nil + prof_count2 = nil + prof_ud = nil + if out ~= stdout then out:close() end + end +end + +-- Start profiling. +local function prof_start(mode) + local interval = "" + mode = mode:gsub("i%d*", function(s) interval = s; return "" end) + prof_min = 3 + mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end) + prof_depth = 1 + mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end) + local m = {} + for c in mode:gmatch(".") do m[c] = c end + prof_states = m.z or m.v + if prof_states == "z" then zone = require("jit.zone") end + local scope = m.l or m.f or m.F or (prof_states and "" or "f") + local flags = (m.p or "") + prof_raw = m.r + if m.s then + prof_split = 2 + if prof_depth == -1 or m["-"] then prof_depth = -2 + elseif prof_depth == 1 then prof_depth = 2 end + elseif mode:find("[fF].*l") then + scope = "l" + prof_split = 3 + else + prof_split = (scope == "" or mode:find("[zv].*[lfF]")) and 1 or 0 + end + prof_ann = m.A and 0 or (m.a and 3) + if prof_ann then + scope = "l" + prof_fmt = "pl" + prof_split = 0 + prof_depth = 1 + elseif m.G and scope ~= "" then + prof_fmt = flags..scope.."Z;" + prof_depth = -100 + prof_raw = true + prof_min = 0 + elseif scope == "" then + prof_fmt = false + else + local sc = prof_split == 3 and m.f or m.F or scope + prof_fmt = flags..sc..(prof_depth >= 0 and "Z < " or "Z > ") + end + prof_count1 = {} + prof_count2 = {} + prof_samples = 0 + profile.start(scope:lower()..interval, prof_cb) + prof_ud = newproxy(true) + getmetatable(prof_ud).__gc = prof_finish +end + +------------------------------------------------------------------------------ + +local function start(mode, outfile) + if not outfile then outfile = os.getenv("LUAJIT_PROFILEFILE") end + if outfile then + out = outfile == "-" and stdout or assert(io.open(outfile, "w")) + else + out = stdout + end + prof_start(mode or "f") +end + +-- Public module functions. +return { + start = start, -- For -j command line option. + stop = prof_finish +} + diff --git a/src/jit/v.lua b/src/jit/v.lua index 9624688b..ac8b19db 100644 --- a/src/jit/v.lua +++ b/src/jit/v.lua @@ -59,7 +59,7 @@ -- Cache some library functions and objects. local jit = require("jit") -assert(jit.version_num == 20005, "LuaJIT core/library version mismatch") +assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") local jutil = require("jit.util") local vmdef = require("jit.vmdef") local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo @@ -99,7 +99,7 @@ end local function dump_trace(what, tr, func, pc, otr, oex) if what == "start" then startloc = fmtfunc(func, pc) - startex = otr and "("..otr.."/"..oex..") " or "" + startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or "" else if what == "abort" then local loc = fmtfunc(func, pc) @@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex) if ltype == "interpreter" then out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", tr, startex, startloc)) + elseif ltype == "stitch" then + out:write(format("[TRACE %3s %s%s %s %s]\n", + tr, startex, startloc, ltype, fmtfunc(func, pc))) elseif link == tr or link == 0 then out:write(format("[TRACE %3s %s%s %s]\n", tr, startex, startloc, ltype)) @@ -159,9 +162,9 @@ local function dumpon(outfile) end -- Public module functions. -module(...) - -on = dumpon -off = dumpoff -start = dumpon -- For -j command line option. +return { + on = dumpon, + off = dumpoff, + start = dumpon -- For -j command line option. +} diff --git a/src/jit/zone.lua b/src/jit/zone.lua new file mode 100644 index 00000000..1308cb74 --- /dev/null +++ b/src/jit/zone.lua @@ -0,0 +1,45 @@ +---------------------------------------------------------------------------- +-- LuaJIT profiler zones. +-- +-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- +-- This module implements a simple hierarchical zone model. +-- +-- Example usage: +-- +-- local zone = require("jit.zone") +-- zone("AI") +-- ... +-- zone("A*") +-- ... +-- print(zone:get()) --> "A*" +-- ... +-- zone() +-- ... +-- print(zone:get()) --> "AI" +-- ... +-- zone() +-- +---------------------------------------------------------------------------- + +local remove = table.remove + +return setmetatable({ + flush = function(t) + for i=#t,1,-1 do t[i] = nil end + end, + get = function(t) + return t[#t] + end +}, { + __call = function(t, zone) + if zone then + t[#t+1] = zone + else + return (assert(remove(t), "empty zone stack")) + end + end +}) + diff --git a/src/lauxlib.h b/src/lauxlib.h index fed1491b..a44f0272 100644 --- a/src/lauxlib.h +++ b/src/lauxlib.h @@ -15,9 +15,6 @@ #include "lua.h" -#define luaL_getn(L,i) ((int)lua_objlen(L, i)) -#define luaL_setn(L,i,j) ((void)0) /* no op! */ - /* extra error code for `luaL_load' */ #define LUA_ERRFILE (LUA_ERRERR+1) @@ -58,6 +55,10 @@ LUALIB_API int (luaL_error) (lua_State *L, const char *fmt, ...); LUALIB_API int (luaL_checkoption) (lua_State *L, int narg, const char *def, const char *const lst[]); +/* pre-defined references */ +#define LUA_NOREF (-2) +#define LUA_REFNIL (-1) + LUALIB_API int (luaL_ref) (lua_State *L, int t); LUALIB_API void (luaL_unref) (lua_State *L, int t, int ref); @@ -84,6 +85,11 @@ LUALIB_API int (luaL_loadbufferx) (lua_State *L, const char *buff, size_t sz, const char *name, const char *mode); LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, int level); +LUALIB_API void (luaL_setfuncs) (lua_State *L, const luaL_Reg *l, int nup); +LUALIB_API void (luaL_pushmodule) (lua_State *L, const char *modname, + int sizehint); +LUALIB_API void *(luaL_testudata) (lua_State *L, int ud, const char *tname); +LUALIB_API void (luaL_setmetatable) (lua_State *L, const char *tname); /* @@ -113,6 +119,11 @@ LUALIB_API void luaL_traceback (lua_State *L, lua_State *L1, const char *msg, #define luaL_opt(L,f,n,d) (lua_isnoneornil(L,(n)) ? (d) : f(L,(n))) +/* From Lua 5.2. */ +#define luaL_newlibtable(L, l) \ + lua_createtable(L, 0, sizeof(l)/sizeof((l)[0]) - 1) +#define luaL_newlib(L, l) (luaL_newlibtable(L, l), luaL_setfuncs(L, l, 0)) + /* ** {====================================================== ** Generic Buffer manipulation @@ -147,21 +158,4 @@ LUALIB_API void (luaL_pushresult) (luaL_Buffer *B); /* }====================================================== */ - -/* compatibility with ref system */ - -/* pre-defined references */ -#define LUA_NOREF (-2) -#define LUA_REFNIL (-1) - -#define lua_ref(L,lock) ((lock) ? luaL_ref(L, LUA_REGISTRYINDEX) : \ - (lua_pushstring(L, "unlocked references are obsolete"), lua_error(L), 0)) - -#define lua_unref(L,ref) luaL_unref(L, LUA_REGISTRYINDEX, (ref)) - -#define lua_getref(L,ref) lua_rawgeti(L, LUA_REGISTRYINDEX, (ref)) - - -#define luaL_reg luaL_Reg - #endif diff --git a/src/lib_aux.c b/src/lib_aux.c index 14dd57e3..b8e56436 100644 --- a/src/lib_aux.c +++ b/src/lib_aux.c @@ -107,38 +107,36 @@ LUALIB_API const char *luaL_findtable(lua_State *L, int idx, static int libsize(const luaL_Reg *l) { int size = 0; - for (; l->name; l++) size++; + for (; l && l->name; l++) size++; return size; } +LUALIB_API void luaL_pushmodule(lua_State *L, const char *modname, int sizehint) +{ + luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); + lua_getfield(L, -1, modname); + if (!lua_istable(L, -1)) { + lua_pop(L, 1); + if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, sizehint) != NULL) + lj_err_callerv(L, LJ_ERR_BADMODN, modname); + lua_pushvalue(L, -1); + lua_setfield(L, -3, modname); /* _LOADED[modname] = new table. */ + } + lua_remove(L, -2); /* Remove _LOADED table. */ +} + LUALIB_API void luaL_openlib(lua_State *L, const char *libname, const luaL_Reg *l, int nup) { lj_lib_checkfpu(L); if (libname) { - int size = libsize(l); - /* check whether lib already exists */ - luaL_findtable(L, LUA_REGISTRYINDEX, "_LOADED", 16); - lua_getfield(L, -1, libname); /* get _LOADED[libname] */ - if (!lua_istable(L, -1)) { /* not found? */ - lua_pop(L, 1); /* remove previous result */ - /* try global variable (and create one if it does not exist) */ - if (luaL_findtable(L, LUA_GLOBALSINDEX, libname, size) != NULL) - lj_err_callerv(L, LJ_ERR_BADMODN, libname); - lua_pushvalue(L, -1); - lua_setfield(L, -3, libname); /* _LOADED[libname] = new table */ - } - lua_remove(L, -2); /* remove _LOADED table */ - lua_insert(L, -(nup+1)); /* move library table to below upvalues */ + luaL_pushmodule(L, libname, libsize(l)); + lua_insert(L, -(nup + 1)); /* Move module table below upvalues. */ } - for (; l->name; l++) { - int i; - for (i = 0; i < nup; i++) /* copy upvalues to the top */ - lua_pushvalue(L, -nup); - lua_pushcclosure(L, l->func, nup); - lua_setfield(L, -(nup+2), l->name); - } - lua_pop(L, nup); /* remove upvalues */ + if (l) + luaL_setfuncs(L, l, nup); + else + lua_pop(L, nup); /* Remove upvalues. */ } LUALIB_API void luaL_register(lua_State *L, const char *libname, @@ -147,6 +145,19 @@ LUALIB_API void luaL_register(lua_State *L, const char *libname, luaL_openlib(L, libname, l, 0); } +LUALIB_API void luaL_setfuncs(lua_State *L, const luaL_Reg *l, int nup) +{ + luaL_checkstack(L, nup, "too many upvalues"); + for (; l->name; l++) { + int i; + for (i = 0; i < nup; i++) /* Copy upvalues to the top. */ + lua_pushvalue(L, -nup); + lua_pushcclosure(L, l->func, nup); + lua_setfield(L, -(nup + 2), l->name); + } + lua_pop(L, nup); /* Remove upvalues. */ +} + LUALIB_API const char *luaL_gsub(lua_State *L, const char *s, const char *p, const char *r) { @@ -207,8 +218,15 @@ LUALIB_API char *luaL_prepbuffer(luaL_Buffer *B) LUALIB_API void luaL_addlstring(luaL_Buffer *B, const char *s, size_t l) { - while (l--) - luaL_addchar(B, *s++); + if (l <= bufffree(B)) { + memcpy(B->p, s, l); + B->p += l; + } else { + emptybuffer(B); + lua_pushlstring(B->L, s, l); + B->lvl++; + adjuststack(B); + } } LUALIB_API void luaL_addstring(luaL_Buffer *B, const char *s) @@ -302,7 +320,7 @@ static int panic(lua_State *L) #ifdef LUAJIT_USE_SYSMALLOC -#if LJ_64 && !defined(LUAJIT_USE_VALGRIND) +#if LJ_64 && !LJ_GC64 && !defined(LUAJIT_USE_VALGRIND) #error "Must use builtin allocator for 64 bit target" #endif @@ -327,23 +345,19 @@ LUALIB_API lua_State *luaL_newstate(void) #else -#include "lj_alloc.h" - LUALIB_API lua_State *luaL_newstate(void) { lua_State *L; - void *ud = lj_alloc_create(); - if (ud == NULL) return NULL; -#if LJ_64 - L = lj_state_newstate(lj_alloc_f, ud); +#if LJ_64 && !LJ_GC64 + L = lj_state_newstate(LJ_ALLOCF_INTERNAL, NULL); #else - L = lua_newstate(lj_alloc_f, ud); + L = lua_newstate(LJ_ALLOCF_INTERNAL, NULL); #endif if (L) G(L)->panic = panic; return L; } -#if LJ_64 +#if LJ_64 && !LJ_GC64 LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) { UNUSED(f); UNUSED(ud); diff --git a/src/lib_base.c b/src/lib_base.c index 6c96e8d5..98ec67c7 100644 --- a/src/lib_base.c +++ b/src/lib_base.c @@ -19,10 +19,12 @@ #include "lj_gc.h" #include "lj_err.h" #include "lj_debug.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_meta.h" #include "lj_state.h" +#include "lj_frame.h" #if LJ_HASFFI #include "lj_ctype.h" #include "lj_cconv.h" @@ -32,6 +34,7 @@ #include "lj_dispatch.h" #include "lj_char.h" #include "lj_strscan.h" +#include "lj_strfmt.h" #include "lj_lib.h" /* -- Base library: checks ------------------------------------------------ */ @@ -40,13 +43,13 @@ LJLIB_ASM(assert) LJLIB_REC(.) { - GCstr *s; lj_lib_checkany(L, 1); - s = lj_lib_optstr(L, 2); - if (s) - lj_err_callermsg(L, strdata(s)); - else + if (L->top == L->base+1) lj_err_caller(L, LJ_ERR_ASSERT); + else if (tvisstr(L->base+1) || tvisnumber(L->base+1)) + lj_err_callermsg(L, strdata(lj_lib_checkstr(L, 2))); + else + lj_err_run(L); return FFH_UNREACHABLE; } @@ -73,9 +76,10 @@ LJLIB_ASM_(type) LJLIB_REC(.) /* This solves a circular dependency problem -- change FF_next_N as needed. */ LJ_STATIC_ASSERT((int)FF_next == FF_next_N); -LJLIB_ASM(next) +LJLIB_ASM(next) LJLIB_REC(.) { lj_lib_checktab(L, 1); + lj_err_msg(L, LJ_ERR_NEXTIDX); return FFH_UNREACHABLE; } @@ -86,10 +90,11 @@ static int ffh_pairs(lua_State *L, MMS mm) cTValue *mo = lj_meta_lookup(L, o, mm); if ((LJ_52 || tviscdata(o)) && !tvisnil(mo)) { L->top = o+1; /* Only keep one argument. */ - copyTV(L, L->base-1, mo); /* Replace callable. */ + copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */ return FFH_TAILCALL; } else { if (!tvistab(o)) lj_err_argt(L, 1, LUA_TTABLE); + if (LJ_FR2) { copyTV(L, o-1, o); o--; } setfuncV(L, o-1, funcV(lj_lib_upvalue(L, 1))); if (mm == MM_pairs) setnilV(o+1); else setintV(o+1, 0); return FFH_RES(3); @@ -100,7 +105,7 @@ static int ffh_pairs(lua_State *L, MMS mm) #endif LJLIB_PUSH(lastcl) -LJLIB_ASM(pairs) +LJLIB_ASM(pairs) LJLIB_REC(xpairs 0) { return ffh_pairs(L, MM_pairs); } @@ -113,7 +118,7 @@ LJLIB_NOREGUV LJLIB_ASM(ipairs_aux) LJLIB_REC(.) } LJLIB_PUSH(lastcl) -LJLIB_ASM(ipairs) LJLIB_REC(.) +LJLIB_ASM(ipairs) LJLIB_REC(xpairs 1) { return ffh_pairs(L, MM_ipairs); } @@ -131,11 +136,11 @@ LJLIB_ASM(setmetatable) LJLIB_REC(.) lj_err_caller(L, LJ_ERR_PROTMT); setgcref(t->metatable, obj2gco(mt)); if (mt) { lj_gc_objbarriert(L, t, mt); } - settabV(L, L->base-1, t); + settabV(L, L->base-1-LJ_FR2, t); return FFH_RES(1); } -LJLIB_CF(getfenv) +LJLIB_CF(getfenv) LJLIB_REC(.) { GCfunc *fn; cTValue *o = L->base; @@ -144,6 +149,7 @@ LJLIB_CF(getfenv) o = lj_debug_frame(L, level, &level); if (o == NULL) lj_err_arg(L, 1, LJ_ERR_INVLVL); + if (LJ_FR2) o--; } fn = &gcval(o)->fn; settabV(L, L->top++, isluafunc(fn) ? tabref(fn->l.env) : tabref(L->env)); @@ -165,6 +171,7 @@ LJLIB_CF(setfenv) o = lj_debug_frame(L, level, &level); if (o == NULL) lj_err_arg(L, 1, LJ_ERR_INVLVL); + if (LJ_FR2) o--; } fn = &gcval(o)->fn; if (!isluafunc(fn)) @@ -259,7 +266,7 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) if (base == 10) { TValue *o = lj_lib_checkany(L, 1); if (lj_strscan_numberobj(o)) { - copyTV(L, L->base-1, o); + copyTV(L, L->base-1-LJ_FR2, o); return FFH_RES(1); } #if LJ_HASFFI @@ -272,11 +279,11 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) ct->size <= 4 && !(ct->size == 4 && (ct->info & CTF_UNSIGNED))) { int32_t i; lj_cconv_ct_tv(cts, ctype_get(cts, CTID_INT32), (uint8_t *)&i, o, 0); - setintV(L->base-1, i); + setintV(L->base-1-LJ_FR2, i); return FFH_RES(1); } lj_cconv_ct_tv(cts, ctype_get(cts, CTID_DOUBLE), - (uint8_t *)&(L->base-1)->n, o, 0); + (uint8_t *)&(L->base-1-LJ_FR2)->n, o, 0); return FFH_RES(1); } } @@ -284,53 +291,46 @@ LJLIB_ASM(tonumber) LJLIB_REC(.) } else { const char *p = strdata(lj_lib_checkstr(L, 1)); char *ep; + unsigned int neg = 0; unsigned long ul; if (base < 2 || base > 36) lj_err_arg(L, 2, LJ_ERR_BASERNG); - ul = strtoul(p, &ep, base); - if (p != ep) { - while (lj_char_isspace((unsigned char)(*ep))) ep++; - if (*ep == '\0') { - if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u)) - setintV(L->base-1, (int32_t)ul); - else - setnumV(L->base-1, (lua_Number)ul); - return FFH_RES(1); + while (lj_char_isspace((unsigned char)(*p))) p++; + if (*p == '-') { p++; neg = 1; } else if (*p == '+') { p++; } + if (lj_char_isalnum((unsigned char)(*p))) { + ul = strtoul(p, &ep, base); + if (p != ep) { + while (lj_char_isspace((unsigned char)(*ep))) ep++; + if (*ep == '\0') { + if (LJ_DUALNUM && LJ_LIKELY(ul < 0x80000000u+neg)) { + if (neg) ul = (unsigned long)-(long)ul; + setintV(L->base-1-LJ_FR2, (int32_t)ul); + } else { + lua_Number n = (lua_Number)ul; + if (neg) n = -n; + setnumV(L->base-1-LJ_FR2, n); + } + return FFH_RES(1); + } } } } - setnilV(L->base-1); + setnilV(L->base-1-LJ_FR2); return FFH_RES(1); } -LJLIB_PUSH("nil") -LJLIB_PUSH("false") -LJLIB_PUSH("true") LJLIB_ASM(tostring) LJLIB_REC(.) { TValue *o = lj_lib_checkany(L, 1); cTValue *mo; L->top = o+1; /* Only keep one argument. */ if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { - copyTV(L, L->base-1, mo); /* Replace callable. */ + copyTV(L, L->base-1-LJ_FR2, mo); /* Replace callable. */ return FFH_TAILCALL; - } else { - GCstr *s; - if (tvisnumber(o)) { - s = lj_str_fromnumber(L, o); - } else if (tvispri(o)) { - s = strV(lj_lib_upvalue(L, -(int32_t)itype(o))); - } else { - if (tvisfunc(o) && isffunc(funcV(o))) - lua_pushfstring(L, "function: builtin#%d", funcV(o)->c.ffid); - else - lua_pushfstring(L, "%s: %p", lj_typename(o), lua_topointer(L, 1)); - /* Note: lua_pushfstring calls the GC which may invalidate o. */ - s = strV(L->top-1); - } - setstrV(L, L->base-1, s); - return FFH_RES(1); } + lj_gc_check(L); + setstrV(L, L->base-1-LJ_FR2, lj_strfmt_obj(L, L->base)); + return FFH_RES(1); } /* -- Base library: throw and catch errors -------------------------------- */ @@ -359,7 +359,7 @@ LJLIB_ASM_(xpcall) LJLIB_REC(.) static int load_aux(lua_State *L, int status, int envarg) { - if (status == 0) { + if (status == LUA_OK) { if (tvistab(L->base+envarg-1)) { GCfunc *fn = funcV(L->top-1); GCtab *t = tabV(L->base+envarg-1); @@ -408,10 +408,22 @@ LJLIB_CF(load) GCstr *name = lj_lib_optstr(L, 2); GCstr *mode = lj_lib_optstr(L, 3); int status; - if (L->base < L->top && (tvisstr(L->base) || tvisnumber(L->base))) { - GCstr *s = lj_lib_checkstr(L, 1); + if (L->base < L->top && + (tvisstr(L->base) || tvisnumber(L->base) || tvisbuf(L->base))) { + const char *s; + MSize len; + if (tvisbuf(L->base)) { + SBufExt *sbx = bufV(L->base); + s = sbx->r; + len = sbufxlen(sbx); + if (!name) name = &G(L)->strempty; /* Buffers are not NUL-terminated. */ + } else { + GCstr *str = lj_lib_checkstr(L, 1); + s = strdata(str); + len = str->len; + } lua_settop(L, 4); /* Ensure env arg exists. */ - status = luaL_loadbufferx(L, strdata(s), s->len, strdata(name ? name : s), + status = luaL_loadbufferx(L, s, len, name ? strdata(name) : s, mode ? strdata(mode) : NULL); } else { lj_lib_checkfunc(L, 1); @@ -432,7 +444,7 @@ LJLIB_CF(dofile) GCstr *fname = lj_lib_optstr(L, 1); setnilV(L->top); L->top = L->base+1; - if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != 0) + if (luaL_loadfile(L, fname ? strdata(fname) : NULL) != LUA_OK) lua_error(L); lua_call(L, 0, LUA_MULTRET); return (int)(L->top - L->base) - 1; @@ -442,20 +454,20 @@ LJLIB_CF(dofile) LJLIB_CF(gcinfo) { - setintV(L->top++, (G(L)->gc.total >> 10)); + setintV(L->top++, (int32_t)(G(L)->gc.total >> 10)); return 1; } LJLIB_CF(collectgarbage) { int opt = lj_lib_checkopt(L, 1, LUA_GCCOLLECT, /* ORDER LUA_GC* */ - "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul"); + "\4stop\7restart\7collect\5count\1\377\4step\10setpause\12setstepmul\1\377\11isrunning"); int32_t data = lj_lib_optint(L, 2, 0); if (opt == LUA_GCCOUNT) { setnumV(L->top, (lua_Number)G(L)->gc.total/1024.0); } else { int res = lua_gc(L, opt, data); - if (opt == LUA_GCSTEP) + if (opt == LUA_GCSTEP || opt == LUA_GCISRUNNING) setboolV(L->top, res); else setintV(L->top, res); @@ -507,23 +519,14 @@ LJLIB_CF(print) tv = L->top-1; } shortcut = (tvisfunc(tv) && funcV(tv)->c.ffid == FF_tostring) && - !gcrefu(basemt_it(G(L), LJ_TNUMX)); + !gcrefu(basemt_it(G(L), LJ_TNUMX)); for (i = 0; i < nargs; i++) { + cTValue *o = &L->base[i]; const char *str; size_t size; - cTValue *o = &L->base[i]; - if (shortcut && tvisstr(o)) { - str = strVdata(o); - size = strV(o)->len; - } else if (shortcut && tvisint(o)) { - char buf[LJ_STR_INTBUF]; - char *p = lj_str_bufint(buf, intV(o)); - size = (size_t)(buf+LJ_STR_INTBUF-p); - str = p; - } else if (shortcut && tvisnum(o)) { - char buf[LJ_STR_NUMBUF]; - size = lj_str_bufnum(buf, o); - str = buf; + MSize len; + if (shortcut && (str = lj_strfmt_wstrnum(L, o, &len)) != NULL) { + size = len; } else { copyTV(L, L->top+1, o); copyTV(L, L->top, L->top-1); @@ -560,8 +563,8 @@ LJLIB_CF(coroutine_status) co = threadV(L->base); if (co == L) s = "running"; else if (co->status == LUA_YIELD) s = "suspended"; - else if (co->status != 0) s = "dead"; - else if (co->base > tvref(co->stack)+1) s = "normal"; + else if (co->status != LUA_OK) s = "dead"; + else if (co->base > tvref(co->stack)+1+LJ_FR2) s = "normal"; else if (co->top == co->base) s = "dead"; else s = "suspended"; lua_pushstring(L, s); @@ -581,6 +584,12 @@ LJLIB_CF(coroutine_running) #endif } +LJLIB_CF(coroutine_isyieldable) +{ + setboolV(L->top++, cframe_canyield(L->cframe)); + return 1; +} + LJLIB_CF(coroutine_create) { lua_State *L1; @@ -600,11 +609,11 @@ LJLIB_ASM(coroutine_yield) static int ffh_resume(lua_State *L, lua_State *co, int wrap) { if (co->cframe != NULL || co->status > LUA_YIELD || - (co->status == 0 && co->top == co->base)) { + (co->status == LUA_OK && co->top == co->base)) { ErrMsg em = co->cframe ? LJ_ERR_CORUN : LJ_ERR_CODEAD; if (wrap) lj_err_caller(L, em); - setboolV(L->base-1, 0); - setstrV(L, L->base, lj_err_str(L, em)); + setboolV(L->base-1-LJ_FR2, 0); + setstrV(L, L->base-LJ_FR2, lj_err_str(L, em)); return FFH_RES(2); } lj_state_growstack(co, (MSize)(L->top - L->base)); @@ -645,9 +654,10 @@ static void setpc_wrap_aux(lua_State *L, GCfunc *fn); LJLIB_CF(coroutine_wrap) { + GCfunc *fn; lj_cf_coroutine_create(L); - lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); - setpc_wrap_aux(L, funcV(L->top-1)); + fn = lj_lib_pushcc(L, lj_ffh_coroutine_wrap_aux, FF_coroutine_wrap_aux, 1); + setpc_wrap_aux(L, fn); return 1; } diff --git a/src/lib_bit.c b/src/lib_bit.c index 9e75eef3..38c0f578 100644 --- a/src/lib_bit.c +++ b/src/lib_bit.c @@ -12,26 +12,99 @@ #include "lj_obj.h" #include "lj_err.h" -#include "lj_str.h" +#include "lj_buf.h" +#include "lj_strscan.h" +#include "lj_strfmt.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#include "lj_cdata.h" +#include "lj_cconv.h" +#include "lj_carith.h" +#endif +#include "lj_ff.h" #include "lj_lib.h" /* ------------------------------------------------------------------------ */ #define LJLIB_MODULE_bit -LJLIB_ASM(bit_tobit) LJLIB_REC(bit_unary IR_TOBIT) +#if LJ_HASFFI +static int bit_result64(lua_State *L, CTypeID id, uint64_t x) { + GCcdata *cd = lj_cdata_new_(L, id, 8); + *(uint64_t *)cdataptr(cd) = x; + setcdataV(L, L->base-1-LJ_FR2, cd); + return FFH_RES(1); +} +#else +static int32_t bit_checkbit(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + if (!(o < L->top && lj_strscan_numberobj(o))) + lj_err_argt(L, narg, LUA_TNUMBER); + if (LJ_LIKELY(tvisint(o))) { + return intV(o); + } else { + int32_t i = lj_num2bit(numV(o)); + if (LJ_DUALNUM) setintV(o, i); + return i; + } +} +#endif + +LJLIB_ASM(bit_tobit) LJLIB_REC(bit_tobit) +{ +#if LJ_HASFFI + CTypeID id = 0; + setintV(L->base-1-LJ_FR2, (int32_t)lj_carith_check64(L, 1, &id)); + return FFH_RES(1); +#else + lj_lib_checknumber(L, 1); + return FFH_RETRY; +#endif +} + +LJLIB_ASM(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) +{ +#if LJ_HASFFI + CTypeID id = 0; + uint64_t x = lj_carith_check64(L, 1, &id); + return id ? bit_result64(L, id, ~x) : FFH_RETRY; +#else lj_lib_checknumber(L, 1); return FFH_RETRY; +#endif +} + +LJLIB_ASM(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) +{ +#if LJ_HASFFI + CTypeID id = 0; + uint64_t x = lj_carith_check64(L, 1, &id); + return id ? bit_result64(L, id, lj_bswap64(x)) : FFH_RETRY; +#else + lj_lib_checknumber(L, 1); + return FFH_RETRY; +#endif } -LJLIB_ASM_(bit_bnot) LJLIB_REC(bit_unary IR_BNOT) -LJLIB_ASM_(bit_bswap) LJLIB_REC(bit_unary IR_BSWAP) LJLIB_ASM(bit_lshift) LJLIB_REC(bit_shift IR_BSHL) { +#if LJ_HASFFI + CTypeID id = 0, id2 = 0; + uint64_t x = lj_carith_check64(L, 1, &id); + int32_t sh = (int32_t)lj_carith_check64(L, 2, &id2); + if (id) { + x = lj_carith_shift64(x, sh, curr_func(L)->c.ffid - (int)FF_bit_lshift); + return bit_result64(L, id, x); + } + if (id2) setintV(L->base+1, sh); + return FFH_RETRY; +#else lj_lib_checknumber(L, 1); - lj_lib_checkbit(L, 2); + bit_checkbit(L, 2); return FFH_RETRY; +#endif } LJLIB_ASM_(bit_rshift) LJLIB_REC(bit_shift IR_BSHR) LJLIB_ASM_(bit_arshift) LJLIB_REC(bit_shift IR_BSAR) @@ -40,25 +113,58 @@ LJLIB_ASM_(bit_ror) LJLIB_REC(bit_shift IR_BROR) LJLIB_ASM(bit_band) LJLIB_REC(bit_nary IR_BAND) { +#if LJ_HASFFI + CTypeID id = 0; + TValue *o = L->base, *top = L->top; + int i = 0; + do { lj_carith_check64(L, ++i, &id); } while (++o < top); + if (id) { + CTState *cts = ctype_cts(L); + CType *ct = ctype_get(cts, id); + int op = curr_func(L)->c.ffid - (int)FF_bit_bor; + uint64_t x, y = op >= 0 ? 0 : ~(uint64_t)0; + o = L->base; + do { + lj_cconv_ct_tv(cts, ct, (uint8_t *)&x, o, 0); + if (op < 0) y &= x; else if (op == 0) y |= x; else y ^= x; + } while (++o < top); + return bit_result64(L, id, y); + } + return FFH_RETRY; +#else int i = 0; do { lj_lib_checknumber(L, ++i); } while (L->base+i < L->top); return FFH_RETRY; +#endif } LJLIB_ASM_(bit_bor) LJLIB_REC(bit_nary IR_BOR) LJLIB_ASM_(bit_bxor) LJLIB_REC(bit_nary IR_BXOR) /* ------------------------------------------------------------------------ */ -LJLIB_CF(bit_tohex) +LJLIB_CF(bit_tohex) LJLIB_REC(.) { - uint32_t b = (uint32_t)lj_lib_checkbit(L, 1); - int32_t i, n = L->base+1 >= L->top ? 8 : lj_lib_checkbit(L, 2); - const char *hexdigits = "0123456789abcdef"; - char buf[8]; - if (n < 0) { n = -n; hexdigits = "0123456789ABCDEF"; } - if (n > 8) n = 8; - for (i = n; --i >= 0; ) { buf[i] = hexdigits[b & 15]; b >>= 4; } - lua_pushlstring(L, buf, (size_t)n); +#if LJ_HASFFI + CTypeID id = 0, id2 = 0; + uint64_t b = lj_carith_check64(L, 1, &id); + int32_t n = L->base+1>=L->top ? (id ? 16 : 8) : + (int32_t)lj_carith_check64(L, 2, &id2); +#else + uint32_t b = (uint32_t)bit_checkbit(L, 1); + int32_t n = L->base+1>=L->top ? 8 : bit_checkbit(L, 2); +#endif + SBuf *sb = lj_buf_tmp_(L); + SFormat sf = (STRFMT_UINT|STRFMT_T_HEX); + if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; } + sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC); +#if LJ_HASFFI + if (n < 16) b &= ((uint64_t)1 << 4*n)-1; +#else + if (n < 8) b &= (1u << 4*n)-1; +#endif + sb = lj_strfmt_putfxint(sb, sf, b); + setstrV(L, L->top-1, lj_buf_str(L, sb)); + lj_gc_check(L); return 1; } diff --git a/src/lib_buffer.c b/src/lib_buffer.c new file mode 100644 index 00000000..d6ff1346 --- /dev/null +++ b/src/lib_buffer.c @@ -0,0 +1,360 @@ +/* +** Buffer library. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lib_buffer_c +#define LUA_LIB + +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" + +#include "lj_obj.h" + +#if LJ_HASBUFFER +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_buf.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_udata.h" +#include "lj_meta.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#include "lj_cdata.h" +#include "lj_cconv.h" +#endif +#include "lj_strfmt.h" +#include "lj_serialize.h" +#include "lj_lib.h" + +/* -- Helper functions ---------------------------------------------------- */ + +/* Check that the first argument is a string buffer. */ +static SBufExt *buffer_tobuf(lua_State *L) +{ + if (!(L->base < L->top && tvisbuf(L->base))) + lj_err_argtype(L, 1, "buffer"); + return bufV(L->base); +} + +/* Ditto, but for writers. */ +static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L) +{ + SBufExt *sbx = buffer_tobuf(L); + setsbufXL_(sbx, L); + return sbx; +} + +#define buffer_toudata(sbx) ((GCudata *)(sbx)-1) + +/* -- Buffer methods ------------------------------------------------------ */ + +#define LJLIB_MODULE_buffer_method + +LJLIB_CF(buffer_method_free) +{ + SBufExt *sbx = buffer_tobuf(L); + lj_bufx_free(L, sbx); + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_reset) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + lj_bufx_reset(sbx); + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_skip) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + MSize n = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); + MSize len = sbufxlen(sbx); + if (n < len) { + sbx->r += n; + } else if (sbufiscow(sbx)) { + sbx->r = sbx->w; + } else { + sbx->r = sbx->w = sbx->b; + } + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_set) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + GCobj *ref; + const char *p; + MSize len; +#if LJ_HASFFI + if (tviscdata(L->base+1)) { + CTState *cts = ctype_cts(L); + lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, + L->base+1, CCF_ARG(2)); + len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF); + } else +#endif + { + GCstr *str = lj_lib_checkstrx(L, 2); + p = strdata(str); + len = str->len; + } + lj_bufx_free(L, sbx); + lj_bufx_set_cow(L, sbx, p, len); + ref = gcV(L->base+1); + setgcref(sbx->cowref, ref); + lj_gc_objbarrier(L, buffer_toudata(sbx), ref); + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_put) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + ptrdiff_t arg, narg = L->top - L->base; + for (arg = 1; arg < narg; arg++) { + cTValue *o = &L->base[arg], *mo = NULL; + retry: + if (tvisstr(o)) { + lj_buf_putstr((SBuf *)sbx, strV(o)); + } else if (tvisint(o)) { + lj_strfmt_putint((SBuf *)sbx, intV(o)); + } else if (tvisnum(o)) { + lj_strfmt_putfnum((SBuf *)sbx, STRFMT_G14, numV(o)); + } else if (tvisbuf(o)) { + SBufExt *sbx2 = bufV(o); + if (sbx2 == sbx) lj_err_arg(L, (int)(arg+1), LJ_ERR_BUFFER_SELF); + lj_buf_putmem((SBuf *)sbx, sbx2->r, sbufxlen(sbx2)); + } else if (!mo && !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { + /* Call __tostring metamethod inline. */ + copyTV(L, L->top++, mo); + copyTV(L, L->top++, o); + lua_call(L, 1, 1); + o = &L->base[arg]; /* The stack may have been reallocated. */ + copyTV(L, &L->base[arg], L->top-1); + L->top = L->base + narg; + goto retry; /* Retry with the result. */ + } else { + lj_err_argtype(L, (int)(arg+1), "string/number/__tostring"); + } + /* Probably not useful to inline other __tostring MMs, e.g. FFI numbers. */ + } + L->top = L->base+1; /* Chain buffer object. */ + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_method_putf) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + lj_strfmt_putarg(L, (SBuf *)sbx, 2, 2); + L->top = L->base+1; /* Chain buffer object. */ + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_method_get) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + ptrdiff_t arg, narg = L->top - L->base; + if (narg == 1) { + narg++; + setnilV(L->top++); /* get() is the same as get(nil). */ + } + for (arg = 1; arg < narg; arg++) { + TValue *o = &L->base[arg]; + MSize n = tvisnil(o) ? LJ_MAX_BUF : + (MSize) lj_lib_checkintrange(L, (int)(arg+1), 0, LJ_MAX_BUF); + MSize len = sbufxlen(sbx); + if (n > len) n = len; + setstrV(L, o, lj_str_new(L, sbx->r, n)); + sbx->r += n; + } + if (sbx->r == sbx->w && !sbufiscow(sbx)) sbx->r = sbx->w = sbx->b; + lj_gc_check(L); + return (int)(narg-1); +} + +#if LJ_HASFFI +LJLIB_CF(buffer_method_putcdata) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + const char *p; + MSize len; + if (tviscdata(L->base+1)) { + CTState *cts = ctype_cts(L); + lj_cconv_ct_tv(cts, ctype_get(cts, CTID_P_CVOID), (uint8_t *)&p, + L->base+1, CCF_ARG(2)); + } else { + lj_err_argtype(L, 2, "cdata"); + } + len = (MSize)lj_lib_checkintrange(L, 3, 0, LJ_MAX_BUF); + lj_buf_putmem((SBuf *)sbx, p, len); + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_reserve) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + MSize sz = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); + GCcdata *cd; + lj_buf_more((SBuf *)sbx, sz); + ctype_loadffi(L); + cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR); + *(void **)cdataptr(cd) = sbx->w; + setcdataV(L, L->top++, cd); + setintV(L->top++, sbufleft(sbx)); + return 2; +} + +LJLIB_CF(buffer_method_commit) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + MSize len = (MSize)lj_lib_checkintrange(L, 2, 0, LJ_MAX_BUF); + if (len > sbufleft(sbx)) lj_err_arg(L, 2, LJ_ERR_NUMRNG); + sbx->w += len; + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_ref) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + GCcdata *cd; + ctype_loadffi(L); + cd = lj_cdata_new_(L, CTID_P_UINT8, CTSIZE_PTR); + *(void **)cdataptr(cd) = sbx->r; + setcdataV(L, L->top++, cd); + setintV(L->top++, sbufxlen(sbx)); + return 2; +} +#endif + +LJLIB_CF(buffer_method_encode) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + cTValue *o = lj_lib_checkany(L, 2); + lj_serialize_put(sbx, o); + lj_gc_check(L); + L->top = L->base+1; /* Chain buffer object. */ + return 1; +} + +LJLIB_CF(buffer_method_decode) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobufw(L); + setnilV(L->top++); + sbx->r = lj_serialize_get(sbx, L->top-1); + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_method___gc) +{ + SBufExt *sbx = buffer_tobuf(L); + lj_bufx_free(L, sbx); + return 0; +} + +LJLIB_CF(buffer_method___tostring) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + setstrV(L, L->top-1, lj_str_new(L, sbx->r, sbufxlen(sbx))); + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_method___len) LJLIB_REC(.) +{ + SBufExt *sbx = buffer_tobuf(L); + setintV(L->top-1, (int32_t)sbufxlen(sbx)); + return 1; +} + +LJLIB_PUSH("buffer") LJLIB_SET(__metatable) +LJLIB_PUSH(top-1) LJLIB_SET(__index) + +/* -- Buffer library functions -------------------------------------------- */ + +#define LJLIB_MODULE_buffer + +LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */ + +LJLIB_CF(buffer_new) +{ + MSize sz = 0; + int targ = 1; + GCtab *env, *dict_str = NULL, *dict_mt = NULL; + GCudata *ud; + SBufExt *sbx; + if (L->base < L->top && !tvistab(L->base)) { + targ = 2; + if (!tvisnil(L->base)) + sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF); + } + if (L->base+targ-1 < L->top) { + GCtab *options = lj_lib_checktab(L, targ); + cTValue *opt_dict, *opt_mt; + opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict")); + if (opt_dict && tvistab(opt_dict)) { + dict_str = tabV(opt_dict); + lj_serialize_dict_prep_str(L, dict_str); + } + opt_mt = lj_tab_getstr(options, lj_str_newlit(L, "metatable")); + if (opt_mt && tvistab(opt_mt)) { + dict_mt = tabV(opt_mt); + lj_serialize_dict_prep_mt(L, dict_mt); + } + } + env = tabref(curr_func(L)->c.env); + ud = lj_udata_new(L, sizeof(SBufExt), env); + ud->udtype = UDTYPE_BUFFER; + /* NOBARRIER: The GCudata is new (marked white). */ + setgcref(ud->metatable, obj2gco(env)); + setudataV(L, L->top++, ud); + sbx = (SBufExt *)uddata(ud); + lj_bufx_init(L, sbx); + setgcref(sbx->dict_str, obj2gco(dict_str)); + setgcref(sbx->dict_mt, obj2gco(dict_mt)); + if (sz > 0) lj_buf_need2((SBuf *)sbx, sz); + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_encode) LJLIB_REC(.) +{ + cTValue *o = lj_lib_checkany(L, 1); + setstrV(L, L->top++, lj_serialize_encode(L, o)); + lj_gc_check(L); + return 1; +} + +LJLIB_CF(buffer_decode) LJLIB_REC(.) +{ + GCstr *str = lj_lib_checkstrx(L, 1); + setnilV(L->top++); + lj_serialize_decode(L, L->top-1, str); + lj_gc_check(L); + return 1; +} + +/* ------------------------------------------------------------------------ */ + +#include "lj_libdef.h" + +int luaopen_string_buffer(lua_State *L) +{ + LJ_LIB_REG(L, NULL, buffer_method); + lua_getfield(L, -1, "__tostring"); + lua_setfield(L, -2, "tostring"); + LJ_LIB_REG(L, NULL, buffer); + return 1; +} + +#endif diff --git a/src/lib_debug.c b/src/lib_debug.c index e7d8d24a..3af7a353 100644 --- a/src/lib_debug.c +++ b/src/lib_debug.c @@ -29,7 +29,7 @@ LJLIB_CF(debug_getregistry) return 1; } -LJLIB_CF(debug_getmetatable) +LJLIB_CF(debug_getmetatable) LJLIB_REC(.) { lj_lib_checkany(L, 1); if (!lua_getmetatable(L, 1)) { @@ -231,8 +231,8 @@ LJLIB_CF(debug_upvalueid) int32_t n = lj_lib_checkint(L, 2) - 1; if ((uint32_t)n >= fn->l.nupvalues) lj_err_arg(L, 2, LJ_ERR_IDXRNG); - setlightudV(L->top-1, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : - (void *)&fn->c.upvalue[n]); + lua_pushlightuserdata(L, isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : + (void *)&fn->c.upvalue[n]); return 1; } @@ -283,13 +283,13 @@ LJLIB_CF(debug_setuservalue) /* ------------------------------------------------------------------------ */ -static const char KEY_HOOK = 'h'; +#define KEY_HOOK (U64x(80000000,00000000)|'h') static void hookf(lua_State *L, lua_Debug *ar) { static const char *const hooknames[] = {"call", "return", "line", "count", "tail return"}; - lua_pushlightuserdata(L, (void *)&KEY_HOOK); + (L->top++)->u64 = KEY_HOOK; lua_rawget(L, LUA_REGISTRYINDEX); if (lua_isfunction(L, -1)) { lua_pushstring(L, hooknames[(int)ar->event]); @@ -334,7 +334,7 @@ LJLIB_CF(debug_sethook) count = luaL_optint(L, arg+3, 0); func = hookf; mask = makemask(smask, count); } - lua_pushlightuserdata(L, (void *)&KEY_HOOK); + (L->top++)->u64 = KEY_HOOK; lua_pushvalue(L, arg+1); lua_rawset(L, LUA_REGISTRYINDEX); lua_sethook(L, func, mask, count); @@ -349,7 +349,7 @@ LJLIB_CF(debug_gethook) if (hook != NULL && hook != hookf) { /* external hook? */ lua_pushliteral(L, "external hook"); } else { - lua_pushlightuserdata(L, (void *)&KEY_HOOK); + (L->top++)->u64 = KEY_HOOK; lua_rawget(L, LUA_REGISTRYINDEX); /* get hook */ } lua_pushstring(L, unmakemask(mask, buff)); diff --git a/src/lib_ffi.c b/src/lib_ffi.c index 654e71a2..2295cf15 100644 --- a/src/lib_ffi.c +++ b/src/lib_ffi.c @@ -29,6 +29,7 @@ #include "lj_ccall.h" #include "lj_ccallback.h" #include "lj_clib.h" +#include "lj_strfmt.h" #include "lj_ff.h" #include "lj_lib.h" @@ -137,7 +138,7 @@ static int ffi_index_meta(lua_State *L, CTState *cts, CType *ct, MMS mm) } } copyTV(L, base, L->top); - tv = L->top-1; + tv = L->top-1-LJ_FR2; } return lj_meta_tailcall(L, tv); } @@ -318,7 +319,7 @@ LJLIB_CF(ffi_meta___tostring) } } } - lj_str_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); + lj_strfmt_pushf(L, msg, strdata(lj_ctype_repr(L, id, NULL)), p); checkgc: lj_gc_check(L); return 1; @@ -504,10 +505,7 @@ LJLIB_CF(ffi_new) LJLIB_REC(.) } if (sz == CTSIZE_INVALID) lj_err_arg(L, 1, LJ_ERR_FFI_INVSIZE); - if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) - cd = lj_cdata_new(cts, id, sz); - else - cd = lj_cdata_newv(cts, id, sz, ctype_align(info)); + cd = lj_cdata_newx(cts, id, sz, info); setcdataV(L, o-1, cd); /* Anchor the uninitialized cdata. */ lj_cconv_ct_init(cts, ct, sz, cdataptr(cd), o, (MSize)(L->top - o)); /* Initialize cdata. */ @@ -558,6 +556,32 @@ LJLIB_CF(ffi_typeof) LJLIB_REC(.) return 1; } +/* Internal and unsupported API. */ +LJLIB_CF(ffi_typeinfo) +{ + CTState *cts = ctype_cts(L); + CTypeID id = (CTypeID)ffi_checkint(L, 1); + if (id > 0 && id < cts->top) { + CType *ct = ctype_get(cts, id); + GCtab *t; + lua_createtable(L, 0, 4); /* Increment hash size if fields are added. */ + t = tabV(L->top-1); + setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "info")), (int32_t)ct->info); + if (ct->size != CTSIZE_INVALID) + setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "size")), (int32_t)ct->size); + if (ct->sib) + setintV(lj_tab_setstr(L, t, lj_str_newlit(L, "sib")), (int32_t)ct->sib); + if (gcref(ct->name)) { + GCstr *s = gco2str(gcref(ct->name)); + if (isdead(G(L), obj2gco(s))) flipwhite(obj2gco(s)); + setstrV(L, lj_tab_setstr(L, t, lj_str_newlit(L, "name")), s); + } + lj_gc_check(L); + return 1; + } + return 0; +} + LJLIB_CF(ffi_istype) LJLIB_REC(.) { CTState *cts = ctype_cts(L); @@ -697,44 +721,47 @@ LJLIB_CF(ffi_fill) LJLIB_REC(.) return 0; } -#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be) - /* Test ABI string. */ LJLIB_CF(ffi_abi) LJLIB_REC(.) { GCstr *s = lj_lib_checkstr(L, 1); - int b = 0; - switch (s->hash) { + int b = lj_cparse_case(s, #if LJ_64 - case H_(849858eb,ad35fd06): b = 1; break; /* 64bit */ + "\00564bit" #else - case H_(662d3c79,d0e22477): b = 1; break; /* 32bit */ + "\00532bit" #endif #if LJ_ARCH_HASFPU - case H_(e33ee463,e33ee463): b = 1; break; /* fpu */ + "\003fpu" #endif #if LJ_ABI_SOFTFP - case H_(61211a23,c2e8c81c): b = 1; break; /* softfp */ + "\006softfp" #else - case H_(539417a8,8ce0812f): b = 1; break; /* hardfp */ + "\006hardfp" #endif #if LJ_ABI_EABI - case H_(2182df8f,f2ed1152): b = 1; break; /* eabi */ + "\004eabi" #endif #if LJ_ABI_WIN - case H_(4ab624a8,4ab624a8): b = 1; break; /* win */ + "\003win" #endif - case H_(3af93066,1f001464): b = 1; break; /* le/be */ - default: - break; - } +#if LJ_TARGET_UWP + "\003uwp" +#endif +#if LJ_LE + "\002le" +#else + "\002be" +#endif +#if LJ_GC64 + "\004gc64" +#endif + ) >= 0; setboolV(L->top-1, b); setboolV(&G(L)->tmptv2, b); /* Remember for trace recorder. */ return 1; } -#undef H_ - LJLIB_PUSH(top-8) LJLIB_SET(!) /* Store reference to miscmap table. */ LJLIB_CF(ffi_metatype) @@ -768,19 +795,11 @@ LJLIB_CF(ffi_gc) LJLIB_REC(.) GCcdata *cd = ffi_checkcdata(L, 1); TValue *fin = lj_lib_checkany(L, 2); CTState *cts = ctype_cts(L); - GCtab *t = cts->finalizer; CType *ct = ctype_raw(cts, cd->ctypeid); if (!(ctype_isptr(ct->info) || ctype_isstruct(ct->info) || ctype_isrefarray(ct->info))) lj_err_arg(L, 1, LJ_ERR_FFI_INVTYPE); - if (gcref(t->metatable)) { /* Update finalizer table, if still enabled. */ - copyTV(L, lj_tab_set(L, t, L->base), fin); - lj_gc_anybarriert(L, t); - if (!tvisnil(fin)) - cd->marked |= LJ_GC_CDATA_FIN; - else - cd->marked &= ~LJ_GC_CDATA_FIN; - } + lj_cdata_setfin(L, cd, gcval(fin), itype(fin)); L->top = L->base+1; /* Pass through the cdata object. */ return 1; } diff --git a/src/lib_io.c b/src/lib_io.c index d5786e5d..c22faa24 100644 --- a/src/lib_io.c +++ b/src/lib_io.c @@ -19,8 +19,10 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_err.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_state.h" +#include "lj_strfmt.h" #include "lj_ff.h" #include "lj_lib.h" @@ -84,7 +86,7 @@ static IOFileUD *io_file_open(lua_State *L, const char *mode) IOFileUD *iof = io_file_new(L); iof->fp = fopen(fname, mode); if (iof->fp == NULL) - luaL_argerror(L, 1, lj_str_pushf(L, "%s: %s", fname, strerror(errno))); + luaL_argerror(L, 1, lj_strfmt_pushf(L, "%s: %s", fname, strerror(errno))); return iof; } @@ -97,11 +99,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof) int stat = -1; #if LJ_TARGET_POSIX stat = pclose(iof->fp); -#elif LJ_TARGET_WINDOWS +#elif LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP stat = _pclose(iof->fp); -#else - lua_assert(0); - return 0; #endif #if LJ_52 iof->fp = NULL; @@ -110,7 +109,8 @@ static int io_file_close(lua_State *L, IOFileUD *iof) ok = (stat != -1); #endif } else { - lua_assert((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF); + lj_assertL((iof->type & IOFILE_TYPE_MASK) == IOFILE_TYPE_STDF, + "close of unknown FILE* type"); setnilV(L->top++); lua_pushliteral(L, "cannot close standard file"); return 2; @@ -145,7 +145,7 @@ static int io_file_readline(lua_State *L, FILE *fp, MSize chop) MSize m = LUAL_BUFFERSIZE, n = 0, ok = 0; char *buf; for (;;) { - buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); + buf = lj_buf_tmp(L, m); if (fgets(buf+n, m-n, fp) == NULL) break; n += (MSize)strlen(buf+n); ok |= n; @@ -161,7 +161,7 @@ static void io_file_readall(lua_State *L, FILE *fp) { MSize m, n; for (m = LUAL_BUFFERSIZE, n = 0; ; m += m) { - char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); + char *buf = lj_buf_tmp(L, m); n += (MSize)fread(buf+n, 1, m-n, fp); if (n != m) { setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); @@ -174,7 +174,7 @@ static void io_file_readall(lua_State *L, FILE *fp) static int io_file_readlen(lua_State *L, FILE *fp, MSize m) { if (m) { - char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, m); + char *buf = lj_buf_tmp(L, m); MSize n = (MSize)fread(buf, 1, m, fp); setstrV(L, L->top++, lj_str_new(L, buf, (size_t)n)); lj_gc_check(L); @@ -202,13 +202,12 @@ static int io_file_read(lua_State *L, IOFileUD *iof, int start) for (n = start; nargs-- && ok; n++) { if (tvisstr(L->base+n)) { const char *p = strVdata(L->base+n); - if (p[0] != '*') - lj_err_arg(L, n+1, LJ_ERR_INVOPT); - if (p[1] == 'n') + if (p[0] == '*') p++; + if (p[0] == 'n') ok = io_file_readnum(L, fp); - else if ((p[1] & ~0x20) == 'L') - ok = io_file_readline(L, fp, (p[1] == 'l')); - else if (p[1] == 'a') + else if ((p[0] & ~0x20) == 'L') + ok = io_file_readline(L, fp, (p[0] == 'l')); + else if (p[0] == 'a') io_file_readall(L, fp); else lj_err_arg(L, n+1, LJ_ERR_INVFMT); @@ -232,19 +231,11 @@ static int io_file_write(lua_State *L, IOFileUD *iof, int start) cTValue *tv; int status = 1; for (tv = L->base+start; tv < L->top; tv++) { - if (tvisstr(tv)) { - MSize len = strV(tv)->len; - status = status && (fwrite(strVdata(tv), 1, len, fp) == len); - } else if (tvisint(tv)) { - char buf[LJ_STR_INTBUF]; - char *p = lj_str_bufint(buf, intV(tv)); - size_t len = (size_t)(buf+LJ_STR_INTBUF-p); - status = status && (fwrite(p, 1, len, fp) == len); - } else if (tvisnum(tv)) { - status = status && (fprintf(fp, LUA_NUMBER_FMT, numV(tv)) > 0); - } else { + MSize len; + const char *p = lj_strfmt_wstrnum(L, tv, &len); + if (!p) lj_err_argt(L, (int)(tv - L->base) + 1, LUA_TSTRING); - } + status = status && (fwrite(p, 1, len, fp) == len); } if (LJ_52 && status) { L->top = L->base+1; @@ -319,6 +310,14 @@ LJLIB_CF(io_method_flush) LJLIB_REC(io_flush 0) return luaL_fileresult(L, fflush(io_tofile(L)->fp) == 0, NULL); } +#if LJ_32 && defined(__ANDROID__) && __ANDROID_API__ < 24 +/* The Android NDK is such an unmatched marvel of engineering. */ +extern int fseeko32(FILE *, long int, int) __asm__("fseeko"); +extern long int ftello32(FILE *) __asm__("ftello"); +#define fseeko(fp, pos, whence) (fseeko32((fp), (pos), (whence))) +#define ftello(fp) (ftello32((fp))) +#endif + LJLIB_CF(io_method_seek) { FILE *fp = io_tofile(L)->fp; @@ -419,7 +418,7 @@ LJLIB_CF(io_open) LJLIB_CF(io_popen) { -#if LJ_TARGET_POSIX || LJ_TARGET_WINDOWS +#if LJ_TARGET_POSIX || (LJ_TARGET_WINDOWS && !LJ_TARGET_XBOXONE && !LJ_TARGET_UWP) const char *fname = strdata(lj_lib_checkstr(L, 1)); GCstr *s = lj_lib_optstr(L, 2); const char *mode = s ? strdata(s) : "r"; @@ -440,7 +439,7 @@ LJLIB_CF(io_popen) LJLIB_CF(io_tmpfile) { IOFileUD *iof = io_file_new(L); -#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA +#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX iof->fp = NULL; errno = ENOSYS; #else iof->fp = tmpfile(); diff --git a/src/lib_jit.c b/src/lib_jit.c index 83ee0984..2867d420 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -10,13 +10,17 @@ #include "lauxlib.h" #include "lualib.h" -#include "lj_arch.h" #include "lj_obj.h" +#include "lj_gc.h" #include "lj_err.h" #include "lj_debug.h" #include "lj_str.h" #include "lj_tab.h" +#include "lj_state.h" #include "lj_bc.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#endif #if LJ_HASJIT #include "lj_ir.h" #include "lj_jit.h" @@ -24,6 +28,7 @@ #include "lj_iropt.h" #include "lj_target.h" #endif +#include "lj_trace.h" #include "lj_dispatch.h" #include "lj_vm.h" #include "lj_vmevent.h" @@ -99,8 +104,8 @@ LJLIB_CF(jit_status) jit_State *J = L2J(L); L->top = L->base; setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0); - flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING); - flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING); + flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING); + flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING); return (int)(L->top - L->base); #else setboolV(L->top++, 0); @@ -108,6 +113,13 @@ LJLIB_CF(jit_status) #endif } +LJLIB_CF(jit_security) +{ + int idx = lj_lib_checkopt(L, 1, -1, LJ_SECURITY_MODESTRING); + setintV(L->top++, ((LJ_SECURITY_MODE >> (2*idx)) & 3)); + return 1; +} + LJLIB_CF(jit_attach) { #ifdef LUAJIT_DISABLE_VMEVENT @@ -222,7 +234,7 @@ LJLIB_CF(jit_util_funcbc) if (pc < pt->sizebc) { BCIns ins = proto_bc(pt)[pc]; BCOp op = bc_op(ins); - lua_assert(op < BC__MAX); + lj_assertL(op < BC__MAX, "bad bytecode op %d", op); setintV(L->top, ins); setintV(L->top+1, lj_bc_mode[op]); L->top += 2; @@ -280,7 +292,7 @@ static GCtrace *jit_checktrace(lua_State *L) /* Names of link types. ORDER LJ_TRLINK */ static const char *const jit_trlinkname[] = { "none", "root", "loop", "tail-recursion", "up-recursion", "down-recursion", - "interpreter", "return" + "interpreter", "return", "stitch" }; /* local info = jit.util.traceinfo(tr) */ @@ -333,6 +345,9 @@ LJLIB_CF(jit_util_tracek) slot = ir->op2; ir = &T->ir[ir->op1]; } +#if LJ_HASFFI + if (ir->o == IR_KINT64) ctype_loadffi(L); +#endif lj_ir_kvalue(L, L->top-2, ir); setintV(L->top-1, (int32_t)irt_type(ir->t)); if (slot == -1) @@ -417,6 +432,12 @@ LJLIB_CF(jit_util_ircalladdr) #include "lj_libdef.h" +static int luaopen_jit_util(lua_State *L) +{ + LJ_LIB_REG(L, NULL, jit_util); + return 1; +} + /* -- jit.opt module ------------------------------------------------------ */ #if LJ_HASJIT @@ -453,7 +474,7 @@ static int jitopt_flag(jit_State *J, const char *str) str += str[2] == '-' ? 3 : 2; set = 0; } - for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) { + for (opt = JIT_F_OPT; ; opt <<= 1) { size_t len = *(const uint8_t *)lst; if (len == 0) break; @@ -473,7 +494,7 @@ static int jitopt_param(jit_State *J, const char *str) int i; for (i = 0; i < JIT_P__MAX; i++) { size_t len = *(const uint8_t *)lst; - lua_assert(len != 0); + lj_assertJ(len != 0, "bad JIT_P_STRING"); if (strncmp(str, lst+1, len) == 0 && str[len] == '=') { int32_t n = 0; const char *p = &str[len+1]; @@ -514,6 +535,104 @@ LJLIB_CF(jit_opt_start) #endif +/* -- jit.profile module -------------------------------------------------- */ + +#if LJ_HASPROFILE + +#define LJLIB_MODULE_jit_profile + +/* Not loaded by default, use: local profile = require("jit.profile") */ + +#define KEY_PROFILE_THREAD (U64x(80000000,00000000)|'t') +#define KEY_PROFILE_FUNC (U64x(80000000,00000000)|'f') + +static void jit_profile_callback(lua_State *L2, lua_State *L, int samples, + int vmstate) +{ + TValue key; + cTValue *tv; + key.u64 = KEY_PROFILE_FUNC; + tv = lj_tab_get(L, tabV(registry(L)), &key); + if (tvisfunc(tv)) { + char vmst = (char)vmstate; + int status; + setfuncV(L2, L2->top++, funcV(tv)); + setthreadV(L2, L2->top++, L); + setintV(L2->top++, samples); + setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1)); + status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */ + if (status) { + if (G(L2)->panic) G(L2)->panic(L2); + exit(EXIT_FAILURE); + } + lj_trace_abort(G(L2)); + } +} + +/* profile.start(mode, cb) */ +LJLIB_CF(jit_profile_start) +{ + GCtab *registry = tabV(registry(L)); + GCstr *mode = lj_lib_optstr(L, 1); + GCfunc *func = lj_lib_checkfunc(L, 2); + lua_State *L2 = lua_newthread(L); /* Thread that runs profiler callback. */ + TValue key; + /* Anchor thread and function in registry. */ + key.u64 = KEY_PROFILE_THREAD; + setthreadV(L, lj_tab_set(L, registry, &key), L2); + key.u64 = KEY_PROFILE_FUNC; + setfuncV(L, lj_tab_set(L, registry, &key), func); + lj_gc_anybarriert(L, registry); + luaJIT_profile_start(L, mode ? strdata(mode) : "", + (luaJIT_profile_callback)jit_profile_callback, L2); + return 0; +} + +/* profile.stop() */ +LJLIB_CF(jit_profile_stop) +{ + GCtab *registry; + TValue key; + luaJIT_profile_stop(L); + registry = tabV(registry(L)); + key.u64 = KEY_PROFILE_THREAD; + setnilV(lj_tab_set(L, registry, &key)); + key.u64 = KEY_PROFILE_FUNC; + setnilV(lj_tab_set(L, registry, &key)); + lj_gc_anybarriert(L, registry); + return 0; +} + +/* dump = profile.dumpstack([thread,] fmt, depth) */ +LJLIB_CF(jit_profile_dumpstack) +{ + lua_State *L2 = L; + int arg = 0; + size_t len; + int depth; + GCstr *fmt; + const char *p; + if (L->top > L->base && tvisthread(L->base)) { + L2 = threadV(L->base); + arg = 1; + } + fmt = lj_lib_checkstr(L, arg+1); + depth = lj_lib_checkint(L, arg+2); + p = luaJIT_profile_dumpstack(L2, strdata(fmt), depth, &len); + lua_pushlstring(L, p, len); + return 1; +} + +#include "lj_libdef.h" + +static int luaopen_jit_profile(lua_State *L) +{ + LJ_LIB_REG(L, NULL, jit_profile); + return 1; +} + +#endif + /* -- JIT compiler initialization ----------------------------------------- */ #if LJ_HASJIT @@ -524,66 +643,41 @@ JIT_PARAMDEF(JIT_PARAMINIT) #undef JIT_PARAMINIT 0 }; -#endif #if LJ_TARGET_ARM && LJ_TARGET_LINUX #include <sys/utsname.h> #endif -/* Arch-dependent CPU detection. */ -static uint32_t jit_cpudetect(lua_State *L) +/* Arch-dependent CPU feature detection. */ +static uint32_t jit_cpudetect(void) { uint32_t flags = 0; #if LJ_TARGET_X86ORX64 + uint32_t vendor[4]; uint32_t features[4]; if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) { -#if !LJ_HASJIT -#define JIT_F_CMOV 1 -#define JIT_F_SSE2 2 -#endif - flags |= ((features[3] >> 15)&1) * JIT_F_CMOV; - flags |= ((features[3] >> 26)&1) * JIT_F_SSE2; -#if LJ_HASJIT flags |= ((features[2] >> 0)&1) * JIT_F_SSE3; flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1; - if (vendor[2] == 0x6c65746e) { /* Intel. */ - if ((features[0] & 0x0ff00f00) == 0x00000f00) /* P4. */ - flags |= JIT_F_P4; /* Currently unused. */ - else if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */ - flags |= JIT_F_LEA_AGU; - } else if (vendor[2] == 0x444d4163) { /* AMD. */ - uint32_t fam = (features[0] & 0x0ff00f00); - if (fam == 0x00000f00) /* K8. */ - flags |= JIT_F_SPLIT_XMM; - if (fam >= 0x00000f00) /* K8, K10. */ - flags |= JIT_F_PREFER_IMUL; + if (vendor[0] >= 7) { + uint32_t xfeatures[4]; + lj_vm_cpuid(7, xfeatures); + flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2; } -#endif } - /* Check for required instruction set support on x86 (unnecessary on x64). */ -#if LJ_TARGET_X86 -#if !defined(LUAJIT_CPU_NOCMOV) - if (!(flags & JIT_F_CMOV)) - luaL_error(L, "CPU not supported"); -#endif -#if defined(LUAJIT_CPU_SSE2) - if (!(flags & JIT_F_SSE2)) - luaL_error(L, "CPU does not support SSE2 (recompile without -DLUAJIT_CPU_SSE2)"); -#endif -#endif + /* Don't bother checking for SSE2 -- the VM will crash before getting here. */ + #elif LJ_TARGET_ARM -#if LJ_HASJIT + int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */ #if LJ_TARGET_LINUX if (ver < 70) { /* Runtime ARM CPU detection. */ struct utsname ut; uname(&ut); if (strncmp(ut.machine, "armv", 4) == 0) { - if (ut.machine[4] >= '7') - ver = 70; - else if (ut.machine[4] == '6') - ver = 60; + if (ut.machine[4] >= '8') ver = 80; + else if (ut.machine[4] == '7') ver = 70; + else if (ut.machine[4] == '6') ver = 60; } } #endif @@ -591,74 +685,77 @@ static uint32_t jit_cpudetect(lua_State *L) ver >= 61 ? JIT_F_ARMV6T2_ : ver >= 60 ? JIT_F_ARMV6_ : 0; flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2; -#endif + +#elif LJ_TARGET_ARM64 + + /* No optional CPU features to detect (for now). */ + #elif LJ_TARGET_PPC -#if LJ_HASJIT + #if LJ_ARCH_SQRT flags |= JIT_F_SQRT; #endif #if LJ_ARCH_ROUND flags |= JIT_F_ROUND; #endif -#endif -#elif LJ_TARGET_PPCSPE - /* Nothing to do. */ + #elif LJ_TARGET_MIPS -#if LJ_HASJIT + /* Compile-time MIPS CPU detection. */ #if LJ_ARCH_VERSION >= 20 - flags |= JIT_F_MIPS32R2; + flags |= JIT_F_MIPSXXR2; #endif /* Runtime MIPS CPU detection. */ #if defined(__GNUC__) - if (!(flags & JIT_F_MIPS32R2)) { + if (!(flags & JIT_F_MIPSXXR2)) { int x; +#ifdef __mips16 + x = 0; /* Runtime detection is difficult. Ensure optimal -march flags. */ +#else /* On MIPS32R1 rotr is treated as srl. rotr r2,r2,1 -> srl r2,r2,1. */ __asm__("li $2, 1\n\t.long 0x00221042\n\tmove %0, $2" : "=r"(x) : : "$2"); - if (x) flags |= JIT_F_MIPS32R2; /* Either 0x80000000 (R2) or 0 (R1). */ - } #endif + if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */ + } #endif + #else #error "Missing CPU detection for this architecture" #endif - UNUSED(L); return flags; } /* Initialize JIT compiler. */ static void jit_init(lua_State *L) { - uint32_t flags = jit_cpudetect(L); -#if LJ_HASJIT jit_State *J = L2J(L); -#if LJ_TARGET_X86 - /* Silently turn off the JIT compiler on CPUs without SSE2. */ - if ((flags & JIT_F_SSE2)) -#endif - J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT; + J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT; memcpy(J->param, jit_param_default, sizeof(J->param)); lj_dispatch_update(G(L)); -#else - UNUSED(flags); -#endif } +#endif LUALIB_API int luaopen_jit(lua_State *L) { +#if LJ_HASJIT + jit_init(L); +#endif lua_pushliteral(L, LJ_OS_NAME); lua_pushliteral(L, LJ_ARCH_NAME); lua_pushinteger(L, LUAJIT_VERSION_NUM); lua_pushliteral(L, LUAJIT_VERSION); LJ_LIB_REG(L, LUA_JITLIBNAME, jit); +#if LJ_HASPROFILE + lj_lib_prereg(L, LUA_JITLIBNAME ".profile", luaopen_jit_profile, + tabref(L->env)); +#endif #ifndef LUAJIT_DISABLE_JITUTIL - LJ_LIB_REG(L, "jit.util", jit_util); + lj_lib_prereg(L, LUA_JITLIBNAME ".util", luaopen_jit_util, tabref(L->env)); #endif #if LJ_HASJIT LJ_LIB_REG(L, "jit.opt", jit_opt); #endif L->top -= 2; - jit_init(L); return 1; } diff --git a/src/lib_math.c b/src/lib_math.c index 56fb091b..b677bbcd 100644 --- a/src/lib_math.c +++ b/src/lib_math.c @@ -15,6 +15,7 @@ #include "lj_obj.h" #include "lj_lib.h" #include "lj_vm.h" +#include "lj_prng.h" /* ------------------------------------------------------------------------ */ @@ -33,25 +34,19 @@ LJLIB_ASM(math_sqrt) LJLIB_REC(math_unary IRFPM_SQRT) lj_lib_checknum(L, 1); return FFH_RETRY; } -LJLIB_ASM_(math_log10) LJLIB_REC(math_unary IRFPM_LOG10) -LJLIB_ASM_(math_exp) LJLIB_REC(math_unary IRFPM_EXP) -LJLIB_ASM_(math_sin) LJLIB_REC(math_unary IRFPM_SIN) -LJLIB_ASM_(math_cos) LJLIB_REC(math_unary IRFPM_COS) -LJLIB_ASM_(math_tan) LJLIB_REC(math_unary IRFPM_TAN) -LJLIB_ASM_(math_asin) LJLIB_REC(math_atrig FF_math_asin) -LJLIB_ASM_(math_acos) LJLIB_REC(math_atrig FF_math_acos) -LJLIB_ASM_(math_atan) LJLIB_REC(math_atrig FF_math_atan) -LJLIB_ASM_(math_sinh) LJLIB_REC(math_htrig IRCALL_sinh) -LJLIB_ASM_(math_cosh) LJLIB_REC(math_htrig IRCALL_cosh) -LJLIB_ASM_(math_tanh) LJLIB_REC(math_htrig IRCALL_tanh) +LJLIB_ASM_(math_log10) LJLIB_REC(math_call IRCALL_log10) +LJLIB_ASM_(math_exp) LJLIB_REC(math_call IRCALL_exp) +LJLIB_ASM_(math_sin) LJLIB_REC(math_call IRCALL_sin) +LJLIB_ASM_(math_cos) LJLIB_REC(math_call IRCALL_cos) +LJLIB_ASM_(math_tan) LJLIB_REC(math_call IRCALL_tan) +LJLIB_ASM_(math_asin) LJLIB_REC(math_call IRCALL_asin) +LJLIB_ASM_(math_acos) LJLIB_REC(math_call IRCALL_acos) +LJLIB_ASM_(math_atan) LJLIB_REC(math_call IRCALL_atan) +LJLIB_ASM_(math_sinh) LJLIB_REC(math_call IRCALL_sinh) +LJLIB_ASM_(math_cosh) LJLIB_REC(math_call IRCALL_cosh) +LJLIB_ASM_(math_tanh) LJLIB_REC(math_call IRCALL_tanh) LJLIB_ASM_(math_frexp) -LJLIB_ASM_(math_modf) LJLIB_REC(.) - -LJLIB_PUSH(57.29577951308232) -LJLIB_ASM_(math_deg) LJLIB_REC(math_degrad) - -LJLIB_PUSH(0.017453292519943295) -LJLIB_ASM_(math_rad) LJLIB_REC(math_degrad) +LJLIB_ASM_(math_modf) LJLIB_ASM(math_log) LJLIB_REC(math_log) { @@ -63,12 +58,15 @@ LJLIB_ASM(math_log) LJLIB_REC(math_log) #else x = lj_vm_log2(x); y = 1.0 / lj_vm_log2(y); #endif - setnumV(L->base-1, x*y); /* Do NOT join the expression to x / y. */ + setnumV(L->base-1-LJ_FR2, x*y); /* Do NOT join the expression to x / y. */ return FFH_RES(1); } return FFH_RETRY; } +LJLIB_LUA(math_deg) /* function(x) return x * 57.29577951308232 end */ +LJLIB_LUA(math_rad) /* function(x) return x * 0.017453292519943295 end */ + LJLIB_ASM(math_atan2) LJLIB_REC(.) { lj_lib_checknum(L, 1); @@ -108,34 +106,11 @@ LJLIB_PUSH(1e310) LJLIB_SET(huge) ** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. */ -/* PRNG state. */ -struct RandomState { - uint64_t gen[4]; /* State of the 4 LFSR generators. */ - int valid; /* State is valid. */ -}; - /* Union needed for bit-pattern conversion between uint64_t and double. */ typedef union { uint64_t u64; double d; } U64double; -/* Update generator i and compute a running xor of all states. */ -#define TW223_GEN(i, k, q, s) \ - z = rs->gen[i]; \ - z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ - r ^= z; rs->gen[i] = z; - -/* PRNG step function. Returns a double in the range 1.0 <= d < 2.0. */ -LJ_NOINLINE uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs) -{ - uint64_t z, r = 0; - TW223_GEN(0, 63, 31, 18) - TW223_GEN(1, 58, 19, 28) - TW223_GEN(2, 55, 24, 7) - TW223_GEN(3, 47, 21, 8) - return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); -} - -/* PRNG initialization function. */ -static void random_init(RandomState *rs, double d) +/* PRNG seeding function. */ +static void random_seed(PRNGState *rs, double d) { uint32_t r = 0x11090601; /* 64-k[i] as four 8 bit constants. */ int i; @@ -144,24 +119,22 @@ static void random_init(RandomState *rs, double d) uint32_t m = 1u << (r&255); r >>= 8; u.d = d = d * 3.14159265358979323846 + 2.7182818284590452354; - if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of gen[i] are non-zero. */ - rs->gen[i] = u.u64; + if (u.u64 < m) u.u64 += m; /* Ensure k[i] MSB of u[i] are non-zero. */ + rs->u[i] = u.u64; } - rs->valid = 1; for (i = 0; i < 10; i++) - lj_math_random_step(rs); + (void)lj_prng_u64(rs); } /* PRNG extract function. */ -LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ +LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */ LJLIB_CF(math_random) LJLIB_REC(.) { int n = (int)(L->top - L->base); - RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); U64double u; double d; - if (LJ_UNLIKELY(!rs->valid)) random_init(rs, 0.0); - u.u64 = lj_math_random_step(rs); + u.u64 = lj_prng_u64d(rs); d = u.d - 1.0; if (n > 0) { #if LJ_DUALNUM @@ -206,11 +179,11 @@ LJLIB_CF(math_random) LJLIB_REC(.) } /* PRNG seed function. */ -LJLIB_PUSH(top-2) /* Upvalue holds userdata with RandomState. */ +LJLIB_PUSH(top-2) /* Upvalue holds userdata with PRNGState. */ LJLIB_CF(math_randomseed) { - RandomState *rs = (RandomState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); - random_init(rs, lj_lib_checknum(L, 1)); + PRNGState *rs = (PRNGState *)(uddata(udataV(lj_lib_upvalue(L, 1)))); + random_seed(rs, lj_lib_checknum(L, 1)); return 0; } @@ -220,14 +193,9 @@ LJLIB_CF(math_randomseed) LUALIB_API int luaopen_math(lua_State *L) { - RandomState *rs; - rs = (RandomState *)lua_newuserdata(L, sizeof(RandomState)); - rs->valid = 0; /* Use lazy initialization to save some time on startup. */ + PRNGState *rs = (PRNGState *)lua_newuserdata(L, sizeof(PRNGState)); + lj_prng_seed_fixed(rs); LJ_LIB_REG(L, LUA_MATHLIBNAME, math); -#if defined(LUA_COMPAT_MOD) && !LJ_52 - lua_getfield(L, -1, "fmod"); - lua_setfield(L, -2, "mod"); -#endif return 1; } diff --git a/src/lib_os.c b/src/lib_os.c index 7ad7dfaf..6bcd0147 100644 --- a/src/lib_os.c +++ b/src/lib_os.c @@ -17,7 +17,10 @@ #include "lualib.h" #include "lj_obj.h" +#include "lj_gc.h" #include "lj_err.h" +#include "lj_buf.h" +#include "lj_str.h" #include "lj_lib.h" #if LJ_TARGET_POSIX @@ -73,7 +76,7 @@ LJLIB_CF(os_rename) LJLIB_CF(os_tmpname) { -#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PSVITA +#if LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA || LJ_TARGET_NX lj_err_caller(L, LJ_ERR_OSUNIQF); return 0; #else @@ -188,7 +191,7 @@ LJLIB_CF(os_date) #endif } if (stm == NULL) { /* Invalid date? */ - setnilV(L->top-1); + setnilV(L->top++); } else if (strcmp(s, "*t") == 0) { lua_createtable(L, 0, 9); /* 9 = number of fields */ setfield(L, "sec", stm->tm_sec); @@ -200,23 +203,25 @@ LJLIB_CF(os_date) setfield(L, "wday", stm->tm_wday+1); setfield(L, "yday", stm->tm_yday+1); setboolfield(L, "isdst", stm->tm_isdst); - } else { - char cc[3]; - luaL_Buffer b; - cc[0] = '%'; cc[2] = '\0'; - luaL_buffinit(L, &b); - for (; *s; s++) { - if (*s != '%' || *(s + 1) == '\0') { /* No conversion specifier? */ - luaL_addchar(&b, *s); - } else { - size_t reslen; - char buff[200]; /* Should be big enough for any conversion result. */ - cc[1] = *(++s); - reslen = strftime(buff, sizeof(buff), cc, stm); - luaL_addlstring(&b, buff, reslen); + } else if (*s) { + SBuf *sb = &G(L)->tmpbuf; + MSize sz = 0, retry = 4; + const char *q; + for (q = s; *q; q++) + sz += (*q == '%') ? 30 : 1; /* Overflow doesn't matter. */ + setsbufL(sb, L); + while (retry--) { /* Limit growth for invalid format or empty result. */ + char *buf = lj_buf_need(sb, sz); + size_t len = strftime(buf, sbufsz(sb), s, stm); + if (len) { + setstrV(L, L->top++, lj_str_new(L, buf, len)); + lj_gc_check(L); + break; } + sz += (sz|1); } - luaL_pushresult(&b); + } else { + setstrV(L, L->top++, &G(L)->strempty); } return 1; } diff --git a/src/lib_package.c b/src/lib_package.c index d2ef474f..63a91211 100644 --- a/src/lib_package.c +++ b/src/lib_package.c @@ -76,6 +76,20 @@ static const char *ll_bcsym(void *lib, const char *sym) BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); #endif +#if LJ_TARGET_UWP +void *LJ_WIN_LOADLIBA(const char *path) +{ + DWORD err = GetLastError(); + wchar_t wpath[256]; + HANDLE lib = NULL; + if (MultiByteToWideChar(CP_ACP, 0, path, -1, wpath, 256) > 0) { + lib = LoadPackagedLibrary(wpath, 0); + } + SetLastError(err); + return lib; +} +#endif + #undef setprogdir static void setprogdir(lua_State *L) @@ -96,9 +110,17 @@ static void setprogdir(lua_State *L) static void pusherror(lua_State *L) { DWORD error = GetLastError(); +#if LJ_TARGET_XBOXONE + wchar_t wbuffer[128]; + char buffer[128*2]; + if (FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, + NULL, error, 0, wbuffer, sizeof(wbuffer)/sizeof(wchar_t), NULL) && + WideCharToMultiByte(CP_ACP, 0, wbuffer, 128, buffer, 128*2, NULL, NULL)) +#else char buffer[128]; if (FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS | FORMAT_MESSAGE_FROM_SYSTEM, NULL, error, 0, buffer, sizeof(buffer), NULL)) +#endif lua_pushstring(L, buffer); else lua_pushfstring(L, "system error %d\n", error); @@ -111,7 +133,7 @@ static void ll_unloadlib(void *lib) static void *ll_load(lua_State *L, const char *path, int gl) { - HINSTANCE lib = LoadLibraryA(path); + HINSTANCE lib = LJ_WIN_LOADLIBA(path); if (lib == NULL) pusherror(L); UNUSED(gl); return lib; @@ -124,17 +146,25 @@ static lua_CFunction ll_sym(lua_State *L, void *lib, const char *sym) return f; } +#if LJ_TARGET_UWP +EXTERN_C IMAGE_DOS_HEADER __ImageBase; +#endif + static const char *ll_bcsym(void *lib, const char *sym) { if (lib) { return (const char *)GetProcAddress((HINSTANCE)lib, sym); } else { +#if LJ_TARGET_UWP + return (const char *)GetProcAddress((HINSTANCE)&__ImageBase, sym); +#else HINSTANCE h = GetModuleHandleA(NULL); const char *p = (const char *)GetProcAddress(h, sym); if (p == NULL && GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (const char *)ll_bcsym, &h)) p = (const char *)GetProcAddress(h, sym); return p; +#endif } } @@ -185,8 +215,7 @@ static void **ll_register(lua_State *L, const char *path) lua_pop(L, 1); plib = (void **)lua_newuserdata(L, sizeof(void *)); *plib = NULL; - luaL_getmetatable(L, "_LOADLIB"); - lua_setmetatable(L, -2); + luaL_setmetatable(L, "_LOADLIB"); lua_pushfstring(L, "LOADLIB: %s", path); lua_pushvalue(L, -2); lua_settable(L, LUA_REGISTRYINDEX); @@ -396,8 +425,7 @@ static int lj_cf_package_loader_preload(lua_State *L) /* ------------------------------------------------------------------------ */ -static const int sentinel_ = 0; -#define sentinel ((void *)&sentinel_) +#define KEY_SENTINEL (U64x(80000000,00000000)|'s') static int lj_cf_package_require(lua_State *L) { @@ -407,7 +435,7 @@ static int lj_cf_package_require(lua_State *L) lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); lua_getfield(L, 2, name); if (lua_toboolean(L, -1)) { /* is it there? */ - if (lua_touserdata(L, -1) == sentinel) /* check loops */ + if ((L->top-1)->u64 == KEY_SENTINEL) /* check loops */ luaL_error(L, "loop or previous error loading module " LUA_QS, name); return 1; /* package is already loaded */ } @@ -430,14 +458,14 @@ static int lj_cf_package_require(lua_State *L) else lua_pop(L, 1); } - lua_pushlightuserdata(L, sentinel); + (L->top++)->u64 = KEY_SENTINEL; lua_setfield(L, 2, name); /* _LOADED[name] = sentinel */ lua_pushstring(L, name); /* pass name as argument to module */ lua_call(L, 1, 1); /* run loaded module */ if (!lua_isnil(L, -1)) /* non-nil return? */ lua_setfield(L, 2, name); /* _LOADED[name] = returned value */ lua_getfield(L, 2, name); - if (lua_touserdata(L, -1) == sentinel) { /* module did not set a value? */ + if ((L->top-1)->u64 == KEY_SENTINEL) { /* module did not set a value? */ lua_pushboolean(L, 1); /* use true as result */ lua_pushvalue(L, -1); /* extra copy to be returned */ lua_setfield(L, 2, name); /* _LOADED[name] = true */ @@ -487,29 +515,19 @@ static void modinit(lua_State *L, const char *modname) static int lj_cf_package_module(lua_State *L) { const char *modname = luaL_checkstring(L, 1); - int loaded = lua_gettop(L) + 1; /* index of _LOADED table */ - lua_getfield(L, LUA_REGISTRYINDEX, "_LOADED"); - lua_getfield(L, loaded, modname); /* get _LOADED[modname] */ - if (!lua_istable(L, -1)) { /* not found? */ - lua_pop(L, 1); /* remove previous result */ - /* try global variable (and create one if it does not exist) */ - if (luaL_findtable(L, LUA_GLOBALSINDEX, modname, 1) != NULL) - lj_err_callerv(L, LJ_ERR_BADMODN, modname); - lua_pushvalue(L, -1); - lua_setfield(L, loaded, modname); /* _LOADED[modname] = new table */ - } - /* check whether table already has a _NAME field */ + int lastarg = (int)(L->top - L->base); + luaL_pushmodule(L, modname, 1); lua_getfield(L, -1, "_NAME"); - if (!lua_isnil(L, -1)) { /* is table an initialized module? */ + if (!lua_isnil(L, -1)) { /* Module already initialized? */ lua_pop(L, 1); - } else { /* no; initialize it */ + } else { lua_pop(L, 1); modinit(L, modname); } lua_pushvalue(L, -1); setfenv(L); - dooptions(L, loaded - 1); - return 0; + dooptions(L, lastarg); + return LJ_52; } static int lj_cf_package_seeall(lua_State *L) @@ -580,13 +598,16 @@ LUALIB_API int luaopen_package(lua_State *L) lj_lib_pushcf(L, lj_cf_package_unloadlib, 1); lua_setfield(L, -2, "__gc"); luaL_register(L, LUA_LOADLIBNAME, package_lib); - lua_pushvalue(L, -1); - lua_replace(L, LUA_ENVIRONINDEX); + lua_copy(L, -1, LUA_ENVIRONINDEX); lua_createtable(L, sizeof(package_loaders)/sizeof(package_loaders[0])-1, 0); for (i = 0; package_loaders[i] != NULL; i++) { lj_lib_pushcf(L, package_loaders[i], 1); lua_rawseti(L, -2, i+1); } +#if LJ_52 + lua_pushvalue(L, -1); + lua_setfield(L, -3, "searchers"); +#endif lua_setfield(L, -2, "loaders"); lua_getfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); noenv = lua_toboolean(L, -1); diff --git a/src/lib_string.c b/src/lib_string.c index 60bb8088..79aeddfc 100644 --- a/src/lib_string.c +++ b/src/lib_string.c @@ -6,8 +6,6 @@ ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h */ -#include <stdio.h> - #define lib_string_c #define LUA_LIB @@ -18,6 +16,7 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_err.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_meta.h" @@ -25,17 +24,19 @@ #include "lj_ff.h" #include "lj_bcdump.h" #include "lj_char.h" +#include "lj_strfmt.h" #include "lj_lib.h" /* ------------------------------------------------------------------------ */ #define LJLIB_MODULE_string -LJLIB_ASM(string_len) LJLIB_REC(.) -{ - lj_lib_checkstr(L, 1); - return FFH_RETRY; -} +LJLIB_LUA(string_len) /* + function(s) + CHECK_str(s) + return #s + end +*/ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) { @@ -57,21 +58,21 @@ LJLIB_ASM(string_byte) LJLIB_REC(string_range 0) lj_state_checkstack(L, (MSize)n); p = (const unsigned char *)strdata(s) + start; for (i = 0; i < n; i++) - setintV(L->base + i-1, p[i]); + setintV(L->base + i-1-LJ_FR2, p[i]); return FFH_RES(n); } -LJLIB_ASM(string_char) +LJLIB_ASM(string_char) LJLIB_REC(.) { int i, nargs = (int)(L->top - L->base); - char *buf = lj_str_needbuf(L, &G(L)->tmpbuf, (MSize)nargs); + char *buf = lj_buf_tmp(L, (MSize)nargs); for (i = 1; i <= nargs; i++) { int32_t k = lj_lib_checkint(L, i); if (!checku8(k)) lj_err_arg(L, i, LJ_ERR_BADVAL); buf[i-1] = (char)k; } - setstrV(L, L->base-1, lj_str_new(L, buf, (size_t)nargs)); + setstrV(L, L->base-1-LJ_FR2, lj_str_new(L, buf, (size_t)nargs)); return FFH_RES(1); } @@ -83,68 +84,38 @@ LJLIB_ASM(string_sub) LJLIB_REC(string_range 1) return FFH_RETRY; } -LJLIB_ASM(string_rep) +LJLIB_CF(string_rep) LJLIB_REC(.) { GCstr *s = lj_lib_checkstr(L, 1); - int32_t k = lj_lib_checkint(L, 2); + int32_t rep = lj_lib_checkint(L, 2); GCstr *sep = lj_lib_optstr(L, 3); - int32_t len = (int32_t)s->len; - global_State *g = G(L); - int64_t tlen; - const char *src; - char *buf; - if (k <= 0) { - empty: - setstrV(L, L->base-1, &g->strempty); - return FFH_RES(1); - } - if (sep) { - tlen = (int64_t)len + sep->len; - if (tlen > LJ_MAX_STR) - lj_err_caller(L, LJ_ERR_STROV); - tlen *= k; - if (tlen > LJ_MAX_STR) - lj_err_caller(L, LJ_ERR_STROV); - } else { - tlen = (int64_t)k * len; - if (tlen > LJ_MAX_STR) - lj_err_caller(L, LJ_ERR_STROV); - } - if (tlen == 0) goto empty; - buf = lj_str_needbuf(L, &g->tmpbuf, (MSize)tlen); - src = strdata(s); - if (sep) { - tlen -= sep->len; /* Ignore trailing separator. */ - if (k > 1) { /* Paste one string and one separator. */ - int32_t i; - i = 0; while (i < len) *buf++ = src[i++]; - src = strdata(sep); len = sep->len; - i = 0; while (i < len) *buf++ = src[i++]; - src = g->tmpbuf.buf; len += s->len; k--; /* Now copy that k-1 times. */ - } + SBuf *sb = lj_buf_tmp_(L); + if (sep && rep > 1) { + GCstr *s2 = lj_buf_cat2str(L, sep, s); + lj_buf_reset(sb); + lj_buf_putstr(sb, s); + s = s2; + rep--; } - do { - int32_t i = 0; - do { *buf++ = src[i++]; } while (i < len); - } while (--k > 0); - setstrV(L, L->base-1, lj_str_new(L, g->tmpbuf.buf, (size_t)tlen)); - return FFH_RES(1); + sb = lj_buf_putstr_rep(sb, s, rep); + setstrV(L, L->top-1, lj_buf_str(L, sb)); + lj_gc_check(L); + return 1; } -LJLIB_ASM(string_reverse) +LJLIB_ASM(string_reverse) LJLIB_REC(string_op IRCALL_lj_buf_putstr_reverse) { - GCstr *s = lj_lib_checkstr(L, 1); - lj_str_needbuf(L, &G(L)->tmpbuf, s->len); + lj_lib_checkstr(L, 1); return FFH_RETRY; } -LJLIB_ASM_(string_lower) -LJLIB_ASM_(string_upper) +LJLIB_ASM_(string_lower) LJLIB_REC(string_op IRCALL_lj_buf_putstr_lower) +LJLIB_ASM_(string_upper) LJLIB_REC(string_op IRCALL_lj_buf_putstr_upper) /* ------------------------------------------------------------------------ */ -static int writer_buf(lua_State *L, const void *p, size_t size, void *b) +static int writer_buf(lua_State *L, const void *p, size_t size, void *sb) { - luaL_addlstring((luaL_Buffer *)b, (const char *)p, size); + lj_buf_putmem((SBuf *)sb, p, (MSize)size); UNUSED(L); return 0; } @@ -153,19 +124,19 @@ LJLIB_CF(string_dump) { GCfunc *fn = lj_lib_checkfunc(L, 1); int strip = L->base+1 < L->top && tvistruecond(L->base+1); - luaL_Buffer b; + SBuf *sb = lj_buf_tmp_(L); /* Assumes lj_bcwrite() doesn't use tmpbuf. */ L->top = L->base+1; - luaL_buffinit(L, &b); - if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, &b, strip)) + if (!isluafunc(fn) || lj_bcwrite(L, funcproto(fn), writer_buf, sb, strip)) lj_err_caller(L, LJ_ERR_STRDUMP); - luaL_pushresult(&b); + setstrV(L, L->top-1, lj_buf_str(L, sb)); + lj_gc_check(L); return 1; } /* ------------------------------------------------------------------------ */ /* macro to `unsign' a character */ -#define uchar(c) ((unsigned char)(c)) +#define uchar(c) ((unsigned char)(c)) #define CAP_UNFINISHED (-1) #define CAP_POSITION (-2) @@ -183,7 +154,6 @@ typedef struct MatchState { } MatchState; #define L_ESC '%' -#define SPECIALS "^$*+?.([%-" static int check_capture(MatchState *ms, int l) { @@ -450,30 +420,6 @@ static const char *match(MatchState *ms, const char *s, const char *p) return s; } -static const char *lmemfind(const char *s1, size_t l1, - const char *s2, size_t l2) -{ - if (l2 == 0) { - return s1; /* empty strings are everywhere */ - } else if (l2 > l1) { - return NULL; /* avoids a negative `l1' */ - } else { - const char *init; /* to search for a `*s2' inside `s1' */ - l2--; /* 1st char will be checked by `memchr' */ - l1 = l1-l2; /* `s2' cannot be found after that */ - while (l1 > 0 && (init = (const char *)memchr(s1, *s2, l1)) != NULL) { - init++; /* 1st char is already checked */ - if (memcmp(init, s2+1, l2) == 0) { - return init-1; - } else { /* correct `l1' and `s1' to try again */ - l1 -= (size_t)(init-s1); - s1 = init; - } - } - return NULL; /* not found */ - } -} - static void push_onecapture(MatchState *ms, int i, const char *s, const char *e) { if (i >= ms->level) { @@ -501,64 +447,60 @@ static int push_captures(MatchState *ms, const char *s, const char *e) return nlevels; /* number of strings pushed */ } -static ptrdiff_t posrelat(ptrdiff_t pos, size_t len) -{ - /* relative string position: negative means back from end */ - if (pos < 0) pos += (ptrdiff_t)len + 1; - return (pos >= 0) ? pos : 0; -} - static int str_find_aux(lua_State *L, int find) { - size_t l1, l2; - const char *s = luaL_checklstring(L, 1, &l1); - const char *p = luaL_checklstring(L, 2, &l2); - ptrdiff_t init = posrelat(luaL_optinteger(L, 3, 1), l1) - 1; - if (init < 0) { - init = 0; - } else if ((size_t)(init) > l1) { + GCstr *s = lj_lib_checkstr(L, 1); + GCstr *p = lj_lib_checkstr(L, 2); + int32_t start = lj_lib_optint(L, 3, 1); + MSize st; + if (start < 0) start += (int32_t)s->len; else start--; + if (start < 0) start = 0; + st = (MSize)start; + if (st > s->len) { #if LJ_52 setnilV(L->top-1); return 1; #else - init = (ptrdiff_t)l1; + st = s->len; #endif } - if (find && (lua_toboolean(L, 4) || /* explicit request? */ - strpbrk(p, SPECIALS) == NULL)) { /* or no special characters? */ - /* do a plain search */ - const char *s2 = lmemfind(s+init, l1-(size_t)init, p, l2); - if (s2) { - lua_pushinteger(L, s2-s+1); - lua_pushinteger(L, s2-s+(ptrdiff_t)l2); + if (find && ((L->base+3 < L->top && tvistruecond(L->base+3)) || + !lj_str_haspattern(p))) { /* Search for fixed string. */ + const char *q = lj_str_find(strdata(s)+st, strdata(p), s->len-st, p->len); + if (q) { + setintV(L->top-2, (int32_t)(q-strdata(s)) + 1); + setintV(L->top-1, (int32_t)(q-strdata(s)) + (int32_t)p->len); return 2; } - } else { + } else { /* Search for pattern. */ MatchState ms; - int anchor = (*p == '^') ? (p++, 1) : 0; - const char *s1=s+init; + const char *pstr = strdata(p); + const char *sstr = strdata(s) + st; + int anchor = 0; + if (*pstr == '^') { pstr++; anchor = 1; } ms.L = L; - ms.src_init = s; - ms.src_end = s+l1; - do { - const char *res; + ms.src_init = strdata(s); + ms.src_end = strdata(s) + s->len; + do { /* Loop through string and try to match the pattern. */ + const char *q; ms.level = ms.depth = 0; - if ((res=match(&ms, s1, p)) != NULL) { + q = match(&ms, sstr, pstr); + if (q) { if (find) { - lua_pushinteger(L, s1-s+1); /* start */ - lua_pushinteger(L, res-s); /* end */ - return push_captures(&ms, NULL, 0) + 2; + setintV(L->top++, (int32_t)(sstr-(strdata(s)-1))); + setintV(L->top++, (int32_t)(q-strdata(s))); + return push_captures(&ms, NULL, NULL) + 2; } else { - return push_captures(&ms, s1, res); + return push_captures(&ms, sstr, q); } } - } while (s1++ < ms.src_end && !anchor); + } while (sstr++ < ms.src_end && !anchor); } - lua_pushnil(L); /* not found */ + setnilV(L->top-1); /* Not found. */ return 1; } -LJLIB_CF(string_find) +LJLIB_CF(string_find) LJLIB_REC(.) { return str_find_aux(L, 1); } @@ -698,222 +640,16 @@ LJLIB_CF(string_gsub) /* ------------------------------------------------------------------------ */ -/* maximum size of each formatted item (> len(format('%99.99f', -1e308))) */ -#define MAX_FMTITEM 512 -/* valid flags in a format specification */ -#define FMT_FLAGS "-+ #0" -/* -** maximum size of each format specification (such as '%-099.99d') -** (+10 accounts for %99.99x plus margin of error) -*/ -#define MAX_FMTSPEC (sizeof(FMT_FLAGS) + sizeof(LUA_INTFRMLEN) + 10) - -static void addquoted(lua_State *L, luaL_Buffer *b, int arg) -{ - GCstr *str = lj_lib_checkstr(L, arg); - int32_t len = (int32_t)str->len; - const char *s = strdata(str); - luaL_addchar(b, '"'); - while (len--) { - uint32_t c = uchar(*s); - if (c == '"' || c == '\\' || c == '\n') { - luaL_addchar(b, '\\'); - } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ - uint32_t d; - luaL_addchar(b, '\\'); - if (c >= 100 || lj_char_isdigit(uchar(s[1]))) { - luaL_addchar(b, '0'+(c >= 100)); if (c >= 100) c -= 100; - goto tens; - } else if (c >= 10) { - tens: - d = (c * 205) >> 11; c -= d * 10; luaL_addchar(b, '0'+d); - } - c += '0'; - } - luaL_addchar(b, c); - s++; - } - luaL_addchar(b, '"'); -} - -static const char *scanformat(lua_State *L, const char *strfrmt, char *form) -{ - const char *p = strfrmt; - while (*p != '\0' && strchr(FMT_FLAGS, *p) != NULL) p++; /* skip flags */ - if ((size_t)(p - strfrmt) >= sizeof(FMT_FLAGS)) - lj_err_caller(L, LJ_ERR_STRFMTR); - if (lj_char_isdigit(uchar(*p))) p++; /* skip width */ - if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ - if (*p == '.') { - p++; - if (lj_char_isdigit(uchar(*p))) p++; /* skip precision */ - if (lj_char_isdigit(uchar(*p))) p++; /* (2 digits at most) */ - } - if (lj_char_isdigit(uchar(*p))) - lj_err_caller(L, LJ_ERR_STRFMTW); - *(form++) = '%'; - strncpy(form, strfrmt, (size_t)(p - strfrmt + 1)); - form += p - strfrmt + 1; - *form = '\0'; - return p; -} - -static void addintlen(char *form) -{ - size_t l = strlen(form); - char spec = form[l - 1]; - strcpy(form + l - 1, LUA_INTFRMLEN); - form[l + sizeof(LUA_INTFRMLEN) - 2] = spec; - form[l + sizeof(LUA_INTFRMLEN) - 1] = '\0'; -} - -static unsigned LUA_INTFRM_T num2intfrm(lua_State *L, int arg) -{ - if (sizeof(LUA_INTFRM_T) == 4) { - return (LUA_INTFRM_T)lj_lib_checkbit(L, arg); - } else { - cTValue *o; - lj_lib_checknumber(L, arg); - o = L->base+arg-1; - if (tvisint(o)) - return (LUA_INTFRM_T)intV(o); - else - return (LUA_INTFRM_T)numV(o); - } -} - -static unsigned LUA_INTFRM_T num2uintfrm(lua_State *L, int arg) +LJLIB_CF(string_format) LJLIB_REC(.) { - if (sizeof(LUA_INTFRM_T) == 4) { - return (unsigned LUA_INTFRM_T)lj_lib_checkbit(L, arg); - } else { - cTValue *o; - lj_lib_checknumber(L, arg); - o = L->base+arg-1; - if (tvisint(o)) - return (unsigned LUA_INTFRM_T)intV(o); - else if ((int32_t)o->u32.hi < 0) - return (unsigned LUA_INTFRM_T)(LUA_INTFRM_T)numV(o); - else - return (unsigned LUA_INTFRM_T)numV(o); - } -} - -static GCstr *meta_tostring(lua_State *L, int arg) -{ - TValue *o = L->base+arg-1; - cTValue *mo; - lua_assert(o < L->top); /* Caller already checks for existence. */ - if (LJ_LIKELY(tvisstr(o))) - return strV(o); - if (!tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { - copyTV(L, L->top++, mo); - copyTV(L, L->top++, o); - lua_call(L, 1, 1); - L->top--; - if (tvisstr(L->top)) - return strV(L->top); - o = L->base+arg-1; - copyTV(L, o, L->top); - } - if (tvisnumber(o)) { - return lj_str_fromnumber(L, o); - } else if (tvisnil(o)) { - return lj_str_newlit(L, "nil"); - } else if (tvisfalse(o)) { - return lj_str_newlit(L, "false"); - } else if (tvistrue(o)) { - return lj_str_newlit(L, "true"); - } else { - if (tvisfunc(o) && isffunc(funcV(o))) - lj_str_pushf(L, "function: builtin#%d", funcV(o)->c.ffid); - else - lj_str_pushf(L, "%s: %p", lj_typename(o), lua_topointer(L, arg)); - L->top--; - return strV(L->top); - } -} - -LJLIB_CF(string_format) -{ - int arg = 1, top = (int)(L->top - L->base); - GCstr *fmt = lj_lib_checkstr(L, arg); - const char *strfrmt = strdata(fmt); - const char *strfrmt_end = strfrmt + fmt->len; - luaL_Buffer b; - luaL_buffinit(L, &b); - while (strfrmt < strfrmt_end) { - if (*strfrmt != L_ESC) { - luaL_addchar(&b, *strfrmt++); - } else if (*++strfrmt == L_ESC) { - luaL_addchar(&b, *strfrmt++); /* %% */ - } else { /* format item */ - char form[MAX_FMTSPEC]; /* to store the format (`%...') */ - char buff[MAX_FMTITEM]; /* to store the formatted item */ - int n = 0; - if (++arg > top) - luaL_argerror(L, arg, lj_obj_typename[0]); - strfrmt = scanformat(L, strfrmt, form); - switch (*strfrmt++) { - case 'c': - n = sprintf(buff, form, lj_lib_checkint(L, arg)); - break; - case 'd': case 'i': - addintlen(form); - n = sprintf(buff, form, num2intfrm(L, arg)); - break; - case 'o': case 'u': case 'x': case 'X': - addintlen(form); - n = sprintf(buff, form, num2uintfrm(L, arg)); - break; - case 'e': case 'E': case 'f': case 'g': case 'G': case 'a': case 'A': { - TValue tv; - tv.n = lj_lib_checknum(L, arg); - if (LJ_UNLIKELY((tv.u32.hi << 1) >= 0xffe00000)) { - /* Canonicalize output of non-finite values. */ - char *p, nbuf[LJ_STR_NUMBUF]; - size_t len = lj_str_bufnum(nbuf, &tv); - if (strfrmt[-1] < 'a') { - nbuf[len-3] = nbuf[len-3] - 0x20; - nbuf[len-2] = nbuf[len-2] - 0x20; - nbuf[len-1] = nbuf[len-1] - 0x20; - } - nbuf[len] = '\0'; - for (p = form; *p < 'A' && *p != '.'; p++) ; - *p++ = 's'; *p = '\0'; - n = sprintf(buff, form, nbuf); - break; - } - n = sprintf(buff, form, (double)tv.n); - break; - } - case 'q': - addquoted(L, &b, arg); - continue; - case 'p': - lj_str_pushf(L, "%p", lua_topointer(L, arg)); - luaL_addvalue(&b); - continue; - case 's': { - GCstr *str = meta_tostring(L, arg); - if (!strchr(form, '.') && str->len >= 100) { - /* no precision and string is too long to be formatted; - keep original string */ - setstrV(L, L->top++, str); - luaL_addvalue(&b); - continue; - } - n = sprintf(buff, form, strdata(str)); - break; - } - default: - lj_err_callerv(L, LJ_ERR_STRFMTO, *(strfrmt -1)); - break; - } - luaL_addlstring(&b, buff, n); - } - } - luaL_pushresult(&b); + int retry = 0; + SBuf *sb; + do { + sb = lj_buf_tmp_(L); + retry = lj_strfmt_putarg(L, sb, 1, -retry); + } while (retry > 0); + setstrV(L, L->top-1, lj_buf_str(L, sb)); + lj_gc_check(L); return 1; } @@ -926,16 +662,15 @@ LUALIB_API int luaopen_string(lua_State *L) GCtab *mt; global_State *g; LJ_LIB_REG(L, LUA_STRLIBNAME, string); -#if defined(LUA_COMPAT_GFIND) && !LJ_52 - lua_getfield(L, -1, "gmatch"); - lua_setfield(L, -2, "gfind"); -#endif mt = lj_tab_new(L, 0, 1); /* NOBARRIER: basemt is a GC root. */ g = G(L); setgcref(basemt_it(g, LJ_TSTR), obj2gco(mt)); settabV(L, lj_tab_setstr(L, mt, mmname_str(g, MM_index)), tabV(L->top-1)); mt->nomm = (uint8_t)(~(1u<<MM_index)); +#if LJ_HASBUFFER + lj_lib_prereg(L, LUA_STRLIBNAME ".buffer", luaopen_string_buffer, tabV(L->top-1)); +#endif return 1; } diff --git a/src/lib_table.c b/src/lib_table.c index dc89116f..a723326a 100644 --- a/src/lib_table.c +++ b/src/lib_table.c @@ -16,57 +16,43 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_err.h" +#include "lj_buf.h" #include "lj_tab.h" +#include "lj_ff.h" #include "lj_lib.h" /* ------------------------------------------------------------------------ */ #define LJLIB_MODULE_table -LJLIB_CF(table_foreachi) -{ - GCtab *t = lj_lib_checktab(L, 1); - GCfunc *func = lj_lib_checkfunc(L, 2); - MSize i, n = lj_tab_len(t); - for (i = 1; i <= n; i++) { - cTValue *val; - setfuncV(L, L->top, func); - setintV(L->top+1, i); - val = lj_tab_getint(t, (int32_t)i); - if (val) { copyTV(L, L->top+2, val); } else { setnilV(L->top+2); } - L->top += 3; - lua_call(L, 2, 1); - if (!tvisnil(L->top-1)) - return 1; - L->top--; - } - return 0; -} +LJLIB_LUA(table_foreachi) /* + function(t, f) + CHECK_tab(t) + CHECK_func(f) + for i=1,#t do + local r = f(i, t[i]) + if r ~= nil then return r end + end + end +*/ -LJLIB_CF(table_foreach) -{ - GCtab *t = lj_lib_checktab(L, 1); - GCfunc *func = lj_lib_checkfunc(L, 2); - L->top = L->base+3; - setnilV(L->top-1); - while (lj_tab_next(L, t, L->top-1)) { - copyTV(L, L->top+2, L->top); - copyTV(L, L->top+1, L->top-1); - setfuncV(L, L->top, func); - L->top += 3; - lua_call(L, 2, 1); - if (!tvisnil(L->top-1)) - return 1; - L->top--; - } - return 0; -} +LJLIB_LUA(table_foreach) /* + function(t, f) + CHECK_tab(t) + CHECK_func(f) + for k, v in PAIRS(t) do + local r = f(k, v) + if r ~= nil then return r end + end + end +*/ -LJLIB_ASM(table_getn) LJLIB_REC(.) -{ - lj_lib_checktab(L, 1); - return FFH_UNREACHABLE; -} +LJLIB_LUA(table_getn) /* + function(t) + CHECK_tab(t) + return #t + end +*/ LJLIB_CF(table_maxn) { @@ -119,52 +105,67 @@ LJLIB_CF(table_insert) LJLIB_REC(.) return 0; } -LJLIB_CF(table_remove) LJLIB_REC(.) -{ - GCtab *t = lj_lib_checktab(L, 1); - int32_t e = (int32_t)lj_tab_len(t); - int32_t pos = lj_lib_optint(L, 2, e); - if (!(1 <= pos && pos <= e)) /* Nothing to remove? */ - return 0; - lua_rawgeti(L, 1, pos); /* Get previous value. */ - /* NOBARRIER: This just moves existing elements around. */ - for (; pos < e; pos++) { - cTValue *src = lj_tab_getint(t, pos+1); - TValue *dst = lj_tab_setint(L, t, pos); - if (src) { - copyTV(L, dst, src); - } else { - setnilV(dst); - } - } - setnilV(lj_tab_setint(L, t, e)); /* Remove (last) value. */ - return 1; /* Return previous value. */ -} +LJLIB_LUA(table_remove) /* + function(t, pos) + CHECK_tab(t) + local len = #t + if pos == nil then + if len ~= 0 then + local old = t[len] + t[len] = nil + return old + end + else + CHECK_int(pos) + if pos >= 1 and pos <= len then + local old = t[pos] + for i=pos+1,len do + t[i-1] = t[i] + end + t[len] = nil + return old + end + end + end +*/ + +LJLIB_LUA(table_move) /* + function(a1, f, e, t, a2) + CHECK_tab(a1) + CHECK_int(f) + CHECK_int(e) + CHECK_int(t) + if a2 == nil then a2 = a1 end + CHECK_tab(a2) + if e >= f then + local d = t - f + if t > e or t <= f or a2 ~= a1 then + for i=f,e do a2[i+d] = a1[i] end + else + for i=e,f,-1 do a2[i+d] = a1[i] end + end + end + return a2 + end +*/ -LJLIB_CF(table_concat) +LJLIB_CF(table_concat) LJLIB_REC(.) { - luaL_Buffer b; GCtab *t = lj_lib_checktab(L, 1); GCstr *sep = lj_lib_optstr(L, 2); - MSize seplen = sep ? sep->len : 0; int32_t i = lj_lib_optint(L, 3, 1); int32_t e = (L->base+3 < L->top && !tvisnil(L->base+3)) ? lj_lib_checkint(L, 4) : (int32_t)lj_tab_len(t); - luaL_buffinit(L, &b); - if (i <= e) { - for (;;) { - cTValue *o; - lua_rawgeti(L, 1, i); - o = L->top-1; - if (!(tvisstr(o) || tvisnumber(o))) - lj_err_callerv(L, LJ_ERR_TABCAT, lj_typename(o), i); - luaL_addvalue(&b); - if (i++ == e) break; - if (seplen) - luaL_addlstring(&b, strdata(sep), seplen); - } + SBuf *sb = lj_buf_tmp_(L); + SBuf *sbx = lj_buf_puttab(sb, t, sep, i, e); + if (LJ_UNLIKELY(!sbx)) { /* Error: bad element type. */ + int32_t idx = (int32_t)(intptr_t)sb->w; + cTValue *o = lj_tab_getint(t, idx); + lj_err_callerv(L, LJ_ERR_TABCAT, + lj_obj_itypename[o ? itypemap(o) : ~LJ_TNIL], idx); } - luaL_pushresult(&b); + setstrV(L, L->top-1, lj_buf_str(L, sbx)); + lj_gc_check(L); return 1; } @@ -284,6 +285,30 @@ LJLIB_CF(table_pack) } #endif +LJLIB_NOREG LJLIB_CF(table_new) LJLIB_REC(.) +{ + int32_t a = lj_lib_checkint(L, 1); + int32_t h = lj_lib_checkint(L, 2); + lua_createtable(L, a, h); + return 1; +} + +LJLIB_NOREG LJLIB_CF(table_clear) LJLIB_REC(.) +{ + lj_tab_clear(lj_lib_checktab(L, 1)); + return 0; +} + +static int luaopen_table_new(lua_State *L) +{ + return lj_lib_postreg(L, lj_cf_table_new, FF_table_new, "new"); +} + +static int luaopen_table_clear(lua_State *L) +{ + return lj_lib_postreg(L, lj_cf_table_clear, FF_table_clear, "clear"); +} + /* ------------------------------------------------------------------------ */ #include "lj_libdef.h" @@ -295,6 +320,8 @@ LUALIB_API int luaopen_table(lua_State *L) lua_getglobal(L, "unpack"); lua_setfield(L, -2, "unpack"); #endif + lj_lib_prereg(L, LUA_TABLIBNAME ".new", luaopen_table_new, tabV(L->top-1)); + lj_lib_prereg(L, LUA_TABLIBNAME ".clear", luaopen_table_clear, tabV(L->top-1)); return 1; } diff --git a/src/lj.supp b/src/lj.supp deleted file mode 100644 index 217f7c89..00000000 --- a/src/lj.supp +++ /dev/null @@ -1,41 +0,0 @@ -# Valgrind suppression file for LuaJIT 2.0. -{ - Optimized string compare - Memcheck:Addr4 - fun:lj_str_cmp -} -{ - Optimized string compare - Memcheck:Addr1 - fun:lj_str_cmp -} -{ - Optimized string compare - Memcheck:Addr4 - fun:lj_str_new -} -{ - Optimized string compare - Memcheck:Addr1 - fun:lj_str_new -} -{ - Optimized string compare - Memcheck:Cond - fun:lj_str_new -} -{ - Optimized string compare - Memcheck:Addr4 - fun:str_fastcmp -} -{ - Optimized string compare - Memcheck:Addr1 - fun:str_fastcmp -} -{ - Optimized string compare - Memcheck:Cond - fun:str_fastcmp -} diff --git a/src/lj_alloc.c b/src/lj_alloc.c index 9adaa0e5..20e60493 100644 --- a/src/lj_alloc.c +++ b/src/lj_alloc.c @@ -31,6 +31,7 @@ #include "lj_def.h" #include "lj_arch.h" #include "lj_alloc.h" +#include "lj_prng.h" #ifndef LUAJIT_USE_SYSMALLOC @@ -72,15 +73,58 @@ #define IS_DIRECT_BIT (SIZE_T_ONE) + +/* Determine system-specific block allocation method. */ #if LJ_TARGET_WINDOWS #define WIN32_LEAN_AND_MEAN #include <windows.h> +#define LJ_ALLOC_VIRTUALALLOC 1 + +#if LJ_64 && !LJ_GC64 +#define LJ_ALLOC_NTAVM 1 +#endif + +#else + +#include <errno.h> +/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */ +#include <sys/mman.h> + +#define LJ_ALLOC_MMAP 1 + #if LJ_64 +#define LJ_ALLOC_MMAP_PROBE 1 + +#if LJ_GC64 +#define LJ_ALLOC_MBITS 47 /* 128 TB in LJ_GC64 mode. */ +#elif LJ_TARGET_X64 && LJ_HASJIT +/* Due to limitations in the x64 compiler backend. */ +#define LJ_ALLOC_MBITS 31 /* 2 GB on x64 with !LJ_GC64. */ +#else +#define LJ_ALLOC_MBITS 32 /* 4 GB on other archs with !LJ_GC64. */ +#endif + +#endif + +#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT) +#define LJ_ALLOC_MMAP32 1 +#endif + +#if LJ_TARGET_LINUX +#define LJ_ALLOC_MREMAP 1 +#endif + +#endif + + +#if LJ_ALLOC_VIRTUALALLOC + +#if LJ_ALLOC_NTAVM /* Undocumented, but hey, that's what we all love so much about Windows. */ -typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG zbits, +typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits, size_t *size, ULONG alloctype, ULONG prot); static PNTAVM ntavm; @@ -89,14 +133,15 @@ static PNTAVM ntavm; */ #define NTAVM_ZEROBITS 1 -static void INIT_MMAP(void) +static void init_mmap(void) { ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"), "NtAllocateVirtualMemory"); } +#define INIT_MMAP() init_mmap() /* Win64 32 bit MMAP via NtAllocateVirtualMemory. */ -static LJ_AINLINE void *CALL_MMAP(size_t size) +static void *mmap_plain(size_t size) { DWORD olderr = GetLastError(); void *ptr = NULL; @@ -107,7 +152,7 @@ static LJ_AINLINE void *CALL_MMAP(size_t size) } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static LJ_AINLINE void *DIRECT_MMAP(size_t size) +static void *direct_mmap(size_t size) { DWORD olderr = GetLastError(); void *ptr = NULL; @@ -119,31 +164,32 @@ static LJ_AINLINE void *DIRECT_MMAP(size_t size) #else -#define INIT_MMAP() ((void)0) - /* Win32 MMAP via VirtualAlloc */ -static LJ_AINLINE void *CALL_MMAP(size_t size) +static void *mmap_plain(size_t size) { DWORD olderr = GetLastError(); - void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); SetLastError(olderr); return ptr ? ptr : MFAIL; } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static LJ_AINLINE void *DIRECT_MMAP(size_t size) +static void *direct_mmap(size_t size) { DWORD olderr = GetLastError(); - void *ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, - PAGE_READWRITE); + void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, + PAGE_READWRITE); SetLastError(olderr); return ptr ? ptr : MFAIL; } #endif +#define CALL_MMAP(prng, size) mmap_plain(size) +#define DIRECT_MMAP(prng, size) direct_mmap(size) + /* This function supports releasing coalesed segments */ -static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) +static int CALL_MUNMAP(void *ptr, size_t size) { DWORD olderr = GetLastError(); MEMORY_BASIC_INFORMATION minfo; @@ -163,10 +209,7 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) return 0; } -#else - -#include <errno.h> -#include <sys/mman.h> +#elif LJ_ALLOC_MMAP #define MMAP_PROT (PROT_READ|PROT_WRITE) #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) @@ -174,105 +217,134 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) #endif #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) -#if LJ_64 -/* 64 bit mode needs special support for allocating memory in the lower 2GB. */ +#if LJ_ALLOC_MMAP_PROBE -#if defined(MAP_32BIT) - -#if defined(__sun__) -#define MMAP_REGION_START ((uintptr_t)0x1000) +#ifdef MAP_TRYFIXED +#define MMAP_FLAGS_PROBE (MMAP_FLAGS|MAP_TRYFIXED) #else -/* Actually this only gives us max. 1GB in current Linux kernels. */ -#define MMAP_REGION_START ((uintptr_t)0) +#define MMAP_FLAGS_PROBE MMAP_FLAGS #endif -static LJ_AINLINE void *CALL_MMAP(size_t size) +#define LJ_ALLOC_MMAP_PROBE_MAX 30 +#define LJ_ALLOC_MMAP_PROBE_LINEAR 5 + +#define LJ_ALLOC_MMAP_PROBE_LOWER ((uintptr_t)0x4000) + +static void *mmap_probe(PRNGState *rs, size_t size) { + /* Hint for next allocation. Doesn't need to be thread-safe. */ + static uintptr_t hint_addr = 0; int olderr = errno; - void *ptr = mmap((void *)MMAP_REGION_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); + int retry; + for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) { + void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0); + uintptr_t addr = (uintptr_t)p; + if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER && + ((addr + size) >> LJ_ALLOC_MBITS) == 0) { + /* We got a suitable address. Bump the hint address. */ + hint_addr = addr + size; + errno = olderr; + return p; + } + if (p != MFAIL) { + munmap(p, size); + } else if (errno == ENOMEM) { + return MFAIL; + } + if (hint_addr) { + /* First, try linear probing. */ + if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) { + hint_addr += 0x1000000; + if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0) + hint_addr = 0; + continue; + } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) { + /* Next, try a no-hint probe to get back an ASLR address. */ + hint_addr = 0; + continue; + } + } + /* Finally, try pseudo-random probing. */ + do { + hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<<LJ_ALLOC_MBITS)-LJ_PAGESIZE); + } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER); + } errno = olderr; - return ptr; + return MFAIL; } -#elif LJ_TARGET_OSX || LJ_TARGET_PS4 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__sun__) || LJ_TARGET_CYGWIN +#endif + +#if LJ_ALLOC_MMAP32 -/* OSX and FreeBSD mmap() use a naive first-fit linear search. -** That's perfect for us. Except that -pagezero_size must be set for OSX, -** otherwise the lower 4GB are blocked. And the 32GB RLIMIT_DATA needs -** to be reduced to 250MB on FreeBSD. -*/ -#if LJ_TARGET_OSX || defined(__DragonFly__) -#define MMAP_REGION_START ((uintptr_t)0x10000) -#elif LJ_TARGET_PS4 -#define MMAP_REGION_START ((uintptr_t)0x4000) +#if LJ_TARGET_SOLARIS +#define LJ_ALLOC_MMAP32_START ((uintptr_t)0x1000) #else -#define MMAP_REGION_START ((uintptr_t)0x10000000) +#define LJ_ALLOC_MMAP32_START ((uintptr_t)0) #endif -#define MMAP_REGION_END ((uintptr_t)0x80000000) -#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 -#include <sys/resource.h> +#if LJ_ALLOC_MMAP_PROBE +static void *mmap_map32(PRNGState *rs, size_t size) +#else +static void *mmap_map32(size_t size) #endif - -static LJ_AINLINE void *CALL_MMAP(size_t size) { - int olderr = errno; - /* Hint for next allocation. Doesn't need to be thread-safe. */ - static uintptr_t alloc_hint = MMAP_REGION_START; - int retry = 0; -#if (defined(__FreeBSD__) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 - static int rlimit_modified = 0; - if (LJ_UNLIKELY(rlimit_modified == 0)) { - struct rlimit rlim; - rlim.rlim_cur = rlim.rlim_max = MMAP_REGION_START; - setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail below. */ - rlimit_modified = 1; - } +#if LJ_ALLOC_MMAP_PROBE + static int fallback = 0; + if (fallback) + return mmap_probe(rs, size); #endif - for (;;) { - void *p = mmap((void *)alloc_hint, size, MMAP_PROT, MMAP_FLAGS, -1, 0); - if ((uintptr_t)p >= MMAP_REGION_START && - (uintptr_t)p + size < MMAP_REGION_END) { - alloc_hint = (uintptr_t)p + size; - errno = olderr; - return p; + { + int olderr = errno; + void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0); + errno = olderr; + /* This only allows 1GB on Linux. So fallback to probing to get 2GB. */ +#if LJ_ALLOC_MMAP_PROBE + if (ptr == MFAIL) { + fallback = 1; + return mmap_probe(rs, size); } - if (p != CMFAIL) munmap(p, size); -#if defined(__sun__) || defined(__DragonFly__) - alloc_hint += 0x1000000; /* Need near-exhaustive linear scan. */ - if (alloc_hint + size < MMAP_REGION_END) continue; #endif - if (retry) break; - retry = 1; - alloc_hint = MMAP_REGION_START; + return ptr; } - errno = olderr; - return CMFAIL; } -#else - -#error "NYI: need an equivalent of MAP_32BIT for this 64 bit OS" - #endif +#if LJ_ALLOC_MMAP32 +#if LJ_ALLOC_MMAP_PROBE +#define CALL_MMAP(prng, size) mmap_map32(prng, size) #else - -/* 32 bit mode is easy. */ -static LJ_AINLINE void *CALL_MMAP(size_t size) +#define CALL_MMAP(prng, size) mmap_map32(size) +#endif +#elif LJ_ALLOC_MMAP_PROBE +#define CALL_MMAP(prng, size) mmap_probe(prng, size) +#else +static void *mmap_plain(size_t size) { int olderr = errno; void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0); errno = olderr; return ptr; } - +#define CALL_MMAP(prng, size) mmap_plain(size) #endif -#define INIT_MMAP() ((void)0) -#define DIRECT_MMAP(s) CALL_MMAP(s) +#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4 && !LJ_TARGET_PS5 + +#include <sys/resource.h> -static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) +static void init_mmap(void) +{ + struct rlimit rlim; + rlim.rlim_cur = rlim.rlim_max = 0x10000; + setrlimit(RLIMIT_DATA, &rlim); /* Ignore result. May fail later. */ +} +#define INIT_MMAP() init_mmap() + +#endif + +static int CALL_MUNMAP(void *ptr, size_t size) { int olderr = errno; int ret = munmap(ptr, size); @@ -280,10 +352,9 @@ static LJ_AINLINE int CALL_MUNMAP(void *ptr, size_t size) return ret; } -#if LJ_TARGET_LINUX +#if LJ_ALLOC_MREMAP /* Need to define _GNU_SOURCE to get the mremap prototype. */ -static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, - int flags) +static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags) { int olderr = errno; ptr = mremap(ptr, osz, nsz, flags); @@ -294,7 +365,7 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) #define CALL_MREMAP_NOMOVE 0 #define CALL_MREMAP_MAYMOVE 1 -#if LJ_64 +#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64) #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE #else #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE @@ -303,6 +374,15 @@ static LJ_AINLINE void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, #endif + +#ifndef INIT_MMAP +#define INIT_MMAP() ((void)0) +#endif + +#ifndef DIRECT_MMAP +#define DIRECT_MMAP(prng, s) CALL_MMAP(prng, s) +#endif + #ifndef CALL_MREMAP #define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL) #endif @@ -459,6 +539,7 @@ struct malloc_state { mchunkptr smallbins[(NSMALLBINS+1)*2]; tbinptr treebins[NTREEBINS]; msegment seg; + PRNGState *prng; }; typedef struct malloc_state *mstate; @@ -516,7 +597,7 @@ static int has_segment_link(mstate m, msegmentptr ss) noncontiguous segments are added. */ #define TOP_FOOT_SIZE\ - (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) + (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) /* ---------------------------- Indexing Bins ---------------------------- */ @@ -741,11 +822,11 @@ static int has_segment_link(mstate m, msegmentptr ss) /* ----------------------- Direct-mmapping chunks ----------------------- */ -static void *direct_alloc(size_t nb) +static void *direct_alloc(mstate m, size_t nb) { size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); if (LJ_LIKELY(mmsize > nb)) { /* Check for wrap around 0 */ - char *mm = (char *)(DIRECT_MMAP(mmsize)); + char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize)); if (mm != CMFAIL) { size_t offset = align_offset(chunk2mem(mm)); size_t psize = mmsize - offset - DIRECT_FOOT_PAD; @@ -757,6 +838,7 @@ static void *direct_alloc(size_t nb) return chunk2mem(p); } } + UNUSED(m); return NULL; } @@ -905,7 +987,7 @@ static void *alloc_sys(mstate m, size_t nb) /* Directly map large chunks */ if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) { - void *mem = direct_alloc(nb); + void *mem = direct_alloc(m, nb); if (mem != 0) return mem; } @@ -914,7 +996,7 @@ static void *alloc_sys(mstate m, size_t nb) size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; size_t rsize = granularity_align(req); if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */ - char *mp = (char *)(CALL_MMAP(rsize)); + char *mp = (char *)(CALL_MMAP(m->prng, rsize)); if (mp != CMFAIL) { tbase = mp; tsize = rsize; @@ -1141,12 +1223,13 @@ static void *tmalloc_small(mstate m, size_t nb) /* ----------------------------------------------------------------------- */ -void *lj_alloc_create(void) +void *lj_alloc_create(PRNGState *rs) { size_t tsize = DEFAULT_GRANULARITY; char *tbase; INIT_MMAP(); - tbase = (char *)(CALL_MMAP(tsize)); + UNUSED(rs); + tbase = (char *)(CALL_MMAP(rs, tsize)); if (tbase != CMFAIL) { size_t msize = pad_request(sizeof(struct malloc_state)); mchunkptr mn; @@ -1165,6 +1248,12 @@ void *lj_alloc_create(void) return NULL; } +void lj_alloc_setprng(void *msp, PRNGState *rs) +{ + mstate ms = (mstate)msp; + ms->prng = rs; +} + void lj_alloc_destroy(void *msp) { mstate ms = (mstate)msp; diff --git a/src/lj_alloc.h b/src/lj_alloc.h index f87a7cf3..669f50b7 100644 --- a/src/lj_alloc.h +++ b/src/lj_alloc.h @@ -9,7 +9,8 @@ #include "lj_def.h" #ifndef LUAJIT_USE_SYSMALLOC -LJ_FUNC void *lj_alloc_create(void); +LJ_FUNC void *lj_alloc_create(PRNGState *rs); +LJ_FUNC void lj_alloc_setprng(void *msp, PRNGState *rs); LJ_FUNC void lj_alloc_destroy(void *msp); LJ_FUNC void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize); #endif diff --git a/src/lj_api.c b/src/lj_api.c index 04a41792..e6b67478 100644 --- a/src/lj_api.c +++ b/src/lj_api.c @@ -24,11 +24,12 @@ #include "lj_trace.h" #include "lj_vm.h" #include "lj_strscan.h" +#include "lj_strfmt.h" /* -- Common helper functions --------------------------------------------- */ -#define api_checknelems(L, n) api_check(L, (n) <= (L->top - L->base)) -#define api_checkvalidindex(L, i) api_check(L, (i) != niltv(L)) +#define lj_checkapi_slot(idx) \ + lj_checkapi((idx) <= (L->top - L->base), "stack slot %d out of range", (idx)) static TValue *index2adr(lua_State *L, int idx) { @@ -36,7 +37,8 @@ static TValue *index2adr(lua_State *L, int idx) TValue *o = L->base + (idx - 1); return o < L->top ? o : niltv(L); } else if (idx > LUA_REGISTRYINDEX) { - api_check(L, idx != 0 && -idx <= L->top - L->base); + lj_checkapi(idx != 0 && -idx <= L->top - L->base, + "bad stack slot %d", idx); return L->top + idx; } else if (idx == LUA_GLOBALSINDEX) { TValue *o = &G(L)->tmptv; @@ -46,7 +48,8 @@ static TValue *index2adr(lua_State *L, int idx) return registry(L); } else { GCfunc *fn = curr_func(L); - api_check(L, fn->c.gct == ~LJ_TFUNC && !isluafunc(fn)); + lj_checkapi(fn->c.gct == ~LJ_TFUNC && !isluafunc(fn), + "calling frame is not a C function"); if (idx == LUA_ENVIRONINDEX) { TValue *o = &G(L)->tmptv; settabV(L, o, tabref(fn->c.env)); @@ -58,13 +61,27 @@ static TValue *index2adr(lua_State *L, int idx) } } -static TValue *stkindex2adr(lua_State *L, int idx) +static LJ_AINLINE TValue *index2adr_check(lua_State *L, int idx) +{ + TValue *o = index2adr(L, idx); + lj_checkapi(o != niltv(L), "invalid stack slot %d", idx); + return o; +} + +static TValue *index2adr_stack(lua_State *L, int idx) { if (idx > 0) { TValue *o = L->base + (idx - 1); + if (o < L->top) { + return o; + } else { + lj_checkapi(0, "invalid stack slot %d", idx); + return niltv(L); + } return o < L->top ? o : niltv(L); } else { - api_check(L, idx != 0 && -idx <= L->top - L->base); + lj_checkapi(idx != 0 && -idx <= L->top - L->base, + "invalid stack slot %d", idx); return L->top + idx; } } @@ -98,17 +115,24 @@ LUALIB_API void luaL_checkstack(lua_State *L, int size, const char *msg) lj_err_callerv(L, LJ_ERR_STKOVM, msg); } -LUA_API void lua_xmove(lua_State *from, lua_State *to, int n) +LUA_API void lua_xmove(lua_State *L, lua_State *to, int n) { TValue *f, *t; - if (from == to) return; - api_checknelems(from, n); - api_check(from, G(from) == G(to)); + if (L == to) return; + lj_checkapi_slot(n); + lj_checkapi(G(L) == G(to), "move across global states"); lj_state_checkstack(to, (MSize)n); - f = from->top; + f = L->top; t = to->top = to->top + n; while (--n >= 0) copyTV(to, --t, --f); - from->top = f; + L->top = f; +} + +LUA_API const lua_Number *lua_version(lua_State *L) +{ + static const lua_Number version = LUA_VERSION_NUM; + UNUSED(L); + return &version; } /* -- Stack manipulation -------------------------------------------------- */ @@ -121,7 +145,7 @@ LUA_API int lua_gettop(lua_State *L) LUA_API void lua_settop(lua_State *L, int idx) { if (idx >= 0) { - api_check(L, idx <= tvref(L->maxstack) - L->base); + lj_checkapi(idx <= tvref(L->maxstack) - L->base, "bad stack slot %d", idx); if (L->base + idx > L->top) { if (L->base + idx >= tvref(L->maxstack)) lj_state_growstack(L, (MSize)idx - (MSize)(L->top - L->base)); @@ -130,51 +154,58 @@ LUA_API void lua_settop(lua_State *L, int idx) L->top = L->base + idx; } } else { - api_check(L, -(idx+1) <= (L->top - L->base)); + lj_checkapi(-(idx+1) <= (L->top - L->base), "bad stack slot %d", idx); L->top += idx+1; /* Shrinks top (idx < 0). */ } } LUA_API void lua_remove(lua_State *L, int idx) { - TValue *p = stkindex2adr(L, idx); - api_checkvalidindex(L, p); + TValue *p = index2adr_stack(L, idx); while (++p < L->top) copyTV(L, p-1, p); L->top--; } LUA_API void lua_insert(lua_State *L, int idx) { - TValue *q, *p = stkindex2adr(L, idx); - api_checkvalidindex(L, p); + TValue *q, *p = index2adr_stack(L, idx); for (q = L->top; q > p; q--) copyTV(L, q, q-1); copyTV(L, p, L->top); } -LUA_API void lua_replace(lua_State *L, int idx) +static void copy_slot(lua_State *L, TValue *f, int idx) { - api_checknelems(L, 1); if (idx == LUA_GLOBALSINDEX) { - api_check(L, tvistab(L->top-1)); + lj_checkapi(tvistab(f), "stack slot %d is not a table", idx); /* NOBARRIER: A thread (i.e. L) is never black. */ - setgcref(L->env, obj2gco(tabV(L->top-1))); + setgcref(L->env, obj2gco(tabV(f))); } else if (idx == LUA_ENVIRONINDEX) { GCfunc *fn = curr_func(L); if (fn->c.gct != ~LJ_TFUNC) lj_err_msg(L, LJ_ERR_NOENV); - api_check(L, tvistab(L->top-1)); - setgcref(fn->c.env, obj2gco(tabV(L->top-1))); - lj_gc_barrier(L, fn, L->top-1); + lj_checkapi(tvistab(f), "stack slot %d is not a table", idx); + setgcref(fn->c.env, obj2gco(tabV(f))); + lj_gc_barrier(L, fn, f); } else { - TValue *o = index2adr(L, idx); - api_checkvalidindex(L, o); - copyTV(L, o, L->top-1); + TValue *o = index2adr_check(L, idx); + copyTV(L, o, f); if (idx < LUA_GLOBALSINDEX) /* Need a barrier for upvalues. */ - lj_gc_barrier(L, curr_func(L), L->top-1); + lj_gc_barrier(L, curr_func(L), f); } +} + +LUA_API void lua_replace(lua_State *L, int idx) +{ + lj_checkapi_slot(1); + copy_slot(L, L->top - 1, idx); L->top--; } +LUA_API void lua_copy(lua_State *L, int fromidx, int toidx) +{ + copy_slot(L, index2adr(L, fromidx), toidx); +} + LUA_API void lua_pushvalue(lua_State *L, int idx) { copyTV(L, L->top, index2adr(L, idx)); @@ -188,7 +219,7 @@ LUA_API int lua_type(lua_State *L, int idx) cTValue *o = index2adr(L, idx); if (tvisnumber(o)) { return LUA_TNUMBER; -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (tvislightud(o)) { return LUA_TLIGHTUSERDATA; #endif @@ -201,7 +232,7 @@ LUA_API int lua_type(lua_State *L, int idx) #else int tt = (int)(((t < 8 ? 0x98042110u : 0x75a06u) >> 4*(t&7)) & 15u); #endif - lua_assert(tt != LUA_TNIL || tvisnil(o)); + lj_assertL(tt != LUA_TNIL || tvisnil(o), "bad tag conversion"); return tt; } } @@ -268,7 +299,7 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2) return 0; } else if (tvispri(o1)) { return o1 != niltv(L) && o2 != niltv(L); -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (tvislightud(o1)) { return o1->u64 == o2->u64; #endif @@ -283,8 +314,8 @@ LUA_API int lua_equal(lua_State *L, int idx1, int idx2) } else { L->top = base+2; lj_vm_call(L, base, 1+1); - L->top -= 2; - return tvistruecond(L->top+1); + L->top -= 2+LJ_FR2; + return tvistruecond(L->top+1+LJ_FR2); } } } @@ -306,8 +337,8 @@ LUA_API int lua_lessthan(lua_State *L, int idx1, int idx2) } else { L->top = base+2; lj_vm_call(L, base, 1+1); - L->top -= 2; - return tvistruecond(L->top+1); + L->top -= 2+LJ_FR2; + return tvistruecond(L->top+1+LJ_FR2); } } } @@ -324,6 +355,22 @@ LUA_API lua_Number lua_tonumber(lua_State *L, int idx) return 0; } +LUA_API lua_Number lua_tonumberx(lua_State *L, int idx, int *ok) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + if (LJ_LIKELY(tvisnumber(o))) { + if (ok) *ok = 1; + return numberVnum(o); + } else if (tvisstr(o) && lj_strscan_num(strV(o), &tmp)) { + if (ok) *ok = 1; + return numV(&tmp); + } else { + if (ok) *ok = 0; + return 0; + } +} + LUALIB_API lua_Number luaL_checknumber(lua_State *L, int idx) { cTValue *o = index2adr(L, idx); @@ -361,9 +408,38 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx) if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) return 0; if (tvisint(&tmp)) - return (lua_Integer)intV(&tmp); + return intV(&tmp); + n = numV(&tmp); + } +#if LJ_64 + return (lua_Integer)n; +#else + return lj_num2int(n); +#endif +} + +LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) +{ + cTValue *o = index2adr(L, idx); + TValue tmp; + lua_Number n; + if (LJ_LIKELY(tvisint(o))) { + if (ok) *ok = 1; + return intV(o); + } else if (LJ_LIKELY(tvisnum(o))) { + n = numV(o); + } else { + if (!(tvisstr(o) && lj_strscan_number(strV(o), &tmp))) { + if (ok) *ok = 0; + return 0; + } + if (tvisint(&tmp)) { + if (ok) *ok = 1; + return intV(&tmp); + } n = numV(&tmp); } + if (ok) *ok = 1; #if LJ_64 return (lua_Integer)n; #else @@ -434,7 +510,7 @@ LUA_API const char *lua_tolstring(lua_State *L, int idx, size_t *len) } else if (tvisnumber(o)) { lj_gc_check(L); o = index2adr(L, idx); /* GC may move the stack. */ - s = lj_str_fromnumber(L, o); + s = lj_strfmt_number(L, o); setstrV(L, o, s); } else { if (len != NULL) *len = 0; @@ -453,7 +529,7 @@ LUALIB_API const char *luaL_checklstring(lua_State *L, int idx, size_t *len) } else if (tvisnumber(o)) { lj_gc_check(L); o = index2adr(L, idx); /* GC may move the stack. */ - s = lj_str_fromnumber(L, o); + s = lj_strfmt_number(L, o); setstrV(L, o, s); } else { lj_err_argt(L, idx, LUA_TSTRING); @@ -475,7 +551,7 @@ LUALIB_API const char *luaL_optlstring(lua_State *L, int idx, } else if (tvisnumber(o)) { lj_gc_check(L); o = index2adr(L, idx); /* GC may move the stack. */ - s = lj_str_fromnumber(L, o); + s = lj_strfmt_number(L, o); setstrV(L, o, s); } else { lj_err_argt(L, idx, LUA_TSTRING); @@ -507,7 +583,7 @@ LUA_API size_t lua_objlen(lua_State *L, int idx) } else if (tvisudata(o)) { return udataV(o)->len; } else if (tvisnumber(o)) { - GCstr *s = lj_str_fromnumber(L, o); + GCstr *s = lj_strfmt_number(L, o); setstrV(L, o, s); return s->len; } else { @@ -532,7 +608,7 @@ LUA_API void *lua_touserdata(lua_State *L, int idx) if (tvisudata(o)) return uddata(udataV(o)); else if (tvislightud(o)) - return lightudV(o); + return lightudV(G(L), o); else return NULL; } @@ -545,17 +621,7 @@ LUA_API lua_State *lua_tothread(lua_State *L, int idx) LUA_API const void *lua_topointer(lua_State *L, int idx) { - cTValue *o = index2adr(L, idx); - if (tvisudata(o)) - return uddata(udataV(o)); - else if (tvislightud(o)) - return lightudV(o); - else if (tviscdata(o)) - return cdataptr(cdataV(o)); - else if (tvisgcv(o)) - return gcV(o); - else - return NULL; + return lj_obj_ptr(G(L), index2adr(L, idx)); } /* -- Stack setters (object creation) ------------------------------------- */ @@ -606,7 +672,7 @@ LUA_API const char *lua_pushvfstring(lua_State *L, const char *fmt, va_list argp) { lj_gc_check(L); - return lj_str_pushvf(L, fmt, argp); + return lj_strfmt_pushvf(L, fmt, argp); } LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) @@ -615,7 +681,7 @@ LUA_API const char *lua_pushfstring(lua_State *L, const char *fmt, ...) va_list argp; lj_gc_check(L); va_start(argp, fmt); - ret = lj_str_pushvf(L, fmt, argp); + ret = lj_strfmt_pushvf(L, fmt, argp); va_end(argp); return ret; } @@ -624,14 +690,14 @@ LUA_API void lua_pushcclosure(lua_State *L, lua_CFunction f, int n) { GCfunc *fn; lj_gc_check(L); - api_checknelems(L, n); + lj_checkapi_slot(n); fn = lj_func_newC(L, (MSize)n, getcurrenv(L)); fn->c.f = f; L->top -= n; while (n--) copyTV(L, &fn->c.upvalue[n], L->top+n); setfuncV(L, L->top, fn); - lua_assert(iswhite(obj2gco(fn))); + lj_assertL(iswhite(obj2gco(fn)), "new GC object is not white"); incr_top(L); } @@ -643,16 +709,17 @@ LUA_API void lua_pushboolean(lua_State *L, int b) LUA_API void lua_pushlightuserdata(lua_State *L, void *p) { - setlightudV(L->top, checklightudptr(L, p)); +#if LJ_64 + p = lj_lightud_intern(L, p); +#endif + setrawlightudV(L->top, p); incr_top(L); } LUA_API void lua_createtable(lua_State *L, int narray, int nrec) { - GCtab *t; lj_gc_check(L); - t = lj_tab_new(L, (uint32_t)(narray > 0 ? narray+1 : 0), hsize2hbits(nrec)); - settabV(L, L->top, t); + settabV(L, L->top, lj_tab_new_ah(L, narray, nrec)); incr_top(L); } @@ -703,7 +770,7 @@ LUA_API void *lua_newuserdata(lua_State *L, size_t size) LUA_API void lua_concat(lua_State *L, int n) { - api_checknelems(L, n); + lj_checkapi_slot(n); if (n >= 2) { n--; do { @@ -712,11 +779,11 @@ LUA_API void lua_concat(lua_State *L, int n) L->top -= n; break; } - n -= (int)(L->top - top); + n -= (int)(L->top - (top - 2*LJ_FR2)); L->top = top+2; lj_vm_call(L, top, 1+1); - L->top--; - copyTV(L, L->top-1, L->top); + L->top -= 1+LJ_FR2; + copyTV(L, L->top-1, L->top+LJ_FR2); } while (--n > 0); } else if (n == 0) { /* Push empty string. */ setstrV(L, L->top, &G(L)->strempty); @@ -729,30 +796,28 @@ LUA_API void lua_concat(lua_State *L, int n) LUA_API void lua_gettable(lua_State *L, int idx) { - cTValue *v, *t = index2adr(L, idx); - api_checkvalidindex(L, t); - v = lj_meta_tget(L, t, L->top-1); + cTValue *t = index2adr_check(L, idx); + cTValue *v = lj_meta_tget(L, t, L->top-1); if (v == NULL) { L->top += 2; lj_vm_call(L, L->top-2, 1+1); - L->top -= 2; - v = L->top+1; + L->top -= 2+LJ_FR2; + v = L->top+1+LJ_FR2; } copyTV(L, L->top-1, v); } LUA_API void lua_getfield(lua_State *L, int idx, const char *k) { - cTValue *v, *t = index2adr(L, idx); + cTValue *v, *t = index2adr_check(L, idx); TValue key; - api_checkvalidindex(L, t); setstrV(L, &key, lj_str_newz(L, k)); v = lj_meta_tget(L, t, &key); if (v == NULL) { L->top += 2; lj_vm_call(L, L->top-2, 1+1); - L->top -= 2; - v = L->top+1; + L->top -= 2+LJ_FR2; + v = L->top+1+LJ_FR2; } copyTV(L, L->top, v); incr_top(L); @@ -761,14 +826,14 @@ LUA_API void lua_getfield(lua_State *L, int idx, const char *k) LUA_API void lua_rawget(lua_State *L, int idx) { cTValue *t = index2adr(L, idx); - api_check(L, tvistab(t)); + lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); copyTV(L, L->top-1, lj_tab_get(L, tabV(t), L->top-1)); } LUA_API void lua_rawgeti(lua_State *L, int idx, int n) { cTValue *v, *t = index2adr(L, idx); - api_check(L, tvistab(t)); + lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); v = lj_tab_getint(tabV(t), n); if (v) { copyTV(L, L->top, v); @@ -810,8 +875,7 @@ LUALIB_API int luaL_getmetafield(lua_State *L, int idx, const char *field) LUA_API void lua_getfenv(lua_State *L, int idx) { - cTValue *o = index2adr(L, idx); - api_checkvalidindex(L, o); + cTValue *o = index2adr_check(L, idx); if (tvisfunc(o)) { settabV(L, L->top, tabref(funcV(o)->c.env)); } else if (tvisudata(o)) { @@ -828,12 +892,14 @@ LUA_API int lua_next(lua_State *L, int idx) { cTValue *t = index2adr(L, idx); int more; - api_check(L, tvistab(t)); - more = lj_tab_next(L, tabV(t), L->top-1); - if (more) { + lj_checkapi(tvistab(t), "stack slot %d is not a table", idx); + more = lj_tab_next(tabV(t), L->top-1, L->top-1); + if (more > 0) { incr_top(L); /* Return new key and value slot. */ - } else { /* End of traversal. */ + } else if (!more) { /* End of traversal. */ L->top--; /* Remove key slot. */ + } else { + lj_err_msg(L, LJ_ERR_NEXTIDX); } return more; } @@ -854,7 +920,7 @@ LUA_API void *lua_upvalueid(lua_State *L, int idx, int n) { GCfunc *fn = funcV(index2adr(L, idx)); n--; - api_check(L, (uint32_t)n < fn->l.nupvalues); + lj_checkapi((uint32_t)n < fn->l.nupvalues, "bad upvalue %d", n); return isluafunc(fn) ? (void *)gcref(fn->l.uvptr[n]) : (void *)&fn->c.upvalue[n]; } @@ -864,13 +930,15 @@ LUA_API void lua_upvaluejoin(lua_State *L, int idx1, int n1, int idx2, int n2) GCfunc *fn1 = funcV(index2adr(L, idx1)); GCfunc *fn2 = funcV(index2adr(L, idx2)); n1--; n2--; - api_check(L, isluafunc(fn1) && (uint32_t)n1 < fn1->l.nupvalues); - api_check(L, isluafunc(fn2) && (uint32_t)n2 < fn2->l.nupvalues); + lj_checkapi(isluafunc(fn1), "stack slot %d is not a Lua function", idx1); + lj_checkapi(isluafunc(fn2), "stack slot %d is not a Lua function", idx2); + lj_checkapi((uint32_t)n1 < fn1->l.nupvalues, "bad upvalue %d", n1+1); + lj_checkapi((uint32_t)n2 < fn2->l.nupvalues, "bad upvalue %d", n2+1); setgcrefr(fn1->l.uvptr[n1], fn2->l.uvptr[n2]); lj_gc_objbarrier(L, fn1, gcref(fn1->l.uvptr[n1])); } -LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) +LUALIB_API void *luaL_testudata(lua_State *L, int idx, const char *tname) { cTValue *o = index2adr(L, idx); if (tvisudata(o)) { @@ -879,8 +947,14 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) if (tv && tvistab(tv) && tabV(tv) == tabref(ud->metatable)) return uddata(ud); } - lj_err_argtype(L, idx, tname); - return NULL; /* unreachable */ + return NULL; /* value is not a userdata with a metatable */ +} + +LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) +{ + void *p = luaL_testudata(L, idx, tname); + if (!p) lj_err_argtype(L, idx, tname); + return p; } /* -- Object setters ------------------------------------------------------ */ @@ -888,19 +962,19 @@ LUALIB_API void *luaL_checkudata(lua_State *L, int idx, const char *tname) LUA_API void lua_settable(lua_State *L, int idx) { TValue *o; - cTValue *t = index2adr(L, idx); - api_checknelems(L, 2); - api_checkvalidindex(L, t); + cTValue *t = index2adr_check(L, idx); + lj_checkapi_slot(2); o = lj_meta_tset(L, t, L->top-2); if (o) { /* NOBARRIER: lj_meta_tset ensures the table is not black. */ - copyTV(L, o, L->top-1); L->top -= 2; + copyTV(L, o, L->top+1); } else { - L->top += 3; - copyTV(L, L->top-1, L->top-6); - lj_vm_call(L, L->top-3, 0+1); - L->top -= 3; + TValue *base = L->top; + copyTV(L, base+2, base-3-2*LJ_FR2); + L->top = base+3; + lj_vm_call(L, base, 0+1); + L->top -= 3+LJ_FR2; } } @@ -908,20 +982,19 @@ LUA_API void lua_setfield(lua_State *L, int idx, const char *k) { TValue *o; TValue key; - cTValue *t = index2adr(L, idx); - api_checknelems(L, 1); - api_checkvalidindex(L, t); + cTValue *t = index2adr_check(L, idx); + lj_checkapi_slot(1); setstrV(L, &key, lj_str_newz(L, k)); o = lj_meta_tset(L, t, &key); if (o) { - L->top--; /* NOBARRIER: lj_meta_tset ensures the table is not black. */ - copyTV(L, o, L->top); + copyTV(L, o, --L->top); } else { - L->top += 3; - copyTV(L, L->top-1, L->top-6); - lj_vm_call(L, L->top-3, 0+1); - L->top -= 2; + TValue *base = L->top; + copyTV(L, base+2, base-3-2*LJ_FR2); + L->top = base+3; + lj_vm_call(L, base, 0+1); + L->top -= 2+LJ_FR2; } } @@ -929,7 +1002,7 @@ LUA_API void lua_rawset(lua_State *L, int idx) { GCtab *t = tabV(index2adr(L, idx)); TValue *dst, *key; - api_checknelems(L, 2); + lj_checkapi_slot(2); key = L->top-2; dst = lj_tab_set(L, t, key); copyTV(L, dst, key+1); @@ -941,7 +1014,7 @@ LUA_API void lua_rawseti(lua_State *L, int idx, int n) { GCtab *t = tabV(index2adr(L, idx)); TValue *dst, *src; - api_checknelems(L, 1); + lj_checkapi_slot(1); dst = lj_tab_setint(L, t, n); src = L->top-1; copyTV(L, dst, src); @@ -953,13 +1026,12 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) { global_State *g; GCtab *mt; - cTValue *o = index2adr(L, idx); - api_checknelems(L, 1); - api_checkvalidindex(L, o); + cTValue *o = index2adr_check(L, idx); + lj_checkapi_slot(1); if (tvisnil(L->top-1)) { mt = NULL; } else { - api_check(L, tvistab(L->top-1)); + lj_checkapi(tvistab(L->top-1), "top stack slot is not a table"); mt = tabV(L->top-1); } g = G(L); @@ -988,13 +1060,18 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) return 1; } +LUALIB_API void luaL_setmetatable(lua_State *L, const char *tname) +{ + lua_getfield(L, LUA_REGISTRYINDEX, tname); + lua_setmetatable(L, -2); +} + LUA_API int lua_setfenv(lua_State *L, int idx) { - cTValue *o = index2adr(L, idx); + cTValue *o = index2adr_check(L, idx); GCtab *t; - api_checknelems(L, 1); - api_checkvalidindex(L, o); - api_check(L, tvistab(L->top-1)); + lj_checkapi_slot(1); + lj_checkapi(tvistab(L->top-1), "top stack slot is not a table"); t = tabV(L->top-1); if (tvisfunc(o)) { setgcref(funcV(o)->c.env, obj2gco(t)); @@ -1017,7 +1094,7 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) TValue *val; GCobj *o; const char *name; - api_checknelems(L, 1); + lj_checkapi_slot(1); name = lj_debug_uvnamev(f, (uint32_t)(n-1), &val, &o); if (name) { L->top--; @@ -1029,11 +1106,25 @@ LUA_API const char *lua_setupvalue(lua_State *L, int idx, int n) /* -- Calls --------------------------------------------------------------- */ +#if LJ_FR2 +static TValue *api_call_base(lua_State *L, int nargs) +{ + TValue *o = L->top, *base = o - nargs; + L->top = o+1; + for (; o > base; o--) copyTV(L, o, o-1); + setnilV(o); + return o+1; +} +#else +#define api_call_base(L, nargs) (L->top - (nargs)) +#endif + LUA_API void lua_call(lua_State *L, int nargs, int nresults) { - api_check(L, L->status == 0 || L->status == LUA_ERRERR); - api_checknelems(L, nargs+1); - lj_vm_call(L, L->top - nargs, nresults+1); + lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, + "thread called in wrong state %d", L->status); + lj_checkapi_slot(nargs+1); + lj_vm_call(L, api_call_base(L, nargs), nresults+1); } LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) @@ -1042,16 +1133,16 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) uint8_t oldh = hook_save(g); ptrdiff_t ef; int status; - api_check(L, L->status == 0 || L->status == LUA_ERRERR); - api_checknelems(L, nargs+1); + lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, + "thread called in wrong state %d", L->status); + lj_checkapi_slot(nargs+1); if (errfunc == 0) { ef = 0; } else { - cTValue *o = stkindex2adr(L, errfunc); - api_checkvalidindex(L, o); + cTValue *o = index2adr_stack(L, errfunc); ef = savestack(L, o); } - status = lj_vm_pcall(L, L->top - nargs, nresults+1, ef); + status = lj_vm_pcall(L, api_call_base(L, nargs), nresults+1, ef); if (status) hook_restore(g, oldh); return status; } @@ -1059,12 +1150,17 @@ LUA_API int lua_pcall(lua_State *L, int nargs, int nresults, int errfunc) static TValue *cpcall(lua_State *L, lua_CFunction func, void *ud) { GCfunc *fn = lj_func_newC(L, 0, getcurrenv(L)); + TValue *top = L->top; fn->c.f = func; - setfuncV(L, L->top, fn); - setlightudV(L->top+1, checklightudptr(L, ud)); + setfuncV(L, top++, fn); + if (LJ_FR2) setnilV(top++); +#if LJ_64 + ud = lj_lightud_intern(L, ud); +#endif + setrawlightudV(top++, ud); cframe_nres(L->cframe) = 1+0; /* Zero results. */ - L->top += 2; - return L->top-1; /* Now call the newly allocated C function. */ + L->top = top; + return top-1; /* Now call the newly allocated C function. */ } LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) @@ -1072,7 +1168,8 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) global_State *g = G(L); uint8_t oldh = hook_save(g); int status; - api_check(L, L->status == 0 || L->status == LUA_ERRERR); + lj_checkapi(L->status == LUA_OK || L->status == LUA_ERRERR, + "thread called in wrong state %d", L->status); status = lj_vm_cpcall(L, func, ud, cpcall); if (status) hook_restore(g, oldh); return status; @@ -1081,10 +1178,11 @@ LUA_API int lua_cpcall(lua_State *L, lua_CFunction func, void *ud) LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) { if (luaL_getmetafield(L, idx, field)) { - TValue *base = L->top--; - copyTV(L, base, index2adr(L, idx)); - L->top = base+1; - lj_vm_call(L, base, 1+1); + TValue *top = L->top--; + if (LJ_FR2) setnilV(top++); + copyTV(L, top++, index2adr(L, idx)); + L->top = top; + lj_vm_call(L, top-1, 1+1); return 1; } return 0; @@ -1092,6 +1190,11 @@ LUALIB_API int luaL_callmeta(lua_State *L, int idx, const char *field) /* -- Coroutine yield and resume ------------------------------------------ */ +LUA_API int lua_isyieldable(lua_State *L) +{ + return cframe_canyield(L->cframe); +} + LUA_API int lua_yield(lua_State *L, int nresults) { void *cf = L->cframe; @@ -1111,13 +1214,16 @@ LUA_API int lua_yield(lua_State *L, int nresults) } else { /* Yield from hook: add a pseudo-frame. */ TValue *top = L->top; hook_leave(g); - top->u64 = cframe_multres(cf); - setcont(top+1, lj_cont_hook); - setframe_pc(top+1, cframe_pc(cf)-1); - setframe_gc(top+2, obj2gco(L)); - setframe_ftsz(top+2, (int)((char *)(top+3)-(char *)L->base)+FRAME_CONT); - L->top = L->base = top+3; -#if LJ_TARGET_X64 + (top++)->u64 = cframe_multres(cf); + setcont(top, lj_cont_hook); + if (LJ_FR2) top++; + setframe_pc(top, cframe_pc(cf)-1); + top++; + setframe_gc(top, obj2gco(L), LJ_TTHREAD); + if (LJ_FR2) top++; + setframe_ftsz(top, ((char *)(top+1)-(char *)L->base)+FRAME_CONT); + L->top = L->base = top+1; +#if ((defined(__GNUC__) || defined(__clang__)) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND) || LJ_TARGET_WINDOWS lj_err_throw(L, LUA_YIELD); #else L->cframe = NULL; @@ -1133,7 +1239,9 @@ LUA_API int lua_yield(lua_State *L, int nresults) LUA_API int lua_resume(lua_State *L, int nargs) { if (L->cframe == NULL && L->status <= LUA_YIELD) - return lj_vm_resume(L, L->top - nargs, 0, 0); + return lj_vm_resume(L, + L->status == LUA_OK ? api_call_base(L, nargs) : L->top - nargs, + 0, 0); L->top = L->base; setstrV(L, L->top, lj_err_str(L, LJ_ERR_COSUSP)); incr_top(L); @@ -1163,7 +1271,7 @@ LUA_API int lua_gc(lua_State *L, int what, int data) res = (int)(g->gc.total & 0x3ff); break; case LUA_GCSTEP: { - MSize a = (MSize)data << 10; + GCSize a = (GCSize)data << 10; g->gc.threshold = (a <= g->gc.total) ? (g->gc.total - a) : 0; while (g->gc.total >= g->gc.threshold) if (lj_gc_step(L) > 0) { @@ -1180,6 +1288,9 @@ LUA_API int lua_gc(lua_State *L, int what, int data) res = (int)(g->gc.stepmul); g->gc.stepmul = (MSize)data; break; + case LUA_GCISRUNNING: + res = (g->gc.threshold != LJ_MAX_MEM); + break; default: res = -1; /* Invalid option. */ } diff --git a/src/lj_arch.h b/src/lj_arch.h index db46f886..5fb798d9 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -8,6 +8,8 @@ #include "lua.h" +/* -- Target definitions -------------------------------------------------- */ + /* Target endianess. */ #define LUAJIT_LE 0 #define LUAJIT_BE 1 @@ -19,12 +21,16 @@ #define LUAJIT_ARCH_x64 2 #define LUAJIT_ARCH_ARM 3 #define LUAJIT_ARCH_arm 3 -#define LUAJIT_ARCH_PPC 4 -#define LUAJIT_ARCH_ppc 4 -#define LUAJIT_ARCH_PPCSPE 5 -#define LUAJIT_ARCH_ppcspe 5 +#define LUAJIT_ARCH_ARM64 4 +#define LUAJIT_ARCH_arm64 4 +#define LUAJIT_ARCH_PPC 5 +#define LUAJIT_ARCH_ppc 5 #define LUAJIT_ARCH_MIPS 6 #define LUAJIT_ARCH_mips 6 +#define LUAJIT_ARCH_MIPS32 6 +#define LUAJIT_ARCH_mips32 6 +#define LUAJIT_ARCH_MIPS64 7 +#define LUAJIT_ARCH_mips64 7 /* Target OS. */ #define LUAJIT_OS_OTHER 0 @@ -34,6 +40,14 @@ #define LUAJIT_OS_BSD 4 #define LUAJIT_OS_POSIX 5 +/* Number mode. */ +#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ +#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ +#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ +#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ + +/* -- Target detection ---------------------------------------------------- */ + /* Select native target if no target defined. */ #ifndef LUAJIT_TARGET @@ -43,14 +57,14 @@ #define LUAJIT_TARGET LUAJIT_ARCH_X64 #elif defined(__arm__) || defined(__arm) || defined(__ARM__) || defined(__ARM) #define LUAJIT_TARGET LUAJIT_ARCH_ARM +#elif defined(__aarch64__) +#define LUAJIT_TARGET LUAJIT_ARCH_ARM64 #elif defined(__ppc__) || defined(__ppc) || defined(__PPC__) || defined(__PPC) || defined(__powerpc__) || defined(__powerpc) || defined(__POWERPC__) || defined(__POWERPC) || defined(_M_PPC) -#ifdef __NO_FPRS__ -#define LUAJIT_TARGET LUAJIT_ARCH_PPCSPE -#else #define LUAJIT_TARGET LUAJIT_ARCH_PPC -#endif +#elif defined(__mips64__) || defined(__mips64) || defined(__MIPS64__) || defined(__MIPS64) +#define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) -#define LUAJIT_TARGET LUAJIT_ARCH_MIPS +#define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 #else #error "No support for this architecture (yet)" #endif @@ -65,16 +79,23 @@ #elif defined(__linux__) #define LUAJIT_OS LUAJIT_OS_LINUX #elif defined(__MACH__) && defined(__APPLE__) +#include "TargetConditionals.h" #define LUAJIT_OS LUAJIT_OS_OSX #elif (defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || \ defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__DragonFly__)) && !defined(__ORBIS__) + defined(__DragonFly__)) && !defined(__ORBIS__) && !defined(__PROSPERO__) #define LUAJIT_OS LUAJIT_OS_BSD #elif (defined(__sun__) && defined(__svr4__)) +#define LJ_TARGET_SOLARIS 1 +#define LUAJIT_OS LUAJIT_OS_POSIX +#elif defined(__HAIKU__) #define LUAJIT_OS LUAJIT_OS_POSIX #elif defined(__CYGWIN__) #define LJ_TARGET_CYGWIN 1 #define LUAJIT_OS LUAJIT_OS_POSIX +#elif defined(__QNX__) +#define LJ_TARGET_QNX 1 +#define LUAJIT_OS LUAJIT_OS_POSIX #else #define LUAJIT_OS LUAJIT_OS_OTHER #endif @@ -99,10 +120,16 @@ #define LJ_TARGET_WINDOWS (LUAJIT_OS == LUAJIT_OS_WINDOWS) #define LJ_TARGET_LINUX (LUAJIT_OS == LUAJIT_OS_LINUX) #define LJ_TARGET_OSX (LUAJIT_OS == LUAJIT_OS_OSX) -#define LJ_TARGET_IOS (LJ_TARGET_OSX && LUAJIT_TARGET == LUAJIT_ARCH_ARM) +#define LJ_TARGET_BSD (LUAJIT_OS == LUAJIT_OS_BSD) #define LJ_TARGET_POSIX (LUAJIT_OS > LUAJIT_OS_WINDOWS) #define LJ_TARGET_DLOPEN LJ_TARGET_POSIX +#if TARGET_OS_IPHONE +#define LJ_TARGET_IOS 1 +#else +#define LJ_TARGET_IOS 0 +#endif + #ifdef __CELLOS_LV2__ #define LJ_TARGET_PS3 1 #define LJ_TARGET_CONSOLE 1 @@ -115,6 +142,13 @@ #define NULL ((void*)0) #endif +#ifdef __PROSPERO__ +#define LJ_TARGET_PS5 1 +#define LJ_TARGET_CONSOLE 1 +#undef NULL +#define NULL ((void*)0) +#endif + #ifdef __psp2__ #define LJ_TARGET_PSVITA 1 #define LJ_TARGET_CONSOLE 1 @@ -125,10 +159,27 @@ #define LJ_TARGET_CONSOLE 1 #endif -#define LJ_NUMMODE_SINGLE 0 /* Single-number mode only. */ -#define LJ_NUMMODE_SINGLE_DUAL 1 /* Default to single-number mode. */ -#define LJ_NUMMODE_DUAL 2 /* Dual-number mode only. */ -#define LJ_NUMMODE_DUAL_SINGLE 3 /* Default to dual-number mode. */ +#ifdef _DURANGO +#define LJ_TARGET_XBOXONE 1 +#define LJ_TARGET_CONSOLE 1 +#define LJ_TARGET_GC64 1 +#endif + +#ifdef __NX__ +#define LJ_TARGET_NX 1 +#define LJ_TARGET_CONSOLE 1 +#undef NULL +#define NULL ((void*)0) +#endif + +#ifdef _UWP +#define LJ_TARGET_UWP 1 +#if LUAJIT_TARGET == LUAJIT_ARCH_X64 +#define LJ_TARGET_GC64 1 +#endif +#endif + +/* -- Arch-specific settings ---------------------------------------------- */ /* Set target architecture properties. */ #if LUAJIT_TARGET == LUAJIT_ARCH_X86 @@ -136,14 +187,10 @@ #define LJ_ARCH_NAME "x86" #define LJ_ARCH_BITS 32 #define LJ_ARCH_ENDIAN LUAJIT_LE -#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN -#define LJ_ABI_WIN 1 -#else -#define LJ_ABI_WIN 0 -#endif #define LJ_TARGET_X86 1 #define LJ_TARGET_X86ORX64 1 #define LJ_TARGET_EHRETREG 0 +#define LJ_TARGET_EHRAREG 8 #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNALIGNED 1 @@ -154,19 +201,20 @@ #define LJ_ARCH_NAME "x64" #define LJ_ARCH_BITS 64 #define LJ_ARCH_ENDIAN LUAJIT_LE -#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN -#define LJ_ABI_WIN 1 -#else -#define LJ_ABI_WIN 0 -#endif #define LJ_TARGET_X64 1 #define LJ_TARGET_X86ORX64 1 #define LJ_TARGET_EHRETREG 0 +#define LJ_TARGET_EHRAREG 16 #define LJ_TARGET_JUMPRANGE 31 /* +-2^31 = +-2GB */ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNALIGNED 1 #define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE_DUAL +#ifndef LUAJIT_DISABLE_GC64 +#define LJ_TARGET_GC64 1 +#elif LJ_TARGET_OSX +#error "macOS requires GC64 -- don't disable it" +#endif #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM @@ -182,40 +230,105 @@ #define LJ_ABI_EABI 1 #define LJ_TARGET_ARM 1 #define LJ_TARGET_EHRETREG 0 +#define LJ_TARGET_EHRAREG 14 #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ #define LJ_TARGET_MASKSHIFT 0 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if __ARM_ARCH____ARM_ARCH_8__ || __ARM_ARCH_8A__ +#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__ #define LJ_ARCH_VERSION 80 -#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ +#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__ #define LJ_ARCH_VERSION 70 #elif __ARM_ARCH_6T2__ #define LJ_ARCH_VERSION 61 -#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ +#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__ #define LJ_ARCH_VERSION 60 #else #define LJ_ARCH_VERSION 50 #endif +#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 + +#define LJ_ARCH_BITS 64 +#if defined(__AARCH64EB__) +#define LJ_ARCH_NAME "arm64be" +#define LJ_ARCH_ENDIAN LUAJIT_BE +#else +#define LJ_ARCH_NAME "arm64" +#define LJ_ARCH_ENDIAN LUAJIT_LE +#endif +#define LJ_TARGET_ARM64 1 +#define LJ_TARGET_EHRETREG 0 +#define LJ_TARGET_EHRAREG 30 +#define LJ_TARGET_JUMPRANGE 27 /* +-2^27 = +-128MB */ +#define LJ_TARGET_MASKSHIFT 1 +#define LJ_TARGET_MASKROT 1 +#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ +#define LJ_TARGET_GC64 1 +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL + +#define LJ_ARCH_VERSION 80 + #elif LUAJIT_TARGET == LUAJIT_ARCH_PPC -#define LJ_ARCH_NAME "ppc" +#ifndef LJ_ARCH_ENDIAN +#if __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ +#define LJ_ARCH_ENDIAN LUAJIT_LE +#else +#define LJ_ARCH_ENDIAN LUAJIT_BE +#endif +#endif + #if _LP64 #define LJ_ARCH_BITS 64 +#if LJ_ARCH_ENDIAN == LUAJIT_LE +#define LJ_ARCH_NAME "ppc64le" +#else +#define LJ_ARCH_NAME "ppc64" +#endif #else #define LJ_ARCH_BITS 32 +#define LJ_ARCH_NAME "ppc" + +#if !defined(LJ_ARCH_HASFPU) +#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) +#define LJ_ARCH_HASFPU 0 +#else +#define LJ_ARCH_HASFPU 1 #endif -#define LJ_ARCH_ENDIAN LUAJIT_BE +#endif + +#if !defined(LJ_ABI_SOFTFP) +#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) +#define LJ_ABI_SOFTFP 1 +#else +#define LJ_ABI_SOFTFP 0 +#endif +#endif +#endif + +#if LJ_ABI_SOFTFP +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL +#else +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE +#endif + #define LJ_TARGET_PPC 1 #define LJ_TARGET_EHRETREG 3 +#define LJ_TARGET_EHRAREG 65 #define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ #define LJ_TARGET_MASKSHIFT 0 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL_SINGLE + +#if LJ_TARGET_CONSOLE +#define LJ_ARCH_PPC32ON64 1 +#define LJ_ARCH_NOFFI 1 +#elif LJ_ARCH_BITS == 64 +#error "No support for PPC64" +#endif #if _ARCH_PWR7 #define LJ_ARCH_VERSION 70 @@ -230,10 +343,6 @@ #else #define LJ_ARCH_VERSION 0 #endif -#if __PPC64__ || __powerpc64__ || LJ_TARGET_CONSOLE -#define LJ_ARCH_PPC64 1 -#define LJ_ARCH_NOFFI 1 -#endif #if _ARCH_PPCSQ #define LJ_ARCH_SQRT 1 #endif @@ -247,44 +356,80 @@ #define LJ_ARCH_XENON 1 #endif -#elif LUAJIT_TARGET == LUAJIT_ARCH_PPCSPE - -#define LJ_ARCH_NAME "ppcspe" -#define LJ_ARCH_BITS 32 -#define LJ_ARCH_ENDIAN LUAJIT_BE -#ifndef LJ_ABI_SOFTFP -#define LJ_ABI_SOFTFP 1 -#endif -#define LJ_ABI_EABI 1 -#define LJ_TARGET_PPCSPE 1 -#define LJ_TARGET_EHRETREG 3 -#define LJ_TARGET_JUMPRANGE 25 /* +-2^25 = +-32MB */ -#define LJ_TARGET_MASKSHIFT 0 -#define LJ_TARGET_MASKROT 1 -#define LJ_TARGET_UNIFYROT 1 /* Want only IR_BROL. */ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE -#define LJ_ARCH_NOFFI 1 /* NYI: comparisons, calls. */ -#define LJ_ARCH_NOJIT 1 - -#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS +#elif LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 || LUAJIT_TARGET == LUAJIT_ARCH_MIPS64 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) +#if __mips_isa_rev >= 6 +#define LJ_TARGET_MIPSR6 1 +#define LJ_TARGET_UNALIGNED 1 +#endif +#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips32r6el" +#else #define LJ_ARCH_NAME "mipsel" +#endif +#else +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips64r6el" +#else +#define LJ_ARCH_NAME "mips64el" +#endif +#endif #define LJ_ARCH_ENDIAN LUAJIT_LE #else +#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips32r6" +#else #define LJ_ARCH_NAME "mips" +#endif +#else +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_NAME "mips64r6" +#else +#define LJ_ARCH_NAME "mips64" +#endif +#endif #define LJ_ARCH_ENDIAN LUAJIT_BE #endif + +#if !defined(LJ_ARCH_HASFPU) +#ifdef __mips_soft_float +#define LJ_ARCH_HASFPU 0 +#else +#define LJ_ARCH_HASFPU 1 +#endif +#endif + +#if !defined(LJ_ABI_SOFTFP) +#ifdef __mips_soft_float +#define LJ_ABI_SOFTFP 1 +#else +#define LJ_ABI_SOFTFP 0 +#endif +#endif + +#if LUAJIT_TARGET == LUAJIT_ARCH_MIPS32 #define LJ_ARCH_BITS 32 +#define LJ_TARGET_MIPS32 1 +#else +#define LJ_ARCH_BITS 64 +#define LJ_TARGET_MIPS64 1 +#define LJ_TARGET_GC64 1 +#endif #define LJ_TARGET_MIPS 1 #define LJ_TARGET_EHRETREG 4 +#define LJ_TARGET_EHRAREG 31 #define LJ_TARGET_JUMPRANGE 27 /* 2*2^27 = 256MB-aligned region */ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */ -#define LJ_ARCH_NUMMODE LJ_NUMMODE_SINGLE +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#if _MIPS_ARCH_MIPS32R2 +#if LJ_TARGET_MIPSR6 +#define LJ_ARCH_VERSION 60 +#elif _MIPS_ARCH_MIPS32R2 || _MIPS_ARCH_MIPS64R2 #define LJ_ARCH_VERSION 20 #else #define LJ_ARCH_VERSION 10 @@ -294,9 +439,7 @@ #error "No target architecture defined" #endif -#ifndef LJ_PAGESIZE -#define LJ_PAGESIZE 4096 -#endif +/* -- Checks for requirements --------------------------------------------- */ /* Check for minimum required compiler versions. */ #if defined(__GNUC__) @@ -312,6 +455,16 @@ #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 2) #error "Need at least GCC 4.2 or newer" #endif +#elif LJ_TARGET_ARM64 +#if __clang__ +#if ((__clang_major__ < 3) || ((__clang_major__ == 3) && __clang_minor__ < 5)) && !defined(__NX_TOOLCHAIN_MAJOR__) +#error "Need at least Clang 3.5 or newer" +#endif +#else +#if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 8) +#error "Need at least GCC 4.8 or newer" +#endif +#endif #elif !LJ_TARGET_PS3 #if (__GNUC__ < 4) || ((__GNUC__ == 4) && __GNUC_MINOR__ < 3) #error "Need at least GCC 4.3 or newer" @@ -335,26 +488,35 @@ #if !(__ARM_EABI__ || LJ_TARGET_IOS) #error "Only ARM EABI or iOS 3.0+ ABI is supported" #endif -#elif LJ_TARGET_PPC || LJ_TARGET_PPCSPE -#if defined(_SOFT_FLOAT) || defined(_SOFT_DOUBLE) -#error "No support for PowerPC CPUs without double-precision FPU" +#elif LJ_TARGET_ARM64 +#if defined(_ILP32) +#error "No support for ILP32 model on ARM64" #endif +#elif LJ_TARGET_PPC #if defined(_LITTLE_ENDIAN) && (!defined(_BYTE_ORDER) || (_BYTE_ORDER == _LITTLE_ENDIAN)) -#error "No support for little-endian PowerPC" +#error "No support for little-endian PPC32" +#endif +#if defined(__NO_FPRS__) && !defined(_SOFT_FLOAT) +#error "No support for PPC/e500 anymore (use LuaJIT 2.0)" #endif -#if defined(_LP64) -#error "No support for PowerPC 64 bit mode" +#elif LJ_TARGET_MIPS32 +#if !((defined(_MIPS_SIM_ABI32) && _MIPS_SIM == _MIPS_SIM_ABI32) || (defined(_ABIO32) && _MIPS_SIM == _ABIO32)) +#error "Only o32 ABI supported for MIPS32" #endif -#elif LJ_TARGET_MIPS -#if defined(__mips_soft_float) -#error "No support for MIPS CPUs without FPU" +#if LJ_TARGET_MIPSR6 +/* Not that useful, since most available r6 CPUs are 64 bit. */ +#error "No support for MIPS32R6" #endif -#if defined(_LP64) -#error "No support for MIPS64" +#elif LJ_TARGET_MIPS64 +#if !((defined(_MIPS_SIM_ABI64) && _MIPS_SIM == _MIPS_SIM_ABI64) || (defined(_ABI64) && _MIPS_SIM == _ABI64)) +/* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ +#error "Only n64 ABI supported for MIPS64" #endif #endif #endif +/* -- Derived defines ----------------------------------------------------- */ + /* Enable or disable the dual-number mode for the VM. */ #if (LJ_ARCH_NUMMODE == LJ_NUMMODE_SINGLE && LUAJIT_NUMMODE == 2) || \ (LJ_ARCH_NUMMODE == LJ_NUMMODE_DUAL && LUAJIT_NUMMODE == 1) @@ -376,6 +538,20 @@ #endif #endif +/* 64 bit GC references. */ +#if LJ_TARGET_GC64 +#define LJ_GC64 1 +#else +#define LJ_GC64 0 +#endif + +/* 2-slot frame info. */ +#if LJ_GC64 +#define LJ_FR2 1 +#else +#define LJ_FR2 0 +#endif + /* Disable or enable the JIT compiler. */ #if defined(LUAJIT_DISABLE_JIT) || defined(LJ_ARCH_NOJIT) || defined(LJ_OS_NOJIT) #define LJ_HASJIT 0 @@ -390,6 +566,28 @@ #define LJ_HASFFI 1 #endif +/* Disable or enable the string buffer extension. */ +#if defined(LUAJIT_DISABLE_BUFFER) +#define LJ_HASBUFFER 0 +#else +#define LJ_HASBUFFER 1 +#endif + +#if defined(LUAJIT_DISABLE_PROFILE) +#define LJ_HASPROFILE 0 +#elif LJ_TARGET_POSIX +#define LJ_HASPROFILE 1 +#define LJ_PROFILE_SIGPROF 1 +#elif LJ_TARGET_PS3 +#define LJ_HASPROFILE 1 +#define LJ_PROFILE_PTHREAD 1 +#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOX360 +#define LJ_HASPROFILE 1 +#define LJ_PROFILE_WTHREAD 1 +#else +#define LJ_HASPROFILE 0 +#endif + #ifndef LJ_ARCH_HASFPU #define LJ_ARCH_HASFPU 1 #endif @@ -397,6 +595,7 @@ #define LJ_ABI_SOFTFP 0 #endif #define LJ_SOFTFP (!LJ_ARCH_HASFPU) +#define LJ_SOFTFP32 (LJ_SOFTFP && LJ_32) #if LJ_ARCH_ENDIAN == LUAJIT_BE #define LJ_LE 0 @@ -422,26 +621,52 @@ #define LJ_TARGET_UNALIGNED 0 #endif -/* Various workarounds for embedded operating systems. */ -#if (defined(__ANDROID__) && !defined(LJ_TARGET_X86ORX64)) || defined(__symbian__) || LJ_TARGET_XBOX360 -#define LUAJIT_NO_LOG2 +#ifndef LJ_PAGESIZE +#define LJ_PAGESIZE 4096 #endif -#if defined(__symbian__) -#define LUAJIT_NO_EXP2 + +/* Various workarounds for embedded operating systems or weak C runtimes. */ +#if defined(__ANDROID__) || defined(__symbian__) || LJ_TARGET_XBOX360 || LJ_TARGET_WINDOWS +#define LUAJIT_NO_LOG2 #endif #if LJ_TARGET_CONSOLE || (LJ_TARGET_IOS && __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0) #define LJ_NO_SYSTEM 1 #endif -#if !defined(LUAJIT_NO_UNWIND) && __GNU_COMPACT_EH__ -/* NYI: no support for compact unwind specification, yet. */ -#define LUAJIT_NO_UNWIND 1 +#if LJ_TARGET_WINDOWS || LJ_TARGET_CYGWIN +#define LJ_ABI_WIN 1 +#else +#define LJ_ABI_WIN 0 +#endif + +#if LJ_TARGET_WINDOWS +#if LJ_TARGET_UWP +#define LJ_WIN_VALLOC VirtualAllocFromApp +#define LJ_WIN_VPROTECT VirtualProtectFromApp +extern void *LJ_WIN_LOADLIBA(const char *path); +#else +#define LJ_WIN_VALLOC VirtualAlloc +#define LJ_WIN_VPROTECT VirtualProtect +#define LJ_WIN_LOADLIBA(path) LoadLibraryExA((path), NULL, 0) +#endif #endif -#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 +#if defined(LUAJIT_NO_UNWIND) || __GNU_COMPACT_EH__ || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_PS5 #define LJ_NO_UNWIND 1 #endif +#if !LJ_NO_UNWIND && !defined(LUAJIT_UNWIND_INTERNAL) && (LJ_ABI_WIN || (defined(LUAJIT_UNWIND_EXTERNAL) && (defined(__GNUC__) || defined(__clang__)))) +#define LJ_UNWIND_EXT 1 +#else +#define LJ_UNWIND_EXT 0 +#endif + +#if LJ_UNWIND_EXT && LJ_HASJIT && !LJ_TARGET_ARM && !(LJ_ABI_WIN && LJ_TARGET_X86) +#define LJ_UNWIND_JIT 1 +#else +#define LJ_UNWIND_JIT 0 +#endif + /* Compatibility with Lua 5.1 vs. 5.2. */ #ifdef LUAJIT_ENABLE_LUA52COMPAT #define LJ_52 1 @@ -449,4 +674,46 @@ #define LJ_52 0 #endif +/* -- VM security --------------------------------------------------------- */ + +/* Don't make any changes here. Instead build with: +** make "XCFLAGS=-DLUAJIT_SECURITY_flag=value" +** +** Important note to distro maintainers: DO NOT change the defaults for a +** regular distro build -- neither upwards, nor downwards! +** These build-time configurable security flags are intended for embedders +** who may have specific needs wrt. security vs. performance. +*/ + +/* Security defaults. */ +#ifndef LUAJIT_SECURITY_PRNG +/* PRNG init: 0 = fixed/insecure, 1 = secure from OS. */ +#define LUAJIT_SECURITY_PRNG 1 +#endif + +#ifndef LUAJIT_SECURITY_STRHASH +/* String hash: 0 = sparse only, 1 = sparse + dense. */ +#define LUAJIT_SECURITY_STRHASH 1 +#endif + +#ifndef LUAJIT_SECURITY_STRID +/* String IDs: 0 = linear, 1 = reseed < 255, 2 = reseed < 15, 3 = random. */ +#define LUAJIT_SECURITY_STRID 1 +#endif + +#ifndef LUAJIT_SECURITY_MCODE +/* Machine code page protection: 0 = insecure RWX, 1 = secure RW^X. */ +#define LUAJIT_SECURITY_MCODE 1 +#endif + +#define LJ_SECURITY_MODE \ + ( 0u \ + | ((LUAJIT_SECURITY_PRNG & 3) << 0) \ + | ((LUAJIT_SECURITY_STRHASH & 3) << 2) \ + | ((LUAJIT_SECURITY_STRID & 3) << 4) \ + | ((LUAJIT_SECURITY_MCODE & 3) << 6) \ + ) +#define LJ_SECURITY_MODESTRING \ + "\004prng\007strhash\005strid\005mcode" + #endif diff --git a/src/lj_asm.c b/src/lj_asm.c index 9ff9215f..6f5e0c45 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -11,6 +11,7 @@ #if LJ_HASJIT #include "lj_gc.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_frame.h" @@ -71,6 +72,7 @@ typedef struct ASMState { IRRef snaprename; /* Rename highwater mark for snapshot check. */ SnapNo snapno; /* Current snapshot number. */ SnapNo loopsnapno; /* Loop snapshot number. */ + int snapalloc; /* Current snapshot needs allocation. */ BloomFilter snapfilt1, snapfilt2; /* Filled with snapshot refs. */ IRRef fuseref; /* Fusion limit (loopref, 0 or FUSE_DISABLED). */ @@ -85,18 +87,25 @@ typedef struct ASMState { MCode *mcbot; /* Bottom of reserved MCode. */ MCode *mctop; /* Top of generated MCode. */ + MCode *mctoporig; /* Original top of generated MCode. */ MCode *mcloop; /* Pointer to loop MCode (or NULL). */ MCode *invmcp; /* Points to invertible loop branch (or NULL). */ MCode *flagmcp; /* Pending opportunity to merge flag setting ins. */ MCode *realign; /* Realign loop if not NULL. */ #ifdef RID_NUM_KREF - int32_t krefk[RID_NUM_KREF]; + intptr_t krefk[RID_NUM_KREF]; #endif IRRef1 phireg[RID_MAX]; /* PHI register references. */ uint16_t parentmap[LJ_MAX_JSLOTS]; /* Parent instruction to RegSP map. */ } ASMState; +#ifdef LUA_USE_ASSERT +#define lj_assertA(c, ...) lj_assertG_(J2G(as->J), (c), __VA_ARGS__) +#else +#define lj_assertA(c, ...) ((void)as) +#endif + #define IR(ref) (&as->ir[(ref)]) #define ASMREF_TMP1 REF_TRUE /* Temp. register. */ @@ -128,9 +137,8 @@ static LJ_AINLINE void checkmclim(ASMState *as) #ifdef LUA_USE_ASSERT if (as->mcp + MCLIM_REDZONE < as->mcp_prev) { IRIns *ir = IR(as->curins+1); - fprintf(stderr, "RED ZONE OVERFLOW: %p IR %04d %02d %04d %04d\n", as->mcp, - as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); - lua_assert(0); + lj_assertA(0, "red zone overflow: %p IR %04d %02d %04d %04d\n", as->mcp, + as->curins+1-REF_BIAS, ir->o, ir->op1-REF_BIAS, ir->op2-REF_BIAS); } #endif if (LJ_UNLIKELY(as->mcp < as->mclim)) asm_mclimit(as); @@ -144,7 +152,7 @@ static LJ_AINLINE void checkmclim(ASMState *as) #define ra_krefreg(ref) ((Reg)(RID_MIN_KREF + (Reg)(ref))) #define ra_krefk(as, ref) (as->krefk[(ref)]) -static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, int32_t k) +static LJ_AINLINE void ra_setkref(ASMState *as, Reg r, intptr_t k) { IRRef ref = (IRRef)(r - RID_MIN_KREF); as->krefk[ref] = k; @@ -171,6 +179,8 @@ IRFLDEF(FLOFS) #include "lj_emit_x86.h" #elif LJ_TARGET_ARM #include "lj_emit_arm.h" +#elif LJ_TARGET_ARM64 +#include "lj_emit_arm64.h" #elif LJ_TARGET_PPC #include "lj_emit_ppc.h" #elif LJ_TARGET_MIPS @@ -179,6 +189,12 @@ IRFLDEF(FLOFS) #error "Missing instruction emitter for target CPU" #endif +/* Generic load/store of register from/to stack slot. */ +#define emit_spload(as, ir, r, ofs) \ + emit_loadofs(as, ir, (r), RID_SP, (ofs)) +#define emit_spstore(as, ir, r, ofs) \ + emit_storeofs(as, ir, (r), RID_SP, (ofs)) + /* -- Register allocator debugging ---------------------------------------- */ /* #define LUAJIT_DEBUG_RA */ @@ -236,7 +252,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) *p++ = *q >= 'A' && *q <= 'Z' ? *q + 0x20 : *q; } else { *p++ = '?'; - lua_assert(0); + lj_assertA(0, "bad register %d for debug format \"%s\"", r, fmt); } } else if (e[1] == 'f' || e[1] == 'i') { IRRef ref; @@ -254,7 +270,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...) } else if (e[1] == 'x') { p += sprintf(p, "%08x", va_arg(argp, int32_t)); } else { - lua_assert(0); + lj_assertA(0, "bad debug format code"); } fmt = e+2; } @@ -313,37 +329,51 @@ static Reg ra_rematk(ASMState *as, IRRef ref) Reg r; if (ra_iskref(ref)) { r = ra_krefreg(ref); - lua_assert(!rset_test(as->freeset, r)); + lj_assertA(!rset_test(as->freeset, r), "rematk of free reg %d", r); ra_free(as, r); ra_modified(as, r); +#if LJ_64 + emit_loadu64(as, r, ra_krefk(as, ref)); +#else emit_loadi(as, r, ra_krefk(as, ref)); +#endif return r; } ir = IR(ref); r = ir->r; - lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); + lj_assertA(ra_hasreg(r), "rematk of K%03d has no reg", REF_BIAS - ref); + lj_assertA(!ra_hasspill(ir->s), + "rematk of K%03d has spill slot [%x]", REF_BIAS - ref, ir->s); ra_free(as, r); ra_modified(as, r); ir->r = RID_INIT; /* Do not keep any hint. */ RA_DBGX((as, "remat $i $r", ir, r)); -#if !LJ_SOFTFP +#if !LJ_SOFTFP32 if (ir->o == IR_KNUM) { - emit_loadn(as, r, ir_knum(ir)); + emit_loadk64(as, r, ir); } else #endif if (emit_canremat(REF_BASE) && ir->o == IR_BASE) { ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */ emit_getgl(as, r, jit_base); } else if (emit_canremat(ASMREF_L) && ir->o == IR_KPRI) { - lua_assert(irt_isnil(ir->t)); /* REF_NIL stores ASMREF_L register. */ - emit_getgl(as, r, jit_L); + /* REF_NIL stores ASMREF_L register. */ + lj_assertA(irt_isnil(ir->t), "rematk of bad ASMREF_L"); + emit_getgl(as, r, cur_L); #if LJ_64 } else if (ir->o == IR_KINT64) { emit_loadu64(as, r, ir_kint64(ir)->u64); +#if LJ_GC64 + } else if (ir->o == IR_KGC) { + emit_loadu64(as, r, (uintptr_t)ir_kgc(ir)); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + emit_loadu64(as, r, (uintptr_t)ir_kptr(ir)); +#endif #endif } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || - ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); + lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || + ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, + "rematk of bad IR op %d", ir->o); emit_loadi(as, r, ir->i); } return r; @@ -353,7 +383,8 @@ static Reg ra_rematk(ASMState *as, IRRef ref) static int32_t ra_spill(ASMState *as, IRIns *ir) { int32_t slot = ir->s; - lua_assert(ir >= as->ir + REF_TRUE); + lj_assertA(ir >= as->ir + REF_TRUE, + "spill of K%03d", REF_BIAS - (int)(ir - as->ir)); if (!ra_hasspill(slot)) { if (irt_is64(ir->t)) { slot = as->evenspill; @@ -378,7 +409,9 @@ static Reg ra_releasetmp(ASMState *as, IRRef ref) { IRIns *ir = IR(ref); Reg r = ir->r; - lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s)); + lj_assertA(ra_hasreg(r), "release of TMP%d has no reg", ref-ASMREF_TMP1+1); + lj_assertA(!ra_hasspill(ir->s), + "release of TMP%d has spill slot [%x]", ref-ASMREF_TMP1+1, ir->s); ra_free(as, r); ra_modified(as, r); ir->r = RID_INIT; @@ -394,7 +427,7 @@ static Reg ra_restore(ASMState *as, IRRef ref) IRIns *ir = IR(ref); int32_t ofs = ra_spill(as, ir); /* Force a spill slot. */ Reg r = ir->r; - lua_assert(ra_hasreg(r)); + lj_assertA(ra_hasreg(r), "restore of IR %04d has no reg", ref - REF_BIAS); ra_sethint(ir->r, r); /* Keep hint. */ ra_free(as, r); if (!rset_test(as->weakset, r)) { /* Only restore non-weak references. */ @@ -423,14 +456,15 @@ static Reg ra_evict(ASMState *as, RegSet allow) { IRRef ref; RegCost cost = ~(RegCost)0; - lua_assert(allow != RSET_EMPTY); + lj_assertA(allow != RSET_EMPTY, "evict from empty set"); if (RID_NUM_FPR == 0 || allow < RID2RSET(RID_MAX_GPR)) { GPRDEF(MINCOST) } else { FPRDEF(MINCOST) } ref = regcost_ref(cost); - lua_assert(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins)); + lj_assertA(ra_iskref(ref) || (ref >= as->T->nk && ref < as->T->nins), + "evict of out-of-range IR %04d", ref - REF_BIAS); /* Preferably pick any weak ref instead of a non-weak, non-const ref. */ if (!irref_isk(ref) && (as->weakset & allow)) { IRIns *ir = IR(ref); @@ -512,7 +546,7 @@ static void ra_evictk(ASMState *as) #ifdef RID_NUM_KREF /* Allocate a register for a constant. */ -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow) { /* First try to find a register which already holds the same constant. */ RegSet pick, work = ~as->freeset & RSET_GPR; @@ -521,9 +555,31 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) IRRef ref; r = rset_pickbot(work); ref = regcost_ref(as->cost[r]); +#if LJ_64 + if (ref < ASMREF_L) { + if (ra_iskref(ref)) { + if (k == ra_krefk(as, ref)) + return r; + } else { + IRIns *ir = IR(ref); + if ((ir->o == IR_KINT64 && k == (int64_t)ir_kint64(ir)->u64) || +#if LJ_GC64 + (ir->o == IR_KINT && k == ir->i) || + (ir->o == IR_KGC && k == (intptr_t)ir_kgc(ir)) || + ((ir->o == IR_KPTR || ir->o == IR_KKPTR) && + k == (intptr_t)ir_kptr(ir)) +#else + (ir->o != IR_KINT64 && k == ir->i) +#endif + ) + return r; + } + } +#else if (ref < ASMREF_L && k == (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i)) return r; +#endif rset_clear(work, r); } pick = as->freeset & allow; @@ -543,7 +599,7 @@ static Reg ra_allock(ASMState *as, int32_t k, RegSet allow) } /* Allocate a specific register for a constant. */ -static void ra_allockreg(ASMState *as, int32_t k, Reg r) +static void ra_allockreg(ASMState *as, intptr_t k, Reg r) { Reg kr = ra_allock(as, k, RID2RSET(r)); if (kr != r) { @@ -566,7 +622,8 @@ static Reg ra_allocref(ASMState *as, IRRef ref, RegSet allow) IRIns *ir = IR(ref); RegSet pick = as->freeset & allow; Reg r; - lua_assert(ra_noreg(ir->r)); + lj_assertA(ra_noreg(ir->r), + "IR %04d already has reg %d", ref - REF_BIAS, ir->r); if (pick) { /* First check register hint from propagation or PHI. */ if (ra_hashint(ir->r)) { @@ -613,15 +670,27 @@ static Reg ra_alloc1(ASMState *as, IRRef ref, RegSet allow) return r; } +/* Add a register rename to the IR. */ +static void ra_addrename(ASMState *as, Reg down, IRRef ref, SnapNo snapno) +{ + IRRef ren; + lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, snapno); + ren = tref_ref(lj_ir_emit(as->J)); + as->J->cur.ir[ren].r = (uint8_t)down; + as->J->cur.ir[ren].s = SPS_NONE; +} + /* Rename register allocation and emit move. */ static void ra_rename(ASMState *as, Reg down, Reg up) { - IRRef ren, ref = regcost_ref(as->cost[up] = as->cost[down]); + IRRef ref = regcost_ref(as->cost[up] = as->cost[down]); IRIns *ir = IR(ref); ir->r = (uint8_t)up; as->cost[down] = 0; - lua_assert((down < RID_MAX_GPR) == (up < RID_MAX_GPR)); - lua_assert(!rset_test(as->freeset, down) && rset_test(as->freeset, up)); + lj_assertA((down < RID_MAX_GPR) == (up < RID_MAX_GPR), + "rename between GPR/FPR %d and %d", down, up); + lj_assertA(!rset_test(as->freeset, down), "rename from free reg %d", down); + lj_assertA(rset_test(as->freeset, up), "rename to non-free reg %d", up); ra_free(as, down); /* 'down' is free ... */ ra_modified(as, down); rset_clear(as->freeset, up); /* ... and 'up' is now allocated. */ @@ -629,11 +698,14 @@ static void ra_rename(ASMState *as, Reg down, Reg up) RA_DBGX((as, "rename $f $r $r", regcost_ref(as->cost[up]), down, up)); emit_movrr(as, ir, down, up); /* Backwards codegen needs inverse move. */ if (!ra_hasspill(IR(ref)->s)) { /* Add the rename to the IR. */ - lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), ref, as->snapno); - ren = tref_ref(lj_ir_emit(as->J)); - as->ir = as->T->ir; /* The IR may have been reallocated. */ - IR(ren)->r = (uint8_t)down; - IR(ren)->s = SPS_NONE; + /* + ** The rename is effective at the subsequent (already emitted) exit + ** branch. This is for the current snapshot (as->snapno). Except if we + ** haven't yet allocated any refs for the snapshot (as->snapalloc == 1), + ** then it belongs to the next snapshot. + ** See also the discussion at asm_snap_checkrename(). + */ + ra_addrename(as, down, ref, as->snapno + as->snapalloc); } } @@ -666,7 +738,7 @@ static void ra_destreg(ASMState *as, IRIns *ir, Reg r) { Reg dest = ra_dest(as, ir, RID2RSET(r)); if (dest != r) { - lua_assert(rset_test(as->freeset, r)); + lj_assertA(rset_test(as->freeset, r), "dest reg %d is not free", r); ra_modified(as, r); emit_movrr(as, ir, dest, r); } @@ -683,20 +755,25 @@ static void ra_left(ASMState *as, Reg dest, IRRef lref) if (ra_noreg(left)) { if (irref_isk(lref)) { if (ir->o == IR_KNUM) { - cTValue *tv = ir_knum(ir); /* FP remat needs a load except for +0. Still better than eviction. */ - if (tvispzero(tv) || !(as->freeset & RSET_FPR)) { - emit_loadn(as, dest, tv); + if (tvispzero(ir_knum(ir)) || !(as->freeset & RSET_FPR)) { + emit_loadk64(as, dest, ir); return; } #if LJ_64 } else if (ir->o == IR_KINT64) { - emit_loadu64(as, dest, ir_kint64(ir)->u64); + emit_loadk64(as, dest, ir); + return; +#if LJ_GC64 + } else if (ir->o == IR_KGC || ir->o == IR_KPTR || ir->o == IR_KKPTR) { + emit_loadk64(as, dest, ir); return; #endif - } else { - lua_assert(ir->o == IR_KINT || ir->o == IR_KGC || - ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL); +#endif + } else if (ir->o != IR_KPRI) { + lj_assertA(ir->o == IR_KINT || ir->o == IR_KGC || + ir->o == IR_KPTR || ir->o == IR_KKPTR || ir->o == IR_KNULL, + "K%03d has bad IR op %d", REF_BIAS - lref, ir->o); emit_loadi(as, dest, ir->i); return; } @@ -741,11 +818,11 @@ static void ra_leftov(ASMState *as, Reg dest, IRRef lref) } #endif -#if !LJ_64 /* Force a RID_RETLO/RID_RETHI destination register pair (marked as free). */ static void ra_destpair(ASMState *as, IRIns *ir) { Reg destlo = ir->r, desthi = (ir+1)->r; + IRIns *irx = (LJ_64 && !irt_is64(ir->t)) ? ir+1 : ir; /* First spill unrelated refs blocking the destination registers. */ if (!rset_test(as->freeset, RID_RETLO) && destlo != RID_RETLO && desthi != RID_RETLO) @@ -769,29 +846,29 @@ static void ra_destpair(ASMState *as, IRIns *ir) /* Check for conflicts and shuffle the registers as needed. */ if (destlo == RID_RETHI) { if (desthi == RID_RETLO) { -#if LJ_TARGET_X86 +#if LJ_TARGET_X86ORX64 *--as->mcp = XI_XCHGa + RID_RETHI; + if (LJ_64 && irt_is64(irx->t)) *--as->mcp = 0x48; #else - emit_movrr(as, ir, RID_RETHI, RID_TMP); - emit_movrr(as, ir, RID_RETLO, RID_RETHI); - emit_movrr(as, ir, RID_TMP, RID_RETLO); + emit_movrr(as, irx, RID_RETHI, RID_TMP); + emit_movrr(as, irx, RID_RETLO, RID_RETHI); + emit_movrr(as, irx, RID_TMP, RID_RETLO); #endif } else { - emit_movrr(as, ir, RID_RETHI, RID_RETLO); - if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); + emit_movrr(as, irx, RID_RETHI, RID_RETLO); + if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI); } } else if (desthi == RID_RETLO) { - emit_movrr(as, ir, RID_RETLO, RID_RETHI); - if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); + emit_movrr(as, irx, RID_RETLO, RID_RETHI); + if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO); } else { - if (desthi != RID_RETHI) emit_movrr(as, ir, desthi, RID_RETHI); - if (destlo != RID_RETLO) emit_movrr(as, ir, destlo, RID_RETLO); + if (desthi != RID_RETHI) emit_movrr(as, irx, desthi, RID_RETHI); + if (destlo != RID_RETLO) emit_movrr(as, irx, destlo, RID_RETLO); } /* Restore spill slots (if any). */ if (ra_hasspill((ir+1)->s)) ra_save(as, ir+1, RID_RETHI); if (ra_hasspill(ir->s)) ra_save(as, ir, RID_RETLO); } -#endif /* -- Snapshot handling --------- ----------------------------------------- */ @@ -841,11 +918,14 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) #endif { /* Allocate stored values for TNEW, TDUP and CNEW. */ IRIns *irs; - lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW); + lj_assertA(ir->o == IR_TNEW || ir->o == IR_TDUP || ir->o == IR_CNEW, + "sink of IR %04d has bad op %d", ref - REF_BIAS, ir->o); for (irs = IR(as->snapref-1); irs > ir; irs--) if (irs->r == RID_SINK && asm_sunk_store(as, ir, irs)) { - lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || - irs->o == IR_FSTORE || irs->o == IR_XSTORE); + lj_assertA(irs->o == IR_ASTORE || irs->o == IR_HSTORE || + irs->o == IR_FSTORE || irs->o == IR_XSTORE, + "sunk store IR %04d has bad op %d", + (int)(irs - as->ir) - REF_BIAS, irs->o); asm_snap_alloc1(as, irs->op2); if (LJ_32 && (irs+1)->o == IR_HIOP) asm_snap_alloc1(as, (irs+1)->op2); @@ -881,9 +961,9 @@ static void asm_snap_alloc1(ASMState *as, IRRef ref) } /* Allocate refs escaping to a snapshot. */ -static void asm_snap_alloc(ASMState *as) +static void asm_snap_alloc(ASMState *as, int snapno) { - SnapShot *snap = &as->T->snap[as->snapno]; + SnapShot *snap = &as->T->snap[snapno]; SnapEntry *map = &as->T->snapmap[snap->mapofs]; MSize n, nent = snap->nent; as->snapfilt1 = as->snapfilt2 = 0; @@ -893,7 +973,9 @@ static void asm_snap_alloc(ASMState *as) if (!irref_isk(ref)) { asm_snap_alloc1(as, ref); if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) { - lua_assert(irt_type(IR(ref+1)->t) == IRT_SOFTFP); + lj_assertA(irt_type(IR(ref+1)->t) == IRT_SOFTFP, + "snap %d[%d] points to bad SOFTFP IR %04d", + snapno, n, ref - REF_BIAS); asm_snap_alloc1(as, ref+1); } } @@ -919,67 +1001,55 @@ static int asm_snap_checkrename(ASMState *as, IRRef ren) return 0; /* Not found. */ } -/* Prepare snapshot for next guard instruction. */ +/* Prepare snapshot for next guard or throwing instruction. */ static void asm_snap_prep(ASMState *as) { - if (as->curins < as->snapref) { - do { - if (as->snapno == 0) return; /* Called by sunk stores before snap #0. */ - as->snapno--; - as->snapref = as->T->snap[as->snapno].ref; - } while (as->curins < as->snapref); - asm_snap_alloc(as); + if (as->snapalloc) { + /* Alloc on first invocation for each snapshot. */ + as->snapalloc = 0; + asm_snap_alloc(as, as->snapno); as->snaprename = as->T->nins; } else { - /* Process any renames above the highwater mark. */ + /* Check any renames above the highwater mark. */ for (; as->snaprename < as->T->nins; as->snaprename++) { - IRIns *ir = IR(as->snaprename); + IRIns *ir = &as->T->ir[as->snaprename]; if (asm_snap_checkrename(as, ir->op1)) ir->op2 = REF_BIAS-1; /* Kill rename. */ } } } -/* -- Miscellaneous helpers ----------------------------------------------- */ - -/* Collect arguments from CALL* and CARG instructions. */ -static void asm_collectargs(ASMState *as, IRIns *ir, - const CCallInfo *ci, IRRef *args) +/* Move to previous snapshot when we cross the current snapshot ref. */ +static void asm_snap_prev(ASMState *as) { - uint32_t n = CCI_NARGS(ci); - lua_assert(n <= CCI_NARGS_MAX*2); /* Account for split args. */ - if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } - while (n-- > 1) { - ir = IR(ir->op1); - lua_assert(ir->o == IR_CARG); - args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; + if (as->curins < as->snapref) { + uintptr_t ofs = (uintptr_t)(as->mctoporig - as->mcp); + if (ofs >= 0x10000) lj_trace_err(as->J, LJ_TRERR_MCODEOV); + do { + if (as->snapno == 0) return; + as->snapno--; + as->snapref = as->T->snap[as->snapno].ref; + as->T->snap[as->snapno].mcofs = (uint16_t)ofs; /* Remember mcode ofs. */ + } while (as->curins < as->snapref); /* May have no ins inbetween. */ + as->snapalloc = 1; } - args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; - lua_assert(IR(ir->op1)->o != IR_CARG); } -/* Reconstruct CCallInfo flags for CALLX*. */ -static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) +/* Fixup snapshot mcode offsetst. */ +static void asm_snap_fixup_mcofs(ASMState *as) { - uint32_t nargs = 0; - if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ - IRIns *ira = IR(ir->op1); - nargs++; - while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } + uint32_t sz = (uint32_t)(as->mctoporig - as->mcp); + SnapShot *snap = as->T->snap; + SnapNo i; + for (i = as->T->nsnap-1; i > 0; i--) { + /* Compute offset from mcode start and store in correct snapshot. */ + snap[i].mcofs = (uint16_t)(sz - snap[i-1].mcofs); } -#if LJ_HASFFI - if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ - CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; - CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); - nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); -#if LJ_TARGET_X86 - nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); -#endif - } -#endif - return (nargs | (ir->t.irt << CCI_OTSHIFT)); + snap[0].mcofs = 0; } +/* -- Miscellaneous helpers ----------------------------------------------- */ + /* Calculate stack adjustment. */ static int32_t asm_stack_adjust(ASMState *as) { @@ -989,21 +1059,26 @@ static int32_t asm_stack_adjust(ASMState *as) } /* Must match with hash*() in lj_tab.c. */ -static uint32_t ir_khash(IRIns *ir) +static uint32_t ir_khash(ASMState *as, IRIns *ir) { uint32_t lo, hi; + UNUSED(as); if (irt_isstr(ir->t)) { - return ir_kstr(ir)->hash; + return ir_kstr(ir)->sid; } else if (irt_isnum(ir->t)) { lo = ir_knum(ir)->u32.lo; hi = ir_knum(ir)->u32.hi << 1; } else if (irt_ispri(ir->t)) { - lua_assert(!irt_isnil(ir->t)); + lj_assertA(!irt_isnil(ir->t), "hash of nil key"); return irt_type(ir->t)-IRT_FALSE; } else { - lua_assert(irt_isgcv(ir->t)); + lj_assertA(irt_isgcv(ir->t), "hash of bad IR type %d", irt_type(ir->t)); lo = u32ptr(ir_kgc(ir)); +#if LJ_GC64 + hi = (uint32_t)(u64ptr(ir_kgc(ir)) >> 32) | (irt_toitype(ir->t) << 15); +#else hi = lo + HASH_BIAS; +#endif } return hashrot(lo, hi); } @@ -1017,6 +1092,7 @@ static void asm_snew(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new]; IRRef args[3]; + asm_snap_prep(as); args[0] = ASMREF_L; /* lua_State *L */ args[1] = ir->op1; /* const char *str */ args[2] = ir->op2; /* size_t len */ @@ -1029,6 +1105,7 @@ static void asm_tnew(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1]; IRRef args[2]; + asm_snap_prep(as); args[0] = ASMREF_L; /* lua_State *L */ args[1] = ASMREF_TMP1; /* uint32_t ahsize */ as->gcsteps++; @@ -1041,6 +1118,7 @@ static void asm_tdup(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup]; IRRef args[2]; + asm_snap_prep(as); args[0] = ASMREF_L; /* lua_State *L */ args[1] = ir->op1; /* const GCtab *kt */ as->gcsteps++; @@ -1064,6 +1142,260 @@ static void asm_gcstep(ASMState *as, IRIns *ir) as->gcsteps = 0x80000000; /* Prevent implicit GC check further up. */ } +/* -- Buffer operations --------------------------------------------------- */ + +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode); +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb); +#endif + +static void asm_bufhdr(ASMState *as, IRIns *ir) +{ + Reg sb = ra_dest(as, ir, RSET_GPR); + switch (ir->op2) { + case IRBUFHDR_RESET: { + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irbp; + irbp.ot = IRT(0, IRT_PTR); /* Buffer data pointer type. */ + emit_storeofs(as, &irbp, tmp, sb, offsetof(SBuf, w)); + emit_loadofs(as, &irbp, tmp, sb, offsetof(SBuf, b)); + break; + } + case IRBUFHDR_APPEND: { + /* Rematerialize const buffer pointer instead of likely spill. */ + IRIns *irp = IR(ir->op1); + if (!(ra_hasreg(irp->r) || irp == ir-1 || + (irp == ir-2 && !ra_used(ir-1)))) { + while (!(irp->o == IR_BUFHDR && irp->op2 == IRBUFHDR_RESET)) + irp = IR(irp->op1); + if (irref_isk(irp->op1)) { + ra_weak(as, ra_allocref(as, ir->op1, RSET_GPR)); + ir = irp; + } + } + break; + } +#if LJ_HASBUFFER + case IRBUFHDR_WRITE: + asm_bufhdr_write(as, sb); + break; +#endif + default: lj_assertA(0, "bad BUFHDR op2 %d", ir->op2); break; + } +#if LJ_TARGET_X86ORX64 + ra_left(as, sb, ir->op1); +#else + ra_leftov(as, sb, ir->op1); +#endif +} + +static void asm_bufput(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_putstr]; + IRRef args[3]; + IRIns *irs; + int kchar = -129; + args[0] = ir->op1; /* SBuf * */ + args[1] = ir->op2; /* GCstr * */ + irs = IR(ir->op2); + lj_assertA(irt_isstr(irs->t), + "BUFPUT of non-string IR %04d", ir->op2 - REF_BIAS); + if (irs->o == IR_KGC) { + GCstr *s = ir_kstr(irs); + if (s->len == 1) { /* Optimize put of single-char string constant. */ + kchar = (int8_t)strdata(s)[0]; /* Signed! */ + args[1] = ASMREF_TMP1; /* int, truncated to char */ + ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; + } + } else if (mayfuse(as, ir->op2) && ra_noreg(irs->r)) { + if (irs->o == IR_TOSTR) { /* Fuse number to string conversions. */ + if (irs->op2 == IRTOSTR_NUM) { + args[1] = ASMREF_TMP1; /* TValue * */ + ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putnum]; + } else { + lj_assertA(irt_isinteger(IR(irs->op1)->t), + "TOSTR of non-numeric IR %04d", irs->op1); + args[1] = irs->op1; /* int */ + if (irs->op2 == IRTOSTR_INT) + ci = &lj_ir_callinfo[IRCALL_lj_strfmt_putint]; + else + ci = &lj_ir_callinfo[IRCALL_lj_buf_putchar]; + } + } else if (irs->o == IR_SNEW) { /* Fuse string allocation. */ + args[1] = irs->op1; /* const void * */ + args[2] = irs->op2; /* MSize */ + ci = &lj_ir_callinfo[IRCALL_lj_buf_putmem]; + } + } + asm_setupresult(as, ir, ci); /* SBuf * */ + asm_gencall(as, ci, args); + if (args[1] == ASMREF_TMP1) { + Reg tmp = ra_releasetmp(as, ASMREF_TMP1); + if (kchar == -129) + asm_tvptr(as, tmp, irs->op1, IRTMPREF_IN1); + else + ra_allockreg(as, kchar, tmp); + } +} + +static void asm_bufstr(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_buf_tostr]; + IRRef args[1]; + args[0] = ir->op1; /* SBuf *sb */ + as->gcsteps++; + asm_setupresult(as, ir, ci); /* GCstr * */ + asm_gencall(as, ci, args); +} + +/* -- Type conversions ---------------------------------------------------- */ + +static void asm_tostr(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci; + IRRef args[2]; + asm_snap_prep(as); + args[0] = ASMREF_L; + as->gcsteps++; + if (ir->op2 == IRTOSTR_NUM) { + args[1] = ASMREF_TMP1; /* cTValue * */ + ci = &lj_ir_callinfo[IRCALL_lj_strfmt_num]; + } else { + args[1] = ir->op1; /* int32_t k */ + if (ir->op2 == IRTOSTR_INT) + ci = &lj_ir_callinfo[IRCALL_lj_strfmt_int]; + else + ci = &lj_ir_callinfo[IRCALL_lj_strfmt_char]; + } + asm_setupresult(as, ir, ci); /* GCstr * */ + asm_gencall(as, ci, args); + if (ir->op2 == IRTOSTR_NUM) + asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1, IRTMPREF_IN1); +} + +#if LJ_32 && LJ_HASFFI && !LJ_SOFTFP && !LJ_TARGET_X86 +static void asm_conv64(ASMState *as, IRIns *ir) +{ + IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); + IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); + IRCallID id; + IRRef args[2]; + lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, + "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); + args[LJ_BE] = (ir-1)->op1; + args[LJ_LE] = ir->op1; + if (st == IRT_NUM || st == IRT_FLOAT) { + id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); + ir--; + } else { + id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); + } + { +#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP + CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; + cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ +#else + const CCallInfo *ci = &lj_ir_callinfo[id]; +#endif + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); + } +} +#endif + +/* -- Memory references --------------------------------------------------- */ + +static void asm_newref(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; + IRRef args[3]; + if (ir->r == RID_SINK) + return; + asm_snap_prep(as); + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* GCtab *t */ + args[2] = ASMREF_TMP1; /* cTValue *key */ + asm_setupresult(as, ir, ci); /* TValue * */ + asm_gencall(as, ci, args); + asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2, IRTMPREF_IN1); +} + +static void asm_tmpref(ASMState *as, IRIns *ir) +{ + Reg r = ra_dest(as, ir, RSET_GPR); + asm_tvptr(as, r, ir->op1, ir->op2); +} + +static void asm_lref(ASMState *as, IRIns *ir) +{ + Reg r = ra_dest(as, ir, RSET_GPR); +#if LJ_TARGET_X86ORX64 + ra_left(as, r, ASMREF_L); +#else + ra_leftov(as, r, ASMREF_L); +#endif +} + +/* -- Calls --------------------------------------------------------------- */ + +/* Collect arguments from CALL* and CARG instructions. */ +static void asm_collectargs(ASMState *as, IRIns *ir, + const CCallInfo *ci, IRRef *args) +{ + uint32_t n = CCI_XNARGS(ci); + /* Account for split args. */ + lj_assertA(n <= CCI_NARGS_MAX*2, "too many args %d to collect", n); + if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; } + while (n-- > 1) { + ir = IR(ir->op1); + lj_assertA(ir->o == IR_CARG, "malformed CALL arg tree"); + args[n] = ir->op2 == REF_NIL ? 0 : ir->op2; + } + args[0] = ir->op1 == REF_NIL ? 0 : ir->op1; + lj_assertA(IR(ir->op1)->o != IR_CARG, "malformed CALL arg tree"); +} + +/* Reconstruct CCallInfo flags for CALLX*. */ +static uint32_t asm_callx_flags(ASMState *as, IRIns *ir) +{ + uint32_t nargs = 0; + if (ir->op1 != REF_NIL) { /* Count number of arguments first. */ + IRIns *ira = IR(ir->op1); + nargs++; + while (ira->o == IR_CARG) { nargs++; ira = IR(ira->op1); } + } +#if LJ_HASFFI + if (IR(ir->op2)->o == IR_CARG) { /* Copy calling convention info. */ + CTypeID id = (CTypeID)IR(IR(ir->op2)->op2)->i; + CType *ct = ctype_get(ctype_ctsG(J2G(as->J)), id); + nargs |= ((ct->info & CTF_VARARG) ? CCI_VARARG : 0); +#if LJ_TARGET_X86 + nargs |= (ctype_cconv(ct->info) << CCI_CC_SHIFT); +#endif + } +#endif + return (nargs | (ir->t.irt << CCI_OTSHIFT)); +} + +static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) +{ + const CCallInfo *ci = &lj_ir_callinfo[id]; + IRRef args[2]; + args[0] = ir->op1; + args[1] = ir->op2; + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); +} + +static void asm_call(ASMState *as, IRIns *ir) +{ + IRRef args[CCI_NARGS_MAX]; + const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; + asm_collectargs(as, ir, ci, args); + asm_setupresult(as, ir, ci); + asm_gencall(as, ci, args); +} + /* -- PHI and loop handling ----------------------------------------------- */ /* Break a PHI cycle by renaming to a free register (evict if needed). */ @@ -1249,12 +1581,7 @@ static void asm_phi_fixup(ASMState *as) irt_clearmark(ir->t); /* Left PHI gained a spill slot before the loop? */ if (ra_hasspill(ir->s)) { - IRRef ren; - lj_ir_set(as->J, IRT(IR_RENAME, IRT_NIL), lref, as->loopsnapno); - ren = tref_ref(lj_ir_emit(as->J)); - as->ir = as->T->ir; /* The IR may have been reallocated. */ - IR(ren)->r = (uint8_t)r; - IR(ren)->s = SPS_NONE; + ra_addrename(as, r, lref, as->loopsnapno); } } rset_clear(work, r); @@ -1329,6 +1656,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_x86.h" #elif LJ_TARGET_ARM #include "lj_asm_arm.h" +#elif LJ_TARGET_ARM64 +#include "lj_asm_arm64.h" #elif LJ_TARGET_PPC #include "lj_asm_ppc.h" #elif LJ_TARGET_MIPS @@ -1337,6 +1666,200 @@ static void asm_loop(ASMState *as) #error "Missing assembler for target CPU" #endif +/* -- Common instruction helpers ------------------------------------------ */ + +#if !LJ_SOFTFP32 +#if !LJ_TARGET_X86ORX64 +#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) +#endif + +static void asm_pow(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : + IRCALL_lj_carith_powu64); + else +#endif + asm_callid(as, ir, IRCALL_pow); +} + +static void asm_div(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isnum(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : + IRCALL_lj_carith_divu64); + else +#endif + asm_fpdiv(as, ir); +} +#endif + +static void asm_mod(ASMState *as, IRIns *ir) +{ +#if LJ_64 && LJ_HASFFI + if (!irt_isint(ir->t)) + asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : + IRCALL_lj_carith_modu64); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_modi); +} + +static void asm_fuseequal(ASMState *as, IRIns *ir) +{ + /* Fuse HREF + EQ/NE. */ + if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { + as->curins--; + asm_href(as, ir-1, (IROp)ir->o); + } else { + asm_equal(as, ir); + } +} + +static void asm_alen(ASMState *as, IRIns *ir) +{ + asm_callid(as, ir, ir->op2 == REF_NIL ? IRCALL_lj_tab_len : + IRCALL_lj_tab_len_hint); +} + +/* -- Instruction dispatch ------------------------------------------------ */ + +/* Assemble a single instruction. */ +static void asm_ir(ASMState *as, IRIns *ir) +{ + switch ((IROp)ir->o) { + /* Miscellaneous ops. */ + case IR_LOOP: asm_loop(as); break; + case IR_NOP: case IR_XBAR: + lj_assertA(!ra_used(ir), + "IR %04d not unused", (int)(ir - as->ir) - REF_BIAS); + break; + case IR_USE: + ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; + case IR_PHI: asm_phi(as, ir); break; + case IR_HIOP: asm_hiop(as, ir); break; + case IR_GCSTEP: asm_gcstep(as, ir); break; + case IR_PROF: asm_prof(as, ir); break; + + /* Guarded assertions. */ + case IR_LT: case IR_GE: case IR_LE: case IR_GT: + case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: + case IR_ABC: + asm_comp(as, ir); + break; + case IR_EQ: case IR_NE: asm_fuseequal(as, ir); break; + + case IR_RETF: asm_retf(as, ir); break; + + /* Bit ops. */ + case IR_BNOT: asm_bnot(as, ir); break; + case IR_BSWAP: asm_bswap(as, ir); break; + case IR_BAND: asm_band(as, ir); break; + case IR_BOR: asm_bor(as, ir); break; + case IR_BXOR: asm_bxor(as, ir); break; + case IR_BSHL: asm_bshl(as, ir); break; + case IR_BSHR: asm_bshr(as, ir); break; + case IR_BSAR: asm_bsar(as, ir); break; + case IR_BROL: asm_brol(as, ir); break; + case IR_BROR: asm_bror(as, ir); break; + + /* Arithmetic ops. */ + case IR_ADD: asm_add(as, ir); break; + case IR_SUB: asm_sub(as, ir); break; + case IR_MUL: asm_mul(as, ir); break; + case IR_MOD: asm_mod(as, ir); break; + case IR_NEG: asm_neg(as, ir); break; +#if LJ_SOFTFP32 + case IR_DIV: case IR_POW: case IR_ABS: + case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: + /* Unused for LJ_SOFTFP32. */ + lj_assertA(0, "IR %04d with unused op %d", + (int)(ir - as->ir) - REF_BIAS, ir->o); + break; +#else + case IR_DIV: asm_div(as, ir); break; + case IR_POW: asm_pow(as, ir); break; + case IR_ABS: asm_abs(as, ir); break; + case IR_LDEXP: asm_ldexp(as, ir); break; + case IR_FPMATH: asm_fpmath(as, ir); break; + case IR_TOBIT: asm_tobit(as, ir); break; +#endif + case IR_MIN: asm_min(as, ir); break; + case IR_MAX: asm_max(as, ir); break; + + /* Overflow-checking arithmetic ops. */ + case IR_ADDOV: asm_addov(as, ir); break; + case IR_SUBOV: asm_subov(as, ir); break; + case IR_MULOV: asm_mulov(as, ir); break; + + /* Memory references. */ + case IR_AREF: asm_aref(as, ir); break; + case IR_HREF: asm_href(as, ir, 0); break; + case IR_HREFK: asm_hrefk(as, ir); break; + case IR_NEWREF: asm_newref(as, ir); break; + case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; + case IR_FREF: asm_fref(as, ir); break; + case IR_TMPREF: asm_tmpref(as, ir); break; + case IR_STRREF: asm_strref(as, ir); break; + case IR_LREF: asm_lref(as, ir); break; + + /* Loads and stores. */ + case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: + asm_ahuvload(as, ir); + break; + case IR_FLOAD: asm_fload(as, ir); break; + case IR_XLOAD: asm_xload(as, ir); break; + case IR_SLOAD: asm_sload(as, ir); break; + case IR_ALEN: asm_alen(as, ir); break; + + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; + case IR_FSTORE: asm_fstore(as, ir); break; + case IR_XSTORE: asm_xstore(as, ir); break; + + /* Allocations. */ + case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; + case IR_TNEW: asm_tnew(as, ir); break; + case IR_TDUP: asm_tdup(as, ir); break; + case IR_CNEW: case IR_CNEWI: +#if LJ_HASFFI + asm_cnew(as, ir); +#else + lj_assertA(0, "IR %04d with unused op %d", + (int)(ir - as->ir) - REF_BIAS, ir->o); +#endif + break; + + /* Buffer operations. */ + case IR_BUFHDR: asm_bufhdr(as, ir); break; + case IR_BUFPUT: asm_bufput(as, ir); break; + case IR_BUFSTR: asm_bufstr(as, ir); break; + + /* Write barriers. */ + case IR_TBAR: asm_tbar(as, ir); break; + case IR_OBAR: asm_obar(as, ir); break; + + /* Type conversions. */ + case IR_CONV: asm_conv(as, ir); break; + case IR_TOSTR: asm_tostr(as, ir); break; + case IR_STRTO: asm_strto(as, ir); break; + + /* Calls. */ + case IR_CALLA: + as->gcsteps++; + /* fallthrough */ + case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; + case IR_CALLXS: asm_callx(as, ir); break; + case IR_CARG: break; + + default: + setintV(&as->J->errinfo, ir->o); + lj_trace_err_info(as->J, LJ_TRERR_NYIIR); + break; + } +} + /* -- Head of trace ------------------------------------------------------- */ /* Head of a root trace. */ @@ -1373,8 +1896,7 @@ static void asm_head_side(ASMState *as) if (as->snapno && as->topslot > as->parent->topslot) { /* Force snap #0 alloc to prevent register overwrite in stack check. */ - as->snapno = 0; - asm_snap_alloc(as); + asm_snap_alloc(as, 0); } allow = asm_head_side_base(as, irp, allow); @@ -1382,8 +1904,10 @@ static void asm_head_side(ASMState *as) for (i = as->stopins; i > REF_BASE; i--) { IRIns *ir = IR(i); RegSP rs; - lua_assert((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || - (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL); + lj_assertA((ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_PARENT)) || + (LJ_SOFTFP && ir->o == IR_HIOP) || ir->o == IR_PVAL, + "IR %04d has bad parent op %d", + (int)(ir - as->ir) - REF_BIAS, ir->o); rs = as->parentmap[i - REF_FIRST]; if (ra_hasreg(ir->r)) { rset_clear(allow, ir->r); @@ -1535,7 +2059,7 @@ static BCReg asm_baseslot(ASMState *as, SnapShot *snap, int *gotframe) SnapEntry sn = map[n-1]; if ((sn & SNAP_FRAME)) { *gotframe = 1; - return snap_slot(sn); + return snap_slot(sn) - LJ_FR2; } } return 0; @@ -1555,19 +2079,23 @@ static void asm_tail_link(ASMState *as) if (as->T->link == 0) { /* Setup fixed registers for exit to interpreter. */ - const BCIns *pc = snap_pc(as->T->snapmap[snap->mapofs + snap->nent]); + const BCIns *pc = snap_pc(&as->T->snapmap[snap->mapofs + snap->nent]); int32_t mres; if (bc_op(*pc) == BC_JLOOP) { /* NYI: find a better way to do this. */ BCIns *retpc = &traceref(as->J, bc_d(*pc))->startins; if (bc_isret(bc_op(*retpc))) pc = retpc; } +#if LJ_GC64 + emit_loadu64(as, RID_LPC, u64ptr(pc)); +#else ra_allockreg(as, i32ptr(J2GG(as->J)->dispatch), RID_DISPATCH); ra_allockreg(as, i32ptr(pc), RID_LPC); - mres = (int32_t)(snap->nslots - baseslot); +#endif + mres = (int32_t)(snap->nslots - baseslot - LJ_FR2); switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: - mres -= (int32_t)(1 + bc_a(*pc) + bc_c(*pc)); break; + mres -= (int32_t)(1 + LJ_FR2 + bc_a(*pc) + bc_c(*pc)); break; case BC_RETM: mres -= (int32_t)(bc_a(*pc) + bc_d(*pc)); break; case BC_TSETM: mres -= (int32_t)bc_a(*pc); break; default: if (bc_op(*pc) < BC_FUNCF) mres = 0; break; @@ -1579,6 +2107,11 @@ static void asm_tail_link(ASMState *as) } emit_addptr(as, RID_BASE, 8*(int32_t)baseslot); + if (as->J->ktrace) { /* Patch ktrace slot with the final GCtrace pointer. */ + setgcref(IR(as->J->ktrace)[LJ_GC64].gcr, obj2gco(as->J->curfinal)); + IR(as->J->ktrace)->o = IR_KGC; + } + /* Sync the interpreter state with the on-trace state. */ asm_stack_restore(as, snap); @@ -1602,22 +2135,32 @@ static void asm_setup_regsp(ASMState *as) #endif ra_setup(as); +#if LJ_TARGET_ARM64 + ra_setkref(as, RID_GL, (intptr_t)J2G(as->J)); +#endif /* Clear reg/sp for constants. */ - for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) + for (ir = IR(T->nk), lastir = IR(REF_BASE); ir < lastir; ir++) { ir->prev = REGSP_INIT; + if (irt_is64(ir->t) && ir->o != IR_KNULL) { +#if LJ_GC64 + /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ + ir->i = 0; /* Will become non-zero only for RIP-relative addresses. */ +#else + /* Make life easier for backends by putting address of constant in i. */ + ir->i = (int32_t)(intptr_t)(ir+1); +#endif + ir++; + } + } /* REF_BASE is used for implicit references to the BASE register. */ lastir->prev = REGSP_HINT(RID_BASE); - ir = IR(nins-1); - if (ir->o == IR_RENAME) { - do { ir--; nins--; } while (ir->o == IR_RENAME); - T->nins = nins; /* Remove any renames left over from ASM restart. */ - } as->snaprename = nins; as->snapref = nins; as->snapno = T->nsnap; + as->snapalloc = 0; as->stopins = REF_BASE; as->orignins = nins; @@ -1627,7 +2170,7 @@ static void asm_setup_regsp(ASMState *as) ir = IR(REF_FIRST); if (as->parent) { uint16_t *p; - lastir = lj_snap_regspmap(as->parent, as->J->exitno, ir); + lastir = lj_snap_regspmap(as->J, as->parent, as->J->exitno, ir); if (lastir - ir > LJ_MAX_JSLOTS) lj_trace_err(as->J, LJ_TRERR_NYICOAL); as->stopins = (IRRef)((lastir-1) - as->ir); @@ -1666,6 +2209,10 @@ static void asm_setup_regsp(ASMState *as) ir->prev = (uint16_t)REGSP_HINT((rload & 15)); rload = lj_ror(rload, 4); continue; + case IR_TMPREF: + if ((ir->op2 & IRTMPREF_OUT2) && as->evenspill < 4) + as->evenspill = 4; /* TMPREF OUT2 needs two TValues on the stack. */ + break; #endif case IR_CALLXS: { CCallInfo ci; @@ -1675,7 +2222,17 @@ static void asm_setup_regsp(ASMState *as) as->modset |= RSET_SCRATCH; continue; } - case IR_CALLN: case IR_CALLL: case IR_CALLS: { + case IR_CALLL: + /* lj_vm_next needs two TValues on the stack. */ +#if LJ_TARGET_X64 && LJ_ABI_WIN + if (ir->op2 == IRCALL_lj_vm_next && as->evenspill < SPS_FIRST + 4) + as->evenspill = SPS_FIRST + 4; +#else + if (SPS_FIRST < 4 && ir->op2 == IRCALL_lj_vm_next && as->evenspill < 4) + as->evenspill = 4; +#endif + /* fallthrough */ + case IR_CALLN: case IR_CALLA: case IR_CALLS: { const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; ir->prev = asm_setup_call_slots(as, ir, ci); if (inloop) @@ -1683,7 +2240,6 @@ static void asm_setup_regsp(ASMState *as) (RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH; continue; } -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) case IR_HIOP: switch ((ir-1)->o) { #if LJ_SOFTFP && LJ_TARGET_ARM @@ -1694,15 +2250,15 @@ static void asm_setup_regsp(ASMState *as) } break; #endif -#if !LJ_SOFTFP && LJ_NEED_FP64 +#if !LJ_SOFTFP && LJ_NEED_FP64 && LJ_32 && LJ_HASFFI case IR_CONV: if (irt_isfp((ir-1)->t)) { ir->prev = REGSP_HINT(RID_FPRET); continue; } - /* fallthrough */ #endif - case IR_CALLN: case IR_CALLXS: + /* fallthrough */ + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: #if LJ_SOFTFP case IR_MIN: case IR_MAX: #endif @@ -1713,18 +2269,29 @@ static void asm_setup_regsp(ASMState *as) break; } break; -#endif #if LJ_SOFTFP case IR_MIN: case IR_MAX: if ((ir+1)->o != IR_HIOP) break; #endif /* fallthrough */ /* C calls evict all scratch regs and return results in RID_RET. */ - case IR_SNEW: case IR_XSNEW: case IR_NEWREF: + case IR_SNEW: case IR_XSNEW: case IR_NEWREF: case IR_BUFPUT: if (REGARG_NUMGPR < 3 && as->evenspill < 3) as->evenspill = 3; /* lj_str_new and lj_tab_newkey need 3 args. */ +#if LJ_TARGET_X86 && LJ_HASFFI + if (0) { + case IR_CNEW: + if (ir->op2 != REF_NIL && as->evenspill < 4) + as->evenspill = 4; /* lj_cdata_newv needs 4 args. */ + } /* fallthrough */ - case IR_TNEW: case IR_TDUP: case IR_CNEW: case IR_CNEWI: case IR_TOSTR: +#else + /* fallthrough */ + case IR_CNEW: +#endif + /* fallthrough */ + case IR_TNEW: case IR_TDUP: case IR_CNEWI: case IR_TOSTR: + case IR_BUFSTR: ir->prev = REGSP_HINT(RID_RET); if (inloop) as->modset = RSET_SCRATCH; @@ -1733,58 +2300,73 @@ static void asm_setup_regsp(ASMState *as) if (inloop) as->modset = RSET_SCRATCH; break; -#if !LJ_TARGET_X86ORX64 && !LJ_SOFTFP - case IR_ATAN2: case IR_LDEXP: +#if !LJ_SOFTFP +#if !LJ_TARGET_X86ORX64 + case IR_LDEXP: #endif +#endif + /* fallthrough */ case IR_POW: if (!LJ_SOFTFP && irt_isnum(ir->t)) { -#if LJ_TARGET_X86ORX64 - ir->prev = REGSP_HINT(RID_XMM0); if (inloop) - as->modset |= RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); + as->modset |= RSET_SCRATCH; +#if LJ_TARGET_X86 + if (irt_isnum(IR(ir->op2)->t)) { + if (as->evenspill < 4) /* Leave room to call pow(). */ + as->evenspill = 4; + } + break; #else ir->prev = REGSP_HINT(RID_FPRET); - if (inloop) - as->modset |= RSET_SCRATCH; -#endif continue; +#endif } /* fallthrough */ /* for integer POW */ case IR_DIV: case IR_MOD: - if (!irt_isnum(ir->t)) { + if ((LJ_64 && LJ_SOFTFP) || !irt_isnum(ir->t)) { ir->prev = REGSP_HINT(RID_RET); if (inloop) as->modset |= (RSET_SCRATCH & RSET_GPR); continue; } break; - case IR_FPMATH: -#if LJ_TARGET_X86ORX64 - if (ir->op2 == IRFPM_EXP2) { /* May be joined to lj_vm_pow_sse. */ - ir->prev = REGSP_HINT(RID_XMM0); -#if !LJ_64 - if (as->evenspill < 4) /* Leave room for 16 byte scratch area. */ - as->evenspill = 4; -#endif - if (inloop) - as->modset |= RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); - continue; - } else if (ir->op2 <= IRFPM_TRUNC && !(as->flags & JIT_F_SSE4_1)) { - ir->prev = REGSP_HINT(RID_XMM0); +#if LJ_64 && LJ_SOFTFP + case IR_ADD: case IR_SUB: case IR_MUL: + if (irt_isnum(ir->t)) { + ir->prev = REGSP_HINT(RID_RET); if (inloop) - as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); + as->modset |= (RSET_SCRATCH & RSET_GPR); continue; } break; -#else - ir->prev = REGSP_HINT(RID_FPRET); +#endif + case IR_FPMATH: +#if LJ_TARGET_X86ORX64 + if (ir->op2 <= IRFPM_TRUNC) { + if (!(as->flags & JIT_F_SSE4_1)) { + ir->prev = REGSP_HINT(RID_XMM0); + if (inloop) + as->modset |= RSET_RANGE(RID_XMM0, RID_XMM3+1)|RID2RSET(RID_EAX); + continue; + } + break; + } +#endif if (inloop) as->modset |= RSET_SCRATCH; +#if LJ_TARGET_X86 + break; +#else + ir->prev = REGSP_HINT(RID_FPRET); continue; #endif #if LJ_TARGET_X86ORX64 /* Non-constant shift counts need to be in RID_ECX on x86/x64. */ - case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: + case IR_BSHL: case IR_BSHR: case IR_BSAR: + if ((as->flags & JIT_F_BMI2)) /* Except if BMI2 is available. */ + break; + /* fallthrough */ + case IR_BROL: case IR_BROR: if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r)) { IR(ir->op2)->r = REGSP_HINT(RID_ECX); if (inloop) @@ -1828,16 +2410,26 @@ void lj_asm_trace(jit_State *J, GCtrace *T) { ASMState as_; ASMState *as = &as_; - MCode *origtop; + + /* Remove nops/renames left over from ASM restart due to LJ_TRERR_MCODELM. */ + { + IRRef nins = T->nins; + IRIns *ir = &T->ir[nins-1]; + if (ir->o == IR_NOP || ir->o == IR_RENAME) { + do { ir--; nins--; } while (ir->o == IR_NOP || ir->o == IR_RENAME); + T->nins = nins; + } + } /* Ensure an initialized instruction beyond the last one for HIOP checks. */ - J->cur.nins = lj_ir_nextins(J); - lj_ir_nop(&J->cur.ir[J->cur.nins]); + /* This also allows one RENAME to be added without reallocating curfinal. */ + as->orignins = lj_ir_nextins(J); + lj_ir_nop(&J->cur.ir[as->orignins]); /* Setup initial state. Copy some fields to reduce indirections. */ as->J = J; as->T = T; - as->ir = T->ir; + J->curfinal = lj_trace_alloc(J->L, T); /* This copies the IR, too. */ as->flags = J->flags; as->loopref = J->loopref; as->realign = NULL; @@ -1845,17 +2437,46 @@ void lj_asm_trace(jit_State *J, GCtrace *T) as->parent = J->parent ? traceref(J, J->parent) : NULL; /* Reserve MCode memory. */ - as->mctop = origtop = lj_mcode_reserve(J, &as->mcbot); + as->mctop = as->mctoporig = lj_mcode_reserve(J, &as->mcbot); as->mcp = as->mctop; as->mclim = as->mcbot + MCLIM_REDZONE; asm_setup_target(as); - do { + /* + ** This is a loop, because the MCode may have to be (re-)assembled + ** multiple times: + ** + ** 1. as->realign is set (and the assembly aborted), if the arch-specific + ** backend wants the MCode to be aligned differently. + ** + ** This is currently only the case on x86/x64, where small loops get + ** an aligned loop body plus a short branch. Not much effort is wasted, + ** because the abort happens very quickly and only once. + ** + ** 2. The IR is immovable, since the MCode embeds pointers to various + ** constants inside the IR. But RENAMEs may need to be added to the IR + ** during assembly, which might grow and reallocate the IR. We check + ** at the end if the IR (in J->cur.ir) has actually grown, resize the + ** copy (in J->curfinal.ir) and try again. + ** + ** 95% of all traces have zero RENAMEs, 3% have one RENAME, 1.5% have + ** 2 RENAMEs and only 0.5% have more than that. That's why we opt to + ** always have one spare slot in the IR (see above), which means we + ** have to redo the assembly for only ~2% of all traces. + ** + ** Very, very rarely, this needs to be done repeatedly, since the + ** location of constants inside the IR (actually, reachability from + ** a global pointer) may affect register allocation and thus the + ** number of RENAMEs. + */ + for (;;) { as->mcp = as->mctop; #ifdef LUA_USE_ASSERT as->mcp_prev = as->mcp; #endif - as->curins = T->nins; + as->ir = J->curfinal->ir; /* Use the copied IR. */ + as->curins = J->cur.nins = as->orignins; + RA_DBG_START(); RA_DBGX((as, "===== STOP =====")); @@ -1874,7 +2495,11 @@ void lj_asm_trace(jit_State *J, GCtrace *T) /* Assemble a trace in linear backwards order. */ for (as->curins--; as->curins > as->stopins; as->curins--) { IRIns *ir = IR(as->curins); - lua_assert(!(LJ_32 && irt_isint64(ir->t))); /* Handled by SPLIT. */ + /* 64 bit types handled by SPLIT for 32 bit archs. */ + lj_assertA(!(LJ_32 && irt_isint64(ir->t)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); + asm_snap_prev(as); if (!ra_used(ir) && !ir_sideeff(ir) && (as->flags & JIT_F_OPT_DCE)) continue; /* Dead-code elimination can be soooo easy. */ if (irt_isguard(ir->t)) @@ -1883,22 +2508,43 @@ void lj_asm_trace(jit_State *J, GCtrace *T) checkmclim(as); asm_ir(as, ir); } - } while (as->realign); /* Retry in case the MCode needs to be realigned. */ - /* Emit head of trace. */ - RA_DBG_REF(); - checkmclim(as); - if (as->gcsteps > 0) { - as->curins = as->T->snap[0].ref; - asm_snap_prep(as); /* The GC check is a guard. */ - asm_gc_check(as); + if (as->realign && J->curfinal->nins >= T->nins) + continue; /* Retry in case only the MCode needs to be realigned. */ + + /* Emit head of trace. */ + RA_DBG_REF(); + checkmclim(as); + if (as->gcsteps > 0) { + as->curins = as->T->snap[0].ref; + asm_snap_prep(as); /* The GC check is a guard. */ + asm_gc_check(as); + as->curins = as->stopins; + } + ra_evictk(as); + if (as->parent) + asm_head_side(as); + else + asm_head_root(as); + asm_phi_fixup(as); + + if (J->curfinal->nins >= T->nins) { /* IR didn't grow? */ + lj_assertA(J->curfinal->nk == T->nk, "unexpected IR constant growth"); + memcpy(J->curfinal->ir + as->orignins, T->ir + as->orignins, + (T->nins - as->orignins) * sizeof(IRIns)); /* Copy RENAMEs. */ + T->nins = J->curfinal->nins; + /* Fill mcofs of any unprocessed snapshots. */ + as->curins = REF_FIRST; + asm_snap_prev(as); + break; /* Done. */ + } + + /* Otherwise try again with a bigger IR. */ + lj_trace_free(J2G(J), J->curfinal); + J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */ + J->curfinal = lj_trace_alloc(J->L, T); + as->realign = NULL; } - ra_evictk(as); - if (as->parent) - asm_head_side(as); - else - asm_head_root(as); - asm_phi_fixup(as); RA_DBGX((as, "===== START ====")); RA_DBG_FLUSH(); @@ -1908,10 +2554,16 @@ void lj_asm_trace(jit_State *J, GCtrace *T) /* Set trace entry point before fixing up tail to allow link to self. */ T->mcode = as->mcp; T->mcloop = as->mcloop ? (MSize)((char *)as->mcloop - (char *)as->mcp) : 0; - if (!as->loopref) + if (as->loopref) + asm_loop_tail_fixup(as); + else asm_tail_fixup(as, T->link); /* Note: this may change as->mctop! */ T->szmcode = (MSize)((char *)as->mctop - (char *)as->mcp); - lj_mcode_sync(T->mcode, origtop); + asm_snap_fixup_mcofs(as); +#if LJ_TARGET_MCODE_FIXUP + asm_mcode_fixup(T->mcode, T->szmcode); +#endif + lj_mcode_sync(T->mcode, as->mctoporig); } #undef IR diff --git a/src/lj_asm_arm.h b/src/lj_asm_arm.h index 262fa59e..326330f4 100644 --- a/src/lj_asm_arm.h +++ b/src/lj_asm_arm.h @@ -41,7 +41,7 @@ static Reg ra_scratchpair(ASMState *as, RegSet allow) } } } - lua_assert(rset_test(RSET_GPREVEN, r)); + lj_assertA(rset_test(RSET_GPREVEN, r), "odd reg %d", r); ra_modified(as, r); ra_modified(as, r+1); RA_DBGX((as, "scratchpair $r $r", r, r+1)); @@ -185,6 +185,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, *ofsp = (ofs & 255); /* Mask out less bits to allow LDRD. */ return ra_allock(as, (ofs & ~255), allow); } + } else if (ir->o == IR_TMPREF) { + *ofsp = 0; + return RID_SP; } } *ofsp = 0; @@ -269,7 +272,7 @@ static void asm_fusexref(ASMState *as, ARMIns ai, Reg rd, IRRef ref, return; } } else if (ir->o == IR_STRREF && !(!LJ_SOFTFP && (ai & 0x08000000))) { - lua_assert(ofs == 0); + lj_assertA(ofs == 0, "bad usage"); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { ofs += IR(ir->op2)->i; @@ -338,7 +341,7 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, ARMIns ai, ARMIns air) /* Generate a call to a C function. */ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { - uint32_t n, nargs = CCI_NARGS(ci); + uint32_t n, nargs = CCI_XNARGS(ci); int32_t ofs = 0; #if LJ_SOFTFP Reg gpr = REGARG_FIRSTGPR; @@ -389,9 +392,11 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1u; if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ if (irt_isnum(ir->t)) { - lua_assert(rset_test(as->freeset, gpr+1)); /* Ditto. */ + lj_assertA(rset_test(as->freeset, gpr+1), + "reg %d not free", gpr+1); /* Ditto. */ emit_dnm(as, ARMI_VMOV_RR_D, gpr, gpr+1, (src & 15)); gpr += 2; } else { @@ -408,7 +413,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) #endif { if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ if (ref) ra_leftov(as, gpr, ref); gpr++; } else { @@ -433,7 +439,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); if (!LJ_SOFTFP && irt_isfp(ir->t)) { if (LJ_ABI_SOFTFP || (ci->flags & (CCI_CASTU64|CCI_VARARG))) { Reg dest = (ra_dest(as, ir, RSET_FPR) & 15); @@ -453,15 +459,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) UNUSED(ci); } -static void asm_call(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX]; - const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; - asm_collectargs(as, ir, ci, args); - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} - static void asm_callx(ASMState *as, IRIns *ir) { IRRef args[CCI_NARGS_MAX*2]; @@ -490,7 +487,7 @@ static void asm_retf(ASMState *as, IRIns *ir) { Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); + int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); as->topslot -= (BCReg)delta; if ((int32_t)as->topslot < 0) as->topslot = 0; irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ @@ -504,6 +501,30 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_lso(as, ARMI_LDR, RID_TMP, base, -4); } +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + int32_t addr = i32ptr((void *)&J2G(as->J)->cur_L); + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + if ((as->flags & JIT_F_ARMV6T2)) { + emit_dnm(as, ARMI_BFI, RID_TMP, lj_fls(SBUF_MASK_FLAG), tmp); + } else { + emit_dnm(as, ARMI_ORR, RID_TMP, RID_TMP, tmp); + emit_dn(as, ARMI_AND|ARMI_K12|SBUF_MASK_FLAG, tmp, tmp); + } + emit_lso(as, ARMI_LDR, RID_TMP, + ra_allock(as, (addr & ~4095), + rset_exclude(rset_exclude(RSET_GPR, sb), tmp)), + (addr & 4095)); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + /* -- Type conversions ---------------------------------------------------- */ #if !LJ_SOFTFP @@ -539,13 +560,17 @@ static void asm_conv(ASMState *as, IRIns *ir) #endif IRRef lref = ir->op1; /* 64 bit integer conversions are handled by SPLIT. */ - lua_assert(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64)); + lj_assertA(!irt_isint64(ir->t) && !(st == IRT_I64 || st == IRT_U64), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); #if LJ_SOFTFP /* FP conversions are handled by SPLIT. */ - lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT)); + lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), + "IR %04d has FP type", + (int)(ir - as->ir) - REF_BIAS); /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ #else - lua_assert(irt_type(ir->t) != st); + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -562,7 +587,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg left = ra_alloc1(as, lref, RSET_FPR); @@ -581,7 +607,7 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, lref, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if ((as->flags & JIT_F_ARMV6)) { ARMIns ai = st == IRT_I8 ? ARMI_SXTB : st == IRT_U8 ? ARMI_UXTB : @@ -601,31 +627,6 @@ static void asm_conv(ASMState *as, IRIns *ir) } } -#if !LJ_SOFTFP && LJ_HASFFI -static void asm_conv64(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); - IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); - IRCallID id; - CCallInfo ci; - IRRef args[2]; - args[0] = (ir-1)->op1; - args[1] = ir->op1; - if (st == IRT_NUM || st == IRT_FLOAT) { - id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); - ir--; - } else { - id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); - } - ci = lj_ir_callinfo[id]; -#if !LJ_ABI_SOFTFP - ci.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ -#endif - asm_setupresult(as, ir, &ci); - asm_gencall(as, &ci, args); -} -#endif - static void asm_strto(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; @@ -689,60 +690,61 @@ static void asm_strto(ASMState *as, IRIns *ir) emit_opk(as, ARMI_ADD, tmp, RID_SP, ofs, RSET_GPR); } +/* -- Memory references --------------------------------------------------- */ + /* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) { - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) { - /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - } else { + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if ((mode & IRTMPREF_OUT1)) { #if LJ_SOFTFP - lua_assert(0); + lj_assertA(irref_isk(ref), "unsplit FP op"); + emit_dm(as, ARMI_MOV, dest, RID_SP); + emit_lso(as, ARMI_STR, + ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), + RID_SP, 0); + emit_lso(as, ARMI_STR, + ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), + RID_SP, 4); #else - /* Otherwise force a spill and use the spill slot. */ - emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_dm(as, ARMI_MOV, dest, RID_SP); + emit_vlso(as, ARMI_VSTR_D, src, RID_SP, 0); #endif + } else if (irref_isk(ref)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, i32ptr(ir_knum(ir)), dest); + } else { +#if LJ_SOFTFP + lj_assertA(0, "unsplit FP op"); +#else + /* Otherwise force a spill and use the spill slot. */ + emit_opk(as, ARMI_ADD, dest, RID_SP, ra_spill(as, ir), RSET_GPR); +#endif + } + } else { + /* Otherwise use [sp] and [sp+4] to hold the TValue. + ** This assumes the following call has max. 4 args. + */ + Reg type; + emit_dm(as, ARMI_MOV, dest, RID_SP); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_GPR); + emit_lso(as, ARMI_STR, src, RID_SP, 0); + } + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) + type = ra_alloc1(as, ref+1, RSET_GPR); + else + type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); + emit_lso(as, ARMI_STR, type, RID_SP, 4); } } else { - /* Otherwise use [sp] and [sp+4] to hold the TValue. */ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; emit_dm(as, ARMI_MOV, dest, RID_SP); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_lso(as, ARMI_STR, src, RID_SP, 0); - } - if ((ir+1)->o == IR_HIOP) - type = ra_alloc1(as, ref+1, allow); - else - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_lso(as, ARMI_STR, type, RID_SP, 4); } } -static void asm_tostr(ASMState *as, IRIns *ir) -{ - IRRef args[2]; - args[0] = ASMREF_L; - as->gcsteps++; - if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; - args[1] = ASMREF_TMP1; /* const lua_Number * */ - asm_setupresult(as, ir, ci); /* GCstr * */ - asm_gencall(as, ci, args); - asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); - } else { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; - args[1] = ir->op1; /* int32_t k */ - asm_setupresult(as, ir, ci); /* GCstr * */ - asm_gencall(as, ci, args); - } -} - -/* -- Memory references --------------------------------------------------- */ - static void asm_aref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -864,16 +866,16 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) *l_loop = ARMF_CC(ARMI_B, CC_NE) | ((as->mcp-l_loop-2) & 0x00ffffffu); /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; + khash = irref_isk(refkey) ? ir_khash(as, irkey) : 1; if (khash == 0) { emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); } else { emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 3), dest, dest, tmp); emit_dnm(as, ARMI_ADD|ARMF_SH(ARMSH_LSL, 1), tmp, tmp, tmp); - if (irt_isstr(kt)) { /* Fetch of str->hash is cheaper than ra_allock. */ + if (irt_isstr(kt)) { /* Fetch of str->sid is cheaper than ra_allock. */ emit_dnm(as, ARMI_AND, tmp, tmp+1, RID_TMP); emit_lso(as, ARMI_LDR, dest, tab, (int32_t)offsetof(GCtab, node)); - emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, hash)); + emit_lso(as, ARMI_LDR, tmp+1, key, (int32_t)offsetof(GCstr, sid)); emit_lso(as, ARMI_LDR, RID_TMP, tab, (int32_t)offsetof(GCtab, hmask)); } else if (irref_isk(refkey)) { emit_opk(as, ARMI_AND, tmp, RID_TMP, (int32_t)khash, @@ -920,7 +922,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg node = ra_alloc1(as, ir->op1, RSET_GPR); Reg key = RID_NONE, type = RID_TMP, idx = node; RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ofs > 4095) { idx = dest; rset_clear(allow, dest); @@ -960,20 +962,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) emit_opk(as, ARMI_ADD, dest, node, ofs, RSET_GPR); } -static void asm_newref(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; - IRRef args[3]; - if (ir->r == RID_SINK) - return; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* GCtab *t */ - args[2] = ASMREF_TMP1; /* cTValue *key */ - asm_setupresult(as, ir, ci); /* TValue * */ - asm_gencall(as, ci, args); - asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); -} - static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1001,7 +989,7 @@ static void asm_uref(ASMState *as, IRIns *ir) static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) @@ -1038,25 +1026,27 @@ static void asm_strref(ASMState *as, IRIns *ir) /* -- Loads and stores ---------------------------------------------------- */ -static ARMIns asm_fxloadins(IRIns *ir) +static ARMIns asm_fxloadins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: return ARMI_LDRSB; case IRT_U8: return ARMI_LDRB; case IRT_I16: return ARMI_LDRSH; case IRT_U16: return ARMI_LDRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VLDR_D; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VLDR_D; case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VLDR_S; /* fallthrough */ default: return ARMI_LDR; } } -static ARMIns asm_fxstoreins(IRIns *ir) +static ARMIns asm_fxstoreins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return ARMI_STRB; case IRT_I16: case IRT_U16: return ARMI_STRH; - case IRT_NUM: lua_assert(!LJ_SOFTFP); return ARMI_VSTR_D; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return ARMI_VSTR_D; case IRT_FLOAT: if (!LJ_SOFTFP) return ARMI_VSTR_S; /* fallthrough */ default: return ARMI_STR; } @@ -1065,17 +1055,23 @@ static ARMIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - ARMIns ai = asm_fxloadins(ir); + ARMIns ai = asm_fxloadins(as, ir); + Reg idx; int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); - return; + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + idx = ra_allock(as, (int32_t)(ir->op2<<2) + (int32_t)J2GG(as->J), RSET_GPR); + ofs = 0; + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_dn(as, ARMI_ADD|ARMI_K12|ofs, dest, idx); + return; + } } + ofs = field_ofs[ir->op2]; } - ofs = field_ofs[ir->op2]; if ((ai & 0x04000000)) emit_lso(as, ai, dest, idx, ofs); else @@ -1089,7 +1085,7 @@ static void asm_fstore(ASMState *as, IRIns *ir) IRIns *irf = IR(ir->op1); Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); int32_t ofs = field_ofs[irf->op2]; - ARMIns ai = asm_fxstoreins(ir); + ARMIns ai = asm_fxstoreins(as, ir); if ((ai & 0x04000000)) emit_lso(as, ai, src, idx, ofs); else @@ -1101,20 +1097,22 @@ static void asm_xload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); } -static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) { if (ir->r != RID_SINK) { Reg src = ra_alloc1(as, ir->op2, (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } } +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + static void asm_ahuvload(ASMState *as, IRIns *ir) { int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); @@ -1127,13 +1125,15 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) rset_clear(allow, type); } if (ra_used(ir)) { - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); rset_clear(allow, dest); } idx = asm_fuseahuref(as, ir->op1, &ofs, allow, (!LJ_SOFTFP && t == IRT_NUM) ? 1024 : 4096); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; if (!hiop || type == RID_NONE) { rset_clear(allow, idx); if (ofs < 256 && ra_hasreg(dest) && (dest & 1) == 0 && @@ -1194,10 +1194,13 @@ static void asm_sload(ASMState *as, IRIns *ir) IRType t = hiop ? IRT_NUM : irt_type(ir->t); Reg dest = RID_NONE, type = RID_NONE, base; RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); #if LJ_SOFTFP - lua_assert(!(ir->op2 & IRSLOAD_CONVERT)); /* Handled by LJ_SOFTFP SPLIT. */ + lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), + "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ if (hiop && ra_used(ir+1)) { type = ra_dest(as, ir+1, allow); rset_clear(allow, type); @@ -1213,8 +1216,9 @@ static void asm_sload(ASMState *as, IRIns *ir) Reg tmp = RID_NONE; if ((ir->op2 & IRSLOAD_CONVERT)) tmp = ra_scratch(as, t == IRT_INT ? RSET_FPR : RSET_GPR); - lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || - irt_isint(ir->t) || irt_isaddr(ir->t)); + lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad SLOAD type %d", irt_type(ir->t)); dest = ra_dest(as, ir, (!LJ_SOFTFP && t == IRT_NUM) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); @@ -1246,7 +1250,12 @@ dotypecheck: } } asm_guardcc(as, t == IRT_NUM ? CC_HS : CC_NE); - emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); + if ((ir->op2 & IRSLOAD_KEYINDEX)) { + emit_n(as, ARMI_CMN|ARMI_K12|1, type); + emit_dn(as, ARMI_EOR^emit_isk12(ARMI_EOR, ~LJ_KEYINDEX), type, type); + } else { + emit_n(as, ARMI_CMN|ARMI_K12|-irt_toitype_(t), type); + } } if (ra_hasreg(dest)) { #if !LJ_SOFTFP @@ -1272,19 +1281,17 @@ dotypecheck: static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; - CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? - lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[2]; + IRRef args[4]; RegSet allow = (RSET_GPR & ~RSET_SCRATCH); RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ as->gcsteps++; - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ ra_evictset(as, drop); @@ -1294,10 +1301,10 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); if (sz == 8) { ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); + lj_assertA(ir->o == IR_HIOP, "expected HIOP for CNEWI"); } for (;;) { Reg r = ra_alloc1(as, ir->op2, allow); @@ -1306,22 +1313,32 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ofs == sizeof(GCcdata)) break; ofs -= 4; ir--; } + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; } + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ { - uint32_t k = emit_isk12(ARMI_MOV, ctypeid); - Reg r = k ? RID_R1 : ra_allock(as, ctypeid, allow); + uint32_t k = emit_isk12(ARMI_MOV, id); + Reg r = k ? RID_R1 : ra_allock(as, id, allow); emit_lso(as, ARMI_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); emit_lsox(as, ARMI_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); emit_d(as, ARMI_MOV|ARMI_K12|~LJ_TCDATA, RID_TMP); if (k) emit_d(as, ARMI_MOV^k, RID_R1); } + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ asm_gencall(as, ci, args); ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1353,7 +1370,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj, val, tmp; /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1392,23 +1409,36 @@ static void asm_fpunary(ASMState *as, IRIns *ir, ARMIns ai) emit_dm(as, ai, (dest & 15), (left & 15)); } -static int asm_fpjoin_pow(ASMState *as, IRIns *ir) -{ - IRIns *irp = IR(ir->op1); - if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { - IRIns *irpp = IR(irp->op1); - if (irpp == ir-2 && irpp->o == IR_FPMATH && - irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; - IRRef args[2]; - args[0] = irpp->op1; - args[1] = irp->op2; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); - return 1; - } - } - return 0; +static void asm_callround(ASMState *as, IRIns *ir, int id) +{ + /* The modified regs must match with the *.dasc implementation. */ + RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| + RID2RSET(RID_R3)|RID2RSET(RID_R12); + RegSet of; + Reg dest, src; + ra_evictset(as, drop); + dest = ra_dest(as, ir, RSET_FPR); + emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); + emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : + id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : + (void *)lj_vm_trunc_sf); + /* Workaround to protect argument GPRs from being used for remat. */ + of = as->freeset; + as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); + as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); + src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ + as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); + emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + if (ir->op2 <= IRFPM_TRUNC) + asm_callround(as, ir, ir->op2); + else if (ir->op2 == IRFPM_SQRT) + asm_fpunary(as, ir, ARMI_VSQRT_D); + else + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); } #endif @@ -1474,19 +1504,6 @@ static void asm_intop_s(ASMState *as, IRIns *ir, ARMIns ai) asm_intop(as, ir, asm_drop_cmp0(as, ai)); } -static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) -{ - ai = asm_drop_cmp0(as, ai); - if (ir->op2 == 0) { - Reg dest = ra_dest(as, ir, RSET_GPR); - uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); - emit_d(as, ai^m, dest); - } else { - /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ - asm_intop(as, ir, ai); - } -} - static void asm_intneg(ASMState *as, IRIns *ir, ARMIns ai) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1552,6 +1569,15 @@ static void asm_mul(ASMState *as, IRIns *ir) asm_intmul(as, ir); } +#define asm_addov(as, ir) asm_add(as, ir) +#define asm_subov(as, ir) asm_sub(as, ir) +#define asm_mulov(as, ir) asm_mul(as, ir) + +#if !LJ_SOFTFP +#define asm_fpdiv(as, ir) asm_fparith(as, ir, ARMI_VDIV_D) +#define asm_abs(as, ir) asm_fpunary(as, ir, ARMI_VABS_D) +#endif + static void asm_neg(ASMState *as, IRIns *ir) { #if !LJ_SOFTFP @@ -1563,41 +1589,22 @@ static void asm_neg(ASMState *as, IRIns *ir) asm_intneg(as, ir, ARMI_RSB); } -static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) +static void asm_bitop(ASMState *as, IRIns *ir, ARMIns ai) { - const CCallInfo *ci = &lj_ir_callinfo[id]; - IRRef args[2]; - args[0] = ir->op1; - args[1] = ir->op2; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); + ai = asm_drop_cmp0(as, ai); + if (ir->op2 == 0) { + Reg dest = ra_dest(as, ir, RSET_GPR); + uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); + emit_d(as, ai^m, dest); + } else { + /* NYI: Turn BAND !k12 into uxtb, uxth or bfc or shl+shr. */ + asm_intop(as, ir, ai); + } } -#if !LJ_SOFTFP -static void asm_callround(ASMState *as, IRIns *ir, int id) -{ - /* The modified regs must match with the *.dasc implementation. */ - RegSet drop = RID2RSET(RID_R0)|RID2RSET(RID_R1)|RID2RSET(RID_R2)| - RID2RSET(RID_R3)|RID2RSET(RID_R12); - RegSet of; - Reg dest, src; - ra_evictset(as, drop); - dest = ra_dest(as, ir, RSET_FPR); - emit_dnm(as, ARMI_VMOV_D_RR, RID_RETLO, RID_RETHI, (dest & 15)); - emit_call(as, id == IRFPM_FLOOR ? (void *)lj_vm_floor_sf : - id == IRFPM_CEIL ? (void *)lj_vm_ceil_sf : - (void *)lj_vm_trunc_sf); - /* Workaround to protect argument GPRs from being used for remat. */ - of = as->freeset; - as->freeset &= ~RSET_RANGE(RID_R0, RID_R1+1); - as->cost[RID_R0] = as->cost[RID_R1] = REGCOST(~0u, ASMREF_L); - src = ra_alloc1(as, ir->op1, RSET_FPR); /* May alloc GPR to remat FPR. */ - as->freeset |= (of & RSET_RANGE(RID_R0, RID_R1+1)); - emit_dnm(as, ARMI_VMOV_RR_D, RID_R0, RID_R1, (src & 15)); -} -#endif +#define asm_bnot(as, ir) asm_bitop(as, ir, ARMI_MVN) -static void asm_bitswap(ASMState *as, IRIns *ir) +static void asm_bswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, ir->op1, RSET_GPR); @@ -1614,6 +1621,10 @@ static void asm_bitswap(ASMState *as, IRIns *ir) } } +#define asm_band(as, ir) asm_bitop(as, ir, ARMI_AND) +#define asm_bor(as, ir) asm_bitop(as, ir, ARMI_ORR) +#define asm_bxor(as, ir) asm_bitop(as, ir, ARMI_EOR) + static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) { if (irref_isk(ir->op2)) { /* Constant shifts. */ @@ -1631,6 +1642,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, ARMShift sh) } } +#define asm_bshl(as, ir) asm_bitshift(as, ir, ARMSH_LSL) +#define asm_bshr(as, ir) asm_bitshift(as, ir, ARMSH_LSR) +#define asm_bsar(as, ir) asm_bitshift(as, ir, ARMSH_ASR) +#define asm_bror(as, ir) asm_bitshift(as, ir, ARMSH_ROR) +#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") + static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) { uint32_t kcmp = 0, kmov = 0; @@ -1704,6 +1721,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc, int fcc) asm_intmin_max(as, ir, cc); } +#define asm_min(as, ir) asm_min_max(as, ir, CC_GT, CC_PL) +#define asm_max(as, ir) asm_min_max(as, ir, CC_LT, CC_LE) + /* -- Comparisons --------------------------------------------------------- */ /* Map of comparisons to flags. ORDER IR. */ @@ -1777,7 +1797,8 @@ static void asm_intcomp(ASMState *as, IRIns *ir) Reg left; uint32_t m; int cmpprev0 = 0; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), + "bad comparison data type %d", irt_type(ir->t)); if (asm_swapops(as, lref, rref)) { Reg tmp = lref; lref = rref; rref = tmp; if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ @@ -1819,6 +1840,18 @@ notst: as->flagmcp = as->mcp; /* Allow elimination of the compare. */ } +static void asm_comp(ASMState *as, IRIns *ir) +{ +#if !LJ_SOFTFP + if (irt_isnum(ir->t)) + asm_fpcomp(as, ir); + else +#endif + asm_intcomp(as, ir); +} + +#define asm_equal(as, ir) asm_comp(as, ir) + #if LJ_HASFFI /* 64 bit integer comparisons. */ static void asm_int64comp(ASMState *as, IRIns *ir) @@ -1857,15 +1890,15 @@ static void asm_int64comp(ASMState *as, IRIns *ir) } #endif -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ +/* -- Split register ops -------------------------------------------------- */ -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_HASFFI || LJ_SOFTFP /* HIOP is marked as a store because it needs its own DCE logic. */ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; +#if LJ_HASFFI || LJ_SOFTFP if ((ir-1)->o <= IR_NE) { /* 64 bit integer or FP comparisons. ORDER IR. */ as->curins--; /* Always skip the loword comparison. */ #if LJ_SOFTFP @@ -1882,7 +1915,7 @@ static void asm_hiop(ASMState *as, IRIns *ir) } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { as->curins--; /* Always skip the loword min/max. */ if (uselo || usehi) - asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_HI : CC_LO); + asm_sfpmin_max(as, ir-1, (ir-1)->o == IR_MIN ? CC_PL : CC_LE); return; #elif LJ_HASFFI } else if ((ir-1)->o == IR_CONV) { @@ -1893,9 +1926,10 @@ static void asm_hiop(ASMState *as, IRIns *ir) #endif } else if ((ir-1)->o == IR_XSTORE) { if ((ir-1)->r != RID_SINK) - asm_xstore(as, ir, 4); + asm_xstore_(as, ir, 4); return; } +#endif if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { #if LJ_HASFFI @@ -1914,6 +1948,9 @@ static void asm_hiop(ASMState *as, IRIns *ir) asm_intneg(as, ir, ARMI_RSC); asm_intneg(as, ir-1, ARMI_RSB|ARMI_S); break; + case IR_CNEWI: + /* Nothing to do here. Handled by lo op itself. */ + break; #endif #if LJ_SOFTFP case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: @@ -1921,24 +1958,26 @@ static void asm_hiop(ASMState *as, IRIns *ir) if (!uselo) ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ break; + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: + /* Nothing to do here. Handled by lo op itself. */ + break; #endif - case IR_CALLN: - case IR_CALLS: - case IR_CALLXS: + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: if (!uselo) ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ break; -#if LJ_SOFTFP - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: -#endif - case IR_CNEWI: - /* Nothing to do here. Handled by lo op itself. */ - break; - default: lua_assert(0); break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); -#endif +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + UNUSED(ir); + asm_guardcc(as, CC_NE); + emit_n(as, ARMI_TST|ARMI_K12|HOOK_PROFILE, RID_TMP); + emit_lsptr(as, ARMI_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); } /* -- Stack handling ------------------------------------------------------ */ @@ -1952,7 +1991,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, if (irp) { if (!ra_hasspill(irp->s)) { pbase = irp->r; - lua_assert(ra_hasreg(pbase)); + lj_assertA(ra_hasreg(pbase), "base reg lost"); } else if (allow) { pbase = rset_pickbot(allow); } else { @@ -1964,13 +2003,13 @@ static void asm_stack_check(ASMState *as, BCReg topslot, } emit_branch(as, ARMF_CC(ARMI_BL, CC_LS), exitstub_addr(as->J, exitno)); k = emit_isk12(0, (int32_t)(8*topslot)); - lua_assert(k); + lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); emit_n(as, ARMI_CMP^k, RID_TMP); emit_dnm(as, ARMI_SUB, RID_TMP, RID_TMP, pbase); emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (int32_t)offsetof(lua_State, maxstack)); if (irp) { /* Must not spill arbitrary registers in head of side trace. */ - int32_t i = i32ptr(&J2G(as->J)->jit_L); + int32_t i = i32ptr(&J2G(as->J)->cur_L); if (ra_hasspill(irp->s)) emit_lso(as, ARMI_LDR, pbase, RID_SP, sps_scale(irp->s)); emit_lso(as, ARMI_LDR, RID_TMP, RID_TMP, (i & 4095)); @@ -1978,7 +2017,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, emit_lso(as, ARMI_STR, RID_RET, RID_SP, 0); /* Save temp. register. */ emit_loadi(as, RID_TMP, (i & ~4095)); } else { - emit_getgl(as, RID_TMP, jit_L); + emit_getgl(as, RID_TMP, cur_L); } } @@ -2001,7 +2040,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) #if LJ_SOFTFP RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); Reg tmp; - lua_assert(irref_isk(ref)); /* LJ_SOFTFP: must be a number constant. */ + /* LJ_SOFTFP: must be a number constant. */ + lj_assertA(irref_isk(ref), "unsplit FP op"); tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, rset_exclude(RSET_GPREVEN, RID_BASE)); emit_lso(as, ARMI_STR, tmp, RID_BASE, ofs); @@ -2015,7 +2055,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } else { RegSet odd = rset_exclude(RSET_GPRODD, RID_BASE); Reg type; - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "restore of IR type %d", irt_type(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPREVEN, RID_BASE)); emit_lso(as, ARMI_STR, src, RID_BASE, ofs); @@ -2028,6 +2069,8 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } else if ((sn & SNAP_SOFTFPNUM)) { type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPRODD, RID_BASE)); #endif + } else if ((sn & SNAP_KEYINDEX)) { + type = ra_allock(as, (int32_t)LJ_KEYINDEX, odd); } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), odd); } @@ -2035,7 +2078,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ @@ -2089,15 +2132,21 @@ static void asm_loop_fixup(ASMState *as) } } +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do. */ +} + /* -- Head of trace ------------------------------------------------------- */ -/* Reload L register from g->jit_L. */ +/* Reload L register from g->cur_L. */ static void asm_head_lreg(ASMState *as) { IRIns *ir = IR(ASMREF_L); if (ra_used(ir)) { Reg r = ra_dest(as, ir, RSET_GPR); - emit_getgl(as, r, jit_L); + emit_getgl(as, r, cur_L); ra_evictk(as); } } @@ -2125,7 +2174,7 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) rset_clear(allow, ra_dest(as, ir, allow)); } else { Reg r = irp->r; - lua_assert(ra_hasreg(r)); + lj_assertA(ra_hasreg(r), "base reg lost"); rset_clear(allow, r); if (r != ir->r && !rset_test(as->freeset, r)) ra_restore(as, regcost_ref(as->cost[r])); @@ -2147,7 +2196,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) } else { /* Patch stack adjustment. */ uint32_t k = emit_isk12(ARMI_ADD, spadj); - lua_assert(k); + lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); p[-2] = (ARMI_ADD^k) | ARMF_D(RID_SP) | ARMF_N(RID_SP); } /* Patch exit branch. */ @@ -2168,143 +2217,13 @@ static void asm_tail_prep(ASMState *as) *p = 0; /* Prevent load/store merging. */ } -/* -- Instruction dispatch ------------------------------------------------ */ - -/* Assemble a single instruction. */ -static void asm_ir(ASMState *as, IRIns *ir) -{ - switch ((IROp)ir->o) { - /* Miscellaneous ops. */ - case IR_LOOP: asm_loop(as); break; - case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; - case IR_USE: - ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; - case IR_PHI: asm_phi(as, ir); break; - case IR_HIOP: asm_hiop(as, ir); break; - case IR_GCSTEP: asm_gcstep(as, ir); break; - - /* Guarded assertions. */ - case IR_EQ: case IR_NE: - if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { - as->curins--; - asm_href(as, ir-1, (IROp)ir->o); - break; - } - /* fallthrough */ - case IR_LT: case IR_GE: case IR_LE: case IR_GT: - case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: - case IR_ABC: -#if !LJ_SOFTFP - if (irt_isnum(ir->t)) { asm_fpcomp(as, ir); break; } -#endif - asm_intcomp(as, ir); - break; - - case IR_RETF: asm_retf(as, ir); break; - - /* Bit ops. */ - case IR_BNOT: asm_bitop(as, ir, ARMI_MVN); break; - case IR_BSWAP: asm_bitswap(as, ir); break; - - case IR_BAND: asm_bitop(as, ir, ARMI_AND); break; - case IR_BOR: asm_bitop(as, ir, ARMI_ORR); break; - case IR_BXOR: asm_bitop(as, ir, ARMI_EOR); break; - - case IR_BSHL: asm_bitshift(as, ir, ARMSH_LSL); break; - case IR_BSHR: asm_bitshift(as, ir, ARMSH_LSR); break; - case IR_BSAR: asm_bitshift(as, ir, ARMSH_ASR); break; - case IR_BROR: asm_bitshift(as, ir, ARMSH_ROR); break; - case IR_BROL: lua_assert(0); break; - - /* Arithmetic ops. */ - case IR_ADD: case IR_ADDOV: asm_add(as, ir); break; - case IR_SUB: case IR_SUBOV: asm_sub(as, ir); break; - case IR_MUL: case IR_MULOV: asm_mul(as, ir); break; - case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; - case IR_NEG: asm_neg(as, ir); break; - -#if LJ_SOFTFP - case IR_DIV: case IR_POW: case IR_ABS: - case IR_ATAN2: case IR_LDEXP: case IR_FPMATH: case IR_TOBIT: - lua_assert(0); /* Unused for LJ_SOFTFP. */ - break; -#else - case IR_DIV: asm_fparith(as, ir, ARMI_VDIV_D); break; - case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; - case IR_ABS: asm_fpunary(as, ir, ARMI_VABS_D); break; - case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; - case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; - case IR_FPMATH: - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - break; - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, ARMI_VSQRT_D); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); - break; - case IR_TOBIT: asm_tobit(as, ir); break; -#endif - - case IR_MIN: asm_min_max(as, ir, CC_GT, CC_HI); break; - case IR_MAX: asm_min_max(as, ir, CC_LT, CC_LO); break; - - /* Memory references. */ - case IR_AREF: asm_aref(as, ir); break; - case IR_HREF: asm_href(as, ir, 0); break; - case IR_HREFK: asm_hrefk(as, ir); break; - case IR_NEWREF: asm_newref(as, ir); break; - case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; - case IR_FREF: asm_fref(as, ir); break; - case IR_STRREF: asm_strref(as, ir); break; - - /* Loads and stores. */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - asm_ahuvload(as, ir); - break; - case IR_FLOAD: asm_fload(as, ir); break; - case IR_XLOAD: asm_xload(as, ir); break; - case IR_SLOAD: asm_sload(as, ir); break; - - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; - case IR_FSTORE: asm_fstore(as, ir); break; - case IR_XSTORE: asm_xstore(as, ir, 0); break; - - /* Allocations. */ - case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; - case IR_TNEW: asm_tnew(as, ir); break; - case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; - - /* Write barriers. */ - case IR_TBAR: asm_tbar(as, ir); break; - case IR_OBAR: asm_obar(as, ir); break; - - /* Type conversions. */ - case IR_CONV: asm_conv(as, ir); break; - case IR_TOSTR: asm_tostr(as, ir); break; - case IR_STRTO: asm_strto(as, ir); break; - - /* Calls. */ - case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; - case IR_CALLXS: asm_callx(as, ir); break; - case IR_CARG: break; - - default: - setintV(&as->J->errinfo, ir->o); - lj_trace_err_info(as->J, LJ_TRERR_NYIIR); - break; - } -} - /* -- Trace setup --------------------------------------------------------- */ /* Ensure there are enough stack slots for call arguments. */ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) { IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = (int)CCI_NARGS(ci); + uint32_t i, nargs = CCI_XNARGS(ci); int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR, fprodd = 0; asm_collectargs(as, ir, ci, args); for (i = 0; i < nargs; i++) { @@ -2360,7 +2279,7 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) if (!cstart) cstart = p; } } - lua_assert(cstart != NULL); + lj_assertJ(cstart != NULL, "exit stub %d not found", exitno); lj_mcode_sync(cstart, cend); lj_mcode_patch(J, mcarea, 1); } diff --git a/src/lj_asm_arm64.h b/src/lj_asm_arm64.h new file mode 100644 index 00000000..4b7066f2 --- /dev/null +++ b/src/lj_asm_arm64.h @@ -0,0 +1,2070 @@ +/* +** ARM64 IR assembler (SSA IR -> machine code). +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +** +** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +** Sponsored by Cisco Systems, Inc. +*/ + +/* -- Register allocator extensions --------------------------------------- */ + +/* Allocate a register with a hint. */ +static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) +{ + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!ra_hashint(r) && !iscrossref(as, ref)) + ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ + r = ra_allocref(as, ref, allow); + } + ra_noweak(as, r); + return r; +} + +/* Allocate two source registers for three-operand instructions. */ +static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) +{ + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + Reg left = irl->r, right = irr->r; + if (ra_hasreg(left)) { + ra_noweak(as, left); + if (ra_noreg(right)) + right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); + else + ra_noweak(as, right); + } else if (ra_hasreg(right)) { + ra_noweak(as, right); + left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); + } else if (ra_hashint(right)) { + right = ra_allocref(as, ir->op2, allow); + left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); + } else { + left = ra_allocref(as, ir->op1, allow); + right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); + } + return left | (right << 8); +} + +/* -- Guard handling ------------------------------------------------------ */ + +/* Setup all needed exit stubs. */ +static void asm_exitstub_setup(ASMState *as, ExitNo nexits) +{ + ExitNo i; + MCode *mxp = as->mctop; + if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) + asm_mclimit(as); + /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */ + for (i = nexits-1; (int32_t)i >= 0; i--) + *--mxp = A64I_LE(A64I_BL | A64F_S26(-3-i)); + *--mxp = A64I_LE(A64I_MOVZw | A64F_U16(as->T->traceno)); + mxp--; + *mxp = A64I_LE(A64I_BL | A64F_S26(((MCode *)(void *)lj_vm_exit_handler-mxp))); + *--mxp = A64I_LE(A64I_STRx | A64F_D(RID_LR) | A64F_N(RID_SP)); + as->mctop = mxp; +} + +static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) +{ + /* Keep this in-sync with exitstub_trace_addr(). */ + return as->mctop + exitno + 3; +} + +/* Emit conditional branch to exit for guard. */ +static void asm_guardcc(ASMState *as, A64CC cc) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = A64I_B | A64F_S26(target-p); + emit_cond_branch(as, cc^1, p-1); + return; + } + emit_cond_branch(as, cc, target); +} + +/* Emit test and branch instruction to exit for guard. */ +static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = A64I_B | A64F_S26(target-p); + emit_tnb(as, ai^0x01000000u, r, bit, p-1); + return; + } + emit_tnb(as, ai, r, bit, target); +} + +/* Emit compare and branch instruction to exit for guard. */ +static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + *p = A64I_B | A64F_S26(target-p); + emit_cnb(as, ai^0x01000000u, r, p-1); + return; + } + emit_cnb(as, ai, r, target); +} + +/* -- Operand fusion ------------------------------------------------------ */ + +/* Limit linear search to this distance. Avoids O(n^2) behavior. */ +#define CONFLICT_SEARCH_LIM 31 + +static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) +{ + if (irref_isk(ref)) { + IRIns *ir = IR(ref); + if (ir->o == IR_KNULL || !irt_is64(ir->t)) { + *k = ir->i; + return 1; + } else if (checki32((int64_t)ir_k64(ir)->u64)) { + *k = (int32_t)ir_k64(ir)->u64; + return 1; + } + } + return 0; +} + +/* Check if there's no conflicting instruction between curins and ref. */ +static int noconflict(ASMState *as, IRRef ref, IROp conflict) +{ + IRIns *ir = as->ir; + IRRef i = as->curins; + if (i > ref + CONFLICT_SEARCH_LIM) + return 0; /* Give up, ref is too far away. */ + while (--i > ref) + if (ir[i].o == conflict) + return 0; /* Conflict found. */ + return 1; /* Ok, no conflict. */ +} + +/* Fuse the array base of colocated arrays. */ +static int32_t asm_fuseabase(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && + !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) + return (int32_t)sizeof(GCtab); + return 0; +} + +#define FUSE_REG 0x40000000 + +/* Fuse array/hash/upvalue reference into register+offset operand. */ +static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow, + A64Ins ins) +{ + IRIns *ir = IR(ref); + if (ra_noreg(ir->r)) { + if (ir->o == IR_AREF) { + if (mayfuse(as, ref)) { + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (emit_checkofs(ins, ofs)) { + *ofsp = ofs; + return ra_alloc1(as, refa, allow); + } + } else { + Reg base = ra_alloc1(as, ir->op1, allow); + *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base)); + return base; + } + } + } else if (ir->o == IR_HREFK) { + if (mayfuse(as, ref)) { + int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); + if (emit_checkofs(ins, ofs)) { + *ofsp = ofs; + return ra_alloc1(as, ir->op1, allow); + } + } + } else if (ir->o == IR_UREFC) { + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; + int64_t ofs = glofs(as, &uv->tv); + if (emit_checkofs(ins, ofs)) { + *ofsp = (int32_t)ofs; + return RID_GL; + } + } + } else if (ir->o == IR_TMPREF) { + *ofsp = (int32_t)glofs(as, &J2G(as->J)->tmptv); + return RID_GL; + } + } + *ofsp = 0; + return ra_alloc1(as, ref, allow); +} + +/* Fuse m operand into arithmetic/logic instructions. */ +static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow) +{ + IRIns *ir = IR(ref); + if (ra_hasreg(ir->r)) { + ra_noweak(as, ir->r); + return A64F_M(ir->r); + } else if (irref_isk(ref)) { + uint32_t m; + int64_t k = get_k64val(as, ref); + if ((ai & 0x1f000000) == 0x0a000000) + m = emit_isk13(k, irt_is64(ir->t)); + else + m = emit_isk12(k); + if (m) + return m; + } else if (mayfuse(as, ref)) { + if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) || + (ir->o == IR_ADD && ir->op1 == ir->op2)) { + A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR : + ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL; + int shift = ir->o == IR_ADD ? 1 : + (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31)); + IRIns *irl = IR(ir->op1); + if (sh == A64SH_LSL && + irl->o == IR_CONV && + irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && + shift <= 4 && + canfuse(as, irl)) { + Reg m = ra_alloc1(as, irl->op1, allow); + return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift); + } else { + Reg m = ra_alloc1(as, ir->op1, allow); + return A64F_M(m) | A64F_SH(sh, shift); + } + } else if (ir->o == IR_CONV && + ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) { + Reg m = ra_alloc1(as, ir->op1, allow); + return A64F_M(m) | A64F_EX(A64EX_SXTW); + } + } + return A64F_M(ra_allocref(as, ref, allow)); +} + +/* Fuse XLOAD/XSTORE reference into load/store operand. */ +static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref, + RegSet allow) +{ + IRIns *ir = IR(ref); + Reg base; + int32_t ofs = 0; + if (ra_noreg(ir->r) && canfuse(as, ir)) { + if (ir->o == IR_ADD) { + if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) { + ref = ir->op1; + } else { + Reg rn, rm; + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irl = IR(lref); + if (mayfuse(as, irl->op1)) { + unsigned int shift = 4; + if (irl->o == IR_BSHL && irref_isk(irl->op2)) { + shift = (IR(irl->op2)->i & 63); + } else if (irl->o == IR_ADD && irl->op1 == irl->op2) { + shift = 1; + } + if ((ai >> 30) == shift) { + lref = irl->op1; + irl = IR(lref); + ai |= A64I_LS_SH; + } + } + if (irl->o == IR_CONV && + irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) && + canfuse(as, irl)) { + lref = irl->op1; + ai |= A64I_LS_SXTWx; + } else { + ai |= A64I_LS_LSLx; + } + rm = ra_alloc1(as, lref, allow); + rn = ra_alloc1(as, rref, rset_exclude(allow, rm)); + emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm); + return; + } + } else if (ir->o == IR_STRREF) { + if (asm_isk32(as, ir->op2, &ofs)) { + ref = ir->op1; + } else if (asm_isk32(as, ir->op1, &ofs)) { + ref = ir->op2; + } else { + Reg refk = irref_isk(ir->op1) ? ir->op1 : ir->op2; + Reg refv = irref_isk(ir->op1) ? ir->op2 : ir->op1; + Reg rn = ra_alloc1(as, refv, allow); + IRIns *irr = IR(refk); + uint32_t m; + if (irr+1 == ir && !ra_used(irr) && + irr->o == IR_ADD && irref_isk(irr->op2)) { + ofs = sizeof(GCstr) + IR(irr->op2)->i; + if (emit_checkofs(ai, ofs)) { + Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn)); + m = A64F_M(rm) | A64F_EX(A64EX_SXTW); + goto skipopm; + } + } + m = asm_fuseopm(as, 0, refk, rset_exclude(allow, rn)); + ofs = sizeof(GCstr); + skipopm: + emit_lso(as, ai, rd, rd, ofs); + emit_dn(as, A64I_ADDx^m, rd, rn); + return; + } + ofs += sizeof(GCstr); + if (!emit_checkofs(ai, ofs)) { + Reg rn = ra_alloc1(as, ref, allow); + Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn)); + emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm); + return; + } + } + } + base = ra_alloc1(as, ref, allow); + emit_lso(as, ai, (rd & 31), base, ofs); +} + +/* Fuse FP multiply-add/sub. */ +static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irm; + if (lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, ai = air, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); + Reg left = ra_alloc2(as, irm, + rset_exclude(rset_exclude(RSET_FPR, dest), add)); + Reg right = (left >> 8); left &= 255; + emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31)); + return 1; + } + return 0; +} + +/* Fuse BAND + BSHL/BSHR into UBFM. */ +static int asm_fuseandshift(ASMState *as, IRIns *ir) +{ + IRIns *irl = IR(ir->op1); + lj_assertA(ir->o == IR_BAND, "bad usage"); + if (canfuse(as, irl) && irref_isk(ir->op2)) { + uint64_t mask = get_k64val(as, ir->op2); + if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) { + int32_t shmask = irt_is64(irl->t) ? 63 : 31; + int32_t shift = (IR(irl->op2)->i & shmask); + int32_t imms = shift; + if (irl->o == IR_BSHL) { + mask >>= shift; + shift = (shmask-shift+1) & shmask; + imms = 0; + } + if (mask && !((mask+1) & mask)) { /* Contiguous 1-bits at the bottom. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, irl->op1, RSET_GPR); + A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw; + imms += 63 - emit_clz64(mask); + if (imms > shmask) imms = shmask; + emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left); + return 1; + } + } + } + return 0; +} + +/* Fuse BOR(BSHL, BSHR) into EXTR/ROR. */ +static int asm_fuseorshift(ASMState *as, IRIns *ir) +{ + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + lj_assertA(ir->o == IR_BOR, "bad usage"); + if (canfuse(as, irl) && canfuse(as, irr) && + ((irl->o == IR_BSHR && irr->o == IR_BSHL) || + (irl->o == IR_BSHL && irr->o == IR_BSHR))) { + if (irref_isk(irl->op2) && irref_isk(irr->op2)) { + IRRef lref = irl->op1, rref = irr->op1; + uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i; + if (irl->o == IR_BSHR) { /* BSHR needs to be the right operand. */ + uint32_t tmp2; + IRRef tmp1 = lref; lref = rref; rref = tmp1; + tmp2 = lshift; lshift = rshift; rshift = tmp2; + } + if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) { + A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, lref, RSET_GPR); + Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left)); + emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right); + return 1; + } + } + } + return 0; +} + +/* -- Calls --------------------------------------------------------------- */ + +/* Generate a call to a C function. */ +static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) +{ + uint32_t n, nargs = CCI_XNARGS(ci); + int32_t ofs = 0; + Reg gpr, fpr = REGARG_FIRSTFPR; + if ((void *)ci->func) + emit_call(as, (void *)ci->func); + for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) + as->cost[gpr] = REGCOST(~0u, ASMREF_L); + gpr = REGARG_FIRSTGPR; + for (n = 0; n < nargs; n++) { /* Setup args. */ + IRRef ref = args[n]; + IRIns *ir = IR(ref); + if (ref) { + if (irt_isfp(ir->t)) { + if (fpr <= REGARG_LASTFPR) { + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Must have been evicted. */ + ra_leftov(as, fpr, ref); + fpr++; + } else { + Reg r = ra_alloc1(as, ref, RSET_FPR); + emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0)); + ofs += 8; + } + } else { + if (gpr <= REGARG_LASTGPR) { + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ + ra_leftov(as, gpr, ref); + gpr++; + } else { + Reg r = ra_alloc1(as, ref, RSET_GPR); + emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0)); + ofs += 8; + } + } + } + } +} + +/* Setup result reg/sp for call. Evict scratch regs. */ +static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + RegSet drop = RSET_SCRATCH; + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + if (hiop && ra_hasreg((ir+1)->r)) + rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. */ + if (ra_used(ir)) { + lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (irt_isfp(ir->t)) { + if (ci->flags & CCI_CASTU64) { + Reg dest = ra_dest(as, ir, RSET_FPR) & 31; + emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R, + dest, RID_RET); + } else { + ra_destreg(as, ir, RID_FPRET); + } + } else if (hiop) { + ra_destpair(as, ir); + } else { + ra_destreg(as, ir, RID_RET); + } + } + UNUSED(ci); +} + +static void asm_callx(ASMState *as, IRIns *ir) +{ + IRRef args[CCI_NARGS_MAX*2]; + CCallInfo ci; + IRRef func; + IRIns *irf; + ci.flags = asm_callx_flags(as, ir); + asm_collectargs(as, ir, &ci, args); + asm_setupresult(as, ir, &ci); + func = ir->op2; irf = IR(func); + if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } + if (irref_isk(func)) { /* Call to constant address. */ + ci.func = (ASMFunction)(ir_k64(irf)->u64); + } else { /* Need a non-argument register for indirect calls. */ + Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); + emit_n(as, A64I_BLR, freg); + ci.func = (ASMFunction)(void *)0; + } + asm_gencall(as, &ci, args); +} + +/* -- Returns ------------------------------------------------------------- */ + +/* Return to lower frame. Guard that it goes to the right spot. */ +static void asm_retf(ASMState *as, IRIns *ir) +{ + Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); + void *pc = ir_kptr(IR(ir->op2)); + int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); + as->topslot -= (BCReg)delta; + if ((int32_t)as->topslot < 0) as->topslot = 0; + irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ + /* Need to force a spill on REF_BASE now to update the stack slot. */ + emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE))); + emit_setgl(as, base, jit_base); + emit_addptr(as, base, -8*delta); + asm_guardcc(as, CC_NE); + emit_nm(as, A64I_CMPx, RID_TMP, + ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base))); + emit_lso(as, A64I_LDRx, RID_TMP, base, -8); +} + +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + emit_dn(as, A64I_BFMx | A64F_IMMS(lj_fls(SBUF_MASK_FLAG)) | A64F_IMMR(0), RID_TMP, tmp); + emit_getgl(as, RID_TMP, cur_L); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + +/* -- Type conversions ---------------------------------------------------- */ + +static void asm_tointg(ASMState *as, IRIns *ir, Reg left) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + Reg dest = ra_dest(as, ir, RSET_GPR); + asm_guardcc(as, CC_NE); + emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31)); + emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest); + emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31)); +} + +static void asm_tobit(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_FPR; + Reg left = ra_alloc1(as, ir->op1, allow); + Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); + Reg tmp = ra_scratch(as, rset_clear(allow, right)); + Reg dest = ra_dest(as, ir, RSET_GPR); + emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31)); + emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31)); +} + +static void asm_conv(ASMState *as, IRIns *ir) +{ + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); + int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); + int stfp = (st == IRT_NUM || st == IRT_FLOAT); + IRRef lref = ir->op1; + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ + emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32, + (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31)); + } else { /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + A64Ins ai = irt_isfloat(ir->t) ? + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) : + (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) : + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) : + (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32)); + emit_dn(as, ai, (dest & 31), left); + } + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg left = ra_alloc1(as, lref, RSET_FPR); + Reg dest = ra_dest(as, ir, RSET_GPR); + A64Ins ai = irt_is64(ir->t) ? + (st == IRT_NUM ? + (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : + (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : + (st == IRT_NUM ? + (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : + (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); + emit_dn(as, ai, dest, (left & 31)); + } + } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, lref, RSET_GPR); + A64Ins ai = st == IRT_I8 ? A64I_SXTBw : + st == IRT_U8 ? A64I_UXTBw : + st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw; + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); + emit_dn(as, ai, dest, left); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irt_is64(ir->t)) { + if (st64 || !(ir->op2 & IRCONV_SEXT)) { + /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } else { /* 32 to 64 bit sign extension. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_dn(as, A64I_SXTW, dest, left); + } + } else { + if (st64 && !(ir->op2 & IRCONV_NONE)) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_dm(as, A64I_MOVw, dest, left); + } else { /* 32/32 bit no-op (cast). */ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } + } + } +} + +static void asm_strto(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; + Reg dest = 0, tmp; + int destused = ra_used(ir); + int32_t ofs = 0; + ra_evictset(as, RSET_SCRATCH); + if (destused) { + if (ra_hasspill(ir->s)) { + ofs = sps_scale(ir->s); + destused = 0; + if (ra_hasreg(ir->r)) { + ra_free(as, ir->r); + ra_modified(as, ir->r); + emit_spload(as, ir, ir->r, ofs); + } + } else { + dest = ra_dest(as, ir, RSET_FPR); + } + } + if (destused) + emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0); + asm_guardcnb(as, A64I_CBZ, RID_RET); + args[0] = ir->op1; /* GCstr *str */ + args[1] = ASMREF_TMP1; /* TValue *n */ + asm_gencall(as, ci, args); + tmp = ra_releasetmp(as, ASMREF_TMP1); + emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR); +} + +/* -- Memory references --------------------------------------------------- */ + +/* Store tagged value for ref at base+ofs. */ +static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) +{ + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "store of IR type %d", irt_type(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + rset_clear(allow, src); + if (irt_isinteger(ir->t)) { + Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + emit_lso(as, A64I_STRx, RID_TMP, base, ofs); + emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src); + } else { + Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + emit_lso(as, A64I_STRx, RID_TMP, base, ofs); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type); + } + } +} + +/* Get pointer to TValue. */ +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) +{ + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, i64ptr(ir_knum(ir)), dest); + return; + } + emit_lso(as, A64I_STRd, (ra_alloc1(as, ref, RSET_FPR) & 31), dest, 0); + } else { + asm_tvstore64(as, dest, 0, ref); + } + } + /* g->tmptv holds the TValue(s). */ + emit_dn(as, A64I_ADDx^emit_isk12(glofs(as, &J2G(as->J)->tmptv)), dest, RID_GL); +} + +static void asm_aref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx, base; + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i); + if (k) { + base = ra_alloc1(as, refa, RSET_GPR); + emit_dn(as, A64I_ADDx^k, dest, base); + return; + } + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); + emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx); +} + +/* Inlined hash lookup. Specialized for key type and for const keys. +** The equivalent C code is: +** Node *n = hashkey(t, key); +** do { +** if (lj_obj_equal(&n->key, key)) return &n->val; +** } while ((n = nextnode(n))); +** return niltv(L); +*/ +static void asm_href(ASMState *as, IRIns *ir, IROp merge) +{ + RegSet allow = RSET_GPR; + int destused = ra_used(ir); + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = 0, tmp = RID_TMP; + Reg ftmp = RID_NONE, type = RID_NONE, scr = RID_NONE, tisnum = RID_NONE; + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + int isk = irref_isk(ir->op2); + IRType1 kt = irkey->t; + uint32_t k = 0; + uint32_t khash; + MCLabel l_end, l_loop, l_next; + rset_clear(allow, tab); + + if (!isk) { + key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); + rset_clear(allow, key); + if (!irt_isstr(kt)) { + tmp = ra_scratch(as, allow); + rset_clear(allow, tmp); + } + } else if (irt_isnum(kt)) { + int64_t val = (int64_t)ir_knum(irkey)->u64; + if (!(k = emit_isk12(val))) { + key = ra_allock(as, val, allow); + rset_clear(allow, key); + } + } else if (!irt_ispri(kt)) { + if (!(k = emit_isk12(irkey->i))) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } + } + + /* Allocate constants early. */ + if (irt_isnum(kt)) { + if (!isk) { + tisnum = ra_allock(as, LJ_TISNUM << 15, allow); + ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key)); + rset_clear(allow, tisnum); + } + } else if (irt_isaddr(kt)) { + if (isk) { + int64_t kk = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; + scr = ra_allock(as, kk, allow); + } else { + scr = ra_scratch(as, allow); + } + rset_clear(allow, scr); + } else { + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + type = ra_allock(as, ~((int64_t)~irt_toitype(kt) << 47), allow); + scr = ra_scratch(as, rset_clear(allow, type)); + rset_clear(allow, scr); + } + + /* Key not found in chain: jump to exit (if merged) or load niltv. */ + l_end = emit_label(as); + as->invmcp = NULL; + if (merge == IR_NE) + asm_guardcc(as, CC_AL); + else if (destused) + emit_loada(as, dest, niltvg(J2G(as->J))); + + /* Follow hash chain until the end. */ + l_loop = --as->mcp; + emit_n(as, A64I_CMPx^A64I_K12^0, dest); + emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next)); + l_next = emit_label(as); + + /* Type and value comparison. */ + if (merge == IR_EQ) + asm_guardcc(as, CC_EQ); + else + emit_cond_branch(as, CC_EQ, l_end); + + if (irt_isnum(kt)) { + if (isk) { + /* Assumes -0.0 is already canonicalized to +0.0. */ + if (k) + emit_n(as, A64I_CMPx^k, tmp); + else + emit_nm(as, A64I_CMPx, key, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + } else { + emit_nm(as, A64I_FCMPd, key, ftmp); + emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31)); + emit_cond_branch(as, CC_LO, l_next); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n)); + } + } else if (irt_isaddr(kt)) { + if (isk) { + emit_nm(as, A64I_CMPx, scr, tmp); + emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64)); + } else { + emit_nm(as, A64I_CMPx, tmp, scr); + emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64)); + } + } else { + emit_nm(as, A64I_CMPx, scr, type); + emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key)); + } + + *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE; + if (!isk && irt_isaddr(kt)) { + type = ra_allock(as, (int32_t)irt_toitype(kt), allow); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type); + rset_clear(allow, type); + } + /* Load main position relative to tab->node into dest. */ + khash = isk ? ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node)); + } else { + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest); + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest); + emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node)); + if (isk) { + Reg tmphash = ra_allock(as, khash, allow); + emit_dnm(as, A64I_ANDw, dest, dest, tmphash); + emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); + } else if (irt_isstr(kt)) { + /* Fetch of str->sid is cheaper than ra_allock. */ + emit_dnm(as, A64I_ANDw, dest, dest, tmp); + emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, sid)); + emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask)); + } else { /* Must match with hash*() in lj_tab.c. */ + emit_dnm(as, A64I_ANDw, dest, dest, tmp); + emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask)); + emit_dnm(as, A64I_SUBw, dest, dest, tmp); + emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp); + emit_dnm(as, A64I_EORw, dest, dest, tmp); + emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest); + emit_dnm(as, A64I_SUBw, tmp, tmp, dest); + emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest); + emit_dnm(as, A64I_EORw, tmp, tmp, dest); + if (irt_isnum(kt)) { + emit_dnm(as, A64I_ADDw, dest, dest, dest); + emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); + emit_dm(as, A64I_MOVw, tmp, dest); + emit_dn(as, A64I_FMOV_R_D, dest, (key & 31)); + } else { + checkmclim(as); + emit_dm(as, A64I_MOVw, tmp, key); + emit_dnm(as, A64I_EORw, dest, dest, + ra_allock(as, irt_toitype(kt) << 15, allow)); + emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest); + emit_dm(as, A64I_MOVx, dest, key); + } + } + } +} + +static void asm_hrefk(ASMState *as, IRIns *ir) +{ + IRIns *kslot = IR(ir->op2); + IRIns *irkey = IR(kslot->op1); + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + int bigofs = !emit_checkofs(A64I_LDRx, ofs); + Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + Reg key, idx = node; + RegSet allow = rset_exclude(RSET_GPR, node); + uint64_t k; + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (bigofs) { + idx = dest; + rset_clear(allow, dest); + kofs = (int32_t)offsetof(Node, key); + } else if (ra_hasreg(dest)) { + emit_opk(as, A64I_ADDx, dest, node, ofs, allow); + } + asm_guardcc(as, CC_NE); + if (irt_ispri(irkey->t)) { + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = ir_knum(irkey)->u64; + } else { + k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey); + } + key = ra_scratch(as, allow); + emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key))); + emit_lso(as, A64I_LDRx, key, idx, kofs); + if (bigofs) + emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR); +} + +static void asm_uref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; + emit_lsptr(as, A64I_LDRx, dest, v); + } else { + Reg uv = ra_scratch(as, RSET_GPR); + Reg func = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->o == IR_UREFC) { + asm_guardcc(as, CC_NE); + emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP); + emit_opk(as, A64I_ADDx, dest, uv, + (int32_t)offsetof(GCupval, tv), RSET_GPR); + emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); + } else { + emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v)); + } + emit_lso(as, A64I_LDRx, uv, func, + (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8)); + } +} + +static void asm_fref(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); + lj_assertA(!ra_used(ir), "unfused FREF"); +} + +static void asm_strref(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg base = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + int32_t ofs = sizeof(GCstr); + uint32_t m; + rset_clear(allow, base); + if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) { + emit_dn(as, A64I_ADDx^m, dest, base); + } else { + emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest); + emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow)); + } +} + +/* -- Loads and stores ---------------------------------------------------- */ + +static A64Ins asm_fxloadins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: return A64I_LDRB ^ A64I_LS_S; + case IRT_U8: return A64I_LDRB; + case IRT_I16: return A64I_LDRH ^ A64I_LS_S; + case IRT_U16: return A64I_LDRH; + case IRT_NUM: return A64I_LDRd; + case IRT_FLOAT: return A64I_LDRs; + default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw; + } +} + +static A64Ins asm_fxstoreins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return A64I_STRB; + case IRT_I16: case IRT_U16: return A64I_STRH; + case IRT_NUM: return A64I_STRd; + case IRT_FLOAT: return A64I_STRs; + default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw; + } +} + +static void asm_fload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx; + A64Ins ai = asm_fxloadins(ir); + int32_t ofs; + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + idx = RID_GL; + ofs = (ir->op2 << 2) - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx); + return; + } + } + ofs = field_ofs[ir->op2]; + } + emit_lso(as, ai, (dest & 31), idx, ofs); +} + +static void asm_fstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs); + } +} + +static void asm_xload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR); +} + +static void asm_xstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + rset_exclude(RSET_GPR, src)); + } +} + +static void asm_ahuvload(ASMState *as, IRIns *ir) +{ + Reg idx, tmp, type; + int32_t ofs = 0; + RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; + lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || + irt_isint(ir->t), + "bad load type %d", irt_type(ir->t)); + if (ra_used(ir)) { + Reg dest = ra_dest(as, ir, allow); + tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest; + if (irt_isaddr(ir->t)) { + emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); + } else if (irt_isnum(ir->t)) { + emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); + } else if (irt_isint(ir->t)) { + emit_dm(as, A64I_MOVw, dest, dest); + } + } else { + tmp = ra_scratch(as, gpr); + } + type = ra_scratch(as, rset_clear(gpr, tmp)); + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; + /* Always do the type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE); + if (irt_type(ir->t) >= IRT_NUM) { + lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), + "bad load type %d", irt_type(ir->t)); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp); + } else if (irt_isaddr(ir->t)) { + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + } else if (irt_isnil(ir->t)) { + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); + } else { + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, gpr), tmp); + } + if (ofs & FUSE_REG) + emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); + else + emit_lso(as, A64I_LDRx, tmp, idx, ofs); +} + +static void asm_ahustore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + RegSet allow = RSET_GPR; + Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE; + int32_t ofs = 0; + if (irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd); + if (ofs & FUSE_REG) + emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs &31)); + else + emit_lso(as, A64I_STRd, (src & 31), idx, ofs); + } else { + if (!irt_ispri(ir->t)) { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + if (irt_isinteger(ir->t)) + type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow); + else + type = ra_allock(as, irt_toitype(ir->t), allow); + } else { + tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type), + A64I_STRx); + if (ofs & FUSE_REG) + emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31)); + else + emit_lso(as, A64I_STRx, tmp, idx, ofs); + if (ra_hasreg(src)) { + if (irt_isinteger(ir->t)) { + emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src); + } else { + emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type); + } + } + } + } +} + +static void asm_sload(ASMState *as, IRIns *ir) +{ + int32_t ofs = 8*((int32_t)ir->op1-2); + IRType1 t = ir->t; + Reg dest = RID_NONE, base; + RegSet allow = RSET_GPR; + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ + } else if (ra_used(ir)) { + Reg tmp = RID_NONE; + if ((ir->op2 & IRSLOAD_CONVERT)) + tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); + lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(t)); + dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); + base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest)); + if (irt_isaddr(t)) { + emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest); + } else if ((ir->op2 & IRSLOAD_CONVERT)) { + if (irt_isint(t)) { + emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31)); + /* If value is already loaded for type check, move it to FPR. */ + if ((ir->op2 & IRSLOAD_TYPECHECK)) + emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest); + else + dest = tmp; + t.irt = IRT_NUM; /* Check for original type. */ + } else { + emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp); + dest = tmp; + t.irt = IRT_INT; /* Check for original type. */ + } + } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { + emit_dm(as, A64I_MOVw, dest, dest); + } + goto dotypecheck; + } + base = ra_alloc1(as, REF_BASE, allow); +dotypecheck: + rset_clear(allow, base); + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + Reg tmp; + if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) { + tmp = dest; + } else { + tmp = ra_scratch(as, allow); + rset_clear(allow, tmp); + } + if (ra_hasreg(dest) && irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT)) + emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp); + /* Need type check, even if the load result is unused. */ + asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE); + if (irt_type(t) >= IRT_NUM) { + lj_assertA(irt_isinteger(t) || irt_isnum(t), + "bad SLOAD type %d", irt_type(t)); + emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), + ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : (LJ_TISNUM << 15), allow), tmp); + } else if (irt_isnil(t)) { + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp); + } else if (irt_ispri(t)) { + emit_nm(as, A64I_CMPx, + ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow), tmp); + } else { + Reg type = ra_scratch(as, allow); + emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type); + emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp); + } + emit_lso(as, A64I_LDRx, tmp, base, ofs); + return; + } + if (ra_hasreg(dest)) { + emit_lso(as, irt_isnum(t) ? A64I_LDRd : + (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base, + ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0))); + } +} + +/* -- Allocations --------------------------------------------------------- */ + +#if LJ_HASFFI +static void asm_cnew(ASMState *as, IRIns *ir) +{ + CTState *cts = ctype_ctsG(J2G(as->J)); + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); + + as->gcsteps++; + asm_setupresult(as, ir, ci); /* GCcdata * */ + /* Initialize immutable cdata object. */ + if (ir->o == IR_CNEWI) { + int32_t ofs = sizeof(GCcdata); + Reg r = ra_alloc1(as, ir->op2, allow); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; + } + + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ + { + Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow); + emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct)); + emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid)); + emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP); + if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1); + } + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ + asm_gencall(as, ci, args); + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); +} +#endif + +/* -- Write barriers ------------------------------------------------------ */ + +static void asm_tbar(ASMState *as, IRIns *ir) +{ + Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); + Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab)); + Reg mark = RID_TMP; + MCLabel l_end = emit_label(as); + emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked)); + emit_setgl(as, tab, gc.grayagain); + emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark); + emit_getgl(as, link, gc.grayagain); + emit_cond_branch(as, CC_EQ, l_end); + emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark); + emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked)); +} + +static void asm_obar(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; + IRRef args[2]; + MCLabel l_end; + RegSet allow = RSET_GPR; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ir->op1; /* TValue *tv */ + asm_gencall(as, ci, args); + emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); + obj = IR(ir->op1)->r; + tmp = ra_scratch(as, rset_exclude(allow, obj)); + emit_cond_branch(as, CC_EQ, l_end); + emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp); + emit_cond_branch(as, CC_EQ, l_end); + emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP); + val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); + emit_lso(as, A64I_LDRB, tmp, obj, + (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); + emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked)); +} + +/* -- Arithmetic and logic operations ------------------------------------- */ + +static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31)); +} + +static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + emit_dn(as, ai, (dest & 31), (left & 31)); +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + IRFPMathOp fpm = (IRFPMathOp)ir->op2; + if (fpm == IRFPM_SQRT) { + asm_fpunary(as, ir, A64I_FSQRTd); + } else if (fpm <= IRFPM_TRUNC) { + asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd : + fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd); + } else { + asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); + } +} + +static int asm_swapops(ASMState *as, IRRef lref, IRRef rref) +{ + IRIns *ir; + if (irref_isk(rref)) + return 0; /* Don't swap constants to the left. */ + if (irref_isk(lref)) + return 1; /* But swap constants to the right. */ + ir = IR(rref); + if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || + (ir->o == IR_ADD && ir->op1 == ir->op2) || + (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) + return 0; /* Don't swap fusable operands to the left. */ + ir = IR(lref); + if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) || + (ir->o == IR_ADD && ir->op1 == ir->op2) || + (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT))) + return 1; /* But swap fusable operands to the right. */ + return 0; /* Otherwise don't swap. */ +} + +static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai) +{ + IRRef lref = ir->op1, rref = ir->op2; + Reg left, dest = ra_dest(as, ir, RSET_GPR); + uint32_t m; + if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) { + IRRef tmp = lref; lref = rref; rref = tmp; + } + left = ra_hintalloc(as, lref, dest, RSET_GPR); + if (irt_is64(ir->t)) ai |= A64I_X; + m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); + if (irt_isguard(ir->t)) { /* For IR_ADDOV etc. */ + asm_guardcc(as, CC_VS); + ai |= A64I_S; + } + emit_dn(as, ai^m, dest, left); +} + +static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai) +{ + if (as->flagmcp == as->mcp) { /* Drop cmp r, #0. */ + as->flagmcp = NULL; + as->mcp++; + ai |= A64I_S; + } + asm_intop(as, ir, ai); +} + +static void asm_intneg(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left); +} + +/* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */ +static void asm_intmul(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + if (irt_isguard(ir->t)) { /* IR_MULOV */ + asm_guardcc(as, CC_NE); + emit_dm(as, A64I_MOVw, dest, dest); /* Zero-extend. */ + emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest); + emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest); + emit_dnm(as, A64I_SMULL, dest, right, left); + } else { + emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right); + } +} + +static void asm_add(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd)) + asm_fparith(as, ir, A64I_FADDd); + return; + } + asm_intop_s(as, ir, A64I_ADDw); +} + +static void asm_sub(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd)) + asm_fparith(as, ir, A64I_FSUBd); + return; + } + asm_intop_s(as, ir, A64I_SUBw); +} + +static void asm_mul(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fparith(as, ir, A64I_FMULd); + return; + } + asm_intmul(as, ir); +} + +#define asm_addov(as, ir) asm_add(as, ir) +#define asm_subov(as, ir) asm_sub(as, ir) +#define asm_mulov(as, ir) asm_mul(as, ir) + +#define asm_fpdiv(as, ir) asm_fparith(as, ir, A64I_FDIVd) +#define asm_abs(as, ir) asm_fpunary(as, ir, A64I_FABS) + +static void asm_neg(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fpunary(as, ir, A64I_FNEGd); + return; + } + asm_intneg(as, ir); +} + +static void asm_band(ASMState *as, IRIns *ir) +{ + A64Ins ai = A64I_ANDw; + if (asm_fuseandshift(as, ir)) + return; + if (as->flagmcp == as->mcp) { + /* Try to drop cmp r, #0. */ + as->flagmcp = NULL; + as->mcp++; + ai = A64I_ANDSw; + } + asm_intop(as, ir, ai); +} + +static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irl = IR(lref), *irr = IR(rref); + if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) || + (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) { + Reg left, dest = ra_dest(as, ir, RSET_GPR); + uint32_t m; + if (irl->o == IR_BNOT) { + IRRef tmp = lref; lref = rref; rref = tmp; + } + left = ra_alloc1(as, lref, RSET_GPR); + ai |= A64I_ON; + if (irt_is64(ir->t)) ai |= A64I_X; + m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left)); + emit_dn(as, ai^m, dest, left); + } else { + asm_intop(as, ir, ai); + } +} + +static void asm_bor(ASMState *as, IRIns *ir) +{ + if (asm_fuseorshift(as, ir)) + return; + asm_borbxor(as, ir, A64I_ORRw); +} + +#define asm_bxor(as, ir) asm_borbxor(as, ir, A64I_EORw) + +static void asm_bnot(ASMState *as, IRIns *ir) +{ + A64Ins ai = A64I_MVNw; + Reg dest = ra_dest(as, ir, RSET_GPR); + uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR); + if (irt_is64(ir->t)) ai |= A64I_X; + emit_d(as, ai^m, dest); +} + +static void asm_bswap(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left); +} + +static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh) +{ + int32_t shmask = irt_is64(ir->t) ? 63 : 31; + if (irref_isk(ir->op2)) { /* Constant shifts. */ + Reg left, dest = ra_dest(as, ir, RSET_GPR); + int32_t shift = (IR(ir->op2)->i & shmask); + IRIns *irl = IR(ir->op1); + if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw; + + /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */ + if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) { + if (irl->o == IR_BSHL && irref_isk(irl->op2)) { + int32_t shift2 = (IR(irl->op2)->i & shmask); + shift = ((shift - shift2) & shmask); + shmask -= shift2; + ir = irl; + } + } + + left = ra_alloc1(as, ir->op1, RSET_GPR); + switch (sh) { + case A64SH_LSL: + emit_dn(as, ai | A64F_IMMS(shmask-shift) | + A64F_IMMR((shmask-shift+1)&shmask), dest, left); + break; + case A64SH_LSR: case A64SH_ASR: + emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left); + break; + case A64SH_ROR: + emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left); + break; + } + } else { /* Variable-length shifts. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right); + } +} + +#define asm_bshl(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL) +#define asm_bshr(as, ir) asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR) +#define asm_bsar(as, ir) asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR) +#define asm_bror(as, ir) asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR) +#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") + +static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right); + emit_nm(as, A64I_CMPw, left, right); +} + +static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc) +{ + Reg dest = (ra_dest(as, ir, RSET_FPR) & 31); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = ((left >> 8) & 31); left &= 31; + emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, right, left); + emit_nm(as, A64I_FCMPd, left, right); +} + +static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc) +{ + if (irt_isnum(ir->t)) + asm_fpmin_max(as, ir, fcc); + else + asm_intmin_max(as, ir, cc); +} + +#define asm_min(as, ir) asm_min_max(as, ir, CC_LT, CC_PL) +#define asm_max(as, ir) asm_min_max(as, ir, CC_GT, CC_LE) + +/* -- Comparisons --------------------------------------------------------- */ + +/* Map of comparisons to flags. ORDER IR. */ +static const uint8_t asm_compmap[IR_ABC+1] = { + /* op FP swp int cc FP cc */ + /* LT */ CC_GE + (CC_HS << 4), + /* GE x */ CC_LT + (CC_HI << 4), + /* LE */ CC_GT + (CC_HI << 4), + /* GT x */ CC_LE + (CC_HS << 4), + /* ULT x */ CC_HS + (CC_LS << 4), + /* UGE */ CC_LO + (CC_LO << 4), + /* ULE x */ CC_HI + (CC_LO << 4), + /* UGT */ CC_LS + (CC_LS << 4), + /* EQ */ CC_NE + (CC_NE << 4), + /* NE */ CC_EQ + (CC_EQ << 4), + /* ABC */ CC_LS + (CC_LS << 4) /* Same as UGT. */ +}; + +/* FP comparisons. */ +static void asm_fpcomp(ASMState *as, IRIns *ir) +{ + Reg left, right; + A64Ins ai; + int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1); + if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) { + left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31); + right = 0; + ai = A64I_FCMPZd; + } else { + left = ra_alloc2(as, ir, RSET_FPR); + if (swp) { + right = (left & 31); left = ((left >> 8) & 31); + } else { + right = ((left >> 8) & 31); left &= 31; + } + ai = A64I_FCMPd; + } + asm_guardcc(as, (asm_compmap[ir->o] >> 4)); + emit_nm(as, ai, left, right); +} + +/* Integer comparisons. */ +static void asm_intcomp(ASMState *as, IRIns *ir) +{ + A64CC oldcc, cc = (asm_compmap[ir->o] & 15); + A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw; + IRRef lref = ir->op1, rref = ir->op2; + Reg left; + uint32_t m; + int cmpprev0 = 0; + lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || + irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t), + "bad comparison data type %d", irt_type(ir->t)); + if (asm_swapops(as, lref, rref)) { + IRRef tmp = lref; lref = rref; rref = tmp; + if (cc >= CC_GE) cc ^= 7; /* LT <-> GT, LE <-> GE */ + else if (cc > CC_NE) cc ^= 11; /* LO <-> HI, LS <-> HS */ + } + oldcc = cc; + if (irref_isk(rref) && get_k64val(as, rref) == 0) { + IRIns *irl = IR(lref); + if (cc == CC_GE) cc = CC_PL; + else if (cc == CC_LT) cc = CC_MI; + else if (cc > CC_NE) goto nocombine; /* Other conds don't work with tst. */ + cmpprev0 = (irl+1 == ir); + /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */ + if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) { + IRRef blref = irl->op1, brref = irl->op2; + uint32_t m2 = 0; + Reg bleft; + if (asm_swapops(as, blref, brref)) { + Reg tmp = blref; blref = brref; brref = tmp; + } + if (irref_isk(brref)) { + uint64_t k = get_k64val(as, brref); + if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) { + asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ, + ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k)); + return; + } + m2 = emit_isk13(k, irt_is64(irl->t)); + } + bleft = ra_alloc1(as, blref, RSET_GPR); + ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw); + if (!m2) + m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft)); + asm_guardcc(as, cc); + emit_n(as, ai^m2, bleft); + return; + } + if (cc == CC_EQ || cc == CC_NE) { + /* Combine cmp-bcc into cbz/cbnz. */ + ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ; + if (irt_is64(ir->t)) ai |= A64I_X; + asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR)); + return; + } + } +nocombine: + left = ra_alloc1(as, lref, RSET_GPR); + m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left)); + asm_guardcc(as, cc); + emit_n(as, ai^m, left); + /* Signed comparison with zero and referencing previous ins? */ + if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE)) + as->flagmcp = as->mcp; /* Allow elimination of the compare. */ +} + +static void asm_comp(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fpcomp(as, ir); + else + asm_intcomp(as, ir); +} + +#define asm_equal(as, ir) asm_comp(as, ir) + +/* -- Split register ops -------------------------------------------------- */ + +/* Hiword op of a split 64/64 bit op. Previous op is the loword op. */ +static void asm_hiop(ASMState *as, IRIns *ir) +{ + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; + if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ + switch ((ir-1)->o) { + case IR_CALLN: + case IR_CALLL: + case IR_CALLS: + case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + uint32_t k = emit_isk13(HOOK_PROFILE, 0); + lj_assertA(k != 0, "HOOK_PROFILE does not fit in K13"); + UNUSED(ir); + asm_guardcc(as, CC_NE); + emit_n(as, A64I_TSTw^k, RID_TMP); + emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask); +} + +/* -- Stack handling ------------------------------------------------------ */ + +/* Check Lua stack size for overflow. Use exit handler as fallback. */ +static void asm_stack_check(ASMState *as, BCReg topslot, + IRIns *irp, RegSet allow, ExitNo exitno) +{ + Reg pbase; + uint32_t k; + if (irp) { + if (!ra_hasspill(irp->s)) { + pbase = irp->r; + lj_assertA(ra_hasreg(pbase), "base reg lost"); + } else if (allow) { + pbase = rset_pickbot(allow); + } else { + pbase = RID_RET; + emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0); /* Restore temp register. */ + } + } else { + pbase = RID_BASE; + } + emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno)); + k = emit_isk12((8*topslot)); + lj_assertA(k, "slot offset %d does not fit in K12", 8*topslot); + emit_n(as, A64I_CMPx^k, RID_TMP); + emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase); + emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP, + (int32_t)offsetof(lua_State, maxstack)); + if (irp) { /* Must not spill arbitrary registers in head of side trace. */ + if (ra_hasspill(irp->s)) + emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s)); + emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L)); + if (ra_hasspill(irp->s) && !allow) + emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0); /* Save temp register. */ + } else { + emit_getgl(as, RID_TMP, cur_L); + } +} + +/* Restore Lua stack from on-trace state. */ +static void asm_stack_restore(ASMState *as, SnapShot *snap) +{ + SnapEntry *map = &as->T->snapmap[snap->mapofs]; +#ifdef LUA_USE_ASSERT + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif + MSize n, nent = snap->nent; + /* Store the value of all modified slots to the Lua stack. */ + for (n = 0; n < nent; n++) { + SnapEntry sn = map[n]; + BCReg s = snap_slot(sn); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); + IRRef ref = snap_ref(sn); + IRIns *ir = IR(ref); + if ((sn & SNAP_NORESTORE)) + continue; + if ((sn & SNAP_KEYINDEX)) { + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + Reg r = irref_isk(ref) ? ra_allock(as, ir->i, allow) : + ra_alloc1(as, ref, allow); + rset_clear(allow, r); + emit_lso(as, A64I_STRw, r, RID_BASE, ofs); + emit_lso(as, A64I_STRw, ra_allock(as, LJ_KEYINDEX, allow), RID_BASE, ofs+4); + } else if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs); + } else { + asm_tvstore64(as, RID_BASE, ofs, ref); + } + checkmclim(as); + } + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); +} + +/* -- GC handling --------------------------------------------------------- */ + +/* Marker to prevent patching the GC check exit. */ +#define ARM64_NOPATCH_GC_CHECK \ + (A64I_ORRx|A64F_D(RID_TMP)|A64F_M(RID_TMP)|A64F_N(RID_TMP)) + +/* Check GC threshold and do one or more GC steps. */ +static void asm_gc_check(ASMState *as) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; + IRRef args[2]; + MCLabel l_end; + Reg tmp2; + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ + asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */ + *--as->mcp = ARM64_NOPATCH_GC_CHECK; + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ + asm_gencall(as, ci, args); + emit_dm(as, A64I_MOVx, ra_releasetmp(as, ASMREF_TMP1), RID_GL); + tmp2 = ra_releasetmp(as, ASMREF_TMP2); + emit_loadi(as, tmp2, as->gcsteps); + /* Jump around GC step if GC total < GC threshold. */ + emit_cond_branch(as, CC_LS, l_end); + emit_nm(as, A64I_CMPx, RID_TMP, tmp2); + emit_getgl(as, tmp2, gc.threshold); + emit_getgl(as, RID_TMP, gc.total); + as->gcsteps = 0; + checkmclim(as); +} + +/* -- Loop handling ------------------------------------------------------- */ + +/* Fixup the loop branch. */ +static void asm_loop_fixup(ASMState *as) +{ + MCode *p = as->mctop; + MCode *target = as->mcp; + if (as->loopinv) { /* Inverted loop branch? */ + uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu; + ptrdiff_t delta = target - (p - 2); + /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */ + p[-2] |= ((uint32_t)delta & mask) << 5; + } else { + ptrdiff_t delta = target - (p - 1); + p[-1] = A64I_B | A64F_S26(delta); + } +} + +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do. */ +} + +/* -- Head of trace ------------------------------------------------------- */ + +/* Reload L register from g->cur_L. */ +static void asm_head_lreg(ASMState *as) +{ + IRIns *ir = IR(ASMREF_L); + if (ra_used(ir)) { + Reg r = ra_dest(as, ir, RSET_GPR); + emit_getgl(as, r, cur_L); + ra_evictk(as); + } +} + +/* Coalesce BASE register for a root trace. */ +static void asm_head_root_base(ASMState *as) +{ + IRIns *ir; + asm_head_lreg(as); + ir = IR(REF_BASE); + if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) + ra_spill(as, ir); + ra_destreg(as, ir, RID_BASE); +} + +/* Coalesce BASE register for a side trace. */ +static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) +{ + IRIns *ir; + asm_head_lreg(as); + ir = IR(REF_BASE); + if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t))) + ra_spill(as, ir); + if (ra_hasspill(irp->s)) { + rset_clear(allow, ra_dest(as, ir, allow)); + } else { + Reg r = irp->r; + lj_assertA(ra_hasreg(r), "base reg lost"); + rset_clear(allow, r); + if (r != ir->r && !rset_test(as->freeset, r)) + ra_restore(as, regcost_ref(as->cost[r])); + ra_destreg(as, ir, r); + } + return allow; +} + +/* -- Tail of trace ------------------------------------------------------- */ + +/* Fixup the tail code. */ +static void asm_tail_fixup(ASMState *as, TraceNo lnk) +{ + MCode *p = as->mctop; + MCode *target; + /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */ + int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED)); + if (spadj == 0) { + *--p = A64I_LE(A64I_NOP); + as->mctop = p; + } else { + /* Patch stack adjustment. */ + uint32_t k = emit_isk12(spadj); + lj_assertA(k, "stack adjustment %d does not fit in K12", spadj); + p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP); + } + /* Patch exit branch. */ + target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; + p[-1] = A64I_B | A64F_S26((target-p)+1); +} + +/* Prepare tail of code. */ +static void asm_tail_prep(ASMState *as) +{ + MCode *p = as->mctop - 1; /* Leave room for exit branch. */ + if (as->loopref) { + as->invmcp = as->mcp = p; + } else { + as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + as->invmcp = NULL; + } + *p = 0; /* Prevent load/store merging. */ +} + +/* -- Trace setup --------------------------------------------------------- */ + +/* Ensure there are enough stack slots for call arguments. */ +static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + IRRef args[CCI_NARGS_MAX*2]; + uint32_t i, nargs = CCI_XNARGS(ci); + int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; + asm_collectargs(as, ir, ci, args); + for (i = 0; i < nargs; i++) { + if (args[i] && irt_isfp(IR(args[i])->t)) { + if (nfpr > 0) nfpr--; else nslots += 2; + } else { + if (ngpr > 0) ngpr--; else nslots += 2; + } + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; + return REGSP_HINT(RID_RET); +} + +static void asm_setup_target(ASMState *as) +{ + /* May need extra exit for asm_stack_check on side traces. */ + asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); +} + +#if LJ_BE +/* ARM64 instructions are always little-endian. Swap for ARM64BE. */ +static void asm_mcode_fixup(MCode *mcode, MSize size) +{ + MCode *pe = (MCode *)((char *)mcode + size); + while (mcode < pe) { + MCode ins = *mcode; + *mcode++ = lj_bswap(ins); + } +} +#define LJ_TARGET_MCODE_FIXUP 1 +#endif + +/* -- Trace patching ------------------------------------------------------ */ + +/* Patch exit jumps of existing machine code to a new target. */ +void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) +{ + MCode *p = T->mcode; + MCode *pe = (MCode *)((char *)p + T->szmcode); + MCode *cstart = NULL; + MCode *mcarea = lj_mcode_patch(J, p, 0); + MCode *px = exitstub_trace_addr(T, exitno); + int patchlong = 1; + /* Note: this assumes a trace exit is only ever patched once. */ + for (; p < pe; p++) { + /* Look for exitstub branch, replace with branch to target. */ + ptrdiff_t delta = target - p; + MCode ins = A64I_LE(*p); + if ((ins & 0xff000000u) == 0x54000000u && + ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { + /* Patch bcc, if within range. */ + if (A64F_S_OK(delta, 19)) { + *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); + if (!cstart) cstart = p; + } + } else if ((ins & 0xfc000000u) == 0x14000000u && + ((ins ^ (px-p)) & 0x03ffffffu) == 0) { + /* Patch b. */ + lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range"); + *p = A64I_LE((ins & 0xfc000000u) | A64F_S26(delta)); + if (!cstart) cstart = p; + } else if ((ins & 0x7e000000u) == 0x34000000u && + ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) { + /* Patch cbz/cbnz, if within range. */ + if (p[-1] == ARM64_NOPATCH_GC_CHECK) { + patchlong = 0; + } else if (A64F_S_OK(delta, 19)) { + *p = A64I_LE((ins & 0xff00001fu) | A64F_S19(delta)); + if (!cstart) cstart = p; + } + } else if ((ins & 0x7e000000u) == 0x36000000u && + ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) { + /* Patch tbz/tbnz, if within range. */ + if (A64F_S_OK(delta, 14)) { + *p = A64I_LE((ins & 0xfff8001fu) | A64F_S14(delta)); + if (!cstart) cstart = p; + } + } + } + /* Always patch long-range branch in exit stub itself. Except, if we can't. */ + if (patchlong) { + ptrdiff_t delta = target - px; + lj_assertJ(A64F_S_OK(delta, 26), "branch target out of range"); + *px = A64I_B | A64F_S26(delta); + if (!cstart) cstart = px; + } + if (cstart) lj_mcode_sync(cstart, px+1); + lj_mcode_patch(J, mcarea, 1); +} + diff --git a/src/lj_asm_mips.h b/src/lj_asm_mips.h index c0e491a6..1686b40f 100644 --- a/src/lj_asm_mips.h +++ b/src/lj_asm_mips.h @@ -23,7 +23,7 @@ static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) { Reg r = IR(ref)->r; if (ra_noreg(r)) { - if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0) + if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) return RID_ZERO; r = ra_allocref(as, ref, allow); } else { @@ -64,17 +64,29 @@ static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) /* Setup spare long-range jump slots per mcarea. */ static void asm_sparejump_setup(ASMState *as) { - MCode *mxp = as->mcbot; - if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) { - lua_assert(MIPSI_NOP == 0); + MCode *mxp = as->mctop; + if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { + mxp -= MIPS_SPAREJUMP*2; + lj_assertA(MIPSI_NOP == 0, "bad NOP"); memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode)); - mxp += MIPS_SPAREJUMP*2; - lua_assert(mxp < as->mctop); - lj_mcode_sync(as->mcbot, mxp); - lj_mcode_commitbot(as->J, mxp); - as->mcbot = mxp; - as->mclim = as->mcbot + MCLIM_REDZONE; + as->mctop = mxp; + } +} + +static MCode *asm_sparejump_use(MCode *mcarea, MCode tjump) +{ + MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size); + int slot = MIPS_SPAREJUMP; + while (slot--) { + mxp -= 2; + if (*mxp == tjump) { + return mxp; + } else if (*mxp == MIPSI_NOP) { + *mxp = tjump; + return mxp; + } } + return NULL; } /* Setup exit stub after the end of each trace. */ @@ -84,7 +96,8 @@ static void asm_exitstub_setup(ASMState *as) /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */ *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno; *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu); - lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0); + lj_assertA(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0, + "branch target out of range"); *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0; as->mctop = mxp; } @@ -101,7 +114,12 @@ static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt) as->invmcp = NULL; as->loopinv = 1; as->mcp = p+1; +#if !LJ_TARGET_MIPSR6 mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u); /* Invert cond. */ +#else + mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : + (mi>>28) == 4 ? 0x00800000u : 0x00010000u); /* Invert cond. */ +#endif target = p; /* Patch target later in asm_loop_fixup. */ } emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); @@ -165,9 +183,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) } else if (ir->o == IR_UREFC) { if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); - int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); - int32_t jgl = (intptr_t)J2G(as->J); - if ((uint32_t)(ofs-jgl) < 65536) { + intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; + intptr_t jgl = (intptr_t)J2G(as->J); + if ((uintptr_t)(ofs-jgl) < 65536) { *ofsp = ofs-jgl-32768; return RID_JGL; } else { @@ -175,6 +193,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) return ra_allock(as, ofs-(int16_t)ofs, allow); } } + } else if (ir->o == IR_TMPREF) { + *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); + return RID_JGL; } } *ofsp = 0; @@ -189,20 +210,21 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, Reg base; if (ra_noreg(ir->r) && canfuse(as, ir)) { if (ir->o == IR_ADD) { - int32_t ofs2; - if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { + intptr_t ofs2; + if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), + checki16(ofs2))) { ref = ir->op1; - ofs = ofs2; + ofs = (int32_t)ofs2; } } else if (ir->o == IR_STRREF) { - int32_t ofs2 = 65536; - lua_assert(ofs == 0); + intptr_t ofs2 = 65536; + lj_assertA(ofs == 0, "bad usage"); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { - ofs2 = ofs + IR(ir->op2)->i; + ofs2 = ofs + get_kval(as, ir->op2); ref = ir->op1; } else if (irref_isk(ir->op1)) { - ofs2 = ofs + IR(ir->op1)->i; + ofs2 = ofs + get_kval(as, ir->op1); ref = ir->op2; } if (!checki16(ofs2)) { @@ -210,7 +232,7 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, Reg right, left = ra_alloc2(as, ir, allow); right = (left >> 8); left &= 255; emit_hsi(as, mi, rt, RID_TMP, ofs); - emit_dst(as, MIPSI_ADDU, RID_TMP, left, right); + emit_dst(as, MIPSI_AADDU, RID_TMP, left, right); return; } ofs = ofs2; @@ -225,29 +247,43 @@ static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref, /* Generate a call to a C function. */ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { - uint32_t n, nargs = CCI_NARGS(ci); - int32_t ofs = 16; + uint32_t n, nargs = CCI_XNARGS(ci); + int32_t ofs = LJ_32 ? 16 : 0; +#if LJ_SOFTFP + Reg gpr = REGARG_FIRSTGPR; +#else Reg gpr, fpr = REGARG_FIRSTFPR; +#endif if ((void *)ci->func) - emit_call(as, (void *)ci->func); + emit_call(as, (void *)ci->func, 1); +#if !LJ_SOFTFP for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) as->cost[gpr] = REGCOST(~0u, ASMREF_L); gpr = REGARG_FIRSTGPR; +#endif for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; if (ref) { IRIns *ir = IR(ref); +#if !LJ_SOFTFP if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && !(ci->flags & CCI_VARARG)) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Already evicted. */ ra_leftov(as, fpr, ref); - fpr += 2; - gpr += irt_isnum(ir->t) ? 2 : 1; - } else { + fpr += LJ_32 ? 2 : 1; + gpr += (LJ_32 && irt_isnum(ir->t)) ? 2 : 1; + } else +#endif + { +#if LJ_32 && !LJ_SOFTFP fpr = REGARG_LASTFPR+1; - if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1; +#endif + if (LJ_32 && irt_isnum(ir->t)) gpr = (gpr+1) & ~1; if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Already evicted. */ +#if !LJ_SOFTFP if (irt_isfp(ir->t)) { RegSet of = as->freeset; Reg r; @@ -256,31 +292,56 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) r = ra_alloc1(as, ref, RSET_FPR); as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); if (irt_isnum(ir->t)) { +#if LJ_32 emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1); emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r); - lua_assert(rset_test(as->freeset, gpr+1)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, gpr+1), + "reg %d not free", gpr+1); /* Already evicted. */ gpr += 2; +#else + emit_tg(as, MIPSI_DMFC1, gpr, r); + gpr++; fpr++; +#endif } else if (irt_isfloat(ir->t)) { emit_tg(as, MIPSI_MFC1, gpr, r); gpr++; +#if LJ_64 + fpr++; +#endif } - } else { + } else +#endif + { ra_leftov(as, gpr, ref); gpr++; +#if LJ_64 && !LJ_SOFTFP + fpr++; +#endif } } else { - Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + Reg r = ra_alloc1z(as, ref, !LJ_SOFTFP && irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); +#if LJ_32 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; emit_spstore(as, ir, r, ofs); ofs += irt_isnum(ir->t) ? 8 : 4; +#else + emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isfp(ir->t) && !irt_is64(ir->t)) ? 4 : 0)); + ofs += 8; +#endif } } } else { +#if !LJ_SOFTFP fpr = REGARG_LASTFPR+1; - if (gpr <= REGARG_LASTGPR) +#endif + if (gpr <= REGARG_LASTGPR) { gpr++; - else - ofs += 4; +#if LJ_64 && !LJ_SOFTFP + fpr++; +#endif + } else { + ofs += LJ_32 ? 4 : 8; + } } checkmclim(as); } @@ -291,28 +352,38 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); +#if !LJ_SOFTFP if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; +#endif if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ if (hiop && ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { + lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (!LJ_SOFTFP && irt_isfp(ir->t)) { if ((ci->flags & CCI_CASTU64)) { int32_t ofs = sps_scale(ir->s); Reg dest = ir->r; if (ra_hasreg(dest)) { ra_free(as, dest); ra_modified(as, dest); +#if LJ_32 emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1); emit_tg(as, MIPSI_MTC1, RID_RETLO, dest); +#else + emit_tg(as, MIPSI_DMTC1, RID_RET, dest); +#endif } if (ofs) { +#if LJ_32 emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0)); emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4)); +#else + emit_tsi(as, MIPSI_SD, RID_RET, RID_SP, ofs); +#endif } } else { ra_destreg(as, ir, RID_FPRET); @@ -325,15 +396,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) } } -static void asm_call(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX]; - const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; - asm_collectargs(as, ir, ci, args); - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} - static void asm_callx(ASMState *as, IRIns *ir) { IRRef args[CCI_NARGS_MAX*2]; @@ -346,7 +408,7 @@ static void asm_callx(ASMState *as, IRIns *ir) func = ir->op2; irf = IR(func); if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)(irf->i); + ci.func = (ASMFunction)(void *)get_kval(as, func); } else { /* Need specific register for indirect calls. */ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); MCode *p = as->mcp; @@ -361,27 +423,23 @@ static void asm_callx(ASMState *as, IRIns *ir) asm_gencall(as, &ci, args); } -static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) -{ - const CCallInfo *ci = &lj_ir_callinfo[id]; - IRRef args[2]; - args[0] = ir->op1; - args[1] = ir->op2; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} - +#if !LJ_SOFTFP static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) { /* The modified regs must match with the *.dasc implementation. */ RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)| - RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR); + RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR) +#if LJ_TARGET_MIPSR6 + |RID2RSET(RID_F21) +#endif + ; if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); ra_evictset(as, drop); ra_destreg(as, ir, RID_FPRET); - emit_call(as, (void *)lj_ir_callinfo[id].func); + emit_call(as, (void *)lj_ir_callinfo[id].func, 0); ra_leftov(as, REGARG_FIRSTFPR, ir->op1); } +#endif /* -- Returns ------------------------------------------------------------- */ @@ -390,25 +448,52 @@ static void asm_retf(ASMState *as, IRIns *ir) { Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); + int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); as->topslot -= (BCReg)delta; if ((int32_t)as->topslot < 0) as->topslot = 0; irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guard(as, MIPSI_BNE, RID_TMP, - ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); - emit_tsi(as, MIPSI_LW, RID_TMP, base, -8); + ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); + emit_tsi(as, MIPSI_AL, RID_TMP, base, -8); } +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + if ((as->flags & JIT_F_MIPSXXR2)) { + emit_tsml(as, LJ_64 ? MIPSI_DINS : MIPSI_INS, RID_TMP, tmp, + lj_fls(SBUF_MASK_FLAG), 0); + } else { + emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp); + emit_tsi(as, MIPSI_ANDI, tmp, tmp, SBUF_MASK_FLAG); + } + emit_getgl(as, RID_TMP, cur_L); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + /* -- Type conversions ---------------------------------------------------- */ +#if !LJ_SOFTFP static void asm_tointg(ASMState *as, IRIns *ir, Reg left) { Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); Reg dest = ra_dest(as, ir, RSET_GPR); +#if !LJ_TARGET_MIPSR6 asm_guard(as, MIPSI_BC1F, 0, 0); emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left); +#else + asm_guard(as, MIPSI_BC1EQZ, 0, (tmp&31)); + emit_fgh(as, MIPSI_CMP_EQ_D, tmp, tmp, left); +#endif emit_fg(as, MIPSI_CVT_D_W, tmp, tmp); emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, MIPSI_CVT_W_D, tmp, left); @@ -424,15 +509,57 @@ static void asm_tobit(ASMState *as, IRIns *ir) emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fgh(as, MIPSI_ADD_D, tmp, left, right); } +#elif LJ_64 /* && LJ_SOFTFP */ +static void asm_tointg(ASMState *as, IRIns *ir, Reg r) +{ + /* The modified regs must match with the *.dasc implementation. */ + RegSet drop = RID2RSET(REGARG_FIRSTGPR)|RID2RSET(RID_RET)|RID2RSET(RID_RET+1)| + RID2RSET(RID_R1)|RID2RSET(RID_R12); + if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); + ra_evictset(as, drop); + /* Return values are in RID_RET (converted value) and RID_RET+1 (status). */ + ra_destreg(as, ir, RID_RET); + asm_guard(as, MIPSI_BNE, RID_RET+1, RID_ZERO); + emit_call(as, (void *)lj_ir_callinfo[IRCALL_lj_vm_tointg].func, 0); + if (r == RID_NONE) + ra_leftov(as, REGARG_FIRSTGPR, ir->op1); + else if (r != REGARG_FIRSTGPR) + emit_move(as, REGARG_FIRSTGPR, r); +} + +static void asm_tobit(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + emit_dta(as, MIPSI_SLL, dest, dest, 0); + asm_callid(as, ir, IRCALL_lj_vm_tobit); +} +#endif static void asm_conv(ASMState *as, IRIns *ir) { IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +#if !LJ_SOFTFP32 int stfp = (st == IRT_NUM || st == IRT_FLOAT); +#endif +#if LJ_64 + int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); +#endif IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ +#if LJ_32 + /* 64 bit integer conversions are handled by SPLIT. */ + lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); +#endif +#if LJ_SOFTFP32 + /* FP conversions are handled by SPLIT. */ + lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), + "IR %04d has FP type", + (int)(ir - as->ir) - REF_BIAS); + /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ +#else + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); +#if !LJ_SOFTFP if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -448,27 +575,56 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); emit_fg(as, MIPSI_CVT_D_W, dest, dest); emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); emit_tg(as, MIPSI_MTC1, RID_TMP, dest); emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left); emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); +#if LJ_64 + } else if(st == IRT_U64) { /* U64 to FP conversion. */ + /* if (x >= 1u<<63) y = (double)(int64_t)(x&(1u<<63)-1) + pow(2.0, 63) */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest)); + MCLabel l_end = emit_label(as); + if (irt_isfloat(ir->t)) { + emit_fgh(as, MIPSI_ADD_S, dest, dest, tmp); + emit_lsptr(as, MIPSI_LWC1, (tmp & 31), (void *)&as->J->k32[LJ_K32_2P63], + rset_exclude(RSET_GPR, left)); + emit_fg(as, MIPSI_CVT_S_L, dest, dest); + } else { + emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp); + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), (void *)&as->J->k64[LJ_K64_2P63], + rset_exclude(RSET_GPR, left)); + emit_fg(as, MIPSI_CVT_D_L, dest, dest); + } + emit_branch(as, MIPSI_BGEZ, left, RID_ZERO, l_end); + emit_tg(as, MIPSI_DMTC1, RID_TMP, dest); + emit_tsml(as, MIPSI_DEXTM, RID_TMP, left, 30, 0); +#endif } else { /* Integer to FP conversion. */ Reg left = ra_alloc1(as, lref, RSET_GPR); +#if LJ_32 emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W, dest, dest); emit_tg(as, MIPSI_MTC1, left, dest); +#else + MIPSIns mi = irt_isfloat(ir->t) ? + (st64 ? MIPSI_CVT_S_L : MIPSI_CVT_S_W) : + (st64 ? MIPSI_CVT_D_L : MIPSI_CVT_D_W); + emit_fg(as, mi, dest, dest); + emit_tg(as, st64 ? MIPSI_DMTC1 : MIPSI_MTC1, left, dest); +#endif } } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, lref, RSET_FPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); - if (irt_isu32(ir->t)) { + if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */ emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP); emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000); @@ -479,25 +635,112 @@ static void asm_conv(ASMState *as, IRIns *ir) tmp, left, tmp); if (st == IRT_FLOAT) emit_lsptr(as, MIPSI_LWC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)), - RSET_GPR); + (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); else emit_lsptr(as, MIPSI_LDC1, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)), - RSET_GPR); + (void *)&as->J->k64[LJ_K64_2P31], RSET_GPR); +#if LJ_64 + } else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ + MCLabel l_end; + emit_tg(as, MIPSI_DMFC1, dest, tmp); + l_end = emit_label(as); + /* For inputs >= 2^63 add -2^64 and convert again. */ + if (st == IRT_NUM) { + emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); + emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), + (void *)&as->J->k64[LJ_K64_M2P64], + rset_exclude(RSET_GPR, dest)); + emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ +#if !LJ_TARGET_MIPSR6 + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); +#else + emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); + emit_fgh(as, MIPSI_CMP_LT_D, left, left, tmp); +#endif + emit_lsptr(as, MIPSI_LDC1, (tmp & 31), + (void *)&as->J->k64[LJ_K64_2P63], + rset_exclude(RSET_GPR, dest)); + } else { + emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); + emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); + emit_lsptr(as, MIPSI_LWC1, (tmp & 31), + (void *)&as->J->k32[LJ_K32_M2P64], + rset_exclude(RSET_GPR, dest)); + emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ +#if !LJ_TARGET_MIPSR6 + emit_branch(as, MIPSI_BC1T, 0, 0, l_end); + emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); +#else + emit_branch(as, MIPSI_BC1NEZ, 0, (left&31), l_end); + emit_fgh(as, MIPSI_CMP_LT_S, left, left, tmp); +#endif + emit_lsptr(as, MIPSI_LWC1, (tmp & 31), + (void *)&as->J->k32[LJ_K32_2P63], + rset_exclude(RSET_GPR, dest)); + } +#endif } else { +#if LJ_32 emit_tg(as, MIPSI_MFC1, dest, tmp); emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, tmp, left); +#else + MIPSIns mi = irt_is64(ir->t) ? + (st == IRT_NUM ? MIPSI_TRUNC_L_D : MIPSI_TRUNC_L_S) : + (st == IRT_NUM ? MIPSI_TRUNC_W_D : MIPSI_TRUNC_W_S); + emit_tg(as, irt_is64(ir->t) ? MIPSI_DMFC1 : MIPSI_MFC1, dest, left); + emit_fg(as, mi, left, left); +#endif } } - } else { + } else +#else + if (irt_isfp(ir->t)) { +#if LJ_64 && LJ_HASFFI + if (stfp) { /* FP to FP conversion. */ + asm_callid(as, ir, irt_isnum(ir->t) ? IRCALL_softfp_f2d : + IRCALL_softfp_d2f); + } else { /* Integer to FP conversion. */ + IRCallID cid = ((IRT_IS64 >> st) & 1) ? + (irt_isnum(ir->t) ? + (st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d) : + (st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f)) : + (irt_isnum(ir->t) ? + (st == IRT_INT ? IRCALL_softfp_i2d : IRCALL_softfp_ui2d) : + (st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f)); + asm_callid(as, ir, cid); + } +#else + asm_callid(as, ir, IRCALL_softfp_i2d); +#endif + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); + asm_tointg(as, ir, RID_NONE); + } else { + IRCallID cid = irt_is64(ir->t) ? + ((st == IRT_NUM) ? + (irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : + (irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : + ((st == IRT_NUM) ? + (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : + (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)); + asm_callid(as, ir, cid); + } + } else +#endif +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if ((ir->op2 & IRCONV_SEXT)) { - if ((as->flags & JIT_F_MIPS32R2)) { + if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left); } else { uint32_t shift = st == IRT_I8 ? 24 : 16; @@ -509,94 +752,171 @@ static void asm_conv(ASMState *as, IRIns *ir) (int32_t)(st == IRT_U8 ? 0xff : 0xffff)); } } else { /* 32/64 bit integer conversions. */ +#if LJ_32 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ +#else + if (irt_is64(ir->t)) { + if (st64) { + /* 64/64 bit no-op (cast)*/ + ra_leftov(as, dest, lref); + } else { + Reg left = ra_alloc1(as, lref, RSET_GPR); + if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ + emit_dta(as, MIPSI_SLL, dest, left, 0); + } else { /* 32 to 64 bit zero extension. */ + emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); + } + } + } else { + if (st64 && !(ir->op2 & IRCONV_NONE)) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_tsml(as, MIPSI_DEXT, dest, left, 31, 0); + } else { /* 32/32 bit no-op (cast). */ + /* Do nothing, but may need to move regs. */ + ra_leftov(as, dest, lref); + } + } +#endif } } } -#if LJ_HASFFI -static void asm_conv64(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); - IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); - IRCallID id; - const CCallInfo *ci; - IRRef args[2]; - args[LJ_BE?0:1] = ir->op1; - args[LJ_BE?1:0] = (ir-1)->op1; - if (st == IRT_NUM || st == IRT_FLOAT) { - id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); - ir--; - } else { - id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); - } - ci = &lj_ir_callinfo[id]; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} -#endif - static void asm_strto(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; + int32_t ofs = 0; +#if LJ_SOFTFP32 + ra_evictset(as, RSET_SCRATCH); + if (ra_used(ir)) { + if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && + (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { + int i; + for (i = 0; i < 2; i++) { + Reg r = (ir+i)->r; + if (ra_hasreg(r)) { + ra_free(as, r); + ra_modified(as, r); + emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); + } + } + ofs = sps_scale(ir->s & ~1); + } else { + Reg rhi = ra_dest(as, ir+1, RSET_GPR); + Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); + emit_tsi(as, MIPSI_LW, rhi, RID_SP, ofs+(LJ_BE?0:4)); + emit_tsi(as, MIPSI_LW, rlo, RID_SP, ofs+(LJ_BE?4:0)); + } + } +#else RegSet drop = RSET_SCRATCH; if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ra_evictset(as, drop); + ofs = sps_scale(ir->s); +#endif asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ args[0] = ir->op1; /* GCstr *str */ args[1] = ASMREF_TMP1; /* TValue *n */ asm_gencall(as, ci, args); /* Store the result to the spill slot or temp slots. */ - emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), - RID_SP, sps_scale(ir->s)); + emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), + RID_SP, ofs); } -/* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +/* -- Memory references --------------------------------------------------- */ + +#if LJ_64 +/* Store tagged value for ref at base+ofs. */ +static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) { + RegSet allow = rset_exclude(RSET_GPR, base); IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - else /* Otherwise force a spill and use the spill slot. */ - emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "store of IR type %d", irt_type(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_tsi(as, MIPSI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); } else { - /* Otherwise use g->tmptv to hold the TValue. */ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); + Reg src = ra_alloc1(as, ref, allow); + Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, + rset_exclude(allow, src)); + emit_tsi(as, MIPSI_SD, RID_TMP, base, ofs); + if (irt_isinteger(ir->t)) { + emit_dst(as, MIPSI_DADDU, RID_TMP, RID_TMP, type); + emit_tsml(as, MIPSI_DEXT, RID_TMP, src, 31, 0); + } else { + emit_dst(as, MIPSI_DADDU, RID_TMP, src, type); } - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); } } +#endif -static void asm_tostr(ASMState *as, IRIns *ir) +/* Get pointer to TValue. */ +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) { - IRRef args[2]; - args[0] = ASMREF_L; - as->gcsteps++; - if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; - args[1] = ASMREF_TMP1; /* const lua_Number * */ - asm_setupresult(as, ir, ci); /* GCstr * */ - asm_gencall(as, ci, args); - asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); + int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if ((mode & IRTMPREF_OUT1)) { +#if LJ_SOFTFP + emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); +#if LJ_64 + emit_setgl(as, ra_alloc1(as, ref, RSET_GPR), tmptv.u64); +#else + lj_assertA(irref_isk(ref), "unsplit FP op"); + emit_setgl(as, + ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), + tmptv.u32.lo); + emit_setgl(as, + ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), + tmptv.u32.hi); +#endif +#else + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); + emit_tsi(as, MIPSI_SDC1, (src & 31), RID_JGL, tmpofs); +#endif + } else if (irref_isk(ref)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, igcptr(ir_knum(ir)), dest); + } else { +#if LJ_SOFTFP32 + lj_assertA(0, "unsplit FP op"); +#else + /* Otherwise force a spill and use the spill slot. */ + emit_tsi(as, MIPSI_AADDIU, dest, RID_SP, ra_spill(as, ir)); +#endif + } + } else { + /* Otherwise use g->tmptv to hold the TValue. */ +#if LJ_32 + Reg type; + emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, tmpofs); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_GPR); + emit_setgl(as, src, tmptv.gcr); + } + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) + type = ra_alloc1(as, ref+1, RSET_GPR); + else + type = ra_allock(as, (int32_t)irt_toitype(ir->t), RSET_GPR); + emit_setgl(as, type, tmptv.it); +#else + asm_tvstore64(as, dest, 0, ref); + emit_tsi(as, MIPSI_DADDIU, dest, RID_JGL, tmpofs); +#endif + } } else { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; - args[1] = ir->op1; /* int32_t k */ - asm_setupresult(as, ir, ci); /* GCstr * */ - asm_gencall(as, ci, args); + emit_tsi(as, MIPSI_AADDIU, dest, RID_JGL, tmpofs); } } -/* -- Memory references --------------------------------------------------- */ - static void asm_aref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -608,14 +928,18 @@ static void asm_aref(ASMState *as, IRIns *ir) ofs += 8*IR(ir->op2)->i; if (checki16(ofs)) { base = ra_alloc1(as, refa, RSET_GPR); - emit_tsi(as, MIPSI_ADDIU, dest, base, ofs); + emit_tsi(as, MIPSI_AADDIU, dest, base, ofs); return; } } base = ra_alloc1(as, ir->op1, RSET_GPR); idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); - emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base); +#if !LJ_TARGET_MIPSR6 + emit_dst(as, MIPSI_AADDU, dest, RID_TMP, base); emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3); +#else + emit_dst(as, MIPSI_ALSA | MIPSF_A(3-1), dest, idx, base); +#endif } /* Inlined hash lookup. Specialized for key type and for const keys. @@ -626,21 +950,25 @@ static void asm_aref(ASMState *as, IRIns *ir) ** } while ((n = nextnode(n))); ** return niltv(L); */ -static void asm_href(ASMState *as, IRIns *ir) +static void asm_href(ASMState *as, IRIns *ir, IROp merge) { RegSet allow = RSET_GPR; int destused = ra_used(ir); Reg dest = ra_dest(as, ir, allow); Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2; +#if LJ_64 + Reg cmp64 = RID_NONE; +#endif IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); IRType1 kt = irkey->t; uint32_t khash; MCLabel l_end, l_loop, l_next; rset_clear(allow, tab); - if (irt_isnum(kt)) { + if (!LJ_SOFTFP && irt_isnum(kt)) { key = ra_alloc1(as, refkey, RSET_FPR); tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); } else { @@ -648,31 +976,76 @@ static void asm_href(ASMState *as, IRIns *ir) key = ra_alloc1(as, refkey, allow); rset_clear(allow, key); } - type = ra_allock(as, irt_toitype(irkey->t), allow); - rset_clear(allow, type); +#if LJ_32 + if (LJ_SOFTFP && irkey[1].o == IR_HIOP) { + if (ra_hasreg((irkey+1)->r)) { + type = tmpnum = (irkey+1)->r; + tmp1 = ra_scratch(as, allow); + rset_clear(allow, tmp1); + ra_noweak(as, tmpnum); + } else { + type = tmpnum = ra_allocref(as, refkey+1, allow); + } + rset_clear(allow, tmpnum); + } else { + type = ra_allock(as, (int32_t)irt_toitype(kt), allow); + rset_clear(allow, type); + } +#endif } tmp2 = ra_scratch(as, allow); rset_clear(allow, tmp2); +#if LJ_64 + if (LJ_SOFTFP || !irt_isnum(kt)) { + /* Allocate cmp64 register used for 64-bit comparisons */ + if (LJ_SOFTFP && irt_isnum(kt)) { + cmp64 = key; + } else if (!isk && irt_isaddr(kt)) { + cmp64 = tmp2; + } else { + int64_t k; + if (isk && irt_isaddr(kt)) { + k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; + } else { + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + k = ~((int64_t)~irt_toitype(kt) << 47); + } + cmp64 = ra_allock(as, k, allow); + rset_clear(allow, cmp64); + } + } +#endif - /* Key not found in chain: load niltv. */ + /* Key not found in chain: jump to exit (if merged) or load niltv. */ l_end = emit_label(as); - if (destused) + as->invmcp = NULL; + if (merge == IR_NE) + asm_guard(as, MIPSI_B, RID_ZERO, RID_ZERO); + else if (destused) emit_loada(as, dest, niltvg(J2G(as->J))); - else - *--as->mcp = MIPSI_NOP; /* Follow hash chain until the end. */ emit_move(as, dest, tmp1); l_loop = --as->mcp; - emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next)); + emit_tsi(as, MIPSI_AL, tmp1, dest, (int32_t)offsetof(Node, next)); l_next = emit_label(as); /* Type and value comparison. */ - if (irt_isnum(kt)) { + if (merge == IR_EQ) { /* Must match asm_guard(). */ + emit_ti(as, MIPSI_LI, RID_TMP, as->snapno); + l_end = asm_exitstub_addr(as); + } + if (!LJ_SOFTFP && irt_isnum(kt)) { +#if !LJ_TARGET_MIPSR6 emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key); - emit_tg(as, MIPSI_MFC1, tmp1, key+1); +#else + emit_branch(as, MIPSI_BC1NEZ, 0, (tmpnum&31), l_end); + emit_fgh(as, MIPSI_CMP_EQ_D, tmpnum, tmpnum, key); +#endif + *--as->mcp = MIPSI_NOP; /* Avoid NaN comparison overhead. */ emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next); emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM); +#if LJ_32 emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n)); } else { if (irt_ispri(kt)) { @@ -685,36 +1058,52 @@ static void asm_href(ASMState *as, IRIns *ir) } emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it)); *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); +#else + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 15); + emit_tg(as, MIPSI_DMTC1, tmp1, tmpnum); + emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + } else { + emit_branch(as, MIPSI_BEQ, tmp1, cmp64, l_end); + emit_tsi(as, MIPSI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + } + *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu); + if (!isk && irt_isaddr(kt)) { + type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); + emit_dst(as, MIPSI_DADDU, tmp2, key, type); + rset_clear(allow, type); + } +#endif /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; + khash = isk ? ir_khash(as, irkey) : 1; if (khash == 0) { - emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); } else { Reg tmphash = tmp1; - if (irref_isk(refkey)) + if (isk) tmphash = ra_allock(as, khash, allow); - emit_dst(as, MIPSI_ADDU, dest, dest, tmp1); - lua_assert(sizeof(Node) == 24); + emit_dst(as, MIPSI_AADDU, dest, dest, tmp1); + lj_assertA(sizeof(Node) == 24, "bad Node size"); emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1); emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3); emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5); emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash); - emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_tsi(as, MIPSI_AL, dest, tab, (int32_t)offsetof(GCtab, node)); emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (irref_isk(refkey)) { + if (isk) { /* Nothing to do. */ } else if (irt_isstr(kt)) { - emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); + emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); } else { /* Must match with hash*() in lj_tab.c. */ emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2); emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31); emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2); emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31); emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest); - if (irt_isnum(kt)) { +#if LJ_32 + if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); - if ((as->flags & JIT_F_MIPS32R2)) { + if ((as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); } else { emit_dst(as, MIPSI_OR, dest, dest, tmp1); @@ -722,13 +1111,35 @@ static void asm_href(ASMState *as, IRIns *ir) emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31); } emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); +#if LJ_SOFTFP + emit_ds(as, MIPSI_MOVE, tmp1, type); + emit_ds(as, MIPSI_MOVE, tmp2, key); +#else emit_tg(as, MIPSI_MFC1, tmp2, key); emit_tg(as, MIPSI_MFC1, tmp1, key+1); +#endif } else { emit_dst(as, MIPSI_XOR, tmp2, key, tmp1); emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31); emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow)); } +#else + emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1); + emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31); + if (irt_isnum(kt)) { + emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1); + emit_dta(as, MIPSI_DSRA32, tmp1, LJ_SOFTFP ? key : tmp1, 0); + emit_dta(as, MIPSI_SLL, tmp2, LJ_SOFTFP ? key : tmp1, 0); +#if !LJ_SOFTFP + emit_tg(as, MIPSI_DMFC1, tmp1, key); +#endif + } else { + checkmclim(as); + emit_dta(as, MIPSI_DSRA32, tmp1, tmp1, 0); + emit_dta(as, MIPSI_SLL, tmp2, key, 0); + emit_dst(as, MIPSI_DADDU, tmp1, key, type); + } +#endif } } } @@ -741,17 +1152,24 @@ static void asm_hrefk(ASMState *as, IRIns *ir) int32_t kofs = ofs + (int32_t)offsetof(Node, key); Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; Reg node = ra_alloc1(as, ir->op1, RSET_GPR); - Reg key = RID_NONE, type = RID_TMP, idx = node; RegSet allow = rset_exclude(RSET_GPR, node); + Reg idx = node; +#if LJ_32 + Reg key = RID_NONE, type = RID_TMP; int32_t lo, hi; - lua_assert(ofs % sizeof(Node) == 0); +#else + Reg key = ra_scratch(as, allow); + int64_t k; +#endif + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ofs > 32736) { idx = dest; rset_clear(allow, dest); kofs = (int32_t)offsetof(Node, key); } else if (ra_hasreg(dest)) { - emit_tsi(as, MIPSI_ADDIU, dest, node, ofs); + emit_tsi(as, MIPSI_AADDIU, dest, node, ofs); } +#if LJ_32 if (!irt_ispri(irkey->t)) { key = ra_scratch(as, allow); rset_clear(allow, key); @@ -770,22 +1188,20 @@ nolo: asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO); if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0)); emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4)); - if (ofs > 32736) - emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow)); -} - -static void asm_newref(ASMState *as, IRIns *ir) -{ - if (ir->r != RID_SINK) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; - IRRef args[3]; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* GCtab *t */ - args[2] = ASMREF_TMP1; /* cTValue *key */ - asm_setupresult(as, ir, ci); /* TValue * */ - asm_gencall(as, ci, args); - asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); +#else + if (irt_ispri(irkey->t)) { + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = (int64_t)ir_knum(irkey)->u64; + } else { + k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); } + asm_guard(as, MIPSI_BNE, key, ra_allock(as, k, allow)); + emit_tsi(as, MIPSI_LD, key, idx, kofs); +#endif + if (ofs > 32736) + emit_tsi(as, MIPSI_AADDU, dest, node, ra_allock(as, ofs, allow)); } static void asm_uref(ASMState *as, IRIns *ir) @@ -794,30 +1210,31 @@ static void asm_uref(ASMState *as, IRIns *ir) if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR); + emit_lsptr(as, MIPSI_AL, dest, v, RSET_GPR); } else { Reg uv = ra_scratch(as, RSET_GPR); Reg func = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->o == IR_UREFC) { asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); + emit_tsi(as, MIPSI_AADDIU, dest, uv, (int32_t)offsetof(GCupval, tv)); emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); } else { - emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v)); + emit_tsi(as, MIPSI_AL, dest, uv, (int32_t)offsetof(GCupval, v)); } - emit_tsi(as, MIPSI_LW, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + emit_tsi(as, MIPSI_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) { +#if LJ_32 Reg dest = ra_dest(as, ir, RSET_GPR); IRRef ref = ir->op2, refk = ir->op1; int32_t ofs = (int32_t)sizeof(GCstr); @@ -849,49 +1266,79 @@ static void asm_strref(ASMState *as, IRIns *ir) else emit_dst(as, MIPSI_ADDU, dest, r, ra_allock(as, ofs, rset_exclude(RSET_GPR, r))); +#else + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg base = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + int32_t ofs = sizeof(GCstr); + rset_clear(allow, base); + if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { + emit_tsi(as, MIPSI_DADDIU, dest, base, ofs + irr->i); + } else { + emit_tsi(as, MIPSI_DADDIU, dest, dest, ofs); + emit_dst(as, MIPSI_DADDU, dest, base, ra_alloc1(as, ir->op2, allow)); + } +#endif } /* -- Loads and stores ---------------------------------------------------- */ -static MIPSIns asm_fxloadins(IRIns *ir) +static MIPSIns asm_fxloadins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: return MIPSI_LB; case IRT_U8: return MIPSI_LBU; case IRT_I16: return MIPSI_LH; case IRT_U16: return MIPSI_LHU; - case IRT_NUM: return MIPSI_LDC1; - case IRT_FLOAT: return MIPSI_LWC1; - default: return MIPSI_LW; + case IRT_NUM: + lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); + if (!LJ_SOFTFP) return MIPSI_LDC1; + /* fallthrough */ + case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_LWC1; + /* fallthrough */ + default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_LD : MIPSI_LW; } } -static MIPSIns asm_fxstoreins(IRIns *ir) +static MIPSIns asm_fxstoreins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return MIPSI_SB; case IRT_I16: case IRT_U16: return MIPSI_SH; - case IRT_NUM: return MIPSI_SDC1; - case IRT_FLOAT: return MIPSI_SWC1; - default: return MIPSI_SW; + case IRT_NUM: + lj_assertA(!LJ_SOFTFP32, "unsplit FP op"); + if (!LJ_SOFTFP) return MIPSI_SDC1; + /* fallthrough */ + case IRT_FLOAT: if (!LJ_SOFTFP) return MIPSI_SWC1; + /* fallthrough */ + default: return (LJ_64 && irt_is64(ir->t)) ? MIPSI_SD : MIPSI_SW; } } static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - MIPSIns mi = asm_fxloadins(ir); + MIPSIns mi = asm_fxloadins(as, ir); + Reg idx; int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs); - return; + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + idx = RID_JGL; + ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_tsi(as, MIPSI_AADDIU, dest, idx, ofs); + return; + } } + ofs = field_ofs[ir->op2]; } - ofs = field_ofs[ir->op2]; - lua_assert(!irt_isfp(ir->t)); + lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); emit_tsi(as, mi, dest, idx, ofs); } @@ -902,51 +1349,90 @@ static void asm_fstore(ASMState *as, IRIns *ir) IRIns *irf = IR(ir->op1); Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); int32_t ofs = field_ofs[irf->op2]; - MIPSIns mi = asm_fxstoreins(ir); - lua_assert(!irt_isfp(ir->t)); + MIPSIns mi = asm_fxstoreins(as, ir); + lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); emit_tsi(as, mi, src, idx, ofs); } } static void asm_xload(ASMState *as, IRIns *ir) { - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); + Reg dest = ra_dest(as, ir, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), + "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); } -static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) { if (ir->r != RID_SINK) { - Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + Reg src = ra_alloc1z(as, ir->op2, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } } +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + static void asm_ahuvload(ASMState *as, IRIns *ir) { - IRType1 t = ir->t; + int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); Reg dest = RID_NONE, type = RID_TMP, idx; RegSet allow = RSET_GPR; int32_t ofs = 0; + IRType1 t = ir->t; + if (hiop) { + t.irt = IRT_NUM; + if (ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } + } if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); + lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); +#if LJ_64 + if (irt_isaddr(t)) + emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); + else if (irt_isint(t)) + emit_dta(as, MIPSI_SLL, dest, dest, 0); +#endif } idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; rset_clear(allow, idx); if (irt_isnum(t)) { - asm_guard(as, MIPSI_BEQ, type, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM); - if (ra_hasreg(dest)) - emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); + asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); + emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); } else { - asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow)); - if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); + asm_guard(as, MIPSI_BNE, type, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } +#if LJ_32 + if (ra_hasreg(dest)) { + if (!LJ_SOFTFP && irt_isnum(t)) + emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); + else + emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0)); } emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4)); +#else + if (ra_hasreg(dest)) { + if (!LJ_SOFTFP && irt_isnum(t)) { + emit_hsi(as, MIPSI_LDC1, dest, idx, ofs); + dest = type; + } + } else { + dest = type; + } + emit_dta(as, MIPSI_DSRA32, type, dest, 15); + emit_tsi(as, MIPSI_LD, dest, idx, ofs); +#endif } static void asm_ahustore(ASMState *as, IRIns *ir) @@ -956,81 +1442,184 @@ static void asm_ahustore(ASMState *as, IRIns *ir) int32_t ofs = 0; if (ir->r == RID_SINK) return; - if (irt_isnum(ir->t)) { - src = ra_alloc1(as, ir->op2, RSET_FPR); + if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, LJ_SOFTFP ? RSET_GPR : RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_hsi(as, LJ_SOFTFP ? MIPSI_SD : MIPSI_SDC1, src, idx, ofs); } else { +#if LJ_32 if (!irt_ispri(ir->t)) { src = ra_alloc1(as, ir->op2, allow); rset_clear(allow, src); } - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) + type = ra_alloc1(as, (ir+1)->op2, allow); + else + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); rset_clear(allow, type); - } - idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(ir->t)) { - emit_hsi(as, MIPSI_SDC1, src, idx, ofs); - } else { + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); if (ra_hasreg(src)) emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0)); emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4)); +#else + Reg tmp = RID_TMP; + if (irt_ispri(ir->t)) { + tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + rset_clear(allow, tmp); + } else { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + rset_clear(allow, type); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_tsi(as, MIPSI_SD, tmp, idx, ofs); + if (ra_hasreg(src)) { + if (irt_isinteger(ir->t)) { + emit_dst(as, MIPSI_DADDU, tmp, tmp, type); + emit_tsml(as, MIPSI_DEXT, tmp, src, 31, 0); + } else { + emit_dst(as, MIPSI_DADDU, tmp, src, type); + } + } +#endif } } static void asm_sload(ASMState *as, IRIns *ir) { - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); - IRType1 t = ir->t; Reg dest = RID_NONE, type = RID_NONE, base; RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); + IRType1 t = ir->t; +#if LJ_32 + int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); + int hiop = (LJ_SOFTFP32 && (ir+1)->o == IR_HIOP); + if (hiop) + t.irt = IRT_NUM; +#else + int32_t ofs = 8*((int32_t)ir->op1-2); +#endif + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); +#if LJ_SOFTFP32 + lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), + "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ + if (hiop && ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } +#else if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { - dest = ra_scratch(as, RSET_FPR); + dest = ra_scratch(as, LJ_SOFTFP ? allow : RSET_FPR); asm_tointg(as, ir, dest); t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); + } else +#endif + if (ra_used(ir)) { + lj_assertA((LJ_SOFTFP32 ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad SLOAD type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_CONVERT)) { + if (!LJ_SOFTFP32 && (ir->op2 & IRSLOAD_CONVERT)) { if (irt_isint(t)) { - Reg tmp = ra_scratch(as, RSET_FPR); + Reg tmp = ra_scratch(as, LJ_SOFTFP ? RSET_GPR : RSET_FPR); +#if LJ_SOFTFP + ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); + ra_destreg(as, ir, RID_RET); + emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_d2i].func, 0); + if (tmp != REGARG_FIRSTGPR) + emit_move(as, REGARG_FIRSTGPR, tmp); +#else emit_tg(as, MIPSI_MFC1, dest, tmp); - emit_fg(as, MIPSI_CVT_W_D, tmp, tmp); + emit_fg(as, MIPSI_TRUNC_W_D, tmp, tmp); +#endif dest = tmp; t.irt = IRT_NUM; /* Check for original type. */ } else { Reg tmp = ra_scratch(as, RSET_GPR); +#if LJ_SOFTFP + ra_evictset(as, rset_exclude(RSET_SCRATCH, dest)); + ra_destreg(as, ir, RID_RET); + emit_call(as, (void *)lj_ir_callinfo[IRCALL_softfp_i2d].func, 0); + emit_dta(as, MIPSI_SLL, REGARG_FIRSTGPR, tmp, 0); +#else emit_fg(as, MIPSI_CVT_D_W, dest, dest); emit_tg(as, MIPSI_MTC1, tmp, dest); +#endif dest = tmp; t.irt = IRT_INT; /* Check for original type. */ } } +#if LJ_64 + else if (irt_isaddr(t)) { + /* Clear type from pointers. */ + emit_tsml(as, MIPSI_DEXTM, dest, dest, 14, 0); + } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { + /* Sign-extend integers. */ + emit_dta(as, MIPSI_SLL, dest, dest, 0); + } +#endif goto dotypecheck; } base = ra_alloc1(as, REF_BASE, allow); rset_clear(allow, base); dotypecheck: - if (irt_isnum(t)) { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); - emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); +#if LJ_32 + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + if (ra_noreg(type)) type = RID_TMP; + if (irt_isnum(t)) { + asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); + emit_tsi(as, MIPSI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); + } else { + Reg ktype = ra_allock(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : irt_toitype(t), allow); + asm_guard(as, MIPSI_BNE, type, ktype); } - if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs); - } else { - if ((ir->op2 & IRSLOAD_TYPECHECK)) { - Reg ktype = ra_allock(as, irt_toitype(t), allow); - asm_guard(as, MIPSI_BNE, RID_TMP, ktype); - type = RID_TMP; + } + if (ra_hasreg(dest)) { + if (!LJ_SOFTFP && irt_isnum(t)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + else + emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); + } + if (ra_hasreg(type)) + emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); +#else + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + type = dest < RID_MAX_GPR ? dest : RID_TMP; + if (irt_ispri(t)) { + asm_guard(as, MIPSI_BNE, type, + ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); + } else if ((ir->op2 & IRSLOAD_KEYINDEX)) { + asm_guard(as, MIPSI_BNE, RID_TMP, + ra_allock(as, (int32_t)LJ_KEYINDEX, allow)); + emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 0); + } else { + if (irt_isnum(t)) { + asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO); + emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM); + if (!LJ_SOFTFP && ra_hasreg(dest)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + } else { + asm_guard(as, MIPSI_BNE, RID_TMP, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } + emit_dta(as, MIPSI_DSRA32, RID_TMP, type, 15); } - if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0)); + emit_tsi(as, MIPSI_LD, type, base, ofs); + } else if (ra_hasreg(dest)) { + if (!LJ_SOFTFP && irt_isnum(t)) + emit_hsi(as, MIPSI_LDC1, dest, base, ofs); + else + emit_tsi(as, irt_isint(t) ? MIPSI_LW : MIPSI_LD, dest, base, + ofs ^ ((LJ_BE && irt_isint(t)) ? 4 : 0)); } - if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4)); +#endif } /* -- Allocations --------------------------------------------------------- */ @@ -1039,19 +1628,16 @@ dotypecheck: static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; - CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? - lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[2]; - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + IRRef args[4]; RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ as->gcsteps++; - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ ra_evictset(as, drop); @@ -1060,11 +1646,12 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); +#if LJ_32 int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); if (sz == 8) { ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); + lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); if (LJ_LE) ir++; } for (;;) { @@ -1074,18 +1661,33 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ofs == sizeof(GCcdata)) break; ofs -= 4; if (LJ_BE) ir++; else ir--; } +#else + emit_tsi(as, sz == 8 ? MIPSI_SD : MIPSI_SW, ra_alloc1(as, ir->op2, allow), + RID_RET, sizeof(GCcdata)); +#endif + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; } + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA); - emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ + emit_ti(as, MIPSI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ asm_gencall(as, ci, args); ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1096,7 +1698,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); Reg link = RID_TMP; MCLabel l_end = emit_label(as); - emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_tsi(as, MIPSI_AS, link, tab, (int32_t)offsetof(GCtab, gclist)); emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); emit_setgl(as, tab, gc.grayagain); emit_getgl(as, link, gc.grayagain); @@ -1113,13 +1715,13 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj, val, tmp; /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ args[1] = ir->op1; /* TValue *tv */ asm_gencall(as, ci, args); - emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); + emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); obj = IR(ir->op1)->r; tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end); @@ -1134,6 +1736,7 @@ static void asm_obar(ASMState *as, IRIns *ir) /* -- Arithmetic and logic operations ------------------------------------- */ +#if !LJ_SOFTFP static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi) { Reg dest = ra_dest(as, ir, RSET_FPR); @@ -1148,83 +1751,147 @@ static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi) Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); emit_fg(as, mi, dest, left); } +#endif -static int asm_fpjoin_pow(ASMState *as, IRIns *ir) -{ - IRIns *irp = IR(ir->op1); - if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { - IRIns *irpp = IR(irp->op1); - if (irpp == ir-2 && irpp->o == IR_FPMATH && - irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; - IRRef args[2]; - args[0] = irpp->op1; - args[1] = irp->op2; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); - return 1; - } - } - return 0; +#if !LJ_SOFTFP32 +static void asm_fpmath(ASMState *as, IRIns *ir) +{ +#if !LJ_SOFTFP + if (ir->op2 <= IRFPM_TRUNC) + asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); + else if (ir->op2 == IRFPM_SQRT) + asm_fpunary(as, ir, MIPSI_SQRT_D); + else +#endif + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); } +#endif + +#if !LJ_SOFTFP +#define asm_fpadd(as, ir) asm_fparith(as, ir, MIPSI_ADD_D) +#define asm_fpsub(as, ir) asm_fparith(as, ir, MIPSI_SUB_D) +#define asm_fpmul(as, ir) asm_fparith(as, ir, MIPSI_MUL_D) +#elif LJ_64 /* && LJ_SOFTFP */ +#define asm_fpadd(as, ir) asm_callid(as, ir, IRCALL_softfp_add) +#define asm_fpsub(as, ir) asm_callid(as, ir, IRCALL_softfp_sub) +#define asm_fpmul(as, ir) asm_callid(as, ir, IRCALL_softfp_mul) +#endif static void asm_add(ASMState *as, IRIns *ir) { - if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_ADD_D); - } else { + IRType1 t = ir->t; +#if !LJ_SOFTFP32 + if (irt_isnum(t)) { + asm_fpadd(as, ir); + } else +#endif + { + /* TODO MIPSR6: Fuse ADD(BSHL(a,1-4),b) or ADD(ADD(a,a),b) to MIPSI_ALSA. */ Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; + intptr_t k = get_kval(as, ir->op2); if (checki16(k)) { - emit_tsi(as, MIPSI_ADDIU, dest, left, k); + emit_tsi(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDIU : MIPSI_ADDIU, dest, + left, k); return; } } right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_dst(as, MIPSI_ADDU, dest, left, right); + emit_dst(as, (LJ_64 && irt_is64(t)) ? MIPSI_DADDU : MIPSI_ADDU, dest, + left, right); } } static void asm_sub(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP32 if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_SUB_D); - } else { + asm_fpsub(as, ir); + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_SUBU, dest, left, right); + emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, + left, right); } } static void asm_mul(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP32 if (irt_isnum(ir->t)) { - asm_fparith(as, ir, MIPSI_MUL_D); - } else { + asm_fpmul(as, ir); + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; - emit_dst(as, MIPSI_MUL, dest, left, right); + if (LJ_64 && irt_is64(ir->t)) { +#if !LJ_TARGET_MIPSR6 + emit_dst(as, MIPSI_MFLO, dest, 0, 0); + emit_dst(as, MIPSI_DMULT, 0, left, right); +#else + emit_dst(as, MIPSI_DMUL, dest, left, right); +#endif + } else { + emit_dst(as, MIPSI_MUL, dest, left, right); + } } } +#if !LJ_SOFTFP32 +static void asm_fpdiv(ASMState *as, IRIns *ir) +{ +#if !LJ_SOFTFP + asm_fparith(as, ir, MIPSI_DIV_D); +#else + asm_callid(as, ir, IRCALL_softfp_div); +#endif +} +#endif + static void asm_neg(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { asm_fpunary(as, ir, MIPSI_NEG_D); - } else { + } else +#elif LJ_64 /* && LJ_SOFTFP */ + if (irt_isnum(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_dst(as, MIPSI_XOR, dest, left, + ra_allock(as, 0x8000000000000000ll, rset_exclude(RSET_GPR, dest))); + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left); + emit_dst(as, (LJ_64 && irt_is64(ir->t)) ? MIPSI_DSUBU : MIPSI_SUBU, dest, + RID_ZERO, left); } } +#if !LJ_SOFTFP +#define asm_abs(as, ir) asm_fpunary(as, ir, MIPSI_ABS_D) +#elif LJ_64 /* && LJ_SOFTFP */ +static void asm_abs(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + emit_tsml(as, MIPSI_DEXTM, dest, left, 30, 0); +} +#endif + static void asm_arithov(ASMState *as, IRIns *ir) { + /* TODO MIPSR6: bovc/bnvc. Caveat: no delay slot to load RID_TMP. */ Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR); + lj_assertA(!irt_is64(ir->t), "bad usage"); if (irref_isk(ir->op2)) { int k = IR(ir->op2)->i; if (ir->o == IR_SUBOV) k = -k; @@ -1255,16 +1922,29 @@ static void asm_arithov(ASMState *as, IRIns *ir) emit_move(as, RID_TMP, dest == left ? left : right); } +#define asm_addov(as, ir) asm_arithov(as, ir) +#define asm_subov(as, ir) asm_arithov(as, ir) + static void asm_mulov(ASMState *as, IRIns *ir) { -#if LJ_DUALNUM -#error "NYI: MULOV" + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left), + right), dest)); + asm_guard(as, MIPSI_BNE, RID_TMP, tmp); + emit_dta(as, MIPSI_SRA, RID_TMP, dest, 31); +#if !LJ_TARGET_MIPSR6 + emit_dst(as, MIPSI_MFHI, tmp, 0, 0); + emit_dst(as, MIPSI_MFLO, dest, 0, 0); + emit_dst(as, MIPSI_MULT, 0, left, right); #else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused in single-number mode. */ + emit_dst(as, MIPSI_MUL, dest, left, right); + emit_dst(as, MIPSI_MUH, tmp, left, right); #endif } -#if LJ_HASFFI +#if LJ_32 && LJ_HASFFI static void asm_add64(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1348,7 +2028,7 @@ static void asm_neg64(ASMState *as, IRIns *ir) } #endif -static void asm_bitnot(ASMState *as, IRIns *ir) +static void asm_bnot(ASMState *as, IRIns *ir) { Reg left, right, dest = ra_dest(as, ir, RSET_GPR); IRIns *irl = IR(ir->op1); @@ -1362,11 +2042,12 @@ static void asm_bitnot(ASMState *as, IRIns *ir) emit_dst(as, MIPSI_NOR, dest, left, right); } -static void asm_bitswap(ASMState *as, IRIns *ir) +static void asm_bswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - if ((as->flags & JIT_F_MIPS32R2)) { +#if LJ_32 + if ((as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); } else { @@ -1381,6 +2062,15 @@ static void asm_bitswap(ASMState *as, IRIns *ir) emit_dta(as, MIPSI_SRL, tmp, left, 24); emit_dta(as, MIPSI_SLL, RID_TMP, left, 24); } +#else + if (irt_is64(ir->t)) { + emit_dst(as, MIPSI_DSHD, dest, 0, RID_TMP); + emit_dst(as, MIPSI_DSBH, RID_TMP, 0, left); + } else { + emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16); + emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left); + } +#endif } static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) @@ -1388,7 +2078,7 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; + intptr_t k = get_kval(as, ir->op2); if (checku16(k)) { emit_tsi(as, mik, dest, left, k); return; @@ -1398,22 +2088,34 @@ static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) emit_dst(as, mi, dest, left, right); } +#define asm_band(as, ir) asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI) +#define asm_bor(as, ir) asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI) +#define asm_bxor(as, ir) asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI) + static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik) { Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op2)) { /* Constant shifts. */ - uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31); - emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift); + uint32_t shift = (uint32_t)IR(ir->op2)->i; + if (LJ_64 && irt_is64(ir->t)) mik |= (shift & 32) ? MIPSI_D32 : MIPSI_D; + emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), + (shift & 31)); } else { Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; + if (LJ_64 && irt_is64(ir->t)) mi |= MIPSI_DV; emit_dst(as, mi, dest, right, left); /* Shift amount is in rs. */ } } -static void asm_bitror(ASMState *as, IRIns *ir) +#define asm_bshl(as, ir) asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL) +#define asm_bshr(as, ir) asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL) +#define asm_bsar(as, ir) asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA) +#define asm_brol(as, ir) lj_assertA(0, "unexpected BROL") + +static void asm_bror(ASMState *as, IRIns *ir) { - if ((as->flags & JIT_F_MIPS32R2)) { + if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1432,55 +2134,182 @@ static void asm_bitror(ASMState *as, IRIns *ir) } } +#if LJ_SOFTFP +static void asm_sfpmin_max(ASMState *as, IRIns *ir) +{ + CCallInfo ci = lj_ir_callinfo[(IROp)ir->o == IR_MIN ? IRCALL_lj_vm_sfmin : IRCALL_lj_vm_sfmax]; +#if LJ_64 + IRRef args[2]; + args[0] = ir->op1; + args[1] = ir->op2; +#else + IRRef args[4]; + args[0^LJ_BE] = ir->op1; + args[1^LJ_BE] = (ir+1)->op1; + args[2^LJ_BE] = ir->op2; + args[3^LJ_BE] = (ir+1)->op2; +#endif + asm_setupresult(as, ir, &ci); + emit_call(as, (void *)ci.func, 0); + ci.func = NULL; + asm_gencall(as, &ci, args); +} +#endif + static void asm_min_max(ASMState *as, IRIns *ir, int ismax) { - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { +#if LJ_SOFTFP + asm_sfpmin_max(as, ir); +#else Reg dest = ra_dest(as, ir, RSET_FPR); Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; +#if !LJ_TARGET_MIPSR6 if (dest == left) { - emit_fg(as, MIPSI_MOVT_D, dest, right); + emit_fg(as, MIPSI_MOVF_D, dest, right); } else { - emit_fg(as, MIPSI_MOVF_D, dest, left); + emit_fg(as, MIPSI_MOVT_D, dest, left); if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right); } - emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left); + emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? right : left, ismax ? left : right); +#else + emit_fgh(as, ismax ? MIPSI_MAX_D : MIPSI_MIN_D, dest, left, right); +#endif +#endif } else { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_alloc2(as, ir, RSET_GPR); right = (left >> 8); left &= 255; - if (dest == left) { - emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); + if (left == right) { + if (dest != left) emit_move(as, dest, left); } else { - emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); - if (dest != right) emit_move(as, dest, right); +#if !LJ_TARGET_MIPSR6 + if (dest == left) { + emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP); + } else { + emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP); + if (dest != right) emit_move(as, dest, right); + } +#else + emit_dst(as, MIPSI_OR, dest, dest, RID_TMP); + if (dest != right) { + emit_dst(as, MIPSI_SELNEZ, RID_TMP, right, RID_TMP); + emit_dst(as, MIPSI_SELEQZ, dest, left, RID_TMP); + } else { + emit_dst(as, MIPSI_SELEQZ, RID_TMP, left, RID_TMP); + emit_dst(as, MIPSI_SELNEZ, dest, right, RID_TMP); + } +#endif + emit_dst(as, MIPSI_SLT, RID_TMP, + ismax ? left : right, ismax ? right : left); } - emit_dst(as, MIPSI_SLT, RID_TMP, - ismax ? left : right, ismax ? right : left); } } +#define asm_min(as, ir) asm_min_max(as, ir, 0) +#define asm_max(as, ir) asm_min_max(as, ir, 1) + /* -- Comparisons --------------------------------------------------------- */ +#if LJ_SOFTFP +/* SFP comparisons. */ +static void asm_sfpcomp(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; + RegSet drop = RSET_SCRATCH; + Reg r; +#if LJ_64 + IRRef args[2]; + args[0] = ir->op1; + args[1] = ir->op2; +#else + IRRef args[4]; + args[LJ_LE ? 0 : 1] = ir->op1; args[LJ_LE ? 1 : 0] = (ir+1)->op1; + args[LJ_LE ? 2 : 3] = ir->op2; args[LJ_LE ? 3 : 2] = (ir+1)->op2; +#endif + + for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+(LJ_64?1:3); r++) { + if (!rset_test(as->freeset, r) && + regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) + rset_clear(drop, r); + } + ra_evictset(as, drop); + + asm_setupresult(as, ir, ci); + + switch ((IROp)ir->o) { + case IR_LT: + asm_guard(as, MIPSI_BGEZ, RID_RET, 0); + break; + case IR_ULT: + asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); + emit_loadi(as, RID_TMP, 1); + asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO); + break; + case IR_GE: + asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); + emit_loadi(as, RID_TMP, 2); + asm_guard(as, MIPSI_BLTZ, RID_RET, 0); + break; + case IR_LE: + asm_guard(as, MIPSI_BGTZ, RID_RET, 0); + break; + case IR_GT: + asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); + emit_loadi(as, RID_TMP, 2); + asm_guard(as, MIPSI_BLEZ, RID_RET, 0); + break; + case IR_UGE: + asm_guard(as, MIPSI_BLTZ, RID_RET, 0); + break; + case IR_ULE: + asm_guard(as, MIPSI_BEQ, RID_RET, RID_TMP); + emit_loadi(as, RID_TMP, 1); + break; + case IR_UGT: case IR_ABC: + asm_guard(as, MIPSI_BLEZ, RID_RET, 0); + break; + case IR_EQ: case IR_NE: + asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_RET, RID_ZERO); + default: + break; + } + asm_gencall(as, ci, args); +} +#endif + static void asm_comp(ASMState *as, IRIns *ir) { /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. */ IROp op = ir->o; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { +#if LJ_SOFTFP + asm_sfpcomp(as, ir); +#else +#if !LJ_TARGET_MIPSR6 Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right); +#else + Reg tmp, right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); + asm_guard(as, (op&1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); + emit_fgh(as, MIPSI_CMP_LT_D + ((op&3) ^ ((op>>2)&1)), tmp, left, right); +#endif +#endif } else { Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); if (op == IR_ABC) op = IR_UGT; - if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) { + if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) : ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ); asm_guard(as, mi, left, 0); } else { if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; + intptr_t k = get_kval(as, ir->op2); if ((op&2)) k++; if (checki16(k)) { asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO); @@ -1497,19 +2326,28 @@ static void asm_comp(ASMState *as, IRIns *ir) } } -static void asm_compeq(ASMState *as, IRIns *ir) +static void asm_equal(ASMState *as, IRIns *ir) { - Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR); + Reg right, left = ra_alloc2(as, ir, (!LJ_SOFTFP && irt_isnum(ir->t)) ? + RSET_FPR : RSET_GPR); right = (left >> 8); left &= 255; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP32 && irt_isnum(ir->t)) { +#if LJ_SOFTFP + asm_sfpcomp(as, ir); +#elif !LJ_TARGET_MIPSR6 asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0); emit_fgh(as, MIPSI_C_EQ_D, 0, left, right); +#else + Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_FPR, left), right)); + asm_guard(as, (ir->o & 1) ? MIPSI_BC1NEZ : MIPSI_BC1EQZ, 0, (tmp&31)); + emit_fgh(as, MIPSI_CMP_EQ_D, tmp, left, right); +#endif } else { asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right); } } -#if LJ_HASFFI +#if LJ_32 && LJ_HASFFI /* 64 bit integer comparisons. */ static void asm_comp64(ASMState *as, IRIns *ir) { @@ -1546,54 +2384,99 @@ static void asm_comp64eq(ASMState *as, IRIns *ir) } #endif -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ +/* -- Split register ops -------------------------------------------------- */ -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_HASFFI /* HIOP is marked as a store because it needs its own DCE logic. */ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; +#if LJ_32 && (LJ_HASFFI || LJ_SOFTFP) if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ as->curins--; /* Always skip the CONV. */ +#if LJ_HASFFI && !LJ_SOFTFP if (usehi || uselo) asm_conv64(as, ir); return; +#endif } else if ((ir-1)->o < IR_EQ) { /* 64 bit integer comparisons. ORDER IR. */ as->curins--; /* Always skip the loword comparison. */ +#if LJ_SOFTFP + if (!irt_isint(ir->t)) { + asm_sfpcomp(as, ir-1); + return; + } +#endif +#if LJ_HASFFI asm_comp64(as, ir); +#endif return; } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ as->curins--; /* Always skip the loword comparison. */ +#if LJ_SOFTFP + if (!irt_isint(ir->t)) { + asm_sfpcomp(as, ir-1); + return; + } +#endif +#if LJ_HASFFI asm_comp64eq(as, ir); +#endif + return; +#if LJ_SOFTFP + } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { + as->curins--; /* Always skip the loword min/max. */ + if (uselo || usehi) + asm_sfpmin_max(as, ir-1); return; +#endif } else if ((ir-1)->o == IR_XSTORE) { as->curins--; /* Handle both stores here. */ if ((ir-1)->r != RID_SINK) { - asm_xstore(as, ir, LJ_LE ? 4 : 0); - asm_xstore(as, ir-1, LJ_LE ? 0 : 4); + asm_xstore_(as, ir, LJ_LE ? 4 : 0); + asm_xstore_(as, ir-1, LJ_LE ? 0 : 4); } return; } +#endif if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { +#if LJ_32 && LJ_HASFFI case IR_ADD: as->curins--; asm_add64(as, ir); break; case IR_SUB: as->curins--; asm_sub64(as, ir); break; case IR_NEG: as->curins--; asm_neg64(as, ir); break; - case IR_CALLN: - case IR_CALLXS: + case IR_CNEWI: + /* Nothing to do here. Handled by lo op itself. */ + break; +#endif +#if LJ_32 && LJ_SOFTFP + case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: + case IR_STRTO: if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ break; - case IR_CNEWI: + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: /* Nothing to do here. Handled by lo op itself. */ break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ #endif + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + UNUSED(ir); + asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); + emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); + emit_lsglptr(as, MIPSI_LBU, RID_TMP, + (int32_t)offsetof(global_State, hookmask)); } /* -- Stack handling ------------------------------------------------------ */ @@ -1606,47 +2489,70 @@ static void asm_stack_check(ASMState *as, BCReg topslot, Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; ExitNo oldsnap = as->snapno; rset_clear(allow, pbase); +#if LJ_32 tmp = allow ? rset_pickbot(allow) : (pbase == RID_RETHI ? RID_RETLO : RID_RETHI); +#else + tmp = allow ? rset_pickbot(allow) : RID_RET; +#endif as->snapno = exitno; asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO); as->snapno = oldsnap; if (allow == RSET_EMPTY) /* Restore temp. register. */ - emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0); + emit_tsi(as, MIPSI_AL, tmp, RID_SP, 0); else ra_modified(as, tmp); emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); - emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase); - emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack)); + emit_dst(as, MIPSI_ASUBU, RID_TMP, tmp, pbase); + emit_tsi(as, MIPSI_AL, tmp, tmp, offsetof(lua_State, maxstack)); if (pbase == RID_TMP) emit_getgl(as, RID_TMP, jit_base); - emit_getgl(as, tmp, jit_L); + emit_getgl(as, tmp, cur_L); if (allow == RSET_EMPTY) /* Spill temp. register. */ - emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0); + emit_tsi(as, MIPSI_AS, tmp, RID_SP, 0); } /* Restore Lua stack from on-trace state. */ static void asm_stack_restore(ASMState *as, SnapShot *snap) { SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; +#if LJ_32 || defined(LUA_USE_ASSERT) + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif MSize n, nent = snap->nent; /* Store the value of all modified slots to the Lua stack. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); IRRef ref = snap_ref(sn); IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) continue; if (irt_isnum(ir->t)) { +#if LJ_SOFTFP32 + Reg tmp; + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + /* LJ_SOFTFP: must be a number constant. */ + lj_assertA(irref_isk(ref), "unsplit FP op"); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); + emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); + if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); + emit_tsi(as, MIPSI_SW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); +#elif LJ_SOFTFP /* && LJ_64 */ + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); + emit_tsi(as, MIPSI_SD, src, RID_BASE, ofs); +#else Reg src = ra_alloc1(as, ref, RSET_FPR); emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs); +#endif } else { - Reg type; +#if LJ_32 RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + Reg type; + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "restore of IR type %d", irt_type(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, allow); rset_clear(allow, src); @@ -1655,14 +2561,38 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) if ((sn & (SNAP_CONT|SNAP_FRAME))) { if (s == 0) continue; /* Do not overwrite link to previous frame. */ type = ra_allock(as, (int32_t)(*flinks--), allow); +#if LJ_SOFTFP + } else if ((sn & SNAP_SOFTFPNUM)) { + type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); +#endif + } else if ((sn & SNAP_KEYINDEX)) { + type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); } emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4)); +#else + if ((sn & SNAP_KEYINDEX)) { + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + int64_t kki = (int64_t)LJ_KEYINDEX << 32; + if (irref_isk(ref)) { + emit_tsi(as, MIPSI_SD, + ra_allock(as, kki | (int64_t)(uint32_t)ir->i, allow), + RID_BASE, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + Reg rki = ra_allock(as, kki, rset_exclude(allow, src)); + emit_tsi(as, MIPSI_SD, RID_TMP, RID_BASE, ofs); + emit_dst(as, MIPSI_DADDU, RID_TMP, src, rki); + } + } else { + asm_tvstore64(as, RID_BASE, ofs, ref); + } +#endif } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ @@ -1686,7 +2616,7 @@ static void asm_gc_check(ASMState *as) args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); l_end[-3] = MIPS_NOPATCH_GC_CHECK; /* Replace the nop after the call. */ - emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); + emit_tsi(as, MIPSI_AADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); tmp = ra_releasetmp(as, ASMREF_TMP2); emit_loadi(as, tmp, as->gcsteps); /* Jump around GC step if GC total < GC threshold. */ @@ -1714,6 +2644,12 @@ static void asm_loop_fixup(ASMState *as) } } +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + if (as->loopinv) as->mctop--; +} + /* -- Head of trace ------------------------------------------------------- */ /* Coalesce BASE register for a root trace. */ @@ -1721,7 +2657,6 @@ static void asm_head_root_base(ASMState *as) { IRIns *ir = IR(REF_BASE); Reg r = ir->r; - if (as->loopinv) as->mctop--; if (ra_hasreg(r)) { ra_free(as, r); if (rset_test(as->modset, r) || irt_ismarked(ir->t)) @@ -1736,7 +2671,6 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) { IRIns *ir = IR(REF_BASE); Reg r = ir->r; - if (as->loopinv) as->mctop--; if (ra_hasreg(r)) { ra_free(as, r); if (rset_test(as->modset, r) || irt_ismarked(ir->t)) @@ -1761,7 +2695,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; int32_t spadj = as->T->spadjust; MCode *p = as->mctop-1; - *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; + *p = spadj ? (MIPSI_AADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP; p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu); } @@ -1772,139 +2706,26 @@ static void asm_tail_prep(ASMState *as) as->invmcp = as->loopref ? as->mcp : NULL; } -/* -- Instruction dispatch ------------------------------------------------ */ - -/* Assemble a single instruction. */ -static void asm_ir(ASMState *as, IRIns *ir) -{ - switch ((IROp)ir->o) { - /* Miscellaneous ops. */ - case IR_LOOP: asm_loop(as); break; - case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; - case IR_USE: - ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; - case IR_PHI: asm_phi(as, ir); break; - case IR_HIOP: asm_hiop(as, ir); break; - case IR_GCSTEP: asm_gcstep(as, ir); break; - - /* Guarded assertions. */ - case IR_EQ: case IR_NE: asm_compeq(as, ir); break; - case IR_LT: case IR_GE: case IR_LE: case IR_GT: - case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: - case IR_ABC: - asm_comp(as, ir); - break; - - case IR_RETF: asm_retf(as, ir); break; - - /* Bit ops. */ - case IR_BNOT: asm_bitnot(as, ir); break; - case IR_BSWAP: asm_bitswap(as, ir); break; - - case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break; - case IR_BOR: asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break; - case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break; - - case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break; - case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break; - case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break; - case IR_BROL: lua_assert(0); break; - case IR_BROR: asm_bitror(as, ir); break; - - /* Arithmetic ops. */ - case IR_ADD: asm_add(as, ir); break; - case IR_SUB: asm_sub(as, ir); break; - case IR_MUL: asm_mul(as, ir); break; - case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break; - case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; - case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; - case IR_NEG: asm_neg(as, ir); break; - - case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break; - case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; - case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; - case IR_MIN: asm_min_max(as, ir, 0); break; - case IR_MAX: asm_min_max(as, ir, 1); break; - case IR_FPMATH: - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - break; - if (ir->op2 <= IRFPM_TRUNC) - asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2); - else if (ir->op2 == IRFPM_SQRT) - asm_fpunary(as, ir, MIPSI_SQRT_D); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); - break; - - /* Overflow-checking arithmetic ops. */ - case IR_ADDOV: asm_arithov(as, ir); break; - case IR_SUBOV: asm_arithov(as, ir); break; - case IR_MULOV: asm_mulov(as, ir); break; - - /* Memory references. */ - case IR_AREF: asm_aref(as, ir); break; - case IR_HREF: asm_href(as, ir); break; - case IR_HREFK: asm_hrefk(as, ir); break; - case IR_NEWREF: asm_newref(as, ir); break; - case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; - case IR_FREF: asm_fref(as, ir); break; - case IR_STRREF: asm_strref(as, ir); break; - - /* Loads and stores. */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - asm_ahuvload(as, ir); - break; - case IR_FLOAD: asm_fload(as, ir); break; - case IR_XLOAD: asm_xload(as, ir); break; - case IR_SLOAD: asm_sload(as, ir); break; - - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; - case IR_FSTORE: asm_fstore(as, ir); break; - case IR_XSTORE: asm_xstore(as, ir, 0); break; - - /* Allocations. */ - case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; - case IR_TNEW: asm_tnew(as, ir); break; - case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; - - /* Write barriers. */ - case IR_TBAR: asm_tbar(as, ir); break; - case IR_OBAR: asm_obar(as, ir); break; - - /* Type conversions. */ - case IR_CONV: asm_conv(as, ir); break; - case IR_TOBIT: asm_tobit(as, ir); break; - case IR_TOSTR: asm_tostr(as, ir); break; - case IR_STRTO: asm_strto(as, ir); break; - - /* Calls. */ - case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; - case IR_CALLXS: asm_callx(as, ir); break; - case IR_CARG: break; - - default: - setintV(&as->J->errinfo, ir->o); - lj_trace_err_info(as->J, LJ_TRERR_NYIIR); - break; - } -} - /* -- Trace setup --------------------------------------------------------- */ /* Ensure there are enough stack slots for call arguments. */ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) { IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = (int)CCI_NARGS(ci); + uint32_t i, nargs = CCI_XNARGS(ci); +#if LJ_32 int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; +#else + int nslots = 0, ngpr = REGARG_NUMGPR; +#endif asm_collectargs(as, ir, ci, args); for (i = 0; i < nargs; i++) { - if (args[i] && irt_isfp(IR(args[i])->t) && +#if LJ_32 + if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t) && nfpr > 0 && !(ci->flags & CCI_VARARG)) { nfpr--; ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1; - } else if (args[i] && irt_isnum(IR(args[i])->t)) { + } else if (!LJ_SOFTFP && args[i] && irt_isnum(IR(args[i])->t)) { nfpr = 0; ngpr = ngpr & ~1; if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1; @@ -1912,6 +2733,9 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) nfpr = 0; if (ngpr > 0) ngpr--; else nslots++; } +#else + if (ngpr > 0) ngpr--; else nslots += 2; +#endif } if (nslots > as->evenspill) /* Leave room for args in stack slots. */ as->evenspill = nslots; @@ -1942,35 +2766,35 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) if (((p[-1] ^ (px-p)) & 0xffffu) == 0 && ((p[-1] & 0xf0000000u) == MIPSI_BEQ || (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ || - (p[-1] & 0xffe00000u) == MIPSI_BC1F) && - p[-2] != MIPS_NOPATCH_GC_CHECK) { +#if !LJ_TARGET_MIPSR6 + (p[-1] & 0xffe00000u) == MIPSI_BC1F +#else + (p[-1] & 0xff600000u) == MIPSI_BC1EQZ +#endif + ) && p[-2] != MIPS_NOPATCH_GC_CHECK) { ptrdiff_t delta = target - p; if (((delta + 0x8000) >> 16) == 0) { /* Patch in-range branch. */ patchbranch: p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu); *p = MIPSI_NOP; /* Replace the load of the exit number. */ - cstop = p; + cstop = p+1; if (!cstart) cstart = p-1; } else { /* Branch out of range. Use spare jump slot in mcarea. */ - int i; - for (i = (int)(sizeof(MCLink)/sizeof(MCode)); - i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2); - i += 2) { - if (mcarea[i] == tjump) { - delta = mcarea+i - p; - goto patchbranch; - } else if (mcarea[i] == MIPSI_NOP) { - mcarea[i] = tjump; - cstart = mcarea+i; - delta = mcarea+i - p; + MCode *mcjump = asm_sparejump_use(mcarea, tjump); + if (mcjump) { + lj_mcode_sync(mcjump, mcjump+1); + delta = mcjump - p; + if (((delta + 0x8000) >> 16) == 0) { goto patchbranch; + } else { + lj_assertJ(0, "spare jump out of range: -Osizemcode too big"); } } /* Ignore jump slot overflow. Child trace is simply not attached. */ } } else if (p+1 == pe) { /* Patch NOP after code for inverted loop branch. Use of J is ok. */ - lua_assert(p[1] == MIPSI_NOP); + lj_assertJ(p[1] == MIPSI_NOP, "expected NOP"); p[1] = tjump; *p = MIPSI_NOP; /* Replace the load of the exit number. */ cstop = p+2; diff --git a/src/lj_asm_ppc.h b/src/lj_asm_ppc.h index 5fd35d2e..546b8e5d 100644 --- a/src/lj_asm_ppc.h +++ b/src/lj_asm_ppc.h @@ -156,6 +156,9 @@ static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) return ra_allock(as, ofs-(int16_t)ofs, allow); } } + } else if (ir->o == IR_TMPREF) { + *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); + return RID_JGL; } } *ofsp = 0; @@ -181,7 +184,7 @@ static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, return; } } else if (ir->o == IR_STRREF) { - lua_assert(ofs == 0); + lj_assertA(ofs == 0, "bad usage"); ofs = (int32_t)sizeof(GCstr); if (irref_isk(ir->op2)) { ofs += IR(ir->op2)->i; @@ -226,6 +229,7 @@ static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, emit_tab(as, pi, rt, left, right); } +#if !LJ_SOFTFP /* Fuse to multiply-add/sub instruction. */ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) { @@ -245,24 +249,30 @@ static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) } return 0; } +#endif /* -- Calls --------------------------------------------------------------- */ /* Generate a call to a C function. */ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { - uint32_t n, nargs = CCI_NARGS(ci); + uint32_t n, nargs = CCI_XNARGS(ci); int32_t ofs = 8; - Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR; + Reg gpr = REGARG_FIRSTGPR; +#if !LJ_SOFTFP + Reg fpr = REGARG_FIRSTFPR; +#endif if ((void *)ci->func) emit_call(as, (void *)ci->func); for (n = 0; n < nargs; n++) { /* Setup args. */ IRRef ref = args[n]; if (ref) { IRIns *ir = IR(ref); +#if !LJ_SOFTFP if (irt_isfp(ir->t)) { if (fpr <= REGARG_LASTFPR) { - lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Already evicted. */ ra_leftov(as, fpr, ref); fpr++; } else { @@ -271,9 +281,12 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) emit_spstore(as, ir, r, ofs); ofs += irt_isnum(ir->t) ? 8 : 4; } - } else { + } else +#endif + { if (gpr <= REGARG_LASTGPR) { - lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. */ + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Already evicted. */ ra_leftov(as, gpr, ref); gpr++; } else { @@ -290,8 +303,10 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) } checkmclim(as); } +#if !LJ_SOFTFP if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); +#endif } /* Setup result reg/sp for call. Evict scratch regs. */ @@ -299,16 +314,18 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); +#if !LJ_SOFTFP if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; +#endif if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ if (hiop && ra_hasreg((ir+1)->r)) rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ ra_evictset(as, drop); /* Evictions must be performed first. */ if (ra_used(ir)) { - lua_assert(!irt_ispri(ir->t)); - if (irt_isfp(ir->t)) { + lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (!LJ_SOFTFP && irt_isfp(ir->t)) { if ((ci->flags & CCI_CASTU64)) { /* Use spill slot or temp slots. */ int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; @@ -331,15 +348,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) } } -static void asm_call(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX]; - const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; - asm_collectargs(as, ir, ci, args); - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} - static void asm_callx(ASMState *as, IRIns *ir) { IRRef args[CCI_NARGS_MAX*2]; @@ -352,7 +360,7 @@ static void asm_callx(ASMState *as, IRIns *ir) func = ir->op2; irf = IR(func); if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } if (irref_isk(func)) { /* Call to constant address. */ - ci.func = (ASMFunction)(void *)(irf->i); + ci.func = (ASMFunction)(void *)(intptr_t)(irf->i); } else { /* Need a non-argument register for indirect calls. */ RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); Reg freg = ra_alloc1(as, func, allow); @@ -363,16 +371,6 @@ static void asm_callx(ASMState *as, IRIns *ir) asm_gencall(as, &ci, args); } -static void asm_callid(ASMState *as, IRIns *ir, IRCallID id) -{ - const CCallInfo *ci = &lj_ir_callinfo[id]; - IRRef args[2]; - args[0] = ir->op1; - args[1] = ir->op2; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} - /* -- Returns ------------------------------------------------------------- */ /* Return to lower frame. Guard that it goes to the right spot. */ @@ -380,7 +378,7 @@ static void asm_retf(ASMState *as, IRIns *ir) { Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); + int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); as->topslot -= (BCReg)delta; if ((int32_t)as->topslot < 0) as->topslot = 0; irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ @@ -392,8 +390,24 @@ static void asm_retf(ASMState *as, IRIns *ir) emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); } +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31); + emit_getgl(as, RID_TMP, cur_L); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + /* -- Type conversions ---------------------------------------------------- */ +#if !LJ_SOFTFP static void asm_tointg(ASMState *as, IRIns *ir, Reg left) { RegSet allow = RSET_FPR; @@ -410,8 +424,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), - RSET_GPR); + (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); emit_fb(as, PPCI_FCTIWZ, tmp, left); } @@ -427,15 +440,27 @@ static void asm_tobit(ASMState *as, IRIns *ir) emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); emit_fab(as, PPCI_FADD, tmp, left, right); } +#endif static void asm_conv(ASMState *as, IRIns *ir) { IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); +#if !LJ_SOFTFP int stfp = (st == IRT_NUM || st == IRT_FLOAT); +#endif IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - lua_assert(!(irt_isint64(ir->t) || - (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */ + /* 64 bit integer conversions are handled by SPLIT. */ + lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); +#if LJ_SOFTFP + /* FP conversions are handled by SPLIT. */ + lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), + "IR %04d has FP type", + (int)(ir - as->ir) - REF_BIAS); + /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ +#else + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -450,13 +475,11 @@ static void asm_conv(ASMState *as, IRIns *ir) Reg left = ra_alloc1(as, lref, allow); Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); - const float *kbias; if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); emit_fab(as, PPCI_FSUB, dest, dest, fbias); emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); - kbias = (const float *)lj_ir_k64_find(as->J, U64x(59800004,59800000)); - if (st == IRT_U32) kbias++; - emit_lsptr(as, PPCI_LFS, (fbias & 31), (void *)kbias, + emit_lsptr(as, PPCI_LFS, (fbias & 31), + &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], rset_clear(allow, hibias)); emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, RID_SP, SPOFS_TMPLO); @@ -466,7 +489,8 @@ static void asm_conv(ASMState *as, IRIns *ir) } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -489,19 +513,20 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_fb(as, PPCI_FCTIWZ, tmp, tmp); emit_fab(as, PPCI_FSUB, tmp, left, tmp); emit_lsptr(as, PPCI_LFS, (tmp & 31), - (void *)lj_ir_k64_find(as->J, U64x(4f000000,00000000)), - RSET_GPR); + (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); } else { emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); emit_fb(as, PPCI_FCTIWZ, tmp, left); } } - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left = ra_alloc1(as, ir->op1, RSET_GPR); - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if ((ir->op2 & IRCONV_SEXT)) emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); else @@ -513,90 +538,102 @@ static void asm_conv(ASMState *as, IRIns *ir) } } -#if LJ_HASFFI -static void asm_conv64(ASMState *as, IRIns *ir) -{ - IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); - IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); - IRCallID id; - const CCallInfo *ci; - IRRef args[2]; - args[0] = ir->op1; - args[1] = (ir-1)->op1; - if (st == IRT_NUM || st == IRT_FLOAT) { - id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); - ir--; - } else { - id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); - } - ci = &lj_ir_callinfo[id]; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} -#endif - static void asm_strto(ASMState *as, IRIns *ir) { const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; IRRef args[2]; - int32_t ofs; + int32_t ofs = SPOFS_TMP; +#if LJ_SOFTFP + ra_evictset(as, RSET_SCRATCH); + if (ra_used(ir)) { + if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && + (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { + int i; + for (i = 0; i < 2; i++) { + Reg r = (ir+i)->r; + if (ra_hasreg(r)) { + ra_free(as, r); + ra_modified(as, r); + emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); + } + } + ofs = sps_scale(ir->s & ~1); + } else { + Reg rhi = ra_dest(as, ir+1, RSET_GPR); + Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); + emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); + emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); + } + } +#else RegSet drop = RSET_SCRATCH; if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ra_evictset(as, drop); + if (ir->s) ofs = sps_scale(ir->s); +#endif asm_guardcc(as, CC_EQ); emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ args[0] = ir->op1; /* GCstr *str */ args[1] = ASMREF_TMP1; /* TValue *n */ asm_gencall(as, ci, args); /* Store the result to the spill slot or temp slots. */ - ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); } +/* -- Memory references --------------------------------------------------- */ + /* Get pointer to TValue. */ -static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) { - IRIns *ir = IR(ref); - if (irt_isnum(ir->t)) { - if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ - ra_allockreg(as, i32ptr(ir_knum(ir)), dest); - else /* Otherwise force a spill and use the spill slot. */ - emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); - } else { - /* Otherwise use g->tmptv to hold the TValue. */ - RegSet allow = rset_exclude(RSET_GPR, dest); - Reg type; - emit_tai(as, PPCI_ADDI, dest, RID_JGL, offsetof(global_State, tmptv)-32768); - if (!irt_ispri(ir->t)) { - Reg src = ra_alloc1(as, ref, allow); - emit_setgl(as, src, tmptv.gcr); + int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if ((mode & IRTMPREF_OUT1)) { +#if LJ_SOFTFP + lj_assertA(irref_isk(ref), "unsplit FP op"); + emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); + emit_setgl(as, + ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), + tmptv.u32.lo); + emit_setgl(as, + ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), + tmptv.u32.hi); +#else + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); + emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs); +#endif + } else if (irref_isk(ref)) { + /* Use the number constant itself as a TValue. */ + ra_allockreg(as, i32ptr(ir_knum(ir)), dest); + } else { +#if LJ_SOFTFP + lj_assertA(0, "unsplit FP op"); +#else + /* Otherwise force a spill and use the spill slot. */ + emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); +#endif + } + } else { + /* Otherwise use g->tmptv to hold the TValue. */ + Reg type; + emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); + if (!irt_ispri(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_GPR); + emit_setgl(as, src, tmptv.gcr); + } + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) + type = ra_alloc1(as, ref+1, RSET_GPR); + else + type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); + emit_setgl(as, type, tmptv.it); } - type = ra_allock(as, irt_toitype(ir->t), allow); - emit_setgl(as, type, tmptv.it); - } -} - -static void asm_tostr(ASMState *as, IRIns *ir) -{ - IRRef args[2]; - args[0] = ASMREF_L; - as->gcsteps++; - if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; - args[1] = ASMREF_TMP1; /* const lua_Number * */ - asm_setupresult(as, ir, ci); /* GCstr * */ - asm_gencall(as, ci, args); - asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1); } else { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; - args[1] = ir->op1; /* int32_t k */ - asm_setupresult(as, ir, ci); /* GCstr * */ - asm_gencall(as, ci, args); + emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); } } -/* -- Memory references --------------------------------------------------- */ - static void asm_aref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -636,11 +673,27 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) Reg tisnum = RID_NONE, tmpnum = RID_NONE; IRRef refkey = ir->op2; IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); IRType1 kt = irkey->t; uint32_t khash; MCLabel l_end, l_loop, l_next; rset_clear(allow, tab); +#if LJ_SOFTFP + if (!isk) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + if (irkey[1].o == IR_HIOP) { + if (ra_hasreg((irkey+1)->r)) { + tmpnum = (irkey+1)->r; + ra_noweak(as, tmpnum); + } else { + tmpnum = ra_allocref(as, refkey+1, allow); + } + rset_clear(allow, tmpnum); + } + } +#else if (irt_isnum(kt)) { key = ra_alloc1(as, refkey, RSET_FPR); tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); @@ -650,6 +703,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) key = ra_alloc1(as, refkey, allow); rset_clear(allow, key); } +#endif tmp2 = ra_scratch(as, allow); rset_clear(allow, tmp2); @@ -672,7 +726,7 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) asm_guardcc(as, CC_EQ); else emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); - if (irt_isnum(kt)) { + if (!LJ_SOFTFP && irt_isnum(kt)) { emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); emit_condbranch(as, PPCI_BC, CC_GE, l_next); emit_ab(as, PPCI_CMPLW, tmp1, tisnum); @@ -682,7 +736,10 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) emit_ab(as, PPCI_CMPW, tmp2, key); emit_condbranch(as, PPCI_BC, CC_NE, l_next); } - emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); + if (LJ_SOFTFP && ra_hasreg(tmpnum)) + emit_ab(as, PPCI_CMPW, tmp1, tmpnum); + else + emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); if (!irt_ispri(kt)) emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); } @@ -691,35 +748,41 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge) (((char *)as->mcp-(char *)l_loop) & 0xffffu); /* Load main position relative to tab->node into dest. */ - khash = irref_isk(refkey) ? ir_khash(irkey) : 1; + khash = isk ? ir_khash(as, irkey) : 1; if (khash == 0) { emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); } else { Reg tmphash = tmp1; - if (irref_isk(refkey)) + if (isk) tmphash = ra_allock(as, khash, allow); emit_tab(as, PPCI_ADD, dest, dest, tmp1); emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); - if (irref_isk(refkey)) { + if (isk) { /* Nothing to do. */ } else if (irt_isstr(kt)) { - emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash)); + emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid)); } else { /* Must match with hash*() in lj_tab.c. */ emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); - if (irt_isnum(kt)) { + if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { +#if LJ_SOFTFP + emit_asb(as, PPCI_XOR, tmp2, key, tmp1); + emit_rotlwi(as, dest, tmp1, HASH_ROT1); + emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); +#else int32_t ofs = ra_spill(as, irkey); emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); emit_rotlwi(as, dest, tmp1, HASH_ROT1); emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); +#endif } else { emit_asb(as, PPCI_XOR, tmp2, key, tmp1); emit_rotlwi(as, dest, tmp1, HASH_ROT1); @@ -740,7 +803,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg node = ra_alloc1(as, ir->op1, RSET_GPR); Reg key = RID_NONE, type = RID_TMP, idx = node; RegSet allow = rset_exclude(RSET_GPR, node); - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ofs > 32736) { idx = dest; rset_clear(allow, dest); @@ -773,20 +836,6 @@ static void asm_hrefk(ASMState *as, IRIns *ir) } } -static void asm_newref(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; - IRRef args[3]; - if (ir->r == RID_SINK) - return; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* GCtab *t */ - args[2] = ASMREF_TMP1; /* cTValue *key */ - asm_setupresult(as, ir, ci); /* TValue * */ - asm_gencall(as, ci, args); - asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2); -} - static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -813,7 +862,7 @@ static void asm_uref(ASMState *as, IRIns *ir) static void asm_fref(ASMState *as, IRIns *ir) { UNUSED(as); UNUSED(ir); - lua_assert(!ra_used(ir)); + lj_assertA(!ra_used(ir), "unfused FREF"); } static void asm_strref(ASMState *as, IRIns *ir) @@ -853,26 +902,28 @@ static void asm_strref(ASMState *as, IRIns *ir) /* -- Loads and stores ---------------------------------------------------- */ -static PPCIns asm_fxloadins(IRIns *ir) +static PPCIns asm_fxloadins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ case IRT_U8: return PPCI_LBZ; case IRT_I16: return PPCI_LHA; case IRT_U16: return PPCI_LHZ; - case IRT_NUM: return PPCI_LFD; - case IRT_FLOAT: return PPCI_LFS; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD; + case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; default: return PPCI_LWZ; } } -static PPCIns asm_fxstoreins(IRIns *ir) +static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir) { + UNUSED(as); switch (irt_type(ir->t)) { case IRT_I8: case IRT_U8: return PPCI_STB; case IRT_I16: case IRT_U16: return PPCI_STH; - case IRT_NUM: return PPCI_STFD; - case IRT_FLOAT: return PPCI_STFS; + case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD; + case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; default: return PPCI_STW; } } @@ -880,18 +931,24 @@ static PPCIns asm_fxstoreins(IRIns *ir) static void asm_fload(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); - Reg idx = ra_alloc1(as, ir->op1, RSET_GPR); - PPCIns pi = asm_fxloadins(ir); + PPCIns pi = asm_fxloadins(as, ir); + Reg idx; int32_t ofs; - if (ir->op2 == IRFL_TAB_ARRAY) { - ofs = asm_fuseabase(as, ir->op1); - if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ - emit_tai(as, PPCI_ADDI, dest, idx, ofs); - return; + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + idx = RID_JGL; + ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, RSET_GPR); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_tai(as, PPCI_ADDI, dest, idx, ofs); + return; + } } + ofs = field_ofs[ir->op2]; } - ofs = field_ofs[ir->op2]; - lua_assert(!irt_isi8(ir->t)); + lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8"); emit_tai(as, pi, dest, idx, ofs); } @@ -902,21 +959,22 @@ static void asm_fstore(ASMState *as, IRIns *ir) IRIns *irf = IR(ir->op1); Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); int32_t ofs = field_ofs[irf->op2]; - PPCIns pi = asm_fxstoreins(ir); + PPCIns pi = asm_fxstoreins(as, ir); emit_tai(as, pi, src, idx, ofs); } } static void asm_xload(ASMState *as, IRIns *ir) { - Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); + Reg dest = ra_dest(as, ir, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD"); if (irt_isi8(ir->t)) emit_as(as, PPCI_EXTSB, dest, dest); - asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); + asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0); } -static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) { IRIns *irb; if (ir->r == RID_SINK) @@ -927,36 +985,54 @@ static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs) Reg src = ra_alloc1(as, irb->op1, RSET_GPR); asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src)); } else { - Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); - asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + Reg src = ra_alloc1(as, ir->op2, + (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1, rset_exclude(RSET_GPR, src), ofs); } } +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + static void asm_ahuvload(ASMState *as, IRIns *ir) { IRType1 t = ir->t; Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx; RegSet allow = RSET_GPR; int32_t ofs = AHUREF_LSX; + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) { + t.irt = IRT_NUM; + if (ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } + ofs = 0; + } if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - if (!irt_isnum(t)) ofs = 0; - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); + lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) || + irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); + if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0; + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); } idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + if (ir->o == IR_VLOAD) { + ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 : + ir->op2 ? 8 * ir->op2 : AHUREF_LSX; + } if (irt_isnum(t)) { Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx)); asm_guardcc(as, CC_GE); emit_ab(as, PPCI_CMPLW, type, tisnum); if (ra_hasreg(dest)) { - if (ofs == AHUREF_LSX) { + if (!LJ_SOFTFP && ofs == AHUREF_LSX) { tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, (idx&255)), (idx>>8))); emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp); } else { - emit_fai(as, PPCI_LFD, dest, idx, ofs); + emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx, + ofs+4*LJ_SOFTFP); } } } else { @@ -979,7 +1055,7 @@ static void asm_ahustore(ASMState *as, IRIns *ir) int32_t ofs = AHUREF_LSX; if (ir->r == RID_SINK) return; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { src = ra_alloc1(as, ir->op2, RSET_FPR); } else { if (!irt_ispri(ir->t)) { @@ -987,11 +1063,14 @@ static void asm_ahustore(ASMState *as, IRIns *ir) rset_clear(allow, src); ofs = 0; } - type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); + if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) + type = ra_alloc1(as, (ir+1)->op2, allow); + else + type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); rset_clear(allow, type); } idx = asm_fuseahuref(as, ir->op1, &ofs, allow); - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { if (ofs == AHUREF_LSX) { emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP); emit_slwi(as, RID_TMP, (idx>>8), 3); @@ -1016,21 +1095,39 @@ static void asm_sload(ASMState *as, IRIns *ir) IRType1 t = ir->t; Reg dest = RID_NONE, type = RID_NONE, base; RegSet allow = RSET_GPR; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); + int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP); + if (hiop) + t.irt = IRT_NUM; + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + lj_assertA(LJ_DUALNUM || + !irt_isint(t) || + (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), + "bad SLOAD type"); +#if LJ_SOFTFP + lj_assertA(!(ir->op2 & IRSLOAD_CONVERT), + "unsplit SLOAD convert"); /* Handled by LJ_SOFTFP SPLIT. */ + if (hiop && ra_used(ir+1)) { + type = ra_dest(as, ir+1, allow); + rset_clear(allow, type); + } +#else if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { dest = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, dest); t.irt = IRT_NUM; /* Continue with a regular number type check. */ - } else if (ra_used(ir)) { - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); - dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR); + } else +#endif + if (ra_used(ir)) { + lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow); rset_clear(allow, dest); base = ra_alloc1(as, REF_BASE, allow); rset_clear(allow, base); - if ((ir->op2 & IRSLOAD_CONVERT)) { + if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) { if (irt_isint(t)) { emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); dest = ra_scratch(as, RSET_FPR); @@ -1044,7 +1141,7 @@ static void asm_sload(ASMState *as, IRIns *ir) emit_fab(as, PPCI_FSUB, dest, dest, fbias); emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); emit_lsptr(as, PPCI_LFS, (fbias & 31), - (void *)lj_ir_k64_find(as->J, U64x(59800004,59800000)), + (void *)&as->J->k32[LJ_K32_2P52_2P31], rset_clear(allow, hibias)); emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO); emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); @@ -1062,14 +1159,22 @@ dotypecheck: if ((ir->op2 & IRSLOAD_TYPECHECK)) { Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); asm_guardcc(as, CC_GE); - emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum); +#if !LJ_SOFTFP type = RID_TMP; +#endif + emit_ab(as, PPCI_CMPLW, type, tisnum); } - if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4); + if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, + base, ofs-(LJ_SOFTFP?0:4)); } else { if ((ir->op2 & IRSLOAD_TYPECHECK)) { asm_guardcc(as, CC_NE); - emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t)); + if ((ir->op2 & IRSLOAD_KEYINDEX)) { + emit_ai(as, PPCI_CMPWI, RID_TMP, (LJ_KEYINDEX & 0xffff)); + emit_asi(as, PPCI_XORIS, RID_TMP, RID_TMP, (LJ_KEYINDEX >> 16)); + } else { + emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t)); + } type = RID_TMP; } if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs); @@ -1083,19 +1188,16 @@ dotypecheck: static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; - CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? - lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[2]; - RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + IRRef args[4]; RegSet drop = RSET_SCRATCH; - lua_assert(sz != CTSIZE_INVALID); + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ as->gcsteps++; - if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); /* Dest reg handled below. */ ra_evictset(as, drop); @@ -1104,11 +1206,12 @@ static void asm_cnew(ASMState *as, IRIns *ir) /* Initialize immutable cdata object. */ if (ir->o == IR_CNEWI) { + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); int32_t ofs = sizeof(GCcdata); - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); if (sz == 8) { ofs += 4; - lua_assert((ir+1)->o == IR_HIOP); + lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI"); } for (;;) { Reg r = ra_alloc1(as, ir->op2, allow); @@ -1117,18 +1220,28 @@ static void asm_cnew(ASMState *as, IRIns *ir) if (ofs == sizeof(GCcdata)) break; ofs -= 4; ir++; } + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; } + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct)); emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid)); emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA); - emit_ti(as, PPCI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */ + emit_ti(as, PPCI_LI, RID_TMP, id); /* Lower 16 bit used. Sign-ext ok. */ + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ asm_gencall(as, ci, args); ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ra_releasetmp(as, ASMREF_TMP1)); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1142,7 +1255,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist)); emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); emit_setgl(as, tab, gc.grayagain); - lua_assert(LJ_GC_BLACK == 0x04); + lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK"); emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28); /* Clear black bit. */ emit_getgl(as, link, gc.grayagain); emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); @@ -1157,7 +1270,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj, val, tmp; /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1178,6 +1291,7 @@ static void asm_obar(ASMState *as, IRIns *ir) /* -- Arithmetic and logic operations ------------------------------------- */ +#if !LJ_SOFTFP static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) { Reg dest = ra_dest(as, ir, RSET_FPR); @@ -1196,31 +1310,24 @@ static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) emit_fb(as, pi, dest, left); } -static int asm_fpjoin_pow(ASMState *as, IRIns *ir) -{ - IRIns *irp = IR(ir->op1); - if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { - IRIns *irpp = IR(irp->op1); - if (irpp == ir-2 && irpp->o == IR_FPMATH && - irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow]; - IRRef args[2]; - args[0] = irpp->op1; - args[1] = irp->op2; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); - return 1; - } - } - return 0; +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) + asm_fpunary(as, ir, PPCI_FSQRT); + else + asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); } +#endif static void asm_add(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD)) asm_fparith(as, ir, PPCI_FADD); - } else { + } else +#endif + { Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); PPCIns pi; @@ -1259,10 +1366,13 @@ static void asm_add(ASMState *as, IRIns *ir) static void asm_sub(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB)) asm_fparith(as, ir, PPCI_FSUB); - } else { + } else +#endif + { PPCIns pi = PPCI_SUBF; Reg dest = ra_dest(as, ir, RSET_GPR); Reg left, right; @@ -1288,9 +1398,12 @@ static void asm_sub(ASMState *as, IRIns *ir) static void asm_mul(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { asm_fparith(as, ir, PPCI_FMUL); - } else { + } else +#endif + { PPCIns pi = PPCI_MULLW; Reg dest = ra_dest(as, ir, RSET_GPR); Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); @@ -1312,11 +1425,16 @@ static void asm_mul(ASMState *as, IRIns *ir) } } +#define asm_fpdiv(as, ir) asm_fparith(as, ir, PPCI_FDIV) + static void asm_neg(ASMState *as, IRIns *ir) { +#if !LJ_SOFTFP if (irt_isnum(ir->t)) { asm_fpunary(as, ir, PPCI_FNEG); - } else { + } else +#endif + { Reg dest, left; PPCIns pi = PPCI_NEG; if (as->flagmcp == as->mcp) { @@ -1330,6 +1448,8 @@ static void asm_neg(ASMState *as, IRIns *ir) } } +#define asm_abs(as, ir) asm_fpunary(as, ir, PPCI_FABS) + static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) { Reg dest, left, right; @@ -1345,6 +1465,10 @@ static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi) emit_tab(as, pi|PPCF_DOT, dest, left, right); } +#define asm_addov(as, ir) asm_arithov(as, ir, PPCI_ADDO) +#define asm_subov(as, ir) asm_arithov(as, ir, PPCI_SUBFO) +#define asm_mulov(as, ir) asm_arithov(as, ir, PPCI_MULLWO) + #if LJ_HASFFI static void asm_add64(ASMState *as, IRIns *ir) { @@ -1424,7 +1548,7 @@ static void asm_neg64(ASMState *as, IRIns *ir) } #endif -static void asm_bitnot(ASMState *as, IRIns *ir) +static void asm_bnot(ASMState *as, IRIns *ir) { Reg dest, left, right; PPCIns pi = PPCI_NOR; @@ -1451,7 +1575,7 @@ nofuse: emit_asb(as, pi, dest, left, right); } -static void asm_bitswap(ASMState *as, IRIns *ir) +static void asm_bswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); IRIns *irx; @@ -1472,32 +1596,6 @@ static void asm_bitswap(ASMState *as, IRIns *ir) } } -static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) -{ - Reg dest = ra_dest(as, ir, RSET_GPR); - Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); - if (irref_isk(ir->op2)) { - int32_t k = IR(ir->op2)->i; - Reg tmp = left; - if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { - if (!checku16(k)) { - emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); - if ((k & 0xffff) == 0) return; - } - emit_asi(as, pik, dest, left, k); - return; - } - } - /* May fail due to spills/restores above, but simplifies the logic. */ - if (as->flagmcp == as->mcp) { - as->flagmcp = NULL; - as->mcp++; - pi |= PPCF_DOT; - } - right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); - emit_asb(as, pi, dest, left, right); -} - /* Fuse BAND with contiguous bitmask and a shift to rlwinm. */ static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref) { @@ -1528,7 +1626,7 @@ nofuse: *--as->mcp = pi | PPCF_T(left); } -static void asm_bitand(ASMState *as, IRIns *ir) +static void asm_band(ASMState *as, IRIns *ir) { Reg dest, left, right; IRRef lref = ir->op1; @@ -1583,6 +1681,35 @@ static void asm_bitand(ASMState *as, IRIns *ir) emit_asb(as, PPCI_AND ^ dot, dest, left, right); } +static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + if (irref_isk(ir->op2)) { + int32_t k = IR(ir->op2)->i; + Reg tmp = left; + if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) { + if (!checku16(k)) { + emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16)); + if ((k & 0xffff) == 0) return; + } + emit_asi(as, pik, dest, left, k); + return; + } + } + /* May fail due to spills/restores above, but simplifies the logic. */ + if (as->flagmcp == as->mcp) { + as->flagmcp = NULL; + as->mcp++; + pi |= PPCF_DOT; + } + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_asb(as, pi, dest, left, right); +} + +#define asm_bor(as, ir) asm_bitop(as, ir, PPCI_OR, PPCI_ORI) +#define asm_bxor(as, ir) asm_bitop(as, ir, PPCI_XOR, PPCI_XORI) + static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) { Reg dest, left; @@ -1608,9 +1735,48 @@ static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik) } } +#define asm_bshl(as, ir) asm_bitshift(as, ir, PPCI_SLW, 0) +#define asm_bshr(as, ir) asm_bitshift(as, ir, PPCI_SRW, 1) +#define asm_bsar(as, ir) asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI) +#define asm_brol(as, ir) \ + asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \ + PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)) +#define asm_bror(as, ir) lj_assertA(0, "unexpected BROR") + +#if LJ_SOFTFP +static void asm_sfpmin_max(ASMState *as, IRIns *ir) +{ + CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp]; + IRRef args[4]; + MCLabel l_right, l_end; + Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR); + Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR); + Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR); + PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE; + righthi = (lefthi >> 8); lefthi &= 255; + rightlo = (leftlo >> 8); leftlo &= 255; + args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; + args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; + l_end = emit_label(as); + if (desthi != righthi) emit_mr(as, desthi, righthi); + if (destlo != rightlo) emit_mr(as, destlo, rightlo); + l_right = emit_label(as); + if (l_end != l_right) emit_jmp(as, l_end); + if (desthi != lefthi) emit_mr(as, desthi, lefthi); + if (destlo != leftlo) emit_mr(as, destlo, leftlo); + if (l_right == as->mcp+1) { + cond ^= 4; l_right = l_end; ++as->mcp; + } + emit_condbranch(as, PPCI_BC, cond, l_right); + ra_evictset(as, RSET_SCRATCH); + emit_cmpi(as, RID_RET, 1); + asm_gencall(as, &ci, args); +} +#endif + static void asm_min_max(ASMState *as, IRIns *ir, int ismax) { - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); Reg tmp = dest; Reg right, left = ra_alloc2(as, ir, RSET_FPR); @@ -1618,9 +1784,8 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) if (tmp == left || tmp == right) tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR, dest), left), right)); - emit_facb(as, PPCI_FSEL, dest, tmp, - ismax ? left : right, ismax ? right : left); - emit_fab(as, PPCI_FSUB, tmp, left, right); + emit_facb(as, PPCI_FSEL, dest, tmp, left, right); + emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left); } else { Reg dest = ra_dest(as, ir, RSET_GPR); Reg tmp1 = RID_TMP, tmp2 = dest; @@ -1638,6 +1803,9 @@ static void asm_min_max(ASMState *as, IRIns *ir, int ismax) } } +#define asm_min(as, ir) asm_min_max(as, ir, 0) +#define asm_max(as, ir) asm_min_max(as, ir, 1) + /* -- Comparisons --------------------------------------------------------- */ #define CC_UNSIGNED 0x08 /* Unsigned integer comparison. */ @@ -1695,7 +1863,7 @@ static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc) static void asm_comp(ASMState *as, IRIns *ir) { PPCCC cc = asm_compmap[ir->o]; - if (irt_isnum(ir->t)) { + if (!LJ_SOFTFP && irt_isnum(ir->t)) { Reg right, left = ra_alloc2(as, ir, RSET_FPR); right = (left >> 8); left &= 255; asm_guardcc(as, (cc >> 4)); @@ -1714,6 +1882,46 @@ static void asm_comp(ASMState *as, IRIns *ir) } } +#define asm_equal(as, ir) asm_comp(as, ir) + +#if LJ_SOFTFP +/* SFP comparisons. */ +static void asm_sfpcomp(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp]; + RegSet drop = RSET_SCRATCH; + Reg r; + IRRef args[4]; + args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1; + args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2; + + for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) { + if (!rset_test(as->freeset, r) && + regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR]) + rset_clear(drop, r); + } + ra_evictset(as, drop); + asm_setupresult(as, ir, ci); + switch ((IROp)ir->o) { + case IR_ULT: + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 0); + case IR_ULE: + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 1); + break; + case IR_GE: case IR_GT: + asm_guardcc(as, CC_EQ); + emit_ai(as, PPCI_CMPWI, RID_RET, 2); + default: + asm_guardcc(as, (asm_compmap[ir->o] & 0xf)); + emit_ai(as, PPCI_CMPWI, RID_RET, 0); + break; + } + asm_gencall(as, ci, args); +} +#endif + #if LJ_HASFFI /* 64 bit integer comparisons. */ static void asm_comp64(ASMState *as, IRIns *ir) @@ -1738,50 +1946,87 @@ static void asm_comp64(ASMState *as, IRIns *ir) } #endif -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ +/* -- Split register ops -------------------------------------------------- */ -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +/* Hiword op of a split 32/32 bit op. Previous op is be the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_HASFFI /* HIOP is marked as a store because it needs its own DCE logic. */ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; +#if LJ_HASFFI || LJ_SOFTFP if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ as->curins--; /* Always skip the CONV. */ +#if LJ_HASFFI && !LJ_SOFTFP if (usehi || uselo) asm_conv64(as, ir); return; +#endif } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ as->curins--; /* Always skip the loword comparison. */ +#if LJ_SOFTFP + if (!irt_isint(ir->t)) { + asm_sfpcomp(as, ir-1); + return; + } +#endif +#if LJ_HASFFI asm_comp64(as, ir); +#endif return; +#if LJ_SOFTFP + } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) { + as->curins--; /* Always skip the loword min/max. */ + if (uselo || usehi) + asm_sfpmin_max(as, ir-1); + return; +#endif } else if ((ir-1)->o == IR_XSTORE) { as->curins--; /* Handle both stores here. */ if ((ir-1)->r != RID_SINK) { - asm_xstore(as, ir, 0); - asm_xstore(as, ir-1, 4); + asm_xstore_(as, ir, 0); + asm_xstore_(as, ir-1, 4); } return; } +#endif if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { +#if LJ_HASFFI case IR_ADD: as->curins--; asm_add64(as, ir); break; case IR_SUB: as->curins--; asm_sub64(as, ir); break; case IR_NEG: as->curins--; asm_neg64(as, ir); break; - case IR_CALLN: - case IR_CALLXS: + case IR_CNEWI: + /* Nothing to do here. Handled by lo op itself. */ + break; +#endif +#if LJ_SOFTFP + case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: + case IR_STRTO: if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + ra_allocref(as, ir->op1, RSET_GPR); /* Mark lo op as used. */ break; - case IR_CNEWI: + case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF: /* Nothing to do here. Handled by lo op itself. */ break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused without FFI. */ #endif + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + UNUSED(ir); + asm_guardcc(as, CC_NE); + emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE); + emit_lsglptr(as, PPCI_LBZ, RID_TMP, + (int32_t)offsetof(global_State, hookmask)); } /* -- Stack handling ------------------------------------------------------ */ @@ -1805,7 +2050,7 @@ static void asm_stack_check(ASMState *as, BCReg topslot, emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack)); if (pbase == RID_TMP) emit_getgl(as, RID_TMP, jit_base); - emit_getgl(as, tmp, jit_L); + emit_getgl(as, tmp, cur_L); if (allow == RSET_EMPTY) /* Spill temp. register. */ emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW); } @@ -1826,12 +2071,25 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) if ((sn & SNAP_NORESTORE)) continue; if (irt_isnum(ir->t)) { +#if LJ_SOFTFP + Reg tmp; + RegSet allow = rset_exclude(RSET_GPR, RID_BASE); + /* LJ_SOFTFP: must be a number constant. */ + lj_assertA(irref_isk(ref), "unsplit FP op"); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow); + emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0)); + if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1); + tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow); + emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4)); +#else Reg src = ra_alloc1(as, ref, RSET_FPR); emit_fai(as, PPCI_STFD, src, RID_BASE, ofs); +#endif } else { Reg type; RegSet allow = rset_exclude(RSET_GPR, RID_BASE); - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "restore of IR type %d", irt_type(ir->t)); if (!irt_ispri(ir->t)) { Reg src = ra_alloc1(as, ref, allow); rset_clear(allow, src); @@ -1840,6 +2098,12 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) if ((sn & (SNAP_CONT|SNAP_FRAME))) { if (s == 0) continue; /* Do not overwrite link to previous frame. */ type = ra_allock(as, (int32_t)(*flinks--), allow); +#if LJ_SOFTFP + } else if ((sn & SNAP_SOFTFPNUM)) { + type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE)); +#endif + } else if ((sn & SNAP_KEYINDEX)) { + type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow); } else { type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow); } @@ -1847,7 +2111,7 @@ static void asm_stack_restore(ASMState *as, SnapShot *snap) } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ @@ -1898,6 +2162,12 @@ static void asm_loop_fixup(ASMState *as) } } +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do. */ +} + /* -- Head of trace ------------------------------------------------------- */ /* Coalesce BASE register for a root trace. */ @@ -1949,7 +2219,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) as->mctop = p; } else { /* Patch stack adjustment. */ - lua_assert(checki16(CFRAME_SIZE+spadj)); + lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range"); p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj); p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj; } @@ -1970,147 +2240,25 @@ static void asm_tail_prep(ASMState *as) } } -/* -- Instruction dispatch ------------------------------------------------ */ - -/* Assemble a single instruction. */ -static void asm_ir(ASMState *as, IRIns *ir) -{ - switch ((IROp)ir->o) { - /* Miscellaneous ops. */ - case IR_LOOP: asm_loop(as); break; - case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; - case IR_USE: - ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; - case IR_PHI: asm_phi(as, ir); break; - case IR_HIOP: asm_hiop(as, ir); break; - case IR_GCSTEP: asm_gcstep(as, ir); break; - - /* Guarded assertions. */ - case IR_EQ: case IR_NE: - if ((ir-1)->o == IR_HREF && ir->op1 == as->curins-1) { - as->curins--; - asm_href(as, ir-1, (IROp)ir->o); - break; - } - /* fallthrough */ - case IR_LT: case IR_GE: case IR_LE: case IR_GT: - case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: - case IR_ABC: - asm_comp(as, ir); - break; - - case IR_RETF: asm_retf(as, ir); break; - - /* Bit ops. */ - case IR_BNOT: asm_bitnot(as, ir); break; - case IR_BSWAP: asm_bitswap(as, ir); break; - - case IR_BAND: asm_bitand(as, ir); break; - case IR_BOR: asm_bitop(as, ir, PPCI_OR, PPCI_ORI); break; - case IR_BXOR: asm_bitop(as, ir, PPCI_XOR, PPCI_XORI); break; - - case IR_BSHL: asm_bitshift(as, ir, PPCI_SLW, 0); break; - case IR_BSHR: asm_bitshift(as, ir, PPCI_SRW, 1); break; - case IR_BSAR: asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI); break; - case IR_BROL: asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), - PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31)); break; - case IR_BROR: lua_assert(0); break; - - /* Arithmetic ops. */ - case IR_ADD: asm_add(as, ir); break; - case IR_SUB: asm_sub(as, ir); break; - case IR_MUL: asm_mul(as, ir); break; - case IR_DIV: asm_fparith(as, ir, PPCI_FDIV); break; - case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break; - case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break; - case IR_NEG: asm_neg(as, ir); break; - - case IR_ABS: asm_fpunary(as, ir, PPCI_FABS); break; - case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break; - case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break; - case IR_MIN: asm_min_max(as, ir, 0); break; - case IR_MAX: asm_min_max(as, ir, 1); break; - case IR_FPMATH: - if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) - break; - if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) - asm_fpunary(as, ir, PPCI_FSQRT); - else - asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); - break; - - /* Overflow-checking arithmetic ops. */ - case IR_ADDOV: asm_arithov(as, ir, PPCI_ADDO); break; - case IR_SUBOV: asm_arithov(as, ir, PPCI_SUBFO); break; - case IR_MULOV: asm_arithov(as, ir, PPCI_MULLWO); break; - - /* Memory references. */ - case IR_AREF: asm_aref(as, ir); break; - case IR_HREF: asm_href(as, ir, 0); break; - case IR_HREFK: asm_hrefk(as, ir); break; - case IR_NEWREF: asm_newref(as, ir); break; - case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; - case IR_FREF: asm_fref(as, ir); break; - case IR_STRREF: asm_strref(as, ir); break; - - /* Loads and stores. */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - asm_ahuvload(as, ir); - break; - case IR_FLOAD: asm_fload(as, ir); break; - case IR_XLOAD: asm_xload(as, ir); break; - case IR_SLOAD: asm_sload(as, ir); break; - - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; - case IR_FSTORE: asm_fstore(as, ir); break; - case IR_XSTORE: asm_xstore(as, ir, 0); break; - - /* Allocations. */ - case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; - case IR_TNEW: asm_tnew(as, ir); break; - case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; - - /* Write barriers. */ - case IR_TBAR: asm_tbar(as, ir); break; - case IR_OBAR: asm_obar(as, ir); break; - - /* Type conversions. */ - case IR_CONV: asm_conv(as, ir); break; - case IR_TOBIT: asm_tobit(as, ir); break; - case IR_TOSTR: asm_tostr(as, ir); break; - case IR_STRTO: asm_strto(as, ir); break; - - /* Calls. */ - case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; - case IR_CALLXS: asm_callx(as, ir); break; - case IR_CARG: break; - - default: - setintV(&as->J->errinfo, ir->o); - lj_trace_err_info(as->J, LJ_TRERR_NYIIR); - break; - } -} - /* -- Trace setup --------------------------------------------------------- */ /* Ensure there are enough stack slots for call arguments. */ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) { IRRef args[CCI_NARGS_MAX*2]; - uint32_t i, nargs = (int)CCI_NARGS(ci); + uint32_t i, nargs = CCI_XNARGS(ci); int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; asm_collectargs(as, ir, ci, args); for (i = 0; i < nargs; i++) - if (args[i] && irt_isfp(IR(args[i])->t)) { + if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) { if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1; } else { if (ngpr > 0) ngpr--; else nslots++; } if (nslots > as->evenspill) /* Leave room for args in stack slots. */ as->evenspill = nslots; - return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); + return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) : + REGSP_HINT(RID_RET); } static void asm_setup_target(ASMState *as) @@ -2150,7 +2298,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) } else if ((ins & 0xfc000000u) == PPCI_B && ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) { ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x02000000) >> 26) == 0); + lj_assertJ(((delta + 0x02000000) >> 26) == 0, + "branch target out of range"); *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu); if (!cstart) cstart = p; } @@ -2158,7 +2307,8 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) /* Always patch long-range branch in exit stub itself. Except, if we can't. */ if (patchlong) { ptrdiff_t delta = (char *)target - (char *)px - clearso; - lua_assert(((delta + 0x02000000) >> 26) == 0); + lj_assertJ(((delta + 0x02000000) >> 26) == 0, + "branch target out of range"); *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu); } if (!cstart) cstart = px; diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index 8b529086..2bf9d939 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h @@ -21,15 +21,17 @@ static MCode *asm_exitstub_gen(ASMState *as, ExitNo group) } /* Push the high byte of the exitno for each exit stub group. */ *mxp++ = XI_PUSHi8; *mxp++ = (MCode)((group*EXITSTUBS_PER_GROUP)>>8); +#if !LJ_GC64 /* Store DISPATCH at original stack slot 0. Account for the two push ops. */ *mxp++ = XI_MOVmi; *mxp++ = MODRM(XM_OFS8, 0, RID_ESP); *mxp++ = MODRM(XM_SCALE1, RID_ESP, RID_ESP); *mxp++ = 2*sizeof(void *); *(int32_t *)mxp = ptr2addr(J2GG(as->J)->dispatch); mxp += 4; +#endif /* Jump to exit handler which fills in the ExitState. */ *mxp++ = XI_JMP; mxp += 4; - *((int32_t *)(mxp-4)) = jmprel(mxp, (MCode *)(void *)lj_vm_exit_handler); + *((int32_t *)(mxp-4)) = jmprel(as->J, mxp, (MCode *)(void *)lj_vm_exit_handler); /* Commit the code for this group (even if assembly fails later on). */ lj_mcode_commitbot(as->J, mxp); as->mcbot = mxp; @@ -58,14 +60,18 @@ static void asm_guardcc(ASMState *as, int cc) MCode *p = as->mcp; if (LJ_UNLIKELY(p == as->invmcp)) { as->loopinv = 1; - *(int32_t *)(p+1) = jmprel(p+5, target); + *(int32_t *)(p+1) = jmprel(as->J, p+5, target); target = p; cc ^= 1; if (as->realign) { + if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) + as->mrm.ofs += 2; /* Fixup RIP offset for pending fused load. */ emit_sjcc(as, cc, target); return; } } + if (LJ_GC64 && LJ_UNLIKELY(as->mrm.base == RID_RIP)) + as->mrm.ofs += 6; /* Fixup RIP offset for pending fused load. */ emit_jcc(as, cc, target); } @@ -79,6 +85,15 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) { if (irref_isk(ref)) { IRIns *ir = IR(ref); +#if LJ_GC64 + if (ir->o == IR_KNULL || !irt_is64(ir->t)) { + *k = ir->i; + return 1; + } else if (checki32((int64_t)ir_k64(ir)->u64)) { + *k = (int32_t)ir_k64(ir)->u64; + return 1; + } +#else if (ir->o != IR_KINT64) { *k = ir->i; return 1; @@ -86,6 +101,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k) *k = (int32_t)ir_kint64(ir)->u64; return 1; } +#endif } return 0; } @@ -115,7 +131,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) as->mrm.ofs = 0; if (irb->o == IR_FLOAD) { IRIns *ira = IR(irb->op1); - lua_assert(irb->op2 == IRFL_TAB_ARRAY); + lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY"); /* We can avoid the FLOAD of t->array for colocated arrays. */ if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE && !neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) { @@ -134,7 +150,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref) static void asm_fusearef(ASMState *as, IRIns *ir, RegSet allow) { IRIns *irx; - lua_assert(ir->o == IR_AREF); + lj_assertA(ir->o == IR_AREF, "expected AREF"); as->mrm.base = (uint8_t)ra_alloc1(as, asm_fuseabase(as, ir->op1), allow); irx = IR(ir->op2); if (irref_isk(ir->op2)) { @@ -185,14 +201,32 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; +#if LJ_GC64 + int64_t ofs = dispofs(as, &uv->tv); + if (checki32(ofs) && checki32(ofs+4)) { + as->mrm.ofs = (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + as->mrm.idx = RID_NONE; + return; + } +#else as->mrm.ofs = ptr2addr(&uv->tv); as->mrm.base = as->mrm.idx = RID_NONE; return; +#endif } break; + case IR_TMPREF: +#if LJ_GC64 + as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->tmptv); + as->mrm.base = RID_DISPATCH; + as->mrm.idx = RID_NONE; +#else + as->mrm.ofs = igcptr(&J2G(as->J)->tmptv); + as->mrm.base = as->mrm.idx = RID_NONE; +#endif + return; default: - lua_assert(ir->o == IR_HREF || ir->o == IR_NEWREF || ir->o == IR_UREFO || - ir->o == IR_KKPTR); break; } } @@ -204,26 +238,53 @@ static void asm_fuseahuref(ASMState *as, IRRef ref, RegSet allow) /* Fuse FLOAD/FREF reference into memory operand. */ static void asm_fusefref(ASMState *as, IRIns *ir, RegSet allow) { - lua_assert(ir->o == IR_FLOAD || ir->o == IR_FREF); - as->mrm.ofs = field_ofs[ir->op2]; + lj_assertA(ir->o == IR_FLOAD || ir->o == IR_FREF, + "bad IR op %d", ir->o); as->mrm.idx = RID_NONE; + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ +#if LJ_GC64 + as->mrm.ofs = (int32_t)(ir->op2 << 2) - GG_OFS(dispatch); + as->mrm.base = RID_DISPATCH; +#else + as->mrm.ofs = (int32_t)(ir->op2 << 2) + ptr2addr(J2GG(as->J)); + as->mrm.base = RID_NONE; +#endif + return; + } + as->mrm.ofs = field_ofs[ir->op2]; if (irref_isk(ir->op1)) { - as->mrm.ofs += IR(ir->op1)->i; + IRIns *op1 = IR(ir->op1); +#if LJ_GC64 + if (ir->op1 == REF_NIL) { + as->mrm.ofs -= GG_OFS(dispatch); + as->mrm.base = RID_DISPATCH; + return; + } else if (op1->o == IR_KPTR || op1->o == IR_KKPTR) { + intptr_t ofs = dispofs(as, ir_kptr(op1)); + if (checki32(as->mrm.ofs + ofs)) { + as->mrm.ofs += (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + return; + } + } +#else + as->mrm.ofs += op1->i; as->mrm.base = RID_NONE; - } else { - as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); + return; +#endif } + as->mrm.base = (uint8_t)ra_alloc1(as, ir->op1, allow); } /* Fuse string reference into memory operand. */ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow) { IRIns *irr; - lua_assert(ir->o == IR_STRREF); + lj_assertA(ir->o == IR_STRREF, "bad IR op %d", ir->o); as->mrm.base = as->mrm.idx = RID_NONE; as->mrm.scale = XM_SCALE1; as->mrm.ofs = sizeof(GCstr); - if (irref_isk(ir->op1)) { + if (!LJ_GC64 && irref_isk(ir->op1)) { as->mrm.ofs += IR(ir->op1)->i; } else { Reg r = ra_alloc1(as, ir->op1, allow); @@ -255,10 +316,20 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) IRIns *ir = IR(ref); as->mrm.idx = RID_NONE; if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { +#if LJ_GC64 + intptr_t ofs = dispofs(as, ir_kptr(ir)); + if (checki32(ofs)) { + as->mrm.ofs = (int32_t)ofs; + as->mrm.base = RID_DISPATCH; + return; + } + } if (0) { +#else as->mrm.ofs = ir->i; as->mrm.base = RID_NONE; } else if (ir->o == IR_STRREF) { asm_fusestrref(as, ir, allow); +#endif } else { as->mrm.ofs = 0; if (canfuse(as, ir) && ir->o == IR_ADD && ra_noreg(ir->r)) { @@ -301,7 +372,47 @@ static void asm_fusexref(ASMState *as, IRRef ref, RegSet allow) } } -/* Fuse load into memory operand. */ +/* Fuse load of 64 bit IR constant into memory operand. */ +static Reg asm_fuseloadk64(ASMState *as, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + if (!LJ_GC64 || checki32((intptr_t)k)) { + as->mrm.ofs = ptr2addr(k); + as->mrm.base = RID_NONE; +#if LJ_GC64 + } else if (checki32(dispofs(as, k))) { + as->mrm.ofs = (int32_t)dispofs(as, k); + as->mrm.base = RID_DISPATCH; + } else if (checki32(mcpofs(as, k)) && checki32(mcpofs(as, k+1)) && + checki32(mctopofs(as, k)) && checki32(mctopofs(as, k+1))) { + as->mrm.ofs = (int32_t)mcpofs(as, k); + as->mrm.base = RID_RIP; + } else { /* Intern 64 bit constant at bottom of mcode. */ + if (ir->i) { + lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i), + "bad interned 64 bit constant"); + } else { + while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; + *(uint64_t*)as->mcbot = *k; + ir->i = (int32_t)(as->mctop - as->mcbot); + as->mcbot += 8; + as->mclim = as->mcbot + MCLIM_REDZONE; + lj_mcode_commitbot(as->J, as->mcbot); + } + as->mrm.ofs = (int32_t)mcpofs(as, as->mctop - ir->i); + as->mrm.base = RID_RIP; +#endif + } + as->mrm.idx = RID_NONE; + return RID_MRM; +} + +/* Fuse load into memory operand. +** +** Important caveat: this may emit RIP-relative loads! So don't place any +** code emitters between this function and the use of its result. +** The only permitted exception is asm_guardcc(). +*/ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) { IRIns *ir = IR(ref); @@ -319,27 +430,36 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) } if (ir->o == IR_KNUM) { RegSet avail = as->freeset & ~as->modset & RSET_FPR; - lua_assert(allow != RSET_EMPTY); - if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ - as->mrm.ofs = ptr2addr(ir_knum(ir)); - as->mrm.base = as->mrm.idx = RID_NONE; - return RID_MRM; - } + lj_assertA(allow != RSET_EMPTY, "no register allowed"); + if (!(avail & (avail-1))) /* Fuse if less than two regs available. */ + return asm_fuseloadk64(as, ir); } else if (ref == REF_BASE || ir->o == IR_KINT64) { RegSet avail = as->freeset & ~as->modset & RSET_GPR; - lua_assert(allow != RSET_EMPTY); + lj_assertA(allow != RSET_EMPTY, "no register allowed"); if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */ - as->mrm.ofs = ptr2addr(ref == REF_BASE ? (void *)&J2G(as->J)->jit_base : (void *)ir_kint64(ir)); - as->mrm.base = as->mrm.idx = RID_NONE; - return RID_MRM; + if (ref == REF_BASE) { +#if LJ_GC64 + as->mrm.ofs = (int32_t)dispofs(as, &J2G(as->J)->jit_base); + as->mrm.base = RID_DISPATCH; +#else + as->mrm.ofs = ptr2addr(&J2G(as->J)->jit_base); + as->mrm.base = RID_NONE; +#endif + as->mrm.idx = RID_NONE; + return RID_MRM; + } else { + return asm_fuseloadk64(as, ir); + } } } else if (mayfuse(as, ref)) { RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR; if (ir->o == IR_SLOAD) { if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) && - noconflict(as, ref, IR_RETF, 0)) { + noconflict(as, ref, IR_RETF, 0) && + !(LJ_GC64 && irt_isaddr(ir->t))) { as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow); - as->mrm.ofs = 8*((int32_t)ir->op1-1) + ((ir->op2&IRSLOAD_FRAME)?4:0); + as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); as->mrm.idx = RID_NONE; return RID_MRM; } @@ -351,7 +471,8 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) return RID_MRM; } } else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) { - if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0)) { + if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) && + !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); return RID_MRM; } @@ -364,11 +485,17 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow) asm_fusexref(as, ir->op1, xallow); return RID_MRM; } - } else if (ir->o == IR_VLOAD) { + } else if (ir->o == IR_VLOAD && IR(ir->op1)->o == IR_AREF && + !(LJ_GC64 && irt_isaddr(ir->t))) { asm_fuseahuref(as, ir->op1, xallow); + as->mrm.ofs += 8 * ir->op2; return RID_MRM; } } + if (ir->o == IR_FLOAD && ir->op1 == REF_NIL) { + asm_fusefref(as, ir, RSET_EMPTY); + return RID_MRM; + } if (!(as->freeset & allow) && !emit_canremat(ref) && (allow == RSET_EMPTY || ra_hasspill(ir->s) || iscrossref(as, ref))) goto fusespill; @@ -392,7 +519,7 @@ static Reg asm_fuseloadm(ASMState *as, IRRef ref, RegSet allow, int is64) /* Count the required number of stack slots for a call. */ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) { - uint32_t i, nargs = CCI_NARGS(ci); + uint32_t i, nargs = CCI_XNARGS(ci); int nslots = 0; #if LJ_64 if (LJ_ABI_WIN) { @@ -425,7 +552,7 @@ static int asm_count_call_slots(ASMState *as, const CCallInfo *ci, IRRef *args) /* Generate a call to a C function. */ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) { - uint32_t n, nargs = CCI_NARGS(ci); + uint32_t n, nargs = CCI_XNARGS(ci); int32_t ofs = STACKARG_OFS; #if LJ_64 uint32_t gprs = REGARG_GPRS; @@ -485,13 +612,14 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) if (r) { /* Argument is in a register. */ if (r < RID_MAX_GPR && ref < ASMREF_TMP1) { #if LJ_64 - if (ir->o == IR_KINT64) - emit_loadu64(as, r, ir_kint64(ir)->u64); + if (LJ_GC64 ? !(ir->o == IR_KINT || ir->o == IR_KNULL) : ir->o == IR_KINT64) + emit_loadu64(as, r, ir_k64(ir)->u64); else #endif emit_loadi(as, r, ir->i); } else { - lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */ + /* Must have been evicted. */ + lj_assertA(rset_test(as->freeset, r), "reg %d not free", r); if (ra_hasreg(ir->r)) { ra_noweak(as, ir->r); emit_movrr(as, ir, r, ir->r); @@ -500,7 +628,8 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) } } } else if (irt_isfp(ir->t)) { /* FP argument is on stack. */ - lua_assert(!(irt_isfloat(ir->t) && irref_isk(ref))); /* No float k. */ + lj_assertA(!(irt_isfloat(ir->t) && irref_isk(ref)), + "unexpected float constant"); if (LJ_32 && (ofs & 4) && irref_isk(ref)) { /* Split stores for unaligned FP consts. */ emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo); @@ -531,7 +660,7 @@ static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) { RegSet drop = RSET_SCRATCH; - int hiop = (LJ_32 && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); + int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); if ((ci->flags & CCI_NOFPRCLOBBER)) drop &= ~RSET_FPR; if (ra_hasreg(ir->r)) @@ -560,7 +689,7 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) if (ra_hasreg(dest)) { ra_free(as, dest); ra_modified(as, dest); - emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, + emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs); } if ((ci->flags & CCI_CASTU64)) { @@ -571,12 +700,10 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); } #endif -#if LJ_32 } else if (hiop) { ra_destpair(as, ir); -#endif } else { - lua_assert(!irt_ispri(ir->t)); + lj_assertA(!irt_ispri(ir->t), "PRI dest"); ra_destreg(as, ir, RID_RET); } } else if (LJ_32 && irt_isfp(ir->t) && !(ci->flags & CCI_CASTU64)) { @@ -584,15 +711,6 @@ static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) } } -static void asm_call(ASMState *as, IRIns *ir) -{ - IRRef args[CCI_NARGS_MAX]; - const CCallInfo *ci = &lj_ir_callinfo[ir->op2]; - asm_collectargs(as, ir, ci, args); - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} - /* Return a constant function pointer or NULL for indirect calls. */ static void *asm_callx_func(ASMState *as, IRIns *irf, IRRef func) { @@ -651,16 +769,39 @@ static void asm_callx(ASMState *as, IRIns *ir) static void asm_retf(ASMState *as, IRIns *ir) { Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); +#if LJ_FR2 + Reg rpc = ra_scratch(as, rset_exclude(RSET_GPR, base)); +#endif void *pc = ir_kptr(IR(ir->op2)); - int32_t delta = 1+bc_a(*((const BCIns *)pc - 1)); + int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); as->topslot -= (BCReg)delta; if ((int32_t)as->topslot < 0) as->topslot = 0; irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ emit_setgl(as, base, jit_base); emit_addptr(as, base, -8*delta); asm_guardcc(as, CC_NE); +#if LJ_FR2 + emit_rmro(as, XO_CMP, rpc|REX_GC64, base, -8); + emit_loadu64(as, rpc, u64ptr(pc)); +#else emit_gmroi(as, XG_ARITHi(XOg_CMP), base, -4, ptr2addr(pc)); +#endif +} + +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. */ + emit_storeofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); + emit_opgl(as, XO_ARITH(XOg_OR), tmp|REX_GC64, cur_L); + emit_gri(as, XG_ARITHi(XOg_AND), tmp, SBUF_MASK_FLAG); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); } +#endif /* -- Type conversions ---------------------------------------------------- */ @@ -672,8 +813,7 @@ static void asm_tointg(ASMState *as, IRIns *ir, Reg left) asm_guardcc(as, CC_NE); emit_rr(as, XO_UCOMISD, left, tmp); emit_rr(as, XO_CVTSI2SD, tmp, dest); - if (!(as->flags & JIT_F_SPLIT_XMM)) - emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ + emit_rr(as, XO_XORPS, tmp, tmp); /* Avoid partial register stall. */ emit_rr(as, XO_CVTTSD2SI, dest, left); /* Can't fuse since left is needed twice. */ } @@ -684,8 +824,9 @@ static void asm_tobit(ASMState *as, IRIns *ir) Reg tmp = ra_noreg(IR(ir->op1)->r) ? ra_alloc1(as, ir->op1, RSET_FPR) : ra_scratch(as, RSET_FPR); - Reg right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); + Reg right; emit_rr(as, XO_MOVDto, tmp, dest); + right = asm_fuseload(as, ir->op2, rset_exclude(RSET_FPR, tmp)); emit_mrm(as, XO_ADDSD, tmp, right); ra_left(as, tmp, ir->op1); } @@ -696,8 +837,10 @@ static void asm_conv(ASMState *as, IRIns *ir) int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); int stfp = (st == IRT_NUM || st == IRT_FLOAT); IRRef lref = ir->op1; - lua_assert(irt_type(ir->t) != st); - lua_assert(!(LJ_32 && (irt_isint64(ir->t) || st64))); /* Handled by SPLIT. */ + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + lj_assertA(!(LJ_32 && (irt_isint64(ir->t) || st64)), + "IR %04d has unsplit 64 bit type", + (int)(ir - as->ir) - REF_BIAS); if (irt_isfp(ir->t)) { Reg dest = ra_dest(as, ir, RSET_FPR); if (stfp) { /* FP to FP conversion. */ @@ -706,13 +849,13 @@ static void asm_conv(ASMState *as, IRIns *ir) if (left == dest) return; /* Avoid the XO_XORPS. */ } else if (LJ_32 && st == IRT_U32) { /* U32 to FP conversion on x86. */ /* number = (2^52+2^51 .. u32) - (2^52+2^51) */ - cTValue *k = lj_ir_k64_find(as->J, U64x(43380000,00000000)); + cTValue *k = &as->J->k64[LJ_K64_TOBIT]; Reg bias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); if (irt_isfloat(ir->t)) emit_rr(as, XO_CVTSD2SS, dest, dest); emit_rr(as, XO_SUBSD, dest, bias); /* Subtract 2^52+2^51 bias. */ emit_rr(as, XO_XORPS, dest, bias); /* Merge bias and integer. */ - emit_loadn(as, bias, k); + emit_rma(as, XO_MOVSD, bias, k); emit_mrm(as, XO_MOVD, dest, asm_fuseload(as, lref, RSET_GPR)); return; } else { /* Integer to FP conversion. */ @@ -721,7 +864,7 @@ static void asm_conv(ASMState *as, IRIns *ir) asm_fuseloadm(as, lref, RSET_GPR, st64); if (LJ_64 && st == IRT_U64) { MCLabel l_end = emit_label(as); - const void *k = lj_ir_k64_find(as->J, U64x(43f00000,00000000)); + cTValue *k = &as->J->k64[LJ_K64_2P64]; emit_rma(as, XO_ADDSD, dest, k); /* Add 2^64 to compensate. */ emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, left|REX_64, left); /* Check if u64 >= 2^63. */ @@ -729,18 +872,16 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_mrm(as, irt_isnum(ir->t) ? XO_CVTSI2SD : XO_CVTSI2SS, dest|((LJ_64 && (st64 || st == IRT_U32)) ? REX_64 : 0), left); } - if (!(as->flags & JIT_F_SPLIT_XMM)) - emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ + emit_rr(as, XO_XORPS, dest, dest); /* Avoid partial register stall. */ } else if (stfp) { /* FP to integer conversion. */ if (irt_isguard(ir->t)) { /* Checked conversions are only supported from number to int. */ - lua_assert(irt_isint(ir->t) && st == IRT_NUM); + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); } else { Reg dest = ra_dest(as, ir, RSET_GPR); - x86Op op = st == IRT_NUM ? - ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSD2SI : XO_CVTSD2SI) : - ((ir->op2 & IRCONV_TRUNC) ? XO_CVTTSS2SI : XO_CVTSS2SI); + x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { /* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ /* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ @@ -751,30 +892,27 @@ static void asm_conv(ASMState *as, IRIns *ir) emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000); emit_rr(as, op, dest|REX_64, tmp); if (st == IRT_NUM) - emit_rma(as, XO_ADDSD, tmp, lj_ir_k64_find(as->J, - LJ_64 ? U64x(c3f00000,00000000) : U64x(c1e00000,00000000))); + emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); else - emit_rma(as, XO_ADDSS, tmp, lj_ir_k64_find(as->J, - LJ_64 ? U64x(00000000,df800000) : U64x(00000000,cf000000))); + emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ emit_rr(as, op, dest|REX_64, tmp); ra_left(as, tmp, lref); } else { - Reg left = asm_fuseload(as, lref, RSET_FPR); if (LJ_64 && irt_isu32(ir->t)) emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ emit_mrm(as, op, dest|((LJ_64 && (irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0), - left); + asm_fuseload(as, lref, RSET_FPR)); } } } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ Reg left, dest = ra_dest(as, ir, RSET_GPR); RegSet allow = RSET_GPR; x86Op op; - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); if (st == IRT_I8) { op = XO_MOVSXb; allow = RSET_GPR8; dest |= FORCE_REX; } else if (st == IRT_U8) { @@ -808,7 +946,7 @@ static void asm_conv(ASMState *as, IRIns *ir) } } else { Reg dest = ra_dest(as, ir, RSET_GPR); - if (st64) { + if (st64 && !(ir->op2 & IRCONV_NONE)) { Reg left = asm_fuseload(as, lref, RSET_GPR); /* This is either a 32 bit reg/reg mov which zeroes the hiword ** or a load of the loword from a 64 bit address. @@ -834,20 +972,18 @@ static void asm_conv_fp_int64(ASMState *as, IRIns *ir) if (ra_hasreg(dest)) { ra_free(as, dest); ra_modified(as, dest); - emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, - dest, RID_ESP, ofs); + emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, dest, RID_ESP, ofs); } emit_rmro(as, irt_isnum(ir->t) ? XO_FSTPq : XO_FSTPd, irt_isnum(ir->t) ? XOg_FSTPq : XOg_FSTPd, RID_ESP, ofs); if (((ir-1)->op2 & IRCONV_SRCMASK) == IRT_U64) { /* For inputs in [2^63,2^64-1] add 2^64 to compensate. */ MCLabel l_end = emit_label(as); - emit_rma(as, XO_FADDq, XOg_FADDq, - lj_ir_k64_find(as->J, U64x(43f00000,00000000))); + emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_2P64]); emit_sjcc(as, CC_NS, l_end); emit_rr(as, XO_TEST, hi, hi); /* Check if u64 >= 2^63. */ } else { - lua_assert(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64); + lj_assertA(((ir-1)->op2 & IRCONV_SRCMASK) == IRT_I64, "bad type for CONV"); } emit_rmro(as, XO_FILDq, XOg_FILDq, RID_ESP, 0); /* NYI: Avoid narrow-to-wide store-to-load forwarding stall. */ @@ -861,9 +997,8 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); Reg lo, hi; - lua_assert(st == IRT_NUM || st == IRT_FLOAT); - lua_assert(dt == IRT_I64 || dt == IRT_U64); - lua_assert(((ir-1)->op2 & IRCONV_TRUNC)); + lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); + lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); hi = ra_dest(as, ir, RSET_GPR); lo = ra_dest(as, ir-1, rset_exclude(RSET_GPR, hi)); if (ra_used(ir-1)) emit_rmro(as, XO_MOV, lo, RID_ESP, 0); @@ -884,8 +1019,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); else emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); - emit_rma(as, XO_FADDq, XOg_FADDq, - lj_ir_k64_find(as->J, U64x(c3f00000,00000000))); + emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); emit_sjcc(as, CC_NS, l_pop); emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ } @@ -906,6 +1040,14 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir) st == IRT_NUM ? XOg_FLDq: XOg_FLDd, asm_fuseload(as, ir->op1, RSET_EMPTY)); } + +static void asm_conv64(ASMState *as, IRIns *ir) +{ + if (irt_isfp(ir->t)) + asm_conv_fp_int64(as, ir); + else + asm_conv_int64_fp(as, ir); +} #endif static void asm_strto(ASMState *as, IRIns *ir) @@ -927,54 +1069,61 @@ static void asm_strto(ASMState *as, IRIns *ir) RID_ESP, sps_scale(ir->s)); } -static void asm_tostr(ASMState *as, IRIns *ir) +/* -- Memory references --------------------------------------------------- */ + +/* Get pointer to TValue. */ +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) { - IRIns *irl = IR(ir->op1); - IRRef args[2]; - args[0] = ASMREF_L; - as->gcsteps++; - if (irt_isnum(irl->t)) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum]; - args[1] = ASMREF_TMP1; /* const lua_Number * */ - asm_setupresult(as, ir, ci); /* GCstr * */ - asm_gencall(as, ci, args); - emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1)|REX_64, - RID_ESP, ra_spill(as, irl)); - } else { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint]; - args[1] = ir->op1; /* int32_t k */ - asm_setupresult(as, ir, ci); /* GCstr * */ - asm_gencall(as, ci, args); + if ((mode & IRTMPREF_IN1)) { + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if (irref_isk(ref) && !(mode & IRTMPREF_OUT1)) { + /* Use the number constant itself as a TValue. */ + emit_loada(as, dest, ir_knum(ir)); + return; + } + emit_rmro(as, XO_MOVSDto, ra_alloc1(as, ref, RSET_FPR), dest, 0); + } else { +#if LJ_GC64 + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_movmroi(as, dest, 4, k.u32.hi); + emit_movmroi(as, dest, 0, k.u32.lo); + } else { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); + if (irt_is64(ir->t)) { + emit_u32(as, irt_toitype(ir->t) << 15); + emit_rmro(as, XO_ARITHi, XOg_OR, dest, 4); + } else { + emit_movmroi(as, dest, 4, (irt_toitype(ir->t) << 15)); + } + emit_movtomro(as, REX_64IR(ir, src), dest, 0); + } +#else + if (!irref_isk(ref)) { + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, dest)); + emit_movtomro(as, REX_64IR(ir, src), dest, 0); + } else if (!irt_ispri(ir->t)) { + emit_movmroi(as, dest, 0, ir->i); + } + if (!(LJ_64 && irt_islightud(ir->t))) + emit_movmroi(as, dest, 4, irt_toitype(ir->t)); +#endif + } } + emit_loada(as, dest, &J2G(as->J)->tmptv); /* g->tmptv holds the TValue(s). */ } -/* -- Memory references --------------------------------------------------- */ - static void asm_aref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); asm_fusearef(as, ir, RSET_GPR); if (!(as->mrm.idx == RID_NONE && as->mrm.ofs == 0)) - emit_mrm(as, XO_LEA, dest, RID_MRM); + emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); else if (as->mrm.base != dest) - emit_rr(as, XO_MOV, dest, as->mrm.base); -} - -/* Merge NE(HREF, niltv) check. */ -static MCode *merge_href_niltv(ASMState *as, IRIns *ir) -{ - /* Assumes nothing else generates NE of HREF. */ - if ((ir[1].o == IR_NE || ir[1].o == IR_EQ) && ir[1].op1 == as->curins && - ra_hasreg(ir->r)) { - MCode *p = as->mcp; - p += (LJ_64 && *p != XI_ARITHi) ? 7+6 : 6+6; - /* Ensure no loop branch inversion happened. */ - if (p[-6] == 0x0f && p[-5] == XI_JCCn+(CC_NE^(ir[1].o & 1))) { - as->mcp = p; /* Kill cmp reg, imm32 + jz exit. */ - return p + *(int32_t *)(p-4); /* Return exit address. */ - } - } - return NULL; + emit_rr(as, XO_MOV, dest|REX_GC64, as->mrm.base); } /* Inlined hash lookup. Specialized for key type and for const keys. @@ -985,10 +1134,10 @@ static MCode *merge_href_niltv(ASMState *as, IRIns *ir) ** } while ((n = nextnode(n))); ** return niltv(L); */ -static void asm_href(ASMState *as, IRIns *ir) +static void asm_href(ASMState *as, IRIns *ir, IROp merge) { - MCode *nilexit = merge_href_niltv(as, ir); /* Do this before any restores. */ RegSet allow = RSET_GPR; + int destused = ra_used(ir); Reg dest = ra_dest(as, ir, allow); Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); Reg key = RID_NONE, tmp = RID_NONE; @@ -1001,28 +1150,26 @@ static void asm_href(ASMState *as, IRIns *ir) if (!isk) { rset_clear(allow, tab); key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow); - if (!irt_isstr(kt)) + if (LJ_GC64 || !irt_isstr(kt)) tmp = ra_scratch(as, rset_exclude(allow, key)); } - /* Key not found in chain: jump to exit (if merged with NE) or load niltv. */ + /* Key not found in chain: jump to exit (if merged) or load niltv. */ l_end = emit_label(as); - if (nilexit && ir[1].o == IR_NE) { - emit_jcc(as, CC_E, nilexit); /* XI_JMP is not found by lj_asm_patchexit. */ - nilexit = NULL; - } else { + if (merge == IR_NE) + asm_guardcc(as, CC_E); /* XI_JMP is not found by lj_asm_patchexit. */ + else if (destused) emit_loada(as, dest, niltvg(J2G(as->J))); - } /* Follow hash chain until the end. */ l_loop = emit_sjcc_label(as, CC_NZ); - emit_rr(as, XO_TEST, dest, dest); - emit_rmro(as, XO_MOV, dest, dest, offsetof(Node, next)); + emit_rr(as, XO_TEST, dest|REX_GC64, dest); + emit_rmro(as, XO_MOV, dest|REX_GC64, dest, offsetof(Node, next)); l_next = emit_label(as); /* Type and value comparison. */ - if (nilexit) - emit_jcc(as, CC_E, nilexit); + if (merge == IR_EQ) + asm_guardcc(as, CC_E); else emit_sjcc(as, CC_E, l_end); if (irt_isnum(kt)) { @@ -1038,7 +1185,7 @@ static void asm_href(ASMState *as, IRIns *ir) emit_rmro(as, XO_UCOMISD, key, dest, offsetof(Node, key.n)); emit_sjcc(as, CC_AE, l_next); /* The type check avoids NaN penalties and complaints from Valgrind. */ -#if LJ_64 +#if LJ_64 && !LJ_GC64 emit_u32(as, LJ_TISNUM); emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); #else @@ -1046,13 +1193,31 @@ static void asm_href(ASMState *as, IRIns *ir) emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); #endif } -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(kt)) { emit_rmro(as, XO_CMP, key|REX_64, dest, offsetof(Node, key.u64)); #endif +#if LJ_GC64 + } else if (irt_isaddr(kt)) { + if (isk) { + TValue k; + k.u64 = ((uint64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.lo), + k.u32.lo); + emit_sjcc(as, CC_NE, l_next); + emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.u32.hi), + k.u32.hi); + } else { + emit_rmro(as, XO_CMP, tmp|REX_64, dest, offsetof(Node, key.u64)); + } + } else { + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + emit_u32(as, (irt_toitype(kt)<<15)|0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, dest, offsetof(Node, key.it)); +#else } else { if (!irt_ispri(kt)) { - lua_assert(irt_isaddr(kt)); + lj_assertA(irt_isaddr(kt), "bad HREF key type"); if (isk) emit_gmroi(as, XG_ARITHi(XOg_CMP), dest, offsetof(Node, key.gcr), ptr2addr(ir_kgc(irkey))); @@ -1060,31 +1225,33 @@ static void asm_href(ASMState *as, IRIns *ir) emit_rmro(as, XO_CMP, key, dest, offsetof(Node, key.gcr)); emit_sjcc(as, CC_NE, l_next); } - lua_assert(!irt_isnil(kt)); + lj_assertA(!irt_isnil(kt), "bad HREF key type"); emit_i8(as, irt_toitype(kt)); emit_rmro(as, XO_ARITHi8, XOg_CMP, dest, offsetof(Node, key.it)); +#endif } emit_sfixup(as, l_loop); checkmclim(as); +#if LJ_GC64 + if (!isk && irt_isaddr(kt)) { + emit_rr(as, XO_OR, tmp|REX_64, key); + emit_loadu64(as, tmp, (uint64_t)irt_toitype(kt) << 47); + } +#endif /* Load main position relative to tab->node into dest. */ - khash = isk ? ir_khash(irkey) : 1; + khash = isk ? ir_khash(as, irkey) : 1; if (khash == 0) { - emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, node)); + emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node)); } else { - emit_rmro(as, XO_ARITH(XOg_ADD), dest, tab, offsetof(GCtab, node)); - if ((as->flags & JIT_F_PREFER_IMUL)) { - emit_i8(as, sizeof(Node)); - emit_rr(as, XO_IMULi8, dest, dest); - } else { - emit_shifti(as, XOg_SHL, dest, 3); - emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); - } + emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node)); + emit_shifti(as, XOg_SHL, dest, 3); + emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0); if (isk) { emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash); emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); } else if (irt_isstr(kt)) { - emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, hash)); + emit_rmro(as, XO_ARITH(XOg_AND), dest, key, offsetof(GCstr, sid)); emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask)); } else { /* Must match with hashrot() in lj_tab.c. */ emit_rmro(as, XO_ARITH(XOg_AND), dest, tab, offsetof(GCtab, hmask)); @@ -1107,7 +1274,19 @@ static void asm_href(ASMState *as, IRIns *ir) #endif } else { emit_rr(as, XO_MOV, tmp, key); +#if LJ_GC64 + checkmclim(as); + emit_gri(as, XG_ARITHi(XOg_XOR), dest, irt_toitype(kt) << 15); + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 32); + emit_mrm(as, XV_RORX|VEX_64, dest, key); + } else { + emit_shifti(as, XOg_SHR|REX_64, dest, 32); + emit_rr(as, XO_MOV, dest|REX_64, key|REX_64); + } +#else emit_rmro(as, XO_LEA, dest, key, HASH_BIAS); +#endif } } } @@ -1123,15 +1302,15 @@ static void asm_hrefk(ASMState *as, IRIns *ir) #if !LJ_64 MCLabel l_exit; #endif - lua_assert(ofs % sizeof(Node) == 0); + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); if (ra_hasreg(dest)) { if (ofs != 0) { - if (dest == node && !(as->flags & JIT_F_LEA_AGU)) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, ofs); + if (dest == node) + emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs); else - emit_rmro(as, XO_LEA, dest, node, ofs); + emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs); } else if (dest != node) { - emit_rr(as, XO_MOV, dest, node); + emit_rr(as, XO_MOV, dest|REX_GC64, node); } } asm_guardcc(as, CC_NE); @@ -1140,16 +1319,28 @@ static void asm_hrefk(ASMState *as, IRIns *ir) Reg key = ra_scratch(as, rset_exclude(RSET_GPR, node)); emit_rmro(as, XO_CMP, key|REX_64, node, ofs + (int32_t)offsetof(Node, key.u64)); - lua_assert(irt_isnum(irkey->t) || irt_isgcv(irkey->t)); + lj_assertA(irt_isnum(irkey->t) || irt_isgcv(irkey->t), + "bad HREFK key type"); /* Assumes -0.0 is already canonicalized to +0.0. */ emit_loadu64(as, key, irt_isnum(irkey->t) ? ir_knum(irkey)->u64 : +#if LJ_GC64 + ((uint64_t)irt_toitype(irkey->t) << 47) | + (uint64_t)ir_kgc(irkey)); +#else ((uint64_t)irt_toitype(irkey->t) << 32) | (uint64_t)(uint32_t)ptr2addr(ir_kgc(irkey))); +#endif } else { - lua_assert(!irt_isnil(irkey->t)); + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); +#if LJ_GC64 + emit_i32(as, (irt_toitype(irkey->t)<<15)|0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, node, + ofs + (int32_t)offsetof(Node, key.it)); +#else emit_i8(as, irt_toitype(irkey->t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, node, ofs + (int32_t)offsetof(Node, key.it)); +#endif } #else l_exit = emit_label(as); @@ -1164,13 +1355,13 @@ static void asm_hrefk(ASMState *as, IRIns *ir) (int32_t)ir_knum(irkey)->u32.hi); } else { if (!irt_ispri(irkey->t)) { - lua_assert(irt_isgcv(irkey->t)); + lj_assertA(irt_isgcv(irkey->t), "bad HREFK key type"); emit_gmroi(as, XG_ARITHi(XOg_CMP), node, ofs + (int32_t)offsetof(Node, key.gcr), ptr2addr(ir_kgc(irkey))); emit_sjcc(as, CC_NE, l_exit); } - lua_assert(!irt_isnil(irkey->t)); + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); emit_i8(as, irt_toitype(irkey->t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, node, ofs + (int32_t)offsetof(Node, key.it)); @@ -1178,61 +1369,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir) #endif } -static void asm_newref(ASMState *as, IRIns *ir) -{ - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey]; - IRRef args[3]; - IRIns *irkey; - Reg tmp; - if (ir->r == RID_SINK) - return; - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ir->op1; /* GCtab *t */ - args[2] = ASMREF_TMP1; /* cTValue *key */ - asm_setupresult(as, ir, ci); /* TValue * */ - asm_gencall(as, ci, args); - tmp = ra_releasetmp(as, ASMREF_TMP1); - irkey = IR(ir->op2); - if (irt_isnum(irkey->t)) { - /* For numbers use the constant itself or a spill slot as a TValue. */ - if (irref_isk(ir->op2)) - emit_loada(as, tmp, ir_knum(irkey)); - else - emit_rmro(as, XO_LEA, tmp|REX_64, RID_ESP, ra_spill(as, irkey)); - } else { - /* Otherwise use g->tmptv to hold the TValue. */ - if (!irref_isk(ir->op2)) { - Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp)); - emit_movtomro(as, REX_64IR(irkey, src), tmp, 0); - } else if (!irt_ispri(irkey->t)) { - emit_movmroi(as, tmp, 0, irkey->i); - } - if (!(LJ_64 && irt_islightud(irkey->t))) - emit_movmroi(as, tmp, 4, irt_toitype(irkey->t)); - emit_loada(as, tmp, &J2G(as->J)->tmptv); - } -} - static void asm_uref(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); if (irref_isk(ir->op1)) { GCfunc *fn = ir_kfunc(IR(ir->op1)); MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; - emit_rma(as, XO_MOV, dest, v); + emit_rma(as, XO_MOV, dest|REX_GC64, v); } else { Reg uv = ra_scratch(as, RSET_GPR); Reg func = ra_alloc1(as, ir->op1, RSET_GPR); if (ir->o == IR_UREFC) { - emit_rmro(as, XO_LEA, dest, uv, offsetof(GCupval, tv)); + emit_rmro(as, XO_LEA, dest|REX_GC64, uv, offsetof(GCupval, tv)); asm_guardcc(as, CC_NE); emit_i8(as, 1); emit_rmro(as, XO_ARITHib, XOg_CMP, uv, offsetof(GCupval, closed)); } else { - emit_rmro(as, XO_MOV, dest, uv, offsetof(GCupval, v)); + emit_rmro(as, XO_MOV, dest|REX_GC64, uv, offsetof(GCupval, v)); } - emit_rmro(as, XO_MOV, uv, func, - (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8)); + emit_rmro(as, XO_MOV, uv|REX_GC64, func, + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); } } @@ -1250,9 +1407,9 @@ static void asm_strref(ASMState *as, IRIns *ir) if (as->mrm.base == RID_NONE) emit_loadi(as, dest, as->mrm.ofs); else if (as->mrm.base == dest && as->mrm.idx == RID_NONE) - emit_gri(as, XG_ARITHi(XOg_ADD), dest, as->mrm.ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, as->mrm.ofs); else - emit_mrm(as, XO_LEA, dest, RID_MRM); + emit_mrm(as, XO_LEA, dest|REX_GC64, RID_MRM); } /* -- Loads and stores ---------------------------------------------------- */ @@ -1271,19 +1428,23 @@ static void asm_fxload(ASMState *as, IRIns *ir) case IRT_U8: xo = XO_MOVZXb; break; case IRT_I16: xo = XO_MOVSXw; break; case IRT_U16: xo = XO_MOVZXw; break; - case IRT_NUM: xo = XMM_MOVRM(as); break; + case IRT_NUM: xo = XO_MOVSD; break; case IRT_FLOAT: xo = XO_MOVSS; break; default: if (LJ_64 && irt_is64(ir->t)) dest |= REX_64; else - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), + "unsplit 64 bit load"); xo = XO_MOV; break; } emit_mrm(as, xo, dest, RID_MRM); } +#define asm_fload(as, ir) asm_fxload(as, ir) +#define asm_xload(as, ir) asm_fxload(as, ir) + static void asm_fxstore(ASMState *as, IRIns *ir) { RegSet allow = RSET_GPR; @@ -1318,14 +1479,17 @@ static void asm_fxstore(ASMState *as, IRIns *ir) case IRT_I16: case IRT_U16: xo = XO_MOVtow; break; case IRT_NUM: xo = XO_MOVSDto; break; case IRT_FLOAT: xo = XO_MOVSSto; break; -#if LJ_64 - case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ +#if LJ_64 && !LJ_GC64 + case IRT_LIGHTUD: + /* NYI: mask 64 bit lightuserdata. */ + lj_assertA(0, "store of lightuserdata"); #endif default: if (LJ_64 && irt_is64(ir->t)) src |= REX_64; else - lua_assert(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)); + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t), + "unsplit 64 bit store"); xo = XO_MOVto; break; } @@ -1339,15 +1503,18 @@ static void asm_fxstore(ASMState *as, IRIns *ir) emit_i8(as, k); emit_mrm(as, XO_MOVmib, 0, RID_MRM); } else { - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || - irt_isaddr(ir->t)); + lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || irt_isu32(ir->t) || + irt_isaddr(ir->t), "bad store type"); emit_i32(as, k); emit_mrm(as, XO_MOVmi, REX_64IR(ir, 0), RID_MRM); } } } -#if LJ_64 +#define asm_fstore(as, ir) asm_fxstore(as, ir) +#define asm_xstore(as, ir) asm_fxstore(as, ir) + +#if LJ_64 && !LJ_GC64 static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) { if (ra_used(ir) || typecheck) { @@ -1369,13 +1536,18 @@ static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) static void asm_ahuvload(ASMState *as, IRIns *ir) { - lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || - (LJ_DUALNUM && irt_isint(ir->t))); -#if LJ_64 +#if LJ_GC64 + Reg tmp = RID_NONE; +#endif + lj_assertA(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) || + (LJ_DUALNUM && irt_isint(ir->t)), + "bad load type %d", irt_type(ir->t)); +#if LJ_64 && !LJ_GC64 if (irt_islightud(ir->t)) { Reg dest = asm_load_lightud64(as, ir, 1); if (ra_hasreg(dest)) { asm_fuseahuref(as, ir->op1, RSET_GPR); + if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); } return; @@ -1385,20 +1557,67 @@ static void asm_ahuvload(ASMState *as, IRIns *ir) RegSet allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); asm_fuseahuref(as, ir->op1, RSET_GPR); - emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XMM_MOVRM(as), dest, RID_MRM); + if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; +#if LJ_GC64 + if (irt_isaddr(ir->t)) { + emit_shifti(as, XOg_SHR|REX_64, dest, 17); + asm_guardcc(as, CC_NE); + emit_i8(as, irt_toitype(ir->t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, dest); + emit_i8(as, XI_O16); + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 47); + emit_mrm(as, XV_RORX|VEX_64, dest, RID_MRM); + } else { + emit_shifti(as, XOg_ROR|REX_64, dest, 47); + emit_mrm(as, XO_MOV, dest|REX_64, RID_MRM); + } + return; + } else +#endif + emit_mrm(as, dest < RID_MAX_GPR ? XO_MOV : XO_MOVSD, dest, RID_MRM); } else { - asm_fuseahuref(as, ir->op1, RSET_GPR); + RegSet gpr = RSET_GPR; +#if LJ_GC64 + if (irt_isaddr(ir->t)) { + tmp = ra_scratch(as, RSET_GPR); + gpr = rset_exclude(gpr, tmp); + } +#endif + asm_fuseahuref(as, ir->op1, gpr); + if (ir->o == IR_VLOAD) as->mrm.ofs += 8 * ir->op2; } /* Always do the type check, even if the load result is unused. */ as->mrm.ofs += 4; asm_guardcc(as, irt_isnum(ir->t) ? CC_AE : CC_NE); if (LJ_64 && irt_type(ir->t) >= IRT_NUM) { - lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t)); + lj_assertA(irt_isinteger(ir->t) || irt_isnum(ir->t), + "bad load type %d", irt_type(ir->t)); +#if LJ_GC64 + emit_u32(as, LJ_TISNUM << 15); +#else emit_u32(as, LJ_TISNUM); +#endif + emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); +#if LJ_GC64 + } else if (irt_isaddr(ir->t)) { + as->mrm.ofs -= 4; + emit_i8(as, irt_toitype(ir->t)); + emit_mrm(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); + emit_mrm(as, XO_MOV, tmp|REX_64, RID_MRM); + } else if (irt_isnil(ir->t)) { + as->mrm.ofs -= 4; + emit_i8(as, -1); + emit_mrm(as, XO_ARITHi8, XOg_CMP|REX_64, RID_MRM); + } else { + emit_u32(as, (irt_toitype(ir->t) << 15) | 0x7fff); emit_mrm(as, XO_ARITHi, XOg_CMP, RID_MRM); +#else } else { emit_i8(as, irt_toitype(ir->t)); emit_mrm(as, XO_ARITHi8, XOg_CMP, RID_MRM); +#endif } } @@ -1410,12 +1629,28 @@ static void asm_ahustore(ASMState *as, IRIns *ir) Reg src = ra_alloc1(as, ir->op2, RSET_FPR); asm_fuseahuref(as, ir->op1, RSET_GPR); emit_mrm(as, XO_MOVSDto, src, RID_MRM); -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(ir->t)) { Reg src = ra_alloc1(as, ir->op2, RSET_GPR); asm_fuseahuref(as, ir->op1, rset_exclude(RSET_GPR, src)); emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); #endif +#if LJ_GC64 + } else if (irref_isk(ir->op2)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, IR(ir->op2)); + asm_fuseahuref(as, ir->op1, RSET_GPR); + if (tvisnil(&k)) { + emit_i32(as, -1); + emit_mrm(as, XO_MOVmi, REX_64, RID_MRM); + } else { + emit_u32(as, k.u32.lo); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + as->mrm.ofs += 4; + emit_u32(as, k.u32.hi); + emit_mrm(as, XO_MOVmi, 0, RID_MRM); + } +#endif } else { IRIns *irr = IR(ir->op2); RegSet allow = RSET_GPR; @@ -1426,34 +1661,56 @@ static void asm_ahustore(ASMState *as, IRIns *ir) } asm_fuseahuref(as, ir->op1, allow); if (ra_hasreg(src)) { +#if LJ_GC64 + if (!(LJ_DUALNUM && irt_isinteger(ir->t))) { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ + as->mrm.ofs += 4; + emit_u32(as, irt_toitype(ir->t) << 15); + emit_mrm(as, XO_ARITHi, XOg_OR, RID_MRM); + as->mrm.ofs -= 4; + emit_mrm(as, XO_MOVto, src|REX_64, RID_MRM); + return; + } +#endif emit_mrm(as, XO_MOVto, src, RID_MRM); } else if (!irt_ispri(irr->t)) { - lua_assert(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t))); + lj_assertA(irt_isaddr(ir->t) || (LJ_DUALNUM && irt_isinteger(ir->t)), + "bad store type"); emit_i32(as, irr->i); emit_mrm(as, XO_MOVmi, 0, RID_MRM); } as->mrm.ofs += 4; +#if LJ_GC64 + lj_assertA(LJ_DUALNUM && irt_isinteger(ir->t), "bad store type"); + emit_i32(as, LJ_TNUMX << 15); +#else emit_i32(as, (int32_t)irt_toitype(ir->t)); +#endif emit_mrm(as, XO_MOVmi, 0, RID_MRM); } } static void asm_sload(ASMState *as, IRIns *ir) { - int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0); + int32_t ofs = 8*((int32_t)ir->op1-1-LJ_FR2) + + (!LJ_FR2 && (ir->op2 & IRSLOAD_FRAME) ? 4 : 0); IRType1 t = ir->t; Reg base; - lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ - lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK)); - lua_assert(LJ_DUALNUM || - !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME))); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + lj_assertA(LJ_DUALNUM || + !irt_isint(t) || + (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)), + "bad SLOAD type"); if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { Reg left = ra_scratch(as, RSET_FPR); asm_tointg(as, ir, left); /* Frees dest reg. Do this before base alloc. */ base = ra_alloc1(as, REF_BASE, RSET_GPR); - emit_rmro(as, XMM_MOVRM(as), left, base, ofs); + emit_rmro(as, XO_MOVSD, left, base, ofs); t.irt = IRT_NUM; /* Continue with a regular number type check. */ -#if LJ_64 +#if LJ_64 && !LJ_GC64 } else if (irt_islightud(t)) { Reg dest = asm_load_lightud64(as, ir, (ir->op2 & IRSLOAD_TYPECHECK)); if (ra_hasreg(dest)) { @@ -1466,14 +1723,43 @@ static void asm_sload(ASMState *as, IRIns *ir) RegSet allow = irt_isnum(t) ? RSET_FPR : RSET_GPR; Reg dest = ra_dest(as, ir, allow); base = ra_alloc1(as, REF_BASE, RSET_GPR); - lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t)); + lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(t)); if ((ir->op2 & IRSLOAD_CONVERT)) { t.irt = irt_isint(t) ? IRT_NUM : IRT_INT; /* Check for original type. */ - emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTSD2SI, dest, base, ofs); - } else if (irt_isnum(t)) { - emit_rmro(as, XMM_MOVRM(as), dest, base, ofs); + emit_rmro(as, irt_isint(t) ? XO_CVTSI2SD : XO_CVTTSD2SI, dest, base, ofs); } else { - emit_rmro(as, XO_MOV, dest, base, ofs); +#if LJ_GC64 + if (irt_isaddr(t)) { + /* LJ_GC64 type check + tag removal without BMI2 and with BMI2: + ** + ** mov r64, [addr] rorx r64, [addr], 47 + ** ror r64, 47 + ** cmp r16, itype cmp r16, itype + ** jne ->exit jne ->exit + ** shr r64, 16 shr r64, 16 + */ + emit_shifti(as, XOg_SHR|REX_64, dest, 17); + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + asm_guardcc(as, CC_NE); + emit_i8(as, irt_toitype(t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, dest); + emit_i8(as, XI_O16); + } + if ((as->flags & JIT_F_BMI2)) { + emit_i8(as, 47); + emit_rmro(as, XV_RORX|VEX_64, dest, base, ofs); + } else { + if ((ir->op2 & IRSLOAD_TYPECHECK)) + emit_shifti(as, XOg_ROR|REX_64, dest, 47); + else + emit_shifti(as, XOg_SHL|REX_64, dest, 17); + emit_rmro(as, XO_MOV, dest|REX_64, base, ofs); + } + return; + } else +#endif + emit_rmro(as, irt_isnum(t) ? XO_MOVSD : XO_MOV, dest, base, ofs); } } else { if (!(ir->op2 & IRSLOAD_TYPECHECK)) @@ -1483,13 +1769,42 @@ static void asm_sload(ASMState *as, IRIns *ir) if ((ir->op2 & IRSLOAD_TYPECHECK)) { /* Need type check, even if the load result is unused. */ asm_guardcc(as, irt_isnum(t) ? CC_AE : CC_NE); - if (LJ_64 && irt_type(t) >= IRT_NUM) { - lua_assert(irt_isinteger(t) || irt_isnum(t)); - emit_u32(as, LJ_TISNUM); + if ((LJ_64 && irt_type(t) >= IRT_NUM) || (ir->op2 & IRSLOAD_KEYINDEX)) { + lj_assertA(irt_isinteger(t) || irt_isnum(t), + "bad SLOAD type %d", irt_type(t)); + emit_u32(as, (ir->op2 & IRSLOAD_KEYINDEX) ? LJ_KEYINDEX : + LJ_GC64 ? (LJ_TISNUM << 15) : LJ_TISNUM); emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); +#if LJ_GC64 + } else if (irt_isnil(t)) { + /* LJ_GC64 type check for nil: + ** + ** cmp qword [addr], -1 + ** jne ->exit + */ + emit_i8(as, -1); + emit_rmro(as, XO_ARITHi8, XOg_CMP|REX_64, base, ofs); + } else if (irt_ispri(t)) { + emit_u32(as, (irt_toitype(t) << 15) | 0x7fff); + emit_rmro(as, XO_ARITHi, XOg_CMP, base, ofs+4); + } else { + /* LJ_GC64 type check only: + ** + ** mov r64, [addr] + ** sar r64, 47 + ** cmp r32, itype + ** jne ->exit + */ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, base)); + emit_i8(as, irt_toitype(t)); + emit_rr(as, XO_ARITHi8, XOg_CMP, tmp); + emit_shifti(as, XOg_SAR|REX_64, tmp, 47); + emit_rmro(as, XO_MOV, tmp|REX_64, base, ofs); +#else } else { emit_i8(as, irt_toitype(t)); emit_rmro(as, XO_ARITHi8, XOg_CMP, base, ofs+4); +#endif } } } @@ -1500,15 +1815,14 @@ static void asm_sload(ASMState *as, IRIns *ir) static void asm_cnew(ASMState *as, IRIns *ir) { CTState *cts = ctype_ctsG(J2G(as->J)); - CTypeID ctypeid = (CTypeID)IR(ir->op1)->i; - CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ? - lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i; + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; - IRRef args[2]; - lua_assert(sz != CTSIZE_INVALID); + IRRef args[4]; + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); - args[0] = ASMREF_L; /* lua_State *L */ - args[1] = ASMREF_TMP1; /* MSize size */ as->gcsteps++; asm_setupresult(as, ir, ci); /* GCcdata * */ @@ -1519,8 +1833,9 @@ static void asm_cnew(ASMState *as, IRIns *ir) Reg r64 = sz == 8 ? REX_64 : 0; if (irref_isk(ir->op2)) { IRIns *irk = IR(ir->op2); - uint64_t k = irk->o == IR_KINT64 ? ir_k64(irk)->u64 : - (uint64_t)(uint32_t)irk->i; + uint64_t k = (irk->o == IR_KINT64 || + (LJ_GC64 && (irk->o == IR_KPTR || irk->o == IR_KKPTR))) ? + ir_k64(irk)->u64 : (uint64_t)(uint32_t)irk->i; if (sz == 4 || checki32((int64_t)k)) { emit_i32(as, (int32_t)k); emit_rmro(as, XO_MOVmi, r64, RID_RET, sizeof(GCcdata)); @@ -1536,7 +1851,7 @@ static void asm_cnew(ASMState *as, IRIns *ir) int32_t ofs = sizeof(GCcdata); if (sz == 8) { ofs += 4; ir++; - lua_assert(ir->o == IR_HIOP); + lj_assertA(ir->o == IR_HIOP, "missing CNEWI HIOP"); } do { if (irref_isk(ir->op2)) { @@ -1550,21 +1865,30 @@ static void asm_cnew(ASMState *as, IRIns *ir) ofs -= 4; ir--; } while (1); #endif - lua_assert(sz == 4 || sz == 8); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; } /* Combine initialization of marked, gct and ctypeid. */ emit_movtomro(as, RID_ECX, RID_RET, offsetof(GCcdata, marked)); emit_gri(as, XG_ARITHi(XOg_OR), RID_ECX, - (int32_t)((~LJ_TCDATA<<8)+(ctypeid<<16))); + (int32_t)((~LJ_TCDATA<<8)+(id<<16))); emit_gri(as, XG_ARITHi(XOg_AND), RID_ECX, LJ_GC_WHITES); emit_opgl(as, XO_MOVZXb, RID_ECX, gc.currentwhite); + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ asm_gencall(as, ci, args); emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)(sz+sizeof(GCcdata))); } -#else -#define asm_cnew(as, ir) ((void)0) #endif /* -- Write barriers ------------------------------------------------------ */ @@ -1574,7 +1898,7 @@ static void asm_tbar(ASMState *as, IRIns *ir) Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, tab)); MCLabel l_end = emit_label(as); - emit_movtomro(as, tmp, tab, offsetof(GCtab, gclist)); + emit_movtomro(as, tmp|REX_GC64, tab, offsetof(GCtab, gclist)); emit_setgl(as, tab, gc.grayagain); emit_getgl(as, tmp, gc.grayagain); emit_i8(as, ~LJ_GC_BLACK); @@ -1591,7 +1915,7 @@ static void asm_obar(ASMState *as, IRIns *ir) MCLabel l_end; Reg obj; /* No need for other object barriers (yet). */ - lua_assert(IR(ir->op1)->o == IR_UREFC); + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); ra_evictset(as, RSET_SCRATCH); l_end = emit_label(as); args[0] = ASMREF_TMP1; /* global_State *g */ @@ -1637,36 +1961,9 @@ static void asm_x87load(ASMState *as, IRRef ref) } } -/* Try to rejoin pow from EXP2, MUL and LOG2 (if still unsplit). */ -static int fpmjoin_pow(ASMState *as, IRIns *ir) -{ - IRIns *irp = IR(ir->op1); - if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) { - IRIns *irpp = IR(irp->op1); - if (irpp == ir-2 && irpp->o == IR_FPMATH && - irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) { - /* The modified regs must match with the *.dasc implementation. */ - RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM2+1)|RID2RSET(RID_EAX); - IRIns *irx; - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - ra_destreg(as, ir, RID_XMM0); - emit_call(as, lj_vm_pow_sse); - irx = IR(irpp->op1); - if (ra_noreg(irx->r) && ra_gethint(irx->r) == RID_XMM1) - irx->r = RID_INIT; /* Avoid allocating xmm1 for x. */ - ra_left(as, RID_XMM0, irpp->op1); - ra_left(as, RID_XMM1, irp->op2); - return 1; - } - } - return 0; -} - static void asm_fpmath(ASMState *as, IRIns *ir) { - IRFPMathOp fpm = ir->o == IR_FPMATH ? (IRFPMathOp)ir->op2 : IRFPM_OTHER; + IRFPMathOp fpm = (IRFPMathOp)ir->op2; if (fpm == IRFPM_SQRT) { Reg dest = ra_dest(as, ir, RSET_FPR); Reg left = asm_fuseload(as, ir->op1, RSET_FPR); @@ -1697,93 +1994,32 @@ static void asm_fpmath(ASMState *as, IRIns *ir) fpm == IRFPM_CEIL ? lj_vm_ceil_sse : lj_vm_trunc_sse); ra_left(as, RID_XMM0, ir->op1); } - } else if (fpm == IRFPM_EXP2 && fpmjoin_pow(as, ir)) { - /* Rejoined to pow(). */ - } else { /* Handle x87 ops. */ - int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ - Reg dest = ir->r; - if (ra_hasreg(dest)) { - ra_free(as, dest); - ra_modified(as, dest); - emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs); - } - emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); - switch (fpm) { /* st0 = lj_vm_*(st0) */ - case IRFPM_EXP: emit_call(as, lj_vm_exp_x87); break; - case IRFPM_EXP2: emit_call(as, lj_vm_exp2_x87); break; - case IRFPM_SIN: emit_x87op(as, XI_FSIN); break; - case IRFPM_COS: emit_x87op(as, XI_FCOS); break; - case IRFPM_TAN: emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPTAN); break; - case IRFPM_LOG: case IRFPM_LOG2: case IRFPM_LOG10: - /* Note: the use of fyl2xp1 would be pointless here. When computing - ** log(1.0+eps) the precision is already lost after 1.0 is added. - ** Subtracting 1.0 won't recover it. OTOH math.log1p would make sense. - */ - emit_x87op(as, XI_FYL2X); break; - case IRFPM_OTHER: - switch (ir->o) { - case IR_ATAN2: - emit_x87op(as, XI_FPATAN); asm_x87load(as, ir->op2); break; - case IR_LDEXP: - emit_x87op(as, XI_FPOP1); emit_x87op(as, XI_FSCALE); break; - default: lua_assert(0); break; - } - break; - default: lua_assert(0); break; - } - asm_x87load(as, ir->op1); - switch (fpm) { - case IRFPM_LOG: emit_x87op(as, XI_FLDLN2); break; - case IRFPM_LOG2: emit_x87op(as, XI_FLD1); break; - case IRFPM_LOG10: emit_x87op(as, XI_FLDLG2); break; - case IRFPM_OTHER: - if (ir->o == IR_LDEXP) asm_x87load(as, ir->op2); - break; - default: break; - } + } else { + asm_callid(as, ir, IRCALL_lj_vm_floor + fpm); } } -static void asm_fppowi(ASMState *as, IRIns *ir) -{ - /* The modified regs must match with the *.dasc implementation. */ - RegSet drop = RSET_RANGE(RID_XMM0, RID_XMM1+1)|RID2RSET(RID_EAX); - if (ra_hasreg(ir->r)) - rset_clear(drop, ir->r); /* Dest reg handled below. */ - ra_evictset(as, drop); - ra_destreg(as, ir, RID_XMM0); - emit_call(as, lj_vm_powi_sse); - ra_left(as, RID_XMM0, ir->op1); - ra_left(as, RID_EAX, ir->op2); -} - -#if LJ_64 && LJ_HASFFI -static void asm_arith64(ASMState *as, IRIns *ir, IRCallID id) -{ - const CCallInfo *ci = &lj_ir_callinfo[id]; - IRRef args[2]; - args[0] = ir->op1; - args[1] = ir->op2; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); -} -#endif - -static void asm_intmod(ASMState *as, IRIns *ir) +static void asm_ldexp(ASMState *as, IRIns *ir) { - const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_vm_modi]; - IRRef args[2]; - args[0] = ir->op1; - args[1] = ir->op2; - asm_setupresult(as, ir, ci); - asm_gencall(as, ci, args); + int32_t ofs = sps_scale(ir->s); /* Use spill slot or temp slots. */ + Reg dest = ir->r; + if (ra_hasreg(dest)) { + ra_free(as, dest); + ra_modified(as, dest); + emit_rmro(as, XO_MOVSD, dest, RID_ESP, ofs); + } + emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs); + emit_x87op(as, XI_FPOP1); + emit_x87op(as, XI_FSCALE); + asm_x87load(as, ir->op1); + asm_x87load(as, ir->op2); } static int asm_swapops(ASMState *as, IRIns *ir) { IRIns *irl = IR(ir->op1); IRIns *irr = IR(ir->op2); - lua_assert(ra_noreg(irr->r)); + lj_assertA(ra_noreg(irr->r), "bad usage"); if (!irm_iscomm(lj_ir_mode[ir->o])) return 0; /* Can't swap non-commutative operations. */ if (irref_isk(ir->op2)) @@ -1955,11 +2191,28 @@ static void asm_add(ASMState *as, IRIns *ir) { if (irt_isnum(ir->t)) asm_fparith(as, ir, XO_ADDSD); - else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp || - irt_is64(ir->t) || !asm_lea(as, ir)) + else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir)) asm_intarith(as, ir, XOg_ADD); } +static void asm_sub(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_SUBSD); + else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ + asm_intarith(as, ir, XOg_SUB); +} + +static void asm_mul(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_MULSD); + else + asm_intarith(as, ir, XOg_X_IMUL); +} + +#define asm_fpdiv(as, ir) asm_fparith(as, ir, XO_DIVSD) + static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) { Reg dest = ra_dest(as, ir, RSET_GPR); @@ -1967,7 +2220,17 @@ static void asm_neg_not(ASMState *as, IRIns *ir, x86Group3 xg) ra_left(as, dest, ir->op1); } -static void asm_min_max(ASMState *as, IRIns *ir, int cc) +static void asm_neg(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_XORPS); + else + asm_neg_not(as, ir, XOg_NEG); +} + +#define asm_abs(as, ir) asm_fparith(as, ir, XO_ANDPS) + +static void asm_intmin_max(ASMState *as, IRIns *ir, int cc) { Reg right, dest = ra_dest(as, ir, RSET_GPR); IRRef lref = ir->op1, rref = ir->op2; @@ -1978,7 +2241,30 @@ static void asm_min_max(ASMState *as, IRIns *ir, int cc) ra_left(as, dest, lref); } -static void asm_bitswap(ASMState *as, IRIns *ir) +static void asm_min(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_MINSD); + else + asm_intmin_max(as, ir, CC_G); +} + +static void asm_max(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fparith(as, ir, XO_MAXSD); + else + asm_intmin_max(as, ir, CC_L); +} + +/* Note: don't use LEA for overflow-checking arithmetic! */ +#define asm_addov(as, ir) asm_intarith(as, ir, XOg_ADD) +#define asm_subov(as, ir) asm_intarith(as, ir, XOg_SUB) +#define asm_mulov(as, ir) asm_intarith(as, ir, XOg_X_IMUL) + +#define asm_bnot(as, ir) asm_neg_not(as, ir, XOg_NOT) + +static void asm_bswap(ASMState *as, IRIns *ir) { Reg dest = ra_dest(as, ir, RSET_GPR); as->mcp = emit_op(XO_BSWAP + ((dest&7) << 24), @@ -1986,7 +2272,11 @@ static void asm_bitswap(ASMState *as, IRIns *ir) ra_left(as, dest, ir->op1); } -static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) +#define asm_band(as, ir) asm_intarith(as, ir, XOg_AND) +#define asm_bor(as, ir) asm_intarith(as, ir, XOg_OR) +#define asm_bxor(as, ir) asm_intarith(as, ir, XOg_XOR) + +static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs, x86Op xv) { IRRef rref = ir->op2; IRIns *irr = IR(rref); @@ -1995,17 +2285,33 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) int shift; dest = ra_dest(as, ir, RSET_GPR); shift = irr->i & (irt_is64(ir->t) ? 63 : 31); + if (!xv && shift && (as->flags & JIT_F_BMI2)) { + Reg left = asm_fuseloadm(as, ir->op1, RSET_GPR, irt_is64(ir->t)); + if (left != dest) { /* BMI2 rotate right by constant. */ + emit_i8(as, xs == XOg_ROL ? -shift : shift); + emit_mrm(as, VEX_64IR(ir, XV_RORX), dest, left); + return; + } + } switch (shift) { case 0: break; case 1: emit_rr(as, XO_SHIFT1, REX_64IR(ir, xs), dest); break; default: emit_shifti(as, REX_64IR(ir, xs), dest, shift); break; } + } else if ((as->flags & JIT_F_BMI2) && xv) { /* BMI2 variable shifts. */ + Reg left, right; + dest = ra_dest(as, ir, RSET_GPR); + right = ra_alloc1(as, rref, RSET_GPR); + left = asm_fuseloadm(as, ir->op1, rset_exclude(RSET_GPR, right), + irt_is64(ir->t)); + emit_mrm(as, VEX_64IR(ir, xv) ^ (right << 19), dest, left); + return; } else { /* Variable shifts implicitly use register cl (i.e. ecx). */ Reg right; dest = ra_dest(as, ir, rset_exclude(RSET_GPR, RID_ECX)); if (dest == RID_ECX) { dest = ra_scratch(as, rset_exclude(RSET_GPR, RID_ECX)); - emit_rr(as, XO_MOV, RID_ECX, dest); + emit_rr(as, XO_MOV, REX_64IR(ir, RID_ECX), dest); } right = irr->r; if (ra_noreg(right)) @@ -2025,6 +2331,12 @@ static void asm_bitshift(ASMState *as, IRIns *ir, x86Shift xs) */ } +#define asm_bshl(as, ir) asm_bitshift(as, ir, XOg_SHL, XV_SHLX) +#define asm_bshr(as, ir) asm_bitshift(as, ir, XOg_SHR, XV_SHRX) +#define asm_bsar(as, ir) asm_bitshift(as, ir, XOg_SAR, XV_SARX) +#define asm_brol(as, ir) asm_bitshift(as, ir, XOg_ROL, 0) +#define asm_bror(as, ir) asm_bitshift(as, ir, XOg_ROR, 0) + /* -- Comparisons --------------------------------------------------------- */ /* Virtual flags for unordered FP comparisons. */ @@ -2051,8 +2363,9 @@ static const uint16_t asm_compmap[IR_ABC+1] = { }; /* FP and integer comparisons. */ -static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) +static void asm_comp(ASMState *as, IRIns *ir) { + uint32_t cc = asm_compmap[ir->o]; if (irt_isnum(ir->t)) { IRRef lref = ir->op1; IRRef rref = ir->op2; @@ -2073,7 +2386,6 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) cc ^= (VCC_PS|(5<<4)); /* A <-> B, AE <-> BE, PS <-> none */ } left = ra_alloc1(as, lref, RSET_FPR); - right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); l_around = emit_label(as); asm_guardcc(as, cc >> 4); if (cc & VCC_P) { /* Extra CC_P branch required? */ @@ -2090,14 +2402,16 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) emit_jcc(as, CC_P, as->mcp); } } + right = asm_fuseload(as, rref, rset_exclude(RSET_FPR, left)); emit_mrm(as, XO_UCOMISD, left, right); } else { IRRef lref = ir->op1, rref = ir->op2; IROp leftop = (IROp)(IR(lref)->o); Reg r64 = REX_64IR(ir, 0); int32_t imm = 0; - lua_assert(irt_is64(ir->t) || irt_isint(ir->t) || - irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t)); + lj_assertA(irt_is64(ir->t) || irt_isint(ir->t) || + irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t), + "bad comparison data type %d", irt_type(ir->t)); /* Swap constants (only for ABC) and fusable loads to the right. */ if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) { if ((cc & 0xc) == 0xc) cc ^= 0x53; /* L <-> G, LE <-> GE */ @@ -2179,7 +2493,7 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) /* Use test r,r instead of cmp r,0. */ x86Op xo = XO_TEST; if (irt_isu8(ir->t)) { - lua_assert(ir->o == IR_EQ || ir->o == IR_NE); + lj_assertA(ir->o == IR_EQ || ir->o == IR_NE, "bad usage"); xo = XO_TESTb; if (!rset_test(RSET_RANGE(RID_EAX, RID_EBX+1), left)) { if (LJ_64) { @@ -2207,6 +2521,8 @@ static void asm_comp(ASMState *as, IRIns *ir, uint32_t cc) } } +#define asm_equal(as, ir) asm_comp(as, ir) + #if LJ_32 && LJ_HASFFI /* 64 bit integer comparisons in 32 bit mode. */ static void asm_comp_int64(ASMState *as, IRIns *ir) @@ -2279,23 +2595,19 @@ static void asm_comp_int64(ASMState *as, IRIns *ir) } #endif -/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ +/* -- Split register ops -------------------------------------------------- */ -/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +/* Hiword op of a split 32/32 or 64/64 bit op. Previous op is the loword op. */ static void asm_hiop(ASMState *as, IRIns *ir) { -#if LJ_32 && LJ_HASFFI /* HIOP is marked as a store because it needs its own DCE logic. */ int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; +#if LJ_32 && LJ_HASFFI if ((ir-1)->o == IR_CONV) { /* Conversions to/from 64 bit. */ - if (usehi || uselo) { - if (irt_isfp(ir->t)) - asm_conv_fp_int64(as, ir); - else - asm_conv_int64_fp(as, ir); - } as->curins--; /* Always skip the CONV. */ + if (usehi || uselo) + asm_conv64(as, ir); return; } else if ((ir-1)->o <= IR_NE) { /* 64 bit integer comparisons. ORDER IR. */ asm_comp_int64(as, ir); @@ -2305,8 +2617,10 @@ static void asm_hiop(ASMState *as, IRIns *ir) asm_fxstore(as, ir); return; } +#endif if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ switch ((ir-1)->o) { +#if LJ_32 && LJ_HASFFI case IR_ADD: as->flagmcp = NULL; as->curins--; @@ -2329,19 +2643,26 @@ static void asm_hiop(ASMState *as, IRIns *ir) asm_neg_not(as, ir-1, XOg_NEG); break; } - case IR_CALLN: - case IR_CALLXS: - if (!uselo) - ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ - break; case IR_CNEWI: /* Nothing to do here. Handled by CNEWI itself. */ break; - default: lua_assert(0); break; - } -#else - UNUSED(as); UNUSED(ir); lua_assert(0); /* Unused on x64 or without FFI. */ #endif + case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + UNUSED(ir); + asm_guardcc(as, CC_NE); + emit_i8(as, HOOK_PROFILE); + emit_rma(as, XO_GROUP3b, XOg_TEST, &J2G(as->J)->hookmask); } /* -- Stack handling ------------------------------------------------------ */ @@ -2358,14 +2679,19 @@ static void asm_stack_check(ASMState *as, BCReg topslot, emit_rmro(as, XO_MOV, r|REX_64, RID_ESP, 0); else ra_modified(as, r); - emit_gri(as, XG_ARITHi(XOg_CMP), r, (int32_t)(8*topslot)); + emit_gri(as, XG_ARITHi(XOg_CMP), r|REX_GC64, (int32_t)(8*topslot)); if (ra_hasreg(pbase) && pbase != r) - emit_rr(as, XO_ARITH(XOg_SUB), r, pbase); + emit_rr(as, XO_ARITH(XOg_SUB), r|REX_GC64, pbase); else +#if LJ_GC64 + emit_rmro(as, XO_ARITH(XOg_SUB), r|REX_64, RID_DISPATCH, + (int32_t)dispofs(as, &J2G(as->J)->jit_base)); +#else emit_rmro(as, XO_ARITH(XOg_SUB), r, RID_NONE, ptr2addr(&J2G(as->J)->jit_base)); - emit_rmro(as, XO_MOV, r, r, offsetof(lua_State, maxstack)); - emit_getgl(as, r, jit_L); +#endif + emit_rmro(as, XO_MOV, r|REX_GC64, r, offsetof(lua_State, maxstack)); + emit_getgl(as, r, cur_L); if (allow == RSET_EMPTY) /* Spill temp. register. */ emit_rmro(as, XO_MOVto, r|REX_64, RID_ESP, 0); } @@ -2374,40 +2700,79 @@ static void asm_stack_check(ASMState *as, BCReg topslot, static void asm_stack_restore(ASMState *as, SnapShot *snap) { SnapEntry *map = &as->T->snapmap[snap->mapofs]; - SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1]; +#if !LJ_FR2 || defined(LUA_USE_ASSERT) + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif MSize n, nent = snap->nent; /* Store the value of all modified slots to the Lua stack. */ for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; BCReg s = snap_slot(sn); - int32_t ofs = 8*((int32_t)s-1); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); IRRef ref = snap_ref(sn); IRIns *ir = IR(ref); if ((sn & SNAP_NORESTORE)) continue; - if (irt_isnum(ir->t)) { + if ((sn & SNAP_KEYINDEX)) { + emit_movmroi(as, RID_BASE, ofs+4, LJ_KEYINDEX); + if (irref_isk(ref)) { + emit_movmroi(as, RID_BASE, ofs, ir->i); + } else { + Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); + emit_movtomro(as, src, RID_BASE, ofs); + } + } else if (irt_isnum(ir->t)) { Reg src = ra_alloc1(as, ref, RSET_FPR); emit_rmro(as, XO_MOVSDto, src, RID_BASE, ofs); } else { - lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || - (LJ_DUALNUM && irt_isinteger(ir->t))); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || + (LJ_DUALNUM && irt_isinteger(ir->t)), + "restore of IR type %d", irt_type(ir->t)); if (!irref_isk(ref)) { Reg src = ra_alloc1(as, ref, rset_exclude(RSET_GPR, RID_BASE)); +#if LJ_GC64 + if (irt_is64(ir->t)) { + /* TODO: 64 bit store + 32 bit load-modify-store is suboptimal. */ + emit_u32(as, irt_toitype(ir->t) << 15); + emit_rmro(as, XO_ARITHi, XOg_OR, RID_BASE, ofs+4); + } else if (LJ_DUALNUM && irt_isinteger(ir->t)) { + emit_movmroi(as, RID_BASE, ofs+4, LJ_TISNUM << 15); + } else { + emit_movmroi(as, RID_BASE, ofs+4, (irt_toitype(ir->t)<<15)|0x7fff); + } +#endif emit_movtomro(as, REX_64IR(ir, src), RID_BASE, ofs); +#if LJ_GC64 + } else { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + if (tvisnil(&k)) { + emit_i32(as, -1); + emit_rmro(as, XO_MOVmi, REX_64, RID_BASE, ofs); + } else { + emit_movmroi(as, RID_BASE, ofs+4, k.u32.hi); + emit_movmroi(as, RID_BASE, ofs, k.u32.lo); + } +#else } else if (!irt_ispri(ir->t)) { emit_movmroi(as, RID_BASE, ofs, ir->i); +#endif } if ((sn & (SNAP_CONT|SNAP_FRAME))) { +#if !LJ_FR2 if (s != 0) /* Do not overwrite link to previous frame. */ emit_movmroi(as, RID_BASE, ofs+4, (int32_t)(*flinks--)); +#endif +#if !LJ_GC64 } else { if (!(LJ_64 && irt_islightud(ir->t))) emit_movmroi(as, RID_BASE, ofs+4, irt_toitype(ir->t)); +#endif } } checkmclim(as); } - lua_assert(map + nent == flinks); + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); } /* -- GC handling --------------------------------------------------------- */ @@ -2428,11 +2793,15 @@ static void asm_gc_check(ASMState *as) args[1] = ASMREF_TMP2; /* MSize steps */ asm_gencall(as, ci, args); tmp = ra_releasetmp(as, ASMREF_TMP1); +#if LJ_GC64 + emit_rmro(as, XO_LEA, tmp|REX_64, RID_DISPATCH, GG_DISP2G); +#else emit_loada(as, tmp, J2G(as->J)); +#endif emit_loadi(as, ra_releasetmp(as, ASMREF_TMP2), as->gcsteps); /* Jump around GC step if GC total < GC threshold. */ emit_sjcc(as, CC_B, l_end); - emit_opgl(as, XO_ARITH(XOg_CMP), tmp, gc.threshold); + emit_opgl(as, XO_ARITH(XOg_CMP), tmp|REX_GC64, gc.threshold); emit_getgl(as, tmp, gc.total); as->gcsteps = 0; checkmclim(as); @@ -2447,16 +2816,16 @@ static void asm_loop_fixup(ASMState *as) MCode *target = as->mcp; if (as->realign) { /* Realigned loops use short jumps. */ as->realign = NULL; /* Stop another retry. */ - lua_assert(((intptr_t)target & 15) == 0); + lj_assertA(((intptr_t)target & 15) == 0, "loop realign failed"); if (as->loopinv) { /* Inverted loop branch? */ p -= 5; p[0] = XI_JMP; - lua_assert(target - p >= -128); + lj_assertA(target - p >= -128, "loop realign failed"); p[-1] = (MCode)(target - p); /* Patch sjcc. */ if (as->loopinv == 2) p[-3] = (MCode)(target - p + 2); /* Patch opt. short jp. */ } else { - lua_assert(target - p >= -128); + lj_assertA(target - p >= -128, "loop realign failed"); p[-1] = (MCode)(int8_t)(target - p); /* Patch short jmp. */ p[-2] = XI_JMPs; } @@ -2485,6 +2854,12 @@ static void asm_loop_fixup(ASMState *as) } } +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do. */ +} + /* -- Head of trace ------------------------------------------------------- */ /* Coalesce BASE register for a root trace. */ @@ -2497,7 +2872,7 @@ static void asm_head_root_base(ASMState *as) if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ir->r = RID_INIT; /* No inheritance for modified BASE register. */ if (r != RID_BASE) - emit_rr(as, XO_MOV, r, RID_BASE); + emit_rr(as, XO_MOV, r|REX_GC64, RID_BASE); } } @@ -2513,8 +2888,9 @@ static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) if (irp->r == r) { rset_clear(allow, r); /* Mark same BASE register as coalesced. */ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { + /* Move from coalesced parent reg. */ rset_clear(allow, irp->r); - emit_rr(as, XO_MOV, r, irp->r); /* Move from coalesced parent reg. */ + emit_rr(as, XO_MOV, r|REX_GC64, irp->r); } else { emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ } @@ -2532,7 +2908,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) MCode *target, *q; int32_t spadj = as->T->spadjust; if (spadj == 0) { - p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0); + p -= LJ_64 ? 7 : 6; } else { MCode *p1; /* Patch stack adjustment. */ @@ -2544,24 +2920,15 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk) p1 = p-9; *(int32_t *)p1 = spadj; } - if ((as->flags & JIT_F_LEA_AGU)) { #if LJ_64 - p1[-4] = 0x48; + p1[-3] = 0x48; #endif - p1[-3] = (MCode)XI_LEA; - p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP); - p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP); - } else { -#if LJ_64 - p1[-3] = 0x48; -#endif - p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); - p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); - } + p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi); + p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP); } /* Patch exit branch. */ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = XI_JMP; /* Drop unused mcode tail. Fill with NOPs to make the prefetcher happy. */ for (q = as->mctop-1; q >= p; q--) @@ -2588,168 +2955,11 @@ static void asm_tail_prep(ASMState *as) as->invmcp = as->mcp = p; } else { /* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */ - as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0)); + as->mcp = p - (LJ_64 ? 7 : 6); as->invmcp = NULL; } } -/* -- Instruction dispatch ------------------------------------------------ */ - -/* Assemble a single instruction. */ -static void asm_ir(ASMState *as, IRIns *ir) -{ - switch ((IROp)ir->o) { - /* Miscellaneous ops. */ - case IR_LOOP: asm_loop(as); break; - case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break; - case IR_USE: - ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break; - case IR_PHI: asm_phi(as, ir); break; - case IR_HIOP: asm_hiop(as, ir); break; - case IR_GCSTEP: asm_gcstep(as, ir); break; - - /* Guarded assertions. */ - case IR_LT: case IR_GE: case IR_LE: case IR_GT: - case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: - case IR_EQ: case IR_NE: case IR_ABC: - asm_comp(as, ir, asm_compmap[ir->o]); - break; - - case IR_RETF: asm_retf(as, ir); break; - - /* Bit ops. */ - case IR_BNOT: asm_neg_not(as, ir, XOg_NOT); break; - case IR_BSWAP: asm_bitswap(as, ir); break; - - case IR_BAND: asm_intarith(as, ir, XOg_AND); break; - case IR_BOR: asm_intarith(as, ir, XOg_OR); break; - case IR_BXOR: asm_intarith(as, ir, XOg_XOR); break; - - case IR_BSHL: asm_bitshift(as, ir, XOg_SHL); break; - case IR_BSHR: asm_bitshift(as, ir, XOg_SHR); break; - case IR_BSAR: asm_bitshift(as, ir, XOg_SAR); break; - case IR_BROL: asm_bitshift(as, ir, XOg_ROL); break; - case IR_BROR: asm_bitshift(as, ir, XOg_ROR); break; - - /* Arithmetic ops. */ - case IR_ADD: asm_add(as, ir); break; - case IR_SUB: - if (irt_isnum(ir->t)) - asm_fparith(as, ir, XO_SUBSD); - else /* Note: no need for LEA trick here. i-k is encoded as i+(-k). */ - asm_intarith(as, ir, XOg_SUB); - break; - case IR_MUL: - if (irt_isnum(ir->t)) - asm_fparith(as, ir, XO_MULSD); - else - asm_intarith(as, ir, XOg_X_IMUL); - break; - case IR_DIV: -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : - IRCALL_lj_carith_divu64); - else -#endif - asm_fparith(as, ir, XO_DIVSD); - break; - case IR_MOD: -#if LJ_64 && LJ_HASFFI - if (!irt_isint(ir->t)) - asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : - IRCALL_lj_carith_modu64); - else -#endif - asm_intmod(as, ir); - break; - - case IR_NEG: - if (irt_isnum(ir->t)) - asm_fparith(as, ir, XO_XORPS); - else - asm_neg_not(as, ir, XOg_NEG); - break; - case IR_ABS: asm_fparith(as, ir, XO_ANDPS); break; - - case IR_MIN: - if (irt_isnum(ir->t)) - asm_fparith(as, ir, XO_MINSD); - else - asm_min_max(as, ir, CC_G); - break; - case IR_MAX: - if (irt_isnum(ir->t)) - asm_fparith(as, ir, XO_MAXSD); - else - asm_min_max(as, ir, CC_L); - break; - - case IR_FPMATH: case IR_ATAN2: case IR_LDEXP: - asm_fpmath(as, ir); - break; - case IR_POW: -#if LJ_64 && LJ_HASFFI - if (!irt_isnum(ir->t)) - asm_arith64(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : - IRCALL_lj_carith_powu64); - else -#endif - asm_fppowi(as, ir); - break; - - /* Overflow-checking arithmetic ops. Note: don't use LEA here! */ - case IR_ADDOV: asm_intarith(as, ir, XOg_ADD); break; - case IR_SUBOV: asm_intarith(as, ir, XOg_SUB); break; - case IR_MULOV: asm_intarith(as, ir, XOg_X_IMUL); break; - - /* Memory references. */ - case IR_AREF: asm_aref(as, ir); break; - case IR_HREF: asm_href(as, ir); break; - case IR_HREFK: asm_hrefk(as, ir); break; - case IR_NEWREF: asm_newref(as, ir); break; - case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break; - case IR_FREF: asm_fref(as, ir); break; - case IR_STRREF: asm_strref(as, ir); break; - - /* Loads and stores. */ - case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: - asm_ahuvload(as, ir); - break; - case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break; - case IR_SLOAD: asm_sload(as, ir); break; - - case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break; - case IR_FSTORE: case IR_XSTORE: asm_fxstore(as, ir); break; - - /* Allocations. */ - case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break; - case IR_TNEW: asm_tnew(as, ir); break; - case IR_TDUP: asm_tdup(as, ir); break; - case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break; - - /* Write barriers. */ - case IR_TBAR: asm_tbar(as, ir); break; - case IR_OBAR: asm_obar(as, ir); break; - - /* Type conversions. */ - case IR_TOBIT: asm_tobit(as, ir); break; - case IR_CONV: asm_conv(as, ir); break; - case IR_TOSTR: asm_tostr(as, ir); break; - case IR_STRTO: asm_strto(as, ir); break; - - /* Calls. */ - case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break; - case IR_CALLXS: asm_callx(as, ir); break; - case IR_CARG: break; - - default: - setintV(&as->J->errinfo, ir->o); - lj_trace_err_info(as->J, LJ_TRERR_NYIIR); - break; - } -} - /* -- Trace setup --------------------------------------------------------- */ /* Ensure there are enough stack slots for call arguments. */ @@ -2772,6 +2982,7 @@ static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) static void asm_setup_target(ASMState *as) { asm_exitstub_setup(as, as->T->nsnap); + as->mrm.base = 0; } /* -- Trace patching ------------------------------------------------------ */ @@ -2885,18 +3096,24 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) MCode *px = exitstub_addr(J, exitno) - 6; MCode *pe = p+len-6; MCode *pgc = NULL; - uint32_t stateaddr = u32ptr(&J2G(J)->vmstate); +#if LJ_GC64 + uint32_t statei = (uint32_t)(GG_OFS(g.vmstate) - GG_OFS(dispatch)); +#else + uint32_t statei = u32ptr(&J2G(J)->vmstate); +#endif if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) - *(int32_t *)(p+len-4) = jmprel(p+len, target); + *(int32_t *)(p+len-4) = jmprel(J, p+len, target); /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ - for (; p < pe; p += asm_x86_inslen(p)) - if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) + for (; p < pe; p += asm_x86_inslen(p)) { + intptr_t ofs = LJ_GC64 ? (p[0] & 0xf0) == 0x40 : LJ_64; + if (*(uint32_t *)(p+2+ofs) == statei && p[ofs+LJ_GC64-LJ_64] == XI_MOVmi) break; - lua_assert(p < pe); + } + lj_assertJ(p < pe, "instruction length decoder failed"); for (; p < pe; p += asm_x86_inslen(p)) { if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px && p != pgc) { - *(int32_t *)(p+2) = jmprel(p+6, target); + *(int32_t *)(p+2) = jmprel(J, p+6, target); } else if (*p == XI_CALL && (void *)(p+5+*(int32_t *)(p+1)) == (void *)lj_gc_step_jit) { pgc = p+7; /* Do not patch GC check exit. */ diff --git a/src/lj_assert.c b/src/lj_assert.c new file mode 100644 index 00000000..4b713b2b --- /dev/null +++ b/src/lj_assert.c @@ -0,0 +1,28 @@ +/* +** Internal assertions. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_assert_c +#define LUA_CORE + +#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) + +#include <stdio.h> + +#include "lj_obj.h" + +void lj_assert_fail(global_State *g, const char *file, int line, + const char *func, const char *fmt, ...) +{ + va_list argp; + va_start(argp, fmt); + fprintf(stderr, "LuaJIT ASSERT %s:%d: %s: ", file, line, func); + vfprintf(stderr, fmt, argp); + fputc('\n', stderr); + va_end(argp); + UNUSED(g); /* May be NULL. TODO: optionally dump state. */ + abort(); +} + +#endif diff --git a/src/lj_bc.h b/src/lj_bc.h index 22c43caa..02356e5b 100644 --- a/src/lj_bc.h +++ b/src/lj_bc.h @@ -89,6 +89,8 @@ _(ISFC, dst, ___, var, ___) \ _(IST, ___, ___, var, ___) \ _(ISF, ___, ___, var, ___) \ + _(ISTYPE, var, ___, lit, ___) \ + _(ISNUM, var, ___, lit, ___) \ \ /* Unary ops. */ \ _(MOV, dst, ___, var, ___) \ @@ -143,10 +145,12 @@ _(TGETV, dst, var, var, index) \ _(TGETS, dst, var, str, index) \ _(TGETB, dst, var, lit, index) \ + _(TGETR, dst, var, var, index) \ _(TSETV, var, var, var, newindex) \ _(TSETS, var, var, str, newindex) \ _(TSETB, var, var, lit, newindex) \ _(TSETM, base, ___, num, newindex) \ + _(TSETR, var, var, var, newindex) \ \ /* Calls and vararg handling. T = tail call. */ \ _(CALLM, base, lit, lit, call) \ diff --git a/src/lj_bcdump.h b/src/lj_bcdump.h index 8ca62f80..69da16e9 100644 --- a/src/lj_bcdump.h +++ b/src/lj_bcdump.h @@ -36,14 +36,15 @@ /* If you perform *any* kind of private modifications to the bytecode itself ** or to the dump format, you *must* set BCDUMP_VERSION to 0x80 or higher. */ -#define BCDUMP_VERSION 1 +#define BCDUMP_VERSION 2 /* Compatibility flags. */ #define BCDUMP_F_BE 0x01 #define BCDUMP_F_STRIP 0x02 #define BCDUMP_F_FFI 0x04 +#define BCDUMP_F_FR2 0x08 -#define BCDUMP_F_KNOWN (BCDUMP_F_FFI*2-1) +#define BCDUMP_F_KNOWN (BCDUMP_F_FR2*2-1) /* Type codes for the GC constants of a prototype. Plus length for strings. */ enum { @@ -61,6 +62,7 @@ enum { LJ_FUNC int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, int strip); +LJ_FUNC GCproto *lj_bcread_proto(LexState *ls); LJ_FUNC GCproto *lj_bcread(LexState *ls); #endif diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 4a925f1c..2ce05707 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -9,6 +9,7 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_err.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_bc.h" @@ -20,6 +21,7 @@ #include "lj_lex.h" #include "lj_bcdump.h" #include "lj_state.h" +#include "lj_strfmt.h" /* Reuse some lexer fields for our own purposes. */ #define bcread_flags(ls) ls->level @@ -38,85 +40,74 @@ static LJ_NOINLINE void bcread_error(LexState *ls, ErrMsg em) const char *name = ls->chunkarg; if (*name == BCDUMP_HEAD1) name = "(binary)"; else if (*name == '@' || *name == '=') name++; - lj_str_pushf(L, "%s: %s", name, err2msg(em)); + lj_strfmt_pushf(L, "%s: %s", name, err2msg(em)); lj_err_throw(L, LUA_ERRSYNTAX); } -/* Resize input buffer. */ -static void bcread_resize(LexState *ls, MSize len) -{ - if (ls->sb.sz < len) { - MSize sz = ls->sb.sz * 2; - while (len > sz) sz = sz * 2; - lj_str_resizebuf(ls->L, &ls->sb, sz); - /* Caveat: this may change ls->sb.buf which may affect ls->p. */ - } -} - -/* Refill buffer if needed. */ +/* Refill buffer. */ static LJ_NOINLINE void bcread_fill(LexState *ls, MSize len, int need) { - lua_assert(len != 0); - if (len > LJ_MAX_MEM || ls->current < 0) + lj_assertLS(len != 0, "empty refill"); + if (len > LJ_MAX_BUF || ls->c < 0) bcread_error(ls, LJ_ERR_BCBAD); do { const char *buf; - size_t size; - if (ls->n) { /* Copy remainder to buffer. */ - if (ls->sb.n) { /* Move down in buffer. */ - lua_assert(ls->p + ls->n == ls->sb.buf + ls->sb.n); - if (ls->n != ls->sb.n) - memmove(ls->sb.buf, ls->p, ls->n); + size_t sz; + char *p = ls->sb.b; + MSize n = (MSize)(ls->pe - ls->p); + if (n) { /* Copy remainder to buffer. */ + if (sbuflen(&ls->sb)) { /* Move down in buffer. */ + lj_assertLS(ls->pe == ls->sb.w, "bad buffer pointer"); + if (ls->p != p) memmove(p, ls->p, n); } else { /* Copy from buffer provided by reader. */ - bcread_resize(ls, len); - memcpy(ls->sb.buf, ls->p, ls->n); + p = lj_buf_need(&ls->sb, len); + memcpy(p, ls->p, n); } - ls->p = ls->sb.buf; + ls->p = p; + ls->pe = p + n; } - ls->sb.n = ls->n; - buf = ls->rfunc(ls->L, ls->rdata, &size); /* Get more data from reader. */ - if (buf == NULL || size == 0) { /* EOF? */ + ls->sb.w = p + n; + buf = ls->rfunc(ls->L, ls->rdata, &sz); /* Get more data from reader. */ + if (buf == NULL || sz == 0) { /* EOF? */ if (need) bcread_error(ls, LJ_ERR_BCBAD); - ls->current = -1; /* Only bad if we get called again. */ + ls->c = -1; /* Only bad if we get called again. */ break; } - if (size >= LJ_MAX_MEM - ls->sb.n) lj_err_mem(ls->L); - if (ls->sb.n) { /* Append to buffer. */ - MSize n = ls->sb.n + (MSize)size; - bcread_resize(ls, n < len ? len : n); - memcpy(ls->sb.buf + ls->sb.n, buf, size); - ls->n = ls->sb.n = n; - ls->p = ls->sb.buf; + if (sz >= LJ_MAX_BUF - n) lj_err_mem(ls->L); + if (n) { /* Append to buffer. */ + n += (MSize)sz; + p = lj_buf_need(&ls->sb, n < len ? len : n); + memcpy(ls->sb.w, buf, sz); + ls->sb.w = p + n; + ls->p = p; + ls->pe = p + n; } else { /* Return buffer provided by reader. */ - ls->n = (MSize)size; ls->p = buf; + ls->pe = buf + sz; } - } while (ls->n < len); + } while ((MSize)(ls->pe - ls->p) < len); } /* Need a certain number of bytes. */ static LJ_AINLINE void bcread_need(LexState *ls, MSize len) { - if (LJ_UNLIKELY(ls->n < len)) + if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) bcread_fill(ls, len, 1); } /* Want to read up to a certain number of bytes, but may need less. */ static LJ_AINLINE void bcread_want(LexState *ls, MSize len) { - if (LJ_UNLIKELY(ls->n < len)) + if (LJ_UNLIKELY((MSize)(ls->pe - ls->p) < len)) bcread_fill(ls, len, 0); } -#define bcread_dec(ls) check_exp(ls->n > 0, ls->n--) -#define bcread_consume(ls, len) check_exp(ls->n >= (len), ls->n -= (len)) - /* Return memory block from buffer. */ -static uint8_t *bcread_mem(LexState *ls, MSize len) +static LJ_AINLINE uint8_t *bcread_mem(LexState *ls, MSize len) { uint8_t *p = (uint8_t *)ls->p; - bcread_consume(ls, len); - ls->p = (char *)p + len; + ls->p += len; + lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); return p; } @@ -129,25 +120,15 @@ static void bcread_block(LexState *ls, void *q, MSize len) /* Read byte from buffer. */ static LJ_AINLINE uint32_t bcread_byte(LexState *ls) { - bcread_dec(ls); + lj_assertLS(ls->p < ls->pe, "buffer read overflow"); return (uint32_t)(uint8_t)*ls->p++; } /* Read ULEB128 value from buffer. */ -static uint32_t bcread_uleb128(LexState *ls) +static LJ_AINLINE uint32_t bcread_uleb128(LexState *ls) { - const uint8_t *p = (const uint8_t *)ls->p; - uint32_t v = *p++; - if (LJ_UNLIKELY(v >= 0x80)) { - int sh = 0; - v &= 0x7f; - do { - v |= ((*p & 0x7f) << (sh += 7)); - bcread_dec(ls); - } while (*p++ >= 0x80); - } - bcread_dec(ls); - ls->p = (char *)p; + uint32_t v = lj_buf_ruleb128(&ls->p); + lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); return v; } @@ -161,11 +142,10 @@ static uint32_t bcread_uleb128_33(LexState *ls) v &= 0x3f; do { v |= ((*p & 0x7f) << (sh += 7)); - bcread_dec(ls); } while (*p++ >= 0x80); } - bcread_dec(ls); ls->p = (char *)p; + lj_assertLS(ls->p <= ls->pe, "buffer read overflow"); return v; } @@ -212,8 +192,8 @@ static void bcread_ktabk(LexState *ls, TValue *o) o->u32.lo = bcread_uleb128(ls); o->u32.hi = bcread_uleb128(ls); } else { - lua_assert(tp <= BCDUMP_KTAB_TRUE); - setitype(o, ~tp); + lj_assertLS(tp <= BCDUMP_KTAB_TRUE, "bad constant type %d", tp); + setpriV(o, ~tp); } } @@ -234,7 +214,7 @@ static GCtab *bcread_ktab(LexState *ls) for (i = 0; i < nhash; i++) { TValue key; bcread_ktabk(ls, &key); - lua_assert(!tvisnil(&key)); + lj_assertLS(!tvisnil(&key), "nil key"); bcread_ktabk(ls, lj_tab_set(ls->L, t, &key)); } } @@ -271,7 +251,7 @@ static void bcread_kgc(LexState *ls, GCproto *pt, MSize sizekgc) #endif } else { lua_State *L = ls->L; - lua_assert(tp == BCDUMP_KGC_CHILD); + lj_assertLS(tp == BCDUMP_KGC_CHILD, "bad constant type %d", tp); if (L->top <= bcread_oldtop(L, ls)) /* Stack underflow? */ bcread_error(ls, LJ_ERR_BCBAD); L->top--; @@ -327,25 +307,13 @@ static void bcread_uv(LexState *ls, GCproto *pt, MSize sizeuv) } /* Read a prototype. */ -static GCproto *bcread_proto(LexState *ls) +GCproto *lj_bcread_proto(LexState *ls) { GCproto *pt; MSize framesize, numparams, flags, sizeuv, sizekgc, sizekn, sizebc, sizept; MSize ofsk, ofsuv, ofsdbg; MSize sizedbg = 0; BCLine firstline = 0, numline = 0; - MSize len, startn; - - /* Read length. */ - if (ls->n > 0 && ls->p[0] == 0) { /* Shortcut EOF. */ - ls->n--; ls->p++; - return NULL; - } - bcread_want(ls, 5); - len = bcread_uleb128(ls); - if (!len) return NULL; /* EOF */ - bcread_need(ls, len); - startn = ls->n; /* Read prototype header. */ flags = bcread_byte(ls); @@ -414,9 +382,6 @@ static GCproto *bcread_proto(LexState *ls) setmref(pt->uvinfo, NULL); setmref(pt->varinfo, NULL); } - - if (len != startn - ls->n) - bcread_error(ls, LJ_ERR_BCBAD); return pt; } @@ -430,14 +395,11 @@ static int bcread_header(LexState *ls) bcread_byte(ls) != BCDUMP_VERSION) return 0; bcread_flags(ls) = flags = bcread_uleb128(ls); if ((flags & ~(BCDUMP_F_KNOWN)) != 0) return 0; + if ((flags & BCDUMP_F_FR2) != LJ_FR2*BCDUMP_F_FR2) return 0; if ((flags & BCDUMP_F_FFI)) { #if LJ_HASFFI lua_State *L = ls->L; - if (!ctype_ctsG(G(L))) { - ptrdiff_t oldtop = savestack(L, L->top); - luaopen_ffi(L); /* Load FFI library on-demand. */ - L->top = restorestack(L, oldtop); - } + ctype_loadffi(L); #else return 0; #endif @@ -456,19 +418,33 @@ static int bcread_header(LexState *ls) GCproto *lj_bcread(LexState *ls) { lua_State *L = ls->L; - lua_assert(ls->current == BCDUMP_HEAD1); + lj_assertLS(ls->c == BCDUMP_HEAD1, "bad bytecode header"); bcread_savetop(L, ls, L->top); - lj_str_resetbuf(&ls->sb); + lj_buf_reset(&ls->sb); /* Check for a valid bytecode dump header. */ if (!bcread_header(ls)) bcread_error(ls, LJ_ERR_BCFMT); for (;;) { /* Process all prototypes in the bytecode dump. */ - GCproto *pt = bcread_proto(ls); - if (!pt) break; + GCproto *pt; + MSize len; + const char *startp; + /* Read length. */ + if (ls->p < ls->pe && ls->p[0] == 0) { /* Shortcut EOF. */ + ls->p++; + break; + } + bcread_want(ls, 5); + len = bcread_uleb128(ls); + if (!len) break; /* EOF */ + bcread_need(ls, len); + startp = ls->p; + pt = lj_bcread_proto(ls); + if (ls->p != startp + len) + bcread_error(ls, LJ_ERR_BCBAD); setprotoV(L, L->top, pt); incr_top(L); } - if ((ls->n && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) + if ((ls->pe != ls->p && !ls->endmark) || L->top-1 != bcread_oldtop(L, ls)) bcread_error(ls, LJ_ERR_BCBAD); /* Pop off last prototype. */ L->top--; diff --git a/src/lj_bcwrite.c b/src/lj_bcwrite.c index d836497e..2c70ff47 100644 --- a/src/lj_bcwrite.c +++ b/src/lj_bcwrite.c @@ -8,7 +8,7 @@ #include "lj_obj.h" #include "lj_gc.h" -#include "lj_str.h" +#include "lj_buf.h" #include "lj_bc.h" #if LJ_HASFFI #include "lj_ctype.h" @@ -17,99 +17,67 @@ #include "lj_dispatch.h" #include "lj_jit.h" #endif +#include "lj_strfmt.h" #include "lj_bcdump.h" #include "lj_vm.h" /* Context for bytecode writer. */ typedef struct BCWriteCtx { SBuf sb; /* Output buffer. */ - lua_State *L; /* Lua state. */ GCproto *pt; /* Root prototype. */ lua_Writer wfunc; /* Writer callback. */ void *wdata; /* Writer callback data. */ int strip; /* Strip debug info. */ int status; /* Status from writer callback. */ +#ifdef LUA_USE_ASSERT + global_State *g; +#endif } BCWriteCtx; -/* -- Output buffer handling ---------------------------------------------- */ - -/* Resize buffer if needed. */ -static LJ_NOINLINE void bcwrite_resize(BCWriteCtx *ctx, MSize len) -{ - MSize sz = ctx->sb.sz * 2; - while (ctx->sb.n + len > sz) sz = sz * 2; - lj_str_resizebuf(ctx->L, &ctx->sb, sz); -} - -/* Need a certain amount of buffer space. */ -static LJ_AINLINE void bcwrite_need(BCWriteCtx *ctx, MSize len) -{ - if (LJ_UNLIKELY(ctx->sb.n + len > ctx->sb.sz)) - bcwrite_resize(ctx, len); -} - -/* Add memory block to buffer. */ -static void bcwrite_block(BCWriteCtx *ctx, const void *p, MSize len) -{ - uint8_t *q = (uint8_t *)(ctx->sb.buf + ctx->sb.n); - MSize i; - ctx->sb.n += len; - for (i = 0; i < len; i++) q[i] = ((uint8_t *)p)[i]; -} - -/* Add byte to buffer. */ -static LJ_AINLINE void bcwrite_byte(BCWriteCtx *ctx, uint8_t b) -{ - ctx->sb.buf[ctx->sb.n++] = b; -} - -/* Add ULEB128 value to buffer. */ -static void bcwrite_uleb128(BCWriteCtx *ctx, uint32_t v) -{ - MSize n = ctx->sb.n; - uint8_t *p = (uint8_t *)ctx->sb.buf; - for (; v >= 0x80; v >>= 7) - p[n++] = (uint8_t)((v & 0x7f) | 0x80); - p[n++] = (uint8_t)v; - ctx->sb.n = n; -} +#ifdef LUA_USE_ASSERT +#define lj_assertBCW(c, ...) lj_assertG_(ctx->g, (c), __VA_ARGS__) +#else +#define lj_assertBCW(c, ...) ((void)ctx) +#endif /* -- Bytecode writer ----------------------------------------------------- */ /* Write a single constant key/value of a template table. */ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow) { - bcwrite_need(ctx, 1+10); + char *p = lj_buf_more(&ctx->sb, 1+10); if (tvisstr(o)) { const GCstr *str = strV(o); MSize len = str->len; - bcwrite_need(ctx, 5+len); - bcwrite_uleb128(ctx, BCDUMP_KTAB_STR+len); - bcwrite_block(ctx, strdata(str), len); + p = lj_buf_more(&ctx->sb, 5+len); + p = lj_strfmt_wuleb128(p, BCDUMP_KTAB_STR+len); + p = lj_buf_wmem(p, strdata(str), len); } else if (tvisint(o)) { - bcwrite_byte(ctx, BCDUMP_KTAB_INT); - bcwrite_uleb128(ctx, intV(o)); + *p++ = BCDUMP_KTAB_INT; + p = lj_strfmt_wuleb128(p, intV(o)); } else if (tvisnum(o)) { if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ lua_Number num = numV(o); int32_t k = lj_num2int(num); if (num == (lua_Number)k) { /* -0 is never a constant. */ - bcwrite_byte(ctx, BCDUMP_KTAB_INT); - bcwrite_uleb128(ctx, k); + *p++ = BCDUMP_KTAB_INT; + p = lj_strfmt_wuleb128(p, k); + ctx->sb.w = p; return; } } - bcwrite_byte(ctx, BCDUMP_KTAB_NUM); - bcwrite_uleb128(ctx, o->u32.lo); - bcwrite_uleb128(ctx, o->u32.hi); + *p++ = BCDUMP_KTAB_NUM; + p = lj_strfmt_wuleb128(p, o->u32.lo); + p = lj_strfmt_wuleb128(p, o->u32.hi); } else { - lua_assert(tvispri(o)); - bcwrite_byte(ctx, BCDUMP_KTAB_NIL+~itype(o)); + lj_assertBCW(tvispri(o), "unhandled type %d", itype(o)); + *p++ = BCDUMP_KTAB_NIL+~itype(o); } + ctx->sb.w = p; } /* Write a template table. */ -static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) +static void bcwrite_ktab(BCWriteCtx *ctx, char *p, const GCtab *t) { MSize narray = 0, nhash = 0; if (t->asize > 0) { /* Determine max. length of array part. */ @@ -127,8 +95,9 @@ static void bcwrite_ktab(BCWriteCtx *ctx, const GCtab *t) nhash += !tvisnil(&node[i].val); } /* Write number of array slots and hash slots. */ - bcwrite_uleb128(ctx, narray); - bcwrite_uleb128(ctx, nhash); + p = lj_strfmt_wuleb128(p, narray); + p = lj_strfmt_wuleb128(p, nhash); + ctx->sb.w = p; if (narray) { /* Write array entries (may contain nil). */ MSize i; TValue *o = tvref(t->array); @@ -155,12 +124,13 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) for (i = 0; i < sizekgc; i++, kr++) { GCobj *o = gcref(*kr); MSize tp, need = 1; + char *p; /* Determine constant type and needed size. */ if (o->gch.gct == ~LJ_TSTR) { tp = BCDUMP_KGC_STR + gco2str(o)->len; need = 5+gco2str(o)->len; } else if (o->gch.gct == ~LJ_TPROTO) { - lua_assert((pt->flags & PROTO_CHILD)); + lj_assertBCW((pt->flags & PROTO_CHILD), "prototype has unexpected child"); tp = BCDUMP_KGC_CHILD; #if LJ_HASFFI } else if (o->gch.gct == ~LJ_TCDATA) { @@ -171,34 +141,38 @@ static void bcwrite_kgc(BCWriteCtx *ctx, GCproto *pt) } else if (id == CTID_UINT64) { tp = BCDUMP_KGC_U64; } else { - lua_assert(id == CTID_COMPLEX_DOUBLE); + lj_assertBCW(id == CTID_COMPLEX_DOUBLE, + "bad cdata constant CTID %d", id); tp = BCDUMP_KGC_COMPLEX; } #endif } else { - lua_assert(o->gch.gct == ~LJ_TTAB); + lj_assertBCW(o->gch.gct == ~LJ_TTAB, + "bad constant GC type %d", o->gch.gct); tp = BCDUMP_KGC_TAB; need = 1+2*5; } /* Write constant type. */ - bcwrite_need(ctx, need); - bcwrite_uleb128(ctx, tp); + p = lj_buf_more(&ctx->sb, need); + p = lj_strfmt_wuleb128(p, tp); /* Write constant data (if any). */ if (tp >= BCDUMP_KGC_STR) { - bcwrite_block(ctx, strdata(gco2str(o)), gco2str(o)->len); + p = lj_buf_wmem(p, strdata(gco2str(o)), gco2str(o)->len); } else if (tp == BCDUMP_KGC_TAB) { - bcwrite_ktab(ctx, gco2tab(o)); + bcwrite_ktab(ctx, p, gco2tab(o)); + continue; #if LJ_HASFFI } else if (tp != BCDUMP_KGC_CHILD) { - cTValue *p = (TValue *)cdataptr(gco2cd(o)); - bcwrite_uleb128(ctx, p[0].u32.lo); - bcwrite_uleb128(ctx, p[0].u32.hi); + cTValue *q = (TValue *)cdataptr(gco2cd(o)); + p = lj_strfmt_wuleb128(p, q[0].u32.lo); + p = lj_strfmt_wuleb128(p, q[0].u32.hi); if (tp == BCDUMP_KGC_COMPLEX) { - bcwrite_uleb128(ctx, p[1].u32.lo); - bcwrite_uleb128(ctx, p[1].u32.hi); + p = lj_strfmt_wuleb128(p, q[1].u32.lo); + p = lj_strfmt_wuleb128(p, q[1].u32.hi); } #endif } + ctx->sb.w = p; } } @@ -207,7 +181,7 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) { MSize i, sizekn = pt->sizekn; cTValue *o = mref(pt->k, TValue); - bcwrite_need(ctx, 10*sizekn); + char *p = lj_buf_more(&ctx->sb, 10*sizekn); for (i = 0; i < sizekn; i++, o++) { int32_t k; if (tvisint(o)) { @@ -220,55 +194,55 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt) k = lj_num2int(num); if (num == (lua_Number)k) { /* -0 is never a constant. */ save_int: - bcwrite_uleb128(ctx, 2*(uint32_t)k | ((uint32_t)k & 0x80000000u)); - if (k < 0) { - char *p = &ctx->sb.buf[ctx->sb.n-1]; - *p = (*p & 7) | ((k>>27) & 0x18); - } + p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); + if (k < 0) + p[-1] = (p[-1] & 7) | ((k>>27) & 0x18); continue; } } - bcwrite_uleb128(ctx, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); - if (o->u32.lo >= 0x80000000u) { - char *p = &ctx->sb.buf[ctx->sb.n-1]; - *p = (*p & 7) | ((o->u32.lo>>27) & 0x18); - } - bcwrite_uleb128(ctx, o->u32.hi); + p = lj_strfmt_wuleb128(p, 1+(2*o->u32.lo | (o->u32.lo & 0x80000000u))); + if (o->u32.lo >= 0x80000000u) + p[-1] = (p[-1] & 7) | ((o->u32.lo>>27) & 0x18); + p = lj_strfmt_wuleb128(p, o->u32.hi); } } + ctx->sb.w = p; } /* Write bytecode instructions. */ -static void bcwrite_bytecode(BCWriteCtx *ctx, GCproto *pt) +static char *bcwrite_bytecode(BCWriteCtx *ctx, char *p, GCproto *pt) { MSize nbc = pt->sizebc-1; /* Omit the [JI]FUNC* header. */ #if LJ_HASJIT - uint8_t *p = (uint8_t *)&ctx->sb.buf[ctx->sb.n]; + uint8_t *q = (uint8_t *)p; #endif - bcwrite_block(ctx, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); + p = lj_buf_wmem(p, proto_bc(pt)+1, nbc*(MSize)sizeof(BCIns)); + UNUSED(ctx); #if LJ_HASJIT /* Unpatch modified bytecode containing ILOOP/JLOOP etc. */ if ((pt->flags & PROTO_ILOOP) || pt->trace) { - jit_State *J = L2J(ctx->L); + jit_State *J = L2J(sbufL(&ctx->sb)); MSize i; - for (i = 0; i < nbc; i++, p += sizeof(BCIns)) { - BCOp op = (BCOp)p[LJ_ENDIAN_SELECT(0, 3)]; + for (i = 0; i < nbc; i++, q += sizeof(BCIns)) { + BCOp op = (BCOp)q[LJ_ENDIAN_SELECT(0, 3)]; if (op == BC_IFORL || op == BC_IITERL || op == BC_ILOOP || op == BC_JFORI) { - p[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); + q[LJ_ENDIAN_SELECT(0, 3)] = (uint8_t)(op-BC_IFORL+BC_FORL); } else if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { - BCReg rd = p[LJ_ENDIAN_SELECT(2, 1)] + (p[LJ_ENDIAN_SELECT(3, 0)] << 8); - memcpy(p, &traceref(J, rd)->startins, 4); + BCReg rd = q[LJ_ENDIAN_SELECT(2, 1)] + (q[LJ_ENDIAN_SELECT(3, 0)] << 8); + memcpy(q, &traceref(J, rd)->startins, 4); } } } #endif + return p; } /* Write prototype. */ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) { MSize sizedbg = 0; + char *p; /* Recursively write children of prototype. */ if ((pt->flags & PROTO_CHILD)) { @@ -282,31 +256,32 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) } /* Start writing the prototype info to a buffer. */ - lj_str_resetbuf(&ctx->sb); - ctx->sb.n = 5; /* Leave room for final size. */ - bcwrite_need(ctx, 4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); + p = lj_buf_need(&ctx->sb, + 5+4+6*5+(pt->sizebc-1)*(MSize)sizeof(BCIns)+pt->sizeuv*2); + p += 5; /* Leave room for final size. */ /* Write prototype header. */ - bcwrite_byte(ctx, (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI))); - bcwrite_byte(ctx, pt->numparams); - bcwrite_byte(ctx, pt->framesize); - bcwrite_byte(ctx, pt->sizeuv); - bcwrite_uleb128(ctx, pt->sizekgc); - bcwrite_uleb128(ctx, pt->sizekn); - bcwrite_uleb128(ctx, pt->sizebc-1); + *p++ = (pt->flags & (PROTO_CHILD|PROTO_VARARG|PROTO_FFI)); + *p++ = pt->numparams; + *p++ = pt->framesize; + *p++ = pt->sizeuv; + p = lj_strfmt_wuleb128(p, pt->sizekgc); + p = lj_strfmt_wuleb128(p, pt->sizekn); + p = lj_strfmt_wuleb128(p, pt->sizebc-1); if (!ctx->strip) { if (proto_lineinfo(pt)) sizedbg = pt->sizept - (MSize)((char *)proto_lineinfo(pt) - (char *)pt); - bcwrite_uleb128(ctx, sizedbg); + p = lj_strfmt_wuleb128(p, sizedbg); if (sizedbg) { - bcwrite_uleb128(ctx, pt->firstline); - bcwrite_uleb128(ctx, pt->numline); + p = lj_strfmt_wuleb128(p, pt->firstline); + p = lj_strfmt_wuleb128(p, pt->numline); } } /* Write bytecode instructions and upvalue refs. */ - bcwrite_bytecode(ctx, pt); - bcwrite_block(ctx, proto_uv(pt), pt->sizeuv*2); + p = bcwrite_bytecode(ctx, p, pt); + p = lj_buf_wmem(p, proto_uv(pt), pt->sizeuv*2); + ctx->sb.w = p; /* Write constants. */ bcwrite_kgc(ctx, pt); @@ -314,18 +289,19 @@ static void bcwrite_proto(BCWriteCtx *ctx, GCproto *pt) /* Write debug info, if not stripped. */ if (sizedbg) { - bcwrite_need(ctx, sizedbg); - bcwrite_block(ctx, proto_lineinfo(pt), sizedbg); + p = lj_buf_more(&ctx->sb, sizedbg); + p = lj_buf_wmem(p, proto_lineinfo(pt), sizedbg); + ctx->sb.w = p; } /* Pass buffer to writer function. */ if (ctx->status == 0) { - MSize n = ctx->sb.n - 5; + MSize n = sbuflen(&ctx->sb) - 5; MSize nn = (lj_fls(n)+8)*9 >> 6; - ctx->sb.n = 5 - nn; - bcwrite_uleb128(ctx, n); /* Fill in final size. */ - lua_assert(ctx->sb.n == 5); - ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf+5-nn, nn+n, ctx->wdata); + char *q = ctx->sb.b + (5 - nn); + p = lj_strfmt_wuleb128(q, n); /* Fill in final size. */ + lj_assertBCW(p == ctx->sb.b + 5, "bad ULEB128 write"); + ctx->status = ctx->wfunc(sbufL(&ctx->sb), q, nn+n, ctx->wdata); } } @@ -335,20 +311,21 @@ static void bcwrite_header(BCWriteCtx *ctx) GCstr *chunkname = proto_chunkname(ctx->pt); const char *name = strdata(chunkname); MSize len = chunkname->len; - lj_str_resetbuf(&ctx->sb); - bcwrite_need(ctx, 5+5+len); - bcwrite_byte(ctx, BCDUMP_HEAD1); - bcwrite_byte(ctx, BCDUMP_HEAD2); - bcwrite_byte(ctx, BCDUMP_HEAD3); - bcwrite_byte(ctx, BCDUMP_VERSION); - bcwrite_byte(ctx, (ctx->strip ? BCDUMP_F_STRIP : 0) + - (LJ_BE ? BCDUMP_F_BE : 0) + - ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0)); + char *p = lj_buf_need(&ctx->sb, 5+5+len); + *p++ = BCDUMP_HEAD1; + *p++ = BCDUMP_HEAD2; + *p++ = BCDUMP_HEAD3; + *p++ = BCDUMP_VERSION; + *p++ = (ctx->strip ? BCDUMP_F_STRIP : 0) + + LJ_BE*BCDUMP_F_BE + + ((ctx->pt->flags & PROTO_FFI) ? BCDUMP_F_FFI : 0) + + LJ_FR2*BCDUMP_F_FR2; if (!ctx->strip) { - bcwrite_uleb128(ctx, len); - bcwrite_block(ctx, name, len); + p = lj_strfmt_wuleb128(p, len); + p = lj_buf_wmem(p, name, len); } - ctx->status = ctx->wfunc(ctx->L, ctx->sb.buf, ctx->sb.n, ctx->wdata); + ctx->status = ctx->wfunc(sbufL(&ctx->sb), ctx->sb.b, + (MSize)(p - ctx->sb.b), ctx->wdata); } /* Write footer of bytecode dump. */ @@ -356,7 +333,7 @@ static void bcwrite_footer(BCWriteCtx *ctx) { if (ctx->status == 0) { uint8_t zero = 0; - ctx->status = ctx->wfunc(ctx->L, &zero, 1, ctx->wdata); + ctx->status = ctx->wfunc(sbufL(&ctx->sb), &zero, 1, ctx->wdata); } } @@ -364,8 +341,8 @@ static void bcwrite_footer(BCWriteCtx *ctx) static TValue *cpwriter(lua_State *L, lua_CFunction dummy, void *ud) { BCWriteCtx *ctx = (BCWriteCtx *)ud; - UNUSED(dummy); - lj_str_resizebuf(L, &ctx->sb, 1024); /* Avoids resize for most prototypes. */ + UNUSED(L); UNUSED(dummy); + lj_buf_need(&ctx->sb, 1024); /* Avoids resize for most prototypes. */ bcwrite_header(ctx); bcwrite_proto(ctx, ctx->pt); bcwrite_footer(ctx); @@ -378,16 +355,18 @@ int lj_bcwrite(lua_State *L, GCproto *pt, lua_Writer writer, void *data, { BCWriteCtx ctx; int status; - ctx.L = L; ctx.pt = pt; ctx.wfunc = writer; ctx.wdata = data; ctx.strip = strip; ctx.status = 0; - lj_str_initbuf(&ctx.sb); +#ifdef LUA_USE_ASSERT + ctx.g = G(L); +#endif + lj_buf_init(L, &ctx.sb); status = lj_vm_cpcall(L, NULL, &ctx, cpwriter); if (status == 0) status = ctx.status; - lj_str_freebuf(G(ctx.L), &ctx.sb); + lj_buf_free(G(sbufL(&ctx.sb)), &ctx.sb); return status; } diff --git a/src/lj_buf.c b/src/lj_buf.c new file mode 100644 index 00000000..cf268af2 --- /dev/null +++ b/src/lj_buf.c @@ -0,0 +1,305 @@ +/* +** Buffer handling. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_buf_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_err.h" +#include "lj_buf.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_strfmt.h" + +/* -- Buffer management --------------------------------------------------- */ + +static void buf_grow(SBuf *sb, MSize sz) +{ + MSize osz = sbufsz(sb), len = sbuflen(sb), nsz = osz; + char *b; + GCSize flag; + if (nsz < LJ_MIN_SBUF) nsz = LJ_MIN_SBUF; + while (nsz < sz) nsz += nsz; + flag = sbufflag(sb); + if ((flag & SBUF_FLAG_COW)) { /* Copy-on-write semantics. */ + lj_assertG_(G(sbufL(sb)), sb->w == sb->e, "bad SBuf COW"); + b = (char *)lj_mem_new(sbufL(sb), nsz); + setsbufflag(sb, flag & ~(GCSize)SBUF_FLAG_COW); + setgcrefnull(sbufX(sb)->cowref); + memcpy(b, sb->b, osz); + } else { + b = (char *)lj_mem_realloc(sbufL(sb), sb->b, osz, nsz); + } + if ((flag & SBUF_FLAG_EXT)) { + sbufX(sb)->r = sbufX(sb)->r - sb->b + b; /* Adjust read pointer, too. */ + } + /* Adjust buffer pointers. */ + sb->b = b; + sb->w = b + len; + sb->e = b + nsz; + if ((flag & SBUF_FLAG_BORROW)) { /* Adjust borrowed buffer pointers. */ + SBuf *bsb = mref(sbufX(sb)->bsb, SBuf); + bsb->b = b; + bsb->w = b + len; + bsb->e = b + nsz; + } +} + +LJ_NOINLINE char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz) +{ + lj_assertG_(G(sbufL(sb)), sz > sbufsz(sb), "SBuf overflow"); + if (LJ_UNLIKELY(sz > LJ_MAX_BUF)) + lj_err_mem(sbufL(sb)); + buf_grow(sb, sz); + return sb->b; +} + +LJ_NOINLINE char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz) +{ + if (sbufisext(sb)) { + SBufExt *sbx = (SBufExt *)sb; + MSize len = sbufxlen(sbx); + if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) + lj_err_mem(sbufL(sbx)); + if (len + sz > sbufsz(sbx)) { /* Must grow. */ + buf_grow((SBuf *)sbx, len + sz); + } else if (sbufiscow(sb) || sbufxslack(sbx) < (sbufsz(sbx) >> 3)) { + /* Also grow to avoid excessive compactions, if slack < size/8. */ + buf_grow((SBuf *)sbx, sbuflen(sbx) + sz); /* Not sbufxlen! */ + return sbx->w; + } + if (sbx->r != sbx->b) { /* Compact by moving down. */ + memmove(sbx->b, sbx->r, len); + sbx->r = sbx->b; + sbx->w = sbx->b + len; + lj_assertG_(G(sbufL(sbx)), len + sz <= sbufsz(sbx), "bad SBuf compact"); + } + } else { + MSize len = sbuflen(sb); + lj_assertG_(G(sbufL(sb)), sz > sbufleft(sb), "SBuf overflow"); + if (LJ_UNLIKELY(sz > LJ_MAX_BUF || len + sz > LJ_MAX_BUF)) + lj_err_mem(sbufL(sb)); + buf_grow(sb, len + sz); + } + return sb->w; +} + +void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb) +{ + char *b = sb->b; + MSize osz = (MSize)(sb->e - b); + if (osz > 2*LJ_MIN_SBUF) { + MSize n = (MSize)(sb->w - b); + b = lj_mem_realloc(L, b, osz, (osz >> 1)); + sb->b = b; + sb->w = b + n; + sb->e = b + (osz >> 1); + } + lj_assertG_(G(sbufL(sb)), !sbufisext(sb), "YAGNI shrink SBufExt"); +} + +char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz) +{ + SBuf *sb = &G(L)->tmpbuf; + setsbufL(sb, L); + return lj_buf_need(sb, sz); +} + +#if LJ_HASBUFFER && LJ_HASJIT +void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *ref) +{ + lua_State *L = sbufL(sbx); + lj_bufx_free(L, sbx); + lj_bufx_set_cow(L, sbx, p, len); + setgcref(sbx->cowref, ref); + lj_gc_objbarrier(L, (GCudata *)sbx - 1, ref); +} + +#if LJ_HASFFI +MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz) +{ + lj_buf_more((SBuf *)sbx, sz); + return sbufleft(sbx); +} +#endif +#endif + +/* -- Low-level buffer put operations ------------------------------------- */ + +SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len) +{ + char *w = lj_buf_more(sb, len); + w = lj_buf_wmem(w, q, len); + sb->w = w; + return sb; +} + +#if LJ_HASJIT || LJ_HASFFI +static LJ_NOINLINE SBuf * LJ_FASTCALL lj_buf_putchar2(SBuf *sb, int c) +{ + char *w = lj_buf_more2(sb, 1); + *w++ = (char)c; + sb->w = w; + return sb; +} + +SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c) +{ + char *w = sb->w; + if (LJ_LIKELY(w < sb->e)) { + *w++ = (char)c; + sb->w = w; + return sb; + } + return lj_buf_putchar2(sb, c); +} +#endif + +SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s) +{ + MSize len = s->len; + char *w = lj_buf_more(sb, len); + w = lj_buf_wmem(w, strdata(s), len); + sb->w = w; + return sb; +} + +/* -- High-level buffer put operations ------------------------------------ */ + +SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s) +{ + MSize len = s->len; + char *w = lj_buf_more(sb, len), *e = w+len; + const char *q = strdata(s)+len-1; + while (w < e) + *w++ = *q--; + sb->w = w; + return sb; +} + +SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s) +{ + MSize len = s->len; + char *w = lj_buf_more(sb, len), *e = w+len; + const char *q = strdata(s); + for (; w < e; w++, q++) { + uint32_t c = *(unsigned char *)q; +#if LJ_TARGET_PPC + *w = c + ((c >= 'A' && c <= 'Z') << 5); +#else + if (c >= 'A' && c <= 'Z') c += 0x20; + *w = c; +#endif + } + sb->w = w; + return sb; +} + +SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s) +{ + MSize len = s->len; + char *w = lj_buf_more(sb, len), *e = w+len; + const char *q = strdata(s); + for (; w < e; w++, q++) { + uint32_t c = *(unsigned char *)q; +#if LJ_TARGET_PPC + *w = c - ((c >= 'a' && c <= 'z') << 5); +#else + if (c >= 'a' && c <= 'z') c -= 0x20; + *w = c; +#endif + } + sb->w = w; + return sb; +} + +SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep) +{ + MSize len = s->len; + if (rep > 0 && len) { + uint64_t tlen = (uint64_t)rep * len; + char *w; + if (LJ_UNLIKELY(tlen > LJ_MAX_STR)) + lj_err_mem(sbufL(sb)); + w = lj_buf_more(sb, (MSize)tlen); + if (len == 1) { /* Optimize a common case. */ + uint32_t c = strdata(s)[0]; + do { *w++ = c; } while (--rep > 0); + } else { + const char *e = strdata(s) + len; + do { + const char *q = strdata(s); + do { *w++ = *q++; } while (q < e); + } while (--rep > 0); + } + sb->w = w; + } + return sb; +} + +SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, int32_t i, int32_t e) +{ + MSize seplen = sep ? sep->len : 0; + if (i <= e) { + for (;;) { + cTValue *o = lj_tab_getint(t, i); + char *w; + if (!o) { + badtype: /* Error: bad element type. */ + sb->w = (char *)(intptr_t)i; /* Store failing index. */ + return NULL; + } else if (tvisstr(o)) { + MSize len = strV(o)->len; + w = lj_buf_wmem(lj_buf_more(sb, len + seplen), strVdata(o), len); + } else if (tvisint(o)) { + w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT+seplen), intV(o)); + } else if (tvisnum(o)) { + w = lj_buf_more(lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)), seplen); + } else { + goto badtype; + } + if (i++ == e) { + sb->w = w; + break; + } + if (seplen) w = lj_buf_wmem(w, strdata(sep), seplen); + sb->w = w; + } + } + return sb; +} + +/* -- Miscellaneous buffer operations ------------------------------------- */ + +GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb) +{ + return lj_str_new(sbufL(sb), sb->b, sbuflen(sb)); +} + +/* Concatenate two strings. */ +GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2) +{ + MSize len1 = s1->len, len2 = s2->len; + char *buf = lj_buf_tmp(L, len1 + len2); + memcpy(buf, strdata(s1), len1); + memcpy(buf+len1, strdata(s2), len2); + return lj_str_new(L, buf, len1 + len2); +} + +/* Read ULEB128 from buffer. */ +uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp) +{ + const uint8_t *w = (const uint8_t *)*pp; + uint32_t v = *w++; + if (LJ_UNLIKELY(v >= 0x80)) { + int sh = 0; + v &= 0x7f; + do { v |= ((*w & 0x7f) << (sh += 7)); } while (*w++ >= 0x80); + } + *pp = (const char *)w; + return v; +} + diff --git a/src/lj_buf.h b/src/lj_buf.h new file mode 100644 index 00000000..76114201 --- /dev/null +++ b/src/lj_buf.h @@ -0,0 +1,198 @@ +/* +** Buffer handling. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_BUF_H +#define _LJ_BUF_H + +#include "lj_obj.h" +#include "lj_gc.h" +#include "lj_str.h" + +/* Resizable string buffers. */ + +/* The SBuf struct definition is in lj_obj.h: +** char *w; Write pointer. +** char *e; End pointer. +** char *b; Base pointer. +** MRef L; lua_State, used for buffer resizing. Extension bits in 3 LSB. +*/ + +/* Extended string buffer. */ +typedef struct SBufExt { + SBufHeader; + union { + GCRef cowref; /* Copy-on-write object reference. */ + MRef bsb; /* Borrowed string buffer. */ + }; + char *r; /* Read pointer. */ + GCRef dict_str; /* Serialization string dictionary table. */ + GCRef dict_mt; /* Serialization metatable dictionary table. */ + int depth; /* Remaining recursion depth. */ +} SBufExt; + +#define sbufsz(sb) ((MSize)((sb)->e - (sb)->b)) +#define sbuflen(sb) ((MSize)((sb)->w - (sb)->b)) +#define sbufleft(sb) ((MSize)((sb)->e - (sb)->w)) +#define sbufxlen(sbx) ((MSize)((sbx)->w - (sbx)->r)) +#define sbufxslack(sbx) ((MSize)((sbx)->r - (sbx)->b)) + +#define SBUF_MASK_FLAG (7) +#define SBUF_MASK_L (~(GCSize)SBUF_MASK_FLAG) +#define SBUF_FLAG_EXT 1 /* Extended string buffer. */ +#define SBUF_FLAG_COW 2 /* Copy-on-write buffer. */ +#define SBUF_FLAG_BORROW 4 /* Borrowed string buffer. */ + +#define sbufL(sb) \ + ((lua_State *)(void *)(uintptr_t)(mrefu((sb)->L) & SBUF_MASK_L)) +#define setsbufL(sb, l) (setmref((sb)->L, (l))) +#define setsbufXL(sb, l, flag) \ + (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) + (flag))) +#define setsbufXL_(sb, l) \ + (setmrefu((sb)->L, (GCSize)(uintptr_t)(void *)(l) | (mrefu((sb)->L) & SBUF_MASK_FLAG))) + +#define sbufflag(sb) (mrefu((sb)->L)) +#define sbufisext(sb) (sbufflag((sb)) & SBUF_FLAG_EXT) +#define sbufiscow(sb) (sbufflag((sb)) & SBUF_FLAG_COW) +#define sbufisborrow(sb) (sbufflag((sb)) & SBUF_FLAG_BORROW) +#define sbufiscoworborrow(sb) (sbufflag((sb)) & (SBUF_FLAG_COW|SBUF_FLAG_BORROW)) +#define sbufX(sb) \ + (lj_assertG_(G(sbufL(sb)), sbufisext(sb), "not an SBufExt"), (SBufExt *)(sb)) +#define setsbufflag(sb, flag) (setmrefu((sb)->L, (flag))) + +#define tvisbuf(o) \ + (LJ_HASBUFFER && tvisudata(o) && udataV(o)->udtype == UDTYPE_BUFFER) +#define bufV(o) check_exp(tvisbuf(o), ((SBufExt *)uddata(udataV(o)))) + +/* Buffer management */ +LJ_FUNC char *LJ_FASTCALL lj_buf_need2(SBuf *sb, MSize sz); +LJ_FUNC char *LJ_FASTCALL lj_buf_more2(SBuf *sb, MSize sz); +LJ_FUNC void LJ_FASTCALL lj_buf_shrink(lua_State *L, SBuf *sb); +LJ_FUNC char * LJ_FASTCALL lj_buf_tmp(lua_State *L, MSize sz); + +static LJ_AINLINE void lj_buf_init(lua_State *L, SBuf *sb) +{ + setsbufL(sb, L); + sb->w = sb->e = sb->b = NULL; +} + +static LJ_AINLINE void lj_buf_reset(SBuf *sb) +{ + sb->w = sb->b; +} + +static LJ_AINLINE SBuf *lj_buf_tmp_(lua_State *L) +{ + SBuf *sb = &G(L)->tmpbuf; + setsbufL(sb, L); + lj_buf_reset(sb); + return sb; +} + +static LJ_AINLINE void lj_buf_free(global_State *g, SBuf *sb) +{ + lj_assertG(!sbufisext(sb), "bad free of SBufExt"); + lj_mem_free(g, sb->b, sbufsz(sb)); +} + +static LJ_AINLINE char *lj_buf_need(SBuf *sb, MSize sz) +{ + if (LJ_UNLIKELY(sz > sbufsz(sb))) + return lj_buf_need2(sb, sz); + return sb->b; +} + +static LJ_AINLINE char *lj_buf_more(SBuf *sb, MSize sz) +{ + if (LJ_UNLIKELY(sz > sbufleft(sb))) + return lj_buf_more2(sb, sz); + return sb->w; +} + +/* Extended buffer management */ +static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx) +{ + memset(sbx, 0, sizeof(SBufExt)); + setsbufXL(sbx, L, SBUF_FLAG_EXT); +} + +static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb) +{ + setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW); + setmref(sbx->bsb, sb); + sbx->r = sbx->w = sbx->b = sb->b; + sbx->e = sb->e; +} + +static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx, + const char *p, MSize len) +{ + setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW); + sbx->r = sbx->b = (char *)p; + sbx->w = sbx->e = (char *)p + len; +} + +static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx) +{ + if (sbufiscow(sbx)) { + setmrefu(sbx->L, (mrefu(sbx->L) & ~(GCSize)SBUF_FLAG_COW)); + setgcrefnull(sbx->cowref); + sbx->b = sbx->e = NULL; + } + sbx->r = sbx->w = sbx->b; +} + +static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx) +{ + if (!sbufiscoworborrow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx)); + setsbufXL(sbx, L, SBUF_FLAG_EXT); + setgcrefnull(sbx->cowref); + sbx->r = sbx->w = sbx->b = sbx->e = NULL; +} + +#if LJ_HASBUFFER && LJ_HASJIT +LJ_FUNC void lj_bufx_set(SBufExt *sbx, const char *p, MSize len, GCobj *o); +#if LJ_HASFFI +LJ_FUNC MSize LJ_FASTCALL lj_bufx_more(SBufExt *sbx, MSize sz); +#endif +#endif + +/* Low-level buffer put operations */ +LJ_FUNC SBuf *lj_buf_putmem(SBuf *sb, const void *q, MSize len); +#if LJ_HASJIT || LJ_HASFFI +LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putchar(SBuf *sb, int c); +#endif +LJ_FUNC SBuf * LJ_FASTCALL lj_buf_putstr(SBuf *sb, GCstr *s); + +static LJ_AINLINE char *lj_buf_wmem(char *p, const void *q, MSize len) +{ + return (char *)memcpy(p, q, len) + len; +} + +static LJ_AINLINE void lj_buf_putb(SBuf *sb, int c) +{ + char *w = lj_buf_more(sb, 1); + *w++ = (char)c; + sb->w = w; +} + +/* High-level buffer put operations */ +LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_reverse(SBuf *sb, GCstr *s); +LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_lower(SBuf *sb, GCstr *s); +LJ_FUNCA SBuf * LJ_FASTCALL lj_buf_putstr_upper(SBuf *sb, GCstr *s); +LJ_FUNC SBuf *lj_buf_putstr_rep(SBuf *sb, GCstr *s, int32_t rep); +LJ_FUNC SBuf *lj_buf_puttab(SBuf *sb, GCtab *t, GCstr *sep, + int32_t i, int32_t e); + +/* Miscellaneous buffer operations */ +LJ_FUNCA GCstr * LJ_FASTCALL lj_buf_tostr(SBuf *sb); +LJ_FUNC GCstr *lj_buf_cat2str(lua_State *L, GCstr *s1, GCstr *s2); +LJ_FUNC uint32_t LJ_FASTCALL lj_buf_ruleb128(const char **pp); + +static LJ_AINLINE GCstr *lj_buf_str(lua_State *L, SBuf *sb) +{ + return lj_str_new(L, sb->b, sbuflen(sb)); +} + +#endif diff --git a/src/lj_carith.c b/src/lj_carith.c index 462dbae4..1a2a058f 100644 --- a/src/lj_carith.c +++ b/src/lj_carith.c @@ -11,10 +11,12 @@ #include "lj_err.h" #include "lj_tab.h" #include "lj_meta.h" +#include "lj_ir.h" #include "lj_ctype.h" #include "lj_cconv.h" #include "lj_cdata.h" #include "lj_carith.h" +#include "lj_strscan.h" /* -- C data arithmetic --------------------------------------------------- */ @@ -120,7 +122,7 @@ static int carith_ptr(lua_State *L, CTState *cts, CDArith *ca, MMS mm) setboolV(L->top-1, ((uintptr_t)pp < (uintptr_t)pp2)); return 1; } else { - lua_assert(mm == MM_le); + lj_assertL(mm == MM_le, "bad metamethod %d", mm); setboolV(L->top-1, ((uintptr_t)pp <= (uintptr_t)pp2)); return 1; } @@ -206,7 +208,9 @@ static int carith_int64(lua_State *L, CTState *cts, CDArith *ca, MMS mm) *up = lj_carith_powu64(u0, u1); break; case MM_unm: *up = (uint64_t)-(int64_t)u0; break; - default: lua_assert(0); break; + default: + lj_assertL(0, "bad metamethod %d", mm); + break; } lj_gc_check(L); return 1; @@ -272,6 +276,81 @@ int lj_carith_op(lua_State *L, MMS mm) return lj_carith_meta(L, cts, &ca, mm); } +/* -- 64 bit bit operations helpers --------------------------------------- */ + +#if LJ_64 +#define B64DEF(name) \ + static LJ_AINLINE uint64_t lj_carith_##name(uint64_t x, int32_t sh) +#else +/* Not inlined on 32 bit archs, since some of these are quite lengthy. */ +#define B64DEF(name) \ + uint64_t LJ_NOINLINE lj_carith_##name(uint64_t x, int32_t sh) +#endif + +B64DEF(shl64) { return x << (sh&63); } +B64DEF(shr64) { return x >> (sh&63); } +B64DEF(sar64) { return (uint64_t)((int64_t)x >> (sh&63)); } +B64DEF(rol64) { return lj_rol(x, (sh&63)); } +B64DEF(ror64) { return lj_ror(x, (sh&63)); } + +#undef B64DEF + +uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op) +{ + switch (op) { + case IR_BSHL-IR_BSHL: x = lj_carith_shl64(x, sh); break; + case IR_BSHR-IR_BSHL: x = lj_carith_shr64(x, sh); break; + case IR_BSAR-IR_BSHL: x = lj_carith_sar64(x, sh); break; + case IR_BROL-IR_BSHL: x = lj_carith_rol64(x, sh); break; + case IR_BROR-IR_BSHL: x = lj_carith_ror64(x, sh); break; + default: + lj_assertX(0, "bad shift op %d", op); + break; + } + return x; +} + +/* Equivalent to lj_lib_checkbit(), but handles cdata. */ +uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id) +{ + TValue *o = L->base + narg-1; + if (o >= L->top) { + err: + lj_err_argt(L, narg, LUA_TNUMBER); + } else if (LJ_LIKELY(tvisnumber(o))) { + /* Handled below. */ + } else if (tviscdata(o)) { + CTState *cts = ctype_cts(L); + uint8_t *sp = (uint8_t *)cdataptr(cdataV(o)); + CTypeID sid = cdataV(o)->ctypeid; + CType *s = ctype_get(cts, sid); + uint64_t x; + if (ctype_isref(s->info)) { + sp = *(void **)sp; + sid = ctype_cid(s->info); + } + s = ctype_raw(cts, sid); + if (ctype_isenum(s->info)) s = ctype_child(cts, s); + if ((s->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) == + CTINFO(CT_NUM, CTF_UNSIGNED) && s->size == 8) + *id = CTID_UINT64; /* Use uint64_t, since it has the highest rank. */ + else if (!*id) + *id = CTID_INT64; /* Use int64_t, unless already set. */ + lj_cconv_ct_ct(cts, ctype_get(cts, *id), s, + (uint8_t *)&x, sp, CCF_ARG(narg)); + return x; + } else if (!(tvisstr(o) && lj_strscan_number(strV(o), o))) { + goto err; + } + if (LJ_LIKELY(tvisint(o))) { + return (uint32_t)intV(o); + } else { + int32_t i = lj_num2bit(numV(o)); + if (LJ_DUALNUM) setintV(o, i); + return (uint32_t)i; + } +} + /* -- 64 bit integer arithmetic helpers ----------------------------------- */ #if LJ_32 && LJ_HASJIT diff --git a/src/lj_carith.h b/src/lj_carith.h index 269c60ea..9d6b1dc9 100644 --- a/src/lj_carith.h +++ b/src/lj_carith.h @@ -12,6 +12,16 @@ LJ_FUNC int lj_carith_op(lua_State *L, MMS mm); +#if LJ_32 +LJ_FUNC uint64_t lj_carith_shl64(uint64_t x, int32_t sh); +LJ_FUNC uint64_t lj_carith_shr64(uint64_t x, int32_t sh); +LJ_FUNC uint64_t lj_carith_sar64(uint64_t x, int32_t sh); +LJ_FUNC uint64_t lj_carith_rol64(uint64_t x, int32_t sh); +LJ_FUNC uint64_t lj_carith_ror64(uint64_t x, int32_t sh); +#endif +LJ_FUNC uint64_t lj_carith_shift64(uint64_t x, int32_t sh, int op); +LJ_FUNC uint64_t lj_carith_check64(lua_State *L, int narg, CTypeID *id); + #if LJ_32 && LJ_HASJIT LJ_FUNC int64_t lj_carith_mul64(int64_t x, int64_t k); #endif diff --git a/src/lj_ccall.c b/src/lj_ccall.c index 4a859c73..25f54dee 100644 --- a/src/lj_ccall.c +++ b/src/lj_ccall.c @@ -9,7 +9,6 @@ #include "lj_gc.h" #include "lj_err.h" -#include "lj_str.h" #include "lj_tab.h" #include "lj_ctype.h" #include "lj_cconv.h" @@ -291,56 +290,85 @@ #define CCALL_HANDLE_RET \ if ((ct->info & CTF_VARARG)) sp = (uint8_t *)&cc->gpr[0]; -#elif LJ_TARGET_PPC -/* -- PPC calling conventions --------------------------------------------- */ +#elif LJ_TARGET_ARM64 +/* -- ARM64 calling conventions ------------------------------------------- */ #define CCALL_HANDLE_STRUCTRET \ - cc->retref = 1; /* Return all structs by reference. */ \ - cc->gpr[ngpr++] = (GPRArg)dp; + cc->retref = !ccall_classify_struct(cts, ctr); \ + if (cc->retref) cc->retp = dp; + +#define CCALL_HANDLE_STRUCTRET2 \ + unsigned int cl = ccall_classify_struct(cts, ctr); \ + if ((cl & 4)) { /* Combine float HFA from separate registers. */ \ + CTSize i = (cl >> 8) - 1; \ + do { ((uint32_t *)dp)[i] = cc->fpr[i].lo; } while (i--); \ + } else { \ + if (cl > 1) sp = (uint8_t *)&cc->fpr[0]; \ + memcpy(dp, sp, ctr->size); \ + } #define CCALL_HANDLE_COMPLEXRET \ - /* Complex values are returned in 2 or 4 GPRs. */ \ + /* Complex values are returned in one or two FPRs. */ \ cc->retref = 0; #define CCALL_HANDLE_COMPLEXRET2 \ - memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0].f; \ + ((float *)dp)[1] = cc->fpr[1].f; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0].d; \ + ((double *)dp)[1] = cc->fpr[1].d; \ + } #define CCALL_HANDLE_STRUCTARG \ - rp = cdataptr(lj_cdata_new(cts, did, sz)); \ - sz = CTSIZE_PTR; /* Pass all structs by reference. */ + unsigned int cl = ccall_classify_struct(cts, d); \ + if (cl == 0) { /* Pass struct by reference. */ \ + rp = cdataptr(lj_cdata_new(cts, did, sz)); \ + sz = CTSIZE_PTR; \ + } else if (cl > 1) { /* Pass struct in FPRs or on stack. */ \ + isfp = (cl & 4) ? 2 : 1; \ + } /* else: Pass struct in GPRs or on stack. */ #define CCALL_HANDLE_COMPLEXARG \ - /* Pass complex by value in 2 or 4 GPRs. */ + /* Pass complex by value in separate (!) FPRs or on stack. */ \ + isfp = sz == 2*sizeof(float) ? 2 : 1; #define CCALL_HANDLE_REGARG \ - if (isfp) { /* Try to pass argument in FPRs. */ \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ + if (LJ_TARGET_OSX && isva) { \ + /* IOS: All variadic arguments are on the stack. */ \ + } else if (isfp) { /* Try to pass argument in FPRs. */ \ + int n2 = ctype_isvector(d->info) ? 1 : \ + isfp == 1 ? n : (d->size >> (4-isfp)); \ + if (nfpr + n2 <= CCALL_NARG_FPR) { \ dp = &cc->fpr[nfpr]; \ - nfpr += 1; \ - d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ + nfpr += n2; \ goto done; \ + } else { \ + nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ + if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \ } \ } else { /* Try to pass argument in GPRs. */ \ - if (n > 1) { \ - lua_assert(n == 2 || n == 4); /* int64_t or complex (float). */ \ - if (ctype_isinteger(d->info)) \ - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ - else if (ngpr + n > maxgpr) \ - ngpr = maxgpr; /* Prevent reordering. */ \ - } \ + if (!LJ_TARGET_OSX && (d->info & CTF_ALIGN) > CTALIGN_PTR) \ + ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ if (ngpr + n <= maxgpr) { \ dp = &cc->gpr[ngpr]; \ ngpr += n; \ goto done; \ + } else { \ + ngpr = maxgpr; /* Prevent reordering. */ \ + if (LJ_TARGET_OSX && d->size < 8) goto err_nyi; \ } \ } +#if LJ_BE #define CCALL_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ + sp = (uint8_t *)&cc->fpr[0].f; +#endif -#elif LJ_TARGET_PPCSPE -/* -- PPC/SPE calling conventions ----------------------------------------- */ + +#elif LJ_TARGET_PPC +/* -- PPC calling conventions --------------------------------------------- */ #define CCALL_HANDLE_STRUCTRET \ cc->retref = 1; /* Return all structs by reference. */ \ @@ -360,12 +388,13 @@ #define CCALL_HANDLE_COMPLEXARG \ /* Pass complex by value in 2 or 4 GPRs. */ -/* PPC/SPE has a softfp ABI. */ -#define CCALL_HANDLE_REGARG \ - if (n > 1) { /* Doesn't fit in a single GPR? */ \ - lua_assert(n == 2 || n == 4); /* int64_t, double or complex (float). */ \ - if (n == 2) \ - ngpr = (ngpr + 1u) & ~1u; /* Only align 64 bit value to regpair. */ \ +#define CCALL_HANDLE_GPR \ + /* Try to pass argument in GPRs. */ \ + if (n > 1) { \ + /* int64_t or complex (float). */ \ + lj_assertL(n == 2 || n == 4, "bad GPR size %d", n); \ + if (ctype_isinteger(d->info) || ctype_isfp(d->info)) \ + ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ else if (ngpr + n > maxgpr) \ ngpr = maxgpr; /* Prevent reordering. */ \ } \ @@ -373,10 +402,32 @@ dp = &cc->gpr[ngpr]; \ ngpr += n; \ goto done; \ + } \ + +#if LJ_ABI_SOFTFP +#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR +#else +#define CCALL_HANDLE_REGARG \ + if (isfp) { /* Try to pass argument in FPRs. */ \ + if (nfpr + 1 <= CCALL_NARG_FPR) { \ + dp = &cc->fpr[nfpr]; \ + nfpr += 1; \ + d = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ + goto done; \ + } \ + } else { \ + CCALL_HANDLE_GPR \ } +#endif -#elif LJ_TARGET_MIPS -/* -- MIPS calling conventions -------------------------------------------- */ +#if !LJ_ABI_SOFTFP +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ +#endif + +#elif LJ_TARGET_MIPS32 +/* -- MIPS o32 calling conventions ---------------------------------------- */ #define CCALL_HANDLE_STRUCTRET \ cc->retref = 1; /* Return all structs by reference. */ \ @@ -386,6 +437,18 @@ /* Complex values are returned in 1 or 2 FPRs. */ \ cc->retref = 0; +#if LJ_ABI_SOFTFP +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ + ((intptr_t *)dp)[0] = cc->gpr[0]; \ + ((intptr_t *)dp)[1] = cc->gpr[1]; \ + } else { /* Copy complex double from GPRs. */ \ + ((intptr_t *)dp)[0] = cc->gpr[0]; \ + ((intptr_t *)dp)[1] = cc->gpr[1]; \ + ((intptr_t *)dp)[2] = cc->gpr[2]; \ + ((intptr_t *)dp)[3] = cc->gpr[3]; \ + } +#else #define CCALL_HANDLE_COMPLEXRET2 \ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ ((float *)dp)[0] = cc->fpr[0].f; \ @@ -394,6 +457,7 @@ ((double *)dp)[0] = cc->fpr[0].d; \ ((double *)dp)[1] = cc->fpr[1].d; \ } +#endif #define CCALL_HANDLE_STRUCTARG \ /* Pass all structs by value in registers and/or on the stack. */ @@ -401,6 +465,22 @@ #define CCALL_HANDLE_COMPLEXARG \ /* Pass complex by value in 2 or 4 GPRs. */ +#define CCALL_HANDLE_GPR \ + if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ + ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ + if (ngpr < maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + if (ngpr + n > maxgpr) { \ + nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + ngpr = maxgpr; \ + } else { \ + ngpr += n; \ + } \ + goto done; \ + } + +#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ #define CCALL_HANDLE_REGARG \ if (isfp && nfpr < CCALL_NARG_FPR && !(ct->info & CTF_VARARG)) { \ /* Try to pass argument in FPRs. */ \ @@ -409,25 +489,91 @@ goto done; \ } else { /* Try to pass argument in GPRs. */ \ nfpr = CCALL_NARG_FPR; \ - if ((d->info & CTF_ALIGN) > CTALIGN_PTR) \ - ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr < maxgpr) { \ - dp = &cc->gpr[ngpr]; \ - if (ngpr + n > maxgpr) { \ - nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ - if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ - ngpr = maxgpr; \ - } else { \ - ngpr += n; \ - } \ - goto done; \ - } \ + CCALL_HANDLE_GPR \ + } +#else /* MIPS32 soft-float */ +#define CCALL_HANDLE_REGARG CCALL_HANDLE_GPR +#endif + +#if !LJ_ABI_SOFTFP +/* On MIPS64 soft-float, position of float return values is endian-dependant. */ +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)&cc->fpr[0].f; +#endif + +#elif LJ_TARGET_MIPS64 +/* -- MIPS n64 calling conventions ---------------------------------------- */ + +#define CCALL_HANDLE_STRUCTRET \ + cc->retref = !(sz <= 16); \ + if (cc->retref) cc->gpr[ngpr++] = (GPRArg)dp; + +#define CCALL_HANDLE_STRUCTRET2 \ + ccall_copy_struct(cc, ctr, dp, sp, ccall_classify_struct(cts, ctr, ct)); + +#define CCALL_HANDLE_COMPLEXRET \ + /* Complex values are returned in 1 or 2 FPRs. */ \ + cc->retref = 0; + +#if LJ_ABI_SOFTFP /* MIPS64 soft-float */ + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from GPRs. */ \ + ((intptr_t *)dp)[0] = cc->gpr[0]; \ + } else { /* Copy complex double from GPRs. */ \ + ((intptr_t *)dp)[0] = cc->gpr[0]; \ + ((intptr_t *)dp)[1] = cc->gpr[1]; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + /* Pass complex by value in 2 or 4 GPRs. */ + +/* Position of soft-float 'float' return value depends on endianess. */ +#define CCALL_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + sp = (uint8_t *)cc->gpr + LJ_ENDIAN_SELECT(0, 4); + +#else /* MIPS64 hard-float */ + +#define CCALL_HANDLE_COMPLEXRET2 \ + if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ + ((float *)dp)[0] = cc->fpr[0].f; \ + ((float *)dp)[1] = cc->fpr[1].f; \ + } else { /* Copy complex double from FPRs. */ \ + ((double *)dp)[0] = cc->fpr[0].d; \ + ((double *)dp)[1] = cc->fpr[1].d; \ + } + +#define CCALL_HANDLE_COMPLEXARG \ + if (sz == 2*sizeof(float)) { \ + isfp = 2; \ + if (ngpr < maxgpr) \ + sz *= 2; \ } #define CCALL_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ sp = (uint8_t *)&cc->fpr[0].f; +#endif + +#define CCALL_HANDLE_STRUCTARG \ + /* Pass all structs by value in registers and/or on the stack. */ + +#define CCALL_HANDLE_REGARG \ + if (ngpr < maxgpr) { \ + dp = &cc->gpr[ngpr]; \ + if (ngpr + n > maxgpr) { \ + nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ + if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ + ngpr = maxgpr; \ + } else { \ + ngpr += n; \ + } \ + goto done; \ + } + #else #error "Missing calling convention definitions for this architecture" #endif @@ -497,7 +643,8 @@ static void ccall_classify_ct(CTState *cts, CType *ct, int *rcl, CTSize ofs) ccall_classify_struct(cts, ct, rcl, ofs); } else { int cl = ctype_isfp(ct->info) ? CCALL_RCL_SSE : CCALL_RCL_INT; - lua_assert(ctype_hassize(ct->info)); + lj_assertCTS(ctype_hassize(ct->info), + "classify ctype %08x without size", ct->info); if ((ofs & (ct->size-1))) cl = CCALL_RCL_MEM; /* Unaligned. */ rcl[(ofs >= 8)] |= cl; } @@ -522,12 +669,13 @@ static int ccall_classify_struct(CTState *cts, CType *ct, int *rcl, CTSize ofs) } /* Try to split up a small struct into registers. */ -static int ccall_struct_reg(CCallState *cc, GPRArg *dp, int *rcl) +static int ccall_struct_reg(CCallState *cc, CTState *cts, GPRArg *dp, int *rcl) { MSize ngpr = cc->ngpr, nfpr = cc->nfpr; uint32_t i; + UNUSED(cts); for (i = 0; i < 2; i++) { - lua_assert(!(rcl[i] & CCALL_RCL_MEM)); + lj_assertCTS(!(rcl[i] & CCALL_RCL_MEM), "pass mem struct in reg"); if ((rcl[i] & CCALL_RCL_INT)) { /* Integer class takes precedence. */ if (ngpr >= CCALL_NARG_GPR) return 1; /* Register overflow. */ cc->gpr[ngpr++] = dp[i]; @@ -548,7 +696,8 @@ static int ccall_struct_arg(CCallState *cc, CTState *cts, CType *d, int *rcl, dp[0] = dp[1] = 0; /* Convert to temp. struct. */ lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); - if (ccall_struct_reg(cc, dp, rcl)) { /* Register overflow? Pass on stack. */ + if (ccall_struct_reg(cc, cts, dp, rcl)) { + /* Register overflow? Pass on stack. */ MSize nsp = cc->nsp, n = rcl[1] ? 2 : 1; if (nsp + n > CCALL_MAXSTACK) return 1; /* Too many arguments. */ cc->nsp = nsp + n; @@ -621,6 +770,125 @@ noth: /* Not a homogeneous float/double aggregate. */ #endif +/* -- ARM64 ABI struct classification ------------------------------------- */ + +#if LJ_TARGET_ARM64 + +/* Classify a struct based on its fields. */ +static unsigned int ccall_classify_struct(CTState *cts, CType *ct) +{ + CTSize sz = ct->size; + unsigned int r = 0, n = 0, isu = (ct->info & CTF_UNION); + while (ct->sib) { + CType *sct; + ct = ctype_get(cts, ct->sib); + if (ctype_isfield(ct->info)) { + sct = ctype_rawchild(cts, ct); + if (ctype_isfp(sct->info)) { + r |= sct->size; + if (!isu) n++; else if (n == 0) n = 1; + } else if (ctype_iscomplex(sct->info)) { + r |= (sct->size >> 1); + if (!isu) n += 2; else if (n < 2) n = 2; + } else if (ctype_isstruct(sct->info)) { + goto substruct; + } else { + goto noth; + } + } else if (ctype_isbitfield(ct->info)) { + goto noth; + } else if (ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + sct = ctype_rawchild(cts, ct); + substruct: + if (sct->size > 0) { + unsigned int s = ccall_classify_struct(cts, sct); + if (s <= 1) goto noth; + r |= (s & 255); + if (!isu) n += (s >> 8); else if (n < (s >>8)) n = (s >> 8); + } + } + } + if ((r == 4 || r == 8) && n <= 4) + return r + (n << 8); +noth: /* Not a homogeneous float/double aggregate. */ + return (sz <= 16); /* Return structs of size <= 16 in GPRs. */ +} + +#endif + +/* -- MIPS64 ABI struct classification ---------------------------- */ + +#if LJ_TARGET_MIPS64 + +#define FTYPE_FLOAT 1 +#define FTYPE_DOUBLE 2 + +/* Classify FP fields (max. 2) and their types. */ +static unsigned int ccall_classify_struct(CTState *cts, CType *ct, CType *ctf) +{ + int n = 0, ft = 0; + if ((ctf->info & CTF_VARARG) || (ct->info & CTF_UNION)) + goto noth; + while (ct->sib) { + CType *sct; + ct = ctype_get(cts, ct->sib); + if (n == 2) { + goto noth; + } else if (ctype_isfield(ct->info)) { + sct = ctype_rawchild(cts, ct); + if (ctype_isfp(sct->info)) { + ft |= (sct->size == 4 ? FTYPE_FLOAT : FTYPE_DOUBLE) << 2*n; + n++; + } else { + goto noth; + } + } else if (ctype_isbitfield(ct->info) || + ctype_isxattrib(ct->info, CTA_SUBTYPE)) { + goto noth; + } + } + if (n <= 2) + return ft; +noth: /* Not a homogeneous float/double aggregate. */ + return 0; /* Struct is in GPRs. */ +} + +static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, + int ft) +{ + if (LJ_ABI_SOFTFP ? ft : + ((ft & 3) == FTYPE_FLOAT || (ft >> 2) == FTYPE_FLOAT)) { + int i, ofs = 0; + for (i = 0; ft != 0; i++, ft >>= 2) { + if ((ft & 3) == FTYPE_FLOAT) { +#if LJ_ABI_SOFTFP + /* The 2nd FP struct result is in CARG1 (gpr[2]) and not CRET2. */ + memcpy((uint8_t *)dp + ofs, + (uint8_t *)&cc->gpr[2*i] + LJ_ENDIAN_SELECT(0, 4), 4); +#else + *(float *)((uint8_t *)dp + ofs) = cc->fpr[i].f; +#endif + ofs += 4; + } else { + ofs = (ofs + 7) & ~7; /* 64 bit alignment. */ +#if LJ_ABI_SOFTFP + *(intptr_t *)((uint8_t *)dp + ofs) = cc->gpr[2*i]; +#else + *(double *)((uint8_t *)dp + ofs) = cc->fpr[i].d; +#endif + ofs += 8; + } + } + } else { +#if !LJ_ABI_SOFTFP + if (ft) sp = (uint8_t *)&cc->fpr[0]; +#endif + memcpy(dp, sp, ctr->size); + } +} + +#endif + /* -- Common C call handling ---------------------------------------------- */ /* Infer the destination CTypeID for a vararg argument. */ @@ -726,7 +994,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, if (fid) { /* Get argument type from field. */ CType *ctf = ctype_get(cts, fid); fid = ctf->sib; - lua_assert(ctype_isfield(ctf->info)); + lj_assertL(ctype_isfield(ctf->info), "field expected"); did = ctype_cid(ctf->info); } else { if (!(ct->info & CTF_VARARG)) @@ -788,6 +1056,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, *(int32_t *)dp = d->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_ARM64 && LJ_BE + if (isfp && d->size == sizeof(float)) + ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ +#endif +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) +#if LJ_TARGET_MIPS64 + || (isfp && nsp == 0) +#endif + ) && d->size <= 4) { + *(int64_t *)dp = (int64_t)*(int32_t *)dp; /* Sign-extend to 64 bit. */ + } +#endif #if LJ_TARGET_X64 && LJ_ABI_WIN if (isva) { /* Windows/x64 mirrors varargs in both register sets. */ if (nfpr == ngpr) @@ -803,13 +1084,19 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ cc->fpr[nfpr-2].d[1] = 0; } +#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) + if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { + /* Split float HFA or complex float into separate registers. */ + CTSize i = (sz >> 2) - 1; + do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); + } #else UNUSED(isfp); #endif } if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ -#if LJ_TARGET_X64 || LJ_TARGET_PPC +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) cc->nfpr = nfpr; /* Required for vararg functions. */ #endif cc->nsp = nsp; @@ -844,7 +1131,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, CCALL_HANDLE_COMPLEXRET2 return 1; /* One GC step. */ } - if (LJ_BE && ctype_isinteger_or_bool(ctr->info) && ctr->size < CTSIZE_PTR) + if (LJ_BE && ctr->size < CTSIZE_PTR && + (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) sp += (CTSIZE_PTR - ctr->size); #if CCALL_NUM_FPR if (ctype_isfp(ctr->info) || ctype_isvector(ctr->info)) @@ -854,7 +1142,8 @@ static int ccall_get_results(lua_State *L, CTState *cts, CType *ct, CCALL_HANDLE_RET #endif /* No reference types end up here, so there's no need for the CTypeID. */ - lua_assert(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info))); + lj_assertL(!(ctype_isrefarray(ctr->info) || ctype_isstruct(ctr->info)), + "unexpected reference ctype"); return lj_cconv_tv_ct(cts, ctr, 0, L->top-1, sp); } @@ -878,7 +1167,7 @@ int lj_ccall_func(lua_State *L, GCcdata *cd) lj_vm_ffi_call(&cc); if (cts->cb.slot != ~0u) { /* Blacklist function that called a callback. */ TValue tv; - setlightudV(&tv, (void *)cc.func); + tv.u64 = ((uintptr_t)(void *)cc.func >> 2) | U64x(800000000, 00000000); setboolV(lj_tab_set(L, cts->miscmap, &tv), 1); } ct = (CType *)((intptr_t)ct+(intptr_t)cts->tab); /* May be reallocated. */ diff --git a/src/lj_ccall.h b/src/lj_ccall.h index b46483f1..0b3c5244 100644 --- a/src/lj_ccall.h +++ b/src/lj_ccall.h @@ -68,35 +68,56 @@ typedef union FPRArg { float f[2]; } FPRArg; -#elif LJ_TARGET_PPC +#elif LJ_TARGET_ARM64 #define CCALL_NARG_GPR 8 +#define CCALL_NRET_GPR 2 #define CCALL_NARG_FPR 8 +#define CCALL_NRET_FPR 4 +#define CCALL_SPS_FREE 0 + +typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; + struct { LJ_ENDIAN_LOHI(uint32_t lo; , uint32_t hi;) }; +} FPRArg; + +#elif LJ_TARGET_PPC + +#define CCALL_NARG_GPR 8 +#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 8) #define CCALL_NRET_GPR 4 /* For complex double. */ -#define CCALL_NRET_FPR 1 +#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 1) #define CCALL_SPS_EXTRA 4 #define CCALL_SPS_FREE 0 typedef intptr_t GPRArg; typedef double FPRArg; -#elif LJ_TARGET_PPCSPE +#elif LJ_TARGET_MIPS32 -#define CCALL_NARG_GPR 8 -#define CCALL_NARG_FPR 0 -#define CCALL_NRET_GPR 4 /* For softfp complex double. */ -#define CCALL_NRET_FPR 0 -#define CCALL_SPS_FREE 0 /* NYI */ +#define CCALL_NARG_GPR 4 +#define CCALL_NARG_FPR (LJ_ABI_SOFTFP ? 0 : 2) +#define CCALL_NRET_GPR (LJ_ABI_SOFTFP ? 4 : 2) +#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) +#define CCALL_SPS_EXTRA 7 +#define CCALL_SPS_FREE 1 typedef intptr_t GPRArg; +typedef union FPRArg { + double d; + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; +} FPRArg; -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS64 -#define CCALL_NARG_GPR 4 -#define CCALL_NARG_FPR 2 +/* FP args are positional and overlay the GPR array. */ +#define CCALL_NARG_GPR 8 +#define CCALL_NARG_FPR 0 #define CCALL_NRET_GPR 2 -#define CCALL_NRET_FPR 2 -#define CCALL_SPS_EXTRA 7 +#define CCALL_NRET_FPR (LJ_ABI_SOFTFP ? 0 : 2) +#define CCALL_SPS_EXTRA 3 #define CCALL_SPS_FREE 1 typedef intptr_t GPRArg; @@ -145,6 +166,8 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { uint8_t nfpr; /* Number of arguments in FPRs. */ #elif LJ_TARGET_X86 uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ +#elif LJ_TARGET_ARM64 + void *retp; /* Aggregate return pointer in x8. */ #elif LJ_TARGET_PPC uint8_t nfpr; /* Number of arguments in FPRs. */ #endif diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c index 5a6785c6..43e44305 100644 --- a/src/lj_ccallback.c +++ b/src/lj_ccallback.c @@ -27,7 +27,7 @@ #if LJ_OS_NOJIT -/* Disabled callback support. */ +/* Callbacks disabled. */ #define CALLBACK_SLOT2OFS(slot) (0*(slot)) #define CALLBACK_OFS2SLOT(ofs) (0*(ofs)) #define CALLBACK_MAX_SLOT 0 @@ -35,7 +35,7 @@ #elif LJ_TARGET_X86ORX64 #define CALLBACK_MCODE_HEAD (LJ_64 ? 8 : 0) -#define CALLBACK_MCODE_GROUP (-2+1+2+5+(LJ_64 ? 6 : 5)) +#define CALLBACK_MCODE_GROUP (-2+1+2+(LJ_GC64 ? 10 : 5)+(LJ_64 ? 6 : 5)) #define CALLBACK_SLOT2OFS(slot) \ (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_GROUP*((slot)/32) + 4*(slot)) @@ -54,23 +54,22 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #elif LJ_TARGET_ARM #define CALLBACK_MCODE_HEAD 32 -#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) -#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) -#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) + +#elif LJ_TARGET_ARM64 + +#define CALLBACK_MCODE_HEAD 32 #elif LJ_TARGET_PPC #define CALLBACK_MCODE_HEAD 24 -#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) -#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) -#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 -#define CALLBACK_MCODE_HEAD 24 -#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) -#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) -#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) +#define CALLBACK_MCODE_HEAD 20 + +#elif LJ_TARGET_MIPS64 + +#define CALLBACK_MCODE_HEAD 52 #else @@ -81,6 +80,12 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) #endif +#ifndef CALLBACK_SLOT2OFS +#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) +#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) +#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) +#endif + /* Convert callback slot number to callback function pointer. */ static void *callback_slot2ptr(CTState *cts, MSize slot) { @@ -102,9 +107,9 @@ MSize lj_ccallback_ptr2slot(CTState *cts, void *p) /* Initialize machine code for callback function pointers. */ #if LJ_OS_NOJIT /* Disabled callback support. */ -#define callback_mcode_init(g, p) UNUSED(p) +#define callback_mcode_init(g, p) (p) #elif LJ_TARGET_X86ORX64 -static void callback_mcode_init(global_State *g, uint8_t *page) +static void *callback_mcode_init(global_State *g, uint8_t *page) { uint8_t *p = page; uint8_t *target = (uint8_t *)(void *)lj_vm_ffi_callback; @@ -119,8 +124,13 @@ static void callback_mcode_init(global_State *g, uint8_t *page) /* push ebp/rbp; mov ah, slot>>8; mov ebp, &g. */ *p++ = XI_PUSH + RID_EBP; *p++ = XI_MOVrib | (RID_EAX+4); *p++ = (uint8_t)(slot >> 8); +#if LJ_GC64 + *p++ = 0x48; *p++ = XI_MOVri | RID_EBP; + *(uint64_t *)p = (uint64_t)(g); p += 8; +#else *p++ = XI_MOVri | RID_EBP; *(int32_t *)p = i32ptr(g); p += 4; +#endif #if LJ_64 /* jmp [rip-pageofs] where lj_vm_ffi_callback is stored. */ *p++ = XI_GROUP5; *p++ = XM_OFS0 + (XOg_JMP<<3) + RID_EBP; @@ -133,10 +143,10 @@ static void callback_mcode_init(global_State *g, uint8_t *page) *p++ = XI_JMPs; *p++ = (uint8_t)((2+2)*(31-(slot&31)) - 2); } } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #elif LJ_TARGET_ARM -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; @@ -155,10 +165,30 @@ static void callback_mcode_init(global_State *g, uint32_t *page) *p = ARMI_B | ((page-p-2) & 0x00ffffffu); p++; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; +} +#elif LJ_TARGET_ARM64 +static void *callback_mcode_init(global_State *g, uint32_t *page) +{ + uint32_t *p = page; + void *target = (void *)lj_vm_ffi_callback; + MSize slot; + *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X11) | A64F_S19(4)); + *p++ = A64I_LE(A64I_LDRLx | A64F_D(RID_X10) | A64F_S19(5)); + *p++ = A64I_LE(A64I_BR | A64F_N(RID_X11)); + *p++ = A64I_LE(A64I_NOP); + ((void **)p)[0] = target; + ((void **)p)[1] = g; + p += 4; + for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { + *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); + *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); + p++; + } + return p; } #elif LJ_TARGET_PPC -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; void *target = (void *)lj_vm_ffi_callback; @@ -174,30 +204,43 @@ static void callback_mcode_init(global_State *g, uint32_t *page) *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); p++; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #elif LJ_TARGET_MIPS -static void callback_mcode_init(global_State *g, uint32_t *page) +static void *callback_mcode_init(global_State *g, uint32_t *page) { uint32_t *p = page; - void *target = (void *)lj_vm_ffi_callback; + uintptr_t target = (uintptr_t)(void *)lj_vm_ffi_callback; + uintptr_t ug = (uintptr_t)(void *)g; MSize slot; - *p++ = MIPSI_SW | MIPSF_T(RID_R1)|MIPSF_S(RID_SP) | 0; - *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (u32ptr(target) >> 16); - *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (u32ptr(g) >> 16); - *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) |(u32ptr(target)&0xffff); +#if LJ_TARGET_MIPS32 + *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 16); + *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 16); +#else + *p++ = MIPSI_LUI | MIPSF_T(RID_R3) | (target >> 48); + *p++ = MIPSI_LUI | MIPSF_T(RID_R2) | (ug >> 48); + *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 32) & 0xffff); + *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 32) & 0xffff); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); + *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | ((target >> 16) & 0xffff); + *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | ((ug >> 16) & 0xffff); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R3)|MIPSF_T(RID_R3) | MIPSF_A(16); + *p++ = MIPSI_DSLL | MIPSF_D(RID_R2)|MIPSF_T(RID_R2) | MIPSF_A(16); +#endif + *p++ = MIPSI_ORI | MIPSF_T(RID_R3)|MIPSF_S(RID_R3) | (target & 0xffff); *p++ = MIPSI_JR | MIPSF_S(RID_R3); - *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (u32ptr(g)&0xffff); + *p++ = MIPSI_ORI | MIPSF_T(RID_R2)|MIPSF_S(RID_R2) | (ug & 0xffff); for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { *p = MIPSI_B | ((page-p-1) & 0x0000ffffu); p++; *p++ = MIPSI_LI | MIPSF_T(RID_R1) | slot; } - lua_assert(p - page <= CALLBACK_MCODE_SIZE); + return p; } #else /* Missing support for this architecture. */ -#define callback_mcode_init(g, p) UNUSED(p) +#define callback_mcode_init(g, p) (p) #endif /* -- Machine code management --------------------------------------------- */ @@ -213,6 +256,11 @@ static void callback_mcode_init(global_State *g, uint32_t *page) #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif +#ifdef PROT_MPROTECT +#define CCPROT_CREATE (PROT_MPROTECT(PROT_EXEC)) +#else +#define CCPROT_CREATE 0 +#endif #endif @@ -220,15 +268,15 @@ static void callback_mcode_init(global_State *g, uint32_t *page) static void callback_mcode_new(CTState *cts) { size_t sz = (size_t)CALLBACK_MCODE_SIZE; - void *p; + void *p, *pe; if (CALLBACK_MAX_SLOT == 0) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); #if LJ_TARGET_WINDOWS - p = VirtualAlloc(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + p = LJ_WIN_VALLOC(NULL, sz, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); if (!p) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); #elif LJ_TARGET_POSIX - p = mmap(NULL, sz, (PROT_READ|PROT_WRITE), MAP_PRIVATE|MAP_ANONYMOUS, + p = mmap(NULL, sz, (PROT_READ|PROT_WRITE|CCPROT_CREATE), MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) lj_err_caller(cts->L, LJ_ERR_FFI_CBACKOV); @@ -237,12 +285,15 @@ static void callback_mcode_new(CTState *cts) p = lj_mem_new(cts->L, sz); #endif cts->cb.mcode = p; - callback_mcode_init(cts->g, p); + pe = callback_mcode_init(cts->g, p); + UNUSED(pe); + lj_assertCTS((size_t)((char *)pe - (char *)p) <= sz, + "miscalculated CALLBACK_MAX_SLOT"); lj_mcode_sync(p, (char *)p + sz); #if LJ_TARGET_WINDOWS { DWORD oprot; - VirtualProtect(p, sz, PAGE_EXECUTE_READ, &oprot); + LJ_WIN_VPROTECT(p, sz, PAGE_EXECUTE_READ, &oprot); } #elif LJ_TARGET_POSIX mprotect(p, sz, (PROT_READ|PROT_EXEC)); @@ -351,33 +402,78 @@ void lj_ccallback_mcode_free(CTState *cts) goto done; \ } CALLBACK_HANDLE_REGARG_FP2 -#elif LJ_TARGET_PPC +#elif LJ_TARGET_ARM64 #define CALLBACK_HANDLE_REGARG \ if (isfp) { \ - if (nfpr + 1 <= CCALL_NARG_FPR) { \ - sp = &cts->cb.fpr[nfpr++]; \ - cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ + if (nfpr + n <= CCALL_NARG_FPR) { \ + sp = &cts->cb.fpr[nfpr]; \ + nfpr += n; \ goto done; \ + } else { \ + nfpr = CCALL_NARG_FPR; /* Prevent reordering. */ \ } \ - } else { /* Try to pass argument in GPRs. */ \ - if (n > 1) { \ - lua_assert(ctype_isinteger(cta->info) && n == 2); /* int64_t. */ \ - ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ - } \ + } else { \ + if (!LJ_TARGET_OSX && n > 1) \ + ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ if (ngpr + n <= maxgpr) { \ sp = &cts->cb.gpr[ngpr]; \ ngpr += n; \ goto done; \ + } else { \ + ngpr = CCALL_NARG_GPR; /* Prevent reordering. */ \ + } \ + } + +#elif LJ_TARGET_PPC + +#define CALLBACK_HANDLE_GPR \ + if (n > 1) { \ + lj_assertCTS(((LJ_ABI_SOFTFP && ctype_isnum(cta->info)) || /* double. */ \ + ctype_isinteger(cta->info)) && n == 2, /* int64_t. */ \ + "bad GPR type"); \ + ngpr = (ngpr + 1u) & ~1u; /* Align int64_t to regpair. */ \ + } \ + if (ngpr + n <= maxgpr) { \ + sp = &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } + +#if LJ_ABI_SOFTFP +#define CALLBACK_HANDLE_REGARG \ + CALLBACK_HANDLE_GPR \ + UNUSED(isfp); +#else +#define CALLBACK_HANDLE_REGARG \ + if (isfp) { \ + if (nfpr + 1 <= CCALL_NARG_FPR) { \ + sp = &cts->cb.fpr[nfpr++]; \ + cta = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ \ + goto done; \ } \ + } else { /* Try to pass argument in GPRs. */ \ + CALLBACK_HANDLE_GPR \ } +#endif +#if !LJ_ABI_SOFTFP #define CALLBACK_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ +#endif -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 +#define CALLBACK_HANDLE_GPR \ + if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ + if (ngpr + n <= maxgpr) { \ + sp = &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } + +#if !LJ_ABI_SOFTFP /* MIPS32 hard-float */ #define CALLBACK_HANDLE_REGARG \ if (isfp && nfpr < CCALL_NARG_FPR) { /* Try to pass argument in FPRs. */ \ sp = (void *)((uint8_t *)&cts->cb.fpr[nfpr] + ((LJ_BE && n==1) ? 4 : 0)); \ @@ -385,13 +481,36 @@ void lj_ccallback_mcode_free(CTState *cts) goto done; \ } else { /* Try to pass argument in GPRs. */ \ nfpr = CCALL_NARG_FPR; \ - if (n > 1) ngpr = (ngpr + 1u) & ~1u; /* Align to regpair. */ \ - if (ngpr + n <= maxgpr) { \ - sp = &cts->cb.gpr[ngpr]; \ - ngpr += n; \ - goto done; \ - } \ + CALLBACK_HANDLE_GPR \ + } +#else /* MIPS32 soft-float */ +#define CALLBACK_HANDLE_REGARG \ + CALLBACK_HANDLE_GPR \ + UNUSED(isfp); +#endif + +#define CALLBACK_HANDLE_RET \ + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; + +#elif LJ_TARGET_MIPS64 + +#if !LJ_ABI_SOFTFP /* MIPS64 hard-float */ +#define CALLBACK_HANDLE_REGARG \ + if (ngpr + n <= maxgpr) { \ + sp = isfp ? (void*) &cts->cb.fpr[ngpr] : (void*) &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ } +#else /* MIPS64 soft-float */ +#define CALLBACK_HANDLE_REGARG \ + if (ngpr + n <= maxgpr) { \ + UNUSED(isfp); \ + sp = (void*) &cts->cb.gpr[ngpr]; \ + ngpr += n; \ + goto done; \ + } +#endif #define CALLBACK_HANDLE_RET \ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ @@ -411,6 +530,7 @@ static void callback_conv_args(CTState *cts, lua_State *L) int gcsteps = 0; CType *ct; GCfunc *fn; + int fntp; MSize ngpr = 0, nsp = 0, maxgpr = CCALL_NARG_GPR; #if CCALL_NARG_FPR MSize nfpr = 0; @@ -421,18 +541,27 @@ static void callback_conv_args(CTState *cts, lua_State *L) if (slot < cts->cb.sizeid && (id = cts->cb.cbid[slot]) != 0) { ct = ctype_get(cts, id); - rid = ctype_cid(ct->info); + rid = ctype_cid(ct->info); /* Return type. x86: +(spadj<<16). */ fn = funcV(lj_tab_getint(cts->miscmap, (int32_t)slot)); + fntp = LJ_TFUNC; } else { /* Must set up frame first, before throwing the error. */ ct = NULL; rid = 0; fn = (GCfunc *)L; + fntp = LJ_TTHREAD; + } + /* Continuation returns from callback. */ + if (LJ_FR2) { + (o++)->u64 = LJ_CONT_FFI_CALLBACK; + (o++)->u64 = rid; + } else { + o->u32.lo = LJ_CONT_FFI_CALLBACK; + o->u32.hi = rid; + o++; } - o->u32.lo = LJ_CONT_FFI_CALLBACK; /* Continuation returns from callback. */ - o->u32.hi = rid; /* Return type. x86: +(spadj<<16). */ - o++; - setframe_gc(o, obj2gco(fn)); - setframe_ftsz(o, (int)((char *)(o+1) - (char *)L->base) + FRAME_CONT); + setframe_gc(o, obj2gco(fn), fntp); + if (LJ_FR2) o++; + setframe_ftsz(o, ((char *)(o+1) - (char *)L->base) + FRAME_CONT); L->top = L->base = ++o; if (!ct) lj_err_caller(cts->L, LJ_ERR_FFI_BADCBACK); @@ -459,7 +588,7 @@ static void callback_conv_args(CTState *cts, lua_State *L) CTSize sz; int isfp; MSize n; - lua_assert(ctype_isfield(ctf->info)); + lj_assertCTS(ctype_isfield(ctf->info), "field expected"); cta = ctype_rawchild(cts, ctf); isfp = ctype_isfp(cta->info); sz = (cta->size + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); @@ -474,7 +603,11 @@ static void callback_conv_args(CTState *cts, lua_State *L) nsp += n; done: - if (LJ_BE && cta->size < CTSIZE_PTR) + if (LJ_BE && cta->size < CTSIZE_PTR +#if LJ_TARGET_MIPS64 + && !(isfp && nsp) +#endif + ) sp = (void *)((uint8_t *)sp + CTSIZE_PTR-cta->size); gcsteps += lj_cconv_tv_ct(cts, cta, 0, o++, sp); } @@ -483,9 +616,14 @@ static void callback_conv_args(CTState *cts, lua_State *L) L->top = o; #if LJ_TARGET_X86 /* Store stack adjustment for returns from non-cdecl callbacks. */ - if (ctype_cconv(ct->info) != CTCC_CDECL) + if (ctype_cconv(ct->info) != CTCC_CDECL) { +#if LJ_FR2 + (L->base-3)->u64 |= (nsp << (16+2)); +#else (L->base-2)->u32.hi |= (nsp << (16+2)); #endif + } +#endif while (gcsteps-- > 0) lj_gc_check(L); } @@ -493,7 +631,11 @@ static void callback_conv_args(CTState *cts, lua_State *L) /* Convert Lua object to callback result. */ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) { +#if LJ_FR2 + CType *ctr = ctype_raw(cts, (uint16_t)(L->base-3)->u64); +#else CType *ctr = ctype_raw(cts, (uint16_t)(L->base-2)->u32.hi); +#endif #if LJ_TARGET_X86 cts->cb.gpr[2] = 0; #endif @@ -503,6 +645,10 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) if (ctype_isfp(ctr->info)) dp = (uint8_t *)&cts->cb.fpr[0]; #endif +#if LJ_TARGET_ARM64 && LJ_BE + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) + dp = (uint8_t *)&cts->cb.fpr[0].f[1]; +#endif lj_cconv_ct_tv(cts, ctr, dp, o, 0); #ifdef CALLBACK_HANDLE_RET CALLBACK_HANDLE_RET @@ -516,6 +662,12 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } +#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) + /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ + if (ctr->size <= 4 && + (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) + *(int64_t *)dp = (int64_t)*(int32_t *)dp; +#endif #if LJ_TARGET_X86 if (ctype_isfp(ctr->info)) cts->cb.gpr[2] = ctr->size == sizeof(float) ? 1 : 2; @@ -528,8 +680,8 @@ lua_State * LJ_FASTCALL lj_ccallback_enter(CTState *cts, void *cf) { lua_State *L = cts->L; global_State *g = cts->g; - lua_assert(L != NULL); - if (gcref(g->jit_L)) { + lj_assertG(L != NULL, "uninitialized cts->L in callback"); + if (tvref(g->jit_base)) { setstrV(L, L->top++, lj_err_str(L, LJ_ERR_FFI_BADCBACK)); if (g->panic) g->panic(L); exit(EXIT_FAILURE); @@ -562,9 +714,9 @@ void LJ_FASTCALL lj_ccallback_leave(CTState *cts, TValue *o) } callback_conv_result(cts, L, o); /* Finally drop C frame and continuation frame. */ - L->cframe = cframe_prev(L->cframe); - L->top -= 2; + L->top -= 2+2*LJ_FR2; L->base = obase; + L->cframe = cframe_prev(L->cframe); cts->cb.slot = 0; /* Blacklist C function that called the callback. */ } @@ -613,7 +765,7 @@ static CType *callback_checkfunc(CTState *cts, CType *ct) CType *ctf = ctype_get(cts, fid); if (!ctype_isattrib(ctf->info)) { CType *cta; - lua_assert(ctype_isfield(ctf->info)); + lj_assertCTS(ctype_isfield(ctf->info), "field expected"); cta = ctype_rawchild(cts, ctf); if (!(ctype_isenum(cta->info) || ctype_isptr(cta->info) || (ctype_isnum(cta->info) && cta->size <= 8)) || diff --git a/src/lj_cconv.c b/src/lj_cconv.c index 8556952f..3bbfd3f1 100644 --- a/src/lj_cconv.c +++ b/src/lj_cconv.c @@ -8,6 +8,7 @@ #if LJ_HASFFI #include "lj_err.h" +#include "lj_buf.h" #include "lj_tab.h" #include "lj_ctype.h" #include "lj_cdata.h" @@ -122,19 +123,25 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, CTInfo dinfo = d->info, sinfo = s->info; void *tmpptr; - lua_assert(!ctype_isenum(dinfo) && !ctype_isenum(sinfo)); - lua_assert(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo)); + lj_assertCTS(!ctype_isenum(dinfo) && !ctype_isenum(sinfo), + "unresolved enum"); + lj_assertCTS(!ctype_isattrib(dinfo) && !ctype_isattrib(sinfo), + "unstripped attribute"); if (ctype_type(dinfo) > CT_MAYCONVERT || ctype_type(sinfo) > CT_MAYCONVERT) goto err_conv; /* Some basic sanity checks. */ - lua_assert(!ctype_isnum(dinfo) || dsize > 0); - lua_assert(!ctype_isnum(sinfo) || ssize > 0); - lua_assert(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4); - lua_assert(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4); - lua_assert(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize); - lua_assert(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize); + lj_assertCTS(!ctype_isnum(dinfo) || dsize > 0, "bad size for number type"); + lj_assertCTS(!ctype_isnum(sinfo) || ssize > 0, "bad size for number type"); + lj_assertCTS(!ctype_isbool(dinfo) || dsize == 1 || dsize == 4, + "bad size for bool type"); + lj_assertCTS(!ctype_isbool(sinfo) || ssize == 1 || ssize == 4, + "bad size for bool type"); + lj_assertCTS(!ctype_isinteger(dinfo) || (1u<<lj_fls(dsize)) == dsize, + "bad size for integer type"); + lj_assertCTS(!ctype_isinteger(sinfo) || (1u<<lj_fls(ssize)) == ssize, + "bad size for integer type"); switch (cconv_idx2(dinfo, sinfo)) { /* Destination is a bool. */ @@ -357,7 +364,7 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s, if ((flags & CCF_CAST) || (d->info & CTF_VLA) || d != s) goto err_conv; /* Must be exact same type. */ copyval: /* Copy value. */ - lua_assert(dsize == ssize); + lj_assertCTS(dsize == ssize, "value copy with different sizes"); memcpy(dp, sp, dsize); break; @@ -389,7 +396,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, lj_cconv_ct_ct(cts, ctype_get(cts, CTID_DOUBLE), s, (uint8_t *)&o->n, sp, 0); /* Numbers are NOT canonicalized here! Beware of uninitialized data. */ - lua_assert(tvisnum(o)); + lj_assertCTS(tvisnum(o), "non-canonical NaN passed"); } } else { uint32_t b = s->size == 1 ? (*sp != 0) : (*(int *)sp != 0); @@ -406,7 +413,7 @@ int lj_cconv_tv_ct(CTState *cts, CType *s, CTypeID sid, CTSize sz; copyval: /* Copy value. */ sz = s->size; - lua_assert(sz != CTSIZE_INVALID); + lj_assertCTS(sz != CTSIZE_INVALID, "value copy with invalid size"); /* Attributes are stripped, qualifiers are kept (but mostly ignored). */ cd = lj_cdata_new(cts, ctype_typeid(cts, s), sz); setcdataV(cts->L, o, cd); @@ -421,19 +428,22 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) CTInfo info = s->info; CTSize pos, bsz; uint32_t val; - lua_assert(ctype_isbitfield(info)); + lj_assertCTS(ctype_isbitfield(info), "bitfield expected"); /* NYI: packed bitfields may cause misaligned reads. */ switch (ctype_bitcsz(info)) { case 4: val = *(uint32_t *)sp; break; case 2: val = *(uint16_t *)sp; break; case 1: val = *(uint8_t *)sp; break; - default: lua_assert(0); val = 0; break; + default: + lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info)); + val = 0; + break; } /* Check if a packed bitfield crosses a container boundary. */ pos = ctype_bitpos(info); bsz = ctype_bitbsz(info); - lua_assert(pos < 8*ctype_bitcsz(info)); - lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); + lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position"); + lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size"); if (pos + bsz > 8*ctype_bitcsz(info)) lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); if (!(info & CTF_BOOL)) { @@ -448,8 +458,10 @@ int lj_cconv_tv_bf(CTState *cts, CType *s, TValue *o, uint8_t *sp) setintV(o, (int32_t)val); } } else { - lua_assert(bsz == 1); - setboolV(o, (val >> pos) & 1); + uint32_t b = (val >> pos) & 1; + lj_assertCTS(bsz == 1, "bad bool bitfield size"); + setboolV(o, b); + setboolV(&cts->g->tmptv2, b); /* Remember for trace recorder. */ } return 0; /* No GC step needed. */ } @@ -551,7 +563,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, sid = cdataV(o)->ctypeid; s = ctype_get(cts, sid); if (ctype_isref(s->info)) { /* Resolve reference for value. */ - lua_assert(s->size == CTSIZE_PTR); + lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized"); sp = *(void **)sp; sid = ctype_cid(s->info); } @@ -571,7 +583,7 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, CType *cct = lj_ctype_getfield(cts, d, str, &ofs); if (!cct || !ctype_isconstval(cct->info)) goto err_conv; - lua_assert(d->size == 4); + lj_assertCTS(d->size == 4, "only 32 bit enum supported"); /* NYI */ sp = (uint8_t *)&cct->size; sid = ctype_cid(cct->info); } else if (ctype_isrefarray(d->info)) { /* Copy string to array. */ @@ -610,8 +622,10 @@ void lj_cconv_ct_tv(CTState *cts, CType *d, tmpptr = uddata(ud); if (ud->udtype == UDTYPE_IO_FILE) tmpptr = *(void **)tmpptr; + else if (ud->udtype == UDTYPE_BUFFER) + tmpptr = ((SBufExt *)tmpptr)->r; } else if (tvislightud(o)) { - tmpptr = lightudV(o); + tmpptr = lightudV(cts->g, o); } else if (tvisfunc(o)) { void *p = lj_ccallback_new(cts, d, funcV(o)); if (p) { @@ -635,10 +649,10 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) CTInfo info = d->info; CTSize pos, bsz; uint32_t val, mask; - lua_assert(ctype_isbitfield(info)); + lj_assertCTS(ctype_isbitfield(info), "bitfield expected"); if ((info & CTF_BOOL)) { uint8_t tmpbool; - lua_assert(ctype_bitbsz(info) == 1); + lj_assertCTS(ctype_bitbsz(info) == 1, "bad bool bitfield size"); lj_cconv_ct_tv(cts, ctype_get(cts, CTID_BOOL), &tmpbool, o, 0); val = tmpbool; } else { @@ -647,8 +661,8 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) } pos = ctype_bitpos(info); bsz = ctype_bitbsz(info); - lua_assert(pos < 8*ctype_bitcsz(info)); - lua_assert(bsz > 0 && bsz <= 8*ctype_bitcsz(info)); + lj_assertCTS(pos < 8*ctype_bitcsz(info), "bad bitfield position"); + lj_assertCTS(bsz > 0 && bsz <= 8*ctype_bitcsz(info), "bad bitfield size"); /* Check if a packed bitfield crosses a container boundary. */ if (pos + bsz > 8*ctype_bitcsz(info)) lj_err_caller(cts->L, LJ_ERR_FFI_NYIPACKBIT); @@ -659,7 +673,9 @@ void lj_cconv_bf_tv(CTState *cts, CType *d, uint8_t *dp, TValue *o) case 4: *(uint32_t *)dp = (*(uint32_t *)dp & ~mask) | (uint32_t)val; break; case 2: *(uint16_t *)dp = (*(uint16_t *)dp & ~mask) | (uint16_t)val; break; case 1: *(uint8_t *)dp = (*(uint8_t *)dp & ~mask) | (uint8_t)val; break; - default: lua_assert(0); break; + default: + lj_assertCTS(0, "bad bitfield container size %d", ctype_bitcsz(info)); + break; } } diff --git a/src/lj_cconv.h b/src/lj_cconv.h index 2d1cb273..45b0ca1e 100644 --- a/src/lj_cconv.h +++ b/src/lj_cconv.h @@ -27,13 +27,14 @@ enum { static LJ_AINLINE uint32_t cconv_idx(CTInfo info) { uint32_t idx = ((info >> 26) & 15u); /* Dispatch bits. */ - lua_assert(ctype_type(info) <= CT_MAYCONVERT); + lj_assertX(ctype_type(info) <= CT_MAYCONVERT, + "cannot convert ctype %08x", info); #if LJ_64 idx = ((uint32_t)(U64x(f436fff5,fff7f021) >> 4*idx) & 15u); #else idx = (((idx < 8 ? 0xfff7f021u : 0xf436fff5) >> 4*(idx & 7u)) & 15u); #endif - lua_assert(idx < 8); + lj_assertX(idx < 8, "cannot convert ctype %08x", info); return idx; } diff --git a/src/lj_cdata.c b/src/lj_cdata.c index 425e6bcf..01a74f5d 100644 --- a/src/lj_cdata.c +++ b/src/lj_cdata.c @@ -9,7 +9,6 @@ #include "lj_gc.h" #include "lj_err.h" -#include "lj_str.h" #include "lj_tab.h" #include "lj_ctype.h" #include "lj_cconv.h" @@ -27,20 +26,20 @@ GCcdata *lj_cdata_newref(CTState *cts, const void *p, CTypeID id) } /* Allocate variable-sized or specially aligned C data object. */ -GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) +GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align) { global_State *g; MSize extra = sizeof(GCcdataVar) + sizeof(GCcdata) + (align > CT_MEMALIGN ? (1u<<align) - (1u<<CT_MEMALIGN) : 0); - char *p = lj_mem_newt(cts->L, extra + sz, char); + char *p = lj_mem_newt(L, extra + sz, char); uintptr_t adata = (uintptr_t)p + sizeof(GCcdataVar) + sizeof(GCcdata); uintptr_t almask = (1u << align) - 1u; GCcdata *cd = (GCcdata *)(((adata + almask) & ~almask) - sizeof(GCcdata)); - lua_assert((char *)cd - p < 65536); + lj_assertL((char *)cd - p < 65536, "excessive cdata alignment"); cdatav(cd)->offset = (uint16_t)((char *)cd - p); cdatav(cd)->extra = extra; cdatav(cd)->len = sz; - g = cts->g; + g = G(L); setgcrefr(cd->nextgc, g->gc.root); setgcref(g->gc.root, obj2gco(cd)); newwhite(g, obj2gco(cd)); @@ -50,6 +49,15 @@ GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, CTSize align) return cd; } +/* Allocate arbitrary C data object. */ +GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, CTInfo info) +{ + if (!(info & CTF_VLA) && ctype_align(info) <= CT_MEMALIGN) + return lj_cdata_new(cts, id, sz); + else + return lj_cdata_newv(cts->L, id, sz, ctype_align(info)); +} + /* Free a C data object. */ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) { @@ -68,29 +76,30 @@ void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd) } else if (LJ_LIKELY(!cdataisv(cd))) { CType *ct = ctype_raw(ctype_ctsG(g), cd->ctypeid); CTSize sz = ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR; - lua_assert(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || - ctype_isextern(ct->info)); + lj_assertG(ctype_hassize(ct->info) || ctype_isfunc(ct->info) || + ctype_isextern(ct->info), "free of ctype without a size"); lj_mem_free(g, cd, sizeof(GCcdata) + sz); } else { lj_mem_free(g, memcdatav(cd), sizecdatav(cd)); } } -TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd) +void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, uint32_t it) { - global_State *g = G(L); - GCtab *t = ctype_ctsG(g)->finalizer; + GCtab *t = ctype_ctsG(G(L))->finalizer; if (gcref(t->metatable)) { /* Add cdata to finalizer table, if still enabled. */ TValue *tv, tmp; setcdataV(L, &tmp, cd); lj_gc_anybarriert(L, t); tv = lj_tab_set(L, t, &tmp); - cd->marked |= LJ_GC_CDATA_FIN; - return tv; - } else { - /* Otherwise return dummy TValue. */ - return &g->tmptv; + if (it == LJ_TNIL) { + setnilV(tv); + cd->marked &= ~LJ_GC_CDATA_FIN; + } else { + setgcV(L, tv, obj, it); + cd->marked |= LJ_GC_CDATA_FIN; + } } } @@ -106,7 +115,7 @@ CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, /* Resolve reference for cdata object. */ if (ctype_isref(ct->info)) { - lua_assert(ct->size == CTSIZE_PTR); + lj_assertCTS(ct->size == CTSIZE_PTR, "ref is not pointer-sized"); p = *(uint8_t **)p; ct = ctype_child(cts, ct); } @@ -117,13 +126,19 @@ collect_attrib: if (ctype_attrib(ct->info) == CTA_QUAL) *qual |= ct->size; ct = ctype_child(cts, ct); } - lua_assert(!ctype_isref(ct->info)); /* Interning rejects refs to refs. */ + /* Interning rejects refs to refs. */ + lj_assertCTS(!ctype_isref(ct->info), "bad ref of ref"); if (tvisint(key)) { idx = (ptrdiff_t)intV(key); goto integer_key; } else if (tvisnum(key)) { /* Numeric key. */ - idx = LJ_64 ? (ptrdiff_t)numV(key) : (ptrdiff_t)lj_num2int(numV(key)); +#ifdef _MSC_VER + /* Workaround for MSVC bug. */ + volatile +#endif + lua_Number n = numV(key); + idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n); integer_key: if (ctype_ispointer(ct->info)) { CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ @@ -198,7 +213,8 @@ collect_attrib: static void cdata_getconst(CTState *cts, TValue *o, CType *ct) { CType *ctt = ctype_child(cts, ct); - lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); + lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4, + "only 32 bit const supported"); /* NYI */ /* Constants are already zero-extended/sign-extended to 32 bits. */ if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) setnumV(o, (lua_Number)(uint32_t)ct->size); @@ -219,13 +235,14 @@ int lj_cdata_get(CTState *cts, CType *s, TValue *o, uint8_t *sp) } /* Get child type of pointer/array/field. */ - lua_assert(ctype_ispointer(s->info) || ctype_isfield(s->info)); + lj_assertCTS(ctype_ispointer(s->info) || ctype_isfield(s->info), + "pointer or field expected"); sid = ctype_cid(s->info); s = ctype_get(cts, sid); /* Resolve reference for field. */ if (ctype_isref(s->info)) { - lua_assert(s->size == CTSIZE_PTR); + lj_assertCTS(s->size == CTSIZE_PTR, "ref is not pointer-sized"); sp = *(uint8_t **)sp; sid = ctype_cid(s->info); s = ctype_get(cts, sid); @@ -252,12 +269,13 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) } /* Get child type of pointer/array/field. */ - lua_assert(ctype_ispointer(d->info) || ctype_isfield(d->info)); + lj_assertCTS(ctype_ispointer(d->info) || ctype_isfield(d->info), + "pointer or field expected"); d = ctype_child(cts, d); /* Resolve reference for field. */ if (ctype_isref(d->info)) { - lua_assert(d->size == CTSIZE_PTR); + lj_assertCTS(d->size == CTSIZE_PTR, "ref is not pointer-sized"); dp = *(uint8_t **)dp; d = ctype_child(cts, d); } @@ -272,7 +290,8 @@ void lj_cdata_set(CTState *cts, CType *d, uint8_t *dp, TValue *o, CTInfo qual) d = ctype_child(cts, d); } - lua_assert(ctype_hassize(d->info) && !ctype_isvoid(d->info)); + lj_assertCTS(ctype_hassize(d->info), "store to ctype without size"); + lj_assertCTS(!ctype_isvoid(d->info), "store to void type"); if (((d->info|qual) & CTF_CONST)) { err_const: diff --git a/src/lj_cdata.h b/src/lj_cdata.h index 2a82a9d8..de52e8aa 100644 --- a/src/lj_cdata.h +++ b/src/lj_cdata.h @@ -18,7 +18,7 @@ static LJ_AINLINE void *cdata_getptr(void *p, CTSize sz) if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ return ((void *)(uintptr_t)*(uint32_t *)p); } else { - lua_assert(sz == CTSIZE_PTR); + lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz); return *(void **)p; } } @@ -29,7 +29,7 @@ static LJ_AINLINE void cdata_setptr(void *p, CTSize sz, const void *v) if (LJ_64 && sz == 4) { /* Support 32 bit pointers on 64 bit targets. */ *(uint32_t *)p = (uint32_t)(uintptr_t)v; } else { - lua_assert(sz == CTSIZE_PTR); + lj_assertX(sz == CTSIZE_PTR, "bad pointer size %d", sz); *(void **)p = (void *)v; } } @@ -40,7 +40,8 @@ static LJ_AINLINE GCcdata *lj_cdata_new(CTState *cts, CTypeID id, CTSize sz) GCcdata *cd; #ifdef LUA_USE_ASSERT CType *ct = ctype_raw(cts, id); - lua_assert((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz); + lj_assertCTS((ctype_hassize(ct->info) ? ct->size : CTSIZE_PTR) == sz, + "inconsistent size of fixed-size cdata alloc"); #endif cd = (GCcdata *)lj_mem_newgco(cts->L, sizeof(GCcdata) + sz); cd->gct = ~LJ_TCDATA; @@ -58,11 +59,14 @@ static LJ_AINLINE GCcdata *lj_cdata_new_(lua_State *L, CTypeID id, CTSize sz) } LJ_FUNC GCcdata *lj_cdata_newref(CTState *cts, const void *pp, CTypeID id); -LJ_FUNC GCcdata *lj_cdata_newv(CTState *cts, CTypeID id, CTSize sz, +LJ_FUNC GCcdata *lj_cdata_newv(lua_State *L, CTypeID id, CTSize sz, CTSize align); +LJ_FUNC GCcdata *lj_cdata_newx(CTState *cts, CTypeID id, CTSize sz, + CTInfo info); LJ_FUNC void LJ_FASTCALL lj_cdata_free(global_State *g, GCcdata *cd); -LJ_FUNCA TValue * LJ_FASTCALL lj_cdata_setfin(lua_State *L, GCcdata *cd); +LJ_FUNC void lj_cdata_setfin(lua_State *L, GCcdata *cd, GCobj *obj, + uint32_t it); LJ_FUNC CType *lj_cdata_index(CTState *cts, GCcdata *cd, cTValue *key, uint8_t **pp, CTInfo *qual); diff --git a/src/lj_clib.c b/src/lj_clib.c index ab2db33a..f0ef6edd 100644 --- a/src/lj_clib.c +++ b/src/lj_clib.c @@ -16,6 +16,7 @@ #include "lj_cconv.h" #include "lj_cdata.h" #include "lj_clib.h" +#include "lj_strfmt.h" /* -- OS-specific functions ----------------------------------------------- */ @@ -61,7 +62,7 @@ static const char *clib_extname(lua_State *L, const char *name) #endif ) { if (!strchr(name, '.')) { - name = lj_str_pushf(L, CLIB_SOEXT, name); + name = lj_strfmt_pushf(L, CLIB_SOEXT, name); L->top--; #if LJ_TARGET_CYGWIN } else { @@ -70,7 +71,7 @@ static const char *clib_extname(lua_State *L, const char *name) } if (!(name[0] == CLIB_SOPREFIX[0] && name[1] == CLIB_SOPREFIX[1] && name[2] == CLIB_SOPREFIX[2])) { - name = lj_str_pushf(L, CLIB_SOPREFIX "%s", name); + name = lj_strfmt_pushf(L, CLIB_SOPREFIX "%s", name); L->top--; } } @@ -158,11 +159,13 @@ BOOL WINAPI GetModuleHandleExA(DWORD, LPCSTR, HMODULE*); /* Default libraries. */ enum { CLIB_HANDLE_EXE, +#if !LJ_TARGET_UWP CLIB_HANDLE_DLL, CLIB_HANDLE_CRT, CLIB_HANDLE_KERNEL32, CLIB_HANDLE_USER32, CLIB_HANDLE_GDI32, +#endif CLIB_HANDLE_MAX }; @@ -172,11 +175,19 @@ LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, const char *name) { DWORD err = GetLastError(); +#if LJ_TARGET_XBOXONE + wchar_t wbuf[128]; + char buf[128*2]; + if (!FormatMessageW(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, + NULL, err, 0, wbuf, sizeof(wbuf)/sizeof(wchar_t), NULL) || + !WideCharToMultiByte(CP_ACP, 0, wbuf, 128, buf, 128*2, NULL, NULL)) +#else char buf[128]; if (!FormatMessageA(FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, buf, sizeof(buf), NULL)) +#endif buf[0] = '\0'; - lj_err_callermsg(L, lj_str_pushf(L, fmt, name, buf)); + lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, buf)); } static int clib_needext(const char *s) @@ -191,7 +202,7 @@ static int clib_needext(const char *s) static const char *clib_extname(lua_State *L, const char *name) { if (clib_needext(name)) { - name = lj_str_pushf(L, "%s.dll", name); + name = lj_strfmt_pushf(L, "%s.dll", name); L->top--; } return name; @@ -200,7 +211,7 @@ static const char *clib_extname(lua_State *L, const char *name) static void *clib_loadlib(lua_State *L, const char *name, int global) { DWORD oldwerr = GetLastError(); - void *h = (void *)LoadLibraryA(clib_extname(L, name)); + void *h = LJ_WIN_LOADLIBA(clib_extname(L, name)); if (!h) clib_error(L, "cannot load module " LUA_QS ": %s", name); SetLastError(oldwerr); UNUSED(global); @@ -210,6 +221,7 @@ static void *clib_loadlib(lua_State *L, const char *name, int global) static void clib_unloadlib(CLibrary *cl) { if (cl->handle == CLIB_DEFHANDLE) { +#if !LJ_TARGET_UWP MSize i; for (i = CLIB_HANDLE_KERNEL32; i < CLIB_HANDLE_MAX; i++) { void *h = clib_def_handle[i]; @@ -218,11 +230,16 @@ static void clib_unloadlib(CLibrary *cl) FreeLibrary((HINSTANCE)h); } } +#endif } else if (cl->handle) { FreeLibrary((HINSTANCE)cl->handle); } } +#if LJ_TARGET_UWP +EXTERN_C IMAGE_DOS_HEADER __ImageBase; +#endif + static void *clib_getsym(CLibrary *cl, const char *name) { void *p = NULL; @@ -231,6 +248,9 @@ static void *clib_getsym(CLibrary *cl, const char *name) for (i = 0; i < CLIB_HANDLE_MAX; i++) { HINSTANCE h = (HINSTANCE)clib_def_handle[i]; if (!(void *)h) { /* Resolve default library handles (once). */ +#if LJ_TARGET_UWP + h = (HINSTANCE)&__ImageBase; +#else switch (i) { case CLIB_HANDLE_EXE: GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, NULL, &h); break; case CLIB_HANDLE_DLL: @@ -241,11 +261,12 @@ static void *clib_getsym(CLibrary *cl, const char *name) GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS|GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, (const char *)&_fmode, &h); break; - case CLIB_HANDLE_KERNEL32: h = LoadLibraryA("kernel32.dll"); break; - case CLIB_HANDLE_USER32: h = LoadLibraryA("user32.dll"); break; - case CLIB_HANDLE_GDI32: h = LoadLibraryA("gdi32.dll"); break; + case CLIB_HANDLE_KERNEL32: h = LJ_WIN_LOADLIBA("kernel32.dll"); break; + case CLIB_HANDLE_USER32: h = LJ_WIN_LOADLIBA("user32.dll"); break; + case CLIB_HANDLE_GDI32: h = LJ_WIN_LOADLIBA("gdi32.dll"); break; } if (!h) continue; +#endif clib_def_handle[i] = (void *)h; } p = (void *)GetProcAddress(h, name); @@ -264,7 +285,7 @@ static void *clib_getsym(CLibrary *cl, const char *name) LJ_NORET LJ_NOINLINE static void clib_error(lua_State *L, const char *fmt, const char *name) { - lj_err_callermsg(L, lj_str_pushf(L, fmt, name, "no support for this OS")); + lj_err_callermsg(L, lj_strfmt_pushf(L, fmt, name, "no support for this OS")); } static void *clib_loadlib(lua_State *L, const char *name, int global) @@ -329,7 +350,8 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) lj_err_callerv(L, LJ_ERR_FFI_NODECL, strdata(name)); if (ctype_isconstval(ct->info)) { CType *ctt = ctype_child(cts, ct); - lua_assert(ctype_isinteger(ctt->info) && ctt->size <= 4); + lj_assertCTS(ctype_isinteger(ctt->info) && ctt->size <= 4, + "only 32 bit const supported"); /* NYI */ if ((ctt->info & CTF_UNSIGNED) && (int32_t)ct->size < 0) setnumV(tv, (lua_Number)(uint32_t)ct->size); else @@ -341,14 +363,15 @@ TValue *lj_clib_index(lua_State *L, CLibrary *cl, GCstr *name) #endif void *p = clib_getsym(cl, sym); GCcdata *cd; - lua_assert(ctype_isfunc(ct->info) || ctype_isextern(ct->info)); + lj_assertCTS(ctype_isfunc(ct->info) || ctype_isextern(ct->info), + "unexpected ctype %08x in clib", ct->info); #if LJ_TARGET_X86 && LJ_ABI_WIN /* Retry with decorated name for fastcall/stdcall functions. */ if (!p && ctype_isfunc(ct->info)) { CTInfo cconv = ctype_cconv(ct->info); if (cconv == CTCC_FASTCALL || cconv == CTCC_STDCALL) { CTSize sz = clib_func_argsize(cts, ct); - const char *symd = lj_str_pushf(L, + const char *symd = lj_strfmt_pushf(L, cconv == CTCC_FASTCALL ? "@%s@%d" : "_%s@%d", sym, sz); L->top--; diff --git a/src/lj_cparse.c b/src/lj_cparse.c index df85d23b..7fd83997 100644 --- a/src/lj_cparse.c +++ b/src/lj_cparse.c @@ -9,13 +9,14 @@ #include "lj_gc.h" #include "lj_err.h" -#include "lj_str.h" +#include "lj_buf.h" #include "lj_ctype.h" #include "lj_cparse.h" #include "lj_frame.h" #include "lj_vm.h" #include "lj_char.h" #include "lj_strscan.h" +#include "lj_strfmt.h" /* ** Important note: this is NOT a validating C parser! This is a minimal @@ -27,6 +28,30 @@ ** If in doubt, please check the input against your favorite C compiler. */ +#ifdef LUA_USE_ASSERT +#define lj_assertCP(c, ...) (lj_assertG_(G(cp->L), (c), __VA_ARGS__)) +#else +#define lj_assertCP(c, ...) ((void)cp) +#endif + +/* -- Miscellaneous ------------------------------------------------------- */ + +/* Match string against a C literal. */ +#define cp_str_is(str, k) \ + ((str)->len == sizeof(k)-1 && !memcmp(strdata(str), k, sizeof(k)-1)) + +/* Check string against a linear list of matches. */ +int lj_cparse_case(GCstr *str, const char *match) +{ + MSize len; + int n; + for (n = 0; (len = (MSize)*match++); n++, match += len) { + if (str->len == len && !memcmp(match, strdata(str), len)) + return n; + } + return -1; +} + /* -- C lexer ------------------------------------------------------------- */ /* C lexer token names. */ @@ -42,13 +67,13 @@ LJ_NORET static void cp_err(CPState *cp, ErrMsg em); static const char *cp_tok2str(CPState *cp, CPToken tok) { - lua_assert(tok < CTOK_FIRSTDECL); + lj_assertCP(tok < CTOK_FIRSTDECL, "bad CPToken %d", tok); if (tok > CTOK_OFS) return ctoknames[tok-CTOK_OFS-1]; else if (!lj_char_iscntrl(tok)) - return lj_str_pushf(cp->L, "%c", tok); + return lj_strfmt_pushf(cp->L, "%c", tok); else - return lj_str_pushf(cp->L, "char(%d)", tok); + return lj_strfmt_pushf(cp->L, "char(%d)", tok); } /* End-of-line? */ @@ -85,24 +110,10 @@ static LJ_NOINLINE CPChar cp_get_bs(CPState *cp) return cp_get(cp); } -/* Grow save buffer. */ -static LJ_NOINLINE void cp_save_grow(CPState *cp, CPChar c) -{ - MSize newsize; - if (cp->sb.sz >= CPARSE_MAX_BUF/2) - cp_err(cp, LJ_ERR_XELEM); - newsize = cp->sb.sz * 2; - lj_str_resizebuf(cp->L, &cp->sb, newsize); - cp->sb.buf[cp->sb.n++] = (char)c; -} - /* Save character in buffer. */ static LJ_AINLINE void cp_save(CPState *cp, CPChar c) { - if (LJ_UNLIKELY(cp->sb.n + 1 > cp->sb.sz)) - cp_save_grow(cp, c); - else - cp->sb.buf[cp->sb.n++] = (char)c; + lj_buf_putb(&cp->sb, c); } /* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ @@ -122,20 +133,20 @@ LJ_NORET static void cp_errmsg(CPState *cp, CPToken tok, ErrMsg em, ...) tokstr = NULL; } else if (tok == CTOK_IDENT || tok == CTOK_INTEGER || tok == CTOK_STRING || tok >= CTOK_FIRSTDECL) { - if (cp->sb.n == 0) cp_save(cp, '$'); + if (cp->sb.w == cp->sb.b) cp_save(cp, '$'); cp_save(cp, '\0'); - tokstr = cp->sb.buf; + tokstr = cp->sb.b; } else { tokstr = cp_tok2str(cp, tok); } L = cp->L; va_start(argp, em); - msg = lj_str_pushvf(L, err2msg(em), argp); + msg = lj_strfmt_pushvf(L, err2msg(em), argp); va_end(argp); if (tokstr) - msg = lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); + msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tokstr); if (cp->linenumber > 1) - msg = lj_str_pushf(L, "%s at line %d", msg, cp->linenumber); + msg = lj_strfmt_pushf(L, "%s at line %d", msg, cp->linenumber); lj_err_callermsg(L, msg); } @@ -164,7 +175,8 @@ static CPToken cp_number(CPState *cp) TValue o; do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); cp_save(cp, '\0'); - fmt = lj_strscan_scan((const uint8_t *)cp->sb.buf, &o, STRSCAN_OPT_C); + fmt = lj_strscan_scan((const uint8_t *)(cp->sb.b), sbuflen(&cp->sb)-1, + &o, STRSCAN_OPT_C); if (fmt == STRSCAN_INT) cp->val.id = CTID_INT32; else if (fmt == STRSCAN_U32) cp->val.id = CTID_UINT32; else if (!(cp->mode & CPARSE_MODE_SKIP)) @@ -177,7 +189,7 @@ static CPToken cp_number(CPState *cp) static CPToken cp_ident(CPState *cp) { do { cp_save(cp, cp->c); } while (lj_char_isident(cp_get(cp))); - cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); + cp->str = lj_buf_str(cp->L, &cp->sb); cp->val.id = lj_ctype_getname(cp->cts, &cp->ct, cp->str, cp->tmask); if (ctype_type(cp->ct->info) == CT_KW) return ctype_cid(cp->ct->info); @@ -263,11 +275,11 @@ static CPToken cp_string(CPState *cp) } cp_get(cp); if (delim == '"') { - cp->str = lj_str_new(cp->L, cp->sb.buf, cp->sb.n); + cp->str = lj_buf_str(cp->L, &cp->sb); return CTOK_STRING; } else { - if (cp->sb.n != 1) cp_err_token(cp, '\''); - cp->val.i32 = (int32_t)(char)cp->sb.buf[0]; + if (sbuflen(&cp->sb) != 1) cp_err_token(cp, '\''); + cp->val.i32 = (int32_t)(char)*cp->sb.b; cp->val.id = CTID_INT32; return CTOK_INTEGER; } @@ -296,7 +308,7 @@ static void cp_comment_cpp(CPState *cp) /* Lexical scanner for C. Only a minimal subset is implemented. */ static CPToken cp_next_(CPState *cp) { - lj_str_resetbuf(&cp->sb); + lj_buf_reset(&cp->sb); for (;;) { if (lj_char_isident(cp->c)) return lj_char_isdigit(cp->c) ? cp_number(cp) : cp_ident(cp); @@ -385,9 +397,8 @@ static void cp_init(CPState *cp) cp->depth = 0; cp->curpack = 0; cp->packstack[0] = 255; - lj_str_initbuf(&cp->sb); - lj_str_resizebuf(cp->L, &cp->sb, LJ_MIN_SBUF); - lua_assert(cp->p != NULL); + lj_buf_init(cp->L, &cp->sb); + lj_assertCP(cp->p != NULL, "uninitialized cp->p"); cp_get(cp); /* Read-ahead first char. */ cp->tok = 0; cp->tmask = CPNS_DEFAULT; @@ -398,7 +409,7 @@ static void cp_init(CPState *cp) static void cp_cleanup(CPState *cp) { global_State *g = G(cp->L); - lj_str_freebuf(g, &cp->sb); + lj_buf_free(g, &cp->sb); } /* Check and consume optional token. */ @@ -848,12 +859,13 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) /* The cid is already part of info for copies of pointers/functions. */ idx = ct->next; if (ctype_istypedef(info)) { - lua_assert(id == 0); + lj_assertCP(id == 0, "typedef not at toplevel"); id = ctype_cid(info); /* Always refetch info/size, since struct/enum may have been completed. */ cinfo = ctype_get(cp->cts, id)->info; csize = ctype_get(cp->cts, id)->size; - lua_assert(ctype_isstruct(cinfo) || ctype_isenum(cinfo)); + lj_assertCP(ctype_isstruct(cinfo) || ctype_isenum(cinfo), + "typedef of bad type"); } else if (ctype_isfunc(info)) { /* Intern function. */ CType *fct; CTypeID fid; @@ -886,7 +898,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) /* Inherit csize/cinfo from original type. */ } else { if (ctype_isnum(info)) { /* Handle mode/vector-size attributes. */ - lua_assert(id == 0); + lj_assertCP(id == 0, "number not at toplevel"); if (!(info & CTF_BOOL)) { CTSize msize = ctype_msizeP(decl->attr); CTSize vsize = ctype_vsizeP(decl->attr); @@ -941,7 +953,7 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) info = (info & ~CTF_ALIGN) | (cinfo & CTF_ALIGN); info |= (cinfo & CTF_QUAL); /* Inherit qual. */ } else { - lua_assert(ctype_isvoid(info)); + lj_assertCP(ctype_isvoid(info), "bad ctype %08x", info); } csize = size; cinfo = info+id; @@ -953,8 +965,6 @@ static CTypeID cp_decl_intern(CPState *cp, CPDecl *decl) /* -- C declaration parser ------------------------------------------------ */ -#define H_(le, be) LJ_ENDIAN_SELECT(0x##le, 0x##be) - /* Reset declaration state to declaration specifier. */ static void cp_decl_reset(CPDecl *decl) { @@ -1031,7 +1041,7 @@ static void cp_decl_asm(CPState *cp, CPDecl *decl) if (cp->tok == CTOK_STRING) { GCstr *str = cp->str; while (cp_next(cp) == CTOK_STRING) { - lj_str_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); + lj_strfmt_pushf(cp->L, "%s%s", strdata(str), strdata(cp->str)); cp->L->top--; str = strV(cp->L->top); } @@ -1083,44 +1093,57 @@ static void cp_decl_gccattribute(CPState *cp, CPDecl *decl) if (cp->tok == CTOK_IDENT) { GCstr *attrstr = cp->str; cp_next(cp); - switch (attrstr->hash) { - case H_(64a9208e,8ce14319): case H_(8e6331b2,95a282af): /* aligned */ + switch (lj_cparse_case(attrstr, + "\007aligned" "\013__aligned__" + "\006packed" "\012__packed__" + "\004mode" "\010__mode__" + "\013vector_size" "\017__vector_size__" +#if LJ_TARGET_X86 + "\007regparm" "\013__regparm__" + "\005cdecl" "\011__cdecl__" + "\010thiscall" "\014__thiscall__" + "\010fastcall" "\014__fastcall__" + "\007stdcall" "\013__stdcall__" + "\012sseregparm" "\016__sseregparm__" +#endif + )) { + case 0: case 1: /* aligned */ cp_decl_align(cp, decl); break; - case H_(42eb47de,f0ede26c): case H_(29f48a09,cf383e0c): /* packed */ + case 2: case 3: /* packed */ decl->attr |= CTFP_PACKED; break; - case H_(0a84eef6,8dfab04c): case H_(995cf92c,d5696591): /* mode */ + case 4: case 5: /* mode */ cp_decl_mode(cp, decl); break; - case H_(0ab31997,2d5213fa): case H_(bf875611,200e9990): /* vector_size */ + case 6: case 7: /* vector_size */ { CTSize vsize = cp_decl_sizeattr(cp); if (vsize) CTF_INSERT(decl->attr, VSIZEP, lj_fls(vsize)); } break; #if LJ_TARGET_X86 - case H_(5ad22db8,c689b848): case H_(439150fa,65ea78cb): /* regparm */ + case 8: case 9: /* regparm */ CTF_INSERT(decl->fattr, REGPARM, cp_decl_sizeattr(cp)); decl->fattr |= CTFP_CCONV; break; - case H_(18fc0b98,7ff4c074): case H_(4e62abed,0a747424): /* cdecl */ + case 10: case 11: /* cdecl */ CTF_INSERT(decl->fattr, CCONV, CTCC_CDECL); decl->fattr |= CTFP_CCONV; break; - case H_(72b2e41b,494c5a44): case H_(f2356d59,f25fc9bd): /* thiscall */ + case 12: case 13: /* thiscall */ CTF_INSERT(decl->fattr, CCONV, CTCC_THISCALL); decl->fattr |= CTFP_CCONV; break; - case H_(0d0ffc42,ab746f88): case H_(21c54ba1,7f0ca7e3): /* fastcall */ + case 14: case 15: /* fastcall */ CTF_INSERT(decl->fattr, CCONV, CTCC_FASTCALL); decl->fattr |= CTFP_CCONV; break; - case H_(ef76b040,9412e06a): case H_(de56697b,c750e6e1): /* stdcall */ + case 16: case 17: /* stdcall */ CTF_INSERT(decl->fattr, CCONV, CTCC_STDCALL); decl->fattr |= CTFP_CCONV; break; - case H_(ea78b622,f234bd8e): case H_(252ffb06,8d50f34b): /* sseregparm */ + case 18: case 19: /* sseregparm */ decl->fattr |= CTF_SSEREGPARM; decl->fattr |= CTFP_CCONV; break; @@ -1152,16 +1175,13 @@ static void cp_decl_msvcattribute(CPState *cp, CPDecl *decl) while (cp->tok == CTOK_IDENT) { GCstr *attrstr = cp->str; cp_next(cp); - switch (attrstr->hash) { - case H_(bc2395fa,98f267f8): /* align */ + if (cp_str_is(attrstr, "align")) { cp_decl_align(cp, decl); - break; - default: /* Ignore all other attributes. */ + } else { /* Ignore all other attributes. */ if (cp_opt(cp, '(')) { while (cp->tok != ')' && cp->tok != CTOK_EOF) cp_next(cp); cp_check(cp, ')'); } - break; } } cp_check(cp, ')'); @@ -1572,7 +1592,7 @@ end_decl: cp_errmsg(cp, cp->tok, LJ_ERR_FFI_DECLSPEC); sz = sizeof(int); } - lua_assert(sz != 0); + lj_assertCP(sz != 0, "basic ctype with zero size"); info += CTALIGN(lj_fls(sz)); /* Use natural alignment. */ info += (decl->attr & CTF_QUAL); /* Merge qualifiers. */ cp_push(decl, info, sz); @@ -1741,17 +1761,16 @@ static CTypeID cp_decl_abstract(CPState *cp) static void cp_pragma(CPState *cp, BCLine pragmaline) { cp_next(cp); - if (cp->tok == CTOK_IDENT && - cp->str->hash == H_(e79b999f,42ca3e85)) { /* pack */ + if (cp->tok == CTOK_IDENT && cp_str_is(cp->str, "pack")) { cp_next(cp); cp_check(cp, '('); if (cp->tok == CTOK_IDENT) { - if (cp->str->hash == H_(738e923c,a1b65954)) { /* push */ + if (cp_str_is(cp->str, "push")) { if (cp->curpack < CPARSE_MAX_PACKSTACK) { cp->packstack[cp->curpack+1] = cp->packstack[cp->curpack]; cp->curpack++; } - } else if (cp->str->hash == H_(6c71cf27,6c71cf27)) { /* pop */ + } else if (cp_str_is(cp->str, "pop")) { if (cp->curpack > 0) cp->curpack--; } else { cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); @@ -1773,6 +1792,16 @@ static void cp_pragma(CPState *cp, BCLine pragmaline) } } +/* Handle line number. */ +static void cp_line(CPState *cp, BCLine hashline) +{ + BCLine newline = cp->val.u32; + /* TODO: Handle file name and include it in error messages. */ + while (cp->tok != CTOK_EOF && cp->linenumber == hashline) + cp_next(cp); + cp->linenumber = newline; +} + /* Parse multiple C declarations of types or extern identifiers. */ static void cp_decl_multi(CPState *cp) { @@ -1785,12 +1814,21 @@ static void cp_decl_multi(CPState *cp) continue; } if (cp->tok == '#') { /* Workaround, since we have no preprocessor, yet. */ - BCLine pragmaline = cp->linenumber; - if (!(cp_next(cp) == CTOK_IDENT && - cp->str->hash == H_(f5e6b4f8,1d509107))) /* pragma */ + BCLine hashline = cp->linenumber; + CPToken tok = cp_next(cp); + if (tok == CTOK_INTEGER) { + cp_line(cp, hashline); + continue; + } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "line")) { + if (cp_next(cp) != CTOK_INTEGER) cp_err_token(cp, tok); + cp_line(cp, hashline); + continue; + } else if (tok == CTOK_IDENT && cp_str_is(cp->str, "pragma")) { + cp_pragma(cp, hashline); + continue; + } else { cp_errmsg(cp, cp->tok, LJ_ERR_XSYMBOL); - cp_pragma(cp, pragmaline); - continue; + } } scl = cp_decl_spec(cp, &decl, CDF_TYPEDEF|CDF_EXTERN|CDF_STATIC); if ((cp->tok == ';' || cp->tok == CTOK_EOF) && @@ -1814,7 +1852,7 @@ static void cp_decl_multi(CPState *cp) /* Treat both static and extern function declarations as extern. */ ct = ctype_get(cp->cts, ctypeid); /* We always get new anonymous functions (typedefs are copied). */ - lua_assert(gcref(ct->name) == NULL); + lj_assertCP(gcref(ct->name) == NULL, "unexpected named function"); id = ctypeid; /* Just name it. */ } else if ((scl & CDF_STATIC)) { /* Accept static constants. */ id = cp_decl_constinit(cp, &ct, ctypeid); @@ -1856,8 +1894,6 @@ static void cp_decl_single(CPState *cp) if (cp->tok != CTOK_EOF) cp_err_token(cp, CTOK_EOF); } -#undef H_ - /* ------------------------------------------------------------------------ */ /* Protected callback for C parser. */ @@ -1873,7 +1909,7 @@ static TValue *cpcparser(lua_State *L, lua_CFunction dummy, void *ud) cp_decl_single(cp); if (cp->param && cp->param != cp->L->top) cp_err(cp, LJ_ERR_FFI_NUMPARAM); - lua_assert(cp->depth == 0); + lj_assertCP(cp->depth == 0, "unbalanced cparser declaration depth"); return NULL; } diff --git a/src/lj_cparse.h b/src/lj_cparse.h index df884497..c0f61edc 100644 --- a/src/lj_cparse.h +++ b/src/lj_cparse.h @@ -60,6 +60,8 @@ typedef struct CPState { LJ_FUNC int lj_cparse(CPState *cp); +LJ_FUNC int lj_cparse_case(GCstr *str, const char *match); + #endif #endif diff --git a/src/lj_crecord.c b/src/lj_crecord.c index 3f3552a6..bc21d859 100644 --- a/src/lj_crecord.c +++ b/src/lj_crecord.c @@ -11,13 +11,13 @@ #if LJ_HASJIT && LJ_HASFFI #include "lj_err.h" -#include "lj_str.h" #include "lj_tab.h" #include "lj_frame.h" #include "lj_ctype.h" #include "lj_cdata.h" #include "lj_cparse.h" #include "lj_cconv.h" +#include "lj_carith.h" #include "lj_clib.h" #include "lj_ccall.h" #include "lj_ff.h" @@ -31,6 +31,7 @@ #include "lj_snap.h" #include "lj_crecord.h" #include "lj_dispatch.h" +#include "lj_strfmt.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) @@ -60,7 +61,8 @@ static GCcdata *argv2cdata(jit_State *J, TRef tr, cTValue *o) static CTypeID crec_constructor(jit_State *J, GCcdata *cd, TRef tr) { CTypeID id; - lua_assert(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID); + lj_assertJ(tref_iscdata(tr) && cd->ctypeid == CTID_CTYPEID, + "expected CTypeID cdata"); id = *(CTypeID *)cdataptr(cd); tr = emitir(IRT(IR_FLOAD, IRT_INT), tr, IRFL_CDATA_INT); emitir(IRTG(IR_EQ, IRT_INT), tr, lj_ir_kint(J, (int32_t)id)); @@ -211,7 +213,7 @@ static void crec_copy_emit(jit_State *J, CRecMemList *ml, MSize mlp, ml[i].trval = emitir(IRT(IR_XLOAD, ml[i].tp), trsptr, 0); ml[i].trofs = trofs; i++; - rwin += (LJ_SOFTFP && ml[i].tp == IRT_NUM) ? 2 : 1; + rwin += (LJ_SOFTFP32 && ml[i].tp == IRT_NUM) ? 2 : 1; if (rwin >= CREC_COPY_REGWIN || i >= mlp) { /* Flush buffered stores. */ rwin = 0; for ( ; j < i; j++) { @@ -236,13 +238,14 @@ static void crec_copy(jit_State *J, TRef trdst, TRef trsrc, TRef trlen, if (len > CREC_COPY_MAXLEN) goto fallback; if (ct) { CTState *cts = ctype_ctsG(J2G(J)); - lua_assert(ctype_isarray(ct->info) || ctype_isstruct(ct->info)); + lj_assertJ(ctype_isarray(ct->info) || ctype_isstruct(ct->info), + "copy of non-aggregate"); if (ctype_isarray(ct->info)) { CType *cct = ctype_rawchild(cts, ct); tp = crec_ct2irt(cts, cct); if (tp == IRT_CDATA) goto rawcopy; step = lj_ir_type_size[tp]; - lua_assert((len & (step-1)) == 0); + lj_assertJ((len & (step-1)) == 0, "copy of fractional size"); } else if ((ct->info & CTF_UNION)) { step = (1u << ctype_align(ct->info)); goto rawcopy; @@ -441,7 +444,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, /* fallthrough */ case CCX(I, F): if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; - sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_TRUNC|IRCONV_ANY); + sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY); goto xstore; case CCX(I, P): case CCX(I, A): @@ -521,7 +524,7 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp, if (st == IRT_CDATA) goto err_nyi; /* The signed conversion is cheaper. x64 really has 47 bit pointers. */ sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, - st, IRCONV_TRUNC|IRCONV_ANY); + st, IRCONV_ANY); goto xstore; /* Destination is an array. */ @@ -613,10 +616,12 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) sp = lj_ir_kptr(J, NULL); } else if (tref_isudata(sp)) { GCudata *ud = udataV(sval); - if (ud->udtype == UDTYPE_IO_FILE) { + if (ud->udtype == UDTYPE_IO_FILE || ud->udtype == UDTYPE_BUFFER) { TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), sp, IRFL_UDATA_UDTYPE); - emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, UDTYPE_IO_FILE)); - sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, IRFL_UDATA_FILE); + emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, ud->udtype)); + sp = emitir(IRT(IR_FLOAD, IRT_PTR), sp, + ud->udtype == UDTYPE_IO_FILE ? IRFL_UDATA_FILE : + IRFL_SBUF_R); } else { sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCudata))); } @@ -628,7 +633,8 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) /* Specialize to the name of the enum constant. */ emitir(IRTG(IR_EQ, IRT_STR), sp, lj_ir_kstr(J, str)); if (cct && ctype_isconstval(cct->info)) { - lua_assert(ctype_child(cts, cct)->size == 4); + lj_assertJ(ctype_child(cts, cct)->size == 4, + "only 32 bit const supported"); /* NYI */ svisnz = (void *)(intptr_t)(ofs != 0); sp = lj_ir_kint(J, (int32_t)ofs); sid = ctype_cid(cct->info); @@ -640,12 +646,22 @@ static TRef crec_ct_tv(jit_State *J, CType *d, TRef dp, TRef sp, cTValue *sval) sp = emitir(IRT(IR_ADD, IRT_PTR), sp, lj_ir_kintp(J, sizeof(GCstr))); sid = CTID_A_CCHAR; } - } else { /* NYI: tref_istab(sp), tref_islightud(sp). */ + } else if (tref_islightud(sp)) { +#if LJ_64 + lj_trace_err(J, LJ_TRERR_NYICONV); +#endif + } else { /* NYI: tref_istab(sp). */ IRType t; sid = argv2cdata(J, sp, sval)->ctypeid; s = ctype_raw(cts, sid); svisnz = cdataptr(cdataV(sval)); - t = crec_ct2irt(cts, s); + if (ctype_isfunc(s->info)) { + sid = lj_ctype_intern(cts, CTINFO(CT_PTR, CTALIGN_PTR|sid), CTSIZE_PTR); + s = ctype_get(cts, sid); + t = IRT_PTR; + } else { + t = crec_ct2irt(cts, s); + } if (ctype_isptr(s->info)) { sp = emitir(IRT(IR_FLOAD, t), sp, IRFL_CDATA_PTR); if (ctype_isref(s->info)) { @@ -700,6 +716,19 @@ static TRef crec_reassoc_ofs(jit_State *J, TRef tr, ptrdiff_t *ofsp, MSize sz) return tr; } +/* Tailcall to function. */ +static void crec_tailcall(jit_State *J, RecordFFData *rd, cTValue *tv) +{ + TRef kfunc = lj_ir_kfunc(J, funcV(tv)); +#if LJ_FR2 + J->base[-2] = kfunc; + J->base[-1] = TREF_FRAME; +#else + J->base[-1] = kfunc | TREF_FRAME; +#endif + rd->nres = -1; /* Pending tailcall. */ +} + /* Record ctype __index/__newindex metamethods. */ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, RecordFFData *rd) @@ -709,8 +738,7 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, if (!tv) lj_trace_err(J, LJ_TRERR_BADTYPE); if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); } else if (rd->data == 0 && tvistab(tv) && tref_isstr(J->base[1])) { /* Specialize to result of __index lookup. */ cTValue *o = lj_tab_get(J->L, tabV(tv), &rd->argv[1]); @@ -727,6 +755,48 @@ static void crec_index_meta(jit_State *J, CTState *cts, CType *ct, } } +/* Record bitfield load/store. */ +static void crec_index_bf(jit_State *J, RecordFFData *rd, TRef ptr, CTInfo info) +{ + IRType t = IRT_I8 + 2*lj_fls(ctype_bitcsz(info)) + ((info&CTF_UNSIGNED)?1:0); + TRef tr = emitir(IRT(IR_XLOAD, t), ptr, 0); + CTSize pos = ctype_bitpos(info), bsz = ctype_bitbsz(info), shift = 32 - bsz; + lj_assertJ(t <= IRT_U32, "only 32 bit bitfields supported"); /* NYI */ + if (rd->data == 0) { /* __index metamethod. */ + if ((info & CTF_BOOL)) { + tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << pos)))); + /* Assume not equal to zero. Fixup and emit pending guard later. */ + lj_ir_set(J, IRTGI(IR_NE), tr, lj_ir_kint(J, 0)); + J->postproc = LJ_POST_FIXGUARD; + tr = TREF_TRUE; + } else if (!(info & CTF_UNSIGNED)) { + tr = emitir(IRTI(IR_BSHL), tr, lj_ir_kint(J, shift - pos)); + tr = emitir(IRTI(IR_BSAR), tr, lj_ir_kint(J, shift)); + } else { + lj_assertJ(bsz < 32, "unexpected full bitfield index"); + tr = emitir(IRTI(IR_BSHR), tr, lj_ir_kint(J, pos)); + tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << bsz)-1))); + /* We can omit the U32 to NUM conversion, since bsz < 32. */ + } + J->base[0] = tr; + } else { /* __newindex metamethod. */ + CTState *cts = ctype_ctsG(J2G(J)); + CType *ct = ctype_get(cts, + (info & CTF_BOOL) ? CTID_BOOL : + (info & CTF_UNSIGNED) ? CTID_UINT32 : CTID_INT32); + int32_t mask = (int32_t)(((1u << bsz)-1) << pos); + TRef sp = crec_ct_tv(J, ct, 0, J->base[2], &rd->argv[2]); + sp = emitir(IRTI(IR_BSHL), sp, lj_ir_kint(J, pos)); + /* Use of the target type avoids forwarding conversions. */ + sp = emitir(IRT(IR_BAND, t), sp, lj_ir_kint(J, mask)); + tr = emitir(IRT(IR_BAND, t), tr, lj_ir_kint(J, (int32_t)~mask)); + tr = emitir(IRT(IR_BOR, t), tr, sp); + emitir(IRT(IR_XSTORE, t), ptr, tr); + rd->nres = 0; + J->needsnap = 1; + } +} + void LJ_FASTCALL recff_cdata_index(jit_State *J, RecordFFData *rd) { TRef idx, ptr = J->base[0]; @@ -801,6 +871,7 @@ again: CType *fct; fct = lj_ctype_getfield(cts, ct, name, &fofs); if (fct) { + ofs += (ptrdiff_t)fofs; /* Always specialize to the field name. */ emitir(IRTG(IR_EQ, IRT_STR), idx, lj_ir_kstr(J, name)); if (ctype_isconstval(fct->info)) { @@ -812,12 +883,14 @@ again: J->base[0] = lj_ir_kint(J, (int32_t)fct->size); return; /* Interpreter will throw for newindex. */ } else if (ctype_isbitfield(fct->info)) { - lj_trace_err(J, LJ_TRERR_NYICONV); + if (ofs) + ptr = emitir(IRT(IR_ADD, IRT_PTR), ptr, lj_ir_kintp(J, ofs)); + crec_index_bf(J, rd, ptr, fct->info); + return; } else { - lua_assert(ctype_isfield(fct->info)); + lj_assertJ(ctype_isfield(fct->info), "field expected"); sid = ctype_cid(fct->info); } - ofs += (ptrdiff_t)fofs; } } else if (ctype_iscomplex(ct->info)) { if (name->len == 2 && @@ -867,21 +940,17 @@ again: } /* Record setting a finalizer. */ -static void crec_finalizer(jit_State *J, TRef trcd, cTValue *fin) +static void crec_finalizer(jit_State *J, TRef trcd, TRef trfin, cTValue *fin) { - TRef trlo = lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd); - TRef trhi = emitir(IRT(IR_ADD, IRT_P32), trlo, lj_ir_kint(J, 4)); - if (LJ_BE) { TRef tmp = trlo; trlo = trhi; trhi = tmp; } - if (tvisfunc(fin)) { - emitir(IRT(IR_XSTORE, IRT_P32), trlo, lj_ir_kfunc(J, funcV(fin))); - emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TFUNC)); - } else if (tviscdata(fin)) { - emitir(IRT(IR_XSTORE, IRT_P32), trlo, - lj_ir_kgc(J, obj2gco(cdataV(fin)), IRT_CDATA)); - emitir(IRTI(IR_XSTORE), trhi, lj_ir_kint(J, LJ_TCDATA)); + if (tvisgcv(fin)) { + if (!trfin) trfin = lj_ir_kptr(J, gcval(fin)); + } else if (tvisnil(fin)) { + trfin = lj_ir_kptr(J, NULL); } else { lj_trace_err(J, LJ_TRERR_BADTYPE); } + lj_ir_call(J, IRCALL_lj_cdata_setfin, trcd, + trfin, lj_ir_kint(J, (int32_t)itype(fin))); J->needsnap = 1; } @@ -892,10 +961,8 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) CTSize sz; CTInfo info = lj_ctype_info(cts, id, &sz); CType *d = ctype_raw(cts, id); - TRef trid; - if (!sz || sz > 128 || (info & CTF_VLA) || ctype_align(info) > CT_MEMALIGN) - lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: large/special allocations. */ - trid = lj_ir_kint(J, id); + TRef trcd, trid = lj_ir_kint(J, id); + cTValue *fin; /* Use special instruction to box pointer or 32/64 bit integer. */ if (ctype_isptr(info) || (ctype_isinteger(info) && (sz == 4 || sz == 8))) { TRef sp = J->base[1] ? crec_ct_tv(J, d, 0, J->base[1], &rd->argv[1]) : @@ -903,11 +970,36 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) sz == 4 ? lj_ir_kint(J, 0) : (lj_needsplit(J), lj_ir_kint64(J, 0)); J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), trid, sp); + return; } else { - TRef trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, TREF_NIL); - cTValue *fin; - J->base[0] = trcd; - if (J->base[1] && !J->base[2] && + TRef trsz = TREF_NIL; + if ((info & CTF_VLA)) { /* Calculate VLA/VLS size at runtime. */ + CTSize sz0, sz1; + if (!J->base[1] || J->base[2]) + lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init VLA/VLS. */ + trsz = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, + J->base[1], &rd->argv[1]); + sz0 = lj_ctype_vlsize(cts, d, 0); + sz1 = lj_ctype_vlsize(cts, d, 1); + trsz = emitir(IRTGI(IR_MULOV), trsz, lj_ir_kint(J, (int32_t)(sz1-sz0))); + trsz = emitir(IRTGI(IR_ADDOV), trsz, lj_ir_kint(J, (int32_t)sz0)); + J->base[1] = 0; /* Simplify logic below. */ + } else if (ctype_align(info) > CT_MEMALIGN) { + trsz = lj_ir_kint(J, sz); + } + trcd = emitir(IRTG(IR_CNEW, IRT_CDATA), trid, trsz); + if (sz > 128 || (info & CTF_VLA)) { + TRef dp; + CTSize align; + special: /* Only handle bulk zero-fill for large/VLA/VLS types. */ + if (J->base[1]) + lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init large/VLA/VLS types. */ + dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, sizeof(GCcdata))); + if (trsz == TREF_NIL) trsz = lj_ir_kint(J, sz); + align = ctype_align(info); + if (align < CT_MEMALIGN) align = CT_MEMALIGN; + crec_fill(J, dp, trsz, lj_ir_kint(J, 0), (1u << align)); + } else if (J->base[1] && !J->base[2] && !lj_cconv_multi_init(cts, d, &rd->argv[1])) { goto single_init; } else if (ctype_isarray(d->info)) { @@ -918,8 +1010,9 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) TValue *sval = &tv; MSize i; tv.u64 = 0; - if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info))) - lj_trace_err(J, LJ_TRERR_NYICONV); /* NYI: init array of aggregates. */ + if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info)) || + esize * CREC_FILL_MAXUNROLL < sz) + goto special; for (i = 1, ofs = 0; ofs < sz; ofs += esize) { TRef dp = emitir(IRT(IR_ADD, IRT_PTR), trcd, lj_ir_kintp(J, ofs + sizeof(GCcdata))); @@ -933,8 +1026,26 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) crec_ct_tv(J, dc, dp, sp, sval); } } else if (ctype_isstruct(d->info)) { - CTypeID fid = d->sib; + CTypeID fid; MSize i = 1; + if (!J->base[1]) { /* Handle zero-fill of struct-of-NYI. */ + fid = d->sib; + while (fid) { + CType *df = ctype_get(cts, fid); + fid = df->sib; + if (ctype_isfield(df->info)) { + CType *dc; + if (!gcref(df->name)) continue; /* Ignore unnamed fields. */ + dc = ctype_rawchild(cts, df); /* Field type. */ + if (!(ctype_isnum(dc->info) || ctype_isptr(dc->info) || + ctype_isenum(dc->info))) + goto special; + } else if (!ctype_isconstval(df->info)) { + goto special; + } + } + } + fid = d->sib; while (fid) { CType *df = ctype_get(cts, fid); fid = df->sib; @@ -981,11 +1092,12 @@ static void crec_alloc(jit_State *J, RecordFFData *rd, CTypeID id) crec_ct_tv(J, d, dp, lj_ir_kint(J, 0), &tv); } } - /* Handle __gc metamethod. */ - fin = lj_ctype_meta(cts, id, MM_gc); - if (fin) - crec_finalizer(J, trcd, fin); } + J->base[0] = trcd; + /* Handle __gc metamethod. */ + fin = lj_ctype_meta(cts, id, MM_gc); + if (fin) + crec_finalizer(J, trcd, 0, fin); } /* Record argument conversions. */ @@ -1026,7 +1138,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, if (fid) { /* Get argument type from field. */ CType *ctf = ctype_get(cts, fid); fid = ctf->sib; - lua_assert(ctype_isfield(ctf->info)); + lj_assertJ(ctype_isfield(ctf->info), "field expected"); did = ctype_cid(ctf->info); } else { if (!(ct->info & CTF_VARARG)) @@ -1045,7 +1157,7 @@ static TRef crec_call_args(jit_State *J, RecordFFData *rd, else tr = emitconv(tr, IRT_INT, d->size==1 ? IRT_I8 : IRT_I16,IRCONV_SEXT); } - } else if (LJ_SOFTFP && ctype_isfp(d->info) && d->size > 4) { + } else if (LJ_SOFTFP32 && ctype_isfp(d->info) && d->size > 4) { lj_needsplit(J); } #if LJ_TARGET_X86 @@ -1091,20 +1203,20 @@ static void crec_snap_caller(jit_State *J) lua_State *L = J->L; TValue *base = L->base, *top = L->top; const BCIns *pc = J->pc; - TRef ftr = J->base[-1]; + TRef ftr = J->base[-1-LJ_FR2]; ptrdiff_t delta; if (!frame_islua(base-1) || J->framedepth <= 0) lj_trace_err(J, LJ_TRERR_NYICALL); - J->pc = frame_pc(base-1); delta = 1+bc_a(J->pc[-1]); + J->pc = frame_pc(base-1); delta = 1+LJ_FR2+bc_a(J->pc[-1]); L->top = base; L->base = base - delta; - J->base[-1] = TREF_FALSE; + J->base[-1-LJ_FR2] = TREF_FALSE; J->base -= delta; J->baseslot -= (BCReg)delta; - J->maxslot = (BCReg)delta; J->framedepth--; + J->maxslot = (BCReg)delta-LJ_FR2; J->framedepth--; lj_snap_add(J); L->base = base; L->top = top; J->framedepth++; J->maxslot = 1; J->base += delta; J->baseslot += (BCReg)delta; - J->base[-1] = ftr; J->pc = pc; + J->base[-1-LJ_FR2] = ftr; J->pc = pc; } /* Record function call. */ @@ -1124,8 +1236,7 @@ static int crec_call(jit_State *J, RecordFFData *rd, GCcdata *cd) TRef tr; TValue tv; /* Check for blacklisted C functions that might call a callback. */ - setlightudV(&tv, - cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4)); + tv.u64 = ((uintptr_t)cdata_getptr(cdataptr(cd), (LJ_64 && tp == IRT_P64) ? 8 : 4) >> 2) | U64x(800000000, 00000000); if (tvistrue(lj_tab_get(J->L, cts->miscmap, &tv))) lj_trace_err(J, LJ_TRERR_BLACKL); if (ctype_isvoid(ctr->info)) { @@ -1196,8 +1307,7 @@ void LJ_FASTCALL recff_cdata_call(jit_State *J, RecordFFData *rd) tv = lj_ctype_meta(cts, ctype_isptr(ct->info) ? ctype_cid(ct->info) : id, mm); if (tv) { if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); return; } } else if (mm == MM_new) { @@ -1238,7 +1348,7 @@ static TRef crec_arith_int64(jit_State *J, TRef *sp, CType **s, MMS mm) for (i = 0; i < 2; i++) { IRType st = tref_type(sp[i]); if (st == IRT_NUM || st == IRT_FLOAT) - sp[i] = emitconv(sp[i], dt, st, IRCONV_TRUNC|IRCONV_ANY); + sp[i] = emitconv(sp[i], dt, st, IRCONV_ANY); else if (!(st == IRT_I64 || st == IRT_U64)) sp[i] = emitconv(sp[i], dt, IRT_INT, (s[i]->info & CTF_UNSIGNED) ? 0 : IRCONV_SEXT); @@ -1307,15 +1417,14 @@ static TRef crec_arith_ptr(jit_State *J, TRef *sp, CType **s, MMS mm) CTypeID id; #if LJ_64 if (t == IRT_NUM || t == IRT_FLOAT) - tr = emitconv(tr, IRT_INTP, t, IRCONV_TRUNC|IRCONV_ANY); + tr = emitconv(tr, IRT_INTP, t, IRCONV_ANY); else if (!(t == IRT_I64 || t == IRT_U64)) tr = emitconv(tr, IRT_INTP, IRT_INT, ((t - IRT_I8) & 1) ? 0 : IRCONV_SEXT); #else if (!tref_typerange(sp[1], IRT_I8, IRT_U32)) { tr = emitconv(tr, IRT_INTP, t, - (t == IRT_NUM || t == IRT_FLOAT) ? - IRCONV_TRUNC|IRCONV_ANY : 0); + (t == IRT_NUM || t == IRT_FLOAT) ? IRCONV_ANY : 0); } #endif tr = emitir(IRT(IR_MUL, IRT_INTP), tr, lj_ir_kintp(J, sz)); @@ -1347,8 +1456,7 @@ static TRef crec_arith_meta(jit_State *J, TRef *sp, CType **s, CTState *cts, } if (tv) { if (tvisfunc(tv)) { - J->base[-1] = lj_ir_kfunc(J, funcV(tv)) | TREF_FRAME; - rd->nres = -1; /* Pending tailcall. */ + crec_tailcall(J, rd, tv); return 0; } /* NYI: non-function metamethods. */ } else if ((MMS)rd->data == MM_eq) { /* Fallback cdata pointer comparison. */ @@ -1460,8 +1568,7 @@ void LJ_FASTCALL recff_cdata_arith(jit_State *J, RecordFFData *rd) !irt_isguard(J->guardemit)) { const BCIns *pc = frame_contpc(J->L->base-1) - 1; if (bc_op(*pc) <= BC_ISNEP) { - setframe_pc(&J2G(J)->tmptv, pc); - J2G(J)->tmptv.u32.lo = ((tref_istrue(tr) ^ bc_op(*pc)) & 1); + J2G(J)->tmptv.u64 = (uint64_t)(uintptr_t)pc; J->postproc = LJ_POST_FIXCOMP; } } @@ -1650,7 +1757,139 @@ void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd) void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd) { argv2cdata(J, J->base[0], &rd->argv[0]); - crec_finalizer(J, J->base[0], &rd->argv[1]); + if (!J->base[1]) + lj_trace_err(J, LJ_TRERR_BADTYPE); + crec_finalizer(J, J->base[0], J->base[1], &rd->argv[1]); +} + +/* -- 64 bit bit.* library functions -------------------------------------- */ + +/* Determine bit operation type from argument type. */ +static CTypeID crec_bit64_type(CTState *cts, cTValue *tv) +{ + if (tviscdata(tv)) { + CType *ct = lj_ctype_rawref(cts, cdataV(tv)->ctypeid); + if (ctype_isenum(ct->info)) ct = ctype_child(cts, ct); + if ((ct->info & (CTMASK_NUM|CTF_BOOL|CTF_FP|CTF_UNSIGNED)) == + CTINFO(CT_NUM, CTF_UNSIGNED) && ct->size == 8) + return CTID_UINT64; /* Use uint64_t, since it has the highest rank. */ + return CTID_INT64; /* Otherwise use int64_t. */ + } + return 0; /* Use regular 32 bit ops. */ +} + +void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd) +{ + CTState *cts = ctype_ctsG(J2G(J)); + TRef tr = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, + J->base[0], &rd->argv[0]); + if (!tref_isinteger(tr)) + tr = emitconv(tr, IRT_INT, tref_type(tr), 0); + J->base[0] = tr; +} + +int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd) +{ + CTState *cts = ctype_ctsG(J2G(J)); + CTypeID id = crec_bit64_type(cts, &rd->argv[0]); + if (id) { + TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); + tr = emitir(IRT(rd->data, id-CTID_INT64+IRT_I64), tr, 0); + J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); + return 1; + } + return 0; +} + +int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd) +{ + CTState *cts = ctype_ctsG(J2G(J)); + CTypeID id = 0; + MSize i; + for (i = 0; J->base[i] != 0; i++) { + CTypeID aid = crec_bit64_type(cts, &rd->argv[i]); + if (id < aid) id = aid; /* Determine highest type rank of all arguments. */ + } + if (id) { + CType *ct = ctype_get(cts, id); + uint32_t ot = IRT(rd->data, id-CTID_INT64+IRT_I64); + TRef tr = crec_ct_tv(J, ct, 0, J->base[0], &rd->argv[0]); + for (i = 1; J->base[i] != 0; i++) { + TRef tr2 = crec_ct_tv(J, ct, 0, J->base[i], &rd->argv[i]); + tr = emitir(ot, tr, tr2); + } + J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); + return 1; + } + return 0; +} + +int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd) +{ + CTState *cts = ctype_ctsG(J2G(J)); + CTypeID id; + TRef tsh = 0; + if (J->base[0] && tref_iscdata(J->base[1])) { + tsh = crec_ct_tv(J, ctype_get(cts, CTID_INT64), 0, + J->base[1], &rd->argv[1]); + if (!tref_isinteger(tsh)) + tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); + J->base[1] = tsh; + } + id = crec_bit64_type(cts, &rd->argv[0]); + if (id) { + TRef tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); + uint32_t op = rd->data; + if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); + if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && + !tref_isk(tsh)) + tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63)); +#ifdef LJ_TARGET_UNIFYROT + if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { + op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; + tsh = emitir(IRTI(IR_NEG), tsh, tsh); + } +#endif + tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); + J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); + return 1; + } + return 0; +} + +TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr) +{ + CTState *cts = ctype_ctsG(J2G(J)); + CTypeID id = crec_bit64_type(cts, &rd->argv[0]); + TRef tr, trsf = J->base[1]; + SFormat sf = (STRFMT_UINT|STRFMT_T_HEX); + int32_t n; + if (trsf) { + CTypeID id2 = 0; + n = (int32_t)lj_carith_check64(J->L, 2, &id2); + if (id2) + trsf = crec_ct_tv(J, ctype_get(cts, CTID_INT32), 0, trsf, &rd->argv[1]); + else + trsf = lj_opt_narrow_tobit(J, trsf); + emitir(IRTGI(IR_EQ), trsf, lj_ir_kint(J, n)); /* Specialize to n. */ + } else { + n = id ? 16 : 8; + } + if (n < 0) { n = -n; sf |= STRFMT_F_UPPER; } + sf |= ((SFormat)((n+1)&255) << STRFMT_SH_PREC); + if (id) { + tr = crec_ct_tv(J, ctype_get(cts, id), 0, J->base[0], &rd->argv[0]); + if (n < 16) + tr = emitir(IRT(IR_BAND, IRT_U64), tr, + lj_ir_kint64(J, ((uint64_t)1 << 4*n)-1)); + } else { + tr = lj_opt_narrow_tobit(J, J->base[0]); + if (n < 8) + tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (int32_t)((1u << 4*n)-1))); + tr = emitconv(tr, IRT_U64, IRT_INT, 0); /* No sign-extension. */ + lj_needsplit(J); + } + return lj_ir_call(J, IRCALL_lj_strfmt_putfxint, hdr, lj_ir_kint(J, sf), tr); } /* -- Miscellaneous library functions ------------------------------------- */ @@ -1674,6 +1913,30 @@ void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd) } } +TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o) +{ + CTypeID id = argv2cdata(J, tr, o)->ctypeid; + if (!(id == CTID_INT64 || id == CTID_UINT64)) + lj_trace_err(J, LJ_TRERR_BADTYPE); + lj_needsplit(J); + return emitir(IRT(IR_FLOAD, id == CTID_INT64 ? IRT_I64 : IRT_U64), tr, + IRFL_CDATA_INT64); +} + +#if LJ_HASBUFFER +TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o) +{ + CTState *cts = ctype_ctsG(J2G(J)); + if (!tref_iscdata(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE); + return crec_ct_tv(J, ctype_get(cts, CTID_P_CVOID), 0, tr, o); +} + +TRef lj_crecord_topuint8(jit_State *J, TRef tr) +{ + return emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, CTID_P_UINT8), tr); +} +#endif + #undef IR #undef emitir #undef emitconv diff --git a/src/lj_crecord.h b/src/lj_crecord.h index 513ded7b..2c8cf05c 100644 --- a/src/lj_crecord.h +++ b/src/lj_crecord.h @@ -25,7 +25,19 @@ LJ_FUNC void LJ_FASTCALL recff_ffi_istype(jit_State *J, RecordFFData *rd); LJ_FUNC void LJ_FASTCALL recff_ffi_abi(jit_State *J, RecordFFData *rd); LJ_FUNC void LJ_FASTCALL recff_ffi_xof(jit_State *J, RecordFFData *rd); LJ_FUNC void LJ_FASTCALL recff_ffi_gc(jit_State *J, RecordFFData *rd); + +LJ_FUNC void LJ_FASTCALL recff_bit64_tobit(jit_State *J, RecordFFData *rd); +LJ_FUNC int LJ_FASTCALL recff_bit64_unary(jit_State *J, RecordFFData *rd); +LJ_FUNC int LJ_FASTCALL recff_bit64_nary(jit_State *J, RecordFFData *rd); +LJ_FUNC int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd); +LJ_FUNC TRef recff_bit64_tohex(jit_State *J, RecordFFData *rd, TRef hdr); + LJ_FUNC void LJ_FASTCALL lj_crecord_tonumber(jit_State *J, RecordFFData *rd); +LJ_FUNC TRef lj_crecord_loadiu64(jit_State *J, TRef tr, cTValue *o); +#if LJ_HASBUFFER +LJ_FUNC TRef lj_crecord_topcvoid(jit_State *J, TRef tr, cTValue *o); +LJ_FUNC TRef lj_crecord_topuint8(jit_State *J, TRef tr); +#endif #endif #endif diff --git a/src/lj_ctype.c b/src/lj_ctype.c index 7ef00521..204be034 100644 --- a/src/lj_ctype.c +++ b/src/lj_ctype.c @@ -11,8 +11,10 @@ #include "lj_err.h" #include "lj_str.h" #include "lj_tab.h" +#include "lj_strfmt.h" #include "lj_ctype.h" #include "lj_ccallback.h" +#include "lj_buf.h" /* -- C type definitions -------------------------------------------------- */ @@ -37,6 +39,8 @@ _("uint64_t", UINT64) \ _("intptr_t", INT_PSZ) \ _("uintptr_t", UINT_PSZ) \ + /* From POSIX. */ \ + _("ssize_t", INT_PSZ) \ /* End of typedef list. */ /* Keywords (only the ones we actually care for). */ @@ -149,7 +153,7 @@ CTypeID lj_ctype_new(CTState *cts, CType **ctp) { CTypeID id = cts->top; CType *ct; - lua_assert(cts->L); + lj_assertCTS(cts->L, "uninitialized cts->L"); if (LJ_UNLIKELY(id >= cts->sizetab)) { if (id >= CTID_MAX) lj_err_msg(cts->L, LJ_ERR_TABOV); #ifdef LUAJIT_CTYPE_CHECK_ANCHOR @@ -178,7 +182,7 @@ CTypeID lj_ctype_intern(CTState *cts, CTInfo info, CTSize size) { uint32_t h = ct_hashtype(info, size); CTypeID id = cts->hash[h]; - lua_assert(cts->L); + lj_assertCTS(cts->L, "uninitialized cts->L"); while (id) { CType *ct = ctype_get(cts, id); if (ct->info == info && ct->size == size) @@ -294,9 +298,9 @@ CTSize lj_ctype_vlsize(CTState *cts, CType *ct, CTSize nelem) } ct = ctype_raw(cts, arrid); } - lua_assert(ctype_isvlarray(ct->info)); /* Must be a VLA. */ + lj_assertCTS(ctype_isvlarray(ct->info), "VLA expected"); ct = ctype_rawchild(cts, ct); /* Get array element. */ - lua_assert(ctype_hassize(ct->info)); + lj_assertCTS(ctype_hassize(ct->info), "bad VLA without size"); /* Calculate actual size of VLA and check for overflow. */ xsz += (uint64_t)ct->size * nelem; return xsz < 0x80000000u ? (CTSize)xsz : CTSIZE_INVALID; @@ -319,7 +323,8 @@ CTInfo lj_ctype_info(CTState *cts, CTypeID id, CTSize *szp) } else { if (!(qual & CTFP_ALIGNED)) qual |= (info & CTF_ALIGN); qual |= (info & ~(CTF_ALIGN|CTMASK_CID)); - lua_assert(ctype_hassize(info) || ctype_isfunc(info)); + lj_assertCTS(ctype_hassize(info) || ctype_isfunc(info), + "ctype without size"); *szp = ctype_isfunc(info) ? CTSIZE_INVALID : ct->size; break; } @@ -532,7 +537,7 @@ static void ctype_repr(CTRepr *ctr, CTypeID id) ctype_appc(ctr, ')'); break; default: - lua_assert(0); + lj_assertG_(ctr->cts->g, 0, "bad ctype %08x", info); break; } ct = ctype_get(ctr->cts, ctype_cid(info)); @@ -576,19 +581,18 @@ GCstr *lj_ctype_repr_int64(lua_State *L, uint64_t n, int isunsigned) /* Convert complex to string with 'i' or 'I' suffix. */ GCstr *lj_ctype_repr_complex(lua_State *L, void *sp, CTSize size) { - char buf[2*LJ_STR_NUMBUF+2+1]; + SBuf *sb = lj_buf_tmp_(L); TValue re, im; - size_t len; if (size == 2*sizeof(double)) { re.n = *(double *)sp; im.n = ((double *)sp)[1]; } else { re.n = (double)*(float *)sp; im.n = (double)((float *)sp)[1]; } - len = lj_str_bufnum(buf, &re); - if (!(im.u32.hi & 0x80000000u) || im.n != im.n) buf[len++] = '+'; - len += lj_str_bufnum(buf+len, &im); - buf[len] = buf[len-1] >= 'a' ? 'I' : 'i'; - return lj_str_new(L, buf, len+1); + lj_strfmt_putfnum(sb, STRFMT_G14, re.n); + if (!(im.u32.hi & 0x80000000u) || im.n != im.n) lj_buf_putchar(sb, '+'); + lj_strfmt_putfnum(sb, STRFMT_G14, im.n); + lj_buf_putchar(sb, sb->w[-1] >= 'a' ? 'I' : 'i'); + return lj_buf_str(L, sb); } /* -- C type state -------------------------------------------------------- */ diff --git a/src/lj_ctype.h b/src/lj_ctype.h index 4979a7ac..3dbcdbfb 100644 --- a/src/lj_ctype.h +++ b/src/lj_ctype.h @@ -260,10 +260,16 @@ typedef struct CTState { #define CT_MEMALIGN 3 /* Alignment guaranteed by memory allocator. */ +#ifdef LUA_USE_ASSERT +#define lj_assertCTS(c, ...) (lj_assertG_(cts->g, (c), __VA_ARGS__)) +#else +#define lj_assertCTS(c, ...) ((void)cts) +#endif + /* -- Predefined types ---------------------------------------------------- */ /* Target-dependent types. */ -#if LJ_TARGET_PPC || LJ_TARGET_PPCSPE +#if LJ_TARGET_PPC #define CTTYDEFP(_) \ _(LINT32, 4, CT_NUM, CTF_LONG|CTALIGN(2)) #else @@ -292,6 +298,7 @@ typedef struct CTState { _(P_VOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_VOID) \ _(P_CVOID, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CVOID) \ _(P_CCHAR, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_CCHAR) \ + _(P_UINT8, CTSIZE_PTR, CT_PTR, CTALIGN_PTR|CTID_UINT8) \ _(A_CCHAR, -1, CT_ARRAY, CTF_CONST|CTALIGN(0)|CTID_CCHAR) \ _(CTYPEID, 4, CT_ENUM, CTALIGN(2)|CTID_INT32) \ CTTYDEFP(_) \ @@ -383,6 +390,16 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L) return cts; } +/* Load FFI library on-demand. */ +#define ctype_loadffi(L) \ + do { \ + if (!ctype_ctsG(G(L))) { \ + ptrdiff_t oldtop = (char *)L->top - mref(L->stack, char); \ + luaopen_ffi(L); \ + L->top = (TValue *)(mref(L->stack, char) + oldtop); \ + } \ + } while (0) + /* Save and restore state of C type table. */ #define LJ_CTYPE_SAVE(cts) CTState savects_ = *(cts) #define LJ_CTYPE_RESTORE(cts) \ @@ -392,7 +409,8 @@ static LJ_AINLINE CTState *ctype_cts(lua_State *L) /* Check C type ID for validity when assertions are enabled. */ static LJ_AINLINE CTypeID ctype_check(CTState *cts, CTypeID id) { - lua_assert(id > 0 && id < cts->top); UNUSED(cts); + UNUSED(cts); + lj_assertCTS(id > 0 && id < cts->top, "bad CTID %d", id); return id; } @@ -408,8 +426,9 @@ static LJ_AINLINE CType *ctype_get(CTState *cts, CTypeID id) /* Get child C type. */ static LJ_AINLINE CType *ctype_child(CTState *cts, CType *ct) { - lua_assert(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || - ctype_isbitfield(ct->info))); /* These don't have children. */ + lj_assertCTS(!(ctype_isvoid(ct->info) || ctype_isstruct(ct->info) || + ctype_isbitfield(ct->info)), + "ctype %08x has no children", ct->info); return ctype_get(cts, ctype_cid(ct->info)); } diff --git a/src/lj_debug.c b/src/lj_debug.c index 65ec26f0..112f5358 100644 --- a/src/lj_debug.c +++ b/src/lj_debug.c @@ -9,12 +9,12 @@ #include "lj_obj.h" #include "lj_err.h" #include "lj_debug.h" -#include "lj_str.h" +#include "lj_buf.h" #include "lj_tab.h" #include "lj_state.h" #include "lj_frame.h" #include "lj_bc.h" -#include "lj_vm.h" +#include "lj_strfmt.h" #if LJ_HASJIT #include "lj_jit.h" #endif @@ -24,11 +24,11 @@ /* Get frame corresponding to a level. */ cTValue *lj_debug_frame(lua_State *L, int level, int *size) { - cTValue *frame, *nextframe, *bot = tvref(L->stack); + cTValue *frame, *nextframe, *bot = tvref(L->stack)+LJ_FR2; /* Traverse frames backwards. */ for (nextframe = frame = L->base-1; frame > bot; ) { if (frame_gc(frame) == obj2gco(L)) - level++; /* Skip dummy frames. See lj_meta_call(). */ + level++; /* Skip dummy frames. See lj_err_optype_call(). */ if (level-- == 0) { *size = (int)(nextframe - frame); return frame; /* Level found. */ @@ -55,7 +55,8 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) const BCIns *ins; GCproto *pt; BCPos pos; - lua_assert(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD); + lj_assertL(fn->c.gct == ~LJ_TFUNC || fn->c.gct == ~LJ_TTHREAD, + "function or frame expected"); if (!isluafunc(fn)) { /* Cannot derive a PC for non-Lua functions. */ return NO_BCPOS; } else if (nextframe == NULL) { /* Lua function on top. */ @@ -87,8 +88,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) if (frame_islua(f)) { f = frame_prevl(f); } else { - if (frame_isc(f) || (LJ_HASFFI && frame_iscont(f) && - (f-1)->u32.lo == LJ_CONT_FFI_CALLBACK)) + if (frame_isc(f) || (frame_iscont(f) && frame_iscont_fficb(f))) cf = cframe_raw(cframe_prev(cf)); f = frame_prevd(f); } @@ -102,7 +102,7 @@ static BCPos debug_framepc(lua_State *L, GCfunc *fn, cTValue *nextframe) #if LJ_HASJIT if (pos > pt->sizebc) { /* Undo the effects of lj_trace_exit for JLOOP. */ GCtrace *T = (GCtrace *)((char *)(ins-1) - offsetof(GCtrace, startins)); - lua_assert(bc_isret(bc_op(ins[-1]))); + lj_assertL(bc_isret(bc_op(ins[-1])), "return bytecode expected"); pos = proto_bcpos(pt, mref(T->startpc, const BCIns)); } #endif @@ -135,7 +135,7 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe) BCPos pc = debug_framepc(L, fn, nextframe); if (pc != NO_BCPOS) { GCproto *pt = funcproto(fn); - lua_assert(pc <= pt->sizebc); + lj_assertL(pc <= pt->sizebc, "PC out of range"); return lj_debug_line(pt, pc); } return -1; @@ -143,38 +143,25 @@ static BCLine debug_frameline(lua_State *L, GCfunc *fn, cTValue *nextframe) /* -- Variable names ------------------------------------------------------ */ -/* Read ULEB128 value. */ -static uint32_t debug_read_uleb128(const uint8_t **pp) -{ - const uint8_t *p = *pp; - uint32_t v = *p++; - if (LJ_UNLIKELY(v >= 0x80)) { - int sh = 0; - v &= 0x7f; - do { v |= ((*p & 0x7f) << (sh += 7)); } while (*p++ >= 0x80); - } - *pp = p; - return v; -} - /* Get name of a local variable from slot number and PC. */ static const char *debug_varname(const GCproto *pt, BCPos pc, BCReg slot) { - const uint8_t *p = proto_varinfo(pt); + const char *p = (const char *)proto_varinfo(pt); if (p) { BCPos lastpc = 0; for (;;) { - const char *name = (const char *)p; - uint32_t vn = *p++; + const char *name = p; + uint32_t vn = *(const uint8_t *)p; BCPos startpc, endpc; if (vn < VARNAME__MAX) { if (vn == VARNAME_END) break; /* End of varinfo. */ } else { - while (*p++) ; /* Skip over variable name string. */ + do { p++; } while (*(const uint8_t *)p); /* Skip over variable name. */ } - lastpc = startpc = lastpc + debug_read_uleb128(&p); + p++; + lastpc = startpc = lastpc + lj_buf_ruleb128(&p); if (startpc > pc) break; - endpc = startpc + debug_read_uleb128(&p); + endpc = startpc + lj_buf_ruleb128(&p); if (pc < endpc && slot-- == 0) { if (vn < VARNAME__MAX) { #define VARNAMESTR(name, str) str "\0" @@ -199,7 +186,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, TValue *nextframe = size ? frame + size : NULL; GCfunc *fn = frame_func(frame); BCPos pc = debug_framepc(L, fn, nextframe); - if (!nextframe) nextframe = L->top; + if (!nextframe) nextframe = L->top+LJ_FR2; if ((int)slot1 < 0) { /* Negative slot number is for varargs. */ if (pc != NO_BCPOS) { GCproto *pt = funcproto(fn); @@ -209,7 +196,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, nextframe = frame; frame = frame_prevd(frame); } - if (frame + slot1 < nextframe) { + if (frame + slot1+LJ_FR2 < nextframe) { *name = "(*vararg)"; return frame+slot1; } @@ -220,7 +207,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, if (pc != NO_BCPOS && (*name = debug_varname(funcproto(fn), pc, slot1-1)) != NULL) ; - else if (slot1 > 0 && frame + slot1 < nextframe) + else if (slot1 > 0 && frame + slot1+LJ_FR2 < nextframe) *name = "(*temporary)"; return frame+slot1; } @@ -229,7 +216,7 @@ static TValue *debug_localname(lua_State *L, const lua_Debug *ar, const char *lj_debug_uvname(GCproto *pt, uint32_t idx) { const uint8_t *p = proto_uvinfo(pt); - lua_assert(idx < pt->sizeuv); + lj_assertX(idx < pt->sizeuv, "bad upvalue index"); if (!p) return ""; if (idx) while (*p++ || --idx) ; return (const char *)p; @@ -286,7 +273,7 @@ restart: *name = strdata(gco2str(proto_kgc(pt, ~(ptrdiff_t)bc_c(ins)))); if (ip > proto_bc(pt)) { BCIns insp = ip[-1]; - if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1 && + if (bc_op(insp) == BC_MOV && bc_a(insp) == ra+1+LJ_FR2 && bc_d(insp) == bc_b(ins)) return "method"; } @@ -303,12 +290,12 @@ restart: } /* Deduce function name from caller of a frame. */ -const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) +const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name) { - TValue *pframe; + cTValue *pframe; GCfunc *fn; BCPos pc; - if (frame <= tvref(L->stack)) + if (frame <= tvref(L->stack)+LJ_FR2) return NULL; if (frame_isvarg(frame)) frame = frame_prevd(frame); @@ -334,7 +321,7 @@ const char *lj_debug_funcname(lua_State *L, TValue *frame, const char **name) /* -- Source code locations ----------------------------------------------- */ /* Generate shortened source name. */ -void lj_debug_shortname(char *out, GCstr *str) +void lj_debug_shortname(char *out, GCstr *str, BCLine line) { const char *src = strdata(str); if (*src == '=') { @@ -348,11 +335,11 @@ void lj_debug_shortname(char *out, GCstr *str) *out++ = '.'; *out++ = '.'; *out++ = '.'; } strcpy(out, src); - } else { /* Output [string "string"]. */ + } else { /* Output [string "string"] or [builtin:name]. */ size_t len; /* Length, up to first control char. */ for (len = 0; len < LUA_IDSIZE-12; len++) if (((const unsigned char *)src)[len] < ' ') break; - strcpy(out, "[string \""); out += 9; + strcpy(out, line == ~(BCLine)0 ? "[builtin:" : "[string \""); out += 9; if (src[len] != '\0') { /* Must truncate? */ if (len > LUA_IDSIZE-15) len = LUA_IDSIZE-15; strncpy(out, src, len); out += len; @@ -360,7 +347,7 @@ void lj_debug_shortname(char *out, GCstr *str) } else { strcpy(out, src); out += len; } - strcpy(out, "\"]"); + strcpy(out, line == ~(BCLine)0 ? "]" : "\"]"); } } @@ -373,14 +360,15 @@ void lj_debug_addloc(lua_State *L, const char *msg, if (isluafunc(fn)) { BCLine line = debug_frameline(L, fn, nextframe); if (line >= 0) { + GCproto *pt = funcproto(fn); char buf[LUA_IDSIZE]; - lj_debug_shortname(buf, proto_chunkname(funcproto(fn))); - lj_str_pushf(L, "%s:%d: %s", buf, line, msg); + lj_debug_shortname(buf, proto_chunkname(pt), pt->firstline); + lj_strfmt_pushf(L, "%s:%d: %s", buf, line, msg); return; } } } - lj_str_pushf(L, "%s", msg); + lj_strfmt_pushf(L, "%s", msg); } /* Push location string for a bytecode position to Lua stack. */ @@ -390,20 +378,22 @@ void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc) const char *s = strdata(name); MSize i, len = name->len; BCLine line = lj_debug_line(pt, pc); - if (*s == '@') { + if (pt->firstline == ~(BCLine)0) { + lj_strfmt_pushf(L, "builtin:%s", s); + } else if (*s == '@') { s++; len--; for (i = len; i > 0; i--) if (s[i] == '/' || s[i] == '\\') { s += i+1; break; } - lj_str_pushf(L, "%s:%d", s, line); + lj_strfmt_pushf(L, "%s:%d", s, line); } else if (len > 40) { - lj_str_pushf(L, "%p:%d", pt, line); + lj_strfmt_pushf(L, "%p:%d", pt, line); } else if (*s == '=') { - lj_str_pushf(L, "%s:%d", s+1, line); + lj_strfmt_pushf(L, "%s:%d", s+1, line); } else { - lj_str_pushf(L, "\"%s\":%d", s, line); + lj_strfmt_pushf(L, "\"%s\":%d", s, line); } } @@ -451,13 +441,14 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext) } else { uint32_t offset = (uint32_t)ar->i_ci & 0xffff; uint32_t size = (uint32_t)ar->i_ci >> 16; - lua_assert(offset != 0); + lj_assertL(offset != 0, "bad frame offset"); frame = tvref(L->stack) + offset; if (size) nextframe = frame + size; - lua_assert(frame <= tvref(L->maxstack) && - (!nextframe || nextframe <= tvref(L->maxstack))); + lj_assertL(frame <= tvref(L->maxstack) && + (!nextframe || nextframe <= tvref(L->maxstack)), + "broken frame chain"); fn = frame_func(frame); - lua_assert(fn->c.gct == ~LJ_TFUNC); + lj_assertL(fn->c.gct == ~LJ_TFUNC, "bad frame function"); } for (; *what; what++) { if (*what == 'S') { @@ -466,7 +457,7 @@ int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext) BCLine firstline = pt->firstline; GCstr *name = proto_chunkname(pt); ar->source = strdata(name); - lj_debug_shortname(ar->short_src, name); + lj_debug_shortname(ar->short_src, name, pt->firstline); ar->linedefined = (int)firstline; ar->lastlinedefined = (int)(firstline + pt->numline); ar->what = (firstline || !pt->numline) ? "Lua" : "main"; @@ -556,6 +547,111 @@ LUA_API int lua_getstack(lua_State *L, int level, lua_Debug *ar) } } +#if LJ_HASPROFILE +/* Put the chunkname into a buffer. */ +static int debug_putchunkname(SBuf *sb, GCproto *pt, int pathstrip) +{ + GCstr *name = proto_chunkname(pt); + const char *p = strdata(name); + if (pt->firstline == ~(BCLine)0) { + lj_buf_putmem(sb, "[builtin:", 9); + lj_buf_putstr(sb, name); + lj_buf_putb(sb, ']'); + return 0; + } + if (*p == '=' || *p == '@') { + MSize len = name->len-1; + p++; + if (pathstrip) { + int i; + for (i = len-1; i >= 0; i--) + if (p[i] == '/' || p[i] == '\\') { + len -= i+1; + p = p+i+1; + break; + } + } + lj_buf_putmem(sb, p, len); + } else { + lj_buf_putmem(sb, "[string]", 8); + } + return 1; +} + +/* Put a compact stack dump into a buffer. */ +void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, int depth) +{ + int level = 0, dir = 1, pathstrip = 1; + MSize lastlen = 0; + if (depth < 0) { level = ~depth; depth = dir = -1; } /* Reverse frames. */ + while (level != depth) { /* Loop through all frame. */ + int size; + cTValue *frame = lj_debug_frame(L, level, &size); + if (frame) { + cTValue *nextframe = size ? frame+size : NULL; + GCfunc *fn = frame_func(frame); + const uint8_t *p = (const uint8_t *)fmt; + int c; + while ((c = *p++)) { + switch (c) { + case 'p': /* Preserve full path. */ + pathstrip = 0; + break; + case 'F': case 'f': { /* Dump function name. */ + const char *name; + const char *what = lj_debug_funcname(L, frame, &name); + if (what) { + if (c == 'F' && isluafunc(fn)) { /* Dump module:name for 'F'. */ + GCproto *pt = funcproto(fn); + if (pt->firstline != ~(BCLine)0) { /* Not a bytecode builtin. */ + debug_putchunkname(sb, pt, pathstrip); + lj_buf_putb(sb, ':'); + } + } + lj_buf_putmem(sb, name, (MSize)strlen(name)); + break; + } /* else: can't derive a name, dump module:line. */ + } + /* fallthrough */ + case 'l': /* Dump module:line. */ + if (isluafunc(fn)) { + GCproto *pt = funcproto(fn); + if (debug_putchunkname(sb, pt, pathstrip)) { + /* Regular Lua function. */ + BCLine line = c == 'l' ? debug_frameline(L, fn, nextframe) : + pt->firstline; + lj_buf_putb(sb, ':'); + lj_strfmt_putint(sb, line >= 0 ? line : pt->firstline); + } + } else if (isffunc(fn)) { /* Dump numbered builtins. */ + lj_buf_putmem(sb, "[builtin#", 9); + lj_strfmt_putint(sb, fn->c.ffid); + lj_buf_putb(sb, ']'); + } else { /* Dump C function address. */ + lj_buf_putb(sb, '@'); + lj_strfmt_putptr(sb, fn->c.f); + } + break; + case 'Z': /* Zap trailing separator. */ + lastlen = sbuflen(sb); + break; + default: + lj_buf_putb(sb, c); + break; + } + } + } else if (dir == 1) { + break; + } else { + level -= size; /* Reverse frame order: quickly skip missing level. */ + } + level += dir; + } + if (lastlen) + sb->w = sb->b + lastlen; /* Zap trailing separator. */ +} +#endif + /* Number of frames for the leading and trailing part of a traceback. */ #define TRACEBACK_LEVELS1 12 #define TRACEBACK_LEVELS2 10 diff --git a/src/lj_debug.h b/src/lj_debug.h index 15cdee3c..28127ae9 100644 --- a/src/lj_debug.h +++ b/src/lj_debug.h @@ -33,14 +33,18 @@ LJ_FUNC const char *lj_debug_uvnamev(cTValue *o, uint32_t idx, TValue **tvp, GCobj **op); LJ_FUNC const char *lj_debug_slotname(GCproto *pt, const BCIns *pc, BCReg slot, const char **name); -LJ_FUNC const char *lj_debug_funcname(lua_State *L, TValue *frame, +LJ_FUNC const char *lj_debug_funcname(lua_State *L, cTValue *frame, const char **name); -LJ_FUNC void lj_debug_shortname(char *out, GCstr *str); +LJ_FUNC void lj_debug_shortname(char *out, GCstr *str, BCLine line); LJ_FUNC void lj_debug_addloc(lua_State *L, const char *msg, cTValue *frame, cTValue *nextframe); LJ_FUNC void lj_debug_pushloc(lua_State *L, GCproto *pt, BCPos pc); LJ_FUNC int lj_debug_getinfo(lua_State *L, const char *what, lj_Debug *ar, int ext); +#if LJ_HASPROFILE +LJ_FUNC void lj_debug_dumpstack(lua_State *L, SBuf *sb, const char *fmt, + int depth); +#endif /* Fixed internal variable names. */ #define VARNAMEDEF(_) \ diff --git a/src/lj_def.h b/src/lj_def.h index d09ebb10..b61297aa 100644 --- a/src/lj_def.h +++ b/src/lj_def.h @@ -46,10 +46,14 @@ typedef unsigned int uintptr_t; #include <stdlib.h> /* Various VM limits. */ -#define LJ_MAX_MEM 0x7fffff00 /* Max. total memory allocation. */ +#define LJ_MAX_MEM32 0x7fffff00 /* Max. 32 bit memory allocation. */ +#define LJ_MAX_MEM64 ((uint64_t)1<<47) /* Max. 64 bit memory allocation. */ +/* Max. total memory allocation. */ +#define LJ_MAX_MEM (LJ_GC64 ? LJ_MAX_MEM64 : LJ_MAX_MEM32) #define LJ_MAX_ALLOC LJ_MAX_MEM /* Max. individual allocation length. */ -#define LJ_MAX_STR LJ_MAX_MEM /* Max. string length. */ -#define LJ_MAX_UDATA LJ_MAX_MEM /* Max. userdata length. */ +#define LJ_MAX_STR LJ_MAX_MEM32 /* Max. string length. */ +#define LJ_MAX_BUF LJ_MAX_MEM32 /* Max. buffer length. */ +#define LJ_MAX_UDATA LJ_MAX_MEM32 /* Max. userdata length. */ #define LJ_MAX_STRTAB (1<<26) /* Max. string table size. */ #define LJ_MAX_HBITS 26 /* Max. hash bits. */ @@ -57,7 +61,7 @@ typedef unsigned int uintptr_t; #define LJ_MAX_ASIZE ((1<<(LJ_MAX_ABITS-1))+1) /* Max. array part size. */ #define LJ_MAX_COLOSIZE 16 /* Max. elems for colocated array. */ -#define LJ_MAX_LINE LJ_MAX_MEM /* Max. source code line number. */ +#define LJ_MAX_LINE LJ_MAX_MEM32 /* Max. source code line number. */ #define LJ_MAX_XLEVEL 200 /* Max. syntactic nesting level. */ #define LJ_MAX_BCINS (1<<26) /* Max. # of bytecode instructions. */ #define LJ_MAX_SLOTS 250 /* Max. # of slots in a Lua func. */ @@ -65,7 +69,7 @@ typedef unsigned int uintptr_t; #define LJ_MAX_UPVAL 60 /* Max. # of upvalues. */ #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ -#define LJ_STACK_EXTRA 5 /* Extra stack space (metamethods). */ +#define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ @@ -76,7 +80,6 @@ typedef unsigned int uintptr_t; #define LJ_MIN_SBUF 32 /* Min. string buffer length. */ #define LJ_MIN_VECSZ 8 /* Min. size for growable vectors. */ #define LJ_MIN_IRSZ 32 /* Min. size for growable IR. */ -#define LJ_MIN_K64SZ 16 /* Min. size for chained K64Array. */ /* JIT compiler limits. */ #define LJ_MAX_JSLOTS 250 /* Max. # of stack slots for a trace. */ @@ -91,6 +94,9 @@ typedef unsigned int uintptr_t; #define U64x(hi, lo) (((uint64_t)0x##hi << 32) + (uint64_t)0x##lo) #define i32ptr(p) ((int32_t)(intptr_t)(void *)(p)) #define u32ptr(p) ((uint32_t)(intptr_t)(void *)(p)) +#define i64ptr(p) ((int64_t)(intptr_t)(void *)(p)) +#define u64ptr(p) ((uint64_t)(intptr_t)(void *)(p)) +#define igcptr(p) (LJ_GC64 ? i64ptr(p) : i32ptr(p)) #define checki8(x) ((x) == (int32_t)(int8_t)(x)) #define checku8(x) ((x) == (int32_t)(uint8_t)(x)) @@ -98,7 +104,10 @@ typedef unsigned int uintptr_t; #define checku16(x) ((x) == (int32_t)(uint16_t)(x)) #define checki32(x) ((x) == (int32_t)(x)) #define checku32(x) ((x) == (uint32_t)(x)) +#define checkptr31(x) (((uint64_t)(uintptr_t)(x) >> 31) == 0) #define checkptr32(x) ((uintptr_t)(x) == (uint32_t)(uintptr_t)(x)) +#define checkptr47(x) (((uint64_t)(uintptr_t)(x) >> 47) == 0) +#define checkptrGC(x) (LJ_GC64 ? checkptr47((x)) : LJ_64 ? checkptr31((x)) :1) /* Every half-decent C compiler transforms this into a rotate instruction. */ #define lj_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1)))) @@ -111,7 +120,7 @@ typedef uintptr_t BloomFilter; #define bloomset(b, x) ((b) |= bloombit((x))) #define bloomtest(b, x) ((b) & bloombit((x))) -#if defined(__GNUC__) || defined(__psp2__) +#if defined(__GNUC__) || defined(__clang__) || defined(__psp2__) #define LJ_NORET __attribute__((noreturn)) #define LJ_ALIGN(n) __attribute__((aligned(n))) @@ -173,7 +182,7 @@ static LJ_AINLINE uint64_t lj_bswap64(uint64_t x) { return ((uint64_t)lj_bswap((uint32_t)x)<<32) | lj_bswap((uint32_t)(x>>32)); } -#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) +#elif (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __clang__ static LJ_AINLINE uint32_t lj_bswap(uint32_t x) { return (uint32_t)__builtin_bswap32((int32_t)x); @@ -329,14 +338,28 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) #define LJ_FUNCA_NORET LJ_FUNCA LJ_NORET #define LJ_ASMF_NORET LJ_ASMF LJ_NORET -/* Runtime assertions. */ -#ifdef lua_assert -#define check_exp(c, e) (lua_assert(c), (e)) -#define api_check(l, e) lua_assert(e) +/* Internal assertions. */ +#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) +#define lj_assert_check(g, c, ...) \ + ((c) ? (void)0 : \ + (lj_assert_fail((g), __FILE__, __LINE__, __func__, __VA_ARGS__), 0)) +#define lj_checkapi(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) #else -#define lua_assert(c) ((void)0) +#define lj_checkapi(c, ...) ((void)L) +#endif + +#ifdef LUA_USE_ASSERT +#define lj_assertG_(g, c, ...) lj_assert_check((g), (c), __VA_ARGS__) +#define lj_assertG(c, ...) lj_assert_check(g, (c), __VA_ARGS__) +#define lj_assertL(c, ...) lj_assert_check(G(L), (c), __VA_ARGS__) +#define lj_assertX(c, ...) lj_assert_check(NULL, (c), __VA_ARGS__) +#define check_exp(c, e) (lj_assertX((c), #c), (e)) +#else +#define lj_assertG_(g, c, ...) ((void)0) +#define lj_assertG(c, ...) ((void)g) +#define lj_assertL(c, ...) ((void)L) +#define lj_assertX(c, ...) ((void)0) #define check_exp(c, e) (e) -#define api_check luai_apicheck #endif /* Static assertions. */ @@ -350,4 +373,9 @@ static LJ_AINLINE uint32_t lj_getu32(const void *v) extern void LJ_ASSERT_NAME(__LINE__)(int STATIC_ASSERTION_FAILED[(cond)?1:-1]) #endif +/* PRNG state. Need this here, details in lj_prng.h. */ +typedef struct PRNGState { + uint64_t u[4]; +} PRNGState; + #endif diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 54c86038..ded382aa 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -8,6 +8,7 @@ #include "lj_obj.h" #include "lj_err.h" +#include "lj_buf.h" #include "lj_func.h" #include "lj_str.h" #include "lj_tab.h" @@ -17,6 +18,7 @@ #include "lj_frame.h" #include "lj_bc.h" #include "lj_ff.h" +#include "lj_strfmt.h" #if LJ_HASJIT #include "lj_jit.h" #endif @@ -25,6 +27,9 @@ #endif #include "lj_trace.h" #include "lj_dispatch.h" +#if LJ_HASPROFILE +#include "lj_profile.h" +#endif #include "lj_vm.h" #include "luajit.h" @@ -37,6 +42,12 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC); #include <math.h> LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, lua_State *co); +#if !LJ_HASJIT +#define lj_dispatch_stitch lj_dispatch_ins +#endif +#if !LJ_HASPROFILE +#define lj_dispatch_profile lj_dispatch_ins +#endif #define GOTFUNC(name) (ASMFunction)name, static const ASMFunction dispatch_got[] = { @@ -57,6 +68,8 @@ void lj_dispatch_init(GG_State *GG) /* The JIT engine is off by default. luaopen_jit() turns it on. */ disp[BC_FORL] = disp[BC_IFORL]; disp[BC_ITERL] = disp[BC_IITERL]; + /* Workaround for stable v2.1 bytecode. TODO: Replace with BC_IITERN. */ + disp[BC_ITERN] = &lj_vm_IITERN; disp[BC_LOOP] = disp[BC_ILOOP]; disp[BC_FUNCF] = disp[BC_IFUNCF]; disp[BC_FUNCV] = disp[BC_IFUNCV]; @@ -64,7 +77,7 @@ void lj_dispatch_init(GG_State *GG) for (i = 0; i < GG_NUM_ASMFF; i++) GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); #if LJ_TARGET_MIPS - memcpy(GG->got, dispatch_got, LJ_GOT__MAX*4); + memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); #endif } @@ -82,11 +95,12 @@ void lj_dispatch_init_hotcount(global_State *g) #endif /* Internal dispatch mode bits. */ -#define DISPMODE_JIT 0x01 /* JIT compiler on. */ -#define DISPMODE_REC 0x02 /* Recording active. */ +#define DISPMODE_CALL 0x01 /* Override call dispatch. */ +#define DISPMODE_RET 0x02 /* Override return dispatch. */ #define DISPMODE_INS 0x04 /* Override instruction dispatch. */ -#define DISPMODE_CALL 0x08 /* Override call dispatch. */ -#define DISPMODE_RET 0x10 /* Override return dispatch. */ +#define DISPMODE_JIT 0x10 /* JIT compiler on. */ +#define DISPMODE_REC 0x20 /* Recording active. */ +#define DISPMODE_PROF 0x40 /* Profiling active. */ /* Update dispatch table depending on various flags. */ void lj_dispatch_update(global_State *g) @@ -98,24 +112,29 @@ void lj_dispatch_update(global_State *g) mode |= G2J(g)->state != LJ_TRACE_IDLE ? (DISPMODE_REC|DISPMODE_INS|DISPMODE_CALL) : 0; #endif +#if LJ_HASPROFILE + mode |= (g->hookmask & HOOK_PROFILE) ? (DISPMODE_PROF|DISPMODE_INS) : 0; +#endif mode |= (g->hookmask & (LUA_MASKLINE|LUA_MASKCOUNT)) ? DISPMODE_INS : 0; mode |= (g->hookmask & LUA_MASKCALL) ? DISPMODE_CALL : 0; mode |= (g->hookmask & LUA_MASKRET) ? DISPMODE_RET : 0; if (oldmode != mode) { /* Mode changed? */ ASMFunction *disp = G2GG(g)->dispatch; - ASMFunction f_forl, f_iterl, f_loop, f_funcf, f_funcv; + ASMFunction f_forl, f_iterl, f_itern, f_loop, f_funcf, f_funcv; g->dispatchmode = mode; /* Hotcount if JIT is on, but not while recording. */ if ((mode & (DISPMODE_JIT|DISPMODE_REC)) == DISPMODE_JIT) { f_forl = makeasmfunc(lj_bc_ofs[BC_FORL]); f_iterl = makeasmfunc(lj_bc_ofs[BC_ITERL]); + f_itern = makeasmfunc(lj_bc_ofs[BC_ITERN]); f_loop = makeasmfunc(lj_bc_ofs[BC_LOOP]); f_funcf = makeasmfunc(lj_bc_ofs[BC_FUNCF]); f_funcv = makeasmfunc(lj_bc_ofs[BC_FUNCV]); } else { /* Otherwise use the non-hotcounting instructions. */ f_forl = disp[GG_LEN_DDISP+BC_IFORL]; f_iterl = disp[GG_LEN_DDISP+BC_IITERL]; + f_itern = &lj_vm_IITERN; f_loop = disp[GG_LEN_DDISP+BC_ILOOP]; f_funcf = makeasmfunc(lj_bc_ofs[BC_IFUNCF]); f_funcv = makeasmfunc(lj_bc_ofs[BC_IFUNCV]); @@ -123,12 +142,13 @@ void lj_dispatch_update(global_State *g) /* Init static counting instruction dispatch first (may be copied below). */ disp[GG_LEN_DDISP+BC_FORL] = f_forl; disp[GG_LEN_DDISP+BC_ITERL] = f_iterl; + disp[GG_LEN_DDISP+BC_ITERN] = f_itern; disp[GG_LEN_DDISP+BC_LOOP] = f_loop; /* Set dynamic instruction dispatch. */ - if ((oldmode ^ mode) & (DISPMODE_REC|DISPMODE_INS)) { + if ((oldmode ^ mode) & (DISPMODE_PROF|DISPMODE_REC|DISPMODE_INS)) { /* Need to update the whole table. */ - if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { /* No ins dispatch? */ + if (!(mode & DISPMODE_INS)) { /* No ins dispatch? */ /* Copy static dispatch table to dynamic dispatch table. */ memcpy(&disp[0], &disp[GG_LEN_DDISP], GG_LEN_SDISP*sizeof(ASMFunction)); /* Overwrite with dynamic return dispatch. */ @@ -140,15 +160,17 @@ void lj_dispatch_update(global_State *g) } } else { /* The recording dispatch also checks for hooks. */ - ASMFunction f = (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; + ASMFunction f = (mode & DISPMODE_PROF) ? lj_vm_profhook : + (mode & DISPMODE_REC) ? lj_vm_record : lj_vm_inshook; uint32_t i; for (i = 0; i < GG_LEN_SDISP; i++) disp[i] = f; } - } else if (!(mode & (DISPMODE_REC|DISPMODE_INS))) { + } else if (!(mode & DISPMODE_INS)) { /* Otherwise set dynamic counting ins. */ disp[BC_FORL] = f_forl; disp[BC_ITERL] = f_iterl; + disp[BC_ITERN] = f_itern; disp[BC_LOOP] = f_loop; /* Set dynamic return dispatch. */ if ((mode & DISPMODE_RET)) { @@ -236,22 +258,15 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) } else { if (!(mode & LUAJIT_MODE_ON)) G2J(g)->flags &= ~(uint32_t)JIT_F_ON; -#if LJ_TARGET_X86ORX64 - else if ((G2J(g)->flags & JIT_F_SSE2)) - G2J(g)->flags |= (uint32_t)JIT_F_ON; - else - return 0; /* Don't turn on JIT compiler without SSE2 support. */ -#else else G2J(g)->flags |= (uint32_t)JIT_F_ON; -#endif lj_dispatch_update(g); } break; case LUAJIT_MODE_FUNC: case LUAJIT_MODE_ALLFUNC: case LUAJIT_MODE_ALLSUBFUNC: { - cTValue *tv = idx == 0 ? frame_prev(L->base-1) : + cTValue *tv = idx == 0 ? frame_prev(L->base-1)-LJ_FR2 : idx > 0 ? L->base + (idx-1) : L->top + idx; GCproto *pt; if ((idx == 0 || tvisfunc(tv)) && isluafunc(&gcval(tv)->fn)) @@ -286,7 +301,7 @@ int luaJIT_setmode(lua_State *L, int idx, int mode) if (idx != 0) { cTValue *tv = idx > 0 ? L->base + (idx-1) : L->top + idx; if (tvislightud(tv)) - g->wrapf = (lua_CFunction)lightudV(tv); + g->wrapf = (lua_CFunction)lightudV(g, tv); else return 0; /* Failed. */ } else { @@ -352,10 +367,19 @@ static void callhook(lua_State *L, int event, BCLine line) /* Top frame, nextframe = NULL. */ ar.i_ci = (int)((L->base-1) - tvref(L->stack)); lj_state_checkstack(L, 1+LUA_MINSTACK); +#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF + lj_profile_hook_enter(g); +#else hook_enter(g); +#endif hookf(L, &ar); - lua_assert(hook_active(g)); + lj_assertG(hook_active(g), "active hook flag removed"); + setgcref(g->cur_L, obj2gco(L)); +#if LJ_HASPROFILE && !LJ_PROFILE_SIGPROF + lj_profile_hook_leave(g); +#else hook_leave(g); +#endif } } @@ -368,7 +392,7 @@ static BCReg cur_topslot(GCproto *pt, const BCIns *pc, uint32_t nres) if (bc_op(ins) == BC_UCLO) ins = pc[bc_j(ins)]; switch (bc_op(ins)) { - case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1; + case BC_CALLM: case BC_CALLMT: return bc_a(ins) + bc_c(ins) + nres-1+1+LJ_FR2; case BC_RETM: return bc_a(ins) + bc_d(ins) + nres-1; case BC_TSETM: return bc_a(ins) + nres-1; default: return pt->framesize; @@ -397,7 +421,8 @@ void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc) #endif J->L = L; lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ - lua_assert(L->top - L->base == delta); + lj_assertG(L->top - L->base == delta, + "unbalanced stack after tracing of instruction"); } } #endif @@ -457,7 +482,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) #endif pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1); lj_trace_hot(J, pc); - lua_assert(L->top - L->base == delta); + lj_assertG(L->top - L->base == delta, + "unbalanced stack after hot call"); goto out; } else if (J->state != LJ_TRACE_IDLE && !(g->hookmask & (HOOK_GC|HOOK_VMEVENT))) { @@ -466,7 +492,8 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) #endif /* Record the FUNC* bytecodes, too. */ lj_trace_ins(J, pc-1); /* The interpreter bytecode PC is offset by 1. */ - lua_assert(L->top - L->base == delta); + lj_assertG(L->top - L->base == delta, + "unbalanced stack after hot instruction"); } #endif if ((g->hookmask & LUA_MASKCALL)) { @@ -492,3 +519,41 @@ out: return makeasmfunc(lj_bc_ofs[op]); /* Return static dispatch target. */ } +#if LJ_HASJIT +/* Stitch a new trace. */ +void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc) +{ + ERRNO_SAVE + lua_State *L = J->L; + void *cf = cframe_raw(L->cframe); + const BCIns *oldpc = cframe_pc(cf); + setcframe_pc(cf, pc); + /* Before dispatch, have to bias PC by 1. */ + L->top = L->base + cur_topslot(curr_proto(L), pc+1, cframe_multres_n(cf)); + lj_trace_stitch(J, pc-1); /* Point to the CALL instruction. */ + setcframe_pc(cf, oldpc); + ERRNO_RESTORE +} +#endif + +#if LJ_HASPROFILE +/* Profile dispatch. */ +void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc) +{ + ERRNO_SAVE + GCfunc *fn = curr_func(L); + GCproto *pt = funcproto(fn); + void *cf = cframe_raw(L->cframe); + const BCIns *oldpc = cframe_pc(cf); + global_State *g; + setcframe_pc(cf, pc); + L->top = L->base + cur_topslot(pt, pc, cframe_multres_n(cf)); + lj_profile_interpreter(L); + setcframe_pc(cf, oldpc); + g = G(L); + setgcref(g->cur_L, obj2gco(L)); + setvmstate(g, INTERP); + ERRNO_RESTORE +} +#endif + diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index cb4cbf8e..52762eea 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -14,8 +14,24 @@ #if LJ_TARGET_MIPS /* Need our own global offset table for the dreaded MIPS calling conventions. */ + +#ifndef _LJ_VM_H +LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b); +#endif + +#if LJ_SOFTFP +#ifndef _LJ_IRCALL_H +extern double __adddf3(double a, double b); +extern double __subdf3(double a, double b); +extern double __muldf3(double a, double b); +extern double __divdf3(double a, double b); +#endif +#define SFGOTDEF(_) _(sqrt) _(__adddf3) _(__subdf3) _(__muldf3) _(__divdf3) +#else +#define SFGOTDEF(_) +#endif #if LJ_HASJIT -#define JITGOTDEF(_) _(lj_trace_exit) _(lj_trace_hot) +#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot) #else #define JITGOTDEF(_) #endif @@ -28,16 +44,19 @@ #define GOTDEF(_) \ _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ - _(pow) _(fmod) _(ldexp) \ - _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_err_throw) _(lj_err_run) \ + _(pow) _(fmod) _(ldexp) _(lj_vm_modi) \ + _(lj_dispatch_call) _(lj_dispatch_ins) _(lj_dispatch_stitch) \ + _(lj_dispatch_profile) _(lj_err_throw) \ _(lj_ffh_coroutine_wrap_err) _(lj_func_closeuv) _(lj_func_newL_gc) \ _(lj_gc_barrieruv) _(lj_gc_step) _(lj_gc_step_fixtop) _(lj_meta_arith) \ _(lj_meta_call) _(lj_meta_cat) _(lj_meta_comp) _(lj_meta_equal) \ - _(lj_meta_for) _(lj_meta_len) _(lj_meta_tget) _(lj_meta_tset) \ - _(lj_state_growstack) _(lj_str_fromnum) _(lj_str_fromnumber) _(lj_str_new) \ - _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) _(lj_tab_new) \ - _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ - JITGOTDEF(_) FFIGOTDEF(_) + _(lj_meta_for) _(lj_meta_istype) _(lj_meta_len) _(lj_meta_tget) \ + _(lj_meta_tset) _(lj_state_growstack) _(lj_strfmt_number) \ + _(lj_str_new) _(lj_tab_dup) _(lj_tab_get) _(lj_tab_getinth) _(lj_tab_len) \ + _(lj_tab_new) _(lj_tab_newkey) _(lj_tab_next) _(lj_tab_reasize) \ + _(lj_tab_setinth) _(lj_buf_putstr_reverse) _(lj_buf_putstr_lower) \ + _(lj_buf_putstr_upper) _(lj_buf_tostr) \ + JITGOTDEF(_) FFIGOTDEF(_) SFGOTDEF(_) enum { #define GOTENUM(name) LJ_GOT_##name, @@ -60,7 +79,7 @@ typedef uint16_t HotCount; #define HOTCOUNT_CALL 1 /* This solves a circular dependency problem -- bump as needed. Sigh. */ -#define GG_NUM_ASMFF 62 +#define GG_NUM_ASMFF 57 #define GG_LEN_DDISP (BC__MAX + GG_NUM_ASMFF) #define GG_LEN_SDISP BC_FUNCF @@ -70,7 +89,7 @@ typedef uint16_t HotCount; typedef struct GG_State { lua_State L; /* Main thread. */ global_State g; /* Global state. */ -#if LJ_TARGET_ARM +#if LJ_TARGET_ARM && !LJ_TARGET_NX /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ uint8_t align1[(16-sizeof(global_State))&15]; #endif @@ -80,7 +99,7 @@ typedef struct GG_State { #if LJ_HASJIT jit_State J; /* JIT state. */ HotCount hotcount[HOTCOUNT_SIZE]; /* Hot counters. */ -#if LJ_TARGET_ARM +#if LJ_TARGET_ARM && !LJ_TARGET_NX /* Ditto for J. */ uint8_t align2[(16-sizeof(jit_State)-sizeof(HotCount)*HOTCOUNT_SIZE)&15]; #endif @@ -96,6 +115,7 @@ typedef struct GG_State { #define J2G(J) (&J2GG(J)->g) #define G2J(gl) (&G2GG(gl)->J) #define L2J(L) (&L2GG(L)->J) +#define GG_G2J (GG_OFS(J) - GG_OFS(g)) #define GG_G2DISP (GG_OFS(dispatch) - GG_OFS(g)) #define GG_DISP2G (GG_OFS(g) - GG_OFS(dispatch)) #define GG_DISP2J (GG_OFS(J) - GG_OFS(dispatch)) @@ -117,7 +137,12 @@ LJ_FUNC void lj_dispatch_update(global_State *g); /* Instruction dispatch callback for hooks or when recording. */ LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); -LJ_FUNCA void LJ_FASTCALL lj_dispatch_return(lua_State *L, const BCIns *pc); +#if LJ_HASJIT +LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc); +#endif +#if LJ_HASPROFILE +LJ_FUNCA void LJ_FASTCALL lj_dispatch_profile(lua_State *L, const BCIns *pc); +#endif #if LJ_HASFFI && !defined(_BUILDVM_H) /* Save/restore errno and GetLastError() around hooks, exits and recording. */ diff --git a/src/lj_emit_arm.h b/src/lj_emit_arm.h index 2db07ef6..cfb174f4 100644 --- a/src/lj_emit_arm.h +++ b/src/lj_emit_arm.h @@ -81,7 +81,8 @@ static void emit_m(ASMState *as, ARMIns ai, Reg rm) static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) { - lua_assert(ofs >= -255 && ofs <= 255); + lj_assertA(ofs >= -255 && ofs <= 255, + "load/store offset %d out of range", ofs); if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | ((ofs & 0xf0) << 4) | (ofs & 0x0f); @@ -89,7 +90,8 @@ static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) { - lua_assert(ofs >= -4095 && ofs <= 4095); + lj_assertA(ofs >= -4095 && ofs <= 4095, + "load/store offset %d out of range", ofs); /* Combine LDR/STR pairs to LDRD/STRD. */ if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && @@ -106,7 +108,8 @@ static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) #if !LJ_SOFTFP static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) { - lua_assert(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0); + lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0, + "load/store offset %d out of range", ofs); if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); } @@ -124,7 +127,7 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i) while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); + lj_assertA(r != d, "dest reg not free"); if (emit_canremat(ref)) { int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); uint32_t k = emit_isk12(ARMI_ADD, delta); @@ -142,13 +145,13 @@ static int emit_kdelta1(ASMState *as, Reg d, int32_t i) } /* Try to find a two step delta relative to another constant. */ -static int emit_kdelta2(ASMState *as, Reg d, int32_t i) +static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != d); + lj_assertA(r != rd, "dest reg %d not free", rd); if (emit_canremat(ref)) { int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; if (other) { @@ -159,8 +162,8 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i) k2 = emit_isk12(0, delta & (255 << sh)); k = emit_isk12(0, delta & ~(255 << sh)); if (k) { - emit_dn(as, ARMI_ADD^k2^inv, d, d); - emit_dn(as, ARMI_ADD^k^inv, d, r); + emit_dn(as, ARMI_ADD^k2^inv, rd, rd); + emit_dn(as, ARMI_ADD^k^inv, rd, r); return 1; } } @@ -171,23 +174,24 @@ static int emit_kdelta2(ASMState *as, Reg d, int32_t i) } /* Load a 32 bit constant into a GPR. */ -static void emit_loadi(ASMState *as, Reg r, int32_t i) +static void emit_loadi(ASMState *as, Reg rd, int32_t i) { uint32_t k = emit_isk12(ARMI_MOV, i); - lua_assert(rset_test(as->freeset, r) || r == RID_TMP); + lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP, + "dest reg %d not free", rd); if (k) { /* Standard K12 constant. */ - emit_d(as, ARMI_MOV^k, r); + emit_d(as, ARMI_MOV^k, rd); } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { /* 16 bit loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta1(as, r, i)) { + emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + } else if (emit_kdelta1(as, rd, i)) { /* One step delta relative to another constant. */ } else if ((as->flags & JIT_F_ARMV6T2)) { /* 32 bit hiword/loword constant for ARMv6T2. */ - emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), r); - emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), r); - } else if (emit_kdelta2(as, r, i)) { + emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); + emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); + } else if (emit_kdelta2(as, rd, i)) { /* Two step delta relative to another constant. */ } else { /* Otherwise construct the constant with up to 4 instructions. */ @@ -197,17 +201,17 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) int32_t m = i & (255 << sh); i &= ~(255 << sh); if (i == 0) { - emit_d(as, ARMI_MOV ^ emit_isk12(0, m), r); + emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd); break; } - emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), r, r); + emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd); } } } -#define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) +#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr))) -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) @@ -219,8 +223,9 @@ static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) #if !LJ_SOFTFP /* Load a number constant into an FPR. */ -static void emit_loadn(ASMState *as, Reg r, cTValue *tv) +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) { + cTValue *tv = ir_knum(ir); int32_t i; if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { uint32_t hi = tv->u32.hi; @@ -260,7 +265,7 @@ static void emit_branch(ASMState *as, ARMIns ai, MCode *target) { MCode *p = as->mcp; ptrdiff_t delta = (target - p) - 1; - lua_assert(((delta + 0x00800000) >> 24) == 0); + lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range"); *--p = ai | ((uint32_t)delta & 0x00ffffffu); as->mcp = p; } @@ -288,7 +293,7 @@ static void emit_call(ASMState *as, void *target) static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) { #if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); #else if (dst >= RID_MAX_GPR) { emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S, @@ -308,30 +313,30 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) emit_dm(as, ARMI_MOV, dst, src); } -/* Generic load of register from stack slot. */ -static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) +/* Generic load of register with base and (small) offset address. */ +static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { #if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); #else if (r >= RID_MAX_GPR) - emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, RID_SP, ofs); + emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); else #endif - emit_lso(as, ARMI_LDR, r, RID_SP, ofs); + emit_lso(as, ARMI_LDR, r, base, ofs); } -/* Generic store of register to stack slot. */ -static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { #if LJ_SOFTFP - lua_assert(!irt_isnum(ir->t)); UNUSED(ir); + lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); #else if (r >= RID_MAX_GPR) - emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, RID_SP, ofs); + emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); else #endif - emit_lso(as, ARMI_STR, r, RID_SP, ofs); + emit_lso(as, ARMI_STR, r, base, ofs); } /* Emit an arithmetic/logic operation with a constant operand. */ diff --git a/src/lj_emit_arm64.h b/src/lj_emit_arm64.h new file mode 100644 index 00000000..c4b4c147 --- /dev/null +++ b/src/lj_emit_arm64.h @@ -0,0 +1,424 @@ +/* +** ARM64 instruction emitter. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +** +** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +** Sponsored by Cisco Systems, Inc. +*/ + +/* -- Constant encoding --------------------------------------------------- */ + +static uint64_t get_k64val(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_KINT64) { + return ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (uint64_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (uint64_t)ir_kptr(ir); + } else { + lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, + "bad 64 bit const IR op %d", ir->o); + return ir->i; /* Sign-extended. */ + } +} + +/* Encode constant in K12 format for data processing instructions. */ +static uint32_t emit_isk12(int64_t n) +{ + uint64_t k = (n < 0) ? -n : n; + uint32_t m = (n < 0) ? 0x40000000 : 0; + if (k < 0x1000) { + return A64I_K12|m|A64F_U12(k); + } else if ((k & 0xfff000) == k) { + return A64I_K12|m|0x400000|A64F_U12(k>>12); + } + return 0; +} + +#define emit_clz64(n) __builtin_clzll(n) +#define emit_ctz64(n) __builtin_ctzll(n) + +/* Encode constant in K13 format for logical data processing instructions. */ +static uint32_t emit_isk13(uint64_t n, int is64) +{ + int inv = 0, w = 128, lz, tz; + if (n & 1) { n = ~n; w = 64; inv = 1; } /* Avoid wrap-around of ones. */ + if (!n) return 0; /* Neither all-zero nor all-ones are allowed. */ + do { /* Find the repeat width. */ + if (is64 && (uint32_t)(n^(n>>32))) break; + n = (uint32_t)n; + if (!n) return 0; /* Ditto when passing n=0xffffffff and is64=0. */ + w = 32; if ((n^(n>>16)) & 0xffff) break; + n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break; + n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break; + n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break; + n = n & 0x3; w = 2; + } while (0); + lz = emit_clz64(n); + tz = emit_ctz64(n); + if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */ + if (inv) + return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10); + else + return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10); +} + +static uint32_t emit_isfpk64(uint64_t n) +{ + uint64_t etop9 = ((n >> 54) & 0x1ff); + if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) { + return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80)); + } + return ~0u; +} + +/* -- Emit basic instructions --------------------------------------------- */ + +static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra); +} + +static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm); +} + +static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_M(rm); +} + +static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn) +{ + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn); +} + +static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm) +{ + *--as->mcp = ai | A64F_N(rn) | A64F_M(rm); +} + +static void emit_d(ASMState *as, A64Ins ai, Reg rd) +{ + *--as->mcp = ai | A64F_D(rd); +} + +static void emit_n(ASMState *as, A64Ins ai, Reg rn) +{ + *--as->mcp = ai | A64F_N(rn); +} + +static int emit_checkofs(A64Ins ai, int64_t ofs) +{ + int scale = (ai >> 30) & 3; + if (ofs < 0 || (ofs & ((1<<scale)-1))) { + return (ofs >= -256 && ofs <= 255) ? -1 : 0; + } else { + return (ofs < (4096<<scale)) ? 1 : 0; + } +} + +static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs) +{ + int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3; + lj_assertA(ot, "load/store offset %d out of range", ofs); + /* Combine LDR/STR pairs to LDP/STP. */ + if ((sc == 2 || sc == 3) && + (!(ai & 0x400000) || rd != rn) && + as->mcp != as->mcloop) { + uint32_t prev = *as->mcp & ~A64F_D(31); + int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc); + A64Ins aip; + if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) || + prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) { + aip = (A64F_A(rd) | A64F_D(*as->mcp & 31)); + } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) || + prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) { + aip = (A64F_D(rd) | A64F_A(*as->mcp & 31)); + ofsm = ofs; + } else { + goto nopair; + } + if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) { + *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) | + (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000)); + return; + } + } +nopair: + if (ot == 1) + *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc); + else + *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff); +} + +/* -- Emit loads/stores --------------------------------------------------- */ + +/* Prefer rematerialization of BASE/L from global_State over spills. */ +#define emit_canremat(ref) ((ref) <= ASMREF_L) + +/* Try to find an N-step delta relative to other consts with N < lim. */ +static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim) +{ + RegSet work = (~as->freeset & RSET_GPR) | RID2RSET(RID_GL); + if (lim <= 1) return 0; /* Can't beat that. */ + while (work) { + Reg r = rset_picktop(work); + IRRef ref = regcost_ref(as->cost[r]); + lj_assertA(r != rd, "dest reg %d not free", rd); + if (ref < REF_TRUE) { + uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) : + get_k64val(as, ref); + int64_t delta = (int64_t)(k - kx); + if (delta == 0) { + emit_dm(as, A64I_MOVx, rd, r); + return 1; + } else { + uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta); + if (k12) { + emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r); + return 1; + } + /* Do other ops or multi-step deltas pay off? Probably not. + ** E.g. XOR rarely helps with pointer consts. + */ + } + } + rset_clear(work, r); + } + return 0; /* Failed. */ +} + +static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64) +{ + int i, zeros = 0, ones = 0, neg; + if (!is64) u64 = (int64_t)(int32_t)u64; /* Sign-extend. */ + /* Count homogeneous 16 bit fragments. */ + for (i = 0; i < 4; i++) { + uint64_t frag = (u64 >> i*16) & 0xffff; + zeros += (frag == 0); + ones += (frag == 0xffff); + } + neg = ones > zeros; /* Use MOVN if it pays off. */ + if ((neg ? ones : zeros) < 3) { /* Need 2+ ins. Try shorter K13 encoding. */ + uint32_t k13 = emit_isk13(u64, is64); + if (k13) { + emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO); + return; + } + } + if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) { + int shift = 0, lshift = 0; + uint64_t n64 = neg ? ~u64 : u64; + if (n64 != 0) { + /* Find first/last fragment to be filled. */ + shift = (63-emit_clz64(n64)) & ~15; + lshift = emit_ctz64(n64) & ~15; + } + /* MOVK requires the original value (u64). */ + while (shift > lshift) { + uint32_t u16 = (u64 >> shift) & 0xffff; + /* Skip fragments that are correctly filled by MOVN/MOVZ. */ + if (u16 != (neg ? 0xffff : 0)) + emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd); + shift -= 16; + } + /* But MOVN needs an inverted value (n64). */ + emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) | + A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd); + } +} + +/* Load a 32 bit constant into a GPR. */ +#define emit_loadi(as, rd, i) emit_loadk(as, rd, i, 0) + +/* Load a 64 bit constant into a GPR. */ +#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X) + +#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr)) + +#define glofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g)) +#define mcpofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1))) +#define checkmcpofs(as, k) \ + (A64F_S_OK(mcpofs(as, k)>>2, 19)) + +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); + +/* Get/set from constant pointer. */ +static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p) +{ + /* First, check if ip + offset is in range. */ + if ((ai & 0x00400000) && checkmcpofs(as, p)) { + emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r); + } else { + Reg base = RID_GL; /* Next, try GL + offset. */ + int64_t ofs = glofs(as, p); + if (!emit_checkofs(ai, ofs)) { /* Else split up into base reg + offset. */ + int64_t i64 = i64ptr(p); + base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r)); + ofs = i64 & 0x7fffull; + } + emit_lso(as, ai, r, base, ofs); + } +} + +/* Load 64 bit IR constant into register. */ +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + int64_t ofs; + if (r >= RID_MAX_GPR) { + uint32_t fpk = emit_isfpk64(*k); + if (fpk != ~0u) { + emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31)); + return; + } + } + ofs = glofs(as, k); + if (emit_checkofs(A64I_LDRx, ofs)) { + emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx, + (r & 31), RID_GL, ofs); + } else { + if (r >= RID_MAX_GPR) { + emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP); + r = RID_TMP; + } + if (checkmcpofs(as, k)) + emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r); + else + emit_loadu64(as, r, *k); + } +} + +/* Get/set global_State fields. */ +#define emit_getgl(as, r, field) \ + emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field) +#define emit_setgl(as, r, field) \ + emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field) + +/* Trace number is determined from pc of exit instruction. */ +#define emit_setvmstate(as, i) UNUSED(i) + +/* -- Emit control-flow instructions -------------------------------------- */ + +/* Label for internal jumps. */ +typedef MCode *MCLabel; + +/* Return label pointing to current PC. */ +#define emit_label(as) ((as)->mcp) + +static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lj_assertA(A64F_S_OK(delta, 19), "branch target out of range"); + *p = A64I_BCC | A64F_S19(delta) | cond; +} + +static void emit_branch(ASMState *as, A64Ins ai, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lj_assertA(A64F_S_OK(delta, 26), "branch target out of range"); + *p = ai | A64F_S26(delta); +} + +static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lj_assertA(bit < 63, "bit number out of range"); + lj_assertA(A64F_S_OK(delta, 14), "branch target out of range"); + if (bit > 31) ai |= A64I_X; + *p = ai | A64F_BIT(bit & 31) | A64F_S14(delta) | r; +} + +static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = target - p; + lj_assertA(A64F_S_OK(delta, 19), "branch target out of range"); + *p = ai | A64F_S19(delta) | r; +} + +#define emit_jmp(as, target) emit_branch(as, A64I_B, (target)) + +static void emit_call(ASMState *as, void *target) +{ + MCode *p = --as->mcp; + ptrdiff_t delta = (char *)target - (char *)p; + if (A64F_S_OK(delta>>2, 26)) { + *p = A64I_BL | A64F_S26(delta>>2); + } else { /* Target out of range: need indirect call. But don't use R0-R7. */ + Reg r = ra_allock(as, i64ptr(target), + RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED); + *p = A64I_BLR | A64F_N(r); + } +} + +/* -- Emit generic operations --------------------------------------------- */ + +/* Generic move between two regs. */ +static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) +{ + if (dst >= RID_MAX_GPR) { + emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S, + (dst & 31), (src & 31)); + return; + } + if (as->mcp != as->mcloop) { /* Swap early registers for loads/stores. */ + MCode ins = *as->mcp, swp = (src^dst); + if ((ins & 0xbf800000) == 0xb9000000) { + if (!((ins ^ (dst << 5)) & 0x000003e0)) + *as->mcp = ins ^ (swp << 5); /* Swap N in load/store. */ + if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f)) + *as->mcp = ins ^ swp; /* Swap D in store. */ + } + } + emit_dm(as, A64I_MOVx, dst, src); +} + +/* Generic load of register with base and (small) offset address. */ +static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r >= RID_MAX_GPR) + emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs); + else + emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs); +} + +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) +{ + if (r >= RID_MAX_GPR) + emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs); + else + emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs); +} + +/* Emit an arithmetic operation with a constant operand. */ +static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src, + int32_t i, RegSet allow) +{ + uint32_t k = emit_isk12(i); + if (k) + emit_dn(as, ai^k, dest, src); + else + emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); +} + +/* Add offset to pointer. */ +static void emit_addptr(ASMState *as, Reg r, int32_t ofs) +{ + if (ofs) + emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r, + ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r)); +} + +#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) + diff --git a/src/lj_emit_mips.h b/src/lj_emit_mips.h index 366cf7ab..0cea5479 100644 --- a/src/lj_emit_mips.h +++ b/src/lj_emit_mips.h @@ -3,6 +3,32 @@ ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h */ +#if LJ_64 +static intptr_t get_k64val(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_KINT64) { + return (intptr_t)ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (intptr_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (intptr_t)ir_kptr(ir); + } else if (LJ_SOFTFP && ir->o == IR_KNUM) { + return (intptr_t)ir_knum(ir)->u64; + } else { + lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, + "bad 64 bit const IR op %d", ir->o); + return ir->i; /* Sign-extended. */ + } +} +#endif + +#if LJ_64 +#define get_kval(as, ref) get_k64val(as, ref) +#else +#define get_kval(as, ref) (IR((ref))->i) +#endif + /* -- Emit basic instructions --------------------------------------------- */ static void emit_dst(ASMState *as, MIPSIns mi, Reg rd, Reg rs, Reg rt) @@ -35,7 +61,7 @@ static void emit_fgh(ASMState *as, MIPSIns mi, Reg rf, Reg rg, Reg rh) static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) { - if ((as->flags & JIT_F_MIPS32R2)) { + if (LJ_64 || (as->flags & JIT_F_MIPSXXR2)) { emit_dta(as, MIPSI_ROTR, dest, src, shift); } else { emit_dst(as, MIPSI_OR, dest, dest, tmp); @@ -44,23 +70,32 @@ static void emit_rotr(ASMState *as, Reg dest, Reg src, Reg tmp, uint32_t shift) } } +#if LJ_64 || LJ_HASBUFFER +static void emit_tsml(ASMState *as, MIPSIns mi, Reg rt, Reg rs, uint32_t msb, + uint32_t lsb) +{ + *--as->mcp = mi | MIPSF_T(rt) | MIPSF_S(rs) | MIPSF_M(msb) | MIPSF_L(lsb); +} +#endif + /* -- Emit loads/stores --------------------------------------------------- */ /* Prefer rematerialization of BASE/L from global_State over spills. */ #define emit_canremat(ref) ((ref) <= REF_BASE) /* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, int32_t i) +static int emit_kdelta1(ASMState *as, Reg rd, intptr_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); + lj_assertA(r != rd, "dest reg %d not free", rd); if (ref < ASMREF_L) { - int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); + intptr_t delta = (intptr_t)((uintptr_t)i - + (uintptr_t)(ra_iskref(ref) ? ra_krefk(as, ref) : get_kval(as, ref))); if (checki16(delta)) { - emit_tsi(as, MIPSI_ADDIU, t, r, delta); + emit_tsi(as, MIPSI_AADDIU, rd, r, delta); return 1; } } @@ -76,8 +111,8 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) emit_ti(as, MIPSI_LI, r, i); } else { if ((i & 0xffff)) { - int32_t jgl = i32ptr(J2G(as->J)); - if ((uint32_t)(i-jgl) < 65536) { + intptr_t jgl = (intptr_t)(void *)J2G(as->J); + if ((uintptr_t)(i-jgl) < 65536) { emit_tsi(as, MIPSI_ADDIU, r, RID_JGL, i-jgl-32768); return; } else if (emit_kdelta1(as, r, i)) { @@ -92,16 +127,49 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) } } +#if LJ_64 +/* Load a 64 bit constant into a GPR. */ +static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) +{ + if (checki32((int64_t)u64)) { + emit_loadi(as, r, (int32_t)u64); + } else { + uint64_t delta = u64 - (uint64_t)(void *)J2G(as->J); + if (delta < 65536) { + emit_tsi(as, MIPSI_DADDIU, r, RID_JGL, (int32_t)(delta-32768)); + } else if (emit_kdelta1(as, r, (intptr_t)u64)) { + return; + } else { + /* TODO MIPSR6: Use DAHI & DATI. Caveat: sign-extension. */ + if ((u64 & 0xffff)) { + emit_tsi(as, MIPSI_ORI, r, r, u64 & 0xffff); + } + if (((u64 >> 16) & 0xffff)) { + emit_dta(as, MIPSI_DSLL, r, r, 16); + emit_tsi(as, MIPSI_ORI, r, r, (u64 >> 16) & 0xffff); + emit_dta(as, MIPSI_DSLL, r, r, 16); + } else { + emit_dta(as, MIPSI_DSLL32, r, r, 0); + } + emit_loadi(as, r, (int32_t)(u64 >> 32)); + } + /* TODO: There are probably more optimization opportunities. */ + } +} + +#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) +#else #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) +#endif -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); -static void ra_allockreg(ASMState *as, int32_t k, Reg r); +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); +static void ra_allockreg(ASMState *as, intptr_t k, Reg r); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) { - int32_t jgl = i32ptr(J2G(as->J)); - int32_t i = i32ptr(p); + intptr_t jgl = (intptr_t)(J2G(as->J)); + intptr_t i = (intptr_t)(p); Reg base; if ((uint32_t)(i-jgl) < 65536) { i = i-jgl-32768; @@ -112,8 +180,24 @@ static void emit_lsptr(ASMState *as, MIPSIns mi, Reg r, void *p, RegSet allow) emit_tsi(as, mi, r, base, i); } -#define emit_loadn(as, r, tv) \ - emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)(tv), RSET_GPR) +#if LJ_64 +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + Reg r64 = r; + if (rset_test(RSET_FPR, r)) { + r64 = RID_TMP; + emit_tg(as, MIPSI_DMTC1, r64, r); + } + if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) + emit_lsptr(as, MIPSI_LD, r64, (void *)k, 0); + else + emit_loadu64(as, r64, *k); +} +#else +#define emit_loadk64(as, r, ir) \ + emit_lsptr(as, MIPSI_LDC1, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) +#endif /* Get/set global_State fields. */ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) @@ -122,9 +206,9 @@ static void emit_lsglptr(ASMState *as, MIPSIns mi, Reg r, int32_t ofs) } #define emit_getgl(as, r, field) \ - emit_lsglptr(as, MIPSI_LW, (r), (int32_t)offsetof(global_State, field)) + emit_lsglptr(as, MIPSI_AL, (r), (int32_t)offsetof(global_State, field)) #define emit_setgl(as, r, field) \ - emit_lsglptr(as, MIPSI_SW, (r), (int32_t)offsetof(global_State, field)) + emit_lsglptr(as, MIPSI_AS, (r), (int32_t)offsetof(global_State, field)) /* Trace number is determined from per-trace exit stubs. */ #define emit_setvmstate(as, i) UNUSED(i) @@ -141,7 +225,7 @@ static void emit_branch(ASMState *as, MIPSIns mi, Reg rs, Reg rt, MCode *target) { MCode *p = as->mcp; ptrdiff_t delta = target - p; - lua_assert(((delta + 0x8000) >> 16) == 0); + lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); *--p = mi | MIPSF_S(rs) | MIPSF_T(rt) | ((uint32_t)delta & 0xffffu); as->mcp = p; } @@ -152,16 +236,31 @@ static void emit_jmp(ASMState *as, MCode *target) emit_branch(as, MIPSI_B, RID_ZERO, RID_ZERO, (target)); } -static void emit_call(ASMState *as, void *target) +static void emit_call(ASMState *as, void *target, int needcfa) { MCode *p = as->mcp; - *--p = MIPSI_NOP; - if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) +#if LJ_TARGET_MIPSR6 + ptrdiff_t delta = (char *)target - (char *)p; + if ((((delta>>2) + 0x02000000) >> 26) == 0) { /* Try compact call first. */ + *--p = MIPSI_BALC | (((uintptr_t)delta >>2) & 0x03ffffffu); + as->mcp = p; + return; + } +#endif + *--p = MIPSI_NOP; /* Delay slot. */ + if ((((uintptr_t)target ^ (uintptr_t)p) >> 28) == 0) { +#if !LJ_TARGET_MIPSR6 + *--p = (((uintptr_t)target & 1) ? MIPSI_JALX : MIPSI_JAL) | + (((uintptr_t)target >>2) & 0x03ffffffu); +#else *--p = MIPSI_JAL | (((uintptr_t)target >>2) & 0x03ffffffu); - else /* Target out of range: need indirect call. */ +#endif + } else { /* Target out of range: need indirect call. */ *--p = MIPSI_JALR | MIPSF_S(RID_CFUNCADDR); + needcfa = 1; + } as->mcp = p; - ra_allockreg(as, i32ptr(target), RID_CFUNCADDR); + if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); } /* -- Emit generic operations --------------------------------------------- */ @@ -178,32 +277,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) emit_fg(as, irt_isnum(ir->t) ? MIPSI_MOV_D : MIPSI_MOV_S, dst, src); } -/* Generic load of register from stack slot. */ -static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) +/* Generic load of register with base and (small) offset address. */ +static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { if (r < RID_MAX_GPR) - emit_tsi(as, MIPSI_LW, r, RID_SP, ofs); + emit_tsi(as, irt_is64(ir->t) ? MIPSI_LD : MIPSI_LW, r, base, ofs); else emit_tsi(as, irt_isnum(ir->t) ? MIPSI_LDC1 : MIPSI_LWC1, - (r & 31), RID_SP, ofs); + (r & 31), base, ofs); } -/* Generic store of register to stack slot. */ -static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { if (r < RID_MAX_GPR) - emit_tsi(as, MIPSI_SW, r, RID_SP, ofs); + emit_tsi(as, irt_is64(ir->t) ? MIPSI_SD : MIPSI_SW, r, base, ofs); else emit_tsi(as, irt_isnum(ir->t) ? MIPSI_SDC1 : MIPSI_SWC1, - (r&31), RID_SP, ofs); + (r&31), base, ofs); } /* Add offset to pointer. */ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { - lua_assert(checki16(ofs)); - emit_tsi(as, MIPSI_ADDIU, r, r, ofs); + lj_assertA(checki16(ofs), "offset %d out of range", ofs); + emit_tsi(as, MIPSI_AADDIU, r, r, ofs); } } diff --git a/src/lj_emit_ppc.h b/src/lj_emit_ppc.h index 6d0ea185..86760e78 100644 --- a/src/lj_emit_ppc.h +++ b/src/lj_emit_ppc.h @@ -41,13 +41,13 @@ static void emit_rot(ASMState *as, PPCIns pi, Reg ra, Reg rs, static void emit_slwi(ASMState *as, Reg ra, Reg rs, int32_t n) { - lua_assert(n >= 0 && n < 32); + lj_assertA(n >= 0 && n < 32, "shift out or range"); emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31-n); } static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) { - lua_assert(n >= 0 && n < 32); + lj_assertA(n >= 0 && n < 32, "shift out or range"); emit_rot(as, PPCI_RLWINM, ra, rs, n, 0, 31); } @@ -57,17 +57,17 @@ static void emit_rotlwi(ASMState *as, Reg ra, Reg rs, int32_t n) #define emit_canremat(ref) ((ref) <= REF_BASE) /* Try to find a one step delta relative to another constant. */ -static int emit_kdelta1(ASMState *as, Reg t, int32_t i) +static int emit_kdelta1(ASMState *as, Reg rd, int32_t i) { RegSet work = ~as->freeset & RSET_GPR; while (work) { Reg r = rset_picktop(work); IRRef ref = regcost_ref(as->cost[r]); - lua_assert(r != t); + lj_assertA(r != rd, "dest reg %d not free", rd); if (ref < ASMREF_L) { int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); if (checki16(delta)) { - emit_tai(as, PPCI_ADDI, t, r, delta); + emit_tai(as, PPCI_ADDI, rd, r, delta); return 1; } } @@ -98,7 +98,7 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) #define emit_loada(as, r, addr) emit_loadi(as, (r), i32ptr((addr))) -static Reg ra_allock(ASMState *as, int32_t k, RegSet allow); +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); /* Get/set from constant pointer. */ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) @@ -115,8 +115,8 @@ static void emit_lsptr(ASMState *as, PPCIns pi, Reg r, void *p, RegSet allow) emit_tai(as, pi, r, base, i); } -#define emit_loadn(as, r, tv) \ - emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)(tv), RSET_GPR) +#define emit_loadk64(as, r, ir) \ + emit_lsptr(as, PPCI_LFD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR) /* Get/set global_State fields. */ static void emit_lsglptr(ASMState *as, PPCIns pi, Reg r, int32_t ofs) @@ -144,7 +144,7 @@ static void emit_condbranch(ASMState *as, PPCIns pi, PPCCC cc, MCode *target) { MCode *p = --as->mcp; ptrdiff_t delta = (char *)target - (char *)p; - lua_assert(((delta + 0x8000) >> 16) == 0); + lj_assertA(((delta + 0x8000) >> 16) == 0, "branch target out of range"); pi ^= (delta & 0x8000) * (PPCF_Y/0x8000); *p = pi | PPCF_CC(cc) | ((uint32_t)delta & 0xffffu); } @@ -186,22 +186,22 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) emit_fb(as, PPCI_FMR, dst, src); } -/* Generic load of register from stack slot. */ -static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) +/* Generic load of register with base and (small) offset address. */ +static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { if (r < RID_MAX_GPR) - emit_tai(as, PPCI_LWZ, r, RID_SP, ofs); + emit_tai(as, PPCI_LWZ, r, base, ofs); else - emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, RID_SP, ofs); + emit_fai(as, irt_isnum(ir->t) ? PPCI_LFD : PPCI_LFS, r, base, ofs); } -/* Generic store of register to stack slot. */ -static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { if (r < RID_MAX_GPR) - emit_tai(as, PPCI_STW, r, RID_SP, ofs); + emit_tai(as, PPCI_STW, r, base, ofs); else - emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, RID_SP, ofs); + emit_fai(as, irt_isnum(ir->t) ? PPCI_STFD : PPCI_STFS, r, base, ofs); } /* Emit a compare (for equality) with a constant operand. */ diff --git a/src/lj_emit_x86.h b/src/lj_emit_x86.h index d8b4b8a0..3d3beda3 100644 --- a/src/lj_emit_x86.h +++ b/src/lj_emit_x86.h @@ -13,10 +13,17 @@ if (rex != 0x40) *--(p) = rex; } #define FORCE_REX 0x200 #define REX_64 (FORCE_REX|0x080000) +#define VEX_64 0x800000 #else #define REXRB(p, rr, rb) ((void)0) #define FORCE_REX 0 #define REX_64 0 +#define VEX_64 0 +#endif +#if LJ_GC64 +#define REX_GC64 REX_64 +#else +#define REX_GC64 0 #endif #define emit_i8(as, i) (*--as->mcp = (MCode)(i)) @@ -31,7 +38,14 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx, MCode *p, int delta) { int n = (int8_t)xo; -#if defined(__GNUC__) + if (n == -60) { /* VEX-encoded instruction */ +#if LJ_64 + xo ^= (((rr>>1)&4)+((rx>>2)&2)+((rb>>3)&1))<<13; +#endif + *(uint32_t *)(p+delta-5) = (uint32_t)xo; + return p+delta-5; + } +#if defined(__GNUC__) || defined(__clang__) if (__builtin_constant_p(xo) && n == -2) p[delta-2] = (MCode)(xo >> 24); else if (__builtin_constant_p(xo) && n == -3) @@ -78,33 +92,24 @@ static void emit_rr(ASMState *as, x86Op xo, Reg r1, Reg r2) /* [addr] is sign-extended in x64 and must be in lower 2G (not 4G). */ static int32_t ptr2addr(const void *p) { - lua_assert((uintptr_t)p < (uintptr_t)0x80000000); + lj_assertX((uintptr_t)p < (uintptr_t)0x80000000, "pointer outside 2G range"); return i32ptr(p); } #else #define ptr2addr(p) (i32ptr((p))) #endif -/* op r, [addr] */ -static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) -{ - MCode *p = as->mcp; - *(int32_t *)(p-4) = ptr2addr(addr); -#if LJ_64 - p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); - as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); -#else - as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); -#endif -} - /* op r, [base+ofs] */ static void emit_rmro(ASMState *as, x86Op xo, Reg rr, Reg rb, int32_t ofs) { MCode *p = as->mcp; x86Mode mode; if (ra_hasreg(rb)) { - if (ofs == 0 && (rb&7) != RID_EBP) { + if (LJ_GC64 && rb == RID_RIP) { + mode = XM_OFS0; + p -= 4; + *(int32_t *)p = ofs; + } else if (ofs == 0 && (rb&7) != RID_EBP) { mode = XM_OFS0; } else if (checki8(ofs)) { *--p = (MCode)ofs; @@ -202,6 +207,11 @@ static void emit_mrm(ASMState *as, x86Op xo, Reg rr, Reg rb) *--p = MODRM(XM_SCALE1, RID_ESP, RID_EBP); rb = RID_ESP; #endif + } else if (LJ_GC64 && rb == RID_RIP) { + lj_assertA(as->mrm.idx == RID_NONE, "RIP-rel mrm cannot have index"); + mode = XM_OFS0; + p -= 4; + *(int32_t *)p = as->mrm.ofs; } else { if (as->mrm.ofs == 0 && (rb&7) != RID_EBP) { mode = XM_OFS0; @@ -241,10 +251,6 @@ static void emit_gmrmi(ASMState *as, x86Group xg, Reg rb, int32_t i) /* -- Emit loads/stores --------------------------------------------------- */ -/* Instruction selection for XMM moves. */ -#define XMM_MOVRR(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVSD : XO_MOVAPS) -#define XMM_MOVRM(as) ((as->flags & JIT_F_SPLIT_XMM) ? XO_MOVLPD : XO_MOVSD) - /* mov [base+ofs], i */ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) { @@ -259,8 +265,8 @@ static void emit_movmroi(ASMState *as, Reg base, int32_t ofs, int32_t i) /* Get/set global_State fields. */ #define emit_opgl(as, xo, r, field) \ emit_rma(as, (xo), (r), (void *)&J2G(as->J)->field) -#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r), field) -#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r), field) +#define emit_getgl(as, r, field) emit_opgl(as, XO_MOV, (r)|REX_GC64, field) +#define emit_setgl(as, r, field) emit_opgl(as, XO_MOVto, (r)|REX_GC64, field) #define emit_setvmstate(as, i) \ (emit_i32(as, i), emit_opgl(as, XO_MOVmi, 0, vmstate)) @@ -285,9 +291,21 @@ static void emit_loadi(ASMState *as, Reg r, int32_t i) } } +#if LJ_GC64 +#define dispofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)J2GG(as->J)->dispatch)) +#define mcpofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mcp)) +#define mctopofs(as, k) \ + ((intptr_t)((uintptr_t)(k) - (uintptr_t)as->mctop)) +/* mov r, addr */ +#define emit_loada(as, r, addr) \ + emit_loadu64(as, (r), (uintptr_t)(addr)) +#else /* mov r, addr */ #define emit_loada(as, r, addr) \ emit_loadi(as, (r), ptr2addr((addr))) +#endif #if LJ_64 /* mov r, imm64 or shorter 32 bit extended load. */ @@ -299,6 +317,15 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) MCode *p = as->mcp; *(int32_t *)(p-4) = (int32_t)u64; as->mcp = emit_opm(XO_MOVmi, XM_REG, REX_64, r, p, -4); +#if LJ_GC64 + } else if (checki32(dispofs(as, u64))) { + emit_rmro(as, XO_LEA, r|REX_64, RID_DISPATCH, (int32_t)dispofs(as, u64)); + } else if (checki32(mcpofs(as, u64)) && checki32(mctopofs(as, u64))) { + /* Since as->realign assumes the code size doesn't change, check + ** RIP-relative addressing reachability for both as->mcp and as->mctop. + */ + emit_rmro(as, XO_LEA, r|REX_64, RID_RIP, (int32_t)mcpofs(as, u64)); +#endif } else { /* Full-size 64 bit load. */ MCode *p = as->mcp; *(uint64_t *)(p-8) = u64; @@ -310,13 +337,90 @@ static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) } #endif -/* movsd r, [&tv->n] / xorps r, r */ -static void emit_loadn(ASMState *as, Reg r, cTValue *tv) +/* op r, [addr] */ +static void emit_rma(ASMState *as, x86Op xo, Reg rr, const void *addr) { - if (tvispzero(tv)) /* Use xor only for +0. */ - emit_rr(as, XO_XORPS, r, r); - else - emit_rma(as, XMM_MOVRM(as), r, &tv->n); +#if LJ_GC64 + if (checki32(dispofs(as, addr))) { + emit_rmro(as, xo, rr, RID_DISPATCH, (int32_t)dispofs(as, addr)); + } else if (checki32(mcpofs(as, addr)) && checki32(mctopofs(as, addr))) { + emit_rmro(as, xo, rr, RID_RIP, (int32_t)mcpofs(as, addr)); + } else if (!checki32((intptr_t)addr)) { + Reg ra = (rr & 15); + if (xo != XO_MOV) { + /* We can't allocate a register here. Use and restore DISPATCH. Ugly. */ + uint64_t dispaddr = (uintptr_t)J2GG(as->J)->dispatch; + uint8_t i8 = xo == XO_GROUP3b ? *as->mcp++ : 0; + ra = RID_DISPATCH; + if (checku32(dispaddr)) { + emit_loadi(as, ra, (int32_t)dispaddr); + } else { /* Full-size 64 bit load. */ + MCode *p = as->mcp; + *(uint64_t *)(p-8) = dispaddr; + p[-9] = (MCode)(XI_MOVri+(ra&7)); + p[-10] = 0x48 + ((ra>>3)&1); + p -= 10; + as->mcp = p; + } + if (xo == XO_GROUP3b) emit_i8(as, i8); + } + emit_rmro(as, xo, rr, ra, 0); + emit_loadu64(as, ra, (uintptr_t)addr); + } else +#endif + { + MCode *p = as->mcp; + *(int32_t *)(p-4) = ptr2addr(addr); +#if LJ_64 + p[-5] = MODRM(XM_SCALE1, RID_ESP, RID_EBP); + as->mcp = emit_opm(xo, XM_OFS0, rr, RID_ESP, p, -5); +#else + as->mcp = emit_opm(xo, XM_OFS0, rr, RID_EBP, p, -4); +#endif + } +} + +/* Load 64 bit IR constant into register. */ +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + Reg r64; + x86Op xo; + const uint64_t *k = &ir_k64(ir)->u64; + if (rset_test(RSET_FPR, r)) { + r64 = r; + xo = XO_MOVSD; + } else { + r64 = r | REX_64; + xo = XO_MOV; + } + if (*k == 0) { + emit_rr(as, rset_test(RSET_FPR, r) ? XO_XORPS : XO_ARITH(XOg_XOR), r, r); +#if LJ_GC64 + } else if (checki32((intptr_t)k) || checki32(dispofs(as, k)) || + (checki32(mcpofs(as, k)) && checki32(mctopofs(as, k)))) { + emit_rma(as, xo, r64, k); + } else { + if (ir->i) { + lj_assertA(*k == *(uint64_t*)(as->mctop - ir->i), + "bad interned 64 bit constant"); + } else if (as->curins <= as->stopins && rset_test(RSET_GPR, r)) { + emit_loadu64(as, r, *k); + return; + } else { + /* If all else fails, add the FP constant at the MCode area bottom. */ + while ((uintptr_t)as->mcbot & 7) *as->mcbot++ = XI_INT3; + *(uint64_t *)as->mcbot = *k; + ir->i = (int32_t)(as->mctop - as->mcbot); + as->mcbot += 8; + as->mclim = as->mcbot + MCLIM_REDZONE; + lj_mcode_commitbot(as->J, as->mcbot); + } + emit_rmro(as, xo, r64, RID_RIP, (int32_t)mcpofs(as, as->mctop - ir->i)); +#else + } else { + emit_rma(as, xo, r64, k); +#endif + } } /* -- Emit control-flow instructions -------------------------------------- */ @@ -330,7 +434,7 @@ static void emit_sjmp(ASMState *as, MCLabel target) { MCode *p = as->mcp; ptrdiff_t delta = target - p; - lua_assert(delta == (int8_t)delta); + lj_assertA(delta == (int8_t)delta, "short jump target out of range"); p[-1] = (MCode)(int8_t)delta; p[-2] = XI_JMPs; as->mcp = p - 2; @@ -342,7 +446,7 @@ static void emit_sjcc(ASMState *as, int cc, MCLabel target) { MCode *p = as->mcp; ptrdiff_t delta = target - p; - lua_assert(delta == (int8_t)delta); + lj_assertA(delta == (int8_t)delta, "short jump target out of range"); p[-1] = (MCode)(int8_t)delta; p[-2] = (MCode)(XI_JCCs+(cc&15)); as->mcp = p - 2; @@ -368,10 +472,11 @@ static void emit_sfixup(ASMState *as, MCLabel source) #define emit_label(as) ((as)->mcp) /* Compute relative 32 bit offset for jump and call instructions. */ -static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) +static LJ_AINLINE int32_t jmprel(jit_State *J, MCode *p, MCode *target) { ptrdiff_t delta = target - p; - lua_assert(delta == (int32_t)delta); + UNUSED(J); + lj_assertJ(delta == (int32_t)delta, "jump target out of range"); return (int32_t)delta; } @@ -379,7 +484,7 @@ static LJ_AINLINE int32_t jmprel(MCode *p, MCode *target) static void emit_jcc(ASMState *as, int cc, MCode *target) { MCode *p = as->mcp; - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = (MCode)(XI_JCCn+(cc&15)); p[-6] = 0x0f; as->mcp = p - 6; @@ -389,7 +494,7 @@ static void emit_jcc(ASMState *as, int cc, MCode *target) static void emit_jmp(ASMState *as, MCode *target) { MCode *p = as->mcp; - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = XI_JMP; as->mcp = p - 5; } @@ -406,7 +511,7 @@ static void emit_call_(ASMState *as, MCode *target) return; } #endif - *(int32_t *)(p-4) = jmprel(p, target); + *(int32_t *)(p-4) = jmprel(as->J, p, target); p[-5] = XI_CALL; as->mcp = p - 5; } @@ -418,8 +523,10 @@ static void emit_call_(ASMState *as, MCode *target) /* Use 64 bit operations to handle 64 bit IR types. */ #if LJ_64 #define REX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? REX_64 : 0)) +#define VEX_64IR(ir, r) ((r) + (irt_is64((ir)->t) ? VEX_64 : 0)) #else #define REX_64IR(ir, r) (r) +#define VEX_64IR(ir, r) (r) #endif /* Generic move between two regs. */ @@ -429,35 +536,32 @@ static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) if (dst < RID_MAX_GPR) emit_rr(as, XO_MOV, REX_64IR(ir, dst), src); else - emit_rr(as, XMM_MOVRR(as), dst, src); + emit_rr(as, XO_MOVAPS, dst, src); } -/* Generic load of register from stack slot. */ -static void emit_spload(ASMState *as, IRIns *ir, Reg r, int32_t ofs) +/* Generic load of register with base and (small) offset address. */ +static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { if (r < RID_MAX_GPR) - emit_rmro(as, XO_MOV, REX_64IR(ir, r), RID_ESP, ofs); + emit_rmro(as, XO_MOV, REX_64IR(ir, r), base, ofs); else - emit_rmro(as, irt_isnum(ir->t) ? XMM_MOVRM(as) : XO_MOVSS, r, RID_ESP, ofs); + emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSD : XO_MOVSS, r, base, ofs); } -/* Generic store of register to stack slot. */ -static void emit_spstore(ASMState *as, IRIns *ir, Reg r, int32_t ofs) +/* Generic store of register with base and (small) offset address. */ +static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) { if (r < RID_MAX_GPR) - emit_rmro(as, XO_MOVto, REX_64IR(ir, r), RID_ESP, ofs); + emit_rmro(as, XO_MOVto, REX_64IR(ir, r), base, ofs); else - emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, RID_ESP, ofs); + emit_rmro(as, irt_isnum(ir->t) ? XO_MOVSDto : XO_MOVSSto, r, base, ofs); } /* Add offset to pointer. */ static void emit_addptr(ASMState *as, Reg r, int32_t ofs) { if (ofs) { - if ((as->flags & JIT_F_LEA_AGU)) - emit_rmro(as, XO_LEA, r, r, ofs); - else - emit_gri(as, XG_ARITHi(XOg_ADD), r, ofs); + emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs); } } diff --git a/src/lj_err.c b/src/lj_err.c index d9783722..283c3d18 100644 --- a/src/lj_err.c +++ b/src/lj_err.c @@ -16,6 +16,7 @@ #include "lj_ff.h" #include "lj_trace.h" #include "lj_vm.h" +#include "lj_strfmt.h" /* ** LuaJIT can either use internal or external frame unwinding: @@ -28,12 +29,18 @@ ** Pros and Cons: ** ** - EXT requires unwind tables for *all* functions on the C stack between -** the pcall/catch and the error/throw. This is the default on x64, -** but needs to be manually enabled on x86/PPC for non-C++ code. +** the pcall/catch and the error/throw. C modules used by Lua code can +** throw errors, so these need to have unwind tables, too. Transitively +** this applies to all system libraries used by C modules -- at least +** when they have callbacks which may throw an error. ** -** - INT is faster when actually throwing errors (but this happens rarely). +** - INT is faster when actually throwing errors, but this happens rarely. ** Setting up error handlers is zero-cost in any case. ** +** - INT needs to save *all* callee-saved registers when entering the +** interpreter. EXT only needs to save those actually used inside the +** interpreter. JIT-compiled code may need to save some more. +** ** - EXT provides full interoperability with C++ exceptions. You can throw ** Lua errors or C++ exceptions through a mix of Lua frames and C++ frames. ** C++ destructors are called as needed. C++ exceptions caught by pcall @@ -45,27 +52,38 @@ ** the wrapper function feature. Lua errors thrown through C++ frames ** cannot be caught by C++ code and C++ destructors are not run. ** -** EXT is the default on x64 systems, INT is the default on all other systems. +** - EXT can handle errors from internal helper functions that are called +** from JIT-compiled code (except for Windows/x86 and 32 bit ARM). +** INT has no choice but to call the panic handler, if this happens. +** Note: this is mainly relevant for out-of-memory errors. +** +** EXT is the default on all systems where the toolchain produces unwind +** tables by default (*). This is hard-coded and/or detected in src/Makefile. +** You can thwart the detection with: TARGET_XCFLAGS=-DLUAJIT_UNWIND_INTERNAL +** +** INT is the default on all other systems. +** +** EXT can be manually enabled for toolchains that are able to produce +** conforming unwind tables: +** "TARGET_XCFLAGS=-funwind-tables -DLUAJIT_UNWIND_EXTERNAL" +** As explained above, *all* C code used directly or indirectly by LuaJIT +** must be compiled with -funwind-tables (or -fexceptions). C++ code must +** *not* be compiled with -fno-exceptions. +** +** If you're unsure whether error handling inside the VM works correctly, +** try running this and check whether it prints "OK": ** -** EXT can be manually enabled on POSIX systems using GCC and DWARF2 stack -** unwinding with -DLUAJIT_UNWIND_EXTERNAL. *All* C code must be compiled -** with -funwind-tables (or -fexceptions). This includes LuaJIT itself (set -** TARGET_CFLAGS), all of your C/Lua binding code, all loadable C modules -** and all C libraries that have callbacks which may be used to call back -** into Lua. C++ code must *not* be compiled with -fno-exceptions. +** luajit -e "print(select(2, load('OK')):match('OK'))" ** -** EXT cannot be enabled on WIN32 since system exceptions use code-driven SEH. -** EXT is mandatory on WIN64 since the calling convention has an abundance -** of callee-saved registers (rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15). -** The POSIX/x64 interpreter only saves r12/r13 for INT (e.g. PS4). +** (*) Originally, toolchains only generated unwind tables for C++ code. For +** interoperability reasons, this can be manually enabled for plain C code, +** too (with -funwind-tables). With the introduction of the x64 architecture, +** the corresponding POSIX and Windows ABIs mandated unwind tables for all +** code. Over the following years most desktop and server platforms have +** enabled unwind tables by default on all architectures. OTOH mobile and +** embedded platforms do not consistently mandate unwind tables. */ -#if defined(__GNUC__) && (LJ_TARGET_X64 || defined(LUAJIT_UNWIND_EXTERNAL)) && !LJ_NO_UNWIND -#define LJ_UNWIND_EXT 1 -#elif LJ_TARGET_X64 && LJ_TARGET_WINDOWS -#define LJ_UNWIND_EXT 1 -#endif - /* -- Error messages ------------------------------------------------------ */ /* Error message strings. */ @@ -98,14 +116,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) TValue *top = restorestack(L, -nres); if (frame < top) { /* Frame reached? */ if (errcode) { - L->cframe = cframe_prev(cf); L->base = frame+1; + L->cframe = cframe_prev(cf); unwindstack(L, top); } return cf; } } - if (frame <= tvref(L->stack)) + if (frame <= tvref(L->stack)+LJ_FR2) break; switch (frame_typep(frame)) { case FRAME_LUA: /* Lua frame. */ @@ -113,14 +131,12 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) frame = frame_prevl(frame); break; case FRAME_C: /* C frame. */ -#if LJ_HASFFI unwind_c: -#endif #if LJ_UNWIND_EXT if (errcode) { - L->cframe = cframe_prev(cf); L->base = frame_prevd(frame) + 1; - unwindstack(L, frame); + L->cframe = cframe_prev(cf); + unwindstack(L, frame - LJ_FR2); } else if (cf != stopcf) { cf = cframe_prev(cf); frame = frame_prevd(frame); @@ -143,16 +159,14 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) return cf; } if (errcode) { - L->cframe = cframe_prev(cf); L->base = frame_prevd(frame) + 1; - unwindstack(L, frame); + L->cframe = cframe_prev(cf); + unwindstack(L, frame - LJ_FR2); } return cf; case FRAME_CONT: /* Continuation frame. */ -#if LJ_HASFFI - if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) + if (frame_iscont_fficb(frame)) goto unwind_c; -#endif /* fallthrough */ case FRAME_VARG: /* Vararg frame. */ frame = frame_prevd(frame); @@ -166,8 +180,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) } if (frame_typep(frame) == FRAME_PCALL) hook_leave(G(L)); - L->cframe = cf; L->base = frame_prevd(frame) + 1; + L->cframe = cf; unwindstack(L, L->base); } return (void *)((intptr_t)cf | CFRAME_UNWIND_FF); @@ -175,8 +189,8 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) } /* No C frame. */ if (errcode) { + L->base = tvref(L->stack)+1+LJ_FR2; L->cframe = NULL; - L->base = tvref(L->stack)+1; unwindstack(L, L->base); if (G(L)->panic) G(L)->panic(L); @@ -187,33 +201,206 @@ static void *err_unwind(lua_State *L, void *stopcf, int errcode) /* -- External frame unwinding -------------------------------------------- */ -#if defined(__GNUC__) && !LJ_NO_UNWIND && !LJ_ABI_WIN +#if LJ_ABI_WIN /* -** We have to use our own definitions instead of the mandatory (!) unwind.h, -** since various OS, distros and compilers mess up the header installation. +** Someone in Redmond owes me several days of my life. A lot of this is +** undocumented or just plain wrong on MSDN. Some of it can be gathered +** from 3rd party docs or must be found by trial-and-error. They really +** don't want you to write your own language-specific exception handler +** or to interact gracefully with MSVC. :-( +** +** Apparently MSVC doesn't call C++ destructors for foreign exceptions +** unless you compile your C++ code with /EHa. Unfortunately this means +** catch (...) also catches things like access violations. The use of +** _set_se_translator doesn't really help, because it requires /EHa, too. */ -typedef struct _Unwind_Exception +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +#if LJ_TARGET_X86 +typedef void *UndocumentedDispatcherContext; /* Unused on x86. */ +#else +/* Taken from: http://www.nynaeve.net/?p=99 */ +typedef struct UndocumentedDispatcherContext { + ULONG64 ControlPc; + ULONG64 ImageBase; + PRUNTIME_FUNCTION FunctionEntry; + ULONG64 EstablisherFrame; + ULONG64 TargetIp; + PCONTEXT ContextRecord; + void (*LanguageHandler)(void); + PVOID HandlerData; + PUNWIND_HISTORY_TABLE HistoryTable; + ULONG ScopeIndex; + ULONG Fill0; +} UndocumentedDispatcherContext; +#endif + +/* Another wild guess. */ +extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); + +#if LJ_TARGET_X64 && defined(MINGW_SDK_INIT) +/* Workaround for broken MinGW64 declaration. */ +VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); +#define RtlUnwindEx RtlUnwindEx_FIXED +#endif + +#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) +#define LJ_GCC_EXCODE ((DWORD)0x20474343) + +#define LJ_EXCODE ((DWORD)0xe24c4a00) +#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) +#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) +#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) + +/* Windows exception handler for interpreter frame. */ +LJ_FUNCA int lj_err_unwind_win(EXCEPTION_RECORD *rec, + void *f, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) { - uint64_t exclass; - void (*excleanup)(int, struct _Unwind_Exception *); - uintptr_t p1, p2; -} __attribute__((__aligned__)) _Unwind_Exception; +#if LJ_TARGET_X86 + void *cf = (char *)f - CFRAME_OFS_SEH; +#else + void *cf = f; +#endif + lua_State *L = cframe_L(cf); + int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? + LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; + if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ + /* Unwind internal frames. */ + err_unwind(L, cf, errcode); + } else { + void *cf2 = err_unwind(L, cf, 0); + if (cf2) { /* We catch it, so start unwinding the upper frames. */ + if (rec->ExceptionCode == LJ_MSVC_EXCODE || + rec->ExceptionCode == LJ_GCC_EXCODE) { +#if !LJ_TARGET_CYGWIN + __DestructExceptionObject(rec, 1); +#endif + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); + } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { + /* Don't catch access violations etc. */ + return 1; /* ExceptionContinueSearch */ + } +#if LJ_TARGET_X86 + UNUSED(ctx); + UNUSED(dispatch); + /* Call all handlers for all lower C frames (including ourselves) again + ** with EH_UNWINDING set. Then call the specified function, passing cf + ** and errcode. + */ + lj_vm_rtlunwind(cf, (void *)rec, + (cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? + (void *)lj_vm_unwind_ff : (void *)lj_vm_unwind_c, errcode); + /* lj_vm_rtlunwind does not return. */ +#else + /* Unwind the stack and call all handlers for all lower C frames + ** (including ourselves) again with EH_UNWINDING set. Then set + ** stack pointer = cf, result = errcode and jump to the specified target. + */ + RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? + lj_vm_unwind_ff_eh : + lj_vm_unwind_c_eh), + rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); + /* RtlUnwindEx should never return. */ +#endif + } + } + return 1; /* ExceptionContinueSearch */ +} + +#if LJ_UNWIND_JIT + +#if LJ_TARGET_X64 +#define CONTEXT_REG_PC Rip +#elif LJ_TARGET_ARM64 +#define CONTEXT_REG_PC Pc +#else +#error "NYI: Windows arch-specific unwinder for JIT-compiled code" +#endif + +/* Windows unwinder for JIT-compiled code. */ +static void err_unwind_win_jit(global_State *g, int errcode) +{ + CONTEXT ctx; + UNWIND_HISTORY_TABLE hist; + + memset(&hist, 0, sizeof(hist)); + RtlCaptureContext(&ctx); + while (1) { + uintptr_t frame, base, addr = ctx.CONTEXT_REG_PC; + void *hdata; + PRUNTIME_FUNCTION func = RtlLookupFunctionEntry(addr, &base, &hist); + if (!func) { /* Found frame without .pdata: must be JIT-compiled code. */ + ExitNo exitno; + uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno); + if (stub) { /* Jump to side exit to unwind the trace. */ + ctx.CONTEXT_REG_PC = stub; + G2J(g)->exitcode = errcode; + RtlRestoreContext(&ctx, NULL); /* Does not return. */ + } + break; + } + RtlVirtualUnwind(UNW_FLAG_NHANDLER, base, addr, func, + &ctx, &hdata, &frame, NULL); + if (!addr) break; + } + /* Unwinding failed, if we end up here. */ +} +#endif + +/* Raise Windows exception. */ +static void err_raise_ext(global_State *g, int errcode) +{ +#if LJ_UNWIND_JIT + if (tvref(g->jit_base)) { + err_unwind_win_jit(g, errcode); + return; /* Unwinding failed. */ + } +#elif LJ_HASJIT + /* Cannot catch on-trace errors for Windows/x86 SEH. Unwind to interpreter. */ + setmref(g->jit_base, NULL); +#endif + UNUSED(g); + RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); +} + +#elif !LJ_NO_UNWIND && (defined(__GNUC__) || defined(__clang__)) + +/* +** We have to use our own definitions instead of the mandatory (!) unwind.h, +** since various OS, distros and compilers mess up the header installation. +*/ typedef struct _Unwind_Context _Unwind_Context; #define _URC_OK 0 +#define _URC_FATAL_PHASE2_ERROR 2 #define _URC_FATAL_PHASE1_ERROR 3 #define _URC_HANDLER_FOUND 6 #define _URC_INSTALL_CONTEXT 7 #define _URC_CONTINUE_UNWIND 8 #define _URC_FAILURE 9 +#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ +#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) +#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) +#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) + #if !LJ_TARGET_ARM +typedef struct _Unwind_Exception +{ + uint64_t exclass; + void (*excleanup)(int, struct _Unwind_Exception *); + uintptr_t p1, p2; +} __attribute__((__aligned__)) _Unwind_Exception; +#define UNWIND_EXCEPTION_TYPE _Unwind_Exception + extern uintptr_t _Unwind_GetCFA(_Unwind_Context *); extern void _Unwind_SetGR(_Unwind_Context *, int, uintptr_t); +extern uintptr_t _Unwind_GetIP(_Unwind_Context *); extern void _Unwind_SetIP(_Unwind_Context *, uintptr_t); extern void _Unwind_DeleteException(_Unwind_Exception *); extern int _Unwind_RaiseException(_Unwind_Exception *); @@ -223,11 +410,6 @@ extern int _Unwind_RaiseException(_Unwind_Exception *); #define _UA_HANDLER_FRAME 4 #define _UA_FORCE_UNWIND 8 -#define LJ_UEXCLASS 0x4c55414a49543200ULL /* LUAJIT2\0 */ -#define LJ_UEXCLASS_MAKE(c) (LJ_UEXCLASS | (uint64_t)(c)) -#define LJ_UEXCLASS_CHECK(cl) (((cl) ^ LJ_UEXCLASS) <= 0xff) -#define LJ_UEXCLASS_ERRCODE(cl) ((int)((cl) & 0xff)) - /* DWARF2 personality handler referenced from interpreter .eh_frame. */ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) @@ -236,7 +418,6 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, lua_State *L; if (version != 1) return _URC_FATAL_PHASE1_ERROR; - UNUSED(uexclass); cf = (void *)_Unwind_GetCFA(ctx); L = cframe_L(cf); if ((actions & _UA_SEARCH_PHASE)) { @@ -284,27 +465,162 @@ LJ_FUNCA int lj_err_unwind_dwarf(int version, int actions, ** it on non-x64 because the interpreter restores all callee-saved regs. */ lj_err_throw(L, errcode); +#if LJ_TARGET_X64 +#error "Broken build system -- only use the provided Makefiles!" +#endif #endif } return _URC_CONTINUE_UNWIND; } -#if LJ_UNWIND_EXT -static __thread _Unwind_Exception static_uex; +#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT) +struct dwarf_eh_bases { void *tbase, *dbase, *func; }; +extern const void *_Unwind_Find_FDE(void *pc, struct dwarf_eh_bases *bases); -/* Raise DWARF2 exception. */ -static void err_raise_ext(int errcode) +/* Verify that external error handling actually has a chance to work. */ +void lj_err_verify(void) { - static_uex.exclass = LJ_UEXCLASS_MAKE(errcode); - static_uex.excleanup = NULL; - _Unwind_RaiseException(&static_uex); +#if !LJ_TARGET_OSX + /* Check disabled on MacOS due to brilliant software engineering at Apple. */ + struct dwarf_eh_bases ehb; + lj_assertX(_Unwind_Find_FDE((void *)lj_err_throw, &ehb), "broken build: external frame unwinding enabled, but missing -funwind-tables"); +#endif + /* Check disabled, because of broken Fedora/ARM64. See #722. + lj_assertX(_Unwind_Find_FDE((void *)_Unwind_RaiseException, &ehb), "broken build: external frame unwinding enabled, but system libraries have no unwind tables"); + */ } #endif +#if LJ_UNWIND_JIT +/* DWARF2 personality handler for JIT-compiled code. */ +static int err_unwind_jit(int version, int actions, + uint64_t uexclass, _Unwind_Exception *uex, _Unwind_Context *ctx) +{ + /* NYI: FFI C++ exception interoperability. */ + if (version != 1 || !LJ_UEXCLASS_CHECK(uexclass)) + return _URC_FATAL_PHASE1_ERROR; + if ((actions & _UA_SEARCH_PHASE)) { + return _URC_HANDLER_FOUND; + } + if ((actions & _UA_CLEANUP_PHASE)) { + global_State *g = *(global_State **)(uex+1); + ExitNo exitno; + uintptr_t addr = _Unwind_GetIP(ctx); /* Return address _after_ call. */ + uintptr_t stub = lj_trace_unwind(G2J(g), addr - sizeof(MCode), &exitno); + lj_assertG(tvref(g->jit_base), "unexpected throw across mcode frame"); + if (stub) { /* Jump to side exit to unwind the trace. */ + G2J(g)->exitcode = LJ_UEXCLASS_ERRCODE(uexclass); +#ifdef LJ_TARGET_MIPS + _Unwind_SetGR(ctx, 4, stub); + _Unwind_SetGR(ctx, 5, exitno); + _Unwind_SetIP(ctx, (uintptr_t)(void *)lj_vm_unwind_stub); +#else + _Unwind_SetIP(ctx, stub); +#endif + return _URC_INSTALL_CONTEXT; + } + return _URC_FATAL_PHASE2_ERROR; + } + return _URC_FATAL_PHASE1_ERROR; +} + +/* DWARF2 template frame info for JIT-compiled code. +** +** After copying the template to the start of the mcode segment, +** the frame handler function and the code size is patched. +** The frame handler always installs a new context to jump to the exit, +** so don't bother to add any unwind opcodes. +*/ +static const uint8_t err_frame_jit_template[] = { +#if LJ_BE + 0,0,0, +#endif + LJ_64 ? 0x1c : 0x14, /* CIE length. */ +#if LJ_LE + 0,0,0, +#endif + 0,0,0,0, 1, 'z','P','R',0, /* CIE mark, CIE version, augmentation. */ + 1, LJ_64 ? 0x78 : 0x7c, LJ_TARGET_EHRAREG, /* Code/data align, RA. */ +#if LJ_64 + 10, 0, 0,0,0,0,0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */ + 0,0,0,0,0, /* Alignment. */ +#else + 6, 0, 0,0,0,0, 0x1b, /* Aug. data ABS handler, PCREL|SDATA4 code. */ + 0, /* Alignment. */ +#endif +#if LJ_BE + 0,0,0, +#endif + LJ_64 ? 0x14 : 0x10, /* FDE length. */ + 0,0,0, + LJ_64 ? 0x24 : 0x1c, /* CIE offset. */ + 0,0,0, + LJ_64 ? 0x14 : 0x10, /* Code offset. After Final FDE. */ +#if LJ_LE + 0,0,0, +#endif + 0,0,0,0, 0, 0,0,0, /* Code size, augmentation length, alignment. */ +#if LJ_64 + 0,0,0,0, /* Alignment. */ +#endif + 0,0,0,0 /* Final FDE. */ +}; + +#define ERR_FRAME_JIT_OFS_HANDLER 0x12 +#define ERR_FRAME_JIT_OFS_FDE (LJ_64 ? 0x20 : 0x18) +#define ERR_FRAME_JIT_OFS_CODE_SIZE (LJ_64 ? 0x2c : 0x24) +#if LJ_TARGET_OSX +#define ERR_FRAME_JIT_OFS_REGISTER ERR_FRAME_JIT_OFS_FDE #else +#define ERR_FRAME_JIT_OFS_REGISTER 0 +#endif + +extern void __register_frame(const void *); +extern void __deregister_frame(const void *); + +uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info) +{ + void **handler; + memcpy(info, err_frame_jit_template, sizeof(err_frame_jit_template)); + handler = (void *)err_unwind_jit; + memcpy(info + ERR_FRAME_JIT_OFS_HANDLER, &handler, sizeof(handler)); + *(uint32_t *)(info + ERR_FRAME_JIT_OFS_CODE_SIZE) = + (uint32_t)(sz - sizeof(err_frame_jit_template) - (info - (uint8_t *)base)); + __register_frame(info + ERR_FRAME_JIT_OFS_REGISTER); +#ifdef LUA_USE_ASSERT + { + struct dwarf_eh_bases ehb; + lj_assertX(_Unwind_Find_FDE(info + sizeof(err_frame_jit_template)+1, &ehb), + "bad JIT unwind table registration"); + } +#endif + return info + sizeof(err_frame_jit_template); +} + +void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info) +{ + UNUSED(base); UNUSED(sz); + __deregister_frame(info + ERR_FRAME_JIT_OFS_REGISTER); +} +#endif + +#else /* LJ_TARGET_ARM */ + +#define _US_VIRTUAL_UNWIND_FRAME 0 +#define _US_UNWIND_FRAME_STARTING 1 +#define _US_ACTION_MASK 3 +#define _US_FORCE_UNWIND 8 + +typedef struct _Unwind_Control_Block _Unwind_Control_Block; +#define UNWIND_EXCEPTION_TYPE _Unwind_Control_Block + +struct _Unwind_Control_Block { + uint64_t exclass; + uint32_t misc[20]; +}; -extern void _Unwind_DeleteException(void *); -extern int __gnu_unwind_frame (void *, _Unwind_Context *); +extern int _Unwind_RaiseException(_Unwind_Control_Block *); +extern int __gnu_unwind_frame(_Unwind_Control_Block *, _Unwind_Context *); extern int _Unwind_VRS_Set(_Unwind_Context *, int, uint32_t, int, void *); extern int _Unwind_VRS_Get(_Unwind_Context *, int, uint32_t, int, void *); @@ -320,126 +636,98 @@ static inline void _Unwind_SetGR(_Unwind_Context *ctx, int r, uint32_t v) _Unwind_VRS_Set(ctx, 0, r, 0, &v); } -#define _US_VIRTUAL_UNWIND_FRAME 0 -#define _US_UNWIND_FRAME_STARTING 1 -#define _US_ACTION_MASK 3 -#define _US_FORCE_UNWIND 8 +extern void lj_vm_unwind_ext(void); /* ARM unwinder personality handler referenced from interpreter .ARM.extab. */ -LJ_FUNCA int lj_err_unwind_arm(int state, void *ucb, _Unwind_Context *ctx) +LJ_FUNCA int lj_err_unwind_arm(int state, _Unwind_Control_Block *ucb, + _Unwind_Context *ctx) { void *cf = (void *)_Unwind_GetGR(ctx, 13); lua_State *L = cframe_L(cf); - if ((state & _US_ACTION_MASK) == _US_VIRTUAL_UNWIND_FRAME) { - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); + int errcode; + + switch ((state & _US_ACTION_MASK)) { + case _US_VIRTUAL_UNWIND_FRAME: + if ((state & _US_FORCE_UNWIND)) break; return _URC_HANDLER_FOUND; - } - if ((state&(_US_ACTION_MASK|_US_FORCE_UNWIND)) == _US_UNWIND_FRAME_STARTING) { - _Unwind_DeleteException(ucb); - _Unwind_SetGR(ctx, 15, (uint32_t)(void *)lj_err_throw); - _Unwind_SetGR(ctx, 0, (uint32_t)L); - _Unwind_SetGR(ctx, 1, (uint32_t)LUA_ERRRUN); + case _US_UNWIND_FRAME_STARTING: + if (LJ_UEXCLASS_CHECK(ucb->exclass)) { + errcode = LJ_UEXCLASS_ERRCODE(ucb->exclass); + } else { + errcode = LUA_ERRRUN; + setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); + } + cf = err_unwind(L, cf, errcode); + if ((state & _US_FORCE_UNWIND) || cf == NULL) break; + _Unwind_SetGR(ctx, 15, (uint32_t)lj_vm_unwind_ext); + _Unwind_SetGR(ctx, 0, (uint32_t)ucb); + _Unwind_SetGR(ctx, 1, (uint32_t)errcode); + _Unwind_SetGR(ctx, 2, cframe_unwind_ff(cf) ? + (uint32_t)lj_vm_unwind_ff_eh : + (uint32_t)lj_vm_unwind_c_eh); return _URC_INSTALL_CONTEXT; + default: + return _URC_FAILURE; } if (__gnu_unwind_frame(ucb, ctx) != _URC_OK) return _URC_FAILURE; +#ifdef LUA_USE_ASSERT + /* We should never get here unless this is a forced unwind aka backtrace. */ + if (_Unwind_GetGR(ctx, 0) == 0xff33aa77) { + _Unwind_SetGR(ctx, 0, 0xff33aa88); + } +#endif return _URC_CONTINUE_UNWIND; } -#endif +#if LJ_UNWIND_EXT && defined(LUA_USE_ASSERT) +typedef int (*_Unwind_Trace_Fn)(_Unwind_Context *, void *); +extern int _Unwind_Backtrace(_Unwind_Trace_Fn, void *); + +static int err_verify_bt(_Unwind_Context *ctx, int *got) +{ + if (_Unwind_GetGR(ctx, 0) == 0xff33aa88) { *got = 2; } + else if (*got == 0) { *got = 1; _Unwind_SetGR(ctx, 0, 0xff33aa77); } + return _URC_OK; +} -#elif LJ_TARGET_X64 && LJ_ABI_WIN +/* Verify that external error handling actually has a chance to work. */ +void lj_err_verify(void) +{ + int got = 0; + _Unwind_Backtrace((_Unwind_Trace_Fn)err_verify_bt, &got); + lj_assertX(got == 2, "broken build: external frame unwinding enabled, but missing -funwind-tables"); +} +#endif /* -** Someone in Redmond owes me several days of my life. A lot of this is -** undocumented or just plain wrong on MSDN. Some of it can be gathered -** from 3rd party docs or must be found by trial-and-error. They really -** don't want you to write your own language-specific exception handler -** or to interact gracefully with MSVC. :-( +** Note: LJ_UNWIND_JIT is not implemented for 32 bit ARM. ** -** Apparently MSVC doesn't call C++ destructors for foreign exceptions -** unless you compile your C++ code with /EHa. Unfortunately this means -** catch (...) also catches things like access violations. The use of -** _set_se_translator doesn't really help, because it requires /EHa, too. +** The quirky ARM unwind API doesn't have __register_frame(). +** A potential workaround might involve _Unwind_Backtrace. +** But most 32 bit ARM targets don't qualify for LJ_UNWIND_EXT, anyway, +** since they are built without unwind tables by default. */ -#define WIN32_LEAN_AND_MEAN -#include <windows.h> +#endif /* LJ_TARGET_ARM */ -/* Taken from: http://www.nynaeve.net/?p=99 */ -typedef struct UndocumentedDispatcherContext { - ULONG64 ControlPc; - ULONG64 ImageBase; - PRUNTIME_FUNCTION FunctionEntry; - ULONG64 EstablisherFrame; - ULONG64 TargetIp; - PCONTEXT ContextRecord; - void (*LanguageHandler)(void); - PVOID HandlerData; - PUNWIND_HISTORY_TABLE HistoryTable; - ULONG ScopeIndex; - ULONG Fill0; -} UndocumentedDispatcherContext; - -/* Another wild guess. */ -extern void __DestructExceptionObject(EXCEPTION_RECORD *rec, int nothrow); - -#ifdef MINGW_SDK_INIT -/* Workaround for broken MinGW64 declaration. */ -VOID RtlUnwindEx_FIXED(PVOID,PVOID,PVOID,PVOID,PVOID,PVOID) asm("RtlUnwindEx"); -#define RtlUnwindEx RtlUnwindEx_FIXED -#endif - -#define LJ_MSVC_EXCODE ((DWORD)0xe06d7363) -#define LJ_GCC_EXCODE ((DWORD)0x20474343) -#define LJ_EXCODE ((DWORD)0xe24c4a00) -#define LJ_EXCODE_MAKE(c) (LJ_EXCODE | (DWORD)(c)) -#define LJ_EXCODE_CHECK(cl) (((cl) ^ LJ_EXCODE) <= 0xff) -#define LJ_EXCODE_ERRCODE(cl) ((int)((cl) & 0xff)) +#if LJ_UNWIND_EXT +static __thread struct { + UNWIND_EXCEPTION_TYPE ex; + global_State *g; +} static_uex; -/* Win64 exception handler for interpreter frame. */ -LJ_FUNCA EXCEPTION_DISPOSITION lj_err_unwind_win64(EXCEPTION_RECORD *rec, - void *cf, CONTEXT *ctx, UndocumentedDispatcherContext *dispatch) +/* Raise external exception. */ +static void err_raise_ext(global_State *g, int errcode) { - lua_State *L = cframe_L(cf); - int errcode = LJ_EXCODE_CHECK(rec->ExceptionCode) ? - LJ_EXCODE_ERRCODE(rec->ExceptionCode) : LUA_ERRRUN; - if ((rec->ExceptionFlags & 6)) { /* EH_UNWINDING|EH_EXIT_UNWIND */ - /* Unwind internal frames. */ - err_unwind(L, cf, errcode); - } else { - void *cf2 = err_unwind(L, cf, 0); - if (cf2) { /* We catch it, so start unwinding the upper frames. */ - if (rec->ExceptionCode == LJ_MSVC_EXCODE || - rec->ExceptionCode == LJ_GCC_EXCODE) { -#if LJ_TARGET_WINDOWS - __DestructExceptionObject(rec, 1); -#endif - setstrV(L, L->top++, lj_err_str(L, LJ_ERR_ERRCPP)); - } else if (!LJ_EXCODE_CHECK(rec->ExceptionCode)) { - /* Don't catch access violations etc. */ - return ExceptionContinueSearch; - } - /* Unwind the stack and call all handlers for all lower C frames - ** (including ourselves) again with EH_UNWINDING set. Then set - ** rsp = cf, rax = errcode and jump to the specified target. - */ - RtlUnwindEx(cf, (void *)((cframe_unwind_ff(cf2) && errcode != LUA_YIELD) ? - lj_vm_unwind_ff_eh : - lj_vm_unwind_c_eh), - rec, (void *)(uintptr_t)errcode, ctx, dispatch->HistoryTable); - /* RtlUnwindEx should never return. */ - } - } - return ExceptionContinueSearch; + memset(&static_uex, 0, sizeof(static_uex)); + static_uex.ex.exclass = LJ_UEXCLASS_MAKE(errcode); + static_uex.g = g; + _Unwind_RaiseException(&static_uex.ex); } -/* Raise Windows exception. */ -static void err_raise_ext(int errcode) -{ - RaiseException(LJ_EXCODE_MAKE(errcode), 1 /* EH_NONCONTINUABLE */, 0, NULL); -} +#endif #endif @@ -450,22 +738,23 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode) { global_State *g = G(L); lj_trace_abort(g); - setgcrefnull(g->jit_L); - L->status = 0; + L->status = LUA_OK; #if LJ_UNWIND_EXT - err_raise_ext(errcode); + err_raise_ext(g, errcode); /* ** A return from this function signals a corrupt C stack that cannot be ** unwound. We have no choice but to call the panic function and exit. ** ** Usually this is caused by a C function without unwind information. - ** This should never happen on x64, but may happen if you've manually - ** enabled LUAJIT_UNWIND_EXTERNAL and forgot to recompile *every* - ** non-C++ file with -funwind-tables. + ** This may happen if you've manually enabled LUAJIT_UNWIND_EXTERNAL + ** and forgot to recompile *every* non-C++ file with -funwind-tables. */ if (G(L)->panic) G(L)->panic(L); #else +#if LJ_HASJIT + setmref(g->jit_base, NULL); +#endif { void *cf = err_unwind(L, NULL, errcode); if (cframe_unwind_ff(cf)) @@ -496,7 +785,7 @@ LJ_NOINLINE void lj_err_mem(lua_State *L) /* Find error function for runtime errors. Requires an extra stack traversal. */ static ptrdiff_t finderrfunc(lua_State *L) { - cTValue *frame = L->base-1, *bot = tvref(L->stack); + cTValue *frame = L->base-1, *bot = tvref(L->stack)+LJ_FR2; void *cf = L->cframe; while (frame > bot && cf) { while (cframe_nres(cframe_raw(cf)) < 0) { /* cframe without frame? */ @@ -520,10 +809,8 @@ static ptrdiff_t finderrfunc(lua_State *L) frame = frame_prevd(frame); break; case FRAME_CONT: -#if LJ_HASFFI - if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) + if (frame_iscont_fficb(frame)) cf = cframe_prev(cf); -#endif frame = frame_prevd(frame); break; case FRAME_CP: @@ -535,11 +822,11 @@ static ptrdiff_t finderrfunc(lua_State *L) break; case FRAME_PCALL: case FRAME_PCALLH: - if (frame_ftsz(frame) >= (ptrdiff_t)(2*sizeof(TValue))) /* xpcall? */ - return savestack(L, frame-1); /* Point to xpcall's errorfunc. */ + if (frame_func(frame_prevd(frame))->c.ffid == FF_xpcall) + return savestack(L, frame_prevd(frame)+1); /* xpcall's errorfunc. */ return 0; default: - lua_assert(0); + lj_assertL(0, "bad frame type"); return 0; } } @@ -549,7 +836,7 @@ static ptrdiff_t finderrfunc(lua_State *L) /* Runtime error. */ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) { - ptrdiff_t ef = finderrfunc(L); + ptrdiff_t ef = (LJ_HASJIT && tvref(G(L)->jit_base)) ? 0 : finderrfunc(L); if (ef) { TValue *errfunc = restorestack(L, ef); TValue *top = L->top; @@ -559,14 +846,25 @@ LJ_NOINLINE void LJ_FASTCALL lj_err_run(lua_State *L) lj_err_throw(L, LUA_ERRERR); } L->status = LUA_ERRERR; - copyTV(L, top, top-1); + copyTV(L, top+LJ_FR2, top-1); copyTV(L, top-1, errfunc); + if (LJ_FR2) setnilV(top++); L->top = top+1; lj_vm_call(L, top, 1+1); /* Stack: |errfunc|msg| -> |msg| */ } lj_err_throw(L, LUA_ERRRUN); } +#if LJ_HASJIT +LJ_NOINLINE void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode) +{ + if (errcode == LUA_ERRRUN) + lj_err_run(L); + else + lj_err_throw(L, errcode); +} +#endif + /* Formatted runtime error message. */ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) { @@ -574,7 +872,7 @@ LJ_NORET LJ_NOINLINE static void err_msgv(lua_State *L, ErrMsg em, ...) va_list argp; va_start(argp, em); if (curr_funcisL(L)) L->top = curr_topL(L); - msg = lj_str_pushvf(L, err2msg(em), argp); + msg = lj_strfmt_pushvf(L, err2msg(em), argp); va_end(argp); lj_debug_addloc(L, msg, L->base-1, NULL); lj_err_run(L); @@ -592,11 +890,11 @@ LJ_NOINLINE void lj_err_lex(lua_State *L, GCstr *src, const char *tok, { char buff[LUA_IDSIZE]; const char *msg; - lj_debug_shortname(buff, src); - msg = lj_str_pushvf(L, err2msg(em), argp); - msg = lj_str_pushf(L, "%s:%d: %s", buff, line, msg); + lj_debug_shortname(buff, src, line); + msg = lj_strfmt_pushvf(L, err2msg(em), argp); + msg = lj_strfmt_pushf(L, "%s:%d: %s", buff, line, msg); if (tok) - lj_str_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); + lj_strfmt_pushf(L, err2msg(LJ_ERR_XNEAR), msg, tok); lj_err_throw(L, LUA_ERRSYNTAX); } @@ -635,8 +933,9 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) const BCIns *pc = cframe_Lpc(L); if (((ptrdiff_t)pc & FRAME_TYPE) != FRAME_LUA) { const char *tname = lj_typename(o); + setframe_gc(o, obj2gco(L), LJ_TTHREAD); + if (LJ_FR2) o++; setframe_pc(o, pc); - setframe_gc(o, obj2gco(L)); L->top = L->base = o+1; err_msgv(L, LJ_ERR_BADCALL, tname); } @@ -646,28 +945,27 @@ LJ_NOINLINE void lj_err_optype_call(lua_State *L, TValue *o) /* Error in context of caller. */ LJ_NOINLINE void lj_err_callermsg(lua_State *L, const char *msg) { - TValue *frame = L->base-1; - TValue *pframe = NULL; - if (frame_islua(frame)) { - pframe = frame_prevl(frame); - } else if (frame_iscont(frame)) { + TValue *frame = NULL, *pframe = NULL; + if (!(LJ_HASJIT && tvref(G(L)->jit_base))) { + frame = L->base-1; + if (frame_islua(frame)) { + pframe = frame_prevl(frame); + } else if (frame_iscont(frame)) { + if (frame_iscont_fficb(frame)) { + pframe = frame; + frame = NULL; + } else { + pframe = frame_prevd(frame); #if LJ_HASFFI - if ((frame-1)->u32.lo == LJ_CONT_FFI_CALLBACK) { - pframe = frame; - frame = NULL; - } else + /* Remove frame for FFI metamethods. */ + if (frame_func(frame)->c.ffid >= FF_ffi_meta___index && + frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) { + L->base = pframe+1; + L->top = frame; + setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame)); + } #endif - { - pframe = frame_prevd(frame); -#if LJ_HASFFI - /* Remove frame for FFI metamethods. */ - if (frame_func(frame)->c.ffid >= FF_ffi_meta___index && - frame_func(frame)->c.ffid <= FF_ffi_meta___tostring) { - L->base = pframe+1; - L->top = frame; - setcframe_pc(cframe_raw(L->cframe), frame_contpc(frame)); } -#endif } } lj_debug_addloc(L, msg, pframe, frame); @@ -680,7 +978,7 @@ LJ_NOINLINE void lj_err_callerv(lua_State *L, ErrMsg em, ...) const char *msg; va_list argp; va_start(argp, em); - msg = lj_str_pushvf(L, err2msg(em), argp); + msg = lj_strfmt_pushvf(L, err2msg(em), argp); va_end(argp); lj_err_callermsg(L, msg); } @@ -700,9 +998,9 @@ LJ_NORET LJ_NOINLINE static void err_argmsg(lua_State *L, int narg, if (narg < 0 && narg > LUA_REGISTRYINDEX) narg = (int)(L->top - L->base) + narg + 1; if (ftype && ftype[3] == 'h' && --narg == 0) /* Check for "method". */ - msg = lj_str_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); + msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADSELF), fname, msg); else - msg = lj_str_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); + msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADARG), narg, fname, msg); lj_err_callermsg(L, msg); } @@ -712,7 +1010,7 @@ LJ_NOINLINE void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...) const char *msg; va_list argp; va_start(argp, em); - msg = lj_str_pushvf(L, err2msg(em), argp); + msg = lj_strfmt_pushvf(L, err2msg(em), argp); va_end(argp); err_argmsg(L, narg, msg); } @@ -742,7 +1040,7 @@ LJ_NOINLINE void lj_err_argtype(lua_State *L, int narg, const char *xname) TValue *o = narg < 0 ? L->top + narg : L->base + narg-1; tname = o < L->top ? lj_typename(o) : lj_obj_typename[0]; } - msg = lj_str_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); + msg = lj_strfmt_pushf(L, err2msg(LJ_ERR_BADTYPE), xname, tname); err_argmsg(L, narg, msg); } @@ -792,7 +1090,7 @@ LUALIB_API int luaL_error(lua_State *L, const char *fmt, ...) const char *msg; va_list argp; va_start(argp, fmt); - msg = lj_str_pushvf(L, fmt, argp); + msg = lj_strfmt_pushvf(L, fmt, argp); va_end(argp); lj_err_callermsg(L, msg); return 0; /* unreachable */ diff --git a/src/lj_err.h b/src/lj_err.h index 59253b58..bd4de9ae 100644 --- a/src/lj_err.h +++ b/src/lj_err.h @@ -23,7 +23,10 @@ LJ_DATA const char *lj_err_allmsg; LJ_FUNC GCstr *lj_err_str(lua_State *L, ErrMsg em); LJ_FUNCA_NORET void LJ_FASTCALL lj_err_throw(lua_State *L, int errcode); LJ_FUNC_NORET void lj_err_mem(lua_State *L); -LJ_FUNCA_NORET void LJ_FASTCALL lj_err_run(lua_State *L); +LJ_FUNC_NORET void LJ_FASTCALL lj_err_run(lua_State *L); +#if LJ_HASJIT +LJ_FUNCA_NORET void LJ_FASTCALL lj_err_trace(lua_State *L, int errcode); +#endif LJ_FUNC_NORET void lj_err_msg(lua_State *L, ErrMsg em); LJ_FUNC_NORET void lj_err_lex(lua_State *L, GCstr *src, const char *tok, BCLine line, ErrMsg em, va_list argp); @@ -38,4 +41,18 @@ LJ_FUNC_NORET void lj_err_argv(lua_State *L, int narg, ErrMsg em, ...); LJ_FUNC_NORET void lj_err_argtype(lua_State *L, int narg, const char *xname); LJ_FUNC_NORET void lj_err_argt(lua_State *L, int narg, int tt); +#if LJ_UNWIND_JIT && !LJ_ABI_WIN +LJ_FUNC uint8_t *lj_err_register_mcode(void *base, size_t sz, uint8_t *info); +LJ_FUNC void lj_err_deregister_mcode(void *base, size_t sz, uint8_t *info); +#else +#define lj_err_register_mcode(base, sz, info) (info) +#define lj_err_deregister_mcode(base, sz, info) UNUSED(base) +#endif + +#if LJ_UNWIND_EXT && !LJ_ABI_WIN && defined(LUA_USE_ASSERT) +LJ_FUNC void lj_err_verify(void); +#else +#define lj_err_verify() ((void)0) +#endif + #endif diff --git a/src/lj_errmsg.h b/src/lj_errmsg.h index 2c26a4f1..2e5c776a 100644 --- a/src/lj_errmsg.h +++ b/src/lj_errmsg.h @@ -67,6 +67,7 @@ ERRDEF(PROTMT, "cannot change a protected metatable") ERRDEF(UNPACK, "too many results to unpack") ERRDEF(RDRSTR, "reader function must return a string") ERRDEF(PRTOSTR, LUA_QL("tostring") " must return a string to " LUA_QL("print")) +ERRDEF(NUMRNG, "number out of range") ERRDEF(IDXRNG, "index out of range") ERRDEF(BASERNG, "base out of range") ERRDEF(LVLRNG, "level out of range") @@ -96,18 +97,12 @@ ERRDEF(STRPATX, "pattern too complex") ERRDEF(STRCAPI, "invalid capture index") ERRDEF(STRCAPN, "too many captures") ERRDEF(STRCAPU, "unfinished capture") -ERRDEF(STRFMTO, "invalid option " LUA_QL("%%%c") " to " LUA_QL("format")) -ERRDEF(STRFMTR, "invalid format (repeated flags)") -ERRDEF(STRFMTW, "invalid format (width or precision too long)") +ERRDEF(STRFMT, "invalid option " LUA_QS " to " LUA_QL("format")) ERRDEF(STRGSRV, "invalid replacement value (a %s)") ERRDEF(BADMODN, "name conflict for module " LUA_QS) #if LJ_HASJIT ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?") -#if LJ_TARGET_X86ORX64 -ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2") -#else ERRDEF(NOJIT, "JIT compiler disabled") -#endif #elif defined(LJ_ARCH_NOJIT) ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)") #else @@ -118,7 +113,6 @@ ERRDEF(JITOPT, "unknown or malformed optimization flag " LUA_QS) /* Lexer/parser errors. */ ERRDEF(XMODE, "attempt to load chunk with wrong mode") ERRDEF(XNEAR, "%s near " LUA_QS) -ERRDEF(XELEM, "lexical element too long") ERRDEF(XLINES, "chunk has too many lines") ERRDEF(XLEVELS, "chunk has too many syntax levels") ERRDEF(XNUMBER, "malformed number") @@ -186,6 +180,19 @@ ERRDEF(FFI_NYIPACKBIT, "NYI: packed bit fields") ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)") #endif +#if LJ_HASBUFFER +/* String buffer errors. */ +ERRDEF(BUFFER_SELF, "cannot put buffer into itself") +ERRDEF(BUFFER_BADOPT, "bad options table") +ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS) +ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x") +ERRDEF(BUFFER_BADDICTX, "cannot deserialize dictionary index %d") +ERRDEF(BUFFER_DEPTH, "too deep to serialize") +ERRDEF(BUFFER_DUPKEY, "duplicate table key") +ERRDEF(BUFFER_EOB, "unexpected end of buffer") +ERRDEF(BUFFER_LEFTOV, "left-over data in buffer") +#endif + #undef ERRDEF /* Detecting unused error messages: diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c index f833bc16..022de1aa 100644 --- a/src/lj_ffrecord.c +++ b/src/lj_ffrecord.c @@ -11,6 +11,7 @@ #if LJ_HASJIT #include "lj_err.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_frame.h" @@ -27,6 +28,8 @@ #include "lj_dispatch.h" #include "lj_vm.h" #include "lj_strscan.h" +#include "lj_strfmt.h" +#include "lj_serialize.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) @@ -79,10 +82,7 @@ static GCstr *argv2str(jit_State *J, TValue *o) GCstr *s; if (!tvisnumber(o)) lj_trace_err(J, LJ_TRERR_BADTYPE); - if (tvisint(o)) - s = lj_str_fromint(J->L, intV(o)); - else - s = lj_str_fromnum(J->L, &o->n); + s = lj_strfmt_number(J->L, o); setstrV(J->L, o, s); return s; } @@ -98,27 +98,102 @@ static ptrdiff_t results_wanted(jit_State *J) return -1; } -/* Throw error for unsupported variant of fast function. */ -LJ_NORET static void recff_nyiu(jit_State *J) +/* Trace stitching: add continuation below frame to start a new trace. */ +static void recff_stitch(jit_State *J) { - setfuncV(J->L, &J->errinfo, J->fn); - lj_trace_err_info(J, LJ_TRERR_NYIFFU); + ASMFunction cont = lj_cont_stitch; + lua_State *L = J->L; + TValue *base = L->base; + BCReg nslot = J->maxslot + 1 + LJ_FR2; + TValue *nframe = base + 1 + LJ_FR2; + const BCIns *pc = frame_pc(base-1); + TValue *pframe = frame_prevl(base-1); + + /* Check for this now. Throwing in lj_record_stop messes up the stack. */ + if (J->cur.nsnap >= (MSize)J->param[JIT_P_maxsnap]) + lj_trace_err(J, LJ_TRERR_SNAPOV); + + /* Move func + args up in Lua stack and insert continuation. */ + memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); + setframe_ftsz(nframe, ((char *)nframe - (char *)pframe) + FRAME_CONT); + setcont(base-LJ_FR2, cont); + setframe_pc(base, pc); + setnilV(base-1-LJ_FR2); /* Incorrect, but rec_check_slots() won't run anymore. */ + L->base += 2 + LJ_FR2; + L->top += 2 + LJ_FR2; + + /* Ditto for the IR. */ + memmove(&J->base[1], &J->base[-1-LJ_FR2], sizeof(TRef)*nslot); +#if LJ_FR2 + J->base[2] = TREF_FRAME; + J->base[-1] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); + J->base[0] = lj_ir_k64(J, IR_KNUM, u64ptr(pc)) | TREF_CONT; +#else + J->base[0] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; +#endif + J->ktrace = tref_ref((J->base[-1-LJ_FR2] = lj_ir_ktrace(J))); + J->base += 2 + LJ_FR2; + J->baseslot += 2 + LJ_FR2; + J->framedepth++; + + lj_record_stop(J, LJ_TRLINK_STITCH, 0); + + /* Undo Lua stack changes. */ + memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot); + setframe_pc(base-1, pc); + L->base -= 2 + LJ_FR2; + L->top -= 2 + LJ_FR2; } -/* Fallback handler for all fast functions that are not recorded (yet). */ +/* Fallback handler for fast functions that are not recorded (yet). */ static void LJ_FASTCALL recff_nyi(jit_State *J, RecordFFData *rd) { - setfuncV(J->L, &J->errinfo, J->fn); - lj_trace_err_info(J, LJ_TRERR_NYIFF); - UNUSED(rd); + if (J->cur.nins < (IRRef)J->param[JIT_P_minstitch] + REF_BASE) { + lj_trace_err_info(J, LJ_TRERR_TRACEUV); + } else { + /* Can only stitch from Lua call. */ + if (J->framedepth && frame_islua(J->L->base-1)) { + BCOp op = bc_op(*frame_pc(J->L->base-1)); + /* Stitched trace cannot start with *M op with variable # of args. */ + if (!(op == BC_CALLM || op == BC_CALLMT || + op == BC_RETM || op == BC_TSETM)) { + switch (J->fn->c.ffid) { + case FF_error: + case FF_debug_sethook: + case FF_jit_flush: + break; /* Don't stitch across special builtins. */ + default: + recff_stitch(J); /* Use trace stitching. */ + rd->nres = -1; + return; + } + } + } + /* Otherwise stop trace and return to interpreter. */ + lj_record_stop(J, LJ_TRLINK_RETURN, 0); + rd->nres = -1; + } } -/* C functions can have arbitrary side-effects and are not recorded (yet). */ -static void LJ_FASTCALL recff_c(jit_State *J, RecordFFData *rd) +/* Fallback handler for unsupported variants of fast functions. */ +#define recff_nyiu recff_nyi + +/* Must stop the trace for classic C functions with arbitrary side-effects. */ +#define recff_c recff_nyi + +/* Emit BUFHDR for the global temporary buffer. */ +static TRef recff_bufhdr(jit_State *J) { - setfuncV(J->L, &J->errinfo, J->fn); - lj_trace_err_info(J, LJ_TRERR_NYICF); - UNUSED(rd); + return emitir(IRT(IR_BUFHDR, IRT_PGC), + lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); +} + +/* Emit TMPREF. */ +static TRef recff_tmpref(jit_State *J, TRef tr, int mode) +{ + if (!LJ_DUALNUM && tref_isinteger(tr)) + tr = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); + return emitir(IRT(IR_TMPREF, IRT_PGC), tr, mode); } /* -- Base library fast functions ----------------------------------------- */ @@ -135,7 +210,7 @@ static void LJ_FASTCALL recff_type(jit_State *J, RecordFFData *rd) uint32_t t; if (tvisnumber(&rd->argv[0])) t = ~LJ_TNUMX; - else if (LJ_64 && tvislightud(&rd->argv[0])) + else if (LJ_64 && !LJ_GC64 && tvislightud(&rd->argv[0])) t = ~LJ_TLIGHTUD; else t = ~itype(&rd->argv[0]); @@ -167,7 +242,7 @@ static void LJ_FASTCALL recff_setmetatable(jit_State *J, RecordFFData *rd) ix.tab = tr; copyTV(J->L, &ix.tabv, &rd->argv[0]); lj_record_mm_lookup(J, &ix, MM_metatable); /* Guard for no __metatable. */ - fref = emitir(IRT(IR_FREF, IRT_P32), tr, IRFL_TAB_META); + fref = emitir(IRT(IR_FREF, IRT_PGC), tr, IRFL_TAB_META); mtref = tref_isnil(mt) ? lj_ir_knull(J, IRT_TAB) : mt; emitir(IRT(IR_FSTORE, IRT_TAB), fref, mtref); if (!tref_isnil(mt)) @@ -220,7 +295,7 @@ static void LJ_FASTCALL recff_rawlen(jit_State *J, RecordFFData *rd) if (tref_isstr(tr)) J->base[0] = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); else if (tref_istab(tr)) - J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, tr); + J->base[0] = emitir(IRTI(IR_ALEN), tr, TREF_NIL); /* else: Interpreter will throw. */ UNUSED(rd); } @@ -233,9 +308,9 @@ int32_t lj_ffrecord_select_mode(jit_State *J, TRef tr, TValue *tv) if (strV(tv)->len == 1) { emitir(IRTG(IR_EQ, IRT_STR), tr, lj_ir_kstr(J, strV(tv))); } else { - TRef trptr = emitir(IRT(IR_STRREF, IRT_P32), tr, lj_ir_kint(J, 0)); + TRef trptr = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); TRef trchar = emitir(IRT(IR_XLOAD, IRT_U8), trptr, IRXLOAD_READONLY); - emitir(IRTG(IR_EQ, IRT_INT), trchar, lj_ir_kint(J, '#')); + emitir(IRTGI(IR_EQ), trchar, lj_ir_kint(J, '#')); } return 0; } else { /* select(n, ...) */ @@ -263,7 +338,8 @@ static void LJ_FASTCALL recff_select(jit_State *J, RecordFFData *rd) J->base[i] = J->base[start+i]; } /* else: Interpreter will throw. */ } else { - recff_nyiu(J); + recff_nyiu(J, rd); + return; } } /* else: Interpreter will throw. */ } @@ -274,14 +350,18 @@ static void LJ_FASTCALL recff_tonumber(jit_State *J, RecordFFData *rd) TRef base = J->base[1]; if (tr && !tref_isnil(base)) { base = lj_opt_narrow_toint(J, base); - if (!tref_isk(base) || IR(tref_ref(base))->i != 10) - recff_nyiu(J); + if (!tref_isk(base) || IR(tref_ref(base))->i != 10) { + recff_nyiu(J, rd); + return; + } } if (tref_isnumber_str(tr)) { if (tref_isstr(tr)) { TValue tmp; - if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) - recff_nyiu(J); /* Would need an inverted STRTO for this case. */ + if (!lj_strscan_num(strV(&rd->argv[0]), &tmp)) { + recff_nyiu(J, rd); /* Would need an inverted STRTO for this case. */ + return; + } tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0); } #if LJ_HASFFI @@ -313,10 +393,10 @@ static int recff_metacall(jit_State *J, RecordFFData *rd, MMS mm) int errcode; TValue argv0; /* Temporarily insert metamethod below object. */ - J->base[1] = J->base[0]; + J->base[1+LJ_FR2] = J->base[0]; J->base[0] = ix.mobj; copyTV(J->L, &argv0, &rd->argv[0]); - copyTV(J->L, &rd->argv[1], &rd->argv[0]); + copyTV(J->L, &rd->argv[1+LJ_FR2], &rd->argv[0]); copyTV(J->L, &rd->argv[0], &ix.mobjv); /* Need to protect lj_record_tailcall because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_metacall_cp); @@ -336,13 +416,15 @@ static void LJ_FASTCALL recff_tostring(jit_State *J, RecordFFData *rd) if (tref_isstr(tr)) { /* Ignore __tostring in the string base metatable. */ /* Pass on result in J->base[0]. */ - } else if (!recff_metacall(J, rd, MM_tostring)) { + } else if (tr && !recff_metacall(J, rd, MM_tostring)) { if (tref_isnumber(tr)) { - J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); + J->base[0] = emitir(IRT(IR_TOSTR, IRT_STR), tr, + tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT); } else if (tref_ispri(tr)) { - J->base[0] = lj_ir_kstr(J, strV(&J->fn->c.upvalue[tref_type(tr)])); + J->base[0] = lj_ir_kstr(J, lj_strfmt_obj(J->L, &rd->argv[0])); } else { - recff_nyiu(J); + recff_nyiu(J, rd); + return; } } } @@ -364,15 +446,15 @@ static void LJ_FASTCALL recff_ipairs_aux(jit_State *J, RecordFFData *rd) } /* else: Interpreter will throw. */ } -static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) +static void LJ_FASTCALL recff_xpairs(jit_State *J, RecordFFData *rd) { TRef tr = J->base[0]; if (!((LJ_52 || (LJ_HASFFI && tref_iscdata(tr))) && - recff_metacall(J, rd, MM_ipairs))) { + recff_metacall(J, rd, MM_pairs + rd->data))) { if (tref_istab(tr)) { J->base[0] = lj_ir_kfunc(J, funcV(&J->fn->c.upvalue[0])); J->base[1] = tr; - J->base[2] = lj_ir_kint(J, 0); + J->base[2] = rd->data ? lj_ir_kint(J, 0) : TREF_NIL; rd->nres = 3; } /* else: Interpreter will throw. */ } @@ -381,8 +463,13 @@ static void LJ_FASTCALL recff_ipairs(jit_State *J, RecordFFData *rd) static void LJ_FASTCALL recff_pcall(jit_State *J, RecordFFData *rd) { if (J->maxslot >= 1) { +#if LJ_FR2 + /* Shift function arguments up. */ + memmove(J->base + 1, J->base, sizeof(TRef) * J->maxslot); +#endif lj_record_call(J, 0, J->maxslot - 1); rd->nres = -1; /* Pending call. */ + J->needsnap = 1; /* Start catching on-trace errors. */ } /* else: Interpreter will throw. */ } @@ -406,6 +493,10 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) copyTV(J->L, &argv1, &rd->argv[1]); copyTV(J->L, &rd->argv[0], &argv1); copyTV(J->L, &rd->argv[1], &argv0); +#if LJ_FR2 + /* Shift function arguments up. */ + memmove(J->base + 2, J->base + 1, sizeof(TRef) * (J->maxslot-1)); +#endif /* Need to protect lj_record_call because it may throw. */ errcode = lj_vm_cpcall(J->L, NULL, J, recff_xpcall_cp); /* Always undo Lua stack swap to avoid confusing the interpreter. */ @@ -414,15 +505,62 @@ static void LJ_FASTCALL recff_xpcall(jit_State *J, RecordFFData *rd) if (errcode) lj_err_throw(J->L, errcode); /* Propagate errors. */ rd->nres = -1; /* Pending call. */ + J->needsnap = 1; /* Start catching on-trace errors. */ } /* else: Interpreter will throw. */ } +static void LJ_FASTCALL recff_getfenv(jit_State *J, RecordFFData *rd) +{ + TRef tr = J->base[0]; + /* Only support getfenv(0) for now. */ + if (tref_isint(tr) && tref_isk(tr) && IR(tref_ref(tr))->i == 0) { + TRef trl = emitir(IRT(IR_LREF, IRT_THREAD), 0, 0); + J->base[0] = emitir(IRT(IR_FLOAD, IRT_TAB), trl, IRFL_THREAD_ENV); + return; + } + recff_nyiu(J, rd); +} + +static void LJ_FASTCALL recff_next(jit_State *J, RecordFFData *rd) +{ +#if LJ_BE + /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, + ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. + */ + recff_nyi(J, rd); +#else + TRef tab = J->base[0]; + if (tref_istab(tab)) { + RecordIndex ix; + cTValue *keyv; + ix.tab = tab; + if (tref_isnil(J->base[1])) { /* Shortcut for start of traversal. */ + ix.key = lj_ir_kint(J, 0); + keyv = niltvg(J2G(J)); + } else { + TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1); + ix.key = lj_ir_call(J, IRCALL_lj_tab_keyindex, tab, tmp); + keyv = &rd->argv[1]; + } + copyTV(J->L, &ix.tabv, &rd->argv[0]); + ix.keyv.u32.lo = lj_tab_keyindex(tabV(&ix.tabv), keyv); + /* Omit the value, if not used by the caller. */ + ix.idxchain = (J->framedepth && frame_islua(J->L->base-1) && + bc_b(frame_pc(J->L->base-1)[-1])-1 < 2); + ix.mobj = 0; /* We don't need the next index. */ + rd->nres = lj_record_next(J, &ix); + J->base[0] = ix.key; + J->base[1] = ix.val; + } /* else: Interpreter will throw. */ +#endif +} + /* -- Math library fast functions ----------------------------------------- */ static void LJ_FASTCALL recff_math_abs(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); - J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_knum_abs(J)); + J->base[0] = emitir(IRTN(IR_ABS), tr, lj_ir_ksimd(J, LJ_KSIMD_ABS)); UNUSED(rd); } @@ -475,7 +613,7 @@ static void LJ_FASTCALL recff_math_atan2(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); TRef tr2 = lj_ir_tonum(J, J->base[1]); - J->base[0] = emitir(IRTN(IR_ATAN2), tr, tr2); + J->base[0] = lj_ir_call(J, IRCALL_atan2, tr, tr2); UNUSED(rd); } @@ -492,55 +630,16 @@ static void LJ_FASTCALL recff_math_ldexp(jit_State *J, RecordFFData *rd) UNUSED(rd); } -/* Record math.asin, math.acos, math.atan. */ -static void LJ_FASTCALL recff_math_atrig(jit_State *J, RecordFFData *rd) -{ - TRef y = lj_ir_tonum(J, J->base[0]); - TRef x = lj_ir_knum_one(J); - uint32_t ffid = rd->data; - if (ffid != FF_math_atan) { - TRef tmp = emitir(IRTN(IR_MUL), y, y); - tmp = emitir(IRTN(IR_SUB), x, tmp); - tmp = emitir(IRTN(IR_FPMATH), tmp, IRFPM_SQRT); - if (ffid == FF_math_asin) { x = tmp; } else { x = y; y = tmp; } - } - J->base[0] = emitir(IRTN(IR_ATAN2), y, x); -} - -static void LJ_FASTCALL recff_math_htrig(jit_State *J, RecordFFData *rd) +static void LJ_FASTCALL recff_math_call(jit_State *J, RecordFFData *rd) { TRef tr = lj_ir_tonum(J, J->base[0]); J->base[0] = emitir(IRTN(IR_CALLN), tr, rd->data); } -static void LJ_FASTCALL recff_math_modf(jit_State *J, RecordFFData *rd) -{ - TRef tr = J->base[0]; - if (tref_isinteger(tr)) { - J->base[0] = tr; - J->base[1] = lj_ir_kint(J, 0); - } else { - TRef trt; - tr = lj_ir_tonum(J, tr); - trt = emitir(IRTN(IR_FPMATH), tr, IRFPM_TRUNC); - J->base[0] = trt; - J->base[1] = emitir(IRTN(IR_SUB), tr, trt); - } - rd->nres = 2; -} - -static void LJ_FASTCALL recff_math_degrad(jit_State *J, RecordFFData *rd) -{ - TRef tr = lj_ir_tonum(J, J->base[0]); - TRef trm = lj_ir_knum(J, numV(&J->fn->c.upvalue[0])); - J->base[0] = emitir(IRTN(IR_MUL), tr, trm); - UNUSED(rd); -} - static void LJ_FASTCALL recff_math_pow(jit_State *J, RecordFFData *rd) { - J->base[0] = lj_opt_narrow_pow(J, J->base[0], J->base[1], - &rd->argv[0], &rd->argv[1]); + J->base[0] = lj_opt_narrow_arith(J, J->base[0], J->base[1], + &rd->argv[0], &rd->argv[1], IR_POW); UNUSED(rd); } @@ -567,7 +666,7 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) GCudata *ud = udataV(&J->fn->c.upvalue[0]); TRef tr, one; lj_ir_kgc(J, obj2gco(ud), IRT_UDATA); /* Prevent collection. */ - tr = lj_ir_call(J, IRCALL_lj_math_random_step, lj_ir_kptr(J, uddata(ud))); + tr = lj_ir_call(J, IRCALL_lj_prng_u64d, lj_ir_kptr(J, uddata(ud))); one = lj_ir_knum_one(J); tr = emitir(IRTN(IR_SUB), tr, one); if (J->base[0]) { @@ -591,48 +690,105 @@ static void LJ_FASTCALL recff_math_random(jit_State *J, RecordFFData *rd) /* -- Bit library fast functions ------------------------------------------ */ -/* Record unary bit.tobit, bit.bnot, bit.bswap. */ +/* Record bit.tobit. */ +static void LJ_FASTCALL recff_bit_tobit(jit_State *J, RecordFFData *rd) +{ + TRef tr = J->base[0]; +#if LJ_HASFFI + if (tref_iscdata(tr)) { recff_bit64_tobit(J, rd); return; } +#endif + J->base[0] = lj_opt_narrow_tobit(J, tr); + UNUSED(rd); +} + +/* Record unary bit.bnot, bit.bswap. */ static void LJ_FASTCALL recff_bit_unary(jit_State *J, RecordFFData *rd) { - TRef tr = lj_opt_narrow_tobit(J, J->base[0]); - J->base[0] = (rd->data == IR_TOBIT) ? tr : emitir(IRTI(rd->data), tr, 0); +#if LJ_HASFFI + if (recff_bit64_unary(J, rd)) + return; +#endif + J->base[0] = emitir(IRTI(rd->data), lj_opt_narrow_tobit(J, J->base[0]), 0); } /* Record N-ary bit.band, bit.bor, bit.bxor. */ static void LJ_FASTCALL recff_bit_nary(jit_State *J, RecordFFData *rd) { - TRef tr = lj_opt_narrow_tobit(J, J->base[0]); - uint32_t op = rd->data; - BCReg i; - for (i = 1; J->base[i] != 0; i++) - tr = emitir(IRTI(op), tr, lj_opt_narrow_tobit(J, J->base[i])); - J->base[0] = tr; +#if LJ_HASFFI + if (recff_bit64_nary(J, rd)) + return; +#endif + { + TRef tr = lj_opt_narrow_tobit(J, J->base[0]); + uint32_t ot = IRTI(rd->data); + BCReg i; + for (i = 1; J->base[i] != 0; i++) + tr = emitir(ot, tr, lj_opt_narrow_tobit(J, J->base[i])); + J->base[0] = tr; + } } /* Record bit shifts. */ static void LJ_FASTCALL recff_bit_shift(jit_State *J, RecordFFData *rd) { - TRef tr = lj_opt_narrow_tobit(J, J->base[0]); - TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); - IROp op = (IROp)rd->data; - if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && - !tref_isk(tsh)) - tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); +#if LJ_HASFFI + if (recff_bit64_shift(J, rd)) + return; +#endif + { + TRef tr = lj_opt_narrow_tobit(J, J->base[0]); + TRef tsh = lj_opt_narrow_tobit(J, J->base[1]); + IROp op = (IROp)rd->data; + if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && + !tref_isk(tsh)) + tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 31)); #ifdef LJ_TARGET_UNIFYROT - if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { - op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; - tsh = emitir(IRTI(IR_NEG), tsh, tsh); + if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { + op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; + tsh = emitir(IRTI(IR_NEG), tsh, tsh); + } +#endif + J->base[0] = emitir(IRTI(op), tr, tsh); } +} + +static void LJ_FASTCALL recff_bit_tohex(jit_State *J, RecordFFData *rd) +{ +#if LJ_HASFFI + TRef hdr = recff_bufhdr(J); + TRef tr = recff_bit64_tohex(J, rd, hdr); + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); +#else + recff_nyiu(J, rd); /* Don't bother working around this NYI. */ #endif - J->base[0] = emitir(IRTI(op), tr, tsh); } /* -- String library fast functions --------------------------------------- */ -static void LJ_FASTCALL recff_string_len(jit_State *J, RecordFFData *rd) +/* Specialize to relative starting position for string. */ +static TRef recff_string_start(jit_State *J, GCstr *s, int32_t *st, TRef tr, + TRef trlen, TRef tr0) { - J->base[0] = emitir(IRTI(IR_FLOAD), lj_ir_tostr(J, J->base[0]), IRFL_STR_LEN); - UNUSED(rd); + int32_t start = *st; + if (start < 0) { + emitir(IRTGI(IR_LT), tr, tr0); + tr = emitir(IRTI(IR_ADD), trlen, tr); + start = start + (int32_t)s->len; + emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), tr, tr0); + if (start < 0) { + tr = tr0; + start = 0; + } + } else if (start == 0) { + emitir(IRTGI(IR_EQ), tr, tr0); + tr = tr0; + } else { + tr = emitir(IRTI(IR_ADD), tr, lj_ir_kint(J, -1)); + emitir(IRTGI(IR_GE), tr, tr0); + start--; + } + *st = start; + return tr; } /* Handle string.byte (rd->data = 0) and string.sub (rd->data = 1). */ @@ -679,39 +835,21 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) } else if ((MSize)end <= str->len) { emitir(IRTGI(IR_ULE), trend, trlen); } else { - emitir(IRTGI(IR_GT), trend, trlen); + emitir(IRTGI(IR_UGT), trend, trlen); end = (int32_t)str->len; trend = trlen; } - if (start < 0) { - emitir(IRTGI(IR_LT), trstart, tr0); - trstart = emitir(IRTI(IR_ADD), trlen, trstart); - start = start+(int32_t)str->len; - emitir(start < 0 ? IRTGI(IR_LT) : IRTGI(IR_GE), trstart, tr0); - if (start < 0) { - trstart = tr0; - start = 0; - } - } else { - if (start == 0) { - emitir(IRTGI(IR_EQ), trstart, tr0); - trstart = tr0; - } else { - trstart = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, -1)); - emitir(IRTGI(IR_GE), trstart, tr0); - start--; - } - } + trstart = recff_string_start(J, str, &start, trstart, trlen, tr0); if (rd->data) { /* Return string.sub result. */ if (end - start >= 0) { /* Also handle empty range here, to avoid extra traces. */ TRef trptr, trslen = emitir(IRTI(IR_SUB), trend, trstart); emitir(IRTGI(IR_GE), trslen, tr0); - trptr = emitir(IRT(IR_STRREF, IRT_P32), trstr, trstart); + trptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart); J->base[0] = emitir(IRT(IR_SNEW, IRT_STR), trptr, trslen); } else { /* Range underflow: return empty string. */ emitir(IRTGI(IR_LT), trend, trstart); - J->base[0] = lj_ir_kstr(J, lj_str_new(J->L, strdata(str), 0)); + J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty); } } else { /* Return string.byte result(s). */ ptrdiff_t i, len = end - start; @@ -723,7 +861,7 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) rd->nres = len; for (i = 0; i < len; i++) { TRef tmp = emitir(IRTI(IR_ADD), trstart, lj_ir_kint(J, (int32_t)i)); - tmp = emitir(IRT(IR_STRREF, IRT_P32), trstr, tmp); + tmp = emitir(IRT(IR_STRREF, IRT_PGC), trstr, tmp); J->base[i] = emitir(IRT(IR_XLOAD, IRT_U8), tmp, IRXLOAD_READONLY); } } else { /* Empty range or range underflow: return no results. */ @@ -733,48 +871,535 @@ static void LJ_FASTCALL recff_string_range(jit_State *J, RecordFFData *rd) } } -/* -- Table library fast functions ---------------------------------------- */ - -static void LJ_FASTCALL recff_table_getn(jit_State *J, RecordFFData *rd) +static void LJ_FASTCALL recff_string_char(jit_State *J, RecordFFData *rd) { - if (tref_istab(J->base[0])) - J->base[0] = lj_ir_call(J, IRCALL_lj_tab_len, J->base[0]); - /* else: Interpreter will throw. */ + TRef k255 = lj_ir_kint(J, 255); + BCReg i; + for (i = 0; J->base[i] != 0; i++) { /* Convert char values to strings. */ + TRef tr = lj_opt_narrow_toint(J, J->base[i]); + emitir(IRTGI(IR_ULE), tr, k255); + J->base[i] = emitir(IRT(IR_TOSTR, IRT_STR), tr, IRTOSTR_CHAR); + } + if (i > 1) { /* Concatenate the strings, if there's more than one. */ + TRef hdr = recff_bufhdr(J), tr = hdr; + for (i = 0; J->base[i] != 0; i++) + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, J->base[i]); + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + } else if (i == 0) { + J->base[0] = lj_ir_kstr(J, &J2G(J)->strempty); + } UNUSED(rd); } -static void LJ_FASTCALL recff_table_remove(jit_State *J, RecordFFData *rd) +static void LJ_FASTCALL recff_string_rep(jit_State *J, RecordFFData *rd) { - TRef tab = J->base[0]; - rd->nres = 0; - if (tref_istab(tab)) { - if (tref_isnil(J->base[1])) { /* Simple pop: t[#t] = nil */ - TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, tab); - GCtab *t = tabV(&rd->argv[0]); - MSize len = lj_tab_len(t); - emitir(IRTGI(len ? IR_NE : IR_EQ), trlen, lj_ir_kint(J, 0)); - if (len) { - RecordIndex ix; - ix.tab = tab; - ix.key = trlen; - settabV(J->L, &ix.tabv, t); - setintV(&ix.keyv, len); - ix.idxchain = 0; - if (results_wanted(J) != 0) { /* Specialize load only if needed. */ - ix.val = 0; - J->base[0] = lj_record_idx(J, &ix); /* Load previous value. */ - rd->nres = 1; - /* Assumes ix.key/ix.tab is not modified for raw lj_record_idx(). */ + TRef str = lj_ir_tostr(J, J->base[0]); + TRef rep = lj_opt_narrow_toint(J, J->base[1]); + TRef hdr, tr, str2 = 0; + if (!tref_isnil(J->base[2])) { + TRef sep = lj_ir_tostr(J, J->base[2]); + int32_t vrep = argv2int(J, &rd->argv[1]); + emitir(IRTGI(vrep > 1 ? IR_GT : IR_LE), rep, lj_ir_kint(J, 1)); + if (vrep > 1) { + TRef hdr2 = recff_bufhdr(J); + TRef tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), hdr2, sep); + tr2 = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr2, str); + str2 = emitir(IRTG(IR_BUFSTR, IRT_STR), tr2, hdr2); + } + } + tr = hdr = recff_bufhdr(J); + if (str2) { + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, str); + str = str2; + rep = emitir(IRTI(IR_ADD), rep, lj_ir_kint(J, -1)); + } + tr = lj_ir_call(J, IRCALL_lj_buf_putstr_rep, tr, str, rep); + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); +} + +static void LJ_FASTCALL recff_string_op(jit_State *J, RecordFFData *rd) +{ + TRef str = lj_ir_tostr(J, J->base[0]); + TRef hdr = recff_bufhdr(J); + TRef tr = lj_ir_call(J, rd->data, hdr, str); + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); +} + +static void LJ_FASTCALL recff_string_find(jit_State *J, RecordFFData *rd) +{ + TRef trstr = lj_ir_tostr(J, J->base[0]); + TRef trpat = lj_ir_tostr(J, J->base[1]); + TRef trlen = emitir(IRTI(IR_FLOAD), trstr, IRFL_STR_LEN); + TRef tr0 = lj_ir_kint(J, 0); + TRef trstart; + GCstr *str = argv2str(J, &rd->argv[0]); + GCstr *pat = argv2str(J, &rd->argv[1]); + int32_t start; + J->needsnap = 1; + if (tref_isnil(J->base[2])) { + trstart = lj_ir_kint(J, 1); + start = 1; + } else { + trstart = lj_opt_narrow_toint(J, J->base[2]); + start = argv2int(J, &rd->argv[2]); + } + trstart = recff_string_start(J, str, &start, trstart, trlen, tr0); + if ((MSize)start <= str->len) { + emitir(IRTGI(IR_ULE), trstart, trlen); + } else { + emitir(IRTGI(IR_UGT), trstart, trlen); +#if LJ_52 + J->base[0] = TREF_NIL; + return; +#else + trstart = trlen; + start = str->len; +#endif + } + /* Fixed arg or no pattern matching chars? (Specialized to pattern string.) */ + if ((J->base[2] && tref_istruecond(J->base[3])) || + (emitir(IRTG(IR_EQ, IRT_STR), trpat, lj_ir_kstr(J, pat)), + !lj_str_haspattern(pat))) { /* Search for fixed string. */ + TRef trsptr = emitir(IRT(IR_STRREF, IRT_PGC), trstr, trstart); + TRef trpptr = emitir(IRT(IR_STRREF, IRT_PGC), trpat, tr0); + TRef trslen = emitir(IRTI(IR_SUB), trlen, trstart); + TRef trplen = emitir(IRTI(IR_FLOAD), trpat, IRFL_STR_LEN); + TRef tr = lj_ir_call(J, IRCALL_lj_str_find, trsptr, trpptr, trslen, trplen); + TRef trp0 = lj_ir_kkptr(J, NULL); + if (lj_str_find(strdata(str)+(MSize)start, strdata(pat), + str->len-(MSize)start, pat->len)) { + TRef pos; + emitir(IRTG(IR_NE, IRT_PGC), tr, trp0); + /* Recompute offset. trsptr may not point into trstr after folding. */ + pos = emitir(IRTI(IR_ADD), emitir(IRTI(IR_SUB), tr, trsptr), trstart); + J->base[0] = emitir(IRTI(IR_ADD), pos, lj_ir_kint(J, 1)); + J->base[1] = emitir(IRTI(IR_ADD), pos, trplen); + rd->nres = 2; + } else { + emitir(IRTG(IR_EQ, IRT_PGC), tr, trp0); + J->base[0] = TREF_NIL; + } + } else { /* Search for pattern. */ + recff_nyiu(J, rd); + return; + } +} + +static void recff_format(jit_State *J, RecordFFData *rd, TRef hdr, int sbufx) +{ + ptrdiff_t arg = sbufx; + TRef tr = hdr, trfmt = lj_ir_tostr(J, J->base[arg]); + GCstr *fmt = argv2str(J, &rd->argv[arg]); + FormatState fs; + SFormat sf; + /* Specialize to the format string. */ + emitir(IRTG(IR_EQ, IRT_STR), trfmt, lj_ir_kstr(J, fmt)); + lj_strfmt_init(&fs, strdata(fmt), fmt->len); + while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { /* Parse format. */ + TRef tra = sf == STRFMT_LIT ? 0 : J->base[++arg]; + TRef trsf = lj_ir_kint(J, (int32_t)sf); + IRCallID id; + switch (STRFMT_TYPE(sf)) { + case STRFMT_LIT: + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, + lj_ir_kstr(J, lj_str_new(J->L, fs.str, fs.len))); + break; + case STRFMT_INT: + id = IRCALL_lj_strfmt_putfnum_int; + handle_int: + if (!tref_isinteger(tra)) { +#if LJ_HASFFI + if (tref_iscdata(tra)) { + tra = lj_crecord_loadiu64(J, tra, &rd->argv[arg]); + tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); + break; } - ix.val = TREF_NIL; - lj_record_idx(J, &ix); /* Remove value. */ +#endif + goto handle_num; + } + if (sf == STRFMT_INT) { /* Shortcut for plain %d. */ + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, + emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_INT)); + } else { +#if LJ_HASFFI + tra = emitir(IRT(IR_CONV, IRT_U64), tra, + (IRT_INT|(IRT_U64<<5)|IRCONV_SEXT)); + tr = lj_ir_call(J, IRCALL_lj_strfmt_putfxint, tr, trsf, tra); + lj_needsplit(J); +#else + recff_nyiu(J, rd); /* Don't bother working around this NYI. */ + return; +#endif } - } else { /* Complex case: remove in the middle. */ - recff_nyiu(J); + break; + case STRFMT_UINT: + id = IRCALL_lj_strfmt_putfnum_uint; + goto handle_int; + case STRFMT_NUM: + id = IRCALL_lj_strfmt_putfnum; + handle_num: + tra = lj_ir_tonum(J, tra); + tr = lj_ir_call(J, id, tr, trsf, tra); + if (LJ_SOFTFP32) lj_needsplit(J); + break; + case STRFMT_STR: + if (!tref_isstr(tra)) { + recff_nyiu(J, rd); /* NYI: __tostring and non-string types for %s. */ + /* NYI: also buffers. */ + return; + } + if (sf == STRFMT_STR) /* Shortcut for plain %s. */ + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, tra); + else if ((sf & STRFMT_T_QUOTED)) + tr = lj_ir_call(J, IRCALL_lj_strfmt_putquoted, tr, tra); + else + tr = lj_ir_call(J, IRCALL_lj_strfmt_putfstr, tr, trsf, tra); + break; + case STRFMT_CHAR: + tra = lj_opt_narrow_toint(J, tra); + if (sf == STRFMT_CHAR) /* Shortcut for plain %c. */ + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, + emitir(IRT(IR_TOSTR, IRT_STR), tra, IRTOSTR_CHAR)); + else + tr = lj_ir_call(J, IRCALL_lj_strfmt_putfchar, tr, trsf, tra); + break; + case STRFMT_PTR: /* NYI */ + case STRFMT_ERR: + default: + recff_nyiu(J, rd); + return; + } + } + if (sbufx) { + emitir(IRT(IR_USE, IRT_NIL), tr, 0); + } else { + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + } +} + +static void LJ_FASTCALL recff_string_format(jit_State *J, RecordFFData *rd) +{ + recff_format(J, rd, recff_bufhdr(J), 0); +} + +/* -- Buffer library fast functions --------------------------------------- */ + +#if LJ_HASBUFFER + +static LJ_AINLINE TRef recff_sbufx_get_L(jit_State *J, TRef ud) +{ + return emitir(IRT(IR_FLOAD, IRT_PGC), ud, IRFL_SBUF_L); +} + +static LJ_AINLINE void recff_sbufx_set_L(jit_State *J, TRef ud, TRef val) +{ + TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_L); + emitir(IRT(IR_FSTORE, IRT_PGC), fref, val); +} + +static LJ_AINLINE TRef recff_sbufx_get_ptr(jit_State *J, TRef ud, IRFieldID fl) +{ + return emitir(IRT(IR_FLOAD, IRT_PTR), ud, fl); +} + +static LJ_AINLINE void recff_sbufx_set_ptr(jit_State *J, TRef ud, IRFieldID fl, TRef val) +{ + TRef fref = emitir(IRT(IR_FREF, IRT_PTR), ud, fl); + emitir(IRT(IR_FSTORE, IRT_PTR), fref, val); +} + +static LJ_AINLINE TRef recff_sbufx_len(jit_State *J, TRef trr, TRef trw) +{ + TRef len = emitir(IRT(IR_SUB, IRT_INTP), trw, trr); + if (LJ_64) + len = emitir(IRTI(IR_CONV), len, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE); + return len; +} + +/* Emit typecheck for string buffer. */ +static TRef recff_sbufx_check(jit_State *J, RecordFFData *rd, ptrdiff_t arg) +{ + TRef trtype, ud = J->base[arg]; + if (!tvisbuf(&rd->argv[arg])) lj_trace_err(J, LJ_TRERR_BADTYPE); + trtype = emitir(IRT(IR_FLOAD, IRT_U8), ud, IRFL_UDATA_UDTYPE); + emitir(IRTGI(IR_EQ), trtype, lj_ir_kint(J, UDTYPE_BUFFER)); + J->needsnap = 1; + return ud; +} + +/* Emit BUFHDR for write to extended string buffer. */ +static TRef recff_sbufx_write(jit_State *J, TRef ud) +{ + TRef trbuf = emitir(IRT(IR_ADD, IRT_PGC), ud, lj_ir_kint(J, sizeof(GCudata))); + return emitir(IRT(IR_BUFHDR, IRT_PGC), trbuf, IRBUFHDR_WRITE); +} + +/* Check for integer in range for the buffer API. */ +static TRef recff_sbufx_checkint(jit_State *J, RecordFFData *rd, ptrdiff_t arg) +{ + TRef tr = J->base[arg]; + TRef trlim = lj_ir_kint(J, LJ_MAX_BUF); + if (tref_isinteger(tr)) { + emitir(IRTGI(IR_ULE), tr, trlim); + } else if (tref_isnum(tr)) { + tr = emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY); + emitir(IRTGI(IR_ULE), tr, trlim); +#if LJ_HASFFI + } else if (tref_iscdata(tr)) { + tr = lj_crecord_loadiu64(J, tr, &rd->argv[arg]); + emitir(IRTG(IR_ULE, IRT_U64), tr, lj_ir_kint64(J, LJ_MAX_BUF)); + tr = emitir(IRTI(IR_CONV), tr, (IRT_INT<<5)|IRT_I64|IRCONV_NONE); +#else + UNUSED(rd); +#endif + } else { + lj_trace_err(J, LJ_TRERR_BADTYPE); + } + return tr; +} + +static void LJ_FASTCALL recff_buffer_method_reset(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + SBufExt *sbx = bufV(&rd->argv[0]); + int iscow = (int)sbufiscow(sbx); + TRef trl = recff_sbufx_get_L(J, ud); + TRef trcow = emitir(IRT(IR_BAND, IRT_IGC), trl, lj_ir_kint(J, SBUF_FLAG_COW)); + TRef zero = lj_ir_kint(J, 0); + emitir(IRTG(iscow ? IR_NE : IR_EQ, IRT_IGC), trcow, zero); + if (iscow) { + trl = emitir(IRT(IR_BXOR, IRT_IGC), trl, + LJ_GC64 ? lj_ir_kint64(J, SBUF_FLAG_COW) : + lj_ir_kint(J, SBUF_FLAG_COW)); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, zero); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_E, zero); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_B, zero); + recff_sbufx_set_L(J, ud, trl); + emitir(IRT(IR_FSTORE, IRT_PGC), + emitir(IRT(IR_FREF, IRT_PGC), ud, IRFL_SBUF_REF), zero); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, zero); + } else { + TRef trb = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_B); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trb); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trb); + } +} + +static void LJ_FASTCALL recff_buffer_method_skip(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + TRef len = recff_sbufx_len(J, trr, trw); + TRef trn = recff_sbufx_checkint(J, rd, 1); + len = emitir(IRTI(IR_MIN), len, trn); + trr = emitir(IRT(IR_ADD, IRT_PTR), trr, len); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); +} + +static void LJ_FASTCALL recff_buffer_method_set(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef tr = J->base[1]; + if (tref_isstr(tr)) { + TRef trp = emitir(IRT(IR_STRREF, IRT_PGC), tr, lj_ir_kint(J, 0)); + TRef len = emitir(IRTI(IR_FLOAD), tr, IRFL_STR_LEN); + lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr); +#if LJ_HASFFI + } else if (tref_iscdata(tr)) { + TRef trp = lj_crecord_topcvoid(J, tr, &rd->argv[1]); + TRef len = recff_sbufx_checkint(J, rd, 2); + lj_ir_call(J, IRCALL_lj_bufx_set, trbuf, trp, len, tr); +#endif + } /* else: Interpreter will throw. */ +} + +static void LJ_FASTCALL recff_buffer_method_put(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef tr; + ptrdiff_t arg; + if (!J->base[1]) return; + for (arg = 1; (tr = J->base[arg]); arg++) { + if (tref_isudata(tr)) { + TRef ud2 = recff_sbufx_check(J, rd, arg); + emitir(IRTG(IR_NE, IRT_PGC), ud, ud2); + } + } + for (arg = 1; (tr = J->base[arg]); arg++) { + if (tref_isstr(tr)) { + trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, tr); + } else if (tref_isnumber(tr)) { + trbuf = emitir(IRTG(IR_BUFPUT, IRT_PGC), trbuf, + emitir(IRT(IR_TOSTR, IRT_STR), tr, + tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT)); + } else if (tref_isudata(tr)) { + TRef trr = recff_sbufx_get_ptr(J, tr, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, tr, IRFL_SBUF_W); + TRef len = recff_sbufx_len(J, trr, trw); + trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, trr, len); + } else { + recff_nyiu(J, rd); + } + } + emitir(IRT(IR_USE, IRT_NIL), trbuf, 0); +} + +static void LJ_FASTCALL recff_buffer_method_putf(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + recff_format(J, rd, trbuf, 1); +} + +static void LJ_FASTCALL recff_buffer_method_get(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + TRef tr; + ptrdiff_t arg; + if (!J->base[1]) { J->base[1] = TREF_NIL; J->base[2] = 0; } + for (arg = 0; (tr = J->base[arg+1]); arg++) { + if (!tref_isnil(tr)) { + J->base[arg+1] = recff_sbufx_checkint(J, rd, arg+1); } + } + for (arg = 0; (tr = J->base[arg+1]); arg++) { + TRef len = recff_sbufx_len(J, trr, trw); + if (tref_isnil(tr)) { + J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len); + trr = trw; + } else { + TRef tru; + len = emitir(IRTI(IR_MIN), len, tr); + tru = emitir(IRT(IR_ADD, IRT_PTR), trr, len); + J->base[arg] = emitir(IRT(IR_XSNEW, IRT_STR), trr, len); + trr = tru; /* Doing the ADD before the SNEW generates better code. */ + } + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); + } + rd->nres = arg; +} + +static void LJ_FASTCALL recff_buffer_method___tostring(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + J->base[0] = emitir(IRT(IR_XSNEW, IRT_STR), trr, recff_sbufx_len(J, trr, trw)); +} + +static void LJ_FASTCALL recff_buffer_method___len(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + J->base[0] = recff_sbufx_len(J, trr, trw); +} + +#if LJ_HASFFI +static void LJ_FASTCALL recff_buffer_method_putcdata(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef tr = lj_crecord_topcvoid(J, J->base[1], &rd->argv[1]); + TRef len = recff_sbufx_checkint(J, rd, 2); + trbuf = lj_ir_call(J, IRCALL_lj_buf_putmem, trbuf, tr, len); + emitir(IRT(IR_USE, IRT_NIL), trbuf, 0); +} + +static void LJ_FASTCALL recff_buffer_method_reserve(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef trsz = recff_sbufx_checkint(J, rd, 1); + J->base[1] = lj_ir_call(J, IRCALL_lj_bufx_more, trbuf, trsz); + J->base[0] = lj_crecord_topuint8(J, recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W)); + rd->nres = 2; +} + +static void LJ_FASTCALL recff_buffer_method_commit(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef len = recff_sbufx_checkint(J, rd, 1); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + TRef tre = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_E); + TRef left = emitir(IRT(IR_SUB, IRT_INTP), tre, trw); + if (LJ_64) + left = emitir(IRTI(IR_CONV), left, (IRT_INT<<5)|IRT_INTP|IRCONV_NONE); + emitir(IRTGI(IR_ULE), len, left); + trw = emitir(IRT(IR_ADD, IRT_PTR), trw, len); + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_W, trw); +} + +static void LJ_FASTCALL recff_buffer_method_ref(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trr = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_R); + TRef trw = recff_sbufx_get_ptr(J, ud, IRFL_SBUF_W); + J->base[0] = lj_crecord_topuint8(J, trr); + J->base[1] = recff_sbufx_len(J, trr, trw); + rd->nres = 2; +} +#endif + +static void LJ_FASTCALL recff_buffer_method_encode(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef tmp = recff_tmpref(J, J->base[1], IRTMPREF_IN1); + lj_ir_call(J, IRCALL_lj_serialize_put, trbuf, tmp); + /* No IR_USE needed, since the call is a store. */ +} + +static void LJ_FASTCALL recff_buffer_method_decode(jit_State *J, RecordFFData *rd) +{ + TRef ud = recff_sbufx_check(J, rd, 0); + TRef trbuf = recff_sbufx_write(J, ud); + TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1); + TRef trr = lj_ir_call(J, IRCALL_lj_serialize_get, trbuf, tmp); + IRType t = (IRType)lj_serialize_peektype(bufV(&rd->argv[0])); + /* No IR_USE needed, since the call is a store. */ + J->base[0] = lj_record_vload(J, tmp, 0, t); + /* The sbx->r store must be after the VLOAD type check, in case it fails. */ + recff_sbufx_set_ptr(J, ud, IRFL_SBUF_R, trr); +} + +static void LJ_FASTCALL recff_buffer_encode(jit_State *J, RecordFFData *rd) +{ + TRef tmp = recff_tmpref(J, J->base[0], IRTMPREF_IN1); + J->base[0] = lj_ir_call(J, IRCALL_lj_serialize_encode, tmp); + /* IR_USE needed for IR_CALLA, because the encoder may throw non-OOM. */ + emitir(IRT(IR_USE, IRT_NIL), J->base[0], 0); + UNUSED(rd); +} + +static void LJ_FASTCALL recff_buffer_decode(jit_State *J, RecordFFData *rd) +{ + if (tvisstr(&rd->argv[0])) { + GCstr *str = strV(&rd->argv[0]); + SBufExt sbx; + IRType t; + TRef tmp = recff_tmpref(J, TREF_NIL, IRTMPREF_OUT1); + TRef tr = lj_ir_call(J, IRCALL_lj_serialize_decode, tmp, J->base[0]); + /* IR_USE needed for IR_CALLA, because the decoder may throw non-OOM. + ** That's why IRCALL_lj_serialize_decode needs a fake INT result. + */ + emitir(IRT(IR_USE, IRT_NIL), tr, 0); + memset(&sbx, 0, sizeof(SBufExt)); + lj_bufx_set_cow(J->L, &sbx, strdata(str), str->len); + t = (IRType)lj_serialize_peektype(&sbx); + J->base[0] = lj_record_vload(J, tmp, 0, t); } /* else: Interpreter will throw. */ } +#endif + +/* -- Table library fast functions ---------------------------------------- */ + static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) { RecordIndex ix; @@ -783,7 +1408,7 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) rd->nres = 0; if (tref_istab(ix.tab) && ix.val) { if (!J->base[2]) { /* Simple push: t[#t+1] = v */ - TRef trlen = lj_ir_call(J, IRCALL_lj_tab_len, ix.tab); + TRef trlen = emitir(IRTI(IR_ALEN), ix.tab, TREF_NIL); GCtab *t = tabV(&rd->argv[0]); ix.key = emitir(IRTI(IR_ADD), trlen, lj_ir_kint(J, 1)); settabV(J->L, &ix.tabv, t); @@ -791,11 +1416,49 @@ static void LJ_FASTCALL recff_table_insert(jit_State *J, RecordFFData *rd) ix.idxchain = 0; lj_record_idx(J, &ix); /* Set new value. */ } else { /* Complex case: insert in the middle. */ - recff_nyiu(J); + recff_nyiu(J, rd); + return; } } /* else: Interpreter will throw. */ } +static void LJ_FASTCALL recff_table_concat(jit_State *J, RecordFFData *rd) +{ + TRef tab = J->base[0]; + if (tref_istab(tab)) { + TRef sep = !tref_isnil(J->base[1]) ? + lj_ir_tostr(J, J->base[1]) : lj_ir_knull(J, IRT_STR); + TRef tri = (J->base[1] && !tref_isnil(J->base[2])) ? + lj_opt_narrow_toint(J, J->base[2]) : lj_ir_kint(J, 1); + TRef tre = (J->base[1] && J->base[2] && !tref_isnil(J->base[3])) ? + lj_opt_narrow_toint(J, J->base[3]) : + emitir(IRTI(IR_ALEN), tab, TREF_NIL); + TRef hdr = recff_bufhdr(J); + TRef tr = lj_ir_call(J, IRCALL_lj_buf_puttab, hdr, tab, sep, tri, tre); + emitir(IRTG(IR_NE, IRT_PTR), tr, lj_ir_kptr(J, NULL)); + J->base[0] = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + } /* else: Interpreter will throw. */ + UNUSED(rd); +} + +static void LJ_FASTCALL recff_table_new(jit_State *J, RecordFFData *rd) +{ + TRef tra = lj_opt_narrow_toint(J, J->base[0]); + TRef trh = lj_opt_narrow_toint(J, J->base[1]); + J->base[0] = lj_ir_call(J, IRCALL_lj_tab_new_ah, tra, trh); + UNUSED(rd); +} + +static void LJ_FASTCALL recff_table_clear(jit_State *J, RecordFFData *rd) +{ + TRef tr = J->base[0]; + if (tref_istab(tr)) { + rd->nres = 0; + lj_ir_call(J, IRCALL_lj_tab_clear, tr); + J->needsnap = 1; + } /* else: Interpreter will throw. */ +} + /* -- I/O library fast functions ------------------------------------------ */ /* Get FILE* for I/O function. Any I/O error aborts recording, so there's @@ -805,8 +1468,7 @@ static TRef recff_io_fp(jit_State *J, TRef *udp, int32_t id) { TRef tr, ud, fp; if (id) { /* io.func() */ - tr = lj_ir_kptr(J, &J2G(J)->gcroot[id]); - ud = emitir(IRT(IR_XLOAD, IRT_UDATA), tr, 0); + ud = lj_ir_ggfload(J, IRT_UDATA, GG_OFS(g.gcroot[id])); } else { /* fp:method() */ ud = J->base[0]; if (!tref_isudata(ud)) @@ -828,10 +1490,13 @@ static void LJ_FASTCALL recff_io_write(jit_State *J, RecordFFData *rd) ptrdiff_t i = rd->data == 0 ? 1 : 0; for (; J->base[i]; i++) { TRef str = lj_ir_tostr(J, J->base[i]); - TRef buf = emitir(IRT(IR_STRREF, IRT_P32), str, zero); + TRef buf = emitir(IRT(IR_STRREF, IRT_PGC), str, zero); TRef len = emitir(IRTI(IR_FLOAD), str, IRFL_STR_LEN); if (tref_isk(len) && IR(tref_ref(len))->i == 1) { - TRef tr = emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); + IRIns *irs = IR(tref_ref(str)); + TRef tr = (irs->o == IR_TOSTR && irs->op2 == IRTOSTR_CHAR) ? + irs->op1 : + emitir(IRT(IR_XLOAD, IRT_U8), buf, IRXLOAD_READONLY); tr = lj_ir_call(J, IRCALL_fputc, tr, fp); if (results_wanted(J) != 0) /* Check result only if not ignored. */ emitir(IRTGI(IR_NE), tr, lj_ir_kint(J, -1)); @@ -853,6 +1518,28 @@ static void LJ_FASTCALL recff_io_flush(jit_State *J, RecordFFData *rd) J->base[0] = TREF_TRUE; } +/* -- Debug library fast functions ---------------------------------------- */ + +static void LJ_FASTCALL recff_debug_getmetatable(jit_State *J, RecordFFData *rd) +{ + GCtab *mt; + TRef mtref; + TRef tr = J->base[0]; + if (tref_istab(tr)) { + mt = tabref(tabV(&rd->argv[0])->metatable); + mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_TAB_META); + } else if (tref_isudata(tr)) { + mt = tabref(udataV(&rd->argv[0])->metatable); + mtref = emitir(IRT(IR_FLOAD, IRT_TAB), tr, IRFL_UDATA_META); + } else { + mt = tabref(basemt_obj(J2G(J), &rd->argv[0])); + J->base[0] = mt ? lj_ir_ktab(J, mt) : TREF_NIL; + return; + } + emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB)); + J->base[0] = mt ? mtref : TREF_NIL; +} + /* -- Record calls to fast functions -------------------------------------- */ #include "lj_recdef.h" diff --git a/src/lj_frame.h b/src/lj_frame.h index 33bd8e3e..aa1dc11a 100644 --- a/src/lj_frame.h +++ b/src/lj_frame.h @@ -11,7 +11,16 @@ /* -- Lua stack frame ----------------------------------------------------- */ -/* Frame type markers in callee function slot (callee base-1). */ +/* Frame type markers in LSB of PC (4-byte aligned) or delta (8-byte aligned: +** +** PC 00 Lua frame +** delta 001 C frame +** delta 010 Continuation frame +** delta 011 Lua vararg frame +** delta 101 cpcall() frame +** delta 110 ff pcall() frame +** delta 111 ff pcall() frame with active hook +*/ enum { FRAME_LUA, FRAME_C, FRAME_CONT, FRAME_VARG, FRAME_LUAP, FRAME_CP, FRAME_PCALL, FRAME_PCALLH @@ -21,9 +30,47 @@ enum { #define FRAME_TYPEP (FRAME_TYPE|FRAME_P) /* Macros to access and modify Lua frames. */ +#if LJ_FR2 +/* Two-slot frame info, required for 64 bit PC/GCRef: +** +** base-2 base-1 | base base+1 ... +** [func PC/delta/ft] | [slots ...] +** ^-- frame | ^-- base ^-- top +** +** Continuation frames: +** +** base-4 base-3 base-2 base-1 | base base+1 ... +** [cont PC ] [func PC/delta/ft] | [slots ...] +** ^-- frame | ^-- base ^-- top +*/ +#define frame_gc(f) (gcval((f)-1)) +#define frame_ftsz(f) ((ptrdiff_t)(f)->ftsz) +#define frame_pc(f) ((const BCIns *)frame_ftsz(f)) +#define setframe_gc(f, p, tp) (setgcVraw((f), (p), (tp))) +#define setframe_ftsz(f, sz) ((f)->ftsz = (sz)) +#define setframe_pc(f, pc) ((f)->ftsz = (int64_t)(intptr_t)(pc)) +#else +/* One-slot frame info, sufficient for 32 bit PC/GCRef: +** +** base-1 | base base+1 ... +** lo hi | +** [func | PC/delta/ft] | [slots ...] +** ^-- frame | ^-- base ^-- top +** +** Continuation frames: +** +** base-2 base-1 | base base+1 ... +** lo hi lo hi | +** [cont | PC] [func | PC/delta/ft] | [slots ...] +** ^-- frame | ^-- base ^-- top +*/ #define frame_gc(f) (gcref((f)->fr.func)) -#define frame_func(f) (&frame_gc(f)->fn) -#define frame_ftsz(f) ((f)->fr.tp.ftsz) +#define frame_ftsz(f) ((ptrdiff_t)(f)->fr.tp.ftsz) +#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) +#define setframe_gc(f, p, tp) (setgcref((f)->fr.func, (p)), UNUSED(tp)) +#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (int32_t)(sz)) +#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) +#endif #define frame_type(f) (frame_ftsz(f) & FRAME_TYPE) #define frame_typep(f) (frame_ftsz(f) & FRAME_TYPEP) @@ -33,33 +80,53 @@ enum { #define frame_isvarg(f) (frame_typep(f) == FRAME_VARG) #define frame_ispcall(f) ((frame_ftsz(f) & 6) == FRAME_PCALL) -#define frame_pc(f) (mref((f)->fr.tp.pcr, const BCIns)) +#define frame_func(f) (&frame_gc(f)->fn) +#define frame_delta(f) (frame_ftsz(f) >> 3) +#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) + +enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + +#if LJ_FR2 +#define frame_contpc(f) (frame_pc((f)-2)) +#define frame_contv(f) (((f)-3)->u64) +#else #define frame_contpc(f) (frame_pc((f)-1)) -#if LJ_64 +#define frame_contv(f) (((f)-1)->u32.lo) +#endif +#if LJ_FR2 +#define frame_contf(f) ((ASMFunction)(uintptr_t)((f)-3)->u64) +#elif LJ_64 #define frame_contf(f) \ ((ASMFunction)(void *)((intptr_t)lj_vm_asm_begin + \ (intptr_t)(int32_t)((f)-1)->u32.lo)) #else #define frame_contf(f) ((ASMFunction)gcrefp(((f)-1)->gcr, void)) #endif -#define frame_delta(f) (frame_ftsz(f) >> 3) -#define frame_sized(f) (frame_ftsz(f) & ~FRAME_TYPEP) +#define frame_iscont_fficb(f) \ + (LJ_HASFFI && frame_contv(f) == LJ_CONT_FFI_CALLBACK) -#define frame_prevl(f) ((f) - (1+bc_a(frame_pc(f)[-1]))) +#define frame_prevl(f) ((f) - (1+LJ_FR2+bc_a(frame_pc(f)[-1]))) #define frame_prevd(f) ((TValue *)((char *)(f) - frame_sized(f))) #define frame_prev(f) (frame_islua(f)?frame_prevl(f):frame_prevd(f)) /* Note: this macro does not skip over FRAME_VARG. */ -#define setframe_pc(f, pc) (setmref((f)->fr.tp.pcr, (pc))) -#define setframe_ftsz(f, sz) ((f)->fr.tp.ftsz = (sz)) -#define setframe_gc(f, p) (setgcref((f)->fr.func, (p))) - /* -- C stack frame ------------------------------------------------------- */ /* Macros to access and modify the C stack frame chain. */ /* These definitions must match with the arch-specific *.dasc files. */ #if LJ_TARGET_X86 +#if LJ_ABI_WIN +#define CFRAME_OFS_ERRF (19*4) +#define CFRAME_OFS_NRES (18*4) +#define CFRAME_OFS_PREV (17*4) +#define CFRAME_OFS_L (16*4) +#define CFRAME_OFS_SEH (9*4) +#define CFRAME_OFS_PC (6*4) +#define CFRAME_OFS_MULTRES (5*4) +#define CFRAME_SIZE (16*4) +#define CFRAME_SHIFT_MULTRES 0 +#else #define CFRAME_OFS_ERRF (15*4) #define CFRAME_OFS_NRES (14*4) #define CFRAME_OFS_PREV (13*4) @@ -68,24 +135,41 @@ enum { #define CFRAME_OFS_MULTRES (5*4) #define CFRAME_SIZE (12*4) #define CFRAME_SHIFT_MULTRES 0 +#endif #elif LJ_TARGET_X64 #if LJ_ABI_WIN #define CFRAME_OFS_PREV (13*8) +#if LJ_GC64 +#define CFRAME_OFS_PC (12*8) +#define CFRAME_OFS_L (11*8) +#define CFRAME_OFS_ERRF (21*4) +#define CFRAME_OFS_NRES (20*4) +#define CFRAME_OFS_MULTRES (8*4) +#else #define CFRAME_OFS_PC (25*4) #define CFRAME_OFS_L (24*4) #define CFRAME_OFS_ERRF (23*4) #define CFRAME_OFS_NRES (22*4) #define CFRAME_OFS_MULTRES (21*4) +#endif #define CFRAME_SIZE (10*8) #define CFRAME_SIZE_JIT (CFRAME_SIZE + 9*16 + 4*8) #define CFRAME_SHIFT_MULTRES 0 #else #define CFRAME_OFS_PREV (4*8) +#if LJ_GC64 +#define CFRAME_OFS_PC (3*8) +#define CFRAME_OFS_L (2*8) +#define CFRAME_OFS_ERRF (3*4) +#define CFRAME_OFS_NRES (2*4) +#define CFRAME_OFS_MULTRES (0*4) +#else #define CFRAME_OFS_PC (7*4) #define CFRAME_OFS_L (6*4) #define CFRAME_OFS_ERRF (5*4) #define CFRAME_OFS_NRES (4*4) #define CFRAME_OFS_MULTRES (1*4) +#endif #if LJ_NO_UNWIND #define CFRAME_SIZE (12*8) #else @@ -107,6 +191,15 @@ enum { #define CFRAME_SIZE 64 #endif #define CFRAME_SHIFT_MULTRES 3 +#elif LJ_TARGET_ARM64 +#define CFRAME_OFS_ERRF 36 +#define CFRAME_OFS_NRES 40 +#define CFRAME_OFS_PREV 0 +#define CFRAME_OFS_L 16 +#define CFRAME_OFS_PC 8 +#define CFRAME_OFS_MULTRES 32 +#define CFRAME_SIZE 208 +#define CFRAME_SHIFT_MULTRES 3 #elif LJ_TARGET_PPC #if LJ_TARGET_XBOX360 #define CFRAME_OFS_ERRF 424 @@ -117,7 +210,7 @@ enum { #define CFRAME_OFS_MULTRES 408 #define CFRAME_SIZE 384 #define CFRAME_SHIFT_MULTRES 3 -#elif LJ_ARCH_PPC64 +#elif LJ_ARCH_PPC32ON64 #define CFRAME_OFS_ERRF 472 #define CFRAME_OFS_NRES 468 #define CFRAME_OFS_PREV 448 @@ -133,26 +226,43 @@ enum { #define CFRAME_OFS_L 36 #define CFRAME_OFS_PC 32 #define CFRAME_OFS_MULTRES 28 -#define CFRAME_SIZE 272 +#define CFRAME_SIZE (LJ_ARCH_HASFPU ? 272 : 128) #define CFRAME_SHIFT_MULTRES 3 #endif -#elif LJ_TARGET_PPCSPE -#define CFRAME_OFS_ERRF 28 -#define CFRAME_OFS_NRES 24 -#define CFRAME_OFS_PREV 20 -#define CFRAME_OFS_L 16 -#define CFRAME_OFS_PC 12 -#define CFRAME_OFS_MULTRES 8 -#define CFRAME_SIZE 184 -#define CFRAME_SHIFT_MULTRES 3 -#elif LJ_TARGET_MIPS +#elif LJ_TARGET_MIPS32 +#if LJ_ARCH_HASFPU #define CFRAME_OFS_ERRF 124 #define CFRAME_OFS_NRES 120 #define CFRAME_OFS_PREV 116 #define CFRAME_OFS_L 112 +#define CFRAME_SIZE 112 +#else +#define CFRAME_OFS_ERRF 76 +#define CFRAME_OFS_NRES 72 +#define CFRAME_OFS_PREV 68 +#define CFRAME_OFS_L 64 +#define CFRAME_SIZE 64 +#endif #define CFRAME_OFS_PC 20 #define CFRAME_OFS_MULTRES 16 -#define CFRAME_SIZE 112 +#define CFRAME_SHIFT_MULTRES 3 +#elif LJ_TARGET_MIPS64 +#if LJ_ARCH_HASFPU +#define CFRAME_OFS_ERRF 188 +#define CFRAME_OFS_NRES 184 +#define CFRAME_OFS_PREV 176 +#define CFRAME_OFS_L 168 +#define CFRAME_OFS_PC 160 +#define CFRAME_SIZE 192 +#else +#define CFRAME_OFS_ERRF 124 +#define CFRAME_OFS_NRES 120 +#define CFRAME_OFS_PREV 112 +#define CFRAME_OFS_L 104 +#define CFRAME_OFS_PC 96 +#define CFRAME_SIZE 128 +#endif +#define CFRAME_OFS_MULTRES 0 #define CFRAME_SHIFT_MULTRES 3 #else #error "Missing CFRAME_* definitions for this architecture" diff --git a/src/lj_func.c b/src/lj_func.c index 5df652d8..9795a771 100644 --- a/src/lj_func.c +++ b/src/lj_func.c @@ -24,9 +24,11 @@ void LJ_FASTCALL lj_func_freeproto(global_State *g, GCproto *pt) /* -- Upvalues ------------------------------------------------------------ */ -static void unlinkuv(GCupval *uv) +static void unlinkuv(global_State *g, GCupval *uv) { - lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + UNUSED(g); + lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, + "broken upvalue chain"); setgcrefr(uvnext(uv)->prev, uv->prev); setgcrefr(uvprev(uv)->next, uv->next); } @@ -40,7 +42,7 @@ static GCupval *func_finduv(lua_State *L, TValue *slot) GCupval *uv; /* Search the sorted list of open upvalues. */ while (gcref(*pp) != NULL && uvval((p = gco2uv(gcref(*pp)))) >= slot) { - lua_assert(!p->closed && uvval(p) != &p->tv); + lj_assertG(!p->closed && uvval(p) != &p->tv, "closed upvalue in chain"); if (uvval(p) == slot) { /* Found open upvalue pointing to same slot? */ if (isdead(g, obj2gco(p))) /* Resurrect it, if it's dead. */ flipwhite(obj2gco(p)); @@ -61,7 +63,8 @@ static GCupval *func_finduv(lua_State *L, TValue *slot) setgcrefr(uv->next, g->uvhead.next); setgcref(uvnext(uv)->prev, obj2gco(uv)); setgcref(g->uvhead.next, obj2gco(uv)); - lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, + "broken upvalue chain"); return uv; } @@ -84,12 +87,13 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) while (gcref(L->openupval) != NULL && uvval((uv = gco2uv(gcref(L->openupval)))) >= level) { GCobj *o = obj2gco(uv); - lua_assert(!isblack(o) && !uv->closed && uvval(uv) != &uv->tv); + lj_assertG(!isblack(o), "bad black upvalue"); + lj_assertG(!uv->closed && uvval(uv) != &uv->tv, "closed upvalue in chain"); setgcrefr(L->openupval, uv->nextgc); /* No longer in open list. */ if (isdead(g, o)) { lj_func_freeuv(g, uv); } else { - unlinkuv(uv); + unlinkuv(g, uv); lj_gc_closeuv(g, uv); } } @@ -98,7 +102,7 @@ void LJ_FASTCALL lj_func_closeuv(lua_State *L, TValue *level) void LJ_FASTCALL lj_func_freeuv(global_State *g, GCupval *uv) { if (!uv->closed) - unlinkuv(uv); + unlinkuv(g, uv); lj_mem_freet(g, uv); } diff --git a/src/lj_gc.c b/src/lj_gc.c index 899b4e02..2fc52ec1 100644 --- a/src/lj_gc.c +++ b/src/lj_gc.c @@ -12,6 +12,7 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_err.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_func.h" @@ -24,6 +25,7 @@ #include "lj_cdata.h" #endif #include "lj_trace.h" +#include "lj_dispatch.h" #include "lj_vm.h" #define GCSTEPSIZE 1024u @@ -40,7 +42,8 @@ /* Mark a TValue (if needed). */ #define gc_marktv(g, tv) \ - { lua_assert(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct)); \ + { lj_assertG(!tvisgcv(tv) || (~itype(tv) == gcval(tv)->gch.gct), \ + "TValue and GC type mismatch"); \ if (tviswhite(tv)) gc_mark(g, gcV(tv)); } /* Mark a GCobj (if needed). */ @@ -54,21 +57,32 @@ static void gc_mark(global_State *g, GCobj *o) { int gct = o->gch.gct; - lua_assert(iswhite(o) && !isdead(g, o)); + lj_assertG(iswhite(o), "mark of non-white object"); + lj_assertG(!isdead(g, o), "mark of dead object"); white2gray(o); if (LJ_UNLIKELY(gct == ~LJ_TUDATA)) { GCtab *mt = tabref(gco2ud(o)->metatable); gray2black(o); /* Userdata are never gray. */ if (mt) gc_markobj(g, mt); gc_markobj(g, tabref(gco2ud(o)->env)); + if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) { + SBufExt *sbx = (SBufExt *)uddata(gco2ud(o)); + if (sbufiscow(sbx) && gcref(sbx->cowref)) + gc_markobj(g, gcref(sbx->cowref)); + if (gcref(sbx->dict_str)) + gc_markobj(g, gcref(sbx->dict_str)); + if (gcref(sbx->dict_mt)) + gc_markobj(g, gcref(sbx->dict_mt)); + } } else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) { GCupval *uv = gco2uv(o); gc_marktv(g, uvval(uv)); if (uv->closed) gray2black(o); /* Closed upvalues are never gray. */ } else if (gct != ~LJ_TSTR && gct != ~LJ_TCDATA) { - lua_assert(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || - gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO); + lj_assertG(gct == ~LJ_TFUNC || gct == ~LJ_TTAB || + gct == ~LJ_TTHREAD || gct == ~LJ_TPROTO || gct == ~LJ_TTRACE, + "bad GC type %d", gct); setgcrefr(o->gch.gclist, g->gc.gray); setgcref(g->gc.gray, o); } @@ -101,7 +115,8 @@ static void gc_mark_uv(global_State *g) { GCupval *uv; for (uv = uvnext(&g->uvhead); uv != &g->uvhead; uv = uvnext(uv)) { - lua_assert(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv); + lj_assertG(uvprev(uvnext(uv)) == uv && uvnext(uvprev(uv)) == uv, + "broken upvalue chain"); if (isgray(obj2gco(uv))) gc_marktv(g, uvval(uv)); } @@ -196,7 +211,7 @@ static int gc_traverse_tab(global_State *g, GCtab *t) for (i = 0; i <= hmask; i++) { Node *n = &node[i]; if (!tvisnil(&n->val)) { /* Mark non-empty slot. */ - lua_assert(!tvisnil(&n->key)); + lj_assertG(!tvisnil(&n->key), "mark of nil key in non-empty slot"); if (!(weak & LJ_GC_WEAKKEY)) gc_marktv(g, &n->key); if (!(weak & LJ_GC_WEAKVAL)) gc_marktv(g, &n->val); } @@ -211,7 +226,8 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) gc_markobj(g, tabref(fn->c.env)); if (isluafunc(fn)) { uint32_t i; - lua_assert(fn->l.nupvalues <= funcproto(fn)->sizeuv); + lj_assertG(fn->l.nupvalues <= funcproto(fn)->sizeuv, + "function upvalues out of range"); gc_markobj(g, funcproto(fn)); for (i = 0; i < fn->l.nupvalues; i++) /* Mark Lua function upvalues. */ gc_markobj(g, &gcref(fn->l.uvptr[i])->uv); @@ -227,7 +243,7 @@ static void gc_traverse_func(global_State *g, GCfunc *fn) static void gc_marktrace(global_State *g, TraceNo traceno) { GCobj *o = obj2gco(traceref(G2J(g), traceno)); - lua_assert(traceno != G2J(g)->cur.traceno); + lj_assertG(traceno != G2J(g)->cur.traceno, "active trace escaped"); if (iswhite(o)) { white2gray(o); setgcrefr(o->gch.gclist, g->gc.gray); @@ -244,6 +260,8 @@ static void gc_traverse_trace(global_State *g, GCtrace *T) IRIns *ir = &T->ir[ref]; if (ir->o == IR_KGC) gc_markobj(g, ir_kgc(ir)); + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ref++; } if (T->link) gc_marktrace(g, T->link); if (T->nextroot) gc_marktrace(g, T->nextroot); @@ -274,12 +292,12 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th) { TValue *frame, *top = th->top-1, *bot = tvref(th->stack); /* Note: extra vararg frame not skipped, marks function twice (harmless). */ - for (frame = th->base-1; frame > bot; frame = frame_prev(frame)) { + for (frame = th->base-1; frame > bot+LJ_FR2; frame = frame_prev(frame)) { GCfunc *fn = frame_func(frame); TValue *ftop = frame; if (isluafunc(fn)) ftop += funcproto(fn)->framesize; if (ftop > top) top = ftop; - gc_markobj(g, fn); /* Need to mark hidden function (or L). */ + if (!LJ_FR2) gc_markobj(g, fn); /* Need to mark hidden function (or L). */ } top++; /* Correct bias of -1 (frame == base-1). */ if (top > tvref(th->maxstack)) top = tvref(th->maxstack); @@ -290,7 +308,7 @@ static MSize gc_traverse_frames(global_State *g, lua_State *th) static void gc_traverse_thread(global_State *g, lua_State *th) { TValue *o, *top = th->top; - for (o = tvref(th->stack)+1; o < top; o++) + for (o = tvref(th->stack)+1+LJ_FR2; o < top; o++) gc_marktv(g, o); if (g->gc.state == GCSatomic) { top = tvref(th->stack) + th->stacksize; @@ -306,7 +324,7 @@ static size_t propagatemark(global_State *g) { GCobj *o = gcref(g->gc.gray); int gct = o->gch.gct; - lua_assert(isgray(o)); + lj_assertG(isgray(o), "propagation of non-gray object"); gray2black(o); setgcrefr(g->gc.gray, o->gch.gclist); /* Remove from gray list. */ if (LJ_LIKELY(gct == ~LJ_TTAB)) { @@ -338,7 +356,7 @@ static size_t propagatemark(global_State *g) return ((sizeof(GCtrace)+7)&~7) + (T->nins-T->nk)*sizeof(IRIns) + T->nsnap*sizeof(SnapShot) + T->nsnapmap*sizeof(SnapEntry); #else - lua_assert(0); + lj_assertG(0, "bad GC type %d", gct); return 0; #endif } @@ -355,15 +373,6 @@ static size_t gc_propagate_gray(global_State *g) /* -- Sweep phase --------------------------------------------------------- */ -/* Try to shrink some common data structures. */ -static void gc_shrink(global_State *g, lua_State *L) -{ - if (g->strnum <= (g->strmask >> 2) && g->strmask > LJ_MIN_STRTAB*2-1) - lj_str_resize(L, g->strmask >> 1); /* Shrink string table. */ - if (g->tmpbuf.sz > LJ_MIN_SBUF*2) - lj_str_resizebuf(L, &g->tmpbuf, g->tmpbuf.sz >> 1); /* Shrink temp buf. */ -} - /* Type of GC free functions. */ typedef void (LJ_FASTCALL *GCFreeFunc)(global_State *g, GCobj *o); @@ -389,7 +398,7 @@ static const GCFreeFunc gc_freefunc[] = { }; /* Full sweep of a GC list. */ -#define gc_fullsweep(g, p) gc_sweep(g, (p), LJ_MAX_MEM) +#define gc_fullsweep(g, p) gc_sweep(g, (p), ~(uint32_t)0) /* Partial sweep of a GC list. */ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) @@ -401,11 +410,13 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) if (o->gch.gct == ~LJ_TTHREAD) /* Need to sweep open upvalues, too. */ gc_fullsweep(g, &gco2th(o)->openupval); if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ - lua_assert(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED)); + lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), + "sweep of undead object"); makewhite(g, o); /* Value is alive, change to the current white. */ p = &o->gch.nextgc; } else { /* Otherwise value is dead, free it. */ - lua_assert(isdead(g, o) || ow == LJ_GC_SFIXED); + lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, + "sweep of unlive object"); setgcrefr(*p, o->gch.nextgc); if (o == gcref(g->gc.root)) setgcrefr(g->gc.root, o->gch.nextgc); /* Adjust list anchor. */ @@ -415,6 +426,32 @@ static GCRef *gc_sweep(global_State *g, GCRef *p, uint32_t lim) return p; } +/* Sweep one string interning table chain. Preserves hashalg bit. */ +static void gc_sweepstr(global_State *g, GCRef *chain) +{ + /* Mask with other white and LJ_GC_FIXED. Or LJ_GC_SFIXED on shutdown. */ + int ow = otherwhite(g); + uintptr_t u = gcrefu(*chain); + GCRef q; + GCRef *p = &q; + GCobj *o; + setgcrefp(q, (u & ~(uintptr_t)1)); + while ((o = gcref(*p)) != NULL) { + if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* Black or current white? */ + lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), + "sweep of undead string"); + makewhite(g, o); /* String is alive, change to the current white. */ + p = &o->gch.nextgc; + } else { /* Otherwise string is dead, free it. */ + lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, + "sweep of unlive string"); + setgcrefr(*p, o->gch.nextgc); + lj_str_free(g, gco2str(o)); + } + } + setgcrefp(*chain, (gcrefu(q) | (u & 1))); +} + /* Check whether we can clear a key or a value slot from a table. */ static int gc_mayclear(cTValue *o, int val) { @@ -432,11 +469,12 @@ static int gc_mayclear(cTValue *o, int val) } /* Clear collected entries from weak tables. */ -static void gc_clearweak(GCobj *o) +static void gc_clearweak(global_State *g, GCobj *o) { + UNUSED(g); while (o) { GCtab *t = gco2tab(o); - lua_assert((t->marked & LJ_GC_WEAK)); + lj_assertG((t->marked & LJ_GC_WEAK), "clear of non-weak table"); if ((t->marked & LJ_GC_WEAKVAL)) { MSize i, asize = t->asize; for (i = 0; i < asize; i++) { @@ -467,18 +505,21 @@ static void gc_call_finalizer(global_State *g, lua_State *L, { /* Save and restore lots of state around the __gc callback. */ uint8_t oldh = hook_save(g); - MSize oldt = g->gc.threshold; + GCSize oldt = g->gc.threshold; int errcode; TValue *top; lj_trace_abort(g); - top = L->top; - L->top = top+2; hook_entergc(g); /* Disable hooks and new traces during __gc. */ + if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); g->gc.threshold = LJ_MAX_MEM; /* Prevent GC steps. */ - copyTV(L, top, mo); - setgcV(L, top+1, o, ~o->gch.gct); - errcode = lj_vm_pcall(L, top+1, 1+0, -1); /* Stack: |mo|o| -> | */ + top = L->top; + copyTV(L, top++, mo); + if (LJ_FR2) setnilV(top++); + setgcV(L, top, o, ~o->gch.gct); + L->top = top+1; + errcode = lj_vm_pcall(L, top, 1+0, -1); /* Stack: |mo|o| -> | */ hook_restore(g, oldh); + if (LJ_HASPROFILE && (oldh & HOOK_PROFILE)) lj_dispatch_update(g); g->gc.threshold = oldt; /* Restore GC threshold. */ if (errcode) lj_err_throw(L, errcode); /* Propagate errors. */ @@ -490,7 +531,7 @@ static void gc_finalize(lua_State *L) global_State *g = G(L); GCobj *o = gcnext(gcref(g->gc.mmudata)); cTValue *mo; - lua_assert(gcref(g->jit_L) == NULL); /* Must not be called on trace. */ + lj_assertG(tvref(g->jit_base) == NULL, "finalizer called on trace"); /* Unchain from list of userdata to be finalized. */ if (o == gcref(g->gc.mmudata)) setgcrefnull(g->gc.mmudata); @@ -565,9 +606,9 @@ void lj_gc_freeall(global_State *g) /* Free everything, except super-fixed objects (the main thread). */ g->gc.currentwhite = LJ_GC_WHITES | LJ_GC_SFIXED; gc_fullsweep(g, &g->gc.root); - strmask = g->strmask; + strmask = g->str.mask; for (i = 0; i <= strmask; i++) /* Free all string hash chains. */ - gc_fullsweep(g, &g->strhash[i]); + gc_sweepstr(g, &g->str.tab[i]); } /* -- Collector ----------------------------------------------------------- */ @@ -582,7 +623,7 @@ static void atomic(global_State *g, lua_State *L) setgcrefr(g->gc.gray, g->gc.weak); /* Empty the list of weak tables. */ setgcrefnull(g->gc.weak); - lua_assert(!iswhite(obj2gco(mainthread(g)))); + lj_assertG(!iswhite(obj2gco(mainthread(g))), "main thread turned white"); gc_markobj(g, L); /* Mark running thread. */ gc_traverse_curtrace(g); /* Traverse current trace. */ gc_mark_gcroot(g); /* Mark GC roots (again). */ @@ -597,13 +638,15 @@ static void atomic(global_State *g, lua_State *L) udsize += gc_propagate_gray(g); /* And propagate the marks. */ /* All marking done, clear weak tables. */ - gc_clearweak(gcref(g->gc.weak)); + gc_clearweak(g, gcref(g->gc.weak)); + + lj_buf_shrink(L, &g->tmpbuf); /* Shrink temp buffer. */ /* Prepare for sweep phase. */ g->gc.currentwhite = (uint8_t)otherwhite(g); /* Flip current white. */ g->strempty.marked = g->gc.currentwhite; setmref(g->gc.sweep, &g->gc.root); - g->gc.estimate = g->gc.total - (MSize)udsize; /* Initial estimate. */ + g->gc.estimate = g->gc.total - (GCSize)udsize; /* Initial estimate. */ } /* GC state machine. Returns a cost estimate for each step performed. */ @@ -620,28 +663,29 @@ static size_t gc_onestep(lua_State *L) g->gc.state = GCSatomic; /* End of mark phase. */ return 0; case GCSatomic: - if (gcref(g->jit_L)) /* Don't run atomic phase on trace. */ + if (tvref(g->jit_base)) /* Don't run atomic phase on trace. */ return LJ_MAX_MEM; atomic(g, L); g->gc.state = GCSsweepstring; /* Start of sweep phase. */ g->gc.sweepstr = 0; return 0; case GCSsweepstring: { - MSize old = g->gc.total; - gc_fullsweep(g, &g->strhash[g->gc.sweepstr++]); /* Sweep one chain. */ - if (g->gc.sweepstr > g->strmask) + GCSize old = g->gc.total; + gc_sweepstr(g, &g->str.tab[g->gc.sweepstr++]); /* Sweep one chain. */ + if (g->gc.sweepstr > g->str.mask) g->gc.state = GCSsweep; /* All string hash chains sweeped. */ - lua_assert(old >= g->gc.total); + lj_assertG(old >= g->gc.total, "sweep increased memory"); g->gc.estimate -= old - g->gc.total; return GCSWEEPCOST; } case GCSsweep: { - MSize old = g->gc.total; + GCSize old = g->gc.total; setmref(g->gc.sweep, gc_sweep(g, mref(g->gc.sweep, GCRef), GCSWEEPMAX)); - lua_assert(old >= g->gc.total); + lj_assertG(old >= g->gc.total, "sweep increased memory"); g->gc.estimate -= old - g->gc.total; if (gcref(*mref(g->gc.sweep, GCRef)) == NULL) { - gc_shrink(g, L); + if (g->str.num <= (g->str.mask >> 2) && g->str.mask > LJ_MIN_STRTAB*2-1) + lj_str_resize(L, g->str.mask >> 1); /* Shrink string table. */ if (gcref(g->gc.mmudata)) { /* Need any finalizations? */ g->gc.state = GCSfinalize; #if LJ_HASFFI @@ -656,9 +700,12 @@ static size_t gc_onestep(lua_State *L) } case GCSfinalize: if (gcref(g->gc.mmudata) != NULL) { - if (gcref(g->jit_L)) /* Don't call finalizers on trace. */ + GCSize old = g->gc.total; + if (tvref(g->jit_base)) /* Don't call finalizers on trace. */ return LJ_MAX_MEM; gc_finalize(L); /* Finalize one userdata object. */ + if (old >= g->gc.total && g->gc.estimate > old - g->gc.total) + g->gc.estimate -= old - g->gc.total; if (g->gc.estimate > GCFINALIZECOST) g->gc.estimate -= GCFINALIZECOST; return GCFINALIZECOST; @@ -670,7 +717,7 @@ static size_t gc_onestep(lua_State *L) g->gc.debt = 0; return 0; default: - lua_assert(0); + lj_assertG(0, "bad GC state"); return 0; } } @@ -679,7 +726,7 @@ static size_t gc_onestep(lua_State *L) int LJ_FASTCALL lj_gc_step(lua_State *L) { global_State *g = G(L); - MSize lim; + GCSize lim; int32_t ostate = g->vmstate; setvmstate(g, GC); lim = (GCSTEPSIZE/100) * g->gc.stepmul; @@ -688,13 +735,13 @@ int LJ_FASTCALL lj_gc_step(lua_State *L) if (g->gc.total > g->gc.threshold) g->gc.debt += g->gc.total - g->gc.threshold; do { - lim -= (MSize)gc_onestep(L); + lim -= (GCSize)gc_onestep(L); if (g->gc.state == GCSpause) { g->gc.threshold = (g->gc.estimate/100) * g->gc.pause; g->vmstate = ostate; return 1; /* Finished a GC cycle. */ } - } while ((int32_t)lim > 0); + } while (sizeof(lim) == 8 ? ((int64_t)lim > 0) : ((int32_t)lim > 0)); if (g->gc.debt < GCSTEPSIZE) { g->gc.threshold = g->gc.total + GCSTEPSIZE; g->vmstate = ostate; @@ -718,8 +765,8 @@ void LJ_FASTCALL lj_gc_step_fixtop(lua_State *L) /* Perform multiple GC steps. Called from JIT-compiled code. */ int LJ_FASTCALL lj_gc_step_jit(global_State *g, MSize steps) { - lua_State *L = gco2th(gcref(g->jit_L)); - L->base = mref(G(L)->jit_base, TValue); + lua_State *L = gco2th(gcref(g->cur_L)); + L->base = tvref(G(L)->jit_base); L->top = curr_topL(L); while (steps-- > 0 && lj_gc_step(L) == 0) ; @@ -744,7 +791,8 @@ void lj_gc_fullgc(lua_State *L) } while (g->gc.state == GCSsweepstring || g->gc.state == GCSsweep) gc_onestep(L); /* Finish sweep. */ - lua_assert(g->gc.state == GCSfinalize || g->gc.state == GCSpause); + lj_assertG(g->gc.state == GCSfinalize || g->gc.state == GCSpause, + "bad GC state"); /* Now perform a full GC. */ g->gc.state = GCSpause; do { gc_onestep(L); } while (g->gc.state != GCSpause); @@ -757,9 +805,11 @@ void lj_gc_fullgc(lua_State *L) /* Move the GC propagation frontier forward. */ void lj_gc_barrierf(global_State *g, GCobj *o, GCobj *v) { - lua_assert(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o)); - lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); - lua_assert(o->gch.gct != ~LJ_TTAB); + lj_assertG(isblack(o) && iswhite(v) && !isdead(g, v) && !isdead(g, o), + "bad object states for forward barrier"); + lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, + "bad GC state"); + lj_assertG(o->gch.gct != ~LJ_TTAB, "barrier object is not a table"); /* Preserve invariant during propagation. Otherwise it doesn't matter. */ if (g->gc.state == GCSpropagate || g->gc.state == GCSatomic) gc_mark(g, v); /* Move frontier forward. */ @@ -796,7 +846,8 @@ void lj_gc_closeuv(global_State *g, GCupval *uv) lj_gc_barrierf(g, o, gcV(&uv->tv)); } else { makewhite(g, o); /* Make it white, i.e. sweep the upvalue. */ - lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); + lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, + "bad GC state"); } } } @@ -813,27 +864,29 @@ void lj_gc_barriertrace(global_State *g, uint32_t traceno) /* -- Allocator ----------------------------------------------------------- */ /* Call pluggable memory allocator to allocate or resize a fragment. */ -void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz) +void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz) { global_State *g = G(L); - lua_assert((osz == 0) == (p == NULL)); + lj_assertG((osz == 0) == (p == NULL), "realloc API violation"); p = g->allocf(g->allocd, p, osz, nsz); if (p == NULL && nsz > 0) lj_err_mem(L); - lua_assert((nsz == 0) == (p == NULL)); - lua_assert(checkptr32(p)); + lj_assertG((nsz == 0) == (p == NULL), "allocf API violation"); + lj_assertG(checkptrGC(p), + "allocated memory address %p outside required range", p); g->gc.total = (g->gc.total - osz) + nsz; return p; } /* Allocate new GC object and link it to the root set. */ -void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size) +void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size) { global_State *g = G(L); GCobj *o = (GCobj *)g->allocf(g->allocd, NULL, 0, size); if (o == NULL) lj_err_mem(L); - lua_assert(checkptr32(o)); + lj_assertG(checkptrGC(o), + "allocated memory address %p outside required range", o); g->gc.total += size; setgcrefr(o->gch.nextgc, g->gc.root); setgcref(g->gc.root, o); diff --git a/src/lj_gc.h b/src/lj_gc.h index c211e072..0df7dee6 100644 --- a/src/lj_gc.h +++ b/src/lj_gc.h @@ -81,8 +81,10 @@ LJ_FUNC void lj_gc_barriertrace(global_State *g, uint32_t traceno); static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) { GCobj *o = obj2gco(t); - lua_assert(isblack(o) && !isdead(g, o)); - lua_assert(g->gc.state != GCSfinalize && g->gc.state != GCSpause); + lj_assertG(isblack(o) && !isdead(g, o), + "bad object states for backward barrier"); + lj_assertG(g->gc.state != GCSfinalize && g->gc.state != GCSpause, + "bad GC state"); black2gray(o); setgcrefr(t->gclist, g->gc.grayagain); setgcref(g->gc.grayagain, o); @@ -107,8 +109,8 @@ static LJ_AINLINE void lj_gc_barrierback(global_State *g, GCtab *t) lj_gc_barrierf(G(L), obj2gco(p), obj2gco(o)); } /* Allocator. */ -LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, MSize osz, MSize nsz); -LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, MSize size); +LJ_FUNC void *lj_mem_realloc(lua_State *L, void *p, GCSize osz, GCSize nsz); +LJ_FUNC void * LJ_FASTCALL lj_mem_newgco(lua_State *L, GCSize size); LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, MSize *szp, MSize lim, MSize esz); @@ -116,13 +118,13 @@ LJ_FUNC void *lj_mem_grow(lua_State *L, void *p, static LJ_AINLINE void lj_mem_free(global_State *g, void *p, size_t osize) { - g->gc.total -= (MSize)osize; + g->gc.total -= (GCSize)osize; g->allocf(g->allocd, p, osize, 0); } -#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (MSize)((n)*sizeof(t)))) +#define lj_mem_newvec(L, n, t) ((t *)lj_mem_new(L, (GCSize)((n)*sizeof(t)))) #define lj_mem_reallocvec(L, p, on, n, t) \ - ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (MSize)((n)*sizeof(t)))) + ((p) = (t *)lj_mem_realloc(L, p, (on)*sizeof(t), (GCSize)((n)*sizeof(t)))) #define lj_mem_growvec(L, p, n, m, t) \ ((p) = (t *)lj_mem_grow(L, (p), &(n), (m), (MSize)sizeof(t))) #define lj_mem_freevec(g, p, n, t) lj_mem_free(g, (p), (n)*sizeof(t)) diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c index e4b68375..c50d0d4c 100644 --- a/src/lj_gdbjit.c +++ b/src/lj_gdbjit.c @@ -14,6 +14,8 @@ #include "lj_err.h" #include "lj_debug.h" #include "lj_frame.h" +#include "lj_buf.h" +#include "lj_strfmt.h" #include "lj_jit.h" #include "lj_dispatch.h" @@ -294,6 +296,9 @@ enum { #elif LJ_TARGET_ARM DW_REG_SP = 13, DW_REG_RA = 14, +#elif LJ_TARGET_ARM64 + DW_REG_SP = 31, + DW_REG_RA = 30, #elif LJ_TARGET_PPC DW_REG_SP = 1, DW_REG_RA = 65, @@ -358,7 +363,7 @@ static const ELFheader elfhdr_template = { .eosabi = 12, #elif defined(__DragonFly__) .eosabi = 0, -#elif (defined(__sun__) && defined(__svr4__)) +#elif LJ_TARGET_SOLARIS .eosabi = 6, #else .eosabi = 0, @@ -372,6 +377,8 @@ static const ELFheader elfhdr_template = { .machine = 62, #elif LJ_TARGET_ARM .machine = 40, +#elif LJ_TARGET_ARM64 + .machine = 183, #elif LJ_TARGET_PPC .machine = 20, #elif LJ_TARGET_MIPS @@ -428,16 +435,6 @@ static void gdbjit_catnum(GDBJITctx *ctx, uint32_t n) *ctx->p++ = '0' + n; } -/* Add a ULEB128 value. */ -static void gdbjit_uleb128(GDBJITctx *ctx, uint32_t v) -{ - uint8_t *p = ctx->p; - for (; v >= 0x80; v >>= 7) - *p++ = (uint8_t)((v & 0x7f) | 0x80); - *p++ = (uint8_t)v; - ctx->p = p; -} - /* Add a SLEB128 value. */ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) { @@ -454,7 +451,7 @@ static void gdbjit_sleb128(GDBJITctx *ctx, int32_t v) #define DU16(x) (*(uint16_t *)p = (x), p += 2) #define DU32(x) (*(uint32_t *)p = (x), p += 4) #define DADDR(x) (*(uintptr_t *)p = (x), p += sizeof(uintptr_t)) -#define DUV(x) (ctx->p = p, gdbjit_uleb128(ctx, (x)), p = ctx->p) +#define DUV(x) (p = (uint8_t *)lj_strfmt_wuleb128((char *)p, (x))) #define DSV(x) (ctx->p = p, gdbjit_sleb128(ctx, (x)), p = ctx->p) #define DSTR(str) (ctx->p = p, gdbjit_strz(ctx, (str)), p = ctx->p) #define DALIGNNOP(s) while ((uintptr_t)p & ((s)-1)) *p++ = DW_CFA_nop @@ -564,13 +561,20 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) DB(DW_CFA_offset|DW_REG_15); DUV(4); DB(DW_CFA_offset|DW_REG_14); DUV(5); /* Extra registers saved for JIT-compiled code. */ - DB(DW_CFA_offset|DW_REG_13); DUV(9); - DB(DW_CFA_offset|DW_REG_12); DUV(10); + DB(DW_CFA_offset|DW_REG_13); DUV(LJ_GC64 ? 10 : 9); + DB(DW_CFA_offset|DW_REG_12); DUV(LJ_GC64 ? 11 : 10); #elif LJ_TARGET_ARM { int i; for (i = 11; i >= 4; i--) { DB(DW_CFA_offset|i); DUV(2+(11-i)); } } +#elif LJ_TARGET_ARM64 + { + int i; + DB(DW_CFA_offset|31); DUV(2); + for (i = 28; i >= 19; i--) { DB(DW_CFA_offset|i); DUV(3+(28-i)); } + for (i = 15; i >= 8; i--) { DB(DW_CFA_offset|32|i); DUV(28-i); } + } #elif LJ_TARGET_PPC { int i; @@ -720,13 +724,27 @@ static void gdbjit_buildobj(GDBJITctx *ctx) SECTALIGN(ctx->p, sizeof(uintptr_t)); gdbjit_initsect(ctx, GDBJIT_SECT_eh_frame, gdbjit_ehframe); ctx->objsize = (size_t)((char *)ctx->p - (char *)obj); - lua_assert(ctx->objsize < sizeof(GDBJITobj)); + lj_assertX(ctx->objsize < sizeof(GDBJITobj), "GDBJITobj overflow"); } #undef SECTALIGN /* -- Interface to GDB JIT API -------------------------------------------- */ +static int gdbjit_lock; + +static void gdbjit_lock_acquire() +{ + while (__sync_lock_test_and_set(&gdbjit_lock, 1)) { + /* Just spin; futexes or pthreads aren't worth the portability cost. */ + } +} + +static void gdbjit_lock_release() +{ + __sync_lock_release(&gdbjit_lock); +} + /* Add new entry to GDB JIT symbol chain. */ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) { @@ -738,6 +756,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) ctx->T->gdbjit_entry = (void *)eo; /* Link new entry to chain and register it. */ eo->entry.prev_entry = NULL; + gdbjit_lock_acquire(); eo->entry.next_entry = __jit_debug_descriptor.first_entry; if (eo->entry.next_entry) eo->entry.next_entry->prev_entry = &eo->entry; @@ -747,6 +766,7 @@ static void gdbjit_newentry(lua_State *L, GDBJITctx *ctx) __jit_debug_descriptor.relevant_entry = &eo->entry; __jit_debug_descriptor.action_flag = GDBJIT_REGISTER; __jit_debug_register_code(); + gdbjit_lock_release(); } /* Add debug info for newly compiled trace and notify GDB. */ @@ -762,7 +782,8 @@ void lj_gdbjit_addtrace(jit_State *J, GCtrace *T) ctx.spadjp = CFRAME_SIZE_JIT + (MSize)(parent ? traceref(J, parent)->spadjust : 0); ctx.spadj = CFRAME_SIZE_JIT + T->spadjust; - lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); + lj_assertJ(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc, + "start PC out of range"); ctx.lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); ctx.filename = proto_chunknamestr(pt); if (*ctx.filename == '@' || *ctx.filename == '=') @@ -778,6 +799,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) { GDBJITentryobj *eo = (GDBJITentryobj *)T->gdbjit_entry; if (eo) { + gdbjit_lock_acquire(); if (eo->entry.prev_entry) eo->entry.prev_entry->next_entry = eo->entry.next_entry; else @@ -787,6 +809,7 @@ void lj_gdbjit_deltrace(jit_State *J, GCtrace *T) __jit_debug_descriptor.relevant_entry = &eo->entry; __jit_debug_descriptor.action_flag = GDBJIT_UNREGISTER; __jit_debug_register_code(); + gdbjit_lock_release(); lj_mem_free(J2G(J), eo, eo->sz); } } diff --git a/src/lj_ir.c b/src/lj_ir.c index b2846680..65901510 100644 --- a/src/lj_ir.c +++ b/src/lj_ir.c @@ -15,6 +15,7 @@ #if LJ_HASJIT #include "lj_gc.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_ir.h" @@ -29,14 +30,16 @@ #endif #include "lj_vm.h" #include "lj_strscan.h" -#include "lj_lib.h" +#include "lj_serialize.h" +#include "lj_strfmt.h" +#include "lj_prng.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) #define fins (&J->fold.ins) /* Pass IR on to next optimization in chain (FOLD). */ -#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) +#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) /* -- IR tables ----------------------------------------------------------- */ @@ -88,8 +91,9 @@ static void lj_ir_growbot(jit_State *J) { IRIns *baseir = J->irbuf + J->irbotlim; MSize szins = J->irtoplim - J->irbotlim; - lua_assert(szins != 0); - lua_assert(J->cur.nk == J->irbotlim); + lj_assertJ(szins != 0, "zero IR size"); + lj_assertJ(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim, + "unexpected IR growth"); if (J->cur.nins + (szins >> 1) < J->irtoplim) { /* More than half of the buffer is free on top: shift up by a quarter. */ MSize ofs = szins >> 2; @@ -143,6 +147,17 @@ TRef lj_ir_call(jit_State *J, IRCallID id, ...) return emitir(CCI_OPTYPE(ci), tr, id); } +/* Load field of type t from GG_State + offset. Must be 32 bit aligned. */ +TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs) +{ + lj_assertJ((ofs & 3) == 0, "unaligned GG_State field offset"); + ofs >>= 2; + lj_assertJ(ofs >= IRFL__MAX && ofs <= 0x3ff, + "GG_State field offset breaks 10 bit FOLD key limit"); + lj_ir_set(J, IRT(IR_FLOAD, t), REF_NIL, ofs); + return lj_opt_fold(J); +} + /* -- Interning of constants ---------------------------------------------- */ /* @@ -163,6 +178,24 @@ static LJ_AINLINE IRRef ir_nextk(jit_State *J) return ref; } +/* Get ref of next 64 bit IR constant and optionally grow IR. +** Note: this may invalidate all IRIns *! +*/ +static LJ_AINLINE IRRef ir_nextk64(jit_State *J) +{ + IRRef ref = J->cur.nk - 2; + lj_assertJ(J->state != LJ_TRACE_ASM, "bad JIT state"); + if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J); + J->cur.nk = ref; + return ref; +} + +#if LJ_GC64 +#define ir_nextkgc ir_nextk64 +#else +#define ir_nextkgc ir_nextk +#endif + /* Intern int32_t constant. */ TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k) { @@ -182,79 +215,21 @@ found: return TREF(ref, IRT_INT); } -/* The MRef inside the KNUM/KINT64 IR instructions holds the address of the -** 64 bit constant. The constants themselves are stored in a chained array -** and shared across traces. -** -** Rationale for choosing this data structure: -** - The address of the constants is embedded in the generated machine code -** and must never move. A resizable array or hash table wouldn't work. -** - Most apps need very few non-32 bit integer constants (less than a dozen). -** - Linear search is hard to beat in terms of speed and low complexity. -*/ -typedef struct K64Array { - MRef next; /* Pointer to next list. */ - MSize numk; /* Number of used elements in this array. */ - TValue k[LJ_MIN_K64SZ]; /* Array of constants. */ -} K64Array; - -/* Free all chained arrays. */ -void lj_ir_k64_freeall(jit_State *J) -{ - K64Array *k; - for (k = mref(J->k64, K64Array); k; ) { - K64Array *next = mref(k->next, K64Array); - lj_mem_free(J2G(J), k, sizeof(K64Array)); - k = next; - } -} - -/* Find 64 bit constant in chained array or add it. */ -cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64) -{ - K64Array *k, *kp = NULL; - TValue *ntv; - MSize idx; - /* Search for the constant in the whole chain of arrays. */ - for (k = mref(J->k64, K64Array); k; k = mref(k->next, K64Array)) { - kp = k; /* Remember previous element in list. */ - for (idx = 0; idx < k->numk; idx++) { /* Search one array. */ - TValue *tv = &k->k[idx]; - if (tv->u64 == u64) /* Needed for +-0/NaN/absmask. */ - return tv; - } - } - /* Constant was not found, need to add it. */ - if (!(kp && kp->numk < LJ_MIN_K64SZ)) { /* Allocate a new array. */ - K64Array *kn = lj_mem_newt(J->L, sizeof(K64Array), K64Array); - setmref(kn->next, NULL); - kn->numk = 0; - if (kp) - setmref(kp->next, kn); /* Chain to the end of the list. */ - else - setmref(J->k64, kn); /* Link first array. */ - kp = kn; - } - ntv = &kp->k[kp->numk++]; /* Add to current array. */ - ntv->u64 = u64; - return ntv; -} - -/* Intern 64 bit constant, given by its address. */ -TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv) +/* Intern 64 bit constant, given by its 64 bit pattern. */ +TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64) { IRIns *ir, *cir = J->cur.ir; IRRef ref; IRType t = op == IR_KNUM ? IRT_NUM : IRT_I64; for (ref = J->chain[op]; ref; ref = cir[ref].prev) - if (ir_k64(&cir[ref]) == tv) + if (ir_k64(&cir[ref])->u64 == u64) goto found; - ref = ir_nextk(J); + ref = ir_nextk64(J); ir = IR(ref); - lua_assert(checkptr32(tv)); - setmref(ir->ptr, tv); + ir[1].tv.u64 = u64; ir->t.irt = t; ir->o = op; + ir->op12 = 0; ir->prev = J->chain[op]; J->chain[op] = (IRRef1)ref; found: @@ -264,13 +239,13 @@ found: /* Intern FP constant, given by its 64 bit pattern. */ TRef lj_ir_knum_u64(jit_State *J, uint64_t u64) { - return lj_ir_k64(J, IR_KNUM, lj_ir_k64_find(J, u64)); + return lj_ir_k64(J, IR_KNUM, u64); } /* Intern 64 bit integer constant. */ TRef lj_ir_kint64(jit_State *J, uint64_t u64) { - return lj_ir_k64(J, IR_KINT64, lj_ir_k64_find(J, u64)); + return lj_ir_k64(J, IR_KINT64, u64); } /* Check whether a number is int and return it. -0 is NOT considered an int. */ @@ -305,14 +280,15 @@ TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t) { IRIns *ir, *cir = J->cur.ir; IRRef ref; - lua_assert(!isdead(J2G(J), o)); + lj_assertJ(!isdead(J2G(J), o), "interning of dead GC object"); for (ref = J->chain[IR_KGC]; ref; ref = cir[ref].prev) if (ir_kgc(&cir[ref]) == o) goto found; - ref = ir_nextk(J); + ref = ir_nextkgc(J); ir = IR(ref); /* NOBARRIER: Current trace is a GC root. */ - setgcref(ir->gcr, o); + ir->op12 = 0; + setgcref(ir[LJ_GC64].gcr, o); ir->t.irt = (uint8_t)t; ir->o = IR_KGC; ir->prev = J->chain[IR_KGC]; @@ -321,24 +297,44 @@ found: return TREF(ref, t); } -/* Intern 32 bit pointer constant. */ +/* Allocate GCtrace constant placeholder (no interning). */ +TRef lj_ir_ktrace(jit_State *J) +{ + IRRef ref = ir_nextkgc(J); + IRIns *ir = IR(ref); + lj_assertJ(irt_toitype_(IRT_P64) == LJ_TTRACE, "mismatched type mapping"); + ir->t.irt = IRT_P64; + ir->o = LJ_GC64 ? IR_KNUM : IR_KNULL; /* Not IR_KGC yet, but same size. */ + ir->op12 = 0; + ir->prev = 0; + return TREF(ref, IRT_P64); +} + +/* Intern pointer constant. */ TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr) { IRIns *ir, *cir = J->cur.ir; IRRef ref; - lua_assert((void *)(intptr_t)i32ptr(ptr) == ptr); +#if LJ_64 && !LJ_GC64 + lj_assertJ((void *)(uintptr_t)u32ptr(ptr) == ptr, "out-of-range GC pointer"); +#endif for (ref = J->chain[op]; ref; ref = cir[ref].prev) - if (mref(cir[ref].ptr, void) == ptr) + if (ir_kptr(&cir[ref]) == ptr) goto found; +#if LJ_GC64 + ref = ir_nextk64(J); +#else ref = ir_nextk(J); +#endif ir = IR(ref); - setmref(ir->ptr, ptr); - ir->t.irt = IRT_P32; + ir->op12 = 0; + setmref(ir[LJ_GC64].ptr, ptr); + ir->t.irt = IRT_PGC; ir->o = op; ir->prev = J->chain[op]; J->chain[op] = (IRRef1)ref; found: - return TREF(ref, IRT_P32); + return TREF(ref, IRT_PGC); } /* Intern typed NULL constant. */ @@ -367,7 +363,8 @@ TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot) IRRef2 op12 = IRREF2((IRRef1)key, (IRRef1)slot); IRRef ref; /* Const part is not touched by CSE/DCE, so 0-65535 is ok for IRMlit here. */ - lua_assert(tref_isk(key) && slot == (IRRef)(IRRef1)slot); + lj_assertJ(tref_isk(key) && slot == (IRRef)(IRRef1)slot, + "out-of-range key/slot"); for (ref = J->chain[IR_KSLOT]; ref; ref = cir[ref].prev) if (cir[ref].op12 == op12) goto found; @@ -388,14 +385,15 @@ found: void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) { UNUSED(L); - lua_assert(ir->o != IR_KSLOT); /* Common mistake. */ + lj_assertL(ir->o != IR_KSLOT, "unexpected KSLOT"); /* Common mistake. */ switch (ir->o) { - case IR_KPRI: setitype(tv, irt_toitype(ir->t)); break; + case IR_KPRI: setpriV(tv, irt_toitype(ir->t)); break; case IR_KINT: setintV(tv, ir->i); break; case IR_KGC: setgcV(L, tv, ir_kgc(ir), irt_toitype(ir->t)); break; - case IR_KPTR: case IR_KKPTR: case IR_KNULL: - setlightudV(tv, mref(ir->ptr, void)); + case IR_KPTR: case IR_KKPTR: + setnumV(tv, (lua_Number)(uintptr_t)ir_kptr(ir)); break; + case IR_KNULL: setintV(tv, 0); break; case IR_KNUM: setnumV(tv, ir_knum(ir)->n); break; #if LJ_HASFFI case IR_KINT64: { @@ -405,7 +403,7 @@ void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir) break; } #endif - default: lua_assert(0); break; + default: lj_assertL(0, "bad IR constant op %d", ir->o); break; } } @@ -443,7 +441,8 @@ TRef LJ_FASTCALL lj_ir_tostr(jit_State *J, TRef tr) if (!tref_isstr(tr)) { if (!tref_isnumber(tr)) lj_trace_err(J, LJ_TRERR_BADTYPE); - tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, 0); + tr = emitir(IRT(IR_TOSTR, IRT_STR), tr, + tref_isnum(tr) ? IRTOSTR_NUM : IRTOSTR_INT); } return tr; } @@ -464,7 +463,7 @@ int lj_ir_numcmp(lua_Number a, lua_Number b, IROp op) case IR_UGE: return !(a < b); case IR_ULE: return !(a > b); case IR_UGT: return !(a <= b); - default: lua_assert(0); return 0; + default: lj_assertX(0, "bad IR op %d", op); return 0; } } @@ -477,7 +476,7 @@ int lj_ir_strcmp(GCstr *a, GCstr *b, IROp op) case IR_GE: return (res >= 0); case IR_LE: return (res <= 0); case IR_GT: return (res > 0); - default: lua_assert(0); return 0; + default: lj_assertX(0, "bad IR op %d", op); return 0; } } diff --git a/src/lj_ir.h b/src/lj_ir.h index da73a4b7..ed492e93 100644 --- a/src/lj_ir.h +++ b/src/lj_ir.h @@ -40,6 +40,7 @@ _(USE, S , ref, ___) \ _(PHI, S , ref, ref) \ _(RENAME, S , ref, lit) \ + _(PROF, S , ___, ___) \ \ /* Constants. */ \ _(KPRI, N , ___, ___) \ @@ -74,7 +75,6 @@ _(NEG, N , ref, ref) \ \ _(ABS, N , ref, ref) \ - _(ATAN2, N , ref, ref) \ _(LDEXP, N , ref, ref) \ _(MIN, C , ref, ref) \ _(MAX, C , ref, ref) \ @@ -95,7 +95,9 @@ _(UREFO, LW, ref, lit) \ _(UREFC, LW, ref, lit) \ _(FREF, R , ref, lit) \ + _(TMPREF, S , ref, lit) \ _(STRREF, N , ref, ref) \ + _(LREF, L , ___, ___) \ \ /* Loads and Stores. These must be in the same order. */ \ _(ALOAD, L , ref, ___) \ @@ -104,7 +106,8 @@ _(FLOAD, L , ref, lit) \ _(XLOAD, L , ref, lit) \ _(SLOAD, L , lit, lit) \ - _(VLOAD, L , ref, ___) \ + _(VLOAD, L , ref, lit) \ + _(ALEN, L , ref, ref) \ \ _(ASTORE, S , ref, ref) \ _(HSTORE, S , ref, ref) \ @@ -120,6 +123,11 @@ _(CNEW, AW, ref, ref) \ _(CNEWI, NW, ref, ref) /* CSE is ok, not marked as A. */ \ \ + /* Buffer operations. */ \ + _(BUFHDR, L , ref, lit) \ + _(BUFPUT, LW, ref, ref) \ + _(BUFSTR, AW, ref, ref) \ + \ /* Barriers. */ \ _(TBAR, S , ref, ___) \ _(OBAR, S , ref, ref) \ @@ -128,12 +136,13 @@ /* Type conversions. */ \ _(CONV, N , ref, lit) \ _(TOBIT, N , ref, ref) \ - _(TOSTR, N , ref, ___) \ + _(TOSTR, N , ref, lit) \ _(STRTO, N , ref, ___) \ \ /* Calls. */ \ - _(CALLN, N , ref, lit) \ - _(CALLL, L , ref, lit) \ + _(CALLN, NW, ref, lit) \ + _(CALLA, AW, ref, lit) \ + _(CALLL, LW, ref, lit) \ _(CALLS, S , ref, lit) \ _(CALLXS, S , ref, ref) \ _(CARG, N , ref, ref) \ @@ -170,8 +179,7 @@ LJ_STATIC_ASSERT((int)IR_XLOAD + IRDELTA_L2S == (int)IR_XSTORE); /* FPMATH sub-functions. ORDER FPM. */ #define IRFPMDEF(_) \ _(FLOOR) _(CEIL) _(TRUNC) /* Must be first and in this order. */ \ - _(SQRT) _(EXP) _(EXP2) _(LOG) _(LOG2) _(LOG10) \ - _(SIN) _(COS) _(TAN) \ + _(SQRT) _(LOG) _(LOG2) \ _(OTHER) typedef enum { @@ -186,6 +194,8 @@ IRFPMDEF(FPMENUM) _(STR_LEN, offsetof(GCstr, len)) \ _(FUNC_ENV, offsetof(GCfunc, l.env)) \ _(FUNC_PC, offsetof(GCfunc, l.pc)) \ + _(FUNC_FFID, offsetof(GCfunc, l.ffid)) \ + _(THREAD_ENV, offsetof(lua_State, env)) \ _(TAB_META, offsetof(GCtab, metatable)) \ _(TAB_ARRAY, offsetof(GCtab, array)) \ _(TAB_NODE, offsetof(GCtab, node)) \ @@ -195,9 +205,15 @@ IRFPMDEF(FPMENUM) _(UDATA_META, offsetof(GCudata, metatable)) \ _(UDATA_UDTYPE, offsetof(GCudata, udtype)) \ _(UDATA_FILE, sizeof(GCudata)) \ + _(SBUF_W, sizeof(GCudata) + offsetof(SBufExt, w)) \ + _(SBUF_E, sizeof(GCudata) + offsetof(SBufExt, e)) \ + _(SBUF_B, sizeof(GCudata) + offsetof(SBufExt, b)) \ + _(SBUF_L, sizeof(GCudata) + offsetof(SBufExt, L)) \ + _(SBUF_REF, sizeof(GCudata) + offsetof(SBufExt, cowref)) \ + _(SBUF_R, sizeof(GCudata) + offsetof(SBufExt, r)) \ _(CDATA_CTYPEID, offsetof(GCcdata, ctypeid)) \ _(CDATA_PTR, sizeof(GCcdata)) \ - _(CDATA_INT, sizeof(GCcdata)) \ + _(CDATA_INT, sizeof(GCcdata)) \ _(CDATA_INT64, sizeof(GCcdata)) \ _(CDATA_INT64_4, sizeof(GCcdata) + 4) @@ -208,18 +224,29 @@ IRFLDEF(FLENUM) IRFL__MAX } IRFieldID; +/* TMPREF mode bits, stored in op2. */ +#define IRTMPREF_IN1 0x01 /* First input value. */ +#define IRTMPREF_OUT1 0x02 /* First output value. */ +#define IRTMPREF_OUT2 0x04 /* Second output value. */ + /* SLOAD mode bits, stored in op2. */ #define IRSLOAD_PARENT 0x01 /* Coalesce with parent trace. */ -#define IRSLOAD_FRAME 0x02 /* Load hiword of frame. */ +#define IRSLOAD_FRAME 0x02 /* Load 32 bits of ftsz. */ #define IRSLOAD_TYPECHECK 0x04 /* Needs type check. */ #define IRSLOAD_CONVERT 0x08 /* Number to integer conversion. */ #define IRSLOAD_READONLY 0x10 /* Read-only, omit slot store. */ #define IRSLOAD_INHERIT 0x20 /* Inherited by exits/side traces. */ +#define IRSLOAD_KEYINDEX 0x40 /* Table traversal key index. */ -/* XLOAD mode, stored in op2. */ -#define IRXLOAD_READONLY 1 /* Load from read-only data. */ -#define IRXLOAD_VOLATILE 2 /* Load from volatile data. */ -#define IRXLOAD_UNALIGNED 4 /* Unaligned load. */ +/* XLOAD mode bits, stored in op2. */ +#define IRXLOAD_READONLY 0x01 /* Load from read-only data. */ +#define IRXLOAD_VOLATILE 0x02 /* Load from volatile data. */ +#define IRXLOAD_UNALIGNED 0x04 /* Unaligned load. */ + +/* BUFHDR mode, stored in op2. */ +#define IRBUFHDR_RESET 0 /* Reset buffer. */ +#define IRBUFHDR_APPEND 1 /* Append to buffer. */ +#define IRBUFHDR_WRITE 2 /* Write to string buffer. */ /* CONV mode, stored in op2. */ #define IRCONV_SRCMASK 0x001f /* Source IRType. */ @@ -227,7 +254,6 @@ IRFLDEF(FLENUM) #define IRCONV_DSH 5 #define IRCONV_NUM_INT ((IRT_NUM<<IRCONV_DSH)|IRT_INT) #define IRCONV_INT_NUM ((IRT_INT<<IRCONV_DSH)|IRT_NUM) -#define IRCONV_TRUNC 0x0400 /* Truncate number to integer. */ #define IRCONV_SEXT 0x0800 /* Sign-extend integer to integer. */ #define IRCONV_MODEMASK 0x0fff #define IRCONV_CONVMASK 0xf000 @@ -237,6 +263,12 @@ IRFLDEF(FLENUM) #define IRCONV_ANY (1<<IRCONV_CSH) /* Any FP number is ok. */ #define IRCONV_INDEX (2<<IRCONV_CSH) /* Check + special backprop rules. */ #define IRCONV_CHECK (3<<IRCONV_CSH) /* Number checked for integerness. */ +#define IRCONV_NONE IRCONV_ANY /* INT|*64 no conv, but change type. */ + +/* TOSTR mode, stored in op2. */ +#define IRTOSTR_INT 0 /* Convert integer to string. */ +#define IRTOSTR_NUM 1 /* Convert number to string. */ +#define IRTOSTR_CHAR 2 /* Convert char value to string. */ /* -- IR operands --------------------------------------------------------- */ @@ -276,7 +308,9 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; /* -- IR instruction types ------------------------------------------------ */ -/* Map of itypes to non-negative numbers. ORDER LJ_T. +#define IRTSIZE_PGC (LJ_GC64 ? 8 : 4) + +/* Map of itypes to non-negative numbers and their sizes. ORDER LJ_T. ** LJ_TUPVAL/LJ_TTRACE never appear in a TValue. Use these itypes for ** IRT_P32 and IRT_P64, which never escape the IR. ** The various integers are only used in the IR and can only escape to @@ -284,12 +318,13 @@ LJ_DATA const uint8_t lj_ir_mode[IR__MAX+1]; ** contiguous and next to IRT_NUM (see the typerange macros below). */ #define IRTDEF(_) \ - _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) _(STR, 4) \ - _(P32, 4) _(THREAD, 4) _(PROTO, 4) _(FUNC, 4) _(P64, 8) _(CDATA, 4) \ - _(TAB, 4) _(UDATA, 4) \ + _(NIL, 4) _(FALSE, 4) _(TRUE, 4) _(LIGHTUD, LJ_64 ? 8 : 4) \ + _(STR, IRTSIZE_PGC) _(P32, 4) _(THREAD, IRTSIZE_PGC) _(PROTO, IRTSIZE_PGC) \ + _(FUNC, IRTSIZE_PGC) _(P64, 8) _(CDATA, IRTSIZE_PGC) _(TAB, IRTSIZE_PGC) \ + _(UDATA, IRTSIZE_PGC) \ _(FLOAT, 4) _(NUM, 8) _(I8, 1) _(U8, 1) _(I16, 2) _(U16, 2) \ _(INT, 4) _(U32, 4) _(I64, 8) _(U64, 8) \ - _(SOFTFP, 4) /* There is room for 9 more types. */ + _(SOFTFP, 4) /* There is room for 8 more types. */ /* IR result type and flags (8 bit). */ typedef enum { @@ -300,6 +335,8 @@ IRTDEF(IRTENUM) /* Native pointer type and the corresponding integer type. */ IRT_PTR = LJ_64 ? IRT_P64 : IRT_P32, + IRT_PGC = LJ_GC64 ? IRT_P64 : IRT_P32, + IRT_IGC = LJ_GC64 ? IRT_I64 : IRT_INT, IRT_INTP = LJ_64 ? IRT_I64 : IRT_INT, IRT_UINTP = LJ_64 ? IRT_U64 : IRT_U32, @@ -353,7 +390,14 @@ typedef struct IRType1 { uint8_t irt; } IRType1; #define irt_isaddr(t) (irt_typerange((t), IRT_LIGHTUD, IRT_UDATA)) #define irt_isint64(t) (irt_typerange((t), IRT_I64, IRT_U64)) -#if LJ_64 +#if LJ_GC64 +/* Include IRT_NIL, so IR(ASMREF_L) (aka REF_NIL) is considered 64 bit. */ +#define IRT_IS64 \ + ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|\ + (1u<<IRT_LIGHTUD)|(1u<<IRT_STR)|(1u<<IRT_THREAD)|(1u<<IRT_PROTO)|\ + (1u<<IRT_FUNC)|(1u<<IRT_CDATA)|(1u<<IRT_TAB)|(1u<<IRT_UDATA)|\ + (1u<<IRT_NIL)) +#elif LJ_64 #define IRT_IS64 \ ((1u<<IRT_NUM)|(1u<<IRT_I64)|(1u<<IRT_U64)|(1u<<IRT_P64)|(1u<<IRT_LIGHTUD)) #else @@ -374,7 +418,7 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) return IRT_INT; else if (tvisnum(tv)) return IRT_NUM; -#if LJ_64 +#if LJ_64 && !LJ_GC64 else if (tvislightud(tv)) return IRT_LIGHTUD; #endif @@ -384,11 +428,12 @@ static LJ_AINLINE IRType itype2irt(const TValue *tv) static LJ_AINLINE uint32_t irt_toitype_(IRType t) { - lua_assert(!LJ_64 || t != IRT_LIGHTUD); + lj_assertX(!LJ_64 || LJ_GC64 || t != IRT_LIGHTUD, + "no plain type tag for lightuserdata"); if (LJ_DUALNUM && t > IRT_NUM) { return LJ_TISNUM; } else { - lua_assert(t <= IRT_NUM); + lj_assertX(t <= IRT_NUM, "no plain type tag for IR type %d", t); return ~(uint32_t)t; } } @@ -451,6 +496,7 @@ typedef uint32_t TRef; #define TREF_REFMASK 0x0000ffff #define TREF_FRAME 0x00010000 #define TREF_CONT 0x00020000 +#define TREF_KEYINDEX 0x00100000 #define TREF(ref, t) ((TRef)((ref) + ((t)<<24))) @@ -464,6 +510,7 @@ typedef uint32_t TRef; #define tref_isnil(tr) (tref_istype((tr), IRT_NIL)) #define tref_isfalse(tr) (tref_istype((tr), IRT_FALSE)) #define tref_istrue(tr) (tref_istype((tr), IRT_TRUE)) +#define tref_islightud(tr) (tref_istype((tr), IRT_LIGHTUD)) #define tref_isstr(tr) (tref_istype((tr), IRT_STR)) #define tref_isfunc(tr) (tref_istype((tr), IRT_FUNC)) #define tref_iscdata(tr) (tref_istype((tr), IRT_CDATA)) @@ -496,7 +543,9 @@ typedef uint32_t TRef; ** +-------+-------+---+---+---+---+ ** | op1 | op2 | t | o | r | s | ** +-------+-------+---+---+---+---+ -** | op12/i/gco | ot | prev | (alternative fields in union) +** | op12/i/gco32 | ot | prev | (alternative fields in union) +** +-------+-------+---+---+---+---+ +** | TValue/gco64 | (2nd IR slot for 64 bit constants) ** +---------------+-------+-------+ ** 32 16 16 ** @@ -524,21 +573,27 @@ typedef union IRIns { ) }; int32_t i; /* 32 bit signed integer literal (overlaps op12). */ - GCRef gcr; /* GCobj constant (overlaps op12). */ - MRef ptr; /* Pointer constant (overlaps op12). */ + GCRef gcr; /* GCobj constant (overlaps op12 or entire slot). */ + MRef ptr; /* Pointer constant (overlaps op12 or entire slot). */ + TValue tv; /* TValue constant (overlaps entire slot). */ } IRIns; -#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)->gcr)) +#define ir_isk64(ir) \ + ((ir)->o == IR_KNUM || (ir)->o == IR_KINT64 || \ + (LJ_GC64 && \ + ((ir)->o == IR_KGC || (ir)->o == IR_KPTR || (ir)->o == IR_KKPTR))) + +#define ir_kgc(ir) check_exp((ir)->o == IR_KGC, gcref((ir)[LJ_GC64].gcr)) #define ir_kstr(ir) (gco2str(ir_kgc((ir)))) #define ir_ktab(ir) (gco2tab(ir_kgc((ir)))) #define ir_kfunc(ir) (gco2func(ir_kgc((ir)))) #define ir_kcdata(ir) (gco2cd(ir_kgc((ir)))) -#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, mref((ir)->ptr, cTValue)) -#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) -#define ir_k64(ir) \ - check_exp((ir)->o == IR_KNUM || (ir)->o == IR_KINT64, mref((ir)->ptr,cTValue)) +#define ir_knum(ir) check_exp((ir)->o == IR_KNUM, &(ir)[1].tv) +#define ir_kint64(ir) check_exp((ir)->o == IR_KINT64, &(ir)[1].tv) +#define ir_k64(ir) check_exp(ir_isk64(ir), &(ir)[1].tv) #define ir_kptr(ir) \ - check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, mref((ir)->ptr, void)) + check_exp((ir)->o == IR_KPTR || (ir)->o == IR_KKPTR, \ + mref((ir)[LJ_GC64].ptr, void)) /* A store or any other op with a non-weak guard has a side-effect. */ static LJ_AINLINE int ir_sideeff(IRIns *ir) diff --git a/src/lj_ircall.h b/src/lj_ircall.h index 9ddfb156..67fb58ae 100644 --- a/src/lj_ircall.h +++ b/src/lj_ircall.h @@ -16,22 +16,26 @@ typedef struct CCallInfo { uint32_t flags; /* Number of arguments and flags. */ } CCallInfo; -#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* Extract # of args. */ +#define CCI_NARGS(ci) ((ci)->flags & 0xff) /* # of args. */ #define CCI_NARGS_MAX 32 /* Max. # of args. */ #define CCI_OTSHIFT 16 #define CCI_OPTYPE(ci) ((ci)->flags >> CCI_OTSHIFT) /* Get op/type. */ +#define CCI_TYPE(ci) (((ci)->flags>>CCI_OTSHIFT) & IRT_TYPE) #define CCI_OPSHIFT 24 #define CCI_OP(ci) ((ci)->flags >> CCI_OPSHIFT) /* Get op. */ #define CCI_CALL_N (IR_CALLN << CCI_OPSHIFT) +#define CCI_CALL_A (IR_CALLA << CCI_OPSHIFT) #define CCI_CALL_L (IR_CALLL << CCI_OPSHIFT) #define CCI_CALL_S (IR_CALLS << CCI_OPSHIFT) #define CCI_CALL_FN (CCI_CALL_N|CCI_CC_FASTCALL) +#define CCI_CALL_FA (CCI_CALL_A|CCI_CC_FASTCALL) #define CCI_CALL_FL (CCI_CALL_L|CCI_CC_FASTCALL) #define CCI_CALL_FS (CCI_CALL_S|CCI_CC_FASTCALL) /* C call info flags. */ +#define CCI_T (IRT_GUARD << CCI_OTSHIFT) /* May throw. */ #define CCI_L 0x0100 /* Implicit L arg. */ #define CCI_CASTU64 0x0200 /* Cast u64 result to number. */ #define CCI_NOFPRCLOBBER 0x0400 /* Does not clobber any FPRs. */ @@ -45,6 +49,17 @@ typedef struct CCallInfo { #define CCI_CC_FASTCALL 0x2000 /* Fastcall calling convention. */ #define CCI_CC_STDCALL 0x3000 /* Stdcall calling convention. */ +/* Extra args for SOFTFP, SPLIT 64 bit. */ +#define CCI_XARGS_SHIFT 14 +#define CCI_XARGS(ci) (((ci)->flags >> CCI_XARGS_SHIFT) & 3) +#define CCI_XA (1u << CCI_XARGS_SHIFT) + +#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) +#define CCI_XNARGS(ci) (CCI_NARGS((ci)) + CCI_XARGS((ci))) +#else +#define CCI_XNARGS(ci) CCI_NARGS((ci)) +#endif + /* Helpers for conditional function definitions. */ #define IRCALLCOND_ANY(x) x @@ -66,6 +81,18 @@ typedef struct CCallInfo { #define IRCALLCOND_SOFTFP_FFI(x) NULL #endif +#if LJ_SOFTFP && LJ_TARGET_MIPS +#define IRCALLCOND_SOFTFP_MIPS(x) x +#else +#define IRCALLCOND_SOFTFP_MIPS(x) NULL +#endif + +#if LJ_SOFTFP && LJ_TARGET_MIPS64 +#define IRCALLCOND_SOFTFP_MIPS64(x) x +#else +#define IRCALLCOND_SOFTFP_MIPS64(x) NULL +#endif + #define LJ_NEED_FP64 (LJ_TARGET_ARM || LJ_TARGET_PPC || LJ_TARGET_MIPS) #if LJ_HASFFI && (LJ_SOFTFP || LJ_NEED_FP64) @@ -86,93 +113,158 @@ typedef struct CCallInfo { #define IRCALLCOND_FFI32(x) NULL #endif +#if LJ_HASBUFFER +#define IRCALLCOND_BUFFER(x) x +#else +#define IRCALLCOND_BUFFER(x) NULL +#endif + +#if LJ_HASBUFFER && LJ_HASFFI +#define IRCALLCOND_BUFFFI(x) x +#else +#define IRCALLCOND_BUFFFI(x) NULL +#endif + #if LJ_SOFTFP -#define ARG1_FP 2 /* Treat as 2 32 bit arguments. */ +#define XA_FP CCI_XA +#define XA2_FP (CCI_XA+CCI_XA) #else -#define ARG1_FP 1 +#define XA_FP 0 +#define XA2_FP 0 +#endif + +#if LJ_SOFTFP32 +#define XA_FP32 CCI_XA +#define XA2_FP32 (CCI_XA+CCI_XA) +#else +#define XA_FP32 0 +#define XA2_FP32 0 #endif #if LJ_32 -#define ARG2_64 4 /* Treat as 4 32 bit arguments. */ +#define XA_64 CCI_XA +#define XA2_64 (CCI_XA+CCI_XA) #else -#define ARG2_64 2 +#define XA_64 0 +#define XA2_64 0 #endif /* Function definitions for CALL* instructions. */ #define IRCALLDEF(_) \ _(ANY, lj_str_cmp, 2, FN, INT, CCI_NOFPRCLOBBER) \ - _(ANY, lj_str_new, 3, S, STR, CCI_L) \ + _(ANY, lj_str_find, 4, N, PGC, 0) \ + _(ANY, lj_str_new, 3, S, STR, CCI_L|CCI_T) \ _(ANY, lj_strscan_num, 2, FN, INT, 0) \ - _(ANY, lj_str_fromint, 2, FN, STR, CCI_L) \ - _(ANY, lj_str_fromnum, 2, FN, STR, CCI_L) \ - _(ANY, lj_tab_new1, 2, FS, TAB, CCI_L) \ - _(ANY, lj_tab_dup, 2, FS, TAB, CCI_L) \ - _(ANY, lj_tab_newkey, 3, S, P32, CCI_L) \ + _(ANY, lj_strfmt_int, 2, FN, STR, CCI_L|CCI_T) \ + _(ANY, lj_strfmt_num, 2, FN, STR, CCI_L|CCI_T) \ + _(ANY, lj_strfmt_char, 2, FN, STR, CCI_L|CCI_T) \ + _(ANY, lj_strfmt_putint, 2, FL, PGC, CCI_T) \ + _(ANY, lj_strfmt_putnum, 2, FL, PGC, CCI_T) \ + _(ANY, lj_strfmt_putquoted, 2, FL, PGC, CCI_T) \ + _(ANY, lj_strfmt_putfxint, 3, L, PGC, XA_64|CCI_T) \ + _(ANY, lj_strfmt_putfnum_int, 3, L, PGC, XA_FP|CCI_T) \ + _(ANY, lj_strfmt_putfnum_uint, 3, L, PGC, XA_FP|CCI_T) \ + _(ANY, lj_strfmt_putfnum, 3, L, PGC, XA_FP|CCI_T) \ + _(ANY, lj_strfmt_putfstr, 3, L, PGC, CCI_T) \ + _(ANY, lj_strfmt_putfchar, 3, L, PGC, CCI_T) \ + _(ANY, lj_buf_putmem, 3, S, PGC, CCI_T) \ + _(ANY, lj_buf_putstr, 2, FL, PGC, CCI_T) \ + _(ANY, lj_buf_putchar, 2, FL, PGC, CCI_T) \ + _(ANY, lj_buf_putstr_reverse, 2, FL, PGC, CCI_T) \ + _(ANY, lj_buf_putstr_lower, 2, FL, PGC, CCI_T) \ + _(ANY, lj_buf_putstr_upper, 2, FL, PGC, CCI_T) \ + _(ANY, lj_buf_putstr_rep, 3, L, PGC, CCI_T) \ + _(ANY, lj_buf_puttab, 5, L, PGC, CCI_T) \ + _(BUFFER, lj_bufx_set, 4, S, NIL, 0) \ + _(BUFFFI, lj_bufx_more, 2, FS, INT, CCI_T) \ + _(BUFFER, lj_serialize_put, 2, FS, PGC, CCI_T) \ + _(BUFFER, lj_serialize_get, 2, FS, PTR, CCI_T) \ + _(BUFFER, lj_serialize_encode, 2, FA, STR, CCI_L|CCI_T) \ + _(BUFFER, lj_serialize_decode, 3, A, INT, CCI_L|CCI_T) \ + _(ANY, lj_buf_tostr, 1, FL, STR, CCI_T) \ + _(ANY, lj_tab_new_ah, 3, A, TAB, CCI_L|CCI_T) \ + _(ANY, lj_tab_new1, 2, FA, TAB, CCI_L|CCI_T) \ + _(ANY, lj_tab_dup, 2, FA, TAB, CCI_L|CCI_T) \ + _(ANY, lj_tab_clear, 1, FS, NIL, 0) \ + _(ANY, lj_tab_newkey, 3, S, PGC, CCI_L|CCI_T) \ + _(ANY, lj_tab_keyindex, 2, FL, INT, 0) \ + _(ANY, lj_vm_next, 2, FL, PTR, 0) \ _(ANY, lj_tab_len, 1, FL, INT, 0) \ + _(ANY, lj_tab_len_hint, 2, FL, INT, 0) \ _(ANY, lj_gc_step_jit, 2, FS, NIL, CCI_L) \ _(ANY, lj_gc_barrieruv, 2, FS, NIL, 0) \ - _(ANY, lj_mem_newgco, 2, FS, P32, CCI_L) \ - _(ANY, lj_math_random_step, 1, FS, NUM, CCI_CASTU64) \ + _(ANY, lj_mem_newgco, 2, FA, PGC, CCI_L|CCI_T) \ + _(ANY, lj_prng_u64d, 1, FS, NUM, CCI_CASTU64) \ _(ANY, lj_vm_modi, 2, FN, INT, 0) \ - _(ANY, sinh, ARG1_FP, N, NUM, 0) \ - _(ANY, cosh, ARG1_FP, N, NUM, 0) \ - _(ANY, tanh, ARG1_FP, N, NUM, 0) \ - _(ANY, fputc, 2, S, INT, 0) \ - _(ANY, fwrite, 4, S, INT, 0) \ - _(ANY, fflush, 1, S, INT, 0) \ + _(ANY, log10, 1, N, NUM, XA_FP) \ + _(ANY, exp, 1, N, NUM, XA_FP) \ + _(ANY, sin, 1, N, NUM, XA_FP) \ + _(ANY, cos, 1, N, NUM, XA_FP) \ + _(ANY, tan, 1, N, NUM, XA_FP) \ + _(ANY, asin, 1, N, NUM, XA_FP) \ + _(ANY, acos, 1, N, NUM, XA_FP) \ + _(ANY, atan, 1, N, NUM, XA_FP) \ + _(ANY, sinh, 1, N, NUM, XA_FP) \ + _(ANY, cosh, 1, N, NUM, XA_FP) \ + _(ANY, tanh, 1, N, NUM, XA_FP) \ + _(ANY, fputc, 2, S, INT, 0) \ + _(ANY, fwrite, 4, S, INT, 0) \ + _(ANY, fflush, 1, S, INT, 0) \ /* ORDER FPM */ \ - _(FPMATH, lj_vm_floor, ARG1_FP, N, NUM, 0) \ - _(FPMATH, lj_vm_ceil, ARG1_FP, N, NUM, 0) \ - _(FPMATH, lj_vm_trunc, ARG1_FP, N, NUM, 0) \ - _(FPMATH, sqrt, ARG1_FP, N, NUM, 0) \ - _(FPMATH, exp, ARG1_FP, N, NUM, 0) \ - _(FPMATH, lj_vm_exp2, ARG1_FP, N, NUM, 0) \ - _(FPMATH, log, ARG1_FP, N, NUM, 0) \ - _(FPMATH, lj_vm_log2, ARG1_FP, N, NUM, 0) \ - _(FPMATH, log10, ARG1_FP, N, NUM, 0) \ - _(FPMATH, sin, ARG1_FP, N, NUM, 0) \ - _(FPMATH, cos, ARG1_FP, N, NUM, 0) \ - _(FPMATH, tan, ARG1_FP, N, NUM, 0) \ - _(FPMATH, lj_vm_powi, ARG1_FP+1, N, NUM, 0) \ - _(FPMATH, pow, ARG1_FP*2, N, NUM, 0) \ - _(FPMATH, atan2, ARG1_FP*2, N, NUM, 0) \ - _(FPMATH, ldexp, ARG1_FP+1, N, NUM, 0) \ - _(SOFTFP, lj_vm_tobit, 2, N, INT, 0) \ - _(SOFTFP, softfp_add, 4, N, NUM, 0) \ - _(SOFTFP, softfp_sub, 4, N, NUM, 0) \ - _(SOFTFP, softfp_mul, 4, N, NUM, 0) \ - _(SOFTFP, softfp_div, 4, N, NUM, 0) \ - _(SOFTFP, softfp_cmp, 4, N, NIL, 0) \ + _(FPMATH, lj_vm_floor, 1, N, NUM, XA_FP) \ + _(FPMATH, lj_vm_ceil, 1, N, NUM, XA_FP) \ + _(FPMATH, lj_vm_trunc, 1, N, NUM, XA_FP) \ + _(FPMATH, sqrt, 1, N, NUM, XA_FP) \ + _(ANY, log, 1, N, NUM, XA_FP) \ + _(ANY, lj_vm_log2, 1, N, NUM, XA_FP) \ + _(ANY, pow, 2, N, NUM, XA2_FP) \ + _(ANY, atan2, 2, N, NUM, XA2_FP) \ + _(ANY, ldexp, 2, N, NUM, XA_FP) \ + _(SOFTFP, lj_vm_tobit, 1, N, INT, XA_FP32) \ + _(SOFTFP, softfp_add, 2, N, NUM, XA2_FP32) \ + _(SOFTFP, softfp_sub, 2, N, NUM, XA2_FP32) \ + _(SOFTFP, softfp_mul, 2, N, NUM, XA2_FP32) \ + _(SOFTFP, softfp_div, 2, N, NUM, XA2_FP32) \ + _(SOFTFP, softfp_cmp, 2, N, NIL, XA2_FP32) \ _(SOFTFP, softfp_i2d, 1, N, NUM, 0) \ - _(SOFTFP, softfp_d2i, 2, N, INT, 0) \ + _(SOFTFP, softfp_d2i, 1, N, INT, XA_FP32) \ + _(SOFTFP_MIPS, lj_vm_sfmin, 2, N, NUM, XA2_FP32) \ + _(SOFTFP_MIPS, lj_vm_sfmax, 2, N, NUM, XA2_FP32) \ + _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ - _(SOFTFP_FFI, softfp_d2ui, 2, N, INT, 0) \ - _(SOFTFP_FFI, softfp_d2f, 2, N, FLOAT, 0) \ + _(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \ + _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ _(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \ - _(FP64_FFI, fp64_l2d, 2, N, NUM, 0) \ - _(FP64_FFI, fp64_ul2d, 2, N, NUM, 0) \ - _(FP64_FFI, fp64_l2f, 2, N, FLOAT, 0) \ - _(FP64_FFI, fp64_ul2f, 2, N, FLOAT, 0) \ - _(FP64_FFI, fp64_d2l, ARG1_FP, N, I64, 0) \ - _(FP64_FFI, fp64_d2ul, ARG1_FP, N, U64, 0) \ + _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \ + _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \ + _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \ + _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \ + _(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \ + _(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \ _(FP64_FFI, fp64_f2l, 1, N, I64, 0) \ _(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \ - _(FFI, lj_carith_divi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ - _(FFI, lj_carith_divu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ - _(FFI, lj_carith_modi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ - _(FFI, lj_carith_modu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ - _(FFI, lj_carith_powi64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) \ - _(FFI, lj_carith_powu64, ARG2_64, N, U64, CCI_NOFPRCLOBBER) \ - _(FFI, lj_cdata_setfin, 2, FN, P32, CCI_L) \ - _(FFI, strlen, 1, L, INTP, 0) \ - _(FFI, memcpy, 3, S, PTR, 0) \ - _(FFI, memset, 3, S, PTR, 0) \ - _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ - _(FFI32, lj_carith_mul64, ARG2_64, N, I64, CCI_NOFPRCLOBBER) + _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ + _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ + _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ + _(FFI, lj_carith_modu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ + _(FFI, lj_carith_powi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ + _(FFI, lj_carith_powu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ + _(FFI, lj_cdata_newv, 4, S, CDATA, CCI_L) \ + _(FFI, lj_cdata_setfin, 4, S, NIL, CCI_L) \ + _(FFI, strlen, 1, L, INTP, 0) \ + _(FFI, memcpy, 3, S, PTR, 0) \ + _(FFI, memset, 3, S, PTR, 0) \ + _(FFI, lj_vm_errno, 0, S, INT, CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_mul64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_shl64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_shr64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_sar64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_rol64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ + _(FFI32, lj_carith_ror64, 2, N, U64, XA_64|CCI_NOFPRCLOBBER) \ \ /* End of list. */ @@ -220,6 +312,22 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1]; #define fp64_f2l __aeabi_f2lz #define fp64_f2ul __aeabi_f2ulz #endif +#elif LJ_TARGET_MIPS || LJ_TARGET_PPC +#define softfp_add __adddf3 +#define softfp_sub __subdf3 +#define softfp_mul __muldf3 +#define softfp_div __divdf3 +#define softfp_cmp __ledf2 +#define softfp_i2d __floatsidf +#define softfp_d2i __fixdfsi +#define softfp_ui2d __floatunsidf +#define softfp_f2d __extendsfdf2 +#define softfp_d2ui __fixunsdfsi +#define softfp_d2f __truncdfsf2 +#define softfp_i2f __floatsisf +#define softfp_ui2f __floatunsisf +#define softfp_f2i __fixsfsi +#define softfp_f2ui __fixunssfsi #else #error "Missing soft-float definitions for target architecture" #endif @@ -240,10 +348,14 @@ extern float softfp_ui2f(uint32_t a); extern int32_t softfp_f2i(float a); extern uint32_t softfp_f2ui(float a); #endif +#if LJ_TARGET_MIPS +extern double lj_vm_sfmin(double a, double b); +extern double lj_vm_sfmax(double a, double b); +#endif #endif #if LJ_HASFFI && LJ_NEED_FP64 && !(LJ_TARGET_ARM && LJ_SOFTFP) -#ifdef __GNUC__ +#if defined(__GNUC__) || defined(__clang__) #define fp64_l2d __floatdidf #define fp64_ul2d __floatundidf #define fp64_l2f __floatdisf diff --git a/src/lj_iropt.h b/src/lj_iropt.h index e89d796f..d239f173 100644 --- a/src/lj_iropt.h +++ b/src/lj_iropt.h @@ -36,11 +36,11 @@ static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J) return ref; } +LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs); + /* Interning of constants. */ LJ_FUNC TRef LJ_FASTCALL lj_ir_kint(jit_State *J, int32_t k); -LJ_FUNC void lj_ir_k64_freeall(jit_State *J); -LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, cTValue *tv); -LJ_FUNC cTValue *lj_ir_k64_find(jit_State *J, uint64_t u64); +LJ_FUNC TRef lj_ir_k64(jit_State *J, IROp op, uint64_t u64); LJ_FUNC TRef lj_ir_knum_u64(jit_State *J, uint64_t u64); LJ_FUNC TRef lj_ir_knumint(jit_State *J, lua_Number n); LJ_FUNC TRef lj_ir_kint64(jit_State *J, uint64_t u64); @@ -48,6 +48,7 @@ LJ_FUNC TRef lj_ir_kgc(jit_State *J, GCobj *o, IRType t); LJ_FUNC TRef lj_ir_kptr_(jit_State *J, IROp op, void *ptr); LJ_FUNC TRef lj_ir_knull(jit_State *J, IRType t); LJ_FUNC TRef lj_ir_kslot(jit_State *J, TRef key, IRRef slot); +LJ_FUNC TRef lj_ir_ktrace(jit_State *J); #if LJ_64 #define lj_ir_kintp(J, k) lj_ir_kint64(J, (uint64_t)(k)) @@ -74,8 +75,8 @@ static LJ_AINLINE TRef lj_ir_knum(jit_State *J, lua_Number n) #define lj_ir_knum_tobit(J) lj_ir_knum_u64(J, U64x(43380000,00000000)) /* Special 128 bit SIMD constants. */ -#define lj_ir_knum_abs(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_ABS)) -#define lj_ir_knum_neg(J) lj_ir_k64(J, IR_KNUM, LJ_KSIMD(J, LJ_KSIMD_NEG)) +#define lj_ir_ksimd(J, idx) \ + lj_ir_ggfload(J, IRT_NUM, (uintptr_t)LJ_KSIMD(J, idx) - (uintptr_t)J2GG(J)) /* Access to constants. */ LJ_FUNC void lj_ir_kvalue(lua_State *L, TValue *tv, const IRIns *ir); @@ -119,10 +120,11 @@ LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_fload(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_xload(jit_State *J); -LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J); +LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J); LJ_FUNC TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J); LJ_FUNC int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J); LJ_FUNC int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim); +LJ_FUNC int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim); LJ_FUNC int lj_opt_fwd_wasnonnil(jit_State *J, IROpT loadop, IRRef xref); /* Dead-store elimination. */ @@ -143,13 +145,12 @@ LJ_FUNC TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc, IROp op); LJ_FUNC TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc); LJ_FUNC TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc); -LJ_FUNC TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc); LJ_FUNC IRType lj_opt_narrow_forl(jit_State *J, cTValue *forbase); /* Optimization passes. */ LJ_FUNC void lj_opt_dce(jit_State *J); LJ_FUNC int lj_opt_loop(jit_State *J); -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) +#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) LJ_FUNC void lj_opt_split(jit_State *J); #else #define lj_opt_split(J) UNUSED(J) diff --git a/src/lj_jit.h b/src/lj_jit.h index 02850935..32b3861a 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -7,73 +7,88 @@ #define _LJ_JIT_H #include "lj_obj.h" +#if LJ_HASJIT #include "lj_ir.h" -/* JIT engine flags. */ +/* -- JIT engine flags ---------------------------------------------------- */ + +/* General JIT engine flags. 4 bits. */ #define JIT_F_ON 0x00000001 -/* CPU-specific JIT engine flags. */ +/* CPU-specific JIT engine flags. 12 bits. Flags and strings must match. */ +#define JIT_F_CPU 0x00000010 + #if LJ_TARGET_X86ORX64 -#define JIT_F_CMOV 0x00000010 -#define JIT_F_SSE2 0x00000020 -#define JIT_F_SSE3 0x00000040 -#define JIT_F_SSE4_1 0x00000080 -#define JIT_F_P4 0x00000100 -#define JIT_F_PREFER_IMUL 0x00000200 -#define JIT_F_SPLIT_XMM 0x00000400 -#define JIT_F_LEA_AGU 0x00000800 - -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_CMOV -#define JIT_F_CPUSTRING "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM" + +#define JIT_F_SSE3 (JIT_F_CPU << 0) +#define JIT_F_SSE4_1 (JIT_F_CPU << 1) +#define JIT_F_BMI2 (JIT_F_CPU << 2) + + +#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2" + #elif LJ_TARGET_ARM -#define JIT_F_ARMV6_ 0x00000010 -#define JIT_F_ARMV6T2_ 0x00000020 -#define JIT_F_ARMV7 0x00000040 -#define JIT_F_VFPV2 0x00000080 -#define JIT_F_VFPV3 0x00000100 - -#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7) -#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7) + +#define JIT_F_ARMV6_ (JIT_F_CPU << 0) +#define JIT_F_ARMV6T2_ (JIT_F_CPU << 1) +#define JIT_F_ARMV7 (JIT_F_CPU << 2) +#define JIT_F_ARMV8 (JIT_F_CPU << 3) +#define JIT_F_VFPV2 (JIT_F_CPU << 4) +#define JIT_F_VFPV3 (JIT_F_CPU << 5) + +#define JIT_F_ARMV6 (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) +#define JIT_F_ARMV6T2 (JIT_F_ARMV6T2_|JIT_F_ARMV7|JIT_F_ARMV8) #define JIT_F_VFP (JIT_F_VFPV2|JIT_F_VFPV3) -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_ARMV6_ -#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3" +#define JIT_F_CPUSTRING "\5ARMv6\7ARMv6T2\5ARMv7\5ARMv8\5VFPv2\5VFPv3" + #elif LJ_TARGET_PPC -#define JIT_F_SQRT 0x00000010 -#define JIT_F_ROUND 0x00000020 -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_SQRT +#define JIT_F_SQRT (JIT_F_CPU << 0) +#define JIT_F_ROUND (JIT_F_CPU << 1) + #define JIT_F_CPUSTRING "\4SQRT\5ROUND" + #elif LJ_TARGET_MIPS -#define JIT_F_MIPS32R2 0x00000010 -/* Names for the CPU-specific flags. Must match the order above. */ -#define JIT_F_CPU_FIRST JIT_F_MIPS32R2 +#define JIT_F_MIPSXXR2 (JIT_F_CPU << 0) + +#if LJ_TARGET_MIPS32 +#if LJ_TARGET_MIPSR6 +#define JIT_F_CPUSTRING "\010MIPS32R6" +#else #define JIT_F_CPUSTRING "\010MIPS32R2" +#endif +#else +#if LJ_TARGET_MIPSR6 +#define JIT_F_CPUSTRING "\010MIPS64R6" #else -#define JIT_F_CPU_FIRST 0 +#define JIT_F_CPUSTRING "\010MIPS64R2" +#endif +#endif + +#else + #define JIT_F_CPUSTRING "" + #endif -/* Optimization flags. */ +/* Optimization flags. 12 bits. */ +#define JIT_F_OPT 0x00010000 #define JIT_F_OPT_MASK 0x0fff0000 -#define JIT_F_OPT_FOLD 0x00010000 -#define JIT_F_OPT_CSE 0x00020000 -#define JIT_F_OPT_DCE 0x00040000 -#define JIT_F_OPT_FWD 0x00080000 -#define JIT_F_OPT_DSE 0x00100000 -#define JIT_F_OPT_NARROW 0x00200000 -#define JIT_F_OPT_LOOP 0x00400000 -#define JIT_F_OPT_ABC 0x00800000 -#define JIT_F_OPT_SINK 0x01000000 -#define JIT_F_OPT_FUSE 0x02000000 +#define JIT_F_OPT_FOLD (JIT_F_OPT << 0) +#define JIT_F_OPT_CSE (JIT_F_OPT << 1) +#define JIT_F_OPT_DCE (JIT_F_OPT << 2) +#define JIT_F_OPT_FWD (JIT_F_OPT << 3) +#define JIT_F_OPT_DSE (JIT_F_OPT << 4) +#define JIT_F_OPT_NARROW (JIT_F_OPT << 5) +#define JIT_F_OPT_LOOP (JIT_F_OPT << 6) +#define JIT_F_OPT_ABC (JIT_F_OPT << 7) +#define JIT_F_OPT_SINK (JIT_F_OPT << 8) +#define JIT_F_OPT_FUSE (JIT_F_OPT << 9) /* Optimizations names for -O. Must match the order above. */ -#define JIT_F_OPT_FIRST JIT_F_OPT_FOLD #define JIT_F_OPTSTRING \ "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse" @@ -85,6 +100,8 @@ JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE) #define JIT_F_OPT_DEFAULT JIT_F_OPT_3 +/* -- JIT engine parameters ----------------------------------------------- */ + #if LJ_TARGET_WINDOWS || LJ_64 /* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */ #define JIT_P_sizemcode_DEFAULT 64 @@ -100,6 +117,7 @@ _(\012, maxirconst, 500) /* Max. # of IR constants of a trace. */ \ _(\007, maxside, 100) /* Max. # of side traces of a root trace. */ \ _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ + _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \ \ _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ @@ -126,11 +144,14 @@ JIT_PARAMDEF(JIT_PARAMENUM) #define JIT_PARAMSTR(len, name, value) #len #name #define JIT_P_STRING JIT_PARAMDEF(JIT_PARAMSTR) +/* -- JIT engine data structures ------------------------------------------ */ + /* Trace compiler state. */ typedef enum { LJ_TRACE_IDLE, /* Trace compiler idle. */ LJ_TRACE_ACTIVE = 0x10, LJ_TRACE_RECORD, /* Bytecode recording active. */ + LJ_TRACE_RECORD_1ST, /* Record 1st instruction, too. */ LJ_TRACE_START, /* New trace started. */ LJ_TRACE_END, /* End of trace. */ LJ_TRACE_ASM, /* Assemble trace. */ @@ -165,6 +186,7 @@ typedef struct MCLink { typedef struct SnapShot { uint32_t mapofs; /* Offset into snapshot map. */ IRRef1 ref; /* First IR ref for this snapshot. */ + uint16_t mcofs; /* Offset into machine code in MCode units. */ uint8_t nslots; /* Number of valid slots. */ uint8_t topslot; /* Maximum frame extent. */ uint8_t nent; /* Number of compressed entries. */ @@ -180,20 +202,35 @@ typedef uint32_t SnapEntry; #define SNAP_CONT 0x020000 /* Continuation slot. */ #define SNAP_NORESTORE 0x040000 /* No need to restore slot. */ #define SNAP_SOFTFPNUM 0x080000 /* Soft-float number. */ +#define SNAP_KEYINDEX 0x100000 /* Traversal key index. */ LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME); LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT); +LJ_STATIC_ASSERT(SNAP_KEYINDEX == TREF_KEYINDEX); #define SNAP(slot, flags, ref) (((SnapEntry)(slot) << 24) + (flags) + (ref)) #define SNAP_TR(slot, tr) \ - (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK))) + (((SnapEntry)(slot) << 24) + \ + ((tr) & (TREF_KEYINDEX|TREF_CONT|TREF_FRAME|TREF_REFMASK))) +#if !LJ_FR2 #define SNAP_MKPC(pc) ((SnapEntry)u32ptr(pc)) +#endif #define SNAP_MKFTSZ(ftsz) ((SnapEntry)(ftsz)) #define snap_ref(sn) ((sn) & 0xffff) #define snap_slot(sn) ((BCReg)((sn) >> 24)) #define snap_isframe(sn) ((sn) & SNAP_FRAME) -#define snap_pc(sn) ((const BCIns *)(uintptr_t)(sn)) #define snap_setref(sn, ref) (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref)) +static LJ_AINLINE const BCIns *snap_pc(SnapEntry *sn) +{ +#if LJ_FR2 + uint64_t pcbase; + memcpy(&pcbase, sn, sizeof(uint64_t)); + return (const BCIns *)(pcbase >> 8); +#else + return (const BCIns *)(uintptr_t)*sn; +#endif +} + /* Snapshot and exit numbers. */ typedef uint32_t SnapNo; typedef uint32_t ExitNo; @@ -211,7 +248,8 @@ typedef enum { LJ_TRLINK_UPREC, /* Up-recursion. */ LJ_TRLINK_DOWNREC, /* Down-recursion. */ LJ_TRLINK_INTERP, /* Fallback to interpreter. */ - LJ_TRLINK_RETURN /* Return to interpreter. */ + LJ_TRLINK_RETURN, /* Return to interpreter. */ + LJ_TRLINK_STITCH /* Trace stitching. */ } TraceLink; /* Trace object. */ @@ -219,6 +257,9 @@ typedef struct GCtrace { GCHeader; uint16_t nsnap; /* Number of snapshots. */ IRRef nins; /* Next IR instruction. Biased with REF_BIAS. */ +#if LJ_GC64 + uint32_t unused_gc64; +#endif GCRef gclist; IRIns *ir; /* IR instructions/constants. Biased with REF_BIAS. */ IRRef nk; /* Lowest IR constant. Biased with REF_BIAS. */ @@ -294,6 +335,16 @@ typedef struct ScEvEntry { uint8_t dir; /* Direction. 1: +, 0: -. */ } ScEvEntry; +/* Reverse bytecode map (IRRef -> PC). Only for selected instructions. */ +typedef struct RBCHashEntry { + MRef pc; /* Bytecode PC. */ + GCRef pt; /* Prototype. */ + IRRef ref; /* IR reference. */ +} RBCHashEntry; + +/* Number of slots in the reverse bytecode hash table. Must be a power of 2. */ +#define RBCHASH_SLOTS 8 + /* 128 bit SIMD constants. */ enum { LJ_KSIMD_ABS, @@ -301,12 +352,53 @@ enum { LJ_KSIMD__MAX }; +enum { +#if LJ_TARGET_X86ORX64 + LJ_K64_TOBIT, /* 2^52 + 2^51 */ + LJ_K64_2P64, /* 2^64 */ + LJ_K64_M2P64, /* -2^64 */ +#if LJ_32 + LJ_K64_M2P64_31, /* -2^64 or -2^31 */ +#else + LJ_K64_M2P64_31 = LJ_K64_M2P64, +#endif +#endif +#if LJ_TARGET_MIPS + LJ_K64_2P31, /* 2^31 */ +#if LJ_64 + LJ_K64_2P63, /* 2^63 */ + LJ_K64_M2P64, /* -2^64 */ +#endif +#endif + LJ_K64__MAX, +}; +#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS) + +enum { +#if LJ_TARGET_X86ORX64 + LJ_K32_M2P64_31, /* -2^64 or -2^31 */ +#endif +#if LJ_TARGET_PPC + LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ + LJ_K32_2P52, /* 2^52 */ +#endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS + LJ_K32_2P31, /* 2^31 */ +#endif +#if LJ_TARGET_MIPS64 + LJ_K32_2P63, /* 2^63 */ + LJ_K32_M2P64, /* -2^64 */ +#endif + LJ_K32__MAX +}; +#define LJ_K32__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_PPC || LJ_TARGET_MIPS) + /* Get 16 byte aligned pointer to SIMD constant. */ #define LJ_KSIMD(J, n) \ ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15)) /* Set/reset flag to activate the SPLIT pass for the current trace. */ -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) +#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) #define lj_needsplit(J) (J->needsplit = 1) #define lj_resetsplit(J) (J->needsplit = 0) #else @@ -317,13 +409,14 @@ enum { /* Fold state is used to fold instructions on-the-fly. */ typedef struct FoldState { IRIns ins; /* Currently emitted instruction. */ - IRIns left; /* Instruction referenced by left operand. */ - IRIns right; /* Instruction referenced by right operand. */ + IRIns left[2]; /* Instruction referenced by left operand. */ + IRIns right[2]; /* Instruction referenced by right operand. */ } FoldState; /* JIT compiler state. */ typedef struct jit_State { GCtrace cur; /* Current trace. */ + GCtrace *curfinal; /* Final address of current trace (set during asm). */ lua_State *L; /* Current Lua state. */ const BCIns *pc; /* Current PC. */ @@ -353,8 +446,13 @@ typedef struct jit_State { int32_t framedepth; /* Current frame depth. */ int32_t retdepth; /* Return frame depth (count of RETF). */ - MRef k64; /* Pointer to chained array of 64 bit constants. */ +#if LJ_K32__USED + uint32_t k32[LJ_K32__MAX]; /* Common 4 byte constants used by backends. */ +#endif TValue ksimd[LJ_KSIMD__MAX*2+1]; /* 16 byte aligned SIMD constants. */ +#if LJ_K64__USED + TValue k64[LJ_K64__MAX]; /* Common 8 byte constants. */ +#endif IRIns *irbuf; /* Temp. IR instruction buffer. Biased with REF_BIAS. */ IRRef irtoplim; /* Upper limit of instuction buffer (biased). */ @@ -367,13 +465,15 @@ typedef struct jit_State { MSize sizesnapmap; /* Size of temp. snapshot map buffer. */ PostProc postproc; /* Required post-processing after execution. */ -#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI) - int needsplit; /* Need SPLIT pass. */ +#if LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI) + uint8_t needsplit; /* Need SPLIT pass. */ #endif + uint8_t retryrec; /* Retry recording. */ GCRef *trace; /* Array of traces. */ TraceNo freetrace; /* Start of scan for next free trace. */ MSize sizetrace; /* Size of trace array. */ + IRRef1 ktrace; /* Reference to KGC with GCtrace. */ IRRef1 chain[IR__MAX]; /* IR instruction skip-list chain anchors. */ TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA]; /* Stack slot map. */ @@ -384,7 +484,10 @@ typedef struct jit_State { HotPenalty penalty[PENALTY_SLOTS]; /* Penalty slots. */ uint32_t penaltyslot; /* Round-robin index into penalty slots. */ - uint32_t prngstate; /* PRNG state. */ + +#ifdef LUAJIT_ENABLE_TABLE_BUMP + RBCHashEntry rbchash[RBCHASH_SLOTS]; /* Reverse bytecode map. */ +#endif BPropEntry bpropcache[BPROP_SLOTS]; /* Backpropagation cache slots. */ uint32_t bpropslot; /* Round-robin index into bpropcache slots. */ @@ -394,6 +497,7 @@ typedef struct jit_State { const BCIns *startpc; /* Bytecode PC of starting instruction. */ TraceNo parent; /* Parent of current side trace (0 for root traces). */ ExitNo exitno; /* Exit number in parent of current side trace. */ + int exitcode; /* Exit code from unwound trace. */ BCIns *patchpc; /* PC for pending re-patch. */ BCIns patchins; /* Instruction for pending re-patch. */ @@ -406,14 +510,19 @@ typedef struct jit_State { size_t szallmcarea; /* Total size of all allocated mcode areas. */ TValue errinfo; /* Additional info element for trace errors. */ + +#if LJ_HASPROFILE + GCproto *prev_pt; /* Previous prototype. */ + BCLine prev_line; /* Previous line. */ + int prof_mode; /* Profiling mode: 0, 'f', 'l'. */ +#endif } jit_State; -/* Trivial PRNG e.g. used for penalty randomization. */ -static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits) -{ - /* Yes, this LCG is very weak, but that doesn't matter for our use case. */ - J->prngstate = J->prngstate * 1103515245 + 12345; - return J->prngstate >> (32-bits); -} +#ifdef LUA_USE_ASSERT +#define lj_assertJ(c, ...) lj_assertG_(J2G(J), (c), __VA_ARGS__) +#else +#define lj_assertJ(c, ...) ((void)J) +#endif +#endif #endif diff --git a/src/lj_lex.c b/src/lj_lex.c index a74b4d6d..463a87ce 100644 --- a/src/lj_lex.c +++ b/src/lj_lex.c @@ -12,6 +12,7 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_err.h" +#include "lj_buf.h" #include "lj_str.h" #if LJ_HASFFI #include "lj_tab.h" @@ -24,6 +25,7 @@ #include "lj_parse.h" #include "lj_char.h" #include "lj_strscan.h" +#include "lj_strfmt.h" /* Lua lexer token names. */ static const char *const tokennames[] = { @@ -37,54 +39,54 @@ TKDEF(TKSTR1, TKSTR2) /* -- Buffer handling ----------------------------------------------------- */ -#define char2int(c) ((int)(uint8_t)(c)) -#define next(ls) \ - (ls->current = (ls->n--) > 0 ? char2int(*ls->p++) : fillbuf(ls)) -#define save_and_next(ls) (save(ls, ls->current), next(ls)) -#define currIsNewline(ls) (ls->current == '\n' || ls->current == '\r') -#define END_OF_STREAM (-1) +#define LEX_EOF (-1) +#define lex_iseol(ls) (ls->c == '\n' || ls->c == '\r') -static int fillbuf(LexState *ls) +/* Get more input from reader. */ +static LJ_NOINLINE LexChar lex_more(LexState *ls) { size_t sz; - const char *buf = ls->rfunc(ls->L, ls->rdata, &sz); - if (buf == NULL || sz == 0) return END_OF_STREAM; - if (sz >= LJ_MAX_MEM) { + const char *p = ls->rfunc(ls->L, ls->rdata, &sz); + if (p == NULL || sz == 0) return LEX_EOF; + if (sz >= LJ_MAX_BUF) { if (sz != ~(size_t)0) lj_err_mem(ls->L); + sz = ~(uintptr_t)0 - (uintptr_t)p; + if (sz >= LJ_MAX_BUF) sz = LJ_MAX_BUF-1; ls->endmark = 1; } - ls->n = (MSize)sz - 1; - ls->p = buf; - return char2int(*(ls->p++)); + ls->pe = p + sz; + ls->p = p + 1; + return (LexChar)(uint8_t)p[0]; } -static LJ_NOINLINE void save_grow(LexState *ls, int c) +/* Get next character. */ +static LJ_AINLINE LexChar lex_next(LexState *ls) { - MSize newsize; - if (ls->sb.sz >= LJ_MAX_STR/2) - lj_lex_error(ls, 0, LJ_ERR_XELEM); - newsize = ls->sb.sz * 2; - lj_str_resizebuf(ls->L, &ls->sb, newsize); - ls->sb.buf[ls->sb.n++] = (char)c; + return (ls->c = ls->p < ls->pe ? (LexChar)(uint8_t)*ls->p++ : lex_more(ls)); } -static LJ_AINLINE void save(LexState *ls, int c) +/* Save character. */ +static LJ_AINLINE void lex_save(LexState *ls, LexChar c) { - if (LJ_UNLIKELY(ls->sb.n + 1 > ls->sb.sz)) - save_grow(ls, c); - else - ls->sb.buf[ls->sb.n++] = (char)c; + lj_buf_putb(&ls->sb, c); +} + +/* Save previous character and get next character. */ +static LJ_AINLINE LexChar lex_savenext(LexState *ls) +{ + lex_save(ls, ls->c); + return lex_next(ls); } -static void inclinenumber(LexState *ls) +/* Skip line break. Handles "\n", "\r", "\r\n" or "\n\r". */ +static void lex_newline(LexState *ls) { - int old = ls->current; - lua_assert(currIsNewline(ls)); - next(ls); /* skip `\n' or `\r' */ - if (currIsNewline(ls) && ls->current != old) - next(ls); /* skip `\n\r' or `\r\n' */ + LexChar old = ls->c; + lj_assertLS(lex_iseol(ls), "bad usage"); + lex_next(ls); /* Skip "\n" or "\r". */ + if (lex_iseol(ls) && ls->c != old) lex_next(ls); /* Skip "\n\r" or "\r\n". */ if (++ls->linenumber >= LJ_MAX_LINE) - lj_lex_error(ls, ls->token, LJ_ERR_XLINES); + lj_lex_error(ls, ls->tok, LJ_ERR_XLINES); } /* -- Scanner for terminals ----------------------------------------------- */ @@ -93,19 +95,17 @@ static void inclinenumber(LexState *ls) static void lex_number(LexState *ls, TValue *tv) { StrScanFmt fmt; - int c, xp = 'e'; - lua_assert(lj_char_isdigit(ls->current)); - if ((c = ls->current) == '0') { - save_and_next(ls); - if ((ls->current | 0x20) == 'x') xp = 'p'; - } - while (lj_char_isident(ls->current) || ls->current == '.' || - ((ls->current == '-' || ls->current == '+') && (c | 0x20) == xp)) { - c = ls->current; - save_and_next(ls); + LexChar c, xp = 'e'; + lj_assertLS(lj_char_isdigit(ls->c), "bad usage"); + if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x') + xp = 'p'; + while (lj_char_isident(ls->c) || ls->c == '.' || + ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) { + c = ls->c; + lex_savenext(ls); } - save(ls, '\0'); - fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv, + lex_save(ls, '\0'); + fmt = lj_strscan_scan((const uint8_t *)ls->sb.b, sbuflen(&ls->sb)-1, tv, (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); if (LJ_DUALNUM && fmt == STRSCAN_INT) { @@ -116,12 +116,9 @@ static void lex_number(LexState *ls, TValue *tv) } else if (fmt != STRSCAN_ERROR) { lua_State *L = ls->L; GCcdata *cd; - lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG); - if (!ctype_ctsG(G(L))) { - ptrdiff_t oldtop = savestack(L, L->top); - luaopen_ffi(L); /* Load FFI library on-demand. */ - L->top = restorestack(L, oldtop); - } + lj_assertLS(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG, + "unexpected number format %d", fmt); + ctype_loadffi(L); if (fmt == STRSCAN_IMAG) { cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double)); ((double *)cdataptr(cd))[0] = 0; @@ -133,65 +130,66 @@ static void lex_number(LexState *ls, TValue *tv) lj_parse_keepcdata(ls, tv, cd); #endif } else { - lua_assert(fmt == STRSCAN_ERROR); + lj_assertLS(fmt == STRSCAN_ERROR, + "unexpected number format %d", fmt); lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); } } -static int skip_sep(LexState *ls) +/* Skip equal signs for "[=...=[" and "]=...=]" and return their count. */ +static int lex_skipeq(LexState *ls) { int count = 0; - int s = ls->current; - lua_assert(s == '[' || s == ']'); - save_and_next(ls); - while (ls->current == '=' && count < 0x20000000) { - save_and_next(ls); + LexChar s = ls->c; + lj_assertLS(s == '[' || s == ']', "bad usage"); + while (lex_savenext(ls) == '=' && count < 0x20000000) count++; - } - return (ls->current == s) ? count : (-count) - 1; + return (ls->c == s) ? count : (-count) - 1; } -static void read_long_string(LexState *ls, TValue *tv, int sep) +/* Parse a long string or long comment (tv set to NULL). */ +static void lex_longstring(LexState *ls, TValue *tv, int sep) { - save_and_next(ls); /* skip 2nd `[' */ - if (currIsNewline(ls)) /* string starts with a newline? */ - inclinenumber(ls); /* skip it */ + lex_savenext(ls); /* Skip second '['. */ + if (lex_iseol(ls)) /* Skip initial newline. */ + lex_newline(ls); for (;;) { - switch (ls->current) { - case END_OF_STREAM: + switch (ls->c) { + case LEX_EOF: lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); break; case ']': - if (skip_sep(ls) == sep) { - save_and_next(ls); /* skip 2nd `]' */ + if (lex_skipeq(ls) == sep) { + lex_savenext(ls); /* Skip second ']'. */ goto endloop; } break; case '\n': case '\r': - save(ls, '\n'); - inclinenumber(ls); - if (!tv) lj_str_resetbuf(&ls->sb); /* avoid wasting space */ + lex_save(ls, '\n'); + lex_newline(ls); + if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */ break; default: - if (tv) save_and_next(ls); - else next(ls); + lex_savenext(ls); break; } } endloop: if (tv) { - GCstr *str = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep), - ls->sb.n - 2*(2 + (MSize)sep)); + GCstr *str = lj_parse_keepstr(ls, ls->sb.b + (2 + (MSize)sep), + sbuflen(&ls->sb) - 2*(2 + (MSize)sep)); setstrV(ls->L, tv, str); } } -static void read_string(LexState *ls, int delim, TValue *tv) +/* Parse a string. */ +static void lex_string(LexState *ls, TValue *tv) { - save_and_next(ls); - while (ls->current != delim) { - switch (ls->current) { - case END_OF_STREAM: + LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */ + lex_savenext(ls); + while (ls->c != delim) { + switch (ls->c) { + case LEX_EOF: lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); continue; case '\n': @@ -199,7 +197,7 @@ static void read_string(LexState *ls, int delim, TValue *tv) lj_lex_error(ls, TK_string, LJ_ERR_XSTR); continue; case '\\': { - int c = next(ls); /* Skip the '\\'. */ + LexChar c = lex_next(ls); /* Skip the '\\'. */ switch (c) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; @@ -209,111 +207,139 @@ static void read_string(LexState *ls, int delim, TValue *tv) case 't': c = '\t'; break; case 'v': c = '\v'; break; case 'x': /* Hexadecimal escape '\xXX'. */ - c = (next(ls) & 15u) << 4; - if (!lj_char_isdigit(ls->current)) { - if (!lj_char_isxdigit(ls->current)) goto err_xesc; + c = (lex_next(ls) & 15u) << 4; + if (!lj_char_isdigit(ls->c)) { + if (!lj_char_isxdigit(ls->c)) goto err_xesc; c += 9 << 4; } - c += (next(ls) & 15u); - if (!lj_char_isdigit(ls->current)) { - if (!lj_char_isxdigit(ls->current)) goto err_xesc; + c += (lex_next(ls) & 15u); + if (!lj_char_isdigit(ls->c)) { + if (!lj_char_isxdigit(ls->c)) goto err_xesc; c += 9; } break; + case 'u': /* Unicode escape '\u{XX...}'. */ + if (lex_next(ls) != '{') goto err_xesc; + lex_next(ls); + c = 0; + do { + c = (c << 4) | (ls->c & 15u); + if (!lj_char_isdigit(ls->c)) { + if (!lj_char_isxdigit(ls->c)) goto err_xesc; + c += 9; + } + if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */ + } while (lex_next(ls) != '}'); + if (c < 0x800) { + if (c < 0x80) break; + lex_save(ls, 0xc0 | (c >> 6)); + } else { + if (c >= 0x10000) { + lex_save(ls, 0xf0 | (c >> 18)); + lex_save(ls, 0x80 | ((c >> 12) & 0x3f)); + } else { + if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */ + lex_save(ls, 0xe0 | (c >> 12)); + } + lex_save(ls, 0x80 | ((c >> 6) & 0x3f)); + } + c = 0x80 | (c & 0x3f); + break; case 'z': /* Skip whitespace. */ - next(ls); - while (lj_char_isspace(ls->current)) - if (currIsNewline(ls)) inclinenumber(ls); else next(ls); + lex_next(ls); + while (lj_char_isspace(ls->c)) + if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls); continue; - case '\n': case '\r': save(ls, '\n'); inclinenumber(ls); continue; + case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue; case '\\': case '\"': case '\'': break; - case END_OF_STREAM: continue; + case LEX_EOF: continue; default: if (!lj_char_isdigit(c)) goto err_xesc; c -= '0'; /* Decimal escape '\ddd'. */ - if (lj_char_isdigit(next(ls))) { - c = c*10 + (ls->current - '0'); - if (lj_char_isdigit(next(ls))) { - c = c*10 + (ls->current - '0'); + if (lj_char_isdigit(lex_next(ls))) { + c = c*10 + (ls->c - '0'); + if (lj_char_isdigit(lex_next(ls))) { + c = c*10 + (ls->c - '0'); if (c > 255) { err_xesc: lj_lex_error(ls, TK_string, LJ_ERR_XESC); } - next(ls); + lex_next(ls); } } - save(ls, c); + lex_save(ls, c); continue; } - save(ls, c); - next(ls); + lex_save(ls, c); + lex_next(ls); continue; } default: - save_and_next(ls); + lex_savenext(ls); break; } } - save_and_next(ls); /* skip delimiter */ - setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2)); + lex_savenext(ls); /* Skip trailing delimiter. */ + setstrV(ls->L, tv, + lj_parse_keepstr(ls, ls->sb.b+1, sbuflen(&ls->sb)-2)); } /* -- Main lexical scanner ------------------------------------------------ */ -static int llex(LexState *ls, TValue *tv) +/* Get next lexical token. */ +static LexToken lex_scan(LexState *ls, TValue *tv) { - lj_str_resetbuf(&ls->sb); + lj_buf_reset(&ls->sb); for (;;) { - if (lj_char_isident(ls->current)) { + if (lj_char_isident(ls->c)) { GCstr *s; - if (lj_char_isdigit(ls->current)) { /* Numeric literal. */ + if (lj_char_isdigit(ls->c)) { /* Numeric literal. */ lex_number(ls, tv); return TK_number; } /* Identifier or reserved word. */ do { - save_and_next(ls); - } while (lj_char_isident(ls->current)); - s = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n); + lex_savenext(ls); + } while (lj_char_isident(ls->c)); + s = lj_parse_keepstr(ls, ls->sb.b, sbuflen(&ls->sb)); setstrV(ls->L, tv, s); if (s->reserved > 0) /* Reserved word? */ return TK_OFS + s->reserved; return TK_name; } - switch (ls->current) { + switch (ls->c) { case '\n': case '\r': - inclinenumber(ls); + lex_newline(ls); continue; case ' ': case '\t': case '\v': case '\f': - next(ls); + lex_next(ls); continue; case '-': - next(ls); - if (ls->current != '-') return '-'; - /* else is a comment */ - next(ls); - if (ls->current == '[') { - int sep = skip_sep(ls); - lj_str_resetbuf(&ls->sb); /* `skip_sep' may dirty the buffer */ + lex_next(ls); + if (ls->c != '-') return '-'; + lex_next(ls); + if (ls->c == '[') { /* Long comment "--[=*[...]=*]". */ + int sep = lex_skipeq(ls); + lj_buf_reset(&ls->sb); /* `lex_skipeq' may dirty the buffer */ if (sep >= 0) { - read_long_string(ls, NULL, sep); /* long comment */ - lj_str_resetbuf(&ls->sb); + lex_longstring(ls, NULL, sep); + lj_buf_reset(&ls->sb); continue; } } - /* else short comment */ - while (!currIsNewline(ls) && ls->current != END_OF_STREAM) - next(ls); + /* Short comment "--.*\n". */ + while (!lex_iseol(ls) && ls->c != LEX_EOF) + lex_next(ls); continue; case '[': { - int sep = skip_sep(ls); + int sep = lex_skipeq(ls); if (sep >= 0) { - read_long_string(ls, tv, sep); + lex_longstring(ls, tv, sep); return TK_string; } else if (sep == -1) { return '['; @@ -323,44 +349,43 @@ static int llex(LexState *ls, TValue *tv) } } case '=': - next(ls); - if (ls->current != '=') return '='; else { next(ls); return TK_eq; } + lex_next(ls); + if (ls->c != '=') return '='; else { lex_next(ls); return TK_eq; } case '<': - next(ls); - if (ls->current != '=') return '<'; else { next(ls); return TK_le; } + lex_next(ls); + if (ls->c != '=') return '<'; else { lex_next(ls); return TK_le; } case '>': - next(ls); - if (ls->current != '=') return '>'; else { next(ls); return TK_ge; } + lex_next(ls); + if (ls->c != '=') return '>'; else { lex_next(ls); return TK_ge; } case '~': - next(ls); - if (ls->current != '=') return '~'; else { next(ls); return TK_ne; } + lex_next(ls); + if (ls->c != '=') return '~'; else { lex_next(ls); return TK_ne; } case ':': - next(ls); - if (ls->current != ':') return ':'; else { next(ls); return TK_label; } + lex_next(ls); + if (ls->c != ':') return ':'; else { lex_next(ls); return TK_label; } case '"': case '\'': - read_string(ls, ls->current, tv); + lex_string(ls, tv); return TK_string; case '.': - save_and_next(ls); - if (ls->current == '.') { - next(ls); - if (ls->current == '.') { - next(ls); + if (lex_savenext(ls) == '.') { + lex_next(ls); + if (ls->c == '.') { + lex_next(ls); return TK_dots; /* ... */ } return TK_concat; /* .. */ - } else if (!lj_char_isdigit(ls->current)) { + } else if (!lj_char_isdigit(ls->c)) { return '.'; } else { lex_number(ls, tv); return TK_number; } - case END_OF_STREAM: + case LEX_EOF: return TK_eof; default: { - int c = ls->current; - next(ls); + LexChar c = ls->c; + lex_next(ls); return c; /* Single-char tokens (+ - / ...). */ } } @@ -375,36 +400,33 @@ int lj_lex_setup(lua_State *L, LexState *ls) int header = 0; ls->L = L; ls->fs = NULL; - ls->n = 0; - ls->p = NULL; + ls->pe = ls->p = NULL; ls->vstack = NULL; ls->sizevstack = 0; ls->vtop = 0; ls->bcstack = NULL; ls->sizebcstack = 0; - ls->token = 0; + ls->tok = 0; ls->lookahead = TK_eof; /* No look-ahead token. */ ls->linenumber = 1; ls->lastline = 1; ls->endmark = 0; - lj_str_resizebuf(ls->L, &ls->sb, LJ_MIN_SBUF); - next(ls); /* Read-ahead first char. */ - if (ls->current == 0xef && ls->n >= 2 && char2int(ls->p[0]) == 0xbb && - char2int(ls->p[1]) == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ - ls->n -= 2; + lex_next(ls); /* Read-ahead first char. */ + if (ls->c == 0xef && ls->p + 2 <= ls->pe && (uint8_t)ls->p[0] == 0xbb && + (uint8_t)ls->p[1] == 0xbf) { /* Skip UTF-8 BOM (if buffered). */ ls->p += 2; - next(ls); + lex_next(ls); header = 1; } - if (ls->current == '#') { /* Skip POSIX #! header line. */ + if (ls->c == '#') { /* Skip POSIX #! header line. */ do { - next(ls); - if (ls->current == END_OF_STREAM) return 0; - } while (!currIsNewline(ls)); - inclinenumber(ls); + lex_next(ls); + if (ls->c == LEX_EOF) return 0; + } while (!lex_iseol(ls)); + lex_newline(ls); header = 1; } - if (ls->current == LUA_SIGNATURE[0]) { /* Bytecode dump. */ + if (ls->c == LUA_SIGNATURE[0]) { /* Bytecode dump. */ if (header) { /* ** Loading bytecode with an extra header is disabled for security @@ -426,55 +448,60 @@ void lj_lex_cleanup(lua_State *L, LexState *ls) global_State *g = G(L); lj_mem_freevec(g, ls->bcstack, ls->sizebcstack, BCInsLine); lj_mem_freevec(g, ls->vstack, ls->sizevstack, VarInfo); - lj_str_freebuf(g, &ls->sb); + lj_buf_free(g, &ls->sb); } +/* Return next lexical token. */ void lj_lex_next(LexState *ls) { ls->lastline = ls->linenumber; if (LJ_LIKELY(ls->lookahead == TK_eof)) { /* No lookahead token? */ - ls->token = llex(ls, &ls->tokenval); /* Get next token. */ + ls->tok = lex_scan(ls, &ls->tokval); /* Get next token. */ } else { /* Otherwise return lookahead token. */ - ls->token = ls->lookahead; + ls->tok = ls->lookahead; ls->lookahead = TK_eof; - ls->tokenval = ls->lookaheadval; + ls->tokval = ls->lookaheadval; } } +/* Look ahead for the next token. */ LexToken lj_lex_lookahead(LexState *ls) { - lua_assert(ls->lookahead == TK_eof); - ls->lookahead = llex(ls, &ls->lookaheadval); + lj_assertLS(ls->lookahead == TK_eof, "double lookahead"); + ls->lookahead = lex_scan(ls, &ls->lookaheadval); return ls->lookahead; } -const char *lj_lex_token2str(LexState *ls, LexToken token) +/* Convert token to string. */ +const char *lj_lex_token2str(LexState *ls, LexToken tok) { - if (token > TK_OFS) - return tokennames[token-TK_OFS-1]; - else if (!lj_char_iscntrl(token)) - return lj_str_pushf(ls->L, "%c", token); + if (tok > TK_OFS) + return tokennames[tok-TK_OFS-1]; + else if (!lj_char_iscntrl(tok)) + return lj_strfmt_pushf(ls->L, "%c", tok); else - return lj_str_pushf(ls->L, "char(%d)", token); + return lj_strfmt_pushf(ls->L, "char(%d)", tok); } -void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...) +/* Lexer error. */ +void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...) { - const char *tok; + const char *tokstr; va_list argp; - if (token == 0) { - tok = NULL; - } else if (token == TK_name || token == TK_string || token == TK_number) { - save(ls, '\0'); - tok = ls->sb.buf; + if (tok == 0) { + tokstr = NULL; + } else if (tok == TK_name || tok == TK_string || tok == TK_number) { + lex_save(ls, '\0'); + tokstr = ls->sb.b; } else { - tok = lj_lex_token2str(ls, token); + tokstr = lj_lex_token2str(ls, tok); } va_start(argp, em); - lj_err_lex(ls->L, ls->chunkname, tok, ls->linenumber, em, argp); + lj_err_lex(ls->L, ls->chunkname, tokstr, ls->linenumber, em, argp); va_end(argp); } +/* Initialize strings for reserved words. */ void lj_lex_init(lua_State *L) { uint32_t i; diff --git a/src/lj_lex.h b/src/lj_lex.h index a284af19..cb5b5769 100644 --- a/src/lj_lex.h +++ b/src/lj_lex.h @@ -30,7 +30,8 @@ TKDEF(TKENUM1, TKENUM2) TK_RESERVED = TK_while - TK_OFS }; -typedef int LexToken; +typedef int LexChar; /* Lexical character. Unsigned ext. from char. */ +typedef int LexToken; /* Lexical token. */ /* Combined bytecode ins/line. Only used during bytecode generation. */ typedef struct BCInsLine { @@ -51,13 +52,13 @@ typedef struct VarInfo { typedef struct LexState { struct FuncState *fs; /* Current FuncState. Defined in lj_parse.c. */ struct lua_State *L; /* Lua state. */ - TValue tokenval; /* Current token value. */ + TValue tokval; /* Current token value. */ TValue lookaheadval; /* Lookahead token value. */ - int current; /* Current character (charint). */ - LexToken token; /* Current token. */ - LexToken lookahead; /* Lookahead token. */ - MSize n; /* Bytes left in input buffer. */ const char *p; /* Current position in input buffer. */ + const char *pe; /* End of input buffer. */ + LexChar c; /* Current character. */ + LexToken tok; /* Current token. */ + LexToken lookahead; /* Lookahead token. */ SBuf sb; /* String buffer for tokens. */ lua_Reader rfunc; /* Reader callback. */ void *rdata; /* Reader callback data. */ @@ -79,8 +80,14 @@ LJ_FUNC int lj_lex_setup(lua_State *L, LexState *ls); LJ_FUNC void lj_lex_cleanup(lua_State *L, LexState *ls); LJ_FUNC void lj_lex_next(LexState *ls); LJ_FUNC LexToken lj_lex_lookahead(LexState *ls); -LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken token); -LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken token, ErrMsg em, ...); +LJ_FUNC const char *lj_lex_token2str(LexState *ls, LexToken tok); +LJ_FUNC_NORET void lj_lex_error(LexState *ls, LexToken tok, ErrMsg em, ...); LJ_FUNC void lj_lex_init(lua_State *L); +#ifdef LUA_USE_ASSERT +#define lj_assertLS(c, ...) (lj_assertG_(G(ls->L), (c), __VA_ARGS__)) +#else +#define lj_assertLS(c, ...) ((void)ls) +#endif + #endif diff --git a/src/lj_lib.c b/src/lj_lib.c index 5796766a..82a9e256 100644 --- a/src/lj_lib.c +++ b/src/lj_lib.c @@ -16,8 +16,14 @@ #include "lj_func.h" #include "lj_bc.h" #include "lj_dispatch.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#endif #include "lj_vm.h" #include "lj_strscan.h" +#include "lj_strfmt.h" +#include "lj_lex.h" +#include "lj_bcdump.h" #include "lj_lib.h" /* -- Library initialization ---------------------------------------------- */ @@ -43,6 +49,28 @@ static GCtab *lib_create_table(lua_State *L, const char *libname, int hsize) return tabV(L->top-1); } +static const uint8_t *lib_read_lfunc(lua_State *L, const uint8_t *p, GCtab *tab) +{ + int len = *p++; + GCstr *name = lj_str_new(L, (const char *)p, len); + LexState ls; + GCproto *pt; + GCfunc *fn; + memset(&ls, 0, sizeof(ls)); + ls.L = L; + ls.p = (const char *)(p+len); + ls.pe = (const char *)~(uintptr_t)0; + ls.c = -1; + ls.level = (BCDUMP_F_STRIP|(LJ_BE*BCDUMP_F_BE)); + ls.chunkname = name; + pt = lj_bcread_proto(&ls); + pt->firstline = ~(BCLine)0; + fn = lj_func_newL_empty(L, pt, tabref(L->env)); + /* NOBARRIER: See below for common barrier. */ + setfuncV(L, lj_tab_setstr(L, tab, name), fn); + return (const uint8_t *)ls.p; +} + void lj_lib_register(lua_State *L, const char *libname, const uint8_t *p, const lua_CFunction *cf) { @@ -87,6 +115,9 @@ void lj_lib_register(lua_State *L, const char *libname, ofn = fn; } else { switch (tag | len) { + case LIBINIT_LUA: + p = lib_read_lfunc(L, p, tab); + break; case LIBINIT_SET: L->top -= 2; if (tvisstr(L->top+1) && strV(L->top+1)->len == 0) @@ -120,6 +151,37 @@ void lj_lib_register(lua_State *L, const char *libname, } } +/* Push internal function on the stack. */ +GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n) +{ + GCfunc *fn; + lua_pushcclosure(L, f, n); + fn = funcV(L->top-1); + fn->c.ffid = (uint8_t)id; + setmref(fn->c.pc, &G(L)->bc_cfunc_int); + return fn; +} + +void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, GCtab *env) +{ + luaL_findtable(L, LUA_REGISTRYINDEX, "_PRELOAD", 4); + lua_pushcfunction(L, f); + /* NOBARRIER: The function is new (marked white). */ + setgcref(funcV(L->top-1)->c.env, obj2gco(env)); + lua_setfield(L, -2, name); + L->top--; +} + +int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, const char *name) +{ + GCfunc *fn = lj_lib_pushcf(L, cf, id); + GCtab *t = tabref(curr_func(L)->c.env); /* Reference to parent table. */ + setfuncV(L, lj_tab_setstr(L, t, lj_str_newz(L, name)), fn); + lj_gc_anybarriert(L, t); + setfuncV(L, L->top++, fn); + return 1; +} + /* -- Type checks --------------------------------------------------------- */ TValue *lj_lib_checkany(lua_State *L, int narg) @@ -137,7 +199,7 @@ GCstr *lj_lib_checkstr(lua_State *L, int narg) if (LJ_LIKELY(tvisstr(o))) { return strV(o); } else if (tvisnumber(o)) { - GCstr *s = lj_str_fromnumber(L, o); + GCstr *s = lj_strfmt_number(L, o); setstrV(L, o, s); return s; } @@ -196,20 +258,6 @@ int32_t lj_lib_optint(lua_State *L, int narg, int32_t def) return (o < L->top && !tvisnil(o)) ? lj_lib_checkint(L, narg) : def; } -int32_t lj_lib_checkbit(lua_State *L, int narg) -{ - TValue *o = L->base + narg-1; - if (!(o < L->top && lj_strscan_numberobj(o))) - lj_err_argt(L, narg, LUA_TNUMBER); - if (LJ_LIKELY(tvisint(o))) { - return intV(o); - } else { - int32_t i = lj_num2bit(numV(o)); - if (LJ_DUALNUM) setintV(o, i); - return i; - } -} - GCfunc *lj_lib_checkfunc(lua_State *L, int narg) { TValue *o = L->base + narg-1; @@ -256,3 +304,56 @@ int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst) return def; } +/* -- Strict type checks -------------------------------------------------- */ + +/* The following type checks do not coerce between strings and numbers. +** And they handle plain int64_t/uint64_t FFI numbers, too. +*/ + +#if LJ_HASBUFFER +GCstr *lj_lib_checkstrx(lua_State *L, int narg) +{ + TValue *o = L->base + narg-1; + if (!(o < L->top && tvisstr(o))) lj_err_argt(L, narg, LUA_TSTRING); + return strV(o); +} + +int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b) +{ + TValue *o = L->base + narg-1; + lj_assertL(b >= 0, "expected range must be non-negative"); + if (o < L->top) { + if (LJ_LIKELY(tvisint(o))) { + int32_t i = intV(o); + if (i >= a && i <= b) return i; + } else if (LJ_LIKELY(tvisnum(o))) { + /* For performance reasons, this doesn't check for integerness or + ** integer overflow. Overflow detection still works, since all FPUs + ** return either MININT or MAXINT, which is then out of range. + */ + int32_t i = (int32_t)numV(o); + if (i >= a && i <= b) return i; +#if LJ_HASFFI + } else if (tviscdata(o)) { + GCcdata *cd = cdataV(o); + if (cd->ctypeid == CTID_INT64) { + int64_t i = *(int64_t *)cdataptr(cd); + if (i >= (int64_t)a && i <= (int64_t)b) return (int32_t)i; + } else if (cd->ctypeid == CTID_UINT64) { + uint64_t i = *(uint64_t *)cdataptr(cd); + if ((a < 0 || i >= (uint64_t)a) && i <= (uint64_t)b) return (int32_t)i; + } else { + goto badtype; + } +#endif + } else { + goto badtype; + } + lj_err_arg(L, narg, LJ_ERR_NUMRNG); + } +badtype: + lj_err_argt(L, narg, LUA_TNUMBER); + return 0; /* unreachable */ +} +#endif + diff --git a/src/lj_lib.h b/src/lj_lib.h index 55529ad8..a18f52bf 100644 --- a/src/lj_lib.h +++ b/src/lj_lib.h @@ -41,15 +41,28 @@ LJ_FUNC void lj_lib_checknumber(lua_State *L, int narg); LJ_FUNC lua_Number lj_lib_checknum(lua_State *L, int narg); LJ_FUNC int32_t lj_lib_checkint(lua_State *L, int narg); LJ_FUNC int32_t lj_lib_optint(lua_State *L, int narg, int32_t def); -LJ_FUNC int32_t lj_lib_checkbit(lua_State *L, int narg); LJ_FUNC GCfunc *lj_lib_checkfunc(lua_State *L, int narg); LJ_FUNC GCtab *lj_lib_checktab(lua_State *L, int narg); LJ_FUNC GCtab *lj_lib_checktabornil(lua_State *L, int narg); LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); +#if LJ_HASBUFFER +LJ_FUNC GCstr *lj_lib_checkstrx(lua_State *L, int narg); +LJ_FUNC int32_t lj_lib_checkintrange(lua_State *L, int narg, + int32_t a, int32_t b); +#endif + /* Avoid including lj_frame.h. */ +#if LJ_GC64 +#define lj_lib_upvalue(L, n) \ + (&gcval(L->base-2)->fn.c.upvalue[(n)-1]) +#elif LJ_FR2 +#define lj_lib_upvalue(L, n) \ + (&gcref((L->base-2)->gcr)->fn.c.upvalue[(n)-1]) +#else #define lj_lib_upvalue(L, n) \ (&gcref((L->base-1)->fr.func)->fn.c.upvalue[(n)-1]) +#endif #if LJ_TARGET_WINDOWS #define lj_lib_checkfpu(L) \ @@ -60,23 +73,14 @@ LJ_FUNC int lj_lib_checkopt(lua_State *L, int narg, int def, const char *lst); #define lj_lib_checkfpu(L) UNUSED(L) #endif -/* Push internal function on the stack. */ -static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f, - int id, int n) -{ - GCfunc *fn; - lua_pushcclosure(L, f, n); - fn = funcV(L->top-1); - fn->c.ffid = (uint8_t)id; - setmref(fn->c.pc, &G(L)->bc_cfunc_int); -} - +LJ_FUNC GCfunc *lj_lib_pushcc(lua_State *L, lua_CFunction f, int id, int n); #define lj_lib_pushcf(L, fn, id) (lj_lib_pushcc(L, (fn), (id), 0)) /* Library function declarations. Scanned by buildvm. */ #define LJLIB_CF(name) static int lj_cf_##name(lua_State *L) #define LJLIB_ASM(name) static int lj_ffh_##name(lua_State *L) #define LJLIB_ASM_(name) +#define LJLIB_LUA(name) #define LJLIB_SET(name) #define LJLIB_PUSH(arg) #define LJLIB_REC(handler) @@ -88,6 +92,10 @@ static LJ_AINLINE void lj_lib_pushcc(lua_State *L, lua_CFunction f, LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, const uint8_t *init, const lua_CFunction *cf); +LJ_FUNC void lj_lib_prereg(lua_State *L, const char *name, lua_CFunction f, + GCtab *env); +LJ_FUNC int lj_lib_postreg(lua_State *L, lua_CFunction cf, int id, + const char *name); /* Library init data tags. */ #define LIBINIT_LENMASK 0x3f @@ -96,7 +104,8 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, #define LIBINIT_ASM 0x40 #define LIBINIT_ASM_ 0x80 #define LIBINIT_STRING 0xc0 -#define LIBINIT_MAXSTR 0x39 +#define LIBINIT_MAXSTR 0x38 +#define LIBINIT_LUA 0xf9 #define LIBINIT_SET 0xfa #define LIBINIT_NUMBER 0xfb #define LIBINIT_COPY 0xfc @@ -104,9 +113,4 @@ LJ_FUNC void lj_lib_register(lua_State *L, const char *libname, #define LIBINIT_FFID 0xfe #define LIBINIT_END 0xff -/* Exported library functions. */ - -typedef struct RandomState RandomState; -LJ_FUNC uint64_t LJ_FASTCALL lj_math_random_step(RandomState *rs); - #endif diff --git a/src/lj_load.c b/src/lj_load.c index dbd36ac7..0aab4884 100644 --- a/src/lj_load.c +++ b/src/lj_load.c @@ -15,7 +15,7 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_err.h" -#include "lj_str.h" +#include "lj_buf.h" #include "lj_func.h" #include "lj_frame.h" #include "lj_vm.h" @@ -54,7 +54,7 @@ LUA_API int lua_loadx(lua_State *L, lua_Reader reader, void *data, ls.rdata = data; ls.chunkarg = chunkname ? chunkname : "?"; ls.mode = mode; - lj_str_initbuf(&ls.sb); + lj_buf_init(L, &ls.sb); status = lj_vm_cpcall(L, NULL, &ls, cpparser); lj_lex_cleanup(L, &ls); lj_gc_check(L); @@ -159,7 +159,7 @@ LUALIB_API int luaL_loadstring(lua_State *L, const char *s) LUA_API int lua_dump(lua_State *L, lua_Writer writer, void *data) { cTValue *o = L->top-1; - api_check(L, L->top > L->base); + lj_checkapi(L->top > L->base, "top slot empty"); if (tvisfunc(o) && isluafunc(funcV(o))) return lj_bcwrite(L, funcproto(funcV(o)), writer, data, 0); else diff --git a/src/lj_mcode.c b/src/lj_mcode.c index 0a3b3fa3..163aada4 100644 --- a/src/lj_mcode.c +++ b/src/lj_mcode.c @@ -14,6 +14,7 @@ #include "lj_mcode.h" #include "lj_trace.h" #include "lj_dispatch.h" +#include "lj_prng.h" #endif #if LJ_HASJIT || LJ_HASFFI #include "lj_vm.h" @@ -44,7 +45,7 @@ void lj_mcode_sync(void *start, void *end) sys_icache_invalidate(start, (char *)end-(char *)start); #elif LJ_TARGET_PPC lj_vm_cachesync(start, end); -#elif defined(__GNUC__) +#elif defined(__GNUC__) || defined(__clang__) __clear_cache(start, end); #else #error "Missing builtin to flush instruction cache" @@ -66,8 +67,8 @@ void lj_mcode_sync(void *start, void *end) static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, DWORD prot) { - void *p = VirtualAlloc((void *)hint, sz, - MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); + void *p = LJ_WIN_VALLOC((void *)hint, sz, + MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, prot); if (!p && !hint) lj_trace_err(J, LJ_TRERR_MCODEAL); return p; @@ -82,7 +83,7 @@ static void mcode_free(jit_State *J, void *p, size_t sz) static int mcode_setprot(void *p, size_t sz, DWORD prot) { DWORD oprot; - return !VirtualProtect(p, sz, prot, &oprot); + return !LJ_WIN_VPROTECT(p, sz, prot, &oprot); } #elif LJ_TARGET_POSIX @@ -96,10 +97,15 @@ static int mcode_setprot(void *p, size_t sz, DWORD prot) #define MCPROT_RW (PROT_READ|PROT_WRITE) #define MCPROT_RX (PROT_READ|PROT_EXEC) #define MCPROT_RWX (PROT_READ|PROT_WRITE|PROT_EXEC) +#ifdef PROT_MPROTECT +#define MCPROT_CREATE (PROT_MPROTECT(MCPROT_RWX)) +#else +#define MCPROT_CREATE 0 +#endif static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) { - void *p = mmap((void *)hint, sz, prot, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + void *p = mmap((void *)hint, sz, prot|MCPROT_CREATE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (p == MAP_FAILED) { if (!hint) lj_trace_err(J, LJ_TRERR_MCODEAL); p = NULL; @@ -118,52 +124,34 @@ static int mcode_setprot(void *p, size_t sz, int prot) return mprotect(p, sz, prot); } -#elif LJ_64 - -#error "Missing OS support for explicit placement of executable memory" - #else -/* Fallback allocator. This will fail if memory is not executable by default. */ -#define LUAJIT_UNPROTECT_MCODE -#define MCPROT_RW 0 -#define MCPROT_RX 0 -#define MCPROT_RWX 0 - -static void *mcode_alloc_at(jit_State *J, uintptr_t hint, size_t sz, int prot) -{ - UNUSED(hint); UNUSED(prot); - return lj_mem_new(J->L, sz); -} - -static void mcode_free(jit_State *J, void *p, size_t sz) -{ - lj_mem_free(J2G(J), p, sz); -} +#error "Missing OS support for explicit placement of executable memory" #endif /* -- MCode area protection ----------------------------------------------- */ -/* Define this ONLY if page protection twiddling becomes a bottleneck. */ -#ifdef LUAJIT_UNPROTECT_MCODE +#if LUAJIT_SECURITY_MCODE == 0 -/* It's generally considered to be a potential security risk to have +/* Define this ONLY if page protection twiddling becomes a bottleneck. +** +** It's generally considered to be a potential security risk to have ** pages with simultaneous write *and* execute access in a process. ** ** Do not even think about using this mode for server processes or -** apps handling untrusted external data (such as a browser). +** apps handling untrusted external data. ** ** The security risk is not in LuaJIT itself -- but if an adversary finds -** any *other* flaw in your C application logic, then any RWX memory page -** simplifies writing an exploit considerably. +** any *other* flaw in your C application logic, then any RWX memory pages +** simplify writing an exploit considerably. */ #define MCPROT_GEN MCPROT_RWX #define MCPROT_RUN MCPROT_RWX static void mcode_protect(jit_State *J, int prot) { - UNUSED(J); UNUSED(prot); + UNUSED(J); UNUSED(prot); UNUSED(mcode_setprot); } #else @@ -222,8 +210,8 @@ static void *mcode_alloc(jit_State *J, size_t sz) */ #if LJ_TARGET_MIPS /* Use the middle of the 256MB-aligned region. */ - uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & 0xf0000000u) + - 0x08000000u; + uintptr_t target = ((uintptr_t)(void *)lj_vm_exit_handler & + ~(uintptr_t)0x0fffffffu) + 0x08000000u; #else uintptr_t target = (uintptr_t)(void *)lj_vm_exit_handler & ~(uintptr_t)0xffff; #endif @@ -243,7 +231,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) } /* Next try probing 64K-aligned pseudo-random addresses. */ do { - hint = LJ_PRNG_BITS(J, LJ_TARGET_JUMPRANGE-16) << 16; + hint = lj_prng_u64(&J2G(J)->prng) & ((1u<<LJ_TARGET_JUMPRANGE)-0x10000); } while (!(hint + sz < range+range)); hint = target + hint - range; } @@ -256,7 +244,7 @@ static void *mcode_alloc(jit_State *J, size_t sz) /* All memory addresses are reachable by relative jumps. */ static void *mcode_alloc(jit_State *J, size_t sz) { -#ifdef __OpenBSD__ +#if defined(__OpenBSD__) || defined(__NetBSD__) || LJ_TARGET_UWP /* Allow better executable memory allocation for OpenBSD W^X mode. */ void *p = mcode_alloc_at(J, 0, sz, MCPROT_RUN); if (p && mcode_setprot(p, sz, MCPROT_GEN)) { @@ -287,6 +275,7 @@ static void mcode_allocarea(jit_State *J) ((MCLink *)J->mcarea)->next = oldarea; ((MCLink *)J->mcarea)->size = sz; J->szallmcarea += sz; + J->mcbot = (MCode *)lj_err_register_mcode(J->mcarea, sz, (uint8_t *)J->mcbot); } /* Free all MCode areas. */ @@ -297,7 +286,9 @@ void lj_mcode_free(jit_State *J) J->szallmcarea = 0; while (mc) { MCode *next = ((MCLink *)mc)->next; - mcode_free(J, mc, ((MCLink *)mc)->size); + size_t sz = ((MCLink *)mc)->size; + lj_err_deregister_mcode(mc, sz, (uint8_t *)mc + sizeof(MCLink)); + mcode_free(J, mc, sz); mc = next; } } @@ -332,35 +323,36 @@ void lj_mcode_abort(jit_State *J) /* Set/reset protection to allow patching of MCode areas. */ MCode *lj_mcode_patch(jit_State *J, MCode *ptr, int finish) { -#ifdef LUAJIT_UNPROTECT_MCODE - UNUSED(J); UNUSED(ptr); UNUSED(finish); - return NULL; -#else if (finish) { +#if LUAJIT_SECURITY_MCODE if (J->mcarea == ptr) mcode_protect(J, MCPROT_RUN); else if (LJ_UNLIKELY(mcode_setprot(ptr, ((MCLink *)ptr)->size, MCPROT_RUN))) mcode_protfail(J); +#endif return NULL; } else { MCode *mc = J->mcarea; /* Try current area first to use the protection cache. */ if (ptr >= mc && ptr < (MCode *)((char *)mc + J->szmcarea)) { +#if LUAJIT_SECURITY_MCODE mcode_protect(J, MCPROT_GEN); +#endif return mc; } /* Otherwise search through the list of MCode areas. */ for (;;) { mc = ((MCLink *)mc)->next; - lua_assert(mc != NULL); + lj_assertJ(mc != NULL, "broken MCode area chain"); if (ptr >= mc && ptr < (MCode *)((char *)mc + ((MCLink *)mc)->size)) { +#if LUAJIT_SECURITY_MCODE if (LJ_UNLIKELY(mcode_setprot(mc, ((MCLink *)mc)->size, MCPROT_GEN))) mcode_protfail(J); +#endif return mc; } } } -#endif } /* Limit of MCode reservation reached. */ diff --git a/src/lj_meta.c b/src/lj_meta.c index 1d4d2234..5324c666 100644 --- a/src/lj_meta.c +++ b/src/lj_meta.c @@ -12,6 +12,7 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_err.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_meta.h" @@ -19,6 +20,8 @@ #include "lj_bc.h" #include "lj_vm.h" #include "lj_strscan.h" +#include "lj_strfmt.h" +#include "lj_lib.h" /* -- Metamethod handling ------------------------------------------------- */ @@ -44,7 +47,7 @@ void lj_meta_init(lua_State *L) cTValue *lj_meta_cache(GCtab *mt, MMS mm, GCstr *name) { cTValue *mo = lj_tab_getstr(mt, name); - lua_assert(mm <= MM_FAST); + lj_assertX(mm <= MM_FAST, "bad metamethod %d", mm); if (!mo || tvisnil(mo)) { /* No metamethod? */ mt->nomm |= (uint8_t)(1u<<mm); /* Set negative cache flag. */ return NULL; @@ -77,12 +80,16 @@ int lj_meta_tailcall(lua_State *L, cTValue *tv) TValue *base = L->base; TValue *top = L->top; const BCIns *pc = frame_pc(base-1); /* Preserve old PC from frame. */ - copyTV(L, base-1, tv); /* Replace frame with new object. */ - top->u32.lo = LJ_CONT_TAILCALL; - setframe_pc(top, pc); - setframe_gc(top+1, obj2gco(L)); /* Dummy frame object. */ - setframe_ftsz(top+1, (int)((char *)(top+2) - (char *)base) + FRAME_CONT); - L->base = L->top = top+2; + copyTV(L, base-1-LJ_FR2, tv); /* Replace frame with new object. */ + if (LJ_FR2) + (top++)->u64 = LJ_CONT_TAILCALL; + else + top->u32.lo = LJ_CONT_TAILCALL; + setframe_pc(top++, pc); + setframe_gc(top, obj2gco(L), LJ_TTHREAD); /* Dummy frame object. */ + if (LJ_FR2) top++; + setframe_ftsz(top, ((char *)(top+1) - (char *)base) + FRAME_CONT); + L->base = L->top = top+1; /* ** before: [old_mo|PC] [... ...] ** ^base ^top @@ -113,11 +120,13 @@ static TValue *mmcall(lua_State *L, ASMFunction cont, cTValue *mo, */ TValue *top = L->top; if (curr_funcisL(L)) top = curr_topL(L); - setcont(top, cont); /* Assembler VM stores PC in upper word. */ - copyTV(L, top+1, mo); /* Store metamethod and two arguments. */ - copyTV(L, top+2, a); - copyTV(L, top+3, b); - return top+2; /* Return new base. */ + setcont(top++, cont); /* Assembler VM stores PC in upper word or FR2. */ + if (LJ_FR2) setnilV(top++); + copyTV(L, top++, mo); /* Store metamethod and two arguments. */ + if (LJ_FR2) setnilV(top++); + copyTV(L, top, a); + copyTV(L, top+1, b); + return top; /* Return new base. */ } /* -- C helpers for some instructions, called from assembler VM ----------- */ @@ -225,27 +234,14 @@ TValue *lj_meta_arith(lua_State *L, TValue *ra, cTValue *rb, cTValue *rc, } } -/* In-place coercion of a number to a string. */ -static LJ_AINLINE int tostring(lua_State *L, TValue *o) -{ - if (tvisstr(o)) { - return 1; - } else if (tvisnumber(o)) { - setstrV(L, o, lj_str_fromnumber(L, o)); - return 1; - } else { - return 0; - } -} - /* Helper for CAT. Coercion, iterative concat, __concat metamethod. */ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) { int fromc = 0; if (left < 0) { left = -left; fromc = 1; } do { - int n = 1; - if (!(tvisstr(top-1) || tvisnumber(top-1)) || !tostring(L, top)) { + if (!(tvisstr(top) || tvisnumber(top) || tvisbuf(top)) || + !(tvisstr(top-1) || tvisnumber(top-1) || tvisbuf(top-1))) { cTValue *mo = lj_meta_lookup(L, top-1, MM_concat); if (tvisnil(mo)) { mo = lj_meta_lookup(L, top, MM_concat); @@ -266,13 +262,12 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) ** after mm: [...][CAT stack ...] <--push-- [result] ** next step: [...][CAT stack .............] */ - copyTV(L, top+2, top); /* Careful with the order of stack copies! */ - copyTV(L, top+1, top-1); - copyTV(L, top, mo); + copyTV(L, top+2*LJ_FR2+2, top); /* Carefully ordered stack copies! */ + copyTV(L, top+2*LJ_FR2+1, top-1); + copyTV(L, top+LJ_FR2, mo); setcont(top-1, lj_cont_cat); + if (LJ_FR2) { setnilV(top); setnilV(top+2); top += 2; } return top+1; /* Trigger metamethod call. */ - } else if (strV(top)->len == 0) { /* Shortcut. */ - (void)tostring(L, top-1); } else { /* Pick as many strings as possible from the top and concatenate them: ** @@ -281,27 +276,33 @@ TValue *lj_meta_cat(lua_State *L, TValue *top, int left) ** concat: [...][CAT stack ...] [result] ** next step: [...][CAT stack ............] */ - MSize tlen = strV(top)->len; - char *buffer; - int i; - for (n = 1; n <= left && tostring(L, top-n); n++) { - MSize len = strV(top-n)->len; - if (len >= LJ_MAX_STR - tlen) - lj_err_msg(L, LJ_ERR_STROV); - tlen += len; - } - buffer = lj_str_needbuf(L, &G(L)->tmpbuf, tlen); - n--; - tlen = 0; - for (i = n; i >= 0; i--) { - MSize len = strV(top-i)->len; - memcpy(buffer + tlen, strVdata(top-i), len); - tlen += len; + TValue *e, *o = top; + uint64_t tlen = tvisstr(o) ? strV(o)->len : + tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM; + SBuf *sb; + do { + o--; tlen += tvisstr(o) ? strV(o)->len : + tvisbuf(o) ? sbufxlen(bufV(o)) : STRFMT_MAXBUF_NUM; + } while (--left > 0 && (tvisstr(o-1) || tvisnumber(o-1))); + if (tlen >= LJ_MAX_STR) lj_err_msg(L, LJ_ERR_STROV); + sb = lj_buf_tmp_(L); + lj_buf_more(sb, (MSize)tlen); + for (e = top, top = o; o <= e; o++) { + if (tvisstr(o)) { + GCstr *s = strV(o); + MSize len = s->len; + lj_buf_putmem(sb, strdata(s), len); + } else if (tvisbuf(o)) { + SBufExt *sbx = bufV(o); + lj_buf_putmem(sb, sbx->r, sbufxlen(sbx)); + } else if (tvisint(o)) { + lj_strfmt_putint(sb, intV(o)); + } else { + lj_strfmt_putfnum(sb, STRFMT_G14, numV(o)); + } } - setstrV(L, top-n, lj_str_new(L, buffer, tlen)); + setstrV(L, top, lj_buf_str(L, sb)); } - left -= n; - top -= n; } while (left >= 1); if (LJ_UNLIKELY(G(L)->gc.total >= G(L)->gc.threshold)) { if (!fromc) L->top = curr_topL(L); @@ -338,12 +339,14 @@ TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne) return (TValue *)(intptr_t)ne; } top = curr_top(L); - setcont(top, ne ? lj_cont_condf : lj_cont_condt); - copyTV(L, top+1, mo); + setcont(top++, ne ? lj_cont_condf : lj_cont_condt); + if (LJ_FR2) setnilV(top++); + copyTV(L, top++, mo); + if (LJ_FR2) setnilV(top++); it = ~(uint32_t)o1->gch.gct; - setgcV(L, top+2, o1, it); - setgcV(L, top+3, o2, it); - return top+2; /* Trigger metamethod call. */ + setgcV(L, top, o1, it); + setgcV(L, top+1, o2, it); + return top; /* Trigger metamethod call. */ } return (TValue *)(intptr_t)ne; } @@ -365,8 +368,8 @@ TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins) } else if (op == BC_ISEQN) { o2 = &mref(curr_proto(L)->k, cTValue)[bc_d(ins)]; } else { - lua_assert(op == BC_ISEQP); - setitype(&tv, ~bc_d(ins)); + lj_assertL(op == BC_ISEQP, "bad bytecode op %d", op); + setpriV(&tv, ~bc_d(ins)); o2 = &tv; } mo = lj_meta_lookup(L, o1mm, MM_eq); @@ -423,6 +426,18 @@ TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op) } } +/* Helper for ISTYPE and ISNUM. Implicit coercion or error. */ +void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp) +{ + L->top = curr_topL(L); + ra++; tp--; + lj_assertL(LJ_DUALNUM || tp != ~LJ_TNUMX, "bad type for ISTYPE"); + if (LJ_DUALNUM && tp == ~LJ_TNUMX) lj_lib_checkint(L, ra); + else if (tp == ~LJ_TNUMX+1) lj_lib_checknum(L, ra); + else if (tp == ~LJ_TSTR) lj_lib_checkstr(L, ra); + else lj_err_argtype(L, ra, lj_obj_itypename[tp]); +} + /* Helper for calls. __call metamethod. */ void lj_meta_call(lua_State *L, TValue *func, TValue *top) { @@ -430,7 +445,8 @@ void lj_meta_call(lua_State *L, TValue *func, TValue *top) TValue *p; if (!tvisfunc(mo)) lj_err_optype_call(L, func); - for (p = top; p > func; p--) copyTV(L, p, p-1); + for (p = top; p > func+2*LJ_FR2; p--) copyTV(L, p, p-1); + if (LJ_FR2) copyTV(L, func+2, func); copyTV(L, func, mo); } diff --git a/src/lj_meta.h b/src/lj_meta.h index 9c36aea5..3a6eaac2 100644 --- a/src/lj_meta.h +++ b/src/lj_meta.h @@ -31,6 +31,7 @@ LJ_FUNCA TValue * LJ_FASTCALL lj_meta_len(lua_State *L, cTValue *o); LJ_FUNCA TValue *lj_meta_equal(lua_State *L, GCobj *o1, GCobj *o2, int ne); LJ_FUNCA TValue * LJ_FASTCALL lj_meta_equal_cd(lua_State *L, BCIns ins); LJ_FUNCA TValue *lj_meta_comp(lua_State *L, cTValue *o1, cTValue *o2, int op); +LJ_FUNCA void lj_meta_istype(lua_State *L, BCReg ra, BCReg tp); LJ_FUNCA void lj_meta_call(lua_State *L, TValue *func, TValue *top); LJ_FUNCA void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o); diff --git a/src/lj_obj.c b/src/lj_obj.c index 528b3a58..65cbe1a1 100644 --- a/src/lj_obj.c +++ b/src/lj_obj.c @@ -20,7 +20,7 @@ LJ_DATADEF const char *const lj_obj_itypename[] = { /* ORDER LJ_T */ }; /* Compare two objects without calling metamethods. */ -int lj_obj_equal(cTValue *o1, cTValue *o2) +int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2) { if (itype(o1) == itype(o2)) { if (tvispri(o1)) @@ -33,3 +33,19 @@ int lj_obj_equal(cTValue *o1, cTValue *o2) return numberVnum(o1) == numberVnum(o2); } +/* Return pointer to object or its object data. */ +const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o) +{ + UNUSED(g); + if (tvisudata(o)) + return uddata(udataV(o)); + else if (tvislightud(o)) + return lightudV(g, o); + else if (LJ_HASFFI && tviscdata(o)) + return cdataptr(cdataV(o)); + else if (tvisgcv(o)) + return gcV(o); + else + return NULL; +} + diff --git a/src/lj_obj.h b/src/lj_obj.h index ea8fe870..67e41181 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -13,44 +13,81 @@ #include "lj_def.h" #include "lj_arch.h" -/* -- Memory references (32 bit address space) ---------------------------- */ +/* -- Memory references --------------------------------------------------- */ -/* Memory size. */ +/* Memory and GC object sizes. */ typedef uint32_t MSize; +#if LJ_GC64 +typedef uint64_t GCSize; +#else +typedef uint32_t GCSize; +#endif /* Memory reference */ typedef struct MRef { +#if LJ_GC64 + uint64_t ptr64; /* True 64 bit pointer. */ +#else uint32_t ptr32; /* Pseudo 32 bit pointer. */ +#endif } MRef; +#if LJ_GC64 +#define mref(r, t) ((t *)(void *)(r).ptr64) +#define mrefu(r) ((r).ptr64) + +#define setmref(r, p) ((r).ptr64 = (uint64_t)(void *)(p)) +#define setmrefu(r, u) ((r).ptr64 = (uint64_t)(u)) +#define setmrefr(r, v) ((r).ptr64 = (v).ptr64) +#else #define mref(r, t) ((t *)(void *)(uintptr_t)(r).ptr32) +#define mrefu(r) ((r).ptr32) #define setmref(r, p) ((r).ptr32 = (uint32_t)(uintptr_t)(void *)(p)) +#define setmrefu(r, u) ((r).ptr32 = (uint32_t)(u)) #define setmrefr(r, v) ((r).ptr32 = (v).ptr32) +#endif -/* -- GC object references (32 bit address space) ------------------------- */ +/* -- GC object references ------------------------------------------------ */ /* GCobj reference */ typedef struct GCRef { +#if LJ_GC64 + uint64_t gcptr64; /* True 64 bit pointer. */ +#else uint32_t gcptr32; /* Pseudo 32 bit pointer. */ +#endif } GCRef; /* Common GC header for all collectable objects. */ #define GCHeader GCRef nextgc; uint8_t marked; uint8_t gct /* This occupies 6 bytes, so use the next 2 bytes for non-32 bit fields. */ +#if LJ_GC64 +#define gcref(r) ((GCobj *)(r).gcptr64) +#define gcrefp(r, t) ((t *)(void *)(r).gcptr64) +#define gcrefu(r) ((r).gcptr64) +#define gcrefeq(r1, r2) ((r1).gcptr64 == (r2).gcptr64) + +#define setgcref(r, gc) ((r).gcptr64 = (uint64_t)&(gc)->gch) +#define setgcreft(r, gc, it) \ + (r).gcptr64 = (uint64_t)&(gc)->gch | (((uint64_t)(it)) << 47) +#define setgcrefp(r, p) ((r).gcptr64 = (uint64_t)(p)) +#define setgcrefnull(r) ((r).gcptr64 = 0) +#define setgcrefr(r, v) ((r).gcptr64 = (v).gcptr64) +#else #define gcref(r) ((GCobj *)(uintptr_t)(r).gcptr32) #define gcrefp(r, t) ((t *)(void *)(uintptr_t)(r).gcptr32) #define gcrefu(r) ((r).gcptr32) -#define gcrefi(r) ((int32_t)(r).gcptr32) #define gcrefeq(r1, r2) ((r1).gcptr32 == (r2).gcptr32) -#define gcnext(gc) (gcref((gc)->gch.nextgc)) #define setgcref(r, gc) ((r).gcptr32 = (uint32_t)(uintptr_t)&(gc)->gch) -#define setgcrefi(r, i) ((r).gcptr32 = (uint32_t)(i)) #define setgcrefp(r, p) ((r).gcptr32 = (uint32_t)(uintptr_t)(p)) #define setgcrefnull(r) ((r).gcptr32 = 0) #define setgcrefr(r, v) ((r).gcptr32 = (v).gcptr32) +#endif + +#define gcnext(gc) (gcref((gc)->gch.nextgc)) /* IMPORTANT NOTE: ** @@ -119,11 +156,10 @@ typedef int32_t BCLine; /* Bytecode line number. */ /* Internal assembler functions. Never call these directly from C. */ typedef void (*ASMFunction)(void); -/* Resizable string buffer. Need this here, details in lj_str.h. */ +/* Resizable string buffer. Need this here, details in lj_buf.h. */ +#define SBufHeader char *w, *e, *b; MRef L typedef struct SBuf { - char *buf; /* String buffer base. */ - MSize n; /* String buffer length. */ - MSize sz; /* String buffer size. */ + SBufHeader; } SBuf; /* -- Tags and values ----------------------------------------------------- */ @@ -131,13 +167,23 @@ typedef struct SBuf { /* Frame link. */ typedef union { int32_t ftsz; /* Frame type and size of previous frame. */ - MRef pcr; /* Overlaps PC for Lua frames. */ + MRef pcr; /* Or PC for Lua frames. */ } FrameLink; /* Tagged value. */ typedef LJ_ALIGN(8) union TValue { uint64_t u64; /* 64 bit pattern overlaps number. */ lua_Number n; /* Number object overlaps split tag/value object. */ +#if LJ_GC64 + GCRef gcr; /* GCobj reference with tag. */ + int64_t it64; + struct { + LJ_ENDIAN_LOHI( + int32_t i; /* Integer value. */ + , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ + ) + }; +#else struct { LJ_ENDIAN_LOHI( union { @@ -147,12 +193,17 @@ typedef LJ_ALIGN(8) union TValue { , uint32_t it; /* Internal object tag. Must overlap MSW of number. */ ) }; +#endif +#if LJ_FR2 + int64_t ftsz; /* Frame type and size of previous frame, or PC. */ +#else struct { LJ_ENDIAN_LOHI( GCRef func; /* Function for next frame (or dummy L). */ , FrameLink tp; /* Link to previous frame. */ ) } fr; +#endif struct { LJ_ENDIAN_LOHI( uint32_t lo; /* Lower 32 bits of number. */ @@ -172,6 +223,8 @@ typedef const TValue cTValue; /* Internal object tags. ** +** Format for 32 bit GC references (!LJ_GC64): +** ** Internal tags overlap the MSW of a number object (must be a double). ** Interpreted as a double these are special NaNs. The FPU only generates ** one type of NaN (0xfff8_0000_0000_0000). So MSWs > 0xfff80000 are available @@ -181,11 +234,24 @@ typedef const TValue cTValue; ** ---MSW---.---LSW--- ** primitive types | itype | | ** lightuserdata | itype | void * | (32 bit platforms) -** lightuserdata |ffff| void * | (64 bit platforms, 47 bit pointers) +** lightuserdata |ffff|seg| ofs | (64 bit platforms) ** GC objects | itype | GCRef | ** int (LJ_DUALNUM)| itype | int | ** number -------double------ ** +** Format for 64 bit GC references (LJ_GC64): +** +** The upper 13 bits must be 1 (0xfff8...) for a special NaN. The next +** 4 bits hold the internal tag. The lowest 47 bits either hold a pointer, +** a zero-extended 32 bit integer or all bits set to 1 for primitive types. +** +** ------MSW------.------LSW------ +** primitive types |1..1|itype|1..................1| +** GC objects |1..1|itype|-------GCRef--------| +** lightuserdata |1..1|itype|seg|------ofs-------| +** int (LJ_DUALNUM) |1..1|itype|0..0|-----int-------| +** number ------------double------------- +** ** ORDER LJ_T ** Primitive types nil/false/true must be first, lightuserdata next. ** GC objects are at the end, table/userdata must be lowest. @@ -208,7 +274,7 @@ typedef const TValue cTValue; #define LJ_TNUMX (~13u) /* Integers have itype == LJ_TISNUM doubles have itype < LJ_TISNUM */ -#if LJ_64 +#if LJ_64 && !LJ_GC64 #define LJ_TISNUM 0xfffeffffu #else #define LJ_TISNUM LJ_TNUMX @@ -218,14 +284,31 @@ typedef const TValue cTValue; #define LJ_TISGCV (LJ_TSTR+1) #define LJ_TISTABUD LJ_TTAB +/* Type marker for slot holding a traversal index. Must be lightuserdata. */ +#define LJ_KEYINDEX 0xfffe7fffu + +#if LJ_GC64 +#define LJ_GCVMASK (((uint64_t)1 << 47) - 1) +#endif + +#if LJ_64 +/* To stay within 47 bits, lightuserdata is segmented. */ +#define LJ_LIGHTUD_BITS_SEG 8 +#define LJ_LIGHTUD_BITS_LO (47 - LJ_LIGHTUD_BITS_SEG) +#endif + /* -- String object ------------------------------------------------------- */ +typedef uint32_t StrHash; /* String hash value. */ +typedef uint32_t StrID; /* String ID. */ + /* String object header. String payload follows. */ typedef struct GCstr { GCHeader; uint8_t reserved; /* Used by lexer for fast lookup of reserved words. */ - uint8_t unused; - MSize hash; /* Hash of string. */ + uint8_t hashalg; /* Hash algorithm. */ + StrID sid; /* Interned string ID. */ + StrHash hash; /* Hash of string. */ MSize len; /* Size of string. */ } GCstr; @@ -233,7 +316,6 @@ typedef struct GCstr { #define strdata(s) ((const char *)((s)+1)) #define strdatawr(s) ((char *)((s)+1)) #define strVdata(o) strdata(strV(o)) -#define sizestring(s) (sizeof(struct GCstr)+(s)->len+1) /* -- Userdata object ----------------------------------------------------- */ @@ -253,6 +335,7 @@ enum { UDTYPE_USERDATA, /* Regular userdata. */ UDTYPE_IO_FILE, /* I/O library FILE. */ UDTYPE_FFI_CLIB, /* FFI C library namespace. */ + UDTYPE_BUFFER, /* String buffer. */ UDTYPE__MAX }; @@ -291,6 +374,9 @@ typedef struct GCproto { uint8_t numparams; /* Number of parameters. */ uint8_t framesize; /* Fixed frame size. */ MSize sizebc; /* Number of bytecode instructions. */ +#if LJ_GC64 + uint32_t unused_gc64; +#endif GCRef gclist; MRef k; /* Split constant array (points to the middle). */ MRef uv; /* Upvalue list. local slot|0x8000 or parent uv idx. */ @@ -402,7 +488,9 @@ typedef struct Node { TValue val; /* Value object. Must be first field. */ TValue key; /* Key object. */ MRef next; /* Hash chain. */ +#if !LJ_GC64 MRef freetop; /* Top of free elements (stored in t->node[0]). */ +#endif } Node; LJ_STATIC_ASSERT(offsetof(Node, val) == 0); @@ -417,12 +505,22 @@ typedef struct GCtab { MRef node; /* Hash part. */ uint32_t asize; /* Size of array part (keys [0, asize-1]). */ uint32_t hmask; /* Hash part mask (size of hash part - 1). */ +#if LJ_GC64 + MRef freetop; /* Top of free elements. */ +#endif } GCtab; #define sizetabcolo(n) ((n)*sizeof(TValue) + sizeof(GCtab)) #define tabref(r) ((GCtab *)gcref((r))) #define noderef(r) (mref((r), Node)) #define nextnode(n) (mref((n)->next, Node)) +#if LJ_GC64 +#define getfreetop(t, n) (noderef((t)->freetop)) +#define setfreetop(t, n, v) (setmref((t)->freetop, (v))) +#else +#define getfreetop(t, n) (noderef((n)->freetop)) +#define setfreetop(t, n, v) (setmref((n)->freetop, (v))) +#endif /* -- State objects ------------------------------------------------------- */ @@ -488,13 +586,18 @@ typedef enum { #define basemt_obj(g, o) ((g)->gcroot[GCROOT_BASEMT+itypemap(o)]) #define mmname_str(g, mm) (strref((g)->gcroot[GCROOT_MMNAME+(mm)])) +/* Garbage collector state. */ typedef struct GCState { - MSize total; /* Memory currently allocated. */ - MSize threshold; /* Memory threshold. */ + GCSize total; /* Memory currently allocated. */ + GCSize threshold; /* Memory threshold. */ uint8_t currentwhite; /* Current white color. */ uint8_t state; /* GC state. */ uint8_t nocdatafin; /* No cdata finalizer called. */ - uint8_t unused2; +#if LJ_64 + uint8_t lightudnum; /* Number of lightuserdata segments - 1. */ +#else + uint8_t unused1; +#endif MSize sweepstr; /* Sweep position in string table. */ GCRef root; /* List of all collectable objects. */ MRef sweep; /* Sweep position in root list. */ @@ -502,42 +605,57 @@ typedef struct GCState { GCRef grayagain; /* List of objects for atomic traversal. */ GCRef weak; /* List of weak tables (to be cleared). */ GCRef mmudata; /* List of userdata (to be finalized). */ + GCSize debt; /* Debt (how much GC is behind schedule). */ + GCSize estimate; /* Estimate of memory actually in use. */ MSize stepmul; /* Incremental GC step granularity. */ - MSize debt; /* Debt (how much GC is behind schedule). */ - MSize estimate; /* Estimate of memory actually in use. */ MSize pause; /* Pause between successive GC cycles. */ +#if LJ_64 + MRef lightudseg; /* Upper bits of lightuserdata segments. */ +#endif } GCState; +/* String interning state. */ +typedef struct StrInternState { + GCRef *tab; /* String hash table anchors. */ + MSize mask; /* String hash mask (size of hash table - 1). */ + MSize num; /* Number of strings in hash table. */ + StrID id; /* Next string ID. */ + uint8_t idreseed; /* String ID reseed counter. */ + uint8_t second; /* String interning table uses secondary hashing. */ + uint8_t unused1; + uint8_t unused2; + LJ_ALIGN(8) uint64_t seed; /* Random string seed. */ +} StrInternState; + /* Global state, shared by all threads of a Lua universe. */ typedef struct global_State { - GCRef *strhash; /* String hash table (hash chain anchors). */ - MSize strmask; /* String hash mask (size of hash table - 1). */ - MSize strnum; /* Number of strings in hash table. */ lua_Alloc allocf; /* Memory allocator. */ void *allocd; /* Memory allocator data. */ GCState gc; /* Garbage collector. */ - SBuf tmpbuf; /* Temporary buffer for string concatenation. */ - Node nilnode; /* Fallback 1-element hash part (nil key and value). */ GCstr strempty; /* Empty string. */ uint8_t stremptyz; /* Zero terminator of empty string. */ uint8_t hookmask; /* Hook mask. */ uint8_t dispatchmode; /* Dispatch mode. */ uint8_t vmevmask; /* VM event mask. */ + StrInternState str; /* String interning. */ + volatile int32_t vmstate; /* VM state or current JIT code trace number. */ GCRef mainthref; /* Link to main thread. */ - TValue registrytv; /* Anchor for registry. */ + SBuf tmpbuf; /* Temporary string buffer. */ TValue tmptv, tmptv2; /* Temporary TValues. */ + Node nilnode; /* Fallback 1-element hash part (nil key and value). */ + TValue registrytv; /* Anchor for registry. */ GCupval uvhead; /* Head of double-linked list of all open upvalues. */ int32_t hookcount; /* Instruction hook countdown. */ int32_t hookcstart; /* Start count for instruction hook counter. */ lua_Hook hookf; /* Hook function. */ lua_CFunction wrapf; /* Wrapper for C function calls. */ lua_CFunction panic; /* Called as a last resort for errors. */ - volatile int32_t vmstate; /* VM state or current JIT code trace number. */ BCIns bc_cfunc_int; /* Bytecode for internal C function calls. */ BCIns bc_cfunc_ext; /* Bytecode for external C function calls. */ - GCRef jit_L; /* Current JIT code lua_State or NULL. */ - MRef jit_base; /* Current JIT code L->base. */ + GCRef cur_L; /* Currently executing lua_State. */ + MRef jit_base; /* Current JIT code L->base or NULL. */ MRef ctype_state; /* Pointer to C type state. */ + PRNGState prng; /* Global PRNG state. */ GCRef gcroot[GCROOT_MAX]; /* GC roots. */ } global_State; @@ -553,9 +671,11 @@ typedef struct global_State { #define HOOK_ACTIVE_SHIFT 4 #define HOOK_VMEVENT 0x20 #define HOOK_GC 0x40 +#define HOOK_PROFILE 0x80 #define hook_active(g) ((g)->hookmask & HOOK_ACTIVE) #define hook_enter(g) ((g)->hookmask |= HOOK_ACTIVE) -#define hook_entergc(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_GC)) +#define hook_entergc(g) \ + ((g)->hookmask = ((g)->hookmask | (HOOK_ACTIVE|HOOK_GC)) & ~HOOK_PROFILE) #define hook_vmevent(g) ((g)->hookmask |= (HOOK_ACTIVE|HOOK_VMEVENT)) #define hook_leave(g) ((g)->hookmask &= ~HOOK_ACTIVE) #define hook_save(g) ((g)->hookmask & ~HOOK_EVENTMASK) @@ -583,12 +703,23 @@ struct lua_State { #define registry(L) (&G(L)->registrytv) /* Macros to access the currently executing (Lua) function. */ +#if LJ_GC64 +#define curr_func(L) (&gcval(L->base-2)->fn) +#elif LJ_FR2 +#define curr_func(L) (&gcref((L->base-2)->gcr)->fn) +#else #define curr_func(L) (&gcref((L->base-1)->fr.func)->fn) +#endif #define curr_funcisL(L) (isluafunc(curr_func(L))) #define curr_proto(L) (funcproto(curr_func(L))) #define curr_topL(L) (L->base + curr_proto(L)->framesize) #define curr_top(L) (curr_funcisL(L) ? curr_topL(L) : L->top) +#if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) +LJ_FUNC_NORET void lj_assert_fail(global_State *g, const char *file, int line, + const char *func, const char *fmt, ...); +#endif + /* -- GC object definition and conversions -------------------------------- */ /* GC header for generic access to common fields of GC objects. */ @@ -642,17 +773,18 @@ typedef union GCobj { /* -- TValue getters/setters ---------------------------------------------- */ -#ifdef LUA_USE_ASSERT -#include "lj_gc.h" -#endif - /* Macros to test types. */ +#if LJ_GC64 +#define itype(o) ((uint32_t)((o)->it64 >> 47)) +#define tvisnil(o) ((o)->it64 == -1) +#else #define itype(o) ((o)->it) #define tvisnil(o) (itype(o) == LJ_TNIL) +#endif #define tvisfalse(o) (itype(o) == LJ_TFALSE) #define tvistrue(o) (itype(o) == LJ_TTRUE) #define tvisbool(o) (tvisfalse(o) || tvistrue(o)) -#if LJ_64 +#if LJ_64 && !LJ_GC64 #define tvislightud(o) (((int32_t)itype(o) >> 15) == -2) #else #define tvislightud(o) (itype(o) == LJ_TLIGHTUD) @@ -686,7 +818,7 @@ typedef union GCobj { #define rawnumequal(o1, o2) ((o1)->u64 == (o2)->u64) /* Macros to convert type ids. */ -#if LJ_64 +#if LJ_64 && !LJ_GC64 #define itypemap(o) \ (tvisnumber(o) ? ~LJ_TNUMX : tvislightud(o) ? ~LJ_TLIGHTUD : ~itype(o)) #else @@ -694,13 +826,31 @@ typedef union GCobj { #endif /* Macros to get tagged values. */ +#if LJ_GC64 +#define gcval(o) ((GCobj *)(gcrefu((o)->gcr) & LJ_GCVMASK)) +#else #define gcval(o) (gcref((o)->gcr)) -#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - (o)->it)) +#endif +#define boolV(o) check_exp(tvisbool(o), (LJ_TFALSE - itype(o))) #if LJ_64 -#define lightudV(o) \ - check_exp(tvislightud(o), (void *)((o)->u64 & U64x(00007fff,ffffffff))) +#define lightudseg(u) \ + (((u) >> LJ_LIGHTUD_BITS_LO) & ((1 << LJ_LIGHTUD_BITS_SEG)-1)) +#define lightudlo(u) \ + ((u) & (((uint64_t)1 << LJ_LIGHTUD_BITS_LO) - 1)) +#define lightudup(p) \ + ((uint32_t)(((p) >> LJ_LIGHTUD_BITS_LO) << (LJ_LIGHTUD_BITS_LO-32))) +static LJ_AINLINE void *lightudV(global_State *g, cTValue *o) +{ + uint64_t u = o->u64; + uint64_t seg = lightudseg(u); + uint32_t *segmap = mref(g->gc.lightudseg, uint32_t); + lj_assertG(tvislightud(o), "lightuserdata expected"); + if (seg == (1 << LJ_LIGHTUD_BITS_SEG)-1) return NULL; + lj_assertG(seg <= g->gc.lightudnum, "bad lightuserdata segment %d", seg); + return (void *)(((uint64_t)segmap[seg] << 32) | lightudlo(u)); +} #else -#define lightudV(o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) +#define lightudV(g, o) check_exp(tvislightud(o), gcrefp((o)->gcr, void)) #endif #define gcV(o) check_exp(tvisgcv(o), gcval(o)) #define strV(o) check_exp(tvisstr(o), &gcval(o)->str) @@ -714,40 +864,70 @@ typedef union GCobj { #define intV(o) check_exp(tvisint(o), (int32_t)(o)->i) /* Macros to set tagged values. */ +#if LJ_GC64 +#define setitype(o, i) ((o)->it = ((i) << 15)) +#define setnilV(o) ((o)->it64 = -1) +#define setpriV(o, x) ((o)->it64 = (int64_t)~((uint64_t)~(x)<<47)) +#define setboolV(o, x) ((o)->it64 = (int64_t)~((uint64_t)((x)+1)<<47)) +#else #define setitype(o, i) ((o)->it = (i)) #define setnilV(o) ((o)->it = LJ_TNIL) #define setboolV(o, x) ((o)->it = LJ_TFALSE-(uint32_t)(x)) +#define setpriV(o, i) (setitype((o), (i))) +#endif -static LJ_AINLINE void setlightudV(TValue *o, void *p) +static LJ_AINLINE void setrawlightudV(TValue *o, void *p) { -#if LJ_64 +#if LJ_GC64 + o->u64 = (uint64_t)p | (((uint64_t)LJ_TLIGHTUD) << 47); +#elif LJ_64 o->u64 = (uint64_t)p | (((uint64_t)0xffff) << 48); #else setgcrefp(o->gcr, p); setitype(o, LJ_TLIGHTUD); #endif } -#if LJ_64 -#define checklightudptr(L, p) \ - (((uint64_t)(p) >> 47) ? (lj_err_msg(L, LJ_ERR_BADLU), NULL) : (p)) +#if LJ_FR2 || LJ_32 +#define contptr(f) ((void *)(f)) +#define setcont(o, f) ((o)->u64 = (uint64_t)(uintptr_t)contptr(f)) +#else +#define contptr(f) \ + ((void *)(uintptr_t)(uint32_t)((intptr_t)(f) - (intptr_t)lj_vm_asm_begin)) #define setcont(o, f) \ ((o)->u64 = (uint64_t)(void *)(f) - (uint64_t)lj_vm_asm_begin) -#else -#define checklightudptr(L, p) (p) -#define setcont(o, f) setlightudV((o), (void *)(f)) #endif -#define tvchecklive(L, o) \ - UNUSED(L), lua_assert(!tvisgcv(o) || \ - ((~itype(o) == gcval(o)->gch.gct) && !isdead(G(L), gcval(o)))) +static LJ_AINLINE void checklivetv(lua_State *L, TValue *o, const char *msg) +{ + UNUSED(L); UNUSED(o); UNUSED(msg); +#if LUA_USE_ASSERT + if (tvisgcv(o)) { + lj_assertL(~itype(o) == gcval(o)->gch.gct, + "mismatch of TValue type %d vs GC type %d", + ~itype(o), gcval(o)->gch.gct); + /* Copy of isdead check from lj_gc.h to avoid circular include. */ + lj_assertL(!(gcval(o)->gch.marked & (G(L)->gc.currentwhite ^ 3) & 3), msg); + } +#endif +} + +static LJ_AINLINE void setgcVraw(TValue *o, GCobj *v, uint32_t itype) +{ +#if LJ_GC64 + setgcreft(o->gcr, v, itype); +#else + setgcref(o->gcr, v); setitype(o, itype); +#endif +} -static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t itype) +static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it) { - setgcref(o->gcr, v); setitype(o, itype); tvchecklive(L, o); + setgcVraw(o, v, it); + checklivetv(L, o, "store to dead GC object"); } #define define_setV(name, type, tag) \ -static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \ +static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \ { \ setgcV(L, o, obj2gco(v), tag); \ } @@ -790,13 +970,17 @@ static LJ_AINLINE void setint64V(TValue *o, int64_t i) /* Copy tagged values. */ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2) { - *o1 = *o2; tvchecklive(L, o1); + *o1 = *o2; + checklivetv(L, o1, "copy of dead GC object"); } /* -- Number to integer conversion ---------------------------------------- */ #if LJ_SOFTFP LJ_ASMF int32_t lj_vm_tobit(double x); +#if LJ_TARGET_MIPS64 +LJ_ASMF int32_t lj_vm_tointg(double x); +#endif #endif static LJ_AINLINE int32_t lj_num2bit(lua_Number n) @@ -810,11 +994,7 @@ static LJ_AINLINE int32_t lj_num2bit(lua_Number n) #endif } -#if LJ_TARGET_X86 && !defined(__SSE2__) -#define lj_num2int(n) lj_num2bit((n)) -#else #define lj_num2int(n) ((int32_t)(n)) -#endif /* ** This must match the JIT backend behavior. In particular for archs @@ -859,6 +1039,7 @@ LJ_DATA const char *const lj_obj_itypename[~LJ_TNUMX+1]; #define lj_typename(o) (lj_obj_itypename[itypemap(o)]) /* Compare two objects without calling metamethods. */ -LJ_FUNC int lj_obj_equal(cTValue *o1, cTValue *o2); +LJ_FUNC int LJ_FASTCALL lj_obj_equal(cTValue *o1, cTValue *o2); +LJ_FUNC const void * LJ_FASTCALL lj_obj_ptr(global_State *g, cTValue *o); #endif diff --git a/src/lj_opt_fold.c b/src/lj_opt_fold.c index e9a6532a..7ef09a1f 100644 --- a/src/lj_opt_fold.c +++ b/src/lj_opt_fold.c @@ -14,18 +14,21 @@ #if LJ_HASJIT +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_ir.h" #include "lj_jit.h" +#include "lj_ircall.h" #include "lj_iropt.h" #include "lj_trace.h" #if LJ_HASFFI #include "lj_ctype.h" -#endif #include "lj_carith.h" +#endif #include "lj_vm.h" #include "lj_strscan.h" +#include "lj_strfmt.h" /* Here's a short description how the FOLD engine processes instructions: ** @@ -133,8 +136,8 @@ /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) #define fins (&J->fold.ins) -#define fleft (&J->fold.left) -#define fright (&J->fold.right) +#define fleft (J->fold.left) +#define fright (J->fold.right) #define knumleft (ir_knum(fleft)->n) #define knumright (ir_knum(fright)->n) @@ -155,13 +158,14 @@ typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J); /* Barrier to prevent folding across a GC step. ** GC steps can only happen at the head of a trace and at LOOP. -** And the GC is only driven forward if there is at least one allocation. +** And the GC is only driven forward if there's at least one allocation. */ #define gcstep_barrier(J, ref) \ ((ref) < J->chain[IR_LOOP] && \ (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \ J->chain[IR_TNEW] || J->chain[IR_TDUP] || \ - J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR])) + J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \ + J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA])) /* -- Constant folding for FP numbers ------------------------------------- */ @@ -169,9 +173,6 @@ LJFOLD(ADD KNUM KNUM) LJFOLD(SUB KNUM KNUM) LJFOLD(MUL KNUM KNUM) LJFOLD(DIV KNUM KNUM) -LJFOLD(NEG KNUM KNUM) -LJFOLD(ABS KNUM KNUM) -LJFOLD(ATAN2 KNUM KNUM) LJFOLD(LDEXP KNUM KNUM) LJFOLD(MIN KNUM KNUM) LJFOLD(MAX KNUM KNUM) @@ -183,6 +184,15 @@ LJFOLDF(kfold_numarith) return lj_ir_knum(J, y); } +LJFOLD(NEG KNUM FLOAD) +LJFOLD(ABS KNUM FLOAD) +LJFOLDF(kfold_numabsneg) +{ + lua_Number a = knumleft; + lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD); + return lj_ir_knum(J, y); +} + LJFOLD(LDEXP KNUM KINT) LJFOLDF(kfold_ldexp) { @@ -202,13 +212,34 @@ LJFOLDF(kfold_fpmath) return lj_ir_knum(J, y); } -LJFOLD(POW KNUM KINT) +LJFOLD(CALLN KNUM any) +LJFOLDF(kfold_fpcall1) +{ + const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; + if (CCI_TYPE(ci) == IRT_NUM) { + double y = ((double (*)(double))ci->func)(knumleft); + return lj_ir_knum(J, y); + } + return NEXTFOLD; +} + +LJFOLD(CALLN CARG IRCALL_atan2) +LJFOLDF(kfold_fpcall2) +{ + if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) { + const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; + double a = ir_knum(IR(fleft->op1))->n; + double b = ir_knum(IR(fleft->op2))->n; + double y = ((double (*)(double, double))ci->func)(a, b); + return lj_ir_knum(J, y); + } + return NEXTFOLD; +} + +LJFOLD(POW KNUM KNUM) LJFOLDF(kfold_numpow) { - lua_Number a = knumleft; - lua_Number b = (lua_Number)fright->i; - lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD); - return lj_ir_knum(J, y); + return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD)); } /* Must not use kfold_kref for numbers (could be NaN). */ @@ -247,7 +278,7 @@ static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; - default: lua_assert(0); break; + default: lj_assertX(0, "bad IR op %d", op); break; } return k1; } @@ -319,7 +350,7 @@ LJFOLDF(kfold_intcomp) case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b); case IR_ABC: case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b); - default: lua_assert(0); return FAILFOLD; + default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; } } @@ -333,21 +364,29 @@ LJFOLDF(kfold_intcomp0) /* -- Constant folding for 64 bit integers -------------------------------- */ -static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op) +static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2, + IROp op) { + UNUSED(J); +#if LJ_HASFFI switch (op) { -#if LJ_64 || LJ_HASFFI case IR_ADD: k1 += k2; break; case IR_SUB: k1 -= k2; break; -#endif -#if LJ_HASFFI case IR_MUL: k1 *= k2; break; case IR_BAND: k1 &= k2; break; case IR_BOR: k1 |= k2; break; case IR_BXOR: k1 ^= k2; break; -#endif - default: UNUSED(k2); lua_assert(0); break; + case IR_BSHL: k1 <<= (k2 & 63); break; + case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 63)); break; + case IR_BSAR: k1 >>= (k2 & 63); break; + case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 63)); break; + case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 63)); break; + default: lj_assertJ(0, "bad IR op %d", op); break; } +#else + UNUSED(k2); UNUSED(op); + lj_assertJ(0, "FFI IR op without FFI"); +#endif return k1; } @@ -359,7 +398,7 @@ LJFOLD(BOR KINT64 KINT64) LJFOLD(BXOR KINT64 KINT64) LJFOLDF(kfold_int64arith) { - return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64, + return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64, ir_k64(fright)->u64, (IROp)fins->o)); } @@ -381,7 +420,7 @@ LJFOLDF(kfold_int64arith2) } return INT64FOLD(k1); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -392,22 +431,12 @@ LJFOLD(BROL KINT64 KINT) LJFOLD(BROR KINT64 KINT) LJFOLDF(kfold_int64shift) { -#if LJ_HASFFI || LJ_64 +#if LJ_HASFFI uint64_t k = ir_k64(fleft)->u64; int32_t sh = (fright->i & 63); - switch ((IROp)fins->o) { - case IR_BSHL: k <<= sh; break; -#if LJ_HASFFI - case IR_BSHR: k >>= sh; break; - case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break; - case IR_BROL: k = lj_rol(k, sh); break; - case IR_BROR: k = lj_ror(k, sh); break; -#endif - default: lua_assert(0); break; - } - return INT64FOLD(k); + return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL)); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -417,7 +446,7 @@ LJFOLDF(kfold_bnot64) #if LJ_HASFFI return INT64FOLD(~ir_k64(fleft)->u64); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -427,7 +456,7 @@ LJFOLDF(kfold_bswap64) #if LJ_HASFFI return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64)); #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -452,10 +481,10 @@ LJFOLDF(kfold_int64comp) case IR_UGE: return CONDFOLD(a >= b); case IR_ULE: return CONDFOLD(a <= b); case IR_UGT: return CONDFOLD(a > b); - default: lua_assert(0); return FAILFOLD; + default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD; } #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -467,7 +496,7 @@ LJFOLDF(kfold_int64comp0) return DROPFOLD; return NEXTFOLD; #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } @@ -481,6 +510,7 @@ LJFOLDF(kfold_snew_kptr) } LJFOLD(SNEW any KINT) +LJFOLD(XSNEW any KINT) LJFOLDF(kfold_snew_empty) { if (fright->i == 0) @@ -492,7 +522,7 @@ LJFOLD(STRREF KGC KINT) LJFOLDF(kfold_strref) { GCstr *str = ir_kstr(fleft); - lua_assert((MSize)fright->i <= str->len); + lj_assertJ((MSize)fright->i <= str->len, "bad string ref"); return lj_ir_kkptr(J, (char *)strdata(str) + fright->i); } @@ -510,7 +540,7 @@ LJFOLDF(kfold_strref_snew) PHIBARRIER(ir); fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */ fins->op1 = str; - fins->ot = IRT(IR_STRREF, IRT_P32); + fins->ot = IRT(IR_STRREF, IRT_PGC); return RETRYFOLD; } } @@ -528,6 +558,211 @@ LJFOLDF(kfold_strcmp) return NEXTFOLD; } +/* -- Constant folding and forwarding for buffers ------------------------- */ + +/* +** Buffer ops perform stores, but their effect is limited to the buffer +** itself. Also, buffer ops are chained: a use of an op implies a use of +** all other ops up the chain. Conversely, if an op is unused, all ops +** up the chain can go unsed. This largely eliminates the need to treat +** them as stores. +** +** Alas, treating them as normal (IRM_N) ops doesn't work, because they +** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP +** or if FOLD is disabled. +** +** The compromise is to declare them as loads, emit them like stores and +** CSE whole chains manually when the BUFSTR is to be emitted. Any chain +** fragments left over from CSE are eliminated by DCE. +** +** The string buffer methods emit a USE instead of a BUFSTR to keep the +** chain alive. +*/ + +LJFOLD(BUFHDR any any) +LJFOLDF(bufhdr_merge) +{ + return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD; +} + +LJFOLD(BUFPUT any BUFSTR) +LJFOLDF(bufput_bufstr) +{ + if ((J->flags & JIT_F_OPT_FWD)) { + IRRef hdr = fright->op2; + /* New buffer, no other buffer op inbetween and same buffer? */ + if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET && + fleft->prev == hdr && + fleft->op1 == IR(hdr)->op1 && + !(irt_isphi(fright->t) && IR(hdr)->prev) && + (!LJ_HASBUFFER || J->chain[IR_CALLA] < hdr)) { + IRRef ref = fins->op1; + IR(ref)->op2 = IRBUFHDR_APPEND; /* Modify BUFHDR. */ + IR(ref)->op1 = fright->op1; + return ref; + } + /* Replay puts to global temporary buffer. */ + if (IR(hdr)->op2 == IRBUFHDR_RESET && !irt_isphi(fright->t)) { + IRIns *ir = IR(fright->op1); + /* For now only handle single string.reverse .lower .upper .rep. */ + if (ir->o == IR_CALLL && + ir->op2 >= IRCALL_lj_buf_putstr_reverse && + ir->op2 <= IRCALL_lj_buf_putstr_rep) { + IRIns *carg1 = IR(ir->op1); + if (ir->op2 == IRCALL_lj_buf_putstr_rep) { + IRIns *carg2 = IR(carg1->op1); + if (carg2->op1 == hdr) { + return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2); + } + } else if (carg1->op1 == hdr) { + return lj_ir_call(J, ir->op2, fins->op1, carg1->op2); + } + } + } + } + return EMITFOLD; /* Always emit, CSE later. */ +} + +LJFOLD(BUFPUT any any) +LJFOLDF(bufput_kgc) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) { + GCstr *s2 = ir_kstr(fright); + if (s2->len == 0) { /* Empty string? */ + return LEFTFOLD; + } else { + if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) && + !irt_isphi(fleft->t)) { /* Join two constant string puts in a row. */ + GCstr *s1 = ir_kstr(IR(fleft->op2)); + IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2)); + /* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */ + IR(fins->op1)->op2 = kref; /* Modify previous BUFPUT. */ + return fins->op1; + } + } + } + return EMITFOLD; /* Always emit, CSE later. */ +} + +LJFOLD(BUFSTR any any) +LJFOLDF(bufstr_kfold_cse) +{ + lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT || + fleft->o == IR_CALLL, + "bad buffer constructor IR op %d", fleft->o); + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { + if (fleft->o == IR_BUFHDR) { /* No put operations? */ + if (fleft->op2 == IRBUFHDR_RESET) /* Empty buffer? */ + return lj_ir_kstr(J, &J2G(J)->strempty); + fins->op1 = fleft->op1; + fins->op2 = fleft->prev; /* Relies on checks in bufput_append. */ + return CSEFOLD; + } else if (fleft->o == IR_BUFPUT) { + IRIns *irb = IR(fleft->op1); + if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET) + return fleft->op2; /* Shortcut for a single put operation. */ + } + } + /* Try to CSE the whole chain. */ + if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) { + IRRef ref = J->chain[IR_BUFSTR]; + while (ref) { + IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1); + while (ira->o == irb->o && ira->op2 == irb->op2) { + lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT || + ira->o == IR_CALLL || ira->o == IR_CARG, + "bad buffer constructor IR op %d", ira->o); + if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET) + return ref; /* CSE succeeded. */ + if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab) + break; + ira = IR(ira->op1); + irb = IR(irb->op1); + } + ref = irs->prev; + } + } + return EMITFOLD; /* No CSE possible. */ +} + +LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse) +LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper) +LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower) +LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted) +LJFOLDF(bufput_kfold_op) +{ + if (irref_isk(fleft->op2)) { + const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; + SBuf *sb = lj_buf_tmp_(J->L); + sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb, + ir_kstr(IR(fleft->op2))); + fins->o = IR_BUFPUT; + fins->op1 = fleft->op1; + fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); + return RETRYFOLD; + } + return EMITFOLD; /* Always emit, CSE later. */ +} + +LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep) +LJFOLDF(bufput_kfold_rep) +{ + if (irref_isk(fleft->op2)) { + IRIns *irc = IR(fleft->op1); + if (irref_isk(irc->op2)) { + SBuf *sb = lj_buf_tmp_(J->L); + sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i); + fins->o = IR_BUFPUT; + fins->op1 = irc->op1; + fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); + return RETRYFOLD; + } + } + return EMITFOLD; /* Always emit, CSE later. */ +} + +LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint) +LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int) +LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint) +LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum) +LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr) +LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar) +LJFOLDF(bufput_kfold_fmt) +{ + IRIns *irc = IR(fleft->op1); + lj_assertJ(irref_isk(irc->op2), "SFormat must be const"); + if (irref_isk(fleft->op2)) { + SFormat sf = (SFormat)IR(irc->op2)->i; + IRIns *ira = IR(fleft->op2); + SBuf *sb = lj_buf_tmp_(J->L); + switch (fins->op2) { + case IRCALL_lj_strfmt_putfxint: + sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64); + break; + case IRCALL_lj_strfmt_putfstr: + sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira)); + break; + case IRCALL_lj_strfmt_putfchar: + sb = lj_strfmt_putfchar(sb, sf, ira->i); + break; + case IRCALL_lj_strfmt_putfnum_int: + case IRCALL_lj_strfmt_putfnum_uint: + case IRCALL_lj_strfmt_putfnum: + default: { + const CCallInfo *ci = &lj_ir_callinfo[fins->op2]; + sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf, + ir_knum(ira)->n); + break; + } + } + fins->o = IR_BUFPUT; + fins->op1 = irc->op1; + fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb)); + return RETRYFOLD; + } + return EMITFOLD; /* Always emit, CSE later. */ +} + /* -- Constant folding of pointer arithmetic ------------------------------ */ LJFOLD(ADD KGC KINT) @@ -648,21 +883,17 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM) LJFOLDF(kfold_conv_knum_int_num) { lua_Number n = knumleft; - if (!(fins->op2 & IRCONV_TRUNC)) { - int32_t k = lj_num2int(n); - if (irt_isguard(fins->t) && n != (lua_Number)k) { - /* We're about to create a guard which always fails, like CONV +1.5. - ** Some pathological loops cause this during LICM, e.g.: - ** local x,k,t = 0,1.5,{1,[1.5]=2} - ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end - ** assert(x == 300) - */ - return FAILFOLD; - } - return INTFOLD(k); - } else { - return INTFOLD((int32_t)n); + int32_t k = lj_num2int(n); + if (irt_isguard(fins->t) && n != (lua_Number)k) { + /* We're about to create a guard which always fails, like CONV +1.5. + ** Some pathological loops cause this during LICM, e.g.: + ** local x,k,t = 0,1.5,{1,[1.5]=2} + ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end + ** assert(x == 300) + */ + return FAILFOLD; } + return INTFOLD(k); } LJFOLD(CONV KNUM IRCONV_U32_NUM) @@ -690,16 +921,18 @@ LJFOLDF(kfold_conv_knum_u64_num) return INT64FOLD(lj_num2u64(knumleft)); } -LJFOLD(TOSTR KNUM) +LJFOLD(TOSTR KNUM any) LJFOLDF(kfold_tostr_knum) { - return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft)); + return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft))); } -LJFOLD(TOSTR KINT) +LJFOLD(TOSTR KINT any) LJFOLDF(kfold_tostr_kint) { - return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i)); + return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ? + lj_strfmt_int(J->L, fleft->i) : + lj_strfmt_char(J->L, fleft->i)); } LJFOLD(STRTO KGC) @@ -747,13 +980,13 @@ LJFOLDF(shortcut_round) return NEXTFOLD; } -LJFOLD(ABS ABS KNUM) +LJFOLD(ABS ABS FLOAD) LJFOLDF(shortcut_left) { return LEFTFOLD; /* f(g(x)) ==> g(x) */ } -LJFOLD(ABS NEG KNUM) +LJFOLD(ABS NEG FLOAD) LJFOLDF(shortcut_dropleft) { PHIBARRIER(fleft); @@ -833,8 +1066,10 @@ LJFOLDF(simplify_nummuldiv_k) if (n == 1.0) { /* x o 1 ==> x */ return LEFTFOLD; } else if (n == -1.0) { /* x o -1 ==> -x */ + IRRef op1 = fins->op1; + fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG); /* Modifies fins. */ + fins->op1 = op1; fins->o = IR_NEG; - fins->op2 = (IRRef1)lj_ir_knum_neg(J); return RETRYFOLD; } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */ fins->o = IR_ADD; @@ -874,52 +1109,17 @@ LJFOLDF(simplify_nummuldiv_negneg) return RETRYFOLD; } -LJFOLD(POW any KINT) -LJFOLDF(simplify_numpow_xk) +LJFOLD(POW any KNUM) +LJFOLDF(simplify_numpow_k) { - int32_t k = fright->i; - TRef ref = fins->op1; - if (k == 0) /* x ^ 0 ==> 1 */ + if (knumright == 0.0) /* x ^ 0 ==> 1 */ return lj_ir_knum_one(J); /* Result must be a number, not an int. */ - if (k == 1) /* x ^ 1 ==> x */ + else if (knumright == 1.0) /* x ^ 1 ==> x */ return LEFTFOLD; - if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */ + else if (knumright == 2.0) /* x ^ 2 ==> x * x */ + return emitir(IRTN(IR_MUL), fins->op1, fins->op1); + else return NEXTFOLD; - if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */ - ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref); - k = -k; - } - /* Unroll x^k for 1 <= k <= 65536. */ - for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */ - ref = emitir(IRTN(IR_MUL), ref, ref); - if ((k >>= 1) != 0) { /* Handle trailing bits. */ - TRef tmp = emitir(IRTN(IR_MUL), ref, ref); - for (; k != 1; k >>= 1) { - if (k & 1) - ref = emitir(IRTN(IR_MUL), ref, tmp); - tmp = emitir(IRTN(IR_MUL), tmp, tmp); - } - ref = emitir(IRTN(IR_MUL), ref, tmp); - } - return ref; -} - -LJFOLD(POW KNUM any) -LJFOLDF(simplify_numpow_kx) -{ - lua_Number n = knumleft; - if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */ - fins->o = IR_CONV; -#if LJ_TARGET_X86ORX64 - fins->op1 = fins->op2; - fins->op2 = IRCONV_NUM_INT; - fins->op2 = (IRRef1)lj_opt_fold(J); -#endif - fins->op1 = (IRRef1)lj_ir_knum_one(J); - fins->o = IR_LDEXP; - return RETRYFOLD; - } - return NEXTFOLD; } /* -- Simplify conversions ------------------------------------------------ */ @@ -1004,10 +1204,10 @@ LJFOLDF(simplify_tobit_conv) { /* Fold even across PHI to avoid expensive num->int conversions in loop. */ if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) { - lua_assert(irt_isnum(fleft->t)); + lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); return fleft->op1; } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) { - lua_assert(irt_isnum(fleft->t)); + lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg"); fins->o = IR_CONV; fins->op1 = fleft->op1; fins->op2 = (IRT_INT<<5)|IRT_U32; @@ -1047,7 +1247,7 @@ LJFOLDF(simplify_conv_sext) /* Use scalar evolution analysis results to strength-reduce sign-extension. */ if (ref == J->scev.idx) { IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop; - lua_assert(irt_isint(J->scev.t)); + lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported"); if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) { ok_reduce: #if LJ_TARGET_X64 @@ -1078,6 +1278,10 @@ LJFOLD(CONV SUB IRCONV_U32_U64) LJFOLD(CONV MUL IRCONV_U32_U64) LJFOLDF(simplify_conv_narrow) { +#if LJ_64 + UNUSED(J); + return NEXTFOLD; +#else IROp op = (IROp)fleft->o; IRType t = irt_type(fins->t); IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2; @@ -1088,6 +1292,7 @@ LJFOLDF(simplify_conv_narrow) fins->op1 = op1; fins->op2 = op2; return RETRYFOLD; +#endif } /* Special CSE rule for CONV. */ @@ -1123,7 +1328,8 @@ LJFOLDF(narrow_convert) /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */ if (J->chain[IR_LOOP]) return NEXTFOLD; - lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT); + lj_assertJ(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT, + "unexpected CONV TOBIT"); return lj_opt_narrow_convert(J); } @@ -1201,7 +1407,9 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) ** But this is mainly intended for simple address arithmetic. ** Also it's easier for the backend to optimize the original multiplies. */ - if (k == 1) { /* i * 1 ==> i */ + if (k == 0) { /* i * 0 ==> 0 */ + return RIGHTFOLD; + } else if (k == 1) { /* i * 1 ==> i */ return LEFTFOLD; } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */ fins->o = IR_BSHL; @@ -1214,9 +1422,7 @@ static TRef simplify_intmul_k(jit_State *J, int32_t k) LJFOLD(MUL any KINT) LJFOLDF(simplify_intmul_k32) { - if (fright->i == 0) /* i * 0 ==> 0 */ - return INTFOLD(0); - else if (fright->i > 0) + if (fright->i >= 0) return simplify_intmul_k(J, fright->i); return NEXTFOLD; } @@ -1224,21 +1430,20 @@ LJFOLDF(simplify_intmul_k32) LJFOLD(MUL any KINT64) LJFOLDF(simplify_intmul_k64) { - if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */ - return INT64FOLD(0); -#if LJ_64 - /* NYI: SPLIT for BSHL and 32 bit backend support. */ - else if (ir_kint64(fright)->u64 < 0x80000000u) +#if LJ_HASFFI + if (ir_kint64(fright)->u64 < 0x80000000u) return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64); -#endif return NEXTFOLD; +#else + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; +#endif } LJFOLD(MOD any KINT) LJFOLDF(simplify_intmod_k) { int32_t k = fright->i; - lua_assert(k != 0); + lj_assertJ(k != 0, "integer mod 0"); if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */ fins->o = IR_BAND; fins->op2 = lj_ir_kint(J, k-1); @@ -1487,6 +1692,15 @@ LJFOLDF(simplify_shiftk_andk) fins->op2 = (IRRef1)lj_ir_kint(J, k); fins->ot = IRTI(IR_BAND); return RETRYFOLD; + } else if (irk->o == IR_KINT64) { + uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i, + (IROp)fins->o); + IROpT ot = fleft->ot; + fins->op1 = fleft->op1; + fins->op1 = (IRRef1)lj_opt_fold(J); + fins->op2 = (IRRef1)lj_ir_kint64(J, k); + fins->ot = ot; + return RETRYFOLD; } return NEXTFOLD; } @@ -1502,6 +1716,47 @@ LJFOLDF(simplify_andk_shiftk) return NEXTFOLD; } +LJFOLD(BAND BOR KINT) +LJFOLD(BOR BAND KINT) +LJFOLDF(simplify_andor_k) +{ + IRIns *irk = IR(fleft->op2); + PHIBARRIER(fleft); + if (irk->o == IR_KINT) { + int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o); + /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ + /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ + if (k == (fins->o == IR_BAND ? 0 : -1)) { + fins->op1 = fleft->op1; + return RETRYFOLD; + } + } + return NEXTFOLD; +} + +LJFOLD(BAND BOR KINT64) +LJFOLD(BOR BAND KINT64) +LJFOLDF(simplify_andor_k64) +{ +#if LJ_HASFFI + IRIns *irk = IR(fleft->op2); + PHIBARRIER(fleft); + if (irk->o == IR_KINT64) { + uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, + (IROp)fins->o); + /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */ + /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */ + if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) { + fins->op1 = fleft->op1; + return RETRYFOLD; + } + } + return NEXTFOLD; +#else + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; +#endif +} + /* -- Reassociation ------------------------------------------------------- */ LJFOLD(ADD ADD KINT) @@ -1531,11 +1786,11 @@ LJFOLD(BOR BOR KINT64) LJFOLD(BXOR BXOR KINT64) LJFOLDF(reassoc_intarith_k64) { -#if LJ_HASFFI || LJ_64 +#if LJ_HASFFI IRIns *irk = IR(fleft->op2); if (irk->o == IR_KINT64) { - uint64_t k = kfold_int64arith(ir_k64(irk)->u64, - ir_k64(fright)->u64, (IROp)fins->o); + uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64, + (IROp)fins->o); PHIBARRIER(fleft); fins->op1 = fleft->op1; fins->op2 = (IRRef1)lj_ir_kint64(J, k); @@ -1543,12 +1798,10 @@ LJFOLDF(reassoc_intarith_k64) } return NEXTFOLD; #else - UNUSED(J); lua_assert(0); return FAILFOLD; + UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD; #endif } -LJFOLD(MIN MIN any) -LJFOLD(MAX MAX any) LJFOLD(BAND BAND any) LJFOLD(BOR BOR any) LJFOLDF(reassoc_dup) @@ -1558,6 +1811,15 @@ LJFOLDF(reassoc_dup) return NEXTFOLD; } +LJFOLD(MIN MIN any) +LJFOLD(MAX MAX any) +LJFOLDF(reassoc_dup_minmax) +{ + if (fins->op2 == fleft->op2) + return LEFTFOLD; /* (a o b) o b ==> a o b */ + return NEXTFOLD; +} + LJFOLD(BXOR BXOR any) LJFOLDF(reassoc_bxor) { @@ -1596,23 +1858,12 @@ LJFOLDF(reassoc_shift) return NEXTFOLD; } -LJFOLD(MIN MIN KNUM) -LJFOLD(MAX MAX KNUM) LJFOLD(MIN MIN KINT) LJFOLD(MAX MAX KINT) LJFOLDF(reassoc_minmax_k) { IRIns *irk = IR(fleft->op2); - if (irk->o == IR_KNUM) { - lua_Number a = ir_knum(irk)->n; - lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD); - if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ - return LEFTFOLD; - PHIBARRIER(fleft); - fins->op1 = fleft->op1; - fins->op2 = (IRRef1)lj_ir_knum(J, y); - return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */ - } else if (irk->o == IR_KINT) { + if (irk->o == IR_KINT) { int32_t a = irk->i; int32_t y = kfold_intop(a, fright->i, fins->o); if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */ @@ -1625,24 +1876,6 @@ LJFOLDF(reassoc_minmax_k) return NEXTFOLD; } -LJFOLD(MIN MAX any) -LJFOLD(MAX MIN any) -LJFOLDF(reassoc_minmax_left) -{ - if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) - return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ - return NEXTFOLD; -} - -LJFOLD(MIN any MAX) -LJFOLD(MAX any MIN) -LJFOLDF(reassoc_minmax_right) -{ - if (fins->op1 == fright->op1 || fins->op1 == fright->op2) - return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ - return NEXTFOLD; -} - /* -- Array bounds check elimination -------------------------------------- */ /* Eliminate ABC across PHIs to handle t[i-1] forwarding case. @@ -1769,8 +2002,6 @@ LJFOLDF(comm_comp) LJFOLD(BAND any any) LJFOLD(BOR any any) -LJFOLD(MIN any any) -LJFOLD(MAX any any) LJFOLDF(comm_dup) { if (fins->op1 == fins->op2) /* x o x ==> x */ @@ -1778,6 +2009,15 @@ LJFOLDF(comm_dup) return fold_comm_swap(J); } +LJFOLD(MIN any any) +LJFOLD(MAX any any) +LJFOLDF(comm_dup_minmax) +{ + if (fins->op1 == fins->op2) /* x o x ==> x */ + return LEFTFOLD; + return NEXTFOLD; +} + LJFOLD(BXOR any any) LJFOLDF(comm_bxor) { @@ -1814,7 +2054,7 @@ LJFOLDF(merge_eqne_snew_kgc) { GCstr *kstr = ir_kstr(fright); int32_t len = (int32_t)kstr->len; - lua_assert(irt_isstr(fins->t)); + lj_assertJ(irt_isstr(fins->t), "bad equality IR type"); #if LJ_TARGET_UNALIGNED #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */ @@ -1878,7 +2118,7 @@ LJFOLD(HLOAD KKPTR) LJFOLDF(kfold_hload_kkptr) { UNUSED(J); - lua_assert(ir_kptr(fleft) == niltvg(J2G(J))); + lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv"); return TREF_NIL; } @@ -1888,8 +2128,8 @@ LJFOLDX(lj_opt_fwd_hload) LJFOLD(ULOAD any) LJFOLDX(lj_opt_fwd_uload) -LJFOLD(CALLL any IRCALL_lj_tab_len) -LJFOLDX(lj_opt_fwd_tab_len) +LJFOLD(ALEN any any) +LJFOLDX(lj_opt_fwd_alen) /* Upvalue refs are really loads, but there are no corresponding stores. ** So CSE is ok for them, except for UREFO across a GC step (see below). @@ -1950,6 +2190,7 @@ LJFOLDF(fwd_href_tdup) ** an aliased table, as it may invalidate all of the pointers and fields. ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on ** FLOADs. And NEWREF itself is treated like a store (see below). +** LREF is constant (per trace) since coroutine switches are not inlined. */ LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) LJFOLDF(fload_tab_tnew_asize) @@ -2013,6 +2254,35 @@ LJFOLDF(fload_str_len_snew) return NEXTFOLD; } +LJFOLD(FLOAD TOSTR IRFL_STR_LEN) +LJFOLDF(fload_str_len_tostr) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR) + return INTFOLD(1); + return NEXTFOLD; +} + +LJFOLD(FLOAD any IRFL_SBUF_W) +LJFOLD(FLOAD any IRFL_SBUF_E) +LJFOLD(FLOAD any IRFL_SBUF_B) +LJFOLD(FLOAD any IRFL_SBUF_L) +LJFOLD(FLOAD any IRFL_SBUF_REF) +LJFOLD(FLOAD any IRFL_SBUF_R) +LJFOLDF(fload_sbuf) +{ + TRef tr = lj_opt_fwd_fload(J); + return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD; +} + +/* The fast function ID of function objects is immutable. */ +LJFOLD(FLOAD KGC IRFL_FUNC_FFID) +LJFOLDF(fload_func_ffid_kgc) +{ + if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) + return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid); + return NEXTFOLD; +} + /* The C type ID of cdata objects is immutable. */ LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID) LJFOLDF(fload_cdata_typeid_kgc) @@ -2059,6 +2329,8 @@ LJFOLDF(fload_cdata_ptr_int64_cnew) } LJFOLD(FLOAD any IRFL_STR_LEN) +LJFOLD(FLOAD any IRFL_FUNC_ENV) +LJFOLD(FLOAD any IRFL_THREAD_ENV) LJFOLD(FLOAD any IRFL_CDATA_CTYPEID) LJFOLD(FLOAD any IRFL_CDATA_PTR) LJFOLD(FLOAD any IRFL_CDATA_INT) @@ -2078,7 +2350,7 @@ LJFOLDF(fwd_sload) TRef tr = lj_opt_cse(J); return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr; } else { - lua_assert(J->slot[fins->op1] != 0); + lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed"); return J->slot[fins->op1]; } } @@ -2135,6 +2407,17 @@ LJFOLDF(barrier_tnew_tdup) return DROPFOLD; } +/* -- Profiling ----------------------------------------------------------- */ + +LJFOLD(PROF any any) +LJFOLDF(prof) +{ + IRRef ref = J->chain[IR_PROF]; + if (ref+1 == J->cur.nins) /* Drop neighbouring IR_PROF. */ + return ref; + return EMITFOLD; +} + /* -- Stores and allocations ---------------------------------------------- */ /* Stores and allocations cannot be folded or passed on to CSE in general. @@ -2157,8 +2440,10 @@ LJFOLD(XSTORE any any) LJFOLDX(lj_opt_dse_xstore) LJFOLD(NEWREF any any) /* Treated like a store. */ -LJFOLD(CALLS any any) +LJFOLD(TMPREF any any) +LJFOLD(CALLA any any) LJFOLD(CALLL any any) /* Safeguard fallback. */ +LJFOLD(CALLS any any) LJFOLD(CALLXS any any) LJFOLD(XBAR) LJFOLD(RETF any any) /* Modifies BASE. */ @@ -2191,8 +2476,9 @@ TRef LJ_FASTCALL lj_opt_fold(jit_State *J) IRRef ref; if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) { - lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | - JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT); + lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) | + JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT, + "bad JIT_F_OPT_DEFAULT"); /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */ if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N) return lj_opt_cse(J); @@ -2217,10 +2503,14 @@ retry: if (fins->op1 >= J->cur.nk) { key += (uint32_t)IR(fins->op1)->o << 10; *fleft = *IR(fins->op1); + if (fins->op1 < REF_TRUE) + fleft[1] = IR(fins->op1)[1]; } if (fins->op2 >= J->cur.nk) { key += (uint32_t)IR(fins->op2)->o; *fright = *IR(fins->op2); + if (fins->op2 < REF_TRUE) + fright[1] = IR(fins->op2)[1]; } else { key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */ } @@ -2250,7 +2540,7 @@ retry: return lj_ir_kint(J, fins->i); if (ref == FAILFOLD) lj_trace_err(J, LJ_TRERR_GFAIL); - lua_assert(ref == DROPFOLD); + lj_assertJ(ref == DROPFOLD, "bad fold result"); return REF_DROP; } diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c index 466f88de..ee3ee049 100644 --- a/src/lj_opt_loop.c +++ b/src/lj_opt_loop.c @@ -11,7 +11,7 @@ #if LJ_HASJIT #include "lj_err.h" -#include "lj_str.h" +#include "lj_buf.h" #include "lj_ir.h" #include "lj_jit.h" #include "lj_iropt.h" @@ -225,6 +225,7 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, /* Setup new snapshot. */ snap->mapofs = (uint32_t)nmapofs; snap->ref = (IRRef1)J->cur.nins; + snap->mcofs = 0; snap->nslots = nslots; snap->topslot = osnap->topslot; snap->count = 0; @@ -254,9 +255,16 @@ static void loop_subst_snap(jit_State *J, SnapShot *osnap, J->cur.nsnapmap = (uint32_t)(nmap - J->cur.snapmap); } +typedef struct LoopState { + jit_State *J; + IRRef1 *subst; + MSize sizesubst; +} LoopState; + /* Unroll loop. */ -static void loop_unroll(jit_State *J) +static void loop_unroll(LoopState *lps) { + jit_State *J = lps->J; IRRef1 phi[LJ_MAX_PHI]; uint32_t nphi = 0; IRRef1 *subst; @@ -265,13 +273,13 @@ static void loop_unroll(jit_State *J) SnapEntry *loopmap, *psentinel; IRRef ins, invar; - /* Use temp buffer for substitution table. + /* Allocate substitution table. ** Only non-constant refs in [REF_BIAS,invar) are valid indexes. - ** Caveat: don't call into the VM or run the GC or the buffer may be gone. */ invar = J->cur.nins; - subst = (IRRef1 *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, - (invar-REF_BIAS)*sizeof(IRRef1)) - REF_BIAS; + lps->sizesubst = invar - REF_BIAS; + lps->subst = lj_mem_newvec(J->L, lps->sizesubst, IRRef1); + subst = lps->subst - REF_BIAS; subst[REF_BASE] = REF_BASE; /* LOOP separates the pre-roll from the loop body. */ @@ -292,7 +300,8 @@ static void loop_unroll(jit_State *J) loopmap = &J->cur.snapmap[loopsnap->mapofs]; /* The PC of snapshot #0 and the loop snapshot must match. */ psentinel = &loopmap[loopsnap->nent]; - lua_assert(*psentinel == J->cur.snapmap[J->cur.snap[0].nent]); + lj_assertJ(*psentinel == J->cur.snapmap[J->cur.snap[0].nent], + "mismatched PC for loop snapshot"); *psentinel = SNAP(255, 0, 0); /* Replace PC with temporary sentinel. */ /* Start substitution with snapshot #1 (#0 is empty for root traces). */ @@ -345,10 +354,12 @@ static void loop_unroll(jit_State *J) irr = IR(ref); goto phiconv; } - } else if (ref != REF_DROP && irr->o == IR_CONV && - ref > invar && irr->op1 < invar) { - /* May need an extra PHI for a CONV. */ - ref = irr->op1; + } else if (ref != REF_DROP && ref > invar && + ((irr->o == IR_CONV && irr->op1 < invar) || + (irr->o == IR_ALEN && irr->op2 < invar && + irr->op2 != REF_NIL))) { + /* May need an extra PHI for a CONV or ALEN hint. */ + ref = irr->o == IR_CONV ? irr->op1 : irr->op2; irr = IR(ref); phiconv: if (ref < invar && !irref_isk(ref) && !irt_isphi(irr->t)) { @@ -363,7 +374,7 @@ static void loop_unroll(jit_State *J) } if (!irt_isguard(J->guardemit)) /* Drop redundant snapshot. */ J->cur.nsnapmap = (uint32_t)J->cur.snap[--J->cur.nsnap].mapofs; - lua_assert(J->cur.nsnapmap <= J->sizesnapmap); + lj_assertJ(J->cur.nsnapmap <= J->sizesnapmap, "bad snapshot map index"); *psentinel = J->cur.snapmap[J->cur.snap[0].nent]; /* Restore PC. */ loop_emit_phi(J, subst, phi, nphi, onsnap); @@ -396,7 +407,7 @@ static void loop_undo(jit_State *J, IRRef ins, SnapNo nsnap, MSize nsnapmap) static TValue *cploop_opt(lua_State *L, lua_CFunction dummy, void *ud) { UNUSED(L); UNUSED(dummy); - loop_unroll((jit_State *)ud); + loop_unroll((LoopState *)ud); return NULL; } @@ -406,7 +417,13 @@ int lj_opt_loop(jit_State *J) IRRef nins = J->cur.nins; SnapNo nsnap = J->cur.nsnap; MSize nsnapmap = J->cur.nsnapmap; - int errcode = lj_vm_cpcall(J->L, NULL, J, cploop_opt); + LoopState lps; + int errcode; + lps.J = J; + lps.subst = NULL; + lps.sizesubst = 0; + errcode = lj_vm_cpcall(J->L, NULL, &lps, cploop_opt); + lj_mem_freevec(J2G(J), lps.subst, lps.sizesubst, IRRef1); if (LJ_UNLIKELY(errcode)) { lua_State *L = J->L; if (errcode == LUA_ERRRUN && tvisnumber(L->top-1)) { /* Trace error? */ diff --git a/src/lj_opt_mem.c b/src/lj_opt_mem.c index feec6bb7..09de2f05 100644 --- a/src/lj_opt_mem.c +++ b/src/lj_opt_mem.c @@ -17,12 +17,14 @@ #include "lj_ir.h" #include "lj_jit.h" #include "lj_iropt.h" +#include "lj_ircall.h" +#include "lj_dispatch.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) #define fins (&J->fold.ins) -#define fleft (&J->fold.left) -#define fright (&J->fold.right) +#define fleft (J->fold.left) +#define fright (J->fold.right) /* ** Caveat #1: return value is not always a TRef -- only use with tref_ref(). @@ -55,8 +57,8 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb) { IRIns *taba = IR(ta), *tabb = IR(tb); int newa, newb; - lua_assert(ta != tb); - lua_assert(irt_istab(taba->t) && irt_istab(tabb->t)); + lj_assertJ(ta != tb, "bad usage"); + lj_assertJ(irt_istab(taba->t) && irt_istab(tabb->t), "bad usage"); /* Disambiguate new allocations. */ newa = (taba->o == IR_TNEW || taba->o == IR_TDUP); newb = (tabb->o == IR_TNEW || tabb->o == IR_TDUP); @@ -70,6 +72,34 @@ static AliasRet aa_table(jit_State *J, IRRef ta, IRRef tb) return aa_escape(J, taba, tabb); } +/* Check whether there's no aliasing table.clear. */ +static int fwd_aa_tab_clear(jit_State *J, IRRef lim, IRRef ta) +{ + IRRef ref = J->chain[IR_CALLS]; + while (ref > lim) { + IRIns *calls = IR(ref); + if (calls->op2 == IRCALL_lj_tab_clear && + (ta == calls->op1 || aa_table(J, ta, calls->op1) != ALIAS_NO)) + return 0; /* Conflict. */ + ref = calls->prev; + } + return 1; /* No conflict. Can safely FOLD/CSE. */ +} + +/* Check whether there's no aliasing NEWREF/table.clear for the left operand. */ +int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) +{ + IRRef ta = fins->op1; + IRRef ref = J->chain[IR_NEWREF]; + while (ref > lim) { + IRIns *newref = IR(ref); + if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO) + return 0; /* Conflict. */ + ref = newref->prev; + } + return fwd_aa_tab_clear(J, lim, ta); +} + /* Alias analysis for array and hash access using key-based disambiguation. */ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) { @@ -98,7 +128,7 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) /* Disambiguate array references based on index arithmetic. */ int32_t ofsa = 0, ofsb = 0; IRRef basea = ka, baseb = kb; - lua_assert(refb->o == IR_AREF); + lj_assertJ(refb->o == IR_AREF, "expected AREF"); /* Gather base and offset from t[base] or t[base+-ofs]. */ if (keya->o == IR_ADD && irref_isk(keya->op2)) { basea = keya->op1; @@ -116,8 +146,9 @@ static AliasRet aa_ahref(jit_State *J, IRIns *refa, IRIns *refb) return ALIAS_NO; /* t[base+-o1] vs. t[base+-o2] and o1 != o2. */ } else { /* Disambiguate hash references based on the type of their keys. */ - lua_assert((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && - (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF)); + lj_assertJ((refa->o==IR_HREF || refa->o==IR_HREFK || refa->o==IR_NEWREF) && + (refb->o==IR_HREF || refb->o==IR_HREFK || refb->o==IR_NEWREF), + "bad xREF IR op %d or %d", refa->o, refb->o); if (!irt_sametype(keya->t, keyb->t)) return ALIAS_NO; /* Different key types. */ } @@ -151,7 +182,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) IRIns *ir = (xr->o == IR_HREFK || xr->o == IR_AREF) ? IR(xr->op1) : xr; IRRef tab = ir->op1; ir = IR(tab); - if (ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) { + if ((ir->o == IR_TNEW || (ir->o == IR_TDUP && irref_isk(xr->op2))) && + fwd_aa_tab_clear(J, tab, tab)) { /* A NEWREF with a number key may end up pointing to the array part. ** But it's referenced from HSTORE and not found in the ASTORE chain. ** For now simply consider this a conflict without forwarding anything. @@ -191,7 +223,8 @@ static TRef fwd_ahload(jit_State *J, IRRef xref) if (key->o == IR_KSLOT) key = IR(key->op1); lj_ir_kvalue(J->L, &keyv, key); tv = lj_tab_get(J->L, ir_ktab(IR(ir->op1)), &keyv); - lua_assert(itype2irt(tv) == irt_type(fins->t)); + lj_assertJ(itype2irt(tv) == irt_type(fins->t), + "mismatched type in constant table"); if (irt_isnum(fins->t)) return lj_ir_knum_u64(J, tv->u64); else if (LJ_DUALNUM && irt_isint(fins->t)) @@ -265,7 +298,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J) while (ref > tab) { IRIns *newref = IR(ref); if (tab == newref->op1) { - if (fright->op1 == newref->op2) + if (fright->op1 == newref->op2 && fwd_aa_tab_clear(J, ref, tab)) return ref; /* Forward from NEWREF. */ else goto docse; @@ -275,7 +308,7 @@ TRef LJ_FASTCALL lj_opt_fwd_hrefk(jit_State *J) ref = newref->prev; } /* No conflicting NEWREF: key location unchanged for HREFK of TDUP. */ - if (IR(tab)->o == IR_TDUP) + if (IR(tab)->o == IR_TDUP && fwd_aa_tab_clear(J, tab, tab)) fins->t.irt &= ~IRT_GUARD; /* Drop HREFK guard. */ docse: return CSEFOLD; @@ -309,20 +342,6 @@ int LJ_FASTCALL lj_opt_fwd_href_nokey(jit_State *J) return 1; /* No conflict. Can fold to niltv. */ } -/* Check whether there's no aliasing NEWREF for the left operand. */ -int LJ_FASTCALL lj_opt_fwd_tptr(jit_State *J, IRRef lim) -{ - IRRef ta = fins->op1; - IRRef ref = J->chain[IR_NEWREF]; - while (ref > lim) { - IRIns *newref = IR(ref); - if (ta == newref->op1 || aa_table(J, ta, newref->op1) != ALIAS_NO) - return 0; /* Conflict. */ - ref = newref->prev; - } - return 1; /* No conflict. Can safely FOLD/CSE. */ -} - /* ASTORE/HSTORE elimination. */ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) { @@ -346,9 +365,12 @@ TRef LJ_FASTCALL lj_opt_dse_ahstore(jit_State *J) /* Different value: try to eliminate the redundant store. */ if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ IRIns *ir; - /* Check for any intervening guards (includes conflicting loads). */ + /* Check for any intervening guards (includes conflicting loads). + ** Note that lj_tab_keyindex and lj_vm_next don't need guards, + ** since they are followed by at least one guarded VLOAD. + */ for (ir = IR(J->cur.nins-1); ir > store; ir--) - if (irt_isguard(ir->t) || ir->o == IR_CALLL) + if (irt_isguard(ir->t) || ir->o == IR_ALEN) goto doemit; /* No elimination possible. */ /* Remove redundant store from chain and replace with NOP. */ *refp = store->prev; @@ -363,6 +385,67 @@ doemit: return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ } +/* ALEN forwarding. */ +TRef LJ_FASTCALL lj_opt_fwd_alen(jit_State *J) +{ + IRRef tab = fins->op1; /* Table reference. */ + IRRef lim = tab; /* Search limit. */ + IRRef ref; + + /* Search for conflicting HSTORE with numeric key. */ + ref = J->chain[IR_HSTORE]; + while (ref > lim) { + IRIns *store = IR(ref); + IRIns *href = IR(store->op1); + IRIns *key = IR(href->op2); + if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { + lim = ref; /* Conflicting store found, limits search for ALEN. */ + break; + } + ref = store->prev; + } + + /* Try to find a matching ALEN. */ + ref = J->chain[IR_ALEN]; + while (ref > lim) { + /* CSE for ALEN only depends on the table, not the hint. */ + if (IR(ref)->op1 == tab) { + IRRef sref; + + /* Search for aliasing table.clear. */ + if (!fwd_aa_tab_clear(J, ref, tab)) + break; + + /* Search for hint-forwarding or conflicting store. */ + sref = J->chain[IR_ASTORE]; + while (sref > ref) { + IRIns *store = IR(sref); + IRIns *aref = IR(store->op1); + IRIns *fref = IR(aref->op1); + if (tab == fref->op1) { /* ASTORE to the same table. */ + /* Detect t[#t+1] = x idiom for push. */ + IRIns *idx = IR(aref->op2); + if (!irt_isnil(store->t) && + idx->o == IR_ADD && idx->op1 == ref && + IR(idx->op2)->o == IR_KINT && IR(idx->op2)->i == 1) { + /* Note: this requires an extra PHI check in loop unroll. */ + fins->op2 = aref->op2; /* Set ALEN hint. */ + } + goto doemit; /* Conflicting store, possibly giving a hint. */ + } else if (aa_table(J, tab, fref->op1) != ALIAS_NO) { + goto doemit; /* Conflicting store. */ + } + sref = store->prev; + } + + return ref; /* Plain ALEN forwarding. */ + } + ref = IR(ref)->prev; + } +doemit: + return EMITFOLD; +} + /* -- ULOAD forwarding ---------------------------------------------------- */ /* The current alias analysis for upvalues is very simplistic. It only @@ -412,7 +495,6 @@ TRef LJ_FASTCALL lj_opt_fwd_uload(jit_State *J) cselim: /* Try to find a matching load. Below the conflicting store, if any. */ - ref = J->chain[IR_ULOAD]; while (ref > lim) { IRIns *ir = IR(ref); @@ -542,8 +624,9 @@ TRef LJ_FASTCALL lj_opt_dse_fstore(jit_State *J) goto doemit; break; /* Otherwise continue searching. */ case ALIAS_MUST: - if (store->op2 == val) /* Same value: drop the new store. */ - return DROPFOLD; + if (store->op2 == val && + !(xr->op2 >= IRFL_SBUF_W && xr->op2 <= IRFL_SBUF_R)) + return DROPFOLD; /* Same value: drop the new store. */ /* Different value: try to eliminate the redundant store. */ if (ref > J->chain[IR_LOOP]) { /* Quick check to avoid crossing LOOP. */ IRIns *ir; @@ -564,6 +647,29 @@ doemit: return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ } +/* Check whether there's no aliasing buffer op between IRFL_SBUF_*. */ +int LJ_FASTCALL lj_opt_fwd_sbuf(jit_State *J, IRRef lim) +{ + IRRef ref; + if (J->chain[IR_BUFPUT] > lim) + return 0; /* Conflict. */ + ref = J->chain[IR_CALLS]; + while (ref > lim) { + IRIns *ir = IR(ref); + if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr) + return 0; /* Conflict. */ + ref = ir->prev; + } + ref = J->chain[IR_CALLL]; + while (ref > lim) { + IRIns *ir = IR(ref); + if (ir->op2 >= IRCALL_lj_strfmt_putint && ir->op2 < IRCALL_lj_buf_tostr) + return 0; /* Conflict. */ + ref = ir->prev; + } + return 1; /* No conflict. Can safely FOLD/CSE. */ +} + /* -- XLOAD forwarding and XSTORE elimination ----------------------------- */ /* Find cdata allocation for a reference (if any). */ @@ -815,35 +921,6 @@ doemit: return EMITFOLD; /* Otherwise we have a conflict or simply no match. */ } -/* -- Forwarding of lj_tab_len -------------------------------------------- */ - -/* This is rather simplistic right now, but better than nothing. */ -TRef LJ_FASTCALL lj_opt_fwd_tab_len(jit_State *J) -{ - IRRef tab = fins->op1; /* Table reference. */ - IRRef lim = tab; /* Search limit. */ - IRRef ref; - - /* Any ASTORE is a conflict and limits the search. */ - if (J->chain[IR_ASTORE] > lim) lim = J->chain[IR_ASTORE]; - - /* Search for conflicting HSTORE with numeric key. */ - ref = J->chain[IR_HSTORE]; - while (ref > lim) { - IRIns *store = IR(ref); - IRIns *href = IR(store->op1); - IRIns *key = IR(href->op2); - if (irt_isnum(key->o == IR_KSLOT ? IR(key->op1)->t : key->t)) { - lim = ref; /* Conflicting store found, limits search for TLEN. */ - break; - } - ref = store->prev; - } - - /* Try to find a matching load. Below the conflicting store, if any. */ - return lj_opt_cselim(J, lim); -} - /* -- ASTORE/HSTORE previous type analysis -------------------------------- */ /* Check whether the previous value for a table store is non-nil. diff --git a/src/lj_opt_narrow.c b/src/lj_opt_narrow.c index 34fe6c39..586f1bc7 100644 --- a/src/lj_opt_narrow.c +++ b/src/lj_opt_narrow.c @@ -372,17 +372,17 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) } else if (op == NARROW_CONV) { *sp++ = emitir_raw(convot, ref, convop2); /* Raw emit avoids a loop. */ } else if (op == NARROW_SEXT) { - lua_assert(sp >= nc->stack+1); + lj_assertJ(sp >= nc->stack+1, "stack underflow"); sp[-1] = emitir(IRT(IR_CONV, IRT_I64), sp[-1], (IRT_I64<<5)|IRT_INT|IRCONV_SEXT); } else if (op == NARROW_INT) { - lua_assert(next < last); + lj_assertJ(next < last, "missing arg to NARROW_INT"); *sp++ = nc->t == IRT_I64 ? lj_ir_kint64(J, (int64_t)(int32_t)*next++) : lj_ir_kint(J, *next++); } else { /* Regular IROpT. Pops two operands and pushes one result. */ IRRef mode = nc->mode; - lua_assert(sp >= nc->stack+2); + lj_assertJ(sp >= nc->stack+2, "stack underflow"); sp--; /* Omit some overflow checks for array indexing. See comments above. */ if ((mode & IRCONV_CONVMASK) == IRCONV_INDEX) { @@ -398,7 +398,7 @@ static IRRef narrow_conv_emit(jit_State *J, NarrowConv *nc) narrow_bpc_set(J, narrow_ref(ref), narrow_ref(sp[-1]), mode); } } - lua_assert(sp == nc->stack+1); + lj_assertJ(sp == nc->stack+1, "stack misalignment"); return nc->stack[0]; } @@ -452,7 +452,7 @@ static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode) TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr) { IRIns *ir; - lua_assert(tref_isnumber(tr)); + lj_assertJ(tref_isnumber(tr), "expected number type"); if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */ return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX); /* Omit some overflow checks for array indexing. See comments above. */ @@ -499,7 +499,7 @@ TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr) /* Narrow C array index (overflow undefined). */ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr) { - lua_assert(tref_isnumber(tr)); + lj_assertJ(tref_isnumber(tr), "expected number type"); if (tref_isnum(tr)) return emitir(IRT(IR_CONV, IRT_INTP), tr, (IRT_INTP<<5)|IRT_NUM|IRCONV_ANY); /* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */ @@ -551,11 +551,16 @@ TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc) { rc = conv_str_tonum(J, rc, vc); if (tref_isinteger(rc)) { - if ((uint32_t)numberVint(vc) != 0x80000000u) - return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc); + uint32_t k = (uint32_t)numberVint(vc); + if ((LJ_DUALNUM || k != 0) && k != 0x80000000u) { + TRef zero = lj_ir_kint(J, 0); + if (!LJ_DUALNUM) + emitir(IRTGI(IR_NE), rc, zero); + return emitir(IRTGI(IR_SUBOV), zero, rc); + } rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); } - return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J)); + return emitir(IRTN(IR_NEG), rc, lj_ir_ksimd(J, LJ_KSIMD_NEG)); } /* Narrowing of modulo operator. */ @@ -579,44 +584,6 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) return emitir(IRTN(IR_SUB), rb, tmp); } -/* Narrowing of power operator or math.pow. */ -TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc) -{ - rb = conv_str_tonum(J, rb, vb); - rb = lj_ir_tonum(J, rb); /* Left arg is always treated as an FP number. */ - rc = conv_str_tonum(J, rc, vc); - /* Narrowing must be unconditional to preserve (-x)^i semantics. */ - if (tvisint(vc) || numisint(numV(vc))) { - int checkrange = 0; - /* Split pow is faster for bigger exponents. But do this only for (+k)^i. */ - if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) { - int32_t k = numberVint(vc); - if (!(k >= -65536 && k <= 65536)) goto split_pow; - checkrange = 1; - } - if (!tref_isinteger(rc)) { - /* Guarded conversion to integer! */ - rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK); - } - if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */ - TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536)); - emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536)); - } - return emitir(IRTN(IR_POW), rb, rc); - } -split_pow: - /* FOLD covers most cases, but some are easier to do here. */ - if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb))))) - return rb; /* 1 ^ x ==> 1 */ - rc = lj_ir_tonum(J, rc); - if (tref_isk(rc) && ir_knum(IR(tref_ref(rc)))->n == 0.5) - return emitir(IRTN(IR_FPMATH), rb, IRFPM_SQRT); /* x ^ 0.5 ==> sqrt(x) */ - /* Split up b^c into exp2(c*log2(b)). Assembler may rejoin later. */ - rb = emitir(IRTN(IR_FPMATH), rb, IRFPM_LOG2); - rc = emitir(IRTN(IR_MUL), rb, rc); - return emitir(IRTN(IR_FPMATH), rc, IRFPM_EXP2); -} - /* -- Predictive narrowing of induction variables ------------------------- */ /* Narrow a single runtime value. */ @@ -630,9 +597,10 @@ static int narrow_forl(jit_State *J, cTValue *o) /* Narrow the FORL index type by looking at the runtime values. */ IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv) { - lua_assert(tvisnumber(&tv[FORL_IDX]) && + lj_assertJ(tvisnumber(&tv[FORL_IDX]) && tvisnumber(&tv[FORL_STOP]) && - tvisnumber(&tv[FORL_STEP])); + tvisnumber(&tv[FORL_STEP]), + "expected number types"); /* Narrow only if the runtime values of start/stop/step are all integers. */ if (narrow_forl(J, &tv[FORL_IDX]) && narrow_forl(J, &tv[FORL_STOP]) && diff --git a/src/lj_opt_sink.c b/src/lj_opt_sink.c index 784d9a1a..4b9008be 100644 --- a/src/lj_opt_sink.c +++ b/src/lj_opt_sink.c @@ -86,8 +86,7 @@ static void sink_mark_ins(jit_State *J) switch (ir->o) { case IR_BASE: return; /* Finished. */ - case IR_CALLL: /* IRCALL_lj_tab_len */ - case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: + case IR_ALOAD: case IR_HLOAD: case IR_XLOAD: case IR_TBAR: case IR_ALEN: irt_setmark(IR(ir->op1)->t); /* Mark ref for remaining loads. */ break; case IR_FLOAD: @@ -173,8 +172,8 @@ static void sink_remark_phi(jit_State *J) /* Sweep instructions and tag sunken allocations and stores. */ static void sink_sweep_ins(jit_State *J) { - IRIns *ir, *irfirst = IR(J->cur.nk); - for (ir = IR(J->cur.nins-1) ; ir >= irfirst; ir--) { + IRIns *ir, *irbase = IR(REF_BASE); + for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) { switch (ir->o) { case IR_ASTORE: case IR_HSTORE: case IR_FSTORE: case IR_XSTORE: { IRIns *ira = sink_checkalloc(J, ir); @@ -224,6 +223,13 @@ static void sink_sweep_ins(jit_State *J) break; } } + for (ir = IR(J->cur.nk); ir < irbase; ir++) { + irt_clearmark(ir->t); + ir->prev = REGSP_INIT; + /* The false-positive of irt_is64() for ASMREF_L (REF_NIL) is OK here. */ + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ir++; + } } /* Allocation sinking and store sinking. diff --git a/src/lj_opt_split.c b/src/lj_opt_split.c index 190b6ba4..506b9814 100644 --- a/src/lj_opt_split.c +++ b/src/lj_opt_split.c @@ -8,14 +8,15 @@ #include "lj_obj.h" -#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) +#if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) #include "lj_err.h" -#include "lj_str.h" +#include "lj_buf.h" #include "lj_ir.h" #include "lj_jit.h" #include "lj_ircall.h" #include "lj_iropt.h" +#include "lj_dispatch.h" #include "lj_vm.h" /* SPLIT pass: @@ -139,6 +140,7 @@ static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir, ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); } +#endif /* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, @@ -155,7 +157,6 @@ static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir, ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id); return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); } -#endif /* Emit a CALLN with two split 64 bit arguments. */ static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir, @@ -192,9 +193,121 @@ static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref) nref = ir->op1; if (ofs == 0) return nref; } - return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs)); + return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs)); } +#if LJ_HASFFI +static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst, + IRIns *oir, IRIns *nir, IRIns *ir) +{ + IROp op = ir->o; + IRRef kref = nir->op2; + if (irref_isk(kref)) { /* Optimize constant shifts. */ + int32_t k = (IR(kref)->i & 63); + IRRef lo = nir->op1, hi = hisubst[ir->op1]; + if (op == IR_BROL || op == IR_BROR) { + if (op == IR_BROR) k = (-k & 63); + if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; } + if (k == 0) { + passthrough: + J->cur.nins--; + ir->prev = lo; + return hi; + } else { + TRef k1, k2; + IRRef t1, t2, t3, t4; + J->cur.nins--; + k1 = lj_ir_kint(J, k); + k2 = lj_ir_kint(J, (-k & 31)); + t1 = split_emit(J, IRTI(IR_BSHL), lo, k1); + t2 = split_emit(J, IRTI(IR_BSHL), hi, k1); + t3 = split_emit(J, IRTI(IR_BSHR), lo, k2); + t4 = split_emit(J, IRTI(IR_BSHR), hi, k2); + ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4); + return split_emit(J, IRTI(IR_BOR), t2, t3); + } + } else if (k == 0) { + goto passthrough; + } else if (k < 32) { + if (op == IR_BSHL) { + IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref); + IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31))); + return split_emit(J, IRTI(IR_BOR), t1, t2); + } else { + IRRef t1 = ir->prev, t2; + lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage"); + nir->o = IR_BSHR; + t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31))); + ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2); + return split_emit(J, IRTI(op), hi, kref); + } + } else { + if (op == IR_BSHL) { + if (k == 32) + J->cur.nins--; + else + lo = ir->prev; + ir->prev = lj_ir_kint(J, 0); + return lo; + } else { + lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage"); + if (k == 32) { + J->cur.nins--; + ir->prev = hi; + } else { + nir->op1 = hi; + } + if (op == IR_BSHR) + return lj_ir_kint(J, 0); + else + return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31)); + } + } + } + return split_call_li(J, hisubst, oir, ir, + op - IR_BSHL + IRCALL_lj_carith_shl64); +} + +static IRRef split_bitop(jit_State *J, IRRef1 *hisubst, + IRIns *nir, IRIns *ir) +{ + IROp op = ir->o; + IRRef hi, kref = nir->op2; + if (irref_isk(kref)) { /* Optimize bit operations with lo constant. */ + int32_t k = IR(kref)->i; + if (k == 0 || k == -1) { + if (op == IR_BAND) k = ~k; + if (k == 0) { + J->cur.nins--; + ir->prev = nir->op1; + } else if (op == IR_BXOR) { + nir->o = IR_BNOT; + nir->op2 = 0; + } else { + J->cur.nins--; + ir->prev = kref; + } + } + } + hi = hisubst[ir->op1]; + kref = hisubst[ir->op2]; + if (irref_isk(kref)) { /* Optimize bit operations with hi constant. */ + int32_t k = IR(kref)->i; + if (k == 0 || k == -1) { + if (op == IR_BAND) k = ~k; + if (k == 0) { + return hi; + } else if (op == IR_BXOR) { + return split_emit(J, IRTI(IR_BNOT), hi, 0); + } else { + return kref; + } + } + } + return split_emit(J, IRTI(op), hi, kref); +} +#endif + /* Substitute references of a snapshot. */ static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir) { @@ -214,7 +327,7 @@ static void split_ir(jit_State *J) IRRef nins = J->cur.nins, nk = J->cur.nk; MSize irlen = nins - nk; MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); - IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); + IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need); IRRef1 *hisubst; IRRef ref, snref; SnapShot *snap; @@ -241,6 +354,8 @@ static void split_ir(jit_State *J) ir->prev = ref; /* Identity substitution for loword. */ hisubst[ref] = 0; } + if (irt_is64(ir->t) && ir->o != IR_KNULL) + ref++; } /* Process old IR instructions. */ @@ -285,35 +400,11 @@ static void split_ir(jit_State *J) hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); break; case IR_POW: - hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); + hi = split_call_ll(J, hisubst, oir, ir, IRCALL_pow); break; case IR_FPMATH: - /* Try to rejoin pow from EXP2, MUL and LOG2. */ - if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { - IRIns *irp = IR(nir->op1); - if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { - IRIns *irm4 = IR(irp->op1); - IRIns *irm3 = IR(irm4->op1); - IRIns *irm12 = IR(irm3->op1); - IRIns *irl1 = IR(irm12->op1); - if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && - irl1->op2 == IRCALL_lj_vm_log2) { - IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */ - IRRef arg3 = irm3->op2, arg4 = irm4->op2; - J->cur.nins--; - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); - tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); - ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); - hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp); - break; - } - } - } hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); break; - case IR_ATAN2: - hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); - break; case IR_LDEXP: hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); break; @@ -321,7 +412,8 @@ static void split_ir(jit_State *J) nir->o = IR_CONV; /* Pass through loword. */ nir->op2 = (IRT_INT << 5) | IRT_INT; hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), - hisubst[ir->op1], hisubst[ir->op2]); + hisubst[ir->op1], + lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG)))); break; case IR_SLOAD: if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ @@ -336,15 +428,24 @@ static void split_ir(jit_State *J) case IR_STRTO: hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); break; + case IR_FLOAD: + lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State"); + hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4)); + nir->op2 += LJ_BE*4; + break; case IR_XLOAD: { IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ J->cur.nins--; hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ +#if LJ_BE + hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2); + inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD); +#endif nref = lj_ir_nextins(J); nir = IR(nref); - *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ - hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); + *nir = inslo; /* Re-emit lo XLOAD. */ #if LJ_LE + hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); ir->prev = nref; #else ir->prev = hi; hi = nref; @@ -364,8 +465,9 @@ static void split_ir(jit_State *J) break; } #endif - lua_assert(st == IRT_INT || - (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); + lj_assertJ(st == IRT_INT || + (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)), + "bad source type for CONV"); nir->o = IR_CALLN; #if LJ_32 && LJ_HASFFI nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : @@ -395,7 +497,8 @@ static void split_ir(jit_State *J) hi = nir->op2; break; default: - lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); + lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX, + "bad IR op %d", ir->o); hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), hisubst[ir->op1], hisubst[ir->op2]); break; @@ -438,8 +541,21 @@ static void split_ir(jit_State *J) irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : IRCALL_lj_carith_powu64); break; + case IR_BNOT: + hi = split_emit(J, IRTI(IR_BNOT), hiref, 0); + break; + case IR_BSWAP: + ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0); + hi = nref; + break; + case IR_BAND: case IR_BOR: case IR_BXOR: + hi = split_bitop(J, hisubst, nir, ir); + break; + case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR: + hi = split_bitshift(J, hisubst, oir, nir, ir); + break; case IR_FLOAD: - lua_assert(ir->op2 == IRFL_CDATA_INT64); + lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported"); hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); #if LJ_BE ir->prev = hi; hi = nref; @@ -505,7 +621,7 @@ static void split_ir(jit_State *J) hi = nir->op2; break; default: - lua_assert(ir->o <= IR_NE); /* Comparisons. */ + lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o); /* Comparisons. */ split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); break; } @@ -529,7 +645,7 @@ static void split_ir(jit_State *J) tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); #endif ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); - } else if (ir->o == IR_TOSTR) { + } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) { if (hisubst[ir->op1]) { if (irref_isk(ir->op1)) nir->op1 = ir->op1; @@ -583,7 +699,7 @@ static void split_ir(jit_State *J) #if LJ_SOFTFP if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { if (irt_isguard(ir->t)) { - lua_assert(st == IRT_NUM && irt_isint(ir->t)); + lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types"); J->cur.nins--; ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); } else { @@ -714,7 +830,7 @@ void lj_opt_split(jit_State *J) if (!J->needsplit) J->needsplit = split_needsplit(J); #else - lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */ + lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state"); #endif if (J->needsplit) { int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit); diff --git a/src/lj_parse.c b/src/lj_parse.c index 5a8bcff9..9ddf60ed 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -13,6 +13,7 @@ #include "lj_gc.h" #include "lj_err.h" #include "lj_debug.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_func.h" @@ -21,6 +22,7 @@ #if LJ_HASFFI #include "lj_ctype.h" #endif +#include "lj_strfmt.h" #include "lj_lex.h" #include "lj_parse.h" #include "lj_vm.h" @@ -161,16 +163,22 @@ LJ_STATIC_ASSERT((int)BC_MULVV-(int)BC_ADDVV == (int)OPR_MUL-(int)OPR_ADD); LJ_STATIC_ASSERT((int)BC_DIVVV-(int)BC_ADDVV == (int)OPR_DIV-(int)OPR_ADD); LJ_STATIC_ASSERT((int)BC_MODVV-(int)BC_ADDVV == (int)OPR_MOD-(int)OPR_ADD); +#ifdef LUA_USE_ASSERT +#define lj_assertFS(c, ...) (lj_assertG_(G(fs->L), (c), __VA_ARGS__)) +#else +#define lj_assertFS(c, ...) ((void)fs) +#endif + /* -- Error handling ------------------------------------------------------ */ LJ_NORET LJ_NOINLINE static void err_syntax(LexState *ls, ErrMsg em) { - lj_lex_error(ls, ls->token, em); + lj_lex_error(ls, ls->tok, em); } -LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken token) +LJ_NORET LJ_NOINLINE static void err_token(LexState *ls, LexToken tok) { - lj_lex_error(ls, ls->token, LJ_ERR_XTOKEN, lj_lex_token2str(ls, token)); + lj_lex_error(ls, ls->tok, LJ_ERR_XTOKEN, lj_lex_token2str(ls, tok)); } LJ_NORET static void err_limit(FuncState *fs, uint32_t limit, const char *what) @@ -198,7 +206,7 @@ static BCReg const_num(FuncState *fs, ExpDesc *e) { lua_State *L = fs->L; TValue *o; - lua_assert(expr_isnumk(e)); + lj_assertFS(expr_isnumk(e), "bad usage"); o = lj_tab_set(L, fs->kt, &e->u.nval); if (tvhaskslot(o)) return tvkslot(o); @@ -223,7 +231,7 @@ static BCReg const_gc(FuncState *fs, GCobj *gc, uint32_t itype) /* Add a string constant. */ static BCReg const_str(FuncState *fs, ExpDesc *e) { - lua_assert(expr_isstrk(e) || e->k == VGLOBAL); + lj_assertFS(expr_isstrk(e) || e->k == VGLOBAL, "bad usage"); return const_gc(fs, obj2gco(e->u.sval), LJ_TSTR); } @@ -311,7 +319,7 @@ static void jmp_patchins(FuncState *fs, BCPos pc, BCPos dest) { BCIns *jmp = &fs->bcbase[pc].ins; BCPos offset = dest-(pc+1)+BCBIAS_J; - lua_assert(dest != NO_JMP); + lj_assertFS(dest != NO_JMP, "uninitialized jump target"); if (offset > BCMAX_D) err_syntax(fs->ls, LJ_ERR_XJUMP); setbc_d(jmp, offset); @@ -360,7 +368,7 @@ static void jmp_patch(FuncState *fs, BCPos list, BCPos target) if (target == fs->pc) { jmp_tohere(fs, list); } else { - lua_assert(target < fs->pc); + lj_assertFS(target < fs->pc, "bad jump target"); jmp_patchval(fs, list, target, NO_REG, target); } } @@ -390,7 +398,7 @@ static void bcreg_free(FuncState *fs, BCReg reg) { if (reg >= fs->nactvar) { fs->freereg--; - lua_assert(reg == fs->freereg); + lj_assertFS(reg == fs->freereg, "bad regfree"); } } @@ -540,7 +548,7 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg) } else if (e->k <= VKTRUE) { ins = BCINS_AD(BC_KPRI, reg, const_pri(e)); } else { - lua_assert(e->k == VVOID || e->k == VJMP); + lj_assertFS(e->k == VVOID || e->k == VJMP, "bad expr type %d", e->k); return; } bcemit_INS(fs, ins); @@ -635,7 +643,7 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) ins = BCINS_AD(BC_GSET, ra, const_str(fs, var)); } else { BCReg ra, rc; - lua_assert(var->k == VINDEXED); + lj_assertFS(var->k == VINDEXED, "bad expr type %d", var->k); ra = expr_toanyreg(fs, e); rc = var->u.s.aux; if ((int32_t)rc < 0) { @@ -643,10 +651,12 @@ static void bcemit_store(FuncState *fs, ExpDesc *var, ExpDesc *e) } else if (rc > BCMAX_C) { ins = BCINS_ABC(BC_TSETB, ra, var->u.s.info, rc-(BCMAX_C+1)); } else { +#ifdef LUA_USE_ASSERT /* Free late alloced key reg to avoid assert on free of value reg. */ /* This can only happen when called from expr_table(). */ - lua_assert(e->k != VNONRELOC || ra < fs->nactvar || - rc < ra || (bcreg_free(fs, rc),1)); + if (e->k == VNONRELOC && ra >= fs->nactvar && rc >= ra) + bcreg_free(fs, rc); +#endif ins = BCINS_ABC(BC_TSETV, ra, var->u.s.info, rc); } } @@ -660,16 +670,16 @@ static void bcemit_method(FuncState *fs, ExpDesc *e, ExpDesc *key) BCReg idx, func, obj = expr_toanyreg(fs, e); expr_free(fs, e); func = fs->freereg; - bcemit_AD(fs, BC_MOV, func+1, obj); /* Copy object to first argument. */ - lua_assert(expr_isstrk(key)); + bcemit_AD(fs, BC_MOV, func+1+LJ_FR2, obj); /* Copy object to 1st argument. */ + lj_assertFS(expr_isstrk(key), "bad usage"); idx = const_str(fs, key); if (idx <= BCMAX_C) { - bcreg_reserve(fs, 2); + bcreg_reserve(fs, 2+LJ_FR2); bcemit_ABC(fs, BC_TGETS, func, obj, idx); } else { - bcreg_reserve(fs, 3); - bcemit_AD(fs, BC_KSTR, func+2, idx); - bcemit_ABC(fs, BC_TGETV, func, obj, func+2); + bcreg_reserve(fs, 3+LJ_FR2); + bcemit_AD(fs, BC_KSTR, func+2+LJ_FR2, idx); + bcemit_ABC(fs, BC_TGETV, func, obj, func+2+LJ_FR2); fs->freereg--; } e->u.s.info = func; @@ -801,7 +811,8 @@ static void bcemit_arith(FuncState *fs, BinOpr opr, ExpDesc *e1, ExpDesc *e2) else rc = expr_toanyreg(fs, e2); /* 1st operand discharged by bcemit_binop_left, but need KNUM/KSHORT. */ - lua_assert(expr_isnumk(e1) || e1->k == VNONRELOC); + lj_assertFS(expr_isnumk(e1) || e1->k == VNONRELOC, + "bad expr type %d", e1->k); expr_toval(fs, e1); /* Avoid two consts to satisfy bytecode constraints. */ if (expr_isnumk(e1) && !expr_isnumk(e2) && @@ -889,19 +900,20 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) if (op <= OPR_POW) { bcemit_arith(fs, op, e1, e2); } else if (op == OPR_AND) { - lua_assert(e1->t == NO_JMP); /* List must be closed. */ + lj_assertFS(e1->t == NO_JMP, "jump list not closed"); expr_discharge(fs, e2); jmp_append(fs, &e2->f, e1->f); *e1 = *e2; } else if (op == OPR_OR) { - lua_assert(e1->f == NO_JMP); /* List must be closed. */ + lj_assertFS(e1->f == NO_JMP, "jump list not closed"); expr_discharge(fs, e2); jmp_append(fs, &e2->t, e1->t); *e1 = *e2; } else if (op == OPR_CONCAT) { expr_toval(fs, e2); if (e2->k == VRELOCABLE && bc_op(*bcptr(fs, e2)) == BC_CAT) { - lua_assert(e1->u.s.info == bc_b(*bcptr(fs, e2))-1); + lj_assertFS(e1->u.s.info == bc_b(*bcptr(fs, e2))-1, + "bad CAT stack layout"); expr_free(fs, e1); setbc_b(bcptr(fs, e2), e1->u.s.info); e1->u.s.info = e2->u.s.info; @@ -913,8 +925,9 @@ static void bcemit_binop(FuncState *fs, BinOpr op, ExpDesc *e1, ExpDesc *e2) } e1->k = VRELOCABLE; } else { - lua_assert(op == OPR_NE || op == OPR_EQ || - op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT); + lj_assertFS(op == OPR_NE || op == OPR_EQ || + op == OPR_LT || op == OPR_GE || op == OPR_LE || op == OPR_GT, + "bad binop %d", op); bcemit_comp(fs, op, e1, e2); } } @@ -943,10 +956,10 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) e->u.s.info = fs->freereg-1; e->k = VNONRELOC; } else { - lua_assert(e->k == VNONRELOC); + lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); } } else { - lua_assert(op == BC_UNM || op == BC_LEN); + lj_assertFS(op == BC_UNM || op == BC_LEN, "bad unop %d", op); if (op == BC_UNM && !expr_hasjump(e)) { /* Constant-fold negations. */ #if LJ_HASFFI if (e->k == VKCDATA) { /* Fold in-place since cdata is not interned. */ @@ -986,7 +999,7 @@ static void bcemit_unop(FuncState *fs, BCOp op, ExpDesc *e) /* Check and consume optional token. */ static int lex_opt(LexState *ls, LexToken tok) { - if (ls->token == tok) { + if (ls->tok == tok) { lj_lex_next(ls); return 1; } @@ -996,7 +1009,7 @@ static int lex_opt(LexState *ls, LexToken tok) /* Check and consume token. */ static void lex_check(LexState *ls, LexToken tok) { - if (ls->token != tok) + if (ls->tok != tok) err_token(ls, tok); lj_lex_next(ls); } @@ -1010,7 +1023,7 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line) } else { const char *swhat = lj_lex_token2str(ls, what); const char *swho = lj_lex_token2str(ls, who); - lj_lex_error(ls, ls->token, LJ_ERR_XMATCH, swhat, swho, line); + lj_lex_error(ls, ls->tok, LJ_ERR_XMATCH, swhat, swho, line); } } } @@ -1019,9 +1032,9 @@ static void lex_match(LexState *ls, LexToken what, LexToken who, BCLine line) static GCstr *lex_str(LexState *ls) { GCstr *s; - if (ls->token != TK_name && (LJ_52 || ls->token != TK_goto)) + if (ls->tok != TK_name && (LJ_52 || ls->tok != TK_goto)) err_token(ls, TK_name); - s = strV(&ls->tokenval); + s = strV(&ls->tokval); lj_lex_next(ls); return s; } @@ -1041,8 +1054,9 @@ static void var_new(LexState *ls, BCReg n, GCstr *name) lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); } - lua_assert((uintptr_t)name < VARNAME__MAX || - lj_tab_getstr(fs->kt, name) != NULL); + lj_assertFS((uintptr_t)name < VARNAME__MAX || + lj_tab_getstr(fs->kt, name) != NULL, + "unanchored variable name"); /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ setgcref(ls->vstack[vtop].name, obj2gco(name)); fs->varmap[fs->nactvar+n] = (uint16_t)vtop; @@ -1097,7 +1111,7 @@ static MSize var_lookup_uv(FuncState *fs, MSize vidx, ExpDesc *e) return i; /* Already exists. */ /* Otherwise create a new one. */ checklimit(fs, fs->nuv, LJ_MAX_UPVAL, "upvalues"); - lua_assert(e->k == VLOCAL || e->k == VUPVAL); + lj_assertFS(e->k == VLOCAL || e->k == VUPVAL, "bad expr type %d", e->k); fs->uvmap[n] = (uint16_t)vidx; fs->uvtmp[n] = (uint16_t)(e->k == VLOCAL ? vidx : LJ_MAX_VSTACK+e->u.s.info); fs->nuv = n+1; @@ -1148,7 +1162,8 @@ static MSize gola_new(LexState *ls, GCstr *name, uint8_t info, BCPos pc) lj_lex_error(ls, 0, LJ_ERR_XLIMC, LJ_MAX_VSTACK); lj_mem_growvec(ls->L, ls->vstack, ls->sizevstack, LJ_MAX_VSTACK, VarInfo); } - lua_assert(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL); + lj_assertFS(name == NAME_BREAK || lj_tab_getstr(fs->kt, name) != NULL, + "unanchored label name"); /* NOBARRIER: name is anchored in fs->kt and ls->vstack is not a GCobj. */ setgcref(ls->vstack[vtop].name, obj2gco(name)); ls->vstack[vtop].startpc = pc; @@ -1178,8 +1193,9 @@ static void gola_close(LexState *ls, VarInfo *vg) FuncState *fs = ls->fs; BCPos pc = vg->startpc; BCIns *ip = &fs->bcbase[pc].ins; - lua_assert(gola_isgoto(vg)); - lua_assert(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO); + lj_assertFS(gola_isgoto(vg), "expected goto"); + lj_assertFS(bc_op(*ip) == BC_JMP || bc_op(*ip) == BC_UCLO, + "bad bytecode op %d", bc_op(*ip)); setbc_a(ip, vg->slot); if (bc_op(*ip) == BC_JMP) { BCPos next = jmp_next(fs, pc); @@ -1198,9 +1214,9 @@ static void gola_resolve(LexState *ls, FuncScope *bl, MSize idx) if (gcrefeq(vg->name, vl->name) && gola_isgoto(vg)) { if (vg->slot < vl->slot) { GCstr *name = strref(var_get(ls, ls->fs, vg->slot).name); - lua_assert((uintptr_t)name >= VARNAME__MAX); + lj_assertLS((uintptr_t)name >= VARNAME__MAX, "expected goto name"); ls->linenumber = ls->fs->bcbase[vg->startpc].line; - lua_assert(strref(vg->name) != NAME_BREAK); + lj_assertLS(strref(vg->name) != NAME_BREAK, "unexpected break"); lj_lex_error(ls, 0, LJ_ERR_XGSCOPE, strdata(strref(vg->name)), strdata(name)); } @@ -1264,7 +1280,7 @@ static void fscope_begin(FuncState *fs, FuncScope *bl, int flags) bl->vstart = fs->ls->vtop; bl->prev = fs->bl; fs->bl = bl; - lua_assert(fs->freereg == fs->nactvar); + lj_assertFS(fs->freereg == fs->nactvar, "bad regalloc"); } /* End a scope. */ @@ -1275,7 +1291,7 @@ static void fscope_end(FuncState *fs) fs->bl = bl->prev; var_remove(ls, bl->nactvar); fs->freereg = fs->nactvar; - lua_assert(bl->nactvar == fs->nactvar); + lj_assertFS(bl->nactvar == fs->nactvar, "bad regalloc"); if ((bl->flags & (FSCOPE_UPVAL|FSCOPE_NOCLOSE)) == FSCOPE_UPVAL) bcemit_AJ(fs, BC_UCLO, bl->nactvar, 0); if ((bl->flags & FSCOPE_BREAK)) { @@ -1362,13 +1378,13 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr) Node *n = &node[i]; if (tvhaskslot(&n->val)) { ptrdiff_t kidx = (ptrdiff_t)tvkslot(&n->val); - lua_assert(!tvisint(&n->key)); + lj_assertFS(!tvisint(&n->key), "unexpected integer key"); if (tvisnum(&n->key)) { TValue *tv = &((TValue *)kptr)[kidx]; if (LJ_DUALNUM) { lua_Number nn = numV(&n->key); int32_t k = lj_num2int(nn); - lua_assert(!tvismzero(&n->key)); + lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); if ((lua_Number)k == nn) setintV(tv, k); else @@ -1416,98 +1432,66 @@ static void fs_fixup_line(FuncState *fs, GCproto *pt, uint8_t *li = (uint8_t *)lineinfo; do { BCLine delta = base[i].line - first; - lua_assert(delta >= 0 && delta < 256); + lj_assertFS(delta >= 0 && delta < 256, "bad line delta"); li[i] = (uint8_t)delta; } while (++i < n); } else if (LJ_LIKELY(numline < 65536)) { uint16_t *li = (uint16_t *)lineinfo; do { BCLine delta = base[i].line - first; - lua_assert(delta >= 0 && delta < 65536); + lj_assertFS(delta >= 0 && delta < 65536, "bad line delta"); li[i] = (uint16_t)delta; } while (++i < n); } else { uint32_t *li = (uint32_t *)lineinfo; do { BCLine delta = base[i].line - first; - lua_assert(delta >= 0); + lj_assertFS(delta >= 0, "bad line delta"); li[i] = (uint32_t)delta; } while (++i < n); } } -/* Resize buffer if needed. */ -static LJ_NOINLINE void fs_buf_resize(LexState *ls, MSize len) -{ - MSize sz = ls->sb.sz * 2; - while (ls->sb.n + len > sz) sz = sz * 2; - lj_str_resizebuf(ls->L, &ls->sb, sz); -} - -static LJ_AINLINE void fs_buf_need(LexState *ls, MSize len) -{ - if (LJ_UNLIKELY(ls->sb.n + len > ls->sb.sz)) - fs_buf_resize(ls, len); -} - -/* Add string to buffer. */ -static void fs_buf_str(LexState *ls, const char *str, MSize len) -{ - char *p = ls->sb.buf + ls->sb.n; - MSize i; - ls->sb.n += len; - for (i = 0; i < len; i++) p[i] = str[i]; -} - -/* Add ULEB128 value to buffer. */ -static void fs_buf_uleb128(LexState *ls, uint32_t v) -{ - MSize n = ls->sb.n; - uint8_t *p = (uint8_t *)ls->sb.buf; - for (; v >= 0x80; v >>= 7) - p[n++] = (uint8_t)((v & 0x7f) | 0x80); - p[n++] = (uint8_t)v; - ls->sb.n = n; -} - /* Prepare variable info for prototype. */ static size_t fs_prep_var(LexState *ls, FuncState *fs, size_t *ofsvar) { VarInfo *vs =ls->vstack, *ve; MSize i, n; BCPos lastpc; - lj_str_resetbuf(&ls->sb); /* Copy to temp. string buffer. */ + lj_buf_reset(&ls->sb); /* Copy to temp. string buffer. */ /* Store upvalue names. */ for (i = 0, n = fs->nuv; i < n; i++) { GCstr *s = strref(vs[fs->uvmap[i]].name); MSize len = s->len+1; - fs_buf_need(ls, len); - fs_buf_str(ls, strdata(s), len); + char *p = lj_buf_more(&ls->sb, len); + p = lj_buf_wmem(p, strdata(s), len); + ls->sb.w = p; } - *ofsvar = ls->sb.n; + *ofsvar = sbuflen(&ls->sb); lastpc = 0; /* Store local variable names and compressed ranges. */ for (ve = vs + ls->vtop, vs += fs->vbase; vs < ve; vs++) { if (!gola_isgotolabel(vs)) { GCstr *s = strref(vs->name); BCPos startpc; + char *p; if ((uintptr_t)s < VARNAME__MAX) { - fs_buf_need(ls, 1 + 2*5); - ls->sb.buf[ls->sb.n++] = (uint8_t)(uintptr_t)s; + p = lj_buf_more(&ls->sb, 1 + 2*5); + *p++ = (char)(uintptr_t)s; } else { MSize len = s->len+1; - fs_buf_need(ls, len + 2*5); - fs_buf_str(ls, strdata(s), len); + p = lj_buf_more(&ls->sb, len + 2*5); + p = lj_buf_wmem(p, strdata(s), len); } startpc = vs->startpc; - fs_buf_uleb128(ls, startpc-lastpc); - fs_buf_uleb128(ls, vs->endpc-startpc); + p = lj_strfmt_wuleb128(p, startpc-lastpc); + p = lj_strfmt_wuleb128(p, vs->endpc-startpc); + ls->sb.w = p; lastpc = startpc; } } - fs_buf_need(ls, 1); - ls->sb.buf[ls->sb.n++] = '\0'; /* Terminator for varinfo. */ - return ls->sb.n; + lj_buf_putb(&ls->sb, '\0'); /* Terminator for varinfo. */ + return sbuflen(&ls->sb); } /* Fixup variable info for prototype. */ @@ -1515,7 +1499,7 @@ static void fs_fixup_var(LexState *ls, GCproto *pt, uint8_t *p, size_t ofsvar) { setmref(pt->uvinfo, p); setmref(pt->varinfo, (char *)p + ofsvar); - memcpy(p, ls->sb.buf, ls->sb.n); /* Copy from temp. string buffer. */ + memcpy(p, ls->sb.b, sbuflen(&ls->sb)); /* Copy from temp. buffer. */ } #else @@ -1552,7 +1536,7 @@ static void fs_fixup_ret(FuncState *fs) } fs->bl->flags |= FSCOPE_NOCLOSE; /* Handled above. */ fscope_end(fs); - lua_assert(fs->bl == NULL); + lj_assertFS(fs->bl == NULL, "bad scope nesting"); /* May need to fixup returns encoded before first function was created. */ if (fs->flags & PROTO_FIXUP_RETURN) { BCPos pc; @@ -1624,7 +1608,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) L->top--; /* Pop table of constants. */ ls->vtop = fs->vbase; /* Reset variable stack. */ ls->fs = fs->prev; - lua_assert(ls->fs != NULL || ls->token == TK_eof); + lj_assertL(ls->fs != NULL || ls->tok == TK_eof, "bad parser state"); return pt; } @@ -1718,15 +1702,15 @@ static void expr_bracket(LexState *ls, ExpDesc *v) } /* Get value of constant expression. */ -static void expr_kvalue(TValue *v, ExpDesc *e) +static void expr_kvalue(FuncState *fs, TValue *v, ExpDesc *e) { + UNUSED(fs); if (e->k <= VKTRUE) { - setitype(v, ~(uint32_t)e->k); + setpriV(v, ~(uint32_t)e->k); } else if (e->k == VKSTR) { - setgcref(v->gcr, obj2gco(e->u.sval)); - setitype(v, LJ_TSTR); + setgcVraw(v, obj2gco(e->u.sval), LJ_TSTR); } else { - lua_assert(tvisnumber(expr_numtv(e))); + lj_assertFS(tvisnumber(expr_numtv(e)), "bad number constant"); *v = *expr_numtv(e); } } @@ -1746,15 +1730,15 @@ static void expr_table(LexState *ls, ExpDesc *e) bcreg_reserve(fs, 1); freg++; lex_check(ls, '{'); - while (ls->token != '}') { + while (ls->tok != '}') { ExpDesc key, val; vcall = 0; - if (ls->token == '[') { + if (ls->tok == '[') { expr_bracket(ls, &key); /* Already calls expr_toval. */ if (!expr_isk(&key)) expr_index(fs, e, &key); if (expr_isnumk(&key) && expr_numiszero(&key)) needarr = 1; else nhash++; lex_check(ls, '='); - } else if ((ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) && + } else if ((ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) && lj_lex_lookahead(ls) == '=') { expr_str(ls, &key); lex_check(ls, '='); @@ -1776,11 +1760,11 @@ static void expr_table(LexState *ls, ExpDesc *e) fs->bcbase[pc].ins = BCINS_AD(BC_TDUP, freg-1, kidx); } vcall = 0; - expr_kvalue(&k, &key); + expr_kvalue(fs, &k, &key); v = lj_tab_set(fs->L, t, &k); lj_gc_anybarriert(fs->L, t); if (expr_isk_nojump(&val)) { /* Add const key/value to template table. */ - expr_kvalue(v, &val); + expr_kvalue(fs, v, &val); } else { /* Otherwise create dummy string key (avoids lj_tab_newkey). */ settabV(fs->L, v, t); /* Preserve key with table itself as value. */ fixt = 1; /* Fix this later, after all resizes. */ @@ -1799,8 +1783,9 @@ static void expr_table(LexState *ls, ExpDesc *e) if (vcall) { BCInsLine *ilp = &fs->bcbase[fs->pc-1]; ExpDesc en; - lua_assert(bc_a(ilp->ins) == freg && - bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB)); + lj_assertFS(bc_a(ilp->ins) == freg && + bc_op(ilp->ins) == (narr > 256 ? BC_TSETV : BC_TSETB), + "bad CALL code generation"); expr_init(&en, VKNUM, 0); en.u.nval.u32.lo = narr-1; en.u.nval.u32.hi = 0x43300000; /* Biased integer to avoid denormals. */ @@ -1830,7 +1815,7 @@ static void expr_table(LexState *ls, ExpDesc *e) for (i = 0; i <= hmask; i++) { Node *n = &node[i]; if (tvistab(&n->val)) { - lua_assert(tabV(&n->val) == t); + lj_assertFS(tabV(&n->val) == t, "bad dummy key in template table"); setnilV(&n->val); /* Turn value into nil. */ } } @@ -1847,11 +1832,11 @@ static BCReg parse_params(LexState *ls, int needself) lex_check(ls, '('); if (needself) var_new_lit(ls, nparams++, "self"); - if (ls->token != ')') { + if (ls->tok != ')') { do { - if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { + if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) { var_new(ls, nparams++, lex_str(ls)); - } else if (ls->token == TK_dots) { + } else if (ls->tok == TK_dots) { lj_lex_next(ls); fs->flags |= PROTO_VARARG; break; @@ -1861,7 +1846,7 @@ static BCReg parse_params(LexState *ls, int needself) } while (lex_opt(ls, ',')); } var_add(ls, nparams); - lua_assert(fs->nactvar == nparams); + lj_assertFS(fs->nactvar == nparams, "bad regalloc"); bcreg_reserve(fs, nparams); lex_check(ls, ')'); return nparams; @@ -1885,7 +1870,7 @@ static void parse_body(LexState *ls, ExpDesc *e, int needself, BCLine line) fs.bclim = pfs->bclim - pfs->pc; bcemit_AD(&fs, BC_FUNCF, 0, 0); /* Placeholder. */ parse_chunk(ls); - if (ls->token != TK_end) lex_match(ls, TK_end, TK_function, line); + if (ls->tok != TK_end) lex_match(ls, TK_end, TK_function, line); pt = fs_finish(ls, (ls->lastline = ls->linenumber)); pfs->bcbase = ls->bcstack + oldbase; /* May have been reallocated. */ pfs->bclim = (BCPos)(ls->sizebcstack - oldbase); @@ -1924,13 +1909,13 @@ static void parse_args(LexState *ls, ExpDesc *e) BCIns ins; BCReg base; BCLine line = ls->linenumber; - if (ls->token == '(') { + if (ls->tok == '(') { #if !LJ_52 if (line != ls->lastline) err_syntax(ls, LJ_ERR_XAMBIG); #endif lj_lex_next(ls); - if (ls->token == ')') { /* f(). */ + if (ls->tok == ')') { /* f(). */ args.k = VVOID; } else { expr_list(ls, &args); @@ -1938,24 +1923,24 @@ static void parse_args(LexState *ls, ExpDesc *e) setbc_b(bcptr(fs, &args), 0); /* Pass on multiple results. */ } lex_match(ls, ')', '(', line); - } else if (ls->token == '{') { + } else if (ls->tok == '{') { expr_table(ls, &args); - } else if (ls->token == TK_string) { + } else if (ls->tok == TK_string) { expr_init(&args, VKSTR, 0); - args.u.sval = strV(&ls->tokenval); + args.u.sval = strV(&ls->tokval); lj_lex_next(ls); } else { err_syntax(ls, LJ_ERR_XFUNARG); return; /* Silence compiler. */ } - lua_assert(e->k == VNONRELOC); + lj_assertFS(e->k == VNONRELOC, "bad expr type %d", e->k); base = e->u.s.info; /* Base register for call. */ if (args.k == VCALL) { - ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1); + ins = BCINS_ABC(BC_CALLM, base, 2, args.u.s.aux - base - 1 - LJ_FR2); } else { if (args.k != VVOID) expr_tonextreg(fs, &args); - ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base); + ins = BCINS_ABC(BC_CALL, base, 2, fs->freereg - base - LJ_FR2); } expr_init(e, VCALL, bcemit_INS(fs, ins)); e->u.s.aux = base; @@ -1968,33 +1953,34 @@ static void expr_primary(LexState *ls, ExpDesc *v) { FuncState *fs = ls->fs; /* Parse prefix expression. */ - if (ls->token == '(') { + if (ls->tok == '(') { BCLine line = ls->linenumber; lj_lex_next(ls); expr(ls, v); lex_match(ls, ')', '(', line); expr_discharge(ls->fs, v); - } else if (ls->token == TK_name || (!LJ_52 && ls->token == TK_goto)) { + } else if (ls->tok == TK_name || (!LJ_52 && ls->tok == TK_goto)) { var_lookup(ls, v); } else { err_syntax(ls, LJ_ERR_XSYMBOL); } for (;;) { /* Parse multiple expression suffixes. */ - if (ls->token == '.') { + if (ls->tok == '.') { expr_field(ls, v); - } else if (ls->token == '[') { + } else if (ls->tok == '[') { ExpDesc key; expr_toanyreg(fs, v); expr_bracket(ls, &key); expr_index(fs, v, &key); - } else if (ls->token == ':') { + } else if (ls->tok == ':') { ExpDesc key; lj_lex_next(ls); expr_str(ls, &key); bcemit_method(fs, v, &key); parse_args(ls, v); - } else if (ls->token == '(' || ls->token == TK_string || ls->token == '{') { + } else if (ls->tok == '(' || ls->tok == TK_string || ls->tok == '{') { expr_tonextreg(fs, v); + if (LJ_FR2) bcreg_reserve(fs, 1); parse_args(ls, v); } else { break; @@ -2005,14 +1991,14 @@ static void expr_primary(LexState *ls, ExpDesc *v) /* Parse simple expression. */ static void expr_simple(LexState *ls, ExpDesc *v) { - switch (ls->token) { + switch (ls->tok) { case TK_number: - expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokenval)) ? VKCDATA : VKNUM, 0); - copyTV(ls->L, &v->u.nval, &ls->tokenval); + expr_init(v, (LJ_HASFFI && tviscdata(&ls->tokval)) ? VKCDATA : VKNUM, 0); + copyTV(ls->L, &v->u.nval, &ls->tokval); break; case TK_string: expr_init(v, VKSTR, 0); - v->u.sval = strV(&ls->tokenval); + v->u.sval = strV(&ls->tokval); break; case TK_nil: expr_init(v, VKNIL, 0); @@ -2100,11 +2086,11 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit); static void expr_unop(LexState *ls, ExpDesc *v) { BCOp op; - if (ls->token == TK_not) { + if (ls->tok == TK_not) { op = BC_NOT; - } else if (ls->token == '-') { + } else if (ls->tok == '-') { op = BC_UNM; - } else if (ls->token == '#') { + } else if (ls->tok == '#') { op = BC_LEN; } else { expr_simple(ls, v); @@ -2121,7 +2107,7 @@ static BinOpr expr_binop(LexState *ls, ExpDesc *v, uint32_t limit) BinOpr op; synlevel_begin(ls); expr_unop(ls, v); - op = token2binop(ls->token); + op = token2binop(ls->tok); while (op != OPR_NOBINOPR && priority[op].left > limit) { ExpDesc v2; BinOpr nextop; @@ -2310,9 +2296,9 @@ static void parse_func(LexState *ls, BCLine line) lj_lex_next(ls); /* Skip 'function'. */ /* Parse function name. */ var_lookup(ls, &v); - while (ls->token == '.') /* Multiple dot-separated fields. */ + while (ls->tok == '.') /* Multiple dot-separated fields. */ expr_field(ls, &v); - if (ls->token == ':') { /* Optional colon to signify method call. */ + if (ls->tok == ':') { /* Optional colon to signify method call. */ needself = 1; expr_field(ls, &v); } @@ -2325,9 +2311,9 @@ static void parse_func(LexState *ls, BCLine line) /* -- Control transfer statements ----------------------------------------- */ /* Check for end of block. */ -static int endofblock(LexToken token) +static int parse_isend(LexToken tok) { - switch (token) { + switch (tok) { case TK_else: case TK_elseif: case TK_end: case TK_until: case TK_eof: return 1; default: @@ -2342,7 +2328,7 @@ static void parse_return(LexState *ls) FuncState *fs = ls->fs; lj_lex_next(ls); /* Skip 'return'. */ fs->flags |= PROTO_HAS_RETURN; - if (endofblock(ls->token) || ls->token == ';') { /* Bare return. */ + if (parse_isend(ls->tok) || ls->tok == ';') { /* Bare return. */ ins = BCINS_AD(BC_RET0, 0, 1); } else { /* Return with one or more values. */ ExpDesc e; /* Receives the _last_ expression in the list. */ @@ -2408,18 +2394,18 @@ static void parse_label(LexState *ls) lex_check(ls, TK_label); /* Recursively parse trailing statements: labels and ';' (Lua 5.2 only). */ for (;;) { - if (ls->token == TK_label) { + if (ls->tok == TK_label) { synlevel_begin(ls); parse_label(ls); synlevel_end(ls); - } else if (LJ_52 && ls->token == ';') { + } else if (LJ_52 && ls->tok == ';') { lj_lex_next(ls); } else { break; } } /* Trailing label is considered to be outside of scope. */ - if (endofblock(ls->token) && ls->token != TK_until) + if (parse_isend(ls->tok) && ls->tok != TK_until) ls->vstack[idx].slot = fs->bl->nactvar; gola_resolve(ls, fs->bl, idx); } @@ -2575,7 +2561,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname) lex_check(ls, TK_in); line = ls->linenumber; assign_adjust(ls, 3, expr_list(ls, &e), &e); - bcreg_bump(fs, 3); /* The iterator needs another 3 slots (func + 2 args). */ + /* The iterator needs another 3 [4] slots (func [pc] | state ctl). */ + bcreg_bump(fs, 3+LJ_FR2); isnext = (nvars <= 5 && predict_next(ls, fs, exprpc)); var_add(ls, 3); /* Hidden control variables. */ lex_check(ls, TK_do); @@ -2603,9 +2590,9 @@ static void parse_for(LexState *ls, BCLine line) fscope_begin(fs, &bl, FSCOPE_LOOP); lj_lex_next(ls); /* Skip 'for'. */ varname = lex_str(ls); /* Get first variable name. */ - if (ls->token == '=') + if (ls->tok == '=') parse_for_num(ls, varname, line); - else if (ls->token == ',' || ls->token == TK_in) + else if (ls->tok == ',' || ls->tok == TK_in) parse_for_iter(ls, varname); else err_syntax(ls, LJ_ERR_XFOR); @@ -2631,12 +2618,12 @@ static void parse_if(LexState *ls, BCLine line) BCPos flist; BCPos escapelist = NO_JMP; flist = parse_then(ls); - while (ls->token == TK_elseif) { /* Parse multiple 'elseif' blocks. */ + while (ls->tok == TK_elseif) { /* Parse multiple 'elseif' blocks. */ jmp_append(fs, &escapelist, bcemit_jmp(fs)); jmp_tohere(fs, flist); flist = parse_then(ls); } - if (ls->token == TK_else) { /* Parse optional 'else' block. */ + if (ls->tok == TK_else) { /* Parse optional 'else' block. */ jmp_append(fs, &escapelist, bcemit_jmp(fs)); jmp_tohere(fs, flist); lj_lex_next(ls); /* Skip 'else'. */ @@ -2654,7 +2641,7 @@ static void parse_if(LexState *ls, BCLine line) static int parse_stmt(LexState *ls) { BCLine line = ls->linenumber; - switch (ls->token) { + switch (ls->tok) { case TK_if: parse_if(ls, line); break; @@ -2713,11 +2700,12 @@ static void parse_chunk(LexState *ls) { int islast = 0; synlevel_begin(ls); - while (!islast && !endofblock(ls->token)) { + while (!islast && !parse_isend(ls->tok)) { islast = parse_stmt(ls); lex_opt(ls, ';'); - lua_assert(ls->fs->framesize >= ls->fs->freereg && - ls->fs->freereg >= ls->fs->nactvar); + lj_assertLS(ls->fs->framesize >= ls->fs->freereg && + ls->fs->freereg >= ls->fs->nactvar, + "bad regalloc"); ls->fs->freereg = ls->fs->nactvar; /* Free registers after each stmt. */ } synlevel_end(ls); @@ -2748,13 +2736,12 @@ GCproto *lj_parse(LexState *ls) bcemit_AD(&fs, BC_FUNCV, 0, 0); /* Placeholder. */ lj_lex_next(ls); /* Read-ahead first token. */ parse_chunk(ls); - if (ls->token != TK_eof) + if (ls->tok != TK_eof) err_token(ls, TK_eof); pt = fs_finish(ls, ls->linenumber); L->top--; /* Drop chunkname. */ - lua_assert(fs.prev == NULL); - lua_assert(ls->fs == NULL); - lua_assert(pt->sizeuv == 0); + lj_assertL(fs.prev == NULL && ls->fs == NULL, "mismatched frame nesting"); + lj_assertL(pt->sizeuv == 0, "toplevel proto has upvalues"); return pt; } diff --git a/src/lj_prng.c b/src/lj_prng.c new file mode 100644 index 00000000..01935e57 --- /dev/null +++ b/src/lj_prng.c @@ -0,0 +1,259 @@ +/* +** Pseudo-random number generation. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_prng_c +#define LUA_CORE + +/* To get the syscall prototype. */ +#if defined(__linux__) && !defined(_GNU_SOURCE) +#define _GNU_SOURCE +#endif + +#include "lj_def.h" +#include "lj_arch.h" +#include "lj_prng.h" + +/* -- PRNG step function -------------------------------------------------- */ + +/* This implements a Tausworthe PRNG with period 2^223. Based on: +** Tables of maximally-equidistributed combined LFSR generators, +** Pierre L'Ecuyer, 1991, table 3, 1st entry. +** Full-period ME-CF generator with L=64, J=4, k=223, N1=49. +** +** Important note: This PRNG is NOT suitable for cryptographic use! +** +** But it works fine for math.random(), which has an API that's not +** suitable for cryptography, anyway. +** +** When used as a securely seeded global PRNG, it substantially raises +** the difficulty for various attacks on the VM. +*/ + +/* Update generator i and compute a running xor of all states. */ +#define TW223_GEN(rs, z, r, i, k, q, s) \ + z = rs->u[i]; \ + z = (((z<<q)^z) >> (k-s)) ^ ((z&((uint64_t)(int64_t)-1 << (64-k)))<<s); \ + r ^= z; rs->u[i] = z; + +#define TW223_STEP(rs, z, r) \ + TW223_GEN(rs, z, r, 0, 63, 31, 18) \ + TW223_GEN(rs, z, r, 1, 58, 19, 28) \ + TW223_GEN(rs, z, r, 2, 55, 24, 7) \ + TW223_GEN(rs, z, r, 3, 47, 21, 8) + +/* PRNG step function with uint64_t result. */ +LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs) +{ + uint64_t z, r = 0; + TW223_STEP(rs, z, r) + return r; +} + +/* PRNG step function with double in uint64_t result. */ +LJ_NOINLINE uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs) +{ + uint64_t z, r = 0; + TW223_STEP(rs, z, r) + /* Returns a double bit pattern in the range 1.0 <= d < 2.0. */ + return (r & U64x(000fffff,ffffffff)) | U64x(3ff00000,00000000); +} + +/* Condition seed: ensure k[i] MSB of u[i] are non-zero. */ +static LJ_AINLINE void lj_prng_condition(PRNGState *rs) +{ + if (rs->u[0] < (1u << 1)) rs->u[0] += (1u << 1); + if (rs->u[1] < (1u << 6)) rs->u[1] += (1u << 6); + if (rs->u[2] < (1u << 9)) rs->u[2] += (1u << 9); + if (rs->u[3] < (1u << 17)) rs->u[3] += (1u << 17); +} + +/* -- PRNG seeding from OS ------------------------------------------------ */ + +#if LUAJIT_SECURITY_PRNG == 0 + +/* Nothing to define. */ + +#elif LJ_TARGET_XBOX360 + +extern int XNetRandom(void *buf, unsigned int len); + +#elif LJ_TARGET_PS3 + +extern int sys_get_random_number(void *buf, uint64_t len); + +#elif LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA + +extern int sceRandomGetRandomNumber(void *buf, size_t len); + +#elif LJ_TARGET_NX + +#include <unistd.h> + +#elif LJ_TARGET_WINDOWS || LJ_TARGET_XBOXONE + +#define WIN32_LEAN_AND_MEAN +#include <windows.h> + +#if LJ_TARGET_UWP || LJ_TARGET_XBOXONE +/* Must use BCryptGenRandom. */ +#include <bcrypt.h> +#pragma comment(lib, "bcrypt.lib") +#else +/* If you wonder about this mess, then search online for RtlGenRandom. */ +typedef BOOLEAN (WINAPI *PRGR)(void *buf, ULONG len); +static PRGR libfunc_rgr; +#endif + +#elif LJ_TARGET_POSIX + +#if LJ_TARGET_LINUX +/* Avoid a dependency on glibc 2.25+ and use the getrandom syscall instead. */ +#include <sys/syscall.h> +#else + +#if LJ_TARGET_OSX && !LJ_TARGET_IOS +/* +** In their infinite wisdom Apple decided to disallow getentropy() in the +** iOS App Store. Even though the call is common to all BSD-ish OS, it's +** recommended by Apple in their own security-related docs, and, to top +** off the foolery, /dev/urandom is handled by the same kernel code, +** yet accessing it is actually permitted (but less efficient). +*/ +#include <Availability.h> +#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 101200 +#define LJ_TARGET_HAS_GETENTROPY 1 +#endif +#elif (LJ_TARGET_BSD && !defined(__NetBSD__)) || LJ_TARGET_SOLARIS || LJ_TARGET_CYGWIN || LJ_TARGET_QNX +#define LJ_TARGET_HAS_GETENTROPY 1 +#endif + +#if LJ_TARGET_HAS_GETENTROPY +extern int getentropy(void *buf, size_t len) +#ifdef __ELF__ + __attribute__((weak)) +#endif +; +#endif + +#endif + +/* For the /dev/urandom fallback. */ +#include <fcntl.h> +#include <unistd.h> + +#endif + +#if LUAJIT_SECURITY_PRNG == 0 + +/* If you really don't care about security, then define +** LUAJIT_SECURITY_PRNG=0. This yields a predictable seed +** and provides NO SECURITY against various attacks on the VM. +** +** BTW: This is NOT the way to get predictable table iteration, +** predictable trace generation, predictable bytecode generation, etc. +*/ +int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) +{ + lj_prng_seed_fixed(rs); /* The fixed seed is already conditioned. */ + return 1; +} + +#else + +/* Securely seed PRNG from system entropy. Returns 0 on failure. */ +int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs) +{ +#if LJ_TARGET_XBOX360 + + if (XNetRandom(rs->u, (unsigned int)sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_PS3 + + if (sys_get_random_number(rs->u, sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_PS4 || LJ_TARGET_PS5 || LJ_TARGET_PSVITA + + if (sceRandomGetRandomNumber(rs->u, sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_NX + + if (getentropy(rs->u, sizeof(rs->u)) == 0) + goto ok; + +#elif LJ_TARGET_UWP || LJ_TARGET_XBOXONE + + if (BCryptGenRandom(NULL, (PUCHAR)(rs->u), (ULONG)sizeof(rs->u), + BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0) + goto ok; + +#elif LJ_TARGET_WINDOWS + + /* Keep the library loaded in case multiple VMs are started. */ + if (!libfunc_rgr) { + HMODULE lib = LJ_WIN_LOADLIBA("advapi32.dll"); + if (!lib) return 0; + libfunc_rgr = (PRGR)GetProcAddress(lib, "SystemFunction036"); + if (!libfunc_rgr) return 0; + } + if (libfunc_rgr(rs->u, (ULONG)sizeof(rs->u))) + goto ok; + +#elif LJ_TARGET_POSIX + +#if LJ_TARGET_LINUX && defined(SYS_getrandom) + + if (syscall(SYS_getrandom, rs->u, sizeof(rs->u), 0) == (long)sizeof(rs->u)) + goto ok; + +#elif LJ_TARGET_HAS_GETENTROPY + +#ifdef __ELF__ + if (&getentropy && getentropy(rs->u, sizeof(rs->u)) == 0) + goto ok; +#else + if (getentropy(rs->u, sizeof(rs->u)) == 0) + goto ok; +#endif + +#endif + + /* Fallback to /dev/urandom. This may fail if the device is not + ** existent or accessible in a chroot or container, or if the process + ** or the OS ran out of file descriptors. + */ + { + int fd = open("/dev/urandom", O_RDONLY|O_CLOEXEC); + if (fd != -1) { + ssize_t n = read(fd, rs->u, sizeof(rs->u)); + (void)close(fd); + if (n == (ssize_t)sizeof(rs->u)) + goto ok; + } + } + +#else + + /* Add an elif above for your OS with a secure PRNG seed. + ** Note that fiddling around with rand(), getpid(), time() or coercing + ** ASLR to yield a few bits of randomness is not helpful. + ** If you don't want any security, then don't pretend you have any + ** and simply define LUAJIT_SECURITY_PRNG=0 for the build. + */ +#error "Missing secure PRNG seed for this OS" + +#endif + return 0; /* Fail. */ + +ok: + lj_prng_condition(rs); + (void)lj_prng_u64(rs); + return 1; /* Success. */ +} + +#endif + diff --git a/src/lj_prng.h b/src/lj_prng.h new file mode 100644 index 00000000..bdc958ab --- /dev/null +++ b/src/lj_prng.h @@ -0,0 +1,24 @@ +/* +** Pseudo-random number generation. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_PRNG_H +#define _LJ_PRNG_H + +#include "lj_def.h" + +LJ_FUNC int LJ_FASTCALL lj_prng_seed_secure(PRNGState *rs); +LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64(PRNGState *rs); +LJ_FUNC uint64_t LJ_FASTCALL lj_prng_u64d(PRNGState *rs); + +/* This is just the precomputed result of lib_math.c:random_seed(rs, 0.0). */ +static LJ_AINLINE void lj_prng_seed_fixed(PRNGState *rs) +{ + rs->u[0] = U64x(a0d27757,0a345b8c); + rs->u[1] = U64x(764a296c,5d4aa64f); + rs->u[2] = U64x(51220704,070adeaa); + rs->u[3] = U64x(2a2717b5,a7b7b927); +} + +#endif diff --git a/src/lj_profile.c b/src/lj_profile.c new file mode 100644 index 00000000..4a13537d --- /dev/null +++ b/src/lj_profile.c @@ -0,0 +1,371 @@ +/* +** Low-overhead profiling. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_profile_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASPROFILE + +#include "lj_buf.h" +#include "lj_frame.h" +#include "lj_debug.h" +#include "lj_dispatch.h" +#if LJ_HASJIT +#include "lj_jit.h" +#include "lj_trace.h" +#endif +#include "lj_profile.h" + +#include "luajit.h" + +#if LJ_PROFILE_SIGPROF + +#include <sys/time.h> +#include <signal.h> +#define profile_lock(ps) UNUSED(ps) +#define profile_unlock(ps) UNUSED(ps) + +#elif LJ_PROFILE_PTHREAD + +#include <pthread.h> +#include <time.h> +#if LJ_TARGET_PS3 +#include <sys/timer.h> +#endif +#define profile_lock(ps) pthread_mutex_lock(&ps->lock) +#define profile_unlock(ps) pthread_mutex_unlock(&ps->lock) + +#elif LJ_PROFILE_WTHREAD + +#define WIN32_LEAN_AND_MEAN +#if LJ_TARGET_XBOX360 +#include <xtl.h> +#include <xbox.h> +#else +#include <windows.h> +#endif +typedef unsigned int (WINAPI *WMM_TPFUNC)(unsigned int); +#define profile_lock(ps) EnterCriticalSection(&ps->lock) +#define profile_unlock(ps) LeaveCriticalSection(&ps->lock) + +#endif + +/* Profiler state. */ +typedef struct ProfileState { + global_State *g; /* VM state that started the profiler. */ + luaJIT_profile_callback cb; /* Profiler callback. */ + void *data; /* Profiler callback data. */ + SBuf sb; /* String buffer for stack dumps. */ + int interval; /* Sample interval in milliseconds. */ + int samples; /* Number of samples for next callback. */ + int vmstate; /* VM state when profile timer triggered. */ +#if LJ_PROFILE_SIGPROF + struct sigaction oldsa; /* Previous SIGPROF state. */ +#elif LJ_PROFILE_PTHREAD + pthread_mutex_t lock; /* g->hookmask update lock. */ + pthread_t thread; /* Timer thread. */ + int abort; /* Abort timer thread. */ +#elif LJ_PROFILE_WTHREAD +#if LJ_TARGET_WINDOWS + HINSTANCE wmm; /* WinMM library handle. */ + WMM_TPFUNC wmm_tbp; /* WinMM timeBeginPeriod function. */ + WMM_TPFUNC wmm_tep; /* WinMM timeEndPeriod function. */ +#endif + CRITICAL_SECTION lock; /* g->hookmask update lock. */ + HANDLE thread; /* Timer thread. */ + int abort; /* Abort timer thread. */ +#endif +} ProfileState; + +/* Sadly, we have to use a static profiler state. +** +** The SIGPROF variant needs a static pointer to the global state, anyway. +** And it would be hard to extend for multiple threads. You can still use +** multiple VMs in multiple threads, but only profile one at a time. +*/ +static ProfileState profile_state; + +/* Default sample interval in milliseconds. */ +#define LJ_PROFILE_INTERVAL_DEFAULT 10 + +/* -- Profiler/hook interaction ------------------------------------------- */ + +#if !LJ_PROFILE_SIGPROF +void LJ_FASTCALL lj_profile_hook_enter(global_State *g) +{ + ProfileState *ps = &profile_state; + if (ps->g) { + profile_lock(ps); + hook_enter(g); + profile_unlock(ps); + } else { + hook_enter(g); + } +} + +void LJ_FASTCALL lj_profile_hook_leave(global_State *g) +{ + ProfileState *ps = &profile_state; + if (ps->g) { + profile_lock(ps); + hook_leave(g); + profile_unlock(ps); + } else { + hook_leave(g); + } +} +#endif + +/* -- Profile callbacks --------------------------------------------------- */ + +/* Callback from profile hook (HOOK_PROFILE already cleared). */ +void LJ_FASTCALL lj_profile_interpreter(lua_State *L) +{ + ProfileState *ps = &profile_state; + global_State *g = G(L); + uint8_t mask; + profile_lock(ps); + mask = (g->hookmask & ~HOOK_PROFILE); + if (!(mask & HOOK_VMEVENT)) { + int samples = ps->samples; + ps->samples = 0; + g->hookmask = HOOK_VMEVENT; + lj_dispatch_update(g); + profile_unlock(ps); + ps->cb(ps->data, L, samples, ps->vmstate); /* Invoke user callback. */ + profile_lock(ps); + mask |= (g->hookmask & HOOK_PROFILE); + } + g->hookmask = mask; + lj_dispatch_update(g); + profile_unlock(ps); +} + +/* Trigger profile hook. Asynchronous call from OS-specific profile timer. */ +static void profile_trigger(ProfileState *ps) +{ + global_State *g = ps->g; + uint8_t mask; + profile_lock(ps); + ps->samples++; /* Always increment number of samples. */ + mask = g->hookmask; + if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT|HOOK_GC))) { /* Set profile hook. */ + int st = g->vmstate; + ps->vmstate = st >= 0 ? 'N' : + st == ~LJ_VMST_INTERP ? 'I' : + st == ~LJ_VMST_C ? 'C' : + st == ~LJ_VMST_GC ? 'G' : 'J'; + g->hookmask = (mask | HOOK_PROFILE); + lj_dispatch_update(g); + } + profile_unlock(ps); +} + +/* -- OS-specific profile timer handling ---------------------------------- */ + +#if LJ_PROFILE_SIGPROF + +/* SIGPROF handler. */ +static void profile_signal(int sig) +{ + UNUSED(sig); + profile_trigger(&profile_state); +} + +/* Start profiling timer. */ +static void profile_timer_start(ProfileState *ps) +{ + int interval = ps->interval; + struct itimerval tm; + struct sigaction sa; + tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000; + tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000; + setitimer(ITIMER_PROF, &tm, NULL); +#if LJ_TARGET_QNX + sa.sa_flags = 0; +#else + sa.sa_flags = SA_RESTART; +#endif + sa.sa_handler = profile_signal; + sigemptyset(&sa.sa_mask); + sigaction(SIGPROF, &sa, &ps->oldsa); +} + +/* Stop profiling timer. */ +static void profile_timer_stop(ProfileState *ps) +{ + struct itimerval tm; + tm.it_value.tv_sec = tm.it_interval.tv_sec = 0; + tm.it_value.tv_usec = tm.it_interval.tv_usec = 0; + setitimer(ITIMER_PROF, &tm, NULL); + sigaction(SIGPROF, &ps->oldsa, NULL); +} + +#elif LJ_PROFILE_PTHREAD + +/* POSIX timer thread. */ +static void *profile_thread(ProfileState *ps) +{ + int interval = ps->interval; +#if !LJ_TARGET_PS3 + struct timespec ts; + ts.tv_sec = interval / 1000; + ts.tv_nsec = (interval % 1000) * 1000000; +#endif + while (1) { +#if LJ_TARGET_PS3 + sys_timer_usleep(interval * 1000); +#else + nanosleep(&ts, NULL); +#endif + if (ps->abort) break; + profile_trigger(ps); + } + return NULL; +} + +/* Start profiling timer thread. */ +static void profile_timer_start(ProfileState *ps) +{ + pthread_mutex_init(&ps->lock, 0); + ps->abort = 0; + pthread_create(&ps->thread, NULL, (void *(*)(void *))profile_thread, ps); +} + +/* Stop profiling timer thread. */ +static void profile_timer_stop(ProfileState *ps) +{ + ps->abort = 1; + pthread_join(ps->thread, NULL); + pthread_mutex_destroy(&ps->lock); +} + +#elif LJ_PROFILE_WTHREAD + +/* Windows timer thread. */ +static DWORD WINAPI profile_thread(void *psx) +{ + ProfileState *ps = (ProfileState *)psx; + int interval = ps->interval; +#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP + ps->wmm_tbp(interval); +#endif + while (1) { + Sleep(interval); + if (ps->abort) break; + profile_trigger(ps); + } +#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP + ps->wmm_tep(interval); +#endif + return 0; +} + +/* Start profiling timer thread. */ +static void profile_timer_start(ProfileState *ps) +{ +#if LJ_TARGET_WINDOWS && !LJ_TARGET_UWP + if (!ps->wmm) { /* Load WinMM library on-demand. */ + ps->wmm = LJ_WIN_LOADLIBA("winmm.dll"); + if (ps->wmm) { + ps->wmm_tbp = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeBeginPeriod"); + ps->wmm_tep = (WMM_TPFUNC)GetProcAddress(ps->wmm, "timeEndPeriod"); + if (!ps->wmm_tbp || !ps->wmm_tep) { + ps->wmm = NULL; + return; + } + } + } +#endif + InitializeCriticalSection(&ps->lock); + ps->abort = 0; + ps->thread = CreateThread(NULL, 0, profile_thread, ps, 0, NULL); +} + +/* Stop profiling timer thread. */ +static void profile_timer_stop(ProfileState *ps) +{ + ps->abort = 1; + WaitForSingleObject(ps->thread, INFINITE); + DeleteCriticalSection(&ps->lock); +} + +#endif + +/* -- Public profiling API ------------------------------------------------ */ + +/* Start profiling. */ +LUA_API void luaJIT_profile_start(lua_State *L, const char *mode, + luaJIT_profile_callback cb, void *data) +{ + ProfileState *ps = &profile_state; + int interval = LJ_PROFILE_INTERVAL_DEFAULT; + while (*mode) { + int m = *mode++; + switch (m) { + case 'i': + interval = 0; + while (*mode >= '0' && *mode <= '9') + interval = interval * 10 + (*mode++ - '0'); + if (interval <= 0) interval = 1; + break; +#if LJ_HASJIT + case 'l': case 'f': + L2J(L)->prof_mode = m; + lj_trace_flushall(L); + break; +#endif + default: /* Ignore unknown mode chars. */ + break; + } + } + if (ps->g) { + luaJIT_profile_stop(L); + if (ps->g) return; /* Profiler in use by another VM. */ + } + ps->g = G(L); + ps->interval = interval; + ps->cb = cb; + ps->data = data; + ps->samples = 0; + lj_buf_init(L, &ps->sb); + profile_timer_start(ps); +} + +/* Stop profiling. */ +LUA_API void luaJIT_profile_stop(lua_State *L) +{ + ProfileState *ps = &profile_state; + global_State *g = ps->g; + if (G(L) == g) { /* Only stop profiler if started by this VM. */ + profile_timer_stop(ps); + g->hookmask &= ~HOOK_PROFILE; + lj_dispatch_update(g); +#if LJ_HASJIT + G2J(g)->prof_mode = 0; + lj_trace_flushall(L); +#endif + lj_buf_free(g, &ps->sb); + ps->sb.w = ps->sb.e = NULL; + ps->g = NULL; + } +} + +/* Return a compact stack dump. */ +LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, + int depth, size_t *len) +{ + ProfileState *ps = &profile_state; + SBuf *sb = &ps->sb; + setsbufL(sb, L); + lj_buf_reset(sb); + lj_debug_dumpstack(L, sb, fmt, depth); + *len = (size_t)sbuflen(sb); + return sb->b; +} + +#endif diff --git a/src/lj_profile.h b/src/lj_profile.h new file mode 100644 index 00000000..3969f8e8 --- /dev/null +++ b/src/lj_profile.h @@ -0,0 +1,21 @@ +/* +** Low-overhead profiling. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_PROFILE_H +#define _LJ_PROFILE_H + +#include "lj_obj.h" + +#if LJ_HASPROFILE + +LJ_FUNC void LJ_FASTCALL lj_profile_interpreter(lua_State *L); +#if !LJ_PROFILE_SIGPROF +LJ_FUNC void LJ_FASTCALL lj_profile_hook_enter(global_State *g); +LJ_FUNC void LJ_FASTCALL lj_profile_hook_leave(global_State *g); +#endif + +#endif + +#endif diff --git a/src/lj_record.c b/src/lj_record.c index f7552db0..bfd41236 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -20,6 +20,9 @@ #endif #include "lj_bc.h" #include "lj_ff.h" +#if LJ_HASPROFILE +#include "lj_debug.h" +#endif #include "lj_ir.h" #include "lj_jit.h" #include "lj_ircall.h" @@ -30,6 +33,7 @@ #include "lj_snap.h" #include "lj_dispatch.h" #include "lj_vm.h" +#include "lj_prng.h" /* Some local macros to save typing. Undef'd at the end. */ #define IR(ref) (&J->cur.ir[(ref)]) @@ -47,31 +51,52 @@ static void rec_check_ir(jit_State *J) { IRRef i, nins = J->cur.nins, nk = J->cur.nk; - lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536); - for (i = nins-1; i >= nk; i--) { + lj_assertJ(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536, + "inconsistent IR layout"); + for (i = nk; i < nins; i++) { IRIns *ir = IR(i); uint32_t mode = lj_ir_mode[ir->o]; IRRef op1 = ir->op1; IRRef op2 = ir->op2; + const char *err = NULL; switch (irm_op1(mode)) { - case IRMnone: lua_assert(op1 == 0); break; - case IRMref: lua_assert(op1 >= nk); - lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break; + case IRMnone: + if (op1 != 0) err = "IRMnone op1 used"; + break; + case IRMref: + if (op1 < nk || (i >= REF_BIAS ? op1 >= i : op1 <= i)) + err = "IRMref op1 out of range"; + break; case IRMlit: break; - case IRMcst: lua_assert(i < REF_BIAS); continue; + case IRMcst: + if (i >= REF_BIAS) { err = "constant in IR range"; break; } + if (irt_is64(ir->t) && ir->o != IR_KNULL) + i++; + continue; } switch (irm_op2(mode)) { - case IRMnone: lua_assert(op2 == 0); break; - case IRMref: lua_assert(op2 >= nk); - lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break; + case IRMnone: + if (op2) err = "IRMnone op2 used"; + break; + case IRMref: + if (op2 < nk || (i >= REF_BIAS ? op2 >= i : op2 <= i)) + err = "IRMref op2 out of range"; + break; case IRMlit: break; - case IRMcst: lua_assert(0); break; + case IRMcst: err = "IRMcst op2"; break; } - if (ir->prev) { - lua_assert(ir->prev >= nk); - lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i); - lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o); + if (!err && ir->prev) { + if (ir->prev < nk || (i >= REF_BIAS ? ir->prev >= i : ir->prev <= i)) + err = "chain out of range"; + else if (ir->o != IR_NOP && IR(ir->prev)->o != ir->o) + err = "chain to different op"; } + lj_assertJ(!err, "bad IR %04d op %d(%04d,%04d): %s", + i-REF_BIAS, + ir->o, + irm_op1(mode) == IRMref ? op1-REF_BIAS : op1, + irm_op2(mode) == IRMref ? op2-REF_BIAS : op2, + err); } } @@ -81,48 +106,79 @@ static void rec_check_slots(jit_State *J) BCReg s, nslots = J->baseslot + J->maxslot; int32_t depth = 0; cTValue *base = J->L->base - J->baseslot; - lua_assert(J->baseslot >= 1); - lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME)); - lua_assert(nslots <= LJ_MAX_JSLOTS); + lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot"); + lj_assertJ(J->baseslot == 1+LJ_FR2 || (J->slot[J->baseslot-1] & TREF_FRAME), + "baseslot does not point to frame"); + lj_assertJ(nslots <= LJ_MAX_JSLOTS, "slot overflow"); for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; if (tr) { cTValue *tv = &base[s]; IRRef ref = tref_ref(tr); - IRIns *ir; - lua_assert(ref >= J->cur.nk && ref < J->cur.nins); - ir = IR(ref); - lua_assert(irt_t(ir->t) == tref_t(tr)); + IRIns *ir = NULL; /* Silence compiler. */ + if (!LJ_FR2 || ref || !(tr & (TREF_FRAME | TREF_CONT))) { + lj_assertJ(ref >= J->cur.nk && ref < J->cur.nins, + "slot %d ref %04d out of range", s, ref - REF_BIAS); + ir = IR(ref); + lj_assertJ(irt_t(ir->t) == tref_t(tr), "slot %d IR type mismatch", s); + } if (s == 0) { - lua_assert(tref_isfunc(tr)); + lj_assertJ(tref_isfunc(tr), "frame slot 0 is not a function"); +#if LJ_FR2 + } else if (s == 1) { + lj_assertJ((tr & ~TREF_FRAME) == 0, "bad frame slot 1"); +#endif } else if ((tr & TREF_FRAME)) { GCfunc *fn = gco2func(frame_gc(tv)); BCReg delta = (BCReg)(tv - frame_prev(tv)); - lua_assert(tref_isfunc(tr)); - if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir)); - lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta)); +#if LJ_FR2 + lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, + "frame slot %d PC mismatch", s); + tr = J->slot[s-1]; + ir = IR(tref_ref(tr)); +#endif + lj_assertJ(tref_isfunc(tr), + "frame slot %d is not a function", s-LJ_FR2); + lj_assertJ(!tref_isk(tr) || fn == ir_kfunc(ir), + "frame slot %d function mismatch", s-LJ_FR2); + lj_assertJ(s > delta + LJ_FR2 ? (J->slot[s-delta] & TREF_FRAME) + : (s == delta + LJ_FR2), + "frame slot %d broken chain", s-LJ_FR2); depth++; } else if ((tr & TREF_CONT)) { - lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void)); - lua_assert((J->slot[s+1] & TREF_FRAME)); +#if LJ_FR2 + lj_assertJ(!ref || ir_knum(ir)->u64 == tv->u64, + "cont slot %d continuation mismatch", s); +#else + lj_assertJ(ir_kptr(ir) == gcrefp(tv->gcr, void), + "cont slot %d continuation mismatch", s); +#endif + lj_assertJ((J->slot[s+1+LJ_FR2] & TREF_FRAME), + "cont slot %d not followed by frame", s); depth++; + } else if ((tr & TREF_KEYINDEX)) { + lj_assertJ(tref_isint(tr), "keyindex slot %d bad type %d", + s, tref_type(tr)); } else { - if (tvisnumber(tv)) - lua_assert(tref_isnumber(tr)); /* Could be IRT_INT etc., too. */ - else - lua_assert(itype2irt(tv) == tref_type(tr)); + /* Number repr. may differ, but other types must be the same. */ + lj_assertJ(tvisnumber(tv) ? tref_isnumber(tr) : + itype2irt(tv) == tref_type(tr), + "slot %d type mismatch: stack type %d vs IR type %d", + s, itypemap(tv), tref_type(tr)); if (tref_isk(tr)) { /* Compare constants. */ TValue tvk; lj_ir_kvalue(J->L, &tvk, ir); - if (!(tvisnum(&tvk) && tvisnan(&tvk))) - lua_assert(lj_obj_equal(tv, &tvk)); - else - lua_assert(tvisnum(tv) && tvisnan(tv)); + lj_assertJ((tvisnum(&tvk) && tvisnan(&tvk)) ? + (tvisnum(tv) && tvisnan(tv)) : + lj_obj_equal(tv, &tvk), + "slot %d const mismatch: stack %016llx vs IR %016llx", + s, tv->u64, tvk.u64); } } } } - lua_assert(J->framedepth == depth); + lj_assertJ(J->framedepth == depth, + "frame depth mismatch %d vs %d", J->framedepth, depth); } #endif @@ -156,10 +212,11 @@ static TRef sload(jit_State *J, int32_t slot) /* Get TRef for current function. */ static TRef getcurrf(jit_State *J) { - if (J->base[-1]) - return J->base[-1]; - lua_assert(J->baseslot == 1); - return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY); + if (J->base[-1-LJ_FR2]) + return J->base[-1-LJ_FR2]; + /* Non-base frame functions ought to be loaded already. */ + lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot"); + return sloadt(J, -1-LJ_FR2, IRT_FUNC, IRSLOAD_READONLY); } /* Compare for raw object equality. @@ -205,6 +262,14 @@ TRef lj_record_constify(jit_State *J, cTValue *o) return 0; /* Can't represent lightuserdata (pointless). */ } +/* Emit a VLOAD with the correct type. */ +TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t) +{ + TRef tr = emitir(IRTG(IR_VLOAD, t), ref, idx); + if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ + return tr; +} + /* -- Record loop ops ----------------------------------------------------- */ /* Loop event. */ @@ -221,17 +286,21 @@ static void canonicalize_slots(jit_State *J) if (LJ_DUALNUM) return; for (s = J->baseslot+J->maxslot-1; s >= 1; s--) { TRef tr = J->slot[s]; - if (tref_isinteger(tr)) { + if (tref_isinteger(tr) && !(tr & TREF_KEYINDEX)) { IRIns *ir = IR(tref_ref(tr)); - if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY))) + if (!(ir->o == IR_SLOAD && (ir->op2 & (IRSLOAD_READONLY)))) J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT); } } } /* Stop recording. */ -static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk) +void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk) { +#ifdef LUAJIT_ENABLE_TABLE_BUMP + if (J->retryrec) + lj_trace_err(J, LJ_TRERR_RETRY); +#endif lj_trace_end(J); J->cur.linktype = (uint8_t)linktype; J->cur.link = (uint16_t)lnk; @@ -399,7 +468,8 @@ static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev, TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode); TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode); int tc, dir = rec_for_direction(&tv[FORL_STEP]); - lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI); + lj_assertJ(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI, + "bad bytecode %d instead of FORI/JFORI", bc_op(*fori)); scev->t.irt = t; scev->dir = dir; scev->stop = tref_ref(stop); @@ -455,7 +525,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) IRT_NUM; for (i = FORL_IDX; i <= FORL_STEP; i++) { if (!tr[i]) sload(J, ra+i); - lua_assert(tref_isnumber_str(tr[i])); + lj_assertJ(tref_isnumber_str(tr[i]), "bad FORI argument type"); if (tref_isstr(tr[i])) tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0); if (t == IRT_INT) { @@ -499,8 +569,7 @@ static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl) static LoopEvent rec_iterl(jit_State *J, const BCIns iterins) { BCReg ra = bc_a(iterins); - lua_assert(J->base[ra] != 0); - if (!tref_isnil(J->base[ra])) { /* Looping back? */ + if (!tref_isnil(getslot(J, ra))) { /* Looping back? */ J->base[ra-1] = J->base[ra]; /* Copy result of ITERC to control var. */ J->maxslot = ra-1+bc_b(J->pc[-1]); J->pc += bc_j(iterins)+1; @@ -538,12 +607,13 @@ static int innerloopleft(jit_State *J, const BCIns *pc) /* Handle the case when an interpreted loop op is hit. */ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) { - if (J->parent == 0) { + if (J->parent == 0 && J->exitno == 0) { if (pc == J->startpc && J->framedepth + J->retdepth == 0) { + if (bc_op(J->cur.startins) == BC_ITERN) return; /* See rec_itern(). */ /* Same loop? */ if (ev == LOOPEV_LEAVE) /* Must loop back to form a root trace. */ lj_trace_err(J, LJ_TRERR_LLEAVE); - rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping root trace. */ + lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ } else if (ev != LOOPEV_LEAVE) { /* Entering inner loop? */ /* It's usually better to abort here and wait until the inner loop ** is traced. But if the inner loop repeatedly didn't loop back, @@ -568,18 +638,136 @@ static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev) /* Handle the case when an already compiled loop op is hit. */ static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev) { - if (J->parent == 0) { /* Root trace hit an inner loop. */ + if (J->parent == 0 && J->exitno == 0) { /* Root trace hit an inner loop. */ /* Better let the inner loop spawn a side trace back here. */ lj_trace_err(J, LJ_TRERR_LINNER); } else if (ev != LOOPEV_LEAVE) { /* Side trace enters a compiled loop. */ J->instunroll = 0; /* Cannot continue across a compiled loop op. */ if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) - rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form an extra loop. */ + lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Form extra loop. */ else - rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ + lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the loop. */ } /* Side trace continues across a loop that's left or not entered. */ } +/* Record ITERN. */ +static LoopEvent rec_itern(jit_State *J, BCReg ra, BCReg rb) +{ +#if LJ_BE + /* YAGNI: Disabled on big-endian due to issues with lj_vm_next, + ** IR_HIOP, RID_RETLO/RID_RETHI and ra_destpair. + */ + UNUSED(ra); UNUSED(rb); + setintV(&J->errinfo, (int32_t)BC_ITERN); + lj_trace_err_info(J, LJ_TRERR_NYIBC); +#else + RecordIndex ix; + /* Since ITERN is recorded at the start, we need our own loop detection. */ + if (J->pc == J->startpc && + J->framedepth + J->retdepth == 0 && J->parent == 0 && J->exitno == 0) { + IRRef ref = REF_FIRST + LJ_HASPROFILE; +#ifdef LUAJIT_ENABLE_CHECKHOOK + ref += 3; +#endif + if (J->cur.nins > ref || + (LJ_HASPROFILE && J->cur.nins == ref && J->cur.ir[ref-1].o != IR_PROF)) { + J->instunroll = 0; /* Cannot continue unrolling across an ITERN. */ + lj_record_stop(J, LJ_TRLINK_LOOP, J->cur.traceno); /* Looping trace. */ + return LOOPEV_ENTER; + } + } + J->maxslot = ra; + lj_snap_add(J); /* Required to make JLOOP the first ins in a side-trace. */ + ix.tab = getslot(J, ra-2); + ix.key = J->base[ra-1] ? J->base[ra-1] : + sloadt(J, (int32_t)(ra-1), IRT_GUARD|IRT_INT, + IRSLOAD_TYPECHECK|IRSLOAD_KEYINDEX); + copyTV(J->L, &ix.tabv, &J->L->base[ra-2]); + copyTV(J->L, &ix.keyv, &J->L->base[ra-1]); + ix.idxchain = (rb < 3); /* Omit value type check, if unused. */ + ix.mobj = 1; /* We need the next index, too. */ + J->maxslot = ra + lj_record_next(J, &ix); + J->needsnap = 1; + if (!tref_isnil(ix.key)) { /* Looping back? */ + J->base[ra-1] = ix.mobj | TREF_KEYINDEX; /* Control var has next index. */ + J->base[ra] = ix.key; + J->base[ra+1] = ix.val; + J->pc += bc_j(J->pc[1])+2; + return LOOPEV_ENTER; + } else { + J->maxslot = ra-3; + J->pc += 2; + return LOOPEV_LEAVE; + } +#endif +} + +/* Record ISNEXT. */ +static void rec_isnext(jit_State *J, BCReg ra) +{ + cTValue *b = &J->L->base[ra-3]; + if (tvisfunc(b) && funcV(b)->c.ffid == FF_next && + tvistab(b+1) && tvisnil(b+2)) { + /* These checks are folded away for a compiled pairs(). */ + TRef func = getslot(J, ra-3); + TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), func, IRFL_FUNC_FFID); + emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, FF_next)); + (void)getslot(J, ra-2); /* Type check for table. */ + (void)getslot(J, ra-1); /* Type check for nil key. */ + J->base[ra-1] = lj_ir_kint(J, 0) | TREF_KEYINDEX; + J->maxslot = ra; + } else { /* Abort trace. Interpreter will despecialize bytecode. */ + lj_trace_err(J, LJ_TRERR_RECERR); + } +} + +/* -- Record profiler hook checks ----------------------------------------- */ + +#if LJ_HASPROFILE + +/* Need to insert profiler hook check? */ +static int rec_profile_need(jit_State *J, GCproto *pt, const BCIns *pc) +{ + GCproto *ppt; + lj_assertJ(J->prof_mode == 'f' || J->prof_mode == 'l', + "bad profiler mode %c", J->prof_mode); + if (!pt) + return 0; + ppt = J->prev_pt; + J->prev_pt = pt; + if (pt != ppt && ppt) { + J->prev_line = -1; + return 1; + } + if (J->prof_mode == 'l') { + BCLine line = lj_debug_line(pt, proto_bcpos(pt, pc)); + BCLine pline = J->prev_line; + J->prev_line = line; + if (pline != line) + return 1; + } + return 0; +} + +static void rec_profile_ins(jit_State *J, const BCIns *pc) +{ + if (J->prof_mode && rec_profile_need(J, J->pt, pc)) { + emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); + lj_snap_add(J); + } +} + +static void rec_profile_ret(jit_State *J) +{ + if (J->prof_mode == 'f') { + emitir(IRTG(IR_PROF, IRT_NIL), 0, 0); + J->prev_pt = NULL; + lj_snap_add(J); + } +} + +#endif + /* -- Record calls and returns -------------------------------------------- */ /* Specialize to the runtime value of the called function or its prototype. */ @@ -590,11 +778,26 @@ static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr) GCproto *pt = funcproto(fn); /* Too many closures created? Probably not a monomorphic function. */ if (pt->flags >= PROTO_CLC_POLY) { /* Specialize to prototype instead. */ - TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC); - emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt))); + TRef trpt = emitir(IRT(IR_FLOAD, IRT_PGC), tr, IRFL_FUNC_PC); + emitir(IRTG(IR_EQ, IRT_PGC), trpt, lj_ir_kptr(J, proto_bc(pt))); (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); /* Prevent GC of proto. */ return tr; } + } else { + /* Don't specialize to non-monomorphic builtins. */ + switch (fn->c.ffid) { + case FF_coroutine_wrap_aux: + case FF_string_gmatch_aux: + /* NYI: io_file_iter doesn't have an ffid, yet. */ + { /* Specialize to the ffid. */ + TRef trid = emitir(IRT(IR_FLOAD, IRT_U8), tr, IRFL_FUNC_FFID); + emitir(IRTGI(IR_EQ), trid, lj_ir_kint(J, fn->c.ffid)); + } + return tr; + default: + /* NYI: don't specialize to non-monomorphic C functions. */ + break; + } } /* Otherwise specialize to the function (closure) value itself. */ kfunc = lj_ir_kfunc(J, fn); @@ -607,21 +810,31 @@ static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs) { RecordIndex ix; TValue *functv = &J->L->base[func]; - TRef *fbase = &J->base[func]; + TRef kfunc, *fbase = &J->base[func]; ptrdiff_t i; - for (i = 0; i <= nargs; i++) - (void)getslot(J, func+i); /* Ensure func and all args have a reference. */ + (void)getslot(J, func); /* Ensure func has a reference. */ + for (i = 1; i <= nargs; i++) + (void)getslot(J, func+LJ_FR2+i); /* Ensure all args have a reference. */ if (!tref_isfunc(fbase[0])) { /* Resolve __call metamethod. */ ix.tab = fbase[0]; copyTV(J->L, &ix.tabv, functv); if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj)) lj_trace_err(J, LJ_TRERR_NOMM); - for (i = ++nargs; i > 0; i--) /* Shift arguments up. */ - fbase[i] = fbase[i-1]; + for (i = ++nargs; i > LJ_FR2; i--) /* Shift arguments up. */ + fbase[i+LJ_FR2] = fbase[i+LJ_FR2-1]; +#if LJ_FR2 + fbase[2] = fbase[0]; +#endif fbase[0] = ix.mobj; /* Replace function. */ functv = &ix.mobjv; } - fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]); + kfunc = rec_call_specialize(J, funcV(functv), fbase[0]); +#if LJ_FR2 + fbase[0] = kfunc; + fbase[1] = TREF_FRAME; +#else + fbase[0] = kfunc | TREF_FRAME; +#endif J->maxslot = (BCReg)nargs; } @@ -631,8 +844,8 @@ void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) rec_call_setup(J, func, nargs); /* Bump frame. */ J->framedepth++; - J->base += func+1; - J->baseslot += func+1; + J->base += func+1+LJ_FR2; + J->baseslot += func+1+LJ_FR2; if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); } @@ -650,7 +863,9 @@ void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs) func += cbase; } /* Move func + args down. */ - memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1)); + if (LJ_FR2 && J->baseslot == 2) + J->base[func+1] = TREF_FRAME; + memmove(&J->base[-1-LJ_FR2], &J->base[func], sizeof(TRef)*(J->maxslot+1+LJ_FR2)); /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */ /* Tailcalls can form a loop, so count towards the loop unroll limit. */ if (++J->tailcalled > J->loopunroll) @@ -680,6 +895,8 @@ static int check_downrec_unroll(jit_State *J, GCproto *pt) return 0; } +static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot); + /* Record return. */ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) { @@ -691,30 +908,32 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth <= 0) lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1); + lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); gotresults++; rbase += cbase; J->baseslot -= (BCReg)cbase; J->base -= cbase; J->base[--rbase] = TREF_TRUE; /* Prepend true to results. */ frame = frame_prevd(frame); + J->needsnap = 1; /* Stop catching on-trace errors. */ } /* Return to lower frame via interpreter for unhandled cases. */ if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) && (!frame_islua(frame) || - (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) { + (J->parent == 0 && J->exitno == 0 && + !bc_isret(bc_op(J->cur.startins))))) { /* NYI: specialize to frame type and return directly, not via RET*. */ for (i = 0; i < (ptrdiff_t)rbase; i++) J->base[i] = 0; /* Purge dead slots. */ J->maxslot = rbase + (BCReg)gotresults; - rec_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ + lj_record_stop(J, LJ_TRLINK_RETURN, 0); /* Return to interpreter. */ return; } if (frame_isvarg(frame)) { BCReg cbase = (BCReg)frame_delta(frame); if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ lj_trace_err(J, LJ_TRERR_NYIRETL); - lua_assert(J->baseslot > 1); + lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); rbase += cbase; J->baseslot -= (BCReg)cbase; J->base -= cbase; @@ -724,27 +943,28 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) BCIns callins = *(frame_pc(frame)-1); ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults; BCReg cbase = bc_a(callins); - GCproto *pt = funcproto(frame_func(frame - (cbase+1))); + GCproto *pt = funcproto(frame_func(frame - (cbase+1+LJ_FR2))); if ((pt->flags & PROTO_NOJIT)) lj_trace_err(J, LJ_TRERR_CJITOFF); if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { if (check_downrec_unroll(J, pt)) { J->maxslot = (BCReg)(rbase + gotresults); lj_snap_purge(J); - rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-recursion. */ + lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */ return; } lj_snap_add(J); } for (i = 0; i < nresults; i++) /* Adjust results. */ - J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL; + J->base[i-1-LJ_FR2] = i < gotresults ? J->base[rbase+i] : TREF_NIL; J->maxslot = cbase+(BCReg)nresults; if (J->framedepth > 0) { /* Return to a frame that is part of the trace. */ J->framedepth--; - lua_assert(J->baseslot > cbase+1); - J->baseslot -= cbase+1; - J->base -= cbase+1; - } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { + lj_assertJ(J->baseslot > cbase+1+LJ_FR2, "bad baseslot for return"); + J->baseslot -= cbase+1+LJ_FR2; + J->base -= cbase+1+LJ_FR2; + } else if (J->parent == 0 && J->exitno == 0 && + !bc_isret(bc_op(J->cur.startins))) { /* Return to lower frame would leave the loop in a root trace. */ lj_trace_err(J, LJ_TRERR_LLEAVE); } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ @@ -752,13 +972,13 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) } else { /* Return to lower frame. Guard for the target we return to. */ TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); - emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc); + emitir(IRTG(IR_RETF, IRT_PGC), trpt, trpc); J->retdepth++; J->needsnap = 1; - lua_assert(J->baseslot == 1); + lj_assertJ(J->baseslot == 1+LJ_FR2, "bad baseslot for return"); /* Shift result slots up and clear the slots of the new frame below. */ - memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults); - memset(J->base-1, 0, sizeof(TRef)*(cbase+1)); + memmove(J->base + cbase, J->base-1-LJ_FR2, sizeof(TRef)*nresults); + memset(J->base-1-LJ_FR2, 0, sizeof(TRef)*(cbase+1+LJ_FR2)); } } else if (frame_iscont(frame)) { /* Return to continuation frame. */ ASMFunction cont = frame_contf(frame); @@ -767,24 +987,52 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) lj_trace_err(J, LJ_TRERR_NYIRETL); J->baseslot -= (BCReg)cbase; J->base -= cbase; - J->maxslot = cbase-2; + J->maxslot = cbase-(2<<LJ_FR2); if (cont == lj_cont_ra) { /* Copy result to destination slot. */ BCReg dst = bc_a(*(frame_contpc(frame)-1)); J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL; - if (dst >= J->maxslot) J->maxslot = dst+1; + if (dst >= J->maxslot) { + J->maxslot = dst+1; + } } else if (cont == lj_cont_nop) { /* Nothing to do here. */ } else if (cont == lj_cont_cat) { - lua_assert(0); + BCReg bslot = bc_b(*(frame_contpc(frame)-1)); + TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; + if (bslot != J->maxslot) { /* Concatenate the remainder. */ + TValue *b = J->L->base, save; /* Simulate lower frame and result. */ + /* Can't handle MM_concat + CALLT + fast func side-effects. */ + if (J->postproc != LJ_POST_NONE) + lj_trace_err(J, LJ_TRERR_NYIRETL); + J->base[J->maxslot] = tr; + copyTV(J->L, &save, b-(2<<LJ_FR2)); + if (gotresults) + copyTV(J->L, b-(2<<LJ_FR2), b+rbase); + else + setnilV(b-(2<<LJ_FR2)); + J->L->base = b - cbase; + tr = rec_cat(J, bslot, cbase-(2<<LJ_FR2)); + b = J->L->base + cbase; /* Undo. */ + J->L->base = b; + copyTV(J->L, b-(2<<LJ_FR2), &save); + } + if (tr) { /* Store final result. */ + BCReg dst = bc_a(*(frame_contpc(frame)-1)); + J->base[dst] = tr; + if (dst >= J->maxslot) { + J->maxslot = dst+1; + } + } /* Otherwise continue with another __concat call. */ } else { /* Result type already specialized. */ - lua_assert(cont == lj_cont_condf || cont == lj_cont_condt); + lj_assertJ(cont == lj_cont_condf || cont == lj_cont_condt, + "bad continuation type"); } } else { lj_trace_err(J, LJ_TRERR_NYIRETL); /* NYI: handle return to C frame. */ } - lua_assert(J->baseslot >= 1); + lj_assertJ(J->baseslot >= 1+LJ_FR2, "bad baseslot for return"); } /* -- Metamethod handling ------------------------------------------------- */ @@ -792,19 +1040,17 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) /* Prepare to record call to metamethod. */ static BCReg rec_mm_prep(jit_State *J, ASMFunction cont) { - BCReg s, top = curr_proto(J->L)->framesize; - TRef trcont; - setcont(&J->L->base[top], cont); -#if LJ_64 - trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin)); + BCReg s, top = cont == lj_cont_cat ? J->maxslot : curr_proto(J->L)->framesize; +#if LJ_FR2 + J->base[top] = lj_ir_k64(J, IR_KNUM, u64ptr(contptr(cont))); + J->base[top+1] = TREF_CONT; #else - trcont = lj_ir_kptr(J, (void *)cont); + J->base[top] = lj_ir_kptr(J, contptr(cont)) | TREF_CONT; #endif - J->base[top] = trcont | TREF_CONT; J->framedepth++; for (s = J->maxslot; s < top; s++) J->base[s] = 0; /* Clear frame gap to avoid resurrecting previous refs. */ - return top+1; + return top+1+LJ_FR2; } /* Record metamethod lookup. */ @@ -823,7 +1069,7 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) cTValue *mo; if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) { /* Specialize to the C library namespace object. */ - emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); + emitir(IRTG(IR_EQ, IRT_PGC), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv))); } else { /* Specialize to the type of userdata. */ TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE); @@ -852,7 +1098,8 @@ int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm) } /* The cdata metatable is treated as immutable. */ if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt; - ix->mt = mix.tab = lj_ir_ktab(J, mt); + ix->mt = mix.tab = lj_ir_ggfload(J, IRT_TAB, + GG_OFS(g.gcroot[GCROOT_BASEMT+itypemap(&ix->tabv)])); goto nocheck; } ix->mt = mt ? mix.tab : TREF_NIL; @@ -879,12 +1126,12 @@ nocheck: static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) { /* Set up metamethod call first to save ix->tab and ix->tabv. */ - BCReg func = rec_mm_prep(J, lj_cont_ra); + BCReg func = rec_mm_prep(J, mm == MM_concat ? lj_cont_cat : lj_cont_ra); TRef *base = J->base + func; TValue *basev = J->L->base + func; - base[1] = ix->tab; base[2] = ix->key; - copyTV(J->L, basev+1, &ix->tabv); - copyTV(J->L, basev+2, &ix->keyv); + base[1+LJ_FR2] = ix->tab; base[2+LJ_FR2] = ix->key; + copyTV(J->L, basev+1+LJ_FR2, &ix->tabv); + copyTV(J->L, basev+2+LJ_FR2, &ix->keyv); if (!lj_record_mm_lookup(J, ix, mm)) { /* Lookup mm on 1st operand. */ if (mm != MM_unm) { ix->tab = ix->key; @@ -896,6 +1143,9 @@ static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm) } ok: base[0] = ix->mobj; +#if LJ_FR2 + base[1] = 0; +#endif copyTV(J->L, basev+0, &ix->mobjv); lj_record_call(J, func, 2); return 0; /* No result yet. */ @@ -912,6 +1162,8 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) TRef *base = J->base + func; TValue *basev = J->L->base + func; base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv); + base += LJ_FR2; + basev += LJ_FR2; base[1] = tr; copyTV(J->L, basev+1, tv); #if LJ_52 base[2] = tr; copyTV(J->L, basev+2, tv); @@ -921,7 +1173,7 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) lj_record_call(J, func, 2); } else { if (LJ_52 && tref_istab(tr)) - return lj_ir_call(J, IRCALL_lj_tab_len, tr); + return emitir(IRTI(IR_ALEN), tr, TREF_NIL); lj_trace_err(J, LJ_TRERR_NOMM); } return 0; /* No result yet. */ @@ -931,10 +1183,10 @@ static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv) static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op) { BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt); - TRef *base = J->base + func; - TValue *tv = J->L->base + func; - base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key; - copyTV(J->L, tv+0, &ix->mobjv); + TRef *base = J->base + func + LJ_FR2; + TValue *tv = J->L->base + func + LJ_FR2; + base[-LJ_FR2] = ix->mobj; base[1] = ix->val; base[2] = ix->key; + copyTV(J->L, tv-LJ_FR2, &ix->mobjv); copyTV(J->L, tv+1, &ix->valv); copyTV(J->L, tv+2, &ix->keyv); lj_record_call(J, func, 2); @@ -1030,7 +1282,7 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) ix->tab = ix->val; copyTV(J->L, &ix->tabv, &ix->valv); } else { - lua_assert(tref_iscdata(ix->key)); + lj_assertJ(tref_iscdata(ix->key), "cdata expected"); ix->tab = ix->key; copyTV(J->L, &ix->tabv, &ix->keyv); } @@ -1041,6 +1293,72 @@ static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm) /* -- Indexed access ------------------------------------------------------ */ +#ifdef LUAJIT_ENABLE_TABLE_BUMP +/* Bump table allocations in bytecode when they grow during recording. */ +static void rec_idx_bump(jit_State *J, RecordIndex *ix) +{ + RBCHashEntry *rbc = &J->rbchash[(ix->tab & (RBCHASH_SLOTS-1))]; + if (tref_ref(ix->tab) == rbc->ref) { + const BCIns *pc = mref(rbc->pc, const BCIns); + GCtab *tb = tabV(&ix->tabv); + uint32_t nhbits; + IRIns *ir; + if (!tvisnil(&ix->keyv)) + (void)lj_tab_set(J->L, tb, &ix->keyv); /* Grow table right now. */ + nhbits = tb->hmask > 0 ? lj_fls(tb->hmask)+1 : 0; + ir = IR(tref_ref(ix->tab)); + if (ir->o == IR_TNEW) { + uint32_t ah = bc_d(*pc); + uint32_t asize = ah & 0x7ff, hbits = ah >> 11; + if (nhbits > hbits) hbits = nhbits; + if (tb->asize > asize) { + asize = tb->asize <= 0x7ff ? tb->asize : 0x7ff; + } + if ((asize | (hbits<<11)) != ah) { /* Has the size changed? */ + /* Patch bytecode, but continue recording (for more patching). */ + setbc_d(pc, (asize | (hbits<<11))); + /* Patching TNEW operands is only safe if the trace is aborted. */ + ir->op1 = asize; ir->op2 = hbits; + J->retryrec = 1; /* Abort the trace at the end of recording. */ + } + } else if (ir->o == IR_TDUP) { + GCtab *tpl = gco2tab(proto_kgc(&gcref(rbc->pt)->pt, ~(ptrdiff_t)bc_d(*pc))); + /* Grow template table, but preserve keys with nil values. */ + if ((tb->asize > tpl->asize && (1u << nhbits)-1 == tpl->hmask) || + (tb->asize == tpl->asize && (1u << nhbits)-1 > tpl->hmask)) { + Node *node = noderef(tpl->node); + uint32_t i, hmask = tpl->hmask, asize; + TValue *array; + for (i = 0; i <= hmask; i++) { + if (!tvisnil(&node[i].key) && tvisnil(&node[i].val)) + settabV(J->L, &node[i].val, tpl); + } + if (!tvisnil(&ix->keyv) && tref_isk(ix->key)) { + TValue *o = lj_tab_set(J->L, tpl, &ix->keyv); + if (tvisnil(o)) settabV(J->L, o, tpl); + } + lj_tab_resize(J->L, tpl, tb->asize, nhbits); + node = noderef(tpl->node); + hmask = tpl->hmask; + for (i = 0; i <= hmask; i++) { + /* This is safe, since template tables only hold immutable values. */ + if (tvistab(&node[i].val)) + setnilV(&node[i].val); + } + /* The shape of the table may have changed. Clean up array part, too. */ + asize = tpl->asize; + array = tvref(tpl->array); + for (i = 0; i < asize; i++) { + if (tvistab(&array[i])) + setnilV(&array[i]); + } + J->retryrec = 1; /* Abort the trace at the end of recording. */ + } + } + } +} +#endif + /* Record bounds-check. */ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) { @@ -1061,7 +1379,8 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) /* Got scalar evolution analysis results for this reference? */ if (ref == J->scev.idx) { int32_t stop; - lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD); + lj_assertJ(irt_isint(J->scev.t) && ir->o == IR_SLOAD, + "only int SCEV supported"); stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]); /* Runtime value for stop of loop is within bounds? */ if ((uint64_t)stop + ofs < (uint64_t)asize) { @@ -1080,11 +1399,14 @@ static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize) } /* Record indexed key lookup. */ -static TRef rec_idx_key(jit_State *J, RecordIndex *ix) +static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref, + IRType1 *rbguard) { TRef key; GCtab *t = tabV(&ix->tabv); ix->oldv = lj_tab_get(J->L, t, &ix->keyv); /* Lookup previous value. */ + *rbref = 0; + rbguard->irt = 0; /* Integer keys are looked up in the array part first. */ key = ix->key; @@ -1098,8 +1420,8 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) if ((MSize)k < t->asize) { /* Currently an array key? */ TRef arrayref; rec_idx_abc(J, asizeref, ikey, t->asize); - arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY); - return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey); + arrayref = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_ARRAY); + return emitir(IRT(IR_AREF, IRT_PGC), arrayref, ikey); } else { /* Currently not in array (may be an array extension)? */ emitir(IRTGI(IR_ULE), asizeref, ikey); /* Inv. bounds check. */ if (k == 0 && tref_isk(key)) @@ -1131,19 +1453,21 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix) key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); if (tref_isk(key)) { /* Optimize lookup of constant hash keys. */ - MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val); - if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) && - hslot <= 65535*(MSize)sizeof(Node)) { - TRef node, kslot; - TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); + GCSize hslot = (GCSize)((char *)ix->oldv-(char *)&noderef(t->node)[0].val); + if (hslot <= t->hmask*(GCSize)sizeof(Node) && + hslot <= 65535*(GCSize)sizeof(Node)) { + TRef node, kslot, hm; + *rbref = J->cur.nins; /* Mark possible rollback point. */ + *rbguard = J->guardemit; + hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK); emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask)); - node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE); - kslot = lj_ir_kslot(J, key, hslot / sizeof(Node)); - return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot); + node = emitir(IRT(IR_FLOAD, IRT_PGC), ix->tab, IRFL_TAB_NODE); + kslot = lj_ir_kslot(J, key, (IRRef)(hslot / sizeof(Node))); + return emitir(IRTG(IR_HREFK, IRT_PGC), node, kslot); } } /* Fall back to a regular hash lookup. */ - return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key); + return emitir(IRT(IR_HREF, IRT_PGC), ix->tab, key); } /* Determine whether a key is NOT one of the fast metamethod names. */ @@ -1168,20 +1492,22 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) { TRef xref; IROp xrefop, loadop; + IRRef rbref; + IRType1 rbguard; cTValue *oldv; while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */ /* Never call raw lj_record_idx() on non-table. */ - lua_assert(ix->idxchain != 0); + lj_assertJ(ix->idxchain != 0, "bad usage"); if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index)) lj_trace_err(J, LJ_TRERR_NOMM); handlemm: if (tref_isfunc(ix->mobj)) { /* Handle metamethod call. */ BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra); - TRef *base = J->base + func; - TValue *tv = J->L->base + func; - base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; - setfuncV(J->L, tv+0, funcV(&ix->mobjv)); + TRef *base = J->base + func + LJ_FR2; + TValue *tv = J->L->base + func + LJ_FR2; + base[-LJ_FR2] = ix->mobj; base[1] = ix->tab; base[2] = ix->key; + setfuncV(J->L, tv-LJ_FR2, funcV(&ix->mobjv)); copyTV(J->L, tv+1, &ix->tabv); copyTV(J->L, tv+2, &ix->keyv); if (ix->val) { @@ -1194,6 +1520,16 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) return 0; /* No result yet. */ } } +#if LJ_HASBUFFER + /* The index table of buffer objects is treated as immutable. */ + if (ix->mt == TREF_NIL && !ix->val && + tref_isudata(ix->tab) && udataV(&ix->tabv)->udtype == UDTYPE_BUFFER && + tref_istab(ix->mobj) && tref_isstr(ix->key) && tref_isk(ix->key)) { + cTValue *val = lj_tab_getstr(tabV(&ix->mobjv), strV(&ix->keyv)); + TRef tr = lj_record_constify(J, val); + if (tr) return tr; /* Specialize to the value, i.e. a method. */ + } +#endif /* Otherwise retry lookup with metaobject. */ ix->tab = ix->mobj; copyTV(J->L, &ix->tabv, &ix->mobjv); @@ -1213,7 +1549,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) } /* Record the key lookup. */ - xref = rec_idx_key(J, ix); + xref = rec_idx_key(J, ix, &rbref, &rbguard); xrefop = IR(tref_ref(xref))->o; loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD; /* The lj_meta_tset() inconsistency is gone, but better play safe. */ @@ -1223,11 +1559,15 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) IRType t = itype2irt(oldv); TRef res; if (oldv == niltvg(J2G(J))) { - emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); + emitir(IRTG(IR_EQ, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); res = TREF_NIL; } else { res = emitir(IRTG(loadop, t), xref, 0); } + if (tref_ref(res) < rbref) { /* HREFK + load forwarded? */ + lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */ + J->guardemit = rbguard; + } if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index)) goto handlemm; if (irtype_ispri(t)) res = TREF_PRI(t); /* Canonicalize primitives. */ @@ -1235,6 +1575,10 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) } else { /* Indexed store. */ GCtab *mt = tabref(tabV(&ix->tabv)->metatable); int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val); + if (tref_ref(xref) < rbref) { /* HREFK forwarded? */ + lj_ir_rollback(J, rbref); /* Rollback to eliminate hmask guard. */ + J->guardemit = rbguard; + } if (tvisnil(oldv)) { /* Previous value was nil? */ /* Need to duplicate the hasmm check for the early guards. */ int hasmm = 0; @@ -1245,24 +1589,28 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) if (hasmm) emitir(IRTG(loadop, IRT_NIL), xref, 0); /* Guard for nil value. */ else if (xrefop == IR_HREF) - emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32), + emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) { - lua_assert(hasmm); + lj_assertJ(hasmm, "inconsistent metamethod handling"); goto handlemm; } - lua_assert(!hasmm); + lj_assertJ(!hasmm, "inconsistent metamethod handling"); if (oldv == niltvg(J2G(J))) { /* Need to insert a new key. */ TRef key = ix->key; if (tref_isinteger(key)) /* NEWREF needs a TValue as a key. */ key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT); - xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key); + xref = emitir(IRT(IR_NEWREF, IRT_PGC), ix->tab, key); keybarrier = 0; /* NEWREF already takes care of the key barrier. */ +#ifdef LUAJIT_ENABLE_TABLE_BUMP + if ((J->flags & JIT_F_OPT_SINK)) /* Avoid a separate flag. */ + rec_idx_bump(J, ix); +#endif } } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) { /* Cannot derive that the previous value was non-nil, must do checks. */ if (xrefop == IR_HREF) /* Guard against store to niltv. */ - emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); + emitir(IRTG(IR_NE, IRT_PGC), xref, lj_ir_kkptr(J, niltvg(J2G(J)))); if (ix->idxchain) { /* Metamethod lookup required? */ /* A check for NULL metatable is cheaper (hoistable) than a load. */ if (!mt) { @@ -1284,7 +1632,7 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0); /* Invalidate neg. metamethod cache for stores with certain string keys. */ if (!nommstr(J, ix->key)) { - TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM); + TRef fref = emitir(IRT(IR_FREF, IRT_PGC), ix->tab, IRFL_TAB_NOMM); emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0)); } J->needsnap = 1; @@ -1292,6 +1640,72 @@ TRef lj_record_idx(jit_State *J, RecordIndex *ix) } } +/* Determine result type of table traversal. */ +static IRType rec_next_types(GCtab *t, uint32_t idx) +{ + for (; idx < t->asize; idx++) { + cTValue *a = arrayslot(t, idx); + if (LJ_LIKELY(!tvisnil(a))) + return (LJ_DUALNUM ? IRT_INT : IRT_NUM) + (itype2irt(a) << 8); + } + idx -= t->asize; + for (; idx <= t->hmask; idx++) { + Node *n = &noderef(t->node)[idx]; + if (!tvisnil(&n->val)) + return itype2irt(&n->key) + (itype2irt(&n->val) << 8); + } + return IRT_NIL + (IRT_NIL << 8); +} + +/* Record a table traversal step aka next(). */ +int lj_record_next(jit_State *J, RecordIndex *ix) +{ + IRType t, tkey, tval; + TRef trvk; + t = rec_next_types(tabV(&ix->tabv), ix->keyv.u32.lo); + tkey = (t & 0xff); tval = (t >> 8); + trvk = lj_ir_call(J, IRCALL_lj_vm_next, ix->tab, ix->key); + if (ix->mobj || tkey == IRT_NIL) { + TRef idx = emitir(IRTI(IR_HIOP), trvk, trvk); + /* Always check for invalid key from next() for nil result. */ + if (!ix->mobj) emitir(IRTGI(IR_NE), idx, lj_ir_kint(J, -1)); + ix->mobj = idx; + } + ix->key = lj_record_vload(J, trvk, 1, tkey); + if (tkey == IRT_NIL || ix->idxchain) { /* Omit value type check. */ + ix->val = TREF_NIL; + return 1; + } else { /* Need value. */ + ix->val = lj_record_vload(J, trvk, 0, tval); + return 2; + } +} + +static void rec_tsetm(jit_State *J, BCReg ra, BCReg rn, int32_t i) +{ + RecordIndex ix; + cTValue *basev = J->L->base; + GCtab *t = tabV(&basev[ra-1]); + settabV(J->L, &ix.tabv, t); + ix.tab = getslot(J, ra-1); + ix.idxchain = 0; +#ifdef LUAJIT_ENABLE_TABLE_BUMP + if ((J->flags & JIT_F_OPT_SINK)) { + if (t->asize < i+rn-ra) + lj_tab_reasize(J->L, t, i+rn-ra); + setnilV(&ix.keyv); + rec_idx_bump(J, &ix); + } +#endif + for (; ra < rn; i++, ra++) { + setintV(&ix.keyv, i); + ix.key = lj_ir_kint(J, i); + copyTV(J->L, &ix.valv, &basev[ra]); + ix.val = getslot(J, ra); + lj_record_idx(J, &ix); + } +} + /* -- Upvalue access ------------------------------------------------------ */ /* Check whether upvalue is immutable and ok to constify. */ @@ -1328,13 +1742,17 @@ static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val) int needbarrier = 0; if (rec_upvalue_constify(J, uvp)) { /* Try to constify immutable upvalue. */ TRef tr, kfunc; - lua_assert(val == 0); + lj_assertJ(val == 0, "bad usage"); if (!tref_isk(fn)) { /* Late specialization of current function. */ if (J->pt->flags >= PROTO_CLC_POLY) goto noconstify; kfunc = lj_ir_kfunc(J, J->fn); emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc); - J->base[-1] = TREF_FRAME | kfunc; +#if LJ_FR2 + J->base[-2] = kfunc; +#else + J->base[-1] = kfunc | TREF_FRAME; +#endif fn = kfunc; } tr = lj_record_constify(J, uvval(uvp)); @@ -1345,16 +1763,16 @@ noconstify: /* Note: this effectively limits LJ_MAX_UPVAL to 127. */ uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff); if (!uvp->closed) { - uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv)); + uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_PGC), fn, uv)); /* In current stack? */ if (uvval(uvp) >= tvref(J->L->stack) && uvval(uvp) < tvref(J->L->maxstack)) { int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot)); if (slot >= 0) { /* Aliases an SSA slot? */ - emitir(IRTG(IR_EQ, IRT_P32), + emitir(IRTG(IR_EQ, IRT_PGC), REF_BASE, - emitir(IRT(IR_ADD, IRT_P32), uref, - lj_ir_kint(J, (slot - 1) * -8))); + emitir(IRT(IR_ADD, IRT_PGC), uref, + lj_ir_kint(J, (slot - 1 - LJ_FR2) * -8))); slot -= (int32_t)J->baseslot; /* Note: slot number may be negative! */ if (val == 0) { return getslot(J, slot); @@ -1365,12 +1783,12 @@ noconstify: } } } - emitir(IRTG(IR_UGT, IRT_P32), - emitir(IRT(IR_SUB, IRT_P32), uref, REF_BASE), + emitir(IRTG(IR_UGT, IRT_PGC), + emitir(IRT(IR_SUB, IRT_PGC), uref, REF_BASE), lj_ir_kint(J, (J->baseslot + J->maxslot) * 8)); } else { needbarrier = 1; - uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv)); + uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_PGC), fn, uv)); } if (val == 0) { /* Upvalue load */ IRType t = itype2irt(uvval(uvp)); @@ -1409,16 +1827,16 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) if (count + J->tailcalled > J->param[JIT_P_recunroll]) { J->pc++; if (J->framedepth + J->retdepth == 0) - rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-recursion. */ + lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Tail-rec. */ else - rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ + lj_record_stop(J, LJ_TRLINK_UPREC, J->cur.traceno); /* Up-recursion. */ } } else { if (count > J->param[JIT_P_callunroll]) { if (lnk) { /* Possible tail- or up-recursion. */ lj_trace_flush(J, lnk); /* Flush trace that only returns. */ /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ - hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4)); + hotcount_set(J2GG(J), J->pc+1, lj_prng_u64(&J2G(J)->prng) & 15u); } lj_trace_err(J, LJ_TRERR_CUNROLL); } @@ -1445,11 +1863,14 @@ static void rec_func_setup(jit_State *J) static void rec_func_vararg(jit_State *J) { GCproto *pt = J->pt; - BCReg s, fixargs, vframe = J->maxslot+1; - lua_assert((pt->flags & PROTO_VARARG)); + BCReg s, fixargs, vframe = J->maxslot+1+LJ_FR2; + lj_assertJ((pt->flags & PROTO_VARARG), "FUNCV in non-vararg function"); if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); - J->base[vframe-1] = J->base[-1]; /* Copy function up. */ + J->base[vframe-1-LJ_FR2] = J->base[-1-LJ_FR2]; /* Copy function up. */ +#if LJ_FR2 + J->base[vframe-1] = TREF_FRAME; +#endif /* Copy fixarg slots up and set their original slots to nil. */ fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot; for (s = 0; s < fixargs; s++) { @@ -1485,9 +1906,9 @@ static void rec_func_jit(jit_State *J, TraceNo lnk) } J->instunroll = 0; /* Cannot continue across a compiled function. */ if (J->pc == J->startpc && J->framedepth + J->retdepth == 0) - rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-recursion. */ + lj_record_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno); /* Extra tail-rec. */ else - rec_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ + lj_record_stop(J, LJ_TRLINK_ROOT, lnk); /* Link to the function. */ } /* -- Vararg handling ----------------------------------------------------- */ @@ -1511,8 +1932,10 @@ static int select_detect(jit_State *J) static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) { int32_t numparams = J->pt->numparams; - ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; - lua_assert(frame_isvarg(J->L->base-1)); + ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1 - LJ_FR2; + lj_assertJ(frame_isvarg(J->L->base-1), "VARG in non-vararg frame"); + if (LJ_FR2 && dst > J->maxslot) + J->base[dst-1] = 0; /* Prevent resurrection of unrelated slot. */ if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ ptrdiff_t i; if (nvararg < 0) nvararg = 0; @@ -1523,10 +1946,10 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) J->maxslot = dst + (BCReg)nresults; } for (i = 0; i < nresults; i++) - J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL; + J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1 - LJ_FR2) : TREF_NIL; } else { /* Unknown number of varargs passed to trace. */ - TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); - int32_t frofs = 8*(1+numparams)+FRAME_VARG; + TRef fr = emitir(IRTI(IR_SLOAD), LJ_FR2, IRSLOAD_READONLY|IRSLOAD_FRAME); + int32_t frofs = 8*(1+LJ_FR2+numparams)+FRAME_VARG; if (nresults >= 0) { /* Known fixed number of results. */ ptrdiff_t i; if (nvararg > 0) { @@ -1535,16 +1958,13 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) if (nvararg >= nresults) emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); else - emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); - vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); + emitir(IRTGI(IR_EQ), fr, + lj_ir_kint(J, (int32_t)frame_ftsz(J->L->base-1))); + vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, lj_ir_kint(J, frofs-8*(1+LJ_FR2))); for (i = 0; i < nload; i++) { - IRType t = itype2irt(&J->L->base[i-1-nvararg]); - TRef aref = emitir(IRT(IR_AREF, IRT_P32), - vbase, lj_ir_kint(J, (int32_t)i)); - TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); - if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ - J->base[dst+i] = tr; + IRType t = itype2irt(&J->L->base[i-1-LJ_FR2-nvararg]); + J->base[dst+i] = lj_record_vload(J, vbase, (MSize)i, t); } } else { emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); @@ -1586,15 +2006,15 @@ static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) } if (idx != 0 && idx <= nvararg) { IRType t; - TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); - vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); - t = itype2irt(&J->L->base[idx-2-nvararg]); - aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); - tr = emitir(IRTG(IR_VLOAD, t), aref, 0); - if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ + TRef aref, vbase = emitir(IRT(IR_SUB, IRT_IGC), REF_BASE, fr); + vbase = emitir(IRT(IR_ADD, IRT_PGC), vbase, + lj_ir_kint(J, frofs-(8<<LJ_FR2))); + t = itype2irt(&J->L->base[idx-2-LJ_FR2-nvararg]); + aref = emitir(IRT(IR_AREF, IRT_PGC), vbase, tridx); + tr = lj_record_vload(J, aref, 0, t); } - J->base[dst-2] = tr; - J->maxslot = dst-1; + J->base[dst-2-LJ_FR2] = tr; + J->maxslot = dst-1-LJ_FR2; J->bcskip = 2; /* Skip CALLM + select. */ } else { nyivarg: @@ -1612,8 +2032,63 @@ static TRef rec_tnew(jit_State *J, uint32_t ah) { uint32_t asize = ah & 0x7ff; uint32_t hbits = ah >> 11; + TRef tr; if (asize == 0x7ff) asize = 0x801; - return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); + tr = emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); +#ifdef LUAJIT_ENABLE_TABLE_BUMP + J->rbchash[(tr & (RBCHASH_SLOTS-1))].ref = tref_ref(tr); + setmref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pc, J->pc); + setgcref(J->rbchash[(tr & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt)); +#endif + return tr; +} + +/* -- Concatenation ------------------------------------------------------- */ + +static TRef rec_cat(jit_State *J, BCReg baseslot, BCReg topslot) +{ + TRef *top = &J->base[topslot]; + TValue savetv[5+LJ_FR2]; + BCReg s; + RecordIndex ix; + lj_assertJ(baseslot < topslot, "bad CAT arg"); + for (s = baseslot; s <= topslot; s++) + (void)getslot(J, s); /* Ensure all arguments have a reference. */ + if (tref_isnumber_str(top[0]) && tref_isnumber_str(top[-1])) { + TRef tr, hdr, *trp, *xbase, *base = &J->base[baseslot]; + /* First convert numbers to strings. */ + for (trp = top; trp >= base; trp--) { + if (tref_isnumber(*trp)) + *trp = emitir(IRT(IR_TOSTR, IRT_STR), *trp, + tref_isnum(*trp) ? IRTOSTR_NUM : IRTOSTR_INT); + else if (!tref_isstr(*trp)) + break; + } + xbase = ++trp; + tr = hdr = emitir(IRT(IR_BUFHDR, IRT_PGC), + lj_ir_kptr(J, &J2G(J)->tmpbuf), IRBUFHDR_RESET); + do { + tr = emitir(IRTG(IR_BUFPUT, IRT_PGC), tr, *trp++); + } while (trp <= top); + tr = emitir(IRTG(IR_BUFSTR, IRT_STR), tr, hdr); + J->maxslot = (BCReg)(xbase - J->base); + if (xbase == base) return tr; /* Return simple concatenation result. */ + /* Pass partial result. */ + topslot = J->maxslot--; + *xbase = tr; + top = xbase; + setstrV(J->L, &ix.keyv, &J2G(J)->strempty); /* Simulate string result. */ + } else { + J->maxslot = topslot-1; + copyTV(J->L, &ix.keyv, &J->L->base[topslot]); + } + copyTV(J->L, &ix.tabv, &J->L->base[topslot-1]); + ix.tab = top[-1]; + ix.key = top[0]; + memcpy(savetv, &J->L->base[topslot-1], sizeof(savetv)); /* Save slots. */ + rec_mm_arith(J, &ix, MM_concat); /* Call __concat metamethod. */ + memcpy(&J->L->base[topslot-1], savetv, sizeof(savetv)); /* Restore slots. */ + return 0; /* No result yet. */ } /* -- Record bytecode ops ------------------------------------------------- */ @@ -1634,7 +2109,15 @@ static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ +#if LJ_FR2 + SnapEntry *flink = &J->cur.snapmap[snap->mapofs + snap->nent]; + uint64_t pcbase; + memcpy(&pcbase, flink, sizeof(uint64_t)); + pcbase = (pcbase & 0xff) | (u64ptr(npc) << 8); + memcpy(flink, &pcbase, sizeof(uint64_t)); +#else J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); +#endif J->needsnap = 1; if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); lj_snap_shrink(J); /* Shrink last snapshot if possible. */ @@ -1654,7 +2137,7 @@ void lj_record_ins(jit_State *J) if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) { switch (J->postproc) { case LJ_POST_FIXCOMP: /* Fixup comparison. */ - pc = frame_pc(&J2G(J)->tmptv); + pc = (const BCIns *)(uintptr_t)J2G(J)->tmptv.u64; rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1))); /* fallthrough */ case LJ_POST_FIXGUARD: /* Fixup and emit pending guard. */ @@ -1692,7 +2175,7 @@ void lj_record_ins(jit_State *J) if (bc_op(*J->pc) >= BC__MAX) return; break; - default: lua_assert(0); break; + default: lj_assertJ(0, "bad post-processing mode"); break; } J->postproc = LJ_POST_NONE; } @@ -1700,7 +2183,7 @@ void lj_record_ins(jit_State *J) /* Need snapshot before recording next bytecode (e.g. after a store). */ if (J->needsnap) { J->needsnap = 0; - lj_snap_purge(J); + if (J->pt) lj_snap_purge(J); lj_snap_add(J); J->mergesnap = 1; } @@ -1722,6 +2205,10 @@ void lj_record_ins(jit_State *J) rec_check_ir(J); #endif +#if LJ_HASPROFILE + rec_profile_ins(J, pc); +#endif + /* Keep a copy of the runtime values of var/num/str operands. */ #define rav (&ix.valv) #define rbv (&ix.tabv) @@ -1748,9 +2235,10 @@ void lj_record_ins(jit_State *J) switch (bcmode_c(op)) { case BCMvar: copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break; - case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; + case BCMpri: setpriV(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break; case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc); copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) : + tv->u32.hi == LJ_KEYINDEX ? (lj_ir_kint(J, 0) | TREF_KEYINDEX) : lj_ir_knumint(J, numV(tv)); } break; case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc)); setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break; @@ -1843,6 +2331,18 @@ void lj_record_ins(jit_State *J) J->maxslot = bc_a(pc[1]); /* Shrink used slots. */ break; + case BC_ISTYPE: case BC_ISNUM: + /* These coercions need to correspond with lj_meta_istype(). */ + if (LJ_DUALNUM && rc == ~LJ_TNUMX+1) + ra = lj_opt_narrow_toint(J, ra); + else if (rc == ~LJ_TNUMX+2) + ra = lj_ir_tonum(J, ra); + else if (rc == ~LJ_TSTR+1) + ra = lj_ir_tostr(J, ra); + /* else: type specialization suffices. */ + J->base[bc_a(ins)] = ra; + break; + /* -- Unary ops --------------------------------------------------------- */ case BC_NOT: @@ -1854,7 +2354,7 @@ void lj_record_ins(jit_State *J) if (tref_isstr(rc)) rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN); else if (!LJ_52 && tref_istab(rc)) - rc = lj_ir_call(J, IRCALL_lj_tab_len, rc); + rc = emitir(IRTI(IR_ALEN), rc, TREF_NIL); else rc = rec_mm_len(J, rc, rcv); break; @@ -1901,16 +2401,28 @@ void lj_record_ins(jit_State *J) case BC_POW: if (tref_isnumber_str(rb) && tref_isnumber_str(rc)) - rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv); + rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv, IR_POW); else rc = rec_mm_arith(J, &ix, MM_pow); break; + /* -- Miscellaneous ops ------------------------------------------------- */ + + case BC_CAT: + rc = rec_cat(J, rb, rc); + break; + /* -- Constant and move ops --------------------------------------------- */ case BC_MOV: /* Clear gap of method call to avoid resurrecting previous refs. */ - if (ra > J->maxslot) J->base[ra-1] = 0; + if (ra > J->maxslot) { +#if LJ_FR2 + memset(J->base + J->maxslot, 0, (ra - J->maxslot) * sizeof(TRef)); +#else + J->base[ra-1] = 0; +#endif + } break; case BC_KSTR: case BC_KNUM: case BC_KPRI: break; @@ -1918,6 +2430,8 @@ void lj_record_ins(jit_State *J) rc = lj_ir_kint(J, (int32_t)(int16_t)rc); break; case BC_KNIL: + if (LJ_FR2 && ra > J->maxslot) + J->base[ra-1] = 0; while (ra <= rc) J->base[ra++] = TREF_NIL; if (rc >= J->maxslot) J->maxslot = rc+1; @@ -1954,6 +2468,14 @@ void lj_record_ins(jit_State *J) ix.idxchain = LJ_MAX_IDXCHAIN; rc = lj_record_idx(J, &ix); break; + case BC_TGETR: case BC_TSETR: + ix.idxchain = 0; + rc = lj_record_idx(J, &ix); + break; + + case BC_TSETM: + rec_tsetm(J, ra, (BCReg)(J->L->top - J->L->base), (int32_t)rcv->u32.lo); + break; case BC_TNEW: rc = rec_tnew(J, rc); @@ -1961,33 +2483,38 @@ void lj_record_ins(jit_State *J) case BC_TDUP: rc = emitir(IRTG(IR_TDUP, IRT_TAB), lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0); +#ifdef LUAJIT_ENABLE_TABLE_BUMP + J->rbchash[(rc & (RBCHASH_SLOTS-1))].ref = tref_ref(rc); + setmref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pc, pc); + setgcref(J->rbchash[(rc & (RBCHASH_SLOTS-1))].pt, obj2gco(J->pt)); +#endif break; /* -- Calls and vararg handling ----------------------------------------- */ case BC_ITERC: J->base[ra] = getslot(J, ra-3); - J->base[ra+1] = getslot(J, ra-2); - J->base[ra+2] = getslot(J, ra-1); + J->base[ra+1+LJ_FR2] = getslot(J, ra-2); + J->base[ra+2+LJ_FR2] = getslot(J, ra-1); { /* Do the actual copy now because lj_record_call needs the values. */ TValue *b = &J->L->base[ra]; copyTV(J->L, b, b-3); - copyTV(J->L, b+1, b-2); - copyTV(J->L, b+2, b-1); + copyTV(J->L, b+1+LJ_FR2, b-2); + copyTV(J->L, b+2+LJ_FR2, b-1); } lj_record_call(J, ra, (ptrdiff_t)rc-1); break; /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */ case BC_CALLM: - rc = (BCReg)(J->L->top - J->L->base) - ra; + rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2; /* fallthrough */ case BC_CALL: lj_record_call(J, ra, (ptrdiff_t)rc-1); break; case BC_CALLMT: - rc = (BCReg)(J->L->top - J->L->base) - ra; + rc = (BCReg)(J->L->top - J->L->base) - ra - LJ_FR2; /* fallthrough */ case BC_CALLT: lj_record_tailcall(J, ra, (ptrdiff_t)rc-1); @@ -2004,6 +2531,9 @@ void lj_record_ins(jit_State *J) rc = (BCReg)(J->L->top - J->L->base) - ra + 1; /* fallthrough */ case BC_RET: case BC_RET0: case BC_RET1: +#if LJ_HASPROFILE + rec_profile_ret(J); +#endif lj_record_ret(J, ra, (ptrdiff_t)rc-1); break; @@ -2014,9 +2544,10 @@ void lj_record_ins(jit_State *J) J->loopref = J->cur.nins; break; case BC_JFORI: - lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL); + lj_assertJ(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL, + "JFORI does not point to JFORL"); if (rec_for(J, pc, 0) != LOOPEV_LEAVE) /* Link to existing loop. */ - rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); + lj_record_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J])); /* Continue tracing if the loop is not entered. */ break; @@ -2026,6 +2557,9 @@ void lj_record_ins(jit_State *J) case BC_ITERL: rec_loop_interp(J, pc, rec_iterl(J, *pc)); break; + case BC_ITERN: + rec_loop_interp(J, pc, rec_itern(J, ra, rb)); + break; case BC_LOOP: rec_loop_interp(J, pc, rec_loop(J, ra, 1)); break; @@ -2038,7 +2572,8 @@ void lj_record_ins(jit_State *J) break; case BC_JLOOP: rec_loop_jit(J, rc, rec_loop(J, ra, - !bc_isret(bc_op(traceref(J, rc)->startins)))); + !bc_isret(bc_op(traceref(J, rc)->startins)) && + bc_op(traceref(J, rc)->startins) != BC_ITERN)); break; case BC_IFORL: @@ -2054,6 +2589,10 @@ void lj_record_ins(jit_State *J) J->maxslot = ra; /* Shrink used slots. */ break; + case BC_ISNEXT: + rec_isnext(J, ra); + break; + /* -- Function headers -------------------------------------------------- */ case BC_FUNCF: @@ -2068,7 +2607,8 @@ void lj_record_ins(jit_State *J) rec_func_lua(J); break; case BC_JFUNCV: - lua_assert(0); /* Cannot happen. No hotcall counting for varag funcs. */ + /* Cannot happen. No hotcall counting for varag funcs. */ + lj_assertJ(0, "unsupported vararg hotcall"); break; case BC_FUNCC: @@ -2082,12 +2622,8 @@ void lj_record_ins(jit_State *J) break; } /* fallthrough */ - case BC_ITERN: - case BC_ISNEXT: - case BC_CAT: case BC_UCLO: case BC_FNEW: - case BC_TSETM: setintV(&J->errinfo, (int32_t)op); lj_trace_err_info(J, LJ_TRERR_NYIBC); break; @@ -2096,15 +2632,21 @@ void lj_record_ins(jit_State *J) /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */ if (bcmode_a(op) == BCMdst && rc) { J->base[ra] = rc; - if (ra >= J->maxslot) J->maxslot = ra+1; + if (ra >= J->maxslot) { +#if LJ_FR2 + if (ra > J->maxslot) J->base[ra-1] = 0; +#endif + J->maxslot = ra+1; + } } #undef rav #undef rbv #undef rcv - /* Limit the number of recorded IR instructions. */ - if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord]) + /* Limit the number of recorded IR instructions and constants. */ + if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord] || + J->cur.nk < REF_BIAS-(IRRef)J->param[JIT_P_maxirconst]) lj_trace_err(J, LJ_TRERR_TRACEOV); } @@ -2124,13 +2666,22 @@ static const BCIns *rec_setup_root(jit_State *J) J->bc_min = pc; break; case BC_ITERL: - lua_assert(bc_op(pc[-1]) == BC_ITERC); + if (bc_op(pc[-1]) == BC_JLOOP) + lj_trace_err(J, LJ_TRERR_LINNER); + lj_assertJ(bc_op(pc[-1]) == BC_ITERC, "no ITERC before ITERL"); J->maxslot = ra + bc_b(pc[-1]) - 1; J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); pc += 1+bc_j(ins); - lua_assert(bc_op(pc[-1]) == BC_JMP); + lj_assertJ(bc_op(pc[-1]) == BC_JMP, "ITERL does not point to JMP+1"); J->bc_min = pc; break; + case BC_ITERN: + lj_assertJ(bc_op(pc[1]) == BC_ITERL, "no ITERL after ITERN"); + J->maxslot = ra; + J->bc_extent = (MSize)(-bc_j(pc[1]))*sizeof(BCIns); + J->bc_min = pc+2 + bc_j(pc[1]); + J->state = LJ_TRACE_RECORD_1ST; /* Record the first ITERN, too. */ + break; case BC_LOOP: /* Only check BC range for real loops, but not for "repeat until true". */ pcj = pc + bc_j(ins); @@ -2153,8 +2704,14 @@ static const BCIns *rec_setup_root(jit_State *J) J->maxslot = J->pt->numparams; pc++; break; + case BC_CALLM: + case BC_CALL: + case BC_ITERC: + /* No bytecode range check for stitched traces. */ + pc++; + break; default: - lua_assert(0); + lj_assertJ(0, "bad root trace start bytecode %d", bc_op(ins)); break; } return pc; @@ -2168,11 +2725,14 @@ void lj_record_setup(jit_State *J) /* Initialize state related to current trace. */ memset(J->slot, 0, sizeof(J->slot)); memset(J->chain, 0, sizeof(J->chain)); +#ifdef LUAJIT_ENABLE_TABLE_BUMP + memset(J->rbchash, 0, sizeof(J->rbchash)); +#endif memset(J->bpropcache, 0, sizeof(J->bpropcache)); J->scev.idx = REF_NIL; setmref(J->scev.pc, NULL); - J->baseslot = 1; /* Invoking function is at base[-1]. */ + J->baseslot = 1+LJ_FR2; /* Invoking function is at base[-1-LJ_FR2]. */ J->base = J->slot + J->baseslot; J->maxslot = 0; J->framedepth = 0; @@ -2187,7 +2747,7 @@ void lj_record_setup(jit_State *J) J->bc_extent = ~(MSize)0; /* Emit instructions for fixed references. Also triggers initial IR alloc. */ - emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno); + emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno); for (i = 0; i <= 2; i++) { IRIns *ir = IR(REF_NIL-i); ir->i = 0; @@ -2218,10 +2778,15 @@ void lj_record_setup(jit_State *J) } lj_snap_replay(J, T); sidecheck: - if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || - T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + - J->param[JIT_P_tryside]) { - rec_stop(J, LJ_TRLINK_INTERP, 0); + if ((traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] || + T->snap[J->exitno].count >= J->param[JIT_P_hotexit] + + J->param[JIT_P_tryside])) { + if (bc_op(*J->pc) == BC_JLOOP) { + BCIns startins = traceref(J, bc_d(*J->pc))->startins; + if (bc_op(startins) == BC_ITERN) + rec_itern(J, bc_a(startins), bc_b(startins)); + } + lj_record_stop(J, LJ_TRLINK_INTERP, 0); } } else { /* Root trace. */ J->cur.root = 0; @@ -2229,13 +2794,20 @@ void lj_record_setup(jit_State *J) J->pc = rec_setup_root(J); /* Note: the loop instruction itself is recorded at the end and not ** at the start! So snapshot #0 needs to point to the *next* instruction. + ** The one exception is BC_ITERN, which sets LJ_TRACE_RECORD_1ST. */ lj_snap_add(J); if (bc_op(J->cur.startins) == BC_FORL) rec_for_loop(J, J->pc-1, &J->scev, 1); + else if (bc_op(J->cur.startins) == BC_ITERC) + J->startpc = NULL; if (1 + J->pt->framesize >= LJ_MAX_JSLOTS) lj_trace_err(J, LJ_TRERR_STACKOV); } +#if LJ_HASPROFILE + J->prev_pt = NULL; + J->prev_line = -1; +#endif #ifdef LUAJIT_ENABLE_CHECKHOOK /* Regularly check for instruction/line hooks from compiled code and ** exit to the interpreter if the hooks are set. diff --git a/src/lj_record.h b/src/lj_record.h index 4b180fc2..ab2f4c8d 100644 --- a/src/lj_record.h +++ b/src/lj_record.h @@ -28,7 +28,9 @@ typedef struct RecordIndex { LJ_FUNC int lj_record_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv); +LJ_FUNC void lj_record_stop(jit_State *J, TraceLink linktype, TraceNo lnk); LJ_FUNC TRef lj_record_constify(jit_State *J, cTValue *o); +LJ_FUNC TRef lj_record_vload(jit_State *J, TRef ref, MSize idx, IRType t); LJ_FUNC void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs); LJ_FUNC void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs); @@ -36,6 +38,7 @@ LJ_FUNC void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults); LJ_FUNC int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm); LJ_FUNC TRef lj_record_idx(jit_State *J, RecordIndex *ix); +LJ_FUNC int lj_record_next(jit_State *J, RecordIndex *ix); LJ_FUNC void lj_record_ins(jit_State *J); LJ_FUNC void lj_record_setup(jit_State *J); diff --git a/src/lj_serialize.c b/src/lj_serialize.c new file mode 100644 index 00000000..f7e51828 --- /dev/null +++ b/src/lj_serialize.c @@ -0,0 +1,539 @@ +/* +** Object de/serialization. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#define lj_serialize_c +#define LUA_CORE + +#include "lj_obj.h" + +#if LJ_HASBUFFER +#include "lj_err.h" +#include "lj_buf.h" +#include "lj_str.h" +#include "lj_tab.h" +#include "lj_udata.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#include "lj_cdata.h" +#endif +#if LJ_HASJIT +#include "lj_ir.h" +#endif +#include "lj_serialize.h" + +/* Tags for internal serialization format. */ +enum { + SER_TAG_NIL, /* 0x00 */ + SER_TAG_FALSE, + SER_TAG_TRUE, + SER_TAG_NULL, + SER_TAG_LIGHTUD32, + SER_TAG_LIGHTUD64, + SER_TAG_INT, + SER_TAG_NUM, + SER_TAG_TAB, /* 0x08 */ + SER_TAG_DICT_MT = SER_TAG_TAB+6, + SER_TAG_DICT_STR, + SER_TAG_INT64, /* 0x10 */ + SER_TAG_UINT64, + SER_TAG_COMPLEX, + SER_TAG_0x13, + SER_TAG_0x14, + SER_TAG_0x15, + SER_TAG_0x16, + SER_TAG_0x17, + SER_TAG_0x18, /* 0x18 */ + SER_TAG_0x19, + SER_TAG_0x1a, + SER_TAG_0x1b, + SER_TAG_0x1c, + SER_TAG_0x1d, + SER_TAG_0x1e, + SER_TAG_0x1f, + SER_TAG_STR, /* 0x20 + str->len */ +}; +LJ_STATIC_ASSERT((SER_TAG_TAB & 7) == 0); + +/* -- Helper functions ---------------------------------------------------- */ + +static LJ_AINLINE char *serialize_more(char *w, SBufExt *sbx, MSize sz) +{ + if (LJ_UNLIKELY(sz > (MSize)(sbx->e - w))) { + sbx->w = w; + w = lj_buf_more2((SBuf *)sbx, sz); + } + return w; +} + +/* Write U124 to buffer. */ +static LJ_NOINLINE char *serialize_wu124_(char *w, uint32_t v) +{ + if (v < 0x1fe0) { + v -= 0xe0; + *w++ = (char)(0xe0 | (v >> 8)); *w++ = (char)v; + } else { + *w++ = (char)0xff; +#if LJ_BE + v = lj_bswap(v); +#endif + memcpy(w, &v, 4); w += 4; + } + return w; +} + +static LJ_AINLINE char *serialize_wu124(char *w, uint32_t v) +{ + if (LJ_LIKELY(v < 0xe0)) { + *w++ = (char)v; + return w; + } else { + return serialize_wu124_(w, v); + } +} + +static LJ_NOINLINE char *serialize_ru124_(char *r, char *w, uint32_t *pv) +{ + uint32_t v = *pv; + if (v != 0xff) { + if (r >= w) return NULL; + v = ((v & 0x1f) << 8) + *(uint8_t *)r + 0xe0; r++; + } else { + if (r + 4 > w) return NULL; + v = lj_getu32(r); r += 4; +#if LJ_BE + v = lj_bswap(v); +#endif + } + *pv = v; + return r; +} + +static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv) +{ + if (LJ_LIKELY(r < w)) { + uint32_t v = *(uint8_t *)r; r++; + *pv = v; + if (LJ_UNLIKELY(v >= 0xe0)) { + r = serialize_ru124_(r, w, pv); + } + return r; + } + return NULL; +} + +/* Prepare string dictionary for use (once). */ +void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict) +{ + if (!dict->hmask) { /* No hash part means not prepared, yet. */ + MSize i, len = lj_tab_len(dict); + if (!len) return; + lj_tab_resize(L, dict, dict->asize, hsize2hbits(len)); + for (i = 1; i <= len && i < dict->asize; i++) { + cTValue *o = arrayslot(dict, i); + if (tvisstr(o)) { + if (!lj_tab_getstr(dict, strV(o))) { /* Ignore dups. */ + lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1); + } + } else if (!tvisfalse(o)) { + lj_err_caller(L, LJ_ERR_BUFFER_BADOPT); + } + } + } +} + +/* Prepare metatable dictionary for use (once). */ +void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict) +{ + if (!dict->hmask) { /* No hash part means not prepared, yet. */ + MSize i, len = lj_tab_len(dict); + if (!len) return; + lj_tab_resize(L, dict, dict->asize, hsize2hbits(len)); + for (i = 1; i <= len && i < dict->asize; i++) { + cTValue *o = arrayslot(dict, i); + if (tvistab(o)) { + if (tvisnil(lj_tab_get(L, dict, o))) { /* Ignore dups. */ + lj_tab_newkey(L, dict, o)->u64 = (uint64_t)(i-1); + } + } else if (!tvisfalse(o)) { + lj_err_caller(L, LJ_ERR_BUFFER_BADOPT); + } + } + } +} + +/* -- Internal serializer ------------------------------------------------- */ + +/* Put serialized object into buffer. */ +static char *serialize_put(char *w, SBufExt *sbx, cTValue *o) +{ + if (LJ_LIKELY(tvisstr(o))) { + const GCstr *str = strV(o); + MSize len = str->len; + w = serialize_more(w, sbx, 5+len); + w = serialize_wu124(w, SER_TAG_STR + len); + w = lj_buf_wmem(w, strdata(str), len); + } else if (tvisint(o)) { + uint32_t x = LJ_BE ? lj_bswap((uint32_t)intV(o)) : (uint32_t)intV(o); + w = serialize_more(w, sbx, 1+4); + *w++ = SER_TAG_INT; memcpy(w, &x, 4); w += 4; + } else if (tvisnum(o)) { + uint64_t x = LJ_BE ? lj_bswap64(o->u64) : o->u64; + w = serialize_more(w, sbx, 1+sizeof(lua_Number)); + *w++ = SER_TAG_NUM; memcpy(w, &x, 8); w += 8; + } else if (tvispri(o)) { + w = serialize_more(w, sbx, 1); + *w++ = (char)(SER_TAG_NIL + ~itype(o)); + } else if (tvistab(o)) { + const GCtab *t = tabV(o); + uint32_t narray = 0, nhash = 0, one = 2; + if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH); + sbx->depth--; + if (t->asize > 0) { /* Determine max. length of array part. */ + ptrdiff_t i; + TValue *array = tvref(t->array); + for (i = (ptrdiff_t)t->asize-1; i >= 0; i--) + if (!tvisnil(&array[i])) + break; + narray = (uint32_t)(i+1); + if (narray && tvisnil(&array[0])) one = 4; + } + if (t->hmask > 0) { /* Count number of used hash slots. */ + uint32_t i, hmask = t->hmask; + Node *node = noderef(t->node); + for (i = 0; i <= hmask; i++) + nhash += !tvisnil(&node[i].val); + } + /* Write metatable index. */ + if (LJ_UNLIKELY(tabref(sbx->dict_mt)) && tabref(t->metatable)) { + TValue mto; + Node *n; + settabV(sbufL(sbx), &mto, tabref(t->metatable)); + n = hashgcref(tabref(sbx->dict_mt), mto.gcr); + do { + if (n->key.u64 == mto.u64) { + uint32_t idx = n->val.u32.lo; + w = serialize_more(w, sbx, 1+5); + *w++ = SER_TAG_DICT_MT; + w = serialize_wu124(w, idx); + break; + } + } while ((n = nextnode(n))); + } + /* Write number of array slots and hash slots. */ + w = serialize_more(w, sbx, 1+2*5); + *w++ = (char)(SER_TAG_TAB + (nhash ? 1 : 0) + (narray ? one : 0)); + if (narray) w = serialize_wu124(w, narray); + if (nhash) w = serialize_wu124(w, nhash); + if (narray) { /* Write array entries. */ + cTValue *oa = tvref(t->array) + (one >> 2); + cTValue *oe = tvref(t->array) + narray; + while (oa < oe) w = serialize_put(w, sbx, oa++); + } + if (nhash) { /* Write hash entries. */ + const Node *node = noderef(t->node) + t->hmask; + GCtab *dict_str = tabref(sbx->dict_str); + if (LJ_UNLIKELY(dict_str)) { + for (;; node--) + if (!tvisnil(&node->val)) { + if (LJ_LIKELY(tvisstr(&node->key))) { + /* Inlined lj_tab_getstr is 30% faster. */ + const GCstr *str = strV(&node->key); + Node *n = hashstr(dict_str, str); + do { + if (tvisstr(&n->key) && strV(&n->key) == str) { + uint32_t idx = n->val.u32.lo; + w = serialize_more(w, sbx, 1+5); + *w++ = SER_TAG_DICT_STR; + w = serialize_wu124(w, idx); + break; + } + n = nextnode(n); + if (!n) { + MSize len = str->len; + w = serialize_more(w, sbx, 5+len); + w = serialize_wu124(w, SER_TAG_STR + len); + w = lj_buf_wmem(w, strdata(str), len); + break; + } + } while (1); + } else { + w = serialize_put(w, sbx, &node->key); + } + w = serialize_put(w, sbx, &node->val); + if (--nhash == 0) break; + } + } else { + for (;; node--) + if (!tvisnil(&node->val)) { + w = serialize_put(w, sbx, &node->key); + w = serialize_put(w, sbx, &node->val); + if (--nhash == 0) break; + } + } + } + sbx->depth++; +#if LJ_HASFFI + } else if (tviscdata(o)) { + CTState *cts = ctype_cts(sbufL(sbx)); + CType *s = ctype_raw(cts, cdataV(o)->ctypeid); + uint8_t *sp = cdataptr(cdataV(o)); + if (ctype_isinteger(s->info) && s->size == 8) { + w = serialize_more(w, sbx, 1+8); + *w++ = (s->info & CTF_UNSIGNED) ? SER_TAG_UINT64 : SER_TAG_INT64; +#if LJ_BE + { uint64_t u = lj_bswap64(*(uint64_t *)sp); memcpy(w, &u, 8); } +#else + memcpy(w, sp, 8); +#endif + w += 8; + } else if (ctype_iscomplex(s->info) && s->size == 16) { + w = serialize_more(w, sbx, 1+16); + *w++ = SER_TAG_COMPLEX; +#if LJ_BE + { /* Only swap the doubles. The re/im order stays the same. */ + uint64_t u = lj_bswap64(((uint64_t *)sp)[0]); memcpy(w, &u, 8); + u = lj_bswap64(((uint64_t *)sp)[1]); memcpy(w+8, &u, 8); + } +#else + memcpy(w, sp, 16); +#endif + w += 16; + } else { + goto badenc; /* NYI other cdata */ + } +#endif + } else if (tvislightud(o)) { + uintptr_t ud = (uintptr_t)lightudV(G(sbufL(sbx)), o); + w = serialize_more(w, sbx, 1+sizeof(ud)); + if (ud == 0) { + *w++ = SER_TAG_NULL; + } else if (LJ_32 || checku32(ud)) { +#if LJ_BE && LJ_64 + ud = lj_bswap64(ud); +#elif LJ_BE + ud = lj_bswap(ud); +#endif + *w++ = SER_TAG_LIGHTUD32; memcpy(w, &ud, 4); w += 4; +#if LJ_64 + } else { +#if LJ_BE + ud = lj_bswap64(ud); +#endif + *w++ = SER_TAG_LIGHTUD64; memcpy(w, &ud, 8); w += 8; +#endif + } + } else { + /* NYI userdata */ +#if LJ_HASFFI + badenc: +#endif + lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADENC, lj_typename(o)); + } + return w; +} + +/* Get serialized object from buffer. */ +static char *serialize_get(char *r, SBufExt *sbx, TValue *o) +{ + char *w = sbx->w; + uint32_t tp; + r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob; + if (LJ_LIKELY(tp >= SER_TAG_STR)) { + uint32_t len = tp - SER_TAG_STR; + if (LJ_UNLIKELY(len > (uint32_t)(w - r))) goto eob; + setstrV(sbufL(sbx), o, lj_str_new(sbufL(sbx), r, len)); + r += len; + } else if (tp == SER_TAG_INT) { + if (LJ_UNLIKELY(r + 4 > w)) goto eob; + setintV(o, (int32_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r))); + r += 4; + } else if (tp == SER_TAG_NUM) { + if (LJ_UNLIKELY(r + 8 > w)) goto eob; + memcpy(o, r, 8); r += 8; +#if LJ_BE + o->u64 = lj_bswap64(o->u64); +#endif + if (!tvisnum(o)) setnanV(o); /* Fix non-canonical NaNs. */ + } else if (tp <= SER_TAG_TRUE) { + setpriV(o, ~tp); + } else if (tp == SER_TAG_DICT_STR) { + GCtab *dict_str; + uint32_t idx; + r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob; + idx++; + dict_str = tabref(sbx->dict_str); + if (dict_str && idx < dict_str->asize && tvisstr(arrayslot(dict_str, idx))) + copyTV(sbufL(sbx), o, arrayslot(dict_str, idx)); + else + lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx); + } else if (tp >= SER_TAG_TAB && tp <= SER_TAG_DICT_MT) { + uint32_t narray = 0, nhash = 0; + GCtab *t, *mt = NULL; + if (sbx->depth <= 0) lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DEPTH); + sbx->depth--; + if (tp == SER_TAG_DICT_MT) { + GCtab *dict_mt; + uint32_t idx; + r = serialize_ru124(r, w, &idx); if (LJ_UNLIKELY(!r)) goto eob; + idx++; + dict_mt = tabref(sbx->dict_mt); + if (dict_mt && idx < dict_mt->asize && tvistab(arrayslot(dict_mt, idx))) + mt = tabV(arrayslot(dict_mt, idx)); + else + lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx); + r = serialize_ru124(r, w, &tp); if (LJ_UNLIKELY(!r)) goto eob; + if (!(tp >= SER_TAG_TAB && tp < SER_TAG_DICT_MT)) goto badtag; + } + if (tp >= SER_TAG_TAB+2) { + r = serialize_ru124(r, w, &narray); if (LJ_UNLIKELY(!r)) goto eob; + } + if ((tp & 1)) { + r = serialize_ru124(r, w, &nhash); if (LJ_UNLIKELY(!r)) goto eob; + } + t = lj_tab_new(sbufL(sbx), narray, hsize2hbits(nhash)); + /* NOBARRIER: The table is new (marked white). */ + setgcref(t->metatable, obj2gco(mt)); + settabV(sbufL(sbx), o, t); + if (narray) { + TValue *oa = tvref(t->array) + (tp >= SER_TAG_TAB+4); + TValue *oe = tvref(t->array) + narray; + while (oa < oe) r = serialize_get(r, sbx, oa++); + } + if (nhash) { + do { + TValue k, *v; + r = serialize_get(r, sbx, &k); + v = lj_tab_set(sbufL(sbx), t, &k); + if (LJ_UNLIKELY(!tvisnil(v))) + lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_DUPKEY); + r = serialize_get(r, sbx, v); + } while (--nhash); + } + sbx->depth++; +#if LJ_HASFFI + } else if (tp >= SER_TAG_INT64 && tp <= SER_TAG_COMPLEX) { + uint32_t sz = tp == SER_TAG_COMPLEX ? 16 : 8; + GCcdata *cd; + if (LJ_UNLIKELY(r + sz > w)) goto eob; + if (LJ_UNLIKELY(!ctype_ctsG(G(sbufL(sbx))))) goto badtag; + cd = lj_cdata_new_(sbufL(sbx), + tp == SER_TAG_INT64 ? CTID_INT64 : + tp == SER_TAG_UINT64 ? CTID_UINT64 : CTID_COMPLEX_DOUBLE, + sz); + memcpy(cdataptr(cd), r, sz); r += sz; +#if LJ_BE + *(uint64_t *)cdataptr(cd) = lj_bswap64(*(uint64_t *)cdataptr(cd)); + if (sz == 16) + ((uint64_t *)cdataptr(cd))[1] = lj_bswap64(((uint64_t *)cdataptr(cd))[1]); +#endif + if (sz == 16) { /* Fix non-canonical NaNs. */ + TValue *cdo = (TValue *)cdataptr(cd); + if (!tvisnum(&cdo[0])) setnanV(&cdo[0]); + if (!tvisnum(&cdo[1])) setnanV(&cdo[1]); + } + setcdataV(sbufL(sbx), o, cd); +#endif + } else if (tp <= (LJ_64 ? SER_TAG_LIGHTUD64 : SER_TAG_LIGHTUD32)) { + uintptr_t ud = 0; + if (tp == SER_TAG_LIGHTUD32) { + if (LJ_UNLIKELY(r + 4 > w)) goto eob; + ud = (uintptr_t)(LJ_BE ? lj_bswap(lj_getu32(r)) : lj_getu32(r)); + r += 4; + } +#if LJ_64 + else if (tp == SER_TAG_LIGHTUD64) { + if (LJ_UNLIKELY(r + 8 > w)) goto eob; + memcpy(&ud, r, 8); r += 8; +#if LJ_BE + ud = lj_bswap64(ud); +#endif + } + setrawlightudV(o, lj_lightud_intern(sbufL(sbx), (void *)ud)); +#else + setrawlightudV(o, (void *)ud); +#endif + } else { +badtag: + lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDEC, tp); + } + return r; +eob: + lj_err_caller(sbufL(sbx), LJ_ERR_BUFFER_EOB); + return NULL; +} + +/* -- External serialization API ------------------------------------------ */ + +/* Encode to buffer. */ +SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o) +{ + sbx->depth = LJ_SERIALIZE_DEPTH; + sbx->w = serialize_put(sbx->w, sbx, o); + return sbx; +} + +/* Decode from buffer. */ +char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o) +{ + sbx->depth = LJ_SERIALIZE_DEPTH; + return serialize_get(sbx->r, sbx, o); +} + +/* Stand-alone encoding, borrowing from global temporary buffer. */ +GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o) +{ + SBufExt sbx; + char *w; + memset(&sbx, 0, sizeof(SBufExt)); + lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf); + sbx.depth = LJ_SERIALIZE_DEPTH; + w = serialize_put(sbx.w, &sbx, o); + return lj_str_new(L, sbx.b, (size_t)(w - sbx.b)); +} + +/* Stand-alone decoding, copy-on-write from string. */ +void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str) +{ + SBufExt sbx; + char *r; + memset(&sbx, 0, sizeof(SBufExt)); + lj_bufx_set_cow(L, &sbx, strdata(str), str->len); + /* No need to set sbx.cowref here. */ + sbx.depth = LJ_SERIALIZE_DEPTH; + r = serialize_get(sbx.r, &sbx, o); + if (r != sbx.w) lj_err_caller(L, LJ_ERR_BUFFER_LEFTOV); +} + +#if LJ_HASJIT +/* Peek into buffer to find the result IRType for specialization purposes. */ +LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx) +{ + uint32_t tp; + if (serialize_ru124(sbx->r, sbx->w, &tp)) { + /* This must match the handling of all tags in the decoder above. */ + switch (tp) { + case SER_TAG_NIL: return IRT_NIL; + case SER_TAG_FALSE: return IRT_FALSE; + case SER_TAG_TRUE: return IRT_TRUE; + case SER_TAG_NULL: case SER_TAG_LIGHTUD32: case SER_TAG_LIGHTUD64: + return IRT_LIGHTUD; + case SER_TAG_INT: return LJ_DUALNUM ? IRT_INT : IRT_NUM; + case SER_TAG_NUM: return IRT_NUM; + case SER_TAG_TAB: case SER_TAG_TAB+1: case SER_TAG_TAB+2: + case SER_TAG_TAB+3: case SER_TAG_TAB+4: case SER_TAG_TAB+5: + case SER_TAG_DICT_MT: + return IRT_TAB; + case SER_TAG_INT64: case SER_TAG_UINT64: case SER_TAG_COMPLEX: + return IRT_CDATA; + case SER_TAG_DICT_STR: + default: + return IRT_STR; + } + } + return IRT_NIL; /* Will fail on actual decode. */ +} +#endif + +#endif diff --git a/src/lj_serialize.h b/src/lj_serialize.h new file mode 100644 index 00000000..d3f4275a --- /dev/null +++ b/src/lj_serialize.h @@ -0,0 +1,28 @@ +/* +** Object de/serialization. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_SERIALIZE_H +#define _LJ_SERIALIZE_H + +#include "lj_obj.h" +#include "lj_buf.h" + +#if LJ_HASBUFFER + +#define LJ_SERIALIZE_DEPTH 100 /* Default depth. */ + +LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_str(lua_State *L, GCtab *dict); +LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep_mt(lua_State *L, GCtab *dict); +LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o); +LJ_FUNC char * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o); +LJ_FUNC GCstr * LJ_FASTCALL lj_serialize_encode(lua_State *L, cTValue *o); +LJ_FUNC void lj_serialize_decode(lua_State *L, TValue *o, GCstr *str); +#if LJ_HASJIT +LJ_FUNC MSize LJ_FASTCALL lj_serialize_peektype(SBufExt *sbx); +#endif + +#endif + +#endif diff --git a/src/lj_snap.c b/src/lj_snap.c index e2da4b3e..4140fdb7 100644 --- a/src/lj_snap.c +++ b/src/lj_snap.c @@ -68,20 +68,37 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) for (s = 0; s < nslots; s++) { TRef tr = J->slot[s]; IRRef ref = tref_ref(tr); +#if LJ_FR2 + if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */ + if ((tr & TREF_FRAME)) + map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL); + continue; + } + if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) { + cTValue *base = J->L->base - J->baseslot; + tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); + ref = tref_ref(tr); + } +#endif if (ref) { SnapEntry sn = SNAP_TR(s, tr); IRIns *ir = &J->cur.ir[ref]; - if (!(sn & (SNAP_CONT|SNAP_FRAME)) && + if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { - /* No need to snapshot unmodified non-inherited slots. */ - if (!(ir->op2 & IRSLOAD_INHERIT)) + /* + ** No need to snapshot unmodified non-inherited slots. + ** But always snapshot the function below a frame in LJ_FR2 mode. + */ + if (!(ir->op2 & IRSLOAD_INHERIT) && + (!LJ_FR2 || s == 0 || s+1 == nslots || + !(J->slot[s+1] & (TREF_CONT|TREF_FRAME)))) continue; /* No need to restore readonly slots and unmodified non-parent slots. */ if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) sn |= SNAP_NORESTORE; } - if (LJ_SOFTFP && irt_isnum(ir->t)) + if (LJ_SOFTFP32 && irt_isnum(ir->t)) sn |= SNAP_SOFTFPNUM; map[n++] = sn; } @@ -90,35 +107,54 @@ static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) } /* Add frame links at the end of the snapshot. */ -static BCReg snapshot_framelinks(jit_State *J, SnapEntry *map) +static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) { cTValue *frame = J->L->base - 1; - cTValue *lim = J->L->base - J->baseslot; - cTValue *ftop = frame + funcproto(frame_func(frame))->framesize; + cTValue *lim = J->L->base - J->baseslot + LJ_FR2; + GCfunc *fn = frame_func(frame); + cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; +#if LJ_FR2 + uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); + lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot"); + memcpy(map, &pcbase, sizeof(uint64_t)); +#else MSize f = 0; map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ - lua_assert(!J->pt || +#endif + lj_assertJ(!J->pt || (J->pc >= proto_bc(J->pt) && - J->pc < proto_bc(J->pt) + J->pt->sizebc)); + J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC"); while (frame > lim) { /* Backwards traversal of all frames above base. */ if (frame_islua(frame)) { +#if !LJ_FR2 map[f++] = SNAP_MKPC(frame_pc(frame)); +#endif frame = frame_prevl(frame); } else if (frame_iscont(frame)) { +#if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); map[f++] = SNAP_MKPC(frame_contpc(frame)); +#endif frame = frame_prevd(frame); } else { - lua_assert(!frame_isc(frame)); + lj_assertJ(!frame_isc(frame), "broken frame chain"); +#if !LJ_FR2 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); +#endif frame = frame_prevd(frame); continue; } if (frame + funcproto(frame_func(frame))->framesize > ftop) ftop = frame + funcproto(frame_func(frame))->framesize; } - lua_assert(f == (MSize)(1 + J->framedepth)); - return (BCReg)(ftop - lim); + *topslot = (uint8_t)(ftop - lim); +#if LJ_FR2 + lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def"); + return 2; +#else + lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size"); + return f; +#endif } /* Take a snapshot of the current stack. */ @@ -128,16 +164,17 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) MSize nent; SnapEntry *p; /* Conservative estimate. */ - lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth+1); + lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); p = &J->cur.snapmap[nsnapmap]; nent = snapshot_slots(J, p, nslots); - snap->topslot = (uint8_t)snapshot_framelinks(J, p + nent); + snap->nent = (uint8_t)nent; + nent += snapshot_framelinks(J, p + nent, &snap->topslot); snap->mapofs = (uint32_t)nsnapmap; snap->ref = (IRRef1)J->cur.nins; - snap->nent = (uint8_t)nent; + snap->mcofs = 0; snap->nslots = (uint8_t)nslots; snap->count = 0; - J->cur.nsnapmap = (uint32_t)(nsnapmap + nent + 1 + J->framedepth); + J->cur.nsnapmap = (uint32_t)(nsnapmap + nent); } /* Add or merge a snapshot. */ @@ -146,8 +183,8 @@ void lj_snap_add(jit_State *J) MSize nsnap = J->cur.nsnap; MSize nsnapmap = J->cur.nsnapmap; /* Merge if no ins. inbetween or if requested and no guard inbetween. */ - if (J->mergesnap ? !irt_isguard(J->guardemit) : - (nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) { + if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) || + (J->mergesnap && !irt_isguard(J->guardemit))) { if (nsnap == 1) { /* But preserve snap #0 PC. */ emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); goto nomerge; @@ -194,7 +231,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, #define DEF_SLOT(s) udf[(s)] *= 3 /* Scan through following bytecode and check for uses/defs. */ - lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); + lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, + "snapshot PC out of range"); for (;;) { BCIns ins = *pc++; BCOp op = bc_op(ins); @@ -205,7 +243,7 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, switch (bcmode_c(op)) { case BCMvar: USE_SLOT(bc_c(ins)); break; case BCMrbase: - lua_assert(op == BC_CAT); + lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op); for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); for (; s < maxslot; s++) DEF_SLOT(s); break; @@ -245,7 +283,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, case BCMbase: if (op >= BC_CALLM && op <= BC_ITERN) { BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? - maxslot : (bc_a(ins) + bc_c(ins)); + maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2); + if (LJ_FR2) DEF_SLOT(bc_a(ins)+1); s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); for (; s < top; s++) USE_SLOT(s); for (; s < maxslot; s++) DEF_SLOT(s); @@ -263,7 +302,8 @@ static BCReg snap_usedef(jit_State *J, uint8_t *udf, break; default: break; } - lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc); + lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, + "use/def analysis PC out of range"); } #undef USE_SLOT @@ -321,8 +361,8 @@ void lj_snap_shrink(jit_State *J) MSize n, m, nlim, nent = snap->nent; uint8_t udf[SNAP_USEDEF_SLOTS]; BCReg maxslot = J->maxslot; - BCReg minslot = snap_usedef(J, udf, snap_pc(map[nent]), maxslot); BCReg baseslot = J->baseslot; + BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot); if (minslot < maxslot) snap_useuv(J->pt, udf); maxslot += baseslot; minslot += baseslot; @@ -365,25 +405,26 @@ static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs) } /* Copy RegSP from parent snapshot to the parent links of the IR. */ -IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) +IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir) { SnapShot *snap = &T->snap[snapno]; SnapEntry *map = &T->snapmap[snap->mapofs]; BloomFilter rfilt = snap_renamefilter(T, snapno); MSize n = 0; IRRef ref = 0; + UNUSED(J); for ( ; ; ir++) { uint32_t rs; if (ir->o == IR_SLOAD) { if (!(ir->op2 & IRSLOAD_PARENT)) break; for ( ; ; n++) { - lua_assert(n < snap->nent); + lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1); if (snap_slot(map[n]) == ir->op1) { ref = snap_ref(map[n++]); break; } } - } else if (LJ_SOFTFP && ir->o == IR_HIOP) { + } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) { ref++; } else if (ir->o == IR_PVAL) { ref = ir->op1 + REF_BIAS; @@ -394,7 +435,7 @@ IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir) if (bloomtest(rfilt, ref)) rs = snap_renameref(T, snapno, ref, rs); ir->prev = (uint16_t)rs; - lua_assert(regsp_used(rs)); + lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS); } return ir; } @@ -409,10 +450,10 @@ static TRef snap_replay_const(jit_State *J, IRIns *ir) case IR_KPRI: return TREF_PRI(irt_type(ir->t)); case IR_KINT: return lj_ir_kint(J, ir->i); case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); - case IR_KNUM: return lj_ir_k64(J, IR_KNUM, ir_knum(ir)); - case IR_KINT64: return lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); + case IR_KNUM: case IR_KINT64: + return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64); case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ - default: lua_assert(0); return TREF_NIL; break; + default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL; } } @@ -422,7 +463,7 @@ static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) MSize j; for (j = 0; j < nmax; j++) if (snap_ref(map[j]) == ref) - return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME); + return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME); return 0; } @@ -483,21 +524,27 @@ void lj_snap_replay(jit_State *J, GCtrace *T) goto setslot; bloomset(seen, ref); if (irref_isk(ref)) { - tr = snap_replay_const(J, ir); + /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */ + if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL))) + tr = 0; + else + tr = snap_replay_const(J, ir); } else if (!regsp_used(ir->prev)) { pass23 = 1; - lua_assert(s != 0); + lj_assertJ(s != 0, "unused slot 0 in snapshot"); tr = s; } else { IRType t = irt_type(ir->t); uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; - if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; + if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); + if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX; tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); } setslot: - J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME)); /* Same as TREF_* flags. */ - J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s); + /* Same as TREF_* flags. */ + J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME)); + J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); if ((sn & SNAP_FRAME)) J->baseslot = s+1; } @@ -512,8 +559,9 @@ void lj_snap_replay(jit_State *J, GCtrace *T) if (regsp_reg(ir->r) == RID_SUNK) { if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; pass23 = 1; - lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || - ir->o == IR_CNEW || ir->o == IR_CNEWI); + lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || + ir->o == IR_CNEW || ir->o == IR_CNEWI, + "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); if (LJ_HASFFI && ir->o == IR_CNEWI) { @@ -525,13 +573,14 @@ void lj_snap_replay(jit_State *J, GCtrace *T) if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); - else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && + else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && irs+1 < irlast && (irs+1)->o == IR_HIOP) snap_pref(J, T, map, nent, seen, (irs+1)->op2); } } } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { - lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); + lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, + "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); } } @@ -581,20 +630,21 @@ void lj_snap_replay(jit_State *J, GCtrace *T) val = snap_pref(J, T, map, nent, seen, irs->op2); if (val == 0) { IRIns *irc = &T->ir[irs->op2]; - lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT); + lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT, + "sunk store for parent IR %04d with bad op %d", + refp - REF_BIAS, irc->o); val = snap_pref(J, T, map, nent, seen, irc->op1); val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); - } else if ((LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) && + } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && irs+1 < irlast && (irs+1)->o == IR_HIOP) { IRType t = IRT_I64; - if (LJ_SOFTFP && irt_type((irs+1)->t) == IRT_SOFTFP) + if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP) t = IRT_NUM; lj_needsplit(J); if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { uint64_t k = (uint32_t)T->ir[irs->op2].i + ((uint64_t)T->ir[(irs+1)->op2].i << 32); - val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, - lj_ir_k64_find(J, k)); + val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k); } else { val = emitir_raw(IRT(IR_HIOP, t), val, snap_pref(J, T, map, nent, seen, (irs+1)->op2)); @@ -632,7 +682,14 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, IRType1 t = ir->t; RegSP rs = ir->prev; if (irref_isk(ref)) { /* Restore constant slot. */ - lj_ir_kvalue(J->L, o, ir); + if (ir->o == IR_KPTR) { + o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir); + } else { + lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL), + "restore of const from IR %04d with bad op %d", + ref - REF_BIAS, ir->o); + lj_ir_kvalue(J->L, o, ir); + } return; } if (LJ_UNLIKELY(bloomtest(rfilt, ref))) @@ -641,22 +698,24 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, int32_t *sps = &ex->spill[regsp_spill(rs)]; if (irt_isinteger(t)) { setintV(o, *sps); -#if !LJ_SOFTFP +#if !LJ_SOFTFP32 } else if (irt_isnum(t)) { o->u64 = *(uint64_t *)sps; #endif - } else if (LJ_64 && irt_islightud(t)) { +#if LJ_64 && !LJ_GC64 + } else if (irt_islightud(t)) { /* 64 bit lightuserdata which may escape already has the tag bits. */ o->u64 = *(uint64_t *)sps; +#endif } else { - lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */ - setgcrefi(o->gcr, *sps); - setitype(o, irt_toitype(t)); + lj_assertJ(!irt_ispri(t), "PRI ref with spill slot"); + setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); } } else { /* Restore from register. */ Reg r = regsp_reg(rs); if (ra_noreg(r)) { - lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); + lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, + "restore from IR %04d has no reg", ref - REF_BIAS); snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); return; @@ -665,21 +724,26 @@ static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, #if !LJ_SOFTFP } else if (irt_isnum(t)) { setnumV(o, ex->fpr[r-RID_MIN_FPR]); +#elif LJ_64 /* && LJ_SOFTFP */ + } else if (irt_isnum(t)) { + o->u64 = ex->gpr[r-RID_MIN_GPR]; #endif - } else if (LJ_64 && irt_islightud(t)) { - /* 64 bit lightuserdata which may escape already has the tag bits. */ +#if LJ_64 && !LJ_GC64 + } else if (irt_is64(t)) { + /* 64 bit values that already have the tag bits. */ o->u64 = ex->gpr[r-RID_MIN_GPR]; +#endif + } else if (irt_ispri(t)) { + setpriV(o, irt_toitype(t)); } else { - if (!irt_ispri(t)) - setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]); - setitype(o, irt_toitype(t)); + setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t)); } } } #if LJ_HASFFI /* Restore raw data from the trace exit state. */ -static void snap_restoredata(GCtrace *T, ExitState *ex, +static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, IRRef ref, void *dst, CTSize sz) { @@ -687,9 +751,10 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, RegSP rs = ir->prev; int32_t *src; uint64_t tmp; + UNUSED(J); if (irref_isk(ref)) { - if (ir->o == IR_KNUM || ir->o == IR_KINT64) { - src = mref(ir->ptr, int32_t); + if (ir_isk64(ir)) { + src = (int32_t *)&ir[1]; } else if (sz == 8) { tmp = (uint64_t)(uint32_t)ir->i; src = (int32_t *)&tmp; @@ -709,8 +774,9 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, Reg r = regsp_reg(rs); if (ra_noreg(r)) { /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ - lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT); - snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4); + lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, + "restore from IR %04d has no reg", ref - REF_BIAS); + snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4); *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; return; } @@ -726,11 +792,13 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, #else if (LJ_BE && sz == 4) src++; #endif - } + } else #endif + if (LJ_64 && LJ_BE && sz == 4) src++; } } - lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8); + lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8, + "restore from IR %04d with bad size %d", ref - REF_BIAS, sz); if (sz == 4) *(int32_t *)dst = *src; else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; else if (sz == 1) *(int8_t *)dst = (int8_t)*src; @@ -743,24 +811,27 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, SnapNo snapno, BloomFilter rfilt, IRIns *ir, TValue *o) { - lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP || - ir->o == IR_CNEW || ir->o == IR_CNEWI); + lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || + ir->o == IR_CNEW || ir->o == IR_CNEWI, + "sunk allocation with bad op %d", ir->o); #if LJ_HASFFI if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { CTState *cts = ctype_cts(J->L); CTypeID id = (CTypeID)T->ir[ir->op1].i; - CTSize sz = lj_ctype_size(cts, id); - GCcdata *cd = lj_cdata_new(cts, id, sz); + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + GCcdata *cd = lj_cdata_newx(cts, id, sz, info); setcdataV(J->L, o, cd); if (ir->o == IR_CNEWI) { uint8_t *p = (uint8_t *)cdataptr(cd); - lua_assert(sz == 4 || sz == 8); + lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz); if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { - snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4); + snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2, + LJ_LE ? p+4 : p, 4); if (LJ_BE) p += 4; sz = 4; } - snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz); + snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz); } else { IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; for (irs = ir+1; irs < irlast; irs++) @@ -768,8 +839,11 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, IRIns *iro = &T->ir[T->ir[irs->op1].op2]; uint8_t *p = (uint8_t *)cd; CTSize szs; - lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD); - lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64); + lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o); + lj_assertJ(T->ir[irs->op1].o == IR_ADD, + "sunk store with bad add op %d", T->ir[irs->op1].o); + lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64, + "sunk store with bad const offset op %d", iro->o); if (irt_is64(irs->t)) szs = 8; else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; @@ -778,14 +852,16 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, p += (int64_t)ir_k64(iro)->u64; else p += iro->i; - lua_assert(p >= (uint8_t *)cdataptr(cd) && - p + szs <= (uint8_t *)cdataptr(cd) + sz); + lj_assertJ(p >= (uint8_t *)cdataptr(cd) && + p + szs <= (uint8_t *)cdataptr(cd) + sz, + "sunk store with offset out of range"); if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { - lua_assert(szs == 4); - snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4); + lj_assertJ(szs == 4, "sunk store with bad size %d", szs); + snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2, + LJ_LE ? p+4 : p, 4); if (LJ_BE) p += 4; } - snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs); + snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs); } } } else @@ -800,10 +876,12 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { IRIns *irk = &T->ir[irs->op1]; TValue tmp, *val; - lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE || - irs->o == IR_FSTORE); + lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE || + irs->o == IR_FSTORE, + "sunk store with bad op %d", irs->o); if (irk->o == IR_FREF) { - lua_assert(irk->op2 == IRFL_TAB_META); + lj_assertJ(irk->op2 == IRFL_TAB_META, + "sunk store with bad field %d", irk->op2); snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); /* NOBARRIER: The table is new (marked white). */ setgcref(t->metatable, obj2gco(tabV(&tmp))); @@ -814,7 +892,7 @@ static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, val = lj_tab_set(J->L, t, &tmp); /* NOBARRIER: The table is new (marked white). */ snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); - if (LJ_SOFTFP && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { + if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); val->u32.hi = tmp.u32.lo; } @@ -832,11 +910,15 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) SnapShot *snap = &T->snap[snapno]; MSize n, nent = snap->nent; SnapEntry *map = &T->snapmap[snap->mapofs]; - SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1]; - int32_t ftsz0; +#if !LJ_FR2 || defined(LUA_USE_ASSERT) + SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; +#endif +#if !LJ_FR2 + ptrdiff_t ftsz0; +#endif TValue *frame; BloomFilter rfilt = snap_renamefilter(T, snapno); - const BCIns *pc = snap_pc(map[nent]); + const BCIns *pc = snap_pc(&map[nent]); lua_State *L = J->L; /* Set interpreter PC to the next PC to get correct error messages. */ @@ -849,8 +931,10 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) } /* Fill stack slots with data from the registers and spill slots. */ - frame = L->base-1; + frame = L->base-1-LJ_FR2; +#if !LJ_FR2 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ +#endif for (n = 0; n < nent; n++) { SnapEntry sn = map[n]; if (!(sn & SNAP_NORESTORE)) { @@ -869,18 +953,27 @@ const BCIns *lj_snap_restore(jit_State *J, void *exptr) continue; } snap_restoreval(J, T, ex, snapno, rfilt, ref, o); - if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { + if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { TValue tmp; snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); o->u32.hi = tmp.u32.lo; +#if !LJ_FR2 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { /* Overwrite tag with frame link. */ - o->fr.tp.ftsz = snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0; + setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); L->base = o+1; +#endif + } else if ((sn & SNAP_KEYINDEX)) { + /* A IRT_INT key index slot is restored as a number. Undo this. */ + o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o))); + o->u32.hi = LJ_KEYINDEX; } } } - lua_assert(map + nent == flinks); +#if LJ_FR2 + L->base += (map[nent+LJ_BE] & 0xff); +#endif + lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot"); /* Compute current stack top. */ switch (bc_op(*pc)) { diff --git a/src/lj_snap.h b/src/lj_snap.h index 03cf9038..b7dabed8 100644 --- a/src/lj_snap.h +++ b/src/lj_snap.h @@ -13,7 +13,8 @@ LJ_FUNC void lj_snap_add(jit_State *J); LJ_FUNC void lj_snap_purge(jit_State *J); LJ_FUNC void lj_snap_shrink(jit_State *J); -LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir); +LJ_FUNC IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, + IRIns *ir); LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T); LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr); LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need); diff --git a/src/lj_state.c b/src/lj_state.c index 1e2cfde9..0b9c46ba 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -12,6 +12,7 @@ #include "lj_obj.h" #include "lj_gc.h" #include "lj_err.h" +#include "lj_buf.h" #include "lj_str.h" #include "lj_tab.h" #include "lj_func.h" @@ -24,8 +25,10 @@ #include "lj_trace.h" #include "lj_dispatch.h" #include "lj_vm.h" +#include "lj_prng.h" #include "lj_lex.h" #include "lj_alloc.h" +#include "luajit.h" /* -- Stack handling ------------------------------------------------------ */ @@ -47,6 +50,7 @@ ** one extra slot if mobj is not a function. Only lj_meta_tset needs 5 ** slots above top, but then mobj is always a function. So we can get by ** with 5 extra slots. +** LJ_FR2: We need 2 more slots for the frame PC and the continuation PC. */ /* Resize stack slots and adjust pointers in state. */ @@ -57,9 +61,10 @@ static void resizestack(lua_State *L, MSize n) MSize oldsize = L->stacksize; MSize realsize = n + 1 + LJ_STACK_EXTRA; GCobj *up; - lua_assert((MSize)(tvref(L->maxstack)-oldst)==L->stacksize-LJ_STACK_EXTRA-1); + lj_assertL((MSize)(tvref(L->maxstack)-oldst) == L->stacksize-LJ_STACK_EXTRA-1, + "inconsistent stack size"); st = (TValue *)lj_mem_realloc(L, tvref(L->stack), - (MSize)(L->stacksize*sizeof(TValue)), + (MSize)(oldsize*sizeof(TValue)), (MSize)(realsize*sizeof(TValue))); setmref(L->stack, st); delta = (char *)st - (char *)oldst; @@ -67,12 +72,12 @@ static void resizestack(lua_State *L, MSize n) while (oldsize < realsize) /* Clear new slots. */ setnilV(st + oldsize++); L->stacksize = realsize; + if ((size_t)(mref(G(L)->jit_base, char) - (char *)oldst) < oldsize) + setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta); L->base = (TValue *)((char *)L->base + delta); L->top = (TValue *)((char *)L->top + delta); for (up = gcref(L->openupval); up != NULL; up = gcnext(up)) setmref(gco2uv(up)->v, (TValue *)((char *)uvval(gco2uv(up)) + delta)); - if (obj2gco(L) == gcref(G(L)->jit_L)) - setmref(G(L)->jit_base, mref(G(L)->jit_base, char) + delta); } /* Relimit stack after error, in case the limit was overdrawn. */ @@ -89,7 +94,8 @@ void lj_state_shrinkstack(lua_State *L, MSize used) return; /* Avoid stack shrinking while handling stack overflow. */ if (4*used < L->stacksize && 2*(LJ_STACK_START+LJ_STACK_EXTRA) < L->stacksize && - obj2gco(L) != gcref(G(L)->jit_L)) /* Don't shrink stack of live trace. */ + /* Don't shrink stack of live trace. */ + (tvref(G(L)->jit_base) == NULL || obj2gco(L) != gcref(G(L)->cur_L))) resizestack(L, L->stacksize >> 1); } @@ -125,8 +131,9 @@ static void stack_init(lua_State *L1, lua_State *L) L1->stacksize = LJ_STACK_START + LJ_STACK_EXTRA; stend = st + L1->stacksize; setmref(L1->maxstack, stend - LJ_STACK_EXTRA - 1); - L1->base = L1->top = st+1; - setthreadV(L1, st, L1); /* Needed for curr_funcisL() on empty stack. */ + setthreadV(L1, st++, L1); /* Needed for curr_funcisL() on empty stack. */ + if (LJ_FR2) setnilV(st++); + L1->base = L1->top = st; while (st < stend) /* Clear new slots. */ setnilV(st++); } @@ -143,12 +150,13 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud) /* NOBARRIER: State initialization, all objects are white. */ setgcref(L->env, obj2gco(lj_tab_new(L, 0, LJ_MIN_GLOBAL))); settabV(L, registry(L), lj_tab_new(L, 0, LJ_MIN_REGISTRY)); - lj_str_resize(L, LJ_MIN_STRTAB-1); + lj_str_init(L); lj_meta_init(L); lj_lex_init(L); fixstring(lj_err_str(L, LJ_ERR_ERRMEM)); /* Preallocate memory error msg. */ g->gc.threshold = 4*g->gc.total; lj_trace_initstate(g); + lj_err_verify(); return NULL; } @@ -157,16 +165,25 @@ static void close_state(lua_State *L) global_State *g = G(L); lj_func_closeuv(L, tvref(L->stack)); lj_gc_freeall(g); - lua_assert(gcref(g->gc.root) == obj2gco(L)); - lua_assert(g->strnum == 0); + lj_assertG(gcref(g->gc.root) == obj2gco(L), + "main thread is not first GC object"); + lj_assertG(g->str.num == 0, "leaked %d strings", g->str.num); lj_trace_freestate(g); #if LJ_HASFFI lj_ctype_freestate(g); #endif - lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); - lj_str_freebuf(g, &g->tmpbuf); + lj_str_freetab(g); + lj_buf_free(g, &g->tmpbuf); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); - lua_assert(g->gc.total == sizeof(GG_State)); +#if LJ_64 + if (mref(g->gc.lightudseg, uint32_t)) { + MSize segnum = g->gc.lightudnum ? (2 << lj_fls(g->gc.lightudnum)) : 2; + lj_mem_freevec(g, mref(g->gc.lightudseg, uint32_t), segnum, uint32_t); + } +#endif + lj_assertG(g->gc.total == sizeof(GG_State), + "memory leak of %lld bytes", + (long long)(g->gc.total - sizeof(GG_State))); #ifndef LUAJIT_USE_SYSMALLOC if (g->allocf == lj_alloc_f) lj_alloc_destroy(g->allocd); @@ -175,17 +192,34 @@ static void close_state(lua_State *L) g->allocf(g->allocd, G2GG(g), sizeof(GG_State), 0); } -#if LJ_64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) -lua_State *lj_state_newstate(lua_Alloc f, void *ud) +#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) +lua_State *lj_state_newstate(lua_Alloc allocf, void *allocd) #else -LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) +LUA_API lua_State *lua_newstate(lua_Alloc allocf, void *allocd) #endif { - GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State)); - lua_State *L = &GG->L; - global_State *g = &GG->g; - if (GG == NULL || !checkptr32(GG)) return NULL; + PRNGState prng; + GG_State *GG; + lua_State *L; + global_State *g; + /* We need the PRNG for the memory allocator, so initialize this first. */ + if (!lj_prng_seed_secure(&prng)) { + lj_assertX(0, "secure PRNG seeding failed"); + /* Can only return NULL here, so this errors with "not enough memory". */ + return NULL; + } +#ifndef LUAJIT_USE_SYSMALLOC + if (allocf == LJ_ALLOCF_INTERNAL) { + allocd = lj_alloc_create(&prng); + if (!allocd) return NULL; + allocf = lj_alloc_f; + } +#endif + GG = (GG_State *)allocf(allocd, NULL, 0, sizeof(GG_State)); + if (GG == NULL || !checkptrGC(GG)) return NULL; memset(GG, 0, sizeof(GG_State)); + L = &GG->L; + g = &GG->g; L->gct = ~LJ_TTHREAD; L->marked = LJ_GC_WHITE0 | LJ_GC_FIXED | LJ_GC_SFIXED; /* Prevent free. */ L->dummy_ffid = FF_C; @@ -193,17 +227,25 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) g->gc.currentwhite = LJ_GC_WHITE0 | LJ_GC_FIXED; g->strempty.marked = LJ_GC_WHITE0; g->strempty.gct = ~LJ_TSTR; - g->allocf = f; - g->allocd = ud; + g->allocf = allocf; + g->allocd = allocd; + g->prng = prng; +#ifndef LUAJIT_USE_SYSMALLOC + if (allocf == lj_alloc_f) { + lj_alloc_setprng(allocd, &g->prng); + } +#endif setgcref(g->mainthref, obj2gco(L)); setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); setgcref(g->uvhead.next, obj2gco(&g->uvhead)); - g->strmask = ~(MSize)0; + g->str.mask = ~(MSize)0; setnilV(registry(L)); setnilV(&g->nilnode.val); setnilV(&g->nilnode.key); +#if !LJ_GC64 setmref(g->nilnode.freetop, &g->nilnode); - lj_str_initbuf(&g->tmpbuf); +#endif + lj_buf_init(NULL, &g->tmpbuf); g->gc.state = GCSpause; setgcref(g->gc.root, obj2gco(L)); setmref(g->gc.sweep, &g->gc.root); @@ -217,7 +259,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) close_state(L); return NULL; } - L->status = 0; + L->status = LUA_OK; return L; } @@ -236,6 +278,10 @@ LUA_API void lua_close(lua_State *L) global_State *g = G(L); int i; L = mainthread(g); /* Only the main thread can be closed. */ +#if LJ_HASPROFILE + luaJIT_profile_stop(L); +#endif + setgcrefnull(g->cur_L); lj_func_closeuv(L, tvref(L->stack)); lj_gc_separateudata(g, 1); /* Separate udata which have GC metamethods. */ #if LJ_HASJIT @@ -245,10 +291,10 @@ LUA_API void lua_close(lua_State *L) #endif for (i = 0;;) { hook_enter(g); - L->status = 0; + L->status = LUA_OK; + L->base = L->top = tvref(L->stack) + 1 + LJ_FR2; L->cframe = NULL; - L->base = L->top = tvref(L->stack) + 1; - if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == 0) { + if (lj_vm_cpcall(L, NULL, NULL, cpfinalize) == LUA_OK) { if (++i >= 10) break; lj_gc_separateudata(g, 1); /* Separate udata again. */ if (gcref(g->gc.mmudata) == NULL) /* Until nothing is left to do. */ @@ -263,7 +309,7 @@ lua_State *lj_state_new(lua_State *L) lua_State *L1 = lj_mem_newobj(L, lua_State); L1->gct = ~LJ_TTHREAD; L1->dummy_ffid = FF_C; - L1->status = 0; + L1->status = LUA_OK; L1->stacksize = 0; setmref(L1->stack, NULL); L1->cframe = NULL; @@ -272,15 +318,17 @@ lua_State *lj_state_new(lua_State *L) setmrefr(L1->glref, L->glref); setgcrefr(L1->env, L->env); stack_init(L1, L); /* init stack */ - lua_assert(iswhite(obj2gco(L1))); + lj_assertL(iswhite(obj2gco(L1)), "new thread object is not white"); return L1; } void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L) { - lua_assert(L != mainthread(g)); + lj_assertG(L != mainthread(g), "free of main thread"); + if (obj2gco(L) == gcref(g->cur_L)) + setgcrefnull(g->cur_L); lj_func_closeuv(L, tvref(L->stack)); - lua_assert(gcref(L->openupval) == NULL); + lj_assertG(gcref(L->openupval) == NULL, "stale open upvalues"); lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue); lj_mem_freet(g, L); } diff --git a/src/lj_state.h b/src/lj_state.h index 48c4d700..d22b7a6f 100644 --- a/src/lj_state.h +++ b/src/lj_state.h @@ -28,8 +28,10 @@ static LJ_AINLINE void lj_state_checkstack(lua_State *L, MSize need) LJ_FUNC lua_State *lj_state_new(lua_State *L); LJ_FUNC void LJ_FASTCALL lj_state_free(global_State *g, lua_State *L); -#if LJ_64 +#if LJ_64 && !LJ_GC64 && !(defined(LUAJIT_USE_VALGRIND) && defined(LUAJIT_USE_SYSMALLOC)) LJ_FUNC lua_State *lj_state_newstate(lua_Alloc f, void *ud); #endif +#define LJ_ALLOCF_INTERNAL ((lua_Alloc)(void *)(uintptr_t)(1237<<4)) + #endif diff --git a/src/lj_str.c b/src/lj_str.c index 60912aed..a5282da6 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -1,13 +1,8 @@ /* ** String handling. ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h -** -** Portions taken verbatim or adapted from the Lua interpreter. -** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h */ -#include <stdio.h> - #define lj_str_c #define LUA_CORE @@ -15,10 +10,10 @@ #include "lj_gc.h" #include "lj_err.h" #include "lj_str.h" -#include "lj_state.h" #include "lj_char.h" +#include "lj_prng.h" -/* -- String interning ---------------------------------------------------- */ +/* -- String helpers ------------------------------------------------------ */ /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) @@ -43,297 +38,333 @@ int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) return (int32_t)(a->len - b->len); } -/* Fast string data comparison. Caveat: unaligned access to 1st string! */ -static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) +/* Find fixed string p inside string s. */ +const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) { - MSize i = 0; - lua_assert(len > 0); - lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); - do { /* Note: innocuous access up to end of string + 3. */ - uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); - if (v) { - i -= len; -#if LJ_LE - return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; -#else - return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; -#endif + if (plen <= slen) { + if (plen == 0) { + return s; + } else { + int c = *(const uint8_t *)p++; + plen--; slen -= plen; + while (slen) { + const char *q = (const char *)memchr(s, c, slen); + if (!q) break; + if (memcmp(q+1, p, plen) == 0) return q; + q++; slen -= (MSize)(q-s); s = q; + } } - i += 4; - } while (i < len); - return 0; + } + return NULL; } -/* Resize the string hash table (grow and shrink). */ -void lj_str_resize(lua_State *L, MSize newmask) +/* Check whether a string has a pattern matching character. */ +int lj_str_haspattern(GCstr *s) { - global_State *g = G(L); - GCRef *newhash; - MSize i; - if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) - return; /* No resizing during GC traversal or if already too big. */ - newhash = lj_mem_newvec(L, newmask+1, GCRef); - memset(newhash, 0, (newmask+1)*sizeof(GCRef)); - for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ - GCobj *p = gcref(g->strhash[i]); - while (p) { /* Follow each hash chain and reinsert all strings. */ - MSize h = gco2str(p)->hash & newmask; - GCobj *next = gcnext(p); - /* NOBARRIER: The string table is a GC root. */ - setgcrefr(p->gch.nextgc, newhash[h]); - setgcref(newhash[h], p); - p = next; - } + const char *p = strdata(s), *q = p + s->len; + while (p < q) { + int c = *(const uint8_t *)p++; + if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c)) + return 1; /* Found a pattern matching char. */ } - lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); - g->strmask = newmask; - g->strhash = newhash; + return 0; /* No pattern matching chars found. */ } -/* Intern a string and return string object. */ -GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) +/* -- String hashing ------------------------------------------------------ */ + +/* Keyed sparse ARX string hash. Constant time. */ +static StrHash hash_sparse(uint64_t seed, const char *str, MSize len) { - global_State *g; - GCstr *s; - GCobj *o; - MSize len = (MSize)lenx; - MSize a, b, h = len; - if (lenx >= LJ_MAX_STR) - lj_err_msg(L, LJ_ERR_STROV); - g = G(L); - /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ + /* Constants taken from lookup3 hash by Bob Jenkins. */ + StrHash a, b, h = len ^ (StrHash)seed; if (len >= 4) { /* Caveat: unaligned access! */ a = lj_getu32(str); h ^= lj_getu32(str+len-4); b = lj_getu32(str+(len>>1)-2); h ^= b; h -= lj_rol(b, 14); b += lj_getu32(str+(len>>2)-1); - } else if (len > 0) { + } else { a = *(const uint8_t *)str; h ^= *(const uint8_t *)(str+len-1); b = *(const uint8_t *)(str+(len>>1)); h ^= b; h -= lj_rol(b, 14); - } else { - return &g->strempty; } a ^= h; a -= lj_rol(h, 11); b ^= a; b -= lj_rol(a, 25); h ^= b; h -= lj_rol(b, 16); - /* Check if the string has already been interned. */ - o = gcref(g->strhash[h & g->strmask]); - if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { - while (o != NULL) { - GCstr *sx = gco2str(o); - if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { - /* Resurrect if dead. Can only happen with fixstring() (keywords). */ - if (isdead(g, o)) flipwhite(o); - return sx; /* Return existing string. */ - } - o = gcnext(o); - } - } else { /* Slow path: end of string is too close to a page boundary. */ - while (o != NULL) { - GCstr *sx = gco2str(o); - if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { - /* Resurrect if dead. Can only happen with fixstring() (keywords). */ - if (isdead(g, o)) flipwhite(o); - return sx; /* Return existing string. */ - } - o = gcnext(o); - } - } - /* Nope, create a new string. */ - s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); - newwhite(g, s); - s->gct = ~LJ_TSTR; - s->len = len; - s->hash = h; - s->reserved = 0; - memcpy(strdatawr(s), str, len); - strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ - /* Add it to string hash table. */ - h &= g->strmask; - s->nextgc = g->strhash[h]; - /* NOBARRIER: The string table is a GC root. */ - setgcref(g->strhash[h], obj2gco(s)); - if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ - lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ - return s; /* Return newly interned string. */ + return h; } -void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) +#if LUAJIT_SECURITY_STRHASH +/* Keyed dense ARX string hash. Linear time. */ +static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h, + const char *str, MSize len) { - g->strnum--; - lj_mem_free(g, s, sizestring(s)); + StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4)); + if (len > 12) { + StrHash a = (StrHash)seed; + const char *pe = str+len-12, *p = pe, *q = str; + do { + a += lj_getu32(p); + b += lj_getu32(p+4); + h += lj_getu32(p+8); + p = q; q += 12; + h ^= b; h -= lj_rol(b, 14); + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + } while (p < pe); + h ^= b; h -= lj_rol(b, 16); + a ^= h; a -= lj_rol(h, 4); + b ^= a; b -= lj_rol(a, 14); + } + return b; } +#endif -/* -- Type conversions ---------------------------------------------------- */ +/* -- String interning ---------------------------------------------------- */ -/* Print number to buffer. Canonicalizes non-finite values. */ -size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o) -{ - if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */ - lua_Number n = o->n; -#if __BIONIC__ - if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; } -#endif - return (size_t)lua_number2str(s, n); - } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) { - s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3; - } else if ((o->u32.hi & 0x80000000) == 0) { - s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3; - } else { - s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4; - } -} +#define LJ_STR_MAXCOLL 32 -/* Print integer to buffer. Returns pointer to start. */ -char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k) +/* Resize the string interning hash table (grow and shrink). */ +void lj_str_resize(lua_State *L, MSize newmask) { - uint32_t u = (uint32_t)(k < 0 ? -k : k); - p += 1+10; - do { *--p = (char)('0' + u % 10); } while (u /= 10); - if (k < 0) *--p = '-'; - return p; -} + global_State *g = G(L); + GCRef *newtab, *oldtab = g->str.tab; + MSize i; -/* Convert number to string. */ -GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) -{ - char buf[LJ_STR_NUMBUF]; - size_t len = lj_str_bufnum(buf, (TValue *)np); - return lj_str_new(L, buf, len); -} + /* No resizing during GC traversal or if already too big. */ + if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) + return; -/* Convert integer to string. */ -GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) -{ - char s[1+10]; - char *p = lj_str_bufint(s, k); - return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); -} + newtab = lj_mem_newvec(L, newmask+1, GCRef); + memset(newtab, 0, (newmask+1)*sizeof(GCRef)); -GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o) -{ - return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n); -} +#if LUAJIT_SECURITY_STRHASH + /* Check which chains need secondary hashes. */ + if (g->str.second) { + int newsecond = 0; + /* Compute primary chain lengths. */ + for (i = g->str.mask; i != ~(MSize)0; i--) { + GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); + while (o) { + GCstr *s = gco2str(o); + MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) : + s->hash; + hash &= newmask; + setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1); + o = gcnext(o); + } + } + /* Mark secondary chains. */ + for (i = newmask; i != ~(MSize)0; i--) { + int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL; + newsecond |= secondary; + setgcrefp(newtab[i], secondary); + } + g->str.second = newsecond; + } +#endif -/* -- String formatting --------------------------------------------------- */ + /* Reinsert all strings from the old table into the new table. */ + for (i = g->str.mask; i != ~(MSize)0; i--) { + GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); + while (o) { + GCobj *next = gcnext(o); + GCstr *s = gco2str(o); + MSize hash = s->hash; +#if LUAJIT_SECURITY_STRHASH + uintptr_t u; + if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */ + hash &= newmask; + u = gcrefu(newtab[hash]); + if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */ + s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len); + s->hashalg = 1; + hash &= newmask; + u = gcrefu(newtab[hash]); + } + } else { /* String hashed with secondary hash. */ + MSize shash = hash_sparse(g->str.seed, strdata(s), s->len); + u = gcrefu(newtab[shash & newmask]); + if (u & 1) { + hash &= newmask; + u = gcrefu(newtab[hash]); + } else { /* Revert string back to primary hash. */ + s->hash = shash; + s->hashalg = 0; + hash = (shash & newmask); + } + } + /* NOBARRIER: The string table is a GC root. */ + setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); + setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1))); +#else + hash &= newmask; + /* NOBARRIER: The string table is a GC root. */ + setgcrefr(o->gch.nextgc, newtab[hash]); + setgcref(newtab[hash], o); +#endif + o = next; + } + } + + /* Free old table and replace with new table. */ + lj_str_freetab(g); + g->str.tab = newtab; + g->str.mask = newmask; +} -static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) +#if LUAJIT_SECURITY_STRHASH +/* Rehash and rechain all strings in a chain. */ +static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc, + const char *str, MSize len) { - char *p; - MSize i; - if (sb->n + len > sb->sz) { - MSize sz = sb->sz * 2; - while (sb->n + len > sz) sz = sz * 2; - lj_str_resizebuf(L, sb, sz); + global_State *g = G(L); + int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */ + GCRef *strtab = g->str.tab; + MSize strmask = g->str.mask; + GCobj *o = gcref(strtab[hashc & strmask]); + setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1)); + g->str.second = 1; + while (o) { + uintptr_t u; + GCobj *next = gcnext(o); + GCstr *s = gco2str(o); + StrHash hash; + if (ow) { /* Must sweep while rechaining. */ + if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */ + lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), + "sweep of undead string"); + makewhite(g, o); + } else { /* Free dead string. */ + lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, + "sweep of unlive string"); + lj_str_free(g, s); + o = next; + continue; + } + } + hash = s->hash; + if (!s->hashalg) { /* Rehash with secondary hash. */ + hash = hash_dense(g->str.seed, hash, strdata(s), s->len); + s->hash = hash; + s->hashalg = 1; + } + /* Rechain. */ + hash &= strmask; + u = gcrefu(strtab[hash]); + setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); + setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1))); + o = next; } - p = sb->buf + sb->n; - sb->n += len; - for (i = 0; i < len; i++) p[i] = str[i]; + /* Try to insert the pending string again. */ + return lj_str_new(L, str, len); } +#endif + +/* Reseed String ID from PRNG after random interval < 2^bits. */ +#if LUAJIT_SECURITY_STRID == 1 +#define STRID_RESEED_INTERVAL 8 +#elif LUAJIT_SECURITY_STRID == 2 +#define STRID_RESEED_INTERVAL 4 +#elif LUAJIT_SECURITY_STRID >= 3 +#define STRID_RESEED_INTERVAL 0 +#endif -static void addchar(lua_State *L, SBuf *sb, int c) +/* Allocate a new string and add to string interning table. */ +static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len, + StrHash hash, int hashalg) { - if (sb->n + 1 > sb->sz) { - MSize sz = sb->sz * 2; - lj_str_resizebuf(L, sb, sz); + GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr); + global_State *g = G(L); + uintptr_t u; + newwhite(g, s); + s->gct = ~LJ_TSTR; + s->len = len; + s->hash = hash; +#ifndef STRID_RESEED_INTERVAL + s->sid = g->str.id++; +#elif STRID_RESEED_INTERVAL + if (!g->str.idreseed--) { + uint64_t r = lj_prng_u64(&g->prng); + g->str.id = (StrID)r; + g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL)); } - sb->buf[sb->n++] = (char)c; + s->sid = g->str.id++; +#else + s->sid = (StrID)lj_prng_u64(&g->prng); +#endif + s->reserved = 0; + s->hashalg = (uint8_t)hashalg; + /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */ + *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0; + memcpy(strdatawr(s), str, len); + /* Add to string hash table. */ + hash &= g->str.mask; + u = gcrefu(g->str.tab[hash]); + setgcrefp(s->nextgc, (u & ~(uintptr_t)1)); + /* NOBARRIER: The string table is a GC root. */ + setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1))); + if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */ + lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */ + return s; /* Return newly interned string. */ } -/* Push formatted message as a string object to Lua stack. va_list variant. */ -const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) +/* Intern a string and return string object. */ +GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) { - SBuf *sb = &G(L)->tmpbuf; - lj_str_needbuf(L, sb, (MSize)strlen(fmt)); - lj_str_resetbuf(sb); - for (;;) { - const char *e = strchr(fmt, '%'); - if (e == NULL) break; - addstr(L, sb, fmt, (MSize)(e-fmt)); - /* This function only handles %s, %c, %d, %f and %p formats. */ - switch (e[1]) { - case 's': { - const char *s = va_arg(argp, char *); - if (s == NULL) s = "(null)"; - addstr(L, sb, s, (MSize)strlen(s)); - break; - } - case 'c': - addchar(L, sb, va_arg(argp, int)); - break; - case 'd': { - char buf[LJ_STR_INTBUF]; - char *p = lj_str_bufint(buf, va_arg(argp, int32_t)); - addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p)); - break; - } - case 'f': { - char buf[LJ_STR_NUMBUF]; - TValue tv; - MSize len; - tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER)); - len = (MSize)lj_str_bufnum(buf, &tv); - addstr(L, sb, buf, len); - break; - } - case 'p': { -#define FMTP_CHARS (2*sizeof(ptrdiff_t)) - char buf[2+FMTP_CHARS]; - ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); - ptrdiff_t i, lasti = 2+FMTP_CHARS; - if (p == 0) { - addstr(L, sb, "NULL", 4); - break; - } -#if LJ_64 - /* Shorten output for 64 bit pointers. */ - lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0); + global_State *g = G(L); + if (lenx-1 < LJ_MAX_STR-1) { + MSize len = (MSize)lenx; + StrHash hash = hash_sparse(g->str.seed, str, len); + MSize coll = 0; + int hashalg = 0; + /* Check if the string has already been interned. */ + GCobj *o = gcref(g->str.tab[hash & g->str.mask]); +#if LUAJIT_SECURITY_STRHASH + if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */ + hashalg = 1; + hash = hash_dense(g->str.seed, hash, str, len); + o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1); + } #endif - buf[0] = '0'; - buf[1] = 'x'; - for (i = lasti-1; i >= 2; i--, p >>= 4) - buf[i] = "0123456789abcdef"[(p & 15)]; - addstr(L, sb, buf, (MSize)lasti); - break; + while (o != NULL) { + GCstr *sx = gco2str(o); + if (sx->hash == hash && sx->len == len) { + if (memcmp(str, strdata(sx), len) == 0) { + if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */ + return sx; /* Return existing string. */ + } + coll++; } - case '%': - addchar(L, sb, '%'); - break; - default: - addchar(L, sb, '%'); - addchar(L, sb, e[1]); - break; + coll++; + o = gcnext(o); + } +#if LUAJIT_SECURITY_STRHASH + /* Rehash chain if there are too many collisions. */ + if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) { + return lj_str_rehash_chain(L, hash, str, len); } - fmt = e+2; +#endif + /* Otherwise allocate a new string. */ + return lj_str_alloc(L, str, len, hash, hashalg); + } else { + if (lenx) + lj_err_msg(L, LJ_ERR_STROV); + return &g->strempty; } - addstr(L, sb, fmt, (MSize)strlen(fmt)); - setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); - incr_top(L); - return strVdata(L->top - 1); } -/* Push formatted message as a string object to Lua stack. Vararg variant. */ -const char *lj_str_pushf(lua_State *L, const char *fmt, ...) +void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) { - const char *msg; - va_list argp; - va_start(argp, fmt); - msg = lj_str_pushvf(L, fmt, argp); - va_end(argp); - return msg; + g->str.num--; + lj_mem_free(g, s, lj_str_size(s->len)); } -/* -- Buffer handling ----------------------------------------------------- */ - -char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz) +void LJ_FASTCALL lj_str_init(lua_State *L) { - if (sz > sb->sz) { - if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; - lj_str_resizebuf(L, sb, sz); - } - return sb->buf; + global_State *g = G(L); + g->str.seed = lj_prng_u64(&g->prng); + lj_str_resize(L, LJ_MIN_STRTAB-1); } diff --git a/src/lj_str.h b/src/lj_str.h index e304f72f..28edb5a5 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -10,41 +10,22 @@ #include "lj_obj.h" -/* String interning. */ +/* String helpers. */ LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); +LJ_FUNC const char *lj_str_find(const char *s, const char *f, + MSize slen, MSize flen); +LJ_FUNC int lj_str_haspattern(GCstr *s); + +/* String interning. */ LJ_FUNC void lj_str_resize(lua_State *L, MSize newmask); LJ_FUNCA GCstr *lj_str_new(lua_State *L, const char *str, size_t len); LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); +LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L); +#define lj_str_freetab(g) \ + (lj_mem_freevec(g, g->str.tab, g->str.mask+1, GCRef)) #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) - -/* Type conversions. */ -LJ_FUNC size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o); -LJ_FUNC char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k); -LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np); -LJ_FUNC GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k); -LJ_FUNCA GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o); - -#define LJ_STR_INTBUF (1+10) -#define LJ_STR_NUMBUF LUAI_MAXNUMBER2STR - -/* String formatting. */ -LJ_FUNC const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp); -LJ_FUNC const char *lj_str_pushf(lua_State *L, const char *fmt, ...) -#if defined(__GNUC__) - __attribute__ ((format (printf, 2, 3))) -#endif - ; - -/* Resizable string buffers. Struct definition in lj_obj.h. */ -LJ_FUNC char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz); - -#define lj_str_initbuf(sb) ((sb)->buf = NULL, (sb)->sz = 0) -#define lj_str_resetbuf(sb) ((sb)->n = 0) -#define lj_str_resizebuf(L, sb, size) \ - ((sb)->buf = (char *)lj_mem_realloc(L, (sb)->buf, (sb)->sz, (size)), \ - (sb)->sz = (size)) -#define lj_str_freebuf(g, sb) lj_mem_free(g, (void *)(sb)->buf, (sb)->sz) +#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3)) #endif diff --git a/src/lj_strfmt.c b/src/lj_strfmt.c new file mode 100644 index 00000000..5c808290 --- /dev/null +++ b/src/lj_strfmt.c @@ -0,0 +1,606 @@ +/* +** String formatting. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#include <stdio.h> + +#define lj_strfmt_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_err.h" +#include "lj_buf.h" +#include "lj_str.h" +#include "lj_meta.h" +#include "lj_state.h" +#include "lj_char.h" +#include "lj_strfmt.h" +#if LJ_HASFFI +#include "lj_ctype.h" +#endif +#include "lj_lib.h" + +/* -- Format parser ------------------------------------------------------- */ + +static const uint8_t strfmt_map[('x'-'A')+1] = { + STRFMT_A,0,0,0,STRFMT_E,STRFMT_F,STRFMT_G,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,STRFMT_X,0,0, + 0,0,0,0,0,0, + STRFMT_A,0,STRFMT_C,STRFMT_D,STRFMT_E,STRFMT_F,STRFMT_G,0,STRFMT_I,0,0,0,0, + 0,STRFMT_O,STRFMT_P,STRFMT_Q,0,STRFMT_S,0,STRFMT_U,0,0,STRFMT_X +}; + +SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs) +{ + const uint8_t *p = fs->p, *e = fs->e; + fs->str = (const char *)p; + for (; p < e; p++) { + if (*p == '%') { /* Escape char? */ + if (p[1] == '%') { /* '%%'? */ + fs->p = ++p+1; + goto retlit; + } else { + SFormat sf = 0; + uint32_t c; + if (p != (const uint8_t *)fs->str) + break; + for (p++; (uint32_t)*p - ' ' <= (uint32_t)('0' - ' '); p++) { + /* Parse flags. */ + if (*p == '-') sf |= STRFMT_F_LEFT; + else if (*p == '+') sf |= STRFMT_F_PLUS; + else if (*p == '0') sf |= STRFMT_F_ZERO; + else if (*p == ' ') sf |= STRFMT_F_SPACE; + else if (*p == '#') sf |= STRFMT_F_ALT; + else break; + } + if ((uint32_t)*p - '0' < 10) { /* Parse width. */ + uint32_t width = (uint32_t)*p++ - '0'; + if ((uint32_t)*p - '0' < 10) + width = (uint32_t)*p++ - '0' + width*10; + sf |= (width << STRFMT_SH_WIDTH); + } + if (*p == '.') { /* Parse precision. */ + uint32_t prec = 0; + p++; + if ((uint32_t)*p - '0' < 10) { + prec = (uint32_t)*p++ - '0'; + if ((uint32_t)*p - '0' < 10) + prec = (uint32_t)*p++ - '0' + prec*10; + } + sf |= ((prec+1) << STRFMT_SH_PREC); + } + /* Parse conversion. */ + c = (uint32_t)*p - 'A'; + if (LJ_LIKELY(c <= (uint32_t)('x' - 'A'))) { + uint32_t sx = strfmt_map[c]; + if (sx) { + fs->p = p+1; + return (sf | sx | ((c & 0x20) ? 0 : STRFMT_F_UPPER)); + } + } + /* Return error location. */ + if (*p >= 32) p++; + fs->len = (MSize)(p - (const uint8_t *)fs->str); + fs->p = fs->e; + return STRFMT_ERR; + } + } + } + fs->p = p; +retlit: + fs->len = (MSize)(p - (const uint8_t *)fs->str); + return fs->len ? STRFMT_LIT : STRFMT_EOF; +} + +/* -- Raw conversions ----------------------------------------------------- */ + +#define WINT_R(x, sh, sc) \ + { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); } + +/* Write integer to buffer. */ +char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k) +{ + uint32_t u = (uint32_t)k; + if (k < 0) { u = (uint32_t)-k; *p++ = '-'; } + if (u < 10000) { + if (u < 10) goto dig1; + if (u < 100) goto dig2; + if (u < 1000) goto dig3; + } else { + uint32_t v = u / 10000; u -= v * 10000; + if (v < 10000) { + if (v < 10) goto dig5; + if (v < 100) goto dig6; + if (v < 1000) goto dig7; + } else { + uint32_t w = v / 10000; v -= w * 10000; + if (w >= 10) WINT_R(w, 10, 10) + *p++ = (char)('0'+w); + } + WINT_R(v, 23, 1000) + dig7: WINT_R(v, 12, 100) + dig6: WINT_R(v, 10, 10) + dig5: *p++ = (char)('0'+v); + } + WINT_R(u, 23, 1000) + dig3: WINT_R(u, 12, 100) + dig2: WINT_R(u, 10, 10) + dig1: *p++ = (char)('0'+u); + return p; +} +#undef WINT_R + +/* Write pointer to buffer. */ +char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v) +{ + ptrdiff_t x = (ptrdiff_t)v; + MSize i, n = STRFMT_MAXBUF_PTR; + if (x == 0) { + *p++ = 'N'; *p++ = 'U'; *p++ = 'L'; *p++ = 'L'; + return p; + } +#if LJ_64 + /* Shorten output for 64 bit pointers. */ + n = 2+2*4+((x >> 32) ? 2+2*(lj_fls((uint32_t)(x >> 32))>>3) : 0); +#endif + p[0] = '0'; + p[1] = 'x'; + for (i = n-1; i >= 2; i--, x >>= 4) + p[i] = "0123456789abcdef"[(x & 15)]; + return p+n; +} + +/* Write ULEB128 to buffer. */ +char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v) +{ + for (; v >= 0x80; v >>= 7) + *p++ = (char)((v & 0x7f) | 0x80); + *p++ = (char)v; + return p; +} + +/* Return string or write number to tmp buffer and return pointer to start. */ +const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp) +{ + SBuf *sb; + if (tvisstr(o)) { + *lenp = strV(o)->len; + return strVdata(o); + } else if (tvisbuf(o)) { + SBufExt *sbx = bufV(o); + *lenp = sbufxlen(sbx); + return sbx->r; + } else if (tvisint(o)) { + sb = lj_strfmt_putint(lj_buf_tmp_(L), intV(o)); + } else if (tvisnum(o)) { + sb = lj_strfmt_putfnum(lj_buf_tmp_(L), STRFMT_G14, o->n); + } else { + return NULL; + } + *lenp = sbuflen(sb); + return sb->b; +} + +/* -- Unformatted conversions to buffer ----------------------------------- */ + +/* Add integer to buffer. */ +SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k) +{ + sb->w = lj_strfmt_wint(lj_buf_more(sb, STRFMT_MAXBUF_INT), k); + return sb; +} + +#if LJ_HASJIT +/* Add number to buffer. */ +SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o) +{ + return lj_strfmt_putfnum(sb, STRFMT_G14, o->n); +} +#endif + +SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v) +{ + sb->w = lj_strfmt_wptr(lj_buf_more(sb, STRFMT_MAXBUF_PTR), v); + return sb; +} + +/* Add quoted string to buffer. */ +static SBuf *strfmt_putquotedlen(SBuf *sb, const char *s, MSize len) +{ + lj_buf_putb(sb, '"'); + while (len--) { + uint32_t c = (uint32_t)(uint8_t)*s++; + char *w = lj_buf_more(sb, 4); + if (c == '"' || c == '\\' || c == '\n') { + *w++ = '\\'; + } else if (lj_char_iscntrl(c)) { /* This can only be 0-31 or 127. */ + uint32_t d; + *w++ = '\\'; + if (c >= 100 || lj_char_isdigit((uint8_t)*s)) { + *w++ = (char)('0'+(c >= 100)); if (c >= 100) c -= 100; + goto tens; + } else if (c >= 10) { + tens: + d = (c * 205) >> 11; c -= d * 10; *w++ = (char)('0'+d); + } + c += '0'; + } + *w++ = (char)c; + sb->w = w; + } + lj_buf_putb(sb, '"'); + return sb; +} + +#if LJ_HASJIT +SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str) +{ + return strfmt_putquotedlen(sb, strdata(str), str->len); +} +#endif + +/* -- Formatted conversions to buffer ------------------------------------- */ + +/* Add formatted char to buffer. */ +SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat sf, int32_t c) +{ + MSize width = STRFMT_WIDTH(sf); + char *w = lj_buf_more(sb, width > 1 ? width : 1); + if ((sf & STRFMT_F_LEFT)) *w++ = (char)c; + while (width-- > 1) *w++ = ' '; + if (!(sf & STRFMT_F_LEFT)) *w++ = (char)c; + sb->w = w; + return sb; +} + +/* Add formatted string to buffer. */ +static SBuf *strfmt_putfstrlen(SBuf *sb, SFormat sf, const char *s, MSize len) +{ + MSize width = STRFMT_WIDTH(sf); + char *w; + if (len > STRFMT_PREC(sf)) len = STRFMT_PREC(sf); + w = lj_buf_more(sb, width > len ? width : len); + if ((sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); + while (width-- > len) *w++ = ' '; + if (!(sf & STRFMT_F_LEFT)) w = lj_buf_wmem(w, s, len); + sb->w = w; + return sb; +} + +#if LJ_HASJIT +SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat sf, GCstr *str) +{ + return strfmt_putfstrlen(sb, sf, strdata(str), str->len); +} +#endif + +/* Add formatted signed/unsigned integer to buffer. */ +SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k) +{ + char buf[STRFMT_MAXBUF_XINT], *q = buf + sizeof(buf), *w; +#ifdef LUA_USE_ASSERT + char *ws; +#endif + MSize prefix = 0, len, prec, pprec, width, need; + + /* Figure out signed prefixes. */ + if (STRFMT_TYPE(sf) == STRFMT_INT) { + if ((int64_t)k < 0) { + k = (uint64_t)-(int64_t)k; + prefix = 256 + '-'; + } else if ((sf & STRFMT_F_PLUS)) { + prefix = 256 + '+'; + } else if ((sf & STRFMT_F_SPACE)) { + prefix = 256 + ' '; + } + } + + /* Convert number and store to fixed-size buffer in reverse order. */ + prec = STRFMT_PREC(sf); + if ((int32_t)prec >= 0) sf &= ~STRFMT_F_ZERO; + if (k == 0) { /* Special-case zero argument. */ + if (prec != 0 || + (sf & (STRFMT_T_OCT|STRFMT_F_ALT)) == (STRFMT_T_OCT|STRFMT_F_ALT)) + *--q = '0'; + } else if (!(sf & (STRFMT_T_HEX|STRFMT_T_OCT))) { /* Decimal. */ + uint32_t k2; + while ((k >> 32)) { *--q = (char)('0' + k % 10); k /= 10; } + k2 = (uint32_t)k; + do { *--q = (char)('0' + k2 % 10); k2 /= 10; } while (k2); + } else if ((sf & STRFMT_T_HEX)) { /* Hex. */ + const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEF" : + "0123456789abcdef"; + do { *--q = hexdig[(k & 15)]; k >>= 4; } while (k); + if ((sf & STRFMT_F_ALT)) prefix = 512 + ((sf & STRFMT_F_UPPER) ? 'X' : 'x'); + } else { /* Octal. */ + do { *--q = (char)('0' + (uint32_t)(k & 7)); k >>= 3; } while (k); + if ((sf & STRFMT_F_ALT)) *--q = '0'; + } + + /* Calculate sizes. */ + len = (MSize)(buf + sizeof(buf) - q); + if ((int32_t)len >= (int32_t)prec) prec = len; + width = STRFMT_WIDTH(sf); + pprec = prec + (prefix >> 8); + need = width > pprec ? width : pprec; + w = lj_buf_more(sb, need); +#ifdef LUA_USE_ASSERT + ws = w; +#endif + + /* Format number with leading/trailing whitespace and zeros. */ + if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == 0) + while (width-- > pprec) *w++ = ' '; + if (prefix) { + if ((char)prefix >= 'X') *w++ = '0'; + *w++ = (char)prefix; + } + if ((sf & (STRFMT_F_LEFT|STRFMT_F_ZERO)) == STRFMT_F_ZERO) + while (width-- > pprec) *w++ = '0'; + while (prec-- > len) *w++ = '0'; + while (q < buf + sizeof(buf)) *w++ = *q++; /* Add number itself. */ + if ((sf & STRFMT_F_LEFT)) + while (width-- > pprec) *w++ = ' '; + + lj_assertX(need == (MSize)(w - ws), "miscalculated format size"); + sb->w = w; + return sb; +} + +/* Add number formatted as signed integer to buffer. */ +SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) +{ + int64_t k = (int64_t)n; + if (checki32(k) && sf == STRFMT_INT) + return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ + else + return lj_strfmt_putfxint(sb, sf, (uint64_t)k); +} + +/* Add number formatted as unsigned integer to buffer. */ +SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) +{ + int64_t k; + if (n >= 9223372036854775808.0) + k = (int64_t)(n - 18446744073709551616.0); + else + k = (int64_t)n; + return lj_strfmt_putfxint(sb, sf, (uint64_t)k); +} + +/* Format stack arguments to buffer. */ +int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry) +{ + int narg = (int)(L->top - L->base); + GCstr *fmt = lj_lib_checkstr(L, arg); + FormatState fs; + SFormat sf; + lj_strfmt_init(&fs, strdata(fmt), fmt->len); + while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { + if (sf == STRFMT_LIT) { + lj_buf_putmem(sb, fs.str, fs.len); + } else if (sf == STRFMT_ERR) { + lj_err_callerv(L, LJ_ERR_STRFMT, + strdata(lj_str_new(L, fs.str, fs.len))); + } else { + TValue *o = &L->base[arg++]; + if (arg > narg) + lj_err_arg(L, arg, LJ_ERR_NOVAL); + switch (STRFMT_TYPE(sf)) { + case STRFMT_INT: + if (tvisint(o)) { + int32_t k = intV(o); + if (sf == STRFMT_INT) + lj_strfmt_putint(sb, k); /* Shortcut for plain %d. */ + else + lj_strfmt_putfxint(sb, sf, k); + break; + } +#if LJ_HASFFI + if (tviscdata(o)) { + GCcdata *cd = cdataV(o); + if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) { + lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd)); + break; + } + } +#endif + lj_strfmt_putfnum_int(sb, sf, lj_lib_checknum(L, arg)); + break; + case STRFMT_UINT: + if (tvisint(o)) { + lj_strfmt_putfxint(sb, sf, intV(o)); + break; + } +#if LJ_HASFFI + if (tviscdata(o)) { + GCcdata *cd = cdataV(o); + if (cd->ctypeid == CTID_INT64 || cd->ctypeid == CTID_UINT64) { + lj_strfmt_putfxint(sb, sf, *(uint64_t *)cdataptr(cd)); + break; + } + } +#endif + lj_strfmt_putfnum_uint(sb, sf, lj_lib_checknum(L, arg)); + break; + case STRFMT_NUM: + lj_strfmt_putfnum(sb, sf, lj_lib_checknum(L, arg)); + break; + case STRFMT_STR: { + MSize len; + const char *s; + cTValue *mo; + if (LJ_UNLIKELY(!tvisstr(o) && !tvisbuf(o)) && retry >= 0 && + !tvisnil(mo = lj_meta_lookup(L, o, MM_tostring))) { + /* Call __tostring metamethod once. */ + copyTV(L, L->top++, mo); + copyTV(L, L->top++, o); + lua_call(L, 1, 1); + o = &L->base[arg-1]; /* Stack may have been reallocated. */ + copyTV(L, o, --L->top); /* Replace inline for retry. */ + if (retry < 2) { /* Global buffer may have been overwritten. */ + retry = 1; + break; + } + } + if (LJ_LIKELY(tvisstr(o))) { + len = strV(o)->len; + s = strVdata(o); +#if LJ_HASBUFFER + } else if (tvisbuf(o)) { + SBufExt *sbx = bufV(o); + if (sbx == (SBufExt *)sb) lj_err_arg(L, arg+1, LJ_ERR_BUFFER_SELF); + len = sbufxlen(sbx); + s = sbx->r; +#endif + } else { + GCstr *str = lj_strfmt_obj(L, o); + len = str->len; + s = strdata(str); + } + if ((sf & STRFMT_T_QUOTED)) + strfmt_putquotedlen(sb, s, len); /* No formatting. */ + else + strfmt_putfstrlen(sb, sf, s, len); + break; + } + case STRFMT_CHAR: + lj_strfmt_putfchar(sb, sf, lj_lib_checkint(L, arg)); + break; + case STRFMT_PTR: /* No formatting. */ + lj_strfmt_putptr(sb, lj_obj_ptr(G(L), o)); + break; + default: + lj_assertL(0, "bad string format type"); + break; + } + } + } + return retry; +} + +/* -- Conversions to strings ---------------------------------------------- */ + +/* Convert integer to string. */ +GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k) +{ + char buf[STRFMT_MAXBUF_INT]; + MSize len = (MSize)(lj_strfmt_wint(buf, k) - buf); + return lj_str_new(L, buf, len); +} + +/* Convert integer or number to string. */ +GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o) +{ + return tvisint(o) ? lj_strfmt_int(L, intV(o)) : lj_strfmt_num(L, o); +} + +#if LJ_HASJIT +/* Convert char value to string. */ +GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c) +{ + char buf[1]; + buf[0] = c; + return lj_str_new(L, buf, 1); +} +#endif + +/* Raw conversion of object to string. */ +GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o) +{ + if (tvisstr(o)) { + return strV(o); + } else if (tvisnumber(o)) { + return lj_strfmt_number(L, o); + } else if (tvisnil(o)) { + return lj_str_newlit(L, "nil"); + } else if (tvisfalse(o)) { + return lj_str_newlit(L, "false"); + } else if (tvistrue(o)) { + return lj_str_newlit(L, "true"); + } else { + char buf[8+2+2+16], *p = buf; + p = lj_buf_wmem(p, lj_typename(o), (MSize)strlen(lj_typename(o))); + *p++ = ':'; *p++ = ' '; + if (tvisfunc(o) && isffunc(funcV(o))) { + p = lj_buf_wmem(p, "builtin#", 8); + p = lj_strfmt_wint(p, funcV(o)->c.ffid); + } else { + p = lj_strfmt_wptr(p, lj_obj_ptr(G(L), o)); + } + return lj_str_new(L, buf, (size_t)(p - buf)); + } +} + +/* -- Internal string formatting ------------------------------------------ */ + +/* +** These functions are only used for lua_pushfstring(), lua_pushvfstring() +** and for internal string formatting (e.g. error messages). Caveat: unlike +** string.format(), only a limited subset of formats and flags are supported! +** +** LuaJIT has support for a couple more formats than Lua 5.1/5.2: +** - %d %u %o %x with full formatting, 32 bit integers only. +** - %f and other FP formats are really %.14g. +** - %s %c %p without formatting. +*/ + +/* Push formatted message as a string object to Lua stack. va_list variant. */ +const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, va_list argp) +{ + SBuf *sb = lj_buf_tmp_(L); + FormatState fs; + SFormat sf; + GCstr *str; + lj_strfmt_init(&fs, fmt, (MSize)strlen(fmt)); + while ((sf = lj_strfmt_parse(&fs)) != STRFMT_EOF) { + switch (STRFMT_TYPE(sf)) { + case STRFMT_LIT: + lj_buf_putmem(sb, fs.str, fs.len); + break; + case STRFMT_INT: + lj_strfmt_putfxint(sb, sf, va_arg(argp, int32_t)); + break; + case STRFMT_UINT: + lj_strfmt_putfxint(sb, sf, va_arg(argp, uint32_t)); + break; + case STRFMT_NUM: + lj_strfmt_putfnum(sb, STRFMT_G14, va_arg(argp, lua_Number)); + break; + case STRFMT_STR: { + const char *s = va_arg(argp, char *); + if (s == NULL) s = "(null)"; + lj_buf_putmem(sb, s, (MSize)strlen(s)); + break; + } + case STRFMT_CHAR: + lj_buf_putb(sb, va_arg(argp, int)); + break; + case STRFMT_PTR: + lj_strfmt_putptr(sb, va_arg(argp, void *)); + break; + case STRFMT_ERR: + default: + lj_buf_putb(sb, '?'); + lj_assertL(0, "bad string format near offset %d", fs.len); + break; + } + } + str = lj_buf_str(L, sb); + setstrV(L, L->top, str); + incr_top(L); + return strdata(str); +} + +/* Push formatted message as a string object to Lua stack. Vararg variant. */ +const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) +{ + const char *msg; + va_list argp; + va_start(argp, fmt); + msg = lj_strfmt_pushvf(L, fmt, argp); + va_end(argp); + return msg; +} + diff --git a/src/lj_strfmt.h b/src/lj_strfmt.h new file mode 100644 index 00000000..a4529604 --- /dev/null +++ b/src/lj_strfmt.h @@ -0,0 +1,131 @@ +/* +** String formatting. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_STRFMT_H +#define _LJ_STRFMT_H + +#include "lj_obj.h" + +typedef uint32_t SFormat; /* Format indicator. */ + +/* Format parser state. */ +typedef struct FormatState { + const uint8_t *p; /* Current format string pointer. */ + const uint8_t *e; /* End of format string. */ + const char *str; /* Returned literal string. */ + MSize len; /* Size of literal string. */ +} FormatState; + +/* Format types (max. 16). */ +typedef enum FormatType { + STRFMT_EOF, STRFMT_ERR, STRFMT_LIT, + STRFMT_INT, STRFMT_UINT, STRFMT_NUM, STRFMT_STR, STRFMT_CHAR, STRFMT_PTR +} FormatType; + +/* Format subtypes (bits are reused). */ +#define STRFMT_T_HEX 0x0010 /* STRFMT_UINT */ +#define STRFMT_T_OCT 0x0020 /* STRFMT_UINT */ +#define STRFMT_T_FP_A 0x0000 /* STRFMT_NUM */ +#define STRFMT_T_FP_E 0x0010 /* STRFMT_NUM */ +#define STRFMT_T_FP_F 0x0020 /* STRFMT_NUM */ +#define STRFMT_T_FP_G 0x0030 /* STRFMT_NUM */ +#define STRFMT_T_QUOTED 0x0010 /* STRFMT_STR */ + +/* Format flags. */ +#define STRFMT_F_LEFT 0x0100 +#define STRFMT_F_PLUS 0x0200 +#define STRFMT_F_ZERO 0x0400 +#define STRFMT_F_SPACE 0x0800 +#define STRFMT_F_ALT 0x1000 +#define STRFMT_F_UPPER 0x2000 + +/* Format indicator fields. */ +#define STRFMT_SH_WIDTH 16 +#define STRFMT_SH_PREC 24 + +#define STRFMT_TYPE(sf) ((FormatType)((sf) & 15)) +#define STRFMT_WIDTH(sf) (((sf) >> STRFMT_SH_WIDTH) & 255u) +#define STRFMT_PREC(sf) ((((sf) >> STRFMT_SH_PREC) & 255u) - 1u) +#define STRFMT_FP(sf) (((sf) >> 4) & 3) + +/* Formats for conversion characters. */ +#define STRFMT_A (STRFMT_NUM|STRFMT_T_FP_A) +#define STRFMT_C (STRFMT_CHAR) +#define STRFMT_D (STRFMT_INT) +#define STRFMT_E (STRFMT_NUM|STRFMT_T_FP_E) +#define STRFMT_F (STRFMT_NUM|STRFMT_T_FP_F) +#define STRFMT_G (STRFMT_NUM|STRFMT_T_FP_G) +#define STRFMT_I STRFMT_D +#define STRFMT_O (STRFMT_UINT|STRFMT_T_OCT) +#define STRFMT_P (STRFMT_PTR) +#define STRFMT_Q (STRFMT_STR|STRFMT_T_QUOTED) +#define STRFMT_S (STRFMT_STR) +#define STRFMT_U (STRFMT_UINT) +#define STRFMT_X (STRFMT_UINT|STRFMT_T_HEX) +#define STRFMT_G14 (STRFMT_G | ((14+1) << STRFMT_SH_PREC)) + +/* Maximum buffer sizes for conversions. */ +#define STRFMT_MAXBUF_XINT (1+22) /* '0' prefix + uint64_t in octal. */ +#define STRFMT_MAXBUF_INT (1+10) /* Sign + int32_t in decimal. */ +#define STRFMT_MAXBUF_NUM 32 /* Must correspond with STRFMT_G14. */ +#define STRFMT_MAXBUF_PTR (2+2*sizeof(ptrdiff_t)) /* "0x" + hex ptr. */ + +/* Format parser. */ +LJ_FUNC SFormat LJ_FASTCALL lj_strfmt_parse(FormatState *fs); + +static LJ_AINLINE void lj_strfmt_init(FormatState *fs, const char *p, MSize len) +{ + fs->p = (const uint8_t *)p; + fs->e = (const uint8_t *)p + len; + /* Must be NUL-terminated. May have NULs inside, too. */ + lj_assertX(*fs->e == 0, "format not NUL-terminated"); +} + +/* Raw conversions. */ +LJ_FUNC char * LJ_FASTCALL lj_strfmt_wint(char *p, int32_t k); +LJ_FUNC char * LJ_FASTCALL lj_strfmt_wptr(char *p, const void *v); +LJ_FUNC char * LJ_FASTCALL lj_strfmt_wuleb128(char *p, uint32_t v); +LJ_FUNC const char *lj_strfmt_wstrnum(lua_State *L, cTValue *o, MSize *lenp); + +/* Unformatted conversions to buffer. */ +LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putint(SBuf *sb, int32_t k); +#if LJ_HASJIT +LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putnum(SBuf *sb, cTValue *o); +#endif +LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putptr(SBuf *sb, const void *v); +#if LJ_HASJIT +LJ_FUNC SBuf * LJ_FASTCALL lj_strfmt_putquoted(SBuf *sb, GCstr *str); +#endif + +/* Formatted conversions to buffer. */ +LJ_FUNC SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k); +LJ_FUNC SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n); +LJ_FUNC SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n); +LJ_FUNC SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat, lua_Number n); +LJ_FUNC SBuf *lj_strfmt_putfchar(SBuf *sb, SFormat, int32_t c); +#if LJ_HASJIT +LJ_FUNC SBuf *lj_strfmt_putfstr(SBuf *sb, SFormat, GCstr *str); +#endif +LJ_FUNC int lj_strfmt_putarg(lua_State *L, SBuf *sb, int arg, int retry); + +/* Conversions to strings. */ +LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_int(lua_State *L, int32_t k); +LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o); +LJ_FUNCA GCstr * LJ_FASTCALL lj_strfmt_number(lua_State *L, cTValue *o); +#if LJ_HASJIT +LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_char(lua_State *L, int c); +#endif +LJ_FUNC GCstr * LJ_FASTCALL lj_strfmt_obj(lua_State *L, cTValue *o); + +/* Internal string formatting. */ +LJ_FUNC const char *lj_strfmt_pushvf(lua_State *L, const char *fmt, + va_list argp); +LJ_FUNC const char *lj_strfmt_pushf(lua_State *L, const char *fmt, ...) +#if defined(__GNUC__) || defined(__clang__) + __attribute__ ((format (printf, 2, 3))) +#endif + ; + +#endif diff --git a/src/lj_strfmt_num.c b/src/lj_strfmt_num.c new file mode 100644 index 00000000..3c60695c --- /dev/null +++ b/src/lj_strfmt_num.c @@ -0,0 +1,592 @@ +/* +** String formatting for floating-point numbers. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +** Contributed by Peter Cawley. +*/ + +#include <stdio.h> + +#define lj_strfmt_num_c +#define LUA_CORE + +#include "lj_obj.h" +#include "lj_buf.h" +#include "lj_str.h" +#include "lj_strfmt.h" + +/* -- Precomputed tables -------------------------------------------------- */ + +/* Rescale factors to push the exponent of a number towards zero. */ +#define RESCALE_EXPONENTS(P, N) \ + P(308), P(289), P(270), P(250), P(231), P(212), P(193), P(173), P(154), \ + P(135), P(115), P(96), P(77), P(58), P(38), P(0), P(0), P(0), N(39), N(58), \ + N(77), N(96), N(116), N(135), N(154), N(174), N(193), N(212), N(231), \ + N(251), N(270), N(289) + +#define ONE_E_P(X) 1e+0 ## X +#define ONE_E_N(X) 1e-0 ## X +static const int16_t rescale_e[] = { RESCALE_EXPONENTS(-, +) }; +static const double rescale_n[] = { RESCALE_EXPONENTS(ONE_E_P, ONE_E_N) }; +#undef ONE_E_N +#undef ONE_E_P + +/* +** For p in range -70 through 57, this table encodes pairs (m, e) such that +** 4*2^p <= (uint8_t)m*10^e, and is the smallest value for which this holds. +*/ +static const int8_t four_ulp_m_e[] = { + 34, -21, 68, -21, 14, -20, 28, -20, 55, -20, 2, -19, 3, -19, 5, -19, 9, -19, + -82, -18, 35, -18, 7, -17, -117, -17, 28, -17, 56, -17, 112, -16, -33, -16, + 45, -16, 89, -16, -78, -15, 36, -15, 72, -15, -113, -14, 29, -14, 57, -14, + 114, -13, -28, -13, 46, -13, 91, -12, -74, -12, 37, -12, 73, -12, 15, -11, 3, + -11, 59, -11, 2, -10, 3, -10, 5, -10, 1, -9, -69, -9, 38, -9, 75, -9, 15, -7, + 3, -7, 6, -7, 12, -6, -17, -7, 48, -7, 96, -7, -65, -6, 39, -6, 77, -6, -103, + -5, 31, -5, 62, -5, 123, -4, -11, -4, 49, -4, 98, -4, -60, -3, 4, -2, 79, -3, + 16, -2, 32, -2, 63, -2, 2, -1, 25, 0, 5, 1, 1, 2, 2, 2, 4, 2, 8, 2, 16, 2, + 32, 2, 64, 2, -128, 2, 26, 2, 52, 2, 103, 3, -51, 3, 41, 4, 82, 4, -92, 4, + 33, 4, 66, 4, -124, 5, 27, 5, 53, 5, 105, 6, 21, 6, 42, 6, 84, 6, 17, 7, 34, + 7, 68, 7, 2, 8, 3, 8, 6, 8, 108, 9, -41, 9, 43, 10, 86, 9, -84, 10, 35, 10, + 69, 10, -118, 11, 28, 11, 55, 12, 11, 13, 22, 13, 44, 13, 88, 13, -80, 13, + 36, 13, 71, 13, -115, 14, 29, 14, 57, 14, 113, 15, -30, 15, 46, 15, 91, 15, + 19, 16, 37, 16, 73, 16, 2, 17, 3, 17, 6, 17 +}; + +/* min(2^32-1, 10^e-1) for e in range 0 through 10 */ +static uint32_t ndigits_dec_threshold[] = { + 0, 9U, 99U, 999U, 9999U, 99999U, 999999U, + 9999999U, 99999999U, 999999999U, 0xffffffffU +}; + +/* -- Helper functions ---------------------------------------------------- */ + +/* Compute the number of digits in the decimal representation of x. */ +static MSize ndigits_dec(uint32_t x) +{ + MSize t = ((lj_fls(x | 1) * 77) >> 8) + 1; /* 2^8/77 is roughly log2(10) */ + return t + (x > ndigits_dec_threshold[t]); +} + +#define WINT_R(x, sh, sc) \ + { uint32_t d = (x*(((1<<sh)+sc-1)/sc))>>sh; x -= d*sc; *p++ = (char)('0'+d); } + +/* Write 9-digit unsigned integer to buffer. */ +static char *lj_strfmt_wuint9(char *p, uint32_t u) +{ + uint32_t v = u / 10000, w; + u -= v * 10000; + w = v / 10000; + v -= w * 10000; + *p++ = (char)('0'+w); + WINT_R(v, 23, 1000) + WINT_R(v, 12, 100) + WINT_R(v, 10, 10) + *p++ = (char)('0'+v); + WINT_R(u, 23, 1000) + WINT_R(u, 12, 100) + WINT_R(u, 10, 10) + *p++ = (char)('0'+u); + return p; +} +#undef WINT_R + +/* -- Extended precision arithmetic --------------------------------------- */ + +/* +** The "nd" format is a fixed-precision decimal representation for numbers. It +** consists of up to 64 uint32_t values, with each uint32_t storing a value +** in the range [0, 1e9). A number in "nd" format consists of three variables: +** +** uint32_t nd[64]; +** uint32_t ndlo; +** uint32_t ndhi; +** +** The integral part of the number is stored in nd[0 ... ndhi], the value of +** which is sum{i in [0, ndhi] | nd[i] * 10^(9*i)}. If the fractional part of +** the number is zero, ndlo is zero. Otherwise, the fractional part is stored +** in nd[ndlo ... 63], the value of which is taken to be +** sum{i in [ndlo, 63] | nd[i] * 10^(9*(i-64))}. +** +** If the array part had 128 elements rather than 64, then every double would +** have an exact representation in "nd" format. With 64 elements, all integral +** doubles have an exact representation, and all non-integral doubles have +** enough digits to make both %.99e and %.99f do the right thing. +*/ + +#if LJ_64 +#define ND_MUL2K_MAX_SHIFT 29 +#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) / 1000000000)) +#else +#define ND_MUL2K_MAX_SHIFT 11 +#define ND_MUL2K_DIV1E9(val) ((uint32_t)((val) >> 9) / 1953125) +#endif + +/* Multiply nd by 2^k and add carry_in (ndlo is assumed to be zero). */ +static uint32_t nd_mul2k(uint32_t* nd, uint32_t ndhi, uint32_t k, + uint32_t carry_in, SFormat sf) +{ + uint32_t i, ndlo = 0, start = 1; + /* Performance hacks. */ + if (k > ND_MUL2K_MAX_SHIFT*2 && STRFMT_FP(sf) != STRFMT_FP(STRFMT_T_FP_F)) { + start = ndhi - (STRFMT_PREC(sf) + 17) / 8; + } + /* Real logic. */ + while (k >= ND_MUL2K_MAX_SHIFT) { + for (i = ndlo; i <= ndhi; i++) { + uint64_t val = ((uint64_t)nd[i] << ND_MUL2K_MAX_SHIFT) | carry_in; + carry_in = ND_MUL2K_DIV1E9(val); + nd[i] = (uint32_t)val - carry_in * 1000000000; + } + if (carry_in) { + nd[++ndhi] = carry_in; carry_in = 0; + if (start++ == ndlo) ++ndlo; + } + k -= ND_MUL2K_MAX_SHIFT; + } + if (k) { + for (i = ndlo; i <= ndhi; i++) { + uint64_t val = ((uint64_t)nd[i] << k) | carry_in; + carry_in = ND_MUL2K_DIV1E9(val); + nd[i] = (uint32_t)val - carry_in * 1000000000; + } + if (carry_in) nd[++ndhi] = carry_in; + } + return ndhi; +} + +/* Divide nd by 2^k (ndlo is assumed to be zero). */ +static uint32_t nd_div2k(uint32_t* nd, uint32_t ndhi, uint32_t k, SFormat sf) +{ + uint32_t ndlo = 0, stop1 = ~0, stop2 = ~0; + /* Performance hacks. */ + if (!ndhi) { + if (!nd[0]) { + return 0; + } else { + uint32_t s = lj_ffs(nd[0]); + if (s >= k) { nd[0] >>= k; return 0; } + nd[0] >>= s; k -= s; + } + } + if (k > 18) { + if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_F)) { + stop1 = 63 - (int32_t)STRFMT_PREC(sf) / 9; + } else { + int32_t floorlog2 = ndhi * 29 + lj_fls(nd[ndhi]) - k; + int32_t floorlog10 = (int32_t)(floorlog2 * 0.30102999566398114); + stop1 = 62 + (floorlog10 - (int32_t)STRFMT_PREC(sf)) / 9; + stop2 = 61 + ndhi - (int32_t)STRFMT_PREC(sf) / 8; + } + } + /* Real logic. */ + while (k >= 9) { + uint32_t i = ndhi, carry = 0; + for (;;) { + uint32_t val = nd[i]; + nd[i] = (val >> 9) + carry; + carry = (val & 0x1ff) * 1953125; + if (i == ndlo) break; + i = (i - 1) & 0x3f; + } + if (ndlo != stop1 && ndlo != stop2) { + if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; } + if (!nd[ndhi]) { ndhi = (ndhi - 1) & 0x3f; stop2--; } + } else if (!nd[ndhi]) { + if (ndhi != ndlo) { ndhi = (ndhi - 1) & 0x3f; stop2--; } + else return ndlo; + } + k -= 9; + } + if (k) { + uint32_t mask = (1U << k) - 1, mul = 1000000000 >> k, i = ndhi, carry = 0; + for (;;) { + uint32_t val = nd[i]; + nd[i] = (val >> k) + carry; + carry = (val & mask) * mul; + if (i == ndlo) break; + i = (i - 1) & 0x3f; + } + if (carry) { ndlo = (ndlo - 1) & 0x3f; nd[ndlo] = carry; } + } + return ndlo; +} + +/* Add m*10^e to nd (assumes ndlo <= e/9 <= ndhi and 0 <= m <= 9). */ +static uint32_t nd_add_m10e(uint32_t* nd, uint32_t ndhi, uint8_t m, int32_t e) +{ + uint32_t i, carry; + if (e >= 0) { + i = (uint32_t)e/9; + carry = m * (ndigits_dec_threshold[e - (int32_t)i*9] + 1); + } else { + int32_t f = (e-8)/9; + i = (uint32_t)(64 + f); + carry = m * (ndigits_dec_threshold[e - f*9] + 1); + } + for (;;) { + uint32_t val = nd[i] + carry; + if (LJ_UNLIKELY(val >= 1000000000)) { + val -= 1000000000; + nd[i] = val; + if (LJ_UNLIKELY(i == ndhi)) { + ndhi = (ndhi + 1) & 0x3f; + nd[ndhi] = 1; + break; + } + carry = 1; + i = (i + 1) & 0x3f; + } else { + nd[i] = val; + break; + } + } + return ndhi; +} + +/* Test whether two "nd" values are equal in their most significant digits. */ +static int nd_similar(uint32_t* nd, uint32_t ndhi, uint32_t* ref, MSize hilen, + MSize prec) +{ + char nd9[9], ref9[9]; + if (hilen <= prec) { + if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0; + prec -= hilen; ref--; ndhi = (ndhi - 1) & 0x3f; + if (prec >= 9) { + if (LJ_UNLIKELY(nd[ndhi] != *ref)) return 0; + prec -= 9; ref--; ndhi = (ndhi - 1) & 0x3f; + } + } else { + prec -= hilen - 9; + } + lj_assertX(prec < 9, "bad precision %d", prec); + lj_strfmt_wuint9(nd9, nd[ndhi]); + lj_strfmt_wuint9(ref9, *ref); + return !memcmp(nd9, ref9, prec) && (nd9[prec] < '5') == (ref9[prec] < '5'); +} + +/* -- Formatted conversions to buffer ------------------------------------- */ + +/* Write formatted floating-point number to either sb or p. */ +static char *lj_strfmt_wfnum(SBuf *sb, SFormat sf, lua_Number n, char *p) +{ + MSize width = STRFMT_WIDTH(sf), prec = STRFMT_PREC(sf), len; + TValue t; + t.n = n; + if (LJ_UNLIKELY((t.u32.hi << 1) >= 0xffe00000)) { + /* Handle non-finite values uniformly for %a, %e, %f, %g. */ + int prefix = 0, ch = (sf & STRFMT_F_UPPER) ? 0x202020 : 0; + if (((t.u32.hi & 0x000fffff) | t.u32.lo) != 0) { + ch ^= ('n' << 16) | ('a' << 8) | 'n'; + if ((sf & STRFMT_F_SPACE)) prefix = ' '; + } else { + ch ^= ('i' << 16) | ('n' << 8) | 'f'; + if ((t.u32.hi & 0x80000000)) prefix = '-'; + else if ((sf & STRFMT_F_PLUS)) prefix = '+'; + else if ((sf & STRFMT_F_SPACE)) prefix = ' '; + } + len = 3 + (prefix != 0); + if (!p) p = lj_buf_more(sb, width > len ? width : len); + if (!(sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; + if (prefix) *p++ = prefix; + *p++ = (char)(ch >> 16); *p++ = (char)(ch >> 8); *p++ = (char)ch; + } else if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_A)) { + /* %a */ + const char *hexdig = (sf & STRFMT_F_UPPER) ? "0123456789ABCDEFPX" + : "0123456789abcdefpx"; + int32_t e = (t.u32.hi >> 20) & 0x7ff; + char prefix = 0, eprefix = '+'; + if (t.u32.hi & 0x80000000) prefix = '-'; + else if ((sf & STRFMT_F_PLUS)) prefix = '+'; + else if ((sf & STRFMT_F_SPACE)) prefix = ' '; + t.u32.hi &= 0xfffff; + if (e) { + t.u32.hi |= 0x100000; + e -= 1023; + } else if (t.u32.lo | t.u32.hi) { + /* Non-zero denormal - normalise it. */ + uint32_t shift = t.u32.hi ? 20-lj_fls(t.u32.hi) : 52-lj_fls(t.u32.lo); + e = -1022 - shift; + t.u64 <<= shift; + } + /* abs(n) == t.u64 * 2^(e - 52) */ + /* If n != 0, bit 52 of t.u64 is set, and is the highest set bit. */ + if ((int32_t)prec < 0) { + /* Default precision: use smallest precision giving exact result. */ + prec = t.u32.lo ? 13-lj_ffs(t.u32.lo)/4 : 5-lj_ffs(t.u32.hi|0x100000)/4; + } else if (prec < 13) { + /* Precision is sufficiently low as to maybe require rounding. */ + t.u64 += (((uint64_t)1) << (51 - prec*4)); + } + if (e < 0) { + eprefix = '-'; + e = -e; + } + len = 5 + ndigits_dec((uint32_t)e) + prec + (prefix != 0) + + ((prec | (sf & STRFMT_F_ALT)) != 0); + if (!p) p = lj_buf_more(sb, width > len ? width : len); + if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { + while (width-- > len) *p++ = ' '; + } + if (prefix) *p++ = prefix; + *p++ = '0'; + *p++ = hexdig[17]; /* x or X */ + if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { + while (width-- > len) *p++ = '0'; + } + *p++ = '0' + (t.u32.hi >> 20); /* Usually '1', sometimes '0' or '2'. */ + if ((prec | (sf & STRFMT_F_ALT))) { + /* Emit fractional part. */ + char *q = p + 1 + prec; + *p = '.'; + if (prec < 13) t.u64 >>= (52 - prec*4); + else while (prec > 13) p[prec--] = '0'; + while (prec) { p[prec--] = hexdig[t.u64 & 15]; t.u64 >>= 4; } + p = q; + } + *p++ = hexdig[16]; /* p or P */ + *p++ = eprefix; /* + or - */ + p = lj_strfmt_wint(p, e); + } else { + /* %e or %f or %g - begin by converting n to "nd" format. */ + uint32_t nd[64]; + uint32_t ndhi = 0, ndlo, i; + int32_t e = (t.u32.hi >> 20) & 0x7ff, ndebias = 0; + char prefix = 0, *q; + if (t.u32.hi & 0x80000000) prefix = '-'; + else if ((sf & STRFMT_F_PLUS)) prefix = '+'; + else if ((sf & STRFMT_F_SPACE)) prefix = ' '; + prec += ((int32_t)prec >> 31) & 7; /* Default precision is 6. */ + if (STRFMT_FP(sf) == STRFMT_FP(STRFMT_T_FP_G)) { + /* %g - decrement precision if non-zero (to make it like %e). */ + prec--; + prec ^= (uint32_t)((int32_t)prec >> 31); + } + if ((sf & STRFMT_T_FP_E) && prec < 14 && n != 0) { + /* Precision is sufficiently low that rescaling will probably work. */ + if ((ndebias = rescale_e[e >> 6])) { + t.n = n * rescale_n[e >> 6]; + if (LJ_UNLIKELY(!e)) t.n *= 1e10, ndebias -= 10; + t.u64 -= 2; /* Convert 2ulp below (later we convert 2ulp above). */ + nd[0] = 0x100000 | (t.u32.hi & 0xfffff); + e = ((t.u32.hi >> 20) & 0x7ff) - 1075 - (ND_MUL2K_MAX_SHIFT < 29); + goto load_t_lo; rescale_failed: + t.n = n; + e = (t.u32.hi >> 20) & 0x7ff; + ndebias = ndhi = 0; + } + } + nd[0] = t.u32.hi & 0xfffff; + if (e == 0) e++; else nd[0] |= 0x100000; + e -= 1043; + if (t.u32.lo) { + e -= 32 + (ND_MUL2K_MAX_SHIFT < 29); load_t_lo: +#if ND_MUL2K_MAX_SHIFT >= 29 + nd[0] = (nd[0] << 3) | (t.u32.lo >> 29); + ndhi = nd_mul2k(nd, ndhi, 29, t.u32.lo & 0x1fffffff, sf); +#elif ND_MUL2K_MAX_SHIFT >= 11 + ndhi = nd_mul2k(nd, ndhi, 11, t.u32.lo >> 21, sf); + ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo >> 10) & 0x7ff, sf); + ndhi = nd_mul2k(nd, ndhi, 11, (t.u32.lo << 1) & 0x7ff, sf); +#else +#error "ND_MUL2K_MAX_SHIFT too small" +#endif + } + if (e >= 0) { + ndhi = nd_mul2k(nd, ndhi, (uint32_t)e, 0, sf); + ndlo = 0; + } else { + ndlo = nd_div2k(nd, ndhi, (uint32_t)-e, sf); + if (ndhi && !nd[ndhi]) ndhi--; + } + /* abs(n) == nd * 10^ndebias (for slightly loose interpretation of ==) */ + if ((sf & STRFMT_T_FP_E)) { + /* %e or %g - assume %e and start by calculating nd's exponent (nde). */ + char eprefix = '+'; + int32_t nde = -1; + MSize hilen; + if (ndlo && !nd[ndhi]) { + ndhi = 64; do {} while (!nd[--ndhi]); + nde -= 64 * 9; + } + hilen = ndigits_dec(nd[ndhi]); + nde += ndhi * 9 + hilen; + if (ndebias) { + /* + ** Rescaling was performed, but this introduced some error, and might + ** have pushed us across a rounding boundary. We check whether this + ** error affected the result by introducing even more error (2ulp in + ** either direction), and seeing whether a rounding boundary was + ** crossed. Having already converted the -2ulp case, we save off its + ** most significant digits, convert the +2ulp case, and compare them. + */ + int32_t eidx = e + 70 + (ND_MUL2K_MAX_SHIFT < 29) + + (t.u32.lo >= 0xfffffffe && !(~t.u32.hi << 12)); + const int8_t *m_e = four_ulp_m_e + eidx * 2; + lj_assertG_(G(sbufL(sb)), 0 <= eidx && eidx < 128, "bad eidx %d", eidx); + nd[33] = nd[ndhi]; + nd[32] = nd[(ndhi - 1) & 0x3f]; + nd[31] = nd[(ndhi - 2) & 0x3f]; + nd_add_m10e(nd, ndhi, (uint8_t)*m_e, m_e[1]); + if (LJ_UNLIKELY(!nd_similar(nd, ndhi, nd + 33, hilen, prec + 1))) { + goto rescale_failed; + } + } + if ((int32_t)(prec - nde) < (0x3f & -(int32_t)ndlo) * 9) { + /* Precision is sufficiently low as to maybe require rounding. */ + ndhi = nd_add_m10e(nd, ndhi, 5, nde - prec - 1); + nde += (hilen != ndigits_dec(nd[ndhi])); + } + nde += ndebias; + if ((sf & STRFMT_T_FP_F)) { + /* %g */ + if ((int32_t)prec >= nde && nde >= -4) { + if (nde < 0) ndhi = 0; + prec -= nde; + goto g_format_like_f; + } else if (!(sf & STRFMT_F_ALT) && prec && width > 5) { + /* Decrease precision in order to strip trailing zeroes. */ + char tail[9]; + uint32_t maxprec = hilen - 1 + ((ndhi - ndlo) & 0x3f) * 9; + if (prec >= maxprec) prec = maxprec; + else ndlo = (ndhi - (((int32_t)(prec - hilen) + 9) / 9)) & 0x3f; + i = prec - hilen - (((ndhi - ndlo) & 0x3f) * 9) + 10; + lj_strfmt_wuint9(tail, nd[ndlo]); + while (prec && tail[--i] == '0') { + prec--; + if (!i) { + if (ndlo == ndhi) { prec = 0; break; } + lj_strfmt_wuint9(tail, nd[++ndlo]); + i = 9; + } + } + } + } + if (nde < 0) { + /* Make nde non-negative. */ + eprefix = '-'; + nde = -nde; + } + len = 3 + prec + (prefix != 0) + ndigits_dec((uint32_t)nde) + (nde < 10) + + ((prec | (sf & STRFMT_F_ALT)) != 0); + if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 5); + if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { + while (width-- > len) *p++ = ' '; + } + if (prefix) *p++ = prefix; + if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { + while (width-- > len) *p++ = '0'; + } + q = lj_strfmt_wint(p + 1, nd[ndhi]); + p[0] = p[1]; /* Put leading digit in the correct place. */ + if ((prec | (sf & STRFMT_F_ALT))) { + /* Emit fractional part. */ + p[1] = '.'; p += 2; + prec -= (MSize)(q - p); p = q; /* Account for digits already emitted. */ + /* Then emit chunks of 9 digits (this may emit 8 digits too many). */ + for (i = ndhi; (int32_t)prec > 0 && i != ndlo; prec -= 9) { + i = (i - 1) & 0x3f; + p = lj_strfmt_wuint9(p, nd[i]); + } + if ((sf & STRFMT_T_FP_F) && !(sf & STRFMT_F_ALT)) { + /* %g (and not %#g) - strip trailing zeroes. */ + p += (int32_t)prec & ((int32_t)prec >> 31); + while (p[-1] == '0') p--; + if (p[-1] == '.') p--; + } else { + /* %e (or %#g) - emit trailing zeroes. */ + while ((int32_t)prec > 0) { *p++ = '0'; prec--; } + p += (int32_t)prec; + } + } else { + p++; + } + *p++ = (sf & STRFMT_F_UPPER) ? 'E' : 'e'; + *p++ = eprefix; /* + or - */ + if (nde < 10) *p++ = '0'; /* Always at least two digits of exponent. */ + p = lj_strfmt_wint(p, nde); + } else { + /* %f (or, shortly, %g in %f style) */ + if (prec < (MSize)(0x3f & -(int32_t)ndlo) * 9) { + /* Precision is sufficiently low as to maybe require rounding. */ + ndhi = nd_add_m10e(nd, ndhi, 5, 0 - prec - 1); + } + g_format_like_f: + if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT) && prec && width) { + /* Decrease precision in order to strip trailing zeroes. */ + if (ndlo) { + /* nd has a fractional part; we need to look at its digits. */ + char tail[9]; + uint32_t maxprec = (64 - ndlo) * 9; + if (prec >= maxprec) prec = maxprec; + else ndlo = 64 - (prec + 8) / 9; + i = prec - ((63 - ndlo) * 9); + lj_strfmt_wuint9(tail, nd[ndlo]); + while (prec && tail[--i] == '0') { + prec--; + if (!i) { + if (ndlo == 63) { prec = 0; break; } + lj_strfmt_wuint9(tail, nd[++ndlo]); + i = 9; + } + } + } else { + /* nd has no fractional part, so precision goes straight to zero. */ + prec = 0; + } + } + len = ndhi * 9 + ndigits_dec(nd[ndhi]) + prec + (prefix != 0) + + ((prec | (sf & STRFMT_F_ALT)) != 0); + if (!p) p = lj_buf_more(sb, (width > len ? width : len) + 8); + if (!(sf & (STRFMT_F_LEFT | STRFMT_F_ZERO))) { + while (width-- > len) *p++ = ' '; + } + if (prefix) *p++ = prefix; + if ((sf & (STRFMT_F_LEFT | STRFMT_F_ZERO)) == STRFMT_F_ZERO) { + while (width-- > len) *p++ = '0'; + } + /* Emit integer part. */ + p = lj_strfmt_wint(p, nd[ndhi]); + i = ndhi; + while (i) p = lj_strfmt_wuint9(p, nd[--i]); + if ((prec | (sf & STRFMT_F_ALT))) { + /* Emit fractional part. */ + *p++ = '.'; + /* Emit chunks of 9 digits (this may emit 8 digits too many). */ + while ((int32_t)prec > 0 && i != ndlo) { + i = (i - 1) & 0x3f; + p = lj_strfmt_wuint9(p, nd[i]); + prec -= 9; + } + if ((sf & STRFMT_T_FP_E) && !(sf & STRFMT_F_ALT)) { + /* %g (and not %#g) - strip trailing zeroes. */ + p += (int32_t)prec & ((int32_t)prec >> 31); + while (p[-1] == '0') p--; + if (p[-1] == '.') p--; + } else { + /* %f (or %#g) - emit trailing zeroes. */ + while ((int32_t)prec > 0) { *p++ = '0'; prec--; } + p += (int32_t)prec; + } + } + } + } + if ((sf & STRFMT_F_LEFT)) while (width-- > len) *p++ = ' '; + return p; +} + +/* Add formatted floating-point number to buffer. */ +SBuf *lj_strfmt_putfnum(SBuf *sb, SFormat sf, lua_Number n) +{ + sb->w = lj_strfmt_wfnum(sb, sf, n, NULL); + return sb; +} + +/* -- Conversions to strings ---------------------------------------------- */ + +/* Convert number to string. */ +GCstr * LJ_FASTCALL lj_strfmt_num(lua_State *L, cTValue *o) +{ + char buf[STRFMT_MAXBUF_NUM]; + MSize len = (MSize)(lj_strfmt_wfnum(NULL, STRFMT_G14, o->n, buf) - buf); + return lj_str_new(L, buf, len); +} + diff --git a/src/lj_strscan.c b/src/lj_strscan.c index 914cfb7a..1d1c1c74 100644 --- a/src/lj_strscan.c +++ b/src/lj_strscan.c @@ -80,7 +80,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) /* Avoid double rounding for denormals. */ if (LJ_UNLIKELY(ex2 <= -1075 && x != 0)) { /* NYI: all of this generates way too much code on 32 bit CPUs. */ -#if defined(__GNUC__) && LJ_64 +#if (defined(__GNUC__) || defined(__clang__)) && LJ_64 int32_t b = (int32_t)(__builtin_clzll(x)^63); #else int32_t b = (x>>32) ? 32+(int32_t)lj_fls((uint32_t)(x>>32)) : @@ -94,7 +94,7 @@ static void strscan_double(uint64_t x, TValue *o, int32_t ex2, int32_t neg) } /* Convert to double using a signed int64_t conversion, then rescale. */ - lua_assert((int64_t)x >= 0); + lj_assertX((int64_t)x >= 0, "bad double conversion"); n = (double)(int64_t)x; if (neg) n = -n; if (ex2) n = ldexp(n, ex2); @@ -142,7 +142,7 @@ static StrScanFmt strscan_hex(const uint8_t *p, TValue *o, break; } - /* Reduce range then convert to double. */ + /* Reduce range, then convert to double. */ if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } strscan_double(x, o, ex2, neg); return fmt; @@ -264,7 +264,7 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o, uint32_t hi = 0, lo = (uint32_t)(xip-xi); int32_t ex2 = 0, idig = (int32_t)lo + (ex10 >> 1); - lua_assert(lo > 0 && (ex10 & 1) == 0); + lj_assertX(lo > 0 && (ex10 & 1) == 0, "bad lo %d ex10 %d", lo, ex10); /* Handle simple overflow/underflow. */ if (idig > 310/2) { if (neg) setminfV(o); else setpinfV(o); return fmt; } @@ -328,10 +328,55 @@ static StrScanFmt strscan_dec(const uint8_t *p, TValue *o, return fmt; } +/* Parse binary number. */ +static StrScanFmt strscan_bin(const uint8_t *p, TValue *o, + StrScanFmt fmt, uint32_t opt, + int32_t ex2, int32_t neg, uint32_t dig) +{ + uint64_t x = 0; + uint32_t i; + + if (ex2 || dig > 64) return STRSCAN_ERROR; + + /* Scan binary digits. */ + for (i = dig; i; i--, p++) { + if ((*p & ~1) != '0') return STRSCAN_ERROR; + x = (x << 1) | (*p & 1); + } + + /* Format-specific handling. */ + switch (fmt) { + case STRSCAN_INT: + if (!(opt & STRSCAN_OPT_TONUM) && x < 0x80000000u+neg) { + o->i = neg ? -(int32_t)x : (int32_t)x; + return STRSCAN_INT; /* Fast path for 32 bit integers. */ + } + if (!(opt & STRSCAN_OPT_C)) { fmt = STRSCAN_NUM; break; } + /* fallthrough */ + case STRSCAN_U32: + if (dig > 32) return STRSCAN_ERROR; + o->i = neg ? -(int32_t)x : (int32_t)x; + return STRSCAN_U32; + case STRSCAN_I64: + case STRSCAN_U64: + o->u64 = neg ? (uint64_t)-(int64_t)x : x; + return fmt; + default: + break; + } + + /* Reduce range, then convert to double. */ + if ((x & U64x(c0000000,0000000))) { x = (x >> 2) | (x & 3); ex2 += 2; } + strscan_double(x, o, ex2, neg); + return fmt; +} + /* Scan string containing a number. Returns format. Returns value in o. */ -StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) +StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, + uint32_t opt) { int32_t neg = 0; + const uint8_t *pe = p + len; /* Remove leading space, parse sign and non-numbers. */ if (LJ_UNLIKELY(!lj_char_isdigit(*p))) { @@ -349,7 +394,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) p += 3; } while (lj_char_isspace(*p)) p++; - if (*p) return STRSCAN_ERROR; + if (*p || p < pe) return STRSCAN_ERROR; o->u64 = tmp.u64; return STRSCAN_NUM; } @@ -366,8 +411,12 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) /* Determine base and skip leading zeros. */ if (LJ_UNLIKELY(*p <= '0')) { - if (*p == '0' && casecmp(p[1], 'x')) - base = 16, cmask = LJ_CHAR_XDIGIT, p += 2; + if (*p == '0') { + if (casecmp(p[1], 'x')) + base = 16, cmask = LJ_CHAR_XDIGIT, p += 2; + else if (casecmp(p[1], 'b')) + base = 2, cmask = LJ_CHAR_DIGIT, p += 2; + } for ( ; ; p++) { if (*p == '0') { hasdig = 1; @@ -396,6 +445,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) /* Handle decimal point. */ if (dp) { + if (base == 2) return STRSCAN_ERROR; fmt = STRSCAN_NUM; if (dig) { ex = (int32_t)(dp-(p-1)); dp = p-1; @@ -406,7 +456,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) } /* Parse exponent. */ - if (casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) { + if (base >= 10 && casecmp(*p, (uint32_t)(base == 16 ? 'p' : 'e'))) { uint32_t xx; int negx = 0; fmt = STRSCAN_NUM; p++; @@ -445,6 +495,7 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) while (lj_char_isspace(*p)) p++; if (*p) return STRSCAN_ERROR; } + if (p < pe) return STRSCAN_ERROR; /* Fast path for decimal 32 bit integers. */ if (fmt == STRSCAN_INT && base == 10 && @@ -466,6 +517,8 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) return strscan_oct(sp, o, fmt, neg, dig); if (base == 16) fmt = strscan_hex(sp, o, fmt, opt, ex, neg, dig); + else if (base == 2) + fmt = strscan_bin(sp, o, fmt, opt, ex, neg, dig); else fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); @@ -481,18 +534,19 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt) int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o) { - StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, + StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o, STRSCAN_OPT_TONUM); - lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM); + lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM, "bad scan format"); return (fmt != STRSCAN_ERROR); } #if LJ_DUALNUM int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o) { - StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), o, + StrScanFmt fmt = lj_strscan_scan((const uint8_t *)strdata(str), str->len, o, STRSCAN_OPT_TOINT); - lua_assert(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT); + lj_assertX(fmt == STRSCAN_ERROR || fmt == STRSCAN_NUM || fmt == STRSCAN_INT, + "bad scan format"); if (fmt == STRSCAN_INT) setitype(o, LJ_TISNUM); return (fmt != STRSCAN_ERROR); } diff --git a/src/lj_strscan.h b/src/lj_strscan.h index 61ddcb45..8ed31542 100644 --- a/src/lj_strscan.h +++ b/src/lj_strscan.h @@ -22,7 +22,8 @@ typedef enum { STRSCAN_INT, STRSCAN_U32, STRSCAN_I64, STRSCAN_U64, } StrScanFmt; -LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, TValue *o, uint32_t opt); +LJ_FUNC StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o, + uint32_t opt); LJ_FUNC int LJ_FASTCALL lj_strscan_num(GCstr *str, TValue *o); #if LJ_DUALNUM LJ_FUNC int LJ_FASTCALL lj_strscan_number(GCstr *str, TValue *o); diff --git a/src/lj_tab.c b/src/lj_tab.c index c5b6bcbf..c3609b38 100644 --- a/src/lj_tab.c +++ b/src/lj_tab.c @@ -16,25 +16,10 @@ /* -- Object hashing ------------------------------------------------------ */ -/* Hash values are masked with the table hash mask and used as an index. */ -static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) -{ - Node *n = noderef(t->node); - return &n[hash & t->hmask]; -} - -/* String hashes are precomputed when they are interned. */ -#define hashstr(t, s) hashmask(t, (s)->hash) - -#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) -#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) -#define hashptr(t, p) hashlohi((t), u32ptr(p), u32ptr(p) + HASH_BIAS) -#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) - /* Hash an arbitrary key and return its anchor position in the hash table. */ static Node *hashkey(const GCtab *t, cTValue *key) { - lua_assert(!tvisint(key)); + lj_assertX(!tvisint(key), "attempt to hash integer"); if (tvisstr(key)) return hashstr(t, strV(key)); else if (tvisnum(key)) @@ -53,13 +38,13 @@ static LJ_AINLINE void newhpart(lua_State *L, GCtab *t, uint32_t hbits) { uint32_t hsize; Node *node; - lua_assert(hbits != 0); + lj_assertL(hbits != 0, "zero hash size"); if (hbits > LJ_MAX_HBITS) lj_err_msg(L, LJ_ERR_TABOV); hsize = 1u << hbits; node = lj_mem_newvec(L, hsize, Node); - setmref(node->freetop, &node[hsize]); setmref(t->node, node); + setfreetop(t, node, &node[hsize]); t->hmask = hsize-1; } @@ -74,7 +59,7 @@ static LJ_AINLINE void clearhpart(GCtab *t) { uint32_t i, hmask = t->hmask; Node *node = noderef(t->node); - lua_assert(t->hmask != 0); + lj_assertX(t->hmask != 0, "empty hash part"); for (i = 0; i <= hmask; i++) { Node *n = &node[i]; setmref(n->next, NULL); @@ -98,7 +83,8 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) GCtab *t; /* First try to colocate the array part. */ if (LJ_MAX_COLOSIZE != 0 && asize > 0 && asize <= LJ_MAX_COLOSIZE) { - lua_assert((sizeof(GCtab) & 7) == 0); + Node *nilnode; + lj_assertL((sizeof(GCtab) & 7) == 0, "bad GCtab size"); t = (GCtab *)lj_mem_newgco(L, sizetabcolo(asize)); t->gct = ~LJ_TTAB; t->nomm = (uint8_t)~0; @@ -107,8 +93,13 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) setgcrefnull(t->metatable); t->asize = asize; t->hmask = 0; - setmref(t->node, &G(L)->nilnode); + nilnode = &G(L)->nilnode; + setmref(t->node, nilnode); +#if LJ_GC64 + setmref(t->freetop, nilnode); +#endif } else { /* Otherwise separately allocate the array part. */ + Node *nilnode; t = lj_mem_newobj(L, GCtab); t->gct = ~LJ_TTAB; t->nomm = (uint8_t)~0; @@ -117,7 +108,11 @@ static GCtab *newtab(lua_State *L, uint32_t asize, uint32_t hbits) setgcrefnull(t->metatable); t->asize = 0; /* In case the array allocation fails. */ t->hmask = 0; - setmref(t->node, &G(L)->nilnode); + nilnode = &G(L)->nilnode; + setmref(t->node, nilnode); +#if LJ_GC64 + setmref(t->freetop, nilnode); +#endif if (asize > 0) { if (asize > LJ_MAX_ASIZE) lj_err_msg(L, LJ_ERR_TABOV); @@ -149,6 +144,12 @@ GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits) return t; } +/* The API of this function conforms to lua_createtable(). */ +GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h) +{ + return lj_tab_new(L, (uint32_t)(a > 0 ? a+1 : 0), hsize2hbits(h)); +} + #if LJ_HASJIT GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize) { @@ -165,7 +166,8 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) GCtab *t; uint32_t asize, hmask; t = newtab(L, kt->asize, kt->hmask > 0 ? lj_fls(kt->hmask)+1 : 0); - lua_assert(kt->asize == t->asize && kt->hmask == t->hmask); + lj_assertL(kt->asize == t->asize && kt->hmask == t->hmask, + "mismatched size of table and template"); t->nomm = 0; /* Keys with metamethod names may be present. */ asize = kt->asize; if (asize > 0) { @@ -185,7 +187,7 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) Node *node = noderef(t->node); Node *knode = noderef(kt->node); ptrdiff_t d = (char *)node - (char *)knode; - setmref(node->freetop, (Node *)((char *)noderef(knode->freetop) + d)); + setfreetop(t, node, (Node *)((char *)getfreetop(kt, knode) + d)); for (i = 0; i <= hmask; i++) { Node *kn = &knode[i]; Node *n = &node[i]; @@ -198,6 +200,17 @@ GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt) return t; } +/* Clear a table. */ +void LJ_FASTCALL lj_tab_clear(GCtab *t) +{ + clearapart(t); + if (t->hmask > 0) { + Node *node = noderef(t->node); + setfreetop(t, node, &node[t->hmask+1]); + clearhpart(t); + } +} + /* Free a table. */ void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) { @@ -214,7 +227,7 @@ void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t) /* -- Table resizing ------------------------------------------------------ */ /* Resize a table to fit the new array/hash part sizes. */ -static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) +void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) { Node *oldnode = noderef(t->node); uint32_t oldasize = t->asize; @@ -247,6 +260,9 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) } else { global_State *g = G(L); setmref(t->node, &g->nilnode); +#if LJ_GC64 + setmref(t->freetop, &g->nilnode); +#endif t->hmask = 0; } if (asize < oldasize) { /* Array part shrinks? */ @@ -276,7 +292,7 @@ static void resizetab(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits) static uint32_t countint(cTValue *key, uint32_t *bins) { - lua_assert(!tvisint(key)); + lj_assertX(!tvisint(key), "bad integer key"); if (tvisnum(key)) { lua_Number nk = numV(key); int32_t k = lj_num2int(nk); @@ -348,7 +364,7 @@ static void rehashtab(lua_State *L, GCtab *t, cTValue *ek) asize += countint(ek, bins); na = bestasize(bins, &asize); total -= na; - resizetab(L, t, asize, hsize2hbits(total)); + lj_tab_resize(L, t, asize, hsize2hbits(total)); } #if LJ_HASFFI @@ -360,7 +376,7 @@ void lj_tab_rehash(lua_State *L, GCtab *t) void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize) { - resizetab(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); + lj_tab_resize(L, t, nasize+1, t->hmask > 0 ? lj_fls(t->hmask)+1 : 0); } /* -- Table getters ------------------------------------------------------- */ @@ -378,7 +394,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key) return NULL; } -cTValue *lj_tab_getstr(GCtab *t, GCstr *key) +cTValue *lj_tab_getstr(GCtab *t, const GCstr *key) { Node *n = hashstr(t, key); do { @@ -428,16 +444,17 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) Node *n = hashkey(t, key); if (!tvisnil(&n->val) || t->hmask == 0) { Node *nodebase = noderef(t->node); - Node *collide, *freenode = noderef(nodebase->freetop); - lua_assert(freenode >= nodebase && freenode <= nodebase+t->hmask+1); + Node *collide, *freenode = getfreetop(t, nodebase); + lj_assertL(freenode >= nodebase && freenode <= nodebase+t->hmask+1, + "bad freenode"); do { if (freenode == nodebase) { /* No free node found? */ rehashtab(L, t, key); /* Rehash table. */ return lj_tab_set(L, t, key); /* Retry key insertion. */ } } while (!tvisnil(&(--freenode)->key)); - setmref(nodebase->freetop, freenode); - lua_assert(freenode != &G(L)->nilnode); + setfreetop(t, nodebase, freenode); + lj_assertL(freenode != &G(L)->nilnode, "store to fallback hash"); collide = hashkey(t, &n->key); if (collide != n) { /* Colliding node not the main node? */ while (noderef(collide->next) != n) /* Find predecessor. */ @@ -493,7 +510,7 @@ TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) if (LJ_UNLIKELY(tvismzero(&n->key))) n->key.u64 = 0; lj_gc_anybarriert(L, t); - lua_assert(tvisnil(&n->val)); + lj_assertL(tvisnil(&n->val), "new hash slot is not empty"); return &n->val; } @@ -510,7 +527,7 @@ TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key) return lj_tab_newkey(L, t, &k); } -TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key) +TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key) { TValue k; Node *n = hashstr(t, key); @@ -551,103 +568,126 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key) /* -- Table traversal ----------------------------------------------------- */ -/* Get the traversal index of a key. */ -static uint32_t keyindex(lua_State *L, GCtab *t, cTValue *key) +/* Table traversal indexes: +** +** Array key index: [0 .. t->asize-1] +** Hash key index: [t->asize .. t->asize+t->hmask] +** Invalid key: ~0 +*/ + +/* Get the successor traversal index of a key. */ +uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key) { TValue tmp; if (tvisint(key)) { int32_t k = intV(key); if ((uint32_t)k < t->asize) - return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ + return (uint32_t)k + 1; setnumV(&tmp, (lua_Number)k); key = &tmp; } else if (tvisnum(key)) { lua_Number nk = numV(key); int32_t k = lj_num2int(nk); if ((uint32_t)k < t->asize && nk == (lua_Number)k) - return (uint32_t)k; /* Array key indexes: [0..t->asize-1] */ + return (uint32_t)k + 1; } if (!tvisnil(key)) { Node *n = hashkey(t, key); do { if (lj_obj_equal(&n->key, key)) - return t->asize + (uint32_t)(n - noderef(t->node)); - /* Hash key indexes: [t->asize..t->asize+t->nmask] */ + return t->asize + (uint32_t)((n+1) - noderef(t->node)); } while ((n = nextnode(n))); - if (key->u32.hi == 0xfffe7fff) /* ITERN was despecialized while running. */ - return key->u32.lo - 1; - lj_err_msg(L, LJ_ERR_NEXTIDX); - return 0; /* unreachable */ + if (key->u32.hi == LJ_KEYINDEX) /* Despecialized ITERN while running. */ + return key->u32.lo; + return ~0u; /* Invalid key to next. */ } - return ~0u; /* A nil key starts the traversal. */ + return 0; /* A nil key starts the traversal. */ } -/* Advance to the next step in a table traversal. */ -int lj_tab_next(lua_State *L, GCtab *t, TValue *key) +/* Get the next key/value pair of a table traversal. */ +int lj_tab_next(GCtab *t, cTValue *key, TValue *o) { - uint32_t i = keyindex(L, t, key); /* Find predecessor key index. */ - for (i++; i < t->asize; i++) /* First traverse the array keys. */ - if (!tvisnil(arrayslot(t, i))) { - setintV(key, i); - copyTV(L, key+1, arrayslot(t, i)); + uint32_t idx = lj_tab_keyindex(t, key); /* Find successor index of key. */ + /* First traverse the array part. */ + for (; idx < t->asize; idx++) { + cTValue *a = arrayslot(t, idx); + if (LJ_LIKELY(!tvisnil(a))) { + setintV(o, idx); + o[1] = *a; return 1; } - for (i -= t->asize; i <= t->hmask; i++) { /* Then traverse the hash keys. */ - Node *n = &noderef(t->node)[i]; + } + idx -= t->asize; + /* Then traverse the hash part. */ + for (; idx <= t->hmask; idx++) { + Node *n = &noderef(t->node)[idx]; if (!tvisnil(&n->val)) { - copyTV(L, key, &n->key); - copyTV(L, key+1, &n->val); + o[0] = n->key; + o[1] = n->val; return 1; } } - return 0; /* End of traversal. */ + return (int32_t)idx < 0 ? -1 : 0; /* Invalid key or end of traversal. */ } /* -- Table length calculation -------------------------------------------- */ -static MSize unbound_search(GCtab *t, MSize j) +/* Compute table length. Slow path with mixed array/hash lookups. */ +LJ_NOINLINE static MSize tab_len_slow(GCtab *t, size_t hi) { cTValue *tv; - MSize i = j; /* i is zero or a present index */ - j++; - /* find `i' and `j' such that i is present and j is not */ - while ((tv = lj_tab_getint(t, (int32_t)j)) && !tvisnil(tv)) { - i = j; - j *= 2; - if (j > (MSize)(INT_MAX-2)) { /* overflow? */ - /* table was built with bad purposes: resort to linear search */ - i = 1; - while ((tv = lj_tab_getint(t, (int32_t)i)) && !tvisnil(tv)) i++; - return i - 1; + size_t lo = hi; + hi++; + /* Widening search for an upper bound. */ + while ((tv = lj_tab_getint(t, (int32_t)hi)) && !tvisnil(tv)) { + lo = hi; + hi += hi; + if (hi > (size_t)(INT_MAX-2)) { /* Punt and do a linear search. */ + lo = 1; + while ((tv = lj_tab_getint(t, (int32_t)lo)) && !tvisnil(tv)) lo++; + return (MSize)(lo - 1); } } - /* now do a binary search between them */ - while (j - i > 1) { - MSize m = (i+j)/2; - cTValue *tvb = lj_tab_getint(t, (int32_t)m); - if (tvb && !tvisnil(tvb)) i = m; else j = m; + /* Binary search to find a non-nil to nil transition. */ + while (hi - lo > 1) { + size_t mid = (lo+hi) >> 1; + cTValue *tvb = lj_tab_getint(t, (int32_t)mid); + if (tvb && !tvisnil(tvb)) lo = mid; else hi = mid; } - return i; + return (MSize)lo; } -/* -** Try to find a boundary in table `t'. A `boundary' is an integer index -** such that t[i] is non-nil and t[i+1] is nil (and 0 if t[1] is nil). -*/ +/* Compute table length. Fast path. */ MSize LJ_FASTCALL lj_tab_len(GCtab *t) { - MSize j = (MSize)t->asize; - if (j > 1 && tvisnil(arrayslot(t, j-1))) { - MSize i = 1; - while (j - i > 1) { - MSize m = (i+j)/2; - if (tvisnil(arrayslot(t, m-1))) j = m; else i = m; + size_t hi = (size_t)t->asize; + if (hi) hi--; + /* In a growing array the last array element is very likely nil. */ + if (hi > 0 && LJ_LIKELY(tvisnil(arrayslot(t, hi)))) { + /* Binary search to find a non-nil to nil transition in the array. */ + size_t lo = 0; + while (hi - lo > 1) { + size_t mid = (lo+hi) >> 1; + if (tvisnil(arrayslot(t, mid))) hi = mid; else lo = mid; } - return i-1; + return (MSize)lo; } - if (j) j--; - if (t->hmask <= 0) - return j; - return unbound_search(t, j); + /* Without a hash part, there's an implicit nil after the last element. */ + return t->hmask ? tab_len_slow(t, hi) : (MSize)hi; } +#if LJ_HASJIT +/* Verify hinted table length or compute it. */ +MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint) +{ + size_t asize = (size_t)t->asize; + cTValue *tv = arrayslot(t, hint); + if (LJ_LIKELY(hint+1 < asize)) { + if (LJ_LIKELY(!tvisnil(tv) && tvisnil(tv+1))) return (MSize)hint; + } else if (hint+1 <= asize && LJ_LIKELY(t->hmask == 0) && !tvisnil(tv)) { + return (MSize)hint; + } + return lj_tab_len(t); +} +#endif + diff --git a/src/lj_tab.h b/src/lj_tab.h index 4a106873..2a3f76bf 100644 --- a/src/lj_tab.h +++ b/src/lj_tab.h @@ -31,30 +31,52 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi) return hi; } +/* Hash values are masked with the table hash mask and used as an index. */ +static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash) +{ + Node *n = noderef(t->node); + return &n[hash & t->hmask]; +} + +/* String IDs are generated when a string is interned. */ +#define hashstr(t, s) hashmask(t, (s)->sid) + +#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi))) +#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1)) +#if LJ_GC64 +#define hashgcref(t, r) \ + hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32)) +#else +#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS) +#endif + #define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0) LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits); +LJ_FUNC GCtab *lj_tab_new_ah(lua_State *L, int32_t a, int32_t h); #if LJ_HASJIT LJ_FUNC GCtab * LJ_FASTCALL lj_tab_new1(lua_State *L, uint32_t ahsize); #endif LJ_FUNCA GCtab * LJ_FASTCALL lj_tab_dup(lua_State *L, const GCtab *kt); +LJ_FUNC void LJ_FASTCALL lj_tab_clear(GCtab *t); LJ_FUNC void LJ_FASTCALL lj_tab_free(global_State *g, GCtab *t); #if LJ_HASFFI LJ_FUNC void lj_tab_rehash(lua_State *L, GCtab *t); #endif +LJ_FUNC void lj_tab_resize(lua_State *L, GCtab *t, uint32_t asize, uint32_t hbits); LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize); /* Caveat: all getters except lj_tab_get() can return NULL! */ LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key); -LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key); +LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key); LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key); /* Caveat: all setters require a write barrier for the stored value. */ LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key); -LJ_FUNC TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); -LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key); +LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key); +LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key); LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); #define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize) @@ -64,7 +86,11 @@ LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key); #define lj_tab_setint(L, t, key) \ (inarray((t), (key)) ? arrayslot((t), (key)) : lj_tab_setinth(L, (t), (key))) -LJ_FUNCA int lj_tab_next(lua_State *L, GCtab *t, TValue *key); +LJ_FUNC uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key); +LJ_FUNCA int lj_tab_next(GCtab *t, cTValue *key, TValue *o); LJ_FUNCA MSize LJ_FASTCALL lj_tab_len(GCtab *t); +#if LJ_HASJIT +LJ_FUNC MSize LJ_FASTCALL lj_tab_len_hint(GCtab *t, size_t hint); +#endif #endif diff --git a/src/lj_target.h b/src/lj_target.h index 75eb965f..19716928 100644 --- a/src/lj_target.h +++ b/src/lj_target.h @@ -55,7 +55,7 @@ typedef uint32_t RegSP; /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. */ -#if LJ_TARGET_PPC || LJ_TARGET_MIPS +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 typedef uint64_t RegSet; #else typedef uint32_t RegSet; @@ -69,7 +69,7 @@ typedef uint32_t RegSet; #define rset_set(rs, r) (rs |= RID2RSET(r)) #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) -#if LJ_TARGET_PPC || LJ_TARGET_MIPS +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) #else @@ -138,6 +138,8 @@ typedef uint32_t RegCost; #include "lj_target_x86.h" #elif LJ_TARGET_ARM #include "lj_target_arm.h" +#elif LJ_TARGET_ARM64 +#include "lj_target_arm64.h" #elif LJ_TARGET_PPC #include "lj_target_ppc.h" #elif LJ_TARGET_MIPS @@ -150,7 +152,8 @@ typedef uint32_t RegCost; /* Return the address of an exit stub. */ static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno) { - lua_assert(group[exitno / EXITSTUBS_PER_GROUP] != NULL); + lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL, + "exit stub group for exit %d uninitialized", exitno); return (char *)group[exitno / EXITSTUBS_PER_GROUP] + EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); } diff --git a/src/lj_target_arm.h b/src/lj_target_arm.h index 76a30710..48f487a5 100644 --- a/src/lj_target_arm.h +++ b/src/lj_target_arm.h @@ -211,6 +211,7 @@ typedef enum ARMIns { /* ARMv6T2 */ ARMI_MOVW = 0xe3000000, ARMI_MOVT = 0xe3400000, + ARMI_BFI = 0xe7c00010, /* VFP */ ARMI_VMOV_D = 0xeeb00b40, @@ -243,10 +244,6 @@ typedef enum ARMIns { ARMI_VCVT_S32_F64 = 0xeebd0bc0, ARMI_VCVT_U32_F32 = 0xeebc0ac0, ARMI_VCVT_U32_F64 = 0xeebc0bc0, - ARMI_VCVTR_S32_F32 = 0xeebd0a40, - ARMI_VCVTR_S32_F64 = 0xeebd0b40, - ARMI_VCVTR_U32_F32 = 0xeebc0a40, - ARMI_VCVTR_U32_F64 = 0xeebc0b40, ARMI_VCVT_F32_S32 = 0xeeb80ac0, ARMI_VCVT_F64_S32 = 0xeeb80bc0, ARMI_VCVT_F32_U32 = 0xeeb80a40, diff --git a/src/lj_target_arm64.h b/src/lj_target_arm64.h new file mode 100644 index 00000000..d45af2e4 --- /dev/null +++ b/src/lj_target_arm64.h @@ -0,0 +1,336 @@ +/* +** Definitions for ARM64 CPUs. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + +#ifndef _LJ_TARGET_ARM64_H +#define _LJ_TARGET_ARM64_H + +/* -- Registers IDs ------------------------------------------------------- */ + +#define GPRDEF(_) \ + _(X0) _(X1) _(X2) _(X3) _(X4) _(X5) _(X6) _(X7) \ + _(X8) _(X9) _(X10) _(X11) _(X12) _(X13) _(X14) _(X15) \ + _(X16) _(X17) _(X18) _(X19) _(X20) _(X21) _(X22) _(X23) \ + _(X24) _(X25) _(X26) _(X27) _(X28) _(FP) _(LR) _(SP) +#define FPRDEF(_) \ + _(D0) _(D1) _(D2) _(D3) _(D4) _(D5) _(D6) _(D7) \ + _(D8) _(D9) _(D10) _(D11) _(D12) _(D13) _(D14) _(D15) \ + _(D16) _(D17) _(D18) _(D19) _(D20) _(D21) _(D22) _(D23) \ + _(D24) _(D25) _(D26) _(D27) _(D28) _(D29) _(D30) _(D31) +#define VRIDDEF(_) + +#define RIDENUM(name) RID_##name, + +enum { + GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ + FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ + RID_MAX, + RID_TMP = RID_LR, + RID_ZERO = RID_SP, + + /* Calling conventions. */ + RID_RET = RID_X0, + RID_RETLO = RID_X0, + RID_RETHI = RID_X1, + RID_FPRET = RID_D0, + + /* These definitions must match with the *.dasc file(s): */ + RID_BASE = RID_X19, /* Interpreter BASE. */ + RID_LPC = RID_X21, /* Interpreter PC. */ + RID_GL = RID_X22, /* Interpreter GL. */ + RID_LREG = RID_X23, /* Interpreter L. */ + + /* Register ranges [min, max) and number of registers. */ + RID_MIN_GPR = RID_X0, + RID_MAX_GPR = RID_SP+1, + RID_MIN_FPR = RID_MAX_GPR, + RID_MAX_FPR = RID_D31+1, + RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, + RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR +}; + +#define RID_NUM_KREF RID_NUM_GPR +#define RID_MIN_KREF RID_X0 + +/* -- Register sets ------------------------------------------------------- */ + +/* Make use of all registers, except for x18, fp, lr and sp. */ +#define RSET_FIXED \ + (RID2RSET(RID_X18)|RID2RSET(RID_FP)|RID2RSET(RID_LR)|RID2RSET(RID_SP)|\ + RID2RSET(RID_GL)) +#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) +#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) +#define RSET_ALL (RSET_GPR|RSET_FPR) +#define RSET_INIT RSET_ALL + +/* lr is an implicit scratch register. */ +#define RSET_SCRATCH_GPR (RSET_RANGE(RID_X0, RID_X17+1)) +#define RSET_SCRATCH_FPR \ + (RSET_RANGE(RID_D0, RID_D7+1)|RSET_RANGE(RID_D16, RID_D31+1)) +#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) +#define REGARG_FIRSTGPR RID_X0 +#define REGARG_LASTGPR RID_X7 +#define REGARG_NUMGPR 8 +#define REGARG_FIRSTFPR RID_D0 +#define REGARG_LASTFPR RID_D7 +#define REGARG_NUMFPR 8 + +/* -- Spill slots --------------------------------------------------------- */ + +/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. +** +** SPS_FIXED: Available fixed spill slots in interpreter frame. +** This definition must match with the vm_arm64.dasc file. +** Pre-allocate some slots to avoid sp adjust in every root trace. +** +** SPS_FIRST: First spill slot for general use. Reserve min. two 32 bit slots. +*/ +#define SPS_FIXED 4 +#define SPS_FIRST 2 + +#define SPOFS_TMP 0 + +#define sps_scale(slot) (4 * (int32_t)(slot)) +#define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) + +/* -- Exit state ---------------------------------------------------------- */ + +/* This definition must match with the *.dasc file(s). */ +typedef struct { + lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ + intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + int32_t spill[256]; /* Spill slots. */ +} ExitState; + +/* Highest exit + 1 indicates stack check. */ +#define EXITSTATE_CHECKEXIT 1 + +/* Return the address of a per-trace exit stub. */ +static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) +{ + while (*p == (LJ_LE ? 0xd503201f : 0x1f2003d5)) p++; /* Skip A64I_NOP. */ + return p + 3 + exitno; +} +/* Avoid dependence on lj_jit.h if only including lj_target.h. */ +#define exitstub_trace_addr(T, exitno) \ + exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode), (exitno)) + +/* -- Instructions -------------------------------------------------------- */ + +/* ARM64 instructions are always little-endian. Swap for ARM64BE. */ +#if LJ_BE +#define A64I_LE(x) (lj_bswap(x)) +#else +#define A64I_LE(x) (x) +#endif + +/* Instruction fields. */ +#define A64F_D(r) (r) +#define A64F_N(r) ((r) << 5) +#define A64F_A(r) ((r) << 10) +#define A64F_M(r) ((r) << 16) +#define A64F_IMMS(x) ((x) << 10) +#define A64F_IMMR(x) ((x) << 16) +#define A64F_U16(x) ((x) << 5) +#define A64F_U12(x) ((x) << 10) +#define A64F_S26(x) (((uint32_t)(x) & 0x03ffffffu)) +#define A64F_S19(x) (((uint32_t)(x) & 0x7ffffu) << 5) +#define A64F_S14(x) (((uint32_t)(x) & 0x3fffu) << 5) +#define A64F_S9(x) ((x) << 12) +#define A64F_BIT(x) ((x) << 19) +#define A64F_SH(sh, x) (((sh) << 22) | ((x) << 10)) +#define A64F_EX(ex) (A64I_EX | ((ex) << 13)) +#define A64F_EXSH(ex,x) (A64I_EX | ((ex) << 13) | ((x) << 10)) +#define A64F_FP8(x) ((x) << 13) +#define A64F_CC(cc) ((cc) << 12) +#define A64F_LSL16(x) (((x) / 16) << 21) +#define A64F_BSH(sh) ((sh) << 10) + +/* Check for valid field range. */ +#define A64F_S_OK(x, b) ((((x) + (1 << (b-1))) >> (b)) == 0) + +typedef enum A64Ins { + A64I_S = 0x20000000, + A64I_X = 0x80000000, + A64I_EX = 0x00200000, + A64I_ON = 0x00200000, + A64I_K12 = 0x1a000000, + A64I_K13 = 0x18000000, + A64I_LS_U = 0x01000000, + A64I_LS_S = 0x00800000, + A64I_LS_R = 0x01200800, + A64I_LS_SH = 0x00001000, + A64I_LS_UXTWx = 0x00004000, + A64I_LS_SXTWx = 0x0000c000, + A64I_LS_SXTXx = 0x0000e000, + A64I_LS_LSLx = 0x00006000, + + A64I_ADDw = 0x0b000000, + A64I_ADDx = 0x8b000000, + A64I_ADDSw = 0x2b000000, + A64I_ADDSx = 0xab000000, + A64I_NEGw = 0x4b0003e0, + A64I_NEGx = 0xcb0003e0, + A64I_SUBw = 0x4b000000, + A64I_SUBx = 0xcb000000, + A64I_SUBSw = 0x6b000000, + A64I_SUBSx = 0xeb000000, + + A64I_MULw = 0x1b007c00, + A64I_MULx = 0x9b007c00, + A64I_SMULL = 0x9b207c00, + + A64I_ANDw = 0x0a000000, + A64I_ANDx = 0x8a000000, + A64I_ANDSw = 0x6a000000, + A64I_ANDSx = 0xea000000, + A64I_EORw = 0x4a000000, + A64I_EORx = 0xca000000, + A64I_ORRw = 0x2a000000, + A64I_ORRx = 0xaa000000, + A64I_TSTw = 0x6a00001f, + A64I_TSTx = 0xea00001f, + + A64I_CMPw = 0x6b00001f, + A64I_CMPx = 0xeb00001f, + A64I_CMNw = 0x2b00001f, + A64I_CMNx = 0xab00001f, + A64I_CCMPw = 0x7a400000, + A64I_CCMPx = 0xfa400000, + A64I_CSELw = 0x1a800000, + A64I_CSELx = 0x9a800000, + + A64I_ASRw = 0x13007c00, + A64I_ASRx = 0x9340fc00, + A64I_LSLx = 0xd3400000, + A64I_LSRx = 0xd340fc00, + A64I_SHRw = 0x1ac02000, + A64I_SHRx = 0x9ac02000, /* lsl/lsr/asr/ror x0, x0, x0 */ + A64I_REVw = 0x5ac00800, + A64I_REVx = 0xdac00c00, + + A64I_EXTRw = 0x13800000, + A64I_EXTRx = 0x93c00000, + A64I_BFMw = 0x33000000, + A64I_BFMx = 0xb3400000, + A64I_SBFMw = 0x13000000, + A64I_SBFMx = 0x93400000, + A64I_SXTBw = 0x13001c00, + A64I_SXTHw = 0x13003c00, + A64I_SXTW = 0x93407c00, + A64I_UBFMw = 0x53000000, + A64I_UBFMx = 0xd3400000, + A64I_UXTBw = 0x53001c00, + A64I_UXTHw = 0x53003c00, + + A64I_MOVw = 0x2a0003e0, + A64I_MOVx = 0xaa0003e0, + A64I_MVNw = 0x2a2003e0, + A64I_MVNx = 0xaa2003e0, + A64I_MOVKw = 0x72800000, + A64I_MOVKx = 0xf2800000, + A64I_MOVZw = 0x52800000, + A64I_MOVZx = 0xd2800000, + A64I_MOVNw = 0x12800000, + A64I_MOVNx = 0x92800000, + + A64I_LDRB = 0x39400000, + A64I_LDRH = 0x79400000, + A64I_LDRw = 0xb9400000, + A64I_LDRx = 0xf9400000, + A64I_LDRLw = 0x18000000, + A64I_LDRLx = 0x58000000, + A64I_STRB = 0x39000000, + A64I_STRH = 0x79000000, + A64I_STRw = 0xb9000000, + A64I_STRx = 0xf9000000, + A64I_STPw = 0x29000000, + A64I_STPx = 0xa9000000, + A64I_LDPw = 0x29400000, + A64I_LDPx = 0xa9400000, + + A64I_B = 0x14000000, + A64I_BCC = 0x54000000, + A64I_BL = 0x94000000, + A64I_BR = 0xd61f0000, + A64I_BLR = 0xd63f0000, + A64I_TBZ = 0x36000000, + A64I_TBNZ = 0x37000000, + A64I_CBZ = 0x34000000, + A64I_CBNZ = 0x35000000, + + A64I_NOP = 0xd503201f, + + /* FP */ + A64I_FADDd = 0x1e602800, + A64I_FSUBd = 0x1e603800, + A64I_FMADDd = 0x1f400000, + A64I_FMSUBd = 0x1f408000, + A64I_FNMADDd = 0x1f600000, + A64I_FNMSUBd = 0x1f608000, + A64I_FMULd = 0x1e600800, + A64I_FDIVd = 0x1e601800, + A64I_FNEGd = 0x1e614000, + A64I_FABS = 0x1e60c000, + A64I_FSQRTd = 0x1e61c000, + A64I_LDRs = 0xbd400000, + A64I_LDRd = 0xfd400000, + A64I_STRs = 0xbd000000, + A64I_STRd = 0xfd000000, + A64I_LDPs = 0x2d400000, + A64I_LDPd = 0x6d400000, + A64I_STPs = 0x2d000000, + A64I_STPd = 0x6d000000, + A64I_FCMPd = 0x1e602000, + A64I_FCMPZd = 0x1e602008, + A64I_FCSELd = 0x1e600c00, + A64I_FRINTMd = 0x1e654000, + A64I_FRINTPd = 0x1e64c000, + A64I_FRINTZd = 0x1e65c000, + + A64I_FCVT_F32_F64 = 0x1e624000, + A64I_FCVT_F64_F32 = 0x1e22c000, + A64I_FCVT_F32_S32 = 0x1e220000, + A64I_FCVT_F64_S32 = 0x1e620000, + A64I_FCVT_F32_U32 = 0x1e230000, + A64I_FCVT_F64_U32 = 0x1e630000, + A64I_FCVT_F32_S64 = 0x9e220000, + A64I_FCVT_F64_S64 = 0x9e620000, + A64I_FCVT_F32_U64 = 0x9e230000, + A64I_FCVT_F64_U64 = 0x9e630000, + A64I_FCVT_S32_F64 = 0x1e780000, + A64I_FCVT_S32_F32 = 0x1e380000, + A64I_FCVT_U32_F64 = 0x1e790000, + A64I_FCVT_U32_F32 = 0x1e390000, + A64I_FCVT_S64_F64 = 0x9e780000, + A64I_FCVT_S64_F32 = 0x9e380000, + A64I_FCVT_U64_F64 = 0x9e790000, + A64I_FCVT_U64_F32 = 0x9e390000, + + A64I_FMOV_S = 0x1e204000, + A64I_FMOV_D = 0x1e604000, + A64I_FMOV_R_S = 0x1e260000, + A64I_FMOV_S_R = 0x1e270000, + A64I_FMOV_R_D = 0x9e660000, + A64I_FMOV_D_R = 0x9e670000, + A64I_FMOV_DI = 0x1e601000, +} A64Ins; + +typedef enum A64Shift { + A64SH_LSL, A64SH_LSR, A64SH_ASR, A64SH_ROR +} A64Shift; + +typedef enum A64Extend { + A64EX_UXTB, A64EX_UXTH, A64EX_UXTW, A64EX_UXTX, + A64EX_SXTB, A64EX_SXTH, A64EX_SXTW, A64EX_SXTX, +} A64Extend; + +/* ARM condition codes. */ +typedef enum A64CC { + CC_EQ, CC_NE, CC_CS, CC_CC, CC_MI, CC_PL, CC_VS, CC_VC, + CC_HI, CC_LS, CC_GE, CC_LT, CC_GT, CC_LE, CC_AL, + CC_HS = CC_CS, CC_LO = CC_CC +} A64CC; + +#endif diff --git a/src/lj_target_mips.h b/src/lj_target_mips.h index ec935494..da72d61a 100644 --- a/src/lj_target_mips.h +++ b/src/lj_target_mips.h @@ -13,11 +13,15 @@ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ _(R24) _(R25) _(SYS1) _(SYS2) _(R28) _(SP) _(R30) _(RA) +#if LJ_SOFTFP +#define FPRDEF(_) +#else #define FPRDEF(_) \ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) +#endif #define VRIDDEF(_) #define RIDENUM(name) RID_##name, @@ -39,7 +43,11 @@ enum { RID_RETHI = RID_R2, RID_RETLO = RID_R3, #endif +#if LJ_SOFTFP + RID_FPRET = RID_R2, +#else RID_FPRET = RID_F0, +#endif RID_CFUNCADDR = RID_R25, /* These definitions must match with the *.dasc file(s): */ @@ -52,8 +60,12 @@ enum { /* Register ranges [min, max) and number of registers. */ RID_MIN_GPR = RID_R0, RID_MAX_GPR = RID_RA+1, - RID_MIN_FPR = RID_F0, + RID_MIN_FPR = RID_MAX_GPR, +#if LJ_SOFTFP + RID_MAX_FPR = RID_MIN_FPR, +#else RID_MAX_FPR = RID_F31+1, +#endif RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ }; @@ -68,28 +80,60 @@ enum { (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ RID2RSET(RID_SYS1)|RID2RSET(RID_SYS2)|RID2RSET(RID_JGL)|RID2RSET(RID_GP)) #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) +#if LJ_SOFTFP +#define RSET_FPR 0 +#else +#if LJ_32 #define RSET_FPR \ (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ RID2RSET(RID_F16)|RID2RSET(RID_F18)|RID2RSET(RID_F20)|RID2RSET(RID_F22)|\ RID2RSET(RID_F24)|RID2RSET(RID_F26)|RID2RSET(RID_F28)|RID2RSET(RID_F30)) -#define RSET_ALL (RSET_GPR|RSET_FPR) -#define RSET_INIT RSET_ALL +#else +#define RSET_FPR RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) +#endif +#endif +#define RSET_ALL (RSET_GPR|RSET_FPR) +#define RSET_INIT RSET_ALL #define RSET_SCRATCH_GPR \ (RSET_RANGE(RID_R1, RID_R15+1)|\ RID2RSET(RID_R24)|RID2RSET(RID_R25)) +#if LJ_SOFTFP +#define RSET_SCRATCH_FPR 0 +#else +#if LJ_32 #define RSET_SCRATCH_FPR \ (RID2RSET(RID_F0)|RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(RID_F6)|\ RID2RSET(RID_F8)|RID2RSET(RID_F10)|RID2RSET(RID_F12)|RID2RSET(RID_F14)|\ RID2RSET(RID_F16)|RID2RSET(RID_F18)) +#else +#define RSET_SCRATCH_FPR RSET_RANGE(RID_F0, RID_F24) +#endif +#endif #define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) #define REGARG_FIRSTGPR RID_R4 +#if LJ_32 #define REGARG_LASTGPR RID_R7 #define REGARG_NUMGPR 4 +#else +#define REGARG_LASTGPR RID_R11 +#define REGARG_NUMGPR 8 +#endif +#if LJ_ABI_SOFTFP +#define REGARG_FIRSTFPR 0 +#define REGARG_LASTFPR 0 +#define REGARG_NUMFPR 0 +#else #define REGARG_FIRSTFPR RID_F12 +#if LJ_32 #define REGARG_LASTFPR RID_F14 #define REGARG_NUMFPR 2 +#else +#define REGARG_LASTFPR RID_F19 +#define REGARG_NUMFPR 8 +#endif +#endif /* -- Spill slots --------------------------------------------------------- */ @@ -100,7 +144,11 @@ enum { ** ** SPS_FIRST: First spill slot for general use. */ +#if LJ_32 #define SPS_FIXED 5 +#else +#define SPS_FIXED 4 +#endif #define SPS_FIRST 4 #define SPOFS_TMP 0 @@ -112,8 +160,10 @@ enum { /* This definition must match with the *.dasc file(s). */ typedef struct { +#if !LJ_SOFTFP lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ - int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ +#endif + intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ int32_t spill[256]; /* Spill slots. */ } ExitState; @@ -142,52 +192,87 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) #define MIPSF_F(r) ((r) << 6) #define MIPSF_A(n) ((n) << 6) #define MIPSF_M(n) ((n) << 11) +#define MIPSF_L(n) ((n) << 6) typedef enum MIPSIns { + MIPSI_D = 0x38, + MIPSI_DV = 0x10, + MIPSI_D32 = 0x3c, /* Integer instructions. */ - MIPSI_MOVE = 0x00000021, + MIPSI_MOVE = 0x00000025, MIPSI_NOP = 0x00000000, MIPSI_LI = 0x24000000, MIPSI_LU = 0x34000000, MIPSI_LUI = 0x3c000000, - MIPSI_ADDIU = 0x24000000, + MIPSI_AND = 0x00000024, MIPSI_ANDI = 0x30000000, + MIPSI_OR = 0x00000025, MIPSI_ORI = 0x34000000, + MIPSI_XOR = 0x00000026, MIPSI_XORI = 0x38000000, + MIPSI_NOR = 0x00000027, + + MIPSI_SLT = 0x0000002a, + MIPSI_SLTU = 0x0000002b, MIPSI_SLTI = 0x28000000, MIPSI_SLTIU = 0x2c000000, MIPSI_ADDU = 0x00000021, + MIPSI_ADDIU = 0x24000000, + MIPSI_SUB = 0x00000022, MIPSI_SUBU = 0x00000023, + +#if !LJ_TARGET_MIPSR6 MIPSI_MUL = 0x70000002, - MIPSI_AND = 0x00000024, - MIPSI_OR = 0x00000025, - MIPSI_XOR = 0x00000026, - MIPSI_NOR = 0x00000027, - MIPSI_SLT = 0x0000002a, - MIPSI_SLTU = 0x0000002b, + MIPSI_DIV = 0x0000001a, + MIPSI_DIVU = 0x0000001b, + MIPSI_MOVZ = 0x0000000a, MIPSI_MOVN = 0x0000000b, + MIPSI_MFHI = 0x00000010, + MIPSI_MFLO = 0x00000012, + MIPSI_MULT = 0x00000018, +#else + MIPSI_MUL = 0x00000098, + MIPSI_MUH = 0x000000d8, + MIPSI_DIV = 0x0000009a, + MIPSI_DIVU = 0x0000009b, + + MIPSI_SELEQZ = 0x00000035, + MIPSI_SELNEZ = 0x00000037, +#endif MIPSI_SLL = 0x00000000, MIPSI_SRL = 0x00000002, MIPSI_SRA = 0x00000003, - MIPSI_ROTR = 0x00200002, /* MIPS32R2 */ + MIPSI_ROTR = 0x00200002, /* MIPSXXR2 */ + MIPSI_DROTR = 0x0020003a, + MIPSI_DROTR32 = 0x0020003e, MIPSI_SLLV = 0x00000004, MIPSI_SRLV = 0x00000006, MIPSI_SRAV = 0x00000007, - MIPSI_ROTRV = 0x00000046, /* MIPS32R2 */ + MIPSI_ROTRV = 0x00000046, /* MIPSXXR2 */ + MIPSI_DROTRV = 0x00000056, + + MIPSI_INS = 0x7c000004, /* MIPSXXR2 */ - MIPSI_SEB = 0x7c000420, /* MIPS32R2 */ - MIPSI_SEH = 0x7c000620, /* MIPS32R2 */ - MIPSI_WSBH = 0x7c0000a0, /* MIPS32R2 */ + MIPSI_SEB = 0x7c000420, /* MIPSXXR2 */ + MIPSI_SEH = 0x7c000620, /* MIPSXXR2 */ + MIPSI_WSBH = 0x7c0000a0, /* MIPSXXR2 */ + MIPSI_DSBH = 0x7c0000a4, MIPSI_B = 0x10000000, MIPSI_J = 0x08000000, MIPSI_JAL = 0x0c000000, +#if !LJ_TARGET_MIPSR6 + MIPSI_JALX = 0x74000000, MIPSI_JR = 0x00000008, +#else + MIPSI_JR = 0x00000009, + MIPSI_BALC = 0xe8000000, +#endif MIPSI_JALR = 0x0000f809, MIPSI_BEQ = 0x10000000, @@ -199,7 +284,9 @@ typedef enum MIPSIns { /* Load/store instructions. */ MIPSI_LW = 0x8c000000, + MIPSI_LD = 0xdc000000, MIPSI_SW = 0xac000000, + MIPSI_SD = 0xfc000000, MIPSI_LB = 0x80000000, MIPSI_SB = 0xa0000000, MIPSI_LH = 0x84000000, @@ -211,11 +298,69 @@ typedef enum MIPSIns { MIPSI_LDC1 = 0xd4000000, MIPSI_SDC1 = 0xf4000000, + /* MIPS64 instructions. */ + MIPSI_DADD = 0x0000002c, + MIPSI_DADDU = 0x0000002d, + MIPSI_DADDIU = 0x64000000, + MIPSI_DSUB = 0x0000002e, + MIPSI_DSUBU = 0x0000002f, +#if !LJ_TARGET_MIPSR6 + MIPSI_DDIV = 0x0000001e, + MIPSI_DDIVU = 0x0000001f, + MIPSI_DMULT = 0x0000001c, + MIPSI_DMULTU = 0x0000001d, +#else + MIPSI_DDIV = 0x0000009e, + MIPSI_DMOD = 0x000000de, + MIPSI_DDIVU = 0x0000009f, + MIPSI_DMODU = 0x000000df, + MIPSI_DMUL = 0x0000009c, + MIPSI_DMUH = 0x000000dc, +#endif + + MIPSI_DSLL = 0x00000038, + MIPSI_DSRL = 0x0000003a, + MIPSI_DSLLV = 0x00000014, + MIPSI_DSRLV = 0x00000016, + MIPSI_DSRA = 0x0000003b, + MIPSI_DSRAV = 0x00000017, + MIPSI_DSRA32 = 0x0000003f, + MIPSI_DSLL32 = 0x0000003c, + MIPSI_DSRL32 = 0x0000003e, + MIPSI_DSHD = 0x7c000164, + + MIPSI_AADDU = LJ_32 ? MIPSI_ADDU : MIPSI_DADDU, + MIPSI_AADDIU = LJ_32 ? MIPSI_ADDIU : MIPSI_DADDIU, + MIPSI_ASUBU = LJ_32 ? MIPSI_SUBU : MIPSI_DSUBU, + MIPSI_AL = LJ_32 ? MIPSI_LW : MIPSI_LD, + MIPSI_AS = LJ_32 ? MIPSI_SW : MIPSI_SD, +#if LJ_TARGET_MIPSR6 + MIPSI_LSA = 0x00000005, + MIPSI_DLSA = 0x00000015, + MIPSI_ALSA = LJ_32 ? MIPSI_LSA : MIPSI_DLSA, +#endif + + /* Extract/insert instructions. */ + MIPSI_DEXTM = 0x7c000001, + MIPSI_DEXTU = 0x7c000002, + MIPSI_DEXT = 0x7c000003, + MIPSI_DINSM = 0x7c000005, + MIPSI_DINSU = 0x7c000006, + MIPSI_DINS = 0x7c000007, + + MIPSI_FLOOR_D = 0x4620000b, + /* FP instructions. */ MIPSI_MOV_S = 0x46000006, MIPSI_MOV_D = 0x46200006, +#if !LJ_TARGET_MIPSR6 MIPSI_MOVT_D = 0x46210011, MIPSI_MOVF_D = 0x46200011, +#else + MIPSI_MIN_D = 0x4620001C, + MIPSI_MAX_D = 0x4620001E, + MIPSI_SEL_D = 0x46200010, +#endif MIPSI_ABS_D = 0x46200005, MIPSI_NEG_D = 0x46200007, @@ -235,23 +380,37 @@ typedef enum MIPSIns { MIPSI_CVT_W_D = 0x46200024, MIPSI_CVT_S_W = 0x46800020, MIPSI_CVT_D_W = 0x46800021, + MIPSI_CVT_S_L = 0x46a00020, + MIPSI_CVT_D_L = 0x46a00021, MIPSI_TRUNC_W_S = 0x4600000d, MIPSI_TRUNC_W_D = 0x4620000d, + MIPSI_TRUNC_L_S = 0x46000009, + MIPSI_TRUNC_L_D = 0x46200009, MIPSI_FLOOR_W_S = 0x4600000f, MIPSI_FLOOR_W_D = 0x4620000f, MIPSI_MFC1 = 0x44000000, MIPSI_MTC1 = 0x44800000, + MIPSI_DMTC1 = 0x44a00000, + MIPSI_DMFC1 = 0x44200000, +#if !LJ_TARGET_MIPSR6 MIPSI_BC1F = 0x45000000, MIPSI_BC1T = 0x45010000, - MIPSI_C_EQ_D = 0x46200032, + MIPSI_C_OLT_S = 0x46000034, MIPSI_C_OLT_D = 0x46200034, MIPSI_C_ULT_D = 0x46200035, MIPSI_C_OLE_D = 0x46200036, MIPSI_C_ULE_D = 0x46200037, +#else + MIPSI_BC1EQZ = 0x45200000, + MIPSI_BC1NEZ = 0x45a00000, + MIPSI_CMP_EQ_D = 0x46a00002, + MIPSI_CMP_LT_S = 0x46800004, + MIPSI_CMP_LT_D = 0x46a00004, +#endif } MIPSIns; diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h index b4f600eb..bc9802a4 100644 --- a/src/lj_target_ppc.h +++ b/src/lj_target_ppc.h @@ -104,7 +104,7 @@ enum { /* This definition must match with the *.dasc file(s). */ typedef struct { lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ - int32_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ + intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ int32_t spill[256]; /* Spill slots. */ } ExitState; diff --git a/src/lj_target_x86.h b/src/lj_target_x86.h index 69aec37c..69cb8ca5 100644 --- a/src/lj_target_x86.h +++ b/src/lj_target_x86.h @@ -22,7 +22,7 @@ _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) #endif #define VRIDDEF(_) \ - _(MRM) + _(MRM) _(RIP) #define RIDENUM(name) RID_##name, @@ -31,15 +31,16 @@ enum { FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ RID_MAX, RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ + RID_RIP = RID_MAX+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */ /* Calling conventions. */ + RID_SP = RID_ESP, RID_RET = RID_EAX, #if LJ_64 RID_FPRET = RID_XMM0, -#else +#endif RID_RETLO = RID_EAX, RID_RETHI = RID_EDX, -#endif /* These definitions must match with the *.dasc file(s): */ RID_BASE = RID_EDX, /* Interpreter BASE. */ @@ -62,8 +63,10 @@ enum { /* -- Register sets ------------------------------------------------------- */ -/* Make use of all registers, except the stack pointer. */ -#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR)-RID2RSET(RID_ESP)) +/* Make use of all registers, except the stack pointer (and maybe DISPATCH). */ +#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \ + - RID2RSET(RID_ESP) \ + - LJ_GC64*RID2RSET(RID_DISPATCH)) #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) #define RSET_ALL (RSET_GPR|RSET_FPR) #define RSET_INIT RSET_ALL @@ -131,7 +134,11 @@ enum { #define SPS_FIXED (4*2) #define SPS_FIRST (4*2) /* Don't use callee register save area. */ #else +#if LJ_GC64 +#define SPS_FIXED 2 +#else #define SPS_FIXED 4 +#endif #define SPS_FIRST 2 #endif #else @@ -157,6 +164,8 @@ typedef struct { #define EXITSTUB_SPACING (2+2) #define EXITSTUBS_PER_GROUP 32 +#define EXITTRACE_VMSTATE 1 /* g->vmstate has traceno on exit. */ + /* -- x86 ModRM operand encoding ------------------------------------------ */ typedef enum { @@ -184,12 +193,18 @@ typedef struct { #define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) #define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) +#define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24))) +#define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24))) +#define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24))) +#define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24))) + /* This list of x86 opcodes is not intended to be complete. Opcodes are only ** included when needed. Take a look at DynASM or jit.dis_x86 to see the ** whole mess. */ typedef enum { /* Fixed length opcodes. XI_* prefix. */ + XI_O16 = 0x66, XI_NOP = 0x90, XI_XCHGa = 0x90, XI_CALL = 0xe8, @@ -207,26 +222,28 @@ typedef enum { XI_PUSHi8 = 0x6a, XI_TESTb = 0x84, XI_TEST = 0x85, + XI_INT3 = 0xcc, XI_MOVmi = 0xc7, XI_GROUP5 = 0xff, /* Note: little-endian byte-order! */ XI_FLDZ = 0xeed9, XI_FLD1 = 0xe8d9, - XI_FLDLG2 = 0xecd9, - XI_FLDLN2 = 0xedd9, XI_FDUP = 0xc0d9, /* Really fld st0. */ XI_FPOP = 0xd8dd, /* Really fstp st0. */ XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ XI_FRNDINT = 0xfcd9, - XI_FSIN = 0xfed9, - XI_FCOS = 0xffd9, - XI_FPTAN = 0xf2d9, - XI_FPATAN = 0xf3d9, XI_FSCALE = 0xfdd9, XI_FYL2X = 0xf1d9, + /* VEX-encoded instructions. XV_* prefix. */ + XV_RORX = XV_f20f3a(f0), + XV_SARX = XV_f30f38(f7), + XV_SHLX = XV_660f38(f7), + XV_SHRX = XV_f20f38(f7), + /* Variable-length opcodes. XO_* prefix. */ + XO_OR = XO_(0b), XO_MOV = XO_(8b), XO_MOVto = XO_(89), XO_MOVtow = XO_66(89), @@ -277,10 +294,8 @@ typedef enum { XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ XO_UCOMISD = XO_660f(2e), XO_CVTSI2SD = XO_f20f(2a), - XO_CVTSD2SI = XO_f20f(2d), XO_CVTTSD2SI= XO_f20f(2c), XO_CVTSI2SS = XO_f30f(2a), - XO_CVTSS2SI = XO_f30f(2d), XO_CVTTSS2SI= XO_f30f(2c), XO_CVTSS2SD = XO_f30f(5a), XO_CVTSD2SS = XO_f20f(5a), diff --git a/src/lj_trace.c b/src/lj_trace.c index 89c3c5ed..c2329394 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -30,6 +30,7 @@ #include "lj_vm.h" #include "lj_vmevent.h" #include "lj_target.h" +#include "lj_prng.h" /* -- Error handling ------------------------------------------------------ */ @@ -104,7 +105,8 @@ static void perftools_addtrace(GCtrace *T) name++; else name = "(string)"; - lua_assert(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc); + lj_assertX(startpc >= proto_bc(pt) && startpc < proto_bc(pt) + pt->sizebc, + "trace PC out of range"); lineno = lj_debug_line(pt, proto_bcpos(pt, startpc)); if (!fp) { char fname[40]; @@ -117,15 +119,26 @@ static void perftools_addtrace(GCtrace *T) } #endif -/* Allocate space for copy of trace. */ -static GCtrace *trace_save_alloc(jit_State *J) +/* Allocate space for copy of T. */ +GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T) { size_t sztr = ((sizeof(GCtrace)+7)&~7); - size_t szins = (J->cur.nins-J->cur.nk)*sizeof(IRIns); + size_t szins = (T->nins-T->nk)*sizeof(IRIns); size_t sz = sztr + szins + - J->cur.nsnap*sizeof(SnapShot) + - J->cur.nsnapmap*sizeof(SnapEntry); - return lj_mem_newt(J->L, (MSize)sz, GCtrace); + T->nsnap*sizeof(SnapShot) + + T->nsnapmap*sizeof(SnapEntry); + GCtrace *T2 = lj_mem_newt(L, (MSize)sz, GCtrace); + char *p = (char *)T2 + sztr; + T2->gct = ~LJ_TTRACE; + T2->marked = 0; + T2->traceno = 0; + T2->ir = (IRIns *)p - T->nk; + T2->nins = T->nins; + T2->nk = T->nk; + T2->nsnap = T->nsnap; + T2->nsnapmap = T->nsnapmap; + memcpy(p, T->ir + T->nk, szins); + return T2; } /* Save current trace by copying and compacting it. */ @@ -139,12 +152,12 @@ static void trace_save(jit_State *J, GCtrace *T) setgcrefp(J2G(J)->gc.root, T); newwhite(J2G(J), T); T->gct = ~LJ_TTRACE; - T->ir = (IRIns *)p - J->cur.nk; - memcpy(p, J->cur.ir+J->cur.nk, szins); + T->ir = (IRIns *)p - J->cur.nk; /* The IR has already been copied above. */ p += szins; TRACE_APPENDVEC(snap, nsnap, SnapShot) TRACE_APPENDVEC(snapmap, nsnapmap, SnapEntry) J->cur.traceno = 0; + J->curfinal = NULL; setgcrefp(J->trace[T->traceno], T); lj_gc_barriertrace(J2G(J), T->traceno); lj_gdbjit_addtrace(J, T); @@ -172,7 +185,7 @@ void lj_trace_reenableproto(GCproto *pt) { if ((pt->flags & PROTO_ILOOP)) { BCIns *bc = proto_bc(pt); - BCPos i, sizebc = pt->sizebc;; + BCPos i, sizebc = pt->sizebc; pt->flags &= ~PROTO_ILOOP; if (bc_op(bc[0]) == BC_IFUNCF) setbc_op(&bc[0], BC_FUNCF); @@ -194,27 +207,28 @@ static void trace_unpatch(jit_State *J, GCtrace *T) return; /* No need to unpatch branches in parent traces (yet). */ switch (bc_op(*pc)) { case BC_JFORL: - lua_assert(traceref(J, bc_d(*pc)) == T); + lj_assertJ(traceref(J, bc_d(*pc)) == T, "JFORL references other trace"); *pc = T->startins; pc += bc_j(T->startins); - lua_assert(bc_op(*pc) == BC_JFORI); + lj_assertJ(bc_op(*pc) == BC_JFORI, "FORL does not point to JFORI"); setbc_op(pc, BC_FORI); break; case BC_JITERL: case BC_JLOOP: - lua_assert(op == BC_ITERL || op == BC_LOOP || bc_isret(op)); + lj_assertJ(op == BC_ITERL || op == BC_ITERN || op == BC_LOOP || + bc_isret(op), "bad original bytecode %d", op); *pc = T->startins; break; case BC_JMP: - lua_assert(op == BC_ITERL); + lj_assertJ(op == BC_ITERL, "bad original bytecode %d", op); pc += bc_j(*pc)+2; if (bc_op(*pc) == BC_JITERL) { - lua_assert(traceref(J, bc_d(*pc)) == T); + lj_assertJ(traceref(J, bc_d(*pc)) == T, "JITERL references other trace"); *pc = T->startins; } break; case BC_JFUNCF: - lua_assert(op == BC_FUNCF); + lj_assertJ(op == BC_FUNCF, "bad original bytecode %d", op); *pc = T->startins; break; default: /* Already unpatched. */ @@ -226,7 +240,8 @@ static void trace_unpatch(jit_State *J, GCtrace *T) static void trace_flushroot(jit_State *J, GCtrace *T) { GCproto *pt = &gcref(T->startpt)->pt; - lua_assert(T->root == 0 && pt != NULL); + lj_assertJ(T->root == 0, "not a root trace"); + lj_assertJ(pt != NULL, "trace has no prototype"); /* First unpatch any modified bytecode. */ trace_unpatch(J, T); /* Unlink root trace from chain anchored in prototype. */ @@ -274,7 +289,7 @@ int lj_trace_flushall(lua_State *L) if (T->root == 0) trace_flushroot(J, T); lj_gdbjit_deltrace(J, T); - T->traceno = 0; + T->traceno = T->link = 0; /* Blacklist the link for cont_stitch. */ setgcrefnull(J->trace[i]); } } @@ -296,13 +311,42 @@ void lj_trace_initstate(global_State *g) { jit_State *J = G2J(g); TValue *tv; - /* Initialize SIMD constants. */ + + /* Initialize aligned SIMD constants. */ tv = LJ_KSIMD(J, LJ_KSIMD_ABS); tv[0].u64 = U64x(7fffffff,ffffffff); tv[1].u64 = U64x(7fffffff,ffffffff); tv = LJ_KSIMD(J, LJ_KSIMD_NEG); tv[0].u64 = U64x(80000000,00000000); tv[1].u64 = U64x(80000000,00000000); + + /* Initialize 32/64 bit constants. */ +#if LJ_TARGET_X86ORX64 + J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); +#if LJ_32 + J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000); +#endif + J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); + J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000; +#endif +#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 + J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); +#endif +#if LJ_TARGET_PPC + J->k32[LJ_K32_2P52_2P31] = 0x59800004; + J->k32[LJ_K32_2P52] = 0x59800000; +#endif +#if LJ_TARGET_PPC || LJ_TARGET_MIPS + J->k32[LJ_K32_2P31] = 0x4f000000; +#endif +#if LJ_TARGET_MIPS + J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000); +#if LJ_64 + J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000); + J->k32[LJ_K32_2P63] = 0x5f000000; + J->k32[LJ_K32_M2P64] = 0xdf800000; +#endif +#endif } /* Free everything associated with the JIT compiler state. */ @@ -313,11 +357,11 @@ void lj_trace_freestate(global_State *g) { /* This assumes all traces have already been freed. */ ptrdiff_t i; for (i = 1; i < (ptrdiff_t)J->sizetrace; i++) - lua_assert(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL); + lj_assertG(i == (ptrdiff_t)J->cur.traceno || traceref(J, i) == NULL, + "trace still allocated"); } #endif lj_mcode_free(J); - lj_ir_k64_freeall(J); lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry); lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot); lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns); @@ -329,8 +373,13 @@ void lj_trace_freestate(global_State *g) /* Blacklist a bytecode instruction. */ static void blacklist_pc(GCproto *pt, BCIns *pc) { - setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP); - pt->flags |= PROTO_ILOOP; + if (bc_op(*pc) == BC_ITERN) { + setbc_op(pc, BC_ITERC); + setbc_op(pc+1+bc_j(pc[1]), BC_JMP); + } else { + setbc_op(pc, (int)bc_op(*pc)+(int)BC_ILOOP-(int)BC_LOOP); + pt->flags |= PROTO_ILOOP; + } } /* Penalize a bytecode instruction. */ @@ -341,7 +390,7 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ /* First try to bump its hotcount several times. */ val = ((uint32_t)J->penalty[i].val << 1) + - LJ_PRNG_BITS(J, PENALTY_RNDBITS); + (lj_prng_u64(&J2G(J)->prng) & ((1u<<PENALTY_RNDBITS)-1)); if (val > PENALTY_MAX) { blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ return; @@ -367,10 +416,11 @@ static void trace_start(jit_State *J) TraceNo traceno; if ((J->pt->flags & PROTO_NOJIT)) { /* JIT disabled for this proto? */ - if (J->parent == 0) { + if (J->parent == 0 && J->exitno == 0 && bc_op(*J->pc) != BC_ITERN) { /* Lazy bytecode patching to disable hotcount events. */ - lua_assert(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || - bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF); + lj_assertJ(bc_op(*J->pc) == BC_FORL || bc_op(*J->pc) == BC_ITERL || + bc_op(*J->pc) == BC_LOOP || bc_op(*J->pc) == BC_FUNCF, + "bad hot bytecode %d", bc_op(*J->pc)); setbc_op(J->pc, (int)bc_op(*J->pc)+(int)BC_ILOOP-(int)BC_LOOP); J->pt->flags |= PROTO_ILOOP; } @@ -381,7 +431,8 @@ static void trace_start(jit_State *J) /* Get a new trace number. */ traceno = trace_findfree(J); if (LJ_UNLIKELY(traceno == 0)) { /* No free trace? */ - lua_assert((J2G(J)->hookmask & HOOK_GC) == 0); + lj_assertJ((J2G(J)->hookmask & HOOK_GC) == 0, + "recorder called from GC hook"); lj_trace_flushall(J->L); J->state = LJ_TRACE_IDLE; /* Silently ignored. */ return; @@ -401,6 +452,8 @@ static void trace_start(jit_State *J) J->guardemit.irt = 0; J->postproc = LJ_POST_NONE; lj_resetsplit(J); + J->retryrec = 0; + J->ktrace = 0; setgcref(J->cur.startpt, obj2gco(J->pt)); L = J->L; @@ -412,6 +465,12 @@ static void trace_start(jit_State *J) if (J->parent) { setintV(L->top++, J->parent); setintV(L->top++, J->exitno); + } else { + BCOp op = bc_op(*J->pc); + if (op == BC_CALLM || op == BC_CALL || op == BC_ITERC) { + setintV(L->top++, J->exitno); /* Parent of stitched trace. */ + setintV(L->top++, -1); + } } ); lj_record_setup(J); @@ -424,7 +483,7 @@ static void trace_stop(jit_State *J) BCOp op = bc_op(J->cur.startins); GCproto *pt = &gcref(J->cur.startpt)->pt; TraceNo traceno = J->cur.traceno; - GCtrace *T = trace_save_alloc(J); /* Do this first. May throw OOM. */ + GCtrace *T = J->curfinal; lua_State *L; switch (op) { @@ -442,6 +501,7 @@ static void trace_stop(jit_State *J) J->cur.nextroot = pt->trace; pt->trace = (TraceNo1)traceno; break; + case BC_ITERN: case BC_RET: case BC_RET0: case BC_RET1: @@ -449,7 +509,7 @@ static void trace_stop(jit_State *J) goto addroot; case BC_JMP: /* Patch exit branch in parent to side trace entry. */ - lua_assert(J->parent != 0 && J->cur.root != 0); + lj_assertJ(J->parent != 0 && J->cur.root != 0, "not a side trace"); lj_asm_patchexit(J, traceref(J, J->parent), J->exitno, J->cur.mcode); /* Avoid compiling a side trace twice (stack resizing uses parent exit). */ { @@ -465,8 +525,14 @@ static void trace_stop(jit_State *J) root->nextside = (TraceNo1)traceno; } break; + case BC_CALLM: + case BC_CALL: + case BC_ITERC: + /* Trace stitching: patch link of previous trace. */ + traceref(J, J->exitno)->link = traceno; + break; default: - lua_assert(0); + lj_assertJ(0, "bad stop bytecode %d", op); break; } @@ -479,6 +545,7 @@ static void trace_stop(jit_State *J) lj_vmevent_send(L, TRACE, setstrV(L, L->top++, lj_str_newlit(L, "stop")); setintV(L->top++, traceno); + setfuncV(L, L->top++, J->fn); ); } @@ -486,8 +553,8 @@ static void trace_stop(jit_State *J) static int trace_downrec(jit_State *J) { /* Restart recording at the return instruction. */ - lua_assert(J->pt != NULL); - lua_assert(bc_isret(bc_op(*J->pc))); + lj_assertJ(J->pt != NULL, "no active prototype"); + lj_assertJ(bc_isret(bc_op(*J->pc)), "not at a return bytecode"); if (bc_op(*J->pc) == BC_RETM) return 0; /* NYI: down-recursion with RETM. */ J->parent = 0; @@ -506,6 +573,10 @@ static int trace_abort(jit_State *J) J->postproc = LJ_POST_NONE; lj_mcode_abort(J); + if (J->curfinal) { + lj_trace_free(J2G(J), J->curfinal); + J->curfinal = NULL; + } if (tvisnumber(L->top-1)) e = (TraceError)numberVint(L->top-1); if (e == LJ_TRERR_MCODELM) { @@ -514,8 +585,17 @@ static int trace_abort(jit_State *J) return 1; /* Retry ASM with new MCode area. */ } /* Penalize or blacklist starting bytecode instruction. */ - if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) - penalty_pc(J, &gcref(J->cur.startpt)->pt, mref(J->cur.startpc, BCIns), e); + if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { + if (J->exitno == 0) { + BCIns *startpc = mref(J->cur.startpc, BCIns); + if (e == LJ_TRERR_RETRY) + hotcount_set(J2GG(J), startpc+1, 1); /* Immediate retry. */ + else + penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e); + } else { + traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */ + } + } /* Is there anything to abort? */ traceno = J->cur.traceno; @@ -581,8 +661,13 @@ static TValue *trace_state(lua_State *L, lua_CFunction dummy, void *ud) J->state = LJ_TRACE_RECORD; /* trace_start() may change state. */ trace_start(J); lj_dispatch_update(J2G(J)); - break; + if (J->state != LJ_TRACE_RECORD_1ST) + break; + /* fallthrough */ + case LJ_TRACE_RECORD_1ST: + J->state = LJ_TRACE_RECORD; + /* fallthrough */ case LJ_TRACE_RECORD: trace_pendpatch(J, 0); setvmstate(J2G(J), RECORD); @@ -688,15 +773,30 @@ static void trace_hotside(jit_State *J, const BCIns *pc) { SnapShot *snap = &traceref(J, J->parent)->snap[J->exitno]; if (!(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT)) && + isluafunc(curr_func(J->L)) && snap->count != SNAPCOUNT_DONE && ++snap->count >= J->param[JIT_P_hotexit]) { - lua_assert(J->state == LJ_TRACE_IDLE); + lj_assertJ(J->state == LJ_TRACE_IDLE, "hot side exit while recording"); /* J->parent is non-zero for a side trace. */ J->state = LJ_TRACE_START; lj_trace_ins(J, pc); } } +/* Stitch a new trace to the previous trace. */ +void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc) +{ + /* Only start a new trace if not recording or inside __gc call or vmevent. */ + if (J->state == LJ_TRACE_IDLE && + !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) { + J->parent = 0; /* Have to treat it like a root trace. */ + /* J->exitno is set to the invoking trace. */ + J->state = LJ_TRACE_START; + lj_trace_ins(J, pc); + } +} + + /* Tiny struct to pass data to protected call. */ typedef struct ExitDataCP { jit_State *J; @@ -740,7 +840,7 @@ static void trace_exit_regs(lua_State *L, ExitState *ex) } #endif -#ifdef EXITSTATE_PCREG +#if defined(EXITSTATE_PCREG) || (LJ_UNWIND_JIT && !EXITTRACE_VMSTATE) /* Determine trace number from pc of exit instruction. */ static TraceNo trace_exit_find(jit_State *J, MCode *pc) { @@ -750,7 +850,7 @@ static TraceNo trace_exit_find(jit_State *J, MCode *pc) if (T && pc >= T->mcode && pc < (MCode *)((char *)T->mcode + T->szmcode)) return traceno; } - lua_assert(0); + lj_assertJ(0, "bad exit pc"); return 0; } #endif @@ -762,40 +862,55 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) lua_State *L = J->L; ExitState *ex = (ExitState *)exptr; ExitDataCP exd; - int errcode; + int errcode, exitcode = J->exitcode; + TValue exiterr; const BCIns *pc; void *cf; GCtrace *T; + + setnilV(&exiterr); + if (exitcode) { /* Trace unwound with error code. */ + J->exitcode = 0; + copyTV(L, &exiterr, L->top-1); + } + #ifdef EXITSTATE_PCREG J->parent = trace_exit_find(J, (MCode *)(intptr_t)ex->gpr[EXITSTATE_PCREG]); #endif T = traceref(J, J->parent); UNUSED(T); #ifdef EXITSTATE_CHECKEXIT if (J->exitno == T->nsnap) { /* Treat stack check like a parent exit. */ - lua_assert(T->root != 0); + lj_assertJ(T->root != 0, "stack check in root trace"); J->exitno = T->ir[REF_BASE].op2; J->parent = T->ir[REF_BASE].op1; T = traceref(J, J->parent); } #endif - lua_assert(T != NULL && J->exitno < T->nsnap); + lj_assertJ(T != NULL && J->exitno < T->nsnap, "bad trace or exit number"); exd.J = J; exd.exptr = exptr; errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); if (errcode) return -errcode; /* Return negated error code. */ - lj_vmevent_send(L, TEXIT, - lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); - setintV(L->top++, J->parent); - setintV(L->top++, J->exitno); - trace_exit_regs(L, ex); - ); + if (exitcode) copyTV(L, L->top++, &exiterr); /* Anchor the error object. */ + + if (!(LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE))) + lj_vmevent_send(L, TEXIT, + lj_state_checkstack(L, 4+RID_NUM_GPR+RID_NUM_FPR+LUA_MINSTACK); + setintV(L->top++, J->parent); + setintV(L->top++, J->exitno); + trace_exit_regs(L, ex); + ); pc = exd.pc; cf = cframe_raw(L->cframe); setcframe_pc(cf, pc); - if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { + if (exitcode) { + return -exitcode; + } else if (LJ_HASPROFILE && (G(L)->hookmask & HOOK_PROFILE)) { + /* Just exit to interpreter. */ + } else if (G(L)->gc.state == GCSatomic || G(L)->gc.state == GCSfinalize) { if (!(G(L)->hookmask & HOOK_GC)) lj_gc_step(L); /* Exited because of GC: drive GC forward. */ } else { @@ -803,13 +918,14 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) } if (bc_op(*pc) == BC_JLOOP) { BCIns *retpc = &traceref(J, bc_d(*pc))->startins; - if (bc_isret(bc_op(*retpc))) { + int isret = bc_isret(bc_op(*retpc)); + if (isret || bc_op(*retpc) == BC_ITERN) { if (J->state == LJ_TRACE_RECORD) { J->patchins = *pc; J->patchpc = (BCIns *)pc; *J->patchpc = *retpc; J->bcskip = 1; - } else { + } else if (isret) { pc = retpc; setcframe_pc(cf, pc); } @@ -819,7 +935,7 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) ERRNO_RESTORE switch (bc_op(*pc)) { case BC_CALLM: case BC_CALLMT: - return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc)); + return (int)((BCReg)(L->top - L->base) - bc_a(*pc) - bc_c(*pc) - LJ_FR2); case BC_RETM: return (int)((BCReg)(L->top - L->base) + 1 - bc_a(*pc) - bc_d(*pc)); case BC_TSETM: @@ -831,4 +947,41 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) } } +#if LJ_UNWIND_JIT +/* Given an mcode address determine trace exit address for unwinding. */ +uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep) +{ +#if EXITTRACE_VMSTATE + TraceNo traceno = J2G(J)->vmstate; +#else + TraceNo traceno = trace_exit_find(J, (MCode *)addr); +#endif + GCtrace *T = traceref(J, traceno); + if (T +#if EXITTRACE_VMSTATE + && addr >= (uintptr_t)T->mcode && addr < (uintptr_t)T->mcode + T->szmcode +#endif + ) { + SnapShot *snap = T->snap; + SnapNo lo = 0, exitno = T->nsnap; + uintptr_t ofs = (uintptr_t)((MCode *)addr - T->mcode); /* MCode units! */ + /* Rightmost binary search for mcode offset to determine exit number. */ + do { + SnapNo mid = (lo+exitno) >> 1; + if (ofs < snap[mid].mcofs) exitno = mid; else lo = mid + 1; + } while (lo < exitno); + exitno--; + *ep = exitno; +#ifdef EXITSTUBS_PER_GROUP + return (uintptr_t)exitstub_addr(J, exitno); +#else + return (uintptr_t)exitstub_trace_addr(T, exitno); +#endif + } + /* Cannot correlate addr with trace/exit. This will be fatal. */ + lj_assertJ(0, "bad exit pc"); + return 0; +} +#endif + #endif diff --git a/src/lj_trace.h b/src/lj_trace.h index 0fc03672..3d7f76f0 100644 --- a/src/lj_trace.h +++ b/src/lj_trace.h @@ -23,6 +23,7 @@ LJ_FUNC_NORET void lj_trace_err(jit_State *J, TraceError e); LJ_FUNC_NORET void lj_trace_err_info(jit_State *J, TraceError e); /* Trace management. */ +LJ_FUNC GCtrace * LJ_FASTCALL lj_trace_alloc(lua_State *L, GCtrace *T); LJ_FUNC void LJ_FASTCALL lj_trace_free(global_State *g, GCtrace *T); LJ_FUNC void lj_trace_reenableproto(GCproto *pt); LJ_FUNC void lj_trace_flushproto(global_State *g, GCproto *pt); @@ -34,7 +35,11 @@ LJ_FUNC void lj_trace_freestate(global_State *g); /* Event handling. */ LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); +LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc); LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); +#if LJ_UNWIND_EXT +LJ_FUNC uintptr_t LJ_FASTCALL lj_trace_unwind(jit_State *J, uintptr_t addr, ExitNo *ep); +#endif /* Signal asynchronous abort of trace or end of trace. */ #define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) diff --git a/src/lj_traceerr.h b/src/lj_traceerr.h index a4e5ae64..8ed8ac82 100644 --- a/src/lj_traceerr.h +++ b/src/lj_traceerr.h @@ -7,10 +7,12 @@ /* Recording. */ TREDEF(RECERR, "error thrown or hook called during recording") +TREDEF(TRACEUV, "trace too short") TREDEF(TRACEOV, "trace too long") TREDEF(STACKOV, "trace too deep") TREDEF(SNAPOV, "too many snapshots") TREDEF(BLACKL, "blacklisted") +TREDEF(RETRY, "retry recording") TREDEF(NYIBC, "NYI: bytecode %d") /* Recording loop ops. */ @@ -23,8 +25,6 @@ TREDEF(BADTYPE, "bad argument type") TREDEF(CJITOFF, "JIT compilation disabled for function") TREDEF(CUNROLL, "call unroll limit reached") TREDEF(DOWNREC, "down-recursion, restarting") -TREDEF(NYICF, "NYI: C function %s") -TREDEF(NYIFF, "NYI: FastFunc %s") TREDEF(NYIFFU, "NYI: unsupported variant of FastFunc %s") TREDEF(NYIRETL, "NYI: return to lower frame") diff --git a/src/lj_udata.c b/src/lj_udata.c index 7dada848..ee4a145d 100644 --- a/src/lj_udata.c +++ b/src/lj_udata.c @@ -8,6 +8,7 @@ #include "lj_obj.h" #include "lj_gc.h" +#include "lj_err.h" #include "lj_udata.h" GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env) @@ -32,3 +33,30 @@ void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud) lj_mem_free(g, ud, sizeudata(ud)); } +#if LJ_64 +void *lj_lightud_intern(lua_State *L, void *p) +{ + global_State *g = G(L); + uint64_t u = (uint64_t)p; + uint32_t up = lightudup(u); + uint32_t *segmap = mref(g->gc.lightudseg, uint32_t); + MSize segnum = g->gc.lightudnum; + if (segmap) { + MSize seg; + for (seg = 0; seg <= segnum; seg++) + if (segmap[seg] == up) /* Fast path. */ + return (void *)(((uint64_t)seg << LJ_LIGHTUD_BITS_LO) | lightudlo(u)); + segnum++; + /* Leave last segment unused to avoid clash with ITERN key. */ + if (segnum >= (1 << LJ_LIGHTUD_BITS_SEG)-1) lj_err_msg(L, LJ_ERR_BADLU); + } + if (!((segnum-1) & segnum) && segnum != 1) { + lj_mem_reallocvec(L, segmap, segnum, segnum ? 2*segnum : 2u, uint32_t); + setmref(g->gc.lightudseg, segmap); + } + g->gc.lightudnum = segnum; + segmap[segnum] = up; + return (void *)(((uint64_t)segnum << LJ_LIGHTUD_BITS_LO) | lightudlo(u)); +} +#endif + diff --git a/src/lj_udata.h b/src/lj_udata.h index acd136a7..503c9e30 100644 --- a/src/lj_udata.h +++ b/src/lj_udata.h @@ -10,5 +10,8 @@ LJ_FUNC GCudata *lj_udata_new(lua_State *L, MSize sz, GCtab *env); LJ_FUNC void LJ_FASTCALL lj_udata_free(global_State *g, GCudata *ud); +#if LJ_64 +LJ_FUNC void * LJ_FASTCALL lj_lightud_intern(lua_State *L, void *p); +#endif #endif diff --git a/src/lj_vm.h b/src/lj_vm.h index b66f5b85..c66db004 100644 --- a/src/lj_vm.h +++ b/src/lj_vm.h @@ -17,11 +17,18 @@ LJ_ASMF int lj_vm_cpcall(lua_State *L, lua_CFunction func, void *ud, LJ_ASMF int lj_vm_resume(lua_State *L, TValue *base, int nres1, ptrdiff_t ef); LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_c(void *cframe, int errcode); LJ_ASMF_NORET void LJ_FASTCALL lj_vm_unwind_ff(void *cframe); +#if LJ_ABI_WIN && LJ_TARGET_X86 +LJ_ASMF_NORET void LJ_FASTCALL lj_vm_rtlunwind(void *cframe, void *excptrec, + void *unwinder, int errcode); +#endif LJ_ASMF void lj_vm_unwind_c_eh(void); LJ_ASMF void lj_vm_unwind_ff_eh(void); #if LJ_TARGET_X86ORX64 LJ_ASMF void lj_vm_unwind_rethrow(void); #endif +#if LJ_TARGET_MIPS +LJ_ASMF void lj_vm_unwind_stub(void); +#endif /* Miscellaneous functions. */ #if LJ_TARGET_X86ORX64 @@ -43,13 +50,15 @@ LJ_ASMF void lj_vm_record(void); LJ_ASMF void lj_vm_inshook(void); LJ_ASMF void lj_vm_rethook(void); LJ_ASMF void lj_vm_callhook(void); +LJ_ASMF void lj_vm_profhook(void); +LJ_ASMF void lj_vm_IITERN(void); /* Trace exit handling. */ LJ_ASMF void lj_vm_exit_handler(void); LJ_ASMF void lj_vm_exit_interp(void); /* Internal math helper functions. */ -#if LJ_TARGET_X86ORX64 || LJ_TARGET_PPC +#if LJ_TARGET_PPC || LJ_TARGET_ARM64 || (LJ_TARGET_MIPS && LJ_ABI_SOFTFP) #define lj_vm_floor floor #define lj_vm_ceil ceil #else @@ -60,23 +69,22 @@ LJ_ASMF double lj_vm_floor_sf(double); LJ_ASMF double lj_vm_ceil_sf(double); #endif #endif -#if defined(LUAJIT_NO_LOG2) || LJ_TARGET_X86ORX64 +#ifdef LUAJIT_NO_LOG2 LJ_ASMF double lj_vm_log2(double); #else #define lj_vm_log2 log2 #endif +#if !(defined(_LJ_DISPATCH_H) && LJ_TARGET_MIPS) +LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); +#endif #if LJ_HASJIT #if LJ_TARGET_X86ORX64 LJ_ASMF void lj_vm_floor_sse(void); LJ_ASMF void lj_vm_ceil_sse(void); LJ_ASMF void lj_vm_trunc_sse(void); -LJ_ASMF void lj_vm_exp_x87(void); -LJ_ASMF void lj_vm_exp2_x87(void); -LJ_ASMF void lj_vm_pow_sse(void); -LJ_ASMF void lj_vm_powi_sse(void); -#else -#if LJ_TARGET_PPC +#endif +#if LJ_TARGET_PPC || LJ_TARGET_ARM64 #define lj_vm_trunc trunc #else LJ_ASMF double lj_vm_trunc(double); @@ -84,17 +92,10 @@ LJ_ASMF double lj_vm_trunc(double); LJ_ASMF double lj_vm_trunc_sf(double); #endif #endif -LJ_ASMF double lj_vm_powi(double, int32_t); -#ifdef LUAJIT_NO_EXP2 -LJ_ASMF double lj_vm_exp2(double); -#else -#define lj_vm_exp2 exp2 -#endif -#endif -LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t); #if LJ_HASFFI LJ_ASMF int lj_vm_errno(void); #endif +LJ_ASMF TValue *lj_vm_next(GCtab *t, uint32_t idx); #endif /* Continuations for metamethods. */ @@ -104,8 +105,7 @@ LJ_ASMF void lj_cont_nop(void); /* Do nothing, just continue execution. */ LJ_ASMF void lj_cont_condt(void); /* Branch if result is true. */ LJ_ASMF void lj_cont_condf(void); /* Branch if result is false. */ LJ_ASMF void lj_cont_hook(void); /* Continue from hook yield. */ - -enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ +LJ_ASMF void lj_cont_stitch(void); /* Trace stitching. */ /* Start of the ASM code. */ LJ_ASMF char lj_vm_asm_begin[]; diff --git a/src/lj_vmevent.c b/src/lj_vmevent.c index 704af254..c8491d82 100644 --- a/src/lj_vmevent.c +++ b/src/lj_vmevent.c @@ -27,6 +27,7 @@ ptrdiff_t lj_vmevent_prepare(lua_State *L, VMEvent ev) if (tv && tvisfunc(tv)) { lj_state_checkstack(L, LUA_MINSTACK); setfuncV(L, L->top++, funcV(tv)); + if (LJ_FR2) setnilV(L->top++); return savestack(L, L->top); } } diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c index ff41ba28..b6cc60ba 100644 --- a/src/lj_vmmath.c +++ b/src/lj_vmmath.c @@ -13,16 +13,29 @@ #include "lj_ir.h" #include "lj_vm.h" -/* -- Helper functions for generated machine code ------------------------- */ +/* -- Wrapper functions --------------------------------------------------- */ -#if LJ_TARGET_X86ORX64 -/* Wrapper functions to avoid linker issues on OSX. */ -LJ_FUNCA double lj_vm_sinh(double x) { return sinh(x); } -LJ_FUNCA double lj_vm_cosh(double x) { return cosh(x); } -LJ_FUNCA double lj_vm_tanh(double x) { return tanh(x); } +#if LJ_TARGET_X86 && __ELF__ && __PIC__ +/* Wrapper functions to deal with the ELF/x86 PIC disaster. */ +LJ_FUNCA double lj_wrap_log(double x) { return log(x); } +LJ_FUNCA double lj_wrap_log10(double x) { return log10(x); } +LJ_FUNCA double lj_wrap_exp(double x) { return exp(x); } +LJ_FUNCA double lj_wrap_sin(double x) { return sin(x); } +LJ_FUNCA double lj_wrap_cos(double x) { return cos(x); } +LJ_FUNCA double lj_wrap_tan(double x) { return tan(x); } +LJ_FUNCA double lj_wrap_asin(double x) { return asin(x); } +LJ_FUNCA double lj_wrap_acos(double x) { return acos(x); } +LJ_FUNCA double lj_wrap_atan(double x) { return atan(x); } +LJ_FUNCA double lj_wrap_sinh(double x) { return sinh(x); } +LJ_FUNCA double lj_wrap_cosh(double x) { return cosh(x); } +LJ_FUNCA double lj_wrap_tanh(double x) { return tanh(x); } +LJ_FUNCA double lj_wrap_atan2(double x, double y) { return atan2(x, y); } +LJ_FUNCA double lj_wrap_pow(double x, double y) { return pow(x, y); } +LJ_FUNCA double lj_wrap_fmod(double x, double y) { return fmod(x, y); } #endif -#if !LJ_TARGET_X86ORX64 +/* -- Helper functions ---------------------------------------------------- */ + double lj_vm_foldarith(double x, double y, int op) { switch (op) { @@ -35,37 +48,22 @@ double lj_vm_foldarith(double x, double y, int op) case IR_NEG - IR_ADD: return -x; break; case IR_ABS - IR_ADD: return fabs(x); break; #if LJ_HASJIT - case IR_ATAN2 - IR_ADD: return atan2(x, y); break; case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; - case IR_MIN - IR_ADD: return x > y ? y : x; break; - case IR_MAX - IR_ADD: return x < y ? y : x; break; + case IR_MIN - IR_ADD: return x < y ? x : y; break; + case IR_MAX - IR_ADD: return x > y ? x : y; break; #endif default: return x; } } -#endif -#if LJ_HASJIT - -#ifdef LUAJIT_NO_LOG2 -double lj_vm_log2(double a) -{ - return log(a) * 1.4426950408889634074; -} -#endif - -#ifdef LUAJIT_NO_EXP2 -double lj_vm_exp2(double a) -{ - return exp(a * 0.6931471805599453); -} -#endif +/* -- Helper functions for generated machine code ------------------------- */ -#if !(LJ_TARGET_ARM || LJ_TARGET_PPC) +#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub; - lua_assert(b != 0); /* This must be checked before using this function. */ + /* This must be checked before using this function. */ + lj_assertX(b != 0, "modulo with zero divisor"); ua = a < 0 ? (uint32_t)-a : (uint32_t)a; ub = b < 0 ? (uint32_t)-b : (uint32_t)b; y = ua % ub; @@ -75,38 +73,14 @@ int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) } #endif -#if !LJ_TARGET_X86ORX64 -/* Unsigned x^k. */ -static double lj_vm_powui(double x, uint32_t k) -{ - double y; - lua_assert(k != 0); - for (; (k & 1) == 0; k >>= 1) x *= x; - y = x; - if ((k >>= 1) != 0) { - for (;;) { - x *= x; - if (k == 1) break; - if (k & 1) y *= x; - k >>= 1; - } - y *= x; - } - return y; -} +#if LJ_HASJIT -/* Signed x^k. */ -double lj_vm_powi(double x, int32_t k) +#ifdef LUAJIT_NO_LOG2 +double lj_vm_log2(double a) { - if (k > 1) - return lj_vm_powui(x, (uint32_t)k); - else if (k == 1) - return x; - else if (k == 0) - return 1.0; - else - return 1.0 / lj_vm_powui(x, (uint32_t)-k); + return log(a) * 1.4426950408889634074; } +#endif /* Computes fpm(x) for extended math functions. */ double lj_vm_foldfpm(double x, int fpm) @@ -116,19 +90,12 @@ double lj_vm_foldfpm(double x, int fpm) case IRFPM_CEIL: return lj_vm_ceil(x); case IRFPM_TRUNC: return lj_vm_trunc(x); case IRFPM_SQRT: return sqrt(x); - case IRFPM_EXP: return exp(x); - case IRFPM_EXP2: return lj_vm_exp2(x); case IRFPM_LOG: return log(x); case IRFPM_LOG2: return lj_vm_log2(x); - case IRFPM_LOG10: return log10(x); - case IRFPM_SIN: return sin(x); - case IRFPM_COS: return cos(x); - case IRFPM_TAN: return tan(x); - default: lua_assert(0); + default: lj_assertX(0, "bad fpm %d", fpm); } return 0; } -#endif #if LJ_HASFFI int lj_vm_errno(void) diff --git a/src/ljamalg.c b/src/ljamalg.c index 92f070da..cae8356c 100644 --- a/src/ljamalg.c +++ b/src/ljamalg.c @@ -3,16 +3,6 @@ ** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h */ -/* -+--------------------------------------------------------------------------+ -| WARNING: Compiling the amalgamation needs a lot of virtual memory | -| (around 300 MB with GCC 4.x)! If you don't have enough physical memory | -| your machine will start swapping to disk and the compile will not finish | -| within a reasonable amount of time. | -| So either compile on a bigger machine or use the non-amalgamated build. | -+--------------------------------------------------------------------------+ -*/ - #define ljamalg_c #define LUA_CORE @@ -28,23 +18,30 @@ #include "lua.h" #include "lauxlib.h" +#include "lj_assert.c" #include "lj_gc.c" #include "lj_err.c" #include "lj_char.c" #include "lj_bc.c" #include "lj_obj.c" +#include "lj_buf.c" #include "lj_str.c" #include "lj_tab.c" #include "lj_func.c" #include "lj_udata.c" #include "lj_meta.c" #include "lj_debug.c" +#include "lj_prng.c" #include "lj_state.c" #include "lj_dispatch.c" #include "lj_vmevent.c" #include "lj_vmmath.c" #include "lj_strscan.c" +#include "lj_strfmt.c" +#include "lj_strfmt_num.c" +#include "lj_serialize.c" #include "lj_api.c" +#include "lj_profile.c" #include "lj_lex.c" #include "lj_parse.c" #include "lj_bcread.c" @@ -89,5 +86,6 @@ #include "lib_bit.c" #include "lib_jit.c" #include "lib_ffi.c" +#include "lib_buffer.c" #include "lib_init.c" @@ -39,7 +39,8 @@ #define lua_upvalueindex(i) (LUA_GLOBALSINDEX-(i)) -/* thread status; 0 is OK */ +/* thread status */ +#define LUA_OK 0 #define LUA_YIELD 1 #define LUA_ERRRUN 2 #define LUA_ERRSYNTAX 3 @@ -226,6 +227,7 @@ LUA_API int (lua_status) (lua_State *L); #define LUA_GCSTEP 5 #define LUA_GCSETPAUSE 6 #define LUA_GCSETSTEPMUL 7 +#define LUA_GCISRUNNING 9 LUA_API int (lua_gc) (lua_State *L, int what, int data); @@ -346,6 +348,13 @@ LUA_API void *lua_upvalueid (lua_State *L, int idx, int n); LUA_API void lua_upvaluejoin (lua_State *L, int idx1, int n1, int idx2, int n2); LUA_API int lua_loadx (lua_State *L, lua_Reader reader, void *dt, const char *chunkname, const char *mode); +LUA_API const lua_Number *lua_version (lua_State *L); +LUA_API void lua_copy (lua_State *L, int fromidx, int toidx); +LUA_API lua_Number lua_tonumberx (lua_State *L, int idx, int *isnum); +LUA_API lua_Integer lua_tointegerx (lua_State *L, int idx, int *isnum); + +/* From Lua 5.3. */ +LUA_API int lua_isyieldable (lua_State *L); struct lua_Debug { diff --git a/src/luaconf.h b/src/luaconf.h index 9d587e9d..e8790c1d 100644 --- a/src/luaconf.h +++ b/src/luaconf.h @@ -37,7 +37,7 @@ #endif #define LUA_LROOT "/usr/local" #define LUA_LUADIR "/lua/5.1/" -#define LUA_LJDIR "/luajit-2.0.5/" +#define LUA_LJDIR "/luajit-2.1.0-beta3/" #ifdef LUA_ROOT #define LUA_JROOT LUA_ROOT @@ -79,7 +79,7 @@ #define LUA_IGMARK "-" #define LUA_PATH_CONFIG \ LUA_DIRSEP "\n" LUA_PATHSEP "\n" LUA_PATH_MARK "\n" \ - LUA_EXECDIR "\n" LUA_IGMARK + LUA_EXECDIR "\n" LUA_IGMARK "\n" /* Quoting in error messages. */ #define LUA_QL(x) "'" x "'" @@ -92,10 +92,6 @@ #define LUAI_GCMUL 200 /* Run GC at 200% of allocation speed. */ #define LUA_MAXCAPTURES 32 /* Max. pattern captures. */ -/* Compatibility with older library function names. */ -#define LUA_COMPAT_MOD /* OLD: math.mod, NEW: math.fmod */ -#define LUA_COMPAT_GFIND /* OLD: string.gfind, NEW: string.gmatch */ - /* Configuration for the frontend (the luajit executable). */ #if defined(luajit_c) #define LUA_PROGNAME "luajit" /* Fallback frontend name. */ @@ -140,7 +136,7 @@ #define LUALIB_API LUA_API -/* Support for internal assertions. */ +/* Compatibility support for assertions. */ #if defined(LUA_USE_ASSERT) || defined(LUA_USE_APICHECK) #include <assert.h> #endif diff --git a/src/luajit.c b/src/luajit.c index cf4982a6..6dd64026 100644 --- a/src/luajit.c +++ b/src/luajit.c @@ -62,8 +62,9 @@ static void laction(int i) static void print_usage(void) { - fprintf(stderr, - "usage: %s [options]... [script [args]...].\n" + fputs("usage: ", stderr); + fputs(progname, stderr); + fputs(" [options]... [script [args]...].\n" "Available options are:\n" " -e chunk Execute string " LUA_QL("chunk") ".\n" " -l name Require library " LUA_QL("name") ".\n" @@ -74,16 +75,14 @@ static void print_usage(void) " -v Show version information.\n" " -E Ignore environment variables.\n" " -- Stop handling options.\n" - " - Execute stdin and stop handling options.\n" - , - progname); + " - Execute stdin and stop handling options.\n", stderr); fflush(stderr); } static void l_message(const char *msg) { - if (progname) fprintf(stderr, "%s: ", progname); - fprintf(stderr, "%s\n", msg); + if (progname) { fputs(progname, stderr); fputc(':', stderr); fputc(' ', stderr); } + fputs(msg, stderr); fputc('\n', stderr); fflush(stderr); } @@ -126,7 +125,7 @@ static int docall(lua_State *L, int narg, int clear) #endif lua_remove(L, base); /* remove traceback function */ /* force a complete garbage collection in case of errors */ - if (status != 0) lua_gc(L, LUA_GCCOLLECT, 0); + if (status != LUA_OK) lua_gc(L, LUA_GCCOLLECT, 0); return status; } @@ -155,22 +154,15 @@ static void print_jit_status(lua_State *L) lua_settop(L, 0); /* clear stack */ } -static int getargs(lua_State *L, char **argv, int n) +static void createargtable(lua_State *L, char **argv, int argc, int argf) { - int narg; int i; - int argc = 0; - while (argv[argc]) argc++; /* count total number of arguments */ - narg = argc - (n + 1); /* number of arguments to the script */ - luaL_checkstack(L, narg + 3, "too many arguments to script"); - for (i = n+1; i < argc; i++) - lua_pushstring(L, argv[i]); - lua_createtable(L, narg, n + 1); + lua_createtable(L, argc - argf, argf); for (i = 0; i < argc; i++) { lua_pushstring(L, argv[i]); - lua_rawseti(L, -2, i - n); + lua_rawseti(L, -2, i - argf); } - return narg; + lua_setglobal(L, "arg"); } static int dofile(lua_State *L, const char *name) @@ -259,9 +251,9 @@ static void dotty(lua_State *L) const char *oldprogname = progname; progname = NULL; while ((status = loadline(L)) != -1) { - if (status == 0) status = docall(L, 0, 0); + if (status == LUA_OK) status = docall(L, 0, 0); report(L, status); - if (status == 0 && lua_gettop(L) > 0) { /* any result to print? */ + if (status == LUA_OK && lua_gettop(L) > 0) { /* any result to print? */ lua_getglobal(L, "print"); lua_insert(L, 1); if (lua_pcall(L, lua_gettop(L)-1, 0, 0) != 0) @@ -275,21 +267,30 @@ static void dotty(lua_State *L) progname = oldprogname; } -static int handle_script(lua_State *L, char **argv, int n) +static int handle_script(lua_State *L, char **argx) { int status; - const char *fname; - int narg = getargs(L, argv, n); /* collect arguments */ - lua_setglobal(L, "arg"); - fname = argv[n]; - if (strcmp(fname, "-") == 0 && strcmp(argv[n-1], "--") != 0) + const char *fname = argx[0]; + if (strcmp(fname, "-") == 0 && strcmp(argx[-1], "--") != 0) fname = NULL; /* stdin */ status = luaL_loadfile(L, fname); - lua_insert(L, -(narg+1)); - if (status == 0) + if (status == LUA_OK) { + /* Fetch args from arg table. LUA_INIT or -e might have changed them. */ + int narg = 0; + lua_getglobal(L, "arg"); + if (lua_istable(L, -1)) { + do { + narg++; + lua_rawgeti(L, -narg, narg); + } while (!lua_isnil(L, -1)); + lua_pop(L, 1); + lua_remove(L, -narg); + narg--; + } else { + lua_pop(L, 1); + } status = docall(L, narg, 0); - else - lua_pop(L, narg); + } return report(L, status); } @@ -385,7 +386,8 @@ static int dobytecode(lua_State *L, char **argv) } for (argv++; *argv != NULL; narg++, argv++) lua_pushstring(L, *argv); - return report(L, lua_pcall(L, narg, 0, 0)); + report(L, lua_pcall(L, narg, 0, 0)); + return -1; } /* check that argument has no extra characters at the end */ @@ -406,7 +408,7 @@ static int collectargs(char **argv, int *flags) switch (argv[i][1]) { /* Check option. */ case '-': notail(argv[i]); - return (argv[i+1] != NULL ? i+1 : 0); + return i+1; case '\0': return i; case 'i': @@ -432,23 +434,23 @@ static int collectargs(char **argv, int *flags) case 'b': /* LuaJIT extension */ if (*flags) return -1; *flags |= FLAGS_EXEC; - return 0; + return i+1; case 'E': *flags |= FLAGS_NOENV; break; default: return -1; /* invalid option */ } } - return 0; + return i; } -static int runargs(lua_State *L, char **argv, int n) +static int runargs(lua_State *L, char **argv, int argn) { int i; - for (i = 1; i < n; i++) { + for (i = 1; i < argn; i++) { if (argv[i] == NULL) continue; lua_assert(argv[i][0] == '-'); - switch (argv[i][1]) { /* option */ + switch (argv[i][1]) { case 'e': { const char *chunk = argv[i] + 2; if (*chunk == '\0') chunk = argv[++i]; @@ -462,10 +464,10 @@ static int runargs(lua_State *L, char **argv, int n) if (*filename == '\0') filename = argv[++i]; lua_assert(filename != NULL); if (dolibrary(L, filename)) - return 1; /* stop if file fails */ + return 1; break; } - case 'j': { /* LuaJIT extension */ + case 'j': { /* LuaJIT extension. */ const char *cmd = argv[i] + 2; if (*cmd == '\0') cmd = argv[++i]; lua_assert(cmd != NULL); @@ -473,16 +475,16 @@ static int runargs(lua_State *L, char **argv, int n) return 1; break; } - case 'O': /* LuaJIT extension */ + case 'O': /* LuaJIT extension. */ if (dojitopt(L, argv[i] + 2)) return 1; break; - case 'b': /* LuaJIT extension */ + case 'b': /* LuaJIT extension. */ return dobytecode(L, argv+i); default: break; } } - return 0; + return LUA_OK; } static int handle_luainit(lua_State *L) @@ -493,7 +495,7 @@ static int handle_luainit(lua_State *L) const char *init = getenv(LUA_INIT); #endif if (init == NULL) - return 0; /* status OK */ + return LUA_OK; else if (init[0] == '@') return dofile(L, init+1); else @@ -510,44 +512,55 @@ static int pmain(lua_State *L) { struct Smain *s = &smain; char **argv = s->argv; - int script; + int argn; int flags = 0; globalL = L; - LUAJIT_VERSION_SYM(); /* linker-enforced version check */ - script = collectargs(argv, &flags); - if (script < 0) { /* invalid args? */ + LUAJIT_VERSION_SYM(); /* Linker-enforced version check. */ + + argn = collectargs(argv, &flags); + if (argn < 0) { /* Invalid args? */ print_usage(); s->status = 1; return 0; } + if ((flags & FLAGS_NOENV)) { lua_pushboolean(L, 1); lua_setfield(L, LUA_REGISTRYINDEX, "LUA_NOENV"); } - lua_gc(L, LUA_GCSTOP, 0); /* stop collector during initialization */ - luaL_openlibs(L); /* open libraries */ + + /* Stop collector during library initialization. */ + lua_gc(L, LUA_GCSTOP, 0); + luaL_openlibs(L); lua_gc(L, LUA_GCRESTART, -1); + + createargtable(L, argv, s->argc, argn); + if (!(flags & FLAGS_NOENV)) { s->status = handle_luainit(L); - if (s->status != 0) return 0; + if (s->status != LUA_OK) return 0; } + if ((flags & FLAGS_VERSION)) print_version(); - s->status = runargs(L, argv, (script > 0) ? script : s->argc); - if (s->status != 0) return 0; - if (script) { - s->status = handle_script(L, argv, script); - if (s->status != 0) return 0; + + s->status = runargs(L, argv, argn); + if (s->status != LUA_OK) return 0; + + if (s->argc > argn) { + s->status = handle_script(L, argv + argn); + if (s->status != LUA_OK) return 0; } + if ((flags & FLAGS_INTERACTIVE)) { print_jit_status(L); dotty(L); - } else if (script == 0 && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { + } else if (s->argc == argn && !(flags & (FLAGS_EXEC|FLAGS_VERSION))) { if (lua_stdin_is_tty()) { print_version(); print_jit_status(L); dotty(L); } else { - dofile(L, NULL); /* executes stdin as a file */ + dofile(L, NULL); /* Executes stdin as a file. */ } } return 0; @@ -558,7 +571,7 @@ int main(int argc, char **argv) int status; lua_State *L; if (!argv[0]) argv = empty_argv; else if (argv[0][0]) progname = argv[0]; - L = lua_open(); /* create state */ + L = lua_open(); if (L == NULL) { l_message("cannot create state: not enough memory"); return EXIT_FAILURE; @@ -568,6 +581,6 @@ int main(int argc, char **argv) status = lua_cpcall(L, pmain, NULL); report(L, status); lua_close(L); - return (status || smain.status) ? EXIT_FAILURE : EXIT_SUCCESS; + return (status || smain.status > 0) ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/src/luajit.h b/src/luajit.h index 8b666f63..31f1eb1f 100644 --- a/src/luajit.h +++ b/src/luajit.h @@ -30,9 +30,9 @@ #include "lua.h" -#define LUAJIT_VERSION "LuaJIT 2.0.5" -#define LUAJIT_VERSION_NUM 20005 /* Version 2.0.5 = 02.00.05. */ -#define LUAJIT_VERSION_SYM luaJIT_version_2_0_5 +#define LUAJIT_VERSION "LuaJIT 2.1.0-beta3" +#define LUAJIT_VERSION_NUM 20100 /* Version 2.1.0 = 02.01.00. */ +#define LUAJIT_VERSION_SYM luaJIT_version_2_1_0_beta3 #define LUAJIT_COPYRIGHT "Copyright (C) 2005-2022 Mike Pall" #define LUAJIT_URL "https://luajit.org/" @@ -64,6 +64,15 @@ enum { /* Control the JIT engine. */ LUA_API int luaJIT_setmode(lua_State *L, int idx, int mode); +/* Low-overhead profiling API. */ +typedef void (*luaJIT_profile_callback)(void *data, lua_State *L, + int samples, int vmstate); +LUA_API void luaJIT_profile_start(lua_State *L, const char *mode, + luaJIT_profile_callback cb, void *data); +LUA_API void luaJIT_profile_stop(lua_State *L); +LUA_API const char *luaJIT_profile_dumpstack(lua_State *L, const char *fmt, + int depth, size_t *len); + /* Enforce (dynamic) linker error for version mismatches. Call from main. */ LUA_API void LUAJIT_VERSION_SYM(void); diff --git a/src/lualib.h b/src/lualib.h index 4a2f8692..87748456 100644 --- a/src/lualib.h +++ b/src/lualib.h @@ -33,6 +33,7 @@ LUALIB_API int luaopen_debug(lua_State *L); LUALIB_API int luaopen_bit(lua_State *L); LUALIB_API int luaopen_jit(lua_State *L); LUALIB_API int luaopen_ffi(lua_State *L); +LUALIB_API int luaopen_string_buffer(lua_State *L); LUALIB_API void luaL_openlibs(lua_State *L); diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 045965f8..d323d8d4 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -5,6 +5,7 @@ @rem Then cd to this directory and run this script. Use the following
@rem options (in order), if needed. The default is a dynamic release build.
@rem
+@rem nogc64 disable LJ_GC64 mode for x64
@rem debug emit debug symbols
@rem amalg amalgamated build
@rem static static linkage
@@ -20,10 +21,11 @@ @set LJLIB=lib /nologo /nodefaultlib
@set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
+@set DASC=vm_x64.dasc
@set LJDLLNAME=lua51.dll
@set LJLIBNAME=lua51.lib
@set BUILDTYPE=release
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD
@@ -36,10 +38,17 @@ if exist minilua.exe.manifest^ @set LJARCH=x64
@minilua
@if errorlevel 8 goto :X64
+@set DASC=vm_x86.dasc
@set DASMFLAGS=-D WIN -D JIT -D FFI
@set LJARCH=x86
+@set LJCOMPILE=%LJCOMPILE% /arch:SSE2
:X64
-minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
+@if "%1" neq "nogc64" goto :GC64
+@shift
+@set DASC=vm_x86.dasc
+@set LJCOMPILE=%LJCOMPILE% /DLUAJIT_DISABLE_GC64
+:GC64
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
@if errorlevel 1 goto :BAD
%LJCOMPILE% /I "." /I %DASMDIR% host\buildvm*.c
@@ -68,6 +77,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @shift
@set BUILDTYPE=debug
@set LJCOMPILE=%LJCOMPILE% /Zi %DEBUGCFLAGS%
+@set LJLINK=%LJLINK% /opt:ref /opt:icf /incremental:no
:NODEBUG
@set LJLINK=%LJLINK% /%BUILDTYPE%
@if "%1"=="amalg" goto :AMALGDLL
diff --git a/src/nxbuild.bat b/src/nxbuild.bat new file mode 100644 index 00000000..c4a21f05 --- /dev/null +++ b/src/nxbuild.bat @@ -0,0 +1,159 @@ +@rem Script to build LuaJIT with NintendoSDK + NX Addon.
+@rem Donated to the public domain by Swyter.
+@rem
+@rem To run this script you must open a "Native Tools Command Prompt for VS".
+@rem
+@rem Either the x86 version for NX32, or x64 for the NX64 target.
+@rem This is because the pointer size of the LuaJIT host tools (buildvm.exe)
+@rem must match the cross-compiled target (32 or 64 bits).
+@rem
+@rem Then cd to this directory and run this script.
+@rem
+@rem Recommended invocation:
+@rem
+@rem nxbuild # release build, amalgamated
+@rem nxbuild debug # debug build, amalgamated
+@rem
+@rem Additional command-line options (not generally recommended):
+@rem
+@rem noamalg # (after debug) non-amalgamated build
+
+@if not defined INCLUDE goto :FAIL
+@if not defined NINTENDO_SDK_ROOT goto :FAIL
+@if not defined PLATFORM goto :FAIL
+
+@if "%platform%" == "x86" goto :DO_NX32
+@if "%platform%" == "x64" goto :DO_NX64
+
+@echo Error: Current host platform is %platform%!
+@echo.
+@goto :FAIL
+
+@setlocal
+
+:DO_NX32
+@set DASC=vm_arm.dasc
+@set DASMFLAGS= -D HFABI -D FPU
+@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM
+@set HOST_PTR_SIZE=4
+goto :BEGIN
+
+:DO_NX64
+@set DASC=vm_arm64.dasc
+@set DASMFLAGS= -D ENDIAN_LE
+@set DASMTARGET= -D LUAJIT_TARGET=LUAJIT_ARCH_ARM64
+@set HOST_PTR_SIZE=8
+
+:BEGIN
+@rem ---- Host compiler ----
+@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /wo4146 /wo4244 /D_CRT_SECURE_NO_DEPRECATE
+@set LJLINK=link /nologo
+@set LJMT=mt /nologo
+@set DASMDIR=..\dynasm
+@set DASM=%DASMDIR%\dynasm.lua
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+
+%LJCOMPILE% host\minilua.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:minilua.exe minilua.obj
+@if errorlevel 1 goto :BAD
+if exist minilua.exe.manifest^
+ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+
+@rem Check that we have the right 32/64 bit host compiler to generate the right virtual machine files.
+@minilua
+@if "%ERRORLEVEL%" == "%HOST_PTR_SIZE%" goto :PASSED_PTR_CHECK
+
+@echo The pointer size of the host in bytes (%HOST_PTR_SIZE%) does not match the expected value (%errorlevel%).
+@echo Check that the script is being ran under the correct x86/x64 VS prompt.
+@goto :BAD
+
+:PASSED_PTR_CHECK
+@set DASMFLAGS=%DASMFLAGS% %DASMTARGET% -D LJ_TARGET_NX -D LUAJIT_OS=LUAJIT_OS_OTHER -D LUAJIT_DISABLE_JIT -D LUAJIT_DISABLE_FFI
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
+@if errorlevel 1 goto :BAD
+%LJCOMPILE% /I "." /I %DASMDIR% %DASMTARGET% -D LJ_TARGET_NX -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI host\buildvm*.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:buildvm.exe buildvm*.obj
+@if errorlevel 1 goto :BAD
+if exist buildvm.exe.manifest^
+ %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+
+buildvm -m elfasm -o lj_vm.s
+@if errorlevel 1 goto :BAD
+buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m libdef -o lj_libdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m recdef -o lj_recdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+@if errorlevel 1 goto :BAD
+
+@rem ---- Cross compiler ----
+@if "%platform%" neq "x64" goto :NX32_CROSSBUILD
+@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\clang" -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
+@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-ar" rc
+@set TARGETLIB_SUFFIX=nx64
+
+%NINTENDO_SDK_ROOT%\Compilers\NX\nx\aarch64\bin\aarch64-nintendo-nx-elf-as -o lj_vm.o lj_vm.s
+goto :DEBUGCHECK
+
+:NX32_CROSSBUILD
+@set LJCOMPILE="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\clang" -Wall -I%NINTENDO_SDK_ROOT%\Include %DASMTARGET% -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_USE_SYSMALLOC -c
+@set LJLIB="%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-ar" rc
+@set TARGETLIB_SUFFIX=nx32
+
+%NINTENDO_SDK_ROOT%\Compilers\NX\nx\armv7l\bin\armv7l-nintendo-nx-eabihf-as -o lj_vm.o lj_vm.s
+:DEBUGCHECK
+
+@if "%1" neq "debug" goto :NODEBUG
+@shift
+@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_DEBUG -g -O0
+@set TARGETLIB=libluajitD_%TARGETLIB_SUFFIX%.a
+goto :BUILD
+:NODEBUG
+@set LJCOMPILE=%LJCOMPILE% -DNN_SDK_BUILD_RELEASE -O3
+@set TARGETLIB=libluajit_%TARGETLIB_SUFFIX%.a
+:BUILD
+del %TARGETLIB%
+@if "%1" neq "noamalg" goto :AMALG
+for %%f in (lj_*.c lib_*.c) do (
+ %LJCOMPILE% %%f
+ @if errorlevel 1 goto :BAD
+)
+
+%LJLIB% %TARGETLIB% lj_*.o lib_*.o
+@if errorlevel 1 goto :BAD
+@goto :NOAMALG
+:AMALG
+%LJCOMPILE% ljamalg.c
+@if errorlevel 1 goto :BAD
+%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o
+@if errorlevel 1 goto :BAD
+:NOAMALG
+
+@del *.o *.obj *.manifest minilua.exe buildvm.exe
+@echo.
+@echo === Successfully built LuaJIT for Nintendo Switch (%TARGETLIB_SUFFIX%) ===
+
+@goto :END
+:BAD
+@echo.
+@echo *******************************************************
+@echo *** Build FAILED -- Please check the error messages ***
+@echo *******************************************************
+@goto :END
+:FAIL
+@echo To run this script you must open a "Native Tools Command Prompt for VS".
+@echo.
+@echo Either the x86 version for NX32, or x64 for the NX64 target.
+@echo This is because the pointer size of the LuaJIT host tools (buildvm.exe)
+@echo must match the cross-compiled target (32 or 64 bits).
+@echo.
+@echo Keep in mind that NintendoSDK + NX Addon must be installed, too.
+:END
diff --git a/src/ps4build.bat b/src/ps4build.bat index 337a44fa..fdd09d81 100644 --- a/src/ps4build.bat +++ b/src/ps4build.bat @@ -2,7 +2,19 @@ @rem Donated to the public domain.
@rem
@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
+@rem or "VS2015 x64 Native Tools Command Prompt".
+@rem
@rem Then cd to this directory and run this script.
+@rem
+@rem Recommended invocation:
+@rem
+@rem ps4build release build, amalgamated, 64-bit GC
+@rem ps4build debug debug build, amalgamated, 64-bit GC
+@rem
+@rem Additional command-line options (not generally recommended):
+@rem
+@rem gc32 (before debug) 32-bit GC
+@rem noamalg (after debug) non-amalgamated build
@if not defined INCLUDE goto :FAIL
@if not defined SCE_ORBIS_SDK_DIR goto :FAIL
@@ -14,7 +26,15 @@ @set LJMT=mt /nologo
@set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+@set GC64=
+@set DASC=vm_x64.dasc
+
+@if "%1" neq "gc32" goto :NOGC32
+@shift
+@set GC64=-DLUAJIT_DISABLE_GC64
+@set DASC=vm_x86.dasc
+:NOGC32
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD
@@ -28,10 +48,10 @@ if exist minilua.exe.manifest^ @if not errorlevel 8 goto :FAIL
@set DASMFLAGS=-D P64 -D NO_UNWIND
-minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x86.dasc
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
@if errorlevel 1 goto :BAD
-%LJCOMPILE% /I "." /I %DASMDIR% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
+%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
@if errorlevel 1 goto :BAD
%LJLINK% /out:buildvm.exe buildvm*.obj
@if errorlevel 1 goto :BAD
@@ -54,7 +74,7 @@ buildvm -m folddef -o lj_folddef.h lj_opt_fold.c @if errorlevel 1 goto :BAD
@rem ---- Cross compiler ----
-@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI
+@set LJCOMPILE="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
@set LJLIB="%SCE_ORBIS_SDK_DIR%\host_tools\bin\orbis-ar" rcus
@set INCLUDE=""
@@ -63,14 +83,14 @@ orbis-as -o lj_vm.o lj_vm.s @if "%1" neq "debug" goto :NODEBUG
@shift
@set LJCOMPILE=%LJCOMPILE% -g -O0
-@set TARGETLIB=libluajitD.a
+@set TARGETLIB=libluajitD_ps4.a
goto :BUILD
:NODEBUG
@set LJCOMPILE=%LJCOMPILE% -O2
-@set TARGETLIB=libluajit.a
+@set TARGETLIB=libluajit_ps4.a
:BUILD
del %TARGETLIB%
-@if "%1"=="amalg" goto :AMALG
+@if "%1" neq "noamalg" goto :AMALG
for %%f in (lj_*.c lib_*.c) do (
%LJCOMPILE% %%f
@if errorlevel 1 goto :BAD
diff --git a/src/ps5build.bat b/src/ps5build.bat new file mode 100644 index 00000000..0b1ebd5b --- /dev/null +++ b/src/ps5build.bat @@ -0,0 +1,123 @@ +@rem Script to build LuaJIT with the PS5 SDK.
+@rem Donated to the public domain.
+@rem
+@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
+@rem or "VS20xx x64 Native Tools Command Prompt".
+@rem
+@rem Then cd to this directory and run this script.
+@rem
+@rem Recommended invocation:
+@rem
+@rem ps5build release build, amalgamated, 64-bit GC
+@rem ps5build debug debug build, amalgamated, 64-bit GC
+@rem
+@rem Additional command-line options (not generally recommended):
+@rem
+@rem gc32 (before debug) 32-bit GC
+@rem noamalg (after debug) non-amalgamated build
+
+@if not defined INCLUDE goto :FAIL
+@if not defined SCE_PROSPERO_SDK_DIR goto :FAIL
+
+@setlocal
+@rem ---- Host compiler ----
+@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
+@set LJLINK=link /nologo
+@set LJMT=mt /nologo
+@set DASMDIR=..\dynasm
+@set DASM=%DASMDIR%\dynasm.lua
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+@set GC64=
+@set DASC=vm_x64.dasc
+
+@if "%1" neq "gc32" goto :NOGC32
+@shift
+@set GC64=-DLUAJIT_DISABLE_GC64
+@set DASC=vm_x86.dasc
+:NOGC32
+
+%LJCOMPILE% host\minilua.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:minilua.exe minilua.obj
+@if errorlevel 1 goto :BAD
+if exist minilua.exe.manifest^
+ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+
+@rem Check for 64 bit host compiler.
+@minilua
+@if not errorlevel 8 goto :FAIL
+
+@set DASMFLAGS=-D P64 -D NO_UNWIND
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h %DASC%
+@if errorlevel 1 goto :BAD
+
+%LJCOMPILE% /I "." /I %DASMDIR% %GC64% -DLUAJIT_TARGET=LUAJIT_ARCH_X64 -DLUAJIT_OS=LUAJIT_OS_OTHER -DLUAJIT_DISABLE_JIT -DLUAJIT_DISABLE_FFI -DLUAJIT_NO_UNWIND host\buildvm*.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:buildvm.exe buildvm*.obj
+@if errorlevel 1 goto :BAD
+if exist buildvm.exe.manifest^
+ %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+
+buildvm -m elfasm -o lj_vm.s
+@if errorlevel 1 goto :BAD
+buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m libdef -o lj_libdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m recdef -o lj_recdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+@if errorlevel 1 goto :BAD
+
+@rem ---- Cross compiler ----
+@set LJCOMPILE="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-clang" -c -Wall -DLUAJIT_DISABLE_FFI %GC64%
+@set LJLIB="%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-llvm-ar" rcus
+@set INCLUDE=""
+
+%SCE_PROSPERO_SDK_DIR%\host_tools\bin\prospero-llvm-as -o lj_vm.o lj_vm.s
+
+@if "%1" neq "debug" goto :NODEBUG
+@shift
+@set LJCOMPILE=%LJCOMPILE% -g -O0
+@set TARGETLIB=libluajitD_ps5.a
+goto :BUILD
+:NODEBUG
+@set LJCOMPILE=%LJCOMPILE% -O2
+@set TARGETLIB=libluajit_ps5.a
+:BUILD
+del %TARGETLIB%
+@if "%1" neq "noamalg" goto :AMALG
+for %%f in (lj_*.c lib_*.c) do (
+ %LJCOMPILE% %%f
+ @if errorlevel 1 goto :BAD
+)
+
+%LJLIB% %TARGETLIB% lj_*.o lib_*.o
+@if errorlevel 1 goto :BAD
+@goto :NOAMALG
+:AMALG
+%LJCOMPILE% ljamalg.c
+@if errorlevel 1 goto :BAD
+%LJLIB% %TARGETLIB% ljamalg.o lj_vm.o
+@if errorlevel 1 goto :BAD
+:NOAMALG
+
+@del *.o *.obj *.manifest minilua.exe buildvm.exe
+@echo.
+@echo === Successfully built LuaJIT for PS5 ===
+
+@goto :END
+:BAD
+@echo.
+@echo *******************************************************
+@echo *** Build FAILED -- Please check the error messages ***
+@echo *******************************************************
+@goto :END
+:FAIL
+@echo To run this script you must open a "Visual Studio .NET Command Prompt"
+@echo (64 bit host compiler). The PS5 Prospero SDK must be installed, too.
+:END
diff --git a/src/psvitabuild.bat b/src/psvitabuild.bat index 3991dc65..2980e157 100644 --- a/src/psvitabuild.bat +++ b/src/psvitabuild.bat @@ -14,7 +14,7 @@ @set LJMT=mt /nologo
@set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD
diff --git a/src/vm_arm.dasc b/src/vm_arm.dasc index 4a13c68b..770c1602 100644 --- a/src/vm_arm.dasc +++ b/src/vm_arm.dasc @@ -99,6 +99,7 @@ |.type NODE, Node |.type NARGS8, int |.type TRACE, GCtrace +|.type SBUF, SBuf | |//----------------------------------------------------------------------- | @@ -372,6 +373,17 @@ static void build_subroutines(BuildCtx *ctx) | st_vmstate CARG2 | b ->vm_returnc | + |->vm_unwind_ext: // Complete external unwind. +#if !LJ_NO_UNWIND + | push {r0, r1, r2, lr} + | bl extern _Unwind_Complete + | ldr r0, [sp] + | bl extern _Unwind_DeleteException + | pop {r0, r1, r2, lr} + | mov r0, r1 + | bx r2 +#endif + | |//----------------------------------------------------------------------- |//-- Grow stack for calls ----------------------------------------------- |//----------------------------------------------------------------------- @@ -418,13 +430,14 @@ static void build_subroutines(BuildCtx *ctx) | add CARG2, sp, #CFRAME_RESUME | ldrb CARG1, L->status | str CARG3, SAVE_ERRF - | str CARG2, L->cframe + | str L, SAVE_PC // Any value outside of bytecode is ok. | str CARG3, SAVE_CFRAME | cmp CARG1, #0 - | str L, SAVE_PC // Any value outside of bytecode is ok. + | str CARG2, L->cframe | beq >3 | | // Resume after yield (like a return). + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | mov RA, BASE | ldr BASE, L->base | ldr CARG1, L->top @@ -458,14 +471,15 @@ static void build_subroutines(BuildCtx *ctx) | str CARG3, SAVE_NRES | mov L, CARG1 | str CARG1, SAVE_L - | mov BASE, CARG2 - | str sp, L->cframe // Add our C frame to cframe chain. | ldr DISPATCH, L->glref // Setup pointer to dispatch table. + | mov BASE, CARG2 | str CARG1, SAVE_PC // Any value outside of bytecode is ok. | str RC, SAVE_CFRAME | add DISPATCH, DISPATCH, #GG_G2DISP + | str sp, L->cframe // Add our C frame to cframe chain. | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | ldr RB, L->base // RB = old base (for vmeta_call). | ldr CARG1, L->top | mov MASKR8, #255 @@ -491,20 +505,21 @@ static void build_subroutines(BuildCtx *ctx) | mov L, CARG1 | ldr RA, L:CARG1->stack | str CARG1, SAVE_L + | ldr DISPATCH, L->glref // Setup pointer to dispatch table. | ldr RB, L->top | str CARG1, SAVE_PC // Any value outside of bytecode is ok. | ldr RC, L->cframe + | add DISPATCH, DISPATCH, #GG_G2DISP | sub RA, RA, RB // Compute -savestack(L, L->top). - | str sp, L->cframe // Add our C frame to cframe chain. | mov RB, #0 | str RA, SAVE_NRES // Neg. delta means cframe w/o frame. | str RB, SAVE_ERRF // No error function. | str RC, SAVE_CFRAME + | str sp, L->cframe // Add our C frame to cframe chain. + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | blx CARG4 // (lua_State *L, lua_CFunction func, void *ud) - | ldr DISPATCH, L->glref // Setup pointer to dispatch table. | movs BASE, CRET1 - | mov PC, #FRAME_CP - | add DISPATCH, DISPATCH, #GG_G2DISP + | mov PC, #FRAME_CP | bne <3 // Else continue with the call. | b ->vm_leave_cp // No base? Just remove C frame. | @@ -614,6 +629,16 @@ static void build_subroutines(BuildCtx *ctx) | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | b ->vm_call_dispatch_f | + |->vmeta_tgetr: + | .IOS mov RC, BASE + | bl extern lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | .IOS mov BASE, RC + | cmp CRET1, #0 + | ldrdne CARG12, [CRET1] + | mvneq CARG2, #~LJ_TNIL + | b ->BC_TGETR_Z + | |//----------------------------------------------------------------------- | |->vmeta_tsets1: @@ -671,6 +696,16 @@ static void build_subroutines(BuildCtx *ctx) | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. | b ->vm_call_dispatch_f | + |->vmeta_tsetr: + | str BASE, L->base + | .IOS mov RC, BASE + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // Returns TValue *. + | .IOS mov BASE, RC + | b ->BC_TSETR_Z + | |//-- Comparison metamethods --------------------------------------------- | |->vmeta_comp: @@ -735,6 +770,17 @@ static void build_subroutines(BuildCtx *ctx) | b <3 |.endif | + |->vmeta_istype: + | sub PC, PC, #4 + | str BASE, L->base + | mov CARG1, L + | lsr CARG2, RA, #3 + | mov CARG3, RC + | str PC, SAVE_PC + | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | .IOS ldr BASE, L->base + | b ->cont_nop + | |//-- Arithmetic metamethods --------------------------------------------- | |->vmeta_arith_vn: @@ -966,9 +1012,9 @@ static void build_subroutines(BuildCtx *ctx) | cmp TAB:RB, #0 | beq ->fff_restv | ldr CARG3, TAB:RB->hmask - | ldr CARG4, STR:RC->hash + | ldr CARG4, STR:RC->sid | ldr NODE:INS, TAB:RB->node - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask | add CARG3, CARG3, CARG3, lsl #1 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 |3: // Rearranged logic, because we expect _not_ to find the key. @@ -1052,7 +1098,7 @@ static void build_subroutines(BuildCtx *ctx) | ffgccheck | mov CARG1, L | mov CARG2, BASE - | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) + | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) | // Returns GCstr *. | ldr BASE, L->base | mvn CARG2, #~LJ_TSTR @@ -1065,24 +1111,18 @@ static void build_subroutines(BuildCtx *ctx) | checktab CARG2, ->fff_fallback | strd CARG34, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. | ldr PC, [BASE, FRAME_PC] - | mov CARG2, CARG1 - | str BASE, L->base // Add frame since C call can throw. - | mov CARG1, L - | str BASE, L->top // Dummy frame length is ok. - | add CARG3, BASE, #8 - | str PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. + | add CARG2, BASE, #8 + | sub CARG3, BASE, #8 + | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. | .IOS ldr BASE, L->base | cmp CRET1, #0 - | mvneq CRET2, #~LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. - | ldrd CARG12, [BASE, #8] // Copy key and value to results. - | ldrd CARG34, [BASE, #16] - | mov RC, #(2+1)*8 - | strd CARG12, [BASE, #-8] - | strd CARG34, [BASE] - | b ->fff_res + | mov RC, #(2+1)*8 + | bgt ->fff_res // Found key/value. + | bmi ->fff_fallback // Invalid key. + | // End of traversal: return nil. + | mvn CRET2, #~LJ_TNIL + | b ->fff_restv | |.ffunc_1 pairs | checktab CARG2, ->fff_fallback @@ -1230,9 +1270,10 @@ static void build_subroutines(BuildCtx *ctx) | ldr CARG3, L:RA->base | mv_vmstate CARG2, INTERP | ldr CARG4, L:RA->top - | st_vmstate CARG2 | cmp CRET1, #LUA_YIELD | ldr BASE, L->base + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] + | st_vmstate CARG2 | bhi >8 | subs RC, CARG4, CARG3 | ldr CARG1, L->maxstack @@ -1500,19 +1541,6 @@ static void build_subroutines(BuildCtx *ctx) | math_extern2 atan2 | math_extern2 fmod | - |->ff_math_deg: - |.if FPU - | .ffunc_d math_rad - | vldr d1, CFUNC:CARG3->upvalue[0] - | vmul.f64 d0, d0, d1 - | b ->fff_resd - |.else - | .ffunc_n math_rad - | ldrd CARG34, CFUNC:CARG3->upvalue[0] - | bl extern __aeabi_dmul - | b ->fff_restv - |.endif - | |.if HFABI | .ffunc math_ldexp | ldr CARG4, [BASE, #4] @@ -1682,17 +1710,11 @@ static void build_subroutines(BuildCtx *ctx) |.endif |.endmacro | - | math_minmax math_min, gt, hi - | math_minmax math_max, lt, lo + | math_minmax math_min, gt, pl + | math_minmax math_max, lt, le | |//-- String library ----------------------------------------------------- | - |.ffunc_1 string_len - | checkstr CARG2, ->fff_fallback - | ldr CARG1, STR:CARG1->len - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv - | |.ffunc string_byte // Only handle the 1-arg case here. | ldrd CARG12, [BASE] | ldr PC, [BASE, FRAME_PC] @@ -1725,6 +1747,7 @@ static void build_subroutines(BuildCtx *ctx) | mov CARG1, L | str PC, SAVE_PC | bl extern lj_str_new // (lua_State *L, char *str, size_t l) + |->fff_resstr: | // Returns GCstr *. | ldr BASE, L->base | mvn CARG2, #~LJ_TSTR @@ -1768,91 +1791,28 @@ static void build_subroutines(BuildCtx *ctx) | mvn CARG2, #~LJ_TSTR | b ->fff_restv | - |.ffunc string_rep // Only handle the 1-char case inline. - | ffgccheck - | ldrd CARG12, [BASE] - | ldrd CARG34, [BASE, #8] - | cmp NARGS8:RC, #16 - | bne ->fff_fallback // Exactly 2 arguments - | checktp CARG2, LJ_TSTR - | checktpeq CARG4, LJ_TISNUM - | bne ->fff_fallback - | subs CARG4, CARG3, #1 - | ldr CARG2, STR:CARG1->len - | blt ->fff_emptystr // Count <= 0? - | cmp CARG2, #1 - | blo ->fff_emptystr // Zero-length string? - | bne ->fff_fallback // Fallback for > 1-char strings. - | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] - | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] - | ldr CARG1, STR:CARG1[1] - | cmp RB, CARG3 - | blo ->fff_fallback - |1: // Fill buffer with char. - | strb CARG1, [CARG2, CARG4] - | subs CARG4, CARG4, #1 - | bge <1 - | b ->fff_newstr - | - |.ffunc string_reverse - | ffgccheck - | ldrd CARG12, [BASE] - | cmp NARGS8:RC, #8 - | blo ->fff_fallback - | checkstr CARG2, ->fff_fallback - | ldr CARG3, STR:CARG1->len - | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] - | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] - | mov CARG4, CARG3 - | add CARG1, STR:CARG1, #sizeof(GCstr) - | cmp RB, CARG3 - | blo ->fff_fallback - |1: // Reverse string copy. - | ldrb RB, [CARG1], #1 - | subs CARG4, CARG4, #1 - | blt ->fff_newstr - | strb RB, [CARG2, CARG4] - | b <1 - | - |.macro ffstring_case, name, lo - | .ffunc name + |.macro ffstring_op, name + | .ffunc string_ .. name | ffgccheck - | ldrd CARG12, [BASE] + | ldr CARG3, [BASE, #4] | cmp NARGS8:RC, #8 + | ldr STR:CARG2, [BASE] | blo ->fff_fallback - | checkstr CARG2, ->fff_fallback - | ldr CARG3, STR:CARG1->len - | ldr RB, [DISPATCH, #DISPATCH_GL(tmpbuf.sz)] - | ldr CARG2, [DISPATCH, #DISPATCH_GL(tmpbuf.buf)] - | mov CARG4, #0 - | add CARG1, STR:CARG1, #sizeof(GCstr) - | cmp RB, CARG3 - | blo ->fff_fallback - |1: // ASCII case conversion. - | ldrb RB, [CARG1, CARG4] - | cmp CARG4, CARG3 - | bhs ->fff_newstr - | sub RC, RB, #lo - | cmp RC, #26 - | eorlo RB, RB, #0x20 - | strb RB, [CARG2, CARG4] - | add CARG4, CARG4, #1 - | b <1 + | sub SBUF:CARG1, DISPATCH, #-DISPATCH_GL(tmpbuf) + | checkstr CARG3, ->fff_fallback + | ldr CARG4, SBUF:CARG1->b + | str BASE, L->base + | str PC, SAVE_PC + | str L, SBUF:CARG1->L + | str CARG4, SBUF:CARG1->w + | bl extern lj_buf_putstr_ .. name + | bl extern lj_buf_tostr + | b ->fff_resstr |.endmacro | - |ffstring_case string_lower, 65 - |ffstring_case string_upper, 97 - | - |//-- Table library ------------------------------------------------------ - | - |.ffunc_1 table_getn - | checktab CARG2, ->fff_fallback - | .IOS mov RA, BASE - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | .IOS mov BASE, RA - | mvn CARG2, #~LJ_TISNUM - | b ->fff_restv + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper | |//-- Bit library -------------------------------------------------------- | @@ -2127,6 +2087,66 @@ static void build_subroutines(BuildCtx *ctx) | ldr INS, [PC, #-4] | bx CRET1 | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, CARG4 = meta base + | ldr RB, SAVE_MULTRES + | ldr INS, [PC, #-4] + | ldr TRACE:CARG3, [CARG4, #-24] // Save previous trace. + | subs RB, RB, #8 + | decode_RA8 RC, INS // Call base. + | beq >2 + |1: // Move results down. + | ldrd CARG12, [RA] + | add RA, RA, #8 + | subs RB, RB, #8 + | strd CARG12, [BASE, RC] + | add RC, RC, #8 + | bne <1 + |2: + | decode_RA8 RA, INS + | decode_RB8 RB, INS + | add RA, RA, RB + |3: + | cmp RA, RC + | mvn CARG2, #~LJ_TNIL + | bhi >9 // More results wanted? + | + | ldrh RA, TRACE:CARG3->traceno + | ldrh RC, TRACE:CARG3->link + | cmp RC, RA + | beq ->cont_nop // Blacklisted. + | cmp RC, #0 + | bne =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | str RA, [DISPATCH, #DISPATCH_J(exitno)] + | str L, [DISPATCH, #DISPATCH_J(L)] + | str BASE, L->base + | sub CARG1, DISPATCH, #-GG_DISP2J + | mov CARG2, PC + | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | ldr BASE, L->base + | b ->cont_nop + | + |9: // Fill up results with nil. + | strd CARG12, [BASE, RC] + | add RC, RC, #8 + | b <3 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mov CARG1, L + | str BASE, L->base + | mov CARG2, PC + | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | ldr BASE, L->base + | sub PC, PC, #4 + | b ->cont_nop +#endif + | |//----------------------------------------------------------------------- |//-- Trace exit handler ------------------------------------------------- |//----------------------------------------------------------------------- @@ -2151,14 +2171,14 @@ static void build_subroutines(BuildCtx *ctx) | add CARG1, CARG1, CARG2, asr #6 | ldr CARG2, [lr, #4] // Load exit stub group offset. | sub CARG1, CARG1, lr - | ldr L, [DISPATCH, #DISPATCH_GL(jit_L)] + | ldr L, [DISPATCH, #DISPATCH_GL(cur_L)] | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number. | ldr BASE, [DISPATCH, #DISPATCH_GL(jit_base)] | str CARG1, [DISPATCH, #DISPATCH_J(exitno)] | mov CARG4, #0 - | str L, [DISPATCH, #DISPATCH_J(L)] | str BASE, L->base - | str CARG4, [DISPATCH, #DISPATCH_GL(jit_L)] + | str L, [DISPATCH, #DISPATCH_J(L)] + | str CARG4, [DISPATCH, #DISPATCH_GL(jit_base)] | sub CARG1, DISPATCH, #-GG_DISP2J | mov CARG2, sp | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) @@ -2177,13 +2197,14 @@ static void build_subroutines(BuildCtx *ctx) | ldr L, SAVE_L |1: | cmp CARG1, #0 - | blt >3 // Check for error from exit. + | blt >9 // Check for error from exit. | lsl RC, CARG1, #3 | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | str RC, SAVE_MULTRES | mov CARG3, #0 + | str BASE, L->base | ldr CARG2, LFUNC:CARG2->field_pc - | str CARG3, [DISPATCH, #DISPATCH_GL(jit_L)] + | str CARG3, [DISPATCH, #DISPATCH_GL(jit_base)] | mv_vmstate CARG4, INTERP | ldr KBASE, [CARG2, #PC2PROTO(k)] | // Modified copy of ins_next which handles function header dispatch, too. @@ -2192,17 +2213,35 @@ static void build_subroutines(BuildCtx *ctx) | ldr INS, [PC], #4 | lsl MASKR8, MASKR8, #3 // MASKR8 = 255*8. | st_vmstate CARG4 + | cmp OP, #BC_FUNCC+2 // Fast function? + | bhs >4 + |2: | cmp OP, #BC_FUNCF // Function header? | ldr OP, [DISPATCH, OP, lsl #2] | decode_RA8 RA, INS | lsrlo RC, INS, #16 // No: Decode operands A*8 and D. | subhs RC, RC, #8 | addhs RA, RA, BASE // Yes: RA = BASE+framesize*8, RC = nargs*8 + | ldrhs CARG3, [BASE, FRAME_FUNC] | bx OP | - |3: // Rethrow error from the right C frame. + |4: // Check frame below fast function. + | ldr CARG1, [BASE, FRAME_PC] + | ands CARG2, CARG1, #FRAME_TYPE + | bne <2 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | ldr CARG3, [CARG1, #-4] + | decode_RA8 CARG1, CARG3 + | sub CARG2, BASE, CARG1 + | ldr LFUNC:CARG3, [CARG2, #-16] + | ldr CARG3, LFUNC:CARG3->field_pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | b <2 + | + |9: // Rethrow error from the right C frame. + | rsb CARG2, CARG1, #0 | mov CARG1, L - | bl extern lj_err_run // (lua_State *L) + | bl extern lj_err_trace // (lua_State *L, int errcode) |.endif | |//----------------------------------------------------------------------- @@ -2385,6 +2424,64 @@ static void build_subroutines(BuildCtx *ctx) |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_RES, CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_TMP0, CARG3 + |.define NEXT_TMP1, CARG4 + |.define NEXT_LIM, r12 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, [sp] + |.define NEXT_RES_KEY_I, [sp, #8] + |.define NEXT_RES_KEY_IT, [sp, #12] + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT + | ldr NEXT_TMP0, NEXT_TAB->array + | ldr NEXT_LIM, NEXT_TAB->asize + | add NEXT_TMP0, NEXT_TMP0, NEXT_IDX, lsl #3 + |1: // Traverse array part. + | subs NEXT_TMP1, NEXT_IDX, NEXT_LIM + | bhs >5 + | ldr NEXT_TMP1, [NEXT_TMP0, #4] + | str NEXT_IDX, NEXT_RES_KEY_I + | add NEXT_TMP0, NEXT_TMP0, #8 + | add NEXT_IDX, NEXT_IDX, #1 + | checktp NEXT_TMP1, LJ_TNIL + | beq <1 // Skip holes in array part. + | ldr NEXT_TMP0, [NEXT_TMP0, #-8] + | mov NEXT_RES, NEXT_RES_PTR + | strd NEXT_TMP0, NEXT_RES_VAL // Stores NEXT_TMP1, too. + | mvn NEXT_TMP0, #~LJ_TISNUM + | str NEXT_TMP0, NEXT_RES_KEY_IT + | bx lr + | + |5: // Traverse hash part. + | ldr NEXT_TMP0, NEXT_TAB->hmask + | ldr NODE:NEXT_RES, NEXT_TAB->node + | add NEXT_TMP1, NEXT_TMP1, NEXT_TMP1, lsl #1 + | add NEXT_LIM, NEXT_LIM, NEXT_TMP0 + | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP1, lsl #3 + |6: + | cmp NEXT_IDX, NEXT_LIM + | bhi >9 + | ldr NEXT_TMP1, NODE:NEXT_RES->val.it + | checktp NEXT_TMP1, LJ_TNIL + | add NEXT_IDX, NEXT_IDX, #1 + | bxne lr + | // Skip holes in hash part. + | add NEXT_RES, NEXT_RES, #sizeof(Node) + | b <6 + | + |9: // End of iteration. Set the key to nil (not the value). + | mvn NEXT_TMP0, #0 + | mov NEXT_RES, NEXT_RES_PTR + | str NEXT_TMP0, NEXT_RES_KEY_IT + | bx lr + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -2832,6 +2929,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next break; + case BC_ISTYPE: + | // RA = src*8, RC = -type + | ldrd CARG12, [BASE, RA] + | ins_next1 + | cmn CARG2, RC + | ins_next2 + | bne ->vmeta_istype + | ins_next3 + break; + case BC_ISNUM: + | // RA = src*8, RC = -(TISNUM-1) + | ldrd CARG12, [BASE, RA] + | ins_next1 + | checktp CARG2, LJ_TISNUM + | ins_next2 + | bhs ->vmeta_istype + | ins_next3 + break; + /* -- Unary ops --------------------------------------------------------- */ case BC_MOV: @@ -3436,10 +3552,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash + | ldr CARG4, STR:RC->sid | ldr NODE:INS, TAB:CARG1->node | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask | add CARG3, CARG3, CARG3, lsl #1 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 |1: @@ -3502,6 +3618,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bne <1 // 'no __index' flag set: done. | b ->vmeta_tgetb break; + case BC_TGETR: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | // RA = dst*8, RB = table*8, RC = key*8 + | ldr TAB:CARG1, [BASE, RB] + | ldr CARG2, [BASE, RC] + | ldr CARG4, TAB:CARG1->array + | ldr CARG3, TAB:CARG1->asize + | add CARG4, CARG4, CARG2, lsl #3 + | cmp CARG2, CARG3 // In array part? + | bhs ->vmeta_tgetr + | ldrd CARG12, [CARG4] + |->BC_TGETR_Z: + | ins_next1 + | ins_next2 + | strd CARG12, [BASE, RA] + | ins_next3 + break; case BC_TSETV: | decode_RB8 RB, INS @@ -3565,10 +3699,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // (TAB:RB =) TAB:CARG1 = GCtab *, STR:RC = GCstr *, RA = dst*8 | ldr CARG3, TAB:CARG1->hmask - | ldr CARG4, STR:RC->hash + | ldr CARG4, STR:RC->sid | ldr NODE:INS, TAB:CARG1->node | mov TAB:RB, TAB:CARG1 - | and CARG3, CARG3, CARG4 // idx = str->hash & tab->hmask + | and CARG3, CARG3, CARG4 // idx = str->sid & tab->hmask | add CARG3, CARG3, CARG3, lsl #1 | mov CARG4, #0 | add NODE:INS, NODE:INS, CARG3, lsl #3 // node = tab->node + idx*3*8 @@ -3672,6 +3806,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | barrierback TAB:CARG1, INS, CARG3 | b <2 break; + case BC_TSETR: + | decode_RB8 RB, INS + | decode_RC8 RC, INS + | // RA = src*8, RB = table*8, RC = key*8 + | ldr TAB:CARG2, [BASE, RB] + | ldr CARG3, [BASE, RC] + | ldrb INS, TAB:CARG2->marked + | ldr CARG1, TAB:CARG2->array + | ldr CARG4, TAB:CARG2->asize + | tst INS, #LJ_GC_BLACK // isblack(table) + | add CARG1, CARG1, CARG3, lsl #3 + | bne >7 + |2: + | cmp CARG3, CARG4 // In array part? + | bhs ->vmeta_tsetr + |->BC_TSETR_Z: + | ldrd CARG34, [BASE, RA] + | ins_next1 + | ins_next2 + | strd CARG34, [CARG1] + | ins_next3 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, INS, RB + | b <2 + break; case BC_TSETM: | // RA = base*8 (table at base-1), RC = num_const (start index) @@ -3812,10 +3972,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | hotloop |.endif + |->vm_IITERN: + | // RA = base*8, (RB = nresults+1, RC = nargs+1 (2+1)) | add RA, BASE, RA | ldr TAB:RB, [RA, #-16] | ldr CARG1, [RA, #-8] // Get index from control var. @@ -3881,7 +4042,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next1 | ins_next2 | mov CARG1, #0 - | mvn CARG2, #0x00018000 + | mvn CARG2, #~LJ_KEYINDEX | strd CARG1, [RA, #-8] // Initialize control var. |1: | ins_next3 @@ -3890,9 +4051,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov OP, #BC_ITERC | strb CARG1, [PC, #-4] | sub PC, RC, #0x20000 + |.if JIT + | ldrb CARG1, [PC] + | cmp CARG1, #BC_ITERN + | bne >6 + |.endif | strb OP, [PC] // Subsumes ins_next1. | ins_next2 | b <1 + |.if JIT + |6: // Unpatch JLOOP. + | ldr CARG1, [DISPATCH, #DISPATCH_J(trace)] + | ldrh CARG2, [PC, #2] + | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] + | // Subsumes ins_next1 and ins_next2. + | ldr INS, TRACE:CARG1->startins + | bfi INS, OP, #0, #8 + | str INS, [PC], #4 + | b <1 + |.endif break; case BC_VARG: @@ -4269,7 +4446,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | st_vmstate CARG2 | ldr RA, TRACE:RC->mcode | str BASE, [DISPATCH, #DISPATCH_GL(jit_base)] - | str L, [DISPATCH, #DISPATCH_GL(jit_L)] + | str L, [DISPATCH, #DISPATCH_GL(tmpbuf.L)] | bx RA |.endif break; @@ -4387,6 +4564,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ldr BASE, L->base | mv_vmstate CARG3, INTERP | ldr CRET2, L->top + | str L, [DISPATCH, #DISPATCH_GL(cur_L)] | lsl RC, CRET1, #3 | st_vmstate CARG3 | ldr PC, [BASE, FRAME_PC] diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc new file mode 100644 index 00000000..3448d0d2 --- /dev/null +++ b/src/vm_arm64.dasc @@ -0,0 +1,4158 @@ +|// Low-level VM code for ARM64 CPUs. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +| +|.arch arm64 +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|// Note: The ragged indentation of the instructions is intentional. +|// The starting columns indicate data dependencies. +| +|//----------------------------------------------------------------------- +| +|// ARM64 registers and the AAPCS64 ABI 1.0 at a glance: +|// +|// x0-x17 temp, x19-x28 callee-saved, x29 fp, x30 lr +|// x18 is reserved on most platforms. Don't use it, save it or restore it. +|// x31 doesn't exist. Register number 31 either means xzr/wzr (zero) or sp, +|// depending on the instruction. +|// v0-v7 temp, v8-v15 callee-saved (only d8-d15 preserved), v16-v31 temp +|// +|// x0-x7/v0-v7 hold parameters and results. +| +|// Fixed register assignments for the interpreter. +| +|// The following must be C callee-save. +|.define BASE, x19 // Base of current Lua stack frame. +|.define KBASE, x20 // Constants of current Lua function. +|.define PC, x21 // Next PC. +|.define GLREG, x22 // Global state. +|.define LREG, x23 // Register holding lua_State (also in SAVE_L). +|.define TISNUM, x24 // Constant LJ_TISNUM << 47. +|.define TISNUMhi, x25 // Constant LJ_TISNUM << 15. +|.define TISNIL, x26 // Constant -1LL. +|.define fp, x29 // Yes, we have to maintain a frame pointer. +| +|.define ST_INTERP, w26 // Constant -1. +| +|// The following temporaries are not saved across C calls, except for RA/RC. +|.define RA, x27 +|.define RC, x28 +|.define RB, x17 +|.define RAw, w27 +|.define RCw, w28 +|.define RBw, w17 +|.define INS, x16 +|.define INSw, w16 +|.define ITYPE, x15 +|.define TMP0, x8 +|.define TMP1, x9 +|.define TMP2, x10 +|.define TMP3, x11 +|.define TMP0w, w8 +|.define TMP1w, w9 +|.define TMP2w, w10 +|.define TMP3w, w11 +| +|// Calling conventions. Also used as temporaries. +|.define CARG1, x0 +|.define CARG2, x1 +|.define CARG3, x2 +|.define CARG4, x3 +|.define CARG5, x4 +|.define CARG1w, w0 +|.define CARG2w, w1 +|.define CARG3w, w2 +|.define CARG4w, w3 +|.define CARG5w, w4 +| +|.define FARG1, d0 +|.define FARG2, d1 +| +|.define CRET1, x0 +|.define CRET1w, w0 +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +| +|.define CFRAME_SPACE, 208 +|//----- 16 byte aligned, <-- sp entering interpreter +|.define SAVE_FP_LR_, 192 +|.define SAVE_GPR_, 112 // 112+10*8: 64 bit GPR saves +|.define SAVE_FPR_, 48 // 48+8*8: 64 bit FPR saves +|// Unused [sp, #44] // 32 bit values +|.define SAVE_NRES, [sp, #40] +|.define SAVE_ERRF, [sp, #36] +|.define SAVE_MULTRES, [sp, #32] +|.define TMPD, [sp, #24] // 64 bit values +|.define SAVE_L, [sp, #16] +|.define SAVE_PC, [sp, #8] +|.define SAVE_CFRAME, [sp, #0] +|//----- 16 byte aligned, <-- sp while in interpreter. +| +|.define TMPDofs, #24 +| +|.macro save_, gpr1, gpr2, fpr1, fpr2 +| stp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] +| stp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] +|.endmacro +|.macro rest_, gpr1, gpr2, fpr1, fpr2 +| ldp d..fpr2, d..fpr1, [sp, # SAVE_FPR_+(14-fpr1)*8] +| ldp x..gpr2, x..gpr1, [sp, # SAVE_GPR_+(27-gpr1)*8] +|.endmacro +| +|.macro saveregs +| sub sp, sp, # CFRAME_SPACE +| stp fp, lr, [sp, # SAVE_FP_LR_] +| add fp, sp, # SAVE_FP_LR_ +| stp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] +| save_ 21, 22, 8, 9 +| save_ 23, 24, 10, 11 +| save_ 25, 26, 12, 13 +| save_ 27, 28, 14, 15 +|.endmacro +|.macro restoreregs +| ldp x20, x19, [sp, # SAVE_GPR_+(27-19)*8] +| rest_ 21, 22, 8, 9 +| rest_ 23, 24, 10, 11 +| rest_ 25, 26, 12, 13 +| rest_ 27, 28, 14, 15 +| ldp fp, lr, [sp, # SAVE_FP_LR_] +| add sp, sp, # CFRAME_SPACE +|.endmacro +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State, LREG +|.type GL, global_State, GLREG +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS8, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Trap for not-yet-implemented parts. +|.macro NYI; brk; .endmacro +| +|//----------------------------------------------------------------------- +| +|// Access to frame relative to BASE. +|.define FRAME_FUNC, #-16 +|.define FRAME_PC, #-8 +| +|// Endian-specific defines. +|.if ENDIAN_LE +|.define LO, 0 +|.define OFS_RD, 2 +|.define OFS_RB, 3 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +|.else +|.define LO, 4 +|.define OFS_RD, 0 +|.define OFS_RB, 0 +|.define OFS_RA, 2 +|.define OFS_OP, 3 +|.endif +| +|.macro decode_RA, dst, ins; ubfx dst, ins, #8, #8; .endmacro +|.macro decode_RB, dst, ins; ubfx dst, ins, #24, #8; .endmacro +|.macro decode_RC, dst, ins; ubfx dst, ins, #16, #8; .endmacro +|.macro decode_RD, dst, ins; ubfx dst, ins, #16, #16; .endmacro +|.macro decode_RC8RD, dst, src; ubfiz dst, src, #3, #8; .endmacro +| +|// Instruction decode+dispatch. +|.macro ins_NEXT +| ldr INSw, [PC], #4 +| add TMP1, GL, INS, uxtb #3 +| decode_RA RA, INS +| ldr TMP0, [TMP1, #GG_G2DISP] +| decode_RD RC, INS +| br TMP0 +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| .macro ins_next +| b ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| ldr PC, LFUNC:CARG3->pc +| ldr INSw, [PC], #4 +| add TMP1, GL, INS, uxtb #3 +| decode_RA RA, INS +| ldr TMP0, [TMP1, #GG_G2DISP] +| add RA, BASE, RA, lsl #3 +| br TMP0 +|.endmacro +| +|.macro ins_call +| // BASE = new base, CARG3 = LFUNC/CFUNC, RC = nargs*8, PC = caller PC +| str PC, [BASE, FRAME_PC] +| ins_callt +|.endmacro +| +|//----------------------------------------------------------------------- +| +|// Macros to check the TValue type and extract the GCobj. Branch on failure. +|.macro checktp, reg, tp, target +| asr ITYPE, reg, #47 +| cmn ITYPE, #-tp +| and reg, reg, #LJ_GCVMASK +| bne target +|.endmacro +|.macro checktp, dst, reg, tp, target +| asr ITYPE, reg, #47 +| cmn ITYPE, #-tp +| and dst, reg, #LJ_GCVMASK +| bne target +|.endmacro +|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro +|.macro checkint, reg, target +| cmp TISNUMhi, reg, lsr #32 +| bne target +|.endmacro +|.macro checknum, reg, target +| cmp TISNUMhi, reg, lsr #32 +| bls target +|.endmacro +|.macro checknumber, reg, target +| cmp TISNUMhi, reg, lsr #32 +| blo target +|.endmacro +| +|.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro +|.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro +| +#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|.macro hotcheck, delta +| lsr CARG1, PC, #1 +| and CARG1, CARG1, #126 +| add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT +| ldrh CARG2w, [GL, CARG1] +| subs CARG2, CARG2, #delta +| strh CARG2w, [GL, CARG1] +|.endmacro +| +|.macro hotloop +| hotcheck HOTCOUNT_LOOP +| blo ->vm_hotloop +|.endmacro +| +|.macro hotcall +| hotcheck HOTCOUNT_CALL +| blo ->vm_hotcall +|.endmacro +| +|// Set current VM state. +|.macro mv_vmstate, reg, st; movn reg, #LJ_VMST_..st; .endmacro +|.macro st_vmstate, reg; str reg, GL->vmstate; .endmacro +| +|// Move table write barrier back. Overwrites mark and tmp. +|.macro barrierback, tab, mark, tmp +| ldr tmp, GL->gc.grayagain +| and mark, mark, #~LJ_GC_BLACK // black2gray(tab) +| str tab, GL->gc.grayagain +| strb mark, tab->marked +| str tmp, tab->gclist +|.endmacro +| +|//----------------------------------------------------------------------- + +#if !LJ_DUALNUM +#error "Only dual-number mode supported for ARM64 target" +#endif + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | // See vm_return. Also: RB = previous base. + | tbz PC, #2, ->cont_dispatch // (PC & FRAME_P) == 0? + | + | // Return from pcall or xpcall fast func. + | ldr PC, [RB, FRAME_PC] // Fetch PC of previous frame. + | mov_true TMP0 + | mov BASE, RB + | // Prepending may overwrite the pcall frame, so do it at the end. + | str TMP0, [RA, #-8]! // Prepend true to results. + | + |->vm_returnc: + | adds RC, RC, #8 // RC = (nresults+1)*8. + | mov CRET1, #LUA_YIELD + | beq ->vm_unwind_c_eh + | str RCw, SAVE_MULTRES + | ands CARG1, PC, #FRAME_TYPE + | beq ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultptr, RC/MULTRES = (nresults+1)*8, PC = return + | // CARG1 = PC & FRAME_TYPE + | and RB, PC, #~FRAME_TYPEP + | cmp CARG1, #FRAME_C + | sub RB, BASE, RB // RB = previous base. + | bne ->vm_returnp + | + | str RB, L->base + | ldrsw CARG2, SAVE_NRES // CARG2 = nresults+1. + | mv_vmstate TMP0w, C + | sub BASE, BASE, #16 + | subs TMP2, RC, #8 + | st_vmstate TMP0w + | beq >2 + |1: + | subs TMP2, TMP2, #8 + | ldr TMP0, [RA], #8 + | str TMP0, [BASE], #8 + | bne <1 + |2: + | cmp RC, CARG2, lsl #3 // More/less results wanted? + | bne >6 + |3: + | str BASE, L->top // Store new top. + | + |->vm_leave_cp: + | ldr RC, SAVE_CFRAME // Restore previous C frame. + | mov CRET1, #0 // Ok return status for vm_pcall. + | str RC, L->cframe + | + |->vm_leave_unw: + | restoreregs + | ret + | + |6: + | bgt >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. + | ldr CARG3, L->maxstack + | cmp BASE, CARG3 + | bhs >8 + | str TISNIL, [BASE], #8 + | add RC, RC, #8 + | b <2 + | + |7: // Less results wanted. + | cbz CARG2, <3 // LUA_MULTRET+1 case? + | sub CARG1, RC, CARG2, lsl #3 + | sub BASE, BASE, CARG1 // Shrink top. + | b <3 + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | str BASE, L->top // Save current top held in BASE (yes). + | mov CARG1, L + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldr BASE, L->top // Need the (realloced) L->top in BASE. + | ldrsw CARG2, SAVE_NRES + | b <2 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | mov sp, CARG1 + | mov CRET1, CARG2 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | ldr L, SAVE_L + | mv_vmstate TMP0w, C + | ldr GL, L->glref + | st_vmstate TMP0w + | b ->vm_leave_unw + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | and sp, CARG1, #CFRAME_RAWMASK + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | ldr L, SAVE_L + | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 + | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | movn TISNIL, #0 + | mov RC, #16 // 2 results: false + error message. + | ldr BASE, L->base + | ldr GL, L->glref // Setup pointer to global state. + | mov_false TMP0 + | sub RA, BASE, #8 // Results start at BASE-8. + | ldr PC, [BASE, FRAME_PC] // Fetch PC of previous frame. + | str TMP0, [BASE, #-8] // Prepend false to error message. + | st_vmstate ST_INTERP + | b ->vm_returnc + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | // CARG1 = L + | mov CARG2, #LUA_MINSTACK + | b >2 + | + |->vm_growstack_l: // Grow stack for Lua function. + | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC + | add RC, BASE, RC + | sub RA, RA, BASE + | mov CARG1, L + | stp BASE, RC, L->base + | add PC, PC, #4 // Must point after first instruction. + | lsr CARG2, RA, #3 + |2: + | // L->base = new base, L->top = top + | str PC, SAVE_PC + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldp BASE, RC, L->base + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | sub NARGS8:RC, RC, BASE + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | mov L, CARG1 + | ldr GL, L->glref // Setup pointer to global state. + | mov BASE, CARG2 + | str L, SAVE_L + | mov PC, #FRAME_CP + | str wzr, SAVE_NRES + | add TMP0, sp, #CFRAME_RESUME + | ldrb TMP1w, L->status + | str wzr, SAVE_ERRF + | str L, SAVE_PC // Any value outside of bytecode is ok. + | str xzr, SAVE_CFRAME + | str TMP0, L->cframe + | cbz TMP1w, >3 + | + | // Resume after yield (like a return). + | str L, GL->cur_L + | mov RA, BASE + | ldp BASE, CARG1, L->base + | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 + | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | ldr PC, [BASE, FRAME_PC] + | strb wzr, L->status + | movn TISNIL, #0 + | sub RC, CARG1, BASE + | ands CARG1, PC, #FRAME_TYPE + | add RC, RC, #8 + | st_vmstate ST_INTERP + | str RCw, SAVE_MULTRES + | beq ->BC_RET_Z + | b ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | mov PC, #FRAME_CP + | str CARG4w, SAVE_ERRF + | b >1 + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | mov PC, #FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | ldr RC, L:CARG1->cframe + | str CARG3w, SAVE_NRES + | mov L, CARG1 + | str CARG1, SAVE_L + | ldr GL, L->glref // Setup pointer to global state. + | mov BASE, CARG2 + | str CARG1, SAVE_PC // Any value outside of bytecode is ok. + | add TMP0, sp, #0 + | str RC, SAVE_CFRAME + | str TMP0, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | str L, GL->cur_L + | ldp RB, CARG1, L->base // RB = old base (for vmeta_call). + | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 + | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | add PC, PC, BASE + | movn TISNIL, #0 + | sub PC, PC, RB // PC = frame delta + frame type + | sub NARGS8:RC, CARG1, BASE + | st_vmstate ST_INTERP + | + |->vm_call_dispatch: + | // RB = old base, BASE = new base, RC = nargs*8, PC = caller PC + | ldr CARG3, [BASE, FRAME_FUNC] + | checkfunc CARG3, ->vmeta_call + | + |->vm_call_dispatch_f: + | ins_call + | // BASE = new base, CARG3 = func, RC = nargs*8, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | mov L, CARG1 + | ldr RA, L:CARG1->stack + | str CARG1, SAVE_L + | ldr GL, L->glref // Setup pointer to global state. + | ldr RB, L->top + | str CARG1, SAVE_PC // Any value outside of bytecode is ok. + | ldr RC, L->cframe + | sub RA, RA, RB // Compute -savestack(L, L->top). + | str RAw, SAVE_NRES // Neg. delta means cframe w/o frame. + | str wzr, SAVE_ERRF // No error function. + | add TMP0, sp, #0 + | str RC, SAVE_CFRAME + | str TMP0, L->cframe // Add our C frame to cframe chain. + | str L, GL->cur_L + | blr CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | mov BASE, CRET1 + | mov PC, #FRAME_CP + | cbnz BASE, <3 // Else continue with the call. + | b ->vm_leave_cp // No base? Just remove C frame. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RC = (nresults+1)*8 + | ldr LFUNC:CARG3, [RB, FRAME_FUNC] + | ldr CARG1, [BASE, #-32] // Get continuation. + | mov CARG4, BASE + | mov BASE, RB // Restore caller BASE. + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + |.if FFI + | cmp CARG1, #1 + |.endif + | ldr PC, [CARG4, #-24] // Restore PC from [cont|PC]. + | add TMP0, RA, RC + | str TISNIL, [TMP0, #-8] // Ensure one valid arg. + |.if FFI + | bls >1 + |.endif + | ldr CARG3, LFUNC:CARG3->pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | // BASE = base, RA = resultptr, CARG4 = meta base + | br CARG1 + | + |.if FFI + |1: + | beq ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: tailcall from C function. + | sub CARG4, CARG4, #32 + | sub RC, CARG4, BASE + | b ->vm_call_tail + |.endif + | + |->cont_cat: // RA = resultptr, CARG4 = meta base + | ldr INSw, [PC, #-4] + | sub CARG2, CARG4, #32 + | ldr TMP0, [RA] + | str BASE, L->base + | decode_RB RB, INS + | decode_RA RA, INS + | add TMP1, BASE, RB, lsl #3 + | subs TMP1, CARG2, TMP1 + | beq >1 + | str TMP0, [CARG2] + | lsr CARG3, TMP1, #3 + | b ->BC_CAT_Z + | + |1: + | str TMP0, [BASE, RA, lsl #3] + | b ->cont_nop + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets1: + | movn CARG4, #~LJ_TSTR + | add CARG2, BASE, RB, lsl #3 + | add CARG4, STR:RC, CARG4, lsl #47 + | b >2 + | + |->vmeta_tgets: + | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 + | str CARG2, GL->tmptv + | add CARG2, GL, #offsetof(global_State, tmptv) + |2: + | add CARG3, sp, TMPDofs + | str CARG4, TMPD + | b >1 + | + |->vmeta_tgetb: // RB = table, RC = index + | add RC, RC, TISNUM + | add CARG2, BASE, RB, lsl #3 + | add CARG3, sp, TMPDofs + | str RC, TMPD + | b >1 + | + |->vmeta_tgetv: // RB = table, RC = key + | add CARG2, BASE, RB, lsl #3 + | add CARG3, BASE, RC, lsl #3 + |1: + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + | // Returns TValue * (finished) or NULL (metamethod). + | cbz CRET1, >3 + | ldr TMP0, [CRET1] + | str TMP0, [BASE, RA, lsl #3] + | ins_next + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | sub TMP1, BASE, #FRAME_CONT + | ldr BASE, L->top + | mov NARGS8:RC, #16 // 2 args for func(t, k). + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. + | str PC, [BASE, #-24] // [cont|PC] + | sub PC, BASE, TMP1 + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | b ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | sxtw CARG2, TMP1w + | bl extern lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | mov TMP0, TISNIL + | cbz CRET1, ->BC_TGETR_Z + | ldr TMP0, [CRET1] + | b ->BC_TGETR_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets1: + | movn CARG4, #~LJ_TSTR + | add CARG2, BASE, RB, lsl #3 + | add CARG4, STR:RC, CARG4, lsl #47 + | b >2 + | + |->vmeta_tsets: + | movk CARG2, #(LJ_TTAB>>1)&0xffff, lsl #48 + | str CARG2, GL->tmptv + | add CARG2, GL, #offsetof(global_State, tmptv) + |2: + | add CARG3, sp, TMPDofs + | str CARG4, TMPD + | b >1 + | + |->vmeta_tsetb: // RB = table, RC = index + | add RC, RC, TISNUM + | add CARG2, BASE, RB, lsl #3 + | add CARG3, sp, TMPDofs + | str RC, TMPD + | b >1 + | + |->vmeta_tsetv: + | add CARG2, BASE, RB, lsl #3 + | add CARG3, BASE, RC, lsl #3 + |1: + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // Returns TValue * (finished) or NULL (metamethod). + | ldr TMP0, [BASE, RA, lsl #3] + | cbz CRET1, >3 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | str TMP0, [CRET1] + | ins_next + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | sub TMP1, BASE, #FRAME_CONT + | ldr BASE, L->top + | mov NARGS8:RC, #24 // 3 args for func(t, k, v). + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. + | str TMP0, [BASE, #16] // Copy value to third argument. + | str PC, [BASE, #-24] // [cont|PC] + | sub PC, BASE, TMP1 + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | b ->vm_call_dispatch_f + | + |->vmeta_tsetr: + | sxtw CARG3, TMP1w + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // Returns TValue *. + | b ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | add CARG2, BASE, RA, lsl #3 + | sub PC, PC, #4 + | add CARG3, BASE, RC, lsl #3 + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | uxtb CARG4w, INSw + | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + | // Returns 0/1 or TValue * (metamethod). + |3: + | cmp CRET1, #1 + | bhi ->vmeta_binop + |4: + | ldrh RBw, [PC, # OFS_RD] + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | sub RB, RB, #0x20000 + | csel PC, PC, RB, lo + |->cont_nop: + | ins_next + | + |->cont_ra: // RA = resultptr + | ldr INSw, [PC, #-4] + | ldr TMP0, [RA] + | decode_RA TMP1, INS + | str TMP0, [BASE, TMP1, lsl #3] + | b ->cont_nop + | + |->cont_condt: // RA = resultptr + | ldr TMP0, [RA] + | mov_true TMP1 + | cmp TMP1, TMP0 // Branch if result is true. + | b <4 + | + |->cont_condf: // RA = resultptr + | ldr TMP0, [RA] + | mov_false TMP1 + | cmp TMP0, TMP1 // Branch if result is false. + | b <4 + | + |->vmeta_equal: + | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. + | and TAB:CARG3, CARG3, #LJ_GCVMASK + | sub PC, PC, #4 + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + | // Returns 0/1 or TValue * (metamethod). + | b <3 + | + |->vmeta_equal_cd: + |.if FFI + | sub PC, PC, #4 + | str BASE, L->base + | mov CARG1, L + | mov CARG2, INS + | str PC, SAVE_PC + | bl extern lj_meta_equal_cd // (lua_State *L, BCIns op) + | // Returns 0/1 or TValue * (metamethod). + | b <3 + |.endif + | + |->vmeta_istype: + | sub PC, PC, #4 + | str BASE, L->base + | mov CARG1, L + | mov CARG2, RA + | mov CARG3, RC + | str PC, SAVE_PC + | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | b ->cont_nop + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_arith_vn: + | add CARG3, BASE, RB, lsl #3 + | add CARG4, KBASE, RC, lsl #3 + | b >1 + | + |->vmeta_arith_nv: + | add CARG4, BASE, RB, lsl #3 + | add CARG3, KBASE, RC, lsl #3 + | b >1 + | + |->vmeta_unm: + | add CARG3, BASE, RC, lsl #3 + | mov CARG4, CARG3 + | b >1 + | + |->vmeta_arith_vv: + | add CARG3, BASE, RB, lsl #3 + | add CARG4, BASE, RC, lsl #3 + |1: + | uxtb CARG5w, INSw + | add CARG2, BASE, RA, lsl #3 + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | // Returns NULL (finished) or TValue * (metamethod). + | cbz CRET1, ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | sub TMP1, CRET1, BASE + | str PC, [CRET1, #-24] // [cont|PC] + | add PC, TMP1, #FRAME_CONT + | mov BASE, CRET1 + | mov NARGS8:RC, #16 // 2 args for func(o1, o2). + | b ->vm_call_dispatch + | + |->vmeta_len: + | add CARG2, BASE, RC, lsl #3 +#if LJ_52 + | mov TAB:RC, TAB:CARG1 // Save table (ignored for other types). +#endif + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_len // (lua_State *L, TValue *o) + | // Returns NULL (retry) or TValue * (metamethod base). +#if LJ_52 + | cbnz CRET1, ->vmeta_binop // Binop call for compatibility. + | mov TAB:CARG1, TAB:RC + | b ->BC_LEN_Z +#else + | b ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod. + | // RB = old base, BASE = new base, RC = nargs*8 + | mov CARG1, L + | str RB, L->base // This is the callers base! + | sub CARG2, BASE, #16 + | str PC, SAVE_PC + | add CARG3, BASE, NARGS8:RC + | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Guaranteed to be a function here. + | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | ins_call + | + |->vmeta_callt: // Resolve __call for BC_CALLT. + | // BASE = old base, RA = new base, RC = nargs*8 + | mov CARG1, L + | str BASE, L->base + | sub CARG2, RA, #16 + | str PC, SAVE_PC + | add CARG3, RA, NARGS8:RC + | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | ldr TMP1, [RA, FRAME_FUNC] // Guaranteed to be a function here. + | ldr PC, [BASE, FRAME_PC] + | add NARGS8:RC, NARGS8:RC, #8 // Got one more argument now. + | and LFUNC:CARG3, TMP1, #LJ_GCVMASK + | b ->BC_CALLT2_Z + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | mov CARG1, L + | str BASE, L->base + | mov CARG2, RA + | str PC, SAVE_PC + | bl extern lj_meta_for // (lua_State *L, TValue *base) + | ldr INSw, [PC, #-4] + |.if JIT + | uxtb TMP0w, INSw + |.endif + | decode_RA RA, INS + | decode_RD RC, INS + |.if JIT + | cmp TMP0, #BC_JFORI + | beq =>BC_JFORI + |.endif + | b =>BC_FORI + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | ldr CARG1, [BASE] + | cmp NARGS8:RC, #8 + | blo ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | ldp CARG1, CARG2, [BASE] + | cmp NARGS8:RC, #16 + | blo ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name + | .ffunc name + | ldr CARG1, [BASE] + | cmp NARGS8:RC, #8 + | ldr FARG1, [BASE] + | blo ->fff_fallback + | checknum CARG1, ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name + | .ffunc name + | ldp CARG1, CARG2, [BASE] + | cmp NARGS8:RC, #16 + | ldp FARG1, FARG2, [BASE] + | blo ->fff_fallback + | checknum CARG1, ->fff_fallback + | checknum CARG2, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses CARG1 and CARG2. + |.macro ffgccheck + | ldp CARG1, CARG2, GL->gc.total // Assumes threshold follows total. + | cmp CARG1, CARG2 + | blt >1 + | bl ->fff_gcstep + |1: + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + | + |.ffunc_1 assert + | ldr PC, [BASE, FRAME_PC] + | mov_false TMP1 + | cmp CARG1, TMP1 + | bhs ->fff_fallback + | str CARG1, [BASE, #-16] + | sub RB, BASE, #8 + | subs RA, NARGS8:RC, #8 + | add RC, NARGS8:RC, #8 // Compute (nresults+1)*8. + | cbz RA, ->fff_res // Done if exactly 1 argument. + |1: + | ldr CARG1, [RB, #16] + | sub RA, RA, #8 + | str CARG1, [RB], #8 + | cbnz RA, <1 + | b ->fff_res + | + |.ffunc_1 type + | mov TMP0, #~LJ_TISNUM + | asr ITYPE, CARG1, #47 + | cmn ITYPE, #~LJ_TISNUM + | csinv TMP1, TMP0, ITYPE, lo + | add TMP1, TMP1, #offsetof(GCfuncC, upvalue)/8 + | ldr CARG1, [CFUNC:CARG3, TMP1, lsl #3] + | b ->fff_restv + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | asr ITYPE, CARG1, #47 + | cmn ITYPE, #-LJ_TTAB + | ccmn ITYPE, #-LJ_TUDATA, #4, ne + | and TAB:CARG1, CARG1, #LJ_GCVMASK + | bne >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | ldr TAB:RB, TAB:CARG1->metatable + |2: + | mov CARG1, TISNIL + | ldr STR:RC, GL->gcroot[GCROOT_MMNAME+MM_metatable] + | cbz TAB:RB, ->fff_restv + | ldr TMP1w, TAB:RB->hmask + | ldr TMP2w, STR:RC->sid + | ldr NODE:CARG3, TAB:RB->node + | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask + | add TMP1, TMP1, TMP1, lsl #1 + | movn CARG4, #~LJ_TSTR + | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 + | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. + |3: // Rearranged logic, because we expect _not_ to find the key. + | ldp CARG1, TMP0, NODE:CARG3->val + | ldr NODE:CARG3, NODE:CARG3->next + | cmp TMP0, CARG4 + | beq >5 + | cbnz NODE:CARG3, <3 + |4: + | mov CARG1, RB // Use metatable as default result. + | movk CARG1, #(LJ_TTAB>>1)&0xffff, lsl #48 + | b ->fff_restv + |5: + | cmp TMP0, TISNIL + | bne ->fff_restv + | b <4 + | + |6: + | movn TMP0, #~LJ_TISNUM + | cmp ITYPE, TMP0 + | csel ITYPE, ITYPE, TMP0, hs + | sub TMP1, GL, ITYPE, lsl #3 + | ldr TAB:RB, [TMP1, #offsetof(global_State, gcroot[GCROOT_BASEMT])-8] + | b <2 + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt. + | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback + | ldr TAB:TMP0, TAB:TMP1->metatable + | asr ITYPE, CARG2, #47 + | ldrb TMP2w, TAB:TMP1->marked + | cmn ITYPE, #-LJ_TTAB + | and TAB:CARG2, CARG2, #LJ_GCVMASK + | ccmp TAB:TMP0, #0, #0, eq + | bne ->fff_fallback + | str TAB:CARG2, TAB:TMP1->metatable + | tbz TMP2w, #2, ->fff_restv // isblack(table) + | barrierback TAB:TMP1, TMP2w, TMP0 + | b ->fff_restv + | + |.ffunc rawget + | ldr CARG2, [BASE] + | cmp NARGS8:RC, #16 + | blo ->fff_fallback + | checktab CARG2, ->fff_fallback + | mov CARG1, L + | add CARG3, BASE, #8 + | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. + | ldr CARG1, [CRET1] + | b ->fff_restv + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | ldr CARG1, [BASE] + | cmp NARGS8:RC, #8 + | bne ->fff_fallback + | checknumber CARG1, ->fff_fallback + | b ->fff_restv + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | asr ITYPE, CARG1, #47 + | cmn ITYPE, #-LJ_TSTR + | // A __tostring method in the string base metatable is ignored. + | beq ->fff_restv + | // Handle numbers inline, unless a number base metatable is present. + | ldr TMP1, GL->gcroot[GCROOT_BASEMT_NUM] + | str BASE, L->base + | cmn ITYPE, #-LJ_TISNUM + | ccmp TMP1, #0, #0, ls + | str PC, SAVE_PC // Redundant (but a defined value). + | bne ->fff_fallback + | ffgccheck + | mov CARG1, L + | mov CARG2, BASE + | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) + | // Returns GCstr *. + | movn TMP1, #~LJ_TSTR + | ldr BASE, L->base + | add CARG1, CARG1, TMP1, lsl #47 + | b ->fff_restv + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | checktp CARG1, LJ_TTAB, ->fff_fallback + | str TISNIL, [BASE, NARGS8:RC] // Set missing 2nd arg to nil. + | ldr PC, [BASE, FRAME_PC] + | add CARG2, BASE, #8 + | sub CARG3, BASE, #16 + | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | mov RC, #(2+1)*8 + | tbnz CRET1w, #31, ->fff_fallback // Invalid key. + | cbnz CRET1, ->fff_res // Found key/value. + | // End of traversal: return nil. + | str TISNIL, [BASE, #-16] + | b ->fff_res1 + | + |.ffunc_1 pairs + | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback +#if LJ_52 + | ldr TAB:CARG2, TAB:TMP1->metatable +#endif + | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] + | ldr PC, [BASE, FRAME_PC] +#if LJ_52 + | cbnz TAB:CARG2, ->fff_fallback +#endif + | mov RC, #(3+1)*8 + | stp CARG1, TISNIL, [BASE, #-8] + | str CFUNC:CARG4, [BASE, #-16] + | b ->fff_res + | + |.ffunc_2 ipairs_aux + | checktab CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + | ldr TMP1w, TAB:CARG1->asize + | ldr CARG3, TAB:CARG1->array + | ldr TMP0w, TAB:CARG1->hmask + | add CARG2w, CARG2w, #1 + | cmp CARG2w, TMP1w + | ldr PC, [BASE, FRAME_PC] + | add TMP2, CARG2, TISNUM + | mov RC, #(0+1)*8 + | str TMP2, [BASE, #-16] + | bhs >2 // Not in array part? + | ldr TMP0, [CARG3, CARG2, lsl #3] + |1: + | mov TMP1, #(2+1)*8 + | cmp TMP0, TISNIL + | str TMP0, [BASE, #-8] + | csel RC, RC, TMP1, eq + | b ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | cbz TMP0w, ->fff_res + | bl extern lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | cbz CRET1, ->fff_res + | ldr TMP0, [CRET1] + | b <1 + | + |.ffunc_1 ipairs + | checktp TMP1, CARG1, LJ_TTAB, ->fff_fallback +#if LJ_52 + | ldr TAB:CARG2, TAB:TMP1->metatable +#endif + | ldr CFUNC:CARG4, CFUNC:CARG3->upvalue[0] + | ldr PC, [BASE, FRAME_PC] +#if LJ_52 + | cbnz TAB:CARG2, ->fff_fallback +#endif + | mov RC, #(3+1)*8 + | stp CARG1, TISNUM, [BASE, #-8] + | str CFUNC:CARG4, [BASE, #-16] + | b ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall + | cmp NARGS8:RC, #8 + | ldrb TMP0w, GL->hookmask + | blo ->fff_fallback + | sub NARGS8:RC, NARGS8:RC, #8 + | mov RB, BASE + | add BASE, BASE, #16 + | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 + | add PC, TMP0, #16+FRAME_PCALL + | beq ->vm_call_dispatch + |1: + | add TMP2, BASE, NARGS8:RC + |2: + | ldr TMP0, [TMP2, #-16] + | str TMP0, [TMP2, #-8]! + | cmp TMP2, BASE + | bne <2 + | b ->vm_call_dispatch + | + |.ffunc xpcall + | ldp CARG1, CARG2, [BASE] + | ldrb TMP0w, GL->hookmask + | subs NARGS8:TMP1, NARGS8:RC, #16 + | blo ->fff_fallback + | mov RB, BASE + | asr ITYPE, CARG2, #47 + | ubfx TMP0w, TMP0w, #HOOK_ACTIVE_SHIFT, #1 + | cmn ITYPE, #-LJ_TFUNC + | add PC, TMP0, #24+FRAME_PCALL + | bne ->fff_fallback // Traceback must be a function. + | mov NARGS8:RC, NARGS8:TMP1 + | add BASE, BASE, #24 + | stp CARG2, CARG1, [RB] // Swap function and traceback. + | cbz NARGS8:RC, ->vm_call_dispatch + | b <1 + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | checktp CARG1, LJ_TTHREAD, ->fff_fallback + |.else + |.ffunc coroutine_wrap_aux + | ldr L:CARG1, CFUNC:CARG3->upvalue[0].gcr + | and L:CARG1, CARG1, #LJ_GCVMASK + |.endif + | ldr PC, [BASE, FRAME_PC] + | str BASE, L->base + | ldp RB, CARG2, L:CARG1->base + | ldrb TMP1w, L:CARG1->status + | add TMP0, CARG2, TMP1 + | str PC, SAVE_PC + | cmp TMP0, RB + | beq ->fff_fallback + | cmp TMP1, #LUA_YIELD + | add TMP0, CARG2, #8 + | csel CARG2, CARG2, TMP0, hs + | ldr CARG4, L:CARG1->maxstack + | add CARG3, CARG2, NARGS8:RC + | ldr RB, L:CARG1->cframe + | ccmp CARG3, CARG4, #2, ls + | ccmp RB, #0, #2, ls + | bhi ->fff_fallback + |.if resume + | sub CARG3, CARG3, #8 // Keep resumed thread in stack for GC. + | add BASE, BASE, #8 + | sub NARGS8:RC, NARGS8:RC, #8 + |.endif + | str CARG3, L:CARG1->top + | str BASE, L->top + | cbz NARGS8:RC, >3 + |2: // Move args to coroutine. + | ldr TMP0, [BASE, RB] + | cmp RB, NARGS8:RC + | str TMP0, [CARG2, RB] + | add RB, RB, #8 + | bne <2 + |3: + | mov CARG3, #0 + | mov L:RA, L:CARG1 + | mov CARG4, #0 + | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | // Returns thread status. + |4: + | ldp CARG3, CARG4, L:RA->base + | cmp CRET1, #LUA_YIELD + | ldr BASE, L->base + | str L, GL->cur_L + | st_vmstate ST_INTERP + | bhi >8 + | sub RC, CARG4, CARG3 + | ldr CARG1, L->maxstack + | add CARG2, BASE, RC + | cbz RC, >6 // No results? + | cmp CARG2, CARG1 + | mov RB, #0 + | bhi >9 // Need to grow stack? + | + | sub CARG4, RC, #8 + | str CARG3, L:RA->top // Clear coroutine stack. + |5: // Move results from coroutine. + | ldr TMP0, [CARG3, RB] + | cmp RB, CARG4 + | str TMP0, [BASE, RB] + | add RB, RB, #8 + | bne <5 + |6: + |.if resume + | mov_true TMP1 + | add RC, RC, #16 + |7: + | str TMP1, [BASE, #-8] // Prepend true/false to results. + | sub RA, BASE, #8 + |.else + | mov RA, BASE + | add RC, RC, #8 + |.endif + | ands CARG1, PC, #FRAME_TYPE + | str PC, SAVE_PC + | str RCw, SAVE_MULTRES + | beq ->BC_RET_Z + | b ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | ldr TMP0, [CARG4, #-8]! + | mov_false TMP1 + | mov RC, #(2+1)*8 + | str CARG4, L:RA->top // Remove error from coroutine stack. + | str TMP0, [BASE] // Copy error message. + | b <7 + |.else + | mov CARG1, L + | mov CARG2, L:RA + | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + | // Never returns. + |.endif + | + |9: // Handle stack expansion on return from yield. + | mov CARG1, L + | lsr CARG2, RC, #3 + | bl extern lj_state_growstack // (lua_State *L, int n) + | mov CRET1, #0 + | b <4 + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | ldr TMP0, L->cframe + | add TMP1, BASE, NARGS8:RC + | mov CRET1, #LUA_YIELD + | stp BASE, TMP1, L->base + | tbz TMP0, #0, ->fff_fallback + | str xzr, L->cframe + | strb CRET1w, L->status + | b ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + |.macro math_round, func, round + | .ffunc math_ .. func + | ldr CARG1, [BASE] + | cmp NARGS8:RC, #8 + | ldr d0, [BASE] + | blo ->fff_fallback + | cmp TISNUMhi, CARG1, lsr #32 + | beq ->fff_restv + | blo ->fff_fallback + | round d0, d0 + | b ->fff_resn + |.endmacro + | + | math_round floor, frintm + | math_round ceil, frintp + | + |.ffunc_1 math_abs + | checknumber CARG1, ->fff_fallback + | and CARG1, CARG1, #U64x(7fffffff,ffffffff) + | bne ->fff_restv + | eor CARG2w, CARG1w, CARG1w, asr #31 + | movz CARG3, #0x41e0, lsl #48 // 2^31. + | subs CARG1w, CARG2w, CARG1w, asr #31 + | add CARG1, CARG1, TISNUM + | csel CARG1, CARG1, CARG3, pl + | // Fallthrough. + | + |->fff_restv: + | // CARG1 = TValue result. + | ldr PC, [BASE, FRAME_PC] + | str CARG1, [BASE, #-16] + |->fff_res1: + | // PC = return. + | mov RC, #(1+1)*8 + |->fff_res: + | // RC = (nresults+1)*8, PC = return. + | ands CARG1, PC, #FRAME_TYPE + | str RCw, SAVE_MULTRES + | sub RA, BASE, #16 + | bne ->vm_return + | ldr INSw, [PC, #-4] + | decode_RB RB, INS + |5: + | cmp RC, RB, lsl #3 // More results expected? + | blo >6 + | decode_RA TMP1, INS + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | sub BASE, RA, TMP1, lsl #3 + | ins_next + | + |6: // Fill up results with nil. + | add TMP1, RA, RC + | add RC, RC, #8 + | str TISNIL, [TMP1, #-8] + | b <5 + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | bl extern func + | b ->fff_resn + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + | bl extern func + | b ->fff_resn + |.endmacro + | + |.ffunc_n math_sqrt + | fsqrt d0, d0 + |->fff_resn: + | ldr PC, [BASE, FRAME_PC] + | str d0, [BASE, #-16] + | b ->fff_res1 + | + |.ffunc math_log + | ldr CARG1, [BASE] + | cmp NARGS8:RC, #8 + | ldr FARG1, [BASE] + | bne ->fff_fallback // Need exactly 1 argument. + | checknum CARG1, ->fff_fallback + | bl extern log + | b ->fff_resn + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.ffunc_2 math_ldexp + | ldr FARG1, [BASE] + | checknum CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + | sxtw CARG1, CARG2w + | bl extern ldexp // (double x, int exp) + | b ->fff_resn + | + |.ffunc_n math_frexp + | add CARG1, sp, TMPDofs + | bl extern frexp + | ldr CARG2w, TMPD + | ldr PC, [BASE, FRAME_PC] + | str d0, [BASE, #-16] + | mov RC, #(2+1)*8 + | add CARG2, CARG2, TISNUM + | str CARG2, [BASE, #-8] + | b ->fff_res + | + |.ffunc_n math_modf + | sub CARG1, BASE, #16 + | ldr PC, [BASE, FRAME_PC] + | bl extern modf + | mov RC, #(2+1)*8 + | str d0, [BASE, #-8] + | b ->fff_res + | + |.macro math_minmax, name, cond, fcond + | .ffunc_1 name + | add RB, BASE, RC + | add RA, BASE, #8 + | checkint CARG1, >4 + |1: // Handle integers. + | ldr CARG2, [RA] + | cmp RA, RB + | bhs ->fff_restv + | checkint CARG2, >3 + | cmp CARG1w, CARG2w + | add RA, RA, #8 + | csel CARG1, CARG2, CARG1, cond + | b <1 + |3: // Convert intermediate result to number and continue below. + | scvtf d0, CARG1w + | blo ->fff_fallback + | ldr d1, [RA] + | b >6 + | + |4: + | ldr d0, [BASE] + | blo ->fff_fallback + |5: // Handle numbers. + | ldr CARG2, [RA] + | ldr d1, [RA] + | cmp RA, RB + | bhs ->fff_resn + | checknum CARG2, >7 + |6: + | fcmp d0, d1 + | add RA, RA, #8 + | fcsel d0, d1, d0, fcond + | b <5 + |7: // Convert integer to number and continue above. + | scvtf d1, CARG2w + | blo ->fff_fallback + | b <6 + |.endmacro + | + | math_minmax math_min, gt, pl + | math_minmax math_max, lt, le + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | ldp PC, CARG1, [BASE, FRAME_PC] + | cmp NARGS8:RC, #8 + | asr ITYPE, CARG1, #47 + | ccmn ITYPE, #-LJ_TSTR, #0, eq + | and STR:CARG1, CARG1, #LJ_GCVMASK + | bne ->fff_fallback + | ldrb TMP0w, STR:CARG1[1] // Access is always ok (NUL at end). + | ldr CARG3w, STR:CARG1->len + | add TMP0, TMP0, TISNUM + | str TMP0, [BASE, #-16] + | mov RC, #(0+1)*8 + | cbz CARG3, ->fff_res + | b ->fff_res1 + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | ldp PC, CARG1, [BASE, FRAME_PC] + | cmp CARG1w, #255 + | ccmp NARGS8:RC, #8, #0, ls // Need exactly 1 argument. + | bne ->fff_fallback + | checkint CARG1, ->fff_fallback + | mov CARG3, #1 + | // Point to the char inside the integer in the stack slot. + |.if ENDIAN_LE + | mov CARG2, BASE + |.else + | add CARG2, BASE, #7 + |.endif + |->fff_newstr: + | // CARG2 = str, CARG3 = len. + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_str_new // (lua_State *L, char *str, size_t l) + |->fff_resstr: + | // Returns GCstr *. + | ldr BASE, L->base + | movn TMP1, #~LJ_TSTR + | add CARG1, CARG1, TMP1, lsl #47 + | b ->fff_restv + | + |.ffunc string_sub + | ffgccheck + | ldr CARG1, [BASE] + | ldr CARG3, [BASE, #16] + | cmp NARGS8:RC, #16 + | movn RB, #0 + | beq >1 + | blo ->fff_fallback + | checkint CARG3, ->fff_fallback + | sxtw RB, CARG3w + |1: + | ldr CARG2, [BASE, #8] + | checkstr CARG1, ->fff_fallback + | ldr TMP1w, STR:CARG1->len + | checkint CARG2, ->fff_fallback + | sxtw CARG2, CARG2w + | // CARG1 = str, TMP1 = str->len, CARG2 = start, RB = end + | add TMP2, RB, TMP1 + | cmp RB, #0 + | add TMP0, CARG2, TMP1 + | csinc RB, RB, TMP2, ge // if (end < 0) end += len+1 + | cmp CARG2, #0 + | csinc CARG2, CARG2, TMP0, ge // if (start < 0) start += len+1 + | cmp RB, #0 + | csel RB, RB, xzr, ge // if (end < 0) end = 0 + | cmp CARG2, #1 + | csinc CARG2, CARG2, xzr, ge // if (start < 1) start = 1 + | cmp RB, TMP1 + | csel RB, RB, TMP1, le // if (end > len) end = len + | add CARG1, STR:CARG1, #sizeof(GCstr)-1 + | subs CARG3, RB, CARG2 // len = end - start + | add CARG2, CARG1, CARG2 + | add CARG3, CARG3, #1 // len += 1 + | bge ->fff_newstr + | add STR:CARG1, GL, #offsetof(global_State, strempty) + | movn TMP1, #~LJ_TSTR + | add CARG1, CARG1, TMP1, lsl #47 + | b ->fff_restv + | + |.macro ffstring_op, name + | .ffunc string_ .. name + | ffgccheck + | ldr CARG2, [BASE] + | cmp NARGS8:RC, #8 + | asr ITYPE, CARG2, #47 + | ccmn ITYPE, #-LJ_TSTR, #0, hs + | and STR:CARG2, CARG2, #LJ_GCVMASK + | bne ->fff_fallback + | ldr TMP0, GL->tmpbuf.b + | add SBUF:CARG1, GL, #offsetof(global_State, tmpbuf) + | str BASE, L->base + | str PC, SAVE_PC + | str L, GL->tmpbuf.L + | str TMP0, GL->tmpbuf.w + | bl extern lj_buf_putstr_ .. name + | bl extern lj_buf_tostr + | b ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |// FP number to bit conversion for soft-float. Clobbers CARG1-CARG3 + |->vm_tobit_fb: + | bls ->fff_fallback + | add CARG2, CARG1, CARG1 + | mov CARG3, #1076 + | sub CARG3, CARG3, CARG2, lsr #53 + | cmp CARG3, #53 + | bhi >1 + | and CARG2, CARG2, #U64x(001fffff,ffffffff) + | orr CARG2, CARG2, #U64x(00200000,00000000) + | cmp CARG1, #0 + | lsr CARG2, CARG2, CARG3 + | cneg CARG1w, CARG2w, mi + | br lr + |1: + | mov CARG1w, #0 + | br lr + | + |.macro .ffunc_bit, name + | .ffunc_1 bit_..name + | adr lr, >1 + | checkint CARG1, ->vm_tobit_fb + |1: + |.endmacro + | + |.macro .ffunc_bit_op, name, ins + | .ffunc_bit name + | mov RA, #8 + | mov TMP0w, CARG1w + | adr lr, >2 + |1: + | ldr CARG1, [BASE, RA] + | cmp RA, NARGS8:RC + | add RA, RA, #8 + | bge >9 + | checkint CARG1, ->vm_tobit_fb + |2: + | ins TMP0w, TMP0w, CARG1w + | b <1 + |.endmacro + | + |.ffunc_bit_op band, and + |.ffunc_bit_op bor, orr + |.ffunc_bit_op bxor, eor + | + |.ffunc_bit tobit + | mov TMP0w, CARG1w + |9: // Label reused by .ffunc_bit_op users. + | add CARG1, TMP0, TISNUM + | b ->fff_restv + | + |.ffunc_bit bswap + | rev TMP0w, CARG1w + | add CARG1, TMP0, TISNUM + | b ->fff_restv + | + |.ffunc_bit bnot + | mvn TMP0w, CARG1w + | add CARG1, TMP0, TISNUM + | b ->fff_restv + | + |.macro .ffunc_bit_sh, name, ins, shmod + | .ffunc bit_..name + | ldp TMP0, CARG1, [BASE] + | cmp NARGS8:RC, #16 + | blo ->fff_fallback + | adr lr, >1 + | checkint CARG1, ->vm_tobit_fb + |1: + |.if shmod == 0 + | mov TMP1, CARG1 + |.else + | neg TMP1, CARG1 + |.endif + | mov CARG1, TMP0 + | adr lr, >2 + | checkint CARG1, ->vm_tobit_fb + |2: + | ins TMP0w, CARG1w, TMP1w + | add CARG1, TMP0, TISNUM + | b ->fff_restv + |.endmacro + | + |.ffunc_bit_sh lshift, lsl, 0 + |.ffunc_bit_sh rshift, lsr, 0 + |.ffunc_bit_sh arshift, asr, 0 + |.ffunc_bit_sh rol, ror, 1 + |.ffunc_bit_sh ror, ror, 0 + | + |//----------------------------------------------------------------------- + | + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RC = nargs*8 + | ldp CFUNC:CARG3, PC, [BASE, FRAME_FUNC] // Fallback may overwrite PC. + | ldr TMP2, L->maxstack + | add TMP1, BASE, NARGS8:RC + | stp BASE, TMP1, L->base + | and CFUNC:CARG3, CARG3, #LJ_GCVMASK + | add TMP1, TMP1, #8*LUA_MINSTACK + | ldr CARG3, CFUNC:CARG3->f + | str PC, SAVE_PC // Redundant (but a defined value). + | cmp TMP1, TMP2 + | mov CARG1, L + | bhi >5 // Need to grow stack. + | blr CARG3 // (lua_State *L) + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ldr BASE, L->base + | cmp CRET1w, #0 + | lsl RC, CRET1, #3 + | sub RA, BASE, #16 + | bgt ->fff_res // Returned nresults+1? + |1: // Returned 0 or -1: retry fast path. + | ldr CARG1, L->top + | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] + | sub NARGS8:RC, CARG1, BASE + | bne ->vm_call_tail // Returned -1? + | and CFUNC:CARG3, CARG3, #LJ_GCVMASK + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | ands TMP0, PC, #FRAME_TYPE + | and TMP1, PC, #~FRAME_TYPEP + | bne >3 + | ldrb RAw, [PC, #-4+OFS_RA] + | lsl RA, RA, #3 + | add TMP1, RA, #16 + |3: + | sub RB, BASE, TMP1 + | b ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | mov CARG2, #LUA_MINSTACK + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldr BASE, L->base + | cmp CARG1, CARG1 // Set zero-flag to force retry. + | b <1 + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RC = nargs*8 + | add CARG2, BASE, NARGS8:RC // Calculate L->top. + | mov RA, lr + | stp BASE, CARG2, L->base + | str PC, SAVE_PC // Redundant (but a defined value). + | mov CARG1, L + | bl extern lj_gc_step // (lua_State *L) + | ldp BASE, CARG2, L->base + | ldr CFUNC:CARG3, [BASE, FRAME_FUNC] + | mov lr, RA // Help return address predictor. + | sub NARGS8:RC, CARG2, BASE // Calculate nargs*8. + | and CFUNC:CARG3, CARG3, #LJ_GCVMASK + | ret + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + |.if JIT + | ldrb CARG1w, GL->hookmask + | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. + | bne >5 + | // Decrement the hookcount for consistency, but always do the call. + | ldr CARG2w, GL->hookcount + | tst CARG1, #HOOK_ACTIVE + | bne >1 + | sub CARG2w, CARG2w, #1 + | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT + | beq >1 + | str CARG2w, GL->hookcount + | b >1 + |.endif + | + |->vm_rethook: // Dispatch target for return hooks. + | ldrb TMP2w, GL->hookmask + | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? + |5: // Re-dispatch to static ins. + | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] + | br TMP0 + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | ldrb TMP2w, GL->hookmask + | ldr TMP3w, GL->hookcount + | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active? + | tst TMP2w, #LUA_MASKLINE|LUA_MASKCOUNT + | beq <5 + | sub TMP3w, TMP3w, #1 + | str TMP3w, GL->hookcount + | cbz TMP3w, >1 + | tbz TMP2w, #LUA_HOOKLINE, <5 + |1: + | mov CARG1, L + | str BASE, L->base + | mov CARG2, PC + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | ldr BASE, L->base + |4: // Re-dispatch to static ins. + | ldr INSw, [PC, #-4] + | add TMP1, GL, INS, uxtb #3 + | decode_RA RA, INS + | ldr TMP0, [TMP1, #GG_G2DISP+GG_DISP2STATIC] + | decode_RD RC, INS + | br TMP0 + | + |->cont_hook: // Continue from hook yield. + | ldr CARG1, [CARG4, #-40] + | add PC, PC, #4 + | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins. + | b <4 + | + |->vm_hotloop: // Hot loop counter underflow. + |.if JIT + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). + | add CARG1, GL, #GG_G2DISP+GG_DISP2J + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | str PC, SAVE_PC + | ldr CARG3, LFUNC:CARG3->pc + | mov CARG2, PC + | str L, [GL, #GL_J(L)] + | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] + | str BASE, L->base + | add CARG3, BASE, CARG3, lsl #3 + | str CARG3, L->top + | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) + | b <3 + |.endif + | + |->vm_callhook: // Dispatch target for call hooks. + | mov CARG2, PC + |.if JIT + | b >1 + |.endif + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | orr CARG2, PC, #1 + |1: + |.endif + | add TMP1, BASE, NARGS8:RC + | str PC, SAVE_PC + | mov CARG1, L + | sub RA, RA, BASE + | stp BASE, TMP1, L->base + | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // Returns ASMFunction. + | ldp BASE, TMP1, L->base + | str xzr, SAVE_PC // Invalidate for subsequent line hook. + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | add RA, BASE, RA + | sub NARGS8:RC, TMP1, BASE + | ldr INSw, [PC, #-4] + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | br CRET1 + | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, CARG4 = meta base + | ldr RBw, SAVE_MULTRES + | ldr INSw, [PC, #-4] + | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. + | subs RB, RB, #8 + | decode_RA RC, INS // Call base. + | and CARG3, CARG3, #LJ_GCVMASK + | beq >2 + |1: // Move results down. + | ldr CARG1, [RA] + | add RA, RA, #8 + | subs RB, RB, #8 + | str CARG1, [BASE, RC, lsl #3] + | add RC, RC, #1 + | bne <1 + |2: + | decode_RA RA, INS + | decode_RB RB, INS + | add RA, RA, RB + |3: + | cmp RA, RC + | bhi >9 // More results wanted? + | + | ldrh RAw, TRACE:CARG3->traceno + | ldrh RCw, TRACE:CARG3->link + | cmp RCw, RAw + | beq ->cont_nop // Blacklisted. + | cmp RCw, #0 + | bne =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | mov CARG1, #GL_J(exitno) + | str RAw, [GL, CARG1] + | mov CARG1, #GL_J(L) + | str L, [GL, CARG1] + | str BASE, L->base + | add CARG1, GL, #GG_G2J + | mov CARG2, PC + | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | ldr BASE, L->base + | b ->cont_nop + | + |9: // Fill up results with nil. + | str TISNIL, [BASE, RC, lsl #3] + | add RC, RC, #1 + | b <3 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mov CARG1, L + | str BASE, L->base + | mov CARG2, PC + | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | ldr BASE, L->base + | sub PC, PC, #4 + | b ->cont_nop +#endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro savex_, a, b + | stp d..a, d..b, [sp, #a*8] + | stp x..a, x..b, [sp, #32*8+a*8] + |.endmacro + | + |->vm_exit_handler: + |.if JIT + | sub sp, sp, #(64*8) + | savex_, 0, 1 + | savex_, 2, 3 + | savex_, 4, 5 + | savex_, 6, 7 + | savex_, 8, 9 + | savex_, 10, 11 + | savex_, 12, 13 + | savex_, 14, 15 + | savex_, 16, 17 + | savex_, 18, 19 + | savex_, 20, 21 + | savex_, 22, 23 + | savex_, 24, 25 + | savex_, 26, 27 + | savex_, 28, 29 + | stp d30, d31, [sp, #30*8] + | ldr CARG1, [sp, #64*8] // Load original value of lr. + | add CARG3, sp, #64*8 // Recompute original value of sp. + | mv_vmstate CARG4w, EXIT + | stp xzr, CARG3, [sp, #62*8] // Store 0/sp in RID_LR/RID_SP. + | sub CARG1, CARG1, lr + | ldr L, GL->cur_L + | lsr CARG1, CARG1, #2 + | ldr BASE, GL->jit_base + | sub CARG1, CARG1, #2 + | ldr CARG2w, [lr] // Load trace number. + | st_vmstate CARG4w + |.if ENDIAN_BE + | rev32 CARG2, CARG2 + |.endif + | str BASE, L->base + | ubfx CARG2w, CARG2w, #5, #16 + | str CARG1w, [GL, #GL_J(exitno)] + | str CARG2w, [GL, #GL_J(parent)] + | str L, [GL, #GL_J(L)] + | str xzr, GL->jit_base + | add CARG1, GL, #GG_G2J + | mov CARG2, sp + | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. + | ldr CARG2, L->cframe + | ldr BASE, L->base + | and sp, CARG2, #CFRAME_RAWMASK + | ldr PC, SAVE_PC // Get SAVE_PC. + | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). + | b >1 + |.endif + | + |->vm_exit_interp: + | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. + |.if JIT + | ldr L, SAVE_L + |1: + | cmp CARG1w, #0 + | blt >9 // Check for error from exit. + | lsl RC, CARG1, #3 + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 + | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | movn TISNIL, #0 + | and LFUNC:CARG2, CARG2, #LJ_GCVMASK + | str RCw, SAVE_MULTRES + | str BASE, L->base + | ldr CARG2, LFUNC:CARG2->pc + | str xzr, GL->jit_base + | mv_vmstate CARG4w, INTERP + | ldr KBASE, [CARG2, #PC2PROTO(k)] + | // Modified copy of ins_next which handles function header dispatch, too. + | ldrb RBw, [PC, # OFS_OP] + | ldr INSw, [PC], #4 + | st_vmstate CARG4w + | cmp RBw, #BC_FUNCC+2 // Fast function? + | add TMP1, GL, INS, uxtb #3 + | bhs >4 + |2: + | cmp RBw, #BC_FUNCF // Function header? + | add TMP0, GL, RB, uxtb #3 + | ldr RB, [TMP0, #GG_G2DISP] + | decode_RA RA, INS + | lsr TMP0, INS, #16 + | csel RC, TMP0, RC, lo + | blo >5 + | ldr CARG3, [BASE, FRAME_FUNC] + | sub RC, RC, #8 + | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + |5: + | br RB + | + |4: // Check frame below fast function. + | ldr CARG1, [BASE, FRAME_PC] + | ands CARG2, CARG1, #FRAME_TYPE + | bne <2 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | ldr CARG3w, [CARG1, #-4] + | decode_RA CARG1, CARG3 + | sub CARG2, BASE, CARG1, lsl #3 + | ldr LFUNC:CARG3, [CARG2, #-32] + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | ldr CARG3, LFUNC:CARG3->pc + | ldr KBASE, [CARG3, #PC2PROTO(k)] + | b <2 + | + |9: // Rethrow error from the right C frame. + | neg CARG2w, CARG1w + | mov CARG1, L + | bl extern lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + | // int lj_vm_modi(int dividend, int divisor); + |->vm_modi: + | eor CARG4w, CARG1w, CARG2w + | cmp CARG4w, #0 + | eor CARG3w, CARG1w, CARG1w, asr #31 + | eor CARG4w, CARG2w, CARG2w, asr #31 + | sub CARG3w, CARG3w, CARG1w, asr #31 + | sub CARG4w, CARG4w, CARG2w, asr #31 + | udiv CARG1w, CARG3w, CARG4w + | msub CARG1w, CARG1w, CARG4w, CARG3w + | ccmp CARG1w, #0, #4, mi + | sub CARG3w, CARG1w, CARG4w + | csel CARG1w, CARG1w, CARG3w, eq + | eor CARG3w, CARG1w, CARG2w + | cmp CARG3w, #0 + | cneg CARG1w, CARG1w, mi + | ret + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_RES, CARG1 + |.define NEXT_IDX, CARG2w + |.define NEXT_LIM, CARG3w + |.define NEXT_TMP0, TMP0 + |.define NEXT_TMP0w, TMP0w + |.define NEXT_TMP1, TMP1 + |.define NEXT_TMP1w, TMP1w + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, [sp] + |.define NEXT_RES_KEY, [sp, #8] + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2w. + |->vm_next: + |.if JIT + | ldr NEXT_LIM, NEXT_TAB->asize + | ldr NEXT_TMP1, NEXT_TAB->array + |1: // Traverse array part. + | subs NEXT_TMP0w, NEXT_IDX, NEXT_LIM + | bhs >5 // Index points after array part? + | ldr NEXT_TMP0, [NEXT_TMP1, NEXT_IDX, uxtw #3] + | cmn NEXT_TMP0, #-LJ_TNIL + | cinc NEXT_IDX, NEXT_IDX, eq + | beq <1 // Skip holes in array part. + | str NEXT_TMP0, NEXT_RES_VAL + | movz NEXT_TMP0w, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | stp NEXT_IDX, NEXT_TMP0w, NEXT_RES_KEY + | add NEXT_IDX, NEXT_IDX, #1 + | mov NEXT_RES, NEXT_RES_PTR + |4: + | ret + | + |5: // Traverse hash part. + | ldr NEXT_TMP1w, NEXT_TAB->hmask + | ldr NODE:NEXT_RES, NEXT_TAB->node + | add NEXT_TMP0w, NEXT_TMP0w, NEXT_TMP0w, lsl #1 + | add NEXT_LIM, NEXT_LIM, NEXT_TMP1w + | add NODE:NEXT_RES, NODE:NEXT_RES, NEXT_TMP0w, uxtw #3 + |6: + | cmp NEXT_IDX, NEXT_LIM + | bhi >9 + | ldr NEXT_TMP0, NODE:NEXT_RES->val + | cmn NEXT_TMP0, #-LJ_TNIL + | add NEXT_IDX, NEXT_IDX, #1 + | bne <4 + | // Skip holes in hash part. + | add NODE:NEXT_RES, NODE:NEXT_RES, #sizeof(Node) + | b <6 + | + |9: // End of iteration. Set the key to nil (not the value). + | movn NEXT_TMP0, #0 + | str NEXT_TMP0, NEXT_RES_KEY + | mov NEXT_RES, NEXT_RES_PTR + | ret + |.endif + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. + |// Saveregs already performed. Callback slot number in [sp], g in r12. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | saveregs + | ldr CTSTATE, GL:x10->ctype_state + | mov GL, x10 + | add x10, sp, # CFRAME_SPACE + | str w9, CTSTATE->cb.slot + | stp x0, x1, CTSTATE->cb.gpr[0] + | stp d0, d1, CTSTATE->cb.fpr[0] + | stp x2, x3, CTSTATE->cb.gpr[2] + | stp d2, d3, CTSTATE->cb.fpr[2] + | stp x4, x5, CTSTATE->cb.gpr[4] + | stp d4, d5, CTSTATE->cb.fpr[4] + | stp x6, x7, CTSTATE->cb.gpr[6] + | stp d6, d7, CTSTATE->cb.fpr[6] + | str x10, CTSTATE->cb.stack + | mov CARG1, CTSTATE + | str CTSTATE, SAVE_PC // Any value outside of bytecode is ok. + | mov CARG2, sp + | bl extern lj_ccallback_enter // (CTState *cts, void *cf) + | // Returns lua_State *. + | ldp BASE, RC, L:CRET1->base + | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 + | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 + | movn TISNIL, #0 + | mov L, CRET1 + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | sub RC, RC, BASE + | st_vmstate ST_INTERP + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI + | ldr CTSTATE, GL->ctype_state + | stp BASE, CARG4, L->base + | str L, CTSTATE->L + | mov CARG1, CTSTATE + | mov CARG2, RA + | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) + | ldp x0, x1, CTSTATE->cb.gpr[0] + | ldp d0, d1, CTSTATE->cb.fpr[0] + | b ->vm_leave_unw + |.endif + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, x19 + | stp x20, CCSTATE, [sp, #-32]! + | stp fp, lr, [sp, #16] + | add fp, sp, #16 + | mov CCSTATE, x0 + | ldr TMP0w, CCSTATE:x0->spadj + | ldrb TMP1w, CCSTATE->nsp + | add TMP2, CCSTATE, #offsetof(CCallState, stack) + | subs TMP1, TMP1, #1 + | ldr TMP3, CCSTATE->func + | sub sp, sp, TMP0 + | bmi >2 + |1: // Copy stack slots + | ldr TMP0, [TMP2, TMP1, lsl #3] + | str TMP0, [sp, TMP1, lsl #3] + | subs TMP1, TMP1, #1 + | bpl <1 + |2: + | ldp x0, x1, CCSTATE->gpr[0] + | ldp d0, d1, CCSTATE->fpr[0] + | ldp x2, x3, CCSTATE->gpr[2] + | ldp d2, d3, CCSTATE->fpr[2] + | ldp x4, x5, CCSTATE->gpr[4] + | ldp d4, d5, CCSTATE->fpr[4] + | ldp x6, x7, CCSTATE->gpr[6] + | ldp d6, d7, CCSTATE->fpr[6] + | ldr x8, CCSTATE->retp + | blr TMP3 + | sub sp, fp, #16 + | stp x0, x1, CCSTATE->gpr[0] + | stp d0, d1, CCSTATE->fpr[0] + | stp d2, d3, CCSTATE->fpr[2] + | ldp fp, lr, [sp, #16] + | ldp x20, CCSTATE, [sp], #32 + | ret + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1, RC = src2, JMP with RC = target + | ldr CARG1, [BASE, RA, lsl #3] + | ldrh RBw, [PC, # OFS_RD] + | ldr CARG2, [BASE, RC, lsl #3] + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | sub RB, RB, #0x20000 + | checkint CARG1, >3 + | checkint CARG2, >4 + | cmp CARG1w, CARG2w + if (op == BC_ISLT) { + | csel PC, RB, PC, lt + } else if (op == BC_ISGE) { + | csel PC, RB, PC, ge + } else if (op == BC_ISLE) { + | csel PC, RB, PC, le + } else { + | csel PC, RB, PC, gt + } + |1: + | ins_next + | + |3: // RA not int. + | ldr FARG1, [BASE, RA, lsl #3] + | blo ->vmeta_comp + | ldr FARG2, [BASE, RC, lsl #3] + | cmp TISNUMhi, CARG2, lsr #32 + | bhi >5 + | bne ->vmeta_comp + | // RA number, RC int. + | scvtf FARG2, CARG2w + | b >5 + | + |4: // RA int, RC not int + | ldr FARG2, [BASE, RC, lsl #3] + | blo ->vmeta_comp + | // RA int, RC number. + | scvtf FARG1, CARG1w + | + |5: // RA number, RC number + | fcmp FARG1, FARG2 + | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + if (op == BC_ISLT) { + | csel PC, RB, PC, lo + } else if (op == BC_ISGE) { + | csel PC, RB, PC, hs + } else if (op == BC_ISLE) { + | csel PC, RB, PC, ls + } else { + | csel PC, RB, PC, hi + } + | b <1 + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | // RA = src1, RC = src2, JMP with RC = target + | ldr CARG1, [BASE, RA, lsl #3] + | add RC, BASE, RC, lsl #3 + | ldrh RBw, [PC, # OFS_RD] + | ldr CARG3, [RC] + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | sub RB, RB, #0x20000 + | asr ITYPE, CARG3, #47 + | cmn ITYPE, #-LJ_TISNUM + if (vk) { + | bls ->BC_ISEQN_Z + } else { + | bls ->BC_ISNEN_Z + } + | // RC is not a number. + | asr TMP0, CARG1, #47 + |.if FFI + | // Check if RC or RA is a cdata. + | cmn ITYPE, #-LJ_TCDATA + | ccmn TMP0, #-LJ_TCDATA, #4, ne + | beq ->vmeta_equal_cd + |.endif + | cmp CARG1, CARG3 + | bne >2 + | // Tag and value are equal. + if (vk) { + |->BC_ISEQV_Z: + | mov PC, RB // Perform branch. + } + |1: + | ins_next + | + |2: // Check if the tags are the same and it's a table or userdata. + | cmp ITYPE, TMP0 + | ccmn ITYPE, #-LJ_TISTABUD, #2, eq + if (vk) { + | bhi <1 + } else { + | bhi ->BC_ISEQV_Z // Reuse code from opposite instruction. + } + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + | and TAB:CARG2, CARG1, #LJ_GCVMASK + | ldr TAB:TMP2, TAB:CARG2->metatable + if (vk) { + | cbz TAB:TMP2, <1 // No metatable? + | ldrb TMP1w, TAB:TMP2->nomm + | mov CARG4, #0 // ne = 0 + | tbnz TMP1w, #MM_eq, <1 // 'no __eq' flag set: done. + } else { + | cbz TAB:TMP2, ->BC_ISEQV_Z // No metatable? + | ldrb TMP1w, TAB:TMP2->nomm + | mov CARG4, #1 // ne = 1. + | tbnz TMP1w, #MM_eq, ->BC_ISEQV_Z // 'no __eq' flag set: done. + } + | b ->vmeta_equal + break; + + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | // RA = src, RC = str_const (~), JMP with RC = target + | ldr CARG1, [BASE, RA, lsl #3] + | mvn RC, RC + | ldrh RBw, [PC, # OFS_RD] + | ldr CARG2, [KBASE, RC, lsl #3] + | add PC, PC, #4 + | movn TMP0, #~LJ_TSTR + |.if FFI + | asr ITYPE, CARG1, #47 + |.endif + | add RB, PC, RB, lsl #2 + | add CARG2, CARG2, TMP0, lsl #47 + | sub RB, RB, #0x20000 + |.if FFI + | cmn ITYPE, #-LJ_TCDATA + | beq ->vmeta_equal_cd + |.endif + | cmp CARG1, CARG2 + if (vk) { + | csel PC, RB, PC, eq + } else { + | csel PC, RB, PC, ne + } + | ins_next + break; + + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | // RA = src, RC = num_const (~), JMP with RC = target + | ldr CARG1, [BASE, RA, lsl #3] + | add RC, KBASE, RC, lsl #3 + | ldrh RBw, [PC, # OFS_RD] + | ldr CARG3, [RC] + | add PC, PC, #4 + | add RB, PC, RB, lsl #2 + | sub RB, RB, #0x20000 + if (vk) { + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | checkint CARG1, >4 + | checkint CARG3, >6 + | cmp CARG1w, CARG3w + |1: + if (vk) { + | csel PC, RB, PC, eq + |2: + } else { + |2: + | csel PC, RB, PC, ne + } + |3: + | ins_next + | + |4: // RA not int. + |.if FFI + | blo >7 + |.else + | blo <2 + |.endif + | ldr FARG1, [BASE, RA, lsl #3] + | ldr FARG2, [RC] + | cmp TISNUMhi, CARG3, lsr #32 + | bne >5 + | // RA number, RC int. + | scvtf FARG2, CARG3w + |5: + | // RA number, RC number. + | fcmp FARG1, FARG2 + | b <1 + | + |6: // RA int, RC number + | ldr FARG2, [RC] + | scvtf FARG1, CARG1w + | fcmp FARG1, FARG2 + | b <1 + | + |.if FFI + |7: + | asr ITYPE, CARG1, #47 + | cmn ITYPE, #-LJ_TCDATA + | bne <2 + | b ->vmeta_equal_cd + |.endif + break; + + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | // RA = src, RC = primitive_type (~), JMP with RC = target + | ldr TMP0, [BASE, RA, lsl #3] + | ldrh RBw, [PC, # OFS_RD] + | add PC, PC, #4 + | add RC, RC, #1 + | add RB, PC, RB, lsl #2 + |.if FFI + | asr ITYPE, TMP0, #47 + | cmn ITYPE, #-LJ_TCDATA + | beq ->vmeta_equal_cd + | cmn RC, ITYPE + |.else + | cmn RC, TMP0, asr #47 + |.endif + | sub RB, RB, #0x20000 + if (vk) { + | csel PC, RB, PC, eq + } else { + | csel PC, RB, PC, ne + } + | ins_next + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | // RA = dst or unused, RC = src, JMP with RC = target + | ldrh RBw, [PC, # OFS_RD] + | ldr TMP0, [BASE, RC, lsl #3] + | add PC, PC, #4 + | mov_false TMP1 + | add RB, PC, RB, lsl #2 + | cmp TMP0, TMP1 + | sub RB, RB, #0x20000 + if (op == BC_ISTC || op == BC_IST) { + if (op == BC_ISTC) { + | csel RA, RA, RC, lo + } + | csel PC, RB, PC, lo + } else { + if (op == BC_ISFC) { + | csel RA, RA, RC, hs + } + | csel PC, RB, PC, hs + } + if (op == BC_ISTC || op == BC_ISFC) { + | str TMP0, [BASE, RA, lsl #3] + } + | ins_next + break; + + case BC_ISTYPE: + | // RA = src, RC = -type + | ldr TMP0, [BASE, RA, lsl #3] + | cmn RC, TMP0, asr #47 + | bne ->vmeta_istype + | ins_next + break; + case BC_ISNUM: + | // RA = src, RC = -(TISNUM-1) + | ldr TMP0, [BASE, RA] + | checknum TMP0, ->vmeta_istype + | ins_next + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | // RA = dst, RC = src + | ldr TMP0, [BASE, RC, lsl #3] + | str TMP0, [BASE, RA, lsl #3] + | ins_next + break; + case BC_NOT: + | // RA = dst, RC = src + | ldr TMP0, [BASE, RC, lsl #3] + | mov_false TMP1 + | mov_true TMP2 + | cmp TMP0, TMP1 + | csel TMP0, TMP1, TMP2, lo + | str TMP0, [BASE, RA, lsl #3] + | ins_next + break; + case BC_UNM: + | // RA = dst, RC = src + | ldr TMP0, [BASE, RC, lsl #3] + | asr ITYPE, TMP0, #47 + | cmn ITYPE, #-LJ_TISNUM + | bhi ->vmeta_unm + | eor TMP0, TMP0, #U64x(80000000,00000000) + | bne >5 + | negs TMP0w, TMP0w + | movz CARG3, #0x41e0, lsl #48 // 2^31. + | add TMP0, TMP0, TISNUM + | csel TMP0, TMP0, CARG3, vc + |5: + | str TMP0, [BASE, RA, lsl #3] + | ins_next + break; + case BC_LEN: + | // RA = dst, RC = src + | ldr CARG1, [BASE, RC, lsl #3] + | asr ITYPE, CARG1, #47 + | cmn ITYPE, #-LJ_TSTR + | and CARG1, CARG1, #LJ_GCVMASK + | bne >2 + | ldr CARG1w, STR:CARG1->len + |1: + | add CARG1, CARG1, TISNUM + | str CARG1, [BASE, RA, lsl #3] + | ins_next + | + |2: + | cmn ITYPE, #-LJ_TTAB + | bne ->vmeta_len +#if LJ_52 + | ldr TAB:CARG2, TAB:CARG1->metatable + | cbnz TAB:CARG2, >9 + |3: +#endif + |->BC_LEN_Z: + | bl extern lj_tab_len // (GCtab *t) + | // Returns uint32_t (but less than 2^31). + | b <1 + | +#if LJ_52 + |9: + | ldrb TMP1w, TAB:CARG2->nomm + | tbnz TMP1w, #MM_len, <3 // 'no __len' flag set: done. + | b ->vmeta_len +#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro ins_arithcheck_int, target + | checkint CARG1, target + | checkint CARG2, target + |.endmacro + | + |.macro ins_arithcheck_num, target + | checknum CARG1, target + | checknum CARG2, target + |.endmacro + | + |.macro ins_arithcheck_nzdiv, target + | cbz CARG2w, target + |.endmacro + | + |.macro ins_arithhead + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||if (vk == 1) { + | and RC, RC, #255 + | decode_RB RB, INS + ||} else { + | decode_RB RB, INS + | and RC, RC, #255 + ||} + |.endmacro + | + |.macro ins_arithload, reg1, reg2 + | // RA = dst, RB = src1, RC = src2 | num_const + ||switch (vk) { + ||case 0: + | ldr reg1, [BASE, RB, lsl #3] + | ldr reg2, [KBASE, RC, lsl #3] + || break; + ||case 1: + | ldr reg1, [KBASE, RC, lsl #3] + | ldr reg2, [BASE, RB, lsl #3] + || break; + ||default: + | ldr reg1, [BASE, RB, lsl #3] + | ldr reg2, [BASE, RC, lsl #3] + || break; + ||} + |.endmacro + | + |.macro ins_arithfallback, ins + ||switch (vk) { + ||case 0: + | ins ->vmeta_arith_vn + || break; + ||case 1: + | ins ->vmeta_arith_nv + || break; + ||default: + | ins ->vmeta_arith_vv + || break; + ||} + |.endmacro + | + |.macro ins_arithmod, res, reg1, reg2 + | fdiv d2, reg1, reg2 + | frintm d2, d2 + | fmsub res, d2, reg2, reg1 + |.endmacro + | + |.macro ins_arithdn, intins, fpins + | ins_arithhead + | ins_arithload CARG1, CARG2 + | ins_arithcheck_int >5 + |.if "intins" == "smull" + | smull CARG1, CARG1w, CARG2w + | cmp CARG1, CARG1, sxtw + | mov CARG1w, CARG1w + | ins_arithfallback bne + |.elif "intins" == "ins_arithmodi" + | ins_arithfallback ins_arithcheck_nzdiv + | bl ->vm_modi + |.else + | intins CARG1w, CARG1w, CARG2w + | ins_arithfallback bvs + |.endif + | add CARG1, CARG1, TISNUM + | str CARG1, [BASE, RA, lsl #3] + |4: + | ins_next + | + |5: // FP variant. + | ins_arithload FARG1, FARG2 + | ins_arithfallback ins_arithcheck_num + | fpins FARG1, FARG1, FARG2 + | str FARG1, [BASE, RA, lsl #3] + | b <4 + |.endmacro + | + |.macro ins_arithfp, fpins + | ins_arithhead + | ins_arithload CARG1, CARG2 + | ins_arithload FARG1, FARG2 + | ins_arithfallback ins_arithcheck_num + |.if "fpins" == "fpow" + | bl extern pow + |.else + | fpins FARG1, FARG1, FARG2 + |.endif + | str FARG1, [BASE, RA, lsl #3] + | ins_next + |.endmacro + + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arithdn adds, fadd + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arithdn subs, fsub + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arithdn smull, fmul + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithfp fdiv + break; + case BC_MODVN: case BC_MODNV: case BC_MODVV: + | ins_arithdn ins_arithmodi, ins_arithmod + break; + case BC_POW: + | // NYI: (partial) integer arithmetic. + | ins_arithfp fpow + break; + + case BC_CAT: + | decode_RB RB, INS + | and RC, RC, #255 + | // RA = dst, RB = src_start, RC = src_end + | str BASE, L->base + | sub CARG3, RC, RB + | add CARG2, BASE, RC, lsl #3 + |->BC_CAT_Z: + | // RA = dst, CARG2 = top-1, CARG3 = left + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) + | // Returns NULL (finished) or TValue * (metamethod). + | ldrb RBw, [PC, #-4+OFS_RB] + | ldr BASE, L->base + | cbnz CRET1, ->vmeta_binop + | ldr TMP0, [BASE, RB, lsl #3] + | str TMP0, [BASE, RA, lsl #3] // Copy result to RA. + | ins_next + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | // RA = dst, RC = str_const (~) + | mvn RC, RC + | ldr TMP0, [KBASE, RC, lsl #3] + | movn TMP1, #~LJ_TSTR + | add TMP0, TMP0, TMP1, lsl #47 + | str TMP0, [BASE, RA, lsl #3] + | ins_next + break; + case BC_KCDATA: + |.if FFI + | // RA = dst, RC = cdata_const (~) + | mvn RC, RC + | ldr TMP0, [KBASE, RC, lsl #3] + | movn TMP1, #~LJ_TCDATA + | add TMP0, TMP0, TMP1, lsl #47 + | str TMP0, [BASE, RA, lsl #3] + | ins_next + |.endif + break; + case BC_KSHORT: + | // RA = dst, RC = int16_literal + | sxth RCw, RCw + | add TMP0, RC, TISNUM + | str TMP0, [BASE, RA, lsl #3] + | ins_next + break; + case BC_KNUM: + | // RA = dst, RC = num_const + | ldr TMP0, [KBASE, RC, lsl #3] + | str TMP0, [BASE, RA, lsl #3] + | ins_next + break; + case BC_KPRI: + | // RA = dst, RC = primitive_type (~) + | mvn TMP0, RC, lsl #47 + | str TMP0, [BASE, RA, lsl #3] + | ins_next + break; + case BC_KNIL: + | // RA = base, RC = end + | add RA, BASE, RA, lsl #3 + | add RC, BASE, RC, lsl #3 + | str TISNIL, [RA], #8 + |1: + | cmp RA, RC + | str TISNIL, [RA], #8 + | blt <1 + | ins_next_ + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | // RA = dst, RC = uvnum + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | add RC, RC, #offsetof(GCfuncL, uvptr)/8 + | and LFUNC:CARG2, CARG2, #LJ_GCVMASK + | ldr UPVAL:CARG2, [LFUNC:CARG2, RC, lsl #3] + | ldr CARG2, UPVAL:CARG2->v + | ldr TMP0, [CARG2] + | str TMP0, [BASE, RA, lsl #3] + | ins_next + break; + case BC_USETV: + | // RA = uvnum, RC = src + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | add RA, RA, #offsetof(GCfuncL, uvptr)/8 + | and LFUNC:CARG2, CARG2, #LJ_GCVMASK + | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] + | ldr CARG3, [BASE, RC, lsl #3] + | ldr CARG2, UPVAL:CARG1->v + | ldrb TMP2w, UPVAL:CARG1->marked + | ldrb TMP0w, UPVAL:CARG1->closed + | asr ITYPE, CARG3, #47 + | str CARG3, [CARG2] + | add ITYPE, ITYPE, #-LJ_TISGCV + | tst TMP2w, #LJ_GC_BLACK // isblack(uv) + | ccmp TMP0w, #0, #4, ne // && uv->closed + | ccmn ITYPE, #-(LJ_TNUMX - LJ_TISGCV), #0, ne // && tvisgcv(v) + | bhi >2 + |1: + | ins_next + | + |2: // Check if new value is white. + | and GCOBJ:CARG3, CARG3, #LJ_GCVMASK + | ldrb TMP1w, GCOBJ:CARG3->gch.marked + | tst TMP1w, #LJ_GC_WHITES // iswhite(str) + | beq <1 + | // Crossed a write barrier. Move the barrier forward. + | mov CARG1, GL + | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | b <1 + break; + case BC_USETS: + | // RA = uvnum, RC = str_const (~) + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | add RA, RA, #offsetof(GCfuncL, uvptr)/8 + | mvn RC, RC + | and LFUNC:CARG2, CARG2, #LJ_GCVMASK + | ldr UPVAL:CARG1, [LFUNC:CARG2, RA, lsl #3] + | ldr STR:CARG3, [KBASE, RC, lsl #3] + | movn TMP0, #~LJ_TSTR + | ldr CARG2, UPVAL:CARG1->v + | ldrb TMP2w, UPVAL:CARG1->marked + | add TMP0, STR:CARG3, TMP0, lsl #47 + | ldrb TMP1w, STR:CARG3->marked + | str TMP0, [CARG2] + | tbnz TMP2w, #2, >2 // isblack(uv) + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | ldrb TMP0w, UPVAL:CARG1->closed + | tst TMP1w, #LJ_GC_WHITES // iswhite(str) + | ccmp TMP0w, #0, #4, ne + | beq <1 + | // Crossed a write barrier. Move the barrier forward. + | mov CARG1, GL + | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | b <1 + break; + case BC_USETN: + | // RA = uvnum, RC = num_const + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | add RA, RA, #offsetof(GCfuncL, uvptr)/8 + | and LFUNC:CARG2, CARG2, #LJ_GCVMASK + | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] + | ldr TMP0, [KBASE, RC, lsl #3] + | ldr CARG2, UPVAL:CARG2->v + | str TMP0, [CARG2] + | ins_next + break; + case BC_USETP: + | // RA = uvnum, RC = primitive_type (~) + | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] + | add RA, RA, #offsetof(GCfuncL, uvptr)/8 + | and LFUNC:CARG2, CARG2, #LJ_GCVMASK + | ldr UPVAL:CARG2, [LFUNC:CARG2, RA, lsl #3] + | mvn TMP0, RC, lsl #47 + | ldr CARG2, UPVAL:CARG2->v + | str TMP0, [CARG2] + | ins_next + break; + + case BC_UCLO: + | // RA = level, RC = target + | ldr CARG3, L->openupval + | add RC, PC, RC, lsl #2 + | str BASE, L->base + | sub PC, RC, #0x20000 + | cbz CARG3, >1 + | mov CARG1, L + | add CARG2, BASE, RA, lsl #3 + | bl extern lj_func_closeuv // (lua_State *L, TValue *level) + | ldr BASE, L->base + |1: + | ins_next + break; + + case BC_FNEW: + | // RA = dst, RC = proto_const (~) (holding function prototype) + | mvn RC, RC + | str BASE, L->base + | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] + | str PC, SAVE_PC + | ldr CARG2, [KBASE, RC, lsl #3] + | mov CARG1, L + | and LFUNC:CARG3, CARG3, #LJ_GCVMASK + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | bl extern lj_func_newL_gc + | // Returns GCfuncL *. + | ldr BASE, L->base + | movn TMP0, #~LJ_TFUNC + | add CRET1, CRET1, TMP0, lsl #47 + | str CRET1, [BASE, RA, lsl #3] + | ins_next + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + case BC_TDUP: + | // RA = dst, RC = (hbits|asize) | tab_const (~) + | ldp CARG3, CARG4, GL->gc.total // Assumes threshold follows total. + | str BASE, L->base + | str PC, SAVE_PC + | mov CARG1, L + | cmp CARG3, CARG4 + | bhs >5 + |1: + if (op == BC_TNEW) { + | and CARG2, RC, #0x7ff + | lsr CARG3, RC, #11 + | cmp CARG2, #0x7ff + | mov TMP0, #0x801 + | csel CARG2, CARG2, TMP0, ne + | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) + | // Returns GCtab *. + } else { + | mvn RC, RC + | ldr CARG2, [KBASE, RC, lsl #3] + | bl extern lj_tab_dup // (lua_State *L, Table *kt) + | // Returns GCtab *. + } + | ldr BASE, L->base + | movk CRET1, #(LJ_TTAB>>1)&0xffff, lsl #48 + | str CRET1, [BASE, RA, lsl #3] + | ins_next + | + |5: + | bl extern lj_gc_step_fixtop // (lua_State *L) + | mov CARG1, L + | b <1 + break; + + case BC_GGET: + | // RA = dst, RC = str_const (~) + case BC_GSET: + | // RA = src, RC = str_const (~) + | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] + | mvn RC, RC + | and LFUNC:CARG1, CARG1, #LJ_GCVMASK + | ldr TAB:CARG2, LFUNC:CARG1->env + | ldr STR:RC, [KBASE, RC, lsl #3] + if (op == BC_GGET) { + | b ->BC_TGETS_Z + } else { + | b ->BC_TSETS_Z + } + break; + + case BC_TGETV: + | decode_RB RB, INS + | and RC, RC, #255 + | // RA = dst, RB = table, RC = key + | ldr CARG2, [BASE, RB, lsl #3] + | ldr TMP1, [BASE, RC, lsl #3] + | checktab CARG2, ->vmeta_tgetv + | checkint TMP1, >9 // Integer key? + | ldr CARG3, TAB:CARG2->array + | ldr CARG1w, TAB:CARG2->asize + | add CARG3, CARG3, TMP1, uxtw #3 + | cmp TMP1w, CARG1w // In array part? + | bhs ->vmeta_tgetv + | ldr TMP0, [CARG3] + | cmp TMP0, TISNIL + | beq >5 + |1: + | str TMP0, [BASE, RA, lsl #3] + | ins_next + | + |5: // Check for __index if table value is nil. + | ldr TAB:CARG1, TAB:CARG2->metatable + | cbz TAB:CARG1, <1 // No metatable: done. + | ldrb TMP1w, TAB:CARG1->nomm + | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. + | b ->vmeta_tgetv + | + |9: + | asr ITYPE, TMP1, #47 + | cmn ITYPE, #-LJ_TSTR // String key? + | bne ->vmeta_tgetv + | and STR:RC, TMP1, #LJ_GCVMASK + | b ->BC_TGETS_Z + break; + case BC_TGETS: + | decode_RB RB, INS + | and RC, RC, #255 + | // RA = dst, RB = table, RC = str_const (~) + | ldr CARG2, [BASE, RB, lsl #3] + | mvn RC, RC + | ldr STR:RC, [KBASE, RC, lsl #3] + | checktab CARG2, ->vmeta_tgets1 + |->BC_TGETS_Z: + | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = dst + | ldr TMP1w, TAB:CARG2->hmask + | ldr TMP2w, STR:RC->sid + | ldr NODE:CARG3, TAB:CARG2->node + | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask + | add TMP1, TMP1, TMP1, lsl #1 + | movn CARG4, #~LJ_TSTR + | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 + | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. + |1: + | ldp TMP0, CARG1, NODE:CARG3->val + | ldr NODE:CARG3, NODE:CARG3->next + | cmp CARG1, CARG4 + | bne >4 + | cmp TMP0, TISNIL + | beq >5 + |3: + | str TMP0, [BASE, RA, lsl #3] + | ins_next + | + |4: // Follow hash chain. + | cbnz NODE:CARG3, <1 + | // End of hash chain: key not found, nil result. + | mov TMP0, TISNIL + | + |5: // Check for __index if table value is nil. + | ldr TAB:CARG1, TAB:CARG2->metatable + | cbz TAB:CARG1, <3 // No metatable: done. + | ldrb TMP1w, TAB:CARG1->nomm + | tbnz TMP1w, #MM_index, <3 // 'no __index' flag set: done. + | b ->vmeta_tgets + break; + case BC_TGETB: + | decode_RB RB, INS + | and RC, RC, #255 + | // RA = dst, RB = table, RC = index + | ldr CARG2, [BASE, RB, lsl #3] + | checktab CARG2, ->vmeta_tgetb + | ldr CARG3, TAB:CARG2->array + | ldr CARG1w, TAB:CARG2->asize + | add CARG3, CARG3, RC, lsl #3 + | cmp RCw, CARG1w // In array part? + | bhs ->vmeta_tgetb + | ldr TMP0, [CARG3] + | cmp TMP0, TISNIL + | beq >5 + |1: + | str TMP0, [BASE, RA, lsl #3] + | ins_next + | + |5: // Check for __index if table value is nil. + | ldr TAB:CARG1, TAB:CARG2->metatable + | cbz TAB:CARG1, <1 // No metatable: done. + | ldrb TMP1w, TAB:CARG1->nomm + | tbnz TMP1w, #MM_index, <1 // 'no __index' flag set: done. + | b ->vmeta_tgetb + break; + case BC_TGETR: + | decode_RB RB, INS + | and RC, RC, #255 + | // RA = dst, RB = table, RC = key + | ldr CARG1, [BASE, RB, lsl #3] + | ldr TMP1, [BASE, RC, lsl #3] + | and TAB:CARG1, CARG1, #LJ_GCVMASK + | ldr CARG3, TAB:CARG1->array + | ldr TMP2w, TAB:CARG1->asize + | add CARG3, CARG3, TMP1w, uxtw #3 + | cmp TMP1w, TMP2w // In array part? + | bhs ->vmeta_tgetr + | ldr TMP0, [CARG3] + |->BC_TGETR_Z: + | str TMP0, [BASE, RA, lsl #3] + | ins_next + break; + + case BC_TSETV: + | decode_RB RB, INS + | and RC, RC, #255 + | // RA = src, RB = table, RC = key + | ldr CARG2, [BASE, RB, lsl #3] + | ldr TMP1, [BASE, RC, lsl #3] + | checktab CARG2, ->vmeta_tsetv + | checkint TMP1, >9 // Integer key? + | ldr CARG3, TAB:CARG2->array + | ldr CARG1w, TAB:CARG2->asize + | add CARG3, CARG3, TMP1, uxtw #3 + | cmp TMP1w, CARG1w // In array part? + | bhs ->vmeta_tsetv + | ldr TMP1, [CARG3] + | ldr TMP0, [BASE, RA, lsl #3] + | ldrb TMP2w, TAB:CARG2->marked + | cmp TMP1, TISNIL // Previous value is nil? + | beq >5 + |1: + | str TMP0, [CARG3] + | tbnz TMP2w, #2, >7 // isblack(table) + |2: + | ins_next + | + |5: // Check for __newindex if previous value is nil. + | ldr TAB:CARG1, TAB:CARG2->metatable + | cbz TAB:CARG1, <1 // No metatable: done. + | ldrb TMP1w, TAB:CARG1->nomm + | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. + | b ->vmeta_tsetv + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP2w, TMP1 + | b <2 + | + |9: + | asr ITYPE, TMP1, #47 + | cmn ITYPE, #-LJ_TSTR // String key? + | bne ->vmeta_tsetv + | and STR:RC, TMP1, #LJ_GCVMASK + | b ->BC_TSETS_Z + break; + case BC_TSETS: + | decode_RB RB, INS + | and RC, RC, #255 + | // RA = dst, RB = table, RC = str_const (~) + | ldr CARG2, [BASE, RB, lsl #3] + | mvn RC, RC + | ldr STR:RC, [KBASE, RC, lsl #3] + | checktab CARG2, ->vmeta_tsets1 + |->BC_TSETS_Z: + | // TAB:CARG2 = GCtab *, STR:RC = GCstr *, RA = src + | ldr TMP1w, TAB:CARG2->hmask + | ldr TMP2w, STR:RC->sid + | ldr NODE:CARG3, TAB:CARG2->node + | and TMP1w, TMP1w, TMP2w // idx = str->sid & tab->hmask + | add TMP1, TMP1, TMP1, lsl #1 + | movn CARG4, #~LJ_TSTR + | add NODE:CARG3, NODE:CARG3, TMP1, lsl #3 // node = tab->node + idx*3*8 + | add CARG4, STR:RC, CARG4, lsl #47 // Tagged key to look for. + | strb wzr, TAB:CARG2->nomm // Clear metamethod cache. + |1: + | ldp TMP1, CARG1, NODE:CARG3->val + | ldr NODE:TMP3, NODE:CARG3->next + | ldrb TMP2w, TAB:CARG2->marked + | cmp CARG1, CARG4 + | bne >5 + | ldr TMP0, [BASE, RA, lsl #3] + | cmp TMP1, TISNIL // Previous value is nil? + | beq >4 + |2: + | str TMP0, NODE:CARG3->val + | tbnz TMP2w, #2, >7 // isblack(table) + |3: + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | ldr TAB:CARG1, TAB:CARG2->metatable + | cbz TAB:CARG1, <2 // No metatable: done. + | ldrb TMP1w, TAB:CARG1->nomm + | tbnz TMP1w, #MM_newindex, <2 // 'no __newindex' flag set: done. + | b ->vmeta_tsets + | + |5: // Follow hash chain. + | mov NODE:CARG3, NODE:TMP3 + | cbnz NODE:TMP3, <1 + | // End of hash chain: key not found, add a new one. + | + | // But check for __newindex first. + | ldr TAB:CARG1, TAB:CARG2->metatable + | cbz TAB:CARG1, >6 // No metatable: continue. + | ldrb TMP1w, TAB:CARG1->nomm + | // 'no __newindex' flag NOT set: check. + | tbz TMP1w, #MM_newindex, ->vmeta_tsets + |6: + | movn TMP1, #~LJ_TSTR + | str PC, SAVE_PC + | add TMP0, STR:RC, TMP1, lsl #47 + | str BASE, L->base + | mov CARG1, L + | str TMP0, TMPD + | add CARG3, sp, TMPDofs + | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) + | // Returns TValue *. + | ldr BASE, L->base + | ldr TMP0, [BASE, RA, lsl #3] + | str TMP0, [CRET1] + | b <3 // No 2nd write barrier needed. + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP2w, TMP1 + | b <3 + break; + case BC_TSETB: + | decode_RB RB, INS + | and RC, RC, #255 + | // RA = src, RB = table, RC = index + | ldr CARG2, [BASE, RB, lsl #3] + | checktab CARG2, ->vmeta_tsetb + | ldr CARG3, TAB:CARG2->array + | ldr CARG1w, TAB:CARG2->asize + | add CARG3, CARG3, RC, lsl #3 + | cmp RCw, CARG1w // In array part? + | bhs ->vmeta_tsetb + | ldr TMP1, [CARG3] + | ldr TMP0, [BASE, RA, lsl #3] + | ldrb TMP2w, TAB:CARG2->marked + | cmp TMP1, TISNIL // Previous value is nil? + | beq >5 + |1: + | str TMP0, [CARG3] + | tbnz TMP2w, #2, >7 // isblack(table) + |2: + | ins_next + | + |5: // Check for __newindex if previous value is nil. + | ldr TAB:CARG1, TAB:CARG2->metatable + | cbz TAB:CARG1, <1 // No metatable: done. + | ldrb TMP1w, TAB:CARG1->nomm + | tbnz TMP1w, #MM_newindex, <1 // 'no __newindex' flag set: done. + | b ->vmeta_tsetb + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP2w, TMP1 + | b <2 + break; + case BC_TSETR: + | decode_RB RB, INS + | and RC, RC, #255 + | // RA = src, RB = table, RC = key + | ldr CARG2, [BASE, RB, lsl #3] + | ldr TMP1, [BASE, RC, lsl #3] + | and TAB:CARG2, CARG2, #LJ_GCVMASK + | ldr CARG1, TAB:CARG2->array + | ldrb TMP2w, TAB:CARG2->marked + | ldr CARG4w, TAB:CARG2->asize + | add CARG1, CARG1, TMP1, uxtw #3 + | tbnz TMP2w, #2, >7 // isblack(table) + |2: + | cmp TMP1w, CARG4w // In array part? + | bhs ->vmeta_tsetr + |->BC_TSETR_Z: + | ldr TMP0, [BASE, RA, lsl #3] + | str TMP0, [CARG1] + | ins_next + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP2w, TMP0 + | b <2 + break; + + case BC_TSETM: + | // RA = base (table at base-1), RC = num_const (start index) + | add RA, BASE, RA, lsl #3 + |1: + | ldr RBw, SAVE_MULTRES + | ldr TAB:CARG2, [RA, #-8] // Guaranteed to be a table. + | ldr TMP1, [KBASE, RC, lsl #3] // Integer constant is in lo-word. + | sub RB, RB, #8 + | cbz RB, >4 // Nothing to copy? + | and TAB:CARG2, CARG2, #LJ_GCVMASK + | ldr CARG1w, TAB:CARG2->asize + | add CARG3w, TMP1w, RBw, lsr #3 + | ldr CARG4, TAB:CARG2->array + | cmp CARG3, CARG1 + | add RB, RA, RB + | bhi >5 + | add TMP1, CARG4, TMP1w, uxtw #3 + | ldrb TMP2w, TAB:CARG2->marked + |3: // Copy result slots to table. + | ldr TMP0, [RA], #8 + | str TMP0, [TMP1], #8 + | cmp RA, RB + | blo <3 + | tbnz TMP2w, #2, >7 // isblack(table) + |4: + | ins_next + | + |5: // Need to resize array part. + | str BASE, L->base + | mov CARG1, L + | str PC, SAVE_PC + | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + | // Must not reallocate the stack. + | b <1 + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP2w, TMP1 + | b <4 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALLM: + | // RA = base, (RB = nresults+1,) RC = extra_nargs + | ldr TMP0w, SAVE_MULTRES + | decode_RC8RD NARGS8:RC, RC + | add NARGS8:RC, NARGS8:RC, TMP0 + | b ->BC_CALL_Z + break; + case BC_CALL: + | decode_RC8RD NARGS8:RC, RC + | // RA = base, (RB = nresults+1,) RC = (nargs+1)*8 + |->BC_CALL_Z: + | mov RB, BASE // Save old BASE for vmeta_call. + | add BASE, BASE, RA, lsl #3 + | ldr CARG3, [BASE] + | sub NARGS8:RC, NARGS8:RC, #8 + | add BASE, BASE, #16 + | checkfunc CARG3, ->vmeta_call + | ins_call + break; + + case BC_CALLMT: + | // RA = base, (RB = 0,) RC = extra_nargs + | ldr TMP0w, SAVE_MULTRES + | add NARGS8:RC, TMP0, RC, lsl #3 + | b ->BC_CALLT1_Z + break; + case BC_CALLT: + | lsl NARGS8:RC, RC, #3 + | // RA = base, (RB = 0,) RC = (nargs+1)*8 + |->BC_CALLT1_Z: + | add RA, BASE, RA, lsl #3 + | ldr TMP1, [RA] + | sub NARGS8:RC, NARGS8:RC, #8 + | add RA, RA, #16 + | checktp CARG3, TMP1, LJ_TFUNC, ->vmeta_callt + | ldr PC, [BASE, FRAME_PC] + |->BC_CALLT2_Z: + | mov RB, #0 + | ldrb TMP2w, LFUNC:CARG3->ffid + | tst PC, #FRAME_TYPE + | bne >7 + |1: + | str TMP1, [BASE, FRAME_FUNC] // Copy function down, but keep PC. + | cbz NARGS8:RC, >3 + |2: + | ldr TMP0, [RA, RB] + | add TMP1, RB, #8 + | cmp TMP1, NARGS8:RC + | str TMP0, [BASE, RB] + | mov RB, TMP1 + | bne <2 + |3: + | cmp TMP2, #1 // (> FF_C) Calling a fast function? + | bhi >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function with a Lua frame below. + | ldrb RAw, [PC, #-4+OFS_RA] + | sub CARG1, BASE, RA, lsl #3 + | ldr LFUNC:CARG1, [CARG1, #-32] + | and LFUNC:CARG1, CARG1, #LJ_GCVMASK + | ldr CARG1, LFUNC:CARG1->pc + | ldr KBASE, [CARG1, #PC2PROTO(k)] + | b <4 + | + |7: // Tailcall from a vararg function. + | eor PC, PC, #FRAME_VARG + | tst PC, #FRAME_TYPEP // Vararg frame below? + | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. + | bne <1 + | sub BASE, BASE, PC + | ldr PC, [BASE, FRAME_PC] + | tst PC, #FRAME_TYPE + | csel TMP2, RB, TMP2, ne // Clear ffid if no Lua function below. + | b <1 + break; + + case BC_ITERC: + | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + | add RA, BASE, RA, lsl #3 + | ldr CARG3, [RA, #-24] + | mov RB, BASE // Save old BASE for vmeta_call. + | ldp CARG1, CARG2, [RA, #-16] + | add BASE, RA, #16 + | mov NARGS8:RC, #16 // Iterators get 2 arguments. + | str CARG3, [RA] // Copy callable. + | stp CARG1, CARG2, [RA, #16] // Copy state and control var. + | checkfunc CARG3, ->vmeta_call + | ins_call + break; + + case BC_ITERN: + |.if JIT + | hotloop + |.endif + |->vm_IITERN: + | // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + | add RA, BASE, RA, lsl #3 + | ldr TAB:RB, [RA, #-16] + | ldrh TMP3w, [PC, # OFS_RD] + | ldr CARG1w, [RA, #-8+LO] // Get index from control var. + | add PC, PC, #4 + | add TMP3, PC, TMP3, lsl #2 + | and TAB:RB, RB, #LJ_GCVMASK + | sub TMP3, TMP3, #0x20000 + | ldr TMP1w, TAB:RB->asize + | ldr CARG2, TAB:RB->array + |1: // Traverse array part. + | subs RC, CARG1, TMP1 + | add CARG3, CARG2, CARG1, lsl #3 + | bhs >5 // Index points after array part? + | ldr TMP0, [CARG3] + | cmp TMP0, TISNIL + | cinc CARG1, CARG1, eq // Skip holes in array part. + | beq <1 + | add CARG1, CARG1, TISNUM + | stp CARG1, TMP0, [RA] + | add CARG1, CARG1, #1 + |3: + | str CARG1w, [RA, #-8+LO] // Update control var. + | mov PC, TMP3 + |4: + | ins_next + | + |5: // Traverse hash part. + | ldr TMP2w, TAB:RB->hmask + | ldr NODE:RB, TAB:RB->node + |6: + | add CARG1, RC, RC, lsl #1 + | cmp RC, TMP2 // End of iteration? Branch to ITERN+1. + | add NODE:CARG3, NODE:RB, CARG1, lsl #3 // node = tab->node + idx*3*8 + | bhi <4 + | ldp TMP0, CARG1, NODE:CARG3->val + | cmp TMP0, TISNIL + | add RC, RC, #1 + | beq <6 // Skip holes in hash part. + | stp CARG1, TMP0, [RA] + | add CARG1, RC, TMP1 + | b <3 + break; + + case BC_ISNEXT: + | // RA = base, RC = target (points to ITERN) + | add RA, BASE, RA, lsl #3 + | ldr CFUNC:CARG1, [RA, #-24] + | add RC, PC, RC, lsl #2 + | ldp TAB:CARG3, CARG4, [RA, #-16] + | sub RC, RC, #0x20000 + | checkfunc CFUNC:CARG1, >5 + | asr TMP0, TAB:CARG3, #47 + | ldrb TMP1w, CFUNC:CARG1->ffid + | cmn TMP0, #-LJ_TTAB + | ccmp CARG4, TISNIL, #0, eq + | ccmp TMP1w, #FF_next_N, #0, eq + | bne >5 + | mov TMP0w, #0xfffe7fff // LJ_KEYINDEX + | lsl TMP0, TMP0, #32 + | str TMP0, [RA, #-8] // Initialize control var. + |1: + | mov PC, RC + | ins_next + | + |5: // Despecialize bytecode if any of the checks fail. + |.if JIT + | ldrb TMP2w, [RC, # OFS_OP] + |.endif + | mov TMP0, #BC_JMP + | mov TMP1, #BC_ITERC + | strb TMP0w, [PC, #-4+OFS_OP] + |.if JIT + | cmp TMP2w, #BC_ITERN + | bne >6 + |.endif + | strb TMP1w, [RC, # OFS_OP] + | b <1 + |.if JIT + |6: // Unpatch JLOOP. + | ldr RA, [GL, #GL_J(trace)] + | ldrh TMP2w, [RC, # OFS_RD] + | ldr TRACE:RA, [RA, TMP2, lsl #3] + | ldr TMP2w, TRACE:RA->startins + | bfxil TMP2w, TMP1w, #0, #8 + | str TMP2w, [RC] + | b <1 + |.endif + break; + + case BC_VARG: + | decode_RB RB, INS + | and RC, RC, #255 + | // RA = base, RB = (nresults+1), RC = numparams + | ldr TMP1, [BASE, FRAME_PC] + | add RC, BASE, RC, lsl #3 + | add RA, BASE, RA, lsl #3 + | add RC, RC, #FRAME_VARG + | add TMP2, RA, RB, lsl #3 + | sub RC, RC, TMP1 // RC = vbase + | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | sub TMP3, BASE, #16 // TMP3 = vtop + | cbz RB, >5 + | sub TMP2, TMP2, #16 + |1: // Copy vararg slots to destination slots. + | cmp RC, TMP3 + | ldr TMP0, [RC], #8 + | csel TMP0, TMP0, TISNIL, lo + | cmp RA, TMP2 + | str TMP0, [RA], #8 + | blo <1 + |2: + | ins_next + | + |5: // Copy all varargs. + | ldr TMP0, L->maxstack + | subs TMP2, TMP3, RC + | csel RB, xzr, TMP2, le // MULTRES = (max(vtop-vbase,0)+1)*8 + | add RB, RB, #8 + | add TMP1, RA, TMP2 + | str RBw, SAVE_MULTRES + | ble <2 // Nothing to copy. + | cmp TMP1, TMP0 + | bhi >7 + |6: + | ldr TMP0, [RC], #8 + | str TMP0, [RA], #8 + | cmp RC, TMP3 + | blo <6 + | b <2 + | + |7: // Grow stack for varargs. + | lsr CARG2, TMP2, #3 + | stp BASE, RA, L->base + | mov CARG1, L + | sub RC, RC, BASE // Need delta, because BASE may change. + | str PC, SAVE_PC + | bl extern lj_state_growstack // (lua_State *L, int n) + | ldp BASE, RA, L->base + | add RC, BASE, RC + | sub TMP3, BASE, #16 + | b <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | // RA = results, RC = extra results + | ldr TMP0w, SAVE_MULTRES + | ldr PC, [BASE, FRAME_PC] + | add RA, BASE, RA, lsl #3 + | add RC, TMP0, RC, lsl #3 + | b ->BC_RETM_Z + break; + + case BC_RET: + | // RA = results, RC = nresults+1 + | ldr PC, [BASE, FRAME_PC] + | lsl RC, RC, #3 + | add RA, BASE, RA, lsl #3 + |->BC_RETM_Z: + | str RCw, SAVE_MULTRES + |1: + | ands CARG1, PC, #FRAME_TYPE + | eor CARG2, PC, #FRAME_VARG + | bne ->BC_RETV2_Z + | + |->BC_RET_Z: + | // BASE = base, RA = resultptr, RC = (nresults+1)*8, PC = return + | ldr INSw, [PC, #-4] + | subs TMP1, RC, #8 + | sub CARG3, BASE, #16 + | beq >3 + |2: + | ldr TMP0, [RA], #8 + | add BASE, BASE, #8 + | sub TMP1, TMP1, #8 + | str TMP0, [BASE, #-24] + | cbnz TMP1, <2 + |3: + | decode_RA RA, INS + | sub CARG4, CARG3, RA, lsl #3 + | decode_RB RB, INS + | ldr LFUNC:CARG1, [CARG4, FRAME_FUNC] + |5: + | cmp RC, RB, lsl #3 // More results expected? + | blo >6 + | and LFUNC:CARG1, CARG1, #LJ_GCVMASK + | mov BASE, CARG4 + | ldr CARG2, LFUNC:CARG1->pc + | ldr KBASE, [CARG2, #PC2PROTO(k)] + | ins_next + | + |6: // Fill up results with nil. + | add BASE, BASE, #8 + | add RC, RC, #8 + | str TISNIL, [BASE, #-24] + | b <5 + | + |->BC_RETV1_Z: // Non-standard return case. + | add RA, BASE, RA, lsl #3 + |->BC_RETV2_Z: + | tst CARG2, #FRAME_TYPEP + | bne ->vm_return + | // Return from vararg function: relocate BASE down. + | sub BASE, BASE, CARG2 + | ldr PC, [BASE, FRAME_PC] + | b <1 + break; + + case BC_RET0: case BC_RET1: + | // RA = results, RC = nresults+1 + | ldr PC, [BASE, FRAME_PC] + | lsl RC, RC, #3 + | str RCw, SAVE_MULTRES + | ands CARG1, PC, #FRAME_TYPE + | eor CARG2, PC, #FRAME_VARG + | bne ->BC_RETV1_Z + | ldr INSw, [PC, #-4] + if (op == BC_RET1) { + | ldr TMP0, [BASE, RA, lsl #3] + } + | sub CARG4, BASE, #16 + | decode_RA RA, INS + | sub BASE, CARG4, RA, lsl #3 + if (op == BC_RET1) { + | str TMP0, [CARG4], #8 + } + | decode_RB RB, INS + | ldr LFUNC:CARG1, [BASE, FRAME_FUNC] + |5: + | cmp RC, RB, lsl #3 + | blo >6 + | and LFUNC:CARG1, CARG1, #LJ_GCVMASK + | ldr CARG2, LFUNC:CARG1->pc + | ldr KBASE, [CARG2, #PC2PROTO(k)] + | ins_next + | + |6: // Fill up results with nil. + | add RC, RC, #8 + | str TISNIL, [CARG4], #8 + | b <5 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + |.define FOR_IDX, [RA]; .define FOR_TIDX, [RA, #4] + |.define FOR_STOP, [RA, #8]; .define FOR_TSTOP, [RA, #12] + |.define FOR_STEP, [RA, #16]; .define FOR_TSTEP, [RA, #20] + |.define FOR_EXT, [RA, #24]; .define FOR_TEXT, [RA, #28] + + case BC_FORL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IFORL follows. + break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + | // RA = base, RC = target (after end of loop or start of loop) + vk = (op == BC_IFORL || op == BC_JFORL); + | add RA, BASE, RA, lsl #3 + | ldp CARG1, CARG2, FOR_IDX // CARG1 = IDX, CARG2 = STOP + | ldr CARG3, FOR_STEP // CARG3 = STEP + if (op != BC_JFORL) { + | add RC, PC, RC, lsl #2 + | sub RC, RC, #0x20000 + } + | checkint CARG1, >5 + if (!vk) { + | checkint CARG2, ->vmeta_for + | checkint CARG3, ->vmeta_for + | tbnz CARG3w, #31, >4 + | cmp CARG1w, CARG2w + } else { + | adds CARG1w, CARG1w, CARG3w + | bvs >2 + | add TMP0, CARG1, TISNUM + | tbnz CARG3w, #31, >4 + | cmp CARG1w, CARG2w + } + |1: + if (op == BC_FORI) { + | csel PC, RC, PC, gt + } else if (op == BC_JFORI) { + | mov PC, RC + | ldrh RCw, [RC, #-4+OFS_RD] + } else if (op == BC_IFORL) { + | csel PC, RC, PC, le + } + if (vk) { + | str TMP0, FOR_IDX + | str TMP0, FOR_EXT + } else { + | str CARG1, FOR_EXT + } + if (op == BC_JFORI || op == BC_JFORL) { + | ble =>BC_JLOOP + } + |2: + | ins_next + | + |4: // Invert check for negative step. + | cmp CARG2w, CARG1w + | b <1 + | + |5: // FP loop. + | ldp d0, d1, FOR_IDX + | blo ->vmeta_for + if (!vk) { + | checknum CARG2, ->vmeta_for + | checknum CARG3, ->vmeta_for + | str d0, FOR_EXT + } else { + | ldr d2, FOR_STEP + | fadd d0, d0, d2 + } + | tbnz CARG3, #63, >7 + | fcmp d0, d1 + |6: + if (vk) { + | str d0, FOR_IDX + | str d0, FOR_EXT + } + if (op == BC_FORI) { + | csel PC, RC, PC, hi + } else if (op == BC_JFORI) { + | ldrh RCw, [RC, #-4+OFS_RD] + | bls =>BC_JLOOP + } else if (op == BC_IFORL) { + | csel PC, RC, PC, ls + } else { + | bls =>BC_JLOOP + } + | b <2 + | + |7: // Invert check for negative step. + | fcmp d1, d0 + | b <6 + break; + + case BC_ITERL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IITERL follows. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | // RA = base, RC = target + | ldr CARG1, [BASE, RA, lsl #3] + | add TMP1, BASE, RA, lsl #3 + | cmp CARG1, TISNIL + | beq >1 // Stop if iterator returned nil. + if (op == BC_JITERL) { + | str CARG1, [TMP1, #-8] + | b =>BC_JLOOP + } else { + | add TMP0, PC, RC, lsl #2 // Otherwise save control var + branch. + | sub PC, TMP0, #0x20000 + | str CARG1, [TMP1, #-8] + } + |1: + | ins_next + break; + + case BC_LOOP: + | // RA = base, RC = target (loop extent) + | // Note: RA/RC is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, it's only purpose is to detect a hot loop. + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_ILOOP follows. + break; + + case BC_ILOOP: + | // RA = base, RC = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + |.if JIT + | // RA = base (ignored), RC = traceno + | ldr CARG1, [GL, #GL_J(trace)] + | mov CARG2w, #0 // Traces on ARM64 don't store the trace #, so use 0. + | ldr TRACE:RC, [CARG1, RC, lsl #3] + | st_vmstate CARG2w + | ldr RA, TRACE:RC->mcode + | str BASE, GL->jit_base + | str L, GL->tmpbuf.L + | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. + | br RA + |.endif + break; + + case BC_JMP: + | // RA = base (only used by trace recorder), RC = target + | add RC, PC, RC, lsl #2 + | sub PC, RC, #0x20000 + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + case BC_FUNCF: + |.if JIT + | hotcall + |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 + | ldr CARG1, L->maxstack + | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] + | ldr KBASE, [PC, #-4+PC2PROTO(k)] + | cmp RA, CARG1 + | bhi ->vm_growstack_l + |2: + | cmp NARGS8:RC, TMP1, lsl #3 // Check for missing parameters. + | blo >3 + if (op == BC_JFUNCF) { + | decode_RD RC, INS + | b =>BC_JLOOP + } else { + | ins_next + } + | + |3: // Clear missing parameters. + | str TISNIL, [BASE, NARGS8:RC] + | add NARGS8:RC, NARGS8:RC, #8 + | b <2 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | NYI // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 + | ldr CARG1, L->maxstack + | movn TMP0, #~LJ_TFUNC + | add TMP2, BASE, RC + | add LFUNC:CARG3, CARG3, TMP0, lsl #47 + | add RA, RA, RC + | add TMP0, RC, #16+FRAME_VARG + | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. + | ldr KBASE, [PC, #-4+PC2PROTO(k)] + | cmp RA, CARG1 + | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. + | bhs ->vm_growstack_l + | sub RC, TMP2, #16 + | ldrb TMP1w, [PC, #-4+PC2PROTO(numparams)] + | mov RA, BASE + | mov BASE, TMP2 + | cbz TMP1, >2 + |1: + | cmp RA, RC // Less args than parameters? + | bhs >3 + | ldr TMP0, [RA] + | sub TMP1, TMP1, #1 + | str TISNIL, [RA], #8 // Clear old fixarg slot (help the GC). + | str TMP0, [TMP2], #8 + | cbnz TMP1, <1 + |2: + | ins_next + | + |3: + | sub TMP1, TMP1, #1 + | str TISNIL, [TMP2], #8 + | cbz TMP1, <2 + | b <3 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | // BASE = new base, RA = BASE+framesize*8, CARG3 = CFUNC, RC = nargs*8 + if (op == BC_FUNCC) { + | ldr CARG4, CFUNC:CARG3->f + } else { + | ldr CARG4, GL->wrapf + } + | add CARG2, RA, NARGS8:RC + | ldr CARG1, L->maxstack + | add RC, BASE, NARGS8:RC + | cmp CARG2, CARG1 + | stp BASE, RC, L->base + if (op == BC_FUNCCW) { + | ldr CARG2, CFUNC:CARG3->f + } + | mv_vmstate TMP0w, C + | mov CARG1, L + | bhi ->vm_growstack_c // Need to grow stack. + | st_vmstate TMP0w + | blr CARG4 // (lua_State *L [, lua_CFunction f]) + | // Returns nresults. + | ldp BASE, TMP1, L->base + | str L, GL->cur_L + | sbfiz RC, CRET1, #3, #32 + | st_vmstate ST_INTERP + | ldr PC, [BASE, FRAME_PC] + | sub RA, TMP1, RC // RA = L->top - nresults*8 + | b ->vm_returnc + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +static int build_backend(BuildCtx *ctx) +{ + int op; + + dasm_growpc(Dst, BC__MAX); + + build_subroutines(ctx); + + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); + int i; + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",%%progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.long .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.long 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 30\n" /* Return address is in lr. */ + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ + "\t.align 3\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.long .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.long .Lframe0\n" + "\t.quad .Lbegin\n" + "\t.quad %d\n" + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ + fcofs); + for (i = 19; i <= 28; i++) /* offset x19-x28 */ + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); + for (i = 8; i <= 15; i++) /* offset d8-d15 */ + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", + 64+i, i+(3+(28-19+1)-8)); + fprintf(ctx->fp, + "\t.align 3\n" + ".LEFDE0:\n\n"); +#if LJ_HASFFI + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.long .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.long .Lframe0\n" + "\t.quad lj_vm_ffi_call\n" + "\t.quad %d\n" + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ + "\t.align 3\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif +#if !LJ_NO_UNWIND + fprintf(ctx->fp, "\t.section .eh_frame,\"a\",%%progbits\n"); + fprintf(ctx->fp, + ".Lframe1:\n" + "\t.long .LECIE1-.LSCIE1\n" + ".LSCIE1:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.string \"zPR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 30\n" /* Return address is in lr. */ + "\t.uleb128 6\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.long lj_err_unwind_dwarf-.\n" + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ + "\t.align 3\n" + ".LECIE1:\n\n"); + fprintf(ctx->fp, + ".LSFDE2:\n" + "\t.long .LEFDE2-.LASFDE2\n" + ".LASFDE2:\n" + "\t.long .LASFDE2-.Lframe1\n" + "\t.long .Lbegin-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ + fcofs); + for (i = 19; i <= 28; i++) /* offset x19-x28 */ + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); + for (i = 8; i <= 15; i++) /* offset d8-d15 */ + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", + 64+i, i+(3+(28-19+1)-8)); + fprintf(ctx->fp, + "\t.align 3\n" + ".LEFDE2:\n\n"); +#if LJ_HASFFI + fprintf(ctx->fp, + ".Lframe2:\n" + "\t.long .LECIE2-.LSCIE2\n" + ".LSCIE2:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.string \"zR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 30\n" /* Return address is in lr. */ + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ + "\t.align 3\n" + ".LECIE2:\n\n"); + fprintf(ctx->fp, + ".LSFDE3:\n" + "\t.long .LEFDE3-.LASFDE3\n" + ".LASFDE3:\n" + "\t.long .LASFDE3-.Lframe2\n" + "\t.long lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ + "\t.align 3\n" + ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); +#endif +#endif + break; +#if !LJ_NO_UNWIND + case BUILD_machasm: { +#if LJ_HASFFI + int fcsize = 0; +#endif + int j; + fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); + fprintf(ctx->fp, + "EH_frame1:\n" + "\t.set L$set$x,LECIEX-LSCIEX\n" + "\t.long L$set$x\n" + "LSCIEX:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.ascii \"zPR\\0\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 30\n" /* Return address is in lr. */ + "\t.uleb128 6\n" /* augmentation length */ + "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ + "\t.long _lj_err_unwind_dwarf@GOT-.\n" + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ + "\t.align 3\n" + "LECIEX:\n\n"); + for (j = 0; j < ctx->nsym; j++) { + const char *name = ctx->sym[j].name; + int32_t size = ctx->sym[j+1].ofs - ctx->sym[j].ofs; + if (size == 0) continue; +#if LJ_HASFFI + if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } +#endif + fprintf(ctx->fp, + "LSFDE%d:\n" + "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" + "\t.long L$set$%d\n" + "LASFDE%d:\n" + "\t.long LASFDE%d-EH_frame1\n" + "\t.long %s-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n", /* offset fp */ + j, j, j, j, j, j, j, name, size); + for (i = 19; i <= 28; i++) /* offset x19-x28 */ + fprintf(ctx->fp, "\t.byte 0x%x\n\t.uleb128 %d\n", 0x80+i, i+(3-19)); + for (i = 8; i <= 15; i++) /* offset d8-d15 */ + fprintf(ctx->fp, "\t.byte 5\n\t.uleb128 0x%x\n\t.uleb128 %d\n", + 64+i, i+(3+(28-19+1)-8)); + fprintf(ctx->fp, + "\t.align 3\n" + "LEFDE%d:\n\n", j); + } +#if LJ_HASFFI + if (fcsize) { + fprintf(ctx->fp, + "EH_frame2:\n" + "\t.set L$set$y,LECIEY-LSCIEY\n" + "\t.long L$set$y\n" + "LSCIEY:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.ascii \"zR\\0\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 30\n" /* Return address is in lr. */ + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 16\n" /* def_cfa fp 16 */ + "\t.align 3\n" + "LECIEY:\n\n"); + fprintf(ctx->fp, + "LSFDEY:\n" + "\t.set L$set$yy,LEFDEY-LASFDEY\n" + "\t.long L$set$yy\n" + "LASFDEY:\n" + "\t.long LASFDEY-EH_frame2\n" + "\t.long _lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0x9e\n\t.uleb128 1\n" /* offset lr */ + "\t.byte 0x9d\n\t.uleb128 2\n" /* offset fp */ + "\t.byte 0x93\n\t.uleb128 3\n" /* offset x19 */ + "\t.byte 0x94\n\t.uleb128 4\n" /* offset x20 */ + "\t.align 3\n" + "LEFDEY:\n\n", fcsize); + } +#endif + fprintf(ctx->fp, ".subsections_via_symbols\n"); + } + break; +#endif + default: + break; + } +} + diff --git a/src/vm_mips.dasc b/src/vm_mips.dasc index 866b8e3d..34645bf1 100644 --- a/src/vm_mips.dasc +++ b/src/vm_mips.dasc @@ -1,6 +1,9 @@ |// Low-level VM code for MIPS CPUs. |// Bytecode interpreter, fast functions and helper functions. |// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +|// +|// MIPS soft-float support contributed by Djordje Kovacevic and +|// Stefan Pejic from RT-RK.com, sponsored by Cisco Systems, Inc. | |.arch mips |.section code_op, code_sub @@ -18,6 +21,12 @@ |// Fixed register assignments for the interpreter. |// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra | +|.macro .FPU, a, b +|.if FPU +| a, b +|.endif +|.endmacro +| |// The following must be C callee-save (but BASE is often refetched). |.define BASE, r16 // Base of current Lua stack frame. |.define KBASE, r17 // Constants of current Lua function. @@ -25,13 +34,15 @@ |.define DISPATCH, r19 // Opcode dispatch table. |.define LREG, r20 // Register holding lua_State (also in SAVE_L). |.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. -|// NYI: r22 currently unused. | |.define JGL, r30 // On-trace: global_State + 32768. | |// Constants for type-comparisons, stores and conversions. C callee-save. +|.define TISNUM, r22 |.define TISNIL, r30 +|.if FPU |.define TOBIT, f30 // 2^52 + 2^51. +|.endif | |// The following temporaries are not saved across C calls, except for RA. |.define RA, r23 // Callee-save. @@ -46,7 +57,7 @@ |.define TMP2, r14 |.define TMP3, r15 | -|// Calling conventions. +|// MIPS o32 calling convention. |.define CFUNCADDR, r25 |.define CARG1, r4 |.define CARG2, r5 @@ -56,13 +67,33 @@ |.define CRET1, r2 |.define CRET2, r3 | +|.if ENDIAN_LE +|.define SFRETLO, CRET1 +|.define SFRETHI, CRET2 +|.define SFARG1LO, CARG1 +|.define SFARG1HI, CARG2 +|.define SFARG2LO, CARG3 +|.define SFARG2HI, CARG4 +|.else +|.define SFRETLO, CRET2 +|.define SFRETHI, CRET1 +|.define SFARG1LO, CARG2 +|.define SFARG1HI, CARG1 +|.define SFARG2LO, CARG4 +|.define SFARG2HI, CARG3 +|.endif +| +|.if FPU |.define FARG1, f12 |.define FARG2, f14 | |.define FRET1, f0 |.define FRET2, f2 +|.endif | |// Stack layout while in interpreter. Must match with lj_frame.h. +|.if FPU // MIPS32 hard-float. +| |.define CFRAME_SPACE, 112 // Delta for sp. | |.define SAVE_ERRF, 124(sp) // 32 bit C frame info. @@ -72,6 +103,20 @@ |//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. |.define SAVE_GPR_, 72 // .. 72+10*4: 32 bit GPR saves. |.define SAVE_FPR_, 24 // .. 24+6*8: 64 bit FPR saves. +| +|.else // MIPS32 soft-float +| +|.define CFRAME_SPACE, 64 // Delta for sp. +| +|.define SAVE_ERRF, 76(sp) // 32 bit C frame info. +|.define SAVE_NRES, 72(sp) +|.define SAVE_CFRAME, 68(sp) +|.define SAVE_L, 64(sp) +|//----- 8 byte aligned, ^^^^ 16 byte register save area, owned by interpreter. +|.define SAVE_GPR_, 24 // .. 24+10*4: 32 bit GPR saves. +| +|.endif +| |.define SAVE_PC, 20(sp) |.define ARG5, 16(sp) |.define CSAVE_4, 12(sp) @@ -83,43 +128,45 @@ |.define ARG5_OFS, 16 |.define SAVE_MULTRES, ARG5 | +|//----------------------------------------------------------------------- +| |.macro saveregs | addiu sp, sp, -CFRAME_SPACE | sw ra, SAVE_GPR_+9*4(sp) | sw r30, SAVE_GPR_+8*4(sp) -| sdc1 f30, SAVE_FPR_+5*8(sp) +| .FPU sdc1 f30, SAVE_FPR_+5*8(sp) | sw r23, SAVE_GPR_+7*4(sp) | sw r22, SAVE_GPR_+6*4(sp) -| sdc1 f28, SAVE_FPR_+4*8(sp) +| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) | sw r21, SAVE_GPR_+5*4(sp) | sw r20, SAVE_GPR_+4*4(sp) -| sdc1 f26, SAVE_FPR_+3*8(sp) +| .FPU sdc1 f26, SAVE_FPR_+3*8(sp) | sw r19, SAVE_GPR_+3*4(sp) | sw r18, SAVE_GPR_+2*4(sp) -| sdc1 f24, SAVE_FPR_+2*8(sp) +| .FPU sdc1 f24, SAVE_FPR_+2*8(sp) | sw r17, SAVE_GPR_+1*4(sp) | sw r16, SAVE_GPR_+0*4(sp) -| sdc1 f22, SAVE_FPR_+1*8(sp) -| sdc1 f20, SAVE_FPR_+0*8(sp) +| .FPU sdc1 f22, SAVE_FPR_+1*8(sp) +| .FPU sdc1 f20, SAVE_FPR_+0*8(sp) |.endmacro | |.macro restoreregs_ret | lw ra, SAVE_GPR_+9*4(sp) | lw r30, SAVE_GPR_+8*4(sp) -| ldc1 f30, SAVE_FPR_+5*8(sp) +| .FPU ldc1 f30, SAVE_FPR_+5*8(sp) | lw r23, SAVE_GPR_+7*4(sp) | lw r22, SAVE_GPR_+6*4(sp) -| ldc1 f28, SAVE_FPR_+4*8(sp) +| .FPU ldc1 f28, SAVE_FPR_+4*8(sp) | lw r21, SAVE_GPR_+5*4(sp) | lw r20, SAVE_GPR_+4*4(sp) -| ldc1 f26, SAVE_FPR_+3*8(sp) +| .FPU ldc1 f26, SAVE_FPR_+3*8(sp) | lw r19, SAVE_GPR_+3*4(sp) | lw r18, SAVE_GPR_+2*4(sp) -| ldc1 f24, SAVE_FPR_+2*8(sp) +| .FPU ldc1 f24, SAVE_FPR_+2*8(sp) | lw r17, SAVE_GPR_+1*4(sp) | lw r16, SAVE_GPR_+0*4(sp) -| ldc1 f22, SAVE_FPR_+1*8(sp) -| ldc1 f20, SAVE_FPR_+0*8(sp) +| .FPU ldc1 f22, SAVE_FPR_+1*8(sp) +| .FPU ldc1 f20, SAVE_FPR_+0*8(sp) | jr ra | addiu sp, sp, CFRAME_SPACE |.endmacro @@ -138,11 +185,12 @@ |.type NODE, Node |.type NARGS8, int |.type TRACE, GCtrace +|.type SBUF, SBuf | |//----------------------------------------------------------------------- | |// Trap for not-yet-implemented parts. -|.macro NYI; .long 0xf0f0f0f0; .endmacro +|.macro NYI; .long 0xec1cf0f0; .endmacro | |// Macros to mark delay slots. |.macro ., a; a; .endmacro @@ -152,13 +200,23 @@ |//----------------------------------------------------------------------- | |// Endian-specific defines. -|.define FRAME_PC, LJ_ENDIAN_SELECT(-4,-8) -|.define FRAME_FUNC, LJ_ENDIAN_SELECT(-8,-4) -|.define HI, LJ_ENDIAN_SELECT(4,0) -|.define LO, LJ_ENDIAN_SELECT(0,4) -|.define OFS_RD, LJ_ENDIAN_SELECT(2,0) -|.define OFS_RA, LJ_ENDIAN_SELECT(1,2) -|.define OFS_OP, LJ_ENDIAN_SELECT(0,3) +|.if ENDIAN_LE +|.define FRAME_PC, -4 +|.define FRAME_FUNC, -8 +|.define HI, 4 +|.define LO, 0 +|.define OFS_RD, 2 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +|.else +|.define FRAME_PC, -8 +|.define FRAME_FUNC, -4 +|.define HI, 0 +|.define LO, 4 +|.define OFS_RD, 0 +|.define OFS_RA, 2 +|.define OFS_OP, 3 +|.endif | |// Instruction decode. |.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro @@ -353,9 +411,11 @@ static void build_subroutines(BuildCtx *ctx) |. sll TMP2, TMP2, 3 |1: | addiu TMP1, TMP1, -8 - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | addiu RA, RA, 8 - | sdc1 f0, 0(BASE) + | sw SFRETHI, HI(BASE) + | sw SFRETLO, LO(BASE) | bnez TMP1, <1 |. addiu BASE, BASE, 8 | @@ -424,15 +484,16 @@ static void build_subroutines(BuildCtx *ctx) | and sp, CARG1, AT |->vm_unwind_ff_eh: // Landing pad for external unwinder. | lw L, SAVE_L - | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | li TISNUM, LJ_TISNUM // Setup type comparison constants. | li TISNIL, LJ_TNIL | lw BASE, L->base | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | mtc1 TMP3, TOBIT + | .FPU mtc1 TMP3, TOBIT | li TMP1, LJ_TFALSE | li_vmstate INTERP | lw PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | cvt.d.s TOBIT, TOBIT + | .FPU cvt.d.s TOBIT, TOBIT | addiu RA, BASE, -8 // Results start at BASE-8. | addiu DISPATCH, DISPATCH, GG_G2DISP | sw TMP1, HI(RA) // Prepend false to error message. @@ -440,6 +501,10 @@ static void build_subroutines(BuildCtx *ctx) | b ->vm_returnc |. li RD, 16 // 2 results: false + error message. | + |->vm_unwind_stub: // Jump to exit stub from unwinder. + | jr CARG1 + |. move ra, CARG2 + | |//----------------------------------------------------------------------- |//-- Grow stack for calls ----------------------------------------------- |//----------------------------------------------------------------------- @@ -486,21 +551,23 @@ static void build_subroutines(BuildCtx *ctx) | addiu DISPATCH, DISPATCH, GG_G2DISP | sw r0, SAVE_NRES | sw r0, SAVE_ERRF - | sw TMP0, L->cframe + | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. | sw r0, SAVE_CFRAME | beqz TMP1, >3 - |. sw CARG1, SAVE_PC // Any value outside of bytecode is ok. + |. sw TMP0, L->cframe | | // Resume after yield (like a return). + | sw L, DISPATCH_GL(cur_L)(DISPATCH) | move RA, BASE | lw BASE, L->base + | li TISNUM, LJ_TISNUM // Setup type comparison constants. | lw TMP1, L->top | lw PC, FRAME_PC(BASE) - | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | subu RD, TMP1, BASE - | mtc1 TMP3, TOBIT + | .FPU mtc1 TMP3, TOBIT | sb r0, L->status - | cvt.d.s TOBIT, TOBIT + | .FPU cvt.d.s TOBIT, TOBIT | li_vmstate INTERP | addiu RD, RD, 8 | st_vmstate @@ -525,25 +592,27 @@ static void build_subroutines(BuildCtx *ctx) | |1: // Entry point for vm_pcall above (PC = ftype). | lw TMP1, L:CARG1->cframe - | sw CARG3, SAVE_NRES | move L, CARG1 - | sw CARG1, SAVE_L - | move BASE, CARG2 - | sw sp, L->cframe // Add our C frame to cframe chain. + | sw CARG3, SAVE_NRES | lw DISPATCH, L->glref // Setup pointer to dispatch table. + | sw CARG1, SAVE_L + | move BASE, CARG2 + | addiu DISPATCH, DISPATCH, GG_G2DISP | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. | sw TMP1, SAVE_CFRAME - | addiu DISPATCH, DISPATCH, GG_G2DISP + | sw sp, L->cframe // Add our C frame to cframe chain. | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | sw L, DISPATCH_GL(cur_L)(DISPATCH) | lw TMP2, L->base // TMP2 = old base (used in vmeta_call). - | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | lw TMP1, L->top - | mtc1 TMP3, TOBIT + | .FPU mtc1 TMP3, TOBIT | addu PC, PC, BASE | subu NARGS8:RC, TMP1, BASE | subu PC, PC, TMP2 // PC = frame delta + frame type - | cvt.d.s TOBIT, TOBIT + | .FPU cvt.d.s TOBIT, TOBIT | li_vmstate INTERP | li TISNIL, LJ_TNIL | st_vmstate @@ -566,20 +635,21 @@ static void build_subroutines(BuildCtx *ctx) | lw TMP0, L:CARG1->stack | sw CARG1, SAVE_L | lw TMP1, L->top + | lw DISPATCH, L->glref // Setup pointer to dispatch table. | sw CARG1, SAVE_PC // Any value outside of bytecode is ok. | subu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). | lw TMP1, L->cframe - | sw sp, L->cframe // Add our C frame to cframe chain. + | addiu DISPATCH, DISPATCH, GG_G2DISP | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | sw r0, SAVE_ERRF // No error function. - | move CFUNCADDR, CARG4 + | sw TMP1, SAVE_CFRAME + | sw sp, L->cframe // Add our C frame to cframe chain. + | sw L, DISPATCH_GL(cur_L)(DISPATCH) | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) - |. sw TMP1, SAVE_CFRAME + |. move CFUNCADDR, CARG4 | move BASE, CRET1 - | lw DISPATCH, L->glref // Setup pointer to dispatch table. - | li PC, FRAME_CP | bnez CRET1, <3 // Else continue with the call. - |. addiu DISPATCH, DISPATCH, GG_G2DISP + |. li PC, FRAME_CP | b ->vm_leave_cp // No base? Just remove C frame. |. nop | @@ -624,7 +694,8 @@ static void build_subroutines(BuildCtx *ctx) |->cont_cat: // RA = resultptr, RB = meta base | lw INS, -4(PC) | addiu CARG2, RB, -16 - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | decode_RB8a MULTRES, INS | decode_RA8a RA, INS | decode_RB8b MULTRES @@ -632,11 +703,13 @@ static void build_subroutines(BuildCtx *ctx) | addu TMP1, BASE, MULTRES | sw BASE, L->base | subu CARG3, CARG2, TMP1 + | sw SFRETHI, HI(CARG2) | bne TMP1, CARG2, ->BC_CAT_Z - |. sdc1 f0, 0(CARG2) + |. sw SFRETLO, LO(CARG2) | addu RA, BASE, RA + | sw SFRETHI, HI(RA) | b ->cont_nop - |. sdc1 f0, 0(RA) + |. sw SFRETLO, LO(RA) | |//-- Table indexing metamethods ----------------------------------------- | @@ -659,10 +732,9 @@ static void build_subroutines(BuildCtx *ctx) |. sw TMP1, HI(CARG3) | |->vmeta_tgetb: // TMP0 = index - | mtc1 TMP0, f0 - | cvt.d.w f0, f0 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | sdc1 f0, 0(CARG3) + | sw TMP0, LO(CARG3) + | sw TISNUM, HI(CARG3) | |->vmeta_tgetv: |1: @@ -674,9 +746,11 @@ static void build_subroutines(BuildCtx *ctx) | // Returns TValue * (finished) or NULL (metamethod). | beqz CRET1, >3 |. addiu TMP1, BASE, -FRAME_CONT - | ldc1 f0, 0(CRET1) + | lw SFARG1HI, HI(CRET1) + | lw SFARG2HI, LO(CRET1) | ins_next1 - | sdc1 f0, 0(RA) + | sw SFARG1HI, HI(RA) + | sw SFARG2HI, LO(RA) | ins_next2 | |3: // Call __index metamethod. @@ -688,6 +762,17 @@ static void build_subroutines(BuildCtx *ctx) | b ->vm_call_dispatch_f |. li NARGS8:RC, 16 // 2 args for func(t, k). | + |->vmeta_tgetr: + | load_got lj_tab_getinth + | call_intern lj_tab_getinth // (GCtab *t, int32_t key) + |. nop + | // Returns cTValue * or NULL. + | beqz CRET1, ->BC_TGETR_Z + |. move SFARG2HI, TISNIL + | lw SFARG2HI, HI(CRET1) + | b ->BC_TGETR_Z + |. lw SFARG2LO, LO(CRET1) + | |//----------------------------------------------------------------------- | |->vmeta_tsets1: @@ -709,10 +794,9 @@ static void build_subroutines(BuildCtx *ctx) |. sw TMP1, HI(CARG3) | |->vmeta_tsetb: // TMP0 = index - | mtc1 TMP0, f0 - | cvt.d.w f0, f0 | addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) - | sdc1 f0, 0(CARG3) + | sw TMP0, LO(CARG3) + | sw TISNUM, HI(CARG3) | |->vmeta_tsetv: |1: @@ -722,11 +806,13 @@ static void build_subroutines(BuildCtx *ctx) | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) |. move CARG1, L | // Returns TValue * (finished) or NULL (metamethod). + | lw SFARG1HI, HI(RA) | beqz CRET1, >3 - |. ldc1 f0, 0(RA) + |. lw SFARG1LO, LO(RA) | // NOBARRIER: lj_meta_tset ensures the table is not black. | ins_next1 - | sdc1 f0, 0(CRET1) + | sw SFARG1HI, HI(CRET1) + | sw SFARG1LO, LO(CRET1) | ins_next2 | |3: // Call __newindex metamethod. @@ -736,14 +822,27 @@ static void build_subroutines(BuildCtx *ctx) | sw PC, -16+HI(BASE) // [cont|PC] | subu PC, BASE, TMP1 | lw LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | sdc1 f0, 16(BASE) // Copy value to third argument. + | sw SFARG1HI, 16+HI(BASE) // Copy value to third argument. + | sw SFARG1LO, 16+LO(BASE) | b ->vm_call_dispatch_f |. li NARGS8:RC, 24 // 3 args for func(t, k, v) | + |->vmeta_tsetr: + | load_got lj_tab_setinth + | sw BASE, L->base + | sw PC, SAVE_PC + | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + |. move CARG1, L + | // Returns TValue *. + | b ->BC_TSETR_Z + |. nop + | |//-- Comparison metamethods --------------------------------------------- | |->vmeta_comp: - | // CARG2, CARG3 are already set by BC_ISLT/BC_ISGE/BC_ISLE/BC_ISGT. + | // RA/RD point to o1/o2. + | move CARG2, RA + | move CARG3, RD | load_got lj_meta_comp | addiu PC, PC, -4 | sw BASE, L->base @@ -769,11 +868,13 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_ra: // RA = resultptr | lbu TMP1, -4+OFS_RA(PC) - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | sll TMP1, TMP1, 3 | addu TMP1, BASE, TMP1 + | sw SFRETHI, HI(TMP1) | b ->cont_nop - |. sdc1 f0, 0(TMP1) + |. sw SFRETLO, LO(TMP1) | |->cont_condt: // RA = resultptr | lw TMP0, HI(RA) @@ -788,8 +889,11 @@ static void build_subroutines(BuildCtx *ctx) |. addiu TMP2, AT, -1 // Branch if result is false. | |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. + | // SFARG1LO/SFARG2LO point to o1/o2. TMP0 is set to 0/1. | load_got lj_meta_equal + | move CARG2, SFARG1LO + | move CARG3, SFARG2LO + | move CARG4, TMP0 | addiu PC, PC, -4 | sw BASE, L->base | sw PC, SAVE_PC @@ -813,17 +917,31 @@ static void build_subroutines(BuildCtx *ctx) |. nop |.endif | + |->vmeta_istype: + | load_got lj_meta_istype + | addiu PC, PC, -4 + | sw BASE, L->base + | srl CARG2, RA, 3 + | srl CARG3, RD, 3 + | sw PC, SAVE_PC + | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + |. move CARG1, L + | b ->cont_nop + |. nop + | |//-- Arithmetic metamethods --------------------------------------------- | |->vmeta_unm: - | move CARG4, CARG3 + | move RC, RB | |->vmeta_arith: | load_got lj_meta_arith | decode_OP1 TMP0, INS | sw BASE, L->base - | sw PC, SAVE_PC | move CARG2, RA + | sw PC, SAVE_PC + | move CARG3, RB + | move CARG4, RC | sw TMP0, ARG5 | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) |. move CARG1, L @@ -931,40 +1049,52 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc_1, name |->ff_ .. name: + | lw SFARG1HI, HI(BASE) | beqz NARGS8:RC, ->fff_fallback - |. lw CARG3, HI(BASE) - | lw CARG1, LO(BASE) + |. lw SFARG1LO, LO(BASE) |.endmacro | |.macro .ffunc_2, name |->ff_ .. name: | sltiu AT, NARGS8:RC, 16 - | lw CARG3, HI(BASE) + | lw SFARG1HI, HI(BASE) | bnez AT, ->fff_fallback - |. lw CARG4, 8+HI(BASE) - | lw CARG1, LO(BASE) - | lw CARG2, 8+LO(BASE) + |. lw SFARG2HI, 8+HI(BASE) + | lw SFARG1LO, LO(BASE) + | lw SFARG2LO, 8+LO(BASE) |.endmacro | |.macro .ffunc_n, name // Caveat: has delay slot! |->ff_ .. name: - | lw CARG3, HI(BASE) + | lw SFARG1HI, HI(BASE) + |.if FPU + | ldc1 FARG1, 0(BASE) + |.else + | lw SFARG1LO, LO(BASE) + |.endif | beqz NARGS8:RC, ->fff_fallback - |. ldc1 FARG1, 0(BASE) - | sltiu AT, CARG3, LJ_TISNUM + |. sltiu AT, SFARG1HI, LJ_TISNUM | beqz AT, ->fff_fallback |.endmacro | |.macro .ffunc_nn, name // Caveat: has delay slot! |->ff_ .. name: | sltiu AT, NARGS8:RC, 16 - | lw CARG3, HI(BASE) + | lw SFARG1HI, HI(BASE) | bnez AT, ->fff_fallback - |. lw CARG4, 8+HI(BASE) - | ldc1 FARG1, 0(BASE) - | ldc1 FARG2, 8(BASE) - | sltiu TMP0, CARG3, LJ_TISNUM - | sltiu TMP1, CARG4, LJ_TISNUM + |. lw SFARG2HI, 8+HI(BASE) + | sltiu TMP0, SFARG1HI, LJ_TISNUM + |.if FPU + | ldc1 FARG1, 0(BASE) + |.else + | lw SFARG1LO, LO(BASE) + |.endif + | sltiu TMP1, SFARG2HI, LJ_TISNUM + |.if FPU + | ldc1 FARG2, 8(BASE) + |.else + | lw SFARG2LO, 8+LO(BASE) + |.endif | and TMP0, TMP0, TMP1 | beqz TMP0, ->fff_fallback |.endmacro @@ -980,53 +1110,55 @@ static void build_subroutines(BuildCtx *ctx) |//-- Base library: checks ----------------------------------------------- | |.ffunc_1 assert - | sltiu AT, CARG3, LJ_TISTRUECOND + | sltiu AT, SFARG1HI, LJ_TISTRUECOND | beqz AT, ->fff_fallback |. addiu RA, BASE, -8 | lw PC, FRAME_PC(BASE) | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. | addu TMP2, RA, NARGS8:RC - | sw CARG3, HI(RA) + | sw SFARG1HI, HI(RA) | addiu TMP1, BASE, 8 | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. - |. sw CARG1, LO(RA) + |. sw SFARG1LO, LO(RA) |1: - | ldc1 f0, 0(TMP1) - | sdc1 f0, -8(TMP1) + | lw SFRETHI, HI(TMP1) + | lw SFRETLO, LO(TMP1) + | sw SFRETHI, -8+HI(TMP1) + | sw SFRETLO, -8+LO(TMP1) | bne TMP1, TMP2, <1 |. addiu TMP1, TMP1, 8 | b ->fff_res |. nop | |.ffunc type - | lw CARG3, HI(BASE) - | li TMP1, LJ_TISNUM + | lw SFARG1HI, HI(BASE) | beqz NARGS8:RC, ->fff_fallback - |. sltiu TMP0, CARG3, LJ_TISNUM - | movz TMP1, CARG3, TMP0 - | not TMP1, TMP1 + |. sltiu TMP0, SFARG1HI, LJ_TISNUM + | movn SFARG1HI, TISNUM, TMP0 + | not TMP1, SFARG1HI | sll TMP1, TMP1, 3 | addu TMP1, CFUNC:RB, TMP1 - | b ->fff_resn - |. ldc1 FRET1, CFUNC:TMP1->upvalue + | lw SFARG1HI, CFUNC:TMP1->upvalue[0].u32.hi + | b ->fff_restv + |. lw SFARG1LO, CFUNC:TMP1->upvalue[0].u32.lo | |//-- Base library: getters and setters --------------------------------- | |.ffunc_1 getmetatable | li AT, LJ_TTAB - | bne CARG3, AT, >6 + | bne SFARG1HI, AT, >6 |. li AT, LJ_TUDATA |1: // Field metatable must be at same offset for GCtab and GCudata! - | lw TAB:CARG1, TAB:CARG1->metatable + | lw TAB:SFARG1LO, TAB:SFARG1LO->metatable |2: | lw STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beqz TAB:CARG1, ->fff_restv - |. li CARG3, LJ_TNIL - | lw TMP0, TAB:CARG1->hmask - | li CARG3, LJ_TTAB // Use metatable as default result. - | lw TMP1, STR:RC->hash - | lw NODE:TMP2, TAB:CARG1->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | beqz TAB:SFARG1LO, ->fff_restv + |. li SFARG1HI, LJ_TNIL + | lw TMP0, TAB:SFARG1LO->hmask + | li SFARG1HI, LJ_TTAB // Use metatable as default result. + | lw TMP1, STR:RC->sid + | lw NODE:TMP2, TAB:SFARG1LO->node + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 @@ -1037,7 +1169,7 @@ static void build_subroutines(BuildCtx *ctx) | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | lw NODE:TMP3, NODE:TMP2->next | bne CARG4, AT, >4 - |. lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) + |. lw CARG3, offsetof(Node, val)+HI(NODE:TMP2) | beq TMP0, STR:RC, >5 |. lw TMP1, offsetof(Node, val)+LO(NODE:TMP2) |4: @@ -1046,36 +1178,35 @@ static void build_subroutines(BuildCtx *ctx) | b <3 |. nop |5: - | beq CARG2, TISNIL, ->fff_restv // Ditto for nil value. + | beq CARG3, TISNIL, ->fff_restv // Ditto for nil value. |. nop - | move CARG3, CARG2 // Return value of mt.__metatable. + | move SFARG1HI, CARG3 // Return value of mt.__metatable. | b ->fff_restv - |. move CARG1, TMP1 + |. move SFARG1LO, TMP1 | |6: - | beq CARG3, AT, <1 - |. sltiu TMP0, CARG3, LJ_TISNUM - | li TMP1, LJ_TISNUM - | movz TMP1, CARG3, TMP0 - | not TMP1, TMP1 + | beq SFARG1HI, AT, <1 + |. sltu AT, TISNUM, SFARG1HI + | movz SFARG1HI, TISNUM, AT + | not TMP1, SFARG1HI | sll TMP1, TMP1, 2 | addu TMP1, DISPATCH, TMP1 | b <2 - |. lw TAB:CARG1, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) + |. lw TAB:SFARG1LO, DISPATCH_GL(gcroot[GCROOT_BASEMT])(TMP1) | |.ffunc_2 setmetatable | // Fast path: no mt for table yet and not clearing the mt. | li AT, LJ_TTAB - | bne CARG3, AT, ->fff_fallback - |. addiu CARG4, CARG4, -LJ_TTAB - | lw TAB:TMP1, TAB:CARG1->metatable - | lbu TMP3, TAB:CARG1->marked - | or AT, CARG4, TAB:TMP1 + | bne SFARG1HI, AT, ->fff_fallback + |. addiu SFARG2HI, SFARG2HI, -LJ_TTAB + | lw TAB:TMP1, TAB:SFARG1LO->metatable + | lbu TMP3, TAB:SFARG1LO->marked + | or AT, SFARG2HI, TAB:TMP1 | bnez AT, ->fff_fallback |. andi AT, TMP3, LJ_GC_BLACK // isblack(table) | beqz AT, ->fff_restv - |. sw TAB:CARG2, TAB:CARG1->metatable - | barrierback TAB:CARG1, TMP3, TMP0, ->fff_restv + |. sw TAB:SFARG2LO, TAB:SFARG1LO->metatable + | barrierback TAB:SFARG1LO, TMP3, TMP0, ->fff_restv | |.ffunc rawget | lw CARG4, HI(BASE) @@ -1089,90 +1220,89 @@ static void build_subroutines(BuildCtx *ctx) | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) |. move CARG1, L | // Returns cTValue *. - | b ->fff_resn - |. ldc1 FRET1, 0(CRET1) + | lw SFARG1HI, HI(CRET1) + | b ->fff_restv + |. lw SFARG1LO, LO(CRET1) | |//-- Base library: conversions ------------------------------------------ | |.ffunc tonumber | // Only handles the number case inline (without a base argument). | lw CARG1, HI(BASE) - | xori AT, NARGS8:RC, 8 - | sltiu CARG1, CARG1, LJ_TISNUM - | movn CARG1, r0, AT - | beqz CARG1, ->fff_fallback // Exactly one number argument. - |. ldc1 FRET1, 0(BASE) - | b ->fff_resn - |. nop + | xori AT, NARGS8:RC, 8 // Exactly one number argument. + | sltu TMP0, TISNUM, CARG1 + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback + |. lw SFARG1HI, HI(BASE) + | b ->fff_restv + |. lw SFARG1LO, LO(BASE) | |.ffunc_1 tostring | // Only handles the string or number case inline. | li AT, LJ_TSTR | // A __tostring method in the string base metatable is ignored. - | beq CARG3, AT, ->fff_restv // String key? + | beq SFARG1HI, AT, ->fff_restv // String key? | // Handle numbers inline, unless a number base metatable is present. |. lw TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | sltiu TMP0, CARG3, LJ_TISNUM - | sltiu TMP1, TMP1, 1 - | and TMP0, TMP0, TMP1 - | beqz TMP0, ->fff_fallback + | sltu TMP0, TISNUM, SFARG1HI + | or TMP0, TMP0, TMP1 + | bnez TMP0, ->fff_fallback |. sw BASE, L->base // Add frame since C call can throw. | ffgccheck |. sw PC, SAVE_PC // Redundant (but a defined value). - | load_got lj_str_fromnum + | load_got lj_strfmt_number | move CARG1, L - | call_intern lj_str_fromnum // (lua_State *L, lua_Number *np) + | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) |. move CARG2, BASE | // Returns GCstr *. - | li CARG3, LJ_TSTR + | li SFARG1HI, LJ_TSTR | b ->fff_restv - |. move CARG1, CRET1 + |. move SFARG1LO, CRET1 | |//-- Base library: iterators ------------------------------------------- | |.ffunc next - | lw CARG1, HI(BASE) - | lw TAB:CARG2, LO(BASE) + | lw CARG2, HI(BASE) + | lw TAB:CARG1, LO(BASE) | beqz NARGS8:RC, ->fff_fallback |. addu TMP2, BASE, NARGS8:RC | li AT, LJ_TTAB | sw TISNIL, HI(TMP2) // Set missing 2nd arg to nil. - | bne CARG1, AT, ->fff_fallback + | bne CARG2, AT, ->fff_fallback |. lw PC, FRAME_PC(BASE) | load_got lj_tab_next - | sw BASE, L->base // Add frame since C call can throw. - | sw BASE, L->top // Dummy frame length is ok. - | addiu CARG3, BASE, 8 - | sw PC, SAVE_PC - | call_intern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - |. move CARG1, L - | // Returns 0 at end of traversal. + | addiu CARG2, BASE, 8 + | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + |. addiu CARG3, BASE, -8 + | // Returns 1=found, 0=end, -1=error. + | addiu RA, BASE, -8 + | bgtz CRET1, ->fff_res // Found key/value. + |. li RD, (2+1)*8 | beqz CRET1, ->fff_restv // End of traversal: return nil. - |. li CARG3, LJ_TNIL - | ldc1 f0, 8(BASE) // Copy key and value to results. - | addiu RA, BASE, -8 - | ldc1 f2, 16(BASE) - | li RD, (2+1)*8 - | sdc1 f0, 0(RA) - | b ->fff_res - |. sdc1 f2, 8(RA) + |. li SFARG1HI, LJ_TNIL + | lw CFUNC:RB, FRAME_FUNC(BASE) + | b ->fff_fallback // Invalid key. + |. li RC, 2*8 | |.ffunc_1 pairs | li AT, LJ_TTAB - | bne CARG3, AT, ->fff_fallback + | bne SFARG1HI, AT, ->fff_fallback |. lw PC, FRAME_PC(BASE) #if LJ_52 - | lw TAB:TMP2, TAB:CARG1->metatable - | ldc1 f0, CFUNC:RB->upvalue[0] + | lw TAB:TMP2, TAB:SFARG1LO->metatable + | lw TMP0, CFUNC:RB->upvalue[0].u32.hi + | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | bnez TAB:TMP2, ->fff_fallback #else - | ldc1 f0, CFUNC:RB->upvalue[0] + | lw TMP0, CFUNC:RB->upvalue[0].u32.hi + | lw TMP1, CFUNC:RB->upvalue[0].u32.lo #endif |. addiu RA, BASE, -8 | sw TISNIL, 8+HI(BASE) - | li RD, (3+1)*8 + | sw TMP0, HI(RA) + | sw TMP1, LO(RA) | b ->fff_res - |. sdc1 f0, 0(RA) + |. li RD, (3+1)*8 | |.ffunc ipairs_aux | sltiu AT, NARGS8:RC, 16 @@ -1180,35 +1310,32 @@ static void build_subroutines(BuildCtx *ctx) | lw TAB:CARG1, LO(BASE) | lw CARG4, 8+HI(BASE) | bnez AT, ->fff_fallback - |. ldc1 FARG2, 8(BASE) - | addiu CARG3, CARG3, -LJ_TTAB - | sltiu AT, CARG4, LJ_TISNUM - | li TMP0, 1 - | movn AT, r0, CARG3 - | mtc1 TMP0, FARG1 - | beqz AT, ->fff_fallback + |. addiu CARG3, CARG3, -LJ_TTAB + | xor CARG4, CARG4, TISNUM + | and AT, CARG3, CARG4 + | bnez AT, ->fff_fallback |. lw PC, FRAME_PC(BASE) - | cvt.w.d FRET1, FARG2 - | cvt.d.w FARG1, FARG1 + | lw TMP2, 8+LO(BASE) | lw TMP0, TAB:CARG1->asize | lw TMP1, TAB:CARG1->array - | mfc1 TMP2, FRET1 - | addiu RA, BASE, -8 - | add.d FARG2, FARG2, FARG1 | addiu TMP2, TMP2, 1 + | sw TISNUM, -8+HI(BASE) | sltu AT, TMP2, TMP0 + | sw TMP2, -8+LO(BASE) + | beqz AT, >2 // Not in array part? + |. addiu RA, BASE, -8 | sll TMP3, TMP2, 3 | addu TMP3, TMP1, TMP3 - | beqz AT, >2 // Not in array part? - |. sdc1 FARG2, 0(RA) - | lw TMP2, HI(TMP3) - | ldc1 f0, 0(TMP3) + | lw TMP1, HI(TMP3) + | lw TMP2, LO(TMP3) |1: - | beq TMP2, TISNIL, ->fff_res // End of iteration, return 0 results. + | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. |. li RD, (0+1)*8 - | li RD, (2+1)*8 + | sw TMP1, 8+HI(RA) + | sw TMP2, 8+LO(RA) | b ->fff_res - |. sdc1 f0, 8(RA) + |. li RD, (2+1)*8 + | |2: // Check for empty hash part first. Otherwise call C function. | lw TMP0, TAB:CARG1->hmask | load_got lj_tab_getinth @@ -1219,27 +1346,30 @@ static void build_subroutines(BuildCtx *ctx) | // Returns cTValue * or NULL. | beqz CRET1, ->fff_res |. li RD, (0+1)*8 - | lw TMP2, HI(CRET1) + | lw TMP1, HI(CRET1) | b <1 - |. ldc1 f0, 0(CRET1) + |. lw TMP2, LO(CRET1) | |.ffunc_1 ipairs | li AT, LJ_TTAB - | bne CARG3, AT, ->fff_fallback + | bne SFARG1HI, AT, ->fff_fallback |. lw PC, FRAME_PC(BASE) #if LJ_52 - | lw TAB:TMP2, TAB:CARG1->metatable - | ldc1 f0, CFUNC:RB->upvalue[0] + | lw TAB:TMP2, TAB:SFARG1LO->metatable + | lw TMP0, CFUNC:RB->upvalue[0].u32.hi + | lw TMP1, CFUNC:RB->upvalue[0].u32.lo | bnez TAB:TMP2, ->fff_fallback #else - | ldc1 f0, CFUNC:RB->upvalue[0] + | lw TMP0, CFUNC:RB->upvalue[0].u32.hi + | lw TMP1, CFUNC:RB->upvalue[0].u32.lo #endif |. addiu RA, BASE, -8 - | sw r0, 8+HI(BASE) + | sw TISNUM, 8+HI(BASE) | sw r0, 8+LO(BASE) - | li RD, (3+1)*8 + | sw TMP0, HI(RA) + | sw TMP1, LO(RA) | b ->fff_res - |. sdc1 f0, 0(RA) + |. li RD, (3+1)*8 | |//-- Base library: catch errors ---------------------------------------- | @@ -1259,8 +1389,9 @@ static void build_subroutines(BuildCtx *ctx) | sltiu AT, NARGS8:RC, 16 | lw CARG4, 8+HI(BASE) | bnez AT, ->fff_fallback - |. ldc1 FARG2, 8(BASE) - | ldc1 FARG1, 0(BASE) + |. lw CARG3, 8+LO(BASE) + | lw CARG1, LO(BASE) + | lw CARG2, HI(BASE) | lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) | li AT, LJ_TFUNC | move TMP2, BASE @@ -1268,9 +1399,11 @@ static void build_subroutines(BuildCtx *ctx) | addiu BASE, BASE, 16 | // Remember active hook before pcall. | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT - | sdc1 FARG2, 0(TMP2) // Swap function and traceback. + | sw CARG3, LO(TMP2) // Swap function and traceback. + | sw CARG4, HI(TMP2) | andi TMP3, TMP3, 1 - | sdc1 FARG1, 8(TMP2) + | sw CARG1, 8+LO(TMP2) + | sw CARG2, 8+HI(TMP2) | addiu PC, TMP3, 16+FRAME_PCALL | b ->vm_call_dispatch |. addiu NARGS8:RC, NARGS8:RC, -16 @@ -1279,7 +1412,10 @@ static void build_subroutines(BuildCtx *ctx) | |.macro coroutine_resume_wrap, resume |.if resume - |.ffunc_1 coroutine_resume + |.ffunc coroutine_resume + | lw CARG3, HI(BASE) + | beqz NARGS8:RC, ->fff_fallback + |. lw CARG1, LO(BASE) | li AT, LJ_TTHREAD | bne CARG3, AT, ->fff_fallback |.else @@ -1314,11 +1450,13 @@ static void build_subroutines(BuildCtx *ctx) | move CARG3, CARG2 | sw BASE, L->top |2: // Move args to coroutine. - | ldc1 f0, 0(BASE) + | lw SFRETHI, HI(BASE) + | lw SFRETLO, LO(BASE) | sltu AT, BASE, TMP1 | beqz AT, >3 |. addiu BASE, BASE, 8 - | sdc1 f0, 0(CARG3) + | sw SFRETHI, HI(CARG3) + | sw SFRETLO, LO(CARG3) | b <2 |. addiu CARG3, CARG3, 8 |3: @@ -1331,6 +1469,7 @@ static void build_subroutines(BuildCtx *ctx) | lw TMP3, L:RA->top | li_vmstate INTERP | lw BASE, L->base + | sw L, DISPATCH_GL(cur_L)(DISPATCH) | st_vmstate | beqz AT, >8 |. subu RD, TMP3, TMP2 @@ -1343,10 +1482,12 @@ static void build_subroutines(BuildCtx *ctx) | sw TMP2, L:RA->top // Clear coroutine stack. | move TMP1, BASE |5: // Move results from coroutine. - | ldc1 f0, 0(TMP2) + | lw SFRETHI, HI(TMP2) + | lw SFRETLO, LO(TMP2) | addiu TMP2, TMP2, 8 | sltu AT, TMP2, TMP3 - | sdc1 f0, 0(TMP1) + | sw SFRETHI, HI(TMP1) + | sw SFRETLO, LO(TMP1) | bnez AT, <5 |. addiu TMP1, TMP1, 8 |6: @@ -1371,12 +1512,14 @@ static void build_subroutines(BuildCtx *ctx) |.if resume | addiu TMP3, TMP3, -8 | li TMP1, LJ_TFALSE - | ldc1 f0, 0(TMP3) + | lw SFRETHI, HI(TMP3) + | lw SFRETLO, LO(TMP3) | sw TMP3, L:RA->top // Remove error from coroutine stack. | li RD, (2+1)*8 | sw TMP1, -8+HI(BASE) // Prepend false to results. | addiu RA, BASE, -8 - | sdc1 f0, 0(BASE) // Copy error message. + | sw SFRETHI, HI(BASE) // Copy error message. + | sw SFRETLO, LO(BASE) | b <7 |. andi TMP0, PC, FRAME_TYPE |.else @@ -1412,20 +1555,29 @@ static void build_subroutines(BuildCtx *ctx) | |//-- Math library ------------------------------------------------------- | - |.ffunc_n math_abs - |. abs.d FRET1, FARG1 - |->fff_resn: - | lw PC, FRAME_PC(BASE) - | addiu RA, BASE, -8 - | b ->fff_res1 - |. sdc1 FRET1, -8(BASE) + |.ffunc_1 math_abs + | bne SFARG1HI, TISNUM, >1 + |. sra TMP0, SFARG1LO, 31 + | xor TMP1, SFARG1LO, TMP0 + | subu SFARG1LO, TMP1, TMP0 + | bgez SFARG1LO, ->fff_restv + |. nop + | lui SFARG1HI, 0x41e0 // 2^31 as a double. + | b ->fff_restv + |. li SFARG1LO, 0 + |1: + | sltiu AT, SFARG1HI, LJ_TISNUM + | beqz AT, ->fff_fallback + |. sll SFARG1HI, SFARG1HI, 1 + | srl SFARG1HI, SFARG1HI, 1 + |// fallthrough | |->fff_restv: - | // CARG3/CARG1 = TValue result. + | // SFARG1LO/SFARG1HI = TValue result. | lw PC, FRAME_PC(BASE) - | sw CARG3, -8+HI(BASE) + | sw SFARG1HI, -8+HI(BASE) | addiu RA, BASE, -8 - | sw CARG1, -8+LO(BASE) + | sw SFARG1LO, -8+LO(BASE) |->fff_res1: | // RA = results, PC = return. | li RD, (1+1)*8 @@ -1454,15 +1606,19 @@ static void build_subroutines(BuildCtx *ctx) |. sw TISNIL, -8+HI(TMP1) | |.macro math_extern, func - |->ff_math_ .. func: - | lw CARG3, HI(BASE) + | .ffunc math_ .. func + | lw SFARG1HI, HI(BASE) | beqz NARGS8:RC, ->fff_fallback |. load_got func - | sltiu AT, CARG3, LJ_TISNUM + | sltiu AT, SFARG1HI, LJ_TISNUM | beqz AT, ->fff_fallback - |. nop - | call_extern + |.if FPU |. ldc1 FARG1, 0(BASE) + |.else + |. lw SFARG1LO, LO(BASE) + |.endif + | call_extern + |. nop | b ->fff_resn |. nop |.endmacro @@ -1476,10 +1632,22 @@ static void build_subroutines(BuildCtx *ctx) |. nop |.endmacro | + |// TODO: Return integer type if result is integer (own sf implementation). |.macro math_round, func - | .ffunc_n math_ .. func - |. nop + |->ff_math_ .. func: + | lw SFARG1HI, HI(BASE) + | beqz NARGS8:RC, ->fff_fallback + |. lw SFARG1LO, LO(BASE) + | beq SFARG1HI, TISNUM, ->fff_restv + |. sltu AT, SFARG1HI, TISNUM + | beqz AT, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) | bal ->vm_ .. func + |.else + |. load_got func + | call_extern + |.endif |. nop | b ->fff_resn |. nop @@ -1489,15 +1657,19 @@ static void build_subroutines(BuildCtx *ctx) | math_round ceil | |.ffunc math_log - | lw CARG3, HI(BASE) | li AT, 8 | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. - |. load_got log - | sltiu AT, CARG3, LJ_TISNUM + |. lw SFARG1HI, HI(BASE) + | sltiu AT, SFARG1HI, LJ_TISNUM | beqz AT, ->fff_fallback - |. nop + |. load_got log + |.if FPU | call_extern |. ldc1 FARG1, 0(BASE) + |.else + | call_extern + |. lw SFARG1LO, LO(BASE) + |.endif | b ->fff_resn |. nop | @@ -1516,23 +1688,43 @@ static void build_subroutines(BuildCtx *ctx) | math_extern2 atan2 | math_extern2 fmod | + |.if FPU |.ffunc_n math_sqrt |. sqrt.d FRET1, FARG1 - | b ->fff_resn - |. nop + |// fallthrough to ->fff_resn + |.else + | math_extern sqrt + |.endif + | + |->fff_resn: + | lw PC, FRAME_PC(BASE) + | addiu RA, BASE, -8 + |.if FPU + | b ->fff_res1 + |. sdc1 FRET1, -8(BASE) + |.else + | sw SFRETHI, -8+HI(BASE) + | b ->fff_res1 + |. sw SFRETLO, -8+LO(BASE) + |.endif | - |->ff_math_deg: - |.ffunc_n math_rad - |. ldc1 FARG2, CFUNC:RB->upvalue[0] - | b ->fff_resn - |. mul.d FRET1, FARG1, FARG2 | - |.ffunc_nn math_ldexp - | cvt.w.d FARG2, FARG2 + |.ffunc math_ldexp + | sltiu AT, NARGS8:RC, 16 + | lw SFARG1HI, HI(BASE) + | bnez AT, ->fff_fallback + |. lw CARG4, 8+HI(BASE) + | bne CARG4, TISNUM, ->fff_fallback | load_got ldexp - | mfc1 CARG3, FARG2 + |. sltu AT, SFARG1HI, TISNUM + | beqz AT, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) + |.else + |. lw SFARG1LO, LO(BASE) + |.endif | call_extern - |. nop + |. lw CARG3, 8+LO(BASE) | b ->fff_resn |. nop | @@ -1543,10 +1735,17 @@ static void build_subroutines(BuildCtx *ctx) |. addiu CARG3, DISPATCH, DISPATCH_GL(tmptv) | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) | addiu RA, BASE, -8 + |.if FPU | mtc1 TMP1, FARG2 | sdc1 FRET1, 0(RA) | cvt.d.w FARG2, FARG2 | sdc1 FARG2, 8(RA) + |.else + | sw SFRETLO, LO(RA) + | sw SFRETHI, HI(RA) + | sw TMP1, 8+LO(RA) + | sw TISNUM, 8+HI(RA) + |.endif | b ->fff_res |. li RD, (2+1)*8 | @@ -1556,49 +1755,109 @@ static void build_subroutines(BuildCtx *ctx) | call_extern |. addiu CARG3, BASE, -8 | addiu RA, BASE, -8 + |.if FPU | sdc1 FRET1, 0(BASE) + |.else + | sw SFRETLO, LO(BASE) + | sw SFRETHI, HI(BASE) + |.endif | b ->fff_res |. li RD, (2+1)*8 | - |.macro math_minmax, name, ismax - |->ff_ .. name: - | lw CARG3, HI(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. ldc1 FRET1, 0(BASE) - | sltiu AT, CARG3, LJ_TISNUM + |.macro math_minmax, name, intins, ismax + | .ffunc_1 name + | addu TMP3, BASE, NARGS8:RC + | bne SFARG1HI, TISNUM, >5 + |. addiu TMP2, BASE, 8 + |1: // Handle integers. + |. lw SFARG2HI, HI(TMP2) + | beq TMP2, TMP3, ->fff_restv + |. lw SFARG2LO, LO(TMP2) + | bne SFARG2HI, TISNUM, >3 + |. slt AT, SFARG1LO, SFARG2LO + | intins SFARG1LO, SFARG2LO, AT + | b <1 + |. addiu TMP2, TMP2, 8 + | + |3: // Convert intermediate result to number and continue with number loop. + | sltiu AT, SFARG2HI, LJ_TISNUM | beqz AT, ->fff_fallback - |. addu TMP2, BASE, NARGS8:RC - | addiu TMP1, BASE, 8 - | beq TMP1, TMP2, ->fff_resn - |1: - |. lw CARG3, HI(TMP1) - | ldc1 FARG1, 0(TMP1) - | addiu TMP1, TMP1, 8 - | sltiu AT, CARG3, LJ_TISNUM + |.if FPU + |. mtc1 SFARG1LO, FRET1 + | cvt.d.w FRET1, FRET1 + | b >7 + |. ldc1 FARG1, 0(TMP2) + |.else + |. nop + | bal ->vm_sfi2d_1 + |. nop + | b >7 + |. nop + |.endif + | + |5: + |. sltiu AT, SFARG1HI, LJ_TISNUM | beqz AT, ->fff_fallback + |.if FPU + |. ldc1 FRET1, 0(BASE) + |.endif + | + |6: // Handle numbers. + |. lw SFARG2HI, HI(TMP2) + |.if FPU + | beq TMP2, TMP3, ->fff_resn + |.else + | beq TMP2, TMP3, ->fff_restv + |.endif + |. sltiu AT, SFARG2HI, LJ_TISNUM + | beqz AT, >8 + |.if FPU + |. ldc1 FARG1, 0(TMP2) + |.else + |. lw SFARG2LO, LO(TMP2) + |.endif + |7: + |.if FPU |.if ismax - |. c.olt.d FARG1, FRET1 + | c.olt.d FARG1, FRET1 |.else - |. c.olt.d FRET1, FARG1 + | c.olt.d FRET1, FARG1 + |.endif + | movf.d FRET1, FARG1 + |.else + |.if ismax + | bal ->vm_sfcmpogt + |.else + | bal ->vm_sfcmpolt |.endif - | bne TMP1, TMP2, <1 - |. movf.d FRET1, FARG1 - | b ->fff_resn |. nop + | movz SFARG1LO, SFARG2LO, CRET1 + | movz SFARG1HI, SFARG2HI, CRET1 + |.endif + | b <6 + |. addiu TMP2, TMP2, 8 + | + |8: // Convert integer to number and continue with number loop. + | bne SFARG2HI, TISNUM, ->fff_fallback + |.if FPU + |. lwc1 FARG1, LO(TMP2) + | b <7 + |. cvt.d.w FARG1, FARG1 + |.else + |. nop + | bal ->vm_sfi2d_2 + |. nop + | b <7 + |. nop + |.endif + | |.endmacro | - | math_minmax math_min, 0 - | math_minmax math_max, 1 + | math_minmax math_min, movz, 0 + | math_minmax math_max, movn, 1 | |//-- String library ----------------------------------------------------- | - |.ffunc_1 string_len - | li AT, LJ_TSTR - | bne CARG3, AT, ->fff_fallback - |. nop - | b ->fff_resi - |. lw CRET1, STR:CARG1->len - | |.ffunc string_byte // Only handle the 1-arg case here. | lw CARG3, HI(BASE) | lw STR:CARG1, LO(BASE) @@ -1608,33 +1867,31 @@ static void build_subroutines(BuildCtx *ctx) | bnez AT, ->fff_fallback // Need exactly 1 string argument. |. nop | lw TMP0, STR:CARG1->len - | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | addiu RA, BASE, -8 + | lw PC, FRAME_PC(BASE) | sltu RD, r0, TMP0 - | mtc1 TMP1, f0 + | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). | addiu RD, RD, 1 - | cvt.d.w f0, f0 - | lw PC, FRAME_PC(BASE) | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 + | sw TISNUM, HI(RA) | b ->fff_res - |. sdc1 f0, 0(RA) + |. sw TMP1, LO(RA) | |.ffunc string_char // Only handle the 1-arg case here. | ffgccheck |. nop | lw CARG3, HI(BASE) - | ldc1 FARG1, 0(BASE) - | li AT, 8 - | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. - |. sltiu AT, CARG3, LJ_TISNUM - | beqz AT, ->fff_fallback + | lw CARG1, LO(BASE) + | li TMP1, 255 + | xori AT, NARGS8:RC, 8 // Exactly 1 argument. + | xor TMP0, CARG3, TISNUM // Integer. + | sltu TMP1, TMP1, CARG1 // !(255 < n). + | or AT, AT, TMP0 + | or AT, AT, TMP1 + | bnez AT, ->fff_fallback |. li CARG3, 1 - | cvt.w.d FARG1, FARG1 | addiu CARG2, sp, ARG5_OFS - | sltiu AT, TMP0, 256 - | mfc1 TMP0, FARG1 - | beqz AT, ->fff_fallback - |. sw TMP0, ARG5 + | sb CARG1, ARG5 |->fff_newstr: | load_got lj_str_new | sw BASE, L->base @@ -1643,35 +1900,30 @@ static void build_subroutines(BuildCtx *ctx) |. move CARG1, L | // Returns GCstr *. | lw BASE, L->base - | move CARG1, CRET1 + |->fff_resstr: + | move SFARG1LO, CRET1 | b ->fff_restv - |. li CARG3, LJ_TSTR + |. li SFARG1HI, LJ_TSTR | |.ffunc string_sub | ffgccheck |. nop | addiu AT, NARGS8:RC, -16 | lw CARG3, 16+HI(BASE) - | ldc1 f0, 16(BASE) | lw TMP0, HI(BASE) | lw STR:CARG1, LO(BASE) | bltz AT, ->fff_fallback - | lw CARG2, 8+HI(BASE) - | ldc1 f2, 8(BASE) + |. lw CARG2, 8+HI(BASE) | beqz AT, >1 |. li CARG4, -1 - | cvt.w.d f0, f0 - | sltiu AT, CARG3, LJ_TISNUM - | beqz AT, ->fff_fallback - |. mfc1 CARG4, f0 + | bne CARG3, TISNUM, ->fff_fallback + |. lw CARG4, 16+LO(BASE) |1: - | sltiu AT, CARG2, LJ_TISNUM - | beqz AT, ->fff_fallback + | bne CARG2, TISNUM, ->fff_fallback |. li AT, LJ_TSTR - | cvt.w.d f2, f2 | bne TMP0, AT, ->fff_fallback - |. lw CARG2, STR:CARG1->len - | mfc1 CARG3, f2 + |. lw CARG3, 8+LO(BASE) + | lw CARG2, STR:CARG1->len | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end | slt AT, CARG4, r0 | addiu TMP0, CARG2, 1 @@ -1693,139 +1945,130 @@ static void build_subroutines(BuildCtx *ctx) | bgez CARG3, ->fff_newstr |. addiu CARG3, CARG3, 1 // len++ |->fff_emptystr: // Return empty string. - | addiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) + | addiu STR:SFARG1LO, DISPATCH, DISPATCH_GL(strempty) | b ->fff_restv - |. li CARG3, LJ_TSTR + |. li SFARG1HI, LJ_TSTR | - |.ffunc string_rep // Only handle the 1-char case inline. - | ffgccheck - |. nop - | lw TMP0, HI(BASE) - | addiu AT, NARGS8:RC, -16 // Exactly 2 arguments. - | lw CARG4, 8+HI(BASE) - | lw STR:CARG1, LO(BASE) - | addiu TMP0, TMP0, -LJ_TSTR - | ldc1 f0, 8(BASE) - | or AT, AT, TMP0 - | bnez AT, ->fff_fallback - |. sltiu AT, CARG4, LJ_TISNUM - | cvt.w.d f0, f0 - | beqz AT, ->fff_fallback - |. lw TMP0, STR:CARG1->len - | mfc1 CARG3, f0 - | lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) - | li AT, 1 - | blez CARG3, ->fff_emptystr // Count <= 0? - |. sltu AT, AT, TMP0 - | beqz TMP0, ->fff_emptystr // Zero length string? - |. sltu TMP0, TMP1, CARG3 - | or AT, AT, TMP0 - | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) - | bnez AT, ->fff_fallback // Fallback for > 1-char strings. - |. lbu TMP0, STR:CARG1[1] - | addu TMP2, CARG2, CARG3 - |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). - | addiu TMP2, TMP2, -1 - | sltu AT, CARG2, TMP2 - | bnez AT, <1 - |. sb TMP0, 0(TMP2) - | b ->fff_newstr - |. nop - | - |.ffunc string_reverse + |.macro ffstring_op, name + | .ffunc string_ .. name | ffgccheck |. nop | lw CARG3, HI(BASE) - | lw STR:CARG1, LO(BASE) + | lw STR:CARG2, LO(BASE) | beqz NARGS8:RC, ->fff_fallback |. li AT, LJ_TSTR | bne CARG3, AT, ->fff_fallback - |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) - | lw CARG3, STR:CARG1->len - | addiu CARG1, STR:CARG1, #STR - | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) - | sltu AT, TMP1, CARG3 - | bnez AT, ->fff_fallback - |. addu TMP3, CARG1, CARG3 - | addu CARG4, CARG2, CARG3 - |1: // Reverse string copy. - | lbu TMP1, 0(CARG1) - | sltu AT, CARG1, TMP3 - | beqz AT, ->fff_newstr - |. addiu CARG1, CARG1, 1 - | addiu CARG4, CARG4, -1 - | b <1 - | sb TMP1, 0(CARG4) - | - |.macro ffstring_case, name, lo - | .ffunc name - | ffgccheck - |. nop - | lw CARG3, HI(BASE) - | lw STR:CARG1, LO(BASE) - | beqz NARGS8:RC, ->fff_fallback - |. li AT, LJ_TSTR - | bne CARG3, AT, ->fff_fallback - |. lw TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) - | lw CARG3, STR:CARG1->len - | addiu CARG1, STR:CARG1, #STR - | lw CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) - | sltu AT, TMP1, CARG3 - | bnez AT, ->fff_fallback - |. addu TMP3, CARG1, CARG3 - | move CARG4, CARG2 - |1: // ASCII case conversion. - | lbu TMP1, 0(CARG1) - | sltu AT, CARG1, TMP3 - | beqz AT, ->fff_newstr - |. addiu TMP0, TMP1, -lo - | xori TMP2, TMP1, 0x20 - | sltiu AT, TMP0, 26 - | movn TMP1, TMP2, AT - | addiu CARG1, CARG1, 1 - | sb TMP1, 0(CARG4) - | b <1 - |. addiu CARG4, CARG4, 1 + |. addiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) + | load_got lj_buf_putstr_ .. name + | lw TMP0, SBUF:CARG1->b + | sw L, SBUF:CARG1->L + | sw BASE, L->base + | sw TMP0, SBUF:CARG1->w + | call_intern extern lj_buf_putstr_ .. name + |. sw PC, SAVE_PC + | load_got lj_buf_tostr + | call_intern lj_buf_tostr + |. move SBUF:CARG1, SBUF:CRET1 + | b ->fff_resstr + |. lw BASE, L->base |.endmacro | - |ffstring_case string_lower, 65 - |ffstring_case string_upper, 97 + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper | - |//-- Table library ------------------------------------------------------ + |//-- Bit library -------------------------------------------------------- | - |.ffunc_1 table_getn - | li AT, LJ_TTAB - | bne CARG3, AT, ->fff_fallback - |. load_got lj_tab_len - | call_intern lj_tab_len // (GCtab *t) - |. nop - | // Returns uint32_t (but less than 2^31). - | b ->fff_resi + |->vm_tobit_fb: + | beqz TMP1, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) + | add.d FARG1, FARG1, TOBIT + | jr ra + |. mfc1 CRET1, FARG1 + |.else + |// FP number to bit conversion for soft-float. + |->vm_tobit: + | sll TMP0, SFARG1HI, 1 + | lui AT, 0x0020 + | addu TMP0, TMP0, AT + | slt AT, TMP0, r0 + | movz SFARG1LO, r0, AT + | beqz AT, >2 + |. li TMP1, 0x3e0 + | not TMP1, TMP1 + | sra TMP0, TMP0, 21 + | subu TMP0, TMP1, TMP0 + | slt AT, TMP0, r0 + | bnez AT, >1 + |. sll TMP1, SFARG1HI, 11 + | lui AT, 0x8000 + | or TMP1, TMP1, AT + | srl AT, SFARG1LO, 21 + | or TMP1, TMP1, AT + | slt AT, SFARG1HI, r0 + | beqz AT, >2 + |. srlv SFARG1LO, TMP1, TMP0 + | subu SFARG1LO, r0, SFARG1LO + |2: + | jr ra + |. move CRET1, SFARG1LO + |1: + | addiu TMP0, TMP0, 21 + | srlv TMP1, SFARG1LO, TMP0 + | li AT, 20 + | subu TMP0, AT, TMP0 + | sll SFARG1LO, SFARG1HI, 12 + | sllv AT, SFARG1LO, TMP0 + | or SFARG1LO, TMP1, AT + | slt AT, SFARG1HI, r0 + | beqz AT, <2 |. nop - | - |//-- Bit library -------------------------------------------------------- + | jr ra + |. subu CRET1, r0, SFARG1LO + |.endif | |.macro .ffunc_bit, name - | .ffunc_n bit_..name - |. add.d FARG1, FARG1, TOBIT - | mfc1 CRET1, FARG1 + | .ffunc_1 bit_..name + | beq SFARG1HI, TISNUM, >6 + |. move CRET1, SFARG1LO + | bal ->vm_tobit_fb + |. sltu TMP1, SFARG1HI, TISNUM + |6: |.endmacro | |.macro .ffunc_bit_op, name, ins | .ffunc_bit name - | addiu TMP1, BASE, 8 - | addu TMP2, BASE, NARGS8:RC + | addiu TMP2, BASE, 8 + | addu TMP3, BASE, NARGS8:RC |1: - | lw CARG4, HI(TMP1) - | beq TMP1, TMP2, ->fff_resi - |. ldc1 FARG1, 0(TMP1) - | sltiu AT, CARG4, LJ_TISNUM - | beqz AT, ->fff_fallback - | add.d FARG1, FARG1, TOBIT - | mfc1 CARG2, FARG1 - | ins CRET1, CRET1, CARG2 + | lw SFARG1HI, HI(TMP2) + | beq TMP2, TMP3, ->fff_resi + |. lw SFARG1LO, LO(TMP2) + |.if FPU + | bne SFARG1HI, TISNUM, >2 + |. addiu TMP2, TMP2, 8 | b <1 - |. addiu TMP1, TMP1, 8 + |. ins CRET1, CRET1, SFARG1LO + |2: + | ldc1 FARG1, -8(TMP2) + | sltu TMP1, SFARG1HI, TISNUM + | beqz TMP1, ->fff_fallback + |. add.d FARG1, FARG1, TOBIT + | mfc1 SFARG1LO, FARG1 + | b <1 + |. ins CRET1, CRET1, SFARG1LO + |.else + | beq SFARG1HI, TISNUM, >2 + |. move CRET2, CRET1 + | bal ->vm_tobit_fb + |. sltu TMP1, SFARG1HI, TISNUM + | move SFARG1LO, CRET2 + |2: + | ins CRET1, CRET1, SFARG1LO + | b <1 + |. addiu TMP2, TMP2, 8 + |.endif |.endmacro | |.ffunc_bit_op band, and @@ -1849,24 +2092,28 @@ static void build_subroutines(BuildCtx *ctx) |. not CRET1, CRET1 | |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc_nn bit_..name - |. add.d FARG1, FARG1, TOBIT - | add.d FARG2, FARG2, TOBIT - | mfc1 CARG1, FARG1 - | mfc1 CARG2, FARG2 + | .ffunc_2 bit_..name + | beq SFARG1HI, TISNUM, >1 + |. nop + | bal ->vm_tobit_fb + |. sltu TMP1, SFARG1HI, TISNUM + | move SFARG1LO, CRET1 + |1: + | bne SFARG2HI, TISNUM, ->fff_fallback + |. nop |.if shmod == 1 | li AT, 32 - | subu TMP0, AT, CARG2 - | sllv CARG2, CARG1, CARG2 - | srlv CARG1, CARG1, TMP0 + | subu TMP0, AT, SFARG2LO + | sllv SFARG2LO, SFARG1LO, SFARG2LO + | srlv SFARG1LO, SFARG1LO, TMP0 |.elif shmod == 2 | li AT, 32 - | subu TMP0, AT, CARG2 - | srlv CARG2, CARG1, CARG2 - | sllv CARG1, CARG1, TMP0 + | subu TMP0, AT, SFARG2LO + | srlv SFARG2LO, SFARG1LO, SFARG2LO + | sllv SFARG1LO, SFARG1LO, TMP0 |.endif | b ->fff_resi - |. ins CRET1, CARG1, CARG2 + |. ins CRET1, SFARG1LO, SFARG2LO |.endmacro | |.ffunc_bit_sh lshift, sllv, 0 @@ -1878,9 +2125,11 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc_bit tobit |->fff_resi: - | mtc1 CRET1, FRET1 - | b ->fff_resn - |. cvt.d.w FRET1, FRET1 + | lw PC, FRAME_PC(BASE) + | addiu RA, BASE, -8 + | sw TISNUM, -8+HI(BASE) + | b ->fff_res1 + |. sw CRET1, -8+LO(BASE) | |//----------------------------------------------------------------------- | @@ -2067,19 +2316,96 @@ static void build_subroutines(BuildCtx *ctx) | jr CRET1 |. lw INS, -4(PC) | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | lw TMP2, -24+LO(RB) // Save previous trace. + | decode_RA8a RC, INS + | addiu AT, MULTRES, -8 + | decode_RA8b RC + | beqz AT, >2 + |. addu RC, BASE, RC // Call base. + |1: // Move results down. + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) + | addiu AT, AT, -8 + | addiu RA, RA, 8 + | sw SFRETHI, HI(RC) + | sw SFRETLO, LO(RC) + | bnez AT, <1 + |. addiu RC, RC, 8 + |2: + | decode_RA8a RA, INS + | decode_RB8a RB, INS + | decode_RA8b RA + | decode_RB8b RB + | addu RA, RA, RB + | addu RA, BASE, RA + |3: + | sltu AT, RC, RA + | bnez AT, >9 // More results wanted? + |. nop + | + | lhu TMP3, TRACE:TMP2->traceno + | lhu RD, TRACE:TMP2->link + | beq RD, TMP3, ->cont_nop // Blacklisted. + |. load_got lj_dispatch_stitch + | bnez RD, =>BC_JLOOP // Jump to stitched trace. + |. sll RD, RD, 3 + | + | // Stitch a new trace to the previous trace. + | sw TMP3, DISPATCH_J(exitno)(DISPATCH) + | sw L, DISPATCH_J(L)(DISPATCH) + | sw BASE, L->base + | addiu CARG1, DISPATCH, GG_DISP2J + | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + |. move CARG2, PC + | b ->cont_nop + |. lw BASE, L->base + | + |9: + | sw TISNIL, HI(RC) + | b <3 + |. addiu RC, RC, 8 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | load_got lj_dispatch_profile + | sw MULTRES, SAVE_MULTRES + | move CARG2, PC + | sw BASE, L->base + | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + |. move CARG1, L + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | addiu PC, PC, -4 + | b ->cont_nop + |. lw BASE, L->base +#endif + | |//----------------------------------------------------------------------- |//-- Trace exit handler ------------------------------------------------- |//----------------------------------------------------------------------- | |.macro savex_, a, b + |.if FPU | sdc1 f..a, 16+a*8(sp) | sw r..a, 16+32*8+a*4(sp) | sw r..b, 16+32*8+b*4(sp) + |.else + | sw r..a, 16+a*4(sp) + | sw r..b, 16+b*4(sp) + |.endif |.endmacro | |->vm_exit_handler: |.if JIT + |.if FPU | addiu sp, sp, -(16+32*8+32*4) + |.else + | addiu sp, sp, -(16+32*4) + |.endif | savex_ 0, 1 | savex_ 2, 3 | savex_ 4, 5 @@ -2094,25 +2420,34 @@ static void build_subroutines(BuildCtx *ctx) | savex_ 22, 23 | savex_ 24, 25 | savex_ 26, 27 + |.if FPU | sdc1 f28, 16+28*8(sp) - | sw r28, 16+32*8+28*4(sp) | sdc1 f30, 16+30*8(sp) + | sw r28, 16+32*8+28*4(sp) | sw r30, 16+32*8+30*4(sp) | sw r0, 16+32*8+31*4(sp) // Clear RID_TMP. + | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. + | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP + |.else + | sw r28, 16+28*4(sp) + | sw r30, 16+30*4(sp) + | sw r0, 16+31*4(sp) // Clear RID_TMP. + | addiu TMP2, sp, 16+32*4 // Recompute original value of sp. + | sw TMP2, 16+29*4(sp) // Store sp in RID_SP + |.endif | li_vmstate EXIT - | addiu TMP2, sp, 16+32*8+32*4 // Recompute original value of sp. | addiu DISPATCH, JGL, -GG_DISP2G-32768 | lw TMP1, 0(TMP2) // Load exit number. | st_vmstate - | sw TMP2, 16+32*8+29*4(sp) // Store sp in RID_SP. - | lw L, DISPATCH_GL(jit_L)(DISPATCH) - | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) + | lw L, DISPATCH_GL(cur_L)(DISPATCH) + | lw BASE, DISPATCH_GL(jit_base)(DISPATCH) | load_got lj_trace_exit | sw L, DISPATCH_J(L)(DISPATCH) | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. + | sw BASE, L->base | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. | addiu CARG1, DISPATCH, GG_DISP2J - | sw BASE, L->base + | sw r0, DISPATCH_GL(jit_base)(DISPATCH) | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) |. addiu CARG2, sp, 16 | // Returns MULTRES (unscaled) or negated error code. @@ -2128,19 +2463,21 @@ static void build_subroutines(BuildCtx *ctx) |.if JIT | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. | lw L, SAVE_L - | addiu DISPATCH, JGL, -GG_DISP2G-32768 + | addiu DISPATCH, JGL, -GG_DISP2G-32768 + | sw BASE, L->base |1: - | bltz CRET1, >3 // Check for error from exit. - |. lw LFUNC:TMP1, FRAME_FUNC(BASE) - | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | bltz CRET1, >9 // Check for error from exit. + |. lw LFUNC:RB, FRAME_FUNC(BASE) + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | sll MULTRES, CRET1, 3 | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM // Setup type comparison constants. | sw MULTRES, SAVE_MULTRES - | mtc1 TMP3, TOBIT - | lw TMP1, LFUNC:TMP1->pc - | sw r0, DISPATCH_GL(jit_L)(DISPATCH) + | .FPU mtc1 TMP3, TOBIT + | lw TMP1, LFUNC:RB->pc + | sw r0, DISPATCH_GL(jit_base)(DISPATCH) | lw KBASE, PC2PROTO(k)(TMP1) - | cvt.d.s TOBIT, TOBIT + | .FPU cvt.d.s TOBIT, TOBIT | // Modified copy of ins_next which handles function header dispatch, too. | lw INS, 0(PC) | addiu PC, PC, 4 @@ -2148,7 +2485,7 @@ static void build_subroutines(BuildCtx *ctx) | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) | decode_OP4a TMP1, INS | decode_OP4b TMP1 - | sltiu TMP2, TMP1, BC_FUNCF*4 // Function header? + | sltiu TMP2, TMP1, BC_FUNCF*4 | addu TMP0, DISPATCH, TMP1 | decode_RD8a RD, INS | lw AT, 0(TMP0) @@ -2158,13 +2495,30 @@ static void build_subroutines(BuildCtx *ctx) | jr AT |. decode_RD8b RD |2: + | sltiu TMP2, TMP1, (BC_FUNCC+2)*4 // Fast function? + | bnez TMP2, >3 + |. lw TMP1, FRAME_PC(BASE) + | // Check frame below fast function. + | andi TMP0, TMP1, FRAME_TYPE + | bnez TMP0, >3 // Trace stitching continuation? + |. nop + | // Otherwise set KBASE for Lua function below fast function. + | lw TMP2, -4(TMP1) + | decode_RA8a TMP0, TMP2 + | decode_RA8b TMP0 + | subu TMP1, BASE, TMP0 + | lw LFUNC:TMP2, -8+FRAME_FUNC(TMP1) + | lw TMP1, LFUNC:TMP2->pc + | lw KBASE, PC2PROTO(k)(TMP1) + |3: | addiu RC, MULTRES, -8 | jr AT |. addu RA, RA, BASE | - |3: // Rethrow error from the right C frame. - | load_got lj_err_run - | call_intern lj_err_run // (lua_State *L) + |9: // Rethrow error from the right C frame. + | load_got lj_err_trace + | sub CARG2, r0, CRET1 + | call_intern lj_err_trace // (lua_State *L, int errcode) |. move CARG1, L |.endif | @@ -2172,8 +2526,9 @@ static void build_subroutines(BuildCtx *ctx) |//-- Math helper functions ---------------------------------------------- |//----------------------------------------------------------------------- | + |// Hard-float round to integer. |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. - |.macro vm_round, func + |.macro vm_round_hf, func | lui TMP0, 0x4330 // Hiword of 2^52 (double). | mtc1 r0, f4 | mtc1 TMP0, f5 @@ -2215,6 +2570,12 @@ static void build_subroutines(BuildCtx *ctx) |. mov.d FRET1, FARG1 |.endmacro | + |.macro vm_round, func + |.if FPU + | vm_round_hf, func + |.endif + |.endmacro + | |->vm_floor: | vm_round floor |->vm_ceil: @@ -2224,10 +2585,286 @@ static void build_subroutines(BuildCtx *ctx) | vm_round trunc |.endif | + |// Soft-float integer to number conversion. + |.macro sfi2d, AHI, ALO + |.if not FPU + | beqz ALO, >9 // Handle zero first. + |. sra TMP0, ALO, 31 + | xor TMP1, ALO, TMP0 + | subu TMP1, TMP1, TMP0 // Absolute value in TMP1. + | clz AHI, TMP1 + | andi TMP0, TMP0, 0x800 // Mask sign bit. + | li AT, 0x3ff+31-1 + | sllv TMP1, TMP1, AHI // Align mantissa left with leading 1. + | subu AHI, AT, AHI // Exponent - 1 in AHI. + | sll ALO, TMP1, 21 + | or AHI, AHI, TMP0 // Sign | Exponent. + | srl TMP1, TMP1, 11 + | sll AHI, AHI, 20 // Align left. + | jr ra + |. addu AHI, AHI, TMP1 // Add mantissa, increment exponent. + |9: + | jr ra + |. li AHI, 0 + |.endif + |.endmacro + | + |// Input SFARG1LO. Output: SFARG1*. Temporaries: AT, TMP0, TMP1. + |->vm_sfi2d_1: + | sfi2d SFARG1HI, SFARG1LO + | + |// Input SFARG2LO. Output: SFARG2*. Temporaries: AT, TMP0, TMP1. + |->vm_sfi2d_2: + | sfi2d SFARG2HI, SFARG2LO + | + |// Soft-float comparison. Equivalent to c.eq.d. + |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpeq: + |.if not FPU + | sll AT, SFARG1HI, 1 + | sll TMP0, SFARG2HI, 1 + | or CRET1, SFARG1LO, SFARG2LO + | or TMP1, AT, TMP0 + | or TMP1, TMP1, CRET1 + | beqz TMP1, >8 // Both args +-0: return 1. + |. sltu CRET1, r0, SFARG1LO + | lui TMP1, 0xffe0 + | addu AT, AT, CRET1 + | sltu CRET1, r0, SFARG2LO + | sltu AT, TMP1, AT + | addu TMP0, TMP0, CRET1 + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0; + |. xor TMP0, SFARG1HI, SFARG2HI + | xor TMP1, SFARG1LO, SFARG2LO + | or AT, TMP0, TMP1 + | jr ra + |. sltiu CRET1, AT, 1 // Same values: return 1. + |8: + | jr ra + |. li CRET1, 1 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. + |// Input: SFARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. + |->vm_sfcmpult: + |.if not FPU + | b >1 + |. li CRET2, 1 + |.endif + | + |->vm_sfcmpolt: + |.if not FPU + | li CRET2, 0 + |1: + | sll AT, SFARG1HI, 1 + | sll TMP0, SFARG2HI, 1 + | or CRET1, SFARG1LO, SFARG2LO + | or TMP1, AT, TMP0 + | or TMP1, TMP1, CRET1 + | beqz TMP1, >8 // Both args +-0: return 0. + |. sltu CRET1, r0, SFARG1LO + | lui TMP1, 0xffe0 + | addu AT, AT, CRET1 + | sltu CRET1, r0, SFARG2LO + | sltu AT, TMP1, AT + | addu TMP0, TMP0, CRET1 + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, SFARG1HI, SFARG2HI + | bltz AT, >5 // Both args negative? + |. nop + | beq SFARG1HI, SFARG2HI, >8 + |. sltu CRET1, SFARG1LO, SFARG2LO + | jr ra + |. slt CRET1, SFARG1HI, SFARG2HI + |5: // Swap conditions if both operands are negative. + | beq SFARG1HI, SFARG2HI, >8 + |. sltu CRET1, SFARG2LO, SFARG1LO + | jr ra + |. slt CRET1, SFARG2HI, SFARG1HI + |8: + | jr ra + |. nop + |9: + | jr ra + |. move CRET1, CRET2 + |.endif + | + |->vm_sfcmpogt: + |.if not FPU + | sll AT, SFARG2HI, 1 + | sll TMP0, SFARG1HI, 1 + | or CRET1, SFARG2LO, SFARG1LO + | or TMP1, AT, TMP0 + | or TMP1, TMP1, CRET1 + | beqz TMP1, >8 // Both args +-0: return 0. + |. sltu CRET1, r0, SFARG2LO + | lui TMP1, 0xffe0 + | addu AT, AT, CRET1 + | sltu CRET1, r0, SFARG1LO + | sltu AT, TMP1, AT + | addu TMP0, TMP0, CRET1 + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, SFARG2HI, SFARG1HI + | bltz AT, >5 // Both args negative? + |. nop + | beq SFARG2HI, SFARG1HI, >8 + |. sltu CRET1, SFARG2LO, SFARG1LO + | jr ra + |. slt CRET1, SFARG2HI, SFARG1HI + |5: // Swap conditions if both operands are negative. + | beq SFARG2HI, SFARG1HI, >8 + |. sltu CRET1, SFARG1LO, SFARG2LO + | jr ra + |. slt CRET1, SFARG1HI, SFARG2HI + |8: + | jr ra + |. nop + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. + |// Input: SFARG*, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpolex: + |.if not FPU + | sll AT, SFARG1HI, 1 + | sll TMP0, SFARG2HI, 1 + | or CRET1, SFARG1LO, SFARG2LO + | or TMP1, AT, TMP0 + | or TMP1, TMP1, CRET1 + | beqz TMP1, >8 // Both args +-0: return 1. + |. sltu CRET1, r0, SFARG1LO + | lui TMP1, 0xffe0 + | addu AT, AT, CRET1 + | sltu CRET1, r0, SFARG2LO + | sltu AT, TMP1, AT + | addu TMP0, TMP0, CRET1 + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0; + |. and AT, SFARG1HI, SFARG2HI + | xor AT, AT, TMP3 + | bltz AT, >5 // Both args negative? + |. nop + | beq SFARG1HI, SFARG2HI, >6 + |. sltu CRET1, SFARG2LO, SFARG1LO + | jr ra + |. slt CRET1, SFARG2HI, SFARG1HI + |5: // Swap conditions if both operands are negative. + | beq SFARG1HI, SFARG2HI, >6 + |. sltu CRET1, SFARG1LO, SFARG2LO + | slt CRET1, SFARG1HI, SFARG2HI + |6: + | jr ra + |. nop + |8: + | jr ra + |. li CRET1, 1 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |.macro sfmin_max, name, fpcall + |->vm_sf .. name: + |.if JIT and not FPU + | move TMP2, ra + | bal ->fpcall + |. nop + | move TMP0, CRET1 + | move SFRETHI, SFARG1HI + | move SFRETLO, SFARG1LO + | move ra, TMP2 + | movz SFRETHI, SFARG2HI, TMP0 + | jr ra + |. movz SFRETLO, SFARG2LO, TMP0 + |.endif + |.endmacro + | + | sfmin_max min, vm_sfcmpolt + | sfmin_max max, vm_sfcmpogt + | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- |//----------------------------------------------------------------------- | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 + |.define NEXT_NIL, CARG4 + |.define NEXT_TMP0, r12 + |.define NEXT_TMP1, r13 + |.define NEXT_TMP2, r14 + |.define NEXT_RES_VK, CRET1 + |.define NEXT_RES_IDX, CRET2 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL_I, 0(sp) + |.define NEXT_RES_VAL_IT, 4(sp) + |.define NEXT_RES_KEY_I, 8(sp) + |.define NEXT_RES_KEY_IT, 12(sp) + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT and ENDIAN_LE + | lw NEXT_ASIZE, NEXT_TAB->asize + | lw NEXT_TMP0, NEXT_TAB->array + | li NEXT_NIL, LJ_TNIL + |1: // Traverse array part. + | sltu AT, NEXT_IDX, NEXT_ASIZE + | sll NEXT_TMP1, NEXT_IDX, 3 + | beqz AT, >5 + |. addu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 + | lw NEXT_TMP2, 4(NEXT_TMP1) + | sw NEXT_IDX, NEXT_RES_KEY_I + | beq NEXT_TMP2, NEXT_NIL, <1 + |. addiu NEXT_IDX, NEXT_IDX, 1 + | lw NEXT_TMP0, 0(NEXT_TMP1) + | li AT, LJ_TISNUM + | sw NEXT_TMP2, NEXT_RES_VAL_IT + | sw AT, NEXT_RES_KEY_IT + | sw NEXT_TMP0, NEXT_RES_VAL_I + | move NEXT_RES_VK, NEXT_RES_PTR + | jr ra + |. move NEXT_RES_IDX, NEXT_IDX + | + |5: // Traverse hash part. + | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE + | lw NODE:NEXT_RES_VK, NEXT_TAB->node + | sll NEXT_TMP2, NEXT_RES_IDX, 5 + | lw NEXT_TMP0, NEXT_TAB->hmask + | sll AT, NEXT_RES_IDX, 3 + | subu AT, NEXT_TMP2, AT + | addu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT + |6: + | sltu AT, NEXT_TMP0, NEXT_RES_IDX + | bnez AT, >8 + |. nop + | lw NEXT_TMP2, NODE:NEXT_RES_VK->val.it + | bne NEXT_TMP2, NEXT_NIL, >9 + |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 + | // Skip holes in hash part. + | b <6 + |. addiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) + | + |8: // End of iteration. Set the key to nil (not the value). + | sw NEXT_NIL, NEXT_RES_KEY_IT + | move NEXT_RES_VK, NEXT_RES_PTR + |9: + | jr ra + |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -2243,10 +2880,10 @@ static void build_subroutines(BuildCtx *ctx) | sw r1, CTSTATE->cb.slot | sw CARG1, CTSTATE->cb.gpr[0] | sw CARG2, CTSTATE->cb.gpr[1] - | sdc1 FARG1, CTSTATE->cb.fpr[0] + | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] | sw CARG3, CTSTATE->cb.gpr[2] | sw CARG4, CTSTATE->cb.gpr[3] - | sdc1 FARG2, CTSTATE->cb.fpr[1] + | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] | addiu TMP0, sp, CFRAME_SPACE+16 | sw TMP0, CTSTATE->cb.stack | sw r0, SAVE_PC // Any value outside of bytecode is ok. @@ -2256,15 +2893,16 @@ static void build_subroutines(BuildCtx *ctx) | // Returns lua_State *. | lw BASE, L:CRET1->base | lw RC, L:CRET1->top + | li TISNUM, LJ_TISNUM // Setup type comparison constants. | move L, CRET1 - | lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | lw LFUNC:RB, FRAME_FUNC(BASE) - | mtc1 TMP3, TOBIT + | .FPU mtc1 TMP3, TOBIT | li_vmstate INTERP | li TISNIL, LJ_TNIL | subu RC, RC, BASE | st_vmstate - | cvt.d.s TOBIT, TOBIT + | .FPU cvt.d.s TOBIT, TOBIT | ins_callt |.endif | @@ -2278,11 +2916,11 @@ static void build_subroutines(BuildCtx *ctx) | move CARG2, RA | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) |. move CARG1, CTSTATE + | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] | lw CRET1, CTSTATE->cb.gpr[0] - | ldc1 FRET1, CTSTATE->cb.fpr[0] - | lw CRET2, CTSTATE->cb.gpr[1] + | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] | b ->vm_leave_unw - |. ldc1 FRET2, CTSTATE->cb.fpr[1] + |. lw CRET2, CTSTATE->cb.gpr[1] |.endif | |->vm_ffi_call: // Call C function via FFI. @@ -2314,8 +2952,8 @@ static void build_subroutines(BuildCtx *ctx) | lw CARG2, CCSTATE->gpr[1] | lw CARG3, CCSTATE->gpr[2] | lw CARG4, CCSTATE->gpr[3] - | ldc1 FARG1, CCSTATE->fpr[0] - | ldc1 FARG2, CCSTATE->fpr[1] + | .FPU ldc1 FARG1, CCSTATE->fpr[0] + | .FPU ldc1 FARG2, CCSTATE->fpr[1] | jalr CFUNCADDR |. lw CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. | lw CCSTATE:TMP1, -12(r16) @@ -2323,8 +2961,13 @@ static void build_subroutines(BuildCtx *ctx) | lw ra, -4(r16) | sw CRET1, CCSTATE:TMP1->gpr[0] | sw CRET2, CCSTATE:TMP1->gpr[1] + |.if FPU | sdc1 FRET1, CCSTATE:TMP1->fpr[0] | sdc1 FRET2, CCSTATE:TMP1->fpr[1] + |.else + | sw CARG1, CCSTATE:TMP1->gpr[2] // Soft-float: complex double .im part. + | sw CARG2, CCSTATE:TMP1->gpr[3] + |.endif | move sp, r16 | jr ra |. move r16, TMP2 @@ -2348,82 +2991,143 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | // RA = src1*8, RD = src2*8, JMP with RD = target - | addu CARG2, BASE, RA - | addu CARG3, BASE, RD - | lw TMP0, HI(CARG2) - | lw TMP1, HI(CARG3) - | ldc1 f0, 0(CARG2) - | ldc1 f2, 0(CARG3) - | sltiu TMP0, TMP0, LJ_TISNUM - | sltiu TMP1, TMP1, LJ_TISNUM + |.macro bc_comp, FRA, FRD, RAHI, RALO, RDHI, RDLO, movop, fmovop, fcomp, sfcomp + | addu RA, BASE, RA + | addu RD, BASE, RD + | lw RAHI, HI(RA) + | lw RDHI, HI(RD) | lhu TMP2, OFS_RD(PC) - | and TMP0, TMP0, TMP1 | addiu PC, PC, 4 - | beqz TMP0, ->vmeta_comp - |. lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) - | decode_RD4b TMP2 - | addu TMP2, TMP2, TMP1 - if (op == BC_ISLT || op == BC_ISGE) { - | c.olt.d f0, f2 - } else { - | c.ole.d f0, f2 - } - if (op == BC_ISLT || op == BC_ISLE) { - | movf TMP2, r0 - } else { - | movt TMP2, r0 - } - | addu PC, PC, TMP2 + | bne RAHI, TISNUM, >2 + |. lw RALO, LO(RA) + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | lw RDLO, LO(RD) + | bne RDHI, TISNUM, >5 + |. decode_RD4b TMP2 + | slt AT, SFARG1LO, SFARG2LO + | addu TMP2, TMP2, TMP3 + | movop TMP2, r0, AT |1: + | addu PC, PC, TMP2 | ins_next + | + |2: // RA is not an integer. + | sltiu AT, RAHI, LJ_TISNUM + | beqz AT, ->vmeta_comp + |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | sltiu AT, RDHI, LJ_TISNUM + |.if FPU + | ldc1 FRA, 0(RA) + | ldc1 FRD, 0(RD) + |.else + | lw RDLO, LO(RD) + |.endif + | beqz AT, >4 + |. decode_RD4b TMP2 + |3: // RA and RD are both numbers. + |.if FPU + | fcomp f20, f22 + | addu TMP2, TMP2, TMP3 + | b <1 + |. fmovop TMP2, r0 + |.else + | bal sfcomp + |. addu TMP2, TMP2, TMP3 + | b <1 + |. movop TMP2, r0, CRET1 + |.endif + | + |4: // RA is a number, RD is not a number. + | bne RDHI, TISNUM, ->vmeta_comp + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU + |. lwc1 FRD, LO(RD) + | b <3 + |. cvt.d.w FRD, FRD + |.else + |. nop + |.if "RDHI" == "SFARG1HI" + | bal ->vm_sfi2d_1 + |.else + | bal ->vm_sfi2d_2 + |.endif + |. nop + | b <3 + |. nop + |.endif + | + |5: // RA is an integer, RD is not an integer + | sltiu AT, RDHI, LJ_TISNUM + | beqz AT, ->vmeta_comp + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU + |. mtc1 RALO, FRA + | ldc1 FRD, 0(RD) + | b <3 + | cvt.d.w FRA, FRA + |.else + |. nop + |.if "RAHI" == "SFARG1HI" + | bal ->vm_sfi2d_1 + |.else + | bal ->vm_sfi2d_2 + |.endif + |. nop + | b <3 + |. nop + |.endif + |.endmacro + | + if (op == BC_ISLT) { + | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movz, movf, c.olt.d, ->vm_sfcmpolt + } else if (op == BC_ISGE) { + | bc_comp f20, f22, SFARG1HI, SFARG1LO, SFARG2HI, SFARG2LO, movn, movt, c.olt.d, ->vm_sfcmpolt + } else if (op == BC_ISLE) { + | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movn, movt, c.ult.d, ->vm_sfcmpult + } else { + | bc_comp f22, f20, SFARG2HI, SFARG2LO, SFARG1HI, SFARG1LO, movz, movf, c.ult.d, ->vm_sfcmpult + } break; case BC_ISEQV: case BC_ISNEV: vk = op == BC_ISEQV; | // RA = src1*8, RD = src2*8, JMP with RD = target | addu RA, BASE, RA - | addiu PC, PC, 4 - | lw TMP0, HI(RA) - | ldc1 f0, 0(RA) + | addiu PC, PC, 4 | addu RD, BASE, RD + | lw SFARG1HI, HI(RA) | lhu TMP2, -4+OFS_RD(PC) - | lw TMP1, HI(RD) - | ldc1 f2, 0(RD) + | lw SFARG2HI, HI(RD) | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltiu AT, TMP0, LJ_TISNUM - | sltiu CARG1, TMP1, LJ_TISNUM - | decode_RD4b TMP2 - | and AT, AT, CARG1 - | beqz AT, >5 - |. addu TMP2, TMP2, TMP3 - | c.eq.d f0, f2 + | sltu AT, TISNUM, SFARG1HI + | sltu TMP0, TISNUM, SFARG2HI + | or AT, AT, TMP0 if (vk) { - | movf TMP2, r0 + | beqz AT, ->BC_ISEQN_Z } else { - | movt TMP2, r0 + | beqz AT, ->BC_ISNEN_Z } - |1: - | addu PC, PC, TMP2 - | ins_next - |5: // Either or both types are not numbers. - | lw CARG2, LO(RA) - | lw CARG3, LO(RD) + |. decode_RD4b TMP2 + | // Either or both types are not numbers. + | lw SFARG1LO, LO(RA) + | lw SFARG2LO, LO(RD) + | addu TMP2, TMP2, TMP3 |.if FFI | li TMP3, LJ_TCDATA - | beq TMP0, TMP3, ->vmeta_equal_cd + | beq SFARG1HI, TMP3, ->vmeta_equal_cd |.endif - |. sltiu AT, TMP0, LJ_TISPRI // Not a primitive? + |. sltiu AT, SFARG1HI, LJ_TISPRI // Not a primitive? |.if FFI - | beq TMP1, TMP3, ->vmeta_equal_cd + | beq SFARG2HI, TMP3, ->vmeta_equal_cd |.endif - |. xor TMP3, CARG2, CARG3 // Same tv? - | xor TMP1, TMP1, TMP0 // Same type? - | sltiu CARG1, TMP0, LJ_TISTABUD+1 // Table or userdata? + |. xor TMP3, SFARG1LO, SFARG2LO // Same tv? + | xor SFARG2HI, SFARG2HI, SFARG1HI // Same type? + | sltiu TMP0, SFARG1HI, LJ_TISTABUD+1 // Table or userdata? | movz TMP3, r0, AT // Ignore tv if primitive. - | movn CARG1, r0, TMP1 // Tab/ud and same type? - | or AT, TMP1, TMP3 // Same type && (pri||same tv). - | movz CARG1, r0, AT - | beqz CARG1, <1 // Done if not tab/ud or not same type or same tv. + | movn TMP0, r0, SFARG2HI // Tab/ud and same type? + | or AT, SFARG2HI, TMP3 // Same type && (pri||same tv). + | movz TMP0, r0, AT + | beqz TMP0, >1 // Done if not tab/ud or not same type or same tv. if (vk) { |. movn TMP2, r0, AT } else { @@ -2431,15 +3135,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | // Different tables or userdatas. Need to check __eq metamethod. | // Field metatable must be at same offset for GCtab and GCudata! - | lw TAB:TMP1, TAB:CARG2->metatable - | beqz TAB:TMP1, <1 // No metatable? + | lw TAB:TMP1, TAB:SFARG1LO->metatable + | beqz TAB:TMP1, >1 // No metatable? |. nop | lbu TMP1, TAB:TMP1->nomm | andi TMP1, TMP1, 1<<MM_eq - | bnez TMP1, <1 // Or 'no __eq' flag set? + | bnez TMP1, >1 // Or 'no __eq' flag set? |. nop | b ->vmeta_equal // Handle __eq metamethod. - |. li CARG4, 1-vk // ne = 0 or 1. + |. li TMP0, 1-vk // ne = 0 or 1. + |1: + | addu PC, PC, TMP2 + | ins_next break; case BC_ISEQS: case BC_ISNES: @@ -2476,38 +3183,124 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = op == BC_ISEQN; | // RA = src*8, RD = num_const*8, JMP with RD = target | addu RA, BASE, RA - | addiu PC, PC, 4 - | lw TMP0, HI(RA) - | ldc1 f0, 0(RA) - | addu RD, KBASE, RD - | lhu TMP2, -4+OFS_RD(PC) - | ldc1 f2, 0(RD) + | addu RD, KBASE, RD + | lw SFARG1HI, HI(RA) + | lw SFARG2HI, HI(RD) + | lhu TMP2, OFS_RD(PC) + | addiu PC, PC, 4 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sltiu AT, TMP0, LJ_TISNUM | decode_RD4b TMP2 - |.if FFI - | beqz AT, >5 - |.else - | beqz AT, >1 - |.endif - |. addu TMP2, TMP2, TMP3 - | c.eq.d f0, f2 if (vk) { - | movf TMP2, r0 - | addu PC, PC, TMP2 + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | bne SFARG1HI, TISNUM, >3 + |. lw SFARG1LO, LO(RA) + | lw SFARG2LO, LO(RD) + | addu TMP2, TMP2, TMP3 + | bne SFARG2HI, TISNUM, >6 + |. xor AT, SFARG1LO, SFARG2LO + if (vk) { + | movn TMP2, r0, AT |1: + | addu PC, PC, TMP2 + |2: } else { - | movt TMP2, r0 + | movz TMP2, r0, AT |1: + |2: | addu PC, PC, TMP2 } | ins_next + | + |3: // RA is not an integer. + | sltiu AT, SFARG1HI, LJ_TISNUM |.if FFI - |5: - | li AT, LJ_TCDATA - | beq TMP0, AT, ->vmeta_equal_cd + | beqz AT, >8 + |.else + | beqz AT, <2 + |.endif + |. addu TMP2, TMP2, TMP3 + | sltiu AT, SFARG2HI, LJ_TISNUM + |.if FPU + | ldc1 f20, 0(RA) + | ldc1 f22, 0(RD) + |.endif + | beqz AT, >5 + |. lw SFARG2LO, LO(RD) + |4: // RA and RD are both numbers. + |.if FPU + | c.eq.d f20, f22 + | b <1 + if (vk) { + |. movf TMP2, r0 + } else { + |. movt TMP2, r0 + } + |.else + | bal ->vm_sfcmpeq |. nop | b <1 + if (vk) { + |. movz TMP2, r0, CRET1 + } else { + |. movn TMP2, r0, CRET1 + } + |.endif + | + |5: // RA is a number, RD is not a number. + |.if FFI + | bne SFARG2HI, TISNUM, >9 + |.else + | bne SFARG2HI, TISNUM, <2 + |.endif + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU + |. lwc1 f22, LO(RD) + | b <4 + |. cvt.d.w f22, f22 + |.else + |. nop + | bal ->vm_sfi2d_2 + |. nop + | b <4 + |. nop + |.endif + | + |6: // RA is an integer, RD is not an integer + | sltiu AT, SFARG2HI, LJ_TISNUM + |.if FFI + | beqz AT, >9 + |.else + | beqz AT, <2 + |.endif + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU + |. mtc1 SFARG1LO, f20 + | ldc1 f22, 0(RD) + | b <4 + | cvt.d.w f20, f20 + |.else + |. nop + | bal ->vm_sfi2d_1 + |. nop + | b <4 + |. nop + |.endif + | + |.if FFI + |8: + | li AT, LJ_TCDATA + | bne SFARG1HI, AT, <2 + |. nop + | b ->vmeta_equal_cd + |. nop + |9: + | li AT, LJ_TCDATA + | bne SFARG2HI, AT, <2 + |. nop + | b ->vmeta_equal_cd |. nop |.endif break; @@ -2559,7 +3352,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addu PC, PC, TMP2 } else { | sltiu TMP0, TMP0, LJ_TISTRUECOND - | ldc1 f0, 0(RD) + | lw SFRETHI, HI(RD) + | lw SFRETLO, LO(RD) if (op == BC_ISTC) { | beqz TMP0, >1 } else { @@ -2569,22 +3363,45 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | decode_RD4b TMP2 | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) | addu TMP2, TMP2, TMP3 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | addu PC, PC, TMP2 |1: } | ins_next break; + case BC_ISTYPE: + | // RA = src*8, RD = -type*8 + | addu TMP2, BASE, RA + | srl TMP1, RD, 3 + | lw TMP0, HI(TMP2) + | ins_next1 + | addu AT, TMP0, TMP1 + | bnez AT, ->vmeta_istype + |. ins_next2 + break; + case BC_ISNUM: + | // RA = src*8, RD = -(TISNUM-1)*8 + | addu TMP2, BASE, RA + | lw TMP0, HI(TMP2) + | ins_next1 + | sltiu AT, TMP0, LJ_TISNUM + | beqz AT, ->vmeta_istype + |. ins_next2 + break; + /* -- Unary ops --------------------------------------------------------- */ case BC_MOV: | // RA = dst*8, RD = src*8 | addu RD, BASE, RD - | addu RA, BASE, RA - | ldc1 f0, 0(RD) + | addu RA, BASE, RA + | lw SFRETHI, HI(RD) + | lw SFRETLO, LO(RD) | ins_next1 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 break; case BC_NOT: @@ -2601,16 +3418,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_UNM: | // RA = dst*8, RD = src*8 - | addu CARG3, BASE, RD + | addu RB, BASE, RD + | lw SFARG1HI, HI(RB) | addu RA, BASE, RA - | lw TMP0, HI(CARG3) - | ldc1 f0, 0(CARG3) - | sltiu AT, TMP0, LJ_TISNUM - | beqz AT, ->vmeta_unm - |. neg.d f0, f0 + | bne SFARG1HI, TISNUM, >2 + |. lw SFARG1LO, LO(RB) + | lui TMP1, 0x8000 + | beq SFARG1LO, TMP1, ->vmeta_unm // Meta handler deals with -2^31. + |. negu SFARG1LO, SFARG1LO + |1: | ins_next1 - | sdc1 f0, 0(RA) + | sw SFARG1HI, HI(RA) + | sw SFARG1LO, LO(RA) | ins_next2 + |2: + | sltiu AT, SFARG1HI, LJ_TISNUM + | beqz AT, ->vmeta_unm + |. lui TMP1, 0x8000 + | b <1 + |. xor SFARG1HI, SFARG1HI, TMP1 break; case BC_LEN: | // RA = dst*8, RD = src*8 @@ -2621,12 +3447,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li AT, LJ_TSTR | bne TMP0, AT, >2 |. li AT, LJ_TTAB - | lw CRET1, STR:CARG1->len + | lw CRET1, STR:CARG1->len |1: - | mtc1 CRET1, f0 - | cvt.d.w f0, f0 | ins_next1 - | sdc1 f0, 0(RA) + | sw TISNUM, HI(RA) + | sw CRET1, LO(RA) | ins_next2 |2: | bne TMP0, AT, ->vmeta_len @@ -2657,104 +3482,232 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) /* -- Binary ops -------------------------------------------------------- */ - |.macro ins_arithpre + |.macro fpmod, a, b, c + | bal ->vm_floor // floor(b/c) + |. div.d FARG1, b, c + | mul.d a, FRET1, c + | sub.d a, b, a // b - floor(b/c)*c + |.endmacro + + |.macro sfpmod + | addiu sp, sp, -16 + | + | load_got __divdf3 + | sw SFARG1HI, HI(sp) + | sw SFARG1LO, LO(sp) + | sw SFARG2HI, 8+HI(sp) + | call_extern + |. sw SFARG2LO, 8+LO(sp) + | + | load_got floor + | move SFARG1HI, SFRETHI + | call_extern + |. move SFARG1LO, SFRETLO + | + | load_got __muldf3 + | move SFARG1HI, SFRETHI + | move SFARG1LO, SFRETLO + | lw SFARG2HI, 8+HI(sp) + | call_extern + |. lw SFARG2LO, 8+LO(sp) + | + | load_got __subdf3 + | lw SFARG1HI, HI(sp) + | lw SFARG1LO, LO(sp) + | move SFARG2HI, SFRETHI + | call_extern + |. move SFARG2LO, SFRETLO + | + | addiu sp, sp, 16 + |.endmacro + + |.macro ins_arithpre, label ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ||switch (vk) { ||case 0: - | addu CARG3, BASE, RB - | addu CARG4, KBASE, RC - | lw TMP1, HI(CARG3) - | ldc1 f20, 0(CARG3) - | ldc1 f22, 0(CARG4) - | sltiu AT, TMP1, LJ_TISNUM + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | // RA = dst*8, RB = src1*8, RC = num_const*8 + | addu RB, BASE, RB + |.if "label" ~= "none" + | b label + |.endif + |. addu RC, KBASE, RC || break; ||case 1: - | addu CARG4, BASE, RB - | addu CARG3, KBASE, RC - | lw TMP1, HI(CARG4) - | ldc1 f22, 0(CARG4) - | ldc1 f20, 0(CARG3) - | sltiu AT, TMP1, LJ_TISNUM + | decode_RB8a RC, INS + | decode_RB8b RC + | decode_RDtoRC8 RB, RD + | // RA = dst*8, RB = num_const*8, RC = src1*8 + | addu RC, BASE, RC + |.if "label" ~= "none" + | b label + |.endif + |. addu RB, KBASE, RB || break; ||default: - | addu CARG3, BASE, RB - | addu CARG4, BASE, RC - | lw TMP1, HI(CARG3) - | lw TMP2, HI(CARG4) - | ldc1 f20, 0(CARG3) - | ldc1 f22, 0(CARG4) - | sltiu AT, TMP1, LJ_TISNUM - | sltiu TMP0, TMP2, LJ_TISNUM - | and AT, AT, TMP0 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | // RA = dst*8, RB = src1*8, RC = src2*8 + | addu RB, BASE, RB + |.if "label" ~= "none" + | b label + |.endif + |. addu RC, BASE, RC || break; ||} - | beqz AT, ->vmeta_arith - |. addu RA, BASE, RA |.endmacro | - |.macro fpmod, a, b, c - |->BC_MODVN_Z: - | bal ->vm_floor // floor(b/c) - |. div.d FARG1, b, c - | mul.d a, FRET1, c - | sub.d a, b, a // b - floor(b/c)*c - |.endmacro + |.macro ins_arith, intins, fpins, fpcall, label + | ins_arithpre none | - |.macro ins_arith, ins - | ins_arithpre - |.if "ins" == "fpmod_" - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |. nop + |.if "label" ~= "none" + |label: + |.endif + | + | lw SFARG1HI, HI(RB) + | lw SFARG2HI, HI(RC) + | + |.if "intins" ~= "div" + | + | // Check for two integers. + | lw SFARG1LO, LO(RB) + | bne SFARG1HI, TISNUM, >5 + |. lw SFARG2LO, LO(RC) + | bne SFARG2HI, TISNUM, >5 + | + |.if "intins" == "addu" + |. intins CRET1, SFARG1LO, SFARG2LO + | xor TMP1, CRET1, SFARG1LO // ((y^a) & (y^b)) < 0: overflow. + | xor TMP2, CRET1, SFARG2LO + | and TMP1, TMP1, TMP2 + | bltz TMP1, ->vmeta_arith + |. addu RA, BASE, RA + |.elif "intins" == "subu" + |. intins CRET1, SFARG1LO, SFARG2LO + | xor TMP1, CRET1, SFARG1LO // ((y^a) & (a^b)) < 0: overflow. + | xor TMP2, SFARG1LO, SFARG2LO + | and TMP1, TMP1, TMP2 + | bltz TMP1, ->vmeta_arith + |. addu RA, BASE, RA + |.elif "intins" == "mult" + |. intins SFARG1LO, SFARG2LO + | mflo CRET1 + | mfhi TMP2 + | sra TMP1, CRET1, 31 + | bne TMP1, TMP2, ->vmeta_arith + |. addu RA, BASE, RA |.else - | ins f0, f20, f22 + |. load_got lj_vm_modi + | beqz SFARG2LO, ->vmeta_arith + |. addu RA, BASE, RA + |.if ENDIAN_BE + | move CARG1, SFARG1LO + |.endif + | call_extern + |. move CARG2, SFARG2LO + |.endif + | + | ins_next1 + | sw TISNUM, HI(RA) + | sw CRET1, LO(RA) + |3: + | ins_next2 + | + |.elif not FPU + | + | lw SFARG1LO, LO(RB) + | lw SFARG2LO, LO(RC) + | + |.endif + | + |5: // Check for two numbers. + | .FPU ldc1 f20, 0(RB) + | sltiu AT, SFARG1HI, LJ_TISNUM + | sltiu TMP0, SFARG2HI, LJ_TISNUM + | .FPU ldc1 f22, 0(RC) + | and AT, AT, TMP0 + | beqz AT, ->vmeta_arith + |. addu RA, BASE, RA + | + |.if FPU + | fpins FRET1, f20, f22 + |.elif "fpcall" == "sfpmod" + | sfpmod + |.else + | load_got fpcall + | call_extern + |. nop + |.endif + | | ins_next1 - | sdc1 f0, 0(RA) + |.if not FPU + | sw SFRETHI, HI(RA) + |.endif + |.if "intins" ~= "div" + | b <3 + |.endif + |.if FPU + |. sdc1 FRET1, 0(RA) + |.else + |. sw SFRETLO, LO(RA) + |.endif + |.if "intins" == "div" | ins_next2 |.endif + | |.endmacro case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith add.d + | ins_arith addu, add.d, __adddf3, none break; case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith sub.d + | ins_arith subu, sub.d, __subdf3, none break; case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mul.d + | ins_arith mult, mul.d, __muldf3, none + break; + case BC_DIVVN: + | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arith div.d + case BC_DIVNV: case BC_DIVVV: + | ins_arithpre ->BC_DIVVN_Z break; case BC_MODVN: - | ins_arith fpmod + | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z break; case BC_MODNV: case BC_MODVV: - | ins_arith fpmod_ + | ins_arithpre ->BC_MODVN_Z break; case BC_POW: - | decode_RB8a RB, INS - | decode_RB8b RB - | decode_RDtoRC8 RC, RD - | addu CARG3, BASE, RB - | addu CARG4, BASE, RC - | lw TMP1, HI(CARG3) - | lw TMP2, HI(CARG4) - | ldc1 FARG1, 0(CARG3) - | ldc1 FARG2, 0(CARG4) - | sltiu AT, TMP1, LJ_TISNUM - | sltiu TMP0, TMP2, LJ_TISNUM + | ins_arithpre none + | lw SFARG1HI, HI(RB) + | lw SFARG2HI, HI(RC) + | sltiu AT, SFARG1HI, LJ_TISNUM + | sltiu TMP0, SFARG2HI, LJ_TISNUM | and AT, AT, TMP0 | load_got pow | beqz AT, ->vmeta_arith |. addu RA, BASE, RA + |.if FPU + | ldc1 FARG1, 0(RB) + | ldc1 FARG2, 0(RC) + |.else + | lw SFARG1LO, LO(RB) + | lw SFARG2LO, LO(RC) + |.endif | call_extern |. nop | ins_next1 + |.if FPU | sdc1 FRET1, 0(RA) + |.else + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) + |.endif | ins_next2 break; @@ -2777,10 +3730,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bnez CRET1, ->vmeta_binop |. lw BASE, L->base | addu RB, BASE, MULTRES - | ldc1 f0, 0(RB) + | lw SFRETHI, HI(RB) + | lw SFRETLO, LO(RB) | addu RA, BASE, RA | ins_next1 - | sdc1 f0, 0(RA) // Copy result from RB to RA. + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 break; @@ -2815,20 +3770,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_KSHORT: | // RA = dst*8, RD = int16_literal*8 | sra RD, INS, 16 - | mtc1 RD, f0 | addu RA, BASE, RA - | cvt.d.w f0, f0 | ins_next1 - | sdc1 f0, 0(RA) + | sw TISNUM, HI(RA) + | sw RD, LO(RA) | ins_next2 break; case BC_KNUM: | // RA = dst*8, RD = num_const*8 | addu RD, KBASE, RD | addu RA, BASE, RA - | ldc1 f0, 0(RD) + | lw SFRETHI, HI(RD) + | lw SFRETLO, LO(RD) | ins_next1 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 break; case BC_KPRI: @@ -2864,9 +3820,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw UPVAL:RB, LFUNC:RD->uvptr | ins_next1 | lw TMP1, UPVAL:RB->v - | ldc1 f0, 0(TMP1) + | lw SFRETHI, HI(TMP1) + | lw SFRETLO, LO(TMP1) | addu RA, BASE, RA - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 break; case BC_USETV: @@ -2875,26 +3833,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | srl RA, RA, 1 | addu RD, BASE, RD | addu RA, RA, LFUNC:RB - | ldc1 f0, 0(RD) | lw UPVAL:RB, LFUNC:RA->uvptr + | lw SFRETHI, HI(RD) + | lw SFRETLO, LO(RD) | lbu TMP3, UPVAL:RB->marked | lw CARG2, UPVAL:RB->v | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | lbu TMP0, UPVAL:RB->closed - | lw TMP2, HI(RD) - | sdc1 f0, 0(CARG2) + | sw SFRETHI, HI(CARG2) + | sw SFRETLO, LO(CARG2) | li AT, LJ_GC_BLACK|1 | or TMP3, TMP3, TMP0 | beq TMP3, AT, >2 // Upvalue is closed and black? - |. addiu TMP2, TMP2, -(LJ_TNUMX+1) + |. addiu TMP2, SFRETHI, -(LJ_TNUMX+1) |1: | ins_next | |2: // Check if new value is collectable. | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) | beqz AT, <1 // tvisgcv(v) - |. lw TMP1, LO(RD) - | lbu TMP3, GCOBJ:TMP1->gch.marked + |. nop + | lbu TMP3, GCOBJ:SFRETLO->gch.marked | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) | beqz TMP3, <1 |. load_got lj_gc_barrieruv @@ -2942,11 +3901,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | srl RA, RA, 1 | addu RD, KBASE, RD | addu RA, RA, LFUNC:RB - | ldc1 f0, 0(RD) - | lw UPVAL:RB, LFUNC:RA->uvptr + | lw UPVAL:RB, LFUNC:RA->uvptr + | lw SFRETHI, HI(RD) + | lw SFRETLO, LO(RD) + | lw TMP1, UPVAL:RB->v | ins_next1 - | lw TMP1, UPVAL:RB->v - | sdc1 f0, 0(TMP1) + | sw SFRETHI, HI(TMP1) + | sw SFRETLO, LO(TMP1) | ins_next2 break; case BC_USETP: @@ -2956,10 +3917,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | srl TMP0, RD, 3 | addu RA, RA, LFUNC:RB | not TMP0, TMP0 - | lw UPVAL:RB, LFUNC:RA->uvptr + | lw UPVAL:RB, LFUNC:RA->uvptr | ins_next1 - | lw TMP1, UPVAL:RB->v - | sw TMP0, HI(TMP1) + | lw TMP1, UPVAL:RB->v + | sw TMP0, HI(TMP1) | ins_next2 break; @@ -2995,8 +3956,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP0, LJ_TFUNC | ins_next1 | addu RA, BASE, RA - | sw TMP0, HI(RA) | sw LFUNC:CRET1, LO(RA) + | sw TMP0, HI(RA) | ins_next2 break; @@ -3077,31 +4038,23 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw TMP2, HI(CARG3) | lw TAB:RB, LO(CARG2) | li AT, LJ_TTAB - | ldc1 f0, 0(CARG3) | bne TMP1, AT, ->vmeta_tgetv |. addu RA, BASE, RA - | sltiu AT, TMP2, LJ_TISNUM - | beqz AT, >5 - |. li AT, LJ_TSTR - | - | // Convert number key to integer, check for integerness and range. - | cvt.w.d f2, f0 - | lw TMP0, TAB:RB->asize - | mfc1 TMP2, f2 - | cvt.d.w f4, f2 + | bne TMP2, TISNUM, >5 + |. lw RC, LO(CARG3) + | lw TMP0, TAB:RB->asize | lw TMP1, TAB:RB->array - | c.eq.d f0, f4 - | sltu AT, TMP2, TMP0 - | movf AT, r0 - | sll TMP2, TMP2, 3 + | sltu AT, RC, TMP0 + | sll TMP2, RC, 3 | beqz AT, ->vmeta_tgetv // Integer key and in array part? |. addu TMP2, TMP1, TMP2 - | lw TMP0, HI(TMP2) - | beq TMP0, TISNIL, >2 - |. ldc1 f0, 0(TMP2) + | lw SFRETHI, HI(TMP2) + | beq SFRETHI, TISNIL, >2 + |. lw SFRETLO, LO(TMP2) |1: | ins_next1 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 | |2: // Check for __index if table value is nil. @@ -3116,8 +4069,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. nop | |5: + | li AT, LJ_TSTR | bne TMP2, AT, ->vmeta_tgetv - |. lw STR:RC, LO(CARG3) + |. nop | b ->BC_TGETS_Z // String key? |. nop break; @@ -3138,9 +4092,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | lw NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 @@ -3149,18 +4103,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) | lw NODE:TMP1, NODE:TMP2->next - | lw CARG2, offsetof(Node, val)+HI(NODE:TMP2) + | lw SFRETHI, offsetof(Node, val)+HI(NODE:TMP2) | addiu CARG1, CARG1, -LJ_TSTR | xor TMP0, TMP0, STR:RC | or AT, CARG1, TMP0 | bnez AT, >4 |. lw TAB:TMP3, TAB:RB->metatable - | beq CARG2, TISNIL, >5 // Key found, but nil value? - |. lw CARG1, offsetof(Node, val)+LO(NODE:TMP2) + | beq SFRETHI, TISNIL, >5 // Key found, but nil value? + |. lw SFRETLO, offsetof(Node, val)+LO(NODE:TMP2) |3: | ins_next1 - | sw CARG2, HI(RA) - | sw CARG1, LO(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 | |4: // Follow hash chain. @@ -3170,7 +4124,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |5: // Check for __index if table value is nil. | beqz TAB:TMP3, <3 // No metatable: done. - |. li CARG2, LJ_TNIL + |. li SFRETHI, LJ_TNIL | lbu TMP0, TAB:TMP3->nomm | andi TMP0, TMP0, 1<<MM_index | bnez TMP0, <3 // 'no __index' flag set: done. @@ -3195,12 +4149,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sltu AT, TMP0, TMP1 | beqz AT, ->vmeta_tgetb |. addu RC, TMP2, RC - | lw TMP1, HI(RC) - | beq TMP1, TISNIL, >5 - |. ldc1 f0, 0(RC) + | lw SFRETHI, HI(RC) + | beq SFRETHI, TISNIL, >5 + |. lw SFRETLO, LO(RC) |1: | ins_next1 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | ins_next2 | |5: // Check for __index if table value is nil. @@ -3211,9 +4166,33 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | andi TMP1, TMP1, 1<<MM_index | bnez TMP1, <1 // 'no __index' flag set: done. |. nop - | b ->vmeta_tgetb // Caveat: preserve TMP0! + | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2! |. nop break; + case BC_TGETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | addu RB, BASE, RB + | addu RC, BASE, RC + | lw TAB:CARG1, LO(RB) + | lw CARG2, LO(RC) + | addu RA, BASE, RA + | lw TMP0, TAB:CARG1->asize + | lw TMP1, TAB:CARG1->array + | sltu AT, CARG2, TMP0 + | sll TMP2, CARG2, 3 + | beqz AT, ->vmeta_tgetr // In array part? + |. addu CRET1, TMP1, TMP2 + | lw SFARG2HI, HI(CRET1) + | lw SFARG2LO, LO(CRET1) + |->BC_TGETR_Z: + | ins_next1 + | sw SFARG2HI, HI(RA) + | sw SFARG2LO, LO(RA) + | ins_next2 + break; case BC_TSETV: | // RA = src*8, RB = table*8, RC = key*8 @@ -3226,33 +4205,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw TMP2, HI(CARG3) | lw TAB:RB, LO(CARG2) | li AT, LJ_TTAB - | ldc1 f0, 0(CARG3) | bne TMP1, AT, ->vmeta_tsetv |. addu RA, BASE, RA - | sltiu AT, TMP2, LJ_TISNUM - | beqz AT, >5 - |. li AT, LJ_TSTR - | - | // Convert number key to integer, check for integerness and range. - | cvt.w.d f2, f0 - | lw TMP0, TAB:RB->asize - | mfc1 TMP2, f2 - | cvt.d.w f4, f2 + | bne TMP2, TISNUM, >5 + |. lw RC, LO(CARG3) + | lw TMP0, TAB:RB->asize | lw TMP1, TAB:RB->array - | c.eq.d f0, f4 - | sltu AT, TMP2, TMP0 - | movf AT, r0 - | sll TMP2, TMP2, 3 + | sltu AT, RC, TMP0 + | sll TMP2, RC, 3 | beqz AT, ->vmeta_tsetv // Integer key and in array part? |. addu TMP1, TMP1, TMP2 - | lbu TMP3, TAB:RB->marked | lw TMP0, HI(TMP1) + | lbu TMP3, TAB:RB->marked + | lw SFRETHI, HI(RA) | beq TMP0, TISNIL, >3 - |. ldc1 f0, 0(RA) + |. lw SFRETLO, LO(RA) |1: - | andi AT, TMP3, LJ_GC_BLACK // isblack(table) - | bnez AT, >7 - |. sdc1 f0, 0(TMP1) + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | sw SFRETHI, HI(TMP1) + | bnez AT, >7 + |. sw SFRETLO, LO(TMP1) |2: | ins_next | @@ -3268,8 +4240,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. nop | |5: + | li AT, LJ_TSTR | bne TMP2, AT, ->vmeta_tsetv - |. lw STR:RC, LO(CARG3) + |. nop | b ->BC_TSETS_Z // String key? |. nop | @@ -3293,15 +4266,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 | lw TMP0, TAB:RB->hmask - | lw TMP1, STR:RC->hash + | lw TMP1, STR:RC->sid | lw NODE:TMP2, TAB:RB->node | sb r0, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | sll TMP0, TMP1, 5 | sll TMP1, TMP1, 3 | subu TMP1, TMP0, TMP1 | addu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + |.if FPU | ldc1 f20, 0(RA) + |.else + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) + |.endif |1: | lw CARG1, offsetof(Node, key)+HI(NODE:TMP2) | lw TMP0, offsetof(Node, key)+LO(NODE:TMP2) @@ -3315,8 +4293,14 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. lw TAB:TMP0, TAB:RB->metatable |2: | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + |.if FPU | bnez AT, >7 |. sdc1 f20, NODE:TMP2->val + |.else + | sw SFRETHI, NODE:TMP2->val.u32.hi + | bnez AT, >7 + |. sw SFRETLO, NODE:TMP2->val.u32.lo + |.endif |3: | ins_next | @@ -3354,8 +4338,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |. move CARG1, L | // Returns TValue *. | lw BASE, L->base + |.if FPU | b <3 // No 2nd write barrier needed. |. sdc1 f20, 0(CRET1) + |.else + | lw SFARG1HI, HI(RA) + | lw SFARG1LO, LO(RA) + | sw SFARG1HI, HI(CRET1) + | b <3 // No 2nd write barrier needed. + |. sw SFARG1LO, LO(CRET1) + |.endif | |7: // Possible table write barrier for the value. Skip valiswhite check. | barrierback TAB:RB, TMP3, TMP0, <3 @@ -3380,11 +4372,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw TMP1, HI(RC) | lbu TMP3, TAB:RB->marked | beq TMP1, TISNIL, >5 - |. ldc1 f0, 0(RA) |1: + |. lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | sw SFRETHI, HI(RC) | bnez AT, >7 - |. sdc1 f0, 0(RC) + |. sw SFRETLO, LO(RC) |2: | ins_next | @@ -3396,12 +4390,43 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | andi TMP1, TMP1, 1<<MM_newindex | bnez TMP1, <1 // 'no __newindex' flag set: done. |. nop - | b ->vmeta_tsetb // Caveat: preserve TMP0! + | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2! |. nop | |7: // Possible table write barrier for the value. Skip valiswhite check. | barrierback TAB:RB, TMP3, TMP0, <2 break; + case BC_TSETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | addu CARG1, BASE, RB + | addu CARG3, BASE, RC + | lw TAB:CARG2, LO(CARG1) + | lw CARG3, LO(CARG3) + | lbu TMP3, TAB:CARG2->marked + | lw TMP0, TAB:CARG2->asize + | lw TMP1, TAB:CARG2->array + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |. addu RA, BASE, RA + |2: + | sltu AT, CARG3, TMP0 + | sll TMP2, CARG3, 3 + | beqz AT, ->vmeta_tsetr // In array part? + |. addu CRET1, TMP1, TMP2 + |->BC_TSETR_Z: + | lw SFARG1HI, HI(RA) + | lw SFARG1LO, LO(RA) + | ins_next1 + | sw SFARG1HI, HI(CRET1) + | sw SFARG1LO, LO(CRET1) + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, CRET1, <2 + break; case BC_TSETM: | // RA = base*8 (table at base-1), RD = num_const*8 (start index) @@ -3424,10 +4449,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addu TMP1, TMP1, CARG1 | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) |3: // Copy result slots to table. - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | addiu RA, RA, 8 | sltu AT, RA, TMP2 - | sdc1 f0, 0(TMP1) + | sw SFRETHI, HI(TMP1) + | sw SFRETLO, LO(TMP1) | bnez AT, <3 |. addiu TMP1, TMP1, 8 | bnez TMP0, >7 @@ -3502,10 +4529,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beqz NARGS8:RC, >3 |. move TMP3, NARGS8:RC |2: - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | addiu RA, RA, 8 | addiu TMP3, TMP3, -8 - | sdc1 f0, 0(TMP2) + | sw SFRETHI, HI(TMP2) + | sw SFRETLO, LO(TMP2) | bnez TMP3, <2 |. addiu TMP2, TMP2, 8 |3: @@ -3542,12 +4571,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li AT, LJ_TFUNC | lw TMP1, -24+HI(BASE) | lw LFUNC:RB, -24+LO(BASE) - | ldc1 f2, -8(BASE) - | ldc1 f0, -16(BASE) + | lw SFARG1HI, -16+HI(BASE) + | lw SFARG1LO, -16+LO(BASE) + | lw SFARG2HI, -8+HI(BASE) + | lw SFARG2LO, -8+LO(BASE) | sw TMP1, HI(BASE) // Copy callable. | sw LFUNC:RB, LO(BASE) - | sdc1 f2, 16(BASE) // Copy control var. - | sdc1 f0, 8(BASE) // Copy state. + | sw SFARG1HI, 8+HI(BASE) // Copy state. + | sw SFARG1LO, 8+LO(BASE) + | sw SFARG2HI, 16+HI(BASE) // Copy control var. + | sw SFARG2LO, 16+LO(BASE) | addiu BASE, BASE, 8 | bne TMP1, AT, ->vmeta_call |. li NARGS8:RC, 16 // Iterators get 2 arguments. @@ -3555,10 +4588,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. + |.if JIT and ENDIAN_LE + | hotloop |.endif + |->vm_IITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) | addu RA, BASE, RA | lw TAB:RB, -16+LO(RA) | lw RC, -8+LO(RA) // Get index from control var. @@ -3570,20 +4604,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beqz AT, >5 // Index points after array part? |. sll TMP3, RC, 3 | addu TMP3, TMP1, TMP3 - | lw TMP2, HI(TMP3) - | ldc1 f0, 0(TMP3) - | mtc1 RC, f2 + | lw SFARG1HI, HI(TMP3) + | lw SFARG1LO, LO(TMP3) | lhu RD, -4+OFS_RD(PC) - | beq TMP2, TISNIL, <1 // Skip holes in array part. + | sw TISNUM, HI(RA) + | sw RC, LO(RA) + | beq SFARG1HI, TISNIL, <1 // Skip holes in array part. |. addiu RC, RC, 1 - | cvt.d.w f2, f2 + | sw SFARG1HI, 8+HI(RA) + | sw SFARG1LO, 8+LO(RA) | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sdc1 f0, 8(RA) | decode_RD4b RD | addu RD, RD, TMP3 | sw RC, -8+LO(RA) // Update control var. | addu PC, PC, RD - | sdc1 f2, 0(RA) |3: | ins_next | @@ -3598,18 +4632,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sll RB, RC, 3 | subu TMP3, TMP3, RB | addu NODE:TMP3, TMP3, TMP2 - | lw RB, HI(NODE:TMP3) - | ldc1 f0, 0(NODE:TMP3) + | lw SFARG1HI, NODE:TMP3->val.u32.hi + | lw SFARG1LO, NODE:TMP3->val.u32.lo | lhu RD, -4+OFS_RD(PC) - | beq RB, TISNIL, <6 // Skip holes in hash part. + | beq SFARG1HI, TISNIL, <6 // Skip holes in hash part. |. addiu RC, RC, 1 - | ldc1 f2, NODE:TMP3->key + | lw SFARG2HI, NODE:TMP3->key.u32.hi + | lw SFARG2LO, NODE:TMP3->key.u32.lo | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) - | sdc1 f0, 8(RA) + | sw SFARG1HI, 8+HI(RA) + | sw SFARG1LO, 8+LO(RA) | addu RC, RC, TMP0 | decode_RD4b RD | addu RD, RD, TMP3 - | sdc1 f2, 0(RA) + | sw SFARG2HI, HI(RA) + | sw SFARG2LO, LO(RA) | addu PC, PC, RD | b <3 |. sw RC, -8+LO(RA) // Update control var. @@ -3634,9 +4671,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addiu CARG2, CARG2, -FF_next_N | or CARG2, CARG2, CARG3 | bnez CARG2, >5 - |. lui TMP1, 0xfffe + |. lui TMP1, (LJ_KEYINDEX >> 16) | addu PC, TMP0, TMP2 - | ori TMP1, TMP1, 0x7fff + | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) | sw r0, -8+LO(RA) // Initialize control var. | sw TMP1, -8+HI(RA) |1: @@ -3645,9 +4682,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP3, BC_JMP | li TMP1, BC_ITERC | sb TMP3, -4+OFS_OP(PC) - | addu PC, TMP0, TMP2 + | addu PC, TMP0, TMP2 + |.if JIT + | lb TMP0, OFS_OP(PC) + | li AT, BC_ITERN + | bne TMP0, AT, >6 + |. lhu TMP2, OFS_RD(PC) + |.endif | b <1 |. sb TMP1, OFS_OP(PC) + |.if JIT + |6: // Unpatch JLOOP. + | lw TMP0, DISPATCH_J(trace)(DISPATCH) + | sll TMP2, TMP2, 2 + | addu TMP0, TMP0, TMP2 + | lw TRACE:TMP2, 0(TMP0) + | lw TMP0, TRACE:TMP2->startins + | li AT, -256 + | and TMP0, TMP0, AT + | or TMP0, TMP0, TMP1 + | b <1 + |. sw TMP0, 0(PC) + |.endif break; case BC_VARG: @@ -3689,9 +4745,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bnez AT, >7 |. addiu MULTRES, TMP1, 8 |6: - | ldc1 f0, 0(RC) + | lw SFRETHI, HI(RC) + | lw SFRETLO, LO(RC) | addiu RC, RC, 8 - | sdc1 f0, 0(RA) + | sw SFRETHI, HI(RA) + | sw SFRETLO, LO(RA) | sltu AT, RC, TMP3 | bnez AT, <6 // More vararg slots? |. addiu RA, RA, 8 @@ -3747,10 +4805,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beqz RC, >3 |. subu BASE, TMP2, TMP0 |2: - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) | addiu RA, RA, 8 | addiu RC, RC, -8 - | sdc1 f0, 0(TMP2) + | sw SFRETHI, HI(TMP2) + | sw SFRETLO, LO(TMP2) | bnez RC, <2 |. addiu TMP2, TMP2, 8 |3: @@ -3791,14 +4851,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lw INS, -4(PC) | addiu TMP2, BASE, -8 if (op == BC_RET1) { - | ldc1 f0, 0(RA) + | lw SFRETHI, HI(RA) + | lw SFRETLO, LO(RA) } | decode_RB8a RB, INS | decode_RA8a RA, INS | decode_RB8b RB | decode_RA8b RA if (op == BC_RET1) { - | sdc1 f0, 0(TMP2) + | sw SFRETHI, HI(TMP2) + | sw SFRETLO, LO(TMP2) } | subu BASE, TMP2, RA |5: @@ -3840,69 +4902,147 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA = base*8, RD = target (after end of loop or start of loop) vk = (op == BC_IFORL || op == BC_JFORL); | addu RA, BASE, RA - if (vk) { - | ldc1 f0, FORL_IDX*8(RA) - | ldc1 f4, FORL_STEP*8(RA) - | ldc1 f2, FORL_STOP*8(RA) - | lw TMP3, FORL_STEP*8+HI(RA) - | add.d f0, f0, f4 - | sdc1 f0, FORL_IDX*8(RA) - } else { - | lw TMP1, FORL_IDX*8+HI(RA) - | lw TMP3, FORL_STEP*8+HI(RA) - | lw TMP2, FORL_STOP*8+HI(RA) - | sltiu TMP1, TMP1, LJ_TISNUM - | sltiu TMP0, TMP3, LJ_TISNUM - | sltiu TMP2, TMP2, LJ_TISNUM - | and TMP1, TMP1, TMP0 - | and TMP1, TMP1, TMP2 - | ldc1 f0, FORL_IDX*8(RA) - | beqz TMP1, ->vmeta_for - |. ldc1 f2, FORL_STOP*8(RA) - } + | lw SFARG1HI, FORL_IDX*8+HI(RA) + | lw SFARG1LO, FORL_IDX*8+LO(RA) if (op != BC_JFORL) { | srl RD, RD, 1 - | lui TMP0, (-(BCBIAS_J*4 >> 16) & 65535) + | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, RD, TMP2 + } + if (!vk) { + | lw SFARG2HI, FORL_STOP*8+HI(RA) + | lw SFARG2LO, FORL_STOP*8+LO(RA) + | bne SFARG1HI, TISNUM, >5 + |. lw SFRETHI, FORL_STEP*8+HI(RA) + | xor AT, SFARG2HI, TISNUM + | lw SFRETLO, FORL_STEP*8+LO(RA) + | xor TMP0, SFRETHI, TISNUM + | or AT, AT, TMP0 + | bnez AT, ->vmeta_for + |. slt AT, SFRETLO, r0 + | slt CRET1, SFARG2LO, SFARG1LO + | slt TMP1, SFARG1LO, SFARG2LO + | movn CRET1, TMP1, AT + } else { + | bne SFARG1HI, TISNUM, >5 + |. lw SFARG2LO, FORL_STEP*8+LO(RA) + | lw SFRETLO, FORL_STOP*8+LO(RA) + | move TMP3, SFARG1LO + | addu SFARG1LO, SFARG1LO, SFARG2LO + | xor TMP0, SFARG1LO, TMP3 + | xor TMP1, SFARG1LO, SFARG2LO + | and TMP0, TMP0, TMP1 + | slt TMP1, SFARG1LO, SFRETLO + | slt CRET1, SFRETLO, SFARG1LO + | slt AT, SFARG2LO, r0 + | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. + | movn CRET1, TMP1, AT + | or CRET1, CRET1, TMP0 + } + |1: + if (op == BC_FORI) { + | movz TMP2, r0, CRET1 + | addu PC, PC, TMP2 + } else if (op == BC_JFORI) { + | addu PC, PC, TMP2 + | lhu RD, -4+OFS_RD(PC) + } else if (op == BC_IFORL) { + | movn TMP2, r0, CRET1 + | addu PC, PC, TMP2 + } + if (vk) { + | sw SFARG1HI, FORL_IDX*8+HI(RA) + | sw SFARG1LO, FORL_IDX*8+LO(RA) } - | c.le.d 0, f0, f2 - | c.le.d 1, f2, f0 - | sdc1 f0, FORL_EXT*8(RA) + | ins_next1 + | sw SFARG1HI, FORL_EXT*8+HI(RA) + | sw SFARG1LO, FORL_EXT*8+LO(RA) + |2: if (op == BC_JFORI) { - | li TMP1, 1 - | li TMP2, 1 - | addu TMP0, RD, TMP0 - | slt TMP3, TMP3, r0 - | movf TMP1, r0, 0 - | addu PC, PC, TMP0 - | movf TMP2, r0, 1 - | lhu RD, -4+OFS_RD(PC) - | movn TMP1, TMP2, TMP3 - | bnez TMP1, =>BC_JLOOP + | beqz CRET1, =>BC_JLOOP |. decode_RD8b RD } else if (op == BC_JFORL) { - | li TMP1, 1 - | li TMP2, 1 - | slt TMP3, TMP3, r0 - | movf TMP1, r0, 0 - | movf TMP2, r0, 1 - | movn TMP1, TMP2, TMP3 - | bnez TMP1, =>BC_JLOOP + | beqz CRET1, =>BC_JLOOP + } + | ins_next2 + | + |5: // FP loop. + |.if FPU + if (!vk) { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | sltiu TMP0, SFARG1HI, LJ_TISNUM + | sltiu TMP1, SFARG2HI, LJ_TISNUM + | sltiu AT, SFRETHI, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. slt TMP3, SFRETHI, r0 + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | li CRET1, 1 + | movt CRET1, r0, 0 + | movt AT, r0, 1 + | b <1 + |. movn CRET1, AT, TMP3 + } else { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f4, FORL_STEP*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | lw SFARG2HI, FORL_STEP*8+HI(RA) + | add.d f0, f0, f4 + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | slt TMP3, SFARG2HI, r0 + | li CRET1, 1 + | li AT, 1 + | movt CRET1, r0, 0 + | movt AT, r0, 1 + | movn CRET1, AT, TMP3 + if (op == BC_IFORL) { + | movn TMP2, r0, CRET1 + | addu PC, PC, TMP2 + } + | sdc1 f0, FORL_IDX*8(RA) + | ins_next1 + | b <2 + |. sdc1 f0, FORL_EXT*8(RA) + } + |.else + if (!vk) { + | sltiu TMP0, SFARG1HI, LJ_TISNUM + | sltiu TMP1, SFARG2HI, LJ_TISNUM + | sltiu AT, SFRETHI, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. nop + | bal ->vm_sfcmpolex + |. move TMP3, SFRETHI + | b <1 |. nop } else { - | addu TMP1, RD, TMP0 - | slt TMP3, TMP3, r0 - | move TMP2, TMP1 - if (op == BC_FORI) { - | movt TMP1, r0, 0 - | movt TMP2, r0, 1 + | lw SFARG2HI, FORL_STEP*8+HI(RA) + | load_got __adddf3 + | call_extern + |. sw TMP2, ARG5 + | lw SFARG2HI, FORL_STOP*8+HI(RA) + | lw SFARG2LO, FORL_STOP*8+LO(RA) + | move SFARG1HI, SFRETHI + | move SFARG1LO, SFRETLO + | bal ->vm_sfcmpolex + |. lw TMP3, FORL_STEP*8+HI(RA) + if ( op == BC_JFORL ) { + | lhu RD, -4+OFS_RD(PC) + | lw TMP2, ARG5 + | b <1 + |. decode_RD8b RD } else { - | movf TMP1, r0, 0 - | movf TMP2, r0, 1 + | b <1 + |. lw TMP2, ARG5 } - | movn TMP1, TMP2, TMP3 - | addu PC, PC, TMP1 } - | ins_next + |.endif break; case BC_ITERL: @@ -3961,8 +5101,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | sw AT, DISPATCH_GL(vmstate)(DISPATCH) | lw TRACE:TMP2, 0(TMP1) | sw BASE, DISPATCH_GL(jit_base)(DISPATCH) - | sw L, DISPATCH_GL(jit_L)(DISPATCH) | lw TMP2, TRACE:TMP2->mcode + | sw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) | jr TMP2 |. addiu JGL, DISPATCH, GG_DISP2G+32768 |.endif @@ -4088,6 +5228,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li_vmstate INTERP | lw PC, FRAME_PC(BASE) // Fetch PC of caller. | subu RA, TMP1, RD // RA = L->top - nresults*8 + | sw L, DISPATCH_GL(cur_L)(DISPATCH) | b ->vm_returnc |. st_vmstate break; @@ -4150,8 +5291,10 @@ static void emit_asm_debug(BuildCtx *ctx) fcofs, CFRAME_SIZE); for (i = 23; i >= 16; i--) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); +#if !LJ_SOFTFP for (i = 30; i >= 20; i -= 2) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); +#endif fprintf(ctx->fp, "\t.align 2\n" ".LEFDE0:\n\n"); @@ -4203,8 +5346,10 @@ static void emit_asm_debug(BuildCtx *ctx) fcofs, CFRAME_SIZE); for (i = 23; i >= 16; i--) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 26-i); +#if !LJ_SOFTFP for (i = 30; i >= 20; i -= 2) fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 42-i); +#endif fprintf(ctx->fp, "\t.align 2\n" ".LEFDE2:\n\n"); diff --git a/src/vm_mips64.dasc b/src/vm_mips64.dasc new file mode 100644 index 00000000..651bc42e --- /dev/null +++ b/src/vm_mips64.dasc @@ -0,0 +1,5538 @@ +|// Low-level VM code for MIPS64 CPUs. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +|// +|// Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com. +|// Sponsored by Cisco Systems, Inc. +| +|.arch mips64 +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|// Note: The ragged indentation of the instructions is intentional. +|// The starting columns indicate data dependencies. +| +|//----------------------------------------------------------------------- +| +|// Fixed register assignments for the interpreter. +|// Don't use: r0 = 0, r26/r27 = reserved, r28 = gp, r29 = sp, r31 = ra +| +|.macro .FPU, a, b +|.if FPU +| a, b +|.endif +|.endmacro +| +|// The following must be C callee-save (but BASE is often refetched). +|.define BASE, r16 // Base of current Lua stack frame. +|.define KBASE, r17 // Constants of current Lua function. +|.define PC, r18 // Next PC. +|.define DISPATCH, r19 // Opcode dispatch table. +|.define LREG, r20 // Register holding lua_State (also in SAVE_L). +|.define MULTRES, r21 // Size of multi-result: (nresults+1)*8. +| +|.define JGL, r30 // On-trace: global_State + 32768. +| +|// Constants for type-comparisons, stores and conversions. C callee-save. +|.define TISNIL, r30 +|.define TISNUM, r22 +|.if FPU +|.define TOBIT, f30 // 2^52 + 2^51. +|.endif +| +|// The following temporaries are not saved across C calls, except for RA. +|.define RA, r23 // Callee-save. +|.define RB, r8 +|.define RC, r9 +|.define RD, r10 +|.define INS, r11 +| +|.define AT, r1 // Assembler temporary. +|.define TMP0, r12 +|.define TMP1, r13 +|.define TMP2, r14 +|.define TMP3, r15 +| +|// MIPS n64 calling convention. +|.define CFUNCADDR, r25 +|.define CARG1, r4 +|.define CARG2, r5 +|.define CARG3, r6 +|.define CARG4, r7 +|.define CARG5, r8 +|.define CARG6, r9 +|.define CARG7, r10 +|.define CARG8, r11 +| +|.define CRET1, r2 +|.define CRET2, r3 +| +|.if FPU +|.define FARG1, f12 +|.define FARG2, f13 +|.define FARG3, f14 +|.define FARG4, f15 +|.define FARG5, f16 +|.define FARG6, f17 +|.define FARG7, f18 +|.define FARG8, f19 +| +|.define FRET1, f0 +|.define FRET2, f2 +| +|.define FTMP0, f20 +|.define FTMP1, f21 +|.define FTMP2, f22 +|.endif +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|.if FPU // MIPS64 hard-float. +| +|.define CFRAME_SPACE, 192 // Delta for sp. +| +|//----- 16 byte aligned, <-- sp entering interpreter +|.define SAVE_ERRF, 188(sp) // 32 bit values. +|.define SAVE_NRES, 184(sp) +|.define SAVE_CFRAME, 176(sp) // 64 bit values. +|.define SAVE_L, 168(sp) +|.define SAVE_PC, 160(sp) +|//----- 16 byte aligned +|.define SAVE_GPR_, 80 // .. 80+10*8: 64 bit GPR saves. +|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. +| +|.else // MIPS64 soft-float +| +|.define CFRAME_SPACE, 128 // Delta for sp. +| +|//----- 16 byte aligned, <-- sp entering interpreter +|.define SAVE_ERRF, 124(sp) // 32 bit values. +|.define SAVE_NRES, 120(sp) +|.define SAVE_CFRAME, 112(sp) // 64 bit values. +|.define SAVE_L, 104(sp) +|.define SAVE_PC, 96(sp) +|//----- 16 byte aligned +|.define SAVE_GPR_, 16 // .. 16+10*8: 64 bit GPR saves. +| +|.endif +| +|.define TMPX, 8(sp) // Unused by interpreter, temp for JIT code. +|.define TMPD, 0(sp) +|//----- 16 byte aligned +| +|.define TMPD_OFS, 0 +| +|.define SAVE_MULTRES, TMPD +| +|//----------------------------------------------------------------------- +| +|.macro saveregs +| daddiu sp, sp, -CFRAME_SPACE +| sd ra, SAVE_GPR_+9*8(sp) +| sd r30, SAVE_GPR_+8*8(sp) +| .FPU sdc1 f31, SAVE_FPR_+7*8(sp) +| sd r23, SAVE_GPR_+7*8(sp) +| .FPU sdc1 f30, SAVE_FPR_+6*8(sp) +| sd r22, SAVE_GPR_+6*8(sp) +| .FPU sdc1 f29, SAVE_FPR_+5*8(sp) +| sd r21, SAVE_GPR_+5*8(sp) +| .FPU sdc1 f28, SAVE_FPR_+4*8(sp) +| sd r20, SAVE_GPR_+4*8(sp) +| .FPU sdc1 f27, SAVE_FPR_+3*8(sp) +| sd r19, SAVE_GPR_+3*8(sp) +| .FPU sdc1 f26, SAVE_FPR_+2*8(sp) +| sd r18, SAVE_GPR_+2*8(sp) +| .FPU sdc1 f25, SAVE_FPR_+1*8(sp) +| sd r17, SAVE_GPR_+1*8(sp) +| .FPU sdc1 f24, SAVE_FPR_+0*8(sp) +| sd r16, SAVE_GPR_+0*8(sp) +|.endmacro +| +|.macro restoreregs_ret +| ld ra, SAVE_GPR_+9*8(sp) +| ld r30, SAVE_GPR_+8*8(sp) +| ld r23, SAVE_GPR_+7*8(sp) +| .FPU ldc1 f31, SAVE_FPR_+7*8(sp) +| ld r22, SAVE_GPR_+6*8(sp) +| .FPU ldc1 f30, SAVE_FPR_+6*8(sp) +| ld r21, SAVE_GPR_+5*8(sp) +| .FPU ldc1 f29, SAVE_FPR_+5*8(sp) +| ld r20, SAVE_GPR_+4*8(sp) +| .FPU ldc1 f28, SAVE_FPR_+4*8(sp) +| ld r19, SAVE_GPR_+3*8(sp) +| .FPU ldc1 f27, SAVE_FPR_+3*8(sp) +| ld r18, SAVE_GPR_+2*8(sp) +| .FPU ldc1 f26, SAVE_FPR_+2*8(sp) +| ld r17, SAVE_GPR_+1*8(sp) +| .FPU ldc1 f25, SAVE_FPR_+1*8(sp) +| ld r16, SAVE_GPR_+0*8(sp) +| .FPU ldc1 f24, SAVE_FPR_+0*8(sp) +| jr ra +| daddiu sp, sp, CFRAME_SPACE +|.endmacro +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State, LREG +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS8, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|//----------------------------------------------------------------------- +| +|// Trap for not-yet-implemented parts. +|.macro NYI; .long 0xec1cf0f0; .endmacro +| +|// Macros to mark delay slots. +|.macro ., a; a; .endmacro +|.macro ., a,b; a,b; .endmacro +|.macro ., a,b,c; a,b,c; .endmacro +|.macro ., a,b,c,d; a,b,c,d; .endmacro +| +|.define FRAME_PC, -8 +|.define FRAME_FUNC, -16 +| +|//----------------------------------------------------------------------- +| +|// Endian-specific defines. +|.if ENDIAN_LE +|.define HI, 4 +|.define LO, 0 +|.define OFS_RD, 2 +|.define OFS_RA, 1 +|.define OFS_OP, 0 +|.else +|.define HI, 0 +|.define LO, 4 +|.define OFS_RD, 0 +|.define OFS_RA, 2 +|.define OFS_OP, 3 +|.endif +| +|// Instruction decode. +|.macro decode_OP1, dst, ins; andi dst, ins, 0xff; .endmacro +|.macro decode_OP8a, dst, ins; andi dst, ins, 0xff; .endmacro +|.macro decode_OP8b, dst; sll dst, dst, 3; .endmacro +|.macro decode_RC8a, dst, ins; srl dst, ins, 13; .endmacro +|.macro decode_RC8b, dst; andi dst, dst, 0x7f8; .endmacro +|.macro decode_RD4b, dst; sll dst, dst, 2; .endmacro +|.macro decode_RA8a, dst, ins; srl dst, ins, 5; .endmacro +|.macro decode_RA8b, dst; andi dst, dst, 0x7f8; .endmacro +|.macro decode_RB8a, dst, ins; srl dst, ins, 21; .endmacro +|.macro decode_RB8b, dst; andi dst, dst, 0x7f8; .endmacro +|.macro decode_RD8a, dst, ins; srl dst, ins, 16; .endmacro +|.macro decode_RD8b, dst; sll dst, dst, 3; .endmacro +|.macro decode_RDtoRC8, dst, src; andi dst, src, 0x7f8; .endmacro +| +|// Instruction fetch. +|.macro ins_NEXT1 +| lw INS, 0(PC) +| daddiu PC, PC, 4 +|.endmacro +|// Instruction decode+dispatch. +|.macro ins_NEXT2 +| decode_OP8a TMP1, INS +| decode_OP8b TMP1 +| daddu TMP0, DISPATCH, TMP1 +| decode_RD8a RD, INS +| ld AT, 0(TMP0) +| decode_RA8a RA, INS +| decode_RD8b RD +| jr AT +| decode_RA8b RA +|.endmacro +|.macro ins_NEXT +| ins_NEXT1 +| ins_NEXT2 +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +| .define ins_next1, ins_NEXT1 +| .define ins_next2, ins_NEXT2 +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| .macro ins_next +| b ->ins_next +| .endmacro +| .macro ins_next1 +| .endmacro +| .macro ins_next2 +| b ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC +| ld PC, LFUNC:RB->pc +| lw INS, 0(PC) +| daddiu PC, PC, 4 +| decode_OP8a TMP1, INS +| decode_RA8a RA, INS +| decode_OP8b TMP1 +| decode_RA8b RA +| daddu TMP0, DISPATCH, TMP1 +| ld TMP0, 0(TMP0) +| jr TMP0 +| daddu RA, RA, BASE +|.endmacro +| +|.macro ins_call +| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC +| sd PC, FRAME_PC(BASE) +| ins_callt +|.endmacro +| +|//----------------------------------------------------------------------- +| +|.macro branch_RD +| srl TMP0, RD, 1 +| lui AT, (-(BCBIAS_J*4 >> 16) & 65535) +| addu TMP0, TMP0, AT +| daddu PC, PC, TMP0 +|.endmacro +| +|// Assumes DISPATCH is relative to GL. +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) +#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|.macro load_got, func +| ld CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) +|.endmacro +|// Much faster. Sadly, there's no easy way to force the required code layout. +|// .macro call_intern, func; bal extern func; .endmacro +|.macro call_intern, func; jalr CFUNCADDR; .endmacro +|.macro call_extern; jalr CFUNCADDR; .endmacro +|.macro jmp_extern; jr CFUNCADDR; .endmacro +| +|.macro hotcheck, delta, target +| dsrl TMP1, PC, 1 +| andi TMP1, TMP1, 126 +| daddu TMP1, TMP1, DISPATCH +| lhu TMP2, GG_DISP2HOT(TMP1) +| addiu TMP2, TMP2, -delta +| bltz TMP2, target +|. sh TMP2, GG_DISP2HOT(TMP1) +|.endmacro +| +|.macro hotloop +| hotcheck HOTCOUNT_LOOP, ->vm_hotloop +|.endmacro +| +|.macro hotcall +| hotcheck HOTCOUNT_CALL, ->vm_hotcall +|.endmacro +| +|// Set current VM state. Uses TMP0. +|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro +|.macro st_vmstate; sw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro +| +|// Move table write barrier back. Overwrites mark and tmp. +|.macro barrierback, tab, mark, tmp, target +| ld tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) +| andi mark, mark, ~LJ_GC_BLACK & 255 // black2gray(tab) +| sd tab, DISPATCH_GL(gc.grayagain)(DISPATCH) +| sb mark, tab->marked +| b target +|. sd tmp, tab->gclist +|.endmacro +| +|// Clear type tag. Isolate lowest 14+32+1=47 bits of reg. +|.macro cleartp, reg; dextm reg, reg, 0, 14; .endmacro +|.macro cleartp, dst, reg; dextm dst, reg, 0, 14; .endmacro +| +|// Set type tag: Merge 17 type bits into bits [15+32=47, 31+32+1=64) of dst. +|.macro settp, dst, tp; dinsu dst, tp, 15, 31; .endmacro +| +|// Extract (negative) type tag. +|.macro gettp, dst, src; dsra dst, src, 47; .endmacro +| +|// Macros to check the TValue type and extract the GCobj. Branch on failure. +|.macro checktp, reg, tp, target +| gettp AT, reg +| daddiu AT, AT, tp +| bnez AT, target +|. cleartp reg +|.endmacro +|.macro checktp, dst, reg, tp, target +| gettp AT, reg +| daddiu AT, AT, tp +| bnez AT, target +|. cleartp dst, reg +|.endmacro +|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro +|.macro checkint, reg, target // Caveat: has delay slot! +| gettp AT, reg +| bne AT, TISNUM, target +|.endmacro +|.macro checknum, reg, target // Caveat: has delay slot! +| gettp AT, reg +| sltiu AT, AT, LJ_TISNUM +| beqz AT, target +|.endmacro +| +|.macro mov_false, reg +| lu reg, 0x8000 +| dsll reg, reg, 32 +| not reg, reg +|.endmacro +|.macro mov_true, reg +| li reg, 0x0001 +| dsll reg, reg, 48 +| not reg, reg +|.endmacro +| +|//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | // See vm_return. Also: TMP2 = previous base. + | andi AT, PC, FRAME_P + | beqz AT, ->cont_dispatch + | + | // Return from pcall or xpcall fast func. + |. mov_true TMP1 + | ld PC, FRAME_PC(TMP2) // Fetch PC of previous frame. + | move BASE, TMP2 // Restore caller base. + | // Prepending may overwrite the pcall frame, so do it at the end. + | sd TMP1, -8(RA) // Prepend true to results. + | daddiu RA, RA, -8 + | + |->vm_returnc: + | addiu RD, RD, 8 // RD = (nresults+1)*8. + | andi TMP0, PC, FRAME_TYPE + | beqz RD, ->vm_unwind_c_eh + |. li CRET1, LUA_YIELD + | beqz TMP0, ->BC_RET_Z // Handle regular return to Lua. + |. move MULTRES, RD + | + |->vm_return: + | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return + | // TMP0 = PC & FRAME_TYPE + | li TMP2, -8 + | xori AT, TMP0, FRAME_C + | and TMP2, PC, TMP2 + | bnez AT, ->vm_returnp + | dsubu TMP2, BASE, TMP2 // TMP2 = previous base. + | + | addiu TMP1, RD, -8 + | sd TMP2, L->base + | li_vmstate C + | lw TMP2, SAVE_NRES + | daddiu BASE, BASE, -16 + | st_vmstate + | beqz TMP1, >2 + |. sll TMP2, TMP2, 3 + |1: + | addiu TMP1, TMP1, -8 + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | sd CRET1, 0(BASE) + | bnez TMP1, <1 + |. daddiu BASE, BASE, 8 + | + |2: + | bne TMP2, RD, >6 + |3: + |. sd BASE, L->top // Store new top. + | + |->vm_leave_cp: + | ld TMP0, SAVE_CFRAME // Restore previous C frame. + | move CRET1, r0 // Ok return status for vm_pcall. + | sd TMP0, L->cframe + | + |->vm_leave_unw: + | restoreregs_ret + | + |6: + | ld TMP1, L->maxstack + | slt AT, TMP2, RD + | bnez AT, >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. + |. slt AT, BASE, TMP1 + | beqz AT, >8 + |. nop + | sd TISNIL, 0(BASE) + | addiu RD, RD, 8 + | b <2 + |. daddiu BASE, BASE, 8 + | + |7: // Less results wanted. + | subu TMP0, RD, TMP2 + | dsubu TMP0, BASE, TMP0 // Either keep top or shrink it. + |.if MIPSR6 + | selnez TMP0, TMP0, TMP2 // LUA_MULTRET+1 case? + | seleqz BASE, BASE, TMP2 + | b <3 + |. or BASE, BASE, TMP0 + |.else + | b <3 + |. movn BASE, TMP0, TMP2 // LUA_MULTRET+1 case? + |.endif + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | load_got lj_state_growstack + | move MULTRES, RD + | srl CARG2, TMP2, 3 + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | lw TMP2, SAVE_NRES + | ld BASE, L->top // Need the (realloced) L->top in BASE. + | move RD, MULTRES + | b <2 + |. sll TMP2, TMP2, 3 + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | move sp, CARG1 + | move CRET1, CARG2 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | ld L, SAVE_L + | li TMP0, ~LJ_VMST_C + | ld GL:TMP1, L->glref + | b ->vm_leave_unw + |. sw TMP0, GL:TMP1->vmstate + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | li AT, -4 + | and sp, CARG1, AT + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | ld L, SAVE_L + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | ld BASE, L->base + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | .FPU mtc1 TMP3, TOBIT + | mov_false TMP1 + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of previous frame. + | .FPU cvt.d.s TOBIT, TOBIT + | daddiu RA, BASE, -8 // Results start at BASE-8. + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sd TMP1, 0(RA) // Prepend false to error message. + | st_vmstate + | b ->vm_returnc + |. li RD, 16 // 2 results: false + error message. + | + |->vm_unwind_stub: // Jump to exit stub from unwinder. + | jr CARG1 + |. move ra, CARG2 + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | b >2 + |. li CARG2, LUA_MINSTACK + | + |->vm_growstack_l: // Grow stack for Lua function. + | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC + | daddu RC, BASE, RC + | dsubu RA, RA, BASE + | sd BASE, L->base + | daddiu PC, PC, 4 // Must point after first instruction. + | sd RC, L->top + | srl CARG2, RA, 3 + |2: + | // L->base = new base, L->top = top + | load_got lj_state_growstack + | sd PC, SAVE_PC + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | ld BASE, L->base + | ld RC, L->top + | ld LFUNC:RB, FRAME_FUNC(BASE) + | dsubu RC, RC, BASE + | cleartp LFUNC:RB + | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | move L, CARG1 + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | move BASE, CARG2 + | lbu TMP1, L->status + | sd L, SAVE_L + | li PC, FRAME_CP + | daddiu TMP0, sp, CFRAME_RESUME + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sw r0, SAVE_NRES + | sw r0, SAVE_ERRF + | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. + | sd r0, SAVE_CFRAME + | beqz TMP1, >3 + |. sd TMP0, L->cframe + | + | // Resume after yield (like a return). + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | move RA, BASE + | ld BASE, L->base + | ld TMP1, L->top + | ld PC, FRAME_PC(BASE) + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | dsubu RD, TMP1, BASE + | .FPU mtc1 TMP3, TOBIT + | sb r0, L->status + | .FPU cvt.d.s TOBIT, TOBIT + | li_vmstate INTERP + | daddiu RD, RD, 8 + | st_vmstate + | move MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | li TISNIL, LJ_TNIL + | beqz TMP0, ->BC_RET_Z + |. li TISNUM, LJ_TISNUM + | b ->vm_return + |. nop + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | sw CARG4, SAVE_ERRF + | b >1 + |. li PC, FRAME_CP + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | li PC, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | ld TMP1, L:CARG1->cframe + | move L, CARG1 + | sw CARG3, SAVE_NRES + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | sd CARG1, SAVE_L + | move BASE, CARG2 + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. + | sd TMP1, SAVE_CFRAME + | sd sp, L->cframe // Add our C frame to cframe chain. + | + |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | ld TMP2, L->base // TMP2 = old base (used in vmeta_call). + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | ld TMP1, L->top + | .FPU mtc1 TMP3, TOBIT + | daddu PC, PC, BASE + | dsubu NARGS8:RC, TMP1, BASE + | li TISNUM, LJ_TISNUM + | dsubu PC, PC, TMP2 // PC = frame delta + frame type + | .FPU cvt.d.s TOBIT, TOBIT + | li_vmstate INTERP + | li TISNIL, LJ_TNIL + | st_vmstate + | + |->vm_call_dispatch: + | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC + | ld LFUNC:RB, FRAME_FUNC(BASE) + | checkfunc LFUNC:RB, ->vmeta_call + | + |->vm_call_dispatch_f: + | ins_call + | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | move L, CARG1 + | ld TMP0, L:CARG1->stack + | sd CARG1, SAVE_L + | ld TMP1, L->top + | ld DISPATCH, L->glref // Setup pointer to dispatch table. + | sd CARG1, SAVE_PC // Any value outside of bytecode is ok. + | dsubu TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). + | ld TMP1, L->cframe + | daddiu DISPATCH, DISPATCH, GG_G2DISP + | sw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. + | sw r0, SAVE_ERRF // No error function. + | sd TMP1, SAVE_CFRAME + | sd sp, L->cframe // Add our C frame to cframe chain. + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | jalr CARG4 // (lua_State *L, lua_CFunction func, void *ud) + |. move CFUNCADDR, CARG4 + | move BASE, CRET1 + | bnez CRET1, <3 // Else continue with the call. + |. li PC, FRAME_CP + | b ->vm_leave_cp // No base? Just remove C frame. + |. nop + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the + |// stack, so BASE doesn't need to be reloaded across these calls. + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 + | ld TMP0, -32(BASE) // Continuation. + | move RB, BASE + | move BASE, TMP2 // Restore caller BASE. + | ld LFUNC:TMP1, FRAME_FUNC(TMP2) + |.if FFI + | sltiu AT, TMP0, 2 + |.endif + | ld PC, -24(RB) // Restore PC from [cont|PC]. + | cleartp LFUNC:TMP1 + | daddu TMP2, RA, RD + |.if FFI + | bnez AT, >1 + |.endif + |. sd TISNIL, -8(TMP2) // Ensure one valid arg. + | ld TMP1, LFUNC:TMP1->pc + | // BASE = base, RA = resultptr, RB = meta base + | jr TMP0 // Jump to continuation. + |. ld KBASE, PC2PROTO(k)(TMP1) + | + |.if FFI + |1: + | bnez TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: tailcall from C function. + |. daddiu TMP1, RB, -32 + | b ->vm_call_tail + |. dsubu RC, TMP1, BASE + |.endif + | + |->cont_cat: // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | daddiu CARG2, RB, -32 + | ld CRET1, 0(RA) + | decode_RB8a MULTRES, INS + | decode_RA8a RA, INS + | decode_RB8b MULTRES + | decode_RA8b RA + | daddu TMP1, BASE, MULTRES + | sd BASE, L->base + | dsubu CARG3, CARG2, TMP1 + | bne TMP1, CARG2, ->BC_CAT_Z + |. sd CRET1, 0(CARG2) + | daddu RA, BASE, RA + | b ->cont_nop + |. sd CRET1, 0(RA) + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets1: + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tgets: + | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) + | sd TAB:RB, 0(CARG2) + | settp STR:RC, TMP1 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tgetb: // TMP0 = index + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tgetv: + |1: + | load_got lj_meta_tget + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + |. move CARG1, L + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + |. daddiu TMP1, BASE, -FRAME_CONT + | ld CARG1, 0(CRET1) + | ins_next1 + | sd CARG1, 0(RA) + | ins_next2 + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | dsubu PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | cleartp LFUNC:RB + | b ->vm_call_dispatch_f + |. li NARGS8:RC, 16 // 2 args for func(t, k). + | + |->vmeta_tgetr: + | load_got lj_tab_getinth + | call_intern lj_tab_getinth // (GCtab *t, int32_t key) + |. nop + | // Returns cTValue * or NULL. + | beqz CRET1, ->BC_TGETR_Z + |. move CARG2, TISNIL + | b ->BC_TGETR_Z + |. ld CARG2, 0(CRET1) + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets1: + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TSTR + | settp STR:RC, TMP0 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tsets: + | daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) + | li TMP0, LJ_TTAB + | li TMP1, LJ_TSTR + | settp TAB:RB, TMP0 + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv2) + | sd TAB:RB, 0(CARG2) + | settp STR:RC, TMP1 + | b >1 + |. sd STR:RC, 0(CARG3) + | + |->vmeta_tsetb: // TMP0 = index + | daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | settp TMP0, TISNUM + | sd TMP0, 0(CARG3) + | + |->vmeta_tsetv: + |1: + | load_got lj_meta_tset + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + |. move CARG1, L + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + |. ld CARG1, 0(RA) + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | ins_next1 + | sd CARG1, 0(CRET1) + | ins_next2 + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | daddiu TMP1, BASE, -FRAME_CONT + | ld BASE, L->top + | sd PC, -24(BASE) // [cont|PC] + | dsubu PC, BASE, TMP1 + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | cleartp LFUNC:RB + | sd CARG1, 16(BASE) // Copy value to third argument. + | b ->vm_call_dispatch_f + |. li NARGS8:RC, 24 // 3 args for func(t, k, v) + | + |->vmeta_tsetr: + | load_got lj_tab_setinth + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + |. move CARG1, L + | // Returns TValue *. + | b ->BC_TSETR_Z + |. nop + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | // RA/RD point to o1/o2. + | move CARG2, RA + | move CARG3, RD + | load_got lj_meta_comp + | daddiu PC, PC, -4 + | sd BASE, L->base + | sd PC, SAVE_PC + | decode_OP1 CARG4, INS + | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + |. move CARG1, L + | // Returns 0/1 or TValue * (metamethod). + |3: + | sltiu AT, CRET1, 2 + | beqz AT, ->vmeta_binop + | negu TMP2, CRET1 + |4: + | lhu RD, OFS_RD(PC) + | daddiu PC, PC, 4 + | lui TMP1, (-(BCBIAS_J*4 >> 16) & 65535) + | sll RD, RD, 2 + | addu RD, RD, TMP1 + | and RD, RD, TMP2 + | daddu PC, PC, RD + |->cont_nop: + | ins_next + | + |->cont_ra: // RA = resultptr + | lbu TMP1, -4+OFS_RA(PC) + | ld CRET1, 0(RA) + | sll TMP1, TMP1, 3 + | daddu TMP1, BASE, TMP1 + | b ->cont_nop + |. sd CRET1, 0(TMP1) + | + |->cont_condt: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu AT, TMP0, LJ_TISTRUECOND + | b <4 + |. negu TMP2, AT // Branch if result is true. + | + |->cont_condf: // RA = resultptr + | ld TMP0, 0(RA) + | gettp TMP0, TMP0 + | sltiu AT, TMP0, LJ_TISTRUECOND + | b <4 + |. addiu TMP2, AT, -1 // Branch if result is false. + | + |->vmeta_equal: + | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. + | load_got lj_meta_equal + | cleartp LFUNC:CARG3, CARG2 + | cleartp LFUNC:CARG2, CARG1 + | move CARG4, TMP0 + | daddiu PC, PC, -4 + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + |. move CARG1, L + | // Returns 0/1 or TValue * (metamethod). + | b <3 + |. nop + | + |->vmeta_equal_cd: + |.if FFI + | load_got lj_meta_equal_cd + | move CARG2, INS + | daddiu PC, PC, -4 + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) + |. move CARG1, L + | // Returns 0/1 or TValue * (metamethod). + | b <3 + |. nop + |.endif + | + |->vmeta_istype: + | load_got lj_meta_istype + | daddiu PC, PC, -4 + | sd BASE, L->base + | srl CARG2, RA, 3 + | srl CARG3, RD, 3 + | sd PC, SAVE_PC + | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + |. move CARG1, L + | b ->cont_nop + |. nop + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_unm: + | move RC, RB + | + |->vmeta_arith: + | load_got lj_meta_arith + | sd BASE, L->base + | move CARG2, RA + | sd PC, SAVE_PC + | move CARG3, RB + | move CARG4, RC + | decode_OP1 CARG5, INS // CARG5 == RB. + | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + |. move CARG1, L + | // Returns NULL (finished) or TValue * (metamethod). + | beqz CRET1, ->cont_nop + |. nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 + | dsubu TMP1, CRET1, BASE + | sd PC, -24(CRET1) // [cont|PC] + | move TMP2, BASE + | daddiu PC, TMP1, FRAME_CONT + | move BASE, CRET1 + | b ->vm_call_dispatch + |. li NARGS8:RC, 16 // 2 args for func(o1, o2). + | + |->vmeta_len: + | // CARG2 already set by BC_LEN. +#if LJ_52 + | move MULTRES, CARG1 +#endif + | load_got lj_meta_len + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_meta_len // (lua_State *L, TValue *o) + |. move CARG1, L + | // Returns NULL (retry) or TValue * (metamethod base). +#if LJ_52 + | bnez CRET1, ->vmeta_binop // Binop call for compatibility. + |. nop + | b ->BC_LEN_Z + |. move CARG1, MULTRES +#else + | b ->vmeta_binop // Binop call for compatibility. + |. nop +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call: // Resolve and call __call metamethod. + | // TMP2 = old base, BASE = new base, RC = nargs*8 + | load_got lj_meta_call + | sd TMP2, L->base // This is the callers base! + | daddiu CARG2, BASE, -16 + | sd PC, SAVE_PC + | daddu CARG3, BASE, RC + | move MULTRES, NARGS8:RC + | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + |. move CARG1, L + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:RB + | ins_call + | + |->vmeta_callt: // Resolve __call for BC_CALLT. + | // BASE = old base, RA = new base, RC = nargs*8 + | load_got lj_meta_call + | sd BASE, L->base + | daddiu CARG2, RA, -16 + | sd PC, SAVE_PC + | daddu CARG3, RA, RC + | move MULTRES, NARGS8:RC + | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + |. move CARG1, L + | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. + | ld TMP1, FRAME_PC(BASE) + | daddiu NARGS8:RC, MULTRES, 8 // Got one more argument now. + | b ->BC_CALLT_Z + |. cleartp LFUNC:CARG3, RB + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | load_got lj_meta_for + | sd BASE, L->base + | move CARG2, RA + | sd PC, SAVE_PC + | move MULTRES, INS + | call_intern lj_meta_for // (lua_State *L, TValue *base) + |. move CARG1, L + |.if JIT + | decode_OP1 TMP0, MULTRES + | li AT, BC_JFORI + |.endif + | decode_RA8a RA, MULTRES + | decode_RD8a RD, MULTRES + | decode_RA8b RA + |.if JIT + | beq TMP0, AT, =>BC_JFORI + |. decode_RD8b RD + | b =>BC_FORI + |. nop + |.else + | b =>BC_FORI + |. decode_RD8b RD + |.endif + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | beqz NARGS8:RC, ->fff_fallback + |. ld CARG1, 0(BASE) + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | sltiu AT, NARGS8:RC, 16 + | ld CARG1, 0(BASE) + | bnez AT, ->fff_fallback + |. ld CARG2, 8(BASE) + |.endmacro + | + |.macro .ffunc_n, name // Caveat: has delay slot! + |->ff_ .. name: + | ld CARG1, 0(BASE) + | beqz NARGS8:RC, ->fff_fallback + | // Either ldc1 or the 1st instruction of checknum is in the delay slot. + | .FPU ldc1 FARG1, 0(BASE) + | checknum CARG1, ->fff_fallback + |.endmacro + | + |.macro .ffunc_nn, name // Caveat: has delay slot! + |->ff_ .. name: + | ld CARG1, 0(BASE) + | sltiu AT, NARGS8:RC, 16 + | ld CARG2, 8(BASE) + | bnez AT, ->fff_fallback + |. gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | sltiu TMP0, TMP0, LJ_TISNUM + | sltiu TMP1, TMP1, LJ_TISNUM + | .FPU ldc1 FARG1, 0(BASE) + | and TMP0, TMP0, TMP1 + | .FPU ldc1 FARG2, 8(BASE) + | beqz TMP0, ->fff_fallback + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1 and has delay slot! + |// MIPSR6: no delay slot, but a forbidden slot. + |.macro ffgccheck + | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) + | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) + | dsubu AT, TMP0, TMP1 + |.if MIPSR6 + | bgezalc AT, ->fff_gcstep + |.else + | bgezal AT, ->fff_gcstep + |.endif + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + |.ffunc_1 assert + | gettp AT, CARG1 + | sltiu AT, AT, LJ_TISTRUECOND + | beqz AT, ->fff_fallback + |. daddiu RA, BASE, -16 + | ld PC, FRAME_PC(BASE) + | addiu RD, NARGS8:RC, 8 // Compute (nresults+1)*8. + | daddu TMP2, RA, RD + | daddiu TMP1, BASE, 8 + | beq BASE, TMP2, ->fff_res // Done if exactly 1 argument. + |. sd CARG1, 0(RA) + |1: + | ld CRET1, 0(TMP1) + | sd CRET1, -16(TMP1) + | bne TMP1, TMP2, <1 + |. daddiu TMP1, TMP1, 8 + | b ->fff_res + |. nop + | + |.ffunc_1 type + | gettp TMP0, CARG1 + | sltu TMP1, TISNUM, TMP0 + | not TMP2, TMP0 + | li TMP3, ~LJ_TISNUM + |.if MIPSR6 + | selnez TMP2, TMP2, TMP1 + | seleqz TMP3, TMP3, TMP1 + | or TMP2, TMP2, TMP3 + |.else + | movz TMP2, TMP3, TMP1 + |.endif + | dsll TMP2, TMP2, 3 + | daddu TMP2, CFUNC:RB, TMP2 + | b ->fff_restv + |. ld CARG1, CFUNC:TMP2->upvalue + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | gettp TMP2, CARG1 + | daddiu TMP0, TMP2, -LJ_TTAB + | daddiu TMP1, TMP2, -LJ_TUDATA + |.if MIPSR6 + | selnez TMP0, TMP1, TMP0 + |.else + | movn TMP0, TMP1, TMP0 + |.endif + | bnez TMP0, >6 + |. cleartp TAB:CARG1 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | ld TAB:RB, TAB:CARG1->metatable + |2: + | ld STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) + | beqz TAB:RB, ->fff_restv + |. li CARG1, LJ_TNIL + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | dsll TMP0, TMP1, 5 + | dsll TMP1, TMP1, 3 + | dsubu TMP1, TMP0, TMP1 + | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | li CARG4, LJ_TSTR + | settp STR:RC, CARG4 // Tagged key to look for. + |3: // Rearranged logic, because we expect _not_ to find the key. + | ld TMP0, NODE:TMP2->key + | ld CARG1, NODE:TMP2->val + | ld NODE:TMP2, NODE:TMP2->next + | beq RC, TMP0, >5 + |. li AT, LJ_TTAB + | bnez NODE:TMP2, <3 + |. nop + |4: + | move CARG1, RB + | b ->fff_restv // Not found, keep default result. + |. settp CARG1, AT + |5: + | bne CARG1, TISNIL, ->fff_restv + |. nop + | b <4 // Ditto for nil value. + |. nop + | + |6: + | sltiu AT, TMP2, LJ_TISNUM + |.if MIPSR6 + | selnez TMP0, TISNUM, AT + | seleqz AT, TMP2, AT + | or TMP2, TMP0, AT + |.else + | movn TMP2, TISNUM, AT + |.endif + | dsll TMP2, TMP2, 3 + | dsubu TMP0, DISPATCH, TMP2 + | b <2 + |. ld TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0) + | + |.ffunc_2 setmetatable + | // Fast path: no mt for table yet and not clearing the mt. + | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | gettp TMP3, CARG2 + | ld TAB:TMP0, TAB:TMP1->metatable + | lbu TMP2, TAB:TMP1->marked + | daddiu AT, TMP3, -LJ_TTAB + | cleartp TAB:CARG2 + | or AT, AT, TAB:TMP0 + | bnez AT, ->fff_fallback + |. andi AT, TMP2, LJ_GC_BLACK // isblack(table) + | beqz AT, ->fff_restv + |. sd TAB:CARG2, TAB:TMP1->metatable + | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv + | + |.ffunc rawget + | ld CARG2, 0(BASE) + | sltiu AT, NARGS8:RC, 16 + | load_got lj_tab_get + | gettp TMP0, CARG2 + | cleartp CARG2 + | daddiu TMP0, TMP0, -LJ_TTAB + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback + |. daddiu CARG3, BASE, 8 + | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + |. move CARG1, L + | b ->fff_restv + |. ld CARG1, 0(CRET1) + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | ld CARG1, 0(BASE) + | xori AT, NARGS8:RC, 8 // Exactly one number argument. + | gettp TMP1, CARG1 + | sltu TMP0, TISNUM, TMP1 + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback + |. nop + | b ->fff_restv + |. nop + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | gettp TMP0, CARG1 + | daddiu AT, TMP0, -LJ_TSTR + | // A __tostring method in the string base metatable is ignored. + | beqz AT, ->fff_restv // String key? + | // Handle numbers inline, unless a number base metatable is present. + |. ld TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) + | sltu TMP0, TISNUM, TMP0 + | or TMP0, TMP0, TMP1 + | bnez TMP0, ->fff_fallback + |. sd BASE, L->base // Add frame since C call can throw. + |.if MIPSR6 + | sd PC, SAVE_PC // Redundant (but a defined value). + | ffgccheck + |.else + | ffgccheck + |. sd PC, SAVE_PC // Redundant (but a defined value). + |.endif + | load_got lj_strfmt_number + | move CARG1, L + | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) + |. move CARG2, BASE + | // Returns GCstr *. + | li AT, LJ_TSTR + | settp CRET1, AT + | b ->fff_restv + |. move CARG1, CRET1 + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | checktp CARG1, -LJ_TTAB, ->fff_fallback + | daddu TMP2, BASE, NARGS8:RC + | sd TISNIL, 0(TMP2) // Set missing 2nd arg to nil. + | load_got lj_tab_next + | ld PC, FRAME_PC(BASE) + | daddiu CARG2, BASE, 8 + | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + |. daddiu CARG3, BASE, -16 + | // Returns 1=found, 0=end, -1=error. + | daddiu RA, BASE, -16 + | bgtz CRET1, ->fff_res // Found key/value. + |. li RD, (2+1)*8 + | beqz CRET1, ->fff_restv // End of traversal: return nil. + |. move CARG1, TISNIL + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | b ->fff_fallback // Invalid key. + |. li RC, 2*8 + | + |.ffunc_1 pairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable + | ld TMP0, CFUNC:RB->upvalue[0] + | bnez TAB:TMP2, ->fff_fallback +#else + | ld TMP0, CFUNC:RB->upvalue[0] +#endif + |. daddiu RA, BASE, -16 + | sd TISNIL, 0(BASE) + | sd CARG1, -8(BASE) + | sd TMP0, 0(RA) + | b ->fff_res + |. li RD, (3+1)*8 + | + |.ffunc_2 ipairs_aux + | checktab CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + |. lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | ld PC, FRAME_PC(BASE) + | sextw TMP2, CARG2 + | addiu TMP2, TMP2, 1 + | sltu AT, TMP2, TMP0 + | daddiu RA, BASE, -16 + | zextw TMP0, TMP2 + | settp TMP0, TISNUM + | beqz AT, >2 // Not in array part? + |. sd TMP0, 0(RA) + | dsll TMP3, TMP2, 3 + | daddu TMP3, TMP1, TMP3 + | ld TMP1, 0(TMP3) + |1: + | beq TMP1, TISNIL, ->fff_res // End of iteration, return 0 results. + |. li RD, (0+1)*8 + | sd TMP1, -8(BASE) + | b ->fff_res + |. li RD, (2+1)*8 + |2: // Check for empty hash part first. Otherwise call C function. + | lw TMP0, TAB:CARG1->hmask + | load_got lj_tab_getinth + | beqz TMP0, ->fff_res + |. li RD, (0+1)*8 + | call_intern lj_tab_getinth // (GCtab *t, int32_t key) + |. move CARG2, TMP2 + | // Returns cTValue * or NULL. + | beqz CRET1, ->fff_res + |. li RD, (0+1)*8 + | b <1 + |. ld TMP1, 0(CRET1) + | + |.ffunc_1 ipairs + | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback + | ld PC, FRAME_PC(BASE) +#if LJ_52 + | ld TAB:TMP2, TAB:TMP1->metatable + | ld CFUNC:TMP0, CFUNC:RB->upvalue[0] + | bnez TAB:TMP2, ->fff_fallback +#else + | ld TMP0, CFUNC:RB->upvalue[0] +#endif + | daddiu RA, BASE, -16 + | dsll AT, TISNUM, 47 + | sd CARG1, -8(BASE) + | sd AT, 0(BASE) + | sd CFUNC:TMP0, 0(RA) + | b ->fff_res + |. li RD, (3+1)*8 + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall + | daddiu NARGS8:RC, NARGS8:RC, -8 + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | bltz NARGS8:RC, ->fff_fallback + |. move TMP2, BASE + | daddiu BASE, BASE, 16 + | // Remember active hook before pcall. + | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT + | andi TMP3, TMP3, 1 + | daddiu PC, TMP3, 16+FRAME_PCALL + | beqz NARGS8:RC, ->vm_call_dispatch + |1: + |. daddu TMP0, BASE, NARGS8:RC + |2: + | ld TMP1, -16(TMP0) + | sd TMP1, -8(TMP0) + | daddiu TMP0, TMP0, -8 + | bne TMP0, BASE, <2 + |. nop + | b ->vm_call_dispatch + |. nop + | + |.ffunc xpcall + | daddiu NARGS8:TMP0, NARGS8:RC, -16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) + | bltz NARGS8:TMP0, ->fff_fallback + |. lbu TMP1, DISPATCH_GL(hookmask)(DISPATCH) + | gettp AT, CARG2 + | daddiu AT, AT, -LJ_TFUNC + | bnez AT, ->fff_fallback // Traceback must be a function. + |. move TMP2, BASE + | move NARGS8:RC, NARGS8:TMP0 + | daddiu BASE, BASE, 24 + | // Remember active hook before pcall. + | srl TMP3, TMP3, HOOK_ACTIVE_SHIFT + | sd CARG2, 0(TMP2) // Swap function and traceback. + | andi TMP3, TMP3, 1 + | sd CARG1, 8(TMP2) + | beqz NARGS8:RC, ->vm_call_dispatch + |. daddiu PC, TMP3, 24+FRAME_PCALL + | b <1 + |. nop + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback + |.else + |.ffunc coroutine_wrap_aux + | ld L:CARG1, CFUNC:RB->upvalue[0].gcr + | cleartp L:CARG1 + |.endif + | lbu TMP0, L:CARG1->status + | ld TMP1, L:CARG1->cframe + | ld CARG2, L:CARG1->top + | ld TMP2, L:CARG1->base + | addiu AT, TMP0, -LUA_YIELD + | daddu CARG3, CARG2, TMP0 + | daddiu TMP3, CARG2, 8 + |.if MIPSR6 + | seleqz CARG2, CARG2, AT + | selnez TMP3, TMP3, AT + | bgtz AT, ->fff_fallback // st > LUA_YIELD? + |. or CARG2, TMP3, CARG2 + |.else + | bgtz AT, ->fff_fallback // st > LUA_YIELD? + |. movn CARG2, TMP3, AT + |.endif + | xor TMP2, TMP2, CARG3 + | bnez TMP1, ->fff_fallback // cframe != 0? + |. or AT, TMP2, TMP0 + | ld TMP0, L:CARG1->maxstack + | beqz AT, ->fff_fallback // base == top && st == 0? + |. ld PC, FRAME_PC(BASE) + | daddu TMP2, CARG2, NARGS8:RC + | sltu AT, TMP0, TMP2 + | bnez AT, ->fff_fallback // Stack overflow? + |. sd PC, SAVE_PC + | sd BASE, L->base + |1: + |.if resume + | daddiu BASE, BASE, 8 // Keep resumed thread in stack for GC. + | daddiu NARGS8:RC, NARGS8:RC, -8 + | daddiu TMP2, TMP2, -8 + |.endif + | sd TMP2, L:CARG1->top + | daddu TMP1, BASE, NARGS8:RC + | move CARG3, CARG2 + | sd BASE, L->top + |2: // Move args to coroutine. + | ld CRET1, 0(BASE) + | sltu AT, BASE, TMP1 + | beqz AT, >3 + |. daddiu BASE, BASE, 8 + | sd CRET1, 0(CARG3) + | b <2 + |. daddiu CARG3, CARG3, 8 + |3: + | bal ->vm_resume // (lua_State *L, TValue *base, 0, 0) + |. move L:RA, L:CARG1 + | // Returns thread status. + |4: + | ld TMP2, L:RA->base + | sltiu AT, CRET1, LUA_YIELD+1 + | ld TMP3, L:RA->top + | li_vmstate INTERP + | ld BASE, L->base + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | st_vmstate + | beqz AT, >8 + |. dsubu RD, TMP3, TMP2 + | ld TMP0, L->maxstack + | beqz RD, >6 // No results? + |. daddu TMP1, BASE, RD + | sltu AT, TMP0, TMP1 + | bnez AT, >9 // Need to grow stack? + |. daddu TMP3, TMP2, RD + | sd TMP2, L:RA->top // Clear coroutine stack. + | move TMP1, BASE + |5: // Move results from coroutine. + | ld CRET1, 0(TMP2) + | daddiu TMP2, TMP2, 8 + | sltu AT, TMP2, TMP3 + | sd CRET1, 0(TMP1) + | bnez AT, <5 + |. daddiu TMP1, TMP1, 8 + |6: + | andi TMP0, PC, FRAME_TYPE + |.if resume + | mov_true TMP1 + | daddiu RA, BASE, -8 + | sd TMP1, -8(BASE) // Prepend true to results. + | daddiu RD, RD, 16 + |.else + | move RA, BASE + | daddiu RD, RD, 8 + |.endif + |7: + | sd PC, SAVE_PC + | beqz TMP0, ->BC_RET_Z + |. move MULTRES, RD + | b ->vm_return + |. nop + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | daddiu TMP3, TMP3, -8 + | mov_false TMP1 + | ld CRET1, 0(TMP3) + | sd TMP3, L:RA->top // Remove error from coroutine stack. + | li RD, (2+1)*8 + | sd TMP1, -8(BASE) // Prepend false to results. + | daddiu RA, BASE, -8 + | sd CRET1, 0(BASE) // Copy error message. + | b <7 + |. andi TMP0, PC, FRAME_TYPE + |.else + | load_got lj_ffh_coroutine_wrap_err + | move CARG2, L:RA + | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + |. move CARG1, L + |.endif + | + |9: // Handle stack expansion on return from yield. + | load_got lj_state_growstack + | srl CARG2, RD, 3 + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | b <4 + |. li CRET1, 0 + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | ld TMP0, L->cframe + | daddu TMP1, BASE, NARGS8:RC + | sd BASE, L->base + | andi TMP0, TMP0, CFRAME_RESUME + | sd TMP1, L->top + | beqz TMP0, ->fff_fallback + |. li CRET1, LUA_YIELD + | sd r0, L->cframe + | b ->vm_leave_unw + |. sb CRET1, L->status + | + |//-- Math library ------------------------------------------------------- + | + |.ffunc_1 math_abs + | gettp CARG2, CARG1 + | daddiu AT, CARG2, -LJ_TISNUM + | bnez AT, >1 + |. sextw TMP1, CARG1 + | sra TMP0, TMP1, 31 // Extract sign. + | xor TMP1, TMP1, TMP0 + | dsubu CARG1, TMP1, TMP0 + | dsll TMP3, CARG1, 32 + | bgez TMP3, ->fff_restv + |. settp CARG1, TISNUM + | li CARG1, 0x41e0 // 2^31 as a double. + | b ->fff_restv + |. dsll CARG1, CARG1, 48 + |1: + | sltiu AT, CARG2, LJ_TISNUM + | beqz AT, ->fff_fallback + |. dextm CARG1, CARG1, 0, 30 + |// fallthrough + | + |->fff_restv: + | // CARG1 = TValue result. + | ld PC, FRAME_PC(BASE) + | daddiu RA, BASE, -16 + | sd CARG1, -16(BASE) + |->fff_res1: + | // RA = results, PC = return. + | li RD, (1+1)*8 + |->fff_res: + | // RA = results, RD = (nresults+1)*8, PC = return. + | andi TMP0, PC, FRAME_TYPE + | bnez TMP0, ->vm_return + |. move MULTRES, RD + | lw INS, -4(PC) + | decode_RB8a RB, INS + | decode_RB8b RB + |5: + | sltu AT, RD, RB + | bnez AT, >6 // More results expected? + |. decode_RA8a TMP0, INS + | decode_RA8b TMP0 + | ins_next1 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | dsubu BASE, RA, TMP0 + | ins_next2 + | + |6: // Fill up results with nil. + | daddu TMP1, RA, RD + | daddiu RD, RD, 8 + | b <5 + |. sd TISNIL, -8(TMP1) + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | load_got func + | call_extern + |. nop + | b ->fff_resn + |. nop + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + |. load_got func + | call_extern + |. nop + | b ->fff_resn + |. nop + |.endmacro + | + |// TODO: Return integer type if result is integer (own sf implementation). + |.macro math_round, func + |->ff_math_ .. func: + | ld CARG1, 0(BASE) + | beqz NARGS8:RC, ->fff_fallback + |. gettp TMP0, CARG1 + | beq TMP0, TISNUM, ->fff_restv + |. sltu AT, TMP0, TISNUM + | beqz AT, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) + | bal ->vm_ .. func + |. nop + |.else + |. load_got func + | call_extern + |. nop + |.endif + | b ->fff_resn + |. nop + |.endmacro + | + | math_round floor + | math_round ceil + | + |.ffunc math_log + | li AT, 8 + | bne NARGS8:RC, AT, ->fff_fallback // Exactly 1 argument. + |. ld CARG1, 0(BASE) + | checknum CARG1, ->fff_fallback + |. load_got log + |.if FPU + | call_extern + |. ldc1 FARG1, 0(BASE) + |.else + | call_extern + |. nop + |.endif + | b ->fff_resn + |. nop + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.if FPU + |.ffunc_n math_sqrt + |. sqrt.d FRET1, FARG1 + |// fallthrough to ->fff_resn + |.else + | math_extern sqrt + |.endif + | + |->fff_resn: + | ld PC, FRAME_PC(BASE) + | daddiu RA, BASE, -16 + | b ->fff_res1 + |.if FPU + |. sdc1 FRET1, 0(RA) + |.else + |. sd CRET1, 0(RA) + |.endif + | + | + |.ffunc_2 math_ldexp + | checknum CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback + |. load_got ldexp + | .FPU ldc1 FARG1, 0(BASE) + | call_extern + |. lw CARG2, 8+LO(BASE) + | b ->fff_resn + |. nop + | + |.ffunc_n math_frexp + | load_got frexp + | ld PC, FRAME_PC(BASE) + | call_extern + |. daddiu CARG2, DISPATCH, DISPATCH_GL(tmptv) + | lw TMP1, DISPATCH_GL(tmptv)(DISPATCH) + | daddiu RA, BASE, -16 + |.if FPU + | mtc1 TMP1, FARG2 + | sdc1 FRET1, 0(RA) + | cvt.d.w FARG2, FARG2 + | sdc1 FARG2, 8(RA) + |.else + | sd CRET1, 0(RA) + | zextw TMP1, TMP1 + | settp TMP1, TISNUM + | sd TMP1, 8(RA) + |.endif + | b ->fff_res + |. li RD, (2+1)*8 + | + |.ffunc_n math_modf + | load_got modf + | ld PC, FRAME_PC(BASE) + | call_extern + |. daddiu CARG2, BASE, -16 + | daddiu RA, BASE, -16 + |.if FPU + | sdc1 FRET1, -8(BASE) + |.else + | sd CRET1, -8(BASE) + |.endif + | b ->fff_res + |. li RD, (2+1)*8 + | + |.macro math_minmax, name, intins, intinsc, fpins + | .ffunc_1 name + | daddu TMP3, BASE, NARGS8:RC + | checkint CARG1, >5 + |. daddiu TMP2, BASE, 8 + |1: // Handle integers. + | beq TMP2, TMP3, ->fff_restv + |. ld CARG2, 0(TMP2) + | checkint CARG2, >3 + |. sextw CARG1, CARG1 + | lw CARG2, LO(TMP2) + |. slt AT, CARG1, CARG2 + |.if MIPSR6 + | intins TMP1, CARG2, AT + | intinsc CARG1, CARG1, AT + | or CARG1, CARG1, TMP1 + |.else + | intins CARG1, CARG2, AT + |.endif + | daddiu TMP2, TMP2, 8 + | zextw CARG1, CARG1 + | b <1 + |. settp CARG1, TISNUM + | + |3: // Convert intermediate result to number and continue with number loop. + | checknum CARG2, ->fff_fallback + |.if FPU + |. mtc1 CARG1, FRET1 + | cvt.d.w FRET1, FRET1 + | b >7 + |. ldc1 FARG1, 0(TMP2) + |.else + |. nop + | bal ->vm_sfi2d_1 + |. nop + | b >7 + |. nop + |.endif + | + |5: + | .FPU ldc1 FRET1, 0(BASE) + | checknum CARG1, ->fff_fallback + |6: // Handle numbers. + |. ld CARG2, 0(TMP2) + | beq TMP2, TMP3, ->fff_resn + |.if FPU + | ldc1 FARG1, 0(TMP2) + |.else + | move CRET1, CARG1 + |.endif + | checknum CARG2, >8 + |. nop + |7: + |.if FPU + |.if MIPSR6 + | fpins FRET1, FRET1, FARG1 + |.else + |.if fpins // ismax + | c.olt.d FARG1, FRET1 + |.else + | c.olt.d FRET1, FARG1 + |.endif + | movf.d FRET1, FARG1 + |.endif + |.else + |.if fpins // ismax + | bal ->vm_sfcmpogt + |.else + | bal ->vm_sfcmpolt + |.endif + |. nop + |.if MIPSR6 + | seleqz AT, CARG2, CRET1 + | selnez CARG1, CARG1, CRET1 + | or CARG1, CARG1, AT + |.else + | movz CARG1, CARG2, CRET1 + |.endif + |.endif + | b <6 + |. daddiu TMP2, TMP2, 8 + | + |8: // Convert integer to number and continue with number loop. + | checkint CARG2, ->fff_fallback + |.if FPU + |. lwc1 FARG1, LO(TMP2) + | b <7 + |. cvt.d.w FARG1, FARG1 + |.else + |. lw CARG2, LO(TMP2) + | bal ->vm_sfi2d_2 + |. nop + | b <7 + |. nop + |.endif + | + |.endmacro + | + |.if MIPSR6 + | math_minmax math_min, seleqz, selnez, min.d + | math_minmax math_max, selnez, seleqz, max.d + |.else + | math_minmax math_min, movz, _, 0 + | math_minmax math_max, movn, _, 1 + |.endif + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori AT, NARGS8:RC, 8 + | daddiu TMP0, TMP0, -LJ_TSTR + | or AT, AT, TMP0 + | bnez AT, ->fff_fallback // Need exactly 1 string argument. + |. cleartp STR:CARG1 + | lw TMP0, STR:CARG1->len + | daddiu RA, BASE, -16 + | ld PC, FRAME_PC(BASE) + | sltu RD, r0, TMP0 + | lbu TMP1, STR:CARG1[1] // Access is always ok (NUL at end). + | addiu RD, RD, 1 + | sll RD, RD, 3 // RD = ((str->len != 0)+1)*8 + | settp TMP1, TISNUM + | b ->fff_res + |. sd TMP1, 0(RA) + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + |.if not MIPSR6 + |. nop + |.endif + | ld CARG1, 0(BASE) + | gettp TMP0, CARG1 + | xori AT, NARGS8:RC, 8 // Exactly 1 argument. + | daddiu TMP0, TMP0, -LJ_TISNUM // Integer. + | li TMP1, 255 + | sextw CARG1, CARG1 + | or AT, AT, TMP0 + | sltu TMP1, TMP1, CARG1 // !(255 < n). + | or AT, AT, TMP1 + | bnez AT, ->fff_fallback + |. li CARG3, 1 + | daddiu CARG2, sp, TMPD_OFS + | sb CARG1, TMPD + |->fff_newstr: + | load_got lj_str_new + | sd BASE, L->base + | sd PC, SAVE_PC + | call_intern lj_str_new // (lua_State *L, char *str, size_t l) + |. move CARG1, L + | // Returns GCstr *. + | ld BASE, L->base + |->fff_resstr: + | li AT, LJ_TSTR + | settp CRET1, AT + | b ->fff_restv + |. move CARG1, CRET1 + | + |.ffunc string_sub + | ffgccheck + |.if not MIPSR6 + |. nop + |.endif + | addiu AT, NARGS8:RC, -16 + | ld TMP0, 0(BASE) + | bltz AT, ->fff_fallback + |. gettp TMP3, TMP0 + | cleartp STR:CARG1, TMP0 + | ld CARG2, 8(BASE) + | beqz AT, >1 + |. li CARG4, -1 + | ld CARG3, 16(BASE) + | checkint CARG3, ->fff_fallback + |. sextw CARG4, CARG3 + |1: + | checkint CARG2, ->fff_fallback + |. li AT, LJ_TSTR + | bne TMP3, AT, ->fff_fallback + |. sextw CARG3, CARG2 + | lw CARG2, STR:CARG1->len + | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end + | slt AT, CARG4, r0 + | addiu TMP0, CARG2, 1 + | addu TMP1, CARG4, TMP0 + | slt TMP3, CARG3, r0 + |.if MIPSR6 + | seleqz CARG4, CARG4, AT + | selnez TMP1, TMP1, AT + | or CARG4, TMP1, CARG4 // if (end < 0) end += len+1 + |.else + | movn CARG4, TMP1, AT // if (end < 0) end += len+1 + |.endif + | addu TMP1, CARG3, TMP0 + |.if MIPSR6 + | selnez TMP1, TMP1, TMP3 + | seleqz CARG3, CARG3, TMP3 + | or CARG3, TMP1, CARG3 // if (start < 0) start += len+1 + | li TMP2, 1 + | slt AT, CARG4, r0 + | slt TMP3, r0, CARG3 + | seleqz CARG4, CARG4, AT // if (end < 0) end = 0 + | selnez CARG3, CARG3, TMP3 + | seleqz TMP2, TMP2, TMP3 + | or CARG3, TMP2, CARG3 // if (start < 1) start = 1 + | slt AT, CARG2, CARG4 + | seleqz CARG4, CARG4, AT + | selnez CARG2, CARG2, AT + | or CARG4, CARG2, CARG4 // if (end > len) end = len + |.else + | movn CARG3, TMP1, TMP3 // if (start < 0) start += len+1 + | li TMP2, 1 + | slt AT, CARG4, r0 + | slt TMP3, r0, CARG3 + | movn CARG4, r0, AT // if (end < 0) end = 0 + | movz CARG3, TMP2, TMP3 // if (start < 1) start = 1 + | slt AT, CARG2, CARG4 + | movn CARG4, CARG2, AT // if (end > len) end = len + |.endif + | daddu CARG2, STR:CARG1, CARG3 + | subu CARG3, CARG4, CARG3 // len = end - start + | daddiu CARG2, CARG2, sizeof(GCstr)-1 + | bgez CARG3, ->fff_newstr + |. addiu CARG3, CARG3, 1 // len++ + |->fff_emptystr: // Return empty string. + | li AT, LJ_TSTR + | daddiu STR:CARG1, DISPATCH, DISPATCH_GL(strempty) + | b ->fff_restv + |. settp CARG1, AT + | + |.macro ffstring_op, name + | .ffunc string_ .. name + | ffgccheck + |. nop + | beqz NARGS8:RC, ->fff_fallback + |. ld CARG2, 0(BASE) + | checkstr STR:CARG2, ->fff_fallback + | daddiu SBUF:CARG1, DISPATCH, DISPATCH_GL(tmpbuf) + | load_got lj_buf_putstr_ .. name + | ld TMP0, SBUF:CARG1->b + | sd L, SBUF:CARG1->L + | sd BASE, L->base + | sd TMP0, SBUF:CARG1->w + | call_intern extern lj_buf_putstr_ .. name + |. sd PC, SAVE_PC + | load_got lj_buf_tostr + | call_intern lj_buf_tostr + |. move SBUF:CARG1, SBUF:CRET1 + | b ->fff_resstr + |. ld BASE, L->base + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |->vm_tobit_fb: + | beqz TMP1, ->fff_fallback + |.if FPU + |. ldc1 FARG1, 0(BASE) + | add.d FARG1, FARG1, TOBIT + | mfc1 CRET1, FARG1 + | jr ra + |. zextw CRET1, CRET1 + |.else + |// FP number to bit conversion for soft-float. + |->vm_tobit: + | dsll TMP0, CARG1, 1 + | li CARG3, 1076 + | dsrl AT, TMP0, 53 + | dsubu CARG3, CARG3, AT + | sltiu AT, CARG3, 54 + | beqz AT, >1 + |. dextm TMP0, TMP0, 0, 20 + | dinsu TMP0, AT, 21, 21 + | slt AT, CARG1, r0 + | dsrlv CRET1, TMP0, CARG3 + | dsubu TMP0, r0, CRET1 + |.if MIPSR6 + | selnez TMP0, TMP0, AT + | seleqz CRET1, CRET1, AT + | or CRET1, CRET1, TMP0 + |.else + | movn CRET1, TMP0, AT + |.endif + | jr ra + |. zextw CRET1, CRET1 + |1: + | jr ra + |. move CRET1, r0 + | + |// FP number to int conversion with a check for soft-float. + |// Modifies CARG1, CRET1, CRET2, TMP0, AT. + |->vm_tointg: + |.if JIT + | dsll CRET2, CARG1, 1 + | beqz CRET2, >2 + |. li TMP0, 1076 + | dsrl AT, CRET2, 53 + | dsubu TMP0, TMP0, AT + | sltiu AT, TMP0, 54 + | beqz AT, >1 + |. dextm CRET2, CRET2, 0, 20 + | dinsu CRET2, AT, 21, 21 + | slt AT, CARG1, r0 + | dsrlv CRET1, CRET2, TMP0 + | dsubu CARG1, r0, CRET1 + |.if MIPSR6 + | seleqz CRET1, CRET1, AT + | selnez CARG1, CARG1, AT + | or CRET1, CRET1, CARG1 + |.else + | movn CRET1, CARG1, AT + |.endif + | li CARG1, 64 + | subu TMP0, CARG1, TMP0 + | dsllv CRET2, CRET2, TMP0 // Integer check. + | sextw AT, CRET1 + | xor AT, CRET1, AT // Range check. + |.if MIPSR6 + | seleqz AT, AT, CRET2 + | selnez CRET2, CRET2, CRET2 + | jr ra + |. or CRET2, AT, CRET2 + |.else + | jr ra + |. movz CRET2, AT, CRET2 + |.endif + |1: + | jr ra + |. li CRET2, 1 + |2: + | jr ra + |. move CRET1, r0 + |.endif + |.endif + | + |.macro .ffunc_bit, name + | .ffunc_1 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >6 + |. zextw CRET1, CARG1 + | bal ->vm_tobit_fb + |. sltiu TMP1, TMP0, LJ_TISNUM + |6: + |.endmacro + | + |.macro .ffunc_bit_op, name, bins + | .ffunc_bit name + | daddiu TMP2, BASE, 8 + | daddu TMP3, BASE, NARGS8:RC + |1: + | beq TMP2, TMP3, ->fff_resi + |. ld CARG1, 0(TMP2) + | gettp TMP0, CARG1 + |.if FPU + | bne TMP0, TISNUM, >2 + |. daddiu TMP2, TMP2, 8 + | zextw CARG1, CARG1 + | b <1 + |. bins CRET1, CRET1, CARG1 + |2: + | ldc1 FARG1, -8(TMP2) + | sltiu AT, TMP0, LJ_TISNUM + | beqz AT, ->fff_fallback + |. add.d FARG1, FARG1, TOBIT + | mfc1 CARG1, FARG1 + | zextw CARG1, CARG1 + | b <1 + |. bins CRET1, CRET1, CARG1 + |.else + | beq TMP0, TISNUM, >2 + |. move CRET2, CRET1 + | bal ->vm_tobit_fb + |. sltiu TMP1, TMP0, LJ_TISNUM + | move CARG1, CRET2 + |2: + | zextw CARG1, CARG1 + | bins CRET1, CRET1, CARG1 + | b <1 + |. daddiu TMP2, TMP2, 8 + |.endif + |.endmacro + | + |.ffunc_bit_op band, and + |.ffunc_bit_op bor, or + |.ffunc_bit_op bxor, xor + | + |.ffunc_bit bswap + | dsrl TMP0, CRET1, 8 + | dsrl TMP1, CRET1, 24 + | andi TMP2, TMP0, 0xff00 + | dins TMP1, CRET1, 24, 31 + | dins TMP2, TMP0, 16, 23 + | b ->fff_resi + |. or CRET1, TMP1, TMP2 + | + |.ffunc_bit bnot + | not CRET1, CRET1 + | b ->fff_resi + |. zextw CRET1, CRET1 + | + |.macro .ffunc_bit_sh, name, shins, shmod + | .ffunc_2 bit_..name + | gettp TMP0, CARG1 + | beq TMP0, TISNUM, >1 + |. nop + | bal ->vm_tobit_fb + |. sltiu TMP1, TMP0, LJ_TISNUM + | move CARG1, CRET1 + |1: + | gettp TMP0, CARG2 + | bne TMP0, TISNUM, ->fff_fallback + |. zextw CARG2, CARG2 + | sextw CARG1, CARG1 + |.if shmod == 1 + | negu CARG2, CARG2 + |.endif + | shins CRET1, CARG1, CARG2 + | b ->fff_resi + |. zextw CRET1, CRET1 + |.endmacro + | + |.ffunc_bit_sh lshift, sllv, 0 + |.ffunc_bit_sh rshift, srlv, 0 + |.ffunc_bit_sh arshift, srav, 0 + |.ffunc_bit_sh rol, rotrv, 1 + |.ffunc_bit_sh ror, rotrv, 0 + | + |.ffunc_bit tobit + |->fff_resi: + | ld PC, FRAME_PC(BASE) + | daddiu RA, BASE, -16 + | settp CRET1, TISNUM + | b ->fff_res1 + |. sd CRET1, -16(BASE) + | + |//----------------------------------------------------------------------- + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RB = CFUNC, RC = nargs*8 + | ld TMP3, CFUNC:RB->f + | daddu TMP1, BASE, NARGS8:RC + | ld PC, FRAME_PC(BASE) // Fallback may overwrite PC. + | daddiu TMP0, TMP1, 8*LUA_MINSTACK + | ld TMP2, L->maxstack + | sd PC, SAVE_PC // Redundant (but a defined value). + | sltu AT, TMP2, TMP0 + | sd BASE, L->base + | sd TMP1, L->top + | bnez AT, >5 // Need to grow stack. + |. move CFUNCADDR, TMP3 + | jalr TMP3 // (lua_State *L) + |. move CARG1, L + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | ld BASE, L->base + | sll RD, CRET1, 3 + | bgtz CRET1, ->fff_res // Returned nresults+1? + |. daddiu RA, BASE, -16 + |1: // Returned 0 or -1: retry fast path. + | ld LFUNC:RB, FRAME_FUNC(BASE) + | ld TMP0, L->top + | cleartp LFUNC:RB + | bnez CRET1, ->vm_call_tail // Returned -1? + |. dsubu NARGS8:RC, TMP0, BASE + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | andi TMP0, PC, FRAME_TYPE + | li AT, -4 + | bnez TMP0, >3 + |. and TMP1, PC, AT + | lbu TMP1, OFS_RA(PC) + | sll TMP1, TMP1, 3 + | addiu TMP1, TMP1, 16 + |3: + | b ->vm_call_dispatch // Resolve again for tailcall. + |. dsubu TMP2, BASE, TMP1 + | + |5: // Grow stack for fallback handler. + | load_got lj_state_growstack + | li CARG2, LUA_MINSTACK + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | ld BASE, L->base + | b <1 + |. li CRET1, 0 // Force retry. + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RC = nargs*8 + | move MULTRES, ra + | load_got lj_gc_step + | sd BASE, L->base + | daddu TMP0, BASE, NARGS8:RC + | sd PC, SAVE_PC // Redundant (but a defined value). + | sd TMP0, L->top + | call_intern lj_gc_step // (lua_State *L) + |. move CARG1, L + | ld BASE, L->base + | move ra, MULTRES + | ld TMP0, L->top + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | jr ra + |. dsubu NARGS8:RC, TMP0, BASE + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + |.if JIT + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | andi AT, TMP3, HOOK_VMEVENT // No recording while in vmevent. + | bnez AT, >5 + | // Decrement the hookcount for consistency, but always do the call. + |. lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | andi AT, TMP3, HOOK_ACTIVE + | bnez AT, >1 + |. addiu TMP2, TMP2, -1 + | andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | beqz AT, >1 + |. nop + | b >1 + |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + |.endif + | + |->vm_rethook: // Dispatch target for return hooks. + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | andi AT, TMP3, HOOK_ACTIVE // Hook already active? + | beqz AT, >1 + |5: // Re-dispatch to static ins. + |. ld AT, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. + | jr AT + |. nop + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | lbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) + | lw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | andi AT, TMP3, HOOK_ACTIVE // Hook already active? + | bnez AT, <5 + |. andi AT, TMP3, LUA_MASKLINE|LUA_MASKCOUNT + | beqz AT, <5 + |. addiu TMP2, TMP2, -1 + | beqz TMP2, >1 + |. sw TMP2, DISPATCH_GL(hookcount)(DISPATCH) + | andi AT, TMP3, LUA_MASKLINE + | beqz AT, <5 + |1: + |. load_got lj_dispatch_ins + | sw MULTRES, SAVE_MULTRES + | move CARG2, PC + | sd BASE, L->base + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |. move CARG1, L + |3: + | ld BASE, L->base + |4: // Re-dispatch to static ins. + | lw INS, -4(PC) + | decode_OP8a TMP1, INS + | decode_OP8b TMP1 + | daddu TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld AT, GG_DISP2STATIC(TMP0) + | decode_RA8a RA, INS + | decode_RD8b RD + | jr AT + | decode_RA8b RA + | + |->cont_hook: // Continue from hook yield. + | daddiu PC, PC, 4 + | b <4 + |. lw MULTRES, -24+LO(RB) // Restore MULTRES for *M ins. + | + |->vm_hotloop: // Hot loop counter underflow. + |.if JIT + | ld LFUNC:TMP1, FRAME_FUNC(BASE) + | daddiu CARG1, DISPATCH, GG_DISP2J + | cleartp LFUNC:TMP1 + | sd PC, SAVE_PC + | ld TMP1, LFUNC:TMP1->pc + | move CARG2, PC + | sd L, DISPATCH_J(L)(DISPATCH) + | lbu TMP1, PC2PROTO(framesize)(TMP1) + | load_got lj_trace_hot + | sd BASE, L->base + | dsll TMP1, TMP1, 3 + | daddu TMP1, BASE, TMP1 + | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) + |. sd TMP1, L->top + | b <3 + |. nop + |.endif + | + | + |->vm_callhook: // Dispatch target for call hooks. + |.if JIT + | b >1 + |.endif + |. move CARG2, PC + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | ori CARG2, PC, 1 + |1: + |.endif + | load_got lj_dispatch_call + | daddu TMP0, BASE, RC + | sd PC, SAVE_PC + | sd BASE, L->base + | dsubu RA, RA, BASE + | sd TMP0, L->top + | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) + |. move CARG1, L + | // Returns ASMFunction. + | ld BASE, L->base + | ld TMP0, L->top + | sd r0, SAVE_PC // Invalidate for subsequent line hook. + | dsubu NARGS8:RC, TMP0, BASE + | daddu RA, BASE, RA + | ld LFUNC:RB, FRAME_FUNC(BASE) + | cleartp LFUNC:RB + | jr CRET1 + |. lw INS, -4(PC) + | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, RB = meta base + | lw INS, -4(PC) + | ld TRACE:TMP2, -40(RB) // Save previous trace. + | decode_RA8a RC, INS + | daddiu AT, MULTRES, -8 + | cleartp TRACE:TMP2 + | decode_RA8b RC + | beqz AT, >2 + |. daddu RC, BASE, RC // Call base. + |1: // Move results down. + | ld CARG1, 0(RA) + | daddiu AT, AT, -8 + | daddiu RA, RA, 8 + | sd CARG1, 0(RC) + | bnez AT, <1 + |. daddiu RC, RC, 8 + |2: + | decode_RA8a RA, INS + | decode_RB8a RB, INS + | decode_RA8b RA + | decode_RB8b RB + | daddu RA, RA, RB + | daddu RA, BASE, RA + |3: + | sltu AT, RC, RA + | bnez AT, >9 // More results wanted? + |. nop + | + | lhu TMP3, TRACE:TMP2->traceno + | lhu RD, TRACE:TMP2->link + | beq RD, TMP3, ->cont_nop // Blacklisted. + |. load_got lj_dispatch_stitch + | bnez RD, =>BC_JLOOP // Jump to stitched trace. + |. sll RD, RD, 3 + | + | // Stitch a new trace to the previous trace. + | sw TMP3, DISPATCH_J(exitno)(DISPATCH) + | sd L, DISPATCH_J(L)(DISPATCH) + | sd BASE, L->base + | daddiu CARG1, DISPATCH, GG_DISP2J + | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + |. move CARG2, PC + | b ->cont_nop + |. ld BASE, L->base + | + |9: + | sd TISNIL, 0(RC) + | b <3 + |. daddiu RC, RC, 8 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | load_got lj_dispatch_profile + | sw MULTRES, SAVE_MULTRES + | move CARG2, PC + | sd BASE, L->base + | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + |. move CARG1, L + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | daddiu PC, PC, -4 + | b ->cont_nop + |. ld BASE, L->base +#endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro savex_, a, b + |.if FPU + | sdc1 f..a, a*8(sp) + | sdc1 f..b, b*8(sp) + | sd r..a, 32*8+a*8(sp) + | sd r..b, 32*8+b*8(sp) + |.else + | sd r..a, a*8(sp) + | sd r..b, b*8(sp) + |.endif + |.endmacro + | + |->vm_exit_handler: + |.if JIT + |.if FPU + | daddiu sp, sp, -(32*8+32*8) + |.else + | daddiu sp, sp, -(32*8) + |.endif + | savex_ 0, 1 + | savex_ 2, 3 + | savex_ 4, 5 + | savex_ 6, 7 + | savex_ 8, 9 + | savex_ 10, 11 + | savex_ 12, 13 + | savex_ 14, 15 + | savex_ 16, 17 + | savex_ 18, 19 + | savex_ 20, 21 + | savex_ 22, 23 + | savex_ 24, 25 + | savex_ 26, 27 + | savex_ 28, 30 + |.if FPU + | sdc1 f29, 29*8(sp) + | sdc1 f31, 31*8(sp) + | sd r0, 32*8+31*8(sp) // Clear RID_TMP. + | daddiu TMP2, sp, 32*8+32*8 // Recompute original value of sp. + | sd TMP2, 32*8+29*8(sp) // Store sp in RID_SP + |.else + | sd r0, 31*8(sp) // Clear RID_TMP. + | daddiu TMP2, sp, 32*8 // Recompute original value of sp. + | sd TMP2, 29*8(sp) // Store sp in RID_SP + |.endif + | li_vmstate EXIT + | daddiu DISPATCH, JGL, -GG_DISP2G-32768 + | lw TMP1, 0(TMP2) // Load exit number. + | st_vmstate + | ld L, DISPATCH_GL(cur_L)(DISPATCH) + | ld BASE, DISPATCH_GL(jit_base)(DISPATCH) + | load_got lj_trace_exit + | sd L, DISPATCH_J(L)(DISPATCH) + | sw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. + | sd BASE, L->base + | sw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. + | daddiu CARG1, DISPATCH, GG_DISP2J + | sd r0, DISPATCH_GL(jit_base)(DISPATCH) + | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) + |. move CARG2, sp + | // Returns MULTRES (unscaled) or negated error code. + | ld TMP1, L->cframe + | li AT, -4 + | ld BASE, L->base + | and sp, TMP1, AT + | ld PC, SAVE_PC // Get SAVE_PC. + | b >1 + |. sd L, SAVE_L // Set SAVE_L (on-trace resume/yield). + |.endif + |->vm_exit_interp: + |.if JIT + | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. + | ld L, SAVE_L + | daddiu DISPATCH, JGL, -GG_DISP2G-32768 + | sd BASE, L->base + |1: + | bltz CRET1, >9 // Check for error from exit. + |. ld LFUNC:RB, FRAME_FUNC(BASE) + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | dsll MULTRES, CRET1, 3 + | cleartp LFUNC:RB + | sw MULTRES, SAVE_MULTRES + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM // Setup type comparison constants. + | .FPU mtc1 TMP3, TOBIT + | ld TMP1, LFUNC:RB->pc + | sd r0, DISPATCH_GL(jit_base)(DISPATCH) + | ld KBASE, PC2PROTO(k)(TMP1) + | .FPU cvt.d.s TOBIT, TOBIT + | // Modified copy of ins_next which handles function header dispatch, too. + | lw INS, 0(PC) + | daddiu PC, PC, 4 + | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 + | sw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) + | decode_OP8a TMP1, INS + | decode_OP8b TMP1 + | sltiu TMP2, TMP1, BC_FUNCF*8 + | daddu TMP0, DISPATCH, TMP1 + | decode_RD8a RD, INS + | ld AT, 0(TMP0) + | decode_RA8a RA, INS + | beqz TMP2, >2 + |. decode_RA8b RA + | jr AT + |. decode_RD8b RD + |2: + | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? + | bnez TMP2, >3 + |. ld TMP1, FRAME_PC(BASE) + | // Check frame below fast function. + | andi TMP0, TMP1, FRAME_TYPE + | bnez TMP0, >3 // Trace stitching continuation? + |. nop + | // Otherwise set KBASE for Lua function below fast function. + | lw TMP2, -4(TMP1) + | decode_RA8a TMP0, TMP2 + | decode_RA8b TMP0 + | dsubu TMP1, BASE, TMP0 + | ld LFUNC:TMP2, -32(TMP1) + | cleartp LFUNC:TMP2 + | ld TMP1, LFUNC:TMP2->pc + | ld KBASE, PC2PROTO(k)(TMP1) + |3: + | daddiu RC, MULTRES, -8 + | jr AT + |. daddu RA, RA, BASE + | + |9: // Rethrow error from the right C frame. + | load_got lj_err_trace + | sub CARG2, r0, CRET1 + | call_intern lj_err_trace // (lua_State *L, int errcode) + |. move CARG1, L + |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Hard-float round to integer. + |// Modifies AT, TMP0, FRET1, FRET2, f4. Keeps all others incl. FARG1. + |// MIPSR6: Modifies FTMP1, too. + |.macro vm_round_hf, func + | lui TMP0, 0x4330 // Hiword of 2^52 (double). + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 + | abs.d FRET2, FARG1 // |x| + | dmfc1 AT, FARG1 + |.if MIPSR6 + | cmp.lt.d FTMP1, FRET2, f4 + | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 + | bc1eqz FTMP1, >1 // Truncate only if |x| < 2^52. + |.else + | c.olt.d 0, FRET2, f4 + | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 + | bc1f 0, >1 // Truncate only if |x| < 2^52. + |.endif + |. sub.d FRET1, FRET1, f4 + | slt AT, AT, r0 + |.if "func" == "ceil" + | lui TMP0, 0xbff0 // Hiword of -1 (double). Preserves -0. + |.else + | lui TMP0, 0x3ff0 // Hiword of +1 (double). + |.endif + |.if "func" == "trunc" + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 + |.if MIPSR6 + | cmp.lt.d FTMP1, FRET2, FRET1 // |x| < result? + | sub.d FRET2, FRET1, f4 + | sel.d FTMP1, FRET1, FRET2 // If yes, subtract +1. + | dmtc1 AT, FRET1 + | neg.d FRET2, FTMP1 + | jr ra + |. sel.d FRET1, FTMP1, FRET2 // Merge sign bit back in. + |.else + | c.olt.d 0, FRET2, FRET1 // |x| < result? + | sub.d FRET2, FRET1, f4 + | movt.d FRET1, FRET2, 0 // If yes, subtract +1. + | neg.d FRET2, FRET1 + | jr ra + |. movn.d FRET1, FRET2, AT // Merge sign bit back in. + |.endif + |.else + | neg.d FRET2, FRET1 + | dsll TMP0, TMP0, 32 + | dmtc1 TMP0, f4 + |.if MIPSR6 + | dmtc1 AT, FTMP1 + | sel.d FTMP1, FRET1, FRET2 + |.if "func" == "ceil" + | cmp.lt.d FRET1, FTMP1, FARG1 // x > result? + |.else + | cmp.lt.d FRET1, FARG1, FTMP1 // x < result? + |.endif + | sub.d FRET2, FTMP1, f4 // If yes, subtract +-1. + | jr ra + |. sel.d FRET1, FTMP1, FRET2 + |.else + | movn.d FRET1, FRET2, AT // Merge sign bit back in. + |.if "func" == "ceil" + | c.olt.d 0, FRET1, FARG1 // x > result? + |.else + | c.olt.d 0, FARG1, FRET1 // x < result? + |.endif + | sub.d FRET2, FRET1, f4 // If yes, subtract +-1. + | jr ra + |. movt.d FRET1, FRET2, 0 + |.endif + |.endif + |1: + | jr ra + |. mov.d FRET1, FARG1 + |.endmacro + | + |.macro vm_round, func + |.if FPU + | vm_round_hf, func + |.endif + |.endmacro + | + |->vm_floor: + | vm_round floor + |->vm_ceil: + | vm_round ceil + |->vm_trunc: + |.if JIT + | vm_round trunc + |.endif + | + |// Soft-float integer to number conversion. + |.macro sfi2d, ARG + |.if not FPU + | beqz ARG, >9 // Handle zero first. + |. sra TMP0, ARG, 31 + | xor TMP1, ARG, TMP0 + | dsubu TMP1, TMP1, TMP0 // Absolute value in TMP1. + | dclz ARG, TMP1 + | addiu ARG, ARG, -11 + | li AT, 0x3ff+63-11-1 + | dsllv TMP1, TMP1, ARG // Align mantissa left with leading 1. + | subu ARG, AT, ARG // Exponent - 1. + | ins ARG, TMP0, 11, 11 // Sign | Exponent. + | dsll ARG, ARG, 52 // Align left. + | jr ra + |. daddu ARG, ARG, TMP1 // Add mantissa, increment exponent. + |9: + | jr ra + |. nop + |.endif + |.endmacro + | + |// Input CARG1. Output: CARG1. Temporaries: AT, TMP0, TMP1. + |->vm_sfi2d_1: + | sfi2d CARG1 + | + |// Input CARG2. Output: CARG2. Temporaries: AT, TMP0, TMP1. + |->vm_sfi2d_2: + | sfi2d CARG2 + | + |// Soft-float comparison. Equivalent to c.eq.d. + |// Input: CARG*. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpeq: + |.if not FPU + | dsll AT, CARG1, 1 + | dsll TMP0, CARG2, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 1. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0; + |. xor AT, CARG1, CARG2 + | jr ra + |. sltiu CRET1, AT, 1 // Same values: return 1. + |8: + | jr ra + |. li CRET1, 1 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |// Soft-float comparison. Equivalent to c.ult.d and c.olt.d. + |// Input: CARG1, CARG2. Output: CRET1. Temporaries: AT, TMP0, TMP1, CRET2. + |->vm_sfcmpult: + |.if not FPU + | b >1 + |. li CRET2, 1 + |.endif + | + |->vm_sfcmpolt: + |.if not FPU + | li CRET2, 0 + |1: + | dsll AT, CARG1, 1 + | dsll TMP0, CARG2, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 0. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, CARG1, CARG2 + | bltz AT, >5 // Both args negative? + |. nop + | jr ra + |. slt CRET1, CARG1, CARG2 + |5: // Swap conditions if both operands are negative. + | jr ra + |. slt CRET1, CARG2, CARG1 + |8: + | jr ra + |. li CRET1, 0 + |9: + | jr ra + |. move CRET1, CRET2 + |.endif + | + |->vm_sfcmpogt: + |.if not FPU + | dsll AT, CARG2, 1 + | dsll TMP0, CARG1, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 0. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0 or 1; + |. and AT, CARG2, CARG1 + | bltz AT, >5 // Both args negative? + |. nop + | jr ra + |. slt CRET1, CARG2, CARG1 + |5: // Swap conditions if both operands are negative. + | jr ra + |. slt CRET1, CARG1, CARG2 + |8: + | jr ra + |. li CRET1, 0 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |// Soft-float comparison. Equivalent to c.ole.d a, b or c.ole.d b, a. + |// Input: CARG1, CARG2, TMP3. Output: CRET1. Temporaries: AT, TMP0, TMP1. + |->vm_sfcmpolex: + |.if not FPU + | dsll AT, CARG1, 1 + | dsll TMP0, CARG2, 1 + | or TMP1, AT, TMP0 + | beqz TMP1, >8 // Both args +-0: return 1. + |. lui TMP1, 0xffe0 + | dsll TMP1, TMP1, 32 + | sltu AT, TMP1, AT + | sltu TMP0, TMP1, TMP0 + | or TMP1, AT, TMP0 + | bnez TMP1, >9 // Either arg is NaN: return 0; + |. and AT, CARG1, CARG2 + | xor AT, AT, TMP3 + | bltz AT, >5 // Both args negative? + |. nop + | jr ra + |. slt CRET1, CARG2, CARG1 + |5: // Swap conditions if both operands are negative. + | jr ra + |. slt CRET1, CARG1, CARG2 + |8: + | jr ra + |. li CRET1, 1 + |9: + | jr ra + |. li CRET1, 0 + |.endif + | + |.macro sfmin_max, name, fpcall + |->vm_sf .. name: + |.if JIT and not FPU + | move TMP2, ra + | bal ->fpcall + |. nop + | move ra, TMP2 + | move TMP0, CRET1 + | move CRET1, CARG1 + |.if MIPSR6 + | selnez CRET1, CRET1, TMP0 + | seleqz TMP0, CARG2, TMP0 + | jr ra + |. or CRET1, CRET1, TMP0 + |.else + | jr ra + |. movz CRET1, CARG2, TMP0 + |.endif + |.endif + |.endmacro + | + | sfmin_max min, vm_sfcmpolt + | sfmin_max max, vm_sfcmpogt + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 + |.define NEXT_NIL, CARG4 + |.define NEXT_TMP0, r12 + |.define NEXT_TMP1, r13 + |.define NEXT_TMP2, r14 + |.define NEXT_RES_VK, CRET1 + |.define NEXT_RES_IDX, CRET2 + |.define NEXT_RES_PTR, sp + |.define NEXT_RES_VAL, 0(sp) + |.define NEXT_RES_KEY, 8(sp) + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in CRET2. + |->vm_next: + |.if JIT and ENDIAN_LE + | lw NEXT_ASIZE, NEXT_TAB->asize + | ld NEXT_TMP0, NEXT_TAB->array + | li NEXT_NIL, LJ_TNIL + |1: // Traverse array part. + | sltu AT, NEXT_IDX, NEXT_ASIZE + | sll NEXT_TMP1, NEXT_IDX, 3 + | beqz AT, >5 + |. daddu NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 + | li AT, LJ_TISNUM + | ld NEXT_TMP2, 0(NEXT_TMP1) + | dsll AT, AT, 47 + | or NEXT_TMP1, NEXT_IDX, AT + | beq NEXT_TMP2, NEXT_NIL, <1 + |. addiu NEXT_IDX, NEXT_IDX, 1 + | sd NEXT_TMP2, NEXT_RES_VAL + | sd NEXT_TMP1, NEXT_RES_KEY + | move NEXT_RES_VK, NEXT_RES_PTR + | jr ra + |. move NEXT_RES_IDX, NEXT_IDX + | + |5: // Traverse hash part. + | subu NEXT_RES_IDX, NEXT_IDX, NEXT_ASIZE + | ld NODE:NEXT_RES_VK, NEXT_TAB->node + | sll NEXT_TMP2, NEXT_RES_IDX, 5 + | lw NEXT_TMP0, NEXT_TAB->hmask + | sll AT, NEXT_RES_IDX, 3 + | subu AT, NEXT_TMP2, AT + | daddu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, AT + |6: + | sltu AT, NEXT_TMP0, NEXT_RES_IDX + | bnez AT, >8 + |. nop + | ld NEXT_TMP2, NODE:NEXT_RES_VK->val + | bne NEXT_TMP2, NEXT_NIL, >9 + |. addiu NEXT_RES_IDX, NEXT_RES_IDX, 1 + | // Skip holes in hash part. + | b <6 + |. daddiu NODE:NEXT_RES_VK, NODE:NEXT_RES_VK, sizeof(Node) + | + |8: // End of iteration. Set the key to nil (not the value). + | sd NEXT_NIL, NEXT_RES_KEY + | move NEXT_RES_VK, NEXT_RES_PTR + |9: + | jr ra + |. addu NEXT_RES_IDX, NEXT_RES_IDX, NEXT_ASIZE + |.endif + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. Callback slot number in r1, g in r2. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | saveregs + | ld CTSTATE, GL:r2->ctype_state + | daddiu DISPATCH, r2, GG_G2DISP + | load_got lj_ccallback_enter + | sw r1, CTSTATE->cb.slot + | sd CARG1, CTSTATE->cb.gpr[0] + | .FPU sdc1 FARG1, CTSTATE->cb.fpr[0] + | sd CARG2, CTSTATE->cb.gpr[1] + | .FPU sdc1 FARG2, CTSTATE->cb.fpr[1] + | sd CARG3, CTSTATE->cb.gpr[2] + | .FPU sdc1 FARG3, CTSTATE->cb.fpr[2] + | sd CARG4, CTSTATE->cb.gpr[3] + | .FPU sdc1 FARG4, CTSTATE->cb.fpr[3] + | sd CARG5, CTSTATE->cb.gpr[4] + | .FPU sdc1 FARG5, CTSTATE->cb.fpr[4] + | sd CARG6, CTSTATE->cb.gpr[5] + | .FPU sdc1 FARG6, CTSTATE->cb.fpr[5] + | sd CARG7, CTSTATE->cb.gpr[6] + | .FPU sdc1 FARG7, CTSTATE->cb.fpr[6] + | sd CARG8, CTSTATE->cb.gpr[7] + | .FPU sdc1 FARG8, CTSTATE->cb.fpr[7] + | daddiu TMP0, sp, CFRAME_SPACE + | sd TMP0, CTSTATE->cb.stack + | sd r0, SAVE_PC // Any value outside of bytecode is ok. + | move CARG2, sp + | call_intern lj_ccallback_enter // (CTState *cts, void *cf) + |. move CARG1, CTSTATE + | // Returns lua_State *. + | ld BASE, L:CRET1->base + | ld RC, L:CRET1->top + | move L, CRET1 + | .FPU lui TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | ld LFUNC:RB, FRAME_FUNC(BASE) + | .FPU mtc1 TMP3, TOBIT + | li TISNIL, LJ_TNIL + | li TISNUM, LJ_TISNUM + | li_vmstate INTERP + | subu RC, RC, BASE + | cleartp LFUNC:RB + | st_vmstate + | .FPU cvt.d.s TOBIT, TOBIT + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI + | load_got lj_ccallback_leave + | ld CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) + | sd BASE, L->base + | sd RB, L->top + | sd L, CTSTATE->L + | move CARG2, RA + | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) + |. move CARG1, CTSTATE + | .FPU ldc1 FRET1, CTSTATE->cb.fpr[0] + | ld CRET1, CTSTATE->cb.gpr[0] + | .FPU ldc1 FRET2, CTSTATE->cb.fpr[1] + | b ->vm_leave_unw + |. ld CRET2, CTSTATE->cb.gpr[1] + |.endif + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, CARG1 + | lw TMP1, CCSTATE->spadj + | lbu CARG2, CCSTATE->nsp + | move TMP2, sp + | dsubu sp, sp, TMP1 + | sd ra, -8(TMP2) + | sll CARG2, CARG2, 3 + | sd r16, -16(TMP2) + | sd CCSTATE, -24(TMP2) + | move r16, TMP2 + | daddiu TMP1, CCSTATE, offsetof(CCallState, stack) + | move TMP2, sp + | beqz CARG2, >2 + |. daddu TMP3, TMP1, CARG2 + |1: + | ld TMP0, 0(TMP1) + | daddiu TMP1, TMP1, 8 + | sltu AT, TMP1, TMP3 + | sd TMP0, 0(TMP2) + | bnez AT, <1 + |. daddiu TMP2, TMP2, 8 + |2: + | ld CFUNCADDR, CCSTATE->func + | .FPU ldc1 FARG1, CCSTATE->gpr[0] + | ld CARG2, CCSTATE->gpr[1] + | .FPU ldc1 FARG2, CCSTATE->gpr[1] + | ld CARG3, CCSTATE->gpr[2] + | .FPU ldc1 FARG3, CCSTATE->gpr[2] + | ld CARG4, CCSTATE->gpr[3] + | .FPU ldc1 FARG4, CCSTATE->gpr[3] + | ld CARG5, CCSTATE->gpr[4] + | .FPU ldc1 FARG5, CCSTATE->gpr[4] + | ld CARG6, CCSTATE->gpr[5] + | .FPU ldc1 FARG6, CCSTATE->gpr[5] + | ld CARG7, CCSTATE->gpr[6] + | .FPU ldc1 FARG7, CCSTATE->gpr[6] + | ld CARG8, CCSTATE->gpr[7] + | .FPU ldc1 FARG8, CCSTATE->gpr[7] + | jalr CFUNCADDR + |. ld CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. + | ld CCSTATE:TMP1, -24(r16) + | ld TMP2, -16(r16) + | ld ra, -8(r16) + | sd CRET1, CCSTATE:TMP1->gpr[0] + | sd CRET2, CCSTATE:TMP1->gpr[1] + |.if FPU + | sdc1 FRET1, CCSTATE:TMP1->fpr[0] + | sdc1 FRET2, CCSTATE:TMP1->fpr[1] + |.else + | sd CARG1, CCSTATE:TMP1->gpr[2] // 2nd FP struct field for soft-float. + |.endif + | move sp, r16 + | jr ra + |. move r16, TMP2 + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1*8, RD = src2*8, JMP with RD = target + |.macro bc_comp, FRA, FRD, ARGRA, ARGRD, movop, fmovop, fcomp, sfcomp + | daddu RA, BASE, RA + | daddu RD, BASE, RD + | ld ARGRA, 0(RA) + | ld ARGRD, 0(RD) + | lhu TMP2, OFS_RD(PC) + | gettp CARG3, ARGRA + | gettp CARG4, ARGRD + | bne CARG3, TISNUM, >2 + |. daddiu PC, PC, 4 + | bne CARG4, TISNUM, >5 + |. decode_RD4b TMP2 + | sextw ARGRA, ARGRA + | sextw ARGRD, ARGRD + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | slt AT, CARG1, CARG2 + | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + | movop TMP2, TMP2, AT + |.else + | movop TMP2, r0, AT + |.endif + |1: + | daddu PC, PC, TMP2 + | ins_next + | + |2: // RA is not an integer. + | sltiu AT, CARG3, LJ_TISNUM + | beqz AT, ->vmeta_comp + |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | sltiu AT, CARG4, LJ_TISNUM + | beqz AT, >4 + |. decode_RD4b TMP2 + |.if FPU + | ldc1 FRA, 0(RA) + | ldc1 FRD, 0(RD) + |.endif + |3: // RA and RD are both numbers. + |.if FPU + |.if MIPSR6 + | fcomp FTMP0, FTMP0, FTMP2 + | addu TMP2, TMP2, TMP3 + | mfc1 TMP3, FTMP0 + | b <1 + |. fmovop TMP2, TMP2, TMP3 + |.else + | fcomp FTMP0, FTMP2 + | addu TMP2, TMP2, TMP3 + | b <1 + |. fmovop TMP2, r0 + |.endif + |.else + | bal sfcomp + |. addu TMP2, TMP2, TMP3 + | b <1 + |.if MIPSR6 + |. movop TMP2, TMP2, CRET1 + |.else + |. movop TMP2, r0, CRET1 + |.endif + |.endif + | + |4: // RA is a number, RD is not a number. + | bne CARG4, TISNUM, ->vmeta_comp + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU + |. lwc1 FRD, LO(RD) + | ldc1 FRA, 0(RA) + | b <3 + |. cvt.d.w FRD, FRD + |.else + |.if "ARGRD" == "CARG1" + |. sextw CARG1, CARG1 + | bal ->vm_sfi2d_1 + |. nop + |.else + |. sextw CARG2, CARG2 + | bal ->vm_sfi2d_2 + |. nop + |.endif + | b <3 + |. nop + |.endif + | + |5: // RA is an integer, RD is not an integer + | sltiu AT, CARG4, LJ_TISNUM + | beqz AT, ->vmeta_comp + |. lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU + | lwc1 FRA, LO(RA) + | ldc1 FRD, 0(RD) + | b <3 + | cvt.d.w FRA, FRA + |.else + |.if "ARGRA" == "CARG1" + | bal ->vm_sfi2d_1 + |. sextw CARG1, CARG1 + |.else + | bal ->vm_sfi2d_2 + |. sextw CARG2, CARG2 + |.endif + | b <3 + |. nop + |.endif + |.endmacro + | + |.if MIPSR6 + if (op == BC_ISLT) { + | bc_comp FTMP0, FTMP2, CARG1, CARG2, selnez, selnez, cmp.lt.d, ->vm_sfcmpolt + } else if (op == BC_ISGE) { + | bc_comp FTMP0, FTMP2, CARG1, CARG2, seleqz, seleqz, cmp.lt.d, ->vm_sfcmpolt + } else if (op == BC_ISLE) { + | bc_comp FTMP2, FTMP0, CARG2, CARG1, seleqz, seleqz, cmp.ult.d, ->vm_sfcmpult + } else { + | bc_comp FTMP2, FTMP0, CARG2, CARG1, selnez, selnez, cmp.ult.d, ->vm_sfcmpult + } + |.else + if (op == BC_ISLT) { + | bc_comp FTMP0, FTMP2, CARG1, CARG2, movz, movf, c.olt.d, ->vm_sfcmpolt + } else if (op == BC_ISGE) { + | bc_comp FTMP0, FTMP2, CARG1, CARG2, movn, movt, c.olt.d, ->vm_sfcmpolt + } else if (op == BC_ISLE) { + | bc_comp FTMP2, FTMP0, CARG2, CARG1, movn, movt, c.ult.d, ->vm_sfcmpult + } else { + | bc_comp FTMP2, FTMP0, CARG2, CARG1, movz, movf, c.ult.d, ->vm_sfcmpult + } + |.endif + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | // RA = src1*8, RD = src2*8, JMP with RD = target + | daddu RA, BASE, RA + | daddiu PC, PC, 4 + | daddu RD, BASE, RD + | ld CARG1, 0(RA) + | lhu TMP2, -4+OFS_RD(PC) + | ld CARG2, 0(RD) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | sltu AT, TISNUM, CARG3 + | sltu TMP1, TISNUM, CARG4 + | or AT, AT, TMP1 + if (vk) { + | beqz AT, ->BC_ISEQN_Z + } else { + | beqz AT, ->BC_ISNEN_Z + } + | // Either or both types are not numbers. + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + |.if FFI + |. li AT, LJ_TCDATA + | beq CARG3, AT, ->vmeta_equal_cd + |.endif + | decode_RD4b TMP2 + |.if FFI + | beq CARG4, AT, ->vmeta_equal_cd + |. nop + |.endif + | bne CARG1, CARG2, >2 + |. addu TMP2, TMP2, TMP3 + | // Tag and value are equal. + if (vk) { + |->BC_ISEQV_Z: + | daddu PC, PC, TMP2 + } + |1: + | ins_next + | + |2: // Check if the tags are the same and it's a table or userdata. + | xor AT, CARG3, CARG4 // Same type? + | sltiu TMP0, CARG3, LJ_TISTABUD+1 // Table or userdata? + |.if MIPSR6 + | seleqz TMP0, TMP0, AT + |.else + | movn TMP0, r0, AT + |.endif + if (vk) { + | beqz TMP0, <1 + } else { + | beqz TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. + } + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + |. cleartp TAB:TMP1, CARG1 + | ld TAB:TMP3, TAB:TMP1->metatable + if (vk) { + | beqz TAB:TMP3, <1 // No metatable? + |. nop + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<<MM_eq + | bnez TMP3, >1 // Or 'no __eq' flag set? + } else { + | beqz TAB:TMP3,->BC_ISEQV_Z // No metatable? + |. nop + | lbu TMP3, TAB:TMP3->nomm + | andi TMP3, TMP3, 1<<MM_eq + | bnez TMP3, ->BC_ISEQV_Z // Or 'no __eq' flag set? + } + |. nop + | b ->vmeta_equal // Handle __eq metamethod. + |. li TMP0, 1-vk // ne = 0 or 1. + break; + + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | // RA = src*8, RD = str_const*8 (~), JMP with RD = target + | daddu RA, BASE, RA + | daddiu PC, PC, 4 + | ld CARG1, 0(RA) + | dsubu RD, KBASE, RD + | lhu TMP2, -4+OFS_RD(PC) + | ld CARG2, -8(RD) // KBASE-8-str_const*8 + |.if FFI + | gettp TMP0, CARG1 + | li AT, LJ_TCDATA + |.endif + | li TMP1, LJ_TSTR + | decode_RD4b TMP2 + |.if FFI + | beq TMP0, AT, ->vmeta_equal_cd + |.endif + |. settp CARG2, TMP1 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | xor TMP1, CARG1, CARG2 + | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + if (vk) { + | seleqz TMP2, TMP2, TMP1 + } else { + | selnez TMP2, TMP2, TMP1 + } + |.else + if (vk) { + | movn TMP2, r0, TMP1 + } else { + | movz TMP2, r0, TMP1 + } + |.endif + | daddu PC, PC, TMP2 + | ins_next + break; + + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | // RA = src*8, RD = num_const*8, JMP with RD = target + | daddu RA, BASE, RA + | daddu RD, KBASE, RD + | ld CARG1, 0(RA) + | ld CARG2, 0(RD) + | lhu TMP2, OFS_RD(PC) + | gettp CARG3, CARG1 + | gettp CARG4, CARG2 + | daddiu PC, PC, 4 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + if (vk) { + |->BC_ISEQN_Z: + } else { + |->BC_ISNEN_Z: + } + | bne CARG3, TISNUM, >3 + |. decode_RD4b TMP2 + | bne CARG4, TISNUM, >6 + |. addu TMP2, TMP2, TMP3 + | xor AT, CARG1, CARG2 + |.if MIPSR6 + if (vk) { + | seleqz TMP2, TMP2, AT + |1: + | daddu PC, PC, TMP2 + |2: + } else { + | selnez TMP2, TMP2, AT + |1: + |2: + | daddu PC, PC, TMP2 + } + |.else + if (vk) { + | movn TMP2, r0, AT + |1: + | daddu PC, PC, TMP2 + |2: + } else { + | movz TMP2, r0, AT + |1: + |2: + | daddu PC, PC, TMP2 + } + |.endif + | ins_next + | + |3: // RA is not an integer. + | sltu AT, CARG3, TISNUM + |.if FFI + | beqz AT, >8 + |.else + | beqz AT, <2 + |.endif + |. addu TMP2, TMP2, TMP3 + | sltu AT, CARG4, TISNUM + |.if FPU + | ldc1 FTMP0, 0(RA) + | ldc1 FTMP2, 0(RD) + |.endif + | beqz AT, >5 + |. nop + |4: // RA and RD are both numbers. + |.if FPU + |.if MIPSR6 + | cmp.eq.d FTMP0, FTMP0, FTMP2 + | dmfc1 TMP1, FTMP0 + | b <1 + if (vk) { + |. selnez TMP2, TMP2, TMP1 + } else { + |. seleqz TMP2, TMP2, TMP1 + } + |.else + | c.eq.d FTMP0, FTMP2 + | b <1 + if (vk) { + |. movf TMP2, r0 + } else { + |. movt TMP2, r0 + } + |.endif + |.else + | bal ->vm_sfcmpeq + |. nop + | b <1 + |.if MIPSR6 + if (vk) { + |. selnez TMP2, TMP2, CRET1 + } else { + |. seleqz TMP2, TMP2, CRET1 + } + |.else + if (vk) { + |. movz TMP2, r0, CRET1 + } else { + |. movn TMP2, r0, CRET1 + } + |.endif + |.endif + | + |5: // RA is a number, RD is not a number. + |.if FFI + | bne CARG4, TISNUM, >9 + |.else + | bne CARG4, TISNUM, <2 + |.endif + | // RA is a number, RD is an integer. Convert RD to a number. + |.if FPU + |. lwc1 FTMP2, LO(RD) + | b <4 + |. cvt.d.w FTMP2, FTMP2 + |.else + |. sextw CARG2, CARG2 + | bal ->vm_sfi2d_2 + |. nop + | b <4 + |. nop + |.endif + | + |6: // RA is an integer, RD is not an integer + | sltu AT, CARG4, TISNUM + |.if FFI + | beqz AT, >9 + |.else + | beqz AT, <2 + |.endif + | // RA is an integer, RD is a number. Convert RA to a number. + |.if FPU + |. lwc1 FTMP0, LO(RA) + | ldc1 FTMP2, 0(RD) + | b <4 + | cvt.d.w FTMP0, FTMP0 + |.else + |. sextw CARG1, CARG1 + | bal ->vm_sfi2d_1 + |. nop + | b <4 + |. nop + |.endif + | + |.if FFI + |8: + | li AT, LJ_TCDATA + | bne CARG3, AT, <2 + |. nop + | b ->vmeta_equal_cd + |. nop + |9: + | li AT, LJ_TCDATA + | bne CARG4, AT, <2 + |. nop + | b ->vmeta_equal_cd + |. nop + |.endif + break; + + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target + | daddu RA, BASE, RA + | srl TMP1, RD, 3 + | ld TMP0, 0(RA) + | lhu TMP2, OFS_RD(PC) + | not TMP1, TMP1 + | gettp TMP0, TMP0 + | daddiu PC, PC, 4 + |.if FFI + | li AT, LJ_TCDATA + | beq TMP0, AT, ->vmeta_equal_cd + |.endif + |. xor TMP0, TMP0, TMP1 + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + if (vk) { + | seleqz TMP2, TMP2, TMP0 + } else { + | selnez TMP2, TMP2, TMP0 + } + |.else + if (vk) { + | movn TMP2, r0, TMP0 + } else { + | movz TMP2, r0, TMP0 + } + |.endif + | daddu PC, PC, TMP2 + | ins_next + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | // RA = dst*8 or unused, RD = src*8, JMP with RD = target + | daddu RD, BASE, RD + | lhu TMP2, OFS_RD(PC) + | ld TMP0, 0(RD) + | daddiu PC, PC, 4 + | gettp TMP0, TMP0 + | sltiu TMP0, TMP0, LJ_TISTRUECOND + if (op == BC_IST || op == BC_ISF) { + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 + |.if MIPSR6 + if (op == BC_IST) { + | selnez TMP2, TMP2, TMP0; + } else { + | seleqz TMP2, TMP2, TMP0; + } + |.else + if (op == BC_IST) { + | movz TMP2, r0, TMP0 + } else { + | movn TMP2, r0, TMP0 + } + |.endif + | daddu PC, PC, TMP2 + } else { + | ld CRET1, 0(RD) + if (op == BC_ISTC) { + | beqz TMP0, >1 + } else { + | bnez TMP0, >1 + } + |. daddu RA, BASE, RA + | decode_RD4b TMP2 + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | addu TMP2, TMP2, TMP3 + | sd CRET1, 0(RA) + | daddu PC, PC, TMP2 + |1: + } + | ins_next + break; + + case BC_ISTYPE: + | // RA = src*8, RD = -type*8 + | daddu TMP2, BASE, RA + | srl TMP1, RD, 3 + | ld TMP0, 0(TMP2) + | ins_next1 + | gettp TMP0, TMP0 + | daddu AT, TMP0, TMP1 + | bnez AT, ->vmeta_istype + |. ins_next2 + break; + case BC_ISNUM: + | // RA = src*8, RD = -(TISNUM-1)*8 + | daddu TMP2, BASE, RA + | ld TMP0, 0(TMP2) + | ins_next1 + | checknum TMP0, ->vmeta_istype + |. ins_next2 + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | // RA = dst*8, RD = src*8 + | daddu RD, BASE, RD + | daddu RA, BASE, RA + | ld CRET1, 0(RD) + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + break; + case BC_NOT: + | // RA = dst*8, RD = src*8 + | daddu RD, BASE, RD + | daddu RA, BASE, RA + | ld TMP0, 0(RD) + | li AT, LJ_TTRUE + | gettp TMP0, TMP0 + | sltu TMP0, AT, TMP0 + | addiu TMP0, TMP0, 1 + | dsll TMP0, TMP0, 47 + | not TMP0, TMP0 + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_UNM: + | // RA = dst*8, RD = src*8 + | daddu RB, BASE, RD + | ld CARG1, 0(RB) + | daddu RA, BASE, RA + | gettp CARG3, CARG1 + | bne CARG3, TISNUM, >2 + |. lui TMP1, 0x8000 + | sextw CARG1, CARG1 + | beq CARG1, TMP1, ->vmeta_unm // Meta handler deals with -2^31. + |. negu CARG1, CARG1 + | zextw CARG1, CARG1 + | settp CARG1, TISNUM + |1: + | ins_next1 + | sd CARG1, 0(RA) + | ins_next2 + |2: + | sltiu AT, CARG3, LJ_TISNUM + | beqz AT, ->vmeta_unm + |. dsll TMP1, TMP1, 32 + | b <1 + |. xor CARG1, CARG1, TMP1 + break; + case BC_LEN: + | // RA = dst*8, RD = src*8 + | daddu CARG2, BASE, RD + | daddu RA, BASE, RA + | ld TMP0, 0(CARG2) + | gettp TMP1, TMP0 + | daddiu AT, TMP1, -LJ_TSTR + | bnez AT, >2 + |. cleartp STR:CARG1, TMP0 + | lw CRET1, STR:CARG1->len + |1: + | settp CRET1, TISNUM + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + |2: + | daddiu AT, TMP1, -LJ_TTAB + | bnez AT, ->vmeta_len + |. nop +#if LJ_52 + | ld TAB:TMP2, TAB:CARG1->metatable + | bnez TAB:TMP2, >9 + |. nop + |3: +#endif + |->BC_LEN_Z: + | load_got lj_tab_len + | call_intern lj_tab_len // (GCtab *t) + |. nop + | // Returns uint32_t (but less than 2^31). + | b <1 + |. nop +#if LJ_52 + |9: + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<<MM_len + | bnez TMP0, <3 // 'no __len' flag set: done. + |. nop + | b ->vmeta_len + |. nop +#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro fpmod, a, b, c + | bal ->vm_floor // floor(b/c) + |. div.d FARG1, b, c + | mul.d a, FRET1, c + | sub.d a, b, a // b - floor(b/c)*c + |.endmacro + + |.macro sfpmod + | daddiu sp, sp, -16 + | + | load_got __divdf3 + | sd CARG1, 0(sp) + | call_extern + |. sd CARG2, 8(sp) + | + | load_got floor + | call_extern + |. move CARG1, CRET1 + | + | load_got __muldf3 + | move CARG1, CRET1 + | call_extern + |. ld CARG2, 8(sp) + | + | load_got __subdf3 + | ld CARG1, 0(sp) + | call_extern + |. move CARG2, CRET1 + | + | daddiu sp, sp, 16 + |.endmacro + + |.macro ins_arithpre, label + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 + ||switch (vk) { + ||case 0: + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | // RA = dst*8, RB = src1*8, RC = num_const*8 + | daddu RB, BASE, RB + |.if "label" ~= "none" + | b label + |.endif + |. daddu RC, KBASE, RC + || break; + ||case 1: + | decode_RB8a RC, INS + | decode_RB8b RC + | decode_RDtoRC8 RB, RD + | // RA = dst*8, RB = num_const*8, RC = src1*8 + | daddu RC, BASE, RC + |.if "label" ~= "none" + | b label + |.endif + |. daddu RB, KBASE, RB + || break; + ||default: + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | // RA = dst*8, RB = src1*8, RC = src2*8 + | daddu RB, BASE, RB + |.if "label" ~= "none" + | b label + |.endif + |. daddu RC, BASE, RC + || break; + ||} + |.endmacro + | + |.macro ins_arith, intins, fpins, fpcall, label + | ins_arithpre none + | + |.if "label" ~= "none" + |label: + |.endif + | + |// Used in 5. + | ld CARG1, 0(RB) + | ld CARG2, 0(RC) + | gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | + |.if "intins" ~= "div" + | + | // Check for two integers. + | sextw CARG3, CARG1 + | bne TMP0, TISNUM, >5 + |. sextw CARG4, CARG2 + | bne TMP1, TISNUM, >5 + | + |.if "intins" == "addu" + |. intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (y^b)) < 0: overflow. + | xor TMP2, CRET1, CARG4 + | and TMP1, TMP1, TMP2 + | bltz TMP1, ->vmeta_arith + |. daddu RA, BASE, RA + |.elif "intins" == "subu" + |. intins CRET1, CARG3, CARG4 + | xor TMP1, CRET1, CARG3 // ((y^a) & (a^b)) < 0: overflow. + | xor TMP2, CARG3, CARG4 + | and TMP1, TMP1, TMP2 + | bltz TMP1, ->vmeta_arith + |. daddu RA, BASE, RA + |.elif "intins" == "mult" + |.if MIPSR6 + |. nop + | mul CRET1, CARG3, CARG4 + | muh TMP2, CARG3, CARG4 + |.else + |. intins CARG3, CARG4 + | mflo CRET1 + | mfhi TMP2 + |.endif + | sra TMP1, CRET1, 31 + | bne TMP1, TMP2, ->vmeta_arith + |. daddu RA, BASE, RA + |.else + |. load_got lj_vm_modi + | beqz CARG4, ->vmeta_arith + |. daddu RA, BASE, RA + | move CARG1, CARG3 + | call_extern + |. move CARG2, CARG4 + |.endif + | + | zextw CRET1, CRET1 + | settp CRET1, TISNUM + | ins_next1 + | sd CRET1, 0(RA) + |3: + | ins_next2 + | + |.endif + | + |5: // Check for two numbers. + | .FPU ldc1 FTMP0, 0(RB) + | sltu AT, TMP0, TISNUM + | sltu TMP0, TMP1, TISNUM + | .FPU ldc1 FTMP2, 0(RC) + | and AT, AT, TMP0 + | beqz AT, ->vmeta_arith + |. daddu RA, BASE, RA + | + |.if FPU + | fpins FRET1, FTMP0, FTMP2 + |.elif "fpcall" == "sfpmod" + | sfpmod + |.else + | load_got fpcall + | call_extern + |. nop + |.endif + | + | ins_next1 + |.if "intins" ~= "div" + | b <3 + |.endif + |.if FPU + |. sdc1 FRET1, 0(RA) + |.else + |. sd CRET1, 0(RA) + |.endif + |.if "intins" == "div" + | ins_next2 + |.endif + | + |.endmacro + + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arith addu, add.d, __adddf3, none + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arith subu, sub.d, __subdf3, none + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arith mult, mul.d, __muldf3, none + break; + case BC_DIVVN: + | ins_arith div, div.d, __divdf3, ->BC_DIVVN_Z + break; + case BC_DIVNV: case BC_DIVVV: + | ins_arithpre ->BC_DIVVN_Z + break; + case BC_MODVN: + | ins_arith modi, fpmod, sfpmod, ->BC_MODVN_Z + break; + case BC_MODNV: case BC_MODVV: + | ins_arithpre ->BC_MODVN_Z + break; + case BC_POW: + | ins_arithpre none + | ld CARG1, 0(RB) + | ld CARG2, 0(RC) + | gettp TMP0, CARG1 + | gettp TMP1, CARG2 + | sltiu TMP0, TMP0, LJ_TISNUM + | sltiu TMP1, TMP1, LJ_TISNUM + | and AT, TMP0, TMP1 + | load_got pow + | beqz AT, ->vmeta_arith + |. daddu RA, BASE, RA + |.if FPU + | ldc1 FARG1, 0(RB) + | ldc1 FARG2, 0(RC) + |.endif + | call_extern + |. nop + | ins_next1 + |.if FPU + | sdc1 FRET1, 0(RA) + |.else + | sd CRET1, 0(RA) + |.endif + | ins_next2 + break; + + case BC_CAT: + | // RA = dst*8, RB = src_start*8, RC = src_end*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | dsubu CARG3, RC, RB + | sd BASE, L->base + | daddu CARG2, BASE, RC + | move MULTRES, RB + |->BC_CAT_Z: + | load_got lj_meta_cat + | srl CARG3, CARG3, 3 + | sd PC, SAVE_PC + | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) + |. move CARG1, L + | // Returns NULL (finished) or TValue * (metamethod). + | bnez CRET1, ->vmeta_binop + |. ld BASE, L->base + | daddu RB, BASE, MULTRES + | ld CRET1, 0(RB) + | daddu RA, BASE, RA + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | // RA = dst*8, RD = str_const*8 (~) + | dsubu TMP1, KBASE, RD + | ins_next1 + | li TMP2, LJ_TSTR + | ld TMP0, -8(TMP1) // KBASE-8-str_const*8 + | daddu RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KCDATA: + |.if FFI + | // RA = dst*8, RD = cdata_const*8 (~) + | dsubu TMP1, KBASE, RD + | ins_next1 + | ld TMP0, -8(TMP1) // KBASE-8-cdata_const*8 + | li TMP2, LJ_TCDATA + | daddu RA, BASE, RA + | settp TMP0, TMP2 + | sd TMP0, 0(RA) + | ins_next2 + |.endif + break; + case BC_KSHORT: + | // RA = dst*8, RD = int16_literal*8 + | sra RD, INS, 16 + | daddu RA, BASE, RA + | zextw RD, RD + | ins_next1 + | settp RD, TISNUM + | sd RD, 0(RA) + | ins_next2 + break; + case BC_KNUM: + | // RA = dst*8, RD = num_const*8 + | daddu RD, KBASE, RD + | daddu RA, BASE, RA + | ld CRET1, 0(RD) + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + break; + case BC_KPRI: + | // RA = dst*8, RD = primitive_type*8 (~) + | daddu RA, BASE, RA + | dsll TMP0, RD, 44 + | not TMP0, TMP0 + | ins_next1 + | sd TMP0, 0(RA) + | ins_next2 + break; + case BC_KNIL: + | // RA = base*8, RD = end*8 + | daddu RA, BASE, RA + | sd TISNIL, 0(RA) + | daddiu RA, RA, 8 + | daddu RD, BASE, RD + |1: + | sd TISNIL, 0(RA) + | slt AT, RA, RD + | bnez AT, <1 + |. daddiu RA, RA, 8 + | ins_next_ + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | // RA = dst*8, RD = uvnum*8 + | ld LFUNC:RB, FRAME_FUNC(BASE) + | daddu RA, BASE, RA + | cleartp LFUNC:RB + | daddu RD, RD, LFUNC:RB + | ld UPVAL:RB, LFUNC:RD->uvptr + | ins_next1 + | ld TMP1, UPVAL:RB->v + | ld CRET1, 0(TMP1) + | sd CRET1, 0(RA) + | ins_next2 + break; + case BC_USETV: + | // RA = uvnum*8, RD = src*8 + | ld LFUNC:RB, FRAME_FUNC(BASE) + | daddu RD, BASE, RD + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | ld UPVAL:RB, LFUNC:RA->uvptr + | ld CRET1, 0(RD) + | lbu TMP3, UPVAL:RB->marked + | ld CARG2, UPVAL:RB->v + | andi TMP3, TMP3, LJ_GC_BLACK // isblack(uv) + | lbu TMP0, UPVAL:RB->closed + | gettp TMP2, CRET1 + | sd CRET1, 0(CARG2) + | li AT, LJ_GC_BLACK|1 + | or TMP3, TMP3, TMP0 + | beq TMP3, AT, >2 // Upvalue is closed and black? + |. daddiu TMP2, TMP2, -(LJ_TNUMX+1) + |1: + | ins_next + | + |2: // Check if new value is collectable. + | sltiu AT, TMP2, LJ_TISGCV - (LJ_TNUMX+1) + | beqz AT, <1 // tvisgcv(v) + |. cleartp GCOBJ:CRET1, CRET1 + | lbu TMP3, GCOBJ:CRET1->gch.marked + | andi TMP3, TMP3, LJ_GC_WHITES // iswhite(v) + | beqz TMP3, <1 + |. load_got lj_gc_barrieruv + | // Crossed a write barrier. Move the barrier forward. + | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) + |. daddiu CARG1, DISPATCH, GG_DISP2G + | b <1 + |. nop + break; + case BC_USETS: + | // RA = uvnum*8, RD = str_const*8 (~) + | ld LFUNC:RB, FRAME_FUNC(BASE) + | dsubu TMP1, KBASE, RD + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | ld UPVAL:RB, LFUNC:RA->uvptr + | ld STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 + | lbu TMP2, UPVAL:RB->marked + | ld CARG2, UPVAL:RB->v + | lbu TMP3, STR:TMP1->marked + | andi AT, TMP2, LJ_GC_BLACK // isblack(uv) + | lbu TMP2, UPVAL:RB->closed + | li TMP0, LJ_TSTR + | settp TMP1, TMP0 + | bnez AT, >2 + |. sd TMP1, 0(CARG2) + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | beqz TMP2, <1 + |. andi AT, TMP3, LJ_GC_WHITES // iswhite(str) + | beqz AT, <1 + |. load_got lj_gc_barrieruv + | // Crossed a write barrier. Move the barrier forward. + | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) + |. daddiu CARG1, DISPATCH, GG_DISP2G + | b <1 + |. nop + break; + case BC_USETN: + | // RA = uvnum*8, RD = num_const*8 + | ld LFUNC:RB, FRAME_FUNC(BASE) + | daddu RD, KBASE, RD + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | ld UPVAL:RB, LFUNC:RA->uvptr + | ld CRET1, 0(RD) + | ld TMP1, UPVAL:RB->v + | ins_next1 + | sd CRET1, 0(TMP1) + | ins_next2 + break; + case BC_USETP: + | // RA = uvnum*8, RD = primitive_type*8 (~) + | ld LFUNC:RB, FRAME_FUNC(BASE) + | dsll TMP0, RD, 44 + | cleartp LFUNC:RB + | daddu RA, RA, LFUNC:RB + | not TMP0, TMP0 + | ld UPVAL:RB, LFUNC:RA->uvptr + | ins_next1 + | ld TMP1, UPVAL:RB->v + | sd TMP0, 0(TMP1) + | ins_next2 + break; + + case BC_UCLO: + | // RA = level*8, RD = target + | ld TMP2, L->openupval + | branch_RD // Do this first since RD is not saved. + | load_got lj_func_closeuv + | sd BASE, L->base + | beqz TMP2, >1 + |. move CARG1, L + | call_intern lj_func_closeuv // (lua_State *L, TValue *level) + |. daddu CARG2, BASE, RA + | ld BASE, L->base + |1: + | ins_next + break; + + case BC_FNEW: + | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) + | load_got lj_func_newL_gc + | dsubu TMP1, KBASE, RD + | ld CARG3, FRAME_FUNC(BASE) + | ld CARG2, -8(TMP1) // KBASE-8-tab_const*8 + | sd BASE, L->base + | sd PC, SAVE_PC + | cleartp CARG3 + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | call_intern lj_func_newL_gc + |. move CARG1, L + | // Returns GCfuncL *. + | li TMP0, LJ_TFUNC + | ld BASE, L->base + | ins_next1 + | settp CRET1, TMP0 + | daddu RA, BASE, RA + | sd CRET1, 0(RA) + | ins_next2 + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + case BC_TDUP: + | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) + | ld TMP0, DISPATCH_GL(gc.total)(DISPATCH) + | ld TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) + | sd BASE, L->base + | sd PC, SAVE_PC + | sltu AT, TMP0, TMP1 + | beqz AT, >5 + |1: + if (op == BC_TNEW) { + | load_got lj_tab_new + | srl CARG2, RD, 3 + | andi CARG2, CARG2, 0x7ff + | li TMP0, 0x801 + | addiu AT, CARG2, -0x7ff + | srl CARG3, RD, 14 + |.if MIPSR6 + | seleqz TMP0, TMP0, AT + | selnez CARG2, CARG2, AT + | or CARG2, CARG2, TMP0 + |.else + | movz CARG2, TMP0, AT + |.endif + | // (lua_State *L, int32_t asize, uint32_t hbits) + | call_intern lj_tab_new + |. move CARG1, L + | // Returns Table *. + } else { + | load_got lj_tab_dup + | dsubu TMP1, KBASE, RD + | move CARG1, L + | call_intern lj_tab_dup // (lua_State *L, Table *kt) + |. ld CARG2, -8(TMP1) // KBASE-8-str_const*8 + | // Returns Table *. + } + | li TMP0, LJ_TTAB + | ld BASE, L->base + | ins_next1 + | daddu RA, BASE, RA + | settp CRET1, TMP0 + | sd CRET1, 0(RA) + | ins_next2 + |5: + | load_got lj_gc_step_fixtop + | move MULTRES, RD + | call_intern lj_gc_step_fixtop // (lua_State *L) + |. move CARG1, L + | b <1 + |. move RD, MULTRES + break; + + case BC_GGET: + | // RA = dst*8, RD = str_const*8 (~) + case BC_GSET: + | // RA = src*8, RD = str_const*8 (~) + | ld LFUNC:TMP2, FRAME_FUNC(BASE) + | dsubu TMP1, KBASE, RD + | ld STR:RC, -8(TMP1) // KBASE-8-str_const*8 + | cleartp LFUNC:TMP2 + | ld TAB:RB, LFUNC:TMP2->env + if (op == BC_GGET) { + | b ->BC_TGETS_Z + } else { + | b ->BC_TSETS_Z + } + |. daddu RA, BASE, RA + break; + + case BC_TGETV: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu CARG2, BASE, RB + | daddu CARG3, BASE, RC + | ld TAB:RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | daddu RA, BASE, RA + | checktab TAB:RB, ->vmeta_tgetv + | gettp TMP3, TMP2 + | bne TMP3, TISNUM, >5 // Integer key? + |. lw TMP0, TAB:RB->asize + | sextw TMP2, TMP2 + | ld TMP1, TAB:RB->array + | sltu AT, TMP2, TMP0 + | sll TMP2, TMP2, 3 + | beqz AT, ->vmeta_tgetv // Integer key and in array part? + |. daddu TMP2, TMP1, TMP2 + | ld AT, 0(TMP2) + | beq AT, TISNIL, >2 + |. ld CRET1, 0(TMP2) + |1: + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + | + |2: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<<MM_index + | bnez TMP0, <1 // 'no __index' flag set: done. + |. nop + | b ->vmeta_tgetv + |. nop + | + |5: + | li AT, LJ_TSTR + | bne TMP3, AT, ->vmeta_tgetv + |. cleartp RC, TMP2 + | b ->BC_TGETS_Z // String key? + |. nop + break; + case BC_TGETS: + | // RA = dst*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RC8a RC, INS + | daddu CARG2, BASE, RB + | decode_RC8b RC + | ld TAB:RB, 0(CARG2) + | dsubu CARG3, KBASE, RC + | daddu RA, BASE, RA + | ld STR:RC, -8(CARG3) // KBASE-8-str_const*8 + | checktab TAB:RB, ->vmeta_tgets1 + |->BC_TGETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + |1: + | ld CARG1, NODE:TMP2->key + | ld CRET1, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | bne CARG1, RC, >4 + |. ld TAB:TMP3, TAB:RB->metatable + | beq CRET1, TISNIL, >5 // Key found, but nil value? + |. nop + |3: + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + | + |4: // Follow hash chain. + | bnez NODE:TMP1, <1 + |. move NODE:TMP2, NODE:TMP1 + | // End of hash chain: key not found, nil result. + | + |5: // Check for __index if table value is nil. + | beqz TAB:TMP3, <3 // No metatable: done. + |. move CRET1, TISNIL + | lbu TMP0, TAB:TMP3->nomm + | andi TMP0, TMP0, 1<<MM_index + | bnez TMP0, <3 // 'no __index' flag set: done. + |. nop + | b ->vmeta_tgets + |. nop + break; + case BC_TGETB: + | // RA = dst*8, RB = table*8, RC = index*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | daddu CARG2, BASE, RB + | decode_RDtoRC8 RC, RD + | ld TAB:RB, 0(CARG2) + | daddu RA, BASE, RA + | srl TMP0, RC, 3 + | checktab TAB:RB, ->vmeta_tgetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | sltu AT, TMP0, TMP1 + | beqz AT, ->vmeta_tgetb + |. daddu RC, TMP2, RC + | ld AT, 0(RC) + | beq AT, TISNIL, >5 + |. ld CRET1, 0(RC) + |1: + | ins_next1 + | sd CRET1, 0(RA) + | ins_next2 + | + |5: // Check for __index if table value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<<MM_index + | bnez TMP1, <1 // 'no __index' flag set: done. + |. nop + | b ->vmeta_tgetb // Caveat: preserve TMP0 and CARG2! + |. nop + break; + case BC_TGETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu RB, BASE, RB + | daddu RC, BASE, RC + | ld TAB:CARG1, 0(RB) + | lw CARG2, LO(RC) + | daddu RA, BASE, RA + | cleartp TAB:CARG1 + | lw TMP0, TAB:CARG1->asize + | ld TMP1, TAB:CARG1->array + | sltu AT, CARG2, TMP0 + | sll TMP2, CARG2, 3 + | beqz AT, ->vmeta_tgetr // In array part? + |. daddu CRET1, TMP1, TMP2 + | ld CARG2, 0(CRET1) + |->BC_TGETR_Z: + | ins_next1 + | sd CARG2, 0(RA) + | ins_next2 + break; + + case BC_TSETV: + | // RA = src*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu CARG2, BASE, RB + | daddu CARG3, BASE, RC + | ld RB, 0(CARG2) + | ld TMP2, 0(CARG3) + | daddu RA, BASE, RA + | checktab RB, ->vmeta_tsetv + | checkint TMP2, >5 + |. sextw RC, TMP2 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | sltu AT, RC, TMP0 + | sll TMP2, RC, 3 + | beqz AT, ->vmeta_tsetv // Integer key and in array part? + |. daddu TMP1, TMP1, TMP2 + | ld TMP0, 0(TMP1) + | lbu TMP3, TAB:RB->marked + | beq TMP0, TISNIL, >3 + |. ld CRET1, 0(RA) + |1: + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |. sd CRET1, 0(TMP1) + |2: + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP2, TAB:TMP2->nomm + | andi TMP2, TMP2, 1<<MM_newindex + | bnez TMP2, <1 // 'no __newindex' flag set: done. + |. nop + | b ->vmeta_tsetv + |. nop + | + |5: + | gettp AT, TMP2 + | daddiu AT, AT, -LJ_TSTR + | bnez AT, ->vmeta_tsetv + |. nop + | b ->BC_TSETS_Z // String key? + |. cleartp STR:RC, TMP2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETS: + | // RA = src*8, RB = table*8, RC = str_const*8 (~) + | decode_RB8a RB, INS + | decode_RB8b RB + | daddu CARG2, BASE, RB + | decode_RC8a RC, INS + | ld TAB:RB, 0(CARG2) + | decode_RC8b RC + | dsubu CARG3, KBASE, RC + | ld RC, -8(CARG3) // KBASE-8-str_const*8 + | daddu RA, BASE, RA + | cleartp STR:RC + | checktab TAB:RB, ->vmeta_tsets1 + |->BC_TSETS_Z: + | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 + | lw TMP0, TAB:RB->hmask + | lw TMP1, STR:RC->sid + | ld NODE:TMP2, TAB:RB->node + | sb r0, TAB:RB->nomm // Clear metamethod cache. + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + | sll TMP0, TMP1, 5 + | sll TMP1, TMP1, 3 + | subu TMP1, TMP0, TMP1 + | li TMP3, LJ_TSTR + | daddu NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) + | settp STR:RC, TMP3 // Tagged key to look for. + |.if FPU + | ldc1 FTMP0, 0(RA) + |.else + | ld CRET1, 0(RA) + |.endif + |1: + | ld TMP0, NODE:TMP2->key + | ld CARG2, NODE:TMP2->val + | ld NODE:TMP1, NODE:TMP2->next + | bne TMP0, RC, >5 + |. lbu TMP3, TAB:RB->marked + | beq CARG2, TISNIL, >4 // Key found, but nil value? + |. ld TAB:TMP0, TAB:RB->metatable + |2: + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |.if FPU + |. sdc1 FTMP0, NODE:TMP2->val + |.else + |. sd CRET1, NODE:TMP2->val + |.endif + |3: + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | beqz TAB:TMP0, <2 // No metatable: done. + |. nop + | lbu TMP0, TAB:TMP0->nomm + | andi TMP0, TMP0, 1<<MM_newindex + | bnez TMP0, <2 // 'no __newindex' flag set: done. + |. nop + | b ->vmeta_tsets + |. nop + | + |5: // Follow hash chain. + | bnez NODE:TMP1, <1 + |. move NODE:TMP2, NODE:TMP1 + | // End of hash chain: key not found, add a new one + | + | // But check for __newindex first. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, >6 // No metatable: continue. + |. daddiu CARG3, DISPATCH, DISPATCH_GL(tmptv) + | lbu TMP0, TAB:TMP2->nomm + | andi TMP0, TMP0, 1<<MM_newindex + | beqz TMP0, ->vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | load_got lj_tab_newkey + | sd RC, 0(CARG3) + | sd BASE, L->base + | move CARG2, TAB:RB + | sd PC, SAVE_PC + | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k + |. move CARG1, L + | // Returns TValue *. + | ld BASE, L->base + |.if FPU + | b <3 // No 2nd write barrier needed. + |. sdc1 FTMP0, 0(CRET1) + |.else + | ld CARG1, 0(RA) + | b <3 // No 2nd write barrier needed. + |. sd CARG1, 0(CRET1) + |.endif + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <3 + break; + case BC_TSETB: + | // RA = src*8, RB = table*8, RC = index*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | daddu CARG2, BASE, RB + | decode_RDtoRC8 RC, RD + | ld TAB:RB, 0(CARG2) + | daddu RA, BASE, RA + | srl TMP0, RC, 3 + | checktab RB, ->vmeta_tsetb + | lw TMP1, TAB:RB->asize + | ld TMP2, TAB:RB->array + | sltu AT, TMP0, TMP1 + | beqz AT, ->vmeta_tsetb + |. daddu RC, TMP2, RC + | ld TMP1, 0(RC) + | lbu TMP3, TAB:RB->marked + | beq TMP1, TISNIL, >5 + |1: + |. ld CRET1, 0(RA) + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |. sd CRET1, 0(RC) + |2: + | ins_next + | + |5: // Check for __newindex if previous value is nil. + | ld TAB:TMP2, TAB:RB->metatable + | beqz TAB:TMP2, <1 // No metatable: done. + |. nop + | lbu TMP1, TAB:TMP2->nomm + | andi TMP1, TMP1, 1<<MM_newindex + | bnez TMP1, <1 // 'no __newindex' flag set: done. + |. nop + | b ->vmeta_tsetb // Caveat: preserve TMP0 and CARG2! + |. nop + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMP3, TMP0, <2 + break; + case BC_TSETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | decode_RB8a RB, INS + | decode_RB8b RB + | decode_RDtoRC8 RC, RD + | daddu CARG1, BASE, RB + | daddu CARG3, BASE, RC + | ld TAB:CARG2, 0(CARG1) + | lw CARG3, LO(CARG3) + | cleartp TAB:CARG2 + | lbu TMP3, TAB:CARG2->marked + | lw TMP0, TAB:CARG2->asize + | ld TMP1, TAB:CARG2->array + | andi AT, TMP3, LJ_GC_BLACK // isblack(table) + | bnez AT, >7 + |. daddu RA, BASE, RA + |2: + | sltu AT, CARG3, TMP0 + | sll TMP2, CARG3, 3 + | beqz AT, ->vmeta_tsetr // In array part? + |. daddu CRET1, TMP1, TMP2 + |->BC_TSETR_Z: + | ld CARG1, 0(RA) + | ins_next1 + | sd CARG1, 0(CRET1) + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, CRET1, <2 + break; + + case BC_TSETM: + | // RA = base*8 (table at base-1), RD = num_const*8 (start index) + | daddu RA, BASE, RA + |1: + | daddu TMP3, KBASE, RD + | ld TAB:CARG2, -8(RA) // Guaranteed to be a table. + | addiu TMP0, MULTRES, -8 + | lw TMP3, LO(TMP3) // Integer constant is in lo-word. + | beqz TMP0, >4 // Nothing to copy? + |. srl CARG3, TMP0, 3 + | cleartp CARG2 + | addu CARG3, CARG3, TMP3 + | lw TMP2, TAB:CARG2->asize + | sll TMP1, TMP3, 3 + | lbu TMP3, TAB:CARG2->marked + | ld CARG1, TAB:CARG2->array + | sltu AT, TMP2, CARG3 + | bnez AT, >5 + |. daddu TMP2, RA, TMP0 + | daddu TMP1, TMP1, CARG1 + | andi TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |3: // Copy result slots to table. + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | sltu AT, RA, TMP2 + | sd CRET1, 0(TMP1) + | bnez AT, <3 + |. daddiu TMP1, TMP1, 8 + | bnez TMP0, >7 + |. nop + |4: + | ins_next + | + |5: // Need to resize array part. + | load_got lj_tab_reasize + | sd BASE, L->base + | sd PC, SAVE_PC + | move BASE, RD + | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + |. move CARG1, L + | // Must not reallocate the stack. + | move RD, BASE + | b <1 + |. ld BASE, L->base // Reload BASE for lack of a saved register. + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, TMP0, <4 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALLM: + | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 + | decode_RDtoRC8 NARGS8:RC, RD + | b ->BC_CALL_Z + |. addu NARGS8:RC, NARGS8:RC, MULTRES + break; + case BC_CALL: + | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 + | decode_RDtoRC8 NARGS8:RC, RD + |->BC_CALL_Z: + | move TMP2, BASE + | daddu BASE, BASE, RA + | ld LFUNC:RB, 0(BASE) + | daddiu BASE, BASE, 16 + | addiu NARGS8:RC, NARGS8:RC, -8 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_CALLMT: + | // RA = base*8, (RB = 0,) RC = extra_nargs*8 + | addu NARGS8:RD, NARGS8:RD, MULTRES // BC_CALLT gets RC from RD. + | // Fall through. Assumes BC_CALLT follows. + break; + case BC_CALLT: + | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 + | daddu RA, BASE, RA + | ld RB, 0(RA) + | move NARGS8:RC, RD + | ld TMP1, FRAME_PC(BASE) + | daddiu RA, RA, 16 + | addiu NARGS8:RC, NARGS8:RC, -8 + | checktp CARG3, RB, -LJ_TFUNC, ->vmeta_callt + |->BC_CALLT_Z: + | andi TMP0, TMP1, FRAME_TYPE // Caveat: preserve TMP0 until the 'or'. + | lbu TMP3, LFUNC:CARG3->ffid + | bnez TMP0, >7 + |. xori TMP2, TMP1, FRAME_VARG + |1: + | sd RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. + | sltiu AT, TMP3, 2 // (> FF_C) Calling a fast function? + | move TMP2, BASE + | move RB, CARG3 + | beqz NARGS8:RC, >3 + |. move TMP3, NARGS8:RC + |2: + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | addiu TMP3, TMP3, -8 + | sd CRET1, 0(TMP2) + | bnez TMP3, <2 + |. daddiu TMP2, TMP2, 8 + |3: + | or TMP0, TMP0, AT + | beqz TMP0, >5 + |. nop + |4: + | ins_callt + | + |5: // Tailcall to a fast function with a Lua frame below. + | lw INS, -4(TMP1) + | decode_RA8a RA, INS + | decode_RA8b RA + | dsubu TMP1, BASE, RA + | ld TMP1, -32(TMP1) + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | b <4 + |. ld KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. + | + |7: // Tailcall from a vararg function. + | andi AT, TMP2, FRAME_TYPEP + | bnez AT, <1 // Vararg frame below? + |. dsubu TMP2, BASE, TMP2 // Relocate BASE down. + | move BASE, TMP2 + | ld TMP1, FRAME_PC(TMP2) + | b <1 + |. andi TMP0, TMP1, FRAME_TYPE + break; + + case BC_ITERC: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) + | move TMP2, BASE // Save old BASE fir vmeta_call. + | daddu BASE, BASE, RA + | ld RB, -24(BASE) + | ld CARG1, -16(BASE) + | ld CARG2, -8(BASE) + | li NARGS8:RC, 16 // Iterators get 2 arguments. + | sd RB, 0(BASE) // Copy callable. + | sd CARG1, 16(BASE) // Copy state. + | sd CARG2, 24(BASE) // Copy control var. + | daddiu BASE, BASE, 16 + | checkfunc RB, ->vmeta_call + | ins_call + break; + + case BC_ITERN: + |.if JIT and ENDIAN_LE + | hotloop + |.endif + |->vm_IITERN: + | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) + | daddu RA, BASE, RA + | ld TAB:RB, -16(RA) + | lw RC, -8+LO(RA) // Get index from control var. + | cleartp TAB:RB + | daddiu PC, PC, 4 + | lw TMP0, TAB:RB->asize + | ld TMP1, TAB:RB->array + | dsll CARG3, TISNUM, 47 + |1: // Traverse array part. + | sltu AT, RC, TMP0 + | beqz AT, >5 // Index points after array part? + |. sll TMP3, RC, 3 + | daddu TMP3, TMP1, TMP3 + | ld CARG1, 0(TMP3) + | lhu RD, -4+OFS_RD(PC) + | or TMP2, RC, CARG3 + | beq CARG1, TISNIL, <1 // Skip holes in array part. + |. addiu RC, RC, 1 + | sd TMP2, 0(RA) + | sd CARG1, 8(RA) + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | decode_RD4b RD + | daddu RD, RD, TMP3 + | sw RC, -8+LO(RA) // Update control var. + | daddu PC, PC, RD + |3: + | ins_next + | + |5: // Traverse hash part. + | lw TMP1, TAB:RB->hmask + | subu RC, RC, TMP0 + | ld TMP2, TAB:RB->node + |6: + | sltu AT, TMP1, RC // End of iteration? Branch to ITERL+1. + | bnez AT, <3 + |. sll TMP3, RC, 5 + | sll RB, RC, 3 + | subu TMP3, TMP3, RB + | daddu NODE:TMP3, TMP3, TMP2 + | ld CARG1, 0(NODE:TMP3) + | lhu RD, -4+OFS_RD(PC) + | beq CARG1, TISNIL, <6 // Skip holes in hash part. + |. addiu RC, RC, 1 + | ld CARG2, NODE:TMP3->key + | lui TMP3, (-(BCBIAS_J*4 >> 16) & 65535) + | sd CARG1, 8(RA) + | addu RC, RC, TMP0 + | decode_RD4b RD + | addu RD, RD, TMP3 + | sd CARG2, 0(RA) + | daddu PC, PC, RD + | b <3 + |. sw RC, -8+LO(RA) // Update control var. + break; + + case BC_ISNEXT: + | // RA = base*8, RD = target (points to ITERN) + | daddu RA, BASE, RA + | srl TMP0, RD, 1 + | ld CFUNC:CARG1, -24(RA) + | daddu TMP0, PC, TMP0 + | ld CARG2, -16(RA) + | ld CARG3, -8(RA) + | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | checkfunc CFUNC:CARG1, >5 + | gettp CARG2, CARG2 + | daddiu CARG2, CARG2, -LJ_TTAB + | lbu TMP1, CFUNC:CARG1->ffid + | daddiu CARG3, CARG3, -LJ_TNIL + | or AT, CARG2, CARG3 + | daddiu TMP1, TMP1, -FF_next_N + | or AT, AT, TMP1 + | bnez AT, >5 + |. lui TMP1, (LJ_KEYINDEX >> 16) + | daddu PC, TMP0, TMP2 + | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) + | dsll TMP1, TMP1, 32 + | sd TMP1, -8(RA) + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | li TMP3, BC_JMP + | li TMP1, BC_ITERC + | sb TMP3, -4+OFS_OP(PC) + | daddu PC, TMP0, TMP2 + |.if JIT + | lb TMP0, OFS_OP(PC) + | li AT, BC_ITERN + | bne TMP0, AT, >6 + |. lhu TMP2, OFS_RD(PC) + |.endif + | b <1 + |. sb TMP1, OFS_OP(PC) + |.if JIT + |6: // Unpatch JLOOP. + | ld TMP0, DISPATCH_J(trace)(DISPATCH) + | sll TMP2, TMP2, 3 + | daddu TMP0, TMP0, TMP2 + | ld TRACE:TMP2, 0(TMP0) + | lw TMP0, TRACE:TMP2->startins + | li AT, -256 + | and TMP0, TMP0, AT + | or TMP0, TMP0, TMP1 + | b <1 + |. sw TMP0, 0(PC) + |.endif + break; + + case BC_VARG: + | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 + | ld TMP0, FRAME_PC(BASE) + | decode_RDtoRC8 RC, RD + | decode_RB8a RB, INS + | daddu RC, BASE, RC + | decode_RB8b RB + | daddu RA, BASE, RA + | daddiu RC, RC, FRAME_VARG + | daddu TMP2, RA, RB + | daddiu TMP3, BASE, -16 // TMP3 = vtop + | dsubu RC, RC, TMP0 // RC = vbase + | // Note: RC may now be even _above_ BASE if nargs was < numparams. + | beqz RB, >5 // Copy all varargs? + |. dsubu TMP1, TMP3, RC + | daddiu TMP2, TMP2, -16 + |1: // Copy vararg slots to destination slots. + | ld CARG1, 0(RC) + | sltu AT, RC, TMP3 + | daddiu RC, RC, 8 + |.if MIPSR6 + | selnez CARG1, CARG1, AT + | seleqz AT, TISNIL, AT + | or CARG1, CARG1, AT + |.else + | movz CARG1, TISNIL, AT + |.endif + | sd CARG1, 0(RA) + | sltu AT, RA, TMP2 + | bnez AT, <1 + |. daddiu RA, RA, 8 + |3: + | ins_next + | + |5: // Copy all varargs. + | ld TMP0, L->maxstack + | blez TMP1, <3 // No vararg slots? + |. li MULTRES, 8 // MULTRES = (0+1)*8 + | daddu TMP2, RA, TMP1 + | sltu AT, TMP0, TMP2 + | bnez AT, >7 + |. daddiu MULTRES, TMP1, 8 + |6: + | ld CRET1, 0(RC) + | daddiu RC, RC, 8 + | sd CRET1, 0(RA) + | sltu AT, RC, TMP3 + | bnez AT, <6 // More vararg slots? + |. daddiu RA, RA, 8 + | b <3 + |. nop + | + |7: // Grow stack for varargs. + | load_got lj_state_growstack + | sd RA, L->top + | dsubu RA, RA, BASE + | sd BASE, L->base + | dsubu BASE, RC, BASE // Need delta, because BASE may change. + | sd PC, SAVE_PC + | srl CARG2, TMP1, 3 + | call_intern lj_state_growstack // (lua_State *L, int n) + |. move CARG1, L + | move RC, BASE + | ld BASE, L->base + | daddu RA, BASE, RA + | daddu RC, BASE, RC + | b <6 + |. daddiu TMP3, BASE, -16 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | // RA = results*8, RD = extra_nresults*8 + | addu RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. + | // Fall through. Assumes BC_RET follows. + break; + + case BC_RET: + | // RA = results*8, RD = (nresults+1)*8 + | ld PC, FRAME_PC(BASE) + | daddu RA, BASE, RA + | move MULTRES, RD + |1: + | andi TMP0, PC, FRAME_TYPE + | bnez TMP0, ->BC_RETV_Z + |. xori TMP1, PC, FRAME_VARG + | + |->BC_RET_Z: + | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return + | lw INS, -4(PC) + | daddiu TMP2, BASE, -16 + | daddiu RC, RD, -8 + | decode_RA8a TMP0, INS + | decode_RB8a RB, INS + | decode_RA8b TMP0 + | decode_RB8b RB + | daddu TMP3, TMP2, RB + | beqz RC, >3 + |. dsubu BASE, TMP2, TMP0 + |2: + | ld CRET1, 0(RA) + | daddiu RA, RA, 8 + | daddiu RC, RC, -8 + | sd CRET1, 0(TMP2) + | bnez RC, <2 + |. daddiu TMP2, TMP2, 8 + |3: + | daddiu TMP3, TMP3, -8 + |5: + | sltu AT, TMP2, TMP3 + | bnez AT, >6 + |. ld LFUNC:TMP1, FRAME_FUNC(BASE) + | ins_next1 + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. + | sd TISNIL, 0(TMP2) + | b <5 + |. daddiu TMP2, TMP2, 8 + | + |->BC_RETV_Z: // Non-standard return case. + | andi TMP2, TMP1, FRAME_TYPEP + | bnez TMP2, ->vm_return + |. nop + | // Return from vararg function: relocate BASE down. + | dsubu BASE, BASE, TMP1 + | b <1 + |. ld PC, FRAME_PC(BASE) + break; + + case BC_RET0: case BC_RET1: + | // RA = results*8, RD = (nresults+1)*8 + | ld PC, FRAME_PC(BASE) + | daddu RA, BASE, RA + | move MULTRES, RD + | andi TMP0, PC, FRAME_TYPE + | bnez TMP0, ->BC_RETV_Z + |. xori TMP1, PC, FRAME_VARG + | lw INS, -4(PC) + | daddiu TMP2, BASE, -16 + if (op == BC_RET1) { + | ld CRET1, 0(RA) + } + | decode_RB8a RB, INS + | decode_RA8a RA, INS + | decode_RB8b RB + | decode_RA8b RA + | dsubu BASE, TMP2, RA + if (op == BC_RET1) { + | sd CRET1, 0(TMP2) + } + |5: + | sltu AT, RD, RB + | bnez AT, >6 + |. ld TMP1, FRAME_FUNC(BASE) + | ins_next1 + | cleartp LFUNC:TMP1 + | ld TMP1, LFUNC:TMP1->pc + | ld KBASE, PC2PROTO(k)(TMP1) + | ins_next2 + | + |6: // Fill up results with nil. + | daddiu TMP2, TMP2, 8 + | daddiu RD, RD, 8 + | b <5 + if (op == BC_RET1) { + |. sd TISNIL, 0(TMP2) + } else { + |. sd TISNIL, -8(TMP2) + } + break; + + /* -- Loops and branches ------------------------------------------------ */ + + case BC_FORL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IFORL follows. + break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + | // RA = base*8, RD = target (after end of loop or start of loop) + vk = (op == BC_IFORL || op == BC_JFORL); + | daddu RA, BASE, RA + | ld CARG1, FORL_IDX*8(RA) // IDX CARG1 - CARG3 type + | gettp CARG3, CARG1 + if (op != BC_JFORL) { + | srl RD, RD, 1 + | lui TMP2, (-(BCBIAS_J*4 >> 16) & 65535) + | daddu TMP2, RD, TMP2 + } + if (!vk) { + | ld CARG2, FORL_STOP*8(RA) // STOP CARG2 - CARG4 type + | ld CRET1, FORL_STEP*8(RA) // STEP CRET1 - CRET2 type + | gettp CARG4, CARG2 + | bne CARG3, TISNUM, >5 + |. gettp CRET2, CRET1 + | bne CARG4, TISNUM, ->vmeta_for + |. sextw CARG3, CARG1 + | bne CRET2, TISNUM, ->vmeta_for + |. sextw CARG2, CARG2 + | dext AT, CRET1, 31, 0 + | slt CRET1, CARG2, CARG3 + | slt TMP1, CARG3, CARG2 + |.if MIPSR6 + | selnez TMP1, TMP1, AT + | seleqz CRET1, CRET1, AT + | or CRET1, CRET1, TMP1 + |.else + | movn CRET1, TMP1, AT + |.endif + } else { + | bne CARG3, TISNUM, >5 + |. ld CARG2, FORL_STEP*8(RA) // STEP CARG2 - CARG4 type + | ld CRET1, FORL_STOP*8(RA) // STOP CRET1 - CRET2 type + | sextw TMP3, CARG1 + | sextw CARG2, CARG2 + | sextw CRET1, CRET1 + | addu CARG1, TMP3, CARG2 + | xor TMP0, CARG1, TMP3 + | xor TMP1, CARG1, CARG2 + | and TMP0, TMP0, TMP1 + | slt TMP1, CARG1, CRET1 + | slt CRET1, CRET1, CARG1 + | slt AT, CARG2, r0 + | slt TMP0, TMP0, r0 // ((y^a) & (y^b)) < 0: overflow. + |.if MIPSR6 + | selnez TMP1, TMP1, AT + | seleqz CRET1, CRET1, AT + | or CRET1, CRET1, TMP1 + |.else + | movn CRET1, TMP1, AT + |.endif + | or CRET1, CRET1, TMP0 + | zextw CARG1, CARG1 + | settp CARG1, TISNUM + } + |1: + if (op == BC_FORI) { + |.if MIPSR6 + | selnez TMP2, TMP2, CRET1 + |.else + | movz TMP2, r0, CRET1 + |.endif + | daddu PC, PC, TMP2 + } else if (op == BC_JFORI) { + | daddu PC, PC, TMP2 + | lhu RD, -4+OFS_RD(PC) + } else if (op == BC_IFORL) { + |.if MIPSR6 + | seleqz TMP2, TMP2, CRET1 + |.else + | movn TMP2, r0, CRET1 + |.endif + | daddu PC, PC, TMP2 + } + if (vk) { + | sd CARG1, FORL_IDX*8(RA) + } + | ins_next1 + | sd CARG1, FORL_EXT*8(RA) + |2: + if (op == BC_JFORI) { + | beqz CRET1, =>BC_JLOOP + |. decode_RD8b RD + } else if (op == BC_JFORL) { + | beqz CRET1, =>BC_JLOOP + } + | ins_next2 + | + |5: // FP loop. + |.if FPU + if (!vk) { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | sltiu TMP0, CARG3, LJ_TISNUM + | sltiu TMP1, CARG4, LJ_TISNUM + | sltiu AT, CRET2, LJ_TISNUM + | ld TMP3, FORL_STEP*8(RA) + | and TMP0, TMP0, TMP1 + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. slt TMP3, TMP3, r0 + |.if MIPSR6 + | dmtc1 TMP3, FTMP2 + | cmp.lt.d FTMP0, f0, f2 + | cmp.lt.d FTMP1, f2, f0 + | sel.d FTMP2, FTMP1, FTMP0 + | b <1 + |. dmfc1 CRET1, FTMP2 + |.else + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | li CRET1, 1 + | movt CRET1, r0, 0 + | movt AT, r0, 1 + | b <1 + |. movn CRET1, AT, TMP3 + |.endif + } else { + | ldc1 f0, FORL_IDX*8(RA) + | ldc1 f4, FORL_STEP*8(RA) + | ldc1 f2, FORL_STOP*8(RA) + | ld TMP3, FORL_STEP*8(RA) + | add.d f0, f0, f4 + |.if MIPSR6 + | slt TMP3, TMP3, r0 + | dmtc1 TMP3, FTMP2 + | cmp.lt.d FTMP0, f0, f2 + | cmp.lt.d FTMP1, f2, f0 + | sel.d FTMP2, FTMP1, FTMP0 + | dmfc1 CRET1, FTMP2 + if (op == BC_IFORL) { + | seleqz TMP2, TMP2, CRET1 + | daddu PC, PC, TMP2 + } + |.else + | c.ole.d 0, f0, f2 + | c.ole.d 1, f2, f0 + | slt TMP3, TMP3, r0 + | li CRET1, 1 + | li AT, 1 + | movt CRET1, r0, 0 + | movt AT, r0, 1 + | movn CRET1, AT, TMP3 + if (op == BC_IFORL) { + | movn TMP2, r0, CRET1 + | daddu PC, PC, TMP2 + } + |.endif + | sdc1 f0, FORL_IDX*8(RA) + | ins_next1 + | b <2 + |. sdc1 f0, FORL_EXT*8(RA) + } + |.else + if (!vk) { + | sltiu TMP0, CARG3, LJ_TISNUM + | sltiu TMP1, CARG4, LJ_TISNUM + | sltiu AT, CRET2, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | and AT, AT, TMP0 + | beqz AT, ->vmeta_for + |. nop + | bal ->vm_sfcmpolex + |. lw TMP3, FORL_STEP*8+HI(RA) + | b <1 + |. nop + } else { + | load_got __adddf3 + | call_extern + |. sw TMP2, TMPD + | ld CARG2, FORL_STOP*8(RA) + | move CARG1, CRET1 + if ( op == BC_JFORL ) { + | lhu RD, -4+OFS_RD(PC) + | decode_RD8b RD + } + | bal ->vm_sfcmpolex + |. lw TMP3, FORL_STEP*8+HI(RA) + | b <1 + |. lw TMP2, TMPD + } + |.endif + break; + + case BC_ITERL: + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_IITERL follows. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | // RA = base*8, RD = target + | daddu RA, BASE, RA + | ld TMP1, 0(RA) + | beq TMP1, TISNIL, >1 // Stop if iterator returned nil. + |. nop + if (op == BC_JITERL) { + | b =>BC_JLOOP + |. sd TMP1, -8(RA) + } else { + | branch_RD // Otherwise save control var + branch. + | sd TMP1, -8(RA) + } + |1: + | ins_next + break; + + case BC_LOOP: + | // RA = base*8, RD = target (loop extent) + | // Note: RA/RD is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, it's only purpose is to detect a hot loop. + |.if JIT + | hotloop + |.endif + | // Fall through. Assumes BC_ILOOP follows. + break; + + case BC_ILOOP: + | // RA = base*8, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + |.if JIT + | // RA = base*8 (ignored), RD = traceno*8 + | ld TMP1, DISPATCH_J(trace)(DISPATCH) + | li AT, 0 + | daddu TMP1, TMP1, RD + | // Traces on MIPS don't store the trace number, so use 0. + | sd AT, DISPATCH_GL(vmstate)(DISPATCH) + | ld TRACE:TMP2, 0(TMP1) + | sd BASE, DISPATCH_GL(jit_base)(DISPATCH) + | ld TMP2, TRACE:TMP2->mcode + | sd L, DISPATCH_GL(tmpbuf.L)(DISPATCH) + | jr TMP2 + |. daddiu JGL, DISPATCH, GG_DISP2G+32768 + |.endif + break; + + case BC_JMP: + | // RA = base*8 (only used by trace recorder), RD = target + | branch_RD + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + case BC_FUNCF: + |.if JIT + | hotcall + |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | ld TMP2, L->maxstack + | lbu TMP1, -4+PC2PROTO(numparams)(PC) + | ld KBASE, -4+PC2PROTO(k)(PC) + | sltu AT, TMP2, RA + | bnez AT, ->vm_growstack_l + |. sll TMP1, TMP1, 3 + if (op != BC_JFUNCF) { + | ins_next1 + } + |2: + | sltu AT, NARGS8:RC, TMP1 // Check for missing parameters. + | bnez AT, >3 + |. daddu AT, BASE, NARGS8:RC + if (op == BC_JFUNCF) { + | decode_RD8a RD, INS + | b =>BC_JLOOP + |. decode_RD8b RD + } else { + | ins_next2 + } + | + |3: // Clear missing parameters. + | sd TISNIL, 0(AT) + | b <2 + |. addiu NARGS8:RC, NARGS8:RC, 8 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | NYI // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 + | li TMP0, LJ_TFUNC + | daddu TMP1, BASE, RC + | ld TMP2, L->maxstack + | settp LFUNC:RB, TMP0 + | daddu TMP0, RA, RC + | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. + | daddiu TMP3, RC, 16+FRAME_VARG + | sltu AT, TMP0, TMP2 + | ld KBASE, -4+PC2PROTO(k)(PC) + | beqz AT, ->vm_growstack_l + |. sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. + | lbu TMP2, -4+PC2PROTO(numparams)(PC) + | move RA, BASE + | move RC, TMP1 + | ins_next1 + | beqz TMP2, >3 + |. daddiu BASE, TMP1, 16 + |1: + | ld TMP0, 0(RA) + | sltu AT, RA, RC // Less args than parameters? + | move CARG1, TMP0 + |.if MIPSR6 + | selnez TMP0, TMP0, AT + | seleqz TMP3, TISNIL, AT + | or TMP0, TMP0, TMP3 + | seleqz TMP3, CARG1, AT + | selnez CARG1, TISNIL, AT + | or CARG1, CARG1, TMP3 + |.else + | movz TMP0, TISNIL, AT // Clear missing parameters. + | movn CARG1, TISNIL, AT // Clear old fixarg slot (help the GC). + |.endif + | addiu TMP2, TMP2, -1 + | sd TMP0, 16(TMP1) + | daddiu TMP1, TMP1, 8 + | sd CARG1, 0(RA) + | bnez TMP2, <1 + |. daddiu RA, RA, 8 + |3: + | ins_next2 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 + if (op == BC_FUNCC) { + | ld CFUNCADDR, CFUNC:RB->f + } else { + | ld CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) + } + | daddu TMP1, RA, NARGS8:RC + | ld TMP2, L->maxstack + | daddu RC, BASE, NARGS8:RC + | sd BASE, L->base + | sltu AT, TMP2, TMP1 + | sd RC, L->top + | li_vmstate C + if (op == BC_FUNCCW) { + | ld CARG2, CFUNC:RB->f + } + | bnez AT, ->vm_growstack_c // Need to grow stack. + |. move CARG1, L + | jalr CFUNCADDR // (lua_State *L [, lua_CFunction f]) + |. st_vmstate + | // Returns nresults. + | ld BASE, L->base + | sll RD, CRET1, 3 + | ld TMP1, L->top + | li_vmstate INTERP + | ld PC, FRAME_PC(BASE) // Fetch PC of caller. + | dsubu RA, TMP1, RD // RA = L->top - nresults*8 + | sd L, DISPATCH_GL(cur_L)(DISPATCH) + | b ->vm_returnc + |. st_vmstate + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +static int build_backend(BuildCtx *ctx) +{ + int op; + + dasm_growpc(Dst, BC__MAX); + + build_subroutines(ctx); + + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); + int i; + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.4byte .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.4byte 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -4\n" + "\t.byte 31\n" + "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" + "\t.align 2\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.4byte .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.4byte .Lframe0\n" + "\t.8byte .Lbegin\n" + "\t.8byte %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" + "\t.byte 0x9f\n\t.sleb128 2*5\n" + "\t.byte 0x9e\n\t.sleb128 2*6\n", + fcofs, CFRAME_SIZE); + for (i = 23; i >= 16; i--) + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); +#if !LJ_SOFTFP + for (i = 31; i >= 24; i--) + fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); +#endif + fprintf(ctx->fp, + "\t.align 2\n" + ".LEFDE0:\n\n"); +#if LJ_HASFFI + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.4byte .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.4byte .Lframe0\n" + "\t.4byte lj_vm_ffi_call\n" + "\t.4byte %d\n" + "\t.byte 0x9f\n\t.uleb128 2*1\n" + "\t.byte 0x90\n\t.uleb128 2*2\n" + "\t.byte 0xd\n\t.uleb128 0x10\n" + "\t.align 2\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif +#if !LJ_NO_UNWIND + /* NYI */ +#endif + break; + default: + break; + } +} + diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc index 48d0ed0f..3cad37d2 100644 --- a/src/vm_ppc.dasc +++ b/src/vm_ppc.dasc @@ -1,4 +1,4 @@ -|// Low-level VM code for PowerPC CPUs. +|// Low-level VM code for PowerPC 32 bit or 32on64 bit mode. |// Bytecode interpreter, fast functions and helper functions. |// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h | @@ -18,7 +18,6 @@ |// DynASM defines used by the PPC port: |// |// P64 64 bit pointers (only for GPR64 testing). -|// Note: a full PPC64 _LP64 port is not planned. |// GPR64 64 bit registers (but possibly 32 bit pointers, e.g. PS3). |// Affects reg saves, stack layout, carry/overflow/dot flags etc. |// FRAME32 Use 32 bit frame layout, even with GPR64 (Xbox 360). @@ -103,6 +102,18 @@ |// Fixed register assignments for the interpreter. |// Don't use: r1 = sp, r2 and r13 = reserved (TOC, TLS or SDATA) | +|.macro .FPU, a, b +|.if FPU +| a, b +|.endif +|.endmacro +| +|.macro .FPU, a, b, c +|.if FPU +| a, b, c +|.endif +|.endmacro +| |// The following must be C callee-save (but BASE is often refetched). |.define BASE, r14 // Base of current Lua stack frame. |.define KBASE, r15 // Constants of current Lua function. @@ -116,8 +127,10 @@ |.define TISNUM, r22 |.define TISNIL, r23 |.define ZERO, r24 +|.if FPU |.define TOBIT, f30 // 2^52 + 2^51. |.define TONUM, f31 // 2^52 + 2^51 + 2^31. +|.endif | |// The following temporaries are not saved across C calls, except for RA. |.define RA, r20 // Callee-save. @@ -133,6 +146,7 @@ | |// Saved temporaries. |.define SAVE0, r21 +|.define SAVE1, r25 | |// Calling conventions. |.define CARG1, r3 @@ -141,8 +155,10 @@ |.define CARG4, r6 // Overlaps TMP3. |.define CARG5, r7 // Overlaps INS. | +|.if FPU |.define FARG1, f1 |.define FARG2, f2 +|.endif | |.define CRET1, r3 |.define CRET2, r4 @@ -213,10 +229,16 @@ |.endif |.else | +|.if FPU |.define SAVE_LR, 276(sp) |.define CFRAME_SPACE, 272 // Delta for sp. |// Back chain for sp: 272(sp) <-- sp entering interpreter |.define SAVE_FPR_, 128 // .. 128+18*8: 64 bit FPR saves. +|.else +|.define SAVE_LR, 132(sp) +|.define CFRAME_SPACE, 128 // Delta for sp. +|// Back chain for sp: 128(sp) <-- sp entering interpreter +|.endif |.define SAVE_GPR_, 56 // .. 56+18*4: 32 bit GPR saves. |.define SAVE_CR, 52(sp) // 32 bit CR save. |.define SAVE_ERRF, 48(sp) // 32 bit C frame info. @@ -226,16 +248,25 @@ |.define SAVE_PC, 32(sp) |.define SAVE_MULTRES, 28(sp) |.define UNUSED1, 24(sp) +|.if FPU |.define TMPD_LO, 20(sp) |.define TMPD_HI, 16(sp) |.define TONUM_LO, 12(sp) |.define TONUM_HI, 8(sp) +|.else +|.define SFSAVE_4, 20(sp) +|.define SFSAVE_3, 16(sp) +|.define SFSAVE_2, 12(sp) +|.define SFSAVE_1, 8(sp) +|.endif |// Next frame lr: 4(sp) |// Back chain for sp: 0(sp) <-- sp while in interpreter | +|.if FPU |.define TMPD_BLO, 23(sp) |.define TMPD, TMPD_HI |.define TONUM_D, TONUM_HI +|.endif | |.endif | @@ -245,7 +276,7 @@ |.else | stw r..reg, SAVE_GPR_+(reg-14)*4(sp) |.endif -| stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) +| .FPU stfd f..reg, SAVE_FPR_+(reg-14)*8(sp) |.endmacro |.macro rest_, reg |.if GPR64 @@ -253,7 +284,7 @@ |.else | lwz r..reg, SAVE_GPR_+(reg-14)*4(sp) |.endif -| lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) +| .FPU lfd f..reg, SAVE_FPR_+(reg-14)*8(sp) |.endmacro | |.macro saveregs @@ -316,19 +347,14 @@ |.type NODE, Node |.type NARGS8, int |.type TRACE, GCtrace +|.type SBUF, SBuf | |//----------------------------------------------------------------------- | -|// These basic macros should really be part of DynASM. -|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro -|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro -|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro -|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro -|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro -| |// Trap for not-yet-implemented parts. |.macro NYI; tw 4, sp, sp; .endmacro | +|.if FPU |// int/FP conversions. |.macro tonum_i, freg, reg | xoris reg, reg, 0x8000 @@ -352,6 +378,7 @@ |.macro toint, reg, freg | toint reg, freg, freg |.endmacro +|.endif | |//----------------------------------------------------------------------- | @@ -539,9 +566,19 @@ static void build_subroutines(BuildCtx *ctx) | beq >2 |1: | addic. TMP1, TMP1, -8 + |.if FPU | lfd f0, 0(RA) + |.else + | lwz CARG1, 0(RA) + | lwz CARG2, 4(RA) + |.endif | addi RA, RA, 8 + |.if FPU | stfd f0, 0(BASE) + |.else + | stw CARG1, 0(BASE) + | stw CARG2, 4(BASE) + |.endif | addi BASE, BASE, 8 | bney <1 | @@ -619,23 +656,23 @@ static void build_subroutines(BuildCtx *ctx) | .toc ld TOCREG, SAVE_TOC | li TISNUM, LJ_TISNUM // Setup type comparison constants. | lp BASE, L->base - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | li ZERO, 0 - | stw TMP3, TMPD + | .FPU stw TMP3, TMPD | li TMP1, LJ_TFALSE - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). | li TISNIL, LJ_TNIL | li_vmstate INTERP - | lfs TOBIT, TMPD + | .FPU lfs TOBIT, TMPD | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. | la RA, -8(BASE) // Results start at BASE-8. - | stw TMP3, TMPD + | .FPU stw TMP3, TMPD | addi DISPATCH, DISPATCH, GG_G2DISP | stw TMP1, 0(RA) // Prepend false to error message. | li RD, 16 // 2 results: false + error message. | st_vmstate - | lfs TONUM, TMPD + | .FPU lfs TONUM, TMPD | b ->vm_returnc | |//----------------------------------------------------------------------- @@ -684,33 +721,34 @@ static void build_subroutines(BuildCtx *ctx) | stw CARG3, SAVE_NRES | cmplwi TMP1, 0 | stw CARG3, SAVE_ERRF - | stp TMP0, L->cframe | stp CARG3, SAVE_CFRAME | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. + | stp TMP0, L->cframe | beq >3 | | // Resume after yield (like a return). + | stw L, DISPATCH_GL(cur_L)(DISPATCH) | mr RA, BASE | lp BASE, L->base | li TISNUM, LJ_TISNUM // Setup type comparison constants. | lp TMP1, L->top | lwz PC, FRAME_PC(BASE) - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | stb CARG3, L->status - | stw TMP3, TMPD - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD + | .FPU stw TMP3, TMPD + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | .FPU lfs TOBIT, TMPD | sub RD, TMP1, BASE - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | .FPU stw TMP3, TMPD + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | addi RD, RD, 8 - | stw TMP0, TONUM_HI + | .FPU stw TMP0, TONUM_HI | li_vmstate INTERP | li ZERO, 0 | st_vmstate | andix. TMP0, PC, FRAME_TYPE | mr MULTRES, RD - | lfs TONUM, TMPD + | .FPU lfs TONUM, TMPD | li TISNIL, LJ_TNIL | beq ->BC_RET_Z | b ->vm_return @@ -729,33 +767,34 @@ static void build_subroutines(BuildCtx *ctx) | |1: // Entry point for vm_pcall above (PC = ftype). | lp TMP1, L:CARG1->cframe - | stw CARG3, SAVE_NRES | mr L, CARG1 - | stw CARG1, SAVE_L - | mr BASE, CARG2 - | stp sp, L->cframe // Add our C frame to cframe chain. + | stw CARG3, SAVE_NRES | lwz DISPATCH, L->glref // Setup pointer to dispatch table. + | stw CARG1, SAVE_L + | mr BASE, CARG2 + | addi DISPATCH, DISPATCH, GG_G2DISP | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | stp TMP1, SAVE_CFRAME - | addi DISPATCH, DISPATCH, GG_G2DISP + | stp sp, L->cframe // Add our C frame to cframe chain. | |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). + | stw L, DISPATCH_GL(cur_L)(DISPATCH) | lp TMP2, L->base // TMP2 = old base (used in vmeta_call). | li TISNUM, LJ_TISNUM // Setup type comparison constants. | lp TMP1, L->top - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | add PC, PC, BASE - | stw TMP3, TMPD + | .FPU stw TMP3, TMPD | li ZERO, 0 - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | .FPU lfs TOBIT, TMPD | sub PC, PC, TMP2 // PC = frame delta + frame type - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | .FPU stw TMP3, TMPD + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | sub NARGS8:RC, TMP1, BASE - | stw TMP0, TONUM_HI + | .FPU stw TMP0, TONUM_HI | li_vmstate INTERP - | lfs TONUM, TMPD + | .FPU lfs TONUM, TMPD | li TISNIL, LJ_TNIL | st_vmstate | @@ -776,15 +815,18 @@ static void build_subroutines(BuildCtx *ctx) | lwz TMP0, L:CARG1->stack | stw CARG1, SAVE_L | lp TMP1, L->top + | lwz DISPATCH, L->glref // Setup pointer to dispatch table. | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). | lp TMP1, L->cframe - | stp sp, L->cframe // Add our C frame to cframe chain. + | addi DISPATCH, DISPATCH, GG_G2DISP | .toc lp CARG4, 0(CARG4) | li TMP2, 0 | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. | stw TMP2, SAVE_ERRF // No error function. | stp TMP1, SAVE_CFRAME + | stp sp, L->cframe // Add our C frame to cframe chain. + | stw L, DISPATCH_GL(cur_L)(DISPATCH) | mtctr CARG4 | bctrl // (lua_State *L, lua_CFunction func, void *ud) |.if PPE @@ -793,9 +835,7 @@ static void build_subroutines(BuildCtx *ctx) |.else | mr. BASE, CRET1 |.endif - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | li PC, FRAME_CP - | addi DISPATCH, DISPATCH, GG_G2DISP + | li PC, FRAME_CP | bne <3 // Else continue with the call. | b ->vm_leave_cp // No base? Just remove C frame. | @@ -842,15 +882,30 @@ static void build_subroutines(BuildCtx *ctx) | lwz INS, -4(PC) | subi CARG2, RB, 16 | decode_RB8 SAVE0, INS + |.if FPU | lfd f0, 0(RA) + |.else + | lwz TMP2, 0(RA) + | lwz TMP3, 4(RA) + |.endif | add TMP1, BASE, SAVE0 | stp BASE, L->base | cmplw TMP1, CARG2 | sub CARG3, CARG2, TMP1 | decode_RA8 RA, INS + |.if FPU | stfd f0, 0(CARG2) + |.else + | stw TMP2, 0(CARG2) + | stw TMP3, 4(CARG2) + |.endif | bney ->BC_CAT_Z + |.if FPU | stfdx f0, BASE, RA + |.else + | stwux TMP2, RA, BASE + | stw TMP3, 4(RA) + |.endif | b ->cont_nop | |//-- Table indexing metamethods ----------------------------------------- @@ -903,9 +958,19 @@ static void build_subroutines(BuildCtx *ctx) | // Returns TValue * (finished) or NULL (metamethod). | cmplwi CRET1, 0 | beq >3 + |.if FPU | lfd f0, 0(CRET1) + |.else + | lwz TMP0, 0(CRET1) + | lwz TMP1, 4(CRET1) + |.endif | ins_next1 + |.if FPU | stfdx f0, BASE, RA + |.else + | stwux TMP0, RA, BASE + | stw TMP1, 4(RA) + |.endif | ins_next2 | |3: // Call __index metamethod. @@ -918,6 +983,22 @@ static void build_subroutines(BuildCtx *ctx) | li NARGS8:RC, 16 // 2 args for func(t, k). | b ->vm_call_dispatch_f | + |->vmeta_tgetr: + | bl extern lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | cmplwi CRET1, 0 + | beq >1 + |.if FPU + | lfd f14, 0(CRET1) + |.else + | lwz SAVE0, 0(CRET1) + | lwz SAVE1, 4(CRET1) + |.endif + | b ->BC_TGETR_Z + |1: + | stwx TISNIL, BASE, RA + | b ->cont_nop + | |//----------------------------------------------------------------------- | |->vmeta_tsets1: @@ -967,11 +1048,21 @@ static void build_subroutines(BuildCtx *ctx) | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | // Returns TValue * (finished) or NULL (metamethod). | cmplwi CRET1, 0 + |.if FPU | lfdx f0, BASE, RA + |.else + | lwzux TMP2, RA, BASE + | lwz TMP3, 4(RA) + |.endif | beq >3 | // NOBARRIER: lj_meta_tset ensures the table is not black. | ins_next1 + |.if FPU | stfd f0, 0(CRET1) + |.else + | stw TMP2, 0(CRET1) + | stw TMP3, 4(CRET1) + |.endif | ins_next2 | |3: // Call __newindex metamethod. @@ -982,9 +1073,28 @@ static void build_subroutines(BuildCtx *ctx) | add PC, TMP1, BASE | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. | li NARGS8:RC, 24 // 3 args for func(t, k, v) + |.if FPU | stfd f0, 16(BASE) // Copy value to third argument. + |.else + | stw TMP2, 16(BASE) + | stw TMP3, 20(BASE) + |.endif | b ->vm_call_dispatch_f | + |->vmeta_tsetr: + | stp BASE, L->base + | mr CARG1, L + | stw PC, SAVE_PC + | bl extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // Returns TValue *. + |.if FPU + | stfd f14, 0(CRET1) + |.else + | stw SAVE0, 0(CRET1) + | stw SAVE1, 4(CRET1) + |.endif + | b ->cont_nop + | |//-- Comparison metamethods --------------------------------------------- | |->vmeta_comp: @@ -1021,9 +1131,19 @@ static void build_subroutines(BuildCtx *ctx) | |->cont_ra: // RA = resultptr | lwz INS, -4(PC) + |.if FPU | lfd f0, 0(RA) + |.else + | lwz CARG1, 0(RA) + | lwz CARG2, 4(RA) + |.endif | decode_RA8 TMP1, INS + |.if FPU | stfdx f0, BASE, TMP1 + |.else + | stwux CARG1, TMP1, BASE + | stw CARG2, 4(TMP1) + |.endif | b ->cont_nop | |->cont_condt: // RA = resultptr @@ -1063,6 +1183,16 @@ static void build_subroutines(BuildCtx *ctx) | b <3 |.endif | + |->vmeta_istype: + | subi PC, PC, 4 + | stp BASE, L->base + | srwi CARG2, RA, 3 + | mr CARG1, L + | srwi CARG3, RD, 3 + | stw PC, SAVE_PC + | bl extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | b ->cont_nop + | |//-- Arithmetic metamethods --------------------------------------------- | |->vmeta_arith_nv: @@ -1219,22 +1349,32 @@ static void build_subroutines(BuildCtx *ctx) |.macro .ffunc_n, name |->ff_ .. name: | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) + | lwz CARG1, 0(BASE) + |.if FPU | lfd FARG1, 0(BASE) + |.else + | lwz CARG2, 4(BASE) + |.endif | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback + | checknum CARG1; bge ->fff_fallback |.endmacro | |.macro .ffunc_nn, name |->ff_ .. name: | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) + | lwz CARG1, 0(BASE) + |.if FPU | lfd FARG1, 0(BASE) - | lwz CARG4, 8(BASE) + | lwz CARG3, 8(BASE) | lfd FARG2, 8(BASE) + |.else + | lwz CARG2, 4(BASE) + | lwz CARG3, 8(BASE) + | lwz CARG4, 12(BASE) + |.endif | blt ->fff_fallback + | checknum CARG1; bge ->fff_fallback | checknum CARG3; bge ->fff_fallback - | checknum CARG4; bge ->fff_fallback |.endmacro | |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. @@ -1255,14 +1395,21 @@ static void build_subroutines(BuildCtx *ctx) | bge cr1, ->fff_fallback | stw CARG3, 0(RA) | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. + | addi TMP1, BASE, 8 + | add TMP2, RA, NARGS8:RC | stw CARG1, 4(RA) | beq ->fff_res // Done if exactly 1 argument. - | li TMP1, 8 - | subi RC, RC, 8 |1: - | cmplw TMP1, RC - | lfdx f0, BASE, TMP1 - | stfdx f0, RA, TMP1 + | cmplw TMP1, TMP2 + |.if FPU + | lfd f0, 0(TMP1) + | stfd f0, 0(TMP1) + |.else + | lwz CARG1, 0(TMP1) + | lwz CARG2, 4(TMP1) + | stw CARG1, -8(TMP1) + | stw CARG2, -4(TMP1) + |.endif | addi TMP1, TMP1, 8 | bney <1 | b ->fff_res @@ -1277,8 +1424,14 @@ static void build_subroutines(BuildCtx *ctx) | orc TMP1, TMP2, TMP0 | addi TMP1, TMP1, ~LJ_TISNUM+1 | slwi TMP1, TMP1, 3 + |.if FPU | la TMP2, CFUNC:RB->upvalue | lfdx FARG1, TMP2, TMP1 + |.else + | add TMP1, CFUNC:RB, TMP1 + | lwz CARG1, CFUNC:TMP1->upvalue[0].u32.hi + | lwz CARG2, CFUNC:TMP1->upvalue[0].u32.lo + |.endif | b ->fff_resn | |//-- Base library: getters and setters --------------------------------- @@ -1294,9 +1447,9 @@ static void build_subroutines(BuildCtx *ctx) | beq ->fff_restv | lwz TMP0, TAB:CARG1->hmask | li CARG3, LJ_TTAB // Use metatable as default result. - | lwz TMP1, STR:RC->hash + | lwz TMP1, STR:RC->sid | lwz NODE:TMP2, TAB:CARG1->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | slwi TMP0, TMP1, 5 | slwi TMP1, TMP1, 3 | sub TMP1, TMP0, TMP1 @@ -1356,7 +1509,12 @@ static void build_subroutines(BuildCtx *ctx) | mr CARG1, L | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | // Returns cTValue *. + |.if FPU | lfd FARG1, 0(CRET1) + |.else + | lwz CARG2, 4(CRET1) + | lwz CARG1, 0(CRET1) // Caveat: CARG1 == CRET1. + |.endif | b ->fff_resn | |//-- Base library: conversions ------------------------------------------ @@ -1365,7 +1523,11 @@ static void build_subroutines(BuildCtx *ctx) | // Only handles the number case inline (without a base argument). | cmplwi NARGS8:RC, 8 | lwz CARG1, 0(BASE) + |.if FPU | lfd FARG1, 0(BASE) + |.else + | lwz CARG2, 4(BASE) + |.endif | bne ->fff_fallback // Exactly one argument. | checknum CARG1; bgt ->fff_fallback | b ->fff_resn @@ -1387,9 +1549,9 @@ static void build_subroutines(BuildCtx *ctx) | mr CARG1, L | mr CARG2, BASE |.if DUALNUM - | bl extern lj_str_fromnumber // (lua_State *L, cTValue *o) + | bl extern lj_strfmt_number // (lua_State *L, cTValue *o) |.else - | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) + | bl extern lj_strfmt_num // (lua_State *L, lua_Number *np) |.endif | // Returns GCstr *. | li CARG3, LJ_TSTR @@ -1397,32 +1559,24 @@ static void build_subroutines(BuildCtx *ctx) | |//-- Base library: iterators ------------------------------------------- | - |.ffunc next - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | lwz TAB:CARG2, 4(BASE) - | blt ->fff_fallback + |.ffunc_1 next | stwx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. - | checktab CARG1 + | checktab CARG3 | lwz PC, FRAME_PC(BASE) | bne ->fff_fallback - | stp BASE, L->base // Add frame since C call can throw. - | mr CARG1, L - | stp BASE, L->top // Dummy frame length is ok. - | la CARG3, 8(BASE) - | stw PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | cmplwi CRET1, 0 - | li CARG3, LJ_TNIL - | beq ->fff_restv // End of traversal: return nil. - | lfd f0, 8(BASE) // Copy key and value to results. + | la CARG2, 8(BASE) + | la CARG3, -8(BASE) + | bl extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + | cmpwi CRET1, 0 | la RA, -8(BASE) - | lfd f1, 16(BASE) - | stfd f0, 0(RA) | li RD, (2+1)*8 - | stfd f1, 8(RA) - | b ->fff_res + | bgt ->fff_res // Found key/value. + | li CARG3, LJ_TNIL + | beq ->fff_restv // End of traversal: return nil. + | lwz CFUNC:RB, FRAME_FUNC(BASE) + | li NARGS8:RC, 2*8 + | b ->fff_fallback // Invalid key. | |.ffunc_1 pairs | checktab CARG3 @@ -1430,17 +1584,32 @@ static void build_subroutines(BuildCtx *ctx) | bne ->fff_fallback #if LJ_52 | lwz TAB:TMP2, TAB:CARG1->metatable + |.if FPU | lfd f0, CFUNC:RB->upvalue[0] + |.else + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo + |.endif | cmplwi TAB:TMP2, 0 | la RA, -8(BASE) | bne ->fff_fallback #else + |.if FPU | lfd f0, CFUNC:RB->upvalue[0] + |.else + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo + |.endif | la RA, -8(BASE) #endif | stw TISNIL, 8(BASE) | li RD, (3+1)*8 + |.if FPU | stfd f0, 0(RA) + |.else + | stw TMP0, 0(RA) + | stw TMP1, 4(RA) + |.endif | b ->fff_res | |.ffunc ipairs_aux @@ -1486,14 +1655,24 @@ static void build_subroutines(BuildCtx *ctx) | stfd FARG2, 0(RA) |.endif | ble >2 // Not in array part? + |.if FPU | lwzx TMP2, TMP1, TMP3 | lfdx f0, TMP1, TMP3 + |.else + | lwzux TMP2, TMP1, TMP3 + | lwz TMP3, 4(TMP1) + |.endif |1: | checknil TMP2 | li RD, (0+1)*8 | beq ->fff_res // End of iteration, return 0 results. | li RD, (2+1)*8 + |.if FPU | stfd f0, 8(RA) + |.else + | stw TMP2, 8(RA) + | stw TMP3, 12(RA) + |.endif | b ->fff_res |2: // Check for empty hash part first. Otherwise call C function. | lwz TMP0, TAB:CARG1->hmask @@ -1507,7 +1686,11 @@ static void build_subroutines(BuildCtx *ctx) | li RD, (0+1)*8 | beq ->fff_res | lwz TMP2, 0(CRET1) + |.if FPU | lfd f0, 0(CRET1) + |.else + | lwz TMP3, 4(CRET1) + |.endif | b <1 | |.ffunc_1 ipairs @@ -1516,12 +1699,22 @@ static void build_subroutines(BuildCtx *ctx) | bne ->fff_fallback #if LJ_52 | lwz TAB:TMP2, TAB:CARG1->metatable + |.if FPU | lfd f0, CFUNC:RB->upvalue[0] + |.else + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo + |.endif | cmplwi TAB:TMP2, 0 | la RA, -8(BASE) | bne ->fff_fallback #else + |.if FPU | lfd f0, CFUNC:RB->upvalue[0] + |.else + | lwz TMP0, CFUNC:RB->upvalue[0].u32.hi + | lwz TMP1, CFUNC:RB->upvalue[0].u32.lo + |.endif | la RA, -8(BASE) #endif |.if DUALNUM @@ -1531,7 +1724,12 @@ static void build_subroutines(BuildCtx *ctx) |.endif | stw ZERO, 12(BASE) | li RD, (3+1)*8 + |.if FPU | stfd f0, 0(RA) + |.else + | stw TMP0, 0(RA) + | stw TMP1, 4(RA) + |.endif | b ->fff_res | |//-- Base library: catch errors ---------------------------------------- @@ -1550,19 +1748,32 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc xpcall | cmplwi NARGS8:RC, 16 - | lwz CARG4, 8(BASE) + | lwz CARG3, 8(BASE) + |.if FPU | lfd FARG2, 8(BASE) | lfd FARG1, 0(BASE) + |.else + | lwz CARG1, 0(BASE) + | lwz CARG2, 4(BASE) + | lwz CARG4, 12(BASE) + |.endif | blt ->fff_fallback | lbz TMP1, DISPATCH_GL(hookmask)(DISPATCH) | mr TMP2, BASE - | checkfunc CARG4; bne ->fff_fallback // Traceback must be a function. + | checkfunc CARG3; bne ->fff_fallback // Traceback must be a function. | la BASE, 16(BASE) | // Remember active hook before pcall. | rlwinm TMP1, TMP1, 32-HOOK_ACTIVE_SHIFT, 31, 31 + |.if FPU | stfd FARG2, 0(TMP2) // Swap function and traceback. - | subi NARGS8:RC, NARGS8:RC, 16 | stfd FARG1, 8(TMP2) + |.else + | stw CARG3, 0(TMP2) + | stw CARG4, 4(TMP2) + | stw CARG1, 8(TMP2) + | stw CARG2, 12(TMP2) + |.endif + | subi NARGS8:RC, NARGS8:RC, 16 | addi PC, TMP1, 16+FRAME_PCALL | b ->vm_call_dispatch | @@ -1605,9 +1816,21 @@ static void build_subroutines(BuildCtx *ctx) | stp BASE, L->top |2: // Move args to coroutine. | cmpw TMP1, NARGS8:RC + |.if FPU | lfdx f0, BASE, TMP1 + |.else + | add CARG3, BASE, TMP1 + | lwz TMP2, 0(CARG3) + | lwz TMP3, 4(CARG3) + |.endif | beq >3 + |.if FPU | stfdx f0, CARG2, TMP1 + |.else + | add CARG3, CARG2, TMP1 + | stw TMP2, 0(CARG3) + | stw TMP3, 4(CARG3) + |.endif | addi TMP1, TMP1, 8 | b <2 |3: @@ -1622,6 +1845,7 @@ static void build_subroutines(BuildCtx *ctx) | lp TMP3, L:SAVE0->top | li_vmstate INTERP | lp BASE, L->base + | stw L, DISPATCH_GL(cur_L)(DISPATCH) | st_vmstate | bgt >8 | sub RD, TMP3, TMP2 @@ -1637,8 +1861,17 @@ static void build_subroutines(BuildCtx *ctx) | stp TMP2, L:SAVE0->top // Clear coroutine stack. |5: // Move results from coroutine. | cmplw TMP1, TMP3 + |.if FPU | lfdx f0, TMP2, TMP1 | stfdx f0, BASE, TMP1 + |.else + | add CARG3, TMP2, TMP1 + | lwz CARG1, 0(CARG3) + | lwz CARG2, 4(CARG3) + | add CARG3, BASE, TMP1 + | stw CARG1, 0(CARG3) + | stw CARG2, 4(CARG3) + |.endif | addi TMP1, TMP1, 8 | bne <5 |6: @@ -1663,12 +1896,22 @@ static void build_subroutines(BuildCtx *ctx) | andix. TMP0, PC, FRAME_TYPE | la TMP3, -8(TMP3) | li TMP1, LJ_TFALSE + |.if FPU | lfd f0, 0(TMP3) + |.else + | lwz CARG1, 0(TMP3) + | lwz CARG2, 4(TMP3) + |.endif | stp TMP3, L:SAVE0->top // Remove error from coroutine stack. | li RD, (2+1)*8 | stw TMP1, -8(BASE) // Prepend false to results. | la RA, -8(BASE) + |.if FPU | stfd f0, 0(BASE) // Copy error message. + |.else + | stw CARG1, 0(BASE) // Copy error message. + | stw CARG2, 4(BASE) + |.endif | b <7 |.else | mr CARG1, L @@ -1847,7 +2090,12 @@ static void build_subroutines(BuildCtx *ctx) | lus CARG1, 0x8000 // -(2^31). | beqy ->fff_resi |5: + |.if FPU | lfd FARG1, 0(BASE) + |.else + | lwz CARG1, 0(BASE) + | lwz CARG2, 4(BASE) + |.endif | blex func | b ->fff_resn |.endmacro @@ -1871,10 +2119,14 @@ static void build_subroutines(BuildCtx *ctx) | |.ffunc math_log | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lfd FARG1, 0(BASE) + | lwz CARG1, 0(BASE) | bne ->fff_fallback // Need exactly 1 argument. - | checknum CARG3; bge ->fff_fallback + | checknum CARG1; bge ->fff_fallback + |.if FPU + | lfd FARG1, 0(BASE) + |.else + | lwz CARG2, 4(BASE) + |.endif | blex log | b ->fff_resn | @@ -1893,26 +2145,27 @@ static void build_subroutines(BuildCtx *ctx) | math_extern2 atan2 | math_extern2 fmod | - |->ff_math_deg: - |.ffunc_n math_rad - | lfd FARG2, CFUNC:RB->upvalue[0] - | fmul FARG1, FARG1, FARG2 - | b ->fff_resn - | |.if DUALNUM |.ffunc math_ldexp | cmplwi NARGS8:RC, 16 - | lwz CARG3, 0(BASE) + | lwz TMP0, 0(BASE) + |.if FPU | lfd FARG1, 0(BASE) - | lwz CARG4, 8(BASE) + |.else + | lwz CARG1, 0(BASE) + | lwz CARG2, 4(BASE) + |.endif + | lwz TMP1, 8(BASE) |.if GPR64 | lwz CARG2, 12(BASE) - |.else + |.elif FPU | lwz CARG1, 12(BASE) + |.else + | lwz CARG3, 12(BASE) |.endif | blt ->fff_fallback - | checknum CARG3; bge ->fff_fallback - | checknum CARG4; bne ->fff_fallback + | checknum TMP0; bge ->fff_fallback + | checknum TMP1; bne ->fff_fallback |.else |.ffunc_nn math_ldexp |.if GPR64 @@ -1927,8 +2180,10 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc_n math_frexp |.if GPR64 | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - |.else + |.elif FPU | la CARG1, DISPATCH_GL(tmptv)(DISPATCH) + |.else + | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) |.endif | lwz PC, FRAME_PC(BASE) | blex frexp @@ -1937,7 +2192,12 @@ static void build_subroutines(BuildCtx *ctx) |.if not DUALNUM | tonum_i FARG2, TMP1 |.endif + |.if FPU | stfd FARG1, 0(RA) + |.else + | stw CRET1, 0(RA) + | stw CRET2, 4(RA) + |.endif | li RD, (2+1)*8 |.if DUALNUM | stw TISNUM, 8(RA) @@ -1950,13 +2210,20 @@ static void build_subroutines(BuildCtx *ctx) |.ffunc_n math_modf |.if GPR64 | la CARG2, -8(BASE) - |.else + |.elif FPU | la CARG1, -8(BASE) + |.else + | la CARG3, -8(BASE) |.endif | lwz PC, FRAME_PC(BASE) | blex modf | la RA, -8(BASE) + |.if FPU | stfd FARG1, 0(BASE) + |.else + | stw CRET1, 0(BASE) + | stw CRET2, 4(BASE) + |.endif | li RD, (2+1)*8 | b ->fff_res | @@ -1964,13 +2231,13 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM | .ffunc_1 name | checknum CARG3 - | addi TMP1, BASE, 8 - | add TMP2, BASE, NARGS8:RC + | addi SAVE0, BASE, 8 + | add SAVE1, BASE, NARGS8:RC | bne >4 |1: // Handle integers. - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - | lwz CARG2, 4(TMP1) + | lwz CARG4, 0(SAVE0) + | cmplw cr1, SAVE0, SAVE1 + | lwz CARG2, 4(SAVE0) | bge cr1, ->fff_resi | checknum CARG4 | xoris TMP0, CARG1, 0x8000 @@ -1987,36 +2254,76 @@ static void build_subroutines(BuildCtx *ctx) |.if GPR64 | rldicl CARG1, CARG1, 0, 32 |.endif - | addi TMP1, TMP1, 8 + | addi SAVE0, SAVE0, 8 | b <1 |3: | bge ->fff_fallback | // Convert intermediate result to number and continue below. + |.if FPU | tonum_i FARG1, CARG1 - | lfd FARG2, 0(TMP1) + | lfd FARG2, 0(SAVE0) + |.else + | mr CARG2, CARG1 + | bl ->vm_sfi2d_1 + | lwz CARG3, 0(SAVE0) + | lwz CARG4, 4(SAVE0) + |.endif | b >6 |4: + |.if FPU | lfd FARG1, 0(BASE) + |.else + | lwz CARG1, 0(BASE) + | lwz CARG2, 4(BASE) + |.endif | bge ->fff_fallback |5: // Handle numbers. - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 - | lfd FARG2, 0(TMP1) + | lwz CARG3, 0(SAVE0) + | cmplw cr1, SAVE0, SAVE1 + |.if FPU + | lfd FARG2, 0(SAVE0) + |.else + | lwz CARG4, 4(SAVE0) + |.endif | bge cr1, ->fff_resn - | checknum CARG4; bge >7 + | checknum CARG3; bge >7 |6: - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 + | addi SAVE0, SAVE0, 8 + |.if FPU |.if ismax + | fsub f0, FARG1, FARG2 + |.else + | fsub f0, FARG2, FARG1 + |.endif | fsel FARG1, f0, FARG1, FARG2 |.else - | fsel FARG1, f0, FARG2, FARG1 + | stw CARG1, SFSAVE_1 + | stw CARG2, SFSAVE_2 + | stw CARG3, SFSAVE_3 + | stw CARG4, SFSAVE_4 + | blex __ledf2 + | cmpwi CRET1, 0 + |.if ismax + | blt >8 + |.else + | bge >8 + |.endif + | lwz CARG1, SFSAVE_1 + | lwz CARG2, SFSAVE_2 + | b <5 + |8: + | lwz CARG1, SFSAVE_3 + | lwz CARG2, SFSAVE_4 |.endif | b <5 |7: // Convert integer to number and continue above. - | lwz CARG2, 4(TMP1) + | lwz CARG3, 4(SAVE0) | bne ->fff_fallback - | tonum_i FARG2, CARG2 + |.if FPU + | tonum_i FARG2, CARG3 + |.else + | bl ->vm_sfi2d_2 + |.endif | b <6 |.else | .ffunc_n name @@ -2028,13 +2335,13 @@ static void build_subroutines(BuildCtx *ctx) | checknum CARG2 | bge cr1, ->fff_resn | bge ->fff_fallback - | fsub f0, FARG1, FARG2 - | addi TMP1, TMP1, 8 |.if ismax - | fsel FARG1, f0, FARG1, FARG2 + | fsub f0, FARG1, FARG2 |.else - | fsel FARG1, f0, FARG2, FARG1 + | fsub f0, FARG2, FARG1 |.endif + | addi TMP1, TMP1, 8 + | fsel FARG1, f0, FARG1, FARG2 | b <1 |.endif |.endmacro @@ -2044,11 +2351,6 @@ static void build_subroutines(BuildCtx *ctx) | |//-- String library ----------------------------------------------------- | - |.ffunc_1 string_len - | checkstr CARG3; bne ->fff_fallback - | lwz CRET1, STR:CARG1->len - | b ->fff_resi - | |.ffunc string_byte // Only handle the 1-arg case here. | cmplwi NARGS8:RC, 8 | lwz CARG3, 0(BASE) @@ -2103,6 +2405,7 @@ static void build_subroutines(BuildCtx *ctx) | stp BASE, L->base | stw PC, SAVE_PC | bl extern lj_str_new // (lua_State *L, char *str, size_t l) + |->fff_resstr: | // Returns GCstr *. | lp BASE, L->base | li CARG3, LJ_TSTR @@ -2180,114 +2483,29 @@ static void build_subroutines(BuildCtx *ctx) | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) | b <3 | - |.ffunc string_rep // Only handle the 1-char case inline. - | ffgccheck - | cmplwi NARGS8:RC, 16 - | lwz TMP0, 0(BASE) - | lwz STR:CARG1, 4(BASE) - | lwz CARG4, 8(BASE) - |.if DUALNUM - | lwz CARG3, 12(BASE) - |.else - | lfd FARG2, 8(BASE) - |.endif - | bne ->fff_fallback // Exactly 2 arguments. - | checkstr TMP0; bne ->fff_fallback - |.if DUALNUM - | checknum CARG4; bne ->fff_fallback - |.else - | checknum CARG4; bge ->fff_fallback - | toint CARG3, FARG2 - |.endif - | lwz TMP0, STR:CARG1->len - | cmpwi CARG3, 0 - | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) - | ble >2 // Count <= 0? (or non-int) - | cmplwi TMP0, 1 - | subi TMP2, CARG3, 1 - | blt >2 // Zero length string? - | cmplw cr1, TMP1, CARG3 - | bne ->fff_fallback // Fallback for > 1-char strings. - | lbz TMP0, STR:CARG1[1] - | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) - | blt cr1, ->fff_fallback - |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). - | cmplwi TMP2, 0 - | stbx TMP0, CARG2, TMP2 - | subi TMP2, TMP2, 1 - | bne <1 - | b ->fff_newstr - |2: // Return empty string. - | la STR:CARG1, DISPATCH_GL(strempty)(DISPATCH) - | li CARG3, LJ_TSTR - | b ->fff_restv - | - |.ffunc string_reverse + |.macro ffstring_op, name + | .ffunc string_ .. name | ffgccheck | cmplwi NARGS8:RC, 8 | lwz CARG3, 0(BASE) - | lwz STR:CARG1, 4(BASE) + | lwz STR:CARG2, 4(BASE) | blt ->fff_fallback | checkstr CARG3 - | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) + | la SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) | bne ->fff_fallback - | lwz CARG3, STR:CARG1->len - | la CARG1, #STR(STR:CARG1) - | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) - | li TMP2, 0 - | cmplw TMP1, CARG3 - | subi TMP3, CARG3, 1 - | blt ->fff_fallback - |1: // Reverse string copy. - | cmpwi TMP3, 0 - | lbzx TMP1, CARG1, TMP2 - | blty ->fff_newstr - | stbx TMP1, CARG2, TMP3 - | subi TMP3, TMP3, 1 - | addi TMP2, TMP2, 1 - | b <1 - | - |.macro ffstring_case, name, lo - | .ffunc name - | ffgccheck - | cmplwi NARGS8:RC, 8 - | lwz CARG3, 0(BASE) - | lwz STR:CARG1, 4(BASE) - | blt ->fff_fallback - | checkstr CARG3 - | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) - | bne ->fff_fallback - | lwz CARG3, STR:CARG1->len - | la CARG1, #STR(STR:CARG1) - | lp CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) - | cmplw TMP1, CARG3 - | li TMP2, 0 - | blt ->fff_fallback - |1: // ASCII case conversion. - | cmplw TMP2, CARG3 - | lbzx TMP1, CARG1, TMP2 - | bgey ->fff_newstr - | subi TMP0, TMP1, lo - | xori TMP3, TMP1, 0x20 - | addic TMP0, TMP0, -26 - | subfe TMP3, TMP3, TMP3 - | rlwinm TMP3, TMP3, 0, 26, 26 // x &= 0x20. - | xor TMP1, TMP1, TMP3 - | stbx TMP1, CARG2, TMP2 - | addi TMP2, TMP2, 1 - | b <1 + | lwz TMP0, SBUF:CARG1->b + | stw L, SBUF:CARG1->L + | stp BASE, L->base + | stw PC, SAVE_PC + | stw TMP0, SBUF:CARG1->w + | bl extern lj_buf_putstr_ .. name + | bl extern lj_buf_tostr + | b ->fff_resstr |.endmacro | - |ffstring_case string_lower, 65 - |ffstring_case string_upper, 97 - | - |//-- Table library ------------------------------------------------------ - | - |.ffunc_1 table_getn - | checktab CARG3; bne ->fff_fallback - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | b ->fff_resi + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper | |//-- Bit library -------------------------------------------------------- | @@ -2305,28 +2523,37 @@ static void build_subroutines(BuildCtx *ctx) | |.macro .ffunc_bit_op, name, ins | .ffunc_bit name - | addi TMP1, BASE, 8 - | add TMP2, BASE, NARGS8:RC + | addi SAVE0, BASE, 8 + | add SAVE1, BASE, NARGS8:RC |1: - | lwz CARG4, 0(TMP1) - | cmplw cr1, TMP1, TMP2 + | lwz CARG4, 0(SAVE0) + | cmplw cr1, SAVE0, SAVE1 |.if DUALNUM - | lwz CARG2, 4(TMP1) + | lwz CARG2, 4(SAVE0) |.else - | lfd FARG1, 0(TMP1) + | lfd FARG1, 0(SAVE0) |.endif | bgey cr1, ->fff_resi | checknum CARG4 |.if DUALNUM + |.if FPU | bnel ->fff_bitop_fb |.else + | beq >3 + | stw CARG1, SFSAVE_1 + | bl ->fff_bitop_fb + | mr CARG2, CARG1 + | lwz CARG1, SFSAVE_1 + |3: + |.endif + |.else | fadd FARG1, FARG1, TOBIT | bge ->fff_fallback | stfd FARG1, TMPD | lwz CARG2, TMPD_LO |.endif | ins CARG1, CARG1, CARG2 - | addi TMP1, TMP1, 8 + | addi SAVE0, SAVE0, 8 | b <1 |.endmacro | @@ -2348,7 +2575,14 @@ static void build_subroutines(BuildCtx *ctx) |.macro .ffunc_bit_sh, name, ins, shmod |.if DUALNUM | .ffunc_2 bit_..name + |.if FPU | checknum CARG3; bnel ->fff_tobit_fb + |.else + | checknum CARG3; beq >1 + | bl ->fff_tobit_fb + | lwz CARG2, 12(BASE) // Conversion polluted CARG2. + |1: + |.endif | // Note: no inline conversion from number for 2nd argument! | checknum CARG4; bne ->fff_fallback |.else @@ -2385,27 +2619,77 @@ static void build_subroutines(BuildCtx *ctx) |->fff_resn: | lwz PC, FRAME_PC(BASE) | la RA, -8(BASE) + |.if FPU | stfd FARG1, -8(BASE) + |.else + | stw CARG1, -8(BASE) + | stw CARG2, -4(BASE) + |.endif | b ->fff_res1 | |// Fallback FP number to bit conversion. |->fff_tobit_fb: |.if DUALNUM + |.if FPU | lfd FARG1, 0(BASE) | bgt ->fff_fallback | fadd FARG1, FARG1, TOBIT | stfd FARG1, TMPD | lwz CARG1, TMPD_LO | blr + |.else + | bgt ->fff_fallback + | mr CARG2, CARG1 + | mr CARG1, CARG3 + |// Modifies: CARG1, CARG2, TMP0, TMP1, TMP2. + |->vm_tobit: + | slwi TMP2, CARG1, 1 + | addis TMP2, TMP2, 0x0020 + | cmpwi TMP2, 0 + | bge >2 + | li TMP1, 0x3e0 + | srawi TMP2, TMP2, 21 + | not TMP1, TMP1 + | sub. TMP2, TMP1, TMP2 + | cmpwi cr7, CARG1, 0 + | blt >1 + | slwi TMP1, CARG1, 11 + | srwi TMP0, CARG2, 21 + | oris TMP1, TMP1, 0x8000 + | or TMP1, TMP1, TMP0 + | srw CARG1, TMP1, TMP2 + | bclr 4, 28 // Return if cr7[lt] == 0, no hint. + | neg CARG1, CARG1 + | blr + |1: + | addi TMP2, TMP2, 21 + | srw TMP1, CARG2, TMP2 + | slwi CARG2, CARG1, 12 + | subfic TMP2, TMP2, 20 + | slw TMP0, CARG2, TMP2 + | or CARG1, TMP1, TMP0 + | bclr 4, 28 // Return if cr7[lt] == 0, no hint. + | neg CARG1, CARG1 + | blr + |2: + | li CARG1, 0 + | blr + |.endif |.endif |->fff_bitop_fb: |.if DUALNUM - | lfd FARG1, 0(TMP1) + |.if FPU + | lfd FARG1, 0(SAVE0) | bgt ->fff_fallback | fadd FARG1, FARG1, TOBIT | stfd FARG1, TMPD | lwz CARG2, TMPD_LO | blr + |.else + | bgt ->fff_fallback + | mr CARG1, CARG4 + | b ->vm_tobit + |.endif |.endif | |//----------------------------------------------------------------------- @@ -2589,15 +2873,88 @@ static void build_subroutines(BuildCtx *ctx) | mtctr CRET1 | bctr | + |->cont_stitch: // Trace stitching. + |.if JIT + | // RA = resultptr, RB = meta base + | lwz INS, -4(PC) + | lwz TRACE:TMP2, -20(RB) // Save previous trace. + | addic. TMP1, MULTRES, -8 + | decode_RA8 RC, INS // Call base. + | beq >2 + |1: // Move results down. + |.if FPU + | lfd f0, 0(RA) + |.else + | lwz CARG1, 0(RA) + | lwz CARG2, 4(RA) + |.endif + | addic. TMP1, TMP1, -8 + | addi RA, RA, 8 + |.if FPU + | stfdx f0, BASE, RC + |.else + | add CARG3, BASE, RC + | stw CARG1, 0(CARG3) + | stw CARG2, 4(CARG3) + |.endif + | addi RC, RC, 8 + | bne <1 + |2: + | decode_RA8 RA, INS + | decode_RB8 RB, INS + | add RA, RA, RB + |3: + | cmplw RA, RC + | bgt >9 // More results wanted? + | + | lhz TMP3, TRACE:TMP2->traceno + | lhz RD, TRACE:TMP2->link + | cmpw RD, TMP3 + | cmpwi cr1, RD, 0 + | beq ->cont_nop // Blacklisted. + | slwi RD, RD, 3 + | bne cr1, =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | stw TMP3, DISPATCH_J(exitno)(DISPATCH) + | stp L, DISPATCH_J(L)(DISPATCH) + | stp BASE, L->base + | addi CARG1, DISPATCH, GG_DISP2J + | mr CARG2, PC + | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | lp BASE, L->base + | b ->cont_nop + | + |9: + | stwx TISNIL, BASE, RC + | addi RC, RC, 8 + | b <3 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mr CARG1, L + | stw MULTRES, SAVE_MULTRES + | mr CARG2, PC + | stp BASE, L->base + | bl extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | lp BASE, L->base + | subi PC, PC, 4 + | b ->cont_nop +#endif + | |//----------------------------------------------------------------------- |//-- Trace exit handler ------------------------------------------------- |//----------------------------------------------------------------------- | |.macro savex_, a, b, c, d + |.if FPU | stfd f..a, 16+a*8(sp) | stfd f..b, 16+b*8(sp) | stfd f..c, 16+c*8(sp) | stfd f..d, 16+d*8(sp) + |.endif |.endmacro | |->vm_exit_handler: @@ -2623,16 +2980,16 @@ static void build_subroutines(BuildCtx *ctx) | savex_ 20,21,22,23 | lhz CARG4, 2(CARG3) // Load trace number. | savex_ 24,25,26,27 - | lwz L, DISPATCH_GL(jit_L)(DISPATCH) + | lwz L, DISPATCH_GL(cur_L)(DISPATCH) | savex_ 28,29,30,31 | sub CARG3, TMP0, CARG3 // Compute exit number. | lp BASE, DISPATCH_GL(jit_base)(DISPATCH) | srwi CARG3, CARG3, 2 - | stw L, DISPATCH_J(L)(DISPATCH) + | stp L, DISPATCH_J(L)(DISPATCH) | subi CARG3, CARG3, 2 - | stw TMP1, DISPATCH_GL(jit_L)(DISPATCH) - | stw CARG4, DISPATCH_J(parent)(DISPATCH) | stp BASE, L->base + | stw CARG4, DISPATCH_J(parent)(DISPATCH) + | stw TMP1, DISPATCH_GL(jit_base)(DISPATCH) | addi CARG1, DISPATCH, GG_DISP2J | stw CARG3, DISPATCH_J(exitno)(DISPATCH) | addi CARG2, sp, 16 @@ -2656,28 +3013,29 @@ static void build_subroutines(BuildCtx *ctx) | // CARG1 = MULTRES or negated error code, BASE, PC and JGL set. | lwz L, SAVE_L | addi DISPATCH, JGL, -GG_DISP2G-32768 + | stp BASE, L->base |1: | cmpwi CARG1, 0 - | blt >3 // Check for error from exit. - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) + | blt >9 // Check for error from exit. + | lwz LFUNC:RB, FRAME_FUNC(BASE) | slwi MULTRES, CARG1, 3 | li TMP2, 0 | stw MULTRES, SAVE_MULTRES - | lwz TMP1, LFUNC:TMP1->pc - | stw TMP2, DISPATCH_GL(jit_L)(DISPATCH) + | lwz TMP1, LFUNC:RB->pc + | stw TMP2, DISPATCH_GL(jit_base)(DISPATCH) | lwz KBASE, PC2PROTO(k)(TMP1) | // Setup type comparison constants. | li TISNUM, LJ_TISNUM - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). - | stw TMP3, TMPD + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU stw TMP3, TMPD | li ZERO, 0 - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | lfs TOBIT, TMPD - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | .FPU lfs TOBIT, TMPD + | .FPU stw TMP3, TMPD + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | li TISNIL, LJ_TNIL - | stw TMP0, TONUM_HI - | lfs TONUM, TMPD + | .FPU stw TMP0, TONUM_HI + | .FPU lfs TONUM, TMPD | // Modified copy of ins_next which handles function header dispatch, too. | lwz INS, 0(PC) | addi PC, PC, 4 @@ -2694,20 +3052,63 @@ static void build_subroutines(BuildCtx *ctx) | decode_RC8 RC, INS | bctr |2: + | cmplwi TMP1, (BC_FUNCC+2)*4 // Fast function? + | blt >3 + | // Check frame below fast function. + | lwz TMP1, FRAME_PC(BASE) + | andix. TMP0, TMP1, FRAME_TYPE + | bney >3 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | lwz TMP2, -4(TMP1) + | decode_RA8 TMP0, TMP2 + | sub TMP1, BASE, TMP0 + | lwz LFUNC:TMP2, -12(TMP1) + | lwz TMP1, LFUNC:TMP2->pc + | lwz KBASE, PC2PROTO(k)(TMP1) + |3: | subi RC, MULTRES, 8 | add RA, RA, BASE | bctr | - |3: // Rethrow error from the right C frame. + |9: // Rethrow error from the right C frame. + | neg CARG2, CARG1 | mr CARG1, L - | bl extern lj_err_run // (lua_State *L) + | bl extern lj_err_trace // (lua_State *L, int errcode) |.endif | |//----------------------------------------------------------------------- |//-- Math helper functions ---------------------------------------------- |//----------------------------------------------------------------------- | - |// NYI: Use internal implementations of floor, ceil, trunc. + |// NYI: Use internal implementations of floor, ceil, trunc, sfcmp. + | + |.macro sfi2d, AHI, ALO + |.if not FPU + | mr. AHI, ALO + | bclr 12, 2 // Handle zero first. + | srawi TMP0, ALO, 31 + | xor TMP1, ALO, TMP0 + | sub TMP1, TMP1, TMP0 // Absolute value in TMP1. + | cntlzw AHI, TMP1 + | andix. TMP0, TMP0, 0x800 // Mask sign bit. + | slw TMP1, TMP1, AHI // Align mantissa left with leading 1. + | subfic AHI, AHI, 0x3ff+31-1 // Exponent -1 in AHI. + | slwi ALO, TMP1, 21 + | or AHI, AHI, TMP0 // Sign | Exponent. + | srwi TMP1, TMP1, 11 + | slwi AHI, AHI, 20 // Align left. + | add AHI, AHI, TMP1 // Add mantissa, increment exponent. + | blr + |.endif + |.endmacro + | + |// Input: CARG2. Output: CARG1, CARG2. Temporaries: TMP0, TMP1. + |->vm_sfi2d_1: + | sfi2d CARG1, CARG2 + | + |// Input: CARG4. Output: CARG3, CARG4. Temporaries: TMP0, TMP1. + |->vm_sfi2d_2: + | sfi2d CARG3, CARG4 | |->vm_modi: | divwo. TMP0, CARG1, CARG2 @@ -2762,6 +3163,11 @@ static void build_subroutines(BuildCtx *ctx) | blr |.endif | + |->vm_next: + |.if JIT + | NYI // On big-endian. + |.endif + | |//----------------------------------------------------------------------- |//-- FFI helper functions ----------------------------------------------- |//----------------------------------------------------------------------- @@ -2775,21 +3181,21 @@ static void build_subroutines(BuildCtx *ctx) | addi DISPATCH, r12, GG_G2DISP | stw r11, CTSTATE->cb.slot | stw r3, CTSTATE->cb.gpr[0] - | stfd f1, CTSTATE->cb.fpr[0] + | .FPU stfd f1, CTSTATE->cb.fpr[0] | stw r4, CTSTATE->cb.gpr[1] - | stfd f2, CTSTATE->cb.fpr[1] + | .FPU stfd f2, CTSTATE->cb.fpr[1] | stw r5, CTSTATE->cb.gpr[2] - | stfd f3, CTSTATE->cb.fpr[2] + | .FPU stfd f3, CTSTATE->cb.fpr[2] | stw r6, CTSTATE->cb.gpr[3] - | stfd f4, CTSTATE->cb.fpr[3] + | .FPU stfd f4, CTSTATE->cb.fpr[3] | stw r7, CTSTATE->cb.gpr[4] - | stfd f5, CTSTATE->cb.fpr[4] + | .FPU stfd f5, CTSTATE->cb.fpr[4] | stw r8, CTSTATE->cb.gpr[5] - | stfd f6, CTSTATE->cb.fpr[5] + | .FPU stfd f6, CTSTATE->cb.fpr[5] | stw r9, CTSTATE->cb.gpr[6] - | stfd f7, CTSTATE->cb.fpr[6] + | .FPU stfd f7, CTSTATE->cb.fpr[6] | stw r10, CTSTATE->cb.gpr[7] - | stfd f8, CTSTATE->cb.fpr[7] + | .FPU stfd f8, CTSTATE->cb.fpr[7] | addi TMP0, sp, CFRAME_SPACE+8 | stw TMP0, CTSTATE->cb.stack | mr CARG1, CTSTATE @@ -2800,21 +3206,21 @@ static void build_subroutines(BuildCtx *ctx) | lp BASE, L:CRET1->base | li TISNUM, LJ_TISNUM // Setup type comparison constants. | lp RC, L:CRET1->top - | lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). + | .FPU lus TMP3, 0x59c0 // TOBIT = 2^52 + 2^51 (float). | li ZERO, 0 | mr L, CRET1 - | stw TMP3, TMPD - | lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) + | .FPU stw TMP3, TMPD + | .FPU lus TMP0, 0x4338 // Hiword of 2^52 + 2^51 (double) | lwz LFUNC:RB, FRAME_FUNC(BASE) - | ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). - | stw TMP0, TONUM_HI + | .FPU ori TMP3, TMP3, 0x0004 // TONUM = 2^52 + 2^51 + 2^31 (float). + | .FPU stw TMP0, TONUM_HI | li TISNIL, LJ_TNIL | li_vmstate INTERP - | lfs TOBIT, TMPD - | stw TMP3, TMPD + | .FPU lfs TOBIT, TMPD + | .FPU stw TMP3, TMPD | sub RC, RC, BASE | st_vmstate - | lfs TONUM, TMPD + | .FPU lfs TONUM, TMPD | ins_callt |.endif | @@ -2828,7 +3234,7 @@ static void build_subroutines(BuildCtx *ctx) | mr CARG2, RA | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) | lwz CRET1, CTSTATE->cb.gpr[0] - | lfd FARG1, CTSTATE->cb.fpr[0] + | .FPU lfd FARG1, CTSTATE->cb.fpr[0] | lwz CRET2, CTSTATE->cb.gpr[1] | b ->vm_leave_unw |.endif @@ -2862,14 +3268,14 @@ static void build_subroutines(BuildCtx *ctx) | bge <1 |2: | bney cr1, >3 - | lfd f1, CCSTATE->fpr[0] - | lfd f2, CCSTATE->fpr[1] - | lfd f3, CCSTATE->fpr[2] - | lfd f4, CCSTATE->fpr[3] - | lfd f5, CCSTATE->fpr[4] - | lfd f6, CCSTATE->fpr[5] - | lfd f7, CCSTATE->fpr[6] - | lfd f8, CCSTATE->fpr[7] + | .FPU lfd f1, CCSTATE->fpr[0] + | .FPU lfd f2, CCSTATE->fpr[1] + | .FPU lfd f3, CCSTATE->fpr[2] + | .FPU lfd f4, CCSTATE->fpr[3] + | .FPU lfd f5, CCSTATE->fpr[4] + | .FPU lfd f6, CCSTATE->fpr[5] + | .FPU lfd f7, CCSTATE->fpr[6] + | .FPU lfd f8, CCSTATE->fpr[7] |3: | lp TMP0, CCSTATE->func | lwz CARG2, CCSTATE->gpr[1] @@ -2886,7 +3292,7 @@ static void build_subroutines(BuildCtx *ctx) | lwz TMP2, -4(r14) | lwz TMP0, 4(r14) | stw CARG1, CCSTATE:TMP1->gpr[0] - | stfd FARG1, CCSTATE:TMP1->fpr[0] + | .FPU stfd FARG1, CCSTATE:TMP1->fpr[0] | stw CARG2, CCSTATE:TMP1->gpr[1] | mtlr TMP0 | stw CARG3, CCSTATE:TMP1->gpr[2] @@ -2915,19 +3321,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | // RA = src1*8, RD = src2*8, JMP with RD = target |.if DUALNUM - | lwzux TMP0, RA, BASE + | lwzux CARG1, RA, BASE | addi PC, PC, 4 | lwz CARG2, 4(RA) - | lwzux TMP1, RD, BASE + | lwzux CARG3, RD, BASE | lwz TMP2, -4(PC) - | checknum cr0, TMP0 - | lwz CARG3, 4(RD) + | checknum cr0, CARG1 + | lwz CARG4, 4(RD) | decode_RD4 TMP2, TMP2 - | checknum cr1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | checknum cr1, CARG3 + | addis SAVE0, TMP2, -(BCBIAS_J*4 >> 16) | bne cr0, >7 | bne cr1, >8 - | cmpw CARG2, CARG3 + | cmpw CARG2, CARG4 if (op == BC_ISLT) { | bge >2 } else if (op == BC_ISGE) { @@ -2938,28 +3344,41 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ble >2 } |1: - | add PC, PC, TMP2 + | add PC, PC, SAVE0 |2: | ins_next | |7: // RA is not an integer. | bgt cr0, ->vmeta_comp | // RA is a number. - | lfd f0, 0(RA) + | .FPU lfd f0, 0(RA) | bgt cr1, ->vmeta_comp | blt cr1, >4 | // RA is a number, RD is an integer. - | tonum_i f1, CARG3 + |.if FPU + | tonum_i f1, CARG4 + |.else + | bl ->vm_sfi2d_2 + |.endif | b >5 | |8: // RA is an integer, RD is not an integer. | bgt cr1, ->vmeta_comp | // RA is an integer, RD is a number. + |.if FPU | tonum_i f0, CARG2 + |.else + | bl ->vm_sfi2d_1 + |.endif |4: - | lfd f1, 0(RD) + | .FPU lfd f1, 0(RD) |5: + |.if FPU | fcmpu cr0, f0, f1 + |.else + | blex __ledf2 + | cmpwi CRET1, 0 + |.endif if (op == BC_ISLT) { | bge <2 } else if (op == BC_ISGE) { @@ -3007,42 +3426,42 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = op == BC_ISEQV; | // RA = src1*8, RD = src2*8, JMP with RD = target |.if DUALNUM - | lwzux TMP0, RA, BASE + | lwzux CARG1, RA, BASE | addi PC, PC, 4 | lwz CARG2, 4(RA) - | lwzux TMP1, RD, BASE - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | lwz CARG3, 4(RD) + | lwzux CARG3, RD, BASE + | checknum cr0, CARG1 + | lwz SAVE0, -4(PC) + | checknum cr1, CARG3 + | decode_RD4 SAVE0, SAVE0 + | lwz CARG4, 4(RD) | cror 4*cr7+gt, 4*cr0+gt, 4*cr1+gt - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) if (vk) { | ble cr7, ->BC_ISEQN_Z } else { | ble cr7, ->BC_ISNEN_Z } |.else - | lwzux TMP0, RA, BASE - | lwz TMP2, 0(PC) + | lwzux CARG1, RA, BASE + | lwz SAVE0, 0(PC) | lfd f0, 0(RA) | addi PC, PC, 4 - | lwzux TMP1, RD, BASE - | checknum cr0, TMP0 - | decode_RD4 TMP2, TMP2 + | lwzux CARG3, RD, BASE + | checknum cr0, CARG1 + | decode_RD4 SAVE0, SAVE0 | lfd f1, 0(RD) - | checknum cr1, TMP1 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | checknum cr1, CARG3 + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) | bge cr0, >5 | bge cr1, >5 | fcmpu cr0, f0, f1 if (vk) { | bne >1 - | add PC, PC, TMP2 + | add PC, PC, SAVE0 } else { | beq >1 - | add PC, PC, TMP2 + | add PC, PC, SAVE0 } |1: | ins_next @@ -3050,36 +3469,36 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |5: // Either or both types are not numbers. |.if not DUALNUM | lwz CARG2, 4(RA) - | lwz CARG3, 4(RD) + | lwz CARG4, 4(RD) |.endif |.if FFI - | cmpwi cr7, TMP0, LJ_TCDATA - | cmpwi cr5, TMP1, LJ_TCDATA + | cmpwi cr7, CARG1, LJ_TCDATA + | cmpwi cr5, CARG3, LJ_TCDATA |.endif - | not TMP3, TMP0 - | cmplw TMP0, TMP1 - | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? + | not TMP2, CARG1 + | cmplw CARG1, CARG3 + | cmplwi cr1, TMP2, ~LJ_TISPRI // Primitive? |.if FFI | cror 4*cr7+eq, 4*cr7+eq, 4*cr5+eq |.endif - | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? + | cmplwi cr6, TMP2, ~LJ_TISTABUD // Table or userdata? |.if FFI | beq cr7, ->vmeta_equal_cd |.endif - | cmplw cr5, CARG2, CARG3 + | cmplw cr5, CARG2, CARG4 | crandc 4*cr0+gt, 4*cr0+eq, 4*cr1+gt // 2: Same type and primitive. | crorc 4*cr0+lt, 4*cr5+eq, 4*cr0+eq // 1: Same tv or different type. | crand 4*cr0+eq, 4*cr0+eq, 4*cr5+eq // 0: Same type and same tv. - | mr SAVE0, PC + | mr SAVE1, PC | cror 4*cr0+eq, 4*cr0+eq, 4*cr0+gt // 0 or 2. | cror 4*cr0+lt, 4*cr0+lt, 4*cr0+gt // 1 or 2. if (vk) { | bne cr0, >6 - | add PC, PC, TMP2 + | add PC, PC, SAVE0 |6: } else { | beq cr0, >6 - | add PC, PC, TMP2 + | add PC, PC, SAVE0 |6: } |.if DUALNUM @@ -3094,6 +3513,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | | // Different tables or userdatas. Need to check __eq metamethod. | // Field metatable must be at same offset for GCtab and GCudata! + | mr CARG3, CARG4 | lwz TAB:TMP2, TAB:CARG2->metatable | li CARG4, 1-vk // ne = 0 or 1. | cmplwi TAB:TMP2, 0 @@ -3101,7 +3521,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lbz TMP2, TAB:TMP2->nomm | andix. TMP2, TMP2, 1<<MM_eq | bne <1 // Or 'no __eq' flag set? - | mr PC, SAVE0 // Restore old PC. + | mr PC, SAVE1 // Restore old PC. | b ->vmeta_equal // Handle __eq metamethod. break; @@ -3142,16 +3562,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) vk = op == BC_ISEQN; | // RA = src*8, RD = num_const*8, JMP with RD = target |.if DUALNUM - | lwzux TMP0, RA, BASE + | lwzux CARG1, RA, BASE | addi PC, PC, 4 | lwz CARG2, 4(RA) - | lwzux TMP1, RD, KBASE - | checknum cr0, TMP0 - | lwz TMP2, -4(PC) - | checknum cr1, TMP1 - | decode_RD4 TMP2, TMP2 - | lwz CARG3, 4(RD) - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | lwzux CARG3, RD, KBASE + | checknum cr0, CARG1 + | lwz SAVE0, -4(PC) + | checknum cr1, CARG3 + | decode_RD4 SAVE0, SAVE0 + | lwz CARG4, 4(RD) + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) if (vk) { |->BC_ISEQN_Z: } else { @@ -3159,7 +3579,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | bne cr0, >7 | bne cr1, >8 - | cmpw CARG2, CARG3 + | cmpw CARG2, CARG4 |4: |.else if (vk) { @@ -3167,20 +3587,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } else { |->BC_ISNEN_Z: // Dummy label. } - | lwzx TMP0, BASE, RA + | lwzx CARG1, BASE, RA | addi PC, PC, 4 | lfdx f0, BASE, RA - | lwz TMP2, -4(PC) + | lwz SAVE0, -4(PC) | lfdx f1, KBASE, RD - | decode_RD4 TMP2, TMP2 - | checknum TMP0 - | addis TMP2, TMP2, -(BCBIAS_J*4 >> 16) + | decode_RD4 SAVE0, SAVE0 + | checknum CARG1 + | addis SAVE0, SAVE0, -(BCBIAS_J*4 >> 16) | bge >3 | fcmpu cr0, f0, f1 |.endif if (vk) { | bne >1 - | add PC, PC, TMP2 + | add PC, PC, SAVE0 |1: |.if not FFI |3: @@ -3191,13 +3611,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.if not FFI |3: |.endif - | add PC, PC, TMP2 + | add PC, PC, SAVE0 |2: } | ins_next |.if FFI |3: - | cmpwi TMP0, LJ_TCDATA + | cmpwi CARG1, LJ_TCDATA | beq ->vmeta_equal_cd | b <1 |.endif @@ -3205,18 +3625,31 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |7: // RA is not an integer. | bge cr0, <3 | // RA is a number. - | lfd f0, 0(RA) + | .FPU lfd f0, 0(RA) | blt cr1, >1 | // RA is a number, RD is an integer. - | tonum_i f1, CARG3 + |.if FPU + | tonum_i f1, CARG4 + |.else + | bl ->vm_sfi2d_2 + |.endif | b >2 | |8: // RA is an integer, RD is a number. + |.if FPU | tonum_i f0, CARG2 + |.else + | bl ->vm_sfi2d_1 + |.endif |1: - | lfd f1, 0(RD) + | .FPU lfd f1, 0(RD) |2: + |.if FPU | fcmpu cr0, f0, f1 + |.else + | blex __ledf2 + | cmpwi CRET1, 0 + |.endif | b <4 |.endif break; @@ -3271,7 +3704,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add PC, PC, TMP2 } else { | li TMP1, LJ_TFALSE + |.if FPU | lfdx f0, BASE, RD + |.else + | lwzux CARG1, RD, BASE + | lwz CARG2, 4(RD) + |.endif | cmplw TMP0, TMP1 if (op == BC_ISTC) { | bge >1 @@ -3280,20 +3718,55 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) } | addis PC, PC, -(BCBIAS_J*4 >> 16) | decode_RD4 TMP2, INS + |.if FPU | stfdx f0, BASE, RA + |.else + | stwux CARG1, RA, BASE + | stw CARG2, 4(RA) + |.endif | add PC, PC, TMP2 |1: } | ins_next break; + case BC_ISTYPE: + | // RA = src*8, RD = -type*8 + | lwzx TMP0, BASE, RA + | srwi TMP1, RD, 3 + | ins_next1 + |.if not PPE and not GPR64 + | add. TMP0, TMP0, TMP1 + |.else + | neg TMP1, TMP1 + | cmpw TMP0, TMP1 + |.endif + | bne ->vmeta_istype + | ins_next2 + break; + case BC_ISNUM: + | // RA = src*8, RD = -(TISNUM-1)*8 + | lwzx TMP0, BASE, RA + | ins_next1 + | checknum TMP0 + | bge ->vmeta_istype + | ins_next2 + break; + /* -- Unary ops --------------------------------------------------------- */ case BC_MOV: | // RA = dst*8, RD = src*8 | ins_next1 + |.if FPU | lfdx f0, BASE, RD | stfdx f0, BASE, RA + |.else + | lwzux TMP0, RD, BASE + | lwz TMP1, 4(RD) + | stwux TMP0, RA, BASE + | stw TMP1, 4(RA) + |.endif | ins_next2 break; case BC_NOT: @@ -3395,44 +3868,65 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { ||case 0: - | lwzx TMP1, BASE, RB + | lwzx CARG1, BASE, RB | .if DUALNUM - | lwzx TMP2, KBASE, RC + | lwzx CARG3, KBASE, RC | .endif + | .if FPU | lfdx f14, BASE, RB | lfdx f15, KBASE, RC + | .else + | add TMP1, BASE, RB + | add TMP2, KBASE, RC + | lwz CARG2, 4(TMP1) + | lwz CARG4, 4(TMP2) + | .endif | .if DUALNUM - | checknum cr0, TMP1 - | checknum cr1, TMP2 + | checknum cr0, CARG1 + | checknum cr1, CARG3 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | bge ->vmeta_arith_vn | .else - | checknum TMP1; bge ->vmeta_arith_vn + | checknum CARG1; bge ->vmeta_arith_vn | .endif || break; ||case 1: - | lwzx TMP1, BASE, RB + | lwzx CARG1, BASE, RB | .if DUALNUM - | lwzx TMP2, KBASE, RC + | lwzx CARG3, KBASE, RC | .endif + | .if FPU | lfdx f15, BASE, RB | lfdx f14, KBASE, RC + | .else + | add TMP1, BASE, RB + | add TMP2, KBASE, RC + | lwz CARG2, 4(TMP1) + | lwz CARG4, 4(TMP2) + | .endif | .if DUALNUM - | checknum cr0, TMP1 - | checknum cr1, TMP2 + | checknum cr0, CARG1 + | checknum cr1, CARG3 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | bge ->vmeta_arith_nv | .else - | checknum TMP1; bge ->vmeta_arith_nv + | checknum CARG1; bge ->vmeta_arith_nv | .endif || break; ||default: - | lwzx TMP1, BASE, RB - | lwzx TMP2, BASE, RC + | lwzx CARG1, BASE, RB + | lwzx CARG3, BASE, RC + | .if FPU | lfdx f14, BASE, RB | lfdx f15, BASE, RC - | checknum cr0, TMP1 - | checknum cr1, TMP2 + | .else + | add TMP1, BASE, RB + | add TMP2, BASE, RC + | lwz CARG2, 4(TMP1) + | lwz CARG4, 4(TMP2) + | .endif + | checknum cr0, CARG1 + | checknum cr1, CARG3 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | bge ->vmeta_arith_vv || break; @@ -3466,48 +3960,78 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | fsub a, b, a // b - floor(b/c)*c |.endmacro | + |.macro sfpmod + |->BC_MODVN_Z: + | stw CARG1, SFSAVE_1 + | stw CARG2, SFSAVE_2 + | mr SAVE0, CARG3 + | mr SAVE1, CARG4 + | blex __divdf3 + | blex floor + | mr CARG3, SAVE0 + | mr CARG4, SAVE1 + | blex __muldf3 + | mr CARG3, CRET1 + | mr CARG4, CRET2 + | lwz CARG1, SFSAVE_1 + | lwz CARG2, SFSAVE_2 + | blex __subdf3 + |.endmacro + | |.macro ins_arithfp, fpins | ins_arithpre |.if "fpins" == "fpmod_" | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - |.else + |.elif FPU | fpins f0, f14, f15 | ins_next1 | stfdx f0, BASE, RA | ins_next2 + |.else + | blex __divdf3 // Only soft-float div uses this macro. + | ins_next1 + | stwux CRET1, RA, BASE + | stw CRET2, 4(RA) + | ins_next2 |.endif |.endmacro | - |.macro ins_arithdn, intins, fpins + |.macro ins_arithdn, intins, fpins, fpcall | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { ||case 0: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, KBASE - | lwz CARG1, 4(RB) - | checknum cr0, TMP1 - | lwz CARG2, 4(RC) + | lwzux CARG1, RB, BASE + | lwzux CARG3, RC, KBASE + | lwz CARG2, 4(RB) + | checknum cr0, CARG1 + | lwz CARG4, 4(RC) + | checknum cr1, CARG3 || break; ||case 1: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, KBASE - | lwz CARG2, 4(RB) - | checknum cr0, TMP1 - | lwz CARG1, 4(RC) + | lwzux CARG3, RB, BASE + | lwzux CARG1, RC, KBASE + | lwz CARG4, 4(RB) + | checknum cr0, CARG3 + | lwz CARG2, 4(RC) + | checknum cr1, CARG1 || break; ||default: - | lwzux TMP1, RB, BASE - | lwzux TMP2, RC, BASE - | lwz CARG1, 4(RB) - | checknum cr0, TMP1 - | lwz CARG2, 4(RC) + | lwzux CARG1, RB, BASE + | lwzux CARG3, RC, BASE + | lwz CARG2, 4(RB) + | checknum cr0, CARG1 + | lwz CARG4, 4(RC) + | checknum cr1, CARG3 || break; ||} - | checknum cr1, TMP2 | bne >5 | bne cr1, >5 - | intins CARG1, CARG1, CARG2 + |.if "intins" == "intmod" + | mr CARG1, CARG2 + | mr CARG2, CARG4 + |.endif + | intins CARG1, CARG2, CARG4 | bso >4 |1: | ins_next1 @@ -3519,29 +4043,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checkov TMP0, <1 // Ignore unrelated overflow. | ins_arithfallback b |5: // FP variant. + |.if FPU ||if (vk == 1) { | lfd f15, 0(RB) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | lfd f14, 0(RC) ||} else { | lfd f14, 0(RB) - | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | lfd f15, 0(RC) ||} + |.endif + | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | ins_arithfallback bge |.if "fpins" == "fpmod_" | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. |.else + |.if FPU | fpins f0, f14, f15 - | ins_next1 | stfdx f0, BASE, RA + |.else + |.if "fpcall" == "sfpmod" + | sfpmod + |.else + | blex fpcall + |.endif + | stwux CRET1, RA, BASE + | stw CRET2, 4(RA) + |.endif + | ins_next1 | b <2 |.endif |.endmacro | - |.macro ins_arith, intins, fpins + |.macro ins_arith, intins, fpins, fpcall |.if DUALNUM - | ins_arithdn intins, fpins + | ins_arithdn intins, fpins, fpcall |.else | ins_arithfp fpins |.endif @@ -3556,9 +4091,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addo. TMP0, TMP0, TMP1 | add y, a, b |.endmacro - | ins_arith addo32., fadd + | ins_arith addo32., fadd, __adddf3 |.else - | ins_arith addo., fadd + | ins_arith addo., fadd, __adddf3 |.endif break; case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: @@ -3570,36 +4105,48 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | subo. TMP0, TMP0, TMP1 | sub y, a, b |.endmacro - | ins_arith subo32., fsub + | ins_arith subo32., fsub, __subdf3 |.else - | ins_arith subo., fsub + | ins_arith subo., fsub, __subdf3 |.endif break; case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith mullwo., fmul + | ins_arith mullwo., fmul, __muldf3 break; case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: | ins_arithfp fdiv break; case BC_MODVN: - | ins_arith intmod, fpmod + | ins_arith intmod, fpmod, sfpmod break; case BC_MODNV: case BC_MODVV: - | ins_arith intmod, fpmod_ + | ins_arith intmod, fpmod_, sfpmod break; case BC_POW: | // NYI: (partial) integer arithmetic. - | lwzx TMP1, BASE, RB + | lwzx CARG1, BASE, RB + | lwzx CARG3, BASE, RC + |.if FPU | lfdx FARG1, BASE, RB - | lwzx TMP2, BASE, RC | lfdx FARG2, BASE, RC - | checknum cr0, TMP1 - | checknum cr1, TMP2 + |.else + | add TMP1, BASE, RB + | add TMP2, BASE, RC + | lwz CARG2, 4(TMP1) + | lwz CARG4, 4(TMP2) + |.endif + | checknum cr0, CARG1 + | checknum cr1, CARG3 | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt | bge ->vmeta_arith_vv | blex pow | ins_next1 + |.if FPU | stfdx FARG1, BASE, RA + |.else + | stwux CARG1, RA, BASE + | stw CARG2, 4(RA) + |.endif | ins_next2 break; @@ -3619,8 +4166,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lp BASE, L->base | bne ->vmeta_binop | ins_next1 + |.if FPU | lfdx f0, BASE, SAVE0 // Copy result from RB to RA. | stfdx f0, BASE, RA + |.else + | lwzux TMP0, SAVE0, BASE + | lwz TMP1, 4(SAVE0) + | stwux TMP0, RA, BASE + | stw TMP1, 4(RA) + |.endif | ins_next2 break; @@ -3683,8 +4237,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_KNUM: | // RA = dst*8, RD = num_const*8 | ins_next1 + |.if FPU | lfdx f0, KBASE, RD | stfdx f0, BASE, RA + |.else + | lwzux TMP0, RD, KBASE + | lwz TMP1, 4(RD) + | stwux TMP0, RA, BASE + | stw TMP1, 4(RA) + |.endif | ins_next2 break; case BC_KPRI: @@ -3717,8 +4278,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwzx UPVAL:RB, LFUNC:RB, RD | ins_next1 | lwz TMP1, UPVAL:RB->v + |.if FPU | lfd f0, 0(TMP1) | stfdx f0, BASE, RA + |.else + | lwz TMP2, 0(TMP1) + | lwz TMP3, 4(TMP1) + | stwux TMP2, RA, BASE + | stw TMP3, 4(RA) + |.endif | ins_next2 break; case BC_USETV: @@ -3726,14 +4294,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz LFUNC:RB, FRAME_FUNC(BASE) | srwi RA, RA, 1 | addi RA, RA, offsetof(GCfuncL, uvptr) + |.if FPU | lfdux f0, RD, BASE + |.else + | lwzux CARG1, RD, BASE + | lwz CARG3, 4(RD) + |.endif | lwzx UPVAL:RB, LFUNC:RB, RA | lbz TMP3, UPVAL:RB->marked | lwz CARG2, UPVAL:RB->v | andix. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) | lbz TMP0, UPVAL:RB->closed | lwz TMP2, 0(RD) + |.if FPU | stfd f0, 0(CARG2) + |.else + | stw CARG1, 0(CARG2) + | stw CARG3, 4(CARG2) + |.endif | cmplwi cr1, TMP0, 0 | lwz TMP1, 4(RD) | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq @@ -3789,11 +4367,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz LFUNC:RB, FRAME_FUNC(BASE) | srwi RA, RA, 1 | addi RA, RA, offsetof(GCfuncL, uvptr) + |.if FPU | lfdx f0, KBASE, RD + |.else + | lwzux TMP2, RD, KBASE + | lwz TMP3, 4(RD) + |.endif | lwzx UPVAL:RB, LFUNC:RB, RA | ins_next1 | lwz TMP1, UPVAL:RB->v + |.if FPU | stfd f0, 0(TMP1) + |.else + | stw TMP2, 0(TMP1) + | stw TMP3, 4(TMP1) + |.endif | ins_next2 break; case BC_USETP: @@ -3941,11 +4529,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | ble ->vmeta_tgetv // Integer key and in array part? | lwzx TMP0, TMP1, TMP2 + |.if FPU | lfdx f14, TMP1, TMP2 + |.else + | lwzux SAVE0, TMP1, TMP2 + | lwz SAVE1, 4(TMP1) + |.endif | checknil TMP0; beq >2 |1: | ins_next1 + |.if FPU | stfdx f14, BASE, RA + |.else + | stwux SAVE0, RA, BASE + | stw SAVE1, 4(RA) + |.endif | ins_next2 | |2: // Check for __index if table value is nil. @@ -3976,9 +4574,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TGETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash + | lwz TMP1, STR:RC->sid | lwz NODE:TMP2, TAB:RB->node - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask | slwi TMP0, TMP1, 5 | slwi TMP1, TMP1, 3 | sub TMP1, TMP0, TMP1 @@ -4021,12 +4619,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz TMP1, TAB:RB->asize | lwz TMP2, TAB:RB->array | cmplw TMP0, TMP1; bge ->vmeta_tgetb + |.if FPU | lwzx TMP1, TMP2, RC | lfdx f0, TMP2, RC + |.else + | lwzux TMP1, TMP2, RC + | lwz TMP3, 4(TMP2) + |.endif | checknil TMP1; beq >5 |1: | ins_next1 + |.if FPU | stfdx f0, BASE, RA + |.else + | stwux TMP1, RA, BASE + | stw TMP3, 4(RA) + |.endif | ins_next2 | |5: // Check for __index if table value is nil. @@ -4038,6 +4646,40 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bne <1 // 'no __index' flag set: done. | b ->vmeta_tgetb // Caveat: preserve TMP0! break; + case BC_TGETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | add RB, BASE, RB + | lwz TAB:CARG1, 4(RB) + |.if DUALNUM + | add RC, BASE, RC + | lwz TMP0, TAB:CARG1->asize + | lwz CARG2, 4(RC) + | lwz TMP1, TAB:CARG1->array + |.else + | lfdx f0, BASE, RC + | lwz TMP0, TAB:CARG1->asize + | toint CARG2, f0 + | lwz TMP1, TAB:CARG1->array + |.endif + | cmplw TMP0, CARG2 + | slwi TMP2, CARG2, 3 + | ble ->vmeta_tgetr // In array part? + |.if FPU + | lfdx f14, TMP1, TMP2 + |.else + | lwzux SAVE0, TMP2, TMP1 + | lwz SAVE1, 4(TMP2) + |.endif + |->BC_TGETR_Z: + | ins_next1 + |.if FPU + | stfdx f14, BASE, RA + |.else + | stwux SAVE0, RA, BASE + | stw SAVE1, 4(RA) + |.endif + | ins_next2 + break; case BC_TSETV: | // RA = src*8, RB = table*8, RC = key*8 @@ -4076,11 +4718,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ble ->vmeta_tsetv // Integer key and in array part? | lwzx TMP2, TMP1, TMP0 | lbz TMP3, TAB:RB->marked + |.if FPU | lfdx f14, BASE, RA + |.else + | add SAVE1, BASE, RA + | lwz SAVE0, 0(SAVE1) + | lwz SAVE1, 4(SAVE1) + |.endif | checknil TMP2; beq >3 |1: | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) + |.if FPU | stfdx f14, TMP1, TMP0 + |.else + | stwux SAVE0, TMP1, TMP0 + | stw SAVE1, 4(TMP1) + |.endif | bne >7 |2: | ins_next @@ -4117,11 +4770,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |->BC_TSETS_Z: | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash + | lwz TMP1, STR:RC->sid | lwz NODE:TMP2, TAB:RB->node | stb ZERO, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask + | and TMP1, TMP1, TMP0 // idx = str->sid & tab->hmask + |.if FPU | lfdx f14, BASE, RA + |.else + | add CARG2, BASE, RA + | lwz SAVE0, 0(CARG2) + | lwz SAVE1, 4(CARG2) + |.endif | slwi TMP0, TMP1, 5 | slwi TMP1, TMP1, 3 | sub TMP1, TMP0, TMP1 @@ -4137,7 +4796,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | checknil CARG2; beq >4 // Key found, but nil value? |2: | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |.if FPU | stfd f14, NODE:TMP2->val + |.else + | stw SAVE0, NODE:TMP2->val.u32.hi + | stw SAVE1, NODE:TMP2->val.u32.lo + |.endif | bne >7 |3: | ins_next @@ -4176,7 +4840,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | // Returns TValue *. | lp BASE, L->base + |.if FPU | stfd f14, 0(CRET1) + |.else + | stw SAVE0, 0(CRET1) + | stw SAVE1, 4(CRET1) + |.endif | b <3 // No 2nd write barrier needed. | |7: // Possible table write barrier for the value. Skip valiswhite check. @@ -4193,13 +4862,24 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lwz TMP2, TAB:RB->array | lbz TMP3, TAB:RB->marked | cmplw TMP0, TMP1 + |.if FPU | lfdx f14, BASE, RA + |.else + | add CARG2, BASE, RA + | lwz SAVE0, 0(CARG2) + | lwz SAVE1, 4(CARG2) + |.endif | bge ->vmeta_tsetb | lwzx TMP1, TMP2, RC | checknil TMP1; beq >5 |1: | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) + |.if FPU | stfdx f14, TMP2, RC + |.else + | stwux SAVE0, RC, TMP2 + | stw SAVE1, 4(RC) + |.endif | bne >7 |2: | ins_next @@ -4217,6 +4897,49 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | barrierback TAB:RB, TMP3, TMP0 | b <2 break; + case BC_TSETR: + | // RA = dst*8, RB = table*8, RC = key*8 + | add RB, BASE, RB + | lwz TAB:CARG2, 4(RB) + |.if DUALNUM + | add RC, BASE, RC + | lbz TMP3, TAB:CARG2->marked + | lwz TMP0, TAB:CARG2->asize + | lwz CARG3, 4(RC) + | lwz TMP1, TAB:CARG2->array + |.else + | lfdx f0, BASE, RC + | lbz TMP3, TAB:CARG2->marked + | lwz TMP0, TAB:CARG2->asize + | toint CARG3, f0 + | lwz TMP1, TAB:CARG2->array + |.endif + | andix. TMP2, TMP3, LJ_GC_BLACK // isblack(table) + | bne >7 + |2: + | cmplw TMP0, CARG3 + | slwi TMP2, CARG3, 3 + |.if FPU + | lfdx f14, BASE, RA + |.else + | lwzux SAVE0, RA, BASE + | lwz SAVE1, 4(RA) + |.endif + | ble ->vmeta_tsetr // In array part? + | ins_next1 + |.if FPU + | stfdx f14, TMP1, TMP2 + |.else + | stwux SAVE0, TMP1, TMP2 + | stw SAVE1, 4(TMP1) + |.endif + | ins_next2 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:CARG2, TMP3, TMP2 + | b <2 + break; + case BC_TSETM: | // RA = base*8 (table at base-1), RD = num_const*8 (start index) @@ -4239,10 +4962,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add TMP1, TMP1, TMP0 | andix. TMP0, TMP3, LJ_GC_BLACK // isblack(table) |3: // Copy result slots to table. + |.if FPU | lfd f0, 0(RA) + |.else + | lwz SAVE0, 0(RA) + | lwz SAVE1, 4(RA) + |.endif | addi RA, RA, 8 | cmpw cr1, RA, TMP2 + |.if FPU | stfd f0, 0(TMP1) + |.else + | stw SAVE0, 0(TMP1) + | stw SAVE1, 4(TMP1) + |.endif | addi TMP1, TMP1, 8 | blt cr1, <3 | bne >7 @@ -4309,9 +5042,20 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | beq cr1, >3 |2: | addi TMP3, TMP2, 8 + |.if FPU | lfdx f0, RA, TMP2 + |.else + | add CARG3, RA, TMP2 + | lwz CARG1, 0(CARG3) + | lwz CARG2, 4(CARG3) + |.endif | cmplw cr1, TMP3, NARGS8:RC + |.if FPU | stfdx f0, BASE, TMP2 + |.else + | stwux CARG1, TMP2, BASE + | stw CARG2, 4(TMP2) + |.endif | mr TMP2, TMP3 | bne cr1, <2 |3: @@ -4344,14 +5088,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | add BASE, BASE, RA | lwz TMP1, -24(BASE) | lwz LFUNC:RB, -20(BASE) + |.if FPU | lfd f1, -8(BASE) | lfd f0, -16(BASE) + |.else + | lwz CARG1, -8(BASE) + | lwz CARG2, -4(BASE) + | lwz CARG3, -16(BASE) + | lwz CARG4, -12(BASE) + |.endif | stw TMP1, 0(BASE) // Copy callable. | stw LFUNC:RB, 4(BASE) | checkfunc TMP1 - | stfd f1, 16(BASE) // Copy control var. | li NARGS8:RC, 16 // Iterators get 2 arguments. + |.if FPU + | stfd f1, 16(BASE) // Copy control var. | stfdu f0, 8(BASE) // Copy state. + |.else + | stw CARG1, 16(BASE) // Copy control var. + | stw CARG2, 20(BASE) + | stwu CARG3, 8(BASE) // Copy state. + | stw CARG4, 4(BASE) + |.endif | bne ->vmeta_call | ins_call break; @@ -4359,8 +5117,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ITERN: | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | // NYI on big-endian |.endif + |->vm_IITERN: | add RA, BASE, RA | lwz TAB:RB, -12(RA) | lwz RC, -4(RA) // Get index from control var. @@ -4372,7 +5131,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | slwi TMP3, RC, 3 | bge >5 // Index points after array part? | lwzx TMP2, TMP1, TMP3 + |.if FPU | lfdx f0, TMP1, TMP3 + |.else + | lwzux CARG1, TMP3, TMP1 + | lwz CARG2, 4(TMP3) + |.endif | checknil TMP2 | lwz INS, -4(PC) | beq >4 @@ -4384,7 +5148,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | addi RC, RC, 1 | addis TMP3, PC, -(BCBIAS_J*4 >> 16) + |.if FPU | stfd f0, 8(RA) + |.else + | stw CARG1, 8(RA) + | stw CARG2, 12(RA) + |.endif | decode_RD4 TMP1, INS | stw RC, -4(RA) // Update control var. | add PC, TMP1, TMP3 @@ -4409,17 +5178,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | slwi RB, RC, 3 | sub TMP3, TMP3, RB | lwzx RB, TMP2, TMP3 + |.if FPU | lfdx f0, TMP2, TMP3 + |.else + | add CARG3, TMP2, TMP3 + | lwz CARG1, 0(CARG3) + | lwz CARG2, 4(CARG3) + |.endif | add NODE:TMP3, TMP2, TMP3 | checknil RB | lwz INS, -4(PC) | beq >7 + |.if FPU | lfd f1, NODE:TMP3->key + |.else + | lwz CARG3, NODE:TMP3->key.u32.hi + | lwz CARG4, NODE:TMP3->key.u32.lo + |.endif | addis TMP2, PC, -(BCBIAS_J*4 >> 16) + |.if FPU | stfd f0, 8(RA) + |.else + | stw CARG1, 8(RA) + | stw CARG2, 12(RA) + |.endif | add RC, RC, TMP0 | decode_RD4 TMP1, INS + |.if FPU | stfd f1, 0(RA) + |.else + | stw CARG3, 0(RA) + | stw CARG4, 4(RA) + |.endif | addi RC, RC, 1 | add PC, TMP1, TMP2 | stw RC, -4(RA) // Update control var. @@ -4448,8 +5238,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq | add TMP3, PC, TMP0 | bne cr0, >5 - | lus TMP1, 0xfffe - | ori TMP1, TMP1, 0x7fff + | lus TMP1, (LJ_KEYINDEX >> 16) + | ori TMP1, TMP1, (LJ_KEYINDEX & 0xffff) | stw ZERO, -4(RA) // Initialize control var. | stw TMP1, -8(RA) | addis PC, TMP3, -(BCBIAS_J*4 >> 16) @@ -4460,6 +5250,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP1, BC_ITERC | stb TMP0, -1(PC) | addis PC, TMP3, -(BCBIAS_J*4 >> 16) + | // NYI on big-endian: unpatch JLOOP. | stb TMP1, 3(PC) | b <1 break; @@ -4485,9 +5276,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | subi TMP2, TMP2, 16 | ble >2 // No vararg slots? |1: // Copy vararg slots to destination slots. + |.if FPU | lfd f0, 0(RC) + |.else + | lwz CARG1, 0(RC) + | lwz CARG2, 4(RC) + |.endif | addi RC, RC, 8 + |.if FPU | stfd f0, 0(RA) + |.else + | stw CARG1, 0(RA) + | stw CARG2, 4(RA) + |.endif | cmplw RA, TMP2 | cmplw cr1, RC, TMP3 | bge >3 // All destination slots filled? @@ -4510,9 +5311,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | addi MULTRES, TMP1, 8 | bgt >7 |6: + |.if FPU | lfd f0, 0(RC) + |.else + | lwz CARG1, 0(RC) + | lwz CARG2, 4(RC) + |.endif | addi RC, RC, 8 + |.if FPU | stfd f0, 0(RA) + |.else + | stw CARG1, 0(RA) + | stw CARG2, 4(RA) + |.endif | cmplw RC, TMP3 | addi RA, RA, 8 | blt <6 // More vararg slots? @@ -4563,14 +5374,38 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | li TMP1, 0 |2: | addi TMP3, TMP1, 8 + |.if FPU | lfdx f0, RA, TMP1 + |.else + | add CARG3, RA, TMP1 + | lwz CARG1, 0(CARG3) + | lwz CARG2, 4(CARG3) + |.endif | cmpw TMP3, RC + |.if FPU | stfdx f0, TMP2, TMP1 + |.else + | add CARG3, TMP2, TMP1 + | stw CARG1, 0(CARG3) + | stw CARG2, 4(CARG3) + |.endif | beq >3 | addi TMP1, TMP3, 8 + |.if FPU | lfdx f1, RA, TMP3 + |.else + | add CARG3, RA, TMP3 + | lwz CARG1, 0(CARG3) + | lwz CARG2, 4(CARG3) + |.endif | cmpw TMP1, RC + |.if FPU | stfdx f1, TMP2, TMP3 + |.else + | add CARG3, TMP2, TMP3 + | stw CARG1, 0(CARG3) + | stw CARG2, 4(CARG3) + |.endif | bne <2 |3: |5: @@ -4612,8 +5447,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | subi TMP2, BASE, 8 | decode_RB8 RB, INS if (op == BC_RET1) { + |.if FPU | lfd f0, 0(RA) | stfd f0, 0(TMP2) + |.else + | lwz CARG1, 0(RA) + | lwz CARG2, 4(RA) + | stw CARG1, 0(TMP2) + | stw CARG2, 4(TMP2) + |.endif } |5: | cmplw RB, RD @@ -4674,11 +5516,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |4: | stw CARG1, FORL_IDX*8+4(RA) } else { - | lwz TMP3, FORL_STEP*8(RA) + | lwz SAVE0, FORL_STEP*8(RA) | lwz CARG3, FORL_STEP*8+4(RA) | lwz TMP2, FORL_STOP*8(RA) | lwz CARG2, FORL_STOP*8+4(RA) - | cmplw cr7, TMP3, TISNUM + | cmplw cr7, SAVE0, TISNUM | cmplw cr1, TMP2, TISNUM | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq | crand 4*cr0+eq, 4*cr0+eq, 4*cr1+eq @@ -4721,41 +5563,80 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (vk) { |.if DUALNUM |9: // FP loop. + |.if FPU | lfd f1, FORL_IDX*8(RA) |.else + | lwz CARG1, FORL_IDX*8(RA) + | lwz CARG2, FORL_IDX*8+4(RA) + |.endif + |.else | lfdux f1, RA, BASE |.endif + |.if FPU | lfd f3, FORL_STEP*8(RA) | lfd f2, FORL_STOP*8(RA) - | lwz TMP3, FORL_STEP*8(RA) | fadd f1, f1, f3 | stfd f1, FORL_IDX*8(RA) + |.else + | lwz CARG3, FORL_STEP*8(RA) + | lwz CARG4, FORL_STEP*8+4(RA) + | mr SAVE1, RD + | blex __adddf3 + | mr RD, SAVE1 + | stw CRET1, FORL_IDX*8(RA) + | stw CRET2, FORL_IDX*8+4(RA) + | lwz CARG3, FORL_STOP*8(RA) + | lwz CARG4, FORL_STOP*8+4(RA) + |.endif + | lwz SAVE0, FORL_STEP*8(RA) } else { |.if DUALNUM |9: // FP loop. |.else | lwzux TMP1, RA, BASE - | lwz TMP3, FORL_STEP*8(RA) + | lwz SAVE0, FORL_STEP*8(RA) | lwz TMP2, FORL_STOP*8(RA) | cmplw cr0, TMP1, TISNUM - | cmplw cr7, TMP3, TISNUM + | cmplw cr7, SAVE0, TISNUM | cmplw cr1, TMP2, TISNUM |.endif + |.if FPU | lfd f1, FORL_IDX*8(RA) + |.else + | lwz CARG1, FORL_IDX*8(RA) + | lwz CARG2, FORL_IDX*8+4(RA) + |.endif | crand 4*cr0+lt, 4*cr0+lt, 4*cr7+lt | crand 4*cr0+lt, 4*cr0+lt, 4*cr1+lt + |.if FPU | lfd f2, FORL_STOP*8(RA) + |.else + | lwz CARG3, FORL_STOP*8(RA) + | lwz CARG4, FORL_STOP*8+4(RA) + |.endif | bge ->vmeta_for } - | cmpwi cr6, TMP3, 0 + | cmpwi cr6, SAVE0, 0 if (op != BC_JFORL) { | srwi RD, RD, 1 } + |.if FPU | stfd f1, FORL_EXT*8(RA) + |.else + | stw CARG1, FORL_EXT*8(RA) + | stw CARG2, FORL_EXT*8+4(RA) + |.endif if (op != BC_JFORL) { | add RD, PC, RD } + |.if FPU | fcmpu cr0, f1, f2 + |.else + | mr SAVE1, RD + | blex __ledf2 + | cmpwi CRET1, 0 + | mr RD, SAVE1 + |.endif if (op == BC_JFORI) { | addis PC, RD, -(BCBIAS_J*4 >> 16) } @@ -4858,8 +5739,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lp TMP2, TRACE:TMP2->mcode | stw BASE, DISPATCH_GL(jit_base)(DISPATCH) | mtctr TMP2 - | stw L, DISPATCH_GL(jit_L)(DISPATCH) | addi JGL, DISPATCH, GG_DISP2G+32768 + | stw L, DISPATCH_GL(tmpbuf.L)(DISPATCH) | bctr |.endif break; @@ -4994,6 +5875,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | lp TMP1, L->top | li_vmstate INTERP | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. + | stw L, DISPATCH_GL(cur_L)(DISPATCH) | sub RA, TMP1, RD // RA = L->top - nresults*8 | st_vmstate | b ->vm_returnc diff --git a/src/vm_ppcspe.dasc b/src/vm_ppcspe.dasc deleted file mode 100644 index 1d8f70f0..00000000 --- a/src/vm_ppcspe.dasc +++ /dev/null @@ -1,3691 +0,0 @@ -|// Low-level VM code for PowerPC/e500 CPUs. -|// Bytecode interpreter, fast functions and helper functions. -|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h -| -|.arch ppc -|.section code_op, code_sub -| -|.actionlist build_actionlist -|.globals GLOB_ -|.globalnames globnames -|.externnames extnames -| -|// Note: The ragged indentation of the instructions is intentional. -|// The starting columns indicate data dependencies. -| -|//----------------------------------------------------------------------- -| -|// Fixed register assignments for the interpreter. -|// Don't use: r1 = sp, r2 and r13 = reserved and/or small data area ptr -| -|// The following must be C callee-save (but BASE is often refetched). -|.define BASE, r14 // Base of current Lua stack frame. -|.define KBASE, r15 // Constants of current Lua function. -|.define PC, r16 // Next PC. -|.define DISPATCH, r17 // Opcode dispatch table. -|.define LREG, r18 // Register holding lua_State (also in SAVE_L). -|.define MULTRES, r19 // Size of multi-result: (nresults+1)*8. -| -|// Constants for vectorized type-comparisons (hi+low GPR). C callee-save. -|.define TISNUM, r22 -|.define TISSTR, r23 -|.define TISTAB, r24 -|.define TISFUNC, r25 -|.define TISNIL, r26 -|.define TOBIT, r27 -|.define ZERO, TOBIT // Zero in lo word. -| -|// The following temporaries are not saved across C calls, except for RA. -|.define RA, r20 // Callee-save. -|.define RB, r10 -|.define RC, r11 -|.define RD, r12 -|.define INS, r7 // Overlaps CARG5. -| -|.define TMP0, r0 -|.define TMP1, r8 -|.define TMP2, r9 -|.define TMP3, r6 // Overlaps CARG4. -| -|// Saved temporaries. -|.define SAVE0, r21 -| -|// Calling conventions. -|.define CARG1, r3 -|.define CARG2, r4 -|.define CARG3, r5 -|.define CARG4, r6 // Overlaps TMP3. -|.define CARG5, r7 // Overlaps INS. -| -|.define CRET1, r3 -|.define CRET2, r4 -| -|// Stack layout while in interpreter. Must match with lj_frame.h. -|.define SAVE_LR, 188(sp) -|.define CFRAME_SPACE, 184 // Delta for sp. -|// Back chain for sp: 184(sp) <-- sp entering interpreter -|.define SAVE_r31, 176(sp) // 64 bit register saves. -|.define SAVE_r30, 168(sp) -|.define SAVE_r29, 160(sp) -|.define SAVE_r28, 152(sp) -|.define SAVE_r27, 144(sp) -|.define SAVE_r26, 136(sp) -|.define SAVE_r25, 128(sp) -|.define SAVE_r24, 120(sp) -|.define SAVE_r23, 112(sp) -|.define SAVE_r22, 104(sp) -|.define SAVE_r21, 96(sp) -|.define SAVE_r20, 88(sp) -|.define SAVE_r19, 80(sp) -|.define SAVE_r18, 72(sp) -|.define SAVE_r17, 64(sp) -|.define SAVE_r16, 56(sp) -|.define SAVE_r15, 48(sp) -|.define SAVE_r14, 40(sp) -|.define SAVE_CR, 36(sp) -|.define UNUSED1, 32(sp) -|.define SAVE_ERRF, 28(sp) // 32 bit C frame info. -|.define SAVE_NRES, 24(sp) -|.define SAVE_CFRAME, 20(sp) -|.define SAVE_L, 16(sp) -|.define SAVE_PC, 12(sp) -|.define SAVE_MULTRES, 8(sp) -|// Next frame lr: 4(sp) -|// Back chain for sp: 0(sp) <-- sp while in interpreter -| -|.macro save_, reg; evstdd reg, SAVE_..reg; .endmacro -|.macro rest_, reg; evldd reg, SAVE_..reg; .endmacro -| -|.macro saveregs -| stwu sp, -CFRAME_SPACE(sp) -| save_ r14; save_ r15; save_ r16; save_ r17; save_ r18; save_ r19 -| mflr r0; mfcr r12 -| save_ r20; save_ r21; save_ r22; save_ r23; save_ r24; save_ r25 -| stw r0, SAVE_LR; stw r12, SAVE_CR -| save_ r26; save_ r27; save_ r28; save_ r29; save_ r30; save_ r31 -|.endmacro -| -|.macro restoreregs -| lwz r0, SAVE_LR; lwz r12, SAVE_CR -| rest_ r14; rest_ r15; rest_ r16; rest_ r17; rest_ r18; rest_ r19 -| mtlr r0; mtcrf 0x38, r12 -| rest_ r20; rest_ r21; rest_ r22; rest_ r23; rest_ r24; rest_ r25 -| rest_ r26; rest_ r27; rest_ r28; rest_ r29; rest_ r30; rest_ r31 -| addi sp, sp, CFRAME_SPACE -|.endmacro -| -|// Type definitions. Some of these are only used for documentation. -|.type L, lua_State, LREG -|.type GL, global_State -|.type TVALUE, TValue -|.type GCOBJ, GCobj -|.type STR, GCstr -|.type TAB, GCtab -|.type LFUNC, GCfuncL -|.type CFUNC, GCfuncC -|.type PROTO, GCproto -|.type UPVAL, GCupval -|.type NODE, Node -|.type NARGS8, int -|.type TRACE, GCtrace -| -|//----------------------------------------------------------------------- -| -|// These basic macros should really be part of DynASM. -|.macro srwi, rx, ry, n; rlwinm rx, ry, 32-n, n, 31; .endmacro -|.macro slwi, rx, ry, n; rlwinm rx, ry, n, 0, 31-n; .endmacro -|.macro rotlwi, rx, ry, n; rlwinm rx, ry, n, 0, 31; .endmacro -|.macro rotlw, rx, ry, rn; rlwnm rx, ry, rn, 0, 31; .endmacro -|.macro subi, rx, ry, i; addi rx, ry, -i; .endmacro -| -|// Trap for not-yet-implemented parts. -|.macro NYI; tw 4, sp, sp; .endmacro -| -|//----------------------------------------------------------------------- -| -|// Access to frame relative to BASE. -|.define FRAME_PC, -8 -|.define FRAME_FUNC, -4 -| -|// Instruction decode. -|.macro decode_OP4, dst, ins; rlwinm dst, ins, 2, 22, 29; .endmacro -|.macro decode_RA8, dst, ins; rlwinm dst, ins, 27, 21, 28; .endmacro -|.macro decode_RB8, dst, ins; rlwinm dst, ins, 11, 21, 28; .endmacro -|.macro decode_RC8, dst, ins; rlwinm dst, ins, 19, 21, 28; .endmacro -|.macro decode_RD8, dst, ins; rlwinm dst, ins, 19, 13, 28; .endmacro -| -|.macro decode_OP1, dst, ins; rlwinm dst, ins, 0, 24, 31; .endmacro -|.macro decode_RD4, dst, ins; rlwinm dst, ins, 18, 14, 29; .endmacro -| -|// Instruction fetch. -|.macro ins_NEXT1 -| lwz INS, 0(PC) -| addi PC, PC, 4 -|.endmacro -|// Instruction decode+dispatch. -|.macro ins_NEXT2 -| decode_OP4 TMP1, INS -| decode_RB8 RB, INS -| decode_RD8 RD, INS -| lwzx TMP0, DISPATCH, TMP1 -| decode_RA8 RA, INS -| decode_RC8 RC, INS -| mtctr TMP0 -| bctr -|.endmacro -|.macro ins_NEXT -| ins_NEXT1 -| ins_NEXT2 -|.endmacro -| -|// Instruction footer. -|.if 1 -| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. -| .define ins_next, ins_NEXT -| .define ins_next_, ins_NEXT -| .define ins_next1, ins_NEXT1 -| .define ins_next2, ins_NEXT2 -|.else -| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. -| // Affects only certain kinds of benchmarks (and only with -j off). -| .macro ins_next -| b ->ins_next -| .endmacro -| .macro ins_next1 -| .endmacro -| .macro ins_next2 -| b ->ins_next -| .endmacro -| .macro ins_next_ -| ->ins_next: -| ins_NEXT -| .endmacro -|.endif -| -|// Call decode and dispatch. -|.macro ins_callt -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC -| lwz PC, LFUNC:RB->pc -| lwz INS, 0(PC) -| addi PC, PC, 4 -| decode_OP4 TMP1, INS -| decode_RA8 RA, INS -| lwzx TMP0, DISPATCH, TMP1 -| add RA, RA, BASE -| mtctr TMP0 -| bctr -|.endmacro -| -|.macro ins_call -| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC -| stw PC, FRAME_PC(BASE) -| ins_callt -|.endmacro -| -|//----------------------------------------------------------------------- -| -|// Macros to test operand types. -|.macro checknum, reg; evcmpltu reg, TISNUM; .endmacro -|.macro checkstr, reg; evcmpeq reg, TISSTR; .endmacro -|.macro checktab, reg; evcmpeq reg, TISTAB; .endmacro -|.macro checkfunc, reg; evcmpeq reg, TISFUNC; .endmacro -|.macro checknil, reg; evcmpeq reg, TISNIL; .endmacro -|.macro checkok, label; blt label; .endmacro -|.macro checkfail, label; bge label; .endmacro -|.macro checkanyfail, label; bns label; .endmacro -|.macro checkallok, label; bso label; .endmacro -| -|.macro branch_RD -| srwi TMP0, RD, 1 -| add PC, PC, TMP0 -| addis PC, PC, -(BCBIAS_J*4 >> 16) -|.endmacro -| -|// Assumes DISPATCH is relative to GL. -#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) -#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) -| -#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) -| -|.macro hotloop -| NYI -|.endmacro -| -|.macro hotcall -| NYI -|.endmacro -| -|// Set current VM state. Uses TMP0. -|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro -|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro -| -|// Move table write barrier back. Overwrites mark and tmp. -|.macro barrierback, tab, mark, tmp -| lwz tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) -| // Assumes LJ_GC_BLACK is 0x04. -| rlwinm mark, mark, 0, 30, 28 // black2gray(tab) -| stw tab, DISPATCH_GL(gc.grayagain)(DISPATCH) -| stb mark, tab->marked -| stw tmp, tab->gclist -|.endmacro -| -|//----------------------------------------------------------------------- - -/* Generate subroutines used by opcodes and other parts of the VM. */ -/* The .code_sub section should be last to help static branch prediction. */ -static void build_subroutines(BuildCtx *ctx) -{ - |.code_sub - | - |//----------------------------------------------------------------------- - |//-- Return handling ---------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_returnp: - | // See vm_return. Also: TMP2 = previous base. - | andi. TMP0, PC, FRAME_P - | evsplati TMP1, LJ_TTRUE - | beq ->cont_dispatch - | - | // Return from pcall or xpcall fast func. - | lwz PC, FRAME_PC(TMP2) // Fetch PC of previous frame. - | mr BASE, TMP2 // Restore caller base. - | // Prepending may overwrite the pcall frame, so do it at the end. - | stwu TMP1, FRAME_PC(RA) // Prepend true to results. - | - |->vm_returnc: - | addi RD, RD, 8 // RD = (nresults+1)*8. - | andi. TMP0, PC, FRAME_TYPE - | cmpwi cr1, RD, 0 - | li CRET1, LUA_YIELD - | beq cr1, ->vm_unwind_c_eh - | mr MULTRES, RD - | beq ->BC_RET_Z // Handle regular return to Lua. - | - |->vm_return: - | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return - | // TMP0 = PC & FRAME_TYPE - | cmpwi TMP0, FRAME_C - | rlwinm TMP2, PC, 0, 0, 28 - | li_vmstate C - | sub TMP2, BASE, TMP2 // TMP2 = previous base. - | bne ->vm_returnp - | - | addic. TMP1, RD, -8 - | stw TMP2, L->base - | lwz TMP2, SAVE_NRES - | subi BASE, BASE, 8 - | st_vmstate - | slwi TMP2, TMP2, 3 - | beq >2 - |1: - | addic. TMP1, TMP1, -8 - | evldd TMP0, 0(RA) - | addi RA, RA, 8 - | evstdd TMP0, 0(BASE) - | addi BASE, BASE, 8 - | bne <1 - | - |2: - | cmpw TMP2, RD // More/less results wanted? - | bne >6 - |3: - | stw BASE, L->top // Store new top. - | - |->vm_leave_cp: - | lwz TMP0, SAVE_CFRAME // Restore previous C frame. - | li CRET1, 0 // Ok return status for vm_pcall. - | stw TMP0, L->cframe - | - |->vm_leave_unw: - | restoreregs - | blr - | - |6: - | ble >7 // Less results wanted? - | // More results wanted. Check stack size and fill up results with nil. - | lwz TMP1, L->maxstack - | cmplw BASE, TMP1 - | bge >8 - | evstdd TISNIL, 0(BASE) - | addi RD, RD, 8 - | addi BASE, BASE, 8 - | b <2 - | - |7: // Less results wanted. - | sub TMP0, RD, TMP2 - | cmpwi TMP2, 0 // LUA_MULTRET+1 case? - | sub TMP0, BASE, TMP0 // Subtract the difference. - | iseleq BASE, BASE, TMP0 // Either keep top or shrink it. - | b <3 - | - |8: // Corner case: need to grow stack for filling up results. - | // This can happen if: - | // - A C function grows the stack (a lot). - | // - The GC shrinks the stack in between. - | // - A return back from a lua_call() with (high) nresults adjustment. - | stw BASE, L->top // Save current top held in BASE (yes). - | mr SAVE0, RD - | mr CARG2, TMP2 - | mr CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | lwz TMP2, SAVE_NRES - | mr RD, SAVE0 - | slwi TMP2, TMP2, 3 - | lwz BASE, L->top // Need the (realloced) L->top in BASE. - | b <2 - | - |->vm_unwind_c: // Unwind C stack, return from vm_pcall. - | // (void *cframe, int errcode) - | mr sp, CARG1 - | mr CRET1, CARG2 - |->vm_unwind_c_eh: // Landing pad for external unwinder. - | lwz L, SAVE_L - | li TMP0, ~LJ_VMST_C - | lwz GL:TMP1, L->glref - | stw TMP0, GL:TMP1->vmstate - | b ->vm_leave_unw - | - |->vm_unwind_ff: // Unwind C stack, return from ff pcall. - | // (void *cframe) - | rlwinm sp, CARG1, 0, 0, 29 - |->vm_unwind_ff_eh: // Landing pad for external unwinder. - | lwz L, SAVE_L - | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants. - | evsplati TISFUNC, LJ_TFUNC - | lus TOBIT, 0x4338 - | evsplati TISTAB, LJ_TTAB - | li TMP0, 0 - | lwz BASE, L->base - | evmergelo TOBIT, TOBIT, TMP0 - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | evsplati TISSTR, LJ_TSTR - | li TMP1, LJ_TFALSE - | evsplati TISNIL, LJ_TNIL - | li_vmstate INTERP - | lwz PC, FRAME_PC(BASE) // Fetch PC of previous frame. - | la RA, -8(BASE) // Results start at BASE-8. - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw TMP1, 0(RA) // Prepend false to error message. - | li RD, 16 // 2 results: false + error message. - | st_vmstate - | b ->vm_returnc - | - |//----------------------------------------------------------------------- - |//-- Grow stack for calls ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_growstack_c: // Grow stack for C function. - | li CARG2, LUA_MINSTACK - | b >2 - | - |->vm_growstack_l: // Grow stack for Lua function. - | // BASE = new base, RA = BASE+framesize*8, RC = nargs*8, PC = first PC - | add RC, BASE, RC - | sub RA, RA, BASE - | stw BASE, L->base - | addi PC, PC, 4 // Must point after first instruction. - | stw RC, L->top - | srwi CARG2, RA, 3 - |2: - | // L->base = new base, L->top = top - | stw PC, SAVE_PC - | mr CARG1, L - | bl extern lj_state_growstack // (lua_State *L, int n) - | lwz BASE, L->base - | lwz RC, L->top - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | sub RC, RC, BASE - | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC - | ins_callt // Just retry the call. - | - |//----------------------------------------------------------------------- - |//-- Entry points into the assembler VM --------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_resume: // Setup C frame and resume thread. - | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) - | saveregs - | mr L, CARG1 - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | mr BASE, CARG2 - | lbz TMP1, L->status - | stw L, SAVE_L - | li PC, FRAME_CP - | addi TMP0, sp, CFRAME_RESUME - | addi DISPATCH, DISPATCH, GG_G2DISP - | stw CARG3, SAVE_NRES - | cmplwi TMP1, 0 - | stw CARG3, SAVE_ERRF - | stw TMP0, L->cframe - | stw CARG3, SAVE_CFRAME - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | beq >3 - | - | // Resume after yield (like a return). - | mr RA, BASE - | lwz BASE, L->base - | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants. - | lwz TMP1, L->top - | evsplati TISFUNC, LJ_TFUNC - | lus TOBIT, 0x4338 - | evsplati TISTAB, LJ_TTAB - | lwz PC, FRAME_PC(BASE) - | li TMP2, 0 - | evsplati TISSTR, LJ_TSTR - | sub RD, TMP1, BASE - | evmergelo TOBIT, TOBIT, TMP2 - | stb CARG3, L->status - | andi. TMP0, PC, FRAME_TYPE - | li_vmstate INTERP - | addi RD, RD, 8 - | evsplati TISNIL, LJ_TNIL - | mr MULTRES, RD - | st_vmstate - | beq ->BC_RET_Z - | b ->vm_return - | - |->vm_pcall: // Setup protected C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) - | saveregs - | li PC, FRAME_CP - | stw CARG4, SAVE_ERRF - | b >1 - | - |->vm_call: // Setup C frame and enter VM. - | // (lua_State *L, TValue *base, int nres1) - | saveregs - | li PC, FRAME_C - | - |1: // Entry point for vm_pcall above (PC = ftype). - | lwz TMP1, L:CARG1->cframe - | stw CARG3, SAVE_NRES - | mr L, CARG1 - | stw CARG1, SAVE_L - | mr BASE, CARG2 - | stw sp, L->cframe // Add our C frame to cframe chain. - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | stw TMP1, SAVE_CFRAME - | addi DISPATCH, DISPATCH, GG_G2DISP - | - |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). - | lwz TMP2, L->base // TMP2 = old base (used in vmeta_call). - | evsplati TISNUM, LJ_TISNUM+1 // Setup type comparison constants. - | lwz TMP1, L->top - | evsplati TISFUNC, LJ_TFUNC - | add PC, PC, BASE - | evsplati TISTAB, LJ_TTAB - | lus TOBIT, 0x4338 - | li TMP0, 0 - | sub PC, PC, TMP2 // PC = frame delta + frame type - | evsplati TISSTR, LJ_TSTR - | sub NARGS8:RC, TMP1, BASE - | evmergelo TOBIT, TOBIT, TMP0 - | li_vmstate INTERP - | evsplati TISNIL, LJ_TNIL - | st_vmstate - | - |->vm_call_dispatch: - | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC - | li TMP0, -8 - | evlddx LFUNC:RB, BASE, TMP0 - | checkfunc LFUNC:RB - | checkfail ->vmeta_call - | - |->vm_call_dispatch_f: - | ins_call - | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC - | - |->vm_cpcall: // Setup protected C frame, call C. - | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) - | saveregs - | mr L, CARG1 - | lwz TMP0, L:CARG1->stack - | stw CARG1, SAVE_L - | lwz TMP1, L->top - | stw CARG1, SAVE_PC // Any value outside of bytecode is ok. - | sub TMP0, TMP0, TMP1 // Compute -savestack(L, L->top). - | lwz TMP1, L->cframe - | stw sp, L->cframe // Add our C frame to cframe chain. - | li TMP2, 0 - | stw TMP0, SAVE_NRES // Neg. delta means cframe w/o frame. - | stw TMP2, SAVE_ERRF // No error function. - | stw TMP1, SAVE_CFRAME - | mtctr CARG4 - | bctrl // (lua_State *L, lua_CFunction func, void *ud) - | mr. BASE, CRET1 - | lwz DISPATCH, L->glref // Setup pointer to dispatch table. - | li PC, FRAME_CP - | addi DISPATCH, DISPATCH, GG_G2DISP - | bne <3 // Else continue with the call. - | b ->vm_leave_cp // No base? Just remove C frame. - | - |//----------------------------------------------------------------------- - |//-- Metamethod handling ------------------------------------------------ - |//----------------------------------------------------------------------- - | - |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the - |// stack, so BASE doesn't need to be reloaded across these calls. - | - |//-- Continuation dispatch ---------------------------------------------- - | - |->cont_dispatch: - | // BASE = meta base, RA = resultptr, RD = (nresults+1)*8 - | lwz TMP0, -12(BASE) // Continuation. - | mr RB, BASE - | mr BASE, TMP2 // Restore caller BASE. - | lwz LFUNC:TMP1, FRAME_FUNC(TMP2) - | cmplwi TMP0, 0 - | lwz PC, -16(RB) // Restore PC from [cont|PC]. - | beq >1 - | subi TMP2, RD, 8 - | lwz TMP1, LFUNC:TMP1->pc - | evstddx TISNIL, RA, TMP2 // Ensure one valid arg. - | lwz KBASE, PC2PROTO(k)(TMP1) - | // BASE = base, RA = resultptr, RB = meta base - | mtctr TMP0 - | bctr // Jump to continuation. - | - |1: // Tail call from C function. - | subi TMP1, RB, 16 - | sub RC, TMP1, BASE - | b ->vm_call_tail - | - |->cont_cat: // RA = resultptr, RB = meta base - | lwz INS, -4(PC) - | subi CARG2, RB, 16 - | decode_RB8 SAVE0, INS - | evldd TMP0, 0(RA) - | add TMP1, BASE, SAVE0 - | stw BASE, L->base - | cmplw TMP1, CARG2 - | sub CARG3, CARG2, TMP1 - | decode_RA8 RA, INS - | evstdd TMP0, 0(CARG2) - | bne ->BC_CAT_Z - | evstddx TMP0, BASE, RA - | b ->cont_nop - | - |//-- Table indexing metamethods ----------------------------------------- - | - |->vmeta_tgets1: - | evmergelo STR:RC, TISSTR, STR:RC - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | decode_RB8 RB, INS - | evstdd STR:RC, 0(CARG3) - | add CARG2, BASE, RB - | b >1 - | - |->vmeta_tgets: - | evmergelo TAB:RB, TISTAB, TAB:RB - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - | evmergelo STR:RC, TISSTR, STR:RC - | evstdd TAB:RB, 0(CARG2) - | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | evstdd STR:RC, 0(CARG3) - | b >1 - | - |->vmeta_tgetb: // TMP0 = index - | efdcfsi TMP0, TMP0 - | decode_RB8 RB, INS - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | add CARG2, BASE, RB - | evstdd TMP0, 0(CARG3) - | b >1 - | - |->vmeta_tgetv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | stw BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 - | beq >3 - | evldd TMP0, 0(CRET1) - | evstddx TMP0, BASE, RA - | ins_next - | - |3: // Call __index metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k - | subfic TMP1, BASE, FRAME_CONT - | lwz BASE, L->top - | stw PC, -16(BASE) // [cont|PC] - | add PC, TMP1, BASE - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | li NARGS8:RC, 16 // 2 args for func(t, k). - | b ->vm_call_dispatch_f - | - |//----------------------------------------------------------------------- - | - |->vmeta_tsets1: - | evmergelo STR:RC, TISSTR, STR:RC - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | decode_RB8 RB, INS - | evstdd STR:RC, 0(CARG3) - | add CARG2, BASE, RB - | b >1 - | - |->vmeta_tsets: - | evmergelo TAB:RB, TISTAB, TAB:RB - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - | evmergelo STR:RC, TISSTR, STR:RC - | evstdd TAB:RB, 0(CARG2) - | la CARG3, DISPATCH_GL(tmptv2)(DISPATCH) - | evstdd STR:RC, 0(CARG3) - | b >1 - | - |->vmeta_tsetb: // TMP0 = index - | efdcfsi TMP0, TMP0 - | decode_RB8 RB, INS - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | add CARG2, BASE, RB - | evstdd TMP0, 0(CARG3) - | b >1 - | - |->vmeta_tsetv: - | decode_RB8 RB, INS - | decode_RC8 RC, INS - | add CARG2, BASE, RB - | add CARG3, BASE, RC - |1: - | stw BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) - | // Returns TValue * (finished) or NULL (metamethod). - | cmplwi CRET1, 0 - | evlddx TMP0, BASE, RA - | beq >3 - | // NOBARRIER: lj_meta_tset ensures the table is not black. - | evstdd TMP0, 0(CRET1) - | ins_next - | - |3: // Call __newindex metamethod. - | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) - | subfic TMP1, BASE, FRAME_CONT - | lwz BASE, L->top - | stw PC, -16(BASE) // [cont|PC] - | add PC, TMP1, BASE - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | li NARGS8:RC, 24 // 3 args for func(t, k, v) - | evstdd TMP0, 16(BASE) // Copy value to third argument. - | b ->vm_call_dispatch_f - | - |//-- Comparison metamethods --------------------------------------------- - | - |->vmeta_comp: - | mr CARG1, L - | subi PC, PC, 4 - | add CARG2, BASE, RA - | stw PC, SAVE_PC - | add CARG3, BASE, RD - | stw BASE, L->base - | decode_OP1 CARG4, INS - | bl extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) - | // Returns 0/1 or TValue * (metamethod). - |3: - | cmplwi CRET1, 1 - | bgt ->vmeta_binop - |4: - | lwz INS, 0(PC) - | addi PC, PC, 4 - | decode_RD4 TMP2, INS - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | add TMP2, TMP2, TMP3 - | isellt PC, PC, TMP2 - |->cont_nop: - | ins_next - | - |->cont_ra: // RA = resultptr - | lwz INS, -4(PC) - | evldd TMP0, 0(RA) - | decode_RA8 TMP1, INS - | evstddx TMP0, BASE, TMP1 - | b ->cont_nop - | - |->cont_condt: // RA = resultptr - | lwz TMP0, 0(RA) - | li TMP1, LJ_TTRUE - | cmplw TMP1, TMP0 // Branch if result is true. - | b <4 - | - |->cont_condf: // RA = resultptr - | lwz TMP0, 0(RA) - | li TMP1, LJ_TFALSE - | cmplw TMP0, TMP1 // Branch if result is false. - | b <4 - | - |->vmeta_equal: - | // CARG2, CARG3, CARG4 are already set by BC_ISEQV/BC_ISNEV. - | subi PC, PC, 4 - | stw BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) - | // Returns 0/1 or TValue * (metamethod). - | b <3 - | - |//-- Arithmetic metamethods --------------------------------------------- - | - |->vmeta_arith_vn: - | add CARG3, BASE, RB - | add CARG4, KBASE, RC - | b >1 - | - |->vmeta_arith_nv: - | add CARG3, KBASE, RC - | add CARG4, BASE, RB - | b >1 - | - |->vmeta_unm: - | add CARG3, BASE, RD - | mr CARG4, CARG3 - | b >1 - | - |->vmeta_arith_vv: - | add CARG3, BASE, RB - | add CARG4, BASE, RC - |1: - | add CARG2, BASE, RA - | stw BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | decode_OP1 CARG5, INS // Caveat: CARG5 overlaps INS. - | bl extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) - | // Returns NULL (finished) or TValue * (metamethod). - | cmplwi CRET1, 0 - | beq ->cont_nop - | - | // Call metamethod for binary op. - |->vmeta_binop: - | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 - | sub TMP1, CRET1, BASE - | stw PC, -16(CRET1) // [cont|PC] - | mr TMP2, BASE - | addi PC, TMP1, FRAME_CONT - | mr BASE, CRET1 - | li NARGS8:RC, 16 // 2 args for func(o1, o2). - | b ->vm_call_dispatch - | - |->vmeta_len: -#if LJ_52 - | mr SAVE0, CARG1 -#endif - | add CARG2, BASE, RD - | stw BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | bl extern lj_meta_len // (lua_State *L, TValue *o) - | // Returns NULL (retry) or TValue * (metamethod base). -#if LJ_52 - | cmplwi CRET1, 0 - | bne ->vmeta_binop // Binop call for compatibility. - | mr CARG1, SAVE0 - | b ->BC_LEN_Z -#else - | b ->vmeta_binop // Binop call for compatibility. -#endif - | - |//-- Call metamethod ---------------------------------------------------- - | - |->vmeta_call: // Resolve and call __call metamethod. - | // TMP2 = old base, BASE = new base, RC = nargs*8 - | mr CARG1, L - | stw TMP2, L->base // This is the callers base! - | subi CARG2, BASE, 8 - | stw PC, SAVE_PC - | add CARG3, BASE, RC - | mr SAVE0, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | lwz LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. - | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. - | ins_call - | - |->vmeta_callt: // Resolve __call for BC_CALLT. - | // BASE = old base, RA = new base, RC = nargs*8 - | mr CARG1, L - | stw BASE, L->base - | subi CARG2, RA, 8 - | stw PC, SAVE_PC - | add CARG3, RA, RC - | mr SAVE0, NARGS8:RC - | bl extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) - | lwz TMP1, FRAME_PC(BASE) - | addi NARGS8:RC, SAVE0, 8 // Got one more argument now. - | lwz LFUNC:RB, FRAME_FUNC(RA) // Guaranteed to be a function here. - | b ->BC_CALLT_Z - | - |//-- Argument coercion for 'for' statement ------------------------------ - | - |->vmeta_for: - | mr CARG1, L - | stw BASE, L->base - | mr CARG2, RA - | stw PC, SAVE_PC - | mr SAVE0, INS - | bl extern lj_meta_for // (lua_State *L, TValue *base) - |.if JIT - | decode_OP1 TMP0, SAVE0 - |.endif - | decode_RA8 RA, SAVE0 - |.if JIT - | cmpwi TMP0, BC_JFORI - |.endif - | decode_RD8 RD, SAVE0 - |.if JIT - | beq =>BC_JFORI - |.endif - | b =>BC_FORI - | - |//----------------------------------------------------------------------- - |//-- Fast functions ----------------------------------------------------- - |//----------------------------------------------------------------------- - | - |.macro .ffunc, name - |->ff_ .. name: - |.endmacro - | - |.macro .ffunc_1, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 8 - | evldd CARG1, 0(BASE) - | blt ->fff_fallback - |.endmacro - | - |.macro .ffunc_2, name - |->ff_ .. name: - | cmplwi NARGS8:RC, 16 - | evldd CARG1, 0(BASE) - | evldd CARG2, 8(BASE) - | blt ->fff_fallback - |.endmacro - | - |.macro .ffunc_n, name - | .ffunc_1 name - | checknum CARG1 - | checkfail ->fff_fallback - |.endmacro - | - |.macro .ffunc_nn, name - | .ffunc_2 name - | evmergehi TMP0, CARG1, CARG2 - | checknum TMP0 - | checkanyfail ->fff_fallback - |.endmacro - | - |// Inlined GC threshold check. Caveat: uses TMP0 and TMP1. - |.macro ffgccheck - | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | cmplw TMP0, TMP1 - | bgel ->fff_gcstep - |.endmacro - | - |//-- Base library: checks ----------------------------------------------- - | - |.ffunc assert - | cmplwi NARGS8:RC, 8 - | evldd TMP0, 0(BASE) - | blt ->fff_fallback - | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE. - | la RA, -8(BASE) - | evcmpltu cr1, TMP0, TMP1 - | lwz PC, FRAME_PC(BASE) - | bge cr1, ->fff_fallback - | evstdd TMP0, 0(RA) - | addi RD, NARGS8:RC, 8 // Compute (nresults+1)*8. - | beq ->fff_res // Done if exactly 1 argument. - | li TMP1, 8 - | subi RC, RC, 8 - |1: - | cmplw TMP1, RC - | evlddx TMP0, BASE, TMP1 - | evstddx TMP0, RA, TMP1 - | addi TMP1, TMP1, 8 - | bne <1 - | b ->fff_res - | - |.ffunc type - | cmplwi NARGS8:RC, 8 - | lwz CARG1, 0(BASE) - | blt ->fff_fallback - | li TMP2, ~LJ_TNUMX - | cmplw CARG1, TISNUM - | not TMP1, CARG1 - | isellt TMP1, TMP2, TMP1 - | slwi TMP1, TMP1, 3 - | la TMP2, CFUNC:RB->upvalue - | evlddx STR:CRET1, TMP2, TMP1 - | b ->fff_restv - | - |//-- Base library: getters and setters --------------------------------- - | - |.ffunc_1 getmetatable - | checktab CARG1 - | evmergehi TMP1, CARG1, CARG1 - | checkfail >6 - |1: // Field metatable must be at same offset for GCtab and GCudata! - | lwz TAB:RB, TAB:CARG1->metatable - |2: - | evmr CRET1, TISNIL - | cmplwi TAB:RB, 0 - | lwz STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) - | beq ->fff_restv - | lwz TMP0, TAB:RB->hmask - | evmergelo CRET1, TISTAB, TAB:RB // Use metatable as default result. - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:RB->node - | evmergelo STR:RC, TISSTR, STR:RC - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |3: // Rearranged logic, because we expect _not_ to find the key. - | evldd TMP0, NODE:TMP2->key - | evldd TMP1, NODE:TMP2->val - | evcmpeq TMP0, STR:RC - | lwz NODE:TMP2, NODE:TMP2->next - | checkallok >5 - | cmplwi NODE:TMP2, 0 - | beq ->fff_restv // Not found, keep default result. - | b <3 - |5: - | checknil TMP1 - | checkok ->fff_restv // Ditto for nil value. - | evmr CRET1, TMP1 // Return value of mt.__metatable. - | b ->fff_restv - | - |6: - | cmpwi TMP1, LJ_TUDATA - | not TMP1, TMP1 - | beq <1 - | checknum CARG1 - | slwi TMP1, TMP1, 2 - | li TMP2, 4*~LJ_TNUMX - | isellt TMP1, TMP2, TMP1 - | la TMP2, DISPATCH_GL(gcroot[GCROOT_BASEMT])(DISPATCH) - | lwzx TAB:RB, TMP2, TMP1 - | b <2 - | - |.ffunc_2 setmetatable - | // Fast path: no mt for table yet and not clearing the mt. - | evmergehi TMP0, TAB:CARG1, TAB:CARG2 - | checktab TMP0 - | checkanyfail ->fff_fallback - | lwz TAB:TMP1, TAB:CARG1->metatable - | cmplwi TAB:TMP1, 0 - | lbz TMP3, TAB:CARG1->marked - | bne ->fff_fallback - | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | stw TAB:CARG2, TAB:CARG1->metatable - | beq ->fff_restv - | barrierback TAB:CARG1, TMP3, TMP0 - | b ->fff_restv - | - |.ffunc rawget - | cmplwi NARGS8:RC, 16 - | evldd CARG2, 0(BASE) - | blt ->fff_fallback - | checktab CARG2 - | la CARG3, 8(BASE) - | checkfail ->fff_fallback - | mr CARG1, L - | bl extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) - | // Returns cTValue *. - | evldd CRET1, 0(CRET1) - | b ->fff_restv - | - |//-- Base library: conversions ------------------------------------------ - | - |.ffunc tonumber - | // Only handles the number case inline (without a base argument). - | cmplwi NARGS8:RC, 8 - | evldd CARG1, 0(BASE) - | bne ->fff_fallback // Exactly one argument. - | checknum CARG1 - | checkok ->fff_restv - | b ->fff_fallback - | - |.ffunc_1 tostring - | // Only handles the string or number case inline. - | checkstr CARG1 - | // A __tostring method in the string base metatable is ignored. - | checkok ->fff_restv // String key? - | // Handle numbers inline, unless a number base metatable is present. - | lwz TMP0, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) - | checknum CARG1 - | cmplwi cr1, TMP0, 0 - | stw BASE, L->base // Add frame since C call can throw. - | crand 4*cr0+eq, 4*cr0+lt, 4*cr1+eq - | stw PC, SAVE_PC // Redundant (but a defined value). - | bne ->fff_fallback - | ffgccheck - | mr CARG1, L - | mr CARG2, BASE - | bl extern lj_str_fromnum // (lua_State *L, lua_Number *np) - | // Returns GCstr *. - | evmergelo STR:CRET1, TISSTR, STR:CRET1 - | b ->fff_restv - | - |//-- Base library: iterators ------------------------------------------- - | - |.ffunc next - | cmplwi NARGS8:RC, 8 - | evldd CARG2, 0(BASE) - | blt ->fff_fallback - | evstddx TISNIL, BASE, NARGS8:RC // Set missing 2nd arg to nil. - | checktab TAB:CARG2 - | lwz PC, FRAME_PC(BASE) - | checkfail ->fff_fallback - | stw BASE, L->base // Add frame since C call can throw. - | mr CARG1, L - | stw BASE, L->top // Dummy frame length is ok. - | la CARG3, 8(BASE) - | stw PC, SAVE_PC - | bl extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Returns 0 at end of traversal. - | cmplwi CRET1, 0 - | evmr CRET1, TISNIL - | beq ->fff_restv // End of traversal: return nil. - | evldd TMP0, 8(BASE) // Copy key and value to results. - | la RA, -8(BASE) - | evldd TMP1, 16(BASE) - | evstdd TMP0, 0(RA) - | li RD, (2+1)*8 - | evstdd TMP1, 8(RA) - | b ->fff_res - | - |.ffunc_1 pairs - | checktab TAB:CARG1 - | lwz PC, FRAME_PC(BASE) - | checkfail ->fff_fallback -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback -#else - | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] - | la RA, -8(BASE) -#endif - | evstdd TISNIL, 8(BASE) - | li RD, (3+1)*8 - | evstdd CFUNC:TMP0, 0(RA) - | b ->fff_res - | - |.ffunc_2 ipairs_aux - | checktab TAB:CARG1 - | lwz PC, FRAME_PC(BASE) - | checkfail ->fff_fallback - | checknum CARG2 - | lus TMP3, 0x3ff0 - | checkfail ->fff_fallback - | efdctsi TMP2, CARG2 - | lwz TMP0, TAB:CARG1->asize - | evmergelo TMP3, TMP3, ZERO - | lwz TMP1, TAB:CARG1->array - | efdadd CARG2, CARG2, TMP3 - | addi TMP2, TMP2, 1 - | la RA, -8(BASE) - | cmplw TMP0, TMP2 - | slwi TMP3, TMP2, 3 - | evstdd CARG2, 0(RA) - | ble >2 // Not in array part? - | evlddx TMP1, TMP1, TMP3 - |1: - | checknil TMP1 - | li RD, (0+1)*8 - | checkok ->fff_res // End of iteration, return 0 results. - | li RD, (2+1)*8 - | evstdd TMP1, 8(RA) - | b ->fff_res - |2: // Check for empty hash part first. Otherwise call C function. - | lwz TMP0, TAB:CARG1->hmask - | cmplwi TMP0, 0 - | li RD, (0+1)*8 - | beq ->fff_res - | mr CARG2, TMP2 - | bl extern lj_tab_getinth // (GCtab *t, int32_t key) - | // Returns cTValue * or NULL. - | cmplwi CRET1, 0 - | li RD, (0+1)*8 - | beq ->fff_res - | evldd TMP1, 0(CRET1) - | b <1 - | - |.ffunc_1 ipairs - | checktab TAB:CARG1 - | lwz PC, FRAME_PC(BASE) - | checkfail ->fff_fallback -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] - | cmplwi TAB:TMP2, 0 - | la RA, -8(BASE) - | bne ->fff_fallback -#else - | evldd CFUNC:TMP0, CFUNC:RB->upvalue[0] - | la RA, -8(BASE) -#endif - | evsplati TMP1, 0 - | li RD, (3+1)*8 - | evstdd TMP1, 8(BASE) - | evstdd CFUNC:TMP0, 0(RA) - | b ->fff_res - | - |//-- Base library: catch errors ---------------------------------------- - | - |.ffunc pcall - | cmplwi NARGS8:RC, 8 - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | blt ->fff_fallback - | mr TMP2, BASE - | la BASE, 8(BASE) - | // Remember active hook before pcall. - | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 - | subi NARGS8:RC, NARGS8:RC, 8 - | addi PC, TMP3, 8+FRAME_PCALL - | b ->vm_call_dispatch - | - |.ffunc_2 xpcall - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | mr TMP2, BASE - | checkfunc CARG2 // Traceback must be a function. - | checkfail ->fff_fallback - | la BASE, 16(BASE) - | // Remember active hook before pcall. - | rlwinm TMP3, TMP3, 32-HOOK_ACTIVE_SHIFT, 31, 31 - | evstdd CARG2, 0(TMP2) // Swap function and traceback. - | subi NARGS8:RC, NARGS8:RC, 16 - | evstdd CARG1, 8(TMP2) - | addi PC, TMP3, 16+FRAME_PCALL - | b ->vm_call_dispatch - | - |//-- Coroutine library -------------------------------------------------- - | - |.macro coroutine_resume_wrap, resume - |.if resume - |.ffunc_1 coroutine_resume - | evmergehi TMP0, L:CARG1, L:CARG1 - |.else - |.ffunc coroutine_wrap_aux - | lwz L:CARG1, CFUNC:RB->upvalue[0].gcr - |.endif - |.if resume - | cmpwi TMP0, LJ_TTHREAD - | bne ->fff_fallback - |.endif - | lbz TMP0, L:CARG1->status - | lwz TMP1, L:CARG1->cframe - | lwz CARG2, L:CARG1->top - | cmplwi cr0, TMP0, LUA_YIELD - | lwz TMP2, L:CARG1->base - | cmplwi cr1, TMP1, 0 - | lwz TMP0, L:CARG1->maxstack - | cmplw cr7, CARG2, TMP2 - | lwz PC, FRAME_PC(BASE) - | crorc 4*cr6+lt, 4*cr0+gt, 4*cr1+eq // st>LUA_YIELD || cframe!=0 - | add TMP2, CARG2, NARGS8:RC - | crandc 4*cr6+gt, 4*cr7+eq, 4*cr0+eq // base==top && st!=LUA_YIELD - | cmplw cr1, TMP2, TMP0 - | cror 4*cr6+lt, 4*cr6+lt, 4*cr6+gt - | stw PC, SAVE_PC - | cror 4*cr6+lt, 4*cr6+lt, 4*cr1+gt // cond1 || cond2 || stackov - | stw BASE, L->base - | blt cr6, ->fff_fallback - |1: - |.if resume - | addi BASE, BASE, 8 // Keep resumed thread in stack for GC. - | subi NARGS8:RC, NARGS8:RC, 8 - | subi TMP2, TMP2, 8 - |.endif - | stw TMP2, L:CARG1->top - | li TMP1, 0 - | stw BASE, L->top - |2: // Move args to coroutine. - | cmpw TMP1, NARGS8:RC - | evlddx TMP0, BASE, TMP1 - | beq >3 - | evstddx TMP0, CARG2, TMP1 - | addi TMP1, TMP1, 8 - | b <2 - |3: - | li CARG3, 0 - | mr L:SAVE0, L:CARG1 - | li CARG4, 0 - | bl ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | // Returns thread status. - |4: - | lwz TMP2, L:SAVE0->base - | cmplwi CRET1, LUA_YIELD - | lwz TMP3, L:SAVE0->top - | li_vmstate INTERP - | lwz BASE, L->base - | st_vmstate - | bgt >8 - | sub RD, TMP3, TMP2 - | lwz TMP0, L->maxstack - | cmplwi RD, 0 - | add TMP1, BASE, RD - | beq >6 // No results? - | cmplw TMP1, TMP0 - | li TMP1, 0 - | bgt >9 // Need to grow stack? - | - | subi TMP3, RD, 8 - | stw TMP2, L:SAVE0->top // Clear coroutine stack. - |5: // Move results from coroutine. - | cmplw TMP1, TMP3 - | evlddx TMP0, TMP2, TMP1 - | evstddx TMP0, BASE, TMP1 - | addi TMP1, TMP1, 8 - | bne <5 - |6: - | andi. TMP0, PC, FRAME_TYPE - |.if resume - | li TMP1, LJ_TTRUE - | la RA, -8(BASE) - | stw TMP1, -8(BASE) // Prepend true to results. - | addi RD, RD, 16 - |.else - | mr RA, BASE - | addi RD, RD, 8 - |.endif - |7: - | stw PC, SAVE_PC - | mr MULTRES, RD - | beq ->BC_RET_Z - | b ->vm_return - | - |8: // Coroutine returned with error (at co->top-1). - |.if resume - | andi. TMP0, PC, FRAME_TYPE - | la TMP3, -8(TMP3) - | li TMP1, LJ_TFALSE - | evldd TMP0, 0(TMP3) - | stw TMP3, L:SAVE0->top // Remove error from coroutine stack. - | li RD, (2+1)*8 - | stw TMP1, -8(BASE) // Prepend false to results. - | la RA, -8(BASE) - | evstdd TMP0, 0(BASE) // Copy error message. - | b <7 - |.else - | mr CARG1, L - | mr CARG2, L:SAVE0 - | bl extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) - |.endif - | - |9: // Handle stack expansion on return from yield. - | mr CARG1, L - | srwi CARG2, RD, 3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | li CRET1, 0 - | b <4 - |.endmacro - | - | coroutine_resume_wrap 1 // coroutine.resume - | coroutine_resume_wrap 0 // coroutine.wrap - | - |.ffunc coroutine_yield - | lwz TMP0, L->cframe - | add TMP1, BASE, NARGS8:RC - | stw BASE, L->base - | andi. TMP0, TMP0, CFRAME_RESUME - | stw TMP1, L->top - | li CRET1, LUA_YIELD - | beq ->fff_fallback - | stw ZERO, L->cframe - | stb CRET1, L->status - | b ->vm_leave_unw - | - |//-- Math library ------------------------------------------------------- - | - |.ffunc_n math_abs - | efdabs CRET1, CARG1 - | // Fallthrough. - | - |->fff_restv: - | // CRET1 = TValue result. - | lwz PC, FRAME_PC(BASE) - | la RA, -8(BASE) - | evstdd CRET1, 0(RA) - |->fff_res1: - | // RA = results, PC = return. - | li RD, (1+1)*8 - |->fff_res: - | // RA = results, RD = (nresults+1)*8, PC = return. - | andi. TMP0, PC, FRAME_TYPE - | mr MULTRES, RD - | bne ->vm_return - | lwz INS, -4(PC) - | decode_RB8 RB, INS - |5: - | cmplw RB, RD // More results expected? - | decode_RA8 TMP0, INS - | bgt >6 - | ins_next1 - | // Adjust BASE. KBASE is assumed to be set for the calling frame. - | sub BASE, RA, TMP0 - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | evstddx TISNIL, RA, TMP1 - | b <5 - | - |.macro math_extern, func - | .ffunc math_ .. func - | cmplwi NARGS8:RC, 8 - | evldd CARG2, 0(BASE) - | blt ->fff_fallback - | checknum CARG2 - | evmergehi CARG1, CARG2, CARG2 - | checkfail ->fff_fallback - | bl extern func@plt - | evmergelo CRET1, CRET1, CRET2 - | b ->fff_restv - |.endmacro - | - |.macro math_extern2, func - | .ffunc math_ .. func - | cmplwi NARGS8:RC, 16 - | evldd CARG2, 0(BASE) - | evldd CARG4, 8(BASE) - | blt ->fff_fallback - | evmergehi CARG1, CARG4, CARG2 - | checknum CARG1 - | evmergehi CARG3, CARG4, CARG4 - | checkanyfail ->fff_fallback - | bl extern func@plt - | evmergelo CRET1, CRET1, CRET2 - | b ->fff_restv - |.endmacro - | - |.macro math_round, func - | .ffunc math_ .. func - | cmplwi NARGS8:RC, 8 - | evldd CARG2, 0(BASE) - | blt ->fff_fallback - | checknum CARG2 - | evmergehi CARG1, CARG2, CARG2 - | checkfail ->fff_fallback - | lwz PC, FRAME_PC(BASE) - | bl ->vm_..func.._hilo; - | la RA, -8(BASE) - | evstdd CRET2, 0(RA) - | b ->fff_res1 - |.endmacro - | - | math_round floor - | math_round ceil - | - | math_extern sqrt - | - |.ffunc math_log - | cmplwi NARGS8:RC, 8 - | evldd CARG2, 0(BASE) - | bne ->fff_fallback // Need exactly 1 argument. - | checknum CARG2 - | evmergehi CARG1, CARG2, CARG2 - | checkfail ->fff_fallback - | bl extern log@plt - | evmergelo CRET1, CRET1, CRET2 - | b ->fff_restv - | - | math_extern log10 - | math_extern exp - | math_extern sin - | math_extern cos - | math_extern tan - | math_extern asin - | math_extern acos - | math_extern atan - | math_extern sinh - | math_extern cosh - | math_extern tanh - | math_extern2 pow - | math_extern2 atan2 - | math_extern2 fmod - | - |->ff_math_deg: - |.ffunc_n math_rad - | evldd CARG2, CFUNC:RB->upvalue[0] - | efdmul CRET1, CARG1, CARG2 - | b ->fff_restv - | - |.ffunc math_ldexp - | cmplwi NARGS8:RC, 16 - | evldd CARG2, 0(BASE) - | evldd CARG4, 8(BASE) - | blt ->fff_fallback - | evmergehi CARG1, CARG4, CARG2 - | checknum CARG1 - | checkanyfail ->fff_fallback - | efdctsi CARG3, CARG4 - | bl extern ldexp@plt - | evmergelo CRET1, CRET1, CRET2 - | b ->fff_restv - | - |.ffunc math_frexp - | cmplwi NARGS8:RC, 8 - | evldd CARG2, 0(BASE) - | blt ->fff_fallback - | checknum CARG2 - | evmergehi CARG1, CARG2, CARG2 - | checkfail ->fff_fallback - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | lwz PC, FRAME_PC(BASE) - | bl extern frexp@plt - | lwz TMP1, DISPATCH_GL(tmptv)(DISPATCH) - | evmergelo CRET1, CRET1, CRET2 - | efdcfsi CRET2, TMP1 - | la RA, -8(BASE) - | evstdd CRET1, 0(RA) - | li RD, (2+1)*8 - | evstdd CRET2, 8(RA) - | b ->fff_res - | - |.ffunc math_modf - | cmplwi NARGS8:RC, 8 - | evldd CARG2, 0(BASE) - | blt ->fff_fallback - | checknum CARG2 - | evmergehi CARG1, CARG2, CARG2 - | checkfail ->fff_fallback - | la CARG3, -8(BASE) - | lwz PC, FRAME_PC(BASE) - | bl extern modf@plt - | evmergelo CRET1, CRET1, CRET2 - | la RA, -8(BASE) - | evstdd CRET1, 0(BASE) - | li RD, (2+1)*8 - | b ->fff_res - | - |.macro math_minmax, name, cmpop - | .ffunc_1 name - | checknum CARG1 - | li TMP1, 8 - | checkfail ->fff_fallback - |1: - | evlddx CARG2, BASE, TMP1 - | cmplw cr1, TMP1, NARGS8:RC - | checknum CARG2 - | bge cr1, ->fff_restv // Ok, since CRET1 = CARG1. - | checkfail ->fff_fallback - | cmpop CARG2, CARG1 - | addi TMP1, TMP1, 8 - | crmove 4*cr0+lt, 4*cr0+gt - | evsel CARG1, CARG2, CARG1 - | b <1 - |.endmacro - | - | math_minmax math_min, efdtstlt - | math_minmax math_max, efdtstgt - | - |//-- String library ----------------------------------------------------- - | - |.ffunc_1 string_len - | checkstr STR:CARG1 - | checkfail ->fff_fallback - | lwz TMP0, STR:CARG1->len - | efdcfsi CRET1, TMP0 - | b ->fff_restv - | - |.ffunc string_byte // Only handle the 1-arg case here. - | cmplwi NARGS8:RC, 8 - | evldd STR:CARG1, 0(BASE) - | bne ->fff_fallback // Need exactly 1 argument. - | checkstr STR:CARG1 - | la RA, -8(BASE) - | checkfail ->fff_fallback - | lwz TMP0, STR:CARG1->len - | li RD, (0+1)*8 - | lbz TMP1, STR:CARG1[1] // Access is always ok (NUL at end). - | li TMP2, (1+1)*8 - | cmplwi TMP0, 0 - | lwz PC, FRAME_PC(BASE) - | efdcfsi CRET1, TMP1 - | iseleq RD, RD, TMP2 - | evstdd CRET1, 0(RA) - | b ->fff_res - | - |.ffunc string_char // Only handle the 1-arg case here. - | ffgccheck - | cmplwi NARGS8:RC, 8 - | evldd CARG1, 0(BASE) - | bne ->fff_fallback // Exactly 1 argument. - | checknum CARG1 - | la CARG2, DISPATCH_GL(tmptv)(DISPATCH) - | checkfail ->fff_fallback - | efdctsiz TMP0, CARG1 - | li CARG3, 1 - | cmplwi TMP0, 255 - | stb TMP0, 0(CARG2) - | bgt ->fff_fallback - |->fff_newstr: - | mr CARG1, L - | stw BASE, L->base - | stw PC, SAVE_PC - | bl extern lj_str_new // (lua_State *L, char *str, size_t l) - | // Returns GCstr *. - | lwz BASE, L->base - | evmergelo STR:CRET1, TISSTR, STR:CRET1 - | b ->fff_restv - | - |.ffunc string_sub - | ffgccheck - | cmplwi NARGS8:RC, 16 - | evldd CARG3, 16(BASE) - | evldd STR:CARG1, 0(BASE) - | blt ->fff_fallback - | evldd CARG2, 8(BASE) - | li TMP2, -1 - | beq >1 - | checknum CARG3 - | checkfail ->fff_fallback - | efdctsiz TMP2, CARG3 - |1: - | checknum CARG2 - | checkfail ->fff_fallback - | checkstr STR:CARG1 - | efdctsiz TMP1, CARG2 - | checkfail ->fff_fallback - | lwz TMP0, STR:CARG1->len - | cmplw TMP0, TMP2 // len < end? (unsigned compare) - | add TMP3, TMP2, TMP0 - | blt >5 - |2: - | cmpwi TMP1, 0 // start <= 0? - | add TMP3, TMP1, TMP0 - | ble >7 - |3: - | sub. CARG3, TMP2, TMP1 - | addi CARG2, STR:CARG1, #STR-1 - | addi CARG3, CARG3, 1 - | add CARG2, CARG2, TMP1 - | isellt CARG3, r0, CARG3 - | b ->fff_newstr - | - |5: // Negative end or overflow. - | cmpw TMP0, TMP2 - | addi TMP3, TMP3, 1 - | iselgt TMP2, TMP3, TMP0 // end = end > len ? len : end+len+1 - | b <2 - | - |7: // Negative start or underflow. - | cmpwi cr1, TMP3, 0 - | iseleq TMP1, r0, TMP3 - | isel TMP1, r0, TMP1, 4*cr1+lt - | addi TMP1, TMP1, 1 // start = 1 + (start ? start+len : 0) - | b <3 - | - |.ffunc string_rep // Only handle the 1-char case inline. - | ffgccheck - | cmplwi NARGS8:RC, 16 - | evldd CARG1, 0(BASE) - | evldd CARG2, 8(BASE) - | bne ->fff_fallback // Exactly 2 arguments. - | checknum CARG2 - | checkfail ->fff_fallback - | checkstr STR:CARG1 - | efdctsiz CARG3, CARG2 - | checkfail ->fff_fallback - | lwz TMP0, STR:CARG1->len - | cmpwi CARG3, 0 - | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) - | ble >2 // Count <= 0? (or non-int) - | cmplwi TMP0, 1 - | subi TMP2, CARG3, 1 - | blt >2 // Zero length string? - | cmplw cr1, TMP1, CARG3 - | bne ->fff_fallback // Fallback for > 1-char strings. - | lbz TMP0, STR:CARG1[1] - | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) - | blt cr1, ->fff_fallback - |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). - | cmplwi TMP2, 0 - | stbx TMP0, CARG2, TMP2 - | subi TMP2, TMP2, 1 - | bne <1 - | b ->fff_newstr - |2: // Return empty string. - | la STR:CRET1, DISPATCH_GL(strempty)(DISPATCH) - | evmergelo CRET1, TISSTR, STR:CRET1 - | b ->fff_restv - | - |.ffunc string_reverse - | ffgccheck - | cmplwi NARGS8:RC, 8 - | evldd CARG1, 0(BASE) - | blt ->fff_fallback - | checkstr STR:CARG1 - | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) - | checkfail ->fff_fallback - | lwz CARG3, STR:CARG1->len - | la CARG1, #STR(STR:CARG1) - | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) - | li TMP2, 0 - | cmplw TMP1, CARG3 - | subi TMP3, CARG3, 1 - | blt ->fff_fallback - |1: // Reverse string copy. - | cmpwi TMP3, 0 - | lbzx TMP1, CARG1, TMP2 - | blt ->fff_newstr - | stbx TMP1, CARG2, TMP3 - | subi TMP3, TMP3, 1 - | addi TMP2, TMP2, 1 - | b <1 - | - |.macro ffstring_case, name, lo - | .ffunc name - | ffgccheck - | cmplwi NARGS8:RC, 8 - | evldd CARG1, 0(BASE) - | blt ->fff_fallback - | checkstr STR:CARG1 - | lwz TMP1, DISPATCH_GL(tmpbuf.sz)(DISPATCH) - | checkfail ->fff_fallback - | lwz CARG3, STR:CARG1->len - | la CARG1, #STR(STR:CARG1) - | lwz CARG2, DISPATCH_GL(tmpbuf.buf)(DISPATCH) - | cmplw TMP1, CARG3 - | li TMP2, 0 - | blt ->fff_fallback - |1: // ASCII case conversion. - | cmplw TMP2, CARG3 - | lbzx TMP1, CARG1, TMP2 - | bge ->fff_newstr - | subi TMP0, TMP1, lo - | xori TMP3, TMP1, 0x20 - | cmplwi TMP0, 26 - | isellt TMP1, TMP3, TMP1 - | stbx TMP1, CARG2, TMP2 - | addi TMP2, TMP2, 1 - | b <1 - |.endmacro - | - |ffstring_case string_lower, 65 - |ffstring_case string_upper, 97 - | - |//-- Table library ------------------------------------------------------ - | - |.ffunc_1 table_getn - | checktab CARG1 - | checkfail ->fff_fallback - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | efdcfsi CRET1, CRET1 - | b ->fff_restv - | - |//-- Bit library -------------------------------------------------------- - | - |.macro .ffunc_bit, name - | .ffunc_n bit_..name - | efdadd CARG1, CARG1, TOBIT - |.endmacro - | - |.ffunc_bit tobit - |->fff_resbit: - | efdcfsi CRET1, CARG1 - | b ->fff_restv - | - |.macro .ffunc_bit_op, name, ins - | .ffunc_bit name - | li TMP1, 8 - |1: - | evlddx CARG2, BASE, TMP1 - | cmplw cr1, TMP1, NARGS8:RC - | checknum CARG2 - | bge cr1, ->fff_resbit - | checkfail ->fff_fallback - | efdadd CARG2, CARG2, TOBIT - | ins CARG1, CARG1, CARG2 - | addi TMP1, TMP1, 8 - | b <1 - |.endmacro - | - |.ffunc_bit_op band, and - |.ffunc_bit_op bor, or - |.ffunc_bit_op bxor, xor - | - |.ffunc_bit bswap - | rotlwi TMP0, CARG1, 8 - | rlwimi TMP0, CARG1, 24, 0, 7 - | rlwimi TMP0, CARG1, 24, 16, 23 - | efdcfsi CRET1, TMP0 - | b ->fff_restv - | - |.ffunc_bit bnot - | not TMP0, CARG1 - | efdcfsi CRET1, TMP0 - | b ->fff_restv - | - |.macro .ffunc_bit_sh, name, ins, shmod - | .ffunc_nn bit_..name - | efdadd CARG2, CARG2, TOBIT - | efdadd CARG1, CARG1, TOBIT - |.if shmod == 1 - | rlwinm CARG2, CARG2, 0, 27, 31 - |.elif shmod == 2 - | neg CARG2, CARG2 - |.endif - | ins TMP0, CARG1, CARG2 - | efdcfsi CRET1, TMP0 - | b ->fff_restv - |.endmacro - | - |.ffunc_bit_sh lshift, slw, 1 - |.ffunc_bit_sh rshift, srw, 1 - |.ffunc_bit_sh arshift, sraw, 1 - |.ffunc_bit_sh rol, rotlw, 0 - |.ffunc_bit_sh ror, rotlw, 2 - | - |//----------------------------------------------------------------------- - | - |->fff_fallback: // Call fast function fallback handler. - | // BASE = new base, RB = CFUNC, RC = nargs*8 - | lwz TMP3, CFUNC:RB->f - | add TMP1, BASE, NARGS8:RC - | lwz PC, FRAME_PC(BASE) // Fallback may overwrite PC. - | addi TMP0, TMP1, 8*LUA_MINSTACK - | lwz TMP2, L->maxstack - | stw PC, SAVE_PC // Redundant (but a defined value). - | cmplw TMP0, TMP2 - | stw BASE, L->base - | stw TMP1, L->top - | mr CARG1, L - | bgt >5 // Need to grow stack. - | mtctr TMP3 - | bctrl // (lua_State *L) - | // Either throws an error, or recovers and returns -1, 0 or nresults+1. - | lwz BASE, L->base - | cmpwi CRET1, 0 - | slwi RD, CRET1, 3 - | la RA, -8(BASE) - | bgt ->fff_res // Returned nresults+1? - |1: // Returned 0 or -1: retry fast path. - | lwz TMP0, L->top - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | sub NARGS8:RC, TMP0, BASE - | bne ->vm_call_tail // Returned -1? - | ins_callt // Returned 0: retry fast path. - | - |// Reconstruct previous base for vmeta_call during tailcall. - |->vm_call_tail: - | andi. TMP0, PC, FRAME_TYPE - | rlwinm TMP1, PC, 0, 0, 28 - | bne >3 - | lwz INS, -4(PC) - | decode_RA8 TMP1, INS - | addi TMP1, TMP1, 8 - |3: - | sub TMP2, BASE, TMP1 - | b ->vm_call_dispatch // Resolve again for tailcall. - | - |5: // Grow stack for fallback handler. - | li CARG2, LUA_MINSTACK - | bl extern lj_state_growstack // (lua_State *L, int n) - | lwz BASE, L->base - | cmpw TMP0, TMP0 // Set 4*cr0+eq to force retry. - | b <1 - | - |->fff_gcstep: // Call GC step function. - | // BASE = new base, RC = nargs*8 - | mflr SAVE0 - | stw BASE, L->base - | add TMP0, BASE, NARGS8:RC - | stw PC, SAVE_PC // Redundant (but a defined value). - | stw TMP0, L->top - | mr CARG1, L - | bl extern lj_gc_step // (lua_State *L) - | lwz BASE, L->base - | mtlr SAVE0 - | lwz TMP0, L->top - | sub NARGS8:RC, TMP0, BASE - | lwz CFUNC:RB, FRAME_FUNC(BASE) - | blr - | - |//----------------------------------------------------------------------- - |//-- Special dispatch targets ------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_record: // Dispatch target for recording phase. - |.if JIT - | NYI - |.endif - | - |->vm_rethook: // Dispatch target for return hooks. - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active? - | beq >1 - |5: // Re-dispatch to static ins. - | addi TMP1, TMP1, GG_DISP2STATIC // Assumes decode_OP4 TMP1, INS. - | lwzx TMP0, DISPATCH, TMP1 - | mtctr TMP0 - | bctr - | - |->vm_inshook: // Dispatch target for instr/line hooks. - | lbz TMP3, DISPATCH_GL(hookmask)(DISPATCH) - | lwz TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | andi. TMP0, TMP3, HOOK_ACTIVE // Hook already active? - | rlwinm TMP0, TMP3, 31-LUA_HOOKLINE, 31, 0 - | bne <5 - | - | cmpwi cr1, TMP0, 0 - | addic. TMP2, TMP2, -1 - | beq cr1, <5 - | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) - | beq >1 - | bge cr1, <5 - |1: - | mr CARG1, L - | stw MULTRES, SAVE_MULTRES - | mr CARG2, PC - | stw BASE, L->base - | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | bl extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) - |3: - | lwz BASE, L->base - |4: // Re-dispatch to static ins. - | lwz INS, -4(PC) - | decode_OP4 TMP1, INS - | decode_RB8 RB, INS - | addi TMP1, TMP1, GG_DISP2STATIC - | decode_RD8 RD, INS - | lwzx TMP0, DISPATCH, TMP1 - | decode_RA8 RA, INS - | decode_RC8 RC, INS - | mtctr TMP0 - | bctr - | - |->cont_hook: // Continue from hook yield. - | addi PC, PC, 4 - | lwz MULTRES, -20(RB) // Restore MULTRES for *M ins. - | b <4 - | - |->vm_hotloop: // Hot loop counter underflow. - |.if JIT - | NYI - |.endif - | - |->vm_callhook: // Dispatch target for call hooks. - | mr CARG2, PC - |.if JIT - | b >1 - |.endif - | - |->vm_hotcall: // Hot call counter underflow. - |.if JIT - | ori CARG2, PC, 1 - |1: - |.endif - | add TMP0, BASE, RC - | stw PC, SAVE_PC - | mr CARG1, L - | stw BASE, L->base - | sub RA, RA, BASE - | stw TMP0, L->top - | bl extern lj_dispatch_call // (lua_State *L, const BCIns *pc) - | // Returns ASMFunction. - | lwz BASE, L->base - | lwz TMP0, L->top - | stw ZERO, SAVE_PC // Invalidate for subsequent line hook. - | sub NARGS8:RC, TMP0, BASE - | add RA, BASE, RA - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | mtctr CRET1 - | bctr - | - |//----------------------------------------------------------------------- - |//-- Trace exit handler ------------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_exit_handler: - |.if JIT - | NYI - |.endif - |->vm_exit_interp: - |.if JIT - | NYI - |.endif - | - |//----------------------------------------------------------------------- - |//-- Math helper functions ---------------------------------------------- - |//----------------------------------------------------------------------- - | - |// FP value rounding. Called by math.floor/math.ceil fast functions - |// and from JIT code. - |// - |// This can be inlined if the CPU has the frin/friz/frip/frim instructions. - |// The alternative hard-float approaches have a deep dependency chain. - |// The resulting latency is at least 3x-7x the double-precision FP latency - |// (e500v2: 6cy, e600: 5cy, Cell: 10cy) or around 20-70 cycles. - |// - |// The soft-float approach is tedious, but much faster (e500v2: ~11cy/~6cy). - |// However it relies on a fast way to transfer the FP value to GPRs - |// (e500v2: 0cy for lo-word, 1cy for hi-word). - |// - |.macro vm_round, name, mode - | // Used temporaries: TMP0, TMP1, TMP2, TMP3. - |->name.._efd: // Input: CARG2, output: CRET2 - | evmergehi CARG1, CARG2, CARG2 - |->name.._hilo: - | // Input: CARG1 (hi), CARG2 (hi, lo), output: CRET2 - | rlwinm TMP2, CARG1, 12, 21, 31 - | addic. TMP2, TMP2, -1023 // exp = exponent(x) - 1023 - | li TMP1, -1 - | cmplwi cr1, TMP2, 51 // 0 <= exp <= 51? - | subfic TMP0, TMP2, 52 - | bgt cr1, >1 - | lus TMP3, 0xfff0 - | slw TMP0, TMP1, TMP0 // lomask = -1 << (52-exp) - | sraw TMP1, TMP3, TMP2 // himask = (int32_t)0xfff00000 >> exp - |.if mode == 2 // trunc(x): - | evmergelo TMP0, TMP1, TMP0 - | evand CRET2, CARG2, TMP0 // hi &= himask, lo &= lomask - |.else - | andc TMP2, CARG2, TMP0 - | andc TMP3, CARG1, TMP1 - | or TMP2, TMP2, TMP3 // ztest = (hi&~himask) | (lo&~lomask) - | srawi TMP3, CARG1, 31 // signmask = (int32_t)hi >> 31 - |.if mode == 0 // floor(x): - | and. TMP2, TMP2, TMP3 // iszero = ((ztest & signmask) == 0) - |.else // ceil(x): - | andc. TMP2, TMP2, TMP3 // iszero = ((ztest & ~signmask) == 0) - |.endif - | and CARG2, CARG2, TMP0 // lo &= lomask - | and CARG1, CARG1, TMP1 // hi &= himask - | subc TMP0, CARG2, TMP0 - | iseleq TMP0, CARG2, TMP0 // lo = iszero ? lo : lo-lomask - | sube TMP1, CARG1, TMP1 - | iseleq TMP1, CARG1, TMP1 // hi = iszero ? hi : hi-himask+carry - | evmergelo CRET2, TMP1, TMP0 - |.endif - | blr - |1: - | bgtlr // Already done if >=2^52, +-inf or nan. - |.if mode == 2 // trunc(x): - | rlwinm TMP1, CARG1, 0, 0, 0 // hi = sign(x) - | li TMP0, 0 - | evmergelo CRET2, TMP1, TMP0 - |.else - | rlwinm TMP2, CARG1, 0, 1, 31 - | srawi TMP0, CARG1, 31 // signmask = (int32_t)hi >> 31 - | or TMP2, TMP2, CARG2 // ztest = abs(hi) | lo - | lus TMP1, 0x3ff0 - |.if mode == 0 // floor(x): - | and. TMP2, TMP2, TMP0 // iszero = ((ztest & signmask) == 0) - |.else // ceil(x): - | andc. TMP2, TMP2, TMP0 // iszero = ((ztest & ~signmask) == 0) - |.endif - | li TMP0, 0 - | iseleq TMP1, r0, TMP1 - | rlwimi CARG1, TMP1, 0, 1, 31 // hi = sign(x) | (iszero ? 0.0 : 1.0) - | evmergelo CRET2, CARG1, TMP0 - |.endif - | blr - |.endmacro - | - |->vm_floor: - | mflr CARG3 - | evmergelo CARG2, CARG1, CARG2 - | bl ->vm_floor_hilo - | mtlr CARG3 - | evmergehi CRET1, CRET2, CRET2 - | blr - | - | vm_round vm_floor, 0 - | vm_round vm_ceil, 1 - |.if JIT - | vm_round vm_trunc, 2 - |.else - |->vm_trunc_efd: - |->vm_trunc_hilo: - |.endif - | - |//----------------------------------------------------------------------- - |//-- Miscellaneous functions -------------------------------------------- - |//----------------------------------------------------------------------- - | - |//----------------------------------------------------------------------- - |//-- FFI helper functions ----------------------------------------------- - |//----------------------------------------------------------------------- - | - |->vm_ffi_call: - |.if FFI - | NYI - |.endif - | - |//----------------------------------------------------------------------- -} - -/* Generate the code for a single instruction. */ -static void build_ins(BuildCtx *ctx, BCOp op, int defop) -{ - int vk = 0; - |=>defop: - - switch (op) { - - /* -- Comparison ops ---------------------------------------------------- */ - - /* Remember: all ops branch for a true comparison, fall through otherwise. */ - - case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: - | // RA = src1*8, RD = src2*8, JMP with RD = target - | evlddx TMP0, BASE, RA - | addi PC, PC, 4 - | evlddx TMP1, BASE, RD - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | lwz TMP2, -4(PC) - | evmergehi RB, TMP0, TMP1 - | decode_RD4 TMP2, TMP2 - | checknum RB - | add TMP2, TMP2, TMP3 - | checkanyfail ->vmeta_comp - | efdcmplt TMP0, TMP1 - if (op == BC_ISLE || op == BC_ISGT) { - | efdcmpeq cr1, TMP0, TMP1 - | cror 4*cr0+gt, 4*cr0+gt, 4*cr1+gt - } - if (op == BC_ISLT || op == BC_ISLE) { - | iselgt PC, TMP2, PC - } else { - | iselgt PC, PC, TMP2 - } - | ins_next - break; - - case BC_ISEQV: case BC_ISNEV: - vk = op == BC_ISEQV; - | // RA = src1*8, RD = src2*8, JMP with RD = target - | evlddx CARG2, BASE, RA - | addi PC, PC, 4 - | evlddx CARG3, BASE, RD - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | lwz TMP2, -4(PC) - | evmergehi RB, CARG2, CARG3 - | decode_RD4 TMP2, TMP2 - | checknum RB - | add TMP2, TMP2, TMP3 - | checkanyfail >5 - | efdcmpeq CARG2, CARG3 - if (vk) { - | iselgt PC, TMP2, PC - } else { - | iselgt PC, PC, TMP2 - } - |1: - | ins_next - | - |5: // Either or both types are not numbers. - | evcmpeq CARG2, CARG3 - | not TMP3, RB - | cmplwi cr1, TMP3, ~LJ_TISPRI // Primitive? - | crorc 4*cr7+lt, 4*cr0+so, 4*cr0+lt // 1: Same tv or different type. - | cmplwi cr6, TMP3, ~LJ_TISTABUD // Table or userdata? - | crandc 4*cr7+gt, 4*cr0+lt, 4*cr1+gt // 2: Same type and primitive. - | mr SAVE0, PC - if (vk) { - | isel PC, TMP2, PC, 4*cr7+gt - } else { - | isel TMP2, PC, TMP2, 4*cr7+gt - } - | cror 4*cr7+lt, 4*cr7+lt, 4*cr7+gt // 1 or 2. - if (vk) { - | isel PC, TMP2, PC, 4*cr0+so - } else { - | isel PC, PC, TMP2, 4*cr0+so - } - | blt cr7, <1 // Done if 1 or 2. - | blt cr6, <1 // Done if not tab/ud. - | - | // Different tables or userdatas. Need to check __eq metamethod. - | // Field metatable must be at same offset for GCtab and GCudata! - | lwz TAB:TMP2, TAB:CARG2->metatable - | li CARG4, 1-vk // ne = 0 or 1. - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable? - | lbz TMP2, TAB:TMP2->nomm - | andi. TMP2, TMP2, 1<<MM_eq - | bne <1 // Or 'no __eq' flag set? - | mr PC, SAVE0 // Restore old PC. - | b ->vmeta_equal // Handle __eq metamethod. - break; - - case BC_ISEQS: case BC_ISNES: - vk = op == BC_ISEQS; - | // RA = src*8, RD = str_const*8 (~), JMP with RD = target - | evlddx TMP0, BASE, RA - | srwi RD, RD, 1 - | lwz INS, 0(PC) - | subfic RD, RD, -4 - | addi PC, PC, 4 - | lwzx STR:TMP1, KBASE, RD // KBASE-4-str_const*4 - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | decode_RD4 TMP2, INS - | evmergelo STR:TMP1, TISSTR, STR:TMP1 - | add TMP2, TMP2, TMP3 - | evcmpeq TMP0, STR:TMP1 - if (vk) { - | isel PC, TMP2, PC, 4*cr0+so - } else { - | isel PC, PC, TMP2, 4*cr0+so - } - | ins_next - break; - - case BC_ISEQN: case BC_ISNEN: - vk = op == BC_ISEQN; - | // RA = src*8, RD = num_const*8, JMP with RD = target - | evlddx TMP0, BASE, RA - | addi PC, PC, 4 - | evlddx TMP1, KBASE, RD - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | lwz INS, -4(PC) - | checknum TMP0 - | checkfail >5 - | efdcmpeq TMP0, TMP1 - |1: - | decode_RD4 TMP2, INS - | add TMP2, TMP2, TMP3 - if (vk) { - | iselgt PC, TMP2, PC - |5: - } else { - | iselgt PC, PC, TMP2 - } - |3: - | ins_next - if (!vk) { - |5: - | decode_RD4 TMP2, INS - | add PC, TMP2, TMP3 - | b <3 - } - break; - - case BC_ISEQP: case BC_ISNEP: - vk = op == BC_ISEQP; - | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target - | lwzx TMP0, BASE, RA - | srwi TMP1, RD, 3 - | lwz INS, 0(PC) - | addi PC, PC, 4 - | not TMP1, TMP1 - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | cmplw TMP0, TMP1 - | decode_RD4 TMP2, INS - | add TMP2, TMP2, TMP3 - if (vk) { - | iseleq PC, TMP2, PC - } else { - | iseleq PC, PC, TMP2 - } - | ins_next - break; - - /* -- Unary test and copy ops ------------------------------------------- */ - - case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: - | // RA = dst*8 or unused, RD = src*8, JMP with RD = target - | evlddx TMP0, BASE, RD - | evaddw TMP1, TISNIL, TISNIL // Synthesize LJ_TFALSE. - | lwz INS, 0(PC) - | evcmpltu TMP0, TMP1 - | addi PC, PC, 4 - if (op == BC_IST || op == BC_ISF) { - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | decode_RD4 TMP2, INS - | add TMP2, TMP2, TMP3 - if (op == BC_IST) { - | isellt PC, TMP2, PC - } else { - | isellt PC, PC, TMP2 - } - } else { - if (op == BC_ISTC) { - | checkfail >1 - } else { - | checkok >1 - } - | addis PC, PC, -(BCBIAS_J*4 >> 16) - | decode_RD4 TMP2, INS - | evstddx TMP0, BASE, RA - | add PC, PC, TMP2 - |1: - } - | ins_next - break; - - /* -- Unary ops --------------------------------------------------------- */ - - case BC_MOV: - | // RA = dst*8, RD = src*8 - | ins_next1 - | evlddx TMP0, BASE, RD - | evstddx TMP0, BASE, RA - | ins_next2 - break; - case BC_NOT: - | // RA = dst*8, RD = src*8 - | ins_next1 - | lwzx TMP0, BASE, RD - | subfic TMP1, TMP0, LJ_TTRUE - | adde TMP0, TMP0, TMP1 - | stwx TMP0, BASE, RA - | ins_next2 - break; - case BC_UNM: - | // RA = dst*8, RD = src*8 - | evlddx TMP0, BASE, RD - | checknum TMP0 - | checkfail ->vmeta_unm - | efdneg TMP0, TMP0 - | ins_next1 - | evstddx TMP0, BASE, RA - | ins_next2 - break; - case BC_LEN: - | // RA = dst*8, RD = src*8 - | evlddx CARG1, BASE, RD - | checkstr CARG1 - | checkfail >2 - | lwz CRET1, STR:CARG1->len - |1: - | ins_next1 - | efdcfsi TMP0, CRET1 - | evstddx TMP0, BASE, RA - | ins_next2 - |2: - | checktab CARG1 - | checkfail ->vmeta_len -#if LJ_52 - | lwz TAB:TMP2, TAB:CARG1->metatable - | cmplwi TAB:TMP2, 0 - | bne >9 - |3: -#endif - |->BC_LEN_Z: - | bl extern lj_tab_len // (GCtab *t) - | // Returns uint32_t (but less than 2^31). - | b <1 -#if LJ_52 - |9: - | lbz TMP0, TAB:TMP2->nomm - | andi. TMP0, TMP0, 1<<MM_len - | bne <3 // 'no __len' flag set: done. - | b ->vmeta_len -#endif - break; - - /* -- Binary ops -------------------------------------------------------- */ - - |.macro ins_arithpre, t0, t1 - | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 - ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); - ||switch (vk) { - ||case 0: - | evlddx t0, BASE, RB - | checknum t0 - | evlddx t1, KBASE, RC - | checkfail ->vmeta_arith_vn - || break; - ||case 1: - | evlddx t1, BASE, RB - | checknum t1 - | evlddx t0, KBASE, RC - | checkfail ->vmeta_arith_nv - || break; - ||default: - | evlddx t0, BASE, RB - | evlddx t1, BASE, RC - | evmergehi TMP2, t0, t1 - | checknum TMP2 - | checkanyfail ->vmeta_arith_vv - || break; - ||} - |.endmacro - | - |.macro ins_arith, ins - | ins_arithpre TMP0, TMP1 - | ins_next1 - | ins TMP0, TMP0, TMP1 - | evstddx TMP0, BASE, RA - | ins_next2 - |.endmacro - - case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith efdadd - break; - case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith efdsub - break; - case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith efdmul - break; - case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arith efddiv - break; - case BC_MODVN: - | ins_arithpre RD, SAVE0 - |->BC_MODVN_Z: - | efddiv CARG2, RD, SAVE0 - | bl ->vm_floor_efd // floor(b/c) - | efdmul TMP0, CRET2, SAVE0 - | ins_next1 - | efdsub TMP0, RD, TMP0 // b - floor(b/c)*c - | evstddx TMP0, BASE, RA - | ins_next2 - break; - case BC_MODNV: case BC_MODVV: - | ins_arithpre RD, SAVE0 - | b ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. - break; - case BC_POW: - | evlddx CARG2, BASE, RB - | evlddx CARG4, BASE, RC - | evmergehi CARG1, CARG4, CARG2 - | checknum CARG1 - | evmergehi CARG3, CARG4, CARG4 - | checkanyfail ->vmeta_arith_vv - | bl extern pow@plt - | evmergelo CRET2, CRET1, CRET2 - | evstddx CRET2, BASE, RA - | ins_next - break; - - case BC_CAT: - | // RA = dst*8, RB = src_start*8, RC = src_end*8 - | sub CARG3, RC, RB - | stw BASE, L->base - | add CARG2, BASE, RC - | mr SAVE0, RB - |->BC_CAT_Z: - | stw PC, SAVE_PC - | mr CARG1, L - | srwi CARG3, CARG3, 3 - | bl extern lj_meta_cat // (lua_State *L, TValue *top, int left) - | // Returns NULL (finished) or TValue * (metamethod). - | cmplwi CRET1, 0 - | lwz BASE, L->base - | bne ->vmeta_binop - | evlddx TMP0, BASE, SAVE0 // Copy result from RB to RA. - | evstddx TMP0, BASE, RA - | ins_next - break; - - /* -- Constant ops ------------------------------------------------------ */ - - case BC_KSTR: - | // RA = dst*8, RD = str_const*8 (~) - | ins_next1 - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | lwzx TMP0, KBASE, TMP1 // KBASE-4-str_const*4 - | evmergelo TMP0, TISSTR, TMP0 - | evstddx TMP0, BASE, RA - | ins_next2 - break; - case BC_KCDATA: - |.if FFI - | // RA = dst*8, RD = cdata_const*8 (~) - | ins_next1 - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | lwzx TMP0, KBASE, TMP1 // KBASE-4-cdata_const*4 - | li TMP2, LJ_TCDATA - | evmergelo TMP0, TMP2, TMP0 - | evstddx TMP0, BASE, RA - | ins_next2 - |.endif - break; - case BC_KSHORT: - | // RA = dst*8, RD = int16_literal*8 - | srwi TMP1, RD, 3 - | extsh TMP1, TMP1 - | ins_next1 - | efdcfsi TMP0, TMP1 - | evstddx TMP0, BASE, RA - | ins_next2 - break; - case BC_KNUM: - | // RA = dst*8, RD = num_const*8 - | evlddx TMP0, KBASE, RD - | ins_next1 - | evstddx TMP0, BASE, RA - | ins_next2 - break; - case BC_KPRI: - | // RA = dst*8, RD = primitive_type*8 (~) - | srwi TMP1, RD, 3 - | not TMP0, TMP1 - | ins_next1 - | stwx TMP0, BASE, RA - | ins_next2 - break; - case BC_KNIL: - | // RA = base*8, RD = end*8 - | evstddx TISNIL, BASE, RA - | addi RA, RA, 8 - |1: - | evstddx TISNIL, BASE, RA - | cmpw RA, RD - | addi RA, RA, 8 - | blt <1 - | ins_next_ - break; - - /* -- Upvalue and function ops ------------------------------------------ */ - - case BC_UGET: - | // RA = dst*8, RD = uvnum*8 - | ins_next1 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RD, RD, 1 - | addi RD, RD, offsetof(GCfuncL, uvptr) - | lwzx UPVAL:RB, LFUNC:RB, RD - | lwz TMP1, UPVAL:RB->v - | evldd TMP0, 0(TMP1) - | evstddx TMP0, BASE, RA - | ins_next2 - break; - case BC_USETV: - | // RA = uvnum*8, RD = src*8 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | evlddx TMP1, BASE, RD - | lwzx UPVAL:RB, LFUNC:RB, RA - | lbz TMP3, UPVAL:RB->marked - | lwz CARG2, UPVAL:RB->v - | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbz TMP0, UPVAL:RB->closed - | evmergehi TMP2, TMP1, TMP1 - | evstdd TMP1, 0(CARG2) - | cmplwi cr1, TMP0, 0 - | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | subi TMP2, TMP2, (LJ_TISNUM+1) - | bne >2 // Upvalue is closed and black? - |1: - | ins_next - | - |2: // Check if new value is collectable. - | cmplwi TMP2, LJ_TISGCV - (LJ_TISNUM+1) - | bge <1 // tvisgcv(v) - | lbz TMP3, GCOBJ:TMP1->gch.marked - | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(v) - | la CARG1, GG_DISP2G(DISPATCH) - | // Crossed a write barrier. Move the barrier forward. - | beq <1 - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETS: - | // RA = uvnum*8, RD = str_const*8 (~) - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi TMP1, RD, 1 - | srwi RA, RA, 1 - | subfic TMP1, TMP1, -4 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | lwzx STR:TMP1, KBASE, TMP1 // KBASE-4-str_const*4 - | lwzx UPVAL:RB, LFUNC:RB, RA - | evmergelo STR:TMP1, TISSTR, STR:TMP1 - | lbz TMP3, UPVAL:RB->marked - | lwz CARG2, UPVAL:RB->v - | andi. TMP3, TMP3, LJ_GC_BLACK // isblack(uv) - | lbz TMP3, STR:TMP1->marked - | lbz TMP2, UPVAL:RB->closed - | evstdd STR:TMP1, 0(CARG2) - | bne >2 - |1: - | ins_next - | - |2: // Check if string is white and ensure upvalue is closed. - | andi. TMP3, TMP3, LJ_GC_WHITES // iswhite(str) - | cmplwi cr1, TMP2, 0 - | cror 4*cr0+eq, 4*cr0+eq, 4*cr1+eq - | la CARG1, GG_DISP2G(DISPATCH) - | // Crossed a write barrier. Move the barrier forward. - | beq <1 - | bl extern lj_gc_barrieruv // (global_State *g, TValue *tv) - | b <1 - break; - case BC_USETN: - | // RA = uvnum*8, RD = num_const*8 - | ins_next1 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | evlddx TMP0, KBASE, RD - | lwzx UPVAL:RB, LFUNC:RB, RA - | lwz TMP1, UPVAL:RB->v - | evstdd TMP0, 0(TMP1) - | ins_next2 - break; - case BC_USETP: - | // RA = uvnum*8, RD = primitive_type*8 (~) - | ins_next1 - | lwz LFUNC:RB, FRAME_FUNC(BASE) - | srwi RA, RA, 1 - | addi RA, RA, offsetof(GCfuncL, uvptr) - | srwi TMP0, RD, 3 - | lwzx UPVAL:RB, LFUNC:RB, RA - | not TMP0, TMP0 - | lwz TMP1, UPVAL:RB->v - | stw TMP0, 0(TMP1) - | ins_next2 - break; - - case BC_UCLO: - | // RA = level*8, RD = target - | lwz TMP1, L->openupval - | branch_RD // Do this first since RD is not saved. - | stw BASE, L->base - | cmplwi TMP1, 0 - | mr CARG1, L - | beq >1 - | add CARG2, BASE, RA - | bl extern lj_func_closeuv // (lua_State *L, TValue *level) - | lwz BASE, L->base - |1: - | ins_next - break; - - case BC_FNEW: - | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) - | srwi TMP1, RD, 1 - | stw BASE, L->base - | subfic TMP1, TMP1, -4 - | stw PC, SAVE_PC - | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 - | mr CARG1, L - | lwz CARG3, FRAME_FUNC(BASE) - | // (lua_State *L, GCproto *pt, GCfuncL *parent) - | bl extern lj_func_newL_gc - | // Returns GCfuncL *. - | lwz BASE, L->base - | evmergelo LFUNC:CRET1, TISFUNC, LFUNC:CRET1 - | evstddx LFUNC:CRET1, BASE, RA - | ins_next - break; - - /* -- Table ops --------------------------------------------------------- */ - - case BC_TNEW: - case BC_TDUP: - | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) - | lwz TMP0, DISPATCH_GL(gc.total)(DISPATCH) - | mr CARG1, L - | lwz TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) - | stw BASE, L->base - | cmplw TMP0, TMP1 - | stw PC, SAVE_PC - | bge >5 - |1: - if (op == BC_TNEW) { - | rlwinm CARG2, RD, 29, 21, 31 - | rlwinm CARG3, RD, 18, 27, 31 - | cmpwi CARG2, 0x7ff - | li TMP1, 0x801 - | iseleq CARG2, TMP1, CARG2 - | bl extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) - | // Returns Table *. - } else { - | srwi TMP1, RD, 1 - | subfic TMP1, TMP1, -4 - | lwzx CARG2, KBASE, TMP1 // KBASE-4-tab_const*4 - | bl extern lj_tab_dup // (lua_State *L, Table *kt) - | // Returns Table *. - } - | lwz BASE, L->base - | evmergelo TAB:CRET1, TISTAB, TAB:CRET1 - | evstddx TAB:CRET1, BASE, RA - | ins_next - |5: - | mr SAVE0, RD - | bl extern lj_gc_step_fixtop // (lua_State *L) - | mr RD, SAVE0 - | mr CARG1, L - | b <1 - break; - - case BC_GGET: - | // RA = dst*8, RD = str_const*8 (~) - case BC_GSET: - | // RA = src*8, RD = str_const*8 (~) - | lwz LFUNC:TMP2, FRAME_FUNC(BASE) - | srwi TMP1, RD, 1 - | lwz TAB:RB, LFUNC:TMP2->env - | subfic TMP1, TMP1, -4 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - if (op == BC_GGET) { - | b ->BC_TGETS_Z - } else { - | b ->BC_TSETS_Z - } - break; - - case BC_TGETV: - | // RA = dst*8, RB = table*8, RC = key*8 - | evlddx TAB:RB, BASE, RB - | evlddx RC, BASE, RC - | checktab TAB:RB - | checkfail ->vmeta_tgetv - | checknum RC - | checkfail >5 - | // Convert number key to integer - | efdctsi TMP2, RC - | lwz TMP0, TAB:RB->asize - | efdcfsi TMP1, TMP2 - | cmplw cr0, TMP0, TMP2 - | efdcmpeq cr1, RC, TMP1 - | lwz TMP1, TAB:RB->array - | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt - | slwi TMP2, TMP2, 3 - | ble ->vmeta_tgetv // Integer key and in array part? - | evlddx TMP1, TMP1, TMP2 - | checknil TMP1 - | checkok >2 - |1: - | evstddx TMP1, BASE, RA - | ins_next - | - |2: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP0, TAB:TMP2->nomm - | andi. TMP0, TMP0, 1<<MM_index - | bne <1 // 'no __index' flag set: done. - | b ->vmeta_tgetv - | - |5: - | checkstr STR:RC // String key? - | checkok ->BC_TGETS_Z - | b ->vmeta_tgetv - break; - case BC_TGETS: - | // RA = dst*8, RB = table*8, RC = str_const*8 (~) - | evlddx TAB:RB, BASE, RB - | srwi TMP1, RC, 1 - | checktab TAB:RB - | subfic TMP1, TMP1, -4 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - | checkfail ->vmeta_tgets1 - |->BC_TGETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 - | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:RB->node - | evmergelo STR:RC, TISSTR, STR:RC - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | evldd TMP0, NODE:TMP2->key - | evldd TMP1, NODE:TMP2->val - | evcmpeq TMP0, STR:RC - | checkanyfail >4 - | checknil TMP1 - | checkok >5 // Key found, but nil value? - |3: - | evstddx TMP1, BASE, RA - | ins_next - | - |4: // Follow hash chain. - | lwz NODE:TMP2, NODE:TMP2->next - | cmplwi NODE:TMP2, 0 - | bne <1 - | // End of hash chain: key not found, nil result. - | evmr TMP1, TISNIL - | - |5: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <3 // No metatable: done. - | lbz TMP0, TAB:TMP2->nomm - | andi. TMP0, TMP0, 1<<MM_index - | bne <3 // 'no __index' flag set: done. - | b ->vmeta_tgets - break; - case BC_TGETB: - | // RA = dst*8, RB = table*8, RC = index*8 - | evlddx TAB:RB, BASE, RB - | srwi TMP0, RC, 3 - | checktab TAB:RB - | checkfail ->vmeta_tgetb - | lwz TMP1, TAB:RB->asize - | lwz TMP2, TAB:RB->array - | cmplw TMP0, TMP1 - | bge ->vmeta_tgetb - | evlddx TMP1, TMP2, RC - | checknil TMP1 - | checkok >5 - |1: - | ins_next1 - | evstddx TMP1, BASE, RA - | ins_next2 - | - |5: // Check for __index if table value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP2, TAB:TMP2->nomm - | andi. TMP2, TMP2, 1<<MM_index - | bne <1 // 'no __index' flag set: done. - | b ->vmeta_tgetb // Caveat: preserve TMP0! - break; - - case BC_TSETV: - | // RA = src*8, RB = table*8, RC = key*8 - | evlddx TAB:RB, BASE, RB - | evlddx RC, BASE, RC - | checktab TAB:RB - | checkfail ->vmeta_tsetv - | checknum RC - | checkfail >5 - | // Convert number key to integer - | efdctsi TMP2, RC - | evlddx SAVE0, BASE, RA - | lwz TMP0, TAB:RB->asize - | efdcfsi TMP1, TMP2 - | cmplw cr0, TMP0, TMP2 - | efdcmpeq cr1, RC, TMP1 - | lwz TMP1, TAB:RB->array - | crand 4*cr0+gt, 4*cr0+gt, 4*cr1+gt - | slwi TMP0, TMP2, 3 - | ble ->vmeta_tsetv // Integer key and in array part? - | lbz TMP3, TAB:RB->marked - | evlddx TMP2, TMP1, TMP0 - | checknil TMP2 - | checkok >3 - |1: - | andi. TMP2, TMP3, LJ_GC_BLACK // isblack(table) - | evstddx SAVE0, TMP1, TMP0 - | bne >7 - |2: - | ins_next - | - |3: // Check for __newindex if previous value is nil. - | lwz TAB:TMP2, TAB:RB->metatable - | cmplwi TAB:TMP2, 0 - | beq <1 // No metatable: done. - | lbz TMP2, TAB:TMP2->nomm - | andi. TMP2, TMP2, 1<<MM_newindex - | bne <1 // 'no __newindex' flag set: done. - | b ->vmeta_tsetv - | - |5: - | checkstr STR:RC // String key? - | checkok ->BC_TSETS_Z - | b ->vmeta_tsetv - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <2 - break; - case BC_TSETS: - | // RA = src*8, RB = table*8, RC = str_const*8 (~) - | evlddx TAB:RB, BASE, RB - | srwi TMP1, RC, 1 - | checktab TAB:RB - | subfic TMP1, TMP1, -4 - | lwzx STR:RC, KBASE, TMP1 // KBASE-4-str_const*4 - | checkfail ->vmeta_tsets1 - |->BC_TSETS_Z: - | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = src*8 - | lwz TMP0, TAB:RB->hmask - | lwz TMP1, STR:RC->hash - | lwz NODE:TMP2, TAB:RB->node - | evmergelo STR:RC, TISSTR, STR:RC - | stb ZERO, TAB:RB->nomm // Clear metamethod cache. - | and TMP1, TMP1, TMP0 // idx = str->hash & tab->hmask - | evlddx SAVE0, BASE, RA - | slwi TMP0, TMP1, 5 - | slwi TMP1, TMP1, 3 - | sub TMP1, TMP0, TMP1 - | lbz TMP3, TAB:RB->marked - | add NODE:TMP2, NODE:TMP2, TMP1 // node = tab->node + (idx*32-idx*8) - |1: - | evldd TMP0, NODE:TMP2->key - | evldd TMP1, NODE:TMP2->val - | evcmpeq TMP0, STR:RC - | checkanyfail >5 - | checknil TMP1 - | checkok >4 // Key found, but nil value? - |2: - | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | evstdd SAVE0, NODE:TMP2->val - | bne >7 - |3: - | ins_next - | - |4: // Check for __newindex if previous value is nil. - | lwz TAB:TMP1, TAB:RB->metatable - | cmplwi TAB:TMP1, 0 - | beq <2 // No metatable: done. - | lbz TMP0, TAB:TMP1->nomm - | andi. TMP0, TMP0, 1<<MM_newindex - | bne <2 // 'no __newindex' flag set: done. - | b ->vmeta_tsets - | - |5: // Follow hash chain. - | lwz NODE:TMP2, NODE:TMP2->next - | cmplwi NODE:TMP2, 0 - | bne <1 - | // End of hash chain: key not found, add a new one. - | - | // But check for __newindex first. - | lwz TAB:TMP1, TAB:RB->metatable - | la CARG3, DISPATCH_GL(tmptv)(DISPATCH) - | stw PC, SAVE_PC - | mr CARG1, L - | cmplwi TAB:TMP1, 0 - | stw BASE, L->base - | beq >6 // No metatable: continue. - | lbz TMP0, TAB:TMP1->nomm - | andi. TMP0, TMP0, 1<<MM_newindex - | beq ->vmeta_tsets // 'no __newindex' flag NOT set: check. - |6: - | mr CARG2, TAB:RB - | evstdd STR:RC, 0(CARG3) - | bl extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) - | // Returns TValue *. - | lwz BASE, L->base - | evstdd SAVE0, 0(CRET1) - | b <3 // No 2nd write barrier needed. - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <3 - break; - case BC_TSETB: - | // RA = src*8, RB = table*8, RC = index*8 - | evlddx TAB:RB, BASE, RB - | srwi TMP0, RC, 3 - | checktab TAB:RB - | checkfail ->vmeta_tsetb - | lwz TMP1, TAB:RB->asize - | lwz TMP2, TAB:RB->array - | lbz TMP3, TAB:RB->marked - | cmplw TMP0, TMP1 - | evlddx SAVE0, BASE, RA - | bge ->vmeta_tsetb - | evlddx TMP1, TMP2, RC - | checknil TMP1 - | checkok >5 - |1: - | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - | evstddx SAVE0, TMP2, RC - | bne >7 - |2: - | ins_next - | - |5: // Check for __newindex if previous value is nil. - | lwz TAB:TMP1, TAB:RB->metatable - | cmplwi TAB:TMP1, 0 - | beq <1 // No metatable: done. - | lbz TMP1, TAB:TMP1->nomm - | andi. TMP1, TMP1, 1<<MM_newindex - | bne <1 // 'no __newindex' flag set: done. - | b ->vmeta_tsetb // Caveat: preserve TMP0! - | - |7: // Possible table write barrier for the value. Skip valiswhite check. - | barrierback TAB:RB, TMP3, TMP0 - | b <2 - break; - - case BC_TSETM: - | // RA = base*8 (table at base-1), RD = num_const*8 (start index) - | add RA, BASE, RA - |1: - | add TMP3, KBASE, RD - | lwz TAB:CARG2, -4(RA) // Guaranteed to be a table. - | addic. TMP0, MULTRES, -8 - | lwz TMP3, 4(TMP3) // Integer constant is in lo-word. - | srwi CARG3, TMP0, 3 - | beq >4 // Nothing to copy? - | add CARG3, CARG3, TMP3 - | lwz TMP2, TAB:CARG2->asize - | slwi TMP1, TMP3, 3 - | lbz TMP3, TAB:CARG2->marked - | cmplw CARG3, TMP2 - | add TMP2, RA, TMP0 - | lwz TMP0, TAB:CARG2->array - | bgt >5 - | add TMP1, TMP1, TMP0 - | andi. TMP0, TMP3, LJ_GC_BLACK // isblack(table) - |3: // Copy result slots to table. - | evldd TMP0, 0(RA) - | addi RA, RA, 8 - | cmpw cr1, RA, TMP2 - | evstdd TMP0, 0(TMP1) - | addi TMP1, TMP1, 8 - | blt cr1, <3 - | bne >7 - |4: - | ins_next - | - |5: // Need to resize array part. - | stw BASE, L->base - | mr CARG1, L - | stw PC, SAVE_PC - | mr SAVE0, RD - | bl extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) - | // Must not reallocate the stack. - | mr RD, SAVE0 - | b <1 - | - |7: // Possible table write barrier for any value. Skip valiswhite check. - | barrierback TAB:CARG2, TMP3, TMP0 - | b <4 - break; - - /* -- Calls and vararg handling ----------------------------------------- */ - - case BC_CALLM: - | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 - | add NARGS8:RC, NARGS8:RC, MULTRES - | // Fall through. Assumes BC_CALL follows. - break; - case BC_CALL: - | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 - | evlddx LFUNC:RB, BASE, RA - | mr TMP2, BASE - | add BASE, BASE, RA - | subi NARGS8:RC, NARGS8:RC, 8 - | checkfunc LFUNC:RB - | addi BASE, BASE, 8 - | checkfail ->vmeta_call - | ins_call - break; - - case BC_CALLMT: - | // RA = base*8, (RB = 0,) RC = extra_nargs*8 - | add NARGS8:RC, NARGS8:RC, MULTRES - | // Fall through. Assumes BC_CALLT follows. - break; - case BC_CALLT: - | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 - | evlddx LFUNC:RB, BASE, RA - | add RA, BASE, RA - | lwz TMP1, FRAME_PC(BASE) - | subi NARGS8:RC, NARGS8:RC, 8 - | checkfunc LFUNC:RB - | addi RA, RA, 8 - | checkfail ->vmeta_callt - |->BC_CALLT_Z: - | andi. TMP0, TMP1, FRAME_TYPE // Caveat: preserve cr0 until the crand. - | lbz TMP3, LFUNC:RB->ffid - | xori TMP2, TMP1, FRAME_VARG - | cmplwi cr1, NARGS8:RC, 0 - | bne >7 - |1: - | stw LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. - | li TMP2, 0 - | cmplwi cr7, TMP3, 1 // (> FF_C) Calling a fast function? - | beq cr1, >3 - |2: - | addi TMP3, TMP2, 8 - | evlddx TMP0, RA, TMP2 - | cmplw cr1, TMP3, NARGS8:RC - | evstddx TMP0, BASE, TMP2 - | mr TMP2, TMP3 - | bne cr1, <2 - |3: - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+gt - | beq >5 - |4: - | ins_callt - | - |5: // Tailcall to a fast function with a Lua frame below. - | lwz INS, -4(TMP1) - | decode_RA8 RA, INS - | sub TMP1, BASE, RA - | lwz LFUNC:TMP1, FRAME_FUNC-8(TMP1) - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. - | b <4 - | - |7: // Tailcall from a vararg function. - | andi. TMP0, TMP2, FRAME_TYPEP - | bne <1 // Vararg frame below? - | sub BASE, BASE, TMP2 // Relocate BASE down. - | lwz TMP1, FRAME_PC(BASE) - | andi. TMP0, TMP1, FRAME_TYPE - | b <1 - break; - - case BC_ITERC: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) - | subi RA, RA, 24 // evldd doesn't support neg. offsets. - | mr TMP2, BASE - | evlddx LFUNC:RB, BASE, RA - | add BASE, BASE, RA - | evldd TMP0, 8(BASE) - | evldd TMP1, 16(BASE) - | evstdd LFUNC:RB, 24(BASE) // Copy callable. - | checkfunc LFUNC:RB - | evstdd TMP0, 32(BASE) // Copy state. - | li NARGS8:RC, 16 // Iterators get 2 arguments. - | evstdd TMP1, 40(BASE) // Copy control var. - | addi BASE, BASE, 32 - | checkfail ->vmeta_call - | ins_call - break; - - case BC_ITERN: - | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) - |.if JIT - | // NYI: add hotloop, record BC_ITERN. - |.endif - | add RA, BASE, RA - | lwz TAB:RB, -12(RA) - | lwz RC, -4(RA) // Get index from control var. - | lwz TMP0, TAB:RB->asize - | lwz TMP1, TAB:RB->array - | addi PC, PC, 4 - |1: // Traverse array part. - | cmplw RC, TMP0 - | slwi TMP3, RC, 3 - | bge >5 // Index points after array part? - | evlddx TMP2, TMP1, TMP3 - | checknil TMP2 - | lwz INS, -4(PC) - | checkok >4 - | efdcfsi TMP0, RC - | addi RC, RC, 1 - | addis TMP3, PC, -(BCBIAS_J*4 >> 16) - | evstdd TMP2, 8(RA) - | decode_RD4 TMP1, INS - | stw RC, -4(RA) // Update control var. - | add PC, TMP1, TMP3 - | evstdd TMP0, 0(RA) - |3: - | ins_next - | - |4: // Skip holes in array part. - | addi RC, RC, 1 - | b <1 - | - |5: // Traverse hash part. - | lwz TMP1, TAB:RB->hmask - | sub RC, RC, TMP0 - | lwz TMP2, TAB:RB->node - |6: - | cmplw RC, TMP1 // End of iteration? Branch to ITERL+1. - | slwi TMP3, RC, 5 - | bgt <3 - | slwi RB, RC, 3 - | sub TMP3, TMP3, RB - | evlddx RB, TMP2, TMP3 - | add NODE:TMP3, TMP2, TMP3 - | checknil RB - | lwz INS, -4(PC) - | checkok >7 - | evldd TMP3, NODE:TMP3->key - | addis TMP2, PC, -(BCBIAS_J*4 >> 16) - | evstdd RB, 8(RA) - | add RC, RC, TMP0 - | decode_RD4 TMP1, INS - | evstdd TMP3, 0(RA) - | addi RC, RC, 1 - | add PC, TMP1, TMP2 - | stw RC, -4(RA) // Update control var. - | b <3 - | - |7: // Skip holes in hash part. - | addi RC, RC, 1 - | b <6 - break; - - case BC_ISNEXT: - | // RA = base*8, RD = target (points to ITERN) - | add RA, BASE, RA - | li TMP2, -24 - | evlddx CFUNC:TMP1, RA, TMP2 - | lwz TMP2, -16(RA) - | lwz TMP3, -8(RA) - | evmergehi TMP0, CFUNC:TMP1, CFUNC:TMP1 - | cmpwi cr0, TMP2, LJ_TTAB - | cmpwi cr1, TMP0, LJ_TFUNC - | cmpwi cr6, TMP3, LJ_TNIL - | bne cr1, >5 - | lbz TMP1, CFUNC:TMP1->ffid - | crand 4*cr0+eq, 4*cr0+eq, 4*cr6+eq - | cmpwi cr7, TMP1, FF_next_N - | srwi TMP0, RD, 1 - | crand 4*cr0+eq, 4*cr0+eq, 4*cr7+eq - | add TMP3, PC, TMP0 - | bne cr0, >5 - | lus TMP1, 0xfffe - | ori TMP1, TMP1, 0x7fff - | stw ZERO, -4(RA) // Initialize control var. - | stw TMP1, -8(RA) - | addis PC, TMP3, -(BCBIAS_J*4 >> 16) - |1: - | ins_next - |5: // Despecialize bytecode if any of the checks fail. - | li TMP0, BC_JMP - | li TMP1, BC_ITERC - | stb TMP0, -1(PC) - | addis PC, TMP3, -(BCBIAS_J*4 >> 16) - | stb TMP1, 3(PC) - | b <1 - break; - - case BC_VARG: - | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 - | lwz TMP0, FRAME_PC(BASE) - | add RC, BASE, RC - | add RA, BASE, RA - | addi RC, RC, FRAME_VARG - | add TMP2, RA, RB - | subi TMP3, BASE, 8 // TMP3 = vtop - | sub RC, RC, TMP0 // RC = vbase - | // Note: RC may now be even _above_ BASE if nargs was < numparams. - | cmplwi cr1, RB, 0 - | sub. TMP1, TMP3, RC - | beq cr1, >5 // Copy all varargs? - | subi TMP2, TMP2, 16 - | ble >2 // No vararg slots? - |1: // Copy vararg slots to destination slots. - | evldd TMP0, 0(RC) - | addi RC, RC, 8 - | evstdd TMP0, 0(RA) - | cmplw RA, TMP2 - | cmplw cr1, RC, TMP3 - | bge >3 // All destination slots filled? - | addi RA, RA, 8 - | blt cr1, <1 // More vararg slots? - |2: // Fill up remainder with nil. - | evstdd TISNIL, 0(RA) - | cmplw RA, TMP2 - | addi RA, RA, 8 - | blt <2 - |3: - | ins_next - | - |5: // Copy all varargs. - | lwz TMP0, L->maxstack - | li MULTRES, 8 // MULTRES = (0+1)*8 - | ble <3 // No vararg slots? - | add TMP2, RA, TMP1 - | cmplw TMP2, TMP0 - | addi MULTRES, TMP1, 8 - | bgt >7 - |6: - | evldd TMP0, 0(RC) - | addi RC, RC, 8 - | evstdd TMP0, 0(RA) - | cmplw RC, TMP3 - | addi RA, RA, 8 - | blt <6 // More vararg slots? - | b <3 - | - |7: // Grow stack for varargs. - | mr CARG1, L - | stw RA, L->top - | sub SAVE0, RC, BASE // Need delta, because BASE may change. - | stw BASE, L->base - | sub RA, RA, BASE - | stw PC, SAVE_PC - | srwi CARG2, TMP1, 3 - | bl extern lj_state_growstack // (lua_State *L, int n) - | lwz BASE, L->base - | add RA, BASE, RA - | add RC, BASE, SAVE0 - | subi TMP3, BASE, 8 - | b <6 - break; - - /* -- Returns ----------------------------------------------------------- */ - - case BC_RETM: - | // RA = results*8, RD = extra_nresults*8 - | add RD, RD, MULTRES // MULTRES >= 8, so RD >= 8. - | // Fall through. Assumes BC_RET follows. - break; - - case BC_RET: - | // RA = results*8, RD = (nresults+1)*8 - | lwz PC, FRAME_PC(BASE) - | add RA, BASE, RA - | mr MULTRES, RD - |1: - | andi. TMP0, PC, FRAME_TYPE - | xori TMP1, PC, FRAME_VARG - | bne ->BC_RETV_Z - | - |->BC_RET_Z: - | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return - | lwz INS, -4(PC) - | cmpwi RD, 8 - | subi TMP2, BASE, 8 - | subi RC, RD, 8 - | decode_RB8 RB, INS - | beq >3 - | li TMP1, 0 - |2: - | addi TMP3, TMP1, 8 - | evlddx TMP0, RA, TMP1 - | cmpw TMP3, RC - | evstddx TMP0, TMP2, TMP1 - | beq >3 - | addi TMP1, TMP3, 8 - | evlddx TMP0, RA, TMP3 - | cmpw TMP1, RC - | evstddx TMP0, TMP2, TMP3 - | bne <2 - |3: - |5: - | cmplw RB, RD - | decode_RA8 RA, INS - | bgt >6 - | sub BASE, TMP2, RA - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | evstddx TISNIL, TMP2, TMP1 - | b <5 - | - |->BC_RETV_Z: // Non-standard return case. - | andi. TMP2, TMP1, FRAME_TYPEP - | bne ->vm_return - | // Return from vararg function: relocate BASE down. - | sub BASE, BASE, TMP1 - | lwz PC, FRAME_PC(BASE) - | b <1 - break; - - case BC_RET0: case BC_RET1: - | // RA = results*8, RD = (nresults+1)*8 - | lwz PC, FRAME_PC(BASE) - | add RA, BASE, RA - | mr MULTRES, RD - | andi. TMP0, PC, FRAME_TYPE - | xori TMP1, PC, FRAME_VARG - | bne ->BC_RETV_Z - | - | lwz INS, -4(PC) - | subi TMP2, BASE, 8 - | decode_RB8 RB, INS - if (op == BC_RET1) { - | evldd TMP0, 0(RA) - | evstdd TMP0, 0(TMP2) - } - |5: - | cmplw RB, RD - | decode_RA8 RA, INS - | bgt >6 - | sub BASE, TMP2, RA - | lwz LFUNC:TMP1, FRAME_FUNC(BASE) - | ins_next1 - | lwz TMP1, LFUNC:TMP1->pc - | lwz KBASE, PC2PROTO(k)(TMP1) - | ins_next2 - | - |6: // Fill up results with nil. - | subi TMP1, RD, 8 - | addi RD, RD, 8 - | evstddx TISNIL, TMP2, TMP1 - | b <5 - break; - - /* -- Loops and branches ------------------------------------------------ */ - - case BC_FORL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IFORL follows. - break; - - case BC_JFORI: - case BC_JFORL: -#if !LJ_HASJIT - break; -#endif - case BC_FORI: - case BC_IFORL: - | // RA = base*8, RD = target (after end of loop or start of loop) - vk = (op == BC_IFORL || op == BC_JFORL); - | add RA, BASE, RA - | evldd TMP1, FORL_IDX*8(RA) - | evldd TMP3, FORL_STEP*8(RA) - | evldd TMP2, FORL_STOP*8(RA) - if (!vk) { - | evcmpgtu cr0, TMP1, TISNUM - | evcmpgtu cr7, TMP3, TISNUM - | evcmpgtu cr1, TMP2, TISNUM - | cror 4*cr0+lt, 4*cr0+lt, 4*cr7+lt - | cror 4*cr0+lt, 4*cr0+lt, 4*cr1+lt - | blt ->vmeta_for - } - if (vk) { - | efdadd TMP1, TMP1, TMP3 - | evstdd TMP1, FORL_IDX*8(RA) - } - | evcmpgts TMP3, TISNIL - | evstdd TMP1, FORL_EXT*8(RA) - | bge >2 - | efdcmpgt TMP1, TMP2 - |1: - if (op != BC_JFORL) { - | srwi RD, RD, 1 - | add RD, PC, RD - if (op == BC_JFORI) { - | addis PC, RD, -(BCBIAS_J*4 >> 16) - } else { - | addis RD, RD, -(BCBIAS_J*4 >> 16) - } - } - if (op == BC_FORI) { - | iselgt PC, RD, PC - } else if (op == BC_IFORL) { - | iselgt PC, PC, RD - } else { - | ble =>BC_JLOOP - } - | ins_next - |2: - | efdcmpgt TMP2, TMP1 - | b <1 - break; - - case BC_ITERL: - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_IITERL follows. - break; - - case BC_JITERL: -#if !LJ_HASJIT - break; -#endif - case BC_IITERL: - | // RA = base*8, RD = target - | evlddx TMP1, BASE, RA - | subi RA, RA, 8 - | checknil TMP1 - | checkok >1 // Stop if iterator returned nil. - if (op == BC_JITERL) { - | NYI - } else { - | branch_RD // Otherwise save control var + branch. - | evstddx TMP1, BASE, RA - } - |1: - | ins_next - break; - - case BC_LOOP: - | // RA = base*8, RD = target (loop extent) - | // Note: RA/RD is only used by trace recorder to determine scope/extent - | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT - | hotloop - |.endif - | // Fall through. Assumes BC_ILOOP follows. - break; - - case BC_ILOOP: - | // RA = base*8, RD = target (loop extent) - | ins_next - break; - - case BC_JLOOP: - |.if JIT - | NYI - |.endif - break; - - case BC_JMP: - | // RA = base*8 (only used by trace recorder), RD = target - | branch_RD - | ins_next - break; - - /* -- Function headers -------------------------------------------------- */ - - case BC_FUNCF: - |.if JIT - | hotcall - |.endif - case BC_FUNCV: /* NYI: compiled vararg functions. */ - | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. - break; - - case BC_JFUNCF: -#if !LJ_HASJIT - break; -#endif - case BC_IFUNCF: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lwz TMP2, L->maxstack - | lbz TMP1, -4+PC2PROTO(numparams)(PC) - | lwz KBASE, -4+PC2PROTO(k)(PC) - | cmplw RA, TMP2 - | slwi TMP1, TMP1, 3 - | bgt ->vm_growstack_l - | ins_next1 - |2: - | cmplw NARGS8:RC, TMP1 // Check for missing parameters. - | ble >3 - if (op == BC_JFUNCF) { - | NYI - } else { - | ins_next2 - } - | - |3: // Clear missing parameters. - | evstddx TISNIL, BASE, NARGS8:RC - | addi NARGS8:RC, NARGS8:RC, 8 - | b <2 - break; - - case BC_JFUNCV: -#if !LJ_HASJIT - break; -#endif - | NYI // NYI: compiled vararg functions - break; /* NYI: compiled vararg functions. */ - - case BC_IFUNCV: - | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 - | lwz TMP2, L->maxstack - | add TMP1, BASE, RC - | add TMP0, RA, RC - | stw LFUNC:RB, 4(TMP1) // Store copy of LFUNC. - | addi TMP3, RC, 8+FRAME_VARG - | lwz KBASE, -4+PC2PROTO(k)(PC) - | cmplw TMP0, TMP2 - | stw TMP3, 0(TMP1) // Store delta + FRAME_VARG. - | bge ->vm_growstack_l - | lbz TMP2, -4+PC2PROTO(numparams)(PC) - | mr RA, BASE - | mr RC, TMP1 - | ins_next1 - | cmpwi TMP2, 0 - | addi BASE, TMP1, 8 - | beq >3 - |1: - | cmplw RA, RC // Less args than parameters? - | evldd TMP0, 0(RA) - | bge >4 - | evstdd TISNIL, 0(RA) // Clear old fixarg slot (help the GC). - | addi RA, RA, 8 - |2: - | addic. TMP2, TMP2, -1 - | evstdd TMP0, 8(TMP1) - | addi TMP1, TMP1, 8 - | bne <1 - |3: - | ins_next2 - | - |4: // Clear missing parameters. - | evmr TMP0, TISNIL - | b <2 - break; - - case BC_FUNCC: - case BC_FUNCCW: - | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 - if (op == BC_FUNCC) { - | lwz TMP3, CFUNC:RB->f - } else { - | lwz TMP3, DISPATCH_GL(wrapf)(DISPATCH) - } - | add TMP1, RA, NARGS8:RC - | lwz TMP2, L->maxstack - | add RC, BASE, NARGS8:RC - | stw BASE, L->base - | cmplw TMP1, TMP2 - | stw RC, L->top - | li_vmstate C - | mtctr TMP3 - if (op == BC_FUNCCW) { - | lwz CARG2, CFUNC:RB->f - } - | mr CARG1, L - | bgt ->vm_growstack_c // Need to grow stack. - | st_vmstate - | bctrl // (lua_State *L [, lua_CFunction f]) - | // Returns nresults. - | lwz TMP1, L->top - | slwi RD, CRET1, 3 - | lwz BASE, L->base - | li_vmstate INTERP - | lwz PC, FRAME_PC(BASE) // Fetch PC of caller. - | sub RA, TMP1, RD // RA = L->top - nresults*8 - | st_vmstate - | b ->vm_returnc - break; - - /* ---------------------------------------------------------------------- */ - - default: - fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); - exit(2); - break; - } -} - -static int build_backend(BuildCtx *ctx) -{ - int op; - - dasm_growpc(Dst, BC__MAX); - - build_subroutines(ctx); - - |.code_op - for (op = 0; op < BC__MAX; op++) - build_ins(ctx, (BCOp)op, op); - - return BC__MAX; -} - -/* Emit pseudo frame-info for all assembler functions. */ -static void emit_asm_debug(BuildCtx *ctx) -{ - int i; - switch (ctx->mode) { - case BUILD_elfasm: - fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe0:\n" - "\t.long .LECIE0-.LSCIE0\n" - ".LSCIE0:\n" - "\t.long 0xffffffff\n" - "\t.byte 0x1\n" - "\t.string \"\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE0:\n\n"); - fprintf(ctx->fp, - ".LSFDE0:\n" - "\t.long .LEFDE0-.LASFDE0\n" - ".LASFDE0:\n" - "\t.long .Lframe0\n" - "\t.long .Lbegin\n" - "\t.long %d\n" - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n", - (int)ctx->codesz, CFRAME_SIZE); - for (i = 14; i <= 31; i++) - fprintf(ctx->fp, - "\t.byte %d\n\t.uleb128 %d\n" - "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n", - 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i)); - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE0:\n\n"); - fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); - fprintf(ctx->fp, - ".Lframe1:\n" - "\t.long .LECIE1-.LSCIE1\n" - ".LSCIE1:\n" - "\t.long 0\n" - "\t.byte 0x1\n" - "\t.string \"zPR\"\n" - "\t.uleb128 0x1\n" - "\t.sleb128 -4\n" - "\t.byte 65\n" - "\t.uleb128 6\n" /* augmentation length */ - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.long lj_err_unwind_dwarf-.\n" - "\t.byte 0x1b\n" /* pcrel|sdata4 */ - "\t.byte 0xc\n\t.uleb128 1\n\t.uleb128 0\n" - "\t.align 2\n" - ".LECIE1:\n\n"); - fprintf(ctx->fp, - ".LSFDE1:\n" - "\t.long .LEFDE1-.LASFDE1\n" - ".LASFDE1:\n" - "\t.long .LASFDE1-.Lframe1\n" - "\t.long .Lbegin-.\n" - "\t.long %d\n" - "\t.uleb128 0\n" /* augmentation length */ - "\t.byte 0xe\n\t.uleb128 %d\n" - "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" - "\t.byte 0x5\n\t.uleb128 70\n\t.sleb128 37\n", - (int)ctx->codesz, CFRAME_SIZE); - for (i = 14; i <= 31; i++) - fprintf(ctx->fp, - "\t.byte %d\n\t.uleb128 %d\n" - "\t.byte 5\n\t.uleb128 %d\n\t.uleb128 %d\n", - 0x80+i, 1+2*(31-i), 1200+i, 2+2*(31-i)); - fprintf(ctx->fp, - "\t.align 2\n" - ".LEFDE1:\n\n"); - break; - default: - break; - } -} - diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc new file mode 100644 index 00000000..03d96557 --- /dev/null +++ b/src/vm_x64.dasc @@ -0,0 +1,4935 @@ +|// Low-level VM code for x64 CPUs in LJ_GC64 mode. +|// Bytecode interpreter, fast functions and helper functions. +|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +| +|.arch x64 +|.section code_op, code_sub +| +|.actionlist build_actionlist +|.globals GLOB_ +|.globalnames globnames +|.externnames extnames +| +|//----------------------------------------------------------------------- +| +|.if WIN +|.define X64WIN, 1 // Windows/x64 calling conventions. +|.endif +| +|// Fixed register assignments for the interpreter. +|// This is very fragile and has many dependencies. Caveat emptor. +|.define BASE, rdx // Not C callee-save, refetched anyway. +|.if X64WIN +|.define KBASE, rdi // Must be C callee-save. +|.define PC, rsi // Must be C callee-save. +|.define DISPATCH, rbx // Must be C callee-save. +|.define KBASEd, edi +|.define PCd, esi +|.define DISPATCHd, ebx +|.else +|.define KBASE, r15 // Must be C callee-save. +|.define PC, rbx // Must be C callee-save. +|.define DISPATCH, r14 // Must be C callee-save. +|.define KBASEd, r15d +|.define PCd, ebx +|.define DISPATCHd, r14d +|.endif +| +|.define RA, rcx +|.define RAd, ecx +|.define RAH, ch +|.define RAL, cl +|.define RB, rbp // Must be rbp (C callee-save). +|.define RBd, ebp +|.define RC, rax // Must be rax. +|.define RCd, eax +|.define RCW, ax +|.define RCH, ah +|.define RCL, al +|.define OP, RBd +|.define RD, RC +|.define RDd, RCd +|.define RDW, RCW +|.define RDL, RCL +|.define TMPR, r10 +|.define TMPRd, r10d +|.define ITYPE, r11 +|.define ITYPEd, r11d +| +|.if X64WIN +|.define CARG1, rcx // x64/WIN64 C call arguments. +|.define CARG2, rdx +|.define CARG3, r8 +|.define CARG4, r9 +|.define CARG1d, ecx +|.define CARG2d, edx +|.define CARG3d, r8d +|.define CARG4d, r9d +|.else +|.define CARG1, rdi // x64/POSIX C call arguments. +|.define CARG2, rsi +|.define CARG3, rdx +|.define CARG4, rcx +|.define CARG5, r8 +|.define CARG6, r9 +|.define CARG1d, edi +|.define CARG2d, esi +|.define CARG3d, edx +|.define CARG4d, ecx +|.define CARG5d, r8d +|.define CARG6d, r9d +|.endif +| +|// Type definitions. Some of these are only used for documentation. +|.type L, lua_State +|.type GL, global_State +|.type TVALUE, TValue +|.type GCOBJ, GCobj +|.type STR, GCstr +|.type TAB, GCtab +|.type LFUNC, GCfuncL +|.type CFUNC, GCfuncC +|.type PROTO, GCproto +|.type UPVAL, GCupval +|.type NODE, Node +|.type NARGS, int +|.type TRACE, GCtrace +|.type SBUF, SBuf +| +|// Stack layout while in interpreter. Must match with lj_frame.h. +|//----------------------------------------------------------------------- +|.if X64WIN // x64/Windows stack layout +| +|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). +|.macro saveregs_ +| push rdi; push rsi; push rbx +| sub rsp, CFRAME_SPACE +|.endmacro +|.macro saveregs +| push rbp; saveregs_ +|.endmacro +|.macro restoreregs +| add rsp, CFRAME_SPACE +| pop rbx; pop rsi; pop rdi; pop rbp +|.endmacro +| +|.define SAVE_CFRAME, aword [rsp+aword*13] +|.define SAVE_PC, aword [rsp+aword*12] +|.define SAVE_L, aword [rsp+aword*11] +|.define SAVE_ERRF, dword [rsp+dword*21] +|.define SAVE_NRES, dword [rsp+dword*20] +|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter +|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. +|.define SAVE_R4, aword [rsp+aword*8] +|.define SAVE_R3, aword [rsp+aword*7] +|.define SAVE_R2, aword [rsp+aword*6] +|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. +|.define ARG5, aword [rsp+aword*4] +|.define CSAVE_4, aword [rsp+aword*3] +|.define CSAVE_3, aword [rsp+aword*2] +|.define CSAVE_2, aword [rsp+aword*1] +|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. +|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee +| +|.define ARG5d, dword [rsp+dword*8] +|.define TMP1, ARG5 // TMP1 overlaps ARG5 +|.define TMP1d, ARG5d +|.define TMP1hi, dword [rsp+dword*9] +|.define MULTRES, TMP1d // MULTRES overlaps TMP1d. +| +|//----------------------------------------------------------------------- +|.else // x64/POSIX stack layout +| +|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). +|.macro saveregs_ +| push rbx; push r15; push r14 +|.if NO_UNWIND +| push r13; push r12 +|.endif +| sub rsp, CFRAME_SPACE +|.endmacro +|.macro saveregs +| push rbp; saveregs_ +|.endmacro +|.macro restoreregs +| add rsp, CFRAME_SPACE +|.if NO_UNWIND +| pop r12; pop r13 +|.endif +| pop r14; pop r15; pop rbx; pop rbp +|.endmacro +| +|//----- 16 byte aligned, +|.if NO_UNWIND +|.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. +|.define SAVE_R4, aword [rsp+aword*10] +|.define SAVE_R3, aword [rsp+aword*9] +|.define SAVE_R2, aword [rsp+aword*8] +|.define SAVE_R1, aword [rsp+aword*7] +|.define SAVE_RU2, aword [rsp+aword*6] +|.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. +|.else +|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. +|.define SAVE_R4, aword [rsp+aword*8] +|.define SAVE_R3, aword [rsp+aword*7] +|.define SAVE_R2, aword [rsp+aword*6] +|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. +|.endif +|.define SAVE_CFRAME, aword [rsp+aword*4] +|.define SAVE_PC, aword [rsp+aword*3] +|.define SAVE_L, aword [rsp+aword*2] +|.define SAVE_ERRF, dword [rsp+dword*3] +|.define SAVE_NRES, dword [rsp+dword*2] +|.define TMP1, aword [rsp] //<-- rsp while in interpreter. +|//----- 16 byte aligned +| +|.define TMP1d, dword [rsp] +|.define TMP1hi, dword [rsp+dword*1] +|.define MULTRES, TMP1d // MULTRES overlaps TMP1d. +| +|.endif +| +|//----------------------------------------------------------------------- +| +|// Instruction headers. +|.macro ins_A; .endmacro +|.macro ins_AD; .endmacro +|.macro ins_AJ; .endmacro +|.macro ins_ABC; movzx RBd, RCH; movzx RCd, RCL; .endmacro +|.macro ins_AB_; movzx RBd, RCH; .endmacro +|.macro ins_A_C; movzx RCd, RCL; .endmacro +|.macro ins_AND; not RD; .endmacro +| +|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). +|.macro ins_NEXT +| mov RCd, [PC] +| movzx RAd, RCH +| movzx OP, RCL +| add PC, 4 +| shr RCd, 16 +| jmp aword [DISPATCH+OP*8] +|.endmacro +| +|// Instruction footer. +|.if 1 +| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. +| .define ins_next, ins_NEXT +| .define ins_next_, ins_NEXT +|.else +| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. +| // Affects only certain kinds of benchmarks (and only with -j off). +| // Around 10%-30% slower on Core2, a lot more slower on P4. +| .macro ins_next +| jmp ->ins_next +| .endmacro +| .macro ins_next_ +| ->ins_next: +| ins_NEXT +| .endmacro +|.endif +| +|// Call decode and dispatch. +|.macro ins_callt +| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-8] = PC +| mov PC, LFUNC:RB->pc +| mov RAd, [PC] +| movzx OP, RAL +| movzx RAd, RAH +| add PC, 4 +| jmp aword [DISPATCH+OP*8] +|.endmacro +| +|.macro ins_call +| // BASE = new base, RB = LFUNC, RD = nargs+1 +| mov [BASE-8], PC +| ins_callt +|.endmacro +| +|//----------------------------------------------------------------------- +| +|// Macros to clear or set tags. +|.macro cleartp, reg; shl reg, 17; shr reg, 17; .endmacro +|.macro settp, reg, tp +| mov64 ITYPE, ((uint64_t)tp<<47) +| or reg, ITYPE +|.endmacro +|.macro settp, dst, reg, tp +| mov64 dst, ((uint64_t)tp<<47) +| or dst, reg +|.endmacro +|.macro setint, reg +| settp reg, LJ_TISNUM +|.endmacro +|.macro setint, dst, reg +| settp dst, reg, LJ_TISNUM +|.endmacro +| +|// Macros to test operand types. +|.macro checktp_nc, reg, tp, target +| mov ITYPE, reg +| sar ITYPE, 47 +| cmp ITYPEd, tp +| jne target +|.endmacro +|.macro checktp, reg, tp, target +| mov ITYPE, reg +| cleartp reg +| sar ITYPE, 47 +| cmp ITYPEd, tp +| jne target +|.endmacro +|.macro checktptp, src, tp, target +| mov ITYPE, src +| sar ITYPE, 47 +| cmp ITYPEd, tp +| jne target +|.endmacro +|.macro checkstr, reg, target; checktp reg, LJ_TSTR, target; .endmacro +|.macro checktab, reg, target; checktp reg, LJ_TTAB, target; .endmacro +|.macro checkfunc, reg, target; checktp reg, LJ_TFUNC, target; .endmacro +| +|.macro checknumx, reg, target, jump +| mov ITYPE, reg +| sar ITYPE, 47 +| cmp ITYPEd, LJ_TISNUM +| jump target +|.endmacro +|.macro checkint, reg, target; checknumx reg, target, jne; .endmacro +|.macro checkinttp, src, target; checknumx src, target, jne; .endmacro +|.macro checknum, reg, target; checknumx reg, target, jae; .endmacro +|.macro checknumtp, src, target; checknumx src, target, jae; .endmacro +|.macro checknumber, src, target; checknumx src, target, ja; .endmacro +| +|.macro mov_false, reg; mov64 reg, (int64_t)~((uint64_t)1<<47); .endmacro +|.macro mov_true, reg; mov64 reg, (int64_t)~((uint64_t)2<<47); .endmacro +| +|// These operands must be used with movzx. +|.define PC_OP, byte [PC-4] +|.define PC_RA, byte [PC-3] +|.define PC_RB, byte [PC-1] +|.define PC_RC, byte [PC-2] +|.define PC_RD, word [PC-2] +| +|.macro branchPC, reg +| lea PC, [PC+reg*4-BCBIAS_J*4] +|.endmacro +| +|// Assumes DISPATCH is relative to GL. +#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) +#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| +|// Decrement hashed hotcount and trigger trace recorder if zero. +|.macro hotloop, reg +| mov reg, PCd +| shr reg, 1 +| and reg, HOTCOUNT_PCMASK +| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP +| jb ->vm_hotloop +|.endmacro +| +|.macro hotcall, reg +| mov reg, PCd +| shr reg, 1 +| and reg, HOTCOUNT_PCMASK +| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL +| jb ->vm_hotcall +|.endmacro +| +|// Set current VM state. +|.macro set_vmstate, st +| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st +|.endmacro +| +|.macro fpop1; fstp st1; .endmacro +| +|// Synthesize SSE FP constants. +|.macro sseconst_abs, reg, tmp // Synthesize abs mask. +| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp +|.endmacro +| +|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. +| mov64 tmp, U64x(val,00000000); movd reg, tmp +|.endmacro +| +|.macro sseconst_sign, reg, tmp // Synthesize sign mask. +| sseconst_hi reg, tmp, 80000000 +|.endmacro +|.macro sseconst_1, reg, tmp // Synthesize 1.0. +| sseconst_hi reg, tmp, 3ff00000 +|.endmacro +|.macro sseconst_2p52, reg, tmp // Synthesize 2^52. +| sseconst_hi reg, tmp, 43300000 +|.endmacro +|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. +| sseconst_hi reg, tmp, 43380000 +|.endmacro +| +|// Move table write barrier back. Overwrites reg. +|.macro barrierback, tab, reg +| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) +| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] +| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab +| mov tab->gclist, reg +|.endmacro +| +|//----------------------------------------------------------------------- + +/* Generate subroutines used by opcodes and other parts of the VM. */ +/* The .code_sub section should be last to help static branch prediction. */ +static void build_subroutines(BuildCtx *ctx) +{ + |.code_sub + | + |//----------------------------------------------------------------------- + |//-- Return handling ---------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_returnp: + | test PCd, FRAME_P + | jz ->cont_dispatch + | + | // Return from pcall or xpcall fast func. + | and PC, -8 + | sub BASE, PC // Restore caller base. + | lea RA, [RA+PC-8] // Rebase RA and prepend one result. + | mov PC, [BASE-8] // Fetch PC of previous frame. + | // Prepending may overwrite the pcall frame, so do it at the end. + | mov_true ITYPE + | mov aword [BASE+RA], ITYPE // Prepend true to results. + | + |->vm_returnc: + | add RDd, 1 // RD = nresults+1 + | jz ->vm_unwind_yield + | mov MULTRES, RDd + | test PC, FRAME_TYPE + | jz ->BC_RET_Z // Handle regular return to Lua. + | + |->vm_return: + | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return + | xor PC, FRAME_C + | test PCd, FRAME_TYPE + | jnz ->vm_returnp + | + | // Return to C. + | set_vmstate C + | and PC, -8 + | sub PC, BASE + | neg PC // Previous base = BASE - delta. + | + | sub RDd, 1 + | jz >2 + |1: // Move results down. + | mov RB, [BASE+RA] + | mov [BASE-16], RB + | add BASE, 8 + | sub RDd, 1 + | jnz <1 + |2: + | mov L:RB, SAVE_L + | mov L:RB->base, PC + |3: + | mov RDd, MULTRES + | mov RAd, SAVE_NRES // RA = wanted nresults+1 + |4: + | cmp RAd, RDd + | jne >6 // More/less results wanted? + |5: + | sub BASE, 16 + | mov L:RB->top, BASE + | + |->vm_leave_cp: + | mov RA, SAVE_CFRAME // Restore previous C frame. + | mov L:RB->cframe, RA + | xor eax, eax // Ok return status for vm_pcall. + | + |->vm_leave_unw: + | restoreregs + | ret + | + |6: + | jb >7 // Less results wanted? + | // More results wanted. Check stack size and fill up results with nil. + | cmp BASE, L:RB->maxstack + | ja >8 + | mov aword [BASE-16], LJ_TNIL + | add BASE, 8 + | add RDd, 1 + | jmp <4 + | + |7: // Less results wanted. + | test RAd, RAd + | jz <5 // But check for LUA_MULTRET+1. + | sub RA, RD // Negative result! + | lea BASE, [BASE+RA*8] // Correct top. + | jmp <5 + | + |8: // Corner case: need to grow stack for filling up results. + | // This can happen if: + | // - A C function grows the stack (a lot). + | // - The GC shrinks the stack in between. + | // - A return back from a lua_call() with (high) nresults adjustment. + | mov L:RB->top, BASE // Save current top held in BASE (yes). + | mov MULTRES, RDd // Need to fill only remainder with nil. + | mov CARG2d, RAd + | mov CARG1, L:RB + | call extern lj_state_growstack // (lua_State *L, int n) + | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. + | jmp <3 + | + |->vm_unwind_yield: + | mov al, LUA_YIELD + | jmp ->vm_unwind_c_eh + | + |->vm_unwind_c: // Unwind C stack, return from vm_pcall. + | // (void *cframe, int errcode) + | mov eax, CARG2d // Error return status for vm_pcall. + | mov rsp, CARG1 + |->vm_unwind_c_eh: // Landing pad for external unwinder. + | mov L:RB, SAVE_L + | mov GL:RB, L:RB->glref + | mov dword GL:RB->vmstate, ~LJ_VMST_C + | jmp ->vm_leave_unw + | + |->vm_unwind_rethrow: + |.if not X64WIN + | mov CARG1, SAVE_L + | mov CARG2d, eax + | restoreregs + | jmp extern lj_err_throw // (lua_State *L, int errcode) + |.endif + | + |->vm_unwind_ff: // Unwind C stack, return from ff pcall. + | // (void *cframe) + | and CARG1, CFRAME_RAWMASK + | mov rsp, CARG1 + |->vm_unwind_ff_eh: // Landing pad for external unwinder. + | mov L:RB, SAVE_L + | mov RDd, 1+1 // Really 1+2 results, incr. later. + | mov BASE, L:RB->base + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | add DISPATCH, GG_G2DISP + | mov PC, [BASE-8] // Fetch PC of previous frame. + | mov_false RA + | mov RB, [BASE] + | mov [BASE-16], RA // Prepend false to error message. + | mov [BASE-8], RB + | mov RA, -16 // Results start at BASE+RA = BASE-16. + | set_vmstate INTERP + | jmp ->vm_returnc // Increments RD/MULTRES and returns. + | + |//----------------------------------------------------------------------- + |//-- Grow stack for calls ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_growstack_c: // Grow stack for C function. + | mov CARG2d, LUA_MINSTACK + | jmp >2 + | + |->vm_growstack_v: // Grow stack for vararg Lua function. + | sub RD, 16 // LJ_FR2 + | jmp >1 + | + |->vm_growstack_f: // Grow stack for fixarg Lua function. + | // BASE = new base, RD = nargs+1, RB = L, PC = first PC + | lea RD, [BASE+NARGS:RD*8-8] + |1: + | movzx RAd, byte [PC-4+PC2PROTO(framesize)] + | add PC, 4 // Must point after first instruction. + | mov L:RB->base, BASE + | mov L:RB->top, RD + | mov SAVE_PC, PC + | mov CARG2, RA + |2: + | // RB = L, L->base = new base, L->top = top + | mov CARG1, L:RB + | call extern lj_state_growstack // (lua_State *L, int n) + | mov BASE, L:RB->base + | mov RD, L:RB->top + | mov LFUNC:RB, [BASE-16] + | cleartp LFUNC:RB + | sub RD, BASE + | shr RDd, 3 + | add NARGS:RDd, 1 + | // BASE = new base, RB = LFUNC, RD = nargs+1 + | ins_callt // Just retry the call. + | + |//----------------------------------------------------------------------- + |//-- Entry points into the assembler VM --------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_resume: // Setup C frame and resume thread. + | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) + | saveregs + | mov L:RB, CARG1 // Caveat: CARG1 may be RA. + | mov SAVE_L, CARG1 + | mov RA, CARG2 + | mov PCd, FRAME_CP + | xor RDd, RDd + | lea KBASE, [esp+CFRAME_RESUME] + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | add DISPATCH, GG_G2DISP + | mov SAVE_PC, RD // Any value outside of bytecode is ok. + | mov SAVE_CFRAME, RD + | mov SAVE_NRES, RDd + | mov SAVE_ERRF, RDd + | mov L:RB->cframe, KBASE + | cmp byte L:RB->status, RDL + | je >2 // Initial resume (like a call). + | + | // Resume after yield (like a return). + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + | set_vmstate INTERP + | mov byte L:RB->status, RDL + | mov BASE, L:RB->base + | mov RD, L:RB->top + | sub RD, RA + | shr RDd, 3 + | add RDd, 1 // RD = nresults+1 + | sub RA, BASE // RA = resultofs + | mov PC, [BASE-8] + | mov MULTRES, RDd + | test PCd, FRAME_TYPE + | jz ->BC_RET_Z + | jmp ->vm_return + | + |->vm_pcall: // Setup protected C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) + | saveregs + | mov PCd, FRAME_CP + | mov SAVE_ERRF, CARG4d + | jmp >1 + | + |->vm_call: // Setup C frame and enter VM. + | // (lua_State *L, TValue *base, int nres1) + | saveregs + | mov PCd, FRAME_C + | + |1: // Entry point for vm_pcall above (PC = ftype). + | mov SAVE_NRES, CARG3d + | mov L:RB, CARG1 // Caveat: CARG1 may be RA. + | mov SAVE_L, CARG1 + | mov RA, CARG2 + | + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. + | mov SAVE_CFRAME, KBASE + | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. + | add DISPATCH, GG_G2DISP + | mov L:RB->cframe, rsp + | + |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + | set_vmstate INTERP + | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). + | add PC, RA + | sub PC, BASE // PC = frame delta + frame type + | + | mov RD, L:RB->top + | sub RD, RA + | shr NARGS:RDd, 3 + | add NARGS:RDd, 1 // RD = nargs+1 + | + |->vm_call_dispatch: + | mov LFUNC:RB, [RA-16] + | checkfunc LFUNC:RB, ->vmeta_call // Ensure KBASE defined and != BASE. + | + |->vm_call_dispatch_f: + | mov BASE, RA + | ins_call + | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC + | + |->vm_cpcall: // Setup protected C frame, call C. + | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) + | saveregs + | mov L:RB, CARG1 // Caveat: CARG1 may be RA. + | mov SAVE_L, CARG1 + | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. + | + | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). + | sub KBASE, L:RB->top + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. + | mov SAVE_ERRF, 0 // No error function. + | mov SAVE_NRES, KBASEd // Neg. delta means cframe w/o frame. + | add DISPATCH, GG_G2DISP + | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). + | + | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. + | mov SAVE_CFRAME, KBASE + | mov L:RB->cframe, rsp + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + | + | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) + | // TValue * (new base) or NULL returned in eax (RC). + | test RC, RC + | jz ->vm_leave_cp // No base? Just remove C frame. + | mov RA, RC + | mov PCd, FRAME_CP + | jmp <2 // Else continue with the call. + | + |//----------------------------------------------------------------------- + |//-- Metamethod handling ------------------------------------------------ + |//----------------------------------------------------------------------- + | + |//-- Continuation dispatch ---------------------------------------------- + | + |->cont_dispatch: + | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) + | add RA, BASE + | and PC, -8 + | mov RB, BASE + | sub BASE, PC // Restore caller BASE. + | mov aword [RA+RD*8-8], LJ_TNIL // Ensure one valid arg. + | mov RC, RA // ... in [RC] + | mov PC, [RB-24] // Restore PC from [cont|PC]. + | mov RA, qword [RB-32] // May be negative on WIN64 with debug. + |.if FFI + | cmp RA, 1 + | jbe >1 + |.endif + | mov LFUNC:KBASE, [BASE-16] + | cleartp LFUNC:KBASE + | mov KBASE, LFUNC:KBASE->pc + | mov KBASE, [KBASE+PC2PROTO(k)] + | // BASE = base, RC = result, RB = meta base + | jmp RA // Jump to continuation. + | + |.if FFI + |1: + | je ->cont_ffi_callback // cont = 1: return from FFI callback. + | // cont = 0: Tail call from C function. + | sub RB, BASE + | shr RBd, 3 + | lea RDd, [RBd-3] + | jmp ->vm_call_tail + |.endif + | + |->cont_cat: // BASE = base, RC = result, RB = mbase + | movzx RAd, PC_RB + | sub RB, 32 + | lea RA, [BASE+RA*8] + | sub RA, RB + | je ->cont_ra + | neg RA + | shr RAd, 3 + |.if X64WIN + | mov CARG3d, RAd + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE + | mov RC, [RC] + | mov [RB], RC + | mov CARG2, RB + |.else + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE + | mov CARG3d, RAd + | mov RA, [RC] + | mov [RB], RA + | mov CARG2, RB + |.endif + | jmp ->BC_CAT_Z + | + |//-- Table indexing metamethods ----------------------------------------- + | + |->vmeta_tgets: + | settp STR:RC, LJ_TSTR // STR:RC = GCstr * + | mov TMP1, STR:RC + | lea RC, TMP1 + | cmp PC_OP, BC_GGET + | jne >1 + | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. + | mov [RB], TAB:RA + | jmp >2 + | + |->vmeta_tgetb: + | movzx RCd, PC_RC + |.if DUALNUM + | setint RC + | mov TMP1, RC + |.else + | cvtsi2sd xmm0, RCd + | movsd TMP1, xmm0 + |.endif + | lea RC, TMP1 + | jmp >1 + | + |->vmeta_tgetv: + | movzx RCd, PC_RC // Reload TValue *k from RC. + | lea RC, [BASE+RC*8] + |1: + | movzx RBd, PC_RB // Reload TValue *t from RB. + | lea RB, [BASE+RB*8] + |2: + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. + | mov CARG2, RB + | mov CARG3, RC + | mov L:RB, L:CARG1 + | mov SAVE_PC, PC + | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in eax (RC). + | mov BASE, L:RB->base + | test RC, RC + | jz >3 + |->cont_ra: // BASE = base, RC = result + | movzx RAd, PC_RA + | mov RB, [RC] + | mov [BASE+RA*8], RB + | ins_next + | + |3: // Call __index metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k + | mov RA, L:RB->top + | mov [RA-24], PC // [cont|PC] + | lea PC, [RA+FRAME_CONT] + | sub PC, BASE + | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here. + | mov NARGS:RDd, 2+1 // 2 args for func(t, k). + | cleartp LFUNC:RB + | jmp ->vm_call_dispatch_f + | + |->vmeta_tgetr: + | mov CARG1, TAB:RB + | mov RB, BASE // Save BASE. + | mov CARG2d, RCd // Caveat: CARG2 == BASE + | call extern lj_tab_getinth // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in eax (RC). + | movzx RAd, PC_RA + | mov BASE, RB // Restore BASE. + | test RC, RC + | jnz ->BC_TGETR_Z + | mov ITYPE, LJ_TNIL + | jmp ->BC_TGETR2_Z + | + |//----------------------------------------------------------------------- + | + |->vmeta_tsets: + | settp STR:RC, LJ_TSTR // STR:RC = GCstr * + | mov TMP1, STR:RC + | lea RC, TMP1 + | cmp PC_OP, BC_GSET + | jne >1 + | settp TAB:RA, TAB:RB, LJ_TTAB // TAB:RB = GCtab * + | lea RB, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. + | mov [RB], TAB:RA + | jmp >2 + | + |->vmeta_tsetb: + | movzx RCd, PC_RC + |.if DUALNUM + | setint RC + | mov TMP1, RC + |.else + | cvtsi2sd xmm0, RCd + | movsd TMP1, xmm0 + |.endif + | lea RC, TMP1 + | jmp >1 + | + |->vmeta_tsetv: + | movzx RCd, PC_RC // Reload TValue *k from RC. + | lea RC, [BASE+RC*8] + |1: + | movzx RBd, PC_RB // Reload TValue *t from RB. + | lea RB, [BASE+RB*8] + |2: + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. + | mov CARG2, RB + | mov CARG3, RC + | mov L:RB, L:CARG1 + | mov SAVE_PC, PC + | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) + | // TValue * (finished) or NULL (metamethod) returned in eax (RC). + | mov BASE, L:RB->base + | test RC, RC + | jz >3 + | // NOBARRIER: lj_meta_tset ensures the table is not black. + | movzx RAd, PC_RA + | mov RB, [BASE+RA*8] + | mov [RC], RB + |->cont_nop: // BASE = base, (RC = result) + | ins_next + | + |3: // Call __newindex metamethod. + | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) + | mov RA, L:RB->top + | mov [RA-24], PC // [cont|PC] + | movzx RCd, PC_RA + | // Copy value to third argument. + | mov RB, [BASE+RC*8] + | mov [RA+16], RB + | lea PC, [RA+FRAME_CONT] + | sub PC, BASE + | mov LFUNC:RB, [RA-16] // Guaranteed to be a function here. + | mov NARGS:RDd, 3+1 // 3 args for func(t, k, v). + | cleartp LFUNC:RB + | jmp ->vm_call_dispatch_f + | + |->vmeta_tsetr: + |.if X64WIN + | mov L:CARG1, SAVE_L + | mov CARG3d, RCd + | mov L:CARG1->base, BASE + | xchg CARG2, TAB:RB // Caveat: CARG2 == BASE. + |.else + | mov L:CARG1, SAVE_L + | mov CARG2, TAB:RB + | mov L:CARG1->base, BASE + | mov RB, BASE // Save BASE. + | mov CARG3d, RCd // Caveat: CARG3 == BASE. + |.endif + | mov SAVE_PC, PC + | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // TValue * returned in eax (RC). + | movzx RAd, PC_RA + | mov BASE, RB // Restore BASE. + | jmp ->BC_TSETR_Z + | + |//-- Comparison metamethods --------------------------------------------- + | + |->vmeta_comp: + | movzx RDd, PC_RD + | movzx RAd, PC_RA + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG2/CARG3 == BASE. + |.if X64WIN + | lea CARG3, [BASE+RD*8] + | lea CARG2, [BASE+RA*8] + |.else + | lea CARG2, [BASE+RA*8] + | lea CARG3, [BASE+RD*8] + |.endif + | mov CARG1, L:RB // Caveat: CARG1/CARG4 == RA. + | movzx CARG4d, PC_OP + | mov SAVE_PC, PC + | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) + | // 0/1 or TValue * (metamethod) returned in eax (RC). + |3: + | mov BASE, L:RB->base + | cmp RC, 1 + | ja ->vmeta_binop + |4: + | lea PC, [PC+4] + | jb >6 + |5: + | movzx RDd, PC_RD + | branchPC RD + |6: + | ins_next + | + |->cont_condt: // BASE = base, RC = result + | add PC, 4 + | mov ITYPE, [RC] + | sar ITYPE, 47 + | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is true. + | jb <5 + | jmp <6 + | + |->cont_condf: // BASE = base, RC = result + | mov ITYPE, [RC] + | sar ITYPE, 47 + | cmp ITYPEd, LJ_TISTRUECOND // Branch if result is false. + | jmp <4 + | + |->vmeta_equal: + | cleartp TAB:RD + | sub PC, 4 + |.if X64WIN + | mov CARG3, RD + | mov CARG4d, RBd + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG2 == BASE. + | mov CARG2, RA + | mov CARG1, L:RB // Caveat: CARG1 == RA. + |.else + | mov CARG2, RA + | mov CARG4d, RBd // Caveat: CARG4 == RA. + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG3 == BASE. + | mov CARG3, RD + | mov CARG1, L:RB + |.endif + | mov SAVE_PC, PC + | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) + | // 0/1 or TValue * (metamethod) returned in eax (RC). + | jmp <3 + | + |->vmeta_equal_cd: + |.if FFI + | sub PC, 4 + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov CARG1, L:RB + | mov CARG2d, dword [PC-4] + | mov SAVE_PC, PC + | call extern lj_meta_equal_cd // (lua_State *L, BCIns ins) + | // 0/1 or TValue * (metamethod) returned in eax (RC). + | jmp <3 + |.endif + | + |->vmeta_istype: + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. + | mov CARG2d, RAd + | mov CARG3d, RDd + | mov L:CARG1, L:RB + | mov SAVE_PC, PC + | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | mov BASE, L:RB->base + | jmp <6 + | + |//-- Arithmetic metamethods --------------------------------------------- + | + |->vmeta_arith_vno: + |.if DUALNUM + | movzx RBd, PC_RB + | movzx RCd, PC_RC + |.endif + |->vmeta_arith_vn: + | lea RC, [KBASE+RC*8] + | jmp >1 + | + |->vmeta_arith_nvo: + |.if DUALNUM + | movzx RBd, PC_RB + | movzx RCd, PC_RC + |.endif + |->vmeta_arith_nv: + | lea TMPR, [KBASE+RC*8] + | lea RC, [BASE+RB*8] + | mov RB, TMPR + | jmp >2 + | + |->vmeta_unm: + | lea RC, [BASE+RD*8] + | mov RB, RC + | jmp >2 + | + |->vmeta_arith_vvo: + |.if DUALNUM + | movzx RBd, PC_RB + | movzx RCd, PC_RC + |.endif + |->vmeta_arith_vv: + | lea RC, [BASE+RC*8] + |1: + | lea RB, [BASE+RB*8] + |2: + | lea RA, [BASE+RA*8] + |.if X64WIN + | mov CARG3, RB + | mov CARG4, RC + | movzx RCd, PC_OP + | mov ARG5d, RCd + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG2 == BASE. + | mov CARG2, RA + | mov CARG1, L:RB // Caveat: CARG1 == RA. + |.else + | movzx CARG5d, PC_OP + | mov CARG2, RA + | mov CARG4, RC // Caveat: CARG4 == RA. + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE // Caveat: CARG3 == BASE. + | mov CARG3, RB + | mov L:RB, L:CARG1 + |.endif + | mov SAVE_PC, PC + | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) + | // NULL (finished) or TValue * (metamethod) returned in eax (RC). + | mov BASE, L:RB->base + | test RC, RC + | jz ->cont_nop + | + | // Call metamethod for binary op. + |->vmeta_binop: + | // BASE = base, RC = new base, stack = cont/func/o1/o2 + | mov RA, RC + | sub RC, BASE + | mov [RA-24], PC // [cont|PC] + | lea PC, [RC+FRAME_CONT] + | mov NARGS:RDd, 2+1 // 2 args for func(o1, o2). + | jmp ->vm_call_dispatch + | + |->vmeta_len: + | movzx RDd, PC_RD + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | lea CARG2, [BASE+RD*8] // Caveat: CARG2 == BASE + | mov L:CARG1, L:RB + | mov SAVE_PC, PC + | call extern lj_meta_len // (lua_State *L, TValue *o) + | // NULL (retry) or TValue * (metamethod) returned in eax (RC). + | mov BASE, L:RB->base +#if LJ_52 + | test RC, RC + | jne ->vmeta_binop // Binop call for compatibility. + | movzx RDd, PC_RD + | mov TAB:CARG1, [BASE+RD*8] + | cleartp TAB:CARG1 + | jmp ->BC_LEN_Z +#else + | jmp ->vmeta_binop // Binop call for compatibility. +#endif + | + |//-- Call metamethod ---------------------------------------------------- + | + |->vmeta_call_ra: + | lea RA, [BASE+RA*8+16] + |->vmeta_call: // Resolve and call __call metamethod. + | // BASE = old base, RA = new base, RC = nargs+1, PC = return + | mov TMP1d, NARGS:RDd // Save RA, RC for us. + | mov RB, RA + |.if X64WIN + | mov L:TMPR, SAVE_L + | mov L:TMPR->base, BASE // Caveat: CARG2 is BASE. + | lea CARG2, [RA-16] + | lea CARG3, [RA+NARGS:RD*8-8] + | mov CARG1, L:TMPR // Caveat: CARG1 is RA. + |.else + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE // Caveat: CARG3 is BASE. + | lea CARG2, [RA-16] + | lea CARG3, [RA+NARGS:RD*8-8] + |.endif + | mov SAVE_PC, PC + | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) + | mov RA, RB + | mov L:RB, SAVE_L + | mov BASE, L:RB->base + | mov NARGS:RDd, TMP1d + | mov LFUNC:RB, [RA-16] + | add NARGS:RDd, 1 + | // This is fragile. L->base must not move, KBASE must always be defined. + | cmp KBASE, BASE // Continue with CALLT if flag set. + | je ->BC_CALLT_Z + | cleartp LFUNC:RB + | mov BASE, RA + | ins_call // Otherwise call resolved metamethod. + | + |//-- Argument coercion for 'for' statement ------------------------------ + | + |->vmeta_for: + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov CARG2, RA // Caveat: CARG2 == BASE + | mov L:CARG1, L:RB // Caveat: CARG1 == RA + | mov SAVE_PC, PC + | call extern lj_meta_for // (lua_State *L, TValue *base) + | mov BASE, L:RB->base + | mov RCd, [PC-4] + | movzx RAd, RCH + | movzx OP, RCL + | shr RCd, 16 + | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. + | + |//----------------------------------------------------------------------- + |//-- Fast functions ----------------------------------------------------- + |//----------------------------------------------------------------------- + | + |.macro .ffunc, name + |->ff_ .. name: + |.endmacro + | + |.macro .ffunc_1, name + |->ff_ .. name: + | cmp NARGS:RDd, 1+1; jb ->fff_fallback + |.endmacro + | + |.macro .ffunc_2, name + |->ff_ .. name: + | cmp NARGS:RDd, 2+1; jb ->fff_fallback + |.endmacro + | + |.macro .ffunc_n, name, op + | .ffunc_1 name + | checknumtp [BASE], ->fff_fallback + | op xmm0, qword [BASE] + |.endmacro + | + |.macro .ffunc_n, name + | .ffunc_n name, movsd + |.endmacro + | + |.macro .ffunc_nn, name + | .ffunc_2 name + | checknumtp [BASE], ->fff_fallback + | checknumtp [BASE+8], ->fff_fallback + | movsd xmm0, qword [BASE] + | movsd xmm1, qword [BASE+8] + |.endmacro + | + |// Inlined GC threshold check. Caveat: uses label 1. + |.macro ffgccheck + | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] + | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] + | jb >1 + | call ->fff_gcstep + |1: + |.endmacro + | + |//-- Base library: checks ----------------------------------------------- + | + |.ffunc_1 assert + | mov ITYPE, [BASE] + | mov RB, ITYPE + | sar ITYPE, 47 + | cmp ITYPEd, LJ_TISTRUECOND; jae ->fff_fallback + | mov PC, [BASE-8] + | mov MULTRES, RDd + | mov RB, [BASE] + | mov [BASE-16], RB + | sub RDd, 2 + | jz >2 + | mov RA, BASE + |1: + | add RA, 8 + | mov RB, [RA] + | mov [RA-16], RB + | sub RDd, 1 + | jnz <1 + |2: + | mov RDd, MULTRES + | jmp ->fff_res_ + | + |.ffunc_1 type + | mov RC, [BASE] + | sar RC, 47 + | mov RBd, LJ_TISNUM + | cmp RCd, RBd + | cmovb RCd, RBd + | not RCd + |2: + | mov CFUNC:RB, [BASE-16] + | cleartp CFUNC:RB + | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] + | mov PC, [BASE-8] + | settp STR:RC, LJ_TSTR + | mov [BASE-16], STR:RC + | jmp ->fff_res1 + | + |//-- Base library: getters and setters --------------------------------- + | + |.ffunc_1 getmetatable + | mov TAB:RB, [BASE] + | mov PC, [BASE-8] + | checktab TAB:RB, >6 + |1: // Field metatable must be at same offset for GCtab and GCudata! + | mov TAB:RB, TAB:RB->metatable + |2: + | test TAB:RB, TAB:RB + | mov aword [BASE-16], LJ_TNIL + | jz ->fff_res1 + | settp TAB:RC, TAB:RB, LJ_TTAB + | mov [BASE-16], TAB:RC // Store metatable as default result. + | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+8*(GCROOT_MMNAME+MM_metatable)] + | mov RAd, TAB:RB->hmask + | and RAd, STR:RC->sid + | settp STR:RC, LJ_TSTR + | imul RAd, #NODE + | add NODE:RA, TAB:RB->node + |3: // Rearranged logic, because we expect _not_ to find the key. + | cmp NODE:RA->key, STR:RC + | je >5 + |4: + | mov NODE:RA, NODE:RA->next + | test NODE:RA, NODE:RA + | jnz <3 + | jmp ->fff_res1 // Not found, keep default result. + |5: + | mov RB, NODE:RA->val + | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. + | mov [BASE-16], RB // Return value of mt.__metatable. + | jmp ->fff_res1 + | + |6: + | cmp ITYPEd, LJ_TUDATA; je <1 + | cmp ITYPEd, LJ_TISNUM; ja >7 + | mov ITYPEd, LJ_TISNUM + |7: + | not ITYPEd + | mov TAB:RB, [DISPATCH+ITYPE*8+DISPATCH_GL(gcroot[GCROOT_BASEMT])] + | jmp <2 + | + |.ffunc_2 setmetatable + | mov TAB:RB, [BASE] + | mov TAB:TMPR, TAB:RB + | checktab TAB:RB, ->fff_fallback + | // Fast path: no mt for table yet and not clearing the mt. + | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback + | mov TAB:RA, [BASE+8] + | checktab TAB:RA, ->fff_fallback + | mov TAB:RB->metatable, TAB:RA + | mov PC, [BASE-8] + | mov [BASE-16], TAB:TMPR // Return original table. + | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jz >1 + | // Possible write barrier. Table is black, but skip iswhite(mt) check. + | barrierback TAB:RB, RC + |1: + | jmp ->fff_res1 + | + |.ffunc_2 rawget + |.if X64WIN + | mov TAB:RA, [BASE] + | checktab TAB:RA, ->fff_fallback + | mov RB, BASE // Save BASE. + | lea CARG3, [BASE+8] + | mov CARG2, TAB:RA // Caveat: CARG2 == BASE. + | mov CARG1, SAVE_L + |.else + | mov TAB:CARG2, [BASE] + | checktab TAB:CARG2, ->fff_fallback + | mov RB, BASE // Save BASE. + | lea CARG3, [BASE+8] // Caveat: CARG3 == BASE. + | mov CARG1, SAVE_L + |.endif + | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // cTValue * returned in eax (RD). + | mov BASE, RB // Restore BASE. + | // Copy table slot. + | mov RB, [RD] + | mov PC, [BASE-8] + | mov [BASE-16], RB + | jmp ->fff_res1 + | + |//-- Base library: conversions ------------------------------------------ + | + |.ffunc tonumber + | // Only handles the number case inline (without a base argument). + | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. + | mov RB, [BASE] + | checknumber RB, ->fff_fallback + | mov PC, [BASE-8] + | mov [BASE-16], RB + | jmp ->fff_res1 + | + |.ffunc_1 tostring + | // Only handles the string or number case inline. + | mov PC, [BASE-8] + | mov STR:RB, [BASE] + | checktp_nc STR:RB, LJ_TSTR, >3 + | // A __tostring method in the string base metatable is ignored. + |2: + | mov [BASE-16], STR:RB + | jmp ->fff_res1 + |3: // Handle numbers inline, unless a number base metatable is present. + | cmp ITYPEd, LJ_TISNUM; ja ->fff_fallback_1 + | cmp aword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 + | jne ->fff_fallback + | ffgccheck // Caveat: uses label 1. + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Add frame since C call can throw. + | mov SAVE_PC, PC // Redundant (but a defined value). + |.if not X64WIN + | mov CARG2, BASE // Otherwise: CARG2 == BASE + |.endif + | mov L:CARG1, L:RB + |.if DUALNUM + | call extern lj_strfmt_number // (lua_State *L, cTValue *o) + |.else + | call extern lj_strfmt_num // (lua_State *L, lua_Number *np) + |.endif + | // GCstr returned in eax (RD). + | mov BASE, L:RB->base + | settp STR:RB, RD, LJ_TSTR + | jmp <2 + | + |//-- Base library: iterators ------------------------------------------- + | + |.ffunc_1 next + | je >2 // Missing 2nd arg? + |1: + | mov CARG1, [BASE] + | mov PC, [BASE-8] + | checktab CARG1, ->fff_fallback + | mov RB, BASE // Save BASE. + |.if X64WIN + | lea CARG3, [BASE-16] + | lea CARG2, [BASE+8] // Caveat: CARG2 == BASE. + |.else + | lea CARG2, [BASE+8] + | lea CARG3, [BASE-16] // Caveat: CARG3 == BASE. + |.endif + | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // 1=found, 0=end, -1=error returned in eax (RD). + | mov BASE, RB // Restore BASE. + | test RDd, RDd; jg ->fff_res2 // Found key/value. + | js ->fff_fallback_2 // Invalid key. + | // End of traversal: return nil. + | mov aword [BASE-16], LJ_TNIL + | jmp ->fff_res1 + |2: // Set missing 2nd arg to nil. + | mov aword [BASE+8], LJ_TNIL + | jmp <1 + | + |.ffunc_1 pairs + | mov TAB:RB, [BASE] + | mov TMPR, TAB:RB + | checktab TAB:RB, ->fff_fallback +#if LJ_52 + | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback +#endif + | mov CFUNC:RD, [BASE-16] + | cleartp CFUNC:RD + | mov CFUNC:RD, CFUNC:RD->upvalue[0] + | settp CFUNC:RD, LJ_TFUNC + | mov PC, [BASE-8] + | mov [BASE-16], CFUNC:RD + | mov [BASE-8], TMPR + | mov aword [BASE], LJ_TNIL + | mov RDd, 1+3 + | jmp ->fff_res + | + |.ffunc_2 ipairs_aux + | mov TAB:RB, [BASE] + | checktab TAB:RB, ->fff_fallback + |.if DUALNUM + | mov RA, [BASE+8] + | checkint RA, ->fff_fallback + |.else + | checknumtp [BASE+8], ->fff_fallback + | movsd xmm0, qword [BASE+8] + |.endif + | mov PC, [BASE-8] + |.if DUALNUM + | add RAd, 1 + | setint ITYPE, RA + | mov [BASE-16], ITYPE + |.else + | sseconst_1 xmm1, TMPR + | addsd xmm0, xmm1 + | cvttsd2si RAd, xmm0 + | movsd qword [BASE-16], xmm0 + |.endif + | cmp RAd, TAB:RB->asize; jae >2 // Not in array part? + | mov RD, TAB:RB->array + | lea RD, [RD+RA*8] + |1: + | cmp aword [RD], LJ_TNIL; je ->fff_res0 + | // Copy array slot. + | mov RB, [RD] + | mov [BASE-8], RB + |->fff_res2: + | mov RDd, 1+2 + | jmp ->fff_res + |2: // Check for empty hash part first. Otherwise call C function. + | cmp dword TAB:RB->hmask, 0; je ->fff_res0 + |.if X64WIN + | mov TMPR, BASE + | mov CARG2d, RAd + | mov CARG1, TAB:RB + | mov RB, TMPR + |.else + | mov CARG1, TAB:RB + | mov RB, BASE // Save BASE. + | mov CARG2d, RAd // Caveat: CARG2 == BASE + |.endif + | call extern lj_tab_getinth // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in eax (RD). + | mov BASE, RB + | test RD, RD + | jnz <1 + |->fff_res0: + | mov RDd, 1+0 + | jmp ->fff_res + | + |.ffunc_1 ipairs + | mov TAB:RB, [BASE] + | mov TMPR, TAB:RB + | checktab TAB:RB, ->fff_fallback +#if LJ_52 + | cmp aword TAB:RB->metatable, 0; jne ->fff_fallback +#endif + | mov CFUNC:RD, [BASE-16] + | cleartp CFUNC:RD + | mov CFUNC:RD, CFUNC:RD->upvalue[0] + | settp CFUNC:RD, LJ_TFUNC + | mov PC, [BASE-8] + | mov [BASE-16], CFUNC:RD + | mov [BASE-8], TMPR + |.if DUALNUM + | mov64 RD, ((uint64_t)LJ_TISNUM<<47) + | mov [BASE], RD + |.else + | mov qword [BASE], 0 + |.endif + | mov RDd, 1+3 + | jmp ->fff_res + | + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc_1 pcall + | lea RA, [BASE+16] + | sub NARGS:RDd, 1 + | mov PCd, 16+FRAME_PCALL + |1: + | movzx RBd, byte [DISPATCH+DISPATCH_GL(hookmask)] + | shr RB, HOOK_ACTIVE_SHIFT + | and RB, 1 + | add PC, RB // Remember active hook before pcall. + | // Note: this does a (harmless) copy of the function to the PC slot, too. + | mov KBASE, RD + |2: + | mov RB, [RA+KBASE*8-24] + | mov [RA+KBASE*8-16], RB + | sub KBASE, 1 + | ja <2 + | jmp ->vm_call_dispatch + | + |.ffunc_2 xpcall + | mov LFUNC:RA, [BASE+8] + | checktp_nc LFUNC:RA, LJ_TFUNC, ->fff_fallback + | mov LFUNC:RB, [BASE] // Swap function and traceback. + | mov [BASE], LFUNC:RA + | mov [BASE+8], LFUNC:RB + | lea RA, [BASE+24] + | sub NARGS:RDd, 2 + | mov PCd, 24+FRAME_PCALL + | jmp <1 + | + |//-- Coroutine library -------------------------------------------------- + | + |.macro coroutine_resume_wrap, resume + |.if resume + |.ffunc_1 coroutine_resume + | mov L:RB, [BASE] + | cleartp L:RB + |.else + |.ffunc coroutine_wrap_aux + | mov CFUNC:RB, [BASE-16] + | cleartp CFUNC:RB + | mov L:RB, CFUNC:RB->upvalue[0].gcr + | cleartp L:RB + |.endif + | mov PC, [BASE-8] + | mov SAVE_PC, PC + | mov TMP1, L:RB + |.if resume + | checktptp [BASE], LJ_TTHREAD, ->fff_fallback + |.endif + | cmp aword L:RB->cframe, 0; jne ->fff_fallback + | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback + | mov RA, L:RB->top + | je >1 // Status != LUA_YIELD (i.e. 0)? + | cmp RA, L:RB->base // Check for presence of initial func. + | je ->fff_fallback + | mov PC, [RA-8] // Move initial function up. + | mov [RA], PC + | add RA, 8 + |1: + |.if resume + | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). + |.else + | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). + |.endif + | cmp PC, L:RB->maxstack; ja ->fff_fallback + | mov L:RB->top, PC + | + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + |.if resume + | add BASE, 8 // Keep resumed thread in stack for GC. + |.endif + | mov L:RB->top, BASE + |.if resume + | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. + |.else + | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. + |.endif + | sub RB, PC // Relative to PC. + | + | cmp PC, RA + | je >3 + |2: // Move args to coroutine. + | mov RC, [PC+RB] + | mov [PC-8], RC + | sub PC, 8 + | cmp PC, RA + | jne <2 + |3: + | mov CARG2, RA + | mov CARG1, TMP1 + | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) + | + | mov L:RB, SAVE_L + | mov L:PC, TMP1 + | mov BASE, L:RB->base + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + | set_vmstate INTERP + | + | cmp eax, LUA_YIELD + | ja >8 + |4: + | mov RA, L:PC->base + | mov KBASE, L:PC->top + | mov L:PC->top, RA // Clear coroutine stack. + | mov PC, KBASE + | sub PC, RA + | je >6 // No results? + | lea RD, [BASE+PC] + | shr PCd, 3 + | cmp RD, L:RB->maxstack + | ja >9 // Need to grow stack? + | + | mov RB, BASE + | sub RB, RA + |5: // Move results from coroutine. + | mov RD, [RA] + | mov [RA+RB], RD + | add RA, 8 + | cmp RA, KBASE + | jne <5 + |6: + |.if resume + | lea RDd, [PCd+2] // nresults+1 = 1 + true + results. + | mov_true ITYPE // Prepend true to results. + | mov [BASE-8], ITYPE + |.else + | lea RDd, [PCd+1] // nresults+1 = 1 + results. + |.endif + |7: + | mov PC, SAVE_PC + | mov MULTRES, RDd + |.if resume + | mov RA, -8 + |.else + | xor RAd, RAd + |.endif + | test PCd, FRAME_TYPE + | jz ->BC_RET_Z + | jmp ->vm_return + | + |8: // Coroutine returned with error (at co->top-1). + |.if resume + | mov_false ITYPE // Prepend false to results. + | mov [BASE-8], ITYPE + | mov RA, L:PC->top + | sub RA, 8 + | mov L:PC->top, RA // Clear error from coroutine stack. + | // Copy error message. + | mov RD, [RA] + | mov [BASE], RD + | mov RDd, 1+2 // nresults+1 = 1 + false + error. + | jmp <7 + |.else + | mov CARG2, L:PC + | mov CARG1, L:RB + | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) + | // Error function does not return. + |.endif + | + |9: // Handle stack expansion on return from yield. + | mov L:RA, TMP1 + | mov L:RA->top, KBASE // Undo coroutine stack clearing. + | mov CARG2, PC + | mov CARG1, L:RB + | call extern lj_state_growstack // (lua_State *L, int n) + | mov L:PC, TMP1 + | mov BASE, L:RB->base + | jmp <4 // Retry the stack move. + |.endmacro + | + | coroutine_resume_wrap 1 // coroutine.resume + | coroutine_resume_wrap 0 // coroutine.wrap + | + |.ffunc coroutine_yield + | mov L:RB, SAVE_L + | test aword L:RB->cframe, CFRAME_RESUME + | jz ->fff_fallback + | mov L:RB->base, BASE + | lea RD, [BASE+NARGS:RD*8-8] + | mov L:RB->top, RD + | xor RDd, RDd + | mov aword L:RB->cframe, RD + | mov al, LUA_YIELD + | mov byte L:RB->status, al + | jmp ->vm_leave_unw + | + |//-- Math library ------------------------------------------------------- + | + | .ffunc_1 math_abs + | mov RB, [BASE] + |.if DUALNUM + | checkint RB, >3 + | cmp RBd, 0; jns ->fff_resi + | neg RBd; js >2 + |->fff_resbit: + |->fff_resi: + | setint RB + |->fff_resRB: + | mov PC, [BASE-8] + | mov [BASE-16], RB + | jmp ->fff_res1 + |2: + | mov64 RB, U64x(41e00000,00000000) // 2^31. + | jmp ->fff_resRB + |3: + | ja ->fff_fallback + |.else + | checknum RB, ->fff_fallback + |.endif + | shl RB, 1 + | shr RB, 1 + | mov PC, [BASE-8] + | mov [BASE-16], RB + | jmp ->fff_res1 + | + |.ffunc_n math_sqrt, sqrtsd + |->fff_resxmm0: + | mov PC, [BASE-8] + | movsd qword [BASE-16], xmm0 + | // fallthrough + | + |->fff_res1: + | mov RDd, 1+1 + |->fff_res: + | mov MULTRES, RDd + |->fff_res_: + | test PCd, FRAME_TYPE + | jnz >7 + |5: + | cmp PC_RB, RDL // More results expected? + | ja >6 + | // Adjust BASE. KBASE is assumed to be set for the calling frame. + | movzx RAd, PC_RA + | neg RA + | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 + | ins_next + | + |6: // Fill up results with nil. + | mov aword [BASE+RD*8-24], LJ_TNIL + | add RD, 1 + | jmp <5 + | + |7: // Non-standard return case. + | mov RA, -16 // Results start at BASE+RA = BASE-16. + | jmp ->vm_return + | + |.macro math_round, func + | .ffunc math_ .. func + |.if DUALNUM + | mov RB, [BASE] + | checknumx RB, ->fff_resRB, je + | ja ->fff_fallback + |.else + | checknumtp [BASE], ->fff_fallback + |.endif + | movsd xmm0, qword [BASE] + | call ->vm_ .. func .. _sse + |.if DUALNUM + | cvttsd2si RBd, xmm0 + | cmp RBd, 0x80000000 + | jne ->fff_resi + | cvtsi2sd xmm1, RBd + | ucomisd xmm0, xmm1 + | jp ->fff_resxmm0 + | je ->fff_resi + |.endif + | jmp ->fff_resxmm0 + |.endmacro + | + | math_round floor + | math_round ceil + | + |.ffunc math_log + | cmp NARGS:RDd, 1+1; jne ->fff_fallback // Exactly one argument. + | checknumtp [BASE], ->fff_fallback + | movsd xmm0, qword [BASE] + | mov RB, BASE + | call extern log + | mov BASE, RB + | jmp ->fff_resxmm0 + | + |.macro math_extern, func + | .ffunc_n math_ .. func + | mov RB, BASE + | call extern func + | mov BASE, RB + | jmp ->fff_resxmm0 + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func + | mov RB, BASE + | call extern func + | mov BASE, RB + | jmp ->fff_resxmm0 + |.endmacro + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan + | math_extern sinh + | math_extern cosh + | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod + | + |.ffunc_2 math_ldexp + | checknumtp [BASE], ->fff_fallback + | checknumtp [BASE+8], ->fff_fallback + | fld qword [BASE+8] + | fld qword [BASE] + | fscale + | fpop1 + | mov PC, [BASE-8] + | fstp qword [BASE-16] + | jmp ->fff_res1 + | + |.ffunc_n math_frexp + | mov RB, BASE + |.if X64WIN + | lea CARG2, TMP1 // Caveat: CARG2 == BASE + |.else + | lea CARG1, TMP1 + |.endif + | call extern frexp + | mov BASE, RB + | mov RBd, TMP1d + | mov PC, [BASE-8] + | movsd qword [BASE-16], xmm0 + |.if DUALNUM + | setint RB + | mov [BASE-8], RB + |.else + | cvtsi2sd xmm1, RBd + | movsd qword [BASE-8], xmm1 + |.endif + | mov RDd, 1+2 + | jmp ->fff_res + | + |.ffunc_n math_modf + | mov RB, BASE + |.if X64WIN + | lea CARG2, [BASE-16] // Caveat: CARG2 == BASE + |.else + | lea CARG1, [BASE-16] + |.endif + | call extern modf + | mov BASE, RB + | mov PC, [BASE-8] + | movsd qword [BASE-8], xmm0 + | mov RDd, 1+2 + | jmp ->fff_res + | + |.macro math_minmax, name, cmovop, sseop + | .ffunc_1 name + | mov RAd, 2 + |.if DUALNUM + | mov RB, [BASE] + | checkint RB, >4 + |1: // Handle integers. + | cmp RAd, RDd; jae ->fff_resRB + | mov TMPR, [BASE+RA*8-8] + | checkint TMPR, >3 + | cmp RBd, TMPRd + | cmovop RB, TMPR + | add RAd, 1 + | jmp <1 + |3: + | ja ->fff_fallback + | // Convert intermediate result to number and continue below. + | cvtsi2sd xmm0, RBd + | jmp >6 + |4: + | ja ->fff_fallback + |.else + | checknumtp [BASE], ->fff_fallback + |.endif + | + | movsd xmm0, qword [BASE] + |5: // Handle numbers or integers. + | cmp RAd, RDd; jae ->fff_resxmm0 + |.if DUALNUM + | mov RB, [BASE+RA*8-8] + | checknumx RB, >6, jb + | ja ->fff_fallback + | cvtsi2sd xmm1, RBd + | jmp >7 + |.else + | checknumtp [BASE+RA*8-8], ->fff_fallback + |.endif + |6: + | movsd xmm1, qword [BASE+RA*8-8] + |7: + | sseop xmm0, xmm1 + | add RAd, 1 + | jmp <5 + |.endmacro + | + | math_minmax math_min, cmovg, minsd + | math_minmax math_max, cmovl, maxsd + | + |//-- String library ----------------------------------------------------- + | + |.ffunc string_byte // Only handle the 1-arg case here. + | cmp NARGS:RDd, 1+1; jne ->fff_fallback + | mov STR:RB, [BASE] + | checkstr STR:RB, ->fff_fallback + | mov PC, [BASE-8] + | cmp dword STR:RB->len, 1 + | jb ->fff_res0 // Return no results for empty string. + | movzx RBd, byte STR:RB[1] + |.if DUALNUM + | jmp ->fff_resi + |.else + | cvtsi2sd xmm0, RBd; jmp ->fff_resxmm0 + |.endif + | + |.ffunc string_char // Only handle the 1-arg case here. + | ffgccheck + | cmp NARGS:RDd, 1+1; jne ->fff_fallback // *Exactly* 1 arg. + |.if DUALNUM + | mov RB, [BASE] + | checkint RB, ->fff_fallback + |.else + | checknumtp [BASE], ->fff_fallback + | cvttsd2si RBd, qword [BASE] + |.endif + | cmp RBd, 255; ja ->fff_fallback + | mov TMP1d, RBd + | mov TMPRd, 1 + | lea RD, TMP1 // Points to stack. Little-endian. + |->fff_newstr: + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov CARG3d, TMPRd // Zero-extended to size_t. + | mov CARG2, RD + | mov CARG1, L:RB + | mov SAVE_PC, PC + | call extern lj_str_new // (lua_State *L, char *str, size_t l) + |->fff_resstr: + | // GCstr * returned in eax (RD). + | mov BASE, L:RB->base + | mov PC, [BASE-8] + | settp STR:RD, LJ_TSTR + | mov [BASE-16], STR:RD + | jmp ->fff_res1 + | + |.ffunc string_sub + | ffgccheck + | mov TMPRd, -1 + | cmp NARGS:RDd, 1+2; jb ->fff_fallback + | jna >1 + |.if DUALNUM + | mov TMPR, [BASE+16] + | checkint TMPR, ->fff_fallback + |.else + | checknumtp [BASE+16], ->fff_fallback + | cvttsd2si TMPRd, qword [BASE+16] + |.endif + |1: + | mov STR:RB, [BASE] + | checkstr STR:RB, ->fff_fallback + |.if DUALNUM + | mov ITYPE, [BASE+8] + | mov RAd, ITYPEd // Must clear hiword for lea below. + | sar ITYPE, 47 + | cmp ITYPEd, LJ_TISNUM + | jne ->fff_fallback + |.else + | checknumtp [BASE+8], ->fff_fallback + | cvttsd2si RAd, qword [BASE+8] + |.endif + | mov RCd, STR:RB->len + | cmp RCd, TMPRd // len < end? (unsigned compare) + | jb >5 + |2: + | test RAd, RAd // start <= 0? + | jle >7 + |3: + | sub TMPRd, RAd // start > end? + | jl ->fff_emptystr + | lea RD, [STR:RB+RAd+#STR-1] + | add TMPRd, 1 + |4: + | jmp ->fff_newstr + | + |5: // Negative end or overflow. + | jl >6 + | lea TMPRd, [TMPRd+RCd+1] // end = end+(len+1) + | jmp <2 + |6: // Overflow. + | mov TMPRd, RCd // end = len + | jmp <2 + | + |7: // Negative start or underflow. + | je >8 + | add RAd, RCd // start = start+(len+1) + | add RAd, 1 + | jg <3 // start > 0? + |8: // Underflow. + | mov RAd, 1 // start = 1 + | jmp <3 + | + |->fff_emptystr: // Range underflow. + | xor TMPRd, TMPRd // Zero length. Any ptr in RD is ok. + | jmp <4 + | + |.macro ffstring_op, name + | .ffunc_1 string_ .. name + | ffgccheck + |.if X64WIN + | mov STR:TMPR, [BASE] + | checkstr STR:TMPR, ->fff_fallback + |.else + | mov STR:CARG2, [BASE] + | checkstr STR:CARG2, ->fff_fallback + |.endif + | mov L:RB, SAVE_L + | lea SBUF:CARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] + | mov L:RB->base, BASE + |.if X64WIN + | mov STR:CARG2, STR:TMPR // Caveat: CARG2 == BASE + |.endif + | mov RC, SBUF:CARG1->b + | mov SBUF:CARG1->L, L:RB + | mov SBUF:CARG1->w, RC + | mov SAVE_PC, PC + | call extern lj_buf_putstr_ .. name + | mov CARG1, rax + | call extern lj_buf_tostr + | jmp ->fff_resstr + |.endmacro + | + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper + | + |//-- Bit library -------------------------------------------------------- + | + |.macro .ffunc_bit, name, kind, fdef + | fdef name + |.if kind == 2 + | sseconst_tobit xmm1, RB + |.endif + |.if DUALNUM + | mov RB, [BASE] + | checkint RB, >1 + |.if kind > 0 + | jmp >2 + |.else + | jmp ->fff_resbit + |.endif + |1: + | ja ->fff_fallback + | movd xmm0, RB + |.else + | checknumtp [BASE], ->fff_fallback + | movsd xmm0, qword [BASE] + |.endif + |.if kind < 2 + | sseconst_tobit xmm1, RB + |.endif + | addsd xmm0, xmm1 + | movd RBd, xmm0 + |2: + |.endmacro + | + |.macro .ffunc_bit, name, kind + | .ffunc_bit name, kind, .ffunc_1 + |.endmacro + | + |.ffunc_bit bit_tobit, 0 + | jmp ->fff_resbit + | + |.macro .ffunc_bit_op, name, ins + | .ffunc_bit name, 2 + | mov TMPRd, NARGS:RDd // Save for fallback. + | lea RD, [BASE+NARGS:RD*8-16] + |1: + | cmp RD, BASE + | jbe ->fff_resbit + |.if DUALNUM + | mov RA, [RD] + | checkint RA, >2 + | ins RBd, RAd + | sub RD, 8 + | jmp <1 + |2: + | ja ->fff_fallback_bit_op + | movd xmm0, RA + |.else + | checknumtp [RD], ->fff_fallback_bit_op + | movsd xmm0, qword [RD] + |.endif + | addsd xmm0, xmm1 + | movd RAd, xmm0 + | ins RBd, RAd + | sub RD, 8 + | jmp <1 + |.endmacro + | + |.ffunc_bit_op bit_band, and + |.ffunc_bit_op bit_bor, or + |.ffunc_bit_op bit_bxor, xor + | + |.ffunc_bit bit_bswap, 1 + | bswap RBd + | jmp ->fff_resbit + | + |.ffunc_bit bit_bnot, 1 + | not RBd + |.if DUALNUM + | jmp ->fff_resbit + |.else + |->fff_resbit: + | cvtsi2sd xmm0, RBd + | jmp ->fff_resxmm0 + |.endif + | + |->fff_fallback_bit_op: + | mov NARGS:RDd, TMPRd // Restore for fallback + | jmp ->fff_fallback + | + |.macro .ffunc_bit_sh, name, ins + |.if DUALNUM + | .ffunc_bit name, 1, .ffunc_2 + | // Note: no inline conversion from number for 2nd argument! + | mov RA, [BASE+8] + | checkint RA, ->fff_fallback + |.else + | .ffunc_nn name + | sseconst_tobit xmm2, RB + | addsd xmm0, xmm2 + | addsd xmm1, xmm2 + | movd RBd, xmm0 + | movd RAd, xmm1 + |.endif + | ins RBd, cl // Assumes RA is ecx. + | jmp ->fff_resbit + |.endmacro + | + |.ffunc_bit_sh bit_lshift, shl + |.ffunc_bit_sh bit_rshift, shr + |.ffunc_bit_sh bit_arshift, sar + |.ffunc_bit_sh bit_rol, rol + |.ffunc_bit_sh bit_ror, ror + | + |//----------------------------------------------------------------------- + | + |->fff_fallback_2: + | mov NARGS:RDd, 1+2 // Other args are ignored, anyway. + | jmp ->fff_fallback + |->fff_fallback_1: + | mov NARGS:RDd, 1+1 // Other args are ignored, anyway. + |->fff_fallback: // Call fast function fallback handler. + | // BASE = new base, RD = nargs+1 + | mov L:RB, SAVE_L + | mov PC, [BASE-8] // Fallback may overwrite PC. + | mov SAVE_PC, PC // Redundant (but a defined value). + | mov L:RB->base, BASE + | lea RD, [BASE+NARGS:RD*8-8] + | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. + | mov L:RB->top, RD + | mov CFUNC:RD, [BASE-16] + | cleartp CFUNC:RD + | cmp RA, L:RB->maxstack + | ja >5 // Need to grow stack. + | mov CARG1, L:RB + | call aword CFUNC:RD->f // (lua_State *L) + | mov BASE, L:RB->base + | // Either throws an error, or recovers and returns -1, 0 or nresults+1. + | test RDd, RDd; jg ->fff_res // Returned nresults+1? + |1: + | mov RA, L:RB->top + | sub RA, BASE + | shr RAd, 3 + | test RDd, RDd + | lea NARGS:RDd, [RAd+1] + | mov LFUNC:RB, [BASE-16] + | jne ->vm_call_tail // Returned -1? + | cleartp LFUNC:RB + | ins_callt // Returned 0: retry fast path. + | + |// Reconstruct previous base for vmeta_call during tailcall. + |->vm_call_tail: + | mov RA, BASE + | test PCd, FRAME_TYPE + | jnz >3 + | movzx RBd, PC_RA + | neg RB + | lea BASE, [BASE+RB*8-16] // base = base - (RB+2)*8 + | jmp ->vm_call_dispatch // Resolve again for tailcall. + |3: + | mov RB, PC + | and RB, -8 + | sub BASE, RB + | jmp ->vm_call_dispatch // Resolve again for tailcall. + | + |5: // Grow stack for fallback handler. + | mov CARG2d, LUA_MINSTACK + | mov CARG1, L:RB + | call extern lj_state_growstack // (lua_State *L, int n) + | mov BASE, L:RB->base + | xor RDd, RDd // Simulate a return 0. + | jmp <1 // Dumb retry (goes through ff first). + | + |->fff_gcstep: // Call GC step function. + | // BASE = new base, RD = nargs+1 + | pop RB // Must keep stack at same level. + | mov TMP1, RB // Save return address + | mov L:RB, SAVE_L + | mov SAVE_PC, PC // Redundant (but a defined value). + | mov L:RB->base, BASE + | lea RD, [BASE+NARGS:RD*8-8] + | mov CARG1, L:RB + | mov L:RB->top, RD + | call extern lj_gc_step // (lua_State *L) + | mov BASE, L:RB->base + | mov RD, L:RB->top + | sub RD, BASE + | shr RDd, 3 + | add NARGS:RDd, 1 + | mov RB, TMP1 + | push RB // Restore return address. + | ret + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- + |//----------------------------------------------------------------------- + | + |->vm_record: // Dispatch target for recording phase. + |.if JIT + | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] + | test RDL, HOOK_VMEVENT // No recording while in vmevent. + | jnz >5 + | // Decrement the hookcount for consistency, but always do the call. + | test RDL, HOOK_ACTIVE + | jnz >1 + | test RDL, LUA_MASKLINE|LUA_MASKCOUNT + | jz >1 + | dec dword [DISPATCH+DISPATCH_GL(hookcount)] + | jmp >1 + |.endif + | + |->vm_rethook: // Dispatch target for return hooks. + | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] + | test RDL, HOOK_ACTIVE // Hook already active? + | jnz >5 + | jmp >1 + | + |->vm_inshook: // Dispatch target for instr/line hooks. + | movzx RDd, byte [DISPATCH+DISPATCH_GL(hookmask)] + | test RDL, HOOK_ACTIVE // Hook already active? + | jnz >5 + | + | test RDL, LUA_MASKLINE|LUA_MASKCOUNT + | jz >5 + | dec dword [DISPATCH+DISPATCH_GL(hookcount)] + | jz >1 + | test RDL, LUA_MASKLINE + | jz >5 + |1: + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov CARG2, PC // Caveat: CARG2 == BASE + | mov CARG1, L:RB + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. + | call extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | mov BASE, L:RB->base + |4: + | movzx RAd, PC_RA + |5: + | movzx OP, PC_OP + | movzx RDd, PC_RD + | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. + | + |->cont_hook: // Continue from hook yield. + | add PC, 4 + | mov RA, [RB-40] + | mov MULTRES, RAd // Restore MULTRES for *M ins. + | jmp <4 + | + |->vm_hotloop: // Hot loop counter underflow. + |.if JIT + | mov LFUNC:RB, [BASE-16] // Same as curr_topL(L). + | cleartp LFUNC:RB + | mov RB, LFUNC:RB->pc + | movzx RDd, byte [RB+PC2PROTO(framesize)] + | lea RD, [BASE+RD*8] + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov L:RB->top, RD + | mov CARG2, PC + | lea CARG1, [DISPATCH+GG_DISP2J] + | mov aword [DISPATCH+DISPATCH_J(L)], L:RB + | mov SAVE_PC, PC + | call extern lj_trace_hot // (jit_State *J, const BCIns *pc) + | jmp <3 + |.endif + | + |->vm_callhook: // Dispatch target for call hooks. + | mov SAVE_PC, PC + |.if JIT + | jmp >1 + |.endif + | + |->vm_hotcall: // Hot call counter underflow. + |.if JIT + | mov SAVE_PC, PC + | or PC, 1 // Marker for hot call. + |1: + |.endif + | lea RD, [BASE+NARGS:RD*8-8] + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov L:RB->top, RD + | mov CARG2, PC + | mov CARG1, L:RB + | call extern lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // ASMFunction returned in eax/rax (RD). + | mov SAVE_PC, 0 // Invalidate for subsequent line hook. + |.if JIT + | and PC, -2 + |.endif + | mov BASE, L:RB->base + | mov RA, RD + | mov RD, L:RB->top + | sub RD, BASE + | mov RB, RA + | movzx RAd, PC_RA + | shr RDd, 3 + | add NARGS:RDd, 1 + | jmp RB + | + |->cont_stitch: // Trace stitching. + |.if JIT + | // BASE = base, RC = result, RB = mbase + | mov TRACE:ITYPE, [RB-40] // Save previous trace. + | cleartp TRACE:ITYPE + | mov TMPRd, MULTRES + | movzx RAd, PC_RA + | lea RA, [BASE+RA*8] // Call base. + | sub TMPRd, 1 + | jz >2 + |1: // Move results down. + | mov RB, [RC] + | mov [RA], RB + | add RC, 8 + | add RA, 8 + | sub TMPRd, 1 + | jnz <1 + |2: + | movzx RCd, PC_RA + | movzx RBd, PC_RB + | add RC, RB + | lea RC, [BASE+RC*8-8] + |3: + | cmp RC, RA + | ja >9 // More results wanted? + | + | test TRACE:ITYPE, TRACE:ITYPE + | jz ->cont_nop + | movzx RBd, word TRACE:ITYPE->traceno + | movzx RDd, word TRACE:ITYPE->link + | cmp RDd, RBd + | je ->cont_nop // Blacklisted. + | test RDd, RDd + | jne =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | mov [DISPATCH+DISPATCH_J(exitno)], RB + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov CARG2, PC + | lea CARG1, [DISPATCH+GG_DISP2J] + | mov aword [DISPATCH+DISPATCH_J(L)], L:RB + | call extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) + | mov BASE, L:RB->base + | jmp ->cont_nop + | + |9: // Fill up results with nil. + | mov aword [RA], LJ_TNIL + | add RA, 8 + | jmp <3 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov CARG2, PC // Caveat: CARG2 == BASE + | mov CARG1, L:RB + | call extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) + | mov BASE, L:RB->base + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | sub PC, 4 + | jmp ->cont_nop +#endif + | + |//----------------------------------------------------------------------- + |//-- Trace exit handler ------------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Called from an exit stub with the exit number on the stack. + |// The 16 bit exit number is stored with two (sign-extended) push imm8. + |->vm_exit_handler: + |.if JIT + | push r13; push r12 + | push r11; push r10; push r9; push r8 + | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp + | push rbx; push rdx; push rcx; push rax + | movzx RCd, byte [rbp-8] // Reconstruct exit number. + | mov RCH, byte [rbp-16] + | mov [rbp-8], r15; mov [rbp-16], r14 + | // DISPATCH is preserved on-trace in LJ_GC64 mode. + | mov RAd, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number. + | set_vmstate EXIT + | mov [DISPATCH+DISPATCH_J(exitno)], RCd + | mov [DISPATCH+DISPATCH_J(parent)], RAd + |.if X64WIN + | sub rsp, 16*8+4*8 // Room for SSE regs + save area. + |.else + | sub rsp, 16*8 // Room for SSE regs. + |.endif + | add rbp, -128 + | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 + | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 + | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 + | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 + | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 + | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 + | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 + | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 + | // Caveat: RB is rbp. + | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] + | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] + | mov aword [DISPATCH+DISPATCH_J(L)], L:RB + | mov L:RB->base, BASE + |.if X64WIN + | lea CARG2, [rsp+4*8] + |.else + | mov CARG2, rsp + |.endif + | lea CARG1, [DISPATCH+GG_DISP2J] + | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 + | call extern lj_trace_exit // (jit_State *J, ExitState *ex) + | // MULTRES or negated error code returned in eax (RD). + | mov RA, L:RB->cframe + | and RA, CFRAME_RAWMASK + | mov [RA+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). + | mov BASE, L:RB->base + | mov PC, [RA+CFRAME_OFS_PC] // Get SAVE_PC. + | jmp >1 + |.endif + |->vm_exit_interp: + | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. + |.if JIT + | // Restore additional callee-save registers only used in compiled code. + |.if X64WIN + | lea RA, [rsp+10*16+4*8] + |1: + | movdqa xmm15, [RA-10*16] + | movdqa xmm14, [RA-9*16] + | movdqa xmm13, [RA-8*16] + | movdqa xmm12, [RA-7*16] + | movdqa xmm11, [RA-6*16] + | movdqa xmm10, [RA-5*16] + | movdqa xmm9, [RA-4*16] + | movdqa xmm8, [RA-3*16] + | movdqa xmm7, [RA-2*16] + | mov rsp, RA // Reposition stack to C frame. + | movdqa xmm6, [RA-1*16] + | mov r15, CSAVE_1 + | mov r14, CSAVE_2 + | mov r13, CSAVE_3 + | mov r12, CSAVE_4 + |.else + | lea RA, [rsp+16] + |1: + | mov r13, [RA-8] + | mov r12, [RA] + | mov rsp, RA // Reposition stack to C frame. + |.endif + | test RDd, RDd; js >9 // Check for error from exit. + | mov L:RB, SAVE_L + | mov MULTRES, RDd + | mov LFUNC:KBASE, [BASE-16] + | cleartp LFUNC:KBASE + | mov KBASE, LFUNC:KBASE->pc + | mov KBASE, [KBASE+PC2PROTO(k)] + | mov L:RB->base, BASE + | mov qword [DISPATCH+DISPATCH_GL(jit_base)], 0 + | set_vmstate INTERP + | // Modified copy of ins_next which handles function header dispatch, too. + | mov RCd, [PC] + | movzx RAd, RCH + | movzx OP, RCL + | add PC, 4 + | shr RCd, 16 + | cmp OP, BC_FUNCF // Function header? + | jb >3 + | cmp OP, BC_FUNCC+2 // Fast function? + | jae >4 + |2: + | mov RCd, MULTRES // RC/RD holds nres+1. + |3: + | jmp aword [DISPATCH+OP*8] + | + |4: // Check frame below fast function. + | mov RC, [BASE-8] + | test RCd, FRAME_TYPE + | jnz <2 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | movzx RCd, byte [RC-3] + | neg RC + | mov LFUNC:KBASE, [BASE+RC*8-32] + | cleartp LFUNC:KBASE + | mov KBASE, LFUNC:KBASE->pc + | mov KBASE, [KBASE+PC2PROTO(k)] + | jmp <2 + | + |9: // Rethrow error from the right C frame. + | mov CARG2d, RDd + | mov CARG1, L:RB + | neg CARG2d + | call extern lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- + |//-- Math helper functions ---------------------------------------------- + |//----------------------------------------------------------------------- + | + |// FP value rounding. Called by math.floor/math.ceil fast functions + |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. + |.macro vm_round, name, mode, cond + |->name: + |->name .. _sse: + | sseconst_abs xmm2, RD + | sseconst_2p52 xmm3, RD + | movaps xmm1, xmm0 + | andpd xmm1, xmm2 // |x| + | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|. + | jbe >1 + | andnpd xmm2, xmm0 // Isolate sign bit. + |.if mode == 2 // trunc(x)? + | movaps xmm0, xmm1 + | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 + | subsd xmm1, xmm3 + | sseconst_1 xmm3, RD + | cmpsd xmm0, xmm1, 1 // |x| < result? + | andpd xmm0, xmm3 + | subsd xmm1, xmm0 // If yes, subtract -1. + | orpd xmm1, xmm2 // Merge sign bit back in. + |.else + | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 + | subsd xmm1, xmm3 + | orpd xmm1, xmm2 // Merge sign bit back in. + | sseconst_1 xmm3, RD + | .if mode == 1 // ceil(x)? + | cmpsd xmm0, xmm1, 6 // x > result? + | andpd xmm0, xmm3 + | addsd xmm1, xmm0 // If yes, add 1. + | orpd xmm1, xmm2 // Merge sign bit back in (again). + | .else // floor(x)? + | cmpsd xmm0, xmm1, 1 // x < result? + | andpd xmm0, xmm3 + | subsd xmm1, xmm0 // If yes, subtract 1. + | .endif + |.endif + | movaps xmm0, xmm1 + |1: + | ret + |.endmacro + | + | vm_round vm_floor, 0, 1 + | vm_round vm_ceil, 1, JIT + | vm_round vm_trunc, 2, JIT + | + |// FP modulo x%y. Called by BC_MOD* and vm_arith. + |->vm_mod: + |// Args in xmm0/xmm1, return value in xmm0. + |// Caveat: xmm0-xmm5 and RC (eax) modified! + | movaps xmm5, xmm0 + | divsd xmm0, xmm1 + | sseconst_abs xmm2, RD + | sseconst_2p52 xmm3, RD + | movaps xmm4, xmm0 + | andpd xmm4, xmm2 // |x/y| + | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. + | jbe >1 + | andnpd xmm2, xmm0 // Isolate sign bit. + | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 + | subsd xmm4, xmm3 + | orpd xmm4, xmm2 // Merge sign bit back in. + | sseconst_1 xmm2, RD + | cmpsd xmm0, xmm4, 1 // x/y < result? + | andpd xmm0, xmm2 + | subsd xmm4, xmm0 // If yes, subtract 1.0. + | movaps xmm0, xmm5 + | mulsd xmm1, xmm4 + | subsd xmm0, xmm1 + | ret + |1: + | mulsd xmm1, xmm0 + | movaps xmm0, xmm5 + | subsd xmm0, xmm1 + | ret + | + |//----------------------------------------------------------------------- + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | + |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]) + |->vm_cpuid: + | mov eax, CARG1d + | .if X64WIN; push rsi; mov rsi, CARG2; .endif + | push rbx + | xor ecx, ecx + | cpuid + | mov [rsi], eax + | mov [rsi+4], ebx + | mov [rsi+8], ecx + | mov [rsi+12], edx + | pop rbx + | .if X64WIN; pop rsi; .endif + | ret + | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2d + |.define NEXT_IDXa, CARG2 + |.define NEXT_PTR, RC + |.define NEXT_PTRd, RCd + |.define NEXT_TMP, CARG3 + |.define NEXT_ASIZE, CARG4d + |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.if X64WIN + |.define NEXT_RES_PTR, [rsp+aword*5] + |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro + |.else + |.define NEXT_RES_PTR, [rsp+aword*1] + |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.endif + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in edx. + |->vm_next: + |.if JIT + | mov NEXT_ASIZE, NEXT_TAB->asize + |1: // Traverse array part. + | cmp NEXT_IDX, NEXT_ASIZE; jae >5 + | mov NEXT_TMP, NEXT_TAB->array + | mov NEXT_TMP, qword [NEXT_TMP+NEXT_IDX*8] + | cmp NEXT_TMP, LJ_TNIL; je >2 + | lea NEXT_PTR, NEXT_RES_PTR + | mov qword [NEXT_PTR], NEXT_TMP + |.if DUALNUM + | setint NEXT_TMP, NEXT_IDXa + | mov qword [NEXT_PTR+qword*1], NEXT_TMP + |.else + | cvtsi2sd xmm0, NEXT_IDX + | movsd qword [NEXT_PTR+qword*1], xmm0 + |.endif + | NEXT_RES_IDX 1 + | ret + |2: // Skip holes in array part. + | add NEXT_IDX, 1 + | jmp <1 + | + |5: // Traverse hash part. + | sub NEXT_IDX, NEXT_ASIZE + |6: + | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 + | imul NEXT_PTRd, NEXT_IDX, #NODE + | add NODE:NEXT_PTR, NEXT_TAB->node + | cmp qword NODE:NEXT_PTR->val, LJ_TNIL; je >7 + | NEXT_RES_IDXL NEXT_ASIZE+1 + | ret + |7: // Skip holes in hash part. + | add NEXT_IDX, 1 + | jmp <6 + | + |9: // End of iteration. Set the key to nil (not the value). + | NEXT_RES_IDX NEXT_ASIZE + | lea NEXT_PTR, NEXT_RES_PTR + | mov qword [NEXT_PTR+qword*1], LJ_TNIL + | ret + |.endif + | + |//----------------------------------------------------------------------- + |//-- Assertions --------------------------------------------------------- + |//----------------------------------------------------------------------- + | + |->assert_bad_for_arg_type: +#ifdef LUA_USE_ASSERT + | int3 +#endif + | int3 + | + |//----------------------------------------------------------------------- + |//-- FFI helper functions ----------------------------------------------- + |//----------------------------------------------------------------------- + | + |// Handler for callback functions. Callback slot number in ah/al. + |->vm_ffi_callback: + |.if FFI + |.type CTSTATE, CTState, PC + | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. + | lea DISPATCH, [ebp+GG_G2DISP] + | mov CTSTATE, GL:ebp->ctype_state + | movzx eax, ax + | mov CTSTATE->cb.slot, eax + | mov CTSTATE->cb.gpr[0], CARG1 + | mov CTSTATE->cb.gpr[1], CARG2 + | mov CTSTATE->cb.gpr[2], CARG3 + | mov CTSTATE->cb.gpr[3], CARG4 + | movsd qword CTSTATE->cb.fpr[0], xmm0 + | movsd qword CTSTATE->cb.fpr[1], xmm1 + | movsd qword CTSTATE->cb.fpr[2], xmm2 + | movsd qword CTSTATE->cb.fpr[3], xmm3 + |.if X64WIN + | lea rax, [rsp+CFRAME_SIZE+4*8] + |.else + | lea rax, [rsp+CFRAME_SIZE] + | mov CTSTATE->cb.gpr[4], CARG5 + | mov CTSTATE->cb.gpr[5], CARG6 + | movsd qword CTSTATE->cb.fpr[4], xmm4 + | movsd qword CTSTATE->cb.fpr[5], xmm5 + | movsd qword CTSTATE->cb.fpr[6], xmm6 + | movsd qword CTSTATE->cb.fpr[7], xmm7 + |.endif + | mov CTSTATE->cb.stack, rax + | mov CARG2, rsp + | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. + | mov CARG1, CTSTATE + | call extern lj_ccallback_enter // (CTState *cts, void *cf) + | // lua_State * returned in eax (RD). + | set_vmstate INTERP + | mov BASE, L:RD->base + | mov RD, L:RD->top + | sub RD, BASE + | mov LFUNC:RB, [BASE-16] + | cleartp LFUNC:RB + | shr RD, 3 + | add RD, 1 + | ins_callt + |.endif + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI + | mov L:RA, SAVE_L + | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] + | mov aword CTSTATE->L, L:RA + | mov L:RA->base, BASE + | mov L:RA->top, RB + | mov CARG1, CTSTATE + | mov CARG2, RC + | call extern lj_ccallback_leave // (CTState *cts, TValue *o) + | mov rax, CTSTATE->cb.gpr[0] + | movsd xmm0, qword CTSTATE->cb.fpr[0] + | jmp ->vm_leave_unw + |.endif + | + |->vm_ffi_call: // Call C function via FFI. + | // Caveat: needs special frame unwinding, see below. + |.if FFI + | .type CCSTATE, CCallState, rbx + | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 + | + | // Readjust stack. + | mov eax, CCSTATE->spadj + | sub rsp, rax + | + | // Copy stack slots. + | movzx ecx, byte CCSTATE->nsp + | sub ecx, 1 + | js >2 + |1: + | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] + | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax + | sub ecx, 1 + | jns <1 + |2: + | + | movzx eax, byte CCSTATE->nfpr + | mov CARG1, CCSTATE->gpr[0] + | mov CARG2, CCSTATE->gpr[1] + | mov CARG3, CCSTATE->gpr[2] + | mov CARG4, CCSTATE->gpr[3] + |.if not X64WIN + | mov CARG5, CCSTATE->gpr[4] + | mov CARG6, CCSTATE->gpr[5] + |.endif + | test eax, eax; jz >5 + | movaps xmm0, CCSTATE->fpr[0] + | movaps xmm1, CCSTATE->fpr[1] + | movaps xmm2, CCSTATE->fpr[2] + | movaps xmm3, CCSTATE->fpr[3] + |.if not X64WIN + | cmp eax, 4; jbe >5 + | movaps xmm4, CCSTATE->fpr[4] + | movaps xmm5, CCSTATE->fpr[5] + | movaps xmm6, CCSTATE->fpr[6] + | movaps xmm7, CCSTATE->fpr[7] + |.endif + |5: + | + | call aword CCSTATE->func + | + | mov CCSTATE->gpr[0], rax + | movaps CCSTATE->fpr[0], xmm0 + |.if not X64WIN + | mov CCSTATE->gpr[1], rdx + | movaps CCSTATE->fpr[1], xmm1 + |.endif + | + | mov rbx, [rbp-8]; leave; ret + |.endif + |// Note: vm_ffi_call must be the last function in this object file! + | + |//----------------------------------------------------------------------- +} + +/* Generate the code for a single instruction. */ +static void build_ins(BuildCtx *ctx, BCOp op, int defop) +{ + int vk = 0; + |// Note: aligning all instructions does not pay off. + |=>defop: + + switch (op) { + + /* -- Comparison ops ---------------------------------------------------- */ + + /* Remember: all ops branch for a true comparison, fall through otherwise. */ + + |.macro jmp_comp, lt, ge, le, gt, target + ||switch (op) { + ||case BC_ISLT: + | lt target + ||break; + ||case BC_ISGE: + | ge target + ||break; + ||case BC_ISLE: + | le target + ||break; + ||case BC_ISGT: + | gt target + ||break; + ||default: break; /* Shut up GCC. */ + ||} + |.endmacro + + case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: + | // RA = src1, RD = src2, JMP with RD = target + | ins_AD + | mov ITYPE, [BASE+RA*8] + | mov RB, [BASE+RD*8] + | mov RA, ITYPE + | mov RD, RB + | sar ITYPE, 47 + | sar RB, 47 + |.if DUALNUM + | cmp ITYPEd, LJ_TISNUM; jne >7 + | cmp RBd, LJ_TISNUM; jne >8 + | add PC, 4 + | cmp RAd, RDd + | jmp_comp jge, jl, jg, jle, >9 + |6: + | movzx RDd, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RA is not an integer. + | ja ->vmeta_comp + | // RA is a number. + | cmp RBd, LJ_TISNUM; jb >1; jne ->vmeta_comp + | // RA is a number, RD is an integer. + | cvtsi2sd xmm0, RDd + | jmp >2 + | + |8: // RA is an integer, RD is not an integer. + | ja ->vmeta_comp + | // RA is an integer, RD is a number. + | cvtsi2sd xmm1, RAd + | movd xmm0, RD + | jmp >3 + |.else + | cmp ITYPEd, LJ_TISNUM; jae ->vmeta_comp + | cmp RBd, LJ_TISNUM; jae ->vmeta_comp + |.endif + |1: + | movd xmm0, RD + |2: + | movd xmm1, RA + |3: + | add PC, 4 + | ucomisd xmm0, xmm1 + | // Unordered: all of ZF CF PF set, ordered: PF clear. + | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. + |.if DUALNUM + | jmp_comp jbe, ja, jb, jae, <9 + | jmp <6 + |.else + | jmp_comp jbe, ja, jb, jae, >1 + | movzx RDd, PC_RD + | branchPC RD + |1: + | ins_next + |.endif + break; + + case BC_ISEQV: case BC_ISNEV: + vk = op == BC_ISEQV; + | ins_AD // RA = src1, RD = src2, JMP with RD = target + | mov RB, [BASE+RD*8] + | mov ITYPE, [BASE+RA*8] + | add PC, 4 + | mov RD, RB + | mov RA, ITYPE + | sar RB, 47 + | sar ITYPE, 47 + |.if DUALNUM + | cmp RBd, LJ_TISNUM; jne >7 + | cmp ITYPEd, LJ_TISNUM; jne >8 + | cmp RDd, RAd + if (vk) { + | jne >9 + } else { + | je >9 + } + | movzx RDd, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RD is not an integer. + | ja >5 + | // RD is a number. + | movd xmm1, RD + | cmp ITYPEd, LJ_TISNUM; jb >1; jne >5 + | // RD is a number, RA is an integer. + | cvtsi2sd xmm0, RAd + | jmp >2 + | + |8: // RD is an integer, RA is not an integer. + | ja >5 + | // RD is an integer, RA is a number. + | cvtsi2sd xmm1, RDd + | jmp >1 + | + |.else + | cmp RBd, LJ_TISNUM; jae >5 + | cmp ITYPEd, LJ_TISNUM; jae >5 + | movd xmm1, RD + |.endif + |1: + | movd xmm0, RA + |2: + | ucomisd xmm0, xmm1 + |4: + iseqne_fp: + if (vk) { + | jp >2 // Unordered means not equal. + | jne >2 + } else { + | jp >2 // Unordered means not equal. + | je >1 + } + iseqne_end: + if (vk) { + |1: // EQ: Branch to the target. + | movzx RDd, PC_RD + | branchPC RD + |2: // NE: Fallthrough to next instruction. + |.if not FFI + |3: + |.endif + } else { + |.if not FFI + |3: + |.endif + |2: // NE: Branch to the target. + | movzx RDd, PC_RD + | branchPC RD + |1: // EQ: Fallthrough to next instruction. + } + if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || + op == BC_ISEQN || op == BC_ISNEN)) { + | jmp <9 + } else { + | ins_next + } + | + if (op == BC_ISEQV || op == BC_ISNEV) { + |5: // Either or both types are not numbers. + |.if FFI + | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd + | cmp ITYPEd, LJ_TCDATA; je ->vmeta_equal_cd + |.endif + | cmp RA, RD + | je <1 // Same GCobjs or pvalues? + | cmp RBd, ITYPEd + | jne <2 // Not the same type? + | cmp RBd, LJ_TISTABUD + | ja <2 // Different objects and not table/ud? + | + | // Different tables or userdatas. Need to check __eq metamethod. + | // Field metatable must be at same offset for GCtab and GCudata! + | cleartp TAB:RA + | mov TAB:RB, TAB:RA->metatable + | test TAB:RB, TAB:RB + | jz <2 // No metatable? + | test byte TAB:RB->nomm, 1<<MM_eq + | jnz <2 // Or 'no __eq' flag set? + if (vk) { + | xor RBd, RBd // ne = 0 + } else { + | mov RBd, 1 // ne = 1 + } + | jmp ->vmeta_equal // Handle __eq metamethod. + } else { + |.if FFI + |3: + | cmp ITYPEd, LJ_TCDATA + if (LJ_DUALNUM && vk) { + | jne <9 + } else { + | jne <2 + } + | jmp ->vmeta_equal_cd + |.endif + } + break; + case BC_ISEQS: case BC_ISNES: + vk = op == BC_ISEQS; + | ins_AND // RA = src, RD = str const, JMP with RD = target + | mov RB, [BASE+RA*8] + | add PC, 4 + | checkstr RB, >3 + | cmp RB, [KBASE+RD*8] + iseqne_test: + if (vk) { + | jne >2 + } else { + | je >1 + } + goto iseqne_end; + case BC_ISEQN: case BC_ISNEN: + vk = op == BC_ISEQN; + | ins_AD // RA = src, RD = num const, JMP with RD = target + | mov RB, [BASE+RA*8] + | add PC, 4 + |.if DUALNUM + | checkint RB, >7 + | mov RD, [KBASE+RD*8] + | checkint RD, >8 + | cmp RBd, RDd + if (vk) { + | jne >9 + } else { + | je >9 + } + | movzx RDd, PC_RD + | branchPC RD + |9: + | ins_next + | + |7: // RA is not an integer. + | ja >3 + | // RA is a number. + | mov RD, [KBASE+RD*8] + | checkint RD, >1 + | // RA is a number, RD is an integer. + | cvtsi2sd xmm0, RDd + | jmp >2 + | + |8: // RA is an integer, RD is a number. + | cvtsi2sd xmm0, RBd + | movd xmm1, RD + | ucomisd xmm0, xmm1 + | jmp >4 + |1: + | movd xmm0, RD + |.else + | checknum RB, >3 + |1: + | movsd xmm0, qword [KBASE+RD*8] + |.endif + |2: + | ucomisd xmm0, qword [BASE+RA*8] + |4: + goto iseqne_fp; + case BC_ISEQP: case BC_ISNEP: + vk = op == BC_ISEQP; + | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target + | mov RB, [BASE+RA*8] + | sar RB, 47 + | add PC, 4 + | cmp RBd, RDd + if (!LJ_HASFFI) goto iseqne_test; + if (vk) { + | jne >3 + | movzx RDd, PC_RD + | branchPC RD + |2: + | ins_next + |3: + | cmp RBd, LJ_TCDATA; jne <2 + | jmp ->vmeta_equal_cd + } else { + | je >2 + | cmp RBd, LJ_TCDATA; je ->vmeta_equal_cd + | movzx RDd, PC_RD + | branchPC RD + |2: + | ins_next + } + break; + + /* -- Unary test and copy ops ------------------------------------------- */ + + case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: + | ins_AD // RA = dst or unused, RD = src, JMP with RD = target + | mov ITYPE, [BASE+RD*8] + | add PC, 4 + if (op == BC_ISTC || op == BC_ISFC) { + | mov RB, ITYPE + } + | sar ITYPE, 47 + | cmp ITYPEd, LJ_TISTRUECOND + if (op == BC_IST || op == BC_ISTC) { + | jae >1 + } else { + | jb >1 + } + if (op == BC_ISTC || op == BC_ISFC) { + | mov [BASE+RA*8], RB + } + | movzx RDd, PC_RD + | branchPC RD + |1: // Fallthrough to the next instruction. + | ins_next + break; + + case BC_ISTYPE: + | ins_AD // RA = src, RD = -type + | mov RB, [BASE+RA*8] + | sar RB, 47 + | add RBd, RDd + | jne ->vmeta_istype + | ins_next + break; + case BC_ISNUM: + | ins_AD // RA = src, RD = -(TISNUM-1) + | checknumtp [BASE+RA*8], ->vmeta_istype + | ins_next + break; + + /* -- Unary ops --------------------------------------------------------- */ + + case BC_MOV: + | ins_AD // RA = dst, RD = src + | mov RB, [BASE+RD*8] + | mov [BASE+RA*8], RB + | ins_next_ + break; + case BC_NOT: + | ins_AD // RA = dst, RD = src + | mov RB, [BASE+RD*8] + | sar RB, 47 + | mov RCd, 2 + | cmp RB, LJ_TISTRUECOND + | sbb RCd, 0 + | shl RC, 47 + | not RC + | mov [BASE+RA*8], RC + | ins_next + break; + case BC_UNM: + | ins_AD // RA = dst, RD = src + | mov RB, [BASE+RD*8] + |.if DUALNUM + | checkint RB, >5 + | neg RBd + | jo >4 + | setint RB + |9: + | mov [BASE+RA*8], RB + | ins_next + |4: + | mov64 RB, U64x(41e00000,00000000) // 2^31. + | jmp <9 + |5: + | ja ->vmeta_unm + |.else + | checknum RB, ->vmeta_unm + |.endif + | mov64 RD, U64x(80000000,00000000) + | xor RB, RD + |.if DUALNUM + | jmp <9 + |.else + | mov [BASE+RA*8], RB + | ins_next + |.endif + break; + case BC_LEN: + | ins_AD // RA = dst, RD = src + | mov RD, [BASE+RD*8] + | checkstr RD, >2 + |.if DUALNUM + | mov RDd, dword STR:RD->len + |1: + | setint RD + | mov [BASE+RA*8], RD + |.else + | xorps xmm0, xmm0 + | cvtsi2sd xmm0, dword STR:RD->len + |1: + | movsd qword [BASE+RA*8], xmm0 + |.endif + | ins_next + |2: + | cmp ITYPEd, LJ_TTAB; jne ->vmeta_len + | mov TAB:CARG1, TAB:RD +#if LJ_52 + | mov TAB:RB, TAB:RD->metatable + | cmp TAB:RB, 0 + | jnz >9 + |3: +#endif + |->BC_LEN_Z: + | mov RB, BASE // Save BASE. + | call extern lj_tab_len // (GCtab *t) + | // Length of table returned in eax (RD). + |.if DUALNUM + | // Nothing to do. + |.else + | cvtsi2sd xmm0, RDd + |.endif + | mov BASE, RB // Restore BASE. + | movzx RAd, PC_RA + | jmp <1 +#if LJ_52 + |9: // Check for __len. + | test byte TAB:RB->nomm, 1<<MM_len + | jnz <3 + | jmp ->vmeta_len // 'no __len' flag NOT set: check. +#endif + break; + + /* -- Binary ops -------------------------------------------------------- */ + + |.macro ins_arithpre, sseins, ssereg + | ins_ABC + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: + | checknumtp [BASE+RB*8], ->vmeta_arith_vn + | .if DUALNUM + | checknumtp [KBASE+RC*8], ->vmeta_arith_vn + | .endif + | movsd xmm0, qword [BASE+RB*8] + | sseins ssereg, qword [KBASE+RC*8] + || break; + ||case 1: + | checknumtp [BASE+RB*8], ->vmeta_arith_nv + | .if DUALNUM + | checknumtp [KBASE+RC*8], ->vmeta_arith_nv + | .endif + | movsd xmm0, qword [KBASE+RC*8] + | sseins ssereg, qword [BASE+RB*8] + || break; + ||default: + | checknumtp [BASE+RB*8], ->vmeta_arith_vv + | checknumtp [BASE+RC*8], ->vmeta_arith_vv + | movsd xmm0, qword [BASE+RB*8] + | sseins ssereg, qword [BASE+RC*8] + || break; + ||} + |.endmacro + | + |.macro ins_arithdn, intins + | ins_ABC + ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); + ||switch (vk) { + ||case 0: + | mov RB, [BASE+RB*8] + | mov RC, [KBASE+RC*8] + | checkint RB, ->vmeta_arith_vno + | checkint RC, ->vmeta_arith_vno + | intins RBd, RCd; jo ->vmeta_arith_vno + || break; + ||case 1: + | mov RB, [BASE+RB*8] + | mov RC, [KBASE+RC*8] + | checkint RB, ->vmeta_arith_nvo + | checkint RC, ->vmeta_arith_nvo + | intins RCd, RBd; jo ->vmeta_arith_nvo + || break; + ||default: + | mov RB, [BASE+RB*8] + | mov RC, [BASE+RC*8] + | checkint RB, ->vmeta_arith_vvo + | checkint RC, ->vmeta_arith_vvo + | intins RBd, RCd; jo ->vmeta_arith_vvo + || break; + ||} + ||if (vk == 1) { + | setint RC + | mov [BASE+RA*8], RC + ||} else { + | setint RB + | mov [BASE+RA*8], RB + ||} + | ins_next + |.endmacro + | + |.macro ins_arithpost + | movsd qword [BASE+RA*8], xmm0 + |.endmacro + | + |.macro ins_arith, sseins + | ins_arithpre sseins, xmm0 + | ins_arithpost + | ins_next + |.endmacro + | + |.macro ins_arith, intins, sseins + |.if DUALNUM + | ins_arithdn intins + |.else + | ins_arith, sseins + |.endif + |.endmacro + + | // RA = dst, RB = src1 or num const, RC = src2 or num const + case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: + | ins_arith add, addsd + break; + case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: + | ins_arith sub, subsd + break; + case BC_MULVN: case BC_MULNV: case BC_MULVV: + | ins_arith imul, mulsd + break; + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arith divsd + break; + case BC_MODVN: + | ins_arithpre movsd, xmm1 + |->BC_MODVN_Z: + | call ->vm_mod + | ins_arithpost + | ins_next + break; + case BC_MODNV: case BC_MODVV: + | ins_arithpre movsd, xmm1 + | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. + break; + case BC_POW: + | ins_arithpre movsd, xmm1 + | mov RB, BASE + | call extern pow + | movzx RAd, PC_RA + | mov BASE, RB + | ins_arithpost + | ins_next + break; + + case BC_CAT: + | ins_ABC // RA = dst, RB = src_start, RC = src_end + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE + | lea CARG2, [BASE+RC*8] + | mov CARG3d, RCd + | sub CARG3d, RBd + |->BC_CAT_Z: + | mov L:RB, L:CARG1 + | mov SAVE_PC, PC + | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) + | // NULL (finished) or TValue * (metamethod) returned in eax (RC). + | mov BASE, L:RB->base + | test RC, RC + | jnz ->vmeta_binop + | movzx RBd, PC_RB // Copy result to Stk[RA] from Stk[RB]. + | movzx RAd, PC_RA + | mov RC, [BASE+RB*8] + | mov [BASE+RA*8], RC + | ins_next + break; + + /* -- Constant ops ------------------------------------------------------ */ + + case BC_KSTR: + | ins_AND // RA = dst, RD = str const (~) + | mov RD, [KBASE+RD*8] + | settp RD, LJ_TSTR + | mov [BASE+RA*8], RD + | ins_next + break; + case BC_KCDATA: + |.if FFI + | ins_AND // RA = dst, RD = cdata const (~) + | mov RD, [KBASE+RD*8] + | settp RD, LJ_TCDATA + | mov [BASE+RA*8], RD + | ins_next + |.endif + break; + case BC_KSHORT: + | ins_AD // RA = dst, RD = signed int16 literal + |.if DUALNUM + | movsx RDd, RDW + | setint RD + | mov [BASE+RA*8], RD + |.else + | movsx RDd, RDW // Sign-extend literal. + | cvtsi2sd xmm0, RDd + | movsd qword [BASE+RA*8], xmm0 + |.endif + | ins_next + break; + case BC_KNUM: + | ins_AD // RA = dst, RD = num const + | movsd xmm0, qword [KBASE+RD*8] + | movsd qword [BASE+RA*8], xmm0 + | ins_next + break; + case BC_KPRI: + | ins_AD // RA = dst, RD = primitive type (~) + | shl RD, 47 + | not RD + | mov [BASE+RA*8], RD + | ins_next + break; + case BC_KNIL: + | ins_AD // RA = dst_start, RD = dst_end + | lea RA, [BASE+RA*8+8] + | lea RD, [BASE+RD*8] + | mov RB, LJ_TNIL + | mov [RA-8], RB // Sets minimum 2 slots. + |1: + | mov [RA], RB + | add RA, 8 + | cmp RA, RD + | jbe <1 + | ins_next + break; + + /* -- Upvalue and function ops ------------------------------------------ */ + + case BC_UGET: + | ins_AD // RA = dst, RD = upvalue # + | mov LFUNC:RB, [BASE-16] + | cleartp LFUNC:RB + | mov UPVAL:RB, [LFUNC:RB+RD*8+offsetof(GCfuncL, uvptr)] + | mov RB, UPVAL:RB->v + | mov RD, [RB] + | mov [BASE+RA*8], RD + | ins_next + break; + case BC_USETV: +#define TV2MARKOFS \ + ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) + | ins_AD // RA = upvalue #, RD = src + | mov LFUNC:RB, [BASE-16] + | cleartp LFUNC:RB + | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] + | cmp byte UPVAL:RB->closed, 0 + | mov RB, UPVAL:RB->v + | mov RA, [BASE+RD*8] + | mov [RB], RA + | jz >1 + | // Check barrier for closed upvalue. + | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) + | jnz >2 + |1: + | ins_next + | + |2: // Upvalue is black. Check if new value is collectable and white. + | mov RD, RA + | sar RD, 47 + | sub RDd, LJ_TISGCV + | cmp RDd, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) + | jbe <1 + | cleartp GCOBJ:RA + | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) + | jz <1 + | // Crossed a write barrier. Move the barrier forward. + |.if not X64WIN + | mov CARG2, RB + | mov RB, BASE // Save BASE. + |.else + | xchg CARG2, RB // Save BASE (CARG2 == BASE). + |.endif + | lea GL:CARG1, [DISPATCH+GG_DISP2G] + | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | mov BASE, RB // Restore BASE. + | jmp <1 + break; +#undef TV2MARKOFS + case BC_USETS: + | ins_AND // RA = upvalue #, RD = str const (~) + | mov LFUNC:RB, [BASE-16] + | cleartp LFUNC:RB + | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] + | mov STR:RA, [KBASE+RD*8] + | mov RD, UPVAL:RB->v + | settp STR:ITYPE, STR:RA, LJ_TSTR + | mov [RD], STR:ITYPE + | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) + | jnz >2 + |1: + | ins_next + | + |2: // Check if string is white and ensure upvalue is closed. + | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) + | jz <1 + | cmp byte UPVAL:RB->closed, 0 + | jz <1 + | // Crossed a write barrier. Move the barrier forward. + | mov RB, BASE // Save BASE (CARG2 == BASE). + | mov CARG2, RD + | lea GL:CARG1, [DISPATCH+GG_DISP2G] + | call extern lj_gc_barrieruv // (global_State *g, TValue *tv) + | mov BASE, RB // Restore BASE. + | jmp <1 + break; + case BC_USETN: + | ins_AD // RA = upvalue #, RD = num const + | mov LFUNC:RB, [BASE-16] + | cleartp LFUNC:RB + | movsd xmm0, qword [KBASE+RD*8] + | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] + | mov RA, UPVAL:RB->v + | movsd qword [RA], xmm0 + | ins_next + break; + case BC_USETP: + | ins_AD // RA = upvalue #, RD = primitive type (~) + | mov LFUNC:RB, [BASE-16] + | cleartp LFUNC:RB + | mov UPVAL:RB, [LFUNC:RB+RA*8+offsetof(GCfuncL, uvptr)] + | shl RD, 47 + | not RD + | mov RA, UPVAL:RB->v + | mov [RA], RD + | ins_next + break; + case BC_UCLO: + | ins_AD // RA = level, RD = target + | branchPC RD // Do this first to free RD. + | mov L:RB, SAVE_L + | cmp aword L:RB->openupval, 0 + | je >1 + | mov L:RB->base, BASE + | lea CARG2, [BASE+RA*8] // Caveat: CARG2 == BASE + | mov L:CARG1, L:RB // Caveat: CARG1 == RA + | call extern lj_func_closeuv // (lua_State *L, TValue *level) + | mov BASE, L:RB->base + |1: + | ins_next + break; + + case BC_FNEW: + | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG2/CARG3 may be BASE. + | mov CARG3, [BASE-16] + | cleartp CARG3 + | mov CARG2, [KBASE+RD*8] // Fetch GCproto *. + | mov CARG1, L:RB + | mov SAVE_PC, PC + | // (lua_State *L, GCproto *pt, GCfuncL *parent) + | call extern lj_func_newL_gc + | // GCfuncL * returned in eax (RC). + | mov BASE, L:RB->base + | movzx RAd, PC_RA + | settp LFUNC:RC, LJ_TFUNC + | mov [BASE+RA*8], LFUNC:RC + | ins_next + break; + + /* -- Table ops --------------------------------------------------------- */ + + case BC_TNEW: + | ins_AD // RA = dst, RD = hbits|asize + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] + | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] + | mov SAVE_PC, PC + | jae >5 + |1: + | mov CARG3d, RDd + | and RDd, 0x7ff + | shr CARG3d, 11 + | cmp RDd, 0x7ff + | je >3 + |2: + | mov L:CARG1, L:RB + | mov CARG2d, RDd + | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) + | // Table * returned in eax (RC). + | mov BASE, L:RB->base + | movzx RAd, PC_RA + | settp TAB:RC, LJ_TTAB + | mov [BASE+RA*8], TAB:RC + | ins_next + |3: // Turn 0x7ff into 0x801. + | mov RDd, 0x801 + | jmp <2 + |5: + | mov L:CARG1, L:RB + | call extern lj_gc_step_fixtop // (lua_State *L) + | movzx RDd, PC_RD + | jmp <1 + break; + case BC_TDUP: + | ins_AND // RA = dst, RD = table const (~) (holding template table) + | mov L:RB, SAVE_L + | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] + | mov SAVE_PC, PC + | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] + | mov L:RB->base, BASE + | jae >3 + |2: + | mov TAB:CARG2, [KBASE+RD*8] // Caveat: CARG2 == BASE + | mov L:CARG1, L:RB // Caveat: CARG1 == RA + | call extern lj_tab_dup // (lua_State *L, Table *kt) + | // Table * returned in eax (RC). + | mov BASE, L:RB->base + | movzx RAd, PC_RA + | settp TAB:RC, LJ_TTAB + | mov [BASE+RA*8], TAB:RC + | ins_next + |3: + | mov L:CARG1, L:RB + | call extern lj_gc_step_fixtop // (lua_State *L) + | movzx RDd, PC_RD // Need to reload RD. + | not RD + | jmp <2 + break; + + case BC_GGET: + | ins_AND // RA = dst, RD = str const (~) + | mov LFUNC:RB, [BASE-16] + | cleartp LFUNC:RB + | mov TAB:RB, LFUNC:RB->env + | mov STR:RC, [KBASE+RD*8] + | jmp ->BC_TGETS_Z + break; + case BC_GSET: + | ins_AND // RA = src, RD = str const (~) + | mov LFUNC:RB, [BASE-16] + | cleartp LFUNC:RB + | mov TAB:RB, LFUNC:RB->env + | mov STR:RC, [KBASE+RD*8] + | jmp ->BC_TSETS_Z + break; + + case BC_TGETV: + | ins_ABC // RA = dst, RB = table, RC = key + | mov TAB:RB, [BASE+RB*8] + | mov RC, [BASE+RC*8] + | checktab TAB:RB, ->vmeta_tgetv + | + | // Integer key? + |.if DUALNUM + | checkint RC, >5 + |.else + | // Convert number to int and back and compare. + | checknum RC, >5 + | movd xmm0, RC + | cvttsd2si RCd, xmm0 + | cvtsi2sd xmm1, RCd + | ucomisd xmm0, xmm1 + | jne ->vmeta_tgetv // Generic numeric key? Use fallback. + |.endif + | cmp RCd, TAB:RB->asize // Takes care of unordered, too. + | jae ->vmeta_tgetv // Not in array part? Use fallback. + | shl RCd, 3 + | add RC, TAB:RB->array + | // Get array slot. + | mov ITYPE, [RC] + | cmp ITYPE, LJ_TNIL // Avoid overwriting RB in fastpath. + | je >2 + |1: + | mov [BASE+RA*8], ITYPE + | ins_next + | + |2: // Check for __index if table value is nil. + | mov TAB:TMPR, TAB:RB->metatable + | test TAB:TMPR, TAB:TMPR + | jz <1 + | test byte TAB:TMPR->nomm, 1<<MM_index + | jz ->vmeta_tgetv // 'no __index' flag NOT set: check. + | jmp <1 + | + |5: // String key? + | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tgetv + | cleartp STR:RC + | jmp ->BC_TGETS_Z + break; + case BC_TGETS: + | ins_ABC // RA = dst, RB = table, RC = str const (~) + | mov TAB:RB, [BASE+RB*8] + | not RC + | mov STR:RC, [KBASE+RC*8] + | checktab TAB:RB, ->vmeta_tgets + |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr * + | mov TMPRd, TAB:RB->hmask + | and TMPRd, STR:RC->sid + | imul TMPRd, #NODE + | add NODE:TMPR, TAB:RB->node + | settp ITYPE, STR:RC, LJ_TSTR + |1: + | cmp NODE:TMPR->key, ITYPE + | jne >4 + | // Get node value. + | mov ITYPE, NODE:TMPR->val + | cmp ITYPE, LJ_TNIL + | je >5 // Key found, but nil value? + |2: + | mov [BASE+RA*8], ITYPE + | ins_next + | + |4: // Follow hash chain. + | mov NODE:TMPR, NODE:TMPR->next + | test NODE:TMPR, NODE:TMPR + | jnz <1 + | // End of hash chain: key not found, nil result. + | mov ITYPE, LJ_TNIL + | + |5: // Check for __index if table value is nil. + | mov TAB:TMPR, TAB:RB->metatable + | test TAB:TMPR, TAB:TMPR + | jz <2 // No metatable: done. + | test byte TAB:TMPR->nomm, 1<<MM_index + | jnz <2 // 'no __index' flag set: done. + | jmp ->vmeta_tgets // Caveat: preserve STR:RC. + break; + case BC_TGETB: + | ins_ABC // RA = dst, RB = table, RC = byte literal + | mov TAB:RB, [BASE+RB*8] + | checktab TAB:RB, ->vmeta_tgetb + | cmp RCd, TAB:RB->asize + | jae ->vmeta_tgetb + | shl RCd, 3 + | add RC, TAB:RB->array + | // Get array slot. + | mov ITYPE, [RC] + | cmp ITYPE, LJ_TNIL + | je >2 + |1: + | mov [BASE+RA*8], ITYPE + | ins_next + | + |2: // Check for __index if table value is nil. + | mov TAB:TMPR, TAB:RB->metatable + | test TAB:TMPR, TAB:TMPR + | jz <1 + | test byte TAB:TMPR->nomm, 1<<MM_index + | jz ->vmeta_tgetb // 'no __index' flag NOT set: check. + | jmp <1 + break; + case BC_TGETR: + | ins_ABC // RA = dst, RB = table, RC = key + | mov TAB:RB, [BASE+RB*8] + | cleartp TAB:RB + |.if DUALNUM + | mov RCd, dword [BASE+RC*8] + |.else + | cvttsd2si RCd, qword [BASE+RC*8] + |.endif + | cmp RCd, TAB:RB->asize + | jae ->vmeta_tgetr // Not in array part? Use fallback. + | shl RCd, 3 + | add RC, TAB:RB->array + | // Get array slot. + |->BC_TGETR_Z: + | mov ITYPE, [RC] + |->BC_TGETR2_Z: + | mov [BASE+RA*8], ITYPE + | ins_next + break; + + case BC_TSETV: + | ins_ABC // RA = src, RB = table, RC = key + | mov TAB:RB, [BASE+RB*8] + | mov RC, [BASE+RC*8] + | checktab TAB:RB, ->vmeta_tsetv + | + | // Integer key? + |.if DUALNUM + | checkint RC, >5 + |.else + | // Convert number to int and back and compare. + | checknum RC, >5 + | movd xmm0, RC + | cvttsd2si RCd, xmm0 + | cvtsi2sd xmm1, RCd + | ucomisd xmm0, xmm1 + | jne ->vmeta_tsetv // Generic numeric key? Use fallback. + |.endif + | cmp RCd, TAB:RB->asize // Takes care of unordered, too. + | jae ->vmeta_tsetv + | shl RCd, 3 + | add RC, TAB:RB->array + | cmp aword [RC], LJ_TNIL + | je >3 // Previous value is nil? + |1: + | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jnz >7 + |2: // Set array slot. + | mov RB, [BASE+RA*8] + | mov [RC], RB + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | mov TAB:TMPR, TAB:RB->metatable + | test TAB:TMPR, TAB:TMPR + | jz <1 + | test byte TAB:TMPR->nomm, 1<<MM_newindex + | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check. + | jmp <1 + | + |5: // String key? + | cmp ITYPEd, LJ_TSTR; jne ->vmeta_tsetv + | cleartp STR:RC + | jmp ->BC_TSETS_Z + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMPR + | jmp <2 + break; + case BC_TSETS: + | ins_ABC // RA = src, RB = table, RC = str const (~) + | mov TAB:RB, [BASE+RB*8] + | not RC + | mov STR:RC, [KBASE+RC*8] + | checktab TAB:RB, ->vmeta_tsets + |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr * + | mov TMPRd, TAB:RB->hmask + | and TMPRd, STR:RC->sid + | imul TMPRd, #NODE + | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. + | add NODE:TMPR, TAB:RB->node + | settp ITYPE, STR:RC, LJ_TSTR + |1: + | cmp NODE:TMPR->key, ITYPE + | jne >5 + | // Ok, key found. Assumes: offsetof(Node, val) == 0 + | cmp aword [TMPR], LJ_TNIL + | je >4 // Previous value is nil? + |2: + | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jnz >7 + |3: // Set node value. + | mov ITYPE, [BASE+RA*8] + | mov [TMPR], ITYPE + | ins_next + | + |4: // Check for __newindex if previous value is nil. + | mov TAB:ITYPE, TAB:RB->metatable + | test TAB:ITYPE, TAB:ITYPE + | jz <2 + | test byte TAB:ITYPE->nomm, 1<<MM_newindex + | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. + | jmp <2 + | + |5: // Follow hash chain. + | mov NODE:TMPR, NODE:TMPR->next + | test NODE:TMPR, NODE:TMPR + | jnz <1 + | // End of hash chain: key not found, add a new one. + | + | // But check for __newindex first. + | mov TAB:TMPR, TAB:RB->metatable + | test TAB:TMPR, TAB:TMPR + | jz >6 // No metatable: continue. + | test byte TAB:TMPR->nomm, 1<<MM_newindex + | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. + |6: + | mov TMP1, ITYPE + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE + | lea CARG3, TMP1 + | mov CARG2, TAB:RB + | mov SAVE_PC, PC + | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) + | // Handles write barrier for the new key. TValue * returned in eax (RC). + | mov L:CARG1, SAVE_L + | mov BASE, L:CARG1->base + | mov TMPR, rax + | movzx RAd, PC_RA + | jmp <2 // Must check write barrier for value. + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, ITYPE + | jmp <3 + break; + case BC_TSETB: + | ins_ABC // RA = src, RB = table, RC = byte literal + | mov TAB:RB, [BASE+RB*8] + | checktab TAB:RB, ->vmeta_tsetb + | cmp RCd, TAB:RB->asize + | jae ->vmeta_tsetb + | shl RCd, 3 + | add RC, TAB:RB->array + | cmp aword [RC], LJ_TNIL + | je >3 // Previous value is nil? + |1: + | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jnz >7 + |2: // Set array slot. + | mov ITYPE, [BASE+RA*8] + | mov [RC], ITYPE + | ins_next + | + |3: // Check for __newindex if previous value is nil. + | mov TAB:TMPR, TAB:RB->metatable + | test TAB:TMPR, TAB:TMPR + | jz <1 + | test byte TAB:TMPR->nomm, 1<<MM_newindex + | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check. + | jmp <1 + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMPR + | jmp <2 + break; + case BC_TSETR: + | ins_ABC // RA = src, RB = table, RC = key + | mov TAB:RB, [BASE+RB*8] + | cleartp TAB:RB + |.if DUALNUM + | mov RC, [BASE+RC*8] + |.else + | cvttsd2si RCd, qword [BASE+RC*8] + |.endif + | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jnz >7 + |2: + | cmp RCd, TAB:RB->asize + | jae ->vmeta_tsetr + | shl RCd, 3 + | add RC, TAB:RB->array + | // Set array slot. + |->BC_TSETR_Z: + | mov ITYPE, [BASE+RA*8] + | mov [RC], ITYPE + | ins_next + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, TMPR + | jmp <2 + break; + + case BC_TSETM: + | ins_AD // RA = base (table at base-1), RD = num const (start index) + |1: + | mov TMPRd, dword [KBASE+RD*8] // Integer constant is in lo-word. + | lea RA, [BASE+RA*8] + | mov TAB:RB, [RA-8] // Guaranteed to be a table. + | cleartp TAB:RB + | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jnz >7 + |2: + | mov RDd, MULTRES + | sub RDd, 1 + | jz >4 // Nothing to copy? + | add RDd, TMPRd // Compute needed size. + | cmp RDd, TAB:RB->asize + | ja >5 // Doesn't fit into array part? + | sub RDd, TMPRd + | shl TMPRd, 3 + | add TMPR, TAB:RB->array + |3: // Copy result slots to table. + | mov RB, [RA] + | add RA, 8 + | mov [TMPR], RB + | add TMPR, 8 + | sub RDd, 1 + | jnz <3 + |4: + | ins_next + | + |5: // Need to resize array part. + | mov L:CARG1, SAVE_L + | mov L:CARG1->base, BASE // Caveat: CARG2/CARG3 may be BASE. + | mov CARG2, TAB:RB + | mov CARG3d, RDd + | mov L:RB, L:CARG1 + | mov SAVE_PC, PC + | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) + | mov BASE, L:RB->base + | movzx RAd, PC_RA // Restore RA. + | movzx RDd, PC_RD // Restore RD. + | jmp <1 // Retry. + | + |7: // Possible table write barrier for any value. Skip valiswhite check. + | barrierback TAB:RB, RD + | jmp <2 + break; + + /* -- Calls and vararg handling ----------------------------------------- */ + + case BC_CALL: case BC_CALLM: + | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs + if (op == BC_CALLM) { + | add NARGS:RDd, MULTRES + } + | mov LFUNC:RB, [BASE+RA*8] + | checkfunc LFUNC:RB, ->vmeta_call_ra + | lea BASE, [BASE+RA*8+16] + | ins_call + break; + + case BC_CALLMT: + | ins_AD // RA = base, RD = extra_nargs + | add NARGS:RDd, MULTRES + | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. + break; + case BC_CALLT: + | ins_AD // RA = base, RD = nargs+1 + | lea RA, [BASE+RA*8+16] + | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. + | mov LFUNC:RB, [RA-16] + | checktp_nc LFUNC:RB, LJ_TFUNC, ->vmeta_call + |->BC_CALLT_Z: + | mov PC, [BASE-8] + | test PCd, FRAME_TYPE + | jnz >7 + |1: + | mov [BASE-16], LFUNC:RB // Copy func+tag down, reloaded below. + | mov MULTRES, NARGS:RDd + | sub NARGS:RDd, 1 + | jz >3 + |2: // Move args down. + | mov RB, [RA] + | add RA, 8 + | mov [KBASE], RB + | add KBASE, 8 + | sub NARGS:RDd, 1 + | jnz <2 + | + | mov LFUNC:RB, [BASE-16] + |3: + | cleartp LFUNC:RB + | mov NARGS:RDd, MULTRES + | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? + | ja >5 + |4: + | ins_callt + | + |5: // Tailcall to a fast function. + | test PCd, FRAME_TYPE // Lua frame below? + | jnz <4 + | movzx RAd, PC_RA + | neg RA + | mov LFUNC:KBASE, [BASE+RA*8-32] // Need to prepare KBASE. + | cleartp LFUNC:KBASE + | mov KBASE, LFUNC:KBASE->pc + | mov KBASE, [KBASE+PC2PROTO(k)] + | jmp <4 + | + |7: // Tailcall from a vararg function. + | sub PC, FRAME_VARG + | test PCd, FRAME_TYPEP + | jnz >8 // Vararg frame below? + | sub BASE, PC // Need to relocate BASE/KBASE down. + | mov KBASE, BASE + | mov PC, [BASE-8] + | jmp <1 + |8: + | add PCd, FRAME_VARG + | jmp <1 + break; + + case BC_ITERC: + | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) + | lea RA, [BASE+RA*8+16] // fb = base+2 + | mov RB, [RA-32] // Copy state. fb[0] = fb[-4]. + | mov RC, [RA-24] // Copy control var. fb[1] = fb[-3]. + | mov [RA], RB + | mov [RA+8], RC + | mov LFUNC:RB, [RA-40] // Copy callable. fb[-2] = fb[-5] + | mov [RA-16], LFUNC:RB + | mov NARGS:RDd, 2+1 // Handle like a regular 2-arg call. + | checkfunc LFUNC:RB, ->vmeta_call + | mov BASE, RA + | ins_call + break; + + case BC_ITERN: + |.if JIT + | hotloop RBd + |.endif + |->vm_IITERN: + | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) + | mov TAB:RB, [BASE+RA*8-16] + | cleartp TAB:RB + | mov RCd, [BASE+RA*8-8] // Get index from control var. + | mov TMPRd, TAB:RB->asize + | add PC, 4 + | mov ITYPE, TAB:RB->array + |1: // Traverse array part. + | cmp RCd, TMPRd; jae >5 // Index points after array part? + | cmp aword [ITYPE+RC*8], LJ_TNIL; je >4 + |.if not DUALNUM + | cvtsi2sd xmm0, RCd + |.endif + | // Copy array slot to returned value. + | mov RB, [ITYPE+RC*8] + | mov [BASE+RA*8+8], RB + | // Return array index as a numeric key. + |.if DUALNUM + | setint ITYPE, RC + | mov [BASE+RA*8], ITYPE + |.else + | movsd qword [BASE+RA*8], xmm0 + |.endif + | add RCd, 1 + | mov [BASE+RA*8-8], RCd // Update control var. + |2: + | movzx RDd, PC_RD // Get target from ITERL. + | branchPC RD + |3: + | ins_next + | + |4: // Skip holes in array part. + | add RCd, 1 + | jmp <1 + | + |5: // Traverse hash part. + | sub RCd, TMPRd + |6: + | cmp RCd, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. + | imul ITYPEd, RCd, #NODE + | add NODE:ITYPE, TAB:RB->node + | cmp aword NODE:ITYPE->val, LJ_TNIL; je >7 + | lea TMPRd, [RCd+TMPRd+1] + | // Copy key and value from hash slot. + | mov RB, NODE:ITYPE->key + | mov RC, NODE:ITYPE->val + | mov [BASE+RA*8], RB + | mov [BASE+RA*8+8], RC + | mov [BASE+RA*8-8], TMPRd + | jmp <2 + | + |7: // Skip holes in hash part. + | add RCd, 1 + | jmp <6 + break; + + case BC_ISNEXT: + | ins_AD // RA = base, RD = target (points to ITERN) + | mov CFUNC:RB, [BASE+RA*8-24] + | checkfunc CFUNC:RB, >5 + | checktptp [BASE+RA*8-16], LJ_TTAB, >5 + | cmp aword [BASE+RA*8-8], LJ_TNIL; jne >5 + | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 + | branchPC RD + | mov64 TMPR, ((uint64_t)LJ_KEYINDEX << 32) + | mov [BASE+RA*8-8], TMPR // Initialize control var. + |1: + | ins_next + |5: // Despecialize bytecode if any of the checks fail. + | mov PC_OP, BC_JMP + | branchPC RD + |.if JIT + | cmp byte [PC], BC_ITERN + | jne >6 + |.endif + | mov byte [PC], BC_ITERC + | jmp <1 + |.if JIT + |6: // Unpatch JLOOP. + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | movzx RCd, word [PC+2] + | mov TRACE:RA, [RA+RC*8] + | mov eax, TRACE:RA->startins + | mov al, BC_ITERC + | mov dword [PC], eax + | jmp <1 + |.endif + break; + + case BC_VARG: + | ins_ABC // RA = base, RB = nresults+1, RC = numparams + | lea TMPR, [BASE+RC*8+(16+FRAME_VARG)] + | lea RA, [BASE+RA*8] + | sub TMPR, [BASE-8] + | // Note: TMPR may now be even _above_ BASE if nargs was < numparams. + | test RB, RB + | jz >5 // Copy all varargs? + | lea RB, [RA+RB*8-8] + | cmp TMPR, BASE // No vararg slots? + | jnb >2 + |1: // Copy vararg slots to destination slots. + | mov RC, [TMPR-16] + | add TMPR, 8 + | mov [RA], RC + | add RA, 8 + | cmp RA, RB // All destination slots filled? + | jnb >3 + | cmp TMPR, BASE // No more vararg slots? + | jb <1 + |2: // Fill up remainder with nil. + | mov aword [RA], LJ_TNIL + | add RA, 8 + | cmp RA, RB + | jb <2 + |3: + | ins_next + | + |5: // Copy all varargs. + | mov MULTRES, 1 // MULTRES = 0+1 + | mov RC, BASE + | sub RC, TMPR + | jbe <3 // No vararg slots? + | mov RBd, RCd + | shr RBd, 3 + | add RBd, 1 + | mov MULTRES, RBd // MULTRES = #varargs+1 + | mov L:RB, SAVE_L + | add RC, RA + | cmp RC, L:RB->maxstack + | ja >7 // Need to grow stack? + |6: // Copy all vararg slots. + | mov RC, [TMPR-16] + | add TMPR, 8 + | mov [RA], RC + | add RA, 8 + | cmp TMPR, BASE // No more vararg slots? + | jb <6 + | jmp <3 + | + |7: // Grow stack for varargs. + | mov L:RB->base, BASE + | mov L:RB->top, RA + | mov SAVE_PC, PC + | sub TMPR, BASE // Need delta, because BASE may change. + | mov TMP1hi, TMPRd + | mov CARG2d, MULTRES + | sub CARG2d, 1 + | mov CARG1, L:RB + | call extern lj_state_growstack // (lua_State *L, int n) + | mov BASE, L:RB->base + | movsxd TMPR, TMP1hi + | mov RA, L:RB->top + | add TMPR, BASE + | jmp <6 + break; + + /* -- Returns ----------------------------------------------------------- */ + + case BC_RETM: + | ins_AD // RA = results, RD = extra_nresults + | add RDd, MULTRES // MULTRES >=1, so RD >=1. + | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. + break; + + case BC_RET: case BC_RET0: case BC_RET1: + | ins_AD // RA = results, RD = nresults+1 + if (op != BC_RET0) { + | shl RAd, 3 + } + |1: + | mov PC, [BASE-8] + | mov MULTRES, RDd // Save nresults+1. + | test PCd, FRAME_TYPE // Check frame type marker. + | jnz >7 // Not returning to a fixarg Lua func? + switch (op) { + case BC_RET: + |->BC_RET_Z: + | mov KBASE, BASE // Use KBASE for result move. + | sub RDd, 1 + | jz >3 + |2: // Move results down. + | mov RB, [KBASE+RA] + | mov [KBASE-16], RB + | add KBASE, 8 + | sub RDd, 1 + | jnz <2 + |3: + | mov RDd, MULTRES // Note: MULTRES may be >255. + | movzx RBd, PC_RB // So cannot compare with RDL! + |5: + | cmp RBd, RDd // More results expected? + | ja >6 + break; + case BC_RET1: + | mov RB, [BASE+RA] + | mov [BASE-16], RB + /* fallthrough */ + case BC_RET0: + |5: + | cmp PC_RB, RDL // More results expected? + | ja >6 + default: + break; + } + | movzx RAd, PC_RA + | neg RA + | lea BASE, [BASE+RA*8-16] // base = base - (RA+2)*8 + | mov LFUNC:KBASE, [BASE-16] + | cleartp LFUNC:KBASE + | mov KBASE, LFUNC:KBASE->pc + | mov KBASE, [KBASE+PC2PROTO(k)] + | ins_next + | + |6: // Fill up results with nil. + if (op == BC_RET) { + | mov aword [KBASE-16], LJ_TNIL // Note: relies on shifted base. + | add KBASE, 8 + } else { + | mov aword [BASE+RD*8-24], LJ_TNIL + } + | add RD, 1 + | jmp <5 + | + |7: // Non-standard return case. + | lea RB, [PC-FRAME_VARG] + | test RBd, FRAME_TYPEP + | jnz ->vm_return + | // Return from vararg function: relocate BASE down and RA up. + | sub BASE, RB + if (op != BC_RET0) { + | add RA, RB + } + | jmp <1 + break; + + /* -- Loops and branches ------------------------------------------------ */ + + |.define FOR_IDX, [RA] + |.define FOR_STOP, [RA+8] + |.define FOR_STEP, [RA+16] + |.define FOR_EXT, [RA+24] + + case BC_FORL: + |.if JIT + | hotloop RBd + |.endif + | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. + break; + + case BC_JFORI: + case BC_JFORL: +#if !LJ_HASJIT + break; +#endif + case BC_FORI: + case BC_IFORL: + vk = (op == BC_IFORL || op == BC_JFORL); + | ins_AJ // RA = base, RD = target (after end of loop or start of loop) + | lea RA, [BASE+RA*8] + if (LJ_DUALNUM) { + | mov RB, FOR_IDX + | checkint RB, >9 + | mov TMPR, FOR_STOP + if (!vk) { + | checkint TMPR, ->vmeta_for + | mov ITYPE, FOR_STEP + | test ITYPEd, ITYPEd; js >5 + | sar ITYPE, 47; + | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for + } else { +#ifdef LUA_USE_ASSERT + | checkinttp FOR_STOP, ->assert_bad_for_arg_type + | checkinttp FOR_STEP, ->assert_bad_for_arg_type +#endif + | mov ITYPE, FOR_STEP + | test ITYPEd, ITYPEd; js >5 + | add RBd, ITYPEd; jo >1 + | setint RB + | mov FOR_IDX, RB + } + | cmp RBd, TMPRd + | mov FOR_EXT, RB + if (op == BC_FORI) { + | jle >7 + |1: + |6: + | branchPC RD + } else if (op == BC_JFORI) { + | branchPC RD + | movzx RDd, PC_RD + | jle =>BC_JLOOP + |1: + |6: + } else if (op == BC_IFORL) { + | jg >7 + |6: + | branchPC RD + |1: + } else { + | jle =>BC_JLOOP + |1: + |6: + } + |7: + | ins_next + | + |5: // Invert check for negative step. + if (!vk) { + | sar ITYPE, 47; + | cmp ITYPEd, LJ_TISNUM; jne ->vmeta_for + } else { + | add RBd, ITYPEd; jo <1 + | setint RB + | mov FOR_IDX, RB + } + | cmp RBd, TMPRd + | mov FOR_EXT, RB + if (op == BC_FORI) { + | jge <7 + } else if (op == BC_JFORI) { + | branchPC RD + | movzx RDd, PC_RD + | jge =>BC_JLOOP + } else if (op == BC_IFORL) { + | jl <7 + } else { + | jge =>BC_JLOOP + } + | jmp <6 + |9: // Fallback to FP variant. + if (!vk) { + | jae ->vmeta_for + } + } else if (!vk) { + | checknumtp FOR_IDX, ->vmeta_for + } + if (!vk) { + | checknumtp FOR_STOP, ->vmeta_for + } else { +#ifdef LUA_USE_ASSERT + | checknumtp FOR_STOP, ->assert_bad_for_arg_type + | checknumtp FOR_STEP, ->assert_bad_for_arg_type +#endif + } + | mov RB, FOR_STEP + if (!vk) { + | checknum RB, ->vmeta_for + } + | movsd xmm0, qword FOR_IDX + | movsd xmm1, qword FOR_STOP + if (vk) { + | addsd xmm0, qword FOR_STEP + | movsd qword FOR_IDX, xmm0 + | test RB, RB; js >3 + } else { + | jl >3 + } + | ucomisd xmm1, xmm0 + |1: + | movsd qword FOR_EXT, xmm0 + if (op == BC_FORI) { + |.if DUALNUM + | jnb <7 + |.else + | jnb >2 + | branchPC RD + |.endif + } else if (op == BC_JFORI) { + | branchPC RD + | movzx RDd, PC_RD + | jnb =>BC_JLOOP + } else if (op == BC_IFORL) { + |.if DUALNUM + | jb <7 + |.else + | jb >2 + | branchPC RD + |.endif + } else { + | jnb =>BC_JLOOP + } + |.if DUALNUM + | jmp <6 + |.else + |2: + | ins_next + |.endif + | + |3: // Invert comparison if step is negative. + | ucomisd xmm0, xmm1 + | jmp <1 + break; + + case BC_ITERL: + |.if JIT + | hotloop RBd + |.endif + | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. + break; + + case BC_JITERL: +#if !LJ_HASJIT + break; +#endif + case BC_IITERL: + | ins_AJ // RA = base, RD = target + | lea RA, [BASE+RA*8] + | mov RB, [RA] + | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. + if (op == BC_JITERL) { + | mov [RA-8], RB + | jmp =>BC_JLOOP + } else { + | branchPC RD // Otherwise save control var + branch. + | mov [RA-8], RB + } + |1: + | ins_next + break; + + case BC_LOOP: + | ins_A // RA = base, RD = target (loop extent) + | // Note: RA/RD is only used by trace recorder to determine scope/extent + | // This opcode does NOT jump, it's only purpose is to detect a hot loop. + |.if JIT + | hotloop RBd + |.endif + | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. + break; + + case BC_ILOOP: + | ins_A // RA = base, RD = target (loop extent) + | ins_next + break; + + case BC_JLOOP: + |.if JIT + | ins_AD // RA = base (ignored), RD = traceno + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | mov TRACE:RD, [RA+RD*8] + | mov RD, TRACE:RD->mcode + | mov L:RB, SAVE_L + | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE + | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB + | // Save additional callee-save registers only used in compiled code. + |.if X64WIN + | mov CSAVE_4, r12 + | mov CSAVE_3, r13 + | mov CSAVE_2, r14 + | mov CSAVE_1, r15 + | mov RA, rsp + | sub rsp, 10*16+4*8 + | movdqa [RA-1*16], xmm6 + | movdqa [RA-2*16], xmm7 + | movdqa [RA-3*16], xmm8 + | movdqa [RA-4*16], xmm9 + | movdqa [RA-5*16], xmm10 + | movdqa [RA-6*16], xmm11 + | movdqa [RA-7*16], xmm12 + | movdqa [RA-8*16], xmm13 + | movdqa [RA-9*16], xmm14 + | movdqa [RA-10*16], xmm15 + |.else + | sub rsp, 16 + | mov [rsp+16], r12 + | mov [rsp+8], r13 + |.endif + | jmp RD + |.endif + break; + + case BC_JMP: + | ins_AJ // RA = unused, RD = target + | branchPC RD + | ins_next + break; + + /* -- Function headers -------------------------------------------------- */ + + /* + ** Reminder: A function may be called with func/args above L->maxstack, + ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, + ** too. This means all FUNC* ops (including fast functions) must check + ** for stack overflow _before_ adding more slots! + */ + + case BC_FUNCF: + |.if JIT + | hotcall RBd + |.endif + case BC_FUNCV: /* NYI: compiled vararg functions. */ + | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. + break; + + case BC_JFUNCF: +#if !LJ_HASJIT + break; +#endif + case BC_IFUNCF: + | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 + | mov KBASE, [PC-4+PC2PROTO(k)] + | mov L:RB, SAVE_L + | lea RA, [BASE+RA*8] // Top of frame. + | cmp RA, L:RB->maxstack + | ja ->vm_growstack_f + | movzx RAd, byte [PC-4+PC2PROTO(numparams)] + | cmp NARGS:RDd, RAd // Check for missing parameters. + | jbe >3 + |2: + if (op == BC_JFUNCF) { + | movzx RDd, PC_RD + | jmp =>BC_JLOOP + } else { + | ins_next + } + | + |3: // Clear missing parameters. + | mov aword [BASE+NARGS:RD*8-8], LJ_TNIL + | add NARGS:RDd, 1 + | cmp NARGS:RDd, RAd + | jbe <3 + | jmp <2 + break; + + case BC_JFUNCV: +#if !LJ_HASJIT + break; +#endif + | int3 // NYI: compiled vararg functions + break; /* NYI: compiled vararg functions. */ + + case BC_IFUNCV: + | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 + | lea RBd, [NARGS:RD*8+FRAME_VARG+8] + | lea RD, [BASE+NARGS:RD*8+8] + | mov LFUNC:KBASE, [BASE-16] + | mov [RD-8], RB // Store delta + FRAME_VARG. + | mov [RD-16], LFUNC:KBASE // Store copy of LFUNC. + | mov L:RB, SAVE_L + | lea RA, [RD+RA*8] + | cmp RA, L:RB->maxstack + | ja ->vm_growstack_v // Need to grow stack. + | mov RA, BASE + | mov BASE, RD + | movzx RBd, byte [PC-4+PC2PROTO(numparams)] + | test RBd, RBd + | jz >2 + | add RA, 8 + |1: // Copy fixarg slots up to new frame. + | add RA, 8 + | cmp RA, BASE + | jnb >3 // Less args than parameters? + | mov KBASE, [RA-16] + | mov [RD], KBASE + | add RD, 8 + | mov aword [RA-16], LJ_TNIL // Clear old fixarg slot (help the GC). + | sub RBd, 1 + | jnz <1 + |2: + if (op == BC_JFUNCV) { + | movzx RDd, PC_RD + | jmp =>BC_JLOOP + } else { + | mov KBASE, [PC-4+PC2PROTO(k)] + | ins_next + } + | + |3: // Clear missing parameters. + | mov aword [RD], LJ_TNIL + | add RD, 8 + | sub RBd, 1 + | jnz <3 + | jmp <2 + break; + + case BC_FUNCC: + case BC_FUNCCW: + | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 + | mov CFUNC:RB, [BASE-16] + | cleartp CFUNC:RB + | mov KBASE, CFUNC:RB->f + | mov L:RB, SAVE_L + | lea RD, [BASE+NARGS:RD*8-8] + | mov L:RB->base, BASE + | lea RA, [RD+8*LUA_MINSTACK] + | cmp RA, L:RB->maxstack + | mov L:RB->top, RD + if (op == BC_FUNCC) { + | mov CARG1, L:RB // Caveat: CARG1 may be RA. + } else { + | mov CARG2, KBASE + | mov CARG1, L:RB // Caveat: CARG1 may be RA. + } + | ja ->vm_growstack_c // Need to grow stack. + | set_vmstate C + if (op == BC_FUNCC) { + | call KBASE // (lua_State *L) + } else { + | // (lua_State *L, lua_CFunction f) + | call aword [DISPATCH+DISPATCH_GL(wrapf)] + } + | // nresults returned in eax (RD). + | mov BASE, L:RB->base + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + | set_vmstate INTERP + | lea RA, [BASE+RD*8] + | neg RA + | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 + | mov PC, [BASE-8] // Fetch PC of caller. + | jmp ->vm_returnc + break; + + /* ---------------------------------------------------------------------- */ + + default: + fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); + exit(2); + break; + } +} + +static int build_backend(BuildCtx *ctx) +{ + int op; + dasm_growpc(Dst, BC__MAX); + build_subroutines(ctx); + |.code_op + for (op = 0; op < BC__MAX; op++) + build_ins(ctx, (BCOp)op, op); + return BC__MAX; +} + +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ + int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); + switch (ctx->mode) { + case BUILD_elfasm: + fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); + fprintf(ctx->fp, + ".Lframe0:\n" + "\t.long .LECIE0-.LSCIE0\n" + ".LSCIE0:\n" + "\t.long 0xffffffff\n" + "\t.byte 0x1\n" + "\t.string \"\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 0x10\n" + "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" + "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" + "\t.align 8\n" + ".LECIE0:\n\n"); + fprintf(ctx->fp, + ".LSFDE0:\n" + "\t.long .LEFDE0-.LASFDE0\n" + ".LASFDE0:\n" + "\t.long .Lframe0\n" + "\t.quad .Lbegin\n" + "\t.quad %d\n" + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ + "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ + "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ + "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ +#if LJ_NO_UNWIND + "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */ + "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */ +#endif + "\t.align 8\n" + ".LEFDE0:\n\n", fcofs, CFRAME_SIZE); +#if LJ_HASFFI + fprintf(ctx->fp, + ".LSFDE1:\n" + "\t.long .LEFDE1-.LASFDE1\n" + ".LASFDE1:\n" + "\t.long .Lframe0\n" + "\t.quad lj_vm_ffi_call\n" + "\t.quad %d\n" + "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ + "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ + "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ + "\t.align 8\n" + ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); +#endif +#if !LJ_NO_UNWIND +#if LJ_TARGET_SOLARIS + fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); +#else + fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); +#endif + fprintf(ctx->fp, + ".Lframe1:\n" + "\t.long .LECIE1-.LSCIE1\n" + ".LSCIE1:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.string \"zPR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 0x10\n" + "\t.uleb128 6\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.long lj_err_unwind_dwarf-.\n" + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" + "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" + "\t.align 8\n" + ".LECIE1:\n\n"); + fprintf(ctx->fp, + ".LSFDE2:\n" + "\t.long .LEFDE2-.LASFDE2\n" + ".LASFDE2:\n" + "\t.long .LASFDE2-.Lframe1\n" + "\t.long .Lbegin-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ + "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ + "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ + "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ + "\t.align 8\n" + ".LEFDE2:\n\n", fcofs, CFRAME_SIZE); +#if LJ_HASFFI + fprintf(ctx->fp, + ".Lframe2:\n" + "\t.long .LECIE2-.LSCIE2\n" + ".LSCIE2:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.string \"zR\"\n" + "\t.uleb128 0x1\n" + "\t.sleb128 -8\n" + "\t.byte 0x10\n" + "\t.uleb128 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.uleb128 0x7\n\t.uleb128 8\n" + "\t.byte 0x80+0x10\n\t.uleb128 0x1\n" + "\t.align 8\n" + ".LECIE2:\n\n"); + fprintf(ctx->fp, + ".LSFDE3:\n" + "\t.long .LEFDE3-.LASFDE3\n" + ".LASFDE3:\n" + "\t.long .LASFDE3-.Lframe2\n" + "\t.long lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.uleb128 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ + "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ + "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ + "\t.align 8\n" + ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); +#endif +#endif + break; +#if !LJ_NO_UNWIND + /* Mental note: never let Apple design an assembler. + ** Or a linker. Or a plastic case. But I digress. + */ + case BUILD_machasm: { +#if LJ_HASFFI + int fcsize = 0; +#endif + int i; + fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); + fprintf(ctx->fp, + "EH_frame1:\n" + "\t.set L$set$x,LECIEX-LSCIEX\n" + "\t.long L$set$x\n" + "LSCIEX:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.ascii \"zPR\\0\"\n" + "\t.byte 0x1\n" + "\t.byte 128-8\n" + "\t.byte 0x10\n" + "\t.byte 6\n" /* augmentation length */ + "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ + "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n" + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n" + "\t.byte 0x80+0x10\n\t.byte 0x1\n" + "\t.align 3\n" + "LECIEX:\n\n"); + for (i = 0; i < ctx->nsym; i++) { + const char *name = ctx->sym[i].name; + int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; + if (size == 0) continue; +#if LJ_HASFFI + if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } +#endif + fprintf(ctx->fp, + "%s.eh:\n" + "LSFDE%d:\n" + "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" + "\t.long L$set$%d\n" + "LASFDE%d:\n" + "\t.long LASFDE%d-EH_frame1\n" + "\t.long %s-.\n" + "\t.long %d\n" + "\t.byte 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ + "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ + "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */ + "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */ + "\t.align 3\n" + "LEFDE%d:\n\n", + name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i); + } +#if LJ_HASFFI + if (fcsize) { + fprintf(ctx->fp, + "EH_frame2:\n" + "\t.set L$set$y,LECIEY-LSCIEY\n" + "\t.long L$set$y\n" + "LSCIEY:\n" + "\t.long 0\n" + "\t.byte 0x1\n" + "\t.ascii \"zR\\0\"\n" + "\t.byte 0x1\n" + "\t.byte 128-8\n" + "\t.byte 0x10\n" + "\t.byte 1\n" /* augmentation length */ + "\t.byte 0x1b\n" /* pcrel|sdata4 */ + "\t.byte 0xc\n\t.byte 0x7\n\t.byte 8\n" + "\t.byte 0x80+0x10\n\t.byte 0x1\n" + "\t.align 3\n" + "LECIEY:\n\n"); + fprintf(ctx->fp, + "_lj_vm_ffi_call.eh:\n" + "LSFDEY:\n" + "\t.set L$set$yy,LEFDEY-LASFDEY\n" + "\t.long L$set$yy\n" + "LASFDEY:\n" + "\t.long LASFDEY-EH_frame2\n" + "\t.long _lj_vm_ffi_call-.\n" + "\t.long %d\n" + "\t.byte 0\n" /* augmentation length */ + "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */ + "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ + "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */ + "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ + "\t.align 3\n" + "LEFDEY:\n\n", fcsize); + } +#endif + fprintf(ctx->fp, ".subsections_via_symbols\n"); + } + break; +#endif + default: /* Difficult for other modes. */ + break; + } +} + diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 7db5e710..18ca87b5 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -18,7 +18,6 @@ | |.if P64 |.define X64, 1 -|.define SSE, 1 |.if WIN |.define X64WIN, 1 |.endif @@ -116,24 +115,74 @@ |.type NODE, Node |.type NARGS, int |.type TRACE, GCtrace +|.type SBUF, SBuf | |// Stack layout while in interpreter. Must match with lj_frame.h. |//----------------------------------------------------------------------- |.if not X64 // x86 stack layout. | -|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). +|.if WIN +| +|.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). |.macro saveregs_ | push edi; push esi; push ebx +| push extern lj_err_unwind_win +| fs; push dword [0] +| fs; mov [0], esp | sub esp, CFRAME_SPACE |.endmacro -|.macro saveregs -| push ebp; saveregs_ +|.macro restoreregs +| add esp, CFRAME_SPACE +| fs; pop dword [0] +| pop edi // Short for esp += 4. +| pop ebx; pop esi; pop edi; pop ebp +|.endmacro +| +|.else +| +|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). +|.macro saveregs_ +| push edi; push esi; push ebx +| sub esp, CFRAME_SPACE |.endmacro |.macro restoreregs | add esp, CFRAME_SPACE | pop ebx; pop esi; pop edi; pop ebp |.endmacro | +|.endif +| +|.macro saveregs +| push ebp; saveregs_ +|.endmacro +| +|.if WIN +|.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. +|.define SAVE_NRES, aword [esp+aword*18] +|.define SAVE_CFRAME, aword [esp+aword*17] +|.define SAVE_L, aword [esp+aword*16] +|//----- 16 byte aligned, ^^^ arguments from C caller +|.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. +|.define SAVE_R4, aword [esp+aword*14] +|.define SAVE_R3, aword [esp+aword*13] +|.define SAVE_R2, aword [esp+aword*12] +|//----- 16 byte aligned +|.define SAVE_R1, aword [esp+aword*11] +|.define SEH_FUNC, aword [esp+aword*10] +|.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. +|.define UNUSED2, aword [esp+aword*8] +|//----- 16 byte aligned +|.define UNUSED1, aword [esp+aword*7] +|.define SAVE_PC, aword [esp+aword*6] +|.define TMP2, aword [esp+aword*5] +|.define TMP1, aword [esp+aword*4] +|//----- 16 byte aligned +|.define ARG4, aword [esp+aword*3] +|.define ARG3, aword [esp+aword*2] +|.define ARG2, aword [esp+aword*1] +|.define ARG1, aword [esp] //<-- esp while in interpreter. +|//----- 16 byte aligned, ^^^ arguments for C callee +|.else |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. |.define SAVE_NRES, aword [esp+aword*14] |.define SAVE_CFRAME, aword [esp+aword*13] @@ -154,6 +203,7 @@ |.define ARG2, aword [esp+aword*1] |.define ARG1, aword [esp] //<-- esp while in interpreter. |//----- 16 byte aligned, ^^^ arguments for C callee +|.endif | |// FPARGx overlaps ARGx and ARG(x+1) on x86. |.define FPARG3, qword [esp+qword*1] @@ -389,7 +439,6 @@ | fpop |.endmacro | -|.macro fdup; fld st0; .endmacro |.macro fpop1; fstp st1; .endmacro | |// Synthesize SSE FP constants. @@ -552,6 +601,10 @@ static void build_subroutines(BuildCtx *ctx) |.else | mov eax, FCARG2 // Error return status for vm_pcall. | mov esp, FCARG1 + |.if WIN + | lea FCARG1, SEH_NEXT + | fs; mov [0], FCARG1 + |.endif |.endif |->vm_unwind_c_eh: // Landing pad for external unwinder. | mov L:RB, SAVE_L @@ -575,6 +628,10 @@ static void build_subroutines(BuildCtx *ctx) |.else | and FCARG1, CFRAME_RAWMASK | mov esp, FCARG1 + |.if WIN + | lea FCARG1, SEH_NEXT + | fs; mov [0], FCARG1 + |.endif |.endif |->vm_unwind_ff_eh: // Landing pad for external unwinder. | mov L:RB, SAVE_L @@ -588,6 +645,19 @@ static void build_subroutines(BuildCtx *ctx) | set_vmstate INTERP | jmp ->vm_returnc // Increments RD/MULTRES and returns. | + |.if WIN and not X64 + |->vm_rtlunwind@16: // Thin layer around RtlUnwind. + | // (void *cframe, void *excptrec, void *unwinder, int errcode) + | mov [esp], FCARG1 // Return value for RtlUnwind. + | push FCARG2 // Exception record for RtlUnwind. + | push 0 // Ignored by RtlUnwind. + | push dword [FCARG1+CFRAME_OFS_SEH] + | call extern RtlUnwind@16 // Violates ABI (clobbers too much). + | mov FCARG1, eax + | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). + | ret // Jump to unwinder. + |.endif + | |//----------------------------------------------------------------------- |//-- Grow stack for calls ----------------------------------------------- |//----------------------------------------------------------------------- @@ -643,17 +713,18 @@ static void build_subroutines(BuildCtx *ctx) | lea KBASEa, [esp+CFRAME_RESUME] | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | add DISPATCH, GG_G2DISP - | mov L:RB->cframe, KBASEa | mov SAVE_PC, RD // Any value outside of bytecode is ok. | mov SAVE_CFRAME, RDa |.if X64 | mov SAVE_NRES, RD | mov SAVE_ERRF, RD |.endif + | mov L:RB->cframe, KBASEa | cmp byte L:RB->status, RDL - | je >3 // Initial resume (like a call). + | je >2 // Initial resume (like a call). | | // Resume after yield (like a return). + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | set_vmstate INTERP | mov byte L:RB->status, RDL | mov BASE, L:RB->base @@ -693,20 +764,19 @@ static void build_subroutines(BuildCtx *ctx) | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! |.endif | + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | mov SAVE_CFRAME, KBASEa | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. + | add DISPATCH, GG_G2DISP |.if X64 | mov L:RB->cframe, rsp |.else | mov L:RB->cframe, esp |.endif | - |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype). - | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. - | add DISPATCH, GG_G2DISP - | - |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype). + |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | set_vmstate INTERP | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | add PC, RA @@ -744,14 +814,17 @@ static void build_subroutines(BuildCtx *ctx) | | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). | sub KBASE, L:RB->top + | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | mov SAVE_ERRF, 0 // No error function. | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. + | add DISPATCH, GG_G2DISP | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | |.if X64 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | mov SAVE_CFRAME, KBASEa | mov L:RB->cframe, rsp + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) |.else @@ -762,6 +835,7 @@ static void build_subroutines(BuildCtx *ctx) | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | mov SAVE_CFRAME, KBASE | mov L:RB->cframe, esp + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | | call BASE // (lua_State *L, lua_CFunction func, void *ud) |.endif @@ -869,13 +943,9 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM | mov TMP2, LJ_TISNUM | mov TMP1, RC - |.elif SSE + |.else | cvtsi2sd xmm0, RC | movsd TMPQ, xmm0 - |.else - | mov ARG4, RC - | fild ARG4 - | fstp TMPQ |.endif | lea RCa, TMPQ // Store temp. TValue in TMPQ. | jmp >1 @@ -929,6 +999,19 @@ static void build_subroutines(BuildCtx *ctx) | mov NARGS:RD, 2+1 // 2 args for func(t, k). | jmp ->vm_call_dispatch_f | + |->vmeta_tgetr: + | mov FCARG1, TAB:RB + | mov RB, BASE // Save BASE. + | mov FCARG2, RC // Caveat: FCARG2 == BASE + | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) + | // cTValue * or NULL returned in eax (RC). + | movzx RA, PC_RA + | mov BASE, RB // Restore BASE. + | test RC, RC + | jnz ->BC_TGETR_Z + | mov dword [BASE+RA*8+4], LJ_TNIL + | jmp ->BC_TGETR2_Z + | |//----------------------------------------------------------------------- | |->vmeta_tsets: @@ -948,13 +1031,9 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM | mov TMP2, LJ_TISNUM | mov TMP1, RC - |.elif SSE + |.else | cvtsi2sd xmm0, RC | movsd TMPQ, xmm0 - |.else - | mov ARG4, RC - | fild ARG4 - | fstp TMPQ |.endif | lea RCa, TMPQ // Store temp. TValue in TMPQ. | jmp >1 @@ -1020,6 +1099,33 @@ static void build_subroutines(BuildCtx *ctx) | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). | jmp ->vm_call_dispatch_f | + |->vmeta_tsetr: + |.if X64WIN + | mov L:CARG1d, SAVE_L + | mov CARG3d, RC + | mov L:CARG1d->base, BASE + | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. + |.elif X64 + | mov L:CARG1d, SAVE_L + | mov CARG2d, TAB:RB + | mov L:CARG1d->base, BASE + | mov RB, BASE // Save BASE. + | mov CARG3d, RC // Caveat: CARG3d == BASE. + |.else + | mov L:RA, SAVE_L + | mov ARG2, TAB:RB + | mov RB, BASE // Save BASE. + | mov ARG3, RC + | mov ARG1, L:RA + | mov L:RA->base, BASE + |.endif + | mov SAVE_PC, PC + | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) + | // TValue * returned in eax (RC). + | movzx RA, PC_RA + | mov BASE, RB // Restore BASE. + | jmp ->BC_TSETR_Z + | |//-- Comparison metamethods --------------------------------------------- | |->vmeta_comp: @@ -1114,6 +1220,26 @@ static void build_subroutines(BuildCtx *ctx) | jmp <3 |.endif | + |->vmeta_istype: + |.if X64 + | mov L:RB, SAVE_L + | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. + | mov CARG2d, RA + | movzx CARG3d, PC_RD + | mov L:CARG1d, L:RB + |.else + | movzx RD, PC_RD + | mov ARG2, RA + | mov L:RB, SAVE_L + | mov ARG3, RD + | mov ARG1, L:RB + | mov L:RB->base, BASE + |.endif + | mov SAVE_PC, PC + | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) + | mov BASE, L:RB->base + | jmp <6 + | |//-- Arithmetic metamethods --------------------------------------------- | |->vmeta_arith_vno: @@ -1290,19 +1416,6 @@ static void build_subroutines(BuildCtx *ctx) | cmp NARGS:RD, 2+1; jb ->fff_fallback |.endmacro | - |.macro .ffunc_n, name - | .ffunc_1 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | fld qword [BASE] - |.endmacro - | - |.macro .ffunc_n, name, op - | .ffunc_1 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | op - | fld qword [BASE] - |.endmacro - | |.macro .ffunc_nsse, name, op | .ffunc_1 name | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback @@ -1313,14 +1426,6 @@ static void build_subroutines(BuildCtx *ctx) | .ffunc_nsse name, movsd |.endmacro | - |.macro .ffunc_nn, name - | .ffunc_2 name - | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback - | fld qword [BASE] - | fld qword [BASE+8] - |.endmacro - | |.macro .ffunc_nnsse, name | .ffunc_2 name | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback @@ -1418,7 +1523,7 @@ static void build_subroutines(BuildCtx *ctx) | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. | mov [BASE-8], TAB:RB | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash + | and RA, STR:RC->sid | imul RA, #NODE | add NODE:RA, TAB:RB->node |3: // Rearranged logic, because we expect _not_ to find the key. @@ -1526,11 +1631,7 @@ static void build_subroutines(BuildCtx *ctx) |.else | jae ->fff_fallback |.endif - |.if SSE | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 - |.else - | fld qword [BASE]; jmp ->fff_resn - |.endif | |.ffunc_1 tostring | // Only handles the string or number case inline. @@ -1555,9 +1656,9 @@ static void build_subroutines(BuildCtx *ctx) |.endif | mov L:FCARG1, L:RB |.if DUALNUM - | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o) + | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) |.else - | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) + | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) |.endif | // GCstr returned in eax (RD). | mov BASE, L:RB->base @@ -1569,55 +1670,35 @@ static void build_subroutines(BuildCtx *ctx) | je >2 // Missing 2nd arg? |1: | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | mov L:RB, SAVE_L - | mov L:RB->base, BASE // Add frame since C call can throw. - | mov L:RB->top, BASE // Dummy frame length is ok. | mov PC, [BASE-4] + | mov RB, BASE // Save BASE. |.if X64WIN - | lea CARG3d, [BASE+8] - | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. - | mov CARG1d, L:RB + | mov CARG1d, [BASE] + | lea CARG3d, [BASE-8] + | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE. |.elif X64 - | mov CARG2d, [BASE] - | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. - | mov CARG1d, L:RB + | mov CARG1d, [BASE] + | lea CARG2d, [BASE+8] + | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE. |.else | mov TAB:RD, [BASE] - | mov ARG2, TAB:RD - | mov ARG1, L:RB + | mov ARG1, TAB:RD | add BASE, 8 + | mov ARG2, BASE + | sub BASE, 8+8 | mov ARG3, BASE |.endif - | mov SAVE_PC, PC // Needed for ITERN fallback. - | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) - | // Flag returned in eax (RD). - | mov BASE, L:RB->base - | test RD, RD; jz >3 // End of traversal? - | // Copy key and value to results. - |.if X64 - | mov RBa, [BASE+8] - | mov RDa, [BASE+16] - | mov [BASE-8], RBa - | mov [BASE], RDa - |.else - | mov RB, [BASE+8] - | mov RD, [BASE+12] - | mov [BASE-8], RB - | mov [BASE-4], RD - | mov RB, [BASE+16] - | mov RD, [BASE+20] - | mov [BASE], RB - | mov [BASE+4], RD - |.endif - |->fff_res2: - | mov RD, 1+2 - | jmp ->fff_res + | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // 1=found, 0=end, -1=error returned in eax (RD). + | mov BASE, RB // Restore BASE. + | test RD, RD; jg ->fff_res2 // Found key/value. + | js ->fff_fallback_2 // Invalid key. + | // End of traversal: return nil. + | mov dword [BASE-4], LJ_TNIL + | jmp ->fff_res1 |2: // Set missing 2nd arg to nil. | mov dword [BASE+12], LJ_TNIL | jmp <1 - |3: // End of traversal: return nil. - | mov dword [BASE-4], LJ_TNIL - | jmp ->fff_res1 | |.ffunc_1 pairs | mov TAB:RB, [BASE] @@ -1648,19 +1729,12 @@ static void build_subroutines(BuildCtx *ctx) | add RD, 1 | mov dword [BASE-4], LJ_TISNUM | mov dword [BASE-8], RD - |.elif SSE + |.else | movsd xmm0, qword [BASE+8] | sseconst_1 xmm1, RBa | addsd xmm0, xmm1 - | cvtsd2si RD, xmm0 + | cvttsd2si RD, xmm0 | movsd qword [BASE-8], xmm0 - |.else - | fld qword [BASE+8] - | fld1 - | faddp st1 - | fist ARG1 - | fstp qword [BASE-8] - | mov RD, ARG1 |.endif | mov TAB:RB, [BASE] | cmp RD, TAB:RB->asize; jae >2 // Not in array part? @@ -1678,7 +1752,9 @@ static void build_subroutines(BuildCtx *ctx) | mov [BASE], RB | mov [BASE+4], RD |.endif - | jmp ->fff_res2 + |->fff_res2: + | mov RD, 1+2 + | jmp ->fff_res |2: // Check for empty hash part first. Otherwise call C function. | cmp dword TAB:RB->hmask, 0; je ->fff_res0 | mov FCARG1, TAB:RB @@ -1707,12 +1783,9 @@ static void build_subroutines(BuildCtx *ctx) |.if DUALNUM | mov dword [BASE+12], LJ_TISNUM | mov dword [BASE+8], 0 - |.elif SSE + |.else | xorps xmm0, xmm0 | movsd qword [BASE+8], xmm0 - |.else - | fldz - | fstp qword [BASE+8] |.endif | mov RD, 1+3 | jmp ->fff_res @@ -1819,7 +1892,6 @@ static void build_subroutines(BuildCtx *ctx) | mov ARG3, RA |.endif | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) - | set_vmstate INTERP | | mov L:RB, SAVE_L |.if X64 @@ -1828,6 +1900,9 @@ static void build_subroutines(BuildCtx *ctx) | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. |.endif | mov BASE, L:RB->base + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + | set_vmstate INTERP + | | cmp eax, LUA_YIELD | ja >8 |4: @@ -1942,12 +2017,10 @@ static void build_subroutines(BuildCtx *ctx) |->fff_resi: // Dummy. |.endif | - |.if SSE |->fff_resn: | mov PC, [BASE-4] | fstp qword [BASE-8] | jmp ->fff_res1 - |.endif | | .ffunc_1 math_abs |.if DUALNUM @@ -1971,8 +2044,6 @@ static void build_subroutines(BuildCtx *ctx) |.else | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |.endif - | - |.if SSE | movsd xmm0, qword [BASE] | sseconst_abs xmm1, RDa | andps xmm0, xmm1 @@ -1980,15 +2051,6 @@ static void build_subroutines(BuildCtx *ctx) | mov PC, [BASE-4] | movsd qword [BASE-8], xmm0 | // fallthrough - |.else - | fld qword [BASE] - | fabs - | // fallthrough - |->fff_resxmm0: // Dummy. - |->fff_resn: - | mov PC, [BASE-4] - | fstp qword [BASE-8] - |.endif | |->fff_res1: | mov RD, 1+1 @@ -2015,6 +2077,12 @@ static void build_subroutines(BuildCtx *ctx) | mov RAa, -8 // Results start at BASE+RA = BASE-8. | jmp ->vm_return | + |.if X64 + |.define fff_resfp, fff_resxmm0 + |.else + |.define fff_resfp, fff_resn + |.endif + | |.macro math_round, func | .ffunc math_ .. func |.if DUALNUM @@ -2025,107 +2093,75 @@ static void build_subroutines(BuildCtx *ctx) |.else | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback |.endif - |.if SSE | movsd xmm0, qword [BASE] - | call ->vm_ .. func - | .if DUALNUM - | cvtsd2si RB, xmm0 - | cmp RB, 0x80000000 - | jne ->fff_resi - | cvtsi2sd xmm1, RB - | ucomisd xmm0, xmm1 - | jp ->fff_resxmm0 - | je ->fff_resi - | .endif - | jmp ->fff_resxmm0 - |.else - | fld qword [BASE] - | call ->vm_ .. func - | .if DUALNUM - | fist ARG1 - | mov RB, ARG1 - | cmp RB, 0x80000000; jne >2 - | fdup - | fild ARG1 - | fcomparepp - | jp ->fff_resn - | jne ->fff_resn - |2: - | fpop - | jmp ->fff_resi - | .else - | jmp ->fff_resn - | .endif + | call ->vm_ .. func .. _sse + |.if DUALNUM + | cvttsd2si RB, xmm0 + | cmp RB, 0x80000000 + | jne ->fff_resi + | cvtsi2sd xmm1, RB + | ucomisd xmm0, xmm1 + | jp ->fff_resxmm0 + | je ->fff_resi |.endif + | jmp ->fff_resxmm0 |.endmacro | | math_round floor | math_round ceil | - |.if SSE |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 - |.else - |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn - |.endif | |.ffunc math_log | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback - | fldln2; fld qword [BASE]; fyl2x; jmp ->fff_resn - | - |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn - |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn - | - |.ffunc_n math_sin; fsin; jmp ->fff_resn - |.ffunc_n math_cos; fcos; jmp ->fff_resn - |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn - | - |.ffunc_n math_asin - | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan - | jmp ->fff_resn - |.ffunc_n math_acos - | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan - | jmp ->fff_resn - |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn + | movsd xmm0, qword [BASE] + |.if not X64 + | movsd FPARG1, xmm0 + |.endif + | mov RB, BASE + | call extern log + | mov BASE, RB + | jmp ->fff_resfp | |.macro math_extern, func - |.if SSE | .ffunc_nsse math_ .. func - | .if not X64 - | movsd FPARG1, xmm0 - | .endif - |.else - | .ffunc_n math_ .. func - | fstp FPARG1 + |.if not X64 + | movsd FPARG1, xmm0 |.endif | mov RB, BASE - | call extern lj_vm_ .. func + | call extern func | mov BASE, RB - | .if X64 - | jmp ->fff_resxmm0 - | .else - | jmp ->fff_resn - | .endif + | jmp ->fff_resfp |.endmacro | + |.macro math_extern2, func + | .ffunc_nnsse math_ .. func + |.if not X64 + | movsd FPARG1, xmm0 + | movsd FPARG3, xmm1 + |.endif + | mov RB, BASE + | call extern func + | mov BASE, RB + | jmp ->fff_resfp + |.endmacro + | + | math_extern log10 + | math_extern exp + | math_extern sin + | math_extern cos + | math_extern tan + | math_extern asin + | math_extern acos + | math_extern atan | math_extern sinh | math_extern cosh | math_extern tanh + | math_extern2 pow + | math_extern2 atan2 + | math_extern2 fmod | - |->ff_math_deg: - |.if SSE - |.ffunc_nsse math_rad - | mov CFUNC:RB, [BASE-8] - | mulsd xmm0, qword CFUNC:RB->upvalue[0] - | jmp ->fff_resxmm0 - |.else - |.ffunc_n math_rad - | mov CFUNC:RB, [BASE-8] - | fmul qword CFUNC:RB->upvalue[0] - | jmp ->fff_resn - |.endif - | - |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn | |.ffunc_1 math_frexp @@ -2140,65 +2176,34 @@ static void build_subroutines(BuildCtx *ctx) | cmp RB, 0x00200000; jb >4 |1: | shr RB, 21; sub RB, RC // Extract and unbias exponent. - |.if SSE | cvtsi2sd xmm0, RB - |.else - | mov TMP1, RB; fild TMP1 - |.endif | mov RB, [BASE-4] | and RB, 0x800fffff // Mask off exponent. | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | mov [BASE-4], RB |2: - |.if SSE | movsd qword [BASE], xmm0 - |.else - | fstp qword [BASE] - |.endif | mov RD, 1+2 | jmp ->fff_res |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. - |.if SSE | xorps xmm0, xmm0; jmp <2 - |.else - | fldz; jmp <2 - |.endif |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. - |.if SSE | movsd xmm0, qword [BASE] | sseconst_hi xmm1, RBa, 43500000 // 2^54. | mulsd xmm0, xmm1 | movsd qword [BASE-8], xmm0 - |.else - | fld qword [BASE] - | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 - | fstp qword [BASE-8] - |.endif | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 | - |.if SSE |.ffunc_nsse math_modf - |.else - |.ffunc_n math_modf - |.endif | mov RB, [BASE+4] | mov PC, [BASE-4] | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? - |.if SSE | movaps xmm4, xmm0 - | call ->vm_trunc + | call ->vm_trunc_sse | subsd xmm4, xmm0 |1: | movsd qword [BASE-8], xmm0 | movsd qword [BASE], xmm4 - |.else - | fdup - | call ->vm_trunc - | fsub st1, st0 - |1: - | fstp qword [BASE-8] - | fstp qword [BASE] - |.endif | mov RC, [BASE-4]; mov RB, [BASE+4] | xor RC, RB; js >3 // Need to adjust sign? |2: @@ -2208,25 +2213,10 @@ static void build_subroutines(BuildCtx *ctx) | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. | jmp <2 |4: - |.if SSE | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. - |.else - | fldz; fxch; jmp <1 // Return +-Inf and +-0. - |.endif | - |.ffunc_nnr math_fmod - |1: ; fprem; fnstsw ax; and ax, 0x400; jnz <1 - | fpop1 - | jmp ->fff_resn - | - |.if SSE - |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0 - |.else - |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn - |.endif - | - |.macro math_minmax, name, cmovop, fcmovop, sseop - | .ffunc name + |.macro math_minmax, name, cmovop, sseop + | .ffunc_1 name | mov RA, 2 | cmp dword [BASE+4], LJ_TISNUM |.if DUALNUM @@ -2242,12 +2232,7 @@ static void build_subroutines(BuildCtx *ctx) |3: | ja ->fff_fallback | // Convert intermediate result to number and continue below. - |.if SSE | cvtsi2sd xmm0, RB - |.else - | mov TMP1, RB - | fild TMP1 - |.endif | jmp >6 |4: | ja ->fff_fallback @@ -2255,7 +2240,6 @@ static void build_subroutines(BuildCtx *ctx) | jae ->fff_fallback |.endif | - |.if SSE | movsd xmm0, qword [BASE] |5: // Handle numbers or integers. | cmp RA, RD; jae ->fff_resxmm0 @@ -2274,48 +2258,13 @@ static void build_subroutines(BuildCtx *ctx) | sseop xmm0, xmm1 | add RA, 1 | jmp <5 - |.else - | fld qword [BASE] - |5: // Handle numbers or integers. - | cmp RA, RD; jae ->fff_resn - | cmp dword [BASE+RA*8-4], LJ_TISNUM - |.if DUALNUM - | jb >6 - | ja >9 - | fild dword [BASE+RA*8-8] - | jmp >7 - |.else - | jae >9 - |.endif - |6: - | fld qword [BASE+RA*8-8] - |7: - | fucomi st1; fcmovop st1; fpop1 - | add RA, 1 - | jmp <5 - |.endif |.endmacro | - | math_minmax math_min, cmovg, fcmovnbe, minsd - | math_minmax math_max, cmovl, fcmovbe, maxsd - |.if not SSE - |9: - | fpop; jmp ->fff_fallback - |.endif + | math_minmax math_min, cmovg, minsd + | math_minmax math_max, cmovl, maxsd | |//-- String library ----------------------------------------------------- | - |.ffunc_1 string_len - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | mov STR:RB, [BASE] - |.if DUALNUM - | mov RB, dword STR:RB->len; jmp ->fff_resi - |.elif SSE - | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 - |.else - | fild dword STR:RB->len; jmp ->fff_resn - |.endif - | |.ffunc string_byte // Only handle the 1-arg case here. | cmp NARGS:RD, 1+1; jne ->fff_fallback | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback @@ -2326,10 +2275,8 @@ static void build_subroutines(BuildCtx *ctx) | movzx RB, byte STR:RB[1] |.if DUALNUM | jmp ->fff_resi - |.elif SSE - | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |.else - | mov TMP1, RB; fild TMP1; jmp ->fff_resn + | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 |.endif | |.ffunc string_char // Only handle the 1-arg case here. @@ -2341,16 +2288,11 @@ static void build_subroutines(BuildCtx *ctx) | mov RB, dword [BASE] | cmp RB, 255; ja ->fff_fallback | mov TMP2, RB - |.elif SSE + |.else | jae ->fff_fallback | cvttsd2si RB, qword [BASE] | cmp RB, 255; ja ->fff_fallback | mov TMP2, RB - |.else - | jae ->fff_fallback - | fld qword [BASE] - | fistp TMP2 - | cmp TMP2, 255; ja ->fff_fallback |.endif |.if X64 | mov TMP3, 1 @@ -2371,6 +2313,7 @@ static void build_subroutines(BuildCtx *ctx) |.endif | mov SAVE_PC, PC | call extern lj_str_new // (lua_State *L, char *str, size_t l) + |->fff_resstr: | // GCstr * returned in eax (RD). | mov BASE, L:RB->base | mov PC, [BASE-4] @@ -2388,14 +2331,10 @@ static void build_subroutines(BuildCtx *ctx) | jne ->fff_fallback | mov RB, dword [BASE+16] | mov TMP2, RB - |.elif SSE + |.else | jae ->fff_fallback | cvttsd2si RB, qword [BASE+16] | mov TMP2, RB - |.else - | jae ->fff_fallback - | fld qword [BASE+16] - | fistp TMP2 |.endif |1: | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback @@ -2410,12 +2349,8 @@ static void build_subroutines(BuildCtx *ctx) | mov RB, STR:RB->len |.if DUALNUM | mov RA, dword [BASE+8] - |.elif SSE - | cvttsd2si RA, qword [BASE+8] |.else - | fld qword [BASE+8] - | fistp ARG3 - | mov RA, ARG3 + | cvttsd2si RA, qword [BASE+8] |.endif | mov RC, TMP2 | cmp RB, RC // len < end? (unsigned compare) @@ -2459,136 +2394,34 @@ static void build_subroutines(BuildCtx *ctx) | xor RC, RC // Zero length. Any ptr in RB is ok. | jmp <4 | - |.ffunc string_rep // Only handle the 1-char case inline. + |.macro ffstring_op, name + | .ffunc_1 string_ .. name | ffgccheck - | cmp NARGS:RD, 2+1; jne ->fff_fallback // Exactly 2 arguments. | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | cmp dword [BASE+12], LJ_TISNUM - | mov STR:RB, [BASE] - |.if DUALNUM - | jne ->fff_fallback - | mov RC, dword [BASE+8] - |.elif SSE - | jae ->fff_fallback - | cvttsd2si RC, qword [BASE+8] - |.else - | jae ->fff_fallback - | fld qword [BASE+8] - | fistp TMP2 - | mov RC, TMP2 - |.endif - | test RC, RC - | jle ->fff_emptystr // Count <= 0? (or non-int) - | cmp dword STR:RB->len, 1 - | jb ->fff_emptystr // Zero length string? - | jne ->fff_fallback_2 // Fallback for > 1-char strings. - | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 - | movzx RA, byte STR:RB[1] - | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] - |.if X64 - | mov TMP3, RC - |.else - | mov ARG3, RC - |.endif - |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?). - | mov [RB], RAL - | add RB, 1 - | sub RC, 1 - | jnz <1 - | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] - | jmp ->fff_newstr - | - |.ffunc_1 string_reverse - | ffgccheck - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | mov STR:RB, [BASE] - | mov RC, STR:RB->len - | test RC, RC - | jz ->fff_emptystr // Zero length string? - | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 - | add RB, #STR - | mov TMP2, PC // Need another temp register. - |.if X64 - | mov TMP3, RC - |.else - | mov ARG3, RC - |.endif - | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] - |1: - | movzx RA, byte [RB] - | add RB, 1 - | sub RC, 1 - | mov [PC+RC], RAL - | jnz <1 - | mov RD, PC - | mov PC, TMP2 - | jmp ->fff_newstr - | - |.macro ffstring_case, name, lo, hi - | .ffunc_1 name - | ffgccheck - | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback - | mov STR:RB, [BASE] - | mov RC, STR:RB->len - | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 - | add RB, #STR - | mov TMP2, PC // Need another temp register. - |.if X64 - | mov TMP3, RC - |.else - | mov ARG3, RC - |.endif - | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] - | jmp >3 - |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?). - | movzx RA, byte [RB+RC] - | cmp RA, lo - | jb >2 - | cmp RA, hi - | ja >2 - | xor RA, 0x20 - |2: - | mov [PC+RC], RAL - |3: - | sub RC, 1 - | jns <1 - | mov RD, PC - | mov PC, TMP2 - | jmp ->fff_newstr + | mov L:RB, SAVE_L + | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] + | mov L:RB->base, BASE + | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE + | mov RCa, SBUF:FCARG1->b + | mov SBUF:FCARG1->L, L:RB + | mov SBUF:FCARG1->w, RCa + | mov SAVE_PC, PC + | call extern lj_buf_putstr_ .. name .. @8 + | mov FCARG1, eax + | call extern lj_buf_tostr@4 + | jmp ->fff_resstr |.endmacro | - |ffstring_case string_lower, 0x41, 0x5a - |ffstring_case string_upper, 0x61, 0x7a - | - |//-- Table library ------------------------------------------------------ - | - |.ffunc_1 table_getn - | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback - | mov RB, BASE // Save BASE. - | mov TAB:FCARG1, [BASE] - | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) - | // Length of table returned in eax (RD). - | mov BASE, RB // Restore BASE. - |.if DUALNUM - | mov RB, RD; jmp ->fff_resi - |.elif SSE - | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0 - |.else - | mov ARG1, RD; fild ARG1; jmp ->fff_resn - |.endif + |ffstring_op reverse + |ffstring_op lower + |ffstring_op upper | |//-- Bit library -------------------------------------------------------- | - |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!). - | |.macro .ffunc_bit, name, kind, fdef | fdef name |.if kind == 2 - |.if SSE | sseconst_tobit xmm1, RBa - |.else - | mov TMP1, TOBIT_BIAS - |.endif |.endif | cmp dword [BASE+4], LJ_TISNUM |.if DUALNUM @@ -2604,24 +2437,12 @@ static void build_subroutines(BuildCtx *ctx) |.else | jae ->fff_fallback |.endif - |.if SSE | movsd xmm0, qword [BASE] |.if kind < 2 | sseconst_tobit xmm1, RBa |.endif | addsd xmm0, xmm1 | movd RB, xmm0 - |.else - | fld qword [BASE] - |.if kind < 2 - | mov TMP1, TOBIT_BIAS - |.endif - | fadd TMP1 - | fstp FPARG1 - |.if kind > 0 - | mov RB, ARG1 - |.endif - |.endif |2: |.endmacro | @@ -2630,15 +2451,7 @@ static void build_subroutines(BuildCtx *ctx) |.endmacro | |.ffunc_bit bit_tobit, 0 - |.if DUALNUM or SSE - |.if not SSE - | mov RB, ARG1 - |.endif | jmp ->fff_resbit - |.else - | fild ARG1 - | jmp ->fff_resn - |.endif | |.macro .ffunc_bit_op, name, ins | .ffunc_bit name, 2 @@ -2658,17 +2471,10 @@ static void build_subroutines(BuildCtx *ctx) |.else | jae ->fff_fallback_bit_op |.endif - |.if SSE | movsd xmm0, qword [RD] | addsd xmm0, xmm1 | movd RA, xmm0 | ins RB, RA - |.else - | fld qword [RD] - | fadd TMP1 - | fstp FPARG1 - | ins RB, ARG1 - |.endif | sub RD, 8 | jmp <1 |.endmacro @@ -2685,15 +2491,10 @@ static void build_subroutines(BuildCtx *ctx) | not RB |.if DUALNUM | jmp ->fff_resbit - |.elif SSE + |.else |->fff_resbit: | cvtsi2sd xmm0, RB | jmp ->fff_resxmm0 - |.else - |->fff_resbit: - | mov ARG1, RB - | fild ARG1 - | jmp ->fff_resn |.endif | |->fff_fallback_bit_op: @@ -2706,22 +2507,13 @@ static void build_subroutines(BuildCtx *ctx) | // Note: no inline conversion from number for 2nd argument! | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | mov RA, dword [BASE+8] - |.elif SSE + |.else | .ffunc_nnsse name | sseconst_tobit xmm2, RBa | addsd xmm0, xmm2 | addsd xmm1, xmm2 | movd RB, xmm0 | movd RA, xmm1 - |.else - | .ffunc_nn name - | mov TMP1, TOBIT_BIAS - | fadd TMP1 - | fstp FPARG3 - | fadd TMP1 - | fstp FPARG1 - | mov RA, ARG3 - | mov RB, ARG1 |.endif | ins RB, cl // Assumes RA is ecx. | jmp ->fff_resbit @@ -2855,7 +2647,7 @@ static void build_subroutines(BuildCtx *ctx) | mov FCARG2, PC // Caveat: FCARG2 == BASE | mov FCARG1, L:RB | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. - | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc) + | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) |3: | mov BASE, L:RB->base |4: @@ -2926,6 +2718,79 @@ static void build_subroutines(BuildCtx *ctx) | add NARGS:RD, 1 | jmp RBa | + |->cont_stitch: // Trace stitching. + |.if JIT + | // BASE = base, RC = result, RB = mbase + | mov TRACE:RA, [RB-24] // Save previous trace. + | mov TMP1, TRACE:RA + | mov TMP3, DISPATCH // Need one more register. + | mov DISPATCH, MULTRES + | movzx RA, PC_RA + | lea RA, [BASE+RA*8] // Call base. + | sub DISPATCH, 1 + | jz >2 + |1: // Move results down. + |.if X64 + | mov RBa, [RC] + | mov [RA], RBa + |.else + | mov RB, [RC] + | mov [RA], RB + | mov RB, [RC+4] + | mov [RA+4], RB + |.endif + | add RC, 8 + | add RA, 8 + | sub DISPATCH, 1 + | jnz <1 + |2: + | movzx RC, PC_RA + | movzx RB, PC_RB + | add RC, RB + | lea RC, [BASE+RC*8-8] + |3: + | cmp RC, RA + | ja >9 // More results wanted? + | + | mov DISPATCH, TMP3 + | mov TRACE:RD, TMP1 // Get previous trace. + | movzx RB, word TRACE:RD->traceno + | movzx RD, word TRACE:RD->link + | cmp RD, RB + | je ->cont_nop // Blacklisted. + | test RD, RD + | jne =>BC_JLOOP // Jump to stitched trace. + | + | // Stitch a new trace to the previous trace. + | mov [DISPATCH+DISPATCH_J(exitno)], RB + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov FCARG2, PC + | lea FCARG1, [DISPATCH+GG_DISP2J] + | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa + | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) + | mov BASE, L:RB->base + | jmp ->cont_nop + | + |9: // Fill up results with nil. + | mov dword [RA+4], LJ_TNIL + | add RA, 8 + | jmp <3 + |.endif + | + |->vm_profhook: // Dispatch target for profiler hook. +#if LJ_HASPROFILE + | mov L:RB, SAVE_L + | mov L:RB->base, BASE + | mov FCARG2, PC // Caveat: FCARG2 == BASE + | mov FCARG1, L:RB + | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) + | mov BASE, L:RB->base + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | sub PC, 4 + | jmp ->cont_nop +#endif + | |//----------------------------------------------------------------------- |//-- Trace exit handler ------------------------------------------------- |//----------------------------------------------------------------------- @@ -2978,10 +2843,9 @@ static void build_subroutines(BuildCtx *ctx) | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 |.endif | // Caveat: RB is ebp. - | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)] + | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa - | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 | mov L:RB->base, BASE |.if X64WIN | lea CARG2, [rsp+4*8] @@ -2991,6 +2855,7 @@ static void build_subroutines(BuildCtx *ctx) | lea FCARG2, [esp+16] |.endif | lea FCARG1, [DISPATCH+GG_DISP2J] + | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) | // MULTRES or negated error code returned in eax (RD). | mov RAa, L:RB->cframe @@ -3037,12 +2902,14 @@ static void build_subroutines(BuildCtx *ctx) | mov r13, TMPa | mov r12, TMPQ |.endif - | test RD, RD; js >3 // Check for error from exit. + | test RD, RD; js >9 // Check for error from exit. + | mov L:RB, SAVE_L | mov MULTRES, RD | mov LFUNC:KBASE, [BASE-8] | mov KBASE, LFUNC:KBASE->pc | mov KBASE, [KBASE+PC2PROTO(k)] - | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0 + | mov L:RB->base, BASE + | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | set_vmstate INTERP | // Modified copy of ins_next which handles function header dispatch, too. | mov RC, [PC] @@ -3051,18 +2918,35 @@ static void build_subroutines(BuildCtx *ctx) | add PC, 4 | shr RC, 16 | cmp OP, BC_FUNCF // Function header? - | jb >2 - | mov RC, MULTRES // RC/RD holds nres+1. + | jb >3 + | cmp OP, BC_FUNCC+2 // Fast function? + | jae >4 |2: + | mov RC, MULTRES // RC/RD holds nres+1. + |3: |.if X64 | jmp aword [DISPATCH+OP*8] |.else | jmp aword [DISPATCH+OP*4] |.endif | - |3: // Rethrow error from the right C frame. + |4: // Check frame below fast function. + | mov RC, [BASE-4] + | test RC, FRAME_TYPE + | jnz <2 // Trace stitching continuation? + | // Otherwise set KBASE for Lua function below fast function. + | movzx RC, byte [RC-3] + | not RCa + | mov LFUNC:KBASE, [BASE+RC*8-8] + | mov KBASE, LFUNC:KBASE->pc + | mov KBASE, [KBASE+PC2PROTO(k)] + | jmp <2 + | + |9: // Rethrow error from the right C frame. + | mov FCARG2, RD | mov FCARG1, L:RB - | call extern lj_err_run@4 // (lua_State *L) + | neg FCARG2 + | call extern lj_err_trace@8 // (lua_State *L, int errcode) |.endif | |//----------------------------------------------------------------------- @@ -3070,27 +2954,18 @@ static void build_subroutines(BuildCtx *ctx) |//----------------------------------------------------------------------- | |// FP value rounding. Called by math.floor/math.ceil fast functions - |// and from JIT code. - | - |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified. - |.macro vm_round_x87, mode1, mode2 - | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2. - | mov [esp+8], eax - | mov ax, mode1 - | or ax, [esp+4] - |.if mode2 ~= 0xffff - | and ax, mode2 - |.endif - | mov [esp+6], ax - | fldcw word [esp+6] - | frndint - | fldcw word [esp+4] - | mov eax, [esp+8] + |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. + |.macro vm_round, name, mode, cond + |->name: + |.if not X64 and cond + | movsd xmm0, qword [esp+4] + | call ->name .. _sse + | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. + | fld qword [esp+4] | ret - |.endmacro + |.endif | - |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. - |.macro vm_round_sse, mode + |->name .. _sse: | sseconst_abs xmm2, RDa | sseconst_2p52 xmm3, RDa | movaps xmm1, xmm0 @@ -3128,22 +3003,12 @@ static void build_subroutines(BuildCtx *ctx) | ret |.endmacro | - |.macro vm_round, name, ssemode, mode1, mode2 - |->name: - |.if not SSE - | vm_round_x87 mode1, mode2 - |.endif - |->name .. _sse: - | vm_round_sse ssemode - |.endmacro - | - | vm_round vm_floor, 0, 0x0400, 0xf7ff - | vm_round vm_ceil, 1, 0x0800, 0xfbff - | vm_round vm_trunc, 2, 0x0c00, 0xffff + | vm_round vm_floor, 0, 1 + | vm_round vm_ceil, 1, JIT + | vm_round vm_trunc, 2, JIT | |// FP modulo x%y. Called by BC_MOD* and vm_arith. |->vm_mod: - |.if SSE |// Args in xmm0/xmm1, return value in xmm0. |// Caveat: xmm0-xmm5 and RC (eax) modified! | movaps xmm5, xmm0 @@ -3171,488 +3036,6 @@ static void build_subroutines(BuildCtx *ctx) | movaps xmm0, xmm5 | subsd xmm0, xmm1 | ret - |.else - |// Args/ret on x87 stack (y on top). No xmm registers modified. - |// Caveat: needs 3 slots on x87 stack! RC (eax) modified! - | fld st1 - | fdiv st1 - | fnstcw word [esp+4] - | mov ax, 0x0400 - | or ax, [esp+4] - | and ax, 0xf7ff - | mov [esp+6], ax - | fldcw word [esp+6] - | frndint - | fldcw word [esp+4] - | fmulp st1 - | fsubp st1 - | ret - |.endif - | - |// FP log2(x). Called by math.log(x, base). - |->vm_log2: - |.if X64WIN - | movsd qword [rsp+8], xmm0 // Use scratch area. - | fld1 - | fld qword [rsp+8] - | fyl2x - | fstp qword [rsp+8] - | movsd xmm0, qword [rsp+8] - |.elif X64 - | movsd qword [rsp-8], xmm0 // Use red zone. - | fld1 - | fld qword [rsp-8] - | fyl2x - | fstp qword [rsp-8] - | movsd xmm0, qword [rsp-8] - |.else - | fld1 - | fld qword [esp+4] - | fyl2x - |.endif - | ret - | - |// FP exponentiation e^x and 2^x. Called by math.exp fast function and - |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified. - |// Caveat: needs 3 slots on x87 stack! - |->vm_exp_x87: - | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e)) - |->vm_exp2_x87: - | .if X64WIN - | .define expscratch, dword [rsp+8] // Use scratch area. - | .elif X64 - | .define expscratch, dword [rsp-8] // Use red zone. - | .else - | .define expscratch, dword [esp+4] // Needs 4 byte scratch area. - | .endif - | fst expscratch // Caveat: overwrites ARG1. - | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf - | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0 - |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check. - | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. - | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int - |1: - | ret - |2: - | fpop; fldz; ret - | - |// Generic power function x^y. Called by BC_POW, math.pow fast function, - |// and vm_arith. - |// Args/ret on x87 stack (y on top). RC (eax) modified. - |// Caveat: needs 3 slots on x87 stack! - |->vm_pow: - |.if not SSE - | fist dword [esp+4] // Store/reload int before comparison. - | fild dword [esp+4] // Integral exponent used in vm_powi. - | fucomip st1 - | jnz >8 // Branch for FP exponents. - | jp >9 // Branch for NaN exponent. - | fpop // Pop y and fallthrough to vm_powi. - | - |// FP/int power function x^i. Arg1/ret on x87 stack. - |// Arg2 (int) on C stack. RC (eax) modified. - |// Caveat: needs 2 slots on x87 stack! - | mov eax, [esp+4] - | cmp eax, 1; jle >6 // i<=1? - | // Now 1 < (unsigned)i <= 0x80000000. - |1: // Handle leading zeros. - | test eax, 1; jnz >2 - | fmul st0 - | shr eax, 1 - | jmp <1 - |2: - | shr eax, 1; jz >5 - | fdup - |3: // Handle trailing bits. - | fmul st0 - | shr eax, 1; jz >4 - | jnc <3 - | fmul st1, st0 - | jmp <3 - |4: - | fmulp st1 - |5: - | ret - |6: - | je <5 // x^1 ==> x - | jb >7 - | fld1; fdivrp st1 - | neg eax - | cmp eax, 1; je <5 // x^-1 ==> 1/x - | jmp <1 // x^-i ==> (1/x)^i - |7: - | fpop; fld1 // x^0 ==> 1 - | ret - | - |8: // FP/FP power function x^y. - | fst dword [esp+4] - | fxch - | fst dword [esp+8] - | mov eax, [esp+4]; shl eax, 1 - | cmp eax, 0xff000000; je >2 // x^+-Inf? - | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? - | cmp eax, 0xff000000; je >4 // +-Inf^y? - | fyl2x - | jmp ->vm_exp2raw - | - |9: // Handle x^NaN. - | fld1 - | fucomip st2 - | je >1 // 1^NaN ==> 1 - | fxch // x^NaN ==> NaN - |1: - | fpop - | ret - | - |2: // Handle x^+-Inf. - | fabs - | fld1 - | fucomip st1 - | je >3 // +-1^+-Inf ==> 1 - | fpop; fabs; fldz; mov eax, 0; setc al - | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0 - | fxch - |3: - | fpop1; fabs - | ret - | - |4: // Handle +-0^y or +-Inf^y. - | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x| - | fpop; fpop - | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf - | fldz // y < 0, +-Inf^y ==> 0 - | ret - |5: - | mov dword [esp+4], 0x7f800000 // Return +Inf. - | fld dword [esp+4] - | ret - |.endif - | - |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified. - |// Needs 16 byte scratch area for x86. Also called from JIT code. - |->vm_pow_sse: - | cvtsd2si eax, xmm1 - | cvtsi2sd xmm2, eax - | ucomisd xmm1, xmm2 - | jnz >8 // Branch for FP exponents. - | jp >9 // Branch for NaN exponent. - | // Fallthrough to vm_powi_sse. - | - |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified. - |->vm_powi_sse: - | cmp eax, 1; jle >6 // i<=1? - | // Now 1 < (unsigned)i <= 0x80000000. - |1: // Handle leading zeros. - | test eax, 1; jnz >2 - | mulsd xmm0, xmm0 - | shr eax, 1 - | jmp <1 - |2: - | shr eax, 1; jz >5 - | movaps xmm1, xmm0 - |3: // Handle trailing bits. - | mulsd xmm0, xmm0 - | shr eax, 1; jz >4 - | jnc <3 - | mulsd xmm1, xmm0 - | jmp <3 - |4: - | mulsd xmm0, xmm1 - |5: - | ret - |6: - | je <5 // x^1 ==> x - | jb >7 // x^0 ==> 1 - | neg eax - | call <1 - | sseconst_1 xmm1, RDa - | divsd xmm1, xmm0 - | movaps xmm0, xmm1 - | ret - |7: - | sseconst_1 xmm0, RDa - | ret - | - |8: // FP/FP power function x^y. - |.if X64 - | movd rax, xmm1; shl rax, 1 - | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf? - | movd rax, xmm0; shl rax, 1; je >4 // +-0^y? - | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y? - | .if X64WIN - | movsd qword [rsp+16], xmm1 // Use scratch area. - | movsd qword [rsp+8], xmm0 - | fld qword [rsp+16] - | fld qword [rsp+8] - | .else - | movsd qword [rsp-16], xmm1 // Use red zone. - | movsd qword [rsp-8], xmm0 - | fld qword [rsp-16] - | fld qword [rsp-8] - | .endif - |.else - | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area. - | movsd qword [esp+4], xmm0 - | cmp dword [esp+12], 0; jne >1 - | mov eax, [esp+16]; shl eax, 1 - | cmp eax, 0xffe00000; je >2 // x^+-Inf? - |1: - | cmp dword [esp+4], 0; jne >1 - | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y? - | cmp eax, 0xffe00000; je >5 // +-Inf^y? - |1: - | fld qword [esp+12] - | fld qword [esp+4] - |.endif - | fyl2x // y*log2(x) - | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part. - | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int - |.if X64WIN - | fstp qword [rsp+8] // Use scratch area. - | movsd xmm0, qword [rsp+8] - |.elif X64 - | fstp qword [rsp-8] // Use red zone. - | movsd xmm0, qword [rsp-8] - |.else - | fstp qword [esp+4] // Needs 8 byte scratch area. - | movsd xmm0, qword [esp+4] - |.endif - | ret - | - |9: // Handle x^NaN. - | sseconst_1 xmm2, RDa - | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1 - | movaps xmm0, xmm1 // x^NaN ==> NaN - |1: - | ret - | - |2: // Handle x^+-Inf. - | sseconst_abs xmm2, RDa - | andpd xmm0, xmm2 // |x| - | sseconst_1 xmm2, RDa - | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1 - | movmskpd eax, xmm1 - | xorps xmm0, xmm0 - | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0 - |3: - | sseconst_hi xmm0, RDa, 7ff00000 // +Inf - | ret - | - |4: // Handle +-0^y. - | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf - | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0 - | ret - | - |5: // Handle +-Inf^y. - | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf - | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0 - | ret - | - |// Callable from C: double lj_vm_foldfpm(double x, int fpm) - |// Computes fpm(x) for extended math functions. ORDER FPM. - |->vm_foldfpm: - |.if JIT - |.if X64 - | .if X64WIN - | .define fpmop, CARG2d - | .else - | .define fpmop, CARG1d - | .endif - | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil - | cmp fpmop, 3; jb ->vm_trunc; ja >2 - | sqrtsd xmm0, xmm0; ret - |2: - | .if X64WIN - | movsd qword [rsp+8], xmm0 // Use scratch area. - | fld qword [rsp+8] - | .else - | movsd qword [rsp-8], xmm0 // Use red zone. - | fld qword [rsp-8] - | .endif - | cmp fpmop, 5; ja >2 - | .if X64WIN; pop rax; .endif - | je >1 - | call ->vm_exp_x87 - | .if X64WIN; push rax; .endif - | jmp >7 - |1: - | call ->vm_exp2_x87 - | .if X64WIN; push rax; .endif - | jmp >7 - |2: ; cmp fpmop, 7; je >1; ja >2 - | fldln2; fxch; fyl2x; jmp >7 - |1: ; fld1; fxch; fyl2x; jmp >7 - |2: ; cmp fpmop, 9; je >1; ja >2 - | fldlg2; fxch; fyl2x; jmp >7 - |1: ; fsin; jmp >7 - |2: ; cmp fpmop, 11; je >1; ja >9 - | fcos; jmp >7 - |1: ; fptan; fpop - |7: - | .if X64WIN - | fstp qword [rsp+8] // Use scratch area. - | movsd xmm0, qword [rsp+8] - | .else - | fstp qword [rsp-8] // Use red zone. - | movsd xmm0, qword [rsp-8] - | .endif - | ret - |.else // x86 calling convention. - | .define fpmop, eax - |.if SSE - | mov fpmop, [esp+12] - | movsd xmm0, qword [esp+4] - | cmp fpmop, 1; je >1; ja >2 - | call ->vm_floor; jmp >7 - |1: ; call ->vm_ceil; jmp >7 - |2: ; cmp fpmop, 3; je >1; ja >2 - | call ->vm_trunc; jmp >7 - |1: - | sqrtsd xmm0, xmm0 - |7: - | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. - | fld qword [esp+4] - | ret - |2: ; fld qword [esp+4] - | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 - |2: ; cmp fpmop, 7; je >1; ja >2 - | fldln2; fxch; fyl2x; ret - |1: ; fld1; fxch; fyl2x; ret - |2: ; cmp fpmop, 9; je >1; ja >2 - | fldlg2; fxch; fyl2x; ret - |1: ; fsin; ret - |2: ; cmp fpmop, 11; je >1; ja >9 - | fcos; ret - |1: ; fptan; fpop; ret - |.else - | mov fpmop, [esp+12] - | fld qword [esp+4] - | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil - | cmp fpmop, 3; jb ->vm_trunc; ja >2 - | fsqrt; ret - |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 - | cmp fpmop, 7; je >1; ja >2 - | fldln2; fxch; fyl2x; ret - |1: ; fld1; fxch; fyl2x; ret - |2: ; cmp fpmop, 9; je >1; ja >2 - | fldlg2; fxch; fyl2x; ret - |1: ; fsin; ret - |2: ; cmp fpmop, 11; je >1; ja >9 - | fcos; ret - |1: ; fptan; fpop; ret - |.endif - |.endif - |9: ; int3 // Bad fpm. - |.endif - | - |// Callable from C: double lj_vm_foldarith(double x, double y, int op) - |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -) - |// and basic math functions. ORDER ARITH - |->vm_foldarith: - |.if X64 - | - | .if X64WIN - | .define foldop, CARG3d - | .else - | .define foldop, CARG1d - | .endif - | cmp foldop, 1; je >1; ja >2 - | addsd xmm0, xmm1; ret - |1: ; subsd xmm0, xmm1; ret - |2: ; cmp foldop, 3; je >1; ja >2 - | mulsd xmm0, xmm1; ret - |1: ; divsd xmm0, xmm1; ret - |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow - | cmp foldop, 7; je >1; ja >2 - | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret - |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret - |2: ; cmp foldop, 9; ja >2 - |.if X64WIN - | movsd qword [rsp+8], xmm0 // Use scratch area. - | movsd qword [rsp+16], xmm1 - | fld qword [rsp+8] - | fld qword [rsp+16] - |.else - | movsd qword [rsp-8], xmm0 // Use red zone. - | movsd qword [rsp-16], xmm1 - | fld qword [rsp-8] - | fld qword [rsp-16] - |.endif - | je >1 - | fpatan - |7: - |.if X64WIN - | fstp qword [rsp+8] // Use scratch area. - | movsd xmm0, qword [rsp+8] - |.else - | fstp qword [rsp-8] // Use red zone. - | movsd xmm0, qword [rsp-8] - |.endif - | ret - |1: ; fxch; fscale; fpop1; jmp <7 - |2: ; cmp foldop, 11; je >1; ja >9 - | minsd xmm0, xmm1; ret - |1: ; maxsd xmm0, xmm1; ret - |9: ; int3 // Bad op. - | - |.elif SSE // x86 calling convention with SSE ops. - | - | .define foldop, eax - | mov foldop, [esp+20] - | movsd xmm0, qword [esp+4] - | movsd xmm1, qword [esp+12] - | cmp foldop, 1; je >1; ja >2 - | addsd xmm0, xmm1 - |7: - | movsd qword [esp+4], xmm0 // Overwrite callee-owned args. - | fld qword [esp+4] - | ret - |1: ; subsd xmm0, xmm1; jmp <7 - |2: ; cmp foldop, 3; je >1; ja >2 - | mulsd xmm0, xmm1; jmp <7 - |1: ; divsd xmm0, xmm1; jmp <7 - |2: ; cmp foldop, 5 - | je >1; ja >2 - | call ->vm_mod; jmp <7 - |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area. - |2: ; cmp foldop, 7; je >1; ja >2 - | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 - |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 - |2: ; cmp foldop, 9; ja >2 - | fld qword [esp+4] // Reload from stack - | fld qword [esp+12] - | je >1 - | fpatan; ret - |1: ; fxch; fscale; fpop1; ret - |2: ; cmp foldop, 11; je >1; ja >9 - | minsd xmm0, xmm1; jmp <7 - |1: ; maxsd xmm0, xmm1; jmp <7 - |9: ; int3 // Bad op. - | - |.else // x86 calling convention with x87 ops. - | - | mov eax, [esp+20] - | fld qword [esp+4] - | fld qword [esp+12] - | cmp eax, 1; je >1; ja >2 - | faddp st1; ret - |1: ; fsubp st1; ret - |2: ; cmp eax, 3; je >1; ja >2 - | fmulp st1; ret - |1: ; fdivp st1; ret - |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow - | cmp eax, 7; je >1; ja >2 - | fpop; fchs; ret - |1: ; fpop; fabs; ret - |2: ; cmp eax, 9; je >1; ja >2 - | fpatan; ret - |1: ; fxch; fscale; fpop1; ret - |2: ; cmp eax, 11; je >1; ja >9 - | fucomi st1; fcmovnbe st1; fpop1; ret - |1: ; fucomi st1; fcmovbe st1; fpop1; ret - |9: ; int3 // Bad op. - | - |.endif | |//----------------------------------------------------------------------- |//-- Miscellaneous functions -------------------------------------------- @@ -3664,6 +3047,7 @@ static void build_subroutines(BuildCtx *ctx) | mov eax, CARG1d | .if X64WIN; push rsi; mov rsi, CARG2; .endif | push rbx + | xor ecx, ecx | cpuid | mov [rsi], eax | mov [rsi+4], ebx @@ -3687,6 +3071,7 @@ static void build_subroutines(BuildCtx *ctx) | mov eax, [esp+4] // Argument 1 is function number. | push edi | push ebx + | xor ecx, ecx | cpuid | mov edi, [esp+16] // Argument 2 is result area. | mov [edi], eax @@ -3699,6 +3084,86 @@ static void build_subroutines(BuildCtx *ctx) | ret |.endif | + |.define NEXT_TAB, TAB:FCARG1 + |.define NEXT_IDX, FCARG2 + |.define NEXT_PTR, RCa + |.define NEXT_PTRd, RC + |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.if X64 + |.define NEXT_TMP, CARG3d + |.define NEXT_TMPq, CARG3 + |.define NEXT_ASIZE, CARG4d + |.macro NEXT_ENTER; .endmacro + |.macro NEXT_LEAVE; ret; .endmacro + |.if X64WIN + |.define NEXT_RES_PTR, [rsp+aword*5] + |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro + |.else + |.define NEXT_RES_PTR, [rsp+aword*1] + |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro + |.endif + |.else + |.define NEXT_ASIZE, esi + |.define NEXT_TMP, edi + |.macro NEXT_ENTER; push esi; push edi; .endmacro + |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro + |.define NEXT_RES_PTR, [esp+dword*3] + |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro + |.endif + | + |// TValue *lj_vm_next(GCtab *t, uint32_t idx) + |// Next idx returned in edx. + |->vm_next: + |.if JIT + | NEXT_ENTER + | mov NEXT_ASIZE, NEXT_TAB->asize + |1: // Traverse array part. + | cmp NEXT_IDX, NEXT_ASIZE; jae >5 + | mov NEXT_TMP, NEXT_TAB->array + | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 + | lea NEXT_PTR, NEXT_RES_PTR + |.if X64 + | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] + | mov qword [NEXT_PTR], NEXT_TMPq + |.else + | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] + | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8] + | mov dword [NEXT_PTR+4], NEXT_ASIZE + | mov dword [NEXT_PTR], NEXT_TMP + |.endif + |.if DUALNUM + | mov dword [NEXT_PTR+dword*3], LJ_TISNUM + | mov dword [NEXT_PTR+dword*2], NEXT_IDX + |.else + | cvtsi2sd xmm0, NEXT_IDX + | movsd qword [NEXT_PTR+dword*2], xmm0 + |.endif + | NEXT_RES_IDX 1 + | NEXT_LEAVE + |2: // Skip holes in array part. + | add NEXT_IDX, 1 + | jmp <1 + | + |5: // Traverse hash part. + | sub NEXT_IDX, NEXT_ASIZE + |6: + | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 + | imul NEXT_PTRd, NEXT_IDX, #NODE + | add NODE:NEXT_PTRd, dword NEXT_TAB->node + | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 + | NEXT_RES_IDXL NEXT_ASIZE+1 + | NEXT_LEAVE + |7: // Skip holes in hash part. + | add NEXT_IDX, 1 + | jmp <6 + | + |9: // End of iteration. Set the key to nil (not the value). + | NEXT_RES_IDX NEXT_ASIZE + | lea NEXT_PTR, NEXT_RES_PTR + | mov dword [NEXT_PTR+dword*3], LJ_TNIL + | NEXT_LEAVE + |.endif + | |//----------------------------------------------------------------------- |//-- Assertions --------------------------------------------------------- |//----------------------------------------------------------------------- @@ -3964,19 +3429,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA is a number. | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | // RA is a number, RD is an integer. - |.if SSE | cvtsi2sd xmm0, dword [BASE+RD*8] | jmp >2 - |.else - | fld qword [BASE+RA*8] - | fild dword [BASE+RD*8] - | jmp >3 - |.endif | |8: // RA is an integer, RD is not an integer. | ja ->vmeta_comp | // RA is an integer, RD is a number. - |.if SSE | cvtsi2sd xmm1, dword [BASE+RA*8] | movsd xmm0, qword [BASE+RD*8] | add PC, 4 @@ -3984,29 +3442,15 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | jmp_comp jbe, ja, jb, jae, <9 | jmp <6 |.else - | fild dword [BASE+RA*8] - | jmp >2 - |.endif - |.else | checknum RA, ->vmeta_comp | checknum RD, ->vmeta_comp |.endif - |.if SSE |1: | movsd xmm0, qword [BASE+RD*8] |2: | add PC, 4 | ucomisd xmm0, qword [BASE+RA*8] |3: - |.else - |1: - | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A. - |2: - | fld qword [BASE+RD*8] - |3: - | add PC, 4 - | fcomparepp - |.endif | // Unordered: all of ZF CF PF set, ordered: PF clear. | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. |.if DUALNUM @@ -4046,43 +3490,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RD is a number. | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | // RD is a number, RA is an integer. - |.if SSE | cvtsi2sd xmm0, dword [BASE+RA*8] - |.else - | fild dword [BASE+RA*8] - |.endif | jmp >2 | |8: // RD is an integer, RA is not an integer. | ja >5 | // RD is an integer, RA is a number. - |.if SSE | cvtsi2sd xmm0, dword [BASE+RD*8] | ucomisd xmm0, qword [BASE+RA*8] - |.else - | fild dword [BASE+RD*8] - | fld qword [BASE+RA*8] - |.endif | jmp >4 | |.else | cmp RB, LJ_TISNUM; jae >5 | checknum RA, >5 |.endif - |.if SSE |1: | movsd xmm0, qword [BASE+RA*8] |2: | ucomisd xmm0, qword [BASE+RD*8] |4: - |.else - |1: - | fld qword [BASE+RA*8] - |2: - | fld qword [BASE+RD*8] - |4: - | fcomparepp - |.endif iseqne_fp: if (vk) { | jp >2 // Unordered means not equal. @@ -4205,39 +3631,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // RA is a number. | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | // RA is a number, RD is an integer. - |.if SSE | cvtsi2sd xmm0, dword [KBASE+RD*8] - |.else - | fild dword [KBASE+RD*8] - |.endif | jmp >2 | |8: // RA is an integer, RD is a number. - |.if SSE | cvtsi2sd xmm0, dword [BASE+RA*8] | ucomisd xmm0, qword [KBASE+RD*8] - |.else - | fild dword [BASE+RA*8] - | fld qword [KBASE+RD*8] - |.endif | jmp >4 |.else | cmp RB, LJ_TISNUM; jae >3 |.endif - |.if SSE |1: | movsd xmm0, qword [KBASE+RD*8] |2: | ucomisd xmm0, qword [BASE+RA*8] |4: - |.else - |1: - | fld qword [KBASE+RD*8] - |2: - | fld qword [BASE+RA*8] - |4: - | fcomparepp - |.endif goto iseqne_fp; case BC_ISEQP: case BC_ISNEP: vk = op == BC_ISEQP; @@ -4288,6 +3696,18 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_next break; + case BC_ISTYPE: + | ins_AD // RA = src, RD = -type + | add RD, [BASE+RA*8+4] + | jne ->vmeta_istype + | ins_next + break; + case BC_ISNUM: + | ins_AD // RA = src, RD = -(TISNUM-1) + | checknum RA, ->vmeta_istype + | ins_next + break; + /* -- Unary ops --------------------------------------------------------- */ case BC_MOV: @@ -4331,16 +3751,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.else | checknum RD, ->vmeta_unm |.endif - |.if SSE | movsd xmm0, qword [BASE+RD*8] | sseconst_sign xmm1, RDa | xorps xmm0, xmm1 | movsd qword [BASE+RA*8], xmm0 - |.else - | fld qword [BASE+RD*8] - | fchs - | fstp qword [BASE+RA*8] - |.endif |.if DUALNUM | jmp <9 |.else @@ -4356,15 +3770,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |1: | mov dword [BASE+RA*8+4], LJ_TISNUM | mov dword [BASE+RA*8], RD - |.elif SSE + |.else | xorps xmm0, xmm0 | cvtsi2sd xmm0, dword STR:RD->len |1: | movsd qword [BASE+RA*8], xmm0 - |.else - | fild dword STR:RD->len - |1: - | fstp qword [BASE+RA*8] |.endif | ins_next |2: @@ -4382,11 +3792,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Length of table returned in eax (RD). |.if DUALNUM | // Nothing to do. - |.elif SSE - | cvtsi2sd xmm0, RD |.else - | mov ARG1, RD - | fild ARG1 + | cvtsi2sd xmm0, RD |.endif | mov BASE, RB // Restore BASE. | movzx RA, PC_RA @@ -4401,7 +3808,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) /* -- Binary ops -------------------------------------------------------- */ - |.macro ins_arithpre, x87ins, sseins, ssereg + |.macro ins_arithpre, sseins, ssereg | ins_ABC ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ||switch (vk) { @@ -4410,37 +3817,22 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | .if DUALNUM | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | .endif - | .if SSE - | movsd xmm0, qword [BASE+RB*8] - | sseins ssereg, qword [KBASE+RC*8] - | .else - | fld qword [BASE+RB*8] - | x87ins qword [KBASE+RC*8] - | .endif + | movsd xmm0, qword [BASE+RB*8] + | sseins ssereg, qword [KBASE+RC*8] || break; ||case 1: | checknum RB, ->vmeta_arith_nv | .if DUALNUM | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | .endif - | .if SSE - | movsd xmm0, qword [KBASE+RC*8] - | sseins ssereg, qword [BASE+RB*8] - | .else - | fld qword [KBASE+RC*8] - | x87ins qword [BASE+RB*8] - | .endif + | movsd xmm0, qword [KBASE+RC*8] + | sseins ssereg, qword [BASE+RB*8] || break; ||default: | checknum RB, ->vmeta_arith_vv | checknum RC, ->vmeta_arith_vv - | .if SSE - | movsd xmm0, qword [BASE+RB*8] - | sseins ssereg, qword [BASE+RC*8] - | .else - | fld qword [BASE+RB*8] - | x87ins qword [BASE+RC*8] - | .endif + | movsd xmm0, qword [BASE+RB*8] + | sseins ssereg, qword [BASE+RC*8] || break; ||} |.endmacro @@ -4478,55 +3870,62 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endmacro | |.macro ins_arithpost - |.if SSE | movsd qword [BASE+RA*8], xmm0 - |.else - | fstp qword [BASE+RA*8] - |.endif |.endmacro | - |.macro ins_arith, x87ins, sseins - | ins_arithpre x87ins, sseins, xmm0 + |.macro ins_arith, sseins + | ins_arithpre sseins, xmm0 | ins_arithpost | ins_next |.endmacro | - |.macro ins_arith, intins, x87ins, sseins + |.macro ins_arith, intins, sseins |.if DUALNUM | ins_arithdn intins |.else - | ins_arith, x87ins, sseins + | ins_arith, sseins |.endif |.endmacro | // RA = dst, RB = src1 or num const, RC = src2 or num const case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: - | ins_arith add, fadd, addsd + | ins_arith add, addsd break; case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: - | ins_arith sub, fsub, subsd + | ins_arith sub, subsd break; case BC_MULVN: case BC_MULNV: case BC_MULVV: - | ins_arith imul, fmul, mulsd + | ins_arith imul, mulsd break; case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: - | ins_arith fdiv, divsd + | ins_arith divsd break; case BC_MODVN: - | ins_arithpre fld, movsd, xmm1 + | ins_arithpre movsd, xmm1 |->BC_MODVN_Z: | call ->vm_mod | ins_arithpost | ins_next break; case BC_MODNV: case BC_MODVV: - | ins_arithpre fld, movsd, xmm1 + | ins_arithpre movsd, xmm1 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. break; case BC_POW: - | ins_arithpre fld, movsd, xmm1 - | call ->vm_pow + | ins_arithpre movsd, xmm1 + | mov RB, BASE + |.if not X64 + | movsd FPARG1, xmm0 + | movsd FPARG3, xmm1 + |.endif + | call extern pow + | movzx RA, PC_RA + | mov BASE, RB + |.if X64 | ins_arithpost + |.else + | fstp qword [BASE+RA*8] + |.endif | ins_next break; @@ -4594,25 +3993,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | movsx RD, RDW | mov dword [BASE+RA*8+4], LJ_TISNUM | mov dword [BASE+RA*8], RD - |.elif SSE + |.else | movsx RD, RDW // Sign-extend literal. | cvtsi2sd xmm0, RD | movsd qword [BASE+RA*8], xmm0 - |.else - | fild PC_RD // Refetch signed RD from instruction. - | fstp qword [BASE+RA*8] |.endif | ins_next break; case BC_KNUM: | ins_AD // RA = dst, RD = num const - |.if SSE | movsd xmm0, qword [KBASE+RD*8] | movsd qword [BASE+RA*8], xmm0 - |.else - | fld qword [KBASE+RD*8] - | fstp qword [BASE+RA*8] - |.endif | ins_next break; case BC_KPRI: @@ -4719,18 +4110,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_USETN: | ins_AD // RA = upvalue #, RD = num const | mov LFUNC:RB, [BASE-8] - |.if SSE | movsd xmm0, qword [KBASE+RD*8] - |.else - | fld qword [KBASE+RD*8] - |.endif | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] | mov RA, UPVAL:RB->v - |.if SSE | movsd qword [RA], xmm0 - |.else - | fstp qword [RA] - |.endif | ins_next break; case BC_USETP: @@ -4884,18 +4267,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.else | // Convert number to int and back and compare. | checknum RC, >5 - |.if SSE | movsd xmm0, qword [BASE+RC*8] - | cvtsd2si RC, xmm0 + | cvttsd2si RC, xmm0 | cvtsi2sd xmm1, RC | ucomisd xmm0, xmm1 - |.else - | fld qword [BASE+RC*8] - | fist ARG1 - | fild ARG1 - | fcomparepp - | mov RC, ARG1 - |.endif | jne ->vmeta_tgetv // Generic numeric key? Use fallback. |.endif | cmp RC, TAB:RB->asize // Takes care of unordered, too. @@ -4941,7 +4316,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov TAB:RB, [BASE+RB*8] |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash + | and RA, STR:RC->sid | imul RA, #NODE | add NODE:RA, TAB:RB->node |1: @@ -5019,6 +4394,32 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov dword [BASE+RA*8+4], LJ_TNIL | jmp <1 break; + case BC_TGETR: + | ins_ABC // RA = dst, RB = table, RC = key + | mov TAB:RB, [BASE+RB*8] + |.if DUALNUM + | mov RC, dword [BASE+RC*8] + |.else + | cvttsd2si RC, qword [BASE+RC*8] + |.endif + | cmp RC, TAB:RB->asize + | jae ->vmeta_tgetr // Not in array part? Use fallback. + | shl RC, 3 + | add RC, TAB:RB->array + | // Get array slot. + |->BC_TGETR_Z: + |.if X64 + | mov RBa, [RC] + | mov [BASE+RA*8], RBa + |.else + | mov RB, [RC] + | mov RC, [RC+4] + | mov [BASE+RA*8], RB + | mov [BASE+RA*8+4], RC + |.endif + |->BC_TGETR2_Z: + | ins_next + break; case BC_TSETV: | ins_ABC // RA = src, RB = table, RC = key @@ -5032,18 +4433,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.else | // Convert number to int and back and compare. | checknum RC, >5 - |.if SSE | movsd xmm0, qword [BASE+RC*8] - | cvtsd2si RC, xmm0 + | cvttsd2si RC, xmm0 | cvtsi2sd xmm1, RC | ucomisd xmm0, xmm1 - |.else - | fld qword [BASE+RC*8] - | fist ARG1 - | fild ARG1 - | fcomparepp - | mov RC, ARG1 - |.endif | jne ->vmeta_tsetv // Generic numeric key? Use fallback. |.endif | cmp RC, TAB:RB->asize // Takes care of unordered, too. @@ -5094,7 +4487,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov TAB:RB, [BASE+RB*8] |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | mov RA, TAB:RB->hmask - | and RA, STR:RC->hash + | and RA, STR:RC->sid | imul RA, #NODE | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. | add NODE:RA, TAB:RB->node @@ -5213,6 +4606,39 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | movzx RA, PC_RA // Restore RA. | jmp <2 break; + case BC_TSETR: + | ins_ABC // RA = src, RB = table, RC = key + | mov TAB:RB, [BASE+RB*8] + |.if DUALNUM + | mov RC, dword [BASE+RC*8] + |.else + | cvttsd2si RC, qword [BASE+RC*8] + |.endif + | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) + | jnz >7 + |2: + | cmp RC, TAB:RB->asize + | jae ->vmeta_tsetr + | shl RC, 3 + | add RC, TAB:RB->array + | // Set array slot. + |->BC_TSETR_Z: + |.if X64 + | mov RBa, [BASE+RA*8] + | mov [RC], RBa + |.else + | mov RB, [BASE+RA*8+4] + | mov RA, [BASE+RA*8] + | mov [RC+4], RB + | mov [RC], RA + |.endif + | ins_next + | + |7: // Possible table write barrier for the value. Skip valiswhite check. + | barrierback TAB:RB, RA + | movzx RA, PC_RA // Restore RA. + | jmp <2 + break; case BC_TSETM: | ins_AD // RA = base (table at base-1), RD = num const (start index) @@ -5389,10 +4815,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ITERN: - | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) |.if JIT - | // NYI: add hotloop, record BC_ITERN. + | hotloop RB |.endif + |->vm_IITERN: + | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | mov TMP1, KBASE // Need two more free registers. | mov TMP2, DISPATCH | mov TAB:RB, [BASE+RA*8-16] @@ -5406,10 +4833,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.if DUALNUM | mov dword [BASE+RA*8+4], LJ_TISNUM | mov dword [BASE+RA*8], RC - |.elif SSE - | cvtsi2sd xmm0, RC |.else - | fild dword [BASE+RA*8-8] + | cvtsi2sd xmm0, RC |.endif | // Copy array slot to returned value. |.if X64 @@ -5425,10 +4850,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Return array index as a numeric key. |.if DUALNUM | // See above. - |.elif SSE - | movsd qword [BASE+RA*8], xmm0 |.else - | fstp qword [BASE+RA*8] + | movsd qword [BASE+RA*8], xmm0 |.endif | mov [BASE+RA*8-8], RC // Update control var. |2: @@ -5441,9 +4864,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |4: // Skip holes in array part. | add RC, 1 - |.if not (DUALNUM or SSE) - | mov [BASE+RA*8-8], RC - |.endif | jmp <1 | |5: // Traverse hash part. @@ -5487,14 +4907,28 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 | branchPC RD | mov dword [BASE+RA*8-8], 0 // Initialize control var. - | mov dword [BASE+RA*8-4], 0xfffe7fff + | mov dword [BASE+RA*8-4], LJ_KEYINDEX |1: | ins_next |5: // Despecialize bytecode if any of the checks fail. | mov PC_OP, BC_JMP | branchPC RD + |.if JIT + | cmp byte [PC], BC_ITERN + | jne >6 + |.endif | mov byte [PC], BC_ITERC | jmp <1 + |.if JIT + |6: // Unpatch JLOOP. + | mov RA, [DISPATCH+DISPATCH_J(trace)] + | movzx RC, word [PC+2] + | mov TRACE:RA, [RA+RC*4] + | mov eax, TRACE:RA->startins + | mov al, BC_ITERC + | mov dword [PC], eax + | jmp <1 + |.endif break; case BC_VARG: @@ -5777,7 +5211,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (!vk) { | cmp RB, LJ_TISNUM; jae ->vmeta_for } - |.if SSE | movsd xmm0, qword FOR_IDX | movsd xmm1, qword FOR_STOP if (vk) { @@ -5790,22 +5223,6 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ucomisd xmm1, xmm0 |1: | movsd qword FOR_EXT, xmm0 - |.else - | fld qword FOR_STOP - | fld qword FOR_IDX - if (vk) { - | fadd qword FOR_STEP // nidx = idx + step - | fst qword FOR_IDX - | fst qword FOR_EXT - | test RB, RB; js >1 - } else { - | fst qword FOR_EXT - | jl >1 - } - | fxch // Swap lim/(n)idx if step non-negative. - |1: - | fcomparepp - |.endif if (op == BC_FORI) { |.if DUALNUM | jnb <7 @@ -5833,11 +5250,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |2: | ins_next |.endif - |.if SSE + | |3: // Invert comparison if step is negative. | ucomisd xmm0, xmm1 | jmp <1 - |.endif break; case BC_ITERL: @@ -5875,7 +5291,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_A // RA = base, RD = target (loop extent) | // Note: RA/RD is only used by trace recorder to determine scope/extent | // This opcode does NOT jump, it's only purpose is to detect a hot loop. - |.if JIT + |.if JIT | hotloop RB |.endif | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. @@ -5894,7 +5310,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov RDa, TRACE:RD->mcode | mov L:RB, SAVE_L | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE - | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB + | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB | // Save additional callee-save registers only used in compiled code. |.if X64WIN | mov TMPQ, r12 @@ -6061,9 +5477,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // (lua_State *L, lua_CFunction f) | call aword [DISPATCH+DISPATCH_GL(wrapf)] } - | set_vmstate INTERP | // nresults returned in eax (RD). | mov BASE, L:RB->base + | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB + | set_vmstate INTERP | lea RA, [BASE+RD*8] | neg RA | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 @@ -6176,7 +5593,7 @@ static void emit_asm_debug(BuildCtx *ctx) ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); #endif #if !LJ_NO_UNWIND -#if (defined(__sun__) && defined(__svr4__)) +#if LJ_TARGET_SOLARIS #if LJ_64 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); #else @@ -6383,15 +5800,21 @@ static void emit_asm_debug(BuildCtx *ctx) "LEFDEY:\n\n", fcsize); } #endif -#if LJ_64 - fprintf(ctx->fp, "\t.subsections_via_symbols\n"); -#else +#if !LJ_64 fprintf(ctx->fp, "\t.non_lazy_symbol_pointer\n" "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" ".indirect_symbol _lj_err_unwind_dwarf\n" - ".long 0\n"); + ".long 0\n\n"); + fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n"); + { + const char *const *xn; + for (xn = ctx->extnames; *xn; xn++) + if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) + fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn); + } #endif + fprintf(ctx->fp, ".subsections_via_symbols\n"); } break; #endif diff --git a/src/xb1build.bat b/src/xb1build.bat new file mode 100644 index 00000000..2eb68171 --- /dev/null +++ b/src/xb1build.bat @@ -0,0 +1,101 @@ +@rem Script to build LuaJIT with the Xbox One SDK.
+@rem Donated to the public domain.
+@rem
+@rem Open a "Visual Studio .NET Command Prompt" (64 bit host compiler)
+@rem Then cd to this directory and run this script.
+
+@if not defined INCLUDE goto :FAIL
+@if not defined DurangoXDK goto :FAIL
+
+@setlocal
+@echo ---- Host compiler ----
+@set LJCOMPILE=cl /nologo /c /MD /O2 /W3 /D_CRT_SECURE_NO_DEPRECATE
+@set LJLINK=link /nologo
+@set LJMT=mt /nologo
+@set DASMDIR=..\dynasm
+@set DASM=%DASMDIR%\dynasm.lua
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
+
+%LJCOMPILE% host\minilua.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:minilua.exe minilua.obj
+@if errorlevel 1 goto :BAD
+if exist minilua.exe.manifest^
+ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe
+
+@rem Error out for 64 bit host compiler
+@minilua
+@if not errorlevel 8 goto :FAIL
+
+@set DASMFLAGS=-D WIN -D FFI -D P64
+minilua %DASM% -LN %DASMFLAGS% -o host\buildvm_arch.h vm_x64.dasc
+@if errorlevel 1 goto :BAD
+
+%LJCOMPILE% /I "." /I %DASMDIR% /D_DURANGO host\buildvm*.c
+@if errorlevel 1 goto :BAD
+%LJLINK% /out:buildvm.exe buildvm*.obj
+@if errorlevel 1 goto :BAD
+if exist buildvm.exe.manifest^
+ %LJMT% -manifest buildvm.exe.manifest -outputresource:buildvm.exe
+
+buildvm -m peobj -o lj_vm.obj
+@if errorlevel 1 goto :BAD
+buildvm -m bcdef -o lj_bcdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m ffdef -o lj_ffdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m libdef -o lj_libdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m recdef -o lj_recdef.h %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m vmdef -o jit\vmdef.lua %ALL_LIB%
+@if errorlevel 1 goto :BAD
+buildvm -m folddef -o lj_folddef.h lj_opt_fold.c
+@if errorlevel 1 goto :BAD
+
+@echo ---- Cross compiler ----
+
+@set CWD=%cd%
+@call "%DurangoXDK%\xdk\DurangoVars.cmd" XDK
+@cd /D "%CWD%"
+@shift
+
+@set LJCOMPILE="cl" /nologo /c /W3 /GF /Gm- /GR- /GS- /Gy /openmp- /D_CRT_SECURE_NO_DEPRECATE /D_LIB /D_UNICODE /D_DURANGO
+@set LJLIB="lib" /nologo
+
+@if "%1"=="debug" (
+ @shift
+ @set LJCOMPILE=%LJCOMPILE% /Zi /MDd /Od
+ @set LJLINK=%LJLINK% /debug
+) else (
+ @set LJCOMPILE=%LJCOMPILE% /MD /O2 /DNDEBUG
+)
+
+@if "%1"=="amalg" goto :AMALG
+%LJCOMPILE% /DLUA_BUILD_AS_DLL lj_*.c lib_*.c
+@if errorlevel 1 goto :BAD
+%LJLIB% /OUT:luajit.lib lj_*.obj lib_*.obj
+@if errorlevel 1 goto :BAD
+@goto :NOAMALG
+:AMALG
+%LJCOMPILE% /DLUA_BUILD_AS_DLL ljamalg.c
+@if errorlevel 1 goto :BAD
+%LJLIB% /OUT:luajit.lib ljamalg.obj lj_vm.obj
+@if errorlevel 1 goto :BAD
+:NOAMALG
+
+@del *.obj *.manifest minilua.exe buildvm.exe
+@echo.
+@echo === Successfully built LuaJIT for Xbox One ===
+
+@goto :END
+:BAD
+@echo.
+@echo *******************************************************
+@echo *** Build FAILED -- Please check the error messages ***
+@echo *******************************************************
+@goto :END
+:FAIL
+@echo To run this script you must open a "Visual Studio .NET Command Prompt"
+@echo (64 bit host compiler). The Xbox One SDK must be installed, too.
+:END
diff --git a/src/xedkbuild.bat b/src/xedkbuild.bat index 240ec878..37322d03 100644 --- a/src/xedkbuild.bat +++ b/src/xedkbuild.bat @@ -14,7 +14,7 @@ @set LJMT=mt /nologo
@set DASMDIR=..\dynasm
@set DASM=%DASMDIR%\dynasm.lua
-@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c
+@set ALL_LIB=lib_base.c lib_math.c lib_bit.c lib_string.c lib_table.c lib_io.c lib_os.c lib_package.c lib_debug.c lib_jit.c lib_ffi.c lib_buffer.c
%LJCOMPILE% host\minilua.c
@if errorlevel 1 goto :BAD
|