diff options
author | John Högberg <john@erlang.org> | 2022-01-11 14:01:48 +0100 |
---|---|---|
committer | John Högberg <john@erlang.org> | 2022-01-18 15:23:58 +0100 |
commit | 055e21851725e51c691cfb5654e5a8a58dc30386 (patch) | |
tree | e23096f4795573592b89f81adda0a9bd61aa2499 /erts/emulator/asmjit/x86/x86assembler.h | |
parent | a9225fd951aec7271839cba732cb759db24a84d7 (diff) | |
download | erlang-055e21851725e51c691cfb5654e5a8a58dc30386.tar.gz |
jit: Update asmjit version
Diffstat (limited to 'erts/emulator/asmjit/x86/x86assembler.h')
-rw-r--r-- | erts/emulator/asmjit/x86/x86assembler.h | 270 |
1 files changed, 106 insertions, 164 deletions
diff --git a/erts/emulator/asmjit/x86/x86assembler.h b/erts/emulator/asmjit/x86/x86assembler.h index 8cd10143b0..dbffae6289 100644 --- a/erts/emulator/asmjit/x86/x86assembler.h +++ b/erts/emulator/asmjit/x86/x86assembler.h @@ -1,25 +1,7 @@ -// AsmJit - Machine code generation for C++ +// This file is part of AsmJit project <https://asmjit.com> // -// * Official AsmJit Home Page: https://asmjit.com -// * Official Github Repository: https://github.com/asmjit/asmjit -// -// Copyright (c) 2008-2020 The AsmJit Authors -// -// This software is provided 'as-is', without any express or implied -// warranty. In no event will the authors be held liable for any damages -// arising from the use of this software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it -// freely, subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; you must not -// claim that you wrote the original software. If you use this software -// in a product, an acknowledgment in the product documentation would be -// appreciated but is not required. -// 2. Altered source versions must be plainly marked as such, and must not be -// misrepresented as being the original software. -// 3. This notice may not be removed or altered from any source distribution. +// See asmjit.h or LICENSE.md for license and copyright information +// SPDX-License-Identifier: Zlib #ifndef ASMJIT_X86_X86ASSEMBLER_H_INCLUDED #define ASMJIT_X86_X86ASSEMBLER_H_INCLUDED @@ -33,21 +15,15 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! \addtogroup asmjit_x86 //! \{ -// ============================================================================ -// [asmjit::Assembler] -// ============================================================================ - //! X86/X64 assembler implementation. //! -//! x86::Assembler is a code emitter that emits machine code directly into the -//! \ref CodeBuffer. 
The assembler is capable of targeting both 32-bit and 64-bit -//! instruction sets, the instruction set can be configured through \ref CodeHolder. +//! x86::Assembler is a code emitter that emits machine code directly into the \ref CodeBuffer. The assembler is capable +//! of targeting both 32-bit and 64-bit instruction sets, the instruction set can be configured through \ref CodeHolder. //! //! ### Basics //! -//! The following example shows a basic use of `x86::Assembler`, how to generate -//! a function that works in both 32-bit and 64-bit modes, and how to connect -//! \ref JitRuntime, \ref CodeHolder, and `x86::Assembler`. +//! The following example shows a basic use of `x86::Assembler`, how to generate a function that works in both 32-bit +//! and 64-bit modes, and how to connect \ref JitRuntime, \ref CodeHolder, and `x86::Assembler`. //! //! ``` //! #include <asmjit/x86.h> @@ -122,32 +98,26 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! } //! ``` //! -//! The example should be self-explanatory. It shows how to work with labels, -//! how to use operands, and how to emit instructions that can use different -//! registers based on runtime selection. It implements 32-bit CDECL, WIN64, +//! The example should be self-explanatory. It shows how to work with labels, how to use operands, and how to emit +//! instructions that can use different registers based on runtime selection. It implements 32-bit CDECL, WIN64, //! and SysV64 calling conventions and will work on most X86/X64 environments. //! -//! Although function prologs / epilogs can be implemented manually, AsmJit -//! provides utilities that can be used to create function prologs and epilogs -//! automatically, see \ref asmjit_function for more details. +//! Although function prologs / epilogs can be implemented manually, AsmJit provides utilities that can be used +//! to create function prologs and epilogs automatically, see \ref asmjit_function for more details. //! //! ### Instruction Validation //! -//! 
Assembler prefers speed over strictness by default. The implementation checks -//! the type of operands and fails if the signature of types is invalid, however, -//! it does only basic checks regarding registers and their groups used in -//! instructions. It's possible to pass operands that don't form any valid -//! signature to the implementation and succeed. This is usually not a problem -//! as Assembler provides typed API so operand types are normally checked by C++ -//! compiler at compile time, however, Assembler is fully dynamic and its \ref -//! emit() function can be called with any instruction id, options, and operands. -//! Moreover, it's also possible to form instructions that will be accepted by -//! the typed API, for example by calling `mov(x86::eax, x86::al)` - the C++ -//! compiler won't see a problem as both EAX and AL are \ref Gp registers. -//! -//! To help with common mistakes AsmJit allows to activate instruction validation. -//! This feature instruments the Assembler to call \ref InstAPI::validate() before -//! it attempts to encode any instruction. +//! Assembler prefers speed over strictness by default. The implementation checks the type of operands and fails +//! if the signature of types is invalid, however, it does only basic checks regarding registers and their groups +//! used in instructions. It's possible to pass operands that don't form any valid signature to the implementation +//! and succeed. This is usually not a problem as Assembler provides typed API so operand types are normally checked +//! by C++ compiler at compile time, however, Assembler is fully dynamic and its \ref emit() function can be called +//! with any instruction id, options, and operands. Moreover, it's also possible to form instructions that will be +//! accepted by the typed API, for example by calling `mov(x86::eax, x86::al)` - the C++ compiler won't see a problem +//! as both EAX and AL are \ref Gp registers. +//! +//! 
To help with common mistakes AsmJit allows to activate instruction validation. This feature instruments +//! the Assembler to call \ref InstAPI::validate() before it attempts to encode any instruction. //! //! The example below illustrates how validation can be turned on: //! @@ -165,7 +135,7 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! x86::Assembler a(&code); // Create and attach x86::Assembler to code. //! //! // Enable strict validation. -//! a.addValidationOptions(BaseEmitter::kValidationOptionAssembler); +//! a.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler); //! //! // Try to encode invalid or ill-formed instructions. //! Error err; @@ -188,11 +158,10 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! //! ### Native Registers //! -//! All emitters provide functions to construct machine-size registers depending -//! on the target. This feature is for users that want to write code targeting -//! both 32-bit and 64-bit architectures at the same time. In AsmJit terminology -//! such registers have prefix `z`, so for example on X86 architecture the -//! following native registers are provided: +//! All emitters provide functions to construct machine-size registers depending on the target. This feature is +//! for users that want to write code targeting both 32-bit and 64-bit architectures at the same time. In AsmJit +//! terminology such registers have prefix `z`, so for example on X86 architecture the following native registers +//! are provided: //! //! - `zax` - mapped to either `eax` or `rax` //! - `zbx` - mapped to either `ebx` or `rbx` @@ -203,8 +172,8 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! - `zsi` - mapped to either `esi` or `rsi` //! - `zdi` - mapped to either `edi` or `rdi` //! -//! They are accessible through \ref x86::Assembler, \ref x86::Builder, and -//! \ref x86::Compiler. The example below illustrates how to use this feature: +//! They are accessible through \ref x86::Assembler, \ref x86::Builder, and \ref x86::Compiler. The example below +//! 
illustrates how to use this feature: //! //! ``` //! #include <asmjit/x86.h> @@ -253,11 +222,9 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! } //! ``` //! -//! The example just returns `0`, but the function generated contains a standard -//! prolog and epilog sequence and the function itself reserves 32 bytes of local -//! stack. The advantage is clear - a single code-base can handle multiple targets -//! easily. If you want to create a register of native size dynamically by -//! specifying its id it's also possible: +//! The example just returns `0`, but the function generated contains a standard prolog and epilog sequence and the +//! function itself reserves 32 bytes of local stack. The advantage is clear - a single code-base can handle multiple +//! targets easily. If you want to create a register of native size dynamically by specifying its id it's also possible: //! //! ``` //! void example(x86::Assembler& a) { @@ -274,14 +241,8 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! //! ### Data Embedding //! -//! x86::Assembler extends the standard \ref BaseAssembler with X86/X64 specific -//! conventions that are often used by assemblers to embed data next to the code. -//! The following functions can be used to embed data: -//! -//! - \ref x86::Assembler::db() - embeds byte (8 bits) (x86 naming). -//! - \ref x86::Assembler::dw() - embeds word (16 bits) (x86 naming). -//! - \ref x86::Assembler::dd() - embeds dword (32 bits) (x86 naming). -//! - \ref x86::Assembler::dq() - embeds qword (64 bits) (x86 naming). +//! x86::Assembler extends the standard \ref BaseAssembler with X86/X64 specific conventions that are often used by +//! assemblers to embed data next to the code. The following functions can be used to embed data: //! //! - \ref BaseAssembler::embedInt8() - embeds int8_t (portable naming). //! - \ref BaseAssembler::embedUInt8() - embeds uint8_t (portable naming). @@ -294,6 +255,11 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! 
- \ref BaseAssembler::embedFloat() - embeds float (portable naming). //! - \ref BaseAssembler::embedDouble() - embeds double (portable naming). //! +//! - \ref x86::Assembler::db() - embeds byte (8 bits) (x86 naming). +//! - \ref x86::Assembler::dw() - embeds word (16 bits) (x86 naming). +//! - \ref x86::Assembler::dd() - embeds dword (32 bits) (x86 naming). +//! - \ref x86::Assembler::dq() - embeds qword (64 bits) (x86 naming). +//! //! The following example illustrates how embed works: //! //! ``` @@ -308,8 +274,8 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! } //! ``` //! -//! Sometimes it's required to read the data that is embedded after code, for -//! example. This can be done through \ref Label as shown below: +//! Sometimes it's required to read the data that is embedded after code, for example. This can be done through +//! \ref Label as shown below: //! //! ``` //! #include <asmjit/x86.h> @@ -335,17 +301,14 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! //! ### Label Embedding //! -//! It's also possible to embed labels. In general AsmJit provides the following -//! options: +//! It's also possible to embed labels. In general AsmJit provides the following options: //! -//! - \ref BaseEmitter::embedLabel() - Embeds absolute address of a label. -//! This is target dependent and would embed either 32-bit or 64-bit data -//! that embeds absolute label address. This kind of embedding cannot be +//! - \ref BaseEmitter::embedLabel() - Embeds absolute address of a label. This is target dependent and would +//! embed either 32-bit or 64-bit data that embeds absolute label address. This kind of embedding cannot be //! used in a position independent code. //! -//! - \ref BaseEmitter::embedLabelDelta() - Embeds a difference between two -//! labels. The size of the difference can be specified so it's possible to -//! embed 8-bit, 16-bit, 32-bit, and 64-bit difference, which is sufficient +//! - \ref BaseEmitter::embedLabelDelta() - Embeds a difference between two labels. 
The size of the difference +//! can be specified so it's possible to embed 8-bit, 16-bit, 32-bit, and 64-bit difference, which is sufficient //! for most purposes. //! //! The following example demonstrates how to embed labels and their differences: @@ -368,9 +331,8 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! //! ### Using FuncFrame and FuncDetail with x86::Assembler //! -//! The example below demonstrates how \ref FuncFrame and \ref FuncDetail can be -//! used together with \ref x86::Assembler to generate a function that will use -//! platform dependent calling conventions automatically depending on the target: +//! The example below demonstrates how \ref FuncFrame and \ref FuncDetail can be used together with \ref x86::Assembler +//! to generate a function that will use platform dependent calling conventions automatically depending on the target: //! //! ``` //! #include <asmjit/x86.h> @@ -399,13 +361,13 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! //! // Create/initialize FuncDetail and FuncFrame. //! FuncDetail func; -//! func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConv::kIdHost)); +//! func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost)); //! //! FuncFrame frame; //! frame.init(func); //! -//! // Make XMM0 and XMM1 dirty - kGroupVec describes XMM|YMM|ZMM registers. -//! frame.setDirtyRegs(x86::Reg::kGroupVec, IntUtils::mask(0, 1)); +//! // Make XMM0 and XMM1 dirty - RegGroup::kVec describes XMM|YMM|ZMM registers. +//! frame.setDirtyRegs(RegGroup::kVec, IntUtils::mask(0, 1)); //! //! // Alternatively, if you don't want to use register masks you can pass BaseReg //! // to addDirtyRegs(). The following code would add both xmm0 and xmm1. @@ -444,50 +406,40 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! //! ### Using x86::Assembler as Code-Patcher //! -//! This is an advanced topic that is sometimes unavoidable. AsmJit by default -//! appends machine code it generates into a \ref CodeBuffer, however, it also -//! 
allows to set the offset in \ref CodeBuffer explicitly and to overwrite its -//! content. This technique is extremely dangerous as X86 instructions have -//! variable length (see below), so you should in general only patch code to -//! change instruction's immediate values or some other details not known at -//! the time the instruction was emitted. A typical scenario that requires -//! code-patching is when you start emitting function and you don't know how -//! much stack you want to reserve for it. -//! -//! Before we go further it's important to introduce instruction options, because -//! they can help with code-patching (and not only patching, but that will be -//! explained in AVX-512 section): -//! -//! - Many general-purpose instructions (especially arithmetic ones) on X86 -//! have multiple encodings - in AsmJit this is usually called 'short form' -//! and 'long form'. -//! - AsmJit always tries to use 'short form' as it makes the resulting -//! machine-code smaller, which is always good - this decision is used -//! by majority of assemblers out there. -//! - AsmJit allows to override the default decision by using `short_()` -//! and `long_()` instruction options to force short or long form, -//! respectively. The most useful is `long_()` as it basically forces -//! AsmJit to always emit the longest form. The `short_()` is not that -//! useful as it's automatic (except jumps to non-bound labels). Note that -//! the underscore after each function name avoids collision with built-in -//! C++ types. -//! -//! To illustrate what short form and long form means in binary let's assume -//! we want to emit "add esp, 16" instruction, which has two possible binary -//! encodings: -//! -//! - `83C410` - This is a short form aka `short add esp, 16` - You can see -//! opcode byte (0x83), MOD/RM byte (0xC4) and an 8-bit immediate value -//! representing `16`. -//! - `81C410000000` - This is a long form aka `long add esp, 16` - You can -//! 
see a different opcode byte (0x81), the same Mod/RM byte (0xC4) and a -//! 32-bit immediate in little-endian representing `16`. -//! -//! It should be obvious that patching an existing instruction into an instruction -//! having a different size may create various problems. So it's recommended to be -//! careful and to only patch instructions into instructions having the same size. -//! The example below demonstrates how instruction options can be used to guarantee -//! the size of an instruction by forcing the assembler to use long-form encoding: +//! This is an advanced topic that is sometimes unavoidable. AsmJit by default appends machine code it generates +//! into a \ref CodeBuffer, however, it also allows to set the offset in \ref CodeBuffer explicitly and to overwrite +//! its content. This technique is extremely dangerous as X86 instructions have variable length (see below), so you +//! should in general only patch code to change instruction's immediate values or some other details not known at +//! the time the instruction was emitted. A typical scenario that requires code-patching is when you start emitting +//! function and you don't know how much stack you want to reserve for it. +//! +//! Before we go further it's important to introduce instruction options, because they can help with code-patching +//! (and not only patching, but that will be explained in AVX-512 section): +//! +//! - Many general-purpose instructions (especially arithmetic ones) on X86 have multiple encodings - in AsmJit +//! this is usually called 'short form' and 'long form'. +//! +//! - AsmJit always tries to use 'short form' as it makes the resulting machine-code smaller, which is always +//! good - this decision is used by majority of assemblers out there. +//! +//! - AsmJit allows to override the default decision by using `short_()` and `long_()` instruction options to force +//! short or long form, respectively. 
The most useful is `long_()` as it basically forces AsmJit to always emit +//! the longest form. The `short_()` is not that useful as it's automatic (except jumps to non-bound labels). Note +//! that the underscore after each function name avoids collision with built-in C++ types. +//! +//! To illustrate what short form and long form means in binary let's assume we want to emit "add esp, 16" instruction, +//! which has two possible binary encodings: +//! +//! - `83C410` - This is a short form aka `short add esp, 16` - You can see opcode byte (0x83), MOD/RM byte (0xC4) +//! and an 8-bit immediate value representing `16`. +//! +//! - `81C410000000` - This is a long form aka `long add esp, 16` - You can see a different opcode byte (0x81), the +//! same Mod/RM byte (0xC4) and a 32-bit immediate in little-endian representing `16`. +//! +//! It should be obvious that patching an existing instruction into an instruction having a different size may create +//! various problems. So it's recommended to be careful and to only patch instructions into instructions having the +//! same size. The example below demonstrates how instruction options can be used to guarantee the size of an +//! instruction by forcing the assembler to use long-form encoding: //! //! ``` //! #include <asmjit/x86.h> @@ -546,27 +498,21 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! } //! ``` //! -//! If you run the example it will just work, because both instructions have -//! the same size. As an experiment you can try removing `long_()` form to -//! see what happens when wrong code is generated. +//! If you run the example it will just work, because both instructions have the same size. As an experiment you can +//! try removing `long_()` form to see what happens when wrong code is generated. //! //! ### Code Patching and REX Prefix //! -//! In 64-bit mode there is one more thing to worry about when patching code: -//! REX prefix. It's a single byte prefix designed to address registers with -//! 
ids from 8 to 15 and to override the default width of operation from 32 -//! to 64 bits. AsmJit, like other assemblers, only emits REX prefix when it's -//! necessary. If the patched code only changes the immediate value as shown -//! in the previous example then there is nothing to worry about as it doesn't -//! change the logic behind emitting REX prefix, however, if the patched code -//! changes register id or overrides the operation width then it's important -//! to take care of REX prefix as well. -//! -//! AsmJit contains another instruction option that controls (forces) REX -//! prefix - `rex()`. If you use it the instruction emitted will always use -//! REX prefix even when it's encodable without it. The following list contains -//! some instructions and their binary representations to illustrate when it's -//! emitted: +//! In 64-bit mode there is one more thing to worry about when patching code: REX prefix. It's a single byte prefix +//! designed to address registers with ids from 8 to 15 and to override the default width of operation from 32 to 64 +//! bits. AsmJit, like other assemblers, only emits REX prefix when it's necessary. If the patched code only changes +//! the immediate value as shown in the previous example then there is nothing to worry about as it doesn't change +//! the logic behind emitting REX prefix, however, if the patched code changes register id or overrides the operation +//! width then it's important to take care of REX prefix as well. +//! +//! AsmJit contains another instruction option that controls (forces) REX prefix - `rex()`. If you use it the +//! instruction emitted will always use REX prefix even when it's encodable without it. The following list contains +//! some instructions and their binary representations to illustrate when it's emitted: //! //! - `__83C410` - `add esp, 16` - 32-bit operation in 64-bit mode doesn't require REX prefix. //! 
- `4083C410` - `rex add esp, 16` - 32-bit operation in 64-bit mode with forced REX prefix (0x40). @@ -619,18 +565,15 @@ ASMJIT_BEGIN_SUB_NAMESPACE(x86) //! } //! ``` //! -//! It's important to understand that prefixes are part of instruction options. -//! When a member function that involves adding a prefix is called the prefix -//! is combined with existing instruction options, which will affect the next +//! It's important to understand that prefixes are part of instruction options. When a member function that involves +//! adding a prefix is called the prefix is combined with existing instruction options, which will affect the next //! instruction generated. //! //! ### Generating AVX512 code. //! -//! x86::Assembler can generate AVX512+ code including the use of opmask -//! registers. Opmask can be specified through \ref x86::Assembler::k() -//! function, which stores it as an extra register, which will be used -//! by the next instruction. AsmJit uses such concept for manipulating -//! instruction options as well. +//! x86::Assembler can generate AVX512+ code including the use of opmask registers. Opmask can be specified through +//! \ref x86::Assembler::k() function, which stores it as an extra register, which will be used by the next +//! instruction. AsmJit uses such concept for manipulating instruction options as well. //! //! The following AVX512 features are supported: //! @@ -702,9 +645,8 @@ public: //! \name Internal //! \{ - // NOTE: x86::Assembler uses _privateData to store 'address-override' bit that - // is used to decide whether to emit address-override (67H) prefix based on - // the memory BASE+INDEX registers. It's either `kX86MemInfo_67H_X86` or + // NOTE: x86::Assembler uses _privateData to store 'address-override' bit that is used to decide whether to emit + // address-override (67H) prefix based on the memory BASE+INDEX registers. It's either `kX86MemInfo_67H_X86` or // `kX86MemInfo_67H_X64`. 
inline uint32_t _addressOverrideMask() const noexcept { return _privateData; } inline void _setAddressOverrideMask(uint32_t m) noexcept { _privateData = m; } @@ -715,7 +657,7 @@ public: //! \name Emit //! \{ - ASMJIT_API Error _emit(uint32_t instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override; + ASMJIT_API Error _emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override; //! \} //! \endcond @@ -723,7 +665,7 @@ public: //! \name Align //! \{ - ASMJIT_API Error align(uint32_t alignMode, uint32_t alignment) override; + ASMJIT_API Error align(AlignMode alignMode, uint32_t alignment) override; //! \} |