diff options
author | Peter Johnson <peter@tortall.net> | 2007-11-28 07:21:08 +0000 |
---|---|---|
committer | Peter Johnson <peter@tortall.net> | 2007-11-28 07:21:08 +0000 |
commit | 14596f6658cb247c58d5f65769877ba81bfeac97 (patch) | |
tree | f0836eb732ac680f6f4db719bca642d714cc9c6a /modules/arch | |
parent | 4a778b6027a6f9d6502b6d4ade0487dedba63cab (diff) | |
download | yasm-14596f6658cb247c58d5f65769877ba81bfeac97.tar.gz |
Fix #119. Quite a few SSE/SSE2 instructions assumed 128-bit memory sizes
instead of the correct 64-bit or 32-bit sizes (e.g. xmm/m64 or similar).
It worked fine when no memory size was specified, but it should also work
with the correct size modifier.
svn path=/trunk/yasm/; revision=2015
Diffstat (limited to 'modules/arch')
-rwxr-xr-x | modules/arch/x86/gen_x86_insn.py | 328 |
-rw-r--r-- | modules/arch/x86/tests/Makefile.inc | 2 |
-rw-r--r-- | modules/arch/x86/tests/ssewidth.asm | 566 |
-rw-r--r-- | modules/arch/x86/tests/ssewidth.hex | 1532 |
4 files changed, 2274 insertions, 154 deletions
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py index e891634e..9213fe0c 100755 --- a/modules/arch/x86/gen_x86_insn.py +++ b/modules/arch/x86/gen_x86_insn.py @@ -3710,29 +3710,29 @@ add_insn("prefetcht2", "twobytemem", modifiers=[3, 0x0F, 0x18], cpu=["P3"]) add_insn("sfence", "threebyte", modifiers=[0x0F, 0xAE, 0xF8], cpu=["P3"]) -add_group("sseps", +add_group("xmm_xmm128", cpu=["SSE"], - modifiers=["Op1Add"], + modifiers=["PreAdd", "Op1Add"], + prefix=0x00, opcode=[0x0F, 0x00], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) -add_insn("addps", "sseps", modifiers=[0x58]) -add_insn("andnps", "sseps", modifiers=[0x55]) -add_insn("andps", "sseps", modifiers=[0x54]) -add_insn("comiss", "sseps", modifiers=[0x2F]) -add_insn("divps", "sseps", modifiers=[0x5E]) -add_insn("maxps", "sseps", modifiers=[0x5F]) -add_insn("minps", "sseps", modifiers=[0x5D]) -add_insn("mulps", "sseps", modifiers=[0x59]) -add_insn("orps", "sseps", modifiers=[0x56]) -add_insn("rcpps", "sseps", modifiers=[0x53]) -add_insn("rsqrtps", "sseps", modifiers=[0x52]) -add_insn("sqrtps", "sseps", modifiers=[0x51]) -add_insn("subps", "sseps", modifiers=[0x5C]) -add_insn("unpckhps", "sseps", modifiers=[0x15]) -add_insn("unpcklps", "sseps", modifiers=[0x14]) -add_insn("xorps", "sseps", modifiers=[0x57]) +add_insn("addps", "xmm_xmm128", modifiers=[0, 0x58]) +add_insn("andnps", "xmm_xmm128", modifiers=[0, 0x55]) +add_insn("andps", "xmm_xmm128", modifiers=[0, 0x54]) +add_insn("divps", "xmm_xmm128", modifiers=[0, 0x5E]) +add_insn("maxps", "xmm_xmm128", modifiers=[0, 0x5F]) +add_insn("minps", "xmm_xmm128", modifiers=[0, 0x5D]) +add_insn("mulps", "xmm_xmm128", modifiers=[0, 0x59]) +add_insn("orps", "xmm_xmm128", modifiers=[0, 0x56]) +add_insn("rcpps", "xmm_xmm128", modifiers=[0, 0x53]) +add_insn("rsqrtps", "xmm_xmm128", modifiers=[0, 0x52]) +add_insn("sqrtps", "xmm_xmm128", modifiers=[0, 0x51]) +add_insn("subps", 
"xmm_xmm128", modifiers=[0, 0x5C]) +add_insn("unpckhps", "xmm_xmm128", modifiers=[0, 0x15]) +add_insn("unpcklps", "xmm_xmm128", modifiers=[0, 0x14]) +add_insn("xorps", "xmm_xmm128", modifiers=[0, 0x57]) add_group("cvt_rx_xmm32", suffix="l", @@ -3819,79 +3819,104 @@ add_group("cvt_xmm_rmx", add_insn("cvtsi2ss", "cvt_xmm_rmx", modifiers=[0xF3, 0x2A]) -add_group("ssess", +add_group("xmm_xmm32", cpu=["SSE"], modifiers=["PreAdd", "Op1Add"], prefix=0x00, opcode=[0x0F, 0x00], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) + Operand(type="SIMDReg", size=128, dest="EA")]) +add_group("xmm_xmm32", + cpu=["SSE"], + modifiers=["PreAdd", "Op1Add"], + prefix=0x00, + opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="Mem", size=32, relaxed=True, dest="EA")]) -add_insn("addss", "ssess", modifiers=[0xF3, 0x58]) -add_insn("divss", "ssess", modifiers=[0xF3, 0x5E]) -add_insn("maxss", "ssess", modifiers=[0xF3, 0x5F]) -add_insn("minss", "ssess", modifiers=[0xF3, 0x5D]) -add_insn("mulss", "ssess", modifiers=[0xF3, 0x59]) -add_insn("rcpss", "ssess", modifiers=[0xF3, 0x53]) -add_insn("rsqrtss", "ssess", modifiers=[0xF3, 0x52]) -add_insn("sqrtss", "ssess", modifiers=[0xF3, 0x51]) -add_insn("subss", "ssess", modifiers=[0xF3, 0x5C]) -add_insn("ucomiss", "ssess", modifiers=[0, 0x2E]) - -add_group("ssecmpps", +add_insn("addss", "xmm_xmm32", modifiers=[0xF3, 0x58]) +add_insn("comiss", "xmm_xmm32", modifiers=[0, 0x2F]) +add_insn("divss", "xmm_xmm32", modifiers=[0xF3, 0x5E]) +add_insn("maxss", "xmm_xmm32", modifiers=[0xF3, 0x5F]) +add_insn("minss", "xmm_xmm32", modifiers=[0xF3, 0x5D]) +add_insn("mulss", "xmm_xmm32", modifiers=[0xF3, 0x59]) +add_insn("rcpss", "xmm_xmm32", modifiers=[0xF3, 0x53]) +add_insn("rsqrtss", "xmm_xmm32", modifiers=[0xF3, 0x52]) +add_insn("sqrtss", "xmm_xmm32", modifiers=[0xF3, 0x51]) +add_insn("subss", "xmm_xmm32", modifiers=[0xF3, 0x5C]) 
+add_insn("ucomiss", "xmm_xmm32", modifiers=[0, 0x2E]) + +add_group("ssecmp_128", cpu=["SSE"], - modifiers=["Imm8"], + modifiers=["Imm8", "PreAdd"], opcode=[0x0F, 0xC2], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) -add_insn("cmpeqps", "ssecmpps", modifiers=[0x00]) -add_insn("cmpleps", "ssecmpps", modifiers=[0x02]) -add_insn("cmpltps", "ssecmpps", modifiers=[0x01]) -add_insn("cmpneqps", "ssecmpps", modifiers=[0x04]) -add_insn("cmpnleps", "ssecmpps", modifiers=[0x06]) -add_insn("cmpnltps", "ssecmpps", modifiers=[0x05]) -add_insn("cmpordps", "ssecmpps", modifiers=[0x07]) -add_insn("cmpunordps", "ssecmpps", modifiers=[0x03]) +add_insn("cmpeqps", "ssecmp_128", modifiers=[0]) +add_insn("cmpleps", "ssecmp_128", modifiers=[2]) +add_insn("cmpltps", "ssecmp_128", modifiers=[1]) +add_insn("cmpneqps", "ssecmp_128", modifiers=[4]) +add_insn("cmpnleps", "ssecmp_128", modifiers=[6]) +add_insn("cmpnltps", "ssecmp_128", modifiers=[5]) +add_insn("cmpordps", "ssecmp_128", modifiers=[7]) +add_insn("cmpunordps", "ssecmp_128", modifiers=[3]) -add_group("ssecmpss", +add_group("ssecmp_32", cpu=["SSE"], modifiers=["Imm8", "PreAdd"], prefix=0x00, opcode=[0x0F, 0xC2], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")]) + Operand(type="SIMDReg", size=128, dest="EA")]) -add_insn("cmpeqss", "ssecmpss", modifiers=[0, 0xF3]) -add_insn("cmpless", "ssecmpss", modifiers=[2, 0xF3]) -add_insn("cmpltss", "ssecmpss", modifiers=[1, 0xF3]) -add_insn("cmpneqss", "ssecmpss", modifiers=[4, 0xF3]) -add_insn("cmpnless", "ssecmpss", modifiers=[6, 0xF3]) -add_insn("cmpnltss", "ssecmpss", modifiers=[5, 0xF3]) -add_insn("cmpordss", "ssecmpss", modifiers=[7, 0xF3]) -add_insn("cmpunordss", "ssecmpss", modifiers=[3, 0xF3]) +add_group("ssecmp_32", + cpu=["SSE"], + modifiers=["Imm8", "PreAdd"], + prefix=0x00, + opcode=[0x0F, 0xC2], + operands=[Operand(type="SIMDReg", size=128, 
dest="Spare"), + Operand(type="Mem", size=32, relaxed=True, dest="EA")]) + +add_insn("cmpeqss", "ssecmp_32", modifiers=[0, 0xF3]) +add_insn("cmpless", "ssecmp_32", modifiers=[2, 0xF3]) +add_insn("cmpltss", "ssecmp_32", modifiers=[1, 0xF3]) +add_insn("cmpneqss", "ssecmp_32", modifiers=[4, 0xF3]) +add_insn("cmpnless", "ssecmp_32", modifiers=[6, 0xF3]) +add_insn("cmpnltss", "ssecmp_32", modifiers=[5, 0xF3]) +add_insn("cmpordss", "ssecmp_32", modifiers=[7, 0xF3]) +add_insn("cmpunordss", "ssecmp_32", modifiers=[3, 0xF3]) -add_group("ssepsimm", +add_group("xmm_xmm128_imm", cpu=["SSE"], - modifiers=["Op1Add"], + modifiers=["PreAdd", "Op1Add"], opcode=[0x0F, 0x00], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) -add_insn("cmpps", "ssepsimm", modifiers=[0xC2]) -add_insn("shufps", "ssepsimm", modifiers=[0xC6]) +add_insn("cmpps", "xmm_xmm128_imm", modifiers=[0, 0xC2]) +add_insn("shufps", "xmm_xmm128_imm", modifiers=[0, 0xC6]) -add_group("ssessimm", +add_group("xmm_xmm32_imm", cpu=["SSE"], modifiers=["PreAdd", "Op1Add"], prefix=0x00, opcode=[0x0F, 0x00], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) + +add_group("xmm_xmm32_imm", + cpu=["SSE"], + modifiers=["PreAdd", "Op1Add"], + prefix=0x00, + opcode=[0x0F, 0x00], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="Mem", size=32, relaxed=True, dest="EA"), Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) -add_insn("cmpss", "ssessimm", modifiers=[0xF3, 0xC2]) +add_insn("cmpss", "xmm_xmm32_imm", modifiers=[0xF3, 0xC2]) add_group("ldstmxcsr", cpu=["SSE"], @@ -4158,64 +4183,14 @@ add_insn("pshufw", "pshufw") ##################################################################### # SSE2 instructions 
##################################################################### -add_insn("addpd", "ssess", modifiers=[0x66, 0x58], cpu=["SSE2"]) -add_insn("addsd", "ssess", modifiers=[0xF2, 0x58], cpu=["SSE2"]) -add_insn("andnpd", "ssess", modifiers=[0x66, 0x55], cpu=["SSE2"]) -add_insn("andpd", "ssess", modifiers=[0x66, 0x54], cpu=["SSE2"]) -add_insn("comisd", "ssess", modifiers=[0x66, 0x2F], cpu=["SSE2"]) -add_insn("divpd", "ssess", modifiers=[0x66, 0x5E], cpu=["SSE2"]) -add_insn("divsd", "ssess", modifiers=[0xF2, 0x5E], cpu=["SSE2"]) -add_insn("maxpd", "ssess", modifiers=[0x66, 0x5F], cpu=["SSE2"]) -add_insn("maxsd", "ssess", modifiers=[0xF2, 0x5F], cpu=["SSE2"]) -add_insn("minpd", "ssess", modifiers=[0x66, 0x5D], cpu=["SSE2"]) -add_insn("minsd", "ssess", modifiers=[0xF2, 0x5D], cpu=["SSE2"]) -add_insn("mulpd", "ssess", modifiers=[0x66, 0x59], cpu=["SSE2"]) -add_insn("mulsd", "ssess", modifiers=[0xF2, 0x59], cpu=["SSE2"]) -add_insn("orpd", "ssess", modifiers=[0x66, 0x56], cpu=["SSE2"]) -add_insn("sqrtpd", "ssess", modifiers=[0x66, 0x51], cpu=["SSE2"]) -add_insn("sqrtsd", "ssess", modifiers=[0xF2, 0x51], cpu=["SSE2"]) -add_insn("subpd", "ssess", modifiers=[0x66, 0x5C], cpu=["SSE2"]) -add_insn("subsd", "ssess", modifiers=[0xF2, 0x5C], cpu=["SSE2"]) -add_insn("ucomisd", "ssess", modifiers=[0x66, 0x2E], cpu=["SSE2"]) -add_insn("unpckhpd", "ssess", modifiers=[0x66, 0x15], cpu=["SSE2"]) -add_insn("unpcklpd", "ssess", modifiers=[0x66, 0x14], cpu=["SSE2"]) -add_insn("xorpd", "ssess", modifiers=[0x66, 0x57], cpu=["SSE2"]) -add_insn("cvtpd2dq", "ssess", modifiers=[0xF2, 0xE6], cpu=["SSE2"]) -add_insn("cvtpd2ps", "ssess", modifiers=[0x66, 0x5A], cpu=["SSE2"]) -add_insn("cvtps2dq", "ssess", modifiers=[0x66, 0x5B], cpu=["SSE2"]) - -add_insn("cvtdq2ps", "sseps", modifiers=[0x5B], cpu=["SSE2"]) - -add_insn("cmpeqpd", "ssecmpss", modifiers=[0x00, 0x66], cpu=["SSE2"]) -add_insn("cmpeqsd", "ssecmpss", modifiers=[0x00, 0xF2], cpu=["SSE2"]) -add_insn("cmplepd", "ssecmpss", modifiers=[0x02, 
0x66], cpu=["SSE2"]) -add_insn("cmplesd", "ssecmpss", modifiers=[0x02, 0xF2], cpu=["SSE2"]) -add_insn("cmpltpd", "ssecmpss", modifiers=[0x01, 0x66], cpu=["SSE2"]) -add_insn("cmpltsd", "ssecmpss", modifiers=[0x01, 0xF2], cpu=["SSE2"]) -add_insn("cmpneqpd", "ssecmpss", modifiers=[0x04, 0x66], cpu=["SSE2"]) -add_insn("cmpneqsd", "ssecmpss", modifiers=[0x04, 0xF2], cpu=["SSE2"]) -add_insn("cmpnlepd", "ssecmpss", modifiers=[0x06, 0x66], cpu=["SSE2"]) -add_insn("cmpnlesd", "ssecmpss", modifiers=[0x06, 0xF2], cpu=["SSE2"]) -add_insn("cmpnltpd", "ssecmpss", modifiers=[0x05, 0x66], cpu=["SSE2"]) -add_insn("cmpnltsd", "ssecmpss", modifiers=[0x05, 0xF2], cpu=["SSE2"]) -add_insn("cmpordpd", "ssecmpss", modifiers=[0x07, 0x66], cpu=["SSE2"]) -add_insn("cmpordsd", "ssecmpss", modifiers=[0x07, 0xF2], cpu=["SSE2"]) -add_insn("cmpunordpd", "ssecmpss", modifiers=[0x03, 0x66], cpu=["SSE2"]) -add_insn("cmpunordsd", "ssecmpss", modifiers=[0x03, 0xF2], cpu=["SSE2"]) - -add_insn("cmppd", "ssessimm", modifiers=[0x66, 0xC2], cpu=["SSE2"]) -add_insn("shufpd", "ssessimm", modifiers=[0x66, 0xC6], cpu=["SSE2"]) - -add_insn("cvtsi2sd", "cvt_xmm_rmx", modifiers=[0xF2, 0x2A], cpu=["SSE2"]) - -add_group("cvt_xmm_xmm64_ss", +add_group("xmm_xmm64", cpu=["SSE2"], modifiers=["PreAdd", "Op1Add"], prefix=0x00, opcode=[0x0F, 0x00], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="SIMDReg", size=128, dest="EA")]) -add_group("cvt_xmm_xmm64_ss", +add_group("xmm_xmm64", cpu=["SSE2"], modifiers=["PreAdd", "Op1Add"], prefix=0x00, @@ -4223,23 +4198,75 @@ add_group("cvt_xmm_xmm64_ss", operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="Mem", size=64, relaxed=True, dest="EA")]) -add_insn("cvtdq2pd", "cvt_xmm_xmm64_ss", modifiers=[0xF3, 0xE6]) -add_insn("cvtsd2ss", "cvt_xmm_xmm64_ss", modifiers=[0xF2, 0x5A]) - -add_group("cvt_xmm_xmm64_ps", +add_insn("addsd", "xmm_xmm64", modifiers=[0xF2, 0x58]) +add_insn("comisd", "xmm_xmm64", modifiers=[0x66, 0x2F]) 
+add_insn("cvtdq2pd", "xmm_xmm64", modifiers=[0xF3, 0xE6]) +add_insn("cvtps2pd", "xmm_xmm64", modifiers=[0, 0x5A]) +add_insn("cvtsd2ss", "xmm_xmm64", modifiers=[0xF2, 0x5A]) +add_insn("divsd", "xmm_xmm64", modifiers=[0xF2, 0x5E]) +add_insn("maxsd", "xmm_xmm64", modifiers=[0xF2, 0x5F]) +add_insn("minsd", "xmm_xmm64", modifiers=[0xF2, 0x5D]) +add_insn("mulsd", "xmm_xmm64", modifiers=[0xF2, 0x59]) +add_insn("subsd", "xmm_xmm64", modifiers=[0xF2, 0x5C]) +add_insn("sqrtsd", "xmm_xmm64", modifiers=[0xF2, 0x51]) +add_insn("ucomisd", "xmm_xmm64", modifiers=[0x66, 0x2E]) + +add_insn("addpd", "xmm_xmm128", modifiers=[0x66, 0x58], cpu=["SSE2"]) +add_insn("andnpd", "xmm_xmm128", modifiers=[0x66, 0x55], cpu=["SSE2"]) +add_insn("andpd", "xmm_xmm128", modifiers=[0x66, 0x54], cpu=["SSE2"]) +add_insn("cvtdq2ps", "xmm_xmm128", modifiers=[0, 0x5B], cpu=["SSE2"]) +add_insn("cvtpd2dq", "xmm_xmm128", modifiers=[0xF2, 0xE6], cpu=["SSE2"]) +add_insn("cvtpd2ps", "xmm_xmm128", modifiers=[0x66, 0x5A], cpu=["SSE2"]) +add_insn("cvtps2dq", "xmm_xmm128", modifiers=[0x66, 0x5B], cpu=["SSE2"]) +add_insn("divpd", "xmm_xmm128", modifiers=[0x66, 0x5E], cpu=["SSE2"]) +add_insn("maxpd", "xmm_xmm128", modifiers=[0x66, 0x5F], cpu=["SSE2"]) +add_insn("minpd", "xmm_xmm128", modifiers=[0x66, 0x5D], cpu=["SSE2"]) +add_insn("mulpd", "xmm_xmm128", modifiers=[0x66, 0x59], cpu=["SSE2"]) +add_insn("orpd", "xmm_xmm128", modifiers=[0x66, 0x56], cpu=["SSE2"]) +add_insn("sqrtpd", "xmm_xmm128", modifiers=[0x66, 0x51], cpu=["SSE2"]) +add_insn("subpd", "xmm_xmm128", modifiers=[0x66, 0x5C], cpu=["SSE2"]) +add_insn("unpckhpd", "xmm_xmm128", modifiers=[0x66, 0x15], cpu=["SSE2"]) +add_insn("unpcklpd", "xmm_xmm128", modifiers=[0x66, 0x14], cpu=["SSE2"]) +add_insn("xorpd", "xmm_xmm128", modifiers=[0x66, 0x57], cpu=["SSE2"]) + +add_group("ssecmp_64", cpu=["SSE2"], - modifiers=["Op1Add"], - opcode=[0x0F, 0x00], + modifiers=["Imm8", "PreAdd"], + prefix=0x00, + opcode=[0x0F, 0xC2], operands=[Operand(type="SIMDReg", size=128, 
dest="Spare"), Operand(type="SIMDReg", size=128, dest="EA")]) -add_group("cvt_xmm_xmm64_ps", + +add_group("ssecmp_64", cpu=["SSE2"], - modifiers=["Op1Add"], - opcode=[0x0F, 0x00], + modifiers=["Imm8", "PreAdd"], + prefix=0x00, + opcode=[0x0F, 0xC2], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), Operand(type="Mem", size=64, relaxed=True, dest="EA")]) -add_insn("cvtps2pd", "cvt_xmm_xmm64_ps", modifiers=[0x5A]) +add_insn("cmpeqsd", "ssecmp_64", modifiers=[0, 0xF2]) +add_insn("cmplesd", "ssecmp_64", modifiers=[2, 0xF2]) +add_insn("cmpltsd", "ssecmp_64", modifiers=[1, 0xF2]) +add_insn("cmpneqsd", "ssecmp_64", modifiers=[4, 0xF2]) +add_insn("cmpnlesd", "ssecmp_64", modifiers=[6, 0xF2]) +add_insn("cmpnltsd", "ssecmp_64", modifiers=[5, 0xF2]) +add_insn("cmpordsd", "ssecmp_64", modifiers=[7, 0xF2]) +add_insn("cmpunordsd", "ssecmp_64", modifiers=[3, 0xF2]) + +add_insn("cmpeqpd", "ssecmp_128", modifiers=[0, 0x66], cpu=["SSE2"]) +add_insn("cmplepd", "ssecmp_128", modifiers=[2, 0x66], cpu=["SSE2"]) +add_insn("cmpltpd", "ssecmp_128", modifiers=[1, 0x66], cpu=["SSE2"]) +add_insn("cmpneqpd", "ssecmp_128", modifiers=[4, 0x66], cpu=["SSE2"]) +add_insn("cmpnlepd", "ssecmp_128", modifiers=[6, 0x66], cpu=["SSE2"]) +add_insn("cmpnltpd", "ssecmp_128", modifiers=[5, 0x66], cpu=["SSE2"]) +add_insn("cmpordpd", "ssecmp_128", modifiers=[7, 0x66], cpu=["SSE2"]) +add_insn("cmpunordpd", "ssecmp_128", modifiers=[3, 0x66], cpu=["SSE2"]) + +add_insn("cmppd", "xmm_xmm128_imm", modifiers=[0x66, 0xC2], cpu=["SSE2"]) +add_insn("shufpd", "xmm_xmm128_imm", modifiers=[0x66, 0xC6], cpu=["SSE2"]) + +add_insn("cvtsi2sd", "cvt_xmm_rmx", modifiers=[0xF2, 0x2A], cpu=["SSE2"]) add_group("cvt_rx_xmm64", suffix="l", @@ -4305,7 +4332,15 @@ add_group("cmpsd", prefix=0xF2, opcode=[0x0F, 0xC2], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"), + Operand(type="SIMDReg", size=128, dest="EA"), + Operand(type="Imm", size=8, relaxed=True, 
dest="Imm")]) + +add_group("cmpsd", + cpu=["SSE2"], + prefix=0xF2, + opcode=[0x0F, 0xC2], + operands=[Operand(type="SIMDReg", size=128, dest="Spare"), + Operand(type="Mem", size=64, relaxed=True, dest="EA"), Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) # cmpsd is added in string instructions above, so don't re-add_insn() @@ -4460,31 +4495,16 @@ add_insn("vmxon", "vmxthreebytemem", modifiers=[0xF3]) add_insn("cvttpd2pi", "cvt_mm_xmm", modifiers=[0x66, 0x2C], cpu=["SSE2"]) add_insn("cvttsd2si", "cvt_rx_xmm64", modifiers=[0xF2, 0x2C], cpu=["SSE2"]) -add_insn("cvttpd2dq", "ssess", modifiers=[0x66, 0xE6], cpu=["SSE2"]) -add_insn("cvttps2dq", "ssess", modifiers=[0xF3, 0x5B], cpu=["SSE2"]) +add_insn("cvttpd2dq", "xmm_xmm128", modifiers=[0x66, 0xE6], cpu=["SSE2"]) +add_insn("cvttps2dq", "xmm_xmm128", modifiers=[0xF3, 0x5B], cpu=["SSE2"]) add_insn("pmuludq", "mmxsse2", modifiers=[0xF4], cpu=["SSE2"]) -add_insn("pshufd", "ssessimm", modifiers=[0x66, 0x70], cpu=["SSE2"]) -add_insn("pshufhw", "ssessimm", modifiers=[0xF3, 0x70], cpu=["SSE2"]) -add_insn("pshuflw", "ssessimm", modifiers=[0xF2, 0x70], cpu=["SSE2"]) -add_insn("punpckhqdq", "ssess", modifiers=[0x66, 0x6D], cpu=["SSE2"]) -add_insn("punpcklqdq", "ssess", modifiers=[0x66, 0x6C], cpu=["SSE2"]) - -add_group("cvt_xmm_xmm32", - cpu=["SSE2"], - modifiers=["PreAdd", "Op1Add"], - prefix=0x00, - opcode=[0x0F, 0x00], - operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="SIMDReg", size=128, dest="EA")]) -add_group("cvt_xmm_xmm32", - cpu=["SSE2"], - modifiers=["PreAdd", "Op1Add"], - prefix=0x00, - opcode=[0x0F, 0x00], - operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="Mem", size=32, relaxed=True, dest="EA")]) +add_insn("pshufd", "xmm_xmm128_imm", modifiers=[0x66, 0x70], cpu=["SSE2"]) +add_insn("pshufhw", "xmm_xmm128_imm", modifiers=[0xF3, 0x70], cpu=["SSE2"]) +add_insn("pshuflw", "xmm_xmm128_imm", modifiers=[0xF2, 0x70], cpu=["SSE2"]) +add_insn("punpckhqdq", 
"xmm_xmm128", modifiers=[0x66, 0x6D], cpu=["SSE2"]) +add_insn("punpcklqdq", "xmm_xmm128", modifiers=[0x66, 0x6C], cpu=["SSE2"]) -add_insn("cvtss2sd", "cvt_xmm_xmm32", modifiers=[0xF3, 0x5A]) +add_insn("cvtss2sd", "xmm_xmm32", modifiers=[0xF3, 0x5A], cpu=["SSE2"]) add_group("maskmovdqu", cpu=["SSE2"], @@ -4546,17 +4566,17 @@ add_insn("psrldq", "pslrldq", modifiers=[3]) ##################################################################### # SSE3 / PNI Prescott New Instructions instructions ##################################################################### -add_insn("addsubpd", "ssess", modifiers=[0x66, 0xD0], cpu=["SSE3"]) -add_insn("addsubps", "ssess", modifiers=[0xF2, 0xD0], cpu=["SSE3"]) -add_insn("haddpd", "ssess", modifiers=[0x66, 0x7C], cpu=["SSE3"]) -add_insn("haddps", "ssess", modifiers=[0xF2, 0x7C], cpu=["SSE3"]) -add_insn("hsubpd", "ssess", modifiers=[0x66, 0x7D], cpu=["SSE3"]) -add_insn("hsubps", "ssess", modifiers=[0xF2, 0x7D], cpu=["SSE3"]) -add_insn("movshdup", "ssess", modifiers=[0xF3, 0x16], cpu=["SSE3"]) -add_insn("movsldup", "ssess", modifiers=[0xF3, 0x12], cpu=["SSE3"]) +add_insn("addsubpd", "xmm_xmm128", modifiers=[0x66, 0xD0], cpu=["SSE3"]) +add_insn("addsubps", "xmm_xmm128", modifiers=[0xF2, 0xD0], cpu=["SSE3"]) +add_insn("haddpd", "xmm_xmm128", modifiers=[0x66, 0x7C], cpu=["SSE3"]) +add_insn("haddps", "xmm_xmm128", modifiers=[0xF2, 0x7C], cpu=["SSE3"]) +add_insn("hsubpd", "xmm_xmm128", modifiers=[0x66, 0x7D], cpu=["SSE3"]) +add_insn("hsubps", "xmm_xmm128", modifiers=[0xF2, 0x7D], cpu=["SSE3"]) +add_insn("movshdup", "xmm_xmm128", modifiers=[0xF3, 0x16], cpu=["SSE3"]) +add_insn("movsldup", "xmm_xmm128", modifiers=[0xF3, 0x12], cpu=["SSE3"]) add_insn("fisttp", "fildstp", modifiers=[1, 0, 1], cpu=["SSE3"]) add_insn("fisttpll", "fildstp", suffix="q", modifiers=[7], cpu=["SSE3"]) -add_insn("movddup", "cvt_xmm_xmm64_ss", modifiers=[0xF2, 0x12], cpu=["SSE3"]) +add_insn("movddup", "xmm_xmm64", modifiers=[0xF2, 0x12], cpu=["SSE3"]) add_insn("monitor", 
"threebyte", modifiers=[0x0F, 0x01, 0xC8], cpu=["SSE3"]) add_insn("mwait", "threebyte", modifiers=[0x0F, 0x01, 0xC9], cpu=["SSE3"]) @@ -4565,7 +4585,7 @@ add_group("lddqu", prefix=0xF2, opcode=[0x0F, 0xF0], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), - Operand(type="Mem", dest="EA")]) + Operand(type="Mem", size=128, relaxed=True, dest="EA")]) add_insn("lddqu", "lddqu") diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc index 87761177..ee1b4c81 100644 --- a/modules/arch/x86/tests/Makefile.inc +++ b/modules/arch/x86/tests/Makefile.inc @@ -163,6 +163,8 @@ EXTRA_DIST += modules/arch/x86/tests/sse5-basic.asm EXTRA_DIST += modules/arch/x86/tests/sse5-basic.hex EXTRA_DIST += modules/arch/x86/tests/sse5-err.asm EXTRA_DIST += modules/arch/x86/tests/sse5-err.errwarn +EXTRA_DIST += modules/arch/x86/tests/ssewidth.asm +EXTRA_DIST += modules/arch/x86/tests/ssewidth.hex EXTRA_DIST += modules/arch/x86/tests/ssse3.asm EXTRA_DIST += modules/arch/x86/tests/ssse3.c EXTRA_DIST += modules/arch/x86/tests/ssse3.hex diff --git a/modules/arch/x86/tests/ssewidth.asm b/modules/arch/x86/tests/ssewidth.asm new file mode 100644 index 00000000..0824b769 --- /dev/null +++ b/modules/arch/x86/tests/ssewidth.asm @@ -0,0 +1,566 @@ +[bits 64] +addpd xmm1, xmm2 +addpd xmm1, dqword [rbx] + +addps xmm1, xmm2 +addps xmm1, dqword [rbx] + +addsd xmm1, xmm2 +addsd xmm1, qword [rbx] + +addss xmm1, xmm2 +addss xmm1, dword [rbx] + +addsubpd xmm1, xmm2 +addsubpd xmm1, dqword [rbx] + +addsubps xmm1, xmm2 +addsubps xmm1, dqword [rbx] + +andnpd xmm1, xmm2 +andnpd xmm1, dqword [rbx] + +andnps xmm1, xmm2 +andnps xmm1, dqword [rbx] + +andpd xmm1, xmm2 +andpd xmm1, dqword [rbx] + +andps xmm1, xmm2 +andps xmm1, dqword [rbx] + +cmppd xmm1, xmm2, 0 +cmppd xmm1, dqword [rbx], 0 +cmpeqpd xmm1, xmm2 +cmpeqpd xmm1, dqword [rbx] + +cmpps xmm1, xmm2, 0 +cmpps xmm1, dqword [rbx], 0 +cmpeqps xmm1, xmm2 +cmpeqps xmm1, dqword [rbx] + +cmpsd xmm1, xmm2, 0 +cmpsd xmm1, qword [rbx], 0 
+cmpeqsd xmm1, xmm2 +cmpeqsd xmm1, qword [rbx] + +cmpss xmm1, xmm2, 0 +cmpss xmm1, dword [rbx], 0 +cmpeqss xmm1, xmm2 +cmpeqss xmm1, dword [rbx] + +comisd xmm1, xmm2 +comisd xmm1, qword [rbx] + +comiss xmm1, xmm2 +comiss xmm1, dword [rbx] + +cvtdq2pd xmm1, xmm2 +cvtdq2pd xmm1, qword [rbx] + +cvtdq2ps xmm1, xmm2 +cvtdq2ps xmm1, dqword [rbx] + +cvtpd2dq xmm1, xmm2 +cvtpd2dq xmm1, dqword [rbx] + +cvtpd2pi mm1, xmm2 ; mmx +cvtpd2pi mm1, dqword [rbx] + +cvtpd2ps xmm1, xmm2 +cvtpd2ps xmm1, dqword [rbx] + +cvtpi2pd xmm1, mm2 ; mmx +cvtpi2pd xmm1, qword [rbx] + +cvtpi2ps xmm1, mm2 ; mmx +cvtpi2ps xmm1, qword [rbx] + +cvtps2dq xmm1, xmm2 +cvtps2dq xmm1, dqword [rbx] + +cvtps2pd xmm1, xmm2 +cvtps2pd xmm1, qword [rbx] + +cvtps2pi mm1, xmm2 +cvtps2pi mm1, qword [rbx] + +cvtsd2si rbx, xmm2 +cvtsd2si rbx, qword [rbx] + +cvtsd2ss xmm1, xmm2 +cvtsd2ss xmm1, qword [rbx] + +cvtsi2sd xmm1, ebx +cvtsi2sd xmm1, dword [rbx] +cvtsi2sd xmm1, rbx +cvtsi2sd xmm1, qword [rbx] + +cvtsi2ss xmm1, ebx +cvtsi2ss xmm1, dword [rbx] +cvtsi2ss xmm1, rbx +cvtsi2ss xmm1, qword [rbx] + +cvtss2sd xmm1, xmm2 +cvtss2sd xmm1, dword [rbx] + +cvtss2si ebx, xmm2 +cvtss2si ebx, dword [rbx] +cvtss2si rbx, xmm2 +cvtss2si rbx, dword [rbx] + +cvttpd2dq xmm1, xmm2 +cvttpd2dq xmm1, dqword [rbx] + +cvttpd2pi mm1, xmm2 +cvttpd2pi mm1, dqword [rbx] + +cvttps2dq xmm1, xmm2 +cvttps2dq xmm1, dqword [rbx] + +cvttps2pi mm1, xmm2 +cvttps2pi mm1, qword [rbx] + +cvttsd2si eax, xmm1 +cvttsd2si eax, qword [rbx] +cvttsd2si rax, xmm1 +cvttsd2si rax, qword [rbx] + +cvttss2si eax, xmm1 +cvttss2si eax, dword [rbx] +cvttss2si rax, xmm1 +cvttss2si rax, dword [rbx] + +divpd xmm1, xmm2 +divpd xmm1, dqword [rbx] + +divps xmm1, xmm2 +divps xmm1, dqword [rbx] + +divsd xmm1, xmm2 +divsd xmm1, qword [rbx] + +divss xmm1, xmm2 +divss xmm1, dword [rbx] + +extrq xmm1, 0, 1 +extrq xmm1, byte 0, byte 1 +extrq xmm1, xmm2 + +haddpd xmm1, xmm2 +haddpd xmm1, dqword [rbx] + +haddps xmm1, xmm2 +haddps xmm1, dqword [rbx] + +hsubpd xmm1, xmm2 +hsubpd xmm1, 
dqword [rbx] + +hsubps xmm1, xmm2 +hsubps xmm1, dqword [rbx] + +insertq xmm1, xmm2, 0, 1 +insertq xmm1, xmm2, byte 0, byte 1 +insertq xmm1, xmm2 + +lddqu xmm1, dqword [rbx] + +ldmxcsr dword [rbx] + +maskmovdqu xmm1, xmm2 + +maxpd xmm1, xmm2 +maxpd xmm1, dqword [rbx] + +maxps xmm1, xmm2 +maxps xmm1, dqword [rbx] + +maxsd xmm1, xmm2 +maxsd xmm1, qword [rbx] + +maxss xmm1, xmm2 +maxss xmm1, dword [rbx] + +minpd xmm1, xmm2 +minpd xmm1, dqword [rbx] + +minps xmm1, xmm2 +minps xmm1, dqword [rbx] + +minsd xmm1, xmm2 +minsd xmm1, qword [rbx] + +minss xmm1, xmm2 +minss xmm1, dword [rbx] + +movapd xmm1, xmm2 +movapd xmm1, dqword [rbx] +movapd dqword [rbx], xmm2 + +movaps xmm1, xmm2 +movaps xmm1, dqword [rbx] +movaps dqword [rbx], xmm2 + +movd xmm1, ebx +movd xmm1, dword [rbx] +movd xmm1, rbx +movd xmm1, qword [rbx] +movd dword [rbx], xmm2 +movd qword [rbx], xmm2 + +movddup xmm1, xmm2 +movddup xmm1, qword [rbx] + +movdq2q mm1, xmm2 + +movdqa xmm1, xmm2 +movdqa xmm1, dqword [rbx] +movdqa dqword [rbx], xmm2 + +movdqu xmm1, xmm2 +movdqu xmm1, dqword [rbx] +movdqu dqword [rbx], xmm2 + +movhlps xmm1, xmm2 + +movhpd xmm1, qword [rbx] +movhpd qword [rbx], xmm2 + +movhps xmm1, qword [rbx] +movhps qword [rbx], xmm2 + +movlhps xmm1, xmm2 + +movlpd xmm1, qword [rbx] +movlpd qword [rbx], xmm2 + +movlps xmm1, qword [rbx] +movlps qword [rbx], xmm2 + +movmskpd ebx, xmm2 + +movmskps ebx, xmm2 + +movntdq dqword [rbx], xmm2 + +movntpd dqword [rbx], xmm2 + +movntps dqword [rbx], xmm2 + +movntsd qword [rbx], xmm2 + +movntss dword [rbx], xmm2 + +movq xmm1, xmm2 +movq xmm1, qword [rbx] +movq qword [rbx], xmm2 + +movq2dq xmm1, mm2 + +movsd xmm1, xmm2 +movsd xmm1, qword [rbx] +movsd qword [rbx], xmm2 + +movshdup xmm1, xmm2 +movshdup xmm1, dqword [rbx] + +movsldup xmm1, xmm2 +movsldup xmm1, dqword [rbx] + +movss xmm1, xmm2 +movss xmm1, dword [rbx] +movss dword [rbx], xmm2 + +movupd xmm1, xmm2 +movupd xmm1, dqword [rbx] +movupd dqword [rbx], xmm2 + +movups xmm1, xmm2 +movups xmm1, dqword [rbx] +movups 
dqword [rbx], xmm2 + +mulpd xmm1, xmm2 +mulpd xmm1, dqword [rbx] + +mulps xmm1, xmm2 +mulps xmm1, dqword [rbx] + +mulsd xmm1, xmm2 +mulsd xmm1, qword [rbx] + +mulss xmm1, xmm2 +mulss xmm1, dword [rbx] + +orpd xmm1, xmm2 +orpd xmm1, dqword [rbx] + +orps xmm1, xmm2 +orps xmm1, dqword [rbx] + +packssdw xmm1, xmm2 +packssdw xmm1, dqword [rbx] + +packsswb xmm1, xmm2 +packsswb xmm1, dqword [rbx] + +packuswb xmm1, xmm2 +packuswb xmm1, dqword [rbx] + +paddb xmm1, xmm2 +paddb xmm1, dqword [rbx] + +paddd xmm1, xmm2 +paddd xmm1, dqword [rbx] + +paddq xmm1, xmm2 +paddq xmm1, dqword [rbx] + +paddsb xmm1, xmm2 +paddsb xmm1, dqword [rbx] + +paddsw xmm1, xmm2 +paddsw xmm1, dqword [rbx] + +paddusb xmm1, xmm2 +paddusb xmm1, dqword [rbx] + +paddusw xmm1, xmm2 +paddusw xmm1, dqword [rbx] + +paddw xmm1, xmm2 +paddw xmm1, dqword [rbx] + +pand xmm1, xmm2 +pand xmm1, dqword [rbx] + +pandn xmm1, xmm2 +pandn xmm1, dqword [rbx] + +pavgb xmm1, xmm2 +pavgb xmm1, dqword [rbx] + +pavgw xmm1, xmm2 +pavgw xmm1, dqword [rbx] + +pcmpeqb xmm1, xmm2 +pcmpeqb xmm1, dqword [rbx] + +pcmpeqd xmm1, xmm2 +pcmpeqd xmm1, dqword [rbx] + +pcmpeqw xmm1, xmm2 +pcmpeqw xmm1, dqword [rbx] + +pcmpgtb xmm1, xmm2 +pcmpgtb xmm1, dqword [rbx] + +pcmpgtd xmm1, xmm2 +pcmpgtd xmm1, dqword [rbx] + +pcmpgtw xmm1, xmm2 +pcmpgtw xmm1, dqword [rbx] + +pextrw ebx, xmm2, byte 0 + +pinsrw xmm1, ebx, byte 0 +pinsrw xmm1, word [rbx], byte 0 + +pmaddwd xmm1, xmm2 +pmaddwd xmm1, dqword [rbx] + +pmaxsw xmm1, xmm2 +pmaxsw xmm1, dqword [rbx] + +pmaxub xmm1, xmm2 +pmaxub xmm1, dqword [rbx] + +pminsw xmm1, xmm2 +pminsw xmm1, dqword [rbx] + +pminub xmm1, xmm2 +pminub xmm1, dqword [rbx] + +pmovmskb eax, xmm2 + +pmulhuw xmm1, xmm2 +pmulhuw xmm1, dqword [rbx] + +pmulhw xmm1, xmm2 +pmulhw xmm1, dqword [rbx] + +pmullw xmm1, xmm2 +pmullw xmm1, dqword [rbx] + +pmuludq xmm1, xmm2 +pmuludq xmm1, dqword [rbx] + +por xmm1, xmm2 +por xmm1, dqword [rbx] + +psadbw xmm1, xmm2 +psadbw xmm1, dqword [rbx] + +pshufd xmm1, xmm2, byte 0 +pshufd xmm1, dqword 
[rbx], byte 0 + +pshufhw xmm1, xmm2, byte 0 +pshufhw xmm1, dqword [rbx], byte 0 + +pshuflw xmm1, xmm2, byte 0 +pshuflw xmm1, dqword [rbx], byte 0 + +pslld xmm1, xmm2 +pslld xmm1, dqword [rbx] +pslld xmm1, byte 5 + +pslldq xmm1, byte 5 + +psllq xmm1, xmm2 +psllq xmm1, dqword [rbx] +psllq xmm1, byte 5 + +psllw xmm1, xmm2 +psllw xmm1, dqword [rbx] +psllw xmm1, byte 5 + +psrad xmm1, xmm2 +psrad xmm1, dqword [rbx] +psrad xmm1, byte 5 + +psraw xmm1, xmm2 +psraw xmm1, dqword [rbx] +psraw xmm1, byte 5 + +psrld xmm1, xmm2 +psrld xmm1, dqword [rbx] +psrld xmm1, byte 5 + +psrldq xmm1, byte 5 + +psrlq xmm1, xmm2 +psrlq xmm1, dqword [rbx] +psrlq xmm1, byte 5 + +psrlw xmm1, xmm2 +psrlw xmm1, dqword [rbx] +psrlw xmm1, byte 5 + +psubb xmm1, xmm2 +psubb xmm1, dqword [rbx] + +psubd xmm1, xmm2 +psubd xmm1, dqword [rbx] + +psubq xmm1, xmm2 +psubq xmm1, dqword [rbx] + +psubsb xmm1, xmm2 +psubsb xmm1, dqword [rbx] + +psubsw xmm1, xmm2 +psubsw xmm1, dqword [rbx] + +psubusb xmm1, xmm2 +psubusb xmm1, dqword [rbx] + +psubusw xmm1, xmm2 +psubusw xmm1, dqword [rbx] + +psubw xmm1, xmm2 +psubw xmm1, dqword [rbx] + +punpckhbw xmm1, xmm2 +punpckhbw xmm1, dqword [rbx] + +punpckhdq xmm1, xmm2 +punpckhdq xmm1, dqword [rbx] + +punpckhqdq xmm1, xmm2 +punpckhqdq xmm1, dqword [rbx] + +punpckhwd xmm1, xmm2 +punpckhwd xmm1, dqword [rbx] + +punpcklbw xmm1, xmm2 +punpcklbw xmm1, dqword [rbx] + +punpckldq xmm1, xmm2 +punpckldq xmm1, dqword [rbx] + +punpcklqdq xmm1, xmm2 +punpcklqdq xmm1, dqword [rbx] + +punpcklwd xmm1, xmm2 +punpcklwd xmm1, dqword [rbx] + +pxor xmm1, xmm2 +pxor xmm1, dqword [rbx] + +rcpps xmm1, xmm2 +rcpps xmm1, dqword [rbx] + +rcpss xmm1, xmm2 +rcpss xmm1, dword [rbx] + +rsqrtps xmm1, xmm2 +rsqrtps xmm1, dqword [rbx] + +rsqrtss xmm1, xmm2 +rsqrtss xmm1, dword [rbx] + +shufpd xmm1, xmm2, 0 +shufpd xmm1, dqword [rbx], byte 0 + +shufps xmm1, xmm2, 0 +shufps xmm1, dqword [rbx], byte 0 + +sqrtpd xmm1, xmm2 +sqrtpd xmm1, dqword [rbx] + +sqrtps xmm1, xmm2 +sqrtps xmm1, dqword [rbx] + +sqrtsd xmm1, 
xmm2 +sqrtsd xmm1, qword [rbx] + +sqrtss xmm1, xmm2 +sqrtss xmm1, dword [rbx] + +stmxcsr dword [rbx] + +subpd xmm1, xmm2 +subpd xmm1, dqword [rbx] + +subps xmm1, xmm2 +subps xmm1, dqword [rbx] + +subsd xmm1, xmm2 +subsd xmm1, qword [rbx] + +subss xmm1, xmm2 +subss xmm1, dword [rbx] + +ucomisd xmm1, xmm2 +ucomisd xmm1, qword [rbx] + +ucomiss xmm1, xmm2 +ucomiss xmm1, dword [rbx] + +unpckhpd xmm1, xmm2 +unpckhpd xmm1, dqword [rbx] + +unpckhps xmm1, xmm2 +unpckhps xmm1, dqword [rbx] + +unpcklpd xmm1, xmm2 +unpcklpd xmm1, dqword [rbx] + +unpcklps xmm1, xmm2 +unpcklps xmm1, dqword [rbx] + +xorpd xmm1, xmm2 +xorpd xmm1, dqword [rbx] + +xorps xmm1, xmm2 +xorps xmm1, dqword [rbx] + diff --git a/modules/arch/x86/tests/ssewidth.hex b/modules/arch/x86/tests/ssewidth.hex new file mode 100644 index 00000000..fc24e8da --- /dev/null +++ b/modules/arch/x86/tests/ssewidth.hex @@ -0,0 +1,1532 @@ +66 +0f +58 +ca +66 +0f +58 +0b +0f +58 +ca +0f +58 +0b +f2 +0f +58 +ca +f2 +0f +58 +0b +f3 +0f +58 +ca +f3 +0f +58 +0b +66 +0f +d0 +ca +66 +0f +d0 +0b +f2 +0f +d0 +ca +f2 +0f +d0 +0b +66 +0f +55 +ca +66 +0f +55 +0b +0f +55 +ca +0f +55 +0b +66 +0f +54 +ca +66 +0f +54 +0b +0f +54 +ca +0f +54 +0b +66 +0f +c2 +ca +00 +66 +0f +c2 +0b +00 +66 +0f +c2 +ca +00 +66 +0f +c2 +0b +00 +0f +c2 +ca +00 +0f +c2 +0b +00 +0f +c2 +ca +00 +0f +c2 +0b +00 +f2 +0f +c2 +ca +00 +f2 +0f +c2 +0b +00 +f2 +0f +c2 +ca +00 +f2 +0f +c2 +0b +00 +f3 +0f +c2 +ca +00 +f3 +0f +c2 +0b +00 +f3 +0f +c2 +ca +00 +f3 +0f +c2 +0b +00 +66 +0f +2f +ca +66 +0f +2f +0b +0f +2f +ca +0f +2f +0b +f3 +0f +e6 +ca +f3 +0f +e6 +0b +0f +5b +ca +0f +5b +0b +f2 +0f +e6 +ca +f2 +0f +e6 +0b +66 +0f +2d +ca +66 +0f +2d +0b +66 +0f +5a +ca +66 +0f +5a +0b +66 +0f +2a +ca +66 +0f +2a +0b +0f +2a +ca +0f +2a +0b +66 +0f +5b +ca +66 +0f +5b +0b +0f +5a +ca +0f +5a +0b +0f +2d +ca +0f +2d +0b +f2 +48 +0f +2d +da +f2 +48 +0f +2d +1b +f2 +0f +5a +ca +f2 +0f +5a +0b +f2 +0f +2a +cb +f2 +0f +2a +0b +f2 +48 +0f +2a +cb +f2 +48 +0f +2a +0b +f3 +0f +2a +cb +f3 
+0f +2a +0b +f3 +48 +0f +2a +cb +f3 +48 +0f +2a +0b +f3 +0f +5a +ca +f3 +0f +5a +0b +f3 +0f +2d +da +f3 +0f +2d +1b +f3 +48 +0f +2d +da +f3 +48 +0f +2d +1b +66 +0f +e6 +ca +66 +0f +e6 +0b +66 +0f +2c +ca +66 +0f +2c +0b +f3 +0f +5b +ca +f3 +0f +5b +0b +0f +2c +ca +0f +2c +0b +f2 +0f +2c +c1 +f2 +0f +2c +03 +f2 +48 +0f +2c +c1 +f2 +48 +0f +2c +03 +f3 +0f +2c +c1 +f3 +0f +2c +03 +f3 +48 +0f +2c +c1 +f3 +48 +0f +2c +03 +66 +0f +5e +ca +66 +0f +5e +0b +0f +5e +ca +0f +5e +0b +f2 +0f +5e +ca +f2 +0f +5e +0b +f3 +0f +5e +ca +f3 +0f +5e +0b +66 +0f +78 +c1 +00 +01 +66 +0f +78 +c1 +00 +01 +66 +0f +79 +ca +66 +0f +7c +ca +66 +0f +7c +0b +f2 +0f +7c +ca +f2 +0f +7c +0b +66 +0f +7d +ca +66 +0f +7d +0b +f2 +0f +7d +ca +f2 +0f +7d +0b +f2 +0f +78 +ca +00 +01 +f2 +0f +78 +ca +00 +01 +f2 +0f +79 +ca +f2 +0f +f0 +0b +0f +ae +13 +66 +0f +f7 +ca +66 +0f +5f +ca +66 +0f +5f +0b +0f +5f +ca +0f +5f +0b +f2 +0f +5f +ca +f2 +0f +5f +0b +f3 +0f +5f +ca +f3 +0f +5f +0b +66 +0f +5d +ca +66 +0f +5d +0b +0f +5d +ca +0f +5d +0b +f2 +0f +5d +ca +f2 +0f +5d +0b +f3 +0f +5d +ca +f3 +0f +5d +0b +66 +0f +28 +ca +66 +0f +28 +0b +66 +0f +29 +13 +0f +28 +ca +0f +28 +0b +0f +29 +13 +66 +0f +6e +cb +66 +0f +6e +0b +66 +48 +0f +6e +cb +66 +48 +0f +6e +0b +66 +0f +7e +13 +66 +48 +0f +7e +13 +f2 +0f +12 +ca +f2 +0f +12 +0b +f2 +0f +d6 +ca +66 +0f +6f +ca +66 +0f +6f +0b +66 +0f +7f +13 +f3 +0f +6f +ca +f3 +0f +6f +0b +f3 +0f +7f +13 +0f +12 +ca +66 +0f +16 +0b +66 +0f +17 +13 +0f +16 +0b +0f +17 +13 +0f +16 +ca +66 +0f +12 +0b +66 +0f +13 +13 +0f +12 +0b +0f +13 +13 +66 +0f +50 +da +0f +50 +da +66 +0f +e7 +13 +66 +0f +2b +13 +0f +2b +13 +f2 +0f +2b +13 +f3 +0f +2b +13 +f3 +0f +7e +ca +f3 +0f +7e +0b +66 +0f +d6 +13 +f3 +0f +d6 +ca +f2 +0f +10 +ca +f2 +0f +10 +0b +f2 +0f +11 +13 +f3 +0f +16 +ca +f3 +0f +16 +0b +f3 +0f +12 +ca +f3 +0f +12 +0b +f3 +0f +10 +ca +f3 +0f +10 +0b +f3 +0f +11 +13 +66 +0f +10 +ca +66 +0f +10 +0b +66 +0f +11 +13 +0f +10 +ca +0f +10 +0b +0f +11 +13 +66 +0f +59 +ca +66 +0f +59 +0b +0f 
+59 +ca +0f +59 +0b +f2 +0f +59 +ca +f2 +0f +59 +0b +f3 +0f +59 +ca +f3 +0f +59 +0b +66 +0f +56 +ca +66 +0f +56 +0b +0f +56 +ca +0f +56 +0b +66 +0f +6b +ca +66 +0f +6b +0b +66 +0f +63 +ca +66 +0f +63 +0b +66 +0f +67 +ca +66 +0f +67 +0b +66 +0f +fc +ca +66 +0f +fc +0b +66 +0f +fe +ca +66 +0f +fe +0b +66 +0f +d4 +ca +66 +0f +d4 +0b +66 +0f +ec +ca +66 +0f +ec +0b +66 +0f +ed +ca +66 +0f +ed +0b +66 +0f +dc +ca +66 +0f +dc +0b +66 +0f +dd +ca +66 +0f +dd +0b +66 +0f +fd +ca +66 +0f +fd +0b +66 +0f +db +ca +66 +0f +db +0b +66 +0f +df +ca +66 +0f +df +0b +66 +0f +e0 +ca +66 +0f +e0 +0b +66 +0f +e3 +ca +66 +0f +e3 +0b +66 +0f +74 +ca +66 +0f +74 +0b +66 +0f +76 +ca +66 +0f +76 +0b +66 +0f +75 +ca +66 +0f +75 +0b +66 +0f +64 +ca +66 +0f +64 +0b +66 +0f +66 +ca +66 +0f +66 +0b +66 +0f +65 +ca +66 +0f +65 +0b +66 +0f +c5 +da +00 +66 +0f +c4 +cb +00 +66 +0f +c4 +0b +00 +66 +0f +f5 +ca +66 +0f +f5 +0b +66 +0f +ee +ca +66 +0f +ee +0b +66 +0f +de +ca +66 +0f +de +0b +66 +0f +ea +ca +66 +0f +ea +0b +66 +0f +da +ca +66 +0f +da +0b +66 +0f +d7 +c2 +66 +0f +e4 +ca +66 +0f +e4 +0b +66 +0f +e5 +ca +66 +0f +e5 +0b +66 +0f +d5 +ca +66 +0f +d5 +0b +66 +0f +f4 +ca +66 +0f +f4 +0b +66 +0f +eb +ca +66 +0f +eb +0b +66 +0f +f6 +ca +66 +0f +f6 +0b +66 +0f +70 +ca +00 +66 +0f +70 +0b +00 +f3 +0f +70 +ca +00 +f3 +0f +70 +0b +00 +f2 +0f +70 +ca +00 +f2 +0f +70 +0b +00 +66 +0f +f2 +ca +66 +0f +f2 +0b +66 +0f +72 +f1 +05 +66 +0f +73 +f9 +05 +66 +0f +f3 +ca +66 +0f +f3 +0b +66 +0f +73 +f1 +05 +66 +0f +f1 +ca +66 +0f +f1 +0b +66 +0f +71 +f1 +05 +66 +0f +e2 +ca +66 +0f +e2 +0b +66 +0f +72 +e1 +05 +66 +0f +e1 +ca +66 +0f +e1 +0b +66 +0f +71 +e1 +05 +66 +0f +d2 +ca +66 +0f +d2 +0b +66 +0f +72 +d1 +05 +66 +0f +73 +d9 +05 +66 +0f +d3 +ca +66 +0f +d3 +0b +66 +0f +73 +d1 +05 +66 +0f +d1 +ca +66 +0f +d1 +0b +66 +0f +71 +d1 +05 +66 +0f +f8 +ca +66 +0f +f8 +0b +66 +0f +fa +ca +66 +0f +fa +0b +66 +0f +fb +ca +66 +0f +fb +0b +66 +0f +e8 +ca +66 +0f +e8 +0b +66 +0f +e9 +ca +66 +0f +e9 +0b +66 +0f +d8 +ca +66 +0f 
+d8 +0b +66 +0f +d9 +ca +66 +0f +d9 +0b +66 +0f +f9 +ca +66 +0f +f9 +0b +66 +0f +68 +ca +66 +0f +68 +0b +66 +0f +6a +ca +66 +0f +6a +0b +66 +0f +6d +ca +66 +0f +6d +0b +66 +0f +69 +ca +66 +0f +69 +0b +66 +0f +60 +ca +66 +0f +60 +0b +66 +0f +62 +ca +66 +0f +62 +0b +66 +0f +6c +ca +66 +0f +6c +0b +66 +0f +61 +ca +66 +0f +61 +0b +66 +0f +ef +ca +66 +0f +ef +0b +0f +53 +ca +0f +53 +0b +f3 +0f +53 +ca +f3 +0f +53 +0b +0f +52 +ca +0f +52 +0b +f3 +0f +52 +ca +f3 +0f +52 +0b +66 +0f +c6 +ca +00 +66 +0f +c6 +0b +00 +0f +c6 +ca +00 +0f +c6 +0b +00 +66 +0f +51 +ca +66 +0f +51 +0b +0f +51 +ca +0f +51 +0b +f2 +0f +51 +ca +f2 +0f +51 +0b +f3 +0f +51 +ca +f3 +0f +51 +0b +0f +ae +1b +66 +0f +5c +ca +66 +0f +5c +0b +0f +5c +ca +0f +5c +0b +f2 +0f +5c +ca +f2 +0f +5c +0b +f3 +0f +5c +ca +f3 +0f +5c +0b +66 +0f +2e +ca +66 +0f +2e +0b +0f +2e +ca +0f +2e +0b +66 +0f +15 +ca +66 +0f +15 +0b +0f +15 +ca +0f +15 +0b +66 +0f +14 +ca +66 +0f +14 +0b +0f +14 +ca +0f +14 +0b +66 +0f +57 +ca +66 +0f +57 +0b +0f +57 +ca +0f +57 +0b |