summary | refs | log | tree | commit | diff
path: root/modules/arch
diff options
context:
space:
mode:
author: Peter Johnson <peter@tortall.net> 2007-11-28 07:21:08 +0000
committer: Peter Johnson <peter@tortall.net> 2007-11-28 07:21:08 +0000
commit: 14596f6658cb247c58d5f65769877ba81bfeac97 (patch)
tree: f0836eb732ac680f6f4db719bca642d714cc9c6a /modules/arch
parent: 4a778b6027a6f9d6502b6d4ade0487dedba63cab (diff)
download: yasm-14596f6658cb247c58d5f65769877ba81bfeac97.tar.gz
Fix #119. Quite a few SSE/SSE2 instructions assumed 128-bit memory sizes
instead of the correct 64-bit or 32-bit sizes (e.g. xmm/m64 or similar). It worked fine when no memory size was specified, but it should also work with the correct size modifier.

svn path=/trunk/yasm/; revision=2015
Diffstat (limited to 'modules/arch')
-rwxr-xr-x modules/arch/x86/gen_x86_insn.py 328
-rw-r--r-- modules/arch/x86/tests/Makefile.inc 2
-rw-r--r-- modules/arch/x86/tests/ssewidth.asm 566
-rw-r--r-- modules/arch/x86/tests/ssewidth.hex 1532
4 files changed, 2274 insertions, 154 deletions
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py
index e891634e..9213fe0c 100755
--- a/modules/arch/x86/gen_x86_insn.py
+++ b/modules/arch/x86/gen_x86_insn.py
@@ -3710,29 +3710,29 @@ add_insn("prefetcht2", "twobytemem", modifiers=[3, 0x0F, 0x18], cpu=["P3"])
add_insn("sfence", "threebyte", modifiers=[0x0F, 0xAE, 0xF8], cpu=["P3"])
-add_group("sseps",
+add_group("xmm_xmm128",
cpu=["SSE"],
- modifiers=["Op1Add"],
+ modifiers=["PreAdd", "Op1Add"],
+ prefix=0x00,
opcode=[0x0F, 0x00],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
-add_insn("addps", "sseps", modifiers=[0x58])
-add_insn("andnps", "sseps", modifiers=[0x55])
-add_insn("andps", "sseps", modifiers=[0x54])
-add_insn("comiss", "sseps", modifiers=[0x2F])
-add_insn("divps", "sseps", modifiers=[0x5E])
-add_insn("maxps", "sseps", modifiers=[0x5F])
-add_insn("minps", "sseps", modifiers=[0x5D])
-add_insn("mulps", "sseps", modifiers=[0x59])
-add_insn("orps", "sseps", modifiers=[0x56])
-add_insn("rcpps", "sseps", modifiers=[0x53])
-add_insn("rsqrtps", "sseps", modifiers=[0x52])
-add_insn("sqrtps", "sseps", modifiers=[0x51])
-add_insn("subps", "sseps", modifiers=[0x5C])
-add_insn("unpckhps", "sseps", modifiers=[0x15])
-add_insn("unpcklps", "sseps", modifiers=[0x14])
-add_insn("xorps", "sseps", modifiers=[0x57])
+add_insn("addps", "xmm_xmm128", modifiers=[0, 0x58])
+add_insn("andnps", "xmm_xmm128", modifiers=[0, 0x55])
+add_insn("andps", "xmm_xmm128", modifiers=[0, 0x54])
+add_insn("divps", "xmm_xmm128", modifiers=[0, 0x5E])
+add_insn("maxps", "xmm_xmm128", modifiers=[0, 0x5F])
+add_insn("minps", "xmm_xmm128", modifiers=[0, 0x5D])
+add_insn("mulps", "xmm_xmm128", modifiers=[0, 0x59])
+add_insn("orps", "xmm_xmm128", modifiers=[0, 0x56])
+add_insn("rcpps", "xmm_xmm128", modifiers=[0, 0x53])
+add_insn("rsqrtps", "xmm_xmm128", modifiers=[0, 0x52])
+add_insn("sqrtps", "xmm_xmm128", modifiers=[0, 0x51])
+add_insn("subps", "xmm_xmm128", modifiers=[0, 0x5C])
+add_insn("unpckhps", "xmm_xmm128", modifiers=[0, 0x15])
+add_insn("unpcklps", "xmm_xmm128", modifiers=[0, 0x14])
+add_insn("xorps", "xmm_xmm128", modifiers=[0, 0x57])
add_group("cvt_rx_xmm32",
suffix="l",
@@ -3819,79 +3819,104 @@ add_group("cvt_xmm_rmx",
add_insn("cvtsi2ss", "cvt_xmm_rmx", modifiers=[0xF3, 0x2A])
-add_group("ssess",
+add_group("xmm_xmm32",
cpu=["SSE"],
modifiers=["PreAdd", "Op1Add"],
prefix=0x00,
opcode=[0x0F, 0x00],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+ Operand(type="SIMDReg", size=128, dest="EA")])
+add_group("xmm_xmm32",
+ cpu=["SSE"],
+ modifiers=["PreAdd", "Op1Add"],
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=32, relaxed=True, dest="EA")])
-add_insn("addss", "ssess", modifiers=[0xF3, 0x58])
-add_insn("divss", "ssess", modifiers=[0xF3, 0x5E])
-add_insn("maxss", "ssess", modifiers=[0xF3, 0x5F])
-add_insn("minss", "ssess", modifiers=[0xF3, 0x5D])
-add_insn("mulss", "ssess", modifiers=[0xF3, 0x59])
-add_insn("rcpss", "ssess", modifiers=[0xF3, 0x53])
-add_insn("rsqrtss", "ssess", modifiers=[0xF3, 0x52])
-add_insn("sqrtss", "ssess", modifiers=[0xF3, 0x51])
-add_insn("subss", "ssess", modifiers=[0xF3, 0x5C])
-add_insn("ucomiss", "ssess", modifiers=[0, 0x2E])
-
-add_group("ssecmpps",
+add_insn("addss", "xmm_xmm32", modifiers=[0xF3, 0x58])
+add_insn("comiss", "xmm_xmm32", modifiers=[0, 0x2F])
+add_insn("divss", "xmm_xmm32", modifiers=[0xF3, 0x5E])
+add_insn("maxss", "xmm_xmm32", modifiers=[0xF3, 0x5F])
+add_insn("minss", "xmm_xmm32", modifiers=[0xF3, 0x5D])
+add_insn("mulss", "xmm_xmm32", modifiers=[0xF3, 0x59])
+add_insn("rcpss", "xmm_xmm32", modifiers=[0xF3, 0x53])
+add_insn("rsqrtss", "xmm_xmm32", modifiers=[0xF3, 0x52])
+add_insn("sqrtss", "xmm_xmm32", modifiers=[0xF3, 0x51])
+add_insn("subss", "xmm_xmm32", modifiers=[0xF3, 0x5C])
+add_insn("ucomiss", "xmm_xmm32", modifiers=[0, 0x2E])
+
+add_group("ssecmp_128",
cpu=["SSE"],
- modifiers=["Imm8"],
+ modifiers=["Imm8", "PreAdd"],
opcode=[0x0F, 0xC2],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
-add_insn("cmpeqps", "ssecmpps", modifiers=[0x00])
-add_insn("cmpleps", "ssecmpps", modifiers=[0x02])
-add_insn("cmpltps", "ssecmpps", modifiers=[0x01])
-add_insn("cmpneqps", "ssecmpps", modifiers=[0x04])
-add_insn("cmpnleps", "ssecmpps", modifiers=[0x06])
-add_insn("cmpnltps", "ssecmpps", modifiers=[0x05])
-add_insn("cmpordps", "ssecmpps", modifiers=[0x07])
-add_insn("cmpunordps", "ssecmpps", modifiers=[0x03])
+add_insn("cmpeqps", "ssecmp_128", modifiers=[0])
+add_insn("cmpleps", "ssecmp_128", modifiers=[2])
+add_insn("cmpltps", "ssecmp_128", modifiers=[1])
+add_insn("cmpneqps", "ssecmp_128", modifiers=[4])
+add_insn("cmpnleps", "ssecmp_128", modifiers=[6])
+add_insn("cmpnltps", "ssecmp_128", modifiers=[5])
+add_insn("cmpordps", "ssecmp_128", modifiers=[7])
+add_insn("cmpunordps", "ssecmp_128", modifiers=[3])
-add_group("ssecmpss",
+add_group("ssecmp_32",
cpu=["SSE"],
modifiers=["Imm8", "PreAdd"],
prefix=0x00,
opcode=[0x0F, 0xC2],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="SIMDRM", size=128, relaxed=True, dest="EA")])
+ Operand(type="SIMDReg", size=128, dest="EA")])
-add_insn("cmpeqss", "ssecmpss", modifiers=[0, 0xF3])
-add_insn("cmpless", "ssecmpss", modifiers=[2, 0xF3])
-add_insn("cmpltss", "ssecmpss", modifiers=[1, 0xF3])
-add_insn("cmpneqss", "ssecmpss", modifiers=[4, 0xF3])
-add_insn("cmpnless", "ssecmpss", modifiers=[6, 0xF3])
-add_insn("cmpnltss", "ssecmpss", modifiers=[5, 0xF3])
-add_insn("cmpordss", "ssecmpss", modifiers=[7, 0xF3])
-add_insn("cmpunordss", "ssecmpss", modifiers=[3, 0xF3])
+add_group("ssecmp_32",
+ cpu=["SSE"],
+ modifiers=["Imm8", "PreAdd"],
+ prefix=0x00,
+ opcode=[0x0F, 0xC2],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=32, relaxed=True, dest="EA")])
+
+add_insn("cmpeqss", "ssecmp_32", modifiers=[0, 0xF3])
+add_insn("cmpless", "ssecmp_32", modifiers=[2, 0xF3])
+add_insn("cmpltss", "ssecmp_32", modifiers=[1, 0xF3])
+add_insn("cmpneqss", "ssecmp_32", modifiers=[4, 0xF3])
+add_insn("cmpnless", "ssecmp_32", modifiers=[6, 0xF3])
+add_insn("cmpnltss", "ssecmp_32", modifiers=[5, 0xF3])
+add_insn("cmpordss", "ssecmp_32", modifiers=[7, 0xF3])
+add_insn("cmpunordss", "ssecmp_32", modifiers=[3, 0xF3])
-add_group("ssepsimm",
+add_group("xmm_xmm128_imm",
cpu=["SSE"],
- modifiers=["Op1Add"],
+ modifiers=["PreAdd", "Op1Add"],
opcode=[0x0F, 0x00],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
-add_insn("cmpps", "ssepsimm", modifiers=[0xC2])
-add_insn("shufps", "ssepsimm", modifiers=[0xC6])
+add_insn("cmpps", "xmm_xmm128_imm", modifiers=[0, 0xC2])
+add_insn("shufps", "xmm_xmm128_imm", modifiers=[0, 0xC6])
-add_group("ssessimm",
+add_group("xmm_xmm32_imm",
cpu=["SSE"],
modifiers=["PreAdd", "Op1Add"],
prefix=0x00,
opcode=[0x0F, 0x00],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_group("xmm_xmm32_imm",
+ cpu=["SSE"],
+ modifiers=["PreAdd", "Op1Add"],
+ prefix=0x00,
+ opcode=[0x0F, 0x00],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=32, relaxed=True, dest="EA"),
Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
-add_insn("cmpss", "ssessimm", modifiers=[0xF3, 0xC2])
+add_insn("cmpss", "xmm_xmm32_imm", modifiers=[0xF3, 0xC2])
add_group("ldstmxcsr",
cpu=["SSE"],
@@ -4158,64 +4183,14 @@ add_insn("pshufw", "pshufw")
#####################################################################
# SSE2 instructions
#####################################################################
-add_insn("addpd", "ssess", modifiers=[0x66, 0x58], cpu=["SSE2"])
-add_insn("addsd", "ssess", modifiers=[0xF2, 0x58], cpu=["SSE2"])
-add_insn("andnpd", "ssess", modifiers=[0x66, 0x55], cpu=["SSE2"])
-add_insn("andpd", "ssess", modifiers=[0x66, 0x54], cpu=["SSE2"])
-add_insn("comisd", "ssess", modifiers=[0x66, 0x2F], cpu=["SSE2"])
-add_insn("divpd", "ssess", modifiers=[0x66, 0x5E], cpu=["SSE2"])
-add_insn("divsd", "ssess", modifiers=[0xF2, 0x5E], cpu=["SSE2"])
-add_insn("maxpd", "ssess", modifiers=[0x66, 0x5F], cpu=["SSE2"])
-add_insn("maxsd", "ssess", modifiers=[0xF2, 0x5F], cpu=["SSE2"])
-add_insn("minpd", "ssess", modifiers=[0x66, 0x5D], cpu=["SSE2"])
-add_insn("minsd", "ssess", modifiers=[0xF2, 0x5D], cpu=["SSE2"])
-add_insn("mulpd", "ssess", modifiers=[0x66, 0x59], cpu=["SSE2"])
-add_insn("mulsd", "ssess", modifiers=[0xF2, 0x59], cpu=["SSE2"])
-add_insn("orpd", "ssess", modifiers=[0x66, 0x56], cpu=["SSE2"])
-add_insn("sqrtpd", "ssess", modifiers=[0x66, 0x51], cpu=["SSE2"])
-add_insn("sqrtsd", "ssess", modifiers=[0xF2, 0x51], cpu=["SSE2"])
-add_insn("subpd", "ssess", modifiers=[0x66, 0x5C], cpu=["SSE2"])
-add_insn("subsd", "ssess", modifiers=[0xF2, 0x5C], cpu=["SSE2"])
-add_insn("ucomisd", "ssess", modifiers=[0x66, 0x2E], cpu=["SSE2"])
-add_insn("unpckhpd", "ssess", modifiers=[0x66, 0x15], cpu=["SSE2"])
-add_insn("unpcklpd", "ssess", modifiers=[0x66, 0x14], cpu=["SSE2"])
-add_insn("xorpd", "ssess", modifiers=[0x66, 0x57], cpu=["SSE2"])
-add_insn("cvtpd2dq", "ssess", modifiers=[0xF2, 0xE6], cpu=["SSE2"])
-add_insn("cvtpd2ps", "ssess", modifiers=[0x66, 0x5A], cpu=["SSE2"])
-add_insn("cvtps2dq", "ssess", modifiers=[0x66, 0x5B], cpu=["SSE2"])
-
-add_insn("cvtdq2ps", "sseps", modifiers=[0x5B], cpu=["SSE2"])
-
-add_insn("cmpeqpd", "ssecmpss", modifiers=[0x00, 0x66], cpu=["SSE2"])
-add_insn("cmpeqsd", "ssecmpss", modifiers=[0x00, 0xF2], cpu=["SSE2"])
-add_insn("cmplepd", "ssecmpss", modifiers=[0x02, 0x66], cpu=["SSE2"])
-add_insn("cmplesd", "ssecmpss", modifiers=[0x02, 0xF2], cpu=["SSE2"])
-add_insn("cmpltpd", "ssecmpss", modifiers=[0x01, 0x66], cpu=["SSE2"])
-add_insn("cmpltsd", "ssecmpss", modifiers=[0x01, 0xF2], cpu=["SSE2"])
-add_insn("cmpneqpd", "ssecmpss", modifiers=[0x04, 0x66], cpu=["SSE2"])
-add_insn("cmpneqsd", "ssecmpss", modifiers=[0x04, 0xF2], cpu=["SSE2"])
-add_insn("cmpnlepd", "ssecmpss", modifiers=[0x06, 0x66], cpu=["SSE2"])
-add_insn("cmpnlesd", "ssecmpss", modifiers=[0x06, 0xF2], cpu=["SSE2"])
-add_insn("cmpnltpd", "ssecmpss", modifiers=[0x05, 0x66], cpu=["SSE2"])
-add_insn("cmpnltsd", "ssecmpss", modifiers=[0x05, 0xF2], cpu=["SSE2"])
-add_insn("cmpordpd", "ssecmpss", modifiers=[0x07, 0x66], cpu=["SSE2"])
-add_insn("cmpordsd", "ssecmpss", modifiers=[0x07, 0xF2], cpu=["SSE2"])
-add_insn("cmpunordpd", "ssecmpss", modifiers=[0x03, 0x66], cpu=["SSE2"])
-add_insn("cmpunordsd", "ssecmpss", modifiers=[0x03, 0xF2], cpu=["SSE2"])
-
-add_insn("cmppd", "ssessimm", modifiers=[0x66, 0xC2], cpu=["SSE2"])
-add_insn("shufpd", "ssessimm", modifiers=[0x66, 0xC6], cpu=["SSE2"])
-
-add_insn("cvtsi2sd", "cvt_xmm_rmx", modifiers=[0xF2, 0x2A], cpu=["SSE2"])
-
-add_group("cvt_xmm_xmm64_ss",
+add_group("xmm_xmm64",
cpu=["SSE2"],
modifiers=["PreAdd", "Op1Add"],
prefix=0x00,
opcode=[0x0F, 0x00],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDReg", size=128, dest="EA")])
-add_group("cvt_xmm_xmm64_ss",
+add_group("xmm_xmm64",
cpu=["SSE2"],
modifiers=["PreAdd", "Op1Add"],
prefix=0x00,
@@ -4223,23 +4198,75 @@ add_group("cvt_xmm_xmm64_ss",
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="Mem", size=64, relaxed=True, dest="EA")])
-add_insn("cvtdq2pd", "cvt_xmm_xmm64_ss", modifiers=[0xF3, 0xE6])
-add_insn("cvtsd2ss", "cvt_xmm_xmm64_ss", modifiers=[0xF2, 0x5A])
-
-add_group("cvt_xmm_xmm64_ps",
+add_insn("addsd", "xmm_xmm64", modifiers=[0xF2, 0x58])
+add_insn("comisd", "xmm_xmm64", modifiers=[0x66, 0x2F])
+add_insn("cvtdq2pd", "xmm_xmm64", modifiers=[0xF3, 0xE6])
+add_insn("cvtps2pd", "xmm_xmm64", modifiers=[0, 0x5A])
+add_insn("cvtsd2ss", "xmm_xmm64", modifiers=[0xF2, 0x5A])
+add_insn("divsd", "xmm_xmm64", modifiers=[0xF2, 0x5E])
+add_insn("maxsd", "xmm_xmm64", modifiers=[0xF2, 0x5F])
+add_insn("minsd", "xmm_xmm64", modifiers=[0xF2, 0x5D])
+add_insn("mulsd", "xmm_xmm64", modifiers=[0xF2, 0x59])
+add_insn("subsd", "xmm_xmm64", modifiers=[0xF2, 0x5C])
+add_insn("sqrtsd", "xmm_xmm64", modifiers=[0xF2, 0x51])
+add_insn("ucomisd", "xmm_xmm64", modifiers=[0x66, 0x2E])
+
+add_insn("addpd", "xmm_xmm128", modifiers=[0x66, 0x58], cpu=["SSE2"])
+add_insn("andnpd", "xmm_xmm128", modifiers=[0x66, 0x55], cpu=["SSE2"])
+add_insn("andpd", "xmm_xmm128", modifiers=[0x66, 0x54], cpu=["SSE2"])
+add_insn("cvtdq2ps", "xmm_xmm128", modifiers=[0, 0x5B], cpu=["SSE2"])
+add_insn("cvtpd2dq", "xmm_xmm128", modifiers=[0xF2, 0xE6], cpu=["SSE2"])
+add_insn("cvtpd2ps", "xmm_xmm128", modifiers=[0x66, 0x5A], cpu=["SSE2"])
+add_insn("cvtps2dq", "xmm_xmm128", modifiers=[0x66, 0x5B], cpu=["SSE2"])
+add_insn("divpd", "xmm_xmm128", modifiers=[0x66, 0x5E], cpu=["SSE2"])
+add_insn("maxpd", "xmm_xmm128", modifiers=[0x66, 0x5F], cpu=["SSE2"])
+add_insn("minpd", "xmm_xmm128", modifiers=[0x66, 0x5D], cpu=["SSE2"])
+add_insn("mulpd", "xmm_xmm128", modifiers=[0x66, 0x59], cpu=["SSE2"])
+add_insn("orpd", "xmm_xmm128", modifiers=[0x66, 0x56], cpu=["SSE2"])
+add_insn("sqrtpd", "xmm_xmm128", modifiers=[0x66, 0x51], cpu=["SSE2"])
+add_insn("subpd", "xmm_xmm128", modifiers=[0x66, 0x5C], cpu=["SSE2"])
+add_insn("unpckhpd", "xmm_xmm128", modifiers=[0x66, 0x15], cpu=["SSE2"])
+add_insn("unpcklpd", "xmm_xmm128", modifiers=[0x66, 0x14], cpu=["SSE2"])
+add_insn("xorpd", "xmm_xmm128", modifiers=[0x66, 0x57], cpu=["SSE2"])
+
+add_group("ssecmp_64",
cpu=["SSE2"],
- modifiers=["Op1Add"],
- opcode=[0x0F, 0x00],
+ modifiers=["Imm8", "PreAdd"],
+ prefix=0x00,
+ opcode=[0x0F, 0xC2],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="SIMDReg", size=128, dest="EA")])
-add_group("cvt_xmm_xmm64_ps",
+
+add_group("ssecmp_64",
cpu=["SSE2"],
- modifiers=["Op1Add"],
- opcode=[0x0F, 0x00],
+ modifiers=["Imm8", "PreAdd"],
+ prefix=0x00,
+ opcode=[0x0F, 0xC2],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
Operand(type="Mem", size=64, relaxed=True, dest="EA")])
-add_insn("cvtps2pd", "cvt_xmm_xmm64_ps", modifiers=[0x5A])
+add_insn("cmpeqsd", "ssecmp_64", modifiers=[0, 0xF2])
+add_insn("cmplesd", "ssecmp_64", modifiers=[2, 0xF2])
+add_insn("cmpltsd", "ssecmp_64", modifiers=[1, 0xF2])
+add_insn("cmpneqsd", "ssecmp_64", modifiers=[4, 0xF2])
+add_insn("cmpnlesd", "ssecmp_64", modifiers=[6, 0xF2])
+add_insn("cmpnltsd", "ssecmp_64", modifiers=[5, 0xF2])
+add_insn("cmpordsd", "ssecmp_64", modifiers=[7, 0xF2])
+add_insn("cmpunordsd", "ssecmp_64", modifiers=[3, 0xF2])
+
+add_insn("cmpeqpd", "ssecmp_128", modifiers=[0, 0x66], cpu=["SSE2"])
+add_insn("cmplepd", "ssecmp_128", modifiers=[2, 0x66], cpu=["SSE2"])
+add_insn("cmpltpd", "ssecmp_128", modifiers=[1, 0x66], cpu=["SSE2"])
+add_insn("cmpneqpd", "ssecmp_128", modifiers=[4, 0x66], cpu=["SSE2"])
+add_insn("cmpnlepd", "ssecmp_128", modifiers=[6, 0x66], cpu=["SSE2"])
+add_insn("cmpnltpd", "ssecmp_128", modifiers=[5, 0x66], cpu=["SSE2"])
+add_insn("cmpordpd", "ssecmp_128", modifiers=[7, 0x66], cpu=["SSE2"])
+add_insn("cmpunordpd", "ssecmp_128", modifiers=[3, 0x66], cpu=["SSE2"])
+
+add_insn("cmppd", "xmm_xmm128_imm", modifiers=[0x66, 0xC2], cpu=["SSE2"])
+add_insn("shufpd", "xmm_xmm128_imm", modifiers=[0x66, 0xC6], cpu=["SSE2"])
+
+add_insn("cvtsi2sd", "cvt_xmm_rmx", modifiers=[0xF2, 0x2A], cpu=["SSE2"])
add_group("cvt_rx_xmm64",
suffix="l",
@@ -4305,7 +4332,15 @@ add_group("cmpsd",
prefix=0xF2,
opcode=[0x0F, 0xC2],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="SIMDRM", size=128, relaxed=True, dest="EA"),
+ Operand(type="SIMDReg", size=128, dest="EA"),
+ Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
+
+add_group("cmpsd",
+ cpu=["SSE2"],
+ prefix=0xF2,
+ opcode=[0x0F, 0xC2],
+ operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
+ Operand(type="Mem", size=64, relaxed=True, dest="EA"),
Operand(type="Imm", size=8, relaxed=True, dest="Imm")])
# cmpsd is added in string instructions above, so don't re-add_insn()
@@ -4460,31 +4495,16 @@ add_insn("vmxon", "vmxthreebytemem", modifiers=[0xF3])
add_insn("cvttpd2pi", "cvt_mm_xmm", modifiers=[0x66, 0x2C], cpu=["SSE2"])
add_insn("cvttsd2si", "cvt_rx_xmm64", modifiers=[0xF2, 0x2C], cpu=["SSE2"])
-add_insn("cvttpd2dq", "ssess", modifiers=[0x66, 0xE6], cpu=["SSE2"])
-add_insn("cvttps2dq", "ssess", modifiers=[0xF3, 0x5B], cpu=["SSE2"])
+add_insn("cvttpd2dq", "xmm_xmm128", modifiers=[0x66, 0xE6], cpu=["SSE2"])
+add_insn("cvttps2dq", "xmm_xmm128", modifiers=[0xF3, 0x5B], cpu=["SSE2"])
add_insn("pmuludq", "mmxsse2", modifiers=[0xF4], cpu=["SSE2"])
-add_insn("pshufd", "ssessimm", modifiers=[0x66, 0x70], cpu=["SSE2"])
-add_insn("pshufhw", "ssessimm", modifiers=[0xF3, 0x70], cpu=["SSE2"])
-add_insn("pshuflw", "ssessimm", modifiers=[0xF2, 0x70], cpu=["SSE2"])
-add_insn("punpckhqdq", "ssess", modifiers=[0x66, 0x6D], cpu=["SSE2"])
-add_insn("punpcklqdq", "ssess", modifiers=[0x66, 0x6C], cpu=["SSE2"])
-
-add_group("cvt_xmm_xmm32",
- cpu=["SSE2"],
- modifiers=["PreAdd", "Op1Add"],
- prefix=0x00,
- opcode=[0x0F, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="SIMDReg", size=128, dest="EA")])
-add_group("cvt_xmm_xmm32",
- cpu=["SSE2"],
- modifiers=["PreAdd", "Op1Add"],
- prefix=0x00,
- opcode=[0x0F, 0x00],
- operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="Mem", size=32, relaxed=True, dest="EA")])
+add_insn("pshufd", "xmm_xmm128_imm", modifiers=[0x66, 0x70], cpu=["SSE2"])
+add_insn("pshufhw", "xmm_xmm128_imm", modifiers=[0xF3, 0x70], cpu=["SSE2"])
+add_insn("pshuflw", "xmm_xmm128_imm", modifiers=[0xF2, 0x70], cpu=["SSE2"])
+add_insn("punpckhqdq", "xmm_xmm128", modifiers=[0x66, 0x6D], cpu=["SSE2"])
+add_insn("punpcklqdq", "xmm_xmm128", modifiers=[0x66, 0x6C], cpu=["SSE2"])
-add_insn("cvtss2sd", "cvt_xmm_xmm32", modifiers=[0xF3, 0x5A])
+add_insn("cvtss2sd", "xmm_xmm32", modifiers=[0xF3, 0x5A], cpu=["SSE2"])
add_group("maskmovdqu",
cpu=["SSE2"],
@@ -4546,17 +4566,17 @@ add_insn("psrldq", "pslrldq", modifiers=[3])
#####################################################################
# SSE3 / PNI Prescott New Instructions instructions
#####################################################################
-add_insn("addsubpd", "ssess", modifiers=[0x66, 0xD0], cpu=["SSE3"])
-add_insn("addsubps", "ssess", modifiers=[0xF2, 0xD0], cpu=["SSE3"])
-add_insn("haddpd", "ssess", modifiers=[0x66, 0x7C], cpu=["SSE3"])
-add_insn("haddps", "ssess", modifiers=[0xF2, 0x7C], cpu=["SSE3"])
-add_insn("hsubpd", "ssess", modifiers=[0x66, 0x7D], cpu=["SSE3"])
-add_insn("hsubps", "ssess", modifiers=[0xF2, 0x7D], cpu=["SSE3"])
-add_insn("movshdup", "ssess", modifiers=[0xF3, 0x16], cpu=["SSE3"])
-add_insn("movsldup", "ssess", modifiers=[0xF3, 0x12], cpu=["SSE3"])
+add_insn("addsubpd", "xmm_xmm128", modifiers=[0x66, 0xD0], cpu=["SSE3"])
+add_insn("addsubps", "xmm_xmm128", modifiers=[0xF2, 0xD0], cpu=["SSE3"])
+add_insn("haddpd", "xmm_xmm128", modifiers=[0x66, 0x7C], cpu=["SSE3"])
+add_insn("haddps", "xmm_xmm128", modifiers=[0xF2, 0x7C], cpu=["SSE3"])
+add_insn("hsubpd", "xmm_xmm128", modifiers=[0x66, 0x7D], cpu=["SSE3"])
+add_insn("hsubps", "xmm_xmm128", modifiers=[0xF2, 0x7D], cpu=["SSE3"])
+add_insn("movshdup", "xmm_xmm128", modifiers=[0xF3, 0x16], cpu=["SSE3"])
+add_insn("movsldup", "xmm_xmm128", modifiers=[0xF3, 0x12], cpu=["SSE3"])
add_insn("fisttp", "fildstp", modifiers=[1, 0, 1], cpu=["SSE3"])
add_insn("fisttpll", "fildstp", suffix="q", modifiers=[7], cpu=["SSE3"])
-add_insn("movddup", "cvt_xmm_xmm64_ss", modifiers=[0xF2, 0x12], cpu=["SSE3"])
+add_insn("movddup", "xmm_xmm64", modifiers=[0xF2, 0x12], cpu=["SSE3"])
add_insn("monitor", "threebyte", modifiers=[0x0F, 0x01, 0xC8], cpu=["SSE3"])
add_insn("mwait", "threebyte", modifiers=[0x0F, 0x01, 0xC9], cpu=["SSE3"])
@@ -4565,7 +4585,7 @@ add_group("lddqu",
prefix=0xF2,
opcode=[0x0F, 0xF0],
operands=[Operand(type="SIMDReg", size=128, dest="Spare"),
- Operand(type="Mem", dest="EA")])
+ Operand(type="Mem", size=128, relaxed=True, dest="EA")])
add_insn("lddqu", "lddqu")
diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc
index 87761177..ee1b4c81 100644
--- a/modules/arch/x86/tests/Makefile.inc
+++ b/modules/arch/x86/tests/Makefile.inc
@@ -163,6 +163,8 @@ EXTRA_DIST += modules/arch/x86/tests/sse5-basic.asm
EXTRA_DIST += modules/arch/x86/tests/sse5-basic.hex
EXTRA_DIST += modules/arch/x86/tests/sse5-err.asm
EXTRA_DIST += modules/arch/x86/tests/sse5-err.errwarn
+EXTRA_DIST += modules/arch/x86/tests/ssewidth.asm
+EXTRA_DIST += modules/arch/x86/tests/ssewidth.hex
EXTRA_DIST += modules/arch/x86/tests/ssse3.asm
EXTRA_DIST += modules/arch/x86/tests/ssse3.c
EXTRA_DIST += modules/arch/x86/tests/ssse3.hex
diff --git a/modules/arch/x86/tests/ssewidth.asm b/modules/arch/x86/tests/ssewidth.asm
new file mode 100644
index 00000000..0824b769
--- /dev/null
+++ b/modules/arch/x86/tests/ssewidth.asm
@@ -0,0 +1,566 @@
+[bits 64]
+addpd xmm1, xmm2
+addpd xmm1, dqword [rbx]
+
+addps xmm1, xmm2
+addps xmm1, dqword [rbx]
+
+addsd xmm1, xmm2
+addsd xmm1, qword [rbx]
+
+addss xmm1, xmm2
+addss xmm1, dword [rbx]
+
+addsubpd xmm1, xmm2
+addsubpd xmm1, dqword [rbx]
+
+addsubps xmm1, xmm2
+addsubps xmm1, dqword [rbx]
+
+andnpd xmm1, xmm2
+andnpd xmm1, dqword [rbx]
+
+andnps xmm1, xmm2
+andnps xmm1, dqword [rbx]
+
+andpd xmm1, xmm2
+andpd xmm1, dqword [rbx]
+
+andps xmm1, xmm2
+andps xmm1, dqword [rbx]
+
+cmppd xmm1, xmm2, 0
+cmppd xmm1, dqword [rbx], 0
+cmpeqpd xmm1, xmm2
+cmpeqpd xmm1, dqword [rbx]
+
+cmpps xmm1, xmm2, 0
+cmpps xmm1, dqword [rbx], 0
+cmpeqps xmm1, xmm2
+cmpeqps xmm1, dqword [rbx]
+
+cmpsd xmm1, xmm2, 0
+cmpsd xmm1, qword [rbx], 0
+cmpeqsd xmm1, xmm2
+cmpeqsd xmm1, qword [rbx]
+
+cmpss xmm1, xmm2, 0
+cmpss xmm1, dword [rbx], 0
+cmpeqss xmm1, xmm2
+cmpeqss xmm1, dword [rbx]
+
+comisd xmm1, xmm2
+comisd xmm1, qword [rbx]
+
+comiss xmm1, xmm2
+comiss xmm1, dword [rbx]
+
+cvtdq2pd xmm1, xmm2
+cvtdq2pd xmm1, qword [rbx]
+
+cvtdq2ps xmm1, xmm2
+cvtdq2ps xmm1, dqword [rbx]
+
+cvtpd2dq xmm1, xmm2
+cvtpd2dq xmm1, dqword [rbx]
+
+cvtpd2pi mm1, xmm2 ; mmx
+cvtpd2pi mm1, dqword [rbx]
+
+cvtpd2ps xmm1, xmm2
+cvtpd2ps xmm1, dqword [rbx]
+
+cvtpi2pd xmm1, mm2 ; mmx
+cvtpi2pd xmm1, qword [rbx]
+
+cvtpi2ps xmm1, mm2 ; mmx
+cvtpi2ps xmm1, qword [rbx]
+
+cvtps2dq xmm1, xmm2
+cvtps2dq xmm1, dqword [rbx]
+
+cvtps2pd xmm1, xmm2
+cvtps2pd xmm1, qword [rbx]
+
+cvtps2pi mm1, xmm2
+cvtps2pi mm1, qword [rbx]
+
+cvtsd2si rbx, xmm2
+cvtsd2si rbx, qword [rbx]
+
+cvtsd2ss xmm1, xmm2
+cvtsd2ss xmm1, qword [rbx]
+
+cvtsi2sd xmm1, ebx
+cvtsi2sd xmm1, dword [rbx]
+cvtsi2sd xmm1, rbx
+cvtsi2sd xmm1, qword [rbx]
+
+cvtsi2ss xmm1, ebx
+cvtsi2ss xmm1, dword [rbx]
+cvtsi2ss xmm1, rbx
+cvtsi2ss xmm1, qword [rbx]
+
+cvtss2sd xmm1, xmm2
+cvtss2sd xmm1, dword [rbx]
+
+cvtss2si ebx, xmm2
+cvtss2si ebx, dword [rbx]
+cvtss2si rbx, xmm2
+cvtss2si rbx, dword [rbx]
+
+cvttpd2dq xmm1, xmm2
+cvttpd2dq xmm1, dqword [rbx]
+
+cvttpd2pi mm1, xmm2
+cvttpd2pi mm1, dqword [rbx]
+
+cvttps2dq xmm1, xmm2
+cvttps2dq xmm1, dqword [rbx]
+
+cvttps2pi mm1, xmm2
+cvttps2pi mm1, qword [rbx]
+
+cvttsd2si eax, xmm1
+cvttsd2si eax, qword [rbx]
+cvttsd2si rax, xmm1
+cvttsd2si rax, qword [rbx]
+
+cvttss2si eax, xmm1
+cvttss2si eax, dword [rbx]
+cvttss2si rax, xmm1
+cvttss2si rax, dword [rbx]
+
+divpd xmm1, xmm2
+divpd xmm1, dqword [rbx]
+
+divps xmm1, xmm2
+divps xmm1, dqword [rbx]
+
+divsd xmm1, xmm2
+divsd xmm1, qword [rbx]
+
+divss xmm1, xmm2
+divss xmm1, dword [rbx]
+
+extrq xmm1, 0, 1
+extrq xmm1, byte 0, byte 1
+extrq xmm1, xmm2
+
+haddpd xmm1, xmm2
+haddpd xmm1, dqword [rbx]
+
+haddps xmm1, xmm2
+haddps xmm1, dqword [rbx]
+
+hsubpd xmm1, xmm2
+hsubpd xmm1, dqword [rbx]
+
+hsubps xmm1, xmm2
+hsubps xmm1, dqword [rbx]
+
+insertq xmm1, xmm2, 0, 1
+insertq xmm1, xmm2, byte 0, byte 1
+insertq xmm1, xmm2
+
+lddqu xmm1, dqword [rbx]
+
+ldmxcsr dword [rbx]
+
+maskmovdqu xmm1, xmm2
+
+maxpd xmm1, xmm2
+maxpd xmm1, dqword [rbx]
+
+maxps xmm1, xmm2
+maxps xmm1, dqword [rbx]
+
+maxsd xmm1, xmm2
+maxsd xmm1, qword [rbx]
+
+maxss xmm1, xmm2
+maxss xmm1, dword [rbx]
+
+minpd xmm1, xmm2
+minpd xmm1, dqword [rbx]
+
+minps xmm1, xmm2
+minps xmm1, dqword [rbx]
+
+minsd xmm1, xmm2
+minsd xmm1, qword [rbx]
+
+minss xmm1, xmm2
+minss xmm1, dword [rbx]
+
+movapd xmm1, xmm2
+movapd xmm1, dqword [rbx]
+movapd dqword [rbx], xmm2
+
+movaps xmm1, xmm2
+movaps xmm1, dqword [rbx]
+movaps dqword [rbx], xmm2
+
+movd xmm1, ebx
+movd xmm1, dword [rbx]
+movd xmm1, rbx
+movd xmm1, qword [rbx]
+movd dword [rbx], xmm2
+movd qword [rbx], xmm2
+
+movddup xmm1, xmm2
+movddup xmm1, qword [rbx]
+
+movdq2q mm1, xmm2
+
+movdqa xmm1, xmm2
+movdqa xmm1, dqword [rbx]
+movdqa dqword [rbx], xmm2
+
+movdqu xmm1, xmm2
+movdqu xmm1, dqword [rbx]
+movdqu dqword [rbx], xmm2
+
+movhlps xmm1, xmm2
+
+movhpd xmm1, qword [rbx]
+movhpd qword [rbx], xmm2
+
+movhps xmm1, qword [rbx]
+movhps qword [rbx], xmm2
+
+movlhps xmm1, xmm2
+
+movlpd xmm1, qword [rbx]
+movlpd qword [rbx], xmm2
+
+movlps xmm1, qword [rbx]
+movlps qword [rbx], xmm2
+
+movmskpd ebx, xmm2
+
+movmskps ebx, xmm2
+
+movntdq dqword [rbx], xmm2
+
+movntpd dqword [rbx], xmm2
+
+movntps dqword [rbx], xmm2
+
+movntsd qword [rbx], xmm2
+
+movntss dword [rbx], xmm2
+
+movq xmm1, xmm2
+movq xmm1, qword [rbx]
+movq qword [rbx], xmm2
+
+movq2dq xmm1, mm2
+
+movsd xmm1, xmm2
+movsd xmm1, qword [rbx]
+movsd qword [rbx], xmm2
+
+movshdup xmm1, xmm2
+movshdup xmm1, dqword [rbx]
+
+movsldup xmm1, xmm2
+movsldup xmm1, dqword [rbx]
+
+movss xmm1, xmm2
+movss xmm1, dword [rbx]
+movss dword [rbx], xmm2
+
+movupd xmm1, xmm2
+movupd xmm1, dqword [rbx]
+movupd dqword [rbx], xmm2
+
+movups xmm1, xmm2
+movups xmm1, dqword [rbx]
+movups dqword [rbx], xmm2
+
+mulpd xmm1, xmm2
+mulpd xmm1, dqword [rbx]
+
+mulps xmm1, xmm2
+mulps xmm1, dqword [rbx]
+
+mulsd xmm1, xmm2
+mulsd xmm1, qword [rbx]
+
+mulss xmm1, xmm2
+mulss xmm1, dword [rbx]
+
+orpd xmm1, xmm2
+orpd xmm1, dqword [rbx]
+
+orps xmm1, xmm2
+orps xmm1, dqword [rbx]
+
+packssdw xmm1, xmm2
+packssdw xmm1, dqword [rbx]
+
+packsswb xmm1, xmm2
+packsswb xmm1, dqword [rbx]
+
+packuswb xmm1, xmm2
+packuswb xmm1, dqword [rbx]
+
+paddb xmm1, xmm2
+paddb xmm1, dqword [rbx]
+
+paddd xmm1, xmm2
+paddd xmm1, dqword [rbx]
+
+paddq xmm1, xmm2
+paddq xmm1, dqword [rbx]
+
+paddsb xmm1, xmm2
+paddsb xmm1, dqword [rbx]
+
+paddsw xmm1, xmm2
+paddsw xmm1, dqword [rbx]
+
+paddusb xmm1, xmm2
+paddusb xmm1, dqword [rbx]
+
+paddusw xmm1, xmm2
+paddusw xmm1, dqword [rbx]
+
+paddw xmm1, xmm2
+paddw xmm1, dqword [rbx]
+
+pand xmm1, xmm2
+pand xmm1, dqword [rbx]
+
+pandn xmm1, xmm2
+pandn xmm1, dqword [rbx]
+
+pavgb xmm1, xmm2
+pavgb xmm1, dqword [rbx]
+
+pavgw xmm1, xmm2
+pavgw xmm1, dqword [rbx]
+
+pcmpeqb xmm1, xmm2
+pcmpeqb xmm1, dqword [rbx]
+
+pcmpeqd xmm1, xmm2
+pcmpeqd xmm1, dqword [rbx]
+
+pcmpeqw xmm1, xmm2
+pcmpeqw xmm1, dqword [rbx]
+
+pcmpgtb xmm1, xmm2
+pcmpgtb xmm1, dqword [rbx]
+
+pcmpgtd xmm1, xmm2
+pcmpgtd xmm1, dqword [rbx]
+
+pcmpgtw xmm1, xmm2
+pcmpgtw xmm1, dqword [rbx]
+
+pextrw ebx, xmm2, byte 0
+
+pinsrw xmm1, ebx, byte 0
+pinsrw xmm1, word [rbx], byte 0
+
+pmaddwd xmm1, xmm2
+pmaddwd xmm1, dqword [rbx]
+
+pmaxsw xmm1, xmm2
+pmaxsw xmm1, dqword [rbx]
+
+pmaxub xmm1, xmm2
+pmaxub xmm1, dqword [rbx]
+
+pminsw xmm1, xmm2
+pminsw xmm1, dqword [rbx]
+
+pminub xmm1, xmm2
+pminub xmm1, dqword [rbx]
+
+pmovmskb eax, xmm2
+
+pmulhuw xmm1, xmm2
+pmulhuw xmm1, dqword [rbx]
+
+pmulhw xmm1, xmm2
+pmulhw xmm1, dqword [rbx]
+
+pmullw xmm1, xmm2
+pmullw xmm1, dqword [rbx]
+
+pmuludq xmm1, xmm2
+pmuludq xmm1, dqword [rbx]
+
+por xmm1, xmm2
+por xmm1, dqword [rbx]
+
+psadbw xmm1, xmm2
+psadbw xmm1, dqword [rbx]
+
+pshufd xmm1, xmm2, byte 0
+pshufd xmm1, dqword [rbx], byte 0
+
+pshufhw xmm1, xmm2, byte 0
+pshufhw xmm1, dqword [rbx], byte 0
+
+pshuflw xmm1, xmm2, byte 0
+pshuflw xmm1, dqword [rbx], byte 0
+
+pslld xmm1, xmm2
+pslld xmm1, dqword [rbx]
+pslld xmm1, byte 5
+
+pslldq xmm1, byte 5
+
+psllq xmm1, xmm2
+psllq xmm1, dqword [rbx]
+psllq xmm1, byte 5
+
+psllw xmm1, xmm2
+psllw xmm1, dqword [rbx]
+psllw xmm1, byte 5
+
+psrad xmm1, xmm2
+psrad xmm1, dqword [rbx]
+psrad xmm1, byte 5
+
+psraw xmm1, xmm2
+psraw xmm1, dqword [rbx]
+psraw xmm1, byte 5
+
+psrld xmm1, xmm2
+psrld xmm1, dqword [rbx]
+psrld xmm1, byte 5
+
+psrldq xmm1, byte 5
+
+psrlq xmm1, xmm2
+psrlq xmm1, dqword [rbx]
+psrlq xmm1, byte 5
+
+psrlw xmm1, xmm2
+psrlw xmm1, dqword [rbx]
+psrlw xmm1, byte 5
+
+psubb xmm1, xmm2
+psubb xmm1, dqword [rbx]
+
+psubd xmm1, xmm2
+psubd xmm1, dqword [rbx]
+
+psubq xmm1, xmm2
+psubq xmm1, dqword [rbx]
+
+psubsb xmm1, xmm2
+psubsb xmm1, dqword [rbx]
+
+psubsw xmm1, xmm2
+psubsw xmm1, dqword [rbx]
+
+psubusb xmm1, xmm2
+psubusb xmm1, dqword [rbx]
+
+psubusw xmm1, xmm2
+psubusw xmm1, dqword [rbx]
+
+psubw xmm1, xmm2
+psubw xmm1, dqword [rbx]
+
+punpckhbw xmm1, xmm2
+punpckhbw xmm1, dqword [rbx]
+
+punpckhdq xmm1, xmm2
+punpckhdq xmm1, dqword [rbx]
+
+punpckhqdq xmm1, xmm2
+punpckhqdq xmm1, dqword [rbx]
+
+punpckhwd xmm1, xmm2
+punpckhwd xmm1, dqword [rbx]
+
+punpcklbw xmm1, xmm2
+punpcklbw xmm1, dqword [rbx]
+
+punpckldq xmm1, xmm2
+punpckldq xmm1, dqword [rbx]
+
+punpcklqdq xmm1, xmm2
+punpcklqdq xmm1, dqword [rbx]
+
+punpcklwd xmm1, xmm2
+punpcklwd xmm1, dqword [rbx]
+
+pxor xmm1, xmm2
+pxor xmm1, dqword [rbx]
+
+rcpps xmm1, xmm2
+rcpps xmm1, dqword [rbx]
+
+rcpss xmm1, xmm2
+rcpss xmm1, dword [rbx]
+
+rsqrtps xmm1, xmm2
+rsqrtps xmm1, dqword [rbx]
+
+rsqrtss xmm1, xmm2
+rsqrtss xmm1, dword [rbx]
+
+shufpd xmm1, xmm2, 0
+shufpd xmm1, dqword [rbx], byte 0
+
+shufps xmm1, xmm2, 0
+shufps xmm1, dqword [rbx], byte 0
+
+sqrtpd xmm1, xmm2
+sqrtpd xmm1, dqword [rbx]
+
+sqrtps xmm1, xmm2
+sqrtps xmm1, dqword [rbx]
+
+sqrtsd xmm1, xmm2
+sqrtsd xmm1, qword [rbx]
+
+sqrtss xmm1, xmm2
+sqrtss xmm1, dword [rbx]
+
+stmxcsr dword [rbx]
+
+subpd xmm1, xmm2
+subpd xmm1, dqword [rbx]
+
+subps xmm1, xmm2
+subps xmm1, dqword [rbx]
+
+subsd xmm1, xmm2
+subsd xmm1, qword [rbx]
+
+subss xmm1, xmm2
+subss xmm1, dword [rbx]
+
+ucomisd xmm1, xmm2
+ucomisd xmm1, qword [rbx]
+
+ucomiss xmm1, xmm2
+ucomiss xmm1, dword [rbx]
+
+unpckhpd xmm1, xmm2
+unpckhpd xmm1, dqword [rbx]
+
+unpckhps xmm1, xmm2
+unpckhps xmm1, dqword [rbx]
+
+unpcklpd xmm1, xmm2
+unpcklpd xmm1, dqword [rbx]
+
+unpcklps xmm1, xmm2
+unpcklps xmm1, dqword [rbx]
+
+xorpd xmm1, xmm2
+xorpd xmm1, dqword [rbx]
+
+xorps xmm1, xmm2
+xorps xmm1, dqword [rbx]
+
diff --git a/modules/arch/x86/tests/ssewidth.hex b/modules/arch/x86/tests/ssewidth.hex
new file mode 100644
index 00000000..fc24e8da
--- /dev/null
+++ b/modules/arch/x86/tests/ssewidth.hex
@@ -0,0 +1,1532 @@
+66
+0f
+58
+ca
+66
+0f
+58
+0b
+0f
+58
+ca
+0f
+58
+0b
+f2
+0f
+58
+ca
+f2
+0f
+58
+0b
+f3
+0f
+58
+ca
+f3
+0f
+58
+0b
+66
+0f
+d0
+ca
+66
+0f
+d0
+0b
+f2
+0f
+d0
+ca
+f2
+0f
+d0
+0b
+66
+0f
+55
+ca
+66
+0f
+55
+0b
+0f
+55
+ca
+0f
+55
+0b
+66
+0f
+54
+ca
+66
+0f
+54
+0b
+0f
+54
+ca
+0f
+54
+0b
+66
+0f
+c2
+ca
+00
+66
+0f
+c2
+0b
+00
+66
+0f
+c2
+ca
+00
+66
+0f
+c2
+0b
+00
+0f
+c2
+ca
+00
+0f
+c2
+0b
+00
+0f
+c2
+ca
+00
+0f
+c2
+0b
+00
+f2
+0f
+c2
+ca
+00
+f2
+0f
+c2
+0b
+00
+f2
+0f
+c2
+ca
+00
+f2
+0f
+c2
+0b
+00
+f3
+0f
+c2
+ca
+00
+f3
+0f
+c2
+0b
+00
+f3
+0f
+c2
+ca
+00
+f3
+0f
+c2
+0b
+00
+66
+0f
+2f
+ca
+66
+0f
+2f
+0b
+0f
+2f
+ca
+0f
+2f
+0b
+f3
+0f
+e6
+ca
+f3
+0f
+e6
+0b
+0f
+5b
+ca
+0f
+5b
+0b
+f2
+0f
+e6
+ca
+f2
+0f
+e6
+0b
+66
+0f
+2d
+ca
+66
+0f
+2d
+0b
+66
+0f
+5a
+ca
+66
+0f
+5a
+0b
+66
+0f
+2a
+ca
+66
+0f
+2a
+0b
+0f
+2a
+ca
+0f
+2a
+0b
+66
+0f
+5b
+ca
+66
+0f
+5b
+0b
+0f
+5a
+ca
+0f
+5a
+0b
+0f
+2d
+ca
+0f
+2d
+0b
+f2
+48
+0f
+2d
+da
+f2
+48
+0f
+2d
+1b
+f2
+0f
+5a
+ca
+f2
+0f
+5a
+0b
+f2
+0f
+2a
+cb
+f2
+0f
+2a
+0b
+f2
+48
+0f
+2a
+cb
+f2
+48
+0f
+2a
+0b
+f3
+0f
+2a
+cb
+f3
+0f
+2a
+0b
+f3
+48
+0f
+2a
+cb
+f3
+48
+0f
+2a
+0b
+f3
+0f
+5a
+ca
+f3
+0f
+5a
+0b
+f3
+0f
+2d
+da
+f3
+0f
+2d
+1b
+f3
+48
+0f
+2d
+da
+f3
+48
+0f
+2d
+1b
+66
+0f
+e6
+ca
+66
+0f
+e6
+0b
+66
+0f
+2c
+ca
+66
+0f
+2c
+0b
+f3
+0f
+5b
+ca
+f3
+0f
+5b
+0b
+0f
+2c
+ca
+0f
+2c
+0b
+f2
+0f
+2c
+c1
+f2
+0f
+2c
+03
+f2
+48
+0f
+2c
+c1
+f2
+48
+0f
+2c
+03
+f3
+0f
+2c
+c1
+f3
+0f
+2c
+03
+f3
+48
+0f
+2c
+c1
+f3
+48
+0f
+2c
+03
+66
+0f
+5e
+ca
+66
+0f
+5e
+0b
+0f
+5e
+ca
+0f
+5e
+0b
+f2
+0f
+5e
+ca
+f2
+0f
+5e
+0b
+f3
+0f
+5e
+ca
+f3
+0f
+5e
+0b
+66
+0f
+78
+c1
+00
+01
+66
+0f
+78
+c1
+00
+01
+66
+0f
+79
+ca
+66
+0f
+7c
+ca
+66
+0f
+7c
+0b
+f2
+0f
+7c
+ca
+f2
+0f
+7c
+0b
+66
+0f
+7d
+ca
+66
+0f
+7d
+0b
+f2
+0f
+7d
+ca
+f2
+0f
+7d
+0b
+f2
+0f
+78
+ca
+00
+01
+f2
+0f
+78
+ca
+00
+01
+f2
+0f
+79
+ca
+f2
+0f
+f0
+0b
+0f
+ae
+13
+66
+0f
+f7
+ca
+66
+0f
+5f
+ca
+66
+0f
+5f
+0b
+0f
+5f
+ca
+0f
+5f
+0b
+f2
+0f
+5f
+ca
+f2
+0f
+5f
+0b
+f3
+0f
+5f
+ca
+f3
+0f
+5f
+0b
+66
+0f
+5d
+ca
+66
+0f
+5d
+0b
+0f
+5d
+ca
+0f
+5d
+0b
+f2
+0f
+5d
+ca
+f2
+0f
+5d
+0b
+f3
+0f
+5d
+ca
+f3
+0f
+5d
+0b
+66
+0f
+28
+ca
+66
+0f
+28
+0b
+66
+0f
+29
+13
+0f
+28
+ca
+0f
+28
+0b
+0f
+29
+13
+66
+0f
+6e
+cb
+66
+0f
+6e
+0b
+66
+48
+0f
+6e
+cb
+66
+48
+0f
+6e
+0b
+66
+0f
+7e
+13
+66
+48
+0f
+7e
+13
+f2
+0f
+12
+ca
+f2
+0f
+12
+0b
+f2
+0f
+d6
+ca
+66
+0f
+6f
+ca
+66
+0f
+6f
+0b
+66
+0f
+7f
+13
+f3
+0f
+6f
+ca
+f3
+0f
+6f
+0b
+f3
+0f
+7f
+13
+0f
+12
+ca
+66
+0f
+16
+0b
+66
+0f
+17
+13
+0f
+16
+0b
+0f
+17
+13
+0f
+16
+ca
+66
+0f
+12
+0b
+66
+0f
+13
+13
+0f
+12
+0b
+0f
+13
+13
+66
+0f
+50
+da
+0f
+50
+da
+66
+0f
+e7
+13
+66
+0f
+2b
+13
+0f
+2b
+13
+f2
+0f
+2b
+13
+f3
+0f
+2b
+13
+f3
+0f
+7e
+ca
+f3
+0f
+7e
+0b
+66
+0f
+d6
+13
+f3
+0f
+d6
+ca
+f2
+0f
+10
+ca
+f2
+0f
+10
+0b
+f2
+0f
+11
+13
+f3
+0f
+16
+ca
+f3
+0f
+16
+0b
+f3
+0f
+12
+ca
+f3
+0f
+12
+0b
+f3
+0f
+10
+ca
+f3
+0f
+10
+0b
+f3
+0f
+11
+13
+66
+0f
+10
+ca
+66
+0f
+10
+0b
+66
+0f
+11
+13
+0f
+10
+ca
+0f
+10
+0b
+0f
+11
+13
+66
+0f
+59
+ca
+66
+0f
+59
+0b
+0f
+59
+ca
+0f
+59
+0b
+f2
+0f
+59
+ca
+f2
+0f
+59
+0b
+f3
+0f
+59
+ca
+f3
+0f
+59
+0b
+66
+0f
+56
+ca
+66
+0f
+56
+0b
+0f
+56
+ca
+0f
+56
+0b
+66
+0f
+6b
+ca
+66
+0f
+6b
+0b
+66
+0f
+63
+ca
+66
+0f
+63
+0b
+66
+0f
+67
+ca
+66
+0f
+67
+0b
+66
+0f
+fc
+ca
+66
+0f
+fc
+0b
+66
+0f
+fe
+ca
+66
+0f
+fe
+0b
+66
+0f
+d4
+ca
+66
+0f
+d4
+0b
+66
+0f
+ec
+ca
+66
+0f
+ec
+0b
+66
+0f
+ed
+ca
+66
+0f
+ed
+0b
+66
+0f
+dc
+ca
+66
+0f
+dc
+0b
+66
+0f
+dd
+ca
+66
+0f
+dd
+0b
+66
+0f
+fd
+ca
+66
+0f
+fd
+0b
+66
+0f
+db
+ca
+66
+0f
+db
+0b
+66
+0f
+df
+ca
+66
+0f
+df
+0b
+66
+0f
+e0
+ca
+66
+0f
+e0
+0b
+66
+0f
+e3
+ca
+66
+0f
+e3
+0b
+66
+0f
+74
+ca
+66
+0f
+74
+0b
+66
+0f
+76
+ca
+66
+0f
+76
+0b
+66
+0f
+75
+ca
+66
+0f
+75
+0b
+66
+0f
+64
+ca
+66
+0f
+64
+0b
+66
+0f
+66
+ca
+66
+0f
+66
+0b
+66
+0f
+65
+ca
+66
+0f
+65
+0b
+66
+0f
+c5
+da
+00
+66
+0f
+c4
+cb
+00
+66
+0f
+c4
+0b
+00
+66
+0f
+f5
+ca
+66
+0f
+f5
+0b
+66
+0f
+ee
+ca
+66
+0f
+ee
+0b
+66
+0f
+de
+ca
+66
+0f
+de
+0b
+66
+0f
+ea
+ca
+66
+0f
+ea
+0b
+66
+0f
+da
+ca
+66
+0f
+da
+0b
+66
+0f
+d7
+c2
+66
+0f
+e4
+ca
+66
+0f
+e4
+0b
+66
+0f
+e5
+ca
+66
+0f
+e5
+0b
+66
+0f
+d5
+ca
+66
+0f
+d5
+0b
+66
+0f
+f4
+ca
+66
+0f
+f4
+0b
+66
+0f
+eb
+ca
+66
+0f
+eb
+0b
+66
+0f
+f6
+ca
+66
+0f
+f6
+0b
+66
+0f
+70
+ca
+00
+66
+0f
+70
+0b
+00
+f3
+0f
+70
+ca
+00
+f3
+0f
+70
+0b
+00
+f2
+0f
+70
+ca
+00
+f2
+0f
+70
+0b
+00
+66
+0f
+f2
+ca
+66
+0f
+f2
+0b
+66
+0f
+72
+f1
+05
+66
+0f
+73
+f9
+05
+66
+0f
+f3
+ca
+66
+0f
+f3
+0b
+66
+0f
+73
+f1
+05
+66
+0f
+f1
+ca
+66
+0f
+f1
+0b
+66
+0f
+71
+f1
+05
+66
+0f
+e2
+ca
+66
+0f
+e2
+0b
+66
+0f
+72
+e1
+05
+66
+0f
+e1
+ca
+66
+0f
+e1
+0b
+66
+0f
+71
+e1
+05
+66
+0f
+d2
+ca
+66
+0f
+d2
+0b
+66
+0f
+72
+d1
+05
+66
+0f
+73
+d9
+05
+66
+0f
+d3
+ca
+66
+0f
+d3
+0b
+66
+0f
+73
+d1
+05
+66
+0f
+d1
+ca
+66
+0f
+d1
+0b
+66
+0f
+71
+d1
+05
+66
+0f
+f8
+ca
+66
+0f
+f8
+0b
+66
+0f
+fa
+ca
+66
+0f
+fa
+0b
+66
+0f
+fb
+ca
+66
+0f
+fb
+0b
+66
+0f
+e8
+ca
+66
+0f
+e8
+0b
+66
+0f
+e9
+ca
+66
+0f
+e9
+0b
+66
+0f
+d8
+ca
+66
+0f
+d8
+0b
+66
+0f
+d9
+ca
+66
+0f
+d9
+0b
+66
+0f
+f9
+ca
+66
+0f
+f9
+0b
+66
+0f
+68
+ca
+66
+0f
+68
+0b
+66
+0f
+6a
+ca
+66
+0f
+6a
+0b
+66
+0f
+6d
+ca
+66
+0f
+6d
+0b
+66
+0f
+69
+ca
+66
+0f
+69
+0b
+66
+0f
+60
+ca
+66
+0f
+60
+0b
+66
+0f
+62
+ca
+66
+0f
+62
+0b
+66
+0f
+6c
+ca
+66
+0f
+6c
+0b
+66
+0f
+61
+ca
+66
+0f
+61
+0b
+66
+0f
+ef
+ca
+66
+0f
+ef
+0b
+0f
+53
+ca
+0f
+53
+0b
+f3
+0f
+53
+ca
+f3
+0f
+53
+0b
+0f
+52
+ca
+0f
+52
+0b
+f3
+0f
+52
+ca
+f3
+0f
+52
+0b
+66
+0f
+c6
+ca
+00
+66
+0f
+c6
+0b
+00
+0f
+c6
+ca
+00
+0f
+c6
+0b
+00
+66
+0f
+51
+ca
+66
+0f
+51
+0b
+0f
+51
+ca
+0f
+51
+0b
+f2
+0f
+51
+ca
+f2
+0f
+51
+0b
+f3
+0f
+51
+ca
+f3
+0f
+51
+0b
+0f
+ae
+1b
+66
+0f
+5c
+ca
+66
+0f
+5c
+0b
+0f
+5c
+ca
+0f
+5c
+0b
+f2
+0f
+5c
+ca
+f2
+0f
+5c
+0b
+f3
+0f
+5c
+ca
+f3
+0f
+5c
+0b
+66
+0f
+2e
+ca
+66
+0f
+2e
+0b
+0f
+2e
+ca
+0f
+2e
+0b
+66
+0f
+15
+ca
+66
+0f
+15
+0b
+0f
+15
+ca
+0f
+15
+0b
+66
+0f
+14
+ca
+66
+0f
+14
+0b
+0f
+14
+ca
+0f
+14
+0b
+66
+0f
+57
+ca
+66
+0f
+57
+0b
+0f
+57
+ca
+0f
+57
+0b